/* The copyright in this software is being made available under the BSD * License, included below. This software may be subject to other third party * and contributor rights, including patent rights, and no such rights are * granted under this license. * * Copyright (c) 2010-2015, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /** \file TEncSearch.cpp \brief encoder search class */ #include "TLibCommon/CommonDef.h" #include "TLibCommon/TComRom.h" #include "TLibCommon/TComMotionInfo.h" #include "TEncSearch.h" #include "TLibCommon/TComTU.h" #include "TLibCommon/Debug.h" #include #include //! \ingroup TLibEncoder //! \{ static const TComMv s_acMvRefineH[9] = { TComMv( 0, 0 ), // 0 TComMv( 0, -1 ), // 1 TComMv( 0, 1 ), // 2 TComMv( -1, 0 ), // 3 TComMv( 1, 0 ), // 4 TComMv( -1, -1 ), // 5 TComMv( 1, -1 ), // 6 TComMv( -1, 1 ), // 7 TComMv( 1, 1 ) // 8 }; static const TComMv s_acMvRefineQ[9] = { TComMv( 0, 0 ), // 0 TComMv( 0, -1 ), // 1 TComMv( 0, 1 ), // 2 TComMv( -1, -1 ), // 5 TComMv( 1, -1 ), // 6 TComMv( -1, 0 ), // 3 TComMv( 1, 0 ), // 4 TComMv( -1, 1 ), // 7 TComMv( 1, 1 ) // 8 }; static Void offsetSubTUCBFs(TComTU &rTu, const ComponentID compID) { TComDataCU *pcCU = rTu.getCU(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(compID); const UInt partIdxesPerSubTU = rTu.GetAbsPartIdxNumParts(compID) >> 1; //move the CBFs down a level and set the parent CBF UChar subTUCBF[2]; UChar combinedSubTUCBF = 0; for (UInt subTU = 0; subTU < 2; subTU++) { const UInt subTUAbsPartIdx = uiAbsPartIdx + (subTU * partIdxesPerSubTU); subTUCBF[subTU] = pcCU->getCbf(subTUAbsPartIdx, compID, uiTrDepth); combinedSubTUCBF |= subTUCBF[subTU]; } for (UInt subTU = 0; subTU < 2; subTU++) { const UInt subTUAbsPartIdx = uiAbsPartIdx + (subTU * partIdxesPerSubTU); const UChar compositeCBF = (subTUCBF[subTU] << 1) | combinedSubTUCBF; pcCU->setCbfPartRange((compositeCBF << uiTrDepth), compID, subTUAbsPartIdx, partIdxesPerSubTU); } } TEncSearch::TEncSearch() : m_puhQTTempTrIdx(NULL) , m_pcQTTempTComYuv(NULL) , m_pcEncCfg (NULL) , m_pcTrQuant (NULL) , m_pcRdCost (NULL) , m_pcEntropyCoder (NULL) , m_iSearchRange (0) , m_bipredSearchRange (0) , m_motionEstimationSearchMethod (MESEARCH_FULL) , m_pppcRDSbacCoder (NULL) , m_pcRDGoOnSbacCoder (NULL) , m_pTempPel (NULL) , 
m_isInitialized (false) { for (UInt ch=0; chgetQuadtreeTULog2MaxSize()-m_pcEncCfg->getQuadtreeTULog2MinSize()+1; for (UInt ch=0; chgetChromaFormatIdc(); initTempBuff(cform); m_pTempPel = new Pel[maxCUWidth*maxCUHeight]; const UInt uiNumLayersToAllocate = pcEncCfg->getQuadtreeTULog2MaxSize()-pcEncCfg->getQuadtreeTULog2MinSize()+1; const UInt uiNumPartitions = 1<<(maxTotalCUDepth<<1); for (UInt ch=0; ch>(csx+csy)]; #if ADAPTIVE_QP_SELECTION m_ppcQTTempArlCoeff[ch][layer] = new TCoeff[(maxCUWidth*maxCUHeight)>>(csx+csy) ]; #endif } m_phQTTempCrossComponentPredictionAlpha[ch] = new SChar [uiNumPartitions]; m_pSharedPredTransformSkip[ch] = new Pel [MAX_CU_SIZE*MAX_CU_SIZE]; m_pcQTTempTUCoeff[ch] = new TCoeff[MAX_CU_SIZE*MAX_CU_SIZE]; #if ADAPTIVE_QP_SELECTION m_ppcQTTempTUArlCoeff[ch] = new TCoeff[MAX_CU_SIZE*MAX_CU_SIZE]; #endif m_puhQTTempTransformSkipFlag[ch] = new UChar [uiNumPartitions]; } m_puhQTTempTrIdx = new UChar [uiNumPartitions]; m_pcQTTempTComYuv = new TComYuv[uiNumLayersToAllocate]; for( UInt ui = 0; ui < uiNumLayersToAllocate; ++ui ) { m_pcQTTempTComYuv[ui].create( maxCUWidth, maxCUHeight, pcEncCfg->getChromaFormatIdc() ); } m_pcQTTempTransformSkipTComYuv.create( maxCUWidth, maxCUHeight, pcEncCfg->getChromaFormatIdc() ); m_tmpYuvPred.create(MAX_CU_SIZE, MAX_CU_SIZE, pcEncCfg->getChromaFormatIdc()); m_isInitialized = true; } __inline Void TEncSearch::xTZSearchHelp( const TComPattern* const pcPatternKey, IntTZSearchStruct& rcStruct, const Int iSearchX, const Int iSearchY, const UChar ucPointNr, const UInt uiDistance ) { Distortion uiSad = 0; const Pel* const piRefSrch = rcStruct.piRefY + iSearchY * rcStruct.iYStride + iSearchX; #if NH_3D_IC m_cDistParam.bUseIC = pcPatternKey->getICFlag(); #endif #if NH_3D_SDC_INTER m_cDistParam.bUseSDCMRSAD = pcPatternKey->getSDCMRSADFlag(); #endif //-- jclee for using the SAD function pointer m_pcRdCost->setDistParam( pcPatternKey, piRefSrch, rcStruct.iYStride, m_cDistParam ); setDistParamComp(COMPONENT_Y); // distortion 
m_cDistParam.bitDepth = pcPatternKey->getBitDepthY();
  // seed the early-exit threshold of the distortion function with the current best cost
  m_cDistParam.m_maximumDistortionForEarlyExit = rcStruct.uiBestSad;

  if((m_pcEncCfg->getRestrictMESampling() == false) && m_pcEncCfg->getMotionEstimationSearchMethod() == MESEARCH_SELECTIVE)
  {
    Int isubShift = 0;
    // motion cost
    Distortion uiBitCost = m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY );

    // Skip search if bit cost is already larger than best SAD
    if (uiBitCost < rcStruct.uiBestSad)
    {
      // initial row sub-sampling shift depends on the block height
      if ( m_cDistParam.iRows > 32 )
      {
        m_cDistParam.iSubShift = 4;
      }
      else if ( m_cDistParam.iRows > 16 )
      {
        m_cDistParam.iSubShift = 3;
      }
      else if ( m_cDistParam.iRows > 8 )
      {
        m_cDistParam.iSubShift = 2;
      }
      else
      {
        m_cDistParam.iSubShift = 1;
      }

      Distortion uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
      if((uiTempSad + uiBitCost) < rcStruct.uiBestSad)
      {
        uiSad += uiTempSad >>  m_cDistParam.iSubShift;
        // accumulate further partial SADs, each starting (stride << isubShift)
        // samples further down, stopping as soon as the scaled running total
        // plus the bit cost exceeds the best cost so far
        while(m_cDistParam.iSubShift > 0)
        {
          isubShift         = m_cDistParam.iSubShift -1;
          m_cDistParam.pOrg = pcPatternKey->getROIY() + (pcPatternKey->getPatternLStride() << isubShift);
          m_cDistParam.pCur = piRefSrch + (rcStruct.iYStride << isubShift);
          uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
          uiSad += uiTempSad >>  m_cDistParam.iSubShift;

          if(((uiSad << isubShift) + uiBitCost) > rcStruct.uiBestSad)
          {
            break;
          }

          m_cDistParam.iSubShift--;
        }

        // iSubShift only reaches 0 when the loop above ran to completion
        if(m_cDistParam.iSubShift == 0)
        {
          uiSad += uiBitCost;
          if( uiSad < rcStruct.uiBestSad )
          {
            rcStruct.uiBestSad      = uiSad;
            rcStruct.iBestX         = iSearchX;
            rcStruct.iBestY         = iSearchY;
            rcStruct.uiBestDistance = uiDistance;
            rcStruct.uiBestRound    = 0;
            rcStruct.ucPointNr      = ucPointNr;
            m_cDistParam.m_maximumDistortionForEarlyExit = uiSad;
          }
        }
      }
    }
  }
  else
  {
    // fast encoder decision: use subsampled SAD when rows > 8 for integer ME
    if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE3 )
    {
      if ( m_cDistParam.iRows > 8 )
      {
        m_cDistParam.iSubShift = 1;
      }
    }

    uiSad = m_cDistParam.DistFunc( &m_cDistParam );

    // only add motion cost if uiSad is smaller than best. Otherwise pointless
    // to add motion cost.
    if( uiSad < rcStruct.uiBestSad )
    {
      // motion cost
      uiSad += m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY );

      if( uiSad < rcStruct.uiBestSad )
      {
        rcStruct.uiBestSad      = uiSad;
        rcStruct.iBestX         = iSearchX;
        rcStruct.iBestY         = iSearchY;
        rcStruct.uiBestDistance = uiDistance;
        rcStruct.uiBestRound    = 0;
        rcStruct.ucPointNr      = ucPointNr;
        m_cDistParam.m_maximumDistortionForEarlyExit = uiSad;
      }
    }
  }
}

/** Tests the two positions adjacent to the current best position that are
 *  selected by rcStruct.ucPointNr (1..8, numbered as in the diagram below).
 *
 * Every candidate is evaluated through xTZSearchHelp with point number 0 and
 * distance 2, and only if it lies inside the search range.
 * A point number outside 1..8 triggers assert( false ).
 *
 * \param pcPatternKey   pattern of the block being searched for
 * \param rcStruct       search state; the best position is read and may be updated
 * \param pcMvSrchRngLT  left/top limits of the search range
 * \param pcMvSrchRngRB  right/bottom limits of the search range
 */
__inline Void TEncSearch::xTZ2PointSearch( const TComPattern* const pcPatternKey, IntTZSearchStruct& rcStruct, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB )
{
  Int   iSrchRngHorLeft   = pcMvSrchRngLT->getHor();
  Int   iSrchRngHorRight  = pcMvSrchRngRB->getHor();
  Int   iSrchRngVerTop    = pcMvSrchRngLT->getVer();
  Int   iSrchRngVerBottom = pcMvSrchRngRB->getVer();

  // 2 point search,                   //   1 2 3
  // check only the 2 untested points  //   4 0 5
  // around the start point            //   6 7 8
  Int iStartX = rcStruct.iBestX;
  Int iStartY = rcStruct.iBestY;
  switch( rcStruct.ucPointNr )
  {
    case 1:
    {
      if ( (iStartX - 1) >= iSrchRngHorLeft )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY, 0, 2 );
      }
      if ( (iStartY - 1) >= iSrchRngVerTop )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY - 1, 0, 2 );
      }
    }
      break;
    case 2:
    {
      if ( (iStartY - 1) >= iSrchRngVerTop )
      {
        if ( (iStartX - 1) >= iSrchRngHorLeft )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY - 1, 0, 2 );
        }
        if ( (iStartX + 1) <= iSrchRngHorRight )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY - 1, 0, 2 );
        }
      }
    }
      break;
    case 3:
    {
      if ( (iStartY - 1) >= iSrchRngVerTop )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY - 1, 0, 2 );
      }
      if ( (iStartX + 1) <= iSrchRngHorRight )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY, 0, 2 );
      }
    }
      break;
    case 4:
    {
      if ( (iStartX - 1) >= iSrchRngHorLeft )
      {
        if ( (iStartY + 1) <= iSrchRngVerBottom )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY + 1, 0, 2 );
        }
        if ( (iStartY - 1) >= iSrchRngVerTop )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY - 1, 0, 2 );
        }
      }
    }
      break;
    case 5:
    {
      if ( (iStartX + 1) <= iSrchRngHorRight )
      {
        if ( (iStartY - 1) >= iSrchRngVerTop )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY - 1, 0, 2 );
        }
        if ( (iStartY + 1) <= iSrchRngVerBottom )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY + 1, 0, 2 );
        }
      }
    }
      break;
    case 6:
    {
      if ( (iStartX - 1) >= iSrchRngHorLeft )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY , 0, 2 );
      }
      if ( (iStartY + 1) <= iSrchRngVerBottom )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY + 1, 0, 2 );
      }
    }
      break;
    case 7:
    {
      if ( (iStartY + 1) <= iSrchRngVerBottom )
      {
        if ( (iStartX - 1) >= iSrchRngHorLeft )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY + 1, 0, 2 );
        }
        if ( (iStartX + 1) <= iSrchRngHorRight )
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY + 1, 0, 2 );
        }
      }
    }
      break;
    case 8:
    {
      if ( (iStartX + 1) <= iSrchRngHorRight )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY, 0, 2 );
      }
      if ( (iStartY + 1) <= iSrchRngVerBottom )
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY + 1, 0, 2 );
      }
    }
      break;
    default:
    {
      assert( false );
    }
      break;
  } // switch( rcStruct.ucPointNr )
}

/** Tests up to eight positions on a square of half-width iDist centred on
 *  (iStartX, iStartY); positions outside the search range are skipped.
 *  Increments rcStruct.uiBestRound before testing.
 */
__inline Void TEncSearch::xTZ8PointSquareSearch( const TComPattern* const pcPatternKey, IntTZSearchStruct& rcStruct, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB, const Int iStartX, const Int iStartY, const Int iDist )
{
  const Int   iSrchRngHorLeft   = pcMvSrchRngLT->getHor();
  const Int   iSrchRngHorRight  = pcMvSrchRngRB->getHor();
  const Int   iSrchRngVerTop    = pcMvSrchRngLT->getVer();
  const Int   iSrchRngVerBottom = pcMvSrchRngRB->getVer();

  // 8 point search,                   //   1 2 3
  // search around the start point     //   4 0 5
  // with the required distance        //   6 7 8
  assert( iDist != 0 );
  const Int iTop        = iStartY - iDist;
  const Int iBottom     = iStartY + iDist;
  const Int iLeft       = iStartX - iDist;
  const Int iRight      = iStartX
+ iDist;
  rcStruct.uiBestRound += 1;

  if ( iTop >= iSrchRngVerTop ) // check top
  {
    if ( iLeft >= iSrchRngHorLeft ) // check top left
    {
      xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iTop, 1, iDist );
    }
    // top middle
    xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );

    if ( iRight <= iSrchRngHorRight ) // check top right
    {
      xTZSearchHelp( pcPatternKey, rcStruct, iRight, iTop, 3, iDist );
    }
  } // check top
  if ( iLeft >= iSrchRngHorLeft ) // check middle left
  {
    xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
  }
  if ( iRight <= iSrchRngHorRight ) // check middle right
  {
    xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
  }
  if ( iBottom <= iSrchRngVerBottom ) // check bottom
  {
    if ( iLeft >= iSrchRngHorLeft ) // check bottom left
    {
      xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iBottom, 6, iDist );
    }
    // check bottom middle
    xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );

    if ( iRight <= iSrchRngHorRight ) // check bottom right
    {
      xTZSearchHelp( pcPatternKey, rcStruct, iRight, iBottom, 8, iDist );
    }
  } // check bottom
}

/** Tests positions on a diamond of radius iDist centred on (iStartX, iStartY).
 *
 * - iDist == 1: the four edge neighbours, plus the four corners when
 *   bCheckCornersAtDist1 is set.
 * - 1 < iDist <= 8: the four diamond tips at iDist and the four diagonal
 *   points at iDist>>1.
 * - iDist > 8: the four tips plus three intermediate points along each
 *   diamond edge (steps of iDist>>2); all are reported with point number 0.
 *
 * Positions outside the search range are skipped. rcStruct.uiBestRound is
 * incremented before testing.
 */
__inline Void TEncSearch::xTZ8PointDiamondSearch( const TComPattern*const pcPatternKey, IntTZSearchStruct& rcStruct, const TComMv*const pcMvSrchRngLT, const TComMv*const pcMvSrchRngRB, const Int iStartX, const Int iStartY, const Int iDist, const Bool bCheckCornersAtDist1 )
{
  const Int   iSrchRngHorLeft   = pcMvSrchRngLT->getHor();
  const Int   iSrchRngHorRight  = pcMvSrchRngRB->getHor();
  const Int   iSrchRngVerTop    = pcMvSrchRngLT->getVer();
  const Int   iSrchRngVerBottom = pcMvSrchRngRB->getVer();

  // 8 point search,                   //   1 2 3
  // search around the start point     //   4 0 5
  // with the required distance        //   6 7 8
  assert ( iDist != 0 );
  const Int iTop        = iStartY - iDist;
  const Int iBottom     = iStartY + iDist;
  const Int iLeft       = iStartX - iDist;
  const Int iRight      = iStartX + iDist;
  rcStruct.uiBestRound += 1;

  if ( iDist == 1 )
  {
    if ( iTop >= iSrchRngVerTop ) // check top
    {
      if (bCheckCornersAtDist1)
      {
        if ( iLeft >= iSrchRngHorLeft) // check top left
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iTop, 1, iDist );
        }
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
        if ( iRight <= iSrchRngHorRight ) // check top right
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iRight, iTop, 3, iDist );
        }
      }
      else
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
      }
    }
    if ( iLeft >= iSrchRngHorLeft ) // check middle left
    {
      xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
    }
    if ( iRight <= iSrchRngHorRight ) // check middle right
    {
      xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
    }
    if ( iBottom <= iSrchRngVerBottom ) // check bottom
    {
      if (bCheckCornersAtDist1)
      {
        if ( iLeft >= iSrchRngHorLeft) // check bottom left
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iBottom, 6, iDist );
        }
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
        if ( iRight <= iSrchRngHorRight ) // check bottom right
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iRight, iBottom, 8, iDist );
        }
      }
      else
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
      }
    }
  }
  else
  {
    if ( iDist <= 8 )
    {
      const Int iTop_2      = iStartY - (iDist>>1);
      const Int iBottom_2   = iStartY + (iDist>>1);
      const Int iLeft_2     = iStartX - (iDist>>1);
      const Int iRight_2    = iStartX + (iDist>>1);

      // fast path: the whole diamond is inside the range, no per-point checks needed
      if (  iTop >= iSrchRngVerTop && iLeft >= iSrchRngHorLeft &&
          iRight <= iSrchRngHorRight && iBottom <= iSrchRngVerBottom ) // check border
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX,  iTop,      2, iDist    );
        xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2,  iTop_2,    1, iDist>>1 );
        xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iTop_2,    3, iDist>>1 );
        xTZSearchHelp( pcPatternKey, rcStruct, iLeft,    iStartY,   4, iDist    );
        xTZSearchHelp( pcPatternKey, rcStruct, iRight,   iStartY,   5, iDist    );
        xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2,  iBottom_2, 6, iDist>>1 );
        xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iBottom_2, 8, iDist>>1 );
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX,  iBottom,   7, iDist    );
      }
      else // check border
      {
        if ( iTop >= iSrchRngVerTop ) // check top
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
        }
        if ( iTop_2 >= iSrchRngVerTop ) // check half top
        {
          if ( iLeft_2 >= iSrchRngHorLeft ) // check half left
          {
            xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iTop_2, 1, (iDist>>1) );
          }
          if ( iRight_2 <= iSrchRngHorRight ) // check half right
          {
            xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iTop_2, 3, (iDist>>1) );
          }
        } // check half top
        if ( iLeft >= iSrchRngHorLeft ) // check left
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
        }
        if ( iRight <= iSrchRngHorRight ) // check right
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
        }
        if ( iBottom_2 <= iSrchRngVerBottom ) // check half bottom
        {
          if ( iLeft_2 >= iSrchRngHorLeft ) // check half left
          {
            xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iBottom_2, 6, (iDist>>1) );
          }
          if ( iRight_2 <= iSrchRngHorRight ) // check half right
          {
            xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iBottom_2, 8, (iDist>>1) );
          }
        } // check half bottom
        if ( iBottom <= iSrchRngVerBottom ) // check bottom
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
        }
      } // check border
    }
    else // iDist > 8
    {
      // fast path: the whole diamond is inside the range
      if ( iTop >= iSrchRngVerTop && iLeft >= iSrchRngHorLeft &&
          iRight <= iSrchRngHorRight && iBottom <= iSrchRngVerBottom ) // check border
      {
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop,    0, iDist );
        xTZSearchHelp( pcPatternKey, rcStruct, iLeft,   iStartY, 0, iDist );
        xTZSearchHelp( pcPatternKey, rcStruct, iRight,  iStartY, 0, iDist );
        xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 0, iDist );
        // three intermediate points on each of the four diamond edges
        for ( Int index = 1; index < 4; index++ )
        {
          const Int iPosYT = iTop    + ((iDist>>2) * index);
          const Int iPosYB = iBottom - ((iDist>>2) * index);
          const Int iPosXL = iStartX - ((iDist>>2) * index);
          const Int iPosXR = iStartX + ((iDist>>2) * index);
          xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYT, 0, iDist );
          xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYT, 0, iDist );
          xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYB, 0, iDist );
          xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYB, 0, iDist );
        }
      }
      else // check border
      {
        if ( iTop >= iSrchRngVerTop ) // check top
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 0, iDist );
        }
        if ( iLeft >= iSrchRngHorLeft ) // check left
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 0, iDist );
        }
        if ( iRight <= iSrchRngHorRight ) // check right
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 0, iDist );
        }
        if ( iBottom <= iSrchRngVerBottom ) // check bottom
        {
          xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 0, iDist );
        }
        for ( Int index = 1; index < 4; index++ )
        {
          const Int iPosYT = iTop    + ((iDist>>2) * index);
          const Int iPosYB = iBottom - ((iDist>>2) * index);
          const Int iPosXL = iStartX - ((iDist>>2) * index);
          const Int iPosXR = iStartX + ((iDist>>2) * index);

          if ( iPosYT >= iSrchRngVerTop ) // check top
          {
            if ( iPosXL >= iSrchRngHorLeft ) // check left
            {
              xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYT, 0, iDist );
            }
            if ( iPosXR <= iSrchRngHorRight ) // check right
            {
              xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYT, 0, iDist );
            }
          } // check top
          if ( iPosYB <= iSrchRngVerBottom ) // check bottom
          {
            if ( iPosXL >= iSrchRngHorLeft ) // check left
            {
              xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYB, 0, iDist );
            }
            if ( iPosXR <= iSrchRngHorRight ) // check right
            {
              xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYB, 0, iDist );
            }
          } // check bottom
        } // for ...
} // check border } // iDist <= 8 } // iDist == 1 } Distortion TEncSearch::xPatternRefinement( TComPattern* pcPatternKey, TComMv baseRefMv, Int iFrac, TComMv& rcMvFrac, Bool bAllowUseOfHadamard ) { Distortion uiDist; Distortion uiDistBest = std::numeric_limits::max(); UInt uiDirecBest = 0; Pel* piRefPos; Int iRefStride = m_filteredBlock[0][0].getStride(COMPONENT_Y); m_pcRdCost->setDistParam( pcPatternKey, m_filteredBlock[0][0].getAddr(COMPONENT_Y), iRefStride, 1, m_cDistParam, m_pcEncCfg->getUseHADME() && bAllowUseOfHadamard ); const TComMv* pcMvRefine = (iFrac == 2 ? s_acMvRefineH : s_acMvRefineQ); for (UInt i = 0; i < 9; i++) { TComMv cMvTest = pcMvRefine[i]; cMvTest += baseRefMv; Int horVal = cMvTest.getHor() * iFrac; Int verVal = cMvTest.getVer() * iFrac; piRefPos = m_filteredBlock[ verVal & 3 ][ horVal & 3 ].getAddr(COMPONENT_Y); if ( horVal == 2 && ( verVal & 1 ) == 0 ) { piRefPos += 1; } if ( ( horVal & 1 ) == 0 && verVal == 2 ) { piRefPos += iRefStride; } cMvTest = pcMvRefine[i]; cMvTest += rcMvFrac; setDistParamComp(COMPONENT_Y); #if NH_3D_IC m_cDistParam.bUseIC = pcPatternKey->getICFlag(); #endif #if NH_3D_SDC_INTER m_cDistParam.bUseSDCMRSAD = pcPatternKey->getSDCMRSADFlag(); #endif m_cDistParam.pCur = piRefPos; m_cDistParam.bitDepth = pcPatternKey->getBitDepthY(); uiDist = m_cDistParam.DistFunc( &m_cDistParam ); uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cMvTest.getHor(), cMvTest.getVer() ); if ( uiDist < uiDistBest ) { uiDistBest = uiDist; uiDirecBest = i; m_cDistParam.m_maximumDistortionForEarlyExit = uiDist; } } rcMvFrac = pcMvRefine[uiDirecBest]; return uiDistBest; } Void TEncSearch::xEncSubdivCbfQT(TComTU &rTu, Bool bLuma, Bool bChroma ) { TComDataCU* pcCU=rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); const UInt uiSubdiv = ( uiTrMode > uiTrDepth ? 
1 : 0 ); const UInt uiLog2LumaTrafoSize = rTu.GetLog2LumaTrSize(); if( pcCU->isIntra(0) && pcCU->getPartitionSize(0) == SIZE_NxN && uiTrDepth == 0 ) { assert( uiSubdiv ); } else if( uiLog2LumaTrafoSize > pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() ) { assert( uiSubdiv ); } else if( uiLog2LumaTrafoSize == pcCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() ) { assert( !uiSubdiv ); } else if( uiLog2LumaTrafoSize == pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ) { assert( !uiSubdiv ); } else { assert( uiLog2LumaTrafoSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ); if( bLuma ) { m_pcEntropyCoder->encodeTransformSubdivFlag( uiSubdiv, 5 - uiLog2LumaTrafoSize ); } } if ( bChroma ) { const UInt numberValidComponents = getNumberValidComponents(rTu.GetChromaFormat()); for (UInt ch=COMPONENT_Cb; chgetCbf( uiAbsPartIdx, compID, uiTrDepth-1 ) )) { m_pcEntropyCoder->encodeQtCbf(rTu, compID, (uiSubdiv == 0)); } } } if( uiSubdiv ) { TComTURecurse tuRecurse(rTu, false); do { xEncSubdivCbfQT( tuRecurse, bLuma, bChroma ); } while (tuRecurse.nextSection(rTu)); } else { //===== Cbfs ===== if( bLuma ) { m_pcEntropyCoder->encodeQtCbf( rTu, COMPONENT_Y, true ); } } } Void TEncSearch::xEncCoeffQT(TComTU &rTu, const ComponentID component, Bool bRealCoeff ) { TComDataCU* pcCU=rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiTrDepth=rTu.GetTransformDepthRel(); const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); const UInt uiSubdiv = ( uiTrMode > uiTrDepth ? 
1 : 0 );

  if( uiSubdiv )
  {
    // not a leaf: recurse into every child TU
    TComTURecurse tuRecurseChild(rTu, false);
    do
    {
      xEncCoeffQT( tuRecurseChild, component, bRealCoeff );
    } while (tuRecurseChild.nextSection(rTu) );
  }
  else if (rTu.ProcessComponentSection(component))
  {
    //===== coefficients =====
    const UInt  uiLog2TrafoSize = rTu.GetLog2LumaTrSize();
    UInt    uiCoeffOffset   = rTu.getCoefficientOffset(component);
    UInt    uiQTLayer       = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrafoSize;
    // bRealCoeff selects the CU's own coefficient buffer over the per-layer temporary one
    TCoeff* pcCoeff         = bRealCoeff ? pcCU->getCoeff(component) : m_ppcQTTempCoeff[component][uiQTLayer];

    if (isChroma(component) && (pcCU->getCbf( rTu.GetAbsPartIdxTU(), COMPONENT_Y, uiTrMode ) != 0) && pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() )
    {
      m_pcEntropyCoder->encodeCrossComponentPrediction( rTu, component );
    }

    m_pcEntropyCoder->encodeCoeffNxN( rTu, pcCoeff+uiCoeffOffset, component );
  }
}

/** Writes the intra CU header syntax for the partition at uiAbsPartIdx.
 *
 * With bLuma set, the CU-level flags are written when uiAbsPartIdx == 0,
 * followed by the luma intra direction(s). With bChroma set, the chroma intra
 * direction(s) are written. Returns early, before any intra direction, when
 * the CU is a 2Nx2N intra CU with the IPCM flag set.
 *
 * NOTE: the NH_3D_DIS blocks open and close braces across #if boundaries, so
 * the brace nesting differs depending on whether that macro is enabled.
 */
Void
TEncSearch::xEncIntraHeader( TComDataCU*  pcCU,
                            UInt         uiTrDepth,
                            UInt         uiAbsPartIdx,
                            Bool         bLuma,
                            Bool         bChroma )
{
  if( bLuma )
  {
    // CU header
    if( uiAbsPartIdx == 0 )
    {
      if( !pcCU->getSlice()->isIntra() )
      {
        if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
        {
          m_pcEntropyCoder->encodeCUTransquantBypassFlag( pcCU, 0, true );
        }
        m_pcEntropyCoder->encodeSkipFlag( pcCU, 0, true );
#if NH_3D_DIS
        m_pcEntropyCoder->encodeDIS(pcCU, 0, true );
        if(!pcCU->getDISFlag(uiAbsPartIdx))
#endif
        m_pcEntropyCoder->encodePredMode( pcCU, 0, true );
      }
#if NH_3D_DIS
      else
      {
        m_pcEntropyCoder->encodeDIS(pcCU, 0, true );
      }
#endif
#if NH_3D_DIS
      if(!pcCU->getDISFlag(uiAbsPartIdx))
      {
#endif
      m_pcEntropyCoder  ->encodePartSize( pcCU, 0, pcCU->getDepth(0), true );

      if (pcCU->isIntra(0) && pcCU->getPartitionSize(0) == SIZE_2Nx2N )
      {
        m_pcEntropyCoder->encodeIPCMInfo( pcCU, 0, true );

        if ( pcCU->getIPCMFlag (0))
        {
#if NH_3D_SDC_INTRA
          m_pcEntropyCoder->encodeSDCFlag( pcCU, 0, true );
#endif
          return;
        }
      }
#if NH_3D_DIS
      }
#endif
    }
#if NH_3D_DIS
    if(!pcCU->getDISFlag(uiAbsPartIdx))
    {
#endif
    // luma prediction mode
    if( pcCU->getPartitionSize(0) == SIZE_2Nx2N )
    {
      if (uiAbsPartIdx==0)
      {
        m_pcEntropyCoder->encodeIntraDirModeLuma ( pcCU, 0 );
#if NH_3D_SDC_INTRA
        m_pcEntropyCoder->encodeSDCFlag( pcCU, 0, true );
#if NH_3D_DMM
        if( pcCU->getSlice()->getIsDepth() && ( !pcCU->getSDCFlag( 0 ) ) && isDmmMode( pcCU->getIntraDir( CHANNEL_TYPE_LUMA, 0 ) ) )
        {
          m_pcEntropyCoder->encodeDeltaDC( pcCU, 0 );
        }
#endif
#endif
      }
    }
    else
    {
      // one luma mode per quarter of the CU, written at the first partition of each quarter
      UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
      if (uiTrDepth>0 && (uiAbsPartIdx%uiQNumParts)==0)
      {
        m_pcEntropyCoder->encodeIntraDirModeLuma ( pcCU, uiAbsPartIdx );
#if NH_3D_SDC_INTRA
        if( uiAbsPartIdx == 0 )
        {
          m_pcEntropyCoder->encodeSDCFlag( pcCU, 0, true );
        }
#if NH_3D_DMM
        if( pcCU->getSlice()->getIsDepth() && ( !pcCU->getSDCFlag( uiAbsPartIdx ) ) && isDmmMode( pcCU->getIntraDir( CHANNEL_TYPE_LUMA, uiAbsPartIdx ) ) )
        {
          m_pcEntropyCoder->encodeDeltaDC( pcCU, uiAbsPartIdx );
        }
#endif
#endif
#if NH_3D_DIS
      }
#endif
      }
    }
  }

  if( bChroma )
  {
    if( pcCU->getPartitionSize(0) == SIZE_2Nx2N || !enable4ChromaPUsInIntraNxNCU(pcCU->getPic()->getChromaFormat()))
    {
      // a single chroma mode for the whole CU
      if(uiAbsPartIdx==0)
      {
        m_pcEntropyCoder->encodeIntraDirModeChroma ( pcCU, uiAbsPartIdx );
      }
    }
    else
    {
      // one chroma mode per quarter of the CU
      UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
      assert(uiTrDepth>0);
      if ((uiAbsPartIdx%uiQNumParts)==0)
      {
        m_pcEntropyCoder->encodeIntraDirModeChroma ( pcCU, uiAbsPartIdx );
      }
    }
  }
}

/** Counts the bits needed to code the intra header, the subdivision/CBF flags
 *  and the coefficients of a TU for the selected channels.
 *
 * The entropy coder's bit counter is reset first, so the returned value covers
 * only what this call writes.
 *
 * \param rTu         transform unit to code
 * \param bLuma       include the luma syntax and COMPONENT_Y coefficients
 * \param bChroma     include the chroma syntax and Cb/Cr coefficients
 * \param bRealCoeff  forwarded to xEncCoeffQT to choose the coefficient buffer
 * \returns number of bits written by the entropy coder
 */
UInt
TEncSearch::xGetIntraBitsQT(TComTU &rTu,
                            Bool         bLuma,
                            Bool         bChroma,
                            Bool         bRealCoeff /* just for test */ )
{
  TComDataCU* pcCU=rTu.getCU();
  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
  const UInt uiTrDepth=rTu.GetTransformDepthRel();
  m_pcEntropyCoder->resetBits();
  xEncIntraHeader ( pcCU, uiTrDepth, uiAbsPartIdx, bLuma, bChroma );
  xEncSubdivCbfQT ( rTu, bLuma, bChroma );

  if( bLuma )
  {
    xEncCoeffQT   ( rTu, COMPONENT_Y,      bRealCoeff );
  }
  if( bChroma )
  {
    xEncCoeffQT   ( rTu, COMPONENT_Cb,  bRealCoeff );
    xEncCoeffQT   ( rTu, COMPONENT_Cr,  bRealCoeff );
  }
  UInt   uiBits = m_pcEntropyCoder->getNumberOfWrittenBits();

  return uiBits;
}

UInt
TEncSearch::xGetIntraBitsQTChroma(TComTU &rTu, ComponentID compID, Bool bRealCoeff /* just for test */ ) { m_pcEntropyCoder->resetBits(); xEncCoeffQT ( rTu, compID, bRealCoeff ); UInt uiBits = m_pcEntropyCoder->getNumberOfWrittenBits(); return uiBits; } Void TEncSearch::xIntraCodingTUBlock( TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE], const Bool checkCrossCPrediction, #if NH_3D_VSO Dist& ruiDist, #else Distortion& ruiDist, #endif const ComponentID compID, TComTU& rTu DEBUG_STRING_FN_DECLARE(sDebug) ,Int default0Save1Load2 #if NH_3D_ENC_DEPTH , Bool zeroResiFlag #endif ) { if (!rTu.ProcessComponentSection(compID)) { return; } const Bool bIsLuma = isLuma(compID); const TComRectangle &rect = rTu.getRect(compID); TComDataCU *pcCU = rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const TComSPS &sps = *(pcCU->getSlice()->getSPS()); const UInt uiTrDepth = rTu.GetTransformDepthRelAdj(compID); const UInt uiFullDepth = rTu.GetTransformDepthTotal(); const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); const ChromaFormat chFmt = pcOrgYuv->getChromaFormat(); const ChannelType chType = toChannelType(compID); const Int bitDepth = sps.getBitDepth(chType); const UInt uiWidth = rect.width; const UInt uiHeight = rect.height; const UInt uiStride = pcOrgYuv ->getStride (compID); Pel *piOrg = pcOrgYuv ->getAddr( compID, uiAbsPartIdx ); Pel *piPred = pcPredYuv->getAddr( compID, uiAbsPartIdx ); Pel *piResi = pcResiYuv->getAddr( compID, uiAbsPartIdx ); Pel *piReco = pcPredYuv->getAddr( compID, uiAbsPartIdx ); const UInt uiQTLayer = sps.getQuadtreeTULog2MaxSize() - uiLog2TrSize; Pel *piRecQt = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( compID, uiAbsPartIdx ); const UInt uiRecQtStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride(compID); const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx; Pel *piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder 
); UInt uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride ( compID ); TCoeff *pcCoeff = m_ppcQTTempCoeff[compID][uiQTLayer] + rTu.getCoefficientOffset(compID); Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID); #if ADAPTIVE_QP_SELECTION TCoeff *pcArlCoeff = m_ppcQTTempArlCoeff[compID][ uiQTLayer ] + rTu.getCoefficientOffset(compID); #endif const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx ); const UInt partsPerMinCU = 1<<(2*(sps.getMaxTotalCUDepth() - sps.getLog2DiffMaxMinCodingBlockSize())); const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && !bIsLuma) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt, partsPerMinCU)) : uiChPredMode; const UInt uiChFinalMode = ((chFmt == CHROMA_422) && !bIsLuma) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode; const Int blkX = g_auiRasterToPelX[ g_auiZscanToRaster[ uiAbsPartIdx ] ]; const Int blkY = g_auiRasterToPelY[ g_auiZscanToRaster[ uiAbsPartIdx ] ]; const Int bufferOffset = blkX + (blkY * MAX_CU_SIZE); Pel *const encoderLumaResidual = resiLuma[RESIDUAL_ENCODER_SIDE ] + bufferOffset; Pel *const reconstructedLumaResidual = resiLuma[RESIDUAL_RECONSTRUCTED] + bufferOffset; const Bool bUseCrossCPrediction = isChroma(compID) && (uiChPredMode == DM_CHROMA_IDX) && checkCrossCPrediction; const Bool bUseReconstructedResidualForEstimate = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate(); Pel *const lumaResidualForEstimate = bUseReconstructedResidualForEstimate ? 
reconstructedLumaResidual : encoderLumaResidual; #if DEBUG_STRING const Int debugPredModeMask=DebugStringGetPredModeMask(MODE_INTRA); #endif //===== init availability pattern ===== DEBUG_STRING_NEW(sTemp) #if !DEBUG_STRING if( default0Save1Load2 != 2 ) #endif { const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(compID, uiChFinalMode, uiWidth, uiHeight, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag()); initIntraPatternChType( rTu, compID, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sDebug) ); //===== get prediction signal ===== #if NH_3D_DMM if( bIsLuma && isDmmMode( uiChFinalMode ) ) { predIntraLumaDmm( pcCU, uiAbsPartIdx, getDmmType( uiChFinalMode ), piPred, uiStride, uiWidth, uiHeight ); } else { #endif predIntraAng( compID, uiChFinalMode, piOrg, uiStride, piPred, uiStride, rTu, bUseFilteredPredictions ); #if NH_3D_DMM } #endif // save prediction if( default0Save1Load2 == 1 ) { Pel* pPred = piPred; Pel* pPredBuf = m_pSharedPredTransformSkip[compID]; Int k = 0; for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { pPredBuf[ k ++ ] = pPred[ uiX ]; } pPred += uiStride; } } } #if !DEBUG_STRING else { // load prediction Pel* pPred = piPred; Pel* pPredBuf = m_pSharedPredTransformSkip[compID]; Int k = 0; for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { pPred[ uiX ] = pPredBuf[ k ++ ]; } pPred += uiStride; } } #endif //===== get residual signal ===== { // get residual Pel* pOrg = piOrg; Pel* pPred = piPred; Pel* pResi = piResi; for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { #if NH_3D_ENC_DEPTH if( zeroResiFlag ) { memset( pResi, 0, sizeof( Pel ) * uiWidth ); pResi += uiStride; } else { #endif for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { pResi[ uiX ] = pOrg[ uiX ] - pPred[ uiX ]; } pOrg += uiStride; pResi += uiStride; pPred += uiStride; #if NH_3D_ENC_DEPTH } #endif } } if 
(pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) { if (bUseCrossCPrediction) { if (xCalcCrossComponentPredictionAlpha( rTu, compID, lumaResidualForEstimate, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride ) == 0) { return; } TComTrQuant::crossComponentPrediction ( rTu, compID, reconstructedLumaResidual, piResi, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride, uiStride, false ); } else if (isLuma(compID) && !bUseReconstructedResidualForEstimate) { xStoreCrossComponentPredictionResult( encoderLumaResidual, piResi, rTu, 0, 0, MAX_CU_SIZE, uiStride ); } } //===== transform and quantization ===== //--- init rate estimation arrays for RDOQ --- if( useTransformSkip ? m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ() ) { m_pcEntropyCoder->estimateBit( m_pcTrQuant->m_pcEstBitsSbac, uiWidth, uiHeight, chType ); } //--- transform and quantization --- TCoeff uiAbsSum = 0; if (bIsLuma) { pcCU ->setTrIdxSubParts ( uiTrDepth, uiAbsPartIdx, uiFullDepth ); } const QpParam cQP(*pcCU, compID); #if RDOQ_CHROMA_LAMBDA m_pcTrQuant->selectLambda (compID); #endif m_pcTrQuant->transformNxN ( rTu, compID, piResi, uiStride, pcCoeff, #if ADAPTIVE_QP_SELECTION pcArlCoeff, #endif uiAbsSum, cQP ); //--- inverse transform --- #if DEBUG_STRING if ( (uiAbsSum > 0) || (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask) ) #else if ( uiAbsSum > 0 ) #endif { m_pcTrQuant->invTransformNxN ( rTu, compID, piResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sDebug, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) ); } else { Pel* pResi = piResi; memset( pcCoeff, 0, sizeof( TCoeff ) * uiWidth * uiHeight ); for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { memset( pResi, 0, sizeof( Pel ) * uiWidth ); pResi += uiStride; } } //===== reconstruction ===== { Pel* pPred = piPred; Pel* pResi = piResi; Pel* pReco = piReco; Pel* pRecQt = piRecQt; Pel* pRecIPred = piRecIPred; if 
(pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) { if (bUseCrossCPrediction) { TComTrQuant::crossComponentPrediction( rTu, compID, reconstructedLumaResidual, piResi, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride, uiStride, true ); } else if (isLuma(compID)) { xStoreCrossComponentPredictionResult( reconstructedLumaResidual, piResi, rTu, 0, 0, MAX_CU_SIZE, uiStride ); } } #if DEBUG_STRING std::stringstream ss(stringstream::out); const Bool bDebugPred=((DebugOptionList::DebugString_Pred.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID)); const Bool bDebugResi=((DebugOptionList::DebugString_Resi.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID)); const Bool bDebugReco=((DebugOptionList::DebugString_Reco.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID)); if (bDebugPred || bDebugResi || bDebugReco) { ss << "###: " << "CompID: " << compID << " pred mode (ch/fin): " << uiChPredMode << "/" << uiChFinalMode << " absPartIdx: " << rTu.GetAbsPartIdxTU() << "\n"; for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { ss << "###: "; if (bDebugPred) { ss << " - pred: "; for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { ss << pPred[ uiX ] << ", "; } } if (bDebugResi) { ss << " - resi: "; } for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { if (bDebugResi) { ss << pResi[ uiX ] << ", "; } pReco [ uiX ] = Pel(ClipBD( Int(pPred[uiX]) + Int(pResi[uiX]), bitDepth )); pRecQt [ uiX ] = pReco[ uiX ]; pRecIPred[ uiX ] = pReco[ uiX ]; } if (bDebugReco) { ss << " - reco: "; for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { ss << pReco[ uiX ] << ", "; } } pPred += uiStride; pResi += uiStride; pReco += uiStride; pRecQt += uiRecQtStride; pRecIPred += uiRecIPredStride; ss << "\n"; } DEBUG_STRING_APPEND(sDebug, ss.str()) } else #endif { for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { pReco [ uiX ] = Pel(ClipBD( Int(pPred[uiX]) + Int(pResi[uiX]), bitDepth )); pRecQt [ uiX ] = 
pReco[ uiX ]; pRecIPred[ uiX ] = pReco[ uiX ]; } pPred += uiStride; pResi += uiStride; pReco += uiStride; pRecQt += uiRecQtStride; pRecIPred += uiRecIPredStride; } } } //===== update distortion ===== #if NH_3D_VSO // M39 if ( m_pcRdCost->getUseVSO() ) { ruiDist += m_pcRdCost->getDistPartVSO ( pcCU, uiAbsPartIdx, bitDepth, piReco, uiStride, piOrg, uiStride, uiWidth, uiHeight, false ); } else #endif ruiDist += m_pcRdCost->getDistPart( bitDepth, piReco, uiStride, piOrg, uiStride, uiWidth, uiHeight, compID ); } Void TEncSearch::xRecurIntraCodingLumaQT(TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE], #if NH_3D_VSO Dist& ruiDistY, #else Distortion& ruiDistY, #endif #if HHI_RQT_INTRA_SPEEDUP Bool bCheckFirst, #endif Double& dRDCost, TComTU& rTu DEBUG_STRING_FN_DECLARE(sDebug) #if NH_3D_ENC_DEPTH , Bool zeroResiFlag #endif ) { #if NH_MV D_PRINT_INC_INDENT( g_traceModeCheck, "xRecurIntraCodingLumaQT; zeroResiFlag " + n2s(zeroResiFlag) ) #endif TComDataCU *pcCU = rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiFullDepth = rTu.GetTransformDepthTotal(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); Bool bCheckFull = ( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() ); Bool bCheckSplit = ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ); Pel resiLumaSplit [NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]; Pel resiLumaSingle[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]; Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES]; for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++) { bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise } bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate()); #if HHI_RQT_INTRA_SPEEDUP Int 
maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize(); Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE); // don't check split if TU size is less or equal to max TU size Bool noSplitIntraMaxTuSize = bCheckFull; if(m_pcEncCfg->getRDpenalty() && ! isIntraSlice) { // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice noSplitIntraMaxTuSize = ( uiLog2TrSize <= min(maxTuSize,4) ); // if maximum RD-penalty don't check TU size 32x32 if(m_pcEncCfg->getRDpenalty()==2) { bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4)); } } if( bCheckFirst && noSplitIntraMaxTuSize ) { bCheckSplit = false; } #else Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize(); Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE); // if maximum RD-penalty don't check TU size 32x32 if((m_pcEncCfg->getRDpenalty()==2) && !isIntraSlice) { bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4)); } #endif #if NH_3D_DMM if( isDmmMode( pcCU->getIntraDir( CHANNEL_TYPE_LUMA, uiAbsPartIdx ) ) ) { bCheckSplit = false; } #endif Double dSingleCost = MAX_DOUBLE; #if NH_3D_VSO Dist uiSingleDistLuma = 0; #else Distortion uiSingleDistLuma = 0; #endif UInt uiSingleCbfLuma = 0; Bool checkTransformSkip = pcCU->getSlice()->getPPS()->getUseTransformSkip(); Int bestModeId[MAX_NUM_COMPONENT] = { 0, 0, 0}; checkTransformSkip &= TUCompRectHasAssociatedTransformSkipFlag(rTu.getRect(COMPONENT_Y), pcCU->getSlice()->getPPS()->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize()); checkTransformSkip &= (!pcCU->getCUTransquantBypass(0)); assert (rTu.ProcessComponentSection(COMPONENT_Y)); const UInt totalAdjustedDepthChan = rTu.GetTransformDepthTotalAdj(COMPONENT_Y); if ( m_pcEncCfg->getUseTransformSkipFast() ) { checkTransformSkip &= (pcCU->getPartitionSize(uiAbsPartIdx)==SIZE_NxN); } if( bCheckFull ) { if(checkTransformSkip == true) { //----- store original entropy coding status ----- m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ 
uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); #if NH_3D_VSO Dist singleDistTmpLuma = 0; #else Distortion singleDistTmpLuma = 0; #endif UInt singleCbfTmpLuma = 0; Double singleCostTmp = 0; Int firstCheckId = 0; for(Int modeId = firstCheckId; modeId < 2; modeId ++) { DEBUG_STRING_NEW(sModeString) Int default0Save1Load2 = 0; singleDistTmpLuma=0; if(modeId == firstCheckId) { default0Save1Load2 = 1; } else { default0Save1Load2 = 2; } pcCU->setTransformSkipSubParts ( modeId, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan ); xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, singleDistTmpLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sModeString), default0Save1Load2 ); singleCbfTmpLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth ); //----- determine rate and r-d cost ----- if(modeId == 1 && singleCbfTmpLuma == 0) { //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. singleCostTmp = MAX_DOUBLE; } else { UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false ); #if NH_3D_VSO // M NEW if ( m_pcRdCost->getUseLambdaScaleVSO() ) { singleCostTmp = m_pcRdCost->calcRdCostVSO( uiSingleBits, singleDistTmpLuma ); } else #endif singleCostTmp = m_pcRdCost->calcRdCost( uiSingleBits, singleDistTmpLuma ); } if(singleCostTmp < dSingleCost) { DEBUG_STRING_SWAP(sDebug, sModeString) dSingleCost = singleCostTmp; uiSingleDistLuma = singleDistTmpLuma; uiSingleCbfLuma = singleCbfTmpLuma; bestModeId[COMPONENT_Y] = modeId; if(bestModeId[COMPONENT_Y] == firstCheckId) { xStoreIntraResultQT(COMPONENT_Y, rTu ); m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_TEMP_BEST ] ); } if (pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) { const Int xOffset = rTu.getRect( COMPONENT_Y ).x0; const Int yOffset = rTu.getRect( COMPONENT_Y ).y0; for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++) { if 
(bMaintainResidual[storedResidualIndex]) { xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE); } } } } if (modeId == firstCheckId) { m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } } pcCU ->setTransformSkipSubParts ( bestModeId[COMPONENT_Y], COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan ); if(bestModeId[COMPONENT_Y] == firstCheckId) { xLoadIntraResultQT(COMPONENT_Y, rTu ); pcCU->setCbfSubParts ( uiSingleCbfLuma << uiTrDepth, COMPONENT_Y, uiAbsPartIdx, rTu.GetTransformDepthTotalAdj(COMPONENT_Y) ); m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiFullDepth ][ CI_TEMP_BEST ] ); } } else { //----- store original entropy coding status ----- if( bCheckSplit ) { m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } //----- code luma/chroma block with given intra prediction mode and store Cbf----- dSingleCost = 0.0; #if NH_3D_ENC_DEPTH xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug), 0, zeroResiFlag ); #else pcCU ->setTransformSkipSubParts ( 0, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan ); xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug)); #endif if( bCheckSplit ) { uiSingleCbfLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth ); } //----- determine rate and r-d cost ----- UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false ); if(m_pcEncCfg->getRDpenalty() && (uiLog2TrSize==5) && !isIntraSlice) { uiSingleBits=uiSingleBits*4; } #if NH_3D_VSO // M40 if ( m_pcRdCost->getUseLambdaScaleVSO()) { dSingleCost = m_pcRdCost->calcRdCostVSO( uiSingleBits, uiSingleDistLuma ); } else #endif dSingleCost = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDistLuma ); if 
(pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) { const Int xOffset = rTu.getRect( COMPONENT_Y ).x0; const Int yOffset = rTu.getRect( COMPONENT_Y ).y0; for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++) { if (bMaintainResidual[storedResidualIndex]) { xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE); } } } } } if( bCheckSplit ) { //----- store full entropy coding status, load original entropy coding status ----- if( bCheckFull ) { m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_TEST ] ); m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } else { m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } //----- code splitted block ----- Double dSplitCost = 0.0; #if NH_3D_VSO Dist uiSplitDistLuma = 0; #else Distortion uiSplitDistLuma = 0; #endif UInt uiSplitCbfLuma = 0; TComTURecurse tuRecurseChild(rTu, false); DEBUG_STRING_NEW(sSplit) do { DEBUG_STRING_NEW(sChild) #if HHI_RQT_INTRA_SPEEDUP xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, bCheckFirst, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) ); #else xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) ); #endif DEBUG_STRING_APPEND(sSplit, sChild) uiSplitCbfLuma |= pcCU->getCbf( tuRecurseChild.GetAbsPartIdxTU(), COMPONENT_Y, tuRecurseChild.GetTransformDepthRel() ); } while (tuRecurseChild.nextSection(rTu) ); UInt uiPartsDiv = rTu.GetAbsPartIdxNumParts(); { if (uiSplitCbfLuma) { const UInt flag=1<getCbf( COMPONENT_Y ); for( UInt uiOffs = 0; uiOffs < uiPartsDiv; uiOffs++ ) { pBase[ uiAbsPartIdx + uiOffs ] |= flag; } } } //----- restore context states ----- 
m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); //----- determine rate and r-d cost ----- UInt uiSplitBits = xGetIntraBitsQT( rTu, true, false, false ); #if NH_3D_VSO // M41 if( m_pcRdCost->getUseLambdaScaleVSO() ) { dSplitCost = m_pcRdCost->calcRdCostVSO( uiSplitBits, uiSplitDistLuma ); } else #endif dSplitCost = m_pcRdCost->calcRdCost( uiSplitBits, uiSplitDistLuma ); //===== compare and set best ===== if( dSplitCost < dSingleCost ) { //--- update cost --- DEBUG_STRING_SWAP(sSplit, sDebug) ruiDistY += uiSplitDistLuma; dRDCost += dSplitCost; if (pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) { const Int xOffset = rTu.getRect( COMPONENT_Y ).x0; const Int yOffset = rTu.getRect( COMPONENT_Y ).y0; for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++) { if (bMaintainResidual[storedResidualIndex]) { xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSplit[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE); } } } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif return; } //----- set entropy coding status ----- m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_TEST ] ); //--- set transform index and Cbf values --- pcCU->setTrIdxSubParts( uiTrDepth, uiAbsPartIdx, uiFullDepth ); const TComRectangle &tuRect=rTu.getRect(COMPONENT_Y); pcCU->setCbfSubParts ( uiSingleCbfLuma << uiTrDepth, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan ); pcCU ->setTransformSkipSubParts ( bestModeId[COMPONENT_Y], COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan ); //--- set reconstruction for next intra prediction blocks --- const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx; const UInt uiWidth = tuRect.width; const UInt uiHeight = tuRect.height; Pel* piSrc = m_pcQTTempTComYuv[ 
uiQTLayer ].getAddr( COMPONENT_Y, uiAbsPartIdx ); UInt uiSrcStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride ( COMPONENT_Y ); Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder ); UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride ( COMPONENT_Y ); for( UInt uiY = 0; uiY < uiHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { piDes[ uiX ] = piSrc[ uiX ]; } } } #if NH_3D_VSO // M42 if ( m_pcRdCost->getUseRenModel() && bCheckFull ) { UInt uiWidth = pcCU->getWidth ( 0 ) >> uiTrDepth; UInt uiHeight = pcCU->getHeight( 0 ) >> uiTrDepth; UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; Pel* piSrc = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( COMPONENT_Y, uiAbsPartIdx ); UInt uiSrcStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride( COMPONENT_Y ); m_pcRdCost->setRenModelData( pcCU, uiAbsPartIdx, piSrc, (Int) uiSrcStride, (Int) uiWidth, (Int) uiHeight ); } #endif ruiDistY += uiSingleDistLuma; dRDCost += dSingleCost; #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } #if NH_3D_DIS #if NH_3D_VSO Void TEncSearch::xIntraCodingDIS( TComDataCU* pcCU, UInt uiAbsPartIdx, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, Dist& ruiDist, Double& dRDCost, UInt uiPredMode ) #else Void TEncSearch::xIntraCodingDIS( TComDataCU* pcCU, UInt uiAbsPartIdx, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, Distortion& ruiDist, Double& dRDCost, UInt uiPredMode ) #endif { UInt uiWidth = pcCU ->getWidth ( 0 ); UInt uiHeight = pcCU ->getHeight ( 0 ); UInt uiStride = pcOrgYuv ->getStride (COMPONENT_Y); Pel* piOrg = pcOrgYuv ->getAddr(COMPONENT_Y, uiAbsPartIdx ); Pel* piPred = pcPredYuv->getAddr(COMPONENT_Y, uiAbsPartIdx ); AOF( uiWidth == uiHeight ); AOF( uiAbsPartIdx == 0 ); pcCU->setDISTypeSubParts((UChar)uiPredMode, uiAbsPartIdx, pcCU->getDepth(0)); //===== reconstruction ===== TComTURecurse rTu(pcCU, 0); const ChromaFormat chFmt = rTu.GetChromaFormat(); 
DEBUG_STRING_NEW(sTemp) if ( uiPredMode == 0 ) { const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, VER_IDX, uiWidth, uiHeight, chFmt, pcCU->getSlice()->getSPS()->getSpsRangeExtension().getIntraSmoothingDisabledFlag()); initIntraPatternChType( rTu, COMPONENT_Y, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sTemp) ); predIntraAng( COMPONENT_Y, VER_IDX, 0, uiStride, piPred, uiStride, rTu, bUseFilteredPredictions ); } else if ( uiPredMode == 1 ) { const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, HOR_IDX, uiWidth, uiHeight, chFmt, pcCU->getSlice()->getSPS()->getSpsRangeExtension().getIntraSmoothingDisabledFlag()); initIntraPatternChType( rTu, COMPONENT_Y, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sTemp) ); predIntraAng( COMPONENT_Y, HOR_IDX, 0, uiStride, piPred, uiStride, rTu, bUseFilteredPredictions ); } else if ( uiPredMode == 2 ) { Pel pSingleDepth = 1 << ( pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 1 ); pcCU->getNeighDepth ( 0, 0, &pSingleDepth, 0 ); for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { piPred[ uiX ] = pSingleDepth; } piPred+= uiStride; } } else if ( uiPredMode == 3 ) { Pel pSingleDepth = 1 << ( pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 1 ); pcCU->getNeighDepth ( 0, 0, &pSingleDepth, 1 ); for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { piPred[ uiX ] = pSingleDepth; } piPred+= uiStride; } } // clear UV UInt uiStrideC = pcPredYuv->getStride(COMPONENT_Cb); Pel *pRecCb = pcPredYuv->getAddr(COMPONENT_Cb); Pel *pRecCr = pcPredYuv->getAddr(COMPONENT_Cr); for (Int y=0; ygetSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_CHROMA)-1); pRecCr[x] = 1<<(pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_CHROMA)-1); } pRecCb += uiStrideC; pRecCr += uiStrideC; } piPred = pcPredYuv->getAddr(COMPONENT_Y, uiAbsPartIdx ); //===== determine distortion ===== #if 
NH_3D_VSO if ( m_pcRdCost->getUseVSO() ) ruiDist = m_pcRdCost->getDistPartVSO ( pcCU, uiAbsPartIdx, pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA), piPred, uiStride, piOrg, uiStride, uiWidth, uiHeight, false ); else #endif ruiDist = m_pcRdCost->getDistPart(pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA), piPred, uiStride, piOrg, uiStride, uiWidth, uiHeight, COMPONENT_Y ); //===== determine rate and r-d cost ===== m_pcEntropyCoder->resetBits(); m_pcEntropyCoder->encodeDIS( pcCU, 0, true ); UInt uiBits = m_pcEntropyCoder->getNumberOfWrittenBits(); #if NH_3D_VSO if ( m_pcRdCost->getUseLambdaScaleVSO()) dRDCost = m_pcRdCost->calcRdCostVSO( uiBits, ruiDist ); else #endif dRDCost = m_pcRdCost->calcRdCost( uiBits, ruiDist ); } #endif #if NH_3D_SDC_INTRA #if NH_3D_VSO Void TEncSearch::xIntraCodingSDC( TComDataCU* pcCU, UInt uiAbsPartIdx, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, Dist& ruiDist, Double& dRDCost, Bool bZeroResidual, Int iSDCDeltaResi ) #else Void TEncSearch::xIntraCodingSDC( TComDataCU* pcCU, UInt uiAbsPartIdx, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, Distortion& ruiDist, Double& dRDCost, Bool bZeroResidual, Int iSDCDeltaResi ) #endif { UInt uiWidth = pcCU->getWidth ( 0 ); UInt uiHeight = pcCU->getHeight( 0 ); UInt uiLumaPredMode = pcCU->getIntraDir( CHANNEL_TYPE_LUMA, uiAbsPartIdx ); const Int bitDepthY = pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); const TComSPS &sps = *(pcCU->getSlice()->getSPS()); const ChromaFormat chFmt = pcCU->getPic()->getChromaFormat(); UInt sdcDepth = 0; UInt uiStride; Pel* piOrg; Pel* piPred; Pel* piReco; Pel* piRecIPred; UInt uiRecIPredStride; Pel apDCPredValues[2]; Pel apDCOrigValues[2]; UInt uiNumSegments; Bool* pbMask = NULL; UInt uiMaskStride = 0; #if NH_3D_DMM if( isDmmMode( uiLumaPredMode ) ) { assert( uiWidth == uiHeight ); assert( uiWidth >= DMM_MIN_SIZE && uiWidth <= DMM_MAX_SIZE ); assert( !(( uiWidth >> pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() ) > 1) ); uiNumSegments = 2; 
uiStride = pcOrgYuv ->getStride( COMPONENT_Y ); piOrg = pcOrgYuv ->getAddr ( COMPONENT_Y, uiAbsPartIdx ); piPred = pcPredYuv->getAddr ( COMPONENT_Y, uiAbsPartIdx ); piReco = pcPredYuv->getAddr ( COMPONENT_Y, uiAbsPartIdx ); piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr ( COMPONENT_Y, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu() + uiAbsPartIdx ); uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y ); //===== init availability pattern ===== TComTURecurse tuRecurseCU(pcCU, 0); TComTURecurse tuRecurseWithPU(tuRecurseCU, false, TComTU::DONT_SPLIT); DEBUG_STRING_NEW(sTemp) initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, false DEBUG_STRING_PASS_INTO(sTemp) ); // get partition pbMask = new Bool[ uiWidth*uiHeight ]; uiMaskStride = uiWidth; switch( getDmmType( uiLumaPredMode ) ) { case( DMM1_IDX ): { (getWedgeListScaled( uiWidth )->at( pcCU->getDmm1WedgeTabIdx( uiAbsPartIdx ) )).getPatternScaledCopy( uiWidth, pbMask ); } break; case( DMM4_IDX ): { predContourFromTex( pcCU, uiAbsPartIdx, uiWidth, uiHeight, pbMask ); } break; default: assert(0); } // get predicted partition values Pel predDC1 = 0, predDC2 = 0; predBiSegDCs( pcCU, uiAbsPartIdx, uiWidth, uiHeight, pbMask, uiMaskStride, predDC1, predDC2 ); // set prediction signal Pel* pDst = piPred; assignBiSegDCs( pDst, uiStride, pbMask, uiMaskStride, predDC1, predDC2 ); apDCPredValues[0] = predDC1; apDCPredValues[1] = predDC2; // get original partition values xCalcBiSegDCs( piOrg, uiStride, pbMask, uiMaskStride, apDCOrigValues[0], apDCOrigValues[1], 0, (uiMaskStride > 16) ); } else // regular HEVC intra modes { #endif uiNumSegments = 1; if( ( uiWidth >> pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() ) > 1 ) { sdcDepth = g_aucConvertToBit[uiWidth] + 2 - pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize(); } //===== loop over partitions ===== TComTURecurse tuRecurseCU(pcCU, 0); TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (sdcDepth==0)?TComTU::DONT_SPLIT:TComTU::QUAD_SPLIT); 
do { const TComRectangle &puRect = tuRecurseWithPU.getRect(COMPONENT_Y); const UInt uiAbsPartIdxTU = tuRecurseWithPU.GetAbsPartIdxTU(); Pel* piOrgTU = pcOrgYuv ->getAddr ( COMPONENT_Y, uiAbsPartIdxTU ); Pel* piPredTU = pcPredYuv->getAddr ( COMPONENT_Y, uiAbsPartIdxTU ); UInt uiStrideTU = pcPredYuv->getStride ( COMPONENT_Y ); Pel* piRecIPredTU = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu() + uiAbsPartIdxTU ); UInt uiRecIPredStrideTU = pcCU->getPic()->getPicYuvRec()->getStride(COMPONENT_Y); const Bool bUseFilter = TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, uiLumaPredMode, puRect.width, puRect.height, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag()); //===== init pattern for luma prediction ===== DEBUG_STRING_NEW(sTemp2) initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, bUseFilter DEBUG_STRING_PASS_INTO(sTemp2) ); predIntraAng( COMPONENT_Y, uiLumaPredMode, piOrgTU, uiStrideTU, piPredTU, uiStrideTU, tuRecurseWithPU, bUseFilter ); // copy for prediction of next part for( UInt uiY = 0; uiY < puRect.height; uiY++ ) { for( UInt uiX = 0; uiX < puRect.width; uiX++ ) { piPredTU [ uiX ] = ClipBD( piPredTU[ uiX ], bitDepthY ); piRecIPredTU [ uiX ] = piPredTU[ uiX ]; } piPredTU += uiStrideTU; piRecIPredTU += uiRecIPredStrideTU; } } while (tuRecurseWithPU.nextSection(tuRecurseCU)); // reset to full block uiWidth = pcCU->getWidth( 0 ); uiHeight = pcCU->getHeight( 0 ); uiStride = pcOrgYuv ->getStride( COMPONENT_Y ); piOrg = pcOrgYuv ->getAddr ( COMPONENT_Y, uiAbsPartIdx ); piPred = pcPredYuv->getAddr ( COMPONENT_Y, uiAbsPartIdx ); piReco = pcPredYuv->getAddr ( COMPONENT_Y, uiAbsPartIdx ); piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr ( COMPONENT_Y, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu() + uiAbsPartIdx ); uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y ); // get predicted and original DC predConstantSDC( piPred, uiStride, uiWidth, 
apDCPredValues[0] ); apDCPredValues[1] = 0; xCalcConstantSDC( piOrg, uiStride, uiWidth, apDCOrigValues[0] ); apDCOrigValues[1] = 0; #if NH_3D_DMM } #endif for( UInt uiSegment = 0; uiSegment < uiNumSegments; uiSegment++ ) { // remap reconstructed value to valid depth values Pel pDCRec = ( !bZeroResidual ) ? apDCOrigValues[uiSegment] : apDCPredValues[uiSegment]; // get residual (idx) #if NH_3D_DLT Pel pResidualIdx = pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), pDCRec ) - pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), apDCPredValues[uiSegment] ); #else Pel pResidualIdx = pDCRec - apDCPredValues[uiSegment]; #endif if( !bZeroResidual ) { #if NH_3D_DLT Pel pPredIdx = pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), apDCPredValues[uiSegment] ); Int pTestIdx = pPredIdx + pResidualIdx + iSDCDeltaResi; if( pTestIdx >= 0 && pTestIdx < pcCU->getSlice()->getPPS()->getDLT()->getNumDepthValues( pcCU->getSlice()->getLayerIdInVps() ) ) { pResidualIdx += iSDCDeltaResi; } #else pResidualIdx += iSDCDeltaResi; #endif } // save SDC DC offset pcCU->setSDCSegmentDCOffset(pResidualIdx, uiSegment, uiAbsPartIdx); } // reconstruct residual based on mask + DC residuals Pel apDCResiValues[2]; for( UInt uiSegment = 0; uiSegment < uiNumSegments; uiSegment++ ) { #if NH_3D_DLT Pel pPredIdx = pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), apDCPredValues[uiSegment] ); Pel pResiIdx = pcCU->getSDCSegmentDCOffset(uiSegment, uiAbsPartIdx); Pel pRecoValue = pcCU->getSlice()->getPPS()->getDLT()->idx2DepthValue( pcCU->getSlice()->getLayerIdInVps(), pPredIdx + pResiIdx ); apDCResiValues[uiSegment] = pRecoValue - apDCPredValues[uiSegment]; #else apDCResiValues[uiSegment] = pcCU->getSDCSegmentDCOffset(uiSegment, uiAbsPartIdx); #endif } //===== reconstruction ===== Bool* pMask = pbMask; Pel* pPred = piPred; Pel* pReco = piReco; Pel* 
pRecIPred = piRecIPred; for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { UChar ucSegment = pMask?(UChar)pMask[uiX]:0; assert( ucSegment < uiNumSegments ); Pel pResiDC = apDCResiValues[ucSegment]; pReco [ uiX ] = ClipBD( pPred[ uiX ] + pResiDC, bitDepthY); pRecIPred[ uiX ] = pReco[ uiX ]; } pPred += uiStride; pReco += uiStride; pRecIPred += uiRecIPredStride; pMask += uiMaskStride; } // clear chroma UInt uiStrideC = pcPredYuv->getStride( COMPONENT_Cb ); Pel *pRecCb = pcPredYuv->getAddr ( COMPONENT_Cb, uiAbsPartIdx ); Pel *pRecCr = pcPredYuv->getAddr ( COMPONENT_Cr, uiAbsPartIdx ); for (Int y=0; ygetUseVSO() ) { ruiDist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPartIdx, bitDepthY, piPred, uiStride, piOrg, uiStride, uiWidth, uiHeight, false ); } else #endif { #if NH_3D_FIX_INTRA_SDC_VSO_OFF ruiDist = m_pcRdCost->getDistPart( bitDepthY, piPred, uiStride, piOrg, uiStride, uiWidth, uiHeight, COMPONENT_Y ); #else ruiDist = m_pcRdCost->getDistPart( bitDepthY, piPred, uiStride, piOrg, uiStride, uiWidth, uiHeight, COMPONENT_Y, DF_SAD ); #endif } //===== determine rate and r-d cost ===== m_pcEntropyCoder->resetBits(); // encode reduced intra header if( !pcCU->getSlice()->isIntra() ) { if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag()) { m_pcEntropyCoder->encodeCUTransquantBypassFlag( pcCU, 0, true ); } m_pcEntropyCoder->encodeSkipFlag( pcCU, 0, true ); m_pcEntropyCoder->encodePredMode( pcCU, 0, true ); } m_pcEntropyCoder->encodePartSize( pcCU, 0, pcCU->getDepth( 0 ), true ); // encode pred direction + DC residual data m_pcEntropyCoder->encodePredInfo( pcCU, 0 ); m_pcEntropyCoder->encodeSDCFlag( pcCU, 0, true ); Bool bDummy = false; m_pcEntropyCoder->encodeCoeff( pcCU, 0, pcCU->getDepth( 0 ), bDummy, bDummy ); UInt uiBits = m_pcEntropyCoder->getNumberOfWrittenBits(); #if NH_3D_VSO if ( m_pcRdCost->getUseLambdaScaleVSO()) dRDCost = m_pcRdCost->calcRdCostVSO( uiBits, ruiDist ); else #endif dRDCost = m_pcRdCost->calcRdCost( 
uiBits, ruiDist ); } #endif Void TEncSearch::xSetIntraResultLumaQT(TComYuv* pcRecoYuv, TComTU &rTu) { TComDataCU *pcCU = rTu.getCU(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); if( uiTrMode == uiTrDepth ) { UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; //===== copy transform coefficients ===== const TComRectangle &tuRect=rTu.getRect(COMPONENT_Y); const UInt coeffOffset = rTu.getCoefficientOffset(COMPONENT_Y); const UInt numCoeffInBlock = tuRect.width * tuRect.height; if (numCoeffInBlock!=0) { const TCoeff* srcCoeff = m_ppcQTTempCoeff[COMPONENT_Y][uiQTLayer] + coeffOffset; TCoeff* destCoeff = pcCU->getCoeff(COMPONENT_Y) + coeffOffset; ::memcpy( destCoeff, srcCoeff, sizeof(TCoeff)*numCoeffInBlock ); #if ADAPTIVE_QP_SELECTION const TCoeff* srcArlCoeff = m_ppcQTTempArlCoeff[COMPONENT_Y][ uiQTLayer ] + coeffOffset; TCoeff* destArlCoeff = pcCU->getArlCoeff (COMPONENT_Y) + coeffOffset; ::memcpy( destArlCoeff, srcArlCoeff, sizeof( TCoeff ) * numCoeffInBlock ); #endif m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Y, pcRecoYuv, uiAbsPartIdx, tuRect.width, tuRect.height ); } } else { TComTURecurse tuRecurseChild(rTu, false); do { xSetIntraResultLumaQT( pcRecoYuv, tuRecurseChild ); } while (tuRecurseChild.nextSection(rTu)); } } Void TEncSearch::xStoreIntraResultQT(const ComponentID compID, TComTU &rTu ) { TComDataCU *pcCU=rTu.getCU(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); if ( compID==COMPONENT_Y || uiTrMode == uiTrDepth ) { assert(uiTrMode == uiTrDepth); const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; if (rTu.ProcessComponentSection(compID)) { 
const TComRectangle &tuRect=rTu.getRect(compID); //===== copy transform coefficients ===== const UInt uiNumCoeff = tuRect.width * tuRect.height; TCoeff* pcCoeffSrc = m_ppcQTTempCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID); TCoeff* pcCoeffDst = m_pcQTTempTUCoeff[compID]; ::memcpy( pcCoeffDst, pcCoeffSrc, sizeof( TCoeff ) * uiNumCoeff ); #if ADAPTIVE_QP_SELECTION TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID); TCoeff* pcArlCoeffDst = m_ppcQTTempTUArlCoeff[compID]; ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeff ); #endif //===== copy reconstruction ===== m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( compID, &m_pcQTTempTransformSkipTComYuv, uiAbsPartIdx, tuRect.width, tuRect.height ); } } } Void TEncSearch::xLoadIntraResultQT(const ComponentID compID, TComTU &rTu) { TComDataCU *pcCU=rTu.getCU(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); if ( compID==COMPONENT_Y || uiTrMode == uiTrDepth ) { assert(uiTrMode == uiTrDepth); const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx; if (rTu.ProcessComponentSection(compID)) { const TComRectangle &tuRect=rTu.getRect(compID); //===== copy transform coefficients ===== const UInt uiNumCoeff = tuRect.width * tuRect.height; TCoeff* pcCoeffDst = m_ppcQTTempCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID); TCoeff* pcCoeffSrc = m_pcQTTempTUCoeff[compID]; ::memcpy( pcCoeffDst, pcCoeffSrc, sizeof( TCoeff ) * uiNumCoeff ); #if ADAPTIVE_QP_SELECTION TCoeff* pcArlCoeffDst = m_ppcQTTempArlCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID); TCoeff* pcArlCoeffSrc = m_ppcQTTempTUArlCoeff[compID]; ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff 
) * uiNumCoeff ); #endif //===== copy reconstruction ===== m_pcQTTempTransformSkipTComYuv.copyPartToPartComponent( compID, &m_pcQTTempTComYuv[ uiQTLayer ], uiAbsPartIdx, tuRect.width, tuRect.height ); Pel* piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder ); UInt uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride (compID); Pel* piRecQt = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( compID, uiAbsPartIdx ); UInt uiRecQtStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride (compID); UInt uiWidth = tuRect.width; UInt uiHeight = tuRect.height; Pel* pRecQt = piRecQt; Pel* pRecIPred = piRecIPred; for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { pRecIPred[ uiX ] = pRecQt [ uiX ]; } pRecQt += uiRecQtStride; pRecIPred += uiRecIPredStride; } } } } Void TEncSearch::xStoreCrossComponentPredictionResult( Pel *pResiDst, const Pel *pResiSrc, TComTU &rTu, const Int xOffset, const Int yOffset, const Int strideDst, const Int strideSrc ) { const Pel *pSrc = pResiSrc + yOffset * strideSrc + xOffset; Pel *pDst = pResiDst + yOffset * strideDst + xOffset; for( Int y = 0; y < rTu.getRect( COMPONENT_Y ).height; y++ ) { ::memcpy( pDst, pSrc, sizeof(Pel) * rTu.getRect( COMPONENT_Y ).width ); pDst += strideDst; pSrc += strideSrc; } } SChar TEncSearch::xCalcCrossComponentPredictionAlpha( TComTU &rTu, const ComponentID compID, const Pel* piResiL, const Pel* piResiC, const Int width, const Int height, const Int strideL, const Int strideC ) { const Pel *pResiL = piResiL; const Pel *pResiC = piResiC; TComDataCU *pCU = rTu.getCU(); const Int absPartIdx = rTu.GetAbsPartIdxTU( compID ); const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth(); SChar alpha = 0; Int SSxy = 0; Int SSxx = 0; for( UInt uiY = 0; uiY < height; uiY++ ) { for( UInt uiX = 0; uiX < width; uiX++ ) { const Pel scaledResiL = rightShift( pResiL[ uiX ], diffBitDepth ); SSxy += ( scaledResiL * pResiC[ uiX ] ); SSxx += ( 
scaledResiL * scaledResiL ); } pResiL += strideL; pResiC += strideC; } if( SSxx != 0 ) { Double dAlpha = SSxy / Double( SSxx ); alpha = SChar(Clip3(-16, 16, (Int)(dAlpha * 16))); static const SChar alphaQuant[17] = {0, 1, 1, 2, 2, 2, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8}; alpha = (alpha < 0) ? -alphaQuant[Int(-alpha)] : alphaQuant[Int(alpha)]; } pCU->setCrossComponentPredictionAlphaPartRange( alpha, compID, absPartIdx, rTu.GetAbsPartIdxNumParts( compID ) ); return alpha; } Void TEncSearch::xRecurIntraChromaCodingQT(TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE], #if NH_3D_VSO Dist& ruiDist, #else Distortion& ruiDist, #endif TComTU& rTu DEBUG_STRING_FN_DECLARE(sDebug)) { TComDataCU *pcCU = rTu.getCU(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const ChromaFormat format = rTu.GetChromaFormat(); UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); const UInt numberValidComponents = getNumberValidComponents(format); if( uiTrMode == uiTrDepth ) { if (!rTu.ProcessChannelSection(CHANNEL_TYPE_CHROMA)) { return; } const UInt uiFullDepth = rTu.GetTransformDepthTotal(); Bool checkTransformSkip = pcCU->getSlice()->getPPS()->getUseTransformSkip(); checkTransformSkip &= TUCompRectHasAssociatedTransformSkipFlag(rTu.getRect(COMPONENT_Cb), pcCU->getSlice()->getPPS()->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize()); if ( m_pcEncCfg->getUseTransformSkipFast() ) { checkTransformSkip &= TUCompRectHasAssociatedTransformSkipFlag(rTu.getRect(COMPONENT_Y), pcCU->getSlice()->getPPS()->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize()); if (checkTransformSkip) { Int nbLumaSkip = 0; const UInt maxAbsPartIdxSub=uiAbsPartIdx + (rTu.ProcessingAllQuadrants(COMPONENT_Cb)?1:4); for(UInt absPartIdxSub = uiAbsPartIdx; absPartIdxSub < maxAbsPartIdxSub; absPartIdxSub ++) { nbLumaSkip += pcCU->getTransformSkip(absPartIdxSub, COMPONENT_Y); } 
checkTransformSkip &= (nbLumaSkip > 0); } } for (UInt ch=COMPONENT_Cb; chstore( m_pppcRDSbacCoder[uiFullDepth][CI_QT_TRAFO_ROOT] ); const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height; TComTURecurse TUIterator(rTu, false, (splitIntoSubTUs ? TComTU::VERTICAL_SPLIT : TComTU::DONT_SPLIT), true, compID); const UInt partIdxesPerSubTU = TUIterator.GetAbsPartIdxNumParts(compID); do { const UInt subTUAbsPartIdx = TUIterator.GetAbsPartIdxTU(compID); Double dSingleCost = MAX_DOUBLE; Int bestModeId = 0; #if NH_3D_VSO Dist singleDistC = 0; #else Distortion singleDistC = 0; #endif UInt singleCbfC = 0; #if NH_3D_VSO Dist singleDistCTmp = 0; #else Distortion singleDistCTmp = 0; #endif Double singleCostTmp = 0; UInt singleCbfCTmp = 0; SChar bestCrossCPredictionAlpha = 0; Int bestTransformSkipMode = 0; const Bool checkCrossComponentPrediction = (pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, subTUAbsPartIdx) == DM_CHROMA_IDX) && pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && (pcCU->getCbf(subTUAbsPartIdx, COMPONENT_Y, uiTrDepth) != 0); const Int crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1; const Int transformSkipModesToTest = checkTransformSkip ? 
2 : 1; const Int totalModesToTest = crossCPredictionModesToTest * transformSkipModesToTest; Int currModeId = 0; Int default0Save1Load2 = 0; for(Int transformSkipModeId = 0; transformSkipModeId < transformSkipModesToTest; transformSkipModeId++) { for(Int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++) { pcCU->setCrossComponentPredictionAlphaPartRange(0, compID, subTUAbsPartIdx, partIdxesPerSubTU); DEBUG_STRING_NEW(sDebugMode) pcCU->setTransformSkipPartRange( transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU ); currModeId++; const Bool isOneMode = (totalModesToTest == 1); const Bool isLastMode = (currModeId == totalModesToTest); // currModeId is indexed from 1 if (isOneMode) { default0Save1Load2 = 0; } else if (!isOneMode && (transformSkipModeId == 0) && (crossCPredictionModeId == 0)) { default0Save1Load2 = 1; //save prediction on first mode } else { default0Save1Load2 = 2; //load it on subsequent modes } singleDistCTmp = 0; xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, (crossCPredictionModeId != 0), singleDistCTmp, compID, TUIterator DEBUG_STRING_PASS_INTO(sDebugMode), default0Save1Load2); singleCbfCTmp = pcCU->getCbf( subTUAbsPartIdx, compID, uiTrDepth); if ( ((crossCPredictionModeId == 1) && (pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) == 0)) || ((transformSkipModeId == 1) && (singleCbfCTmp == 0))) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden. { singleCostTmp = MAX_DOUBLE; } else if (!isOneMode) { UInt bitsTmp = xGetIntraBitsQTChroma( TUIterator, compID, false ); singleCostTmp = m_pcRdCost->calcRdCost( bitsTmp, singleDistCTmp); } if(singleCostTmp < dSingleCost) { DEBUG_STRING_SWAP(sDebugBestMode, sDebugMode) dSingleCost = singleCostTmp; singleDistC = singleDistCTmp; bestCrossCPredictionAlpha = (crossCPredictionModeId != 0) ? 
pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) : 0; bestTransformSkipMode = transformSkipModeId; bestModeId = currModeId; singleCbfC = singleCbfCTmp; if (!isOneMode && !isLastMode) { xStoreIntraResultQT(compID, TUIterator); m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiFullDepth ][ CI_TEMP_BEST ] ); } } if (!isOneMode && !isLastMode) { m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] ); } } } if(bestModeId < totalModesToTest) { xLoadIntraResultQT(compID, TUIterator); pcCU->setCbfPartRange( singleCbfC << uiTrDepth, compID, subTUAbsPartIdx, partIdxesPerSubTU ); m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiFullDepth ][ CI_TEMP_BEST ] ); } DEBUG_STRING_APPEND(sDebug, sDebugBestMode) pcCU ->setTransformSkipPartRange ( bestTransformSkipMode, compID, subTUAbsPartIdx, partIdxesPerSubTU ); pcCU ->setCrossComponentPredictionAlphaPartRange( bestCrossCPredictionAlpha, compID, subTUAbsPartIdx, partIdxesPerSubTU ); ruiDist += singleDistC; } while (TUIterator.nextSection(rTu)); if (splitIntoSubTUs) { offsetSubTUCBFs(rTu, compID); } } } else { UInt uiSplitCbf[MAX_NUM_COMPONENT] = {0,0,0}; TComTURecurse tuRecurseChild(rTu, false); const UInt uiTrDepthChild = tuRecurseChild.GetTransformDepthRel(); do { DEBUG_STRING_NEW(sChild) xRecurIntraChromaCodingQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, ruiDist, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) ); DEBUG_STRING_APPEND(sDebug, sChild) const UInt uiAbsPartIdxSub=tuRecurseChild.GetAbsPartIdxTU(); for(UInt ch=COMPONENT_Cb; chgetCbf( uiAbsPartIdxSub, ComponentID(ch), uiTrDepthChild ); } } while ( tuRecurseChild.nextSection(rTu) ); UInt uiPartsDiv = rTu.GetAbsPartIdxNumParts(); for(UInt ch=COMPONENT_Cb; chgetCbf( compID ); for( UInt uiOffs = 0; uiOffs < uiPartsDiv; uiOffs++ ) { pBase[ uiAbsPartIdx + uiOffs ] |= flag; } } } } } Void TEncSearch::xSetIntraResultChromaQT(TComYuv* pcRecoYuv, TComTU &rTu) { if (!rTu.ProcessChannelSection(CHANNEL_TYPE_CHROMA)) { return; } TComDataCU 
*pcCU=rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiTrDepth = rTu.GetTransformDepthRel(); UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); if( uiTrMode == uiTrDepth ) { UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; //===== copy transform coefficients ===== const TComRectangle &tuRectCb=rTu.getRect(COMPONENT_Cb); UInt uiNumCoeffC = tuRectCb.width*tuRectCb.height;//( pcCU->getSlice()->getSPS()->getMaxCUWidth() * pcCU->getSlice()->getSPS()->getMaxCUHeight() ) >> ( uiFullDepth << 1 ); const UInt offset = rTu.getCoefficientOffset(COMPONENT_Cb); const UInt numberValidComponents = getNumberValidComponents(rTu.GetChromaFormat()); for (UInt ch=COMPONENT_Cb; chgetCoeff(component) + offset;//(uiNumCoeffIncC*uiAbsPartIdx); ::memcpy( dest, src, sizeof(TCoeff)*uiNumCoeffC ); #if ADAPTIVE_QP_SELECTION TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[component][ uiQTLayer ] + offset;//( uiNumCoeffIncC * uiAbsPartIdx ); TCoeff* pcArlCoeffDst = pcCU->getArlCoeff(component) + offset;//( uiNumCoeffIncC * uiAbsPartIdx ); ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeffC ); #endif } //===== copy reconstruction ===== m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Cb, pcRecoYuv, uiAbsPartIdx, tuRectCb.width, tuRectCb.height ); m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Cr, pcRecoYuv, uiAbsPartIdx, tuRectCb.width, tuRectCb.height ); } else { TComTURecurse tuRecurseChild(rTu, false); do { xSetIntraResultChromaQT( pcRecoYuv, tuRecurseChild ); } while (tuRecurseChild.nextSection(rTu)); } } #if NH_3D_DIS Void TEncSearch::estIntraPredDIS( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, UInt& ruiDistC, Bool bLumaOnly ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "estIntraPredDis"); #endif UInt uiDepth = pcCU->getDepth(0); UInt uiWidth = pcCU->getWidth (0); UInt 
uiHeight = pcCU->getHeight(0); #if NH_3D_VSO // M36 Pel* piOrg = pcOrgYuv ->getAddr(COMPONENT_Y, 0, uiWidth ); UInt uiStride = pcPredYuv->getStride(COMPONENT_Y); Dist uiDist = 0; Double dCost = 0.0; Dist uiBestDist = 0; Double dBestCost = MAX_DOUBLE; UInt uiBestDISType = 0; #else Distortion uiDist = 0; Double dCost = 0.0; Distortion uiBestDist = 0; Double dBestCost = MAX_DOUBLE; UInt uiBestDISType = 0; #endif for( UInt uiPredMode = 0; uiPredMode < 4 ; uiPredMode++ ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "uiPredMode" + n2s(uiPredMode ) ); #endif // set context models m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] ); // determine residual for partition uiDist = 0; dCost = 0.0; #if NH_3D_VSO // M36 if( m_pcRdCost->getUseRenModel() ) { m_pcRdCost->setRenModelData( pcCU, 0, piOrg, uiStride, uiWidth, uiHeight ); } #endif xIntraCodingDIS(pcCU, 0, pcOrgYuv, pcPredYuv, uiDist, dCost, uiPredMode); // check r-d cost if( dCost < dBestCost ) { uiBestDist = uiDist; dBestCost = dCost; uiBestDISType = pcCU->getDISType(0); // copy reconstruction pcPredYuv->copyPartToPartYuv(pcRecoYuv, 0, uiWidth, uiHeight); } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } pcCU->setDISTypeSubParts((UChar)uiBestDISType, 0, uiDepth); //===== reset context models ===== m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); //===== set distortion (rate and r-d costs are determined later) ===== pcCU->getTotalDistortion() = uiBestDist; #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } #endif Void TEncSearch::estIntraPredLumaQT(TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE] DEBUG_STRING_FN_DECLARE(sDebug) #if NH_3D_ENC_DEPTH , Bool bOnlyIVP #endif ) { #if NH_MV D_PRINT_INC_INDENT( g_traceModeCheck, "estIntraPredLumaQT"); #endif const UInt uiDepth = pcCU->getDepth(0); const UInt uiInitTrDepth = pcCU->getPartitionSize(0) == 
SIZE_2Nx2N ? 0 : 1; const UInt uiNumPU = 1<<(2*uiInitTrDepth); const UInt uiQNumParts = pcCU->getTotalNumPart() >> 2; const UInt uiWidthBit = pcCU->getIntraSizeIdx(0); const ChromaFormat chFmt = pcCU->getPic()->getChromaFormat(); const UInt numberValidComponents = getNumberValidComponents(chFmt); const TComSPS &sps = *(pcCU->getSlice()->getSPS()); const TComPPS &pps = *(pcCU->getSlice()->getPPS()); #if NH_3D_VSO Dist uiOverallDistY = 0; #else Distortion uiOverallDistY = 0; #endif UInt CandNum; Double CandCostList[ FAST_UDI_MAX_RDMODE_NUM ]; Pel resiLumaPU[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]; Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES]; for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++) { bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise } bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate()); // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantisation divisor is 1. #if FULL_NBIT const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ? sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0))) : m_pcRdCost->getSqrtLambda(); #else const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ? 
sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (sps.getBitDepth(CHANNEL_TYPE_LUMA) - 8)) / 3.0))) : m_pcRdCost->getSqrtLambda(); #endif //===== set QP and clear Cbf ===== if ( pps.getUseDQP() == true) { pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth ); } else { pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth ); } //===== loop over partitions ===== TComTURecurse tuRecurseCU(pcCU, 0); TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT); do { const UInt uiPartOffset=tuRecurseWithPU.GetAbsPartIdxTU(); #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "uiPartOffset: " + n2s(uiPartOffset ) ); #endif // for( UInt uiPU = 0, uiPartOffset=0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts ) //{ //===== init pattern for luma prediction ===== DEBUG_STRING_NEW(sTemp2) //===== determine set of modes to be tested (using prediction signal only) ===== Int numModesAvailable = 35; //total number of Intra modes UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM]; Int numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled()?g_aucIntraModeNumFast_UseMPM[ uiWidthBit ] : g_aucIntraModeNumFast_NotUseMPM[ uiWidthBit ]; // this should always be true assert (tuRecurseWithPU.ProcessComponentSection(COMPONENT_Y)); initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, true DEBUG_STRING_PASS_INTO(sTemp2) ); #if NH_3D_ENC_DEPTH if( bOnlyIVP ) { numModesForFullRD = 0; } else { #endif Bool doFastSearch = (numModesForFullRD != numModesAvailable); if (doFastSearch) { assert(numModesForFullRD < numModesAvailable); for( Int i=0; i < numModesForFullRD; i++ ) { CandCostList[ i ] = MAX_DOUBLE; } CandNum = 0; const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y); const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU(); Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx ); Pel* piPred = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx ); UInt uiStride = pcPredYuv->getStride( COMPONENT_Y 
); DistParam distParam; const Bool bUseHadamard=pcCU->getCUTransquantBypass(0) == 0; m_pcRdCost->setDistParam(distParam, sps.getBitDepth(CHANNEL_TYPE_LUMA), piOrg, uiStride, piPred, uiStride, puRect.width, puRect.height, bUseHadamard); #if NH_3D distParam.bUseIC = false; #endif #if NH_3D_SDC_INTER distParam.bUseSDCMRSAD = false; #endif distParam.bApplyWeight = false; for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ ) { UInt uiMode = modeIdx; #if !NH_3D_VSO Distortion uiSad = 0; #endif #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "preTest; uiMode " + n2s(uiMode) ); #endif const Bool bUseFilter=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, uiMode, puRect.width, puRect.height, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag()); predIntraAng( COMPONENT_Y, uiMode, piOrg, uiStride, piPred, uiStride, tuRecurseWithPU, bUseFilter, TComPrediction::UseDPCMForFirstPassIntraEstimation(tuRecurseWithPU, uiMode) ); #if NH_3D_VSO // M34 Dist uiSad; if ( m_pcRdCost->getUseVSO() ) { if ( m_pcRdCost->getUseEstimatedVSD() ) { uiSad = (Dist) ( m_pcRdCost->getDistPartVSD( pcCU, uiPartOffset, distParam.bitDepth , piPred, uiStride, piOrg, uiStride, distParam.iCols, distParam.iRows, true ) ); } else { uiSad = m_pcRdCost->getDistPartVSO( pcCU, uiPartOffset, distParam.bitDepth , piPred, uiStride, piOrg, uiStride, distParam.iCols, distParam.iRows, true ); } } else { uiSad = distParam.DistFunc(&distParam); } #else // use hadamard transform here uiSad+=distParam.DistFunc(&distParam); #endif UInt iModeBits = 0; // NB xModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. iModeBits+=xModeBitsIntra( pcCU, uiMode, uiPartOffset, uiDepth, CHANNEL_TYPE_LUMA ); #if NH_3D_VSO // M35 Double dLambda; if ( m_pcRdCost->getUseLambdaScaleVSO() ) { dLambda = m_pcRdCost->getUseRenModel() ? 
m_pcRdCost->getLambdaVSO() : sqrtLambdaForFirstPass; } else { dLambda = m_pcRdCost->getSqrtLambda(); } Double cost = (Double)uiSad + (Double)iModeBits * dLambda; #else Double cost = (Double)uiSad + (Double)iModeBits * sqrtLambdaForFirstPass; #endif #if DEBUG_INTRA_SEARCH_COSTS std::cout << "1st pass mode " << uiMode << " SAD = " << uiSad << ", mode bits = " << iModeBits << ", cost = " << cost << "\n"; #endif CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList ); #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } if (m_pcEncCfg->getFastUDIUseMPMEnabled()) { Int uiPreds[NUM_MOST_PROBABLE_MODES] = {-1, -1, -1}; Int iMode = -1; pcCU->getIntraDirPredictor( uiPartOffset, uiPreds, COMPONENT_Y, &iMode ); const Int numCand = ( iMode >= 0 ) ? iMode : Int(NUM_MOST_PROBABLE_MODES); for( Int j=0; j < numCand; j++) { Bool mostProbableModeIncluded = false; Int mostProbableMode = uiPreds[j]; for( Int i=0; i < numModesForFullRD; i++) { mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]); } if (!mostProbableModeIncluded) { uiRdModeList[numModesForFullRD++] = mostProbableMode; } } } } else { for( Int i=0; i < numModesForFullRD; i++) { uiRdModeList[i] = i; } } #if NH_3D_ENC_DEPTH } #endif #if NH_3D_DMM if( m_pcEncCfg->getIsDepth() ) { const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y); const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU(); Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx ); Pel* piPred = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx ); UInt uiStride = pcPredYuv->getStride( COMPONENT_Y ); if( puRect.width >= DMM_MIN_SIZE && puRect.width <= DMM_MAX_SIZE && puRect.width == puRect.height && ((m_pcEncCfg->getUseDMM() && pcCU->getSlice()->getIntraSdcWedgeFlag()) || pcCU->getSlice()->getIntraContourFlag()) ) { #if NH_3D_ENC_DEPTH if( bOnlyIVP ) { Bool* dmm4Pattern = new Bool[ puRect.width*puRect.height ]; UInt patternStride = puRect.width; predContourFromTex( pcCU, uiPartOffset, puRect.width, 
puRect.height, dmm4Pattern ); Pel deltaDC1 = 0; Pel deltaDC2 = 0; xSearchDmmDeltaDCs( pcCU, uiPartOffset, piOrg, piPred, uiStride, dmm4Pattern, patternStride, puRect.width, puRect.height, deltaDC1, deltaDC2 ); pcCU->setDmmDeltaDC( DMM4_IDX, 0, uiPartOffset, deltaDC1 ); pcCU->setDmmDeltaDC( DMM4_IDX, 1, uiPartOffset, deltaDC2 ); uiRdModeList[ numModesForFullRD++ ] = (DMM4_IDX+DMM_OFFSET); delete[] dmm4Pattern; } else { Int threshold = max(((pcCU->getQP(0))>>3)-1,3); Int varThreshold = (Int)( threshold * threshold - 8 ); UInt varCU = m_pcRdCost->calcVAR( piOrg, uiStride, puRect.width, puRect.height, pcCU->getDepth(0), pcCU->getSlice()->getSPS()->getMaxCUWidth() ); if( uiRdModeList[0] != PLANAR_IDX || varCU >= varThreshold ) { #endif UInt startIdx = ( m_pcEncCfg->getUseDMM() && pcCU->getSlice()->getIntraSdcWedgeFlag() ) ? 0 : 1; UInt endIdx = ( pcCU->getSlice()->getIntraContourFlag() ) ? 1 : 0; for( UInt dmmType = startIdx; dmmType <= endIdx; dmmType++ ) { #if H_3D_FCO if ( !(pcCU->getSlice()->getIvPic(false, pcCU->getSlice()->getViewIndex() )->getReconMark()) && (DMM4_IDX == dmmType ) ) { continue; } #endif Bool* biSegPattern = new Bool[ puRect.width*puRect.height ]; UInt patternStride = puRect.width; Pel deltaDC1 = 0; Pel deltaDC2 = 0; switch( dmmType ) { case( DMM1_IDX ): { UInt uiTabIdx = 0; xSearchDmm1Wedge( pcCU, uiPartOffset, piOrg, uiStride, puRect.width, puRect.height, uiTabIdx ); pcCU->setDmm1WedgeTabIdxSubParts( uiTabIdx, uiPartOffset, uiDepth + uiInitTrDepth ); (getWedgeListScaled( puRect.width )->at( pcCU->getDmm1WedgeTabIdx( uiAbsPartIdx ) )).getPatternScaledCopy( puRect.width, biSegPattern ); } break; case( DMM4_IDX ): { predContourFromTex( pcCU, uiPartOffset, puRect.width, puRect.height, biSegPattern ); } break; default: assert(0); } if( biSegPattern ) { xSearchDmmDeltaDCs( pcCU, uiPartOffset, piOrg, piPred, uiStride, biSegPattern, patternStride, puRect.width, puRect.height, deltaDC1, deltaDC2 ); pcCU->setDmmDeltaDC( (DmmID)dmmType, 0, uiPartOffset, 
deltaDC1 ); pcCU->setDmmDeltaDC( (DmmID)dmmType, 1, uiPartOffset, deltaDC2 ); uiRdModeList[ numModesForFullRD++ ] = (dmmType+DMM_OFFSET); delete[] biSegPattern; } } #if NH_3D_ENC_DEPTH } } #endif } } #endif //===== check modes (using r-d costs) ===== #if HHI_RQT_INTRA_SPEEDUP_MOD UInt uiSecondBestMode = MAX_UINT; Double dSecondBestPUCost = MAX_DOUBLE; #endif DEBUG_STRING_NEW(sPU) UInt uiBestPUMode = 0; #if NH_3D_ENC_DEPTH UInt uiBestPUModeConv = 0; UInt uiSecondBestPUModeConv = 0; UInt uiThirdBestPUModeConv = 0; #endif #if NH_3D_VSO Dist uiBestPUDistY = 0; #else Distortion uiBestPUDistY = 0; #endif Double dBestPUCost = MAX_DOUBLE; #if NH_3D_ENC_DEPTH Double dBestPUCostConv = MAX_DOUBLE; UInt rdSDC = m_pcEncCfg->getIsDepth() ? numModesForFullRD : 0; #endif #if NH_3D_SDC_INTRA Bool bBestUseSDC = false; Pel apBestDCOffsets[2] = {0,0}; #endif #if NH_3D_ENC_DEPTH for( UInt uiMode = 0; uiMode < numModesForFullRD + rdSDC; uiMode++ ) #else #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST UInt max=numModesForFullRD; if (DebugOptionList::ForceLumaMode.isSet()) { max=0; // we are forcing a direction, so don't bother with mode check } for ( UInt uiMode = 0; uiMode < max; uiMode++) #else for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ ) #endif #endif { // set luma prediction mode #if !NH_3D_ENC_DEPTH UInt uiOrgMode = uiRdModeList[uiMode]; #endif #if NH_3D_ENC_DEPTH UInt uiOrgMode; if (uiMode < numModesForFullRD) { uiOrgMode = uiRdModeList[uiMode]; } else { const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y); const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU(); Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx ); UInt uiStride = pcPredYuv->getStride( COMPONENT_Y ); UInt varCU = m_pcRdCost->calcVAR(piOrg, uiStride, puRect.width, puRect.height, pcCU->getDepth(0), pcCU->getSlice()->getSPS()->getMaxCUWidth()); uiOrgMode = uiRdModeList[uiMode - numModesForFullRD]; if (uiBestPUModeConv <= 1 ) { if (uiOrgMode > 1 && varCU < 1) continue; } else { if 
(uiOrgMode != uiBestPUModeConv && uiOrgMode != uiSecondBestPUModeConv && uiOrgMode != uiThirdBestPUModeConv && uiOrgMode > 1 && uiOrgMode < NUM_INTRA_MODE && varCU < 4) continue; } } #endif #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "Test; uiOrgMode: " + n2s(uiOrgMode) ); #endif pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth ); #if NH_3D_SDC_INTRA #if NH_3D_ENC_DEPTH Bool bTestSDC = ( ( m_pcEncCfg->getUseSDC() && pcCU->getSlice()->getIntraSdcWedgeFlag() ) && pcCU->getSDCAvailable(uiPartOffset) && uiMode >= numModesForFullRD); #else Bool bTestSDC = ( m_pcEncCfg->getUseSDC() && pcCU->getSDCAvailable(uiPartOffset) ); #endif for( UInt uiSDC=0; uiSDC<=(bTestSDC?1:0); uiSDC++ ) { #if NH_3D_ENC_DEPTH if (!uiSDC && uiMode >= numModesForFullRD) continue; #endif pcCU->setSDCFlagSubParts( (uiSDC != 0), uiPartOffset, uiDepth + uiInitTrDepth ); Double dOffsetCost[3] = {MAX_DOUBLE,MAX_DOUBLE,MAX_DOUBLE}; for( Int iOffset = 1; iOffset <= 5; iOffset++ ) { Int iSDCDeltaResi = 0; if(iOffset % 2 == 0) { iSDCDeltaResi = iOffset >> 1; } else { iSDCDeltaResi = -1 * (iOffset >> 1); } if( ( uiSDC == 0 ) && iSDCDeltaResi != 0 ) { continue; } if( iOffset > 3) { if ( dOffsetCost[0] < (0.9*dOffsetCost[1]) && dOffsetCost[0] < (0.9*dOffsetCost[2]) ) { continue; } if ( dOffsetCost[1] < dOffsetCost[0] && dOffsetCost[0] < dOffsetCost[2] && iOffset == 5) { continue; } if ( dOffsetCost[0] < dOffsetCost[1] && dOffsetCost[2] < dOffsetCost[0] && iOffset == 4) { continue; } } #endif #if NH_3D_ENC_DEPTH Bool zeroResiTest = (pcCU->getSlice()->getIsDepth() && !pcCU->getSlice()->isIRAP()); #if NH_3D_SDC_INTRA zeroResiTest = zeroResiTest || pcCU->getSDCFlag(uiPartOffset); if( uiSDC != 0 && iSDCDeltaResi != 0 ) { zeroResiTest = false; } #endif for( UInt zeroResi = 0; zeroResi <= ( zeroResiTest ? 
1 : 0 ); zeroResi++ ) { #endif DEBUG_STRING_NEW(sMode) // set context models m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] ); // determine residual for partition #if NH_3D_VSO Dist uiPUDistY = 0; #else Distortion uiPUDistY = 0; #endif Double dPUCost = 0.0; #if NH_3D_VSO // M36 if( m_pcRdCost->getUseRenModel() ) { m_pcRdCost->setRenModelData( pcCU, uiPartOffset, pcOrgYuv, &tuRecurseWithPU ); } #endif #if NH_3D_SDC_INTRA if( pcCU->getSDCFlag(uiPartOffset) ) { pcCU->setTrIdxSubParts(0, uiPartOffset, uiDepth + uiInitTrDepth); pcCU->setCbfSubParts(1, COMPONENT_Y, uiPartOffset, uiDepth + uiInitTrDepth); // start encoding with SDC xIntraCodingSDC(pcCU, uiPartOffset, pcOrgYuv, pcPredYuv, uiPUDistY, dPUCost, ( zeroResi != 0 ), iSDCDeltaResi ); if ( zeroResi == 0 && iOffset <= 3 ) { dOffsetCost [iOffset -1] = dPUCost; } } else { #endif #if HHI_RQT_INTRA_SPEEDUP #if NH_3D_ENC_DEPTH xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode), (zeroResi != 0) ); #if NH_3D_ENC_DEPTH if( dPUCost < dBestPUCostConv ) { uiThirdBestPUModeConv = uiSecondBestPUModeConv; uiSecondBestPUModeConv = uiBestPUModeConv; uiBestPUModeConv = uiOrgMode; dBestPUCostConv = dPUCost; } #endif #else xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) ); #endif #else #if NH_3D_ENC_DEPTH xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode), (zeroResi != 0) ); #else xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) ); #endif #endif #if NH_3D_SDC_INTRA } #endif #if DEBUG_INTRA_SEARCH_COSTS std::cout << "2nd pass [luma,chroma] mode [" << Int(pcCU->getIntraDir(CHANNEL_TYPE_LUMA, uiPartOffset)) << "," << Int(pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, 
uiPartOffset)) << "] cost = " << dPUCost << "\n"; #endif // check r-d cost if( dPUCost < dBestPUCost ) { DEBUG_STRING_SWAP(sPU, sMode) #if HHI_RQT_INTRA_SPEEDUP_MOD uiSecondBestMode = uiBestPUMode; dSecondBestPUCost = dBestPUCost; #endif uiBestPUMode = uiOrgMode; uiBestPUDistY = uiPUDistY; dBestPUCost = dPUCost; #if NH_3D_SDC_INTRA if( pcCU->getSDCFlag(uiPartOffset) ) { bBestUseSDC = true; // copy reconstruction UInt uiWidthPU = tuRecurseWithPU.getRect( COMPONENT_Y ).width; UInt uiHeightPU = tuRecurseWithPU.getRect( COMPONENT_Y ).height; UInt uiWidth = pcCU->getWidth (0) >> uiInitTrDepth; UInt uiHeight = pcCU->getHeight(0) >> uiInitTrDepth; AOF(uiWidth==uiWidthPU); AOF(uiHeight==uiHeightPU); pcPredYuv->copyPartToPartComponent(COMPONENT_Y, pcRecoYuv, uiPartOffset, uiWidth, uiHeight); // copy DC values apBestDCOffsets[0] = pcCU->getSDCSegmentDCOffset(0, uiPartOffset); apBestDCOffsets[1] = pcCU->getSDCSegmentDCOffset(1, uiPartOffset); } else { bBestUseSDC = false; #endif xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU ); if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) { const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0; const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0; for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++) { if (bMaintainResidual[storedResidualIndex]) { xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE ); } } } UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts(); ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) ); for (UInt component = 0; component < numberValidComponents; component++) { const ComponentID compID = ComponentID(component); ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( 
m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); } #if NH_3D_SDC_INTRA } #endif } #if HHI_RQT_INTRA_SPEEDUP_MOD else if( dPUCost < dSecondBestPUCost ) { uiSecondBestMode = uiOrgMode; dSecondBestPUCost = dPUCost; } #endif #if NH_3D_ENC_DEPTH } #endif #if NH_3D_SDC_INTRA } // SDC residual loop } // SDC loop #endif #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } // Mode loop #if HHI_RQT_INTRA_SPEEDUP #if HHI_RQT_INTRA_SPEEDUP_MOD for( UInt ui =0; ui < 2; ++ui ) #endif { #if HHI_RQT_INTRA_SPEEDUP_MOD UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode; if( uiOrgMode == MAX_UINT ) { break; } #else UInt uiOrgMode = uiBestPUMode; #endif #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST if (DebugOptionList::ForceLumaMode.isSet()) { uiOrgMode = DebugOptionList::ForceLumaMode.getInt(); } #endif pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth ); #if NH_3D_SDC_INTRA pcCU->setSDCFlagSubParts(false, uiPartOffset, uiDepth + uiInitTrDepth); #endif DEBUG_STRING_NEW(sModeTree) // set context models m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] ); // determine residual for partition #if NH_3D_VSO Dist uiPUDistY = 0; #else Distortion uiPUDistY = 0; #endif Double dPUCost = 0.0; #if NH_3D_VSO // M37 //check if necessary // reset Model if( m_pcRdCost->getUseRenModel() ) { m_pcRdCost->setRenModelData( pcCU, uiPartOffset, pcOrgYuv, &tuRecurseWithPU ); } #endif xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, false, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sModeTree)); // check r-d cost if( dPUCost < dBestPUCost ) { DEBUG_STRING_SWAP(sPU, sModeTree) uiBestPUMode = uiOrgMode; uiBestPUDistY = uiPUDistY; dBestPUCost = dPUCost; #if NH_3D_SDC_INTRA bBestUseSDC = false; #endif xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU ); if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) { const Int xOffset = 
tuRecurseWithPU.getRect( COMPONENT_Y ).x0; const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0; for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++) { if (bMaintainResidual[storedResidualIndex]) { xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE ); } } } const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts(); ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) ); for (UInt component = 0; component < numberValidComponents; component++) { const ComponentID compID = ComponentID(component); ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) ); } } } // Mode loop #endif DEBUG_STRING_APPEND(sDebug, sPU) //--- update overall distortion --- uiOverallDistY += uiBestPUDistY; #if NH_3D_SDC_INTRA if( bBestUseSDC ) { pcCU->setTrIdxSubParts(0, uiPartOffset, uiDepth + uiInitTrDepth); pcCU->setCbfSubParts(1, COMPONENT_Y, uiPartOffset, uiDepth + uiInitTrDepth); //=== copy best DC segment values back to CU ==== pcCU->setSDCSegmentDCOffset(apBestDCOffsets[0], 0, uiPartOffset); pcCU->setSDCSegmentDCOffset(apBestDCOffsets[1], 1, uiPartOffset); } else { #endif //--- update transform index and cbf --- const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts(); ::memcpy( pcCU->getTransformIdx() + uiPartOffset, m_puhQTTempTrIdx, uiQPartNum * sizeof( UChar ) ); for (UInt component = 0; component < numberValidComponents; component++) { const ComponentID compID = ComponentID(component); ::memcpy( pcCU->getCbf( compID ) + uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getTransformSkip( compID ) + uiPartOffset, 
m_puhQTTempTransformSkipFlag[compID ], uiQPartNum * sizeof( UChar ) ); #if NH_3D_SDC_INTRA } #endif } //--- set reconstruction for next intra prediction blocks --- if( !tuRecurseWithPU.IsLastSection() ) { const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y); const UInt uiCompWidth = puRect.width; const UInt uiCompHeight = puRect.height; const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiPartOffset; Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder ); const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y); const Pel* piSrc = pcRecoYuv->getAddr( COMPONENT_Y, uiPartOffset ); const UInt uiSrcStride = pcRecoYuv->getStride( COMPONENT_Y); for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride ) { for( UInt uiX = 0; uiX < uiCompWidth; uiX++ ) { piDes[ uiX ] = piSrc[ uiX ]; } } #if NH_3D_VSO // M38 // set model if( m_pcRdCost->getUseRenModel() ) { m_pcRdCost->setRenModelData( pcCU, uiPartOffset, pcRecoYuv, &tuRecurseWithPU ); } #endif } //=== update PU data ==== pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth ); #if NH_3D_SDC_INTRA pcCU->setSDCFlagSubParts ( bBestUseSDC, uiPartOffset, uiDepth + uiInitTrDepth ); #endif #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } while (tuRecurseWithPU.nextSection(tuRecurseCU)); if( uiNumPU > 1 ) { // set Cbf for all blocks UInt uiCombCbfY = 0; UInt uiCombCbfU = 0; UInt uiCombCbfV = 0; UInt uiPartIdx = 0; for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts ) { uiCombCbfY |= pcCU->getCbf( uiPartIdx, COMPONENT_Y, 1 ); uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 ); uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 ); } for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ ) { pcCU->getCbf( COMPONENT_Y )[ uiOffs ] |= uiCombCbfY; pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU; pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= 
uiCombCbfV; } } //===== reset context models ===== m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); //===== set distortion (rate and r-d costs are determined later) ===== pcCU->getTotalDistortion() = uiOverallDistY; #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } Void TEncSearch::estIntraPredChromaQT(TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE] DEBUG_STRING_FN_DECLARE(sDebug)) { const UInt uiInitTrDepth = pcCU->getPartitionSize(0) != SIZE_2Nx2N && enable4ChromaPUsInIntraNxNCU(pcOrgYuv->getChromaFormat()) ? 1 : 0; TComTURecurse tuRecurseCU(pcCU, 0); TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT); const UInt uiQNumParts = tuRecurseWithPU.GetAbsPartIdxNumParts(); const UInt uiDepthCU=tuRecurseWithPU.getCUDepth(); const UInt numberValidComponents = pcCU->getPic()->getNumberValidComponents(); do { UInt uiBestMode = 0; #if NH_3D_VSO Dist uiBestDist = 0; #else Distortion uiBestDist = 0; #endif Double dBestCost = MAX_DOUBLE; //----- init mode list ----- if (tuRecurseWithPU.ProcessChannelSection(CHANNEL_TYPE_CHROMA)) { UInt uiModeList[FAST_UDI_MAX_RDMODE_NUM]; const UInt uiQPartNum = uiQNumParts; const UInt uiPartOffset = tuRecurseWithPU.GetAbsPartIdxTU(); { UInt uiMinMode = 0; UInt uiMaxMode = NUM_CHROMA_MODE; //----- check chroma modes ----- pcCU->getAllowedChromaDir( uiPartOffset, uiModeList ); #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST if (DebugOptionList::ForceChromaMode.isSet()) { uiMinMode=DebugOptionList::ForceChromaMode.getInt(); if (uiModeList[uiMinMode]==34) { uiMinMode=4; // if the fixed mode has been renumbered because DM_CHROMA covers it, use DM_CHROMA. 
} uiMaxMode=uiMinMode+1; } #endif DEBUG_STRING_NEW(sPU) for( UInt uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++ ) { //----- restore context models ----- m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepthCU][CI_CURR_BEST] ); DEBUG_STRING_NEW(sMode) //----- chroma coding ----- #if NH_3D_VSO Dist uiDist = 0; #else Distortion uiDist = 0; #endif pcCU->setIntraDirSubParts ( CHANNEL_TYPE_CHROMA, uiModeList[uiMode], uiPartOffset, uiDepthCU+uiInitTrDepth ); xRecurIntraChromaCodingQT ( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, uiDist, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) ); if( pcCU->getSlice()->getPPS()->getUseTransformSkip() ) { m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepthCU][CI_CURR_BEST] ); } UInt uiBits = xGetIntraBitsQT( tuRecurseWithPU, false, true, false ); Double dCost = m_pcRdCost->calcRdCost( uiBits, uiDist ); //----- compare ----- if( dCost < dBestCost ) { DEBUG_STRING_SWAP(sPU, sMode); dBestCost = dCost; uiBestDist = uiDist; uiBestMode = uiModeList[uiMode]; xSetIntraResultChromaQT( pcRecoYuv, tuRecurseWithPU ); for (UInt componentIndex = COMPONENT_Cb; componentIndex < numberValidComponents; componentIndex++) { const ComponentID compID = ComponentID(componentIndex); ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID )+uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip( compID )+uiPartOffset, uiQPartNum * sizeof( UChar ) ); ::memcpy( m_phQTTempCrossComponentPredictionAlpha[compID], pcCU->getCrossComponentPredictionAlpha(compID)+uiPartOffset, uiQPartNum * sizeof( SChar ) ); } } } DEBUG_STRING_APPEND(sDebug, sPU) //----- set data ----- for (UInt componentIndex = COMPONENT_Cb; componentIndex < numberValidComponents; componentIndex++) { const ComponentID compID = ComponentID(componentIndex); ::memcpy( pcCU->getCbf( compID )+uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getTransformSkip( compID )+uiPartOffset, 
m_puhQTTempTransformSkipFlag[compID], uiQPartNum * sizeof( UChar ) ); ::memcpy( pcCU->getCrossComponentPredictionAlpha(compID)+uiPartOffset, m_phQTTempCrossComponentPredictionAlpha[compID], uiQPartNum * sizeof( SChar ) ); } } if( ! tuRecurseWithPU.IsLastSection() ) { for (UInt ch=COMPONENT_Cb; chgetZorderIdxInCtu() + tuRecurseWithPU.GetAbsPartIdxTU(); Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder ); const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( compID); const Pel* piSrc = pcRecoYuv->getAddr( compID, uiPartOffset ); const UInt uiSrcStride = pcRecoYuv->getStride( compID); for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride ) { for( UInt uiX = 0; uiX < uiCompWidth; uiX++ ) { piDes[ uiX ] = piSrc[ uiX ]; } } } } pcCU->setIntraDirSubParts( CHANNEL_TYPE_CHROMA, uiBestMode, uiPartOffset, uiDepthCU+uiInitTrDepth ); pcCU->getTotalDistortion () += uiBestDist; } } while (tuRecurseWithPU.nextSection(tuRecurseCU)); //----- restore context models ----- if( uiInitTrDepth != 0 ) { // set Cbf for all blocks UInt uiCombCbfU = 0; UInt uiCombCbfV = 0; UInt uiPartIdx = 0; for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts ) { uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 ); uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 ); } for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ ) { pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU; pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV; } } m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepthCU][CI_CURR_BEST] ); } /** Function for encoding and reconstructing luma/chroma samples of a PCM mode CU. 
* \param pcCU pointer to current CU * \param uiAbsPartIdx part index * \param pOrg pointer to original sample arrays * \param pPCM pointer to PCM code arrays * \param pPred pointer to prediction signal arrays * \param pResi pointer to residual signal arrays * \param pReco pointer to reconstructed sample arrays * \param uiStride stride of the original/prediction/residual sample arrays * \param uiWidth block width * \param uiHeight block height * \param compID texture component type */ Void TEncSearch::xEncPCM (TComDataCU* pcCU, UInt uiAbsPartIdx, Pel* pOrg, Pel* pPCM, Pel* pPred, Pel* pResi, Pel* pReco, UInt uiStride, UInt uiWidth, UInt uiHeight, const ComponentID compID ) { const UInt uiReconStride = pcCU->getPic()->getPicYuvRec()->getStride(compID); const UInt uiPCMBitDepth = pcCU->getSlice()->getSPS()->getPCMBitDepth(toChannelType(compID)); const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); Pel* pRecoPic = pcCU->getPic()->getPicYuvRec()->getAddr(compID, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu()+uiAbsPartIdx); const Int pcmShiftRight=(channelBitDepth - Int(uiPCMBitDepth)); assert(pcmShiftRight >= 0); for( UInt uiY = 0; uiY < uiHeight; uiY++ ) { for( UInt uiX = 0; uiX < uiWidth; uiX++ ) { // Reset pred and residual pPred[uiX] = 0; pResi[uiX] = 0; // Encode pPCM[uiX] = (pOrg[uiX]>>pcmShiftRight); // Reconstruction pReco [uiX] = (pPCM[uiX]<<(pcmShiftRight)); pRecoPic[uiX] = pReco[uiX]; } pPred += uiStride; pResi += uiStride; pPCM += uiWidth; pOrg += uiStride; pReco += uiStride; pRecoPic += uiReconStride; } } //! Function for PCM mode estimation. 
Void TEncSearch::IPCMSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv ) { UInt uiDepth = pcCU->getDepth(0); const Distortion uiDistortion = 0; UInt uiBits; Double dCost; for (UInt ch=0; ch < pcCU->getPic()->getNumberValidComponents(); ch++) { const ComponentID compID = ComponentID(ch); const UInt width = pcCU->getWidth(0) >> pcCU->getPic()->getComponentScaleX(compID); const UInt height = pcCU->getHeight(0) >> pcCU->getPic()->getComponentScaleY(compID); const UInt stride = pcPredYuv->getStride(compID); Pel * pOrig = pcOrgYuv->getAddr (compID, 0, width); Pel * pResi = pcResiYuv->getAddr(compID, 0, width); Pel * pPred = pcPredYuv->getAddr(compID, 0, width); Pel * pReco = pcRecoYuv->getAddr(compID, 0, width); Pel * pPCM = pcCU->getPCMSample (compID); xEncPCM ( pcCU, 0, pOrig, pPCM, pPred, pResi, pReco, stride, width, height, compID ); } m_pcEntropyCoder->resetBits(); xEncIntraHeader ( pcCU, uiDepth, 0, true, false); uiBits = m_pcEntropyCoder->getNumberOfWrittenBits(); #if NH_3D_VSO // M43 // GT: This needs to be checked distortion is not necessarily 0 in case of VSO. 
if( m_pcRdCost->getUseLambdaScaleVSO() ) { dCost = m_pcRdCost->calcRdCostVSO( uiBits, uiDistortion ); } else #endif dCost = m_pcRdCost->calcRdCost( uiBits, uiDistortion ); m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); pcCU->getTotalBits() = uiBits; pcCU->getTotalCost() = dCost; pcCU->getTotalDistortion() = uiDistortion; pcCU->copyToPic(uiDepth); } Void TEncSearch::xGetInterPredictionError( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPartIdx, Distortion& ruiErr, Bool /*bHadamard*/ ) { motionCompensation( pcCU, &m_tmpYuvPred, REF_PIC_LIST_X, iPartIdx ); UInt uiAbsPartIdx = 0; Int iWidth = 0; Int iHeight = 0; pcCU->getPartIndexAndSize( iPartIdx, uiAbsPartIdx, iWidth, iHeight ); DistParam cDistParam; cDistParam.bApplyWeight = false; m_pcRdCost->setDistParam( cDistParam, pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA), pcYuvOrg->getAddr( COMPONENT_Y, uiAbsPartIdx ), pcYuvOrg->getStride(COMPONENT_Y), m_tmpYuvPred .getAddr( COMPONENT_Y, uiAbsPartIdx ), m_tmpYuvPred.getStride(COMPONENT_Y), iWidth, iHeight, m_pcEncCfg->getUseHADME() && (pcCU->getCUTransquantBypass(iPartIdx) == 0) ); #if NH_3D_IC cDistParam.bUseIC = false; #endif #if NH_3D_SDC_INTER cDistParam.bUseSDCMRSAD = false; #endif ruiErr = cDistParam.DistFunc( &cDistParam ); } //! 
estimation of best merge coding Void TEncSearch::xMergeEstimation( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPUIdx, UInt& uiInterDir, TComMvField* pacMvField, UInt& uiMergeIndex, Distortion& ruiCost, TComMvField* cMvFieldNeighbours, UChar* uhInterDirNeighbours, Int& numValidMergeCand #if NH_3D_VSP , Int* vspFlag #endif #if NH_3D_SPIVMP , Bool* pbSPIVMPFlag, TComMvField* pcMvFieldSP, UChar* puhInterDirSP #endif ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "xMergeEstimation" ); #endif UInt uiAbsPartIdx = 0; Int iWidth = 0; Int iHeight = 0; pcCU->getPartIndexAndSize( iPUIdx, uiAbsPartIdx, iWidth, iHeight ); UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ); #if NH_3D_DBBP DbbpTmpData* pDBBPTmpData = pcCU->getDBBPTmpData(); if( pcCU->getDBBPFlag(0) ) { AOF( uiAbsPartIdx == 0 ); AOF( iPUIdx == 0 ); AOF( pcCU->getPartitionSize(0) == SIZE_2Nx2N ); AOF( pDBBPTmpData->eVirtualPartSize != NUMBER_OF_PART_SIZES ); // temporary change of partition size for candidate derivation pcCU->setPartSizeSubParts( pDBBPTmpData->eVirtualPartSize, 0, pcCU->getDepth(0)); iPUIdx = pcCU->getDBBPTmpData()->uiVirtualPartIndex; // if this is handling the second segment, make sure that motion info of first segment is available if( iPUIdx == 1 ) { pcCU->setInterDirSubParts(pDBBPTmpData->auhInterDir[0], 0, 0, pcCU->getDepth(0)); // interprets depth relative to LCU level for ( UInt uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ ) { RefPicList eRefList = (RefPicList)uiRefListIdx; pcCU->getCUMvField( eRefList )->setAllMvField( pDBBPTmpData->acMvField[0][eRefList], pDBBPTmpData->eVirtualPartSize, 0, 0, 0 ); // interprets depth relative to rpcTempCU level } } // update these values to virtual partition size pcCU->getPartIndexAndSize( iPUIdx, uiAbsPartIdx, iWidth, iHeight ); } #endif PartSize partSize = pcCU->getPartitionSize( 0 ); #if NH_3D_DBBP if ( pcCU->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2() && partSize != SIZE_2Nx2N && pcCU->getWidth( 0 ) <= 8 && pcCU->getDBBPFlag(0) == false ) 
#else if ( pcCU->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2() && partSize != SIZE_2Nx2N && pcCU->getWidth( 0 ) <= 8 ) #endif { if ( iPUIdx == 0 ) { pcCU->setPartSizeSubParts( SIZE_2Nx2N, 0, uiDepth ); // temporarily set #if NH_3D_MLC pcCU->initAvailableFlags(); pcCU->getInterMergeCandidates( 0, 0, cMvFieldNeighbours,uhInterDirNeighbours, numValidMergeCand ); pcCU->xGetInterMergeCandidates( 0, 0, cMvFieldNeighbours,uhInterDirNeighbours #if NH_3D_SPIVMP , pcMvFieldSP, puhInterDirSP #endif , numValidMergeCand ); pcCU->buildMCL( cMvFieldNeighbours,uhInterDirNeighbours #if NH_3D_VSP , vspFlag #endif #if NH_3D_SPIVMP , pbSPIVMPFlag #endif , numValidMergeCand ); #else pcCU->getInterMergeCandidates( 0, 0, cMvFieldNeighbours,uhInterDirNeighbours, numValidMergeCand ); #endif pcCU->setPartSizeSubParts( partSize, 0, uiDepth ); // restore } } else { #if NH_3D_MLC pcCU->initAvailableFlags(); pcCU->getInterMergeCandidates( uiAbsPartIdx, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand ); pcCU->xGetInterMergeCandidates( uiAbsPartIdx, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours #if NH_3D_SPIVMP , pcMvFieldSP, puhInterDirSP #endif , numValidMergeCand ); pcCU->buildMCL( cMvFieldNeighbours, uhInterDirNeighbours #if NH_3D_VSP , vspFlag #endif #if NH_3D_SPIVMP , pbSPIVMPFlag #endif , numValidMergeCand ); #else pcCU->getInterMergeCandidates( uiAbsPartIdx, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand ); #endif } xRestrictBipredMergeCand( pcCU, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand ); #if NH_3D_DBBP if( pcCU->getDBBPFlag(0) ) { // reset to 2Nx2N for actual motion search iPUIdx = 0; AOF( pcCU->getPartitionSize(0) == pDBBPTmpData->eVirtualPartSize ); pcCU->setPartSizeSubParts( SIZE_2Nx2N, 0, pcCU->getDepth(0)); // restore values for 2Nx2N partition size pcCU->getPartIndexAndSize( iPUIdx, uiAbsPartIdx, iWidth, iHeight ); AOF( uiAbsPartIdx == 0 ); AOF( iWidth == iHeight ); } #endif ruiCost = 
std::numeric_limits::max(); for( UInt uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "uiMergeCand: " + n2s(uiMergeCand) ); #endif Distortion uiCostCand = std::numeric_limits::max(); UInt uiBitsCand = 0; PartSize ePartSize = pcCU->getPartitionSize( 0 ); #if NH_3D_VSP pcCU->setVSPFlagSubParts( vspFlag[uiMergeCand], uiAbsPartIdx, iPUIdx, pcCU->getDepth( uiAbsPartIdx ) ); #endif #if NH_3D_SPIVMP pcCU->setSPIVMPFlagSubParts( pbSPIVMPFlag[uiMergeCand], uiAbsPartIdx, iPUIdx, pcCU->getDepth( uiAbsPartIdx )); if (pbSPIVMPFlag[uiMergeCand]) { UInt uiSPAddr; Int iNumSPInOneLine, iNumSP, iSPWidth, iSPHeight; pcCU->getSPPara(iWidth, iHeight, iNumSP, iNumSPInOneLine, iSPWidth, iSPHeight); for (Int iPartitionIdx = 0; iPartitionIdx < iNumSP; iPartitionIdx++) { pcCU->getSPAbsPartIdx(uiAbsPartIdx, iSPWidth, iSPHeight, iPartitionIdx, iNumSPInOneLine, uiSPAddr); pcCU->getCUMvField( REF_PIC_LIST_0 )->setMvFieldSP(pcCU, uiSPAddr, pcMvFieldSP[2*iPartitionIdx], iSPWidth, iSPHeight); pcCU->getCUMvField( REF_PIC_LIST_1 )->setMvFieldSP(pcCU, uiSPAddr, pcMvFieldSP[2*iPartitionIdx + 1], iSPWidth, iSPHeight); } } else #endif #if NH_3D_VSP #if NH_3D_DBBP if ( vspFlag[uiMergeCand] && !pcCU->getDBBPFlag(0) ) #else if ( vspFlag[uiMergeCand] ) #endif { UInt partAddr; Int vspSize; Int width, height; pcCU->getPartIndexAndSize( iPUIdx, partAddr, width, height ); if( uhInterDirNeighbours[ uiMergeCand ] & 0x01 ) { pcCU->setMvFieldPUForVSP( pcCU, partAddr, width, height, REF_PIC_LIST_0, cMvFieldNeighbours[ 2*uiMergeCand + 0 ].getRefIdx(), vspSize ); pcCU->setVSPFlag( partAddr, vspSize ); } else { pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField( cMvFieldNeighbours[0 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx ); } if( uhInterDirNeighbours[ uiMergeCand ] & 0x02 ) { pcCU->setMvFieldPUForVSP( pcCU, partAddr, width, height, REF_PIC_LIST_1, cMvFieldNeighbours[ 2*uiMergeCand + 1 ].getRefIdx(), vspSize ); pcCU->setVSPFlag( partAddr, 
vspSize ); } else { pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField( cMvFieldNeighbours[1 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx ); } } else { #endif pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField( cMvFieldNeighbours[0 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx ); pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField( cMvFieldNeighbours[1 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx ); #if NH_3D_VSP } #endif xGetInterPredictionError( pcCU, pcYuvOrg, iPUIdx, uiCostCand, m_pcEncCfg->getUseHADME() ); uiBitsCand = uiMergeCand + 1; if (uiMergeCand == m_pcEncCfg->getMaxNumMergeCand() -1) { uiBitsCand--; } uiCostCand = uiCostCand + m_pcRdCost->getCost( uiBitsCand ); #if NH_MV D_PRINT_INDENT( g_traceRDCost, "IP RD Cost: " + n2s(uiCostCand)); #endif if ( uiCostCand < ruiCost ) { ruiCost = uiCostCand; pacMvField[0] = cMvFieldNeighbours[0 + 2*uiMergeCand]; pacMvField[1] = cMvFieldNeighbours[1 + 2*uiMergeCand]; uiInterDir = uhInterDirNeighbours[uiMergeCand]; uiMergeIndex = uiMergeCand; } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } /** convert bi-pred merge candidates to uni-pred * \param pcCU * \param puIdx * \param mvFieldNeighbours * \param interDirNeighbours * \param numValidMergeCand * \returns Void */ Void TEncSearch::xRestrictBipredMergeCand( TComDataCU* pcCU, UInt puIdx, TComMvField* mvFieldNeighbours, UChar* interDirNeighbours, Int numValidMergeCand ) { if ( pcCU->isBipredRestriction(puIdx) ) { for( UInt mergeCand = 0; mergeCand < numValidMergeCand; ++mergeCand ) { if ( interDirNeighbours[mergeCand] == 3 ) { interDirNeighbours[mergeCand] = 1; mvFieldNeighbours[(mergeCand << 1) + 1].setMvField(TComMv(0,0), -1); } } } } //! 
search of the best candidate for inter prediction #if AMP_MRG #if NH_3D_FAST_TEXTURE_ENCODING Void TEncSearch::predInterSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv DEBUG_STRING_FN_DECLARE(sDebug), Bool bFMD, Bool bUseRes, Bool bUseMRG ) #else Void TEncSearch::predInterSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv DEBUG_STRING_FN_DECLARE(sDebug), Bool bUseRes, Bool bUseMRG ) #endif #else Void TEncSearch::predInterSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, Bool bUseRes ) #endif { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "predInterSearch"); #endif for(UInt i=0; iclear(); if ( !bUseRes ) { pcResiYuv->clear(); } pcRecoYuv->clear(); TComMv cMvSrchRngLT; TComMv cMvSrchRngRB; TComMv cMvZero; TComMv TempMv; //kolya TComMv cMv[2]; TComMv cMvBi[2]; TComMv cMvTemp[2][33]; Int iNumPart = pcCU->getNumPartitions(); Int iNumPredDir = pcCU->getSlice()->isInterP() ? 1 : 2; TComMv cMvPred[2][33]; TComMv cMvPredBi[2][33]; Int aaiMvpIdxBi[2][33]; Int aaiMvpIdx[2][33]; Int aaiMvpNum[2][33]; AMVPInfo aacAMVPInfo[2][33]; Int iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage. 
Int iRefIdxBi[2]; UInt uiPartAddr; Int iRoiWidth, iRoiHeight; UInt uiMbBits[3] = {1, 1, 0}; UInt uiLastMode = 0; Int iRefStart, iRefEnd; PartSize ePartSize = pcCU->getPartitionSize( 0 ); Int bestBiPRefIdxL1 = 0; Int bestBiPMvpL1 = 0; Distortion biPDistTemp = std::numeric_limits::max(); #if NH_3D_IV_MERGE TComMvField cMvFieldNeighbours[MRG_MAX_NUM_CANDS_MEM << 1]; // double length for mv of both lists UChar uhInterDirNeighbours[MRG_MAX_NUM_CANDS_MEM]; #else TComMvField cMvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists UChar uhInterDirNeighbours[MRG_MAX_NUM_CANDS]; #endif Int numValidMergeCand = 0 ; for ( Int iPartIdx = 0; iPartIdx < iNumPart; iPartIdx++ ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "iPartIdx: " + n2s(iPartIdx) ); #endif Distortion uiCost[2] = { std::numeric_limits::max(), std::numeric_limits::max() }; Distortion uiCostBi = std::numeric_limits::max(); Distortion uiCostTemp; UInt uiBits[3]; UInt uiBitsTemp; Distortion bestBiPDist = std::numeric_limits::max(); Distortion uiCostTempL0[MAX_NUM_REF]; for (Int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++) { uiCostTempL0[iNumRef] = std::numeric_limits::max(); } UInt uiBitsTempL0[MAX_NUM_REF]; TComMv mvValidList1; Int refIdxValidList1 = 0; UInt bitsValidList1 = MAX_UINT; Distortion costValidList1 = std::numeric_limits::max(); xGetBlkBits( ePartSize, pcCU->getSlice()->isInterP(), iPartIdx, uiLastMode, uiMbBits); pcCU->getPartIndexAndSize( iPartIdx, uiPartAddr, iRoiWidth, iRoiHeight ); #if NH_3D_VSP pcCU->setVSPFlagSubParts( 0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr) ); #endif #if AMP_MRG Bool bTestNormalMC = true; #if NH_3D_FAST_TEXTURE_ENCODING if (bFMD||( bUseMRG && pcCU->getWidth( 0 ) > 8 && iNumPart == 2 )) #else if ( bUseMRG && pcCU->getWidth( 0 ) > 8 && iNumPart == 2 ) #endif { bTestNormalMC = false; } if (bTestNormalMC) { #endif // Uni-directional prediction for ( Int iRefList = 0; iRefList < iNumPredDir; iRefList++ ) { #if NH_MV 
D_PRINT_INC_INDENT(g_traceModeCheck, "iRefList: " + n2s(iRefList) ); #endif RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); for ( Int iRefIdxTemp = 0; iRefIdxTemp < pcCU->getSlice()->getNumRefIdx(eRefPicList); iRefIdxTemp++ ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "iRefIdxTemp: " + n2s(iRefIdxTemp) ); #endif uiBitsTemp = uiMbBits[iRefList]; if ( pcCU->getSlice()->getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; if ( iRefIdxTemp == pcCU->getSlice()->getNumRefIdx(eRefPicList)-1 ) { uiBitsTemp--; } } xEstimateMvPredAMVP( pcCU, pcOrgYuv, iPartIdx, eRefPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], false, &biPDistTemp); aaiMvpIdx[iRefList][iRefIdxTemp] = pcCU->getMVPIdx(eRefPicList, uiPartAddr); aaiMvpNum[iRefList][iRefIdxTemp] = pcCU->getMVPNum(eRefPicList, uiPartAddr); if(pcCU->getSlice()->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist) { bestBiPDist = biPDistTemp; bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp]; bestBiPRefIdxL1 = iRefIdxTemp; } uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS]; if ( m_pcEncCfg->getFastMEForGenBLowDelayEnabled() && iRefList == 1 ) // list 1 { if ( pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp ) >= 0 ) { cMvTemp[1][iRefIdxTemp] = cMvTemp[0][pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )]; uiCostTemp = uiCostTempL0[pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )]; /*first subtract the bit-rate part of the cost of the other list*/ uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )] ); /*correct the bit-rate part of the current ref*/ m_pcRdCost->setPredictor ( cMvPred[iRefList][iRefIdxTemp] ); uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer() ); /*calculate the correct cost*/ uiCostTemp += m_pcRdCost->getCost( uiBitsTemp ); } else { xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, 
&cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); } } else { xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); } xCopyAMVPInfo(pcCU->getCUMvField(eRefPicList)->getAMVPInfo(), &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE ) xCheckBestMVP(pcCU, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp); if ( iRefList == 0 ) { uiCostTempL0[iRefIdxTemp] = uiCostTemp; uiBitsTempL0[iRefIdxTemp] = uiBitsTemp; } if ( uiCostTemp < uiCost[iRefList] ) { uiCost[iRefList] = uiCostTemp; uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction // set motion cMv[iRefList] = cMvTemp[iRefList][iRefIdxTemp]; iRefIdx[iRefList] = iRefIdxTemp; } if ( iRefList == 1 && uiCostTemp < costValidList1 && pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp ) < 0 ) { costValidList1 = uiCostTemp; bitsValidList1 = uiBitsTemp; // set motion mvValidList1 = cMvTemp[iRefList][iRefIdxTemp]; refIdxValidList1 = iRefIdxTemp; } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } // Bi-predictive Motion estimation if ( (pcCU->getSlice()->isInterB()) && (pcCU->isBipredRestriction(iPartIdx) == false) ) { cMvBi[0] = cMv[0]; cMvBi[1] = cMv[1]; iRefIdxBi[0] = iRefIdx[0]; iRefIdxBi[1] = iRefIdx[1]; ::memcpy(cMvPredBi, cMvPred, sizeof(cMvPred)); ::memcpy(aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx)); UInt uiMotBits[2]; if(pcCU->getSlice()->getMvdL1ZeroFlag()) { xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()); pcCU->setMVPIdxSubParts( bestBiPMvpL1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1; cMvPredBi[1][bestBiPRefIdxL1] = 
pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()->m_acMvCand[bestBiPMvpL1]; cMvBi[1] = cMvPredBi[1][bestBiPRefIdxL1]; iRefIdxBi[1] = bestBiPRefIdxL1; pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMv( cMvBi[1], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllRefIdx( iRefIdxBi[1], ePartSize, uiPartAddr, 0, iPartIdx ); TComYuv* pcYuvPred = &m_acYuvPred[REF_PIC_LIST_1]; motionCompensation( pcCU, pcYuvPred, REF_PIC_LIST_1, iPartIdx ); uiMotBits[0] = uiBits[0] - uiMbBits[0]; uiMotBits[1] = uiMbBits[1]; if ( pcCU->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 1 ) { uiMotBits[1] += bestBiPRefIdxL1+1; if ( bestBiPRefIdxL1 == pcCU->getSlice()->getNumRefIdx(REF_PIC_LIST_1)-1 ) { uiMotBits[1]--; } } uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS]; uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1]; cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1]; } else { uiMotBits[0] = uiBits[0] - uiMbBits[0]; uiMotBits[1] = uiBits[1] - uiMbBits[1]; uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1]; } // 4-times iteration (default) Int iNumIter = 4; // fast encoder setting: only one iteration if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || pcCU->getSlice()->getMvdL1ZeroFlag() ) { iNumIter = 1; } for ( Int iIter = 0; iIter < iNumIter; iIter++ ) { Int iRefList = iIter % 2; if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 ) { if( uiCost[0] <= uiCost[1] ) { iRefList = 1; } else { iRefList = 0; } } else if ( iIter == 0 ) { iRefList = 0; } if ( iIter == 0 && !pcCU->getSlice()->getMvdL1ZeroFlag()) { pcCU->getCUMvField(RefPicList(1-iRefList))->setAllMv( cMv[1-iRefList], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(RefPicList(1-iRefList))->setAllRefIdx( iRefIdx[1-iRefList], ePartSize, uiPartAddr, 0, iPartIdx ); TComYuv* pcYuvPred = &m_acYuvPred[1-iRefList]; 
motionCompensation ( pcCU, pcYuvPred, RefPicList(1-iRefList), iPartIdx ); } RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); if(pcCU->getSlice()->getMvdL1ZeroFlag()) { iRefList = 0; eRefPicList = REF_PIC_LIST_0; } Bool bChanged = false; iRefStart = 0; iRefEnd = pcCU->getSlice()->getNumRefIdx(eRefPicList)-1; for ( Int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ ) { uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; if ( pcCU->getSlice()->getNumRefIdx(eRefPicList) > 1 ) { uiBitsTemp += iRefIdxTemp+1; if ( iRefIdxTemp == pcCU->getSlice()->getNumRefIdx(eRefPicList)-1 ) { uiBitsTemp--; } } uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS]; // call ME xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, true ); xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], pcCU->getCUMvField(eRefPicList)->getAMVPInfo()); xCheckBestMVP(pcCU, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp); if ( uiCostTemp < uiCostBi ) { bChanged = true; cMvBi[iRefList] = cMvTemp[iRefList][iRefIdxTemp]; iRefIdxBi[iRefList] = iRefIdxTemp; uiCostBi = uiCostTemp; uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList]; uiBits[2] = uiBitsTemp; if(iNumIter!=1) { // Set motion pcCU->getCUMvField( eRefPicList )->setAllMv( cMvBi[iRefList], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField( eRefPicList )->setAllRefIdx( iRefIdxBi[iRefList], ePartSize, uiPartAddr, 0, iPartIdx ); TComYuv* pcYuvPred = &m_acYuvPred[iRefList]; motionCompensation( pcCU, pcYuvPred, eRefPicList, iPartIdx ); } } } // for loop-iRefIdxTemp if ( !bChanged ) { if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) { xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], pcCU->getCUMvField(REF_PIC_LIST_0)->getAMVPInfo()); xCheckBestMVP(pcCU, REF_PIC_LIST_0, 
cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi); if(!pcCU->getSlice()->getMvdL1ZeroFlag()) { xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()); xCheckBestMVP(pcCU, REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi); } } break; } } // for loop-iter } // if (B_SLICE) #if AMP_MRG } //end if bTestNormalMC #endif // Clear Motion Field pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField( TComMvField(), ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField( TComMvField(), ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); UInt uiMEBits = 0; // Set Motion Field_ cMv[1] = mvValidList1; iRefIdx[1] = refIdxValidList1; uiBits[1] = bitsValidList1; uiCost[1] = costValidList1; #if AMP_MRG if (bTestNormalMC) { #endif if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) { uiLastMode = 2; pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMv( cMvBi[0], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_0)->setAllRefIdx( iRefIdxBi[0], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMv( cMvBi[1], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx( iRefIdxBi[1], ePartSize, uiPartAddr, 0, iPartIdx ); TempMv = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]]; 
pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx ); TempMv = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]]; pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->setInterDirSubParts( 3, uiPartAddr, iPartIdx, pcCU->getDepth(0) ); pcCU->setMVPIdxSubParts( aaiMvpIdxBi[0][iRefIdxBi[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( aaiMvpNum[0][iRefIdxBi[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPIdxSubParts( aaiMvpIdxBi[1][iRefIdxBi[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( aaiMvpNum[1][iRefIdxBi[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); uiMEBits = uiBits[2]; } else if ( uiCost[0] <= uiCost[1] ) { uiLastMode = 0; pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMv( cMv[0], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_0)->setAllRefIdx( iRefIdx[0], ePartSize, uiPartAddr, 0, iPartIdx ); TempMv = cMv[0] - cMvPred[0][iRefIdx[0]]; pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->setInterDirSubParts( 1, uiPartAddr, iPartIdx, pcCU->getDepth(0) ); pcCU->setMVPIdxSubParts( aaiMvpIdx[0][iRefIdx[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( aaiMvpNum[0][iRefIdx[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); uiMEBits = uiBits[0]; } else { uiLastMode = 1; pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMv( cMv[1], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx( iRefIdx[1], ePartSize, uiPartAddr, 0, iPartIdx ); TempMv = cMv[1] - cMvPred[1][iRefIdx[1]]; pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->setInterDirSubParts( 2, uiPartAddr, iPartIdx, pcCU->getDepth(0) ); pcCU->setMVPIdxSubParts( 
aaiMvpIdx[1][iRefIdx[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( aaiMvpNum[1][iRefIdx[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); uiMEBits = uiBits[1]; } #if AMP_MRG } // end if bTestNormalMC #endif #if NH_3D_DBBP // test merge mode for DBBP (2Nx2N) if ( pcCU->getPartitionSize( uiPartAddr ) != SIZE_2Nx2N || pcCU->getDBBPFlag(0) ) #else if ( pcCU->getPartitionSize( uiPartAddr ) != SIZE_2Nx2N ) #endif { UInt uiMRGInterDir = 0; TComMvField cMRGMvField[2]; UInt uiMRGIndex = 0; UInt uiMEInterDir = 0; TComMvField cMEMvField[2]; m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) ); #if AMP_MRG // calculate ME cost Distortion uiMEError = std::numeric_limits::max(); Distortion uiMECost = std::numeric_limits::max(); if (bTestNormalMC) { xGetInterPredictionError( pcCU, pcOrgYuv, iPartIdx, uiMEError, m_pcEncCfg->getUseHADME() ); uiMECost = uiMEError + m_pcRdCost->getCost( uiMEBits ); #if NH_MV D_PRINT_INDENT( g_traceRDCost, "IP RD Cost: " + n2s(uiMECost)); #endif } #else // calculate ME cost Distortion uiMEError = std::numeric_limits::max(); xGetInterPredictionError( pcCU, pcOrgYuv, iPartIdx, uiMEError, m_pcEncCfg->getUseHADME() ); Distortion uiMECost = uiMEError + m_pcRdCost->getCost( uiMEBits ); #endif // save ME result. 
uiMEInterDir = pcCU->getInterDir( uiPartAddr ); TComDataCU::getMvField( pcCU, uiPartAddr, REF_PIC_LIST_0, cMEMvField[0] ); TComDataCU::getMvField( pcCU, uiPartAddr, REF_PIC_LIST_1, cMEMvField[1] ); // find Merge result Distortion uiMRGCost = std::numeric_limits::max(); #if NH_3D_VSP Int vspFlag[MRG_MAX_NUM_CANDS_MEM]; memset(vspFlag, 0, sizeof(Int)*MRG_MAX_NUM_CANDS_MEM); UInt uiAbsPartIdx = 0; Int iWidth = 0; Int iHeight = 0; pcCU->getPartIndexAndSize( iPartIdx, uiAbsPartIdx, iWidth, iHeight ); DisInfo OriginalDvInfo = pcCU->getDvInfo(uiAbsPartIdx); #endif #if NH_3D_SPIVMP Bool bSPIVMPFlag[MRG_MAX_NUM_CANDS_MEM]; memset(bSPIVMPFlag, false, sizeof(Bool)*MRG_MAX_NUM_CANDS_MEM); TComMvField* pcMvFieldSP; UChar* puhInterDirSP; pcMvFieldSP = new TComMvField[pcCU->getPic()->getPicSym()->getNumPartitionsInCtu()*2]; puhInterDirSP = new UChar[pcCU->getPic()->getPicSym()->getNumPartitionsInCtu()]; #endif xMergeEstimation( pcCU, pcOrgYuv, iPartIdx, uiMRGInterDir, cMRGMvField, uiMRGIndex, uiMRGCost, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand #if NH_3D_VSP , vspFlag #endif #if NH_3D_SPIVMP , bSPIVMPFlag, pcMvFieldSP, puhInterDirSP #endif ); if ( uiMRGCost < uiMECost ) { // set Merge result pcCU->setMergeFlagSubParts ( true, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); pcCU->setMergeIndexSubParts( uiMRGIndex, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); #if NH_3D_VSP pcCU->setVSPFlagSubParts( vspFlag[uiMRGIndex], uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); #endif #if NH_3D_SPIVMP pcCU->setSPIVMPFlagSubParts(bSPIVMPFlag[uiMRGIndex], uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); if (bSPIVMPFlag[uiMRGIndex]!=0) { UInt uiSPAddr; Int iNumSPInOneLine, iNumSP, iSPWidth, iSPHeight; pcCU->getSPPara(iRoiWidth, iRoiHeight, iNumSP, iNumSPInOneLine, iSPWidth, iSPHeight); for (Int iPartitionIdx = 0; iPartitionIdx < iNumSP; iPartitionIdx++) { pcCU->getSPAbsPartIdx(uiPartAddr, iSPWidth, iSPHeight, iPartitionIdx, iNumSPInOneLine, uiSPAddr); 
pcCU->setInterDirSP(puhInterDirSP[iPartitionIdx], uiSPAddr, iSPWidth, iSPHeight); pcCU->getCUMvField( REF_PIC_LIST_0 )->setMvFieldSP(pcCU, uiSPAddr, pcMvFieldSP[2*iPartitionIdx], iSPWidth, iSPHeight); pcCU->getCUMvField( REF_PIC_LIST_1 )->setMvFieldSP(pcCU, uiSPAddr, pcMvFieldSP[2*iPartitionIdx + 1], iSPWidth, iSPHeight); } if ( pcCU->getInterDir(uiPartAddr) == 3 && pcCU->isBipredRestriction(iPartIdx) ) { pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMv( TComMv(0,0), ePartSize, uiPartAddr, 0, iPartIdx); pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllRefIdx( -1, ePartSize, uiPartAddr, 0, iPartIdx); pcCU->setInterDirSubParts( 1, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr )); } } else #endif #if NH_3D_VSP #if NH_3D_DBBP if ( vspFlag[uiMRGIndex] && !pcCU->getDBBPFlag(uiPartAddr) ) #else if ( vspFlag[uiMRGIndex] ) #endif { UInt partAddrTemp; Int vspSize; Int width, height; pcCU->getPartIndexAndSize( iPartIdx, partAddrTemp, width, height ); // true or pcCU->getTotalNumPart()==256 if( uiMRGInterDir & 0x01 ) { pcCU->setMvFieldPUForVSP( pcCU, partAddrTemp, width, height, REF_PIC_LIST_0, cMRGMvField[0].getRefIdx(), vspSize ); pcCU->setVSPFlag( partAddrTemp, vspSize ); } else { pcCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMRGMvField[0], ePartSize, uiPartAddr, 0, iPartIdx ); } if( uiMRGInterDir & 0x02 ) { pcCU->setMvFieldPUForVSP( pcCU, partAddrTemp, width, height, REF_PIC_LIST_1, cMRGMvField[1].getRefIdx(), vspSize ); pcCU->setVSPFlag( partAddrTemp, vspSize ); } else { pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMRGMvField[1], ePartSize, uiPartAddr, 0, iPartIdx ); } pcCU->setInterDirSubParts ( uiMRGInterDir, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); } else { #endif pcCU->setInterDirSubParts ( uiMRGInterDir, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); pcCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMRGMvField[0], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMRGMvField[1], ePartSize, 
uiPartAddr, 0, iPartIdx ); #if NH_3D_VSP } #endif #if H_3D } #endif pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr)); } else { #if NH_3D_SPIVMP pcCU->setSPIVMPFlagSubParts(0, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); #endif // set ME result pcCU->setMergeFlagSubParts( false, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); pcCU->setInterDirSubParts ( uiMEInterDir, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); #if NH_3D_VSP pcCU->setVSPFlagSubParts ( 0, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); pcCU->setDvInfoSubParts(OriginalDvInfo, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) ); #endif pcCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMEMvField[0], ePartSize, uiPartAddr, 0, iPartIdx ); pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMEMvField[1], ePartSize, uiPartAddr, 0, iPartIdx ); } #if NH_3D_SPIVMP delete[] pcMvFieldSP; delete[] puhInterDirSP; #endif } // MC motionCompensation ( pcCU, pcPredYuv, REF_PIC_LIST_X, iPartIdx ); #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } // end of for ( Int iPartIdx = 0; iPartIdx < iNumPart; iPartIdx++ ) setWpScalingDistParam( pcCU, -1, REF_PIC_LIST_X ); #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif return; } // AMVP Void TEncSearch::xEstimateMvPredAMVP( TComDataCU* pcCU, TComYuv* pcOrgYuv, UInt uiPartIdx, RefPicList eRefPicList, Int iRefIdx, TComMv& rcMvPred, Bool bFilled, Distortion* puiDistBiP ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, 
"xEstimateMvPredAMVP"); #endif AMVPInfo* pcAMVPInfo = pcCU->getCUMvField(eRefPicList)->getAMVPInfo(); TComMv cBestMv; Int iBestIdx = 0; TComMv cZeroMv; TComMv cMvPred; Distortion uiBestCost = std::numeric_limits::max(); UInt uiPartAddr = 0; Int iRoiWidth, iRoiHeight; Int i; pcCU->getPartIndexAndSize( uiPartIdx, uiPartAddr, iRoiWidth, iRoiHeight ); // Fill the MV Candidates if (!bFilled) { #if NH_3D_DBBP DbbpTmpData* pDBBPTmpData = pcCU->getDBBPTmpData(); if( pcCU->getDBBPFlag(0) ) { AOF( uiPartAddr == 0 ); AOF( uiPartIdx == 0 ); AOF( pcCU->getPartitionSize(0) == SIZE_2Nx2N ); AOF( pDBBPTmpData->eVirtualPartSize != NUMBER_OF_PART_SIZES ); AOF( iRoiWidth == iRoiHeight ); // temporary change of partition size for candidate derivation pcCU->setPartSizeSubParts( pDBBPTmpData->eVirtualPartSize, 0, pcCU->getDepth(0)); uiPartIdx = pcCU->getDBBPTmpData()->uiVirtualPartIndex; // if this is handling the second segment, make sure that motion info of first segment is set to first segment if( uiPartIdx == 1 ) { pcCU->setInterDirSubParts(pDBBPTmpData->auhInterDir[0], 0, 0, pcCU->getDepth(0)); // interprets depth relative to LCU level for ( UInt uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ ) { RefPicList eRefList = (RefPicList)uiRefListIdx; pcCU->getCUMvField( eRefList )->setAllMvField( pDBBPTmpData->acMvField[0][eRefList], pDBBPTmpData->eVirtualPartSize, 0, 0, 0 ); // interprets depth relative to rpcTempCU level } } // update values to virtual partition size pcCU->getPartIndexAndSize( uiPartIdx, uiPartAddr, iRoiWidth, iRoiHeight ); } #endif pcCU->fillMvpCand( uiPartIdx, uiPartAddr, eRefPicList, iRefIdx, pcAMVPInfo ); #if NH_3D_DBBP if( pcCU->getDBBPFlag(0) ) { // restore 2Nx2N partitioning for motion estimation uiPartIdx = 0; AOF( pcCU->getPartitionSize(0) == pDBBPTmpData->eVirtualPartSize ); pcCU->setPartSizeSubParts( SIZE_2Nx2N, 0, pcCU->getDepth(0)); // restore values for 2Nx2N partition size pcCU->getPartIndexAndSize( uiPartIdx, uiPartAddr, iRoiWidth, iRoiHeight ); 
AOF(uiPartAddr==0); } #endif } // initialize Mvp index & Mvp iBestIdx = 0; cBestMv = pcAMVPInfo->m_acMvCand[0]; if (pcAMVPInfo->iN <= 1) { rcMvPred = cBestMv; pcCU->setMVPIdxSubParts( iBestIdx, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( pcAMVPInfo->iN, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr)); if(pcCU->getSlice()->getMvdL1ZeroFlag() && eRefPicList==REF_PIC_LIST_1) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "Init"); #endif (*puiDistBiP) = xGetTemplateCost( pcCU, uiPartAddr, pcOrgYuv, &m_cYuvPredTemp, rcMvPred, 0, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx, iRoiWidth, iRoiHeight); #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } return; } if (bFilled) { assert(pcCU->getMVPIdx(eRefPicList,uiPartAddr) >= 0); rcMvPred = pcAMVPInfo->m_acMvCand[pcCU->getMVPIdx(eRefPicList,uiPartAddr)]; return; } m_cYuvPredTemp.clear(); //-- Check Minimum Cost. for ( i = 0 ; i < pcAMVPInfo->iN; i++) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "Cand i=" + n2s(i) + " X: " + n2s( pcAMVPInfo->m_acMvCand[i].getHor() ) + " Y: " + n2s( pcAMVPInfo->m_acMvCand[i].getVer() )); #endif Distortion uiTmpCost; uiTmpCost = xGetTemplateCost( pcCU, uiPartAddr, pcOrgYuv, &m_cYuvPredTemp, pcAMVPInfo->m_acMvCand[i], i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx, iRoiWidth, iRoiHeight); if ( uiBestCost > uiTmpCost ) { uiBestCost = uiTmpCost; cBestMv = pcAMVPInfo->m_acMvCand[i]; iBestIdx = i; (*puiDistBiP) = uiTmpCost; } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } m_cYuvPredTemp.clear(); // Setting Best MVP rcMvPred = cBestMv; pcCU->setMVPIdxSubParts( iBestIdx, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr)); pcCU->setMVPNumSubParts( pcAMVPInfo->iN, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr)); #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif return; } UInt TEncSearch::xGetMvpIdxBits(Int iIdx, Int iNum) { assert(iIdx >= 0 && iNum >= 0 && iIdx < iNum); if (iNum == 1) { return 0; } UInt 
uiLength = 1; Int iTemp = iIdx; if ( iTemp == 0 ) { return uiLength; } Bool bCodeLast = ( iNum-1 > iTemp ); uiLength += (iTemp-1); if( bCodeLast ) { uiLength++; } return uiLength; } Void TEncSearch::xGetBlkBits( PartSize eCUMode, Bool bPSlice, Int iPartIdx, UInt uiLastMode, UInt uiBlkBit[3]) { if ( eCUMode == SIZE_2Nx2N ) { uiBlkBit[0] = (! bPSlice) ? 3 : 1; uiBlkBit[1] = 3; uiBlkBit[2] = 5; } else if ( (eCUMode == SIZE_2NxN || eCUMode == SIZE_2NxnU) || eCUMode == SIZE_2NxnD ) { UInt aauiMbBits[2][3][3] = { { {0,0,3}, {0,0,0}, {0,0,0} } , { {5,7,7}, {7,5,7}, {9-3,9-3,9-3} } }; if ( bPSlice ) { uiBlkBit[0] = 3; uiBlkBit[1] = 0; uiBlkBit[2] = 0; } else { ::memcpy( uiBlkBit, aauiMbBits[iPartIdx][uiLastMode], 3*sizeof(UInt) ); } } else if ( (eCUMode == SIZE_Nx2N || eCUMode == SIZE_nLx2N) || eCUMode == SIZE_nRx2N ) { UInt aauiMbBits[2][3][3] = { { {0,2,3}, {0,0,0}, {0,0,0} } , { {5,7,7}, {7-2,7-2,9-2}, {9-3,9-3,9-3} } }; if ( bPSlice ) { uiBlkBit[0] = 3; uiBlkBit[1] = 0; uiBlkBit[2] = 0; } else { ::memcpy( uiBlkBit, aauiMbBits[iPartIdx][uiLastMode], 3*sizeof(UInt) ); } } else if ( eCUMode == SIZE_NxN ) { uiBlkBit[0] = (! bPSlice) ? 
3 : 1; uiBlkBit[1] = 3; uiBlkBit[2] = 5; } else { printf("Wrong!\n"); assert( 0 ); } } Void TEncSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst) { pDst->iN = pSrc->iN; for (Int i = 0; i < pSrc->iN; i++) { pDst->m_acMvCand[i] = pSrc->m_acMvCand[i]; } } Void TEncSearch::xCheckBestMVP ( TComDataCU* pcCU, RefPicList eRefPicList, TComMv cMv, TComMv& rcMvPred, Int& riMVPIdx, UInt& ruiBits, Distortion& ruiCost ) { AMVPInfo* pcAMVPInfo = pcCU->getCUMvField(eRefPicList)->getAMVPInfo(); assert(pcAMVPInfo->m_acMvCand[riMVPIdx] == rcMvPred); if (pcAMVPInfo->iN < 2) { return; } m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(0) ); m_pcRdCost->setCostScale ( 0 ); Int iBestMVPIdx = riMVPIdx; m_pcRdCost->setPredictor( rcMvPred ); Int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer()); iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS]; Int iBestMvBits = iOrgMvBits; for (Int iMVPIdx = 0; iMVPIdx < pcAMVPInfo->iN; iMVPIdx++) { if (iMVPIdx == riMVPIdx) { continue; } m_pcRdCost->setPredictor( pcAMVPInfo->m_acMvCand[iMVPIdx] ); Int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer()); iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS]; if (iMvBits < iBestMvBits) { iBestMvBits = iMvBits; iBestMVPIdx = iMVPIdx; } } if (iBestMVPIdx != riMVPIdx) //if changed { rcMvPred = pcAMVPInfo->m_acMvCand[iBestMVPIdx]; riMVPIdx = iBestMVPIdx; UInt uiOrgBits = ruiBits; ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits; ruiCost = (ruiCost - m_pcRdCost->getCost( uiOrgBits )) + m_pcRdCost->getCost( ruiBits ); } } Distortion TEncSearch::xGetTemplateCost( TComDataCU* pcCU, UInt uiPartAddr, TComYuv* pcOrgYuv, TComYuv* pcTemplateCand, TComMv cMvCand, Int iMVPIdx, Int iMVPNum, RefPicList eRefPicList, Int iRefIdx, Int iSizeX, Int iSizeY ) { Distortion uiCost = std::numeric_limits::max(); TComPicYuv* pcPicYuvRef = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdx )->getPicYuvRec(); pcCU->clipMv( cMvCand ); #if NH_3D_IC 
Bool bICFlag = pcCU->getICFlag( uiPartAddr ) && ( pcCU->getSlice()->getViewIndex() != pcCU->getSlice()->getRefPic( eRefPicList, iRefIdx )->getViewIndex() ); #endif // prediction pattern if ( pcCU->getSlice()->testWeightPred() && pcCU->getSlice()->getSliceType()==P_SLICE ) { xPredInterBlk( COMPONENT_Y, pcCU, pcPicYuvRef, uiPartAddr, &cMvCand, iSizeX, iSizeY, pcTemplateCand, true, pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) #if NH_3D_ARP , false //add this for IC, otherwise, it could be removed #endif ); } else { xPredInterBlk( COMPONENT_Y, pcCU, pcPicYuvRef, uiPartAddr, &cMvCand, iSizeX, iSizeY, pcTemplateCand, false, pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) #if NH_3D_ARP , false #endif #if NH_3D_IC , bICFlag #endif ); } if ( pcCU->getSlice()->testWeightPred() && pcCU->getSlice()->getSliceType()==P_SLICE ) { xWeightedPredictionUni( pcCU, pcTemplateCand, uiPartAddr, iSizeX, iSizeY, eRefPicList, pcTemplateCand, iRefIdx ); } // calc distortion uiCost = m_pcRdCost->getDistPart( pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA), pcTemplateCand->getAddr(COMPONENT_Y, uiPartAddr), pcTemplateCand->getStride(COMPONENT_Y), pcOrgYuv->getAddr(COMPONENT_Y, uiPartAddr), pcOrgYuv->getStride(COMPONENT_Y), iSizeX, iSizeY, COMPONENT_Y, DF_SAD ); uiCost = (UInt) m_pcRdCost->calcRdCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum], uiCost, DF_SAD ); return uiCost; } Void TEncSearch::xMotionEstimation( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPartIdx, RefPicList eRefPicList, TComMv* pcMvPred, Int iRefIdxPred, TComMv& rcMv, UInt& ruiBits, Distortion& ruiCost, Bool bBi ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "xMotionEstimation"); #endif UInt uiPartAddr; Int iRoiWidth; Int iRoiHeight; TComMv cMvHalf, cMvQter; TComMv cMvSrchRngLT; TComMv cMvSrchRngRB; TComYuv* pcYuv = pcYuvOrg; assert(eRefPicList < MAX_NUM_REF_LIST_ADAPT_SR && iRefIdxPredgetPartIndexAndSize( iPartIdx, uiPartAddr, iRoiWidth, iRoiHeight ); #if NH_3D_IC Bool bICFlag = pcCU->getICFlag( 
uiPartAddr ) && ( pcCU->getSlice()->getViewIndex() != pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getViewIndex() ); pcPatternKey->setICFlag( bICFlag ); #endif #if NH_3D_SDC_INTER pcPatternKey->setSDCMRSADFlag( pcCU->getSlice()->getInterSdcFlag() ); #endif if ( bBi ) // Bipredictive ME { TComYuv* pcYuvOther = &m_acYuvPred[1-(Int)eRefPicList]; pcYuv = &m_cYuvPredTemp; pcYuvOrg->copyPartToPartYuv( pcYuv, uiPartAddr, iRoiWidth, iRoiHeight ); pcYuv->removeHighFreq( pcYuvOther, uiPartAddr, iRoiWidth, iRoiHeight, pcCU->getSlice()->getSPS()->getBitDepths().recon, m_pcEncCfg->getClipForBiPredMeEnabled() ); fWeight = 0.5; } m_cDistParam.bIsBiPred = bBi; // Search key pattern initialization pcPatternKey->initPattern( pcYuv->getAddr ( COMPONENT_Y, uiPartAddr ), iRoiWidth, iRoiHeight, pcYuv->getStride(COMPONENT_Y), pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) ); Pel* piRefY = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu() + uiPartAddr ); Int iRefStride = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getPicYuvRec()->getStride(COMPONENT_Y); TComMv cMvPred = *pcMvPred; #if NH_MV m_vertRestriction = m_pcEncCfg->getUseDisparitySearchRangeRestriction() && ( pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getPOC() == pcCU->getSlice()->getPOC() ); #endif if ( bBi ) { xSetSearchRange ( pcCU, rcMv , iSrchRng, cMvSrchRngLT, cMvSrchRngRB ); } else { xSetSearchRange ( pcCU, cMvPred, iSrchRng, cMvSrchRngLT, cMvSrchRngRB ); } m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) ); m_pcRdCost->setPredictor ( *pcMvPred ); #if NH_3D_INTEGER_MV_DEPTH if( pcCU->getSlice()->getIsDepth() ) { m_pcRdCost->setCostScale ( 0 ); } else { #endif m_pcRdCost->setCostScale ( 2 ); #if NH_3D_INTEGER_MV_DEPTH } #endif setWpScalingDistParam( pcCU, iRefIdxPred, eRefPicList ); // Do integer search if ( 
(m_motionEstimationSearchMethod==MESEARCH_FULL) || bBi ) { xPatternSearch ( pcPatternKey, piRefY, iRefStride, &cMvSrchRngLT, &cMvSrchRngRB, rcMv, ruiCost ); } else { rcMv = *pcMvPred; #if NH_MV_FIX_VERT_MV_REST if ( m_vertRestriction ) { if (rcMv.getVer() > cMvSrchRngRB.getVer()<<2) { rcMv.setVer(cMvSrchRngRB.getVer()<<2); } } #endif const TComMv *pIntegerMv2Nx2NPred=0; if (pcCU->getPartitionSize(0) != SIZE_2Nx2N || pcCU->getDepth(0) != 0) { pIntegerMv2Nx2NPred = &(m_integerMv2Nx2N[eRefPicList][iRefIdxPred]); } xPatternSearchFast ( pcCU, pcPatternKey, piRefY, iRefStride, &cMvSrchRngLT, &cMvSrchRngRB, rcMv, ruiCost, pIntegerMv2Nx2NPred ); if (pcCU->getPartitionSize(0) == SIZE_2Nx2N) { m_integerMv2Nx2N[eRefPicList][iRefIdxPred] = rcMv; } } m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) ); #if NH_3D_INTEGER_MV_DEPTH if( ! pcCU->getSlice()->getIsDepth() ) { #endif m_pcRdCost->setCostScale ( 1 ); const Bool bIsLosslessCoded = pcCU->getCUTransquantBypass(uiPartAddr) != 0; xPatternSearchFracDIF( bIsLosslessCoded, pcPatternKey, piRefY, iRefStride, &rcMv, cMvHalf, cMvQter, ruiCost ); m_pcRdCost->setCostScale( 0 ); rcMv <<= 2; rcMv += (cMvHalf <<= 1); rcMv += cMvQter; #if NH_3D_INTEGER_MV_DEPTH } #endif UInt uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.getHor(), rcMv.getVer() ); #if NH_3D_INTEGER_MV_DEPTH if( pcCU->getSlice()->getIsDepth() ) { ruiCost += m_pcRdCost->getCost( uiMvBits ); } #endif ruiBits += uiMvBits; ruiCost = (Distortion)( floor( fWeight * ( (Double)ruiCost - (Double)m_pcRdCost->getCost( uiMvBits ) ) ) + (Double)m_pcRdCost->getCost( ruiBits ) ); #if NH_MV D_PRINT_INDENT(g_traceRDCost, "ME Cost:" + n2s(ruiCost) ); D_DEC_INDENT ( g_traceModeCheck ); #endif } Void TEncSearch::xSetSearchRange ( const TComDataCU* const pcCU, const TComMv& cMvPred, const Int iSrchRng, TComMv& rcMvSrchRngLT, TComMv& rcMvSrchRngRB ) { Int iMvShift = 2; #if NH_3D_INTEGER_MV_DEPTH if( pcCU->getSlice()->getIsDepth() ) { iMvShift = 0; } 
#endif TComMv cTmpMvPred = cMvPred; pcCU->clipMv( cTmpMvPred ); rcMvSrchRngLT.setHor( cTmpMvPred.getHor() - (iSrchRng << iMvShift) ); rcMvSrchRngLT.setVer( cTmpMvPred.getVer() - (iSrchRng << iMvShift) ); rcMvSrchRngRB.setHor( cTmpMvPred.getHor() + (iSrchRng << iMvShift) ); rcMvSrchRngRB.setVer( cTmpMvPred.getVer() + (iSrchRng << iMvShift) ); #if NH_MV if ( m_vertRestriction ) { Int mvRestricted = ( m_pcEncCfg->getVerticalDisparitySearchRange() - 1 ) << iMvShift ; // -1 to consider subpel search if ( rcMvSrchRngRB.getVer() >= mvRestricted ) { rcMvSrchRngRB.setVer( mvRestricted ); //only positive side is restricted } } #endif pcCU->clipMv ( rcMvSrchRngLT ); pcCU->clipMv ( rcMvSrchRngRB ); #if ME_ENABLE_ROUNDING_OF_MVS rcMvSrchRngLT.divideByPowerOf2(iMvShift); rcMvSrchRngRB.divideByPowerOf2(iMvShift); #else rcMvSrchRngLT >>= iMvShift; rcMvSrchRngRB >>= iMvShift; #endif } Void TEncSearch::xPatternSearch( const TComPattern* const pcPatternKey, const Pel* piRefY, const Int iRefStride, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB, TComMv& rcMv, Distortion& ruiSAD ) { Int iSrchRngHorLeft = pcMvSrchRngLT->getHor(); Int iSrchRngHorRight = pcMvSrchRngRB->getHor(); Int iSrchRngVerTop = pcMvSrchRngLT->getVer(); Int iSrchRngVerBottom = pcMvSrchRngRB->getVer(); Distortion uiSad; Distortion uiSadBest = std::numeric_limits::max(); Int iBestX = 0; Int iBestY = 0; //-- jclee for using the SAD function pointer m_pcRdCost->setDistParam( pcPatternKey, piRefY, iRefStride, m_cDistParam ); // fast encoder decision: use subsampled SAD for integer ME if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE3 ) { if ( m_cDistParam.iRows > 8 ) { m_cDistParam.iSubShift = 1; } } piRefY += (iSrchRngVerTop * iRefStride); for ( Int y = iSrchRngVerTop; y <= iSrchRngVerBottom; y++ ) { for ( Int x = iSrchRngHorLeft; x <= iSrchRngHorRight; x++ ) { // find min. 
distortion position m_cDistParam.pCur = piRefY + x; setDistParamComp(COMPONENT_Y); m_cDistParam.bitDepth = pcPatternKey->getBitDepthY(); #if NH_3D_IC m_cDistParam.bUseIC = pcPatternKey->getICFlag(); #endif #if NH_3D_SDC_INTER m_cDistParam.bUseSDCMRSAD = pcPatternKey->getSDCMRSADFlag(); #endif uiSad = m_cDistParam.DistFunc( &m_cDistParam ); // motion cost uiSad += m_pcRdCost->getCostOfVectorWithPredictor( x, y ); if ( uiSad < uiSadBest ) { uiSadBest = uiSad; iBestX = x; iBestY = y; m_cDistParam.m_maximumDistortionForEarlyExit = uiSad; } } piRefY += iRefStride; } rcMv.set( iBestX, iBestY ); ruiSAD = uiSadBest - m_pcRdCost->getCostOfVectorWithPredictor( iBestX, iBestY ); return; } Void TEncSearch::xPatternSearchFast( const TComDataCU* const pcCU, const TComPattern* const pcPatternKey, const Pel* const piRefY, const Int iRefStride, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB, TComMv &rcMv, Distortion &ruiSAD, const TComMv* const pIntegerMv2Nx2NPred ) { assert (MD_LEFT < NUM_MV_PREDICTORS); pcCU->getMvPredLeft ( m_acMvPredictors[MD_LEFT] ); assert (MD_ABOVE < NUM_MV_PREDICTORS); pcCU->getMvPredAbove ( m_acMvPredictors[MD_ABOVE] ); assert (MD_ABOVE_RIGHT < NUM_MV_PREDICTORS); pcCU->getMvPredAboveRight ( m_acMvPredictors[MD_ABOVE_RIGHT] ); switch ( m_motionEstimationSearchMethod ) { case MESEARCH_DIAMOND: xTZSearch( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred, false ); break; case MESEARCH_SELECTIVE: xTZSearchSelective( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred ); break; case MESEARCH_DIAMOND_ENHANCED: xTZSearch( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred, true ); break; case MESEARCH_FULL: // shouldn't get here. 
default: break; } } Void TEncSearch::xTZSearch( const TComDataCU* const pcCU, const TComPattern* const pcPatternKey, const Pel* const piRefY, const Int iRefStride, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB, TComMv &rcMv, Distortion &ruiSAD, const TComMv* const pIntegerMv2Nx2NPred, const Bool bExtendedSettings) { const Bool bUseAdaptiveRaster = bExtendedSettings; const Int iRaster = 5; const Bool bTestOtherPredictedMV = bExtendedSettings; const Bool bTestZeroVector = true; const Bool bTestZeroVectorStart = bExtendedSettings; const Bool bTestZeroVectorStop = false; const Bool bFirstSearchDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const Bool bFirstCornersForDiamondDist1 = bExtendedSettings; const Bool bFirstSearchStop = m_pcEncCfg->getFastMEAssumingSmootherMVEnabled(); const UInt uiFirstSearchRounds = 3; // first search stop X rounds after best match (must be >=1) const Bool bEnableRasterSearch = true; const Bool bAlwaysRasterSearch = bExtendedSettings; // true: BETTER but factor 2 slower const Bool bRasterRefinementEnable = false; // enable either raster refinement or star refinement const Bool bRasterRefinementDiamond = false; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const Bool bRasterRefinementCornersForDiamondDist1 = bExtendedSettings; const Bool bStarRefinementEnable = true; // enable either star refinement or raster refinement const Bool bStarRefinementDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const Bool bStarRefinementCornersForDiamondDist1 = bExtendedSettings; const Bool bStarRefinementStop = false; const UInt uiStarRefinementRounds = 2; // star refinement stop X rounds after best match (must be >=1) const Bool bNewZeroNeighbourhoodTest = bExtendedSettings; UInt uiSearchRange = m_iSearchRange; pcCU->clipMv( rcMv ); #if NH_3D_INTEGER_MV_DEPTH if( ! 
pcCU->getSlice()->getIsDepth() ) #endif #if ME_ENABLE_ROUNDING_OF_MVS rcMv.divideByPowerOf2(2); #else rcMv >>= 2; #endif // init TZSearchStruct IntTZSearchStruct cStruct; cStruct.iYStride = iRefStride; cStruct.piRefY = piRefY; cStruct.uiBestSad = MAX_UINT; // set rcMv (Median predictor) as start point and as best point xTZSearchHelp( pcPatternKey, cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 ); // test whether one of PRED_A, PRED_B, PRED_C MV is better start point than Median predictor if ( bTestOtherPredictedMV ) { for ( UInt index = 0; index < NUM_MV_PREDICTORS; index++ ) { TComMv cMv = m_acMvPredictors[index]; pcCU->clipMv( cMv ); #if NH_3D_INTEGER_MV_DEPTH if( ! pcCU->getSlice()->getIsDepth() ) { #endif #if ME_ENABLE_ROUNDING_OF_MVS cMv.divideByPowerOf2(2); #else cMv >>= 2; #endif #if NH_3D_INTEGER_MV_DEPTH } #endif if (cMv != rcMv && (cMv.getHor() != cStruct.iBestX && cMv.getVer() != cStruct.iBestY)) { // only test cMV if not obviously previously tested. xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 ); } } } // test whether zero Mv is better start point than Median predictor if ( bTestZeroVector ) { if ((rcMv.getHor() != 0 || rcMv.getVer() != 0) && (0 != cStruct.iBestX || 0 != cStruct.iBestY)) { // only test 0-vector if not obviously previously tested. 
xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 ); } } Int iSrchRngHorLeft = pcMvSrchRngLT->getHor(); Int iSrchRngHorRight = pcMvSrchRngRB->getHor(); Int iSrchRngVerTop = pcMvSrchRngLT->getVer(); Int iSrchRngVerBottom = pcMvSrchRngRB->getVer(); if (pIntegerMv2Nx2NPred != 0) { TComMv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred; integerMv2Nx2NPred <<= 2; pcCU->clipMv( integerMv2Nx2NPred ); #if ME_ENABLE_ROUNDING_OF_MVS integerMv2Nx2NPred.divideByPowerOf2(2); #else integerMv2Nx2NPred >>= 2; #endif if ((rcMv != integerMv2Nx2NPred) && (integerMv2Nx2NPred.getHor() != cStruct.iBestX || integerMv2Nx2NPred.getVer() != cStruct.iBestY)) { // only test integerMv2Nx2NPred if not obviously previously tested. xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0); } // reset search range TComMv cMvSrchRngLT; TComMv cMvSrchRngRB; Int iSrchRng = m_iSearchRange; TComMv currBestMv(cStruct.iBestX, cStruct.iBestY ); currBestMv <<= 2; xSetSearchRange( pcCU, currBestMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB ); iSrchRngHorLeft = cMvSrchRngLT.getHor(); iSrchRngHorRight = cMvSrchRngRB.getHor(); iSrchRngVerTop = cMvSrchRngLT.getVer(); iSrchRngVerBottom = cMvSrchRngRB.getVer(); } // start search Int iDist = 0; Int iStartX = cStruct.iBestX; Int iStartY = cStruct.iBestY; const Bool bBestCandidateZero = (cStruct.iBestX == 0) && (cStruct.iBestY == 0); // first search around best position up to now. 
// The following works as a "subsampled/log" window search around the best candidate for ( iDist = 1; iDist <= (Int)uiSearchRange; iDist*=2 ) { if ( bFirstSearchDiamond == 1 ) { xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, bFirstCornersForDiamondDist1 ); } else { xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist ); } if ( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion { break; } } if (!bNewZeroNeighbourhoodTest) { // test whether zero Mv is a better start point than Median predictor if ( bTestZeroVectorStart && ((cStruct.iBestX != 0) || (cStruct.iBestY != 0)) ) { xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 ); if ( (cStruct.iBestX == 0) && (cStruct.iBestY == 0) ) { // test its neighborhood for ( iDist = 1; iDist <= (Int)uiSearchRange; iDist*=2 ) { xTZ8PointDiamondSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, 0, 0, iDist, false ); if ( bTestZeroVectorStop && (cStruct.uiBestRound > 0) ) // stop criterion { break; } } } } } else { // Test also zero neighbourhood but with half the range // It was reported that the original (above) search scheme using bTestZeroVectorStart did not // make sense since one would have already checked the zero candidate earlier // and thus the conditions for that test would have not been satisfied if (bTestZeroVectorStart == true && bBestCandidateZero != true) { for ( iDist = 1; iDist <= ((Int)uiSearchRange >> 1); iDist*=2 ) { xTZ8PointDiamondSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, 0, 0, iDist, false ); if ( bTestZeroVectorStop && (cStruct.uiBestRound > 2) ) // stop criterion { break; } } } } // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB ); } // raster search if distance is too big if 
(bUseAdaptiveRaster) { int iWindowSize = iRaster; Int iSrchRngRasterLeft = iSrchRngHorLeft; Int iSrchRngRasterRight = iSrchRngHorRight; Int iSrchRngRasterTop = iSrchRngVerTop; Int iSrchRngRasterBottom = iSrchRngVerBottom; if (!(bEnableRasterSearch && ( ((Int)(cStruct.uiBestDistance) > iRaster)))) { iWindowSize ++; iSrchRngRasterLeft /= 2; iSrchRngRasterRight /= 2; iSrchRngRasterTop /= 2; iSrchRngRasterBottom /= 2; } cStruct.uiBestDistance = iWindowSize; for ( iStartY = iSrchRngRasterTop; iStartY <= iSrchRngRasterBottom; iStartY += iWindowSize ) { for ( iStartX = iSrchRngRasterLeft; iStartX <= iSrchRngRasterRight; iStartX += iWindowSize ) { xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, iWindowSize ); } } } else { if ( bEnableRasterSearch && ( ((Int)(cStruct.uiBestDistance) > iRaster) || bAlwaysRasterSearch ) ) { cStruct.uiBestDistance = iRaster; for ( iStartY = iSrchRngVerTop; iStartY <= iSrchRngVerBottom; iStartY += iRaster ) { for ( iStartX = iSrchRngHorLeft; iStartX <= iSrchRngHorRight; iStartX += iRaster ) { xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, iRaster ); } } } } // raster refinement if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 ) { while ( cStruct.uiBestDistance > 0 ) { iStartX = cStruct.iBestX; iStartY = cStruct.iBestY; if ( cStruct.uiBestDistance > 1 ) { iDist = cStruct.uiBestDistance >>= 1; if ( bRasterRefinementDiamond == 1 ) { xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, bRasterRefinementCornersForDiamondDist1 ); } else { xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist ); } } // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; if ( cStruct.ucPointNr != 0 ) { xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB ); } } } } // star refinement if ( bStarRefinementEnable && 
cStruct.uiBestDistance > 0 ) { while ( cStruct.uiBestDistance > 0 ) { iStartX = cStruct.iBestX; iStartY = cStruct.iBestY; cStruct.uiBestDistance = 0; cStruct.ucPointNr = 0; for ( iDist = 1; iDist < (Int)uiSearchRange + 1; iDist*=2 ) { if ( bStarRefinementDiamond == 1 ) { xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, bStarRefinementCornersForDiamondDist1 ); } else { xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist ); } if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion { break; } } // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; if ( cStruct.ucPointNr != 0 ) { xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB ); } } } } // write out best match rcMv.set( cStruct.iBestX, cStruct.iBestY ); ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY ); } Void TEncSearch::xTZSearchSelective( const TComDataCU* const pcCU, const TComPattern* const pcPatternKey, const Pel* const piRefY, const Int iRefStride, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB, TComMv &rcMv, Distortion &ruiSAD, const TComMv* const pIntegerMv2Nx2NPred ) { const Bool bTestOtherPredictedMV = true; const Bool bTestZeroVector = true; const Bool bEnableRasterSearch = true; const Bool bAlwaysRasterSearch = false; // 1: BETTER but factor 15x slower const Bool bStarRefinementEnable = true; // enable either star refinement or raster refinement const Bool bStarRefinementDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch const Bool bStarRefinementStop = false; const UInt uiStarRefinementRounds = 2; // star refinement stop X rounds after best match (must be >=1) const UInt uiSearchRange = m_iSearchRange; const Int uiSearchRangeInitial = m_iSearchRange >> 2; 
const Int uiSearchStep = 4; const Int iMVDistThresh = 8; Int iSrchRngHorLeft = pcMvSrchRngLT->getHor(); Int iSrchRngHorRight = pcMvSrchRngRB->getHor(); Int iSrchRngVerTop = pcMvSrchRngLT->getVer(); Int iSrchRngVerBottom = pcMvSrchRngRB->getVer(); Int iFirstSrchRngHorLeft = 0; Int iFirstSrchRngHorRight = 0; Int iFirstSrchRngVerTop = 0; Int iFirstSrchRngVerBottom = 0; Int iStartX = 0; Int iStartY = 0; Int iBestX = 0; Int iBestY = 0; Int iDist = 0; pcCU->clipMv( rcMv ); #if ME_ENABLE_ROUNDING_OF_MVS rcMv.divideByPowerOf2(2); #else rcMv >>= 2; #endif // init TZSearchStruct IntTZSearchStruct cStruct; cStruct.iYStride = iRefStride; cStruct.piRefY = piRefY; cStruct.uiBestSad = MAX_UINT; cStruct.iBestX = 0; cStruct.iBestY = 0; // set rcMv (Median predictor) as start point and as best point xTZSearchHelp( pcPatternKey, cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 ); // test whether one of PRED_A, PRED_B, PRED_C MV is better start point than Median predictor if ( bTestOtherPredictedMV ) { for ( UInt index = 0; index < NUM_MV_PREDICTORS; index++ ) { TComMv cMv = m_acMvPredictors[index]; pcCU->clipMv( cMv ); #if ME_ENABLE_ROUNDING_OF_MVS cMv.divideByPowerOf2(2); #else cMv >>= 2; #endif xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 ); } } // test whether zero Mv is better start point than Median predictor if ( bTestZeroVector ) { xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 ); } if ( pIntegerMv2Nx2NPred != 0 ) { TComMv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred; integerMv2Nx2NPred <<= 2; pcCU->clipMv( integerMv2Nx2NPred ); #if ME_ENABLE_ROUNDING_OF_MVS integerMv2Nx2NPred.divideByPowerOf2(2); #else integerMv2Nx2NPred >>= 2; #endif xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0); // reset search range TComMv cMvSrchRngLT; TComMv cMvSrchRngRB; Int iSrchRng = m_iSearchRange; TComMv currBestMv(cStruct.iBestX, cStruct.iBestY ); currBestMv <<= 2; xSetSearchRange( pcCU, currBestMv, iSrchRng, cMvSrchRngLT, 
cMvSrchRngRB ); iSrchRngHorLeft = cMvSrchRngLT.getHor(); iSrchRngHorRight = cMvSrchRngRB.getHor(); iSrchRngVerTop = cMvSrchRngLT.getVer(); iSrchRngVerBottom = cMvSrchRngRB.getVer(); } // Initial search iBestX = cStruct.iBestX; iBestY = cStruct.iBestY; iFirstSrchRngHorLeft = ((iBestX - uiSearchRangeInitial) > iSrchRngHorLeft) ? (iBestX - uiSearchRangeInitial) : iSrchRngHorLeft; iFirstSrchRngVerTop = ((iBestY - uiSearchRangeInitial) > iSrchRngVerTop) ? (iBestY - uiSearchRangeInitial) : iSrchRngVerTop; iFirstSrchRngHorRight = ((iBestX + uiSearchRangeInitial) < iSrchRngHorRight) ? (iBestX + uiSearchRangeInitial) : iSrchRngHorRight; iFirstSrchRngVerBottom = ((iBestY + uiSearchRangeInitial) < iSrchRngVerBottom) ? (iBestY + uiSearchRangeInitial) : iSrchRngVerBottom; for ( iStartY = iFirstSrchRngVerTop; iStartY <= iFirstSrchRngVerBottom; iStartY += uiSearchStep ) { for ( iStartX = iFirstSrchRngHorLeft; iStartX <= iFirstSrchRngHorRight; iStartX += uiSearchStep ) { xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, 0 ); xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, 1, false ); xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, 2, false ); } } Int iMaxMVDistToPred = (abs(cStruct.iBestX - iBestX) > iMVDistThresh || abs(cStruct.iBestY - iBestY) > iMVDistThresh); //full search with early exit if MV is distant from predictors if ( bEnableRasterSearch && (iMaxMVDistToPred || bAlwaysRasterSearch) ) { for ( iStartY = iSrchRngVerTop; iStartY <= iSrchRngVerBottom; iStartY += 1 ) { for ( iStartX = iSrchRngHorLeft; iStartX <= iSrchRngHorRight; iStartX += 1 ) { xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, 1 ); } } } //Smaller MV, refine around predictor else if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 ) { // start refinement while ( cStruct.uiBestDistance > 0 ) { iStartX = cStruct.iBestX; iStartY = cStruct.iBestY; cStruct.uiBestDistance = 0; cStruct.ucPointNr 
= 0; for ( iDist = 1; iDist < (Int)uiSearchRange + 1; iDist*=2 ) { if ( bStarRefinementDiamond == 1 ) { xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, false ); } else { xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist ); } if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion { break; } } // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1 if ( cStruct.uiBestDistance == 1 ) { cStruct.uiBestDistance = 0; if ( cStruct.ucPointNr != 0 ) { xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB ); } } } } // write out best match rcMv.set( cStruct.iBestX, cStruct.iBestY ); ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY ); } Void TEncSearch::xPatternSearchFracDIF( Bool bIsLosslessCoded, TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, TComMv* pcMvInt, TComMv& rcMvHalf, TComMv& rcMvQter, Distortion& ruiCost ) { // Reference pattern initialization (integer scale) TComPattern cPatternRoi; Int iOffset = pcMvInt->getHor() + pcMvInt->getVer() * iRefStride; cPatternRoi.initPattern(piRefY + iOffset, pcPatternKey->getROIYWidth(), pcPatternKey->getROIYHeight(), iRefStride, pcPatternKey->getBitDepthY()); // Half-pel refinement xExtDIFUpSamplingH ( &cPatternRoi ); rcMvHalf = *pcMvInt; rcMvHalf <<= 1; // for mv-cost TComMv baseRefMv(0, 0); ruiCost = xPatternRefinement( pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded ); m_pcRdCost->setCostScale( 0 ); xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf ); baseRefMv = rcMvHalf; baseRefMv <<= 1; rcMvQter = *pcMvInt; rcMvQter <<= 1; // for mv-cost rcMvQter += rcMvHalf; rcMvQter <<= 1; ruiCost = xPatternRefinement( pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded ); } //! 
encode residual and calculate rate-distortion for a CU block Void TEncSearch::encodeResAndCalcRdInterCU( TComDataCU* pcCU, TComYuv* pcYuvOrg, TComYuv* pcYuvPred, TComYuv* pcYuvResi, TComYuv* pcYuvResiBest, TComYuv* pcYuvRec, Bool bSkipResidual DEBUG_STRING_FN_DECLARE(sDebug) ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "encodeResAndCalcRdInterCU; Skip residual: " + n2s(bSkipResidual)); #endif assert ( !pcCU->isIntra(0) ); const UInt cuWidthPixels = pcCU->getWidth ( 0 ); const UInt cuHeightPixels = pcCU->getHeight( 0 ); const Int numValidComponents = pcCU->getPic()->getNumberValidComponents(); const TComSPS &sps=*(pcCU->getSlice()->getSPS()); // The pcCU is not marked as skip-mode at this point, and its m_pcTrCoeff, m_pcArlCoeff, m_puhCbf, m_puhTrIdx will all be 0. // due to prior calls to TComDataCU::initEstData( ); if ( bSkipResidual ) // No residual coding : SKIP mode { pcCU->setSkipFlagSubParts( true, 0, pcCU->getDepth(0) ); pcYuvResi->clear(); pcYuvPred->copyToPartYuv( pcYuvRec, 0 ); #if NH_3D_VSO Dist distortion = 0; #else Distortion distortion = 0; #endif for (Int comp=0; comp < numValidComponents; comp++) { const ComponentID compID=ComponentID(comp); const UInt csx=pcYuvOrg->getComponentScaleX(compID); const UInt csy=pcYuvOrg->getComponentScaleY(compID); #if NH_3D_VSO // M13 if ( m_pcRdCost->getUseVSO() ) { distortion += m_pcRdCost->getDistPartVSO( pcCU, 0, sps.getBitDepth(toChannelType(compID)), pcYuvRec->getAddr(compID), pcYuvRec->getStride(compID), pcYuvOrg->getAddr(compID), pcYuvOrg->getStride(compID), cuWidthPixels >> csx, cuHeightPixels >> csy, false ); } else { #endif distortion += m_pcRdCost->getDistPart( sps.getBitDepth(toChannelType(compID)), pcYuvRec->getAddr(compID), pcYuvRec->getStride(compID), pcYuvOrg->getAddr(compID), pcYuvOrg->getStride(compID), cuWidthPixels >> csx, cuHeightPixels >> csy, compID); #if NH_3D_VSO // MIgnore } #endif } m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[pcCU->getDepth(0)][CI_CURR_BEST]); 
m_pcEntropyCoder->resetBits(); if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag()) { m_pcEntropyCoder->encodeCUTransquantBypassFlag(pcCU, 0, true); } m_pcEntropyCoder->encodeSkipFlag(pcCU, 0, true); m_pcEntropyCoder->encodeMergeIndex( pcCU, 0, true ); #if NH_3D_ARP m_pcEntropyCoder->encodeARPW( pcCU, 0 ); #endif #if NH_3D_IC m_pcEntropyCoder->encodeICFlag( pcCU, 0, true ); #endif UInt uiBits = m_pcEntropyCoder->getNumberOfWrittenBits(); pcCU->getTotalBits() = uiBits; pcCU->getTotalDistortion() = distortion; #if NH_3D_VSO //M 14 if ( m_pcRdCost->getUseLambdaScaleVSO() ) { pcCU->getTotalCost() = m_pcRdCost->calcRdCostVSO( uiBits, distortion ); } else #endif pcCU->getTotalCost() = m_pcRdCost->calcRdCost( uiBits, distortion ); m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[pcCU->getDepth(0)][CI_TEMP_BEST]); #if DEBUG_STRING pcYuvResiBest->clear(); // Clear the residual image, if we didn't code it. for(UInt i=0; igetUseEstimatedVSD() && m_pcRdCost->getUseRenModel() ) { const UInt csx = pcYuvRec->getComponentScaleX( COMPONENT_Y ); const UInt csy = pcYuvRec->getComponentScaleY( COMPONENT_Y ); Pel* piSrc = pcYuvRec->getAddr ( COMPONENT_Y ); UInt uiSrcStride = pcYuvRec->getStride( COMPONENT_Y ); m_pcRdCost->setRenModelData( pcCU, 0, piSrc, uiSrcStride, cuWidthPixels >> csx, cuHeightPixels >> csy); } #endif #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif return; } // Residual coding. pcYuvResi->subtract( pcYuvOrg, pcYuvPred, 0, cuWidthPixels ); TComTURecurse tuLevel0(pcCU, 0); Double nonZeroCost = 0; UInt nonZeroBits = 0; #if NH_3D_VSO Dist nonZeroDistortion = 0; Dist zeroDistortion = 0; #else Distortion nonZeroDistortion = 0; Distortion zeroDistortion = 0; #endif m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ pcCU->getDepth( 0 ) ][ CI_CURR_BEST ] ); #if NH_3D_VSO // M16 // M18 if ( m_pcRdCost->getUseVSO() ) // This creating and destroying need to be fixed. 
{ m_cYuvRecTemp.create( pcYuvPred->getWidth( COMPONENT_Y ), pcYuvPred->getHeight( COMPONENT_Y ), CHROMA_400 ); } #if ENC_DEC_TRACE && NH_MV_ENC_DEC_TRAC Bool oldTraceRDCost = g_traceRDCost; g_traceRDCost = false; Bool oldTraceModeCheck = g_traceModeCheck; g_traceModeCheck = false; Bool oldTraceFracBits = g_traceEncFracBits; g_traceEncFracBits = false; #endif xEstimateInterResidualQT( pcYuvResi, pcYuvOrg, pcYuvPred, nonZeroCost, nonZeroBits, nonZeroDistortion, &zeroDistortion, tuLevel0 DEBUG_STRING_PASS_INTO(sDebug) ); #if ENC_DEC_TRACE && NH_MV_ENC_DEC_TRAC g_traceRDCost = oldTraceRDCost; g_traceEncFracBits = oldTraceFracBits; g_traceModeCheck = oldTraceModeCheck; #endif if ( m_pcRdCost->getUseVSO() ) { m_cYuvRecTemp.destroy(); } #else xEstimateInterResidualQT( pcYuvResi, nonZeroCost, nonZeroBits, nonZeroDistortion, &zeroDistortion, tuLevel0 DEBUG_STRING_PASS_INTO(sDebug) ); #endif // ------------------------------------------------------- // set the coefficients in the pcCU, and also calculates the residual data. // If a block full of 0's is efficient, then just use 0's. // The costs at this point do not include header bits. m_pcEntropyCoder->resetBits(); m_pcEntropyCoder->encodeQtRootCbfZero( ); const UInt zeroResiBits = m_pcEntropyCoder->getNumberOfWrittenBits(); #if NH_3D_VSO // M19 Double zeroCost; if( m_pcRdCost->getUseLambdaScaleVSO() ) { zeroCost = (pcCU->isLosslessCoded( 0 )) ? (nonZeroCost+1) : (m_pcRdCost->calcRdCostVSO( zeroResiBits, zeroDistortion )); } else { zeroCost = (pcCU->isLosslessCoded( 0 )) ? (nonZeroCost+1) : (m_pcRdCost->calcRdCost( zeroResiBits, zeroDistortion )); } #else const Double zeroCost = (pcCU->isLosslessCoded( 0 )) ? 
(nonZeroCost+1) : (m_pcRdCost->calcRdCost( zeroResiBits, zeroDistortion )); #endif #if NH_3D_SPIVMP if ( zeroCost < nonZeroCost || pcCU->getQtRootCbf(0)==0) #else if ( zeroCost < nonZeroCost || !pcCU->getQtRootCbf(0) ) #endif { const UInt uiQPartNum = tuLevel0.GetAbsPartIdxNumParts(); ::memset( pcCU->getTransformIdx() , 0, uiQPartNum * sizeof(UChar) ); for (Int comp=0; comp < numValidComponents; comp++) { const ComponentID component = ComponentID(comp); ::memset( pcCU->getCbf( component ) , 0, uiQPartNum * sizeof(UChar) ); ::memset( pcCU->getCrossComponentPredictionAlpha(component), 0, ( uiQPartNum * sizeof(SChar) ) ); } static const UInt useTS[MAX_NUM_COMPONENT]={0,0,0}; pcCU->setTransformSkipSubParts ( useTS, 0, pcCU->getDepth(0) ); #if DEBUG_STRING sDebug.clear(); for(UInt i=0; iload( m_pppcRDSbacCoder[pcCU->getDepth(0)][CI_CURR_BEST] ); UInt finalBits = 0; xAddSymbolBitsInter( pcCU, finalBits ); // we've now encoded the pcCU, and so have a valid bit cost if ( !pcCU->getQtRootCbf( 0 ) ) { pcYuvResiBest->clear(); // Clear the residual image, if we didn't code it. } else { xSetInterResidualQTData( pcYuvResiBest, true, tuLevel0 ); // else set the residual image data pcYUVResiBest from the various temp images. 
} m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ pcCU->getDepth( 0 ) ][ CI_TEMP_BEST ] ); pcYuvRec->addClip ( pcYuvPred, pcYuvResiBest, 0, cuWidthPixels, sps.getBitDepths() ); // update with clipped distortion and cost (previously unclipped reconstruction values were used) #if NH_3D_VSO Dist finalDistortion = 0; #else Distortion finalDistortion = 0; #endif for(Int comp=0; compgetUseVSO() ) { finalDistortion += m_pcRdCost->getDistPartVSO ( pcCU, 0, sps.getBitDepth(toChannelType(compID)), pcYuvRec->getAddr( compID ), pcYuvRec->getStride( compID ), pcYuvOrg->getAddr( compID ), pcYuvOrg->getStride( compID ), cuWidthPixels >> pcYuvOrg->getComponentScaleX(compID), cuHeightPixels >> pcYuvOrg->getComponentScaleY(compID), false ); } else { #endif finalDistortion += m_pcRdCost->getDistPart( sps.getBitDepth(toChannelType(compID)), pcYuvRec->getAddr(compID ), pcYuvRec->getStride(compID ), pcYuvOrg->getAddr(compID ), pcYuvOrg->getStride(compID), cuWidthPixels >> pcYuvOrg->getComponentScaleX(compID), cuHeightPixels >> pcYuvOrg->getComponentScaleY(compID), compID); #if NH_3D_VSO // M23 } #endif } pcCU->getTotalBits() = finalBits; pcCU->getTotalDistortion() = finalDistortion; #if NH_3D_VSO if ( m_pcRdCost->getUseLambdaScaleVSO() ) { pcCU->getTotalCost() = m_pcRdCost->calcRdCostVSO( finalBits, finalDistortion ); } else #endif pcCU->getTotalCost() = m_pcRdCost->calcRdCost( finalBits, finalDistortion ); #if NH_3D_VSO // M24 // necessary?? 
if( m_pcRdCost->getUseRenModel() && !m_pcRdCost->getUseEstimatedVSD() ) { Pel* piSrc = pcYuvRec->getAddr ( COMPONENT_Y ); UInt uiSrcStride = pcYuvRec->getStride( COMPONENT_Y ); m_pcRdCost->setRenModelData( pcCU, 0, piSrc, uiSrcStride, cuWidthPixels >> pcYuvOrg->getComponentScaleX(COMPONENT_Y), cuHeightPixels >> pcYuvOrg->getComponentScaleY(COMPONENT_Y)); } #endif #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } #if NH_3D_SDC_INTER Void TEncSearch::encodeResAndCalcRdInterSDCCU( TComDataCU* pcCU, TComYuv* pcOrg, TComYuv* pcPred, TComYuv* pcResi, TComYuv* pcRec, Int uiOffest, const UInt uiDepth ) { if( !pcCU->getSlice()->getIsDepth() || pcCU->isIntra( 0 ) ) { return; } pcCU->setSDCFlagSubParts( true, 0, uiDepth ); UInt uiWidth = pcCU->getWidth ( 0 ); UInt uiHeight = pcCU->getHeight( 0 ); UInt uiSegSize = 0; Pel *pPred, *pOrg; UInt uiPredStride = pcPred->getStride( COMPONENT_Y ); UInt uiOrgStride = pcOrg->getStride( COMPONENT_Y ); UInt uiPelX, uiPelY; pPred = pcPred->getAddr( COMPONENT_Y ); pOrg = pcOrg->getAddr( COMPONENT_Y ); Int pResDC = 0; Int bitDepthY = pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); Int bitDepthC = pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_CHROMA); //calculate dc value for prediction and original signal, and calculate residual and reconstruction for( uiPelY = 0; uiPelY < uiHeight; uiPelY++ ) { for( uiPelX = 0; uiPelX < uiWidth; uiPelX++ ) { pResDC += (Int)( pOrg [uiPelX] - pPred[uiPelX] ); uiSegSize++; } pOrg += uiOrgStride; pPred += uiPredStride; } Int iResiOffset = ( pResDC > 0 ? 
( uiSegSize >> 1 ) : -1*( uiSegSize >> 1 ) ); pResDC = ( pResDC + iResiOffset ) / (Int) uiSegSize; pcCU->setSDCSegmentDCOffset( pResDC + uiOffest, 0, 0 ); Pel *pRec; UInt uiRecStride = pcRec->getStride( COMPONENT_Y ); pPred = pcPred->getAddr( COMPONENT_Y ); pRec = pcRec->getAddr( COMPONENT_Y ); for( uiPelY = 0; uiPelY < uiHeight; uiPelY++ ) { for( uiPelX = 0; uiPelX < uiWidth; uiPelX++ ) { pRec[ uiPelX ] = Clip3( 0, (1 << bitDepthY) - 1, pPred[uiPelX] + pcCU->getSDCSegmentDCOffset(0, 0) ); } pPred += uiPredStride; pRec += uiRecStride; } // clear UV UInt uiStrideC = pcRec->getStride( COMPONENT_Cb ); Pel *pRecCb = pcRec->getAddr( COMPONENT_Cb ); Pel *pRecCr = pcRec->getAddr( COMPONENT_Cr ); for (Int y=0; y < uiHeight/2; y++) { for (Int x=0; x < uiWidth/2; x++) { pRecCb[x] = (Pel)( 1 << ( bitDepthC - 1 ) ); pRecCr[x] = (Pel)( 1 << ( bitDepthC - 1 ) ); } pRecCb += uiStrideC; pRecCr += uiStrideC; } #if NH_3D_VSO Dist ruiDist; #else Distortion ruiDist; #endif Double rdCost; #if NH_3D_VSO // M13 if ( m_pcRdCost->getUseVSO() ) { ruiDist = m_pcRdCost->getDistPartVSO( pcCU, 0, bitDepthY, pcRec->getAddr( COMPONENT_Y ), pcRec->getStride( COMPONENT_Y ), pcOrg->getAddr( COMPONENT_Y ), pcOrg->getStride( COMPONENT_Y ), uiWidth, uiHeight , false ); } else { #endif { ruiDist = m_pcRdCost->getDistPart( bitDepthY, pcRec->getAddr( COMPONENT_Y ), uiRecStride, pcOrg->getAddr( COMPONENT_Y ), uiOrgStride, uiWidth, uiHeight, COMPONENT_Y ); } #if NH_3D_VSO } #endif Bool bNonSkip = false; bNonSkip |= ( pcCU->getSDCSegmentDCOffset( 0, 0 ) != 0 ) ? 
1 : 0; if( !bNonSkip ) { pcCU->getTotalBits() = MAX_INT; pcCU->getTotalDistortion() = MAX_INT; pcCU->getTotalCost() = MAX_DOUBLE; } else { //----- determine rate and r-d cost ----- UInt uiBits = 0; m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[pcCU->getDepth(0)][CI_CURR_BEST] ); xAddSymbolBitsInter( pcCU, uiBits ); #if NH_3D_VSO //M 14 if ( m_pcRdCost->getUseLambdaScaleVSO() ) { rdCost = m_pcRdCost->calcRdCostVSO( uiBits, ruiDist ); } else #endif { rdCost = m_pcRdCost->calcRdCost( uiBits, ruiDist ); } pcCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits(); pcCU->getTotalDistortion() = ruiDist; pcCU->getTotalCost() = rdCost; m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ pcCU->getDepth( 0 ) ][ CI_TEMP_BEST ] ); } #if NH_3D_VSO // necessary? // M15 // set Model if( !m_pcRdCost->getUseEstimatedVSD() && m_pcRdCost->getUseRenModel() ) { Pel* piSrc = pcRec->getAddr( COMPONENT_Y ); UInt uiSrcStride = pcRec->getStride( COMPONENT_Y ); m_pcRdCost->setRenModelData( pcCU, 0, piSrc, uiSrcStride, uiWidth, uiHeight ); } #endif } #endif Void TEncSearch::xEstimateInterResidualQT( TComYuv *pcResi, #if NH_3D_VSO // M25 TComYuv *pcOrg, TComYuv *pcPred, #endif Double &rdCost, UInt &ruiBits, #if NH_3D_VSO Dist &ruiDist, Dist *puiZeroDist, #else Distortion &ruiDist, Distortion *puiZeroDist, #endif TComTU &rTu DEBUG_STRING_FN_DECLARE(sDebug) ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "xEstimateInterResidualQT"); #endif TComDataCU *pcCU = rTu.getCU(); const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); const UInt uiDepth = rTu.GetTransformDepthTotal(); const UInt uiTrMode = rTu.GetTransformDepthRel(); const UInt subTUDepth = uiTrMode + 1; const UInt numValidComp = pcCU->getPic()->getNumberValidComponents(); DEBUG_STRING_NEW(sSingleStringComp[MAX_NUM_COMPONENT]) assert( pcCU->getDepth( 0 ) == pcCU->getDepth( uiAbsPartIdx ) ); const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); UInt SplitFlag = ((pcCU->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && 
pcCU->isInter(uiAbsPartIdx) && ( pcCU->getPartitionSize(uiAbsPartIdx) != SIZE_2Nx2N )); #if DEBUG_STRING const Int debugPredModeMask = DebugStringGetPredModeMask(pcCU->getPredictionMode(uiAbsPartIdx)); #endif Bool bCheckFull; if ( SplitFlag && uiDepth == pcCU->getDepth(uiAbsPartIdx) && ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ) ) { bCheckFull = false; } else { bCheckFull = ( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() ); } const Bool bCheckSplit = ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ); assert( bCheckFull || bCheckSplit ); // code full block Double dSingleCost = MAX_DOUBLE; UInt uiSingleBits = 0; #if NH_3D_VSO Dist uiSingleDistComp [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}}; Dist uiSingleDist = 0; #else Distortion uiSingleDistComp [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}}; Distortion uiSingleDist = 0; #endif TCoeff uiAbsSum [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}}; UInt uiBestTransformMode [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}}; // Stores the best explicit RDPCM mode for a TU encoded without split UInt bestExplicitRdpcmModeUnSplit[MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{3,3}, {3,3}, {3,3}}; SChar bestCrossCPredictionAlpha [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}}; m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] ); if( bCheckFull ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "bCheckFull" ); #endif Double minCost[MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/]; Bool checkTransformSkip[MAX_NUM_COMPONENT]; pcCU->setTrIdxSubParts( 
uiTrMode, uiAbsPartIdx, uiDepth ); m_pcEntropyCoder->resetBits(); memset( m_pTempPel, 0, sizeof( Pel ) * rTu.getRect(COMPONENT_Y).width * rTu.getRect(COMPONENT_Y).height ); // not necessary needed for inside of recursion (only at the beginning) const UInt uiQTTempAccessLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; TCoeff *pcCoeffCurr[MAX_NUM_COMPONENT]; #if ADAPTIVE_QP_SELECTION TCoeff *pcArlCoeffCurr[MAX_NUM_COMPONENT]; #endif for(UInt i=0; igetSlice()->getSPS()->getBitDepth(toChannelType(compID)); pcCoeffCurr[compID] = m_ppcQTTempCoeff[compID][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID); #if ADAPTIVE_QP_SELECTION pcArlCoeffCurr[compID] = m_ppcQTTempArlCoeff[compID ][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID); #endif if(rTu.ProcessComponentSection(compID)) { const QpParam cQP(*pcCU, compID); checkTransformSkip[compID] = pcCU->getSlice()->getPPS()->getUseTransformSkip() && TUCompRectHasAssociatedTransformSkipFlag(rTu.getRect(compID), pcCU->getSlice()->getPPS()->getPpsRangeExtension().getLog2MaxTransformSkipBlockSize()) && (!pcCU->isLosslessCoded(0)); const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height; TComTURecurse TUIterator(rTu, false, (splitIntoSubTUs ? 
TComTU::VERTICAL_SPLIT : TComTU::DONT_SPLIT), true, compID); const UInt partIdxesPerSubTU = TUIterator.GetAbsPartIdxNumParts(compID); do { const UInt subTUIndex = TUIterator.GetSectionNumber(); const UInt subTUAbsPartIdx = TUIterator.GetAbsPartIdxTU(compID); const TComRectangle &tuCompRect = TUIterator.getRect(compID); const UInt subTUBufferOffset = tuCompRect.width * tuCompRect.height * subTUIndex; TCoeff *currentCoefficients = pcCoeffCurr[compID] + subTUBufferOffset; #if ADAPTIVE_QP_SELECTION TCoeff *currentARLCoefficients = pcArlCoeffCurr[compID] + subTUBufferOffset; #endif const Bool isCrossCPredictionAvailable = isChroma(compID) && pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && (pcCU->getCbf(subTUAbsPartIdx, COMPONENT_Y, uiTrMode) != 0); SChar preCalcAlpha = 0; const Pel *pLumaResi = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( COMPONENT_Y, rTu.getRect( COMPONENT_Y ).x0, rTu.getRect( COMPONENT_Y ).y0 ); if (isCrossCPredictionAvailable) { const Bool bUseReconstructedResidualForEstimate = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate(); const Pel *const lumaResidualForEstimate = bUseReconstructedResidualForEstimate ? pLumaResi : pcResi->getAddrPix(COMPONENT_Y, tuCompRect.x0, tuCompRect.y0); const UInt lumaResidualStrideForEstimate = bUseReconstructedResidualForEstimate ? m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y) : pcResi->getStride(COMPONENT_Y); preCalcAlpha = xCalcCrossComponentPredictionAlpha(TUIterator, compID, lumaResidualForEstimate, pcResi->getAddrPix(compID, tuCompRect.x0, tuCompRect.y0), tuCompRect.width, tuCompRect.height, lumaResidualStrideForEstimate, pcResi->getStride(compID)); } const Int transformSkipModesToTest = checkTransformSkip[compID] ? 2 : 1; const Int crossCPredictionModesToTest = (preCalcAlpha != 0) ? 
2 : 1; // preCalcAlpha cannot be anything other than 0 if isCrossCPredictionAvailable is false const Bool isOneMode = (crossCPredictionModesToTest == 1) && (transformSkipModesToTest == 1); for (Int transformSkipModeId = 0; transformSkipModeId < transformSkipModesToTest; transformSkipModeId++) { #if NH_MV D_PRINT_INC_INDENT( g_traceModeCheck && ( transformSkipModeId > 0) , "TransformSkipModeId: " + n2s(transformSkipModeId) ); #endif pcCU->setTransformSkipPartRange(transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU); for (Int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++) { const Bool isFirstMode = (transformSkipModeId == 0) && (crossCPredictionModeId == 0); const Bool bUseCrossCPrediction = crossCPredictionModeId != 0; #if NH_MV D_PRINT_INC_INDENT( g_traceModeCheck, "Zero" ); #endif m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] ); m_pcEntropyCoder->resetBits(); pcCU->setTransformSkipPartRange(transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU); pcCU->setCrossComponentPredictionAlphaPartRange((bUseCrossCPrediction ? preCalcAlpha : 0), compID, subTUAbsPartIdx, partIdxesPerSubTU ); if ((compID != COMPONENT_Cr) && ((transformSkipModeId == 1) ? 
m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ())) { m_pcEntropyCoder->estimateBit(m_pcTrQuant->m_pcEstBitsSbac, tuCompRect.width, tuCompRect.height, toChannelType(compID)); } #if RDOQ_CHROMA_LAMBDA m_pcTrQuant->selectLambda(compID); #endif Pel *pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0); UInt resiStride = m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID); TCoeff bestCoeffComp [MAX_TU_SIZE*MAX_TU_SIZE]; Pel bestResiComp [MAX_TU_SIZE*MAX_TU_SIZE]; #if ADAPTIVE_QP_SELECTION TCoeff bestArlCoeffComp[MAX_TU_SIZE*MAX_TU_SIZE]; #endif TCoeff currAbsSum = 0; UInt currCompBits = 0; #if NH_3D_VSO Dist currCompDist = 0; #else Distortion currCompDist = 0; #endif Double currCompCost = 0; UInt nonCoeffBits = 0; #if NH_3D_VSO Dist nonCoeffDist = 0; #else Distortion nonCoeffDist = 0; #endif Double nonCoeffCost = 0; if(!isOneMode && !isFirstMode) { memcpy(bestCoeffComp, currentCoefficients, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height)); #if ADAPTIVE_QP_SELECTION memcpy(bestArlCoeffComp, currentARLCoefficients, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height)); #endif for(Int y = 0; y < tuCompRect.height; y++) { memcpy(&bestResiComp[y * tuCompRect.width], (pcResiCurrComp + (y * resiStride)), (sizeof(Pel) * tuCompRect.width)); } } if (bUseCrossCPrediction) { TComTrQuant::crossComponentPrediction(TUIterator, compID, pLumaResi, pcResi->getAddrPix(compID, tuCompRect.x0, tuCompRect.y0), crossCPredictedResidualBuffer, tuCompRect.width, tuCompRect.height, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y), pcResi->getStride(compID), tuCompRect.width, false); m_pcTrQuant->transformNxN(TUIterator, compID, crossCPredictedResidualBuffer, tuCompRect.width, currentCoefficients, #if ADAPTIVE_QP_SELECTION currentARLCoefficients, #endif currAbsSum, cQP); } else { m_pcTrQuant->transformNxN(TUIterator, compID, pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride(compID), 
currentCoefficients, #if ADAPTIVE_QP_SELECTION currentARLCoefficients, #endif currAbsSum, cQP); } if(isFirstMode || (currAbsSum == 0)) { if (bUseCrossCPrediction) { TComTrQuant::crossComponentPrediction(TUIterator, compID, pLumaResi, m_pTempPel, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0), tuCompRect.width, tuCompRect.height, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y), tuCompRect.width, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID), true); #if NH_3D_VSO // No VSO needed here. assert( !m_pcRdCost->getUseVSO() ); #endif nonCoeffDist = m_pcRdCost->getDistPart( channelBitDepth, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride( compID ), pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, compID); // initialized with zero residual distortion } else { #if NH_3D_VSO // M27 if ( m_pcRdCost->getUseVSO() ) { if( m_pcRdCost->getUseEstimatedVSD() ) { nonCoeffDist = m_pcRdCost->getDistPartVSD( pcCU, uiAbsPartIdx, channelBitDepth, m_pTempPel, tuCompRect.width, pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride( compID ), tuCompRect.width, tuCompRect.height, false ); } else { nonCoeffDist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPartIdx, channelBitDepth, pcPred->getAddr( compID, uiAbsPartIdx ), pcPred->getStride( compID ), pcOrg->getAddr( compID, uiAbsPartIdx), pcOrg->getStride( compID ), tuCompRect.width, tuCompRect.height, false ); // initialized with zero residual distortion } } else #endif nonCoeffDist = m_pcRdCost->getDistPart( channelBitDepth, m_pTempPel, tuCompRect.width, pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, compID); // initialized with zero residual distortion } m_pcEntropyCoder->encodeQtCbfZero( TUIterator, 
toChannelType(compID) ); if ( isCrossCPredictionAvailable ) { m_pcEntropyCoder->encodeCrossComponentPrediction( TUIterator, compID ); } nonCoeffBits = m_pcEntropyCoder->getNumberOfWrittenBits(); #if NH_3D_VSO // M29 if ( m_pcRdCost->getUseLambdaScaleVSO()) { nonCoeffCost = m_pcRdCost->calcRdCostVSO( nonCoeffBits, nonCoeffDist ); } else #endif nonCoeffCost = m_pcRdCost->calcRdCost( nonCoeffBits, nonCoeffDist ); } if((puiZeroDist != NULL) && isFirstMode) { *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion } DEBUG_STRING_NEW(sSingleStringTest) #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif if( currAbsSum > 0 ) //if non-zero coefficients are present, a residual needs to be derived for further prediction { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "NonZero"); #endif if (isFirstMode) { m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] ); m_pcEntropyCoder->resetBits(); } m_pcEntropyCoder->encodeQtCbf( TUIterator, compID, true ); if (isCrossCPredictionAvailable) { m_pcEntropyCoder->encodeCrossComponentPrediction( TUIterator, compID ); } m_pcEntropyCoder->encodeCoeffNxN( TUIterator, currentCoefficients, compID ); currCompBits = m_pcEntropyCoder->getNumberOfWrittenBits(); pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ); m_pcTrQuant->invTransformNxN( TUIterator, compID, pcResiCurrComp, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID), currentCoefficients, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sSingleStringTest, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) ); if (bUseCrossCPrediction) { #if NH_3D_VSO assert( !m_pcRdCost->getUseVSO() ); #endif TComTrQuant::crossComponentPrediction(TUIterator, compID, pLumaResi, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0), m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0), tuCompRect.width, tuCompRect.height, 
m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y), m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID ), m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID ), true); } #if NH_3D_VSO // M28 if ( m_pcRdCost->getUseVSO() ) { if ( m_pcRdCost->getUseEstimatedVSD() ) { currCompDist = m_pcRdCost->getDistPartVSD( pcCU, uiAbsPartIdx, channelBitDepth, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID), pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, false); } else { m_cYuvRecTemp.addClipPartLuma( channelBitDepth, &m_pcQTTempTComYuv[uiQTTempAccessLayer], pcPred, uiAbsPartIdx, tuCompRect.width ); currCompDist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPartIdx, channelBitDepth, m_cYuvRecTemp.getAddr(compID, uiAbsPartIdx), m_cYuvRecTemp.getStride( compID), pcOrg->getAddr ( compID, uiAbsPartIdx ), pcOrg->getStride( compID ), tuCompRect.width, tuCompRect.height, false ); } } else #endif currCompDist = m_pcRdCost->getDistPart( channelBitDepth, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID), pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, compID); #if NH_3D_VSO // M NEW01 if ( m_pcRdCost->getUseLambdaScaleVSO() ) { currCompCost = m_pcRdCost->calcRdCostVSO(currCompBits, currCompDist); } else #endif currCompCost = m_pcRdCost->calcRdCost(currCompBits, currCompDist); if (pcCU->isLosslessCoded(0)) { nonCoeffCost = MAX_DOUBLE; } } else if ((transformSkipModeId == 1) && !bUseCrossCPrediction) { currCompCost = MAX_DOUBLE; } else { currCompBits = nonCoeffBits; currCompDist = nonCoeffDist; currCompCost = nonCoeffCost; } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif // evaluate if ((currCompCost < minCost[compID][subTUIndex]) 
|| ((transformSkipModeId == 1) && (currCompCost == minCost[compID][subTUIndex]))) { bestExplicitRdpcmModeUnSplit[compID][subTUIndex] = pcCU->getExplicitRdpcmMode(compID, subTUAbsPartIdx); if(isFirstMode) //check for forced null { if((nonCoeffCost < currCompCost) || (currAbsSum == 0)) { memset(currentCoefficients, 0, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height)); currAbsSum = 0; currCompBits = nonCoeffBits; currCompDist = nonCoeffDist; currCompCost = nonCoeffCost; } } #if DEBUG_STRING if (currAbsSum > 0) { DEBUG_STRING_SWAP(sSingleStringComp[compID], sSingleStringTest) } else { sSingleStringComp[compID].clear(); } #endif uiAbsSum [compID][subTUIndex] = currAbsSum; uiSingleDistComp [compID][subTUIndex] = currCompDist; minCost [compID][subTUIndex] = currCompCost; uiBestTransformMode [compID][subTUIndex] = transformSkipModeId; bestCrossCPredictionAlpha[compID][subTUIndex] = (crossCPredictionModeId == 1) ? pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) : 0; if (uiAbsSum[compID][subTUIndex] == 0) { if (bUseCrossCPrediction) { TComTrQuant::crossComponentPrediction(TUIterator, compID, pLumaResi, m_pTempPel, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0), tuCompRect.width, tuCompRect.height, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y), tuCompRect.width, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID), true); } else { pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0); const UInt uiStride = m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID); for(UInt uiY = 0; uiY < tuCompRect.height; uiY++) { memset(pcResiCurrComp, 0, (sizeof(Pel) * tuCompRect.width)); pcResiCurrComp += uiStride; } } } } else { // reset memcpy(currentCoefficients, bestCoeffComp, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height)); #if ADAPTIVE_QP_SELECTION memcpy(currentARLCoefficients, bestArlCoeffComp, (sizeof(TCoeff) * tuCompRect.width * 
tuCompRect.height)); #endif for (Int y = 0; y < tuCompRect.height; y++) { memcpy((pcResiCurrComp + (y * resiStride)), &bestResiComp[y * tuCompRect.width], (sizeof(Pel) * tuCompRect.width)); } } } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } pcCU->setExplicitRdpcmModePartRange ( bestExplicitRdpcmModeUnSplit[compID][subTUIndex], compID, subTUAbsPartIdx, partIdxesPerSubTU); pcCU->setTransformSkipPartRange ( uiBestTransformMode [compID][subTUIndex], compID, subTUAbsPartIdx, partIdxesPerSubTU ); pcCU->setCbfPartRange ((((uiAbsSum [compID][subTUIndex] > 0) ? 1 : 0) << uiTrMode), compID, subTUAbsPartIdx, partIdxesPerSubTU ); pcCU->setCrossComponentPredictionAlphaPartRange( bestCrossCPredictionAlpha [compID][subTUIndex], compID, subTUAbsPartIdx, partIdxesPerSubTU ); } while (TUIterator.nextSection(rTu)); //end of sub-TU loop } // processing section } // component loop #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "Final"); #endif for(UInt ch = 0; ch < numValidComp; ch++) { const ComponentID compID = ComponentID(ch); if (rTu.ProcessComponentSection(compID) && (rTu.getRect(compID).width != rTu.getRect(compID).height)) { offsetSubTUCBFs(rTu, compID); //the CBFs up to now have been defined for two sub-TUs - shift them down a level and replace with the parent level CBF } } m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] ); m_pcEntropyCoder->resetBits(); if( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ) { m_pcEntropyCoder->encodeTransformSubdivFlag( 0, 5 - uiLog2TrSize ); } for(UInt ch = 0; ch < numValidComp; ch++) { const UInt chOrderChange = ((ch + 1) == numValidComp) ? 
0 : (ch + 1); const ComponentID compID=ComponentID(chOrderChange); if( rTu.ProcessComponentSection(compID) ) { m_pcEntropyCoder->encodeQtCbf( rTu, compID, true ); } } for(UInt ch = 0; ch < numValidComp; ch++) { const ComponentID compID=ComponentID(ch); if (rTu.ProcessComponentSection(compID)) { if(isChroma(compID) && (uiAbsSum[COMPONENT_Y][0] != 0)) { m_pcEntropyCoder->encodeCrossComponentPrediction( rTu, compID ); } m_pcEntropyCoder->encodeCoeffNxN( rTu, pcCoeffCurr[compID], compID ); for (UInt subTUIndex = 0; subTUIndex < 2; subTUIndex++) { uiSingleDist += uiSingleDistComp[compID][subTUIndex]; } } } uiSingleBits = m_pcEntropyCoder->getNumberOfWrittenBits(); #if NH_3D_VSO if ( m_pcRdCost->getUseLambdaScaleVSO() ) { dSingleCost = m_pcRdCost->calcRdCostVSO( uiSingleBits, uiSingleDist ); } else #endif dSingleCost = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDist ); #if NH_MV D_DEC_INDENT( g_traceModeCheck ); D_DEC_INDENT( g_traceModeCheck ); #endif } // check full // code sub-blocks if( bCheckSplit ) { #if NH_MV D_PRINT_INC_INDENT(g_traceModeCheck, "bCheckSplit" ); #endif if( bCheckFull ) { m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_TEST ] ); m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] ); } #if NH_3D_VSO Dist uiSubdivDist = 0; #else Distortion uiSubdivDist = 0; #endif UInt uiSubdivBits = 0; Double dSubdivCost = 0.0; //save the non-split CBFs in case we need to restore them later UInt bestCBF [MAX_NUM_COMPONENT]; UInt bestsubTUCBF[MAX_NUM_COMPONENT][2]; for(UInt ch = 0; ch < numValidComp; ch++) { const ComponentID compID=ComponentID(ch); if (rTu.ProcessComponentSection(compID)) { bestCBF[compID] = pcCU->getCbf(uiAbsPartIdx, compID, uiTrMode); const TComRectangle &tuCompRect = rTu.getRect(compID); if (tuCompRect.width != tuCompRect.height) { const UInt partIdxesPerSubTU = rTu.GetAbsPartIdxNumParts(compID) >> 1; for (UInt subTU = 0; subTU < 2; subTU++) { bestsubTUCBF[compID][subTU] = pcCU->getCbf 
((uiAbsPartIdx + (subTU * partIdxesPerSubTU)), compID, subTUDepth); } } } } TComTURecurse tuRecurseChild(rTu, false); const UInt uiQPartNumSubdiv = tuRecurseChild.GetAbsPartIdxNumParts(); DEBUG_STRING_NEW(sSplitString[MAX_NUM_COMPONENT]) do { DEBUG_STRING_NEW(childString) #if NH_3D_VSO xEstimateInterResidualQT( pcResi, pcOrg, pcPred, dSubdivCost, uiSubdivBits, uiSubdivDist, bCheckFull ? NULL : puiZeroDist, tuRecurseChild DEBUG_STRING_PASS_INTO(childString)); #else xEstimateInterResidualQT( pcResi, dSubdivCost, uiSubdivBits, uiSubdivDist, bCheckFull ? NULL : puiZeroDist, tuRecurseChild DEBUG_STRING_PASS_INTO(childString)); #endif #if DEBUG_STRING // split the string by component and append to the relevant output (because decoder decodes in channel order, whereas this search searches by TU-order) std::size_t lastPos=0; const std::size_t endStrng=childString.find(debug_reorder_data_inter_token[MAX_NUM_COMPONENT], lastPos); for(UInt ch = 0; ch < numValidComp; ch++) { if (lastPos!=std::string::npos && childString.find(debug_reorder_data_inter_token[ch], lastPos)==lastPos) { lastPos+=strlen(debug_reorder_data_inter_token[ch]); // skip leading string } std::size_t pos=childString.find(debug_reorder_data_inter_token[ch+1], lastPos); if (pos!=std::string::npos && pos>endStrng) { lastPos=endStrng; } sSplitString[ch]+=childString.substr(lastPos, (pos==std::string::npos)? 
std::string::npos : (pos-lastPos) ); lastPos=pos; } #endif } while ( tuRecurseChild.nextSection(rTu) ) ; UInt uiCbfAny=0; for(UInt ch = 0; ch < numValidComp; ch++) { UInt uiYUVCbf = 0; for( UInt ui = 0; ui < 4; ++ui ) { uiYUVCbf |= pcCU->getCbf( uiAbsPartIdx + ui * uiQPartNumSubdiv, ComponentID(ch), uiTrMode + 1 ); } UChar *pBase=pcCU->getCbf( ComponentID(ch) ); const UInt flags=uiYUVCbf << uiTrMode; for( UInt ui = 0; ui < 4 * uiQPartNumSubdiv; ++ui ) { pBase[uiAbsPartIdx + ui] |= flags; } uiCbfAny|=uiYUVCbf; } m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] ); m_pcEntropyCoder->resetBits(); // when compID isn't a channel, code Cbfs: xEncodeInterResidualQT( MAX_NUM_COMPONENT, rTu ); for(UInt ch = 0; ch < numValidComp; ch++) { xEncodeInterResidualQT( ComponentID(ch), rTu ); } uiSubdivBits = m_pcEntropyCoder->getNumberOfWrittenBits(); #if NH_3D_VSO if( m_pcRdCost->getUseLambdaScaleVSO()) { dSubdivCost = m_pcRdCost->calcRdCostVSO( uiSubdivBits, uiSubdivDist ); } else #endif dSubdivCost = m_pcRdCost->calcRdCost( uiSubdivBits, uiSubdivDist ); if (!bCheckFull || (uiCbfAny && (dSubdivCost < dSingleCost))) { rdCost += dSubdivCost; ruiBits += uiSubdivBits; ruiDist += uiSubdivDist; #if DEBUG_STRING for(UInt ch = 0; ch < numValidComp; ch++) { DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[ch]) DEBUG_STRING_APPEND(sDebug, sSplitString[ch]) } #endif } else { rdCost += dSingleCost; ruiBits += uiSingleBits; ruiDist += uiSingleDist; //restore state to unsplit pcCU->setTrIdxSubParts( uiTrMode, uiAbsPartIdx, uiDepth ); for(UInt ch = 0; ch < numValidComp; ch++) { const ComponentID compID=ComponentID(ch); DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[ch]) if (rTu.ProcessComponentSection(compID)) { DEBUG_STRING_APPEND(sDebug, sSingleStringComp[compID]) const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height; const UInt numberOfSections = splitIntoSubTUs ? 
2 : 1; const UInt partIdxesPerSubTU = rTu.GetAbsPartIdxNumParts(compID) >> (splitIntoSubTUs ? 1 : 0); for (UInt subTUIndex = 0; subTUIndex < numberOfSections; subTUIndex++) { const UInt uisubTUPartIdx = uiAbsPartIdx + (subTUIndex * partIdxesPerSubTU); if (splitIntoSubTUs) { const UChar combinedCBF = (bestsubTUCBF[compID][subTUIndex] << subTUDepth) | (bestCBF[compID] << uiTrMode); pcCU->setCbfPartRange(combinedCBF, compID, uisubTUPartIdx, partIdxesPerSubTU); } else { pcCU->setCbfPartRange((bestCBF[compID] << uiTrMode), compID, uisubTUPartIdx, partIdxesPerSubTU); } pcCU->setCrossComponentPredictionAlphaPartRange(bestCrossCPredictionAlpha[compID][subTUIndex], compID, uisubTUPartIdx, partIdxesPerSubTU); pcCU->setTransformSkipPartRange(uiBestTransformMode[compID][subTUIndex], compID, uisubTUPartIdx, partIdxesPerSubTU); pcCU->setExplicitRdpcmModePartRange(bestExplicitRdpcmModeUnSplit[compID][subTUIndex], compID, uisubTUPartIdx, partIdxesPerSubTU); } } } m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_TEST ] ); } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif } else { rdCost += dSingleCost; ruiBits += uiSingleBits; ruiDist += uiSingleDist; #if DEBUG_STRING for(UInt ch = 0; ch < numValidComp; ch++) { const ComponentID compID=ComponentID(ch); DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[compID]) if (rTu.ProcessComponentSection(compID)) { DEBUG_STRING_APPEND(sDebug, sSingleStringComp[compID]) } } #endif } #if NH_MV D_DEC_INDENT( g_traceModeCheck ); #endif DEBUG_STRING_APPEND(sDebug, debug_reorder_data_inter_token[MAX_NUM_COMPONENT]) } Void TEncSearch::xEncodeInterResidualQT( const ComponentID compID, TComTU &rTu ) { TComDataCU* pcCU=rTu.getCU(); const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU(); const UInt uiCurrTrMode = rTu.GetTransformDepthRel(); assert( pcCU->getDepth( 0 ) == pcCU->getDepth( uiAbsPartIdx ) ); const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); const Bool bSubdiv = uiCurrTrMode != uiTrMode; const UInt 
uiLog2TrSize = rTu.GetLog2LumaTrSize(); if (compID==MAX_NUM_COMPONENT) // we are not processing a channel, instead we always recurse and code the CBFs { if( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ) { if((pcCU->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && (pcCU->getPartitionSize(uiAbsPartIdx) != SIZE_2Nx2N)) { assert(bSubdiv); // Inferred splitting rule - see derivation and use of interSplitFlag in the specification. } else { m_pcEntropyCoder->encodeTransformSubdivFlag( bSubdiv, 5 - uiLog2TrSize ); } } assert( !pcCU->isIntra(uiAbsPartIdx) ); const Bool bFirstCbfOfCU = uiCurrTrMode == 0; for (UInt ch=COMPONENT_Cb; chgetPic()->getNumberValidComponents(); ch++) { const ComponentID compIdInner=ComponentID(ch); if( bFirstCbfOfCU || rTu.ProcessingAllQuadrants(compIdInner) ) { if( bFirstCbfOfCU || pcCU->getCbf( uiAbsPartIdx, compIdInner, uiCurrTrMode - 1 ) ) { m_pcEntropyCoder->encodeQtCbf( rTu, compIdInner, !bSubdiv ); } } else { assert( pcCU->getCbf( uiAbsPartIdx, compIdInner, uiCurrTrMode ) == pcCU->getCbf( uiAbsPartIdx, compIdInner, uiCurrTrMode - 1 ) ); } } if (!bSubdiv) { m_pcEntropyCoder->encodeQtCbf( rTu, COMPONENT_Y, true ); } } if( !bSubdiv ) { if (compID != MAX_NUM_COMPONENT) // we have already coded the CBFs, so now we code coefficients { if (rTu.ProcessComponentSection(compID)) { if (isChroma(compID) && (pcCU->getCbf(uiAbsPartIdx, COMPONENT_Y, uiTrMode) != 0)) { m_pcEntropyCoder->encodeCrossComponentPrediction(rTu, compID); } if (pcCU->getCbf(uiAbsPartIdx, compID, uiTrMode) != 0) { const UInt uiQTTempAccessLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize; TCoeff *pcCoeffCurr = m_ppcQTTempCoeff[compID][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID); m_pcEntropyCoder->encodeCoeffNxN( rTu, pcCoeffCurr, compID ); } } } } else { if( compID==MAX_NUM_COMPONENT || pcCU->getCbf( uiAbsPartIdx, compID, uiCurrTrMode ) ) { 
TComTURecurse tuRecurseChild(rTu, false); do { xEncodeInterResidualQT( compID, tuRecurseChild ); } while (tuRecurseChild.nextSection(rTu)); } } } Void TEncSearch::xSetInterResidualQTData( TComYuv* pcResi, Bool bSpatial, TComTU &rTu ) // TODO: turn this into two functions for bSpatial=true and false. { TComDataCU* pcCU=rTu.getCU(); const UInt uiCurrTrMode=rTu.GetTransformDepthRel(); const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU(); assert( pcCU->getDepth( 0 ) == pcCU->getDepth( uiAbsPartIdx ) ); const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); const TComSPS *sps=pcCU->getSlice()->getSPS(); if( uiCurrTrMode == uiTrMode ) { const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize(); const UInt uiQTTempAccessLayer = sps->getQuadtreeTULog2MaxSize() - uiLog2TrSize; if( bSpatial ) { // Data to be copied is in the spatial domain, i.e., inverse-transformed. for(UInt i=0; igetNumberValidComponents(); i++) { const ComponentID compID=ComponentID(i); if (rTu.ProcessComponentSection(compID)) { const TComRectangle &rectCompTU(rTu.getRect(compID)); m_pcQTTempTComYuv[uiQTTempAccessLayer].copyPartToPartComponentMxN ( compID, pcResi, rectCompTU ); } } } else { for (UInt ch=0; ch < getNumberValidComponents(sps->getChromaFormatIdc()); ch++) { const ComponentID compID = ComponentID(ch); if (rTu.ProcessComponentSection(compID)) { const TComRectangle &rectCompTU(rTu.getRect(compID)); const UInt numCoeffInBlock = rectCompTU.width * rectCompTU.height; const UInt offset = rTu.getCoefficientOffset(compID); TCoeff* dest = pcCU->getCoeff(compID) + offset; const TCoeff* src = m_ppcQTTempCoeff[compID][uiQTTempAccessLayer] + offset; ::memcpy( dest, src, sizeof(TCoeff)*numCoeffInBlock ); #if ADAPTIVE_QP_SELECTION TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[compID][uiQTTempAccessLayer] + offset; TCoeff* pcArlCoeffDst = pcCU->getArlCoeff(compID) + offset; ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * numCoeffInBlock ); #endif } } } } else { TComTURecurse tuRecurseChild(rTu, false); do { 
xSetInterResidualQTData( pcResi, bSpatial, tuRecurseChild );
    } while (tuRecurseChild.nextSection(rTu));
  }
}


/** Counts the bits the entropy coder writes for one intra direction candidate.
 *
 * The intra direction entry at uiPartOffset is overwritten with uiMode only
 * for the duration of the measurement and is restored before returning.
 *
 * \param pcCU         coding unit whose intra direction table is used
 * \param uiMode       intra direction value to be measured
 * \param uiPartOffset index of the entry in the intra direction table
 * \param uiDepth      depth used to select the stored CABAC state to reload
 * \param chType       channel type (luma or chroma) whose direction is coded
 * \returns number of bits reported by the entropy coder after coding the direction
 */
UInt TEncSearch::xModeBitsIntra( TComDataCU* pcCU, UInt uiMode, UInt uiPartOffset, UInt uiDepth, const ChannelType chType )
{
  // Reload only contexts required for coding intra mode information
  m_pcRDGoOnSbacCoder->loadIntraDirMode( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST], chType );
#if NH_3D_DMM
  if( pcCU->getSlice()->getIsDepth() && isLuma(chType) )
  {
    m_pcRDGoOnSbacCoder->loadIntraDepthDmm( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
  }
#endif

  // Temporarily set the intra dir being tested, and only
  // for absPartIdx, since encodeIntraDirModeLuma/Chroma only use
  // the entry at absPartIdx.

  UChar &rIntraDirVal=pcCU->getIntraDir( chType )[uiPartOffset];
  UChar origVal=rIntraDirVal;
  rIntraDirVal = uiMode;
  //pcCU->setIntraDirSubParts ( chType, uiMode, uiPartOffset, uiDepth + uiInitTrDepth );

  m_pcEntropyCoder->resetBits();
  if (isLuma(chType))
  {
    m_pcEntropyCoder->encodeIntraDirModeLuma ( pcCU, uiPartOffset);
  }
  else
  {
    m_pcEntropyCoder->encodeIntraDirModeChroma ( pcCU, uiPartOffset);
  }

  rIntraDirVal = origVal; // restore

  return m_pcEntropyCoder->getNumberOfWrittenBits();
}


UInt TEncSearch::xUpdateCandList( UInt uiMode, Double uiCost, UInt uiFastCandNum, UInt * CandModeList, Double * CandCostList )
{
  UInt i;
  UInt shift=0;

  // NOTE(review): the condition below looks truncated - "shiftgetMergeFlag" is not an identifier
  // declared in this function, and the statements that follow use pcCU and ruiBits, which are not
  // parameters here. Source text between "shift" and "getMergeFlag" appears to be missing;
  // restore it from the original file before building. Left untouched on purpose.
  while ( shiftgetMergeFlag( 0 ) && pcCU->getPartitionSize( 0 ) == SIZE_2Nx2N && !pcCU->getQtRootCbf( 0 ))
  {
    pcCU->setSkipFlagSubParts( true, 0, pcCU->getDepth(0) );

    m_pcEntropyCoder->resetBits();
    if(pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
    {
      m_pcEntropyCoder->encodeCUTransquantBypassFlag(pcCU, 0, true);
    }
    m_pcEntropyCoder->encodeSkipFlag(pcCU, 0, true);
    m_pcEntropyCoder->encodeMergeIndex(pcCU, 0, true);
#if NH_3D_ARP
    m_pcEntropyCoder->encodeARPW( pcCU, 0 );
#endif
#if NH_3D_IC
    m_pcEntropyCoder->encodeICFlag( pcCU, 0, true );
#endif
    // Accumulate the bits written for the skip-coded CU into the caller's counter.
    ruiBits += m_pcEntropyCoder->getNumberOfWrittenBits();
  }
  else
  {
    m_pcEntropyCoder->resetBits();
if(pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag()) { m_pcEntropyCoder->encodeCUTransquantBypassFlag(pcCU, 0, true); } m_pcEntropyCoder->encodeSkipFlag ( pcCU, 0, true ); #if NH_3D_DIS m_pcEntropyCoder->encodeDIS ( pcCU, 0, true ); #endif m_pcEntropyCoder->encodePredMode( pcCU, 0, true ); m_pcEntropyCoder->encodePartSize( pcCU, 0, pcCU->getDepth(0), true ); m_pcEntropyCoder->encodePredInfo( pcCU, 0 ); #if NH_3D_SDC_INTRA m_pcEntropyCoder->encodeSDCFlag( pcCU, 0, true ); #endif #if NH_3D_ARP m_pcEntropyCoder->encodeARPW( pcCU , 0 ); #endif #if NH_3D_IC m_pcEntropyCoder->encodeICFlag( pcCU, 0, true ); #endif #if NH_3D_DBBP m_pcEntropyCoder->encodeDBBPFlag( pcCU, 0, true ); #endif Bool codeDeltaQp = false; Bool codeChromaQpAdj = false; m_pcEntropyCoder->encodeCoeff ( pcCU, 0, pcCU->getDepth(0), codeDeltaQp, codeChromaQpAdj ); ruiBits += m_pcEntropyCoder->getNumberOfWrittenBits(); } } /** * \brief Generate half-sample interpolated block * * \param pattern Reference picture ROI * \param biPred Flag indicating whether block is for biprediction */ Void TEncSearch::xExtDIFUpSamplingH( TComPattern* pattern ) { Int width = pattern->getROIYWidth(); Int height = pattern->getROIYHeight(); Int srcStride = pattern->getPatternLStride(); Int intStride = m_filteredBlockTmp[0].getStride(COMPONENT_Y); Int dstStride = m_filteredBlock[0][0].getStride(COMPONENT_Y); Pel *intPtr; Pel *dstPtr; Int filterSize = NTAPS_LUMA; Int halfFilterSize = (filterSize>>1); Pel *srcPtr = pattern->getROIY() - halfFilterSize*srcStride - 1; const ChromaFormat chFmt = m_filteredBlock[0][0].getChromaFormat(); m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0].getAddr(COMPONENT_Y), intStride, width+1, height+filterSize, 0, false, chFmt, pattern->getBitDepthY()); m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2].getAddr(COMPONENT_Y), intStride, width+1, height+filterSize, 2, false, chFmt, pattern->getBitDepthY()); intPtr = 
m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + halfFilterSize * intStride + 1; dstPtr = m_filteredBlock[0][0].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width+0, height+0, 0, false, true, chFmt, pattern->getBitDepthY()); intPtr = m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride + 1; dstPtr = m_filteredBlock[2][0].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width+0, height+1, 2, false, true, chFmt, pattern->getBitDepthY()); intPtr = m_filteredBlockTmp[2].getAddr(COMPONENT_Y) + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][2].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width+1, height+0, 0, false, true, chFmt, pattern->getBitDepthY()); intPtr = m_filteredBlockTmp[2].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[2][2].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width+1, height+1, 2, false, true, chFmt, pattern->getBitDepthY()); } /** * \brief Generate quarter-sample interpolated blocks * * \param pattern Reference picture ROI * \param halfPelRef Half-pel mv * \param biPred Flag indicating whether block is for biprediction */ Void TEncSearch::xExtDIFUpSamplingQ( TComPattern* pattern, TComMv halfPelRef ) { Int width = pattern->getROIYWidth(); Int height = pattern->getROIYHeight(); Int srcStride = pattern->getPatternLStride(); Pel *srcPtr; Int intStride = m_filteredBlockTmp[0].getStride(COMPONENT_Y); Int dstStride = m_filteredBlock[0][0].getStride(COMPONENT_Y); Pel *intPtr; Pel *dstPtr; Int filterSize = NTAPS_LUMA; Int halfFilterSize = (filterSize>>1); Int extHeight = (halfPelRef.getVer() == 0) ? 
height + filterSize : height + filterSize-1; const ChromaFormat chFmt = m_filteredBlock[0][0].getChromaFormat(); // Horizontal filter 1/4 srcPtr = pattern->getROIY() - halfFilterSize * srcStride - 1; intPtr = m_filteredBlockTmp[1].getAddr(COMPONENT_Y); if (halfPelRef.getVer() > 0) { srcPtr += srcStride; } if (halfPelRef.getHor() >= 0) { srcPtr += 1; } m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 1, false, chFmt, pattern->getBitDepthY()); // Horizontal filter 3/4 srcPtr = pattern->getROIY() - halfFilterSize*srcStride - 1; intPtr = m_filteredBlockTmp[3].getAddr(COMPONENT_Y); if (halfPelRef.getVer() > 0) { srcPtr += srcStride; } if (halfPelRef.getHor() > 0) { srcPtr += 1; } m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 3, false, chFmt, pattern->getBitDepthY()); // Generate @ 1,1 intPtr = m_filteredBlockTmp[1].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[1][1].getAddr(COMPONENT_Y); if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1, false, true, chFmt, pattern->getBitDepthY()); // Generate @ 3,1 intPtr = m_filteredBlockTmp[1].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[3][1].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3, false, true, chFmt, pattern->getBitDepthY()); if (halfPelRef.getVer() != 0) { // Generate @ 2,1 intPtr = m_filteredBlockTmp[1].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[2][1].getAddr(COMPONENT_Y); if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2, false, true, chFmt, pattern->getBitDepthY()); // Generate @ 2,3 intPtr = m_filteredBlockTmp[3].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = 
m_filteredBlock[2][3].getAddr(COMPONENT_Y); if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2, false, true, chFmt, pattern->getBitDepthY()); } else { // Generate @ 0,1 intPtr = m_filteredBlockTmp[1].getAddr(COMPONENT_Y) + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][1].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0, false, true, chFmt, pattern->getBitDepthY()); // Generate @ 0,3 intPtr = m_filteredBlockTmp[3].getAddr(COMPONENT_Y) + halfFilterSize * intStride; dstPtr = m_filteredBlock[0][3].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0, false, true, chFmt, pattern->getBitDepthY()); } if (halfPelRef.getHor() != 0) { // Generate @ 1,2 intPtr = m_filteredBlockTmp[2].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[1][2].getAddr(COMPONENT_Y); if (halfPelRef.getHor() > 0) { intPtr += 1; } if (halfPelRef.getVer() >= 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1, false, true, chFmt, pattern->getBitDepthY()); // Generate @ 3,2 intPtr = m_filteredBlockTmp[2].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[3][2].getAddr(COMPONENT_Y); if (halfPelRef.getHor() > 0) { intPtr += 1; } if (halfPelRef.getVer() > 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3, false, true, chFmt, pattern->getBitDepthY()); } else { // Generate @ 1,0 intPtr = m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride + 1; dstPtr = m_filteredBlock[1][0].getAddr(COMPONENT_Y); if (halfPelRef.getVer() >= 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1, false, true, chFmt, pattern->getBitDepthY()); // Generate @ 3,0 intPtr = 
m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride + 1; dstPtr = m_filteredBlock[3][0].getAddr(COMPONENT_Y); if (halfPelRef.getVer() > 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3, false, true, chFmt, pattern->getBitDepthY()); } // Generate @ 1,3 intPtr = m_filteredBlockTmp[3].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[1][3].getAddr(COMPONENT_Y); if (halfPelRef.getVer() == 0) { intPtr += intStride; } m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1, false, true, chFmt, pattern->getBitDepthY()); // Generate @ 3,3 intPtr = m_filteredBlockTmp[3].getAddr(COMPONENT_Y) + (halfFilterSize-1) * intStride; dstPtr = m_filteredBlock[3][3].getAddr(COMPONENT_Y); m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3, false, true, chFmt, pattern->getBitDepthY()); } //! set wp tables Void TEncSearch::setWpScalingDistParam( TComDataCU* pcCU, Int iRefIdx, RefPicList eRefPicListCur ) { if ( iRefIdx<0 ) { m_cDistParam.bApplyWeight = false; return; } TComSlice *pcSlice = pcCU->getSlice(); WPScalingParam *wp0 , *wp1; m_cDistParam.bApplyWeight = ( pcSlice->getSliceType()==P_SLICE && pcSlice->testWeightPred() ) || ( pcSlice->getSliceType()==B_SLICE && pcSlice->testWeightBiPred() ) ; if ( !m_cDistParam.bApplyWeight ) { return; } Int iRefIdx0 = ( eRefPicListCur == REF_PIC_LIST_0 ) ? iRefIdx : (-1); Int iRefIdx1 = ( eRefPicListCur == REF_PIC_LIST_1 ) ? 
iRefIdx : (-1); getWpScaling( pcCU, iRefIdx0, iRefIdx1, wp0 , wp1 ); if ( iRefIdx0 < 0 ) { wp0 = NULL; } if ( iRefIdx1 < 0 ) { wp1 = NULL; } m_cDistParam.wpCur = NULL; if ( eRefPicListCur == REF_PIC_LIST_0 ) { m_cDistParam.wpCur = wp0; } else { m_cDistParam.wpCur = wp1; } } #if NH_3D_DMM // ------------------------------------------------------------------------------------------------------------------- // Depth intra search // ------------------------------------------------------------------------------------------------------------------- Void TEncSearch::xCalcBiSegDCs( Pel* ptrSrc, UInt srcStride, Bool* biSegPattern, Int patternStride, Pel& valDC1, Pel& valDC2, Pel defaultVal, Bool subSamp ) { valDC1 = defaultVal; valDC2 = defaultVal; UInt uiDC1 = 0; UInt uiDC2 = 0; UInt uiNumPixDC1 = 0, uiNumPixDC2 = 0; Int subSamplePix = subSamp ? 2 : 1; Pel* piTemp = ptrSrc; for( UInt uiY = 0; uiY < patternStride; uiY += subSamplePix ) { for( UInt uiX = 0; uiX < patternStride; uiX += subSamplePix ) { if( true == biSegPattern[uiX] ) { uiDC2 += piTemp[uiX]; uiNumPixDC2++; } else { uiDC1 += piTemp[uiX]; uiNumPixDC1++; } } piTemp += subSamplePix*srcStride; biSegPattern += subSamplePix*patternStride; } if( uiNumPixDC1 > 0 ) { valDC1 = uiDC1 / uiNumPixDC1; } if( uiNumPixDC2 > 0 ) { valDC2 = uiDC2 / uiNumPixDC2; } } Void TEncSearch::xSearchDmmDeltaDCs( TComDataCU* pcCU, UInt uiAbsPtIdx, Pel* piOrig, Pel* piPredic, UInt uiStride, Bool* biSegPattern, Int patternStride, UInt uiWidth, UInt uiHeight, Pel& rDeltaDC1, Pel& rDeltaDC2 ) { assert( biSegPattern ); Int bitDepthY = pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); Pel origDC1 = 0; Pel origDC2 = 0; xCalcBiSegDCs ( piOrig, uiStride, biSegPattern, patternStride, origDC1, origDC2, (1<<(bitDepthY-1)) ); assignBiSegDCs( piPredic, uiStride, biSegPattern, patternStride, origDC1, origDC2 ); Pel predDC1 = 0; Pel predDC2 = 0; predBiSegDCs( pcCU, uiAbsPtIdx, uiWidth, uiHeight, biSegPattern, patternStride, predDC1, predDC2 ); 
rDeltaDC1 = origDC1 - predDC1; rDeltaDC2 = origDC2 - predDC2; #if NH_3D_VSO if( m_pcRdCost->getUseVSO() ) { Pel fullDeltaDC1 = rDeltaDC1; Pel fullDeltaDC2 = rDeltaDC2; Dist uiBestDist = RDO_DIST_MAX; UInt uiBestQStepDC1 = 0; UInt uiBestQStepDC2 = 0; UInt uiDeltaDC1Max = abs(fullDeltaDC1); UInt uiDeltaDC2Max = abs(fullDeltaDC2); //VSO Level delta DC check range extension uiDeltaDC1Max += (uiDeltaDC1Max>>1); uiDeltaDC2Max += (uiDeltaDC2Max>>1); // limit search range to [0, IBDI_MAX] if( fullDeltaDC1 < 0 && uiDeltaDC1Max > abs(predDC1) ) { uiDeltaDC1Max = abs(predDC1); } if( fullDeltaDC1 >= 0 && uiDeltaDC1Max > ((1< abs(predDC2) ) { uiDeltaDC2Max = abs(predDC2); } if( fullDeltaDC2 >= 0 && uiDeltaDC2Max > ((1<getUseEstimatedVSD() ) { uiOrgDist = m_pcRdCost->getDistPartVSD( pcCU, uiAbsPtIdx, bitDepthY, piPredic, uiStride, piOrig, uiStride, uiWidth, uiHeight, false ); } else { uiOrgDist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPtIdx, bitDepthY, piPredic, uiStride, piOrig, uiStride, uiWidth, uiHeight, false ); } uiBestDist = uiOrgDist; uiBestQStepDC1 = abs(fullDeltaDC1); uiBestQStepDC2 = abs(fullDeltaDC2); // coarse search with step size 4 for( UInt uiQStepDC1 = 0; uiQStepDC1 < uiDeltaDC1Max; uiQStepDC1 += 4 ) { Pel testDC1 = ClipBD( predDC1 + ((Int)(uiQStepDC1) * (( fullDeltaDC1 < 0 ) ? -1 : 1)), bitDepthY ); for( UInt uiQStepDC2 = 0; uiQStepDC2 < uiDeltaDC2Max; uiQStepDC2 += 4 ) { Pel testDC2 = ClipBD( predDC2 + ((Int)(uiQStepDC2) * (( fullDeltaDC2 < 0 ) ? 
-1 : 1)), bitDepthY ); assignBiSegDCs( piPredic, uiStride, biSegPattern, patternStride, testDC1, testDC2 ); Dist uiAct4Dist = RDO_DIST_MAX; if( m_pcRdCost->getUseEstimatedVSD() ) { uiAct4Dist = m_pcRdCost->getDistPartVSD( pcCU, uiAbsPtIdx, bitDepthY, piPredic, uiStride, piOrig, uiStride, uiWidth, uiHeight, false ); } else { uiAct4Dist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPtIdx, bitDepthY, piPredic, uiStride, piOrig, uiStride, uiWidth, uiHeight, false ); } if( uiAct4Dist < uiBestDist || uiBestDist == RDO_DIST_MAX ) { uiBestDist = uiAct4Dist; uiBestQStepDC1 = uiQStepDC1; uiBestQStepDC2 = uiQStepDC2; } } } // refinement +-3 for( UInt uiQStepDC1 = (UInt)max(0, ((Int)uiBestQStepDC1-3)); uiQStepDC1 <= (uiBestQStepDC1+3); uiQStepDC1++ ) { Pel testDC1 = ClipBD( predDC1 + ((Int)(uiQStepDC1) * (( fullDeltaDC1 < 0 ) ? -1 : 1)), bitDepthY ); for( UInt uiQStepDC2 = (UInt)max(0, ((Int)uiBestQStepDC2-3)); uiQStepDC2 <= (uiBestQStepDC2+3); uiQStepDC2++ ) { Pel testDC2 = ClipBD( predDC2 + ((Int)(uiQStepDC2) * (( fullDeltaDC2 < 0 ) ? -1 : 1)), bitDepthY ); assignBiSegDCs( piPredic, uiStride, biSegPattern, patternStride, testDC1, testDC2 ); Dist uiActDist = RDO_DIST_MAX; if( m_pcRdCost->getUseEstimatedVSD() ) { uiActDist = m_pcRdCost->getDistPartVSD( pcCU, uiAbsPtIdx, bitDepthY, piPredic, uiStride, piOrig, uiStride, uiWidth, uiHeight, false ); } else { uiActDist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPtIdx, bitDepthY, piPredic, uiStride, piOrig, uiStride, uiWidth, uiHeight, false ); } if( uiActDist < uiBestDist || uiBestDist == RDO_DIST_MAX ) { uiBestDist = uiActDist; uiBestQStepDC1 = uiQStepDC1; uiBestQStepDC2 = uiQStepDC2; } } } rDeltaDC1 = (Int)(uiBestQStepDC1) * (Int)(( fullDeltaDC1 < 0 ) ? -1 : 1); rDeltaDC2 = (Int)(uiBestQStepDC2) * (Int)(( fullDeltaDC2 < 0 ) ? 
-1 : 1); } #endif #if NH_3D_DLT rDeltaDC1 = pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), ClipBD(predDC1 + rDeltaDC1, bitDepthY )) - pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), predDC1 ); rDeltaDC2 = pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), ClipBD(predDC2 + rDeltaDC2, bitDepthY )) - pcCU->getSlice()->getPPS()->getDLT()->depthValue2idx( pcCU->getSlice()->getLayerIdInVps(), predDC2 ); #endif } Void TEncSearch::xSearchDmm1Wedge( TComDataCU* pcCU, UInt uiAbsPtIdx, Pel* piRef, UInt uiRefStride, UInt uiWidth, UInt uiHeight, UInt& ruiTabIdx ) { ruiTabIdx = 0; Int bitDepthY = pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); // local pred buffer TComYuv cPredYuv; cPredYuv.create( uiWidth, uiHeight, CHROMA_400 ); cPredYuv.clear(); UInt uiPredStride = cPredYuv.getStride( COMPONENT_Y ); Pel* piPred = cPredYuv.getAddr( COMPONENT_Y ); Pel refDC1 = 0; Pel refDC2 = 0; WedgeList* pacWedgeList = getWedgeListScaled ( uiWidth ); WedgeNodeList* pacWedgeNodeList = getWedgeNodeListScaled( uiWidth ); // coarse wedge search #if NH_3D_VSO Dist uiBestDist = RDO_DIST_MAX; #else Distortion uiBestDist = RDO_DIST_MAX; #endif UInt uiBestNodeId = 0; for( UInt uiNodeId = 0; uiNodeId < pacWedgeNodeList->size(); uiNodeId++ ) { TComWedgelet* pcWedgelet = &(pacWedgeList->at(pacWedgeNodeList->at(uiNodeId).getPatternIdx())); Bool *pbPattern = pcWedgelet->getPatternScaled(uiWidth); UInt uiStride = uiWidth; xCalcBiSegDCs ( piRef, uiRefStride, pbPattern, uiStride, refDC1, refDC2, (1<<(bitDepthY-1)) ); assignBiSegDCs( piPred, uiPredStride, pbPattern, uiStride, refDC1, refDC2 ); #if !NH_3D_VSO Distortion uiActDist = RDO_DIST_MAX; #else Dist uiActDist = RDO_DIST_MAX; if( m_pcRdCost->getUseVSO() ) { if( m_pcRdCost->getUseEstimatedVSD() ) { uiActDist = m_pcRdCost->getDistPartVSD( pcCU, uiAbsPtIdx, bitDepthY, piPred, uiPredStride, piRef, uiRefStride, uiWidth, 
uiHeight, false ); } else { uiActDist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPtIdx, bitDepthY, piPred, uiPredStride, piRef, uiRefStride, uiWidth, uiHeight, false ); } } else #endif { uiActDist = m_pcRdCost->getDistPart( bitDepthY, piPred, uiPredStride, piRef, uiRefStride, uiWidth, uiHeight, COMPONENT_Y, DF_SAD ); } if( uiActDist < uiBestDist || uiBestDist == RDO_DIST_MAX ) { uiBestDist = uiActDist; uiBestNodeId = uiNodeId; } } // refinement #if NH_3D_VSO Dist uiBestDistRef = uiBestDist; #else Distortion uiBestDistRef = uiBestDist; #endif UInt uiBestTabIdxRef = pacWedgeNodeList->at(uiBestNodeId).getPatternIdx(); for( UInt uiRefId = 0; uiRefId < DMM_NUM_WEDGE_REFINES; uiRefId++ ) { if( pacWedgeNodeList->at(uiBestNodeId).getRefineIdx( uiRefId ) != DMM_NO_WEDGE_IDX ) { TComWedgelet* pcWedgelet = &(pacWedgeList->at(pacWedgeNodeList->at(uiBestNodeId).getRefineIdx( uiRefId ))); Bool *pbPattern = pcWedgelet->getPatternScaled(uiWidth); UInt uiStride = uiWidth; xCalcBiSegDCs ( piRef, uiRefStride, pbPattern, uiStride, refDC1, refDC2, (1<<(bitDepthY-1)) ); assignBiSegDCs( piPred, uiPredStride, pbPattern, uiStride, refDC1, refDC2 ); #if !NH_3D_VSO Distortion uiActDist = RDO_DIST_MAX; #else Dist uiActDist = RDO_DIST_MAX; if( m_pcRdCost->getUseVSO() ) { if( m_pcRdCost->getUseEstimatedVSD() ) { uiActDist = m_pcRdCost->getDistPartVSD( pcCU, uiAbsPtIdx, bitDepthY, piPred, uiPredStride, piRef, uiRefStride, uiWidth, uiHeight, false ); } else { uiActDist = m_pcRdCost->getDistPartVSO( pcCU, uiAbsPtIdx, bitDepthY, piPred, uiPredStride, piRef, uiRefStride, uiWidth, uiHeight, false ); } } else #endif { uiActDist = m_pcRdCost->getDistPart( bitDepthY, piPred, uiPredStride, piRef, uiRefStride, uiWidth, uiHeight, COMPONENT_Y, DF_SAD ); } if( uiActDist < uiBestDistRef || uiBestDistRef == RDO_DIST_MAX ) { uiBestDistRef = uiActDist; uiBestTabIdxRef = pacWedgeNodeList->at(uiBestNodeId).getRefineIdx( uiRefId ); } } } ruiTabIdx = uiBestTabIdxRef; cPredYuv.destroy(); return; } #endif #if 
NH_3D_SDC_INTRA Void TEncSearch::xCalcConstantSDC( Pel* ptrSrc, UInt srcStride, UInt uiSize, Pel& valDC ) { valDC = 0; UInt uiDC = 0; UInt uiNumPixDC = 0; Int subSamplePix = ( uiSize > 16 ) ? 2 : 1; Pel* piTemp = ptrSrc; for( UInt uiY = 0; uiY < uiSize; uiY += subSamplePix ) { for( UInt uiX = 0; uiX < uiSize; uiX += subSamplePix ) { uiDC += piTemp[uiX]; uiNumPixDC++; } piTemp += subSamplePix*srcStride; } if( uiNumPixDC > 0 ) { valDC = uiDC / uiNumPixDC; } } #endif //! \}