Changeset 56 in 3DVCSoftware for trunk/source/Lib/TLibCommon/TComRdCost.cpp
- Timestamp:
- 11 May 2012, 21:20:17 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/source/Lib/TLibCommon/TComRdCost.cpp
r5 r56 2 2 * License, included below. This software may be subject to other third party 3 3 * and contributor rights, including patent rights, and no such rights are 4 * granted under this license. 4 * granted under this license. 5 5 * 6 * Copyright (c) 2010-201 1,ISO/IEC6 * Copyright (c) 2010-2012, ITU/ISO/IEC 7 7 * All rights reserved. 8 8 * … … 15 15 * this list of conditions and the following disclaimer in the documentation 16 16 * and/or other materials provided with the distribution. 17 * * Neither the name of the I SO/IEC nor the names of its contributors may17 * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may 18 18 * be used to endorse or promote products derived from this software without 19 19 * specific prior written permission. … … 32 32 */ 33 33 34 35 36 34 /** \file TComRdCost.cpp 37 35 \brief RD cost computation class … … 40 38 #include <math.h> 41 39 #include <assert.h> 40 #include "TComRom.h" 42 41 #include "TComRdCost.h" 43 42 #include "TComDataCU.h" 44 43 44 //! \ingroup TLibCommon 45 //! \{ 45 46 46 47 TComRdCost::TComRdCost() … … 51 52 TComRdCost::~TComRdCost() 52 53 { 54 #if !FIX203 53 55 xUninit(); 56 #endif 54 57 } 55 58 56 59 // Calculate RD functions 57 Double TComRdCost::calcRdCost( UInt uiBits, Dist uiDistortion, Bool bFlag, DFunc eDFunc )60 Double TComRdCost::calcRdCost( UInt uiBits, UInt uiDistortion, Bool bFlag, DFunc eDFunc ) 58 61 { 59 62 Double dRdCost = 0.0; … … 85 88 { 86 89 // Intra8x8, Intra4x4 Block only... 90 #if LOSSLESS_CODING && SEQUENCE_LEVEL_LOSSLESS 91 dRdCost = (Double)(uiBits); 92 #else 87 93 dRdCost = (((Double)uiDistortion) + ((Double)uiBits * dLambda)); 94 #endif 88 95 } 89 96 else … … 92 99 { 93 100 dRdCost = ((Double)uiDistortion + (Double)((Int)(uiBits * dLambda+.5)>>16)); 94 dRdCost = (Double)( Dist)floor(dRdCost);101 dRdCost = (Double)(UInt)floor(dRdCost); 95 102 } 96 103 else 97 104 { 105 #if LOSSLESS_CODING && SEQUENCE_LEVEL_LOSSLESS 106 dRdCost = (Double)(uiBits); 107 #else 98 108 dRdCost = ((Double)uiDistortion + (Double)((Int)(uiBits * dLambda+.5))); 99 dRdCost = (Double)(Dist)floor(dRdCost); 109 dRdCost = (Double)(UInt)floor(dRdCost); 110 #endif 100 111 } 101 112 } … … 131 142 { 132 143 // Intra8x8, Intra4x4 Block only... 144 #if LOSSLESS_CODING && SEQUENCE_LEVEL_LOSSLESS 145 dRdCost = (Double)(uiBits); 146 #else 133 147 dRdCost = (((Double)(Int64)uiDistortion) + ((Double)(Int64)uiBits * dLambda)); 148 #endif 134 149 } 135 150 else … … 142 157 else 143 158 { 159 #if LOSSLESS_CODING && SEQUENCE_LEVEL_LOSSLESS 160 dRdCost = (Double)(uiBits); 161 #else 144 162 dRdCost = ((Double)(Int64)uiDistortion + (Double)((Int)((Int64)uiBits * dLambda+.5))); 145 163 dRdCost = (Double)(UInt)floor(dRdCost); 164 #endif 146 165 } 147 166 } … … 166 185 } 167 186 #endif 168 169 187 170 188 // Initalize Function Pointer by [eDFunc] … … 189 207 m_afpDistortFunc[14] = TComRdCost::xGetSAD16N; 190 208 191 m_afpDistortFunc[15] = TComRdCost::xGetSADs; 192 m_afpDistortFunc[16] = TComRdCost::xGetSADs4; 193 m_afpDistortFunc[17] = TComRdCost::xGetSADs8; 194 m_afpDistortFunc[18] = TComRdCost::xGetSADs16; 195 m_afpDistortFunc[19] = TComRdCost::xGetSADs32; 196 m_afpDistortFunc[20] = TComRdCost::xGetSADs64; 197 m_afpDistortFunc[21] = TComRdCost::xGetSADs16N; 198 209 m_afpDistortFunc[15] = TComRdCost::xGetSAD; 210 m_afpDistortFunc[16] = TComRdCost::xGetSAD4; 211 m_afpDistortFunc[17] = TComRdCost::xGetSAD8; 212 m_afpDistortFunc[18] = TComRdCost::xGetSAD16; 213 m_afpDistortFunc[19] = TComRdCost::xGetSAD32; 214 m_afpDistortFunc[20] = TComRdCost::xGetSAD64; 215 m_afpDistortFunc[21] = TComRdCost::xGetSAD16N; 216 217 #if AMP_SAD 218 m_afpDistortFunc[43] = TComRdCost::xGetSAD12; 219 m_afpDistortFunc[44] = TComRdCost::xGetSAD24; 220 m_afpDistortFunc[45] = TComRdCost::xGetSAD48; 221 222 m_afpDistortFunc[46] = TComRdCost::xGetSAD12; 223 m_afpDistortFunc[47] = TComRdCost::xGetSAD24; 224 m_afpDistortFunc[48] = TComRdCost::xGetSAD48; 225 #endif 199 226 m_afpDistortFunc[22] = TComRdCost::xGetHADs; 200 #ifdef DCM_RDCOST_TEMP_FIX //Temporary fix since xGetHADs4 and xGetHADs8 assume that the row size cannot be 1, 2, 3 or 6 when the column size is 4 or 8.201 227 m_afpDistortFunc[23] = TComRdCost::xGetHADs; 202 228 m_afpDistortFunc[24] = TComRdCost::xGetHADs; 203 #else204 m_afpDistortFunc[23] = TComRdCost::xGetHADs4;205 m_afpDistortFunc[24] = TComRdCost::xGetHADs8;206 #endif207 229 m_afpDistortFunc[25] = TComRdCost::xGetHADs; 208 230 m_afpDistortFunc[26] = TComRdCost::xGetHADs; … … 210 232 m_afpDistortFunc[28] = TComRdCost::xGetHADs; 211 233 212 #ifdef ROUNDING_CONTROL_BIPRED 213 m_afpDistortFuncRnd[0] = NULL; 214 m_afpDistortFuncRnd[1] = TComRdCost::xGetSSE; 215 m_afpDistortFuncRnd[2] = TComRdCost::xGetSSE4; 216 m_afpDistortFuncRnd[3] = TComRdCost::xGetSSE8; 217 m_afpDistortFuncRnd[4] = TComRdCost::xGetSSE16; 218 m_afpDistortFuncRnd[5] = TComRdCost::xGetSSE32; 219 m_afpDistortFuncRnd[6] = TComRdCost::xGetSSE64; 220 m_afpDistortFuncRnd[7] = TComRdCost::xGetSSE16N; 221 222 m_afpDistortFuncRnd[8] = TComRdCost::xGetSAD; 223 m_afpDistortFuncRnd[9] = TComRdCost::xGetSAD4; 224 m_afpDistortFuncRnd[10] = TComRdCost::xGetSAD8; 225 m_afpDistortFuncRnd[11] = TComRdCost::xGetSAD16; 226 m_afpDistortFuncRnd[12] = TComRdCost::xGetSAD32; 227 m_afpDistortFuncRnd[13] = TComRdCost::xGetSAD64; 228 m_afpDistortFuncRnd[14] = TComRdCost::xGetSAD16N; 229 230 m_afpDistortFuncRnd[15] = TComRdCost::xGetSADs; 231 m_afpDistortFuncRnd[16] = TComRdCost::xGetSADs4; 232 m_afpDistortFuncRnd[17] = TComRdCost::xGetSADs8; 233 m_afpDistortFuncRnd[18] = TComRdCost::xGetSADs16; 234 m_afpDistortFuncRnd[19] = TComRdCost::xGetSADs32; 235 m_afpDistortFuncRnd[20] = TComRdCost::xGetSADs64; 236 m_afpDistortFuncRnd[21] = TComRdCost::xGetSADs16N; 237 238 m_afpDistortFuncRnd[22] = TComRdCost::xGetHADs; 239 m_afpDistortFuncRnd[23] = TComRdCost::xGetHADs4; 240 m_afpDistortFuncRnd[24] = TComRdCost::xGetHADs8; 241 m_afpDistortFuncRnd[25] = TComRdCost::xGetHADs; 242 m_afpDistortFuncRnd[26] = TComRdCost::xGetHADs; 243 m_afpDistortFuncRnd[27] = TComRdCost::xGetHADs; 244 m_afpDistortFuncRnd[28] = TComRdCost::xGetHADs; 245 #endif 246 234 #if !FIX203 247 235 m_puiComponentCostOriginP = NULL; 248 236 m_puiComponentCost = NULL; 249 237 m_puiVerCost = NULL; 250 238 m_puiHorCost = NULL; 239 #endif 251 240 m_uiCost = 0; 252 241 m_iCostScale = 0; 242 #if !FIX203 253 243 m_iSearchLimit = 0xdeaddead; 254 255 m_puiMultiviewRegCostHorOrgP = 0;256 m_puiMultiviewRegCostVerOrgP = 0;257 m_puiMultiviewRegCostHor = 0;258 m_puiMultiviewRegCostVer = 0;259 244 260 245 #if HHI_VSO … … 271 256 m_dLambdaScale = 1; 272 257 #endif 273 } 274 258 #endif 259 } 260 261 #if !FIX203 275 262 Void TComRdCost::initRateDistortionModel( Int iSubPelSearchLimit ) 276 263 { … … 285 272 m_iSearchLimit = iSubPelSearchLimit; 286 273 287 m_puiComponentCostOriginP = new UInt[ 4 * iSubPelSearchLimit ]; 288 m_puiMultiviewRegCostHorOrgP = new UInt[ 4 * iSubPelSearchLimit ]; 289 m_puiMultiviewRegCostVerOrgP = new UInt[ 4 * iSubPelSearchLimit ]; 274 m_puiComponentCostOriginP = new UInt[ 4 * iSubPelSearchLimit ]; 290 275 iSubPelSearchLimit *= 2; 291 276 292 m_puiComponentCost = m_puiComponentCostOriginP + iSubPelSearchLimit; 293 m_puiMultiviewRegCostHor = m_puiMultiviewRegCostHorOrgP + iSubPelSearchLimit; 294 m_puiMultiviewRegCostVer = m_puiMultiviewRegCostVerOrgP + iSubPelSearchLimit; 277 m_puiComponentCost = m_puiComponentCostOriginP + iSubPelSearchLimit; 295 278 296 279 for( Int n = -iSubPelSearchLimit; n < iSubPelSearchLimit; n++) 297 280 { 298 m_puiComponentCost [n] = xGetComponentBits( n ); 299 m_puiMultiviewRegCostHor[n] = xGetComponentBits( n ); // first version 300 m_puiMultiviewRegCostVer[n] = xGetComponentBits( n ); // first version 281 m_puiComponentCost[n] = xGetComponentBits( n ); 301 282 } 302 283 } … … 310 291 m_puiComponentCostOriginP = NULL; 311 292 } 312 313 if( m_puiMultiviewRegCostHorOrgP ) 314 { 315 delete [] m_puiMultiviewRegCostHorOrgP; 316 m_puiMultiviewRegCostHorOrgP = NULL; 317 } 318 if( m_puiMultiviewRegCostVerOrgP ) 319 { 320 delete [] m_puiMultiviewRegCostVerOrgP; 321 m_puiMultiviewRegCostVerOrgP = NULL; 322 } 323 324 #if HHI_VSO 325 if ( m_apRefPics != NULL ) 326 { 327 delete[] m_apRefPics; 328 m_apRefPics = NULL; 329 } 330 331 if ( m_paaiShiftLUTs != NULL ) { // Delete only first dimension, other dimension are not create in this class 332 delete[] m_paaiShiftLUTs; 333 m_paaiShiftLUTs = NULL; 334 }; 335 #endif 336 } 293 } 294 #endif 337 295 338 296 UInt TComRdCost::xGetComponentBits( Int iVal ) … … 351 309 return uiLength; 352 310 } 353 354 #ifdef ROUNDING_CONTROL_BIPRED355 // Setting the Distortion Parameter for Inter (ME)356 Void TComRdCost::setDistParam_Bi( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, DistParam& rcDistParam )357 {358 // set Original & Curr Pointer / Stride359 rcDistParam.pOrg = pcPatternKey->getROIY();360 rcDistParam.pCur = piRefY;361 362 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();363 rcDistParam.iStrideCur = iRefStride;364 365 // set Block Width / Height366 rcDistParam.iCols = pcPatternKey->getROIYWidth();367 rcDistParam.iRows = pcPatternKey->getROIYHeight();368 rcDistParam.DistFuncRnd = m_afpDistortFuncRnd[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];369 370 // initialize371 rcDistParam.iSubShift = 0;372 }373 374 // Setting the Distortion Parameter for Inter (subpel ME with step)375 Void TComRdCost::setDistParam_Bi( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME )376 {377 // set Original & Curr Pointer / Stride378 rcDistParam.pOrg = pcPatternKey->getROIY();379 rcDistParam.pCur = piRefY;380 381 rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();382 rcDistParam.iStrideCur = iRefStride * iStep;383 384 // set Step for interpolated buffer385 rcDistParam.iStep = iStep;386 387 // set Block Width / Height388 rcDistParam.iCols = pcPatternKey->getROIYWidth();389 rcDistParam.iRows = pcPatternKey->getROIYHeight();390 391 // set distortion function392 if ( !bHADME )393 {394 rcDistParam.DistFuncRnd = m_afpDistortFuncRnd[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];395 }396 else397 {398 rcDistParam.DistFuncRnd = m_afpDistortFuncRnd[DF_HADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];399 }400 401 // initialize402 rcDistParam.iSubShift = 0;403 }404 #endif405 311 406 312 Void TComRdCost::setDistParam( UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam& rcDistParam ) … … 434 340 rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ]; 435 341 342 #if AMP_SAD 343 if (rcDistParam.iCols == 12) 344 { 345 rcDistParam.DistFunc = m_afpDistortFunc[43 ]; 346 } 347 else if (rcDistParam.iCols == 24) 348 { 349 rcDistParam.DistFunc = m_afpDistortFunc[44 ]; 350 } 351 else if (rcDistParam.iCols == 48) 352 { 353 rcDistParam.DistFunc = m_afpDistortFunc[45 ]; 354 } 355 #endif 356 436 357 // initialize 437 358 rcDistParam.iSubShift = 0; … … 443 364 444 365 // Setting the Distortion Parameter for Inter (subpel ME with step) 366 #if NS_HAD 367 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME, Bool bUseNSHAD ) 368 #else 445 369 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME ) 370 #endif 446 371 { 447 372 // set Original & Curr Pointer / Stride … … 458 383 rcDistParam.iCols = pcPatternKey->getROIYWidth(); 459 384 rcDistParam.iRows = pcPatternKey->getROIYHeight(); 385 #if NS_HAD 386 rcDistParam.bUseNSHAD = bUseNSHAD; 387 #endif 460 388 461 389 // set distortion function … … 463 391 { 464 392 rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ]; 393 #if AMP_SAD 394 if (rcDistParam.iCols == 12) 395 { 396 rcDistParam.DistFunc = m_afpDistortFunc[46 ]; 397 } 398 else if (rcDistParam.iCols == 24) 399 { 400 rcDistParam.DistFunc = m_afpDistortFunc[47 ]; 401 } 402 else if (rcDistParam.iCols == 48) 403 { 404 rcDistParam.DistFunc = m_afpDistortFunc[48 ]; 405 } 406 #endif 465 407 } 466 408 else … … 478 420 479 421 Void 422 #if NS_HAD 423 TComRdCost::setDistParam( DistParam& rcDP, Pel* p1, Int iStride1, Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard, Bool bUseNSHAD ) 424 #else 480 425 TComRdCost::setDistParam( DistParam& rcDP, Pel* p1, Int iStride1, Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard ) 426 #endif 481 427 { 482 428 rcDP.pOrg = p1; … … 493 439 rcDP.iStrideUsed = 0; 494 440 #endif 441 #if NS_HAD 442 rcDP.bUseNSHAD = bUseNSHAD; 443 #endif 495 444 } 496 445 … … 561 510 #endif 562 511 512 #if WEIGHTED_CHROMA_DISTORTION 513 UInt TComRdCost::getDistPart( Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, Bool bWeighted, DFunc eDFunc ) 514 #else 563 515 UInt TComRdCost::getDistPart( Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc ) 516 #endif 564 517 { 565 518 DistParam cDtParam; … … 569 522 cDtParam.iStrideOrg = iOrgStride; 570 523 cDtParam.iStrideCur = iCurStride; 571 #ifdef DCM_RDCOST_TEMP_FIX //Temporary fix since DistParam is lacking a constructor and the variable iStep is not initialized572 524 cDtParam.iStep = 1; 573 #endif 574 #ifdef WEIGHT_PRED 575 cDtParam.applyWeight = false; 525 526 cDtParam.bApplyWeight = false; 576 527 cDtParam.uiComp = 255; // just for assert: to be sure it was set before use, since only values 0,1 or 2 are allowed. 577 #endif 528 529 #if WEIGHTED_CHROMA_DISTORTION 530 if (bWeighted) 531 { 532 return ((int) (m_chromaDistortionWeight * cDtParam.DistFunc( &cDtParam ))); 533 } 534 else 535 { 536 return cDtParam.DistFunc( &cDtParam ); 537 } 538 #else 578 539 return cDtParam.DistFunc( &cDtParam ); 579 } 540 #endif 541 } 542 543 580 544 581 545 // ==================================================================================================================== … … 587 551 // -------------------------------------------------------------------------------------------------------------------- 588 552 589 #ifdef ROUNDING_CONTROL_BIPRED590 UInt TComRdCost::xGetSAD( DistParam* pcDtParam, Pel* pRefY, Bool bRound )591 {592 Pel* piOrg = pcDtParam->pOrg;593 Pel* piCur = pcDtParam->pCur;594 Pel* piRef = pRefY;595 Int iRows = pcDtParam->iRows;596 Int iCols = pcDtParam->iCols;597 Int iStrideCur = pcDtParam->iStrideCur;598 Int iStrideOrg = pcDtParam->iStrideOrg;599 Pel pred;600 601 UInt uiSum = 0;602 603 for( ; iRows != 0; iRows-- )604 {605 for (Int n = 0; n < iCols; n++ )606 {607 pred = (piCur[n] + piRef[n] + bRound) >> 1 ;608 uiSum += abs( piOrg[n] - pred );609 }610 piOrg += iStrideOrg;611 piCur += iStrideCur;612 piRef += iCols;613 }614 615 return ( uiSum >> g_uiBitIncrement );616 }617 618 UInt TComRdCost::xGetSAD4( DistParam* pcDtParam, Pel* pRefY, Bool bRound )619 {620 Pel* piOrg = pcDtParam->pOrg;621 Pel* piCur = pcDtParam->pCur;622 Pel* piRef = pRefY;623 Int iRows = pcDtParam->iRows;624 Int iSubShift = pcDtParam->iSubShift;625 Int iSubStep = ( 1 << iSubShift );626 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;627 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;628 Int iStrideRef = pcDtParam->iCols*iSubStep;629 Pel pred;630 631 UInt uiSum = 0;632 633 for( ; iRows != 0; iRows-=iSubStep )634 {635 pred = (piCur[0] + piRef[0] + bRound) >> 1 ;636 uiSum += abs( piOrg[0] - pred );637 pred = (piCur[1] + piRef[1] + bRound) >> 1 ;638 uiSum += abs( piOrg[1] - pred );639 pred = (piCur[2] + piRef[2] + bRound) >> 1 ;640 uiSum += abs( piOrg[2] - pred );641 pred = (piCur[3] + piRef[3] + bRound) >> 1 ;642 uiSum += abs( piOrg[3] - pred );643 644 piOrg += iStrideOrg;645 piCur += iStrideCur;646 piRef += iStrideRef;647 }648 649 uiSum <<= iSubShift;650 return ( uiSum >> g_uiBitIncrement );651 }652 653 UInt TComRdCost::xGetSAD8( DistParam* pcDtParam, Pel* pRefY, Bool bRound )654 {655 Pel* piOrg = pcDtParam->pOrg;656 Pel* piCur = pcDtParam->pCur;657 Pel* piRef = pRefY;658 Int iRows = pcDtParam->iRows;659 Int iSubShift = pcDtParam->iSubShift;660 Int iSubStep = ( 1 << iSubShift );661 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;662 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;663 Int iStrideRef = pcDtParam->iCols*iSubStep;664 Pel pred;665 666 UInt uiSum = 0;667 668 for( ; iRows != 0; iRows-=iSubStep )669 {670 pred = (piCur[0] + piRef[0] + bRound) >> 1 ;671 uiSum += abs( piOrg[0] - pred );672 pred = (piCur[1] + piRef[1] + bRound) >> 1 ;673 uiSum += abs( piOrg[1] - pred );674 pred = (piCur[2] + piRef[2] + bRound) >> 1 ;675 uiSum += abs( piOrg[2] - pred );676 pred = (piCur[3] + piRef[3] + bRound) >> 1 ;677 uiSum += abs( piOrg[3] - pred );678 pred = (piCur[4] + piRef[4] + bRound) >> 1 ;679 uiSum += abs( piOrg[4] - pred );680 pred = (piCur[5] + piRef[5] + bRound) >> 1 ;681 uiSum += abs( piOrg[5] - pred );682 pred = (piCur[6] + piRef[6] + bRound) >> 1 ;683 uiSum += abs( piOrg[6] - pred );684 pred = (piCur[7] + piRef[7] + bRound) >> 1 ;685 uiSum += abs( piOrg[7] - pred );686 687 piOrg += iStrideOrg;688 piCur += iStrideCur;689 piRef += iStrideRef;690 }691 692 uiSum <<= iSubShift;693 return ( uiSum >> g_uiBitIncrement );694 }695 696 UInt TComRdCost::xGetSAD16( DistParam* pcDtParam, Pel* pRefY, Bool bRound )697 {698 Pel* piOrg = pcDtParam->pOrg;699 Pel* piCur = pcDtParam->pCur;700 Pel* piRef = pRefY;701 Int iRows = pcDtParam->iRows;702 Int iSubShift = pcDtParam->iSubShift;703 Int iSubStep = ( 1 << iSubShift );704 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;705 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;706 Int iStrideRef = pcDtParam->iCols*iSubStep;707 Pel pred;708 709 UInt uiSum = 0;710 711 for( ; iRows != 0; iRows-=iSubStep )712 {713 pred = (piCur[0] + piRef[0] + bRound) >> 1 ;714 uiSum += abs( piOrg[0] - pred );715 pred = (piCur[1] + piRef[1] + bRound) >> 1 ;716 uiSum += abs( piOrg[1] - pred );717 pred = (piCur[2] + piRef[2] + bRound) >> 1 ;718 uiSum += abs( piOrg[2] - pred );719 pred = (piCur[3] + piRef[3] + bRound) >> 1 ;720 uiSum += abs( piOrg[3] - pred );721 pred = (piCur[4] + piRef[4] + bRound) >> 1 ;722 uiSum += abs( piOrg[4] - pred );723 pred = (piCur[5] + piRef[5] + bRound) >> 1 ;724 uiSum += abs( piOrg[5] - pred );725 pred = (piCur[6] + piRef[6] + bRound) >> 1 ;726 uiSum += abs( piOrg[6] - pred );727 pred = (piCur[7] + piRef[7] + bRound) >> 1 ;728 uiSum += abs( piOrg[7] - pred );729 pred = (piCur[8] + piRef[8] + bRound) >> 1 ;730 uiSum += abs( piOrg[8] - pred );731 pred = (piCur[9] + piRef[9] + bRound) >> 1 ;732 uiSum += abs( piOrg[9] - pred );733 pred = (piCur[10] + piRef[10] + bRound) >> 1 ;734 uiSum += abs( piOrg[10] - pred );735 pred = (piCur[11] + piRef[11] + bRound) >> 1 ;736 uiSum += abs( piOrg[11] - pred );737 pred = (piCur[12] + piRef[12] + bRound) >> 1 ;738 uiSum += abs( piOrg[12] - pred );739 pred = (piCur[13] + piRef[13] + bRound) >> 1 ;740 uiSum += abs( piOrg[13] - pred );741 pred = (piCur[14] + piRef[14] + bRound) >> 1 ;742 uiSum += abs( piOrg[14] - pred );743 pred = (piCur[15] + piRef[15] + bRound) >> 1 ;744 uiSum += abs( piOrg[15] - pred );745 746 piOrg += iStrideOrg;747 piCur += iStrideCur;748 piRef += iStrideRef;749 }750 751 uiSum <<= iSubShift;752 return ( uiSum >> g_uiBitIncrement );753 }754 755 UInt TComRdCost::xGetSAD16N( DistParam* pcDtParam, Pel* pRefY, Bool bRound )756 {757 Pel* piOrg = pcDtParam->pOrg;758 Pel* piCur = pcDtParam->pCur;759 Pel* piRef = pRefY;760 Int iRows = pcDtParam->iRows;761 Int iCols = pcDtParam->iCols;762 Int iSubShift = pcDtParam->iSubShift;763 Int iSubStep = ( 1 << iSubShift );764 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;765 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;766 Int iStrideRef = iCols*iSubStep;767 Pel pred;768 769 UInt uiSum = 0;770 771 for( ; iRows != 0; iRows-=iSubStep )772 {773 for (Int n = 0; n < iCols; n+=16 )774 {775 776 pred = (piCur[n+ 0] + piRef[n+ 0] + bRound) >> 1 ;777 uiSum += abs( piOrg[n+ 0] - pred );778 pred = (piCur[n+ 1] + piRef[n+ 1] + bRound) >> 1 ;779 uiSum += abs( piOrg[n+ 1] - pred );780 pred = (piCur[n+ 2] + piRef[n+ 2] + bRound) >> 1 ;781 uiSum += abs( piOrg[n+ 2] - pred );782 pred = (piCur[n+ 3] + piRef[n+ 3] + bRound) >> 1 ;783 uiSum += abs( piOrg[n+ 3] - pred );784 pred = (piCur[n+ 4] + piRef[n+ 4] + bRound) >> 1 ;785 uiSum += abs( piOrg[n+ 4] - pred );786 pred = (piCur[n+ 5] + piRef[n+ 5] + bRound) >> 1 ;787 uiSum += abs( piOrg[n+ 5] - pred );788 pred = (piCur[n+ 6] + piRef[n+ 6] + bRound) >> 1 ;789 uiSum += abs( piOrg[n+ 6] - pred );790 pred = (piCur[n+ 7] + piRef[n+ 7] + bRound) >> 1 ;791 uiSum += abs( piOrg[n+ 7] - pred );792 pred = (piCur[n+ 8] + piRef[n+ 8] + bRound) >> 1 ;793 uiSum += abs( piOrg[n+ 8] - pred );794 pred = (piCur[n+ 9] + piRef[n+ 9] + bRound) >> 1 ;795 uiSum += abs( piOrg[n+ 9] - pred );796 pred = (piCur[n+ 10] + piRef[n+ 10] + bRound) >> 1 ;797 uiSum += abs( piOrg[n+ 10] - pred );798 pred = (piCur[n+ 11] + piRef[n+ 11] + bRound) >> 1 ;799 uiSum += abs( piOrg[n+ 11] - pred );800 pred = (piCur[n+ 12] + piRef[n+ 12] + bRound) >> 1 ;801 uiSum += abs( piOrg[n+ 12] - pred );802 pred = (piCur[n+ 13] + piRef[n+ 13] + bRound) >> 1 ;803 uiSum += abs( piOrg[n+ 13] - pred );804 pred = (piCur[n+ 14] + piRef[n+ 14] + bRound) >> 1 ;805 uiSum += abs( piOrg[n+ 14] - pred );806 pred = (piCur[n+ 15] + piRef[n+ 15] + bRound) >> 1 ;807 uiSum += abs( piOrg[n+ 15] - pred );808 809 }810 piOrg += iStrideOrg;811 piCur += iStrideCur;812 piRef += iStrideRef;813 }814 815 uiSum <<= iSubShift;816 return ( uiSum >> g_uiBitIncrement );817 }818 819 UInt TComRdCost::xGetSAD32( DistParam* pcDtParam, Pel* pRefY, Bool bRound )820 {821 Pel* piOrg = pcDtParam->pOrg;822 Pel* piCur = pcDtParam->pCur;823 Pel* piRef = pRefY;824 Int iRows = pcDtParam->iRows;825 Int iSubShift = pcDtParam->iSubShift;826 Int iSubStep = ( 1 << iSubShift );827 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;828 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;829 Int iStrideRef = pcDtParam->iCols*iSubStep;830 Pel pred;831 832 UInt uiSum = 0;833 834 for( ; iRows != 0; iRows-=iSubStep )835 {836 pred = (piCur[0] + piRef[0] + bRound) >> 1 ;837 uiSum += abs( piOrg[0] - pred );838 pred = (piCur[1] + piRef[1] + bRound) >> 1 ;839 uiSum += abs( piOrg[1] - pred );840 pred = (piCur[2] + piRef[2] + bRound) >> 1 ;841 uiSum += abs( piOrg[2] - pred );842 pred = (piCur[3] + piRef[3] + bRound) >> 1 ;843 uiSum += abs( piOrg[3] - pred );844 pred = (piCur[4] + piRef[4] + bRound) >> 1 ;845 uiSum += abs( piOrg[4] - pred );846 pred = (piCur[5] + piRef[5] + bRound) >> 1 ;847 uiSum += abs( piOrg[5] - pred );848 pred = (piCur[6] + piRef[6] + bRound) >> 1 ;849 uiSum += abs( piOrg[6] - pred );850 pred = (piCur[7] + piRef[7] + bRound) >> 1 ;851 uiSum += abs( piOrg[7] - pred );852 pred = (piCur[8] + piRef[8] + bRound) >> 1 ;853 uiSum += abs( piOrg[8] - pred );854 pred = (piCur[9] + piRef[9] + bRound) >> 1 ;855 uiSum += abs( piOrg[9] - pred );856 857 pred = (piCur[10] + piRef[10] + bRound) >> 1 ;858 uiSum += abs( piOrg[10] - pred );859 pred = (piCur[11] + piRef[11] + bRound) >> 1 ;860 uiSum += abs( piOrg[11] - pred );861 pred = (piCur[12] + piRef[12] + bRound) >> 1 ;862 uiSum += abs( piOrg[12] - pred );863 pred = (piCur[13] + piRef[13] + bRound) >> 1 ;864 uiSum += abs( piOrg[13] - pred );865 pred = (piCur[14] + piRef[14] + bRound) >> 1 ;866 uiSum += abs( piOrg[14] - pred );867 pred = (piCur[15] + piRef[15] + bRound) >> 1 ;868 uiSum += abs( piOrg[15] - pred );869 pred = (piCur[16] + piRef[16] + bRound) >> 1 ;870 uiSum += abs( piOrg[16] - pred );871 pred = (piCur[17] + piRef[17] + bRound) >> 1 ;872 uiSum += abs( piOrg[17] - pred );873 pred = (piCur[18] + piRef[18] + bRound) >> 1 ;874 uiSum += abs( piOrg[18] - pred );875 pred = (piCur[19] + piRef[19] + bRound) >> 1 ;876 uiSum += abs( piOrg[19] - pred );877 878 pred = (piCur[20] + piRef[20] + bRound) >> 1 ;879 uiSum += abs( piOrg[20] - pred );880 pred = (piCur[21] + piRef[21] + bRound) >> 1 ;881 uiSum += abs( piOrg[21] - pred );882 pred = (piCur[22] + piRef[22] + bRound) >> 1 ;883 uiSum += abs( piOrg[22] - pred );884 pred = (piCur[23] + piRef[23] + bRound) >> 1 ;885 uiSum += abs( piOrg[23] - pred );886 pred = (piCur[24] + piRef[24] + bRound) >> 1 ;887 uiSum += abs( piOrg[24] - pred );888 pred = (piCur[25] + piRef[25] + bRound) >> 1 ;889 uiSum += abs( piOrg[25] - pred );890 pred = (piCur[26] + piRef[26] + bRound) >> 1 ;891 uiSum += abs( piOrg[26] - pred );892 pred = (piCur[27] + piRef[27] + bRound) >> 1 ;893 uiSum += abs( piOrg[27] - pred );894 pred = (piCur[28] + piRef[28] + bRound) >> 1 ;895 uiSum += abs( piOrg[28] - pred );896 pred = (piCur[29] + piRef[29] + bRound) >> 1 ;897 uiSum += abs( piOrg[29] - pred );898 899 pred = (piCur[30] + piRef[30] + bRound) >> 1 ;900 uiSum += abs( piOrg[30] - pred );901 pred = (piCur[31] + piRef[31] + bRound) >> 1 ;902 uiSum += abs( piOrg[31] - pred );903 904 piOrg += iStrideOrg;905 piCur += iStrideCur;906 piRef += iStrideRef;907 }908 909 uiSum <<= iSubShift;910 return ( uiSum >> g_uiBitIncrement );911 }912 913 UInt TComRdCost::xGetSAD64( DistParam* pcDtParam, Pel* pRefY, Bool bRound )914 {915 Pel* piOrg = pcDtParam->pOrg;916 Pel* piCur = pcDtParam->pCur;917 Pel* piRef = pRefY;918 Int iRows = pcDtParam->iRows;919 Int iSubShift = pcDtParam->iSubShift;920 Int iSubStep = ( 1 << iSubShift );921 Int iStrideCur = pcDtParam->iStrideCur*iSubStep;922 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;923 Int iStrideRef = pcDtParam->iCols*iSubStep;924 Pel pred;925 926 UInt uiSum = 0;927 928 for( ; iRows != 0; iRows-=iSubStep )929 {930 931 pred = (piCur[0] + piRef[0] + bRound) >> 1 ;932 uiSum += abs( piOrg[0] - pred );933 pred = (piCur[1] + piRef[1] + bRound) >> 1 ;934 uiSum += abs( piOrg[1] - pred );935 pred = (piCur[2] + piRef[2] + bRound) >> 1 ;936 uiSum += abs( piOrg[2] - pred );937 pred = (piCur[3] + piRef[3] + bRound) >> 1 ;938 uiSum += abs( piOrg[3] - pred );939 pred = (piCur[4] + piRef[4] + bRound) >> 1 ;940 uiSum += abs( piOrg[4] - pred );941 pred = (piCur[5] + piRef[5] + bRound) >> 1 ;942 uiSum += abs( piOrg[5] - pred );943 pred = (piCur[6] + piRef[6] + bRound) >> 1 ;944 uiSum += abs( piOrg[6] - pred );945 pred = (piCur[7] + piRef[7] + bRound) >> 1 ;946 uiSum += abs( piOrg[7] - pred );947 pred = (piCur[8] + piRef[8] + bRound) >> 1 ;948 uiSum += abs( piOrg[8] - pred );949 pred = (piCur[9] + piRef[9] + bRound) >> 1 ;950 uiSum += abs( piOrg[9] - pred );951 952 pred = (piCur[10] + piRef[10] + bRound) >> 1 ;953 uiSum += abs( piOrg[10] - pred );954 pred = (piCur[11] + piRef[11] + bRound) >> 1 ;955 uiSum += abs( piOrg[11] - pred );956 pred = (piCur[12] + piRef[12] + bRound) >> 1 ;957 uiSum += abs( piOrg[12] - pred );958 pred = (piCur[13] + piRef[13] + bRound) >> 1 ;959 uiSum += abs( piOrg[13] - pred );960 pred = (piCur[14] + piRef[14] + bRound) >> 1 ;961 uiSum += abs( piOrg[14] - pred );962 pred = (piCur[15] + piRef[15] + bRound) >> 1 ;963 uiSum += abs( piOrg[15] - pred );964 pred = (piCur[16] + piRef[16] + bRound) >> 1 ;965 uiSum += abs( piOrg[16] - pred );966 pred = (piCur[17] + piRef[17] + bRound) >> 1 ;967 uiSum += abs( piOrg[17] - pred );968 pred = (piCur[18] + piRef[18] + bRound) >> 1 ;969 uiSum += abs( piOrg[18] - pred );970 pred = (piCur[19] + piRef[19] + bRound) >> 1 ;971 uiSum += abs( piOrg[19] - pred );972 973 pred = (piCur[20] + piRef[20] + bRound) >> 1 ;974 uiSum += abs( piOrg[20] - pred );975 pred = (piCur[21] + piRef[21] + bRound) >> 1 ;976 uiSum += abs( piOrg[21] - pred );977 pred = (piCur[22] + piRef[22] + bRound) >> 1 ;978 uiSum += abs( piOrg[22] - pred );979 pred = (piCur[23] + piRef[23] + bRound) >> 1 ;980 uiSum += abs( piOrg[23] - pred );981 pred = (piCur[24] + piRef[24] + bRound) >> 1 ;982 uiSum += abs( piOrg[24] - pred );983 pred = (piCur[25] + piRef[25] + bRound) >> 1 ;984 uiSum += abs( piOrg[25] - pred );985 pred = (piCur[26] + piRef[26] + bRound) >> 1 ;986 uiSum += abs( piOrg[26] - pred );987 pred = (piCur[27] + piRef[27] + bRound) >> 1 ;988 uiSum += abs( piOrg[27] - pred );989 pred = (piCur[28] + piRef[28] + bRound) >> 1 ;990 uiSum += abs( piOrg[28] - pred );991 pred = (piCur[29] + piRef[29] + bRound) >> 1 ;992 uiSum += abs( piOrg[29] - pred );993 994 pred = (piCur[30] + piRef[30] + bRound) >> 1 ;995 uiSum += abs( piOrg[30] - pred );996 pred = (piCur[31] + piRef[31] + bRound) >> 1 ;997 uiSum += abs( piOrg[31] - pred );998 pred = (piCur[32] + piRef[32] + bRound) >> 1 ;999 uiSum += abs( piOrg[32] - pred );1000 pred = (piCur[33] + piRef[33] + bRound) >> 1 ;1001 uiSum += abs( piOrg[33] - pred );1002 pred = (piCur[34] + piRef[34] + bRound) >> 1 ;1003 uiSum += abs( piOrg[34] - pred );1004 pred = (piCur[35] + piRef[35] + bRound) >> 1 ;1005 uiSum += abs( piOrg[35] - pred );1006 pred = (piCur[36] + piRef[36] + bRound) >> 1 ;1007 uiSum += abs( piOrg[36] - pred );1008 pred = (piCur[37] + piRef[37] + bRound) >> 1 ;1009 uiSum += abs( piOrg[37] - pred );1010 pred = (piCur[38] + piRef[38] + bRound) >> 1 ;1011 uiSum += abs( piOrg[38] - pred );1012 pred = (piCur[39] + piRef[39] + bRound) >> 1 ;1013 uiSum += abs( piOrg[39] - pred );1014 1015 pred = (piCur[40] + piRef[40] + bRound) >> 1 ;1016 uiSum += abs( piOrg[40] - pred );1017 pred = (piCur[41] + piRef[41] + bRound) >> 1 ;1018 uiSum += abs( piOrg[41] - pred );1019 pred = (piCur[42] + piRef[42] + bRound) >> 1 ;1020 uiSum += abs( piOrg[42] - pred );1021 pred = (piCur[43] + piRef[43] + bRound) >> 1 ;1022 uiSum += abs( piOrg[43] - pred );1023 pred = (piCur[44] + piRef[44] + bRound) >> 1 ;1024 uiSum += abs( piOrg[44] - pred );1025 pred = (piCur[45] + piRef[45] + bRound) >> 1 ;1026 uiSum += abs( piOrg[45] - pred );1027 pred = (piCur[46] + piRef[46] + bRound) >> 1 ;1028 uiSum += abs( piOrg[46] - pred );1029 pred = (piCur[47] + piRef[47] + bRound) >> 1 ;1030 uiSum += abs( piOrg[47] - pred );1031 pred = (piCur[48] + piRef[48] + bRound) >> 1 ;1032 uiSum += abs( piOrg[48] - pred );1033 pred = (piCur[49] + piRef[49] + bRound) >> 1 ;1034 uiSum += abs( piOrg[49] - pred );1035 1036 pred = (piCur[50] + piRef[50] + bRound) >> 1 ;1037 uiSum += abs( piOrg[50] - pred );1038 pred = (piCur[51] + piRef[51] + bRound) >> 1 ;1039 uiSum += abs( piOrg[51] - pred );1040 pred = (piCur[52] + piRef[52] + bRound) >> 1 ;1041 uiSum += abs( piOrg[52] - pred );1042 pred = (piCur[53] + piRef[53] + bRound) >> 1 ;1043 uiSum += abs( piOrg[53] - pred );1044 pred = (piCur[54] + piRef[54] + bRound) >> 1 ;1045 uiSum += abs( piOrg[54] - pred );1046 pred = (piCur[55] + piRef[55] + bRound) >> 1 ;1047 uiSum += abs( piOrg[55] - pred );1048 pred = (piCur[56] + piRef[56] + bRound) >> 1 ;1049 uiSum += abs( piOrg[56] - pred );1050 pred = (piCur[57] + piRef[57] + bRound) >> 1 ;1051 uiSum += abs( piOrg[57] - pred );1052 pred = (piCur[58] + piRef[58] + bRound) >> 1 ;1053 uiSum += abs( piOrg[58] - pred );1054 pred = (piCur[59] + piRef[59] + bRound) >> 1 ;1055 uiSum += abs( piOrg[59] - pred );1056 1057 pred = (piCur[60] + piRef[60] + bRound) >> 1 ;1058 uiSum += abs( piOrg[60] - pred );1059 pred = (piCur[61] + piRef[61] + bRound) >> 1 ;1060 uiSum += abs( piOrg[61] - pred );1061 pred = (piCur[62] + piRef[62] + bRound) >> 1 ;1062 uiSum += abs( piOrg[62] - pred );1063 pred = (piCur[63] + piRef[63] + bRound) >> 1 ;1064 uiSum += abs( piOrg[63] - pred );1065 1066 piOrg += iStrideOrg;1067 piCur += iStrideCur;1068 piRef += iStrideRef;1069 }1070 1071 uiSum <<= iSubShift;1072 return ( uiSum >> g_uiBitIncrement );1073 }1074 #endif1075 1076 553 UInt TComRdCost::xGetSAD( DistParam* pcDtParam ) 1077 554 { 1078 #ifdef WEIGHT_PRED 1079 if ( pcDtParam->applyWeight ) 1080 { 1081 assert(pcDtParam->iSubShift==0); 555 if ( pcDtParam->bApplyWeight ) 556 { 1082 557 return xGetSADw( pcDtParam ); 1083 558 } 1084 #endif1085 559 Pel* piOrg = pcDtParam->pOrg; 1086 560 Pel* piCur = pcDtParam->pCur; … … 1107 581 UInt TComRdCost::xGetSAD4( DistParam* pcDtParam ) 1108 582 { 1109 #ifdef WEIGHT_PRED 1110 if ( pcDtParam->applyWeight ) 583 if ( pcDtParam->bApplyWeight ) 1111 584 { 1112 585 return xGetSADw( pcDtParam ); 1113 586 } 1114 #endif1115 587 Pel* piOrg = pcDtParam->pOrg; 1116 588 Pel* piCur = pcDtParam->pCur; … … 1140 612 UInt TComRdCost::xGetSAD8( DistParam* pcDtParam ) 1141 613 { 1142 #ifdef WEIGHT_PRED 1143 if ( pcDtParam->applyWeight ) 614 if ( pcDtParam->bApplyWeight ) 1144 615 { 1145 616 return xGetSADw( pcDtParam ); 1146 617 } 1147 #endif1148 618 Pel* piOrg = pcDtParam->pOrg; 1149 619 Pel* piCur = pcDtParam->pCur; … … 1177 647 UInt TComRdCost::xGetSAD16( DistParam* pcDtParam ) 1178 648 { 1179 #ifdef WEIGHT_PRED 1180 if ( pcDtParam->applyWeight ) 649 if ( pcDtParam->bApplyWeight ) 1181 650 { 1182 651 return xGetSADw( pcDtParam ); 1183 652 } 1184 #endif1185 653 Pel* piOrg = pcDtParam->pOrg; 1186 654 Pel* piCur = pcDtParam->pCur; … … 1220 688 } 1221 689 690 #if AMP_SAD 691 UInt TComRdCost::xGetSAD12( DistParam* pcDtParam ) 692 { 693 if ( pcDtParam->bApplyWeight ) 694 { 695 return xGetSADw( pcDtParam ); 696 } 697 Pel* piOrg = pcDtParam->pOrg; 698 Pel* piCur = pcDtParam->pCur; 699 Int iRows = pcDtParam->iRows; 700 Int iSubShift = pcDtParam->iSubShift; 701 Int iSubStep = ( 1 << iSubShift ); 702 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; 703 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; 704 705 UInt uiSum = 0; 706 707 for( ; iRows != 0; iRows-=iSubStep ) 708 { 709 uiSum += abs( piOrg[0] - piCur[0] ); 710 uiSum += abs( piOrg[1] - piCur[1] ); 711 uiSum += abs( piOrg[2] - piCur[2] ); 712 uiSum += abs( piOrg[3] - piCur[3] ); 713 uiSum += abs( piOrg[4] - piCur[4] ); 714 uiSum += abs( piOrg[5] - piCur[5] ); 715 uiSum += abs( piOrg[6] - piCur[6] ); 716 uiSum += abs( piOrg[7] - piCur[7] ); 717 uiSum += abs( piOrg[8] - piCur[8] ); 718 uiSum += abs( piOrg[9] - piCur[9] ); 719 uiSum += abs( piOrg[10] - piCur[10] ); 720 uiSum += abs( piOrg[11] - piCur[11] ); 721 722 piOrg += iStrideOrg; 723 piCur += iStrideCur; 724 } 725 726 uiSum <<= iSubShift; 727 return ( uiSum >> g_uiBitIncrement ); 728 } 729 #endif 730 1222 731 UInt TComRdCost::xGetSAD16N( DistParam* pcDtParam ) 1223 732 { 1224 #ifdef WEIGHT_PRED1225 if ( pcDtParam->applyWeight )1226 {1227 return xGetSAD16Nw( pcDtParam );1228 }1229 #endif1230 733 Pel* piOrg = pcDtParam->pOrg; 1231 734 Pel* piCur = pcDtParam->pCur; … … 1270 773 UInt TComRdCost::xGetSAD32( DistParam* pcDtParam ) 1271 774 { 1272 #ifdef WEIGHT_PRED 1273 if ( pcDtParam->applyWeight ) 775 if ( pcDtParam->bApplyWeight ) 1274 776 { 1275 777 return xGetSADw( pcDtParam ); 1276 778 } 1277 #endif1278 779 Pel* piOrg = pcDtParam->pOrg; 1279 780 Pel* piCur = pcDtParam->pCur; … … 1329 830 } 1330 831 832 #if AMP_SAD 833 UInt TComRdCost::xGetSAD24( DistParam* pcDtParam ) 834 { 835 if ( pcDtParam->bApplyWeight ) 836 { 837 return xGetSADw( pcDtParam ); 838 } 839 Pel* piOrg = pcDtParam->pOrg; 840 Pel* piCur = pcDtParam->pCur; 841 Int iRows = pcDtParam->iRows; 842 Int iSubShift = pcDtParam->iSubShift; 843 Int iSubStep = ( 1 << iSubShift ); 844 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; 845 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; 846 847 UInt uiSum = 0; 848 849 for( ; iRows != 0; iRows-=iSubStep ) 850 { 851 uiSum += abs( piOrg[0] - piCur[0] ); 852 uiSum += abs( piOrg[1] - piCur[1] ); 853 uiSum += abs( piOrg[2] - piCur[2] ); 854 uiSum += abs( piOrg[3] - piCur[3] ); 855 uiSum += abs( piOrg[4] - piCur[4] ); 856 uiSum += abs( piOrg[5] - piCur[5] ); 857 uiSum += abs( piOrg[6] - piCur[6] ); 858 uiSum += abs( piOrg[7] - piCur[7] ); 859 uiSum += abs( piOrg[8] - piCur[8] ); 860 uiSum += abs( piOrg[9] - piCur[9] ); 861 uiSum += abs( piOrg[10] - piCur[10] ); 862 uiSum += abs( piOrg[11] - piCur[11] ); 863 uiSum += abs( piOrg[12] - piCur[12] ); 864 uiSum += abs( piOrg[13] - piCur[13] ); 865 uiSum += abs( piOrg[14] - piCur[14] ); 866 uiSum += abs( piOrg[15] - piCur[15] ); 867 uiSum += abs( piOrg[16] - piCur[16] ); 868 uiSum += abs( piOrg[17] - piCur[17] ); 869 uiSum += abs( piOrg[18] - piCur[18] ); 870 uiSum += abs( piOrg[19] - piCur[19] ); 871 uiSum += abs( piOrg[20] - piCur[20] ); 872 uiSum += abs( piOrg[21] - piCur[21] ); 873 uiSum += abs( piOrg[22] - piCur[22] ); 874 uiSum += abs( piOrg[23] - piCur[23] ); 875 876 piOrg += iStrideOrg; 877 piCur += iStrideCur; 878 } 879 880 uiSum <<= iSubShift; 881 return ( uiSum >> g_uiBitIncrement ); 882 } 883 884 #endif 885 1331 886 UInt TComRdCost::xGetSAD64( DistParam* pcDtParam ) 1332 887 { 1333 #ifdef WEIGHT_PRED 1334 if ( pcDtParam->applyWeight ) 888 if ( pcDtParam->bApplyWeight ) 1335 889 { 1336 890 return xGetSADw( pcDtParam ); 1337 891 } 1338 #endif1339 892 Pel* piOrg = pcDtParam->pOrg; 1340 893 Pel* piCur = pcDtParam->pCur; … … 1422 975 } 1423 976 1424 // -------------------------------------------------------------------------------------------------------------------- 1425 // SAD with step (used in fractional search)1426 // -------------------------------------------------------------------------------------------------------------------- 1427 1428 #ifdef ROUNDING_CONTROL_BIPRED 1429 UInt TComRdCost::xGetSADs( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 1430 { 977 #if AMP_SAD 978 UInt TComRdCost::xGetSAD48( DistParam* pcDtParam ) 979 { 980 if ( pcDtParam->bApplyWeight ) 981 { 982 return xGetSADw( pcDtParam ); 983 } 1431 984 Pel* piOrg = pcDtParam->pOrg; 1432 985 Pel* piCur = pcDtParam->pCur; 1433 Pel* piRef = pRefY;1434 986 Int iRows = pcDtParam->iRows; 1435 Int iCols = pcDtParam->iCols; 1436 Int iStrideCur = pcDtParam->iStrideCur; 1437 Int iStrideOrg = pcDtParam->iStrideOrg; 1438 Int iStep = pcDtParam->iStep; 1439 Pel pred; 987 Int iSubShift = pcDtParam->iSubShift; 988 Int iSubStep = ( 1 << iSubShift ); 989 Int iStrideCur = pcDtParam->iStrideCur*iSubStep; 990 Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep; 1440 991 1441 992 UInt uiSum = 0; 1442 993 1443 for( ; iRows != 0; iRows-- ) 1444 { 1445 for (Int n = 0; n < iCols; n++ ) 1446 { 1447 pred = (piCur[n*iStep] + piRef[n] + bRound) >> 1 ; 1448 uiSum += abs( piOrg[n] - pred ); 1449 } 1450 piOrg += iStrideOrg; 1451 piCur += iStrideCur; 1452 piRef += iCols; 1453 } 1454 1455 return ( uiSum >> g_uiBitIncrement ); 1456 } 1457 1458 UInt TComRdCost::xGetSADs4( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 1459 { 1460 Pel* piOrg = pcDtParam->pOrg; 1461 Pel* piCur = pcDtParam->pCur; 1462 Pel* piRef = pRefY; 1463 Int iRows = pcDtParam->iRows; 1464 Int iStrideCur = pcDtParam->iStrideCur; 1465 Int iStrideOrg = pcDtParam->iStrideOrg; 1466 Int iStrideRef = pcDtParam->iCols; 1467 Int iStep = pcDtParam->iStep; 1468 Int iStep2 = iStep<<1; 1469 Int iStep3 = iStep2 + iStep; 1470 Pel pred; 1471 1472 UInt uiSum = 0; 1473 1474 for( ; iRows != 0; iRows-- ) 1475 { 1476 1477 pred = (piCur[0] + piRef[0] + bRound) >> 1 ; uiSum += abs( piOrg[0] - pred ); 1478 pred = (piCur[iStep ] + piRef[1] + bRound) >> 1 ; uiSum += abs( piOrg[1] - pred ); 1479 pred = (piCur[iStep2] + piRef[2] + bRound) >> 1 ; uiSum += abs( piOrg[2] - pred ); 1480 pred = (piCur[iStep3] + piRef[3] + bRound) >> 1 ; uiSum += abs( piOrg[3] - pred ); 994 for( ; iRows != 0; iRows-=iSubStep ) 995 { 996 uiSum += abs( piOrg[0] - piCur[0] ); 997 uiSum += abs( piOrg[1] - piCur[1] ); 998 uiSum += abs( piOrg[2] - piCur[2] ); 999 uiSum += abs( piOrg[3] - piCur[3] ); 1000 uiSum += abs( piOrg[4] - piCur[4] ); 1001 uiSum += abs( piOrg[5] - piCur[5] ); 1002 uiSum += abs( piOrg[6] - piCur[6] ); 1003 uiSum += abs( piOrg[7] - piCur[7] ); 1004 uiSum += abs( piOrg[8] - piCur[8] ); 1005 uiSum += abs( piOrg[9] - piCur[9] ); 1006 uiSum += abs( piOrg[10] - piCur[10] ); 1007 uiSum += abs( piOrg[11] - piCur[11] ); 1008 uiSum += abs( piOrg[12] - piCur[12] ); 1009 uiSum += abs( piOrg[13] - piCur[13] ); 1010 uiSum += abs( piOrg[14] - piCur[14] ); 1011 uiSum += abs( piOrg[15] - piCur[15] ); 1012 uiSum += abs( piOrg[16] - piCur[16] ); 1013 uiSum += abs( piOrg[17] - piCur[17] ); 1014 uiSum += abs( piOrg[18] - piCur[18] ); 1015 uiSum += abs( piOrg[19] - piCur[19] ); 1016 uiSum += abs( piOrg[20] - piCur[20] ); 1017 uiSum += abs( piOrg[21] - piCur[21] ); 1018 uiSum += abs( piOrg[22] - piCur[22] ); 1019 uiSum += abs( piOrg[23] - piCur[23] ); 1020 uiSum += abs( piOrg[24] - piCur[24] ); 1021 uiSum += abs( piOrg[25] - piCur[25] ); 1022 uiSum += abs( piOrg[26] - piCur[26] ); 1023 uiSum += abs( piOrg[27] - piCur[27] ); 1024 uiSum += abs( piOrg[28] - piCur[28] ); 1025 uiSum += abs( piOrg[29] - piCur[29] ); 1026 uiSum += abs( piOrg[30] - piCur[30] ); 1027 uiSum += abs( piOrg[31] - piCur[31] ); 1028 uiSum += abs( piOrg[32] - piCur[32] ); 1029 uiSum += abs( piOrg[33] - piCur[33] ); 1030 uiSum += abs( piOrg[34] - piCur[34] ); 1031 uiSum += abs( piOrg[35] - piCur[35] ); 1032 uiSum += abs( piOrg[36] - piCur[36] ); 1033 uiSum += abs( piOrg[37] - piCur[37] ); 1034 uiSum += abs( piOrg[38] - piCur[38] ); 1035 uiSum += abs( piOrg[39] - piCur[39] ); 1036 uiSum += abs( piOrg[40] - piCur[40] ); 1037 uiSum += abs( piOrg[41] - piCur[41] ); 1038 uiSum += abs( piOrg[42] - piCur[42] ); 1039 uiSum += abs( piOrg[43] - piCur[43] ); 1040 uiSum += abs( piOrg[44] - piCur[44] ); 1041 uiSum += abs( piOrg[45] - piCur[45] ); 1042 uiSum += abs( piOrg[46] - piCur[46] ); 1043 uiSum += abs( piOrg[47] - piCur[47] ); 1481 1044 1482 1045 piOrg += iStrideOrg; 1483 1046 piCur += iStrideCur; 1484 piRef += iStrideRef;1485 }1486 1047 } 1048 1049 uiSum <<= iSubShift; 1487 1050 return ( uiSum >> g_uiBitIncrement ); 1488 1051 } 1489 1490 UInt TComRdCost::xGetSADs8( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 1491 { 1492 Pel* piOrg = pcDtParam->pOrg; 1493 Pel* piCur = pcDtParam->pCur; 1494 Pel* piRef = pRefY; 1495 Int iRows = pcDtParam->iRows; 1496 Int iStrideCur = pcDtParam->iStrideCur; 1497 Int iStrideOrg = pcDtParam->iStrideOrg; 1498 Int iStrideRef = pcDtParam->iCols; 1499 Int iStep = pcDtParam->iStep; 1500 Int iStep2 = iStep<<1; 1501 Int iStep3 = iStep2 + iStep; 1502 Int iStep4 = iStep3 + iStep; 1503 Int iStep5 = iStep4 + iStep; 1504 Int iStep6 = iStep5 + iStep; 1505 Int iStep7 = iStep6 + iStep; 1506 Pel pred; 1507 1508 UInt uiSum = 0; 1509 1510 for( ; iRows != 0; iRows-- ) 1511 { 1512 1513 pred = (piCur[0] + piRef[0] + bRound) >> 1 ; uiSum += abs( piOrg[0] - pred ); 1514 pred = (piCur[iStep ] + piRef[1] + bRound) >> 1 ; uiSum += abs( piOrg[1] - pred ); 1515 pred = (piCur[iStep2] + piRef[2] + bRound) >> 1 ; uiSum += abs( piOrg[2] - pred ); 1516 pred = (piCur[iStep3] + piRef[3] + bRound) >> 1 ; uiSum += abs( piOrg[3] - pred ); 1517 pred = (piCur[iStep4] + piRef[4] + bRound) >> 1 ; uiSum += abs( piOrg[4] - pred ); 1518 pred = (piCur[iStep5] + piRef[5] + bRound) >> 1 ; uiSum += abs( piOrg[5] - pred ); 1519 pred = (piCur[iStep6] + piRef[6] + bRound) >> 1 ; uiSum += abs( piOrg[6] - pred ); 1520 pred = (piCur[iStep7] + piRef[7] + bRound) >> 1 ; uiSum += abs( piOrg[7] - pred ); 1521 1522 piOrg += iStrideOrg; 1523 piCur += iStrideCur; 1524 piRef += iStrideRef; 1525 } 1526 1527 return ( uiSum >> g_uiBitIncrement ); 1528 } 1529 1530 UInt TComRdCost::xGetSADs16( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 1531 { 1532 Pel* piOrg = pcDtParam->pOrg; 1533 Pel* piCur = pcDtParam->pCur; 1534 Pel* piRef = pRefY; 1535 Int iRows = pcDtParam->iRows; 1536 Int iStrideCur = pcDtParam->iStrideCur; 1537 Int iStrideOrg = pcDtParam->iStrideOrg; 1538 Int iStrideRef = pcDtParam->iCols; 1539 Int iStep = pcDtParam->iStep; 1540 Int iStep2 = iStep<<1; 1541 Int iStep3 = iStep2 + iStep; 1542 Int iStep4 = iStep3 + iStep; 1543 Int iStep5 = iStep4 + iStep; 1544 Int iStep6 = iStep5 + iStep; 1545 Int iStep7 = iStep6 + iStep; 1546 Int iStep8 = iStep7 + iStep; 1547 Int iStep9 = iStep8 + iStep; 1548 Int iStep10 = iStep9 + iStep; 1549 Int iStep11 = iStep10 + iStep; 1550 Int iStep12 = iStep11 + iStep; 1551 Int iStep13 = iStep12 + iStep; 1552 Int iStep14 = iStep13 + iStep; 1553 Int iStep15 = iStep14 + iStep; 1554 Pel pred; 1555 1556 UInt uiSum = 0; 1557 1558 for( ; iRows != 0; iRows-- ) 1559 { 1560 pred = (piCur[0] + piRef[0] + bRound) >> 1 ; uiSum += abs( piOrg[0] - pred ); 1561 pred = (piCur[iStep ] + piRef[1] + bRound) >> 1 ; uiSum += abs( piOrg[1] - pred ); 1562 pred = (piCur[iStep2] + piRef[2] + bRound) >> 1 ; uiSum += abs( piOrg[2] - pred ); 1563 pred = (piCur[iStep3] + piRef[3] + bRound) >> 1 ; uiSum += abs( piOrg[3] - pred ); 1564 pred = (piCur[iStep4] + piRef[4] + bRound) >> 1 ; uiSum += abs( piOrg[4] - pred ); 1565 pred = (piCur[iStep5] + piRef[5] + bRound) >> 1 ; uiSum += abs( piOrg[5] - pred ); 1566 pred = (piCur[iStep6] + piRef[6] + bRound) >> 1 ; uiSum += abs( piOrg[6] - pred ); 1567 pred = (piCur[iStep7] + piRef[7] + bRound) >> 1 ; uiSum += abs( piOrg[7] - pred ); 1568 pred = (piCur[iStep8] + piRef[8] + bRound) >> 1 ; uiSum += abs( piOrg[8] - pred ); 1569 pred = (piCur[iStep9] + piRef[9] + bRound) >> 1 ; uiSum += abs( piOrg[9] - pred ); 1570 pred = (piCur[iStep10] + piRef[10] + bRound) >> 1 ; uiSum += abs( piOrg[10] - pred ); 1571 pred = (piCur[iStep11] + piRef[11] + bRound) >> 1 ; uiSum += abs( piOrg[11] - pred ); 1572 pred = (piCur[iStep12] + piRef[12] + bRound) >> 1 ; uiSum += abs( piOrg[12] - pred ); 1573 pred = (piCur[iStep13] + piRef[13] + bRound) >> 1 ; uiSum += abs( piOrg[13] - pred ); 1574 pred = (piCur[iStep14] + piRef[14] + bRound) >> 1 ; uiSum += abs( piOrg[14] - pred ); 1575 pred = (piCur[iStep15] + piRef[15] + bRound) >> 1 ; uiSum += abs( piOrg[15] - pred ); 1576 1577 piOrg += iStrideOrg; 1578 piCur += iStrideCur; 1579 piRef += iStrideRef; 1580 } 1581 1582 return ( uiSum >> g_uiBitIncrement ); 1583 } 1584 1585 UInt TComRdCost::xGetSADs16N( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 1586 { 1587 Pel* piOrg = pcDtParam->pOrg; 1588 Pel* piCur = pcDtParam->pCur; 1589 Pel* piRef = pRefY; 1590 Int iRows = pcDtParam->iRows; 1591 Int iCols = pcDtParam->iCols; 1592 Int iStrideCur = pcDtParam->iStrideCur; 1593 Int iStrideOrg = pcDtParam->iStrideOrg; 1594 Int iStrideRef = pcDtParam->iCols; 1595 Int iStep = pcDtParam->iStep; 1596 Pel pred; 1597 1598 UInt uiSum = 0; 1599 1600 for( ; iRows != 0; iRows-- ) 1601 { 1602 for (Int n = 0; n < iCols; n+=16 ) 1603 { 1604 pred = (piCur[iStep*(n +0)] + piRef[n + 0] + bRound) >> 1 ; uiSum += abs( piOrg[n +0] - pred ); 1605 pred = (piCur[iStep*(n +1)] + piRef[n + 1] + bRound) >> 1 ; uiSum += abs( piOrg[n +1] - pred ); 1606 pred = (piCur[iStep*(n +2)] + piRef[n + 2] + bRound) >> 1 ; uiSum += abs( piOrg[n +2] - pred ); 1607 pred = (piCur[iStep*(n +3)] + piRef[n + 3] + bRound) >> 1 ; uiSum += abs( piOrg[n +3] - pred ); 1608 pred = (piCur[iStep*(n +4)] + piRef[n + 4] + bRound) >> 1 ; uiSum += abs( piOrg[n +4] - pred ); 1609 pred = (piCur[iStep*(n +5)] + piRef[n + 5] + bRound) >> 1 ; uiSum += abs( piOrg[n +5] - pred ); 1610 pred = (piCur[iStep*(n +6)] + piRef[n + 6] + bRound) >> 1 ; uiSum += abs( piOrg[n +6] - pred ); 1611 pred = (piCur[iStep*(n +7)] + piRef[n + 7] + bRound) >> 1 ; uiSum += abs( piOrg[n +7] - pred ); 1612 pred = (piCur[iStep*(n +8)] + piRef[n + 8] + bRound) >> 1 ; uiSum += abs( piOrg[n +8] - pred ); 1613 pred = (piCur[iStep*(n +9)] + piRef[n + 9] + bRound) >> 1 ; uiSum += abs( piOrg[n +9] - pred ); 1614 pred = (piCur[iStep*(n +10)] + piRef[n + 10] + bRound) >> 1 ; uiSum += abs( piOrg[n +10] - pred ); 1615 pred = (piCur[iStep*(n +11)] + piRef[n + 11] + bRound) >> 1 ; uiSum += abs( piOrg[n +11] - pred ); 1616 pred = (piCur[iStep*(n +12)] + piRef[n + 12] + bRound) >> 1 ; uiSum += abs( piOrg[n +12] - pred ); 1617 pred = (piCur[iStep*(n +13)] + piRef[n + 13] + bRound) >> 1 ; uiSum += abs( piOrg[n +13] - pred ); 1618 pred = (piCur[iStep*(n +14)] + piRef[n + 14] + bRound) >> 1 ; uiSum += abs( piOrg[n +14] - pred ); 1619 pred = (piCur[iStep*(n +15)] + piRef[n + 15] + bRound) >> 1 ; uiSum += abs( piOrg[n +15] - pred ); 1620 } 1621 piOrg += iStrideOrg; 1622 piCur += iStrideCur; 1623 piRef += iStrideRef; 1624 } 1625 1626 return ( uiSum >> g_uiBitIncrement ); 1627 } 1628 1629 UInt TComRdCost::xGetSADs32( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 1630 { 1631 Pel* piOrg = pcDtParam->pOrg; 1632 Pel* piCur = pcDtParam->pCur; 1633 Pel* piRef = pRefY; 1634 Int iRows = pcDtParam->iRows; 1635 Int iStrideCur = pcDtParam->iStrideCur; 1636 Int iStrideOrg = pcDtParam->iStrideOrg; 1637 Int iStrideRef = pcDtParam->iCols; 1638 Int iStep = pcDtParam->iStep; 1639 Int iStep2 = iStep<<1; 1640 Int iStep3 = iStep2 + iStep; 1641 Int iStep4 = iStep3 + iStep; 1642 Int iStep5 = iStep4 + iStep; 1643 Int iStep6 = iStep5 + iStep; 1644 Int iStep7 = iStep6 + iStep; 1645 Int iStep8 = iStep7 + iStep; 1646 Int iStep9 = iStep8 + iStep; 1647 Int iStep10 = iStep9 + iStep; 1648 Int iStep11 = iStep10 + iStep; 1649 Int iStep12 = iStep11 + iStep; 1650 Int iStep13 = iStep12 + iStep; 1651 Int iStep14 = iStep13 + iStep; 1652 Int iStep15 = iStep14 + iStep; 1653 Int iStep16 = iStep15 + iStep; 1654 Int iStep17 = iStep16 + iStep; 1655 Int iStep18 = iStep17 + iStep; 1656 Int iStep19 = iStep18 + iStep; 1657 Int iStep20 = iStep19 + iStep; 1658 Int iStep21 = iStep20 + iStep; 1659 Int iStep22 = iStep21 + iStep; 1660 Int iStep23 = iStep22 + iStep; 1661 Int iStep24 = iStep23 + iStep; 1662 Int iStep25 = iStep24 + iStep; 1663 Int iStep26 = iStep25 + iStep; 1664 Int iStep27 = iStep26 + iStep; 1665 Int iStep28 = iStep27 + iStep; 1666 Int iStep29 = iStep28 + iStep; 1667 Int iStep30 = iStep29 + iStep; 1668 Int iStep31 = iStep30 + iStep; 1669 Pel pred; 1670 1671 UInt uiSum = 0; 1672 1673 for( ; iRows != 0; iRows-- ) 1674 { 1675 pred = (piCur[0] + piRef[0] + bRound) >> 1 ; uiSum += abs( piOrg[0] - pred ); 1676 pred = (piCur[iStep ] + piRef[1] + bRound) >> 1 ; uiSum += abs( piOrg[1] - pred ); 1677 pred = (piCur[iStep2] + piRef[2] + bRound) >> 1 ; uiSum += abs( piOrg[2] - pred ); 1678 pred = (piCur[iStep3] + piRef[3] + bRound) >> 1 ; uiSum += abs( piOrg[3] - pred ); 1679 pred = (piCur[iStep4] + piRef[4] + bRound) >> 1 ; uiSum += abs( piOrg[4] - pred ); 1680 pred = (piCur[iStep5] + piRef[5] + bRound) >> 1 ; uiSum += abs( piOrg[5] - pred ); 1681 pred = (piCur[iStep6] + piRef[6] + bRound) >> 1 ; uiSum += abs( piOrg[6] - pred ); 1682 pred = (piCur[iStep7] + piRef[7] + bRound) >> 1 ; uiSum += abs( piOrg[7] - pred ); 1683 pred = (piCur[iStep8] + piRef[8] + bRound) >> 1 ; uiSum += abs( piOrg[8] - pred ); 1684 pred = (piCur[iStep9] + piRef[9] + bRound) >> 1 ; uiSum += abs( piOrg[9] - pred ); 1685 pred = (piCur[iStep10] + piRef[10] + bRound) >> 1 ; uiSum += abs( piOrg[10] - pred ); 1686 pred = (piCur[iStep11] + piRef[11] + bRound) >> 1 ; uiSum += abs( piOrg[11] - pred ); 1687 pred = (piCur[iStep12] + piRef[12] + bRound) >> 1 ; uiSum += abs( piOrg[12] - pred ); 1688 pred = (piCur[iStep13] + piRef[13] + bRound) >> 1 ; uiSum += abs( piOrg[13] - pred ); 1689 pred = (piCur[iStep14] + piRef[14] + bRound) >> 1 ; uiSum += abs( piOrg[14] - pred ); 1690 pred = (piCur[iStep15] + piRef[15] + bRound) >> 1 ; uiSum += abs( piOrg[15] - pred ); 1691 pred = (piCur[iStep16] + piRef[16] + bRound) >> 1 ; uiSum += abs( piOrg[16] - pred ); 1692 pred = (piCur[iStep17] + piRef[17] + bRound) >> 1 ; uiSum += abs( piOrg[17] - pred ); 1693 pred = (piCur[iStep18] + piRef[18] + bRound) >> 1 ; uiSum += abs( piOrg[18] - pred ); 1694 pred = (piCur[iStep19] + piRef[19] + bRound) >> 1 ; uiSum += abs( piOrg[19] - pred ); 1695 pred = (piCur[iStep20] + piRef[20] + bRound) >> 1 ; uiSum += abs( piOrg[20] - pred ); 1696 pred = (piCur[iStep21] + piRef[21] + bRound) >> 1 ; uiSum += abs( piOrg[21] - pred ); 1697 pred = (piCur[iStep22] + piRef[22] + bRound) >> 1 ; uiSum += abs( piOrg[22] - pred ); 1698 pred = (piCur[iStep23] + piRef[23] + bRound) >> 1 ; uiSum += abs( piOrg[23] - pred ); 1699 pred = (piCur[iStep24] + piRef[24] + bRound) >> 1 ; uiSum += abs( piOrg[24] - pred ); 1700 pred = (piCur[iStep25] + piRef[25] + bRound) >> 1 ; uiSum += abs( piOrg[25] - pred ); 1701 pred = (piCur[iStep26] + piRef[26] + bRound) >> 1 ; uiSum += abs( piOrg[26] - pred ); 1702 pred = (piCur[iStep27] + piRef[27] + bRound) >> 1 ; uiSum += abs( piOrg[27] - pred ); 1703 pred = (piCur[iStep28] + piRef[28] + bRound) >> 1 ; uiSum += abs( piOrg[28] - pred ); 1704 pred = (piCur[iStep29] + piRef[29] + bRound) >> 1 ; uiSum += abs( piOrg[29] - pred ); 1705 pred = (piCur[iStep30] + piRef[30] + bRound) >> 1 ; uiSum += abs( piOrg[30] - pred ); 1706 pred = (piCur[iStep31] + piRef[31] + bRound) >> 1 ; uiSum += abs( piOrg[31] - pred ); 1707 1708 1709 piOrg += iStrideOrg; 1710 piCur += iStrideCur; 1711 piRef += iStrideRef; 1712 } 1713 1714 return ( uiSum >> g_uiBitIncrement ); 1715 } 1716 1717 UInt TComRdCost::xGetSADs64( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 1718 { 1719 Pel* piOrg = pcDtParam->pOrg; 1720 Pel* piCur = pcDtParam->pCur; 1721 Pel* piRef = pRefY; 1722 Int iRows = pcDtParam->iRows; 1723 Int iStrideCur = pcDtParam->iStrideCur; 1724 Int iStrideOrg = pcDtParam->iStrideOrg; 1725 Int iStrideRef = pcDtParam->iCols; 1726 Int iStep = pcDtParam->iStep; 1727 Int iStep2 = iStep<<1; 1728 Int iStep3 = iStep2 + iStep; 1729 Int iStep4 = iStep3 + iStep; 1730 Int iStep5 = iStep4 + iStep; 1731 Int iStep6 = iStep5 + iStep; 1732 Int iStep7 = iStep6 + iStep; 1733 Int iStep8 = iStep7 + iStep; 1734 Int iStep9 = iStep8 + iStep; 1735 Int iStep10 = iStep9 + iStep; 1736 Int iStep11 = iStep10 + iStep; 1737 Int iStep12 = iStep11 + iStep; 1738 Int iStep13 = iStep12 + iStep; 1739 Int iStep14 = iStep13 + iStep; 1740 Int iStep15 = iStep14 + iStep; 1741 Int iStep16 = iStep15 + iStep; 1742 Int iStep17 = iStep16 + iStep; 1743 Int iStep18 = iStep17 + iStep; 1744 Int iStep19 = iStep18 + iStep; 1745 Int iStep20 = iStep19 + iStep; 1746 Int iStep21 = iStep20 + iStep; 1747 Int iStep22 = iStep21 + iStep; 1748 Int iStep23 = iStep22 + iStep; 1749 Int iStep24 = iStep23 + iStep; 1750 Int iStep25 = iStep24 + iStep; 1751 Int iStep26 = iStep25 + iStep; 1752 Int iStep27 = iStep26 + iStep; 1753 Int iStep28 = iStep27 + iStep; 1754 Int iStep29 = iStep28 + iStep; 1755 Int iStep30 = iStep29 + iStep; 1756 Int iStep31 = iStep30 + iStep; 1757 Int iStep32 = iStep31 + iStep; 1758 Int iStep33 = iStep32 + iStep; 1759 Int iStep34 = iStep33 + iStep; 1760 Int iStep35 = iStep34 + iStep; 1761 Int iStep36 = iStep35 + iStep; 1762 Int iStep37 = iStep36 + iStep; 1763 Int iStep38 = iStep37 + iStep; 1764 Int iStep39 = iStep38 + iStep; 1765 Int iStep40 = iStep39 + iStep; 1766 Int iStep41 = iStep40 + iStep; 1767 Int iStep42 = iStep41 + iStep; 1768 Int iStep43 = iStep42 + iStep; 1769 Int iStep44 = iStep43 + iStep; 1770 Int iStep45 = iStep44 + iStep; 1771 Int iStep46 = iStep45 + iStep; 1772 Int iStep47 = iStep46 + iStep; 1773 Int iStep48 = iStep47 + iStep; 1774 Int iStep49 = iStep48 + iStep; 1775 Int iStep50 = iStep49 + iStep; 1776 Int iStep51 = iStep50 + iStep; 1777 Int iStep52 = iStep51 + iStep; 1778 Int iStep53 = iStep52 + iStep; 1779 Int iStep54 = iStep53 + iStep; 1780 Int iStep55 = iStep54 + iStep; 1781 Int iStep56 = iStep55 + iStep; 1782 Int iStep57 = iStep56 + iStep; 1783 Int iStep58 = iStep57 + iStep; 1784 Int iStep59 = iStep58 + iStep; 1785 Int iStep60 = iStep59 + iStep; 1786 Int iStep61 = iStep60 + iStep; 1787 Int iStep62 = iStep61 + iStep; 1788 Int iStep63 = iStep62 + iStep; 1789 Pel pred; 1790 1791 UInt uiSum = 0; 1792 1793 for( ; iRows != 0; iRows-- ) 1794 { 1795 pred = (piCur[0] + piRef[0] + bRound) >> 1 ; uiSum += abs( piOrg[0] - pred ); 1796 pred = (piCur[iStep ] + piRef[1] + bRound) >> 1 ; uiSum += abs( piOrg[1] - pred ); 1797 pred = (piCur[iStep2] + piRef[2] + bRound) >> 1 ; uiSum += abs( piOrg[2] - pred ); 1798 pred = (piCur[iStep3] + piRef[3] + bRound) >> 1 ; uiSum += abs( piOrg[3] - pred ); 1799 pred = (piCur[iStep4] + piRef[4] + bRound) >> 1 ; uiSum += abs( piOrg[4] - pred ); 1800 pred = (piCur[iStep5] + piRef[5] + bRound) >> 1 ; uiSum += abs( piOrg[5] - pred ); 1801 pred = (piCur[iStep6] + piRef[6] + bRound) >> 1 ; uiSum += abs( piOrg[6] - pred ); 1802 pred = (piCur[iStep7] + piRef[7] + bRound) >> 1 ; uiSum += abs( piOrg[7] - pred ); 1803 pred = (piCur[iStep8] + piRef[8] + bRound) >> 1 ; uiSum += abs( piOrg[8] - pred ); 1804 pred = (piCur[iStep9] + piRef[9] + bRound) >> 1 ; uiSum += abs( piOrg[9] - pred ); 1805 1806 pred = (piCur[iStep10] + piRef[10] + bRound) >> 1 ; uiSum += abs( piOrg[10] - pred ); 1807 pred = (piCur[iStep11] + piRef[11] + bRound) >> 1 ; uiSum += abs( piOrg[11] - pred ); 1808 pred = (piCur[iStep12] + piRef[12] + bRound) >> 1 ; uiSum += abs( piOrg[12] - pred ); 1809 pred = (piCur[iStep13] + piRef[13] + bRound) >> 1 ; uiSum += abs( piOrg[13] - pred ); 1810 pred = (piCur[iStep14] + piRef[14] + bRound) >> 1 ; uiSum += abs( piOrg[14] - pred ); 1811 pred = (piCur[iStep15] + piRef[15] + bRound) >> 1 ; uiSum += abs( piOrg[15] - pred ); 1812 pred = (piCur[iStep16] + piRef[16] + bRound) >> 1 ; uiSum += abs( piOrg[16] - pred ); 1813 pred = (piCur[iStep17] + piRef[17] + bRound) >> 1 ; uiSum += abs( piOrg[17] - pred ); 1814 pred = (piCur[iStep18] + piRef[18] + bRound) >> 1 ; uiSum += abs( piOrg[18] - pred ); 1815 pred = (piCur[iStep19] + piRef[19] + bRound) >> 1 ; uiSum += abs( piOrg[19] - pred ); 1816 pred = (piCur[iStep20] + piRef[20] + bRound) >> 1 ; uiSum += abs( piOrg[20] - pred ); 1817 1818 pred = (piCur[iStep21] + piRef[21] + bRound) >> 1 ; uiSum += abs( piOrg[21] - pred ); 1819 pred = (piCur[iStep22] + piRef[22] + bRound) >> 1 ; uiSum += abs( piOrg[22] - pred ); 1820 pred = (piCur[iStep23] + piRef[23] + bRound) >> 1 ; uiSum += abs( piOrg[23] - pred ); 1821 pred = (piCur[iStep24] + piRef[24] + bRound) >> 1 ; uiSum += abs( piOrg[24] - pred ); 1822 pred = (piCur[iStep25] + piRef[25] + bRound) >> 1 ; uiSum += abs( piOrg[25] - pred ); 1823 pred = (piCur[iStep26] + piRef[26] + bRound) >> 1 ; uiSum += abs( piOrg[26] - pred ); 1824 pred = (piCur[iStep27] + piRef[27] + bRound) >> 1 ; uiSum += abs( piOrg[27] - pred ); 1825 pred = (piCur[iStep28] + piRef[28] + bRound) >> 1 ; uiSum += abs( piOrg[28] - pred ); 1826 pred = (piCur[iStep29] + piRef[29] + bRound) >> 1 ; uiSum += abs( piOrg[29] - pred ); 1827 1828 pred = (piCur[iStep30] + piRef[30] + bRound) >> 1 ; uiSum += abs( piOrg[30] - pred ); 1829 pred = (piCur[iStep31] + piRef[31] + bRound) >> 1 ; uiSum += abs( piOrg[31] - pred ); 1830 pred = (piCur[iStep32] + piRef[32] + bRound) >> 1 ; uiSum += abs( piOrg[32] - pred ); 1831 pred = (piCur[iStep33] + piRef[33] + bRound) >> 1 ; uiSum += abs( piOrg[33] - pred ); 1832 pred = (piCur[iStep34] + piRef[34] + bRound) >> 1 ; uiSum += abs( piOrg[34] - pred ); 1833 pred = (piCur[iStep35] + piRef[35] + bRound) >> 1 ; uiSum += abs( piOrg[35] - pred ); 1834 pred = (piCur[iStep36] + piRef[36] + bRound) >> 1 ; uiSum += abs( piOrg[36] - pred ); 1835 pred = (piCur[iStep37] + piRef[37] + bRound) >> 1 ; uiSum += abs( piOrg[37] - pred ); 1836 pred = (piCur[iStep38] + piRef[38] + bRound) >> 1 ; uiSum += abs( piOrg[38] - pred ); 1837 pred = (piCur[iStep39] + piRef[39] + bRound) >> 1 ; uiSum += abs( piOrg[39] - pred ); 1838 1839 pred = (piCur[iStep40] + piRef[40] + bRound) >> 1 ; uiSum += abs( piOrg[40] - pred ); 1840 pred = (piCur[iStep41] + piRef[41] + bRound) >> 1 ; uiSum += abs( piOrg[41] - pred ); 1841 pred = (piCur[iStep42] + piRef[42] + bRound) >> 1 ; uiSum += abs( piOrg[42] - pred ); 1842 pred = (piCur[iStep43] + piRef[43] + bRound) >> 1 ; uiSum += abs( piOrg[43] - pred ); 1843 pred = (piCur[iStep44] + piRef[44] + bRound) >> 1 ; uiSum += abs( piOrg[44] - pred ); 1844 pred = (piCur[iStep45] + piRef[45] + bRound) >> 1 ; uiSum += abs( piOrg[45] - pred ); 1845 pred = (piCur[iStep46] + piRef[46] + bRound) >> 1 ; uiSum += abs( piOrg[46] - pred ); 1846 pred = (piCur[iStep47] + piRef[47] + bRound) >> 1 ; uiSum += abs( piOrg[47] - pred ); 1847 pred = (piCur[iStep48] + piRef[48] + bRound) >> 1 ; uiSum += abs( piOrg[48] - pred ); 1848 pred = (piCur[iStep49] + piRef[49] + bRound) >> 1 ; uiSum += abs( piOrg[49] - pred ); 1849 1850 pred = (piCur[iStep50] + piRef[50] + bRound) >> 1 ; uiSum += abs( piOrg[50] - pred ); 1851 pred = (piCur[iStep51] + piRef[51] + bRound) >> 1 ; uiSum += abs( piOrg[51] - pred ); 1852 pred = (piCur[iStep52] + piRef[52] + bRound) >> 1 ; uiSum += abs( piOrg[52] - pred ); 1853 pred = (piCur[iStep53] + piRef[53] + bRound) >> 1 ; uiSum += abs( piOrg[53] - pred ); 1854 pred = (piCur[iStep54] + piRef[54] + bRound) >> 1 ; uiSum += abs( piOrg[54] - pred ); 1855 pred = (piCur[iStep55] + piRef[55] + bRound) >> 1 ; uiSum += abs( piOrg[55] - pred ); 1856 pred = (piCur[iStep56] + piRef[56] + bRound) >> 1 ; uiSum += abs( piOrg[56] - pred ); 1857 pred = (piCur[iStep57] + piRef[57] + bRound) >> 1 ; uiSum += abs( piOrg[57] - pred ); 1858 pred = (piCur[iStep58] + piRef[58] + bRound) >> 1 ; uiSum += abs( piOrg[58] - pred ); 1859 pred = (piCur[iStep59] + piRef[59] + bRound) >> 1 ; uiSum += abs( piOrg[59] - pred ); 1860 1861 pred = (piCur[iStep60] + piRef[60] + bRound) >> 1 ; uiSum += abs( piOrg[60] - pred ); 1862 pred = (piCur[iStep61] + piRef[61] + bRound) >> 1 ; uiSum += abs( piOrg[61] - pred ); 1863 pred = (piCur[iStep62] + piRef[62] + bRound) >> 1 ; uiSum += abs( piOrg[62] - pred ); 1864 pred = (piCur[iStep63] + piRef[63] + bRound) >> 1 ; uiSum += abs( piOrg[63] - pred ); 1865 1866 piOrg += iStrideOrg; 1867 piCur += iStrideCur; 1868 piRef += iStrideRef; 1869 } 1870 1871 return ( uiSum >> g_uiBitIncrement ); 1872 } 1873 #endif 1874 1875 UInt TComRdCost::xGetSADs( DistParam* pcDtParam ) 1876 { 1877 #ifdef WEIGHT_PRED 1878 if ( pcDtParam->applyWeight ) 1879 { 1880 return xGetSADsw( pcDtParam ); 1881 } 1882 #endif 1883 Pel* piOrg = pcDtParam->pOrg; 1884 Pel* piCur = pcDtParam->pCur; 1885 Int iRows = pcDtParam->iRows; 1886 Int iCols = pcDtParam->iCols; 1887 Int iStrideCur = pcDtParam->iStrideCur; 1888 Int iStrideOrg = pcDtParam->iStrideOrg; 1889 Int iStep = pcDtParam->iStep; 1890 1891 UInt uiSum = 0; 1892 1893 for( ; iRows != 0; iRows-- ) 1894 { 1895 for (Int n = 0; n < iCols; n++ ) 1896 { 1897 uiSum += abs( piOrg[n] - piCur[n*iStep] ); 1898 } 1899 piOrg += iStrideOrg; 1900 piCur += iStrideCur; 1901 } 1902 1903 return ( uiSum >> g_uiBitIncrement ); 1904 } 1905 1906 UInt TComRdCost::xGetSADs4( DistParam* pcDtParam ) 1907 { 1908 #ifdef WEIGHT_PRED 1909 if ( pcDtParam->applyWeight ) 1910 { 1911 return xGetSADs4w( pcDtParam ); 1912 } 1913 #endif 1914 Pel* piOrg = pcDtParam->pOrg; 1915 Pel* piCur = pcDtParam->pCur; 1916 Int iRows = pcDtParam->iRows; 1917 Int iStrideCur = pcDtParam->iStrideCur; 1918 Int iStrideOrg = pcDtParam->iStrideOrg; 1919 Int iStep = pcDtParam->iStep; 1920 Int iStep2 = iStep<<1; 1921 Int iStep3 = iStep2 + iStep; 1922 1923 UInt uiSum = 0; 1924 1925 for( ; iRows != 0; iRows-- ) 1926 { 1927 uiSum += abs( piOrg[0] - piCur[ 0] ); 1928 uiSum += abs( piOrg[1] - piCur[iStep ] ); 1929 uiSum += abs( piOrg[2] - piCur[iStep2] ); 1930 uiSum += abs( piOrg[3] - piCur[iStep3] ); 1931 1932 piOrg += iStrideOrg; 1933 piCur += iStrideCur; 1934 } 1935 1936 return ( uiSum >> g_uiBitIncrement ); 1937 } 1938 1939 UInt TComRdCost::xGetSADs8( DistParam* pcDtParam ) 1940 { 1941 #ifdef WEIGHT_PRED 1942 if ( pcDtParam->applyWeight ) 1943 { 1944 return xGetSADs8w( pcDtParam ); 1945 } 1946 #endif 1947 Pel* piOrg = pcDtParam->pOrg; 1948 Pel* piCur = pcDtParam->pCur; 1949 Int iRows = pcDtParam->iRows; 1950 Int iStrideCur = pcDtParam->iStrideCur; 1951 Int iStrideOrg = pcDtParam->iStrideOrg; 1952 Int iStep = pcDtParam->iStep; 1953 Int iStep2 = iStep<<1; 1954 Int iStep3 = iStep2 + iStep; 1955 Int iStep4 = iStep3 + iStep; 1956 Int iStep5 = iStep4 + iStep; 1957 Int iStep6 = iStep5 + iStep; 1958 Int iStep7 = iStep6 + iStep; 1959 1960 UInt uiSum = 0; 1961 1962 for( ; iRows != 0; iRows-- ) 1963 { 1964 uiSum += abs( piOrg[0] - piCur[ 0] ); 1965 uiSum += abs( piOrg[1] - piCur[iStep ] ); 1966 uiSum += abs( piOrg[2] - piCur[iStep2] ); 1967 uiSum += abs( piOrg[3] - piCur[iStep3] ); 1968 uiSum += abs( piOrg[4] - piCur[iStep4] ); 1969 uiSum += abs( piOrg[5] - piCur[iStep5] ); 1970 uiSum += abs( piOrg[6] - piCur[iStep6] ); 1971 uiSum += abs( piOrg[7] - piCur[iStep7] ); 1972 1973 piOrg += iStrideOrg; 1974 piCur += iStrideCur; 1975 } 1976 1977 return ( uiSum >> g_uiBitIncrement ); 1978 } 1979 1980 UInt TComRdCost::xGetSADs16( DistParam* pcDtParam ) 1981 { 1982 #ifdef WEIGHT_PRED 1983 if ( pcDtParam->applyWeight ) 1984 { 1985 return xGetSADs16w( pcDtParam ); 1986 } 1987 #endif 1988 Pel* piOrg = pcDtParam->pOrg; 1989 Pel* piCur = pcDtParam->pCur; 1990 Int iRows = pcDtParam->iRows; 1991 Int iStrideCur = pcDtParam->iStrideCur; 1992 Int iStrideOrg = pcDtParam->iStrideOrg; 1993 Int iStep = pcDtParam->iStep; 1994 Int iStep2 = iStep<<1; 1995 Int iStep3 = iStep2 + iStep; 1996 Int iStep4 = iStep3 + iStep; 1997 Int iStep5 = iStep4 + iStep; 1998 Int iStep6 = iStep5 + iStep; 1999 Int iStep7 = iStep6 + iStep; 2000 Int iStep8 = iStep7 + iStep; 2001 Int iStep9 = iStep8 + iStep; 2002 Int iStep10 = iStep9 + iStep; 2003 Int iStep11 = iStep10 + iStep; 2004 Int iStep12 = iStep11 + iStep; 2005 Int iStep13 = iStep12 + iStep; 2006 Int iStep14 = iStep13 + iStep; 2007 Int iStep15 = iStep14 + iStep; 2008 2009 UInt uiSum = 0; 2010 2011 for( ; iRows != 0; iRows-- ) 2012 { 2013 uiSum += abs( piOrg[ 0] - piCur[ 0] ); 2014 uiSum += abs( piOrg[ 1] - piCur[iStep ] ); 2015 uiSum += abs( piOrg[ 2] - piCur[iStep2 ] ); 2016 uiSum += abs( piOrg[ 3] - piCur[iStep3 ] ); 2017 uiSum += abs( piOrg[ 4] - piCur[iStep4 ] ); 2018 uiSum += abs( piOrg[ 5] - piCur[iStep5 ] ); 2019 uiSum += abs( piOrg[ 6] - piCur[iStep6 ] ); 2020 uiSum += abs( piOrg[ 7] - piCur[iStep7 ] ); 2021 uiSum += abs( piOrg[ 8] - piCur[iStep8 ] ); 2022 uiSum += abs( piOrg[ 9] - piCur[iStep9 ] ); 2023 uiSum += abs( piOrg[10] - piCur[iStep10] ); 2024 uiSum += abs( piOrg[11] - piCur[iStep11] ); 2025 uiSum += abs( piOrg[12] - piCur[iStep12] ); 2026 uiSum += abs( piOrg[13] - piCur[iStep13] ); 2027 uiSum += abs( piOrg[14] - piCur[iStep14] ); 2028 uiSum += abs( piOrg[15] - piCur[iStep15] ); 2029 2030 piOrg += iStrideOrg; 2031 piCur += iStrideCur; 2032 } 2033 2034 return ( uiSum >> g_uiBitIncrement ); 2035 } 2036 2037 UInt TComRdCost::xGetSADs16N( DistParam* pcDtParam ) 2038 { 2039 #ifdef WEIGHT_PRED 2040 if ( pcDtParam->applyWeight ) 2041 { 2042 return xGetSADs16Nw( pcDtParam ); 2043 } 2044 #endif 2045 Pel* piOrg = pcDtParam->pOrg; 2046 Pel* piCur = pcDtParam->pCur; 2047 Int iRows = pcDtParam->iRows; 2048 Int iCols = pcDtParam->iCols; 2049 Int iStrideCur = pcDtParam->iStrideCur; 2050 Int iStrideOrg = pcDtParam->iStrideOrg; 2051 Int iStep = pcDtParam->iStep; 2052 2053 UInt uiSum = 0; 2054 2055 for( ; iRows != 0; iRows-- ) 2056 { 2057 for (Int n = 0; n < iCols; n+=16 ) 2058 { 2059 uiSum += abs( piOrg[n +0] - piCur[iStep*(n +0)] ); 2060 uiSum += abs( piOrg[n +1] - piCur[iStep*(n +1)] ); 2061 uiSum += abs( piOrg[n +2] - piCur[iStep*(n +2)] ); 2062 uiSum += abs( piOrg[n +3] - piCur[iStep*(n +3)] ); 2063 uiSum += abs( piOrg[n +4] - piCur[iStep*(n +4)] ); 2064 uiSum += abs( piOrg[n +5] - piCur[iStep*(n +5)] ); 2065 uiSum += abs( piOrg[n +6] - piCur[iStep*(n +6)] ); 2066 uiSum += abs( piOrg[n +7] - piCur[iStep*(n +7)] ); 2067 uiSum += abs( piOrg[n +8] - piCur[iStep*(n +8)] ); 2068 uiSum += abs( piOrg[n +9] - piCur[iStep*(n +9)] ); 2069 uiSum += abs( piOrg[n+10] - piCur[iStep*(n+10)] ); 2070 uiSum += abs( piOrg[n+11] - piCur[iStep*(n+11)] ); 2071 uiSum += abs( piOrg[n+12] - piCur[iStep*(n+12)] ); 2072 uiSum += abs( piOrg[n+13] - piCur[iStep*(n+13)] ); 2073 uiSum += abs( piOrg[n+14] - piCur[iStep*(n+14)] ); 2074 uiSum += abs( piOrg[n+15] - piCur[iStep*(n+15)] ); 2075 } 2076 piOrg += iStrideOrg; 2077 piCur += iStrideCur; 2078 } 2079 2080 return ( uiSum >> g_uiBitIncrement ); 2081 } 2082 2083 UInt TComRdCost::xGetSADs32( DistParam* pcDtParam ) 2084 { 2085 #ifdef WEIGHT_PRED 2086 if ( pcDtParam->applyWeight ) 2087 { 2088 return xGetSADs32w( pcDtParam ); 2089 } 2090 #endif 2091 Pel* piOrg = pcDtParam->pOrg; 2092 Pel* piCur = pcDtParam->pCur; 2093 Int iRows = pcDtParam->iRows; 2094 Int iStrideCur = pcDtParam->iStrideCur; 2095 Int iStrideOrg = pcDtParam->iStrideOrg; 2096 Int iStep = pcDtParam->iStep; 2097 Int iStep2 = iStep<<1; 2098 Int iStep3 = iStep2 + iStep; 2099 Int iStep4 = iStep3 + iStep; 2100 Int iStep5 = iStep4 + iStep; 2101 Int iStep6 = iStep5 + iStep; 2102 Int iStep7 = iStep6 + iStep; 2103 Int iStep8 = iStep7 + iStep; 2104 Int iStep9 = iStep8 + iStep; 2105 Int iStep10 = iStep9 + iStep; 2106 Int iStep11 = iStep10 + iStep; 2107 Int iStep12 = iStep11 + iStep; 2108 Int iStep13 = iStep12 + iStep; 2109 Int iStep14 = iStep13 + iStep; 2110 Int iStep15 = iStep14 + iStep; 2111 Int iStep16 = iStep15 + iStep; 2112 Int iStep17 = iStep16 + iStep; 2113 Int iStep18 = iStep17 + iStep; 2114 Int iStep19 = iStep18 + iStep; 2115 Int iStep20 = iStep19 + iStep; 2116 Int iStep21 = iStep20 + iStep; 2117 Int iStep22 = iStep21 + iStep; 2118 Int iStep23 = iStep22 + iStep; 2119 Int iStep24 = iStep23 + iStep; 2120 Int iStep25 = iStep24 + iStep; 2121 Int iStep26 = iStep25 + iStep; 2122 Int iStep27 = iStep26 + iStep; 2123 Int iStep28 = iStep27 + iStep; 2124 Int iStep29 = iStep28 + iStep; 2125 Int iStep30 = iStep29 + iStep; 2126 Int iStep31 = iStep30 + iStep; 2127 2128 UInt uiSum = 0; 2129 2130 for( ; iRows != 0; iRows-- ) 2131 { 2132 uiSum += abs( piOrg[ 0] - piCur[ 0] ); 2133 uiSum += abs( piOrg[ 1] - piCur[iStep ] ); 2134 uiSum += abs( piOrg[ 2] - piCur[iStep2 ] ); 2135 uiSum += abs( piOrg[ 3] - piCur[iStep3 ] ); 2136 uiSum += abs( piOrg[ 4] - piCur[iStep4 ] ); 2137 uiSum += abs( piOrg[ 5] - piCur[iStep5 ] ); 2138 uiSum += abs( piOrg[ 6] - piCur[iStep6 ] ); 2139 uiSum += abs( piOrg[ 7] - piCur[iStep7 ] ); 2140 uiSum += abs( piOrg[ 8] - piCur[iStep8 ] ); 2141 uiSum += abs( piOrg[ 9] - piCur[iStep9 ] ); 2142 uiSum += abs( piOrg[10] - piCur[iStep10] ); 2143 uiSum += abs( piOrg[11] - piCur[iStep11] ); 2144 uiSum += abs( piOrg[12] - piCur[iStep12] ); 2145 uiSum += abs( piOrg[13] - piCur[iStep13] ); 2146 uiSum += abs( piOrg[14] - piCur[iStep14] ); 2147 uiSum += abs( piOrg[15] - piCur[iStep15] ); 2148 uiSum += abs( piOrg[16] - piCur[iStep16] ); 2149 uiSum += abs( piOrg[17] - piCur[iStep17] ); 2150 uiSum += abs( piOrg[18] - piCur[iStep18] ); 2151 uiSum += abs( piOrg[19] - piCur[iStep19] ); 2152 uiSum += abs( piOrg[20] - piCur[iStep20] ); 2153 uiSum += abs( piOrg[21] - piCur[iStep21] ); 2154 uiSum += abs( piOrg[22] - piCur[iStep22] ); 2155 uiSum += abs( piOrg[23] - piCur[iStep23] ); 2156 uiSum += abs( piOrg[24] - piCur[iStep24] ); 2157 uiSum += abs( piOrg[25] - piCur[iStep25] ); 2158 uiSum += abs( piOrg[26] - piCur[iStep26] ); 2159 uiSum += abs( piOrg[27] - piCur[iStep27] ); 2160 uiSum += abs( piOrg[28] - piCur[iStep28] ); 2161 uiSum += abs( piOrg[29] - piCur[iStep29] ); 2162 uiSum += abs( piOrg[30] - piCur[iStep30] ); 2163 uiSum += abs( piOrg[31] - piCur[iStep31] ); 2164 2165 piOrg += iStrideOrg; 2166 piCur += iStrideCur; 2167 } 2168 2169 return ( uiSum >> g_uiBitIncrement ); 2170 } 2171 2172 UInt TComRdCost::xGetSADs64( DistParam* pcDtParam ) 2173 { 2174 #ifdef WEIGHT_PRED 2175 if ( pcDtParam->applyWeight ) 2176 { 2177 return xGetSADs64w( pcDtParam ); 2178 } 2179 #endif 2180 Pel* piOrg = pcDtParam->pOrg; 2181 Pel* piCur = pcDtParam->pCur; 2182 Int iRows = pcDtParam->iRows; 2183 Int iStrideCur = pcDtParam->iStrideCur; 2184 Int iStrideOrg = pcDtParam->iStrideOrg; 2185 Int iStep = pcDtParam->iStep; 2186 Int iStep2 = iStep<<1; 2187 Int iStep3 = iStep2 + iStep; 2188 Int iStep4 = iStep3 + iStep; 2189 Int iStep5 = iStep4 + iStep; 2190 Int iStep6 = iStep5 + iStep; 2191 Int iStep7 = iStep6 + iStep; 2192 Int iStep8 = iStep7 + iStep; 2193 Int iStep9 = iStep8 + iStep; 2194 Int iStep10 = iStep9 + iStep; 2195 Int iStep11 = iStep10 + iStep; 2196 Int iStep12 = iStep11 + iStep; 2197 Int iStep13 = iStep12 + iStep; 2198 Int iStep14 = iStep13 + iStep; 2199 Int iStep15 = iStep14 + iStep; 2200 Int iStep16 = iStep15 + iStep; 2201 Int iStep17 = iStep16 + iStep; 2202 Int iStep18 = iStep17 + iStep; 2203 Int iStep19 = iStep18 + iStep; 2204 Int iStep20 = iStep19 + iStep; 2205 Int iStep21 = iStep20 + iStep; 2206 Int iStep22 = iStep21 + iStep; 2207 Int iStep23 = iStep22 + iStep; 2208 Int iStep24 = iStep23 + iStep; 2209 Int iStep25 = iStep24 + iStep; 2210 Int iStep26 = iStep25 + iStep; 2211 Int iStep27 = iStep26 + iStep; 2212 Int iStep28 = iStep27 + iStep; 2213 Int iStep29 = iStep28 + iStep; 2214 Int iStep30 = iStep29 + iStep; 2215 Int iStep31 = iStep30 + iStep; 2216 Int iStep32 = iStep31 + iStep; 2217 Int iStep33 = iStep32 + iStep; 2218 Int iStep34 = iStep33 + iStep; 2219 Int iStep35 = iStep34 + iStep; 2220 Int iStep36 = iStep35 + iStep; 2221 Int iStep37 = iStep36 + iStep; 2222 Int iStep38 = iStep37 + iStep; 2223 Int iStep39 = iStep38 + iStep; 2224 Int iStep40 = iStep39 + iStep; 2225 Int iStep41 = iStep40 + iStep; 2226 Int iStep42 = iStep41 + iStep; 2227 Int iStep43 = iStep42 + iStep; 2228 Int iStep44 = iStep43 + iStep; 2229 Int iStep45 = iStep44 + iStep; 2230 Int iStep46 = iStep45 + iStep; 2231 Int iStep47 = iStep46 + iStep; 2232 Int iStep48 = iStep47 + iStep; 2233 Int iStep49 = iStep48 + iStep; 2234 Int iStep50 = iStep49 + iStep; 2235 Int iStep51 = iStep50 + iStep; 2236 Int iStep52 = iStep51 + iStep; 2237 Int iStep53 = iStep52 + iStep; 2238 Int iStep54 = iStep53 + iStep; 2239 Int iStep55 = iStep54 + iStep; 2240 Int iStep56 = iStep55 + iStep; 2241 Int iStep57 = iStep56 + iStep; 2242 Int iStep58 = iStep57 + iStep; 2243 Int iStep59 = iStep58 + iStep; 2244 Int iStep60 = iStep59 + iStep; 2245 Int iStep61 = iStep60 + iStep; 2246 Int iStep62 = iStep61 + iStep; 2247 Int iStep63 = iStep62 + iStep; 2248 2249 UInt uiSum = 0; 2250 2251 for( ; iRows != 0; iRows-- ) 2252 { 2253 uiSum += abs( piOrg[ 0] - piCur[ 0] ); 2254 uiSum += abs( piOrg[ 1] - piCur[iStep ] ); 2255 uiSum += abs( piOrg[ 2] - piCur[iStep2 ] ); 2256 uiSum += abs( piOrg[ 3] - piCur[iStep3 ] ); 2257 uiSum += abs( piOrg[ 4] - piCur[iStep4 ] ); 2258 uiSum += abs( piOrg[ 5] - piCur[iStep5 ] ); 2259 uiSum += abs( piOrg[ 6] - piCur[iStep6 ] ); 2260 uiSum += abs( piOrg[ 7] - piCur[iStep7 ] ); 2261 uiSum += abs( piOrg[ 8] - piCur[iStep8 ] ); 2262 uiSum += abs( piOrg[ 9] - piCur[iStep9 ] ); 2263 uiSum += abs( piOrg[10] - piCur[iStep10] ); 2264 uiSum += abs( piOrg[11] - piCur[iStep11] ); 2265 uiSum += abs( piOrg[12] - piCur[iStep12] ); 2266 uiSum += abs( piOrg[13] - piCur[iStep13] ); 2267 uiSum += abs( piOrg[14] - piCur[iStep14] ); 2268 uiSum += abs( piOrg[15] - piCur[iStep15] ); 2269 uiSum += abs( piOrg[16] - piCur[iStep16] ); 2270 uiSum += abs( piOrg[17] - piCur[iStep17] ); 2271 uiSum += abs( piOrg[18] - piCur[iStep18] ); 2272 uiSum += abs( piOrg[19] - piCur[iStep19] ); 2273 uiSum += abs( piOrg[20] - piCur[iStep20] ); 2274 uiSum += abs( piOrg[21] - piCur[iStep21] ); 2275 uiSum += abs( piOrg[22] - piCur[iStep22] ); 2276 uiSum += abs( piOrg[23] - piCur[iStep23] ); 2277 uiSum += abs( piOrg[24] - piCur[iStep24] ); 2278 uiSum += abs( piOrg[25] - piCur[iStep25] ); 2279 uiSum += abs( piOrg[26] - piCur[iStep26] ); 2280 uiSum += abs( piOrg[27] - piCur[iStep27] ); 2281 uiSum += abs( piOrg[28] - piCur[iStep28] ); 2282 uiSum += abs( piOrg[29] - piCur[iStep29] ); 2283 uiSum += abs( piOrg[30] - piCur[iStep30] ); 2284 uiSum += abs( piOrg[31] - piCur[iStep31] ); 2285 uiSum += abs( piOrg[32] - piCur[iStep32] ); 2286 uiSum += abs( piOrg[33] - piCur[iStep33] ); 2287 uiSum += abs( piOrg[34] - piCur[iStep34] ); 2288 uiSum += abs( piOrg[35] - piCur[iStep35] ); 2289 uiSum += abs( piOrg[36] - piCur[iStep36] ); 2290 uiSum += abs( piOrg[37] - piCur[iStep37] ); 2291 uiSum += abs( piOrg[38] - piCur[iStep38] ); 2292 uiSum += abs( piOrg[39] - piCur[iStep39] ); 2293 uiSum += abs( piOrg[40] - piCur[iStep40] ); 2294 uiSum += abs( piOrg[41] - piCur[iStep41] ); 2295 uiSum += abs( piOrg[42] - piCur[iStep42] ); 2296 uiSum += abs( piOrg[43] - piCur[iStep43] ); 2297 uiSum += abs( piOrg[44] - piCur[iStep44] ); 2298 uiSum += abs( piOrg[45] - piCur[iStep45] ); 2299 uiSum += abs( piOrg[46] - piCur[iStep46] ); 2300 uiSum += abs( piOrg[47] - piCur[iStep47] ); 2301 uiSum += abs( piOrg[48] - piCur[iStep48] ); 2302 uiSum += abs( piOrg[49] - piCur[iStep49] ); 2303 uiSum += abs( piOrg[50] - piCur[iStep50] ); 2304 uiSum += abs( piOrg[51] - piCur[iStep51] ); 2305 uiSum += abs( piOrg[52] - piCur[iStep52] ); 2306 uiSum += abs( piOrg[53] - piCur[iStep53] ); 2307 uiSum += abs( piOrg[54] - piCur[iStep54] ); 2308 uiSum += abs( piOrg[55] - piCur[iStep55] ); 2309 uiSum += abs( piOrg[56] - piCur[iStep56] ); 2310 uiSum += abs( piOrg[57] - piCur[iStep57] ); 2311 uiSum += abs( piOrg[58] - piCur[iStep58] ); 2312 uiSum += abs( piOrg[59] - piCur[iStep59] ); 2313 uiSum += abs( piOrg[60] - piCur[iStep60] ); 2314 uiSum += abs( piOrg[61] - piCur[iStep61] ); 2315 uiSum += abs( piOrg[62] - piCur[iStep62] ); 2316 uiSum += abs( piOrg[63] - piCur[iStep63] ); 2317 2318 piOrg += iStrideOrg; 2319 piCur += iStrideCur; 2320 } 2321 2322 return ( uiSum >> g_uiBitIncrement ); 2323 } 1052 #endif 2324 1053 2325 1054 // -------------------------------------------------------------------------------------------------------------------- … … 2328 1057 2329 1058 #if IBDI_DISTORTION 2330 #ifdef ROUNDING_CONTROL_BIPRED2331 UInt TComRdCost::xGetSSE( DistParam* pcDtParam, Pel* pRefY, Bool bRound )2332 {2333 Pel* piOrg = pcDtParam->pOrg;2334 Pel* piCur = pcDtParam->pCur;2335 Pel* piRef = pRefY;2336 Int iRows = pcDtParam->iRows;2337 Int iCols = pcDtParam->iCols;2338 Int iStrideOrg = pcDtParam->iStrideOrg;2339 Int iStrideCur = pcDtParam->iStrideCur;2340 Pel pred;2341 2342 UInt uiSum = 0;2343 Int iShift = g_uiBitIncrement;2344 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;2345 2346 Int iTemp;2347 2348 for( ; iRows != 0; iRows-- )2349 {2350 for (Int n = 0; n < iCols; n++ )2351 {2352 pred = (piCur[n] + piRef[n] + bRound) >> 1 ;2353 iTemp = ((piOrg[n]+iOffset)>>iShift) - ((pred+iOffset)>>iShift);2354 uiSum += iTemp * iTemp;2355 }2356 piOrg += iStrideOrg;2357 piCur += iStrideCur;2358 piRef += iCols;2359 }2360 2361 return ( uiSum );2362 }2363 2364 UInt TComRdCost::xGetSSE4( DistParam* pcDtParam, Pel* pRefY, Bool bRound )2365 {2366 Pel* piOrg = pcDtParam->pOrg;2367 Pel* piCur = pcDtParam->pCur;2368 Pel* piRef = pRefY;2369 Int iRows = pcDtParam->iRows;2370 Int iStrideOrg = pcDtParam->iStrideOrg;2371 Int iStrideCur = pcDtParam->iStrideCur;2372 Int iStrideRef = pcDtParam->iCols;2373 Pel pred;2374 2375 UInt uiSum = 0;2376 Int iShift = g_uiBitIncrement;2377 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;2378 2379 Int iTemp;2380 2381 for( ; iRows != 0; iRows-- )2382 {2383 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = ((piOrg[0]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2384 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = ((piOrg[1]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2385 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = ((piOrg[2]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2386 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = ((piOrg[3]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2387 2388 piOrg += iStrideOrg;2389 piCur += iStrideCur;2390 piRef += iStrideRef;2391 }2392 2393 return ( uiSum );2394 }2395 2396 UInt TComRdCost::xGetSSE8( DistParam* pcDtParam, Pel* pRefY, Bool bRound )2397 {2398 Pel* piOrg = pcDtParam->pOrg;2399 Pel* piCur = pcDtParam->pCur;2400 Pel* piRef = pRefY;2401 Int iRows = pcDtParam->iRows;2402 Int iStrideOrg = pcDtParam->iStrideOrg;2403 Int iStrideCur = pcDtParam->iStrideCur;2404 Int iStrideRef = pcDtParam->iCols;2405 Pel pred;2406 2407 UInt uiSum = 0;2408 Int iShift = g_uiBitIncrement;2409 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;2410 2411 Int iTemp;2412 2413 for( ; iRows != 0; iRows-- )2414 {2415 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = ((piOrg[0]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2416 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = ((piOrg[1]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2417 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = ((piOrg[2]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2418 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = ((piOrg[3]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2419 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = ((piOrg[4]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2420 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = ((piOrg[5]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2421 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = ((piOrg[6]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2422 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = ((piOrg[7]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2423 2424 piOrg += iStrideOrg;2425 piCur += iStrideCur;2426 piRef += iStrideRef;2427 }2428 2429 return ( uiSum );2430 }2431 2432 UInt TComRdCost::xGetSSE16( DistParam* pcDtParam, Pel* pRefY, Bool bRound )2433 {2434 Pel* piOrg = pcDtParam->pOrg;2435 Pel* piCur = pcDtParam->pCur;2436 Pel* piRef = pRefY;2437 Int iRows = pcDtParam->iRows;2438 Int iStrideOrg = pcDtParam->iStrideOrg;2439 Int iStrideCur = pcDtParam->iStrideCur;2440 Int iStrideRef = pcDtParam->iCols;2441 Pel pred;2442 2443 UInt uiSum = 0;2444 Int iShift = g_uiBitIncrement;2445 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;2446 2447 Int iTemp;2448 2449 for( ; iRows != 0; iRows-- )2450 {2451 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = ((piOrg[0]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2452 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = ((piOrg[1]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2453 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = ((piOrg[2]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2454 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = ((piOrg[3]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2455 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = ((piOrg[4]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2456 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = ((piOrg[5]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2457 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = ((piOrg[6]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2458 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = ((piOrg[7]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2459 pred = (piCur[8] + piRef[8] + bRound) >> 1; iTemp = ((piOrg[8]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2460 pred = (piCur[9] + piRef[9] + bRound) >> 1; iTemp = ((piOrg[9]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2461 pred = (piCur[10] + piRef[10] + bRound) >> 1; iTemp = ((piOrg[10]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2462 pred = (piCur[11] + piRef[11] + bRound) >> 1; iTemp = ((piOrg[11]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2463 pred = (piCur[12] + piRef[12] + bRound) >> 1; iTemp = ((piOrg[12]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2464 pred = (piCur[13] + piRef[13] + bRound) >> 1; iTemp = ((piOrg[13]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2465 pred = (piCur[14] + piRef[14] + bRound) >> 1; iTemp = ((piOrg[14]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2466 pred = (piCur[15] + piRef[15] + bRound) >> 1; iTemp = ((piOrg[15]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2467 2468 piOrg += iStrideOrg;2469 piCur += iStrideCur;2470 piRef += iStrideRef;2471 }2472 2473 return ( uiSum );2474 }2475 2476 UInt TComRdCost::xGetSSE16N( DistParam* pcDtParam, Pel* pRefY, Bool bRound )2477 {2478 Pel* piOrg = pcDtParam->pOrg;2479 Pel* piCur = pcDtParam->pCur;2480 Pel* piRef = pRefY;2481 Int iRows = pcDtParam->iRows;2482 Int iCols = pcDtParam->iCols;2483 Int iStrideOrg = pcDtParam->iStrideOrg;2484 Int iStrideCur = pcDtParam->iStrideCur;2485 Pel pred;2486 2487 UInt uiSum = 0;2488 Int iShift = g_uiBitIncrement;2489 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;2490 Int iTemp;2491 2492 for( ; iRows != 0; iRows-- )2493 {2494 for (Int n = 0; n < iCols; n+=16 )2495 {2496 pred = (piCur[n+ 0] + piRef[n+ 0] + bRound) >> 1; iTemp = ((piOrg[n+ 0]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2497 pred = (piCur[n+ 1] + piRef[n+ 1] + bRound) >> 1; iTemp = ((piOrg[n+ 1]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2498 pred = (piCur[n+ 2] + piRef[n+ 2] + bRound) >> 1; iTemp = ((piOrg[n+ 2]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2499 pred = (piCur[n+ 3] + piRef[n+ 3] + bRound) >> 1; iTemp = ((piOrg[n+ 3]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2500 pred = (piCur[n+ 4] + piRef[n+ 4] + bRound) >> 1; iTemp = ((piOrg[n+ 4]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2501 pred = (piCur[n+ 5] + piRef[n+ 5] + bRound) >> 1; iTemp = ((piOrg[n+ 5]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2502 pred = (piCur[n+ 6] + piRef[n+ 6] + bRound) >> 1; iTemp = ((piOrg[n+ 6]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2503 pred = (piCur[n+ 7] + piRef[n+ 7] + bRound) >> 1; iTemp = ((piOrg[n+ 7]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2504 pred = (piCur[n+ 8] + piRef[n+ 8] + bRound) >> 1; iTemp = ((piOrg[n+ 8]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2505 pred = (piCur[n+ 9] + piRef[n+ 9] + bRound) >> 1; iTemp = ((piOrg[n+ 9]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2506 pred = (piCur[n+ 10] + piRef[n+ 10] + bRound) >> 1; iTemp = ((piOrg[n+ 10]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2507 pred = (piCur[n+ 11] + piRef[n+ 11] + bRound) >> 1; iTemp = ((piOrg[n+ 11]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2508 pred = (piCur[n+ 12] + piRef[n+ 12] + bRound) >> 1; iTemp = ((piOrg[n+ 12]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2509 pred = (piCur[n+ 13] + piRef[n+ 13] + bRound) >> 1; iTemp = ((piOrg[n+ 13]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2510 pred = (piCur[n+ 14] + piRef[n+ 14] + bRound) >> 1; iTemp = ((piOrg[n+ 14]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2511 pred = (piCur[n+ 15] + piRef[n+ 15] + bRound) >> 1; iTemp = ((piOrg[n+ 15]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2512 }2513 piOrg += iStrideOrg;2514 piCur += iStrideCur;2515 piRef += iCols;2516 }2517 2518 return ( uiSum );2519 }2520 2521 UInt TComRdCost::xGetSSE32( DistParam* pcDtParam, Pel* pRefY, Bool bRound )2522 {2523 Pel* piOrg = pcDtParam->pOrg;2524 Pel* piCur = pcDtParam->pCur;2525 Pel* piRef = pRefY;2526 Int iRows = pcDtParam->iRows;2527 Int iStrideOrg = pcDtParam->iStrideOrg;2528 Int iStrideCur = pcDtParam->iStrideCur;2529 Int iStrideRef = pcDtParam->iCols;2530 Pel pred;2531 2532 UInt uiSum = 0;2533 Int iShift = g_uiBitIncrement;2534 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;2535 Int iTemp;2536 2537 for( ; iRows != 0; iRows-- )2538 {2539 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = ((piOrg[0]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2540 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = ((piOrg[1]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2541 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = ((piOrg[2]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2542 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = ((piOrg[3]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2543 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = ((piOrg[4]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2544 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = ((piOrg[5]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2545 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = ((piOrg[6]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2546 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = ((piOrg[7]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2547 pred = (piCur[8] + piRef[8] + bRound) >> 1; iTemp = ((piOrg[8]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2548 pred = (piCur[9] + piRef[9] + bRound) >> 1; iTemp = ((piOrg[9]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2549 pred = (piCur[10] + piRef[10] + bRound) >> 1; iTemp = ((piOrg[10]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2550 pred = (piCur[11] + piRef[11] + bRound) >> 1; iTemp = ((piOrg[11]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2551 pred = (piCur[12] + piRef[12] + bRound) >> 1; iTemp = ((piOrg[12]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2552 pred = (piCur[13] + piRef[13] + bRound) >> 1; iTemp = ((piOrg[13]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2553 pred = (piCur[14] + piRef[14] + bRound) >> 1; iTemp = ((piOrg[14]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2554 pred = (piCur[15] + piRef[15] + bRound) >> 1; iTemp = ((piOrg[15]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2555 pred = (piCur[16] + piRef[16] + bRound) >> 1; iTemp = ((piOrg[16]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2556 pred = (piCur[17] + piRef[17] + bRound) >> 1; iTemp = ((piOrg[17]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2557 pred = (piCur[18] + piRef[18] + bRound) >> 1; iTemp = ((piOrg[18]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2558 pred = (piCur[19] + piRef[19] + bRound) >> 1; iTemp = ((piOrg[19]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2559 pred = (piCur[20] + piRef[20] + bRound) >> 1; iTemp = ((piOrg[20]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2560 pred = (piCur[21] + piRef[21] + bRound) >> 1; iTemp = ((piOrg[21]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2561 pred = (piCur[22] + piRef[22] + bRound) >> 1; iTemp = ((piOrg[22]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2562 pred = (piCur[23] + piRef[23] + bRound) >> 1; iTemp = ((piOrg[23]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2563 pred = (piCur[24] + piRef[24] + bRound) >> 1; iTemp = ((piOrg[24]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2564 pred = (piCur[25] + piRef[25] + bRound) >> 1; iTemp = ((piOrg[25]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2565 pred = (piCur[26] + piRef[26] + bRound) >> 1; iTemp = ((piOrg[26]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2566 pred = (piCur[27] + piRef[27] + bRound) >> 1; iTemp = ((piOrg[27]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2567 pred = (piCur[28] + piRef[28] + bRound) >> 1; iTemp = ((piOrg[28]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2568 pred = (piCur[29] + piRef[29] + bRound) >> 1; iTemp = ((piOrg[29]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2569 pred = (piCur[30] + piRef[30] + bRound) >> 1; iTemp = ((piOrg[30]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2570 pred = (piCur[31] + piRef[31] + bRound) >> 1; iTemp = ((piOrg[31]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2571 2572 piOrg += iStrideOrg;2573 piCur += iStrideCur;2574 piRef += iStrideRef;2575 }2576 2577 return ( uiSum );2578 }2579 2580 UInt TComRdCost::xGetSSE64( DistParam* pcDtParam, Pel* pRefY, Bool bRound )2581 {2582 Pel* piOrg = pcDtParam->pOrg;2583 Pel* piCur = pcDtParam->pCur;2584 Pel* piRef = pRefY;2585 Int iRows = pcDtParam->iRows;2586 Int iStrideOrg = pcDtParam->iStrideOrg;2587 Int iStrideCur = pcDtParam->iStrideCur;2588 Int iStrideRef = pcDtParam->iCols;2589 Pel pred;2590 2591 UInt uiSum = 0;2592 Int iShift = g_uiBitIncrement;2593 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;2594 Int iTemp;2595 2596 for( ; iRows != 0; iRows-- )2597 {2598 2599 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = ((piOrg[0]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2600 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = ((piOrg[1]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2601 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = ((piOrg[2]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2602 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = ((piOrg[3]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2603 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = ((piOrg[4]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2604 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = ((piOrg[5]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2605 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = ((piOrg[6]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2606 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = ((piOrg[7]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2607 pred = (piCur[8] + piRef[8] + bRound) >> 1; iTemp = ((piOrg[8]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2608 pred = (piCur[9] + piRef[9] + bRound) >> 1; iTemp = ((piOrg[9]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2609 pred = (piCur[10] + piRef[10] + bRound) >> 1; iTemp = ((piOrg[10]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2610 pred = (piCur[11] + piRef[11] + bRound) >> 1; iTemp = ((piOrg[11]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2611 pred = (piCur[12] + piRef[12] + bRound) >> 1; iTemp = ((piOrg[12]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2612 pred = (piCur[13] + piRef[13] + bRound) >> 1; iTemp = ((piOrg[13]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2613 pred = (piCur[14] + piRef[14] + bRound) >> 1; iTemp = ((piOrg[14]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2614 pred = (piCur[15] + piRef[15] + bRound) >> 1; iTemp = ((piOrg[15]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2615 pred = (piCur[16] + piRef[16] + bRound) >> 1; iTemp = ((piOrg[16]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2616 pred = (piCur[17] + piRef[17] + bRound) >> 1; iTemp = ((piOrg[17]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2617 pred = (piCur[18] + piRef[18] + bRound) >> 1; iTemp = ((piOrg[18]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2618 pred = (piCur[19] + piRef[19] + bRound) >> 1; iTemp = ((piOrg[19]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2619 pred = (piCur[20] + piRef[20] + bRound) >> 1; iTemp = ((piOrg[20]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2620 pred = (piCur[21] + piRef[21] + bRound) >> 1; iTemp = ((piOrg[21]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2621 pred = (piCur[22] + piRef[22] + bRound) >> 1; iTemp = ((piOrg[22]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2622 pred = (piCur[23] + piRef[23] + bRound) >> 1; iTemp = ((piOrg[23]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2623 pred = (piCur[24] + piRef[24] + bRound) >> 1; iTemp = ((piOrg[24]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2624 pred = (piCur[25] + piRef[25] + bRound) >> 1; iTemp = ((piOrg[25]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2625 pred = (piCur[26] + piRef[26] + bRound) >> 1; iTemp = ((piOrg[26]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2626 pred = (piCur[27] + piRef[27] + bRound) >> 1; iTemp = ((piOrg[27]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2627 pred = (piCur[28] + piRef[28] + bRound) >> 1; iTemp = ((piOrg[28]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2628 pred = (piCur[29] + piRef[29] + bRound) >> 1; iTemp = ((piOrg[29]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2629 2630 pred = (piCur[30] + piRef[30] + bRound) >> 1; iTemp = ((piOrg[30]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2631 pred = (piCur[31] + piRef[31] + bRound) >> 1; iTemp = ((piOrg[31]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2632 pred = (piCur[32] + piRef[32] + bRound) >> 1; iTemp = ((piOrg[32]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2633 pred = (piCur[33] + piRef[33] + bRound) >> 1; iTemp = ((piOrg[33]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2634 pred = (piCur[34] + piRef[34] + bRound) >> 1; iTemp = ((piOrg[34]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2635 pred = (piCur[35] + piRef[35] + bRound) >> 1; iTemp = ((piOrg[35]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2636 pred = (piCur[36] + piRef[36] + bRound) >> 1; iTemp = ((piOrg[36]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2637 pred = (piCur[37] + piRef[37] + bRound) >> 1; iTemp = ((piOrg[37]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2638 pred = (piCur[38] + piRef[38] + bRound) >> 1; iTemp = ((piOrg[38]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2639 pred = (piCur[39] + piRef[39] + bRound) >> 1; iTemp = ((piOrg[39]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2640 2641 pred = (piCur[40] + piRef[40] + bRound) >> 1; iTemp = ((piOrg[40]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2642 pred = (piCur[41] + piRef[41] + bRound) >> 1; iTemp = ((piOrg[41]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2643 pred = (piCur[42] + piRef[42] + bRound) >> 1; iTemp = ((piOrg[42]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2644 pred = (piCur[43] + piRef[43] + bRound) >> 1; iTemp = ((piOrg[43]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2645 pred = (piCur[44] + piRef[44] + bRound) >> 1; iTemp = ((piOrg[44]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2646 pred = (piCur[45] + piRef[45] + bRound) >> 1; iTemp = ((piOrg[45]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2647 pred = (piCur[46] + piRef[46] + bRound) >> 1; iTemp = ((piOrg[46]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2648 pred = (piCur[47] + piRef[47] + bRound) >> 1; iTemp = ((piOrg[47]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2649 pred = (piCur[48] + piRef[48] + bRound) >> 1; iTemp = ((piOrg[48]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2650 pred = (piCur[49] + piRef[49] + bRound) >> 1; iTemp = ((piOrg[49]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2651 2652 pred = (piCur[50] + piRef[50] + bRound) >> 1; iTemp = ((piOrg[50]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2653 pred = (piCur[51] + piRef[51] + bRound) >> 1; iTemp = ((piOrg[51]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2654 pred = (piCur[52] + piRef[52] + bRound) >> 1; iTemp = ((piOrg[52]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2655 pred = (piCur[53] + piRef[53] + bRound) >> 1; iTemp = ((piOrg[53]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2656 pred = (piCur[54] + piRef[54] + bRound) >> 1; iTemp = ((piOrg[54]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2657 pred = (piCur[55] + piRef[55] + bRound) >> 1; iTemp = ((piOrg[55]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2658 pred = (piCur[56] + piRef[56] + bRound) >> 1; iTemp = ((piOrg[56]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2659 pred = (piCur[57] + piRef[57] + bRound) >> 1; iTemp = ((piOrg[57]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2660 pred = (piCur[58] + piRef[58] + bRound) >> 1; iTemp = ((piOrg[58]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2661 pred = (piCur[59] + piRef[59] + bRound) >> 1; iTemp = ((piOrg[59]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2662 2663 pred = (piCur[60] + piRef[60] + bRound) >> 1; iTemp = ((piOrg[60]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2664 pred = (piCur[61] + piRef[61] + bRound) >> 1; iTemp = ((piOrg[61]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2665 pred = (piCur[62] + piRef[62] + bRound) >> 1; iTemp = ((piOrg[62]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2666 pred = (piCur[63] + piRef[63] + bRound) >> 1; iTemp = ((piOrg[63]+iOffset)>>iShift) - ((pred+iOffset)>>iShift); uiSum += iTemp * iTemp;2667 2668 piOrg += iStrideOrg;2669 piCur += iStrideCur;2670 piRef += iStrideRef;2671 }2672 2673 return ( uiSum );2674 }2675 #endif2676 2677 2678 1059 UInt TComRdCost::xGetSSE( DistParam* pcDtParam ) 2679 1060 { 2680 #ifdef WEIGHT_PRED2681 if ( pcDtParam->applyWeight )2682 {2683 return xGetSSEw( pcDtParam , pRefY, bRound );2684 }2685 #endif2686 1061 Pel* piOrg = pcDtParam->pOrg; 2687 1062 Pel* piCur = pcDtParam->pCur; … … 2713 1088 UInt TComRdCost::xGetSSE4( DistParam* pcDtParam ) 2714 1089 { 2715 #ifdef WEIGHT_PRED2716 if ( pcDtParam->applyWeight )2717 {2718 assert( pcDtParam->iCols == 4 );2719 return xGetSSEw( pcDtParam , pRefY, bRound );2720 }2721 #endif2722 1090 Pel* piOrg = pcDtParam->pOrg; 2723 1091 Pel* piCur = pcDtParam->pCur; … … 2749 1117 UInt TComRdCost::xGetSSE8( DistParam* pcDtParam ) 2750 1118 { 2751 #ifdef WEIGHT_PRED2752 if ( pcDtParam->applyWeight )2753 {2754 assert( pcDtParam->iCols == 8 );2755 return xGetSSEw( pcDtParam , pRefY, bRound );2756 }2757 #endif2758 1119 Pel* piOrg = pcDtParam->pOrg; 2759 1120 Pel* piCur = pcDtParam->pCur; … … 2788 1149 UInt TComRdCost::xGetSSE16( DistParam* pcDtParam ) 2789 1150 { 2790 #ifdef WEIGHT_PRED2791 if ( pcDtParam->applyWeight )2792 {2793 assert( pcDtParam->iCols == 16 );2794 return xGetSSEw( pcDtParam , pRefY, bRound );2795 }2796 #endif2797 1151 Pel* piOrg = pcDtParam->pOrg; 2798 1152 Pel* piCur = pcDtParam->pCur; … … 2836 1190 UInt TComRdCost::xGetSSE16N( DistParam* pcDtParam ) 2837 1191 { 2838 #ifdef WEIGHT_PRED2839 if ( pcDtParam->applyWeight )2840 {2841 assert( pcDtParam->iCols == 16 );2842 return xGetSSEw( pcDtParam , pRefY, bRound );2843 }2844 #endif2845 1192 Pel* piOrg = pcDtParam->pOrg; 2846 1193 Pel* piCur = pcDtParam->pCur; … … 2887 1234 UInt TComRdCost::xGetSSE32( DistParam* pcDtParam ) 2888 1235 { 2889 #ifdef WEIGHT_PRED2890 if ( pcDtParam->applyWeight )2891 {2892 assert( pcDtParam->iCols == 32 );2893 return xGetSSEw( pcDtParam , pRefY, bRound );2894 }2895 #endif2896 1236 Pel* piOrg = pcDtParam->pOrg; 2897 1237 Pel* piCur = pcDtParam->pCur; … … 2950 1290 UInt TComRdCost::xGetSSE64( DistParam* pcDtParam ) 2951 1291 { 2952 #ifdef WEIGHT_PRED2953 if ( pcDtParam->applyWeight )2954 {2955 assert( pcDtParam->iCols == 64 );2956 return xGetSSEw( pcDtParam , pRefY, bRound );2957 }2958 #endif2959 1292 Pel* piOrg = pcDtParam->pOrg; 2960 1293 Pel* piCur = pcDtParam->pCur; … … 3042 1375 } 3043 1376 #else 3044 #ifdef ROUNDING_CONTROL_BIPRED3045 UInt TComRdCost::xGetSSE( DistParam* pcDtParam, Pel* pRefY, Bool bRound )3046 {3047 Pel* piOrg = pcDtParam->pOrg;3048 Pel* piCur = pcDtParam->pCur;3049 Pel* piRef = pRefY;3050 Int iRows = pcDtParam->iRows;3051 Int iCols = pcDtParam->iCols;3052 Int iStrideOrg = pcDtParam->iStrideOrg;3053 Int iStrideCur = pcDtParam->iStrideCur;3054 Pel pred;3055 3056 UInt uiSum = 0;3057 UInt uiShift = g_uiBitIncrement<<1;3058 3059 Int iTemp;3060 3061 for( ; iRows != 0; iRows-- )3062 {3063 for (Int n = 0; n < iCols; n++ )3064 {3065 pred = (piCur[n] + piRef[n] + bRound) >> 1 ;3066 iTemp = piOrg[n] - pred;3067 uiSum += ( iTemp * iTemp ) >> uiShift;3068 }3069 piOrg += iStrideOrg;3070 piCur += iStrideCur;3071 piRef += iCols;3072 }3073 3074 return ( uiSum );3075 }3076 3077 UInt TComRdCost::xGetSSE4( DistParam* pcDtParam, Pel* pRefY, Bool bRound )3078 {3079 Pel* piOrg = pcDtParam->pOrg;3080 Pel* piCur = pcDtParam->pCur;3081 Pel* piRef = pRefY;3082 Int iRows = pcDtParam->iRows;3083 Int iStrideOrg = pcDtParam->iStrideOrg;3084 Int iStrideCur = pcDtParam->iStrideCur;3085 Int iStrideRef = pcDtParam->iCols;3086 Pel pred;3087 3088 UInt uiSum = 0;3089 UInt uiShift = g_uiBitIncrement<<1;3090 3091 Int iTemp;3092 3093 for( ; iRows != 0; iRows-- )3094 {3095 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = piOrg[0] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3096 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = piOrg[1] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3097 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = piOrg[2] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3098 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = piOrg[3] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3099 3100 piOrg += iStrideOrg;3101 piCur += iStrideCur;3102 piRef += iStrideRef;3103 }3104 3105 return ( uiSum );3106 }3107 3108 UInt TComRdCost::xGetSSE8( DistParam* pcDtParam, Pel* pRefY, Bool bRound )3109 {3110 Pel* piOrg = pcDtParam->pOrg;3111 Pel* piCur = pcDtParam->pCur;3112 Pel* piRef = pRefY;3113 Int iRows = pcDtParam->iRows;3114 Int iStrideOrg = pcDtParam->iStrideOrg;3115 Int iStrideCur = pcDtParam->iStrideCur;3116 Int iStrideRef = pcDtParam->iCols;3117 Pel pred;3118 3119 UInt uiSum = 0;3120 UInt uiShift = g_uiBitIncrement<<1;3121 3122 Int iTemp;3123 3124 for( ; iRows != 0; iRows-- )3125 {3126 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = piOrg[0] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3127 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = piOrg[1] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3128 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = piOrg[2] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3129 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = piOrg[3] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3130 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = piOrg[4] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3131 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = piOrg[5] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3132 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = piOrg[6] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3133 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = piOrg[7] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3134 3135 piOrg += iStrideOrg;3136 piCur += iStrideCur;3137 piRef += iStrideRef;3138 }3139 3140 return ( uiSum );3141 }3142 3143 UInt TComRdCost::xGetSSE16( DistParam* pcDtParam, Pel* pRefY, Bool bRound )3144 {3145 Pel* piOrg = pcDtParam->pOrg;3146 Pel* piCur = pcDtParam->pCur;3147 Pel* piRef = pRefY;3148 Int iRows = pcDtParam->iRows;3149 Int iStrideOrg = pcDtParam->iStrideOrg;3150 Int iStrideCur = pcDtParam->iStrideCur;3151 Int iStrideRef = pcDtParam->iCols;3152 Pel pred;3153 3154 UInt uiSum = 0;3155 UInt uiShift = g_uiBitIncrement<<1;3156 3157 Int iTemp;3158 3159 for( ; iRows != 0; iRows-- )3160 {3161 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = piOrg[0] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3162 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = piOrg[1] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3163 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = piOrg[2] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3164 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = piOrg[3] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3165 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = piOrg[4] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3166 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = piOrg[5] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3167 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = piOrg[6] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3168 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = piOrg[7] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3169 pred = (piCur[8] + piRef[8] + bRound) >> 1; iTemp = piOrg[8] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3170 pred = (piCur[9] + piRef[9] + bRound) >> 1; iTemp = piOrg[9] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3171 pred = (piCur[10] + piRef[10] + bRound) >> 1; iTemp = piOrg[10] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3172 pred = (piCur[11] + piRef[11] + bRound) >> 1; iTemp = piOrg[11] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3173 pred = (piCur[12] + piRef[12] + bRound) >> 1; iTemp = piOrg[12] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3174 pred = (piCur[13] + piRef[13] + bRound) >> 1; iTemp = piOrg[13] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3175 pred = (piCur[14] + piRef[14] + bRound) >> 1; iTemp = piOrg[14] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3176 pred = (piCur[15] + piRef[15] + bRound) >> 1; iTemp = piOrg[15] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3177 3178 piOrg += iStrideOrg;3179 piCur += iStrideCur;3180 piRef += iStrideRef;3181 }3182 3183 return ( uiSum );3184 }3185 3186 UInt TComRdCost::xGetSSE16N( DistParam* pcDtParam, Pel* pRefY, Bool bRound )3187 {3188 Pel* piOrg = pcDtParam->pOrg;3189 Pel* piCur = pcDtParam->pCur;3190 Pel* piRef = pRefY;3191 Int iRows = pcDtParam->iRows;3192 Int iCols = pcDtParam->iCols;3193 Int iStrideOrg = pcDtParam->iStrideOrg;3194 Int iStrideCur = pcDtParam->iStrideCur;3195 Pel pred;3196 3197 UInt uiSum = 0;3198 UInt uiShift = g_uiBitIncrement<<1;3199 Int iTemp;3200 3201 for( ; iRows != 0; iRows-- )3202 {3203 for (Int n = 0; n < iCols; n+=16 )3204 {3205 pred = (piCur[n+ 0] + piRef[n+ 0] + bRound) >> 1; iTemp = piOrg[n+ 0] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3206 pred = (piCur[n+ 1] + piRef[n+ 1] + bRound) >> 1; iTemp = piOrg[n+ 1] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3207 pred = (piCur[n+ 2] + piRef[n+ 2] + bRound) >> 1; iTemp = piOrg[n+ 2] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3208 pred = (piCur[n+ 3] + piRef[n+ 3] + bRound) >> 1; iTemp = piOrg[n+ 3] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3209 pred = (piCur[n+ 4] + piRef[n+ 4] + bRound) >> 1; iTemp = piOrg[n+ 4] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3210 pred = (piCur[n+ 5] + piRef[n+ 5] + bRound) >> 1; iTemp = piOrg[n+ 5] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3211 pred = (piCur[n+ 6] + piRef[n+ 6] + bRound) >> 1; iTemp = piOrg[n+ 6] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3212 pred = (piCur[n+ 7] + piRef[n+ 7] + bRound) >> 1; iTemp = piOrg[n+ 7] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3213 pred = (piCur[n+ 8] + piRef[n+ 8] + bRound) >> 1; iTemp = piOrg[n+ 8] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3214 pred = (piCur[n+ 9] + piRef[n+ 9] + bRound) >> 1; iTemp = piOrg[n+ 9] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3215 pred = (piCur[n+ 10] + piRef[n+ 10] + bRound) >> 1; iTemp = piOrg[n+ 10] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3216 pred = (piCur[n+ 11] + piRef[n+ 11] + bRound) >> 1; iTemp = piOrg[n+ 11] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3217 pred = (piCur[n+ 12] + piRef[n+ 12] + bRound) >> 1; iTemp = piOrg[n+ 12] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3218 pred = (piCur[n+ 13] + piRef[n+ 13] + bRound) >> 1; iTemp = piOrg[n+ 13] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3219 pred = (piCur[n+ 14] + piRef[n+ 14] + bRound) >> 1; iTemp = piOrg[n+ 14] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3220 pred = (piCur[n+ 15] + piRef[n+ 15] + bRound) >> 1; iTemp = piOrg[n+ 15] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3221 }3222 piOrg += iStrideOrg;3223 piCur += iStrideCur;3224 piRef += iCols;3225 }3226 3227 return ( uiSum );3228 }3229 3230 UInt TComRdCost::xGetSSE32( DistParam* pcDtParam, Pel* pRefY, Bool bRound )3231 {3232 Pel* piOrg = pcDtParam->pOrg;3233 Pel* piCur = pcDtParam->pCur;3234 Pel* piRef = pRefY;3235 Int iRows = pcDtParam->iRows;3236 Int iStrideOrg = pcDtParam->iStrideOrg;3237 Int iStrideCur = pcDtParam->iStrideCur;3238 Int iStrideRef = pcDtParam->iCols;3239 Pel pred;3240 3241 UInt uiSum = 0;3242 UInt uiShift = g_uiBitIncrement<<1;3243 Int iTemp;3244 3245 for( ; iRows != 0; iRows-- )3246 {3247 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = piOrg[0] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3248 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = piOrg[1] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3249 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = piOrg[2] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3250 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = piOrg[3] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3251 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = piOrg[4] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3252 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = piOrg[5] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3253 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = piOrg[6] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3254 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = piOrg[7] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3255 pred = (piCur[8] + piRef[8] + bRound) >> 1; iTemp = piOrg[8] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3256 pred = (piCur[9] + piRef[9] + bRound) >> 1; iTemp = piOrg[9] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3257 pred = (piCur[10] + piRef[10] + bRound) >> 1; iTemp = piOrg[10] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3258 pred = (piCur[11] + piRef[11] + bRound) >> 1; iTemp = piOrg[11] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3259 pred = (piCur[12] + piRef[12] + bRound) >> 1; iTemp = piOrg[12] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3260 pred = (piCur[13] + piRef[13] + bRound) >> 1; iTemp = piOrg[13] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3261 pred = (piCur[14] + piRef[14] + bRound) >> 1; iTemp = piOrg[14] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3262 pred = (piCur[15] + piRef[15] + bRound) >> 1; iTemp = piOrg[15] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3263 pred = (piCur[16] + piRef[16] + bRound) >> 1; iTemp = piOrg[16] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3264 pred = (piCur[17] + piRef[17] + bRound) >> 1; iTemp = piOrg[17] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3265 pred = (piCur[18] + piRef[18] + bRound) >> 1; iTemp = piOrg[18] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3266 pred = (piCur[19] + piRef[19] + bRound) >> 1; iTemp = piOrg[19] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3267 pred = (piCur[20] + piRef[20] + bRound) >> 1; iTemp = piOrg[20] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3268 pred = (piCur[21] + piRef[21] + bRound) >> 1; iTemp = piOrg[21] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3269 pred = (piCur[22] + piRef[22] + bRound) >> 1; iTemp = piOrg[22] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3270 pred = (piCur[23] + piRef[23] + bRound) >> 1; iTemp = piOrg[23] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3271 pred = (piCur[24] + piRef[24] + bRound) >> 1; iTemp = piOrg[24] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3272 pred = (piCur[25] + piRef[25] + bRound) >> 1; iTemp = piOrg[25] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3273 pred = (piCur[26] + piRef[26] + bRound) >> 1; iTemp = piOrg[26] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3274 pred = (piCur[27] + piRef[27] + bRound) >> 1; iTemp = piOrg[27] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3275 pred = (piCur[28] + piRef[28] + bRound) >> 1; iTemp = piOrg[28] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3276 pred = (piCur[29] + piRef[29] + bRound) >> 1; iTemp = piOrg[29] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3277 pred = (piCur[30] + piRef[30] + bRound) >> 1; iTemp = piOrg[30] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3278 pred = (piCur[31] + piRef[31] + bRound) >> 1; iTemp = piOrg[31] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3279 3280 piOrg += iStrideOrg;3281 piCur += iStrideCur;3282 piRef += iStrideRef;3283 }3284 3285 return ( uiSum );3286 }3287 3288 UInt TComRdCost::xGetSSE64( DistParam* pcDtParam, Pel* pRefY, Bool bRound )3289 {3290 Pel* piOrg = pcDtParam->pOrg;3291 Pel* piCur = pcDtParam->pCur;3292 Pel* piRef = pRefY;3293 Int iRows = pcDtParam->iRows;3294 Int iStrideOrg = pcDtParam->iStrideOrg;3295 Int iStrideCur = pcDtParam->iStrideCur;3296 Int iStrideRef = pcDtParam->iCols;3297 Pel pred;3298 3299 UInt uiSum = 0;3300 UInt uiShift = g_uiBitIncrement<<1;3301 Int iTemp;3302 3303 for( ; iRows != 0; iRows-- )3304 {3305 3306 pred = (piCur[0] + piRef[0] + bRound) >> 1; iTemp = piOrg[0] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3307 pred = (piCur[1] + piRef[1] + bRound) >> 1; iTemp = piOrg[1] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3308 pred = (piCur[2] + piRef[2] + bRound) >> 1; iTemp = piOrg[2] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3309 pred = (piCur[3] + piRef[3] + bRound) >> 1; iTemp = piOrg[3] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3310 pred = (piCur[4] + piRef[4] + bRound) >> 1; iTemp = piOrg[4] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3311 pred = (piCur[5] + piRef[5] + bRound) >> 1; iTemp = piOrg[5] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3312 pred = (piCur[6] + piRef[6] + bRound) >> 1; iTemp = piOrg[6] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3313 pred = (piCur[7] + piRef[7] + bRound) >> 1; iTemp = piOrg[7] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3314 pred = (piCur[8] + piRef[8] + bRound) >> 1; iTemp = piOrg[8] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3315 pred = (piCur[9] + piRef[9] + bRound) >> 1; iTemp = piOrg[9] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3316 pred = (piCur[10] + piRef[10] + bRound) >> 1; iTemp = piOrg[10] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3317 pred = (piCur[11] + piRef[11] + bRound) >> 1; iTemp = piOrg[11] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3318 pred = (piCur[12] + piRef[12] + bRound) >> 1; iTemp = piOrg[12] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3319 pred = (piCur[13] + piRef[13] + bRound) >> 1; iTemp = piOrg[13] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3320 pred = (piCur[14] + piRef[14] + bRound) >> 1; iTemp = piOrg[14] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3321 pred = (piCur[15] + piRef[15] + bRound) >> 1; iTemp = piOrg[15] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3322 pred = (piCur[16] + piRef[16] + bRound) >> 1; iTemp = piOrg[16] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3323 pred = (piCur[17] + piRef[17] + bRound) >> 1; iTemp = piOrg[17] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3324 pred = (piCur[18] + piRef[18] + bRound) >> 1; iTemp = piOrg[18] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3325 pred = (piCur[19] + piRef[19] + bRound) >> 1; iTemp = piOrg[19] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3326 pred = (piCur[20] + piRef[20] + bRound) >> 1; iTemp = piOrg[20] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3327 pred = (piCur[21] + piRef[21] + bRound) >> 1; iTemp = piOrg[21] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3328 pred = (piCur[22] + piRef[22] + bRound) >> 1; iTemp = piOrg[22] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3329 pred = (piCur[23] + piRef[23] + bRound) >> 1; iTemp = piOrg[23] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3330 pred = (piCur[24] + piRef[24] + bRound) >> 1; iTemp = piOrg[24] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3331 pred = (piCur[25] + piRef[25] + bRound) >> 1; iTemp = piOrg[25] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3332 pred = (piCur[26] + piRef[26] + bRound) >> 1; iTemp = piOrg[26] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3333 pred = (piCur[27] + piRef[27] + bRound) >> 1; iTemp = piOrg[27] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3334 pred = (piCur[28] + piRef[28] + bRound) >> 1; iTemp = piOrg[28] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3335 pred = (piCur[29] + piRef[29] + bRound) >> 1; iTemp = piOrg[29] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3336 3337 pred = (piCur[30] + piRef[30] + bRound) >> 1; iTemp = piOrg[30] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3338 pred = (piCur[31] + piRef[31] + bRound) >> 1; iTemp = piOrg[31] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3339 pred = (piCur[32] + piRef[32] + bRound) >> 1; iTemp = piOrg[32] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3340 pred = (piCur[33] + piRef[33] + bRound) >> 1; iTemp = piOrg[33] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3341 pred = (piCur[34] + piRef[34] + bRound) >> 1; iTemp = piOrg[34] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3342 pred = (piCur[35] + piRef[35] + bRound) >> 1; iTemp = piOrg[35] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3343 pred = (piCur[36] + piRef[36] + bRound) >> 1; iTemp = piOrg[36] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3344 pred = (piCur[37] + piRef[37] + bRound) >> 1; iTemp = piOrg[37] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3345 pred = (piCur[38] + piRef[38] + bRound) >> 1; iTemp = piOrg[38] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3346 pred = (piCur[39] + piRef[39] + bRound) >> 1; iTemp = piOrg[39] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3347 3348 pred = (piCur[40] + piRef[40] + bRound) >> 1; iTemp = piOrg[40] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3349 pred = (piCur[41] + piRef[41] + bRound) >> 1; iTemp = piOrg[41] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3350 pred = (piCur[42] + piRef[42] + bRound) >> 1; iTemp = piOrg[42] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3351 pred = (piCur[43] + piRef[43] + bRound) >> 1; iTemp = piOrg[43] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3352 pred = (piCur[44] + piRef[44] + bRound) >> 1; iTemp = piOrg[44] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3353 pred = (piCur[45] + piRef[45] + bRound) >> 1; iTemp = piOrg[45] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3354 pred = (piCur[46] + piRef[46] + bRound) >> 1; iTemp = piOrg[46] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3355 pred = (piCur[47] + piRef[47] + bRound) >> 1; iTemp = piOrg[47] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3356 pred = (piCur[48] + piRef[48] + bRound) >> 1; iTemp = piOrg[48] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3357 pred = (piCur[49] + piRef[49] + bRound) >> 1; iTemp = piOrg[49] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3358 3359 pred = (piCur[50] + piRef[50] + bRound) >> 1; iTemp = piOrg[50] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3360 pred = (piCur[51] + piRef[51] + bRound) >> 1; iTemp = piOrg[51] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3361 pred = (piCur[52] + piRef[52] + bRound) >> 1; iTemp = piOrg[52] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3362 pred = (piCur[53] + piRef[53] + bRound) >> 1; iTemp = piOrg[53] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3363 pred = (piCur[54] + piRef[54] + bRound) >> 1; iTemp = piOrg[54] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3364 pred = (piCur[55] + piRef[55] + bRound) >> 1; iTemp = piOrg[55] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3365 pred = (piCur[56] + piRef[56] + bRound) >> 1; iTemp = piOrg[56] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3366 pred = (piCur[57] + piRef[57] + bRound) >> 1; iTemp = piOrg[57] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3367 pred = (piCur[58] + piRef[58] + bRound) >> 1; iTemp = piOrg[58] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3368 pred = (piCur[59] + piRef[59] + bRound) >> 1; iTemp = piOrg[59] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3369 3370 pred = (piCur[60] + piRef[60] + bRound) >> 1; iTemp = piOrg[60] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3371 pred = (piCur[61] + piRef[61] + bRound) >> 1; iTemp = piOrg[61] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3372 pred = (piCur[62] + piRef[62] + bRound) >> 1; iTemp = piOrg[62] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3373 pred = (piCur[63] + piRef[63] + bRound) >> 1; iTemp = piOrg[63] - pred; uiSum += ( iTemp * iTemp ) >> uiShift;3374 3375 piOrg += iStrideOrg;3376 piCur += iStrideCur;3377 piRef += iStrideRef;3378 }3379 3380 return ( uiSum );3381 }3382 #endif3383 3384 1377 UInt TComRdCost::xGetSSE( DistParam* pcDtParam ) 3385 1378 { 3386 #ifdef WEIGHT_PRED 3387 if ( pcDtParam->applyWeight ) 1379 if ( pcDtParam->bApplyWeight ) 3388 1380 { 3389 1381 return xGetSSEw( pcDtParam ); 3390 1382 } 3391 #endif3392 1383 Pel* piOrg = pcDtParam->pOrg; 3393 1384 Pel* piCur = pcDtParam->pCur; … … 3412 1403 { 3413 1404 if( piUsed[n] ) 3414 3415 3416 3417 1405 { 1406 iTemp = piOrg[n ] - piCur[n ]; 1407 uiSum += ( iTemp * iTemp ) >> uiShift; 1408 } 3418 1409 } 3419 1410 piOrg += iStrideOrg; … … 3435 1426 piCur += iStrideCur; 3436 1427 } 3437 #if HHI_INTERVIEW_SKIP1428 #if HHI_INTERVIEW_SKIP 3438 1429 } 3439 1430 #endif … … 3444 1435 UInt TComRdCost::xGetSSE4( DistParam* pcDtParam ) 3445 1436 { 3446 #ifdef WEIGHT_PRED 3447 if ( pcDtParam->applyWeight ) 1437 if ( pcDtParam->bApplyWeight ) 3448 1438 { 3449 1439 assert( pcDtParam->iCols == 4 ); 3450 1440 return xGetSSEw( pcDtParam ); 3451 1441 } 3452 #endif3453 1442 Pel* piOrg = pcDtParam->pOrg; 3454 1443 Pel* piCur = pcDtParam->pCur; … … 3502 1491 UInt TComRdCost::xGetSSE8( DistParam* pcDtParam ) 3503 1492 { 3504 #ifdef WEIGHT_PRED 3505 if ( pcDtParam->applyWeight ) 1493 if ( pcDtParam->bApplyWeight ) 3506 1494 { 3507 1495 assert( pcDtParam->iCols == 8 ); 3508 1496 return xGetSSEw( pcDtParam ); 3509 1497 } 3510 #endif3511 1498 Pel* piOrg = pcDtParam->pOrg; 3512 1499 Pel* piCur = pcDtParam->pCur; … … 3567 1554 UInt TComRdCost::xGetSSE16( DistParam* pcDtParam ) 3568 1555 { 3569 #ifdef WEIGHT_PRED 3570 if ( pcDtParam->applyWeight ) 1556 if ( pcDtParam->bApplyWeight ) 3571 1557 { 3572 1558 assert( pcDtParam->iCols == 16 ); 3573 1559 return xGetSSEw( pcDtParam ); 3574 1560 } 3575 #endif3576 1561 Pel* piOrg = pcDtParam->pOrg; 3577 1562 Pel* piCur = pcDtParam->pCur; … … 3649 1634 UInt TComRdCost::xGetSSE16N( DistParam* pcDtParam ) 3650 1635 { 3651 #ifdef WEIGHT_PRED 3652 if ( pcDtParam->applyWeight ) 1636 if ( pcDtParam->bApplyWeight ) 3653 1637 { 3654 1638 return xGetSSEw( pcDtParam ); 3655 1639 } 3656 #endif3657 1640 Pel* piOrg = pcDtParam->pOrg; 3658 1641 Pel* piCur = pcDtParam->pCur; … … 3735 1718 UInt TComRdCost::xGetSSE32( DistParam* pcDtParam ) 3736 1719 { 3737 #ifdef WEIGHT_PRED 3738 if ( pcDtParam->applyWeight ) 1720 if ( pcDtParam->bApplyWeight ) 3739 1721 { 3740 1722 assert( pcDtParam->iCols == 32 ); 3741 1723 return xGetSSEw( pcDtParam ); 3742 1724 } 3743 #endif3744 1725 Pel* piOrg = pcDtParam->pOrg; 3745 1726 Pel* piCur = pcDtParam->pCur; … … 3848 1829 UInt TComRdCost::xGetSSE64( DistParam* pcDtParam ) 3849 1830 { 3850 #ifdef WEIGHT_PRED 3851 if ( pcDtParam->applyWeight ) 1831 if ( pcDtParam->bApplyWeight ) 3852 1832 { 3853 1833 assert( pcDtParam->iCols == 64 ); 3854 1834 return xGetSSEw( pcDtParam ); 3855 1835 } 3856 #endif3857 1836 Pel* piOrg = pcDtParam->pOrg; 3858 1837 Pel* piCur = pcDtParam->pCur; … … 4027 2006 // -------------------------------------------------------------------------------------------------------------------- 4028 2007 4029 #ifdef ROUNDING_CONTROL_BIPRED 4030 4031 UInt TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep, Pel* pRefY, Int refYStride, Bool bRound ) 2008 UInt TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 4032 2009 { 4033 2010 Int satd = 0, diff[4], m[4]; 4034 Pel pred; 4035 4036 pred = ( (piCur[0] + pRefY[0] + bRound) >> 1); 4037 diff[0] = (piOrg[0 ] - pred) << 1; 4038 pred = ( (piCur[iStep] + pRefY[1] + bRound) >> 1); 4039 diff[1] = (piOrg[1 ] - pred) << 1; 4040 pred = ( (piCur[iStrideCur] + pRefY[refYStride] + bRound) >> 1); 4041 diff[2] = (piOrg[iStrideOrg ] - pred) << 1; 4042 pred = ( (piCur[iStep + iStrideCur] + pRefY[refYStride + 1] + bRound) >> 1); 4043 diff[3] = (piOrg[iStrideOrg + 1] - pred) << 1; 4044 2011 assert( iStep == 1 ); 2012 diff[0] = piOrg[0 ] - piCur[0]; 2013 diff[1] = piOrg[1 ] - piCur[1]; 2014 diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur]; 2015 diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur]; 4045 2016 m[0] = diff[0] + diff[2]; 4046 2017 m[1] = diff[1] + diff[3]; … … 4056 2027 } 4057 2028 4058 UInt TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep , Pel* pRefY, Int refYStride, Bool bRound)2029 UInt TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 4059 2030 { 4060 2031 Int k, satd = 0, diff[16], m[16], d[16]; 4061 Pel pred; 4062 Pel* piRef = pRefY; 4063 2032 2033 assert( iStep == 1 ); 4064 2034 for( k = 0; k < 16; k+=4 ) 4065 2035 { 4066 pred = ( (piCur[0*iStep] + piRef[0] + bRound) >> 1); 4067 diff[k+0] = (piOrg[0] - pred) << 1; 4068 pred = ( (piCur[1*iStep] + piRef[1] + bRound) >> 1); 4069 diff[k+1] = (piOrg[1] - pred) << 1; 4070 pred = ( (piCur[2*iStep] + piRef[2] + bRound) >> 1); 4071 diff[k+2] = (piOrg[2] - pred) << 1; 4072 pred = ( (piCur[3*iStep] + piRef[3] + bRound) >> 1); 4073 diff[k+3] = (piOrg[3] - pred) << 1; 4074 2036 diff[k+0] = piOrg[0] - piCur[0]; 2037 diff[k+1] = piOrg[1] - piCur[1]; 2038 diff[k+2] = piOrg[2] - piCur[2]; 2039 diff[k+3] = piOrg[3] - piCur[3]; 2040 4075 2041 piCur += iStrideCur; 4076 2042 piOrg += iStrideOrg; 4077 piRef += refYStride;4078 2043 } 4079 2044 … … 4156 2121 } 4157 2122 4158 UInt TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep , Pel* pRefY, Int refYStride, Bool bRound)2123 UInt TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 4159 2124 { 4160 2125 Int k, i, j, jj, sad=0; 4161 2126 Int diff[64], m1[8][8], m2[8][8], m3[8][8]; 4162 Pel pred; 4163 Pel* piRef = pRefY; 4164 Int iStep2 = iStep<<1; 4165 Int iStep3 = iStep2 + iStep; 4166 Int iStep4 = iStep3 + iStep; 4167 Int iStep5 = iStep4 + iStep; 4168 Int iStep6 = iStep5 + iStep; 4169 Int iStep7 = iStep6 + iStep; 4170 4171 for( k = 0; k < 64; k+=8 ) 4172 { 4173 pred = ( (piCur[0 ] + piRef[0] + bRound) >> 1 ); diff[k ] = (piOrg[0] - pred) << 1; 4174 pred = ( (piCur[iStep ] + piRef[1] + bRound) >> 1 ); diff[k+1] = (piOrg[1] - pred) << 1; 4175 pred = ( (piCur[iStep2] + piRef[2] + bRound) >> 1 ); diff[k+2] = (piOrg[2] - pred) << 1; 4176 pred = ( (piCur[iStep3] + piRef[3] + bRound) >> 1 ); diff[k+3] = (piOrg[3] - pred) << 1; 4177 pred = ( (piCur[iStep4] + piRef[4] + bRound) >> 1 ); diff[k+4] = (piOrg[4] - pred) << 1; 4178 pred = ( (piCur[iStep5] + piRef[5] + bRound) >> 1 ); diff[k+5] = (piOrg[5] - pred) << 1; 4179 pred = ( (piCur[iStep6] + piRef[6] + bRound) >> 1 ); diff[k+6] = (piOrg[6] - pred) << 1; 4180 pred = ( (piCur[iStep7] + piRef[7] + bRound) >> 1 ); diff[k+7] = (piOrg[7] - pred) << 1; 2127 assert( iStep == 1 ); 2128 for( k = 0; k < 64; k += 8 ) 2129 { 2130 diff[k+0] = piOrg[0] - piCur[0]; 2131 diff[k+1] = piOrg[1] - piCur[1]; 2132 diff[k+2] = piOrg[2] - piCur[2]; 2133 diff[k+3] = piOrg[3] - piCur[3]; 2134 diff[k+4] = piOrg[4] - piCur[4]; 2135 diff[k+5] = piOrg[5] - piCur[5]; 2136 diff[k+6] = piOrg[6] - piCur[6]; 2137 diff[k+7] = piOrg[7] - piCur[7]; 2138 4181 2139 piCur += iStrideCur; 4182 2140 piOrg += iStrideOrg; 4183 piRef += refYStride;4184 }2141 } 2142 4185 2143 //horizontal 4186 2144 for (j=0; j < 8; j++) … … 4245 2203 m2[7][i] = m1[6][i] - m1[7][i]; 4246 2204 } 4247 for (j=0; j < 8; j++) 4248 for (i=0; i < 8; i++) 4249 sad += (abs(m2[j][i])); 2205 2206 for (i = 0; i < 8; i++) 2207 { 2208 for (j = 0; j < 8; j++) 2209 { 2210 sad += abs(m2[i][j]); 2211 } 2212 } 4250 2213 4251 2214 sad=((sad+2)>>2); … … 4254 2217 } 4255 2218 4256 UInt TComRdCost::xGetHADs4( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 4257 { 4258 Pel* piOrg = pcDtParam->pOrg; 4259 Pel* piCur = pcDtParam->pCur; 4260 Pel* piRef = pRefY; 4261 Int iRows = pcDtParam->iRows; 4262 Int iStrideCur = pcDtParam->iStrideCur; 4263 Int iStrideOrg = pcDtParam->iStrideOrg; 4264 Int iStep = pcDtParam->iStep; 4265 Int y; 4266 Int iOffsetOrg = iStrideOrg<<2; 4267 Int iOffsetCur = iStrideCur<<2; 4268 4269 UInt uiSum = 0; 4270 4271 for ( y=0; y<iRows; y+= 4 ) 4272 { 4273 uiSum += xCalcHADs4x4( piOrg, piCur, iStrideOrg, iStrideCur, iStep, piRef, pcDtParam->iCols, bRound ); 4274 piOrg += iOffsetOrg; 4275 piCur += iOffsetCur; 4276 piRef += (pcDtParam->iCols << 2); 4277 } 4278 4279 return ( uiSum >> g_uiBitIncrement ); 4280 } 4281 4282 UInt TComRdCost::xGetHADs8( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 4283 { 4284 Pel* piOrg = pcDtParam->pOrg; 4285 Pel* piCur = pcDtParam->pCur; 4286 Pel* piRef = pRefY; 4287 Int iRows = pcDtParam->iRows; 4288 Int iStrideCur = pcDtParam->iStrideCur; 4289 Int iStrideOrg = pcDtParam->iStrideOrg; 4290 Int iStep = pcDtParam->iStep; 4291 Int y; 4292 4293 UInt uiSum = 0; 4294 4295 if ( iRows == 4 ) // 8x4 case 4296 { 4297 uiSum += xCalcHADs4x4( piOrg+0, piCur , iStrideOrg, iStrideCur, iStep, piRef, pcDtParam->iCols, bRound ); 4298 uiSum += xCalcHADs4x4( piOrg+4, piCur+4*iStep, iStrideOrg, iStrideCur, iStep, piRef+4, pcDtParam->iCols, bRound ); 4299 } 4300 else 4301 { 4302 Int iOffsetOrg = iStrideOrg<<3; 4303 Int iOffsetCur = iStrideCur<<3; 4304 for ( y=0; y<iRows; y+= 8 ) 4305 { 4306 uiSum += xCalcHADs8x8( piOrg, piCur, iStrideOrg, iStrideCur, iStep, piRef, pcDtParam->iCols, bRound ); 4307 piOrg += iOffsetOrg; 4308 piCur += iOffsetCur; 4309 piRef += (pcDtParam->iCols << 3); 4310 } 4311 } 4312 4313 return ( uiSum >> g_uiBitIncrement ); 4314 } 4315 4316 UInt TComRdCost::xGetHADs( DistParam* pcDtParam, Pel* pRefY, Bool bRound ) 4317 { 4318 Pel* piOrg = pcDtParam->pOrg; 4319 Pel* piCur = pcDtParam->pCur; 4320 Pel* piRef = pRefY; 4321 Int iRows = pcDtParam->iRows; 4322 Int iCols = pcDtParam->iCols; 4323 Int iStrideCur = pcDtParam->iStrideCur; 4324 Int iStrideOrg = pcDtParam->iStrideOrg; 4325 Int iStep = pcDtParam->iStep; 4326 4327 Int x, y; 4328 4329 UInt uiSum = 0; 4330 4331 if( ( iRows % 8 == 0) && (iCols % 8 == 0) ) 4332 { 4333 Int iOffsetOrg = iStrideOrg<<3; 4334 Int iOffsetCur = iStrideCur<<3; 4335 for ( y=0; y<iRows; y+= 8 ) 4336 { 4337 for ( x=0; x<iCols; x+= 8 ) // do HAD over 8xiCols pixels 4338 { 4339 uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep, &piRef[x], iCols, bRound ); 4340 } 4341 piOrg += iOffsetOrg; 4342 piCur += iOffsetCur; 4343 piRef += (iCols << 3); 4344 } 4345 } 4346 else if( ( iRows % 4 == 0) && (iCols % 4 == 0) ) 4347 { 4348 Int iOffsetOrg = iStrideOrg<<2; 4349 Int iOffsetCur = iStrideCur<<2; 4350 4351 for ( y=0; y<iRows; y+= 4 ) 4352 { 4353 for ( x=0; x<iCols; x+= 4 ) // do HAD over 4xiCols pixels 4354 { 4355 uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep, &piRef[x], iCols, bRound ); 4356 } 4357 piOrg += iOffsetOrg; 4358 piCur += iOffsetCur; 4359 piRef += (iCols << 2); 4360 } 4361 } 4362 else 4363 { 4364 for ( y=0; y<iRows; y+=2 ) 4365 { 4366 for ( x=0; x<iCols; x+=2 )// do HAD over 2xiCols pixels 4367 { 4368 uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep, &piRef[x], iCols, bRound ); 4369 } 4370 piOrg += iStrideOrg; // should this be (iStrideOrg << 1) ? 4371 piCur += iStrideCur; 4372 piRef += iCols; 4373 } 4374 } 4375 4376 return ( uiSum >> g_uiBitIncrement ); 4377 } 4378 4379 #endif 4380 4381 UInt TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 4382 { 4383 Int satd = 0, diff[4], m[4]; 4384 diff[0] = piOrg[0 ] - piCur[0*iStep]; 4385 diff[1] = piOrg[1 ] - piCur[1*iStep]; 4386 diff[2] = piOrg[iStrideOrg ] - piCur[0*iStep + iStrideCur]; 4387 diff[3] = piOrg[iStrideOrg + 1] - piCur[1*iStep + iStrideCur]; 4388 4389 m[0] = diff[0] + diff[2]; 4390 m[1] = diff[1] + diff[3]; 4391 m[2] = diff[0] - diff[2]; 4392 m[3] = diff[1] - diff[3]; 4393 4394 satd += abs(m[0] + m[1]); 4395 satd += abs(m[0] - m[1]); 4396 satd += abs(m[2] + m[3]); 4397 satd += abs(m[2] - m[3]); 4398 4399 return satd; 4400 } 4401 4402 UInt TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 4403 { 4404 Int k, satd = 0, diff[16], m[16], d[16]; 4405 4406 for( k = 0; k < 16; k+=4 ) 4407 { 4408 diff[k+0] = piOrg[0] - piCur[0*iStep]; 4409 diff[k+1] = piOrg[1] - piCur[1*iStep]; 4410 diff[k+2] = piOrg[2] - piCur[2*iStep]; 4411 diff[k+3] = piOrg[3] - piCur[3*iStep]; 4412 2219 #if NS_HAD 2220 UInt TComRdCost::xCalcHADs16x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 2221 { 2222 Int k, i, j, jj, sad=0; 2223 Int diff[64], m1[4][16], m2[4][16]; 2224 assert( iStep == 1 ); 2225 for( k = 0; k < 64; k += 16 ) 2226 { 2227 diff[k+0] = piOrg[0] - piCur[0]; 2228 diff[k+1] = piOrg[1] - piCur[1]; 2229 diff[k+2] = piOrg[2] - piCur[2]; 2230 diff[k+3] = piOrg[3] - piCur[3]; 2231 diff[k+4] = piOrg[4] - piCur[4]; 2232 diff[k+5] = piOrg[5] - piCur[5]; 2233 diff[k+6] = piOrg[6] - piCur[6]; 2234 diff[k+7] = piOrg[7] - piCur[7]; 2235 2236 diff[k+8] = piOrg[8] - piCur[8] ; 2237 diff[k+9] = piOrg[9] - piCur[9] ; 2238 diff[k+10] = piOrg[10] - piCur[10]; 2239 diff[k+11] = piOrg[11] - piCur[11]; 2240 diff[k+12] = piOrg[12] - piCur[12]; 2241 diff[k+13] = piOrg[13] - piCur[13]; 2242 diff[k+14] = piOrg[14] - piCur[14]; 2243 diff[k+15] = piOrg[15] - piCur[15]; 2244 4413 2245 piCur += iStrideCur; 4414 2246 piOrg += iStrideOrg; 4415 2247 } 4416 4417 /*===== hadamard transform =====*/ 4418 m[ 0] = diff[ 0] + diff[12]; 4419 m[ 1] = diff[ 1] + diff[13]; 4420 m[ 2] = diff[ 2] + diff[14]; 4421 m[ 3] = diff[ 3] + diff[15]; 4422 m[ 4] = diff[ 4] + diff[ 8]; 4423 m[ 5] = diff[ 5] + diff[ 9]; 4424 m[ 6] = diff[ 6] + diff[10]; 4425 m[ 7] = diff[ 7] + diff[11]; 4426 m[ 8] = diff[ 4] - diff[ 8]; 4427 m[ 9] = diff[ 5] - diff[ 9]; 4428 m[10] = diff[ 6] - diff[10]; 4429 m[11] = diff[ 7] - diff[11]; 4430 m[12] = diff[ 0] - diff[12]; 4431 m[13] = diff[ 1] - diff[13]; 4432 m[14] = diff[ 2] - diff[14]; 4433 m[15] = diff[ 3] - diff[15]; 4434 4435 d[ 0] = m[ 0] + m[ 4]; 4436 d[ 1] = m[ 1] + m[ 5]; 4437 d[ 2] = m[ 2] + m[ 6]; 4438 d[ 3] = m[ 3] + m[ 7]; 4439 d[ 4] = m[ 8] + m[12]; 4440 d[ 5] = m[ 9] + m[13]; 4441 d[ 6] = m[10] + m[14]; 4442 d[ 7] = m[11] + m[15]; 4443 d[ 8] = m[ 0] - m[ 4]; 4444 d[ 9] = m[ 1] - m[ 5]; 4445 d[10] = m[ 2] - m[ 6]; 4446 d[11] = m[ 3] - m[ 7]; 4447 d[12] = m[12] - m[ 8]; 4448 d[13] = m[13] - m[ 9]; 4449 d[14] = m[14] - m[10]; 4450 d[15] = m[15] - m[11]; 4451 4452 m[ 0] = d[ 0] + d[ 3]; 4453 m[ 1] = d[ 1] + d[ 2]; 4454 m[ 2] = d[ 1] - d[ 2]; 4455 m[ 3] = d[ 0] - d[ 3]; 4456 m[ 4] = d[ 4] + d[ 7]; 4457 m[ 5] = d[ 5] + d[ 6]; 4458 m[ 6] = d[ 5] - d[ 6]; 4459 m[ 7] = d[ 4] - d[ 7]; 4460 m[ 8] = d[ 8] + d[11]; 4461 m[ 9] = d[ 9] + d[10]; 4462 m[10] = d[ 9] - d[10]; 4463 m[11] = d[ 8] - d[11]; 4464 m[12] = d[12] + d[15]; 4465 m[13] = d[13] + d[14]; 4466 m[14] = d[13] - d[14]; 4467 m[15] = d[12] - d[15]; 4468 4469 d[ 0] = m[ 0] + m[ 1]; 4470 d[ 1] = m[ 0] - m[ 1]; 4471 d[ 2] = m[ 2] + m[ 3]; 4472 d[ 3] = m[ 3] - m[ 2]; 4473 d[ 4] = m[ 4] + m[ 5]; 4474 d[ 5] = m[ 4] - m[ 5]; 4475 d[ 6] = m[ 6] + m[ 7]; 4476 d[ 7] = m[ 7] - m[ 6]; 4477 d[ 8] = m[ 8] + m[ 9]; 4478 d[ 9] = m[ 8] - m[ 9]; 4479 d[10] = m[10] + m[11]; 4480 d[11] = m[11] - m[10]; 4481 d[12] = m[12] + m[13]; 4482 d[13] = m[12] - m[13]; 4483 d[14] = m[14] + m[15]; 4484 d[15] = m[15] - m[14]; 4485 4486 for (k=0; k<16; ++k) 4487 { 4488 satd += abs(d[k]); 4489 } 4490 satd = ((satd+1)>>1); 4491 4492 return satd; 4493 } 4494 4495 UInt TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 2248 2249 //horizontal 2250 for (j=0; j < 4; j++) 2251 { 2252 jj = j << 4; 2253 2254 m2[j][0] = diff[jj ] + diff[jj+8]; 2255 m2[j][1] = diff[jj+1] + diff[jj+9]; 2256 m2[j][2] = diff[jj+2] + diff[jj+10]; 2257 m2[j][3] = diff[jj+3] + diff[jj+11]; 2258 m2[j][4] = diff[jj+4] + diff[jj+12]; 2259 m2[j][5] = diff[jj+5] + diff[jj+13]; 2260 m2[j][6] = diff[jj+6] + diff[jj+14]; 2261 m2[j][7] = diff[jj+7] + diff[jj+15]; 2262 m2[j][8] = diff[jj ] - diff[jj+8]; 2263 m2[j][9] = diff[jj+1] - diff[jj+9]; 2264 m2[j][10] = diff[jj+2] - diff[jj+10]; 2265 m2[j][11] = diff[jj+3] - diff[jj+11]; 2266 m2[j][12] = diff[jj+4] - diff[jj+12]; 2267 m2[j][13] = diff[jj+5] - diff[jj+13]; 2268 m2[j][14] = diff[jj+6] - diff[jj+14]; 2269 m2[j][15] = diff[jj+7] - diff[jj+15]; 2270 2271 m1[j][0] = m2[j][0] + m2[j][4]; 2272 m1[j][1] = m2[j][1] + m2[j][5]; 2273 m1[j][2] = m2[j][2] + m2[j][6]; 2274 m1[j][3] = m2[j][3] + m2[j][7]; 2275 m1[j][4] = m2[j][0] - m2[j][4]; 2276 m1[j][5] = m2[j][1] - m2[j][5]; 2277 m1[j][6] = m2[j][2] - m2[j][6]; 2278 m1[j][7] = m2[j][3] - m2[j][7]; 2279 m1[j][8] = m2[j][8] + m2[j][12]; 2280 m1[j][9] = m2[j][9] + m2[j][13]; 2281 m1[j][10] = m2[j][10] + m2[j][14]; 2282 m1[j][11] = m2[j][11] + m2[j][15]; 2283 m1[j][12] = m2[j][8] - m2[j][12]; 2284 m1[j][13] = m2[j][9] - m2[j][13]; 2285 m1[j][14] = m2[j][10] - m2[j][14]; 2286 m1[j][15] = m2[j][11] - m2[j][15]; 2287 2288 m2[j][0] = m1[j][0] + m1[j][2]; 2289 m2[j][1] = m1[j][1] + m1[j][3]; 2290 m2[j][2] = m1[j][0] - m1[j][2]; 2291 m2[j][3] = m1[j][1] - m1[j][3]; 2292 m2[j][4] = m1[j][4] + m1[j][6]; 2293 m2[j][5] = m1[j][5] + m1[j][7]; 2294 m2[j][6] = m1[j][4] - m1[j][6]; 2295 m2[j][7] = m1[j][5] - m1[j][7]; 2296 m2[j][8] = m1[j][8] + m1[j][10]; 2297 m2[j][9] = m1[j][9] + m1[j][11]; 2298 m2[j][10] = m1[j][8] - m1[j][10]; 2299 m2[j][11] = m1[j][9] - m1[j][11]; 2300 m2[j][12] = m1[j][12] + m1[j][14]; 2301 m2[j][13] = m1[j][13] + m1[j][15]; 2302 m2[j][14] = m1[j][12] - m1[j][14]; 2303 m2[j][15] = m1[j][13] - m1[j][15]; 2304 2305 m1[j][0] = m2[j][0] + m2[j][1]; 2306 m1[j][1] = m2[j][0] - m2[j][1]; 2307 m1[j][2] = m2[j][2] + m2[j][3]; 2308 m1[j][3] = m2[j][2] - m2[j][3]; 2309 m1[j][4] = m2[j][4] + m2[j][5]; 2310 m1[j][5] = m2[j][4] - m2[j][5]; 2311 m1[j][6] = m2[j][6] + m2[j][7]; 2312 m1[j][7] = m2[j][6] - m2[j][7]; 2313 m1[j][8] = m2[j][8] + m2[j][9]; 2314 m1[j][9] = m2[j][8] - m2[j][9]; 2315 m1[j][10] = m2[j][10] + m2[j][11]; 2316 m1[j][11] = m2[j][10] - m2[j][11]; 2317 m1[j][12] = m2[j][12] + m2[j][13]; 2318 m1[j][13] = m2[j][12] - m2[j][13]; 2319 m1[j][14] = m2[j][14] + m2[j][15]; 2320 m1[j][15] = m2[j][14] - m2[j][15]; 2321 } 2322 2323 //vertical 2324 for (i=0; i < 16; i++) 2325 { 2326 m2[0][i] = m1[0][i] + m1[2][i]; 2327 m2[1][i] = m1[1][i] + m1[3][i]; 2328 m2[2][i] = m1[0][i] - m1[2][i]; 2329 m2[3][i] = m1[1][i] - m1[3][i]; 2330 2331 m1[0][i] = m2[0][i] + m2[1][i]; 2332 m1[1][i] = m2[0][i] - m2[1][i]; 2333 m1[2][i] = m2[2][i] + m2[3][i]; 2334 m1[3][i] = m2[2][i] - m2[3][i]; 2335 } 2336 2337 for (i = 0; i < 4; i++) 2338 { 2339 for (j = 0; j < 16; j++) 2340 { 2341 sad += abs(m1[i][j]); 2342 } 2343 } 2344 2345 sad=((sad+2)>>2); 2346 2347 return sad; 2348 } 2349 2350 UInt TComRdCost::xCalcHADs4x16( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 4496 2351 { 4497 2352 Int k, i, j, jj, sad=0; 4498 Int diff[64], m1[8][8], m2[8][8], m3[8][8]; 4499 Int iStep2 = iStep<<1; 4500 Int iStep3 = iStep2 + iStep; 4501 Int iStep4 = iStep3 + iStep; 4502 Int iStep5 = iStep4 + iStep; 4503 Int iStep6 = iStep5 + iStep; 4504 Int iStep7 = iStep6 + iStep; 4505 4506 for( k = 0; k < 64; k+=8 ) 4507 { 4508 diff[k+0] = piOrg[0] - piCur[ 0]; 4509 diff[k+1] = piOrg[1] - piCur[iStep ]; 4510 diff[k+2] = piOrg[2] - piCur[iStep2]; 4511 diff[k+3] = piOrg[3] - piCur[iStep3]; 4512 diff[k+4] = piOrg[4] - piCur[iStep4]; 4513 diff[k+5] = piOrg[5] - piCur[iStep5]; 4514 diff[k+6] = piOrg[6] - piCur[iStep6]; 4515 diff[k+7] = piOrg[7] - piCur[iStep7]; 4516 2353 Int diff[64], m1[16][4], m2[16][4], m3[16][4]; 2354 assert( iStep == 1 ); 2355 for( k = 0; k < 64; k += 4 ) 2356 { 2357 diff[k+0] = piOrg[0] - piCur[0]; 2358 diff[k+1] = piOrg[1] - piCur[1]; 2359 diff[k+2] = piOrg[2] - piCur[2]; 2360 diff[k+3] = piOrg[3] - piCur[3]; 2361 4517 2362 piCur += iStrideCur; 4518 2363 piOrg += iStrideOrg; 4519 2364 } 4520 2365 4521 2366 //horizontal 4522 for (j=0; j < 8; j++) 4523 { 4524 jj = j << 3; 4525 m2[j][0] = diff[jj ] + diff[jj+4]; 4526 m2[j][1] = diff[jj+1] + diff[jj+5]; 4527 m2[j][2] = diff[jj+2] + diff[jj+6]; 4528 m2[j][3] = diff[jj+3] + diff[jj+7]; 4529 m2[j][4] = diff[jj ] - diff[jj+4]; 4530 m2[j][5] = diff[jj+1] - diff[jj+5]; 4531 m2[j][6] = diff[jj+2] - diff[jj+6]; 4532 m2[j][7] = diff[jj+3] - diff[jj+7]; 4533 4534 m1[j][0] = m2[j][0] + m2[j][2]; 4535 m1[j][1] = m2[j][1] + m2[j][3]; 4536 m1[j][2] = m2[j][0] - m2[j][2]; 4537 m1[j][3] = m2[j][1] - m2[j][3]; 4538 m1[j][4] = m2[j][4] + m2[j][6]; 4539 m1[j][5] = m2[j][5] + m2[j][7]; 4540 m1[j][6] = m2[j][4] - m2[j][6]; 4541 m1[j][7] = m2[j][5] - m2[j][7]; 4542 4543 m2[j][0] = m1[j][0] + m1[j][1]; 4544 m2[j][1] = m1[j][0] - m1[j][1]; 4545 m2[j][2] = m1[j][2] + m1[j][3]; 4546 m2[j][3] = m1[j][2] - m1[j][3]; 4547 m2[j][4] = m1[j][4] + m1[j][5]; 4548 m2[j][5] = m1[j][4] - m1[j][5]; 4549 m2[j][6] = m1[j][6] + m1[j][7]; 4550 m2[j][7] = m1[j][6] - m1[j][7]; 4551 } 4552 2367 for (j=0; j < 16; j++) 2368 { 2369 jj = j << 2; 2370 m2[j][0] = diff[jj ] + diff[jj+2]; 2371 m2[j][1] = diff[jj+1] + diff[jj+3]; 2372 m2[j][2] = diff[jj ] - diff[jj+2]; 2373 m2[j][3] = diff[jj+1] - diff[jj+3]; 2374 2375 m1[j][0] = m2[j][0] + m2[j][1]; 2376 m1[j][1] = m2[j][0] - m2[j][1]; 2377 m1[j][2] = m2[j][2] + m2[j][3]; 2378 m1[j][3] = m2[j][2] - m2[j][3]; 2379 } 2380 4553 2381 //vertical 4554 for (i=0; i < 8; i++) 4555 { 4556 m3[0][i] = m2[0][i] + m2[4][i]; 4557 m3[1][i] = m2[1][i] + m2[5][i]; 4558 m3[2][i] = m2[2][i] + m2[6][i]; 4559 m3[3][i] = m2[3][i] + m2[7][i]; 4560 m3[4][i] = m2[0][i] - m2[4][i]; 4561 m3[5][i] = m2[1][i] - m2[5][i]; 4562 m3[6][i] = m2[2][i] - m2[6][i]; 4563 m3[7][i] = m2[3][i] - m2[7][i]; 4564 4565 m1[0][i] = m3[0][i] + m3[2][i]; 4566 m1[1][i] = m3[1][i] + m3[3][i]; 4567 m1[2][i] = m3[0][i] - m3[2][i]; 4568 m1[3][i] = m3[1][i] - m3[3][i]; 4569 m1[4][i] = m3[4][i] + m3[6][i]; 4570 m1[5][i] = m3[5][i] + m3[7][i]; 4571 m1[6][i] = m3[4][i] - m3[6][i]; 4572 m1[7][i] = m3[5][i] - m3[7][i]; 4573 4574 m2[0][i] = m1[0][i] + m1[1][i]; 4575 m2[1][i] = m1[0][i] - m1[1][i]; 4576 m2[2][i] = m1[2][i] + m1[3][i]; 4577 m2[3][i] = m1[2][i] - m1[3][i]; 4578 m2[4][i] = m1[4][i] + m1[5][i]; 4579 m2[5][i] = m1[4][i] - m1[5][i]; 4580 m2[6][i] = m1[6][i] + m1[7][i]; 4581 m2[7][i] = m1[6][i] - m1[7][i]; 4582 } 4583 4584 for (j=0; j < 8; j++) 4585 { 4586 for (i=0; i < 8; i++) 4587 sad += (abs(m2[j][i])); 4588 } 4589 2382 for (i=0; i < 4; i++) 2383 { 2384 m2[0][i] = m1[0][i] + m1[8][i]; 2385 m2[1][i] = m1[1][i] + m1[9][i]; 2386 m2[2][i] = m1[2][i] + m1[10][i]; 2387 m2[3][i] = m1[3][i] + m1[11][i]; 2388 m2[4][i] = m1[4][i] + m1[12][i]; 2389 m2[5][i] = m1[5][i] + m1[13][i]; 2390 m2[6][i] = m1[6][i] + m1[14][i]; 2391 m2[7][i] = m1[7][i] + m1[15][i]; 2392 m2[8][i] = m1[0][i] - m1[8][i]; 2393 m2[9][i] = m1[1][i] - m1[9][i]; 2394 m2[10][i] = m1[2][i] - m1[10][i]; 2395 m2[11][i] = m1[3][i] - m1[11][i]; 2396 m2[12][i] = m1[4][i] - m1[12][i]; 2397 m2[13][i] = m1[5][i] - m1[13][i]; 2398 m2[14][i] = m1[6][i] - m1[14][i]; 2399 m2[15][i] = m1[7][i] - m1[15][i]; 2400 2401 m3[0][i] = m2[0][i] + m2[4][i]; 2402 m3[1][i] = m2[1][i] + m2[5][i]; 2403 m3[2][i] = m2[2][i] + m2[6][i]; 2404 m3[3][i] = m2[3][i] + m2[7][i]; 2405 m3[4][i] = m2[0][i] - m2[4][i]; 2406 m3[5][i] = m2[1][i] - m2[5][i]; 2407 m3[6][i] = m2[2][i] - m2[6][i]; 2408 m3[7][i] = m2[3][i] - m2[7][i]; 2409 m3[8][i] = m2[8][i] + m2[12][i]; 2410 m3[9][i] = m2[9][i] + m2[13][i]; 2411 m3[10][i] = m2[10][i] + m2[14][i]; 2412 m3[11][i] = m2[11][i] + m2[15][i]; 2413 m3[12][i] = m2[8][i] - m2[12][i]; 2414 m3[13][i] = m2[9][i] - m2[13][i]; 2415 m3[14][i] = m2[10][i] - m2[14][i]; 2416 m3[15][i] = m2[11][i] - m2[15][i]; 2417 2418 m1[0][i] = m3[0][i] + m3[2][i]; 2419 m1[1][i] = m3[1][i] + m3[3][i]; 2420 m1[2][i] = m3[0][i] - m3[2][i]; 2421 m1[3][i] = m3[1][i] - m3[3][i]; 2422 m1[4][i] = m3[4][i] + m3[6][i]; 2423 m1[5][i] = m3[5][i] + m3[7][i]; 2424 m1[6][i] = m3[4][i] - m3[6][i]; 2425 m1[7][i] = m3[5][i] - m3[7][i]; 2426 m1[8][i] = m3[8][i] + m3[10][i]; 2427 m1[9][i] = m3[9][i] + m3[11][i]; 2428 m1[10][i] = m3[8][i] - m3[10][i]; 2429 m1[11][i] = m3[9][i] - m3[11][i]; 2430 m1[12][i] = m3[12][i] + m3[14][i]; 2431 m1[13][i] = m3[13][i] + m3[15][i]; 2432 m1[14][i] = m3[12][i] - m3[14][i]; 2433 m1[15][i] = m3[13][i] - m3[15][i]; 2434 2435 m2[0][i] = m1[0][i] + m1[1][i]; 2436 m2[1][i] = m1[0][i] - m1[1][i]; 2437 m2[2][i] = m1[2][i] + m1[3][i]; 2438 m2[3][i] = m1[2][i] - m1[3][i]; 2439 m2[4][i] = m1[4][i] + m1[5][i]; 2440 m2[5][i] = m1[4][i] - m1[5][i]; 2441 m2[6][i] = m1[6][i] + m1[7][i]; 2442 m2[7][i] = m1[6][i] - m1[7][i]; 2443 m2[8][i] = m1[8][i] + m1[9][i]; 2444 m2[9][i] = m1[8][i] - m1[9][i]; 2445 m2[10][i] = m1[10][i] + m1[11][i]; 2446 m2[11][i] = m1[10][i] - m1[11][i]; 2447 m2[12][i] = m1[12][i] + m1[13][i]; 2448 m2[13][i] = m1[12][i] - m1[13][i]; 2449 m2[14][i] = m1[14][i] + m1[15][i]; 2450 m2[15][i] = m1[14][i] - m1[15][i]; 2451 } 2452 2453 for (i = 0; i < 16; i++) 2454 { 2455 for (j = 0; j < 4; j++) 2456 { 2457 sad += abs(m2[i][j]); 2458 } 2459 } 2460 4590 2461 sad=((sad+2)>>2); 4591 2462 4592 2463 return sad; 4593 2464 } 2465 #endif 4594 2466 4595 2467 UInt TComRdCost::xGetHADs4( DistParam* pcDtParam ) 4596 2468 { 4597 #ifdef WEIGHT_PRED 4598 if ( pcDtParam->applyWeight ) 2469 if ( pcDtParam->bApplyWeight ) 4599 2470 { 4600 2471 return xGetHADs4w( pcDtParam ); 4601 2472 } 4602 #endif4603 2473 Pel* piOrg = pcDtParam->pOrg; 4604 2474 Pel* piCur = pcDtParam->pCur; … … 4625 2495 UInt TComRdCost::xGetHADs8( DistParam* pcDtParam ) 4626 2496 { 4627 #ifdef WEIGHT_PRED 4628 if ( pcDtParam->applyWeight ) 2497 if ( pcDtParam->bApplyWeight ) 4629 2498 { 4630 2499 return xGetHADs8w( pcDtParam ); 4631 2500 } 4632 #endif4633 2501 Pel* piOrg = pcDtParam->pOrg; 4634 2502 Pel* piCur = pcDtParam->pCur; … … 4663 2531 UInt TComRdCost::xGetHADs( DistParam* pcDtParam ) 4664 2532 { 4665 #ifdef WEIGHT_PRED 4666 if ( pcDtParam->applyWeight ) 2533 if ( pcDtParam->bApplyWeight ) 4667 2534 { 4668 2535 return xGetHADsw( pcDtParam ); 4669 2536 } 4670 #endif4671 2537 Pel* piOrg = pcDtParam->pOrg; 4672 2538 Pel* piCur = pcDtParam->pCur; … … 4681 2547 UInt uiSum = 0; 4682 2548 2549 #if NS_HAD 2550 if( ( ( iRows % 8 == 0) && (iCols % 8 == 0) && ( iRows == iCols ) ) || ( ( iRows % 8 == 0 ) && (iCols % 8 == 0) && !pcDtParam->bUseNSHAD ) ) 2551 #else 4683 2552 if( ( iRows % 8 == 0) && (iCols % 8 == 0) ) 2553 #endif 4684 2554 { 4685 2555 Int iOffsetOrg = iStrideOrg<<3; … … 4695 2565 } 4696 2566 } 2567 #if NS_HAD 2568 else if ( ( iCols > 8 ) && ( iCols > iRows ) && pcDtParam->bUseNSHAD ) 2569 { 2570 Int iOffsetOrg = iStrideOrg<<2; 2571 Int iOffsetCur = iStrideCur<<2; 2572 for ( y=0; y<iRows; y+= 4 ) 2573 { 2574 for ( x=0; x<iCols; x+= 16 ) 2575 { 2576 uiSum += xCalcHADs16x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); 2577 } 2578 piOrg += iOffsetOrg; 2579 piCur += iOffsetCur; 2580 } 2581 } 2582 else if ( ( iRows > 8 ) && ( iCols < iRows ) && pcDtParam->bUseNSHAD ) 2583 { 2584 Int iOffsetOrg = iStrideOrg<<4; 2585 Int iOffsetCur = iStrideCur<<4; 2586 for ( y=0; y<iRows; y+= 16 ) 2587 { 2588 for ( x=0; x<iCols; x+= 4 ) 2589 { 2590 uiSum += xCalcHADs4x16( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); 2591 } 2592 piOrg += iOffsetOrg; 2593 piCur += iOffsetCur; 2594 } 2595 } 2596 #endif 4697 2597 else if( ( iRows % 4 == 0) && (iCols % 4 == 0) ) 4698 2598 { … … 4710 2610 } 4711 2611 } 4712 #ifdef DCM_RDCOST_TEMP_FIX //Temporary fix since row size can be 1 or 3 for chroma (such a case does not occur under current encoder settings)4713 2612 else if( ( iRows % 2 == 0) && (iCols % 2 == 0) ) 4714 2613 { 4715 2614 Int iOffsetOrg = iStrideOrg<<1; 4716 2615 Int iOffsetCur = iStrideCur<<1; 4717 #else4718 else4719 {4720 #endif4721 2616 for ( y=0; y<iRows; y+=2 ) 4722 2617 { … … 4725 2620 uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); 4726 2621 } 4727 #ifdef DCM_RDCOST_TEMP_FIX //Temporary fix since we need to increment by 2*iStride instead of iStride4728 2622 piOrg += iOffsetOrg; 4729 2623 piCur += iOffsetCur; 4730 #else 4731 piOrg += iStrideOrg; 4732 piCur += iStrideCur; 4733 #endif 4734 } 4735 } 4736 #ifdef DCM_RDCOST_TEMP_FIX //Temporary fix to return MAX_UINT until this case is properly handled 2624 } 2625 } 4737 2626 else 4738 2627 { 4739 printf("xGetHADs not supported for this dimension. Skipping computation of HAD and returning MAX_UINT\n"); 4740 return (MAX_UINT); 4741 } 4742 #endif 2628 assert(false); 2629 } 4743 2630 4744 2631 return ( uiSum >> g_uiBitIncrement ); 4745 2632 } 4746 4747 2633 4748 2634 #if HHI_VSO … … 4783 2669 }; 4784 2670 4785 Void TComRdCost::setRefDataFromMVDInfo( TComMVDRefData* pRefInfo )4786 {4787 if ( m_apRefPics != NULL )4788 {4789 delete[] m_apRefPics;4790 m_apRefPics = NULL;4791 };4792 4793 if ( m_paaiShiftLUTs != NULL )4794 { // Delete only first dimension, other dimension are not create in this class4795 delete[] m_paaiShiftLUTs;4796 m_paaiShiftLUTs = NULL;4797 };4798 4799 4800 m_uiNumberRefPics = ( m_uiVSOMode == 1 ) ? 3 : pRefInfo->getNumOfRefViews();4801 m_apRefPics = new TComPicYuv*[ m_uiNumberRefPics ];4802 m_paaiShiftLUTs = new Int**[ m_uiNumberRefPics ];4803 4804 if ( m_uiVSOMode == 1 )4805 {4806 pRefInfo->getRefPicYuvAndLUTMode1(m_apRefPics, m_paaiShiftLUTs);4807 }4808 else4809 {4810 pRefInfo->getRefPicYuvAndLUT(m_apRefPics, m_paaiShiftLUTs);4811 }4812 m_pcVideoPicYuv = pRefInfo->getPicYuvVideo();4813 }4814 2671 4815 2672 Void TComRdCost::setVSOMode( UInt uiIn ) … … 4830 2687 Double TComRdCost::calcRdCostVSO( UInt uiBits, Dist uiDistortion, Bool bFlag, DFunc eDFunc ) 4831 2688 { 4832 assert( m_bUseLambdaScaleVSO ); 2689 assert( m_bUseLambdaScaleVSO ); 4833 2690 4834 2691 Double dRdCost = 0.0; 4835 Double dLambda = 0.0; 2692 Double dLambda = 0.0; 4836 2693 4837 2694 switch ( eDFunc ) … … 4895 2752 4896 2753 #endif 2754 //! \}
Note: See TracChangeset for help on using the changeset viewer.