HEVC Test Model (HM)  HM-16.3
TComRdCost.cpp
Go to the documentation of this file.
1 /* The copyright in this software is being made available under the BSD
2  * License, included below. This software may be subject to other third party
3  * and contributor rights, including patent rights, and no such rights are
4  * granted under this license.
5  *
6  * Copyright (c) 2010-2015, ITU/ISO/IEC
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  * this list of conditions and the following disclaimer in the documentation
16  * and/or other materials provided with the distribution.
17  * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18  * be used to endorse or promote products derived from this software without
19  * specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
38 #include <math.h>
39 #include <assert.h>
40 #include "TComRom.h"
41 #include "TComRdCost.h"
42 
45 
47 {
// NOTE(review): the signature line for this body (embedded line 46) is missing from this
// listing; presumably this is the class constructor - confirm against the original file.
// All setup is delegated to init().
48  init();
49 }
50 
52 {
// Intentionally empty.
// NOTE(review): the signature line for this body (embedded line 51) is missing from this
// listing; presumably this is the class destructor - confirm against the original file.
53 }
54 
55 // Calculate RD functions
// Combines a bit count and a distortion value into one rate-distortion cost (returned as Double).
//   uiBits       - rate term
//   uiDistortion - distortion term
//   bFlag        - selects the un-rounded cost formulas (the in-code note says it is never true)
//   eDFunc       - selects the Lagrange multiplier: DF_SAD, DF_DEFAULT or DF_SSE_FRAME;
//                  DF_SSE and any other value hit assert(0) and leave dLambda at 0.0.
// NOTE(review): embedded source lines 87, 100 and 111 are missing from this listing. Each of the
// three brace blocks below that is followed by an `else` has lost its controlling `if`, so the
// function does not compile as shown. Restore those lines from the original file.
56 Double TComRdCost::calcRdCost( UInt uiBits, Distortion uiDistortion, Bool bFlag, DFunc eDFunc )
57 {
58  Double dRdCost = 0.0;
59  Double dLambda = 0.0;
60 
// Pick the multiplier that matches the distortion measure.
61  switch ( eDFunc )
62  {
63  case DF_SSE:
64  assert(0);
65  break;
66  case DF_SAD:
67 #if RExt__HIGH_BIT_DEPTH_SUPPORT
68  dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
69 #else
70  dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
71 #endif
72  break;
73  case DF_DEFAULT:
74  dLambda = m_dLambda;
75  break;
76  case DF_SSE_FRAME:
77  dLambda = m_dFrameLambda;
78  break;
79  default:
80  assert (0);
81  break;
82  }
83 
84  if (bFlag) //NOTE: this "bFlag" is never true
85  {
86  // Intra8x8, Intra4x4 Block only...
// NOTE(review): the condition guarding the next block (embedded line 87) is missing here.
88  {
89  dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
90  }
91  else
92  {
// Plain D + lambda * R, without rounding.
93  dRdCost = (((Double)uiDistortion) + ((Double)uiBits * dLambda));
94  }
95  }
96  else
97  {
98  if (eDFunc == DF_SAD)
99  {
// NOTE(review): the condition guarding the next block (embedded line 100) is missing here.
// The SAD multipliers are stored pre-multiplied by 65536 elsewhere in this file, which is why
// both formulas in this branch carry a 65536 factor.
101  {
102  dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
103  }
104  else
105  {
// The rate term is rounded to nearest, scaled back by 65536, and the total is floored.
106  dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
107  }
108  }
109  else
110  {
// NOTE(review): the condition guarding the next block (embedded line 111) is missing here.
112  {
113  dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
114  }
115  else
116  {
// D + lambda * R rounded to the nearest integer value.
117  dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
118  }
119  }
120  }
121 
122  return dRdCost;
123 }
124 
// 64-bit-input counterpart of calcRdCost: same lambda selection and cost formulas, with uiBits
// and uiDistortion passed as UInt64. Returns the cost as a Double.
//   bFlag  - selects the un-rounded cost formulas (the in-code note says it is never true)
//   eDFunc - DF_SAD, DF_DEFAULT or DF_SSE_FRAME; DF_SSE and any other value hit assert(0) and
//            leave dLambda at 0.0.
// NOTE(review): embedded source lines 156, 169 and 180 are missing from this listing. Each of the
// three brace blocks below that is followed by an `else` has lost its controlling `if`, so the
// function does not compile as shown. Restore those lines from the original file.
125 Double TComRdCost::calcRdCost64( UInt64 uiBits, UInt64 uiDistortion, Bool bFlag, DFunc eDFunc )
126 {
127  Double dRdCost = 0.0;
128  Double dLambda = 0.0;
129 
// Pick the multiplier that matches the distortion measure.
130  switch ( eDFunc )
131  {
132  case DF_SSE:
133  assert(0);
134  break;
135  case DF_SAD:
136 #if RExt__HIGH_BIT_DEPTH_SUPPORT
137  dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
138 #else
139  dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
140 #endif
141  break;
142  case DF_DEFAULT:
143  dLambda = m_dLambda;
144  break;
145  case DF_SSE_FRAME:
146  dLambda = m_dFrameLambda;
147  break;
148  default:
149  assert (0);
150  break;
151  }
152 
153  if (bFlag) //NOTE: this "bFlag" is never true
154  {
155  // Intra8x8, Intra4x4 Block only...
// NOTE(review): the condition guarding the next block (embedded line 156) is missing here.
157  {
158  dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
159  }
160  else
161  {
// Both operands go through Int64 before the conversion to Double (unlike the other formulas here).
162  dRdCost = (((Double)(Int64)uiDistortion) + ((Double)(Int64)uiBits * dLambda));
163  }
164  }
165  else
166  {
167  if (eDFunc == DF_SAD)
168  {
// NOTE(review): the condition guarding the next block (embedded line 169) is missing here.
// The SAD multipliers are stored pre-multiplied by 65536 elsewhere in this file, which is why
// both formulas in this branch carry a 65536 factor.
170  {
171  dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
172  }
173  else
174  {
// The rate term is rounded to nearest, scaled back by 65536, and the total is floored.
175  dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
176  }
177  }
178  else
179  {
// NOTE(review): the condition guarding the next block (embedded line 180) is missing here.
181  {
182  dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
183  }
184  else
185  {
// D + lambda * R rounded to the nearest integer value.
186  dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
187  }
188  }
189  }
190 
191  return dRdCost;
192 }
193 
195 {
// Stores dLambda and its square root, then derives the motion-search multipliers from them.
// Slot [0] of the SAD/SSE tables holds 65536 * sqrt(lambda) and 65536 * lambda for the supplied
// value; slot [1] holds the same quantities for a lambda recomputed from
// LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME.
// NOTE(review): the signature line (embedded line 194) is missing from this listing; dLambda is
// presumably a by-value parameter declared there - confirm. It is overwritten below.
196  m_dLambda = dLambda;
197  m_sqrtLambda = sqrt(m_dLambda);
198 #if RExt__HIGH_BIT_DEPTH_SUPPORT
// High-bit-depth build: multipliers are kept as floating point.
199  m_dLambdaMotionSAD[0] = 65536.0 * m_sqrtLambda;
200  m_dLambdaMotionSSE[0] = 65536.0 * m_dLambda;
201 #if FULL_NBIT
202  dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
203 #else
// Without FULL_NBIT the QP is additionally offset by 6 per luma bit above 8.
204  dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
205 #endif
206  m_dLambdaMotionSAD[1] = 65536.0 * sqrt(dLambda);
207  m_dLambdaMotionSSE[1] = 65536.0 * dLambda;
208 #else
// Default build: the same values, floored and stored as UInt.
209  m_uiLambdaMotionSAD[0] = (UInt)floor(65536.0 * m_sqrtLambda);
210  m_uiLambdaMotionSSE[0] = (UInt)floor(65536.0 * m_dLambda );
211 #if FULL_NBIT
212  dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
213 #else
214  dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
215 #endif
216  m_uiLambdaMotionSAD[1] = (UInt)floor(65536.0 * sqrt(dLambda));
217  m_uiLambdaMotionSSE[1] = (UInt)floor(65536.0 * dLambda );
218 #endif
219 }
220 
221 
222 // Initialize Function Pointer by [eDFunc]
224 {
// Resets the distortion-function table and the cost accumulator state.
// NOTE(review): the signature line (embedded line 223) and most of the body (embedded lines
// 227-233, 235-241, 243-249, 252-254, 256-258, 260-266 and 268) are missing from this listing;
// presumably they fill the remaining m_afpDistortFunc slots - restore from the original file.
225  m_afpDistortFunc[DF_DEFAULT] = NULL; // for DF_DEFAULT
226 
234 
242 
250 
251 #if AMP_SAD
255 
259 #endif
267 
269 
// Clear the stored cost (type depends on the build) and its scale.
270 #if RExt__HIGH_BIT_DEPTH_SUPPORT
271  m_dCost = 0;
272 #else
273  m_uiCost = 0;
274 #endif
275  m_iCostScale = 0;
276 }
277 
279 {
// Returns a code length for the signed value iVal.
// NOTE(review): the signature line (embedded line 278) is missing from this listing; iVal is
// presumably a signed integer parameter declared there - confirm.
280  UInt uiLength = 1;
// Map the signed value to a positive code number: iVal <= 0 -> 2*|iVal| + 1, iVal > 0 -> 2*iVal.
281  UInt uiTemp = ( iVal <= 0) ? (-iVal<<1)+1: (iVal<<1);
282 
// The mapping above never yields 0, so the loop below always terminates.
283  assert ( uiTemp );
284 
// Two bits are added per halving, i.e. the result is 2*floor(log2(uiTemp)) + 1.
285  while ( 1 != uiTemp )
286  {
287  uiTemp >>= 1;
288  uiLength += 2;
289  }
290 
291  return uiLength;
292 }
293 
294 Void TComRdCost::setDistParam( UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam& rcDistParam )
295 {
296  // set Block Width / Height
297  rcDistParam.iCols = uiBlkWidth;
298  rcDistParam.iRows = uiBlkHeight;
299  rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
300 
301  // initialize
302  rcDistParam.iSubShift = 0;
303 }
304 
305 // Setting the Distortion Parameter for Inter (ME)
306 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, DistParam& rcDistParam )
307 {
308  // set Original & Curr Pointer / Stride
309  rcDistParam.pOrg = pcPatternKey->getROIY();
310  rcDistParam.pCur = piRefY;
311 
312  rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
313  rcDistParam.iStrideCur = iRefStride;
314 
315  // set Block Width / Height
316  rcDistParam.iCols = pcPatternKey->getROIYWidth();
317  rcDistParam.iRows = pcPatternKey->getROIYHeight();
318  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
319 
320 #if AMP_SAD
321  if (rcDistParam.iCols == 12)
322  {
323  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD12];
324  }
325  else if (rcDistParam.iCols == 24)
326  {
327  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD24];
328  }
329  else if (rcDistParam.iCols == 48)
330  {
331  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD48];
332  }
333 #endif
334 
335  // initialize
336  rcDistParam.iSubShift = 0;
337 }
338 
339 // Setting the Distortion Parameter for Inter (subpel ME with step)
340 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME )
341 {
342  // set Original & Curr Pointer / Stride
343  rcDistParam.pOrg = pcPatternKey->getROIY();
344  rcDistParam.pCur = piRefY;
345 
346  rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
347  rcDistParam.iStrideCur = iRefStride * iStep;
348 
349  // set Step for interpolated buffer
350  rcDistParam.iStep = iStep;
351 
352  // set Block Width / Height
353  rcDistParam.iCols = pcPatternKey->getROIYWidth();
354  rcDistParam.iRows = pcPatternKey->getROIYHeight();
355 
356  // set distortion function
357  if ( !bHADME )
358  {
359  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
360 #if AMP_SAD
361  if (rcDistParam.iCols == 12)
362  {
363  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS12];
364  }
365  else if (rcDistParam.iCols == 24)
366  {
367  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS24];
368  }
369  else if (rcDistParam.iCols == 48)
370  {
371  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS48];
372  }
373 #endif
374  }
375  else
376  {
377  rcDistParam.DistFunc = m_afpDistortFunc[DF_HADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
378  }
379 
380  // initialize
381  rcDistParam.iSubShift = 0;
382 }
383 
384 Void TComRdCost::setDistParam( DistParam& rcDP, Int bitDepth, Pel* p1, Int iStride1, Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard )
385 {
386  rcDP.pOrg = p1;
387  rcDP.pCur = p2;
388  rcDP.iStrideOrg = iStride1;
389  rcDP.iStrideCur = iStride2;
390  rcDP.iCols = iWidth;
391  rcDP.iRows = iHeight;
392  rcDP.iStep = 1;
393  rcDP.iSubShift = 0;
394  rcDP.bitDepth = bitDepth;
395  rcDP.DistFunc = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ];
396 }
397 
398 Distortion TComRdCost::calcHAD( Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight )
399 {
400  Distortion uiSum = 0;
401  Int x, y;
402 
403  if ( ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 ) )
404  {
405  for ( y=0; y<iHeight; y+= 8 )
406  {
407  for ( x=0; x<iWidth; x+= 8 )
408  {
409  uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
410  }
411  pi0 += iStride0*8;
412  pi1 += iStride1*8;
413  }
414  }
415  else
416  {
417  assert ( ( (iWidth % 4) == 0 ) && ( (iHeight % 4) == 0 ) );
418 
419  for ( y=0; y<iHeight; y+= 4 )
420  {
421  for ( x=0; x<iWidth; x+= 4 )
422  {
423  uiSum += xCalcHADs4x4( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
424  }
425  pi0 += iStride0*4;
426  pi1 += iStride1*4;
427  }
428  }
429 
430  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8) );
431 }
432 
433 Distortion TComRdCost::getDistPart( Int bitDepth, Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc )
434 {
435  DistParam cDtParam;
436  setDistParam( uiBlkWidth, uiBlkHeight, eDFunc, cDtParam );
437  cDtParam.pOrg = piOrg;
438  cDtParam.pCur = piCur;
439  cDtParam.iStrideOrg = iOrgStride;
440  cDtParam.iStrideCur = iCurStride;
441  cDtParam.iStep = 1;
442 
443  cDtParam.bApplyWeight = false;
444  cDtParam.compIdx = MAX_NUM_COMPONENT; // just for assert: to be sure it was set before use
445  cDtParam.bitDepth = bitDepth;
446 
447  if (isChroma(compID))
448  {
449  return ((Distortion) (m_distortionWeight[compID] * cDtParam.DistFunc( &cDtParam )));
450  }
451  else
452  {
453  return cDtParam.DistFunc( &cDtParam );
454  }
455 }
456 
457 // ====================================================================================================================
458 // Distortion functions
459 // ====================================================================================================================
460 
461 // --------------------------------------------------------------------------------------------------------------------
462 // SAD
463 // --------------------------------------------------------------------------------------------------------------------
464 
466 {
// Sum of absolute differences between the pOrg and pCur blocks described by pcDtParam
// (iRows x iCols, each buffer with its own stride). When bApplyWeight is set the computation
// is delegated to TComRdCostWeightPrediction::xGetSADw. iSubShift is not read: every row is used.
// The sum is right-shifted by DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8) before returning.
// NOTE(review): the signature line (embedded line 465) is missing from this listing.
467  if ( pcDtParam->bApplyWeight )
468  {
469  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
470  }
471  const Pel* piOrg = pcDtParam->pOrg;
472  const Pel* piCur = pcDtParam->pCur;
473  Int iRows = pcDtParam->iRows;
474  Int iCols = pcDtParam->iCols;
475  Int iStrideCur = pcDtParam->iStrideCur;
476  Int iStrideOrg = pcDtParam->iStrideOrg;
477 
478  Distortion uiSum = 0;
479 
480  for( ; iRows != 0; iRows-- )
481  {
482  for (Int n = 0; n < iCols; n++ )
483  {
484  uiSum += abs( piOrg[n] - piCur[n] );
485  }
486  piOrg += iStrideOrg;
487  piCur += iStrideCur;
488  }
489 
490  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
491 }
492 
494 {
// Fixed-width SAD: accumulates |org - cur| over columns 0..3 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 493) is missing from this listing.
495  if ( pcDtParam->bApplyWeight )
496  {
497  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
498  }
499  const Pel* piOrg = pcDtParam->pOrg;
500  const Pel* piCur = pcDtParam->pCur;
501  Int iRows = pcDtParam->iRows;
502  Int iSubShift = pcDtParam->iSubShift;
503  Int iSubStep = ( 1 << iSubShift );
504  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
505  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
506 
507  Distortion uiSum = 0;
508 
509  for( ; iRows != 0; iRows-=iSubStep )
510  {
511  uiSum += abs( piOrg[0] - piCur[0] );
512  uiSum += abs( piOrg[1] - piCur[1] );
513  uiSum += abs( piOrg[2] - piCur[2] );
514  uiSum += abs( piOrg[3] - piCur[3] );
515 
516  piOrg += iStrideOrg;
517  piCur += iStrideCur;
518  }
519 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
520  uiSum <<= iSubShift;
521  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
522 }
523 
525 {
// Fixed-width SAD: accumulates |org - cur| over columns 0..7 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 524) is missing from this listing.
526  if ( pcDtParam->bApplyWeight )
527  {
528  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
529  }
530  const Pel* piOrg = pcDtParam->pOrg;
531  const Pel* piCur = pcDtParam->pCur;
532  Int iRows = pcDtParam->iRows;
533  Int iSubShift = pcDtParam->iSubShift;
534  Int iSubStep = ( 1 << iSubShift );
535  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
536  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
537 
538  Distortion uiSum = 0;
539 
540  for( ; iRows != 0; iRows-=iSubStep )
541  {
542  uiSum += abs( piOrg[0] - piCur[0] );
543  uiSum += abs( piOrg[1] - piCur[1] );
544  uiSum += abs( piOrg[2] - piCur[2] );
545  uiSum += abs( piOrg[3] - piCur[3] );
546  uiSum += abs( piOrg[4] - piCur[4] );
547  uiSum += abs( piOrg[5] - piCur[5] );
548  uiSum += abs( piOrg[6] - piCur[6] );
549  uiSum += abs( piOrg[7] - piCur[7] );
550 
551  piOrg += iStrideOrg;
552  piCur += iStrideCur;
553  }
554 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
555  uiSum <<= iSubShift;
556  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
557 }
558 
560 {
// Fixed-width SAD: accumulates |org - cur| over columns 0..15 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 559) is missing from this listing.
561  if ( pcDtParam->bApplyWeight )
562  {
563  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
564  }
565  const Pel* piOrg = pcDtParam->pOrg;
566  const Pel* piCur = pcDtParam->pCur;
567  Int iRows = pcDtParam->iRows;
568  Int iSubShift = pcDtParam->iSubShift;
569  Int iSubStep = ( 1 << iSubShift );
570  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
571  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
572 
573  Distortion uiSum = 0;
574 
575  for( ; iRows != 0; iRows-=iSubStep )
576  {
577  uiSum += abs( piOrg[0] - piCur[0] );
578  uiSum += abs( piOrg[1] - piCur[1] );
579  uiSum += abs( piOrg[2] - piCur[2] );
580  uiSum += abs( piOrg[3] - piCur[3] );
581  uiSum += abs( piOrg[4] - piCur[4] );
582  uiSum += abs( piOrg[5] - piCur[5] );
583  uiSum += abs( piOrg[6] - piCur[6] );
584  uiSum += abs( piOrg[7] - piCur[7] );
585  uiSum += abs( piOrg[8] - piCur[8] );
586  uiSum += abs( piOrg[9] - piCur[9] );
587  uiSum += abs( piOrg[10] - piCur[10] );
588  uiSum += abs( piOrg[11] - piCur[11] );
589  uiSum += abs( piOrg[12] - piCur[12] );
590  uiSum += abs( piOrg[13] - piCur[13] );
591  uiSum += abs( piOrg[14] - piCur[14] );
592  uiSum += abs( piOrg[15] - piCur[15] );
593 
594  piOrg += iStrideOrg;
595  piCur += iStrideCur;
596  }
597 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
598  uiSum <<= iSubShift;
599  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
600 }
601 
602 #if AMP_SAD
604 {
// Fixed-width SAD (only built when AMP_SAD is enabled): accumulates |org - cur| over
// columns 0..11 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 603) is missing from this listing.
605  if ( pcDtParam->bApplyWeight )
606  {
607  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
608  }
609  const Pel* piOrg = pcDtParam->pOrg;
610  const Pel* piCur = pcDtParam->pCur;
611  Int iRows = pcDtParam->iRows;
612  Int iSubShift = pcDtParam->iSubShift;
613  Int iSubStep = ( 1 << iSubShift );
614  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
615  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
616 
617  Distortion uiSum = 0;
618 
619  for( ; iRows != 0; iRows-=iSubStep )
620  {
621  uiSum += abs( piOrg[0] - piCur[0] );
622  uiSum += abs( piOrg[1] - piCur[1] );
623  uiSum += abs( piOrg[2] - piCur[2] );
624  uiSum += abs( piOrg[3] - piCur[3] );
625  uiSum += abs( piOrg[4] - piCur[4] );
626  uiSum += abs( piOrg[5] - piCur[5] );
627  uiSum += abs( piOrg[6] - piCur[6] );
628  uiSum += abs( piOrg[7] - piCur[7] );
629  uiSum += abs( piOrg[8] - piCur[8] );
630  uiSum += abs( piOrg[9] - piCur[9] );
631  uiSum += abs( piOrg[10] - piCur[10] );
632  uiSum += abs( piOrg[11] - piCur[11] );
633 
634  piOrg += iStrideOrg;
635  piCur += iStrideCur;
636  }
637 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
638  uiSum <<= iSubShift;
639  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
640 }
641 #endif
642 
644 {
// SAD for widths handled in groups of 16 columns: each visited row is processed 16 samples at
// a time until n reaches iCols, so up to 15 samples past iCols are read if iCols is not a
// multiple of 16.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// NOTE(review): unlike the other SAD kernels in this file, this one has no bApplyWeight branch
// delegating to TComRdCostWeightPrediction::xGetSADw - confirm that this is intentional.
// NOTE(review): the signature line (embedded line 643) is missing from this listing.
645  const Pel* piOrg = pcDtParam->pOrg;
646  const Pel* piCur = pcDtParam->pCur;
647  Int iRows = pcDtParam->iRows;
648  Int iCols = pcDtParam->iCols;
649  Int iSubShift = pcDtParam->iSubShift;
650  Int iSubStep = ( 1 << iSubShift );
651  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
652  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
653 
654  Distortion uiSum = 0;
655 
656  for( ; iRows != 0; iRows-=iSubStep )
657  {
658  for (Int n = 0; n < iCols; n+=16 )
659  {
660  uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] );
661  uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] );
662  uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] );
663  uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] );
664  uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] );
665  uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] );
666  uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] );
667  uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] );
668  uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] );
669  uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] );
670  uiSum += abs( piOrg[n+10] - piCur[n+10] );
671  uiSum += abs( piOrg[n+11] - piCur[n+11] );
672  uiSum += abs( piOrg[n+12] - piCur[n+12] );
673  uiSum += abs( piOrg[n+13] - piCur[n+13] );
674  uiSum += abs( piOrg[n+14] - piCur[n+14] );
675  uiSum += abs( piOrg[n+15] - piCur[n+15] );
676  }
677  piOrg += iStrideOrg;
678  piCur += iStrideCur;
679  }
680 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
681  uiSum <<= iSubShift;
682  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
683 }
684 
686 {
// Fixed-width SAD: accumulates |org - cur| over columns 0..31 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 685) is missing from this listing.
687  if ( pcDtParam->bApplyWeight )
688  {
689  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
690  }
691  const Pel* piOrg = pcDtParam->pOrg;
692  const Pel* piCur = pcDtParam->pCur;
693  Int iRows = pcDtParam->iRows;
694  Int iSubShift = pcDtParam->iSubShift;
695  Int iSubStep = ( 1 << iSubShift );
696  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
697  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
698 
699  Distortion uiSum = 0;
700 
701  for( ; iRows != 0; iRows-=iSubStep )
702  {
703  uiSum += abs( piOrg[0] - piCur[0] );
704  uiSum += abs( piOrg[1] - piCur[1] );
705  uiSum += abs( piOrg[2] - piCur[2] );
706  uiSum += abs( piOrg[3] - piCur[3] );
707  uiSum += abs( piOrg[4] - piCur[4] );
708  uiSum += abs( piOrg[5] - piCur[5] );
709  uiSum += abs( piOrg[6] - piCur[6] );
710  uiSum += abs( piOrg[7] - piCur[7] );
711  uiSum += abs( piOrg[8] - piCur[8] );
712  uiSum += abs( piOrg[9] - piCur[9] );
713  uiSum += abs( piOrg[10] - piCur[10] );
714  uiSum += abs( piOrg[11] - piCur[11] );
715  uiSum += abs( piOrg[12] - piCur[12] );
716  uiSum += abs( piOrg[13] - piCur[13] );
717  uiSum += abs( piOrg[14] - piCur[14] );
718  uiSum += abs( piOrg[15] - piCur[15] );
719  uiSum += abs( piOrg[16] - piCur[16] );
720  uiSum += abs( piOrg[17] - piCur[17] );
721  uiSum += abs( piOrg[18] - piCur[18] );
722  uiSum += abs( piOrg[19] - piCur[19] );
723  uiSum += abs( piOrg[20] - piCur[20] );
724  uiSum += abs( piOrg[21] - piCur[21] );
725  uiSum += abs( piOrg[22] - piCur[22] );
726  uiSum += abs( piOrg[23] - piCur[23] );
727  uiSum += abs( piOrg[24] - piCur[24] );
728  uiSum += abs( piOrg[25] - piCur[25] );
729  uiSum += abs( piOrg[26] - piCur[26] );
730  uiSum += abs( piOrg[27] - piCur[27] );
731  uiSum += abs( piOrg[28] - piCur[28] );
732  uiSum += abs( piOrg[29] - piCur[29] );
733  uiSum += abs( piOrg[30] - piCur[30] );
734  uiSum += abs( piOrg[31] - piCur[31] );
735 
736  piOrg += iStrideOrg;
737  piCur += iStrideCur;
738  }
739 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
740  uiSum <<= iSubShift;
741  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
742 }
743 
744 #if AMP_SAD
746 {
// Fixed-width SAD (only built when AMP_SAD is enabled): accumulates |org - cur| over
// columns 0..23 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 745) is missing from this listing.
747  if ( pcDtParam->bApplyWeight )
748  {
749  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
750  }
751  const Pel* piOrg = pcDtParam->pOrg;
752  const Pel* piCur = pcDtParam->pCur;
753  Int iRows = pcDtParam->iRows;
754  Int iSubShift = pcDtParam->iSubShift;
755  Int iSubStep = ( 1 << iSubShift );
756  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
757  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
758 
759  Distortion uiSum = 0;
760 
761  for( ; iRows != 0; iRows-=iSubStep )
762  {
763  uiSum += abs( piOrg[0] - piCur[0] );
764  uiSum += abs( piOrg[1] - piCur[1] );
765  uiSum += abs( piOrg[2] - piCur[2] );
766  uiSum += abs( piOrg[3] - piCur[3] );
767  uiSum += abs( piOrg[4] - piCur[4] );
768  uiSum += abs( piOrg[5] - piCur[5] );
769  uiSum += abs( piOrg[6] - piCur[6] );
770  uiSum += abs( piOrg[7] - piCur[7] );
771  uiSum += abs( piOrg[8] - piCur[8] );
772  uiSum += abs( piOrg[9] - piCur[9] );
773  uiSum += abs( piOrg[10] - piCur[10] );
774  uiSum += abs( piOrg[11] - piCur[11] );
775  uiSum += abs( piOrg[12] - piCur[12] );
776  uiSum += abs( piOrg[13] - piCur[13] );
777  uiSum += abs( piOrg[14] - piCur[14] );
778  uiSum += abs( piOrg[15] - piCur[15] );
779  uiSum += abs( piOrg[16] - piCur[16] );
780  uiSum += abs( piOrg[17] - piCur[17] );
781  uiSum += abs( piOrg[18] - piCur[18] );
782  uiSum += abs( piOrg[19] - piCur[19] );
783  uiSum += abs( piOrg[20] - piCur[20] );
784  uiSum += abs( piOrg[21] - piCur[21] );
785  uiSum += abs( piOrg[22] - piCur[22] );
786  uiSum += abs( piOrg[23] - piCur[23] );
787 
788  piOrg += iStrideOrg;
789  piCur += iStrideCur;
790  }
791 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
792  uiSum <<= iSubShift;
793  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
794 }
795 
796 #endif
797 
799 {
// Fixed-width SAD: accumulates |org - cur| over columns 0..63 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 798) is missing from this listing.
800  if ( pcDtParam->bApplyWeight )
801  {
802  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
803  }
804  const Pel* piOrg = pcDtParam->pOrg;
805  const Pel* piCur = pcDtParam->pCur;
806  Int iRows = pcDtParam->iRows;
807  Int iSubShift = pcDtParam->iSubShift;
808  Int iSubStep = ( 1 << iSubShift );
809  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
810  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
811 
812  Distortion uiSum = 0;
813 
814  for( ; iRows != 0; iRows-=iSubStep )
815  {
816  uiSum += abs( piOrg[0] - piCur[0] );
817  uiSum += abs( piOrg[1] - piCur[1] );
818  uiSum += abs( piOrg[2] - piCur[2] );
819  uiSum += abs( piOrg[3] - piCur[3] );
820  uiSum += abs( piOrg[4] - piCur[4] );
821  uiSum += abs( piOrg[5] - piCur[5] );
822  uiSum += abs( piOrg[6] - piCur[6] );
823  uiSum += abs( piOrg[7] - piCur[7] );
824  uiSum += abs( piOrg[8] - piCur[8] );
825  uiSum += abs( piOrg[9] - piCur[9] );
826  uiSum += abs( piOrg[10] - piCur[10] );
827  uiSum += abs( piOrg[11] - piCur[11] );
828  uiSum += abs( piOrg[12] - piCur[12] );
829  uiSum += abs( piOrg[13] - piCur[13] );
830  uiSum += abs( piOrg[14] - piCur[14] );
831  uiSum += abs( piOrg[15] - piCur[15] );
832  uiSum += abs( piOrg[16] - piCur[16] );
833  uiSum += abs( piOrg[17] - piCur[17] );
834  uiSum += abs( piOrg[18] - piCur[18] );
835  uiSum += abs( piOrg[19] - piCur[19] );
836  uiSum += abs( piOrg[20] - piCur[20] );
837  uiSum += abs( piOrg[21] - piCur[21] );
838  uiSum += abs( piOrg[22] - piCur[22] );
839  uiSum += abs( piOrg[23] - piCur[23] );
840  uiSum += abs( piOrg[24] - piCur[24] );
841  uiSum += abs( piOrg[25] - piCur[25] );
842  uiSum += abs( piOrg[26] - piCur[26] );
843  uiSum += abs( piOrg[27] - piCur[27] );
844  uiSum += abs( piOrg[28] - piCur[28] );
845  uiSum += abs( piOrg[29] - piCur[29] );
846  uiSum += abs( piOrg[30] - piCur[30] );
847  uiSum += abs( piOrg[31] - piCur[31] );
848  uiSum += abs( piOrg[32] - piCur[32] );
849  uiSum += abs( piOrg[33] - piCur[33] );
850  uiSum += abs( piOrg[34] - piCur[34] );
851  uiSum += abs( piOrg[35] - piCur[35] );
852  uiSum += abs( piOrg[36] - piCur[36] );
853  uiSum += abs( piOrg[37] - piCur[37] );
854  uiSum += abs( piOrg[38] - piCur[38] );
855  uiSum += abs( piOrg[39] - piCur[39] );
856  uiSum += abs( piOrg[40] - piCur[40] );
857  uiSum += abs( piOrg[41] - piCur[41] );
858  uiSum += abs( piOrg[42] - piCur[42] );
859  uiSum += abs( piOrg[43] - piCur[43] );
860  uiSum += abs( piOrg[44] - piCur[44] );
861  uiSum += abs( piOrg[45] - piCur[45] );
862  uiSum += abs( piOrg[46] - piCur[46] );
863  uiSum += abs( piOrg[47] - piCur[47] );
864  uiSum += abs( piOrg[48] - piCur[48] );
865  uiSum += abs( piOrg[49] - piCur[49] );
866  uiSum += abs( piOrg[50] - piCur[50] );
867  uiSum += abs( piOrg[51] - piCur[51] );
868  uiSum += abs( piOrg[52] - piCur[52] );
869  uiSum += abs( piOrg[53] - piCur[53] );
870  uiSum += abs( piOrg[54] - piCur[54] );
871  uiSum += abs( piOrg[55] - piCur[55] );
872  uiSum += abs( piOrg[56] - piCur[56] );
873  uiSum += abs( piOrg[57] - piCur[57] );
874  uiSum += abs( piOrg[58] - piCur[58] );
875  uiSum += abs( piOrg[59] - piCur[59] );
876  uiSum += abs( piOrg[60] - piCur[60] );
877  uiSum += abs( piOrg[61] - piCur[61] );
878  uiSum += abs( piOrg[62] - piCur[62] );
879  uiSum += abs( piOrg[63] - piCur[63] );
880 
881  piOrg += iStrideOrg;
882  piCur += iStrideCur;
883  }
884 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
885  uiSum <<= iSubShift;
886  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
887 }
888 
889 #if AMP_SAD
891 {
// Fixed-width SAD (only built when AMP_SAD is enabled): accumulates |org - cur| over
// columns 0..47 of each visited row.
// Rows are visited in steps of iSubStep = 1 << iSubShift (strides are scaled to match) and the
// sum is shifted left by iSubShift afterwards to compensate for the skipped rows.
// The loop stops only when iRows hits exactly 0, so iRows must be a multiple of iSubStep.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSADw.
// NOTE(review): the signature line (embedded line 890) is missing from this listing.
892  if ( pcDtParam->bApplyWeight )
893  {
894  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
895  }
896  const Pel* piOrg = pcDtParam->pOrg;
897  const Pel* piCur = pcDtParam->pCur;
898  Int iRows = pcDtParam->iRows;
899  Int iSubShift = pcDtParam->iSubShift;
900  Int iSubStep = ( 1 << iSubShift );
901  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
902  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
903 
904  Distortion uiSum = 0;
905 
906  for( ; iRows != 0; iRows-=iSubStep )
907  {
908  uiSum += abs( piOrg[0] - piCur[0] );
909  uiSum += abs( piOrg[1] - piCur[1] );
910  uiSum += abs( piOrg[2] - piCur[2] );
911  uiSum += abs( piOrg[3] - piCur[3] );
912  uiSum += abs( piOrg[4] - piCur[4] );
913  uiSum += abs( piOrg[5] - piCur[5] );
914  uiSum += abs( piOrg[6] - piCur[6] );
915  uiSum += abs( piOrg[7] - piCur[7] );
916  uiSum += abs( piOrg[8] - piCur[8] );
917  uiSum += abs( piOrg[9] - piCur[9] );
918  uiSum += abs( piOrg[10] - piCur[10] );
919  uiSum += abs( piOrg[11] - piCur[11] );
920  uiSum += abs( piOrg[12] - piCur[12] );
921  uiSum += abs( piOrg[13] - piCur[13] );
922  uiSum += abs( piOrg[14] - piCur[14] );
923  uiSum += abs( piOrg[15] - piCur[15] );
924  uiSum += abs( piOrg[16] - piCur[16] );
925  uiSum += abs( piOrg[17] - piCur[17] );
926  uiSum += abs( piOrg[18] - piCur[18] );
927  uiSum += abs( piOrg[19] - piCur[19] );
928  uiSum += abs( piOrg[20] - piCur[20] );
929  uiSum += abs( piOrg[21] - piCur[21] );
930  uiSum += abs( piOrg[22] - piCur[22] );
931  uiSum += abs( piOrg[23] - piCur[23] );
932  uiSum += abs( piOrg[24] - piCur[24] );
933  uiSum += abs( piOrg[25] - piCur[25] );
934  uiSum += abs( piOrg[26] - piCur[26] );
935  uiSum += abs( piOrg[27] - piCur[27] );
936  uiSum += abs( piOrg[28] - piCur[28] );
937  uiSum += abs( piOrg[29] - piCur[29] );
938  uiSum += abs( piOrg[30] - piCur[30] );
939  uiSum += abs( piOrg[31] - piCur[31] );
940  uiSum += abs( piOrg[32] - piCur[32] );
941  uiSum += abs( piOrg[33] - piCur[33] );
942  uiSum += abs( piOrg[34] - piCur[34] );
943  uiSum += abs( piOrg[35] - piCur[35] );
944  uiSum += abs( piOrg[36] - piCur[36] );
945  uiSum += abs( piOrg[37] - piCur[37] );
946  uiSum += abs( piOrg[38] - piCur[38] );
947  uiSum += abs( piOrg[39] - piCur[39] );
948  uiSum += abs( piOrg[40] - piCur[40] );
949  uiSum += abs( piOrg[41] - piCur[41] );
950  uiSum += abs( piOrg[42] - piCur[42] );
951  uiSum += abs( piOrg[43] - piCur[43] );
952  uiSum += abs( piOrg[44] - piCur[44] );
953  uiSum += abs( piOrg[45] - piCur[45] );
954  uiSum += abs( piOrg[46] - piCur[46] );
955  uiSum += abs( piOrg[47] - piCur[47] );
956 
957  piOrg += iStrideOrg;
958  piCur += iStrideCur;
959  }
960 
// Scale up for the rows skipped by sub-sampling, then apply the bit-depth adjustment.
961  uiSum <<= iSubShift;
962  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
963 }
964 #endif
965 
966 // --------------------------------------------------------------------------------------------------------------------
967 // SSE
968 // --------------------------------------------------------------------------------------------------------------------
969 
971 {
// Sum of squared differences between the pOrg and pCur blocks described by pcDtParam
// (iRows x iCols, each buffer with its own stride). Every squared difference is right-shifted
// by uiShift = DISTORTION_PRECISION_ADJUSTMENT((bitDepth-8) << 1) before it is accumulated.
// When bApplyWeight is set the computation is delegated to TComRdCostWeightPrediction::xGetSSEw.
// NOTE(review): the signature line (embedded line 970) is missing from this listing.
972  if ( pcDtParam->bApplyWeight )
973  {
974  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
975  }
976  const Pel* piOrg = pcDtParam->pOrg;
977  const Pel* piCur = pcDtParam->pCur;
978  Int iRows = pcDtParam->iRows;
979  Int iCols = pcDtParam->iCols;
980  Int iStrideOrg = pcDtParam->iStrideOrg;
981  Int iStrideCur = pcDtParam->iStrideCur;
982 
983  Distortion uiSum = 0;
984  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
985 
986  Intermediate_Int iTemp;
987 
988  for( ; iRows != 0; iRows-- )
989  {
990  for (Int n = 0; n < iCols; n++ )
991  {
992  iTemp = piOrg[n ] - piCur[n ];
993  uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
994  }
995  piOrg += iStrideOrg;
996  piCur += iStrideCur;
997  }
998 
999  return ( uiSum );
1000 }
1001 
1003 {
// Fixed-width SSE: accumulates squared differences over columns 0..3 of every row; each
// squared difference is right-shifted by uiShift = DISTORTION_PRECISION_ADJUSTMENT((bitDepth-8) << 1)
// before it is added. In the weighted case iCols is asserted to be 4 and the computation is
// delegated to TComRdCostWeightPrediction::xGetSSEw.
// NOTE(review): the signature line (embedded line 1002) is missing from this listing.
1004  if ( pcDtParam->bApplyWeight )
1005  {
1006  assert( pcDtParam->iCols == 4 );
1007  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1008  }
1009  const Pel* piOrg = pcDtParam->pOrg;
1010  const Pel* piCur = pcDtParam->pCur;
1011  Int iRows = pcDtParam->iRows;
1012  Int iStrideOrg = pcDtParam->iStrideOrg;
1013  Int iStrideCur = pcDtParam->iStrideCur;
1014 
1015  Distortion uiSum = 0;
1016  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1017 
1018  Intermediate_Int iTemp;
1019 
1020  for( ; iRows != 0; iRows-- )
1021  {
1022 
1023  iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1024  iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1025  iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1026  iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1027 
1028  piOrg += iStrideOrg;
1029  piCur += iStrideCur;
1030  }
1031 
1032  return ( uiSum );
1033 }
1034 
1036 {
1037  if ( pcDtParam->bApplyWeight )
1038  {
1039  assert( pcDtParam->iCols == 8 );
1040  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1041  }
1042  const Pel* piOrg = pcDtParam->pOrg;
1043  const Pel* piCur = pcDtParam->pCur;
1044  Int iRows = pcDtParam->iRows;
1045  Int iStrideOrg = pcDtParam->iStrideOrg;
1046  Int iStrideCur = pcDtParam->iStrideCur;
1047 
1048  Distortion uiSum = 0;
1049  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1050 
1051  Intermediate_Int iTemp;
1052 
1053  for( ; iRows != 0; iRows-- )
1054  {
1055  iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1056  iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1057  iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1058  iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1059  iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1060  iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1061  iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1062  iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1063 
1064  piOrg += iStrideOrg;
1065  piCur += iStrideCur;
1066  }
1067 
1068  return ( uiSum );
1069 }
1070 
1072 {
1073  if ( pcDtParam->bApplyWeight )
1074  {
1075  assert( pcDtParam->iCols == 16 );
1076  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1077  }
1078  const Pel* piOrg = pcDtParam->pOrg;
1079  const Pel* piCur = pcDtParam->pCur;
1080  Int iRows = pcDtParam->iRows;
1081  Int iStrideOrg = pcDtParam->iStrideOrg;
1082  Int iStrideCur = pcDtParam->iStrideCur;
1083 
1084  Distortion uiSum = 0;
1085  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1086 
1087  Intermediate_Int iTemp;
1088 
1089  for( ; iRows != 0; iRows-- )
1090  {
1091 
1092  iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1093  iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1094  iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1095  iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1096  iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1097  iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1098  iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1099  iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1100  iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1101  iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1102  iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1103  iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1104  iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1105  iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1106  iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1107  iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1108 
1109  piOrg += iStrideOrg;
1110  piCur += iStrideCur;
1111  }
1112 
1113  return ( uiSum );
1114 }
1115 
1117 {
1118  if ( pcDtParam->bApplyWeight )
1119  {
1120  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1121  }
1122  const Pel* piOrg = pcDtParam->pOrg;
1123  const Pel* piCur = pcDtParam->pCur;
1124  Int iRows = pcDtParam->iRows;
1125  Int iCols = pcDtParam->iCols;
1126  Int iStrideOrg = pcDtParam->iStrideOrg;
1127  Int iStrideCur = pcDtParam->iStrideCur;
1128 
1129  Distortion uiSum = 0;
1130  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1131 
1132  Intermediate_Int iTemp;
1133 
1134  for( ; iRows != 0; iRows-- )
1135  {
1136  for (Int n = 0; n < iCols; n+=16 )
1137  {
1138 
1139  iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1140  iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1141  iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1142  iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1143  iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1144  iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1145  iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1146  iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1147  iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1148  iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1149  iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1150  iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1151  iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1152  iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1153  iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1154  iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1155 
1156  }
1157  piOrg += iStrideOrg;
1158  piCur += iStrideCur;
1159  }
1160 
1161  return ( uiSum );
1162 }
1163 
1165 {
1166  if ( pcDtParam->bApplyWeight )
1167  {
1168  assert( pcDtParam->iCols == 32 );
1169  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1170  }
1171  const Pel* piOrg = pcDtParam->pOrg;
1172  const Pel* piCur = pcDtParam->pCur;
1173  Int iRows = pcDtParam->iRows;
1174  Int iStrideOrg = pcDtParam->iStrideOrg;
1175  Int iStrideCur = pcDtParam->iStrideCur;
1176 
1177  Distortion uiSum = 0;
1178  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1179 
1180  Intermediate_Int iTemp;
1181 
1182  for( ; iRows != 0; iRows-- )
1183  {
1184 
1185  iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1186  iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1187  iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1188  iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1189  iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1190  iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1191  iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1192  iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1193  iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1194  iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1195  iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1196  iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1197  iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1198  iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1199  iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1200  iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1201  iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1202  iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1203  iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1204  iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1205  iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1206  iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1207  iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1208  iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1209  iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1210  iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1211  iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1212  iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1213  iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1214  iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1215  iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1216  iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1217 
1218  piOrg += iStrideOrg;
1219  piCur += iStrideCur;
1220  }
1221 
1222  return ( uiSum );
1223 }
1224 
1226 {
1227  if ( pcDtParam->bApplyWeight )
1228  {
1229  assert( pcDtParam->iCols == 64 );
1230  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1231  }
1232  const Pel* piOrg = pcDtParam->pOrg;
1233  const Pel* piCur = pcDtParam->pCur;
1234  Int iRows = pcDtParam->iRows;
1235  Int iStrideOrg = pcDtParam->iStrideOrg;
1236  Int iStrideCur = pcDtParam->iStrideCur;
1237 
1238  Distortion uiSum = 0;
1239  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1240 
1241  Intermediate_Int iTemp;
1242 
1243  for( ; iRows != 0; iRows-- )
1244  {
1245  iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1246  iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1247  iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1248  iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1249  iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1250  iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1251  iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1252  iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1253  iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1254  iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1255  iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1256  iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1257  iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1258  iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1259  iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1260  iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1261  iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1262  iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1263  iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1264  iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1265  iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1266  iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1267  iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1268  iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1269  iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1270  iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1271  iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1272  iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1273  iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1274  iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1275  iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1276  iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1277  iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1278  iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1279  iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1280  iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1281  iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1282  iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1283  iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1284  iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1285  iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1286  iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1287  iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1288  iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1289  iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1290  iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1291  iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1292  iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1293  iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1294  iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1295  iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1296  iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1297  iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1298  iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1299  iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1300  iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1301  iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1302  iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1303  iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1304  iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1305  iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1306  iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1307  iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1308  iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1309 
1310  piOrg += iStrideOrg;
1311  piCur += iStrideCur;
1312  }
1313 
1314  return ( uiSum );
1315 }
1316 
1317 // --------------------------------------------------------------------------------------------------------------------
1318 // HADAMARD with step (used in fractional search)
1319 // --------------------------------------------------------------------------------------------------------------------
1320 
1321 Distortion TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1322 {
1323  Distortion satd = 0;
1324  TCoeff diff[4], m[4];
1325  assert( iStep == 1 );
1326  diff[0] = piOrg[0 ] - piCur[0];
1327  diff[1] = piOrg[1 ] - piCur[1];
1328  diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur];
1329  diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur];
1330  m[0] = diff[0] + diff[2];
1331  m[1] = diff[1] + diff[3];
1332  m[2] = diff[0] - diff[2];
1333  m[3] = diff[1] - diff[3];
1334 
1335  satd += abs(m[0] + m[1]);
1336  satd += abs(m[0] - m[1]);
1337  satd += abs(m[2] + m[3]);
1338  satd += abs(m[2] - m[3]);
1339 
1340  return satd;
1341 }
1342 
1343 Distortion TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1344 {
1345  Int k;
1346  Distortion satd = 0;
1347  TCoeff diff[16], m[16], d[16];
1348 
1349  assert( iStep == 1 );
1350  for( k = 0; k < 16; k+=4 )
1351  {
1352  diff[k+0] = piOrg[0] - piCur[0];
1353  diff[k+1] = piOrg[1] - piCur[1];
1354  diff[k+2] = piOrg[2] - piCur[2];
1355  diff[k+3] = piOrg[3] - piCur[3];
1356 
1357  piCur += iStrideCur;
1358  piOrg += iStrideOrg;
1359  }
1360 
1361  /*===== hadamard transform =====*/
1362  m[ 0] = diff[ 0] + diff[12];
1363  m[ 1] = diff[ 1] + diff[13];
1364  m[ 2] = diff[ 2] + diff[14];
1365  m[ 3] = diff[ 3] + diff[15];
1366  m[ 4] = diff[ 4] + diff[ 8];
1367  m[ 5] = diff[ 5] + diff[ 9];
1368  m[ 6] = diff[ 6] + diff[10];
1369  m[ 7] = diff[ 7] + diff[11];
1370  m[ 8] = diff[ 4] - diff[ 8];
1371  m[ 9] = diff[ 5] - diff[ 9];
1372  m[10] = diff[ 6] - diff[10];
1373  m[11] = diff[ 7] - diff[11];
1374  m[12] = diff[ 0] - diff[12];
1375  m[13] = diff[ 1] - diff[13];
1376  m[14] = diff[ 2] - diff[14];
1377  m[15] = diff[ 3] - diff[15];
1378 
1379  d[ 0] = m[ 0] + m[ 4];
1380  d[ 1] = m[ 1] + m[ 5];
1381  d[ 2] = m[ 2] + m[ 6];
1382  d[ 3] = m[ 3] + m[ 7];
1383  d[ 4] = m[ 8] + m[12];
1384  d[ 5] = m[ 9] + m[13];
1385  d[ 6] = m[10] + m[14];
1386  d[ 7] = m[11] + m[15];
1387  d[ 8] = m[ 0] - m[ 4];
1388  d[ 9] = m[ 1] - m[ 5];
1389  d[10] = m[ 2] - m[ 6];
1390  d[11] = m[ 3] - m[ 7];
1391  d[12] = m[12] - m[ 8];
1392  d[13] = m[13] - m[ 9];
1393  d[14] = m[14] - m[10];
1394  d[15] = m[15] - m[11];
1395 
1396  m[ 0] = d[ 0] + d[ 3];
1397  m[ 1] = d[ 1] + d[ 2];
1398  m[ 2] = d[ 1] - d[ 2];
1399  m[ 3] = d[ 0] - d[ 3];
1400  m[ 4] = d[ 4] + d[ 7];
1401  m[ 5] = d[ 5] + d[ 6];
1402  m[ 6] = d[ 5] - d[ 6];
1403  m[ 7] = d[ 4] - d[ 7];
1404  m[ 8] = d[ 8] + d[11];
1405  m[ 9] = d[ 9] + d[10];
1406  m[10] = d[ 9] - d[10];
1407  m[11] = d[ 8] - d[11];
1408  m[12] = d[12] + d[15];
1409  m[13] = d[13] + d[14];
1410  m[14] = d[13] - d[14];
1411  m[15] = d[12] - d[15];
1412 
1413  d[ 0] = m[ 0] + m[ 1];
1414  d[ 1] = m[ 0] - m[ 1];
1415  d[ 2] = m[ 2] + m[ 3];
1416  d[ 3] = m[ 3] - m[ 2];
1417  d[ 4] = m[ 4] + m[ 5];
1418  d[ 5] = m[ 4] - m[ 5];
1419  d[ 6] = m[ 6] + m[ 7];
1420  d[ 7] = m[ 7] - m[ 6];
1421  d[ 8] = m[ 8] + m[ 9];
1422  d[ 9] = m[ 8] - m[ 9];
1423  d[10] = m[10] + m[11];
1424  d[11] = m[11] - m[10];
1425  d[12] = m[12] + m[13];
1426  d[13] = m[12] - m[13];
1427  d[14] = m[14] + m[15];
1428  d[15] = m[15] - m[14];
1429 
1430  for (k=0; k<16; ++k)
1431  {
1432  satd += abs(d[k]);
1433  }
1434  satd = ((satd+1)>>1);
1435 
1436  return satd;
1437 }
1438 
1439 Distortion TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1440 {
1441  Int k, i, j, jj;
1442  Distortion sad = 0;
1443  TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];
1444  assert( iStep == 1 );
1445  for( k = 0; k < 64; k += 8 )
1446  {
1447  diff[k+0] = piOrg[0] - piCur[0];
1448  diff[k+1] = piOrg[1] - piCur[1];
1449  diff[k+2] = piOrg[2] - piCur[2];
1450  diff[k+3] = piOrg[3] - piCur[3];
1451  diff[k+4] = piOrg[4] - piCur[4];
1452  diff[k+5] = piOrg[5] - piCur[5];
1453  diff[k+6] = piOrg[6] - piCur[6];
1454  diff[k+7] = piOrg[7] - piCur[7];
1455 
1456  piCur += iStrideCur;
1457  piOrg += iStrideOrg;
1458  }
1459 
1460  //horizontal
1461  for (j=0; j < 8; j++)
1462  {
1463  jj = j << 3;
1464  m2[j][0] = diff[jj ] + diff[jj+4];
1465  m2[j][1] = diff[jj+1] + diff[jj+5];
1466  m2[j][2] = diff[jj+2] + diff[jj+6];
1467  m2[j][3] = diff[jj+3] + diff[jj+7];
1468  m2[j][4] = diff[jj ] - diff[jj+4];
1469  m2[j][5] = diff[jj+1] - diff[jj+5];
1470  m2[j][6] = diff[jj+2] - diff[jj+6];
1471  m2[j][7] = diff[jj+3] - diff[jj+7];
1472 
1473  m1[j][0] = m2[j][0] + m2[j][2];
1474  m1[j][1] = m2[j][1] + m2[j][3];
1475  m1[j][2] = m2[j][0] - m2[j][2];
1476  m1[j][3] = m2[j][1] - m2[j][3];
1477  m1[j][4] = m2[j][4] + m2[j][6];
1478  m1[j][5] = m2[j][5] + m2[j][7];
1479  m1[j][6] = m2[j][4] - m2[j][6];
1480  m1[j][7] = m2[j][5] - m2[j][7];
1481 
1482  m2[j][0] = m1[j][0] + m1[j][1];
1483  m2[j][1] = m1[j][0] - m1[j][1];
1484  m2[j][2] = m1[j][2] + m1[j][3];
1485  m2[j][3] = m1[j][2] - m1[j][3];
1486  m2[j][4] = m1[j][4] + m1[j][5];
1487  m2[j][5] = m1[j][4] - m1[j][5];
1488  m2[j][6] = m1[j][6] + m1[j][7];
1489  m2[j][7] = m1[j][6] - m1[j][7];
1490  }
1491 
1492  //vertical
1493  for (i=0; i < 8; i++)
1494  {
1495  m3[0][i] = m2[0][i] + m2[4][i];
1496  m3[1][i] = m2[1][i] + m2[5][i];
1497  m3[2][i] = m2[2][i] + m2[6][i];
1498  m3[3][i] = m2[3][i] + m2[7][i];
1499  m3[4][i] = m2[0][i] - m2[4][i];
1500  m3[5][i] = m2[1][i] - m2[5][i];
1501  m3[6][i] = m2[2][i] - m2[6][i];
1502  m3[7][i] = m2[3][i] - m2[7][i];
1503 
1504  m1[0][i] = m3[0][i] + m3[2][i];
1505  m1[1][i] = m3[1][i] + m3[3][i];
1506  m1[2][i] = m3[0][i] - m3[2][i];
1507  m1[3][i] = m3[1][i] - m3[3][i];
1508  m1[4][i] = m3[4][i] + m3[6][i];
1509  m1[5][i] = m3[5][i] + m3[7][i];
1510  m1[6][i] = m3[4][i] - m3[6][i];
1511  m1[7][i] = m3[5][i] - m3[7][i];
1512 
1513  m2[0][i] = m1[0][i] + m1[1][i];
1514  m2[1][i] = m1[0][i] - m1[1][i];
1515  m2[2][i] = m1[2][i] + m1[3][i];
1516  m2[3][i] = m1[2][i] - m1[3][i];
1517  m2[4][i] = m1[4][i] + m1[5][i];
1518  m2[5][i] = m1[4][i] - m1[5][i];
1519  m2[6][i] = m1[6][i] + m1[7][i];
1520  m2[7][i] = m1[6][i] - m1[7][i];
1521  }
1522 
1523  for (i = 0; i < 8; i++)
1524  {
1525  for (j = 0; j < 8; j++)
1526  {
1527  sad += abs(m2[i][j]);
1528  }
1529  }
1530 
1531  sad=((sad+2)>>2);
1532 
1533  return sad;
1534 }
1535 
1536 
1538 {
1539  if ( pcDtParam->bApplyWeight )
1540  {
1541  return TComRdCostWeightPrediction::xGetHADsw( pcDtParam );
1542  }
1543  Pel* piOrg = pcDtParam->pOrg;
1544  Pel* piCur = pcDtParam->pCur;
1545  Int iRows = pcDtParam->iRows;
1546  Int iCols = pcDtParam->iCols;
1547  Int iStrideCur = pcDtParam->iStrideCur;
1548  Int iStrideOrg = pcDtParam->iStrideOrg;
1549  Int iStep = pcDtParam->iStep;
1550 
1551  Int x, y;
1552 
1553  Distortion uiSum = 0;
1554 
1555  if( ( iRows % 8 == 0) && (iCols % 8 == 0) )
1556  {
1557  Int iOffsetOrg = iStrideOrg<<3;
1558  Int iOffsetCur = iStrideCur<<3;
1559  for ( y=0; y<iRows; y+= 8 )
1560  {
1561  for ( x=0; x<iCols; x+= 8 )
1562  {
1563  uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1564  }
1565  piOrg += iOffsetOrg;
1566  piCur += iOffsetCur;
1567  }
1568  }
1569  else if( ( iRows % 4 == 0) && (iCols % 4 == 0) )
1570  {
1571  Int iOffsetOrg = iStrideOrg<<2;
1572  Int iOffsetCur = iStrideCur<<2;
1573 
1574  for ( y=0; y<iRows; y+= 4 )
1575  {
1576  for ( x=0; x<iCols; x+= 4 )
1577  {
1578  uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1579  }
1580  piOrg += iOffsetOrg;
1581  piCur += iOffsetCur;
1582  }
1583  }
1584  else if( ( iRows % 2 == 0) && (iCols % 2 == 0) )
1585  {
1586  Int iOffsetOrg = iStrideOrg<<1;
1587  Int iOffsetCur = iStrideCur<<1;
1588  for ( y=0; y<iRows; y+=2 )
1589  {
1590  for ( x=0; x<iCols; x+=2 )
1591  {
1592  uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1593  }
1594  piOrg += iOffsetOrg;
1595  piCur += iOffsetCur;
1596  }
1597  }
1598  else
1599  {
1600  assert(false);
1601  }
1602 
1603  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
1604 }
1605 
8xM SSE
Definition: TypeDef.h:429
8xM SAD with step
Definition: TypeDef.h:445
4xM HAD with step
Definition: TypeDef.h:452
Int getROIYHeight()
Definition: TComPattern.h:92
Int iCols
Definition: TComRdCost.h:75
static Distortion xCalcHADs2x2(Pel *piOrg, Pel *piCurr, Int iStrideOrg, Int iStrideCur, Int iStep)
Int iRows
Definition: TComRdCost.h:74
UInt Distortion
distortion measurement
Definition: TypeDef.h:703
8xM HAD with step
Definition: TypeDef.h:453
void Void
Definition: TypeDef.h:285
static Distortion xGetSAD48(DistParam *pcDtParam)
Definition: TComRdCost.cpp:890
Double m_dLambda
Definition: TComRdCost.h:112
Pel * pOrg
Definition: TComRdCost.h:70
virtual ~TComRdCost()
Definition: TComRdCost.cpp:51
static Distortion xGetHADs(DistParam *pcDtParam)
global variables & functions (header)
static Distortion xGetSSE4(DistParam *pcDtParam)
Int getROIYWidth()
Definition: TComPattern.h:91
#define NULL
Definition: CommonDef.h:100
4xM SAD with step
Definition: TypeDef.h:444
static Distortion xGetSAD64(DistParam *pcDtParam)
Definition: TComRdCost.cpp:798
neighbouring pixel access class for all components
Definition: TComPattern.h:80
Char g_aucConvertToBit[(1<<(6))+1]
Definition: TComRom.cpp:558
RD cost computation classes (header)
unsigned int UInt
Definition: TypeDef.h:297
16NxM HAD with step
Definition: TypeDef.h:457
Short Pel
pixel type
Definition: TypeDef.h:692
static Distortion xGetSSE(DistParam *pcDtParam)
Definition: TComRdCost.cpp:970
UInt m_uiLambdaMotionSSE[2]
Definition: TComRdCost.h:119
static Distortion xGetSAD32(DistParam *pcDtParam)
Definition: TComRdCost.cpp:685
UInt m_uiLambdaMotionSAD[2]
Definition: TComRdCost.h:118
Double calcRdCost64(UInt64 uiBits, UInt64 uiDistortion, Bool bFlag=false, DFunc eDFunc=DF_DEFAULT)
Definition: TComRdCost.cpp:125
general size SSE
Definition: TypeDef.h:427
16NxM SSE
Definition: TypeDef.h:433
static Distortion xGetSAD24(DistParam *pcDtParam)
Definition: TComRdCost.cpp:745
Pel * pCur
Definition: TComRdCost.h:71
4xM SSE
Definition: TypeDef.h:428
Distortion xGetHADsw(DistParam *pcDtParam)
get weighted Hadamard cost
Int m_iCostScale
Definition: TComRdCost.h:130
Double calcRdCost(UInt uiBits, Distortion uiDistortion, Bool bFlag=false, DFunc eDFunc=DF_DEFAULT)
Definition: TComRdCost.cpp:56
Distortion calcHAD(Int bitDepth, Pel *pi0, Int iStride0, Pel *pi1, Int iStride1, Int iWidth, Int iHeight)
Definition: TComRdCost.cpp:398
8xM SAD
Definition: TypeDef.h:437
Int Intermediate_Int
used as intermediate value in calculations
Definition: TypeDef.h:696
Void init()
Definition: TComRdCost.cpp:223
Int iSubShift
Definition: TComRdCost.h:86
Int iStep
Definition: TComRdCost.h:76
bool Bool
Definition: TypeDef.h:286
16NxM SAD with step
Definition: TypeDef.h:449
static Distortion xGetSAD16(DistParam *pcDtParam)
Definition: TComRdCost.cpp:559
long long Int64
Definition: TypeDef.h:317
static Distortion xGetSSE8(DistParam *pcDtParam)
distortion parameter class
Definition: TComRdCost.h:67
Int iStrideOrg
Definition: TComRdCost.h:72
static Distortion xGetSSE16(DistParam *pcDtParam)
static Distortion xGetSSE16N(DistParam *pcDtParam)
Double m_sqrtLambda
Definition: TComRdCost.h:113
UInt xGetComponentBits(Int iVal)
Definition: TComRdCost.cpp:278
static Distortion xCalcHADs8x8(Pel *piOrg, Pel *piCurr, Int iStrideOrg, Int iStrideCur, Int iStep)
static Distortion xGetSSE64(DistParam *pcDtParam)
static Distortion xGetSAD(DistParam *pcDtParam)
Definition: TComRdCost.cpp:465
general size SAD
Definition: TypeDef.h:435
Int TCoeff
transform coefficient
Definition: TypeDef.h:693
16xM SAD with step
Definition: TypeDef.h:446
ComponentID compIdx
Definition: TComRdCost.h:82
64xM HAD with step
Definition: TypeDef.h:456
Int getPatternLStride()
Definition: TComPattern.h:93
Bool bApplyWeight
Definition: TComRdCost.h:80
16NxM SAD
Definition: TypeDef.h:441
Distortion getDistPart(Int bitDepth, Pel *piCur, Int iCurStride, Pel *piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc=DF_SSE)
Definition: TComRdCost.cpp:433
Int iStrideCur
Definition: TComRdCost.h:73
Double m_distortionWeight[MAX_NUM_COMPONENT]
Definition: TComRdCost.h:111
Void setDistParam(UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam &rcDistParam)
Definition: TComRdCost.cpp:294
Void setLambda(Double dLambda)
Definition: TComRdCost.cpp:194
unsigned long long UInt64
Definition: TypeDef.h:318
static Distortion xGetSAD4(DistParam *pcDtParam)
Definition: TComRdCost.cpp:493
Double m_dFrameLambda
Definition: TComRdCost.h:121
static Distortion xGetSSE32(DistParam *pcDtParam)
32xM SAD
Definition: TypeDef.h:439
Int g_bitDepth[MAX_NUM_CHANNEL_TYPE]
Definition: TComRom.cpp:548
static Distortion xGetSAD16N(DistParam *pcDtParam)
Definition: TComRdCost.cpp:643
Int bitDepth
Definition: TComRdCost.h:78
32xM HAD with step
Definition: TypeDef.h:455
32xM SSE
Definition: TypeDef.h:431
static Distortion xCalcHADs4x4(Pel *piOrg, Pel *piCurr, Int iStrideOrg, Int iStrideCur, Int iStep)
CostMode m_costMode
Definition: TComRdCost.h:110
FpDistFunc DistFunc
Definition: TComRdCost.h:77
general size Hadamard with step
Definition: TypeDef.h:451
#define DISTORTION_PRECISION_ADJUSTMENT(x)
Definition: TypeDef.h:269
4xM SAD
Definition: TypeDef.h:436
int Int
Definition: TypeDef.h:296
static Bool isChroma(const ComponentID id)
ComponentID
Definition: TypeDef.h:368
FpDistFunc m_afpDistortFunc[DF_TOTAL_FUNCTIONS]
Definition: TComRdCost.h:109
Frame-based SSE.
Definition: TypeDef.h:468
Pel * getROIY()
Definition: TComPattern.h:90
static Distortion xGetSAD12(DistParam *pcDtParam)
Definition: TComRdCost.cpp:603
64xM SAD with step
Definition: TypeDef.h:448
double Double
Definition: TypeDef.h:298
16xM SSE
Definition: TypeDef.h:430
Distortion xGetSSEw(DistParam *pcDtParam)
64xM SAD
Definition: TypeDef.h:440
32xM SAD with step
Definition: TypeDef.h:447
static Distortion xGetSAD8(DistParam *pcDtParam)
Definition: TComRdCost.cpp:524
UInt m_uiCost
Definition: TComRdCost.h:128
16xM HAD with step
Definition: TypeDef.h:454
Distortion xGetSADw(DistParam *pcDtParam)
16xM SAD
Definition: TypeDef.h:438
#define LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME
QP' to use for mixed_lossy_lossless coding.
Definition: TypeDef.h:228
DFunc
distortion function index
Definition: TypeDef.h:424
64xM SSE
Definition: TypeDef.h:432
general size SAD with step
Definition: TypeDef.h:443