HEVC Test Model (HM)  HM-16.18
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TComRdCost.cpp
Go to the documentation of this file.
1 /* The copyright in this software is being made available under the BSD
2  * License, included below. This software may be subject to other third party
3  * and contributor rights, including patent rights, and no such rights are
4  * granted under this license.
5  *
6  * Copyright (c) 2010-2017, ITU/ISO/IEC
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  * this list of conditions and the following disclaimer in the documentation
16  * and/or other materials provided with the distribution.
17  * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18  * be used to endorse or promote products derived from this software without
19  * specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
38 #include <math.h>
39 #include <assert.h>
40 #include <limits>
41 #include "TComRom.h"
42 #include "TComRdCost.h"
43 
44 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
45 #include <emmintrin.h>
46 #include <xmmintrin.h>
47 #endif
48 
51 
// NOTE(review): the signature line that precedes this body is absent from this
// listing (presumably the TComRdCost constructor -- confirm against the
// original file). The body only delegates to init().
53 {
54  init();
55 }
56 
// NOTE(review): the signature line that precedes this body is absent from this
// listing (presumably the TComRdCost destructor -- confirm). The body is empty.
58 {
59 }
60 
61 // Calculate RD functions
// Combine a bit count and a distortion into one rate-distortion cost.
//
// The lambda used depends on eDFunc:
//   DF_SAD       -> m_dLambdaMotionSAD[0]
//   DF_DEFAULT   -> m_dLambda
//   DF_SSE_FRAME -> m_dFrameLambda
//   DF_SSE and any other value trip assert(0); with asserts compiled out,
//   lambda keeps its initial value of 1.0.
//
// For DF_SAD the lambda term is rescaled by 65536.0, the same factor that
// setLambda() multiplies into m_dLambdaMotionSAD.
//
// \param numBits     rate term
// \param distortion  distortion term
// \param eDFunc      selects which lambda is applied (see above)
// \return            either distortion + numBits * lambda, or
//                    distortion / lambda + numBits, depending on a condition
//                    that is missing from this listing (see notes below)
62 Double TComRdCost::calcRdCost( Double numBits, Double distortion, DFunc eDFunc )
63 {
64  Double lambda = 1.0;
65 
66  switch ( eDFunc )
67  {
68  case DF_SSE:
69  assert(0);
70  break;
71  case DF_SAD:
72  lambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
73  break;
74  case DF_DEFAULT:
75  lambda = m_dLambda;
76  break;
77  case DF_SSE_FRAME:
78  lambda = m_dFrameLambda;
79  break;
80  default:
81  assert (0);
82  break;
83  }
84 
85  if (eDFunc == DF_SAD)
86  {
   // NOTE(review): listing line 87 is missing here. The `else` below has no
   // visible `if`, so presumably a condition selecting the lossless-style
   // formula stood on that line -- confirm against the original file.
88  {
89  return ((distortion * 65536.0) / lambda) + numBits; // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
90  }
91  else
92  {
93  return distortion + (((numBits * lambda) ) / 65536.0);
94  }
95  }
96  else
97  {
   // NOTE(review): listing line 98 is missing here as well; presumably the
   // same kind of condition as in the DF_SAD branch -- confirm.
99  {
100  return (distortion / lambda) + numBits; // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
101  }
102  else
103  {
104  return distortion + (numBits * lambda);
105  }
106  }
107 }
108 
// Store the Lagrange multiplier and derive the scaled motion-search lambdas.
//
//  - m_dLambda and m_sqrtLambda hold dLambda and its square root.
//  - m_dLambdaMotionSAD[0] / m_dLambdaMotionSSE[0] are sqrt(lambda) and lambda,
//    each multiplied by 65536.0.
//  - Index [1] holds the same two quantities for a lambda that is recomputed
//    from LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME instead of being
//    taken from the argument.
//
// \param dLambda    lambda to store; the parameter is reused as a scratch
//                   variable for the second lambda further down
// \param bitDepths  only recon[CHANNEL_TYPE_LUMA] is read, and only when
//                   FULL_NBIT is not set
109 Void TComRdCost::setLambda( Double dLambda, const BitDepths &bitDepths )
110 {
111  m_dLambda = dLambda;
112  m_sqrtLambda = sqrt(m_dLambda);
113  m_dLambdaMotionSAD[0] = 65536.0 * m_sqrtLambda;
114  m_dLambdaMotionSSE[0] = 65536.0 * m_dLambda;
   // From here on dLambda no longer holds the caller's value.
115 #if FULL_NBIT
116  dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
117 #else
   // The QP is lowered by 6 for every bit of luma reconstruction depth above 8.
118  dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (bitDepths.recon[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
119 #endif
120  m_dLambdaMotionSAD[1] = 65536.0 * sqrt(dLambda);
121  m_dLambdaMotionSSE[1] = 65536.0 * dLambda;
122 }
123 
124 
125 // Initialize Function Pointer by [eDFunc]
// NOTE(review): the signature line (listing line 126) and most of this body
// are absent from this listing; only blank lines survive between listing lines
// 129 and 171. Confirm the full contents against the original file.
// What remains clears the DF_DEFAULT slot of m_afpDistortFunc and zeroes
// m_motionLambda and m_iCostScale.
127 {
128  m_afpDistortFunc[DF_DEFAULT] = NULL; // for DF_DEFAULT
129 
137 
145 
153 
157 
161 
169 
171 
172  m_motionLambda = 0;
173  m_iCostScale = 0;
174 }
175 
176 // Static member function
// NOTE(review): the signature line (listing line 177) is absent from this
// listing; the body reads a signed `iVal` and returns `uiLength`.
//
// The value is first folded into an unsigned number: iVal <= 0 becomes
// (-iVal << 1) + 1 and iVal > 0 becomes iVal << 1. The returned length starts
// at 1 and grows by 2 for every right shift needed to bring that number down
// to 1 (so iVal == 0 yields 1).
178 {
   // Negating the most negative Int would overflow, so it is rejected up front.
179  assert(iVal != std::numeric_limits<Int>::min());
180  UInt uiLength = 1;
181  UInt uiTemp = ( iVal <= 0) ? (UInt(-iVal)<<1)+1: UInt(iVal<<1);
182 
   // uiTemp is always >= 1 here, so the loop ends once all lower bits are shifted out.
183  while ( 1 != uiTemp )
184  {
185  uiTemp >>= 1;
186  uiLength += 2;
187  }
188 
189  return uiLength;
190 }
191 
// Prepare a DistParam for a block of the given size and cost function.
//
// Sets iCols/iRows, picks DistFunc from m_afpDistortFunc at index
// eDFunc + g_aucConvertToBit[width] + 1, clears iSubShift and sets the
// early-exit threshold to the largest Distortion value.
// Pointers, strides, iStep and bitDepth are not written here; getDistPart()
// fills them in after calling this overload.
//
// \param uiBlkWidth   block width; also used to index g_aucConvertToBit
// \param uiBlkHeight  block height
// \param eDFunc       base index into the distortion-function table
// \param rcDistParam  parameter set to initialise
192 Void TComRdCost::setDistParam( UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam& rcDistParam )
193 {
194  // set Block Width / Height
195  rcDistParam.iCols = uiBlkWidth;
196  rcDistParam.iRows = uiBlkHeight;
197  rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
198 
199  // initialize
200  rcDistParam.iSubShift = 0;
201  rcDistParam.m_maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
202 }
203 
204 // Setting the Distortion Parameter for Inter (ME)
// Prepare a DistParam for SAD between a pattern and a reference block.
//
// The original pointer, stride, width and height come from pcPatternKey; the
// current pointer and stride come from piRefY / iRefStride. DistFunc is taken
// from the DF_SAD family, with dedicated entries for widths 12, 24 and 48.
// iStep and bitDepth are not written by this overload.
//
// \param pcPatternKey  source of the original block (ROI pointer, stride, size)
// \param piRefY        reference samples to compare against
// \param iRefStride    stride of piRefY
// \param rcDistParam   parameter set to initialise
205 Void TComRdCost::setDistParam( const TComPattern* const pcPatternKey, const Pel* piRefY, Int iRefStride, DistParam& rcDistParam )
206 {
207  // set Original & Curr Pointer / Stride
208  rcDistParam.pOrg = pcPatternKey->getROIY();
209  rcDistParam.pCur = piRefY;
210 
211  rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
212  rcDistParam.iStrideCur = iRefStride;
213 
214  // set Block Width / Height
215  rcDistParam.iCols = pcPatternKey->getROIYWidth();
216  rcDistParam.iRows = pcPatternKey->getROIYHeight();
217  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
218  rcDistParam.m_maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
219 
   // Widths 12, 24 and 48 replace the table-derived choice with their own entries.
220  if (rcDistParam.iCols == 12)
221  {
222  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD12];
223  }
224  else if (rcDistParam.iCols == 24)
225  {
226  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD24];
227  }
228  else if (rcDistParam.iCols == 48)
229  {
230  rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD48];
231  }
232 
233  // initialize
234  rcDistParam.iSubShift = 0;
235 }
236 
237 // Setting the Distortion Parameter for Inter (subpel ME with step)
// Prepare a DistParam for comparison against a stepped reference buffer.
//
// Same inputs as the plain pattern-based overload, plus:
//  - iStep is stored in rcDistParam.iStep and also multiplied into the
//    current stride (iStrideCur = iRefStride * iStep);
//  - bHADME chooses the DF_HADS family; otherwise the DF_SADS family is used,
//    with dedicated entries for widths 12, 24 and 48.
// bitDepth is not written by this overload.
//
// \param pcPatternKey  source of the original block (ROI pointer, stride, size)
// \param piRefY        reference samples to compare against
// \param iRefStride    stride of piRefY before the step is applied
// \param iStep         sample step within the reference buffer
// \param rcDistParam   parameter set to initialise
// \param bHADME        true selects the DF_HADS function family
238 Void TComRdCost::setDistParam( const TComPattern* const pcPatternKey, const Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME )
239 {
240  // set Original & Curr Pointer / Stride
241  rcDistParam.pOrg = pcPatternKey->getROIY();
242  rcDistParam.pCur = piRefY;
243 
244  rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
245  rcDistParam.iStrideCur = iRefStride * iStep;
246 
247  // set Step for interpolated buffer
248  rcDistParam.iStep = iStep;
249 
250  // set Block Width / Height
251  rcDistParam.iCols = pcPatternKey->getROIYWidth();
252  rcDistParam.iRows = pcPatternKey->getROIYHeight();
253 
254  rcDistParam.m_maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
255 
256  // set distortion function
257  if ( !bHADME )
258  {
259  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
260  if (rcDistParam.iCols == 12)
261  {
262  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS12];
263  }
264  else if (rcDistParam.iCols == 24)
265  {
266  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS24];
267  }
268  else if (rcDistParam.iCols == 48)
269  {
270  rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS48];
271  }
272  }
273  else
274  {
   // The Hadamard branch has no width-specific overrides.
275  rcDistParam.DistFunc = m_afpDistortFunc[DF_HADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
276  }
277 
278  // initialize
279  rcDistParam.iSubShift = 0;
280 }
281 
// Prepare a DistParam directly from two sample buffers.
//
// Unlike the pattern-based overloads, this one also sets bitDepth and fixes
// iStep to 1. DistFunc comes from the DF_HADS family when bHadamard is true
// and from the DF_SADS family otherwise; there are no width-specific overrides
// for 12/24/48 in this overload.
//
// \param rcDP       parameter set to initialise
// \param bitDepth   stored in rcDP.bitDepth
// \param p1         stored as the original pointer (pOrg)
// \param iStride1   stride of p1
// \param p2         stored as the current pointer (pCur)
// \param iStride2   stride of p2
// \param iWidth     block width; also used to index g_aucConvertToBit
// \param iHeight    block height
// \param bHadamard  true selects the DF_HADS function family
282 Void TComRdCost::setDistParam( DistParam& rcDP, Int bitDepth, const Pel* p1, Int iStride1, const Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard )
283 {
284  rcDP.pOrg = p1;
285  rcDP.pCur = p2;
286  rcDP.iStrideOrg = iStride1;
287  rcDP.iStrideCur = iStride2;
288  rcDP.iCols = iWidth;
289  rcDP.iRows = iHeight;
290  rcDP.iStep = 1;
291  rcDP.iSubShift = 0;
292  rcDP.bitDepth = bitDepth;
293  rcDP.DistFunc = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ];
294  rcDP.m_maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
295 }
296 
// Sum the Hadamard costs of all tiles of a block.
//
// When both dimensions are multiples of 8 the block is walked in 8x8 tiles via
// xCalcHADs8x8(); otherwise both dimensions must be multiples of 4 (asserted)
// and 4x4 tiles via xCalcHADs4x4() are used. The accumulated sum is finally
// shifted right by DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
//
// NOTE(review): listing line 309 is missing from this listing. The `#endif`
// after the `, bitDepth` argument has no visible `#if`, so presumably the
// extra argument is conditionally compiled -- confirm against the original.
//
// \param bitDepth  used for the final precision shift (and passed to the 8x8
//                  helper, see note above)
// \param pi0       first sample buffer
// \param iStride0  stride of pi0
// \param pi1       second sample buffer
// \param iStride1  stride of pi1
// \param iWidth    block width, multiple of 4
// \param iHeight   block height, multiple of 4
// \return          accumulated Hadamard cost after the precision shift
297 Distortion TComRdCost::calcHAD( Int bitDepth, const Pel* pi0, Int iStride0, const Pel* pi1, Int iStride1, Int iWidth, Int iHeight )
298 {
299  Distortion uiSum = 0;
300  Int x, y;
301 
302  if ( ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 ) )
303  {
304  for ( y=0; y<iHeight; y+= 8 )
305  {
306  for ( x=0; x<iWidth; x+= 8 )
307  {
308  uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1
310  , bitDepth
311 #endif
312  );
313  }
     // Move both buffers down to the next row of 8x8 tiles.
314  pi0 += iStride0*8;
315  pi1 += iStride1*8;
316  }
317  }
318  else
319  {
320  assert ( ( (iWidth % 4) == 0 ) && ( (iHeight % 4) == 0 ) );
321 
322  for ( y=0; y<iHeight; y+= 4 )
323  {
324  for ( x=0; x<iWidth; x+= 4 )
325  {
326  uiSum += xCalcHADs4x4( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
327  }
     // Move both buffers down to the next row of 4x4 tiles.
328  pi0 += iStride0*4;
329  pi1 += iStride1*4;
330  }
331  }
332 
333  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8) );
334 }
335 
// Compute the distortion between two blocks with the selected cost function.
//
// A local DistParam is initialised through the size/DFunc overload of
// setDistParam() and then completed with pointers, strides, step 1, no
// weighting and the given bit depth. For chroma components the result of the
// distortion function is multiplied by m_distortionWeight[compID] and cast
// back to Distortion; for other components it is returned unchanged.
//
// Note the argument order: the "current" buffer comes first, then the
// "original" buffer.
//
// \param bitDepth     stored in the DistParam for the distortion function
// \param piCur        current samples
// \param iCurStride   stride of piCur
// \param piOrg        original samples
// \param iOrgStride   stride of piOrg
// \param uiBlkWidth   block width
// \param uiBlkHeight  block height
// \param compID       component; decides whether the weight is applied
// \param eDFunc       base cost function passed on to setDistParam()
// \return             distortion, weighted for chroma
336 Distortion TComRdCost::getDistPart( Int bitDepth, const Pel* piCur, Int iCurStride, const Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc )
337 {
338  DistParam cDtParam;
339  setDistParam( uiBlkWidth, uiBlkHeight, eDFunc, cDtParam );
340  cDtParam.pOrg = piOrg;
341  cDtParam.pCur = piCur;
342  cDtParam.iStrideOrg = iOrgStride;
343  cDtParam.iStrideCur = iCurStride;
344  cDtParam.iStep = 1;
345 
346  cDtParam.bApplyWeight = false;
347  cDtParam.compIdx = MAX_NUM_COMPONENT; // just for assert: to be sure it was set before use
348  cDtParam.bitDepth = bitDepth;
349 
350  if (isChroma(compID))
351  {
352  return ((Distortion) (m_distortionWeight[compID] * cDtParam.DistFunc( &cDtParam )));
353  }
354  else
355  {
356  return cDtParam.DistFunc( &cDtParam );
357  }
358 }
359 
360 // ====================================================================================================================
361 // Distortion functions
362 // ====================================================================================================================
363 
364 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
// SSE2 sum of absolute differences over one row, four samples per iteration.
//
// Each iteration loads 64 bits from both rows, forms |org - cur| per 16-bit
// lane as max - min, and accumulates it with a saturating unsigned 16-bit add.
// After the loop the four low lanes are widened to 32 bits and added
// horizontally.
// NOTE(review): the 64-bit load covering four samples assumes Pel is a 16-bit
// type -- confirm against the Pel typedef.
//
// \param piOrg   first row
// \param piCur   second row
// \param nWidth  number of samples; must be a multiple of 4 (asserted)
// \return        sum of absolute differences for the row
365 inline Int simdSADLine4n16b( const Pel * piOrg , const Pel * piCur , Int nWidth )
366 {
367  // internal bit-depth must be 12-bit or lower
368  assert( !( nWidth & 0x03 ) );
369  __m128i org , cur , abs , sum;
370  sum = _mm_setzero_si128();
371  for( Int n = 0 ; n < nWidth ; n += 4 )
372  {
373  org = _mm_loadl_epi64( ( __m128i* )( piOrg + n ) );
374  cur = _mm_loadl_epi64( ( __m128i* )( piCur + n ) );
375  abs = _mm_subs_epi16( _mm_max_epi16( org , cur ) , _mm_min_epi16( org , cur ) );
376  sum = _mm_adds_epu16( abs , sum );
377  }
   // Widen the low four 16-bit partial sums to 32 bits by interleaving with zero.
378  __m128i zero = _mm_setzero_si128();
379  sum = _mm_unpacklo_epi16( sum , zero );
   // Two shuffle-and-add steps leave the total in every 32-bit lane.
380  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) );
381  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) );
382  return( _mm_cvtsi128_si32( sum ) );
383 }
384 
// SSE2 sum of absolute differences over one row, eight samples per iteration.
//
// Each iteration does an unaligned 128-bit load from both rows, forms
// |org - cur| per 16-bit lane as max - min, and accumulates it with a
// saturating unsigned 16-bit add. After the loop both halves of the
// accumulator are widened to 32 bits, added together and reduced horizontally.
// NOTE(review): the 128-bit load covering eight samples assumes Pel is a
// 16-bit type -- confirm against the Pel typedef.
//
// \param piOrg   first row
// \param piCur   second row
// \param nWidth  number of samples; must be a multiple of 8 (asserted)
// \return        sum of absolute differences for the row
385 inline Int simdSADLine8n16b( const Pel * piOrg , const Pel * piCur , Int nWidth )
386 {
387  // internal bit-depth must be 12-bit or lower
388  assert( !( nWidth & 0x07 ) );
389  __m128i org , cur , abs , sum;
390  sum = _mm_setzero_si128();
391  for( Int n = 0 ; n < nWidth ; n += 8 )
392  {
393  org = _mm_loadu_si128( ( __m128i* )( piOrg + n ) );
394  cur = _mm_loadu_si128( ( __m128i* )( piCur + n ) );
395  abs = _mm_subs_epi16( _mm_max_epi16( org , cur ) , _mm_min_epi16( org , cur ) );
396  sum = _mm_adds_epu16( abs , sum );
397  }
   // Widen all eight 16-bit partial sums to 32 bits and fold them into four lanes.
398  __m128i zero = _mm_setzero_si128();
399  __m128i hi = _mm_unpackhi_epi16( sum , zero );
400  __m128i lo = _mm_unpacklo_epi16( sum , zero );
401  sum = _mm_add_epi32( lo , hi );
   // Two shuffle-and-add steps leave the total in every 32-bit lane.
402  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) );
403  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) );
404  return( _mm_cvtsi128_si32( sum ) );
405 }
406 
// In-place transpose of an 8x8 matrix of 32-bit values held in 16 vectors.
//
// simdHADs8x8() passes an __m128i[8][2] array, i.e. row r occupies
// pBuffer[2*r] (columns 0-3) and pBuffer[2*r+1] (columns 4-7). The four 4x4
// quadrants are transposed individually with _MM_TRANSPOSE4_PS, and the
// write-back then swaps the two off-diagonal quadrants.
// The float casts only reinterpret the bits so that the float transpose macro
// can be used.
//
// \param pBuffer  16 vectors, read and overwritten
407 inline Void simd8x8Transpose32b( __m128i * pBuffer )
408 {
409  __m128 tmp[16];
410  for( Int n = 0 ; n < 16 ; n++ )
411  {
412  tmp[n] = _mm_castsi128_ps( pBuffer[n] );
413  }
   // Quadrants in order: rows 0-3 left, rows 0-3 right, rows 4-7 left, rows 4-7 right.
414  _MM_TRANSPOSE4_PS( tmp[0] , tmp[2] , tmp[4] , tmp[6] );
415  _MM_TRANSPOSE4_PS( tmp[1] , tmp[3] , tmp[5] , tmp[7] );
416  _MM_TRANSPOSE4_PS( tmp[8] , tmp[10] , tmp[12] , tmp[14] );
417  _MM_TRANSPOSE4_PS( tmp[9] , tmp[11] , tmp[13] , tmp[15] );
418  for( Int n = 0 ; n < 8 ; n += 2 )
419  {
420  pBuffer[n] = _mm_castps_si128( tmp[n] );
     // The right half of an upper row takes the transposed lower-left quadrant...
421  pBuffer[n+1] = _mm_castps_si128( tmp[n+8] );
     // ...and the left half of a lower row takes the transposed upper-right quadrant.
422  pBuffer[n+8] = _mm_castps_si128( tmp[n+1] );
423  pBuffer[n+9] = _mm_castps_si128( tmp[n+9] );
424  }
425 }
426 
// One-dimensional 8-point butterfly network over 8 rows of 32-bit values.
//
// The data is laid out as 8 rows of two vectors each (row r = pInput[2*r],
// pInput[2*r+1]). Three add/subtract stages combine rows at distance 4, then
// 2, then 1.
//
// The result is written back into pInput; pOutput is never referenced in this
// body. The visible caller, simdHADs8x8(), passes the same pointer for both.
//
// The attribute below switches off the "tree-vrp" optimisation for GCC
// versions strictly between 4.6.0 and 4.7.0 only.
// NOTE(review): presumably a workaround for a compiler problem in that
// release series -- the listing gives no reason; confirm.
//
// \param pInput   16 vectors, read and overwritten with the result
// \param pOutput  unused
427 #ifdef __GNUC__
428 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
429 #if GCC_VERSION > 40600 && GCC_VERSION < 40700
430 __attribute__((optimize("no-tree-vrp")))
431 #endif
432 #endif
433 Void simd8x8HAD1D32b( __m128i * pInput , __m128i * pOutput )
434 {
435  __m128i m1[8][2] , m2[8][2];
436 
   // Stage 1: rows r and r+4.
437  m2[0][0] = _mm_add_epi32( pInput[0] ,pInput[8 ] ); m2[0][1] = _mm_add_epi32( pInput[1] ,pInput[9 ] );
438  m2[1][0] = _mm_add_epi32( pInput[2] ,pInput[10] ); m2[1][1] = _mm_add_epi32( pInput[3] ,pInput[11] );
439  m2[2][0] = _mm_add_epi32( pInput[4] ,pInput[12] ); m2[2][1] = _mm_add_epi32( pInput[5] ,pInput[13] );
440  m2[3][0] = _mm_add_epi32( pInput[6] ,pInput[14] ); m2[3][1] = _mm_add_epi32( pInput[7] ,pInput[15] );
441  m2[4][0] = _mm_sub_epi32( pInput[0] ,pInput[8 ] ); m2[4][1] = _mm_sub_epi32( pInput[1] ,pInput[9 ] );
442  m2[5][0] = _mm_sub_epi32( pInput[2] ,pInput[10] ); m2[5][1] = _mm_sub_epi32( pInput[3] ,pInput[11] );
443  m2[6][0] = _mm_sub_epi32( pInput[4] ,pInput[12] ); m2[6][1] = _mm_sub_epi32( pInput[5] ,pInput[13] );
444  m2[7][0] = _mm_sub_epi32( pInput[6] ,pInput[14] ); m2[7][1] = _mm_sub_epi32( pInput[7] ,pInput[15] );
445 
   // Stage 2: rows at distance 2 within each half.
446  m1[0][0] = _mm_add_epi32( m2[0][0] , m2[2][0] ); m1[0][1] = _mm_add_epi32( m2[0][1] , m2[2][1] );
447  m1[1][0] = _mm_add_epi32( m2[1][0] , m2[3][0] ); m1[1][1] = _mm_add_epi32( m2[1][1] , m2[3][1] );
448  m1[2][0] = _mm_sub_epi32( m2[0][0] , m2[2][0] ); m1[2][1] = _mm_sub_epi32( m2[0][1] , m2[2][1] );
449  m1[3][0] = _mm_sub_epi32( m2[1][0] , m2[3][0] ); m1[3][1] = _mm_sub_epi32( m2[1][1] , m2[3][1] );
450  m1[4][0] = _mm_add_epi32( m2[4][0] , m2[6][0] ); m1[4][1] = _mm_add_epi32( m2[4][1] , m2[6][1] );
451  m1[5][0] = _mm_add_epi32( m2[5][0] , m2[7][0] ); m1[5][1] = _mm_add_epi32( m2[5][1] , m2[7][1] );
452  m1[6][0] = _mm_sub_epi32( m2[4][0] , m2[6][0] ); m1[6][1] = _mm_sub_epi32( m2[4][1] , m2[6][1] );
453  m1[7][0] = _mm_sub_epi32( m2[5][0] , m2[7][0] ); m1[7][1] = _mm_sub_epi32( m2[5][1] , m2[7][1] );
454 
   // Stage 3: adjacent rows; the results overwrite the input buffer.
455  pInput[0 ] = _mm_add_epi32( m1[0][0] , m1[1][0] ); pInput[1 ] = _mm_add_epi32( m1[0][1] , m1[1][1] );
456  pInput[2 ] = _mm_sub_epi32( m1[0][0] , m1[1][0] ); pInput[3 ] = _mm_sub_epi32( m1[0][1] , m1[1][1] );
457  pInput[4 ] = _mm_add_epi32( m1[2][0] , m1[3][0] ); pInput[5 ] = _mm_add_epi32( m1[2][1] , m1[3][1] );
458  pInput[6 ] = _mm_sub_epi32( m1[2][0] , m1[3][0] ); pInput[7 ] = _mm_sub_epi32( m1[2][1] , m1[3][1] );
459  pInput[8 ] = _mm_add_epi32( m1[4][0] , m1[5][0] ); pInput[9 ] = _mm_add_epi32( m1[4][1] , m1[5][1] );
460  pInput[10] = _mm_sub_epi32( m1[4][0] , m1[5][0] ); pInput[11] = _mm_sub_epi32( m1[4][1] , m1[5][1] );
461  pInput[12] = _mm_add_epi32( m1[6][0] , m1[7][0] ); pInput[13] = _mm_add_epi32( m1[6][1] , m1[7][1] );
462  pInput[14] = _mm_sub_epi32( m1[6][0] , m1[7][0] ); pInput[15] = _mm_sub_epi32( m1[6][1] , m1[7][1] );
463 }
464 
// Per-lane absolute value of four signed 32-bit integers using SSE2 only.
//
// \param m  four signed 32-bit lanes
// \return   for each lane, the larger of m and -m
465 inline __m128i simdAbs32b( __m128i m )
466 {
467  const __m128i zero = _mm_setzero_si128();
   // tmp = -m
468  __m128i tmp = _mm_sub_epi32( zero , m );
   // mask is all-ones in the lanes where m > -m.
469  __m128i mask = _mm_cmpgt_epi32( m , tmp );
   // Pick m where the mask is set and -m elsewhere.
470  return( _mm_or_si128( _mm_and_si128( mask , m ) , _mm_andnot_si128( mask , tmp ) ) );
471 }
472 
// SSE2 8x8 Hadamard cost of the difference between two blocks.
//
// Steps: build the 8x8 difference (16-bit subtraction, then sign extension to
// 32 bits), transpose, apply the 1-D butterfly, transpose again, apply the 1-D
// butterfly again, and sum the absolute values of all 64 results. The total is
// returned as (sum + 2) >> 2.
// NOTE(review): the 128-bit row loads assume Pel is a 16-bit type -- confirm
// against the Pel typedef.
//
// \param piOrg       first block
// \param piCur       second block
// \param iStrideOrg  stride of piOrg
// \param iStrideCur  stride of piCur
// \return            rounded, scaled sum of absolute transformed differences
473 UInt simdHADs8x8( const Pel * piOrg, const Pel * piCur, Int iStrideOrg, Int iStrideCur )
474 {
475  __m128i mmDiff[8][2];
476  __m128i mmZero = _mm_setzero_si128();
477  for( Int n = 0 ; n < 8 ; n++ , piOrg += iStrideOrg , piCur += iStrideCur )
478  {
479  __m128i diff = _mm_sub_epi16( _mm_loadu_si128( ( __m128i* )piOrg ) , _mm_loadu_si128( ( __m128i* )piCur ) );
480  // sign extension
     // The compare yields 0xFFFF for negative lanes, which becomes the upper half after unpacking.
481  __m128i mask = _mm_cmplt_epi16( diff , mmZero );
482  mmDiff[n][0] = _mm_unpacklo_epi16( diff , mask );
483  mmDiff[n][1] = _mm_unpackhi_epi16( diff , mask );
484  }
485 
486  // transpose
487  simd8x8Transpose32b( &mmDiff[0][0] );
488 
489  // horizontal
   // The same buffer is passed for input and output; the helper works in place.
490  simd8x8HAD1D32b( &mmDiff[0][0] , &mmDiff[0][0] );
491 
492  // transpose
493  simd8x8Transpose32b( &mmDiff[0][0] );
494 
495  // vertical
496  simd8x8HAD1D32b( &mmDiff[0][0] , &mmDiff[0][0] );
497 
498  __m128i mmSum = _mm_setzero_si128();
499  for( Int n = 0 ; n < 8 ; n++ )
500  {
501  mmSum = _mm_add_epi32( mmSum , simdAbs32b( mmDiff[n][0] ) );
502  mmSum = _mm_add_epi32( mmSum , simdAbs32b( mmDiff[n][1] ) );
503  }
   // Two shuffle-and-add steps leave the total in every 32-bit lane.
504  mmSum = _mm_add_epi32( mmSum , _mm_shuffle_epi32( mmSum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) );
505  mmSum = _mm_add_epi32( mmSum , _mm_shuffle_epi32( mmSum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) );
506 
507  UInt sad = _mm_cvtsi128_si32( mmSum );
   // Divide by 4 with rounding.
508  sad = ( sad + 2 ) >> 2;
509 
510  return( sad );
511 }
512 #endif
513 
514 // --------------------------------------------------------------------------------------------------------------------
515 // SAD
516 // --------------------------------------------------------------------------------------------------------------------
517 
// NOTE(review): the signature line (listing line 518) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the general-width SAD function -- confirm).
//
// Sum of absolute differences over iRows x iCols samples, shifted right by
// DISTORTION_PRECISION_ADJUSTMENT(bitDepth - 8).
//  - With bApplyWeight set, the work is delegated to
//    TComRdCostWeightPrediction::xGetSADw().
//  - In the vectorised build, bit depths up to 10 use the SIMD row helpers
//    (8-sample helper when iCols is a multiple of 8, otherwise the 4-sample
//    helper, which asserts a multiple of 4). That path does not test the
//    early-exit threshold.
//  - The scalar path tests m_maximumDistortionForEarlyExit after each row and
//    returns the partial sum as soon as it is exceeded.
// iSubShift is not used here: every row is visited.
519 {
520  if ( pcDtParam->bApplyWeight )
521  {
522  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
523  }
524  const Pel* piOrg = pcDtParam->pOrg;
525  const Pel* piCur = pcDtParam->pCur;
526  const Int iCols = pcDtParam->iCols;
527  const Int iStrideCur = pcDtParam->iStrideCur;
528  const Int iStrideOrg = pcDtParam->iStrideOrg;
529  const UInt distortionShift = DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth - 8);
530 
531  Distortion uiSum = 0;
532 
533 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
534  if( pcDtParam->bitDepth <= 10 )
535  {
536  if( ( iCols & 0x07 ) == 0 )
537  {
538  for( Int iRows = pcDtParam->iRows ; iRows != 0; iRows-- )
539  {
540  uiSum += simdSADLine8n16b( piOrg , piCur , iCols );
541  piOrg += iStrideOrg;
542  piCur += iStrideCur;
543  }
544  }
545  else
546  {
547  for( Int iRows = pcDtParam->iRows; iRows != 0; iRows-- )
548  {
549  uiSum += simdSADLine4n16b( piOrg , piCur , iCols );
550  piOrg += iStrideOrg;
551  piCur += iStrideCur;
552  }
553  }
554  }
555  else
556  {
557 #endif
558  for(Int iRows = pcDtParam->iRows ; iRows != 0; iRows-- )
559  {
560  for (Int n = 0; n < iCols; n++ )
561  {
562  uiSum += abs( piOrg[n] - piCur[n] );
563  }
     // Early exit: stop once the shifted partial sum exceeds the caller's limit.
564  if (pcDtParam->m_maximumDistortionForEarlyExit < ( uiSum >> distortionShift ))
565  {
566  return ( uiSum >> distortionShift );
567  }
568  piOrg += iStrideOrg;
569  piCur += iStrideCur;
570  }
571 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
572  }
573 #endif
574 
575  return ( uiSum >> distortionShift );
576 }
577 
// NOTE(review): the signature line (listing line 578) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the 4-sample-wide SAD function -- confirm).
//
// SAD over rows of exactly 4 samples with optional row subsampling: only every
// (1 << iSubShift)-th row is visited (both strides are scaled accordingly) and
// the sum is shifted left by iSubShift afterwards to compensate. The result is
// then shifted right by DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// With bApplyWeight set, the work is delegated to
// TComRdCostWeightPrediction::xGetSADw().
// The row loop ends only when iRows reaches exactly 0, so iRows has to be a
// multiple of the row step.
579 {
580  if ( pcDtParam->bApplyWeight )
581  {
582  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
583  }
584  const Pel* piOrg = pcDtParam->pOrg;
585  const Pel* piCur = pcDtParam->pCur;
586  Int iRows = pcDtParam->iRows;
587  Int iSubShift = pcDtParam->iSubShift;
588  Int iSubStep = ( 1 << iSubShift );
589  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
590  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
591 
592  Distortion uiSum = 0;
593 
594 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
595  if( pcDtParam->bitDepth <= 10 )
596  {
597  for( ; iRows != 0; iRows-=iSubStep )
598  {
599  uiSum += simdSADLine4n16b( piOrg , piCur , 4 );
600  piOrg += iStrideOrg;
601  piCur += iStrideCur;
602  }
603  }
604  else
605  {
606 #endif
607  for( ; iRows != 0; iRows-=iSubStep )
608  {
609  uiSum += abs( piOrg[0] - piCur[0] );
610  uiSum += abs( piOrg[1] - piCur[1] );
611  uiSum += abs( piOrg[2] - piCur[2] );
612  uiSum += abs( piOrg[3] - piCur[3] );
613 
614  piOrg += iStrideOrg;
615  piCur += iStrideCur;
616  }
617 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
618  }
619 #endif
620 
   // Scale up to make up for the rows that were skipped.
621  uiSum <<= iSubShift;
622  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
623 }
624 
// NOTE(review): the signature line (listing line 625) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the 8-sample-wide SAD function -- confirm).
//
// SAD over rows of exactly 8 samples with optional row subsampling: only every
// (1 << iSubShift)-th row is visited (both strides are scaled accordingly) and
// the sum is shifted left by iSubShift afterwards to compensate. The result is
// then shifted right by DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// With bApplyWeight set, the work is delegated to
// TComRdCostWeightPrediction::xGetSADw().
// The row loop ends only when iRows reaches exactly 0, so iRows has to be a
// multiple of the row step.
626 {
627  if ( pcDtParam->bApplyWeight )
628  {
629  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
630  }
631  const Pel* piOrg = pcDtParam->pOrg;
632  const Pel* piCur = pcDtParam->pCur;
633  Int iRows = pcDtParam->iRows;
634  Int iSubShift = pcDtParam->iSubShift;
635  Int iSubStep = ( 1 << iSubShift );
636  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
637  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
638 
639  Distortion uiSum = 0;
640 
641 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
642  if( pcDtParam->bitDepth <= 10 )
643  {
644  for( ; iRows != 0; iRows-=iSubStep )
645  {
646  uiSum += simdSADLine8n16b( piOrg , piCur , 8 );
647  piOrg += iStrideOrg;
648  piCur += iStrideCur;
649  }
650  }
651  else
652  {
653 #endif
654  for( ; iRows != 0; iRows-=iSubStep )
655  {
656  uiSum += abs( piOrg[0] - piCur[0] );
657  uiSum += abs( piOrg[1] - piCur[1] );
658  uiSum += abs( piOrg[2] - piCur[2] );
659  uiSum += abs( piOrg[3] - piCur[3] );
660  uiSum += abs( piOrg[4] - piCur[4] );
661  uiSum += abs( piOrg[5] - piCur[5] );
662  uiSum += abs( piOrg[6] - piCur[6] );
663  uiSum += abs( piOrg[7] - piCur[7] );
664 
665  piOrg += iStrideOrg;
666  piCur += iStrideCur;
667  }
668 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
669  }
670 #endif
671 
   // Scale up to make up for the rows that were skipped.
672  uiSum <<= iSubShift;
673  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
674 }
675 
// NOTE(review): the signature line (listing line 676) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the 16-sample-wide SAD function -- confirm).
//
// SAD over rows of exactly 16 samples with optional row subsampling: only
// every (1 << iSubShift)-th row is visited (both strides are scaled
// accordingly) and the sum is shifted left by iSubShift afterwards to
// compensate. The result is then shifted right by
// DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// With bApplyWeight set, the work is delegated to
// TComRdCostWeightPrediction::xGetSADw().
// The row loop ends only when iRows reaches exactly 0, so iRows has to be a
// multiple of the row step.
677 {
678  if ( pcDtParam->bApplyWeight )
679  {
680  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
681  }
682  const Pel* piOrg = pcDtParam->pOrg;
683  const Pel* piCur = pcDtParam->pCur;
684  Int iRows = pcDtParam->iRows;
685  Int iSubShift = pcDtParam->iSubShift;
686  Int iSubStep = ( 1 << iSubShift );
687  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
688  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
689 
690  Distortion uiSum = 0;
691 
692 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
693  if( pcDtParam->bitDepth <= 10 )
694  {
695  for( ; iRows != 0; iRows-=iSubStep )
696  {
697  uiSum += simdSADLine8n16b( piOrg , piCur , 16 );
698  piOrg += iStrideOrg;
699  piCur += iStrideCur;
700  }
701  }
702  else
703  {
704 #endif
705  for( ; iRows != 0; iRows-=iSubStep )
706  {
707  uiSum += abs( piOrg[0] - piCur[0] );
708  uiSum += abs( piOrg[1] - piCur[1] );
709  uiSum += abs( piOrg[2] - piCur[2] );
710  uiSum += abs( piOrg[3] - piCur[3] );
711  uiSum += abs( piOrg[4] - piCur[4] );
712  uiSum += abs( piOrg[5] - piCur[5] );
713  uiSum += abs( piOrg[6] - piCur[6] );
714  uiSum += abs( piOrg[7] - piCur[7] );
715  uiSum += abs( piOrg[8] - piCur[8] );
716  uiSum += abs( piOrg[9] - piCur[9] );
717  uiSum += abs( piOrg[10] - piCur[10] );
718  uiSum += abs( piOrg[11] - piCur[11] );
719  uiSum += abs( piOrg[12] - piCur[12] );
720  uiSum += abs( piOrg[13] - piCur[13] );
721  uiSum += abs( piOrg[14] - piCur[14] );
722  uiSum += abs( piOrg[15] - piCur[15] );
723 
724  piOrg += iStrideOrg;
725  piCur += iStrideCur;
726  }
727 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
728  }
729 #endif
730 
   // Scale up to make up for the rows that were skipped.
731  uiSum <<= iSubShift;
732  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
733 }
734 
// NOTE(review): the signature line (listing line 735) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the 12-sample-wide SAD function -- confirm).
//
// SAD over rows of exactly 12 samples with optional row subsampling: only
// every (1 << iSubShift)-th row is visited (both strides are scaled
// accordingly) and the sum is shifted left by iSubShift afterwards to
// compensate. The result is then shifted right by
// DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// With bApplyWeight set, the work is delegated to
// TComRdCostWeightPrediction::xGetSADw().
// Unlike the 4/8/16-wide bodies in this file, this one has no SIMD branch.
// The row loop ends only when iRows reaches exactly 0, so iRows has to be a
// multiple of the row step.
736 {
737  if ( pcDtParam->bApplyWeight )
738  {
739  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
740  }
741  const Pel* piOrg = pcDtParam->pOrg;
742  const Pel* piCur = pcDtParam->pCur;
743  Int iRows = pcDtParam->iRows;
744  Int iSubShift = pcDtParam->iSubShift;
745  Int iSubStep = ( 1 << iSubShift );
746  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
747  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
748 
749  Distortion uiSum = 0;
750 
751  for( ; iRows != 0; iRows-=iSubStep )
752  {
753  uiSum += abs( piOrg[0] - piCur[0] );
754  uiSum += abs( piOrg[1] - piCur[1] );
755  uiSum += abs( piOrg[2] - piCur[2] );
756  uiSum += abs( piOrg[3] - piCur[3] );
757  uiSum += abs( piOrg[4] - piCur[4] );
758  uiSum += abs( piOrg[5] - piCur[5] );
759  uiSum += abs( piOrg[6] - piCur[6] );
760  uiSum += abs( piOrg[7] - piCur[7] );
761  uiSum += abs( piOrg[8] - piCur[8] );
762  uiSum += abs( piOrg[9] - piCur[9] );
763  uiSum += abs( piOrg[10] - piCur[10] );
764  uiSum += abs( piOrg[11] - piCur[11] );
765 
766  piOrg += iStrideOrg;
767  piCur += iStrideCur;
768  }
769 
   // Scale up to make up for the rows that were skipped.
770  uiSum <<= iSubShift;
771  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
772 }
773 
// NOTE(review): the signature line (listing line 774) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the SAD function for widths that are multiples of 16 -- confirm).
//
// SAD over rows of iCols samples with optional row subsampling: only every
// (1 << iSubShift)-th row is visited (both strides are scaled accordingly) and
// the sum is shifted left by iSubShift afterwards to compensate. The result is
// then shifted right by DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// The scalar path consumes columns in groups of 16, so it reads up to the next
// multiple of 16 at or above iCols.
// Unlike the other SAD bodies in this file, this one does not check
// bApplyWeight.
// The row loop ends only when iRows reaches exactly 0, so iRows has to be a
// multiple of the row step.
775 {
776  const Pel* piOrg = pcDtParam->pOrg;
777  const Pel* piCur = pcDtParam->pCur;
778  Int iRows = pcDtParam->iRows;
779  Int iCols = pcDtParam->iCols;
780  Int iSubShift = pcDtParam->iSubShift;
781  Int iSubStep = ( 1 << iSubShift );
782  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
783  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
784 
785  Distortion uiSum = 0;
786 
787 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
788  if( pcDtParam->bitDepth <= 10 )
789  {
790  for( ; iRows != 0; iRows-=iSubStep )
791  {
792  uiSum += simdSADLine8n16b( piOrg , piCur , iCols );
793  piOrg += iStrideOrg;
794  piCur += iStrideCur;
795  }
796  }
797  else
798  {
799 #endif
800  for( ; iRows != 0; iRows-=iSubStep )
801  {
802  for (Int n = 0; n < iCols; n+=16 )
803  {
804  uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] );
805  uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] );
806  uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] );
807  uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] );
808  uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] );
809  uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] );
810  uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] );
811  uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] );
812  uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] );
813  uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] );
814  uiSum += abs( piOrg[n+10] - piCur[n+10] );
815  uiSum += abs( piOrg[n+11] - piCur[n+11] );
816  uiSum += abs( piOrg[n+12] - piCur[n+12] );
817  uiSum += abs( piOrg[n+13] - piCur[n+13] );
818  uiSum += abs( piOrg[n+14] - piCur[n+14] );
819  uiSum += abs( piOrg[n+15] - piCur[n+15] );
820  }
821  piOrg += iStrideOrg;
822  piCur += iStrideCur;
823  }
824 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
825  }
826 #endif
827 
   // Scale up to make up for the rows that were skipped.
828  uiSum <<= iSubShift;
829  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
830 }
831 
// NOTE(review): the signature line (listing line 832) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the 32-sample-wide SAD function -- confirm).
//
// SAD over rows of exactly 32 samples with optional row subsampling: only
// every (1 << iSubShift)-th row is visited (both strides are scaled
// accordingly) and the sum is shifted left by iSubShift afterwards to
// compensate. The result is then shifted right by
// DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// With bApplyWeight set, the work is delegated to
// TComRdCostWeightPrediction::xGetSADw().
// The row loop ends only when iRows reaches exactly 0, so iRows has to be a
// multiple of the row step.
833 {
834  if ( pcDtParam->bApplyWeight )
835  {
836  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
837  }
838  const Pel* piOrg = pcDtParam->pOrg;
839  const Pel* piCur = pcDtParam->pCur;
840  Int iRows = pcDtParam->iRows;
841  Int iSubShift = pcDtParam->iSubShift;
842  Int iSubStep = ( 1 << iSubShift );
843  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
844  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
845 
846  Distortion uiSum = 0;
847 
848 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
849  if( pcDtParam->bitDepth <= 10 )
850  {
851  for( ; iRows != 0; iRows-=iSubStep )
852  {
853  uiSum += simdSADLine8n16b( piOrg , piCur , 32 );
854  piOrg += iStrideOrg;
855  piCur += iStrideCur;
856  }
857  }
858  else
859  {
860 #endif
861  for( ; iRows != 0; iRows-=iSubStep )
862  {
863  uiSum += abs( piOrg[0] - piCur[0] );
864  uiSum += abs( piOrg[1] - piCur[1] );
865  uiSum += abs( piOrg[2] - piCur[2] );
866  uiSum += abs( piOrg[3] - piCur[3] );
867  uiSum += abs( piOrg[4] - piCur[4] );
868  uiSum += abs( piOrg[5] - piCur[5] );
869  uiSum += abs( piOrg[6] - piCur[6] );
870  uiSum += abs( piOrg[7] - piCur[7] );
871  uiSum += abs( piOrg[8] - piCur[8] );
872  uiSum += abs( piOrg[9] - piCur[9] );
873  uiSum += abs( piOrg[10] - piCur[10] );
874  uiSum += abs( piOrg[11] - piCur[11] );
875  uiSum += abs( piOrg[12] - piCur[12] );
876  uiSum += abs( piOrg[13] - piCur[13] );
877  uiSum += abs( piOrg[14] - piCur[14] );
878  uiSum += abs( piOrg[15] - piCur[15] );
879  uiSum += abs( piOrg[16] - piCur[16] );
880  uiSum += abs( piOrg[17] - piCur[17] );
881  uiSum += abs( piOrg[18] - piCur[18] );
882  uiSum += abs( piOrg[19] - piCur[19] );
883  uiSum += abs( piOrg[20] - piCur[20] );
884  uiSum += abs( piOrg[21] - piCur[21] );
885  uiSum += abs( piOrg[22] - piCur[22] );
886  uiSum += abs( piOrg[23] - piCur[23] );
887  uiSum += abs( piOrg[24] - piCur[24] );
888  uiSum += abs( piOrg[25] - piCur[25] );
889  uiSum += abs( piOrg[26] - piCur[26] );
890  uiSum += abs( piOrg[27] - piCur[27] );
891  uiSum += abs( piOrg[28] - piCur[28] );
892  uiSum += abs( piOrg[29] - piCur[29] );
893  uiSum += abs( piOrg[30] - piCur[30] );
894  uiSum += abs( piOrg[31] - piCur[31] );
895 
896  piOrg += iStrideOrg;
897  piCur += iStrideCur;
898  }
899 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
900  }
901 #endif
902 
   // Scale up to make up for the rows that were skipped.
903  uiSum <<= iSubShift;
904  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
905 }
906 
// NOTE(review): the signature line (listing line 907) is absent from this
// listing; the body reads a `pcDtParam` pointer and returns a Distortion
// (presumably the 24-sample-wide SAD function -- confirm).
//
// SAD over rows of exactly 24 samples with optional row subsampling: only
// every (1 << iSubShift)-th row is visited (both strides are scaled
// accordingly) and the sum is shifted left by iSubShift afterwards to
// compensate. The result is then shifted right by
// DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// With bApplyWeight set, the work is delegated to
// TComRdCostWeightPrediction::xGetSADw().
// The row loop ends only when iRows reaches exactly 0, so iRows has to be a
// multiple of the row step.
908 {
909  if ( pcDtParam->bApplyWeight )
910  {
911  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
912  }
913  const Pel* piOrg = pcDtParam->pOrg;
914  const Pel* piCur = pcDtParam->pCur;
915  Int iRows = pcDtParam->iRows;
916  Int iSubShift = pcDtParam->iSubShift;
917  Int iSubStep = ( 1 << iSubShift );
918  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
919  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
920 
921  Distortion uiSum = 0;
922 
923 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
924  if( pcDtParam->bitDepth <= 10 )
925  {
926  for( ; iRows != 0; iRows-=iSubStep )
927  {
928  uiSum += simdSADLine8n16b( piOrg , piCur , 24 );
929  piOrg += iStrideOrg;
930  piCur += iStrideCur;
931  }
932  }
933  else
934  {
935 #endif
936  for( ; iRows != 0; iRows-=iSubStep )
937  {
938  uiSum += abs( piOrg[0] - piCur[0] );
939  uiSum += abs( piOrg[1] - piCur[1] );
940  uiSum += abs( piOrg[2] - piCur[2] );
941  uiSum += abs( piOrg[3] - piCur[3] );
942  uiSum += abs( piOrg[4] - piCur[4] );
943  uiSum += abs( piOrg[5] - piCur[5] );
944  uiSum += abs( piOrg[6] - piCur[6] );
945  uiSum += abs( piOrg[7] - piCur[7] );
946  uiSum += abs( piOrg[8] - piCur[8] );
947  uiSum += abs( piOrg[9] - piCur[9] );
948  uiSum += abs( piOrg[10] - piCur[10] );
949  uiSum += abs( piOrg[11] - piCur[11] );
950  uiSum += abs( piOrg[12] - piCur[12] );
951  uiSum += abs( piOrg[13] - piCur[13] );
952  uiSum += abs( piOrg[14] - piCur[14] );
953  uiSum += abs( piOrg[15] - piCur[15] );
954  uiSum += abs( piOrg[16] - piCur[16] );
955  uiSum += abs( piOrg[17] - piCur[17] );
956  uiSum += abs( piOrg[18] - piCur[18] );
957  uiSum += abs( piOrg[19] - piCur[19] );
958  uiSum += abs( piOrg[20] - piCur[20] );
959  uiSum += abs( piOrg[21] - piCur[21] );
960  uiSum += abs( piOrg[22] - piCur[22] );
961  uiSum += abs( piOrg[23] - piCur[23] );
962 
963  piOrg += iStrideOrg;
964  piCur += iStrideCur;
965  }
966 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
967  }
968 #endif
969 
   // Scale up to make up for the rows that were skipped.
970  uiSum <<= iSubShift;
971  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
972 }
973 
975 {
976  if ( pcDtParam->bApplyWeight )
977  {
978  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
979  }
980  const Pel* piOrg = pcDtParam->pOrg;
981  const Pel* piCur = pcDtParam->pCur;
982  Int iRows = pcDtParam->iRows;
983  Int iSubShift = pcDtParam->iSubShift;
984  Int iSubStep = ( 1 << iSubShift );
985  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
986  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
987 
988  Distortion uiSum = 0;
989 
990 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
991  if( pcDtParam->bitDepth <= 10 )
992  {
993  for( ; iRows != 0; iRows-=iSubStep )
994  {
995  uiSum += simdSADLine8n16b( piOrg , piCur , 64 );
996  piOrg += iStrideOrg;
997  piCur += iStrideCur;
998  }
999  }
1000  else
1001  {
1002 #endif
1003  for( ; iRows != 0; iRows-=iSubStep )
1004  {
1005  uiSum += abs( piOrg[0] - piCur[0] );
1006  uiSum += abs( piOrg[1] - piCur[1] );
1007  uiSum += abs( piOrg[2] - piCur[2] );
1008  uiSum += abs( piOrg[3] - piCur[3] );
1009  uiSum += abs( piOrg[4] - piCur[4] );
1010  uiSum += abs( piOrg[5] - piCur[5] );
1011  uiSum += abs( piOrg[6] - piCur[6] );
1012  uiSum += abs( piOrg[7] - piCur[7] );
1013  uiSum += abs( piOrg[8] - piCur[8] );
1014  uiSum += abs( piOrg[9] - piCur[9] );
1015  uiSum += abs( piOrg[10] - piCur[10] );
1016  uiSum += abs( piOrg[11] - piCur[11] );
1017  uiSum += abs( piOrg[12] - piCur[12] );
1018  uiSum += abs( piOrg[13] - piCur[13] );
1019  uiSum += abs( piOrg[14] - piCur[14] );
1020  uiSum += abs( piOrg[15] - piCur[15] );
1021  uiSum += abs( piOrg[16] - piCur[16] );
1022  uiSum += abs( piOrg[17] - piCur[17] );
1023  uiSum += abs( piOrg[18] - piCur[18] );
1024  uiSum += abs( piOrg[19] - piCur[19] );
1025  uiSum += abs( piOrg[20] - piCur[20] );
1026  uiSum += abs( piOrg[21] - piCur[21] );
1027  uiSum += abs( piOrg[22] - piCur[22] );
1028  uiSum += abs( piOrg[23] - piCur[23] );
1029  uiSum += abs( piOrg[24] - piCur[24] );
1030  uiSum += abs( piOrg[25] - piCur[25] );
1031  uiSum += abs( piOrg[26] - piCur[26] );
1032  uiSum += abs( piOrg[27] - piCur[27] );
1033  uiSum += abs( piOrg[28] - piCur[28] );
1034  uiSum += abs( piOrg[29] - piCur[29] );
1035  uiSum += abs( piOrg[30] - piCur[30] );
1036  uiSum += abs( piOrg[31] - piCur[31] );
1037  uiSum += abs( piOrg[32] - piCur[32] );
1038  uiSum += abs( piOrg[33] - piCur[33] );
1039  uiSum += abs( piOrg[34] - piCur[34] );
1040  uiSum += abs( piOrg[35] - piCur[35] );
1041  uiSum += abs( piOrg[36] - piCur[36] );
1042  uiSum += abs( piOrg[37] - piCur[37] );
1043  uiSum += abs( piOrg[38] - piCur[38] );
1044  uiSum += abs( piOrg[39] - piCur[39] );
1045  uiSum += abs( piOrg[40] - piCur[40] );
1046  uiSum += abs( piOrg[41] - piCur[41] );
1047  uiSum += abs( piOrg[42] - piCur[42] );
1048  uiSum += abs( piOrg[43] - piCur[43] );
1049  uiSum += abs( piOrg[44] - piCur[44] );
1050  uiSum += abs( piOrg[45] - piCur[45] );
1051  uiSum += abs( piOrg[46] - piCur[46] );
1052  uiSum += abs( piOrg[47] - piCur[47] );
1053  uiSum += abs( piOrg[48] - piCur[48] );
1054  uiSum += abs( piOrg[49] - piCur[49] );
1055  uiSum += abs( piOrg[50] - piCur[50] );
1056  uiSum += abs( piOrg[51] - piCur[51] );
1057  uiSum += abs( piOrg[52] - piCur[52] );
1058  uiSum += abs( piOrg[53] - piCur[53] );
1059  uiSum += abs( piOrg[54] - piCur[54] );
1060  uiSum += abs( piOrg[55] - piCur[55] );
1061  uiSum += abs( piOrg[56] - piCur[56] );
1062  uiSum += abs( piOrg[57] - piCur[57] );
1063  uiSum += abs( piOrg[58] - piCur[58] );
1064  uiSum += abs( piOrg[59] - piCur[59] );
1065  uiSum += abs( piOrg[60] - piCur[60] );
1066  uiSum += abs( piOrg[61] - piCur[61] );
1067  uiSum += abs( piOrg[62] - piCur[62] );
1068  uiSum += abs( piOrg[63] - piCur[63] );
1069 
1070  piOrg += iStrideOrg;
1071  piCur += iStrideCur;
1072  }
1073 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
1074  }
1075 #endif
1076 
1077  uiSum <<= iSubShift;
1078  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
1079 }
1080 
1082 {
1083  if ( pcDtParam->bApplyWeight )
1084  {
1085  return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
1086  }
1087  const Pel* piOrg = pcDtParam->pOrg;
1088  const Pel* piCur = pcDtParam->pCur;
1089  Int iRows = pcDtParam->iRows;
1090  Int iSubShift = pcDtParam->iSubShift;
1091  Int iSubStep = ( 1 << iSubShift );
1092  Int iStrideCur = pcDtParam->iStrideCur*iSubStep;
1093  Int iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
1094 
1095  Distortion uiSum = 0;
1096 
1097 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
1098  if( pcDtParam->bitDepth <= 10 )
1099  {
1100  for( ; iRows != 0; iRows-=iSubStep )
1101  {
1102  uiSum += simdSADLine8n16b( piOrg , piCur , 48 );
1103  piOrg += iStrideOrg;
1104  piCur += iStrideCur;
1105  }
1106  }
1107  else
1108  {
1109 #endif
1110  for( ; iRows != 0; iRows-=iSubStep )
1111  {
1112  uiSum += abs( piOrg[0] - piCur[0] );
1113  uiSum += abs( piOrg[1] - piCur[1] );
1114  uiSum += abs( piOrg[2] - piCur[2] );
1115  uiSum += abs( piOrg[3] - piCur[3] );
1116  uiSum += abs( piOrg[4] - piCur[4] );
1117  uiSum += abs( piOrg[5] - piCur[5] );
1118  uiSum += abs( piOrg[6] - piCur[6] );
1119  uiSum += abs( piOrg[7] - piCur[7] );
1120  uiSum += abs( piOrg[8] - piCur[8] );
1121  uiSum += abs( piOrg[9] - piCur[9] );
1122  uiSum += abs( piOrg[10] - piCur[10] );
1123  uiSum += abs( piOrg[11] - piCur[11] );
1124  uiSum += abs( piOrg[12] - piCur[12] );
1125  uiSum += abs( piOrg[13] - piCur[13] );
1126  uiSum += abs( piOrg[14] - piCur[14] );
1127  uiSum += abs( piOrg[15] - piCur[15] );
1128  uiSum += abs( piOrg[16] - piCur[16] );
1129  uiSum += abs( piOrg[17] - piCur[17] );
1130  uiSum += abs( piOrg[18] - piCur[18] );
1131  uiSum += abs( piOrg[19] - piCur[19] );
1132  uiSum += abs( piOrg[20] - piCur[20] );
1133  uiSum += abs( piOrg[21] - piCur[21] );
1134  uiSum += abs( piOrg[22] - piCur[22] );
1135  uiSum += abs( piOrg[23] - piCur[23] );
1136  uiSum += abs( piOrg[24] - piCur[24] );
1137  uiSum += abs( piOrg[25] - piCur[25] );
1138  uiSum += abs( piOrg[26] - piCur[26] );
1139  uiSum += abs( piOrg[27] - piCur[27] );
1140  uiSum += abs( piOrg[28] - piCur[28] );
1141  uiSum += abs( piOrg[29] - piCur[29] );
1142  uiSum += abs( piOrg[30] - piCur[30] );
1143  uiSum += abs( piOrg[31] - piCur[31] );
1144  uiSum += abs( piOrg[32] - piCur[32] );
1145  uiSum += abs( piOrg[33] - piCur[33] );
1146  uiSum += abs( piOrg[34] - piCur[34] );
1147  uiSum += abs( piOrg[35] - piCur[35] );
1148  uiSum += abs( piOrg[36] - piCur[36] );
1149  uiSum += abs( piOrg[37] - piCur[37] );
1150  uiSum += abs( piOrg[38] - piCur[38] );
1151  uiSum += abs( piOrg[39] - piCur[39] );
1152  uiSum += abs( piOrg[40] - piCur[40] );
1153  uiSum += abs( piOrg[41] - piCur[41] );
1154  uiSum += abs( piOrg[42] - piCur[42] );
1155  uiSum += abs( piOrg[43] - piCur[43] );
1156  uiSum += abs( piOrg[44] - piCur[44] );
1157  uiSum += abs( piOrg[45] - piCur[45] );
1158  uiSum += abs( piOrg[46] - piCur[46] );
1159  uiSum += abs( piOrg[47] - piCur[47] );
1160 
1161  piOrg += iStrideOrg;
1162  piCur += iStrideCur;
1163  }
1164 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
1165  }
1166 #endif
1167 
1168  uiSum <<= iSubShift;
1169  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
1170 }
1171 
1172 // --------------------------------------------------------------------------------------------------------------------
1173 // SSE
1174 // --------------------------------------------------------------------------------------------------------------------
1175 
1177 {
1178  if ( pcDtParam->bApplyWeight )
1179  {
1180  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1181  }
1182  const Pel* piOrg = pcDtParam->pOrg;
1183  const Pel* piCur = pcDtParam->pCur;
1184  Int iRows = pcDtParam->iRows;
1185  Int iCols = pcDtParam->iCols;
1186  Int iStrideOrg = pcDtParam->iStrideOrg;
1187  Int iStrideCur = pcDtParam->iStrideCur;
1188 
1189  Distortion uiSum = 0;
1190  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1191 
1192  Intermediate_Int iTemp;
1193 
1194  for( ; iRows != 0; iRows-- )
1195  {
1196  for (Int n = 0; n < iCols; n++ )
1197  {
1198  iTemp = piOrg[n ] - piCur[n ];
1199  uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1200  }
1201  piOrg += iStrideOrg;
1202  piCur += iStrideCur;
1203  }
1204 
1205  return ( uiSum );
1206 }
1207 
1209 {
1210  if ( pcDtParam->bApplyWeight )
1211  {
1212  assert( pcDtParam->iCols == 4 );
1213  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1214  }
1215  const Pel* piOrg = pcDtParam->pOrg;
1216  const Pel* piCur = pcDtParam->pCur;
1217  Int iRows = pcDtParam->iRows;
1218  Int iStrideOrg = pcDtParam->iStrideOrg;
1219  Int iStrideCur = pcDtParam->iStrideCur;
1220 
1221  Distortion uiSum = 0;
1222  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1223 
1224  Intermediate_Int iTemp;
1225 
1226  for( ; iRows != 0; iRows-- )
1227  {
1228 
1229  iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1230  iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1231  iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1232  iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1233 
1234  piOrg += iStrideOrg;
1235  piCur += iStrideCur;
1236  }
1237 
1238  return ( uiSum );
1239 }
1240 
1242 {
1243  if ( pcDtParam->bApplyWeight )
1244  {
1245  assert( pcDtParam->iCols == 8 );
1246  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1247  }
1248  const Pel* piOrg = pcDtParam->pOrg;
1249  const Pel* piCur = pcDtParam->pCur;
1250  Int iRows = pcDtParam->iRows;
1251  Int iStrideOrg = pcDtParam->iStrideOrg;
1252  Int iStrideCur = pcDtParam->iStrideCur;
1253 
1254  Distortion uiSum = 0;
1255  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1256 
1257  Intermediate_Int iTemp;
1258 
1259  for( ; iRows != 0; iRows-- )
1260  {
1261  iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1262  iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1263  iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1264  iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1265  iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1266  iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1267  iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1268  iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1269 
1270  piOrg += iStrideOrg;
1271  piCur += iStrideCur;
1272  }
1273 
1274  return ( uiSum );
1275 }
1276 
1278 {
1279  if ( pcDtParam->bApplyWeight )
1280  {
1281  assert( pcDtParam->iCols == 16 );
1282  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1283  }
1284  const Pel* piOrg = pcDtParam->pOrg;
1285  const Pel* piCur = pcDtParam->pCur;
1286  Int iRows = pcDtParam->iRows;
1287  Int iStrideOrg = pcDtParam->iStrideOrg;
1288  Int iStrideCur = pcDtParam->iStrideCur;
1289 
1290  Distortion uiSum = 0;
1291  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1292 
1293  Intermediate_Int iTemp;
1294 
1295  for( ; iRows != 0; iRows-- )
1296  {
1297 
1298  iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1299  iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1300  iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1301  iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1302  iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1303  iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1304  iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1305  iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1306  iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1307  iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1308  iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1309  iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1310  iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1311  iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1312  iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1313  iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1314 
1315  piOrg += iStrideOrg;
1316  piCur += iStrideCur;
1317  }
1318 
1319  return ( uiSum );
1320 }
1321 
1323 {
1324  if ( pcDtParam->bApplyWeight )
1325  {
1326  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1327  }
1328  const Pel* piOrg = pcDtParam->pOrg;
1329  const Pel* piCur = pcDtParam->pCur;
1330  Int iRows = pcDtParam->iRows;
1331  Int iCols = pcDtParam->iCols;
1332  Int iStrideOrg = pcDtParam->iStrideOrg;
1333  Int iStrideCur = pcDtParam->iStrideCur;
1334 
1335  Distortion uiSum = 0;
1336  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1337 
1338  Intermediate_Int iTemp;
1339 
1340  for( ; iRows != 0; iRows-- )
1341  {
1342  for (Int n = 0; n < iCols; n+=16 )
1343  {
1344 
1345  iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1346  iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1347  iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1348  iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1349  iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1350  iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1351  iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1352  iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1353  iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1354  iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1355  iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1356  iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1357  iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1358  iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1359  iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1360  iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1361 
1362  }
1363  piOrg += iStrideOrg;
1364  piCur += iStrideCur;
1365  }
1366 
1367  return ( uiSum );
1368 }
1369 
1371 {
1372  if ( pcDtParam->bApplyWeight )
1373  {
1374  assert( pcDtParam->iCols == 32 );
1375  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1376  }
1377  const Pel* piOrg = pcDtParam->pOrg;
1378  const Pel* piCur = pcDtParam->pCur;
1379  Int iRows = pcDtParam->iRows;
1380  Int iStrideOrg = pcDtParam->iStrideOrg;
1381  Int iStrideCur = pcDtParam->iStrideCur;
1382 
1383  Distortion uiSum = 0;
1384  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1385 
1386  Intermediate_Int iTemp;
1387 
1388  for( ; iRows != 0; iRows-- )
1389  {
1390 
1391  iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1392  iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1393  iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1394  iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1395  iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1396  iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1397  iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1398  iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1399  iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1400  iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1401  iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1402  iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1403  iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1404  iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1405  iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1406  iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1407  iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1408  iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1409  iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1410  iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1411  iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1412  iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1413  iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1414  iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1415  iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1416  iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1417  iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1418  iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1419  iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1420  iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1421  iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1422  iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1423 
1424  piOrg += iStrideOrg;
1425  piCur += iStrideCur;
1426  }
1427 
1428  return ( uiSum );
1429 }
1430 
1432 {
1433  if ( pcDtParam->bApplyWeight )
1434  {
1435  assert( pcDtParam->iCols == 64 );
1436  return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1437  }
1438  const Pel* piOrg = pcDtParam->pOrg;
1439  const Pel* piCur = pcDtParam->pCur;
1440  Int iRows = pcDtParam->iRows;
1441  Int iStrideOrg = pcDtParam->iStrideOrg;
1442  Int iStrideCur = pcDtParam->iStrideCur;
1443 
1444  Distortion uiSum = 0;
1445  UInt uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1446 
1447  Intermediate_Int iTemp;
1448 
1449  for( ; iRows != 0; iRows-- )
1450  {
1451  iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1452  iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1453  iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1454  iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1455  iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1456  iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1457  iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1458  iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1459  iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1460  iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1461  iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1462  iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1463  iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1464  iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1465  iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1466  iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1467  iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1468  iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1469  iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1470  iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1471  iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1472  iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1473  iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1474  iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1475  iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1476  iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1477  iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1478  iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1479  iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1480  iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1481  iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1482  iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1483  iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1484  iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1485  iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1486  iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1487  iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1488  iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1489  iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1490  iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1491  iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1492  iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1493  iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1494  iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1495  iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1496  iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1497  iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1498  iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1499  iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1500  iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1501  iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1502  iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1503  iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1504  iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1505  iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1506  iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1507  iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1508  iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1509  iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1510  iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1511  iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1512  iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1513  iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1514  iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1515 
1516  piOrg += iStrideOrg;
1517  piCur += iStrideCur;
1518  }
1519 
1520  return ( uiSum );
1521 }
1522 
1523 // --------------------------------------------------------------------------------------------------------------------
1524 // HADAMARD with step (used in fractional search)
1525 // --------------------------------------------------------------------------------------------------------------------
1526 
1527 Distortion TComRdCost::xCalcHADs2x2( const Pel *piOrg, const Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1528 {
1529  Distortion satd = 0;
1530  TCoeff diff[4], m[4];
1531  assert( iStep == 1 );
1532  diff[0] = piOrg[0 ] - piCur[0];
1533  diff[1] = piOrg[1 ] - piCur[1];
1534  diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur];
1535  diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur];
1536  m[0] = diff[0] + diff[2];
1537  m[1] = diff[1] + diff[3];
1538  m[2] = diff[0] - diff[2];
1539  m[3] = diff[1] - diff[3];
1540 
1541  satd += abs(m[0] + m[1]);
1542  satd += abs(m[0] - m[1]);
1543  satd += abs(m[2] + m[3]);
1544  satd += abs(m[2] - m[3]);
1545 
1546  return satd;
1547 }
1548 
1549 Distortion TComRdCost::xCalcHADs4x4( const Pel *piOrg, const Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1550 {
1551  Int k;
1552  Distortion satd = 0;
1553  TCoeff diff[16], m[16], d[16];
1554 
1555  assert( iStep == 1 );
1556  for( k = 0; k < 16; k+=4 )
1557  {
1558  diff[k+0] = piOrg[0] - piCur[0];
1559  diff[k+1] = piOrg[1] - piCur[1];
1560  diff[k+2] = piOrg[2] - piCur[2];
1561  diff[k+3] = piOrg[3] - piCur[3];
1562 
1563  piCur += iStrideCur;
1564  piOrg += iStrideOrg;
1565  }
1566 
1567  /*===== hadamard transform =====*/
1568  m[ 0] = diff[ 0] + diff[12];
1569  m[ 1] = diff[ 1] + diff[13];
1570  m[ 2] = diff[ 2] + diff[14];
1571  m[ 3] = diff[ 3] + diff[15];
1572  m[ 4] = diff[ 4] + diff[ 8];
1573  m[ 5] = diff[ 5] + diff[ 9];
1574  m[ 6] = diff[ 6] + diff[10];
1575  m[ 7] = diff[ 7] + diff[11];
1576  m[ 8] = diff[ 4] - diff[ 8];
1577  m[ 9] = diff[ 5] - diff[ 9];
1578  m[10] = diff[ 6] - diff[10];
1579  m[11] = diff[ 7] - diff[11];
1580  m[12] = diff[ 0] - diff[12];
1581  m[13] = diff[ 1] - diff[13];
1582  m[14] = diff[ 2] - diff[14];
1583  m[15] = diff[ 3] - diff[15];
1584 
1585  d[ 0] = m[ 0] + m[ 4];
1586  d[ 1] = m[ 1] + m[ 5];
1587  d[ 2] = m[ 2] + m[ 6];
1588  d[ 3] = m[ 3] + m[ 7];
1589  d[ 4] = m[ 8] + m[12];
1590  d[ 5] = m[ 9] + m[13];
1591  d[ 6] = m[10] + m[14];
1592  d[ 7] = m[11] + m[15];
1593  d[ 8] = m[ 0] - m[ 4];
1594  d[ 9] = m[ 1] - m[ 5];
1595  d[10] = m[ 2] - m[ 6];
1596  d[11] = m[ 3] - m[ 7];
1597  d[12] = m[12] - m[ 8];
1598  d[13] = m[13] - m[ 9];
1599  d[14] = m[14] - m[10];
1600  d[15] = m[15] - m[11];
1601 
1602  m[ 0] = d[ 0] + d[ 3];
1603  m[ 1] = d[ 1] + d[ 2];
1604  m[ 2] = d[ 1] - d[ 2];
1605  m[ 3] = d[ 0] - d[ 3];
1606  m[ 4] = d[ 4] + d[ 7];
1607  m[ 5] = d[ 5] + d[ 6];
1608  m[ 6] = d[ 5] - d[ 6];
1609  m[ 7] = d[ 4] - d[ 7];
1610  m[ 8] = d[ 8] + d[11];
1611  m[ 9] = d[ 9] + d[10];
1612  m[10] = d[ 9] - d[10];
1613  m[11] = d[ 8] - d[11];
1614  m[12] = d[12] + d[15];
1615  m[13] = d[13] + d[14];
1616  m[14] = d[13] - d[14];
1617  m[15] = d[12] - d[15];
1618 
1619  d[ 0] = m[ 0] + m[ 1];
1620  d[ 1] = m[ 0] - m[ 1];
1621  d[ 2] = m[ 2] + m[ 3];
1622  d[ 3] = m[ 3] - m[ 2];
1623  d[ 4] = m[ 4] + m[ 5];
1624  d[ 5] = m[ 4] - m[ 5];
1625  d[ 6] = m[ 6] + m[ 7];
1626  d[ 7] = m[ 7] - m[ 6];
1627  d[ 8] = m[ 8] + m[ 9];
1628  d[ 9] = m[ 8] - m[ 9];
1629  d[10] = m[10] + m[11];
1630  d[11] = m[11] - m[10];
1631  d[12] = m[12] + m[13];
1632  d[13] = m[12] - m[13];
1633  d[14] = m[14] + m[15];
1634  d[15] = m[15] - m[14];
1635 
1636  for (k=0; k<16; ++k)
1637  {
1638  satd += abs(d[k]);
1639  }
1640  satd = ((satd+1)>>1);
1641 
1642  return satd;
1643 }
1644 
// 8x8 Hadamard cost of the residual (piOrg - piCur).
// The scalar path below builds the 64 residual samples, runs three butterfly
// stages along every row and then three along every column, sums the absolute
// values of the 64 results and returns (sum + 2) >> 2.
//   piOrg / piCur           top-left sample of the original / current block
//   iStrideOrg / iStrideCur distance in samples between consecutive rows
//   iStep                   must be 1 (asserted on the scalar path only)
//   bitDepth                conditionally compiled parameter, see note below
1645 Distortion TComRdCost::xCalcHADs8x8( const Pel *piOrg, const Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep
// NOTE(review): the #endif two lines below has no opening #if in this listing
// (listing line 1646 is absent). Presumably it is the same
// VECTOR_CODING__DISTORTION_CALCULATIONS guard used inside the body - confirm
// against the original source file.
1647  , Int bitDepth
1648 #endif
1649  )
1650 {
1651 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
// For bit depths up to 10 the whole computation is handed to simdHADs8x8,
// which returns before the iStep assertion below is reached.
1652  if( bitDepth <= 10 )
1653  {
1654  return( simdHADs8x8( piOrg , piCur , iStrideOrg , iStrideCur ) );
1655  }
1656 #endif
1657  Int k, i, j, jj;
1658  Distortion sad = 0;
1659  TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];
1660  assert( iStep == 1 );
// Residual, stored row-major: diff[8*row + col].
1661  for( k = 0; k < 64; k += 8 )
1662  {
1663  diff[k+0] = piOrg[0] - piCur[0];
1664  diff[k+1] = piOrg[1] - piCur[1];
1665  diff[k+2] = piOrg[2] - piCur[2];
1666  diff[k+3] = piOrg[3] - piCur[3];
1667  diff[k+4] = piOrg[4] - piCur[4];
1668  diff[k+5] = piOrg[5] - piCur[5];
1669  diff[k+6] = piOrg[6] - piCur[6];
1670  diff[k+7] = piOrg[7] - piCur[7];
1671 
1672  piCur += iStrideCur;
1673  piOrg += iStrideOrg;
1674  }
1675 
1676  //horizontal
// Three butterfly stages per row j (element distance 4, then 2, then 1);
// the row result ends up in m2[j][0..7].
1677  for (j=0; j < 8; j++)
1678  {
1679  jj = j << 3;
1680  m2[j][0] = diff[jj ] + diff[jj+4];
1681  m2[j][1] = diff[jj+1] + diff[jj+5];
1682  m2[j][2] = diff[jj+2] + diff[jj+6];
1683  m2[j][3] = diff[jj+3] + diff[jj+7];
1684  m2[j][4] = diff[jj ] - diff[jj+4];
1685  m2[j][5] = diff[jj+1] - diff[jj+5];
1686  m2[j][6] = diff[jj+2] - diff[jj+6];
1687  m2[j][7] = diff[jj+3] - diff[jj+7];
1688 
1689  m1[j][0] = m2[j][0] + m2[j][2];
1690  m1[j][1] = m2[j][1] + m2[j][3];
1691  m1[j][2] = m2[j][0] - m2[j][2];
1692  m1[j][3] = m2[j][1] - m2[j][3];
1693  m1[j][4] = m2[j][4] + m2[j][6];
1694  m1[j][5] = m2[j][5] + m2[j][7];
1695  m1[j][6] = m2[j][4] - m2[j][6];
1696  m1[j][7] = m2[j][5] - m2[j][7];
1697 
1698  m2[j][0] = m1[j][0] + m1[j][1];
1699  m2[j][1] = m1[j][0] - m1[j][1];
1700  m2[j][2] = m1[j][2] + m1[j][3];
1701  m2[j][3] = m1[j][2] - m1[j][3];
1702  m2[j][4] = m1[j][4] + m1[j][5];
1703  m2[j][5] = m1[j][4] - m1[j][5];
1704  m2[j][6] = m1[j][6] + m1[j][7];
1705  m2[j][7] = m1[j][6] - m1[j][7];
1706  }
1707 
1708  //vertical
// Same three stages applied down each column i of the row-transformed data;
// m2 is overwritten with the final coefficients.
1709  for (i=0; i < 8; i++)
1710  {
1711  m3[0][i] = m2[0][i] + m2[4][i];
1712  m3[1][i] = m2[1][i] + m2[5][i];
1713  m3[2][i] = m2[2][i] + m2[6][i];
1714  m3[3][i] = m2[3][i] + m2[7][i];
1715  m3[4][i] = m2[0][i] - m2[4][i];
1716  m3[5][i] = m2[1][i] - m2[5][i];
1717  m3[6][i] = m2[2][i] - m2[6][i];
1718  m3[7][i] = m2[3][i] - m2[7][i];
1719 
1720  m1[0][i] = m3[0][i] + m3[2][i];
1721  m1[1][i] = m3[1][i] + m3[3][i];
1722  m1[2][i] = m3[0][i] - m3[2][i];
1723  m1[3][i] = m3[1][i] - m3[3][i];
1724  m1[4][i] = m3[4][i] + m3[6][i];
1725  m1[5][i] = m3[5][i] + m3[7][i];
1726  m1[6][i] = m3[4][i] - m3[6][i];
1727  m1[7][i] = m3[5][i] - m3[7][i];
1728 
1729  m2[0][i] = m1[0][i] + m1[1][i];
1730  m2[1][i] = m1[0][i] - m1[1][i];
1731  m2[2][i] = m1[2][i] + m1[3][i];
1732  m2[3][i] = m1[2][i] - m1[3][i];
1733  m2[4][i] = m1[4][i] + m1[5][i];
1734  m2[5][i] = m1[4][i] - m1[5][i];
1735  m2[6][i] = m1[6][i] + m1[7][i];
1736  m2[7][i] = m1[6][i] - m1[7][i];
1737  }
1738 
// Accumulate the absolute value of all 64 coefficients.
1739  for (i = 0; i < 8; i++)
1740  {
1741  for (j = 0; j < 8; j++)
1742  {
1743  sad += abs(m2[i][j]);
1744  }
1745  }
1746 
// Rounded division of the sum by 4.
1747  sad=((sad+2)>>2);
1748 
1749  return sad;
1750 }
1751 
1752 
1754 {
// Hadamard cost of an iRows x iCols block described by pcDtParam.
// The block is tiled with the largest of 8x8, 4x4 or 2x2 sub-blocks whose size
// divides both iRows and iCols, the per-tile costs are summed, and the sum is
// right-shifted by DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
// NOTE(review): the line carrying this function's signature is absent from
// this listing (listing line 1753).

// Weighted prediction is delegated to the weighted Hadamard routine.
1755  if ( pcDtParam->bApplyWeight )
1756  {
1757  return TComRdCostWeightPrediction::xGetHADsw( pcDtParam );
1758  }
1759  const Pel* piOrg = pcDtParam->pOrg;
1760  const Pel* piCur = pcDtParam->pCur;
1761  const Int iRows = pcDtParam->iRows;
1762  const Int iCols = pcDtParam->iCols;
1763  const Int iStrideCur = pcDtParam->iStrideCur;
1764  const Int iStrideOrg = pcDtParam->iStrideOrg;
1765  const Int iStep = pcDtParam->iStep;
1766 
1767  Int x, y;
1768 
1769  Distortion uiSum = 0;
1770 
// 8x8 tiles: piOrg/piCur advance by eight rows after each row of tiles.
// The current-block column index is scaled by iStep; the original's is not.
1771  if( ( iRows % 8 == 0) && (iCols % 8 == 0) )
1772  {
1773  Int iOffsetOrg = iStrideOrg<<3;
1774  Int iOffsetCur = iStrideCur<<3;
1775  for ( y=0; y<iRows; y+= 8 )
1776  {
1777  for ( x=0; x<iCols; x+= 8 )
1778  {
1779  uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep
// NOTE(review): the #endif below has no opening #if in this listing (listing
// line 1780 is absent); the bitDepth argument is presumably conditional on the
// VECTOR_CODING__DISTORTION_CALCULATIONS guard - confirm against the original
// source file.
1781  , pcDtParam->bitDepth
1782 #endif
1783  );
1784  }
1785  piOrg += iOffsetOrg;
1786  piCur += iOffsetCur;
1787  }
1788  }
// 4x4 tiles when the dimensions are multiples of 4 but not both of 8.
1789  else if( ( iRows % 4 == 0) && (iCols % 4 == 0) )
1790  {
1791  Int iOffsetOrg = iStrideOrg<<2;
1792  Int iOffsetCur = iStrideCur<<2;
1793 
1794  for ( y=0; y<iRows; y+= 4 )
1795  {
1796  for ( x=0; x<iCols; x+= 4 )
1797  {
1798  uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1799  }
1800  piOrg += iOffsetOrg;
1801  piCur += iOffsetCur;
1802  }
1803  }
// 2x2 tiles when the dimensions are even but not both multiples of 4.
1804  else if( ( iRows % 2 == 0) && (iCols % 2 == 0) )
1805  {
1806  Int iOffsetOrg = iStrideOrg<<1;
1807  Int iOffsetCur = iStrideCur<<1;
1808  for ( y=0; y<iRows; y+=2 )
1809  {
1810  for ( x=0; x<iCols; x+=2 )
1811  {
1812  uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1813  }
1814  piOrg += iOffsetOrg;
1815  piCur += iOffsetCur;
1816  }
1817  }
// Odd dimensions are not supported.
1818  else
1819  {
1820  assert(false);
1821  }
1822 
1823  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
1824 }
1825 
SChar g_aucConvertToBit[MAX_CU_SIZE+1]
Definition: TComRom.cpp:572
8xM SSE
Definition: TypeDef.h:384
8xM SAD with step
Definition: TypeDef.h:400
4xM HAD with step
Definition: TypeDef.h:407
Int iCols
Definition: TComRdCost.h:75
Int iRows
Definition: TComRdCost.h:74
UInt Distortion
distortion measurement
Definition: TypeDef.h:260
8xM HAD with step
Definition: TypeDef.h:408
void Void
Definition: TypeDef.h:203
static Distortion xGetSAD48(DistParam *pcDtParam)
const Pel * pCur
Definition: TComRdCost.h:71
Double m_dLambda
Definition: TComRdCost.h:119
virtual ~TComRdCost()
Definition: TComRdCost.cpp:57
static Distortion xGetHADs(DistParam *pcDtParam)
global variables & functions (header)
static Distortion xGetSSE4(DistParam *pcDtParam)
#define NULL
Definition: CommonDef.h:107
4xM SAD with step
Definition: TypeDef.h:399
static Distortion xGetSAD64(DistParam *pcDtParam)
Definition: TComRdCost.cpp:974
Int getROIYHeight() const
Definition: TComPattern.h:76
neighbouring pixel access class for all components
Definition: TComPattern.h:55
RD cost computation classes (header)
unsigned int UInt
Definition: TypeDef.h:212
16NxM HAD with step
Definition: TypeDef.h:412
static Distortion xCalcHADs2x2(const Pel *piOrg, const Pel *piCurr, Int iStrideOrg, Int iStrideCur, Int iStep)
Short Pel
pixel type
Definition: TypeDef.h:249
static Distortion xGetSSE(DistParam *pcDtParam)
Double m_dLambdaMotionSAD[2]
Definition: TComRdCost.h:121
static Distortion xGetSAD32(DistParam *pcDtParam)
Definition: TComRdCost.cpp:832
Int recon[MAX_NUM_CHANNEL_TYPE]
the bit depth as indicated in the SPS
Definition: TypeDef.h:793
#define RExt__HIGH_BIT_DEPTH_SUPPORT
0 (default) use data type definitions for 8-10 bit video, 1 = use larger data types to allow for up t...
Definition: TypeDef.h:138
general size SSE
Definition: TypeDef.h:382
16NxM SSE
Definition: TypeDef.h:388
static Distortion xGetSAD24(DistParam *pcDtParam)
Definition: TComRdCost.cpp:907
4xM SSE
Definition: TypeDef.h:383
static Distortion xCalcHADs8x8(const Pel *piOrg, const Pel *piCurr, Int iStrideOrg, Int iStrideCur, Int iStep)
Distortion xGetHADsw(DistParam *pcDtParam)
get weighted Hadamard cost
Int m_iCostScale
Definition: TComRdCost.h:128
Int getROIYWidth() const
Definition: TComPattern.h:75
8xM SAD
Definition: TypeDef.h:392
Int Intermediate_Int
used as intermediate value in calculations
Definition: TypeDef.h:253
Void init()
Definition: TComRdCost.cpp:126
Int iSubShift
During cost calculations, if distortion exceeds this value, cost calculations may early-terminate...
Definition: TComRdCost.h:89
Double m_motionLambda
Definition: TComRdCost.h:127
static Distortion xCalcHADs4x4(const Pel *piOrg, const Pel *piCurr, Int iStrideOrg, Int iStrideCur, Int iStep)
Int iStep
Definition: TComRdCost.h:76
bool Bool
Definition: TypeDef.h:204
16NxM SAD with step
Definition: TypeDef.h:404
Double calcRdCost(Double numBits, Double distortion, DFunc eDFunc=DF_DEFAULT)
Definition: TComRdCost.cpp:62
static Distortion xGetSAD16(DistParam *pcDtParam)
Definition: TComRdCost.cpp:676
static const Int LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME
QP' to use for mixed_lossy_lossless coding.
Definition: CommonDef.h:211
static Distortion xGetSSE8(DistParam *pcDtParam)
distortion parameter class
Definition: TComRdCost.h:67
Int iStrideOrg
Definition: TComRdCost.h:72
static Distortion xGetSSE16(DistParam *pcDtParam)
static Distortion xGetSSE16N(DistParam *pcDtParam)
Double m_sqrtLambda
Definition: TComRdCost.h:120
Void setLambda(Double dLambda, const BitDepths &bitDepths)
Definition: TComRdCost.cpp:109
static Distortion xGetSSE64(DistParam *pcDtParam)
static Distortion xGetSAD(DistParam *pcDtParam)
Definition: TComRdCost.cpp:518
Distortion m_maximumDistortionForEarlyExit
Definition: TComRdCost.h:85
general size SAD
Definition: TypeDef.h:390
Int TCoeff
transform coefficient
Definition: TypeDef.h:250
16xM SAD with step
Definition: TypeDef.h:401
ComponentID compIdx
Definition: TComRdCost.h:84
64xM HAD with step
Definition: TypeDef.h:411
__inline Pel * getROIY()
Definition: TComPattern.h:89
Double m_dLambdaMotionSSE[2]
Definition: TComRdCost.h:122
Bool bApplyWeight
Definition: TComRdCost.h:80
16NxM SAD
Definition: TypeDef.h:396
Int iStrideCur
Definition: TComRdCost.h:73
Double m_distortionWeight[MAX_NUM_COMPONENT]
Definition: TComRdCost.h:118
Void setDistParam(UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam &rcDistParam)
Definition: TComRdCost.cpp:192
#define VECTOR_CODING__DISTORTION_CALCULATIONS
enable vector coding for distortion calculations 0 (default if SSE not possible) disable SSE vector c...
Definition: TypeDef.h:146
static Distortion xGetSAD4(DistParam *pcDtParam)
Definition: TComRdCost.cpp:578
Double m_dFrameLambda
Definition: TComRdCost.h:123
static Distortion xGetSSE32(DistParam *pcDtParam)
32xM SAD
Definition: TypeDef.h:394
static Distortion xGetSAD16N(DistParam *pcDtParam)
Definition: TComRdCost.cpp:774
Int bitDepth
Definition: TComRdCost.h:78
32xM HAD with step
Definition: TypeDef.h:410
32xM SSE
Definition: TypeDef.h:386
CostMode m_costMode
Definition: TComRdCost.h:117
FpDistFunc DistFunc
Definition: TComRdCost.h:77
general size Hadamard with step
Definition: TypeDef.h:406
#define DISTORTION_PRECISION_ADJUSTMENT(x)
Definition: TypeDef.h:164
4xM SAD
Definition: TypeDef.h:391
Distortion getDistPart(Int bitDepth, const Pel *piCur, Int iCurStride, const Pel *piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc=DF_SSE)
Definition: TComRdCost.cpp:336
int Int
Definition: TypeDef.h:211
static Bool isChroma(const ComponentID id)
ComponentID
Definition: TypeDef.h:308
static UInt xGetExpGolombNumberOfBits(Int iVal)
Definition: TComRdCost.cpp:177
FpDistFunc m_afpDistortFunc[DF_TOTAL_FUNCTIONS]
Definition: TComRdCost.h:116
Frame-based SSE.
Definition: TypeDef.h:422
Int getPatternLStride() const
Definition: TComPattern.h:77
static Distortion xGetSAD12(DistParam *pcDtParam)
Definition: TComRdCost.cpp:735
64xM SAD with step
Definition: TypeDef.h:403
double Double
Definition: TypeDef.h:213
16xM SSE
Definition: TypeDef.h:385
Distortion xGetSSEw(DistParam *pcDtParam)
64xM SAD
Definition: TypeDef.h:395
32xM SAD with step
Definition: TypeDef.h:402
Distortion calcHAD(Int bitDepth, const Pel *pi0, Int iStride0, const Pel *pi1, Int iStride1, Int iWidth, Int iHeight)
Definition: TComRdCost.cpp:297
static Distortion xGetSAD8(DistParam *pcDtParam)
Definition: TComRdCost.cpp:625
const Pel * pOrg
Definition: TComRdCost.h:70
16xM HAD with step
Definition: TypeDef.h:409
Distortion xGetSADw(DistParam *pcDtParam)
16xM SAD
Definition: TypeDef.h:393
DFunc
distortion function index
Definition: TypeDef.h:379
64xM SSE
Definition: TypeDef.h:387
general size SAD with step
Definition: TypeDef.h:398