Changeset 1413 in 3DVCSoftware for trunk/source/Lib/TLibCommon/TComRdCost.cpp


Ignore:
Timestamp:
11 Jul 2018, 15:19:49 (6 years ago)
Author:
tech
Message:

Merged HTM-16.2-dev@1412

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/source/Lib/TLibCommon/TComRdCost.cpp

    r1405 r1413  
    44 * granted under this license.
    55 *
    6  * Copyright (c) 2010-2016, ITU/ISO/IEC
     6 * Copyright (c) 2010-2017, ITU/ISO/IEC
    77 * All rights reserved.
    88 *
     
    4141#include "TComRom.h"
    4242#include "TComRdCost.h"
     43
     44#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     45#include <emmintrin.h>
     46#include <xmmintrin.h>
     47#endif
     48
    4349#if NH_3D_VSO
    4450#include "TComDataCU.h"
     
    208214  m_bUseEstimatedVSD        = false;
    209215#endif
    210 #if NH_3D_DBBP
     216#if NH_3D
    211217  m_bUseMask                = false;
    212218#endif
     
    237243  rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
    238244
    239 #if NH_3D_DBBP
     245#if NH_3D
    240246  if( m_bUseMask )
    241247  {
     
    298304  }
    299305
    300 #if NH_3D_DBBP
     306#if NH_3D
    301307  if( m_bUseMask )
    302308  {
     
    349355  }
    350356
    351 #if NH_3D_DBBP
     357#if NH_3D
    352358  if( m_bUseMask )
    353359  {
     
    372378  rcDP.DistFunc   = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ];
    373379  rcDP.m_maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
    374 #if NH_3D_DBBP
     380#if NH_3D
    375381  if( m_bUseMask )
    376382  {
     
    391397      for ( x=0; x<iWidth; x+= 8 )
    392398      {
    393         uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
     399        uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1
     400#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     401          , bitDepth
     402#endif
     403          );
    394404      }
    395405      pi0 += iStride0*8;
     
    415425}
    416426
    417 #if NH_3D_ENC_DEPTH
     427#if NH_3D
    418428UInt TComRdCost::calcVAR (Pel* pi0, Int stride, Int width, Int height, Int cuDepth, UInt maxCuWidth)
    419429{
     
    465475  cDtParam.bitDepth     = bitDepth;
    466476
    467 #if NH_3D_IC
     477#if NH_3D
    468478  cDtParam.bUseIC       = false;
    469 #endif
    470 #if NH_3D_SDC_INTER
    471479  cDtParam.bUseSDCMRSAD = false;
    472480#endif
     
    481489  }
    482490}
     491
     492// ====================================================================================================================
     493// Distortion functions
     494// ====================================================================================================================
     495
     496#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     // Sum of absolute differences between two sample rows, processed 4 samples per SSE2 step.
     //   piOrg, piCur : the two rows to compare
     //   nWidth       : number of samples in the row; asserted to be a multiple of 4
     // Returns the row SAD as an Int.
     // NOTE(review): the 64-bit loads cover exactly 4 samples only if Pel is a 16-bit type;
     // the Pel typedef is not visible in this changeset -- confirm.
     497inline Int simdSADLine4n16b( const Pel * piOrg , const Pel * piCur , Int nWidth )
     498{
     499  // internal bit-depth must be 12-bit or lower
     500  assert( !( nWidth & 0x03 ) );
     501  __m128i org , cur , abs , sum;
     502  sum = _mm_setzero_si128();
     503  for( Int n = 0 ; n < nWidth ; n += 4 )
     504  {
          // 64-bit loads fill the low half of the register; the upper half is zeroed
     505    org = _mm_loadl_epi64( ( __m128i* )( piOrg + n ) );
     506    cur = _mm_loadl_epi64( ( __m128i* )( piCur + n ) );
          // |org - cur| per signed 16-bit lane, formed as max - min
     507    abs = _mm_subs_epi16( _mm_max_epi16( org , cur )  , _mm_min_epi16( org , cur ) );
          // unsigned saturating 16-bit accumulation (presumably the reason for the bit-depth limit above)
     508    sum = _mm_adds_epu16( abs , sum );
     509  }
     510  __m128i zero =  _mm_setzero_si128();
          // widen the four low 16-bit partial sums to 32 bits by interleaving with zero
     511  sum = _mm_unpacklo_epi16( sum , zero );
          // horizontal add: first swap adjacent 32-bit lanes, then swap the 64-bit halves
     512  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) );
     513  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) );
          // every lane now holds the total; return lane 0
     514  return( _mm_cvtsi128_si32( sum ) );
     515}
     516
     // Sum of absolute differences between two sample rows, processed 8 samples per SSE2 step.
     //   piOrg, piCur : the two rows to compare (loaded with unaligned 128-bit loads)
     //   nWidth       : number of samples in the row; asserted to be a multiple of 8
     // Returns the row SAD as an Int.
     // NOTE(review): a 128-bit load covers exactly 8 samples only if Pel is a 16-bit type;
     // the Pel typedef is not visible in this changeset -- confirm.
     517inline Int simdSADLine8n16b( const Pel * piOrg , const Pel * piCur , Int nWidth )
     518{
     519  // internal bit-depth must be 12-bit or lower
     520  assert( !( nWidth & 0x07 ) );
     521  __m128i org , cur , abs , sum;
     522  sum = _mm_setzero_si128();
     523  for( Int n = 0 ; n < nWidth ; n += 8 )
     524  {
     525    org = _mm_loadu_si128( ( __m128i* )( piOrg + n ) );
     526    cur = _mm_loadu_si128( ( __m128i* )( piCur + n ) );
          // |org - cur| per signed 16-bit lane, formed as max - min
     527    abs = _mm_subs_epi16( _mm_max_epi16( org , cur )  , _mm_min_epi16( org , cur ) );
          // unsigned saturating 16-bit accumulation: each lane sums every 8th sample of the row
     528    sum = _mm_adds_epu16( abs , sum );
     529  }
     530  __m128i zero =  _mm_setzero_si128();
          // widen the eight 16-bit partial sums to 32 bits (high four and low four lanes), then combine
     531  __m128i hi = _mm_unpackhi_epi16( sum , zero );
     532  __m128i lo = _mm_unpacklo_epi16( sum , zero );
     533  sum = _mm_add_epi32( lo , hi );
          // horizontal add: first swap adjacent 32-bit lanes, then swap the 64-bit halves
     534  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) );
     535  sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) );
          // every lane now holds the total; return lane 0
     536  return( _mm_cvtsi128_si32( sum ) );
     537}
     538
     // In-place transpose of an 8x8 matrix of 32-bit values held in 16 __m128i registers.
     //   pBuffer : 16 vectors; as passed by simdHADs8x8 (mmDiff[8][2]) entry 2*r holds
     //             columns 0-3 of row r and entry 2*r+1 holds columns 4-7 of row r.
     // The matrix is handled as four 4x4 quadrants: each quadrant is transposed with
     // _MM_TRANSPOSE4_PS, and the two off-diagonal quadrants are exchanged on write-back.
     539inline Void simd8x8Transpose32b( __m128i * pBuffer )
     540{
     541  __m128 tmp[16];
          // reinterpret the integer vectors as float vectors (bit pattern unchanged) so the
          // float 4x4 transpose macro can be applied
     542  for( Int n = 0 ; n < 16 ; n++ )
     543  {
     544    tmp[n] = _mm_castsi128_ps( pBuffer[n] );
     545  }
          // even entries 0..6: rows 0-3, columns 0-3;  odd entries 1..7: rows 0-3, columns 4-7
     546  _MM_TRANSPOSE4_PS( tmp[0] , tmp[2] , tmp[4] , tmp[6] );
     547  _MM_TRANSPOSE4_PS( tmp[1] , tmp[3] , tmp[5] , tmp[7] );
          // even entries 8..14: rows 4-7, columns 0-3;  odd entries 9..15: rows 4-7, columns 4-7
     548  _MM_TRANSPOSE4_PS( tmp[8] , tmp[10] , tmp[12] , tmp[14] );
     549  _MM_TRANSPOSE4_PS( tmp[9] , tmp[11] , tmp[13] , tmp[15] );
          // write back: diagonal quadrants stay in place, off-diagonal quadrants swap positions
     550  for( Int n = 0 ; n < 8 ; n += 2 )
     551  {
     552    pBuffer[n] = _mm_castps_si128( tmp[n] );
     553    pBuffer[n+1]  = _mm_castps_si128( tmp[n+8] );
     554    pBuffer[n+8] = _mm_castps_si128( tmp[n+1] );
     555    pBuffer[n+9]  = _mm_castps_si128( tmp[n+9] );
     556  }
     557}
     558
     // For GCC versions strictly between 4.6.0 and 4.7.0 the function below is compiled with
     // the "no-tree-vrp" optimize attribute. Note that the strict '>' excludes GCC 4.6.0 itself.
     // NOTE(review): presumably a workaround for an optimizer problem in GCC 4.6.x -- the
     // changeset does not state the reason; confirm before removing.
     559#ifdef __GNUC__
     560#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
     561#if GCC_VERSION > 40600 && GCC_VERSION < 40700
     562__attribute__((optimize("no-tree-vrp")))
     563#endif
     564#endif
     // Three add/subtract butterfly stages over 16 vectors of 32-bit lanes (by its name, a
     // 1-D 8-point Hadamard transform). Entries 2*r and 2*r+1 are treated as the two halves
     // of row r; all arithmetic is per 32-bit lane, so each lane is transformed independently.
     //   pInput  : 16 vectors, read as input and overwritten with the result (in place)
     //   pOutput : never read or written by this function; results go to pInput
     565Void simd8x8HAD1D32b( __m128i * pInput , __m128i * pOutput )
     566{
     567  __m128i m1[8][2] , m2[8][2];
     568
          // stage 1: sums and differences of row r and row r+4
     569  m2[0][0] = _mm_add_epi32( pInput[0] ,pInput[8 ] );  m2[0][1] = _mm_add_epi32( pInput[1] ,pInput[9 ] );
     570  m2[1][0] = _mm_add_epi32( pInput[2] ,pInput[10] );  m2[1][1] = _mm_add_epi32( pInput[3] ,pInput[11] );
     571  m2[2][0] = _mm_add_epi32( pInput[4] ,pInput[12] );  m2[2][1] = _mm_add_epi32( pInput[5] ,pInput[13] );
     572  m2[3][0] = _mm_add_epi32( pInput[6] ,pInput[14] );  m2[3][1] = _mm_add_epi32( pInput[7] ,pInput[15] );
     573  m2[4][0] = _mm_sub_epi32( pInput[0] ,pInput[8 ] );  m2[4][1] = _mm_sub_epi32( pInput[1] ,pInput[9 ] );
     574  m2[5][0] = _mm_sub_epi32( pInput[2] ,pInput[10] );  m2[5][1] = _mm_sub_epi32( pInput[3] ,pInput[11] );
     575  m2[6][0] = _mm_sub_epi32( pInput[4] ,pInput[12] );  m2[6][1] = _mm_sub_epi32( pInput[5] ,pInput[13] );
     576  m2[7][0] = _mm_sub_epi32( pInput[6] ,pInput[14] );  m2[7][1] = _mm_sub_epi32( pInput[7] ,pInput[15] );
     577
          // stage 2: within each group of four, sums and differences of entries two apart
     578  m1[0][0] = _mm_add_epi32( m2[0][0] , m2[2][0] );  m1[0][1] = _mm_add_epi32( m2[0][1] , m2[2][1] );
     579  m1[1][0] = _mm_add_epi32( m2[1][0] , m2[3][0] );  m1[1][1] = _mm_add_epi32( m2[1][1] , m2[3][1] );
     580  m1[2][0] = _mm_sub_epi32( m2[0][0] , m2[2][0] );  m1[2][1] = _mm_sub_epi32( m2[0][1] , m2[2][1] );
     581  m1[3][0] = _mm_sub_epi32( m2[1][0] , m2[3][0] );  m1[3][1] = _mm_sub_epi32( m2[1][1] , m2[3][1] );
     582  m1[4][0] = _mm_add_epi32( m2[4][0] , m2[6][0] );  m1[4][1] = _mm_add_epi32( m2[4][1] , m2[6][1] );
     583  m1[5][0] = _mm_add_epi32( m2[5][0] , m2[7][0] );  m1[5][1] = _mm_add_epi32( m2[5][1] , m2[7][1] );
     584  m1[6][0] = _mm_sub_epi32( m2[4][0] , m2[6][0] );  m1[6][1] = _mm_sub_epi32( m2[4][1] , m2[6][1] );
     585  m1[7][0] = _mm_sub_epi32( m2[5][0] , m2[7][0] );  m1[7][1] = _mm_sub_epi32( m2[5][1] , m2[7][1] );
     586
          // stage 3: sums and differences of adjacent pairs, written back over the input
     587  pInput[0 ] = _mm_add_epi32( m1[0][0] , m1[1][0] );  pInput[1 ] = _mm_add_epi32( m1[0][1] , m1[1][1] );
     588  pInput[2 ] = _mm_sub_epi32( m1[0][0] , m1[1][0] );  pInput[3 ] = _mm_sub_epi32( m1[0][1] , m1[1][1] );
     589  pInput[4 ] = _mm_add_epi32( m1[2][0] , m1[3][0] );  pInput[5 ] = _mm_add_epi32( m1[2][1] , m1[3][1] );
     590  pInput[6 ] = _mm_sub_epi32( m1[2][0] , m1[3][0] );  pInput[7 ] = _mm_sub_epi32( m1[2][1] , m1[3][1] );
     591  pInput[8 ] = _mm_add_epi32( m1[4][0] , m1[5][0] );  pInput[9 ] = _mm_add_epi32( m1[4][1] , m1[5][1] );
     592  pInput[10] = _mm_sub_epi32( m1[4][0] , m1[5][0] );  pInput[11] = _mm_sub_epi32( m1[4][1] , m1[5][1] );
     593  pInput[12] = _mm_add_epi32( m1[6][0] , m1[7][0] );  pInput[13] = _mm_add_epi32( m1[6][1] , m1[7][1] );
     594  pInput[14] = _mm_sub_epi32( m1[6][0] , m1[7][0] );  pInput[15] = _mm_sub_epi32( m1[6][1] , m1[7][1] );
     595}
     596
     // Per-lane absolute value of four signed 32-bit integers.
     //   m : input vector
     // Returns a vector holding, for each lane, the larger of m and -m.
     597inline __m128i simdAbs32b( __m128i m )
     598{
     599  const __m128i zero = _mm_setzero_si128();
          // tmp = -m, computed as 0 - m
     600  __m128i tmp = _mm_sub_epi32( zero , m );
          // mask lanes are all-ones where m > -m (signed compare), i.e. where m is positive
     601  __m128i mask = _mm_cmpgt_epi32( m , tmp );
          // select m where the mask is set and -m elsewhere
     602  return( _mm_or_si128( _mm_and_si128( mask , m ) , _mm_andnot_si128( mask , tmp ) ) );
     603}
     604
     // SSE2 computation of the 8x8 Hadamard-transformed difference cost between two blocks.
     //   piOrg, piCur           : top-left sample of each 8x8 block; 8 consecutive samples are
     //                            read per row with unaligned 128-bit loads
     //   iStrideOrg, iStrideCur : distance in samples between successive rows of each block
     // Returns the sum of absolute values of the 64 transformed differences, rounded and
     // divided by 4: ( sum + 2 ) >> 2.
     // NOTE(review): a 128-bit load covers exactly 8 samples only if Pel is a 16-bit type;
     // the Pel typedef is not visible in this changeset -- confirm.
     605UInt simdHADs8x8( const Pel * piOrg, const Pel * piCur, Int iStrideOrg, Int iStrideCur )
     606{
     607  __m128i mmDiff[8][2];
     608  __m128i mmZero = _mm_setzero_si128();
     609  for( Int n = 0 ; n < 8 ; n++ , piOrg += iStrideOrg , piCur += iStrideCur )
     610  {
          // 16-bit difference org - cur for the eight samples of row n
     611    __m128i diff = _mm_sub_epi16( _mm_loadu_si128( ( __m128i* )piOrg ) , _mm_loadu_si128( ( __m128i* )piCur ) );
     612    // sign extension
          // mask is all-ones in negative lanes; interleaving it as the upper 16 bits widens
          // each difference to a signed 32-bit value ([n][0] = samples 0-3, [n][1] = samples 4-7)
     613    __m128i mask = _mm_cmplt_epi16( diff , mmZero );
     614    mmDiff[n][0] = _mm_unpacklo_epi16( diff , mask );
     615    mmDiff[n][1] = _mm_unpackhi_epi16( diff , mask );
     616  }
     617
     618  // transpose
     619  simd8x8Transpose32b( &mmDiff[0][0] );
     620
     621  // horizontal
          // the same array is passed as input and output; the transform works in place on its first argument
     622  simd8x8HAD1D32b( &mmDiff[0][0] , &mmDiff[0][0] );
     623
     624  // transpose
     625  simd8x8Transpose32b( &mmDiff[0][0] );
     626
     627  // vertical
     628  simd8x8HAD1D32b( &mmDiff[0][0] , &mmDiff[0][0] );
     629
          // accumulate the absolute values of all 64 coefficients into four 32-bit lanes
     630  __m128i mmSum = _mm_setzero_si128();
     631  for( Int n = 0 ; n < 8 ; n++ )
     632  {
     633    mmSum = _mm_add_epi32( mmSum , simdAbs32b( mmDiff[n][0] ) );
     634    mmSum = _mm_add_epi32( mmSum , simdAbs32b( mmDiff[n][1] ) );
     635  }
          // horizontal add: first swap adjacent 32-bit lanes, then swap the 64-bit halves
     636  mmSum = _mm_add_epi32( mmSum , _mm_shuffle_epi32( mmSum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) );
     637  mmSum = _mm_add_epi32( mmSum , _mm_shuffle_epi32( mmSum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) );
     638
     639  UInt sad = _mm_cvtsi128_si32( mmSum );
          // rounded division by 4
     640  sad = ( sad + 2 ) >> 2;
     641
     642  return( sad );
     643}
     644#endif
     645
     646
     647
    483648#if NH_3D_VSO
    484649// SAIT_VSO_EST_A0033
     
    507672#if NH_3D
    508673  cDtParam.bUseIC       = false;
    509 #endif
    510 #if NH_3D_SDC_INTER
    511674  cDtParam.bUseSDCMRSAD = false;
    512675#endif
     
    554717#endif
    555718
    556 // ====================================================================================================================
    557 // Distortion functions
    558 // ====================================================================================================================
    559 
    560 #if NH_3D_DBBP
     719#if NH_3D
    561720// --------------------------------------------------------------------------------------------------------------------
    562721// Masked distortion functions
     
    599758 
    600759  AOF(!pcDtParam->bApplyWeight);
    601 #if NH_3D_IC
    602760  AOF(!pcDtParam->bUseIC);
    603 #endif
    604761 
    605762  const Pel* piOrg   = pcDtParam->pOrg;
     
    674831    return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
    675832  }
    676 #if NH_3D_IC
     833#if NH_3D
    677834  if( pcDtParam->bUseIC )
    678835  {
    679836    return xGetSADic( pcDtParam );
    680837  }
    681 #endif
    682 #if NH_3D_SDC_INTER
     838
    683839  if( pcDtParam->bUseSDCMRSAD )
    684840  {
     
    696852  Distortion uiSum = 0;
    697853
     854#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     855  if( pcDtParam->bitDepth <= 10 )
     856  {
     857    if( ( iCols & 0x07 ) == 0 )
     858    {
     859      for( Int iRows   = pcDtParam->iRows ; iRows != 0; iRows-- )
     860      {
     861        uiSum += simdSADLine8n16b( piOrg , piCur , iCols );
     862        piOrg += iStrideOrg;
     863        piCur += iStrideCur;
     864      }
     865    }
     866    else
     867    {
     868      for( Int  iRows   = pcDtParam->iRows; iRows != 0; iRows-- )
     869      {
     870        uiSum += simdSADLine4n16b( piOrg , piCur , iCols );
     871        piOrg += iStrideOrg;
     872        piCur += iStrideCur;
     873      }
     874    }
     875  }
     876  else
     877  {
     878#endif
    698879  for(Int iRows = pcDtParam->iRows ; iRows != 0; iRows-- )
    699880  {
     
    709890    piCur += iStrideCur;
    710891  }
     892#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     893  }
     894#endif
    711895
    712896  return ( uiSum >> distortionShift );
     
    720904  }
    721905
    722 #if NH_3D_IC
     906#if NH_3D
    723907  if( pcDtParam->bUseIC )
    724908  {
    725909    return xGetSAD4ic( pcDtParam );
    726910  }
    727 #endif
    728 #if NH_3D_SDC_INTER
    729911  if( pcDtParam->bUseSDCMRSAD )
    730912  {
     
    743925  Distortion uiSum = 0;
    744926
     927#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     928  if( pcDtParam->bitDepth <= 10 )
     929  {
     930    for( ; iRows != 0; iRows-=iSubStep )
     931    {
     932      uiSum += simdSADLine4n16b( piOrg , piCur , 4 );
     933      piOrg += iStrideOrg;
     934      piCur += iStrideCur;
     935    }
     936  }
     937  else
     938  {
     939#endif
    745940  for( ; iRows != 0; iRows-=iSubStep )
    746941  {
     
    753948    piCur += iStrideCur;
    754949  }
     950#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     951  }
     952#endif
    755953
    756954  uiSum <<= iSubShift;
     
    765963  }
    766964
    767 #if NH_3D_IC
     965#if NH_3D
    768966  if( pcDtParam->bUseIC )
    769967  {
    770968    return xGetSAD8ic( pcDtParam );
    771969  }
    772 #endif
    773 #if NH_3D_SDC_INTER
    774970  if( pcDtParam->bUseSDCMRSAD )
    775971  {
     
    788984  Distortion uiSum = 0;
    789985
     986#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     987  if( pcDtParam->bitDepth <= 10 )
     988  {
     989    for( ; iRows != 0; iRows-=iSubStep )
     990    {
     991      uiSum += simdSADLine8n16b( piOrg , piCur , 8 );
     992      piOrg += iStrideOrg;
     993      piCur += iStrideCur;
     994    }
     995  }
     996  else
     997  {
     998#endif
    790999  for( ; iRows != 0; iRows-=iSubStep )
    7911000  {
     
    8021011    piCur += iStrideCur;
    8031012  }
     1013#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1014  }
     1015#endif
    8041016
    8051017  uiSum <<= iSubShift;
     
    8141026  }
    8151027
    816 #if NH_3D_IC
     1028#if NH_3D
    8171029  if( pcDtParam->bUseIC )
    8181030  {
    8191031    return xGetSAD16ic( pcDtParam );
    8201032  }
    821 #endif
    822 #if NH_3D_SDC_INTER
    8231033  if( pcDtParam->bUseSDCMRSAD )
    8241034  {
     
    8371047  Distortion uiSum = 0;
    8381048
     1049#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1050  if( pcDtParam->bitDepth <= 10 )
     1051  {
     1052    for( ; iRows != 0; iRows-=iSubStep )
     1053    {
     1054      uiSum += simdSADLine8n16b( piOrg , piCur , 16 );
     1055      piOrg += iStrideOrg;
     1056      piCur += iStrideCur;
     1057    }
     1058  }
     1059  else
     1060  {
     1061#endif
    8391062  for( ; iRows != 0; iRows-=iSubStep )
    8401063  {
     
    8591082    piCur += iStrideCur;
    8601083  }
     1084#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1085  }
     1086#endif
    8611087
    8621088  uiSum <<= iSubShift;
     
    8701096    return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
    8711097  }
    872 #if NH_3D_IC
     1098#if NH_3D
    8731099  if( pcDtParam->bUseIC )
    8741100  {
    8751101    return xGetSAD12ic( pcDtParam );
    8761102  }
    877 #endif
    878 #if NH_3D_SDC_INTER
    8791103  if( pcDtParam->bUseSDCMRSAD )
    8801104  {
     
    9181142Distortion TComRdCost::xGetSAD16N( DistParam* pcDtParam )
    9191143{
    920 #if NH_3D_IC
     1144#if NH_3D
    9211145  if( pcDtParam->bUseIC )
    9221146  {
    9231147    return xGetSAD16Nic( pcDtParam );
    9241148  }
    925 #endif
    926 #if NH_3D_SDC_INTER
    9271149  if( pcDtParam->bUseSDCMRSAD )
    9281150  {
     
    9421164  Distortion uiSum = 0;
    9431165
     1166#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1167  if( pcDtParam->bitDepth <= 10 )
     1168  {
     1169    for( ; iRows != 0; iRows-=iSubStep )
     1170    {
     1171      uiSum += simdSADLine8n16b( piOrg , piCur , iCols );
     1172      piOrg += iStrideOrg;
     1173      piCur += iStrideCur;
     1174    }
     1175  }
     1176  else
     1177  {
     1178#endif
    9441179  for( ; iRows != 0; iRows-=iSubStep )
    9451180  {
     
    9661201    piCur += iStrideCur;
    9671202  }
     1203#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1204  }
     1205#endif
    9681206
    9691207  uiSum <<= iSubShift;
     
    9781216  }
    9791217
    980 #if NH_3D_IC
     1218#if NH_3D
    9811219  if( pcDtParam->bUseIC )
    9821220  {
    9831221    return xGetSAD32ic( pcDtParam );
    9841222  }
    985 #endif
    986 #if NH_3D_SDC_INTER
    9871223  if( pcDtParam->bUseSDCMRSAD )
    9881224  {
     
    10011237  Distortion uiSum = 0;
    10021238
     1239#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1240  if( pcDtParam->bitDepth <= 10 )
     1241  {
     1242    for( ; iRows != 0; iRows-=iSubStep )
     1243    {
     1244      uiSum += simdSADLine8n16b( piOrg , piCur , 32 );
     1245      piOrg += iStrideOrg;
     1246      piCur += iStrideCur;
     1247    }
     1248  }
     1249  else
     1250  {
     1251#endif
    10031252  for( ; iRows != 0; iRows-=iSubStep )
    10041253  {
     
    10391288    piCur += iStrideCur;
    10401289  }
     1290#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1291  }
     1292#endif
    10411293
    10421294  uiSum <<= iSubShift;
     
    10511303  }
    10521304
    1053 #if NH_3D_IC
     1305#if NH_3D
    10541306  if( pcDtParam->bUseIC )
    10551307  {
    10561308    return xGetSAD24ic( pcDtParam );
    10571309  }
    1058 #endif
    1059 #if NH_3D_SDC_INTER
    10601310  if( pcDtParam->bUseSDCMRSAD )
    10611311  {
     
    10741324  Distortion uiSum = 0;
    10751325
     1326#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1327  if( pcDtParam->bitDepth <= 10 )
     1328  {
     1329    for( ; iRows != 0; iRows-=iSubStep )
     1330    {
     1331      uiSum += simdSADLine8n16b( piOrg , piCur , 24 );
     1332      piOrg += iStrideOrg;
     1333      piCur += iStrideCur;
     1334    }
     1335  }
     1336  else
     1337  {
     1338#endif
    10761339  for( ; iRows != 0; iRows-=iSubStep )
    10771340  {
     
    11041367    piCur += iStrideCur;
    11051368  }
     1369#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1370  }
     1371#endif
    11061372
    11071373  uiSum <<= iSubShift;
     
    11161382  }
    11171383
    1118 #if NH_3D_IC
     1384#if NH_3D
    11191385  if( pcDtParam->bUseIC )
    11201386  {
    11211387    return xGetSAD64ic( pcDtParam );
    11221388  }
    1123 #endif
    1124 #if NH_3D_SDC_INTER
    11251389  if( pcDtParam->bUseSDCMRSAD )
    11261390  {
     
    11391403  Distortion uiSum = 0;
    11401404
     1405#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1406  if( pcDtParam->bitDepth <= 10 )
     1407  {
     1408    for( ; iRows != 0; iRows-=iSubStep )
     1409    {
     1410      uiSum += simdSADLine8n16b( piOrg , piCur , 64 );
     1411      piOrg += iStrideOrg;
     1412      piCur += iStrideCur;
     1413    }
     1414  }
     1415  else
     1416  {
     1417#endif
    11411418  for( ; iRows != 0; iRows-=iSubStep )
    11421419  {
     
    12091486    piCur += iStrideCur;
    12101487  }
     1488#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1489  }
     1490#endif
    12111491
    12121492  uiSum <<= iSubShift;
     
    12201500    return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
    12211501  }
    1222 #if NH_3D_IC
     1502#if NH_3D
    12231503  if( pcDtParam->bUseIC )
    12241504  {
    12251505    return xGetSAD48ic( pcDtParam );
    12261506  }
    1227 #endif
    1228 #if NH_3D_SDC_INTER
    12291507  if( pcDtParam->bUseSDCMRSAD )
    12301508  {
     
    12431521  Distortion uiSum = 0;
    12441522
     1523#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1524  if( pcDtParam->bitDepth <= 10 )
     1525  {
     1526    for( ; iRows != 0; iRows-=iSubStep )
     1527    {
     1528      uiSum += simdSADLine8n16b( piOrg , piCur , 48 );
     1529      piOrg += iStrideOrg;
     1530      piCur += iStrideCur;
     1531    }
     1532  }
     1533  else
     1534  {
     1535#endif
    12451536  for( ; iRows != 0; iRows-=iSubStep )
    12461537  {
     
    12971588    piCur += iStrideCur;
    12981589  }
    1299 
     1590#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     1591  }
     1592#endif
    13001593  uiSum <<= iSubShift;
    13011594  return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
     
    13031596
    13041597
    1305 #if NH_3D_IC || NH_3D_SDC_INTER
     1598#if NH_3D
    13061599UInt TComRdCost::xGetSADic( DistParam* pcDtParam )
    13071600{
     
    31383431}
    31393432
    3140 Distortion TComRdCost::xCalcHADs8x8( const Pel *piOrg, const Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
    3141 {
     3433Distortion TComRdCost::xCalcHADs8x8( const Pel *piOrg, const Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep
     3434#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     3435  , Int bitDepth
     3436#endif
     3437  )
     3438{
     3439#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     3440  if( bitDepth <= 10 )
     3441{
     3442    return( simdHADs8x8( piOrg , piCur , iStrideOrg , iStrideCur ) );
     3443  }
     3444#endif
    31423445  Int k, i, j, jj;
    31433446  Distortion sad = 0;
     
    32423545    return TComRdCostWeightPrediction::xGetHADsw( pcDtParam );
    32433546  }
    3244 #if NH_3D_IC
     3547#if NH_3D
    32453548  if( pcDtParam->bUseIC )
    32463549  {
    32473550    return xGetHADsic( pcDtParam );
    32483551  }
    3249 #endif
    3250 #if NH_3D_SDC_INTER
    32513552  if( pcDtParam->bUseSDCMRSAD )
    32523553  {
     
    32753576      for ( x=0; x<iCols; x+= 8 )
    32763577      {
    3277         uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
     3578        uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep
     3579#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     3580          , pcDtParam->bitDepth
     3581#endif
     3582                            );
    32783583      }
    32793584      piOrg += iOffsetOrg;
     
    33183623}
    33193624
    3320 #if NH_3D_IC || NH_3D_SDC_INTER
     3625#if NH_3D
    33213626UInt TComRdCost::xGetHADsic( DistParam* pcDtParam )
    33223627{
     
    33893694      for ( x=0; x<iCols; x+= 8 )
    33903695      {
    3391         uiSum += xCalcHADs8x8( &tempOrgMinusDeltaDc[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
     3696        uiSum += xCalcHADs8x8( &tempOrgMinusDeltaDc[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep
     3697#if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
     3698          , pcDtParam->bitDepth
     3699#endif   
     3700          );
    33923701      }
    33933702      tempOrgMinusDeltaDc += iOffsetOrg;
     
    34703779  m_uiLambdaMotionSSEVSO = (UInt)floor(65536.0 *       m_dLambdaVSO    );
    34713780}
    3472 #endif
    3473 #if NH_3D_VSO
     3781
    34743782Dist TComRdCost::xGetDistVSOMode4( Int iStartPosX, Int iStartPosY, Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, Bool bSAD )
    34753783{
     
    36263934#endif
    36273935
     3936
    36283937//! \}
Note: See TracChangeset for help on using the changeset viewer.