Changeset 1413 in 3DVCSoftware for trunk/source/Lib/TLibCommon/TComRdCost.cpp
 Timestamp:
 11 Jul 2018, 15:19:49 (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/source/Lib/TLibCommon/TComRdCost.cpp
r1405 r1413 4 4 * granted under this license. 5 5 * 6 * Copyright (c) 2010201 6, ITU/ISO/IEC6 * Copyright (c) 20102017, ITU/ISO/IEC 7 7 * All rights reserved. 8 8 * … … 41 41 #include "TComRom.h" 42 42 #include "TComRdCost.h" 43 44 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 45 #include <emmintrin.h> 46 #include <xmmintrin.h> 47 #endif 48 43 49 #if NH_3D_VSO 44 50 #include "TComDataCU.h" … … 208 214 m_bUseEstimatedVSD = false; 209 215 #endif 210 #if NH_3D _DBBP216 #if NH_3D 211 217 m_bUseMask = false; 212 218 #endif … … 237 243 rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ]; 238 244 239 #if NH_3D _DBBP245 #if NH_3D 240 246 if( m_bUseMask ) 241 247 { … … 298 304 } 299 305 300 #if NH_3D _DBBP306 #if NH_3D 301 307 if( m_bUseMask ) 302 308 { … … 349 355 } 350 356 351 #if NH_3D _DBBP357 #if NH_3D 352 358 if( m_bUseMask ) 353 359 { … … 372 378 rcDP.DistFunc = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ]; 373 379 rcDP.m_maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max(); 374 #if NH_3D _DBBP380 #if NH_3D 375 381 if( m_bUseMask ) 376 382 { … … 391 397 for ( x=0; x<iWidth; x+= 8 ) 392 398 { 393 uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 ); 399 uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 400 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 401 , bitDepth 402 #endif 403 ); 394 404 } 395 405 pi0 += iStride0*8; … … 415 425 } 416 426 417 #if NH_3D _ENC_DEPTH427 #if NH_3D 418 428 UInt TComRdCost::calcVAR (Pel* pi0, Int stride, Int width, Int height, Int cuDepth, UInt maxCuWidth) 419 429 { … … 465 475 cDtParam.bitDepth = bitDepth; 466 476 467 #if NH_3D _IC477 #if NH_3D 468 478 cDtParam.bUseIC = false; 469 #endif470 #if NH_3D_SDC_INTER471 479 cDtParam.bUseSDCMRSAD = false; 472 480 #endif … … 481 489 } 482 490 } 491 492 // ==================================================================================================================== 493 // Distortion functions 494 // ==================================================================================================================== 495 496 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 497 inline Int simdSADLine4n16b( const Pel * piOrg , const Pel * piCur , Int nWidth ) 498 { 499 // internal bitdepth must be 12bit or lower 500 assert( !( nWidth & 0x03 ) ); 501 __m128i org , cur , abs , sum; 502 sum = _mm_setzero_si128(); 503 for( Int n = 0 ; n < nWidth ; n += 4 ) 504 { 505 org = _mm_loadl_epi64( ( __m128i* )( piOrg + n ) ); 506 cur = _mm_loadl_epi64( ( __m128i* )( piCur + n ) ); 507 abs = _mm_subs_epi16( _mm_max_epi16( org , cur ) , _mm_min_epi16( org , cur ) ); 508 sum = _mm_adds_epu16( abs , sum ); 509 } 510 __m128i zero = _mm_setzero_si128(); 511 sum = _mm_unpacklo_epi16( sum , zero ); 512 sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) ); 513 sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) ); 514 return( _mm_cvtsi128_si32( sum ) ); 515 } 516 517 inline Int simdSADLine8n16b( const Pel * piOrg , const Pel * piCur , Int nWidth ) 518 { 519 // internal bitdepth must be 12bit or lower 520 assert( !( nWidth & 0x07 ) ); 521 __m128i org , cur , abs , sum; 522 sum = _mm_setzero_si128(); 523 for( Int n = 0 ; n < nWidth ; n += 8 ) 524 { 525 org = _mm_loadu_si128( ( __m128i* )( piOrg + n ) ); 526 cur = _mm_loadu_si128( ( __m128i* )( piCur + n ) ); 527 abs = _mm_subs_epi16( _mm_max_epi16( org , cur ) , _mm_min_epi16( org , cur ) ); 528 sum = _mm_adds_epu16( abs , sum ); 529 } 530 __m128i zero = _mm_setzero_si128(); 531 __m128i hi = _mm_unpackhi_epi16( sum , zero ); 532 __m128i lo = _mm_unpacklo_epi16( sum , zero ); 533 sum = _mm_add_epi32( lo , hi ); 534 sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) ); 535 sum = _mm_add_epi32( sum , _mm_shuffle_epi32( sum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) ); 536 return( _mm_cvtsi128_si32( sum ) ); 537 } 538 539 inline Void simd8x8Transpose32b( __m128i * pBuffer ) 540 { 541 __m128 tmp[16]; 542 for( Int n = 0 ; n < 16 ; n++ ) 543 { 544 tmp[n] = _mm_castsi128_ps( pBuffer[n] ); 545 } 546 _MM_TRANSPOSE4_PS( tmp[0] , tmp[2] , tmp[4] , tmp[6] ); 547 _MM_TRANSPOSE4_PS( tmp[1] , tmp[3] , tmp[5] , tmp[7] ); 548 _MM_TRANSPOSE4_PS( tmp[8] , tmp[10] , tmp[12] , tmp[14] ); 549 _MM_TRANSPOSE4_PS( tmp[9] , tmp[11] , tmp[13] , tmp[15] ); 550 for( Int n = 0 ; n < 8 ; n += 2 ) 551 { 552 pBuffer[n] = _mm_castps_si128( tmp[n] ); 553 pBuffer[n+1] = _mm_castps_si128( tmp[n+8] ); 554 pBuffer[n+8] = _mm_castps_si128( tmp[n+1] ); 555 pBuffer[n+9] = _mm_castps_si128( tmp[n+9] ); 556 } 557 } 558 559 #ifdef __GNUC__ 560 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) 561 #if GCC_VERSION > 40600 && GCC_VERSION < 40700 562 __attribute__((optimize("notreevrp"))) 563 #endif 564 #endif 565 Void simd8x8HAD1D32b( __m128i * pInput , __m128i * pOutput ) 566 { 567 __m128i m1[8][2] , m2[8][2]; 568 569 m2[0][0] = _mm_add_epi32( pInput[0] ,pInput[8 ] ); m2[0][1] = _mm_add_epi32( pInput[1] ,pInput[9 ] ); 570 m2[1][0] = _mm_add_epi32( pInput[2] ,pInput[10] ); m2[1][1] = _mm_add_epi32( pInput[3] ,pInput[11] ); 571 m2[2][0] = _mm_add_epi32( pInput[4] ,pInput[12] ); m2[2][1] = _mm_add_epi32( pInput[5] ,pInput[13] ); 572 m2[3][0] = _mm_add_epi32( pInput[6] ,pInput[14] ); m2[3][1] = _mm_add_epi32( pInput[7] ,pInput[15] ); 573 m2[4][0] = _mm_sub_epi32( pInput[0] ,pInput[8 ] ); m2[4][1] = _mm_sub_epi32( pInput[1] ,pInput[9 ] ); 574 m2[5][0] = _mm_sub_epi32( pInput[2] ,pInput[10] ); m2[5][1] = _mm_sub_epi32( pInput[3] ,pInput[11] ); 575 m2[6][0] = _mm_sub_epi32( pInput[4] ,pInput[12] ); m2[6][1] = _mm_sub_epi32( pInput[5] ,pInput[13] ); 576 m2[7][0] = _mm_sub_epi32( pInput[6] ,pInput[14] ); m2[7][1] = _mm_sub_epi32( pInput[7] ,pInput[15] ); 577 578 m1[0][0] = _mm_add_epi32( m2[0][0] , m2[2][0] ); m1[0][1] = _mm_add_epi32( m2[0][1] , m2[2][1] ); 579 m1[1][0] = _mm_add_epi32( m2[1][0] , m2[3][0] ); m1[1][1] = _mm_add_epi32( m2[1][1] , m2[3][1] ); 580 m1[2][0] = _mm_sub_epi32( m2[0][0] , m2[2][0] ); m1[2][1] = _mm_sub_epi32( m2[0][1] , m2[2][1] ); 581 m1[3][0] = _mm_sub_epi32( m2[1][0] , m2[3][0] ); m1[3][1] = _mm_sub_epi32( m2[1][1] , m2[3][1] ); 582 m1[4][0] = _mm_add_epi32( m2[4][0] , m2[6][0] ); m1[4][1] = _mm_add_epi32( m2[4][1] , m2[6][1] ); 583 m1[5][0] = _mm_add_epi32( m2[5][0] , m2[7][0] ); m1[5][1] = _mm_add_epi32( m2[5][1] , m2[7][1] ); 584 m1[6][0] = _mm_sub_epi32( m2[4][0] , m2[6][0] ); m1[6][1] = _mm_sub_epi32( m2[4][1] , m2[6][1] ); 585 m1[7][0] = _mm_sub_epi32( m2[5][0] , m2[7][0] ); m1[7][1] = _mm_sub_epi32( m2[5][1] , m2[7][1] ); 586 587 pInput[0 ] = _mm_add_epi32( m1[0][0] , m1[1][0] ); pInput[1 ] = _mm_add_epi32( m1[0][1] , m1[1][1] ); 588 pInput[2 ] = _mm_sub_epi32( m1[0][0] , m1[1][0] ); pInput[3 ] = _mm_sub_epi32( m1[0][1] , m1[1][1] ); 589 pInput[4 ] = _mm_add_epi32( m1[2][0] , m1[3][0] ); pInput[5 ] = _mm_add_epi32( m1[2][1] , m1[3][1] ); 590 pInput[6 ] = _mm_sub_epi32( m1[2][0] , m1[3][0] ); pInput[7 ] = _mm_sub_epi32( m1[2][1] , m1[3][1] ); 591 pInput[8 ] = _mm_add_epi32( m1[4][0] , m1[5][0] ); pInput[9 ] = _mm_add_epi32( m1[4][1] , m1[5][1] ); 592 pInput[10] = _mm_sub_epi32( m1[4][0] , m1[5][0] ); pInput[11] = _mm_sub_epi32( m1[4][1] , m1[5][1] ); 593 pInput[12] = _mm_add_epi32( m1[6][0] , m1[7][0] ); pInput[13] = _mm_add_epi32( m1[6][1] , m1[7][1] ); 594 pInput[14] = _mm_sub_epi32( m1[6][0] , m1[7][0] ); pInput[15] = _mm_sub_epi32( m1[6][1] , m1[7][1] ); 595 } 596 597 inline __m128i simdAbs32b( __m128i m ) 598 { 599 const __m128i zero = _mm_setzero_si128(); 600 __m128i tmp = _mm_sub_epi32( zero , m ); 601 __m128i mask = _mm_cmpgt_epi32( m , tmp ); 602 return( _mm_or_si128( _mm_and_si128( mask , m ) , _mm_andnot_si128( mask , tmp ) ) ); 603 } 604 605 UInt simdHADs8x8( const Pel * piOrg, const Pel * piCur, Int iStrideOrg, Int iStrideCur ) 606 { 607 __m128i mmDiff[8][2]; 608 __m128i mmZero = _mm_setzero_si128(); 609 for( Int n = 0 ; n < 8 ; n++ , piOrg += iStrideOrg , piCur += iStrideCur ) 610 { 611 __m128i diff = _mm_sub_epi16( _mm_loadu_si128( ( __m128i* )piOrg ) , _mm_loadu_si128( ( __m128i* )piCur ) ); 612 // sign extension 613 __m128i mask = _mm_cmplt_epi16( diff , mmZero ); 614 mmDiff[n][0] = _mm_unpacklo_epi16( diff , mask ); 615 mmDiff[n][1] = _mm_unpackhi_epi16( diff , mask ); 616 } 617 618 // transpose 619 simd8x8Transpose32b( &mmDiff[0][0] ); 620 621 // horizontal 622 simd8x8HAD1D32b( &mmDiff[0][0] , &mmDiff[0][0] ); 623 624 // transpose 625 simd8x8Transpose32b( &mmDiff[0][0] ); 626 627 // vertical 628 simd8x8HAD1D32b( &mmDiff[0][0] , &mmDiff[0][0] ); 629 630 __m128i mmSum = _mm_setzero_si128(); 631 for( Int n = 0 ; n < 8 ; n++ ) 632 { 633 mmSum = _mm_add_epi32( mmSum , simdAbs32b( mmDiff[n][0] ) ); 634 mmSum = _mm_add_epi32( mmSum , simdAbs32b( mmDiff[n][1] ) ); 635 } 636 mmSum = _mm_add_epi32( mmSum , _mm_shuffle_epi32( mmSum , _MM_SHUFFLE( 2 , 3 , 0 , 1 ) ) ); 637 mmSum = _mm_add_epi32( mmSum , _mm_shuffle_epi32( mmSum , _MM_SHUFFLE( 1 , 0 , 3 , 2 ) ) ); 638 639 UInt sad = _mm_cvtsi128_si32( mmSum ); 640 sad = ( sad + 2 ) >> 2; 641 642 return( sad ); 643 } 644 #endif 645 646 647 483 648 #if NH_3D_VSO 484 649 // SAIT_VSO_EST_A0033 … … 507 672 #if NH_3D 508 673 cDtParam.bUseIC = false; 509 #endif510 #if NH_3D_SDC_INTER511 674 cDtParam.bUseSDCMRSAD = false; 512 675 #endif … … 554 717 #endif 555 718 556 // ==================================================================================================================== 557 // Distortion functions 558 // ==================================================================================================================== 559 560 #if NH_3D_DBBP 719 #if NH_3D 561 720 //  562 721 // Masked distortion functions … … 599 758 600 759 AOF(!pcDtParam>bApplyWeight); 601 #if NH_3D_IC602 760 AOF(!pcDtParam>bUseIC); 603 #endif604 761 605 762 const Pel* piOrg = pcDtParam>pOrg; … … 674 831 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); 675 832 } 676 #if NH_3D _IC833 #if NH_3D 677 834 if( pcDtParam>bUseIC ) 678 835 { 679 836 return xGetSADic( pcDtParam ); 680 837 } 681 #endif 682 #if NH_3D_SDC_INTER 838 683 839 if( pcDtParam>bUseSDCMRSAD ) 684 840 { … … 696 852 Distortion uiSum = 0; 697 853 854 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 855 if( pcDtParam>bitDepth <= 10 ) 856 { 857 if( ( iCols & 0x07 ) == 0 ) 858 { 859 for( Int iRows = pcDtParam>iRows ; iRows != 0; iRows ) 860 { 861 uiSum += simdSADLine8n16b( piOrg , piCur , iCols ); 862 piOrg += iStrideOrg; 863 piCur += iStrideCur; 864 } 865 } 866 else 867 { 868 for( Int iRows = pcDtParam>iRows; iRows != 0; iRows ) 869 { 870 uiSum += simdSADLine4n16b( piOrg , piCur , iCols ); 871 piOrg += iStrideOrg; 872 piCur += iStrideCur; 873 } 874 } 875 } 876 else 877 { 878 #endif 698 879 for(Int iRows = pcDtParam>iRows ; iRows != 0; iRows ) 699 880 { … … 709 890 piCur += iStrideCur; 710 891 } 892 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 893 } 894 #endif 711 895 712 896 return ( uiSum >> distortionShift ); … … 720 904 } 721 905 722 #if NH_3D _IC906 #if NH_3D 723 907 if( pcDtParam>bUseIC ) 724 908 { 725 909 return xGetSAD4ic( pcDtParam ); 726 910 } 727 #endif728 #if NH_3D_SDC_INTER729 911 if( pcDtParam>bUseSDCMRSAD ) 730 912 { … … 743 925 Distortion uiSum = 0; 744 926 927 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 928 if( pcDtParam>bitDepth <= 10 ) 929 { 930 for( ; iRows != 0; iRows=iSubStep ) 931 { 932 uiSum += simdSADLine4n16b( piOrg , piCur , 4 ); 933 piOrg += iStrideOrg; 934 piCur += iStrideCur; 935 } 936 } 937 else 938 { 939 #endif 745 940 for( ; iRows != 0; iRows=iSubStep ) 746 941 { … … 753 948 piCur += iStrideCur; 754 949 } 950 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 951 } 952 #endif 755 953 756 954 uiSum <<= iSubShift; … … 765 963 } 766 964 767 #if NH_3D _IC965 #if NH_3D 768 966 if( pcDtParam>bUseIC ) 769 967 { 770 968 return xGetSAD8ic( pcDtParam ); 771 969 } 772 #endif773 #if NH_3D_SDC_INTER774 970 if( pcDtParam>bUseSDCMRSAD ) 775 971 { … … 788 984 Distortion uiSum = 0; 789 985 986 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 987 if( pcDtParam>bitDepth <= 10 ) 988 { 989 for( ; iRows != 0; iRows=iSubStep ) 990 { 991 uiSum += simdSADLine8n16b( piOrg , piCur , 8 ); 992 piOrg += iStrideOrg; 993 piCur += iStrideCur; 994 } 995 } 996 else 997 { 998 #endif 790 999 for( ; iRows != 0; iRows=iSubStep ) 791 1000 { … … 802 1011 piCur += iStrideCur; 803 1012 } 1013 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1014 } 1015 #endif 804 1016 805 1017 uiSum <<= iSubShift; … … 814 1026 } 815 1027 816 #if NH_3D _IC1028 #if NH_3D 817 1029 if( pcDtParam>bUseIC ) 818 1030 { 819 1031 return xGetSAD16ic( pcDtParam ); 820 1032 } 821 #endif822 #if NH_3D_SDC_INTER823 1033 if( pcDtParam>bUseSDCMRSAD ) 824 1034 { … … 837 1047 Distortion uiSum = 0; 838 1048 1049 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1050 if( pcDtParam>bitDepth <= 10 ) 1051 { 1052 for( ; iRows != 0; iRows=iSubStep ) 1053 { 1054 uiSum += simdSADLine8n16b( piOrg , piCur , 16 ); 1055 piOrg += iStrideOrg; 1056 piCur += iStrideCur; 1057 } 1058 } 1059 else 1060 { 1061 #endif 839 1062 for( ; iRows != 0; iRows=iSubStep ) 840 1063 { … … 859 1082 piCur += iStrideCur; 860 1083 } 1084 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1085 } 1086 #endif 861 1087 862 1088 uiSum <<= iSubShift; … … 870 1096 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); 871 1097 } 872 #if NH_3D _IC1098 #if NH_3D 873 1099 if( pcDtParam>bUseIC ) 874 1100 { 875 1101 return xGetSAD12ic( pcDtParam ); 876 1102 } 877 #endif878 #if NH_3D_SDC_INTER879 1103 if( pcDtParam>bUseSDCMRSAD ) 880 1104 { … … 918 1142 Distortion TComRdCost::xGetSAD16N( DistParam* pcDtParam ) 919 1143 { 920 #if NH_3D _IC1144 #if NH_3D 921 1145 if( pcDtParam>bUseIC ) 922 1146 { 923 1147 return xGetSAD16Nic( pcDtParam ); 924 1148 } 925 #endif926 #if NH_3D_SDC_INTER927 1149 if( pcDtParam>bUseSDCMRSAD ) 928 1150 { … … 942 1164 Distortion uiSum = 0; 943 1165 1166 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1167 if( pcDtParam>bitDepth <= 10 ) 1168 { 1169 for( ; iRows != 0; iRows=iSubStep ) 1170 { 1171 uiSum += simdSADLine8n16b( piOrg , piCur , iCols ); 1172 piOrg += iStrideOrg; 1173 piCur += iStrideCur; 1174 } 1175 } 1176 else 1177 { 1178 #endif 944 1179 for( ; iRows != 0; iRows=iSubStep ) 945 1180 { … … 966 1201 piCur += iStrideCur; 967 1202 } 1203 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1204 } 1205 #endif 968 1206 969 1207 uiSum <<= iSubShift; … … 978 1216 } 979 1217 980 #if NH_3D _IC1218 #if NH_3D 981 1219 if( pcDtParam>bUseIC ) 982 1220 { 983 1221 return xGetSAD32ic( pcDtParam ); 984 1222 } 985 #endif986 #if NH_3D_SDC_INTER987 1223 if( pcDtParam>bUseSDCMRSAD ) 988 1224 { … … 1001 1237 Distortion uiSum = 0; 1002 1238 1239 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1240 if( pcDtParam>bitDepth <= 10 ) 1241 { 1242 for( ; iRows != 0; iRows=iSubStep ) 1243 { 1244 uiSum += simdSADLine8n16b( piOrg , piCur , 32 ); 1245 piOrg += iStrideOrg; 1246 piCur += iStrideCur; 1247 } 1248 } 1249 else 1250 { 1251 #endif 1003 1252 for( ; iRows != 0; iRows=iSubStep ) 1004 1253 { … … 1039 1288 piCur += iStrideCur; 1040 1289 } 1290 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1291 } 1292 #endif 1041 1293 1042 1294 uiSum <<= iSubShift; … … 1051 1303 } 1052 1304 1053 #if NH_3D _IC1305 #if NH_3D 1054 1306 if( pcDtParam>bUseIC ) 1055 1307 { 1056 1308 return xGetSAD24ic( pcDtParam ); 1057 1309 } 1058 #endif1059 #if NH_3D_SDC_INTER1060 1310 if( pcDtParam>bUseSDCMRSAD ) 1061 1311 { … … 1074 1324 Distortion uiSum = 0; 1075 1325 1326 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1327 if( pcDtParam>bitDepth <= 10 ) 1328 { 1329 for( ; iRows != 0; iRows=iSubStep ) 1330 { 1331 uiSum += simdSADLine8n16b( piOrg , piCur , 24 ); 1332 piOrg += iStrideOrg; 1333 piCur += iStrideCur; 1334 } 1335 } 1336 else 1337 { 1338 #endif 1076 1339 for( ; iRows != 0; iRows=iSubStep ) 1077 1340 { … … 1104 1367 piCur += iStrideCur; 1105 1368 } 1369 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1370 } 1371 #endif 1106 1372 1107 1373 uiSum <<= iSubShift; … … 1116 1382 } 1117 1383 1118 #if NH_3D _IC1384 #if NH_3D 1119 1385 if( pcDtParam>bUseIC ) 1120 1386 { 1121 1387 return xGetSAD64ic( pcDtParam ); 1122 1388 } 1123 #endif1124 #if NH_3D_SDC_INTER1125 1389 if( pcDtParam>bUseSDCMRSAD ) 1126 1390 { … … 1139 1403 Distortion uiSum = 0; 1140 1404 1405 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1406 if( pcDtParam>bitDepth <= 10 ) 1407 { 1408 for( ; iRows != 0; iRows=iSubStep ) 1409 { 1410 uiSum += simdSADLine8n16b( piOrg , piCur , 64 ); 1411 piOrg += iStrideOrg; 1412 piCur += iStrideCur; 1413 } 1414 } 1415 else 1416 { 1417 #endif 1141 1418 for( ; iRows != 0; iRows=iSubStep ) 1142 1419 { … … 1209 1486 piCur += iStrideCur; 1210 1487 } 1488 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1489 } 1490 #endif 1211 1491 1212 1492 uiSum <<= iSubShift; … … 1220 1500 return TComRdCostWeightPrediction::xGetSADw( pcDtParam ); 1221 1501 } 1222 #if NH_3D _IC1502 #if NH_3D 1223 1503 if( pcDtParam>bUseIC ) 1224 1504 { 1225 1505 return xGetSAD48ic( pcDtParam ); 1226 1506 } 1227 #endif1228 #if NH_3D_SDC_INTER1229 1507 if( pcDtParam>bUseSDCMRSAD ) 1230 1508 { … … 1243 1521 Distortion uiSum = 0; 1244 1522 1523 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1524 if( pcDtParam>bitDepth <= 10 ) 1525 { 1526 for( ; iRows != 0; iRows=iSubStep ) 1527 { 1528 uiSum += simdSADLine8n16b( piOrg , piCur , 48 ); 1529 piOrg += iStrideOrg; 1530 piCur += iStrideCur; 1531 } 1532 } 1533 else 1534 { 1535 #endif 1245 1536 for( ; iRows != 0; iRows=iSubStep ) 1246 1537 { … … 1297 1588 piCur += iStrideCur; 1298 1589 } 1299 1590 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 1591 } 1592 #endif 1300 1593 uiSum <<= iSubShift; 1301 1594 return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam>bitDepth8) ); … … 1303 1596 1304 1597 1305 #if NH_3D _IC  NH_3D_SDC_INTER1598 #if NH_3D 1306 1599 UInt TComRdCost::xGetSADic( DistParam* pcDtParam ) 1307 1600 { … … 3138 3431 } 3139 3432 3140 Distortion TComRdCost::xCalcHADs8x8( const Pel *piOrg, const Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep ) 3141 { 3433 Distortion TComRdCost::xCalcHADs8x8( const Pel *piOrg, const Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep 3434 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 3435 , Int bitDepth 3436 #endif 3437 ) 3438 { 3439 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 3440 if( bitDepth <= 10 ) 3441 { 3442 return( simdHADs8x8( piOrg , piCur , iStrideOrg , iStrideCur ) ); 3443 } 3444 #endif 3142 3445 Int k, i, j, jj; 3143 3446 Distortion sad = 0; … … 3242 3545 return TComRdCostWeightPrediction::xGetHADsw( pcDtParam ); 3243 3546 } 3244 #if NH_3D _IC3547 #if NH_3D 3245 3548 if( pcDtParam>bUseIC ) 3246 3549 { 3247 3550 return xGetHADsic( pcDtParam ); 3248 3551 } 3249 #endif3250 #if NH_3D_SDC_INTER3251 3552 if( pcDtParam>bUseSDCMRSAD ) 3252 3553 { … … 3275 3576 for ( x=0; x<iCols; x+= 8 ) 3276 3577 { 3277 uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); 3578 uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep 3579 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 3580 , pcDtParam>bitDepth 3581 #endif 3582 ); 3278 3583 } 3279 3584 piOrg += iOffsetOrg; … … 3318 3623 } 3319 3624 3320 #if NH_3D _IC  NH_3D_SDC_INTER3625 #if NH_3D 3321 3626 UInt TComRdCost::xGetHADsic( DistParam* pcDtParam ) 3322 3627 { … … 3389 3694 for ( x=0; x<iCols; x+= 8 ) 3390 3695 { 3391 uiSum += xCalcHADs8x8( &tempOrgMinusDeltaDc[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); 3696 uiSum += xCalcHADs8x8( &tempOrgMinusDeltaDc[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep 3697 #if VECTOR_CODING__DISTORTION_CALCULATIONS && (RExt__HIGH_BIT_DEPTH_SUPPORT==0) 3698 , pcDtParam>bitDepth 3699 #endif 3700 ); 3392 3701 } 3393 3702 tempOrgMinusDeltaDc += iOffsetOrg; … … 3470 3779 m_uiLambdaMotionSSEVSO = (UInt)floor(65536.0 * m_dLambdaVSO ); 3471 3780 } 3472 #endif 3473 #if NH_3D_VSO 3781 3474 3782 Dist TComRdCost::xGetDistVSOMode4( Int iStartPosX, Int iStartPosY, Pel* piCur, Int iCurStride, Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, Bool bSAD ) 3475 3783 { … … 3626 3934 #endif 3627 3935 3936 3628 3937 //! \}
Note: See TracChangeset for help on using the changeset viewer.