Changeset 1313 in 3DVCSoftware for trunk/source/Lib/TLibCommon/TComTrQuant.cpp
- Timestamp:
- 13 Aug 2015, 17:38:13 (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/source/Lib/TLibCommon/TComTrQuant.cpp
r1179 r1313 2 2 * License, included below. This software may be subject to other third party 3 3 * and contributor rights, including patent rights, and no such rights are 4 * granted under this license. 4 * granted under this license. 5 5 * 6 * Copyright (c) 2010-2015, ITU/ISO/IEC6 * Copyright (c) 2010-2015, ITU/ISO/IEC 7 7 * All rights reserved. 8 8 * … … 38 38 #include <stdlib.h> 39 39 #include <math.h> 40 #include <limits> 40 41 #include <memory.h> 41 42 #include "TComTrQuant.h" 42 43 #include "TComPic.h" 43 44 #include "ContextTables.h" 45 #include "TComTU.h" 46 #include "Debug.h" 44 47 45 48 typedef struct … … 61 64 #define RDOQ_CHROMA 1 ///< use of RDOQ in chroma 62 65 66 63 67 // ==================================================================================================================== 64 // Tables68 // QpParam constructor 65 69 // ==================================================================================================================== 66 70 67 // RDOQ parameter 68 69 // ==================================================================================================================== 70 // Qp class member functions 71 // ==================================================================================================================== 72 73 QpParam::QpParam() 74 { 75 } 71 QpParam::QpParam(const Int qpy, 72 const ChannelType chType, 73 const Int qpBdOffset, 74 const Int chromaQPOffset, 75 const ChromaFormat chFmt ) 76 { 77 Int baseQp; 78 79 if(isLuma(chType)) 80 { 81 baseQp = qpy + qpBdOffset; 82 } 83 else 84 { 85 baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset ); 86 87 if(baseQp < 0) 88 { 89 baseQp = baseQp + qpBdOffset; 90 } 91 else 92 { 93 baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset; 94 } 95 } 96 97 Qp =baseQp; 98 per=baseQp/6; 99 rem=baseQp%6; 100 } 101 102 QpParam::QpParam(const TComDataCU &cu, const ComponentID compID) 103 { 104 Int chromaQpOffset = 0; 105 106 if (isChroma(compID)) 107 { 108 chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID); 109 chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID); 110 111 chromaQpOffset += cu.getSlice()->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEntry(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1]; 112 } 113 114 *this = QpParam(cu.getQP( 0 ), 115 toChannelType(compID), 116 cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)), 117 chromaQpOffset, 118 cu.getPic()->getChromaFormat()); 119 } 120 76 121 77 122 // ==================================================================================================================== … … 81 126 TComTrQuant::TComTrQuant() 82 127 { 83 m_cQP.clear();84 85 128 // allocate temporary buffers 86 m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];87 129 m_plTempCoeff = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ]; 130 88 131 // allocate bit estimation class (for RDOQ) 89 132 m_pcEstBitsSbac = new estBitsSbacStruct; … … 99 142 m_plTempCoeff = NULL; 100 143 } 101 144 102 145 // delete bit estimation class 103 146 if ( m_pcEstBitsSbac ) … … 111 154 Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice) 112 155 { 156 // NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled? 157 113 158 Int qpBase = pcSlice->getSliceQpBase(); 114 159 Int sliceQpused = pcSlice->getSliceQp(); 115 160 Int sliceQpnext; 116 161 Double alpha = qpBase < 17 ? 0.5 : 1; 117 162 118 163 Int cnt=0; 119 164 for(Int u=1; u<=LEVEL_RANGE; u++) 120 { 165 { 121 166 cnt += m_sliceNsamples[u] ; 122 167 } … … 161 206 } 162 207 163 m_qpDelta[qpBase] = sliceQpnext - qpBase; 208 m_qpDelta[qpBase] = sliceQpnext - qpBase; 164 209 } 165 210 … … 173 218 174 219 Void TComTrQuant::clearSliceARLCnt() 175 { 220 { 176 221 memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1)); 177 222 memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1)); … … 180 225 181 226 182 /** Set qP for Quantization.183 * \param qpy QPy184 * \param bLowpass185 * \param eSliceType186 * \param eTxtType187 * \param qpBdOffset188 * \param chromaQPOffset189 *190 * return void191 */192 Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)193 {194 Int qpScaled;195 196 if(eTxtType == TEXT_LUMA)197 {198 qpScaled = qpy + qpBdOffset;199 }200 else201 {202 qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );203 204 if(qpScaled < 0)205 {206 qpScaled = qpScaled + qpBdOffset;207 }208 else209 {210 qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;211 }212 }213 m_cQP.setQpParam( qpScaled );214 }215 227 216 228 #if MATRIX_MULT … … 222 234 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only 223 235 */ 224 void xTr(Int bitDepth, Pel *block, Int *coeff, UInt uiStride, UInt uiTrSize, UInt uiMode) 225 { 226 Int i,j,k,iSum; 227 Int tmp[32*32]; 228 const Short *iT; 236 Void xTr(Int bitDepth, Pel *block, TCoeff *coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxLog2TrDynamicRange) 237 { 238 UInt i,j,k; 239 TCoeff iSum; 240 TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE]; 241 const TMatrixCoeff *iT; 229 242 UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2; 230 243 231 244 if (uiTrSize==4) 232 245 { 233 iT = g_aiT4[0];246 iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]); 234 247 } 235 248 else if (uiTrSize==8) 236 249 { 237 iT = g_aiT8[ 0];250 iT = g_aiT8[TRANSFORM_FORWARD][0]; 238 251 } 239 252 else if (uiTrSize==16) 240 253 { 241 iT = g_aiT16[ 0];254 iT = g_aiT16[TRANSFORM_FORWARD][0]; 242 255 } 243 256 else if (uiTrSize==32) 244 257 { 245 iT = g_aiT32[ 0];258 iT = g_aiT32[TRANSFORM_FORWARD][0]; 246 259 } 247 260 else … … 250 263 } 251 264 252 Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8 253 Int add_1st = 1<<(shift_1st-1); 254 Int shift_2nd = uiLog2TrSize + 6; 255 Int add_2nd = 1<<(shift_2nd-1); 265 const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD]; 266 267 const Int shift_1st = (uiLog2TrSize + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange; 268 const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT; 269 const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0; 270 const Int add_2nd = 1<<(shift_2nd-1); 256 271 257 272 /* Horizontal transform */ 258 273 259 if (uiTrSize==4)260 {261 if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])262 {263 iT = g_as_DST_MAT_4[0];264 }265 }266 274 for (i=0; i<uiTrSize; i++) 267 275 { … … 276 284 } 277 285 } 278 286 279 287 /* Vertical transform */ 280 if (uiTrSize==4)281 {282 if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])283 {284 iT = g_as_DST_MAT_4[0];285 }286 else287 {288 iT = g_aiT4[0];289 }290 }291 288 for (i=0; i<uiTrSize; i++) 292 { 289 { 293 290 for (j=0; j<uiTrSize; j++) 294 291 { … … 296 293 for (k=0; k<uiTrSize; k++) 297 294 { 298 iSum += iT[i*uiTrSize+k]*tmp[j*uiTrSize+k]; 299 } 300 coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd; 295 iSum += iT[i*uiTrSize+k]*tmp[j*uiTrSize+k]; 296 } 297 coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd; 301 298 } 302 299 } … … 310 307 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only 311 308 */ 312 void xITr(Int *coeff, Pel *block, UInt uiStride, UInt uiTrSize, UInt uiMode) 313 { 314 Int i,j,k,iSum; 315 Int tmp[32*32]; 316 const Short *iT; 317 309 Void xITr(Int bitDepth, TCoeff *coeff, Pel *block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxLog2TrDynamicRange) 310 { 311 UInt i,j,k; 312 TCoeff iSum; 313 TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE]; 314 const TMatrixCoeff *iT; 315 318 316 if (uiTrSize==4) 319 317 { 320 iT = g_aiT4[0];318 iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]); 321 319 } 322 320 else if (uiTrSize==8) 323 321 { 324 iT = g_aiT8[ 0];322 iT = g_aiT8[TRANSFORM_INVERSE][0]; 325 323 } 326 324 else if (uiTrSize==16) 327 325 { 328 iT = g_aiT16[ 0];326 iT = g_aiT16[TRANSFORM_INVERSE][0]; 329 327 } 330 328 else if (uiTrSize==32) 331 329 { 332 iT = g_aiT32[ 0];330 iT = g_aiT32[TRANSFORM_INVERSE][0]; 333 331 } 334 332 else … … 336 334 assert(0); 337 335 } 338 339 Int shift_1st = SHIFT_INV_1ST; 340 Int add_1st = 1<<(shift_1st-1); 341 Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8; 342 Int add_2nd = 1<<(shift_2nd-1); 343 if (uiTrSize==4) 344 { 345 if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST 346 { 347 iT = g_as_DST_MAT_4[0]; 348 } 349 } 350 336 337 const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE]; 338 339 const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd 340 const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth; 341 const TCoeff clipMinimum = -(1 << maxLog2TrDynamicRange); 342 const TCoeff clipMaximum = (1 << maxLog2TrDynamicRange) - 1; 343 assert(shift_2nd>=0); 344 const Int add_1st = 1<<(shift_1st-1); 345 const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0; 346 351 347 /* Horizontal transform */ 352 348 for (i=0; i<uiTrSize; i++) 353 { 349 { 354 350 for (j=0; j<uiTrSize; j++) 355 351 { 356 352 iSum = 0; 357 353 for (k=0; k<uiTrSize; k++) 358 { 359 iSum += iT[k*uiTrSize+i]*coeff[k*uiTrSize+j]; 360 } 361 tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative 362 } 363 } 364 365 if (uiTrSize==4) 366 { 367 if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST 368 { 369 iT = g_as_DST_MAT_4[0]; 370 } 371 else 372 { 373 iT = g_aiT4[0]; 374 } 375 } 376 354 { 355 iSum += iT[k*uiTrSize+i]*coeff[k*uiTrSize+j]; 356 } 357 358 // Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied 359 tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st); 360 } 361 } 362 377 363 /* Vertical transform */ 378 364 for (i=0; i<uiTrSize; i++) 379 { 365 { 380 366 for (j=0; j<uiTrSize; j++) 381 367 { 382 368 iSum = 0; 383 369 for (k=0; k<uiTrSize; k++) 384 { 370 { 385 371 iSum += iT[k*uiTrSize+j]*tmp[i*uiTrSize+k]; 386 372 } 387 block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative 388 } 389 } 390 } 391 392 #else //MATRIX_MULT 373 374 block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd); 375 } 376 } 377 } 378 379 #endif //MATRIX_MULT 380 393 381 394 382 /** 4x4 forward transform implemented using partial butterfly structure (1D) … … 396 384 * \param dst output data (transform coefficients) 397 385 * \param shift specifies right shift after 1D transform 386 * \param line 398 387 */ 399 400 void partialButterfly4(Short *src,Short *dst,Int shift, Int line) 388 Void partialButterfly4(TCoeff *src, TCoeff *dst, Int shift, Int line) 401 389 { 402 390 Int j; 403 IntE[2],O[2];404 Int add = 1<<(shift-1);391 TCoeff E[2],O[2]; 392 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 405 393 406 394 for (j=0; j<line; j++) 407 { 395 { 408 396 /* E and O */ 409 397 E[0] = src[0] + src[3]; … … 412 400 O[1] = src[1] - src[2]; 413 401 414 dst[0] = (g_aiT4[0][0]*E[0] + g_aiT4[0][1]*E[1] + add)>>shift;415 dst[2*line] = (g_aiT4[ 2][0]*E[0] + g_aiT4[2][1]*E[1] + add)>>shift;416 dst[line] = (g_aiT4[1][0]*O[0] + g_aiT4[1][1]*O[1] + add)>>shift;417 dst[3*line] = (g_aiT4[ 3][0]*O[0] + g_aiT4[3][1]*O[1] + add)>>shift;402 dst[0] = (g_aiT4[TRANSFORM_FORWARD][0][0]*E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]*E[1] + add)>>shift; 403 dst[2*line] = (g_aiT4[TRANSFORM_FORWARD][2][0]*E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift; 404 dst[line] = (g_aiT4[TRANSFORM_FORWARD][1][0]*O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]*O[1] + add)>>shift; 405 dst[3*line] = (g_aiT4[TRANSFORM_FORWARD][3][0]*O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift; 418 406 419 407 src += 4; … … 422 410 } 423 411 424 // Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm 412 // Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm 425 413 // give identical results 426 void fastForwardDst(Short *block,Short *coeff,Int shift) // input block, output coeff 427 { 428 Int i, c[4]; 429 Int rnd_factor = 1<<(shift-1); 414 Void fastForwardDst(TCoeff *block, TCoeff *coeff, Int shift) // input block, output coeff 415 { 416 Int i; 417 TCoeff c[4]; 418 TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0; 430 419 for (i=0; i<4; i++) 431 420 { 432 421 // Intermediate Variables 433 c[0] = block[4*i+0] + block[4*i+3]; 434 c[1] = block[4*i+1] + block[4*i+3]; 435 c[2] = block[4*i+0] - block[4*i+1]; 436 c[3] = 74* block[4*i+2]; 437 438 coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift; 439 coeff[ 4+i] = ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3]) + rnd_factor ) >> shift; 440 coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift; 441 coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift; 442 } 443 } 444 445 void fastInverseDst(Short *tmp,Short *block,Int shift) // input tmp, output block 446 { 447 Int i, c[4]; 448 Int rnd_factor = 1<<(shift-1); 422 c[0] = block[4*i+0]; 423 c[1] = block[4*i+1]; 424 c[2] = block[4*i+2]; 425 c[3] = block[4*i+3]; 426 427 for (Int row = 0; row < 4; row++) 428 { 429 TCoeff result = 0; 430 for (Int column = 0; column < 4; column++) 431 { 432 result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers 433 } 434 435 coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift); 436 } 437 } 438 } 439 440 Void fastInverseDst(TCoeff *tmp, TCoeff *block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum) // input tmp, output block 441 { 442 Int i; 443 TCoeff c[4]; 444 TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0; 449 445 for (i=0; i<4; i++) 450 { 446 { 451 447 // Intermediate Variables 452 c[0] = tmp[ i] + tmp[ 8+i]; 453 c[1] = tmp[8+i] + tmp[12+i]; 454 c[2] = tmp[ i] - tmp[12+i]; 455 c[3] = 74* tmp[4+i]; 456 457 block[4*i+0] = Clip3( -32768, 32767, ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift ); 458 block[4*i+1] = Clip3( -32768, 32767, ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift ); 459 block[4*i+2] = Clip3( -32768, 32767, ( 74 * (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift ); 460 block[4*i+3] = Clip3( -32768, 32767, ( 55 * c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift ); 461 } 462 } 463 464 void partialButterflyInverse4(Short *src,Short *dst,Int shift, Int line) 448 c[0] = tmp[ i]; 449 c[1] = tmp[4 +i]; 450 c[2] = tmp[8 +i]; 451 c[3] = tmp[12+i]; 452 453 for (Int column = 0; column < 4; column++) 454 { 455 TCoeff &result = block[(i * 4) + column]; 456 457 result = 0; 458 for (Int row = 0; row < 4; row++) 459 { 460 result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers 461 } 462 463 result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift)); 464 } 465 } 466 } 467 468 /** 4x4 inverse transform implemented using partial butterfly structure (1D) 469 * \param src input data (transform coefficients) 470 * \param dst output data (residual) 471 * \param shift specifies right shift after 1D transform 472 * \param line 473 * \param outputMinimum minimum for clipping 474 * \param outputMaximum maximum for clipping 475 */ 476 Void partialButterflyInverse4(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum) 465 477 { 466 478 Int j; 467 IntE[2],O[2];468 Int add = 1<<(shift-1);479 TCoeff E[2],O[2]; 480 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 469 481 470 482 for (j=0; j<line; j++) 471 { 472 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 473 O[0] = g_aiT4[ 1][0]*src[line] + g_aiT4[3][0]*src[3*line];474 O[1] = g_aiT4[ 1][1]*src[line] + g_aiT4[3][1]*src[3*line];475 E[0] = g_aiT4[ 0][0]*src[0] + g_aiT4[2][0]*src[2*line];476 E[1] = g_aiT4[ 0][1]*src[0] + g_aiT4[2][1]*src[2*line];483 { 484 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 485 O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]*src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]*src[3*line]; 486 O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]*src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]*src[3*line]; 487 E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]*src[0] + g_aiT4[TRANSFORM_INVERSE][2][0]*src[2*line]; 488 E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]*src[0] + g_aiT4[TRANSFORM_INVERSE][2][1]*src[2*line]; 477 489 478 490 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */ 479 dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );480 dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );481 dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );482 dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );483 491 dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift ); 492 dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift ); 493 dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift ); 494 dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift ); 495 484 496 src ++; 485 497 dst += 4; … … 487 499 } 488 500 489 490 void partialButterfly8(Short *src,Short *dst,Int shift, Int line) 501 /** 8x8 forward transform implemented using partial butterfly structure (1D) 502 * \param src input data (residual) 503 * \param dst output data (transform coefficients) 504 * \param shift specifies right shift after 1D transform 505 * \param line 506 */ 507 Void partialButterfly8(TCoeff *src, TCoeff *dst, Int shift, Int line) 491 508 { 492 509 Int j,k; 493 IntE[4],O[4];494 IntEE[2],EO[2];495 Int add = 1<<(shift-1);510 TCoeff E[4],O[4]; 511 TCoeff EE[2],EO[2]; 512 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 496 513 497 514 for (j=0; j<line; j++) 498 { 515 { 499 516 /* E and O*/ 500 517 for (k=0;k<4;k++) … … 502 519 E[k] = src[k] + src[7-k]; 503 520 O[k] = src[k] - src[7-k]; 504 } 521 } 505 522 /* EE and EO */ 506 EE[0] = E[0] + E[3]; 523 EE[0] = E[0] + E[3]; 507 524 EO[0] = E[0] - E[3]; 508 525 EE[1] = E[1] + E[2]; 509 526 EO[1] = E[1] - E[2]; 510 527 511 dst[0] = (g_aiT8[0][0]*EE[0] + g_aiT8[0][1]*EE[1] + add)>>shift;512 dst[4*line] = (g_aiT8[ 4][0]*EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;513 dst[2*line] = (g_aiT8[ 2][0]*EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;514 dst[6*line] = (g_aiT8[ 6][0]*EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;515 516 dst[line] = (g_aiT8[1][0]*O[0] + g_aiT8[1][1]*O[1] + g_aiT8[1][2]*O[2] + g_aiT8[1][3]*O[3] + add)>>shift;517 dst[3*line] = (g_aiT8[ 3][0]*O[0] + g_aiT8[3][1]*O[1] + g_aiT8[3][2]*O[2] + g_aiT8[3][3]*O[3] + add)>>shift;518 dst[5*line] = (g_aiT8[ 5][0]*O[0] + g_aiT8[5][1]*O[1] + g_aiT8[5][2]*O[2] + g_aiT8[5][3]*O[3] + add)>>shift;519 dst[7*line] = (g_aiT8[ 7][0]*O[0] + g_aiT8[7][1]*O[1] + g_aiT8[7][2]*O[2] + g_aiT8[7][3]*O[3] + add)>>shift;528 dst[0] = (g_aiT8[TRANSFORM_FORWARD][0][0]*EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]*EE[1] + add)>>shift; 529 dst[4*line] = (g_aiT8[TRANSFORM_FORWARD][4][0]*EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift; 530 dst[2*line] = (g_aiT8[TRANSFORM_FORWARD][2][0]*EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift; 531 dst[6*line] = (g_aiT8[TRANSFORM_FORWARD][6][0]*EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift; 532 533 dst[line] = (g_aiT8[TRANSFORM_FORWARD][1][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]*O[3] + add)>>shift; 534 dst[3*line] = (g_aiT8[TRANSFORM_FORWARD][3][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift; 535 dst[5*line] = (g_aiT8[TRANSFORM_FORWARD][5][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift; 536 dst[7*line] = (g_aiT8[TRANSFORM_FORWARD][7][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift; 520 537 521 538 src += 8; … … 524 541 } 525 542 526 527 void partialButterflyInverse8(Short *src,Short *dst,Int shift, Int line) 543 /** 8x8 inverse transform implemented using partial butterfly structure (1D) 544 * \param src input data (transform coefficients) 545 * \param dst output data (residual) 546 * \param shift specifies right shift after 1D transform 547 * \param line 548 * \param outputMinimum minimum for clipping 549 * \param outputMaximum maximum for clipping 550 */ 551 Void partialButterflyInverse8(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum) 528 552 { 529 553 Int j,k; 530 IntE[4],O[4];531 IntEE[2],EO[2];532 Int add = 1<<(shift-1);533 534 for (j=0; j<line; j++) 535 { 554 TCoeff E[4],O[4]; 555 TCoeff EE[2],EO[2]; 556 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 557 558 for (j=0; j<line; j++) 559 { 536 560 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 537 561 for (k=0;k<4;k++) 538 562 { 539 O[k] = g_aiT8[ 1][k]*src[line] + g_aiT8[ 3][k]*src[3*line] + g_aiT8[ 5][k]*src[5*line] + g_aiT8[ 7][k]*src[7*line]; 540 } 541 542 EO[0] = g_aiT8[2][0]*src[ 2*line ] + g_aiT8[6][0]*src[ 6*line ]; 543 EO[1] = g_aiT8[2][1]*src[ 2*line ] + g_aiT8[6][1]*src[ 6*line ]; 544 EE[0] = g_aiT8[0][0]*src[ 0 ] + g_aiT8[4][0]*src[ 4*line ]; 545 EE[1] = g_aiT8[0][1]*src[ 0 ] + g_aiT8[4][1]*src[ 4*line ]; 546 547 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */ 563 O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]*src[line] + g_aiT8[TRANSFORM_INVERSE][ 3][k]*src[3*line] + 564 g_aiT8[TRANSFORM_INVERSE][ 5][k]*src[5*line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]*src[7*line]; 565 } 566 567 EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]*src[ 2*line ] + g_aiT8[TRANSFORM_INVERSE][6][0]*src[ 6*line ]; 568 EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]*src[ 2*line ] + g_aiT8[TRANSFORM_INVERSE][6][1]*src[ 6*line ]; 569 EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]*src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][0]*src[ 4*line ]; 570 EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]*src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][1]*src[ 4*line ]; 571 572 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */ 548 573 E[0] = EE[0] + EO[0]; 549 574 E[3] = EE[0] - EO[0]; … … 552 577 for (k=0;k<4;k++) 553 578 { 554 dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );555 dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );556 } 579 dst[ k ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift ); 580 dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift ); 581 } 557 582 src ++; 558 583 dst += 8; … … 560 585 } 561 586 562 563 void partialButterfly16(Short *src,Short *dst,Int shift, Int line) 587 /** 16x16 forward transform implemented using partial butterfly structure (1D) 588 * \param src input data (residual) 589 * \param dst output data (transform coefficients) 590 * \param shift specifies right shift after 1D transform 591 * \param line 592 */ 593 Void partialButterfly16(TCoeff *src, TCoeff *dst, Int shift, Int line) 564 594 { 565 595 Int j,k; 566 IntE[8],O[8];567 IntEE[4],EO[4];568 IntEEE[2],EEO[2];569 Int add = 1<<(shift-1);570 571 for (j=0; j<line; j++) 572 { 596 TCoeff E[8],O[8]; 597 TCoeff EE[4],EO[4]; 598 TCoeff EEE[2],EEO[2]; 599 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 600 601 for (j=0; j<line; j++) 602 { 573 603 /* E and O*/ 574 604 for (k=0;k<8;k++) … … 576 606 E[k] = src[k] + src[15-k]; 577 607 O[k] = src[k] - src[15-k]; 578 } 608 } 579 609 /* EE and EO */ 580 610 for (k=0;k<4;k++) … … 584 614 } 585 615 /* EEE and EEO */ 586 EEE[0] = EE[0] + EE[3]; 616 EEE[0] = EE[0] + EE[3]; 587 617 EEO[0] = EE[0] - EE[3]; 588 618 EEE[1] = EE[1] + EE[2]; 589 619 EEO[1] = EE[1] - EE[2]; 590 620 591 dst[ 0 ] = (g_aiT16[ 0][0]*EEE[0] + g_aiT16[ 0][1]*EEE[1] + add)>>shift;592 dst[ 8*line ] = (g_aiT16[ 8][0]*EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;593 dst[ 4*line ] = (g_aiT16[ 4][0]*EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;594 dst[ 12*line] = (g_aiT16[ 12][0]*EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;621 dst[ 0 ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]*EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]*EEE[1] + add)>>shift; 622 dst[ 8*line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]*EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift; 623 dst[ 4*line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]*EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift; 624 dst[ 12*line] = (g_aiT16[TRANSFORM_FORWARD][12][0]*EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift; 595 625 596 626 for (k=2;k<16;k+=4) 597 627 { 598 dst[ k*line ] = (g_aiT16[k][0]*EO[0] + g_aiT16[k][1]*EO[1] + g_aiT16[k][2]*EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift; 628 dst[ k*line ] = (g_aiT16[TRANSFORM_FORWARD][k][0]*EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] + 629 g_aiT16[TRANSFORM_FORWARD][k][2]*EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]*EO[3] + add)>>shift; 599 630 } 600 631 601 632 for (k=1;k<16;k+=2) 602 633 { 603 dst[ k*line ] = (g_aiT16[k][0]*O[0] + g_aiT16[k][1]*O[1] + g_aiT16[k][2]*O[2] + g_aiT16[k][3]*O[3] + 604 g_aiT16[k][4]*O[4] + g_aiT16[k][5]*O[5] + g_aiT16[k][6]*O[6] + g_aiT16[k][7]*O[7] + add)>>shift; 634 dst[ k*line ] = (g_aiT16[TRANSFORM_FORWARD][k][0]*O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] + 635 g_aiT16[TRANSFORM_FORWARD][k][2]*O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]*O[3] + 636 g_aiT16[TRANSFORM_FORWARD][k][4]*O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]*O[5] + 637 g_aiT16[TRANSFORM_FORWARD][k][6]*O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]*O[7] + add)>>shift; 605 638 } 606 639 607 640 src += 16; 608 dst ++; 609 610 } 611 } 612 613 614 void partialButterflyInverse16(Short *src,Short *dst,Int shift, Int line) 641 dst ++; 642 643 } 644 } 645 646 /** 16x16 inverse transform implemented using partial butterfly structure (1D) 647 * \param src input data (transform coefficients) 648 * \param dst output data (residual) 649 * \param shift specifies right shift after 1D transform 650 * \param line 651 * \param outputMinimum minimum for clipping 652 * \param outputMaximum maximum for clipping 653 */ 654 Void partialButterflyInverse16(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum) 615 655 { 616 656 Int j,k; 617 IntE[8],O[8];618 IntEE[4],EO[4];619 IntEEE[2],EEO[2];620 Int add = 1<<(shift-1);657 TCoeff E[8],O[8]; 658 TCoeff EE[4],EO[4]; 659 TCoeff EEE[2],EEO[2]; 660 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 621 661 622 662 for (j=0; j<line; j++) 623 { 663 { 624 664 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 625 665 for (k=0;k<8;k++) 626 666 { 627 O[k] = g_aiT16[ 1][k]*src[ line] + g_aiT16[ 3][k]*src[ 3*line] + g_aiT16[ 5][k]*src[ 5*line] + g_aiT16[ 7][k]*src[ 7*line] + 628 g_aiT16[ 9][k]*src[ 9*line] + g_aiT16[11][k]*src[11*line] + g_aiT16[13][k]*src[13*line] + g_aiT16[15][k]*src[15*line]; 667 O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]*src[ line] + g_aiT16[TRANSFORM_INVERSE][ 3][k]*src[ 3*line] + 668 g_aiT16[TRANSFORM_INVERSE][ 5][k]*src[ 5*line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]*src[ 7*line] + 669 g_aiT16[TRANSFORM_INVERSE][ 9][k]*src[ 9*line] + g_aiT16[TRANSFORM_INVERSE][11][k]*src[11*line] + 670 g_aiT16[TRANSFORM_INVERSE][13][k]*src[13*line] + g_aiT16[TRANSFORM_INVERSE][15][k]*src[15*line]; 629 671 } 630 672 for (k=0;k<4;k++) 631 673 { 632 EO[k] = g_aiT16[ 2][k]*src[ 2*line] + g_aiT16[ 6][k]*src[ 6*line] + g_aiT16[10][k]*src[10*line] + g_aiT16[14][k]*src[14*line]; 633 } 634 EEO[0] = g_aiT16[4][0]*src[ 4*line ] + g_aiT16[12][0]*src[ 12*line ]; 635 EEE[0] = g_aiT16[0][0]*src[ 0 ] + g_aiT16[ 8][0]*src[ 8*line ]; 636 EEO[1] = g_aiT16[4][1]*src[ 4*line ] + g_aiT16[12][1]*src[ 12*line ]; 637 EEE[1] = g_aiT16[0][1]*src[ 0 ] + g_aiT16[ 8][1]*src[ 8*line ]; 638 639 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */ 674 EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]*src[ 2*line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]*src[ 6*line] + 675 g_aiT16[TRANSFORM_INVERSE][10][k]*src[10*line] + g_aiT16[TRANSFORM_INVERSE][14][k]*src[14*line]; 676 } 677 EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]*src[ 4*line ] + g_aiT16[TRANSFORM_INVERSE][12][0]*src[ 12*line ]; 678 EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]*src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]*src[ 8*line ]; 679 EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]*src[ 4*line ] + g_aiT16[TRANSFORM_INVERSE][12][1]*src[ 12*line ]; 680 EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]*src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]*src[ 8*line ]; 681 682 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */ 640 683 for (k=0;k<2;k++) 641 684 { 642 685 EE[k] = EEE[k] + EEO[k]; 643 686 EE[k+2] = EEE[1-k] - EEO[1-k]; 644 } 687 } 645 688 for (k=0;k<4;k++) 646 689 { 647 690 E[k] = EE[k] + EO[k]; 648 691 E[k+4] = EE[3-k] - EO[3-k]; 649 } 692 } 650 693 for (k=0;k<8;k++) 651 694 { 652 dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );653 dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );654 } 655 src ++; 695 dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift ); 696 dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift ); 697 } 698 src ++; 656 699 dst += 16; 657 700 } 658 701 } 659 702 660 661 void partialButterfly32(Short *src,Short *dst,Int shift, Int line) 703 /** 32x32 forward transform implemented using partial butterfly structure (1D) 704 * \param src input data (residual) 705 * \param dst output data (transform coefficients) 706 * \param shift specifies right shift after 1D transform 707 * \param line 708 */ 709 Void partialButterfly32(TCoeff *src, TCoeff *dst, Int shift, Int line) 662 710 { 663 711 Int j,k; 664 IntE[16],O[16];665 IntEE[8],EO[8];666 IntEEE[4],EEO[4];667 IntEEEE[2],EEEO[2];668 Int add = 1<<(shift-1);712 TCoeff E[16],O[16]; 713 TCoeff EE[8],EO[8]; 714 TCoeff EEE[4],EEO[4]; 715 TCoeff EEEE[2],EEEO[2]; 716 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 669 717 670 718 for (j=0; j<line; j++) 671 { 719 { 672 720 /* E and O*/ 673 721 for (k=0;k<16;k++) … … 675 723 E[k] = src[k] + src[31-k]; 676 724 O[k] = src[k] - src[31-k]; 677 } 725 } 678 726 /* EE and EO */ 679 727 for (k=0;k<8;k++) … … 689 737 } 690 738 /* EEEE and EEEO */ 691 EEEE[0] = EEE[0] + EEE[3]; 739 EEEE[0] = EEE[0] + EEE[3]; 692 740 EEEO[0] = EEE[0] - EEE[3]; 693 741 EEEE[1] = EEE[1] + EEE[2]; 694 742 EEEO[1] = EEE[1] - EEE[2]; 695 743 696 dst[ 0 ] = (g_aiT32[ 0][0]*EEEE[0] + g_aiT32[ 0][1]*EEEE[1] + add)>>shift;697 dst[ 16*line ] = (g_aiT32[ 16][0]*EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;698 dst[ 8*line ] = (g_aiT32[ 8][0]*EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;699 dst[ 24*line ] = (g_aiT32[ 24][0]*EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;744 dst[ 0 ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]*EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]*EEEE[1] + add)>>shift; 745 dst[ 16*line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]*EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift; 746 dst[ 8*line ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]*EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift; 747 dst[ 24*line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]*EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift; 700 748 for (k=4;k<32;k+=8) 701 749 { 702 dst[ k*line ] = (g_aiT32[k][0]*EEO[0] + g_aiT32[k][1]*EEO[1] + g_aiT32[k][2]*EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift; 703 } 750 dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][0]*EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] + 751 g_aiT32[TRANSFORM_FORWARD][k][2]*EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]*EEO[3] + add)>>shift; 752 } 704 753 for (k=2;k<32;k+=4) 705 754 { 706 dst[ k*line ] = (g_aiT32[k][0]*EO[0] + g_aiT32[k][1]*EO[1] + g_aiT32[k][2]*EO[2] + g_aiT32[k][3]*EO[3] + 707 g_aiT32[k][4]*EO[4] + g_aiT32[k][5]*EO[5] + g_aiT32[k][6]*EO[6] + g_aiT32[k][7]*EO[7] + add)>>shift; 708 } 755 dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][0]*EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] + 756 g_aiT32[TRANSFORM_FORWARD][k][2]*EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]*EO[3] + 757 g_aiT32[TRANSFORM_FORWARD][k][4]*EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]*EO[5] + 758 g_aiT32[TRANSFORM_FORWARD][k][6]*EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]*EO[7] + add)>>shift; 759 } 709 760 for (k=1;k<32;k+=2) 710 761 { 711 dst[ k*line ] = (g_aiT32[k][ 0]*O[ 0] + g_aiT32[k][ 1]*O[ 1] + g_aiT32[k][ 2]*O[ 2] + g_aiT32[k][ 3]*O[ 3] + 712 g_aiT32[k][ 4]*O[ 4] + g_aiT32[k][ 5]*O[ 5] + g_aiT32[k][ 6]*O[ 6] + g_aiT32[k][ 7]*O[ 7] + 713 g_aiT32[k][ 8]*O[ 8] + g_aiT32[k][ 9]*O[ 9] + g_aiT32[k][10]*O[10] + g_aiT32[k][11]*O[11] + 714 g_aiT32[k][12]*O[12] + g_aiT32[k][13]*O[13] + g_aiT32[k][14]*O[14] + g_aiT32[k][15]*O[15] + add)>>shift; 715 } 762 dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]*O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] + 763 g_aiT32[TRANSFORM_FORWARD][k][ 2]*O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]*O[ 3] + 764 g_aiT32[TRANSFORM_FORWARD][k][ 4]*O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]*O[ 5] + 765 g_aiT32[TRANSFORM_FORWARD][k][ 6]*O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]*O[ 7] + 766 g_aiT32[TRANSFORM_FORWARD][k][ 8]*O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]*O[ 9] + 767 g_aiT32[TRANSFORM_FORWARD][k][10]*O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]*O[11] + 768 g_aiT32[TRANSFORM_FORWARD][k][12]*O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]*O[13] + 769 g_aiT32[TRANSFORM_FORWARD][k][14]*O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]*O[15] + add)>>shift; 770 } 771 716 772 src += 32; 717 773 dst ++; … … 719 775 } 720 776 721 722 void partialButterflyInverse32(Short *src,Short *dst,Int shift, Int line) 777 /** 32x32 inverse transform implemented using partial butterfly structure (1D) 778 * \param src input data (transform coefficients) 779 * \param dst output data (residual) 780 * \param shift specifies right shift after 1D transform 781 * \param line 782 * \param outputMinimum minimum for clipping 783 * \param outputMaximum maximum for clipping 784 */ 785 Void partialButterflyInverse32(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum) 723 786 { 724 787 Int j,k; 725 IntE[16],O[16];726 IntEE[8],EO[8];727 IntEEE[4],EEO[4];728 IntEEEE[2],EEEO[2];729 Int add = 1<<(shift-1);788 TCoeff E[16],O[16]; 789 TCoeff EE[8],EO[8]; 790 TCoeff EEE[4],EEO[4]; 791 TCoeff EEEE[2],EEEO[2]; 792 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0; 730 793 731 794 for (j=0; j<line; j++) 732 { 795 { 733 796 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ 734 797 for (k=0;k<16;k++) 735 798 { 736 O[k] = g_aiT32[ 1][k]*src[ line ] + g_aiT32[ 3][k]*src[ 3*line ] + g_aiT32[ 5][k]*src[ 5*line ] + g_aiT32[ 7][k]*src[ 7*line ] + 737 g_aiT32[ 9][k]*src[ 9*line ] + g_aiT32[11][k]*src[ 11*line ] + g_aiT32[13][k]*src[ 13*line ] + g_aiT32[15][k]*src[ 15*line ] + 738 g_aiT32[17][k]*src[ 17*line ] + g_aiT32[19][k]*src[ 19*line ] + g_aiT32[21][k]*src[ 21*line ] + g_aiT32[23][k]*src[ 23*line ] + 739 g_aiT32[25][k]*src[ 25*line ] + g_aiT32[27][k]*src[ 27*line ] + g_aiT32[29][k]*src[ 29*line ] + g_aiT32[31][k]*src[ 31*line ]; 799 O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]*src[ line ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]*src[ 3*line ] + 800 g_aiT32[TRANSFORM_INVERSE][ 5][k]*src[ 5*line ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]*src[ 7*line ] + 801 g_aiT32[TRANSFORM_INVERSE][ 9][k]*src[ 9*line ] + g_aiT32[TRANSFORM_INVERSE][11][k]*src[ 11*line ] + 802 g_aiT32[TRANSFORM_INVERSE][13][k]*src[ 13*line ] + g_aiT32[TRANSFORM_INVERSE][15][k]*src[ 15*line ] + 803 g_aiT32[TRANSFORM_INVERSE][17][k]*src[ 17*line ] + g_aiT32[TRANSFORM_INVERSE][19][k]*src[ 19*line ] + 804 g_aiT32[TRANSFORM_INVERSE][21][k]*src[ 21*line ] + g_aiT32[TRANSFORM_INVERSE][23][k]*src[ 23*line ] + 805 g_aiT32[TRANSFORM_INVERSE][25][k]*src[ 25*line ] + g_aiT32[TRANSFORM_INVERSE][27][k]*src[ 27*line ] + 806 g_aiT32[TRANSFORM_INVERSE][29][k]*src[ 29*line ] + g_aiT32[TRANSFORM_INVERSE][31][k]*src[ 31*line ]; 740 807 } 741 808 for (k=0;k<8;k++) 742 809 { 743 EO[k] = g_aiT32[ 2][k]*src[ 2*line ] + g_aiT32[ 6][k]*src[ 6*line ] + g_aiT32[10][k]*src[ 10*line ] + g_aiT32[14][k]*src[ 14*line ] + 744 g_aiT32[18][k]*src[ 18*line ] + g_aiT32[22][k]*src[ 22*line ] + g_aiT32[26][k]*src[ 26*line ] + g_aiT32[30][k]*src[ 30*line ]; 810 EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]*src[ 2*line ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]*src[ 6*line ] + 811 g_aiT32[TRANSFORM_INVERSE][10][k]*src[ 10*line ] + g_aiT32[TRANSFORM_INVERSE][14][k]*src[ 14*line ] + 812 g_aiT32[TRANSFORM_INVERSE][18][k]*src[ 18*line ] + g_aiT32[TRANSFORM_INVERSE][22][k]*src[ 22*line ] + 813 g_aiT32[TRANSFORM_INVERSE][26][k]*src[ 26*line ] + g_aiT32[TRANSFORM_INVERSE][30][k]*src[ 30*line ]; 745 814 } 746 815 for (k=0;k<4;k++) 747 816 { 748 EEO[k] = g_aiT32[4][k]*src[ 4*line ] + g_aiT32[12][k]*src[ 12*line ] + g_aiT32[20][k]*src[ 20*line ] + g_aiT32[28][k]*src[ 28*line ]; 749 } 750 EEEO[0] = g_aiT32[8][0]*src[ 8*line ] + g_aiT32[24][0]*src[ 24*line ]; 751 EEEO[1] = g_aiT32[8][1]*src[ 8*line ] + g_aiT32[24][1]*src[ 24*line ]; 752 EEEE[0] = g_aiT32[0][0]*src[ 0 ] + g_aiT32[16][0]*src[ 16*line ]; 753 EEEE[1] = g_aiT32[0][1]*src[ 0 ] + g_aiT32[16][1]*src[ 16*line ]; 817 EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]*src[ 4*line ] + g_aiT32[TRANSFORM_INVERSE][12][k]*src[ 12*line ] + 818 g_aiT32[TRANSFORM_INVERSE][20][k]*src[ 20*line ] + g_aiT32[TRANSFORM_INVERSE][28][k]*src[ 28*line ]; 819 } 820 EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]*src[ 8*line ] + g_aiT32[TRANSFORM_INVERSE][24][0]*src[ 24*line ]; 821 EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]*src[ 8*line ] + g_aiT32[TRANSFORM_INVERSE][24][1]*src[ 24*line ]; 822 EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]*src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][0]*src[ 16*line ]; 823 EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]*src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][1]*src[ 16*line ]; 754 824 755 825 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */ … … 757 827 EEE[3] = EEEE[0] - EEEO[0]; 758 828 EEE[1] = EEEE[1] + EEEO[1]; 759 EEE[2] = EEEE[1] - EEEO[1]; 829 EEE[2] = EEEE[1] - EEEO[1]; 760 830 for (k=0;k<4;k++) 761 831 { 762 832 EE[k] = EEE[k] + EEO[k]; 763 833 EE[k+4] = EEE[3-k] - EEO[3-k]; 764 } 834 } 765 835 for (k=0;k<8;k++) 766 836 { 767 837 E[k] = EE[k] + EO[k]; 768 838 E[k+8] = EE[7-k] - EO[7-k]; 769 } 839 } 770 840 for (k=0;k<16;k++) 771 841 { 772 dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );773 dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );842 dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift ); 843 dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift ); 774 844 } 775 845 src ++; … … 779 849 780 850 /** MxN forward transform (2D) 781 * \param block input data (residual) 782 * \param coeff output data (transform coefficients) 783 * \param iWidth input data (width of transform) 784 * \param iHeight input data (height of transform) 851 * \param bitDepth [in] bit depth 852 * \param block [in] residual block 853 * \param coeff [out] transform coefficients 854 * \param iWidth [in] width of transform 855 * \param iHeight [in] height of transform 856 * \param useDST [in] 857 * \param maxLog2TrDynamicRange [in] 858 785 859 */ 786 void xTrMxN(Int bitDepth, Short *block,Short *coeff, Int iWidth, Int iHeight, UInt uiMode) 787 { 788 Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8 789 Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6 790 791 Short tmp[ 64 * 64 ]; 792 793 if( iWidth == 4 && iHeight == 4) 794 { 795 if (uiMode != REG_DCT) 796 { 797 fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output 798 fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output 799 } 800 else 801 { 802 partialButterfly4(block, tmp, shift_1st, iHeight); 803 partialButterfly4(tmp, coeff, shift_2nd, iWidth); 804 } 805 806 } 807 else if( iWidth == 8 && iHeight == 8) 808 { 809 partialButterfly8( block, tmp, shift_1st, iHeight ); 810 partialButterfly8( tmp, coeff, shift_2nd, iWidth ); 811 } 812 else if( iWidth == 16 && iHeight == 16) 813 { 814 partialButterfly16( block, tmp, shift_1st, iHeight ); 815 partialButterfly16( tmp, coeff, shift_2nd, iWidth ); 816 } 817 else if( iWidth == 32 && iHeight == 32) 818 { 819 partialButterfly32( block, tmp, shift_1st, iHeight ); 820 partialButterfly32( tmp, coeff, shift_2nd, iWidth ); 821 } 822 } 860 Void xTrMxN(Int bitDepth, TCoeff *block, TCoeff *coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxLog2TrDynamicRange) 861 { 862 const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD]; 863 864 const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange; 865 const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT; 866 867 assert(shift_1st >= 0); 868 assert(shift_2nd >= 0); 869 870 TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ]; 871 872 switch (iWidth) 873 { 874 case 4: 875 { 876 if ((iHeight == 4) && useDST) // Check for DCT or DST 877 { 878 fastForwardDst( block, tmp, shift_1st ); 879 } 880 else 881 { 882 partialButterfly4 ( block, tmp, shift_1st, iHeight ); 883 } 884 } 885 break; 886 887 case 8: partialButterfly8 ( block, tmp, shift_1st, iHeight ); break; 888 case 16: partialButterfly16( block, tmp, shift_1st, iHeight ); break; 889 case 32: partialButterfly32( block, tmp, shift_1st, iHeight ); break; 890 default: 891 assert(0); exit (1); break; 892 } 893 894 switch (iHeight) 895 { 896 case 4: 897 { 898 if ((iWidth == 4) && useDST) // Check for DCT or DST 899 { 900 fastForwardDst( tmp, coeff, shift_2nd ); 901 } 902 else 903 { 904 partialButterfly4 ( tmp, coeff, shift_2nd, iWidth ); 905 } 906 } 907 break; 908 909 case 8: partialButterfly8 ( tmp, coeff, shift_2nd, iWidth ); break; 910 case 16: partialButterfly16( tmp, coeff, shift_2nd, iWidth ); break; 911 case 32: partialButterfly32( tmp, coeff, shift_2nd, iWidth ); break; 912 default: 913 assert(0); exit (1); break; 914 } 915 } 916 917 823 918 /** MxN inverse transform (2D) 824 * \param coeff input data (transform coefficients) 825 * \param block output data (residual) 826 * \param iWidth input data (width of transform) 827 * \param iHeight input data (height of transform) 919 * \param bitDepth [in] bit depth 920 * \param coeff [in] transform coefficients 921 * \param block [out] residual block 922 * \param iWidth [in] width of transform 923 * \param iHeight [in] height of transform 924 * \param useDST [in] 925 * \param maxLog2TrDynamicRange [in] 828 926 */ 829 void xITrMxN(Int bitDepth, Short *coeff,Short *block, Int iWidth, Int iHeight, UInt uiMode) 830 { 831 Int shift_1st = SHIFT_INV_1ST; 832 Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8); 833 834 Short tmp[ 64*64]; 835 if( iWidth == 4 && iHeight == 4) 836 { 837 if (uiMode != REG_DCT) 838 { 839 fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output 840 fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output 841 } 842 else 843 { 844 partialButterflyInverse4(coeff,tmp,shift_1st,iWidth); 845 partialButterflyInverse4(tmp,block,shift_2nd,iHeight); 846 } 847 } 848 else if( iWidth == 8 && iHeight == 8) 849 { 850 partialButterflyInverse8(coeff,tmp,shift_1st,iWidth); 851 partialButterflyInverse8(tmp,block,shift_2nd,iHeight); 852 } 853 else if( iWidth == 16 && iHeight == 16) 854 { 855 partialButterflyInverse16(coeff,tmp,shift_1st,iWidth); 856 partialButterflyInverse16(tmp,block,shift_2nd,iHeight); 857 } 858 else if( iWidth == 32 && iHeight == 32) 859 { 860 partialButterflyInverse32(coeff,tmp,shift_1st,iWidth); 861 partialButterflyInverse32(tmp,block,shift_2nd,iHeight); 862 } 863 } 864 865 #endif //MATRIX_MULT 866 867 // To minimize the distortion only. No rate is considered. 868 Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, UInt const *scan, Int* deltaU, Int width, Int height ) 869 { 927 Void xITrMxN(Int bitDepth, TCoeff *coeff, TCoeff *block, Int iWidth, Int iHeight, Bool useDST, const Int maxLog2TrDynamicRange) 928 { 929 const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE]; 930 931 Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd 932 Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth; 933 const TCoeff clipMinimum = -(1 << maxLog2TrDynamicRange); 934 const TCoeff clipMaximum = (1 << maxLog2TrDynamicRange) - 1; 935 936 assert(shift_1st >= 0); 937 assert(shift_2nd >= 0); 938 939 TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE]; 940 941 switch (iHeight) 942 { 943 case 4: 944 { 945 if ((iWidth == 4) && useDST) // Check for DCT or DST 946 { 947 fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum); 948 } 949 else 950 { 951 partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); 952 } 953 } 954 break; 955 956 case 8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break; 957 case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break; 958 case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break; 959 960 default: 961 assert(0); exit (1); break; 962 } 963 964 switch (iWidth) 965 { 966 // Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied 967 case 4: 968 { 969 if ((iHeight == 4) && useDST) // Check for DCT or DST 970 { 971 fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() ); 972 } 973 else 974 { 975 partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); 976 } 977 } 978 break; 979 980 case 8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break; 981 case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break; 982 case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break; 983 984 default: 985 assert(0); exit (1); break; 986 } 987 } 988 989 990 // To minimize the distortion only. No rate is considered. 991 Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters, const Int maxLog2TrDynamicRange ) 992 { 993 const UInt width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH; 994 const UInt height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT; 995 const UInt groupSize = 1 << MLS_CG_SIZE; 996 997 const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); 998 const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; 999 870 1000 Int lastCG = -1; 871 1001 Int absSum = 0 ; 872 1002 Int n ; 873 1003 874 for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )875 { 876 Int subPos = subSet << LOG2_SCAN_SET_SIZE;877 Int firstNZPosInCG= SCAN_SET_SIZE, lastNZPosInCG=-1 ;1004 for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- ) 1005 { 1006 Int subPos = subSet << MLS_CG_SIZE; 1007 Int firstNZPosInCG=groupSize , lastNZPosInCG=-1 ; 878 1008 absSum = 0 ; 879 1009 880 for(n = SCAN_SET_SIZE-1; n >= 0; --n )881 { 882 if( pQCoef[ scan[ n + subPos ]] )1010 for(n = groupSize-1; n >= 0; --n ) 1011 { 1012 if( pQCoef[ codingParameters.scan[ n + subPos ]] ) 883 1013 { 884 1014 lastNZPosInCG = n; … … 887 1017 } 888 1018 889 for(n = 0; n < SCAN_SET_SIZE; n++ )890 { 891 if( pQCoef[ scan[ n + subPos ]] )1019 for(n = 0; n <groupSize; n++ ) 1020 { 1021 if( pQCoef[ codingParameters.scan[ n + subPos ]] ) 892 1022 { 893 1023 firstNZPosInCG = n; … … 898 1028 for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ ) 899 1029 { 900 absSum += pQCoef[ scan[ n + subPos ]];901 } 902 903 if(lastNZPosInCG>=0 && lastCG==-1) 904 { 905 lastCG = 1 ; 1030 absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]); 1031 } 1032 1033 if(lastNZPosInCG>=0 && lastCG==-1) 1034 { 1035 lastCG = 1 ; 906 1036 } 907 1037 908 1038 if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD ) 909 1039 { 910 UInt signbit = (pQCoef[ scan[subPos+firstNZPosInCG]]>0?0:1) ;1040 UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ; 911 1041 if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity 912 1042 { 913 Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0; 914 915 for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n ) 916 { 917 UInt blkPos = scan[ n+subPos ]; 1043 TCoeff curCost = std::numeric_limits<TCoeff>::max(); 1044 TCoeff minCostInc = std::numeric_limits<TCoeff>::max(); 1045 Int minPos =-1, finalChange=0, curChange=0; 1046 1047 for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n ) 1048 { 1049 UInt blkPos = codingParameters.scan[ n+subPos ]; 918 1050 if(pQCoef[ blkPos ] != 0 ) 919 1051 { 920 1052 if(deltaU[blkPos]>0) 921 1053 { 922 curCost = - deltaU[blkPos]; 1054 curCost = - deltaU[blkPos]; 923 1055 curChange=1 ; 924 1056 } 925 else 1057 else 926 1058 { 927 1059 //curChange =-1; 928 1060 if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1) 929 1061 { 930 curCost =MAX_INT ;1062 curCost = std::numeric_limits<TCoeff>::max(); 931 1063 } 932 1064 else 933 1065 { 934 curCost = deltaU[blkPos]; 1066 curCost = deltaU[blkPos]; 935 1067 curChange =-1; 936 1068 } … … 944 1076 if(thisSignBit != signbit ) 945 1077 { 946 curCost = MAX_INT;1078 curCost = std::numeric_limits<TCoeff>::max(); 947 1079 } 948 1080 else 949 { 1081 { 950 1082 curCost = - (deltaU[blkPos]) ; 951 1083 curChange = 1 ; … … 967 1099 } //CG loop 968 1100 969 if(pQCoef[minPos] == 32767 || pQCoef[minPos] == -32768)1101 if(pQCoef[minPos] == entropyCodingMaximum || pQCoef[minPos] == entropyCodingMinimum) 970 1102 { 971 1103 finalChange = -1; … … 974 1106 if(pCoef[minPos]>=0) 975 1107 { 976 pQCoef[minPos] += finalChange ; 977 } 978 else 979 { 1108 pQCoef[minPos] += finalChange ; 1109 } 1110 else 1111 { 980 1112 pQCoef[minPos] -= finalChange ; 981 } 1113 } 982 1114 } // Hide 983 1115 } 984 if(lastCG==1) 1116 if(lastCG==1) 985 1117 { 986 1118 lastCG=0 ; … … 991 1123 } 992 1124 993 Void TComTrQuant::xQuant( TComDataCU* pcCU, 994 Int* pSrc, 995 TCoeff* pDes, 1125 1126 Void TComTrQuant::xQuant( TComTU &rTu, 1127 TCoeff * pSrc, 1128 TCoeff * pDes, 996 1129 #if ADAPTIVE_QP_SELECTION 997 Int*& pArlDes, 998 #endif 999 Int iWidth, 1000 Int iHeight, 1001 UInt& uiAcSum, 1002 TextType eTType, 1003 UInt uiAbsPartIdx ) 1004 { 1005 Int* piCoef = pSrc; 1130 TCoeff *pArlDes, 1131 #endif 1132 TCoeff &uiAbsSum, 1133 const ComponentID compID, 1134 const QpParam &cQP ) 1135 { 1136 const TComRectangle &rect = rTu.getRect(compID); 1137 const UInt uiWidth = rect.width; 1138 const UInt uiHeight = rect.height; 1139 TComDataCU* pcCU = rTu.getCU(); 1140 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 1141 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); 1142 1143 TCoeff* piCoef = pSrc; 1006 1144 TCoeff* piQCoef = pDes; 1007 1145 #if ADAPTIVE_QP_SELECTION 1008 Int* piArlCCoef = pArlDes; 1009 #endif 1010 Int iAdd = 0; 1011 1012 Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ; 1013 if ( useRDOQ && (eTType == TEXT_LUMA || RDOQ_CHROMA)) 1014 { 1146 TCoeff* piArlCCoef = pArlDes; 1147 #endif 1148 1149 const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID); 1150 const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)); 1151 1152 Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ; 1153 if ( useRDOQ && (isLuma(compID) || RDOQ_CHROMA) ) 1154 { 1155 #if T0196_SELECTIVE_RDOQ 1156 if ( !m_useSelectiveRDOQ || xNeedRDOQ( rTu, piCoef, compID, cQP ) ) 1157 { 1158 #endif 1015 1159 #if ADAPTIVE_QP_SELECTION 1016 xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx);1160 xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP ); 1017 1161 #else 1018 xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx ); 1162 xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP ); 1163 #endif 1164 #if T0196_SELECTIVE_RDOQ 1165 } 1166 else 1167 { 1168 memset( pDes, 0, sizeof( TCoeff ) * uiWidth *uiHeight ); 1169 uiAbsSum = 0; 1170 } 1019 1171 #endif 1020 1172 } 1021 1173 else 1022 1174 { 1023 const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2; 1024 1025 UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx)); 1026 const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ]; 1027 1028 Int deltaU[32*32] ; 1175 TUEntropyCodingParameters codingParameters; 1176 getTUEntropyCodingParameters(codingParameters, rTu, compID); 1177 1178 const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); 1179 const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; 1180 1181 TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE]; 1182 1183 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID); 1184 1185 Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID); 1186 assert(scalingListType < SCALING_LIST_NUM); 1187 Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2); 1188 1189 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)); 1190 const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; 1191 1192 /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be 1193 * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the 1194 * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) 1195 * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result 1196 */ 1197 1198 // Represents scaling through forward transform 1199 Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange); 1200 if (useTransformSkip && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag()) 1201 { 1202 iTransformShift = std::max<Int>(0, iTransformShift); 1203 } 1204 1205 const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; 1206 // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset 1029 1207 1030 1208 #if ADAPTIVE_QP_SELECTION 1031 QpParam cQpBase; 1032 Int iQpBase = pcCU->getSlice()->getSliceQpBase(); 1033 1034 Int qpScaled; 1035 Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC(); 1036 1037 if(eTType == TEXT_LUMA) 1038 { 1039 qpScaled = iQpBase + qpBDOffset; 1040 } 1041 else 1042 { 1043 Int chromaQPOffset; 1044 if(eTType == TEXT_CHROMA_U) 1045 { 1046 chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb(); 1047 } 1048 else 1049 { 1050 chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr(); 1051 } 1052 iQpBase = iQpBase + chromaQPOffset; 1053 1054 qpScaled = Clip3( -qpBDOffset, 57, iQpBase); 1055 1056 if(qpScaled < 0) 1057 { 1058 qpScaled = qpScaled + qpBDOffset; 1059 } 1060 else 1061 { 1062 qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset; 1063 } 1064 } 1065 cQpBase.setQpParam(qpScaled); 1066 #endif 1067 1068 UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2; 1069 Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType]; 1070 assert(scalingListType < SCALING_LIST_NUM); 1071 Int *piQuantCoeff = 0; 1072 piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2); 1073 1074 UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC; 1075 Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform 1209 Int iQBitsC = MAX_INT; 1210 Int iAddC = MAX_INT; 1211 1212 if (m_bUseAdaptQpSelect) 1213 { 1214 iQBitsC = iQBits - ARL_C_PRECISION; 1215 iAddC = 1 << (iQBitsC-1); 1216 } 1217 #endif 1218 1219 const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9); 1220 const Int qBits8 = iQBits - 8; 1221 1222 for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ ) 1223 { 1224 const TCoeff iLevel = piCoef[uiBlockPos]; 1225 const TCoeff iSign = (iLevel < 0 ? -1: 1); 1226 1227 const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient); 1076 1228 1077 1229 #if ADAPTIVE_QP_SELECTION 1078 Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;1079 iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);1080 Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;1081 Int iAddC = 1 << (iQBitsC-1);1082 #else1083 Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits1084 iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);1085 #endif1086 1087 Int qBits8 = iQBits-8;1088 for( Int n = 0; n < iWidth*iHeight; n++ )1089 {1090 Int iLevel;1091 Int iSign;1092 UInt uiBlockPos = n;1093 iLevel = piCoef[uiBlockPos];1094 iSign = (iLevel < 0 ? -1: 1);1095 1096 #if ADAPTIVE_QP_SELECTION1097 Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];1098 1230 if( m_bUseAdaptQpSelect ) 1099 1231 { 1100 piArlCCoef[uiBlockPos] = ( Int)((tmpLevel + iAddC ) >> iQBitsC);1101 } 1102 iLevel = (Int)((tmpLevel + iAdd ) >> iQBits); 1103 deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8); 1104 #else 1105 iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;1106 deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 ); 1107 #endif 1108 uiAcSum += iLevel;1109 iLevel *= iSign; 1110 piQCoef[uiBlockPos] = Clip3 ( -32768, 32767, iLevel);1232 piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC); 1233 } 1234 #endif 1235 1236 const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits); 1237 deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8); 1238 1239 uiAbsSum += quantisedMagnitude; 1240 const TCoeff quantisedCoefficient = quantisedMagnitude * iSign; 1241 1242 piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient ); 1111 1243 } // for n 1244 1112 1245 if( pcCU->getSlice()->getPPS()->getSignHideFlag() ) 1113 1246 { 1114 if(uiA cSum>=2)1115 { 1116 signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight) ;1247 if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested 1248 { 1249 signBitHidingHDQ( piQCoef, piCoef, deltaU, codingParameters, maxLog2TrDynamicRange ) ; 1117 1250 } 1118 1251 } 1119 1252 } //if RDOQ 1120 1253 //return; 1121 1122 } 1123 1124 Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType ) 1125 { 1126 1127 const TCoeff* piQCoef = pSrc; 1128 Int* piCoef = pDes; 1129 1130 if ( iWidth > (Int)m_uiMaxTrSize ) 1131 { 1132 iWidth = m_uiMaxTrSize; 1133 iHeight = m_uiMaxTrSize; 1134 } 1135 1136 Int iShift,iAdd,iCoeffQ; 1137 UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2; 1138 1139 Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; 1140 1141 iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift; 1142 1143 TCoeff clipQCoef; 1144 1145 if(getUseScalingList()) 1146 { 1147 iShift += 4; 1148 Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2); 1149 1150 if(iShift > m_cQP.m_iPer) 1151 { 1152 iAdd = 1 << (iShift - m_cQP.m_iPer - 1); 1153 1154 for( Int n = 0; n < iWidth*iHeight; n++ ) 1155 { 1156 clipQCoef = Clip3( -32768, 32767, piQCoef[n] ); 1157 iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer); 1158 piCoef[n] = Clip3(-32768,32767,iCoeffQ); 1254 } 1255 1256 #if T0196_SELECTIVE_RDOQ 1257 Bool TComTrQuant::xNeedRDOQ( TComTU &rTu, TCoeff * pSrc, const ComponentID compID, const QpParam &cQP ) 1258 { 1259 const TComRectangle &rect = rTu.getRect(compID); 1260 const UInt uiWidth = rect.width; 1261 const UInt uiHeight = rect.height; 1262 TComDataCU* pcCU = rTu.getCU(); 1263 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 1264 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); 1265 1266 TCoeff* piCoef = pSrc; 1267 1268 const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID); 1269 const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)); 1270 1271 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID); 1272 1273 Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID); 1274 assert(scalingListType < SCALING_LIST_NUM); 1275 Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2); 1276 1277 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)); 1278 const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; 1279 1280 /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be 1281 * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the 1282 * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) 1283 * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result 1284 */ 1285 1286 // Represents scaling through forward transform 1287 Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange); 1288 if (useTransformSkip && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag()) 1289 { 1290 iTransformShift = std::max<Int>(0, iTransformShift); 1291 } 1292 1293 const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; 1294 // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset 1295 1296 // iAdd is different from the iAdd used in normal quantization 1297 const Int iAdd = (compID == COMPONENT_Y ? 171 : 256) << (iQBits-9); 1298 1299 for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ ) 1300 { 1301 const TCoeff iLevel = piCoef[uiBlockPos]; 1302 const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient); 1303 const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits); 1304 1305 if ( quantisedMagnitude != 0 ) 1306 { 1307 return true; 1308 } 1309 } // for n 1310 return false; 1311 } 1312 #endif 1313 1314 Void TComTrQuant::xDeQuant( TComTU &rTu, 1315 const TCoeff * pSrc, 1316 TCoeff * pDes, 1317 const ComponentID compID, 1318 const QpParam &cQP ) 1319 { 1320 assert(compID<MAX_NUM_COMPONENT); 1321 1322 TComDataCU *pcCU = rTu.getCU(); 1323 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 1324 const TComRectangle &rect = rTu.getRect(compID); 1325 const UInt uiWidth = rect.width; 1326 const UInt uiHeight = rect.height; 1327 const TCoeff *const piQCoef = pSrc; 1328 TCoeff *const piCoef = pDes; 1329 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID); 1330 const UInt numSamplesInBlock = uiWidth*uiHeight; 1331 const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)); 1332 const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange); 1333 const TCoeff transformMaximum = (1 << maxLog2TrDynamicRange) - 1; 1334 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)); 1335 const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID); 1336 #if O0043_BEST_EFFORT_DECODING 1337 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getStreamBitDepth(toChannelType(compID)); 1338 #else 1339 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); 1340 #endif 1341 1342 assert (scalingListType < SCALING_LIST_NUM); 1343 assert ( uiWidth <= m_uiMaxTrSize ); 1344 1345 // Represents scaling through forward transform 1346 const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); 1347 const Int originalTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange); 1348 const Int iTransformShift = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift; 1349 1350 const Int QP_per = cQP.per; 1351 const Int QP_rem = cQP.rem; 1352 1353 const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); 1354 1355 if(enableScalingLists) 1356 { 1357 //from the dequantisation equation: 1358 //iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift 1359 //(sizeof(Intermediate_Int) * 8) = inputBitDepth + dequantCoefBits - rightShift 1360 const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS; 1361 const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits)); 1362 1363 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1)); 1364 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1; 1365 1366 Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2); 1367 1368 if(rightShift > 0) 1369 { 1370 const Intermediate_Int iAdd = 1 << (rightShift - 1); 1371 1372 for( Int n = 0; n < numSamplesInBlock; n++ ) 1373 { 1374 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n])); 1375 const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift; 1376 1377 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 1159 1378 } 1160 1379 } 1161 1380 else 1162 1381 { 1163 for( Int n = 0; n < iWidth*iHeight; n++ ) 1164 { 1165 clipQCoef = Clip3( -32768, 32767, piQCoef[n] ); 1166 iCoeffQ = Clip3( -32768, 32767, clipQCoef * piDequantCoef[n] ); // Clip to avoid possible overflow in following shift left operation 1167 piCoef[n] = Clip3( -32768, 32767, iCoeffQ << ( m_cQP.m_iPer - iShift ) ); 1382 const Int leftShift = -rightShift; 1383 1384 for( Int n = 0; n < numSamplesInBlock; n++ ) 1385 { 1386 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n])); 1387 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift; 1388 1389 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 1168 1390 } 1169 1391 } … … 1171 1393 else 1172 1394 { 1173 iAdd = 1 << (iShift-1); 1174 Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer; 1175 1176 for( Int n = 0; n < iWidth*iHeight; n++ ) 1177 { 1178 clipQCoef = Clip3( -32768, 32767, piQCoef[n] ); 1179 iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift; 1180 piCoef[n] = Clip3(-32768,32767,iCoeffQ); 1181 } 1182 } 1183 } 1184 1185 Void TComTrQuant::init( UInt uiMaxTrSize, 1186 Bool bUseRDOQ, 1187 Bool bUseRDOQTS, 1188 Bool bEnc, Bool useTransformSkipFast 1395 const Int scale = g_invQuantScales[QP_rem]; 1396 const Int scaleBits = (IQUANT_SHIFT + 1) ; 1397 1398 //from the dequantisation equation: 1399 //iCoeffQ = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift); 1400 //(sizeof(Intermediate_Int) * 8) = inputBitDepth + scaleBits - rightShift 1401 const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits)); 1402 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1)); 1403 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1; 1404 1405 if (rightShift > 0) 1406 { 1407 const Intermediate_Int iAdd = 1 << (rightShift - 1); 1408 1409 for( Int n = 0; n < numSamplesInBlock; n++ ) 1410 { 1411 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n])); 1412 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift; 1413 1414 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 1415 } 1416 } 1417 else 1418 { 1419 const Int leftShift = -rightShift; 1420 1421 for( Int n = 0; n < numSamplesInBlock; n++ ) 1422 { 1423 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n])); 1424 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift; 1425 1426 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 1427 } 1428 } 1429 } 1430 } 1431 1432 1433 Void TComTrQuant::init( UInt uiMaxTrSize, 1434 Bool bUseRDOQ, 1435 Bool bUseRDOQTS, 1436 #if T0196_SELECTIVE_RDOQ 1437 Bool useSelectiveRDOQ, 1438 #endif 1439 Bool bEnc, 1440 Bool useTransformSkipFast 1189 1441 #if ADAPTIVE_QP_SELECTION 1190 , Bool bUseAdaptQpSelect1442 , Bool bUseAdaptQpSelect 1191 1443 #endif 1192 1444 ) … … 1194 1446 m_uiMaxTrSize = uiMaxTrSize; 1195 1447 m_bEnc = bEnc; 1196 m_useRDOQ = bUseRDOQ; 1197 m_useRDOQTS = bUseRDOQTS; 1448 m_useRDOQ = bUseRDOQ; 1449 m_useRDOQTS = bUseRDOQTS; 1450 #if T0196_SELECTIVE_RDOQ 1451 m_useSelectiveRDOQ = useSelectiveRDOQ; 1452 #endif 1198 1453 #if ADAPTIVE_QP_SELECTION 1199 1454 m_bUseAdaptQpSelect = bUseAdaptQpSelect; … … 1202 1457 } 1203 1458 1204 Void TComTrQuant::transformNxN( TComDataCU* pcCU, 1205 Pel* pcResidual, 1206 UInt uiStride, 1207 TCoeff* rpcCoeff, 1459 1460 Void TComTrQuant::transformNxN( TComTU & rTu, 1461 const ComponentID compID, 1462 Pel * pcResidual, 1463 const UInt uiStride, 1464 TCoeff * rpcCoeff, 1208 1465 #if ADAPTIVE_QP_SELECTION 1209 Int*& rpcArlCoeff, 1210 #endif 1211 UInt uiWidth, 1212 UInt uiHeight, 1213 UInt& uiAbsSum, 1214 TextType eTType, 1215 UInt uiAbsPartIdx, 1216 Bool useTransformSkip 1217 ) 1218 { 1219 if (pcCU->getCUTransquantBypass(uiAbsPartIdx)) 1220 { 1221 uiAbsSum=0; 1222 for (UInt k = 0; k<uiHeight; k++) 1223 { 1224 for (UInt j = 0; j<uiWidth; j++) 1225 { 1226 rpcCoeff[k*uiWidth+j]= pcResidual[k*uiStride+j]; 1227 uiAbsSum += abs(pcResidual[k*uiStride+j]); 1228 } 1229 } 1466 TCoeff * pcArlCoeff, 1467 #endif 1468 TCoeff & uiAbsSum, 1469 const QpParam & cQP 1470 ) 1471 { 1472 const TComRectangle &rect = rTu.getRect(compID); 1473 const UInt uiWidth = rect.width; 1474 const UInt uiHeight = rect.height; 1475 TComDataCU* pcCU = rTu.getCU(); 1476 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 1477 const UInt uiOrgTrDepth = rTu.GetTransformDepthRel(); 1478 1479 uiAbsSum=0; 1480 1481 RDPCMMode rdpcmMode = RDPCM_OFF; 1482 rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode ); 1483 1484 if (rdpcmMode == RDPCM_OFF) 1485 { 1486 uiAbsSum = 0; 1487 //transform and quantise 1488 if(pcCU->getCUTransquantBypass(uiAbsPartIdx)) 1489 { 1490 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID); 1491 const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1; 1492 1493 for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++) 1494 { 1495 for (UInt x = 0; x<uiWidth; x++, coefficientIndex++) 1496 { 1497 const Pel currentSample = pcResidual[(y * uiStride) + x]; 1498 1499 rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample; 1500 uiAbsSum += TCoeff(abs(currentSample)); 1501 } 1502 } 1503 } 1504 else 1505 { 1506 #if DEBUG_TRANSFORM_AND_QUANTISE 1507 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n"; 1508 printBlock(pcResidual, uiWidth, uiHeight, uiStride); 1509 #endif 1510 1511 assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) ); 1512 1513 if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) 1514 { 1515 xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID ); 1516 } 1517 else 1518 { 1519 const Int channelBitDepth=pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); 1520 xT( channelBitDepth, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight, pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)) ); 1521 } 1522 1523 #if DEBUG_TRANSFORM_AND_QUANTISE 1524 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n"; 1525 printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth); 1526 #endif 1527 1528 xQuant( rTu, m_plTempCoeff, rpcCoeff, 1529 1530 #if ADAPTIVE_QP_SELECTION 1531 pcArlCoeff, 1532 #endif 1533 uiAbsSum, compID, cQP ); 1534 1535 #if DEBUG_TRANSFORM_AND_QUANTISE 1536 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n"; 1537 printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth); 1538 #endif 1539 } 1540 } 1541 1542 //set the CBF 1543 pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID)); 1544 } 1545 1546 1547 Void TComTrQuant::invTransformNxN( TComTU &rTu, 1548 const ComponentID compID, 1549 Pel *pcResidual, 1550 const UInt uiStride, 1551 TCoeff * pcCoeff, 1552 const QpParam &cQP 1553 DEBUG_STRING_FN_DECLAREP(psDebug)) 1554 { 1555 TComDataCU* pcCU=rTu.getCU(); 1556 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 1557 const TComRectangle &rect = rTu.getRect(compID); 1558 const UInt uiWidth = rect.width; 1559 const UInt uiHeight = rect.height; 1560 1561 if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter 1562 { 1563 //------------------------------------------------ 1564 1565 //recurse deeper 1566 1567 TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID); 1568 1569 do 1570 { 1571 //------------------ 1572 1573 const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height; 1574 1575 Pel *subTUResidual = pcResidual + (lineOffset * uiStride); 1576 TCoeff *subTUCoefficients = pcCoeff + (lineOffset * subTURecurse.getRect(compID).width); 1577 1578 invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug)); 1579 1580 //------------------ 1581 1582 } while (subTURecurse.nextSection(rTu)); 1583 1584 //------------------------------------------------ 1585 1230 1586 return; 1231 1587 } 1232 UInt uiMode; //luma intra pred 1233 if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA ) 1234 { 1235 uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx ); 1588 1589 #if DEBUG_STRING 1590 if (psDebug) 1591 { 1592 std::stringstream ss(stringstream::out); 1593 printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth); 1594 DEBUG_STRING_APPEND((*psDebug), ss.str()) 1595 } 1596 #endif 1597 1598 if(pcCU->getCUTransquantBypass(uiAbsPartIdx)) 1599 { 1600 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID); 1601 const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1; 1602 1603 for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++) 1604 { 1605 for (UInt x = 0; x<uiWidth; x++, coefficientIndex++) 1606 { 1607 pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]); 1608 } 1609 } 1236 1610 } 1237 1611 else 1238 1612 { 1239 uiMode = REG_DCT; 1240 } 1241 1613 #if DEBUG_TRANSFORM_AND_QUANTISE 1614 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n"; 1615 printBlock(pcCoeff, uiWidth, uiHeight, uiWidth); 1616 #endif 1617 1618 xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP); 1619 1620 #if DEBUG_TRANSFORM_AND_QUANTISE 1621 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n"; 1622 printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth); 1623 #endif 1624 1625 #if DEBUG_STRING 1626 if (psDebug) 1627 { 1628 std::stringstream ss(stringstream::out); 1629 printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth); 1630 (*psDebug)+=ss.str(); 1631 } 1632 #endif 1633 1634 if(pcCU->getTransformSkip(uiAbsPartIdx, compID)) 1635 { 1636 xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID ); 1637 1638 #if DEBUG_STRING 1639 if (psDebug) 1640 { 1641 std::stringstream ss(stringstream::out); 1642 printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride); 1643 (*psDebug)+=ss.str(); 1644 (*psDebug)+="(<- was a Transform-skipped block)\n"; 1645 } 1646 #endif 1647 } 1648 else 1649 { 1650 #if O0043_BEST_EFFORT_DECODING 1651 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getStreamBitDepth(toChannelType(compID)); 1652 #else 1653 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); 1654 #endif 1655 xIT( channelBitDepth, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight, pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)) ); 1656 1657 #if DEBUG_STRING 1658 if (psDebug) 1659 { 1660 std::stringstream ss(stringstream::out); 1661 printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride); 1662 (*psDebug)+=ss.str(); 1663 (*psDebug)+="(<- was a Transformed block)\n"; 1664 } 1665 #endif 1666 } 1667 1668 #if DEBUG_TRANSFORM_AND_QUANTISE 1669 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n"; 1670 printBlock(pcResidual, uiWidth, uiHeight, uiStride); 1671 g_debugCounter++; 1672 #endif 1673 } 1674 1675 invRdpcmNxN( rTu, compID, pcResidual, uiStride ); 1676 } 1677 1678 Void TComTrQuant::invRecurTransformNxN( const ComponentID compID, 1679 TComYuv *pResidual, 1680 TComTU &rTu) 1681 { 1682 if (!rTu.ProcessComponentSection(compID)) 1683 { 1684 return; 1685 } 1686 1687 TComDataCU* pcCU = rTu.getCU(); 1688 UInt absPartIdxTU = rTu.GetAbsPartIdxTU(); 1689 UInt uiTrMode=rTu.GetTransformDepthRel(); 1690 if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) || !pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) ) 1691 { 1692 return; 1693 } 1694 1695 if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) ) 1696 { 1697 const TComRectangle &tuRect = rTu.getRect(compID); 1698 const Int uiStride = pResidual->getStride( compID ); 1699 Pel *rpcResidual = pResidual->getAddr( compID ); 1700 UInt uiAddr = (tuRect.x0 + uiStride*tuRect.y0); 1701 Pel *pResi = rpcResidual + uiAddr; 1702 TCoeff *pcCoeff = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID); 1703 1704 const QpParam cQP(*pcCU, compID); 1705 1706 if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0) 1707 { 1708 DEBUG_STRING_NEW(sTemp) 1709 #if DEBUG_STRING 1710 std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0; 1711 #endif 1712 1713 invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) ); 1714 1715 #if DEBUG_STRING 1716 if (psDebug != 0) 1717 { 1718 std::cout << (*psDebug); 1719 } 1720 #endif 1721 } 1722 1723 if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0)) 1724 { 1725 const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y ); 1726 const Int strideLuma = pResidual->getStride( COMPONENT_Y ); 1727 const Int tuWidth = rTu.getRect( compID ).width; 1728 const Int tuHeight = rTu.getRect( compID ).height; 1729 1730 if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0) 1731 { 1732 pResi = rpcResidual + uiAddr; 1733 const Pel *pResiLuma = piResiLuma + uiAddr; 1734 1735 crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true ); 1736 } 1737 } 1738 } 1739 else 1740 { 1741 TComTURecurse tuRecurseChild(rTu, false); 1742 do 1743 { 1744 invRecurTransformNxN( compID, pResidual, tuRecurseChild ); 1745 } while (tuRecurseChild.nextSection(rTu)); 1746 } 1747 } 1748 1749 Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode ) 1750 { 1751 TComDataCU *pcCU=rTu.getCU(); 1752 const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU(); 1753 1754 const Bool bLossless = pcCU->getCUTransquantBypass( uiAbsPartIdx ); 1755 const UInt uiWidth = rTu.getRect(compID).width; 1756 const UInt uiHeight = rTu.getRect(compID).height; 1757 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID); 1758 const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1; 1759 1760 UInt uiX = 0; 1761 UInt uiY = 0; 1762 1763 UInt &majorAxis = (mode == RDPCM_VER) ? uiX : uiY; 1764 UInt &minorAxis = (mode == RDPCM_VER) ? uiY : uiX; 1765 const UInt majorAxisLimit = (mode == RDPCM_VER) ? uiWidth : uiHeight; 1766 const UInt minorAxisLimit = (mode == RDPCM_VER) ? uiHeight : uiWidth; 1767 1768 const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF); 1769 1242 1770 uiAbsSum = 0; 1243 assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) ); 1244 Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC; 1245 if(useTransformSkip) 1246 { 1247 xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight ); 1248 } 1249 else 1250 { 1251 xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight ); 1252 } 1253 xQuant( pcCU, m_plTempCoeff, rpcCoeff, 1254 #if ADAPTIVE_QP_SELECTION 1255 rpcArlCoeff, 1256 #endif 1257 uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx ); 1258 } 1259 1260 Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip ) 1261 { 1262 if(transQuantBypass) 1263 { 1264 for (UInt k = 0; k<uiHeight; k++) 1265 { 1266 for (UInt j = 0; j<uiWidth; j++) 1267 { 1268 rpcResidual[k*uiStride+j] = pcCoeff[k*uiWidth+j]; 1269 } 1270 } 1271 return; 1272 } 1273 Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC; 1274 xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType); 1275 if(useTransformSkip == true) 1276 { 1277 xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight ); 1278 } 1279 else 1280 { 1281 xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight ); 1282 } 1283 } 1284 1285 Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff ) 1286 { 1287 if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) ) 1288 { 1289 return; 1290 } 1291 const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx ); 1292 1293 if( uiTrMode == stopTrMode ) 1294 { 1295 UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode; 1296 UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2; 1297 if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 ) 1298 { 1299 UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 ); 1300 if( ( uiAbsPartIdx % uiQPDiv ) != 0 ) 1301 { 1302 return; 1303 } 1304 uiWidth <<= 1; 1305 uiHeight <<= 1; 1306 } 1307 Pel* pResi = rpcResidual + uiAddr; 1308 Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt]; 1309 assert(scalingListType < SCALING_LIST_NUM); 1310 invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) ); 1311 } 1312 else 1313 { 1314 uiTrMode++; 1315 uiWidth >>= 1; 1316 uiHeight >>= 1; 1317 Int trWidth = uiWidth, trHeight = uiHeight; 1318 UInt uiAddrOffset = trHeight * uiStride; 1319 UInt uiCoefOffset = trWidth * trHeight; 1320 UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 ); 1321 { 1322 invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset; 1323 invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset; 1324 invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset; 1325 invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); 1771 1772 for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ ) 1773 { 1774 TCoeff accumulatorValue = 0; // 32-bit accumulator 1775 for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ ) 1776 { 1777 const UInt sampleIndex = (uiY * uiWidth) + uiX; 1778 const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex); 1779 const Pel currentSample = pcResidual[(uiY * uiStride) + uiX]; 1780 const TCoeff encoderSideDelta = TCoeff(currentSample) - accumulatorValue; 1781 1782 Pel reconstructedDelta; 1783 if ( bLossless ) 1784 { 1785 pcCoeff[coefficientIndex] = encoderSideDelta; 1786 reconstructedDelta = (Pel) encoderSideDelta; 1787 } 1788 else 1789 { 1790 transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint); 1791 invTrSkipDeQuantOneSample (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex); 1792 } 1793 1794 uiAbsSum += abs(pcCoeff[coefficientIndex]); 1795 1796 if (mode != RDPCM_OFF) 1797 { 1798 accumulatorValue += reconstructedDelta; 1799 } 1800 } 1801 } 1802 } 1803 1804 Void TComTrQuant::rdpcmNxN ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode ) 1805 { 1806 TComDataCU *pcCU=rTu.getCU(); 1807 const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU(); 1808 1809 if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) || ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx))) 1810 { 1811 rdpcmMode = RDPCM_OFF; 1812 } 1813 else if ( pcCU->isIntra( uiAbsPartIdx ) ) 1814 { 1815 const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat(); 1816 const ChannelType chType = toChannelType(compID); 1817 const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx ); 1818 const TComSPS *sps=pcCU->getSlice()->getSPS(); 1819 const UInt partsPerMinCU = 1<<(2*(sps->getMaxTotalCUDepth() - sps->getLog2DiffMaxMinCodingBlockSize())); 1820 const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt, partsPerMinCU)) : uiChPredMode; 1821 const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode; 1822 1823 if (uiChFinalMode == VER_IDX || uiChFinalMode == HOR_IDX) 1824 { 1825 rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR; 1826 applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode ); 1827 } 1828 else 1829 { 1830 rdpcmMode = RDPCM_OFF; 1831 } 1832 } 1833 else // not intra, need to select the best mode 1834 { 1835 const UInt uiWidth = rTu.getRect(compID).width; 1836 const UInt uiHeight = rTu.getRect(compID).height; 1837 1838 RDPCMMode bestMode = NUMBER_OF_RDPCM_MODES; 1839 TCoeff bestAbsSum = std::numeric_limits<TCoeff>::max(); 1840 TCoeff bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE]; 1841 1842 for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++) 1843 { 1844 const RDPCMMode mode = RDPCMMode(modeIndex); 1845 1846 TCoeff currAbsSum = 0; 1847 1848 applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode ); 1849 1850 if (currAbsSum < bestAbsSum) 1851 { 1852 bestMode = mode; 1853 bestAbsSum = currAbsSum; 1854 if (mode != RDPCM_OFF) 1855 { 1856 memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff))); 1857 } 1858 } 1859 } 1860 1861 rdpcmMode = bestMode; 1862 uiAbsSum = bestAbsSum; 1863 1864 if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here 1865 { 1866 memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff))); 1867 } 1868 } 1869 1870 pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID)); 1871 } 1872 1873 Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride ) 1874 { 1875 TComDataCU *pcCU=rTu.getCU(); 1876 const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU(); 1877 1878 if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) || pcCU->getCUTransquantBypass(uiAbsPartIdx))) 1879 { 1880 const UInt uiWidth = rTu.getRect(compID).width; 1881 const UInt uiHeight = rTu.getRect(compID).height; 1882 1883 RDPCMMode rdpcmMode = RDPCM_OFF; 1884 1885 if ( pcCU->isIntra( uiAbsPartIdx ) ) 1886 { 1887 const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat(); 1888 const ChannelType chType = toChannelType(compID); 1889 const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx ); 1890 const TComSPS *sps=pcCU->getSlice()->getSPS(); 1891 const UInt partsPerMinCU = 1<<(2*(sps->getMaxTotalCUDepth() - sps->getLog2DiffMaxMinCodingBlockSize())); 1892 const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt, partsPerMinCU)) : uiChPredMode; 1893 const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode; 1894 1895 if (uiChFinalMode == VER_IDX || uiChFinalMode == HOR_IDX) 1896 { 1897 rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR; 1898 } 1899 } 1900 else // not intra case 1901 { 1902 rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx )); 1903 } 1904 1905 const TCoeff pelMin=(TCoeff) std::numeric_limits<Pel>::min(); 1906 const TCoeff pelMax=(TCoeff) std::numeric_limits<Pel>::max(); 1907 if (rdpcmMode == RDPCM_VER) 1908 { 1909 for( UInt uiX = 0; uiX < uiWidth; uiX++ ) 1910 { 1911 Pel *pcCurResidual = pcResidual+uiX; 1912 TCoeff accumulator = *pcCurResidual; // 32-bit accumulator 1913 pcCurResidual+=uiStride; 1914 for( UInt uiY = 1; uiY < uiHeight; uiY++, pcCurResidual+=uiStride ) 1915 { 1916 accumulator += *(pcCurResidual); 1917 *pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator); 1918 } 1919 } 1920 } 1921 else if (rdpcmMode == RDPCM_HOR) 1922 { 1923 for( UInt uiY = 0; uiY < uiHeight; uiY++ ) 1924 { 1925 Pel *pcCurResidual = pcResidual+uiY*uiStride; 1926 TCoeff accumulator = *pcCurResidual; 1927 pcCurResidual++; 1928 for( UInt uiX = 1; uiX < uiWidth; uiX++, pcCurResidual++ ) 1929 { 1930 accumulator += *(pcCurResidual); 1931 *pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator); 1932 } 1933 } 1326 1934 } 1327 1935 } … … 1332 1940 // ------------------------------------------------------------------------------------------------ 1333 1941 1334 /** Wrapper function between HM interface and core NxN forward transform (2D) 1942 /** Wrapper function between HM interface and core NxN forward transform (2D) 1943 * \param channelBitDepth bit depth of channel 1944 * \param useDST 1335 1945 * \param piBlkResi input data (residual) 1946 * \param uiStride stride of input residual data 1336 1947 * \param psCoeff output data (transform coefficients) 1337 * \param uiStride stride of input residual data1338 * \param i Size transform size (iSize x iSize)1339 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only1948 * \param iWidth transform width 1949 * \param iHeight transform height 1950 * \param maxLog2TrDynamicRange 1340 1951 */ 1341 Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight ) 1342 { 1343 #if MATRIX_MULT 1344 Int iSize = iWidth; 1345 xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode); 1346 #else 1347 Int j; 1348 Short block[ 32 * 32 ]; 1349 Short coeff[ 32 * 32 ]; 1350 for (j = 0; j < iHeight; j++) 1351 { 1352 memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) ); 1353 } 1354 xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode ); 1355 for ( j = 0; j < iHeight * iWidth; j++ ) 1356 { 1357 psCoeff[ j ] = coeff[ j ]; 1358 } 1359 #endif 1360 } 1361 1362 1363 /** Wrapper function between HM interface and core NxN inverse transform (2D) 1952 Void TComTrQuant::xT( const Int channelBitDepth, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight, const Int maxLog2TrDynamicRange ) 1953 { 1954 #if MATRIX_MULT 1955 if( iWidth == iHeight) 1956 { 1957 xTr(channelBitDepth, piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, maxLog2TrDynamicRange); 1958 return; 1959 } 1960 #endif 1961 1962 TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ]; 1963 TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ]; 1964 1965 for (Int y = 0; y < iHeight; y++) 1966 { 1967 for (Int x = 0; x < iWidth; x++) 1968 { 1969 block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x]; 1970 } 1971 } 1972 1973 xTrMxN( channelBitDepth, block, coeff, iWidth, iHeight, useDST, maxLog2TrDynamicRange ); 1974 1975 memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff))); 1976 } 1977 1978 /** Wrapper function between HM interface and core NxN inverse transform (2D) 1979 * \param channelBitDepth bit depth of channel 1980 * \param useDST 1364 1981 * \param plCoef input data (transform coefficients) 1365 1982 * \param pResidual output data (residual) 1366 1983 * \param uiStride stride of input residual data 1367 * \param iSize transform size (iSize x iSize) 1368 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only 1984 * \param iWidth transform width 1985 * \param iHeight transform height 1986 * \param maxLog2TrDynamicRange 1369 1987 */ 1370 Void TComTrQuant::xIT( Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight)1371 { 1372 #if MATRIX_MULT 1373 Int iSize = iWidth;1374 xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);1375 #else 1376 Int j;1377 {1378 Short block[ 32 * 32 ]; 1379 Short coeff[ 32 * 32 ]; 1380 for ( j = 0; j < iHeight * iWidth; j++ )1381 {1382 coeff[j] = (Short)plCoef[j]; 1383 }1384 xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode ); 1385 {1386 for ( j = 0; j < iHeight; j++ ) 1387 {1388 memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );1389 }1390 }1391 return;1392 }1393 #endif 1394 } 1395 1988 Void TComTrQuant::xIT( const Int channelBitDepth, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight, const Int maxLog2TrDynamicRange ) 1989 { 1990 #if MATRIX_MULT 1991 if( iWidth == iHeight ) 1992 { 1993 xITr(channelBitDepth, plCoef, pResidual, uiStride, (UInt)iWidth, useDST, maxLog2TrDynamicRange); 1994 return; 1995 } 1996 #endif 1997 1998 TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ]; 1999 TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ]; 2000 2001 memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff))); 2002 2003 xITrMxN( channelBitDepth, coeff, block, iWidth, iHeight, useDST, maxLog2TrDynamicRange ); 2004 2005 for (Int y = 0; y < iHeight; y++) 2006 { 2007 for (Int x = 0; x < iWidth; x++) 2008 { 2009 pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]); 2010 } 2011 } 2012 } 2013 1396 2014 /** Wrapper function between HM interface and core 4x4 transform skipping 1397 2015 * \param piBlkResi input data (residual) 2016 * \param uiStride stride of input residual data 1398 2017 * \param psCoeff output data (transform coefficients) 1399 * \param uiStride stride of input residualdata1400 * \param iSize transform size (iSize x iSize)2018 * \param rTu reference to transform data 2019 * \param component colour component 1401 2020 */ 1402 Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height ) 1403 { 1404 assert( width == height ); 1405 UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2; 1406 Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; 1407 UInt transformSkipShift; 1408 Int j,k; 1409 if(shift >= 0) 1410 { 1411 transformSkipShift = shift; 1412 for (j = 0; j < height; j++) 1413 { 1414 for(k = 0; k < width; k ++) 1415 { 1416 psCoeff[j*height + k] = piBlkResi[j * uiStride + k] << transformSkipShift; 1417 } 1418 } 1419 } 1420 else 1421 { 1422 //The case when uiBitDepth > 13 1423 Int offset; 1424 transformSkipShift = -shift; 1425 offset = (1 << (transformSkipShift - 1)); 1426 for (j = 0; j < height; j++) 1427 { 1428 for(k = 0; k < width; k ++) 1429 { 1430 psCoeff[j*height + k] = (piBlkResi[j * uiStride + k] + offset) >> transformSkipShift; 1431 } 1432 } 1433 } 1434 } 1435 1436 /** Wrapper function between HM interface and core NxN transform skipping 2021 Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component ) 2022 { 2023 const TComRectangle &rect = rTu.getRect(component); 2024 const Int width = rect.width; 2025 const Int height = rect.height; 2026 const Int maxLog2TrDynamicRange = rTu.getCU()->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(component)); 2027 const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(toChannelType(component)); 2028 2029 Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(component), maxLog2TrDynamicRange); 2030 if (rTu.getCU()->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag()) 2031 { 2032 iTransformShift = std::max<Int>(0, iTransformShift); 2033 } 2034 2035 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component); 2036 const UInt uiSizeMinus1 = (width * height) - 1; 2037 2038 if (iTransformShift >= 0) 2039 { 2040 for (UInt y = 0, coefficientIndex = 0; y < height; y++) 2041 { 2042 for (UInt x = 0; x < width; x++, coefficientIndex++) 2043 { 2044 psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift; 2045 } 2046 } 2047 } 2048 else //for very high bit depths 2049 { 2050 iTransformShift = -iTransformShift; 2051 const TCoeff offset = 1 << (iTransformShift - 1); 2052 2053 for (UInt y = 0, coefficientIndex = 0; y < height; y++) 2054 { 2055 for (UInt x = 0; x < width; x++, coefficientIndex++) 2056 { 2057 psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift; 2058 } 2059 } 2060 } 2061 } 2062 2063 /** Wrapper function between HM interface and core NxN transform skipping 1437 2064 * \param plCoef input data (coefficients) 1438 2065 * \param pResidual output data (residual) 1439 2066 * \param uiStride stride of input residual data 1440 * \param iSize transform size (iSize x iSize) 2067 * \param rTu reference to transform data 2068 * \param component colour component ID 1441 2069 */ 1442 Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height ) 1443 { 1444 assert( width == height ); 1445 UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2; 1446 Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; 1447 UInt transformSkipShift; 1448 Int j,k; 1449 if(shift > 0) 1450 { 1451 Int offset; 1452 transformSkipShift = shift; 1453 offset = (1 << (transformSkipShift -1)); 1454 for ( j = 0; j < height; j++ ) 1455 { 1456 for(k = 0; k < width; k ++) 1457 { 1458 pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift; 1459 } 1460 } 1461 } 1462 else 1463 { 1464 //The case when uiBitDepth >= 13 1465 transformSkipShift = - shift; 1466 for ( j = 0; j < height; j++ ) 1467 { 1468 for(k = 0; k < width; k ++) 1469 { 1470 pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift; 2070 Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component ) 2071 { 2072 const TComRectangle &rect = rTu.getRect(component); 2073 const Int width = rect.width; 2074 const Int height = rect.height; 2075 const Int maxLog2TrDynamicRange = rTu.getCU()->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(component)); 2076 #if O0043_BEST_EFFORT_DECODING 2077 const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getStreamBitDepth(toChannelType(component)); 2078 #else 2079 const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(toChannelType(component)); 2080 #endif 2081 2082 Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(component), maxLog2TrDynamicRange); 2083 if (rTu.getCU()->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag()) 2084 { 2085 iTransformShift = std::max<Int>(0, iTransformShift); 2086 } 2087 2088 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component); 2089 const UInt uiSizeMinus1 = (width * height) - 1; 2090 2091 if (iTransformShift >= 0) 2092 { 2093 const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1)); 2094 2095 for (UInt y = 0, coefficientIndex = 0; y < height; y++) 2096 { 2097 for (UInt x = 0; x < width; x++, coefficientIndex++) 2098 { 2099 pResidual[(y * uiStride) + x] = Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift); 2100 } 2101 } 2102 } 2103 else //for very high bit depths 2104 { 2105 iTransformShift = -iTransformShift; 2106 2107 for (UInt y = 0, coefficientIndex = 0; y < height; y++) 2108 { 2109 for (UInt x = 0; x < width; x++, coefficientIndex++) 2110 { 2111 pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift); 1471 2112 } 1472 2113 } … … 1475 2116 1476 2117 /** RDOQ with CABAC 1477 * \param pcCU pointer to coding unit structure2118 * \param rTu reference to transform data 1478 2119 * \param plSrcCoeff pointer to input buffer 1479 2120 * \param piDstCoeff reference to pointer to output buffer 1480 * \param uiWidth block width 1481 * \param uiHeight block height 2121 * \param piArlDstCoeff 1482 2122 * \param uiAbsSum reference to absolute sum of quantized transform coefficient 1483 * \param eTType plane type / luminance or chrominance1484 * \param uiAbsPartIdx absolute partition index1485 * \returns Void 2123 * \param compID colour component ID 2124 * \param cQP reference to quantization parameters 2125 1486 2126 * Rate distortion optimized quantization for entropy 1487 2127 * coding engines using probability models like CABAC 1488 2128 */ 1489 Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,1490 Int*plSrcCoeff,1491 TCoeff*piDstCoeff,2129 Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu, 2130 TCoeff * plSrcCoeff, 2131 TCoeff * piDstCoeff, 1492 2132 #if ADAPTIVE_QP_SELECTION 1493 Int*& piArlDstCoeff, 1494 #endif 1495 UInt uiWidth, 1496 UInt uiHeight, 1497 UInt& uiAbsSum, 1498 TextType eTType, 1499 UInt uiAbsPartIdx ) 1500 { 1501 UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2; 1502 1503 UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC; 1504 Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform 1505 UInt uiGoRiceParam = 0; 1506 Double d64BlockUncodedCost = 0; 1507 const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2; 1508 const UInt uiMaxNumCoeff = uiWidth * uiHeight; 1509 Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType]; 2133 TCoeff * piArlDstCoeff, 2134 #endif 2135 TCoeff &uiAbsSum, 2136 const ComponentID compID, 2137 const QpParam &cQP ) 2138 { 2139 const TComRectangle & rect = rTu.getRect(compID); 2140 const UInt uiWidth = rect.width; 2141 const UInt uiHeight = rect.height; 2142 TComDataCU * pcCU = rTu.getCU(); 2143 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 2144 const ChannelType channelType = toChannelType(compID); 2145 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID); 2146 2147 const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); 2148 const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)); 2149 const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(channelType); 2150 2151 /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be 2152 * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the 2153 * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) 2154 * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result 2155 */ 2156 2157 // Represents scaling through forward transform 2158 Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange); 2159 if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && extendedPrecision) 2160 { 2161 iTransformShift = std::max<Int>(0, iTransformShift); 2162 } 2163 2164 const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag(); 2165 const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR; 2166 UInt uiGoRiceParam = initialGolombRiceParameter; 2167 Double d64BlockUncodedCost = 0; 2168 const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2; 2169 const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2; 2170 const UInt uiMaxNumCoeff = uiWidth * uiHeight; 2171 assert(compID<MAX_NUM_COMPONENT); 2172 2173 Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID); 1510 2174 assert(scalingListType < SCALING_LIST_NUM); 1511 1512 Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits 1513 Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem); 1514 Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2); 1515 Int *piQCoef = piQCoefOrg; 1516 Double *pdErrScale = pdErrScaleOrg; 2175 2176 #if ADAPTIVE_QP_SELECTION 2177 memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff); 2178 #endif 2179 2180 Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ]; 2181 Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ]; 2182 Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ]; 2183 memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff ); 2184 memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff ); 2185 Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ]; 2186 Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ]; 2187 Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ]; 2188 TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ]; 2189 memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff ); 2190 memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff ); 2191 memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff ); 2192 memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff ); 2193 2194 const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits 2195 const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem); 2196 const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2)); 2197 2198 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)); 2199 const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; 2200 const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem); 2201 2202 const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); 2203 const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; 2204 1517 2205 #if ADAPTIVE_QP_SELECTION 1518 2206 Int iQBitsC = iQBits - ARL_C_PRECISION; 1519 2207 Int iAddC = 1 << (iQBitsC-1); 1520 2208 #endif 1521 UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx)); 1522 1523 #if ADAPTIVE_QP_SELECTION 1524 memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff); 1525 #endif 1526 1527 Double pdCostCoeff [ 32 * 32 ]; 1528 Double pdCostSig [ 32 * 32 ]; 1529 Double pdCostCoeff0[ 32 * 32 ]; 1530 ::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff ); 1531 ::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff ); 1532 Int rateIncUp [ 32 * 32 ]; 1533 Int rateIncDown [ 32 * 32 ]; 1534 Int sigRateDelta[ 32 * 32 ]; 1535 Int deltaU [ 32 * 32 ]; 1536 ::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff ); 1537 ::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff ); 1538 ::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff ); 1539 ::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff ); 1540 1541 const UInt * scanCG; 1542 { 1543 scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ]; 1544 if( uiLog2BlkSize == 3 ) 1545 { 1546 scanCG = g_sigLastScan8x8[ uiScanIdx ]; 1547 } 1548 else if( uiLog2BlkSize == 5 ) 1549 { 1550 scanCG = g_sigLastScanCG32x32; 1551 } 1552 } 1553 const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16 2209 2210 TUEntropyCodingParameters codingParameters; 2211 getTUEntropyCodingParameters(codingParameters, rTu, compID); 2212 const UInt uiCGSize = (1 << MLS_CG_SIZE); 2213 1554 2214 Double pdCostCoeffGroupSig[ MLS_GRP_NUM ]; 1555 2215 UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ]; 1556 UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;1557 2216 Int iCGLastScanPos = -1; 1558 2217 1559 2218 UInt uiCtxSet = 0; 1560 2219 Int c1 = 1; … … 1562 2221 Double d64BaseCost = 0; 1563 2222 Int iLastScanPos = -1; 1564 2223 1565 2224 UInt c1Idx = 0; 1566 2225 UInt c2Idx = 0; 1567 2226 Int baseLevel; 1568 1569 const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ]; 1570 1571 ::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM ); 1572 ::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM ); 1573 2227 2228 memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM ); 2229 memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM ); 2230 1574 2231 UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE; 1575 2232 Int iScanPos; 1576 coeffGroupRDStats rdStats; 1577 2233 coeffGroupRDStats rdStats; 2234 2235 const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID); 2236 1578 2237 for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--) 1579 2238 { 1580 UInt uiCGBlkPos = scanCG[ iCGScanPos ]; 1581 UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide; 1582 UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide); 1583 ::memset( &rdStats, 0, sizeof (coeffGroupRDStats)); 1584 1585 const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight); 2239 UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ]; 2240 UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups; 2241 UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups); 2242 2243 memset( &rdStats, 0, sizeof (coeffGroupRDStats)); 2244 2245 const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups); 2246 1586 2247 for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--) 1587 2248 { 1588 2249 iScanPos = iCGScanPos*uiCGSize + iScanPosinCG; 1589 2250 //===== quantization ===== 1590 UInt uiBlkPos = scan[iScanPos];2251 UInt uiBlkPos = codingParameters.scan[iScanPos]; 1591 2252 // set coeff 1592 Int uiQ = piQCoef[uiBlkPos]; 1593 Double dTemp = pdErrScale[uiBlkPos]; 1594 Int lLevelDouble = plSrcCoeff[ uiBlkPos ]; 1595 lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1))); 2253 2254 const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient; 2255 const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale; 2256 2257 const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient; 2258 2259 const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1))); 2260 1596 2261 #if ADAPTIVE_QP_SELECTION 1597 2262 if( m_bUseAdaptQpSelect ) 1598 2263 { 1599 piArlDstCoeff[uiBlkPos] = ( Int)(( lLevelDouble + iAddC) >> iQBitsC );1600 } 1601 #endif 1602 UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;1603 1604 Double dErr= Double( lLevelDouble );1605 pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;2264 piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC ); 2265 } 2266 #endif 2267 const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits)); 2268 2269 const Double dErr = Double( lLevelDouble ); 2270 pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale; 1606 2271 d64BlockUncodedCost += pdCostCoeff0[ iScanPos ]; 1607 2272 piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel; 1608 2273 1609 2274 if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 ) 1610 2275 { 1611 2276 iLastScanPos = iScanPos; 1612 uiCtxSet = (iScanPos < SCAN_SET_SIZE || eTType!=TEXT_LUMA) ? 0 : 2;2277 uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0); 1613 2278 iCGLastScanPos = iCGScanPos; 1614 2279 } 1615 2280 1616 2281 if ( iLastScanPos >= 0 ) 1617 2282 { 1618 2283 //===== coefficient level estimation ===== 1619 2284 UInt uiLevel; 1620 UInt uiOneCtx = 4 * uiCtxSet+ c1;1621 UInt uiAbsCtx = uiCtxSet+ c2;1622 2285 UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1; 2286 UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2; 2287 1623 2288 if( iScanPos == iLastScanPos ) 1624 2289 { 1625 uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], 1626 lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam, 1627 c1Idx, c2Idx, iQBits, dTemp, 1 ); 2290 uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], 2291 lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam, 2292 c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange 2293 ); 1628 2294 } 1629 2295 else 1630 2296 { 1631 UInt uiPosY = uiBlkPos >> uiLog2BlkSize; 1632 UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize ); 1633 UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType ); 2297 UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType ); 2298 1634 2299 uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], 1635 lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam, 1636 c1Idx, c2Idx, iQBits, dTemp, 0 ); 2300 lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam, 2301 c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange 2302 ); 2303 1637 2304 sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ]; 1638 2305 } 1639 deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8); 2306 2307 deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8)); 2308 1640 2309 if( uiLevel > 0 ) 1641 2310 { 1642 Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );1643 rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;1644 rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;2311 Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ); 2312 rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; 2313 rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; 1645 2314 } 1646 2315 else // uiLevel == 0 … … 1650 2319 piDstCoeff[ uiBlkPos ] = uiLevel; 1651 2320 d64BaseCost += pdCostCoeff [ iScanPos ]; 1652 1653 2321 1654 2322 baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1; 1655 2323 if( uiLevel >= baseLevel ) 1656 2324 { 1657 if (uiLevel> 3*(1<<uiGoRiceParam))2325 if (uiLevel > 3*(1<<uiGoRiceParam)) 1658 2326 { 1659 uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);2327 uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4)); 1660 2328 } 1661 2329 } … … 1664 2332 c1Idx ++; 1665 2333 } 1666 2334 1667 2335 //===== update bin model ===== 1668 2336 if( uiLevel > 1 ) 1669 2337 { 1670 c1 = 0; 2338 c1 = 0; 1671 2339 c2 += (c2 < 2); 1672 2340 c2Idx ++; … … 1676 2344 c1++; 1677 2345 } 1678 2346 1679 2347 //===== context set update ===== 1680 if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) ) 1681 { 2348 if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) ) 2349 { 2350 uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this **before** entering the final group 2351 c1 = 1; 1682 2352 c2 = 0; 1683 uiGoRiceParam = 0; 1684 1685 c1Idx = 0; 1686 c2Idx = 0; 1687 uiCtxSet = (iScanPos == SCAN_SET_SIZE || eTType!=TEXT_LUMA) ? 0 : 2; 1688 if( c1 == 0 ) 1689 { 1690 uiCtxSet++; 1691 } 1692 c1 = 1; 2353 c1Idx = 0; 2354 c2Idx = 0; 2355 uiGoRiceParam = initialGolombRiceParameter; 1693 2356 } 1694 2357 } … … 1713 2376 } 1714 2377 } //end for (iScanPosinCG) 1715 1716 if (iCGLastScanPos >= 0) 2378 2379 if (iCGLastScanPos >= 0) 1717 2380 { 1718 2381 if( iCGScanPos ) … … 1720 2383 if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0) 1721 2384 { 1722 UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);1723 d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;; 1724 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig); 1725 } 2385 UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups ); 2386 d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;; 2387 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig); 2388 } 1726 2389 else 1727 2390 { 1728 2391 if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below. 1729 2392 { 1730 if ( rdStats.iNNZbeforePos0 == 0 ) 2393 if ( rdStats.iNNZbeforePos0 == 0 ) 1731 2394 { 1732 2395 d64BaseCost -= rdStats.d64SigCost_0; … … 1735 2398 // rd-cost if SigCoeffGroupFlag = 0, initialization 1736 2399 Double d64CostZeroCG = d64BaseCost; 1737 2400 1738 2401 // add SigCoeffGroupFlag cost to total cost 1739 UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight); 2402 UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups ); 2403 1740 2404 if (iCGScanPos < iCGLastScanPos) 1741 2405 { 1742 d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig); 1743 d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig); 1744 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig); 2406 d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig); 2407 d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig); 2408 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig); 1745 2409 } 1746 2410 1747 2411 // try to convert the current coeff group from non-zero to all-zero 1748 2412 d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels 1749 2413 d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels 1750 2414 d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels 1751 2415 1752 2416 // if we can save cost, change this block to all-zero block 1753 if ( d64CostZeroCG < d64BaseCost ) 2417 if ( d64CostZeroCG < d64BaseCost ) 1754 2418 { 1755 2419 uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0; … … 1757 2421 if (iCGScanPos < iCGLastScanPos) 1758 2422 { 1759 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig); 2423 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig); 1760 2424 } 1761 // reset coeffs to 0 in this block 2425 // reset coeffs to 0 in this block 1762 2426 for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--) 1763 2427 { 1764 2428 iScanPos = iCGScanPos*uiCGSize + iScanPosinCG; 1765 UInt uiBlkPos = scan[ iScanPos ];1766 2429 UInt uiBlkPos = codingParameters.scan[ iScanPos ]; 2430 1767 2431 if (piDstCoeff[ uiBlkPos ]) 1768 2432 { … … 1772 2436 } 1773 2437 } 1774 } // end if ( d64CostAllZeros < d64BaseCost ) 2438 } // end if ( d64CostAllZeros < d64BaseCost ) 1775 2439 } 1776 2440 } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0) … … 1782 2446 } 1783 2447 } //end for (iCGScanPos) 1784 2448 1785 2449 //===== estimate last position ===== 1786 2450 if ( iLastScanPos < 0 ) … … 1788 2452 return; 1789 2453 } 1790 2454 1791 2455 Double d64BestCost = 0; 1792 2456 Int ui16CtxCbf = 0; 1793 2457 Int iBestLastIdxP1 = 0; 1794 if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA&& pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )2458 if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 ) 1795 2459 { 1796 2460 ui16CtxCbf = 0; … … 1800 2464 else 1801 2465 { 1802 ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ));1803 ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;2466 ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType ); 2467 ui16CtxCbf += getCBFContextOffset(compID); 1804 2468 d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] ); 1805 2469 d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] ); 1806 2470 } 1807 2471 2472 1808 2473 Bool bFoundLast = false; 1809 2474 for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--) 1810 2475 { 1811 UInt uiCGBlkPos = scanCG[ iCGScanPos ];1812 1813 d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ]; 2476 UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ]; 2477 2478 d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ]; 1814 2479 if (uiSigCoeffGroupFlag[ uiCGBlkPos ]) 1815 { 2480 { 1816 2481 for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--) 1817 2482 { 1818 2483 iScanPos = iCGScanPos*uiCGSize + iScanPosinCG; 1819 if (iScanPos > iLastScanPos) continue; 1820 UInt uiBlkPos = scan[iScanPos]; 1821 2484 2485 if (iScanPos > iLastScanPos) 2486 { 2487 continue; 2488 } 2489 UInt uiBlkPos = codingParameters.scan[iScanPos]; 2490 1822 2491 if( piDstCoeff[ uiBlkPos ] ) 1823 2492 { 1824 UInt uiPosY = uiBlkPos >> uiLog2Bl kSize;1825 UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2Bl kSize);1826 1827 Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY);2493 UInt uiPosY = uiBlkPos >> uiLog2BlockWidth; 2494 UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth ); 2495 2496 Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID ); 1828 2497 Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ]; 1829 2498 1830 2499 if( totalCost < d64BestCost ) 1831 2500 { … … 1845 2514 d64BaseCost -= pdCostSig[ iScanPos ]; 1846 2515 } 1847 } //end for 2516 } //end for 1848 2517 if (bFoundLast) 1849 2518 { … … 1851 2520 } 1852 2521 } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ]) 1853 } // end for 1854 2522 } // end for 2523 2524 1855 2525 for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ ) 1856 2526 { 1857 Int blkPos = scan[ scanPos ];1858 Int level= piDstCoeff[ blkPos ];2527 Int blkPos = codingParameters.scan[ scanPos ]; 2528 TCoeff level = piDstCoeff[ blkPos ]; 1859 2529 uiAbsSum += level; 1860 2530 piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level; 1861 2531 } 1862 2532 1863 2533 //===== clean uncoded coefficients ===== 1864 2534 for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ ) 1865 2535 { 1866 piDstCoeff[ scan[ scanPos ] ] = 0; 1867 } 1868 2536 piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0; 2537 } 2538 2539 1869 2540 if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2) 1870 2541 { 1871 Int64 rdFactor = (Int64) ( 1872 g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer)) 1873 / m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8))) 1874 + 0.5); 2542 const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]); 2543 Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per)) 2544 / m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth - 8))) 2545 + 0.5); 2546 1875 2547 Int lastCG = -1; 1876 2548 Int absSum = 0 ; 1877 2549 Int n ; 1878 1879 for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )1880 { 1881 Int subPos = subSet << LOG2_SCAN_SET_SIZE;1882 Int firstNZPosInCG= SCAN_SET_SIZE, lastNZPosInCG=-1 ;2550 2551 for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- ) 2552 { 2553 Int subPos = subSet << MLS_CG_SIZE; 2554 Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ; 1883 2555 absSum = 0 ; 1884 1885 for(n = SCAN_SET_SIZE-1; n >= 0; --n )1886 { 1887 if( piDstCoeff[ scan[ n + subPos ]] )2556 2557 for(n = uiCGSize-1; n >= 0; --n ) 2558 { 2559 if( piDstCoeff[ codingParameters.scan[ n + subPos ]] ) 1888 2560 { 1889 2561 lastNZPosInCG = n; … … 1891 2563 } 1892 2564 } 1893 1894 for(n = 0; n < SCAN_SET_SIZE; n++ )1895 { 1896 if( piDstCoeff[ scan[ n + subPos ]] )2565 2566 for(n = 0; n <uiCGSize; n++ ) 2567 { 2568 if( piDstCoeff[ codingParameters.scan[ n + subPos ]] ) 1897 2569 { 1898 2570 firstNZPosInCG = n; … … 1900 2572 } 1901 2573 } 1902 2574 1903 2575 for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ ) 1904 2576 { 1905 absSum += piDstCoeff[ scan[ n + subPos ]];1906 } 1907 2577 absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]); 2578 } 2579 1908 2580 if(lastNZPosInCG>=0 && lastCG==-1) 1909 2581 { 1910 lastCG = 1; 1911 } 1912 2582 lastCG = 1; 2583 } 2584 1913 2585 if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD ) 1914 2586 { 1915 UInt signbit = (piDstCoeff[ scan[subPos+firstNZPosInCG]]>0?0:1);2587 UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1); 1916 2588 if( signbit!=(absSum&0x1) ) // hide but need tune 1917 2589 { 1918 // calculate the cost 1919 Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;1920 Int minPos = -1, finalChange=0, curChange=0;1921 1922 for( n = (lastCG==1?lastNZPosInCG: SCAN_SET_SIZE-1) ; n >= 0; --n )2590 // calculate the cost 2591 Int64 minCostInc = std::numeric_limits<Int64>::max(), curCost = std::numeric_limits<Int64>::max(); 2592 Int minPos = -1, finalChange = 0, curChange = 0; 2593 2594 for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n ) 1923 2595 { 1924 UInt uiBlkPos = scan[ n + subPos ];2596 UInt uiBlkPos = codingParameters.scan[ n + subPos ]; 1925 2597 if(piDstCoeff[ uiBlkPos ] != 0 ) 1926 2598 { 1927 Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] 1928 Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos] 1929 - ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);1930 2599 Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos]; 2600 Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos] 2601 - ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0); 2602 1931 2603 if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1) 1932 2604 { 1933 costDown -= (4<<15) 2605 costDown -= (4<<15); 1934 2606 } 1935 2607 1936 2608 if(costUp<costDown) 1937 { 2609 { 1938 2610 curCost = costUp; 1939 curChange = 1 2611 curChange = 1; 1940 2612 } 1941 else 2613 else 1942 2614 { 1943 curChange = -1 2615 curChange = -1; 1944 2616 if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1) 1945 2617 { 1946 curCost = MAX_INT64;2618 curCost = std::numeric_limits<Int64>::max(); 1947 2619 } 1948 2620 else 1949 2621 { 1950 curCost = costDown ;2622 curCost = costDown; 1951 2623 } 1952 2624 } … … 1954 2626 else 1955 2627 { 1956 curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ; 2628 curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ; 1957 2629 curChange = 1 ; 1958 2630 1959 2631 if(n<firstNZPosInCG) 1960 2632 { … … 1962 2634 if(thissignbit != signbit ) 1963 2635 { 1964 curCost = MAX_INT64;2636 curCost = std::numeric_limits<Int64>::max(); 1965 2637 } 1966 2638 } 1967 2639 } 1968 2640 1969 2641 if( curCost<minCostInc) 1970 2642 { 1971 minCostInc = curCost 1972 finalChange = curChange 1973 minPos = uiBlkPos 2643 minCostInc = curCost; 2644 finalChange = curChange; 2645 minPos = uiBlkPos; 1974 2646 } 1975 2647 } 1976 1977 if(piDstCoeff[minPos] == 32767 || piDstCoeff[minPos] == -32768)2648 2649 if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum) 1978 2650 { 1979 2651 finalChange = -1; 1980 2652 } 1981 2653 1982 2654 if(plSrcCoeff[minPos]>=0) 1983 2655 { … … 1986 2658 else 1987 2659 { 1988 piDstCoeff[minPos] -= finalChange ; 1989 } 1990 } 1991 } 1992 2660 piDstCoeff[minPos] -= finalChange ; 2661 } 2662 } 2663 } 2664 1993 2665 if(lastCG==1) 1994 2666 { 1995 lastCG=0 ; 1996 } 1997 } 1998 } 1999 } 2667 lastCG=0 ; 2668 } 2669 } 2670 } 2671 } 2672 2000 2673 2001 2674 /** Pattern decision for context derivation process of significant_coeff_flag 2002 2675 * \param sigCoeffGroupFlag pointer to prior coded significant coeff group 2003 * \param posXCGcolumn of current coefficient group2004 * \param posYCGrow of current coefficient group2005 * \param width width of the block2006 * \param height height of the block2676 * \param uiCGPosX column of current coefficient group 2677 * \param uiCGPosY row of current coefficient group 2678 * \param widthInGroups width of the block 2679 * \param heightInGroups height of the block 2007 2680 * \returns pattern for current coefficient group 2008 2681 */ 2009 Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height ) 2010 { 2011 if( width == 4 && height == 4 ) return -1; 2682 Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups ) 2683 { 2684 if ((widthInGroups <= 1) && (heightInGroups <= 1)) 2685 { 2686 return 0; 2687 } 2688 2689 const Bool rightAvailable = uiCGPosX < (widthInGroups - 1); 2690 const Bool belowAvailable = uiCGPosY < (heightInGroups - 1); 2012 2691 2013 2692 UInt sigRight = 0; 2014 2693 UInt sigLower = 0; 2015 2694 2016 width >>= 2;2017 height >>= 2;2018 if( posXCG < width - 1 )2019 {2020 sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);2021 }2022 if (posYCG < height - 1 )2023 {2024 sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0); 2025 }2026 return sigRight + (sigLower<<1); 2027 } 2695 if (rightAvailable) 2696 { 2697 sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0); 2698 } 2699 if (belowAvailable) 2700 { 2701 sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0); 2702 } 2703 2704 return sigRight + (sigLower << 1); 2705 } 2706 2028 2707 2029 2708 /** Context derivation process of coeff_abs_significant_flag 2030 2709 * \param patternSigCtx pattern for current coefficient group 2031 * \param posX column of current scan position 2032 * \param posY row of current scan position 2033 * \param log2BlockSize log2 value of block size (square block) 2034 * \param width width of the block 2035 * \param height height of the block 2036 * \param textureType texture type (TEXT_LUMA...) 2710 * \param codingParameters coding parameters for the TU (includes the scan) 2711 * \param scanPosition current position in scan order 2712 * \param log2BlockWidth log2 width of the block 2713 * \param log2BlockHeight log2 height of the block 2714 * \param chanType channel type (CHANNEL_TYPE_LUMA/CHROMA) 2037 2715 * \returns ctxInc for current scan position 2038 2716 */ 2039 Int TComTrQuant::getSigCtxInc ( 2040 Int patternSigCtx, 2041 UInt scanIdx, 2042 Int posX, 2043 Int posY, 2044 Int log2BlockSize, 2045 TextType textureType 2046 ) 2047 { 2048 const Int ctxIndMap[16] = 2049 { 2050 0, 1, 4, 5, 2051 2, 3, 4, 5, 2052 6, 6, 8, 8, 2053 7, 7, 8, 8 2054 }; 2055 2056 if( posX + posY == 0 ) 2057 { 2058 return 0; 2059 } 2060 2061 if ( log2BlockSize == 2 ) 2062 { 2063 return ctxIndMap[ 4 * posY + posX ]; 2064 } 2065 2066 Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12); 2067 2068 Int posXinSubset = posX-((posX>>2)<<2); 2069 Int posYinSubset = posY-((posY>>2)<<2); 2070 Int cnt = 0; 2071 if(patternSigCtx==0) 2072 { 2073 cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0; 2074 } 2075 else if(patternSigCtx==1) 2076 { 2077 cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0; 2078 } 2079 else if(patternSigCtx==2) 2080 { 2081 cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0; 2717 Int TComTrQuant::getSigCtxInc ( Int patternSigCtx, 2718 const TUEntropyCodingParameters &codingParameters, 2719 const Int scanPosition, 2720 const Int log2BlockWidth, 2721 const Int log2BlockHeight, 2722 const ChannelType chanType) 2723 { 2724 if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE]) 2725 { 2726 //single context mode 2727 return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE]; 2728 } 2729 2730 const UInt rasterPosition = codingParameters.scan[scanPosition]; 2731 const UInt posY = rasterPosition >> log2BlockWidth; 2732 const UInt posX = rasterPosition - (posY << log2BlockWidth); 2733 2734 if ((posX + posY) == 0) 2735 { 2736 return 0; //special case for the DC context variable 2737 } 2738 2739 Int offset = MAX_INT; 2740 2741 if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4 2742 { 2743 offset = ctxIndMap4x4[ (4 * posY) + posX ]; 2082 2744 } 2083 2745 else 2084 2746 { 2085 cnt = 2; 2086 } 2087 2088 return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt; 2089 } 2747 Int cnt = 0; 2748 2749 switch (patternSigCtx) 2750 { 2751 //------------------ 2752 2753 case 0: //neither neighbouring group is significant 2754 { 2755 const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1); 2756 const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1); 2757 const Int posTotalInSubset = posXinSubset + posYinSubset; 2758 2759 //first N coefficients in scan order use 2; the next few use 1; the rest use 0. 2760 const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4; 2761 const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4; 2762 2763 cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2); 2764 } 2765 break; 2766 2767 //------------------ 2768 2769 case 1: //right group is significant, below is not 2770 { 2771 const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1); 2772 const Int groupHeight = 1 << MLS_CG_LOG2_HEIGHT; 2773 2774 cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0 2775 } 2776 break; 2777 2778 //------------------ 2779 2780 case 2: //below group is significant, right is not 2781 { 2782 const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1); 2783 const Int groupWidth = 1 << MLS_CG_LOG2_WIDTH; 2784 2785 cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0 2786 } 2787 break; 2788 2789 //------------------ 2790 2791 case 3: //both neighbouring groups are significant 2792 { 2793 cnt = 2; 2794 } 2795 break; 2796 2797 //------------------ 2798 2799 default: 2800 std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl; 2801 exit(1); 2802 break; 2803 } 2804 2805 //------------------------------------------------ 2806 2807 const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0; 2808 2809 offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt; 2810 } 2811 2812 return codingParameters.firstSignificanceMapContext + offset; 2813 } 2814 2090 2815 2091 2816 /** Get the best level in RD sense 2092 * \param rd64CodedCost reference to coded cost 2093 * \param rd64CodedCost0 reference to cost when coefficient is 0 2094 * \param rd64CodedCostSig reference to cost of significant coefficient 2095 * \param lLevelDouble reference to unscaled quantized level 2096 * \param uiMaxAbsLevel scaled quantized level 2097 * \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag 2098 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC) 2099 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC) 2100 * \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3 2101 * \param iQBits quantization step size 2102 * \param dTemp correction factor 2103 * \param bLast indicates if the coefficient is the last significant 2817 * 2104 2818 * \returns best quantized transform level for given scan position 2819 * 2105 2820 * This method calculates the best quantized transform level for a given scan position. 2106 2821 */ 2107 __inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost, 2108 Double& rd64CodedCost0, 2109 Double& rd64CodedCostSig, 2110 Int lLevelDouble, 2111 UInt uiMaxAbsLevel, 2112 UShort ui16CtxNumSig, 2113 UShort ui16CtxNumOne, 2114 UShort ui16CtxNumAbs, 2115 UShort ui16AbsGoRice, 2116 UInt c1Idx, 2117 UInt c2Idx, 2118 Int iQBits, 2119 Double dTemp, 2120 Bool bLast ) const 2121 { 2122 Double dCurrCostSig = 0; 2822 __inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost, //< reference to coded cost 2823 Double& rd64CodedCost0, //< reference to cost when coefficient is 0 2824 Double& rd64CodedCostSig, //< rd64CodedCostSig reference to cost of significant coefficient 2825 Intermediate_Int lLevelDouble, //< reference to unscaled quantized level 2826 UInt uiMaxAbsLevel, //< scaled quantized level 2827 UShort ui16CtxNumSig, //< current ctxInc for coeff_abs_significant_flag 2828 UShort ui16CtxNumOne, //< current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC) 2829 UShort ui16CtxNumAbs, //< current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC) 2830 UShort ui16AbsGoRice, //< current Rice parameter for coeff_abs_level_minus3 2831 UInt c1Idx, //< 2832 UInt c2Idx, //< 2833 Int iQBits, //< quantization step size 2834 Double errorScale, //< 2835 Bool bLast, //< indicates if the coefficient is the last significant 2836 Bool useLimitedPrefixLength, //< 2837 const Int maxLog2TrDynamicRange //< 2838 ) const 2839 { 2840 Double dCurrCostSig = 0; 2123 2841 UInt uiBestAbsLevel = 0; 2124 2842 2125 2843 if( !bLast && uiMaxAbsLevel < 3 ) 2126 2844 { 2127 rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig ); 2845 rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig ); 2128 2846 rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig; 2129 2847 if( uiMaxAbsLevel == 0 ) … … 2145 2863 for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- ) 2146 2864 { 2147 Double dErr = Double( lLevelDouble - ( uiAbsLevel<< iQBits ) );2148 Double dCurrCost = dErr * dErr * dTemp + xGetICost(xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx ));2865 Double dErr = Double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) ); 2866 Double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, maxLog2TrDynamicRange ) ); 2149 2867 dCurrCost += dCurrCostSig; 2150 2868 … … 2165 2883 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC) 2166 2884 * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3 2885 * \param c1Idx 2886 * \param c2Idx 2887 * \param useLimitedPrefixLength 2888 * \param maxLog2TrDynamicRange 2167 2889 * \returns cost of given absolute transform level 2168 2890 */ 2169 __inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel, 2170 UShort ui16CtxNumOne, 2171 UShort ui16CtxNumAbs, 2172 UShort ui16AbsGoRice 2173 , UInt c1Idx, 2174 UInt c2Idx 2891 __inline Int TComTrQuant::xGetICRate ( const UInt uiAbsLevel, 2892 const UShort ui16CtxNumOne, 2893 const UShort ui16CtxNumAbs, 2894 const UShort ui16AbsGoRice, 2895 const UInt c1Idx, 2896 const UInt c2Idx, 2897 const Bool useLimitedPrefixLength, 2898 const Int maxLog2TrDynamicRange 2175 2899 ) const 2176 2900 { 2177 Int iRate = Int(xGetIEPRate());2178 UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;2901 Int iRate = Int(xGetIEPRate()); // cost of sign bit 2902 UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1; 2179 2903 2180 2904 if ( uiAbsLevel >= baseLevel ) 2181 { 2905 { 2182 2906 UInt symbol = uiAbsLevel - baseLevel; 2183 2907 UInt length; … … 2187 2911 iRate += (length+1+ui16AbsGoRice)<< 15; 2188 2912 } 2913 else if (useLimitedPrefixLength) 2914 { 2915 const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange)); 2916 2917 UInt prefixLength = 0; 2918 UInt suffix = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION; 2919 2920 while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2))) 2921 { 2922 prefixLength++; 2923 } 2924 2925 const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ui16AbsGoRice) : (prefixLength + 1/*separator*/); 2926 2927 iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15; 2928 } 2189 2929 else 2190 2930 { … … 2193 2933 while (symbol >= (1<<length)) 2194 2934 { 2195 symbol -= (1<<(length++)); 2935 symbol -= (1<<(length++)); 2196 2936 } 2197 2937 iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15; 2198 2938 } 2939 2199 2940 if (c1Idx < C1FLAG_NUMBER) 2200 2941 { … … 2207 2948 } 2208 2949 } 2209 else 2210 if( uiAbsLevel == 1 ) 2950 else if( uiAbsLevel == 1 ) 2211 2951 { 2212 2952 iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ]; … … 2221 2961 iRate = 0; 2222 2962 } 2223 return iRate; 2963 2964 return iRate; 2224 2965 } 2225 2966 … … 2233 2974 * \param uiPosX X coordinate of the last significant coefficient 2234 2975 * \param uiPosY Y coordinate of the last significant coefficient 2976 * \param component colour component ID 2235 2977 * \returns cost of last significant coefficient 2236 2978 */ … … 2239 2981 */ 2240 2982 __inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX, 2241 const UInt uiPosY ) const 2983 const UInt uiPosY, 2984 const ComponentID component ) const 2242 2985 { 2243 2986 UInt uiCtxX = g_uiGroupIdx[uiPosX]; 2244 2987 UInt uiCtxY = g_uiGroupIdx[uiPosY]; 2245 Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ]; 2988 2989 Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ]; 2990 2246 2991 if( uiCtxX > 3 ) 2247 2992 { … … 2255 3000 } 2256 3001 2257 /** Calculates the cost for specific absolute transform level2258 * \param uiAbsLevel scaled quantized level2259 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)2260 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)2261 * \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater22262 * \returns cost of given absolute transform level2263 */2264 3002 __inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance, 2265 3003 UShort ui16CtxNumSig ) const … … 2287 3025 /** Context derivation process of coeff_abs_significant_flag 2288 3026 * \param uiSigCoeffGroupFlag significance map of L1 2289 * \param uiBlkX column of current scan position 2290 * \param uiBlkY row of current scan position 2291 * \param uiLog2BlkSize log2 value of block size 3027 * \param uiCGPosX column of current scan position 3028 * \param uiCGPosY row of current scan position 3029 * \param widthInGroups width of the block 3030 * \param heightInGroups height of the block 2292 3031 * \returns ctxInc for current scan position 2293 3032 */ 2294 UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag, 2295 const UInt uiCGPosX, 2296 const UInt uiCGPosY, 2297 Int width, Int height) 2298 { 2299 UInt uiRight = 0; 2300 UInt uiLower = 0; 2301 2302 width >>= 2; 2303 height >>= 2; 2304 if( uiCGPosX < width - 1 ) 2305 { 2306 uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0); 2307 } 2308 if (uiCGPosY < height - 1 ) 2309 { 2310 uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0); 2311 } 2312 return (uiRight || uiLower); 2313 2314 } 3033 UInt TComTrQuant::getSigCoeffGroupCtxInc (const UInt* uiSigCoeffGroupFlag, 3034 const UInt uiCGPosX, 3035 const UInt uiCGPosY, 3036 const UInt widthInGroups, 3037 const UInt heightInGroups) 3038 { 3039 UInt sigRight = 0; 3040 UInt sigLower = 0; 3041 3042 if (uiCGPosX < (widthInGroups - 1)) 3043 { 3044 sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0); 3045 } 3046 if (uiCGPosY < (heightInGroups - 1)) 3047 { 3048 sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0); 3049 } 3050 3051 return ((sigRight + sigLower) != 0) ? 1 : 0; 3052 } 3053 3054 2315 3055 /** set quantized matrix coefficient for encode 2316 * \param scalingList quantaized matrix address 3056 * \param scalingList quantized matrix address 3057 * \param format chroma format 3058 * \param maxLog2TrDynamicRange 3059 * \param bitDepths reference to bit depth array for all channels 2317 3060 */ 2318 Void TComTrQuant::setScalingList(TComScalingList *scalingList )2319 { 2320 UInt size,list;2321 UInt qp;2322 2323 for( size=0;size<SCALING_LIST_SIZE_NUM;size++)2324 { 2325 for( list = 0; list < g_scalingListNum[size]; list++)2326 { 2327 for( qp=0;qp<SCALING_LIST_REM_NUM;qp++)3061 Void TComTrQuant::setScalingList(TComScalingList *scalingList, const Int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths) 3062 { 3063 const Int minimumQp = 0; 3064 const Int maximumQp = SCALING_LIST_REM_NUM; 3065 3066 for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++) 3067 { 3068 for(UInt list = 0; list < SCALING_LIST_NUM; list++) 3069 { 3070 for(Int qp = minimumQp; qp < maximumQp; qp++) 2328 3071 { 2329 3072 xSetScalingListEnc(scalingList,list,size,qp); 3073 xSetScalingListDec(*scalingList,list,size,qp); 3074 setErrScaleCoeff(list,size,qp,maxLog2TrDynamicRange, bitDepths); 3075 } 3076 } 3077 } 3078 } 3079 /** set quantized matrix coefficient for decode 3080 * \param scalingList quantized matrix address 3081 * \param format chroma format 3082 */ 3083 Void TComTrQuant::setScalingListDec(const TComScalingList &scalingList) 3084 { 3085 const Int minimumQp = 0; 3086 const Int maximumQp = SCALING_LIST_REM_NUM; 3087 3088 for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++) 3089 { 3090 for(UInt list = 0; list < SCALING_LIST_NUM; list++) 3091 { 3092 for(Int qp = minimumQp; qp < maximumQp; qp++) 3093 { 2330 3094 xSetScalingListDec(scalingList,list,size,qp); 2331 setErrScaleCoeff(list,size,qp); 2332 } 2333 } 2334 } 2335 } 2336 /** set quantized matrix coefficient for decode 2337 * \param scalingList quantaized matrix address 3095 } 3096 } 3097 } 3098 } 3099 /** set error scale coefficients 3100 * \param list list ID 3101 * \param size 3102 * \param qp quantization parameter 3103 * \param maxLog2TrDynamicRange 3104 * \param bitDepths reference to bit depth array for all channels 2338 3105 */ 2339 Void TComTrQuant::setScalingListDec(TComScalingList *scalingList) 2340 { 2341 UInt size,list; 2342 UInt qp; 2343 2344 for(size=0;size<SCALING_LIST_SIZE_NUM;size++) 2345 { 2346 for(list = 0; list < g_scalingListNum[size]; list++) 2347 { 2348 for(qp=0;qp<SCALING_LIST_REM_NUM;qp++) 2349 { 2350 xSetScalingListDec(scalingList,list,size,qp); 2351 } 2352 } 2353 } 2354 } 2355 /** set error scale coefficients 2356 * \param list List ID 2357 * \param uiSize Size 2358 * \param uiQP Quantization parameter 2359 */ 2360 Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp) 2361 { 2362 2363 UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2; 2364 Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY; 2365 Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform 3106 Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp, const Int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths) 3107 { 3108 const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2; 3109 const ChannelType channelType = ((list == 0) || (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA; 3110 3111 const Int channelBitDepth = bitDepths.recon[channelType]; 3112 const Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange[channelType]); // Represents scaling through forward transform 2366 3113 2367 3114 UInt i,uiMaxNumCoeff = g_scalingListSize[size]; … … 2371 3118 pdErrScale = getErrScaleCoeff(list, size, qp); 2372 3119 2373 Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function 2374 dErrScale = dErrScale*pow(2.0,-2.0*iTransformShift); // Compensate for scaling through forward transform 3120 Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function 3121 dErrScale = dErrScale*pow(2.0,(-2.0*iTransformShift)); // Compensate for scaling through forward transform 3122 2375 3123 for(i=0;i<uiMaxNumCoeff;i++) 2376 3124 { 2377 pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8))); 2378 } 3125 pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepths.recon[channelType] - 8))); 3126 } 3127 3128 getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepths.recon[channelType] - 8))); 2379 3129 } 2380 3130 2381 3131 /** set quantized matrix coefficient for encode 3132 * \param scalingList quantized matrix address 3133 * \param listId List index 3134 * \param sizeId size index 3135 * \param qp Quantization parameter 3136 * \param format chroma format 3137 */ 3138 Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp) 3139 { 3140 UInt width = g_scalingListSizeX[sizeId]; 3141 UInt height = g_scalingListSizeX[sizeId]; 3142 UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]); 3143 Int *quantcoeff; 3144 Int *coeff = scalingList->getScalingListAddress(sizeId,listId); 3145 quantcoeff = getQuantCoeff(listId, qp, sizeId); 3146 3147 Int quantScales = g_quantScales[qp]; 3148 3149 processScalingListEnc(coeff, 3150 quantcoeff, 3151 (quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE), 3152 height, width, ratio, 3153 min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]), 3154 scalingList->getScalingListDC(sizeId,listId)); 3155 } 3156 3157 /** set quantized matrix coefficient for decode 2382 3158 * \param scalingList quantaized matrix address 2383 3159 * \param listId List index 2384 3160 * \param sizeId size index 2385 * \param uiQP Quantization parameter 3161 * \param qp Quantization parameter 3162 * \param format chroma format 2386 3163 */ 2387 Void TComTrQuant::xSetScalingList Enc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)2388 { 2389 UInt width = g_scalingListSizeX[sizeId];3164 Void TComTrQuant::xSetScalingListDec(const TComScalingList &scalingList, UInt listId, UInt sizeId, Int qp) 3165 { 3166 UInt width = g_scalingListSizeX[sizeId]; 2390 3167 UInt height = g_scalingListSizeX[sizeId]; 2391 UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]); 2392 Int *quantcoeff; 2393 Int *coeff = scalingList->getScalingListAddress(sizeId,listId); 2394 quantcoeff = getQuantCoeff(listId, qp, sizeId); 2395 2396 processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId)); 2397 } 2398 /** set quantized matrix coefficient for decode 2399 * \param scalingList quantaized matrix address 2400 * \param list List index 2401 * \param size size index 2402 * \param uiQP Quantization parameter 2403 */ 2404 Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp) 2405 { 2406 UInt width = g_scalingListSizeX[sizeId]; 2407 UInt height = g_scalingListSizeX[sizeId]; 2408 UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]); 3168 UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]); 2409 3169 Int *dequantcoeff; 2410 Int *coeff = scalingList->getScalingListAddress(sizeId,listId);3170 const Int *coeff = scalingList.getScalingListAddress(sizeId,listId); 2411 3171 2412 3172 dequantcoeff = getDequantCoeff(listId, qp, sizeId); 2413 processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId)); 3173 3174 Int invQuantScale = g_invQuantScales[qp]; 3175 3176 processScalingListDec(coeff, 3177 dequantcoeff, 3178 invQuantScale, 3179 height, width, ratio, 3180 min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]), 3181 scalingList.getScalingListDC(sizeId,listId)); 2414 3182 } 2415 3183 2416 3184 /** set flat matrix value to quantized coefficient 2417 3185 */ 2418 Void TComTrQuant::setFlatScalingList( )2419 { 2420 UInt size,list;2421 UInt qp;2422 2423 for( size=0;size<SCALING_LIST_SIZE_NUM;size++)2424 { 2425 for( list = 0; list < g_scalingListNum[size]; list++)2426 { 2427 for( qp=0;qp<SCALING_LIST_REM_NUM;qp++)3186 Void TComTrQuant::setFlatScalingList(const Int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths) 3187 { 3188 const Int minimumQp = 0; 3189 const Int maximumQp = SCALING_LIST_REM_NUM; 3190 3191 for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++) 3192 { 3193 for(UInt list = 0; list < SCALING_LIST_NUM; list++) 3194 { 3195 for(Int qp = minimumQp; qp < maximumQp; qp++) 2428 3196 { 2429 3197 xsetFlatScalingList(list,size,qp); 2430 setErrScaleCoeff(list,size,qp );3198 setErrScaleCoeff(list,size,qp,maxLog2TrDynamicRange, bitDepths); 2431 3199 } 2432 3200 } … … 2436 3204 /** set flat matrix value to quantized coefficient 2437 3205 * \param list List ID 2438 * \param uiQP Quantization parameter 2439 * \param uiSize Size 3206 * \param size size index 3207 * \param qp Quantization parameter 3208 * \param format chroma format 2440 3209 */ 2441 Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)3210 Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp) 2442 3211 { 2443 3212 UInt i,num = g_scalingListSize[size]; 2444 3213 Int *quantcoeff; 2445 3214 Int *dequantcoeff; 2446 Int quantScales = g_quantScales[qp]; 2447 Int invQuantScales = g_invQuantScales[qp]<<4; 3215 3216 Int quantScales = g_quantScales [qp]; 3217 Int invQuantScales = g_invQuantScales[qp] << 4; 2448 3218 2449 3219 quantcoeff = getQuantCoeff(list, qp, size); … … 2451 3221 2452 3222 for(i=0;i<num;i++) 2453 { 3223 { 2454 3224 *quantcoeff++ = quantScales; 2455 3225 *dequantcoeff++ = invQuantScales; … … 2469 3239 Void TComTrQuant::processScalingListEnc( Int *coeff, Int *quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc) 2470 3240 { 2471 Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT2472 Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT2473 3241 for(UInt j=0;j<height;j++) 2474 3242 { 2475 3243 for(UInt i=0;i<width;i++) 2476 3244 { 2477 quantcoeff[j*width + i] = quantScales / coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio]; 2478 } 2479 } 3245 quantcoeff[j*width + i] = quantScales / coeff[sizuNum * (j / ratio) + i / ratio]; 3246 } 3247 } 3248 2480 3249 if(ratio > 1) 2481 3250 { … … 2483 3252 } 2484 3253 } 3254 2485 3255 /** set quantized matrix coefficient for decode 2486 3256 * \param coeff quantaized matrix address … … 2493 3263 * \param dc dc parameter 2494 3264 */ 2495 Void TComTrQuant::processScalingListDec( Int *coeff, Int *dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)3265 Void TComTrQuant::processScalingListDec( const Int *coeff, Int *dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc) 2496 3266 { 2497 3267 for(UInt j=0;j<height;j++) … … 2502 3272 } 2503 3273 } 3274 2504 3275 if(ratio > 1) 2505 3276 { … … 2514 3285 for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++) 2515 3286 { 2516 for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)2517 { 2518 for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)2519 { 2520 m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];2521 m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];3287 for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++) 3288 { 3289 for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++) 3290 { 3291 m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]]; 3292 m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]]; 2522 3293 m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]]; 2523 } 2524 } 2525 } 2526 // alias list [1] as [3]. 2527 for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++) 2528 { 2529 m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp]; 2530 m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp]; 2531 m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp]; 2532 } 2533 } 3294 } // listID loop 3295 } 3296 } 3297 } 3298 2534 3299 /** destroy quantization matrix array 2535 3300 */ … … 2538 3303 for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++) 2539 3304 { 2540 for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)3305 for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++) 2541 3306 { 2542 3307 for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++) 2543 3308 { 2544 if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp]; 2545 if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp]; 2546 if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp]; 2547 } 2548 } 3309 if(m_quantCoef[sizeId][listId][qp]) 3310 { 3311 delete [] m_quantCoef[sizeId][listId][qp]; 3312 } 3313 if(m_dequantCoef[sizeId][listId][qp]) 3314 { 3315 delete [] m_dequantCoef[sizeId][listId][qp]; 3316 } 3317 if(m_errScale[sizeId][listId][qp]) 3318 { 3319 delete [] m_errScale[sizeId][listId][qp]; 3320 } 3321 } 3322 } 3323 } 3324 } 3325 3326 Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const TCoeff resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint) 3327 { 3328 TComDataCU *pcCU = rTu.getCU(); 3329 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 3330 const TComRectangle &rect = rTu.getRect(compID); 3331 const UInt uiWidth = rect.width; 3332 const UInt uiHeight = rect.height; 3333 const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)); 3334 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); 3335 const Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(compID), maxLog2TrDynamicRange); 3336 const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID); 3337 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true); 3338 const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem]; 3339 3340 assert( scalingListType < SCALING_LIST_NUM ); 3341 const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) ); 3342 3343 3344 /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be 3345 * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the 3346 * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) 3347 * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result 3348 */ 3349 3350 const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; 3351 // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset 3352 3353 const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9); 3354 3355 TCoeff transformedCoefficient; 3356 3357 // transform-skip 3358 if (iTransformShift >= 0) 3359 { 3360 transformedCoefficient = resiDiff << iTransformShift; 3361 } 3362 else // for very high bit depths 3363 { 3364 const Int iTrShiftNeg = -iTransformShift; 3365 const Int offset = 1 << (iTrShiftNeg - 1); 3366 transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg; 3367 } 3368 3369 // quantization 3370 const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1); 3371 3372 const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient; 3373 3374 const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient; 3375 3376 const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign; 3377 3378 const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); 3379 const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; 3380 pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient ); 3381 } 3382 3383 3384 Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos ) 3385 { 3386 TComDataCU *pcCU = rTu.getCU(); 3387 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(); 3388 const TComRectangle &rect = rTu.getRect(compID); 3389 const UInt uiWidth = rect.width; 3390 const UInt uiHeight = rect.height; 3391 const Int QP_per = cQP.per; 3392 const Int QP_rem = cQP.rem; 3393 const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)); 3394 #if O0043_BEST_EFFORT_DECODING 3395 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getStreamBitDepth(toChannelType(compID)); 3396 #else 3397 const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID)); 3398 #endif 3399 const Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(compID), maxLog2TrDynamicRange); 3400 const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID); 3401 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true); 3402 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID); 3403 3404 assert( scalingListType < SCALING_LIST_NUM ); 3405 3406 const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0); 3407 3408 const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange); 3409 const TCoeff transformMaximum = (1 << maxLog2TrDynamicRange) - 1; 3410 3411 // Dequantisation 3412 3413 TCoeff dequantisedSample; 3414 3415 if(enableScalingLists) 3416 { 3417 const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS; 3418 const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits)); 3419 3420 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1)); 3421 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1; 3422 3423 Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2); 3424 3425 if(rightShift > 0) 3426 { 3427 const Intermediate_Int iAdd = 1 << (rightShift - 1); 3428 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample)); 3429 const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift; 3430 3431 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 3432 } 3433 else 3434 { 3435 const Int leftShift = -rightShift; 3436 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample)); 3437 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift; 3438 3439 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 3440 } 3441 } 3442 else 3443 { 3444 const Int scale = g_invQuantScales[QP_rem]; 3445 const Int scaleBits = (IQUANT_SHIFT + 1) ; 3446 3447 const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits)); 3448 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1)); 3449 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1; 3450 3451 if (rightShift > 0) 3452 { 3453 const Intermediate_Int iAdd = 1 << (rightShift - 1); 3454 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample)); 3455 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift; 3456 3457 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 3458 } 3459 else 3460 { 3461 const Int leftShift = -rightShift; 3462 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample)); 3463 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift; 3464 3465 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ)); 3466 } 3467 } 3468 3469 // Inverse transform-skip 3470 3471 if (iTransformShift >= 0) 3472 { 3473 const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1)); 3474 reconSample = Pel(( dequantisedSample + offset ) >> iTransformShift); 3475 } 3476 else //for very high bit depths 3477 { 3478 const Int iTrShiftNeg = -iTransformShift; 3479 reconSample = Pel(dequantisedSample << iTrShiftNeg); 3480 } 3481 } 3482 3483 3484 Void TComTrQuant::crossComponentPrediction( TComTU & rTu, 3485 const ComponentID compID, 3486 const Pel * piResiL, 3487 const Pel * piResiC, 3488 Pel * piResiT, 3489 const Int width, 3490 const Int height, 3491 const Int strideL, 3492 const Int strideC, 3493 const Int strideT, 3494 const Bool reverse ) 3495 { 3496 const Pel *pResiL = piResiL; 3497 const Pel *pResiC = piResiC; 3498 Pel *pResiT = piResiT; 3499 3500 TComDataCU *pCU = rTu.getCU(); 3501 const Int alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID ); 3502 const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth(); 3503 3504 for( Int y = 0; y < height; y++ ) 3505 { 3506 if (reverse) 3507 { 3508 // A constraint is to be added to the HEVC Standard to limit the size of pResiL and pResiC at this point. 3509 // The likely form of the constraint is to either restrict the values to CoeffMin to CoeffMax, 3510 // or to be representable in a bitDepthY+4 or bitDepthC+4 signed integer. 3511 // The result of the constraint is that for 8/10/12bit profiles, the input values 3512 // can be represented within a 16-bit Pel-type. 3513 #if RExt__HIGH_BIT_DEPTH_SUPPORT 3514 for( Int x = 0; x < width; x++ ) 3515 { 3516 pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3); 3517 } 3518 #else 3519 const Int minPel=std::numeric_limits<Pel>::min(); 3520 const Int maxPel=std::numeric_limits<Pel>::max(); 3521 for( Int x = 0; x < width; x++ ) 3522 { 3523 pResiT[x] = Clip3<Int>(minPel, maxPel, pResiC[x] + (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3)); 3524 } 3525 #endif 3526 } 3527 else 3528 { 3529 // Forward does not need clipping. Pel type should always be big enough. 3530 for( Int x = 0; x < width; x++ ) 3531 { 3532 pResiT[x] = pResiC[x] - (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3); 3533 } 3534 } 3535 3536 pResiL += strideL; 3537 pResiC += strideC; 3538 pResiT += strideT; 2549 3539 } 2550 3540 }
Note: See TracChangeset for help on using the changeset viewer.