Changeset 56 in 3DVCSoftware for trunk/source/Lib/TLibEncoder/TEncAdaptiveLoopFilter.cpp
- Timestamp:
- 11 May 2012, 21:20:17 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/source/Lib/TLibEncoder/TEncAdaptiveLoopFilter.cpp
r5 r56 2 2 * License, included below. This software may be subject to other third party 3 3 * and contributor rights, including patent rights, and no such rights are 4 * granted under this license. 4 * granted under this license. 5 5 * 6 * Copyright (c) 2010-201 1,ISO/IEC6 * Copyright (c) 2010-2012, ITU/ISO/IEC 7 7 * All rights reserved. 8 8 * … … 15 15 * this list of conditions and the following disclaimer in the documentation 16 16 * and/or other materials provided with the distribution. 17 * * Neither the name of the I SO/IEC nor the names of its contributors may17 * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may 18 18 * be used to endorse or promote products derived from this software without 19 19 * specific prior written permission. … … 32 32 */ 33 33 34 35 36 34 /** \file TEncAdaptiveLoopFilter.cpp 37 35 \brief estimation part of adaptive loop filter class … … 43 41 #include <math.h> 44 42 43 //! \ingroup TLibEncoder 44 //! \{ 45 45 46 // ==================================================================================================================== 46 47 // Constants 47 48 // ==================================================================================================================== 48 49 #if LCU_SYNTAX_ALF 50 #define ALF_NUM_OF_REDESIGN 1 51 #else 49 52 #define ALF_NUM_OF_REDESIGN 3 50 53 #endif 51 54 // ==================================================================================================================== 52 55 // Tables 53 56 // ==================================================================================================================== 54 55 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x9[81] = 56 { 57 0, 1, 2, 3, 4, 5, 6, 7, 8, 58 9, 10, 11, 12, 13, 14, 15, 16, 17, 59 18, 19, 20, 21, 22, 23, 24, 25, 26, 60 27, 28, 29, 30, 31, 32, 33, 34, 35, 61 36, 37, 38, 39, 40, 39, 38, 37, 36, 62 35, 34, 33, 32, 31, 30, 29, 28, 27, 63 26, 25, 24, 23, 22, 21, 20, 19, 18, 64 17, 16, 15, 14, 13, 12, 
11, 10, 9, 65 8, 7, 6, 5, 4, 3, 2, 1, 0 57 #if LCU_SYNTAX_ALF 58 const Int TEncAdaptiveLoopFilter::m_alfNumPartsInRowTab[5] = 59 { 60 1, //level 0 61 2, //level 1 62 4, //level 2 63 8, //level 3 64 16 //level 4 66 65 }; 67 66 68 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray7x7[49] = 69 { 70 0, 1, 2, 3, 4, 5, 6, 71 7, 8, 9, 10, 11, 12, 13, 72 14, 15, 16, 17, 18, 19, 20, 73 21, 22, 23, 24, 23, 22, 21, 74 20, 19, 18, 17, 16, 15, 14, 75 13, 12, 11, 10, 9, 8, 7, 76 6, 5, 4, 3, 2, 1, 0, 67 const Int TEncAdaptiveLoopFilter::m_alfNumPartsLevelTab[5] = 68 { 69 1, //level 0 70 4, //level 1 71 16, //level 2 72 64, //level 3 73 256 //level 4 77 74 }; 78 75 79 const Int TEncAdaptiveLoopFilter::m_a iSymmetricArray5x5[25] =80 { 81 0, 1, 2, 3, 4,82 5, 6, 7, 8, 9,83 10, 11, 12, 11, 10,84 9, 8, 7, 6, 5,85 4, 3, 2, 1, 0,76 const Int TEncAdaptiveLoopFilter::m_alfNumCulPartsLevelTab[5] = 77 { 78 1, //level 0 79 5, //level 1 80 21, //level 2 81 85, //level 3 82 341, //level 4 86 83 }; 87 88 #if TI_ALF_MAX_VSIZE_7 89 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x7[63] = 90 { 91 0, 1, 2, 3, 4, 5, 6, 7, 8, 92 9, 10, 11, 12, 13, 14, 15, 16, 17, 93 18, 19, 20, 21, 22, 23, 24, 25, 26, 94 27, 28, 29, 30, 31, 30, 29, 28, 27, 95 26, 25, 24, 23, 22, 21, 20, 19, 18, 96 17, 16, 15, 14, 13, 12, 11, 10, 9, 97 8, 7, 6, 5, 4, 3, 2, 1, 0 98 }; 99 #endif 100 101 #if MQT_ALF_NPASS 102 #if TI_ALF_MAX_VSIZE_7 103 Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[21] = 104 #else 105 Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[22] = 106 #endif 107 { 108 #if TI_ALF_MAX_VSIZE_7 109 0, 1, 2, 110 3, 4, 5, 6, 7, 111 8, 9, 10, 11, 12, 13, 14, 112 15, 16, 17, 18, 19, 20 113 #else 114 0, 115 1, 2, 3, 116 4, 5, 6, 7, 8, 117 9, 10, 11, 12, 13, 14, 15, 118 16, 17, 18, 19, 20, 21 119 #endif 120 }; 121 122 Int TEncAdaptiveLoopFilter::m_aiTapPos7x7_In9x9Sym[14] = 123 { 124 #if TI_ALF_MAX_VSIZE_7 125 1, 126 4, 5, 6, 127 9, 10, 11, 12, 13, 128 16, 17, 18, 19, 20 129 130 #else 131 132 2, 133 5, 6, 7, 
134 10, 11, 12, 13, 14, 135 17, 18, 19, 20, 21 136 #endif 137 }; 138 139 Int TEncAdaptiveLoopFilter::m_aiTapPos5x5_In9x9Sym[8] = 140 { 141 142 #if TI_ALF_MAX_VSIZE_7 143 5, 144 10, 11, 12, 145 17, 18, 19, 20 146 #else 147 6, 148 11, 12, 13, 149 18, 19, 20, 21 150 151 #endif 152 153 }; 154 155 Int* TEncAdaptiveLoopFilter::m_iTapPosTabIn9x9Sym[NO_TEST_FILT] = 156 { 157 m_aiTapPos9x9_In9x9Sym, m_aiTapPos7x7_In9x9Sym, m_aiTapPos5x5_In9x9Sym 158 }; 159 #endif 160 84 #endif 161 85 // ==================================================================================================================== 162 86 // Constructor / destructor 163 87 // ==================================================================================================================== 164 88 89 #if LCU_SYNTAX_ALF 90 ///AlfCorrData 91 AlfCorrData::AlfCorrData() 92 { 93 this->componentID = -1; 94 this->ECorr = NULL; 95 this->yCorr = NULL; 96 this->pixAcc = NULL; 97 } 98 99 AlfCorrData::AlfCorrData(Int cIdx) 100 { 101 const Int numCoef = ALF_MAX_NUM_COEF; 102 const Int maxNumGroups = NO_VAR_BINS; 103 104 Int numGroups = (cIdx == ALF_Y)?(maxNumGroups):(1); 105 106 this->componentID = cIdx; 107 108 this->ECorr = new Double**[numGroups]; 109 this->yCorr = new Double*[numGroups]; 110 this->pixAcc = new Double[numGroups]; 111 for(Int g= 0; g< numGroups; g++) 112 { 113 this->yCorr[g] = new Double[numCoef]; 114 for(Int j=0; j< numCoef; j++) 115 { 116 this->yCorr[g][j] = 0; 117 } 118 119 this->ECorr[g] = new Double*[numCoef]; 120 for(Int i=0; i< numCoef; i++) 121 { 122 this->ECorr[g][i] = new Double[numCoef]; 123 for(Int j=0; j< numCoef; j++) 124 { 125 this->ECorr[g][i][j] = 0; 126 } 127 } 128 this->pixAcc[g] = 0; 129 } 130 } 131 132 AlfCorrData::~AlfCorrData() 133 { 134 if(this->componentID >=0) 135 { 136 const Int numCoef = ALF_MAX_NUM_COEF; 137 const Int maxNumGroups = NO_VAR_BINS; 138 139 Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1); 140 141 for(Int g= 0; g< numGroups; g++) 142 { 143 
for(Int i=0; i< numCoef; i++) 144 { 145 delete[] this->ECorr[g][i]; 146 } 147 delete[] this->ECorr[g]; 148 delete[] this->yCorr[g]; 149 } 150 delete[] this->ECorr; 151 delete[] this->yCorr; 152 delete[] this->pixAcc; 153 } 154 155 } 156 157 AlfCorrData& AlfCorrData::operator += (const AlfCorrData& src) 158 { 159 if(this->componentID >=0) 160 { 161 const Int numCoef = ALF_MAX_NUM_COEF; 162 const Int maxNumGroups = NO_VAR_BINS; 163 164 Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1); 165 for(Int g=0; g< numGroups; g++) 166 { 167 this->pixAcc[g] += src.pixAcc[g]; 168 169 for(Int j=0; j< numCoef; j++) 170 { 171 this->yCorr[g][j] += src.yCorr[g][j]; 172 for(Int i=0; i< numCoef; i++) 173 { 174 this->ECorr[g][j][i] += src.ECorr[g][j][i]; 175 } 176 } 177 } 178 } 179 180 return *this; 181 } 182 183 184 Void AlfCorrData::reset() 185 { 186 if(this->componentID >=0) 187 { 188 const Int numCoef = ALF_MAX_NUM_COEF; 189 const Int maxNumGroups = NO_VAR_BINS; 190 191 Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1); 192 for(Int g=0; g< numGroups; g++) 193 { 194 this->pixAcc[g] = 0; 195 196 for(Int j=0; j< numCoef; j++) 197 { 198 this->yCorr[g][j] = 0; 199 for(Int i=0; i< numCoef; i++) 200 { 201 this->ECorr[g][j][i] = 0; 202 } 203 } 204 205 206 } 207 } 208 209 } 210 211 Void AlfCorrData::mergeFrom(const AlfCorrData& src, Int* mergeTable, Bool doPixAccMerge) 212 { 213 assert(componentID == src.componentID); 214 215 reset(); 216 217 const Int numCoef = ALF_MAX_NUM_COEF; 218 219 Double **srcE, **dstE; 220 Double *srcy, *dsty; 221 222 switch(componentID) 223 { 224 case ALF_Cb: 225 case ALF_Cr: 226 { 227 srcE = src.ECorr [0]; 228 dstE = this->ECorr[0]; 229 230 srcy = src.yCorr[0]; 231 dsty = this->yCorr[0]; 232 233 for(Int j=0; j< numCoef; j++) 234 { 235 for(Int i=0; i< numCoef; i++) 236 { 237 dstE[j][i] += srcE[j][i]; 238 } 239 240 dsty[j] += srcy[j]; 241 } 242 if(doPixAccMerge) 243 { 244 this->pixAcc[0] = src.pixAcc[0]; 245 } 246 } 247 break; 248 case 
ALF_Y: 249 { 250 Int maxFilterSetSize = (Int)NO_VAR_BINS; 251 for (Int varInd=0; varInd< maxFilterSetSize; varInd++) 252 { 253 Int filtIdx = (mergeTable == NULL)?(0):(mergeTable[varInd]); 254 srcE = src.ECorr [varInd]; 255 dstE = this->ECorr[ filtIdx ]; 256 srcy = src.yCorr[varInd]; 257 dsty = this->yCorr[ filtIdx ]; 258 for(Int j=0; j< numCoef; j++) 259 { 260 for(Int i=0; i< numCoef; i++) 261 { 262 dstE[j][i] += srcE[j][i]; 263 } 264 dsty[j] += srcy[j]; 265 } 266 if(doPixAccMerge) 267 { 268 this->pixAcc[filtIdx] += src.pixAcc[varInd]; 269 } 270 } 271 } 272 break; 273 default: 274 { 275 printf("not a legal component ID\n"); 276 assert(0); 277 exit(-1); 278 } 279 } 280 } 281 282 ///AlfPicQTPart 283 AlfPicQTPart::AlfPicQTPart() 284 { 285 componentID = -1; 286 alfUnitParam = NULL; 287 alfCorr = NULL; 288 } 289 290 AlfPicQTPart::~AlfPicQTPart() 291 { 292 if(alfUnitParam != NULL) 293 { 294 if(alfUnitParam->alfFiltParam != NULL) 295 { 296 delete alfUnitParam->alfFiltParam; 297 alfUnitParam->alfFiltParam = NULL; 298 } 299 delete alfUnitParam; 300 alfUnitParam = NULL; 301 } 302 if(alfCorr != NULL) 303 { 304 delete alfCorr; 305 alfCorr = NULL; 306 } 307 } 308 309 AlfPicQTPart& AlfPicQTPart::operator= (const AlfPicQTPart& src) 310 { 311 componentID = src.componentID; 312 partCUXS = src.partCUXS; 313 partCUYS = src.partCUYS; 314 partCUXE = src.partCUXE; 315 partCUYE = src.partCUYE; 316 partIdx = src.partIdx; 317 partLevel = src.partLevel; 318 partCol = src.partCol; 319 partRow = src.partRow; 320 for(Int i=0; i<4; i++) 321 { 322 childPartIdx[i] = src.childPartIdx[i]; 323 } 324 parentPartIdx = src.parentPartIdx; 325 326 isBottomLevel = src.isBottomLevel; 327 isSplit = src.isSplit; 328 329 isProcessed = src.isProcessed; 330 splitMinCost = src.splitMinCost; 331 splitMinDist = src.splitMinDist; 332 splitMinRate = src.splitMinRate; 333 selfMinCost = src.selfMinCost; 334 selfMinDist = src.selfMinDist; 335 selfMinRate = src.selfMinRate; 336 337 numFilterBudget = src.numFilterBudget; 
338 339 if(src.alfUnitParam != NULL) 340 { 341 if(alfUnitParam == NULL) 342 { 343 //create alfUnitparam 344 alfUnitParam = new AlfUnitParam; 345 alfUnitParam->alfFiltParam = new ALFParam(componentID); 346 } 347 //assign from src 348 alfUnitParam->mergeType = src.alfUnitParam->mergeType; 349 alfUnitParam->isEnabled = src.alfUnitParam->isEnabled; 350 alfUnitParam->isNewFilt = src.alfUnitParam->isNewFilt; 351 alfUnitParam->storedFiltIdx = src.alfUnitParam->storedFiltIdx; 352 *(alfUnitParam->alfFiltParam) = *(src.alfUnitParam->alfFiltParam); 353 } 354 else 355 { 356 printf("source quad-tree partition info is not complete\n"); 357 assert(0); 358 exit(-1); 359 } 360 361 if(src.alfCorr != NULL) 362 { 363 if(alfCorr == NULL) 364 { 365 alfCorr = new AlfCorrData(componentID); 366 } 367 alfCorr->reset(); 368 (*alfCorr) += (*(src.alfCorr)); 369 } 370 else 371 { 372 printf("source quad-tree partition info is not complete\n"); 373 assert(0); 374 exit(-1); 375 } 376 return *this; 377 } 378 #endif 379 380 165 381 TEncAdaptiveLoopFilter::TEncAdaptiveLoopFilter() 166 382 { 383 #if !LCU_SYNTAX_ALF 167 384 m_ppdAlfCorr = NULL; 385 m_ppdAlfCorrCb = NULL; 386 m_ppdAlfCorrCr = NULL; 168 387 m_pdDoubleAlfCoeff = NULL; 169 m_pcPic = NULL; 388 #endif 170 389 m_pcEntropyCoder = NULL; 390 #if !LCU_SYNTAX_ALF 171 391 m_pcBestAlfParam = NULL; 172 392 m_pcTempAlfParam = NULL; 393 #endif 173 394 m_pcPicYuvBest = NULL; 174 395 m_pcPicYuvTmp = NULL; 175 #if MTK_NONCROSS_INLOOP_FILTER 396 #if !LCU_SYNTAX_ALF 397 pcAlfParamShape0 = NULL; 398 pcAlfParamShape1 = NULL; 399 pcPicYuvRecShape0 = NULL; 400 pcPicYuvRecShape1 = NULL; 176 401 m_pcSliceYuvTmp = NULL; 177 402 #endif 178 #if MQT_BA_RA && MQT_ALF_NPASS 179 m_aiFilterCoeffSaved = NULL; 180 #endif 403 404 m_iALFMaxNumberFilters = NO_FILTERS; 405 406 m_bAlfCUCtrlEnabled = false; 181 407 } 182 408 … … 185 411 // ==================================================================================================================== 186 412 187 #if 
MQT_BA_RA && MQT_ALF_NPASS 413 #if LCU_SYNTAX_ALF 414 /** convert Level Row Col to Idx 415 * \param level, row, col 416 */ 417 Int TEncAdaptiveLoopFilter::convertLevelRowCol2Idx(Int level, Int row, Int col) 418 { 419 Int idx; 420 if (level == 0) 421 { 422 idx = 0; 423 } 424 else if (level == 1) 425 { 426 idx = 1 + row*2 + col; 427 } 428 else if (level == 2) 429 { 430 idx = 5 + row*4 + col; 431 } 432 else if (level == 3) 433 { 434 idx = 21 + row*8 + col; 435 } 436 else // (level == 4) 437 { 438 idx = 85 + row*16 + col; 439 } 440 return idx; 441 } 442 443 /** convert quadtree Idx to Level, Row, and Col 444 * \param idx, *level, *row, *col 445 */ 446 Void TEncAdaptiveLoopFilter::convertIdx2LevelRowCol(Int idx, Int *level, Int *row, Int *col) 447 { 448 if (idx == 0) 449 { 450 *level = 0; 451 *row = 0; 452 *col = 0; 453 } 454 else if (idx>=1 && idx<=4) 455 { 456 *level = 1; 457 *row = (idx-1) / 2; 458 *col = (idx-1) % 2; 459 } 460 else if (idx>=5 && idx<=20) 461 { 462 *level = 2; 463 *row = (idx-5) / 4; 464 *col = (idx-5) % 4; 465 } 466 else if (idx>=21 && idx<=84) 467 { 468 *level = 3; 469 *row = (idx-21) / 8; 470 *col = (idx-21) % 8; 471 } 472 else // (idx>=85 && idx<=340) 473 { 474 *level = 4; 475 *row = (idx-85) / 16; 476 *col = (idx-85) % 16; 477 } 478 } 479 480 /** Initial picture quad-tree 481 * \param [in] isPicBasedEncode picture quad-tree encoding is enabled or disabled 482 */ 483 Void TEncAdaptiveLoopFilter::initPicQuadTreePartition(Bool isPicBasedEncode) 484 { 485 if (!isPicBasedEncode) 486 { 487 return; 488 } 489 490 Int maxDepthInWidth = (Int)(logf((float)(m_numLCUInPicWidth ))/logf(2.0)); 491 Int maxDepthInHeight = (Int)(logf((float)(m_numLCUInPicHeight ))/logf(2.0)); 492 Int maxDepthInFilters = (Int)(logf((float)(m_iALFMaxNumberFilters ))/logf(2.0)); 493 m_alfPQTMaxDepth = (maxDepthInWidth > maxDepthInHeight ) ? maxDepthInHeight : maxDepthInWidth ; 494 m_alfPQTMaxDepth = (m_alfPQTMaxDepth > maxDepthInFilters) ? 
maxDepthInFilters : m_alfPQTMaxDepth ; 495 496 for (Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++) 497 { 498 m_alfPQTPart[compIdx] = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ]; 499 for (Int i = 0; i < m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++ ) 500 { 501 m_alfPQTPart[compIdx][i].alfCorr = new AlfCorrData(compIdx); 502 m_alfPQTPart[compIdx][i].alfUnitParam = new AlfUnitParam; 503 m_alfPQTPart[compIdx][i].alfUnitParam->alfFiltParam = new ALFParam(compIdx); 504 } 505 506 } 507 creatPQTPart(0, 0, 0, -1, 0, m_numLCUInPicWidth-1, 0, m_numLCUInPicHeight-1); 508 } 509 510 /** Reset picture quad-tree variables 511 */ 512 Void TEncAdaptiveLoopFilter::resetPQTPart() 513 { 514 Int compIdx, i; 515 516 for (compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++) 517 { 518 for (i = 0; i < m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++ ) 519 { 520 m_alfPQTPart[compIdx][i].isProcessed = false; 521 m_alfPQTPart[compIdx][i].selfMinCost = MAX_DOUBLE; 522 m_alfPQTPart[compIdx][i].splitMinCost = MAX_DOUBLE; 523 //reset correlations 524 m_alfPQTPart[compIdx][i].alfCorr->reset(); 525 //reset ALF unit param 526 m_alfPQTPart[compIdx][i].alfUnitParam->mergeType = ALF_MERGE_DISABLED; 527 m_alfPQTPart[compIdx][i].alfUnitParam->isEnabled = false; 528 m_alfPQTPart[compIdx][i].alfUnitParam->alfFiltParam->alf_flag = 0; 529 } 530 } 531 } 532 533 /** create picture quad-tree 534 * \param [in] partLevel quad-tree level 535 * \param [in] partRow row position at partLevel 536 * \param [in] partCol column position at partLevel 537 * \param [in] parentPartIdx parent partition index 538 * \param [in] partCUXS starting LCU X position 539 * \param [in] partCUXE ending LCU X position 540 * \param [in] partCUYS starting LCU Y position 541 * \param [in] partCUYE ending LCU Y position 542 */ 543 Void TEncAdaptiveLoopFilter::creatPQTPart(Int partLevel, Int partRow, Int partCol, Int parentPartIdx, Int partCUXS, Int partCUXE, Int partCUYS, Int partCUYE) 544 { 545 Int partIdx = 
convertLevelRowCol2Idx(partLevel, partRow, partCol); 546 547 AlfPicQTPart *alfOnePartY, *alfOnePartCb, *alfOnePartCr; 548 549 alfOnePartY = &(m_alfPQTPart[ALF_Y ][partIdx]); 550 alfOnePartCb = &(m_alfPQTPart[ALF_Cb][partIdx]); 551 alfOnePartCr = &(m_alfPQTPart[ALF_Cr][partIdx]); 552 553 // Y, Cb, Cr 554 alfOnePartY->partIdx = alfOnePartCb->partIdx = alfOnePartCr->partIdx = partIdx; 555 alfOnePartY->partCol = alfOnePartCb->partCol = alfOnePartCr->partCol = partCol; 556 alfOnePartY->partRow = alfOnePartCb->partRow = alfOnePartCr->partRow = partRow; 557 alfOnePartY->partLevel = alfOnePartCb->partLevel = alfOnePartCr->partLevel = partLevel; 558 559 alfOnePartY->partCUXS = alfOnePartCb->partCUXS = alfOnePartCr->partCUXS = partCUXS; 560 alfOnePartY->partCUXE = alfOnePartCb->partCUXE = alfOnePartCr->partCUXE = partCUXE; 561 alfOnePartY->partCUYS = alfOnePartCb->partCUYS = alfOnePartCr->partCUYS = partCUYS; 562 alfOnePartY->partCUYE = alfOnePartCb->partCUYE = alfOnePartCr->partCUYE = partCUYE; 563 564 alfOnePartY->parentPartIdx = alfOnePartCb->parentPartIdx = alfOnePartCr->parentPartIdx = parentPartIdx; 565 alfOnePartY->isSplit = alfOnePartCb->isSplit = alfOnePartCr->isSplit = false; 566 567 #if LCUALF_FILTER_BUDGET_CONTROL_ENC 568 alfOnePartY->numFilterBudget = alfOnePartCb->numFilterBudget = alfOnePartCr->numFilterBudget = m_iALFMaxNumberFilters/m_alfNumPartsLevelTab[partLevel]; 569 #else 570 alfOnePartY->numFilterBudget = alfOnePartCb->numFilterBudget = alfOnePartCr->numFilterBudget = NO_VAR_BINS; 571 #endif 572 573 alfOnePartY->componentID = ALF_Y; 574 alfOnePartCb->componentID = ALF_Cb; 575 alfOnePartCr->componentID = ALF_Cr; 576 577 if (alfOnePartY->partLevel != m_alfPQTMaxDepth) 578 { 579 alfOnePartY->isBottomLevel = alfOnePartCb->isBottomLevel = alfOnePartCr->isBottomLevel = false; 580 581 Int downLevel = partLevel + 1; 582 Int downRowStart = partRow << 1; 583 Int downColStart = partCol << 1; 584 585 Int downRowIdx, downColIdx; 586 Int numCULeft, numCUTop; 587 Int 
downStartCUX, downStartCUY, downEndCUX, downEndCUY; 588 589 numCULeft = (partCUXE - partCUXS + 1) >> 1 ; 590 numCUTop = (partCUYE - partCUYS + 1) >> 1 ; 591 592 // ChildPart00 593 downStartCUX = partCUXS; 594 downEndCUX = downStartCUX + numCULeft - 1; 595 downStartCUY = partCUYS; 596 downEndCUY = downStartCUY + numCUTop - 1; 597 downRowIdx = downRowStart + 0; 598 downColIdx = downColStart + 0; 599 600 alfOnePartY->childPartIdx[0] = alfOnePartCb->childPartIdx[0] = alfOnePartCr->childPartIdx[0] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx); 601 creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY); 602 603 // ChildPart01 604 downStartCUX = partCUXS + numCULeft; 605 downEndCUX = partCUXE; 606 downStartCUY = partCUYS; 607 downEndCUY = downStartCUY + numCUTop - 1; 608 downRowIdx = downRowStart + 0; 609 downColIdx = downColStart + 1; 610 611 alfOnePartY->childPartIdx[1] = alfOnePartCb->childPartIdx[1] = alfOnePartCr->childPartIdx[1] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx); 612 creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY); 613 614 // ChildPart10 615 downStartCUX = partCUXS; 616 downEndCUX = downStartCUX + numCULeft - 1; 617 downStartCUY = partCUYS + numCUTop; 618 downEndCUY = partCUYE; 619 downRowIdx = downRowStart + 1; 620 downColIdx = downColStart + 0; 621 622 alfOnePartY->childPartIdx[2] = alfOnePartCb->childPartIdx[2] = alfOnePartCr->childPartIdx[2] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx); 623 creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY); 624 625 // ChildPart11 626 downStartCUX = partCUXS + numCULeft; 627 downEndCUX = partCUXE; 628 downStartCUY = partCUYS + numCUTop; 629 downEndCUY = partCUYE; 630 downRowIdx = downRowStart + 1; 631 downColIdx = downColStart + 1; 632 633 alfOnePartY->childPartIdx[3] = alfOnePartCb->childPartIdx[3] = 
alfOnePartCr->childPartIdx[3] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx); 634 creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY); 635 } 636 else 637 { 638 alfOnePartY->isBottomLevel = alfOnePartCb->isBottomLevel = alfOnePartCr->isBottomLevel = true; 639 640 alfOnePartY->childPartIdx[0] = alfOnePartCb->childPartIdx[0] = alfOnePartCr->childPartIdx[0] = -1; 641 alfOnePartY->childPartIdx[1] = alfOnePartCb->childPartIdx[1] = alfOnePartCr->childPartIdx[1] = -1; 642 alfOnePartY->childPartIdx[2] = alfOnePartCb->childPartIdx[2] = alfOnePartCr->childPartIdx[2] = -1; 643 alfOnePartY->childPartIdx[3] = alfOnePartCb->childPartIdx[3] = alfOnePartCr->childPartIdx[3] = -1; 644 } 645 } 646 647 /** create global buffers for ALF encoding 648 */ 649 Void TEncAdaptiveLoopFilter::createAlfGlobalBuffers() 650 { 651 for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++) 652 { 653 m_alfPicFiltUnits[compIdx] = new AlfUnitParam[m_uiNumCUsInFrame]; 654 m_alfCorr[compIdx] = new AlfCorrData*[m_uiNumCUsInFrame]; 655 for(Int n=0; n< m_uiNumCUsInFrame; n++) 656 { 657 m_alfCorr[compIdx][n]= new AlfCorrData(compIdx); 658 m_alfCorr[compIdx][n]->reset(); 659 } 660 661 m_alfCorrMerged[compIdx] = new AlfCorrData(compIdx); 662 663 } 664 665 666 const Int numCoef = (Int)ALF_MAX_NUM_COEF; 667 668 for(Int i=0; i< (Int)NO_VAR_BINS; i++) 669 { 670 m_coeffNoFilter[i] = new Int[numCoef]; 671 } 672 673 m_numSlicesDataInOneLCU = new Int[m_uiNumCUsInFrame]; 674 675 } 676 677 /** destroy ALF global buffers 678 * This function is used to destroy the global ALF encoder buffers 679 */ 680 Void TEncAdaptiveLoopFilter::destroyAlfGlobalBuffers() 681 { 682 for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++) 683 { 684 delete[] m_alfPicFiltUnits[compIdx]; 685 for(Int n=0; n< m_uiNumCUsInFrame; n++) 686 { 687 delete m_alfCorr[compIdx][n]; 688 } 689 690 delete[] m_alfCorr[compIdx]; 691 m_alfCorr[compIdx] = NULL; 692 693 delete 
m_alfCorrMerged[compIdx]; 694 } 695 696 //const Int numCoef = (Int)ALF_MAX_NUM_COEF; 697 698 for(Int i=0; i< (Int)NO_VAR_BINS; i++) 699 { 700 delete[] m_coeffNoFilter[i]; 701 } 702 703 delete[] m_numSlicesDataInOneLCU; 704 705 } 706 707 /** initialize ALF encoder at picture level 708 * \param [in] isAlfParamInSlice ALF parameters are coded in slice (true) or APS (false) 709 * \param [in] isPicBasedEncode picture-based encoding (true) or LCU-based encoding (false) 710 * \param [in] numSlices number of slices in current picture 711 * \param [in, out] alfParams ALF parameter set 712 * \param [in, out] alfCUCtrlParam ALF CU-on/off control parameters 713 */ 714 Void TEncAdaptiveLoopFilter::initALFEnc(Bool isAlfParamInSlice, Bool isPicBasedEncode, Int numSlices, AlfParamSet* & alfParams, std::vector<AlfCUCtrlInfo>* & alfCUCtrlParam) 715 { 716 m_picBasedALFEncode = isPicBasedEncode; 717 718 if(isAlfParamInSlice) 719 { 720 alfParams = new AlfParamSet[m_uiNumSlicesInPic]; 721 Int numLCUs = m_uiNumCUsInFrame; 722 723 for(Int s=0; s< m_uiNumSlicesInPic; s++) 724 { 725 numLCUs = (Int)(m_pcPic->getOneSliceCUDataForNDBFilter(s).size()); 726 alfParams[s].create(m_numLCUInPicWidth,m_numLCUInPicHeight, numLCUs ); 727 alfParams[s].createALFParam(); 728 } 729 alfCUCtrlParam = NULL; 730 } 731 else //ALF parameter in APS 732 { 733 alfParams = NULL; //ALF parameters are handled by APS 734 alfCUCtrlParam = new std::vector<AlfCUCtrlInfo>; 735 alfCUCtrlParam->resize(numSlices); 736 } 737 738 resetPicAlfUnit(); 739 740 if(m_picBasedALFEncode) 741 { 742 resetPQTPart(); 743 } 744 745 const Int numCoef = (Int)ALF_MAX_NUM_COEF; 746 #if LCUALF_QP_DEPENDENT_BITS 747 Int numBitShift = getAlfPrecisionBit( m_alfQP ); 748 #else 749 Int numBitShift = (Int)ALF_NUM_BIT_SHIFT; 750 #endif 751 for(Int i=0; i< (Int)NO_VAR_BINS; i++) 752 { 753 ::memset(&(m_coeffNoFilter[i][0]), 0, sizeof(Int)*numCoef); 754 m_coeffNoFilter[i][numCoef-1] = (1 << numBitShift); 755 } 756 757 } 758 759 /** Uninitialize ALF 
encoder at picture level 760 * \param [in, out] alfParams ALF parameter set 761 * \param [in, out] alfCUCtrlParam ALF CU-on/off control parameters 762 */ 763 Void TEncAdaptiveLoopFilter::uninitALFEnc(AlfParamSet* & alfParams, std::vector<AlfCUCtrlInfo>* & alfCUCtrlParam) 764 { 765 if(alfParams != NULL) 766 { 767 for(Int s=0; s< m_uiNumSlicesInPic; s++) 768 { 769 alfParams[s].releaseALFParam(); 770 } 771 delete[] alfParams; 772 alfParams = NULL; 773 } 774 775 if(alfCUCtrlParam != NULL) 776 { 777 delete alfCUCtrlParam; 778 alfCUCtrlParam = NULL; 779 } 780 } 781 782 /** reset ALF unit parameters in current picture 783 */ 784 Void TEncAdaptiveLoopFilter::resetPicAlfUnit() 785 { 786 for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++) 787 { 788 for(Int i=0; i< m_uiNumCUsInFrame; i++) 789 { 790 AlfUnitParam& alfUnit = m_alfPicFiltUnits[compIdx][i]; 791 alfUnit.mergeType = ALF_MERGE_DISABLED; 792 alfUnit.isEnabled = false; 793 alfUnit.isNewFilt = true; 794 alfUnit.alfFiltParam = m_alfFiltInfo[compIdx][i]; 795 796 alfUnit.alfFiltParam->alf_flag = 0; 797 } 798 } 799 } 800 801 #else 802 803 /** create ALF global buffers 804 * \param iALFEncodePassReduction 0: 16-pass encoding, 1: 1-pass encoding, 2: 2-pass encoding 805 * This function is used to create the filter buffers to perform time-delay filtering. 
806 */ 188 807 Void TEncAdaptiveLoopFilter::createAlfGlobalBuffers(Int iALFEncodePassReduction) 189 808 { 190 809 if(iALFEncodePassReduction) 191 810 { 811 Int iNumOfBuffer = m_iGOPSize +1; 812 192 813 for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++) 193 814 { 194 m_aiFilterCoeffSavedMethods[i] = new Int**[9]; 195 for(Int j=0; j< 9; j++) 196 { 815 m_mergeTableSavedMethods[i] = new Int*[iNumOfBuffer]; 816 m_aiFilterCoeffSavedMethods[i] = new Int**[iNumOfBuffer]; 817 for(Int j=0; j< iNumOfBuffer; j++) 818 { 819 m_mergeTableSavedMethods[i][j] = new Int[NO_VAR_BINS]; 197 820 m_aiFilterCoeffSavedMethods[i][j] = new Int*[NO_VAR_BINS]; 198 821 for(Int k=0; k< NO_VAR_BINS; k++) 199 822 { 200 m_aiFilterCoeffSavedMethods[i][j][k] = new Int[ MAX_SQR_FILT_LENGTH];823 m_aiFilterCoeffSavedMethods[i][j][k] = new Int[ALF_MAX_NUM_COEF]; 201 824 } 202 825 } 203 } 204 205 } 206 } 826 m_iPreviousFilterShapeMethods[i] = new Int[iNumOfBuffer]; 827 } 828 829 } 830 } 831 /** destroy ALF global buffers 832 * This function is used to destroy the filter buffers. 
833 */ 207 834 208 835 Void TEncAdaptiveLoopFilter::destroyAlfGlobalBuffers() … … 212 839 for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++) 213 840 { 214 for(Int j=0; j< 9; j++)841 for(Int j=0; j< m_iGOPSize+1; j++) 215 842 { 216 843 for(Int k=0; k< NO_VAR_BINS; k++) … … 219 846 } 220 847 delete[] m_aiFilterCoeffSavedMethods[i][j]; 848 delete[] m_mergeTableSavedMethods[i][j]; 221 849 } 222 850 delete[] m_aiFilterCoeffSavedMethods[i]; 223 } 224 225 } 226 227 } 228 #endif 229 851 delete[] m_iPreviousFilterShapeMethods[i]; 852 delete[] m_mergeTableSavedMethods[i]; 853 854 } 855 856 } 857 858 } 859 #endif 230 860 /** 231 861 \param pcPic picture (TComPic) pointer … … 234 864 Void TEncAdaptiveLoopFilter::startALFEnc( TComPic* pcPic, TEncEntropy* pcEntropyCoder ) 235 865 { 236 m_pcPic = pcPic;237 866 m_pcEntropyCoder = pcEntropyCoder; 238 239 m_eSliceType = pcPic->getSlice(0)->getSliceType(); 240 m_iPicNalReferenceIdc = (pcPic->getSlice(0)->isReferenced() ? 1 :0); 241 242 m_uiNumSCUInCU = m_pcPic->getNumPartInCU(); 243 867 #if !LCU_SYNTAX_ALF 244 868 xInitParam(); 869 #endif 245 870 xCreateTmpAlfCtrlFlags(); 246 871 … … 251 876 m_pcPicYuvTmp->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth); 252 877 m_pcPicYuvBest = pcPic->getPicYuvPred(); 253 878 #if !LCU_SYNTAX_ALF 254 879 m_pcBestAlfParam = new ALFParam; 255 880 m_pcTempAlfParam = new ALFParam; 256 881 allocALFParam(m_pcBestAlfParam); 257 882 allocALFParam(m_pcTempAlfParam); 258 m_im_width = iWidth; 259 m_im_height = iHeight; 260 883 pcPicYuvRecShape0 = new TComPicYuv(); 884 pcPicYuvRecShape0->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth); 885 886 pcPicYuvRecShape1 = new TComPicYuv(); 887 pcPicYuvRecShape1->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth); 888 889 pcAlfParamShape0 = new ALFParam; 890 pcAlfParamShape1 = new ALFParam; 891 892 allocALFParam(pcAlfParamShape0); 893 allocALFParam(pcAlfParamShape1); 894 261 895 // init qc_filter 
262 initMatrix4D_double(&m_EGlobalSym, NO_TEST_FILT, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH); 263 initMatrix3D_double(&m_yGlobalSym, NO_TEST_FILT, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); 264 initMatrix_int(&m_filterCoeffSymQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); 265 896 initMatrix4D_double(&m_EGlobalSym, NUM_ALF_FILTER_SHAPE+1, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH); 897 initMatrix3D_double(&m_yGlobalSym, NUM_ALF_FILTER_SHAPE+1, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); 898 #endif 899 initMatrix_int(&m_filterCoeffSymQuant, NO_VAR_BINS, ALF_MAX_NUM_COEF); 900 #if !LCU_SYNTAX_ALF 266 901 m_pixAcc = (double *) calloc(NO_VAR_BINS, sizeof(double)); 267 #if !MQT_BA_RA 268 get_mem2Dpel(&m_varImg, m_im_height, m_im_width); 269 #endif 270 get_mem2Dpel(&m_maskImg, m_im_height, m_im_width); 271 902 #endif 903 initMatrix_Pel(&m_maskImg, m_img_height, m_img_width); 272 904 initMatrix_double(&m_E_temp, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);// 273 905 m_y_temp = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));// … … 275 907 initMatrix_double(&m_y_merged, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); // 276 908 m_pixAcc_merged = (double *) calloc(NO_VAR_BINS, sizeof(double));// 277 278 m_filterCoeffQuantMod = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));// 279 m_filterCoeff = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));// 280 m_filterCoeffQuant = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));// 281 initMatrix_int(&m_diffFilterCoeffQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);// 282 initMatrix_int(&m_FilterCoeffQuantTemp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);// 283 909 m_filterCoeffQuantMod = (int *) calloc(ALF_MAX_NUM_COEF, sizeof(int));// 910 m_filterCoeff = (double *) calloc(ALF_MAX_NUM_COEF, sizeof(double));// 911 m_filterCoeffQuant = (int *) calloc(ALF_MAX_NUM_COEF, sizeof(int));// 912 initMatrix_int(&m_diffFilterCoeffQuant, NO_VAR_BINS, ALF_MAX_NUM_COEF);// 913 initMatrix_int(&m_FilterCoeffQuantTemp, NO_VAR_BINS, ALF_MAX_NUM_COEF);// 914 915 
#if LCU_SYNTAX_ALF 916 m_tempALFp = new ALFParam(ALF_Y); 917 #else 284 918 m_tempALFp = new ALFParam; 285 919 allocALFParam(m_tempALFp); 286 m_pcDummyEntropyCoder = m_pcEntropyCoder;287 288 #if MTK_NONCROSS_INLOOP_FILTER289 920 if( m_bUseNonCrossALF ) 290 921 { … … 293 924 } 294 925 #endif 295 296 297 926 } 298 927 299 928 Void TEncAdaptiveLoopFilter::endALFEnc() 300 929 { 930 #if !LCU_SYNTAX_ALF 301 931 xUninitParam(); 932 #endif 302 933 xDestroyTmpAlfCtrlFlags(); 303 934 … … 307 938 m_pcPic = NULL; 308 939 m_pcEntropyCoder = NULL; 309 940 #if !LCU_SYNTAX_ALF 310 941 freeALFParam(m_pcBestAlfParam); 311 942 freeALFParam(m_pcTempAlfParam); 312 943 delete m_pcBestAlfParam; 313 944 delete m_pcTempAlfParam; 945 946 pcPicYuvRecShape0->destroyLuma(); 947 delete pcPicYuvRecShape0; 948 pcPicYuvRecShape0 = NULL; 949 950 pcPicYuvRecShape1->destroyLuma(); 951 delete pcPicYuvRecShape1; 952 pcPicYuvRecShape1 = NULL; 953 954 freeALFParam(pcAlfParamShape0); 955 freeALFParam(pcAlfParamShape1); 956 957 delete pcAlfParamShape0; 958 delete pcAlfParamShape1; 959 314 960 // delete qc filters 315 destroyMatrix4D_double(m_EGlobalSym, NO_TEST_FILT, NO_VAR_BINS); 316 destroyMatrix3D_double(m_yGlobalSym, NO_TEST_FILT); 961 destroyMatrix4D_double(m_EGlobalSym, NUM_ALF_FILTER_SHAPE+1, NO_VAR_BINS); 962 destroyMatrix3D_double(m_yGlobalSym, NUM_ALF_FILTER_SHAPE+1); 963 #endif 317 964 destroyMatrix_int(m_filterCoeffSymQuant); 318 965 #if !LCU_SYNTAX_ALF 319 966 free(m_pixAcc); 320 #if !MQT_BA_RA 321 free_mem2Dpel(m_varImg); 322 #endif 323 free_mem2Dpel(m_maskImg); 324 967 #endif 968 destroyMatrix_Pel(m_maskImg); 325 969 destroyMatrix3D_double(m_E_merged, NO_VAR_BINS); 326 970 destroyMatrix_double(m_y_merged); … … 336 980 destroyMatrix_int(m_FilterCoeffQuantTemp); 337 981 982 #if LCU_SYNTAX_ALF 983 delete m_tempALFp; 984 #else 338 985 freeALFParam(m_tempALFp); 339 986 delete m_tempALFp; 340 341 #if MTK_NONCROSS_INLOOP_FILTER342 987 343 988 if(m_bUseNonCrossALF) … … 348 993 } 349 994 #endif 350 
351 } 352 995 } 996 997 #if LCU_SYNTAX_ALF 998 999 /** Assign output ALF parameters 1000 * \param [in, out] alfParamSet ALF parameter set 1001 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters 1002 */ 1003 Void TEncAdaptiveLoopFilter::assignALFEncoderParam(AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam) 1004 { 1005 //assign CU control parameters 1006 if(m_bAlfCUCtrlEnabled) 1007 { 1008 for(Int s=0; s< m_uiNumSlicesInPic; s++) 1009 { 1010 (*alfCtrlParam)[s]= m_vBestAlfCUCtrlParam[s]; 1011 } 1012 } 1013 1014 //assign RDO results to alfParamSet 1015 if(m_alfCoefInSlice) 1016 { 1017 for(Int s=0; s< m_uiNumSlicesInPic; s++) 1018 { 1019 if(!m_pcPic->getValidSlice(s)) 1020 { 1021 continue; 1022 } 1023 1024 if( m_bestAlfParamSet[s].isEnabled[ALF_Y] || m_bestAlfParamSet[s].isEnabled[ALF_Cb] || m_bestAlfParamSet[s].isEnabled[ALF_Cr]) 1025 { 1026 m_bestAlfParamSet[s].isEnabled[ALF_Y] = true; 1027 } 1028 1029 copyAlfParamSet(&(alfParamSet[s]), &(m_bestAlfParamSet[s])); 1030 } 1031 } 1032 else 1033 { 1034 if( m_bestAlfParamSet->isEnabled[ALF_Y] || m_bestAlfParamSet->isEnabled[ALF_Cb] || m_bestAlfParamSet->isEnabled[ALF_Cr]) 1035 { 1036 m_bestAlfParamSet->isEnabled[ALF_Y] = true; 1037 } 1038 1039 copyAlfParamSet(alfParamSet, m_bestAlfParamSet); 1040 } 1041 1042 if(m_alfCoefInSlice) 1043 { 1044 delete[] m_bestAlfParamSet; 1045 } 1046 else 1047 { 1048 delete m_bestAlfParamSet; 1049 } 1050 } 1051 1052 /** initialize ALF encoder configurations 1053 * \param [in, out] alfParamSet ALF parameter set 1054 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters 1055 */ 1056 Void TEncAdaptiveLoopFilter::initALFEncoderParam(AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam) 1057 { 1058 //reset BA index map 1059 memset(&m_varImg[0][0], 0, sizeof(Pel)*(m_img_height*m_img_width)); 1060 1061 //reset mask 1062 for(Int y=0; y< m_img_height; y++) 1063 { 1064 for(Int x=0; x< m_img_width; x++) 1065 { 1066 m_maskImg[y][x] = 1; 1067 } 1068 
} 1069 //get last valid slice index 1070 for(Int s=0; s< m_uiNumSlicesInPic; s++) 1071 { 1072 if(m_pcPic->getValidSlice(s)) 1073 { 1074 m_lastSliceIdx = s; 1075 } 1076 } 1077 //reset alf CU control flags 1078 m_bAlfCUCtrlEnabled = (alfCtrlParam != NULL)?true:false; 1079 if(m_bAlfCUCtrlEnabled) 1080 { 1081 m_vBestAlfCUCtrlParam.resize(m_uiNumSlicesInPic); 1082 for(Int s=0; s< m_uiNumSlicesInPic; s++) 1083 { 1084 m_vBestAlfCUCtrlParam[s].reset(); 1085 } 1086 } 1087 else 1088 { 1089 m_vBestAlfCUCtrlParam.clear(); 1090 } 1091 //get number slices in each LCU 1092 if(m_uiNumSlicesInPic == 1 || m_iSGDepth == 0) 1093 { 1094 for(Int n=0; n< m_uiNumCUsInFrame; n++) 1095 { 1096 m_numSlicesDataInOneLCU[n] = 1; 1097 } 1098 } 1099 else 1100 { 1101 Int count; 1102 Int prevSliceID = -1; 1103 1104 for(Int n=0; n< m_uiNumCUsInFrame; n++) 1105 { 1106 std::vector<NDBFBlockInfo>& vNDBFBlock = *(m_pcPic->getCU(n)->getNDBFilterBlocks()); 1107 1108 count = 0; 1109 1110 for(Int i=0; i< (Int)vNDBFBlock.size(); i++) 1111 { 1112 if(vNDBFBlock[i].sliceID != prevSliceID) 1113 { 1114 prevSliceID = vNDBFBlock[i].sliceID; 1115 count++; 1116 } 1117 } 1118 1119 m_numSlicesDataInOneLCU[n] = count; 1120 } 1121 } 1122 //set redesign number 1123 if(m_iALFEncodePassReduction) 1124 { 1125 m_iALFNumOfRedesign = 0; 1126 } 1127 else 1128 { 1129 m_iALFNumOfRedesign = ALF_NUM_OF_REDESIGN; 1130 } 1131 1132 //initialize m_bestAlfParamSet 1133 if(m_alfCoefInSlice) 1134 { 1135 m_bestAlfParamSet = new AlfParamSet[m_uiNumSlicesInPic]; 1136 for(Int s=0; s< m_uiNumSlicesInPic; s++) 1137 { 1138 m_bestAlfParamSet[s].create( alfParamSet[s].numLCUInWidth, alfParamSet[s].numLCUInHeight, alfParamSet[s].numLCU); 1139 } 1140 } 1141 else 1142 { 1143 m_bestAlfParamSet = new AlfParamSet; 1144 m_bestAlfParamSet->create( alfParamSet->numLCUInWidth, alfParamSet->numLCUInHeight, alfParamSet->numLCU); 1145 } 1146 1147 } 1148 1149 /** copy ALF parameter set 1150 * \param [out] dst destination ALF parameter set 1151 * \param [in] src 
source ALF parameter set 1152 */ 1153 Void TEncAdaptiveLoopFilter::copyAlfParamSet(AlfParamSet* dst, AlfParamSet* src) 1154 { 1155 dst->numLCU = src->numLCU; 1156 dst->numLCUInWidth = src->numLCUInWidth; 1157 dst->numLCUInHeight = src->numLCUInHeight; 1158 1159 for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++) 1160 { 1161 dst->isEnabled[compIdx] = src->isEnabled[compIdx]; 1162 dst->isUniParam[compIdx] = src->isUniParam[compIdx]; 1163 1164 for(Int n=0; n< src->numLCU; n++) 1165 { 1166 dst->alfUnitParam[compIdx][n].isEnabled = src->alfUnitParam[compIdx][n].isEnabled; 1167 dst->alfUnitParam[compIdx][n].isNewFilt = src->alfUnitParam[compIdx][n].isNewFilt; 1168 dst->alfUnitParam[compIdx][n].mergeType = src->alfUnitParam[compIdx][n].mergeType; 1169 dst->alfUnitParam[compIdx][n].storedFiltIdx = src->alfUnitParam[compIdx][n].storedFiltIdx; 1170 *(dst->alfUnitParam[compIdx][n].alfFiltParam) = *(src->alfUnitParam[compIdx][n].alfFiltParam); 1171 } 1172 } 1173 } 1174 1175 1176 /** ALF encoding process top function 1177 * \param [in, out] alfParamSet ALF parameter set 1178 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters 1179 * \param [in] dLambdaLuma lambda value for luma RDO 1180 * \param [in] dLambdaChroma lambda value for chroma RDO 1181 */ 1182 #if ALF_CHROMA_LAMBDA 1183 #if HHI_INTERVIEW_SKIP 1184 Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambdaLuma, Double lambdaChroma, Bool bInterviewSkip) 1185 #else 1186 Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambdaLuma, Double lambdaChroma) 1187 #endif 1188 #else 1189 #if HHI_INTERVIEW_SKIP 1190 #else 1191 Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambda) 1192 #endif 1193 #endif 1194 { 1195 #if ALF_CHROMA_LAMBDA 1196 m_dLambdaLuma = lambdaLuma; 1197 m_dLambdaChroma = lambdaChroma; 1198 
#else 1199 m_dLambdaLuma = lambda; 1200 m_dLambdaChroma = lambda; 1201 #endif 1202 TComPicYuv* yuvOrg = m_pcPic->getPicYuvOrg(); 1203 TComPicYuv* yuvRec = m_pcPic->getPicYuvRec(); 1204 TComPicYuv* yuvExtRec = m_pcTempPicYuv; 1205 #if HHI_INTERVIEW_SKIP 1206 TComPicYuv* pUsedPelMap = NULL; 1207 if( bInterviewSkip ) 1208 { 1209 pUsedPelMap = m_pcPic->getUsedPelsMap(); 1210 } 1211 #endif 1212 1213 //picture boundary padding 1214 yuvRec->copyToPic(yuvExtRec); 1215 yuvExtRec->setBorderExtension( false ); 1216 yuvExtRec->extendPicBorder (); 1217 1218 //initialize encoder parameters 1219 initALFEncoderParam(alfParamSet, alfCtrlParam); 1220 1221 //get LCU statistics 1222 getStatistics(yuvOrg, yuvExtRec); 1223 1224 //decide ALF parameters 1225 #if HHI_INTERVIEW_SKIP 1226 decideParameters(yuvOrg, yuvExtRec, yuvRec, pUsedPelMap, m_bestAlfParamSet, alfCtrlParam); 1227 #else 1228 decideParameters(yuvOrg, yuvExtRec, yuvRec, m_bestAlfParamSet, alfCtrlParam); 1229 #endif 1230 1231 //assign best parameters 1232 assignALFEncoderParam(alfParamSet, alfCtrlParam); 1233 } 1234 1235 /** Check if the current LCU can be merged with neighboring LCU 1236 * \param [in] compIdx luma/chroma component index 1237 * \param [out] alfUnitPic ALF unit parameters for all LCUs in picture 1238 */ 1239 Void TEncAdaptiveLoopFilter::checkMerge(Int compIdx, AlfUnitParam* alfUnitPic) 1240 { 1241 AlfUnitParam *alfUnitLeft, *alfUnitUp; 1242 1243 for(Int n=0; n< m_uiNumCUsInFrame; n++) 1244 { 1245 Int lcuPosX = (Int)(n % m_numLCUInPicWidth); 1246 Int lcuPosY = (Int)(n / m_numLCUInPicWidth); 1247 1248 AlfUnitParam& alfUnitCur = alfUnitPic[n]; 1249 1250 //check merge left 1251 if( lcuPosX != 0) 1252 { 1253 alfUnitLeft = &(alfUnitPic[n - 1]); 1254 if(alfUnitCur == *alfUnitLeft) 1255 { 1256 alfUnitCur.mergeType = ALF_MERGE_LEFT; 1257 alfUnitCur.isEnabled = alfUnitLeft->isEnabled; 1258 alfUnitCur.isNewFilt = alfUnitLeft->isNewFilt; 1259 alfUnitCur.storedFiltIdx = alfUnitLeft->storedFiltIdx; 1260 
*(alfUnitCur.alfFiltParam) = *(alfUnitLeft->alfFiltParam); 1261 continue; 1262 } 1263 } 1264 1265 //check merge up 1266 if(lcuPosY !=0 ) 1267 { 1268 alfUnitUp = &(alfUnitPic[n - m_numLCUInPicWidth]); 1269 if(alfUnitCur == *alfUnitUp) 1270 { 1271 alfUnitCur.mergeType = ALF_MERGE_UP; 1272 alfUnitCur.isEnabled = alfUnitUp->isEnabled; 1273 alfUnitCur.isNewFilt = alfUnitUp->isNewFilt; 1274 alfUnitCur.storedFiltIdx = alfUnitUp->storedFiltIdx; 1275 *(alfUnitCur.alfFiltParam) = *(alfUnitUp->alfFiltParam); 1276 continue; 1277 } 1278 } 1279 } 1280 1281 } 1282 1283 /** Transfer ALF unit parameters for LCUs to to-be-coded ALF parameter set 1284 * \param [in] compIdx luma/chroma component index 1285 * \param [in] alfUnitPic ALF unit parameters for all LCUs in picture 1286 * \param [out] alfParamSet to-be-coded ALF parameter set 1287 */ 1288 Void TEncAdaptiveLoopFilter::transferToAlfParamSet(Int compIdx, AlfUnitParam* alfUnitPic, AlfParamSet* & alfParamSet) 1289 { 1290 1291 Int countFiltOffLCU = 0, countNewFilts = 0; 1292 1293 AlfUnitParam* alfUnitParams = alfParamSet->alfUnitParam[compIdx]; 1294 for(Int n=0; n< m_uiNumCUsInFrame; n++) 1295 { 1296 alfUnitParams[n] = alfUnitPic[n]; 1297 1298 1299 if(alfUnitParams[n].alfFiltParam->alf_flag == 0) 1300 { 1301 countFiltOffLCU++; 1302 } 1303 else 1304 { 1305 Bool isNewFiltInSlice = (alfUnitParams[n].mergeType == ALF_MERGE_DISABLED && alfUnitParams[n].isEnabled && alfUnitParams[n].isNewFilt); 1306 if( isNewFiltInSlice ) 1307 { 1308 countNewFilts++; 1309 } 1310 } 1311 } 1312 1313 //slice-level parameters 1314 AlfUnitParam* firstAlfUnitInSlice = &(alfUnitParams[0]); 1315 if( countFiltOffLCU == m_uiNumCUsInFrame ) //number of filter-off LCU is equal to the number of LCUs in slice 1316 { 1317 alfParamSet->isEnabled [compIdx] = false; 1318 alfParamSet->isUniParam[compIdx] = true; //uni-param, all off 1319 assert(firstAlfUnitInSlice->alfFiltParam->alf_flag == 0); 1320 } 1321 else 1322 { 1323 alfParamSet->isEnabled[compIdx] = true; 1324 if( 
countNewFilts == 1 && firstAlfUnitInSlice->alfFiltParam->alf_flag != 0 && countFiltOffLCU == 0) 1325 { 1326 alfParamSet->isUniParam[compIdx] = true; 1327 } 1328 else 1329 { 1330 alfParamSet->isUniParam[compIdx] = false; 1331 } 1332 } 1333 1334 } 1335 1336 /** Disable all ALF unit parameters in current component 1337 * \param [in] compIdx luma/chroma component index 1338 * \param [out] alfParamSet to-be-coded ALF parameter set 1339 * \param [in] alfUnitPic ALF unit parameters for all LCUs in picture 1340 */ 1341 Void TEncAdaptiveLoopFilter::disableComponentAlfParam(Int compIdx, AlfParamSet* alfParamSet, AlfUnitParam* alfUnitPic) 1342 { 1343 alfParamSet->isEnabled [compIdx] = false; 1344 alfParamSet->isUniParam[compIdx] = true; //all off 1345 1346 for(Int lcuPos = 0; lcuPos < m_uiNumCUsInFrame; lcuPos++) 1347 { 1348 AlfUnitParam& alfunitParam = alfUnitPic[lcuPos]; 1349 1350 alfunitParam.mergeType = ALF_MERGE_DISABLED; 1351 alfunitParam.isEnabled = false; 1352 alfunitParam.isNewFilt = false; 1353 alfunitParam.storedFiltIdx = -1; 1354 alfunitParam.alfFiltParam->alf_flag = 0; 1355 } 1356 1357 //check merge-up and merge-left 1358 checkMerge(compIdx, alfUnitPic); 1359 1360 //transfer to AlfParamSet 1361 transferToAlfParamSet(compIdx, alfUnitPic, alfParamSet); 1362 1363 } 1364 1365 /** Picture-based encoding 1366 * \param [out] alfParamSet to-be-coded ALF parameter set 1367 * \param [in, out] alfPicQTPart picture quad-tree partition 1368 * \param [in] compIdx luma/chroma component index 1369 * \param [in] pOrg picture buffer for original picture 1370 * \param [in] pDec picture buffer for un-filtered picture 1371 * \param [out] pRest picture buffer for filtered picture 1372 * \param [in] stride stride size for 1-D picture memory 1373 * \param [in, out] alfCorrLCUs correlation values for LCUs 1374 */ 1375 #if HHI_INTERVIEW_SKIP 1376 Void TEncAdaptiveLoopFilter::executePicBasedModeDecision(AlfParamSet* alfParamSet 1377 , AlfPicQTPart* alfPicQTPart 1378 , Int compIdx 1379 , 
Pel* pOrg, Pel* pDec, Pel* pRest, Pel* pUsed, Int stride, Int formatShift 1380 , AlfCorrData** alfCorrLCUs 1381 ) 1382 #else 1383 Void TEncAdaptiveLoopFilter::executePicBasedModeDecision(AlfParamSet* alfParamSet 1384 , AlfPicQTPart* alfPicQTPart 1385 , Int compIdx 1386 , Pel* pOrg, Pel* pDec, Pel* pRest, Int stride, Int formatShift 1387 , AlfCorrData** alfCorrLCUs 1388 ) 1389 #endif 1390 { 1391 if(compIdx != ALF_Y) 1392 { 1393 if(!alfParamSet->isEnabled[ALF_Y]) 1394 { 1395 disableComponentAlfParam(compIdx, alfParamSet, m_alfPicFiltUnits[compIdx]); 1396 return; 1397 } 1398 } 1399 1400 Int picWidth = (m_img_width >> formatShift); 1401 Int picHeight= (m_img_height >> formatShift); 1402 1403 Int64 minDist = 0; 1404 Int64 minRate = 0; 1405 Double minCost = 0; 1406 1407 decideQTPartition(alfPicQTPart, alfCorrLCUs, 0, 0, minCost, minDist, minRate); 1408 1409 //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx]) 1410 patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[compIdx]); 1411 1412 //check merge-up and merge-left 1413 checkMerge(compIdx, m_alfPicFiltUnits[compIdx]); 1414 1415 //transfer to AlfParamSet 1416 transferToAlfParamSet(compIdx, m_alfPicFiltUnits[compIdx], alfParamSet); 1417 1418 //reconstruction 1419 recALF(compIdx, m_alfFiltInfo[compIdx], pDec, pRest, stride, formatShift, NULL, false); 1420 1421 Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma); 1422 1423 1424 std::vector<AlfCUCtrlInfo> alfCUCtrlParamTemp(m_vBestAlfCUCtrlParam); 1425 minRate = calculateAlfParamSetRateRDO(compIdx, alfParamSet, &alfCUCtrlParamTemp); 1426 #if HHI_INTERVIEW_SKIP 1427 minDist = xCalcSSD(pOrg, pRest, pUsed, picWidth, picHeight, stride); 1428 #else 1429 minDist = xCalcSSD(pOrg, pRest, picWidth, picHeight, stride); 1430 #endif 1431 minCost = (Double)minDist + lambda*((Double)minRate); 1432 1433 //block on/off control 1434 if(compIdx == ALF_Y && m_bAlfCUCtrlEnabled) 1435 { 1436 #if HHI_INTERVIEW_SKIP 1437 decideBlockControl(pOrg, pDec, pRest, 
pUsed, stride, alfPicQTPart, alfParamSet, minRate, minDist, minCost); 1438 #else 1439 decideBlockControl(pOrg, pDec, pRest, stride, alfPicQTPart, alfParamSet, minRate, minDist, minCost); 1440 #endif 1441 } 1442 1443 //get filter-off distortion, rate, cost 1444 AlfParamSet alfParamSetOff; 1445 for(Int s=0; s< m_uiNumSlicesInPic; s++) 1446 { 1447 alfCUCtrlParamTemp[s].reset(); 1448 } 1449 alfParamSetOff.isEnabled[compIdx] = false; 1450 alfParamSetOff.isUniParam[compIdx] = true; 1451 #if HHI_INTERVIEW_SKIP 1452 Int64 offDist = xCalcSSD(pOrg, pDec, pUsed, picWidth, picHeight, stride); 1453 #else 1454 Int64 offDist = xCalcSSD(pOrg, pDec, picWidth, picHeight, stride); 1455 #endif 1456 Int64 offRate = calculateAlfParamSetRateRDO(compIdx, &alfParamSetOff, &alfCUCtrlParamTemp); 1457 Double offCost = (Double)offDist + lambda*((Double)offRate); 1458 1459 if(offCost < minCost ) 1460 { 1461 //revert to filter-off results 1462 Pel* pelSrc = pDec; 1463 Pel* pelDst = pRest; 1464 for(Int y=0; y< picHeight; y++) 1465 { 1466 ::memcpy(pelDst, pelSrc, sizeof(Pel)*picWidth); 1467 pelSrc += stride; 1468 pelDst += stride; 1469 } 1470 1471 alfParamSet->isEnabled[compIdx] = false; 1472 alfParamSet->isUniParam[compIdx] = true; //all filter-off 1473 } 1474 1475 } 1476 1477 /** copy picture quadtree information 1478 * \param [out] alfPicQTPartDest destination part in picture quad tree 1479 * \param [in ] alfPicQTPartSrc source part in picture quad tree 1480 */ 1481 Void TEncAdaptiveLoopFilter::copyPicQT(AlfPicQTPart* alfPicQTPartDest, AlfPicQTPart* alfPicQTPartSrc) 1482 { 1483 for (Int i=0; i< m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++) 1484 { 1485 alfPicQTPartDest[i] = alfPicQTPartSrc[i]; 1486 } 1487 } 1488 1489 /** copy pixel values for one rectangular region 1490 * \param [out] imgDest destination picture buffer 1491 * \param [in ] imgSrc source picture buffer 1492 * \param [in ] stride stride size for 1-D picture memory (shared by both buffers) 1493 * \param [in ] yPos starting y position 
1494 * \param [in ] height region height 1495 * \param [in ] xPos starting x position 1496 * \param [in ] width region width 1497 */ 1498 Void TEncAdaptiveLoopFilter::copyPixelsInOneRegion(Pel* imgDest, Pel* imgSrc, Int stride, Int yPos, Int height, Int xPos, Int width) 1499 { 1500 Int offset = (yPos*stride) + xPos; 1501 Pel *imgDestLine = imgDest + offset; 1502 Pel *imgSrcLine = imgSrc + offset; 1503 1504 for (Int j=0; j<height; j++) 1505 { 1506 ::memcpy(imgDestLine, imgSrcLine, sizeof(Pel)*width); 1507 imgDestLine += stride; 1508 imgSrcLine += stride; 1509 } 1510 } 1511 1512 /** Re-design ALF parameters for picture quad-tree partitions 1513 * \param [out] alfPicQTPart picture quad-tree partition information 1514 * \param [in ] partIdx partition index 1515 * \param [in ] partLevel partition level 1516 */ 1517 Void TEncAdaptiveLoopFilter::reDesignQT(AlfPicQTPart *alfPicQTPart, Int partIdx, Int partLevel) 1518 { 1519 AlfPicQTPart *alfPicQTOnePart = &(alfPicQTPart[partIdx]); 1520 Int nextPartLevel = partLevel + 1; 1521 1522 if (!alfPicQTOnePart->isSplit) 1523 { 1524 if (alfPicQTOnePart->alfUnitParam->alfFiltParam->alf_flag) 1525 { 1526 executeModeDecisionOnePart(alfPicQTPart, m_alfCorr[ALF_Y], partIdx, partLevel) ; 1527 } 1528 } 1529 else 1530 { 1531 for (Int i=0; i<4; i++) 1532 { 1533 reDesignQT(alfPicQTPart, alfPicQTOnePart->childPartIdx[i], nextPartLevel); 1534 } 1535 } 1536 } 1537 1538 /** CU-on/off control decision 1539 * \param [in ] imgOrg picture buffer for original picture 1540 * \param [in ] imgDec picture buffer for un-filtered picture 1541 * \param [in ] imgRest picture buffer for filtered picture 1542 * \param [in ] stride buffer stride size for 1-D picture memory 1543 * \param [in, out] alfPicQTPart picture quad-tree partition information 1544 * \param [in, out] alfParamSet ALF parameter set 1545 * \param [in, out ] minRate minimum rate 1546 * \param [in, out ] minDist minimum distortion 1547 * \param [in, out ] minCost minimum RD cost 1548 */ 1549 #if 
HHI_INTERVIEW_SKIP 1550 Void TEncAdaptiveLoopFilter::decideBlockControl(Pel* imgOrg, Pel* imgDec, Pel* imgRest, Pel* imgUsed, Int stride, AlfPicQTPart* alfPicQTPart, AlfParamSet* & alfParamSet, Int64 &minRate, Int64 &minDist, Double &minCost) 1551 #else 1552 Void TEncAdaptiveLoopFilter::decideBlockControl(Pel* imgOrg, Pel* imgDec, Pel* imgRest, Int stride, AlfPicQTPart* alfPicQTPart, AlfParamSet* & alfParamSet, Int64 &minRate, Int64 &minDist, Double &minCost) 1553 #endif 1554 { 1555 Int rate, ctrlDepth; 1556 Double cost; 1557 UInt64 dist; 1558 Bool isChanged = false; 1559 Pel *imgYtemp = getPicBuf(m_pcPicYuvTmp, ALF_Y); 1560 Pel *imgYBest = getPicBuf(m_pcPicYuvBest, ALF_Y); 1561 std::vector<AlfCUCtrlInfo> vAlfCUCtrlParamTemp(m_vBestAlfCUCtrlParam); 1562 1563 AlfPicQTPart *alfPicQTPartNoCtrl = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ]; 1564 AlfPicQTPart *alfPicQTPartBest = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ]; 1565 1566 // backup data of PQT without block on/off 1567 copyPicQT(alfPicQTPartNoCtrl, alfPicQTPart); 1568 1569 for (ctrlDepth=0; ctrlDepth<4; ctrlDepth++) 1570 { 1571 // Restore data from PQT without block on/off 1572 copyPixelsInOneRegion(imgYtemp, imgRest, stride, 0, m_img_height, 0, m_img_width); 1573 copyPicQT(alfPicQTPart, alfPicQTPartNoCtrl); 1574 1575 for (Int reDesignRun=0; reDesignRun <= m_iALFNumOfRedesign; reDesignRun++) 1576 { 1577 // re-design filter 1578 if (reDesignRun > 0) 1579 { 1580 // re-gather statistics 1581 getOneCompStatistics(m_alfCorr[ALF_Y], ALF_Y, imgOrg, imgDec, stride, 0, true); 1582 1583 // reDesign in each QT partition 1584 reDesignQT(alfPicQTPart, 0, 0); 1585 1586 //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx]) 1587 patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]); 1588 1589 //reconstruction 1590 copyPixelsInOneRegion(imgYtemp, imgDec, stride, 0, m_img_height, 0, m_img_width); 1591 recALF(ALF_Y, m_alfFiltInfo[ALF_Y], imgDec, imgYtemp, 
stride, 0, NULL, false); 1592 } 1593 1594 // Get distortion and decide on/off, Pel should be changed to TComPicYUV 1595 #if HHI_INTERVIEW_SKIP 1596 setCUAlfCtrlFlags((UInt)ctrlDepth, imgOrg, imgDec, imgYtemp, imgUsed, stride, dist, vAlfCUCtrlParamTemp); 1597 #else 1598 setCUAlfCtrlFlags((UInt)ctrlDepth, imgOrg, imgDec, imgYtemp, stride, dist, vAlfCUCtrlParamTemp); 1599 #endif 1600 1601 //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx]) 1602 patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]); 1603 1604 //check merge-up and merge-left 1605 checkMerge(ALF_Y, m_alfPicFiltUnits[ALF_Y]); 1606 1607 //transfer to AlfParamSet 1608 transferToAlfParamSet(ALF_Y, m_alfPicFiltUnits[ALF_Y], alfParamSet); 1609 1610 rate = calculateAlfParamSetRateRDO(ALF_Y, alfParamSet, &vAlfCUCtrlParamTemp); 1611 cost = (Double)dist + m_dLambdaLuma * ((Double)rate); 1612 1613 if (cost < minCost) 1614 { 1615 isChanged = true; 1616 minCost = cost; 1617 minDist = (Int64) dist; 1618 minRate = rate; 1619 1620 m_vBestAlfCUCtrlParam = vAlfCUCtrlParamTemp; 1621 copyPixelsInOneRegion(imgYBest, imgYtemp, stride, 0, m_img_height, 0, m_img_width); 1622 1623 copyPicQT(alfPicQTPartBest, alfPicQTPart); 1624 xCopyTmpAlfCtrlFlagsFrom(); 1625 } 1626 1627 } 1628 } 1629 1630 if (isChanged == true) 1631 { 1632 copyPicQT(alfPicQTPart, alfPicQTPartBest); 1633 xCopyTmpAlfCtrlFlagsTo(); 1634 1635 copyPixelsInOneRegion(imgRest, imgYBest, stride, 0, m_img_height, 0, m_img_width); 1636 xCopyDecToRestCUs(imgDec, imgRest, stride); 1637 } 1638 else 1639 { 1640 copyPicQT(alfPicQTPart, alfPicQTPartNoCtrl); 1641 } 1642 1643 //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx]) 1644 patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]); 1645 1646 //check merge-up and merge-left 1647 checkMerge(ALF_Y, m_alfPicFiltUnits[ALF_Y]); 1648 1649 //transfer to AlfParamSet 1650 transferToAlfParamSet(ALF_Y, m_alfPicFiltUnits[ALF_Y], alfParamSet); 1651 1652 delete [] 
alfPicQTPartNoCtrl; 1653 alfPicQTPartNoCtrl = NULL; 1654 1655 delete [] alfPicQTPartBest; 1656 alfPicQTPartBest = NULL; 1657 } 1658 1659 /** Copy ALF unit parameters from quad-tree partition to LCUs 1660 * \param [in] alfPicQTPart picture quad-tree partition information 1661 * \param [in] partIdx partition index 1662 * \param [out] alfUnitPic ALF unit parameters for LCUs 1663 */ 1664 Void TEncAdaptiveLoopFilter::patchAlfUnitParams(AlfPicQTPart* alfPicQTPart, Int partIdx, AlfUnitParam* alfUnitPic) 1665 { 1666 AlfPicQTPart* alfQTPart = &(alfPicQTPart[partIdx]); 1667 //Int compIdx = alfQTPart->componentID; 1668 1669 if(alfQTPart->isSplit == false) 1670 { 1671 AlfUnitParam* alfpartParam = alfQTPart->alfUnitParam; 1672 1673 Int lcuPos; 1674 for(Int lcuPosY = alfQTPart->partCUYS; lcuPosY <= alfQTPart->partCUYE; lcuPosY++) 1675 { 1676 for(Int lcuPosX = alfQTPart->partCUXS; lcuPosX <= alfQTPart->partCUXE; lcuPosX++) 1677 { 1678 lcuPos = lcuPosY*m_numLCUInPicWidth + lcuPosX; 1679 AlfUnitParam& alfunitParam = alfUnitPic[lcuPos]; 1680 1681 alfunitParam.mergeType = alfpartParam->mergeType; 1682 alfunitParam.isEnabled = alfpartParam->isEnabled; 1683 alfunitParam.isNewFilt = alfpartParam->isNewFilt; 1684 alfunitParam.storedFiltIdx = alfpartParam->storedFiltIdx; //not used 1685 *(alfunitParam.alfFiltParam) = *(alfpartParam->alfFiltParam); 1686 } 1687 } 1688 } 1689 else 1690 { 1691 for(Int i=0; i< 4; i++) 1692 { 1693 patchAlfUnitParams(alfPicQTPart, alfQTPart->childPartIdx[i], alfUnitPic); 1694 } 1695 } 1696 } 1697 1698 /** Decide picture quad-tree partition 1699 * \param [in, out] alfPicQTPart picture quad-tree partition information 1700 * \param [in, out] alfPicLCUCorr correlations for LCUs 1701 * \param [in] partIdx partition index 1702 * \param [in] partLevel partition level 1703 * \param [in, out] cost cost for one partition 1704 * \param [in, out] dist distortion for one partition 1705 * \param [in, out] rate bitrate for one partition 1706 */ 1707 Void 
TEncAdaptiveLoopFilter::decideQTPartition(AlfPicQTPart* alfPicQTPart, AlfCorrData** alfPicLCUCorr, Int partIdx, Int partLevel, Double &cost, Int64 &dist, Int64 &rate) 1708 { 1709 AlfPicQTPart* alfPicQTOnePart = &(alfPicQTPart[partIdx]); 1710 Int nextPartLevel = partLevel + 1; 1711 Int childPartIdx; 1712 Double splitCost = 0; 1713 Int64 splitRate = 0; 1714 Int64 splitDist = 0; 1715 1716 if (!alfPicQTOnePart->isProcessed) 1717 { 1718 executeModeDecisionOnePart(alfPicQTPart, alfPicLCUCorr, partIdx, partLevel); 1719 1720 alfPicQTOnePart->isProcessed = true; 1721 } 1722 1723 if (!alfPicQTOnePart->isBottomLevel) 1724 { 1725 for (Int i=0; i<4; i++) 1726 { 1727 childPartIdx = alfPicQTOnePart->childPartIdx[i]; 1728 decideQTPartition(alfPicQTPart, alfPicLCUCorr, childPartIdx, nextPartLevel, splitCost, splitDist, splitRate); 1729 } 1730 1731 alfPicQTOnePart->splitMinCost = splitCost; 1732 alfPicQTOnePart->splitMinDist = splitDist; 1733 alfPicQTOnePart->splitMinRate = splitRate; 1734 1735 if (alfPicQTOnePart->splitMinCost < alfPicQTOnePart->selfMinCost) 1736 { 1737 alfPicQTOnePart->isSplit = true; 1738 } 1739 else 1740 { 1741 alfPicQTOnePart->isSplit = false; 1742 } 1743 } 1744 else 1745 { 1746 alfPicQTOnePart->isSplit = false; 1747 alfPicQTOnePart->splitMinCost = alfPicQTOnePart->selfMinCost; 1748 alfPicQTOnePart->splitMinDist = alfPicQTOnePart->selfMinDist; 1749 alfPicQTOnePart->splitMinRate = alfPicQTOnePart->selfMinRate; 1750 } 1751 1752 if (alfPicQTOnePart->isSplit) 1753 { 1754 cost += alfPicQTOnePart->splitMinCost; 1755 rate += alfPicQTOnePart->splitMinRate; 1756 dist += alfPicQTOnePart->splitMinDist; 1757 } 1758 else 1759 { 1760 cost += alfPicQTOnePart->selfMinCost; 1761 rate += alfPicQTOnePart->selfMinRate; 1762 dist += alfPicQTOnePart->selfMinDist; 1763 } 1764 1765 } 1766 1767 /** Mode decision process for one picture quad-tree partition 1768 * \param [in, out] alfPicQTPart picture quad-tree partition information 1769 * \param [in, out] alfPicLCUCorr correlations for 
LCUs 1770 * \param [in] partIdx partition index 1771 * \param [in] partLevel partition level 1772 */ 1773 Void TEncAdaptiveLoopFilter::executeModeDecisionOnePart(AlfPicQTPart *alfPicQTPart, AlfCorrData** alfPicLCUCorr, Int partIdx, Int partLevel) 1774 { 1775 AlfPicQTPart* alfQTPart = &(alfPicQTPart[partIdx]); 1776 Int compIdx = alfQTPart->componentID; 1777 Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma); 1778 1779 //gather correlations 1780 alfQTPart->alfCorr->reset(); 1781 for(Int lcuPosY = alfQTPart->partCUYS; lcuPosY <= alfQTPart->partCUYE; lcuPosY++) 1782 { 1783 for(Int lcuPosX = alfQTPart->partCUXS; lcuPosX <= alfQTPart->partCUXE; lcuPosX++) 1784 { 1785 *(alfQTPart->alfCorr) += *(alfPicLCUCorr[lcuPosY*m_numLCUInPicWidth + lcuPosX]); 1786 } 1787 } 1788 1789 //test filter on 1790 AlfUnitParam* alfPartUnitParam = alfQTPart->alfUnitParam; 1791 alfPartUnitParam->mergeType = ALF_MERGE_DISABLED; 1792 alfPartUnitParam->isEnabled = true; 1793 alfPartUnitParam->isNewFilt = true; 1794 alfPartUnitParam->storedFiltIdx = -1; 1795 alfPartUnitParam->alfFiltParam->alf_flag = 1; 1796 deriveFilterInfo(compIdx, alfQTPart->alfCorr, alfPartUnitParam->alfFiltParam, alfQTPart->numFilterBudget); 1797 1798 alfQTPart->selfMinDist = estimateFilterDistortion(compIdx, alfQTPart->alfCorr, m_filterCoeffSym, alfPartUnitParam->alfFiltParam->filters_per_group, m_varIndTab); 1799 alfQTPart->selfMinRate = calculateAlfUnitRateRDO(alfPartUnitParam); 1800 alfQTPart->selfMinCost = (Double)(alfQTPart->selfMinDist) + lambda*((Double)(alfQTPart->selfMinRate)); 1801 1802 alfQTPart->selfMinCost += ((lambda* 1.5)* ((Double)( (alfQTPart->partCUYE - alfQTPart->partCUYS+ 1)*(alfQTPart->partCUXE - alfQTPart->partCUXS +1) ))); //RDCO 1803 1804 1805 //test filter off 1806 AlfUnitParam alfUnitParamTemp(*(alfQTPart->alfUnitParam)); 1807 alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED; 1808 alfUnitParamTemp.isEnabled = false; 1809 Int64 dist = estimateFilterDistortion(compIdx, 
alfQTPart->alfCorr); 1810 Int64 rate = calculateAlfUnitRateRDO(&alfUnitParamTemp); 1811 Double cost = (Double)dist + lambda*((Double)rate); 1812 if(cost < alfQTPart->selfMinCost) 1813 { 1814 alfQTPart->selfMinCost = cost; 1815 alfQTPart->selfMinDist = dist; 1816 alfQTPart->selfMinRate = rate; 1817 *(alfQTPart->alfUnitParam) = alfUnitParamTemp; 1818 1819 alfQTPart->alfUnitParam->alfFiltParam->alf_flag = 0; 1820 } 1821 1822 } 1823 1824 /** Derive filter coefficients 1825 * \param [in] compIdx luma/chroma component index 1826 * \param [in] alfCorr correlations 1827 * \param [out] alfFiltParam derived ALF filter parameters 1828 * \param [in] maxNumFilters constraint for number of filters (used for luma only) 1829 */ 1830 Void TEncAdaptiveLoopFilter::deriveFilterInfo(Int compIdx, AlfCorrData* alfCorr, ALFParam* alfFiltParam, Int maxNumFilters) 1831 { 1832 const Int filtNo = 0; 1833 const Int numCoeff = ALF_MAX_NUM_COEF; 1834 1835 switch(compIdx) 1836 { 1837 case ALF_Y: 1838 { 1839 Int lambdaForMerge = ((Int) m_dLambdaLuma) * (1<<(2*g_uiBitIncrement)); 1840 Int numFilters; 1841 1842 ::memset(m_varIndTab, 0, sizeof(Int)*NO_VAR_BINS); 1843 1844 xfindBestFilterVarPred(alfCorr->yCorr, alfCorr->ECorr, alfCorr->pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &numFilters, m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambdaForMerge, maxNumFilters); 1845 xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, numFilters, alfFiltParam); 1846 } 1847 break; 1848 case ALF_Cb: 1849 case ALF_Cr: 1850 { 1851 static Double coef[ALF_MAX_NUM_COEF]; 1852 1853 alfFiltParam->filters_per_group = 1; 1854 1855 gnsSolveByChol(alfCorr->ECorr[0], alfCorr->yCorr[0], coef, numCoeff); 1856 xQuantFilterCoef(coef, m_filterCoeffSym[0], filtNo, g_uiBitDepth + g_uiBitIncrement); 1857 ::memcpy(alfFiltParam->coeffmulti[0], m_filterCoeffSym[0], sizeof(Int)*numCoeff); 1858 predictALFCoeffChroma(alfFiltParam->coeffmulti[0]); 1859 } 1860 break; 1861 default: 1862 { 1863 printf("Not a legal component ID\n"); 1864 
assert(0); 1865 exit(-1); 1866 } 1867 } 1868 1869 1870 } 1871 1872 /** Estimate rate-distortion cost for ALF parameter set 1873 * \param [in] compIdx luma/chroma component index 1874 * \param [in] alfParamSet ALF parameter set 1875 * \param [in] alfCUCtrlParam CU-on/off control parameters 1876 */ 1877 Int TEncAdaptiveLoopFilter::calculateAlfParamSetRateRDO(Int compIdx, AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCUCtrlParam) 1878 { 1879 Int rate = 0; 1880 1881 m_pcEntropyCoder->resetEntropy(); 1882 m_pcEntropyCoder->resetBits(); 1883 1884 1885 m_pcEntropyCoder->encodeAlfParamSet(alfParamSet, m_numLCUInPicWidth, m_uiNumCUsInFrame, 0, true, compIdx, compIdx); 1886 1887 if(m_bAlfCUCtrlEnabled) 1888 { 1889 for(Int s=0; s< m_uiNumSlicesInPic; s++) 1890 { 1891 m_pcEntropyCoder->encodeAlfCtrlParam( (*alfCUCtrlParam)[s], m_uiNumCUsInFrame); 1892 } 1893 } 1894 1895 rate = m_pcEntropyCoder->getNumberOfWrittenBits(); 1896 1897 return rate; 1898 } 1899 1900 /** Estimate rate-distortion cost for ALF unit parameters 1901 * \param [in] alfUnitParam ALF unit parameters 1902 * \param [in] numStoredFilters number of stored filter (set) 1903 */ 1904 Int TEncAdaptiveLoopFilter::calculateAlfUnitRateRDO(AlfUnitParam* alfUnitParam, Int numStoredFilters) 1905 { 1906 Int rate = 0; 1907 1908 if(alfUnitParam->mergeType != ALF_MERGE_LEFT) 1909 { 1910 m_pcEntropyCoder->resetEntropy(); 1911 m_pcEntropyCoder->resetBits(); 1912 1913 m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->mergeType == ALF_MERGE_UP)?1:0); 1914 1915 if(alfUnitParam->mergeType != ALF_MERGE_UP) 1916 { 1917 m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->isEnabled)?1:0); 1918 1919 if(alfUnitParam->isEnabled) 1920 { 1921 if(numStoredFilters > 0) 1922 { 1923 m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->isNewFilt)?1:0); 1924 } 1925 1926 if(!(alfUnitParam->isNewFilt) && numStoredFilters > 0) 1927 { 1928 m_pcEntropyCoder->encodeAlfStoredFilterSetIdx(alfUnitParam->storedFiltIdx, numStoredFilters); 1929 } 1930 else 
1931 { 1932 m_pcEntropyCoder->encodeAlfParam(alfUnitParam->alfFiltParam); 1933 } 1934 1935 } 1936 } 1937 rate = m_pcEntropyCoder->getNumberOfWrittenBits(); 1938 } 1939 return rate; 1940 } 1941 1942 /** Estimate filtering distortion 1943 * \param [in] compIdx luma/chroma component index 1944 * \param [in] alfCorr correlations 1945 * \param [in] coeffSet filter coefficients 1946 * \param [in] filterSetSize number of filter set 1947 * \param [in] mergeTable merge table of filter set (only for luma BA) 1948 * \param [in] doPixAccMerge calculate pixel squared value (true) or not (false) 1949 */ 1950 Int64 TEncAdaptiveLoopFilter::estimateFilterDistortion(Int compIdx, AlfCorrData* alfCorr, Int** coeffSet, Int filterSetSize, Int* mergeTable, Bool doPixAccMerge) 1951 { 1952 const Int numCoeff = (Int)ALF_MAX_NUM_COEF; 1953 AlfCorrData* alfMerged = m_alfCorrMerged[compIdx]; 1954 1955 alfMerged->mergeFrom(*alfCorr, mergeTable, doPixAccMerge); 1956 1957 Int** coeff = (coeffSet == NULL)?(m_coeffNoFilter):(coeffSet); 1958 Int64 iDist = 0; 1959 for(Int f=0; f< filterSetSize; f++) 1960 { 1961 iDist += xFastFiltDistEstimation(alfMerged->ECorr[f], alfMerged->yCorr[f], coeff[f], numCoeff); 1962 } 1963 return iDist; 1964 } 1965 1966 /** Mode decision for ALF unit in LCU-based encoding 1967 * \param [in] compIdx luma/chroma component index 1968 * \param [in] alfUnitPic ALF unit parmeters for LCUs in picture 1969 * \param [in] lcuIdx LCU index (order) in slice 1970 * \param [in] lcuPos LCU position in picture 1971 * \param [in] numLCUWidth number of width in LCU 1972 * \param [in, out] alfUnitParams ALF unit parameters for LCUs in slice 1973 * \param [in] alfCorr correlations 1974 * \param [in] storedFilters stored-filter buffer 1975 * \param [in] maxNumFilter constraint for number of filters 1976 * \param [in] lambda lagrangian multiplier for RDO 1977 * \param [in] isLeftUnitAvailable left ALF unit available (true) or not (false) 1978 * \param [in] isUpUnitAvailable upper ALF unit 
available (true) or not (false) 1979 */ 1980 Void TEncAdaptiveLoopFilter::decideLCUALFUnitParam(Int compIdx, AlfUnitParam* alfUnitPic, Int lcuIdx, Int lcuPos, Int numLCUWidth, AlfUnitParam* alfUnitParams, AlfCorrData* alfCorr, std::vector<ALFParam*>& storedFilters, Int maxNumFilter, Double lambda, Bool isLeftUnitAvailable, Bool isUpUnitAvailable) 1981 { 1982 Int numSliceDataInCurrLCU = m_numSlicesDataInOneLCU[lcuPos]; 1983 Int budgetNumFilters = (Int)(maxNumFilter/numSliceDataInCurrLCU); 1984 Int numStoredFilters = (Int)storedFilters.size(); 1985 Double cost, minCost = MAX_DOUBLE; 1986 Int64 dist; 1987 Int rate; 1988 1989 AlfUnitParam& alfUnitParamCurr = alfUnitParams[lcuIdx]; 1990 1991 ///--- new filter mode test --- 1992 AlfUnitParam alfUnitParamTemp(alfUnitParamCurr); 1993 alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED; 1994 alfUnitParamTemp.isEnabled = true; 1995 alfUnitParamTemp.isNewFilt = true; 1996 alfUnitParamTemp.storedFiltIdx = -1; 1997 deriveFilterInfo(compIdx, alfCorr, alfUnitParamTemp.alfFiltParam, budgetNumFilters); 1998 1999 dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab); 2000 rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters); 2001 cost = (Double)dist + lambda*((Double)rate); 2002 if(cost < minCost) 2003 { 2004 minCost = cost; 2005 alfUnitParamCurr = alfUnitParamTemp; 2006 2007 alfUnitParamCurr.alfFiltParam->alf_flag = 1; 2008 } 2009 2010 if(numSliceDataInCurrLCU == 1) 2011 { 2012 if(numStoredFilters > 0) 2013 { 2014 ///--- stored filter mode test ---// 2015 alfUnitParamTemp = alfUnitParamCurr; 2016 2017 alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED; 2018 alfUnitParamTemp.isEnabled = true; 2019 alfUnitParamTemp.isNewFilt = false; 2020 2021 for(Int i=0; i< numStoredFilters; i++) 2022 { 2023 ALFParam* storedALFParam = storedFilters[i]; 2024 2025 alfUnitParamTemp.storedFiltIdx = i; 2026 alfUnitParamTemp.alfFiltParam = storedALFParam; 2027 2028 
assert(storedALFParam->alf_flag == 1); 2029 2030 reconstructCoefInfo(compIdx, storedALFParam, m_filterCoeffSym, m_varIndTab); 2031 2032 dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab); 2033 rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters); 2034 cost = (Double)dist + lambda*((Double)rate); 2035 2036 if(cost < minCost) 2037 { 2038 minCost = cost; 2039 alfUnitParamCurr = alfUnitParamTemp; 2040 } 2041 } 2042 } 2043 2044 /// merge-up test 2045 if(isUpUnitAvailable) 2046 { 2047 Int addrUp = lcuPos - m_numLCUInPicWidth; 2048 AlfUnitParam& alfUnitParamUp = alfUnitPic[addrUp]; 2049 2050 if(alfUnitParamUp.alfFiltParam->alf_flag == 1) 2051 { 2052 alfUnitParamTemp = alfUnitParamUp; 2053 alfUnitParamTemp.mergeType = ALF_MERGE_UP; 2054 2055 reconstructCoefInfo(compIdx, alfUnitParamTemp.alfFiltParam, m_filterCoeffSym, m_varIndTab); 2056 dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab); 2057 rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters); 2058 cost = (Double)dist + lambda*((Double)rate); 2059 2060 if(cost < minCost) 2061 { 2062 minCost = cost; 2063 2064 alfUnitParamCurr = alfUnitParamTemp; 2065 } 2066 2067 } 2068 2069 } //upper unit available 2070 2071 2072 /// merge-left test 2073 if(isLeftUnitAvailable) 2074 { 2075 Int addrLeft = lcuPos - 1; 2076 AlfUnitParam& alfUnitParamLeft = alfUnitPic[addrLeft]; 2077 2078 if(alfUnitParamLeft.alfFiltParam->alf_flag == 1) 2079 { 2080 alfUnitParamTemp = alfUnitParamLeft; 2081 alfUnitParamTemp.mergeType = ALF_MERGE_LEFT; 2082 2083 reconstructCoefInfo(compIdx, alfUnitParamTemp.alfFiltParam, m_filterCoeffSym, m_varIndTab); 2084 dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab); 2085 rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters); 2086 cost = 
(Double)dist + lambda*((Double)rate); 2087 2088 if(cost < minCost) 2089 { 2090 minCost = cost; 2091 2092 alfUnitParamCurr = alfUnitParamTemp; 2093 } 2094 2095 } 2096 2097 } //left unit available 2098 2099 } 2100 } 2101 2102 /** Choose the best ALF unit parameters when filter is not enabled. 2103 * \param [out] alfFiltOffParam ALF unit parameters for filter-off case 2104 * \param [in] lcuPos LCU position in picture 2105 * \param [in] alfUnitPic ALF unit parmeters for LCUs in picture 2106 * \param [in] isLeftUnitAvailable left ALF unit available (true) or not (false) 2107 * \param [in] isUpUnitAvailable upper ALF unit available (true) or not (false) 2108 */ 2109 Void TEncAdaptiveLoopFilter::getFiltOffAlfUnitParam(AlfUnitParam* alfFiltOffParam, Int lcuPos, AlfUnitParam* alfUnitPic, Bool isLeftUnitAvailable, Bool isUpUnitAvailable) 2110 { 2111 Int numSliceDataInCurrLCU = m_numSlicesDataInOneLCU[lcuPos]; 2112 2113 if(numSliceDataInCurrLCU == 1) 2114 { 2115 if(isLeftUnitAvailable) 2116 { 2117 Int addrLeft = lcuPos - 1; 2118 AlfUnitParam& alfUnitParamLeft = alfUnitPic[addrLeft]; 2119 2120 if(alfUnitParamLeft.alfFiltParam->alf_flag == 0) 2121 { 2122 alfFiltOffParam->mergeType = ALF_MERGE_LEFT; 2123 alfFiltOffParam->isEnabled = false; 2124 alfFiltOffParam->alfFiltParam = alfUnitParamLeft.alfFiltParam; 2125 2126 return; 2127 } 2128 } 2129 2130 if(isUpUnitAvailable) 2131 { 2132 Int addrUp = lcuPos - m_numLCUInPicWidth; 2133 AlfUnitParam& alfUnitParamUp = alfUnitPic[addrUp]; 2134 2135 if(alfUnitParamUp.alfFiltParam->alf_flag == 0) 2136 { 2137 alfFiltOffParam->mergeType = ALF_MERGE_UP; 2138 alfFiltOffParam->isEnabled = false; 2139 alfFiltOffParam->alfFiltParam = alfUnitParamUp.alfFiltParam; 2140 2141 return; 2142 } 2143 2144 } 2145 } 2146 2147 2148 alfFiltOffParam->mergeType = ALF_MERGE_DISABLED; 2149 alfFiltOffParam->isEnabled = false; 2150 alfFiltOffParam->alfFiltParam = alfUnitPic[lcuPos].alfFiltParam; 2151 2152 return; 2153 } 2154 2155 /** Calculate distortion for ALF LCU 
2156 * \param [in] skipLCUBottomLines true for considering skipping bottom LCU lines 2157 * \param [in] compIdx luma/chroma component index 2158 * \param [in] alfLCUInfo ALF LCU information 2159 * \param [in] picSrc source picture buffer 2160 * \param [in] picCmp to-be-compared picture buffer 2161 * \param [in] stride buffer stride size for 1-D pictrue memory 2162 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0) 2163 * \return the distortion 2164 */ 2165 #if HHI_INTERVIEW_SKIP 2166 Int64 TEncAdaptiveLoopFilter::calcAlfLCUDist(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo& alfLCUInfo, Pel* picSrc, Pel* picCmp, Pel* picUsed, Int stride, Int formatShift) 2167 #else 2168 Int64 TEncAdaptiveLoopFilter::calcAlfLCUDist(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo& alfLCUInfo, Pel* picSrc, Pel* picCmp, Int stride, Int formatShift) 2169 #endif 2170 { 2171 Int64 dist = 0; 2172 Int posOffset, ypos, xpos, height, width; 2173 Pel* pelCmp; 2174 Pel* pelSrc; 2175 #if HHI_INTERVIEW_SKIP 2176 Pel* pelUsed = NULL ; 2177 #endif 2178 #if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER 2179 Int endypos; 2180 Bool notSkipLinesBelowVB = true; 2181 Int lcuAddr = alfLCUInfo.pcCU->getAddr(); 2182 if(skipLCUBottomLines) 2183 { 2184 if(lcuAddr + m_numLCUInPicWidth < m_uiNumCUsInFrame) 2185 { 2186 notSkipLinesBelowVB = false; 2187 } 2188 } 2189 #endif 2190 2191 switch(compIdx) 2192 { 2193 case ALF_Cb: 2194 case ALF_Cr: 2195 { 2196 for(Int n=0; n< alfLCUInfo.numSGU; n++) 2197 { 2198 ypos = (Int)(alfLCUInfo[n].posY >> formatShift); 2199 xpos = (Int)(alfLCUInfo[n].posX >> formatShift); 2200 height = (Int)(alfLCUInfo[n].height >> formatShift); 2201 width = (Int)(alfLCUInfo[n].width >> formatShift); 2202 2203 #if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER 2204 if(!notSkipLinesBelowVB ) 2205 { 2206 endypos = ypos+ height -1; 2207 Int iLineVBPos = m_lcuHeightChroma - 2; 2208 Int yEndLineInLCU = endypos % m_lcuHeightChroma; 2209 height = (yEndLineInLCU >= iLineVBPos) ? 
(height - 2) : height ; 2210 } 2211 #endif 2212 2213 posOffset = (ypos * stride) + xpos; 2214 pelCmp = picCmp + posOffset; 2215 pelSrc = picSrc + posOffset; 2216 2217 2218 #if HHI_INTERVIEW_SKIP 2219 if( picUsed) 2220 { 2221 pelUsed = picUsed+ posOffset; 2222 } 2223 dist += xCalcSSD( pelSrc, pelCmp, pelUsed, width, height, stride ); 2224 #else 2225 dist += xCalcSSD( pelSrc, pelCmp, width, height, stride ); 2226 #endif 2227 } 2228 2229 } 2230 break; 2231 case ALF_Y: 2232 { 2233 for(Int n=0; n< alfLCUInfo.numSGU; n++) 2234 { 2235 ypos = (Int)(alfLCUInfo[n].posY); 2236 xpos = (Int)(alfLCUInfo[n].posX); 2237 height = (Int)(alfLCUInfo[n].height); 2238 width = (Int)(alfLCUInfo[n].width); 2239 2240 #if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER 2241 if(!notSkipLinesBelowVB) 2242 { 2243 endypos = ypos+ height -1; 2244 Int iLineVBPos = m_lcuHeight - 4; 2245 Int yEndLineInLCU = endypos % m_lcuHeight; 2246 height = (yEndLineInLCU >= iLineVBPos) ? (height - 4) : height ; 2247 } 2248 #endif 2249 2250 posOffset = (ypos * stride) + xpos; 2251 pelCmp = picCmp + posOffset; 2252 pelSrc = picSrc + posOffset; 2253 2254 #if HHI_INTERVIEW_SKIP 2255 if( picUsed ) 2256 { 2257 pelUsed = picUsed+ posOffset; 2258 } 2259 dist += xCalcSSD( pelSrc, pelCmp, pelUsed, width, height, stride ); 2260 #else 2261 dist += xCalcSSD( pelSrc, pelCmp, width, height, stride ); 2262 #endif 2263 } 2264 2265 } 2266 break; 2267 default: 2268 { 2269 printf("not a legal component ID for ALF \n"); 2270 assert(0); 2271 exit(-1); 2272 } 2273 } 2274 2275 return dist; 2276 } 2277 2278 /** Copy one ALF LCU region 2279 * \param [in] alfLCUInfo ALF LCU information 2280 * \param [out] picDst to-be-compared picture buffer 2281 * \param [in] picSrc source picture buffer 2282 * \param [in] stride buffer stride size for 1-D pictrue memory 2283 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0) 2284 */ 2285 Void TEncAdaptiveLoopFilter::copyOneAlfLCU(AlfLCUInfo& alfLCUInfo, Pel* picDst, Pel* picSrc, Int stride, Int 
formatShift) 2286 { 2287 Int posOffset, ypos, xpos, height, width; 2288 Pel* pelDst; 2289 Pel* pelSrc; 2290 2291 for(Int n=0; n< alfLCUInfo.numSGU; n++) 2292 { 2293 ypos = (Int)(alfLCUInfo[n].posY >> formatShift); 2294 xpos = (Int)(alfLCUInfo[n].posX >> formatShift); 2295 height = (Int)(alfLCUInfo[n].height >> formatShift); 2296 width = (Int)(alfLCUInfo[n].width >> formatShift); 2297 2298 posOffset = ( ypos * stride)+ xpos; 2299 pelDst = picDst + posOffset; 2300 pelSrc = picSrc + posOffset; 2301 2302 for(Int j=0; j< height; j++) 2303 { 2304 ::memcpy(pelDst, pelSrc, sizeof(Pel)*width); 2305 pelDst += stride; 2306 pelSrc += stride; 2307 } 2308 } 2309 2310 } 2311 2312 /** Reconstruct ALF LCU pixels 2313 * \param [in] compIdx luma/chroma component index 2314 * \param [in] alfLCUInfo ALF LCU information 2315 * \param [in] alfUnitParam ALF unit parameters 2316 * \param [in] picDec picture buffer for un-filtered picture 2317 * \param [out] picRest picture buffer for reconstructed picture 2318 * \param [in] stride buffer stride size for 1-D pictrue memory 2319 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0) 2320 */ 2321 Void TEncAdaptiveLoopFilter::reconstructOneAlfLCU(Int compIdx, AlfLCUInfo& alfLCUInfo, AlfUnitParam* alfUnitParam, Pel* picDec, Pel* picRest, Int stride, Int formatShift) 2322 { 2323 ALFParam* alfParam = alfUnitParam->alfFiltParam; 2324 Int ypos, xpos, height, width; 2325 2326 if( alfUnitParam->isEnabled) 2327 { 2328 assert(alfParam->alf_flag == 1); 2329 2330 //reconstruct ALF coefficients & related parameters 2331 reconstructCoefInfo(compIdx, alfParam, m_filterCoeffSym, m_varIndTab); 2332 2333 //filtering process 2334 for(Int n=0; n< alfLCUInfo.numSGU; n++) 2335 { 2336 ypos = (Int)(alfLCUInfo[n].posY >> formatShift); 2337 xpos = (Int)(alfLCUInfo[n].posX >> formatShift); 2338 height = (Int)(alfLCUInfo[n].height >> formatShift); 2339 width = (Int)(alfLCUInfo[n].width >> formatShift); 2340 2341 filterOneCompRegion(picRest, picDec, stride, 
(compIdx!=ALF_Y), ypos, ypos+height, xpos, xpos+width, m_filterCoeffSym, m_varIndTab, m_varImg); 2342 } 2343 } 2344 else 2345 { 2346 copyOneAlfLCU(alfLCUInfo, picRest, picDec, stride, formatShift); 2347 } 2348 } 2349 2350 /** LCU-based mode decision 2351 * \param [in, out] alfParamSet ALF parameter set 2352 * \param [in] compIdx luma/chroma component index 2353 * \param [in] pOrg picture buffer for original picture 2354 * \param [in] pDec picture buffer for un-filtered picture 2355 * \param [out] pRest picture buffer for reconstructed picture 2356 * \param [in] stride buffer stride size for 1-D pictrue memory 2357 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0) 2358 * \param [in] alfCorrLCUs correlations for LCUs 2359 */ 2360 #if HHI_INTERVIEW_SKIP 2361 Void TEncAdaptiveLoopFilter::executeLCUBasedModeDecision(AlfParamSet* alfParamSet 2362 ,Int compIdx, Pel* pOrg, Pel* pDec, Pel* pRest, Pel* pUsed, Int stride, Int formatShift 2363 ,AlfCorrData** alfCorrLCUs 2364 ) 2365 #else 2366 Void TEncAdaptiveLoopFilter::executeLCUBasedModeDecision(AlfParamSet* alfParamSet 2367 ,Int compIdx, Pel* pOrg, Pel* pDec, Pel* pRest, Int stride, Int formatShift 2368 ,AlfCorrData** alfCorrLCUs 2369 ) 2370 #endif 2371 { 2372 Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma); 2373 static Int* isProcessed = NULL; 2374 2375 AlfUnitParam* alfUnitPic = m_alfPicFiltUnits[compIdx]; 2376 2377 Int64 distEnc, distOff; 2378 Int rateEnc, rateOff; 2379 Double costEnc, costOff; 2380 Bool isLeftUnitAvailable, isUpUnitAvailable; 2381 2382 isProcessed = new Int[m_uiNumCUsInFrame]; 2383 ::memset(isProcessed, 0, sizeof(Int)*m_uiNumCUsInFrame); 2384 2385 #if LCUALF_FILTER_BUDGET_CONTROL_ENC 2386 Int numProcessedLCU = 0; 2387 m_alfFiltBudgetPerLcu = (Double)(m_iALFMaxNumberFilters) / (Double)(m_uiNumCUsInFrame); 2388 m_alfUsedFilterNum = 0; 2389 #endif 2390 2391 for(Int s=0; s<= m_lastSliceIdx; s++) 2392 { 2393 if(!m_pcPic->getValidSlice(s)) 2394 { 2395 continue; 2396 } 2397 
Bool isAcrossSlice = (m_alfCoefInSlice)?(!m_isNonCrossSlice):(true); 2398 Int numLCUWidth = alfParamSet[s].numLCUInWidth; 2399 2400 AlfUnitParam* alfSliceUnitParams = alfParamSet[s].alfUnitParam[compIdx]; 2401 std::vector<ALFParam*> storedFilters; 2402 storedFilters.clear(); //reset stored filter buffer at the slice beginning 2403 2404 Int u =0; //counter for LCU index in slice 2405 Int countFiltOffLCU = 0; //counter for number of LCU with filter-off mode 2406 Int countNewFilts = 0; //counter for number of LCU with new filter inside slice 2407 2408 Int numTilesInSlice = (Int)m_pvpSliceTileAlfLCU[s].size(); 2409 for(Int t=0; t< numTilesInSlice; t++) 2410 { 2411 std::vector<AlfLCUInfo*> & vpAlfLCU = m_pvpSliceTileAlfLCU[s][t]; 2412 Pel* pSrc = pDec; 2413 2414 if(m_bUseNonCrossALF) 2415 { 2416 pSrc = getPicBuf(m_pcSliceYuvTmp, compIdx); 2417 copyRegion(vpAlfLCU, pSrc, pDec, stride, formatShift); 2418 extendRegionBorder(vpAlfLCU, pSrc, stride, formatShift); 2419 } 2420 2421 Int numLCUs = (Int)vpAlfLCU.size(); 2422 for(Int n=0; n< numLCUs; n++) 2423 { 2424 AlfLCUInfo* alfLCU = vpAlfLCU[n]; //ALF LCU information 2425 TComDataCU* pcCU = alfLCU->pcCU; 2426 Int addr = pcCU->getAddr(); //real LCU addr 2427 AlfUnitParam* alfUnitParam = &(alfSliceUnitParams[u]); 2428 2429 if(isProcessed[addr] == 0) 2430 { 2431 Int maxNumFilter = (Int)NO_VAR_BINS; 2432 2433 #if LCUALF_FILTER_BUDGET_CONTROL_ENC 2434 Bool isOutOfFilterBudget = true; 2435 Double usedFiltBudget = (numProcessedLCU == 0) ? 0.0 : (Double)m_alfUsedFilterNum / (Double)(numProcessedLCU); 2436 if ( (m_alfFiltBudgetPerLcu >= usedFiltBudget) && (m_alfUsedFilterNum < m_iALFMaxNumberFilters) ) 2437 { 2438 isOutOfFilterBudget = false; 2439 Int leftNumFilt = m_iALFMaxNumberFilters - m_alfUsedFilterNum; 2440 Int avgNumFilt = leftNumFilt / (m_uiNumCUsInFrame - numProcessedLCU) + 1 ; 2441 maxNumFilter = (leftNumFilt < avgNumFilt) ? 
leftNumFilt : avgNumFilt ; 2442 } 2443 #endif 2444 2445 AlfCorrData* alfCorr = alfCorrLCUs[addr]; //ALF LCU correlation 2446 alfUnitParam->alfFiltParam = alfUnitPic[addr].alfFiltParam; 2447 2448 //mode decision 2449 isLeftUnitAvailable = ( (addr % m_numLCUInPicWidth != 0) && (u != 0)); 2450 isUpUnitAvailable = (((Int)(addr/m_numLCUInPicWidth) > 0) && ( ( (u - numLCUWidth) >= 0) || isAcrossSlice )); 2451 2452 decideLCUALFUnitParam(compIdx, alfUnitPic, u, addr, numLCUWidth, alfSliceUnitParams, alfCorr, storedFilters, maxNumFilter, lambda, isLeftUnitAvailable, isUpUnitAvailable); 2453 reconstructOneAlfLCU(compIdx, *alfLCU, alfUnitParam, pSrc, pRest, stride, formatShift); 2454 #if HHI_INTERVIEW_SKIP 2455 distEnc = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pRest, pUsed, stride, formatShift); 2456 #else 2457 distEnc = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pRest, stride, formatShift); 2458 #endif 2459 rateEnc = calculateAlfUnitRateRDO(alfUnitParam, (Int)storedFilters.size()); 2460 costEnc = (Double)distEnc + lambda*((Double)rateEnc); 2461 costEnc += ((lambda* 1.5)*1.0); //RDCO 2462 2463 //v.s. filter off case 2464 AlfUnitParam alfUnitParamOff; 2465 getFiltOffAlfUnitParam(&alfUnitParamOff, addr, alfUnitPic, isLeftUnitAvailable, isUpUnitAvailable); 2466 #if HHI_INTERVIEW_SKIP 2467 distOff = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pSrc, pUsed, stride, formatShift); 2468 #else 2469 distOff = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pSrc, stride, formatShift); 2470 #endif 2471 rateOff = calculateAlfUnitRateRDO(&alfUnitParamOff, (Int)storedFilters.size()); 2472 costOff = (Double)distOff + lambda*((Double)rateOff); 2473 2474 #if LCUALF_FILTER_BUDGET_CONTROL_ENC 2475 if( (costOff < costEnc) || isOutOfFilterBudget) 2476 #else 2477 if( costOff < costEnc) 2478 #endif 2479 { 2480 //filter off. 
set alf_flag = 0, copy pDest to pRest 2481 *alfUnitParam = alfUnitParamOff; 2482 alfUnitParam->alfFiltParam->alf_flag = 0; 2483 copyOneAlfLCU(*alfLCU, pRest, pSrc, stride, formatShift); 2484 } 2485 2486 if(alfUnitParam->mergeType == ALF_MERGE_DISABLED) 2487 { 2488 if(alfUnitParam->isEnabled) 2489 { 2490 if(alfUnitParam->isNewFilt) 2491 { 2492 //update stored filter buffer 2493 storedFilters.push_back(alfUnitParam->alfFiltParam); 2494 assert(alfUnitParam->alfFiltParam->alf_flag == 1); 2495 } 2496 } 2497 } 2498 2499 alfUnitPic[addr] = *alfUnitParam; 2500 2501 isProcessed[addr] = 1; 2502 2503 #if LCUALF_FILTER_BUDGET_CONTROL_ENC 2504 numProcessedLCU++; 2505 if(alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt) 2506 { 2507 m_alfUsedFilterNum += alfUnitParam->alfFiltParam->filters_per_group; 2508 } 2509 #endif 2510 } 2511 else 2512 { 2513 //keep the ALF parameters in LCU are the same 2514 *alfUnitParam = alfUnitPic[addr]; 2515 reconstructOneAlfLCU(compIdx, *alfLCU, alfUnitParam, pSrc, pRest, stride, formatShift); 2516 2517 #if LCUALF_FILTER_BUDGET_CONTROL_ENC 2518 if(alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt) 2519 { 2520 m_alfUsedFilterNum += alfUnitParam->alfFiltParam->filters_per_group; 2521 } 2522 #endif 2523 } 2524 2525 if(alfUnitParam->alfFiltParam->alf_flag == 0) 2526 { 2527 countFiltOffLCU++; 2528 } 2529 else 2530 { 2531 Bool isNewFiltInSlice = (alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt); 2532 Bool isMergeAcrossSlice = ( alfUnitParam->mergeType == ALF_MERGE_UP && (u-numLCUWidth < 0) ); 2533 2534 if( isNewFiltInSlice || isMergeAcrossSlice ) 2535 { 2536 countNewFilts++; 2537 } 2538 } 2539 2540 u++; 2541 2542 } //LCU 2543 } //tile 2544 2545 2546 //slice-level parameters 2547 AlfUnitParam* firstAlfUnitInSlice = &(alfSliceUnitParams[0]); 2548 if( countFiltOffLCU == u ) //number of filter-off LCU is equal 
to the number of LCUs in slice 2549 { 2550 alfParamSet[s].isEnabled [compIdx] = false; 2551 alfParamSet[s].isUniParam[compIdx] = true; //uni-param, all off 2552 assert(firstAlfUnitInSlice->alfFiltParam->alf_flag == 0); 2553 } 2554 else 2555 { 2556 alfParamSet[s].isEnabled[compIdx] = true; 2557 if( countNewFilts == 1 && firstAlfUnitInSlice->alfFiltParam->alf_flag != 0 && countFiltOffLCU == 0 ) 2558 { 2559 alfParamSet[s].isUniParam[compIdx] = true; 2560 } 2561 else 2562 { 2563 alfParamSet[s].isUniParam[compIdx] = false; 2564 } 2565 } 2566 } //slice 2567 2568 2569 delete[] isProcessed; 2570 isProcessed = NULL; 2571 } 2572 2573 2574 /** Decide ALF parameter set for luma/chroma components (top function) 2575 * \param [in] pPicOrg picture buffer for original picture 2576 * \param [in] pPicDec picture buffer for un-filtered picture 2577 * \param [out] pPicRest picture buffer for reconstructed picture 2578 * \param [in, out] alfParamSet ALF parameter set 2579 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters 2580 */ 2581 #if HHI_INTERVIEW_SKIP 2582 Void TEncAdaptiveLoopFilter::decideParameters(TComPicYuv* pPicOrg, TComPicYuv* pPicDec, TComPicYuv* pPicRest, TComPicYuv* pUsedPelMap 2583 , AlfParamSet* alfParamSet 2584 , std::vector<AlfCUCtrlInfo>* alfCtrlParam) 2585 #else 2586 Void TEncAdaptiveLoopFilter::decideParameters(TComPicYuv* pPicOrg, TComPicYuv* pPicDec, TComPicYuv* pPicRest 2587 , AlfParamSet* alfParamSet 2588 , std::vector<AlfCUCtrlInfo>* alfCtrlParam) 2589 #endif 2590 { 2591 static Int lumaStride = pPicOrg->getStride(); 2592 static Int chromaStride = pPicOrg->getCStride(); 2593 2594 Pel *pOrg, *pDec, *pRest; 2595 Int stride, formatShift; 2596 #if HHI_INTERVIEW_SKIP 2597 Pel *pUsed = NULL ; 2598 #endif 2599 2600 for(Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++) 2601 { 2602 pOrg = getPicBuf(pPicOrg, compIdx); 2603 pDec = getPicBuf(pPicDec, compIdx); 2604 pRest = getPicBuf(pPicRest, compIdx); 2605 #if HHI_INTERVIEW_SKIP 2606 if( pUsedPelMap 
) 2607 { 2608 pUsed = getPicBuf(pUsedPelMap, compIdx); 2609 } 2610 #endif 2611 stride = (compIdx == ALF_Y)?(lumaStride):(chromaStride); 2612 formatShift = (compIdx == ALF_Y)?(0):(1); 2613 2614 AlfCorrData** alfCorrComp = m_alfCorr[compIdx]; 2615 2616 if(!m_picBasedALFEncode) //lcu-based optimization 2617 { 2618 #if HHI_INTERVIEW_SKIP 2619 executeLCUBasedModeDecision(alfParamSet, compIdx, pOrg, pDec, pRest, pUsed, stride, formatShift, alfCorrComp); 2620 #else 2621 executeLCUBasedModeDecision(alfParamSet, compIdx, pOrg, pDec, pRest, stride, formatShift, alfCorrComp); 2622 #endif 2623 } 2624 else //picture-based optimization 2625 { 2626 AlfPicQTPart* alfPicQTPart = m_alfPQTPart[compIdx]; 2627 #if HHI_INTERVIEW_SKIP 2628 executePicBasedModeDecision(alfParamSet, alfPicQTPart, compIdx, pOrg, pDec, pRest, pUsed, stride, formatShift, alfCorrComp); 2629 #else 2630 executePicBasedModeDecision(alfParamSet, alfPicQTPart, compIdx, pOrg, pDec, pRest, stride, formatShift, alfCorrComp); 2631 #endif 2632 } 2633 2634 } //component 2635 2636 } 2637 2638 /** Gather correlations for all LCUs in picture 2639 * \param [in] pPicOrg picture buffer for original picture 2640 * \param [in] pPicDec picture buffer for un-filtered picture 2641 */ 2642 Void TEncAdaptiveLoopFilter::getStatistics(TComPicYuv* pPicOrg, TComPicYuv* pPicDec) 2643 { 2644 Int lumaStride = pPicOrg->getStride(); 2645 Int chromaStride = pPicOrg->getCStride(); 2646 const Int chromaFormatShift = 1; 2647 2648 //calculate BA index 2649 calcOneRegionVar(m_varImg, getPicBuf(pPicDec, ALF_Y), lumaStride, false, 0, m_img_height, 0, m_img_width); 2650 for(Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++) 2651 { 2652 AlfCorrData** alfCorrComp = m_alfCorr[compIdx]; 2653 Int formatShift = (compIdx == ALF_Y)?(0):(chromaFormatShift); 2654 Int stride = (compIdx == ALF_Y)?(lumaStride):(chromaStride); 2655 2656 getOneCompStatistics(alfCorrComp, compIdx, getPicBuf(pPicOrg, compIdx), getPicBuf(pPicDec, compIdx), stride, formatShift, 
false); 2657 } 2658 } 2659 2660 /** Gather correlations for all LCUs of one luma/chroma component in picture 2661 * \param [out] alfCorrComp correlations for LCUs 2662 * \param [in] compIdx luma/chroma component index 2663 * \param [in] imgOrg picture buffer for original picture 2664 * \param [in] imgDec picture buffer for un-filtered picture 2665 * \param [in] stride buffer stride size for 1-D pictrue memory 2666 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0) 2667 * \param [in] isRedesignPhase at re-design filter stage (true) or not (false) 2668 */ 2669 Void TEncAdaptiveLoopFilter::getOneCompStatistics(AlfCorrData** alfCorrComp, Int compIdx, Pel* imgOrg, Pel* imgDec, Int stride, Int formatShift, Bool isRedesignPhase) 2670 { 2671 2672 // initialize to zero 2673 for(Int n=0; n< m_uiNumCUsInFrame; n++) 2674 { 2675 alfCorrComp[n]->reset(); 2676 } 2677 2678 for(Int s=0; s<= m_lastSliceIdx; s++) 2679 { 2680 if(!m_pcPic->getValidSlice(s)) 2681 { 2682 continue; 2683 } 2684 Int numTilesInSlice = (Int)m_pvpSliceTileAlfLCU[s].size(); 2685 for(Int t=0; t< numTilesInSlice; t++) 2686 { 2687 std::vector<AlfLCUInfo*> & vpAlfLCU = m_pvpSliceTileAlfLCU[s][t]; 2688 Pel* pSrc = imgDec; 2689 2690 if(m_bUseNonCrossALF) 2691 { 2692 pSrc = getPicBuf(m_pcSliceYuvTmp, compIdx); 2693 copyRegion(vpAlfLCU, pSrc, imgDec, stride, formatShift); 2694 extendRegionBorder(vpAlfLCU, pSrc, stride, formatShift); 2695 } 2696 2697 Int numLCUs = (Int)vpAlfLCU.size(); 2698 for(Int n=0; n< numLCUs; n++) 2699 { 2700 AlfLCUInfo* alfLCU = vpAlfLCU[n]; 2701 Int addr = alfLCU->pcCU->getAddr(); 2702 getStatisticsOneLCU(!m_picBasedALFEncode, compIdx, alfLCU, alfCorrComp[addr], imgOrg, pSrc, stride, formatShift, isRedesignPhase); 2703 } //LCU 2704 } //tile 2705 } //slice 2706 2707 } 2708 2709 /** Gather correlations for one LCU 2710 * \param [out] alfCorrComp correlations for LCUs 2711 * \param [in] compIdx luma/chroma component index 2712 * \param [in] imgOrg picture buffer for original picture 2713 
* \param [in] imgDec picture buffer for un-filtered picture 2714 * \param [in] stride buffer stride size for 1-D pictrue memory 2715 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0) 2716 * \param [in] isRedesignPhase at re-design filter stage (true) or not (false) 2717 */ 2718 Void TEncAdaptiveLoopFilter::getStatisticsOneLCU(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo* alfLCU, AlfCorrData* alfCorr, Pel* pPicOrg, Pel* pPicSrc, Int stride, Int formatShift, Bool isRedesignPhase) 2719 { 2720 Int numBlocks = alfLCU->numSGU; 2721 #if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER 2722 Int lcuAddr = alfLCU->pcCU->getAddr(); 2723 Bool notSkipLinesBelowVB = true; 2724 Int endypos; 2725 #endif 2726 Bool isLastBlock; 2727 Int ypos, xpos, height, width; 2728 2729 #if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER 2730 if(skipLCUBottomLines) 2731 { 2732 if(lcuAddr + m_numLCUInPicWidth < m_uiNumCUsInFrame) 2733 { 2734 notSkipLinesBelowVB = false; 2735 } 2736 } 2737 #endif 2738 2739 switch(compIdx) 2740 { 2741 case ALF_Cb: 2742 case ALF_Cr: 2743 { 2744 for(Int n=0; n< numBlocks; n++) 2745 { 2746 isLastBlock = (n== numBlocks-1); 2747 NDBFBlockInfo& AlfSGU = (*alfLCU)[n]; 2748 2749 ypos = (Int)(AlfSGU.posY >> formatShift); 2750 xpos = (Int)(AlfSGU.posX >> formatShift); 2751 height = (Int)(AlfSGU.height>> formatShift); 2752 width = (Int)(AlfSGU.width >> formatShift); 2753 2754 #if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER 2755 if(!notSkipLinesBelowVB ) 2756 { 2757 endypos = ypos+ height -1; 2758 Int iLineVBPos = m_lcuHeightChroma - 2; 2759 Int yEndLineInLCU = endypos % m_lcuHeightChroma; 2760 height = (yEndLineInLCU >= iLineVBPos) ? 
(height - 2) : height ; 2761 } 2762 #endif 2763 2764 #if ALF_SINGLE_FILTER_SHAPE 2765 calcCorrOneCompRegionChma(pPicOrg, pPicSrc, stride, ypos, xpos, height, width, alfCorr->ECorr[0], alfCorr->yCorr[0], isLastBlock); 2766 #endif 2767 } 2768 } 2769 break; 2770 case ALF_Y: 2771 { 2772 Bool forceCollection = true; 2773 2774 if(isRedesignPhase) 2775 { 2776 Int numValidPels = 0; 2777 for(Int n=0; n< numBlocks; n++) 2778 { 2779 NDBFBlockInfo& AlfSGU = (*alfLCU)[n]; 2780 2781 ypos = (Int)(AlfSGU.posY ); 2782 xpos = (Int)(AlfSGU.posX ); 2783 height = (Int)(AlfSGU.height); 2784 width = (Int)(AlfSGU.width ); 2785 2786 for (Int y = ypos; y < ypos+ height; y++) 2787 { 2788 for (Int x = xpos; x < xpos + width; x++) 2789 { 2790 if (m_maskImg[y][x] == 1) 2791 { 2792 numValidPels++; 2793 } 2794 } 2795 } 2796 } 2797 2798 if(numValidPels > 0) 2799 { 2800 forceCollection = false; 2801 } 2802 } 2803 2804 for(Int n=0; n< numBlocks; n++) 2805 { 2806 isLastBlock = (n== numBlocks-1); 2807 NDBFBlockInfo& AlfSGU = (*alfLCU)[n]; 2808 2809 ypos = (Int)(AlfSGU.posY ); 2810 xpos = (Int)(AlfSGU.posX ); 2811 height = (Int)(AlfSGU.height); 2812 width = (Int)(AlfSGU.width ); 2813 2814 #if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER 2815 endypos = ypos+ height -1; 2816 if(!notSkipLinesBelowVB) 2817 { 2818 Int iLineVBPos = m_lcuHeight - 4; 2819 Int yEndLineInLCU = endypos % m_lcuHeight; 2820 height = (yEndLineInLCU >= iLineVBPos) ? 
(height - 4) : height ; 2821 } 2822 #endif 2823 2824 #if ALF_SINGLE_FILTER_SHAPE 2825 calcCorrOneCompRegionLuma(pPicOrg, pPicSrc, stride, ypos, xpos, height, width, alfCorr->ECorr, alfCorr->yCorr, alfCorr->pixAcc, forceCollection, isLastBlock); 2826 #endif 2827 } 2828 } 2829 break; 2830 default: 2831 { 2832 printf("Not a legal component index for ALF\n"); 2833 assert(0); 2834 exit(-1); 2835 } 2836 } 2837 } 2838 2839 2840 #if ALF_SINGLE_FILTER_SHAPE 2841 /** Gather correlations for one region for chroma component 2842 * \param [in] imgOrg picture buffer for original picture 2843 * \param [in] imgPad picture buffer for un-filtered picture 2844 * \param [in] stride buffer stride size for 1-D pictrue memory 2845 * \param [in] yPos region starting y position 2846 * \param [in] xPos region starting x position 2847 * \param [in] height region height 2848 * \param [in] width region width 2849 * \param [out] eCorr auto-correlation matrix 2850 * \param [out] yCorr cross-correlation array 2851 * \param [in] isSymmCopyBlockMatrix symmetrically copy correlation values in eCorr (true) or not (false) 2852 */ 2853 Void TEncAdaptiveLoopFilter::calcCorrOneCompRegionChma(Pel* imgOrg, Pel* imgPad, Int stride 2854 , Int yPos, Int xPos, Int height, Int width 2855 , Double **eCorr, Double *yCorr, Bool isSymmCopyBlockMatrix 2856 ) 2857 { 2858 Int yPosEnd = yPos + height; 2859 Int xPosEnd = xPos + width; 2860 Int N = ALF_MAX_NUM_COEF; //m_sqrFiltLengthTab[0]; 2861 2862 Int imgHeightChroma = m_img_height>>1; 2863 2864 Int yLineInLCU, paddingLine; 2865 Int ELocal[ALF_MAX_NUM_COEF]; 2866 Pel *imgPad1, *imgPad2, *imgPad3, *imgPad4, *imgPad5, *imgPad6; 2867 Int i, j, k, l, yLocal; 2868 2869 imgPad += (yPos*stride); 2870 imgOrg += (yPos*stride); 2871 2872 for (i= yPos; i< yPosEnd; i++) 2873 { 2874 yLineInLCU = i % m_lcuHeightChroma; 2875 2876 if (yLineInLCU==0 && i>0) 2877 { 2878 paddingLine = yLineInLCU + 2 ; 2879 imgPad1 = imgPad + stride; 2880 imgPad2 = imgPad - stride; 2881 imgPad3 = imgPad 
+ 2*stride; 2882 imgPad4 = imgPad - 2*stride; 2883 imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride; 2884 imgPad6 = (paddingLine < 3) ? imgPad : imgPad - min(paddingLine, 3)*stride;; 2885 } 2886 else if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma ) 2887 { 2888 imgPad1 = imgPad + stride; 2889 imgPad2 = imgPad - stride; 2890 imgPad3 = imgPad + 2*stride; 2891 imgPad4 = imgPad - 2*stride; 2892 imgPad5 = imgPad + 3*stride; 2893 imgPad6 = imgPad - 3*stride; 2894 } 2895 else if (yLineInLCU < m_lineIdxPadTopChroma) 2896 { 2897 paddingLine = - yLineInLCU + m_lineIdxPadTopChroma - 1; 2898 imgPad1 = (paddingLine < 1) ? imgPad : imgPad + min(paddingLine, 1)*stride; 2899 imgPad2 = (paddingLine < 1) ? imgPad : imgPad - stride; 2900 imgPad3 = (paddingLine < 2) ? imgPad : imgPad + min(paddingLine, 2)*stride; 2901 imgPad4 = (paddingLine < 2) ? imgPad : imgPad - 2*stride; 2902 imgPad5 = (paddingLine < 3) ? imgPad : imgPad + min(paddingLine, 3)*stride; 2903 imgPad6 = (paddingLine < 3) ? imgPad : imgPad - 3*stride; 2904 } 2905 else 2906 { 2907 paddingLine = yLineInLCU - m_lineIdxPadTopChroma ; 2908 imgPad1 = (paddingLine < 1) ? imgPad : imgPad + stride; 2909 imgPad2 = (paddingLine < 1) ? imgPad : imgPad - min(paddingLine, 1)*stride; 2910 imgPad3 = (paddingLine < 2) ? imgPad : imgPad + 2*stride; 2911 imgPad4 = (paddingLine < 2) ? imgPad : imgPad - min(paddingLine, 2)*stride; 2912 imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride; 2913 imgPad6 = (paddingLine < 3) ? 
imgPad : imgPad - min(paddingLine, 3)*stride; 2914 } 2915 2916 for (j= xPos; j< xPosEnd; j++) 2917 { 2918 memset(ELocal, 0, N*sizeof(Int)); 2919 2920 ELocal[0] = (imgPad5[j] + imgPad6[j]); 2921 2922 ELocal[1] = (imgPad3[j] + imgPad4[j]); 2923 2924 ELocal[2] = (imgPad1[j-1] + imgPad2[j+1]); 2925 ELocal[3] = (imgPad1[j ] + imgPad2[j ]); 2926 ELocal[4] = (imgPad1[j+1] + imgPad2[j-1]); 2927 2928 ELocal[5] = (imgPad[j+4] + imgPad[j-4]); 2929 ELocal[6] = (imgPad[j+3] + imgPad[j-3]); 2930 ELocal[7] = (imgPad[j+2] + imgPad[j-2]); 2931 ELocal[8] = (imgPad[j+1] + imgPad[j-1]); 2932 ELocal[9] = (imgPad[j ]); 2933 2934 yLocal= (Int)imgOrg[j]; 2935 2936 for(k=0; k<N; k++) 2937 { 2938 eCorr[k][k] += ELocal[k]*ELocal[k]; 2939 for(l=k+1; l<N; l++) 2940 { 2941 eCorr[k][l] += ELocal[k]*ELocal[l]; 2942 } 2943 2944 yCorr[k] += yLocal*ELocal[k]; 2945 } 2946 } 2947 2948 imgPad+= stride; 2949 imgOrg+= stride; 2950 } 2951 2952 if(isSymmCopyBlockMatrix) 2953 { 2954 for(j=0; j<N-1; j++) 2955 { 2956 for(i=j+1; i<N; i++) 2957 { 2958 eCorr[i][j] = eCorr[j][i]; 2959 } 2960 } 2961 } 2962 } 2963 2964 /** Gather correlations for one region for luma component 2965 * \param [in] imgOrg picture buffer for original picture 2966 * \param [in] imgPad picture buffer for un-filtered picture 2967 * \param [in] stride buffer stride size for 1-D pictrue memory 2968 * \param [in] yPos region starting y position 2969 * \param [in] xPos region starting x position 2970 * \param [in] height region height 2971 * \param [in] width region width 2972 * \param [out] eCorr auto-correlation matrix 2973 * \param [out] yCorr cross-correlation array 2974 * \param [out] pixAcc pixel squared value 2975 * \param [in] isforceCollection all pixel are used for correlation calculation (true) or not (false) 2976 * \param [in] isSymmCopyBlockMatrix symmetrically copy correlation values in eCorr (true) or not (false) 2977 */ 2978 Void TEncAdaptiveLoopFilter::calcCorrOneCompRegionLuma(Pel* imgOrg, Pel* imgPad, Int stride 2979 ,Int 
yPos, Int xPos, Int height, Int width 2980 ,Double ***eCorr, Double **yCorr, Double *pixAcc 2981 ,Bool isforceCollection, Bool isSymmCopyBlockMatrix 2982 ) 2983 { 2984 Int yPosEnd = yPos + height; 2985 Int xPosEnd = xPos + width; 2986 Int yLineInLCU; 2987 Int paddingLine ; 2988 Int N = ALF_MAX_NUM_COEF; //m_sqrFiltLengthTab[0]; 2989 2990 Int ELocal[ALF_MAX_NUM_COEF]; 2991 Pel *imgPad1, *imgPad2, *imgPad3, *imgPad4, *imgPad5, *imgPad6; 2992 Int i, j, k, l, yLocal, varInd; 2993 Double **E; 2994 Double *yy; 2995 2996 imgPad += (yPos*stride); 2997 imgOrg += (yPos*stride); 2998 2999 for (i= yPos; i< yPosEnd; i++) 3000 { 3001 yLineInLCU = i % m_lcuHeight; 3002 3003 if (yLineInLCU<m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height) 3004 { 3005 imgPad1 = imgPad + stride; 3006 imgPad2 = imgPad - stride; 3007 imgPad3 = imgPad + 2*stride; 3008 imgPad4 = imgPad - 2*stride; 3009 imgPad5 = imgPad + 3*stride; 3010 imgPad6 = imgPad - 3*stride; 3011 } 3012 else if (yLineInLCU<m_lineIdxPadTop) 3013 { 3014 paddingLine = - yLineInLCU + m_lineIdxPadTop - 1; 3015 imgPad1 = (paddingLine < 1) ? imgPad : imgPad + min(paddingLine, 1)*stride; 3016 imgPad2 = (paddingLine < 1) ? imgPad : imgPad - stride; 3017 imgPad3 = (paddingLine < 2) ? imgPad : imgPad + min(paddingLine, 2)*stride; 3018 imgPad4 = (paddingLine < 2) ? imgPad : imgPad - 2*stride; 3019 imgPad5 = (paddingLine < 3) ? imgPad : imgPad + min(paddingLine, 3)*stride; 3020 imgPad6 = (paddingLine < 3) ? imgPad : imgPad - 3*stride; 3021 } 3022 else 3023 { 3024 paddingLine = yLineInLCU - m_lineIdxPadTop; 3025 imgPad1 = (paddingLine < 1) ? imgPad : imgPad + stride; 3026 imgPad2 = (paddingLine < 1) ? imgPad : imgPad - min(paddingLine, 1)*stride; 3027 imgPad3 = (paddingLine < 2) ? imgPad : imgPad + 2*stride; 3028 imgPad4 = (paddingLine < 2) ? imgPad : imgPad - min(paddingLine, 2)*stride; 3029 imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride; 3030 imgPad6 = (paddingLine < 3) ? 
imgPad : imgPad - min(paddingLine, 3)*stride; 3031 } 3032 3033 for (j= xPos; j< xPosEnd; j++) 3034 { 3035 if ( m_maskImg[i][j] || isforceCollection ) 3036 { 3037 varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W]; 3038 memset(ELocal, 0, N*sizeof(Int)); 3039 3040 ELocal[0] = (imgPad5[j] + imgPad6[j]); 3041 ELocal[1] = (imgPad3[j] + imgPad4[j]); 3042 3043 ELocal[2] = (imgPad1[j-1] + imgPad2[j+1]); 3044 ELocal[3] = (imgPad1[j ] + imgPad2[j ]); 3045 ELocal[4] = (imgPad1[j+1] + imgPad2[j-1]); 3046 3047 ELocal[5] = (imgPad[j+4] + imgPad[j-4]); 3048 ELocal[6] = (imgPad[j+3] + imgPad[j-3]); 3049 ELocal[7] = (imgPad[j+2] + imgPad[j-2]); 3050 ELocal[8] = (imgPad[j+1] + imgPad[j-1]); 3051 ELocal[9] = (imgPad[j ]); 3052 3053 yLocal= imgOrg[j]; 3054 pixAcc[varInd] += (yLocal*yLocal); 3055 E = eCorr[varInd]; 3056 yy = yCorr[varInd]; 3057 3058 for (k=0; k<N; k++) 3059 { 3060 for (l=k; l<N; l++) 3061 { 3062 E[k][l]+=(double)(ELocal[k]*ELocal[l]); 3063 } 3064 yy[k]+=(double)(ELocal[k]*yLocal); 3065 } 3066 } 3067 } 3068 imgPad += stride; 3069 imgOrg += stride; 3070 } 3071 3072 if(isSymmCopyBlockMatrix) 3073 { 3074 for (varInd=0; varInd<NO_VAR_BINS; varInd++) 3075 { 3076 E = eCorr[varInd]; 3077 for (k=1; k<N; k++) 3078 { 3079 for (l=0; l<k; l++) 3080 { 3081 E[k][l] = E[l][k]; 3082 } 3083 } 3084 } 3085 } 3086 3087 } 3088 #endif 3089 3090 #else 3091 3092 3093 #if ALF_CHROMA_LAMBDA 3094 /** 3095 \param pcAlfParam ALF parameter 3096 \param [out] pvAlfCtrlParam ALF CU control parameters container for slices 3097 \param dLambdaLuma luma lambda value for RD cost computation 3098 \param dLambdaChroma chroma lambda value for RD cost computation 3099 \retval ruiDist distortion 3100 \retval ruiBits required bits 3101 \retval ruiMaxAlfCtrlDepth optimal partition depth 3102 */ 3103 #if HHI_INTERVIEW_SKIP 3104 Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambdaLuma, Double dLambdaChroma, UInt64& ruiDist, UInt64& ruiBits, Bool 
bInterviewSkip) 3105 #else 3106 Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambdaLuma, Double dLambdaChroma, UInt64& ruiDist, UInt64& ruiBits) 3107 3108 #endif 3109 #else 353 3110 /** 354 3111 \param pcAlfParam ALF parameter … … 358 3115 \retval ruiMaxAlfCtrlDepth optimal partition depth 359 3116 */ 360 Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits, UInt& ruiMaxAlfCtrlDepth ) 361 { 362 Int tap, num_coef; 363 364 // set global variables 365 tap = ALF_MAX_NUM_TAP; 366 #if TI_ALF_MAX_VSIZE_7 367 Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap); 368 num_coef = (tap * tapV + 1) >> 1; 369 #else 370 num_coef = (tap*tap+1)>>1; 371 #endif 372 num_coef = num_coef + 1; // DC offset 3117 #if HHI_INTERVIEW_SKIP 3118 Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits, Bool bInterviewSkip) 3119 #else 3120 Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits) 3121 3122 #endif 3123 #endif 3124 { 373 3125 374 3126 // set lambda 3127 #if ALF_CHROMA_LAMBDA 3128 m_dLambdaLuma = dLambdaLuma; 3129 m_dLambdaChroma = dLambdaChroma; 3130 #else 375 3131 m_dLambdaLuma = dLambda; 376 3132 m_dLambdaChroma = dLambda; 377 3133 #endif 3134 3135 m_lcuHeight = m_pcPic->getSlice(0)->getSPS()->getMaxCUHeight(); 3136 3137 #if ALF_SINGLE_FILTER_SHAPE 3138 m_lineIdxPadBot = m_lcuHeight - 4 - 3; // DFRegion, Vertical Taps 3139 #else 3140 m_lineIdxPadBot = m_lcuHeight - 4 - 4; // DFRegion, Vertical Taps 3141 #endif 3142 m_lineIdxPadTop = m_lcuHeight - 4; // DFRegion 3143 3144 m_lcuHeightChroma = m_lcuHeight>>1; 3145 #if ALF_SINGLE_FILTER_SHAPE 3146 m_lineIdxPadBotChroma = m_lcuHeightChroma - 2 - 3; // DFRegion, Vertical Taps 3147 #else 3148 m_lineIdxPadBotChroma 
= m_lcuHeightChroma - 2 - 4; // DFRegion, Vertical Taps 3149 #endif 3150 m_lineIdxPadTopChroma = m_lcuHeightChroma - 2 ; // DFRegion 3151 378 3152 TComPicYuv* pcPicOrg = m_pcPic->getPicYuvOrg(); 379 3153 … … 381 3155 TComPicYuv* pcPicYuvRec = m_pcPic->getPicYuvRec(); 382 3156 TComPicYuv* pcPicYuvExtRec = m_pcTempPicYuv; 3157 #if HHI_INTERVIEW_SKIP 3158 TComPicYuv* pcUsedPelMap = m_pcPic->getUsedPelsMap() ; 3159 if(bInterviewSkip) 3160 assert( pcUsedPelMap ) ; 3161 #endif 383 3162 384 3163 pcPicYuvRec->copyToPic(pcPicYuvExtRec); 385 #if MTK_NONCROSS_INLOOP_FILTER386 3164 if(!m_bUseNonCrossALF) 387 3165 { 388 #endif389 3166 pcPicYuvExtRec->setBorderExtension( false ); 390 3167 pcPicYuvExtRec->extendPicBorder (); 391 #if MTK_NONCROSS_INLOOP_FILTER 392 } 393 #endif 3168 } 394 3169 395 3170 // set min cost … … 403 3178 404 3179 // calc original cost 3180 #if HHI_INTERVIEW_SKIP 3181 xCalcRDCost( pcPicOrg, pcPicYuvRec, pcUsedPelMap, NULL, uiOrigRate, uiOrigDist, dOrigCost ); 3182 #else 405 3183 xCalcRDCost( pcPicOrg, pcPicYuvRec, NULL, uiOrigRate, uiOrigDist, dOrigCost ); 3184 #endif 406 3185 m_pcBestAlfParam->alf_flag = 0; 407 m_pcBestAlfParam->cu_control_flag = 0;408 409 3186 // initialize temp_alfps 410 3187 m_pcTempAlfParam->alf_flag = 1; 411 m_pcTempAlfParam->tap = tap;412 #if TI_ALF_MAX_VSIZE_7413 m_pcTempAlfParam->tapV = tapV;414 #endif415 m_pcTempAlfParam->num_coeff = num_coef;416 3188 m_pcTempAlfParam->chroma_idc = 0; 417 m_pcTempAlfParam->cu_control_flag = 0; 418 419 #if MQT_ALF_NPASS 3189 3190 m_bAlfCUCtrlEnabled = (pvAlfCtrlParam != NULL)?true:false; 3191 if(m_bAlfCUCtrlEnabled) 3192 { 3193 m_vBestAlfCUCtrlParam.resize(m_uiNumSlicesInPic); 3194 for(Int s=0; s< m_uiNumSlicesInPic; s++) 3195 { 3196 m_vBestAlfCUCtrlParam[s].cu_control_flag = 0; 3197 } 3198 } 3199 else 3200 { 3201 m_vBestAlfCUCtrlParam.clear(); 3202 } 3203 420 3204 setALFEncodingParam(m_pcPic); 421 #endif422 3205 423 3206 // adaptive in-loop wiener filtering 424 xEncALFLuma_qc( pcPicOrg, 
pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost ); 425 3207 #if HHI_INTERVIEW_SKIP 3208 xEncALFLuma( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost, bInterviewSkip ); 3209 #else 3210 xEncALFLuma( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost ); 3211 #endif 3212 426 3213 // cu-based filter on/off control 3214 #if HHI_INTERVIEW_SKIP 3215 xCUAdaptiveControl_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, pcUsedPelMap, uiMinRate, uiMinDist, dMinCost ); 3216 #else 427 3217 xCUAdaptiveControl_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost ); 428 429 // adaptive tap-length 430 xFilterTapDecision_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost ); 431 432 // compute RD cost 433 xCalcRDCost( pcPicOrg, pcPicYuvRec, m_pcBestAlfParam, uiMinRate, uiMinDist, dMinCost ); 3218 #endif 434 3219 435 3220 // compare RD cost to non-ALF case … … 444 3229 { 445 3230 m_pcBestAlfParam->alf_flag = 0; 446 m_pcBestAlfParam->cu_control_flag = 0; 447 3231 448 3232 uiMinRate = uiOrigRate; 449 3233 uiMinDist = uiOrigDist; 450 dMinCost = dMinCost;451 3234 452 3235 m_pcEntropyCoder->setAlfCtrl(false); 3236 if(m_bAlfCUCtrlEnabled) 3237 { 3238 for(Int s=0; s< m_uiNumSlicesInPic; s++) 3239 { 3240 m_vBestAlfCUCtrlParam[s].cu_control_flag = 0; 3241 } 3242 } 453 3243 pcPicYuvExtRec->copyToPicLuma(pcPicYuvRec); 454 3244 … … 456 3246 ruiDist = uiOrigDist; 457 3247 } 458 459 3248 // if ALF works 460 3249 if( m_pcBestAlfParam->alf_flag ) 461 3250 { 462 // predict ALF coefficients463 predictALFCoeff( m_pcBestAlfParam );464 465 3251 // do additional ALF process for chroma 466 x EncALFChroma( uiMinRate, pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, ruiDist, ruiBits );3252 xFilterTapDecisionChroma( uiMinRate, pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, ruiDist, ruiBits ); 467 3253 } 468 3254 … … 470 3256 copyALFParam(pcAlfParam, m_pcBestAlfParam); 471 3257 472 // store best depth 473 ruiMaxAlfCtrlDepth = 
m_pcEntropyCoder->getMaxAlfCtrlDepth(); 3258 if(m_bAlfCUCtrlEnabled) 3259 { 3260 for(Int s=0; s< m_uiNumSlicesInPic; s++) 3261 { 3262 (*pvAlfCtrlParam)[s]= m_vBestAlfCUCtrlParam[s]; 3263 } 3264 } 3265 } 3266 #endif 3267 3268 /** PCM LF disable process. 3269 * \param pcPic picture (TComPic) pointer 3270 * \returns Void 3271 * 3272 * \note Replace filtered sample values of PCM mode blocks with the transmitted and reconstructed ones. 3273 */ 3274 Void TEncAdaptiveLoopFilter::PCMLFDisableProcess (TComPic* pcPic) 3275 { 3276 xPCMRestoration(pcPic); 474 3277 } 475 3278 … … 478 3281 // ==================================================================================================================== 479 3282 480 Void TEncAdaptiveLoopFilter::xEncALFChroma( UInt64 uiLumaRate, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, UInt64& ruiBits )481 {482 // restriction for non-referenced B-slice483 if (m_eSliceType == B_SLICE && m_iPicNalReferenceIdc == 0)484 {485 return;486 }487 488 Int tap, num_coef;489 490 // set global variables491 tap = ALF_MAX_NUM_TAP_C;492 num_coef = (tap*tap+1)>>1;493 num_coef = num_coef + 1; // DC offset494 495 // set min cost496 UInt64 uiMinRate = uiLumaRate;497 UInt64 uiMinDist = MAX_INT;498 Double dMinCost = MAX_DOUBLE;499 500 // calc original cost501 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);502 xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiMinRate, uiMinDist, dMinCost);503 504 // initialize temp_alfps505 m_pcTempAlfParam->chroma_idc = 3;506 m_pcTempAlfParam->tap_chroma = tap;507 m_pcTempAlfParam->num_coeff_chroma = num_coef;508 509 // Adaptive in-loop wiener filtering for chroma510 xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);511 512 // filter on/off decision for chroma513 Int iCWidth = (pcPicOrg->getWidth()>>1);514 Int iCHeight = (pcPicOrg->getHeight()>>1);515 Int iCStride = pcPicOrg->getCStride();516 UInt64 uiFiltDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicRest->getCbAddr(), 
iCWidth, iCHeight, iCStride);517 UInt64 uiFiltDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicRest->getCrAddr(), iCWidth, iCHeight, iCStride);518 UInt64 uiOrgDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicDec->getCbAddr(), iCWidth, iCHeight, iCStride);519 UInt64 uiOrgDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicDec->getCrAddr(), iCWidth, iCHeight, iCStride);520 521 m_pcTempAlfParam->chroma_idc = 0;522 if(uiOrgDistCb > uiFiltDistCb)523 m_pcTempAlfParam->chroma_idc += 2;524 if(uiOrgDistCr > uiFiltDistCr )525 m_pcTempAlfParam->chroma_idc += 1;526 527 if(m_pcTempAlfParam->chroma_idc)528 {529 if(m_pcTempAlfParam->chroma_idc!=3)530 {531 // chroma filter re-design532 xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);533 }534 535 UInt64 uiRate, uiDist;536 Double dCost;537 xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost);538 539 if( dCost < dMinCost )540 {541 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);542 predictALFCoeffChroma(m_pcBestAlfParam);543 544 ruiBits += uiRate;545 ruiDist += uiDist;546 }547 else548 {549 m_pcBestAlfParam->chroma_idc = 0;550 551 if((m_pcTempAlfParam->chroma_idc>>1)&0x01)552 pcPicDec->copyToPicCb(pcPicRest);553 if(m_pcTempAlfParam->chroma_idc&0x01)554 pcPicDec->copyToPicCr(pcPicRest);555 556 ruiBits += uiMinRate;557 ruiDist += uiMinDist;558 }559 }560 else561 {562 m_pcBestAlfParam->chroma_idc = 0;563 564 ruiBits += uiMinRate;565 ruiDist += uiMinDist;566 567 pcPicDec->copyToPicCb(pcPicRest);568 pcPicDec->copyToPicCr(pcPicRest);569 }570 }571 572 3283 // ==================================================================================================================== 573 3284 // Private member functions 574 3285 // ==================================================================================================================== 575 3286 #if !LCU_SYNTAX_ALF 576 3287 Void TEncAdaptiveLoopFilter::xInitParam() 577 3288 { … … 616 3327 } 617 3328 } 3329 if (m_ppdAlfCorrCb != NULL) 3330 { 3331 for (i = 0; i < 
ALF_MAX_NUM_COEF; i++) 3332 { 3333 for (j = 0; j < ALF_MAX_NUM_COEF+1; j++) 3334 { 3335 m_ppdAlfCorrCb[i][j] = 0; 3336 } 3337 } 3338 } 3339 else 3340 { 3341 m_ppdAlfCorrCb = new Double*[ALF_MAX_NUM_COEF]; 3342 for (i = 0; i < ALF_MAX_NUM_COEF; i++) 3343 { 3344 m_ppdAlfCorrCb[i] = new Double[ALF_MAX_NUM_COEF+1]; 3345 for (j = 0; j < ALF_MAX_NUM_COEF+1; j++) 3346 { 3347 m_ppdAlfCorrCb[i][j] = 0; 3348 } 3349 } 3350 } 3351 3352 if (m_ppdAlfCorrCr != NULL) 3353 { 3354 for (i = 0; i < ALF_MAX_NUM_COEF; i++) 3355 { 3356 for (j = 0; j < ALF_MAX_NUM_COEF+1; j++) 3357 { 3358 m_ppdAlfCorrCr[i][j] = 0; 3359 } 3360 } 3361 } 3362 else 3363 { 3364 m_ppdAlfCorrCr = new Double*[ALF_MAX_NUM_COEF]; 3365 for (i = 0; i < ALF_MAX_NUM_COEF; i++) 3366 { 3367 m_ppdAlfCorrCr[i] = new Double[ALF_MAX_NUM_COEF+1]; 3368 for (j = 0; j < ALF_MAX_NUM_COEF+1; j++) 3369 { 3370 m_ppdAlfCorrCr[i][j] = 0; 3371 } 3372 } 3373 } 618 3374 } 619 3375 … … 638 3394 m_pdDoubleAlfCoeff = NULL; 639 3395 } 640 } 641 3396 if (m_ppdAlfCorrCb != NULL) 3397 { 3398 for (i = 0; i < ALF_MAX_NUM_COEF; i++) 3399 { 3400 delete[] m_ppdAlfCorrCb[i]; 3401 m_ppdAlfCorrCb[i] = NULL; 3402 } 3403 delete[] m_ppdAlfCorrCb; 3404 m_ppdAlfCorrCb = NULL; 3405 } 3406 3407 if (m_ppdAlfCorrCr != NULL) 3408 { 3409 for (i = 0; i < ALF_MAX_NUM_COEF; i++) 3410 { 3411 delete[] m_ppdAlfCorrCr[i]; 3412 m_ppdAlfCorrCr[i] = NULL; 3413 } 3414 delete[] m_ppdAlfCorrCr; 3415 m_ppdAlfCorrCr = NULL; 3416 } 3417 } 3418 #endif 642 3419 Void TEncAdaptiveLoopFilter::xCreateTmpAlfCtrlFlags() 643 3420 { … … 676 3453 } 677 3454 678 Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlags() 679 { 680 for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ ) 681 { 682 TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr ); 683 xEncodeCUAlfCtrlFlag(pcCU, 0, 0); 684 } 685 } 686 3455 /** Encode ALF CU control flags 3456 */ 3457 Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlags(std::vector<AlfCUCtrlInfo> &vAlfCUCtrlParam) 3458 { 3459 for(Int s=0; s< 
m_uiNumSlicesInPic; s++) 3460 { 3461 if(!m_pcPic->getValidSlice(s)) 3462 { 3463 continue; 3464 } 3465 3466 AlfCUCtrlInfo& rCUCtrlInfo = vAlfCUCtrlParam[s]; 3467 if(rCUCtrlInfo.cu_control_flag == 1) 3468 { 3469 for(Int i=0; i< (Int)rCUCtrlInfo.alf_cu_flag.size(); i++) 3470 { 3471 m_pcEntropyCoder->encodeAlfCtrlFlag(rCUCtrlInfo.alf_cu_flag[i]); 3472 } 3473 } 3474 } 3475 } 687 3476 Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth) 688 3477 { … … 694 3483 695 3484 #if AD_HOCS_SLICES 696 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->get Width() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )3485 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 697 3486 #else 698 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->get Width() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )3487 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 699 3488 #endif 700 3489 { … … 711 3500 712 3501 #if AD_HOCS_SLICES 713 if( ( uiLPelX < pcCU->getSlice()->getSPS()->get Width() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )714 #else 715 if( ( uiLPelX < pcCU->getSlice()->getSPS()->get Width() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )3502 if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 3503 #else 3504 if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 716 3505 #endif 717 3506 xEncodeCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1); … … 722 3511 m_pcEntropyCoder->encodeAlfCtrlFlag(pcCU, uiAbsPartIdx); 723 3512 } 724 #if MTK_NONCROSS_INLOOP_FILTER 725 Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Int ypos, 
Int xpos, Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride, Bool bSymmCopyBlockMatrix) 726 #else 727 Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride) 728 #endif 729 { 730 //Patch should be extended before this point................ 731 //ext_offset = tap>>1; 732 733 #if TI_ALF_MAX_VSIZE_7 734 Int iTapV = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap); 735 Int N = (iTap * iTapV + 1) >> 1; 736 Int offsetV = iTapV >> 1; 737 #else 738 Int N = (iTap*iTap+1)>>1; 739 #endif 740 Int offset = iTap>>1; 741 742 const Int* pFiltPos; 743 744 switch(iTap) 745 { 746 case 5: 747 pFiltPos = m_aiSymmetricArray5x5; 748 break; 749 case 7: 750 pFiltPos = m_aiSymmetricArray7x7; 751 break; 752 case 9: 753 #if TI_ALF_MAX_VSIZE_7 754 pFiltPos = m_aiSymmetricArray9x7; 755 #else 756 pFiltPos = m_aiSymmetricArray9x9; 757 #endif 758 break; 759 default: 760 #if TI_ALF_MAX_VSIZE_7 761 pFiltPos = m_aiSymmetricArray9x7; 762 #else 763 pFiltPos = m_aiSymmetricArray9x9; 764 #endif 3513 3514 #if !LCU_SYNTAX_ALF 3515 3516 Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Int ypos, Int xpos, Pel* pImgOrg, Pel* pImgPad, Int filtNo, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride, Bool bSymmCopyBlockMatrix) 3517 { 3518 Int yposEnd = ypos + iHeight -1; 3519 Int xposEnd = xpos + iWidth -1; 3520 Int N = m_sqrFiltLengthTab[filtNo]; 3521 3522 Int imgHeightChroma = m_img_height>>1; 3523 Int yLineInLCU; 3524 Int paddingline ; 3525 3526 Int ELocal[ALF_MAX_NUM_COEF]; 3527 Pel *pImgPad1, *pImgPad2, *pImgPad3, *pImgPad4; 3528 Int i, j, k, l; 3529 Int yLocal; 3530 3531 pImgPad += (ypos*iCmpStride); 3532 pImgOrg += (ypos*iOrgStride); 3533 3534 switch(filtNo) 3535 { 3536 #if !ALF_SINGLE_FILTER_SHAPE 3537 case ALF_STAR5x5: 3538 { 3539 for (i= ypos; i<= yposEnd; i++) 3540 { 3541 yLineInLCU = i % m_lcuHeightChroma; 3542 3543 if (yLineInLCU < m_lineIdxPadBotChroma || 
i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma ) 3544 { 3545 pImgPad1 = pImgPad + iCmpStride; 3546 pImgPad2 = pImgPad - iCmpStride; 3547 pImgPad3 = pImgPad + 2*iCmpStride; 3548 pImgPad4 = pImgPad - 2*iCmpStride; 3549 } 3550 else if (yLineInLCU < m_lineIdxPadTopChroma) 3551 { 3552 paddingline = - yLineInLCU + m_lineIdxPadTopChroma - 1; 3553 pImgPad1 = pImgPad + min(paddingline, 1)*iCmpStride; 3554 pImgPad2 = pImgPad - iCmpStride; 3555 pImgPad3 = pImgPad + min(paddingline, 2)*iCmpStride; 3556 pImgPad4 = pImgPad - 2*iCmpStride; 3557 } 3558 else 3559 { 3560 paddingline = yLineInLCU - m_lineIdxPadTopChroma ; 3561 pImgPad1 = pImgPad + iCmpStride; 3562 pImgPad2 = pImgPad - min(paddingline, 1)*iCmpStride; 3563 pImgPad3 = pImgPad + 2*iCmpStride; 3564 pImgPad4 = pImgPad - min(paddingline, 2)*iCmpStride; 3565 } 3566 3567 if ( (yLineInLCU == m_lineIdxPadTopChroma || yLineInLCU == m_lineIdxPadTopChroma-1) && i-yLineInLCU+m_lcuHeightChroma < imgHeightChroma ) 3568 { 3569 pImgPad+= iCmpStride; 3570 pImgOrg+= iOrgStride; 3571 continue; 3572 } 3573 else 3574 { 3575 for (j= xpos; j<= xposEnd; j++) 3576 { 3577 memset(ELocal, 0, N*sizeof(Int)); 3578 3579 ELocal[0] = (pImgPad3[j+2] + pImgPad4[j-2]); 3580 ELocal[1] = (pImgPad3[j ] + pImgPad4[j ]); 3581 ELocal[2] = (pImgPad3[j-2] + pImgPad4[j+2]); 3582 3583 ELocal[3] = (pImgPad1[j+1] + pImgPad2[j-1]); 3584 ELocal[4] = (pImgPad1[j ] + pImgPad2[j ]); 3585 ELocal[5] = (pImgPad1[j-1] + pImgPad2[j+1]); 3586 3587 ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]); 3588 ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]); 3589 ELocal[8] = (pImgPad[j ]); 3590 3591 yLocal= (Int)pImgOrg[j]; 3592 3593 for(k=0; k<N; k++) 3594 { 3595 m_ppdAlfCorr[k][k] += ELocal[k]*ELocal[k]; 3596 for(l=k+1; l<N; l++) 3597 { 3598 m_ppdAlfCorr[k][l] += ELocal[k]*ELocal[l]; 3599 } 3600 3601 m_ppdAlfCorr[k][N] += yLocal*ELocal[k]; 3602 } 3603 } 3604 pImgPad+= iCmpStride; 3605 pImgOrg+= iOrgStride; 3606 } 3607 3608 } 3609 } 3610 break; 3611 case ALF_CROSS9x9: 3612 { 3613 Pel *pImgPad5, 
*pImgPad6, *pImgPad7, *pImgPad8; 3614 #else 3615 case ALF_CROSS9x7_SQUARE3x3: 3616 { 3617 Pel *pImgPad5, *pImgPad6; 3618 #endif 3619 for (i= ypos; i<= yposEnd; i++) 3620 { 3621 yLineInLCU = i % m_lcuHeightChroma; 3622 3623 if (yLineInLCU<2 && i> 2) 3624 { 3625 paddingline = yLineInLCU + 2 ; 3626 pImgPad1 = pImgPad + iCmpStride; 3627 pImgPad2 = pImgPad - iCmpStride; 3628 pImgPad3 = pImgPad + 2*iCmpStride; 3629 pImgPad4 = pImgPad - 2*iCmpStride; 3630 pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + 3*iCmpStride; 3631 pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - min(paddingline, 3)*iCmpStride;; 3632 #if !ALF_SINGLE_FILTER_SHAPE 3633 pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + 4*iCmpStride; 3634 pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - min(paddingline, 4)*iCmpStride;; 3635 #endif 3636 } 3637 else if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma ) 3638 { 3639 pImgPad1 = pImgPad + iCmpStride; 3640 pImgPad2 = pImgPad - iCmpStride; 3641 pImgPad3 = pImgPad + 2*iCmpStride; 3642 pImgPad4 = pImgPad - 2*iCmpStride; 3643 pImgPad5 = pImgPad + 3*iCmpStride; 3644 pImgPad6 = pImgPad - 3*iCmpStride; 3645 #if !ALF_SINGLE_FILTER_SHAPE 3646 pImgPad7 = pImgPad + 4*iCmpStride; 3647 pImgPad8 = pImgPad - 4*iCmpStride; 3648 #endif 3649 } 3650 else if (yLineInLCU < m_lineIdxPadTopChroma) 3651 { 3652 paddingline = - yLineInLCU + m_lineIdxPadTopChroma - 1; 3653 pImgPad1 = (paddingline < 1) ? pImgPad : pImgPad + min(paddingline, 1)*iCmpStride; 3654 pImgPad2 = (paddingline < 1) ? pImgPad : pImgPad - iCmpStride; 3655 pImgPad3 = (paddingline < 2) ? pImgPad : pImgPad + min(paddingline, 2)*iCmpStride; 3656 pImgPad4 = (paddingline < 2) ? pImgPad : pImgPad - 2*iCmpStride; 3657 pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + min(paddingline, 3)*iCmpStride; 3658 pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - 3*iCmpStride; 3659 #if !ALF_SINGLE_FILTER_SHAPE 3660 pImgPad7 = (paddingline < 4) ? 
pImgPad : pImgPad + min(paddingline, 4)*iCmpStride; 3661 pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - 4*iCmpStride; 3662 #endif 3663 } 3664 else 3665 { 3666 paddingline = yLineInLCU - m_lineIdxPadTopChroma ; 3667 pImgPad1 = (paddingline < 1) ? pImgPad : pImgPad + iCmpStride; 3668 pImgPad2 = (paddingline < 1) ? pImgPad : pImgPad - min(paddingline, 1)*iCmpStride; 3669 pImgPad3 = (paddingline < 2) ? pImgPad : pImgPad + 2*iCmpStride; 3670 pImgPad4 = (paddingline < 2) ? pImgPad : pImgPad - min(paddingline, 2)*iCmpStride; 3671 pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + 3*iCmpStride; 3672 pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - min(paddingline, 3)*iCmpStride; 3673 #if !ALF_SINGLE_FILTER_SHAPE 3674 pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + 4*iCmpStride; 3675 pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - min(paddingline, 4)*iCmpStride; 3676 #endif 3677 } 3678 3679 for (j= xpos; j<= xposEnd; j++) 3680 { 3681 memset(ELocal, 0, N*sizeof(Int)); 3682 #if ALF_SINGLE_FILTER_SHAPE 3683 ELocal[0] = (pImgPad5[j]+pImgPad6[j]); 3684 ELocal[1] = (pImgPad3[j]+pImgPad4[j]); 3685 ELocal[2] = (pImgPad1[j-1]+pImgPad2[j+1]); 3686 ELocal[3] = (pImgPad1[j]+pImgPad2[j]); 3687 ELocal[4] = (pImgPad1[j+1]+pImgPad2[j-1]); 3688 ELocal[5] = (pImgPad[j+4]+pImgPad[j-4]); 3689 ELocal[6] = (pImgPad[j+3]+pImgPad[j-3]); 3690 ELocal[7] = (pImgPad[j+2]+pImgPad[j-2]); 3691 ELocal[8] = (pImgPad[j+1]+pImgPad[j-1]); 3692 ELocal[9] = (pImgPad[j ]); 3693 #else 3694 ELocal[0] = (pImgPad7[j] + pImgPad8[j]); 3695 3696 ELocal[1] = (pImgPad5[j] + pImgPad6[j]); 3697 3698 ELocal[2] = (pImgPad3[j] + pImgPad4[j]); 3699 3700 ELocal[3] = (pImgPad1[j] + pImgPad2[j]); 3701 3702 ELocal[4] = (pImgPad[j+4] + pImgPad[j-4]); 3703 ELocal[5] = (pImgPad[j+3] + pImgPad[j-3]); 3704 ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]); 3705 ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]); 3706 ELocal[8] = (pImgPad[j ] ); 3707 #endif 3708 yLocal= (Int)pImgOrg[j]; 3709 3710 for(k=0; k<N; k++) 3711 { 3712 
m_ppdAlfCorr[k][k] += ELocal[k]*ELocal[k]; 3713 for(l=k+1; l<N; l++) 3714 { 3715 m_ppdAlfCorr[k][l] += ELocal[k]*ELocal[l]; 3716 } 3717 3718 m_ppdAlfCorr[k][N] += yLocal*ELocal[k]; 3719 } 3720 } 3721 pImgPad+= iCmpStride; 3722 pImgOrg+= iOrgStride; 3723 } 3724 3725 } 3726 break; 3727 default: 3728 { 3729 printf("Not a supported filter shape\n"); 765 3730 assert(0); 766 break; 767 } 768 769 Pel* pTerm = new Pel[N]; 770 771 Int i, j; 772 #if MTK_NONCROSS_INLOOP_FILTER 773 for (Int y = ypos; y < ypos + iHeight; y++) 774 { 775 for (Int x = xpos; x < xpos + iWidth; x++) 776 { 777 #else 778 for (Int y = 0; y < iHeight; y++) 779 { 780 for (Int x = 0; x < iWidth; x++) 781 { 782 #endif 783 i = 0; 784 ::memset(pTerm, 0, sizeof(Pel)*N); 785 #if TI_ALF_MAX_VSIZE_7 786 for (Int yy = y - offsetV; yy <= y + offsetV; yy++) 787 #else 788 for(Int yy=y-offset; yy<=y+offset; yy++) 789 #endif 790 { 791 for(Int xx=x-offset; xx<=x+offset; xx++) 792 { 793 pTerm[pFiltPos[i]] += pCmp[xx + yy*iCmpStride]; 794 i++; 795 } 796 } 797 798 for(j=0; j<N; j++) 799 { 800 m_ppdAlfCorr[j][j] += pTerm[j]*pTerm[j]; 801 for(i=j+1; i<N; i++) 802 m_ppdAlfCorr[j][i] += pTerm[j]*pTerm[i]; 803 804 // DC offset 805 m_ppdAlfCorr[j][N] += pTerm[j]; 806 m_ppdAlfCorr[j][N+1] += pOrg[x+y*iOrgStride]*pTerm[j]; 807 } 808 // DC offset 809 for(i=0; i<N; i++) 810 m_ppdAlfCorr[N][i] += pTerm[i]; 811 m_ppdAlfCorr[N][N] += 1; 812 m_ppdAlfCorr[N][N+1] += pOrg[x+y*iOrgStride]; 813 } 814 } 815 #if MTK_NONCROSS_INLOOP_FILTER 3731 exit(1); 3732 } 3733 } 3734 816 3735 if(bSymmCopyBlockMatrix) 817 3736 { 818 #endif 819 for(j=0; j<N-1; j++) 820 { 821 for(i=j+1; i<N; i++) 822 m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i]; 823 } 824 #if MTK_NONCROSS_INLOOP_FILTER 825 } 826 #endif 827 828 delete[] pTerm; 829 pTerm = NULL; 830 } 831 3737 for(j=0; j<N-1; j++) 3738 { 3739 for(i=j+1; i<N; i++) 3740 { 3741 m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i]; 3742 } 3743 } 3744 } 3745 } 3746 3747 #endif 832 3748 #if IBDI_DISTORTION 833 UInt64 
TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride ) 3749 #if HHI_INTERVIEW_SKIP 3750 UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Pel* pUsed, Int iWidth, Int iHeight, Int iStride ) 834 3751 { 835 3752 UInt64 uiSSD = 0; … … 844 3761 for( x = 0; x < iWidth; x++ ) 845 3762 { 3763 if ( pUsed ) // interview skipped 3764 { 3765 if( pUsed[x] ) 3766 { 846 3767 iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp; 3768 } 3769 } 3770 else // no interview skip 3771 { 3772 iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp; 3773 } 847 3774 } 848 3775 pOrg += iStride; 849 3776 pCmp += iStride; 3777 if(pUsed) 3778 { 3779 pUsed+= iStride; 3780 } 850 3781 } 851 3782 … … 858 3789 Int x, y; 859 3790 3791 Int iShift = g_uiBitIncrement; 3792 Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0; 3793 Int iTemp; 3794 3795 for( y = 0; y < iHeight; y++ ) 3796 { 3797 for( x = 0; x < iWidth; x++ ) 3798 { 3799 iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp; 3800 } 3801 pOrg += iStride; 3802 pCmp += iStride; 3803 } 3804 3805 return uiSSD;; 3806 } 3807 #endif 3808 #else 3809 #if HHI_INTERVIEW_SKIP 3810 UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Pel* pUsed, Int iWidth, Int iHeight, Int iStride ) 3811 { 3812 UInt64 uiSSD = 0; 3813 Int x, y; 3814 3815 UInt uiShift = g_uiBitIncrement<<1; 3816 Int iTemp =0 ; 3817 3818 for( y = 0; y < iHeight; y++ ) 3819 { 3820 for( x = 0; x < iWidth; x++ ) 3821 { 3822 if ( pUsed ) // interview skipped 3823 { 3824 if( pUsed[x] ) 3825 { 3826 iTemp = pOrg[x] - pCmp[x]; uiSSD += ( iTemp * iTemp ) >> uiShift; 3827 } 3828 } 3829 else // no interview skip 3830 { 3831 iTemp = pOrg[x] - pCmp[x]; uiSSD += ( iTemp * iTemp ) >> uiShift; 3832 } 3833 } 3834 pOrg += iStride; 3835 pCmp += iStride; 3836 if(pUsed) 3837 { 3838 pUsed+= iStride; 3839 } 3840 } 3841 3842 return 
uiSSD;; 3843 } 3844 #else 3845 UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride ) 3846 { 3847 UInt64 uiSSD = 0; 3848 Int x, y; 860 3849 UInt uiShift = g_uiBitIncrement<<1; 861 3850 Int iTemp; … … 874 3863 } 875 3864 #endif 3865 #endif 876 3866 877 3867 Int TEncAdaptiveLoopFilter::xGauss(Double **a, Int N) … … 883 3873 { 884 3874 if (a[k][k] <0.000001) 3875 { 885 3876 return 1; 3877 } 886 3878 } 887 3879 … … 902 3894 t = a[i][N]; 903 3895 for(j=i+1; j<N; j++) 3896 { 904 3897 t -= a[i][j] * a[j][N]; 3898 } 905 3899 a[i][N] = t / a[i][i]; 906 3900 } … … 932 3926 } 933 3927 } while( i <= j ); 934 if ( upper < j ) xFilterCoefQuickSort(coef_data, coef_num, upper, j); 935 if ( i < lower ) xFilterCoefQuickSort(coef_data, coef_num, i, lower); 3928 if ( upper < j ) 3929 { 3930 xFilterCoefQuickSort(coef_data, coef_num, upper, j); 3931 } 3932 if ( i < lower ) 3933 { 3934 xFilterCoefQuickSort(coef_data, coef_num, i, lower); 3935 } 936 3936 } 937 3937 … … 946 3946 Int *nc; 947 3947 const Int *pFiltMag; 948 949 switch(tap) 950 { 951 case 5: 952 pFiltMag = m_aiSymmetricMag5x5; 953 break; 954 case 7: 955 pFiltMag = m_aiSymmetricMag7x7; 956 break; 957 case 9: 958 #if TI_ALF_MAX_VSIZE_7 959 pFiltMag = m_aiSymmetricMag9x7; 960 #else 961 pFiltMag = m_aiSymmetricMag9x9; 962 #endif 963 break; 964 default: 965 #if TI_ALF_MAX_VSIZE_7 966 pFiltMag = m_aiSymmetricMag9x7; 967 #else 968 pFiltMag = m_aiSymmetricMag9x9; 969 #endif 970 assert(0); 971 break; 972 } 973 974 #if TI_ALF_MAX_VSIZE_7 975 Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap); 976 N = (tap * tapV + 1) >> 1; 977 #else 978 N = (tap*tap+1)>>1; 979 #endif 980 3948 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 3949 Int alfPrecisionBit = getAlfPrecisionBit( m_alfQP ); 3950 #endif 3951 3952 N = m_sqrFiltLengthTab[tap]; 3953 #if ALF_SINGLE_FILTER_SHAPE 3954 pFiltMag = weightsShape1Sym; 3955 #else 3956 // star shape 3957 if(tap == 0) 3958 { 3959 pFiltMag = weightsShape0Sym; 3960 } 
3961 // cross shape 3962 else 3963 { 3964 pFiltMag = weightsShape1Sym; 3965 } 3966 #endif 3967 981 3968 dh = new Double[N]; 982 3969 nc = new Int[N]; 983 3970 3971 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 3972 max_value = (1<<(1+alfPrecisionBit))-1; 3973 min_value = 0-(1<<(1+alfPrecisionBit)); 3974 #else 984 3975 max_value = (1<<(1+ALF_NUM_BIT_SHIFT))-1; 985 3976 min_value = 0-(1<<(1+ALF_NUM_BIT_SHIFT)); 986 3977 #endif 3978 987 3979 dbl_total_gain=0.0; 988 3980 q_total_gain=0; … … 990 3982 { 991 3983 if(h[i]>=0.0) 3984 { 3985 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 3986 qh[i] = (Int)( h[i]*(1<<alfPrecisionBit)+0.5); 3987 #else 992 3988 qh[i] = (Int)( h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5); 3989 #endif 3990 } 993 3991 else 3992 { 3993 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 3994 qh[i] = -(Int)(-h[i]*(1<<alfPrecisionBit)+0.5); 3995 #else 994 3996 qh[i] = -(Int)(-h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5); 995 3997 #endif 3998 } 3999 4000 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 4001 dh[i] = (Double)qh[i]/(Double)(1<<alfPrecisionBit) - h[i]; 4002 #else 996 4003 dh[i] = (Double)qh[i]/(Double)(1<<ALF_NUM_BIT_SHIFT) - h[i]; 4004 #endif 997 4005 dh[i]*=pFiltMag[i]; 998 4006 dbl_total_gain += h[i]*pFiltMag[i]; … … 1002 4010 1003 4011 // modification of quantized filter coefficients 4012 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 4013 total_gain = (Int)(dbl_total_gain*(1<<alfPrecisionBit)+0.5); 4014 #else 1004 4015 total_gain = (Int)(dbl_total_gain*(1<<ALF_NUM_BIT_SHIFT)+0.5); 1005 4016 #endif 1006 4017 if( q_total_gain != total_gain ) 1007 4018 { … … 1020 4031 { 1021 4032 if(dh[N-1]>0) 4033 { 1022 4034 qh[N-1]--; 4035 } 1023 4036 else 1024 4037 { … … 1042 4055 { 1043 4056 if(dh[N-1]<0) 4057 { 1044 4058 qh[N-1]++; 4059 } 1045 4060 else 1046 4061 { … … 1056 4071 for(i=0; i<N; i++) 1057 4072 { 1058 qh[i] = Max(min_value,Min(max_value, qh[i])); 1059 } 1060 1061 // DC offset 1062 // max_value = Min( (1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma)))-1, 
(1<<14)-1); 1063 // min_value = Max( -(1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma))), -(1<<14) ); 1064 max_value = Min( (1<<(3+g_uiBitDepth + g_uiBitIncrement))-1, (1<<14)-1); 1065 min_value = Max( -(1<<(3+g_uiBitDepth + g_uiBitIncrement)), -(1<<14) ); 1066 1067 qh[N] = (h[N]>=0.0)? (Int)( h[N]*(1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5) : -(Int)(-h[N]*(1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5); 1068 qh[N] = Max(min_value,Min(max_value, qh[N])); 1069 4073 qh[i] = max(min_value,min(max_value, qh[i])); 4074 } 4075 4076 checkFilterCoeffValue(qh, N, true); 4077 1070 4078 delete[] dh; 1071 4079 dh = NULL; … … 1074 4082 nc = NULL; 1075 4083 } 1076 4084 #if !LCU_SYNTAX_ALF 1077 4085 Void TEncAdaptiveLoopFilter::xClearFilterCoefInt(Int* qh, Int N) 1078 4086 { … … 1081 4089 1082 4090 // center pos 1083 qh[N-2] = 1<<ALF_NUM_BIT_SHIFT; 1084 } 1085 1086 Void TEncAdaptiveLoopFilter::xCalcRDCost(ALFParam* pAlfParam, UInt64& ruiRate, UInt64 uiDist, Double& rdCost) 4091 qh[N-1] = 1<<ALF_NUM_BIT_SHIFT; 4092 } 4093 /** Calculate RD cost 4094 * \param [in] pAlfParam ALF parameters 4095 * \param [out] ruiRate coding bits 4096 * \param [in] uiDist distortion 4097 * \param [out] rdCost rate-distortion cost 4098 * \param [in] pvAlfCUCtrlParam ALF CU control parameters 4099 */ 4100 Void TEncAdaptiveLoopFilter::xCalcRDCost(ALFParam* pAlfParam, UInt64& ruiRate, UInt64 uiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam) 1087 4101 { 1088 4102 if(pAlfParam != NULL) 1089 4103 { 1090 Int* piTmpCoef; 1091 piTmpCoef = new Int[ALF_MAX_NUM_COEF]; 1092 1093 memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff); 1094 1095 predictALFCoeff(pAlfParam); 1096 4104 m_pcEntropyCoder->resetEntropy(); 4105 m_pcEntropyCoder->resetBits(); 4106 m_pcEntropyCoder->encodeAlfParam(pAlfParam); 4107 4108 ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); 4109 4110 if(pvAlfCUCtrlParam != NULL) 4111 { 4112 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 4113 { 4114 
if(!m_pcPic->getValidSlice(s)) 4115 { 4116 continue; 4117 } 4118 m_pcEntropyCoder->resetEntropy(); 4119 m_pcEntropyCoder->resetBits(); 4120 m_pcEntropyCoder->encodeAlfCtrlParam( (*pvAlfCUCtrlParam)[s], m_uiNumCUsInFrame); 4121 ruiRate += m_pcEntropyCoder->getNumberOfWrittenBits(); 4122 } 4123 } 4124 else 4125 { 4126 ruiRate += m_uiNumSlicesInPic; 4127 } 4128 } 4129 else 4130 { 4131 ruiRate = 1; 4132 } 4133 4134 rdCost = (Double)(ruiRate) * m_dLambdaLuma + (Double)(uiDist); 4135 } 4136 4137 /** Calculate RD cost 4138 * \param [in] pcPicOrg original picture buffer 4139 * \param [in] pcPicCmp compared picture buffer 4140 * \param [in] pAlfParam ALF parameters 4141 * \param [out] ruiRate coding bits 4142 * \param [out] ruiDist distortion 4143 * \param [out] rdCost rate-distortion cost 4144 * \param [in] pvAlfCUCtrlParam ALF CU control parameters 4145 */ 4146 #if HHI_INTERVIEW_SKIP 4147 Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, TComPicYuv* pcUsedPelMap, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam) 4148 #else 4149 Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam) 4150 #endif 4151 { 4152 if(pAlfParam != NULL) 4153 { 1097 4154 m_pcEntropyCoder->resetEntropy(); 1098 4155 m_pcEntropyCoder->resetBits(); 1099 4156 m_pcEntropyCoder->encodeAlfParam(pAlfParam); 1100 4157 1101 if(pAlfParam->cu_control_flag)1102 {1103 #if TSB_ALF_HEADER1104 m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);1105 #else1106 xEncodeCUAlfCtrlFlags();1107 #endif1108 }1109 4158 ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); 1110 memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff); 1111 delete[] piTmpCoef; 1112 piTmpCoef = NULL; 1113 } 1114 else 1115 { 1116 ruiRate = 1; 1117 } 1118 1119 rdCost = (Double)(ruiRate) * 
m_dLambdaLuma + (Double)(uiDist); 1120 } 1121 1122 Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost) 1123 { 1124 if(pAlfParam != NULL) 1125 { 1126 Int* piTmpCoef; 1127 piTmpCoef = new Int[ALF_MAX_NUM_COEF]; 1128 1129 memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff); 1130 1131 predictALFCoeff(pAlfParam); 1132 1133 m_pcEntropyCoder->resetEntropy(); 1134 m_pcEntropyCoder->resetBits(); 1135 m_pcEntropyCoder->encodeAlfParam(pAlfParam); 1136 1137 if(pAlfParam->cu_control_flag) 1138 { 1139 #if TSB_ALF_HEADER 1140 m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam); 1141 #else 1142 xEncodeCUAlfCtrlFlags(); 1143 #endif 1144 } 1145 ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); 1146 memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff); 1147 delete[] piTmpCoef; 1148 piTmpCoef = NULL; 4159 4160 if(pvAlfCUCtrlParam != NULL) 4161 { 4162 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 4163 { 4164 if(! 
m_pcPic->getValidSlice(s)) 4165 { 4166 continue; 4167 } 4168 m_pcEntropyCoder->resetEntropy(); 4169 m_pcEntropyCoder->resetBits(); 4170 m_pcEntropyCoder->encodeAlfCtrlParam( (*pvAlfCUCtrlParam)[s], m_uiNumCUsInFrame); 4171 ruiRate += m_pcEntropyCoder->getNumberOfWrittenBits(); 4172 } 4173 4174 } 4175 else 4176 { 4177 ruiRate += m_uiNumSlicesInPic; 4178 } 1149 4179 } 1150 4180 else … … 1156 4186 rdCost = (Double)(ruiRate) * m_dLambdaLuma + (Double)(ruiDist); 1157 4187 } 1158 4188 /** Calculate RD cost for chroma ALF 4189 * \param pcPicOrg original picture buffer 4190 * \param pcPicCmp compared picture buffer 4191 * \param pAlfParam ALF parameters 4192 * \returns ruiRate bitrate 4193 * \returns uiDist distortion 4194 * \returns rdCost RD cost 4195 */ 1159 4196 Void TEncAdaptiveLoopFilter::xCalcRDCostChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost) 1160 4197 { 1161 4198 if(pAlfParam->chroma_idc) 1162 4199 { 1163 Int* piTmpCoef; 1164 piTmpCoef = new Int[ALF_MAX_NUM_COEF_C]; 1165 1166 memcpy(piTmpCoef, pAlfParam->coeff_chroma, sizeof(Int)*pAlfParam->num_coeff_chroma); 1167 1168 predictALFCoeffChroma(pAlfParam); 1169 1170 m_pcEntropyCoder->resetEntropy(); 1171 m_pcEntropyCoder->resetBits(); 1172 m_pcEntropyCoder->encodeAlfParam(pAlfParam); 1173 1174 if(pAlfParam->cu_control_flag) 1175 { 1176 #if TSB_ALF_HEADER 1177 m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam); 1178 #else 1179 xEncodeCUAlfCtrlFlags(); 1180 #endif 1181 } 1182 ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); 1183 memcpy(pAlfParam->coeff_chroma, piTmpCoef, sizeof(int)*pAlfParam->num_coeff_chroma); 1184 delete[] piTmpCoef; 1185 piTmpCoef = NULL; 4200 ruiRate = xCalcRateChroma(pAlfParam); 1186 4201 } 1187 4202 ruiDist = 0; … … 1191 4206 } 1192 4207 1193 Void TEncAdaptiveLoopFilter::xFilteringFrameChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest) 1194 { 1195 Int i, tap, N, err_code; 1196 Int* qh; 1197 1198 
tap = m_pcTempAlfParam->tap_chroma; 1199 N = m_pcTempAlfParam->num_coeff_chroma; 1200 qh = m_pcTempAlfParam->coeff_chroma; 1201 1202 // initialize correlation 1203 for(i=0; i<N; i++) 1204 memset(m_ppdAlfCorr[i], 0, sizeof(Double)*(N+1)); 1205 1206 if ((m_pcTempAlfParam->chroma_idc>>1)&0x01) 1207 { 1208 Pel* pOrg = pcPicOrg->getCbAddr(); 1209 Pel* pCmp = pcPicDec->getCbAddr(); 1210 #if MTK_NONCROSS_INLOOP_FILTER 4208 Void TEncAdaptiveLoopFilter::xFilteringFrameChroma(ALFParam* pcAlfParam, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest) 4209 { 4210 Int filtNo = pcAlfParam->filter_shape_chroma; 4211 Int *coeff = pcAlfParam->coeff_chroma; 4212 Int iChromaFormatShift = 1; //4:2:0 4213 4214 if ((pcAlfParam->chroma_idc>>1)&0x01) 4215 { 1211 4216 if(!m_bUseNonCrossALF) 1212 xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true); 4217 { 4218 Int iStride = pcPicRest->getCStride(); 4219 Pel* pDec = pcPicDec->getCbAddr(); 4220 Pel* pRest = pcPicRest->getCbAddr(); 4221 4222 filterChroma(pRest, pDec, iStride, 0, (Int)(m_img_height>>1) -1, 0, (Int)(m_img_width>>1)-1, filtNo, coeff); 4223 } 1213 4224 else 1214 xCalcCorrelationFuncforChromaSlices(ALF_Cb, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride()); 1215 #else 1216 xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride()); 1217 #endif 1218 } 1219 if ((m_pcTempAlfParam->chroma_idc)&0x01) 1220 { 1221 Pel* pOrg = pcPicOrg->getCrAddr(); 1222 Pel* pCmp = pcPicDec->getCrAddr(); 1223 #if MTK_NONCROSS_INLOOP_FILTER 4225 { 4226 xFilterChromaSlices(ALF_Cb, pcPicDec, pcPicRest, coeff, filtNo, iChromaFormatShift); 4227 } 4228 } 4229 if ((pcAlfParam->chroma_idc)&0x01) 4230 { 1224 4231 if(!m_bUseNonCrossALF) 1225 xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), 
pcPicOrg->getCStride(), pcPicDec->getCStride(), true); 4232 { 4233 Int iStride = pcPicRest->getCStride(); 4234 Pel* pDec = pcPicDec->getCrAddr(); 4235 Pel* pRest = pcPicRest->getCrAddr(); 4236 4237 filterChroma(pRest, pDec, iStride, 0, (Int)(m_img_height>>1) -1, 0, (Int)(m_img_width>>1)-1, filtNo, coeff); 4238 } 1226 4239 else 1227 xCalcCorrelationFuncforChromaSlices(ALF_Cr, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride()); 1228 #else 1229 xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride()); 1230 #endif 1231 } 1232 1233 err_code = xGauss(m_ppdAlfCorr, N); 1234 1235 if(err_code) 1236 { 1237 xClearFilterCoefInt(qh, N); 1238 } 1239 else 1240 { 1241 for(i=0; i<N; i++) 1242 m_pdDoubleAlfCoeff[i] = m_ppdAlfCorr[i][N]; 1243 1244 xQuantFilterCoef(m_pdDoubleAlfCoeff, qh, tap, g_uiBitDepth + g_uiBitIncrement); 1245 } 1246 1247 1248 if ((m_pcTempAlfParam->chroma_idc>>1)&0x01) 1249 { 1250 #if MTK_NONCROSS_INLOOP_FILTER 1251 if(! m_bUseNonCrossALF) 1252 xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 0); 1253 else 1254 xFrameChromaforSlices(ALF_Cb, pcPicDec, pcPicRest, qh, tap); 1255 #else 1256 xFrameChroma(pcPicDec, pcPicRest, qh, tap, 0); 1257 #endif 1258 } 1259 if ((m_pcTempAlfParam->chroma_idc)&0x01) 1260 { 1261 #if MTK_NONCROSS_INLOOP_FILTER 1262 if(! 
m_bUseNonCrossALF) 1263 xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 1); 1264 else 1265 xFrameChromaforSlices(ALF_Cr, pcPicDec, pcPicRest, qh, tap); 1266 #else 1267 xFrameChroma(pcPicDec, pcPicRest, qh, tap, 1); 1268 #endif 1269 } 1270 1271 if(m_pcTempAlfParam->chroma_idc<3) 1272 { 1273 if(m_pcTempAlfParam->chroma_idc==1) 4240 { 4241 xFilterChromaSlices(ALF_Cr, pcPicDec, pcPicRest, coeff, filtNo, iChromaFormatShift); 4242 } 4243 } 4244 4245 if(pcAlfParam->chroma_idc<3) 4246 { 4247 if(pcAlfParam->chroma_idc==1) 1274 4248 { 1275 4249 pcPicDec->copyToPicCb(pcPicRest); 1276 4250 } 1277 if( m_pcTempAlfParam->chroma_idc==2)4251 if(pcAlfParam->chroma_idc==2) 1278 4252 { 1279 4253 pcPicDec->copyToPicCr(pcPicRest); 1280 4254 } 1281 4255 } 1282 1283 } 1284 4256 4257 } 4258 #endif 4259 #if LCU_SYNTAX_ALF 4260 /** Restore the not-filtered pixels 4261 * \param [in] imgDec picture buffer before filtering 4262 * \param [out] imgRest picture buffer after filtering 4263 * \param [in] stride stride size for 1-D picture memory 4264 */ 4265 Void TEncAdaptiveLoopFilter::xCopyDecToRestCUs(Pel* imgDec, Pel* imgRest, Int stride) 4266 #else 4267 /** Restore the not-filtered pixels 4268 * \param pcPicDec picture buffer before filtering 4269 * \param pcPicRest picture buffer after filtering 4270 */ 1285 4271 Void TEncAdaptiveLoopFilter::xCopyDecToRestCUs(TComPicYuv* pcPicDec, TComPicYuv* pcPicRest) 1286 { 4272 #endif 4273 { 4274 4275 if(m_uiNumSlicesInPic > 1) 4276 { 4277 #if LCU_SYNTAX_ALF 4278 Pel* pPicDecLuma = imgDec; 4279 Pel* pPicRestLuma = imgRest; 4280 #else 4281 Pel* pPicDecLuma = pcPicDec->getLumaAddr(); 4282 Pel* pPicRestLuma = pcPicRest->getLumaAddr(); 4283 Int stride = pcPicDec->getStride(); 4284 #endif 4285 UInt SUWidth = m_pcPic->getMinCUWidth(); 4286 UInt SUHeight = m_pcPic->getMinCUHeight(); 4287 4288 UInt startSU, endSU, LCUX, LCUY, currSU, LPelX, TPelY; 4289 UInt posOffset; 4290 Pel *pDec, *pRest; 4291 4292 
for(Int s=0; s< m_uiNumSlicesInPic; s++) 4293 { 4294 if(!m_pcPic->getValidSlice(s)) 4295 { 4296 continue; 4297 } 4298 std::vector< AlfLCUInfo* >& vpSliceAlfLCU = m_pvpAlfLCU[s]; 4299 for(Int i=0; i< vpSliceAlfLCU.size(); i++) 4300 { 4301 AlfLCUInfo& rAlfLCU = *(vpSliceAlfLCU[i]); 4302 TComDataCU* pcCU = rAlfLCU.pcCU; 4303 startSU = rAlfLCU.startSU; 4304 endSU = rAlfLCU.endSU; 4305 LCUX = pcCU->getCUPelX(); 4306 LCUY = pcCU->getCUPelY(); 4307 4308 for(currSU= startSU; currSU<= endSU; currSU++) 4309 { 4310 LPelX = LCUX + g_auiRasterToPelX[ g_auiZscanToRaster[currSU] ]; 4311 TPelY = LCUY + g_auiRasterToPelY[ g_auiZscanToRaster[currSU] ]; 4312 if( !( LPelX < m_img_width ) || !( TPelY < m_img_height ) ) 4313 { 4314 continue; 4315 } 4316 if(!pcCU->getAlfCtrlFlag(currSU)) 4317 { 4318 posOffset = TPelY*stride + LPelX; 4319 pDec = pPicDecLuma + posOffset; 4320 pRest= pPicRestLuma+ posOffset; 4321 for(Int y=0; y< SUHeight; y++) 4322 { 4323 ::memcpy(pRest, pDec, sizeof(Pel)*SUWidth); 4324 pDec += stride; 4325 pRest+= stride; 4326 } 4327 } 4328 } 4329 } 4330 } 4331 return; 4332 } 4333 1287 4334 for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ ) 1288 4335 { 1289 4336 TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr ); 4337 #if LCU_SYNTAX_ALF 4338 xCopyDecToRestCU(pcCU, 0, 0, imgDec, imgRest, stride); 4339 #else 1290 4340 xCopyDecToRestCU(pcCU, 0, 0, pcPicDec, pcPicRest); 1291 } 1292 } 1293 4341 #endif 4342 } 4343 } 4344 4345 #if LCU_SYNTAX_ALF 4346 Void TEncAdaptiveLoopFilter::xCopyDecToRestCU(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, Pel* imgDec, Pel* imgRest, Int stride) 4347 #else 1294 4348 Void TEncAdaptiveLoopFilter::xCopyDecToRestCU(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest) 4349 #endif 1295 4350 { 1296 4351 Bool bBoundary = false; … … 1300 4355 UInt uiBPelY = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1; 1301 4356 1302 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->get Width() ) || ( uiBPelY >= 
pcCU->getSlice()->getSPS()->getHeight() ) )4357 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 1303 4358 { 1304 4359 bBoundary = true; … … 1313 4368 uiTPelY = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ]; 1314 4369 1315 if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) ) 4370 if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 4371 #if LCU_SYNTAX_ALF 4372 xCopyDecToRestCU(pcCU, uiAbsPartIdx, uiDepth+1, imgDec, imgRest, stride); 4373 #else 1316 4374 xCopyDecToRestCU(pcCU, uiAbsPartIdx, uiDepth+1, pcPicDec, pcPicRest); 4375 #endif 1317 4376 } 1318 4377 return; … … 1321 4380 if (!pcCU->getAlfCtrlFlag(uiAbsPartIdx)) 1322 4381 { 4382 #if !LCU_SYNTAX_ALF 1323 4383 UInt uiCUAddr = pcCU->getAddr(); 1324 4384 #endif 1325 4385 Int iWidth = pcCU->getWidth(uiAbsPartIdx); 1326 4386 Int iHeight = pcCU->getHeight(uiAbsPartIdx); 1327 4387 #if LCU_SYNTAX_ALF 4388 copyPixelsInOneRegion(imgRest, imgDec, stride, (Int)uiTPelY, iHeight, (Int)uiLPelX, iWidth); 4389 #else 1328 4390 Pel* pRec = pcPicDec->getLumaAddr(uiCUAddr, uiAbsPartIdx); 1329 4391 Pel* pFilt = pcPicRest->getLumaAddr(uiCUAddr, uiAbsPartIdx); … … 1341 4403 pFilt += iFiltStride; 1342 4404 } 1343 } 1344 } 1345 1346 Void TEncAdaptiveLoopFilter::xcollectStatCodeFilterCoeffForce0(int **pDiffQFilterCoeffIntPP, int fl, int sqrFiltLength, 1347 int filters_per_group, int bitsVarBin[]) 1348 { 1349 int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal, 1350 *pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB], 1351 minKStart, minBitsKStart, bitsKStart; 1352 1353 pDepthInt=pDepthIntTab[fl-2]; 1354 1355 maxScanVal=0; 1356 for (i=0; i<sqrFiltLength; i++) 1357 { 1358 maxScanVal=max(maxScanVal, 
pDepthInt[i]); 1359 } 1360 1361 // vlc for all 1362 memset(bitsCoeffScan, 0, MAX_SCAN_VAL * MAX_EXP_GOLOMB * sizeof(int)); 1363 for(ind=0; ind<filters_per_group; ++ind) 1364 { 1365 for(i = 0; i < sqrFiltLength; i++) 1366 { 1367 scanPos=pDepthInt[i]-1; 1368 coeffVal=abs(pDiffQFilterCoeffIntPP[ind][i]); 1369 for (k=1; k<15; k++) 1370 { 1371 bitsCoeffScan[scanPos][k] += lengthGolomb(coeffVal, k); 1372 } 1373 } 1374 } 1375 1376 minBitsKStart=0; 1377 minKStart = -1; 1378 for (k=1; k<8; k++) 1379 { 1380 bitsKStart=0; kStart=k; 1381 for (scanPos=0; scanPos<maxScanVal; scanPos++) 1382 { 1383 kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin]; 1384 1385 if (bitsCoeffScan[scanPos][kStart+1]<minBits) 1386 { 1387 kMin=kStart+1; minBits=bitsCoeffScan[scanPos][kMin]; 1388 } 1389 kStart=kMin; 1390 bitsKStart+=minBits; 1391 } 1392 if (bitsKStart<minBitsKStart || k==1) 1393 { 1394 minBitsKStart=bitsKStart; 1395 minKStart=k; 1396 } 1397 } 1398 1399 kStart = minKStart; 1400 for (scanPos=0; scanPos<maxScanVal; scanPos++) 1401 { 1402 kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin]; 1403 1404 if (bitsCoeffScan[scanPos][kStart+1]<minBits) 1405 { 1406 kMin = kStart+1; 1407 minBits = bitsCoeffScan[scanPos][kMin]; 1408 } 1409 1410 kMinTab[scanPos] = kMin; 1411 kStart = kMin; 1412 } 1413 1414 for(ind=0; ind<filters_per_group; ++ind) 1415 { 1416 bitsVarBin[ind]=0; 1417 for(i = 0; i < sqrFiltLength; i++) 1418 { 1419 scanPos=pDepthInt[i]-1; 1420 bitsVarBin[ind] += lengthGolomb(abs(pDiffQFilterCoeffIntPP[ind][i]), kMinTab[scanPos]); 1421 } 1422 } 1423 } 1424 1425 Void TEncAdaptiveLoopFilter::xdecideCoeffForce0(int codedVarBins[NO_VAR_BINS], double errorForce0Coeff[], double errorForce0CoeffTab[NO_VAR_BINS][2], int bitsVarBin[NO_VAR_BINS], double lambda, int filters_per_fr) 1426 { 1427 int filtNo; 1428 double lagrangianDiff; 1429 int ind; 1430 1431 errorForce0Coeff[0]=errorForce0Coeff[1]=0; 1432 for (ind=0; ind<16; ind++) codedVarBins[ind]=0; 1433 1434 for(filtNo=0; filtNo<filters_per_fr; 
filtNo++) 1435 { 1436 // No coeffcient prediction bits used 1437 #if ENABLE_FORCECOEFF0 1438 lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]); 1439 codedVarBins[filtNo]=(lagrangianDiff>0)? 1 : 0; 1440 errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]]; 1441 errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1]; 1442 #else 1443 lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]); 1444 codedVarBins[filtNo]= 1; 1445 errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]]; 1446 errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1]; 1447 #endif 1448 } 1449 } 1450 1451 double TEncAdaptiveLoopFilter::xfindBestCoeffCodMethod(int codedVarBins[NO_VAR_BINS], int *forceCoeff0, 1452 int **filterCoeffSymQuant, int fl, int sqrFiltLength, 1453 int filters_per_fr, double errorForce0CoeffTab[NO_VAR_BINS][2], 1454 double *errorQuant, double lambda) 1455 1456 { 1457 int bitsVarBin[NO_VAR_BINS], createBistream, coeffBits, coeffBitsForce0; 1458 double errorForce0Coeff[2], lagrangianForce0, lagrangian; 1459 1460 xcollectStatCodeFilterCoeffForce0(filterCoeffSymQuant, fl, sqrFiltLength, 1461 filters_per_fr, bitsVarBin); 1462 1463 xdecideCoeffForce0(codedVarBins, errorForce0Coeff, errorForce0CoeffTab, bitsVarBin, lambda, filters_per_fr); 1464 1465 coeffBitsForce0 = xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength, 1466 filters_per_fr, codedVarBins, createBistream=0, m_tempALFp); 1467 1468 coeffBits = xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength, filters_per_fr, 1469 createBistream=0, m_tempALFp); 1470 1471 lagrangianForce0=errorForce0Coeff[0]+lambda*coeffBitsForce0; 1472 lagrangian=errorForce0Coeff[1]+lambda*coeffBits; 1473 if (lagrangianForce0<lagrangian) 1474 { 1475 *errorQuant=errorForce0Coeff[0]; 1476 *forceCoeff0=1; 1477 return(lagrangianForce0); 1478 } 1479 else 1480 { 1481 *errorQuant=errorForce0Coeff[1]; 1482 
*forceCoeff0=0; 1483 return(lagrangian); 4405 #endif 4406 } 4407 } 4408 4409 double TEncAdaptiveLoopFilter::xfindBestCoeffCodMethod(int **filterCoeffSymQuant, int filter_shape, int sqrFiltLength, int filters_per_fr, double errorForce0CoeffTab[NO_VAR_BINS][2], 4410 double lambda) 4411 { 4412 Int coeffBits, i; 4413 Double error=0, lagrangian; 4414 coeffBits = xsendAllFiltersPPPred(filterCoeffSymQuant, filter_shape, sqrFiltLength, filters_per_fr, 4415 0, m_tempALFp); 4416 for(i=0;i<filters_per_fr;i++) 4417 { 4418 error += errorForce0CoeffTab[i][1]; 4419 } 4420 lagrangian = error + lambda * coeffBits; 4421 return (lagrangian); 4422 } 4423 4424 /** Predict ALF luma filter coefficients. Centre coefficient is always predicted. Determines if left neighbour should be predicted. 4425 */ 4426 Void TEncAdaptiveLoopFilter::predictALFCoeffLumaEnc(ALFParam* pcAlfParam, Int **pfilterCoeffSym, Int filter_shape) 4427 { 4428 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 4429 Int alfPrecisionBit = getAlfPrecisionBit( m_alfQP ); 4430 #endif 4431 Int sum, coeffPred, ind; 4432 const Int* pFiltMag = NULL; 4433 pFiltMag = weightsTabShapes[filter_shape]; 4434 for(ind = 0; ind < pcAlfParam->filters_per_group; ++ind) 4435 { 4436 sum = 0; 4437 for(Int i = 0; i < pcAlfParam->num_coeff-2; i++) 4438 { 4439 sum += pFiltMag[i]*pfilterCoeffSym[ind][i]; 4440 } 4441 4442 if((pcAlfParam->predMethod==0)|(ind==0)) 4443 { 4444 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 4445 coeffPred = ((1<<alfPrecisionBit)-sum) >> 2; 4446 #else 4447 coeffPred = ((1<<ALF_NUM_BIT_SHIFT)-sum) >> 2; 4448 #endif 4449 } 4450 else 4451 { 4452 coeffPred = (0-sum) >> 2; 4453 } 4454 if(abs(pfilterCoeffSym[ind][pcAlfParam->num_coeff-2]-coeffPred) < abs(pfilterCoeffSym[ind][pcAlfParam->num_coeff-2])) 4455 { 4456 pcAlfParam->nbSPred[ind] = 0; 4457 } 4458 else 4459 { 4460 pcAlfParam->nbSPred[ind] = 1; 4461 coeffPred = 0; 4462 } 4463 sum += pFiltMag[pcAlfParam->num_coeff-2]*pfilterCoeffSym[ind][pcAlfParam->num_coeff-2]; 4464 
pfilterCoeffSym[ind][pcAlfParam->num_coeff-2] -= coeffPred; 4465 if((pcAlfParam->predMethod==0)|(ind==0)) 4466 { 4467 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 4468 coeffPred = (1<<alfPrecisionBit)-sum; 4469 #else 4470 coeffPred = (1<<ALF_NUM_BIT_SHIFT)-sum; 4471 #endif 4472 } 4473 else 4474 { 4475 coeffPred = -sum; 4476 } 4477 pfilterCoeffSym[ind][pcAlfParam->num_coeff-1] -= coeffPred; 1484 4478 } 1485 4479 } … … 1493 4487 Int64 Newbit_ct; 1494 4488 1495 bit_ct0 = xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0); 1496 4489 for(ind = 0; ind < filters_per_group; ind++) 4490 { 4491 for(i = 0; i < sqrFiltLength; i++) 4492 { 4493 m_FilterCoeffQuantTemp[ind][i]=FilterCoeffQuant[ind][i]; 4494 } 4495 } 4496 ALFp->filters_per_group = filters_per_group; 4497 ALFp->predMethod = 0; 4498 ALFp->num_coeff = sqrFiltLength; 4499 predictALFCoeffLumaEnc(ALFp, m_FilterCoeffQuantTemp, fl); 4500 Int nbFlagIntra[16]; 4501 for(ind = 0; ind < filters_per_group; ind++) 4502 { 4503 nbFlagIntra[ind] = ALFp->nbSPred[ind]; 4504 } 4505 bit_ct0 = xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group, 0); 1497 4506 for(ind = 0; ind < filters_per_group; ++ind) 1498 4507 { … … 1508 4517 } 1509 4518 } 4519 ALFp->predMethod = 1; 4520 predictALFCoeffLumaEnc(ALFp, m_diffFilterCoeffQuant, fl); 1510 4521 1511 4522 if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0) >= bit_ct0) … … 1513 4524 predMethod = 0; 1514 4525 if(filters_per_group > 1) 4526 { 1515 4527 bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, createBistream); 1516 bit_ct += xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream); 4528 } 4529 bit_ct += xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group, createBistream); 1517 4530 } 1518 4531 else … … 1520 4533 predMethod = 1; 1521 4534 if(filters_per_group > 1) 4535 { 1522 4536 bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, 
createBistream); 4537 } 1523 4538 bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream); 1524 4539 } 1525 1526 ALFp->forceCoeff0 = 0;1527 ALFp->filters_per_group_diff = filters_per_group;1528 4540 ALFp->filters_per_group = filters_per_group; 1529 4541 ALFp->predMethod = predMethod; 1530 4542 ALFp->num_coeff = sqrFiltLength; 1531 if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM) 1532 ALFp->realfiltNo=2; 1533 else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM) 1534 ALFp->realfiltNo=1; 1535 else 1536 ALFp->realfiltNo=0; 1537 4543 ALFp->filter_shape = fl; 1538 4544 for(ind = 0; ind < filters_per_group; ++ind) 1539 4545 { … … 1541 4547 { 1542 4548 if (predMethod) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i]; 1543 else ALFp->coeffmulti[ind][i] = FilterCoeffQuant[ind][i]; 1544 } 1545 } 1546 m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct); 1547 4549 else 4550 { 4551 ALFp->coeffmulti[ind][i] = m_FilterCoeffQuantTemp[ind][i]; 4552 } 4553 } 4554 if(predMethod==0) 4555 { 4556 ALFp->nbSPred[ind] = nbFlagIntra[ind]; 4557 } 4558 } 4559 m_pcEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct); 1548 4560 1549 4561 // return(bit_ct); … … 1551 4563 } 1552 4564 1553 1554 Int TEncAdaptiveLoopFilter::xsendAllFiltersPPPredForce0(int **FilterCoeffQuant, int fl, int sqrFiltLength, int filters_per_group, 1555 int codedVarBins[NO_VAR_BINS], int createBistream, ALFParam* ALFp) 1556 { 1557 int ind, bit_ct=0, bit_ct0, i, j; 1558 int filters_per_group_temp, filters_per_group_diff; 1559 int chosenPred = 0; 1560 int force0 = 1; 1561 Int64 Newbit_ct; 1562 1563 i = 0; 1564 for(ind = 0; ind < filters_per_group; ind++) 1565 { 1566 if(codedVarBins[ind] == 1) 1567 { 1568 for(j = 0; j < sqrFiltLength; j++) 1569 m_FilterCoeffQuantTemp[i][j]=FilterCoeffQuant[ind][j]; 1570 i++; 1571 } 1572 } 1573 filters_per_group_diff = filters_per_group_temp = i; 1574 1575 for(ind = 0; ind < filters_per_group; ++ind) 1576 { 1577 if(ind == 0) 1578 { 1579 for(i 
= 0; i < sqrFiltLength; i++) 1580 m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i]; 1581 } 1582 else 1583 { 1584 for(i = 0; i < sqrFiltLength; i++) 1585 m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i] - m_FilterCoeffQuantTemp[ind-1][i]; 1586 } 1587 } 1588 1589 if(!((filters_per_group_temp == 0) && (filters_per_group == 1))) 1590 { 1591 bit_ct0 = xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, 0); 1592 1593 if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_diff, 0) >= bit_ct0) 1594 { 1595 chosenPred = 0; 1596 bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream); 1597 bit_ct += xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, createBistream); 1598 } 1599 else 1600 { 1601 chosenPred = 1; 1602 bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream); 1603 bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_temp, createBistream); 1604 } 1605 } 1606 ALFp->forceCoeff0 = 1; 1607 ALFp->predMethod = chosenPred; 1608 ALFp->filters_per_group_diff = filters_per_group_diff; 1609 ALFp->filters_per_group = filters_per_group; 1610 ALFp->num_coeff = sqrFiltLength; 1611 if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM) 1612 ALFp->realfiltNo=2; 1613 else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM) 1614 ALFp->realfiltNo=1; 1615 else 1616 ALFp->realfiltNo=0; 1617 1618 for(ind = 0; ind < filters_per_group; ++ind) 1619 { 1620 ALFp->codedVarBins[ind] = codedVarBins[ind]; 1621 } 1622 for(ind = 0; ind < filters_per_group_diff; ++ind) 1623 { 1624 for(i = 0; i < sqrFiltLength; i++) 1625 { 1626 if (chosenPred) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i]; 1627 else ALFp->coeffmulti[ind][i] = m_FilterCoeffQuantTemp[ind][i]; 1628 } 1629 } 1630 m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct); 1631 1632 return ((Int)Newbit_ct); 1633 } 
1634 1635 //filtNo==-1/realfiltNo, noFilters=filters_per_frames, realfiltNo=filtNo 1636 Int TEncAdaptiveLoopFilter::xcodeAuxInfo(int filtNo, int noFilters, int varIndTab[NO_VAR_BINS], int frNo, int createBitstream,int realfiltNo, ALFParam* ALFp) 1637 { 1638 int i, filterPattern[NO_VAR_BINS], startSecondFilter=0, bitCt=0, codePrediction; 4565 Int TEncAdaptiveLoopFilter::xcodeAuxInfo(int filters_per_fr, int varIndTab[NO_VAR_BINS], int filter_shape, ALFParam* ALFp) 4566 { 4567 int i, filterPattern[NO_VAR_BINS], startSecondFilter=0, bitCt=0; 1639 4568 Int64 NewbitCt; 1640 1641 codePrediction = 0; 1642 4569 1643 4570 //send realfiltNo (tap related) 1644 ALFp->realfiltNo = realfiltNo; 1645 ALFp->filtNo = filtNo; 1646 1647 if(filtNo >= 0) 1648 { 1649 // decide startSecondFilter and filterPattern 1650 if(noFilters > 1) 1651 { 1652 memset(filterPattern, 0, NO_VAR_BINS * sizeof(int)); 1653 for(i = 1; i < NO_VAR_BINS; ++i) 1654 { 1655 if(varIndTab[i] != varIndTab[i-1]) 1656 { 1657 filterPattern[i] = 1; 1658 startSecondFilter = i; 1659 } 1660 } 1661 memcpy (ALFp->filterPattern, filterPattern, NO_VAR_BINS * sizeof(int)); 1662 ALFp->startSecondFilter = startSecondFilter; 1663 } 1664 1665 //send noFilters (filters_per_frame) 1666 //0: filters_per_frame = 1 1667 //1: filters_per_frame = 2 1668 //2: filters_per_frame > 2 (exact number from filterPattern) 1669 1670 ALFp->noFilters = min(noFilters-1,2); 1671 if (noFilters<=0) printf("error\n"); 1672 } 1673 m_pcDummyEntropyCoder->codeAuxCountBit(ALFp, &NewbitCt); 4571 ALFp->filter_shape = filter_shape; 4572 4573 // decide startSecondFilter and filterPattern 4574 memset(filterPattern, 0, NO_VAR_BINS * sizeof(int)); 4575 if(filters_per_fr > 1) 4576 { 4577 for(i = 1; i < NO_VAR_BINS; ++i) 4578 { 4579 if(varIndTab[i] != varIndTab[i-1]) 4580 { 4581 filterPattern[i] = 1; 4582 startSecondFilter = i; 4583 } 4584 } 4585 } 4586 memcpy (ALFp->filterPattern, filterPattern, NO_VAR_BINS * sizeof(int)); 4587 ALFp->startSecondFilter = 
startSecondFilter; 4588 4589 assert(filters_per_fr>0); 4590 m_pcEntropyCoder->codeAuxCountBit(ALFp, &NewbitCt); 4591 1674 4592 bitCt = (int) NewbitCt; 1675 4593 return(bitCt); … … 1680 4598 { 1681 4599 int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal, len = 0, 1682 *pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],4600 *pDepthInt=NULL, kMinTab[MAX_SCAN_VAL], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB], 1683 4601 minKStart, minBitsKStart, bitsKStart; 1684 1685 pDepthInt = pDepthIntTab[fl-2]; 4602 #if ALF_SINGLE_FILTER_SHAPE 4603 Int minScanVal = MIN_SCAN_POS_CROSS; 4604 #else 4605 int minScanVal = (fl==ALF_STAR5x5) ? 0 : MIN_SCAN_POS_CROSS; 4606 #endif 4607 pDepthInt = pDepthIntTabShapes[fl]; 1686 4608 1687 4609 maxScanVal = 0; … … 1712 4634 bitsKStart = 0; 1713 4635 kStart = k; 1714 for(scanPos = 0; scanPos < maxScanVal; scanPos++)4636 for(scanPos = minScanVal; scanPos < maxScanVal; scanPos++) 1715 4637 { 1716 4638 kMin = kStart; … … 1733 4655 1734 4656 kStart = minKStart; 1735 for(scanPos = 0; scanPos < maxScanVal; scanPos++)4657 for(scanPos = minScanVal; scanPos < maxScanVal; scanPos++) 1736 4658 { 1737 4659 kMin = kStart; … … 1750 4672 // Coding parameters 1751 4673 // len += lengthFilterCodingParams(minKStart, maxScanVal, kMinTab, createBitstream); 4674 #if LCU_SYNTAX_ALF 4675 if (filters_per_group == 1) 4676 { 4677 len += lengthFilterCoeffs(sqrFiltLength, filters_per_group, pDepthInt, pDiffQFilterCoeffIntPP, 4678 kTableTabShapes[ALF_CROSS9x7_SQUARE3x3], createBitstream); 4679 } 4680 else 4681 { 4682 #endif 1752 4683 len += (3 + maxScanVal); 1753 4684 … … 1755 4686 len += lengthFilterCoeffs(sqrFiltLength, filters_per_group, pDepthInt, pDiffQFilterCoeffIntPP, 1756 4687 kMinTab, createBitstream); 1757 4688 #if LCU_SYNTAX_ALF 4689 } 4690 #endif 4691 1758 4692 return len; 1759 4693 } … … 1764 4698 int q = coeffVal / m; 1765 4699 if(coeffVal != 0) 4700 { 1766 4701 return(q + 2 + k); 4702 } 1767 4703 else 
4704 { 1768 4705 return(q + 1 + k); 4706 } 1769 4707 } 1770 4708 … … 1775 4713 1776 4714 if(force0) 4715 { 1777 4716 bit_cnt = 2 + filters_per_group; 4717 } 1778 4718 else 4719 { 1779 4720 bit_cnt = 2; 4721 } 1780 4722 return bit_cnt; 1781 4723 … … 1793 4735 { 1794 4736 scanPos = pDepthInt[i] - 1; 4737 #if LCU_SYNTAX_ALF 4738 Int k = (filters_per_group == 1) ? kMinTab[i] : kMinTab[scanPos]; 4739 bit_cnt += lengthGolomb(abs(FilterCoeff[ind][i]), k); 4740 #else 1795 4741 bit_cnt += lengthGolomb(abs(FilterCoeff[ind][i]), kMinTab[scanPos]); 4742 #endif 1796 4743 } 1797 4744 } … … 1799 4746 } 1800 4747 1801 Void TEncAdaptiveLoopFilter::xEncALFLuma_qc ( TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost ) 4748 #if !LCU_SYNTAX_ALF 4749 4750 Void TEncAdaptiveLoopFilter::xEncALFLuma ( TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost ) 1802 4751 { 1803 4752 //pcPicDec: extended decoded 1804 4753 //pcPicRest: original decoded: filtered signal will be stored 1805 4754 1806 4755 UInt64 uiRate; 1807 4756 UInt64 uiDist; 1808 4757 Double dCost; 1809 #if !MQT_ALF_NPASS1810 Int Height = pcPicOrg->getHeight();1811 Int Width = pcPicOrg->getWidth();1812 #endif1813 4758 Int LumaStride = pcPicOrg->getStride(); 1814 imgpel* pOrg = (imgpel*) pcPicOrg->getLumaAddr(); 1815 imgpel* pRest = (imgpel*) pcPicRest->getLumaAddr(); 1816 imgpel* pDec = (imgpel*) pcPicDec->getLumaAddr(); 1817 1818 Int tap = ALF_MIN_NUM_TAP; 1819 m_pcTempAlfParam->tap = tap; 1820 #if TI_ALF_MAX_VSIZE_7 1821 m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(m_pcTempAlfParam->tap); 1822 m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(m_pcTempAlfParam->tap); 1823 #else 1824 m_pcTempAlfParam->num_coeff = (Int)tap*tap/4 + 2; 1825 #endif 1826 1827 #if MQT_BA_RA 1828 1829 #if MQT_ALF_NPASS 1830 1831 static Bool bFirst = true; 1832 static Int* 
apiVarIndTabBest[NUM_ALF_CLASS_METHOD]; 1833 static Int** appiBestCoeffSet[NUM_ALF_CLASS_METHOD]; 1834 1835 static Double*** adBestySym; 1836 static Double**** adBestESym; 1837 static Double** adBestpixAcc; 1838 1839 if(bFirst) 1840 { 1841 if(m_iALFEncodePassReduction) 1842 { 1843 initMatrix4D_double(&adBestESym,NUM_ALF_CLASS_METHOD, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH); 1844 initMatrix3D_double(&adBestySym,NUM_ALF_CLASS_METHOD, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); 1845 initMatrix_double (&adBestpixAcc,NUM_ALF_CLASS_METHOD, NO_VAR_BINS ); 1846 1847 for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++) 1848 { 1849 1850 apiVarIndTabBest[i] = new Int[NO_VAR_BINS]; 1851 appiBestCoeffSet[i] = new Int*[NO_VAR_BINS]; 1852 for(Int j=0; j< NO_VAR_BINS; j++) 1853 { 1854 appiBestCoeffSet[i][j]= new Int[MAX_SQR_FILT_LENGTH]; 1855 } 1856 } 1857 } 1858 1859 bFirst = false; 1860 } 1861 1862 Int ibestfiltNo[NUM_ALF_CLASS_METHOD]; 1863 Int ibestfilters_per_fr[NUM_ALF_CLASS_METHOD]; 1864 Int64 iDist; 1865 Int64 iMinMethodDist = MAX_INT; 1866 UInt64 uiMinMethodRate; 1867 Double dMinMethodCost = MAX_DOUBLE; 1868 #endif 4759 Pel* pOrg = pcPicOrg->getLumaAddr(); 4760 Pel* pRest = pcPicRest->getLumaAddr(); 4761 Pel* pDec = pcPicDec->getLumaAddr(); 4762 4763 Double dMinMethodCost = MAX_DOUBLE; 4764 UInt64 uiMinMethodDist = MAX_UINT; 4765 UInt64 uiMinMethodRate = MAX_UINT; 1869 4766 Int iBestClassMethod = ALF_RA; 1870 4767 Double adExtraCostReduction[NUM_ALF_CLASS_METHOD]; … … 1879 4776 pcAlfParam->alf_flag = 1; 1880 4777 pcAlfParam->chroma_idc = 0; 1881 pcAlfParam->cu_control_flag = 0;1882 pcAlfParam->tap = tap;1883 #if TI_ALF_MAX_VSIZE_71884 pcAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap);1885 pcAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap);1886 #else1887 pcAlfParam->num_coeff = (Int)tap*tap/4 + 2;1888 #endif1889 4778 1890 4779 switch(i) … … 1892 4781 case ALF_RA: 1893 4782 { 1894 adExtraCostReduction[i] = 
(double)(m_im_height * m_im_width) * m_dLambdaLuma * 2.0 / 4096.0; 4783 adExtraCostReduction[i] = (double)(m_img_height * m_img_width) * m_dLambdaLuma * 2.0 / 4096.0; 4784 } 4785 break; 4786 case ALF_BA: 4787 { 4788 adExtraCostReduction[i] = 0.0; 1895 4789 } 1896 4790 break; 1897 4791 default: 1898 4792 { 1899 adExtraCostReduction[i] = 0.0; 1900 } 1901 break; 1902 } 1903 1904 } 4793 printf("Not a support adaptation method\n"); 4794 assert(0); 4795 exit(-1); 4796 } 4797 } 4798 } 4799 1905 4800 1906 4801 for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++) 1907 4802 { 1908 pcAlfParam = &(cFrmAlfParam[i]); 1909 m_varImg = m_varImgMethods[i]; 1910 m_uiVarGenMethod = pcAlfParam->alf_pcr_region_flag = i; 1911 #if MQT_ALF_NPASS 1912 if(m_iALFEncodePassReduction) 1913 { 1914 m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[m_uiVarGenMethod]; 1915 } 4803 m_uiVarGenMethod = i; 4804 4805 pcAlfParam = &(cFrmAlfParam[m_uiVarGenMethod]); 4806 m_varImg = m_varImgMethods[m_uiVarGenMethod]; 4807 4808 pcAlfParam->alf_pcr_region_flag = m_uiVarGenMethod; 4809 1916 4810 setInitialMask(pcPicOrg, pcPicDec); 1917 #else 1918 for (Int i=0; i<Height; i++) 1919 { 1920 for (Int j=0; j<Width; j++) 1921 { 1922 m_maskImg[i][j] = 1; 1923 } 1924 } 1925 #if MTK_NONCROSS_INLOOP_FILTER 1926 if(!m_bUseNonCrossALF) 1927 calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride); 4811 4812 if(m_iALFEncodePassReduction == 0) 4813 { 4814 static Int best_filter_shape = 0; 4815 if (m_uiVarGenMethod == 0) 4816 { 4817 UInt64 MinRate_Shape0 = MAX_INT; 4818 UInt64 MinDist_Shape0 = MAX_INT; 4819 Double MinCost_Shape0 = MAX_DOUBLE; 4820 4821 UInt64 MinRate_Shape1 = MAX_INT; 4822 UInt64 MinDist_Shape1 = MAX_INT; 4823 Double MinCost_Shape1 = MAX_DOUBLE; 4824 4825 #if ALF_SINGLE_FILTER_SHAPE 4826 Int filter_shape = 0; 4827 #else 4828 for (Int filter_shape = 0; filter_shape < 2 ;filter_shape ++) 4829 #endif 4830 { 4831 pcAlfParam->filter_shape = filter_shape; 4832 pcAlfParam->num_coeff = 
m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[filter_shape]; 4833 xFirstFilteringFrameLuma(pOrg, pDec, m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, pcAlfParam->filter_shape, LumaStride); 4834 xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost); 4835 if (filter_shape == 0) 4836 { 4837 // copy Shape0 4838 MinRate_Shape0 = uiRate; 4839 MinDist_Shape0 = uiDist; 4840 MinCost_Shape0 = dCost; 4841 m_pcPicYuvTmp->copyToPicLuma(pcPicYuvRecShape0); 4842 copyALFParam(pcAlfParamShape0, pcAlfParam); 4843 } 4844 else //if (filter_shape == 1) 4845 { 4846 // copy Shape1 4847 MinRate_Shape1 = uiRate; 4848 MinDist_Shape1 = uiDist; 4849 MinCost_Shape1 = dCost; 4850 m_pcPicYuvTmp->copyToPicLuma(pcPicYuvRecShape1); 4851 copyALFParam(pcAlfParamShape1, pcAlfParam); 4852 } 4853 } 4854 4855 if (MinCost_Shape0 <= MinCost_Shape1) 4856 { 4857 pcPicYuvRecShape0->copyToPicLuma(m_pcPicYuvTmp); 4858 copyALFParam(pcAlfParam, pcAlfParamShape0); 4859 uiRate = MinRate_Shape0; 4860 uiDist = MinDist_Shape0; 4861 dCost = MinCost_Shape0; 4862 best_filter_shape = 0; 4863 } 4864 else //if (MinCost_Shape1 < MinCost_Shape0) 4865 { 4866 pcPicYuvRecShape1->copyToPicLuma(m_pcPicYuvTmp); 4867 copyALFParam(pcAlfParam, pcAlfParamShape1); 4868 uiRate = MinRate_Shape1; 4869 uiDist = MinDist_Shape1; 4870 dCost = MinCost_Shape1; 4871 best_filter_shape = 1; 4872 } 4873 } 4874 else 4875 { 4876 pcAlfParam->filter_shape = best_filter_shape; 4877 pcAlfParam->num_coeff = m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[best_filter_shape]; 4878 xFirstFilteringFrameLuma(pOrg, pDec, m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, best_filter_shape, LumaStride); 4879 xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost); 4880 } 4881 } 1928 4882 else 1929 calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride); 1930 #else 1931 calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride); 1932 #endif 1933 #endif 1934 1935 #if MQT_ALF_NPASS 1936 if(m_iALFEncodePassReduction) 1937 { 1938 
xFirstEstimateFilteringFrameLumaAllTap(pOrg, pDec, LumaStride, 1939 pcAlfParam, apiVarIndTabBest[i], appiBestCoeffSet[i], 1940 ibestfiltNo[i], ibestfilters_per_fr[i], 1941 adBestySym[i], adBestESym[i], adBestpixAcc[i], 1942 uiRate, iDist, dCost); 1943 1944 } 1945 else 1946 { 1947 #endif 1948 xFirstFilteringFrameLuma(pOrg, pDec, (imgpel*)m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, pcAlfParam->tap, LumaStride); 1949 #if MQT_ALF_NPASS 1950 } 1951 #endif 1952 1953 #if MQT_ALF_NPASS 1954 if(!m_iALFEncodePassReduction) 1955 { 1956 #endif 1957 xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost); 1958 #if MQT_ALF_NPASS 1959 iDist = (Int64)uiDist; 1960 } 1961 #endif 1962 1963 dCost -= adExtraCostReduction[i]; 4883 { 4884 decideFilterShapeLuma(pOrg, pDec, LumaStride, pcAlfParam, uiRate, uiDist, dCost); 4885 } 4886 4887 dCost -= adExtraCostReduction[m_uiVarGenMethod]; 1964 4888 1965 4889 if(dCost < dMinMethodCost) 1966 4890 { 1967 iBestClassMethod = i;4891 iBestClassMethod = m_uiVarGenMethod; 1968 4892 dMinMethodCost = dCost; 1969 4893 uiMinMethodRate= uiRate; 1970 iMinMethodDist = iDist; 1971 #if MQT_ALF_NPASS 1972 if(!m_iALFEncodePassReduction) 1973 { 1974 #endif 4894 uiMinMethodDist = uiDist; 4895 4896 if(m_iALFEncodePassReduction == 0) 4897 { 1975 4898 m_pcPicYuvTmp->copyToPicLuma(pcPicRest); 1976 #if MQT_ALF_NPASS 1977 } 1978 #endif 1979 4899 } 1980 4900 } 1981 1982 } 1983 1984 dMinMethodCost += adExtraCostReduction[iBestClassMethod]; 1985 1986 1987 m_varImg= m_varImgMethods[iBestClassMethod]; 4901 } 1988 4902 1989 4903 m_uiVarGenMethod = iBestClassMethod; 1990 1991 #if MQT_ALF_NPASS 1992 if(m_iALFEncodePassReduction) 1993 { 1994 1995 m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[iBestClassMethod]; 1996 1997 setInitialMask(pcPicOrg, pcPicDec); 1998 1999 m_pcBestAlfParam->alf_flag = 1; 2000 m_pcBestAlfParam->cu_control_flag = 0; 2001 m_pcBestAlfParam->chroma_idc = 0; 2002 m_pcBestAlfParam->alf_pcr_region_flag = iBestClassMethod; 2003 2004 
m_pcBestAlfParam->tap = cFrmAlfParam[iBestClassMethod].tap; 2005 #if TI_ALF_MAX_VSIZE_7 2006 m_pcBestAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap); 2007 m_pcBestAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap); 2008 #else 2009 m_pcBestAlfParam->num_coeff = (Int)tap*tap/4 + 2; 2010 #endif 2011 2012 xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcBestAlfParam, m_pcBestAlfParam->tap, LumaStride); 2013 2014 2015 2016 xCalcRDCost(pcPicOrg, pcPicRest, m_pcBestAlfParam, uiMinMethodRate, uiDist, dMinMethodCost); 2017 iMinMethodDist = (Int64)uiDist; 4904 dMinMethodCost += adExtraCostReduction[m_uiVarGenMethod]; 4905 m_varImg= m_varImgMethods[m_uiVarGenMethod]; 4906 4907 pcAlfParam = &(cFrmAlfParam[m_uiVarGenMethod]); 4908 4909 ALFParam cAlfParamWithBestMethod; 4910 allocALFParam(&cAlfParamWithBestMethod); 4911 4912 4913 if(m_iALFEncodePassReduction ==0) 4914 { 4915 copyALFParam(&cAlfParamWithBestMethod, pcAlfParam); 2018 4916 } 2019 4917 else 2020 4918 { 2021 #endif 2022 copyALFParam(m_pcBestAlfParam, &cFrmAlfParam[iBestClassMethod]); 2023 #if MQT_ALF_NPASS 2024 2025 } 2026 #endif 2027 2028 ruiMinRate = uiMinMethodRate; 2029 ruiMinDist = (UInt64)iMinMethodDist; 2030 rdMinCost = dMinMethodCost; 2031 2032 4919 cAlfParamWithBestMethod.alf_flag = 1; 4920 cAlfParamWithBestMethod.chroma_idc = 0; 4921 cAlfParamWithBestMethod.alf_pcr_region_flag = m_uiVarGenMethod; 4922 cAlfParamWithBestMethod.filter_shape= pcAlfParam->filter_shape; 4923 cAlfParamWithBestMethod.num_coeff = m_sqrFiltLengthTab[cAlfParamWithBestMethod.filter_shape]; 4924 decodeFilterSet(pcAlfParam, m_varIndTab, m_filterCoeffSym); 4925 if(!m_bUseNonCrossALF) 4926 { 4927 filterLuma(pRest, pDec, LumaStride, 0, m_img_height-1, 0, m_img_width-1, pcAlfParam->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg); 4928 } 4929 else 4930 { 4931 xfilterSlicesEncoder(pDec, pRest, LumaStride, pcAlfParam->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg); 4932 } 4933 
xcodeFiltCoeff(m_filterCoeffSym, pcAlfParam->filter_shape, m_varIndTab, pcAlfParam->filters_per_group,&cAlfParamWithBestMethod); 4934 4935 xCalcRDCost(pcPicOrg, pcPicRest, &cAlfParamWithBestMethod, uiMinMethodRate, uiMinMethodDist, dMinMethodCost); 4936 4937 } 4938 4939 if(dMinMethodCost < rdMinCost ) 4940 { 4941 ruiMinRate = uiMinMethodRate; 4942 ruiMinDist = uiMinMethodDist; 4943 rdMinCost = dMinMethodCost; 4944 copyALFParam(m_pcBestAlfParam, &cAlfParamWithBestMethod); 4945 } 4946 4947 freeALFParam(&cAlfParamWithBestMethod); 2033 4948 for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++) 2034 4949 { 2035 4950 freeALFParam(&cFrmAlfParam[i]); 2036 4951 } 2037 2038 #else 2039 2040 #if MQT_ALF_NPASS 2041 setInitialMask(pcPicOrg, pcPicDec); 2042 #else 2043 for (Int i=0; i<Height; i++) 2044 { 2045 for (Int j=0; j<Width; j++) 2046 { 2047 m_maskImg[i][j] = 1; 2048 } 2049 } 2050 #if MTK_NONCROSS_INLOOP_FILTER 4952 } 4953 4954 4955 4956 Void TEncAdaptiveLoopFilter::xFirstFilteringFrameLuma(Pel* imgOrg, Pel* imgDec, Pel* imgRest, ALFParam* ALFp, Int filtNo, Int stride) 4957 { 2051 4958 if(!m_bUseNonCrossALF) 2052 calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride); 4959 { 4960 xstoreInBlockMatrix(0, 0, m_img_height, m_img_width, true, true, imgOrg, imgDec, filtNo, stride); 4961 } 2053 4962 else 2054 calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride); 2055 #else 2056 calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride); 2057 #endif 2058 #endif 2059 2060 #if MQT_ALF_NPASS 2061 if(m_iALFEncodePassReduction) 2062 { 2063 xFirstFilteringFrameLumaAllTap(pOrg, pDec, pRest, LumaStride); 2064 } 2065 else 2066 #endif 2067 xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcTempAlfParam, m_pcTempAlfParam->tap, LumaStride); 2068 2069 xCalcRDCost(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost); // change this function final coding 2070 2071 if( dCost < rdMinCost) 2072 { 2073 ruiMinRate = uiRate; 2074 ruiMinDist = uiDist; 2075 rdMinCost = dCost; 
2076 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam); 2077 } 2078 2079 #endif 2080 } 2081 2082 Void TEncAdaptiveLoopFilter::xFirstFilteringFrameLuma(imgpel* ImgOrg, imgpel* ImgDec, imgpel* ImgRest, ALFParam* ALFp, Int tap, Int Stride) 2083 { 2084 #if MTK_NONCROSS_INLOOP_FILTER 2085 if(!m_bUseNonCrossALF) 2086 xstoreInBlockMatrix(0, 0, m_im_height, m_im_width, true, true, ImgOrg, ImgDec, tap, Stride); 2087 else 2088 xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, tap, Stride); 2089 #else 2090 xstoreInBlockMatrix(ImgOrg, ImgDec, tap, Stride); 2091 #endif 2092 2093 2094 xFilteringFrameLuma_qc(ImgOrg, ImgDec, ImgRest, ALFp, tap, Stride); 2095 } 2096 2097 2098 #if MTK_NONCROSS_INLOOP_FILTER 2099 Void TEncAdaptiveLoopFilter::xstoreInBlockMatrix(Int ypos, Int xpos, Int iheight, Int iwidth, Bool bResetBlockMatrix, Bool bSymmCopyBlockMatrix, imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride) 2100 #else 2101 Void TEncAdaptiveLoopFilter::xstoreInBlockMatrix(imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride) 2102 #endif 2103 { 2104 #if MQT_BA_RA 2105 Int var_step_size_w = VAR_SIZE_W; 2106 Int var_step_size_h = VAR_SIZE_H; 2107 #endif 2108 2109 Int i,j,k,l,varInd,ii,jj; 2110 Int x, y; 2111 Int fl =tap/2; 2112 #if TI_ALF_MAX_VSIZE_7 2113 Int flV = TComAdaptiveLoopFilter::ALFFlHToFlV(fl); 2114 Int sqrFiltLength = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(tap); 2115 #else 2116 Int sqrFiltLength=(((tap*tap)/4 + 1) + 1); 2117 #endif 2118 Int fl2=9/2; //extended size at each side of the frame 4963 { 4964 xstoreInBlockMatrixforSlices(imgOrg, imgDec, filtNo, stride); 4965 } 4966 4967 4968 xFilteringFrameLuma(imgOrg, imgDec, imgRest, ALFp, filtNo, stride); 4969 } 4970 4971 Void TEncAdaptiveLoopFilter::xstoreInBlockMatrix(Int ypos, Int xpos, Int iheight, Int iwidth, Bool bResetBlockMatrix, Bool bSymmCopyBlockMatrix, Pel* pImgOrg, Pel* pImgPad, Int filtNo, Int stride) 4972 { 4973 4974 Pel regionOfInterested = (m_iDesignCurrentFilter ==1)?(1):(0); 4975 Int sqrFiltLength = (filtNo == 
2)?((Int)(MAX_SQR_FILT_LENGTH)):(m_sqrFiltLengthTab[filtNo]); 4976 Int yposEnd = ypos + iheight -1; 4977 Int xposEnd = xpos + iwidth -1; 4978 Double ***EShape = m_EGlobalSym[filtNo]; 4979 Double **yShape = m_yGlobalSym[filtNo]; 4980 2119 4981 Int ELocal[MAX_SQR_FILT_LENGTH]; 2120 Int yLocal; 2121 Int *p_pattern; 2122 Int filtNo =2; 4982 Pel *pImgPad1, *pImgPad2, *pImgPad3, *pImgPad4; 4983 Int i,j,k,l,varInd, yLocal; 2123 4984 double **E,*yy; 2124 #if MTK_NONCROSS_INLOOP_FILTER 2125 static Int count_valid; 2126 #else 2127 Int count_valid=0; 2128 #endif 2129 if (tap==9) 2130 filtNo =0; 2131 else if (tap==7) 2132 filtNo =1; 2133 2134 p_pattern= m_patternTab[filtNo]; 2135 2136 #if MTK_NONCROSS_INLOOP_FILTER 4985 4986 static Int numValidPels; 2137 4987 if(bResetBlockMatrix) 2138 4988 { 2139 count_valid = 0; 2140 #endif 2141 memset( m_pixAcc, 0,sizeof(double)*NO_VAR_BINS); 2142 for (varInd=0; varInd<NO_VAR_BINS; varInd++) 2143 { 2144 memset(m_yGlobalSym[filtNo][varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH); 2145 for (k=0; k<sqrFiltLength; k++) 2146 { 2147 memset(m_EGlobalSym[filtNo][varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH); 2148 } 2149 } 2150 for (i = fl2; i < m_im_height+fl2; i++) 2151 { 2152 for (j = fl2; j < m_im_width+fl2; j++) 2153 { 2154 if (m_maskImg[i-fl2][j-fl2] == 1) 2155 { 2156 count_valid++; 2157 } 2158 } 2159 } 2160 #if MTK_NONCROSS_INLOOP_FILTER 2161 } 2162 #endif 2163 2164 { 2165 #if MTK_NONCROSS_INLOOP_FILTER 2166 x = y = fl2; //cytsai: shall x, y be removed ? 
2167 2168 for (i= ypos; i< ypos + iheight; i++) 2169 { 2170 for (j= xpos; j< xpos + iwidth; j++) 2171 { 2172 #else 2173 for (i=0,y=fl2; i<m_im_height; i++,y++) 2174 { 2175 for (j=0,x=fl2; j<m_im_width; j++,x++) 2176 { 2177 #endif 2178 #if MQT_ALF_NPASS 2179 Int condition = (m_maskImg[i][j] == 1); 2180 if (m_iDesignCurrentFilter) 2181 { 2182 condition = (m_maskImg[i][j] == 0 && count_valid > 0); 4989 numValidPels = 0; 4990 memset( m_pixAcc, 0,sizeof(double)*NO_VAR_BINS); 4991 for (varInd=0; varInd<NO_VAR_BINS; varInd++) 4992 { 4993 memset(yShape[varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH); 4994 for (k=0; k<sqrFiltLength; k++) 4995 { 4996 memset(EShape[varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH); 4997 } 4998 } 4999 for (i = 0; i < m_img_height; i++) 5000 { 5001 for (j = 0; j < m_img_width; j++) 5002 { 5003 if (m_maskImg[i][j] == regionOfInterested) 5004 { 5005 numValidPels++; 2183 5006 } 2184 if(!condition) 2185 { 2186 #else 2187 if (m_maskImg[i][j] == 0 && count_valid > 0) 2188 { 2189 5007 } 5008 } 5009 } 5010 5011 Int yLineInLCU; 5012 Int paddingLine ; 5013 5014 pImgPad += (ypos* stride); 5015 pImgOrg += (ypos* stride); 5016 5017 switch(filtNo) 5018 { 5019 #if !ALF_SINGLE_FILTER_SHAPE 5020 case ALF_STAR5x5: 5021 { 5022 for (i= ypos; i<= yposEnd; i++) 5023 { 5024 yLineInLCU = i % m_lcuHeight; 5025 5026 if (yLineInLCU < m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height ) 5027 { 5028 pImgPad1 = pImgPad + stride; 5029 pImgPad2 = pImgPad - stride; 5030 pImgPad3 = pImgPad + 2*stride; 5031 pImgPad4 = pImgPad - 2*stride; 5032 } 5033 else if (yLineInLCU < m_lineIdxPadTop) 5034 { 5035 paddingLine = - yLineInLCU + m_lineIdxPadTop - 1; 5036 pImgPad1 = pImgPad + min(paddingLine, 1)*stride; 5037 pImgPad2 = pImgPad - stride; 5038 pImgPad3 = pImgPad + min(paddingLine, 2)*stride; 5039 pImgPad4 = pImgPad - 2*stride; 2190 5040 } 2191 5041 else 2192 5042 { 2193 #endif 2194 #if MQT_BA_RA 2195 varInd = m_varImg[i/var_step_size_h][j/var_step_size_w]; 2196 #else 2197 
varInd=min(m_varImg[i][j], NO_VAR_BINS-1); 2198 #endif 2199 k=0; 2200 memset(ELocal, 0, sqrFiltLength*sizeof(int)); 2201 #if TI_ALF_MAX_VSIZE_7 2202 for (ii = -flV; ii < 0; ii++) 2203 #else 2204 for (ii=-fl; ii<0; ii++) 2205 #endif 5043 paddingLine = yLineInLCU - m_lineIdxPadTop; 5044 pImgPad1 = pImgPad + stride; 5045 pImgPad2 = pImgPad - min(paddingLine, 1)*stride; 5046 pImgPad3 = pImgPad + 2*stride; 5047 pImgPad4 = pImgPad - min(paddingLine, 2)*stride; 5048 } 5049 5050 if ( (yLineInLCU == m_lineIdxPadTop || yLineInLCU == m_lineIdxPadTop-1) && i-yLineInLCU+m_lcuHeight < m_img_height ) 5051 { 5052 pImgPad+= stride; 5053 pImgOrg+= stride; 5054 continue; 5055 } 5056 else 5057 { 5058 for (j= xpos; j<= xposEnd; j++) 5059 { 5060 if ( (m_maskImg[i][j] == regionOfInterested) || (numValidPels == 0) ) 2206 5061 { 2207 for (jj=-fl-ii; jj<=fl+ii; jj++) 2208 { 2209 ELocal[p_pattern[k++]]+=(ImgDec[(i+ii)*Stride + (j+jj)]+ImgDec[(i-ii)*Stride + (j-jj)]); 5062 varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W]; 5063 memset(ELocal, 0, 10*sizeof(Int)); 5064 5065 ELocal[0] = (pImgPad3[j+2] + pImgPad4[j-2]); 5066 ELocal[1] = (pImgPad3[j ] + pImgPad4[j ]); 5067 ELocal[2] = (pImgPad3[j-2] + pImgPad4[j+2]); 5068 5069 ELocal[3] = (pImgPad1[j+1] + pImgPad2[j-1]); 5070 ELocal[4] = (pImgPad1[j ] + pImgPad2[j ]); 5071 ELocal[5] = (pImgPad1[j-1] + pImgPad2[j+1]); 5072 5073 ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]); 5074 ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]); 5075 ELocal[8] = (pImgPad[j ]); 5076 5077 yLocal= pImgOrg[j]; 5078 m_pixAcc[varInd]+=(yLocal*yLocal); 5079 E= EShape[varInd]; 5080 yy= yShape[varInd]; 5081 5082 for (k=0; k<10; k++) 5083 { 5084 for (l=k; l<10; l++) 5085 { 5086 E[k][l]+=(double)(ELocal[k]*ELocal[l]); 5087 } 5088 yy[k]+=(double)(ELocal[k]*yLocal); 2210 5089 } 2211 } 2212 for (jj=-fl; jj<0; jj++) 2213 ELocal[p_pattern[k++]]+=(ImgDec[(i)*Stride + (j+jj)]+ImgDec[(i)*Stride + (j-jj)]); 2214 ELocal[p_pattern[k++]]+=ImgDec[(i)*Stride + (j)]; 2215 ELocal[sqrFiltLength-1]=1; 2216 
yLocal=ImgOrg[(i)*Stride + (j)]; 2217 2218 m_pixAcc[varInd]+=(yLocal*yLocal); 2219 E= m_EGlobalSym[filtNo][varInd]; 2220 yy= m_yGlobalSym[filtNo][varInd]; 2221 2222 for (k=0; k<sqrFiltLength; k++) 2223 { 2224 for (l=k; l<sqrFiltLength; l++) 2225 E[k][l]+=(double)(ELocal[k]*ELocal[l]); 2226 yy[k]+=(double)(ELocal[k]*yLocal); 5090 2227 5091 } 2228 5092 } 2229 } 2230 } 2231 } 2232 2233 #if MTK_NONCROSS_INLOOP_FILTER 5093 pImgPad+= stride; 5094 pImgOrg+= stride; 5095 } 5096 } 5097 } 5098 break; 5099 case ALF_CROSS9x9: 5100 { 5101 Pel *pImgPad5, *pImgPad6, *pImgPad7, *pImgPad8; 5102 #else 5103 case ALF_CROSS9x7_SQUARE3x3: 5104 { 5105 Pel *pImgPad5, *pImgPad6; 5106 #endif 5107 for (i= ypos; i<= yposEnd; i++) 5108 { 5109 yLineInLCU = i % m_lcuHeight; 5110 5111 if (yLineInLCU<m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height) 5112 { 5113 pImgPad1 = pImgPad + stride; 5114 pImgPad2 = pImgPad - stride; 5115 pImgPad3 = pImgPad + 2*stride; 5116 pImgPad4 = pImgPad - 2*stride; 5117 pImgPad5 = pImgPad + 3*stride; 5118 pImgPad6 = pImgPad - 3*stride; 5119 #if !ALF_SINGLE_FILTER_SHAPE 5120 pImgPad7 = pImgPad + 4*stride; 5121 pImgPad8 = pImgPad - 4*stride; 5122 #endif 5123 } 5124 else if (yLineInLCU<m_lineIdxPadTop) 5125 { 5126 paddingLine = - yLineInLCU + m_lineIdxPadTop - 1; 5127 pImgPad1 = (paddingLine < 1) ? pImgPad : pImgPad + min(paddingLine, 1)*stride; 5128 pImgPad2 = (paddingLine < 1) ? pImgPad : pImgPad - stride; 5129 pImgPad3 = (paddingLine < 2) ? pImgPad : pImgPad + min(paddingLine, 2)*stride; 5130 pImgPad4 = (paddingLine < 2) ? pImgPad : pImgPad - 2*stride; 5131 pImgPad5 = (paddingLine < 3) ? pImgPad : pImgPad + min(paddingLine, 3)*stride; 5132 pImgPad6 = (paddingLine < 3) ? pImgPad : pImgPad - 3*stride; 5133 #if !ALF_SINGLE_FILTER_SHAPE 5134 pImgPad7 = (paddingLine < 4) ? pImgPad : pImgPad + min(paddingLine, 4)*stride; 5135 pImgPad8 = (paddingLine < 4) ? 
pImgPad : pImgPad - 4*stride; 5136 #endif 5137 } 5138 else 5139 { 5140 paddingLine = yLineInLCU - m_lineIdxPadTop; 5141 pImgPad1 = (paddingLine < 1) ? pImgPad : pImgPad + stride; 5142 pImgPad2 = (paddingLine < 1) ? pImgPad : pImgPad - min(paddingLine, 1)*stride; 5143 pImgPad3 = (paddingLine < 2) ? pImgPad : pImgPad + 2*stride; 5144 pImgPad4 = (paddingLine < 2) ? pImgPad : pImgPad - min(paddingLine, 2)*stride; 5145 pImgPad5 = (paddingLine < 3) ? pImgPad : pImgPad + 3*stride; 5146 pImgPad6 = (paddingLine < 3) ? pImgPad : pImgPad - min(paddingLine, 3)*stride; 5147 #if !ALF_SINGLE_FILTER_SHAPE 5148 pImgPad7 = (paddingLine < 4) ? pImgPad : pImgPad + 4*stride; 5149 pImgPad8 = (paddingLine < 4) ? pImgPad : pImgPad - min(paddingLine, 4)*stride; 5150 #endif 5151 } 5152 5153 for (j= xpos; j<= xposEnd; j++) 5154 { 5155 if ( (m_maskImg[i][j] == regionOfInterested) || (numValidPels == 0) ) 5156 { 5157 varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W]; 5158 5159 #if ALF_SINGLE_FILTER_SHAPE 5160 memset(ELocal, 0, (sqrFiltLength+1)*sizeof(Int)); 5161 5162 ELocal[0] = (pImgPad5[j]+pImgPad6[j]); 5163 ELocal[1] = (pImgPad3[j]+pImgPad4[j]); 5164 ELocal[2] = (pImgPad1[j-1]+pImgPad2[j+1]); 5165 ELocal[3] = (pImgPad1[j]+pImgPad2[j]); 5166 ELocal[4] = (pImgPad1[j+1]+pImgPad2[j-1]); 5167 ELocal[5] = (pImgPad[j+4]+pImgPad[j-4]); 5168 ELocal[6] = (pImgPad[j+3]+pImgPad[j-3]); 5169 ELocal[7] = (pImgPad[j+2]+pImgPad[j-2]); 5170 ELocal[8] = (pImgPad[j+1]+pImgPad[j-1]); 5171 ELocal[9] = (pImgPad[j ]); 5172 #else 5173 memset(ELocal, 0, 10*sizeof(Int)); 5174 5175 ELocal[0] = (pImgPad7[j] + pImgPad8[j]); 5176 5177 ELocal[1] = (pImgPad5[j] + pImgPad6[j]); 5178 5179 ELocal[2] = (pImgPad3[j] + pImgPad4[j]); 5180 5181 ELocal[3] = (pImgPad1[j] + pImgPad2[j]); 5182 5183 ELocal[4] = (pImgPad[j+4] + pImgPad[j-4]); 5184 ELocal[5] = (pImgPad[j+3] + pImgPad[j-3]); 5185 ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]); 5186 ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]); 5187 ELocal[8] = (pImgPad[j ] ); 5188 #endif 5189 
yLocal= pImgOrg[j]; 5190 m_pixAcc[varInd]+=(yLocal*yLocal); 5191 E= EShape[varInd]; 5192 yy= yShape[varInd]; 5193 5194 #if ALF_SINGLE_FILTER_SHAPE 5195 for (k=0; k<(sqrFiltLength+1); k++) 5196 { 5197 for (l=k; l<(sqrFiltLength+1); l++) 5198 { 5199 E[k][l]+=(double)(ELocal[k]*ELocal[l]); 5200 } 5201 yy[k]+=(double)(ELocal[k]*yLocal); 5202 } 5203 #else 5204 for (k=0; k<10; k++) 5205 { 5206 for (l=k; l<10; l++) 5207 { 5208 E[k][l]+=(double)(ELocal[k]*ELocal[l]); 5209 } 5210 yy[k]+=(double)(ELocal[k]*yLocal); 5211 } 5212 #endif 5213 } 5214 } 5215 pImgPad+= stride; 5216 pImgOrg+= stride; 5217 } 5218 5219 } 5220 break; 5221 default: 5222 { 5223 printf("Not a supported filter shape\n"); 5224 assert(0); 5225 exit(1); 5226 } 5227 } 5228 2234 5229 if(bSymmCopyBlockMatrix) 2235 5230 { 2236 #endif 2237 2238 // Matrix EGlobalSeq is symmetric, only part of it is calculated 2239 for (varInd=0; varInd<NO_VAR_BINS; varInd++) 2240 { 2241 double **pE = m_EGlobalSym[filtNo][varInd]; 2242 for (k=1; k<sqrFiltLength; k++) 2243 { 2244 for (l=0; l<k; l++) 2245 { 2246 pE[k][l]=pE[l][k]; 2247 } 2248 } 2249 } 2250 #if MTK_NONCROSS_INLOOP_FILTER 2251 } 2252 #endif 2253 2254 } 2255 2256 Void TEncAdaptiveLoopFilter::xFilteringFrameLuma_qc(imgpel* ImgOrg, imgpel* imgY_pad, imgpel* ImgFilt, ALFParam* ALFp, Int tap, Int Stride) 2257 { 2258 int filtNo,filters_per_fr; 5231 for (varInd=0; varInd<NO_VAR_BINS; varInd++) 5232 { 5233 double **pE = EShape[varInd]; 5234 for (k=1; k<sqrFiltLength; k++) 5235 { 5236 for (l=0; l<k; l++) 5237 { 5238 pE[k][l]=pE[l][k]; 5239 } 5240 } 5241 } 5242 } 5243 } 5244 5245 5246 Void TEncAdaptiveLoopFilter::xFilteringFrameLuma(Pel* imgOrg, Pel* imgPad, Pel* imgFilt, ALFParam* ALFp, Int filtNo, Int stride) 5247 { 2259 5248 static double **ySym, ***ESym; 2260 int lambda_val = (Int) m_dLambdaLuma; 2261 lambda_val = lambda_val * (1<<(2*g_uiBitIncrement)); 2262 if (tap==9) 2263 filtNo =0; 2264 else if (tap==7) 2265 filtNo =1; 2266 else 2267 filtNo=2; 2268 5249 Int 
filters_per_fr; 5250 Int lambdaVal = (Int) m_dLambdaLuma; 5251 lambdaVal = lambdaVal * (1<<(2*g_uiBitIncrement)); 5252 2269 5253 ESym=m_EGlobalSym[filtNo]; 2270 5254 ySym=m_yGlobalSym[filtNo]; 2271 2272 xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr, 2273 m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val); 2274 2275 // g_filterCoeffPrevSelected = g_filterCoeffSym 2276 xcalcPredFilterCoeff(filtNo); 2277 2278 //filter the frame with g_filterCoeffPrevSelected 2279 #if MTK_NONCROSS_INLOOP_FILTER 5255 5256 xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr,m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambdaVal); 5257 2280 5258 if(!m_bUseNonCrossALF) 2281 xfilterFrame_en(0, 0, m_im_height, m_im_width, imgY_pad, ImgFilt, filtNo, Stride); 5259 { 5260 filterLuma(imgFilt, imgPad, stride, 0, m_img_height-1, 0, m_img_width-1, ALFp->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg); 5261 } 2282 5262 else 2283 xfilterSlices_en(imgY_pad, ImgFilt, filtNo, Stride); 2284 #else 2285 xfilterFrame_en(imgY_pad, ImgFilt, filtNo, Stride); 2286 #endif 2287 2288 xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,0, ALFp); 2289 } 2290 2291 #if MTK_NONCROSS_INLOOP_FILTER 2292 Void TEncAdaptiveLoopFilter::xfilterFrame_en(int ypos, int xpos, int iheight, int iwidth, imgpel* ImgDec, imgpel* ImgRest,int filtNo, int Stride) 2293 #else 2294 Void TEncAdaptiveLoopFilter::xfilterFrame_en(imgpel* ImgDec, imgpel* ImgRest,int filtNo, int Stride) 2295 #endif 2296 { 2297 #if MQT_BA_RA 2298 imgpel *imgY_rec = ImgDec; 2299 imgpel *p_imgY_pad, *p_imgY_pad0; 2300 int var_step_size_w = VAR_SIZE_W; 2301 int var_step_size_h = VAR_SIZE_H; 2302 int i,j,y,x; 2303 #else 2304 int i,j,ii,jj,y,x; 2305 #endif 2306 int *pattern; 2307 int fl, fl_temp, sqrFiltLength; 2308 int pixelInt; 2309 int offset = (1<<(NUM_BITS - 2)); 2310 2311 pattern=m_patternTab_filt[filtNo]; 
2312 fl_temp=m_flTab[filtNo]; 2313 #if !MQT_BA_RA 2314 #if TI_ALF_MAX_VSIZE_7 2315 Int fl_tempV = TComAdaptiveLoopFilter::ALFFlHToFlV(fl_temp); 2316 #endif 2317 #endif 2318 sqrFiltLength=MAX_SQR_FILT_LENGTH; fl=FILTER_LENGTH/2; 2319 2320 #if MTK_NONCROSS_INLOOP_FILTER 2321 for (y= ypos, i = fl+ ypos; i < ypos+ iheight+ fl; i++, y++) 2322 { 2323 for (x= xpos, j = fl+ xpos; j < xpos+ iwidth+ fl; j++, x++) 2324 { 2325 #else 2326 for (y=0, i = fl; i < m_im_height+fl; i++, y++) 2327 { 2328 for (x=0, j = fl; j < m_im_width+fl; j++, x++) 2329 { 2330 #endif 2331 #if MQT_BA_RA 2332 int varInd=m_varImg[y/var_step_size_h][x/var_step_size_w]; 2333 #else 2334 int varInd=m_varImg[i-fl][j-fl]; 2335 imgpel *im1,*im2; 2336 #endif 2337 int *coef = m_filterCoeffPrevSelected[varInd]; 2338 pattern=m_patternTab_filt[filtNo]; 2339 pixelInt= m_filterCoeffPrevSelected[varInd][sqrFiltLength-1]; 2340 2341 #if MQT_BA_RA 2342 if (filtNo == 2) //5x5 2343 { 2344 pixelInt += coef[22]* (imgY_rec[(i-fl+2)*Stride + j-fl]+imgY_rec[(i-fl-2)*Stride + j-fl]); 2345 2346 pixelInt += coef[30]* (imgY_rec[(i-fl+1)*Stride + j-fl+1]+imgY_rec[(i-fl-1)*Stride + j-fl-1]); 2347 pixelInt += coef[31]* (imgY_rec[(i-fl+1)*Stride + j-fl] +imgY_rec[(i-fl-1)*Stride + j-fl]); 2348 pixelInt += coef[32]* (imgY_rec[(i-fl+1)*Stride + j-fl-1]+imgY_rec[(i-fl-1)*Stride + j-fl+1]); 2349 2350 pixelInt += coef[38]* (imgY_rec[(i-fl)*Stride + j-fl-2]+imgY_rec[(i-fl)*Stride + j-fl+2]); 2351 pixelInt += coef[39]* (imgY_rec[(i-fl)*Stride + j-fl-1]+imgY_rec[(i-fl)*Stride + j-fl+1]); 2352 pixelInt += coef[40]* (imgY_rec[(i-fl)*Stride + j-fl]); 2353 } 2354 else if (filtNo == 1) //7x7 2355 { 2356 pixelInt += coef[13]* (imgY_rec[(i-fl+3)*Stride + j-fl]+imgY_rec[(i-fl-3)*Stride + j-fl]); 2357 2358 p_imgY_pad = imgY_rec + (i-fl+2)*Stride; 2359 p_imgY_pad0 = imgY_rec + (i-fl-2)*Stride; 2360 pixelInt += coef[21]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]); 2361 pixelInt += coef[22]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]); 2362 pixelInt += coef[23]* 
(p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]); 2363 2364 p_imgY_pad = imgY_rec + (i-fl+1)*Stride; 2365 p_imgY_pad0 = imgY_rec + (i-fl-1)*Stride; 2366 pixelInt += coef[29]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]); 2367 pixelInt += coef[30]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]); 2368 pixelInt += coef[31]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]); 2369 pixelInt += coef[32]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]); 2370 pixelInt += coef[33]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]); 2371 2372 p_imgY_pad = imgY_rec + (i-fl)*Stride; 2373 pixelInt += coef[37]* (p_imgY_pad[j-fl+3]+p_imgY_pad[j-fl-3]); 2374 pixelInt += coef[38]* (p_imgY_pad[j-fl+2]+p_imgY_pad[j-fl-2]); 2375 pixelInt += coef[39]* (p_imgY_pad[j-fl+1]+p_imgY_pad[j-fl-1]); 2376 pixelInt += coef[40]* (p_imgY_pad[j-fl]); 2377 2378 } 2379 else 2380 { 2381 #if !TI_ALF_MAX_VSIZE_7 2382 pixelInt += coef[4]* (imgY_rec[(i-fl+4)*Stride + j-fl]+imgY_rec[(i-fl-4)*Stride + j-fl]); 2383 #endif 2384 p_imgY_pad = imgY_rec + (i-fl+3)*Stride; 2385 p_imgY_pad0 = imgY_rec + (i-fl-3)*Stride; 2386 pixelInt += coef[12]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]); 2387 pixelInt += coef[13]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]); 2388 pixelInt += coef[14]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]); 2389 2390 p_imgY_pad = imgY_rec + (i-fl+2)*Stride; 2391 p_imgY_pad0 = imgY_rec + (i-fl-2)*Stride; 2392 pixelInt += coef[20]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]); 2393 pixelInt += coef[21]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]); 2394 pixelInt += coef[22]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]); 2395 pixelInt += coef[23]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]); 2396 pixelInt += coef[24]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]); 2397 2398 p_imgY_pad = imgY_rec + (i-fl+1)*Stride; 2399 p_imgY_pad0 = imgY_rec + (i-fl-1)*Stride; 2400 pixelInt += coef[28]* (p_imgY_pad[j-fl+3]+p_imgY_pad0[j-fl-3]); 2401 pixelInt += coef[29]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]); 2402 pixelInt += coef[30]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]); 2403 pixelInt 
+= coef[31]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]); 2404 pixelInt += coef[32]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]); 2405 pixelInt += coef[33]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]); 2406 pixelInt += coef[34]* (p_imgY_pad[j-fl-3]+p_imgY_pad0[j-fl+3]); 2407 2408 p_imgY_pad = imgY_rec + (i-fl)*Stride; 2409 pixelInt += coef[36]* (p_imgY_pad[j-fl+4]+p_imgY_pad[j-fl-4]); 2410 pixelInt += coef[37]* (p_imgY_pad[j-fl+3]+p_imgY_pad[j-fl-3]); 2411 pixelInt += coef[38]* (p_imgY_pad[j-fl+2]+p_imgY_pad[j-fl-2]); 2412 pixelInt += coef[39]* (p_imgY_pad[j-fl+1]+p_imgY_pad[j-fl-1]); 2413 pixelInt += coef[40]* (p_imgY_pad[j-fl]); 2414 2415 } 2416 #else 2417 2418 #if TI_ALF_MAX_VSIZE_7 2419 for (ii = -fl_tempV; ii < 0; ii++) 2420 #else 2421 for (ii=-fl_temp; ii<0; ii++) 2422 #endif 2423 { 2424 im1= &(ImgDec[(y+ii)*Stride + x-fl_temp-ii]); 2425 im2= &(ImgDec[(y-ii)*Stride + x+fl_temp+ii]); 2426 for (jj=-fl_temp-ii; jj<=fl_temp+ii; jj++,im1++,im2--) 2427 pixelInt+=((*im1+ *im2)*coef[*(pattern++)]); 2428 } 2429 im1= &(ImgDec[y*Stride + x-fl_temp]); 2430 im2= &(ImgDec[y*Stride + x+fl_temp]); 2431 for (jj=-fl_temp; jj<0; jj++,im1++,im2--) 2432 pixelInt+=((*im1+ *im2)*coef[*(pattern++)]); 2433 pixelInt+=(ImgDec[y*Stride + x]*coef[*(pattern++)]); 2434 #endif 2435 2436 pixelInt=(int)((pixelInt+offset) >> (NUM_BITS - 1)); 2437 ImgRest[y*Stride + x] = Clip3(0, g_uiIBDI_MAX, pixelInt); 2438 } 2439 } 2440 } 2441 2442 Void TEncAdaptiveLoopFilter::xfindBestFilterVarPred(double **ySym, double ***ESym, double *pixAcc, int **filterCoeffSym, int **filterCoeffSymQuant, int filtNo, int *filters_per_fr_best, int varIndTab[], imgpel **imgY_rec, imgpel **varImg, imgpel **maskImg, imgpel **imgY_pad, double lambda_val) 2443 { 2444 int filters_per_fr, firstFilt, coded, forceCoeff0, 2445 interval[NO_VAR_BINS][2], intervalBest[NO_VAR_BINS][2]; 2446 int i, k, varInd; 2447 static double ***E_temp, **y_temp, *pixAcc_temp; 2448 static int **FilterCoeffQuantTemp; 2449 double error, lambda, lagrangian, 
lagrangianMin; 2450 5263 { 5264 xfilterSlicesEncoder(imgPad, imgFilt, stride, filtNo, m_filterCoeffSym, m_varIndTab, m_varImg); 5265 } 5266 5267 xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,ALFp); 5268 } 5269 #endif 5270 5271 #if LCU_SYNTAX_ALF 5272 Void TEncAdaptiveLoopFilter::xfindBestFilterVarPred(double **ySym, double ***ESym, double *pixAcc, Int **filterCoeffSym, Int **filterCoeffSymQuant, Int filter_shape, Int *filters_per_fr_best, Int varIndTab[], Pel **imgY_rec, Pel **varImg, Pel **maskImg, Pel **imgY_pad, double lambda_val, Int numMaxFilters) 5273 #else 5274 Void TEncAdaptiveLoopFilter::xfindBestFilterVarPred(double **ySym, double ***ESym, double *pixAcc, Int **filterCoeffSym, Int **filterCoeffSymQuant, Int filter_shape, Int *filters_per_fr_best, Int varIndTab[], Pel **imgY_rec, Pel **varImg, Pel **maskImg, Pel **imgY_pad, double lambda_val) 5275 #endif 5276 { 5277 Int filters_per_fr, firstFilt, interval[NO_VAR_BINS][2], intervalBest[NO_VAR_BINS][2]; 5278 int i; 5279 double lagrangian, lagrangianMin; 2451 5280 int sqrFiltLength; 2452 int * pattern, *patternMap, *weights;2453 int numBits,coeffBits;5281 int *weights; 5282 Int coeffBits; 2454 5283 double errorForce0CoeffTab[NO_VAR_BINS][2]; 2455 int codedVarBins[NO_VAR_BINS], createBistream /*, forceCoeff0 */; 2456 int usePrevFilt[NO_VAR_BINS], usePrevFiltDefault[NO_VAR_BINS]; 2457 static int first=0; 2458 2459 for (i = 0; i < NO_VAR_BINS; i++) 2460 usePrevFiltDefault[i]=usePrevFilt[i]=1; 2461 lambda = lambda_val; 2462 sqrFiltLength=MAX_SQR_FILT_LENGTH; 2463 2464 if (first==0) 2465 { 2466 initMatrix3D_double(&E_temp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH); 2467 initMatrix_double(&y_temp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); 2468 pixAcc_temp = (double *) calloc(NO_VAR_BINS, sizeof(double)); 2469 initMatrix_int(&FilterCoeffQuantTemp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); 2470 first=1; 2471 } 2472 2473 sqrFiltLength=m_sqrFiltLengthTab[filtNo]; 2474 Int fl = 
m_flTab[filtNo]; 2475 weights=m_weightsTab[filtNo]; 2476 patternMap=m_patternMapTab[filtNo]; 2477 pattern=m_patternTab[filtNo]; 2478 2479 memcpy(pixAcc_temp,pixAcc,sizeof(double)*NO_VAR_BINS); 2480 for (varInd=0; varInd<NO_VAR_BINS; varInd++) 2481 { 2482 memcpy(y_temp[varInd],ySym[varInd],sizeof(double)*sqrFiltLength); 2483 for (k=0; k<sqrFiltLength; k++) 2484 memcpy(E_temp[varInd][k],ESym[varInd][k],sizeof(double)*sqrFiltLength); 2485 } 2486 5284 5285 sqrFiltLength= m_sqrFiltLengthTab[filter_shape] ; 5286 weights = weightsTabShapes[filter_shape]; 5287 2487 5288 // zero all variables 2488 5289 memset(varIndTab,0,sizeof(int)*NO_VAR_BINS); 2489 5290 2490 5291 for(i = 0; i < NO_VAR_BINS; i++) 2491 5292 { 2492 memset(filterCoeffSym[i],0,sizeof(int)* MAX_SQR_FILT_LENGTH);2493 memset(filterCoeffSymQuant[i],0,sizeof(int)* MAX_SQR_FILT_LENGTH);2494 } 2495 5293 memset(filterCoeffSym[i],0,sizeof(int)*ALF_MAX_NUM_COEF); 5294 memset(filterCoeffSymQuant[i],0,sizeof(int)*ALF_MAX_NUM_COEF); 5295 } 5296 2496 5297 firstFilt=1; lagrangianMin=0; 2497 5298 filters_per_fr=NO_FILTERS; 2498 5299 2499 5300 while(filters_per_fr>=1) 2500 5301 { 2501 findFilterGroupingError(E_temp, y_temp, pixAcc_temp, interval, sqrFiltLength, filters_per_fr); 2502 findFilterCoeff(E_temp, y_temp, pixAcc_temp, filterCoeffSym, filterCoeffSymQuant, interval, 2503 varIndTab, sqrFiltLength, filters_per_fr, weights, numBits=NUM_BITS, errorForce0CoeffTab); 2504 lagrangian=xfindBestCoeffCodMethod(codedVarBins, &forceCoeff0, filterCoeffSymQuant, fl, 2505 sqrFiltLength, filters_per_fr, errorForce0CoeffTab, &error, lambda); 2506 2507 if (lagrangian<lagrangianMin || firstFilt==1) 5302 mergeFiltersGreedy(ySym, ESym, pixAcc, interval, sqrFiltLength, filters_per_fr); 5303 findFilterCoeff(ESym, ySym, pixAcc, filterCoeffSym, filterCoeffSymQuant, interval, 5304 varIndTab, sqrFiltLength, filters_per_fr, weights, errorForce0CoeffTab); 5305 5306 lagrangian=xfindBestCoeffCodMethod(filterCoeffSymQuant, filter_shape, sqrFiltLength, 
filters_per_fr, errorForce0CoeffTab, lambda_val); 5307 #if LCU_SYNTAX_ALF 5308 if (lagrangian<lagrangianMin || firstFilt==1 || filters_per_fr == numMaxFilters) 5309 #else 5310 if (lagrangian<lagrangianMin || firstFilt==1 || filters_per_fr == m_iALFMaxNumberFilters) 5311 #endif 2508 5312 { 2509 5313 firstFilt=0; … … 2515 5319 filters_per_fr--; 2516 5320 } 2517 2518 findFilterCoeff(E_temp, y_temp, pixAcc_temp, filterCoeffSym, filterCoeffSymQuant, intervalBest, 2519 varIndTab, sqrFiltLength, (*filters_per_fr_best), weights, numBits=NUM_BITS, errorForce0CoeffTab); 2520 2521 xfindBestCoeffCodMethod(codedVarBins, &forceCoeff0, filterCoeffSymQuant, fl, sqrFiltLength, 2522 (*filters_per_fr_best), errorForce0CoeffTab, &error, lambda); 2523 2524 coded=1; 2525 if (forceCoeff0==1 && (*filters_per_fr_best)==1) 2526 { 2527 coded=0; 2528 coeffBits = xcodeAuxInfo(-1, (*filters_per_fr_best), varIndTab, 0, createBistream=0,filtNo, m_tempALFp); 2529 } 2530 else 2531 { 2532 coeffBits = xcodeAuxInfo(filtNo, (*filters_per_fr_best), varIndTab, 0, createBistream=0,filtNo, m_tempALFp); 2533 } 2534 2535 if (forceCoeff0==0) 2536 { 2537 coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength, 2538 (*filters_per_fr_best), createBistream=0, m_tempALFp); 2539 } 2540 else 2541 { 2542 if ((*filters_per_fr_best)==1) 2543 { 2544 for(varInd=0; varInd<(*filters_per_fr_best); varInd++) 2545 { 2546 memset(filterCoeffSym[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH); 2547 memset(filterCoeffSymQuant[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH); 2548 } 2549 } 2550 else 2551 { 2552 coeffBits += xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength, 2553 (*filters_per_fr_best), codedVarBins, createBistream=0, m_tempALFp); 2554 2555 for(varInd=0; varInd<(*filters_per_fr_best); varInd++) 2556 { 2557 if (codedVarBins[varInd]==0) 2558 { 2559 memset(filterCoeffSym[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH); 2560 memset(filterCoeffSymQuant[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH); 2561 
} 2562 } 2563 } 2564 } 2565 } 2566 2567 2568 Void TEncAdaptiveLoopFilter::xcalcPredFilterCoeff(int filtNo) 2569 { 2570 int *patternMap, varInd, i, k; 2571 2572 patternMap=m_patternMapTab[filtNo]; 2573 for(varInd=0; varInd<NO_VAR_BINS; ++varInd) 2574 { 2575 k=0; 2576 for(i = 0; i < MAX_SQR_FILT_LENGTH; i++) 2577 { 2578 if (patternMap[i]>0) 2579 { 2580 m_filterCoeffPrevSelected[varInd][i]=m_filterCoeffSym[m_varIndTab[varInd]][k]; 2581 k++; 2582 } 2583 else 2584 { 2585 m_filterCoeffPrevSelected[varInd][i]=0; 2586 } 2587 #if MQT_ALF_NPASS 2588 if (m_iALFEncodePassReduction && (!m_iUsePreviousFilter || !m_iDesignCurrentFilter)) 2589 { 2590 if((m_iCurrentPOC%m_iGOPSize) == 0) 2591 { 2592 m_aiFilterCoeffSaved[0][varInd][i] = m_aiFilterCoeffSaved[m_iGOPSize][varInd][i]; 2593 m_aiFilterCoeffSaved[m_iGOPSize][varInd][i] = m_filterCoeffPrevSelected[varInd][i]; 2594 } 2595 else 2596 { 2597 m_aiFilterCoeffSaved[m_iCurrentPOC%m_iGOPSize][varInd][i] = m_filterCoeffPrevSelected[varInd][i]; 2598 } 2599 } 2600 #endif 2601 } 2602 } 2603 } 2604 2605 #if MQT_ALF_NPASS 2606 UInt TEncAdaptiveLoopFilter::xcodeFiltCoeff(int **filterCoeffSymQuant, int filtNo, int varIndTab[], int filters_per_fr_best, int frNo, ALFParam* ALFp) 2607 #else 2608 Void TEncAdaptiveLoopFilter::xcodeFiltCoeff(int **filterCoeffSymQuant, int filtNo, int varIndTab[], int filters_per_fr_best, int frNo, ALFParam* ALFp) 2609 #endif 2610 { 2611 int varInd, forceCoeff0, codedVarBins[NO_VAR_BINS], coeffBits, createBistream, sqrFiltLength=m_sqrFiltLengthTab[filtNo], 2612 fl=m_flTab[filtNo], coded; 2613 2614 ALFp->filters_per_group_diff = filters_per_fr_best; 5321 #if !ALF_16_BA_GROUPS 5322 if ( (m_uiVarGenMethod == ALF_BA) && ((*filters_per_fr_best) > 1) ) 5323 { 5324 Int iLastFilter = (*filters_per_fr_best)-1; 5325 if (intervalBest[iLastFilter][0] == NO_VAR_BINS-1) 5326 { 5327 intervalBest[iLastFilter-1][1] = NO_VAR_BINS-1; 5328 (*filters_per_fr_best) = iLastFilter; 5329 } 5330 } 5331 #endif 5332 findFilterCoeff(ESym, ySym, 
pixAcc, filterCoeffSym, filterCoeffSymQuant, intervalBest, 5333 varIndTab, sqrFiltLength, (*filters_per_fr_best), weights, errorForce0CoeffTab); 5334 5335 5336 xfindBestCoeffCodMethod(filterCoeffSymQuant, filter_shape, sqrFiltLength, (*filters_per_fr_best), errorForce0CoeffTab, lambda_val); 5337 coeffBits = xcodeAuxInfo((*filters_per_fr_best), varIndTab, filter_shape, m_tempALFp); 5338 coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, filter_shape, sqrFiltLength, (*filters_per_fr_best), 0, m_tempALFp); 5339 5340 if( *filters_per_fr_best == 1) 5341 { 5342 ::memset(varIndTab, 0, sizeof(Int)*NO_VAR_BINS); 5343 } 5344 } 5345 5346 5347 /** code filter coefficients 5348 * \param filterCoeffSymQuant filter coefficients buffer 5349 * \param filtNo filter No. 5350 * \param varIndTab[] merge index information 5351 * \param filters_per_fr_best the number of filters used in this picture 5352 * \param frNo 5353 * \param ALFp ALF parameters 5354 * \returns bitrate 5355 */ 5356 UInt TEncAdaptiveLoopFilter::xcodeFiltCoeff(Int **filterCoeffSymQuant, Int filter_shape, Int varIndTab[], Int filters_per_fr_best, ALFParam* ALFp) 5357 { 5358 Int coeffBits; 5359 Int sqrFiltLength = m_sqrFiltLengthTab[filter_shape] ; 5360 2615 5361 ALFp->filters_per_group = filters_per_fr_best; 2616 2617 for(varInd=0; varInd<filters_per_fr_best; varInd++) 2618 { 2619 codedVarBins[varInd] = 1; 2620 } 2621 memcpy (ALFp->codedVarBins, codedVarBins, sizeof(int)*NO_VAR_BINS); 2622 forceCoeff0=0; 2623 for(varInd=0; varInd<filters_per_fr_best; varInd++) 2624 { 2625 if (codedVarBins[varInd] == 0) 2626 { 2627 forceCoeff0=1; 2628 break; 2629 } 2630 } 2631 2632 coded=1; 2633 if (forceCoeff0==1 && filters_per_fr_best==1) 2634 { 2635 coded=0; 2636 coeffBits = xcodeAuxInfo(-1, filters_per_fr_best, varIndTab, frNo, createBistream=1,filtNo, ALFp); 2637 } 2638 else 2639 { 2640 coeffBits = xcodeAuxInfo(filtNo, filters_per_fr_best, varIndTab, frNo, createBistream=1,filtNo, ALFp); 2641 } 2642 2643 ALFp->forceCoeff0 = 
forceCoeff0; 5362 5363 coeffBits = xcodeAuxInfo(filters_per_fr_best, varIndTab, filter_shape, ALFp); 5364 5365 2644 5366 ALFp->predMethod = 0; 2645 5367 ALFp->num_coeff = sqrFiltLength; 2646 ALFp->realfiltNo=filtNo; 5368 ALFp->filter_shape=filter_shape; 5369 2647 5370 if (filters_per_fr_best <= 1) 2648 5371 { 2649 ALFp->forceCoeff0 = 0;2650 5372 ALFp->predMethod = 0; 2651 5373 } 2652 2653 if (forceCoeff0==0) 2654 { 2655 coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength, 2656 filters_per_fr_best, createBistream=1, ALFp); 2657 } 2658 else if (filters_per_fr_best>1) 2659 { 2660 coeffBits += xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength, 2661 filters_per_fr_best, codedVarBins, createBistream=1, ALFp); 2662 } 2663 2664 #if MQT_ALF_NPASS 5374 5375 coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, filter_shape, sqrFiltLength, 5376 filters_per_fr_best, 1, ALFp); 5377 2665 5378 return (UInt)coeffBits; 2666 #endif 2667 } 2668 2669 2670 2671 #if TSB_ALF_HEADER 2672 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, ALFParam *pAlfParam) 2673 #else 2674 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist) 5379 } 5380 5381 Void TEncAdaptiveLoopFilter::getCtrlFlagsFromCU(AlfLCUInfo* pcAlfLCU, std::vector<UInt> *pvFlags, Int alfDepth, UInt maxNumSUInLCU) 5382 { 5383 const UInt startSU = pcAlfLCU->startSU; 5384 const UInt endSU = pcAlfLCU->endSU; 5385 const Bool bAllSUsInLCUInSameSlice = pcAlfLCU->bAllSUsInLCUInSameSlice; 5386 5387 TComDataCU* pcCU = pcAlfLCU->pcCU; 5388 UInt currSU, CUDepth, setDepth, ctrlNumSU; 5389 5390 currSU = startSU; 5391 5392 if(bAllSUsInLCUInSameSlice) 5393 { 5394 while(currSU < maxNumSUInLCU) 5395 { 5396 //depth of this CU 5397 CUDepth = pcCU->getDepth(currSU); 5398 5399 //choose the min. 
depth for ALF 5400 setDepth = (alfDepth < CUDepth)?(alfDepth):(CUDepth); 5401 ctrlNumSU = maxNumSUInLCU >> (setDepth << 1); 5402 5403 pvFlags->push_back(pcCU->getAlfCtrlFlag(currSU)); 5404 currSU += ctrlNumSU; 5405 } 5406 5407 return; 5408 } 5409 5410 5411 const UInt LCUX = pcCU->getCUPelX(); 5412 const UInt LCUY = pcCU->getCUPelY(); 5413 5414 Bool bFirst, bValidCU; 5415 UInt idx, LPelXSU, TPelYSU; 5416 5417 bFirst= true; 5418 while(currSU <= endSU) 5419 { 5420 //check picture boundary 5421 while(!( LCUX + g_auiRasterToPelX[ g_auiZscanToRaster[currSU] ] < m_img_width ) || 5422 !( LCUY + g_auiRasterToPelY[ g_auiZscanToRaster[currSU] ] < m_img_height ) 5423 ) 5424 { 5425 currSU++; 5426 5427 if(currSU >= maxNumSUInLCU || currSU > endSU) 5428 { 5429 break; 5430 } 5431 } 5432 5433 if(currSU >= maxNumSUInLCU || currSU > endSU) 5434 { 5435 break; 5436 } 5437 5438 //depth of this CU 5439 CUDepth = pcCU->getDepth(currSU); 5440 5441 //choose the min. depth for ALF 5442 setDepth = (alfDepth < CUDepth)?(alfDepth):(CUDepth); 5443 ctrlNumSU = maxNumSUInLCU >> (setDepth << 1); 5444 5445 if(bFirst) 5446 { 5447 if(currSU !=0 ) 5448 { 5449 currSU = ((UInt)(currSU/ctrlNumSU))* ctrlNumSU; 5450 } 5451 bFirst = false; 5452 } 5453 5454 bValidCU = false; 5455 for(idx = currSU; idx < currSU + ctrlNumSU; idx++) 5456 { 5457 if(idx < startSU || idx > endSU) 5458 { 5459 continue; 5460 } 5461 5462 LPelXSU = LCUX + g_auiRasterToPelX[ g_auiZscanToRaster[idx] ]; 5463 TPelYSU = LCUY + g_auiRasterToPelY[ g_auiZscanToRaster[idx] ]; 5464 5465 if( !( LPelXSU < m_img_width ) || !( TPelYSU < m_img_height ) ) 5466 { 5467 continue; 5468 } 5469 5470 bValidCU = true; 5471 } 5472 5473 if(bValidCU) 5474 { 5475 pvFlags->push_back(pcCU->getAlfCtrlFlag(currSU)); 5476 } 5477 5478 currSU += ctrlNumSU; 5479 } 5480 } 5481 5482 5483 /** set ALF CU control flags 5484 * \param [in] uiAlfCtrlDepth ALF CU control depth 5485 * \param [in] pcPicOrg picture of original signal 5486 * \param [in] pcPicDec picture before 
filtering 5487 * \param [in] pcPicRest picture after filtering 5488 * \param [out] ruiDist distortion after CU control 5489 * \param [in,out]vAlfCUCtrlParam ALF CU control parameters 5490 */ 5491 #if LCU_SYNTAX_ALF 5492 #if HHI_INTERVIEW_SKIP 5493 Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlags(UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Pel* imgUsed, Int stride, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam) 5494 #else 5495 Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlags(UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Int stride, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam) 5496 #endif 5497 #else 5498 #if HHI_INTERVIEW_SKIP 5499 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, TComPicYuv* pUsedPelMap, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam) 5500 #else 5501 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam) 5502 #endif 2675 5503 #endif 2676 5504 { 2677 5505 ruiDist = 0; 2678 #if TSB_ALF_HEADER 2679 pAlfParam->num_alf_cu_flag = 0; 2680 #endif 2681 2682 for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ ) 2683 { 2684 TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr ); 2685 #if TSB_ALF_HEADER 2686 xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, pAlfParam); 2687 #else 2688 xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist); 2689 #endif 2690 } 2691 } 2692 2693 #if TSB_ALF_HEADER 2694 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, ALFParam *pAlfParam) 2695 #else 2696 Void 
TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist) 5506 std::vector<UInt> uiFlags; 5507 5508 //initial 5509 for(Int s=0; s< m_uiNumSlicesInPic; s++) 5510 { 5511 vAlfCUCtrlParam[s].cu_control_flag = 1; 5512 vAlfCUCtrlParam[s].alf_max_depth = uiAlfCtrlDepth; 5513 5514 vAlfCUCtrlParam[s].alf_cu_flag.reserve(m_uiNumCUsInFrame << ((g_uiMaxCUDepth-1)*2)); 5515 vAlfCUCtrlParam[s].alf_cu_flag.resize(0); 5516 } 5517 5518 //LCU-based on/off control 5519 for( UInt CUAddr = 0; CUAddr < m_pcPic->getNumCUsInFrame() ; CUAddr++ ) 5520 { 5521 TComDataCU* pcCU = m_pcPic->getCU( CUAddr ); 5522 #if LCU_SYNTAX_ALF 5523 #if HHI_INTERVIEW_SKIP 5524 setCUAlfCtrlFlag(pcCU, 0, 0, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, imgUsed, stride, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag); 5525 #else 5526 setCUAlfCtrlFlag(pcCU, 0, 0, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, stride, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag); 5527 #endif 5528 #else 5529 #if HHI_INTERVIEW_SKIP 5530 xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest,imgUsed, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag); 5531 #else 5532 xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag); 5533 #endif 5534 #endif 5535 } 5536 vAlfCUCtrlParam[0].num_alf_cu_flag = (UInt)(vAlfCUCtrlParam[0].alf_cu_flag.size()); 5537 5538 5539 if(m_uiNumSlicesInPic > 1) 5540 { 5541 //reset the first slice on/off flags 5542 vAlfCUCtrlParam[0].alf_cu_flag.resize(0); 5543 5544 //distribute on/off flags to slices 5545 std::vector<UInt> vCtrlFlags; 5546 vCtrlFlags.reserve(1 << ((g_uiMaxCUDepth-1)*2)); 5547 5548 for(Int s=0; s < m_uiNumSlicesInPic; s++) 5549 { 5550 if(!m_pcPic->getValidSlice(s)) 5551 { 5552 continue; 5553 } 5554 std::vector< AlfLCUInfo* >& vpAlfLCU = m_pvpAlfLCU[s]; 5555 for(Int i=0; i< vpAlfLCU.size(); i++) 5556 { 
5557 //get on/off flags for one LCU 5558 vCtrlFlags.resize(0); 5559 getCtrlFlagsFromCU(vpAlfLCU[i], &vCtrlFlags, (Int)uiAlfCtrlDepth, m_pcPic->getNumPartInCU()); 5560 5561 for(Int k=0; k< vCtrlFlags.size(); k++) 5562 { 5563 vAlfCUCtrlParam[s].alf_cu_flag.push_back( vCtrlFlags[k]); 5564 } 5565 } //i (LCU) 5566 vAlfCUCtrlParam[s].num_alf_cu_flag = (UInt)(vAlfCUCtrlParam[s].alf_cu_flag.size()); 5567 } //s (Slice) 5568 } 5569 } 5570 5571 #if LCU_SYNTAX_ALF 5572 #if HHI_INTERVIEW_SKIP 5573 Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Pel* imgUsed, Int stride, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag) 5574 #else 5575 Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Int stride, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag) 5576 #endif 5577 #else 5578 #if HHI_INTERVIEW_SKIP 5579 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, TComPicYuv* pcUsedPelMap, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag) 5580 #else 5581 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag) 5582 #endif 2697 5583 #endif 2698 5584 { … … 2703 5589 UInt uiBPelY = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1; 2704 5590 2705 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->get Width() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )5591 if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 2706 5592 { 2707 5593 bBoundary = true; … … 2716 5602 uiTPelY = 
pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ]; 2717 5603 2718 if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) ) 2719 #if TSB_ALF_HEADER 2720 xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, pAlfParam); 2721 #else 2722 xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist); 5604 if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) ) 5605 #if LCU_SYNTAX_ALF 5606 #if HHI_INTERVIEW_SKIP 5607 setCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, imgUsed, stride, ruiDist, vCUCtrlFlag); 5608 #else 5609 setCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, stride, ruiDist, vCUCtrlFlag); 5610 #endif 5611 #else 5612 #if HHI_INTERVIEW_SKIP 5613 xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, imgUsed, ruiDist, vCUCtrlFlag); 5614 #else 5615 xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, vCUCtrlFlag); 5616 #endif 2723 5617 #endif 2724 5618 } … … 2730 5624 return; 2731 5625 } 2732 5626 #if !LCU_SYNTAX_ALF 2733 5627 UInt uiCUAddr = pcCU->getAddr(); 5628 #endif 2734 5629 UInt64 uiRecSSD = 0; 2735 5630 UInt64 uiFiltSSD = 0; … … 2747 5642 uiBPelY = uiTPelY + iHeight - 1; 2748 5643 2749 if( uiRPelX >= pcCU->getSlice()->getSPS()->get Width() )2750 { 2751 iWidth = pcCU->getSlice()->getSPS()->get Width() - uiLPelX;5644 if( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) 5645 { 5646 iWidth = pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() - uiLPelX; 2752 5647 } 2753 5648 2754 if( uiBPelY >= pcCU->getSlice()->getSPS()->get Height() )2755 { 2756 iHeight = pcCU->getSlice()->getSPS()->get 
Height() - uiTPelY;5649 if( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) 5650 { 5651 iHeight = pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() - uiTPelY; 2757 5652 } 2758 5653 … … 2766 5661 } 2767 5662 5663 #if LCU_SYNTAX_ALF 5664 Int offset = uiTPelY*stride + uiLPelX; 5665 Pel* pOrg = imgOrg + offset; 5666 Pel* pRec = imgDec + offset; 5667 Pel* pFilt = imgRest + offset; 5668 5669 #if HHI_INTERVIEW_SKIP 5670 Pel* pUsed = NULL ; 5671 if( imgUsed ) 5672 { 5673 pUsed = imgUsed + offset; 5674 } 5675 uiRecSSD += xCalcSSD( pOrg, pRec, pUsed, iWidth, iHeight, stride ); 5676 uiFiltSSD += xCalcSSD( pOrg, pFilt, pUsed, iWidth, iHeight, stride ); 5677 #else 5678 uiRecSSD += xCalcSSD( pOrg, pRec, iWidth, iHeight, stride ); 5679 uiFiltSSD += xCalcSSD( pOrg, pFilt, iWidth, iHeight, stride ); 5680 #endif 5681 #else 2768 5682 Pel* pOrg = pcPicOrg->getLumaAddr(uiCUAddr, uiAbsPartIdx); 2769 5683 Pel* pRec = pcPicDec->getLumaAddr(uiCUAddr, uiAbsPartIdx); 2770 5684 Pel* pFilt = pcPicRest->getLumaAddr(uiCUAddr, uiAbsPartIdx); 2771 5685 5686 #if HHI_INTERVIEW_SKIP 5687 Pel* pUsed = pcUsedPelMap->getLumaAddr(uiCUAddr, uiAbsPartIdx); 5688 uiRecSSD += xCalcSSD( pOrg, pRec, pUsed, iWidth, iHeight, pcPicOrg->getStride() ); 5689 uiFiltSSD += xCalcSSD( pOrg, pFilt, pUsed, iWidth, iHeight, pcPicOrg->getStride() ); 5690 #else 2772 5691 uiRecSSD += xCalcSSD( pOrg, pRec, iWidth, iHeight, pcPicOrg->getStride() ); 2773 5692 uiFiltSSD += xCalcSSD( pOrg, pFilt, iWidth, iHeight, pcPicOrg->getStride() ); 2774 5693 #endif 5694 #endif 2775 5695 if (uiFiltSSD < uiRecSSD) 2776 5696 { 2777 5697 ruiDist += uiFiltSSD; 2778 5698 pcCU->setAlfCtrlFlagSubParts(1, uiAbsPartIdx, uiSetDepth); 2779 #if TSB_ALF_HEADER 2780 pAlfParam->alf_cu_flag[pAlfParam->num_alf_cu_flag]=1; 2781 #endif 5699 vCUCtrlFlag.push_back(1); 5700 5701 #if LCU_SYNTAX_ALF 5702 for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(m_img_height-1)) ;i++) 5703 { 5704 for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned 
int)(m_img_width-1)) ;j++) 5705 { 5706 #else 2782 5707 for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(pcPicOrg->getHeight()-1)) ;i++) 2783 5708 { 2784 5709 for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(pcPicOrg->getWidth()-1)) ;j++) 2785 5710 { 5711 #endif 2786 5712 m_maskImg[i][j]=1; 2787 5713 } … … 2792 5718 ruiDist += uiRecSSD; 2793 5719 pcCU->setAlfCtrlFlagSubParts(0, uiAbsPartIdx, uiSetDepth); 2794 #if TSB_ALF_HEADER 2795 pAlfParam->alf_cu_flag[pAlfParam->num_alf_cu_flag]=0; 2796 #endif 5720 vCUCtrlFlag.push_back(0); 5721 #if LCU_SYNTAX_ALF 5722 for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(m_img_height-1)) ;i++) 5723 { 5724 for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(m_img_width-1)) ;j++) 5725 { 5726 #else 2797 5727 for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(pcPicOrg->getHeight()-1)) ;i++) 2798 5728 { 2799 5729 for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(pcPicOrg->getWidth()-1)) ;j++) 2800 5730 { 5731 #endif 2801 5732 m_maskImg[i][j]=0; 2802 5733 } 2803 5734 } 2804 5735 } 2805 #if TSB_ALF_HEADER 2806 pAlfParam->num_alf_cu_flag++; 2807 #endif 2808 } 5736 } 5737 5738 #if !LCU_SYNTAX_ALF 2809 5739 2810 5740 Void TEncAdaptiveLoopFilter::xReDesignFilterCoeff_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, Bool bReadCorr) 2811 5741 { 2812 2813 Int tap = m_pcTempAlfParam->tap; 5742 Int tap = m_pcTempAlfParam->filter_shape; 2814 5743 Int LumaStride = pcPicOrg->getStride(); 2815 imgpel* pOrg = (imgpel*)pcPicOrg->getLumaAddr();2816 imgpel* pDec = (imgpel*)pcPicDec->getLumaAddr();2817 imgpel* pRest = (imgpel*)pcPicRest->getLumaAddr();5744 Pel* pOrg = pcPicOrg->getLumaAddr(); 5745 Pel* pDec = pcPicDec->getLumaAddr(); 5746 Pel* pRest = pcPicRest->getLumaAddr(); 2818 5747 xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcTempAlfParam, tap, LumaStride); 2819 5748 2820 } 5749 if (m_iALFEncodePassReduction) 5750 { 5751 if(!m_iUsePreviousFilter) 5752 { 5753 saveFilterCoeffToBuffer(m_filterCoeffSym, 
m_pcTempAlfParam->filters_per_group, m_varIndTab, m_pcTempAlfParam->alf_pcr_region_flag, tap); 5754 } 5755 } 5756 } 5757 2821 5758 Void TEncAdaptiveLoopFilter::xCUAdaptiveControl_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost) 2822 5759 { 2823 #if MQT_ALF_NPASS 2824 imgpel** maskImgTemp; 5760 if(!m_bAlfCUCtrlEnabled) return; 5761 Bool bChanged = false; 5762 std::vector<AlfCUCtrlInfo> vAlfCUCtrlParamTemp(m_vBestAlfCUCtrlParam); 5763 5764 Pel** maskImgTemp; 2825 5765 2826 5766 if(m_iALFEncodePassReduction == 2) 2827 5767 { 2828 get_mem2Dpel(&maskImgTemp, m_im_height, m_im_width); 2829 } 2830 #endif 5768 initMatrix_Pel(&maskImgTemp, m_img_height, m_img_width); 5769 } 2831 5770 2832 5771 m_pcEntropyCoder->setAlfCtrl(true); … … 2843 5782 pcPicRest->copyToPicLuma(m_pcPicYuvTmp); 2844 5783 copyALFParam(m_pcTempAlfParam, &cFrmAlfParam); 2845 m_pcTempAlfParam->cu_control_flag = 1; 2846 2847 #if MQT_ALF_NPASS 5784 2848 5785 for (UInt uiRD = 0; uiRD <= m_iALFNumOfRedesign; uiRD++) 2849 #else2850 for (UInt uiRD = 0; uiRD <= ALF_NUM_OF_REDESIGN; uiRD++)2851 #endif2852 5786 { 2853 5787 if (uiRD) … … 2860 5794 Double dCost; 2861 5795 //m_pcPicYuvTmp: filtered signal, pcPicDec: orig reconst 2862 #if TSB_ALF_HEADER 2863 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here 2864 #else 2865 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here 2866 #endif 2867 2868 xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost); 2869 5796 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, vAlfCUCtrlParamTemp); 5797 xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost, &vAlfCUCtrlParamTemp); 2870 5798 if (dCost < rdMinCost) 2871 5799 { 5800 bChanged = true; 5801 m_vBestAlfCUCtrlParam = vAlfCUCtrlParamTemp; 2872 5802 uiBestDepth = uiDepth; 2873 5803 rdMinCost = dCost; … … 2878 5808 //save 
maskImg 2879 5809 xCopyTmpAlfCtrlFlagsFrom(); 2880 #if MQT_ALF_NPASS2881 5810 if(m_iALFEncodePassReduction == 2) 2882 5811 { 2883 ::memcpy(maskImgTemp[0], m_maskImg[0], sizeof( imgpel)*m_im_height* m_im_width);5812 ::memcpy(maskImgTemp[0], m_maskImg[0], sizeof(Pel)*m_img_height* m_img_width); 2884 5813 } 2885 #endif 2886 } 2887 } 2888 } 2889 2890 if (m_pcBestAlfParam->cu_control_flag) 2891 { 2892 #if MQT_ALF_NPASS 5814 } 5815 } 5816 } 5817 5818 if(bChanged) 5819 { 2893 5820 if(m_iALFEncodePassReduction == 2) 2894 5821 { 2895 5822 UInt uiDepth = uiBestDepth; 2896 ::memcpy(m_maskImg[0], maskImgTemp[0], sizeof( imgpel)*m_im_height* m_im_width);5823 ::memcpy(m_maskImg[0], maskImgTemp[0], sizeof(Pel)*m_img_height* m_img_width); 2897 5824 xCopyTmpAlfCtrlFlagsTo(); 2898 5825 … … 2907 5834 UInt64 uiRate, uiDist; 2908 5835 Double dCost; 2909 2910 #if TSB_ALF_HEADER 2911 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here 2912 #else 2913 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here 2914 #endif 2915 2916 xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost); 2917 5836 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, vAlfCUCtrlParamTemp); 5837 xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost, &vAlfCUCtrlParamTemp); 2918 5838 if (dCost < rdMinCost) 2919 5839 { … … 2924 5844 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam); 2925 5845 xCopyTmpAlfCtrlFlagsFrom(); 2926 }2927 }2928 #endif 5846 m_vBestAlfCUCtrlParam = vAlfCUCtrlParamTemp; 5847 } 5848 } 2929 5849 2930 5850 m_pcEntropyCoder->setAlfCtrl(true); 2931 5851 m_pcEntropyCoder->setMaxAlfCtrlDepth(uiBestDepth); 2932 5852 xCopyTmpAlfCtrlFlagsTo(); 5853 2933 5854 m_pcPicYuvBest->copyToPicLuma(pcPicRest);//copy m_pcPicYuvBest to pcPicRest 2934 5855 xCopyDecToRestCUs(pcPicDec, pcPicRest); //pcPicRest = pcPicDec … … 2941 5862 freeALFParam(&cFrmAlfParam); 2942 5863 2943 #if MQT_ALF_NPASS2944 5864 
if(m_iALFEncodePassReduction == 2) 2945 5865 { 2946 free_mem2Dpel(maskImgTemp); 2947 } 2948 #endif 2949 } 2950 2951 2952 Void TEncAdaptiveLoopFilter::xFilterTapDecision_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost) 2953 { 2954 #if MQT_ALF_NPASS 2955 if(m_iALFEncodePassReduction) 2956 { 2957 return; // filter tap has been decided in xEncALFLuma_qc 2958 } 2959 #endif 2960 2961 // restriction for non-referenced B-slice 2962 if (m_eSliceType == B_SLICE && m_iPicNalReferenceIdc == 0) 2963 { 2964 return; 2965 } 2966 2967 UInt64 uiRate, uiDist; 2968 Double dCost; 2969 2970 if (m_pcBestAlfParam->cu_control_flag) 2971 { 2972 xCopyTmpAlfCtrlFlagsFrom(); 2973 } 2974 2975 Bool bChanged = false; 2976 for (Int iTap = ALF_MIN_NUM_TAP; iTap <= ALF_MAX_NUM_TAP; iTap += 2) 2977 { 2978 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam); 2979 m_pcTempAlfParam->tap = iTap; 2980 #if TI_ALF_MAX_VSIZE_7 2981 m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(m_pcTempAlfParam->tap); 2982 m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(m_pcTempAlfParam->tap); 2983 #else 2984 m_pcTempAlfParam->num_coeff = (Int)(iTap*iTap/4) + 2; 2985 #endif 2986 2987 if (m_pcTempAlfParam->cu_control_flag) 2988 { 2989 xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, false); 2990 #if TSB_ALF_HEADER 2991 xSetCUAlfCtrlFlags_qc(m_pcEntropyCoder->getMaxAlfCtrlDepth(), pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); 2992 #else 2993 xSetCUAlfCtrlFlags_qc(m_pcEntropyCoder->getMaxAlfCtrlDepth(), pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); 2994 #endif 2995 xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost); 2996 } 2997 2998 else 2999 { 3000 Int Height = pcPicOrg->getHeight(); 3001 Int Width = pcPicOrg->getWidth(); 3002 for (Int i=0; i<Height; i++) 3003 { 3004 for (Int j=0; j<Width; j++) 3005 { 3006 m_maskImg[i][j] = 1; 3007 } 3008 } 3009 xReDesignFilterCoeff_qc(pcPicOrg, 
pcPicDec, m_pcPicYuvTmp, false); 3010 3011 xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, m_pcTempAlfParam, uiRate, uiDist, dCost); 3012 } 3013 3014 if (dCost < rdMinCost) 3015 { 3016 rdMinCost = dCost; 3017 ruiMinDist = uiDist; 3018 ruiMinRate = uiRate; 3019 m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest); 3020 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam); 3021 bChanged = true; 3022 if (m_pcTempAlfParam->cu_control_flag) 3023 { 3024 xCopyTmpAlfCtrlFlagsFrom(); 3025 } 3026 } 3027 } 3028 3029 if (m_pcBestAlfParam->cu_control_flag) 3030 { 3031 xCopyTmpAlfCtrlFlagsTo(); 3032 if (bChanged) 3033 { 3034 m_pcPicYuvBest->copyToPicLuma(pcPicRest); 3035 xCopyDecToRestCUs(pcPicDec, pcPicRest); 3036 } 3037 } 3038 else if (m_pcBestAlfParam->tap > ALF_MIN_NUM_TAP) 3039 { 3040 m_pcPicYuvBest->copyToPicLuma(pcPicRest); 3041 } 3042 3043 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam); 3044 } 3045 5866 destroyMatrix_Pel(maskImgTemp); 5867 } 5868 } 5869 5870 #endif 3046 5871 3047 5872 #define ROUND(a) (((a) < 0)? 
(int)((a) - 0.5) : (int)((a) + 0.5)) … … 3050 5875 3051 5876 //Find filter coeff related 3052 Int TEncAdaptiveLoopFilter::gnsCholeskyDec( double **inpMatr, double outMatr[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], int noEq)5877 Int TEncAdaptiveLoopFilter::gnsCholeskyDec(Double **inpMatr, Double outMatr[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF], Int noEq) 3053 5878 { 3054 int 3055 i, j, k; /* Looping Variables */ 3056 double 3057 scale; /* scaling factor for each row */ 3058 double 3059 invDiag[MAX_SQR_FILT_LENGTH]; /* Vector of the inverse of diagonal entries of outMatr */ 3060 3061 3062 /* 3063 * Cholesky decomposition starts 3064 */ 5879 Int i, j, k; /* Looping Variables */ 5880 Double scale; /* scaling factor for each row */ 5881 Double invDiag[ALF_MAX_NUM_COEF]; /* Vector of the inverse of diagonal entries of outMatr */ 5882 5883 // Cholesky decomposition starts 3065 5884 3066 5885 for(i = 0; i < noEq; i++) … … 3069 5888 { 3070 5889 /* Compute the scaling factor */ 3071 scale=inpMatr[i][j]; 3072 if ( i > 0) for( k = i - 1 ; k >= 0 ; k--) 3073 scale -= outMatr[k][j] * outMatr[k][i]; 3074 5890 scale = inpMatr[i][j]; 5891 if ( i > 0) 5892 { 5893 for( k = i - 1 ; k >= 0 ; k--) 5894 { 5895 scale -= outMatr[k][j] * outMatr[k][i]; 5896 } 5897 } 3075 5898 /* Compute i'th row of outMatr */ 3076 if(i ==j)5899 if(i == j) 3077 5900 { 3078 5901 if(scale <= REG_SQR ) // if(scale <= 0 ) /* If inpMatr is singular */ 3079 5902 { 3080 return (0);5903 return 0; 3081 5904 } 3082 else /* Normal operation */ 3083 invDiag[i] = 1.0/(outMatr[i][i]=sqrt(scale)); 5905 else 5906 { 5907 /* Normal operation */ 5908 invDiag[i] = 1.0 / (outMatr[i][i] = sqrt(scale)); 5909 } 3084 5910 } 3085 5911 else 3086 5912 { 3087 outMatr[i][j] = scale *invDiag[i]; /* Upper triangular part */5913 outMatr[i][j] = scale * invDiag[i]; /* Upper triangular part */ 3088 5914 outMatr[j][i] = 0.0; /* Lower triangular part set to 0 */ 3089 5915 } 3090 5916 } 3091 5917 } 3092 return(1); /* Signal that Cholesky 
factorization is successfully performed */ 3093 } 3094 3095 3096 Void TEncAdaptiveLoopFilter::gnsTransposeBacksubstitution(double U[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], double rhs[], double x[], int order) 3097 { 3098 int 3099 i,j; /* Looping variables */ 3100 double 3101 sum; /* Holds backsubstitution from already handled rows */ 5918 return 1; /* Signal that Cholesky factorization is successfully performed */ 5919 } 5920 5921 5922 Void TEncAdaptiveLoopFilter::gnsTransposeBacksubstitution(Double U[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF], Double rhs[], Double x[], Int order) 5923 { 5924 Int i,j; /* Looping variables */ 5925 Double sum; /* Holds backsubstitution from already handled rows */ 3102 5926 3103 5927 /* Backsubstitution starts */ 3104 x[0] = rhs[0] /U[0][0]; /* First row of U' */5928 x[0] = rhs[0] / U[0][0]; /* First row of U' */ 3105 5929 for (i = 1; i < order; i++) 3106 5930 { /* For the rows 1..order-1 */ 3107 5931 3108 5932 for (j = 0, sum = 0.0; j < i; j++) /* Backsubst already solved unknowns */ 3109 sum += x[j]*U[j][i]; 3110 3111 x[i]=(rhs[i] - sum)/U[i][i]; /* i'th component of solution vect. */ 3112 } 3113 } 3114 3115 3116 3117 Void TEncAdaptiveLoopFilter::gnsBacksubstitution(double R[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], double z[MAX_SQR_FILT_LENGTH], int R_size, double A[MAX_SQR_FILT_LENGTH]) 3118 { 3119 int i, j; 3120 double sum; 5933 { 5934 sum += x[j] * U[j][i]; 5935 } 5936 x[i] = (rhs[i] - sum) / U[i][i]; /* i'th component of solution vect. 
*/ 5937 } 5938 } 5939 5940 Void TEncAdaptiveLoopFilter::gnsBacksubstitution(Double R[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF], Double z[ALF_MAX_NUM_COEF], Int R_size, Double A[MAX_SQR_FILT_LENGTH]) 5941 { 5942 Int i, j; 5943 Double sum; 3121 5944 3122 5945 R_size--; … … 3126 5949 for (i = R_size-1; i >= 0; i--) 3127 5950 { 3128 for (j = i+1, sum = 0.0; j <= R_size; j++) 5951 for (j = i + 1, sum = 0.0; j <= R_size; j++) 5952 { 3129 5953 sum += R[i][j] * A[j]; 5954 } 3130 5955 3131 5956 A[i] = (z[i] - sum) / R[i][i]; … … 3134 5959 3135 5960 3136 Int TEncAdaptiveLoopFilter::gnsSolveByChol(double **LHS, double *rhs, double *x, int noEq) 3137 { 3138 double aux[MAX_SQR_FILT_LENGTH]; /* Auxiliary vector */ 3139 double U[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH]; /* Upper triangular Cholesky factor of LHS */ 3140 int i, singular; /* Looping variable */ 5961 Int TEncAdaptiveLoopFilter::gnsSolveByChol(Double **LHS, Double *rhs, Double *x, Int noEq) 5962 { 5963 assert(noEq > 0); 5964 5965 Double aux[ALF_MAX_NUM_COEF]; /* Auxiliary vector */ 5966 Double U[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF]; /* Upper triangular Cholesky factor of LHS */ 5967 Int i, singular; /* Looping variable */ 3141 5968 3142 5969 /* The equation to be solved is LHSx = rhs */ … … 3145 5972 if(gnsCholeskyDec(LHS, U, noEq)) /* If Cholesky decomposition has been successful */ 3146 5973 { 3147 singular =1;5974 singular = 1; 3148 5975 /* Now, the equation is U'*U*x = rhs, where U is upper triangular 3149 5976 * Solve U'*aux = rhs for aux … … 3157 5984 else /* LHS was singular */ 3158 5985 { 3159 singular =0;5986 singular = 0; 3160 5987 3161 5988 /* Regularize LHS */ 3162 for(i=0; i<noEq; i++) 5989 for(i=0; i < noEq; i++) 5990 { 3163 5991 LHS[i][i] += REG; 5992 } 3164 5993 /* Compute upper triangular U such that U'*U = regularized LHS */ 3165 5994 singular = gnsCholeskyDec(LHS, U, noEq); 3166 /* Solve U'*aux = rhs for aux */ 3167 gnsTransposeBacksubstitution(U, rhs, aux, noEq); 3168 3169 /* Solve U*x = aux for x */ 
3170 gnsBacksubstitution(U, aux, noEq, x); 5995 if ( singular == 1 ) 5996 { 5997 /* Solve U'*aux = rhs for aux */ 5998 gnsTransposeBacksubstitution(U, rhs, aux, noEq); 5999 6000 /* Solve U*x = aux for x */ 6001 gnsBacksubstitution(U, aux, noEq, x); 6002 } 6003 else 6004 { 6005 x[0] = 1.0; 6006 for (i = 1; i < noEq; i++ ) 6007 { 6008 x[i] = 0.0; 6009 } 6010 } 3171 6011 } 3172 return(singular); 3173 } 3174 3175 3176 ////////////////////////////////////////////////////////////////////////////////////////// 3177 3178 3179 Void TEncAdaptiveLoopFilter::add_A(double **Amerged, double ***A, int start, int stop, int size) 6012 return singular; 6013 } 6014 6015 Void TEncAdaptiveLoopFilter::add_A(Double **Amerged, Double ***A, Int start, Int stop, Int size) 3180 6016 { 3181 int 3182 i, j, ind; /* Looping variable */ 3183 3184 for (i=0; i<size; i++) 3185 { 3186 for (j=0; j<size; j++) 3187 { 3188 Amerged[i][j]=0; 3189 for (ind=start; ind<=stop; ind++) 3190 { 3191 Amerged[i][j]+=A[ind][i][j]; 3192 } 3193 } 3194 } 3195 } 3196 3197 Void TEncAdaptiveLoopFilter::add_b(double *bmerged, double **b, int start, int stop, int size) 6017 Int i, j, ind; /* Looping variable */ 6018 6019 for (i = 0; i < size; i++) 6020 { 6021 for (j = 0; j < size; j++) 6022 { 6023 Amerged[i][j] = 0; 6024 for (ind = start; ind <= stop; ind++) 6025 { 6026 Amerged[i][j] += A[ind][i][j]; 6027 } 6028 } 6029 } 6030 } 6031 6032 Void TEncAdaptiveLoopFilter::add_b(Double *bmerged, Double **b, Int start, Int stop, Int size) 3198 6033 { 3199 int 3200 i, ind; /* Looping variable */ 3201 3202 for (i=0; i<size; i++) 3203 { 3204 bmerged[i]=0; 3205 for (ind=start; ind<=stop; ind++) 3206 { 3207 bmerged[i]+=b[ind][i]; 3208 } 3209 } 3210 } 3211 3212 double TEncAdaptiveLoopFilter::calculateErrorCoeffProvided(double **A, double *b, double *c, int size) 3213 { 3214 int i, j; 3215 double error, sum=0; 3216 3217 error=0; 3218 for (i=0; i<size; i++) //diagonal 3219 { 3220 sum=0; 3221 for (j=i+1; j<size; j++) 3222 
sum+=(A[j][i]+A[i][j])*c[j]; 3223 error+=(A[i][i]*c[i]+sum-2*b[i])*c[i]; 3224 } 3225 3226 return(error); 3227 } 3228 3229 double TEncAdaptiveLoopFilter::calculateErrorAbs(double **A, double *b, double y, int size) 3230 { 3231 int i; 3232 double error, sum; 3233 double c[MAX_SQR_FILT_LENGTH]; 6034 Int i, ind; /* Looping variable */ 6035 6036 for (i = 0; i < size; i++) 6037 { 6038 bmerged[i] = 0; 6039 for (ind = start; ind <= stop; ind++) 6040 { 6041 bmerged[i] += b[ind][i]; 6042 } 6043 } 6044 } 6045 6046 Double TEncAdaptiveLoopFilter::calculateErrorCoeffProvided(Double **A, Double *b, Double *c, Int size) 6047 { 6048 Int i, j; 6049 Double error, sum = 0; 6050 6051 error = 0; 6052 for (i = 0; i < size; i++) //diagonal 6053 { 6054 sum = 0; 6055 for (j = i + 1; j < size; j++) 6056 { 6057 sum += (A[j][i] + A[i][j]) * c[j]; 6058 } 6059 error += (A[i][i] * c[i] + sum - 2 * b[i]) * c[i]; 6060 } 6061 6062 return error; 6063 } 6064 6065 Double TEncAdaptiveLoopFilter::calculateErrorAbs(Double **A, Double *b, Double y, Int size) 6066 { 6067 Int i; 6068 Double error, sum; 6069 Double c[ALF_MAX_NUM_COEF]; 3234 6070 3235 6071 gnsSolveByChol(A, b, c, size); 3236 6072 3237 sum =0;3238 for (i =0; i<size; i++)3239 { 3240 sum +=c[i]*b[i];3241 } 3242 error =y-sum;3243 3244 return (error);3245 } 3246 3247 double TEncAdaptiveLoopFilter::mergeFiltersGreedy(double **yGlobalSeq, double ***EGlobalSeq, double *pixAccGlobalSeq, int intervalBest[NO_VAR_BINS][2], int sqrFiltLength, int noIntervals)3248 { 3249 int first, ind, ind1, ind2, i, j, bestToMerge ;3250 double error, error1, error2, errorMin;3251 static double pixAcc_temp, error_tab[NO_VAR_BINS],error_comb_tab[NO_VAR_BINS];3252 static int indexList[NO_VAR_BINS], available[NO_VAR_BINS], noRemaining;6073 sum = 0; 6074 for (i = 0; i < size; i++) 6075 { 6076 sum += c[i] * b[i]; 6077 } 6078 error = y - sum; 6079 6080 return error; 6081 } 6082 6083 Double TEncAdaptiveLoopFilter::mergeFiltersGreedy(Double **yGlobalSeq, Double ***EGlobalSeq, 
Double *pixAccGlobalSeq, Int intervalBest[NO_VAR_BINS][2], Int sqrFiltLength, Int noIntervals) 6084 { 6085 Int first, ind, ind1, ind2, i, j, bestToMerge ; 6086 Double error, error1, error2, errorMin; 6087 static Double pixAcc_temp, error_tab[NO_VAR_BINS],error_comb_tab[NO_VAR_BINS]; 6088 static Int indexList[NO_VAR_BINS], available[NO_VAR_BINS], noRemaining; 3253 6089 if (noIntervals == NO_FILTERS) 3254 6090 { 3255 noRemaining =NO_VAR_BINS;6091 noRemaining = NO_VAR_BINS; 3256 6092 for (ind=0; ind<NO_VAR_BINS; ind++) 3257 6093 { 3258 indexList[ind] =ind;3259 available[ind] =1;3260 m_pixAcc_merged[ind] =pixAccGlobalSeq[ind];3261 memcpy(m_y_merged[ind], yGlobalSeq[ind],sizeof(double)*sqrFiltLength);3262 for (i=0; i <sqrFiltLength; i++)3263 { 3264 memcpy(m_E_merged[ind][i], EGlobalSeq[ind][i],sizeof(double)*sqrFiltLength);6094 indexList[ind] = ind; 6095 available[ind] = 1; 6096 m_pixAcc_merged[ind] = pixAccGlobalSeq[ind]; 6097 memcpy(m_y_merged[ind], yGlobalSeq[ind], sizeof(Double)*sqrFiltLength); 6098 for (i=0; i < sqrFiltLength; i++) 6099 { 6100 memcpy(m_E_merged[ind][i], EGlobalSeq[ind][i], sizeof(Double)*sqrFiltLength); 3265 6101 } 3266 6102 } … … 3269 6105 if (noIntervals == NO_FILTERS) 3270 6106 { 3271 for (ind =0; ind<NO_VAR_BINS; ind++)3272 { 3273 error_tab[ind] =calculateErrorAbs(m_E_merged[ind], m_y_merged[ind], m_pixAcc_merged[ind], sqrFiltLength);3274 } 3275 for (ind =0; ind<NO_VAR_BINS-1; ind++)3276 { 3277 ind1 =indexList[ind];3278 ind2 =indexList[ind+1];6107 for (ind = 0; ind < NO_VAR_BINS; ind++) 6108 { 6109 error_tab[ind] = calculateErrorAbs(m_E_merged[ind], m_y_merged[ind], m_pixAcc_merged[ind], sqrFiltLength); 6110 } 6111 for (ind = 0; ind < NO_VAR_BINS - 1; ind++) 6112 { 6113 ind1 = indexList[ind]; 6114 ind2 = indexList[ind+1]; 3279 6115 3280 error1 =error_tab[ind1];3281 error2 =error_tab[ind2];6116 error1 = error_tab[ind1]; 6117 error2 = error_tab[ind2]; 3282 6118 3283 pixAcc_temp =m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2];3284 for (i =0; 
i<sqrFiltLength; i++)3285 { 3286 m_y_temp[i] =m_y_merged[ind1][i]+m_y_merged[ind2][i];3287 for (j =0; j<sqrFiltLength; j++)3288 { 3289 m_E_temp[i][j] =m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];6119 pixAcc_temp = m_pixAcc_merged[ind1] + m_pixAcc_merged[ind2]; 6120 for (i = 0; i < sqrFiltLength; i++) 6121 { 6122 m_y_temp[i] = m_y_merged[ind1][i] + m_y_merged[ind2][i]; 6123 for (j = 0; j < sqrFiltLength; j++) 6124 { 6125 m_E_temp[i][j] = m_E_merged[ind1][i][j] + m_E_merged[ind2][i][j]; 3290 6126 } 3291 6127 } 3292 error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2; 3293 } 3294 } 3295 while (noRemaining>noIntervals) 3296 { 3297 errorMin=0; first=1; 6128 error_comb_tab[ind1] = calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength) - error1 - error2; 6129 } 6130 } 6131 while (noRemaining > noIntervals) 6132 { 6133 errorMin = 0; 6134 first = 1; 3298 6135 bestToMerge = 0; 3299 for (ind =0; ind<noRemaining-1; ind++)6136 for (ind = 0; ind < noRemaining - 1; ind++) 3300 6137 { 3301 6138 error = error_comb_tab[indexList[ind]]; 3302 if ((error <errorMin || first==1))3303 { 3304 errorMin =error;3305 bestToMerge =ind;3306 first =0;3307 } 3308 } 3309 ind1 =indexList[bestToMerge];3310 ind2 =indexList[bestToMerge+1];3311 m_pixAcc_merged[ind1] +=m_pixAcc_merged[ind2];3312 for (i =0; i<sqrFiltLength; i++)3313 { 3314 m_y_merged[ind1][i] +=m_y_merged[ind2][i];3315 for (j =0; j<sqrFiltLength; j++)3316 { 3317 m_E_merged[ind1][i][j] +=m_E_merged[ind2][i][j];3318 } 3319 } 3320 available[ind2] =0;6139 if ((error < errorMin || first == 1)) 6140 { 6141 errorMin = error; 6142 bestToMerge = ind; 6143 first = 0; 6144 } 6145 } 6146 ind1 = indexList[bestToMerge]; 6147 ind2 = indexList[bestToMerge+1]; 6148 m_pixAcc_merged[ind1] += m_pixAcc_merged[ind2]; 6149 for (i = 0; i < sqrFiltLength; i++) 6150 { 6151 m_y_merged[ind1][i] += m_y_merged[ind2][i]; 6152 for (j = 0; j < sqrFiltLength; j++) 6153 { 6154 m_E_merged[ind1][i][j] += 
m_E_merged[ind2][i][j]; 6155 } 6156 } 6157 available[ind2] = 0; 3321 6158 3322 6159 //update error tables 3323 error_tab[ind1] =error_comb_tab[ind1]+error_tab[ind1]+error_tab[ind2];6160 error_tab[ind1] = error_comb_tab[ind1] + error_tab[ind1] + error_tab[ind2]; 3324 6161 if (indexList[bestToMerge] > 0) 3325 6162 { 3326 ind1=indexList[bestToMerge-1]; 3327 ind2=indexList[bestToMerge]; 3328 error1=error_tab[ind1]; 3329 error2=error_tab[ind2]; 3330 pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2]; 6163 ind1 = indexList[bestToMerge-1]; 6164 ind2 = indexList[bestToMerge]; 6165 error1 = error_tab[ind1]; 6166 error2 = error_tab[ind2]; 6167 pixAcc_temp = m_pixAcc_merged[ind1] + m_pixAcc_merged[ind2]; 6168 for (i = 0; i < sqrFiltLength; i++) 6169 { 6170 m_y_temp[i] = m_y_merged[ind1][i] + m_y_merged[ind2][i]; 6171 for (j = 0; j < sqrFiltLength; j++) 6172 { 6173 m_E_temp[i][j] = m_E_merged[ind1][i][j] + m_E_merged[ind2][i][j]; 6174 } 6175 } 6176 error_comb_tab[ind1] = calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength) - error1 - error2; 6177 } 6178 if (indexList[bestToMerge+1] < NO_VAR_BINS - 1) 6179 { 6180 ind1 = indexList[bestToMerge]; 6181 ind2 = indexList[bestToMerge+2]; 6182 error1 = error_tab[ind1]; 6183 error2 = error_tab[ind2]; 6184 pixAcc_temp = m_pixAcc_merged[ind1] + m_pixAcc_merged[ind2]; 3331 6185 for (i=0; i<sqrFiltLength; i++) 3332 6186 { 3333 m_y_temp[i] =m_y_merged[ind1][i]+m_y_merged[ind2][i];3334 for (j=0; j <sqrFiltLength; j++)3335 { 3336 m_E_temp[i][j] =m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];6187 m_y_temp[i] = m_y_merged[ind1][i] + m_y_merged[ind2][i]; 6188 for (j=0; j < sqrFiltLength; j++) 6189 { 6190 m_E_temp[i][j] = m_E_merged[ind1][i][j] + m_E_merged[ind2][i][j]; 3337 6191 } 3338 6192 } 3339 error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2; 3340 } 3341 if (indexList[bestToMerge+1] < NO_VAR_BINS-1) 3342 { 3343 ind1=indexList[bestToMerge]; 3344 
ind2=indexList[bestToMerge+2]; 3345 error1=error_tab[ind1]; 3346 error2=error_tab[ind2]; 3347 pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2]; 3348 for (i=0; i<sqrFiltLength; i++) 3349 { 3350 m_y_temp[i]=m_y_merged[ind1][i]+m_y_merged[ind2][i]; 3351 for (j=0; j<sqrFiltLength; j++) 3352 { 3353 m_E_temp[i][j]=m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j]; 3354 } 3355 } 3356 error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2; 6193 error_comb_tab[ind1] = calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength) - error1 - error2; 3357 6194 } 3358 6195 3359 6196 ind=0; 3360 for (i =0; i<NO_VAR_BINS; i++)3361 { 3362 if (available[i] ==1)3363 { 3364 indexList[ind] =i;6197 for (i = 0; i < NO_VAR_BINS; i++) 6198 { 6199 if (available[i] == 1) 6200 { 6201 indexList[ind] = i; 3365 6202 ind++; 3366 6203 } … … 3369 6206 } 3370 6207 3371 3372 errorMin=0;3373 for (ind=0; ind<noIntervals; ind++)3374 {3375 errorMin+=error_tab[indexList[ind]];3376 }3377 3378 for (ind=0; ind<noIntervals-1; ind++)3379 {3380 intervalBest[ind][ 0]=indexList[ind]; intervalBest[ind][1]=indexList[ind+1]-1;3381 } 3382 3383 intervalBest[noIntervals-1][0] =indexList[noIntervals-1];3384 intervalBest[noIntervals-1][1] =NO_VAR_BINS-1;6208 errorMin = 0; 6209 for (ind = 0; ind < noIntervals; ind++) 6210 { 6211 errorMin += error_tab[indexList[ind]]; 6212 } 6213 6214 for (ind = 0; ind < noIntervals - 1; ind++) 6215 { 6216 intervalBest[ind][0] = indexList[ind]; 6217 intervalBest[ind][1] = indexList[ind+1] - 1; 6218 } 6219 6220 intervalBest[noIntervals-1][0] = indexList[noIntervals-1]; 6221 intervalBest[noIntervals-1][1] = NO_VAR_BINS-1; 3385 6222 3386 6223 return(errorMin); 3387 6224 } 3388 6225 3389 3390 3391 double TEncAdaptiveLoopFilter::findFilterGroupingError(double ***EGlobalSeq, double **yGlobalSeq, double *pixAccGlobalSeq, int intervalBest[NO_VAR_BINS][2], int sqrFiltLength, int filters_per_fr) 6226 Void 
TEncAdaptiveLoopFilter::roundFiltCoeff(Int *FilterCoeffQuan, Double *FilterCoeff, Int sqrFiltLength, Int factor) 6227 { 6228 Int i; 6229 Double diff; 6230 Int diffInt, sign; 6231 6232 for(i = 0; i < sqrFiltLength; i++) 6233 { 6234 sign = (FilterCoeff[i] > 0)? 1 : -1; 6235 diff = FilterCoeff[i] * sign; 6236 diffInt = (Int)(diff * (Double)factor + 0.5); 6237 FilterCoeffQuan[i] = diffInt * sign; 6238 } 6239 } 6240 6241 Double TEncAdaptiveLoopFilter::QuantizeIntegerFilterPP(Double *filterCoeff, Int *filterCoeffQuant, Double **E, Double *y, Int sqrFiltLength, Int *weights) 3392 6242 { 3393 6243 double error; 3394 3395 // find best filters for each frame group 3396 error = 0; 3397 error += mergeFiltersGreedy(yGlobalSeq, EGlobalSeq, pixAccGlobalSeq, intervalBest, sqrFiltLength, filters_per_fr); 3398 3399 return(error); 3400 } 3401 3402 3403 Void TEncAdaptiveLoopFilter::roundFiltCoeff(int *FilterCoeffQuan, double *FilterCoeff, int sqrFiltLength, int factor) 3404 { 3405 int i; 3406 double diff; 3407 int diffInt, sign; 3408 3409 for(i = 0; i < sqrFiltLength; i++) 3410 { 3411 sign = (FilterCoeff[i]>0) ? 
1: -1; 3412 diff = FilterCoeff[i]*sign; 3413 diffInt = (int)(diff*(double)factor+0.5); 3414 FilterCoeffQuan[i] = diffInt*sign; 3415 } 3416 } 3417 3418 Double TEncAdaptiveLoopFilter::QuantizeIntegerFilterPP(double *filterCoeff, int *filterCoeffQuant, double **E, double *y, int sqrFiltLength, int *weights, int bit_depth) 3419 { 3420 double error; 3421 3422 int factor = (1<<(bit_depth-1)), i; 6244 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 6245 Int factor = (1<<(getAlfPrecisionBit(m_alfQP))); 6246 #else 6247 Int factor = (1<< ((Int)ALF_NUM_BIT_SHIFT) ); 6248 #endif 6249 Int i; 3423 6250 int quantCoeffSum, minInd, targetCoeffSumInt, k, diff; 3424 6251 double targetCoeffSum, errMin; … … 3464 6291 minInd=k; 3465 6292 } 3466 } // if (weights(k)<=diff) {3467 } // for (k=0; k<sqrFiltLength; k++) {6293 } // if (weights(k)<=diff) 6294 } // for (k=0; k<sqrFiltLength; k++) 3468 6295 filterCoeffQuant[minInd]--; 3469 6296 } … … 3491 6318 minInd=k; 3492 6319 } 3493 } // if (weights(k)<=diff) {3494 } // for (k=0; k<sqrFiltLength; k++) {6320 } // if (weights(k)<=diff) 6321 } // for (k=0; k<sqrFiltLength; k++) 3495 6322 filterCoeffQuant[minInd]++; 3496 6323 } … … 3510 6337 } 3511 6338 6339 checkFilterCoeffValue(filterCoeffQuant, sqrFiltLength, false); 6340 3512 6341 for (i=0; i<sqrFiltLength; i++) 3513 6342 { … … 3518 6347 return(error); 3519 6348 } 3520 3521 Double TEncAdaptiveLoopFilter::findFilterCoeff(double ***EGlobalSeq, double **yGlobalSeq, double *pixAccGlobalSeq, int **filterCoeffSeq, int **filterCoeffQuantSeq, int intervalBest[NO_VAR_BINS][2], int varIndTab[NO_VAR_BINS], int sqrFiltLength, int filters_per_fr, int *weights, int bit_depth, double errorTabForce0Coeff[NO_VAR_BINS][2]) 6349 Double TEncAdaptiveLoopFilter::findFilterCoeff(double ***EGlobalSeq, double **yGlobalSeq, double *pixAccGlobalSeq, int **filterCoeffSeq, int **filterCoeffQuantSeq, int intervalBest[NO_VAR_BINS][2], int varIndTab[NO_VAR_BINS], int sqrFiltLength, int filters_per_fr, int *weights, double 
errorTabForce0Coeff[NO_VAR_BINS][2]) 3522 6350 { 3523 6351 static double pixAcc_temp; … … 3536 6364 3537 6365 // Find coeffcients 3538 errorTabForce0Coeff[filtNo][1] = pixAcc_temp + QuantizeIntegerFilterPP(m_filterCoeff, m_filterCoeffQuant, m_E_temp, m_y_temp, sqrFiltLength, weights , bit_depth);6366 errorTabForce0Coeff[filtNo][1] = pixAcc_temp + QuantizeIntegerFilterPP(m_filterCoeff, m_filterCoeffQuant, m_E_temp, m_y_temp, sqrFiltLength, weights); 3539 6367 errorTabForce0Coeff[filtNo][0] = pixAcc_temp; 3540 6368 error += errorTabForce0Coeff[filtNo][1]; … … 3556 6384 } 3557 6385 3558 #if MQT_ALF_NPASS 6386 #if !LCU_SYNTAX_ALF 6387 6388 /** Save redesigned filter set to buffer 6389 * \param filterCoeffPrevSelected filter set buffer 6390 */ 6391 Void TEncAdaptiveLoopFilter::saveFilterCoeffToBuffer(Int **filterSet, Int numFilter, Int* mergeTable, Int mode, Int filtNo) 6392 { 6393 Int iBufferIndex = m_iCurrentPOC % m_iGOPSize; 6394 6395 static Bool bFirst = true; 6396 static Bool* pbFirstAccess; 6397 if(bFirst) 6398 { 6399 pbFirstAccess = new Bool[NUM_ALF_CLASS_METHOD]; 6400 for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++) 6401 { 6402 pbFirstAccess[i]= true; 6403 } 6404 bFirst = false; 6405 } 6406 6407 if(iBufferIndex == 0) 6408 { 6409 if(pbFirstAccess[mode]) 6410 { 6411 //store merge table 6412 ::memcpy(m_mergeTableSavedMethods[mode][m_iGOPSize], mergeTable, sizeof(Int)*NO_VAR_BINS); 6413 //store coefficients 6414 for(Int varInd=0; varInd< numFilter; varInd++) 6415 { 6416 ::memcpy(m_aiFilterCoeffSavedMethods[mode][m_iGOPSize][varInd],filterSet[varInd], sizeof(Int)*ALF_MAX_NUM_COEF ); 6417 } 6418 //store filter shape 6419 m_iPreviousFilterShapeMethods[mode][m_iGOPSize]= filtNo; 6420 6421 pbFirstAccess[mode] = false; 6422 } 6423 6424 6425 //store merge table 6426 ::memcpy(m_mergeTableSavedMethods[mode][0 ], m_mergeTableSavedMethods[mode][m_iGOPSize], sizeof(Int)*NO_VAR_BINS); 6427 ::memcpy(m_mergeTableSavedMethods[mode][m_iGOPSize], mergeTable, sizeof(Int)*NO_VAR_BINS); 6428 
6429 //store coefficients 6430 for(Int varInd=0; varInd< NO_VAR_BINS; varInd++) 6431 { 6432 ::memcpy(m_aiFilterCoeffSavedMethods[mode][0][varInd],m_aiFilterCoeffSavedMethods[mode][m_iGOPSize][varInd], sizeof(Int)*ALF_MAX_NUM_COEF ); 6433 } 6434 6435 for(Int varInd=0; varInd< numFilter; varInd++) 6436 { 6437 ::memcpy(m_aiFilterCoeffSavedMethods[mode][m_iGOPSize][varInd],filterSet[varInd], sizeof(Int)*ALF_MAX_NUM_COEF ); 6438 } 6439 6440 //store filter shape 6441 m_iPreviousFilterShapeMethods[mode][0]= m_iPreviousFilterShapeMethods[mode][m_iGOPSize]; 6442 m_iPreviousFilterShapeMethods[mode][m_iGOPSize]= filtNo; 6443 } 6444 else 6445 { 6446 6447 //store merge table 6448 ::memcpy(m_mergeTableSavedMethods[mode][iBufferIndex], mergeTable, sizeof(Int)*NO_VAR_BINS); 6449 6450 //store coefficients 6451 for(Int varInd=0; varInd< numFilter; varInd++) 6452 { 6453 ::memcpy(m_aiFilterCoeffSavedMethods[mode][iBufferIndex][varInd],filterSet[varInd], sizeof(Int)*ALF_MAX_NUM_COEF ); 6454 } 6455 //store filter_shape 6456 m_iPreviousFilterShapeMethods[mode][iBufferIndex]= filtNo; 6457 6458 } 6459 } 6460 6461 6462 /** set initial m_maskImg with previous (time-delayed) filters 6463 * \param pcPicOrg original picture 6464 * \param pcPicDec reconstructed picture after deblocking 6465 */ 6466 Void TEncAdaptiveLoopFilter::setMaskWithTimeDelayedResults(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec) 6467 { 6468 6469 static Pel** bestImgMask; 6470 static Bool bFirst = true; 6471 if(bFirst) 6472 { 6473 initMatrix_Pel(&bestImgMask, m_img_height, m_img_width); 6474 bFirst = false; 6475 } 6476 6477 Pel* pDec = pcPicDec->getLumaAddr(); 6478 Pel* pOrg = pcPicOrg->getLumaAddr(); 6479 Pel* pRest = m_pcPicYuvTmp->getLumaAddr(); 6480 Int LumaStride = pcPicOrg->getStride(); 6481 Int*** pppCoeffSaved = m_aiFilterCoeffSavedMethods [m_uiVarGenMethod]; 6482 Int** ppMergeTableSaved = m_mergeTableSavedMethods [m_uiVarGenMethod]; 6483 Int* pFilterShapeSaved = m_iPreviousFilterShapeMethods[m_uiVarGenMethod]; 6484 
Int iBufIdx; 6485 6486 UInt64 uiRate, uiDist; 6487 Double dCost, dMinCost = MAX_DOUBLE; 6488 ALFParam cAlfParam; 6489 allocALFParam(&cAlfParam); 6490 cAlfParam.alf_flag = 0; 6491 cAlfParam.chroma_idc = 0; 6492 6493 //filter frame with the previous time-delayed filters 6494 Int filtNo; 6495 Int maxDepth = (pcPicOrg->getWidth() < 1000) ?(2):(g_uiMaxCUDepth); 6496 m_pcEntropyCoder->setAlfCtrl(true); 6497 m_pcTempAlfParam->alf_flag = 1; 6498 m_pcTempAlfParam->alf_pcr_region_flag = m_uiVarGenMethod; 6499 6500 for (Int index=0; index<2; index++) 6501 { 6502 iBufIdx = setFilterIdx(index); 6503 filtNo = m_pcTempAlfParam->filter_shape = pFilterShapeSaved[iBufIdx]; 6504 #if ALF_SINGLE_FILTER_SHAPE 6505 assert(filtNo == ALF_CROSS9x7_SQUARE3x3); 6506 #else 6507 assert(filtNo == ALF_STAR5x5 || filtNo == ALF_CROSS9x9); 6508 #endif 6509 m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[filtNo]; 6510 if(!m_bUseNonCrossALF) 6511 { 6512 filterLuma(pRest, pDec, LumaStride, 0, m_img_height-1, 0, m_img_width-1, filtNo, pppCoeffSaved[iBufIdx], ppMergeTableSaved[iBufIdx], m_varImg); 6513 } 6514 else 6515 { 6516 xfilterSlicesEncoder(pDec, pRest, LumaStride, filtNo, pppCoeffSaved[iBufIdx], ppMergeTableSaved[iBufIdx], m_varImg); 6517 } 6518 6519 for (UInt uiDepth = 0; uiDepth < maxDepth; uiDepth++) 6520 { 6521 m_pcEntropyCoder->setMaxAlfCtrlDepth(uiDepth); 6522 std::vector<AlfCUCtrlInfo> vAlfCUCtrlParamTemp(m_uiNumSlicesInPic); 6523 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, vAlfCUCtrlParamTemp); 6524 m_pcEntropyCoder->resetEntropy(); 6525 m_pcEntropyCoder->resetBits(); 6526 xEncodeCUAlfCtrlFlags(vAlfCUCtrlParamTemp); 6527 uiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); 6528 dCost = (Double)(uiRate) * m_dLambdaLuma + (Double)(uiDist); 6529 6530 if (dCost < dMinCost) 6531 { 6532 dMinCost = dCost; 6533 copyALFParam(&cAlfParam, m_pcTempAlfParam); 6534 ::memcpy(bestImgMask[0], m_maskImg[0], sizeof(Pel)*m_img_height* m_img_width); 6535 } 6536 } 6537 } 6538 
filtNo = cAlfParam.filter_shape; 6539 6540 6541 ::memcpy(m_maskImg[0], bestImgMask[0], sizeof(Pel)*m_img_height* m_img_width); 6542 6543 m_pcEntropyCoder->setAlfCtrl(false); 6544 m_pcEntropyCoder->setMaxAlfCtrlDepth(0); 6545 6546 // generate filters for future reference 6547 m_iDesignCurrentFilter = 0; 6548 6549 int filters_per_fr; 6550 int lambda_val = (Int)m_dLambdaLuma; 6551 6552 lambda_val = lambda_val * (1<<(2*g_uiBitIncrement)); 6553 6554 if(!m_bUseNonCrossALF) 6555 { 6556 xstoreInBlockMatrix(0, 0, m_img_height, m_img_width, true, true, pOrg, pDec, cAlfParam.filter_shape, LumaStride); 6557 } 6558 else 6559 { 6560 xstoreInBlockMatrixforSlices(pOrg, pDec, cAlfParam.filter_shape, LumaStride); 6561 } 6562 xfindBestFilterVarPred(m_yGlobalSym[filtNo], m_EGlobalSym[filtNo], m_pixAcc, 6563 m_filterCoeffSym, m_filterCoeffSymQuant, 6564 filtNo, &filters_per_fr, 6565 m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val); 6566 6567 saveFilterCoeffToBuffer(m_filterCoeffSym, filters_per_fr, m_varIndTab, cAlfParam.alf_pcr_region_flag, filtNo); 6568 m_iDesignCurrentFilter = 1; 6569 6570 freeALFParam(&cAlfParam); 6571 6572 } 6573 6574 6575 /** set ALF encoding parameters 6576 * \param pcPic picture pointer 6577 */ 3559 6578 Void TEncAdaptiveLoopFilter::setALFEncodingParam(TComPic *pcPic) 3560 6579 { … … 3563 6582 m_iALFNumOfRedesign = 0; 3564 6583 m_iCurrentPOC = m_pcPic->getPOC(); 3565 3566 if((m_eSliceType == I_SLICE) || (m_iGOPSize==8 && (m_iCurrentPOC % 4 == 0))) 6584 if((pcPic->getSlice(0)->getSliceType() == I_SLICE) || (m_iGOPSize==8 && (m_iCurrentPOC % 4 == 0))) 3567 6585 { 3568 6586 m_iUsePreviousFilter = 0; … … 3581 6599 } 3582 6600 3583 Void TEncAdaptiveLoopFilter::xcalcPredFilterCoeffPrev(Int filtNo) 3584 { 3585 int varInd, i; 3586 3587 for(varInd=0; varInd<NO_VAR_BINS; ++varInd) 3588 { 3589 for(i = 0; i < MAX_SQR_FILT_LENGTH; i++) 3590 { 3591 m_filterCoeffPrevSelected[varInd][i]=m_aiFilterCoeffSaved[m_iFilterIdx][varInd][i]; 3592 } 3593 } 3594 } 3595 3596 Void 
TEncAdaptiveLoopFilter::setFilterIdx(Int index) 3597 { 6601 /** set filter buffer index 6602 * \param index the processing order of time-delayed filtering 6603 */ 6604 Int TEncAdaptiveLoopFilter::setFilterIdx(Int index) 6605 { 6606 Int iBufIdx; 6607 3598 6608 if (m_iGOPSize == 8) 3599 6609 { 3600 if ((m_iCurrentPOC % m_iGOPSize) == 0) 3601 { 3602 Int FiltTable[2] = {0, m_iGOPSize}; 3603 m_iFilterIdx = FiltTable[index]; 3604 } 3605 if ((m_iCurrentPOC % m_iGOPSize) == 4) 3606 { 3607 Int FiltTable[2] = {0, m_iGOPSize}; 3608 m_iFilterIdx = FiltTable[index]; 3609 } 3610 if ((m_iCurrentPOC % m_iGOPSize) == 2) 3611 { 3612 Int FiltTable[2] = {0, 4}; 3613 m_iFilterIdx = FiltTable[index]; 3614 } 3615 if ((m_iCurrentPOC % m_iGOPSize) == 6) 3616 { 3617 Int FiltTable[2] = {4, m_iGOPSize}; 3618 m_iFilterIdx = FiltTable[index]; 3619 } 3620 if ((m_iCurrentPOC % m_iGOPSize) == 1) 3621 { 3622 Int FiltTable[2] = {0, 2}; 3623 m_iFilterIdx = FiltTable[index]; 3624 } 3625 if ((m_iCurrentPOC % m_iGOPSize) == 3) 3626 { 3627 Int FiltTable[2] = {2, 4}; 3628 m_iFilterIdx = FiltTable[index]; 3629 } 3630 if ((m_iCurrentPOC % m_iGOPSize) == 5) 3631 { 3632 Int FiltTable[2] = {4, 6}; 3633 m_iFilterIdx = FiltTable[index]; 3634 } 3635 if ((m_iCurrentPOC % m_iGOPSize) == 7) 3636 { 3637 Int FiltTable[2] = {6, m_iGOPSize}; 3638 m_iFilterIdx = FiltTable[index]; 6610 switch(m_iCurrentPOC % m_iGOPSize) 6611 { 6612 case 0: 6613 { 6614 iBufIdx = (index == 0)?0:m_iGOPSize; 6615 } 6616 break; 6617 case 1: 6618 { 6619 iBufIdx = (index == 0)?0:2; 6620 } 6621 break; 6622 case 2: 6623 { 6624 iBufIdx = (index == 0)?0:4; 6625 } 6626 break; 6627 case 3: 6628 { 6629 iBufIdx = (index == 0)?2:4; 6630 } 6631 break; 6632 case 4: 6633 { 6634 iBufIdx = (index == 0)?0:m_iGOPSize; 6635 } 6636 break; 6637 case 5: 6638 { 6639 iBufIdx = (index == 0)?4:6; 6640 } 6641 break; 6642 case 6: 6643 { 6644 iBufIdx = (index == 0)?4:m_iGOPSize; 6645 } 6646 break; 6647 case 7: 6648 { 6649 iBufIdx = (index == 0)?6:m_iGOPSize; 6650 } 6651 
break; 6652 default: 6653 { 6654 printf("error\n"); 6655 assert(0); 6656 } 3639 6657 } 3640 6658 } 3641 6659 else 3642 6660 { 3643 Int FiltTable[2] = {0, m_iGOPSize}; 3644 m_iFilterIdx = FiltTable[index]; 3645 } 3646 } 3647 6661 iBufIdx = (index == 0)?0:m_iGOPSize; 6662 } 6663 6664 return iBufIdx; 6665 } 6666 6667 6668 /** set initial m_maskImg 6669 * \param pcPicOrg original picture pointer 6670 * \param pcPicDec reconstructed picture pointer 6671 */ 3648 6672 Void TEncAdaptiveLoopFilter::setInitialMask(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec) 3649 6673 { … … 3651 6675 Int Width = pcPicOrg->getWidth(); 3652 6676 Int LumaStride = pcPicOrg->getStride(); 3653 imgpel* pDec = (imgpel*)pcPicDec->getLumaAddr(); 3654 3655 #if MTK_NONCROSS_INLOOP_FILTER 3656 if(!m_bUseNonCrossALF) 3657 calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride); 6677 Pel* pDec = pcPicDec->getLumaAddr(); 6678 6679 calcVar(m_varImg, pDec, LumaStride, m_uiVarGenMethod); 6680 6681 if(!m_iALFEncodePassReduction || !m_iUsePreviousFilter) 6682 { 6683 for(Int y=0; y<Height; y++) 6684 { 6685 for(Int x=0; x<Width; x++) 6686 { 6687 m_maskImg[y][x] = 1; 6688 } 6689 } 6690 } 3658 6691 else 3659 calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride); 3660 #else 3661 calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride); 3662 #endif 3663 3664 if(!m_iALFEncodePassReduction || !m_iUsePreviousFilter) 3665 { 3666 for(Int y=0; y<Height; y++) 3667 { 3668 for(Int x=0; x<Width; x++) 3669 { 3670 m_maskImg[y][x] = 1; 3671 } 3672 } 3673 } 3674 else 3675 { 3676 Int uiBestDepth=0; 3677 UInt64 uiRate, uiDist, uiMinRate, uiMinDist; 3678 Double dCost, dMinCost = MAX_DOUBLE; 3679 //imgpel* pOrg = (imgpel*)pcPicOrg->getLumaAddr(); 3680 imgpel* pRest = (imgpel*)m_pcPicYuvTmp->getLumaAddr(); 3681 3682 Int iTap = 9; 3683 Int filtNo = 0; 3684 m_pcTempAlfParam->cu_control_flag = 0; 3685 m_pcTempAlfParam->tap = iTap; 3686 #if TI_ALF_MAX_VSIZE_7 3687 m_pcTempAlfParam->tapV = 
TComAdaptiveLoopFilter::ALFTapHToTapV(iTap); 3688 m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(iTap); 3689 #else 3690 m_pcTempAlfParam->num_coeff = iTap*iTap/4 + 2; 3691 #endif 3692 3693 for (Int index=0; index<2; index++) 3694 { 3695 setFilterIdx(index); 3696 xcalcPredFilterCoeffPrev(filtNo); 3697 #if MTK_NONCROSS_INLOOP_FILTER 3698 if(!m_bUseNonCrossALF) 3699 xfilterFrame_en(0, 0, Height, Width, pDec, pRest, filtNo, LumaStride); 3700 else 3701 xfilterSlices_en(pDec, pRest, filtNo, LumaStride); 3702 #else 3703 xfilterFrame_en(pDec, pRest, filtNo, LumaStride); 3704 #endif 3705 xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, NULL, uiRate, uiDist, dCost); 3706 if (dCost < dMinCost) 3707 { 3708 dMinCost = dCost; 3709 uiMinDist = uiDist; 3710 uiMinRate = uiRate; 3711 m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest); 3712 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam); 3713 } 3714 } 3715 m_pcPicYuvBest->copyToPicLuma(m_pcPicYuvTmp); 3716 3717 m_pcEntropyCoder->setAlfCtrl(true); 3718 Int maxDepth = g_uiMaxCUDepth; 3719 if (pcPicOrg->getWidth() < 1000) maxDepth = 2; 3720 for (UInt uiDepth = 0; uiDepth < maxDepth; uiDepth++) 3721 { 3722 m_pcEntropyCoder->setMaxAlfCtrlDepth(uiDepth); 3723 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam); 3724 m_pcTempAlfParam->cu_control_flag = 1; 3725 3726 #if TSB_ALF_HEADER 3727 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here 3728 #else 3729 xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here 3730 #endif 3731 m_pcEntropyCoder->resetEntropy(); 3732 m_pcEntropyCoder->resetBits(); 3733 xEncodeCUAlfCtrlFlags(); 3734 uiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); 3735 dCost = (Double)(uiRate) * m_dLambdaLuma + (Double)(uiDist); 3736 3737 if (dCost < dMinCost) 3738 { 3739 uiBestDepth = uiDepth; 3740 dMinCost = dCost; 3741 uiMinDist = uiDist; 3742 uiMinRate = uiRate; 3743 copyALFParam(m_pcBestAlfParam, 
m_pcTempAlfParam); 3744 //save maskImg 3745 xCopyTmpAlfCtrlFlagsFrom(); 3746 } 3747 } 3748 3749 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam); 3750 m_iDesignCurrentFilter = 0; // design filter for subsequent slices 3751 xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, true); 3752 m_iDesignCurrentFilter = 1; 3753 3754 if (m_pcBestAlfParam->cu_control_flag) 3755 { 3756 m_pcEntropyCoder->setAlfCtrl(true); 3757 m_pcEntropyCoder->setMaxAlfCtrlDepth(uiBestDepth); 3758 xCopyTmpAlfCtrlFlagsTo(); 3759 } 3760 else 3761 { 3762 m_pcEntropyCoder->setAlfCtrl(false); 3763 m_pcEntropyCoder->setMaxAlfCtrlDepth(0); 3764 for(Int y=0; y<Height; y++) 3765 { 3766 for(Int x=0; x<Width; x++) 3767 { 3768 m_maskImg[y][x] = 1; 3769 } 3770 } 3771 } 3772 } 3773 } 3774 3775 #if MQT_BA_RA 3776 Void TEncAdaptiveLoopFilter::xFirstEstimateFilteringFrameLumaAllTap(imgpel* ImgOrg, imgpel* ImgDec, Int Stride, 3777 ALFParam* pcAlfSaved, 3778 Int* aiVarIndTabBest, 3779 Int** ppiBestCoeffSet, 3780 Int& ibestfiltNo, 3781 Int& ibestfilters_per_fr, 3782 Double** ppdBesty, 3783 Double*** pppdBestE, 3784 Double* pdBestpixAcc, 3785 UInt64& ruiRate, 3786 Int64& riDist, 3787 Double& rdCost 3788 ) 3789 #else 3790 Void TEncAdaptiveLoopFilter::xFirstFilteringFrameLumaAllTap(imgpel* ImgOrg, imgpel* ImgDec, imgpel* ImgRest, Int Stride) 3791 #endif 3792 { 3793 #if !MQT_BA_RA 3794 static Bool bFirst = true; 3795 static Int aiVarIndTabBest[NO_VAR_BINS]; 3796 #endif 6692 { 6693 setMaskWithTimeDelayedResults(pcPicOrg, pcPicDec); 6694 } 6695 } 6696 6697 6698 6699 /** Estimate RD cost of all filter size & store the best one 6700 * \param ImgOrg original picture 6701 * \param ImgDec reconstructed picture after deblocking 6702 * \param Sride line buffer size of picture buffer 6703 * \param pcAlfSaved the best Alf parameters 6704 * \returns ruiDist estimated distortion 6705 * \returns ruiRate required bits 6706 * \returns rdCost estimated R-D cost 6707 */ 6708 6709 Void 
TEncAdaptiveLoopFilter::decideFilterShapeLuma(Pel* ImgOrg, Pel* ImgDec, Int Stride, ALFParam* pcAlfSaved, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost) 6710 { 3797 6711 static Double **ySym, ***ESym; 3798 #if !MQT_BA_RA3799 static Int** ppiBestCoeffSet;3800 3801 if(bFirst)3802 {3803 initMatrix_int(&ppiBestCoeffSet, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);3804 bFirst = false;3805 }3806 #endif3807 3808 6712 Int lambda_val = ((Int) m_dLambdaLuma) * (1<<(2*g_uiBitIncrement)); 3809 #if MQT_BA_RA3810 6713 Int filtNo, filters_per_fr; 3811 #else3812 Int filtNo, ibestfiltNo=0, filters_per_fr, ibestfilters_per_fr=0;3813 #endif3814 6714 Int64 iEstimatedDist; 3815 6715 UInt64 uiRate; 3816 6716 Double dEstimatedCost, dEstimatedMinCost = MAX_DOUBLE;; 3817 Bool bMatrixBaseReady = false; 3818 m_iMatrixBaseFiltNo = 0; 3819 #if MQT_BA_RA 6717 6718 UInt uiBitShift = (g_uiBitIncrement<<1); 6719 Int64 iEstimateDistBeforeFilter; 6720 Int* coeffNoFilter[NUM_ALF_FILTER_SHAPE][NO_VAR_BINS]; 6721 for(Int filter_shape = 0; filter_shape < NUM_ALF_FILTER_SHAPE; filter_shape++) 6722 { 6723 for(Int i=0; i< NO_VAR_BINS; i++) 6724 { 6725 coeffNoFilter[filter_shape][i]= new Int[ALF_MAX_NUM_COEF]; 6726 ::memset(coeffNoFilter[filter_shape][i], 0, sizeof(Int)*ALF_MAX_NUM_COEF); 6727 coeffNoFilter[filter_shape][i][ m_sqrFiltLengthTab[filter_shape]-1 ] = (1 << ((Int)ALF_NUM_BIT_SHIFT)); 6728 } 6729 } 6730 3820 6731 m_pcTempAlfParam->alf_flag = 1; 3821 m_pcTempAlfParam->cu_control_flag = 0;3822 6732 m_pcTempAlfParam->chroma_idc = 0; 3823 #endif 3824 for(Int iTap = ALF_MAX_NUM_TAP; iTap>=ALF_MIN_NUM_TAP; iTap -= 2) 3825 { 3826 m_pcTempAlfParam->tap = iTap; 3827 #if TI_ALF_MAX_VSIZE_7 3828 m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap); 3829 m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(iTap); 3830 #else 3831 m_pcTempAlfParam->num_coeff = iTap*iTap/4 + 2; 3832 #endif 3833 if (iTap==9) 3834 { 3835 filtNo = 0; 3836 } 3837 else if (iTap==7) 3838 { 3839 filtNo = 1; 
3840 } 3841 else 3842 { 3843 filtNo = 2; 3844 } 6733 m_pcTempAlfParam->alf_pcr_region_flag = m_uiVarGenMethod; 6734 6735 for (int filter_shape = 0; filter_shape < NUM_ALF_FILTER_SHAPE ;filter_shape ++) 6736 { 6737 m_pcTempAlfParam->filter_shape = filtNo = filter_shape; 6738 m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[filtNo] ; 3845 6739 3846 6740 ESym = m_EGlobalSym [filtNo]; 3847 6741 ySym = m_yGlobalSym [filtNo]; 3848 6742 3849 if( bMatrixBaseReady ) 3850 { 3851 xretriveBlockMatrix(m_pcTempAlfParam->num_coeff, m_iTapPosTabIn9x9Sym[filtNo], 3852 m_EGlobalSym[m_iMatrixBaseFiltNo], ESym, 3853 m_yGlobalSym[m_iMatrixBaseFiltNo], ySym); 3854 6743 if(!m_bUseNonCrossALF) 6744 { 6745 xstoreInBlockMatrix(0, 0, m_img_height, m_img_width, true, true, ImgOrg, ImgDec, filter_shape, Stride); 3855 6746 } 3856 6747 else 3857 #if MTK_NONCROSS_INLOOP_FILTER 3858 { 3859 if(!m_bUseNonCrossALF) 3860 xstoreInBlockMatrix(0, 0, m_im_height, m_im_width, true, true, ImgOrg, ImgDec, iTap, Stride); 3861 else 3862 xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, iTap, Stride); 3863 } 3864 #else 3865 xstoreInBlockMatrix(ImgOrg, ImgDec, iTap, Stride); 3866 #endif 3867 if(filtNo == m_iMatrixBaseFiltNo) 3868 { 3869 bMatrixBaseReady = true; 3870 } 3871 3872 xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr, 3873 m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val); 3874 3875 uiRate = xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,0, m_pcTempAlfParam); 6748 { 6749 xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, filter_shape, Stride); 6750 } 6751 xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr, m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val); 6752 6753 //estimate R-D cost 6754 uiRate = xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr, m_pcTempAlfParam); 3876 6755 iEstimatedDist = xEstimateFiltDist(filters_per_fr, 
m_varIndTab, ESym, ySym, m_filterCoeffSym, m_pcTempAlfParam->num_coeff); 6756 iEstimateDistBeforeFilter = xEstimateFiltDist(filters_per_fr, m_varIndTab, ESym, ySym, coeffNoFilter[filter_shape], m_pcTempAlfParam->num_coeff); 6757 iEstimatedDist -= iEstimateDistBeforeFilter; 3877 6758 dEstimatedCost = (Double)(uiRate) * m_dLambdaLuma + (Double)(iEstimatedDist); 3878 6759 … … 3880 6761 { 3881 6762 dEstimatedMinCost = dEstimatedCost; 3882 ibestfiltNo = filtNo; 3883 ibestfilters_per_fr = filters_per_fr; 3884 #if MQT_BA_RA 6763 copyALFParam(pcAlfSaved, m_pcTempAlfParam); 6764 iEstimatedDist += iEstimateDistBeforeFilter; 6765 6766 for(Int i=0; i< filters_per_fr; i++ ) 6767 { 6768 iEstimatedDist += (((Int64)m_pixAcc_merged[i]) >> uiBitShift); 6769 } 6770 ruiDist = (iEstimatedDist > 0)?((UInt64)iEstimatedDist):(0); 6771 rdCost = dEstimatedMinCost + (Double)(ruiDist); 3885 6772 ruiRate = uiRate; 3886 riDist = iEstimatedDist; 3887 rdCost = dEstimatedMinCost; 3888 3889 copyALFParam(pcAlfSaved, m_pcTempAlfParam); 3890 #else 3891 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam); 3892 #endif 3893 ::memcpy(aiVarIndTabBest, m_varIndTab, sizeof(Int)*NO_VAR_BINS); 3894 for(Int i=0; i< ibestfilters_per_fr; i++ ) 3895 { 3896 ::memcpy( ppiBestCoeffSet[i], m_filterCoeffSym[i], sizeof(Int) * m_pcTempAlfParam->num_coeff); 3897 } 3898 } 3899 } 3900 3901 filtNo = ibestfiltNo; 3902 filters_per_fr = ibestfilters_per_fr; 3903 ::memcpy(m_varIndTab, aiVarIndTabBest, sizeof(Int)*NO_VAR_BINS); 3904 for(Int i=0; i< filters_per_fr; i++ ) 3905 { 3906 #if MQT_BA_RA 3907 ::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * pcAlfSaved->num_coeff); 3908 #else 3909 ::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * m_pcTempAlfParam->num_coeff); 3910 #endif 3911 } 3912 3913 xcalcPredFilterCoeff(filtNo); 3914 3915 #if MQT_BA_RA 3916 3917 3918 ::memset( pdBestpixAcc, 0,sizeof(double)*NO_VAR_BINS); 3919 for (Int varInd=0; varInd<NO_VAR_BINS; varInd++) 3920 { 3921 
::memset(ppdBesty[varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH); 3922 for (Int k=0; k< pcAlfSaved->num_coeff; k++) 3923 { 3924 ::memset(pppdBestE[varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH); 3925 } 3926 } 3927 ESym = m_EGlobalSym [filtNo]; 3928 ySym = m_yGlobalSym [filtNo]; 3929 3930 ::memcpy( pdBestpixAcc, m_pixAcc ,sizeof(double)*NO_VAR_BINS); 3931 for (Int varInd=0; varInd<NO_VAR_BINS; varInd++) 3932 { 3933 ::memcpy(ppdBesty[varInd],ySym[varInd],sizeof(double)*MAX_SQR_FILT_LENGTH); 3934 for (Int k=0; k< pcAlfSaved->num_coeff; k++) 3935 { 3936 ::memcpy(pppdBestE[varInd][k],ESym[varInd][k],sizeof(double)*MAX_SQR_FILT_LENGTH); 3937 } 3938 } 3939 3940 3941 #else 3942 3943 filtNo = ibestfiltNo; 3944 filters_per_fr = ibestfilters_per_fr; 3945 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam); 3946 ::memcpy(m_varIndTab, aiVarIndTabBest, sizeof(Int)*NO_VAR_BINS); 3947 for(Int i=0; i< filters_per_fr; i++ ) 3948 { 3949 ::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * m_pcTempAlfParam->num_coeff); 3950 } 3951 3952 xcalcPredFilterCoeff(filtNo); 3953 #if MTK_NONCROSS_INLOOP_FILTER 3954 if(!m_bUseNonCrossALF) 3955 xfilterFrame_en(0, 0, m_im_height, m_im_width, ImgDec, ImgRest, filtNo, Stride); 3956 else 3957 xfilterSlices_en(ImgDec, ImgRest, filtNo, Stride); 3958 #else 3959 xfilterFrame_en(ImgDec, ImgRest, filtNo, Stride); 3960 #endif 3961 3962 #endif 3963 } 3964 3965 Void TEncAdaptiveLoopFilter::xretriveBlockMatrix(Int iNumTaps, 3966 Int* piTapPosInMaxFilter, 3967 Double*** pppdEBase, Double*** pppdETarget, 3968 Double** ppdyBase, Double** ppdyTarget ) 3969 { 3970 Int varInd; 3971 Int i, j, r, c; 3972 3973 Double** ppdSrcE; 3974 Double** ppdDstE; 3975 Double* pdSrcy; 3976 Double* pdDsty; 3977 3978 for (varInd=0; varInd< NO_VAR_BINS; varInd++) 3979 { 3980 ppdSrcE = pppdEBase [varInd]; 3981 ppdDstE = pppdETarget[varInd]; 3982 pdSrcy = ppdyBase [varInd]; 3983 pdDsty = ppdyTarget [varInd]; 3984 3985 for(j=0; j< iNumTaps; j++) 3986 { 3987 r = 
piTapPosInMaxFilter[j]; 3988 3989 for(i=j; i< iNumTaps; i++) 3990 { 3991 c = piTapPosInMaxFilter[i]; 3992 3993 //auto-correlation retrieval 3994 ppdDstE[j][i] = ppdSrcE[r][c]; 3995 3996 } 3997 3998 //cross-correlation retrieval 3999 pdDsty[j] = pdSrcy[r]; 4000 4001 } 4002 4003 //symmetric copy 4004 for(j=1; j< iNumTaps; j++) 4005 for(i=0; i< j; i++) 4006 ppdDstE[j][i] = ppdDstE[i][j]; 4007 4008 } 4009 4010 } 4011 6773 } 6774 } 6775 6776 if (!m_iUsePreviousFilter) 6777 { 6778 decodeFilterSet(pcAlfSaved, m_varIndTab, m_filterCoeffSym); 6779 saveFilterCoeffToBuffer(m_filterCoeffSym, pcAlfSaved->filters_per_group, m_varIndTab, pcAlfSaved->alf_pcr_region_flag, pcAlfSaved->filter_shape); 6780 } 6781 6782 if( m_iUsePreviousFilter ) 6783 { 6784 UInt64 uiOffRegionDistortion = 0; 6785 Int iPelDiff; 6786 Pel* pOrgTemp = (Pel*)ImgOrg; 6787 Pel* pDecTemp = (Pel*)ImgDec; 6788 for(Int y=0; y< m_img_height; y++) 6789 { 6790 for(Int x=0; x< m_img_width; x++) 6791 { 6792 if(m_maskImg[y][x] == 0) 6793 { 6794 iPelDiff = pOrgTemp[x] - pDecTemp[x]; 6795 uiOffRegionDistortion += (UInt64)( (iPelDiff*iPelDiff) >> uiBitShift ); 6796 } 6797 } 6798 pOrgTemp += Stride; 6799 pDecTemp += Stride; 6800 6801 ruiDist += uiOffRegionDistortion; 6802 rdCost += (Double)uiOffRegionDistortion; 6803 } 6804 } 6805 6806 #if !ALF_SINGLE_FILTER_SHAPE 6807 // if ALF_STAR5x5 is selected, the distortion of 2 skipped lines per LCU should be added. 
6808 if(pcAlfSaved->filter_shape == ALF_STAR5x5) 6809 { 6810 Int iPelDiff; 6811 UInt64 uiSkipPelsDistortion = 0; 6812 Pel *pOrgTemp, *pDecTemp; 6813 for(Int y= m_lineIdxPadTop-1; y< m_img_height - m_lcuHeight ; y += m_lcuHeight) 6814 { 6815 pOrgTemp = ImgOrg + y*Stride; 6816 pDecTemp = ImgDec + y*Stride; 6817 for(Int x=0; x< m_img_width; x++) 6818 { 6819 if(m_maskImg[y][x] == 1) 6820 { 6821 iPelDiff = pOrgTemp[x] - pDecTemp[x]; 6822 uiSkipPelsDistortion += (UInt64)( (iPelDiff*iPelDiff) >> uiBitShift ); 6823 } 6824 } 6825 6826 pOrgTemp += Stride; 6827 pDecTemp += Stride; 6828 for(Int x=0; x< m_img_width; x++) 6829 { 6830 if(m_maskImg[y+1][x] == 1) 6831 { 6832 iPelDiff = pOrgTemp[x] - pDecTemp[x]; 6833 uiSkipPelsDistortion += (UInt64)( (iPelDiff*iPelDiff) >> uiBitShift ); 6834 } 6835 } 6836 } 6837 ruiDist += uiSkipPelsDistortion; 6838 rdCost += (Double)uiSkipPelsDistortion; 6839 } 6840 #endif 6841 6842 for(Int filter_shape = 0; filter_shape < NUM_ALF_FILTER_SHAPE; filter_shape++) 6843 { 6844 for(Int i=0; i< NO_VAR_BINS; i++) 6845 { 6846 delete[] coeffNoFilter[filter_shape][i]; 6847 } 6848 } 6849 } 6850 6851 6852 #endif 6853 6854 /** Estimate filtering distortion by correlation values and filter coefficients 6855 * \param ppdE auto-correlation matrix 6856 * \param pdy cross-correlation array 6857 * \param piCoeff filter coefficients 6858 * \param iFiltLength numbr of filter taps 6859 * \returns estimated distortion 6860 */ 4012 6861 Int64 TEncAdaptiveLoopFilter::xFastFiltDistEstimation(Double** ppdE, Double* pdy, Int* piCoeff, Int iFiltLength) 4013 6862 { 4014 6863 //static memory 4015 static Bool bFirst = true; 4016 static Double* pdcoeff; 4017 if(bFirst) 4018 { 4019 pdcoeff= new Double[MAX_SQR_FILT_LENGTH]; 4020 bFirst= false; 4021 } 4022 6864 Double pdcoeff[ALF_MAX_NUM_COEF]; 4023 6865 //variable 4024 6866 Int i,j; 4025 6867 Int64 iDist; 4026 6868 Double dDist, dsum; 4027 6869 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 6870 Int alfPrecisionBit = 
getAlfPrecisionBit( m_alfQP ); 6871 #endif 4028 6872 4029 6873 for(i=0; i< iFiltLength; i++) 4030 pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<< (NUM_BITS - 1) ); 4031 6874 { 6875 #if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 6876 pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<<alfPrecisionBit); 6877 #else 6878 pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<< ((Int)ALF_NUM_BIT_SHIFT) ); 6879 #endif 6880 } 4032 6881 4033 6882 dDist =0; … … 4036 6885 dsum= ((Double)ppdE[i][i]) * pdcoeff[i]; 4037 6886 for(j=i+1; j< iFiltLength; j++) 6887 { 4038 6888 dsum += (Double)(2*ppdE[i][j])* pdcoeff[j]; 6889 } 4039 6890 4040 6891 dDist += ((dsum - 2.0 * pdy[i])* pdcoeff[i] ); … … 4056 6907 } 4057 6908 6909 6910 #if !LCU_SYNTAX_ALF 6911 6912 /** Estimate total filtering cost of all groups 6913 * \param filters_per_fr number of filters for the slice 6914 * \param VarIndTab merge index of all groups 6915 * \param pppdE auto-correlation matrix pointer for all groups 6916 * \param ppdy cross-correlation array pointer for all groups 6917 * \returns estimated distortion 6918 */ 4058 6919 Int64 TEncAdaptiveLoopFilter::xEstimateFiltDist(Int filters_per_fr, Int* VarIndTab, 4059 6920 Double*** pppdE, Double** ppdy, … … 4076 6937 //clean m_E_merged one line 4077 6938 for(i=0; i < iFiltLength; i++) 6939 { 4078 6940 m_E_merged[f][j][i] = 0; 6941 } 4079 6942 4080 6943 //clean m_y_merged 4081 6944 m_y_merged[f][j] = 0; 4082 6945 } 6946 m_pixAcc_merged[f] = 0; 4083 6947 } 4084 6948 … … 4096 6960 { 4097 6961 for(i=0; i< iFiltLength; i++) 6962 { 4098 6963 ppdDstE[j][i] += ppdSrcE[j][i]; 6964 } 4099 6965 4100 6966 pdDsty[j] += pdSrcy[j]; 4101 6967 } 6968 m_pixAcc_merged[ VarIndTab[varInd] ] += m_pixAcc[varInd]; 6969 4102 6970 } 4103 6971 … … 4117 6985 4118 6986 } 4119 #endif 4120 4121 #if MTK_NONCROSS_INLOOP_FILTER 4122 4123 Void TEncAdaptiveLoopFilter::calcVarforSlices(imgpel **varmap, imgpel *imgY_Dec, Int pad_size, Int fl, Int img_stride) 4124 { 4125 #if MQT_BA_RA 4126 if(m_uiVarGenMethod == ALF_RA) 
4127 { 4128 return; 4129 } 4130 #endif 4131 4132 Pel* pPicSrc = (Pel *)imgY_Dec; 4133 Pel* pPicSlice = m_pcSliceYuvTmp->getLumaAddr(); 4134 4135 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 4136 { 4137 CAlfSlice* pSlice = &(m_pSlice[s]); 4138 4139 pSlice->copySliceLuma(pPicSlice, pPicSrc, img_stride); 4140 pSlice->extendSliceBorderLuma(pPicSlice, img_stride, (UInt)EXTEND_NUM_PEL); 4141 calcVarforOneSlice(pSlice, varmap, (imgpel*)pPicSlice, pad_size, fl, img_stride); 4142 } 4143 } 4144 4145 4146 4147 Void TEncAdaptiveLoopFilter::xfilterSlices_en(imgpel* ImgDec, imgpel* ImgRest,int filtNo, int iStride) 6987 6988 /** Calculate ALF grouping indices for ALF slices 6989 * \param varmap grouping indices buffer 6990 * \param imgY_Dec picture buffer 6991 * \param pad_size (max. filter tap)/2 6992 * \param fl VAR_SIZE 6993 * \param img_stride picture buffer stride 6994 */ 6995 Void TEncAdaptiveLoopFilter::xfilterSlicesEncoder(Pel* ImgDec, Pel* ImgRest, Int iStride, Int filtNo, Int** filterCoeff, Int* mergeTable, Pel** varImg) 4148 6996 { 4149 6997 Pel* pPicSrc = (Pel *)ImgDec; … … 4152 7000 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 4153 7001 { 4154 CAlfSlice* pSlice = &(m_pSlice[s]); 4155 4156 pSlice->copySliceLuma(pPicSlice, pPicSrc, iStride); 4157 pSlice->extendSliceBorderLuma(pPicSlice, iStride, EXTEND_NUM_PEL); 4158 4159 xfilterOneSlice_en(pSlice, (imgpel*)pPicSlice, ImgRest, filtNo, iStride); 4160 } 4161 } 4162 4163 4164 Void TEncAdaptiveLoopFilter::xfilterOneSlice_en(CAlfSlice* pSlice, imgpel* ImgDec, imgpel* ImgRest,int filtNo, int iStride) 4165 { 4166 UInt uiNumLCUs = pSlice->getNumLCUs(); 4167 4168 Int iHeight, iWidth; 4169 Int ypos, xpos; 4170 4171 for(UInt i=0; i< uiNumLCUs; i++) 4172 { 4173 CAlfCU* pcAlfCU = &((*pSlice)[i]); 4174 4175 ypos = pcAlfCU->getCU()->getCUPelY(); 4176 xpos = pcAlfCU->getCU()->getCUPelX(); 4177 iHeight = pcAlfCU->getHeight(); 4178 iWidth = pcAlfCU->getWidth(); 4179 4180 xfilterFrame_en(ypos, xpos, iHeight, iWidth, ImgDec, ImgRest, filtNo, 
iStride); 4181 } 4182 } 4183 4184 4185 4186 Void TEncAdaptiveLoopFilter::xstoreInBlockMatrixforSlices(imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int iStride) 7002 if(!m_pcPic->getValidSlice(s)) 7003 { 7004 continue; 7005 } 7006 std::vector< std::vector<AlfLCUInfo*> > & vpSliceTileAlfLCU = m_pvpSliceTileAlfLCU[s]; 7007 7008 for(Int t=0; t< (Int)vpSliceTileAlfLCU.size(); t++) 7009 { 7010 std::vector<AlfLCUInfo*> & vpAlfLCU = vpSliceTileAlfLCU[t]; 7011 copyRegion(vpAlfLCU, pPicSlice, pPicSrc, iStride); 7012 extendRegionBorder(vpAlfLCU, pPicSlice, iStride); 7013 filterLumaRegion(vpAlfLCU, pPicSlice, ImgRest, iStride, filtNo, filterCoeff, mergeTable, varImg); 7014 } 7015 } 7016 } 7017 7018 /** Calculate block autocorrelations and crosscorrelations for ALF slices 7019 * \param ImgOrg original picture 7020 * \param ImgDec picture before filtering 7021 * \param tap filter tap size 7022 * \param iStride picture buffer stride 7023 */ 7024 Void TEncAdaptiveLoopFilter::xstoreInBlockMatrixforSlices(Pel* ImgOrg, Pel* ImgDec, Int tap, Int iStride) 4187 7025 { 4188 7026 Pel* pPicSrc = (Pel *)ImgDec; 4189 7027 Pel* pPicSlice = m_pcSliceYuvTmp->getLumaAddr(); 4190 7028 7029 UInt iLastValidSliceID =0; 4191 7030 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 4192 7031 { 4193 CAlfSlice* pSlice = &(m_pSlice[s]); 4194 pSlice->copySliceLuma(pPicSlice, pPicSrc, iStride); 4195 pSlice->extendSliceBorderLuma(pPicSlice, iStride, (UInt)EXTEND_NUM_PEL); 4196 xstoreInBlockMatrixforOneSlice(pSlice, ImgOrg, (imgpel*)pPicSlice, tap, iStride, (s==0), (s== m_uiNumSlicesInPic-1)); 4197 } 4198 } 4199 4200 Void TEncAdaptiveLoopFilter::xstoreInBlockMatrixforOneSlice(CAlfSlice* pSlice, 4201 imgpel* ImgOrg, imgpel* ImgDec, 7032 if(m_pcPic->getValidSlice(s)) 7033 { 7034 iLastValidSliceID = s; 7035 } 7036 } 7037 7038 for(UInt s=0; s<= iLastValidSliceID; s++) 7039 { 7040 if(!m_pcPic->getValidSlice(s)) 7041 { 7042 continue; 7043 } 7044 std::vector< std::vector<AlfLCUInfo*> > & vpSliceTileAlfLCU = 
m_pvpSliceTileAlfLCU[s]; 7045 Int numValidTilesInSlice = (Int)vpSliceTileAlfLCU.size(); 7046 for(Int t=0; t< numValidTilesInSlice; t++) 7047 { 7048 std::vector<AlfLCUInfo*> & vpAlfLCU = vpSliceTileAlfLCU[t]; 7049 copyRegion(vpAlfLCU, pPicSlice, pPicSrc, iStride); 7050 extendRegionBorder(vpAlfLCU, pPicSlice, iStride); 7051 xstoreInBlockMatrixforRegion(vpAlfLCU, ImgOrg, pPicSlice, tap, iStride, (s==0)&&(t==0), (s== iLastValidSliceID)&&(t==numValidTilesInSlice-1)); 7052 } 7053 } 7054 } 7055 7056 /** Calculate block autocorrelations and crosscorrelations for one ALF region 7057 * \param vpAlfLCU ALF LCU data container 7058 * \param ImgOrg original picture 7059 * \param ImgDec picture before filtering 7060 * \param tap filter tap size 7061 * \param iStride picture buffer stride 7062 * \param bFirstSlice true for the first processing slice of the picture 7063 * \param bLastSlice true for the last processing slice of the picture 7064 */ 7065 Void TEncAdaptiveLoopFilter::xstoreInBlockMatrixforRegion(std::vector< AlfLCUInfo* > &vpAlfLCU, 7066 Pel* ImgOrg, Pel* ImgDec, 4202 7067 Int tap, Int iStride, 4203 7068 Bool bFirstSlice, … … 4206 7071 { 4207 7072 4208 4209 UInt uiNumLCUs = pSlice->getNumLCUs(); 4210 7073 UInt uiNumLCUs = (UInt)vpAlfLCU.size(); 4211 7074 Int iHeight, iWidth; 4212 7075 Int ypos, xpos; 4213 7076 Bool bFirstLCU, bLastLCU; 7077 Bool bFirstSGU, bLastSGU; 7078 UInt numSGUs; 4214 7079 4215 7080 for(UInt i=0; i< uiNumLCUs; i++) … … 4217 7082 bFirstLCU = (i==0); 4218 7083 bLastLCU = (i== uiNumLCUs -1); 4219 4220 CAlfCU* pcAlfCU = &((*pSlice)[i]); 4221 ypos = pcAlfCU->getCU()->getCUPelY(); 4222 xpos = pcAlfCU->getCU()->getCUPelX(); 4223 iHeight = pcAlfCU->getHeight(); 4224 iWidth = pcAlfCU->getWidth(); 4225 4226 xstoreInBlockMatrix(ypos, xpos, iHeight, iWidth, 4227 (bFirstSlice && bFirstLCU),(bLastSlice && bLastLCU), 4228 ImgOrg, ImgDec,tap, iStride); 4229 } 4230 } 4231 4232 4233 7084 AlfLCUInfo& cAlfLCU = *(vpAlfLCU[i]); 7085 numSGUs = cAlfLCU.numSGU; 7086 
for(UInt j=0; j< numSGUs; j++) 7087 { 7088 bFirstSGU= (j ==0); 7089 bLastSGU = (j == numSGUs -1); 7090 7091 ypos = (Int)(cAlfLCU[j].posY ); 7092 xpos = (Int)(cAlfLCU[j].posX ); 7093 iHeight = (Int)(cAlfLCU[j].height); 7094 iWidth = (Int)(cAlfLCU[j].width ); 7095 7096 xstoreInBlockMatrix(ypos, xpos, iHeight, iWidth, 7097 (bFirstSlice && bFirstLCU && bFirstSGU),(bLastSlice && bLastLCU && bLastSGU), 7098 ImgOrg, ImgDec,tap, iStride); 7099 } 7100 } 7101 } 7102 7103 7104 /** Calculate autocorrelations and crosscorrelations for chroma slices 7105 * \param ComponentID Cb or Cr 7106 * \param pOrg original picture 7107 * \param pCmp picture before filtering 7108 * \param iTap filter tap size 7109 * \param iOrgStride picture buffer stride for pOrg 7110 * \param iCmpStride picture buffer stride for pCmp 7111 */ 4234 7112 Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaSlices(Int ComponentID, Pel* pOrg, Pel* pCmp, Int iTap, Int iOrgStride, Int iCmpStride) 4235 7113 { … … 4239 7117 Pel* pPicSrc = pCmp; 4240 7118 Pel* pPicSlice = (ComponentID == ALF_Cb)?(m_pcSliceYuvTmp->getCbAddr()):(m_pcSliceYuvTmp->getCrAddr()); 4241 7119 Int chromaFormatShift = 1; 7120 7121 UInt iLastValidSliceID =0; 4242 7122 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 4243 7123 { 4244 CAlfSlice* pSlice = &(m_pSlice[s]); 4245 4246 pSlice->copySliceChroma(pPicSlice, pPicSrc, iCmpStride); 4247 pSlice->extendSliceBorderChroma(pPicSlice, iCmpStride, (UInt)EXTEND_NUM_PEL_C); 4248 4249 xCalcCorrelationFuncforChromaOneSlice(pSlice, pOrg, pPicSlice, iTap, iCmpStride,(s==m_uiNumSlicesInPic-1)); 4250 } 4251 } 4252 4253 Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaOneSlice(CAlfSlice* pSlice, Pel* pOrg, Pel* pCmp, Int iTap, Int iStride, Bool bLastSlice) 4254 { 4255 UInt uiNumLCUs = pSlice->getNumLCUs(); 7124 if(m_pcPic->getValidSlice(s)) 7125 { 7126 iLastValidSliceID = s; 7127 } 7128 } 7129 7130 for(UInt s=0; s<= iLastValidSliceID; s++) 7131 { 7132 if(!m_pcPic->getValidSlice(s)) 7133 { 7134 
continue; 7135 } 7136 std::vector< std::vector<AlfLCUInfo*> > & vpSliceTileAlfLCU = m_pvpSliceTileAlfLCU[s]; 7137 Int numValidTilesInSlice = (Int)vpSliceTileAlfLCU.size(); 7138 for(Int t=0; t< numValidTilesInSlice; t++) 7139 { 7140 std::vector<AlfLCUInfo*> & vpAlfLCU = vpSliceTileAlfLCU[t]; 7141 copyRegion(vpAlfLCU, pPicSlice, pPicSrc, iCmpStride, chromaFormatShift); 7142 extendRegionBorder(vpAlfLCU, pPicSlice, iCmpStride, chromaFormatShift); 7143 xCalcCorrelationFuncforChromaRegion(vpAlfLCU, pOrg, pPicSlice, iTap, iCmpStride,(s== iLastValidSliceID)&&(t== numValidTilesInSlice-1), chromaFormatShift); 7144 } 7145 } 7146 } 7147 7148 /** Calculate autocorrelations and crosscorrelations for one chroma slice 7149 * \param vpAlfLCU ALF LCU data container 7150 * \param pOrg original picture 7151 * \param pCmp picture before filtering 7152 * \param iTap filter tap size 7153 * \param iStride picture buffer stride 7154 * \param bLastSlice the last processing slice of picture 7155 */ 7156 Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaRegion(std::vector< AlfLCUInfo* > &vpAlfLCU, Pel* pOrg, Pel* pCmp, Int filtNo, Int iStride, Bool bLastSlice, Int iFormatShift) 7157 { 7158 UInt uiNumLCUs = (UInt)vpAlfLCU.size(); 4256 7159 4257 7160 Int iHeight, iWidth; 4258 7161 Int ypos, xpos; 4259 7162 Bool bLastLCU; 7163 Bool bLastSGU; 7164 UInt numSGUs; 4260 7165 4261 7166 for(UInt i=0; i< uiNumLCUs; i++) … … 4263 7168 bLastLCU = (i== uiNumLCUs -1); 4264 7169 4265 CAlfCU* pcAlfCU = &((*pSlice)[i]); 4266 ypos = ( pcAlfCU->getCU()->getCUPelY() >> 1 ); 4267 xpos = ( pcAlfCU->getCU()->getCUPelX() >> 1 ); 4268 iHeight = (Int)( pcAlfCU->getHeight() >> 1); 4269 iWidth = (Int)( pcAlfCU->getWidth() >> 1); 4270 4271 xCalcCorrelationFunc(ypos, xpos, pOrg, pCmp, iTap, iWidth, iHeight, iStride, iStride, (bLastSlice && bLastLCU ) ); 4272 } 4273 } 4274 4275 Void TEncAdaptiveLoopFilter::xFrameChromaforSlices(Int ComponentID, TComPicYuv* pcPicDecYuv, TComPicYuv* pcPicRestYuv, Int *qh, Int iTap ) 
4276 { 4277 Pel* pPicDec = (ComponentID == ALF_Cb)?( pcPicDecYuv->getCbAddr()):( pcPicDecYuv->getCrAddr()); 4278 // Pel* pPicRest = (ComponentID == ALF_Cb)?( pcPicRestYuv->getCbAddr()):( pcPicRestYuv->getCrAddr()); 4279 Pel* pPicSlice = (ComponentID == ALF_Cb)?(m_pcSliceYuvTmp->getCbAddr()):(m_pcSliceYuvTmp->getCrAddr()); 4280 4281 Int iStride = pcPicDecYuv->getCStride(); 4282 4283 assert(iStride == pcPicRestYuv->getCStride()); 4284 4285 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 4286 { 4287 CAlfSlice* pSlice = &(m_pSlice[s]); 4288 4289 pSlice->copySliceChroma(pPicSlice, pPicDec, iStride); 4290 pSlice->extendSliceBorderChroma(pPicSlice, iStride, (UInt)EXTEND_NUM_PEL_C); 4291 4292 xFrameChromaforOneSlice(pSlice, ComponentID, m_pcSliceYuvTmp, pcPicRestYuv, qh, iTap); 4293 } 4294 } 4295 4296 #endif 4297 4298 4299 #if MTK_SAO 4300 inline Double xRoundIbdi2(Double x) 4301 { 4302 return ((x)>0) ? (Int)(((Int)(x)+(1<<(g_uiBitIncrement-1)))/(1<<g_uiBitIncrement)) : ((Int)(((Int)(x)-(1<<(g_uiBitIncrement-1)))/(1<<g_uiBitIncrement))); 4303 } 4304 4305 inline Double xRoundIbdi(Double x) 4306 { 4307 return (g_uiBitIncrement >0 ? xRoundIbdi2((x)) : ((x)>=0 ? ((Int)((x)+0.5)) : ((Int)((x)-0.5)))) ; 4308 } 4309 4310 /** run QAO One Part. 
4311 * \param pQAOOnePart, iPartIdx 4312 */ 4313 Void TEncSampleAdaptiveOffset::xQAOOnePart(SAOQTPart* pQAOOnePart, Int iPartIdx) 4314 { 4315 Int iTypeIdx; 4316 Int iNumTotalType = MAX_NUM_SAO_TYPE; 4317 4318 Int64 iEstDist; 4319 Int64 iOffsetOrg; 4320 Int64 iOffset; 4321 Int64 iCount; 4322 Int iClassIdx; 4323 Int uiShift = g_uiBitIncrement << 1; 4324 Double dAreaWeight = (pQAOOnePart->part_xe - pQAOOnePart->part_xs + 1) * (pQAOOnePart->part_ye - pQAOOnePart->part_ys + 1); 4325 Double dComplexityCost = 0; 4326 Int iQaoPara1 = SAO_RDCO; 4327 4328 UInt uiDepth = pQAOOnePart->PartLevel; 4329 4330 // m_iDistOrg [iPartIdx] = 0; 4331 4332 m_iDistOrg [iPartIdx] = (Int64)((Double)(iQaoPara1)/10000 * m_dLambdaLuma * dAreaWeight); 4333 4334 for (iTypeIdx=-1; iTypeIdx<iNumTotalType; iTypeIdx++) 4335 { 4336 if( m_bUseSBACRD ) 4337 { 4338 m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); // pcCU->getDepth(0) ==> m_puhDepth[uiIdx] 4339 m_pcRDGoOnSbacCoder->resetBits(); 7170 AlfLCUInfo& cAlfLCU = *(vpAlfLCU[i]); 7171 numSGUs = cAlfLCU.numSGU; 7172 for(UInt j=0; j< numSGUs; j++) 7173 { 7174 bLastSGU = (j == numSGUs -1); 7175 ypos = (Int)(cAlfLCU[j].posY >> iFormatShift); 7176 xpos = (Int)(cAlfLCU[j].posX >> iFormatShift); 7177 iHeight = (Int)(cAlfLCU[j].height >> iFormatShift); 7178 iWidth = (Int)(cAlfLCU[j].width >> iFormatShift); 7179 xCalcCorrelationFunc(ypos, xpos, pOrg, pCmp, filtNo, iWidth, iHeight, iStride, iStride, (bLastSlice && bLastLCU && bLastSGU) ); 7180 } 7181 } 7182 } 7183 7184 // ==================================================================================================================== 7185 // Protected member functions 7186 // ==================================================================================================================== 7187 7188 Void TEncAdaptiveLoopFilter::xFilterTapDecisionChroma( UInt64 uiLumaRate, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, UInt64& ruiBits ) 7189 { 7190 
Int iShape, num_coeff; 7191 Int64 iOrgDistCb, iOrgDistCr, iFiltDistCb, iFiltDistCr, iDist; 7192 Bool bChanged = false; 7193 Int* qh = m_pcTempAlfParam->coeff_chroma; 7194 7195 UInt64 uiMinRate = uiLumaRate; 7196 UInt64 uiMinDist = MAX_INT; 7197 Double dMinCost = MAX_DOUBLE; 7198 Double dLocalMinCost = MAX_DOUBLE; 7199 7200 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam); 7201 xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiMinRate, uiMinDist, dMinCost); 7202 7203 #if ALF_SINGLE_FILTER_SHAPE 7204 iShape = 0; 7205 #else 7206 for(iShape = 0; iShape < 2; iShape++) 7207 #endif 7208 { 7209 // set global variables 7210 num_coeff = m_sqrFiltLengthTab[iShape]; 7211 m_pcTempAlfParam->chroma_idc = 3; 7212 m_pcTempAlfParam->filter_shape_chroma = iShape; 7213 m_pcTempAlfParam->num_coeff_chroma = num_coeff; 7214 7215 // keep original corr pointer 7216 Double **ppdTmpCorr = m_ppdAlfCorr; 7217 7218 // calc Cb matrix 7219 m_pcTempAlfParam->chroma_idc = 2; 7220 m_ppdAlfCorr = m_ppdAlfCorrCb; 7221 for(Int i=0; i<ALF_MAX_NUM_COEF; i++) 7222 { 7223 ::memset(m_ppdAlfCorr[i], 0, sizeof(Double) * (ALF_MAX_NUM_COEF + 1)); 7224 } 7225 Pel *pOrg = pcPicOrg->getCbAddr(); 7226 Pel *pCmp = pcPicDec->getCbAddr(); 7227 if(!m_bUseNonCrossALF) 7228 { 7229 xCalcCorrelationFunc(0, 0, pOrg, pCmp, iShape, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true); 4340 7230 } 4341 7231 else 4342 7232 { 7233 xCalcCorrelationFuncforChromaSlices(ALF_Cb, pOrg, pCmp, iShape, pcPicOrg->getCStride(), pcPicDec->getCStride()); 7234 } 7235 7236 // calc Cr matrix 7237 m_pcTempAlfParam->chroma_idc = 1; 7238 m_ppdAlfCorr = m_ppdAlfCorrCr; 7239 for(Int i=0; i<ALF_MAX_NUM_COEF; i++) 7240 { 7241 ::memset(m_ppdAlfCorr[i], 0, sizeof(Double) * (ALF_MAX_NUM_COEF + 1)); 7242 } 7243 pOrg = pcPicOrg->getCrAddr(); 7244 pCmp = pcPicDec->getCrAddr(); 7245 if(!m_bUseNonCrossALF) 7246 { 7247 xCalcCorrelationFunc(0, 0, pOrg, pCmp, iShape, (pcPicOrg->getWidth()>>1), 
(pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true); 7248 } 7249 else 7250 { 7251 xCalcCorrelationFuncforChromaSlices(ALF_Cr, pOrg, pCmp, iShape, pcPicOrg->getCStride(), pcPicDec->getCStride()); 7252 } 7253 7254 // restore original corr pointer 7255 m_ppdAlfCorr = ppdTmpCorr; 7256 7257 // calc original dist 7258 memset(qh, 0, sizeof(Int)*num_coeff); 7259 qh[num_coeff-1] = 1<<((Int)ALF_NUM_BIT_SHIFT); 7260 iOrgDistCb = xFastFiltDistEstimationChroma(m_ppdAlfCorrCb, qh, num_coeff); 7261 iOrgDistCr = xFastFiltDistEstimationChroma(m_ppdAlfCorrCr, qh, num_coeff); 7262 7263 for(Int iCmp=1; iCmp<=3; iCmp++) 7264 { 7265 m_pcTempAlfParam->chroma_idc = iCmp; 7266 xCalcALFCoeffChroma(iCmp, iShape, qh); 7267 iFiltDistCb = ((iCmp>>1)&0x1) ? xFastFiltDistEstimationChroma(m_ppdAlfCorrCb, qh, num_coeff) : iOrgDistCb; 7268 iFiltDistCr = ((iCmp) &0x1) ? xFastFiltDistEstimationChroma(m_ppdAlfCorrCr, qh, num_coeff) : iOrgDistCr; 7269 iDist = iFiltDistCb + iFiltDistCr; 7270 UInt64 uiRate = xCalcRateChroma(m_pcTempAlfParam); 7271 Double dCost = (Double)iDist + m_dLambdaChroma * (Double)uiRate; 7272 if(dCost < dLocalMinCost) 7273 { 7274 dLocalMinCost = dCost; 7275 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam); 7276 bChanged = true; 7277 } 7278 } 7279 } 7280 copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam); 7281 if(!bChanged) 7282 { 7283 m_pcBestAlfParam->chroma_idc = 0; 7284 return; 7285 } 7286 7287 // Adaptive in-loop wiener filtering for chroma 7288 xFilteringFrameChroma(m_pcTempAlfParam, pcPicOrg, pcPicDec, pcPicRest); 7289 7290 // filter on/off decision for chroma 7291 Int iCWidth = (pcPicOrg->getWidth()>>1); 7292 Int iCHeight = (pcPicOrg->getHeight()>>1); 7293 Int iCStride = pcPicOrg->getCStride(); 7294 UInt64 uiFiltDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicRest->getCbAddr(), iCWidth, iCHeight, iCStride); 7295 UInt64 uiFiltDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicRest->getCrAddr(), iCWidth, iCHeight, iCStride); 7296 UInt64 uiOrgDistCb = 
xCalcSSD(pcPicOrg->getCbAddr(), pcPicDec->getCbAddr(), iCWidth, iCHeight, iCStride); 7297 UInt64 uiOrgDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicDec->getCrAddr(), iCWidth, iCHeight, iCStride); 7298 if(((m_pcTempAlfParam->chroma_idc)>>1 & 0x1) && (uiOrgDistCb<=uiFiltDistCb)) 7299 { 7300 m_pcTempAlfParam->chroma_idc -= 2; 7301 pcPicDec->copyToPicCb(pcPicRest); 7302 } 7303 if(((m_pcTempAlfParam->chroma_idc) & 0x1) && (uiOrgDistCr<=uiFiltDistCr)) 7304 { 7305 m_pcTempAlfParam->chroma_idc -= 1; 7306 pcPicDec->copyToPicCr(pcPicRest); 7307 } 7308 7309 if(m_pcTempAlfParam->chroma_idc) 7310 { 7311 UInt64 uiRate, uiDist; 7312 Double dCost; 7313 xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost); 7314 7315 if( dCost < dMinCost ) 7316 { 7317 copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam); 7318 predictALFCoeffChroma(m_pcBestAlfParam); 7319 7320 ruiBits += uiRate; 7321 ruiDist += uiDist; 7322 } 7323 else 7324 { 7325 m_pcBestAlfParam->chroma_idc = 0; 7326 7327 if((m_pcTempAlfParam->chroma_idc>>1)&0x01) 7328 { 7329 pcPicDec->copyToPicCb(pcPicRest); 7330 } 7331 if(m_pcTempAlfParam->chroma_idc&0x01) 7332 { 7333 pcPicDec->copyToPicCr(pcPicRest); 7334 } 7335 7336 ruiBits += uiMinRate; 7337 ruiDist += uiMinDist; 7338 } 7339 } 7340 else 7341 { 7342 m_pcBestAlfParam->chroma_idc = 0; 7343 7344 ruiBits += uiMinRate; 7345 ruiDist += uiMinDist; 7346 7347 pcPicDec->copyToPicCb(pcPicRest); 7348 pcPicDec->copyToPicCr(pcPicRest); 7349 } 7350 } 7351 7352 Int64 TEncAdaptiveLoopFilter::xFastFiltDistEstimationChroma(Double** ppdCorr, Int* piCoeff, Int iSqrFiltLength) 7353 { 7354 Double pdcoeff[ALF_MAX_NUM_COEF]; 7355 Int i,j; 7356 Int64 iDist; 7357 Double dDist, dsum; 7358 for(i=0; i< iSqrFiltLength; i++) 7359 { 7360 pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<< ((Int)ALF_NUM_BIT_SHIFT) ); 7361 } 7362 7363 dDist =0; 7364 for(i=0; i< iSqrFiltLength; i++) 7365 { 7366 dsum= ((Double)ppdCorr[i][i]) * pdcoeff[i]; 7367 for(j=i+1; j< iSqrFiltLength; j++) 7368 { 7369 dsum += 
(Double)(2*ppdCorr[i][j])* pdcoeff[j]; 7370 } 7371 7372 dDist += ((dsum - 2.0 * ppdCorr[i][iSqrFiltLength])* pdcoeff[i] ); 7373 } 7374 7375 UInt uiShift = g_uiBitIncrement<<1; 7376 if(dDist < 0) 7377 { 7378 iDist = -(((Int64)(-dDist + 0.5)) >> uiShift); 7379 } 7380 else //dDist >=0 7381 { 7382 iDist= ((Int64)(dDist+0.5)) >> uiShift; 7383 } 7384 7385 return iDist; 7386 } 7387 7388 Void TEncAdaptiveLoopFilter::xCalcALFCoeffChroma(Int iChromaIdc, Int iShape, Int* piCoeff) 7389 { 7390 Int iSqrFiltLength = m_sqrFiltLengthTab[iShape]; 7391 7392 for(Int i=0; i<iSqrFiltLength; i++) 7393 { 7394 memset(m_ppdAlfCorr[i], 0, sizeof(Double)*(iSqrFiltLength + 1)); 7395 } 7396 7397 // retrive 7398 if((iChromaIdc>>1) & 0x1) 7399 { 7400 for(Int i=0; i<iSqrFiltLength; i++) 7401 { 7402 for(Int j=i; j<iSqrFiltLength+1; j++) 7403 { 7404 m_ppdAlfCorr[i][j] += m_ppdAlfCorrCb[i][j]; 7405 } 7406 } 7407 } 7408 if(iChromaIdc & 0x1) 7409 { 7410 for(Int i=0; i<iSqrFiltLength; i++) 7411 { 7412 for(Int j=i; j<iSqrFiltLength+1; j++) 7413 { 7414 m_ppdAlfCorr[i][j] += m_ppdAlfCorrCr[i][j]; 7415 } 7416 } 7417 } 7418 7419 // copy 7420 for(Int i=1; i<iSqrFiltLength; i++) 7421 { 7422 for(Int j=0; j<i; j++) 7423 { 7424 m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i]; 7425 } 7426 } 7427 7428 Double *corr = new Double[iSqrFiltLength]; 7429 for(Int i=0; i<iSqrFiltLength; i++) 7430 { 7431 corr[i] = m_ppdAlfCorr[i][iSqrFiltLength]; 7432 } 7433 7434 // calc coeff 7435 gnsSolveByChol(m_ppdAlfCorr, corr, m_pdDoubleAlfCoeff, iSqrFiltLength); 7436 xQuantFilterCoef(m_pdDoubleAlfCoeff, piCoeff, iShape, g_uiBitDepth + g_uiBitIncrement); 7437 delete [] corr; 7438 } 7439 7440 UInt64 TEncAdaptiveLoopFilter::xCalcRateChroma(ALFParam* pAlfParam) 7441 { 7442 UInt64 uiRate; 7443 Int* piTmpCoef; 7444 piTmpCoef = new Int[ALF_MAX_NUM_COEF]; 7445 memcpy(piTmpCoef, pAlfParam->coeff_chroma, sizeof(Int)*pAlfParam->num_coeff_chroma); 7446 7447 predictALFCoeffChroma(pAlfParam); 7448 7449 m_pcEntropyCoder->resetEntropy(); 7450 
m_pcEntropyCoder->resetBits(); 7451 m_pcEntropyCoder->encodeAlfParam(pAlfParam); 7452 uiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); 7453 if (m_vBestAlfCUCtrlParam.size() != 0) 7454 { 7455 for(UInt s=0; s< m_uiNumSlicesInPic; s++) 7456 { 7457 if(!m_pcPic->getValidSlice(s)) 7458 { 7459 continue; 7460 } 4343 7461 m_pcEntropyCoder->resetEntropy(); 4344 7462 m_pcEntropyCoder->resetBits(); 4345 } 4346 4347 iEstDist = 0; 4348 4349 m_pcEntropyCoder->m_pcEntropyCoderIf->codeAoUvlc(iTypeIdx+1); 4350 4351 if (iTypeIdx>=0) 4352 { 4353 4354 for(iClassIdx=1; iClassIdx < m_iNumClass[iTypeIdx]+1; iClassIdx++) 4355 { 4356 if(m_iCount [iPartIdx][iTypeIdx][iClassIdx]) 4357 { 4358 m_iOffset[iPartIdx][iTypeIdx][iClassIdx] = (Int64) xRoundIbdi((Double)(m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx]<<m_uiAoBitDepth) / (Double)m_iCount [iPartIdx][iTypeIdx][iClassIdx]); 4359 } 4360 else 4361 { 4362 m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx] = 0; 4363 m_iOffset[iPartIdx][iTypeIdx][iClassIdx] = 0; 4364 } 4365 4366 iCount = m_iCount [iPartIdx][iTypeIdx][iClassIdx]; 4367 iOffset = m_iOffset[iPartIdx][iTypeIdx][iClassIdx] << (g_uiBitIncrement-m_uiAoBitDepth); 4368 iOffsetOrg = m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx]; 4369 iEstDist += (( iCount*iOffset*iOffset-iOffsetOrg*iOffset*2 ) >> uiShift); 4370 m_pcEntropyCoder->m_pcEntropyCoderIf->codeAoSvlc((Int)m_iOffset[iPartIdx][iTypeIdx][iClassIdx]); 4371 } 4372 m_iDist[iPartIdx][iTypeIdx] = iEstDist; 4373 m_iRate[iPartIdx][iTypeIdx] = m_pcEntropyCoder->getNumberOfWrittenBits(); 4374 4375 m_dCost[iPartIdx][iTypeIdx] = (Double)((Double)m_iDist[iPartIdx][iTypeIdx] + m_dLambdaLuma * (Double) m_iRate[iPartIdx][iTypeIdx]); 4376 dComplexityCost = (Double)(iQaoPara1)/10000 * m_dLambdaLuma * (Double)m_iWeightAO[iTypeIdx] * dAreaWeight; 4377 m_dCost[iPartIdx][iTypeIdx] = (Double)((Double)m_iDist[iPartIdx][iTypeIdx] + m_dLambdaLuma * (Double) m_iRate[iPartIdx][iTypeIdx]) + dComplexityCost; 4378 4379 // printf("\n%3d:%10.f, %10.0f, 
%10.0f",iPartIdx,(Double)m_iDist[iPartIdx][iTypeIdx], dComplexityCost); 4380 4381 // printf("\n%d, %d, %6d, %6d, %f", iPartIdx, iTypeIdx, (Int)m_iDist[iPartIdx][iTypeIdx], (Int)m_iRate[iPartIdx][iTypeIdx], m_dCost[iPartIdx][iTypeIdx]); 4382 if(m_dCost[iPartIdx][iTypeIdx] < m_dCostPartBest[iPartIdx]) 4383 { 4384 m_iDistOrg [iPartIdx] = (Int64)dComplexityCost; 4385 m_dCostPartBest[iPartIdx] = m_dCost[iPartIdx][iTypeIdx]; 4386 m_iTypePartBest[iPartIdx] = iTypeIdx; 4387 if( m_bUseSBACRD ) 4388 m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[pQAOOnePart->PartLevel][CI_TEMP_BEST] ); 4389 } 4390 } 4391 else 4392 { 4393 4394 if(m_iDistOrg[iPartIdx] < m_dCostPartBest[iPartIdx] ) 4395 { 4396 m_dCostPartBest[iPartIdx] = (Double) m_iDistOrg[iPartIdx] + m_pcEntropyCoder->getNumberOfWrittenBits()*m_dLambdaLuma ; 4397 m_iTypePartBest[iPartIdx] = -1; 4398 if( m_bUseSBACRD ) 4399 m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[pQAOOnePart->PartLevel][CI_TEMP_BEST] ); 4400 } 4401 } 4402 } 4403 4404 pQAOOnePart->bProcessed = true; 4405 pQAOOnePart->bSplit = false; 4406 pQAOOnePart->iMinDist = m_iTypePartBest[iPartIdx] >= 0 ? m_iDist[iPartIdx][m_iTypePartBest[iPartIdx]] : m_iDistOrg[iPartIdx]; 4407 pQAOOnePart->iMinRate = (Int) (m_iTypePartBest[iPartIdx] >= 0 ? 
m_iRate[iPartIdx][m_iTypePartBest[iPartIdx]] : 0); 4408 pQAOOnePart->dMinCost = pQAOOnePart->iMinDist + m_dLambdaLuma * pQAOOnePart->iMinRate; 4409 pQAOOnePart->iBestType = m_iTypePartBest[iPartIdx]; 4410 if (pQAOOnePart->iBestType != -1) 4411 { 4412 pQAOOnePart->bEnableFlag = 1; 4413 pQAOOnePart->iLength = m_iNumClass[m_psQAOPart[iPartIdx].iBestType]; 4414 for (Int i=0; i<pQAOOnePart->iLength ; i++) 4415 pQAOOnePart->iOffset[i] = (Int) m_iOffset[iPartIdx][pQAOOnePart->iBestType][i+1]; 7463 m_pcEntropyCoder->encodeAlfCtrlParam( m_vBestAlfCUCtrlParam[s], m_uiNumCUsInFrame); 7464 uiRate += m_pcEntropyCoder->getNumberOfWrittenBits(); 7465 } 4416 7466 } 4417 7467 else 4418 7468 { 4419 pQAOOnePart->bEnableFlag = 0; 4420 pQAOOnePart->iLength = 0; 4421 } 4422 4423 } 4424 4425 /** run Part Tree Disable. 4426 * \param pQAOOnePart, iPartIdx 4427 */ 4428 Void TEncSampleAdaptiveOffset::xPartTreeDisable(Int iPartIdx) 4429 { 4430 SAOQTPart* pQAOPart= &(m_psQAOPart[iPartIdx]); 4431 4432 pQAOPart->bEnableFlag = false; 4433 pQAOPart->bSplit = false; 4434 pQAOPart->iLength = 0; 4435 pQAOPart->iBestType = -1; 4436 4437 if (pQAOPart->PartLevel < m_uiMaxSplitLevel) 4438 { 4439 for (Int i=0; i<NUM_DOWN_PART; i++) 4440 { 4441 xPartTreeDisable(pQAOPart->DownPartsIdx[i]); 4442 } 4443 } 4444 4445 } 4446 4447 /** run QuadTree Decision Function. 
4448 * \param iPartIdx, pcPicOrg, pcPicDec, pcPicRest, &dCostFinal 4449 */ 4450 Void TEncSampleAdaptiveOffset::xQuadTreeDecisionFunc(Int iPartIdx, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, Double &dCostFinal) 4451 { 4452 SAOQTPart* pQAOPart= &(m_psQAOPart[iPartIdx]); 4453 UInt uiDepth = pQAOPart->PartLevel; 4454 UInt uhNextDepth = uiDepth+1; 4455 4456 if (iPartIdx == 0) 4457 { 4458 dCostFinal = 0; 4459 } 4460 4461 //QAO for this part 4462 if(!pQAOPart->bProcessed) 4463 { 4464 xQAOOnePart (pQAOPart, iPartIdx); 4465 } 4466 4467 //QAO for sub 4 parts 4468 if (pQAOPart->PartLevel < m_uiMaxSplitLevel) 4469 { 4470 Double dCostNotSplit = m_dLambdaLuma + pQAOPart->dMinCost; 4471 Double dCostSplit = m_dLambdaLuma; 4472 4473 for (Int i=0; i< NUM_DOWN_PART ;i++) 4474 { 4475 if( m_bUseSBACRD ) 4476 { 4477 if ( 0 == iPartIdx) //initialize RD with previous depth buffer 4478 m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); 4479 else 4480 m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]); 4481 } 4482 xQuadTreeDecisionFunc(pQAOPart->DownPartsIdx[i], pcPicOrg, pcPicDec, pcPicRest, dCostFinal); 4483 dCostSplit += dCostFinal; 4484 if( m_bUseSBACRD ) 4485 { 4486 m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]); 4487 } 4488 } 4489 4490 4491 if(dCostSplit < dCostNotSplit) 4492 { 4493 dCostFinal = dCostSplit; 4494 pQAOPart->bSplit = true; 4495 pQAOPart->bEnableFlag = false; 4496 pQAOPart->iLength = 0; 4497 pQAOPart->iBestType = -1; 4498 } 4499 else 4500 { 4501 dCostFinal = dCostNotSplit; 4502 pQAOPart->bSplit = false; 4503 for (Int i=0; i<NUM_DOWN_PART; i++) 4504 { 4505 xPartTreeDisable(pQAOPart->DownPartsIdx[i]); 4506 } 4507 } 4508 } 4509 else 4510 { 4511 dCostFinal = pQAOPart->dMinCost; 4512 } 4513 } 4514 /** destory TEncSampleAdaptiveOffset class. 
4515 * \param 4516 */ 4517 Void TEncSampleAdaptiveOffset::destoryEncBuffer() 4518 { 4519 4520 for (Int i=0;i<m_iNumTotalParts;i++) 4521 { 4522 for (Int j=0;j<MAX_NUM_SAO_TYPE;j++) 4523 { 4524 if (m_iCount [i][j]) 4525 { 4526 delete [] m_iCount [i][j]; 4527 } 4528 if (m_iOffset[i][j]) 4529 { 4530 delete [] m_iOffset[i][j]; 4531 } 4532 if (m_iOffsetOrg[i][j]) 4533 { 4534 delete [] m_iOffsetOrg[i][j]; 4535 } 4536 } 4537 if (m_iRate[i]) 4538 { 4539 delete [] m_iRate[i]; 4540 } 4541 if (m_iDist[i]) 4542 { 4543 delete [] m_iDist[i]; 4544 } 4545 if (m_dCost[i]) 4546 { 4547 delete [] m_dCost[i]; 4548 } 4549 if (m_iCount [i]) 4550 { 4551 delete [] m_iCount [i]; 4552 } 4553 if (m_iOffset[i]) 4554 { 4555 delete [] m_iOffset[i]; 4556 } 4557 if (m_iOffsetOrg[i]) 4558 { 4559 delete [] m_iOffsetOrg[i]; 4560 } 4561 4562 } 4563 if (m_iDistOrg) 4564 { 4565 delete [] m_iDistOrg ; m_iDistOrg = NULL; 4566 } 4567 if (m_dCostPartBest) 4568 { 4569 delete [] m_dCostPartBest ; m_dCostPartBest = NULL; 4570 } 4571 if (m_iTypePartBest) 4572 { 4573 delete [] m_iTypePartBest ; m_iTypePartBest = NULL; 4574 } 4575 if (m_iRate) 4576 { 4577 delete [] m_iRate ; m_iRate = NULL; 4578 } 4579 if (m_iDist) 4580 { 4581 delete [] m_iDist ; m_iDist = NULL; 4582 } 4583 if (m_dCost) 4584 { 4585 delete [] m_dCost ; m_dCost = NULL; 4586 } 4587 if (m_iCount) 4588 { 4589 delete [] m_iCount ; m_iCount = NULL; 4590 } 4591 if (m_iOffset) 4592 { 4593 delete [] m_iOffset ; m_iOffset = NULL; 4594 } 4595 if (m_iOffsetOrg) 4596 { 4597 delete [] m_iOffsetOrg ; m_iOffsetOrg = NULL; 4598 } 4599 4600 4601 } 4602 Void TEncSampleAdaptiveOffset::createEncBuffer() 4603 { 4604 m_iDistOrg = new Int64 [m_iNumTotalParts]; 4605 m_dCostPartBest = new Double [m_iNumTotalParts]; 4606 m_iTypePartBest = new Int [m_iNumTotalParts]; 4607 4608 m_iRate = new Int64* [m_iNumTotalParts]; 4609 m_iDist = new Int64* [m_iNumTotalParts]; 4610 m_dCost = new Double*[m_iNumTotalParts]; 4611 4612 m_iCount = new Int64 **[m_iNumTotalParts]; 4613 m_iOffset = 
new Int64 **[m_iNumTotalParts]; 4614 m_iOffsetOrg = new Int64 **[m_iNumTotalParts]; 4615 4616 for (Int i=0;i<m_iNumTotalParts;i++) 4617 { 4618 m_iRate[i] = new Int64 [MAX_NUM_SAO_TYPE]; 4619 m_iDist[i] = new Int64 [MAX_NUM_SAO_TYPE]; 4620 m_dCost[i] = new Double [MAX_NUM_SAO_TYPE]; 4621 4622 m_iCount [i] = new Int64 *[MAX_NUM_SAO_TYPE]; 4623 m_iOffset[i] = new Int64 *[MAX_NUM_SAO_TYPE]; 4624 m_iOffsetOrg[i] = new Int64 *[MAX_NUM_SAO_TYPE]; 4625 4626 for (Int j=0;j<MAX_NUM_SAO_TYPE;j++) 4627 { 4628 m_iCount [i][j] = new Int64 [MAX_NUM_QAO_CLASS]; 4629 m_iOffset[i][j] = new Int64 [MAX_NUM_QAO_CLASS]; 4630 m_iOffsetOrg[i][j]= new Int64 [MAX_NUM_QAO_CLASS]; 4631 } 4632 } 4633 4634 } 4635 4636 /** start Sao Encoder. 4637 * \param pcPic, pcEntropyCoder, pppcRDSbacCoder, pcRDGoOnSbacCoder 4638 */ 4639 Void TEncSampleAdaptiveOffset::startSaoEnc( TComPic* pcPic, TEncEntropy* pcEntropyCoder, TEncSbac*** pppcRDSbacCoder, TEncSbac* pcRDGoOnSbacCoder) 4640 { 4641 if( pcRDGoOnSbacCoder ) 4642 m_bUseSBACRD = true; 4643 else 4644 m_bUseSBACRD = false; 4645 4646 m_pcPic = pcPic; 4647 m_pcEntropyCoder = pcEntropyCoder; 4648 4649 m_pppcRDSbacCoder = pppcRDSbacCoder; 4650 m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder; 4651 m_pcEntropyCoder->resetEntropy(); 4652 m_pcEntropyCoder->resetBits(); 4653 4654 if( m_bUseSBACRD ) 4655 { 4656 m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[0][CI_NEXT_BEST]); 4657 m_pppcRDSbacCoder[0][CI_CURR_BEST]->load( m_pppcRDSbacCoder[0][CI_NEXT_BEST]); 4658 } 4659 4660 m_bSaoFlag = 0; 4661 for (Int i=0;i<m_iNumTotalParts;i++) 4662 { 4663 m_dCostPartBest[i] = MAX_DOUBLE; 4664 m_iTypePartBest[i] = -1; 4665 m_iDistOrg[i] = 0; 4666 for (Int j=0;j<MAX_NUM_SAO_TYPE;j++) 4667 { 4668 m_iDist[i][j] = 0; 4669 m_iRate[i][j] = 0; 4670 m_dCost[i][j] = 0; 4671 for (Int k=0;k<MAX_NUM_QAO_CLASS;k++) 4672 { 4673 m_iCount [i][j][k] = 0; 4674 m_iOffset[i][j][k] = 0; 4675 m_iOffsetOrg[i][j][k] = 0; 4676 } 4677 } 4678 } 4679 4680 for(Int i=0; i< m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; 
i++) 4681 { 4682 m_psQAOPart[i].bEnableFlag = 0; 4683 m_psQAOPart[i].iBestType = -1; 4684 m_psQAOPart[i].iLength = 0; 4685 m_psQAOPart[i].bSplit = false; 4686 m_psQAOPart[i].bProcessed = false; 4687 m_psQAOPart[i].dMinCost = MAX_DOUBLE; 4688 m_psQAOPart[i].iMinDist = MAX_INT; 4689 m_psQAOPart[i].iMinRate = MAX_INT; 4690 4691 for (Int j=0;j<MAX_NUM_QAO_CLASS;j++) 4692 { 4693 m_psQAOPart[i].iOffset[j] = 0; 4694 } 4695 } 4696 4697 for(Int i=0; i< m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; i++) 4698 { 4699 m_psQAOPart[i].bEnableFlag = 0; 4700 m_psQAOPart[i].iBestType = -1; 4701 m_psQAOPart[i].iLength = 0; 4702 for (Int j=0;j<MAX_NUM_QAO_CLASS;j++) 4703 { 4704 m_psQAOPart[i].iOffset[j] = 0; 4705 } 4706 } 4707 4708 } 4709 4710 /** end Sao Encoder. 4711 * \param 4712 */ 4713 Void TEncSampleAdaptiveOffset::endSaoEnc() 4714 { 4715 m_pcPic = NULL; 4716 m_pcEntropyCoder = NULL; 4717 } 4718 4719 inline int xSign(int x) 4720 { 4721 return ((x >> 31) | ((int)( (((unsigned int) -x)) >> 31))); 4722 } 4723 /** calculate Ao Stats Cu 4724 * \param iAddr, iPartIdx 4725 */ 4726 Void TEncSampleAdaptiveOffset::calcAoStatsCu(Int iAddr, Int iPartIdx) 4727 { 4728 Int x,y; 4729 TComDataCU *pTmpCu = m_pcPic->getCU(iAddr); 4730 TComSPS *pTmpSPS = m_pcPic->getSlice(0)->getSPS(); 4731 4732 4733 Pel* pOrg ; 4734 Pel* pRec ; 4735 Int iStride = m_pcPic->getStride(); 4736 Int iLcuWidth = pTmpSPS->getMaxCUHeight(); 4737 Int iLcuHeight = pTmpSPS->getMaxCUWidth(); 4738 Int iPicWidth = pTmpSPS->getWidth(); 4739 Int iPicHeight = pTmpSPS->getHeight(); 4740 UInt uiLPelX = pTmpCu->getCUPelX(); 4741 UInt uiRPelX = uiLPelX + iLcuWidth; 4742 UInt uiTPelY = pTmpCu->getCUPelY(); 4743 UInt uiBPelY = uiTPelY + iLcuHeight; 4744 uiRPelX = uiRPelX > iPicWidth ? iPicWidth : uiRPelX; 4745 uiBPelY = uiBPelY > iPicHeight? 
iPicHeight: uiBPelY; 4746 iLcuWidth = uiRPelX - uiLPelX; 4747 iLcuHeight = uiBPelY - uiTPelY; 4748 Int64* iStats ; 4749 Int64* iCount ; 4750 Int iClassIdx; 4751 4752 4753 // if(m_iAoType == BO_0 || m_iAoType == BO_1) 4754 { 4755 iStats = m_iOffsetOrg[iPartIdx][SAO_BO_0]; 4756 iCount = m_iCount [iPartIdx][SAO_BO_0]; 4757 4758 pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr); 4759 pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr); 4760 4761 for (y=0; y<iLcuHeight; y++) 4762 { 4763 for (x=0; x<iLcuWidth; x++) 4764 { 4765 iClassIdx = m_ppLumaTableBo0[pRec[x]]; 4766 if (iClassIdx) 4767 { 4768 iStats[iClassIdx] += (pOrg[x] - pRec[x]); 4769 iCount[iClassIdx] ++; 4770 } 4771 } 4772 pOrg += iStride; 4773 pRec += iStride; 4774 } 4775 4776 iStats = m_iOffsetOrg[iPartIdx][SAO_BO_1]; 4777 iCount = m_iCount [iPartIdx][SAO_BO_1]; 4778 4779 pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr); 4780 pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr); 4781 4782 for (y=0; y<iLcuHeight; y++) 4783 { 4784 for (x=0; x<iLcuWidth; x++) 4785 { 4786 iClassIdx = m_ppLumaTableBo1[pRec[x]]; 4787 if (iClassIdx) 4788 { 4789 iStats[iClassIdx] += (pOrg[x] - pRec[x]); 4790 iCount[iClassIdx] ++; 4791 } 4792 } 4793 pOrg += iStride; 4794 pRec += iStride; 4795 } 4796 } 4797 4798 Int iSignLeft; 4799 Int iSignRight; 4800 Int iSignDown; 4801 Int iSignDown1; 4802 Int iSignDown2; 4803 4804 UInt uiEdgeType; 4805 4806 // if (m_iAoType == EO_0 || m_iAoType == EO_1 || m_iAoType == EO_2 || m_iAoType == EO_3) 4807 { 4808 // if (m_iAoType == EO_0 ) 4809 { 4810 iStats = m_iOffsetOrg[iPartIdx][SAO_EO_0]; 4811 iCount = m_iCount [iPartIdx][SAO_EO_0]; 4812 4813 pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr); 4814 pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr); 4815 for (y=0; y<iLcuHeight; y++) 4816 { 4817 iSignLeft = xSign(pRec[1] - pRec[0]); 4818 for (x=1; x<iLcuWidth-1; x++) 4819 { 4820 iSignRight = xSign(pRec[x] - pRec[x+1]); 4821 uiEdgeType = iSignRight + iSignLeft + 2; 4822 iSignLeft = -iSignRight; 4823 
4824 iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]); 4825 iCount[m_auiEoTable[uiEdgeType]] ++; 4826 } 4827 pOrg += iStride; 4828 pRec += iStride; 4829 } 4830 } 4831 4832 // if (m_iAoType == EO_1 ) 4833 { 4834 iStats = m_iOffsetOrg[iPartIdx][SAO_EO_1]; 4835 iCount = m_iCount [iPartIdx][SAO_EO_1]; 4836 4837 pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr); 4838 pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr); 4839 pOrg += iStride; 4840 pRec += iStride; 4841 4842 for (x=0; x< iLcuWidth; x++) 4843 { 4844 m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride]); 4845 } 4846 4847 for (y=1; y<iLcuHeight-1; y++) 4848 { 4849 for (x=0; x<iLcuWidth; x++) 4850 { 4851 4852 iSignDown = xSign(pRec[x] - pRec[x+iStride]); 4853 uiEdgeType = iSignDown + m_iUpBuff1[x] + 2; 4854 m_iUpBuff1[x]= -iSignDown; 4855 4856 iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]); 4857 iCount[m_auiEoTable[uiEdgeType]] ++; 4858 4859 } 4860 pOrg += iStride; 4861 pRec += iStride; 4862 } 4863 } 4864 // if (m_iAoType == EO_2 ) 4865 { 4866 iStats = m_iOffsetOrg[iPartIdx][SAO_EO_2]; 4867 iCount = m_iCount [iPartIdx][SAO_EO_2]; 4868 4869 pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr); 4870 pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr); 4871 pOrg += iStride; 4872 pRec += iStride; 4873 for (x=1; x<iLcuWidth; x++) 4874 { 4875 m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride-1]); 4876 } 4877 for (y=1; y<iLcuHeight-1; y++) 4878 { 4879 iSignDown2 = xSign(pRec[iStride] - pRec[0]); 4880 for (x=1; x<iLcuWidth-1; x++) 4881 { 4882 iSignDown1 = xSign(pRec[x] - pRec[x+iStride+1]) ; 4883 uiEdgeType = iSignDown1 + m_iUpBuff1[x] + 2; 4884 m_iUpBufft[x+1] = -iSignDown1; 4885 iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]); 4886 iCount[m_auiEoTable[uiEdgeType]] ++; 4887 } 4888 m_iUpBufft[1] = iSignDown2; 4889 ipSwap = m_iUpBuff1; 4890 m_iUpBuff1 = m_iUpBufft; 4891 m_iUpBufft = ipSwap; 4892 4893 pRec += iStride; 4894 pOrg += iStride; 4895 } 4896 } 4897 // if (m_iAoType == EO_3 ) 4898 { 4899 iStats = 
m_iOffsetOrg[iPartIdx][SAO_EO_3]; 4900 iCount = m_iCount [iPartIdx][SAO_EO_3]; 4901 4902 pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr); 4903 pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr); 4904 pOrg += iStride; 4905 pRec += iStride; 4906 for (x=0; x<iLcuWidth-1; x++) 4907 { 4908 m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride+1]); 4909 } 4910 4911 for (y=1; y<iLcuHeight-1; y++) 4912 { 4913 for (x=1; x<iLcuWidth-1; x++) 4914 { 4915 iSignDown1 = xSign(pRec[x] - pRec[x+iStride-1]) ; 4916 uiEdgeType = iSignDown1 + m_iUpBuff1[x] + 2; 4917 m_iUpBuff1[x-1] = -iSignDown1; 4918 iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]); 4919 iCount[m_auiEoTable[uiEdgeType]] ++; 4920 } 4921 m_iUpBuff1[iLcuWidth-2] = xSign(pRec[iLcuWidth-2 + iStride] - pRec[iLcuWidth-1]); 4922 4923 pRec += iStride; 4924 pOrg += iStride; 4925 } 4926 } 4927 } 4928 4929 } 4930 4931 /** run get QAO Stats 4932 * \param pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt 4933 */ 4934 Void TEncSampleAdaptiveOffset::xGetQAOStats(TComPicYuv* pcPicYuvOrg, TComPicYuv* pcPicYuvRec, TComPicYuv* pcPicYuvExt) 4935 { 4936 Int iLevelIdx, iPartIdx, iTypeIdx, iClassIdx; 4937 Int i; 4938 Int iNumTotalType = MAX_NUM_SAO_TYPE; 4939 Int LcuIdxX; 4940 Int LcuIdxY; 4941 Int iAddr; 4942 Int iFrameWidthInCU = m_pcPic->getFrameWidthInCU(); 4943 Int iDownPartIdx; 4944 Int iPartStart; 4945 Int iPartEnd; 4946 4947 if (m_uiMaxSplitLevel == 0) 4948 { 4949 iPartIdx = 0; 4950 for (LcuIdxY = m_psQAOPart[iPartIdx].StartCUY; LcuIdxY<= m_psQAOPart[iPartIdx].EndCUY; LcuIdxY++) 4951 { 4952 for (LcuIdxX = m_psQAOPart[iPartIdx].StartCUX; LcuIdxX<= m_psQAOPart[iPartIdx].EndCUX; LcuIdxX++) 4953 { 4954 iAddr = LcuIdxY*iFrameWidthInCU + LcuIdxX; 4955 calcAoStatsCu(iAddr, iPartIdx); 4956 } 4957 } 4958 4959 } 4960 else 4961 { 4962 for(iPartIdx=m_aiNumCulPartsLevel[m_uiMaxSplitLevel-1]; iPartIdx<m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; iPartIdx++) 4963 { 4964 for (LcuIdxY = m_psQAOPart[iPartIdx].StartCUY; LcuIdxY<= m_psQAOPart[iPartIdx].EndCUY; 
LcuIdxY++) 4965 { 4966 for (LcuIdxX = m_psQAOPart[iPartIdx].StartCUX; LcuIdxX<= m_psQAOPart[iPartIdx].EndCUX; LcuIdxX++) 4967 { 4968 iAddr = LcuIdxY*iFrameWidthInCU + LcuIdxX; 4969 calcAoStatsCu(iAddr, iPartIdx); 4970 } 4971 } 4972 } 4973 for (iLevelIdx=m_uiMaxSplitLevel-1; iLevelIdx>=0; iLevelIdx--) 4974 { 4975 iPartStart = (iLevelIdx > 0) ? m_aiNumCulPartsLevel[iLevelIdx-1] : 0; 4976 iPartEnd = m_aiNumCulPartsLevel[iLevelIdx]; 4977 for(iPartIdx = iPartStart; iPartIdx < iPartEnd; iPartIdx++) 4978 { 4979 for (i=0; i<NUM_DOWN_PART; i++) 4980 { 4981 iDownPartIdx = m_psQAOPart[iPartIdx].DownPartsIdx[i]; 4982 for (iTypeIdx=0; iTypeIdx<iNumTotalType; iTypeIdx++) 4983 { 4984 for (iClassIdx=0; iClassIdx<m_iNumClass[iTypeIdx]+1; iClassIdx++) 4985 { 4986 m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx] += m_iOffsetOrg[iDownPartIdx][iTypeIdx][iClassIdx]; 4987 m_iCount [iPartIdx][iTypeIdx][iClassIdx] += m_iCount [iDownPartIdx][iTypeIdx][iClassIdx]; 4988 } 4989 } 4990 } 4991 } 4992 } 4993 } 4994 } 4995 4996 /** Sample adaptive offset Process 4997 * \param dLambda 4998 */ 4999 Void TEncSampleAdaptiveOffset::SAOProcess( Double dLambda) 5000 { 5001 // set lambda 5002 TComPicYuv* pcPicYuvOrg = m_pcPic->getPicYuvOrg(); 5003 TComPicYuv* pcPicYuvRec = m_pcPic->getPicYuvRec(); 5004 5005 TComPicYuv* pcPicYuvExt = NULL; 5006 5007 m_eSliceType = m_pcPic->getSlice(0)->getSliceType(); 5008 m_iPicNalReferenceIdc = (m_pcPic->getSlice(0)->isReferenced() ? 1 :0); 5009 5010 m_dLambdaLuma = dLambda; 5011 m_dLambdaChroma = dLambda; 5012 5013 if (g_uiBitIncrement>1) 5014 { 5015 m_uiAoBitDepth = 1; 5016 } 5017 else 5018 { 5019 m_uiAoBitDepth = 0; 5020 } 5021 5022 Double dCostFinal = 0; 5023 5024 xGetQAOStats(pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt); 5025 xQuadTreeDecisionFunc(0, pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt, dCostFinal); 5026 m_bSaoFlag = dCostFinal < m_iDistOrg[0] ? 
1:0; 5027 5028 5029 if(m_bSaoFlag) 5030 { 5031 xProcessQuadTreeAo( 0, pcPicYuvRec, pcPicYuvExt); 5032 } 5033 5034 } 5035 5036 5037 #endif 7469 uiRate += m_uiNumSlicesInPic; 7470 } 7471 memcpy(pAlfParam->coeff_chroma, piTmpCoef, sizeof(int)*pAlfParam->num_coeff_chroma); 7472 delete[] piTmpCoef; 7473 piTmpCoef = NULL; 7474 7475 return uiRate; 7476 } 7477 #endif 7478 7479 //! \}
Note: See TracChangeset for help on using the changeset viewer.