Ignore:
Timestamp:
11 May 2012, 21:20:17 (13 years ago)
Author:
hschwarz
Message:

updated trunk (move to HM6.1)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/source/Lib/TLibEncoder/TEncAdaptiveLoopFilter.cpp

    r5 r56  
    22 * License, included below. This software may be subject to other third party
    33 * and contributor rights, including patent rights, and no such rights are
    4  * granted under this license.
     4 * granted under this license. 
    55 *
    6  * Copyright (c) 2010-2011, ISO/IEC
     6 * Copyright (c) 2010-2012, ITU/ISO/IEC
    77 * All rights reserved.
    88 *
     
    1515 *    this list of conditions and the following disclaimer in the documentation
    1616 *    and/or other materials provided with the distribution.
    17  *  * Neither the name of the ISO/IEC nor the names of its contributors may
     17 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
    1818 *    be used to endorse or promote products derived from this software without
    1919 *    specific prior written permission.
     
    3232 */
    3333
    34 
    35 
    3634/** \file     TEncAdaptiveLoopFilter.cpp
    3735 \brief    estimation part of adaptive loop filter class
     
    4341#include <math.h>
    4442
     43//! \ingroup TLibEncoder
     44//! \{
     45
    4546// ====================================================================================================================
    4647// Constants
    4748// ====================================================================================================================
    48 
     49#if LCU_SYNTAX_ALF
     50#define ALF_NUM_OF_REDESIGN 1
     51#else
    4952#define ALF_NUM_OF_REDESIGN 3
    50 
     53#endif
    5154// ====================================================================================================================
    5255// Tables
    5356// ====================================================================================================================
    54 
    55 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x9[81] =
    56 {
    57    0,  1,  2,  3,  4,  5,  6,  7,  8,
    58    9, 10, 11, 12, 13, 14, 15, 16, 17,
    59   18, 19, 20, 21, 22, 23, 24, 25, 26,
    60   27, 28, 29, 30, 31, 32, 33, 34, 35,
    61   36, 37, 38, 39, 40, 39, 38, 37, 36,
    62   35, 34, 33, 32, 31, 30, 29, 28, 27,
    63   26, 25, 24, 23, 22, 21, 20, 19, 18,
    64   17, 16, 15, 14, 13, 12, 11, 10,  9,
    65    8,  7,  6,  5,  4,  3,  2,  1,  0
     57#if LCU_SYNTAX_ALF
     58const Int TEncAdaptiveLoopFilter::m_alfNumPartsInRowTab[5] = // indexed by quad-tree level; each entry equals 2^level
     59{
     60  1,      //level 0
     61  2,      //level 1
     62  4,      //level 2
     63  8,      //level 3
     64  16      //level 4
    6665};
    6766
    68 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray7x7[49] =
    69 {
    70   0,  1,  2,  3,  4,  5,  6,
    71   7,  8,  9, 10, 11, 12, 13,
    72   14, 15, 16, 17, 18, 19, 20,
    73   21, 22, 23, 24, 23, 22, 21,
    74   20, 19, 18, 17, 16, 15, 14,
    75   13, 12, 11, 10,  9,  8,  7,
    76   6,  5,  4,  3,  2,  1,  0,
     67const Int TEncAdaptiveLoopFilter::m_alfNumPartsLevelTab[5] = // indexed by quad-tree level; each entry equals 4^level
     68{
     69  1,      //level 0
     70  4,      //level 1
     71  16,     //level 2
     72  64,     //level 3
     73  256     //level 4
    7774};
    7875
    79 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray5x5[25] =
    80 {
    81   0,  1,  2,  3,  4,
    82   5,  6,  7,  8,  9,
    83   10, 11, 12, 11, 10,
    84   9,  8,  7,  6,  5,
    85   4,  3,  2,  1,  0,
     76const Int TEncAdaptiveLoopFilter::m_alfNumCulPartsLevelTab[5] = // running sum of 4^k for k = 0..level; used to size the m_alfPQTPart arrays
     77{
     78  1,    //level 0
     79  5,    //level 1
     80  21,   //level 2
     81  85,   //level 3
     82  341,  //level 4
    8683};
    87 
    88 #if TI_ALF_MAX_VSIZE_7
    89 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x7[63] =
    90 {
    91    0,  1,  2,  3,  4,  5,  6,  7,  8,
    92    9, 10, 11, 12, 13, 14, 15, 16, 17,
    93   18, 19, 20, 21, 22, 23, 24, 25, 26,
    94   27, 28, 29, 30, 31, 30, 29, 28, 27,
    95   26, 25, 24, 23, 22, 21, 20, 19, 18,
    96   17, 16, 15, 14, 13, 12, 11, 10,  9,
    97    8,  7,  6,  5,  4,  3,  2,  1,  0
    98 };
    99 #endif
    100 
    101 #if MQT_ALF_NPASS
    102 #if TI_ALF_MAX_VSIZE_7
    103 Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[21] =
    104 #else
    105 Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[22] =
    106 #endif
    107 {
    108 #if TI_ALF_MAX_VSIZE_7
    109                   0,  1,  2,
    110               3,  4,  5,  6,  7,
    111           8,  9, 10, 11, 12, 13, 14,
    112      15, 16, 17, 18, 19, 20
    113 #else
    114                    0,
    115                1,  2,  3,
    116            4,  5,  6,  7,  8,
    117        9, 10, 11, 12, 13, 14, 15,
    118   16, 17, 18, 19, 20, 21
    119 #endif
    120 };
    121 
    122 Int TEncAdaptiveLoopFilter::m_aiTapPos7x7_In9x9Sym[14] =
    123 {                 
    124 #if TI_ALF_MAX_VSIZE_7
    125                   1,   
    126               4,  5,  6,   
    127           9, 10, 11, 12, 13,   
    128      16, 17, 18, 19, 20
    129 
    130 #else
    131 
    132                2,
    133            5,  6,  7,
    134       10, 11, 12, 13, 14,
    135   17, 18, 19, 20, 21
    136 #endif
    137 };
    138 
    139 Int TEncAdaptiveLoopFilter::m_aiTapPos5x5_In9x9Sym[8]  =
    140 {
    141 
    142 #if TI_ALF_MAX_VSIZE_7
    143             5,
    144        10, 11, 12,
    145    17, 18, 19, 20
    146 #else
    147            6,
    148       11, 12, 13,
    149   18, 19, 20, 21
    150 
    151 #endif
    152 
    153 };
    154 
    155 Int* TEncAdaptiveLoopFilter::m_iTapPosTabIn9x9Sym[NO_TEST_FILT] =
    156 {
    157   m_aiTapPos9x9_In9x9Sym, m_aiTapPos7x7_In9x9Sym, m_aiTapPos5x5_In9x9Sym
    158 };
    159 #endif
    160 
     84#endif
    16185// ====================================================================================================================
    16286// Constructor / destructor
    16387// ====================================================================================================================
    16488
     89#if LCU_SYNTAX_ALF
     90/// AlfCorrData: the default constructor creates an empty object (componentID = -1, no buffers allocated)
     91AlfCorrData::AlfCorrData()
     92{
     93  this->componentID = -1; // negative ID makes the destructor, operator+= and reset() skip the buffers
     94  this->ECorr  = NULL;
     95  this->yCorr  = NULL;
     96  this->pixAcc = NULL;
     97}
     98
     99AlfCorrData::AlfCorrData(Int cIdx) // allocates zero-filled correlation buffers for component cIdx
     100{
     101  const Int numCoef = ALF_MAX_NUM_COEF;
     102  const Int maxNumGroups = NO_VAR_BINS;
     103
     104  Int numGroups = (cIdx == ALF_Y)?(maxNumGroups):(1); // ALF_Y gets NO_VAR_BINS groups, every other component a single group
     105
     106  this->componentID = cIdx; // NOTE(review): a negative cIdx would still allocate below, but the destructor would then skip the frees
     107
     108  this->ECorr = new Double**[numGroups]; // ECorr is indexed [group][coef][coef]
     109  this->yCorr = new Double*[numGroups];  // yCorr is indexed [group][coef]
     110  this->pixAcc = new Double[numGroups];  // pixAcc holds one value per group
     111  for(Int g= 0; g< numGroups; g++)
     112  {
     113    this->yCorr[g] = new Double[numCoef];
     114    for(Int j=0; j< numCoef; j++)
     115    {
     116      this->yCorr[g][j] = 0;
     117    }
     118
     119    this->ECorr[g] = new Double*[numCoef];
     120    for(Int i=0; i< numCoef; i++)
     121    {
     122      this->ECorr[g][i] = new Double[numCoef];
     123      for(Int j=0; j< numCoef; j++)
     124      {
     125        this->ECorr[g][i][j] = 0;
     126      }
     127    }
     128    this->pixAcc[g] = 0; 
     129  }
     130}
     131
     132AlfCorrData::~AlfCorrData() // frees the buffers allocated by AlfCorrData(Int)
     133{
     134  if(this->componentID >=0) // a default-constructed object (componentID = -1) has nothing to free
     135  {
     136    const Int numCoef = ALF_MAX_NUM_COEF;
     137    const Int maxNumGroups = NO_VAR_BINS;
     138
     139    Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1); // same group count rule as the allocating constructor
     140
     141    for(Int g= 0; g< numGroups; g++)
     142    {
     143      for(Int i=0; i< numCoef; i++)
     144      {
     145        delete[] this->ECorr[g][i];
     146      }
     147      delete[] this->ECorr[g];
     148      delete[] this->yCorr[g];
     149    }
     150    delete[] this->ECorr;
     151    delete[] this->yCorr;
     152    delete[] this->pixAcc;
     153  }
     154
     155}
     156
     157AlfCorrData& AlfCorrData::operator += (const AlfCorrData& src) // adds src element-wise into this object and returns *this
     158{
     159  if(this->componentID >=0) // no-op for an object without buffers
     160  {
     161    const Int numCoef = ALF_MAX_NUM_COEF;
     162    const Int maxNumGroups = NO_VAR_BINS;
     163
     164    Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1); // NOTE(review): derived from this->componentID only; src is assumed to have the same layout (not checked)
     165    for(Int g=0; g< numGroups; g++)
     166    {
     167      this->pixAcc[g] += src.pixAcc[g];
     168
     169      for(Int j=0; j< numCoef; j++)
     170      {
     171        this->yCorr[g][j] += src.yCorr[g][j];
     172        for(Int i=0; i< numCoef; i++)
     173        {
     174          this->ECorr[g][j][i] += src.ECorr[g][j][i];
     175        }
     176      }
     177    }
     178  }
     179
     180  return *this;
     181}
     182
     183
     184Void AlfCorrData::reset() // zeroes pixAcc, yCorr and ECorr of every group; buffers stay allocated
     185{
     186  if(this->componentID >=0) // no-op for an object without buffers
     187  {
     188    const Int numCoef = ALF_MAX_NUM_COEF;
     189    const Int maxNumGroups = NO_VAR_BINS;
     190
     191    Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1); // same group count rule as the allocating constructor
     192    for(Int g=0; g< numGroups; g++)
     193    {
     194      this->pixAcc[g] = 0;
     195
     196      for(Int j=0; j< numCoef; j++)
     197      {
     198        this->yCorr[g][j] = 0;
     199        for(Int i=0; i< numCoef; i++)
     200        {
     201          this->ECorr[g][j][i] = 0;
     202        }
     203      }
     204
     205
     206    }
     207  }
     208
     209}
     210
     211Void AlfCorrData::mergeFrom(const AlfCorrData& src, Int* mergeTable, Bool doPixAccMerge) // rebuilds this object from src; for ALF_Y, src group varInd is accumulated into group mergeTable[varInd]
     212{
     213  assert(componentID == src.componentID);
     214
     215  reset(); // start from zero so the += accumulations below produce the merged result
     216
     217  const Int numCoef = ALF_MAX_NUM_COEF;
     218
     219  Double **srcE, **dstE;
     220  Double *srcy, *dsty;
     221
     222  switch(componentID)
     223  {
     224  case ALF_Cb:
     225  case ALF_Cr:
     226    {
     227      srcE = src.ECorr  [0]; // chroma uses group 0 only; mergeTable is not consulted
     228      dstE = this->ECorr[0];
     229
     230      srcy  = src.yCorr[0];
     231      dsty  = this->yCorr[0];
     232
     233      for(Int j=0; j< numCoef; j++)
     234      {
     235        for(Int i=0; i< numCoef; i++)
     236        {
     237          dstE[j][i] += srcE[j][i];
     238        }
     239
     240        dsty[j] += srcy[j];
     241      }
     242      if(doPixAccMerge)
     243      {
     244        this->pixAcc[0] = src.pixAcc[0]; // plain assignment (the luma branch uses +=); same result here because reset() zeroed pixAcc[0]
     245      }
     246    }
     247    break;
     248  case ALF_Y:
     249    {
     250      Int maxFilterSetSize = (Int)NO_VAR_BINS;
     251      for (Int varInd=0; varInd< maxFilterSetSize; varInd++)
     252      {
     253        Int filtIdx = (mergeTable == NULL)?(0):(mergeTable[varInd]); // a NULL mergeTable folds every source group into group 0
     254        srcE = src.ECorr  [varInd];
     255        dstE = this->ECorr[ filtIdx ];
     256        srcy  = src.yCorr[varInd];
     257        dsty  = this->yCorr[ filtIdx ];
     258        for(Int j=0; j< numCoef; j++)
     259        {
     260          for(Int i=0; i< numCoef; i++)
     261          {
     262            dstE[j][i] += srcE[j][i];
     263          }
     264          dsty[j] += srcy[j];
     265        }
     266        if(doPixAccMerge)
     267        {
     268          this->pixAcc[filtIdx] += src.pixAcc[varInd];
     269        }
     270      }
     271    }
     272    break;
     273  default: // any other component ID is reported and terminates the program
     274    {
     275      printf("not a legal component ID\n");
     276      assert(0);
     277      exit(-1);
     278    }
     279  }
     280}
     281
     282/// AlfPicQTPart: the default constructor sets componentID to -1 and both buffer pointers to NULL
     283AlfPicQTPart::AlfPicQTPart()
     284{
     285  componentID = -1;
     286  alfUnitParam = NULL;
     287  alfCorr = NULL;
     288}
     289
     290AlfPicQTPart::~AlfPicQTPart() // deletes alfUnitParam (together with its alfFiltParam) and alfCorr when they are set
     291{
     292  if(alfUnitParam != NULL)
     293  {
     294    if(alfUnitParam->alfFiltParam != NULL)
     295    {
     296      delete alfUnitParam->alfFiltParam; // deleted explicitly here, before the AlfUnitParam that points to it
     297      alfUnitParam->alfFiltParam = NULL;
     298    }
     299    delete alfUnitParam;
     300    alfUnitParam = NULL;
     301  }
     302  if(alfCorr != NULL)
     303  {
     304    delete alfCorr;
     305    alfCorr = NULL;
     306  }
     307}
     308
     309AlfPicQTPart& AlfPicQTPart::operator= (const AlfPicQTPart& src) // deep copy: plain fields are copied, alfUnitParam/alfCorr are allocated on demand and filled from src
     310{
     311  componentID = src.componentID;
     312  partCUXS    = src.partCUXS;
     313  partCUYS    = src.partCUYS;
     314  partCUXE    = src.partCUXE;
     315  partCUYE    = src.partCUYE;
     316  partIdx     = src.partIdx;
     317  partLevel   = src.partLevel;
     318  partCol     = src.partCol;
     319  partRow     = src.partRow;
     320  for(Int i=0; i<4; i++)
     321  {
     322    childPartIdx[i] = src.childPartIdx[i];
     323  }
     324  parentPartIdx = src.parentPartIdx;
     325
     326  isBottomLevel = src.isBottomLevel;
     327  isSplit       = src.isSplit;
     328
     329  isProcessed   = src.isProcessed;
     330  splitMinCost  = src.splitMinCost;
     331  splitMinDist  = src.splitMinDist;
     332  splitMinRate  = src.splitMinRate;
     333  selfMinCost   = src.selfMinCost;
     334  selfMinDist   = src.selfMinDist;
     335  selfMinRate   = src.selfMinRate;
     336
     337  numFilterBudget = src.numFilterBudget;
     338
     339  if(src.alfUnitParam != NULL)
     340  {
     341    if(alfUnitParam == NULL)
     342    {
     343      //create alfUnitparam
     344      alfUnitParam = new AlfUnitParam;
     345      alfUnitParam->alfFiltParam = new ALFParam(componentID); // componentID was already overwritten with src.componentID above
     346    }
     347    //assign from src
     348    alfUnitParam->mergeType = src.alfUnitParam->mergeType;
     349    alfUnitParam->isEnabled = src.alfUnitParam->isEnabled;
     350    alfUnitParam->isNewFilt = src.alfUnitParam->isNewFilt;
     351    alfUnitParam->storedFiltIdx = src.alfUnitParam->storedFiltIdx;
     352    *(alfUnitParam->alfFiltParam) = *(src.alfUnitParam->alfFiltParam); // NOTE(review): neither alfFiltParam pointer is NULL-checked before this dereference
     353  }
     354  else // a source without alfUnitParam is treated as a fatal error
     355  {
     356    printf("source quad-tree partition info is not complete\n");
     357    assert(0);
     358    exit(-1);
     359  }
     360
     361  if(src.alfCorr != NULL)
     362  {
     363    if(alfCorr == NULL)
     364    {
     365      alfCorr = new AlfCorrData(componentID);
     366    }
     367    alfCorr->reset(); // NOTE(review): on self-assignment this also clears src.alfCorr, so the += below would add zeros and the data would be lost
     368    (*alfCorr) += (*(src.alfCorr)); // reset followed by += copies the source values
     369  }
     370  else // a source without alfCorr is treated as a fatal error
     371  {
     372    printf("source quad-tree partition info is not complete\n");
     373    assert(0);
     374    exit(-1);
     375  }
     376  return *this;
     377}
     378#endif
     379
     380
    165381TEncAdaptiveLoopFilter::TEncAdaptiveLoopFilter()
    166382{
     383#if !LCU_SYNTAX_ALF
    167384  m_ppdAlfCorr = NULL;
     385  m_ppdAlfCorrCb = NULL;
     386  m_ppdAlfCorrCr = NULL;
    168387  m_pdDoubleAlfCoeff = NULL;
    169   m_pcPic = NULL;
     388#endif
    170389  m_pcEntropyCoder = NULL;
     390#if !LCU_SYNTAX_ALF
    171391  m_pcBestAlfParam = NULL;
    172392  m_pcTempAlfParam = NULL;
     393#endif
    173394  m_pcPicYuvBest = NULL;
    174395  m_pcPicYuvTmp = NULL;
    175 #if MTK_NONCROSS_INLOOP_FILTER
     396#if !LCU_SYNTAX_ALF
     397  pcAlfParamShape0 = NULL;
     398  pcAlfParamShape1 = NULL;
     399  pcPicYuvRecShape0 = NULL;
     400  pcPicYuvRecShape1 = NULL;
    176401  m_pcSliceYuvTmp = NULL;
    177402#endif
    178 #if MQT_BA_RA && MQT_ALF_NPASS
    179   m_aiFilterCoeffSaved = NULL;
    180 #endif
     403
     404  m_iALFMaxNumberFilters = NO_FILTERS;
     405
     406  m_bAlfCUCtrlEnabled = false;
    181407}
    182408
     
    185411// ====================================================================================================================
    186412
    187 #if MQT_BA_RA && MQT_ALF_NPASS
     413#if LCU_SYNTAX_ALF
     414/** Convert a quad-tree position (level, row, col) to its linear partition index
     415 * \param   level,  row,  col  quad-tree level and the row/column of the partition within that level
     416 */
     417Int TEncAdaptiveLoopFilter::convertLevelRowCol2Idx(Int level, Int row, Int col)
     418{
     419  Int idx;
     420  if (level == 0)
     421  {
     422    idx = 0;
     423  }
     424  else if (level == 1)
     425  {
     426    idx = 1 + row*2 + col; // offset 1 = indices taken by level 0; 2 partitions per row
     427  }
     428  else if (level == 2)
     429  {
     430    idx = 5 + row*4 + col; // offset 5 = 1 + 4; 4 partitions per row
     431  }
     432  else if (level == 3)
     433  {
     434    idx = 21 + row*8 + col; // offset 21 = 1 + 4 + 16; 8 partitions per row
     435  }
     436  else // (level == 4); any level outside 0..3 also takes this branch
     437  {
     438    idx = 85 + row*16 + col; // offset 85 = 1 + 4 + 16 + 64; 16 partitions per row
     439  }
     440  return idx;
     441}
     442
     443/** Convert a linear quad-tree partition index back to level, row and column (inverse of convertLevelRowCol2Idx)
     444 * \param  idx,  *level,  *row,  *col  idx is the input index; level, row and col are written through the pointers
     445 */
     446Void TEncAdaptiveLoopFilter::convertIdx2LevelRowCol(Int idx, Int *level, Int *row, Int *col)
     447{
     448  if (idx == 0)
     449  {
     450    *level = 0;
     451    *row = 0;
     452    *col = 0;
     453  }
     454  else if (idx>=1 && idx<=4)
     455  {
     456    *level = 1;
     457    *row = (idx-1) / 2; // subtract the level offset, then split by the 2 partitions per row
     458    *col = (idx-1) % 2;
     459  }
     460  else if (idx>=5 && idx<=20)
     461  {
     462    *level = 2;
     463    *row = (idx-5) / 4;
     464    *col = (idx-5) % 4;
     465  }
     466  else if (idx>=21 && idx<=84)
     467  {
     468    *level = 3;
     469    *row = (idx-21) / 8;
     470    *col = (idx-21) % 8;
     471  }
     472  else // (idx>=85 && idx<=340); NOTE(review): any other idx, including negative or > 340, also lands here unchecked
     473  {
     474    *level = 4;
     475    *row = (idx-85) / 16;
     476    *col = (idx-85) % 16;
     477  }
     478}
     479
     480/** Initialize the picture quad-tree: choose its maximum depth, allocate all partitions and build the tree
     481 * \param [in] isPicBasedEncode picture quad-tree encoding is enabled or disabled
     482 */
     483Void TEncAdaptiveLoopFilter::initPicQuadTreePartition(Bool isPicBasedEncode)
     484{
     485  if (!isPicBasedEncode)
     486  {
     487    return; // nothing is allocated when picture-based encoding is disabled
     488  }
     489 
     490  Int maxDepthInWidth   = (Int)(logf((float)(m_numLCUInPicWidth     ))/logf(2.0)); // truncated log2; NOTE(review): float rounding may give one less than the exact value for powers of two - confirm
     491  Int maxDepthInHeight  = (Int)(logf((float)(m_numLCUInPicHeight    ))/logf(2.0));
     492  Int maxDepthInFilters = (Int)(logf((float)(m_iALFMaxNumberFilters ))/logf(2.0));
     493  m_alfPQTMaxDepth = (maxDepthInWidth  > maxDepthInHeight ) ? maxDepthInHeight  : maxDepthInWidth ; // depth = minimum of the three limits
     494  m_alfPQTMaxDepth = (m_alfPQTMaxDepth > maxDepthInFilters) ? maxDepthInFilters : m_alfPQTMaxDepth ;
     495
     496  for (Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     497  {
     498    m_alfPQTPart[compIdx] = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ]; // NOTE(review): the table has 5 entries, so this assumes 0 <= m_alfPQTMaxDepth <= 4; no matching delete[] is visible in this chunk
     499    for (Int i = 0; i < m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++ )
     500    {
     501      m_alfPQTPart[compIdx][i].alfCorr = new AlfCorrData(compIdx);
     502      m_alfPQTPart[compIdx][i].alfUnitParam = new AlfUnitParam;
     503      m_alfPQTPart[compIdx][i].alfUnitParam->alfFiltParam = new ALFParam(compIdx);
     504    }
     505
     506  }
     507  creatPQTPart(0, 0, 0, -1, 0, m_numLCUInPicWidth-1, 0, m_numLCUInPicHeight-1); // root: level 0, parent index -1, covering the whole LCU grid
     508}
     509
     510/** Reset picture quad-tree variables: cost/processed state, correlations and ALF unit flags of every partition
     511 */
     512Void TEncAdaptiveLoopFilter::resetPQTPart()
     513{
     514  Int compIdx, i;
     515
     516  for (compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     517  {
     518    for (i = 0; i < m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++ ) // same partition count that initPicQuadTreePartition() allocates
     519    {
     520      m_alfPQTPart[compIdx][i].isProcessed  = false;
     521      m_alfPQTPart[compIdx][i].selfMinCost  = MAX_DOUBLE;
     522      m_alfPQTPart[compIdx][i].splitMinCost = MAX_DOUBLE;     
     523      //reset correlations
     524      m_alfPQTPart[compIdx][i].alfCorr->reset();
     525      //reset ALF unit param
     526      m_alfPQTPart[compIdx][i].alfUnitParam->mergeType = ALF_MERGE_DISABLED;
     527      m_alfPQTPart[compIdx][i].alfUnitParam->isEnabled = false;
     528      m_alfPQTPart[compIdx][i].alfUnitParam->alfFiltParam->alf_flag = 0;
     529    }
     530  }
     531}
     532
     533/** Recursively create the picture quad-tree: fill in one partition for Y, Cb and Cr, then its four children down to m_alfPQTMaxDepth
     534 * \param [in] partLevel quad-tree level
     535 * \param [in] partRow row position at partLevel
     536 * \param [in] partCol column position at partLevel
     537 * \param [in] parentPartIdx parent partition index
     538 * \param [in] partCUXS starting LCU X position
     539 * \param [in] partCUXE ending LCU X position
     540 * \param [in] partCUYS starting LCU Y position
     541 * \param [in] partCUYE ending LCU Y position
     542 */
     543Void TEncAdaptiveLoopFilter::creatPQTPart(Int partLevel, Int partRow, Int partCol, Int parentPartIdx, Int partCUXS, Int partCUXE, Int partCUYS, Int partCUYE)
     544{
     545  Int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);
     546
     547  AlfPicQTPart *alfOnePartY, *alfOnePartCb, *alfOnePartCr;
     548
     549  alfOnePartY  = &(m_alfPQTPart[ALF_Y ][partIdx]); // the three components share the same geometry and differ only in componentID
     550  alfOnePartCb = &(m_alfPQTPart[ALF_Cb][partIdx]);
     551  alfOnePartCr = &(m_alfPQTPart[ALF_Cr][partIdx]);
     552
     553  // Y, Cb, Cr
     554  alfOnePartY->partIdx   = alfOnePartCb->partIdx   = alfOnePartCr->partIdx   = partIdx;
     555  alfOnePartY->partCol   = alfOnePartCb->partCol   = alfOnePartCr->partCol   = partCol;
     556  alfOnePartY->partRow   = alfOnePartCb->partRow   = alfOnePartCr->partRow   = partRow;
     557  alfOnePartY->partLevel = alfOnePartCb->partLevel = alfOnePartCr->partLevel = partLevel;
     558
     559  alfOnePartY->partCUXS  = alfOnePartCb->partCUXS  = alfOnePartCr->partCUXS  = partCUXS; 
     560  alfOnePartY->partCUXE  = alfOnePartCb->partCUXE  = alfOnePartCr->partCUXE  = partCUXE;
     561  alfOnePartY->partCUYS  = alfOnePartCb->partCUYS  = alfOnePartCr->partCUYS  = partCUYS;
     562  alfOnePartY->partCUYE  = alfOnePartCb->partCUYE  = alfOnePartCr->partCUYE  = partCUYE;
     563
     564  alfOnePartY->parentPartIdx = alfOnePartCb->parentPartIdx = alfOnePartCr->parentPartIdx = parentPartIdx; 
     565  alfOnePartY->isSplit       = alfOnePartCb->isSplit       = alfOnePartCr->isSplit       = false;
     566
     567#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     568  alfOnePartY->numFilterBudget = alfOnePartCb->numFilterBudget = alfOnePartCr->numFilterBudget = m_iALFMaxNumberFilters/m_alfNumPartsLevelTab[partLevel]; // integer division of the filter limit by 4^partLevel
     569#else
     570  alfOnePartY->numFilterBudget = alfOnePartCb->numFilterBudget = alfOnePartCr->numFilterBudget = NO_VAR_BINS;
     571#endif
     572
     573  alfOnePartY->componentID  = ALF_Y;
     574  alfOnePartCb->componentID = ALF_Cb;
     575  alfOnePartCr->componentID = ALF_Cr;
     576
     577  if (alfOnePartY->partLevel != m_alfPQTMaxDepth) // not yet at the bottom level: split into four children
     578  {
     579    alfOnePartY->isBottomLevel = alfOnePartCb->isBottomLevel = alfOnePartCr->isBottomLevel = false;
     580
     581    Int downLevel    = partLevel + 1;
     582    Int downRowStart = partRow << 1; // child row/column positions are twice the parent's, plus 0 or 1
     583    Int downColStart = partCol << 1;
     584
     585    Int downRowIdx, downColIdx;
     586    Int numCULeft, numCUTop;
     587    Int downStartCUX, downStartCUY, downEndCUX, downEndCUY;
     588
     589    numCULeft = (partCUXE - partCUXS + 1) >> 1 ; // left/top children get half the LCUs rounded down; right/bottom children take the rest
     590    numCUTop  = (partCUYE - partCUYS + 1) >> 1 ;
     591
     592    // ChildPart00
     593    downStartCUX = partCUXS;
     594    downEndCUX   = downStartCUX + numCULeft - 1;
     595    downStartCUY = partCUYS;
     596    downEndCUY   = downStartCUY + numCUTop  - 1;
     597    downRowIdx   = downRowStart + 0;
     598    downColIdx   = downColStart + 0;
     599
     600    alfOnePartY->childPartIdx[0] = alfOnePartCb->childPartIdx[0] = alfOnePartCr->childPartIdx[0] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     601    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     602
     603    // ChildPart01
     604    downStartCUX = partCUXS + numCULeft;
     605    downEndCUX   = partCUXE;
     606    downStartCUY = partCUYS;
     607    downEndCUY   = downStartCUY + numCUTop  - 1;
     608    downRowIdx   = downRowStart + 0;
     609    downColIdx   = downColStart + 1;
     610
     611    alfOnePartY->childPartIdx[1] = alfOnePartCb->childPartIdx[1] = alfOnePartCr->childPartIdx[1] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     612    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     613
     614    // ChildPart10
     615    downStartCUX = partCUXS;
     616    downEndCUX   = downStartCUX + numCULeft - 1;
     617    downStartCUY = partCUYS + numCUTop;
     618    downEndCUY   = partCUYE;
     619    downRowIdx   = downRowStart + 1;
     620    downColIdx   = downColStart + 0;
     621
     622    alfOnePartY->childPartIdx[2] = alfOnePartCb->childPartIdx[2] = alfOnePartCr->childPartIdx[2] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     623    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     624
     625    // ChildPart11
     626    downStartCUX = partCUXS + numCULeft;
     627    downEndCUX   = partCUXE;
     628    downStartCUY = partCUYS + numCUTop;
     629    downEndCUY   = partCUYE;
     630    downRowIdx   = downRowStart + 1;
     631    downColIdx   = downColStart + 1;
     632
     633    alfOnePartY->childPartIdx[3] = alfOnePartCb->childPartIdx[3] = alfOnePartCr->childPartIdx[3] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     634    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     635  }
     636  else // bottom level: no children, marked with index -1
     637  {
     638    alfOnePartY->isBottomLevel = alfOnePartCb->isBottomLevel = alfOnePartCr->isBottomLevel = true;
     639
     640    alfOnePartY->childPartIdx[0] = alfOnePartCb->childPartIdx[0] = alfOnePartCr->childPartIdx[0] = -1;
     641    alfOnePartY->childPartIdx[1] = alfOnePartCb->childPartIdx[1] = alfOnePartCr->childPartIdx[1] = -1;
     642    alfOnePartY->childPartIdx[2] = alfOnePartCb->childPartIdx[2] = alfOnePartCr->childPartIdx[2] = -1;
     643    alfOnePartY->childPartIdx[3] = alfOnePartCb->childPartIdx[3] = alfOnePartCr->childPartIdx[3] = -1;
     644  }
     645}
     646
     647/** create global buffers for ALF encoding: per-component filter units and correlation data sized by m_uiNumCUsInFrame, plus the no-filter coefficients and slice counters
     648 */
     649Void TEncAdaptiveLoopFilter::createAlfGlobalBuffers()
     650{
     651  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     652  {
     653    m_alfPicFiltUnits[compIdx] = new AlfUnitParam[m_uiNumCUsInFrame];
     654    m_alfCorr[compIdx] = new AlfCorrData*[m_uiNumCUsInFrame];
     655    for(Int n=0; n< m_uiNumCUsInFrame; n++)
     656    {
     657      m_alfCorr[compIdx][n]= new AlfCorrData(compIdx);
     658      m_alfCorr[compIdx][n]->reset(); // the constructor already zero-fills the buffers; reset() zeroes them again
     659    }
     660
     661    m_alfCorrMerged[compIdx] = new AlfCorrData(compIdx);
     662
     663  }
     664
     665
     666  const Int numCoef = (Int)ALF_MAX_NUM_COEF;
     667
     668  for(Int i=0; i< (Int)NO_VAR_BINS; i++)
     669  {
     670    m_coeffNoFilter[i] = new Int[numCoef]; // left uninitialized here; initALFEnc() fills in the values
     671  }
     672
     673  m_numSlicesDataInOneLCU = new Int[m_uiNumCUsInFrame];
     674
     675}
     676
     677/** destroy ALF global buffers
     678 * This function is used to destroy the global ALF encoder buffers allocated by createAlfGlobalBuffers()
     679 */
     680Void TEncAdaptiveLoopFilter::destroyAlfGlobalBuffers()
     681{
     682  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     683  {
     684    delete[] m_alfPicFiltUnits[compIdx];
     685    for(Int n=0; n< m_uiNumCUsInFrame; n++) // assumes m_uiNumCUsInFrame is unchanged since the buffers were created
     686    {
     687      delete m_alfCorr[compIdx][n];
     688    }
     689
     690    delete[] m_alfCorr[compIdx];
     691    m_alfCorr[compIdx] = NULL; // only this pointer is reset to NULL; the other freed pointers keep their old values
     692
     693    delete m_alfCorrMerged[compIdx];
     694  }
     695
     696  //const Int numCoef = (Int)ALF_MAX_NUM_COEF;
     697
     698  for(Int i=0; i< (Int)NO_VAR_BINS; i++)
     699  {
     700    delete[] m_coeffNoFilter[i];
     701  }
     702
     703  delete[] m_numSlicesDataInOneLCU;
     704
     705}
     706
     707/** initialize ALF encoder at picture level: allocate the parameter containers, reset per-picture state and set up the no-filter coefficients
     708 * \param [in] isAlfParamInSlice ALF parameters are coded in slice (true) or APS (false)
     709 * \param [in] isPicBasedEncode picture-based encoding (true) or LCU-based encoding (false)
     710 * \param [in] numSlices number of slices in current picture
     711 * \param [in, out] alfParams ALF parameter set
     712 * \param [in, out] alfCUCtrlParam ALF CU-on/off control parameters
     713 */
     714Void TEncAdaptiveLoopFilter::initALFEnc(Bool isAlfParamInSlice, Bool isPicBasedEncode, Int numSlices, AlfParamSet* & alfParams, std::vector<AlfCUCtrlInfo>* & alfCUCtrlParam)
     715{
     716  m_picBasedALFEncode = isPicBasedEncode;
     717
     718  if(isAlfParamInSlice)
     719  {
     720    alfParams = new AlfParamSet[m_uiNumSlicesInPic]; // released again by uninitALFEnc()
     721    Int numLCUs = m_uiNumCUsInFrame; // initial value only; overwritten per slice in the loop below
     722
     723    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     724    {
     725      numLCUs = (Int)(m_pcPic->getOneSliceCUDataForNDBFilter(s).size());
     726      alfParams[s].create(m_numLCUInPicWidth,m_numLCUInPicHeight, numLCUs );
     727      alfParams[s].createALFParam();
     728    }
     729    alfCUCtrlParam = NULL;
     730  }
     731  else //ALF parameter in APS
     732  {
     733    alfParams = NULL; //ALF parameters are handled by APS
     734    alfCUCtrlParam = new std::vector<AlfCUCtrlInfo>;
     735    alfCUCtrlParam->resize(numSlices); // NOTE(review): this branch uses the numSlices argument while the slice branch uses m_uiNumSlicesInPic - confirm they agree
     736  }
     737
     738  resetPicAlfUnit();
     739
     740  if(m_picBasedALFEncode)
     741  {
     742    resetPQTPart(); 
     743  }
     744
     745  const Int numCoef = (Int)ALF_MAX_NUM_COEF;
     746#if LCUALF_QP_DEPENDENT_BITS
     747  Int numBitShift = getAlfPrecisionBit( m_alfQP );
     748#else
     749  Int numBitShift = (Int)ALF_NUM_BIT_SHIFT;
     750#endif
     751  for(Int i=0; i< (Int)NO_VAR_BINS; i++)
     752  {
     753    ::memset(&(m_coeffNoFilter[i][0]), 0, sizeof(Int)*numCoef); // all coefficients zero ...
     754    m_coeffNoFilter[i][numCoef-1] = (1 << numBitShift);         // ... except the last one, set to 1 << numBitShift
     755  }
     756
     757}
     758
     759/** Uninitialize ALF encoder at picture level: release whichever container initALFEnc() allocated and set the pointer to NULL
     760 * \param [in, out] alfParams ALF parameter set
     761 * \param [in, out] alfCUCtrlParam ALF CU-on/off control parameters
     762 */
     763Void TEncAdaptiveLoopFilter::uninitALFEnc(AlfParamSet* & alfParams, std::vector<AlfCUCtrlInfo>* & alfCUCtrlParam)
     764{
     765  if(alfParams != NULL)
     766  {
     767    for(Int s=0; s< m_uiNumSlicesInPic; s++) // assumes m_uiNumSlicesInPic still matches the array size used in initALFEnc()
     768    {
     769      alfParams[s].releaseALFParam();
     770    }
     771    delete[] alfParams;
     772    alfParams = NULL;
     773  }
     774
     775  if(alfCUCtrlParam != NULL)
     776  {
     777    delete alfCUCtrlParam;
     778    alfCUCtrlParam = NULL;
     779  }
     780}
     781
     782/** reset ALF unit parameters in current picture: every unit becomes disabled, unmerged and flagged as a new filter
     783 */
     784Void TEncAdaptiveLoopFilter::resetPicAlfUnit()
     785{
     786  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     787  {
     788    for(Int i=0; i< m_uiNumCUsInFrame; i++)
     789    {
     790      AlfUnitParam& alfUnit = m_alfPicFiltUnits[compIdx][i];
     791      alfUnit.mergeType = ALF_MERGE_DISABLED;
     792      alfUnit.isEnabled = false;
     793      alfUnit.isNewFilt = true;
     794      alfUnit.alfFiltParam = m_alfFiltInfo[compIdx][i]; // assignment from m_alfFiltInfo; no ALFParam is allocated here
     795
     796      alfUnit.alfFiltParam->alf_flag = 0; // written through the pointer assigned on the previous line
     797    }
     798  }
     799}
     800
     801#else
     802
     803/** create ALF global buffers
     804 * \param iALFEncodePassReduction 0: 16-pass encoding, 1: 1-pass encoding, 2: 2-pass encoding
     805 * This function is used to create the filter buffers to perform time-delay filtering.
     806 */
    188807Void TEncAdaptiveLoopFilter::createAlfGlobalBuffers(Int iALFEncodePassReduction)
    189808{
    190809  if(iALFEncodePassReduction)
    191810  {
     811    Int iNumOfBuffer = m_iGOPSize +1;
     812
    192813    for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    193814    {
    194       m_aiFilterCoeffSavedMethods[i] = new Int**[9];
    195       for(Int j=0; j< 9; j++)
    196       {
     815      m_mergeTableSavedMethods[i] = new Int*[iNumOfBuffer];
     816      m_aiFilterCoeffSavedMethods[i] = new Int**[iNumOfBuffer];
     817      for(Int j=0; j< iNumOfBuffer; j++)
     818      {
     819        m_mergeTableSavedMethods[i][j] = new Int[NO_VAR_BINS];
    197820        m_aiFilterCoeffSavedMethods[i][j] = new Int*[NO_VAR_BINS];
    198821        for(Int k=0; k< NO_VAR_BINS; k++)
    199822        {
    200           m_aiFilterCoeffSavedMethods[i][j][k] = new Int[MAX_SQR_FILT_LENGTH];
     823          m_aiFilterCoeffSavedMethods[i][j][k] = new Int[ALF_MAX_NUM_COEF];
    201824        }
    202825      }
    203     }
    204 
    205   }
    206 }
     826      m_iPreviousFilterShapeMethods[i] = new Int[iNumOfBuffer];
     827    }
     828
     829  }
     830}
     831/** destroy ALF global buffers
     832 * This function is used to destroy the filter buffers.
     833 */
    207834
    208835Void TEncAdaptiveLoopFilter::destroyAlfGlobalBuffers()
     
    212839    for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    213840    {
    214       for(Int j=0; j< 9; j++)
     841      for(Int j=0; j< m_iGOPSize+1; j++)
    215842      {
    216843        for(Int k=0; k< NO_VAR_BINS; k++)
     
    219846        }
    220847        delete[] m_aiFilterCoeffSavedMethods[i][j];
     848        delete[] m_mergeTableSavedMethods[i][j];
    221849      }
    222850      delete[] m_aiFilterCoeffSavedMethods[i];
    223     }
    224 
    225   }
    226 
    227 }
    228 #endif
    229 
     851      delete[] m_iPreviousFilterShapeMethods[i];
     852      delete[] m_mergeTableSavedMethods[i];
     853
     854    }
     855
     856  }
     857
     858}
     859#endif
    230860/**
    231861 \param pcPic           picture (TComPic) pointer
     
    234864Void TEncAdaptiveLoopFilter::startALFEnc( TComPic* pcPic, TEncEntropy* pcEntropyCoder )
    235865{
    236   m_pcPic = pcPic;
    237866  m_pcEntropyCoder = pcEntropyCoder;
    238  
    239   m_eSliceType = pcPic->getSlice(0)->getSliceType();
    240   m_iPicNalReferenceIdc = (pcPic->getSlice(0)->isReferenced() ? 1 :0);
    241  
    242   m_uiNumSCUInCU = m_pcPic->getNumPartInCU();
    243  
     867#if !LCU_SYNTAX_ALF
    244868  xInitParam();
     869#endif
    245870  xCreateTmpAlfCtrlFlags();
    246871 
     
    251876  m_pcPicYuvTmp->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
    252877  m_pcPicYuvBest = pcPic->getPicYuvPred();
    253  
     878#if !LCU_SYNTAX_ALF 
    254879  m_pcBestAlfParam = new ALFParam;
    255880  m_pcTempAlfParam = new ALFParam;
    256881  allocALFParam(m_pcBestAlfParam);
    257882  allocALFParam(m_pcTempAlfParam);
    258   m_im_width = iWidth;
    259   m_im_height = iHeight;
    260  
     883  pcPicYuvRecShape0 = new TComPicYuv();
     884  pcPicYuvRecShape0->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
     885 
     886  pcPicYuvRecShape1 = new TComPicYuv();
     887  pcPicYuvRecShape1->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
     888
     889  pcAlfParamShape0 = new ALFParam;
     890  pcAlfParamShape1 = new ALFParam;
     891
     892  allocALFParam(pcAlfParamShape0); 
     893  allocALFParam(pcAlfParamShape1);
     894
    261895  // init qc_filter
    262   initMatrix4D_double(&m_EGlobalSym, NO_TEST_FILT,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
    263   initMatrix3D_double(&m_yGlobalSym, NO_TEST_FILT, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    264   initMatrix_int(&m_filterCoeffSymQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    265  
     896  initMatrix4D_double(&m_EGlobalSym, NUM_ALF_FILTER_SHAPE+1,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
     897  initMatrix3D_double(&m_yGlobalSym, NUM_ALF_FILTER_SHAPE+1, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
     898#endif
     899  initMatrix_int(&m_filterCoeffSymQuant, NO_VAR_BINS, ALF_MAX_NUM_COEF);
     900#if !LCU_SYNTAX_ALF
    266901  m_pixAcc = (double *) calloc(NO_VAR_BINS, sizeof(double));
    267 #if !MQT_BA_RA
    268   get_mem2Dpel(&m_varImg, m_im_height, m_im_width);
    269 #endif
    270   get_mem2Dpel(&m_maskImg, m_im_height, m_im_width);
    271  
     902#endif
     903  initMatrix_Pel(&m_maskImg, m_img_height, m_img_width);
    272904  initMatrix_double(&m_E_temp, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);//
    273905  m_y_temp = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));//
     
    275907  initMatrix_double(&m_y_merged, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); //
    276908  m_pixAcc_merged = (double *) calloc(NO_VAR_BINS, sizeof(double));//
    277  
    278   m_filterCoeffQuantMod = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));//
    279   m_filterCoeff = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));//
    280   m_filterCoeffQuant = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));//
    281   initMatrix_int(&m_diffFilterCoeffQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);//
    282   initMatrix_int(&m_FilterCoeffQuantTemp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);//
    283  
     909  m_filterCoeffQuantMod = (int *) calloc(ALF_MAX_NUM_COEF, sizeof(int));//
     910  m_filterCoeff = (double *) calloc(ALF_MAX_NUM_COEF, sizeof(double));//
     911  m_filterCoeffQuant = (int *) calloc(ALF_MAX_NUM_COEF, sizeof(int));//
     912  initMatrix_int(&m_diffFilterCoeffQuant, NO_VAR_BINS, ALF_MAX_NUM_COEF);//
     913  initMatrix_int(&m_FilterCoeffQuantTemp, NO_VAR_BINS, ALF_MAX_NUM_COEF);//
     914
     915#if LCU_SYNTAX_ALF
     916  m_tempALFp = new ALFParam(ALF_Y);
     917#else
    284918  m_tempALFp = new ALFParam;
    285919  allocALFParam(m_tempALFp);
    286   m_pcDummyEntropyCoder = m_pcEntropyCoder;
    287 
    288 #if MTK_NONCROSS_INLOOP_FILTER
    289920  if( m_bUseNonCrossALF )
    290921  {
     
    293924  }
    294925#endif
    295 
    296 
    297926}
    298927
    299928Void TEncAdaptiveLoopFilter::endALFEnc()
    300929{
     930#if !LCU_SYNTAX_ALF
    301931  xUninitParam();
     932#endif
    302933  xDestroyTmpAlfCtrlFlags();
    303934 
     
    307938  m_pcPic = NULL;
    308939  m_pcEntropyCoder = NULL;
    309  
     940#if !LCU_SYNTAX_ALF
    310941  freeALFParam(m_pcBestAlfParam);
    311942  freeALFParam(m_pcTempAlfParam);
    312943  delete m_pcBestAlfParam;
    313944  delete m_pcTempAlfParam;
     945
     946  pcPicYuvRecShape0->destroyLuma();
     947  delete pcPicYuvRecShape0;
     948  pcPicYuvRecShape0 = NULL;
     949
     950  pcPicYuvRecShape1->destroyLuma();
     951  delete pcPicYuvRecShape1;
     952  pcPicYuvRecShape1 = NULL;
     953
     954  freeALFParam(pcAlfParamShape0);
     955  freeALFParam(pcAlfParamShape1);
     956
     957  delete pcAlfParamShape0;
     958  delete pcAlfParamShape1;
     959
    314960  // delete qc filters
    315   destroyMatrix4D_double(m_EGlobalSym, NO_TEST_FILT,  NO_VAR_BINS);
    316   destroyMatrix3D_double(m_yGlobalSym, NO_TEST_FILT);
     961  destroyMatrix4D_double(m_EGlobalSym, NUM_ALF_FILTER_SHAPE+1,  NO_VAR_BINS);
     962  destroyMatrix3D_double(m_yGlobalSym, NUM_ALF_FILTER_SHAPE+1);
     963#endif
    317964  destroyMatrix_int(m_filterCoeffSymQuant);
    318  
     965#if !LCU_SYNTAX_ALF 
    319966  free(m_pixAcc);
    320 #if !MQT_BA_RA
    321   free_mem2Dpel(m_varImg);
    322 #endif
    323   free_mem2Dpel(m_maskImg);
    324  
     967#endif
     968  destroyMatrix_Pel(m_maskImg);
    325969  destroyMatrix3D_double(m_E_merged, NO_VAR_BINS);
    326970  destroyMatrix_double(m_y_merged);
     
    336980  destroyMatrix_int(m_FilterCoeffQuantTemp);
    337981 
     982#if LCU_SYNTAX_ALF
     983  delete m_tempALFp;
     984#else
    338985  freeALFParam(m_tempALFp);
    339986  delete m_tempALFp;
    340 
    341 #if MTK_NONCROSS_INLOOP_FILTER
    342987
    343988  if(m_bUseNonCrossALF)
     
    348993  }
    349994#endif
    350 
    351 }
    352 
     995}
     996
     997#if LCU_SYNTAX_ALF
     998
     999/** Assign output ALF parameters
     1000 * \param [in, out] alfParamSet ALF parameter set
     1001 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     1002 */
     1003Void TEncAdaptiveLoopFilter::assignALFEncoderParam(AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     1004{
     1005  //assign CU control parameters
     1006  if(m_bAlfCUCtrlEnabled)
     1007  {
     1008    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1009    {
     1010      (*alfCtrlParam)[s]= m_vBestAlfCUCtrlParam[s];
     1011    }
     1012  }
     1013
     1014  //assign RDO results to alfParamSet
     1015  if(m_alfCoefInSlice)
     1016  {
     1017    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1018    {
     1019      if(!m_pcPic->getValidSlice(s))
     1020      {
     1021        continue;
     1022      }
     1023
     1024      if( m_bestAlfParamSet[s].isEnabled[ALF_Y] || m_bestAlfParamSet[s].isEnabled[ALF_Cb] || m_bestAlfParamSet[s].isEnabled[ALF_Cr])
     1025      {
     1026        m_bestAlfParamSet[s].isEnabled[ALF_Y] = true;
     1027      }
     1028
     1029      copyAlfParamSet(&(alfParamSet[s]), &(m_bestAlfParamSet[s]));
     1030    }
     1031  }
     1032  else
     1033  {
     1034    if( m_bestAlfParamSet->isEnabled[ALF_Y] || m_bestAlfParamSet->isEnabled[ALF_Cb] || m_bestAlfParamSet->isEnabled[ALF_Cr])
     1035    {
     1036      m_bestAlfParamSet->isEnabled[ALF_Y] = true;
     1037    }
     1038
     1039    copyAlfParamSet(alfParamSet, m_bestAlfParamSet);
     1040  }
     1041
     1042  if(m_alfCoefInSlice)
     1043  {
     1044    delete[] m_bestAlfParamSet;
     1045  }
     1046  else
     1047  {
     1048    delete m_bestAlfParamSet;
     1049  }
     1050}
     1051
     1052/** initialize ALF encoder configurations
     1053 * \param [in, out] alfParamSet ALF parameter set
     1054 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     1055 */
     1056Void TEncAdaptiveLoopFilter::initALFEncoderParam(AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     1057{
     1058  //reset BA index map
     1059  memset(&m_varImg[0][0], 0, sizeof(Pel)*(m_img_height*m_img_width));
     1060
     1061  //reset mask
     1062  for(Int y=0; y< m_img_height; y++)
     1063  {
     1064    for(Int x=0; x< m_img_width; x++)
     1065    {
     1066      m_maskImg[y][x] = 1;
     1067    }
     1068  }
     1069  //get last valid slice index
     1070  for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1071  {
     1072    if(m_pcPic->getValidSlice(s))
     1073    {
     1074      m_lastSliceIdx = s;
     1075    }
     1076  }
     1077  //reset alf CU control flags
     1078  m_bAlfCUCtrlEnabled = (alfCtrlParam != NULL)?true:false;
     1079  if(m_bAlfCUCtrlEnabled)
     1080  {
     1081    m_vBestAlfCUCtrlParam.resize(m_uiNumSlicesInPic);
     1082    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1083    {
     1084      m_vBestAlfCUCtrlParam[s].reset();
     1085    }
     1086  }
     1087  else
     1088  {
     1089    m_vBestAlfCUCtrlParam.clear();
     1090  }
     1091  //get number slices in each LCU
     1092  if(m_uiNumSlicesInPic == 1 || m_iSGDepth == 0)
     1093  {
     1094    for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1095    {
     1096      m_numSlicesDataInOneLCU[n] = 1;
     1097    }
     1098  }
     1099  else
     1100  {
     1101    Int count;
     1102    Int prevSliceID = -1;
     1103
     1104    for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1105    {
     1106      std::vector<NDBFBlockInfo>& vNDBFBlock = *(m_pcPic->getCU(n)->getNDBFilterBlocks());
     1107
     1108      count = 0;
     1109
     1110      for(Int i=0; i< (Int)vNDBFBlock.size(); i++)
     1111      {
     1112        if(vNDBFBlock[i].sliceID != prevSliceID)
     1113        {
     1114          prevSliceID = vNDBFBlock[i].sliceID;
     1115          count++;
     1116        }
     1117      }
     1118
     1119      m_numSlicesDataInOneLCU[n] = count;
     1120    }
     1121  }
     1122  //set redesign number
     1123  if(m_iALFEncodePassReduction)
     1124  {
     1125    m_iALFNumOfRedesign = 0;
     1126  }
     1127  else
     1128  {
     1129    m_iALFNumOfRedesign = ALF_NUM_OF_REDESIGN;
     1130  }
     1131
     1132  //initialize m_bestAlfParamSet
     1133  if(m_alfCoefInSlice)
     1134  {
     1135    m_bestAlfParamSet = new AlfParamSet[m_uiNumSlicesInPic];
     1136    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1137    {
     1138      m_bestAlfParamSet[s].create( alfParamSet[s].numLCUInWidth, alfParamSet[s].numLCUInHeight, alfParamSet[s].numLCU);
     1139    }
     1140  }
     1141  else
     1142  {
     1143    m_bestAlfParamSet = new AlfParamSet;
     1144    m_bestAlfParamSet->create( alfParamSet->numLCUInWidth, alfParamSet->numLCUInHeight, alfParamSet->numLCU);
     1145  }
     1146
     1147}
     1148
     1149/** copy ALF parameter set
     1150 * \param [out] dst destination ALF parameter set
     1151 * \param [in] src source ALF parameter set
     1152 */
     1153Void TEncAdaptiveLoopFilter::copyAlfParamSet(AlfParamSet* dst, AlfParamSet* src)
     1154{
     1155  dst->numLCU = src->numLCU;
     1156  dst->numLCUInWidth = src->numLCUInWidth;
     1157  dst->numLCUInHeight = src->numLCUInHeight;
     1158
     1159  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     1160  {
     1161    dst->isEnabled[compIdx] = src->isEnabled[compIdx];
     1162    dst->isUniParam[compIdx] = src->isUniParam[compIdx];
     1163
     1164    for(Int n=0; n< src->numLCU; n++)
     1165    {
     1166      dst->alfUnitParam[compIdx][n].isEnabled = src->alfUnitParam[compIdx][n].isEnabled;
     1167      dst->alfUnitParam[compIdx][n].isNewFilt = src->alfUnitParam[compIdx][n].isNewFilt;
     1168      dst->alfUnitParam[compIdx][n].mergeType = src->alfUnitParam[compIdx][n].mergeType;
     1169      dst->alfUnitParam[compIdx][n].storedFiltIdx = src->alfUnitParam[compIdx][n].storedFiltIdx;
     1170      *(dst->alfUnitParam[compIdx][n].alfFiltParam) = *(src->alfUnitParam[compIdx][n].alfFiltParam);
     1171    }
     1172  }
     1173}
     1174
     1175
     1176/** ALF encoding process top function
     1177 * \param [in, out] alfParamSet ALF parameter set
     1178 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     1179 * \param [in] dLambdaLuma lambda value for luma RDO
     1180 * \param [in] dLambdaChroma lambda value for chroma RDO
     1181 */
     1182#if ALF_CHROMA_LAMBDA
     1183#if HHI_INTERVIEW_SKIP
     1184Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambdaLuma, Double lambdaChroma, Bool bInterviewSkip)
     1185#else
     1186Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambdaLuma, Double lambdaChroma)
     1187#endif
     1188#else
     1189#if HHI_INTERVIEW_SKIP
     1190#else
     1191Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambda)
     1192#endif
     1193#endif
     1194{
     1195#if ALF_CHROMA_LAMBDA
     1196  m_dLambdaLuma   = lambdaLuma;
     1197  m_dLambdaChroma = lambdaChroma;
     1198#else
     1199  m_dLambdaLuma   = lambda;
     1200  m_dLambdaChroma = lambda;
     1201#endif
     1202  TComPicYuv* yuvOrg    = m_pcPic->getPicYuvOrg();
     1203  TComPicYuv* yuvRec    = m_pcPic->getPicYuvRec();
     1204  TComPicYuv* yuvExtRec = m_pcTempPicYuv;
     1205#if HHI_INTERVIEW_SKIP
     1206  TComPicYuv* pUsedPelMap = NULL;
     1207  if( bInterviewSkip )
     1208  {
     1209    pUsedPelMap = m_pcPic->getUsedPelsMap();
     1210  }
     1211#endif
     1212
     1213  //picture boundary padding
     1214  yuvRec->copyToPic(yuvExtRec);
     1215  yuvExtRec->setBorderExtension( false );
     1216  yuvExtRec->extendPicBorder   ();
     1217
     1218  //initialize encoder parameters
     1219  initALFEncoderParam(alfParamSet, alfCtrlParam);
     1220
     1221  //get LCU statistics
     1222  getStatistics(yuvOrg, yuvExtRec);
     1223
     1224  //decide ALF parameters
     1225#if HHI_INTERVIEW_SKIP
     1226  decideParameters(yuvOrg, yuvExtRec, yuvRec, pUsedPelMap, m_bestAlfParamSet, alfCtrlParam);
     1227#else
     1228  decideParameters(yuvOrg, yuvExtRec, yuvRec, m_bestAlfParamSet, alfCtrlParam);
     1229#endif
     1230
     1231  //assign best parameters
     1232  assignALFEncoderParam(alfParamSet, alfCtrlParam);
     1233}
     1234
     1235/** Check if the current LCU can be merged with neighboring LCU
     1236 * \param [in] compIdx luma/chroma component index
     1237 * \param [out] alfUnitPic ALF unit parameters for all LCUs in picture
     1238 */
     1239Void TEncAdaptiveLoopFilter::checkMerge(Int compIdx, AlfUnitParam* alfUnitPic)
     1240{
     1241  AlfUnitParam *alfUnitLeft, *alfUnitUp;
     1242
     1243  for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1244  {
     1245    Int lcuPosX = (Int)(n % m_numLCUInPicWidth);
     1246    Int lcuPosY = (Int)(n / m_numLCUInPicWidth);
     1247
     1248    AlfUnitParam& alfUnitCur = alfUnitPic[n];
     1249
     1250    //check merge left
     1251    if( lcuPosX != 0)
     1252    {
     1253      alfUnitLeft = &(alfUnitPic[n - 1]);
     1254      if(alfUnitCur == *alfUnitLeft)
     1255      {
     1256        alfUnitCur.mergeType = ALF_MERGE_LEFT;
     1257        alfUnitCur.isEnabled = alfUnitLeft->isEnabled;
     1258        alfUnitCur.isNewFilt = alfUnitLeft->isNewFilt;
     1259        alfUnitCur.storedFiltIdx = alfUnitLeft->storedFiltIdx;
     1260        *(alfUnitCur.alfFiltParam) = *(alfUnitLeft->alfFiltParam);
     1261        continue;
     1262      }
     1263    }
     1264
     1265    //check merge up
     1266    if(lcuPosY !=0 )
     1267    {
     1268      alfUnitUp = &(alfUnitPic[n - m_numLCUInPicWidth]);
     1269      if(alfUnitCur == *alfUnitUp)
     1270      {
     1271        alfUnitCur.mergeType = ALF_MERGE_UP;
     1272        alfUnitCur.isEnabled = alfUnitUp->isEnabled;
     1273        alfUnitCur.isNewFilt = alfUnitUp->isNewFilt;
     1274        alfUnitCur.storedFiltIdx = alfUnitUp->storedFiltIdx;
     1275        *(alfUnitCur.alfFiltParam) = *(alfUnitUp->alfFiltParam);
     1276        continue;
     1277      }
     1278    }
     1279  }
     1280
     1281}
     1282
     1283/** Transfer ALF unit parameters for LCUs to to-be-coded ALF parameter set
     1284 * \param [in] compIdx luma/chroma component index
     1285 * \param [in] alfUnitPic ALF unit parameters for all LCUs in picture
     1286 * \param [out] alfParamSet to-be-coded ALF parameter set
     1287 */
     1288Void TEncAdaptiveLoopFilter::transferToAlfParamSet(Int compIdx, AlfUnitParam* alfUnitPic, AlfParamSet* & alfParamSet)
     1289{
     1290
     1291  Int countFiltOffLCU = 0, countNewFilts = 0;
     1292
     1293  AlfUnitParam* alfUnitParams = alfParamSet->alfUnitParam[compIdx];
     1294  for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1295  {
     1296    alfUnitParams[n] = alfUnitPic[n];
     1297
     1298
     1299    if(alfUnitParams[n].alfFiltParam->alf_flag == 0)
     1300    {
     1301      countFiltOffLCU++;
     1302    }
     1303    else
     1304    {
     1305      Bool isNewFiltInSlice =   (alfUnitParams[n].mergeType == ALF_MERGE_DISABLED && alfUnitParams[n].isEnabled && alfUnitParams[n].isNewFilt);
     1306      if( isNewFiltInSlice )
     1307      {
     1308        countNewFilts++;
     1309      }
     1310    }
     1311  }
     1312
     1313  //slice-level parameters
     1314  AlfUnitParam* firstAlfUnitInSlice = &(alfUnitParams[0]);
     1315  if( countFiltOffLCU == m_uiNumCUsInFrame ) //number of filter-off LCU is equal to the number of LCUs in slice
     1316  {
     1317    alfParamSet->isEnabled [compIdx] = false;   
     1318    alfParamSet->isUniParam[compIdx] = true; //uni-param, all off
     1319    assert(firstAlfUnitInSlice->alfFiltParam->alf_flag == 0);
     1320  }
     1321  else
     1322  {
     1323    alfParamSet->isEnabled[compIdx] = true;
     1324    if( countNewFilts == 1 && firstAlfUnitInSlice->alfFiltParam->alf_flag != 0 && countFiltOffLCU == 0)
     1325    {
     1326      alfParamSet->isUniParam[compIdx] = true;
     1327    }
     1328    else
     1329    {
     1330      alfParamSet->isUniParam[compIdx] = false;
     1331    }
     1332  }
     1333
     1334}
     1335
     1336/** Disable all ALF unit parameters in current component
     1337 * \param [in] compIdx luma/chroma component index
     1338 * \param [out] alfParamSet to-be-coded ALF parameter set
     1339 * \param [in] alfUnitPic ALF unit parameters for all LCUs in picture
     1340 */
     1341Void TEncAdaptiveLoopFilter::disableComponentAlfParam(Int compIdx, AlfParamSet* alfParamSet, AlfUnitParam* alfUnitPic)
     1342{
     1343  alfParamSet->isEnabled [compIdx] = false;
     1344  alfParamSet->isUniParam[compIdx] = true; //all off
     1345
     1346  for(Int lcuPos = 0; lcuPos < m_uiNumCUsInFrame; lcuPos++)
     1347  {
     1348    AlfUnitParam& alfunitParam = alfUnitPic[lcuPos];
     1349
     1350    alfunitParam.mergeType = ALF_MERGE_DISABLED;
     1351    alfunitParam.isEnabled = false;
     1352    alfunitParam.isNewFilt = false;
     1353    alfunitParam.storedFiltIdx = -1;
     1354    alfunitParam.alfFiltParam->alf_flag = 0;
     1355  }
     1356
     1357  //check merge-up and merge-left
     1358  checkMerge(compIdx, alfUnitPic);
     1359
     1360  //transfer to AlfParamSet
     1361  transferToAlfParamSet(compIdx, alfUnitPic, alfParamSet);
     1362
     1363}
     1364
     1365/** Picture-based encoding
     1366 * \param [out] alfParamSet to-be-coded ALF parameter set
     1367 * \param [in, out] alfPicQTPart picture quad-tree partition
     1368 * \param [in] compIdx luma/chroma component index
     1369 * \param [in] pOrg picture buffer for original picture
     1370 * \param [in] pDec picture buffer for un-filtered picture
     1371 * \param [out] pRest picture buffer for filtered picture
     1372 * \param [in] stride stride size for 1-D picture memory
     1373 * \param [in, out] alfCorrLCUs correlation values for LCUs
     1374 */
     1375#if HHI_INTERVIEW_SKIP
     1376Void TEncAdaptiveLoopFilter::executePicBasedModeDecision(AlfParamSet* alfParamSet
     1377                                                        , AlfPicQTPart* alfPicQTPart
     1378                                                        , Int compIdx
     1379                                                        , Pel* pOrg, Pel* pDec, Pel* pRest, Pel* pUsed, Int stride, Int formatShift
     1380                                                        , AlfCorrData** alfCorrLCUs
     1381                                                        )
     1382#else
     1383Void TEncAdaptiveLoopFilter::executePicBasedModeDecision(AlfParamSet* alfParamSet
     1384                                                        , AlfPicQTPart* alfPicQTPart
     1385                                                        , Int compIdx
     1386                                                        , Pel* pOrg, Pel* pDec, Pel* pRest, Int stride, Int formatShift
     1387                                                        , AlfCorrData** alfCorrLCUs
     1388                                                        )
     1389#endif
     1390{
     1391  if(compIdx != ALF_Y)
     1392  {
     1393    if(!alfParamSet->isEnabled[ALF_Y])
     1394    {
     1395      disableComponentAlfParam(compIdx, alfParamSet, m_alfPicFiltUnits[compIdx]);
     1396      return;
     1397    }
     1398  }
     1399
     1400  Int picWidth = (m_img_width >> formatShift);
     1401  Int picHeight= (m_img_height >> formatShift);
     1402
     1403  Int64  minDist = 0;
     1404  Int64  minRate = 0;
     1405  Double minCost = 0;
     1406
     1407  decideQTPartition(alfPicQTPart, alfCorrLCUs, 0, 0, minCost, minDist, minRate);
     1408
     1409  //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1410  patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[compIdx]);
     1411
     1412  //check merge-up and merge-left
     1413  checkMerge(compIdx, m_alfPicFiltUnits[compIdx]);
     1414
     1415  //transfer to AlfParamSet
     1416  transferToAlfParamSet(compIdx, m_alfPicFiltUnits[compIdx], alfParamSet);
     1417
     1418  //reconstruction
     1419  recALF(compIdx, m_alfFiltInfo[compIdx], pDec, pRest, stride, formatShift, NULL, false);
     1420
     1421  Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma);
     1422
     1423
     1424  std::vector<AlfCUCtrlInfo> alfCUCtrlParamTemp(m_vBestAlfCUCtrlParam);
     1425  minRate = calculateAlfParamSetRateRDO(compIdx, alfParamSet, &alfCUCtrlParamTemp);
     1426#if HHI_INTERVIEW_SKIP
     1427  minDist = xCalcSSD(pOrg, pRest, pUsed, picWidth, picHeight, stride);
     1428#else
     1429  minDist = xCalcSSD(pOrg, pRest, picWidth, picHeight, stride);
     1430#endif
     1431  minCost = (Double)minDist + lambda*((Double)minRate);
     1432
     1433  //block on/off control
     1434  if(compIdx == ALF_Y && m_bAlfCUCtrlEnabled)
     1435  {
     1436#if HHI_INTERVIEW_SKIP
     1437    decideBlockControl(pOrg, pDec, pRest, pUsed, stride, alfPicQTPart, alfParamSet, minRate, minDist, minCost);
     1438#else
     1439    decideBlockControl(pOrg, pDec, pRest, stride, alfPicQTPart, alfParamSet, minRate, minDist, minCost); 
     1440#endif
     1441  }
     1442
     1443  //get filter-off distortion, rate, cost
     1444  AlfParamSet alfParamSetOff;
     1445  for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1446  {
     1447    alfCUCtrlParamTemp[s].reset();
     1448  }
     1449  alfParamSetOff.isEnabled[compIdx] = false;
     1450  alfParamSetOff.isUniParam[compIdx] = true;
     1451#if HHI_INTERVIEW_SKIP
     1452  Int64  offDist = xCalcSSD(pOrg, pDec, pUsed, picWidth, picHeight, stride);
     1453#else
     1454  Int64  offDist = xCalcSSD(pOrg, pDec, picWidth, picHeight, stride);
     1455#endif
     1456  Int64  offRate = calculateAlfParamSetRateRDO(compIdx, &alfParamSetOff, &alfCUCtrlParamTemp);
     1457  Double offCost = (Double)offDist + lambda*((Double)offRate);
     1458
     1459  if(offCost < minCost  )
     1460  {
     1461    //revert to filter-off results
     1462    Pel* pelSrc = pDec;
     1463    Pel* pelDst = pRest;
     1464    for(Int y=0; y< picHeight; y++)
     1465    {
     1466      ::memcpy(pelDst, pelSrc, sizeof(Pel)*picWidth);
     1467      pelSrc += stride;
     1468      pelDst += stride;
     1469    }
     1470
     1471    alfParamSet->isEnabled[compIdx] = false;
     1472    alfParamSet->isUniParam[compIdx] = true; //all filter-off
     1473  }
     1474
     1475}
     1476
     1477/** copy picture quadtree infromation
     1478 * \param [out] alfPicQTPartDest destination part in picture quad tree
     1479 * \param [in ] alfPicQTPartSrc source part in picture quad tree
     1480 */
     1481Void TEncAdaptiveLoopFilter::copyPicQT(AlfPicQTPart* alfPicQTPartDest, AlfPicQTPart* alfPicQTPartSrc)
     1482{
     1483  for (Int i=0; i< m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++)
     1484  {
     1485    alfPicQTPartDest[i] = alfPicQTPartSrc[i];
     1486  }
     1487}
     1488
     1489/** copy pixel values for one rectangular region
     1490 * \param [out] imgDest destination part in picture quad tree
     1491 * \param [in ] imgSrc source part in picture quad tree
     1492 * \param [in ] stride source part in picture quad tree
     1493 * \param [in ] yPos starting y position
     1494 * \param [in ] height region height
     1495 * \param [in ] xPos starting x position
     1496 * \param [in ] width region width
     1497 */
     1498Void TEncAdaptiveLoopFilter::copyPixelsInOneRegion(Pel* imgDest, Pel* imgSrc, Int stride, Int yPos, Int height, Int xPos, Int width)
     1499{
     1500  Int offset = (yPos*stride) + xPos;
     1501  Pel *imgDestLine = imgDest + offset;
     1502  Pel *imgSrcLine  = imgSrc  + offset;
     1503
     1504  for (Int j=0; j<height; j++)
     1505  {
     1506    ::memcpy(imgDestLine, imgSrcLine, sizeof(Pel)*width);
     1507    imgDestLine += stride;
     1508    imgSrcLine  += stride;
     1509  }
     1510}
     1511
     1512/** Re-design ALF parameters for picture quad-tree partitions
     1513 * \param [out] alfPicQTPart picture quad-tree partition information
     1514 * \param [in ] partIdx partition index
     1515 * \param [in ] partLevel partition level
     1516 */
     1517Void TEncAdaptiveLoopFilter::reDesignQT(AlfPicQTPart *alfPicQTPart, Int partIdx, Int partLevel)
     1518{
     1519  AlfPicQTPart *alfPicQTOnePart = &(alfPicQTPart[partIdx]); 
     1520  Int nextPartLevel = partLevel + 1;
     1521
     1522  if (!alfPicQTOnePart->isSplit)
     1523  {
     1524    if (alfPicQTOnePart->alfUnitParam->alfFiltParam->alf_flag)
     1525    {
     1526      executeModeDecisionOnePart(alfPicQTPart, m_alfCorr[ALF_Y], partIdx, partLevel) ;     
     1527    }
     1528  }
     1529  else
     1530  {
     1531    for (Int i=0; i<4; i++)
     1532    {
     1533      reDesignQT(alfPicQTPart, alfPicQTOnePart->childPartIdx[i], nextPartLevel);
     1534    }
     1535  } 
     1536}
     1537
     1538/** CU-on/off control decision
     1539 * \param [in ] imgOrg picture buffer for original picture
     1540 * \param [in ] imgDec picture buffer for un-filtered picture
     1541 * \param [in ] imgRest picture buffer for filtered picture
     1542 * \param [in ] stride buffer stride size for 1-D picture memory
     1543 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1544 * \param [in, out] alfParamSet ALF parameter set
     1545 * \param [in, out ] minRate minimum rate
     1546 * \param [in, out ] minDist minimum distortion
     1547 * \param [in, out ] minCost minimum RD cost
     1548 */
     1549#if HHI_INTERVIEW_SKIP
     1550Void TEncAdaptiveLoopFilter::decideBlockControl(Pel* imgOrg, Pel* imgDec, Pel* imgRest, Pel* imgUsed, Int stride, AlfPicQTPart* alfPicQTPart, AlfParamSet* & alfParamSet, Int64 &minRate, Int64 &minDist, Double &minCost)
     1551#else
     1552Void TEncAdaptiveLoopFilter::decideBlockControl(Pel* imgOrg, Pel* imgDec, Pel* imgRest, Int stride, AlfPicQTPart* alfPicQTPart, AlfParamSet* & alfParamSet, Int64 &minRate, Int64 &minDist, Double &minCost)
     1553#endif
     1554{
     1555  Int    rate, ctrlDepth;
     1556  Double cost;
     1557  UInt64 dist;
     1558  Bool isChanged = false;
     1559  Pel *imgYtemp = getPicBuf(m_pcPicYuvTmp, ALF_Y);
     1560  Pel *imgYBest = getPicBuf(m_pcPicYuvBest, ALF_Y);
     1561  std::vector<AlfCUCtrlInfo> vAlfCUCtrlParamTemp(m_vBestAlfCUCtrlParam); 
     1562
     1563  AlfPicQTPart *alfPicQTPartNoCtrl = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ];
     1564  AlfPicQTPart *alfPicQTPartBest   = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ];
     1565
     1566  // backup data of PQT without block on/off
     1567  copyPicQT(alfPicQTPartNoCtrl, alfPicQTPart);
     1568
     1569  for (ctrlDepth=0; ctrlDepth<4; ctrlDepth++)
     1570  {       
     1571    // Restore data from PQT without block on/off
     1572    copyPixelsInOneRegion(imgYtemp, imgRest, stride, 0, m_img_height, 0, m_img_width);
     1573    copyPicQT(alfPicQTPart, alfPicQTPartNoCtrl);
     1574
     1575    for (Int reDesignRun=0; reDesignRun <= m_iALFNumOfRedesign; reDesignRun++)
     1576    {
     1577      // re-design filter
     1578      if (reDesignRun > 0)
     1579      {
     1580        // re-gather statistics
     1581        getOneCompStatistics(m_alfCorr[ALF_Y], ALF_Y, imgOrg, imgDec, stride, 0, true);
     1582
     1583        // reDesign in each QT partition
     1584        reDesignQT(alfPicQTPart, 0, 0);
     1585
     1586        //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1587        patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]);
     1588
     1589        //reconstruction
     1590        copyPixelsInOneRegion(imgYtemp, imgDec, stride, 0, m_img_height, 0, m_img_width);
     1591        recALF(ALF_Y, m_alfFiltInfo[ALF_Y], imgDec, imgYtemp, stride, 0, NULL, false);
     1592      }
     1593
     1594      // Gest distortion and decide on/off, Pel should be changed to TComPicYUV
     1595#if HHI_INTERVIEW_SKIP
     1596      setCUAlfCtrlFlags((UInt)ctrlDepth, imgOrg, imgDec, imgYtemp, imgUsed, stride, dist, vAlfCUCtrlParamTemp);
     1597#else
     1598      setCUAlfCtrlFlags((UInt)ctrlDepth, imgOrg, imgDec, imgYtemp, stride, dist, vAlfCUCtrlParamTemp);   
     1599#endif
     1600
     1601      //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1602      patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]);
     1603
     1604      //check merge-up and merge-left
     1605      checkMerge(ALF_Y, m_alfPicFiltUnits[ALF_Y]);
     1606
     1607      //transfer to AlfParamSet
     1608      transferToAlfParamSet(ALF_Y, m_alfPicFiltUnits[ALF_Y], alfParamSet);
     1609
     1610      rate = calculateAlfParamSetRateRDO(ALF_Y, alfParamSet, &vAlfCUCtrlParamTemp);
     1611      cost = (Double)dist + m_dLambdaLuma * ((Double)rate);
     1612
     1613      if (cost < minCost)
     1614      {
     1615        isChanged     = true;
     1616        minCost       = cost;
     1617        minDist       = (Int64) dist;
     1618        minRate       = rate;
     1619
     1620        m_vBestAlfCUCtrlParam = vAlfCUCtrlParamTemp;
     1621        copyPixelsInOneRegion(imgYBest, imgYtemp, stride, 0, m_img_height, 0, m_img_width);
     1622
     1623        copyPicQT(alfPicQTPartBest, alfPicQTPart);
     1624        xCopyTmpAlfCtrlFlagsFrom();
     1625      }
     1626
     1627    }
     1628  }
     1629
     1630  if (isChanged == true)
     1631  {
     1632    copyPicQT(alfPicQTPart, alfPicQTPartBest);
     1633    xCopyTmpAlfCtrlFlagsTo();
     1634
     1635    copyPixelsInOneRegion(imgRest, imgYBest, stride, 0, m_img_height, 0, m_img_width);
     1636    xCopyDecToRestCUs(imgDec, imgRest, stride);
     1637  }
     1638  else
     1639  {
     1640    copyPicQT(alfPicQTPart, alfPicQTPartNoCtrl);
     1641  }
     1642
     1643  //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1644  patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]);
     1645
     1646  //check merge-up and merge-left
     1647  checkMerge(ALF_Y, m_alfPicFiltUnits[ALF_Y]);
     1648
     1649  //transfer to AlfParamSet
     1650  transferToAlfParamSet(ALF_Y, m_alfPicFiltUnits[ALF_Y], alfParamSet);
     1651
     1652  delete [] alfPicQTPartNoCtrl;
     1653  alfPicQTPartNoCtrl = NULL;
     1654
     1655  delete [] alfPicQTPartBest;
     1656  alfPicQTPartBest = NULL;
     1657}
     1658
     1659/** Copy ALF unit parameters from quad-tree partition to LCUs
     1660 * \param [in] alfPicQTPart picture quad-tree partition information
     1661 * \param [in] partIdx partition index
     1662 * \param [out] alfUnitPic ALF unit parameters for LCUs
     1663 */
     1664Void TEncAdaptiveLoopFilter::patchAlfUnitParams(AlfPicQTPart* alfPicQTPart, Int partIdx, AlfUnitParam* alfUnitPic)
     1665{
     1666  AlfPicQTPart* alfQTPart = &(alfPicQTPart[partIdx]);
     1667  //Int compIdx = alfQTPart->componentID;
     1668
     1669  if(alfQTPart->isSplit == false)
     1670  {
     1671    AlfUnitParam* alfpartParam = alfQTPart->alfUnitParam;
     1672
     1673    Int lcuPos;
     1674    for(Int lcuPosY = alfQTPart->partCUYS; lcuPosY <= alfQTPart->partCUYE; lcuPosY++)
     1675    {
     1676      for(Int lcuPosX = alfQTPart->partCUXS; lcuPosX <= alfQTPart->partCUXE; lcuPosX++)
     1677      {
     1678        lcuPos = lcuPosY*m_numLCUInPicWidth + lcuPosX;
     1679        AlfUnitParam& alfunitParam = alfUnitPic[lcuPos];
     1680
     1681        alfunitParam.mergeType = alfpartParam->mergeType;
     1682        alfunitParam.isEnabled = alfpartParam->isEnabled;
     1683        alfunitParam.isNewFilt = alfpartParam->isNewFilt;
     1684        alfunitParam.storedFiltIdx = alfpartParam->storedFiltIdx; //not used
     1685        *(alfunitParam.alfFiltParam) = *(alfpartParam->alfFiltParam);
     1686      }
     1687    }
     1688  }
     1689  else
     1690  {
     1691    for(Int i=0; i< 4; i++)
     1692    {
     1693      patchAlfUnitParams(alfPicQTPart, alfQTPart->childPartIdx[i], alfUnitPic);     
     1694    }
     1695  }
     1696}
     1697
     1698/** Decide picture quad-tree partition
     1699 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1700 * \param [in, out] alfPicLCUCorr correlations for LCUs
     1701 * \param [int] partIdx partition index
     1702 * \param [int] partLevel partition level
     1703 * \param [in, out] cost cost for one partition
     1704 * \param [in, out] dist distortion for one partition
     1705 * \param [in, out] rate bitrate for one partition
     1706 */
     1707Void TEncAdaptiveLoopFilter::decideQTPartition(AlfPicQTPart* alfPicQTPart, AlfCorrData** alfPicLCUCorr, Int partIdx, Int partLevel, Double &cost, Int64 &dist, Int64 &rate)
     1708{
     1709  AlfPicQTPart* alfPicQTOnePart = &(alfPicQTPart[partIdx]);
     1710  Int nextPartLevel = partLevel + 1;
     1711  Int childPartIdx;
     1712  Double splitCost = 0;
     1713  Int64  splitRate = 0;
     1714  Int64  splitDist = 0; 
     1715
     1716  if (!alfPicQTOnePart->isProcessed)
     1717  {
     1718    executeModeDecisionOnePart(alfPicQTPart, alfPicLCUCorr, partIdx, partLevel);
     1719
     1720    alfPicQTOnePart->isProcessed = true;
     1721  }
     1722
     1723  if (!alfPicQTOnePart->isBottomLevel)
     1724  {   
     1725    for (Int i=0; i<4; i++)
     1726    {     
     1727      childPartIdx = alfPicQTOnePart->childPartIdx[i];
     1728      decideQTPartition(alfPicQTPart, alfPicLCUCorr, childPartIdx, nextPartLevel, splitCost, splitDist, splitRate);     
     1729    }
     1730
     1731    alfPicQTOnePart->splitMinCost = splitCost;
     1732    alfPicQTOnePart->splitMinDist = splitDist;
     1733    alfPicQTOnePart->splitMinRate = splitRate;
     1734
     1735    if (alfPicQTOnePart->splitMinCost < alfPicQTOnePart->selfMinCost)
     1736    {
     1737      alfPicQTOnePart->isSplit = true;
     1738    }
     1739    else
     1740    {
     1741      alfPicQTOnePart->isSplit = false;
     1742    }
     1743  }
     1744  else
     1745  {
     1746    alfPicQTOnePart->isSplit = false;
     1747    alfPicQTOnePart->splitMinCost = alfPicQTOnePart->selfMinCost;
     1748    alfPicQTOnePart->splitMinDist = alfPicQTOnePart->selfMinDist;
     1749    alfPicQTOnePart->splitMinRate = alfPicQTOnePart->selfMinRate;
     1750  }
     1751
     1752  if (alfPicQTOnePart->isSplit)
     1753  {
     1754    cost += alfPicQTOnePart->splitMinCost;
     1755    rate += alfPicQTOnePart->splitMinRate;
     1756    dist += alfPicQTOnePart->splitMinDist;
     1757  }
     1758  else
     1759  {
     1760    cost += alfPicQTOnePart->selfMinCost;
     1761    rate += alfPicQTOnePart->selfMinRate;
     1762    dist += alfPicQTOnePart->selfMinDist;
     1763  }
     1764
     1765}
     1766
     1767/** Mode decision process for one picture quad-tree partition
     1768 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1769 * \param [in, out] alfPicLCUCorr correlations for LCUs
     1770 * \param [int] partIdx partition index
     1771 * \param [int] partLevel partition level
     1772 */
     1773Void TEncAdaptiveLoopFilter::executeModeDecisionOnePart(AlfPicQTPart *alfPicQTPart, AlfCorrData** alfPicLCUCorr, Int partIdx, Int partLevel)
     1774{
     1775  AlfPicQTPart* alfQTPart = &(alfPicQTPart[partIdx]);
     1776  Int compIdx = alfQTPart->componentID;
     1777  Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma);
     1778
     1779  //gather correlations
     1780  alfQTPart->alfCorr->reset();
     1781  for(Int lcuPosY = alfQTPart->partCUYS; lcuPosY <= alfQTPart->partCUYE; lcuPosY++)
     1782  {
     1783    for(Int lcuPosX = alfQTPart->partCUXS; lcuPosX <= alfQTPart->partCUXE; lcuPosX++)
     1784    {
     1785      *(alfQTPart->alfCorr) +=  *(alfPicLCUCorr[lcuPosY*m_numLCUInPicWidth + lcuPosX]);
     1786    }
     1787  }
     1788
     1789  //test filter on
     1790  AlfUnitParam* alfPartUnitParam = alfQTPart->alfUnitParam;
     1791  alfPartUnitParam->mergeType = ALF_MERGE_DISABLED;
     1792  alfPartUnitParam->isEnabled = true;
     1793  alfPartUnitParam->isNewFilt = true;
     1794  alfPartUnitParam->storedFiltIdx = -1;
     1795  alfPartUnitParam->alfFiltParam->alf_flag = 1;
     1796  deriveFilterInfo(compIdx, alfQTPart->alfCorr, alfPartUnitParam->alfFiltParam, alfQTPart->numFilterBudget);
     1797
     1798  alfQTPart->selfMinDist = estimateFilterDistortion(compIdx, alfQTPart->alfCorr, m_filterCoeffSym, alfPartUnitParam->alfFiltParam->filters_per_group, m_varIndTab);
     1799  alfQTPart->selfMinRate = calculateAlfUnitRateRDO(alfPartUnitParam);
     1800  alfQTPart->selfMinCost = (Double)(alfQTPart->selfMinDist) + lambda*((Double)(alfQTPart->selfMinRate));
     1801 
     1802  alfQTPart->selfMinCost +=  ((lambda* 1.5)* ((Double)( (alfQTPart->partCUYE - alfQTPart->partCUYS+ 1)*(alfQTPart->partCUXE - alfQTPart->partCUXS +1) )));  //RDCO
     1803 
     1804
     1805  //test filter off
     1806  AlfUnitParam alfUnitParamTemp(*(alfQTPart->alfUnitParam));
     1807  alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED;
     1808  alfUnitParamTemp.isEnabled = false;
     1809  Int64  dist = estimateFilterDistortion(compIdx, alfQTPart->alfCorr);
     1810  Int64  rate = calculateAlfUnitRateRDO(&alfUnitParamTemp);
     1811  Double cost = (Double)dist + lambda*((Double)rate);
     1812  if(cost < alfQTPart->selfMinCost)
     1813  {
     1814    alfQTPart->selfMinCost = cost;
     1815    alfQTPart->selfMinDist = dist;
     1816    alfQTPart->selfMinRate = rate;
     1817    *(alfQTPart->alfUnitParam) = alfUnitParamTemp;
     1818
     1819    alfQTPart->alfUnitParam->alfFiltParam->alf_flag = 0;
     1820  }
     1821
     1822}
     1823
     1824/** Derive filter coefficients
     1825 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1826 * \param [in, out] alfPicLCUCorr correlations for LCUs
     1827 * \param [int] partIdx partition index
     1828 * \param [int] partLevel partition level
     1829 */
     1830Void TEncAdaptiveLoopFilter::deriveFilterInfo(Int compIdx, AlfCorrData* alfCorr, ALFParam* alfFiltParam, Int maxNumFilters)
     1831{
     1832  const Int filtNo = 0;
     1833  const Int numCoeff = ALF_MAX_NUM_COEF;
     1834
     1835  switch(compIdx)
     1836  {
     1837  case ALF_Y:
     1838    {       
     1839      Int lambdaForMerge = ((Int) m_dLambdaLuma) * (1<<(2*g_uiBitIncrement));
     1840      Int numFilters;
     1841
     1842      ::memset(m_varIndTab, 0, sizeof(Int)*NO_VAR_BINS);
     1843
     1844      xfindBestFilterVarPred(alfCorr->yCorr, alfCorr->ECorr, alfCorr->pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &numFilters, m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambdaForMerge, maxNumFilters);
     1845      xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, numFilters, alfFiltParam);
     1846    }
     1847    break;
     1848  case ALF_Cb:
     1849  case ALF_Cr:
     1850    {
     1851      static Double coef[ALF_MAX_NUM_COEF];
     1852
     1853      alfFiltParam->filters_per_group = 1;
     1854
     1855      gnsSolveByChol(alfCorr->ECorr[0], alfCorr->yCorr[0], coef, numCoeff);
     1856      xQuantFilterCoef(coef, m_filterCoeffSym[0], filtNo, g_uiBitDepth + g_uiBitIncrement);
     1857      ::memcpy(alfFiltParam->coeffmulti[0], m_filterCoeffSym[0], sizeof(Int)*numCoeff);
     1858      predictALFCoeffChroma(alfFiltParam->coeffmulti[0]);
     1859    }
     1860    break;
     1861  default:
     1862    {
     1863      printf("Not a legal component ID\n");
     1864      assert(0);
     1865      exit(-1);
     1866    }
     1867  }
     1868
     1869
     1870}
     1871
     1872/** Estimate rate-distortion cost for ALF parameter set
     1873 * \param [in] compIdx luma/chroma component index
     1874 * \param [in] alfParamSet ALF parameter set
     1875 * \param [in] alfCUCtrlParam CU-on/off control parameters
     1876 */
     1877Int TEncAdaptiveLoopFilter::calculateAlfParamSetRateRDO(Int compIdx, AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCUCtrlParam)
     1878{
     1879  Int rate = 0;
     1880
     1881  m_pcEntropyCoder->resetEntropy();
     1882  m_pcEntropyCoder->resetBits();
     1883
     1884
     1885  m_pcEntropyCoder->encodeAlfParamSet(alfParamSet, m_numLCUInPicWidth, m_uiNumCUsInFrame, 0, true, compIdx, compIdx);
     1886
     1887  if(m_bAlfCUCtrlEnabled)
     1888  {
     1889    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1890    {
     1891      m_pcEntropyCoder->encodeAlfCtrlParam( (*alfCUCtrlParam)[s], m_uiNumCUsInFrame);     
     1892    }
     1893  }
     1894
     1895  rate = m_pcEntropyCoder->getNumberOfWrittenBits();
     1896
     1897  return rate;
     1898}
     1899
     1900/** Estimate rate-distortion cost for ALF unit parameters
     1901 * \param [in] alfUnitParam ALF unit parameters
     1902 * \param [in] numStoredFilters number of stored filter (set)
     1903 */
     1904Int TEncAdaptiveLoopFilter::calculateAlfUnitRateRDO(AlfUnitParam* alfUnitParam, Int numStoredFilters)
     1905{
     1906  Int rate = 0;
     1907
     1908  if(alfUnitParam->mergeType != ALF_MERGE_LEFT)
     1909  {
     1910    m_pcEntropyCoder->resetEntropy();
     1911    m_pcEntropyCoder->resetBits();
     1912
     1913    m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->mergeType == ALF_MERGE_UP)?1:0);
     1914
     1915    if(alfUnitParam->mergeType != ALF_MERGE_UP)
     1916    {
     1917      m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->isEnabled)?1:0);
     1918
     1919      if(alfUnitParam->isEnabled)
     1920      {
     1921        if(numStoredFilters > 0)
     1922        {
     1923          m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->isNewFilt)?1:0);
     1924        }
     1925
     1926        if(!(alfUnitParam->isNewFilt) && numStoredFilters > 0)
     1927        {
     1928          m_pcEntropyCoder->encodeAlfStoredFilterSetIdx(alfUnitParam->storedFiltIdx, numStoredFilters);
     1929        }
     1930        else
     1931        {
     1932          m_pcEntropyCoder->encodeAlfParam(alfUnitParam->alfFiltParam);
     1933        }
     1934
     1935      }
     1936    }
     1937    rate = m_pcEntropyCoder->getNumberOfWrittenBits();
     1938  }
     1939  return rate;
     1940}
     1941
     1942/** Estimate filtering distortion
     1943 * \param [in] compIdx luma/chroma component index
     1944 * \param [in] alfCorr correlations
     1945 * \param [in] coeffSet filter coefficients
     1946 * \param [in] filterSetSize number of filter set
     1947 * \param [in] mergeTable merge table of filter set (only for luma BA)
     1948 * \param [in] doPixAccMerge calculate pixel squared value (true) or not (false)
     1949 */
     1950Int64 TEncAdaptiveLoopFilter::estimateFilterDistortion(Int compIdx, AlfCorrData* alfCorr, Int** coeffSet, Int filterSetSize, Int* mergeTable, Bool doPixAccMerge)
     1951{
     1952  const Int numCoeff = (Int)ALF_MAX_NUM_COEF;
     1953  AlfCorrData* alfMerged = m_alfCorrMerged[compIdx];
     1954
     1955  alfMerged->mergeFrom(*alfCorr, mergeTable, doPixAccMerge);
     1956
     1957  Int**     coeff = (coeffSet == NULL)?(m_coeffNoFilter):(coeffSet);
     1958  Int64     iDist = 0;
     1959  for(Int f=0; f< filterSetSize; f++)
     1960  {
     1961    iDist += xFastFiltDistEstimation(alfMerged->ECorr[f], alfMerged->yCorr[f], coeff[f], numCoeff);
     1962  }
     1963  return iDist;
     1964}
     1965
     1966/** Mode decision for ALF unit in LCU-based encoding
     1967 * \param [in] compIdx luma/chroma component index
     1968 * \param [in] alfUnitPic ALF unit parmeters for LCUs in picture
     1969 * \param [in] lcuIdx LCU index (order) in slice
     1970 * \param [in] lcuPos LCU position in picture
     1971 * \param [in] numLCUWidth number of width in LCU
     1972 * \param [in, out] alfUnitParams ALF unit parameters for LCUs in slice
     1973 * \param [in] alfCorr correlations
     1974 * \param [in] storedFilters stored-filter buffer
     1975 * \param [in] maxNumFilter constraint for number of filters
     1976 * \param [in] lambda lagrangian multiplier for RDO
     1977 * \param [in] isLeftUnitAvailable left ALF unit available (true) or not (false)
     1978 * \param [in] isUpUnitAvailable upper ALF unit available (true) or not (false)
     1979 */
     1980Void TEncAdaptiveLoopFilter::decideLCUALFUnitParam(Int compIdx, AlfUnitParam* alfUnitPic, Int lcuIdx, Int lcuPos, Int numLCUWidth, AlfUnitParam* alfUnitParams, AlfCorrData* alfCorr, std::vector<ALFParam*>& storedFilters, Int maxNumFilter, Double lambda, Bool isLeftUnitAvailable, Bool isUpUnitAvailable)
     1981{
     1982  Int    numSliceDataInCurrLCU = m_numSlicesDataInOneLCU[lcuPos];
     1983  Int    budgetNumFilters = (Int)(maxNumFilter/numSliceDataInCurrLCU);
     1984  Int    numStoredFilters = (Int)storedFilters.size();
     1985  Double cost, minCost = MAX_DOUBLE;
     1986  Int64  dist;
     1987  Int    rate;
     1988
     1989  AlfUnitParam& alfUnitParamCurr = alfUnitParams[lcuIdx];
     1990
     1991  ///--- new filter mode test ---
     1992  AlfUnitParam alfUnitParamTemp(alfUnitParamCurr);
     1993  alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED;
     1994  alfUnitParamTemp.isEnabled = true;
     1995  alfUnitParamTemp.isNewFilt = true;
     1996  alfUnitParamTemp.storedFiltIdx = -1;
     1997  deriveFilterInfo(compIdx, alfCorr, alfUnitParamTemp.alfFiltParam, budgetNumFilters);
     1998
     1999  dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2000  rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2001  cost = (Double)dist + lambda*((Double)rate);
     2002  if(cost < minCost)
     2003  {
     2004    minCost = cost;
     2005    alfUnitParamCurr = alfUnitParamTemp;
     2006
     2007    alfUnitParamCurr.alfFiltParam->alf_flag = 1;
     2008  }
     2009
     2010  if(numSliceDataInCurrLCU == 1)
     2011  {
     2012    if(numStoredFilters > 0)
     2013    {
     2014      ///--- stored filter mode test ---//
     2015      alfUnitParamTemp = alfUnitParamCurr;
     2016
     2017      alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED;
     2018      alfUnitParamTemp.isEnabled = true;
     2019      alfUnitParamTemp.isNewFilt = false;
     2020
     2021      for(Int i=0; i< numStoredFilters; i++)
     2022      {
     2023        ALFParam* storedALFParam = storedFilters[i];
     2024
     2025        alfUnitParamTemp.storedFiltIdx = i;
     2026        alfUnitParamTemp.alfFiltParam  = storedALFParam;
     2027
     2028        assert(storedALFParam->alf_flag == 1);
     2029
     2030        reconstructCoefInfo(compIdx, storedALFParam, m_filterCoeffSym, m_varIndTab);
     2031
     2032        dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2033        rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2034        cost = (Double)dist + lambda*((Double)rate);
     2035
     2036        if(cost < minCost)
     2037        {
     2038          minCost = cost;
     2039          alfUnitParamCurr = alfUnitParamTemp;
     2040        }
     2041      }
     2042    }
     2043
     2044    /// merge-up test
     2045    if(isUpUnitAvailable)
     2046    {
     2047      Int addrUp = lcuPos - m_numLCUInPicWidth;
     2048      AlfUnitParam& alfUnitParamUp = alfUnitPic[addrUp];
     2049
     2050      if(alfUnitParamUp.alfFiltParam->alf_flag == 1)
     2051      {
     2052        alfUnitParamTemp = alfUnitParamUp;
     2053        alfUnitParamTemp.mergeType    = ALF_MERGE_UP;
     2054
     2055        reconstructCoefInfo(compIdx, alfUnitParamTemp.alfFiltParam, m_filterCoeffSym, m_varIndTab);
     2056        dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2057        rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2058        cost = (Double)dist + lambda*((Double)rate);
     2059
     2060        if(cost < minCost)
     2061        {
     2062          minCost = cost;
     2063
     2064          alfUnitParamCurr = alfUnitParamTemp;
     2065        }
     2066
     2067      }
     2068
     2069    } //upper unit available
     2070
     2071
     2072    /// merge-left test
     2073    if(isLeftUnitAvailable)
     2074    {
     2075      Int addrLeft = lcuPos - 1;
     2076      AlfUnitParam& alfUnitParamLeft = alfUnitPic[addrLeft];
     2077
     2078      if(alfUnitParamLeft.alfFiltParam->alf_flag == 1)
     2079      {
     2080        alfUnitParamTemp = alfUnitParamLeft;
     2081        alfUnitParamTemp.mergeType    = ALF_MERGE_LEFT;
     2082
     2083        reconstructCoefInfo(compIdx, alfUnitParamTemp.alfFiltParam, m_filterCoeffSym, m_varIndTab);
     2084        dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2085        rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2086        cost = (Double)dist + lambda*((Double)rate);
     2087
     2088        if(cost < minCost)
     2089        {
     2090          minCost = cost;
     2091
     2092          alfUnitParamCurr = alfUnitParamTemp;
     2093        }
     2094
     2095      }
     2096
     2097    } //left unit available
     2098
     2099  }
     2100}
     2101
     2102/** Choose the best ALF unit parameters when filter is not enabled.
     2103 * \param [out] alfFiltOffParam ALF unit parameters for filter-off case
     2104 * \param [in] lcuPos LCU position in picture
     2105 * \param [in] alfUnitPic ALF unit parmeters for LCUs in picture
     2106 * \param [in] isLeftUnitAvailable left ALF unit available (true) or not (false)
     2107 * \param [in] isUpUnitAvailable upper ALF unit available (true) or not (false)
     2108 */
     2109Void TEncAdaptiveLoopFilter::getFiltOffAlfUnitParam(AlfUnitParam* alfFiltOffParam, Int lcuPos, AlfUnitParam* alfUnitPic, Bool isLeftUnitAvailable, Bool isUpUnitAvailable)
     2110{
     2111  Int    numSliceDataInCurrLCU = m_numSlicesDataInOneLCU[lcuPos];
     2112
     2113  if(numSliceDataInCurrLCU == 1)
     2114  {
     2115    if(isLeftUnitAvailable)
     2116    {
     2117      Int addrLeft = lcuPos - 1;
     2118      AlfUnitParam& alfUnitParamLeft = alfUnitPic[addrLeft];
     2119
     2120      if(alfUnitParamLeft.alfFiltParam->alf_flag == 0)
     2121      {
     2122        alfFiltOffParam->mergeType    = ALF_MERGE_LEFT;
     2123        alfFiltOffParam->isEnabled    = false;
     2124        alfFiltOffParam->alfFiltParam = alfUnitParamLeft.alfFiltParam;
     2125
     2126        return;
     2127      }
     2128    }
     2129
     2130    if(isUpUnitAvailable)
     2131    {
     2132      Int addrUp = lcuPos - m_numLCUInPicWidth;
     2133      AlfUnitParam& alfUnitParamUp = alfUnitPic[addrUp];
     2134
     2135      if(alfUnitParamUp.alfFiltParam->alf_flag == 0)
     2136      {
     2137        alfFiltOffParam->mergeType    = ALF_MERGE_UP;
     2138        alfFiltOffParam->isEnabled    = false;
     2139        alfFiltOffParam->alfFiltParam = alfUnitParamUp.alfFiltParam;
     2140
     2141        return;
     2142      }
     2143
     2144    }
     2145  }
     2146
     2147
     2148  alfFiltOffParam->mergeType = ALF_MERGE_DISABLED;
     2149  alfFiltOffParam->isEnabled = false;
     2150  alfFiltOffParam->alfFiltParam = alfUnitPic[lcuPos].alfFiltParam;
     2151
     2152  return;
     2153}
     2154
     2155/** Calculate distortion for ALF LCU
     2156 * \param [in] skipLCUBottomLines true for considering skipping bottom LCU lines
     2157 * \param [in] compIdx luma/chroma component index
     2158 * \param [in] alfLCUInfo ALF LCU information
     2159 * \param [in] picSrc source picture buffer
     2160 * \param [in] picCmp to-be-compared picture buffer
     2161 * \param [in] stride buffer stride size for 1-D pictrue memory
     2162 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2163 * \return the distortion
     2164 */
     2165#if HHI_INTERVIEW_SKIP
     2166Int64 TEncAdaptiveLoopFilter::calcAlfLCUDist(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo& alfLCUInfo, Pel* picSrc, Pel* picCmp, Pel* picUsed, Int stride, Int formatShift)
     2167#else
     2168Int64 TEncAdaptiveLoopFilter::calcAlfLCUDist(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo& alfLCUInfo, Pel* picSrc, Pel* picCmp, Int stride, Int formatShift)
     2169#endif
     2170{
     2171  Int64 dist = 0; 
     2172  Int  posOffset, ypos, xpos, height, width;
     2173  Pel* pelCmp;
     2174  Pel* pelSrc;
     2175#if HHI_INTERVIEW_SKIP
     2176  Pel* pelUsed = NULL ;
     2177#endif
     2178#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2179  Int endypos;
     2180  Bool notSkipLinesBelowVB = true;
     2181  Int lcuAddr = alfLCUInfo.pcCU->getAddr();
     2182  if(skipLCUBottomLines)
     2183  {
     2184    if(lcuAddr + m_numLCUInPicWidth < m_uiNumCUsInFrame)
     2185    {
     2186      notSkipLinesBelowVB = false;
     2187    }
     2188  }
     2189#endif
     2190
     2191  switch(compIdx)
     2192  {
     2193  case ALF_Cb:
     2194  case ALF_Cr:
     2195    {
     2196      for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2197      {
     2198        ypos    = (Int)(alfLCUInfo[n].posY   >> formatShift);
     2199        xpos    = (Int)(alfLCUInfo[n].posX   >> formatShift);
     2200        height  = (Int)(alfLCUInfo[n].height >> formatShift);
     2201        width   = (Int)(alfLCUInfo[n].width  >> formatShift);
     2202
     2203#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2204        if(!notSkipLinesBelowVB )
     2205        {
     2206          endypos = ypos+ height -1;
     2207          Int iLineVBPos = m_lcuHeightChroma - 2;
     2208          Int yEndLineInLCU = endypos % m_lcuHeightChroma;
     2209          height = (yEndLineInLCU >= iLineVBPos) ? (height - 2) : height ;
     2210        }
     2211#endif
     2212
     2213        posOffset = (ypos * stride) + xpos;
     2214        pelCmp    = picCmp + posOffset;   
     2215        pelSrc    = picSrc + posOffset;   
     2216
     2217
     2218#if HHI_INTERVIEW_SKIP
     2219        if( picUsed)
     2220        {
     2221          pelUsed   = picUsed+ posOffset;
     2222        }
     2223        dist  += xCalcSSD( pelSrc, pelCmp,  pelUsed, width, height, stride );
     2224#else
     2225        dist  += xCalcSSD( pelSrc, pelCmp,  width, height, stride );
     2226#endif
     2227      }
     2228
     2229    }
     2230    break;
     2231  case ALF_Y:
     2232    {
     2233      for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2234      {
     2235        ypos    = (Int)(alfLCUInfo[n].posY);
     2236        xpos    = (Int)(alfLCUInfo[n].posX);
     2237        height  = (Int)(alfLCUInfo[n].height);
     2238        width   = (Int)(alfLCUInfo[n].width);
     2239
     2240#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2241        if(!notSkipLinesBelowVB)
     2242        {
     2243          endypos = ypos+ height -1;
     2244          Int iLineVBPos = m_lcuHeight - 4;
     2245          Int yEndLineInLCU = endypos % m_lcuHeight;
     2246          height = (yEndLineInLCU >= iLineVBPos) ? (height - 4) : height ;
     2247        }
     2248#endif
     2249
     2250        posOffset = (ypos * stride) + xpos;
     2251        pelCmp    = picCmp + posOffset;   
     2252        pelSrc    = picSrc + posOffset;   
     2253
     2254#if HHI_INTERVIEW_SKIP
     2255        if( picUsed )
     2256        {
     2257          pelUsed   = picUsed+ posOffset;
     2258        }
     2259        dist  += xCalcSSD( pelSrc, pelCmp,  pelUsed, width, height, stride );
     2260#else
     2261        dist  += xCalcSSD( pelSrc, pelCmp,  width, height, stride );
     2262#endif
     2263      }
     2264
     2265    }
     2266    break;
     2267  default:
     2268    {
     2269      printf("not a legal component ID for ALF \n");
     2270      assert(0);
     2271      exit(-1);
     2272    }
     2273  }
     2274
     2275  return dist;
     2276}
     2277
     2278/** Copy one ALF LCU region
     2279 * \param [in] alfLCUInfo ALF LCU information
     2280 * \param [out] picDst to-be-compared picture buffer
     2281 * \param [in] picSrc source picture buffer
     2282 * \param [in] stride buffer stride size for 1-D pictrue memory
     2283 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2284 */
     2285Void TEncAdaptiveLoopFilter::copyOneAlfLCU(AlfLCUInfo& alfLCUInfo, Pel* picDst, Pel* picSrc, Int stride, Int formatShift)
     2286{
     2287  Int posOffset, ypos, xpos, height, width;
     2288  Pel* pelDst;
     2289  Pel* pelSrc;
     2290
     2291  for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2292  {
     2293    ypos    = (Int)(alfLCUInfo[n].posY   >> formatShift);
     2294    xpos    = (Int)(alfLCUInfo[n].posX   >> formatShift);
     2295    height  = (Int)(alfLCUInfo[n].height >> formatShift);
     2296    width   = (Int)(alfLCUInfo[n].width  >> formatShift);
     2297
     2298    posOffset  = ( ypos * stride)+ xpos;
     2299    pelDst   = picDst  + posOffset;   
     2300    pelSrc   = picSrc  + posOffset;   
     2301
     2302    for(Int j=0; j< height; j++)
     2303    {
     2304      ::memcpy(pelDst, pelSrc, sizeof(Pel)*width);
     2305      pelDst += stride;
     2306      pelSrc += stride;
     2307    }
     2308  }
     2309
     2310}
     2311
     2312/** Reconstruct ALF LCU pixels
     2313 * \param [in] compIdx luma/chroma component index
     2314 * \param [in] alfLCUInfo ALF LCU information
     2315 * \param [in] alfUnitParam ALF unit parameters
     2316 * \param [in] picDec picture buffer for un-filtered picture
     2317 * \param [out] picRest picture buffer for reconstructed picture
     2318 * \param [in] stride buffer stride size for 1-D pictrue memory
     2319 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2320 */
     2321Void TEncAdaptiveLoopFilter::reconstructOneAlfLCU(Int compIdx, AlfLCUInfo& alfLCUInfo, AlfUnitParam* alfUnitParam, Pel* picDec, Pel* picRest, Int stride, Int formatShift)
     2322{
     2323  ALFParam* alfParam = alfUnitParam->alfFiltParam;
     2324  Int ypos, xpos, height, width;
     2325
     2326  if( alfUnitParam->isEnabled)
     2327  {
     2328    assert(alfParam->alf_flag == 1);
     2329
     2330    //reconstruct ALF coefficients & related parameters
     2331    reconstructCoefInfo(compIdx, alfParam, m_filterCoeffSym, m_varIndTab);
     2332
     2333    //filtering process
     2334    for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2335    {
     2336      ypos    = (Int)(alfLCUInfo[n].posY   >> formatShift);
     2337      xpos    = (Int)(alfLCUInfo[n].posX   >> formatShift);
     2338      height  = (Int)(alfLCUInfo[n].height >> formatShift);
     2339      width   = (Int)(alfLCUInfo[n].width  >> formatShift);
     2340
     2341      filterOneCompRegion(picRest, picDec, stride, (compIdx!=ALF_Y), ypos, ypos+height, xpos, xpos+width, m_filterCoeffSym, m_varIndTab, m_varImg);
     2342    }
     2343  }
     2344  else
     2345  {
     2346    copyOneAlfLCU(alfLCUInfo, picRest, picDec, stride, formatShift);
     2347  }
     2348}
     2349
     2350/** LCU-based mode decision
     2351 * \param [in, out] alfParamSet ALF parameter set
     2352 * \param [in] compIdx luma/chroma component index
     2353 * \param [in] pOrg picture buffer for original picture
     2354 * \param [in] pDec picture buffer for un-filtered picture
     2355 * \param [out] pRest picture buffer for reconstructed picture
     2356 * \param [in] stride buffer stride size for 1-D pictrue memory
     2357 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2358 * \param [in] alfCorrLCUs correlations for LCUs
     2359 */
     2360#if HHI_INTERVIEW_SKIP
     2361Void TEncAdaptiveLoopFilter::executeLCUBasedModeDecision(AlfParamSet* alfParamSet
     2362                                                        ,Int compIdx, Pel* pOrg, Pel* pDec, Pel* pRest, Pel* pUsed, Int stride, Int formatShift
     2363                                                        ,AlfCorrData** alfCorrLCUs
     2364                                                        )
     2365#else
     2366Void TEncAdaptiveLoopFilter::executeLCUBasedModeDecision(AlfParamSet* alfParamSet
     2367                                                        ,Int compIdx, Pel* pOrg, Pel* pDec, Pel* pRest, Int stride, Int formatShift
     2368                                                        ,AlfCorrData** alfCorrLCUs
     2369                                                        )
     2370#endif
     2371{
     2372  Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma);
     2373  static Int* isProcessed = NULL;
     2374
     2375  AlfUnitParam* alfUnitPic = m_alfPicFiltUnits[compIdx];
     2376
     2377  Int64  distEnc, distOff;
     2378  Int    rateEnc, rateOff;
     2379  Double costEnc, costOff;
     2380  Bool isLeftUnitAvailable, isUpUnitAvailable;
     2381
     2382  isProcessed = new Int[m_uiNumCUsInFrame];
     2383  ::memset(isProcessed, 0, sizeof(Int)*m_uiNumCUsInFrame);
     2384
     2385#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2386  Int numProcessedLCU = 0;
     2387  m_alfFiltBudgetPerLcu = (Double)(m_iALFMaxNumberFilters) / (Double)(m_uiNumCUsInFrame);
     2388  m_alfUsedFilterNum = 0;
     2389#endif
     2390
     2391  for(Int s=0; s<= m_lastSliceIdx; s++)
     2392  {
     2393    if(!m_pcPic->getValidSlice(s))
     2394    {
     2395      continue;
     2396    }
     2397    Bool isAcrossSlice = (m_alfCoefInSlice)?(!m_isNonCrossSlice):(true);
     2398    Int  numLCUWidth   = alfParamSet[s].numLCUInWidth;
     2399
     2400    AlfUnitParam* alfSliceUnitParams = alfParamSet[s].alfUnitParam[compIdx];
     2401    std::vector<ALFParam*> storedFilters;
     2402    storedFilters.clear(); //reset stored filter buffer at the slice beginning
     2403
     2404    Int u =0; //counter for LCU index in slice
     2405    Int countFiltOffLCU = 0; //counter for number of LCU with filter-off mode
     2406    Int countNewFilts = 0; //counter for number of LCU with new filter inside slice
     2407
     2408    Int numTilesInSlice = (Int)m_pvpSliceTileAlfLCU[s].size();
     2409    for(Int t=0; t< numTilesInSlice; t++)
     2410    {
     2411      std::vector<AlfLCUInfo*> & vpAlfLCU = m_pvpSliceTileAlfLCU[s][t];
     2412      Pel* pSrc = pDec;
     2413
     2414      if(m_bUseNonCrossALF)
     2415      {
     2416        pSrc = getPicBuf(m_pcSliceYuvTmp, compIdx);
     2417        copyRegion(vpAlfLCU, pSrc, pDec, stride, formatShift);
     2418        extendRegionBorder(vpAlfLCU, pSrc, stride, formatShift);
     2419      }
     2420
     2421      Int numLCUs = (Int)vpAlfLCU.size();
     2422      for(Int n=0; n< numLCUs; n++)
     2423      {
     2424        AlfLCUInfo*   alfLCU       = vpAlfLCU[n];                  //ALF LCU information
     2425        TComDataCU*   pcCU         = alfLCU->pcCU;
     2426        Int           addr         = pcCU->getAddr();              //real LCU addr
     2427        AlfUnitParam* alfUnitParam = &(alfSliceUnitParams[u]);
     2428
     2429        if(isProcessed[addr] == 0)
     2430        {
     2431          Int           maxNumFilter = (Int)NO_VAR_BINS;   
     2432
     2433#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2434          Bool          isOutOfFilterBudget = true;
     2435          Double        usedFiltBudget = (numProcessedLCU == 0) ? 0.0 : (Double)m_alfUsedFilterNum / (Double)(numProcessedLCU);
     2436          if ( (m_alfFiltBudgetPerLcu >= usedFiltBudget) && (m_alfUsedFilterNum < m_iALFMaxNumberFilters) )
     2437          {
     2438            isOutOfFilterBudget = false;
     2439            Int leftNumFilt = m_iALFMaxNumberFilters - m_alfUsedFilterNum;
     2440            Int avgNumFilt  = leftNumFilt / (m_uiNumCUsInFrame - numProcessedLCU) + 1 ;
     2441            maxNumFilter = (leftNumFilt < avgNumFilt) ? leftNumFilt : avgNumFilt ;
     2442          }
     2443#endif
     2444
     2445          AlfCorrData*  alfCorr      = alfCorrLCUs[addr];            //ALF LCU correlation
     2446          alfUnitParam->alfFiltParam = alfUnitPic[addr].alfFiltParam;
     2447
     2448          //mode decision
     2449          isLeftUnitAvailable = (   (addr % m_numLCUInPicWidth != 0) && (u != 0));
     2450          isUpUnitAvailable   = (((Int)(addr/m_numLCUInPicWidth) > 0) && ( ( (u - numLCUWidth) >= 0) || isAcrossSlice ));
     2451
     2452          decideLCUALFUnitParam(compIdx, alfUnitPic, u, addr, numLCUWidth, alfSliceUnitParams, alfCorr, storedFilters, maxNumFilter, lambda, isLeftUnitAvailable, isUpUnitAvailable);
     2453          reconstructOneAlfLCU(compIdx, *alfLCU, alfUnitParam, pSrc, pRest, stride, formatShift);
     2454#if HHI_INTERVIEW_SKIP
     2455          distEnc = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pRest, pUsed, stride, formatShift);
     2456#else
     2457          distEnc = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pRest, stride, formatShift);
     2458#endif
     2459          rateEnc = calculateAlfUnitRateRDO(alfUnitParam, (Int)storedFilters.size());
     2460          costEnc = (Double)distEnc + lambda*((Double)rateEnc);
     2461          costEnc += ((lambda* 1.5)*1.0);  //RDCO
     2462
     2463          //v.s. filter off case
     2464          AlfUnitParam alfUnitParamOff;
     2465          getFiltOffAlfUnitParam(&alfUnitParamOff, addr, alfUnitPic, isLeftUnitAvailable, isUpUnitAvailable);
     2466#if HHI_INTERVIEW_SKIP
     2467          distOff = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pSrc, pUsed, stride, formatShift);
     2468#else
     2469          distOff = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pSrc, stride, formatShift);
     2470#endif
     2471          rateOff = calculateAlfUnitRateRDO(&alfUnitParamOff, (Int)storedFilters.size());
     2472          costOff = (Double)distOff + lambda*((Double)rateOff);
     2473
     2474#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2475          if( (costOff < costEnc)  ||  isOutOfFilterBudget)
     2476#else
     2477          if( costOff < costEnc)
     2478#endif
     2479          {
     2480            //filter off. set alf_flag = 0, copy pDest to pRest
     2481            *alfUnitParam = alfUnitParamOff;
     2482            alfUnitParam->alfFiltParam->alf_flag = 0;
     2483            copyOneAlfLCU(*alfLCU, pRest, pSrc, stride, formatShift);
     2484          }
     2485
     2486          if(alfUnitParam->mergeType == ALF_MERGE_DISABLED)
     2487          {
     2488            if(alfUnitParam->isEnabled)
     2489            {
     2490              if(alfUnitParam->isNewFilt)
     2491              {
     2492                //update stored filter buffer
     2493                storedFilters.push_back(alfUnitParam->alfFiltParam);
     2494                assert(alfUnitParam->alfFiltParam->alf_flag == 1);
     2495              }
     2496            }
     2497          }
     2498
     2499          alfUnitPic[addr] = *alfUnitParam;
     2500
     2501          isProcessed[addr] = 1;
     2502
     2503#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2504          numProcessedLCU++;
     2505          if(alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt)
     2506          {
     2507            m_alfUsedFilterNum += alfUnitParam->alfFiltParam->filters_per_group;
     2508          }
     2509#endif
     2510        }
     2511        else
     2512        {
     2513          //keep the ALF parameters in LCU are the same
     2514          *alfUnitParam = alfUnitPic[addr];
     2515          reconstructOneAlfLCU(compIdx, *alfLCU, alfUnitParam, pSrc, pRest, stride, formatShift);
     2516
     2517#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2518          if(alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt)
     2519          {
     2520            m_alfUsedFilterNum += alfUnitParam->alfFiltParam->filters_per_group;
     2521          }
     2522#endif
     2523        }
     2524
     2525        if(alfUnitParam->alfFiltParam->alf_flag == 0)
     2526        {
     2527          countFiltOffLCU++;
     2528        }
     2529        else
     2530        {
     2531          Bool isNewFiltInSlice =   (alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt);
     2532          Bool isMergeAcrossSlice = ( alfUnitParam->mergeType == ALF_MERGE_UP && (u-numLCUWidth < 0) );
     2533
     2534          if( isNewFiltInSlice || isMergeAcrossSlice )
     2535          {
     2536            countNewFilts++;
     2537          }
     2538        }
     2539
     2540        u++;     
     2541
     2542      } //LCU
     2543    } //tile
     2544
     2545
     2546    //slice-level parameters
     2547    AlfUnitParam* firstAlfUnitInSlice = &(alfSliceUnitParams[0]);
     2548    if( countFiltOffLCU == u ) //number of filter-off LCU is equal to the number of LCUs in slice
     2549    {
     2550      alfParamSet[s].isEnabled [compIdx] = false;   
     2551      alfParamSet[s].isUniParam[compIdx] = true; //uni-param, all off
     2552      assert(firstAlfUnitInSlice->alfFiltParam->alf_flag == 0);
     2553    }
     2554    else
     2555    {
     2556      alfParamSet[s].isEnabled[compIdx] = true;
     2557      if( countNewFilts == 1 && firstAlfUnitInSlice->alfFiltParam->alf_flag != 0 && countFiltOffLCU == 0 )
     2558      {
     2559        alfParamSet[s].isUniParam[compIdx] = true;
     2560      }
     2561      else
     2562      {
     2563        alfParamSet[s].isUniParam[compIdx] = false;
     2564      }
     2565    }
     2566  } //slice
     2567
     2568
     2569  delete[] isProcessed;
     2570  isProcessed = NULL;
     2571}
     2572
     2573
     2574/** Decide ALF parameter set for luma/chroma components (top function)
     2575 * \param [in] pPicOrg picture buffer for original picture
     2576 * \param [in] pPicDec picture buffer for un-filtered picture
     2577 * \param [out] pPicRest picture buffer for reconstructed picture
     2578 * \param [in, out] alfParamSet ALF parameter set
     2579 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     2580 */
     2581#if HHI_INTERVIEW_SKIP
     2582Void TEncAdaptiveLoopFilter::decideParameters(TComPicYuv* pPicOrg, TComPicYuv* pPicDec, TComPicYuv* pPicRest, TComPicYuv* pUsedPelMap
     2583                                            , AlfParamSet* alfParamSet
     2584                                            , std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     2585#else
     2586Void TEncAdaptiveLoopFilter::decideParameters(TComPicYuv* pPicOrg, TComPicYuv* pPicDec, TComPicYuv* pPicRest
     2587                                            , AlfParamSet* alfParamSet
     2588                                            , std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     2589#endif
     2590{
     2591  static Int lumaStride        = pPicOrg->getStride();
     2592  static Int chromaStride      = pPicOrg->getCStride();
     2593
     2594  Pel *pOrg, *pDec, *pRest;
     2595  Int stride, formatShift;
     2596#if HHI_INTERVIEW_SKIP
     2597  Pel *pUsed = NULL ;
     2598#endif
     2599
     2600  for(Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     2601  {
     2602    pOrg        = getPicBuf(pPicOrg, compIdx);
     2603    pDec        = getPicBuf(pPicDec, compIdx);
     2604    pRest       = getPicBuf(pPicRest, compIdx);
     2605#if HHI_INTERVIEW_SKIP
     2606    if( pUsedPelMap )
     2607    {
     2608      pUsed        = getPicBuf(pUsedPelMap, compIdx);
     2609    }
     2610#endif
     2611    stride      = (compIdx == ALF_Y)?(lumaStride):(chromaStride);
     2612    formatShift = (compIdx == ALF_Y)?(0):(1);
     2613
     2614    AlfCorrData** alfCorrComp     = m_alfCorr[compIdx];
     2615
     2616    if(!m_picBasedALFEncode) //lcu-based optimization
     2617    {
     2618#if HHI_INTERVIEW_SKIP
     2619      executeLCUBasedModeDecision(alfParamSet, compIdx, pOrg, pDec, pRest, pUsed, stride, formatShift, alfCorrComp);
     2620#else
     2621      executeLCUBasedModeDecision(alfParamSet, compIdx, pOrg, pDec, pRest, stride, formatShift, alfCorrComp);
     2622#endif
     2623    }
     2624    else //picture-based optimization
     2625    {
     2626      AlfPicQTPart* alfPicQTPart = m_alfPQTPart[compIdx];
     2627#if HHI_INTERVIEW_SKIP
     2628      executePicBasedModeDecision(alfParamSet, alfPicQTPart, compIdx, pOrg, pDec, pRest, pUsed, stride, formatShift, alfCorrComp);
     2629#else
     2630      executePicBasedModeDecision(alfParamSet, alfPicQTPart, compIdx, pOrg, pDec, pRest, stride, formatShift, alfCorrComp);
     2631#endif
     2632    } 
     2633
     2634  } //component
     2635
     2636}
     2637
     2638/** Gather correlations for all LCUs in picture
     2639 * \param [in] pPicOrg picture buffer for original picture
     2640 * \param [in] pPicDec picture buffer for un-filtered picture
     2641 */
     2642Void TEncAdaptiveLoopFilter::getStatistics(TComPicYuv* pPicOrg, TComPicYuv* pPicDec)
     2643{
     2644  Int lumaStride   = pPicOrg->getStride();
     2645  Int chromaStride = pPicOrg->getCStride();
     2646  const  Int chromaFormatShift = 1;
     2647
     2648  //calculate BA index
     2649  calcOneRegionVar(m_varImg, getPicBuf(pPicDec, ALF_Y), lumaStride, false, 0, m_img_height, 0, m_img_width);
     2650  for(Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     2651  {
     2652    AlfCorrData** alfCorrComp = m_alfCorr[compIdx];
     2653    Int          formatShift = (compIdx == ALF_Y)?(0):(chromaFormatShift);
     2654    Int          stride      = (compIdx == ALF_Y)?(lumaStride):(chromaStride);
     2655
     2656    getOneCompStatistics(alfCorrComp, compIdx, getPicBuf(pPicOrg, compIdx), getPicBuf(pPicDec, compIdx), stride, formatShift, false);
     2657  }
     2658}
     2659
     2660/** Gather correlations for all LCUs of one luma/chroma component in picture
     2661 * \param [out] alfCorrComp correlations for LCUs
     2662 * \param [in] compIdx luma/chroma component index
     2663 * \param [in] imgOrg picture buffer for original picture
     2664 * \param [in] imgDec picture buffer for un-filtered picture
     2665 * \param [in] stride buffer stride size for 1-D pictrue memory
     2666 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2667 * \param [in] isRedesignPhase at re-design filter stage (true) or not (false)
     2668 */
     2669Void TEncAdaptiveLoopFilter::getOneCompStatistics(AlfCorrData** alfCorrComp, Int compIdx, Pel* imgOrg, Pel* imgDec, Int stride, Int formatShift, Bool isRedesignPhase)
     2670{
     2671
     2672  // initialize to zero
     2673  for(Int n=0; n< m_uiNumCUsInFrame; n++)
     2674  {
     2675    alfCorrComp[n]->reset();
     2676  }
     2677
     2678  for(Int s=0; s<= m_lastSliceIdx; s++)
     2679  {
     2680    if(!m_pcPic->getValidSlice(s))
     2681    {
     2682      continue;
     2683    }
     2684    Int numTilesInSlice = (Int)m_pvpSliceTileAlfLCU[s].size();
     2685    for(Int t=0; t< numTilesInSlice; t++)
     2686    {
     2687      std::vector<AlfLCUInfo*> & vpAlfLCU = m_pvpSliceTileAlfLCU[s][t];
     2688      Pel* pSrc = imgDec;
     2689
     2690      if(m_bUseNonCrossALF)
     2691      {
     2692        pSrc = getPicBuf(m_pcSliceYuvTmp, compIdx);
     2693        copyRegion(vpAlfLCU, pSrc, imgDec, stride, formatShift);
     2694        extendRegionBorder(vpAlfLCU, pSrc, stride, formatShift);
     2695      }
     2696
     2697      Int numLCUs = (Int)vpAlfLCU.size();
     2698      for(Int n=0; n< numLCUs; n++)
     2699      {
     2700        AlfLCUInfo* alfLCU = vpAlfLCU[n];
     2701        Int addr = alfLCU->pcCU->getAddr();
     2702        getStatisticsOneLCU(!m_picBasedALFEncode, compIdx, alfLCU, alfCorrComp[addr], imgOrg, pSrc, stride, formatShift, isRedesignPhase);
     2703      } //LCU
     2704    } //tile
     2705  } //slice
     2706
     2707}
     2708
     2709/** Gather correlations for one LCU
     2710 * \param [out] alfCorrComp correlations for LCUs
     2711 * \param [in] compIdx luma/chroma component index
     2712 * \param [in] imgOrg picture buffer for original picture
     2713 * \param [in] imgDec picture buffer for un-filtered picture
     2714 * \param [in] stride buffer stride size for 1-D pictrue memory
     2715 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2716 * \param [in] isRedesignPhase at re-design filter stage (true) or not (false)
     2717 */
     2718Void TEncAdaptiveLoopFilter::getStatisticsOneLCU(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo* alfLCU, AlfCorrData* alfCorr, Pel* pPicOrg, Pel* pPicSrc, Int stride, Int formatShift, Bool isRedesignPhase)
     2719{
     2720  Int numBlocks = alfLCU->numSGU;
     2721#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2722  Int  lcuAddr = alfLCU->pcCU->getAddr();
     2723  Bool notSkipLinesBelowVB = true;
     2724  Int  endypos;
     2725#endif
     2726  Bool isLastBlock;
     2727  Int ypos, xpos, height, width;
     2728
     2729#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2730  if(skipLCUBottomLines)
     2731  {
     2732    if(lcuAddr + m_numLCUInPicWidth < m_uiNumCUsInFrame)
     2733    {
     2734      notSkipLinesBelowVB = false;
     2735    }
     2736  }
     2737#endif
     2738
     2739  switch(compIdx)
     2740  {
     2741  case ALF_Cb:
     2742  case ALF_Cr:
     2743    {
     2744      for(Int n=0; n< numBlocks; n++)
     2745      {
     2746        isLastBlock = (n== numBlocks-1);
     2747        NDBFBlockInfo& AlfSGU = (*alfLCU)[n];
     2748
     2749        ypos   = (Int)(AlfSGU.posY  >> formatShift);
     2750        xpos   = (Int)(AlfSGU.posX  >> formatShift);
     2751        height = (Int)(AlfSGU.height>> formatShift);
     2752        width  = (Int)(AlfSGU.width >> formatShift);
     2753
     2754#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2755        if(!notSkipLinesBelowVB )
     2756        {
     2757          endypos = ypos+ height -1;
     2758          Int iLineVBPos = m_lcuHeightChroma - 2;
     2759          Int yEndLineInLCU = endypos % m_lcuHeightChroma;
     2760          height = (yEndLineInLCU >= iLineVBPos) ? (height - 2) : height ;
     2761        }
     2762#endif
     2763
     2764#if ALF_SINGLE_FILTER_SHAPE
     2765        calcCorrOneCompRegionChma(pPicOrg, pPicSrc, stride, ypos, xpos, height, width, alfCorr->ECorr[0], alfCorr->yCorr[0], isLastBlock);
     2766#endif
     2767      }
     2768    }
     2769    break;
     2770  case ALF_Y:
     2771    {
     2772      Bool forceCollection = true;
     2773
     2774      if(isRedesignPhase)
     2775      {
     2776        Int numValidPels = 0;
     2777        for(Int n=0; n< numBlocks; n++)
     2778        {
     2779          NDBFBlockInfo& AlfSGU = (*alfLCU)[n];
     2780
     2781          ypos   = (Int)(AlfSGU.posY  );
     2782          xpos   = (Int)(AlfSGU.posX  );
     2783          height = (Int)(AlfSGU.height);
     2784          width  = (Int)(AlfSGU.width );
     2785
     2786          for (Int y = ypos; y < ypos+ height; y++)
     2787          {
     2788            for (Int x = xpos; x < xpos + width; x++)
     2789            {
     2790              if (m_maskImg[y][x] == 1)
     2791              {
     2792                numValidPels++;
     2793              }
     2794            }
     2795          }
     2796        }
     2797
     2798        if(numValidPels > 0)
     2799        {
     2800          forceCollection = false;
     2801        }
     2802      }
     2803
     2804      for(Int n=0; n< numBlocks; n++)
     2805      {
     2806        isLastBlock = (n== numBlocks-1);
     2807        NDBFBlockInfo& AlfSGU = (*alfLCU)[n];
     2808
     2809        ypos   = (Int)(AlfSGU.posY  );
     2810        xpos   = (Int)(AlfSGU.posX  );
     2811        height = (Int)(AlfSGU.height);
     2812        width  = (Int)(AlfSGU.width );
     2813
     2814#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2815        endypos = ypos+ height -1;
     2816        if(!notSkipLinesBelowVB)
     2817        {
     2818          Int iLineVBPos = m_lcuHeight - 4;
     2819          Int yEndLineInLCU = endypos % m_lcuHeight;
     2820          height = (yEndLineInLCU >= iLineVBPos) ? (height - 4) : height ;
     2821        }
     2822#endif
     2823
     2824#if ALF_SINGLE_FILTER_SHAPE
     2825        calcCorrOneCompRegionLuma(pPicOrg, pPicSrc, stride, ypos, xpos, height, width, alfCorr->ECorr, alfCorr->yCorr, alfCorr->pixAcc, forceCollection, isLastBlock);
     2826#endif       
     2827      }
     2828    }
     2829    break;
     2830  default:
     2831    {
     2832      printf("Not a legal component index for ALF\n");
     2833      assert(0);
     2834      exit(-1);
     2835    }
     2836  }
     2837}
     2838
     2839
     2840#if ALF_SINGLE_FILTER_SHAPE
     2841/** Gather correlations for one region for chroma component
     2842 * \param [in] imgOrg picture buffer for original picture
     2843 * \param [in] imgPad picture buffer for un-filtered picture
     2844 * \param [in] stride buffer stride size for 1-D pictrue memory
     2845 * \param [in] yPos region starting y position
     2846 * \param [in] xPos region starting x position
     2847 * \param [in] height region height
     2848 * \param [in] width region width
     2849 * \param [out] eCorr auto-correlation matrix
     2850 * \param [out] yCorr cross-correlation array
     2851 * \param [in] isSymmCopyBlockMatrix symmetrically copy correlation values in eCorr (true) or not (false)
     2852 */
     2853Void TEncAdaptiveLoopFilter::calcCorrOneCompRegionChma(Pel* imgOrg, Pel* imgPad, Int stride
     2854                                                     , Int yPos, Int xPos, Int height, Int width
     2855                                                     , Double **eCorr, Double *yCorr, Bool isSymmCopyBlockMatrix
     2856                                                      )
     2857{
     2858  Int yPosEnd = yPos + height;
     2859  Int xPosEnd = xPos + width;
     2860  Int N = ALF_MAX_NUM_COEF; //m_sqrFiltLengthTab[0];
     2861
     2862  Int imgHeightChroma = m_img_height>>1;
     2863
     2864  Int yLineInLCU, paddingLine;
     2865  Int ELocal[ALF_MAX_NUM_COEF];
     2866  Pel *imgPad1, *imgPad2, *imgPad3, *imgPad4, *imgPad5, *imgPad6;
     2867  Int i, j, k, l, yLocal;
     2868
     2869  imgPad += (yPos*stride);
     2870  imgOrg += (yPos*stride);
     2871
     2872  for (i= yPos; i< yPosEnd; i++)
     2873  {
     2874    yLineInLCU = i % m_lcuHeightChroma;
     2875
     2876    if (yLineInLCU==0 && i>0)
     2877    {
     2878      paddingLine = yLineInLCU + 2 ;
     2879      imgPad1 = imgPad + stride;
     2880      imgPad2 = imgPad - stride;
     2881      imgPad3 = imgPad + 2*stride;
     2882      imgPad4 = imgPad - 2*stride;
     2883      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride;
     2884      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - min(paddingLine, 3)*stride;;
     2885    }
     2886    else if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma )
     2887    {
     2888      imgPad1 = imgPad + stride;
     2889      imgPad2 = imgPad - stride;
     2890      imgPad3 = imgPad + 2*stride;
     2891      imgPad4 = imgPad - 2*stride;
     2892      imgPad5 = imgPad + 3*stride;
     2893      imgPad6 = imgPad - 3*stride;
     2894    }
     2895    else if (yLineInLCU < m_lineIdxPadTopChroma)
     2896    {
     2897      paddingLine = - yLineInLCU + m_lineIdxPadTopChroma - 1;
     2898      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + min(paddingLine, 1)*stride;
     2899      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - stride;
     2900      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + min(paddingLine, 2)*stride;
     2901      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - 2*stride;
     2902      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + min(paddingLine, 3)*stride;
     2903      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - 3*stride;
     2904    }
     2905    else
     2906    {
     2907      paddingLine = yLineInLCU - m_lineIdxPadTopChroma ;
     2908      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + stride;
     2909      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - min(paddingLine, 1)*stride;
     2910      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + 2*stride;
     2911      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - min(paddingLine, 2)*stride;
     2912      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride;
     2913      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - min(paddingLine, 3)*stride;
     2914    }
     2915
     2916    for (j= xPos; j< xPosEnd; j++)
     2917    {
     2918      memset(ELocal, 0, N*sizeof(Int));
     2919
     2920      ELocal[0] = (imgPad5[j] + imgPad6[j]);
     2921
     2922      ELocal[1] = (imgPad3[j] + imgPad4[j]);
     2923
     2924      ELocal[2] = (imgPad1[j-1] + imgPad2[j+1]);
     2925      ELocal[3] = (imgPad1[j  ] + imgPad2[j  ]);
     2926      ELocal[4] = (imgPad1[j+1] + imgPad2[j-1]);
     2927
     2928      ELocal[5] = (imgPad[j+4] + imgPad[j-4]);
     2929      ELocal[6] = (imgPad[j+3] + imgPad[j-3]);
     2930      ELocal[7] = (imgPad[j+2] + imgPad[j-2]);
     2931      ELocal[8] = (imgPad[j+1] + imgPad[j-1]);
     2932      ELocal[9] = (imgPad[j  ]);
     2933
     2934      yLocal= (Int)imgOrg[j];
     2935
     2936      for(k=0; k<N; k++)
     2937      {
     2938        eCorr[k][k] += ELocal[k]*ELocal[k];
     2939        for(l=k+1; l<N; l++)
     2940        {
     2941          eCorr[k][l] += ELocal[k]*ELocal[l];
     2942        }
     2943
     2944        yCorr[k] += yLocal*ELocal[k];
     2945      }
     2946    }
     2947
     2948    imgPad+= stride;
     2949    imgOrg+= stride;
     2950  }
     2951
     2952  if(isSymmCopyBlockMatrix)
     2953  {
     2954    for(j=0; j<N-1; j++)
     2955    {
     2956      for(i=j+1; i<N; i++)
     2957      {
     2958        eCorr[i][j] = eCorr[j][i];
     2959      }
     2960    }
     2961  }
     2962}
     2963
     2964/** Gather correlations for one region for luma component
     2965 * \param [in] imgOrg picture buffer for original picture
     2966 * \param [in] imgPad picture buffer for un-filtered picture
     2967 * \param [in] stride buffer stride size for 1-D pictrue memory
     2968 * \param [in] yPos region starting y position
     2969 * \param [in] xPos region starting x position
     2970 * \param [in] height region height
     2971 * \param [in] width region width
     2972 * \param [out] eCorr auto-correlation matrix
     2973 * \param [out] yCorr cross-correlation array
     2974 * \param [out] pixAcc pixel squared value
     2975 * \param [in] isforceCollection all pixel are used for correlation calculation (true) or not (false)
     2976 * \param [in] isSymmCopyBlockMatrix symmetrically copy correlation values in eCorr (true) or not (false)
     2977 */
     2978Void TEncAdaptiveLoopFilter::calcCorrOneCompRegionLuma(Pel* imgOrg, Pel* imgPad, Int stride
     2979                                                      ,Int yPos, Int xPos, Int height, Int width
     2980                                                      ,Double ***eCorr, Double **yCorr, Double *pixAcc
     2981                                                      ,Bool isforceCollection, Bool isSymmCopyBlockMatrix
     2982                                                      )
     2983{
     2984  Int yPosEnd = yPos + height;
     2985  Int xPosEnd = xPos + width;
     2986  Int yLineInLCU;
     2987  Int paddingLine ;
     2988  Int N = ALF_MAX_NUM_COEF; //m_sqrFiltLengthTab[0];
     2989
     2990  Int ELocal[ALF_MAX_NUM_COEF];
     2991  Pel *imgPad1, *imgPad2, *imgPad3, *imgPad4, *imgPad5, *imgPad6;
     2992  Int i, j, k, l, yLocal, varInd;
     2993  Double **E;
     2994  Double *yy;
     2995
     2996  imgPad += (yPos*stride);
     2997  imgOrg += (yPos*stride);
     2998
     2999  for (i= yPos; i< yPosEnd; i++)
     3000  {
     3001    yLineInLCU = i % m_lcuHeight;
     3002
     3003    if (yLineInLCU<m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height)
     3004    {
     3005      imgPad1 = imgPad + stride;
     3006      imgPad2 = imgPad - stride;
     3007      imgPad3 = imgPad + 2*stride;
     3008      imgPad4 = imgPad - 2*stride;
     3009      imgPad5 = imgPad + 3*stride;
     3010      imgPad6 = imgPad - 3*stride;
     3011    }
     3012    else if (yLineInLCU<m_lineIdxPadTop)
     3013    {
     3014      paddingLine = - yLineInLCU + m_lineIdxPadTop - 1;
     3015      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + min(paddingLine, 1)*stride;
     3016      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - stride;
     3017      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + min(paddingLine, 2)*stride;
     3018      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - 2*stride;
     3019      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + min(paddingLine, 3)*stride;
     3020      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - 3*stride;
     3021    }
     3022    else
     3023    {
     3024      paddingLine = yLineInLCU - m_lineIdxPadTop;
     3025      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + stride;
     3026      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - min(paddingLine, 1)*stride;
     3027      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + 2*stride;
     3028      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - min(paddingLine, 2)*stride;
     3029      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride;
     3030      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - min(paddingLine, 3)*stride;
     3031    }         
     3032
     3033    for (j= xPos; j< xPosEnd; j++)
     3034    {
     3035      if ( m_maskImg[i][j] || isforceCollection )
     3036      {
     3037        varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W];
     3038        memset(ELocal, 0, N*sizeof(Int));
     3039
     3040        ELocal[0] = (imgPad5[j] + imgPad6[j]);
     3041        ELocal[1] = (imgPad3[j] + imgPad4[j]);
     3042
     3043        ELocal[2] = (imgPad1[j-1] + imgPad2[j+1]);
     3044        ELocal[3] = (imgPad1[j  ] + imgPad2[j  ]);
     3045        ELocal[4] = (imgPad1[j+1] + imgPad2[j-1]);
     3046
     3047        ELocal[5] = (imgPad[j+4] + imgPad[j-4]);
     3048        ELocal[6] = (imgPad[j+3] + imgPad[j-3]);
     3049        ELocal[7] = (imgPad[j+2] + imgPad[j-2]);
     3050        ELocal[8] = (imgPad[j+1] + imgPad[j-1]);
     3051        ELocal[9] = (imgPad[j  ]);
     3052
     3053        yLocal= imgOrg[j];
     3054        pixAcc[varInd] += (yLocal*yLocal);
     3055        E  = eCorr[varInd];
     3056        yy = yCorr[varInd];
     3057
     3058        for (k=0; k<N; k++)
     3059        {
     3060          for (l=k; l<N; l++)
     3061          {
     3062            E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     3063          }
     3064          yy[k]+=(double)(ELocal[k]*yLocal);
     3065        }
     3066      }
     3067    }
     3068    imgPad += stride;
     3069    imgOrg += stride;
     3070  }
     3071
     3072  if(isSymmCopyBlockMatrix)
     3073  {
     3074    for (varInd=0; varInd<NO_VAR_BINS; varInd++)
     3075    {
     3076      E = eCorr[varInd];
     3077      for (k=1; k<N; k++)
     3078      {
     3079        for (l=0; l<k; l++)
     3080        {
     3081          E[k][l] = E[l][k];
     3082        }
     3083      }
     3084    }
     3085  }
     3086
     3087}
     3088#endif
     3089
     3090#else
     3091
     3092
     3093#if ALF_CHROMA_LAMBDA 
     3094/**
     3095 \param pcAlfParam           ALF parameter
     3096 \param [out] pvAlfCtrlParam ALF CU control parameters container for slices
     3097 \param dLambdaLuma          luma lambda value for RD cost computation
     3098 \param dLambdaChroma        chroma lambda value for RD cost computation
     3099 \retval ruiDist             distortion
     3100 \retval ruiBits             required bits
     3101 \retval ruiMaxAlfCtrlDepth  optimal partition depth
     3102 */
     3103#if HHI_INTERVIEW_SKIP
     3104Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambdaLuma, Double dLambdaChroma, UInt64& ruiDist, UInt64& ruiBits, Bool bInterviewSkip)
     3105#else
     3106Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambdaLuma, Double dLambdaChroma, UInt64& ruiDist, UInt64& ruiBits)
     3107
     3108#endif
     3109#else
    3533110/**
    3543111 \param pcAlfParam           ALF parameter
     
    3583115 \retval ruiMaxAlfCtrlDepth  optimal partition depth
    3593116 */
    360 Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits, UInt& ruiMaxAlfCtrlDepth )
    361 {
    362   Int tap, num_coef;
    363  
    364   // set global variables
    365   tap         = ALF_MAX_NUM_TAP;
    366 #if TI_ALF_MAX_VSIZE_7
    367   Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap);
    368   num_coef = (tap * tapV + 1) >> 1;
    369 #else
    370   num_coef    = (tap*tap+1)>>1;
    371 #endif
    372   num_coef    = num_coef + 1; // DC offset
     3117#if HHI_INTERVIEW_SKIP
     3118Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits, Bool bInterviewSkip)
     3119#else
     3120Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits)
     3121
     3122#endif
     3123#endif
     3124{
    3733125 
    3743126  // set lambda
     3127#if ALF_CHROMA_LAMBDA 
     3128  m_dLambdaLuma   = dLambdaLuma;
     3129  m_dLambdaChroma = dLambdaChroma;
     3130#else
    3753131  m_dLambdaLuma   = dLambda;
    3763132  m_dLambdaChroma = dLambda;
    377  
     3133#endif
     3134
     3135  m_lcuHeight = m_pcPic->getSlice(0)->getSPS()->getMaxCUHeight();
     3136
     3137#if ALF_SINGLE_FILTER_SHAPE
     3138  m_lineIdxPadBot = m_lcuHeight - 4 - 3; // DFRegion, Vertical Taps
     3139#else
     3140  m_lineIdxPadBot = m_lcuHeight - 4 - 4; // DFRegion, Vertical Taps
     3141#endif
     3142  m_lineIdxPadTop = m_lcuHeight - 4; // DFRegion
     3143
     3144  m_lcuHeightChroma = m_lcuHeight>>1;
     3145#if ALF_SINGLE_FILTER_SHAPE
     3146  m_lineIdxPadBotChroma = m_lcuHeightChroma - 2 - 3; // DFRegion, Vertical Taps
     3147#else
     3148  m_lineIdxPadBotChroma = m_lcuHeightChroma - 2 - 4; // DFRegion, Vertical Taps
     3149#endif
     3150  m_lineIdxPadTopChroma = m_lcuHeightChroma - 2 ; // DFRegion
     3151
    3783152  TComPicYuv* pcPicOrg = m_pcPic->getPicYuvOrg();
    3793153 
     
    3813155  TComPicYuv* pcPicYuvRec    = m_pcPic->getPicYuvRec();
    3823156  TComPicYuv* pcPicYuvExtRec = m_pcTempPicYuv;
     3157#if HHI_INTERVIEW_SKIP
     3158  TComPicYuv* pcUsedPelMap   = m_pcPic->getUsedPelsMap() ;
     3159  if(bInterviewSkip)
     3160    assert( pcUsedPelMap ) ;
     3161#endif
    3833162 
    3843163  pcPicYuvRec->copyToPic(pcPicYuvExtRec);
    385 #if MTK_NONCROSS_INLOOP_FILTER
    3863164  if(!m_bUseNonCrossALF)
    3873165  {
    388 #endif 
    3893166  pcPicYuvExtRec->setBorderExtension( false );
    3903167  pcPicYuvExtRec->extendPicBorder   ();
    391 #if MTK_NONCROSS_INLOOP_FILTER
    392   }
    393 #endif 
     3168  }
    3943169 
    3953170  // set min cost
     
    4033178 
    4043179  // calc original cost
     3180#if HHI_INTERVIEW_SKIP
     3181  xCalcRDCost( pcPicOrg, pcPicYuvRec, pcUsedPelMap, NULL, uiOrigRate, uiOrigDist, dOrigCost );
     3182#else
    4053183  xCalcRDCost( pcPicOrg, pcPicYuvRec, NULL, uiOrigRate, uiOrigDist, dOrigCost );
     3184#endif
    4063185  m_pcBestAlfParam->alf_flag = 0;
    407   m_pcBestAlfParam->cu_control_flag = 0;
    408  
    4093186  // initialize temp_alfps
    4103187  m_pcTempAlfParam->alf_flag        = 1;
    411   m_pcTempAlfParam->tap             = tap;
    412 #if TI_ALF_MAX_VSIZE_7
    413   m_pcTempAlfParam->tapV            = tapV;
    414 #endif
    415   m_pcTempAlfParam->num_coeff       = num_coef;
    4163188  m_pcTempAlfParam->chroma_idc      = 0;
    417   m_pcTempAlfParam->cu_control_flag = 0;
    418  
    419 #if MQT_ALF_NPASS
     3189
     3190  m_bAlfCUCtrlEnabled = (pvAlfCtrlParam != NULL)?true:false;
     3191  if(m_bAlfCUCtrlEnabled)
     3192  {
     3193    m_vBestAlfCUCtrlParam.resize(m_uiNumSlicesInPic);
     3194    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3195    {
     3196      m_vBestAlfCUCtrlParam[s].cu_control_flag = 0;
     3197    }
     3198  }
     3199  else
     3200  {
     3201    m_vBestAlfCUCtrlParam.clear();
     3202  }
     3203
    4203204  setALFEncodingParam(m_pcPic);
    421 #endif
    4223205
    4233206  // adaptive in-loop wiener filtering
    424   xEncALFLuma_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
    425  
     3207#if HHI_INTERVIEW_SKIP
     3208  xEncALFLuma( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost, bInterviewSkip );
     3209#else
     3210  xEncALFLuma( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
     3211#endif
     3212
    4263213  // cu-based filter on/off control
     3214#if HHI_INTERVIEW_SKIP
     3215  xCUAdaptiveControl_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, pcUsedPelMap, uiMinRate, uiMinDist, dMinCost );
     3216#else
    4273217  xCUAdaptiveControl_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
    428  
    429   // adaptive tap-length
    430   xFilterTapDecision_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
    431  
    432   // compute RD cost
    433   xCalcRDCost( pcPicOrg, pcPicYuvRec, m_pcBestAlfParam, uiMinRate, uiMinDist, dMinCost );
     3218#endif
    4343219 
    4353220  // compare RD cost to non-ALF case
     
    4443229  {
    4453230    m_pcBestAlfParam->alf_flag        = 0;
    446     m_pcBestAlfParam->cu_control_flag = 0;
    447    
     3231
    4483232    uiMinRate = uiOrigRate;
    4493233    uiMinDist = uiOrigDist;
    450     dMinCost = dMinCost;
    4513234   
    4523235    m_pcEntropyCoder->setAlfCtrl(false);
     3236    if(m_bAlfCUCtrlEnabled)
     3237    {
     3238      for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3239      {
     3240        m_vBestAlfCUCtrlParam[s].cu_control_flag = 0;
     3241      }
     3242    }
    4533243    pcPicYuvExtRec->copyToPicLuma(pcPicYuvRec);
    4543244   
     
    4563246    ruiDist = uiOrigDist;
    4573247  }
    458  
    4593248  // if ALF works
    4603249  if( m_pcBestAlfParam->alf_flag )
    4613250  {
    462     // predict ALF coefficients
    463     predictALFCoeff( m_pcBestAlfParam );
    464    
    4653251    // do additional ALF process for chroma
    466     xEncALFChroma( uiMinRate, pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, ruiDist, ruiBits );
     3252    xFilterTapDecisionChroma( uiMinRate, pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, ruiDist, ruiBits );
    4673253  }
    4683254 
     
    4703256  copyALFParam(pcAlfParam, m_pcBestAlfParam);
    4713257 
    472   // store best depth
    473   ruiMaxAlfCtrlDepth = m_pcEntropyCoder->getMaxAlfCtrlDepth();
     3258  if(m_bAlfCUCtrlEnabled)
     3259  {
     3260    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3261    {
     3262      (*pvAlfCtrlParam)[s]= m_vBestAlfCUCtrlParam[s];
     3263    }
     3264  }
     3265}
     3266#endif
     3267
     3268/** PCM LF disable process.
     3269 * \param pcPic picture (TComPic) pointer
     3270 * \returns Void
     3271 *
     3272 * \note Replace filtered sample values of PCM mode blocks with the transmitted and reconstructed ones.
          *
          * This function contains no logic of its own: it forwards pcPic unchanged to
          * xPCMRestoration(), which is defined outside this part of the file.
     3273 */
     3274Void TEncAdaptiveLoopFilter::PCMLFDisableProcess (TComPic* pcPic)
     3275{
     3276  xPCMRestoration(pcPic);
    4743277}
    4753278
     
    4783281// ====================================================================================================================
    4793282
    480 Void TEncAdaptiveLoopFilter::xEncALFChroma( UInt64 uiLumaRate, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, UInt64& ruiBits )
    481 {
          // Chroma stage of the ALF decision: calls xFilteringFrameChroma(), compares the SSD of
          // pcPicRest and of pcPicDec against pcPicOrg for Cb and Cr, and then adds the rate and
          // distortion of the option that is kept to ruiBits / ruiDist.
          //   uiLumaRate       : initial value of the running minimum rate (uiMinRate)
          //   pcPicOrg         : reference picture in every SSD / RD-cost call below
          //   pcPicDec         : picture whose SSD is used as the "original" (uiOrgDist*) distortion
          //   pcPicRest        : picture whose SSD is used as the "filtered" (uiFiltDist*) distortion
          //   ruiDist, ruiBits : incremented (never overwritten) on every path except the early return
    482   // restriction for non-referenced B-slice
    483   if (m_eSliceType == B_SLICE && m_iPicNalReferenceIdc == 0)
    484   {
            // early exit: ruiDist, ruiBits and m_pcBestAlfParam are left untouched
    485     return;
    486   }
    487  
    488   Int tap, num_coef;
    489  
    490   // chroma filter size (tap and num_coef are locals): (tap*tap+1)>>1 coefficients plus one for DC
    491   tap         = ALF_MAX_NUM_TAP_C;
    492   num_coef    = (tap*tap+1)>>1;
    493   num_coef    = num_coef + 1; // DC offset
    494  
    495   // set min cost
    496   UInt64 uiMinRate = uiLumaRate;
    497   UInt64 uiMinDist = MAX_INT;
    498   Double dMinCost  = MAX_DOUBLE;
    499  
    500   // calc original cost
          // NOTE(review): uiMinRate / uiMinDist / dMinCost are presumably overwritten by
          // xCalcRDCostChroma() with the cost of the current best parameters; its signature is not
          // visible here - confirm.
    501   copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
    502   xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiMinRate, uiMinDist, dMinCost);
    503  
    504   // initialize temp_alfps
          // chroma_idc = 3 sets both component bits (2 = Cb, 1 = Cr, see the on/off decision below)
    505   m_pcTempAlfParam->chroma_idc = 3;
    506   m_pcTempAlfParam->tap_chroma       = tap;
    507   m_pcTempAlfParam->num_coeff_chroma = num_coef;
    508  
    509   // Adaptive in-loop wiener filtering for chroma
    510   xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);
    511  
    512   // filter on/off decision for chroma
          // chroma planes are taken as half the picture width and height
    513   Int iCWidth = (pcPicOrg->getWidth()>>1);
    514   Int iCHeight = (pcPicOrg->getHeight()>>1);
    515   Int iCStride = pcPicOrg->getCStride();
    516   UInt64 uiFiltDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicRest->getCbAddr(), iCWidth, iCHeight, iCStride);
    517   UInt64 uiFiltDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicRest->getCrAddr(), iCWidth, iCHeight, iCStride);
    518   UInt64 uiOrgDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicDec->getCbAddr(), iCWidth, iCHeight, iCStride);
    519   UInt64 uiOrgDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicDec->getCrAddr(), iCWidth, iCHeight, iCStride);
    520  
          // rebuild chroma_idc per component: +2 when Cb improved, +1 when Cr improved
          // (strictly lower SSD in pcPicRest than in pcPicDec)
    521   m_pcTempAlfParam->chroma_idc = 0;
    522   if(uiOrgDistCb > uiFiltDistCb)
    523     m_pcTempAlfParam->chroma_idc += 2;
    524   if(uiOrgDistCr  > uiFiltDistCr )
    525     m_pcTempAlfParam->chroma_idc += 1;
    526  
    527   if(m_pcTempAlfParam->chroma_idc)
    528   {
            // at least one component improved
    529     if(m_pcTempAlfParam->chroma_idc!=3)
    530     {
    531       // chroma filter re-design
              // (only one of Cb / Cr is still flagged, so the filtering step is run again)
    532       xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);
    533     }
    534    
    535     UInt64 uiRate, uiDist;
    536     Double dCost;
    537     xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost);
    538    
    539     if( dCost < dMinCost )
    540     {
              // chroma filtering wins: keep the temp parameters and charge their rate/distortion
    541       copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    542       predictALFCoeffChroma(m_pcBestAlfParam);
    543      
    544       ruiBits += uiRate;
    545       ruiDist += uiDist;
    546     }
    547     else
    548     {
              // chroma filtering loses: switch it off and charge the pre-filter rate/distortion
    549       m_pcBestAlfParam->chroma_idc = 0;
    550      
              // only the components that were flagged are copied
              // NOTE(review): presumably copyToPicCb/Cr copies from pcPicDec into pcPicRest,
              // undoing the filtered plane - confirm against TComPicYuv
    551       if((m_pcTempAlfParam->chroma_idc>>1)&0x01)
    552         pcPicDec->copyToPicCb(pcPicRest);
    553       if(m_pcTempAlfParam->chroma_idc&0x01)
    554         pcPicDec->copyToPicCr(pcPicRest);
    555      
    556       ruiBits += uiMinRate;
    557       ruiDist += uiMinDist;
    558     }
    559   }
    560   else
    561   {
            // neither component improved: chroma ALF off, both planes copied via pcPicDec
    562     m_pcBestAlfParam->chroma_idc = 0;
    563    
    564     ruiBits += uiMinRate;
    565     ruiDist += uiMinDist;
    566    
    567     pcPicDec->copyToPicCb(pcPicRest);
    568     pcPicDec->copyToPicCr(pcPicRest);
    569   }
    570 }
    571 
    5723283// ====================================================================================================================
    5733284// Private member functions
    5743285// ====================================================================================================================
    575 
     3286#if !LCU_SYNTAX_ALF
    5763287Void TEncAdaptiveLoopFilter::xInitParam()
    5773288{
     
    6163327    }
    6173328  }
     3329  if (m_ppdAlfCorrCb != NULL)
     3330  {
     3331    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3332    {
     3333      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3334      {
     3335        m_ppdAlfCorrCb[i][j] = 0;
     3336      }
     3337    }
     3338  }
     3339  else
     3340  {
     3341    m_ppdAlfCorrCb = new Double*[ALF_MAX_NUM_COEF];
     3342    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3343    {
     3344      m_ppdAlfCorrCb[i] = new Double[ALF_MAX_NUM_COEF+1];
     3345      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3346      {
     3347        m_ppdAlfCorrCb[i][j] = 0;
     3348      }
     3349    }
     3350  }
     3351 
     3352  if (m_ppdAlfCorrCr != NULL)
     3353  {
     3354    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3355    {
     3356      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3357      {
     3358        m_ppdAlfCorrCr[i][j] = 0;
     3359      }
     3360    }
     3361  }
     3362  else
     3363  {
     3364    m_ppdAlfCorrCr = new Double*[ALF_MAX_NUM_COEF];
     3365    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3366    {
     3367      m_ppdAlfCorrCr[i] = new Double[ALF_MAX_NUM_COEF+1];
     3368      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3369      {
     3370        m_ppdAlfCorrCr[i][j] = 0;
     3371      }
     3372    }
     3373  }
    6183374}
    6193375
     
    6383394    m_pdDoubleAlfCoeff = NULL;
    6393395  }
    640 }
    641 
     3396  if (m_ppdAlfCorrCb != NULL)
     3397  {
     3398    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3399    {
     3400      delete[] m_ppdAlfCorrCb[i];
     3401      m_ppdAlfCorrCb[i] = NULL;
     3402    }
     3403    delete[] m_ppdAlfCorrCb;
     3404    m_ppdAlfCorrCb = NULL;
     3405  }
     3406 
     3407  if (m_ppdAlfCorrCr != NULL)
     3408  {
     3409    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3410    {
     3411      delete[] m_ppdAlfCorrCr[i];
     3412      m_ppdAlfCorrCr[i] = NULL;
     3413    }
     3414    delete[] m_ppdAlfCorrCr;
     3415    m_ppdAlfCorrCr = NULL;
     3416  }
     3417}
     3418#endif
    6423419Void TEncAdaptiveLoopFilter::xCreateTmpAlfCtrlFlags()
    6433420{
     
    6763453}
    6773454
    678 Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlags()
    679 {
          // Visits every CU address of m_pcPic (0 .. getNumCUsInFrame()-1) and hands each CU to
          // xEncodeCUAlfCtrlFlag() with uiAbsPartIdx = 0 and uiDepth = 0.
    680   for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
    681   {
    682     TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
    683     xEncodeCUAlfCtrlFlag(pcCU, 0, 0);
    684   }
    685 }
    686 
     3455/** Encode ALF CU control flags
          *
          * For every slice index below m_uiNumSlicesInPic that m_pcPic reports as valid, passes
          * that slice's alf_cu_flag entries, in order, to m_pcEntropyCoder->encodeAlfCtrlFlag().
          * Slices whose cu_control_flag is not 1 contribute nothing.
          *
          * \param vAlfCUCtrlParam per-slice CU control info, indexed by slice index
          *
          * \note vAlfCUCtrlParam is indexed without a bounds check; this assumes it holds at least
          *       m_uiNumSlicesInPic entries (TODO confirm at the call sites).
     3456 */
     3457Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlags(std::vector<AlfCUCtrlInfo> &vAlfCUCtrlParam)
     3458{
     3459  for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3460  {
     3461    if(!m_pcPic->getValidSlice(s))
     3462    {
               // nothing is encoded for a slice the picture does not report as valid
     3463      continue;
     3464    }
     3465
     3466    AlfCUCtrlInfo& rCUCtrlInfo = vAlfCUCtrlParam[s];
             // flags are written only when CU control is switched on for this slice
     3467    if(rCUCtrlInfo.cu_control_flag == 1)
     3468    {
               // size() is cast to Int so the comparison with the signed index i is signed
     3469      for(Int i=0; i< (Int)rCUCtrlInfo.alf_cu_flag.size(); i++)
     3470      {
     3471        m_pcEntropyCoder->encodeAlfCtrlFlag(rCUCtrlInfo.alf_cu_flag[i]);
     3472      }
     3473    }
     3474  }
     3475}
    6873476Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth)
    6883477{
     
    6943483 
    6953484#if AD_HOCS_SLICES 
    696   if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
     3485  if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    6973486#else 
    698   if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
     3487  if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    6993488#endif 
    7003489  {
     
    7113500     
    7123501#if AD_HOCS_SLICES     
    713       if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
    714 #else
    715       if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
     3502      if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
     3503#else
     3504      if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    7163505#endif     
    7173506        xEncodeCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1);
     
    7223511  m_pcEntropyCoder->encodeAlfCtrlFlag(pcCU, uiAbsPartIdx);
    7233512}
    724 #if MTK_NONCROSS_INLOOP_FILTER
    725 Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Int ypos, Int xpos, Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride, Bool bSymmCopyBlockMatrix)
    726 #else
    727 Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride)
    728 #endif
    729 {
    730   //Patch should be extended before this point................
    731   //ext_offset  = tap>>1;
    732  
    733 #if TI_ALF_MAX_VSIZE_7
    734   Int iTapV   = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap);
    735   Int N       = (iTap * iTapV + 1) >> 1;
    736   Int offsetV = iTapV >> 1;
    737 #else
    738   Int N      = (iTap*iTap+1)>>1;
    739 #endif
    740   Int offset = iTap>>1;
    741  
    742   const Int* pFiltPos;
    743  
    744   switch(iTap)
    745   {
    746     case 5:
    747       pFiltPos = m_aiSymmetricArray5x5;
    748       break;
    749     case 7:
    750       pFiltPos = m_aiSymmetricArray7x7;
    751       break;
    752     case 9:
    753 #if TI_ALF_MAX_VSIZE_7
    754       pFiltPos = m_aiSymmetricArray9x7;
    755 #else
    756       pFiltPos = m_aiSymmetricArray9x9;
    757 #endif
    758       break;
    759     default:
    760 #if TI_ALF_MAX_VSIZE_7
    761       pFiltPos = m_aiSymmetricArray9x7;
    762 #else
    763       pFiltPos = m_aiSymmetricArray9x9;
    764 #endif
     3513
     3514#if !LCU_SYNTAX_ALF
     3515
     3516Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Int ypos, Int xpos, Pel* pImgOrg, Pel* pImgPad, Int filtNo, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride, Bool bSymmCopyBlockMatrix)
     3517{
     3518  Int     yposEnd = ypos + iHeight -1;
     3519  Int     xposEnd = xpos + iWidth  -1;
     3520  Int     N       = m_sqrFiltLengthTab[filtNo];
     3521
     3522  Int imgHeightChroma = m_img_height>>1;
     3523  Int yLineInLCU;
     3524  Int paddingline ;
     3525
     3526  Int ELocal[ALF_MAX_NUM_COEF];
     3527  Pel *pImgPad1, *pImgPad2, *pImgPad3, *pImgPad4;
     3528  Int i, j, k, l;
     3529  Int yLocal;
     3530
     3531  pImgPad += (ypos*iCmpStride);
     3532  pImgOrg += (ypos*iOrgStride);
     3533
     3534  switch(filtNo)
     3535  {
     3536#if !ALF_SINGLE_FILTER_SHAPE
     3537  case ALF_STAR5x5:
     3538    {
     3539      for (i= ypos; i<= yposEnd; i++)
     3540      {
     3541        yLineInLCU = i % m_lcuHeightChroma;
     3542
     3543        if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma )
     3544        {
     3545          pImgPad1 = pImgPad +   iCmpStride;
     3546          pImgPad2 = pImgPad -   iCmpStride;
     3547          pImgPad3 = pImgPad + 2*iCmpStride;
     3548          pImgPad4 = pImgPad - 2*iCmpStride;
     3549        }
     3550        else if (yLineInLCU < m_lineIdxPadTopChroma)
     3551        {
     3552          paddingline = - yLineInLCU + m_lineIdxPadTopChroma - 1;
     3553          pImgPad1 = pImgPad + min(paddingline, 1)*iCmpStride;
     3554          pImgPad2 = pImgPad -   iCmpStride;
     3555          pImgPad3 = pImgPad + min(paddingline, 2)*iCmpStride;
     3556          pImgPad4 = pImgPad - 2*iCmpStride;
     3557        }
     3558        else
     3559        {
     3560          paddingline = yLineInLCU - m_lineIdxPadTopChroma ;
     3561          pImgPad1 = pImgPad +   iCmpStride;
     3562          pImgPad2 = pImgPad - min(paddingline, 1)*iCmpStride;
     3563          pImgPad3 = pImgPad + 2*iCmpStride;
     3564          pImgPad4 = pImgPad - min(paddingline, 2)*iCmpStride;
     3565        }
     3566
     3567        if ( (yLineInLCU == m_lineIdxPadTopChroma || yLineInLCU == m_lineIdxPadTopChroma-1) && i-yLineInLCU+m_lcuHeightChroma < imgHeightChroma )
     3568        {
     3569          pImgPad+= iCmpStride;
     3570          pImgOrg+= iOrgStride;
     3571          continue;
     3572        }
     3573        else
     3574        {
     3575        for (j= xpos; j<= xposEnd; j++)
     3576        {
     3577          memset(ELocal, 0, N*sizeof(Int));
     3578
     3579          ELocal[0] = (pImgPad3[j+2] + pImgPad4[j-2]);
     3580          ELocal[1] = (pImgPad3[j  ] + pImgPad4[j  ]);
     3581          ELocal[2] = (pImgPad3[j-2] + pImgPad4[j+2]);
     3582
     3583          ELocal[3] = (pImgPad1[j+1] + pImgPad2[j-1]);
     3584          ELocal[4] = (pImgPad1[j  ] + pImgPad2[j  ]);
     3585          ELocal[5] = (pImgPad1[j-1] + pImgPad2[j+1]);
     3586
     3587          ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     3588          ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     3589          ELocal[8] = (pImgPad[j  ]);
     3590
     3591          yLocal= (Int)pImgOrg[j];
     3592
     3593          for(k=0; k<N; k++)
     3594          {
     3595            m_ppdAlfCorr[k][k] += ELocal[k]*ELocal[k];
     3596            for(l=k+1; l<N; l++)
     3597            {
     3598              m_ppdAlfCorr[k][l] += ELocal[k]*ELocal[l];
     3599            }
     3600
     3601            m_ppdAlfCorr[k][N] += yLocal*ELocal[k];
     3602          }
     3603        }
     3604        pImgPad+= iCmpStride;
     3605        pImgOrg+= iOrgStride;
     3606      }
     3607
     3608      }
     3609    }
     3610    break;
     3611  case ALF_CROSS9x9:
     3612    {
     3613      Pel *pImgPad5, *pImgPad6, *pImgPad7, *pImgPad8;
     3614#else
     3615  case ALF_CROSS9x7_SQUARE3x3:
     3616    {
     3617      Pel *pImgPad5, *pImgPad6;
     3618#endif
     3619      for (i= ypos; i<= yposEnd; i++)
     3620      {
     3621        yLineInLCU = i % m_lcuHeightChroma;
     3622
     3623        if (yLineInLCU<2 && i> 2)
     3624        {
     3625          paddingline = yLineInLCU + 2 ;
     3626          pImgPad1 = pImgPad +   iCmpStride;
     3627          pImgPad2 = pImgPad -   iCmpStride;
     3628          pImgPad3 = pImgPad + 2*iCmpStride;
     3629          pImgPad4 = pImgPad - 2*iCmpStride;
     3630          pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + 3*iCmpStride;
     3631          pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - min(paddingline, 3)*iCmpStride;;
     3632#if !ALF_SINGLE_FILTER_SHAPE
     3633          pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + 4*iCmpStride;
     3634          pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - min(paddingline, 4)*iCmpStride;;     
     3635#endif
     3636        }
     3637        else if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma )
     3638        {
     3639          pImgPad1 = pImgPad +   iCmpStride;
     3640          pImgPad2 = pImgPad -   iCmpStride;
     3641          pImgPad3 = pImgPad + 2*iCmpStride;
     3642          pImgPad4 = pImgPad - 2*iCmpStride;
     3643          pImgPad5 = pImgPad + 3*iCmpStride;
     3644          pImgPad6 = pImgPad - 3*iCmpStride;
     3645#if !ALF_SINGLE_FILTER_SHAPE
     3646          pImgPad7 = pImgPad + 4*iCmpStride;
     3647          pImgPad8 = pImgPad - 4*iCmpStride;
     3648#endif
     3649        }
     3650        else if (yLineInLCU < m_lineIdxPadTopChroma)
     3651        {
     3652          paddingline = - yLineInLCU + m_lineIdxPadTopChroma - 1;
     3653          pImgPad1 = (paddingline < 1) ? pImgPad : pImgPad + min(paddingline, 1)*iCmpStride;
     3654          pImgPad2 = (paddingline < 1) ? pImgPad : pImgPad -   iCmpStride;
     3655          pImgPad3 = (paddingline < 2) ? pImgPad : pImgPad + min(paddingline, 2)*iCmpStride;
     3656          pImgPad4 = (paddingline < 2) ? pImgPad : pImgPad - 2*iCmpStride;
     3657          pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + min(paddingline, 3)*iCmpStride;
     3658          pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - 3*iCmpStride;
     3659#if !ALF_SINGLE_FILTER_SHAPE
     3660          pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + min(paddingline, 4)*iCmpStride;
     3661          pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - 4*iCmpStride;
     3662#endif
     3663        }
     3664        else
     3665        {
     3666          paddingline = yLineInLCU - m_lineIdxPadTopChroma ;
     3667          pImgPad1 = (paddingline < 1) ? pImgPad : pImgPad +   iCmpStride;
     3668          pImgPad2 = (paddingline < 1) ? pImgPad : pImgPad - min(paddingline, 1)*iCmpStride;
     3669          pImgPad3 = (paddingline < 2) ? pImgPad : pImgPad + 2*iCmpStride;
     3670          pImgPad4 = (paddingline < 2) ? pImgPad : pImgPad - min(paddingline, 2)*iCmpStride;
     3671          pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + 3*iCmpStride;
     3672          pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - min(paddingline, 3)*iCmpStride;
     3673#if !ALF_SINGLE_FILTER_SHAPE
     3674          pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + 4*iCmpStride;
     3675          pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - min(paddingline, 4)*iCmpStride;
     3676#endif
     3677        }
     3678
     3679        for (j= xpos; j<= xposEnd; j++)
     3680        {
     3681          memset(ELocal, 0, N*sizeof(Int));
     3682#if ALF_SINGLE_FILTER_SHAPE
     3683          ELocal[0] = (pImgPad5[j]+pImgPad6[j]);
     3684          ELocal[1] = (pImgPad3[j]+pImgPad4[j]);
     3685          ELocal[2] = (pImgPad1[j-1]+pImgPad2[j+1]);
     3686          ELocal[3] = (pImgPad1[j]+pImgPad2[j]);
     3687          ELocal[4] = (pImgPad1[j+1]+pImgPad2[j-1]);
     3688          ELocal[5] = (pImgPad[j+4]+pImgPad[j-4]);
     3689          ELocal[6] = (pImgPad[j+3]+pImgPad[j-3]);
     3690          ELocal[7] = (pImgPad[j+2]+pImgPad[j-2]);
     3691          ELocal[8] = (pImgPad[j+1]+pImgPad[j-1]);
     3692          ELocal[9] = (pImgPad[j  ]);
     3693#else
     3694          ELocal[0] = (pImgPad7[j] + pImgPad8[j]);
     3695
     3696          ELocal[1] = (pImgPad5[j] + pImgPad6[j]);
     3697
     3698          ELocal[2] = (pImgPad3[j] + pImgPad4[j]);
     3699
     3700          ELocal[3] = (pImgPad1[j] + pImgPad2[j]);
     3701
     3702          ELocal[4] = (pImgPad[j+4] + pImgPad[j-4]);
     3703          ELocal[5] = (pImgPad[j+3] + pImgPad[j-3]);
     3704          ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     3705          ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     3706          ELocal[8] = (pImgPad[j  ] );
     3707#endif
     3708          yLocal= (Int)pImgOrg[j];
     3709
     3710          for(k=0; k<N; k++)
     3711          {
     3712            m_ppdAlfCorr[k][k] += ELocal[k]*ELocal[k];
     3713            for(l=k+1; l<N; l++)
     3714            {
     3715              m_ppdAlfCorr[k][l] += ELocal[k]*ELocal[l];
     3716            }
     3717
     3718            m_ppdAlfCorr[k][N] += yLocal*ELocal[k];
     3719          }
     3720        }
     3721        pImgPad+= iCmpStride;
     3722        pImgOrg+= iOrgStride;
     3723      }
     3724
     3725    }
     3726    break;
     3727  default:
     3728    {
     3729      printf("Not a supported filter shape\n");
    7653730      assert(0);
    766       break;
    767   }
    768  
    769   Pel* pTerm = new Pel[N];
    770  
    771   Int i, j;
    772 #if MTK_NONCROSS_INLOOP_FILTER
    773   for (Int y = ypos; y < ypos + iHeight; y++)
    774   {
    775     for (Int x = xpos; x < xpos + iWidth; x++)
    776     {
    777 #else   
    778   for (Int y = 0; y < iHeight; y++)
    779   {
    780     for (Int x = 0; x < iWidth; x++)
    781     {
    782 #endif
    783       i = 0;
    784       ::memset(pTerm, 0, sizeof(Pel)*N);
    785 #if TI_ALF_MAX_VSIZE_7
    786       for (Int yy = y - offsetV; yy <= y + offsetV; yy++)
    787 #else
    788       for(Int yy=y-offset; yy<=y+offset; yy++)
    789 #endif
    790       {
    791         for(Int xx=x-offset; xx<=x+offset; xx++)
    792         {
    793           pTerm[pFiltPos[i]] += pCmp[xx + yy*iCmpStride];
    794           i++;
    795         }
    796       }
    797      
    798       for(j=0; j<N; j++)
    799       {
    800         m_ppdAlfCorr[j][j] += pTerm[j]*pTerm[j];
    801         for(i=j+1; i<N; i++)
    802           m_ppdAlfCorr[j][i] += pTerm[j]*pTerm[i];
    803        
    804         // DC offset
    805         m_ppdAlfCorr[j][N]   += pTerm[j];
    806         m_ppdAlfCorr[j][N+1] += pOrg[x+y*iOrgStride]*pTerm[j];
    807       }
    808       // DC offset
    809       for(i=0; i<N; i++)
    810         m_ppdAlfCorr[N][i] += pTerm[i];
    811       m_ppdAlfCorr[N][N]   += 1;
    812       m_ppdAlfCorr[N][N+1] += pOrg[x+y*iOrgStride];
    813     }
    814   }
    815 #if MTK_NONCROSS_INLOOP_FILTER
     3731      exit(1);
     3732    }
     3733  }
     3734
    8163735  if(bSymmCopyBlockMatrix)
    8173736  {
    818 #endif
    819   for(j=0; j<N-1; j++)
    820   {
    821     for(i=j+1; i<N; i++)
    822       m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i];
    823   }
    824 #if MTK_NONCROSS_INLOOP_FILTER
    825   }
    826 #endif
    827 
    828   delete[] pTerm;
    829   pTerm = NULL;
    830 }
    831 
     3737    for(j=0; j<N-1; j++)
     3738    {
     3739      for(i=j+1; i<N; i++)
     3740      {
     3741        m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i];
     3742      }
     3743    }
     3744  }
     3745}
     3746
     3747#endif
    8323748#if IBDI_DISTORTION
    833 UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride )
     3749#if HHI_INTERVIEW_SKIP
     3750UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Pel* pUsed, Int iWidth, Int iHeight, Int iStride )
    8343751{
    8353752  UInt64 uiSSD = 0;
     
    8443761    for( x = 0; x < iWidth; x++ )
    8453762    {
     3763      if ( pUsed ) // interview skipped
     3764      {
     3765        if( pUsed[x] )
     3766        {
    8463767      iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp;
     3768        }
     3769      }
     3770      else         // no interview skip
     3771      {
     3772          iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp;
     3773      }
    8473774    }
    8483775    pOrg += iStride;
    8493776    pCmp += iStride;
     3777    if(pUsed)
     3778    {
     3779      pUsed+= iStride;
     3780    }
    8503781  }
    8513782
     
    8583789  Int x, y;
    8593790 
     3791  Int iShift = g_uiBitIncrement;
     3792  Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;
     3793  Int iTemp;
     3794
     3795  for( y = 0; y < iHeight; y++ )
     3796  {
     3797    for( x = 0; x < iWidth; x++ )
     3798    {
     3799      iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp;
     3800    }
     3801    pOrg += iStride;
     3802    pCmp += iStride;
     3803  }
     3804
     3805  return uiSSD;;
     3806}
     3807#endif
     3808#else
     3809#if HHI_INTERVIEW_SKIP
     3810UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Pel* pUsed, Int iWidth, Int iHeight, Int iStride )
     3811{
     3812  UInt64 uiSSD = 0;
     3813  Int x, y;
     3814
     3815  UInt uiShift = g_uiBitIncrement<<1;
     3816  Int iTemp =0 ;
     3817
     3818  for( y = 0; y < iHeight; y++ )
     3819  {
     3820    for( x = 0; x < iWidth; x++ )
     3821    {
     3822      if ( pUsed ) // interview skipped
     3823      {
     3824        if( pUsed[x] )
     3825        {
     3826          iTemp = pOrg[x] - pCmp[x]; uiSSD += ( iTemp * iTemp ) >> uiShift;
     3827        }
     3828      }
     3829      else         // no interview skip
     3830      {
     3831          iTemp = pOrg[x] - pCmp[x]; uiSSD += ( iTemp * iTemp ) >> uiShift;
     3832      }
     3833    }
     3834    pOrg += iStride;
     3835    pCmp += iStride;
     3836    if(pUsed)
     3837    {
     3838      pUsed+= iStride;
     3839    }
     3840  }
     3841
     3842  return uiSSD;;
     3843}
     3844#else
     3845UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride )
     3846{
     3847  UInt64 uiSSD = 0;
     3848  Int x, y;
    8603849  UInt uiShift = g_uiBitIncrement<<1;
    8613850  Int iTemp;
     
    8743863}
    8753864#endif
     3865#endif
    8763866
    8773867Int TEncAdaptiveLoopFilter::xGauss(Double **a, Int N)
     
    8833873  {
    8843874    if (a[k][k] <0.000001)
     3875    {
    8853876      return 1;
     3877    }
    8863878  }
    8873879 
     
    9023894    t = a[i][N];
    9033895    for(j=i+1; j<N; j++)
     3896    {
    9043897      t -= a[i][j] * a[j][N];
     3898    }
    9053899    a[i][N] = t / a[i][i];
    9063900  }
     
    9323926    }
    9333927  } while( i <= j );
    934   if ( upper < j ) xFilterCoefQuickSort(coef_data, coef_num, upper, j);
    935   if ( i < lower ) xFilterCoefQuickSort(coef_data, coef_num, i, lower);
     3928  if ( upper < j )
     3929  {
     3930    xFilterCoefQuickSort(coef_data, coef_num, upper, j);
     3931  }
     3932  if ( i < lower )
     3933  {
     3934    xFilterCoefQuickSort(coef_data, coef_num, i, lower);
     3935  }
    9363936}
    9373937
     
    9463946  Int    *nc;
    9473947  const Int    *pFiltMag;
    948  
    949   switch(tap)
    950   {
    951     case 5:
    952       pFiltMag = m_aiSymmetricMag5x5;
    953       break;
    954     case 7:
    955       pFiltMag = m_aiSymmetricMag7x7;
    956       break;
    957     case 9:
    958 #if TI_ALF_MAX_VSIZE_7
    959       pFiltMag = m_aiSymmetricMag9x7;
    960 #else
    961       pFiltMag = m_aiSymmetricMag9x9;
    962 #endif
    963       break;
    964     default:
    965 #if TI_ALF_MAX_VSIZE_7
    966       pFiltMag = m_aiSymmetricMag9x7;
    967 #else
    968       pFiltMag = m_aiSymmetricMag9x9;
    969 #endif
    970       assert(0);
    971       break;
    972   }
    973  
    974 #if TI_ALF_MAX_VSIZE_7
    975   Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap);
    976   N = (tap * tapV + 1) >> 1;
    977 #else
    978   N = (tap*tap+1)>>1;
    979 #endif
    980  
     3948#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     3949  Int alfPrecisionBit = getAlfPrecisionBit( m_alfQP );
     3950#endif
     3951
     3952  N = m_sqrFiltLengthTab[tap];
     3953#if ALF_SINGLE_FILTER_SHAPE
     3954  pFiltMag = weightsShape1Sym;
     3955#else
     3956  // star shape
     3957  if(tap == 0)
     3958  {
     3959    pFiltMag = weightsShape0Sym;
     3960  }
     3961  // cross shape
     3962  else
     3963  {
     3964    pFiltMag = weightsShape1Sym;
     3965  }
     3966#endif
     3967
    9813968  dh = new Double[N];
    9823969  nc = new Int[N];
    9833970 
     3971#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 
     3972  max_value =   (1<<(1+alfPrecisionBit))-1;
     3973  min_value = 0-(1<<(1+alfPrecisionBit));
     3974#else
    9843975  max_value =   (1<<(1+ALF_NUM_BIT_SHIFT))-1;
    9853976  min_value = 0-(1<<(1+ALF_NUM_BIT_SHIFT));
    986  
     3977#endif
     3978
    9873979  dbl_total_gain=0.0;
    9883980  q_total_gain=0;
     
    9903982  {
    9913983    if(h[i]>=0.0)
     3984    {
     3985#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     3986      qh[i] =  (Int)( h[i]*(1<<alfPrecisionBit)+0.5);
     3987#else
    9923988      qh[i] =  (Int)( h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5);
     3989#endif
     3990    }
    9933991    else
     3992    {
     3993#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     3994      qh[i] = -(Int)(-h[i]*(1<<alfPrecisionBit)+0.5);
     3995#else
    9943996      qh[i] = -(Int)(-h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5);
    995    
     3997#endif
     3998    }
     3999
     4000#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4001    dh[i] = (Double)qh[i]/(Double)(1<<alfPrecisionBit) - h[i];
     4002#else
    9964003    dh[i] = (Double)qh[i]/(Double)(1<<ALF_NUM_BIT_SHIFT) - h[i];
     4004#endif
    9974005    dh[i]*=pFiltMag[i];
    9984006    dbl_total_gain += h[i]*pFiltMag[i];
     
    10024010 
    10034011  // modification of quantized filter coefficients
     4012#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4013  total_gain = (Int)(dbl_total_gain*(1<<alfPrecisionBit)+0.5);
     4014#else
    10044015  total_gain = (Int)(dbl_total_gain*(1<<ALF_NUM_BIT_SHIFT)+0.5);
    1005  
     4016#endif 
    10064017  if( q_total_gain != total_gain )
    10074018  {
     
    10204031      {
    10214032        if(dh[N-1]>0)
     4033        {
    10224034          qh[N-1]--;
     4035        }
    10234036        else
    10244037        {
     
    10424055      {
    10434056        if(dh[N-1]<0)
     4057        {
    10444058          qh[N-1]++;
     4059        }
    10454060        else
    10464061        {
     
    10564071  for(i=0; i<N; i++)
    10574072  {
    1058     qh[i] = Max(min_value,Min(max_value, qh[i]));
    1059   }
    1060  
    1061   // DC offset
    1062   //  max_value = Min(  (1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma)))-1, (1<<14)-1);
    1063   //  min_value = Max( -(1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma))),  -(1<<14)  );
    1064   max_value = Min(  (1<<(3+g_uiBitDepth + g_uiBitIncrement))-1, (1<<14)-1);
    1065   min_value = Max( -(1<<(3+g_uiBitDepth + g_uiBitIncrement)),  -(1<<14)  );
    1066  
    1067   qh[N] =  (h[N]>=0.0)? (Int)( h[N]*(1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5) : -(Int)(-h[N]*(1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5);
    1068   qh[N] = Max(min_value,Min(max_value, qh[N]));
    1069  
     4073    qh[i] = max(min_value,min(max_value, qh[i]));
     4074  }
     4075
     4076  checkFilterCoeffValue(qh, N, true);
     4077
    10704078  delete[] dh;
    10714079  dh = NULL;
     
    10744082  nc = NULL;
    10754083}
    1076 
     4084#if !LCU_SYNTAX_ALF
    10774085Void TEncAdaptiveLoopFilter::xClearFilterCoefInt(Int* qh, Int N)
    10784086{
     
    10814089 
    10824090  // center pos
    1083   qh[N-2]  = 1<<ALF_NUM_BIT_SHIFT;
    1084 }
    1085 
    1086 Void TEncAdaptiveLoopFilter::xCalcRDCost(ALFParam* pAlfParam, UInt64& ruiRate, UInt64 uiDist, Double& rdCost)
     4091  qh[N-1]  = 1<<ALF_NUM_BIT_SHIFT;
     4092}
     4093/** Calculate RD cost
     4094 * \param [in] pAlfParam ALF parameters
     4095 * \param [out] ruiRate coding bits
     4096 * \param [in] uiDist distortion
     4097 * \param [out] rdCost rate-distortion cost
     4098 * \param [in] pvAlfCUCtrlParam ALF CU control parameters
     4099 */
     4100Void TEncAdaptiveLoopFilter::xCalcRDCost(ALFParam* pAlfParam, UInt64& ruiRate, UInt64 uiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam)
    10874101{
    10884102  if(pAlfParam != NULL)
    10894103  {
    1090     Int* piTmpCoef;
    1091     piTmpCoef = new Int[ALF_MAX_NUM_COEF];
    1092    
    1093     memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff);
    1094    
    1095     predictALFCoeff(pAlfParam);
    1096    
     4104    m_pcEntropyCoder->resetEntropy();
     4105    m_pcEntropyCoder->resetBits();
     4106    m_pcEntropyCoder->encodeAlfParam(pAlfParam);
     4107
     4108    ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
     4109
     4110    if(pvAlfCUCtrlParam != NULL)
     4111    {
     4112      for(UInt s=0; s< m_uiNumSlicesInPic; s++)
     4113      {
     4114        if(!m_pcPic->getValidSlice(s))
     4115        {
     4116          continue;
     4117        }
     4118        m_pcEntropyCoder->resetEntropy();
     4119        m_pcEntropyCoder->resetBits();
     4120        m_pcEntropyCoder->encodeAlfCtrlParam( (*pvAlfCUCtrlParam)[s], m_uiNumCUsInFrame);
     4121        ruiRate += m_pcEntropyCoder->getNumberOfWrittenBits();
     4122      }
     4123    }
     4124    else
     4125    {
     4126      ruiRate += m_uiNumSlicesInPic;
     4127    }
     4128  }
     4129  else
     4130  {
     4131    ruiRate = 1;
     4132  }
     4133 
     4134  rdCost      = (Double)(ruiRate) * m_dLambdaLuma + (Double)(uiDist);
     4135}
     4136
     4137/** Calculate RD cost
     4138 * \param [in] pcPicOrg original picture buffer
     4139 * \param [in] pcPicCmp compared picture buffer
     4140 * \param [in] pAlfParam ALF parameters
     4141 * \param [out] ruiRate coding bits
     4142 * \param [out] ruiDist distortion
     4143 * \param [out] rdCost rate-distortion cost
     4144 * \param [in] pvAlfCUCtrlParam ALF CU control parameters
     4145 */
     4146#if HHI_INTERVIEW_SKIP
     4147Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, TComPicYuv* pcUsedPelMap, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam)
     4148#else
     4149Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam)
     4150#endif
     4151{
     4152  if(pAlfParam != NULL)
     4153  {
    10974154    m_pcEntropyCoder->resetEntropy();
    10984155    m_pcEntropyCoder->resetBits();
    10994156    m_pcEntropyCoder->encodeAlfParam(pAlfParam);
    11004157   
    1101     if(pAlfParam->cu_control_flag)
    1102     {
    1103 #if TSB_ALF_HEADER
    1104       m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
    1105 #else
    1106       xEncodeCUAlfCtrlFlags();
    1107 #endif
    1108     }
    11094158    ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
    1110     memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff);
    1111     delete[] piTmpCoef;
    1112     piTmpCoef = NULL;
    1113   }
    1114   else
    1115   {
    1116     ruiRate = 1;
    1117   }
    1118  
    1119   rdCost      = (Double)(ruiRate) * m_dLambdaLuma + (Double)(uiDist);
    1120 }
    1121 
    1122 Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost)
    1123 {
    1124   if(pAlfParam != NULL)
    1125   {
    1126     Int* piTmpCoef;
    1127     piTmpCoef = new Int[ALF_MAX_NUM_COEF];
    1128    
    1129     memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff);
    1130    
    1131     predictALFCoeff(pAlfParam);
    1132    
    1133     m_pcEntropyCoder->resetEntropy();
    1134     m_pcEntropyCoder->resetBits();
    1135     m_pcEntropyCoder->encodeAlfParam(pAlfParam);
    1136    
    1137     if(pAlfParam->cu_control_flag)
    1138     {
    1139 #if TSB_ALF_HEADER
    1140       m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
    1141 #else
    1142       xEncodeCUAlfCtrlFlags();
    1143 #endif
    1144     }
    1145     ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
    1146     memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff);
    1147     delete[] piTmpCoef;
    1148     piTmpCoef = NULL;
     4159
     4160    if(pvAlfCUCtrlParam != NULL)
     4161    {
     4162      for(UInt s=0; s< m_uiNumSlicesInPic; s++)
     4163      {
     4164        if(! m_pcPic->getValidSlice(s))
     4165        {
     4166          continue;
     4167        }
     4168        m_pcEntropyCoder->resetEntropy();
     4169        m_pcEntropyCoder->resetBits();
     4170        m_pcEntropyCoder->encodeAlfCtrlParam( (*pvAlfCUCtrlParam)[s], m_uiNumCUsInFrame);
     4171        ruiRate += m_pcEntropyCoder->getNumberOfWrittenBits();
     4172      }
     4173
     4174    }
     4175    else
     4176    {
     4177      ruiRate += m_uiNumSlicesInPic;
     4178    }
    11494179  }
    11504180  else
     
    11564186  rdCost      = (Double)(ruiRate) * m_dLambdaLuma + (Double)(ruiDist);
    11574187}
    1158 
     /** Calculate RD cost for chroma ALF
      * \param [in]  pcPicOrg  original picture buffer
      * \param [in]  pcPicCmp  compared picture buffer
      * \param [in]  pAlfParam ALF parameters
      * \param [out] ruiRate   coding bits
      * \param [out] ruiDist   distortion
      * \param [out] rdCost    rate-distortion cost
      */
    11594196Void TEncAdaptiveLoopFilter::xCalcRDCostChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost)
    11604197{
    11614198  if(pAlfParam->chroma_idc)
    11624199  {
    1163     Int* piTmpCoef;
    1164     piTmpCoef = new Int[ALF_MAX_NUM_COEF_C];
    1165    
    1166     memcpy(piTmpCoef, pAlfParam->coeff_chroma, sizeof(Int)*pAlfParam->num_coeff_chroma);
    1167    
    1168     predictALFCoeffChroma(pAlfParam);
    1169    
    1170     m_pcEntropyCoder->resetEntropy();
    1171     m_pcEntropyCoder->resetBits();
    1172     m_pcEntropyCoder->encodeAlfParam(pAlfParam);
    1173    
    1174     if(pAlfParam->cu_control_flag)
    1175     {
    1176 #if TSB_ALF_HEADER
    1177       m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
    1178 #else
    1179       xEncodeCUAlfCtrlFlags();
    1180 #endif
    1181     }
    1182     ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
    1183     memcpy(pAlfParam->coeff_chroma, piTmpCoef, sizeof(int)*pAlfParam->num_coeff_chroma);
    1184     delete[] piTmpCoef;
    1185     piTmpCoef = NULL;
     4200    ruiRate = xCalcRateChroma(pAlfParam);
    11864201  }
    11874202  ruiDist = 0;
     
    11914206}
    11924207
    1193 Void TEncAdaptiveLoopFilter::xFilteringFrameChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
    1194 {
    1195   Int    i, tap, N, err_code;
    1196   Int* qh;
    1197  
    1198   tap  = m_pcTempAlfParam->tap_chroma;
    1199   N    = m_pcTempAlfParam->num_coeff_chroma;
    1200   qh   = m_pcTempAlfParam->coeff_chroma;
    1201  
    1202   // initialize correlation
    1203   for(i=0; i<N; i++)
    1204     memset(m_ppdAlfCorr[i], 0, sizeof(Double)*(N+1));
    1205  
    1206   if ((m_pcTempAlfParam->chroma_idc>>1)&0x01)
    1207   {
    1208     Pel* pOrg = pcPicOrg->getCbAddr();
    1209     Pel* pCmp = pcPicDec->getCbAddr();
    1210 #if MTK_NONCROSS_INLOOP_FILTER
     4208Void TEncAdaptiveLoopFilter::xFilteringFrameChroma(ALFParam* pcAlfParam, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
     4209{
     4210  Int filtNo = pcAlfParam->filter_shape_chroma;
     4211  Int *coeff = pcAlfParam->coeff_chroma;
     4212  Int iChromaFormatShift = 1; //4:2:0
     4213
     4214  if ((pcAlfParam->chroma_idc>>1)&0x01)
     4215  {
    12114216    if(!m_bUseNonCrossALF)
    1212       xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
     4217    {
     4218      Int iStride   = pcPicRest->getCStride();
     4219      Pel* pDec  = pcPicDec->getCbAddr();
     4220      Pel* pRest = pcPicRest->getCbAddr();
     4221
     4222      filterChroma(pRest, pDec, iStride, 0, (Int)(m_img_height>>1) -1, 0, (Int)(m_img_width>>1)-1, filtNo,  coeff);
     4223    }
    12134224    else
    1214       xCalcCorrelationFuncforChromaSlices(ALF_Cb, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride());
    1215 #else       
    1216     xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride());
    1217 #endif
    1218   }
    1219   if ((m_pcTempAlfParam->chroma_idc)&0x01)
    1220   {
    1221     Pel* pOrg = pcPicOrg->getCrAddr();
    1222     Pel* pCmp = pcPicDec->getCrAddr();
    1223 #if MTK_NONCROSS_INLOOP_FILTER
     4225    {
     4226      xFilterChromaSlices(ALF_Cb, pcPicDec, pcPicRest, coeff, filtNo, iChromaFormatShift);
     4227    }
     4228  }
     4229  if ((pcAlfParam->chroma_idc)&0x01)
     4230  {
    12244231    if(!m_bUseNonCrossALF)
    1225       xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
     4232    {
     4233      Int iStride   = pcPicRest->getCStride();
     4234      Pel* pDec  = pcPicDec->getCrAddr();
     4235      Pel* pRest = pcPicRest->getCrAddr();
     4236
     4237      filterChroma(pRest, pDec, iStride, 0, (Int)(m_img_height>>1) -1, 0, (Int)(m_img_width>>1)-1, filtNo,  coeff);
     4238    }
    12264239    else
    1227       xCalcCorrelationFuncforChromaSlices(ALF_Cr, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride());
    1228 #else
    1229     xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride());
    1230 #endif
    1231   }
    1232  
    1233   err_code = xGauss(m_ppdAlfCorr, N);
    1234  
    1235   if(err_code)
    1236   {
    1237     xClearFilterCoefInt(qh, N);
    1238   }
    1239   else
    1240   {
    1241     for(i=0; i<N; i++)
    1242       m_pdDoubleAlfCoeff[i] = m_ppdAlfCorr[i][N];
    1243    
    1244     xQuantFilterCoef(m_pdDoubleAlfCoeff, qh, tap, g_uiBitDepth + g_uiBitIncrement);
    1245   }
    1246  
    1247  
    1248   if ((m_pcTempAlfParam->chroma_idc>>1)&0x01)
    1249   {
    1250 #if MTK_NONCROSS_INLOOP_FILTER
    1251     if(! m_bUseNonCrossALF)
    1252       xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 0);
    1253     else
    1254       xFrameChromaforSlices(ALF_Cb, pcPicDec, pcPicRest, qh, tap);
    1255 #else
    1256     xFrameChroma(pcPicDec, pcPicRest, qh, tap, 0);
    1257 #endif
    1258   }
    1259   if ((m_pcTempAlfParam->chroma_idc)&0x01)
    1260   {
    1261 #if MTK_NONCROSS_INLOOP_FILTER
    1262     if(! m_bUseNonCrossALF)
    1263       xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 1);
    1264     else
    1265       xFrameChromaforSlices(ALF_Cr, pcPicDec, pcPicRest, qh, tap);
    1266 #else
    1267     xFrameChroma(pcPicDec, pcPicRest, qh, tap, 1);
    1268 #endif
    1269   }
    1270  
    1271   if(m_pcTempAlfParam->chroma_idc<3)
    1272   {
    1273     if(m_pcTempAlfParam->chroma_idc==1)
     4240    {
     4241      xFilterChromaSlices(ALF_Cr, pcPicDec, pcPicRest, coeff, filtNo, iChromaFormatShift);
     4242    }
     4243  }
     4244
     4245  if(pcAlfParam->chroma_idc<3)
     4246  {
     4247    if(pcAlfParam->chroma_idc==1)
    12744248    {
    12754249      pcPicDec->copyToPicCb(pcPicRest);
    12764250    }
    1277     if(m_pcTempAlfParam->chroma_idc==2)
     4251    if(pcAlfParam->chroma_idc==2)
    12784252    {
    12794253      pcPicDec->copyToPicCr(pcPicRest);
    12804254    }
    12814255  }
    1282  
    1283 }
    1284 
     4256
     4257}
     4258#endif
     4259#if LCU_SYNTAX_ALF
     4260/** Restore the not-filtered pixels
     4261 * \param [in] imgDec picture buffer before filtering
     4262 * \param [out] imgRest picture buffer after filtering
     4263 * \param [in] stride stride size for 1-D picture memory
     4264 */
     4265Void TEncAdaptiveLoopFilter::xCopyDecToRestCUs(Pel* imgDec, Pel* imgRest, Int stride)
     4266#else
     4267/** Restore the not-filtered pixels
     4268 * \param pcPicDec picture buffer before filtering
     4269 * \param pcPicRest picture buffer after filtering
     4270 */
    12854271Void TEncAdaptiveLoopFilter::xCopyDecToRestCUs(TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
    1286 {
     4272#endif
     4273{
     4274
     4275  if(m_uiNumSlicesInPic > 1)
     4276  {
     4277#if LCU_SYNTAX_ALF
     4278    Pel* pPicDecLuma  = imgDec;
     4279    Pel* pPicRestLuma = imgRest;
     4280#else
     4281    Pel* pPicDecLuma  = pcPicDec->getLumaAddr();
     4282    Pel* pPicRestLuma = pcPicRest->getLumaAddr();
     4283    Int  stride       = pcPicDec->getStride();
     4284#endif
     4285    UInt SUWidth      = m_pcPic->getMinCUWidth();
     4286    UInt SUHeight     = m_pcPic->getMinCUHeight();
     4287
     4288    UInt startSU, endSU, LCUX, LCUY, currSU, LPelX, TPelY;
     4289    UInt posOffset;
     4290    Pel *pDec, *pRest;
     4291
     4292    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     4293    {
     4294      if(!m_pcPic->getValidSlice(s))
     4295      {
     4296        continue;
     4297      }
     4298      std::vector< AlfLCUInfo* >&  vpSliceAlfLCU = m_pvpAlfLCU[s];
     4299      for(Int i=0; i< vpSliceAlfLCU.size(); i++)
     4300      {
     4301        AlfLCUInfo& rAlfLCU    = *(vpSliceAlfLCU[i]);
     4302        TComDataCU* pcCU       = rAlfLCU.pcCU;
     4303        startSU                = rAlfLCU.startSU;
     4304        endSU                  = rAlfLCU.endSU;
     4305        LCUX                 = pcCU->getCUPelX();
     4306        LCUY                 = pcCU->getCUPelY();
     4307
     4308        for(currSU= startSU; currSU<= endSU; currSU++)
     4309        {
     4310          LPelX   = LCUX + g_auiRasterToPelX[ g_auiZscanToRaster[currSU] ];
     4311          TPelY   = LCUY + g_auiRasterToPelY[ g_auiZscanToRaster[currSU] ];
     4312          if( !( LPelX < m_img_width )  || !( TPelY < m_img_height )  )
     4313          {
     4314            continue;
     4315          }
     4316          if(!pcCU->getAlfCtrlFlag(currSU))
     4317          {
     4318            posOffset = TPelY*stride + LPelX;
     4319            pDec = pPicDecLuma + posOffset;
     4320            pRest= pPicRestLuma+ posOffset;
     4321            for(Int y=0; y< SUHeight; y++)
     4322            {
     4323              ::memcpy(pRest, pDec, sizeof(Pel)*SUWidth);
     4324              pDec += stride;
     4325              pRest+= stride;
     4326            }
     4327          }
     4328        }
     4329      }
     4330    }
     4331    return;
     4332  }
     4333
    12874334  for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
    12884335  {
    12894336    TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
     4337#if LCU_SYNTAX_ALF
     4338    xCopyDecToRestCU(pcCU, 0, 0, imgDec, imgRest, stride);
     4339#else
    12904340    xCopyDecToRestCU(pcCU, 0, 0, pcPicDec, pcPicRest);
    1291   }
    1292 }
    1293 
     4341#endif
     4342  }
     4343}
     4344
     4345#if LCU_SYNTAX_ALF
     4346Void TEncAdaptiveLoopFilter::xCopyDecToRestCU(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, Pel* imgDec, Pel* imgRest, Int stride)
     4347#else
    12944348Void TEncAdaptiveLoopFilter::xCopyDecToRestCU(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
     4349#endif
    12954350{
    12964351  Bool bBoundary = false;
     
    13004355  UInt uiBPelY   = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1;
    13014356 
    1302   if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
     4357  if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    13034358  {
    13044359    bBoundary = true;
     
    13134368      uiTPelY   = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
    13144369     
    1315       if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )     
     4370      if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )     
     4371#if LCU_SYNTAX_ALF
     4372        xCopyDecToRestCU(pcCU, uiAbsPartIdx, uiDepth+1, imgDec, imgRest, stride);
     4373#else
    13164374        xCopyDecToRestCU(pcCU, uiAbsPartIdx, uiDepth+1, pcPicDec, pcPicRest);
     4375#endif
    13174376    }
    13184377    return;
     
    13214380  if (!pcCU->getAlfCtrlFlag(uiAbsPartIdx))
    13224381  {
     4382#if !LCU_SYNTAX_ALF
    13234383    UInt uiCUAddr = pcCU->getAddr();
    1324    
     4384#endif   
    13254385    Int iWidth = pcCU->getWidth(uiAbsPartIdx);
    13264386    Int iHeight = pcCU->getHeight(uiAbsPartIdx);
    1327    
     4387#if LCU_SYNTAX_ALF
     4388    copyPixelsInOneRegion(imgRest, imgDec, stride, (Int)uiTPelY, iHeight, (Int)uiLPelX, iWidth);
     4389#else
    13284390    Pel* pRec = pcPicDec->getLumaAddr(uiCUAddr, uiAbsPartIdx);
    13294391    Pel* pFilt = pcPicRest->getLumaAddr(uiCUAddr, uiAbsPartIdx);
     
    13414403      pFilt += iFiltStride;
    13424404    }
    1343   }
    1344 }
    1345 
    1346 Void TEncAdaptiveLoopFilter::xcollectStatCodeFilterCoeffForce0(int **pDiffQFilterCoeffIntPP, int fl, int sqrFiltLength,
    1347                                                                int filters_per_group, int bitsVarBin[])
    1348 {
    1349   int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal,
    1350   *pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
    1351   minKStart, minBitsKStart, bitsKStart;
    1352  
    1353   pDepthInt=pDepthIntTab[fl-2];
    1354  
    1355   maxScanVal=0;
    1356   for (i=0; i<sqrFiltLength; i++)
    1357   {
    1358     maxScanVal=max(maxScanVal, pDepthInt[i]);
    1359   }
    1360  
    1361   // vlc for all
    1362   memset(bitsCoeffScan, 0, MAX_SCAN_VAL * MAX_EXP_GOLOMB * sizeof(int));
    1363   for(ind=0; ind<filters_per_group; ++ind)
    1364   {
    1365     for(i = 0; i < sqrFiltLength; i++)
    1366     {     
    1367       scanPos=pDepthInt[i]-1;
    1368       coeffVal=abs(pDiffQFilterCoeffIntPP[ind][i]);
    1369       for (k=1; k<15; k++)
    1370       {
    1371         bitsCoeffScan[scanPos][k] += lengthGolomb(coeffVal, k);
    1372       }
    1373     }
    1374   }
    1375  
    1376   minBitsKStart=0;
    1377   minKStart = -1;
    1378   for (k=1; k<8; k++)
    1379   {
    1380     bitsKStart=0; kStart=k;
    1381     for (scanPos=0; scanPos<maxScanVal; scanPos++)
    1382     {
    1383       kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin];
    1384      
    1385       if (bitsCoeffScan[scanPos][kStart+1]<minBits)
    1386       {
    1387         kMin=kStart+1; minBits=bitsCoeffScan[scanPos][kMin];
    1388       }
    1389       kStart=kMin;
    1390       bitsKStart+=minBits;
    1391     }
    1392     if (bitsKStart<minBitsKStart || k==1)
    1393     {
    1394       minBitsKStart=bitsKStart;
    1395       minKStart=k;
    1396     }
    1397   }
    1398  
    1399   kStart = minKStart;
    1400   for (scanPos=0; scanPos<maxScanVal; scanPos++)
    1401   {
    1402     kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin];
    1403    
    1404     if (bitsCoeffScan[scanPos][kStart+1]<minBits)
    1405     {
    1406       kMin = kStart+1;
    1407       minBits = bitsCoeffScan[scanPos][kMin];
    1408     }
    1409    
    1410     kMinTab[scanPos] = kMin;
    1411     kStart = kMin;
    1412   }
    1413  
    1414   for(ind=0; ind<filters_per_group; ++ind)
    1415   {
    1416     bitsVarBin[ind]=0;
    1417     for(i = 0; i < sqrFiltLength; i++)
    1418     {
    1419       scanPos=pDepthInt[i]-1;
    1420       bitsVarBin[ind] += lengthGolomb(abs(pDiffQFilterCoeffIntPP[ind][i]), kMinTab[scanPos]);
    1421     }
    1422   }
    1423 }
    1424 
    1425 Void TEncAdaptiveLoopFilter::xdecideCoeffForce0(int codedVarBins[NO_VAR_BINS], double errorForce0Coeff[], double errorForce0CoeffTab[NO_VAR_BINS][2], int bitsVarBin[NO_VAR_BINS], double lambda, int filters_per_fr)
    1426 {
    1427   int filtNo;
    1428   double lagrangianDiff;
    1429   int ind;
    1430  
    1431   errorForce0Coeff[0]=errorForce0Coeff[1]=0;
    1432   for (ind=0; ind<16; ind++) codedVarBins[ind]=0;
    1433  
    1434   for(filtNo=0; filtNo<filters_per_fr; filtNo++)
    1435   {
    1436     // No coeffcient prediction bits used
    1437 #if ENABLE_FORCECOEFF0
    1438     lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]);
    1439     codedVarBins[filtNo]=(lagrangianDiff>0)? 1 : 0;
    1440     errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]];
    1441     errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1];
    1442 #else
    1443     lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]);
    1444     codedVarBins[filtNo]= 1;
    1445     errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]];
    1446     errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1];
    1447 #endif
    1448   }   
    1449 }
    1450 
    1451 double TEncAdaptiveLoopFilter::xfindBestCoeffCodMethod(int codedVarBins[NO_VAR_BINS], int *forceCoeff0,
    1452                                                        int **filterCoeffSymQuant, int fl, int sqrFiltLength,
    1453                                                        int filters_per_fr, double errorForce0CoeffTab[NO_VAR_BINS][2],
    1454                                                        double *errorQuant, double lambda)
    1455 
    1456 {
    1457   int bitsVarBin[NO_VAR_BINS], createBistream, coeffBits, coeffBitsForce0;
    1458   double errorForce0Coeff[2], lagrangianForce0, lagrangian;
    1459  
    1460   xcollectStatCodeFilterCoeffForce0(filterCoeffSymQuant, fl, sqrFiltLength, 
    1461                                     filters_per_fr, bitsVarBin);
    1462  
    1463   xdecideCoeffForce0(codedVarBins, errorForce0Coeff, errorForce0CoeffTab, bitsVarBin, lambda, filters_per_fr);
    1464  
    1465   coeffBitsForce0 = xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength,
    1466                                                 filters_per_fr, codedVarBins, createBistream=0, m_tempALFp);
    1467  
    1468   coeffBits = xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength, filters_per_fr,
    1469                                     createBistream=0, m_tempALFp);
    1470  
    1471   lagrangianForce0=errorForce0Coeff[0]+lambda*coeffBitsForce0;
    1472   lagrangian=errorForce0Coeff[1]+lambda*coeffBits;
    1473   if (lagrangianForce0<lagrangian)
    1474   {
    1475     *errorQuant=errorForce0Coeff[0];
    1476     *forceCoeff0=1;
    1477     return(lagrangianForce0);
    1478   }
    1479   else
    1480   {
    1481     *errorQuant=errorForce0Coeff[1];
    1482     *forceCoeff0=0;
    1483     return(lagrangian);
     4405#endif
     4406  }
     4407}
     4408
     4409double TEncAdaptiveLoopFilter::xfindBestCoeffCodMethod(int **filterCoeffSymQuant, int filter_shape, int sqrFiltLength, int filters_per_fr, double errorForce0CoeffTab[NO_VAR_BINS][2],
     4410  double lambda)
     4411{
     4412  Int coeffBits, i;
     4413  Double error=0, lagrangian;
     4414  coeffBits = xsendAllFiltersPPPred(filterCoeffSymQuant, filter_shape, sqrFiltLength, filters_per_fr,
     4415    0, m_tempALFp);
     4416  for(i=0;i<filters_per_fr;i++)
     4417  {
     4418    error += errorForce0CoeffTab[i][1];
     4419  }
     4420  lagrangian = error + lambda * coeffBits;
     4421  return (lagrangian);
     4422}
     4423
     4424/** Predict ALF luma filter coefficients. Centre coefficient is always predicted. Determines if left neighbour should be predicted.
     4425 */
     4426Void TEncAdaptiveLoopFilter::predictALFCoeffLumaEnc(ALFParam* pcAlfParam, Int **pfilterCoeffSym, Int filter_shape)
     4427{
     4428#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4429  Int alfPrecisionBit = getAlfPrecisionBit( m_alfQP );
     4430#endif
     4431  Int sum, coeffPred, ind;
     4432  const Int* pFiltMag = NULL;
     4433  pFiltMag = weightsTabShapes[filter_shape];
     4434  for(ind = 0; ind < pcAlfParam->filters_per_group; ++ind)
     4435  {
     4436    sum = 0;
     4437    for(Int i = 0; i < pcAlfParam->num_coeff-2; i++)
     4438    {
     4439      sum +=  pFiltMag[i]*pfilterCoeffSym[ind][i];
     4440    }
     4441
     4442    if((pcAlfParam->predMethod==0)|(ind==0))
     4443    {
     4444#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4445      coeffPred = ((1<<alfPrecisionBit)-sum) >> 2;
     4446#else
     4447      coeffPred = ((1<<ALF_NUM_BIT_SHIFT)-sum) >> 2;
     4448#endif
     4449    }
     4450    else
     4451    {
     4452      coeffPred = (0-sum) >> 2;
     4453    }
     4454    if(abs(pfilterCoeffSym[ind][pcAlfParam->num_coeff-2]-coeffPred) < abs(pfilterCoeffSym[ind][pcAlfParam->num_coeff-2]))
     4455    {
     4456      pcAlfParam->nbSPred[ind] = 0;
     4457    }
     4458    else
     4459    {
     4460      pcAlfParam->nbSPred[ind] = 1;
     4461      coeffPred = 0;
     4462    }
     4463    sum += pFiltMag[pcAlfParam->num_coeff-2]*pfilterCoeffSym[ind][pcAlfParam->num_coeff-2];
     4464    pfilterCoeffSym[ind][pcAlfParam->num_coeff-2] -= coeffPred;
     4465    if((pcAlfParam->predMethod==0)|(ind==0))
     4466    {
     4467#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4468      coeffPred = (1<<alfPrecisionBit)-sum;
     4469#else
     4470      coeffPred = (1<<ALF_NUM_BIT_SHIFT)-sum;
     4471#endif
     4472    }
     4473    else
     4474    {
     4475      coeffPred = -sum;
     4476    }
     4477    pfilterCoeffSym[ind][pcAlfParam->num_coeff-1] -= coeffPred;
    14844478  }
    14854479}
     
    14934487  Int64 Newbit_ct;
    14944488 
    1495   bit_ct0 = xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0);
    1496  
     4489  for(ind = 0; ind < filters_per_group; ind++)
     4490  {
     4491    for(i = 0; i < sqrFiltLength; i++)
     4492    {
     4493      m_FilterCoeffQuantTemp[ind][i]=FilterCoeffQuant[ind][i];
     4494    }
     4495  }
     4496  ALFp->filters_per_group = filters_per_group;
     4497  ALFp->predMethod = 0;
     4498  ALFp->num_coeff = sqrFiltLength;
     4499  predictALFCoeffLumaEnc(ALFp, m_FilterCoeffQuantTemp, fl);
     4500  Int nbFlagIntra[16];
     4501  for(ind = 0; ind < filters_per_group; ind++)
     4502  {
     4503    nbFlagIntra[ind] = ALFp->nbSPred[ind];
     4504  }
     4505  bit_ct0 = xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group, 0);
    14974506  for(ind = 0; ind < filters_per_group; ++ind)
    14984507  {
     
    15084517    }
    15094518  }
     4519  ALFp->predMethod = 1;
     4520  predictALFCoeffLumaEnc(ALFp, m_diffFilterCoeffQuant, fl);
    15104521 
    15114522  if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0) >= bit_ct0)
     
    15134524    predMethod = 0; 
    15144525    if(filters_per_group > 1)
     4526    {
    15154527      bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, createBistream);
    1516     bit_ct += xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream);
     4528    }
     4529    bit_ct += xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group, createBistream);
    15174530  }
    15184531  else
     
    15204533    predMethod = 1;
    15214534    if(filters_per_group > 1)
     4535    {
    15224536      bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, createBistream);
     4537    }
    15234538    bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream);
    15244539  }
    1525  
    1526   ALFp->forceCoeff0 = 0;
    1527   ALFp->filters_per_group_diff = filters_per_group;
    15284540  ALFp->filters_per_group = filters_per_group;
    15294541  ALFp->predMethod = predMethod;
    15304542  ALFp->num_coeff = sqrFiltLength;
    1531   if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM)
    1532     ALFp->realfiltNo=2;
    1533   else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM)
    1534     ALFp->realfiltNo=1;
    1535   else
    1536     ALFp->realfiltNo=0;
    1537  
     4543  ALFp->filter_shape = fl;
    15384544  for(ind = 0; ind < filters_per_group; ++ind)
    15394545  {
     
    15414547    {
    15424548      if (predMethod) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i];
    1543       else ALFp->coeffmulti[ind][i] = FilterCoeffQuant[ind][i];
    1544     }
    1545   }
    1546   m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
    1547  
     4549      else
     4550      {
     4551        ALFp->coeffmulti[ind][i] = m_FilterCoeffQuantTemp[ind][i];
     4552      }
     4553    }
     4554    if(predMethod==0)
     4555    {
     4556      ALFp->nbSPred[ind] = nbFlagIntra[ind];
     4557    }
     4558  }
     4559  m_pcEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
    15484560 
    15494561  //  return(bit_ct);
     
    15514563}
    15524564
    1553 
    1554 Int TEncAdaptiveLoopFilter::xsendAllFiltersPPPredForce0(int **FilterCoeffQuant, int fl, int sqrFiltLength, int filters_per_group,
    1555                                                         int codedVarBins[NO_VAR_BINS], int createBistream, ALFParam* ALFp)
    1556 {
    1557   int ind, bit_ct=0, bit_ct0, i, j;
    1558   int filters_per_group_temp, filters_per_group_diff;
    1559   int chosenPred = 0;
    1560   int force0 = 1;
    1561   Int64 Newbit_ct;
    1562  
    1563   i = 0;
    1564   for(ind = 0; ind < filters_per_group; ind++)
    1565   {
    1566     if(codedVarBins[ind] == 1)
    1567     {
    1568       for(j = 0; j < sqrFiltLength; j++)
    1569         m_FilterCoeffQuantTemp[i][j]=FilterCoeffQuant[ind][j];
    1570       i++;
    1571     }
    1572   }
    1573   filters_per_group_diff = filters_per_group_temp = i;
    1574  
    1575   for(ind = 0; ind < filters_per_group; ++ind)
    1576   {
    1577     if(ind == 0)
    1578     {
    1579       for(i = 0; i < sqrFiltLength; i++)
    1580         m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i];
    1581     }
    1582     else
    1583     {
    1584       for(i = 0; i < sqrFiltLength; i++)
    1585         m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i] - m_FilterCoeffQuantTemp[ind-1][i];
    1586     }
    1587   }
    1588  
    1589   if(!((filters_per_group_temp == 0) && (filters_per_group == 1)))
    1590   {
    1591     bit_ct0 = xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, 0);
    1592    
    1593     if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_diff, 0) >= bit_ct0)
    1594     {
    1595       chosenPred = 0;
    1596       bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream);
    1597       bit_ct += xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, createBistream);
    1598     }
    1599     else
    1600     {
    1601       chosenPred = 1;
    1602       bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream);
    1603       bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_temp, createBistream);
    1604     }
    1605   }
    1606   ALFp->forceCoeff0 = 1;
    1607   ALFp->predMethod = chosenPred;
    1608   ALFp->filters_per_group_diff = filters_per_group_diff;
    1609   ALFp->filters_per_group = filters_per_group;
    1610   ALFp->num_coeff = sqrFiltLength;
    1611   if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM)
    1612     ALFp->realfiltNo=2;
    1613   else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM)
    1614     ALFp->realfiltNo=1;
    1615   else
    1616     ALFp->realfiltNo=0;
    1617  
    1618   for(ind = 0; ind < filters_per_group; ++ind)
    1619   {
    1620     ALFp->codedVarBins[ind] = codedVarBins[ind];
    1621   }
    1622   for(ind = 0; ind < filters_per_group_diff; ++ind)
    1623   {
    1624     for(i = 0; i < sqrFiltLength; i++)
    1625     {
    1626       if (chosenPred) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i];
    1627       else ALFp->coeffmulti[ind][i] = m_FilterCoeffQuantTemp[ind][i];
    1628     }
    1629   }
    1630   m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
    1631  
    1632   return ((Int)Newbit_ct);
    1633 }
    1634 
    1635 //filtNo==-1/realfiltNo, noFilters=filters_per_frames, realfiltNo=filtNo
    1636 Int TEncAdaptiveLoopFilter::xcodeAuxInfo(int filtNo, int noFilters, int varIndTab[NO_VAR_BINS], int frNo, int createBitstream,int realfiltNo, ALFParam* ALFp)
    1637 {
    1638   int i, filterPattern[NO_VAR_BINS], startSecondFilter=0, bitCt=0, codePrediction;
     4565Int TEncAdaptiveLoopFilter::xcodeAuxInfo(int filters_per_fr, int varIndTab[NO_VAR_BINS], int filter_shape, ALFParam* ALFp)
     4566{
     4567  int i, filterPattern[NO_VAR_BINS], startSecondFilter=0, bitCt=0;
    16394568  Int64 NewbitCt;
    1640  
    1641   codePrediction = 0;
    1642  
     4569
    16434570  //send realfiltNo (tap related)
    1644   ALFp->realfiltNo = realfiltNo;
    1645   ALFp->filtNo = filtNo;
    1646  
    1647   if(filtNo >= 0)
    1648   {
    1649     // decide startSecondFilter and filterPattern
    1650     if(noFilters > 1)
    1651     {
    1652       memset(filterPattern, 0, NO_VAR_BINS * sizeof(int));
    1653       for(i = 1; i < NO_VAR_BINS; ++i)
    1654       {
    1655         if(varIndTab[i] != varIndTab[i-1])
    1656         {
    1657           filterPattern[i] = 1;
    1658           startSecondFilter = i;
    1659         }
    1660       }
    1661       memcpy (ALFp->filterPattern, filterPattern, NO_VAR_BINS * sizeof(int));
    1662       ALFp->startSecondFilter = startSecondFilter;
    1663     }
    1664    
    1665     //send noFilters (filters_per_frame)
    1666     //0: filters_per_frame = 1
    1667     //1: filters_per_frame = 2
    1668     //2: filters_per_frame > 2 (exact number from filterPattern)
    1669 
    1670     ALFp->noFilters = min(noFilters-1,2);
    1671     if (noFilters<=0) printf("error\n");
    1672   }
    1673   m_pcDummyEntropyCoder->codeAuxCountBit(ALFp, &NewbitCt);
     4571  ALFp->filter_shape = filter_shape;
     4572
     4573  // decide startSecondFilter and filterPattern
     4574  memset(filterPattern, 0, NO_VAR_BINS * sizeof(int));
     4575  if(filters_per_fr > 1)
     4576  {
     4577    for(i = 1; i < NO_VAR_BINS; ++i)
     4578    {
     4579      if(varIndTab[i] != varIndTab[i-1])
     4580      {
     4581        filterPattern[i] = 1;
     4582        startSecondFilter = i;
     4583      }
     4584    }
     4585  }
     4586  memcpy (ALFp->filterPattern, filterPattern, NO_VAR_BINS * sizeof(int));
     4587  ALFp->startSecondFilter = startSecondFilter;
     4588
     4589  assert(filters_per_fr>0);
     4590  m_pcEntropyCoder->codeAuxCountBit(ALFp, &NewbitCt);
     4591
    16744592  bitCt = (int) NewbitCt;
    16754593  return(bitCt);
     
    16804598{
    16814599  int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal, len = 0,
    1682   *pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
     4600    *pDepthInt=NULL, kMinTab[MAX_SCAN_VAL], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
    16834601  minKStart, minBitsKStart, bitsKStart;
    1684  
    1685   pDepthInt = pDepthIntTab[fl-2];
     4602#if ALF_SINGLE_FILTER_SHAPE
     4603  Int minScanVal = MIN_SCAN_POS_CROSS;
     4604#else 
     4605  int minScanVal = (fl==ALF_STAR5x5) ? 0 : MIN_SCAN_POS_CROSS;
     4606#endif
     4607  pDepthInt = pDepthIntTabShapes[fl];
    16864608 
    16874609  maxScanVal = 0;
     
    17124634    bitsKStart = 0;
    17134635    kStart = k;
    1714     for(scanPos = 0; scanPos < maxScanVal; scanPos++)
     4636    for(scanPos = minScanVal; scanPos < maxScanVal; scanPos++)
    17154637    {
    17164638      kMin = kStart;
     
    17334655 
    17344656  kStart = minKStart;
    1735   for(scanPos = 0; scanPos < maxScanVal; scanPos++)
     4657  for(scanPos = minScanVal; scanPos < maxScanVal; scanPos++)
    17364658  {
    17374659    kMin = kStart;
     
    17504672  // Coding parameters
    17514673  //  len += lengthFilterCodingParams(minKStart, maxScanVal, kMinTab, createBitstream);
     4674#if LCU_SYNTAX_ALF
     4675  if (filters_per_group == 1)
     4676  {
     4677    len += lengthFilterCoeffs(sqrFiltLength, filters_per_group, pDepthInt, pDiffQFilterCoeffIntPP,
     4678      kTableTabShapes[ALF_CROSS9x7_SQUARE3x3], createBitstream);
     4679  }
     4680  else
     4681  {
     4682#endif
    17524683  len += (3 + maxScanVal);
    17534684 
     
    17554686  len += lengthFilterCoeffs(sqrFiltLength, filters_per_group, pDepthInt, pDiffQFilterCoeffIntPP,
    17564687                            kMinTab, createBitstream);
    1757  
     4688#if LCU_SYNTAX_ALF
     4689  }
     4690#endif
     4691
    17584692  return len;
    17594693}
     
    17644698  int q = coeffVal / m;
    17654699  if(coeffVal != 0)
     4700  {
    17664701    return(q + 2 + k);
     4702  }
    17674703  else
     4704  {
    17684705    return(q + 1 + k);
     4706  }
    17694707}
    17704708
     
    17754713 
    17764714  if(force0)
     4715  {
    17774716    bit_cnt = 2 + filters_per_group;
     4717  }
    17784718  else
     4719  {
    17794720    bit_cnt = 2;
     4721  }
    17804722  return bit_cnt;
    17814723 
     
    17934735    {
    17944736      scanPos = pDepthInt[i] - 1;
     4737#if LCU_SYNTAX_ALF
     4738      Int k = (filters_per_group == 1) ? kMinTab[i] : kMinTab[scanPos];
     4739      bit_cnt += lengthGolomb(abs(FilterCoeff[ind][i]), k);
     4740#else
    17954741      bit_cnt += lengthGolomb(abs(FilterCoeff[ind][i]), kMinTab[scanPos]);
     4742#endif
    17964743    }
    17974744  }
     
    17994746}
    18004747
    1801 Void   TEncAdaptiveLoopFilter::xEncALFLuma_qc ( TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost )
     4748#if !LCU_SYNTAX_ALF
     4749
     4750Void   TEncAdaptiveLoopFilter::xEncALFLuma ( TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost )
    18024751{
    18034752  //pcPicDec: extended decoded
    18044753  //pcPicRest: original decoded: filtered signal will be stored
    1805  
     4754
    18064755  UInt64  uiRate;
    18074756  UInt64  uiDist;
    18084757  Double dCost;
    1809 #if !MQT_ALF_NPASS
    1810   Int    Height = pcPicOrg->getHeight();
    1811   Int    Width = pcPicOrg->getWidth();
    1812 #endif
    18134758  Int    LumaStride = pcPicOrg->getStride();
    1814   imgpel* pOrg = (imgpel*) pcPicOrg->getLumaAddr();
    1815   imgpel* pRest = (imgpel*) pcPicRest->getLumaAddr();
    1816   imgpel* pDec = (imgpel*) pcPicDec->getLumaAddr();
    1817 
    1818   Int tap               = ALF_MIN_NUM_TAP;
    1819   m_pcTempAlfParam->tap = tap;
    1820 #if TI_ALF_MAX_VSIZE_7
    1821   m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(m_pcTempAlfParam->tap);
    1822   m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(m_pcTempAlfParam->tap);
    1823 #else
    1824   m_pcTempAlfParam->num_coeff = (Int)tap*tap/4 + 2;
    1825 #endif
    1826 
    1827 #if MQT_BA_RA
    1828 
    1829 #if MQT_ALF_NPASS
    1830 
    1831   static Bool   bFirst = true;
    1832   static Int*   apiVarIndTabBest[NUM_ALF_CLASS_METHOD];
    1833   static Int**  appiBestCoeffSet[NUM_ALF_CLASS_METHOD];
    1834 
    1835   static Double***  adBestySym;
    1836   static Double**** adBestESym;
    1837   static Double**   adBestpixAcc; 
    1838 
    1839   if(bFirst)
    1840   {
    1841     if(m_iALFEncodePassReduction)
    1842     {
    1843       initMatrix4D_double(&adBestESym,NUM_ALF_CLASS_METHOD,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
    1844       initMatrix3D_double(&adBestySym,NUM_ALF_CLASS_METHOD,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    1845       initMatrix_double  (&adBestpixAcc,NUM_ALF_CLASS_METHOD,  NO_VAR_BINS );
    1846 
    1847       for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    1848       {
    1849 
    1850         apiVarIndTabBest[i] = new Int[NO_VAR_BINS];
    1851         appiBestCoeffSet[i] = new Int*[NO_VAR_BINS];
    1852         for(Int j=0; j< NO_VAR_BINS; j++)
    1853         {
    1854           appiBestCoeffSet[i][j]= new Int[MAX_SQR_FILT_LENGTH];
    1855         }
    1856       }
    1857     }
    1858 
    1859     bFirst = false;
    1860   }
    1861 
    1862   Int         ibestfiltNo[NUM_ALF_CLASS_METHOD];
    1863   Int         ibestfilters_per_fr[NUM_ALF_CLASS_METHOD];
    1864   Int64       iDist;
    1865   Int64       iMinMethodDist = MAX_INT;
    1866   UInt64      uiMinMethodRate;
    1867   Double      dMinMethodCost = MAX_DOUBLE;
    1868 #endif
     4759  Pel* pOrg  = pcPicOrg->getLumaAddr();
     4760  Pel* pRest = pcPicRest->getLumaAddr();
     4761  Pel* pDec  = pcPicDec->getLumaAddr();
     4762
     4763  Double    dMinMethodCost  = MAX_DOUBLE;
     4764  UInt64    uiMinMethodDist = MAX_UINT;
     4765  UInt64    uiMinMethodRate = MAX_UINT;
    18694766  Int       iBestClassMethod = ALF_RA;
    18704767  Double    adExtraCostReduction[NUM_ALF_CLASS_METHOD];
     
    18794776    pcAlfParam->alf_flag        = 1;
    18804777    pcAlfParam->chroma_idc      = 0;
    1881     pcAlfParam->cu_control_flag = 0;
    1882     pcAlfParam->tap = tap;
    1883 #if TI_ALF_MAX_VSIZE_7
    1884     pcAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap);
    1885     pcAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap);
    1886 #else
    1887     pcAlfParam->num_coeff = (Int)tap*tap/4 + 2;
    1888 #endif
    18894778
    18904779    switch(i)
     
    18924781    case ALF_RA:
    18934782      {
    1894         adExtraCostReduction[i] = (double)(m_im_height * m_im_width) * m_dLambdaLuma * 2.0 / 4096.0;
     4783        adExtraCostReduction[i] = (double)(m_img_height * m_img_width) * m_dLambdaLuma * 2.0 / 4096.0;
     4784      }
     4785      break;
     4786    case ALF_BA:
     4787      {
     4788        adExtraCostReduction[i] = 0.0;
    18954789      }
    18964790      break;
    18974791    default:
    18984792      {
    1899         adExtraCostReduction[i] = 0.0;
    1900       }
    1901       break;
    1902     }
    1903 
    1904   }
     4793        printf("Not a support adaptation method\n");
     4794        assert(0);
     4795        exit(-1);
     4796      }
     4797    }
     4798  }
     4799
    19054800
    19064801  for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    19074802  {
    1908     pcAlfParam       = &(cFrmAlfParam[i]);
    1909     m_varImg         = m_varImgMethods[i];
    1910     m_uiVarGenMethod = pcAlfParam->alf_pcr_region_flag = i;
    1911 #if MQT_ALF_NPASS
    1912     if(m_iALFEncodePassReduction)
    1913     {
    1914       m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[m_uiVarGenMethod];
    1915     }
     4803    m_uiVarGenMethod = i;
     4804
     4805    pcAlfParam       = &(cFrmAlfParam[m_uiVarGenMethod]);
     4806    m_varImg         = m_varImgMethods[m_uiVarGenMethod];
     4807
     4808    pcAlfParam->alf_pcr_region_flag = m_uiVarGenMethod;
     4809
    19164810    setInitialMask(pcPicOrg, pcPicDec);
    1917 #else
    1918     for (Int i=0; i<Height; i++)
    1919     {
    1920       for (Int j=0; j<Width; j++)
    1921       {
    1922         m_maskImg[i][j] = 1;
    1923       }
    1924     }
    1925 #if MTK_NONCROSS_INLOOP_FILTER
    1926     if(!m_bUseNonCrossALF)
    1927       calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
     4811
     4812    if(m_iALFEncodePassReduction == 0)
     4813    {
     4814      static Int best_filter_shape = 0;
     4815      if (m_uiVarGenMethod == 0)
     4816      {
     4817        UInt64 MinRate_Shape0 = MAX_INT;
     4818        UInt64 MinDist_Shape0 = MAX_INT;
     4819        Double MinCost_Shape0 = MAX_DOUBLE;
     4820
     4821        UInt64 MinRate_Shape1 = MAX_INT;
     4822        UInt64 MinDist_Shape1 = MAX_INT;
     4823        Double MinCost_Shape1 = MAX_DOUBLE;
     4824
     4825#if ALF_SINGLE_FILTER_SHAPE
     4826        Int filter_shape = 0;
     4827#else       
     4828        for (Int filter_shape = 0; filter_shape < 2 ;filter_shape ++)
     4829#endif
     4830        {
     4831          pcAlfParam->filter_shape = filter_shape;
     4832          pcAlfParam->num_coeff = m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[filter_shape];
     4833          xFirstFilteringFrameLuma(pOrg, pDec, m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, pcAlfParam->filter_shape, LumaStride);
     4834          xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost);
     4835          if (filter_shape == 0)
     4836          {
     4837            // copy Shape0
     4838            MinRate_Shape0 = uiRate;
     4839            MinDist_Shape0 = uiDist;
     4840            MinCost_Shape0 = dCost;
     4841            m_pcPicYuvTmp->copyToPicLuma(pcPicYuvRecShape0);
     4842            copyALFParam(pcAlfParamShape0, pcAlfParam);
     4843          }
     4844          else //if (filter_shape == 1)
     4845          {
     4846            // copy Shape1
     4847            MinRate_Shape1 = uiRate;
     4848            MinDist_Shape1 = uiDist;
     4849            MinCost_Shape1  = dCost;
     4850            m_pcPicYuvTmp->copyToPicLuma(pcPicYuvRecShape1);
     4851            copyALFParam(pcAlfParamShape1, pcAlfParam);
     4852          }
     4853        }
     4854
     4855        if (MinCost_Shape0 <= MinCost_Shape1)
     4856        {
     4857          pcPicYuvRecShape0->copyToPicLuma(m_pcPicYuvTmp);
     4858          copyALFParam(pcAlfParam, pcAlfParamShape0);
     4859          uiRate = MinRate_Shape0;
     4860          uiDist = MinDist_Shape0;
     4861          dCost = MinCost_Shape0;
     4862          best_filter_shape = 0;
     4863        }
     4864        else //if (MinCost_Shape1 < MinCost_Shape0)
     4865        {
     4866          pcPicYuvRecShape1->copyToPicLuma(m_pcPicYuvTmp);
     4867          copyALFParam(pcAlfParam, pcAlfParamShape1);
     4868          uiRate = MinRate_Shape1;
     4869          uiDist = MinDist_Shape1;
     4870          dCost = MinCost_Shape1;
     4871          best_filter_shape = 1;
     4872        }
     4873      }
     4874      else
     4875      {
     4876        pcAlfParam->filter_shape = best_filter_shape;
     4877        pcAlfParam->num_coeff = m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[best_filter_shape];
     4878        xFirstFilteringFrameLuma(pOrg, pDec, m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, best_filter_shape, LumaStride);
     4879        xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost);
     4880      }
     4881    }
    19284882    else
    1929       calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
    1930 #else
    1931     calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
    1932 #endif
    1933 #endif
    1934 
    1935 #if MQT_ALF_NPASS
    1936     if(m_iALFEncodePassReduction)
    1937     {
    1938       xFirstEstimateFilteringFrameLumaAllTap(pOrg, pDec, LumaStride,
    1939         pcAlfParam, apiVarIndTabBest[i], appiBestCoeffSet[i],
    1940         ibestfiltNo[i], ibestfilters_per_fr[i],
    1941         adBestySym[i], adBestESym[i], adBestpixAcc[i],
    1942         uiRate, iDist, dCost);
    1943 
    1944     }
    1945     else
    1946     {
    1947 #endif
    1948       xFirstFilteringFrameLuma(pOrg, pDec, (imgpel*)m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, pcAlfParam->tap, LumaStride);
    1949 #if MQT_ALF_NPASS
    1950     }
    1951 #endif
    1952 
    1953 #if MQT_ALF_NPASS
    1954     if(!m_iALFEncodePassReduction)
    1955     {
    1956 #endif
    1957       xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost);
    1958 #if MQT_ALF_NPASS
    1959       iDist = (Int64)uiDist;
    1960     }
    1961 #endif
    1962 
    1963     dCost -= adExtraCostReduction[i];
     4883    {
     4884      decideFilterShapeLuma(pOrg, pDec, LumaStride, pcAlfParam, uiRate, uiDist, dCost);
     4885    }
     4886
     4887    dCost -= adExtraCostReduction[m_uiVarGenMethod];
    19644888
    19654889    if(dCost < dMinMethodCost)
    19664890    {
    1967       iBestClassMethod = i;
     4891      iBestClassMethod = m_uiVarGenMethod;
    19684892      dMinMethodCost = dCost;
    19694893      uiMinMethodRate= uiRate;
    1970       iMinMethodDist = iDist;
    1971 #if MQT_ALF_NPASS
    1972       if(!m_iALFEncodePassReduction)
    1973       {
    1974 #endif
     4894      uiMinMethodDist = uiDist;
     4895
     4896      if(m_iALFEncodePassReduction == 0)
     4897      {
    19754898        m_pcPicYuvTmp->copyToPicLuma(pcPicRest);
    1976 #if MQT_ALF_NPASS
    1977       }
    1978 #endif
    1979 
     4899      }
    19804900    } 
    1981 
    1982   }
    1983 
    1984   dMinMethodCost += adExtraCostReduction[iBestClassMethod];
    1985 
    1986 
    1987   m_varImg= m_varImgMethods[iBestClassMethod];
     4901  }
    19884902
    19894903  m_uiVarGenMethod = iBestClassMethod;
    1990 
    1991 #if MQT_ALF_NPASS
    1992   if(m_iALFEncodePassReduction)
    1993   {
    1994 
    1995     m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[iBestClassMethod];
    1996 
    1997     setInitialMask(pcPicOrg, pcPicDec);
    1998 
    1999     m_pcBestAlfParam->alf_flag = 1;
    2000     m_pcBestAlfParam->cu_control_flag = 0;
    2001     m_pcBestAlfParam->chroma_idc = 0;
    2002     m_pcBestAlfParam->alf_pcr_region_flag = iBestClassMethod;
    2003 
    2004     m_pcBestAlfParam->tap = cFrmAlfParam[iBestClassMethod].tap;
    2005 #if TI_ALF_MAX_VSIZE_7
    2006     m_pcBestAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap);
    2007     m_pcBestAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap);
    2008 #else
    2009     m_pcBestAlfParam->num_coeff = (Int)tap*tap/4 + 2;
    2010 #endif
    2011 
    2012     xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcBestAlfParam, m_pcBestAlfParam->tap, LumaStride);
    2013 
    2014 
    2015 
    2016     xCalcRDCost(pcPicOrg, pcPicRest, m_pcBestAlfParam, uiMinMethodRate, uiDist, dMinMethodCost);
    2017     iMinMethodDist = (Int64)uiDist;
     4904  dMinMethodCost += adExtraCostReduction[m_uiVarGenMethod];
     4905  m_varImg= m_varImgMethods[m_uiVarGenMethod];
     4906
     4907  pcAlfParam = &(cFrmAlfParam[m_uiVarGenMethod]);
     4908
     4909  ALFParam  cAlfParamWithBestMethod;
     4910  allocALFParam(&cAlfParamWithBestMethod); 
     4911
     4912
     4913  if(m_iALFEncodePassReduction ==0)
     4914  {
     4915    copyALFParam(&cAlfParamWithBestMethod, pcAlfParam);
    20184916  }
    20194917  else
    20204918  {
    2021 #endif
    2022     copyALFParam(m_pcBestAlfParam, &cFrmAlfParam[iBestClassMethod]);
    2023 #if MQT_ALF_NPASS
    2024 
    2025   }
    2026 #endif
    2027 
    2028   ruiMinRate = uiMinMethodRate;
    2029   ruiMinDist = (UInt64)iMinMethodDist;
    2030   rdMinCost =  dMinMethodCost;
    2031 
    2032 
     4919    cAlfParamWithBestMethod.alf_flag = 1;
     4920    cAlfParamWithBestMethod.chroma_idc = 0;
     4921    cAlfParamWithBestMethod.alf_pcr_region_flag = m_uiVarGenMethod;
     4922    cAlfParamWithBestMethod.filter_shape= pcAlfParam->filter_shape;
     4923    cAlfParamWithBestMethod.num_coeff = m_sqrFiltLengthTab[cAlfParamWithBestMethod.filter_shape];
     4924    decodeFilterSet(pcAlfParam, m_varIndTab, m_filterCoeffSym);
     4925    if(!m_bUseNonCrossALF)
     4926    {
     4927      filterLuma(pRest, pDec, LumaStride, 0, m_img_height-1, 0, m_img_width-1,  pcAlfParam->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg);
     4928    }
     4929    else
     4930    {
     4931      xfilterSlicesEncoder(pDec, pRest, LumaStride, pcAlfParam->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg);
     4932    }
     4933    xcodeFiltCoeff(m_filterCoeffSym, pcAlfParam->filter_shape, m_varIndTab, pcAlfParam->filters_per_group,&cAlfParamWithBestMethod);
     4934
     4935    xCalcRDCost(pcPicOrg, pcPicRest, &cAlfParamWithBestMethod, uiMinMethodRate, uiMinMethodDist, dMinMethodCost);
     4936
     4937  }
     4938
     4939  if(dMinMethodCost < rdMinCost )
     4940  {
     4941    ruiMinRate = uiMinMethodRate;
     4942    ruiMinDist = uiMinMethodDist;
     4943    rdMinCost =  dMinMethodCost;
     4944    copyALFParam(m_pcBestAlfParam, &cAlfParamWithBestMethod);
     4945  }
     4946
     4947  freeALFParam(&cAlfParamWithBestMethod);
    20334948  for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    20344949  {
    20354950    freeALFParam(&cFrmAlfParam[i]);
    20364951  }
    2037 
    2038 #else 
    2039 
    2040 #if MQT_ALF_NPASS
    2041   setInitialMask(pcPicOrg, pcPicDec);
    2042 #else
    2043   for (Int i=0; i<Height; i++)
    2044   {
    2045     for (Int j=0; j<Width; j++)
    2046     {
    2047       m_maskImg[i][j] = 1;
    2048     }
    2049   }
    2050 #if MTK_NONCROSS_INLOOP_FILTER
     4952}
     4953
     4954
     4955
     4956Void   TEncAdaptiveLoopFilter::xFirstFilteringFrameLuma(Pel* imgOrg, Pel* imgDec, Pel* imgRest, ALFParam* ALFp, Int filtNo, Int stride)
     4957{
    20514958  if(!m_bUseNonCrossALF)
    2052     calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
     4959  {
     4960    xstoreInBlockMatrix(0, 0, m_img_height, m_img_width, true, true, imgOrg, imgDec, filtNo, stride);
     4961  }
    20534962  else
    2054     calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
    2055 #else
    2056   calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
    2057 #endif
    2058 #endif
    2059 
    2060 #if MQT_ALF_NPASS
    2061   if(m_iALFEncodePassReduction)
    2062   {
    2063     xFirstFilteringFrameLumaAllTap(pOrg, pDec, pRest, LumaStride);
    2064   }
    2065   else
    2066 #endif
    2067     xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcTempAlfParam, m_pcTempAlfParam->tap, LumaStride);
    2068 
    2069   xCalcRDCost(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost); // change this function final coding
    2070  
    2071   if( dCost < rdMinCost)
    2072   {
    2073     ruiMinRate = uiRate;
    2074     ruiMinDist = uiDist;
    2075     rdMinCost = dCost;
    2076     copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    2077   }
    2078 
    2079 #endif
    2080 }
    2081 
    2082 Void   TEncAdaptiveLoopFilter::xFirstFilteringFrameLuma(imgpel* ImgOrg, imgpel* ImgDec, imgpel* ImgRest, ALFParam* ALFp, Int tap, Int Stride)
    2083 {
    2084 #if MTK_NONCROSS_INLOOP_FILTER
    2085   if(!m_bUseNonCrossALF)
    2086     xstoreInBlockMatrix(0, 0, m_im_height, m_im_width, true, true, ImgOrg, ImgDec, tap, Stride);
    2087   else
    2088     xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, tap, Stride);
    2089 #else
    2090   xstoreInBlockMatrix(ImgOrg, ImgDec, tap, Stride);
    2091 #endif
    2092 
    2093 
    2094   xFilteringFrameLuma_qc(ImgOrg, ImgDec, ImgRest, ALFp, tap, Stride);
    2095 }
    2096 
    2097 
    2098 #if MTK_NONCROSS_INLOOP_FILTER
    2099 Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrix(Int ypos, Int xpos, Int iheight, Int iwidth, Bool bResetBlockMatrix, Bool bSymmCopyBlockMatrix, imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride)
    2100 #else
    2101 Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrix(imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride)
    2102 #endif
    2103 {
    2104 #if MQT_BA_RA
    2105   Int var_step_size_w = VAR_SIZE_W;
    2106   Int var_step_size_h = VAR_SIZE_H;
    2107 #endif
    2108 
    2109   Int i,j,k,l,varInd,ii,jj;
    2110   Int x, y;
    2111   Int fl =tap/2;
    2112 #if TI_ALF_MAX_VSIZE_7
    2113   Int flV = TComAdaptiveLoopFilter::ALFFlHToFlV(fl);
    2114   Int sqrFiltLength = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(tap);
    2115 #else
    2116   Int sqrFiltLength=(((tap*tap)/4 + 1) + 1);
    2117 #endif
    2118   Int fl2=9/2; //extended size at each side of the frame
     4963  {
     4964    xstoreInBlockMatrixforSlices(imgOrg, imgDec, filtNo, stride);
     4965  }
     4966
     4967
     4968  xFilteringFrameLuma(imgOrg, imgDec, imgRest, ALFp, filtNo, stride);
     4969}
     4970
     4971Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrix(Int ypos, Int xpos, Int iheight, Int iwidth, Bool bResetBlockMatrix, Bool bSymmCopyBlockMatrix, Pel* pImgOrg, Pel* pImgPad, Int filtNo, Int stride)
     4972{
     4973
     4974  Pel  regionOfInterested = (m_iDesignCurrentFilter ==1)?(1):(0);
     4975  Int     sqrFiltLength      = (filtNo == 2)?((Int)(MAX_SQR_FILT_LENGTH)):(m_sqrFiltLengthTab[filtNo]);
     4976  Int     yposEnd            = ypos + iheight -1;
     4977  Int     xposEnd            = xpos + iwidth  -1;
     4978  Double ***EShape           = m_EGlobalSym[filtNo];
     4979  Double **yShape            = m_yGlobalSym[filtNo];
     4980
    21194981  Int ELocal[MAX_SQR_FILT_LENGTH];
    2120   Int yLocal;
    2121   Int *p_pattern;
    2122   Int filtNo =2;
     4982  Pel *pImgPad1, *pImgPad2, *pImgPad3, *pImgPad4;
     4983  Int i,j,k,l,varInd, yLocal;
    21234984  double **E,*yy;
    2124 #if MTK_NONCROSS_INLOOP_FILTER
    2125   static Int count_valid;
    2126 #else
    2127   Int count_valid=0;
    2128 #endif
    2129   if (tap==9)
    2130     filtNo =0;
    2131   else if (tap==7)
    2132     filtNo =1;
    2133  
    2134   p_pattern= m_patternTab[filtNo];
    2135  
    2136 #if MTK_NONCROSS_INLOOP_FILTER
     4985
     4986  static Int numValidPels;
    21374987  if(bResetBlockMatrix)
    21384988  {
    2139     count_valid = 0;
    2140 #endif
    2141   memset( m_pixAcc, 0,sizeof(double)*NO_VAR_BINS);
    2142   for (varInd=0; varInd<NO_VAR_BINS; varInd++)
    2143   {
    2144     memset(m_yGlobalSym[filtNo][varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
    2145     for (k=0; k<sqrFiltLength; k++)
    2146     {
    2147       memset(m_EGlobalSym[filtNo][varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
    2148     }
    2149   }
    2150   for (i = fl2; i < m_im_height+fl2; i++)
    2151   {
    2152     for (j = fl2; j < m_im_width+fl2; j++)
    2153     {
    2154       if (m_maskImg[i-fl2][j-fl2] == 1)
    2155       {
    2156         count_valid++;
    2157       }
    2158     }
    2159   }
    2160 #if MTK_NONCROSS_INLOOP_FILTER
    2161   }
    2162 #endif
    2163 
    2164   {
    2165 #if MTK_NONCROSS_INLOOP_FILTER
    2166     x = y = fl2; //cytsai: shall x, y  be removed ?
    2167 
    2168     for (i= ypos; i< ypos + iheight; i++)
    2169     {
    2170       for (j= xpos; j< xpos + iwidth; j++)
    2171       {
    2172 #else
    2173     for (i=0,y=fl2; i<m_im_height; i++,y++)
    2174     {
    2175       for (j=0,x=fl2; j<m_im_width; j++,x++)
    2176       {
    2177 #endif
    2178 #if MQT_ALF_NPASS
    2179         Int condition = (m_maskImg[i][j] == 1);
    2180         if (m_iDesignCurrentFilter)
    2181         {
    2182           condition = (m_maskImg[i][j] == 0 && count_valid > 0);
     4989    numValidPels = 0;
     4990    memset( m_pixAcc, 0,sizeof(double)*NO_VAR_BINS);
     4991    for (varInd=0; varInd<NO_VAR_BINS; varInd++)
     4992    {
     4993      memset(yShape[varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
     4994      for (k=0; k<sqrFiltLength; k++)
     4995      {
     4996        memset(EShape[varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
     4997      }
     4998    }
     4999    for (i = 0; i < m_img_height; i++)
     5000    {
     5001      for (j = 0; j < m_img_width; j++)
     5002      {
     5003        if (m_maskImg[i][j] == regionOfInterested)
     5004        {
     5005          numValidPels++;
    21835006        }
    2184         if(!condition)
    2185         {
    2186 #else
    2187         if (m_maskImg[i][j] == 0 && count_valid > 0)
    2188         {
    2189 
     5007      }
     5008    }
     5009  }
     5010
     5011  Int yLineInLCU;
     5012  Int paddingLine ;
     5013
     5014  pImgPad += (ypos* stride);
     5015  pImgOrg += (ypos* stride);
     5016
     5017  switch(filtNo)
     5018  {
     5019#if !ALF_SINGLE_FILTER_SHAPE
     5020  case ALF_STAR5x5:
     5021    {
     5022      for (i= ypos; i<= yposEnd; i++)
     5023      {
     5024        yLineInLCU = i % m_lcuHeight;
     5025
     5026        if (yLineInLCU < m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height )
     5027        {
     5028          pImgPad1 = pImgPad +   stride;
     5029          pImgPad2 = pImgPad -   stride;
     5030          pImgPad3 = pImgPad + 2*stride;
     5031          pImgPad4 = pImgPad - 2*stride;
     5032        }
     5033        else if (yLineInLCU < m_lineIdxPadTop)
     5034        {
     5035          paddingLine = - yLineInLCU + m_lineIdxPadTop - 1;
     5036          pImgPad1 = pImgPad + min(paddingLine, 1)*stride;
     5037          pImgPad2 = pImgPad -   stride;
     5038          pImgPad3 = pImgPad + min(paddingLine, 2)*stride;
     5039          pImgPad4 = pImgPad - 2*stride;
    21905040        }
    21915041        else
    21925042        {
    2193 #endif
    2194 #if MQT_BA_RA
    2195           varInd = m_varImg[i/var_step_size_h][j/var_step_size_w];
    2196 #else
    2197           varInd=min(m_varImg[i][j], NO_VAR_BINS-1);
    2198 #endif
    2199           k=0;
    2200           memset(ELocal, 0, sqrFiltLength*sizeof(int));
    2201 #if TI_ALF_MAX_VSIZE_7
    2202           for (ii = -flV; ii < 0; ii++)
    2203 #else
    2204           for (ii=-fl; ii<0; ii++)
    2205 #endif
     5043          paddingLine = yLineInLCU - m_lineIdxPadTop;
     5044          pImgPad1 = pImgPad +   stride;
     5045          pImgPad2 = pImgPad - min(paddingLine, 1)*stride;
     5046          pImgPad3 = pImgPad + 2*stride;
     5047          pImgPad4 = pImgPad - min(paddingLine, 2)*stride;
     5048        }
     5049
     5050        if ( (yLineInLCU == m_lineIdxPadTop || yLineInLCU == m_lineIdxPadTop-1) && i-yLineInLCU+m_lcuHeight < m_img_height )
     5051        {
     5052          pImgPad+= stride;
     5053          pImgOrg+= stride;
     5054          continue;
     5055        }
     5056        else
     5057        {
     5058        for (j= xpos; j<= xposEnd; j++)
     5059        {
     5060          if ( (m_maskImg[i][j] == regionOfInterested) || (numValidPels == 0) )
    22065061          {
    2207             for (jj=-fl-ii; jj<=fl+ii; jj++)
    2208             { 
    2209               ELocal[p_pattern[k++]]+=(ImgDec[(i+ii)*Stride + (j+jj)]+ImgDec[(i-ii)*Stride + (j-jj)]);
     5062            varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W];
     5063            memset(ELocal, 0, 10*sizeof(Int));
     5064
     5065            ELocal[0] = (pImgPad3[j+2] + pImgPad4[j-2]);
     5066            ELocal[1] = (pImgPad3[j  ] + pImgPad4[j  ]);
     5067            ELocal[2] = (pImgPad3[j-2] + pImgPad4[j+2]);
     5068
     5069            ELocal[3] = (pImgPad1[j+1] + pImgPad2[j-1]);
     5070            ELocal[4] = (pImgPad1[j  ] + pImgPad2[j  ]);
     5071            ELocal[5] = (pImgPad1[j-1] + pImgPad2[j+1]);
     5072
     5073            ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     5074            ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     5075            ELocal[8] = (pImgPad[j  ]);
     5076
     5077            yLocal= pImgOrg[j];
     5078            m_pixAcc[varInd]+=(yLocal*yLocal);
     5079            E= EShape[varInd]; 
     5080            yy= yShape[varInd];
     5081
     5082            for (k=0; k<10; k++)
     5083            {
     5084              for (l=k; l<10; l++)
     5085              {
     5086                E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     5087              }
     5088              yy[k]+=(double)(ELocal[k]*yLocal);
    22105089            }
    2211           }
    2212           for (jj=-fl; jj<0; jj++)
    2213             ELocal[p_pattern[k++]]+=(ImgDec[(i)*Stride + (j+jj)]+ImgDec[(i)*Stride + (j-jj)]);
    2214           ELocal[p_pattern[k++]]+=ImgDec[(i)*Stride + (j)];
    2215           ELocal[sqrFiltLength-1]=1;
    2216           yLocal=ImgOrg[(i)*Stride + (j)];
    2217 
    2218           m_pixAcc[varInd]+=(yLocal*yLocal);
    2219           E= m_EGlobalSym[filtNo][varInd];
    2220           yy= m_yGlobalSym[filtNo][varInd];
    2221 
    2222           for (k=0; k<sqrFiltLength; k++)
    2223           {
    2224             for (l=k; l<sqrFiltLength; l++)
    2225               E[k][l]+=(double)(ELocal[k]*ELocal[l]);
    2226             yy[k]+=(double)(ELocal[k]*yLocal);
     5090
    22275091          }
    22285092        }
    2229       }
    2230     }
    2231   }
    2232 
    2233 #if MTK_NONCROSS_INLOOP_FILTER
     5093        pImgPad+= stride;
     5094        pImgOrg+= stride;
     5095        }
     5096      }
     5097    }
     5098    break;
     5099  case ALF_CROSS9x9:
     5100    {
     5101      Pel *pImgPad5, *pImgPad6, *pImgPad7, *pImgPad8;
     5102#else
     5103  case ALF_CROSS9x7_SQUARE3x3:
     5104    {
     5105      Pel *pImgPad5, *pImgPad6;
     5106#endif
     5107      for (i= ypos; i<= yposEnd; i++)
     5108      {
     5109        yLineInLCU = i % m_lcuHeight;
     5110
     5111        if (yLineInLCU<m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height)
     5112        {
     5113          pImgPad1 = pImgPad +   stride;
     5114          pImgPad2 = pImgPad -   stride;
     5115          pImgPad3 = pImgPad + 2*stride;
     5116          pImgPad4 = pImgPad - 2*stride;
     5117          pImgPad5 = pImgPad + 3*stride;
     5118          pImgPad6 = pImgPad - 3*stride;
     5119#if !ALF_SINGLE_FILTER_SHAPE
     5120          pImgPad7 = pImgPad + 4*stride;
     5121          pImgPad8 = pImgPad - 4*stride;
     5122#endif
     5123        }
     5124        else if (yLineInLCU<m_lineIdxPadTop)
     5125        {
     5126          paddingLine = - yLineInLCU + m_lineIdxPadTop - 1;
     5127          pImgPad1 = (paddingLine < 1) ? pImgPad : pImgPad + min(paddingLine, 1)*stride;
     5128          pImgPad2 = (paddingLine < 1) ? pImgPad : pImgPad -   stride;
     5129          pImgPad3 = (paddingLine < 2) ? pImgPad : pImgPad + min(paddingLine, 2)*stride;
     5130          pImgPad4 = (paddingLine < 2) ? pImgPad : pImgPad - 2*stride;
     5131          pImgPad5 = (paddingLine < 3) ? pImgPad : pImgPad + min(paddingLine, 3)*stride;
     5132          pImgPad6 = (paddingLine < 3) ? pImgPad : pImgPad - 3*stride;
     5133#if !ALF_SINGLE_FILTER_SHAPE
     5134          pImgPad7 = (paddingLine < 4) ? pImgPad : pImgPad + min(paddingLine, 4)*stride;
     5135          pImgPad8 = (paddingLine < 4) ? pImgPad : pImgPad - 4*stride;
     5136#endif
     5137        }
     5138        else
     5139        {
     5140          paddingLine = yLineInLCU - m_lineIdxPadTop;
     5141          pImgPad1 = (paddingLine < 1) ? pImgPad : pImgPad +   stride;
     5142          pImgPad2 = (paddingLine < 1) ? pImgPad : pImgPad - min(paddingLine, 1)*stride;
     5143          pImgPad3 = (paddingLine < 2) ? pImgPad : pImgPad + 2*stride;
     5144          pImgPad4 = (paddingLine < 2) ? pImgPad : pImgPad - min(paddingLine, 2)*stride;
     5145          pImgPad5 = (paddingLine < 3) ? pImgPad : pImgPad + 3*stride;
     5146          pImgPad6 = (paddingLine < 3) ? pImgPad : pImgPad - min(paddingLine, 3)*stride;
     5147#if !ALF_SINGLE_FILTER_SHAPE
     5148          pImgPad7 = (paddingLine < 4) ? pImgPad : pImgPad + 4*stride;
     5149          pImgPad8 = (paddingLine < 4) ? pImgPad : pImgPad - min(paddingLine, 4)*stride;
     5150#endif
     5151        }         
     5152
     5153        for (j= xpos; j<= xposEnd; j++)
     5154        {
     5155          if ( (m_maskImg[i][j] == regionOfInterested) || (numValidPels == 0) )
     5156          {
     5157            varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W];
     5158
     5159#if ALF_SINGLE_FILTER_SHAPE
     5160            memset(ELocal, 0, (sqrFiltLength+1)*sizeof(Int));
     5161
     5162            ELocal[0] = (pImgPad5[j]+pImgPad6[j]);
     5163            ELocal[1] = (pImgPad3[j]+pImgPad4[j]);
     5164            ELocal[2] = (pImgPad1[j-1]+pImgPad2[j+1]);
     5165            ELocal[3] = (pImgPad1[j]+pImgPad2[j]);
     5166            ELocal[4] = (pImgPad1[j+1]+pImgPad2[j-1]);
     5167            ELocal[5] = (pImgPad[j+4]+pImgPad[j-4]);
     5168            ELocal[6] = (pImgPad[j+3]+pImgPad[j-3]);
     5169            ELocal[7] = (pImgPad[j+2]+pImgPad[j-2]);
     5170            ELocal[8] = (pImgPad[j+1]+pImgPad[j-1]);
     5171            ELocal[9] = (pImgPad[j  ]);
     5172#else
     5173            memset(ELocal, 0, 10*sizeof(Int));
     5174
     5175            ELocal[0] = (pImgPad7[j] + pImgPad8[j]);
     5176
     5177            ELocal[1] = (pImgPad5[j] + pImgPad6[j]);
     5178
     5179            ELocal[2] = (pImgPad3[j] + pImgPad4[j]);
     5180
     5181            ELocal[3] = (pImgPad1[j] + pImgPad2[j]);
     5182
     5183            ELocal[4] = (pImgPad[j+4] + pImgPad[j-4]);
     5184            ELocal[5] = (pImgPad[j+3] + pImgPad[j-3]);
     5185            ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     5186            ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     5187            ELocal[8] = (pImgPad[j  ] );
     5188#endif
     5189            yLocal= pImgOrg[j];
     5190            m_pixAcc[varInd]+=(yLocal*yLocal);
     5191            E= EShape[varInd];
     5192            yy= yShape[varInd];
     5193
     5194#if ALF_SINGLE_FILTER_SHAPE
     5195            for (k=0; k<(sqrFiltLength+1); k++)
     5196            {
     5197              for (l=k; l<(sqrFiltLength+1); l++)
     5198              {
     5199                E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     5200              }
     5201              yy[k]+=(double)(ELocal[k]*yLocal);
     5202            }
     5203#else
     5204            for (k=0; k<10; k++)
     5205            {
     5206              for (l=k; l<10; l++)
     5207              {
     5208                E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     5209              }
     5210              yy[k]+=(double)(ELocal[k]*yLocal);
     5211            }
     5212#endif
     5213          }
     5214        }
     5215        pImgPad+= stride;
     5216        pImgOrg+= stride;
     5217      }
     5218
     5219    }
     5220    break;
     5221  default:
     5222    {
     5223      printf("Not a supported filter shape\n");
     5224      assert(0);
     5225      exit(1);
     5226    }
     5227  }
     5228
    22345229  if(bSymmCopyBlockMatrix)
    22355230  {
    2236 #endif
    2237 
    2238   // Matrix EGlobalSeq is symmetric, only part of it is calculated
    2239   for (varInd=0; varInd<NO_VAR_BINS; varInd++)
    2240   {
    2241     double **pE = m_EGlobalSym[filtNo][varInd];
    2242     for (k=1; k<sqrFiltLength; k++)
    2243     {
    2244       for (l=0; l<k; l++)
    2245       {
    2246         pE[k][l]=pE[l][k];
    2247       }
    2248     }
    2249   }
    2250 #if MTK_NONCROSS_INLOOP_FILTER
    2251   }
    2252 #endif
    2253 
    2254 }
    2255 
    2256 Void   TEncAdaptiveLoopFilter::xFilteringFrameLuma_qc(imgpel* ImgOrg, imgpel* imgY_pad, imgpel* ImgFilt, ALFParam* ALFp, Int tap, Int Stride)
    2257 {
    2258   int  filtNo,filters_per_fr;
     5231    for (varInd=0; varInd<NO_VAR_BINS; varInd++)
     5232    {
     5233      double **pE = EShape[varInd];
     5234      for (k=1; k<sqrFiltLength; k++)
     5235      {
     5236        for (l=0; l<k; l++)
     5237        {
     5238          pE[k][l]=pE[l][k];
     5239        }
     5240      }
     5241    }
     5242  }
     5243}
     5244
     5245
     5246Void   TEncAdaptiveLoopFilter::xFilteringFrameLuma(Pel* imgOrg, Pel* imgPad, Pel* imgFilt, ALFParam* ALFp, Int filtNo, Int stride)
     5247{
    22595248  static double **ySym, ***ESym;
    2260   int lambda_val = (Int) m_dLambdaLuma;
    2261   lambda_val = lambda_val * (1<<(2*g_uiBitIncrement));
    2262   if (tap==9)
    2263     filtNo =0;
    2264   else if (tap==7)
    2265     filtNo =1;
    2266   else
    2267     filtNo=2;
    2268  
     5249  Int  filters_per_fr;
     5250  Int lambdaVal = (Int) m_dLambdaLuma;
     5251  lambdaVal = lambdaVal * (1<<(2*g_uiBitIncrement));
     5252
    22695253  ESym=m_EGlobalSym[filtNo]; 
    22705254  ySym=m_yGlobalSym[filtNo];
    2271  
    2272   xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr,
    2273                          m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val);
    2274  
    2275   // g_filterCoeffPrevSelected = g_filterCoeffSym
    2276   xcalcPredFilterCoeff(filtNo);
    2277  
    2278   //filter the frame with g_filterCoeffPrevSelected
    2279 #if MTK_NONCROSS_INLOOP_FILTER
     5255
     5256  xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr,m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambdaVal);
     5257
    22805258  if(!m_bUseNonCrossALF)
    2281     xfilterFrame_en(0, 0, m_im_height, m_im_width, imgY_pad, ImgFilt, filtNo, Stride);
     5259  {
     5260    filterLuma(imgFilt, imgPad, stride, 0, m_img_height-1, 0, m_img_width-1,  ALFp->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg);
     5261  }
    22825262  else
    2283     xfilterSlices_en(imgY_pad, ImgFilt, filtNo, Stride);
    2284 #else
    2285   xfilterFrame_en(imgY_pad, ImgFilt, filtNo, Stride);
    2286 #endif
    2287 
    2288   xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,0, ALFp);
    2289 }
    2290 
    2291 #if MTK_NONCROSS_INLOOP_FILTER
    2292 Void TEncAdaptiveLoopFilter::xfilterFrame_en(int ypos, int xpos, int iheight, int iwidth, imgpel* ImgDec, imgpel* ImgRest,int filtNo, int Stride)
    2293 #else
    2294 Void TEncAdaptiveLoopFilter::xfilterFrame_en(imgpel* ImgDec, imgpel* ImgRest,int filtNo, int Stride)
    2295 #endif
    2296 {
    2297 #if MQT_BA_RA
    2298   imgpel *imgY_rec = ImgDec;
    2299   imgpel *p_imgY_pad, *p_imgY_pad0;
    2300   int var_step_size_w = VAR_SIZE_W;
    2301   int var_step_size_h = VAR_SIZE_H;
    2302   int i,j,y,x;
    2303 #else
    2304   int i,j,ii,jj,y,x;
    2305 #endif
    2306   int  *pattern;
    2307   int fl, fl_temp, sqrFiltLength;
    2308   int pixelInt;
    2309   int offset = (1<<(NUM_BITS - 2));
    2310  
    2311   pattern=m_patternTab_filt[filtNo];
    2312   fl_temp=m_flTab[filtNo];
    2313 #if !MQT_BA_RA
    2314 #if TI_ALF_MAX_VSIZE_7
    2315   Int fl_tempV = TComAdaptiveLoopFilter::ALFFlHToFlV(fl_temp);
    2316 #endif
    2317 #endif
    2318   sqrFiltLength=MAX_SQR_FILT_LENGTH;  fl=FILTER_LENGTH/2;
    2319  
    2320 #if MTK_NONCROSS_INLOOP_FILTER
    2321   for (y= ypos, i = fl+ ypos; i < ypos+ iheight+ fl; i++, y++)
    2322   {
    2323     for (x= xpos, j = fl+ xpos; j < xpos+ iwidth+ fl; j++, x++)
    2324     {
    2325 #else
    2326   for (y=0, i = fl; i < m_im_height+fl; i++, y++)
    2327   {
    2328     for (x=0, j = fl; j < m_im_width+fl; j++, x++)
    2329     {
    2330 #endif
    2331 #if MQT_BA_RA
    2332       int varInd=m_varImg[y/var_step_size_h][x/var_step_size_w];
    2333 #else
    2334       int varInd=m_varImg[i-fl][j-fl];
    2335       imgpel *im1,*im2;
    2336 #endif
    2337       int *coef = m_filterCoeffPrevSelected[varInd];
    2338       pattern=m_patternTab_filt[filtNo];
    2339       pixelInt= m_filterCoeffPrevSelected[varInd][sqrFiltLength-1];
    2340 
    2341 #if MQT_BA_RA
    2342       if (filtNo == 2) //5x5
    2343       {
    2344         pixelInt += coef[22]* (imgY_rec[(i-fl+2)*Stride + j-fl]+imgY_rec[(i-fl-2)*Stride + j-fl]);
    2345 
    2346         pixelInt += coef[30]* (imgY_rec[(i-fl+1)*Stride + j-fl+1]+imgY_rec[(i-fl-1)*Stride + j-fl-1]);
    2347         pixelInt += coef[31]* (imgY_rec[(i-fl+1)*Stride + j-fl]  +imgY_rec[(i-fl-1)*Stride + j-fl]);
    2348         pixelInt += coef[32]* (imgY_rec[(i-fl+1)*Stride + j-fl-1]+imgY_rec[(i-fl-1)*Stride + j-fl+1]);
    2349 
    2350         pixelInt += coef[38]* (imgY_rec[(i-fl)*Stride + j-fl-2]+imgY_rec[(i-fl)*Stride + j-fl+2]);
    2351         pixelInt += coef[39]* (imgY_rec[(i-fl)*Stride + j-fl-1]+imgY_rec[(i-fl)*Stride + j-fl+1]);
    2352         pixelInt += coef[40]* (imgY_rec[(i-fl)*Stride + j-fl]);
    2353       }
    2354       else if (filtNo == 1) //7x7
    2355       {
    2356         pixelInt += coef[13]* (imgY_rec[(i-fl+3)*Stride + j-fl]+imgY_rec[(i-fl-3)*Stride + j-fl]);
    2357 
    2358         p_imgY_pad = imgY_rec + (i-fl+2)*Stride;
    2359         p_imgY_pad0 = imgY_rec + (i-fl-2)*Stride;
    2360         pixelInt += coef[21]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
    2361         pixelInt += coef[22]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
    2362         pixelInt += coef[23]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
    2363 
    2364         p_imgY_pad = imgY_rec + (i-fl+1)*Stride;
    2365         p_imgY_pad0 = imgY_rec + (i-fl-1)*Stride;
    2366         pixelInt += coef[29]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]);
    2367         pixelInt += coef[30]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
    2368         pixelInt += coef[31]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
    2369         pixelInt += coef[32]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
    2370         pixelInt += coef[33]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]);
    2371 
    2372         p_imgY_pad = imgY_rec + (i-fl)*Stride;
    2373         pixelInt += coef[37]* (p_imgY_pad[j-fl+3]+p_imgY_pad[j-fl-3]);
    2374         pixelInt += coef[38]* (p_imgY_pad[j-fl+2]+p_imgY_pad[j-fl-2]);
    2375         pixelInt += coef[39]* (p_imgY_pad[j-fl+1]+p_imgY_pad[j-fl-1]);
    2376         pixelInt += coef[40]* (p_imgY_pad[j-fl]);
    2377 
    2378       }
    2379       else
    2380       {
    2381 #if !TI_ALF_MAX_VSIZE_7
    2382         pixelInt += coef[4]* (imgY_rec[(i-fl+4)*Stride + j-fl]+imgY_rec[(i-fl-4)*Stride + j-fl]);
    2383 #endif         
    2384         p_imgY_pad = imgY_rec + (i-fl+3)*Stride;
    2385         p_imgY_pad0 = imgY_rec + (i-fl-3)*Stride;
    2386         pixelInt += coef[12]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
    2387         pixelInt += coef[13]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
    2388         pixelInt += coef[14]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
    2389 
    2390         p_imgY_pad = imgY_rec + (i-fl+2)*Stride;
    2391         p_imgY_pad0 = imgY_rec + (i-fl-2)*Stride;
    2392         pixelInt += coef[20]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]);
    2393         pixelInt += coef[21]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
    2394         pixelInt += coef[22]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
    2395         pixelInt += coef[23]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
    2396         pixelInt += coef[24]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]);
    2397 
    2398         p_imgY_pad = imgY_rec + (i-fl+1)*Stride;
    2399         p_imgY_pad0 = imgY_rec + (i-fl-1)*Stride;
    2400         pixelInt += coef[28]* (p_imgY_pad[j-fl+3]+p_imgY_pad0[j-fl-3]);
    2401         pixelInt += coef[29]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]);
    2402         pixelInt += coef[30]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
    2403         pixelInt += coef[31]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
    2404         pixelInt += coef[32]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
    2405         pixelInt += coef[33]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]);
    2406         pixelInt += coef[34]* (p_imgY_pad[j-fl-3]+p_imgY_pad0[j-fl+3]);
    2407 
    2408         p_imgY_pad = imgY_rec + (i-fl)*Stride;
    2409         pixelInt += coef[36]* (p_imgY_pad[j-fl+4]+p_imgY_pad[j-fl-4]);
    2410         pixelInt += coef[37]* (p_imgY_pad[j-fl+3]+p_imgY_pad[j-fl-3]);
    2411         pixelInt += coef[38]* (p_imgY_pad[j-fl+2]+p_imgY_pad[j-fl-2]);
    2412         pixelInt += coef[39]* (p_imgY_pad[j-fl+1]+p_imgY_pad[j-fl-1]);
    2413         pixelInt += coef[40]* (p_imgY_pad[j-fl]);
    2414 
    2415       }
    2416 #else
    2417 
    2418 #if TI_ALF_MAX_VSIZE_7
    2419       for (ii = -fl_tempV; ii < 0; ii++)
    2420 #else
    2421       for (ii=-fl_temp; ii<0; ii++)
    2422 #endif
    2423       {
    2424         im1= &(ImgDec[(y+ii)*Stride + x-fl_temp-ii]);
    2425         im2= &(ImgDec[(y-ii)*Stride + x+fl_temp+ii]);
    2426         for (jj=-fl_temp-ii; jj<=fl_temp+ii; jj++,im1++,im2--)
    2427           pixelInt+=((*im1+ *im2)*coef[*(pattern++)]);
    2428       }
    2429       im1= &(ImgDec[y*Stride + x-fl_temp]);
    2430       im2= &(ImgDec[y*Stride + x+fl_temp]);
    2431       for (jj=-fl_temp; jj<0; jj++,im1++,im2--)
    2432         pixelInt+=((*im1+ *im2)*coef[*(pattern++)]);
    2433       pixelInt+=(ImgDec[y*Stride + x]*coef[*(pattern++)]);
    2434 #endif 
    2435 
    2436       pixelInt=(int)((pixelInt+offset) >> (NUM_BITS - 1));
    2437       ImgRest[y*Stride + x] = Clip3(0, g_uiIBDI_MAX, pixelInt);
    2438     }
    2439   }
    2440 }
    2441 
    2442 Void TEncAdaptiveLoopFilter::xfindBestFilterVarPred(double **ySym, double ***ESym, double *pixAcc, int **filterCoeffSym, int **filterCoeffSymQuant, int filtNo, int *filters_per_fr_best, int varIndTab[], imgpel **imgY_rec, imgpel **varImg, imgpel **maskImg, imgpel **imgY_pad, double lambda_val)
    2443 {
    2444   int filters_per_fr, firstFilt, coded, forceCoeff0,
    2445   interval[NO_VAR_BINS][2], intervalBest[NO_VAR_BINS][2];
    2446   int i, k, varInd;
    2447   static double ***E_temp, **y_temp, *pixAcc_temp;
    2448   static int **FilterCoeffQuantTemp;
    2449   double  error, lambda, lagrangian, lagrangianMin;
    2450  
     5263  {
     5264    xfilterSlicesEncoder(imgPad, imgFilt, stride, filtNo, m_filterCoeffSym, m_varIndTab, m_varImg);
     5265  }
     5266
     5267  xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,ALFp);
     5268}
     5269#endif
     5270
     5271#if LCU_SYNTAX_ALF
     5272Void TEncAdaptiveLoopFilter::xfindBestFilterVarPred(double **ySym, double ***ESym, double *pixAcc, Int **filterCoeffSym, Int **filterCoeffSymQuant, Int filter_shape, Int *filters_per_fr_best, Int varIndTab[], Pel **imgY_rec, Pel **varImg, Pel **maskImg, Pel **imgY_pad, double lambda_val, Int numMaxFilters)
     5273#else
     5274Void TEncAdaptiveLoopFilter::xfindBestFilterVarPred(double **ySym, double ***ESym, double *pixAcc, Int **filterCoeffSym, Int **filterCoeffSymQuant, Int filter_shape, Int *filters_per_fr_best, Int varIndTab[], Pel **imgY_rec, Pel **varImg, Pel **maskImg, Pel **imgY_pad, double lambda_val)
     5275#endif
     5276{
     5277  Int filters_per_fr, firstFilt, interval[NO_VAR_BINS][2], intervalBest[NO_VAR_BINS][2];
     5278  int i;
     5279  double  lagrangian, lagrangianMin;
    24515280  int sqrFiltLength;
    2452   int *pattern, *patternMap, *weights;
    2453   int numBits, coeffBits;
     5281  int *weights;
     5282  Int coeffBits;
    24545283  double errorForce0CoeffTab[NO_VAR_BINS][2];
    2455   int  codedVarBins[NO_VAR_BINS], createBistream /*, forceCoeff0 */;
    2456   int  usePrevFilt[NO_VAR_BINS], usePrevFiltDefault[NO_VAR_BINS];
    2457   static int first=0;
    2458  
    2459   for (i = 0; i < NO_VAR_BINS; i++)
    2460     usePrevFiltDefault[i]=usePrevFilt[i]=1;
    2461   lambda = lambda_val;
    2462   sqrFiltLength=MAX_SQR_FILT_LENGTH;
    2463  
    2464   if (first==0)
    2465   {
    2466     initMatrix3D_double(&E_temp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
    2467     initMatrix_double(&y_temp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    2468     pixAcc_temp = (double *) calloc(NO_VAR_BINS, sizeof(double));
    2469     initMatrix_int(&FilterCoeffQuantTemp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    2470     first=1;
    2471   }
    2472  
    2473   sqrFiltLength=m_sqrFiltLengthTab[filtNo];   
    2474   Int fl = m_flTab[filtNo];
    2475   weights=m_weightsTab[filtNo];               
    2476   patternMap=m_patternMapTab[filtNo]; 
    2477   pattern=m_patternTab[filtNo];
    2478  
    2479   memcpy(pixAcc_temp,pixAcc,sizeof(double)*NO_VAR_BINS);
    2480   for (varInd=0; varInd<NO_VAR_BINS; varInd++)
    2481   {
    2482     memcpy(y_temp[varInd],ySym[varInd],sizeof(double)*sqrFiltLength);
    2483     for (k=0; k<sqrFiltLength; k++)
    2484       memcpy(E_temp[varInd][k],ESym[varInd][k],sizeof(double)*sqrFiltLength);
    2485   }
    2486  
     5284 
     5285  sqrFiltLength= m_sqrFiltLengthTab[filter_shape] ;
     5286  weights = weightsTabShapes[filter_shape];
     5287
    24875288  // zero all variables
    24885289  memset(varIndTab,0,sizeof(int)*NO_VAR_BINS);
    2489  
     5290
    24905291  for(i = 0; i < NO_VAR_BINS; i++)
    24915292  {
    2492     memset(filterCoeffSym[i],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
    2493     memset(filterCoeffSymQuant[i],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
    2494   }
    2495  
     5293    memset(filterCoeffSym[i],0,sizeof(int)*ALF_MAX_NUM_COEF);
     5294    memset(filterCoeffSymQuant[i],0,sizeof(int)*ALF_MAX_NUM_COEF);
     5295  }
     5296
    24965297  firstFilt=1;  lagrangianMin=0;
    24975298  filters_per_fr=NO_FILTERS;
    2498  
     5299
    24995300  while(filters_per_fr>=1)
    25005301  {
    2501     findFilterGroupingError(E_temp, y_temp, pixAcc_temp, interval, sqrFiltLength, filters_per_fr);
    2502     findFilterCoeff(E_temp, y_temp, pixAcc_temp, filterCoeffSym, filterCoeffSymQuant, interval,
    2503                     varIndTab, sqrFiltLength, filters_per_fr, weights, numBits=NUM_BITS,  errorForce0CoeffTab);
    2504     lagrangian=xfindBestCoeffCodMethod(codedVarBins, &forceCoeff0, filterCoeffSymQuant, fl,
    2505                                        sqrFiltLength, filters_per_fr, errorForce0CoeffTab, &error, lambda);
    2506    
    2507     if (lagrangian<lagrangianMin || firstFilt==1)
     5302    mergeFiltersGreedy(ySym, ESym, pixAcc, interval, sqrFiltLength, filters_per_fr);
     5303    findFilterCoeff(ESym, ySym, pixAcc, filterCoeffSym, filterCoeffSymQuant, interval,
     5304      varIndTab, sqrFiltLength, filters_per_fr, weights, errorForce0CoeffTab);
     5305
     5306    lagrangian=xfindBestCoeffCodMethod(filterCoeffSymQuant, filter_shape, sqrFiltLength, filters_per_fr, errorForce0CoeffTab, lambda_val);
     5307#if LCU_SYNTAX_ALF
     5308    if (lagrangian<lagrangianMin || firstFilt==1 || filters_per_fr == numMaxFilters)
     5309#else
     5310    if (lagrangian<lagrangianMin || firstFilt==1 || filters_per_fr == m_iALFMaxNumberFilters)
     5311#endif
    25085312    {
    25095313      firstFilt=0;
     
    25155319    filters_per_fr--;
    25165320  }
    2517  
    2518   findFilterCoeff(E_temp, y_temp, pixAcc_temp, filterCoeffSym, filterCoeffSymQuant, intervalBest,
    2519                   varIndTab, sqrFiltLength, (*filters_per_fr_best), weights, numBits=NUM_BITS, errorForce0CoeffTab);
    2520  
    2521   xfindBestCoeffCodMethod(codedVarBins, &forceCoeff0, filterCoeffSymQuant, fl, sqrFiltLength,
    2522                           (*filters_per_fr_best), errorForce0CoeffTab, &error, lambda);
    2523  
    2524   coded=1;
    2525   if (forceCoeff0==1 && (*filters_per_fr_best)==1)
    2526   {
    2527     coded=0;
    2528     coeffBits = xcodeAuxInfo(-1, (*filters_per_fr_best), varIndTab, 0, createBistream=0,filtNo, m_tempALFp);
    2529   }
    2530   else
    2531   {
    2532     coeffBits = xcodeAuxInfo(filtNo, (*filters_per_fr_best), varIndTab, 0, createBistream=0,filtNo, m_tempALFp);
    2533   }
    2534 
    2535   if (forceCoeff0==0)
    2536   {
    2537     coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength,
    2538       (*filters_per_fr_best), createBistream=0, m_tempALFp);
    2539   }
    2540   else
    2541   {
    2542     if ((*filters_per_fr_best)==1)
    2543     {
    2544       for(varInd=0; varInd<(*filters_per_fr_best); varInd++)
    2545       {
    2546         memset(filterCoeffSym[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
    2547         memset(filterCoeffSymQuant[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
    2548       }
    2549     }
    2550     else
    2551     {
    2552       coeffBits += xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength,
    2553         (*filters_per_fr_best), codedVarBins, createBistream=0, m_tempALFp);
    2554 
    2555       for(varInd=0; varInd<(*filters_per_fr_best); varInd++)
    2556       {
    2557         if (codedVarBins[varInd]==0)
    2558         {
    2559           memset(filterCoeffSym[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
    2560           memset(filterCoeffSymQuant[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
    2561         }
    2562       }
    2563     }
    2564   }
    2565 }
    2566 
    2567 
    2568 Void TEncAdaptiveLoopFilter::xcalcPredFilterCoeff(int filtNo)
    2569 {
    2570   int *patternMap, varInd, i, k;
    2571  
    2572   patternMap=m_patternMapTab[filtNo];
    2573   for(varInd=0; varInd<NO_VAR_BINS; ++varInd)
    2574   {
    2575     k=0;
    2576     for(i = 0; i < MAX_SQR_FILT_LENGTH; i++)
    2577     {
    2578       if (patternMap[i]>0)
    2579       {
    2580         m_filterCoeffPrevSelected[varInd][i]=m_filterCoeffSym[m_varIndTab[varInd]][k];
    2581         k++;
    2582       }
    2583       else
    2584       {
    2585         m_filterCoeffPrevSelected[varInd][i]=0;
    2586       }
    2587 #if MQT_ALF_NPASS
    2588       if (m_iALFEncodePassReduction && (!m_iUsePreviousFilter || !m_iDesignCurrentFilter))
    2589       {
    2590         if((m_iCurrentPOC%m_iGOPSize) == 0)
    2591         {
    2592           m_aiFilterCoeffSaved[0][varInd][i] = m_aiFilterCoeffSaved[m_iGOPSize][varInd][i];
    2593           m_aiFilterCoeffSaved[m_iGOPSize][varInd][i] = m_filterCoeffPrevSelected[varInd][i];
    2594         }
    2595         else
    2596         {
    2597           m_aiFilterCoeffSaved[m_iCurrentPOC%m_iGOPSize][varInd][i] = m_filterCoeffPrevSelected[varInd][i];
    2598         }
    2599       }
    2600 #endif
    2601     }
    2602   }
    2603 }
    2604 
    2605 #if MQT_ALF_NPASS
    2606 UInt TEncAdaptiveLoopFilter::xcodeFiltCoeff(int **filterCoeffSymQuant, int filtNo, int varIndTab[], int filters_per_fr_best, int frNo, ALFParam* ALFp)
    2607 #else
    2608 Void TEncAdaptiveLoopFilter::xcodeFiltCoeff(int **filterCoeffSymQuant, int filtNo, int varIndTab[], int filters_per_fr_best, int frNo, ALFParam* ALFp)
    2609 #endif
    2610 {
    2611   int varInd, forceCoeff0, codedVarBins[NO_VAR_BINS], coeffBits, createBistream,   sqrFiltLength=m_sqrFiltLengthTab[filtNo],
    2612   fl=m_flTab[filtNo], coded;
    2613  
    2614   ALFp->filters_per_group_diff = filters_per_fr_best;
     5321#if !ALF_16_BA_GROUPS
     5322  if ( (m_uiVarGenMethod == ALF_BA) && ((*filters_per_fr_best) > 1) )
     5323  {
     5324    Int iLastFilter = (*filters_per_fr_best)-1;
     5325    if (intervalBest[iLastFilter][0] == NO_VAR_BINS-1)
     5326    {
     5327      intervalBest[iLastFilter-1][1] = NO_VAR_BINS-1;
     5328      (*filters_per_fr_best) = iLastFilter;
     5329    }
     5330  }
     5331#endif
     5332  findFilterCoeff(ESym, ySym, pixAcc, filterCoeffSym, filterCoeffSymQuant, intervalBest,
     5333    varIndTab, sqrFiltLength, (*filters_per_fr_best), weights, errorForce0CoeffTab);
     5334
     5335
     5336  xfindBestCoeffCodMethod(filterCoeffSymQuant, filter_shape, sqrFiltLength, (*filters_per_fr_best), errorForce0CoeffTab, lambda_val);
     5337  coeffBits = xcodeAuxInfo((*filters_per_fr_best), varIndTab, filter_shape, m_tempALFp);
     5338  coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, filter_shape, sqrFiltLength, (*filters_per_fr_best), 0, m_tempALFp);
     5339
     5340  if( *filters_per_fr_best == 1)
     5341  {
     5342    ::memset(varIndTab, 0, sizeof(Int)*NO_VAR_BINS);
     5343  }
     5344}
     5345
     5346
     5347/** code filter coefficients
     5348 * \param filterCoeffSymQuant filter coefficients buffer
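     5349 * \param filter_shape filter shape index (selects the coefficient count from m_sqrFiltLengthTab)
     5350 * \param varIndTab[] merge index information
     5351 * \param filters_per_fr_best the number of filters used in this picture
     5352 * \param ALFp ALF parameters; filters_per_group, predMethod, num_coeff and
     5353 *        filter_shape are filled in by this function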
     5354 * \returns bitrate
     5355 */
     5356UInt TEncAdaptiveLoopFilter::xcodeFiltCoeff(Int **filterCoeffSymQuant, Int filter_shape, Int varIndTab[], Int filters_per_fr_best, ALFParam* ALFp)
     5357{
     5358  Int coeffBits;   
     5359  Int sqrFiltLength = m_sqrFiltLengthTab[filter_shape] ;
     5360
    26155361  ALFp->filters_per_group = filters_per_fr_best;
    2616  
    2617   for(varInd=0; varInd<filters_per_fr_best; varInd++)
    2618   {
    2619     codedVarBins[varInd] = 1;
    2620   }
    2621   memcpy (ALFp->codedVarBins, codedVarBins, sizeof(int)*NO_VAR_BINS);
    2622   forceCoeff0=0;
    2623   for(varInd=0; varInd<filters_per_fr_best; varInd++)
    2624   {
    2625     if (codedVarBins[varInd] == 0)
    2626     {
    2627       forceCoeff0=1;
    2628       break;
    2629     }
    2630   }
    2631 
    2632   coded=1;
    2633   if (forceCoeff0==1 && filters_per_fr_best==1)
    2634   {
    2635     coded=0;
    2636     coeffBits = xcodeAuxInfo(-1, filters_per_fr_best, varIndTab, frNo, createBistream=1,filtNo, ALFp);
    2637   }
    2638   else
    2639   {
    2640     coeffBits = xcodeAuxInfo(filtNo, filters_per_fr_best, varIndTab, frNo, createBistream=1,filtNo, ALFp);
    2641   }
    2642  
    2643   ALFp->forceCoeff0 = forceCoeff0;
     5362
     5363  coeffBits = xcodeAuxInfo(filters_per_fr_best, varIndTab, filter_shape, ALFp);
     5364
     5365
    26445366  ALFp->predMethod = 0;
    26455367  ALFp->num_coeff = sqrFiltLength;
    2646   ALFp->realfiltNo=filtNo;
     5368  ALFp->filter_shape=filter_shape;
     5369
    26475370  if (filters_per_fr_best <= 1)
    26485371  {
    2649     ALFp->forceCoeff0 = 0;
    26505372    ALFp->predMethod = 0;
    26515373  }
    2652  
    2653   if (forceCoeff0==0)
    2654   {
    2655     coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength,
    2656                                        filters_per_fr_best, createBistream=1, ALFp);
    2657   }
    2658   else if (filters_per_fr_best>1)
    2659   {
    2660     coeffBits += xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength,
    2661                                              filters_per_fr_best, codedVarBins, createBistream=1, ALFp);
    2662   }
    2663 
    2664 #if MQT_ALF_NPASS
     5374
     5375  coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, filter_shape, sqrFiltLength,
     5376    filters_per_fr_best, 1, ALFp);
     5377
    26655378  return (UInt)coeffBits;
    2666 #endif
    2667 }
    2668 
    2669 
    2670 
    2671 #if TSB_ALF_HEADER
    2672 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, ALFParam *pAlfParam)
    2673 #else
    2674 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist)
     5379}
     5380
     /** Collect the ALF CU-control flags of one LCU and append them to *pvFlags.
      * One flag is read per control unit via pcCU->getAlfCtrlFlag(); a control unit
      * spans ctrlNumSU = maxNumSUInLCU >> (2 * min(alfDepth, CU depth)) SUs.
      * \param pcAlfLCU      LCU descriptor; startSU, endSU, bAllSUsInLCUInSameSlice and pcCU are read
      * \param pvFlags       output vector; flags are appended with push_back (it is not cleared here)
      * \param alfDepth      ALF CU-control depth, clipped per CU to the CU's own depth
      * \param maxNumSUInLCU number of SUs in one LCU (the visible caller passes getNumPartInCU())
      */
     5381Void TEncAdaptiveLoopFilter::getCtrlFlagsFromCU(AlfLCUInfo* pcAlfLCU, std::vector<UInt> *pvFlags, Int alfDepth, UInt maxNumSUInLCU)
     5382{
     5383  const UInt startSU               = pcAlfLCU->startSU;
     5384  const UInt endSU                 = pcAlfLCU->endSU;
     5385  const Bool bAllSUsInLCUInSameSlice = pcAlfLCU->bAllSUsInLCUInSameSlice;
     5386
     5387  TComDataCU* pcCU = pcAlfLCU->pcCU;
     5388  UInt  currSU, CUDepth, setDepth, ctrlNumSU;
     5389
     5390  currSU = startSU;
     5391  // Simple case: the whole LCU is in one slice, so no slice/picture boundary checks are made.
     5392  if(bAllSUsInLCUInSameSlice)
     5393  {
     5394    while(currSU < maxNumSUInLCU)
     5395    {
     5396      //depth of this CU
     5397      CUDepth = pcCU->getDepth(currSU);
     5398      // NOTE(review): Int alfDepth is compared with UInt CUDepth (signed/unsigned mix); the visible caller passes a cast UInt.
     5399      //choose the min. depth for ALF
     5400      setDepth   = (alfDepth < CUDepth)?(alfDepth):(CUDepth);
     5401      ctrlNumSU = maxNumSUInLCU >> (setDepth << 1);
     5402      // one flag per control unit, then jump to the first SU of the next control unit
     5403      pvFlags->push_back(pcCU->getAlfCtrlFlag(currSU));
     5404      currSU += ctrlNumSU;
     5405    }
     5406
     5407    return;
     5408  }
     5409
     5410  // General case: only SUs within [startSU, endSU] and inside the picture are considered.
     5411  const UInt  LCUX = pcCU->getCUPelX();
     5412  const UInt  LCUY = pcCU->getCUPelY();
     5413
     5414  Bool  bFirst, bValidCU;
     5415  UInt  idx, LPelXSU, TPelYSU;
     5416
     5417  bFirst= true;
     5418  while(currSU <= endSU)
     5419  {
     5420    //check picture boundary
     5421    while(!( LCUX + g_auiRasterToPelX[ g_auiZscanToRaster[currSU] ] < m_img_width  ) ||
     5422          !( LCUY + g_auiRasterToPelY[ g_auiZscanToRaster[currSU] ] < m_img_height )
     5423      )
     5424    {
     5425      currSU++;
     5426
     5427      if(currSU >= maxNumSUInLCU || currSU > endSU)
     5428      {
     5429        break;
     5430      }
     5431    }
     5432    // the inner break only leaves the skip loop; repeat the test to leave the outer loop as well
     5433    if(currSU >= maxNumSUInLCU || currSU > endSU)
     5434    {
     5435      break;
     5436    }
     5437
     5438    //depth of this CU
     5439    CUDepth = pcCU->getDepth(currSU);
     5440
     5441    //choose the min. depth for ALF
     5442    setDepth   = (alfDepth < CUDepth)?(alfDepth):(CUDepth);
     5443    ctrlNumSU = maxNumSUInLCU >> (setDepth << 1);
     5444    // first iteration only: round currSU down to a multiple of ctrlNumSU (start of its control unit)
     5445    if(bFirst)
     5446    {
     5447      if(currSU !=0 )
     5448      {
     5449        currSU = ((UInt)(currSU/ctrlNumSU))* ctrlNumSU;
     5450      }
     5451      bFirst = false;
     5452    }
     5453    // the control unit counts if at least one of its SUs is in [startSU, endSU] and inside the picture
     5454    bValidCU = false;
     5455    for(idx = currSU; idx < currSU + ctrlNumSU; idx++)
     5456    {
     5457      if(idx < startSU || idx > endSU)
     5458      {
     5459        continue;
     5460      }
     5461
     5462      LPelXSU   = LCUX + g_auiRasterToPelX[ g_auiZscanToRaster[idx] ];
     5463      TPelYSU   = LCUY + g_auiRasterToPelY[ g_auiZscanToRaster[idx] ];
     5464
     5465      if( !( LPelXSU < m_img_width )  || !( TPelYSU < m_img_height )  )
     5466      {
     5467        continue;
     5468      }
     5469
     5470      bValidCU = true;
     5471    }
     5472
     5473    if(bValidCU)
     5474    {
     5475      pvFlags->push_back(pcCU->getAlfCtrlFlag(currSU));
     5476    }
     5477
     5478    currSU += ctrlNumSU;
     5479  }
     5480}
     5481
     5482
     5483/** set ALF CU control flags
     5484 * \param [in] uiAlfCtrlDepth ALF CU control depth
     5485 * \param [in] pcPicOrg picture of original signal
     5486 * \param [in] pcPicDec picture before filtering
     5487 * \param [in] pcPicRest picture after filtering
     5488 * \param [out] ruiDist distortion after CU control
     5489 * \param [in,out] vAlfCUCtrlParam ALF CU control parameters
     5490 */
     5491#if LCU_SYNTAX_ALF 
     5492#if HHI_INTERVIEW_SKIP
     5493Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlags(UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Pel* imgUsed, Int stride, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam)
     5494#else
     5495Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlags(UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Int stride, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam)
     5496#endif
     5497#else
     5498#if HHI_INTERVIEW_SKIP
     5499Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, TComPicYuv* pUsedPelMap, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam)
     5500#else
     5501Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, std::vector<AlfCUCtrlInfo>& vAlfCUCtrlParam)
     5502#endif
    26755503#endif
    26765504{
    26775505  ruiDist = 0;
    2678 #if TSB_ALF_HEADER
    2679   pAlfParam->num_alf_cu_flag = 0;
    2680 #endif
    2681  
    2682   for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
    2683   {
    2684     TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
    2685 #if TSB_ALF_HEADER
    2686     xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, pAlfParam);
    2687 #else
    2688     xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist);
    2689 #endif
    2690   }
    2691 }
    2692 
    2693 #if TSB_ALF_HEADER
    2694 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, ALFParam *pAlfParam)
    2695 #else
    2696 Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist)
     5506  std::vector<UInt> uiFlags;
     5507
     5508  //initial
     5509  for(Int s=0; s< m_uiNumSlicesInPic; s++)
     5510  {
     5511    vAlfCUCtrlParam[s].cu_control_flag = 1;
     5512    vAlfCUCtrlParam[s].alf_max_depth   = uiAlfCtrlDepth;
     5513
     5514    vAlfCUCtrlParam[s].alf_cu_flag.reserve(m_uiNumCUsInFrame << ((g_uiMaxCUDepth-1)*2));
     5515    vAlfCUCtrlParam[s].alf_cu_flag.resize(0);
     5516  }
     5517
     5518  //LCU-based on/off control
     5519  for( UInt CUAddr = 0; CUAddr < m_pcPic->getNumCUsInFrame() ; CUAddr++ )
     5520  {
     5521    TComDataCU* pcCU = m_pcPic->getCU( CUAddr );
     5522#if LCU_SYNTAX_ALF
     5523#if HHI_INTERVIEW_SKIP
     5524    setCUAlfCtrlFlag(pcCU, 0, 0, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, imgUsed, stride, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag);
     5525#else
     5526    setCUAlfCtrlFlag(pcCU, 0, 0, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, stride, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag);
     5527#endif
     5528#else
     5529#if HHI_INTERVIEW_SKIP
     5530    xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest,imgUsed, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag);
     5531#else
     5532    xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, vAlfCUCtrlParam[0].alf_cu_flag);
     5533#endif
     5534#endif
     5535  }
     5536  vAlfCUCtrlParam[0].num_alf_cu_flag = (UInt)(vAlfCUCtrlParam[0].alf_cu_flag.size());
     5537
     5538
     5539  if(m_uiNumSlicesInPic > 1)
     5540  {
     5541    //reset the first slice on/off flags
     5542    vAlfCUCtrlParam[0].alf_cu_flag.resize(0);
     5543
     5544    //distribute on/off flags to slices
     5545    std::vector<UInt> vCtrlFlags;
     5546    vCtrlFlags.reserve(1 << ((g_uiMaxCUDepth-1)*2));
     5547
     5548    for(Int s=0; s < m_uiNumSlicesInPic; s++)
     5549    {
     5550      if(!m_pcPic->getValidSlice(s))
     5551      {
     5552        continue;
     5553      }
     5554      std::vector< AlfLCUInfo* >& vpAlfLCU = m_pvpAlfLCU[s];
     5555      for(Int i=0; i< vpAlfLCU.size(); i++)
     5556      {
     5557        //get on/off flags for one LCU
     5558        vCtrlFlags.resize(0);
     5559        getCtrlFlagsFromCU(vpAlfLCU[i], &vCtrlFlags, (Int)uiAlfCtrlDepth, m_pcPic->getNumPartInCU());
     5560
     5561        for(Int k=0; k< vCtrlFlags.size(); k++)
     5562        {
     5563          vAlfCUCtrlParam[s].alf_cu_flag.push_back( vCtrlFlags[k]);
     5564        }
     5565      } //i (LCU)
     5566      vAlfCUCtrlParam[s].num_alf_cu_flag = (UInt)(vAlfCUCtrlParam[s].alf_cu_flag.size());
     5567    } //s (Slice)
     5568  }
     5569}
     5570
     5571#if LCU_SYNTAX_ALF
     5572#if HHI_INTERVIEW_SKIP
     5573Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Pel* imgUsed, Int stride, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag)
     5574#else
     5575Void TEncAdaptiveLoopFilter::setCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, Pel* imgOrg, Pel* imgDec, Pel* imgRest, Int stride, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag)
     5576#endif
     5577#else
     5578#if HHI_INTERVIEW_SKIP
     5579Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, TComPicYuv* pcUsedPelMap, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag)
     5580#else
     5581Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, std::vector<UInt>& vCUCtrlFlag)
     5582#endif
    26975583#endif
    26985584{
     
    27035589  UInt uiBPelY   = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1;
    27045590 
    2705   if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
     5591  if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    27065592  {
    27075593    bBoundary = true;
     
    27165602      uiTPelY   = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
    27175603     
    2718       if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
    2719 #if TSB_ALF_HEADER
    2720         xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, pAlfParam);
    2721 #else
    2722       xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist);
     5604      if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
     5605#if LCU_SYNTAX_ALF
     5606#if HHI_INTERVIEW_SKIP
     5607        setCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, imgUsed, stride, ruiDist, vCUCtrlFlag);
     5608#else
     5609        setCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, imgOrg, imgDec, imgRest, stride, ruiDist, vCUCtrlFlag);
     5610#endif
     5611#else
     5612#if HHI_INTERVIEW_SKIP
     5613        xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, imgUsed, ruiDist, vCUCtrlFlag);
     5614#else
     5615        xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, vCUCtrlFlag);
     5616#endif
    27235617#endif
    27245618    }
     
    27305624    return;
    27315625  }
    2732  
     5626#if !LCU_SYNTAX_ALF
    27335627  UInt uiCUAddr = pcCU->getAddr();
     5628#endif
    27345629  UInt64 uiRecSSD = 0;
    27355630  UInt64 uiFiltSSD = 0;
     
    27475642    uiBPelY   = uiTPelY + iHeight - 1;
    27485643
    2749     if( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() )
    2750     {
    2751       iWidth = pcCU->getSlice()->getSPS()->getWidth() - uiLPelX;
     5644    if( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() )
     5645    {
     5646      iWidth = pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() - uiLPelX;
    27525647    }
    27535648   
    2754     if( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() )
    2755     {
    2756       iHeight = pcCU->getSlice()->getSPS()->getHeight() - uiTPelY;
     5649    if( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() )
     5650    {
     5651      iHeight = pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() - uiTPelY;
    27575652    }
    27585653   
     
    27665661  }
    27675662 
     5663#if LCU_SYNTAX_ALF
     5664  Int  offset = uiTPelY*stride + uiLPelX;
     5665  Pel* pOrg  = imgOrg  + offset;
     5666  Pel* pRec  = imgDec  + offset;
     5667  Pel* pFilt = imgRest + offset;
     5668
     5669#if HHI_INTERVIEW_SKIP
     5670  Pel* pUsed = NULL ;
     5671  if( imgUsed )
     5672  {
     5673    pUsed = imgUsed + offset;
     5674  }
     5675  uiRecSSD  += xCalcSSD( pOrg, pRec,  pUsed, iWidth, iHeight, stride );
     5676  uiFiltSSD += xCalcSSD( pOrg, pFilt, pUsed, iWidth, iHeight, stride );
     5677#else
     5678  uiRecSSD  += xCalcSSD( pOrg, pRec,  iWidth, iHeight, stride );
     5679  uiFiltSSD += xCalcSSD( pOrg, pFilt, iWidth, iHeight, stride );
     5680#endif
     5681#else
    27685682  Pel* pOrg = pcPicOrg->getLumaAddr(uiCUAddr, uiAbsPartIdx);
    27695683  Pel* pRec = pcPicDec->getLumaAddr(uiCUAddr, uiAbsPartIdx);
    27705684  Pel* pFilt = pcPicRest->getLumaAddr(uiCUAddr, uiAbsPartIdx);
    27715685 
     5686#if HHI_INTERVIEW_SKIP
     5687  Pel* pUsed = pcUsedPelMap->getLumaAddr(uiCUAddr, uiAbsPartIdx);
     5688  uiRecSSD  += xCalcSSD( pOrg, pRec, pUsed, iWidth, iHeight, pcPicOrg->getStride() );
     5689  uiFiltSSD += xCalcSSD( pOrg, pFilt, pUsed, iWidth, iHeight, pcPicOrg->getStride() );
     5690#else
    27725691  uiRecSSD  += xCalcSSD( pOrg, pRec,  iWidth, iHeight, pcPicOrg->getStride() );
    27735692  uiFiltSSD += xCalcSSD( pOrg, pFilt, iWidth, iHeight, pcPicOrg->getStride() );
    2774  
     5693#endif
     5694#endif
    27755695  if (uiFiltSSD < uiRecSSD)
    27765696  {
    27775697    ruiDist += uiFiltSSD;
    27785698    pcCU->setAlfCtrlFlagSubParts(1, uiAbsPartIdx, uiSetDepth);
    2779 #if TSB_ALF_HEADER
    2780     pAlfParam->alf_cu_flag[pAlfParam->num_alf_cu_flag]=1;
    2781 #endif
     5699    vCUCtrlFlag.push_back(1);
     5700
     5701#if LCU_SYNTAX_ALF
     5702    for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(m_img_height-1))  ;i++)
     5703    {
     5704      for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(m_img_width-1)) ;j++)
     5705      {
     5706#else
    27825707    for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(pcPicOrg->getHeight()-1))  ;i++)
    27835708    {
    27845709      for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(pcPicOrg->getWidth()-1)) ;j++)
    27855710      {
     5711#endif
    27865712        m_maskImg[i][j]=1;
    27875713      }
     
    27925718    ruiDist += uiRecSSD;
    27935719    pcCU->setAlfCtrlFlagSubParts(0, uiAbsPartIdx, uiSetDepth);
    2794 #if TSB_ALF_HEADER
    2795     pAlfParam->alf_cu_flag[pAlfParam->num_alf_cu_flag]=0;
    2796 #endif
     5720    vCUCtrlFlag.push_back(0);
     5721#if LCU_SYNTAX_ALF
     5722    for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(m_img_height-1))  ;i++)
     5723    {
     5724      for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(m_img_width-1)) ;j++)
     5725      {
     5726#else
    27975727    for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(pcPicOrg->getHeight()-1))  ;i++)
    27985728    {
    27995729      for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(pcPicOrg->getWidth()-1)) ;j++)
    28005730      {
     5731#endif
    28015732        m_maskImg[i][j]=0;
    28025733      }
    28035734    }
    28045735  }
    2805 #if TSB_ALF_HEADER
    2806   pAlfParam->num_alf_cu_flag++;
    2807 #endif
    2808 }
     5736}
     5737
     5738#if !LCU_SYNTAX_ALF
    28095739
    28105740Void TEncAdaptiveLoopFilter::xReDesignFilterCoeff_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, Bool bReadCorr)
    28115741{
    2812  
    2813   Int tap = m_pcTempAlfParam->tap;
     5742  Int tap = m_pcTempAlfParam->filter_shape;
    28145743  Int    LumaStride = pcPicOrg->getStride();
    2815   imgpel* pOrg = (imgpel*)pcPicOrg->getLumaAddr();
    2816   imgpel* pDec = (imgpel*)pcPicDec->getLumaAddr();
    2817   imgpel* pRest = (imgpel*)pcPicRest->getLumaAddr();
     5744  Pel* pOrg  = pcPicOrg->getLumaAddr();
     5745  Pel* pDec  = pcPicDec->getLumaAddr();
     5746  Pel* pRest = pcPicRest->getLumaAddr();
    28185747  xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcTempAlfParam, tap, LumaStride);
    28195748 
    2820 }
     5749  if (m_iALFEncodePassReduction)
     5750  {
     5751    if(!m_iUsePreviousFilter)
     5752    {
     5753      saveFilterCoeffToBuffer(m_filterCoeffSym, m_pcTempAlfParam->filters_per_group, m_varIndTab, m_pcTempAlfParam->alf_pcr_region_flag, tap);
     5754    }
     5755  }
     5756}
     5757
    28215758Void TEncAdaptiveLoopFilter::xCUAdaptiveControl_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost)
    28225759{
    2823 #if MQT_ALF_NPASS
    2824   imgpel** maskImgTemp;
     5760  if(!m_bAlfCUCtrlEnabled) return;
     5761  Bool bChanged = false;
     5762  std::vector<AlfCUCtrlInfo> vAlfCUCtrlParamTemp(m_vBestAlfCUCtrlParam);
     5763
     5764  Pel** maskImgTemp;
    28255765
    28265766  if(m_iALFEncodePassReduction == 2)
    28275767  {
    2828     get_mem2Dpel(&maskImgTemp, m_im_height, m_im_width);
    2829   }
    2830 #endif
     5768    initMatrix_Pel(&maskImgTemp, m_img_height, m_img_width);
     5769  }
    28315770
    28325771  m_pcEntropyCoder->setAlfCtrl(true);
     
    28435782    pcPicRest->copyToPicLuma(m_pcPicYuvTmp);
    28445783    copyALFParam(m_pcTempAlfParam, &cFrmAlfParam);
    2845     m_pcTempAlfParam->cu_control_flag = 1;
    2846    
    2847 #if MQT_ALF_NPASS
     5784
    28485785    for (UInt uiRD = 0; uiRD <= m_iALFNumOfRedesign; uiRD++)
    2849 #else
    2850     for (UInt uiRD = 0; uiRD <= ALF_NUM_OF_REDESIGN; uiRD++)
    2851 #endif
    28525786    {
    28535787      if (uiRD)
     
    28605794      Double dCost;
    28615795     //m_pcPicYuvTmp: filtered signal, pcPicDec: orig reconst
    2862 #if TSB_ALF_HEADER
    2863       xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here
    2864 #else
    2865       xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here
    2866 #endif
    2867      
    2868       xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost);
    2869      
     5796      xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, vAlfCUCtrlParamTemp);
     5797      xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost, &vAlfCUCtrlParamTemp);
    28705798      if (dCost < rdMinCost)
    28715799      {
     5800        bChanged = true;
     5801        m_vBestAlfCUCtrlParam = vAlfCUCtrlParamTemp;
    28725802        uiBestDepth = uiDepth;
    28735803        rdMinCost = dCost;
     
    28785808        //save maskImg
    28795809        xCopyTmpAlfCtrlFlagsFrom();
    2880 #if MQT_ALF_NPASS
    28815810        if(m_iALFEncodePassReduction == 2)
    28825811        {
    2883           ::memcpy(maskImgTemp[0], m_maskImg[0], sizeof(imgpel)*m_im_height* m_im_width);
     5812          ::memcpy(maskImgTemp[0], m_maskImg[0], sizeof(Pel)*m_img_height* m_img_width);
    28845813        }
    2885 #endif
    2886       }
    2887     }
    2888   }
    2889  
    2890   if (m_pcBestAlfParam->cu_control_flag)
    2891   {
    2892 #if MQT_ALF_NPASS
     5814      }
     5815    }
     5816  }
     5817
     5818  if(bChanged)
     5819  {
    28935820    if(m_iALFEncodePassReduction == 2)
    28945821    {
    28955822      UInt uiDepth = uiBestDepth;
    2896       ::memcpy(m_maskImg[0], maskImgTemp[0], sizeof(imgpel)*m_im_height* m_im_width);
     5823      ::memcpy(m_maskImg[0], maskImgTemp[0], sizeof(Pel)*m_img_height* m_img_width);
    28975824      xCopyTmpAlfCtrlFlagsTo();
    28985825 
     
    29075834      UInt64 uiRate, uiDist;
    29085835      Double dCost;
    2909 
    2910 #if TSB_ALF_HEADER
    2911       xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here
    2912 #else
    2913       xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here
    2914 #endif
    2915 
    2916       xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost);
    2917 
     5836      xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, vAlfCUCtrlParamTemp);
     5837      xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost, &vAlfCUCtrlParamTemp);
    29185838      if (dCost < rdMinCost)
    29195839      {
     
    29245844        copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    29255845        xCopyTmpAlfCtrlFlagsFrom();
    2926       }
    2927     }
    2928 #endif
     5846        m_vBestAlfCUCtrlParam = vAlfCUCtrlParamTemp;
     5847      }
     5848    }
    29295849
    29305850    m_pcEntropyCoder->setAlfCtrl(true);
    29315851    m_pcEntropyCoder->setMaxAlfCtrlDepth(uiBestDepth);
    29325852    xCopyTmpAlfCtrlFlagsTo();
     5853
    29335854    m_pcPicYuvBest->copyToPicLuma(pcPicRest);//copy m_pcPicYuvBest to pcPicRest
    29345855    xCopyDecToRestCUs(pcPicDec, pcPicRest); //pcPicRest = pcPicDec
     
    29415862  freeALFParam(&cFrmAlfParam);
    29425863
    2943 #if MQT_ALF_NPASS
    29445864  if(m_iALFEncodePassReduction == 2)
    29455865  {
    2946     free_mem2Dpel(maskImgTemp);
    2947   }
    2948 #endif
    2949 }
    2950 
    2951 
    2952 Void TEncAdaptiveLoopFilter::xFilterTapDecision_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost)
    2953 {
    2954 #if MQT_ALF_NPASS
    2955   if(m_iALFEncodePassReduction)
    2956   {
    2957     return;  // filter tap has been decided in xEncALFLuma_qc
    2958   }
    2959 #endif
    2960 
    2961   // restriction for non-referenced B-slice
    2962   if (m_eSliceType == B_SLICE && m_iPicNalReferenceIdc == 0)
    2963   {
    2964     return;
    2965   }
    2966  
    2967   UInt64 uiRate, uiDist;
    2968   Double dCost;
    2969  
    2970   if (m_pcBestAlfParam->cu_control_flag)
    2971   {
    2972     xCopyTmpAlfCtrlFlagsFrom();
    2973   }
    2974  
    2975   Bool bChanged = false;
    2976   for (Int iTap = ALF_MIN_NUM_TAP; iTap <= ALF_MAX_NUM_TAP; iTap += 2)
    2977   {
    2978     copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
    2979     m_pcTempAlfParam->tap = iTap;
    2980 #if TI_ALF_MAX_VSIZE_7
    2981     m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(m_pcTempAlfParam->tap);
    2982     m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(m_pcTempAlfParam->tap);
    2983 #else
    2984     m_pcTempAlfParam->num_coeff = (Int)(iTap*iTap/4) + 2;
    2985 #endif
    2986    
    2987     if (m_pcTempAlfParam->cu_control_flag)
    2988     {
    2989       xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, false);
    2990 #if TSB_ALF_HEADER
    2991       xSetCUAlfCtrlFlags_qc(m_pcEntropyCoder->getMaxAlfCtrlDepth(), pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam);
    2992 #else
    2993       xSetCUAlfCtrlFlags_qc(m_pcEntropyCoder->getMaxAlfCtrlDepth(), pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist);
    2994 #endif
    2995       xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost);
    2996     }
    2997 
    2998     else
    2999     {
    3000       Int    Height = pcPicOrg->getHeight();
    3001       Int    Width = pcPicOrg->getWidth();
    3002       for (Int i=0; i<Height; i++)
    3003       {
    3004         for (Int j=0; j<Width; j++)
    3005         {
    3006           m_maskImg[i][j] = 1;
    3007         }
    3008       }
    3009       xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, false);
    3010 
    3011       xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, m_pcTempAlfParam, uiRate, uiDist, dCost);
    3012     }
    3013 
    3014     if (dCost < rdMinCost)
    3015     {
    3016       rdMinCost = dCost;
    3017       ruiMinDist = uiDist;
    3018       ruiMinRate = uiRate;
    3019       m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest);
    3020       copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    3021       bChanged = true;
    3022       if (m_pcTempAlfParam->cu_control_flag)
    3023       {
    3024         xCopyTmpAlfCtrlFlagsFrom();
    3025       }
    3026     }
    3027   }
    3028  
    3029   if (m_pcBestAlfParam->cu_control_flag)
    3030   {
    3031     xCopyTmpAlfCtrlFlagsTo();
    3032     if (bChanged)
    3033     {
    3034       m_pcPicYuvBest->copyToPicLuma(pcPicRest);
    3035       xCopyDecToRestCUs(pcPicDec, pcPicRest);
    3036     }
    3037   }
    3038   else if (m_pcBestAlfParam->tap > ALF_MIN_NUM_TAP)
    3039   {
    3040     m_pcPicYuvBest->copyToPicLuma(pcPicRest);
    3041   }
    3042  
    3043   copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
    3044 }
    3045 
     5866    destroyMatrix_Pel(maskImgTemp);
     5867  }
     5868}
     5869
     5870#endif
    30465871
    30475872#define ROUND(a)  (((a) < 0)? (int)((a) - 0.5) : (int)((a) + 0.5))
     
    30505875
    30515876// Functions related to finding the filter coefficients
    3052 Int TEncAdaptiveLoopFilter::gnsCholeskyDec(double **inpMatr, double outMatr[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], int noEq)
     5877Int TEncAdaptiveLoopFilter::gnsCholeskyDec(Double **inpMatr, Double outMatr[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF], Int noEq)
    30535878{
    3054   int
    3055   i, j, k;     /* Looping Variables */
    3056   double
    3057   scale;       /* scaling factor for each row */
    3058   double
    3059   invDiag[MAX_SQR_FILT_LENGTH];  /* Vector of the inverse of diagonal entries of outMatr */
    3060  
    3061  
    3062   /*
    3063    *  Cholesky decomposition starts
    3064    */
     5879  Int i, j, k;     /* Looping Variables */
     5880  Double scale;       /* scaling factor for each row */
     5881  Double invDiag[ALF_MAX_NUM_COEF];  /* Vector of the inverse of diagonal entries of outMatr */
     5882 
     5883  //  Cholesky decomposition starts
    30655884 
    30665885  for(i = 0; i < noEq; i++)
     
    30695888    {
    30705889      /* Compute the scaling factor */
    3071       scale=inpMatr[i][j];
    3072       if ( i > 0) for( k = i - 1 ; k >= 0 ; k--)
    3073         scale -= outMatr[k][j] * outMatr[k][i];
    3074      
     5890      scale = inpMatr[i][j];
     5891      if ( i > 0)
     5892      {
     5893        for( k = i - 1 ; k >= 0 ; k--)
     5894        {
     5895          scale -= outMatr[k][j] * outMatr[k][i];
     5896        }
     5897      }
    30755898      /* Compute i'th row of outMatr */
    3076       if(i==j)
     5899      if(i == j)
    30775900      {
    30785901        if(scale <= REG_SQR ) // if(scale <= 0 )  /* If inpMatr is singular */
    30795902        {
    3080           return(0);
     5903          return 0;
    30815904        }
    3082         else              /* Normal operation */
    3083           invDiag[i] =  1.0/(outMatr[i][i]=sqrt(scale));
     5905        else
     5906        {
     5907           /* Normal operation */
     5908           invDiag[i] =  1.0 / (outMatr[i][i] = sqrt(scale));
     5909        }
    30845910      }
    30855911      else
    30865912      {
    3087         outMatr[i][j] = scale*invDiag[i]; /* Upper triangular part          */
     5913        outMatr[i][j] = scale * invDiag[i]; /* Upper triangular part          */
    30885914        outMatr[j][i] = 0.0;              /* Lower triangular part set to 0 */
    30895915      }                   
    30905916    }
    30915917  }
    3092   return(1); /* Signal that Cholesky factorization is successfully performed */
    3093 }
    3094 
    3095 
    3096 Void TEncAdaptiveLoopFilter::gnsTransposeBacksubstitution(double U[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], double rhs[], double x[], int order)
    3097 {
    3098   int
    3099   i,j;              /* Looping variables */
    3100   double
    3101   sum;              /* Holds backsubstitution from already handled rows */
     5918  return 1; /* Signal that Cholesky factorization is successfully performed */
     5919}
     5920
     5921
     5922Void TEncAdaptiveLoopFilter::gnsTransposeBacksubstitution(Double U[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF], Double rhs[], Double x[], Int order)
     5923{
     5924  Int i,j;              /* Looping variables */
     5925  Double sum;              /* Holds backsubstitution from already handled rows */
    31025926 
    31035927  /* Backsubstitution starts */
    3104   x[0] = rhs[0]/U[0][0];               /* First row of U'                   */
     5928  x[0] = rhs[0] / U[0][0];               /* First row of U'                   */
    31055929  for (i = 1; i < order; i++)
    31065930  {         /* For the rows 1..order-1           */
    31075931   
    31085932    for (j = 0, sum = 0.0; j < i; j++) /* Backsubst already solved unknowns */
    3109       sum += x[j]*U[j][i];
    3110    
    3111     x[i]=(rhs[i] - sum)/U[i][i];       /* i'th component of solution vect.  */
    3112   }
    3113 }
    3114 
    3115 
    3116 
    3117 Void  TEncAdaptiveLoopFilter::gnsBacksubstitution(double R[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], double z[MAX_SQR_FILT_LENGTH], int R_size, double A[MAX_SQR_FILT_LENGTH])
    3118 {
    3119   int i, j;
    3120   double sum;
     5933    {
     5934      sum += x[j] * U[j][i];
     5935    }
     5936    x[i] = (rhs[i] - sum) / U[i][i];       /* i'th component of solution vect.  */
     5937  }
     5938}
     5939
     5940Void  TEncAdaptiveLoopFilter::gnsBacksubstitution(Double R[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF], Double z[ALF_MAX_NUM_COEF], Int R_size, Double A[MAX_SQR_FILT_LENGTH])
     5941{
     5942  Int i, j;
     5943  Double sum;
    31215944 
    31225945  R_size--;
     
    31265949  for (i = R_size-1; i >= 0; i--)
    31275950  {
    3128     for (j = i+1, sum = 0.0; j <= R_size; j++)
     5951    for (j = i + 1, sum = 0.0; j <= R_size; j++)
     5952    {
    31295953      sum += R[i][j] * A[j];
     5954    }
    31305955   
    31315956    A[i] = (z[i] - sum) / R[i][i];
     
    31345959
    31355960
    3136 Int TEncAdaptiveLoopFilter::gnsSolveByChol(double **LHS, double *rhs, double *x, int noEq)
    3137 {
    3138   double aux[MAX_SQR_FILT_LENGTH];     /* Auxiliary vector */
    3139   double U[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH];    /* Upper triangular Cholesky factor of LHS */
    3140   int  i, singular;          /* Looping variable */
     5961Int TEncAdaptiveLoopFilter::gnsSolveByChol(Double **LHS, Double *rhs, Double *x, Int noEq)
     5962{
     5963  assert(noEq > 0);
     5964
     5965  Double aux[ALF_MAX_NUM_COEF];     /* Auxiliary vector */
     5966  Double U[ALF_MAX_NUM_COEF][ALF_MAX_NUM_COEF];    /* Upper triangular Cholesky factor of LHS */
     5967  Int  i, singular;          /* Looping variable */
    31415968 
    31425969  /* The equation to be solved is LHSx = rhs */
     
    31455972  if(gnsCholeskyDec(LHS, U, noEq)) /* If Cholesky decomposition has been successful */
    31465973  {
    3147     singular=1;
     5974    singular = 1;
    31485975    /* Now, the equation is  U'*U*x = rhs, where U is upper triangular
    31495976     * Solve U'*aux = rhs for aux
     
    31575984  else /* LHS was singular */
    31585985  {
    3159     singular=0;
     5986    singular = 0;
    31605987   
    31615988    /* Regularize LHS */
    3162     for(i=0; i<noEq; i++)
     5989    for(i=0; i < noEq; i++)
     5990    {
    31635991      LHS[i][i] += REG;
     5992    }
    31645993    /* Compute upper triangular U such that U'*U = regularized LHS */
    31655994    singular = gnsCholeskyDec(LHS, U, noEq);
    3166     /* Solve  U'*aux = rhs for aux */ 
    3167     gnsTransposeBacksubstitution(U, rhs, aux, noEq);   
    3168    
    3169     /* Solve U*x = aux for x */
    3170     gnsBacksubstitution(U, aux, noEq, x);
     5995    if ( singular == 1 )
     5996    {
     5997      /* Solve  U'*aux = rhs for aux */ 
     5998      gnsTransposeBacksubstitution(U, rhs, aux, noEq);   
     5999     
     6000      /* Solve U*x = aux for x */
     6001      gnsBacksubstitution(U, aux, noEq, x);     
     6002    }
     6003    else
     6004    {
     6005      x[0] = 1.0;
     6006      for (i = 1; i < noEq; i++ )
     6007      {
     6008        x[i] = 0.0;
     6009      }
     6010    }
    31716011  } 
    3172   return(singular);
    3173 }
    3174 
    3175 
    3176 //////////////////////////////////////////////////////////////////////////////////////////
    3177 
    3178 
    3179 Void TEncAdaptiveLoopFilter::add_A(double **Amerged, double ***A, int start, int stop, int size)
     6012  return singular;
     6013}
     6014
     6015Void TEncAdaptiveLoopFilter::add_A(Double **Amerged, Double ***A, Int start, Int stop, Int size)
    31806016{
    3181   int
    3182   i, j, ind;          /* Looping variable */
    3183  
    3184   for (i=0; i<size; i++)
    3185   {
    3186     for (j=0; j<size; j++)
    3187     {
    3188       Amerged[i][j]=0;
    3189       for (ind=start; ind<=stop; ind++)
    3190       {
    3191         Amerged[i][j]+=A[ind][i][j];
    3192       }
    3193     }
    3194   }
    3195 }
    3196 
    3197 Void TEncAdaptiveLoopFilter::add_b(double *bmerged, double **b, int start, int stop, int size)
     6017  Int i, j, ind;          /* Looping variable */
     6018 
     6019  for (i = 0; i < size; i++)
     6020  {
     6021    for (j = 0; j < size; j++)
     6022    {
     6023      Amerged[i][j] = 0;
     6024      for (ind = start; ind <= stop; ind++)
     6025      {
     6026        Amerged[i][j] += A[ind][i][j];
     6027      }
     6028    }
     6029  }
     6030}
     6031
     6032Void TEncAdaptiveLoopFilter::add_b(Double *bmerged, Double **b, Int start, Int stop, Int size)
    31986033{
    3199   int
    3200   i, ind;          /* Looping variable */
    3201  
    3202   for (i=0; i<size; i++)
    3203   {
    3204     bmerged[i]=0;
    3205     for (ind=start; ind<=stop; ind++)
    3206     {
    3207       bmerged[i]+=b[ind][i];
    3208     }
    3209   }
    3210 }
    3211 
    3212 double TEncAdaptiveLoopFilter::calculateErrorCoeffProvided(double **A, double *b, double *c, int size)
    3213 {
    3214   int i, j;
    3215   double error, sum=0;
    3216  
    3217   error=0;
    3218   for (i=0; i<size; i++)   //diagonal
    3219   {
    3220     sum=0;
    3221     for (j=i+1; j<size; j++)
    3222       sum+=(A[j][i]+A[i][j])*c[j];
    3223     error+=(A[i][i]*c[i]+sum-2*b[i])*c[i];
    3224   }
    3225  
    3226   return(error);
    3227 }
    3228 
    3229 double TEncAdaptiveLoopFilter::calculateErrorAbs(double **A, double *b, double y, int size)
    3230 {
    3231   int i;
    3232   double error, sum;
    3233   double c[MAX_SQR_FILT_LENGTH];
     6034  Int i, ind;          /* Looping variable */
     6035 
     6036  for (i = 0; i < size; i++)
     6037  {
     6038    bmerged[i] = 0;
     6039    for (ind = start; ind <= stop; ind++)
     6040    {
     6041      bmerged[i] += b[ind][i];
     6042    }
     6043  }
     6044}
     6045
     6046Double TEncAdaptiveLoopFilter::calculateErrorCoeffProvided(Double **A, Double *b, Double *c, Int size)
     6047{
     6048  Int i, j;
     6049  Double error, sum = 0;
     6050 
     6051  error = 0;
     6052  for (i = 0; i < size; i++)   //diagonal
     6053  {
     6054    sum = 0;
     6055    for (j = i + 1; j < size; j++)
     6056    {
     6057      sum += (A[j][i] + A[i][j]) * c[j];
     6058    }
     6059    error += (A[i][i] * c[i] + sum - 2 * b[i]) * c[i];
     6060  }
     6061 
     6062  return error;
     6063}
     6064
     6065Double TEncAdaptiveLoopFilter::calculateErrorAbs(Double **A, Double *b, Double y, Int size)
     6066{
     6067  Int i;
     6068  Double error, sum;
     6069  Double c[ALF_MAX_NUM_COEF];
    32346070 
    32356071  gnsSolveByChol(A, b, c, size);
    32366072 
    3237   sum=0;
    3238   for (i=0; i<size; i++)
    3239   {
    3240     sum+=c[i]*b[i];
    3241   }
    3242   error=y-sum;
    3243  
    3244   return(error);
    3245 }
    3246 
    3247 double TEncAdaptiveLoopFilter::mergeFiltersGreedy(double **yGlobalSeq, double ***EGlobalSeq, double *pixAccGlobalSeq, int intervalBest[NO_VAR_BINS][2], int sqrFiltLength, int noIntervals)
    3248 {
    3249   int first, ind, ind1, ind2, i, j, bestToMerge ;
    3250   double error, error1, error2, errorMin;
    3251   static double pixAcc_temp, error_tab[NO_VAR_BINS],error_comb_tab[NO_VAR_BINS];
    3252   static int indexList[NO_VAR_BINS], available[NO_VAR_BINS], noRemaining;
     6073  sum = 0;
     6074  for (i = 0; i < size; i++)
     6075  {
     6076    sum += c[i] * b[i];
     6077  }
     6078  error = y - sum;
     6079 
     6080  return error;
     6081}
     6082
     6083Double TEncAdaptiveLoopFilter::mergeFiltersGreedy(Double **yGlobalSeq, Double ***EGlobalSeq, Double *pixAccGlobalSeq, Int intervalBest[NO_VAR_BINS][2], Int sqrFiltLength, Int noIntervals)
     6084{
     6085  Int first, ind, ind1, ind2, i, j, bestToMerge ;
     6086  Double error, error1, error2, errorMin;
     6087  static Double pixAcc_temp, error_tab[NO_VAR_BINS],error_comb_tab[NO_VAR_BINS];
     6088  static Int indexList[NO_VAR_BINS], available[NO_VAR_BINS], noRemaining;
    32536089  if (noIntervals == NO_FILTERS)
    32546090  {
    3255     noRemaining=NO_VAR_BINS;
     6091    noRemaining = NO_VAR_BINS;
    32566092    for (ind=0; ind<NO_VAR_BINS; ind++)
    32576093    {
    3258       indexList[ind]=ind;
    3259       available[ind]=1;
    3260       m_pixAcc_merged[ind]=pixAccGlobalSeq[ind];
    3261       memcpy(m_y_merged[ind],yGlobalSeq[ind],sizeof(double)*sqrFiltLength);
    3262       for (i=0; i<sqrFiltLength; i++)
    3263       {
    3264         memcpy(m_E_merged[ind][i],EGlobalSeq[ind][i],sizeof(double)*sqrFiltLength);
     6094      indexList[ind] = ind;
     6095      available[ind] = 1;
     6096      m_pixAcc_merged[ind] = pixAccGlobalSeq[ind];
     6097      memcpy(m_y_merged[ind], yGlobalSeq[ind], sizeof(Double)*sqrFiltLength);
     6098      for (i=0; i < sqrFiltLength; i++)
     6099      {
     6100        memcpy(m_E_merged[ind][i], EGlobalSeq[ind][i], sizeof(Double)*sqrFiltLength);
    32656101      }
    32666102    }
     
    32696105  if (noIntervals == NO_FILTERS)
    32706106  {
    3271     for (ind=0; ind<NO_VAR_BINS; ind++)
    3272     {
    3273       error_tab[ind]=calculateErrorAbs(m_E_merged[ind], m_y_merged[ind], m_pixAcc_merged[ind], sqrFiltLength);
    3274     }
    3275     for (ind=0; ind<NO_VAR_BINS-1; ind++)
    3276     {
    3277       ind1=indexList[ind];
    3278       ind2=indexList[ind+1];
     6107    for (ind = 0; ind < NO_VAR_BINS; ind++)
     6108    {
     6109      error_tab[ind] = calculateErrorAbs(m_E_merged[ind], m_y_merged[ind], m_pixAcc_merged[ind], sqrFiltLength);
     6110    }
     6111    for (ind = 0; ind < NO_VAR_BINS - 1; ind++)
     6112    {
     6113      ind1 = indexList[ind];
     6114      ind2 = indexList[ind+1];
    32796115     
    3280       error1=error_tab[ind1];
    3281       error2=error_tab[ind2];
     6116      error1 = error_tab[ind1];
     6117      error2 = error_tab[ind2];
    32826118     
    3283       pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2];
    3284       for (i=0; i<sqrFiltLength; i++)
    3285       {
    3286         m_y_temp[i]=m_y_merged[ind1][i]+m_y_merged[ind2][i];
    3287         for (j=0; j<sqrFiltLength; j++)
    3288         {
    3289           m_E_temp[i][j]=m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];
     6119      pixAcc_temp = m_pixAcc_merged[ind1] + m_pixAcc_merged[ind2];
     6120      for (i = 0; i < sqrFiltLength; i++)
     6121      {
     6122        m_y_temp[i] = m_y_merged[ind1][i] + m_y_merged[ind2][i];
     6123        for (j = 0; j < sqrFiltLength; j++)
     6124        {
     6125          m_E_temp[i][j] = m_E_merged[ind1][i][j] + m_E_merged[ind2][i][j];
    32906126        }
    32916127      }
    3292       error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2;
    3293     }
    3294   }
    3295   while (noRemaining>noIntervals)
    3296   {
    3297     errorMin=0; first=1;
     6128      error_comb_tab[ind1] = calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength) - error1 - error2;
     6129    }
     6130  }
     6131  while (noRemaining > noIntervals)
     6132  {
     6133    errorMin = 0;
     6134    first = 1;
    32986135    bestToMerge = 0;
    3299     for (ind=0; ind<noRemaining-1; ind++)
     6136    for (ind = 0; ind < noRemaining - 1; ind++)
    33006137    {
    33016138      error = error_comb_tab[indexList[ind]];
    3302       if ((error<errorMin || first==1))
    3303       {
    3304         errorMin=error;
    3305         bestToMerge=ind;
    3306         first=0;
    3307       }
    3308     }
    3309     ind1=indexList[bestToMerge];
    3310     ind2=indexList[bestToMerge+1];
    3311     m_pixAcc_merged[ind1]+=m_pixAcc_merged[ind2];
    3312     for (i=0; i<sqrFiltLength; i++)
    3313     {
    3314       m_y_merged[ind1][i]+=m_y_merged[ind2][i];
    3315       for (j=0; j<sqrFiltLength; j++)
    3316       {
    3317         m_E_merged[ind1][i][j]+=m_E_merged[ind2][i][j];
    3318       }
    3319     }
    3320     available[ind2]=0;
     6139      if ((error < errorMin || first == 1))
     6140      {
     6141        errorMin = error;
     6142        bestToMerge = ind;
     6143        first = 0;
     6144      }
     6145    }
     6146    ind1 = indexList[bestToMerge];
     6147    ind2 = indexList[bestToMerge+1];
     6148    m_pixAcc_merged[ind1] += m_pixAcc_merged[ind2];
     6149    for (i = 0; i < sqrFiltLength; i++)
     6150    {
     6151      m_y_merged[ind1][i] += m_y_merged[ind2][i];
     6152      for (j = 0; j < sqrFiltLength; j++)
     6153      {
     6154        m_E_merged[ind1][i][j] += m_E_merged[ind2][i][j];
     6155      }
     6156    }
     6157    available[ind2] = 0;
    33216158   
    33226159    //update error tables
    3323     error_tab[ind1]=error_comb_tab[ind1]+error_tab[ind1]+error_tab[ind2];
     6160    error_tab[ind1] = error_comb_tab[ind1] + error_tab[ind1] + error_tab[ind2];
    33246161    if (indexList[bestToMerge] > 0)
    33256162    {
    3326       ind1=indexList[bestToMerge-1];
    3327       ind2=indexList[bestToMerge];
    3328       error1=error_tab[ind1];
    3329       error2=error_tab[ind2];
    3330       pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2];
     6163      ind1 = indexList[bestToMerge-1];
     6164      ind2 = indexList[bestToMerge];
     6165      error1 = error_tab[ind1];
     6166      error2 = error_tab[ind2];
     6167      pixAcc_temp = m_pixAcc_merged[ind1] + m_pixAcc_merged[ind2];
     6168      for (i = 0; i < sqrFiltLength; i++)
     6169      {
     6170        m_y_temp[i] = m_y_merged[ind1][i] + m_y_merged[ind2][i];
     6171        for (j = 0; j < sqrFiltLength; j++)
     6172        {
     6173          m_E_temp[i][j] = m_E_merged[ind1][i][j] + m_E_merged[ind2][i][j];
     6174        }
     6175      }
     6176      error_comb_tab[ind1] = calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength) - error1 - error2;
     6177    }
     6178    if (indexList[bestToMerge+1] < NO_VAR_BINS - 1)
     6179    {
     6180      ind1 = indexList[bestToMerge];
     6181      ind2 = indexList[bestToMerge+2];
     6182      error1 = error_tab[ind1];
     6183      error2 = error_tab[ind2];
     6184      pixAcc_temp = m_pixAcc_merged[ind1] + m_pixAcc_merged[ind2];
    33316185      for (i=0; i<sqrFiltLength; i++)
    33326186      {
    3333         m_y_temp[i]=m_y_merged[ind1][i]+m_y_merged[ind2][i];
    3334         for (j=0; j<sqrFiltLength; j++)
    3335         {
    3336           m_E_temp[i][j]=m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];
     6187        m_y_temp[i] = m_y_merged[ind1][i] + m_y_merged[ind2][i];
     6188        for (j=0; j < sqrFiltLength; j++)
     6189        {
     6190          m_E_temp[i][j] = m_E_merged[ind1][i][j] + m_E_merged[ind2][i][j];
    33376191        }
    33386192      }
    3339       error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2;
    3340     }
    3341     if (indexList[bestToMerge+1] < NO_VAR_BINS-1)
    3342     {
    3343       ind1=indexList[bestToMerge];
    3344       ind2=indexList[bestToMerge+2];
    3345       error1=error_tab[ind1];
    3346       error2=error_tab[ind2];
    3347       pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2];
    3348       for (i=0; i<sqrFiltLength; i++)
    3349       {
    3350         m_y_temp[i]=m_y_merged[ind1][i]+m_y_merged[ind2][i];
    3351         for (j=0; j<sqrFiltLength; j++)
    3352         {
    3353           m_E_temp[i][j]=m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];
    3354         }
    3355       }
    3356       error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2;
     6193      error_comb_tab[ind1] = calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength) - error1 - error2;
    33576194    }
    33586195   
    33596196    ind=0;
    3360     for (i=0; i<NO_VAR_BINS; i++)
    3361     {
    3362       if (available[i]==1)
    3363       {
    3364         indexList[ind]=i;
     6197    for (i = 0; i < NO_VAR_BINS; i++)
     6198    {
     6199      if (available[i] == 1)
     6200      {
     6201        indexList[ind] = i;
    33656202        ind++;
    33666203      }
     
    33696206  }
    33706207 
    3371  
    3372   errorMin=0;
    3373   for (ind=0; ind<noIntervals; ind++)
    3374   {
    3375     errorMin+=error_tab[indexList[ind]];
    3376   }
    3377  
    3378   for (ind=0; ind<noIntervals-1; ind++)
    3379   {
    3380     intervalBest[ind][0]=indexList[ind]; intervalBest[ind][1]=indexList[ind+1]-1;
    3381   }
    3382  
    3383   intervalBest[noIntervals-1][0]=indexList[noIntervals-1];
    3384   intervalBest[noIntervals-1][1]=NO_VAR_BINS-1;
     6208  errorMin = 0;
     6209  for (ind = 0; ind < noIntervals; ind++)
     6210  {
     6211    errorMin += error_tab[indexList[ind]];
     6212  }
     6213 
     6214  for (ind = 0; ind < noIntervals - 1; ind++)
     6215  {
     6216    intervalBest[ind][0] = indexList[ind];
     6217    intervalBest[ind][1] = indexList[ind+1] - 1;
     6218  }
     6219 
     6220  intervalBest[noIntervals-1][0] = indexList[noIntervals-1];
     6221  intervalBest[noIntervals-1][1] = NO_VAR_BINS-1;
    33856222 
    33866223  return(errorMin);
    33876224}
    33886225
    3389 
    3390 
    3391 double TEncAdaptiveLoopFilter::findFilterGroupingError(double ***EGlobalSeq, double **yGlobalSeq, double *pixAccGlobalSeq, int intervalBest[NO_VAR_BINS][2], int sqrFiltLength, int filters_per_fr)
     6226Void TEncAdaptiveLoopFilter::roundFiltCoeff(Int *FilterCoeffQuan, Double *FilterCoeff, Int sqrFiltLength, Int factor)
     6227{
     6228  Int i;
     6229  Double diff;
     6230  Int diffInt, sign;
     6231 
     6232  for(i = 0; i < sqrFiltLength; i++)
     6233  {
     6234    sign = (FilterCoeff[i] > 0)? 1 : -1;
     6235    diff = FilterCoeff[i] * sign;
     6236    diffInt = (Int)(diff * (Double)factor + 0.5);
     6237    FilterCoeffQuan[i] = diffInt * sign;
     6238  }
     6239}
     6240
     6241Double TEncAdaptiveLoopFilter::QuantizeIntegerFilterPP(Double *filterCoeff, Int *filterCoeffQuant, Double **E, Double *y, Int sqrFiltLength, Int *weights)
    33926242{
    33936243  double error;
    3394  
    3395   // find best filters for each frame group
    3396   error = 0;
    3397   error += mergeFiltersGreedy(yGlobalSeq, EGlobalSeq, pixAccGlobalSeq, intervalBest, sqrFiltLength, filters_per_fr);
    3398  
    3399   return(error);
    3400 }
    3401 
    3402 
    3403 Void TEncAdaptiveLoopFilter::roundFiltCoeff(int *FilterCoeffQuan, double *FilterCoeff, int sqrFiltLength, int factor)
    3404 {
    3405   int i;
    3406   double diff;
    3407   int diffInt, sign;
    3408  
    3409   for(i = 0; i < sqrFiltLength; i++)
    3410   {
    3411     sign               = (FilterCoeff[i]>0) ?  1: -1;
    3412     diff               = FilterCoeff[i]*sign;
    3413     diffInt            = (int)(diff*(double)factor+0.5);
    3414     FilterCoeffQuan[i] = diffInt*sign;
    3415   }
    3416 }
    3417 
    3418 Double TEncAdaptiveLoopFilter::QuantizeIntegerFilterPP(double *filterCoeff, int *filterCoeffQuant, double **E, double *y, int sqrFiltLength, int *weights, int bit_depth)
    3419 {
    3420   double error;
    3421  
    3422   int factor = (1<<(bit_depth-1)), i;
     6244#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     6245  Int factor = (1<<(getAlfPrecisionBit(m_alfQP)));
     6246#else
     6247  Int factor = (1<<  ((Int)ALF_NUM_BIT_SHIFT)  );
     6248#endif
     6249  Int i;
    34236250  int quantCoeffSum, minInd, targetCoeffSumInt, k, diff;
    34246251  double targetCoeffSum, errMin;
     
    34646291            minInd=k;
    34656292          }
    3466         } // if (weights(k)<=diff){
    3467       } // for (k=0; k<sqrFiltLength; k++){
     6293        } // if (weights(k)<=diff)
     6294      } // for (k=0; k<sqrFiltLength; k++)
    34686295      filterCoeffQuant[minInd]--;
    34696296    }
     
    34916318            minInd=k;
    34926319          }
    3493         } // if (weights(k)<=diff){
    3494       } // for (k=0; k<sqrFiltLength; k++){
     6320        } // if (weights(k)<=diff)
     6321      } // for (k=0; k<sqrFiltLength; k++)
    34956322      filterCoeffQuant[minInd]++;
    34966323    }
     
    35106337  }
    35116338 
     6339  checkFilterCoeffValue(filterCoeffQuant, sqrFiltLength, false);
     6340
    35126341  for (i=0; i<sqrFiltLength; i++)
    35136342  {
     
    35186347  return(error);
    35196348}
    3520 
    3521 Double TEncAdaptiveLoopFilter::findFilterCoeff(double ***EGlobalSeq, double **yGlobalSeq, double *pixAccGlobalSeq, int **filterCoeffSeq, int **filterCoeffQuantSeq, int intervalBest[NO_VAR_BINS][2], int varIndTab[NO_VAR_BINS], int sqrFiltLength, int filters_per_fr, int *weights, int bit_depth, double errorTabForce0Coeff[NO_VAR_BINS][2])
     6349Double TEncAdaptiveLoopFilter::findFilterCoeff(double ***EGlobalSeq, double **yGlobalSeq, double *pixAccGlobalSeq, int **filterCoeffSeq, int **filterCoeffQuantSeq, int intervalBest[NO_VAR_BINS][2], int varIndTab[NO_VAR_BINS], int sqrFiltLength, int filters_per_fr, int *weights, double errorTabForce0Coeff[NO_VAR_BINS][2])
    35226350{
    35236351  static double pixAcc_temp;
     
    35366364   
    35376365    // Find coefficients
    3538     errorTabForce0Coeff[filtNo][1] = pixAcc_temp + QuantizeIntegerFilterPP(m_filterCoeff, m_filterCoeffQuant, m_E_temp, m_y_temp, sqrFiltLength, weights, bit_depth);
     6366    errorTabForce0Coeff[filtNo][1] = pixAcc_temp + QuantizeIntegerFilterPP(m_filterCoeff, m_filterCoeffQuant, m_E_temp, m_y_temp, sqrFiltLength, weights);
    35396367    errorTabForce0Coeff[filtNo][0] = pixAcc_temp;
    35406368    error += errorTabForce0Coeff[filtNo][1];
     
    35566384}
    35576385
    3558 #if MQT_ALF_NPASS
     6386#if !LCU_SYNTAX_ALF
     6387
     6388/** Save redesigned filter set to buffer
     6390 * \param filterSet filter set (numFilter filters) to be copied into the saved-filter buffer
     6390 */
     6391Void TEncAdaptiveLoopFilter::saveFilterCoeffToBuffer(Int **filterSet, Int numFilter, Int* mergeTable, Int mode, Int filtNo)
     6392{
     6393  Int iBufferIndex = m_iCurrentPOC % m_iGOPSize;
     6394
     6395  static Bool bFirst = true;
     6396  static Bool* pbFirstAccess;
     6397  if(bFirst)
     6398  {
     6399    pbFirstAccess = new Bool[NUM_ALF_CLASS_METHOD];
     6400    for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
     6401    {
     6402      pbFirstAccess[i]= true;
     6403    }
     6404    bFirst = false;
     6405  }
     6406
     6407  if(iBufferIndex == 0)
     6408  {
     6409    if(pbFirstAccess[mode])
     6410    {
     6411      //store merge table
     6412      ::memcpy(m_mergeTableSavedMethods[mode][m_iGOPSize], mergeTable, sizeof(Int)*NO_VAR_BINS);
     6413      //store coefficients
     6414      for(Int varInd=0; varInd< numFilter; varInd++)
     6415      {
     6416        ::memcpy(m_aiFilterCoeffSavedMethods[mode][m_iGOPSize][varInd],filterSet[varInd], sizeof(Int)*ALF_MAX_NUM_COEF );
     6417      }
     6418      //store filter shape
     6419      m_iPreviousFilterShapeMethods[mode][m_iGOPSize]= filtNo;
     6420
     6421      pbFirstAccess[mode] = false;
     6422    }
     6423
     6424
     6425    //store merge table
     6426    ::memcpy(m_mergeTableSavedMethods[mode][0         ], m_mergeTableSavedMethods[mode][m_iGOPSize], sizeof(Int)*NO_VAR_BINS);
     6427    ::memcpy(m_mergeTableSavedMethods[mode][m_iGOPSize], mergeTable,                          sizeof(Int)*NO_VAR_BINS);
     6428
     6429    //store coefficients
     6430    for(Int varInd=0; varInd< NO_VAR_BINS; varInd++)
     6431    {
     6432      ::memcpy(m_aiFilterCoeffSavedMethods[mode][0][varInd],m_aiFilterCoeffSavedMethods[mode][m_iGOPSize][varInd], sizeof(Int)*ALF_MAX_NUM_COEF );
     6433    }
     6434
     6435    for(Int varInd=0; varInd< numFilter; varInd++)
     6436    {
     6437      ::memcpy(m_aiFilterCoeffSavedMethods[mode][m_iGOPSize][varInd],filterSet[varInd], sizeof(Int)*ALF_MAX_NUM_COEF );
     6438    }
     6439
     6440    //store filter shape
     6441    m_iPreviousFilterShapeMethods[mode][0]= m_iPreviousFilterShapeMethods[mode][m_iGOPSize];
     6442    m_iPreviousFilterShapeMethods[mode][m_iGOPSize]= filtNo;
     6443  }
     6444  else
     6445  {
     6446
     6447    //store merge table
     6448    ::memcpy(m_mergeTableSavedMethods[mode][iBufferIndex], mergeTable, sizeof(Int)*NO_VAR_BINS);
     6449
     6450    //store coefficients
     6451    for(Int varInd=0; varInd< numFilter; varInd++)
     6452    {
     6453      ::memcpy(m_aiFilterCoeffSavedMethods[mode][iBufferIndex][varInd],filterSet[varInd], sizeof(Int)*ALF_MAX_NUM_COEF );
     6454    }
     6455    //store filter_shape
     6456    m_iPreviousFilterShapeMethods[mode][iBufferIndex]= filtNo;
     6457
     6458  }
     6459}
     6460
     6461
     6462/** Set the initial m_maskImg from the saved (time-delayed) filters: each of the two candidate buffers is tried over the allowed CU-control depths, the mask of the cheapest rate-distortion candidate is kept, and a new filter set is then designed and saved for future reference.
     6463 * \param pcPicOrg original picture (its luma plane, stride and width are read here)
     6464 * \param pcPicDec reconstructed picture after deblocking (its luma plane is read here)
     6465 */
     6466Void TEncAdaptiveLoopFilter::setMaskWithTimeDelayedResults(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec)
     6467{
     6468
     6469  static Pel** bestImgMask; // function-static scratch copy of the best mask; not released anywhere in this function
     6470  static Bool bFirst = true;
     6471  if(bFirst)
     6472  {
     6473    initMatrix_Pel(&bestImgMask, m_img_height, m_img_width); // sized from m_img_height/m_img_width at the first call only
     6474    bFirst = false;
     6475  }
     6476
     6477  Pel*    pDec       = pcPicDec->getLumaAddr();
     6478  Pel*    pOrg       = pcPicOrg->getLumaAddr();
     6479  Pel*    pRest      = m_pcPicYuvTmp->getLumaAddr(); // luma of the temporary picture; presumably receives the filtered candidate - TODO confirm against filterLuma/xfilterSlicesEncoder
     6480  Int     LumaStride = pcPicOrg->getStride();
     6481  Int***   pppCoeffSaved     = m_aiFilterCoeffSavedMethods  [m_uiVarGenMethod]; // saved coefficients, selected by m_uiVarGenMethod
     6482  Int**    ppMergeTableSaved = m_mergeTableSavedMethods     [m_uiVarGenMethod]; // saved merge tables, selected by m_uiVarGenMethod
     6483  Int*     pFilterShapeSaved = m_iPreviousFilterShapeMethods[m_uiVarGenMethod]; // saved filter shapes, selected by m_uiVarGenMethod
     6484  Int      iBufIdx;
     6485
     6486  UInt64    uiRate, uiDist;
     6487  Double    dCost, dMinCost = MAX_DOUBLE;
     6488  ALFParam  cAlfParam; // holds the parameters of the cheapest candidate
     6489  allocALFParam(&cAlfParam); // paired with freeALFParam() at the end of this function
     6490  cAlfParam.alf_flag        = 0;
     6491  cAlfParam.chroma_idc      = 0;
     6492
     6493  //filter frame with the previous time-delayed filters
     6494  Int filtNo;
     6495  Int maxDepth = (pcPicOrg->getWidth() < 1000) ?(2):(g_uiMaxCUDepth); // depth search is capped at 2 when the picture width is below 1000
     6496  m_pcEntropyCoder->setAlfCtrl(true);
     6497  m_pcTempAlfParam->alf_flag = 1;
     6498  m_pcTempAlfParam->alf_pcr_region_flag = m_uiVarGenMethod;
     6499
     6500  for (Int index=0; index<2; index++) // two candidate buffers, chosen by setFilterIdx()
     6501  {
     6502    iBufIdx = setFilterIdx(index);
     6503    filtNo = m_pcTempAlfParam->filter_shape = pFilterShapeSaved[iBufIdx];
     6504#if ALF_SINGLE_FILTER_SHAPE
     6505    assert(filtNo == ALF_CROSS9x7_SQUARE3x3);
     6506#else
     6507    assert(filtNo == ALF_STAR5x5 || filtNo == ALF_CROSS9x9);
     6508#endif
     6509    m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[filtNo];
     6510    if(!m_bUseNonCrossALF)
     6511    {
     6512      filterLuma(pRest, pDec, LumaStride, 0, m_img_height-1, 0, m_img_width-1,  filtNo, pppCoeffSaved[iBufIdx], ppMergeTableSaved[iBufIdx], m_varImg);
     6513    }
     6514    else
     6515    {
     6516      xfilterSlicesEncoder(pDec, pRest, LumaStride, filtNo, pppCoeffSaved[iBufIdx], ppMergeTableSaved[iBufIdx], m_varImg);
     6517    }
     6518
     6519    for (UInt uiDepth = 0; uiDepth < maxDepth; uiDepth++) // NOTE(review): UInt counter is compared against the Int maxDepth
     6520    {
     6521      m_pcEntropyCoder->setMaxAlfCtrlDepth(uiDepth);
     6522      std::vector<AlfCUCtrlInfo> vAlfCUCtrlParamTemp(m_uiNumSlicesInPic);
     6523      xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, vAlfCUCtrlParamTemp); // supplies uiDist; presumably also updates m_maskImg - TODO confirm
     6524      m_pcEntropyCoder->resetEntropy();
     6525      m_pcEntropyCoder->resetBits();
     6526      xEncodeCUAlfCtrlFlags(vAlfCUCtrlParamTemp);
     6527      uiRate = m_pcEntropyCoder->getNumberOfWrittenBits(); // bits counted since resetBits() above
     6528      dCost  = (Double)(uiRate) * m_dLambdaLuma + (Double)(uiDist); // cost = rate * lambda + distortion
     6529
     6530      if (dCost < dMinCost) // keep the parameters and mask of the cheapest candidate so far
     6531      {
     6532        dMinCost    = dCost;
     6533        copyALFParam(&cAlfParam, m_pcTempAlfParam);
     6534        ::memcpy(bestImgMask[0], m_maskImg[0], sizeof(Pel)*m_img_height* m_img_width); // flat copy through row 0 - assumes both matrices store their rows contiguously; TODO confirm in initMatrix_Pel
     6535      }
     6536    }
     6537  }
     6538  filtNo = cAlfParam.filter_shape; // shape of the winning candidate, used for the filter design below
     6539
     6540
     6541  ::memcpy(m_maskImg[0], bestImgMask[0], sizeof(Pel)*m_img_height* m_img_width); // restore the best mask found above
     6542
     6543  m_pcEntropyCoder->setAlfCtrl(false);
     6544  m_pcEntropyCoder->setMaxAlfCtrlDepth(0);
     6545
     6546  // generate filters for future reference
     6547  m_iDesignCurrentFilter = 0; // set back to 1 after the filters have been saved
     6548
     6549  int  filters_per_fr;
     6550  int  lambda_val = (Int)m_dLambdaLuma;
     6551
     6552  lambda_val = lambda_val * (1<<(2*g_uiBitIncrement)); // scale the truncated lambda by 2^(2*g_uiBitIncrement)
     6553
     6554  if(!m_bUseNonCrossALF)
     6555  {
     6556    xstoreInBlockMatrix(0, 0, m_img_height, m_img_width, true, true, pOrg, pDec, cAlfParam.filter_shape, LumaStride);
     6557  }
     6558  else
     6559  {
     6560    xstoreInBlockMatrixforSlices(pOrg, pDec, cAlfParam.filter_shape, LumaStride);
     6561  }
     6562  xfindBestFilterVarPred(m_yGlobalSym[filtNo], m_EGlobalSym[filtNo], m_pixAcc,
     6563    m_filterCoeffSym, m_filterCoeffSymQuant,
     6564    filtNo, &filters_per_fr,
     6565    m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val);
     6566
     6567  saveFilterCoeffToBuffer(m_filterCoeffSym, filters_per_fr, m_varIndTab, cAlfParam.alf_pcr_region_flag, filtNo); // store the newly designed filter set in the saved-filter buffers
     6568  m_iDesignCurrentFilter = 1;
     6569
     6570  freeALFParam(&cAlfParam);
     6571
     6572}
     6573
     6574
     6575/** set ALF encoding parameters
     6576 * \param pcPic picture pointer
     6577 */
    35596578Void TEncAdaptiveLoopFilter::setALFEncodingParam(TComPic *pcPic)
    35606579{
     
    35636582    m_iALFNumOfRedesign = 0;
    35646583    m_iCurrentPOC = m_pcPic->getPOC();
    3565 
    3566     if((m_eSliceType == I_SLICE) || (m_iGOPSize==8 && (m_iCurrentPOC % 4 == 0)))
     6584    if((pcPic->getSlice(0)->getSliceType() == I_SLICE) || (m_iGOPSize==8 && (m_iCurrentPOC % 4 == 0)))
    35676585    {
    35686586      m_iUsePreviousFilter = 0;
     
    35816599}
    35826600
    3583 Void TEncAdaptiveLoopFilter::xcalcPredFilterCoeffPrev(Int filtNo)
    3584 {
    3585   int varInd, i;
    3586 
    3587   for(varInd=0; varInd<NO_VAR_BINS; ++varInd)
    3588   {
    3589     for(i = 0; i < MAX_SQR_FILT_LENGTH; i++)
    3590     {
    3591       m_filterCoeffPrevSelected[varInd][i]=m_aiFilterCoeffSaved[m_iFilterIdx][varInd][i];
    3592     }
    3593   }
    3594 }
    3595 
    3596 Void TEncAdaptiveLoopFilter::setFilterIdx(Int index)
    3597 {
     6601/** Select the saved-filter buffer index for one time-delayed filtering candidate
     6602 * \param index the processing order of time-delayed filtering (0 selects the first entry of each pair below, any other value the second)
     6603 * \returns the buffer index: for m_iGOPSize == 8 it is picked from m_iCurrentPOC % m_iGOPSize, otherwise it is 0 or m_iGOPSize */
     6604Int TEncAdaptiveLoopFilter::setFilterIdx(Int index)
     6605{
     6606  Int iBufIdx; // NOTE(review): stays unassigned if the default case below is reached with asserts disabled
     6607
    35986608  if (m_iGOPSize == 8)
    35996609  {
    3600     if ((m_iCurrentPOC % m_iGOPSize) == 0)
    3601     {
    3602       Int FiltTable[2] = {0, m_iGOPSize};
    3603       m_iFilterIdx = FiltTable[index];
    3604     }
    3605     if ((m_iCurrentPOC % m_iGOPSize) == 4)
    3606     {
    3607       Int FiltTable[2] = {0, m_iGOPSize};
    3608       m_iFilterIdx = FiltTable[index];
    3609     }
    3610     if ((m_iCurrentPOC % m_iGOPSize) == 2)
    3611     {
    3612       Int FiltTable[2] = {0, 4};
    3613       m_iFilterIdx = FiltTable[index];
    3614     }
    3615     if ((m_iCurrentPOC % m_iGOPSize) == 6)
    3616     {
    3617       Int FiltTable[2] = {4, m_iGOPSize};
    3618       m_iFilterIdx = FiltTable[index];
    3619     }
    3620     if ((m_iCurrentPOC % m_iGOPSize) == 1)
    3621     {
    3622       Int FiltTable[2] = {0, 2};
    3623       m_iFilterIdx = FiltTable[index];
    3624     }
    3625     if ((m_iCurrentPOC % m_iGOPSize) == 3)
    3626     {
    3627       Int FiltTable[2] = {2, 4};
    3628       m_iFilterIdx = FiltTable[index];
    3629     }
    3630     if ((m_iCurrentPOC % m_iGOPSize) == 5)
    3631     {
    3632       Int FiltTable[2] = {4, 6};
    3633       m_iFilterIdx = FiltTable[index];
    3634     }
    3635     if ((m_iCurrentPOC % m_iGOPSize) == 7)
    3636     {
    3637       Int FiltTable[2] = {6, m_iGOPSize};
    3638       m_iFilterIdx = FiltTable[index];
     6610    switch(m_iCurrentPOC % m_iGOPSize) // remainder of the current POC within a GOP of 8
     6611    {
     6612    case 0:
     6613      {
     6614        iBufIdx = (index == 0)?0:m_iGOPSize;
     6615      }
     6616      break;
     6617    case 1:
     6618      {
     6619        iBufIdx = (index == 0)?0:2;
     6620      }
     6621      break;
     6622    case 2:
     6623      {
     6624        iBufIdx = (index == 0)?0:4;
     6625      }
     6626      break;
     6627    case 3:
     6628      {
     6629        iBufIdx = (index == 0)?2:4;
     6630      }
     6631      break;
     6632    case 4:
     6633      {
     6634        iBufIdx = (index == 0)?0:m_iGOPSize;
     6635      }
     6636      break;
     6637    case 5:
     6638      {
     6639        iBufIdx = (index == 0)?4:6;
     6640      }
     6641      break;
     6642    case 6:
     6643      {
     6644        iBufIdx = (index == 0)?4:m_iGOPSize;
     6645      }
     6646      break;
     6647    case 7:
     6648      {
     6649        iBufIdx = (index == 0)?6:m_iGOPSize;
     6650      }
     6651      break;
     6652    default: // reached only when the remainder is outside 0..7, e.g. for a negative m_iCurrentPOC
     6653      {
     6654        printf("error\n");
     6655        assert(0);
     6656      }
    36396657    }
    36406658  }
    36416659  else
    36426660  {
    3643     Int FiltTable[2] = {0, m_iGOPSize};
    3644     m_iFilterIdx = FiltTable[index];
    3645   }
    3646 }
    3647 
     6661    iBufIdx = (index == 0)?0:m_iGOPSize; // any other GOP size: buffer 0 for the first candidate, buffer m_iGOPSize for the second
     6662  }
     6663
     6664  return iBufIdx;
     6665}
     6666
     6667
     6668/** set initial m_maskImg
     6669 * \param pcPicOrg original picture pointer
     6670 * \param pcPicDec reconstructed picture pointer
     6671 */
    36486672Void TEncAdaptiveLoopFilter::setInitialMask(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec)
    36496673{
     
    36516675  Int Width = pcPicOrg->getWidth();
    36526676  Int LumaStride = pcPicOrg->getStride();
    3653   imgpel* pDec = (imgpel*)pcPicDec->getLumaAddr();
    3654 
    3655 #if MTK_NONCROSS_INLOOP_FILTER
    3656   if(!m_bUseNonCrossALF)
    3657     calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
     6677  Pel* pDec = pcPicDec->getLumaAddr();
     6678
     6679  calcVar(m_varImg, pDec, LumaStride, m_uiVarGenMethod);
     6680
     6681  if(!m_iALFEncodePassReduction || !m_iUsePreviousFilter)
     6682  {
     6683    for(Int y=0; y<Height; y++)
     6684    {
     6685      for(Int x=0; x<Width; x++)
     6686      {
     6687        m_maskImg[y][x] = 1;
     6688      }
     6689    }
     6690  }
    36586691  else
    3659     calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
    3660 #else
    3661   calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
    3662 #endif
    3663 
    3664   if(!m_iALFEncodePassReduction || !m_iUsePreviousFilter)
    3665   {
    3666     for(Int y=0; y<Height; y++)
    3667     {
    3668       for(Int x=0; x<Width; x++)
    3669       {
    3670         m_maskImg[y][x] = 1;
    3671       }
    3672     }
    3673   }
    3674   else
    3675   {
    3676     Int uiBestDepth=0;
    3677     UInt64 uiRate, uiDist, uiMinRate, uiMinDist;
    3678     Double dCost, dMinCost = MAX_DOUBLE;
    3679     //imgpel* pOrg = (imgpel*)pcPicOrg->getLumaAddr();
    3680     imgpel* pRest = (imgpel*)m_pcPicYuvTmp->getLumaAddr();
    3681 
    3682     Int iTap = 9;
    3683     Int filtNo = 0;
    3684     m_pcTempAlfParam->cu_control_flag = 0;
    3685     m_pcTempAlfParam->tap = iTap;
    3686 #if TI_ALF_MAX_VSIZE_7
    3687     m_pcTempAlfParam->tapV      = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap);
    3688     m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(iTap);
    3689 #else
    3690     m_pcTempAlfParam->num_coeff = iTap*iTap/4 + 2;
    3691 #endif
    3692 
    3693     for (Int index=0; index<2; index++)
    3694     {
    3695       setFilterIdx(index);
    3696       xcalcPredFilterCoeffPrev(filtNo);
    3697 #if MTK_NONCROSS_INLOOP_FILTER
    3698       if(!m_bUseNonCrossALF)
    3699         xfilterFrame_en(0, 0, Height, Width, pDec, pRest, filtNo, LumaStride);
    3700       else
    3701         xfilterSlices_en(pDec, pRest, filtNo, LumaStride);
    3702 #else
    3703       xfilterFrame_en(pDec, pRest, filtNo, LumaStride);
    3704 #endif
    3705       xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, NULL, uiRate, uiDist, dCost);
    3706       if (dCost < dMinCost)
    3707       {
    3708         dMinCost  = dCost;
    3709         uiMinDist = uiDist;
    3710         uiMinRate = uiRate;
    3711         m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest);
    3712         copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    3713       }
    3714     }
    3715     m_pcPicYuvBest->copyToPicLuma(m_pcPicYuvTmp);
    3716 
    3717     m_pcEntropyCoder->setAlfCtrl(true);
    3718     Int maxDepth = g_uiMaxCUDepth;
    3719     if (pcPicOrg->getWidth() < 1000) maxDepth = 2;
    3720     for (UInt uiDepth = 0; uiDepth < maxDepth; uiDepth++)
    3721     {
    3722       m_pcEntropyCoder->setMaxAlfCtrlDepth(uiDepth);
    3723       copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
    3724       m_pcTempAlfParam->cu_control_flag = 1;
    3725 
    3726 #if TSB_ALF_HEADER
    3727       xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here
    3728 #else
    3729       xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here
    3730 #endif
    3731       m_pcEntropyCoder->resetEntropy();
    3732       m_pcEntropyCoder->resetBits();
    3733       xEncodeCUAlfCtrlFlags();
    3734       uiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
    3735       dCost  = (Double)(uiRate) * m_dLambdaLuma + (Double)(uiDist);
    3736 
    3737       if (dCost < dMinCost)
    3738       {
    3739         uiBestDepth = uiDepth;
    3740         dMinCost    = dCost;
    3741         uiMinDist   = uiDist;
    3742         uiMinRate   = uiRate;
    3743         copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    3744         //save maskImg
    3745         xCopyTmpAlfCtrlFlagsFrom();
    3746       }
    3747     }
    3748 
    3749     copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
    3750     m_iDesignCurrentFilter = 0; // design filter for subsequent slices
    3751     xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, true);
    3752     m_iDesignCurrentFilter = 1;
    3753 
    3754     if (m_pcBestAlfParam->cu_control_flag)
    3755     {
    3756       m_pcEntropyCoder->setAlfCtrl(true);
    3757       m_pcEntropyCoder->setMaxAlfCtrlDepth(uiBestDepth);
    3758       xCopyTmpAlfCtrlFlagsTo();
    3759     }
    3760     else
    3761     {
    3762       m_pcEntropyCoder->setAlfCtrl(false);
    3763       m_pcEntropyCoder->setMaxAlfCtrlDepth(0);
    3764       for(Int y=0; y<Height; y++)
    3765       {
    3766         for(Int x=0; x<Width; x++)
    3767         {
    3768           m_maskImg[y][x] = 1;
    3769         }
    3770       }
    3771     }
    3772   }
    3773 }
    3774 
    3775 #if MQT_BA_RA
    3776 Void  TEncAdaptiveLoopFilter::xFirstEstimateFilteringFrameLumaAllTap(imgpel* ImgOrg, imgpel* ImgDec, Int Stride,
    3777                                                                      ALFParam* pcAlfSaved,
    3778                                                                      Int* aiVarIndTabBest,
    3779                                                                      Int** ppiBestCoeffSet,
    3780                                                                      Int& ibestfiltNo,
    3781                                                                      Int& ibestfilters_per_fr,
    3782                                                                      Double**  ppdBesty,
    3783                                                                      Double*** pppdBestE,
    3784                                                                      Double*   pdBestpixAcc,
    3785                                                                      UInt64& ruiRate,
    3786                                                                      Int64& riDist,
    3787                                                                      Double& rdCost
    3788                                                                      )
    3789 #else
    3790 Void   TEncAdaptiveLoopFilter::xFirstFilteringFrameLumaAllTap(imgpel* ImgOrg, imgpel* ImgDec, imgpel* ImgRest, Int Stride)
    3791 #endif
    3792 {
    3793 #if !MQT_BA_RA
    3794   static Bool bFirst = true;
    3795   static Int  aiVarIndTabBest[NO_VAR_BINS];
    3796 #endif
     6692  {
     6693    setMaskWithTimeDelayedResults(pcPicOrg, pcPicDec);
     6694  }
     6695}
     6696
     6697
     6698
     6699/** Estimate RD cost of all filter size & store the best one
     6700 * \param ImgOrg original picture
     6701 * \param ImgDec reconstructed picture after deblocking
     6703 * \param Stride line buffer size of picture buffer
     6703 * \param pcAlfSaved the best Alf parameters
     6704 * \returns ruiDist             estimated distortion
     6705 * \returns ruiRate             required bits
     6706 * \returns rdCost              estimated R-D cost
     6707 */
     6708
     6709Void  TEncAdaptiveLoopFilter::decideFilterShapeLuma(Pel* ImgOrg, Pel* ImgDec, Int Stride, ALFParam* pcAlfSaved, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost)
     6710{
    37976711  static Double **ySym, ***ESym;
    3798 #if !MQT_BA_RA
    3799   static Int**  ppiBestCoeffSet;
    3800 
    3801   if(bFirst)
    3802   {
    3803     initMatrix_int(&ppiBestCoeffSet, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    3804     bFirst = false;
    3805   }
    3806 #endif
    3807 
    38086712  Int    lambda_val = ((Int) m_dLambdaLuma) * (1<<(2*g_uiBitIncrement));
    3809 #if MQT_BA_RA
    38106713  Int    filtNo, filters_per_fr;
    3811 #else
    3812   Int    filtNo, ibestfiltNo=0, filters_per_fr, ibestfilters_per_fr=0;
    3813 #endif
    38146714  Int64  iEstimatedDist;
    38156715  UInt64 uiRate;
    38166716  Double dEstimatedCost, dEstimatedMinCost = MAX_DOUBLE;;
    3817   Bool   bMatrixBaseReady  = false;
    3818   m_iMatrixBaseFiltNo = 0;
    3819 #if MQT_BA_RA
     6717
     6718  UInt   uiBitShift = (g_uiBitIncrement<<1);
     6719  Int64  iEstimateDistBeforeFilter;
     6720  Int*   coeffNoFilter[NUM_ALF_FILTER_SHAPE][NO_VAR_BINS];
     6721  for(Int filter_shape = 0; filter_shape < NUM_ALF_FILTER_SHAPE; filter_shape++)
     6722  {
     6723    for(Int i=0; i< NO_VAR_BINS; i++)
     6724    {
     6725      coeffNoFilter[filter_shape][i]= new Int[ALF_MAX_NUM_COEF];
     6726      ::memset(coeffNoFilter[filter_shape][i], 0, sizeof(Int)*ALF_MAX_NUM_COEF);
     6727      coeffNoFilter[filter_shape][i][ m_sqrFiltLengthTab[filter_shape]-1 ] = (1 << ((Int)ALF_NUM_BIT_SHIFT));
     6728    }
     6729  }
     6730
    38206731  m_pcTempAlfParam->alf_flag = 1;
    3821   m_pcTempAlfParam->cu_control_flag = 0;
    38226732  m_pcTempAlfParam->chroma_idc = 0;
    3823 #endif
    3824   for(Int iTap = ALF_MAX_NUM_TAP; iTap>=ALF_MIN_NUM_TAP; iTap -= 2)
    3825   {
    3826     m_pcTempAlfParam->tap = iTap;
    3827 #if TI_ALF_MAX_VSIZE_7
    3828     m_pcTempAlfParam->tapV      = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap);
    3829     m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(iTap);
    3830 #else
    3831     m_pcTempAlfParam->num_coeff = iTap*iTap/4 + 2;
    3832 #endif
    3833     if (iTap==9)
    3834     {
    3835       filtNo = 0;
    3836     }
    3837     else if (iTap==7)
    3838     {
    3839       filtNo = 1;
    3840     }
    3841     else
    3842     {
    3843       filtNo = 2;
    3844     }
     6733  m_pcTempAlfParam->alf_pcr_region_flag = m_uiVarGenMethod;
     6734
     6735  for (int filter_shape = 0; filter_shape < NUM_ALF_FILTER_SHAPE ;filter_shape ++)
     6736  {
     6737    m_pcTempAlfParam->filter_shape = filtNo = filter_shape;
     6738    m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[filtNo] ; 
    38456739
    38466740    ESym     = m_EGlobalSym     [filtNo];
    38476741    ySym     = m_yGlobalSym     [filtNo];
    38486742
    3849     if( bMatrixBaseReady )
    3850     {
    3851       xretriveBlockMatrix(m_pcTempAlfParam->num_coeff, m_iTapPosTabIn9x9Sym[filtNo],
    3852                           m_EGlobalSym[m_iMatrixBaseFiltNo], ESym,
    3853                           m_yGlobalSym[m_iMatrixBaseFiltNo], ySym);
    3854 
     6743    if(!m_bUseNonCrossALF)
     6744    {
     6745      xstoreInBlockMatrix(0, 0, m_img_height, m_img_width, true, true, ImgOrg, ImgDec, filter_shape, Stride);
    38556746    }
    38566747    else
    3857 #if MTK_NONCROSS_INLOOP_FILTER
    3858     {
    3859       if(!m_bUseNonCrossALF)
    3860         xstoreInBlockMatrix(0, 0, m_im_height, m_im_width, true, true, ImgOrg, ImgDec, iTap, Stride);
    3861       else
    3862         xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, iTap, Stride);
    3863     }
    3864 #else
    3865     xstoreInBlockMatrix(ImgOrg, ImgDec, iTap, Stride);
    3866 #endif
    3867     if(filtNo == m_iMatrixBaseFiltNo)
    3868     {
    3869       bMatrixBaseReady = true;
    3870     }
    3871 
    3872     xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr,
    3873                            m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val);
    3874 
    3875     uiRate         = xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,0, m_pcTempAlfParam);
     6748    {
     6749      xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, filter_shape, Stride);
     6750    }
     6751    xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr, m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val);
     6752
     6753    //estimate R-D cost
     6754    uiRate         = xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr, m_pcTempAlfParam);
    38766755    iEstimatedDist = xEstimateFiltDist(filters_per_fr, m_varIndTab, ESym, ySym, m_filterCoeffSym, m_pcTempAlfParam->num_coeff);
     6756    iEstimateDistBeforeFilter = xEstimateFiltDist(filters_per_fr, m_varIndTab, ESym, ySym, coeffNoFilter[filter_shape], m_pcTempAlfParam->num_coeff);
     6757    iEstimatedDist -= iEstimateDistBeforeFilter;
    38776758    dEstimatedCost = (Double)(uiRate) * m_dLambdaLuma + (Double)(iEstimatedDist);
    38786759
     
    38806761    {
    38816762      dEstimatedMinCost   = dEstimatedCost;
    3882       ibestfiltNo         = filtNo;
    3883       ibestfilters_per_fr = filters_per_fr;
    3884 #if MQT_BA_RA
     6763      copyALFParam(pcAlfSaved, m_pcTempAlfParam);
     6764      iEstimatedDist += iEstimateDistBeforeFilter;
     6765
     6766      for(Int i=0; i< filters_per_fr; i++ )
     6767      {
     6768        iEstimatedDist += (((Int64)m_pixAcc_merged[i]) >> uiBitShift);
     6769      }
     6770      ruiDist = (iEstimatedDist > 0)?((UInt64)iEstimatedDist):(0);
     6771      rdCost  = dEstimatedMinCost + (Double)(ruiDist);
    38856772      ruiRate = uiRate;
    3886       riDist  = iEstimatedDist;
    3887       rdCost  = dEstimatedMinCost;
    3888 
    3889       copyALFParam(pcAlfSaved, m_pcTempAlfParam);
    3890 #else
    3891       copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    3892 #endif
    3893       ::memcpy(aiVarIndTabBest, m_varIndTab, sizeof(Int)*NO_VAR_BINS);
    3894       for(Int i=0; i< ibestfilters_per_fr; i++ )
    3895       {
    3896         ::memcpy( ppiBestCoeffSet[i], m_filterCoeffSym[i], sizeof(Int) * m_pcTempAlfParam->num_coeff);
    3897       }
    3898     }
    3899   }
    3900 
    3901   filtNo         = ibestfiltNo;
    3902   filters_per_fr = ibestfilters_per_fr;
    3903   ::memcpy(m_varIndTab, aiVarIndTabBest, sizeof(Int)*NO_VAR_BINS);
    3904   for(Int i=0; i< filters_per_fr; i++ )
    3905   {
    3906 #if MQT_BA_RA
    3907     ::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * pcAlfSaved->num_coeff);
    3908 #else
    3909     ::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * m_pcTempAlfParam->num_coeff);
    3910 #endif
    3911   }
    3912 
    3913   xcalcPredFilterCoeff(filtNo);
    3914 
    3915 #if MQT_BA_RA
    3916 
    3917 
    3918   ::memset( pdBestpixAcc, 0,sizeof(double)*NO_VAR_BINS);
    3919   for (Int varInd=0; varInd<NO_VAR_BINS; varInd++)
    3920   {
    3921     ::memset(ppdBesty[varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
    3922     for (Int k=0; k< pcAlfSaved->num_coeff; k++)
    3923     {
    3924       ::memset(pppdBestE[varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
    3925     }
    3926   }
    3927   ESym     = m_EGlobalSym     [filtNo];
    3928   ySym     = m_yGlobalSym     [filtNo];
    3929 
    3930   ::memcpy( pdBestpixAcc, m_pixAcc ,sizeof(double)*NO_VAR_BINS);
    3931   for (Int varInd=0; varInd<NO_VAR_BINS; varInd++)
    3932   {
    3933     ::memcpy(ppdBesty[varInd],ySym[varInd],sizeof(double)*MAX_SQR_FILT_LENGTH);
    3934     for (Int k=0; k< pcAlfSaved->num_coeff; k++)
    3935     {
    3936       ::memcpy(pppdBestE[varInd][k],ESym[varInd][k],sizeof(double)*MAX_SQR_FILT_LENGTH);
    3937     }
    3938   }
    3939 
    3940 
    3941 #else
    3942 
    3943   filtNo         = ibestfiltNo;
    3944   filters_per_fr = ibestfilters_per_fr;
    3945   copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
    3946   ::memcpy(m_varIndTab, aiVarIndTabBest, sizeof(Int)*NO_VAR_BINS);
    3947   for(Int i=0; i< filters_per_fr; i++ )
    3948   {
    3949     ::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * m_pcTempAlfParam->num_coeff);
    3950   }
    3951 
    3952   xcalcPredFilterCoeff(filtNo);
    3953 #if MTK_NONCROSS_INLOOP_FILTER
    3954   if(!m_bUseNonCrossALF)
    3955     xfilterFrame_en(0, 0, m_im_height, m_im_width, ImgDec, ImgRest, filtNo, Stride);
    3956   else
    3957     xfilterSlices_en(ImgDec, ImgRest, filtNo, Stride);
    3958 #else
    3959   xfilterFrame_en(ImgDec, ImgRest, filtNo, Stride);
    3960 #endif
    3961 
    3962 #endif
    3963 }
    3964 
    3965 Void TEncAdaptiveLoopFilter::xretriveBlockMatrix(Int iNumTaps,
    3966                                                  Int* piTapPosInMaxFilter,
    3967                                                  Double*** pppdEBase, Double*** pppdETarget,
    3968                                                  Double**  ppdyBase,  Double** ppdyTarget )
    3969 {
    3970   Int varInd;
    3971   Int i, j, r, c;
    3972 
    3973   Double** ppdSrcE;
    3974   Double** ppdDstE;
    3975   Double*  pdSrcy;
    3976   Double*  pdDsty;
    3977 
    3978   for (varInd=0; varInd< NO_VAR_BINS; varInd++)
    3979   {
    3980     ppdSrcE = pppdEBase  [varInd];
    3981     ppdDstE = pppdETarget[varInd];
    3982     pdSrcy  = ppdyBase   [varInd];
    3983     pdDsty  = ppdyTarget [varInd];
    3984 
    3985     for(j=0; j< iNumTaps; j++)
    3986     {
    3987       r = piTapPosInMaxFilter[j];
    3988 
    3989       for(i=j; i< iNumTaps; i++)
    3990       {
    3991         c = piTapPosInMaxFilter[i];
    3992 
    3993         //auto-correlation retrieval
    3994         ppdDstE[j][i] = ppdSrcE[r][c];
    3995 
    3996       }
    3997 
    3998       //cross-correlation retrieval
    3999       pdDsty[j] = pdSrcy[r];
    4000 
    4001     }
    4002 
    4003     //symmetric copy
    4004     for(j=1; j< iNumTaps; j++)
    4005       for(i=0; i< j; i++)
    4006         ppdDstE[j][i] = ppdDstE[i][j];
    4007 
    4008   }
    4009 
    4010 }
    4011 
     6773    }
     6774  }
     6775
     6776  if (!m_iUsePreviousFilter)
     6777  {
     6778    decodeFilterSet(pcAlfSaved, m_varIndTab, m_filterCoeffSym);
     6779    saveFilterCoeffToBuffer(m_filterCoeffSym, pcAlfSaved->filters_per_group, m_varIndTab, pcAlfSaved->alf_pcr_region_flag, pcAlfSaved->filter_shape);
     6780  }
     6781
     6782  if( m_iUsePreviousFilter )
     6783  {
     6784    UInt64 uiOffRegionDistortion = 0;
     6785    Int    iPelDiff;
     6786    Pel*   pOrgTemp = (Pel*)ImgOrg;
     6787    Pel*   pDecTemp = (Pel*)ImgDec;
     6788    for(Int y=0; y< m_img_height; y++)
     6789    {
     6790      for(Int x=0; x< m_img_width; x++)
     6791      {
     6792        if(m_maskImg[y][x] == 0)
     6793        {
     6794          iPelDiff = pOrgTemp[x] - pDecTemp[x];
     6795          uiOffRegionDistortion += (UInt64)(  (iPelDiff*iPelDiff) >> uiBitShift );
     6796        }
     6797      }
     6798      pOrgTemp += Stride;
     6799      pDecTemp += Stride;
     6800
     6801      ruiDist += uiOffRegionDistortion;
     6802      rdCost  += (Double)uiOffRegionDistortion;
     6803    }
     6804  }
     6805 
     6806#if !ALF_SINGLE_FILTER_SHAPE
     6807  // if ALF_STAR5x5 is selected, the distortion of 2 skipped lines per LCU should be added.
     6808  if(pcAlfSaved->filter_shape == ALF_STAR5x5)
     6809  {
     6810    Int    iPelDiff;
     6811    UInt64  uiSkipPelsDistortion = 0;
     6812    Pel   *pOrgTemp, *pDecTemp;
     6813    for(Int y= m_lineIdxPadTop-1; y< m_img_height - m_lcuHeight ; y += m_lcuHeight)
     6814    {
     6815      pOrgTemp = ImgOrg + y*Stride;
     6816      pDecTemp = ImgDec + y*Stride;
     6817      for(Int x=0; x< m_img_width; x++)
     6818      {
     6819        if(m_maskImg[y][x] == 1)
     6820        {
     6821          iPelDiff = pOrgTemp[x] - pDecTemp[x];
     6822          uiSkipPelsDistortion += (UInt64)(  (iPelDiff*iPelDiff) >> uiBitShift );
     6823        }
     6824      }
     6825
     6826      pOrgTemp += Stride;
     6827      pDecTemp += Stride;
     6828      for(Int x=0; x< m_img_width; x++)
     6829      {
     6830        if(m_maskImg[y+1][x] == 1)
     6831        {
     6832          iPelDiff = pOrgTemp[x] - pDecTemp[x];
     6833          uiSkipPelsDistortion += (UInt64)(  (iPelDiff*iPelDiff) >> uiBitShift );
     6834        }
     6835      }
     6836    }
     6837    ruiDist += uiSkipPelsDistortion;
     6838    rdCost  += (Double)uiSkipPelsDistortion;
     6839  }
     6840#endif
     6841
     6842  for(Int filter_shape = 0; filter_shape < NUM_ALF_FILTER_SHAPE; filter_shape++)
     6843  {
     6844    for(Int i=0; i< NO_VAR_BINS; i++)
     6845    {
     6846      delete[] coeffNoFilter[filter_shape][i];
     6847    }
     6848  }
     6849}
     6850
     6851
     6852#endif
     6853
     6854/** Estimate filtering distortion by correlation values and filter coefficients
     6855 * \param ppdE auto-correlation matrix
     6856 * \param pdy cross-correlation array
     6857 * \param piCoeff  filter coefficients
     6858 * \param iFiltLength number of filter taps
     6859 * \returns estimated distortion
     6860 */
    40126861Int64 TEncAdaptiveLoopFilter::xFastFiltDistEstimation(Double** ppdE, Double* pdy, Int* piCoeff, Int iFiltLength)
    40136862{
    40146863  //static memory
    4015   static Bool     bFirst = true;
    4016   static Double*  pdcoeff;
    4017   if(bFirst)
    4018   {
    4019     pdcoeff= new Double[MAX_SQR_FILT_LENGTH];       
    4020     bFirst= false;
    4021   }
    4022 
     6864  Double pdcoeff[ALF_MAX_NUM_COEF];
    40236865  //variable
    40246866  Int    i,j;
    40256867  Int64  iDist;
    40266868  Double dDist, dsum;
    4027 
     6869#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     6870  Int alfPrecisionBit = getAlfPrecisionBit( m_alfQP );
     6871#endif
    40286872
    40296873  for(i=0; i< iFiltLength; i++)
    4030     pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<< (NUM_BITS - 1) );
    4031 
     6874  {
     6875#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     6876    pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<<alfPrecisionBit);
     6877#else
     6878    pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<< ((Int)ALF_NUM_BIT_SHIFT) );
     6879#endif
     6880  }
    40326881
    40336882  dDist =0;
     
    40366885    dsum= ((Double)ppdE[i][i]) * pdcoeff[i];
    40376886    for(j=i+1; j< iFiltLength; j++)
     6887    {
    40386888      dsum += (Double)(2*ppdE[i][j])* pdcoeff[j];
     6889    }
    40396890
    40406891    dDist += ((dsum - 2.0 * pdy[i])* pdcoeff[i] );
     
    40566907}
    40576908
     6909
     6910#if !LCU_SYNTAX_ALF
     6911
     6912/** Estimate total filtering cost of all groups
     6913 * \param filters_per_fr number of filters for the slice
     6914 * \param VarIndTab merge index of all groups
     6915 * \param pppdE  auto-correlation matrix pointer for all groups
     6916 * \param ppdy cross-correlation array pointer for all groups
     6917 * \returns estimated distortion
     6918 */
    40586919Int64 TEncAdaptiveLoopFilter::xEstimateFiltDist(Int filters_per_fr, Int* VarIndTab,
    40596920                                                Double*** pppdE, Double** ppdy,
     
    40766937      //clean m_E_merged one line
    40776938      for(i=0; i < iFiltLength; i++)
     6939      {
    40786940        m_E_merged[f][j][i] = 0;
     6941      }
    40796942
    40806943      //clean m_y_merged
    40816944      m_y_merged[f][j] = 0;
    40826945    }
     6946    m_pixAcc_merged[f] = 0;
    40836947  }
    40846948
     
    40966960    {
    40976961      for(i=0; i< iFiltLength; i++)
     6962      {
    40986963        ppdDstE[j][i] += ppdSrcE[j][i];
     6964      }
    40996965
    41006966      pdDsty[j] += pdSrcy[j];
    41016967    }
     6968    m_pixAcc_merged[ VarIndTab[varInd]  ] += m_pixAcc[varInd];
     6969
    41026970  }
    41036971
     
    41176985
    41186986}
    4119 #endif
    4120 
    4121 #if MTK_NONCROSS_INLOOP_FILTER
    4122 
    4123 Void TEncAdaptiveLoopFilter::calcVarforSlices(imgpel **varmap, imgpel *imgY_Dec, Int pad_size, Int fl, Int img_stride)
    4124 {
    4125 #if MQT_BA_RA
    4126   if(m_uiVarGenMethod == ALF_RA)
    4127   {
    4128     return;
    4129   }
    4130 #endif
    4131 
    4132   Pel* pPicSrc   = (Pel *)imgY_Dec;
    4133   Pel* pPicSlice = m_pcSliceYuvTmp->getLumaAddr();
    4134 
    4135   for(UInt s=0; s< m_uiNumSlicesInPic; s++)
    4136   {
    4137     CAlfSlice* pSlice = &(m_pSlice[s]);
    4138 
    4139     pSlice->copySliceLuma(pPicSlice, pPicSrc, img_stride);
    4140     pSlice->extendSliceBorderLuma(pPicSlice, img_stride, (UInt)EXTEND_NUM_PEL);
    4141     calcVarforOneSlice(pSlice, varmap, (imgpel*)pPicSlice, pad_size, fl, img_stride);
    4142   }
    4143 }
    4144 
    4145 
    4146 
    4147 Void TEncAdaptiveLoopFilter::xfilterSlices_en(imgpel* ImgDec, imgpel* ImgRest,int filtNo, int iStride)
     6987
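     6988/** Filter the luma component region by region for all valid ALF slices
     6989 * \param ImgDec picture buffer before filtering (copied into the temporary slice buffer)
     6990 * \param ImgRest restored picture buffer passed to filterLumaRegion()
     6991 * \param iStride picture buffer stride
     6992 * \param filtNo,filterCoeff,mergeTable,varImg filter settings forwarded unchanged to filterLumaRegion()
     6993 * Each region is copied and border-extended in the temporary buffer before it is filtered.
     6994 */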
     6995Void TEncAdaptiveLoopFilter::xfilterSlicesEncoder(Pel* ImgDec, Pel* ImgRest, Int iStride, Int filtNo, Int** filterCoeff, Int* mergeTable, Pel** varImg)
    41486996{
    41496997  Pel* pPicSrc   = (Pel *)ImgDec;
     
    41527000  for(UInt s=0; s< m_uiNumSlicesInPic; s++)
    41537001  {
    4154     CAlfSlice* pSlice = &(m_pSlice[s]);
    4155 
    4156     pSlice->copySliceLuma(pPicSlice, pPicSrc, iStride);
    4157     pSlice->extendSliceBorderLuma(pPicSlice, iStride, EXTEND_NUM_PEL);
    4158 
    4159     xfilterOneSlice_en(pSlice, (imgpel*)pPicSlice, ImgRest, filtNo, iStride);
    4160   }
    4161 }
    4162 
    4163 
    4164 Void TEncAdaptiveLoopFilter::xfilterOneSlice_en(CAlfSlice* pSlice, imgpel* ImgDec, imgpel* ImgRest,int filtNo, int iStride)
    4165 {
    4166   UInt uiNumLCUs = pSlice->getNumLCUs();
    4167 
    4168   Int iHeight, iWidth;
    4169   Int ypos, xpos;
    4170 
    4171   for(UInt i=0; i< uiNumLCUs; i++)
    4172   {
    4173     CAlfCU* pcAlfCU = &((*pSlice)[i]);
    4174 
    4175     ypos    = pcAlfCU->getCU()->getCUPelY();
    4176     xpos    = pcAlfCU->getCU()->getCUPelX();
    4177     iHeight = pcAlfCU->getHeight();
    4178     iWidth  = pcAlfCU->getWidth();
    4179 
    4180     xfilterFrame_en(ypos, xpos, iHeight, iWidth, ImgDec, ImgRest, filtNo, iStride);
    4181   }
    4182 }
    4183 
    4184 
    4185 
    4186 Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrixforSlices(imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int iStride)
     7002    if(!m_pcPic->getValidSlice(s))
     7003    {
     7004      continue;
     7005    }
     7006    std::vector< std::vector<AlfLCUInfo*> > & vpSliceTileAlfLCU = m_pvpSliceTileAlfLCU[s];
     7007
     7008    for(Int t=0; t< (Int)vpSliceTileAlfLCU.size(); t++)
     7009    {
     7010      std::vector<AlfLCUInfo*> & vpAlfLCU = vpSliceTileAlfLCU[t];
     7011      copyRegion(vpAlfLCU, pPicSlice, pPicSrc, iStride);
     7012      extendRegionBorder(vpAlfLCU, pPicSlice, iStride);
     7013      filterLumaRegion(vpAlfLCU, pPicSlice, ImgRest, iStride, filtNo, filterCoeff, mergeTable, varImg);
     7014    }
     7015  }
     7016}
     7017
     7018/** Calculate block autocorrelations and crosscorrelations for ALF slices
     7019 * \param ImgOrg original picture
     7020 * \param ImgDec picture before filtering
     7021 * \param tap  filter tap size
     7022 * \param iStride picture buffer stride
     7023 */
     7024Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrixforSlices(Pel* ImgOrg, Pel* ImgDec, Int tap, Int iStride)
    41877025{
    41887026  Pel* pPicSrc   = (Pel *)ImgDec;
    41897027  Pel* pPicSlice = m_pcSliceYuvTmp->getLumaAddr();
    41907028
     7029  UInt iLastValidSliceID =0;
    41917030  for(UInt s=0; s< m_uiNumSlicesInPic; s++)
    41927031  {
    4193     CAlfSlice* pSlice = &(m_pSlice[s]);
    4194     pSlice->copySliceLuma(pPicSlice, pPicSrc, iStride);
    4195     pSlice->extendSliceBorderLuma(pPicSlice, iStride, (UInt)EXTEND_NUM_PEL);
    4196     xstoreInBlockMatrixforOneSlice(pSlice, ImgOrg, (imgpel*)pPicSlice, tap, iStride, (s==0), (s== m_uiNumSlicesInPic-1));
    4197   }
    4198 }
    4199 
    4200 Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrixforOneSlice(CAlfSlice* pSlice,
    4201                                                               imgpel* ImgOrg, imgpel* ImgDec,
     7032    if(m_pcPic->getValidSlice(s))
     7033    {
     7034      iLastValidSliceID = s;
     7035    }
     7036  }
     7037
     7038  for(UInt s=0; s<= iLastValidSliceID; s++)
     7039  {
     7040    if(!m_pcPic->getValidSlice(s))
     7041    {
     7042      continue;
     7043    }
     7044    std::vector< std::vector<AlfLCUInfo*> > & vpSliceTileAlfLCU = m_pvpSliceTileAlfLCU[s];
     7045    Int numValidTilesInSlice = (Int)vpSliceTileAlfLCU.size();
     7046    for(Int t=0; t< numValidTilesInSlice; t++)
     7047    {
     7048      std::vector<AlfLCUInfo*> & vpAlfLCU = vpSliceTileAlfLCU[t];
     7049      copyRegion(vpAlfLCU, pPicSlice, pPicSrc, iStride);
     7050      extendRegionBorder(vpAlfLCU, pPicSlice, iStride);
     7051      xstoreInBlockMatrixforRegion(vpAlfLCU, ImgOrg, pPicSlice, tap, iStride, (s==0)&&(t==0), (s== iLastValidSliceID)&&(t==numValidTilesInSlice-1));
     7052    }
     7053  }
     7054}
     7055
     7056/** Calculate block autocorrelations and crosscorrelations for one ALF region
     7057 * \param vpAlfLCU ALF LCU data container
     7058 * \param ImgOrg original picture
     7059 * \param ImgDec picture before filtering
     7060 * \param tap  filter tap size
     7061 * \param iStride picture buffer stride
     7062 * \param bFirstSlice  true for the first processing slice of the picture
     7063 * \param bLastSlice true for the last processing slice of the picture
     7064 */
     7065Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrixforRegion(std::vector< AlfLCUInfo* > &vpAlfLCU,
     7066                                                              Pel* ImgOrg, Pel* ImgDec,
    42027067                                                              Int tap, Int iStride,
    42037068                                                              Bool bFirstSlice,
     
    42067071{
    42077072
    4208 
    4209   UInt uiNumLCUs = pSlice->getNumLCUs();
    4210 
     7073  UInt uiNumLCUs = (UInt)vpAlfLCU.size();
    42117074  Int iHeight, iWidth;
    42127075  Int ypos, xpos;
    42137076  Bool bFirstLCU, bLastLCU;
     7077  Bool bFirstSGU, bLastSGU;
     7078  UInt numSGUs;
    42147079
    42157080  for(UInt i=0; i< uiNumLCUs; i++)
     
    42177082    bFirstLCU = (i==0);
    42187083    bLastLCU  = (i== uiNumLCUs -1);
    4219 
    4220     CAlfCU* pcAlfCU = &((*pSlice)[i]);
    4221     ypos    = pcAlfCU->getCU()->getCUPelY();
    4222     xpos    = pcAlfCU->getCU()->getCUPelX();
    4223     iHeight = pcAlfCU->getHeight();
    4224     iWidth  = pcAlfCU->getWidth();
    4225 
    4226     xstoreInBlockMatrix(ypos, xpos, iHeight, iWidth,
    4227       (bFirstSlice && bFirstLCU),(bLastSlice && bLastLCU),
    4228       ImgOrg, ImgDec,tap, iStride);
    4229   }
    4230 }
    4231 
    4232 
    4233 
     7084    AlfLCUInfo& cAlfLCU = *(vpAlfLCU[i]);
     7085    numSGUs = cAlfLCU.numSGU;
     7086    for(UInt j=0; j< numSGUs; j++)
     7087    {
     7088      bFirstSGU= (j ==0);
     7089      bLastSGU = (j == numSGUs -1);
     7090
     7091      ypos    = (Int)(cAlfLCU[j].posY  );
     7092      xpos    = (Int)(cAlfLCU[j].posX  );
     7093      iHeight = (Int)(cAlfLCU[j].height);
     7094      iWidth  = (Int)(cAlfLCU[j].width );
     7095
     7096      xstoreInBlockMatrix(ypos, xpos, iHeight, iWidth,
     7097        (bFirstSlice && bFirstLCU && bFirstSGU),(bLastSlice && bLastLCU && bLastSGU),
     7098        ImgOrg, ImgDec,tap, iStride);
     7099    }
     7100  }
     7101}
     7102
     7103
     7104/** Calculate autocorrelations and crosscorrelations for chroma slices
     7105 * \param ComponentID Cb or Cr
     7106 * \param pOrg original picture
     7107 * \param pCmp picture before filtering
     7108 * \param iTap  filter tap size
     7109 * \param iOrgStride picture buffer stride for pOrg
     7110 * \param iCmpStride picture buffer stride for pCmp
     7111 */
    42347112Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaSlices(Int ComponentID, Pel* pOrg, Pel* pCmp, Int iTap, Int iOrgStride, Int iCmpStride)
    42357113{
     
    42397117  Pel* pPicSrc   = pCmp;
    42407118  Pel* pPicSlice = (ComponentID == ALF_Cb)?(m_pcSliceYuvTmp->getCbAddr()):(m_pcSliceYuvTmp->getCrAddr());
    4241 
     7119  Int chromaFormatShift = 1;
     7120
     7121  UInt iLastValidSliceID =0;
    42427122  for(UInt s=0; s< m_uiNumSlicesInPic; s++)
    42437123  {
    4244     CAlfSlice* pSlice = &(m_pSlice[s]);
    4245 
    4246     pSlice->copySliceChroma(pPicSlice, pPicSrc, iCmpStride);
    4247     pSlice->extendSliceBorderChroma(pPicSlice, iCmpStride, (UInt)EXTEND_NUM_PEL_C);
    4248 
    4249     xCalcCorrelationFuncforChromaOneSlice(pSlice, pOrg, pPicSlice, iTap, iCmpStride,(s==m_uiNumSlicesInPic-1));
    4250   }
    4251 }
    4252 
    4253 Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaOneSlice(CAlfSlice* pSlice, Pel* pOrg, Pel* pCmp, Int iTap, Int iStride, Bool bLastSlice)
    4254 {
    4255   UInt uiNumLCUs = pSlice->getNumLCUs();
     7124    if(m_pcPic->getValidSlice(s))
     7125    {
     7126      iLastValidSliceID = s;
     7127    }
     7128  }
     7129
     7130  for(UInt s=0; s<= iLastValidSliceID; s++)
     7131  {
     7132    if(!m_pcPic->getValidSlice(s))
     7133    {
     7134      continue;
     7135    }
     7136    std::vector< std::vector<AlfLCUInfo*> > & vpSliceTileAlfLCU = m_pvpSliceTileAlfLCU[s];
     7137    Int numValidTilesInSlice = (Int)vpSliceTileAlfLCU.size();
     7138    for(Int t=0; t< numValidTilesInSlice; t++)
     7139    {
     7140      std::vector<AlfLCUInfo*> & vpAlfLCU = vpSliceTileAlfLCU[t];
     7141      copyRegion(vpAlfLCU, pPicSlice, pPicSrc, iCmpStride, chromaFormatShift);
     7142      extendRegionBorder(vpAlfLCU, pPicSlice, iCmpStride, chromaFormatShift);
     7143      xCalcCorrelationFuncforChromaRegion(vpAlfLCU, pOrg, pPicSlice, iTap, iCmpStride,(s== iLastValidSliceID)&&(t== numValidTilesInSlice-1), chromaFormatShift);
     7144    }
     7145  }
     7146}
     7147
     7148/** Calculate autocorrelations and crosscorrelations for one chroma region
     7149 * \param vpAlfLCU ALF LCU data container
     7150 * \param pOrg original picture
     7151 * \param pCmp picture before filtering
     7152 * \param filtNo filter shape/tap selector forwarded to xCalcCorrelationFunc()
     7153 * \param iStride picture buffer stride
     7154 * \param bLastSlice the last processing slice of picture
          * \param iFormatShift right shift applied to each SGU position and size before the correlation call
     7155 */
     7156Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaRegion(std::vector< AlfLCUInfo* > &vpAlfLCU, Pel* pOrg, Pel* pCmp, Int filtNo, Int iStride, Bool bLastSlice, Int iFormatShift)
     7157{
     7158  UInt uiNumLCUs = (UInt)vpAlfLCU.size();
    42567159
    42577160  Int iHeight, iWidth;
    42587161  Int ypos, xpos;
    42597162  Bool bLastLCU;
     7163  Bool bLastSGU;
     7164  UInt numSGUs;
    42607165
    42617166  for(UInt i=0; i< uiNumLCUs; i++)
     
    42637168    bLastLCU  = (i== uiNumLCUs -1);
    42647169
    4265     CAlfCU* pcAlfCU = &((*pSlice)[i]);
    4266     ypos    = ( pcAlfCU->getCU()->getCUPelY() >> 1 );
    4267     xpos    = ( pcAlfCU->getCU()->getCUPelX() >> 1 );
    4268     iHeight = (Int)( pcAlfCU->getHeight() >> 1);
    4269     iWidth  = (Int)( pcAlfCU->getWidth() >> 1);
    4270 
    4271     xCalcCorrelationFunc(ypos, xpos, pOrg, pCmp, iTap, iWidth, iHeight, iStride, iStride, (bLastSlice && bLastLCU ) );
    4272   }
    4273 }
    4274 
    4275 Void TEncAdaptiveLoopFilter::xFrameChromaforSlices(Int ComponentID, TComPicYuv* pcPicDecYuv, TComPicYuv* pcPicRestYuv, Int *qh, Int iTap )
    4276 {
    4277   Pel* pPicDec   = (ComponentID == ALF_Cb)?(    pcPicDecYuv->getCbAddr()):(    pcPicDecYuv->getCrAddr());
    4278   //  Pel* pPicRest  = (ComponentID == ALF_Cb)?(   pcPicRestYuv->getCbAddr()):(   pcPicRestYuv->getCrAddr());
    4279   Pel* pPicSlice = (ComponentID == ALF_Cb)?(m_pcSliceYuvTmp->getCbAddr()):(m_pcSliceYuvTmp->getCrAddr());
    4280 
    4281   Int iStride = pcPicDecYuv->getCStride();
    4282 
    4283   assert(iStride == pcPicRestYuv->getCStride());
    4284 
    4285   for(UInt s=0; s< m_uiNumSlicesInPic; s++)
    4286   {
    4287     CAlfSlice* pSlice = &(m_pSlice[s]);
    4288 
    4289     pSlice->copySliceChroma(pPicSlice, pPicDec, iStride);
    4290     pSlice->extendSliceBorderChroma(pPicSlice, iStride, (UInt)EXTEND_NUM_PEL_C);
    4291 
    4292     xFrameChromaforOneSlice(pSlice, ComponentID, m_pcSliceYuvTmp, pcPicRestYuv, qh, iTap);
    4293   }
    4294 }
    4295 
    4296 #endif
    4297 
    4298 
    4299 #if MTK_SAO
    4300 inline Double xRoundIbdi2(Double x)
    4301 {
    4302   return ((x)>0) ? (Int)(((Int)(x)+(1<<(g_uiBitIncrement-1)))/(1<<g_uiBitIncrement)) : ((Int)(((Int)(x)-(1<<(g_uiBitIncrement-1)))/(1<<g_uiBitIncrement)));
    4303 }
    4304 
    4305 inline Double xRoundIbdi(Double x)
    4306 {
    4307   return (g_uiBitIncrement >0 ? xRoundIbdi2((x)) : ((x)>=0 ? ((Int)((x)+0.5)) : ((Int)((x)-0.5)))) ;
    4308 }
    4309 
    4310 /** run QAO One Part.
    4311  * \param  pQAOOnePart, iPartIdx
    4312  */
    4313 Void TEncSampleAdaptiveOffset::xQAOOnePart(SAOQTPart* pQAOOnePart, Int iPartIdx)
    4314 {
    4315   Int iTypeIdx;
    4316   Int iNumTotalType = MAX_NUM_SAO_TYPE;
    4317 
    4318   Int64 iEstDist;
    4319   Int64 iOffsetOrg;
    4320   Int64 iOffset;
    4321   Int64 iCount;
    4322   Int iClassIdx;
    4323   Int uiShift = g_uiBitIncrement << 1;
    4324   Double dAreaWeight =  (pQAOOnePart->part_xe - pQAOOnePart->part_xs + 1) * (pQAOOnePart->part_ye - pQAOOnePart->part_ys + 1);
    4325   Double dComplexityCost = 0;
    4326   Int    iQaoPara1 = SAO_RDCO;
    4327 
    4328   UInt uiDepth = pQAOOnePart->PartLevel;
    4329 
    4330   //   m_iDistOrg [iPartIdx] = 0;
    4331 
    4332   m_iDistOrg [iPartIdx] =  (Int64)((Double)(iQaoPara1)/10000 * m_dLambdaLuma * dAreaWeight);
    4333 
    4334   for (iTypeIdx=-1; iTypeIdx<iNumTotalType; iTypeIdx++)
    4335   {
    4336     if( m_bUseSBACRD )
    4337     {
    4338       m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); // pcCU->getDepth(0) ==> m_puhDepth[uiIdx]
    4339       m_pcRDGoOnSbacCoder->resetBits();
     7170    AlfLCUInfo& cAlfLCU = *(vpAlfLCU[i]);
     7171    numSGUs = cAlfLCU.numSGU;
     7172    for(UInt j=0; j< numSGUs; j++)
     7173    {
     7174      bLastSGU = (j == numSGUs -1);
     7175      ypos    = (Int)(cAlfLCU[j].posY   >> iFormatShift);
     7176      xpos    = (Int)(cAlfLCU[j].posX   >> iFormatShift);
     7177      iHeight = (Int)(cAlfLCU[j].height >> iFormatShift);
     7178      iWidth  = (Int)(cAlfLCU[j].width  >> iFormatShift);
     7179      xCalcCorrelationFunc(ypos, xpos, pOrg, pCmp, filtNo, iWidth, iHeight, iStride, iStride, (bLastSlice && bLastLCU && bLastSGU) );
     7180    }
     7181  }
     7182}
     7183
     7184// ====================================================================================================================
     7185// Protected member functions
     7186// ====================================================================================================================
     7187
         /** Select the chroma ALF configuration (filter shape and which of Cb/Cr are filtered) by RD cost,
          *  then filter the chroma planes and keep or revert the result per component.
          * \param uiLumaRate initial value of the reference rate (uiMinRate) handed to xCalcRDCostChroma()
          * \param pcPicOrg original picture
          * \param pcPicDec picture before filtering
          * \param pcPicRest picture buffer for the filtered chroma; NOTE(review): pcPicDec->copyToPicCb/Cr(pcPicRest)
          *        presumably overwrites it with the unfiltered plane when filtering is rejected - confirm
          * \param ruiDist distortion accumulator, increased by the distortion of the chosen configuration
          * \param ruiBits bit accumulator, increased by the rate of the chosen configuration
          * NOTE(review): the early return taken when bChanged is false leaves ruiDist and ruiBits untouched.
          */
     7188Void TEncAdaptiveLoopFilter::xFilterTapDecisionChroma( UInt64 uiLumaRate, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, UInt64& ruiBits )
     7189{
     7190  Int   iShape, num_coeff;
     7191  Int64 iOrgDistCb, iOrgDistCr, iFiltDistCb, iFiltDistCr, iDist;
     7192  Bool  bChanged = false;
     7193  Int*  qh = m_pcTempAlfParam->coeff_chroma;
     7194
     7195  UInt64 uiMinRate = uiLumaRate;
     7196  UInt64 uiMinDist = MAX_INT;
     7197  Double dMinCost  = MAX_DOUBLE;
     7198  Double dLocalMinCost = MAX_DOUBLE;
     7199
           // reference cost of the current best parameters; uiMinRate/uiMinDist/dMinCost are used below as the
           // fallback values (presumably filled in by xCalcRDCostChroma() through reference parameters - confirm)
     7200  copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
     7201  xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiMinRate, uiMinDist, dMinCost);
     7202
     7203#if ALF_SINGLE_FILTER_SHAPE
     7204  iShape = 0;
     7205#else
     7206  for(iShape = 0; iShape < 2; iShape++)
     7207#endif 
     7208  {
     7209    // set global variables
             // chroma_idc bit layout as used in this function: bit 1 (value 2) = Cb, bit 0 (value 1) = Cr
     7210    num_coeff = m_sqrFiltLengthTab[iShape];
     7211    m_pcTempAlfParam->chroma_idc = 3;
     7212    m_pcTempAlfParam->filter_shape_chroma = iShape;
     7213    m_pcTempAlfParam->num_coeff_chroma = num_coeff;
     7214
     7215    // keep original corr pointer
     7216    Double **ppdTmpCorr = m_ppdAlfCorr;
     7217
     7218    // calc Cb matrix
             // m_ppdAlfCorr is temporarily redirected to the Cb buffer while the correlation is gathered
     7219    m_pcTempAlfParam->chroma_idc = 2;
     7220    m_ppdAlfCorr = m_ppdAlfCorrCb;
     7221    for(Int i=0; i<ALF_MAX_NUM_COEF; i++)
     7222    {
     7223      ::memset(m_ppdAlfCorr[i], 0, sizeof(Double) * (ALF_MAX_NUM_COEF + 1));
     7224    }
     7225    Pel *pOrg = pcPicOrg->getCbAddr();
     7226    Pel *pCmp = pcPicDec->getCbAddr();
     7227    if(!m_bUseNonCrossALF)
     7228    {
     7229      xCalcCorrelationFunc(0, 0, pOrg, pCmp, iShape, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
    43407230    }
    43417231    else
    43427232    {
     7233      xCalcCorrelationFuncforChromaSlices(ALF_Cb, pOrg, pCmp, iShape, pcPicOrg->getCStride(), pcPicDec->getCStride());
     7234    }
     7235
     7236    // calc Cr matrix
     7237    m_pcTempAlfParam->chroma_idc = 1;
     7238    m_ppdAlfCorr = m_ppdAlfCorrCr;
     7239    for(Int i=0; i<ALF_MAX_NUM_COEF; i++)
     7240    {
     7241      ::memset(m_ppdAlfCorr[i], 0, sizeof(Double) * (ALF_MAX_NUM_COEF + 1));
     7242    }
     7243    pOrg = pcPicOrg->getCrAddr();
     7244    pCmp = pcPicDec->getCrAddr();
     7245    if(!m_bUseNonCrossALF)
     7246    {
     7247      xCalcCorrelationFunc(0, 0, pOrg, pCmp, iShape, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
     7248    }
     7249    else
     7250    {
     7251      xCalcCorrelationFuncforChromaSlices(ALF_Cr, pOrg, pCmp, iShape, pcPicOrg->getCStride(), pcPicDec->getCStride());
     7252    }
     7253
     7254    // restore original corr pointer
     7255    m_ppdAlfCorr = ppdTmpCorr;
     7256
     7257    // calc original dist
             // all coefficients are zero except the last one, which equals 1.0 after the
             // 1<<ALF_NUM_BIT_SHIFT scaling applied in xFastFiltDistEstimationChroma()
     7258    memset(qh, 0, sizeof(Int)*num_coeff);
     7259    qh[num_coeff-1] = 1<<((Int)ALF_NUM_BIT_SHIFT);
     7260    iOrgDistCb = xFastFiltDistEstimationChroma(m_ppdAlfCorrCb, qh, num_coeff);
     7261    iOrgDistCr = xFastFiltDistEstimationChroma(m_ppdAlfCorrCr, qh, num_coeff);
     7262
             // candidates: 1 = Cr only, 2 = Cb only, 3 = both; a component that is not selected
             // contributes its original (unfiltered) distortion estimate
     7263    for(Int iCmp=1; iCmp<=3; iCmp++)
     7264    {
     7265      m_pcTempAlfParam->chroma_idc = iCmp;
     7266      xCalcALFCoeffChroma(iCmp, iShape, qh);
     7267      iFiltDistCb = ((iCmp>>1)&0x1) ? xFastFiltDistEstimationChroma(m_ppdAlfCorrCb, qh, num_coeff) : iOrgDistCb;
     7268      iFiltDistCr = ((iCmp)   &0x1) ? xFastFiltDistEstimationChroma(m_ppdAlfCorrCr, qh, num_coeff) : iOrgDistCr;
     7269      iDist = iFiltDistCb + iFiltDistCr;
     7270      UInt64 uiRate = xCalcRateChroma(m_pcTempAlfParam);
     7271      Double dCost  = (Double)iDist + m_dLambdaChroma * (Double)uiRate;
               // NOTE(review): dLocalMinCost starts at MAX_DOUBLE, so the first candidate normally
               // passes this test and bChanged becomes true
     7272      if(dCost < dLocalMinCost)
     7273      {
     7274        dLocalMinCost = dCost;
     7275        copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
     7276        bChanged = true;
     7277      }
     7278    }
     7279  }
     7280  copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
     7281  if(!bChanged)
     7282  {
     7283    m_pcBestAlfParam->chroma_idc = 0;
     7284    return;
     7285  }
     7286
     7287  // Adaptive in-loop wiener filtering for chroma
     7288  xFilteringFrameChroma(m_pcTempAlfParam, pcPicOrg, pcPicDec, pcPicRest);
     7289
     7290  // filter on/off decision for chroma
           // measured SSD replaces the estimates here: a component is switched off when filtering
           // did not lower its SSD against the original picture
     7291  Int iCWidth = (pcPicOrg->getWidth()>>1);
     7292  Int iCHeight = (pcPicOrg->getHeight()>>1);
     7293  Int iCStride = pcPicOrg->getCStride();
     7294  UInt64 uiFiltDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicRest->getCbAddr(), iCWidth, iCHeight, iCStride);
     7295  UInt64 uiFiltDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicRest->getCrAddr(), iCWidth, iCHeight, iCStride);
     7296  UInt64 uiOrgDistCb  = xCalcSSD(pcPicOrg->getCbAddr(), pcPicDec->getCbAddr(), iCWidth, iCHeight, iCStride);
     7297  UInt64 uiOrgDistCr  = xCalcSSD(pcPicOrg->getCrAddr(), pcPicDec->getCrAddr(), iCWidth, iCHeight, iCStride);
     7298  if(((m_pcTempAlfParam->chroma_idc)>>1 & 0x1) && (uiOrgDistCb<=uiFiltDistCb))
     7299  {
     7300    m_pcTempAlfParam->chroma_idc -= 2;
     7301    pcPicDec->copyToPicCb(pcPicRest);
     7302  }
     7303  if(((m_pcTempAlfParam->chroma_idc)    & 0x1) && (uiOrgDistCr<=uiFiltDistCr))
     7304  {
     7305    m_pcTempAlfParam->chroma_idc -= 1;
     7306    pcPicDec->copyToPicCr(pcPicRest);
     7307  }
     7308
     7309  if(m_pcTempAlfParam->chroma_idc)
     7310  {
     7311    UInt64 uiRate, uiDist;
     7312    Double dCost;
     7313    xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost);
     7314
             // final check against the reference cost computed at the top of the function
     7315    if( dCost < dMinCost )
     7316    {
     7317      copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
     7318      predictALFCoeffChroma(m_pcBestAlfParam);
     7319     
     7320      ruiBits += uiRate;
     7321      ruiDist += uiDist;
     7322    }
     7323    else
     7324    {
     7325      m_pcBestAlfParam->chroma_idc = 0;
     7326     
               // only the components that are still marked as filtered need to be copied back
     7327      if((m_pcTempAlfParam->chroma_idc>>1)&0x01)
     7328      {
     7329        pcPicDec->copyToPicCb(pcPicRest);
     7330      }
     7331      if(m_pcTempAlfParam->chroma_idc&0x01)
     7332      {
     7333        pcPicDec->copyToPicCr(pcPicRest);
     7334      }
     7335     
     7336      ruiBits += uiMinRate;
     7337      ruiDist += uiMinDist;
     7338    }
     7339  }
     7340  else
     7341  {
             // both components were switched off by the SSD comparison above
     7342    m_pcBestAlfParam->chroma_idc = 0;
     7343   
     7344    ruiBits += uiMinRate;
     7345    ruiDist += uiMinDist;
     7346   
     7347    pcPicDec->copyToPicCb(pcPicRest);
     7348    pcPicDec->copyToPicCr(pcPicRest);
     7349  }
     7350}
     7351
     7352Int64 TEncAdaptiveLoopFilter::xFastFiltDistEstimationChroma(Double** ppdCorr, Int* piCoeff, Int iSqrFiltLength)
     7353{
     7354  Double pdcoeff[ALF_MAX_NUM_COEF];
     7355  Int    i,j;
     7356  Int64  iDist;
     7357  Double dDist, dsum;
     7358  for(i=0; i< iSqrFiltLength; i++)
     7359  {
     7360    pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<< ((Int)ALF_NUM_BIT_SHIFT) );
     7361  }
     7362
     7363  dDist =0;
     7364  for(i=0; i< iSqrFiltLength; i++)
     7365  {
     7366    dsum= ((Double)ppdCorr[i][i]) * pdcoeff[i];
     7367    for(j=i+1; j< iSqrFiltLength; j++)
     7368    {
     7369      dsum += (Double)(2*ppdCorr[i][j])* pdcoeff[j];
     7370    }
     7371
     7372    dDist += ((dsum - 2.0 * ppdCorr[i][iSqrFiltLength])* pdcoeff[i] );
     7373  }
     7374
     7375  UInt uiShift = g_uiBitIncrement<<1;
     7376  if(dDist < 0)
     7377  {
     7378    iDist = -(((Int64)(-dDist + 0.5)) >> uiShift);
     7379  }
     7380  else //dDist >=0
     7381  {
     7382    iDist= ((Int64)(dDist+0.5)) >> uiShift;
     7383  }
     7384
     7385  return iDist;
     7386}
     7387
     7388Void TEncAdaptiveLoopFilter::xCalcALFCoeffChroma(Int iChromaIdc, Int iShape, Int* piCoeff)
     7389{
     7390  Int iSqrFiltLength = m_sqrFiltLengthTab[iShape];
     7391
     7392  for(Int i=0; i<iSqrFiltLength; i++)
     7393  {
     7394    memset(m_ppdAlfCorr[i], 0, sizeof(Double)*(iSqrFiltLength + 1));
     7395  }
     7396
     7397  // retrive
     7398  if((iChromaIdc>>1) & 0x1)
     7399  {
     7400    for(Int i=0; i<iSqrFiltLength; i++)
     7401    {
     7402      for(Int j=i; j<iSqrFiltLength+1; j++)
     7403      {
     7404        m_ppdAlfCorr[i][j] += m_ppdAlfCorrCb[i][j];
     7405      }
     7406    }
     7407  }
     7408  if(iChromaIdc & 0x1)
     7409  {
     7410    for(Int i=0; i<iSqrFiltLength; i++)
     7411    {
     7412      for(Int j=i; j<iSqrFiltLength+1; j++)
     7413      {
     7414        m_ppdAlfCorr[i][j] += m_ppdAlfCorrCr[i][j];
     7415      }
     7416    }
     7417  }
     7418
     7419  // copy
     7420  for(Int i=1; i<iSqrFiltLength; i++)
     7421  {
     7422    for(Int j=0; j<i; j++)
     7423    {
     7424      m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i];
     7425    }
     7426  }
     7427
     7428  Double *corr = new Double[iSqrFiltLength];
     7429  for(Int i=0; i<iSqrFiltLength; i++)
     7430  {
     7431    corr[i] = m_ppdAlfCorr[i][iSqrFiltLength];
     7432  }
     7433
     7434  // calc coeff
     7435  gnsSolveByChol(m_ppdAlfCorr, corr, m_pdDoubleAlfCoeff, iSqrFiltLength);
     7436  xQuantFilterCoef(m_pdDoubleAlfCoeff, piCoeff, iShape, g_uiBitDepth + g_uiBitIncrement);
     7437  delete [] corr;
     7438}
     7439
     7440UInt64 TEncAdaptiveLoopFilter::xCalcRateChroma(ALFParam* pAlfParam)
     7441{
     7442  UInt64 uiRate;
     7443  Int* piTmpCoef;
     7444  piTmpCoef = new Int[ALF_MAX_NUM_COEF];
     7445  memcpy(piTmpCoef, pAlfParam->coeff_chroma, sizeof(Int)*pAlfParam->num_coeff_chroma);
     7446
     7447  predictALFCoeffChroma(pAlfParam);
     7448
     7449  m_pcEntropyCoder->resetEntropy();
     7450  m_pcEntropyCoder->resetBits();
     7451  m_pcEntropyCoder->encodeAlfParam(pAlfParam);
     7452  uiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
     7453  if (m_vBestAlfCUCtrlParam.size() != 0)
     7454  {
     7455    for(UInt s=0; s< m_uiNumSlicesInPic; s++)
     7456    {
     7457      if(!m_pcPic->getValidSlice(s))
     7458      {
     7459        continue;
     7460      }
    43437461      m_pcEntropyCoder->resetEntropy();
    43447462      m_pcEntropyCoder->resetBits();
    4345     }
    4346 
    4347     iEstDist = 0;
    4348 
    4349     m_pcEntropyCoder->m_pcEntropyCoderIf->codeAoUvlc(iTypeIdx+1);
    4350 
    4351     if (iTypeIdx>=0)
    4352     {
    4353 
    4354       for(iClassIdx=1; iClassIdx < m_iNumClass[iTypeIdx]+1; iClassIdx++)
    4355       {
    4356         if(m_iCount [iPartIdx][iTypeIdx][iClassIdx])
    4357         {
    4358           m_iOffset[iPartIdx][iTypeIdx][iClassIdx]    = (Int64) xRoundIbdi((Double)(m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx]<<m_uiAoBitDepth) / (Double)m_iCount [iPartIdx][iTypeIdx][iClassIdx]);
    4359         }
    4360         else
    4361         {
    4362           m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx] = 0;
    4363           m_iOffset[iPartIdx][iTypeIdx][iClassIdx] = 0;
    4364         }
    4365 
    4366         iCount     =  m_iCount [iPartIdx][iTypeIdx][iClassIdx];
    4367         iOffset    =  m_iOffset[iPartIdx][iTypeIdx][iClassIdx] << (g_uiBitIncrement-m_uiAoBitDepth);
    4368         iOffsetOrg =  m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx];
    4369         iEstDist   += (( iCount*iOffset*iOffset-iOffsetOrg*iOffset*2 ) >> uiShift);
    4370         m_pcEntropyCoder->m_pcEntropyCoderIf->codeAoSvlc((Int)m_iOffset[iPartIdx][iTypeIdx][iClassIdx]);
    4371       }
    4372       m_iDist[iPartIdx][iTypeIdx] = iEstDist;
    4373       m_iRate[iPartIdx][iTypeIdx] = m_pcEntropyCoder->getNumberOfWrittenBits();
    4374 
    4375       m_dCost[iPartIdx][iTypeIdx] = (Double)((Double)m_iDist[iPartIdx][iTypeIdx] + m_dLambdaLuma * (Double) m_iRate[iPartIdx][iTypeIdx]);
    4376       dComplexityCost = (Double)(iQaoPara1)/10000 * m_dLambdaLuma * (Double)m_iWeightAO[iTypeIdx] * dAreaWeight;
    4377       m_dCost[iPartIdx][iTypeIdx] = (Double)((Double)m_iDist[iPartIdx][iTypeIdx] + m_dLambdaLuma * (Double) m_iRate[iPartIdx][iTypeIdx]) + dComplexityCost;
    4378 
    4379       //       printf("\n%3d:%10.f, %10.0f, %10.0f",iPartIdx,(Double)m_iDist[iPartIdx][iTypeIdx], dComplexityCost);
    4380 
    4381       // printf("\n%d, %d, %6d, %6d, %f", iPartIdx, iTypeIdx, (Int)m_iDist[iPartIdx][iTypeIdx], (Int)m_iRate[iPartIdx][iTypeIdx], m_dCost[iPartIdx][iTypeIdx]);
    4382       if(m_dCost[iPartIdx][iTypeIdx] < m_dCostPartBest[iPartIdx])
    4383       {
    4384         m_iDistOrg [iPartIdx] = (Int64)dComplexityCost;
    4385         m_dCostPartBest[iPartIdx] = m_dCost[iPartIdx][iTypeIdx];
    4386         m_iTypePartBest[iPartIdx] = iTypeIdx;
    4387         if( m_bUseSBACRD )
    4388           m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[pQAOOnePart->PartLevel][CI_TEMP_BEST] );
    4389       }
    4390     }
    4391     else
    4392     {
    4393 
    4394       if(m_iDistOrg[iPartIdx] < m_dCostPartBest[iPartIdx] )
    4395       {
    4396         m_dCostPartBest[iPartIdx] = (Double) m_iDistOrg[iPartIdx] + m_pcEntropyCoder->getNumberOfWrittenBits()*m_dLambdaLuma ;
    4397         m_iTypePartBest[iPartIdx] = -1;
    4398         if( m_bUseSBACRD )
    4399           m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[pQAOOnePart->PartLevel][CI_TEMP_BEST] );
    4400       }
    4401     }
    4402   }
    4403 
    4404   pQAOOnePart->bProcessed = true;
    4405   pQAOOnePart->bSplit     = false;
    4406   pQAOOnePart->iMinDist   =        m_iTypePartBest[iPartIdx] >= 0 ? m_iDist[iPartIdx][m_iTypePartBest[iPartIdx]] : m_iDistOrg[iPartIdx];
    4407   pQAOOnePart->iMinRate   = (Int) (m_iTypePartBest[iPartIdx] >= 0 ? m_iRate[iPartIdx][m_iTypePartBest[iPartIdx]] : 0);
    4408   pQAOOnePart->dMinCost   = pQAOOnePart->iMinDist + m_dLambdaLuma * pQAOOnePart->iMinRate;
    4409   pQAOOnePart->iBestType  = m_iTypePartBest[iPartIdx];
    4410   if (pQAOOnePart->iBestType != -1)
    4411   {
    4412     pQAOOnePart->bEnableFlag =  1;
    4413     pQAOOnePart->iLength = m_iNumClass[m_psQAOPart[iPartIdx].iBestType];
    4414     for (Int i=0; i<pQAOOnePart->iLength ; i++)
    4415       pQAOOnePart->iOffset[i] = (Int) m_iOffset[iPartIdx][pQAOOnePart->iBestType][i+1];
     7463      m_pcEntropyCoder->encodeAlfCtrlParam( m_vBestAlfCUCtrlParam[s], m_uiNumCUsInFrame);
     7464      uiRate += m_pcEntropyCoder->getNumberOfWrittenBits();
     7465    }
    44167466  }
    44177467  else
    44187468  {
    4419     pQAOOnePart->bEnableFlag =  0;
    4420     pQAOOnePart->iLength     =  0;
    4421   }
    4422 
    4423 }
    4424 
    4425 /** run Part Tree Disable.
    4426  * \param  pQAOOnePart, iPartIdx
    4427  */
    4428 Void TEncSampleAdaptiveOffset::xPartTreeDisable(Int iPartIdx)
    4429 {
    4430   SAOQTPart*  pQAOPart= &(m_psQAOPart[iPartIdx]);
    4431 
    4432   pQAOPart->bEnableFlag = false;
    4433   pQAOPart->bSplit      = false;
    4434   pQAOPart->iLength     =  0;
    4435   pQAOPart->iBestType   = -1;
    4436 
    4437   if (pQAOPart->PartLevel < m_uiMaxSplitLevel)
    4438   {
    4439     for (Int i=0; i<NUM_DOWN_PART; i++)
    4440     {
    4441       xPartTreeDisable(pQAOPart->DownPartsIdx[i]);
    4442     }
    4443   }
    4444 
    4445 }
    4446 
    4447 /** run QuadTree Decision Function.
    4448  * \param  iPartIdx, pcPicOrg, pcPicDec, pcPicRest, &dCostFinal
    4449  */
    4450 Void TEncSampleAdaptiveOffset::xQuadTreeDecisionFunc(Int iPartIdx, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, Double &dCostFinal)
    4451 {
    4452   SAOQTPart*  pQAOPart= &(m_psQAOPart[iPartIdx]);
    4453   UInt uiDepth = pQAOPart->PartLevel;
    4454   UInt uhNextDepth = uiDepth+1;
    4455 
    4456   if (iPartIdx == 0)
    4457   {
    4458     dCostFinal = 0;
    4459   }
    4460 
    4461   //QAO for this part
    4462   if(!pQAOPart->bProcessed)
    4463   {
    4464     xQAOOnePart (pQAOPart, iPartIdx);
    4465   }
    4466 
    4467   //QAO for sub 4 parts
    4468   if (pQAOPart->PartLevel < m_uiMaxSplitLevel)
    4469   {
    4470     Double      dCostNotSplit = m_dLambdaLuma + pQAOPart->dMinCost;
    4471     Double      dCostSplit    = m_dLambdaLuma;
    4472 
    4473     for (Int i=0; i< NUM_DOWN_PART ;i++)
    4474     {
    4475       if( m_bUseSBACRD ) 
    4476       {
    4477         if ( 0 == iPartIdx) //initialize RD with previous depth buffer
    4478           m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
    4479         else
    4480           m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]);
    4481       } 
    4482       xQuadTreeDecisionFunc(pQAOPart->DownPartsIdx[i], pcPicOrg, pcPicDec, pcPicRest, dCostFinal);
    4483       dCostSplit += dCostFinal;
    4484       if( m_bUseSBACRD )
    4485       {
    4486         m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
    4487       }
    4488     }
    4489 
    4490 
    4491     if(dCostSplit < dCostNotSplit)
    4492     {
    4493       dCostFinal = dCostSplit;
    4494       pQAOPart->bSplit      = true;
    4495       pQAOPart->bEnableFlag = false;
    4496       pQAOPart->iLength     =  0;
    4497       pQAOPart->iBestType   = -1;
    4498     }
    4499     else
    4500     {
    4501       dCostFinal = dCostNotSplit;
    4502       pQAOPart->bSplit = false;
    4503       for (Int i=0; i<NUM_DOWN_PART; i++)
    4504       {
    4505         xPartTreeDisable(pQAOPart->DownPartsIdx[i]);
    4506       }
    4507     }
    4508   }
    4509   else
    4510   {
    4511     dCostFinal = pQAOPart->dMinCost;
    4512   }
    4513 }
    4514 /** destory TEncSampleAdaptiveOffset class.
    4515  * \param 
    4516  */
    4517 Void TEncSampleAdaptiveOffset::destoryEncBuffer()
    4518 {
    4519 
    4520     for (Int i=0;i<m_iNumTotalParts;i++)
    4521     {
    4522       for (Int j=0;j<MAX_NUM_SAO_TYPE;j++)
    4523       {
    4524         if (m_iCount [i][j])
    4525         {
    4526           delete [] m_iCount [i][j];
    4527         }
    4528         if (m_iOffset[i][j])
    4529         {
    4530           delete [] m_iOffset[i][j];
    4531         }
    4532         if (m_iOffsetOrg[i][j])
    4533         {
    4534           delete [] m_iOffsetOrg[i][j];
    4535         }
    4536       }
    4537       if (m_iRate[i])
    4538       {
    4539         delete [] m_iRate[i];
    4540       }
    4541       if (m_iDist[i])
    4542       {
    4543         delete [] m_iDist[i];
    4544       }
    4545       if (m_dCost[i])
    4546       {
    4547         delete [] m_dCost[i];
    4548       }
    4549       if (m_iCount [i])
    4550       {
    4551         delete [] m_iCount [i];
    4552       }
    4553       if (m_iOffset[i])
    4554       {
    4555         delete [] m_iOffset[i];
    4556       }
    4557       if (m_iOffsetOrg[i])
    4558       {
    4559         delete [] m_iOffsetOrg[i];
    4560       }
    4561 
    4562     }
    4563     if (m_iDistOrg)
    4564     {
    4565       delete [] m_iDistOrg ; m_iDistOrg = NULL;
    4566     }
    4567     if (m_dCostPartBest)
    4568     {
    4569       delete [] m_dCostPartBest ; m_dCostPartBest = NULL;
    4570     }
    4571     if (m_iTypePartBest)
    4572     {
    4573       delete [] m_iTypePartBest ; m_iTypePartBest = NULL;
    4574     }
    4575     if (m_iRate)
    4576     {
    4577       delete [] m_iRate ; m_iRate = NULL;
    4578     }
    4579     if (m_iDist)
    4580     {
    4581       delete [] m_iDist ; m_iDist = NULL;
    4582     }
    4583     if (m_dCost)
    4584     {
    4585       delete [] m_dCost ; m_dCost = NULL;
    4586     }
    4587     if (m_iCount)
    4588     {
    4589       delete [] m_iCount  ; m_iCount = NULL;
    4590     }
    4591     if (m_iOffset)
    4592     {
    4593       delete [] m_iOffset ; m_iOffset = NULL;
    4594     }
    4595     if (m_iOffsetOrg)
    4596     {
    4597       delete [] m_iOffsetOrg ; m_iOffsetOrg = NULL;
    4598     }
    4599 
    4600 
    4601 }
    4602 Void TEncSampleAdaptiveOffset::createEncBuffer()
    4603 {
    4604     m_iDistOrg = new Int64 [m_iNumTotalParts];
    4605     m_dCostPartBest = new Double [m_iNumTotalParts];
    4606     m_iTypePartBest = new Int [m_iNumTotalParts];
    4607 
    4608     m_iRate = new Int64* [m_iNumTotalParts];
    4609     m_iDist = new Int64* [m_iNumTotalParts];
    4610     m_dCost = new Double*[m_iNumTotalParts];
    4611 
    4612     m_iCount  = new Int64 **[m_iNumTotalParts];
    4613     m_iOffset = new Int64 **[m_iNumTotalParts];
    4614     m_iOffsetOrg = new Int64 **[m_iNumTotalParts];
    4615 
    4616     for (Int i=0;i<m_iNumTotalParts;i++)
    4617     {
    4618       m_iRate[i] = new Int64  [MAX_NUM_SAO_TYPE];
    4619       m_iDist[i] = new Int64  [MAX_NUM_SAO_TYPE];
    4620       m_dCost[i] = new Double [MAX_NUM_SAO_TYPE];
    4621 
    4622       m_iCount [i] = new Int64 *[MAX_NUM_SAO_TYPE];
    4623       m_iOffset[i] = new Int64 *[MAX_NUM_SAO_TYPE];
    4624       m_iOffsetOrg[i] = new Int64 *[MAX_NUM_SAO_TYPE];
    4625 
    4626       for (Int j=0;j<MAX_NUM_SAO_TYPE;j++)
    4627       {
    4628         m_iCount [i][j] = new Int64 [MAX_NUM_QAO_CLASS];
    4629         m_iOffset[i][j] = new Int64 [MAX_NUM_QAO_CLASS];
    4630         m_iOffsetOrg[i][j]=  new Int64 [MAX_NUM_QAO_CLASS];
    4631       }
    4632     }
    4633 
    4634 }
    4635 
    4636 /** start Sao Encoder.
    4637  * \param pcPic, pcEntropyCoder, pppcRDSbacCoder, pcRDGoOnSbacCoder
    4638  */
    4639 Void TEncSampleAdaptiveOffset::startSaoEnc( TComPic* pcPic, TEncEntropy* pcEntropyCoder, TEncSbac*** pppcRDSbacCoder, TEncSbac* pcRDGoOnSbacCoder)
    4640 {
    4641   if( pcRDGoOnSbacCoder )
    4642     m_bUseSBACRD = true;
    4643   else
    4644     m_bUseSBACRD = false;
    4645 
    4646   m_pcPic = pcPic;
    4647   m_pcEntropyCoder = pcEntropyCoder;
    4648 
    4649   m_pppcRDSbacCoder = pppcRDSbacCoder;
    4650   m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder;
    4651   m_pcEntropyCoder->resetEntropy();
    4652   m_pcEntropyCoder->resetBits();
    4653 
    4654   if( m_bUseSBACRD )
    4655   {
    4656     m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[0][CI_NEXT_BEST]);
    4657     m_pppcRDSbacCoder[0][CI_CURR_BEST]->load( m_pppcRDSbacCoder[0][CI_NEXT_BEST]);
    4658   }
    4659 
    4660   m_bSaoFlag = 0;
    4661   for (Int i=0;i<m_iNumTotalParts;i++)
    4662   {
    4663     m_dCostPartBest[i] = MAX_DOUBLE;
    4664     m_iTypePartBest[i] = -1;
    4665     m_iDistOrg[i] = 0;
    4666     for (Int j=0;j<MAX_NUM_SAO_TYPE;j++)
    4667     {
    4668       m_iDist[i][j] = 0;
    4669       m_iRate[i][j] = 0;
    4670       m_dCost[i][j] = 0;
    4671       for (Int k=0;k<MAX_NUM_QAO_CLASS;k++)
    4672       {
    4673         m_iCount [i][j][k] = 0;
    4674         m_iOffset[i][j][k] = 0;
    4675         m_iOffsetOrg[i][j][k] = 0;
    4676       } 
    4677     }
    4678   }
    4679 
    4680   for(Int i=0; i< m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; i++)
    4681   {
    4682     m_psQAOPart[i].bEnableFlag   =  0;
    4683     m_psQAOPart[i].iBestType     = -1;
    4684     m_psQAOPart[i].iLength       =  0;
    4685     m_psQAOPart[i].bSplit        =  false;
    4686     m_psQAOPart[i].bProcessed    = false;
    4687     m_psQAOPart[i].dMinCost      = MAX_DOUBLE;
    4688     m_psQAOPart[i].iMinDist      = MAX_INT;
    4689     m_psQAOPart[i].iMinRate      = MAX_INT;
    4690 
    4691     for (Int j=0;j<MAX_NUM_QAO_CLASS;j++)
    4692     {
    4693       m_psQAOPart[i].iOffset[j] = 0;
    4694     }
    4695   }
    4696 
    4697   for(Int i=0; i< m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; i++)
    4698   {
    4699     m_psQAOPart[i].bEnableFlag   =  0;
    4700     m_psQAOPart[i].iBestType     = -1;
    4701     m_psQAOPart[i].iLength       =  0;
    4702     for (Int j=0;j<MAX_NUM_QAO_CLASS;j++)
    4703     {
    4704       m_psQAOPart[i].iOffset[j] = 0;
    4705     }
    4706   }
    4707 
    4708 }
    4709 
    4710 /** end Sao Encoder.
    4711  * \param
    4712  */
    4713 Void TEncSampleAdaptiveOffset::endSaoEnc()
    4714 {
    4715   m_pcPic = NULL;
    4716   m_pcEntropyCoder = NULL;
    4717 }
    4718 
    4719 inline int xSign(int x)
    4720 {
    4721   return ((x >> 31) | ((int)( (((unsigned int) -x)) >> 31)));
    4722 }
    4723 /** calculate Ao Stats Cu
    4724  * \param iAddr, iPartIdx
    4725  */
    4726 Void TEncSampleAdaptiveOffset::calcAoStatsCu(Int iAddr, Int iPartIdx)
    4727 {
    4728   Int x,y;
    4729   TComDataCU *pTmpCu = m_pcPic->getCU(iAddr);
    4730   TComSPS *pTmpSPS =  m_pcPic->getSlice(0)->getSPS();
    4731 
    4732 
    4733   Pel* pOrg      ;
    4734   Pel* pRec      ;
    4735   Int iStride    =  m_pcPic->getStride();
    4736   Int iLcuWidth  = pTmpSPS->getMaxCUHeight();
    4737   Int iLcuHeight = pTmpSPS->getMaxCUWidth();
    4738   Int iPicWidth  = pTmpSPS->getWidth();
    4739   Int iPicHeight = pTmpSPS->getHeight();
    4740   UInt uiLPelX   = pTmpCu->getCUPelX();
    4741   UInt uiRPelX   = uiLPelX + iLcuWidth;
    4742   UInt uiTPelY   = pTmpCu->getCUPelY();
    4743   UInt uiBPelY   = uiTPelY + iLcuHeight;
    4744   uiRPelX    = uiRPelX > iPicWidth ? iPicWidth : uiRPelX;
    4745   uiBPelY    = uiBPelY > iPicHeight? iPicHeight: uiBPelY;
    4746   iLcuWidth  = uiRPelX - uiLPelX;
    4747   iLcuHeight = uiBPelY - uiTPelY;
    4748   Int64* iStats ;
    4749   Int64* iCount ;
    4750   Int iClassIdx;
    4751 
    4752 
    4753   //   if(m_iAoType == BO_0 || m_iAoType == BO_1)
    4754   {
    4755     iStats = m_iOffsetOrg[iPartIdx][SAO_BO_0];
    4756     iCount = m_iCount    [iPartIdx][SAO_BO_0];
    4757 
    4758     pOrg      = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
    4759     pRec      = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
    4760 
    4761     for (y=0; y<iLcuHeight; y++)
    4762     {
    4763       for (x=0; x<iLcuWidth; x++)
    4764       {
    4765         iClassIdx =  m_ppLumaTableBo0[pRec[x]];
    4766         if (iClassIdx)
    4767         {
    4768           iStats[iClassIdx] += (pOrg[x] - pRec[x]);
    4769           iCount[iClassIdx] ++;
    4770         }
    4771       }
    4772       pOrg += iStride;
    4773       pRec += iStride;
    4774     }
    4775 
    4776     iStats = m_iOffsetOrg[iPartIdx][SAO_BO_1];
    4777     iCount = m_iCount    [iPartIdx][SAO_BO_1];
    4778 
    4779     pOrg      = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
    4780     pRec      = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
    4781 
    4782     for (y=0; y<iLcuHeight; y++)
    4783     {
    4784       for (x=0; x<iLcuWidth; x++)
    4785       {
    4786         iClassIdx =  m_ppLumaTableBo1[pRec[x]];
    4787         if (iClassIdx)
    4788         {
    4789           iStats[iClassIdx] += (pOrg[x] - pRec[x]);
    4790           iCount[iClassIdx] ++;
    4791         }
    4792       }
    4793       pOrg += iStride;
    4794       pRec += iStride;
    4795     }
    4796   }
    4797 
    4798   Int iSignLeft;
    4799   Int iSignRight;
    4800   Int iSignDown;
    4801   Int iSignDown1;
    4802   Int iSignDown2;
    4803 
    4804   UInt uiEdgeType;
    4805 
    4806   //   if (m_iAoType == EO_0  || m_iAoType == EO_1 || m_iAoType == EO_2 || m_iAoType == EO_3)
    4807   {
    4808     //     if (m_iAoType == EO_0  )
    4809     {
    4810       iStats = m_iOffsetOrg[iPartIdx][SAO_EO_0];
    4811       iCount = m_iCount    [iPartIdx][SAO_EO_0];
    4812 
    4813       pOrg      = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
    4814       pRec      = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
    4815       for (y=0; y<iLcuHeight; y++)
    4816       {
    4817         iSignLeft  = xSign(pRec[1] - pRec[0]);
    4818         for (x=1; x<iLcuWidth-1; x++)
    4819         {
    4820           iSignRight =  xSign(pRec[x] - pRec[x+1]);
    4821           uiEdgeType =  iSignRight + iSignLeft + 2;
    4822           iSignLeft  = -iSignRight;
    4823 
    4824           iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
    4825           iCount[m_auiEoTable[uiEdgeType]] ++;
    4826         }
    4827         pOrg += iStride;
    4828         pRec += iStride;
    4829       }
    4830     }
    4831 
    4832     //     if (m_iAoType == EO_1  )
    4833     {
    4834       iStats = m_iOffsetOrg[iPartIdx][SAO_EO_1];
    4835       iCount = m_iCount    [iPartIdx][SAO_EO_1];
    4836 
    4837       pOrg      = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
    4838       pRec      = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
    4839       pOrg += iStride;
    4840       pRec += iStride;
    4841 
    4842       for (x=0; x< iLcuWidth; x++)
    4843       {
    4844         m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride]);
    4845       }
    4846 
    4847       for (y=1; y<iLcuHeight-1; y++)
    4848       {
    4849         for (x=0; x<iLcuWidth; x++)
    4850         {
    4851 
    4852           iSignDown  =  xSign(pRec[x] - pRec[x+iStride]);
    4853           uiEdgeType =  iSignDown + m_iUpBuff1[x] + 2;
    4854           m_iUpBuff1[x]= -iSignDown;
    4855 
    4856           iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
    4857           iCount[m_auiEoTable[uiEdgeType]] ++;
    4858 
    4859         }
    4860         pOrg += iStride;
    4861         pRec += iStride;
    4862       }
    4863     }
    4864     //     if (m_iAoType == EO_2  )
    4865     {
    4866       iStats = m_iOffsetOrg[iPartIdx][SAO_EO_2];
    4867       iCount = m_iCount    [iPartIdx][SAO_EO_2];
    4868 
    4869       pOrg      = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
    4870       pRec      = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
    4871       pOrg += iStride;
    4872       pRec += iStride;
    4873       for (x=1; x<iLcuWidth; x++)
    4874       {
    4875         m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride-1]);
    4876       }
    4877       for (y=1; y<iLcuHeight-1; y++)
    4878       {
    4879         iSignDown2 = xSign(pRec[iStride] - pRec[0]);
    4880         for (x=1; x<iLcuWidth-1; x++)
    4881         {
    4882           iSignDown1      =  xSign(pRec[x] - pRec[x+iStride+1]) ;
    4883           uiEdgeType      =  iSignDown1 + m_iUpBuff1[x] + 2;
    4884           m_iUpBufft[x+1] = -iSignDown1;
    4885           iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
    4886           iCount[m_auiEoTable[uiEdgeType]] ++;
    4887         }
    4888         m_iUpBufft[1] = iSignDown2;
    4889         ipSwap     = m_iUpBuff1;
    4890         m_iUpBuff1 = m_iUpBufft;
    4891         m_iUpBufft = ipSwap;
    4892 
    4893         pRec += iStride;
    4894         pOrg += iStride;
    4895       }
    4896     }
    4897     //     if (m_iAoType == EO_3  )
    4898     {
    4899       iStats = m_iOffsetOrg[iPartIdx][SAO_EO_3];
    4900       iCount = m_iCount    [iPartIdx][SAO_EO_3];
    4901 
    4902       pOrg      = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
    4903       pRec      = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
    4904       pOrg += iStride;
    4905       pRec += iStride;
    4906       for (x=0; x<iLcuWidth-1; x++)
    4907       {
    4908         m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride+1]);
    4909       }
    4910 
    4911       for (y=1; y<iLcuHeight-1; y++)
    4912       {
    4913         for (x=1; x<iLcuWidth-1; x++)
    4914         {
    4915           iSignDown1      =  xSign(pRec[x] - pRec[x+iStride-1]) ;
    4916           uiEdgeType      =  iSignDown1 + m_iUpBuff1[x] + 2;
    4917           m_iUpBuff1[x-1]   = -iSignDown1;
    4918           iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
    4919           iCount[m_auiEoTable[uiEdgeType]] ++;
    4920         }
    4921         m_iUpBuff1[iLcuWidth-2] = xSign(pRec[iLcuWidth-2 + iStride] - pRec[iLcuWidth-1]);
    4922 
    4923         pRec += iStride;
    4924         pOrg += iStride;
    4925       }
    4926     }
    4927   }
    4928 
    4929 }
    4930 
    4931 /** run get QAO Stats
    4932  * \param pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt
    4933  */
    4934 Void TEncSampleAdaptiveOffset::xGetQAOStats(TComPicYuv* pcPicYuvOrg, TComPicYuv* pcPicYuvRec, TComPicYuv* pcPicYuvExt)
    4935 {
    4936   Int iLevelIdx, iPartIdx, iTypeIdx, iClassIdx;
    4937   Int i;
    4938   Int iNumTotalType = MAX_NUM_SAO_TYPE;
    4939   Int LcuIdxX;
    4940   Int LcuIdxY;
    4941   Int iAddr;
    4942   Int iFrameWidthInCU = m_pcPic->getFrameWidthInCU();
    4943   Int iDownPartIdx;
    4944   Int iPartStart;
    4945   Int iPartEnd;
    4946 
    4947   if (m_uiMaxSplitLevel == 0)
    4948   {
    4949     iPartIdx = 0;
    4950     for (LcuIdxY = m_psQAOPart[iPartIdx].StartCUY; LcuIdxY<= m_psQAOPart[iPartIdx].EndCUY; LcuIdxY++)
    4951     {
    4952       for (LcuIdxX = m_psQAOPart[iPartIdx].StartCUX; LcuIdxX<= m_psQAOPart[iPartIdx].EndCUX; LcuIdxX++)
    4953       {
    4954         iAddr = LcuIdxY*iFrameWidthInCU + LcuIdxX;
    4955         calcAoStatsCu(iAddr, iPartIdx);
    4956       }
    4957     }
    4958 
    4959   }
    4960   else
    4961   {
    4962     for(iPartIdx=m_aiNumCulPartsLevel[m_uiMaxSplitLevel-1]; iPartIdx<m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; iPartIdx++)
    4963     {
    4964       for (LcuIdxY = m_psQAOPart[iPartIdx].StartCUY; LcuIdxY<= m_psQAOPart[iPartIdx].EndCUY; LcuIdxY++)
    4965       {
    4966         for (LcuIdxX = m_psQAOPart[iPartIdx].StartCUX; LcuIdxX<= m_psQAOPart[iPartIdx].EndCUX; LcuIdxX++)
    4967         {
    4968           iAddr = LcuIdxY*iFrameWidthInCU + LcuIdxX;
    4969           calcAoStatsCu(iAddr, iPartIdx);
    4970         }
    4971       }
    4972     }
    4973     for (iLevelIdx=m_uiMaxSplitLevel-1; iLevelIdx>=0; iLevelIdx--)
    4974     {
    4975       iPartStart = (iLevelIdx > 0) ? m_aiNumCulPartsLevel[iLevelIdx-1] : 0;
    4976       iPartEnd   = m_aiNumCulPartsLevel[iLevelIdx];
    4977       for(iPartIdx = iPartStart; iPartIdx < iPartEnd; iPartIdx++)
    4978       {
    4979         for (i=0; i<NUM_DOWN_PART; i++)
    4980         {
    4981           iDownPartIdx = m_psQAOPart[iPartIdx].DownPartsIdx[i];
    4982           for (iTypeIdx=0; iTypeIdx<iNumTotalType; iTypeIdx++)
    4983           {
    4984             for (iClassIdx=0; iClassIdx<m_iNumClass[iTypeIdx]+1; iClassIdx++)
    4985             {
    4986               m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx] += m_iOffsetOrg[iDownPartIdx][iTypeIdx][iClassIdx];
    4987               m_iCount [iPartIdx][iTypeIdx][iClassIdx]    += m_iCount [iDownPartIdx][iTypeIdx][iClassIdx];
    4988             }
    4989           }
    4990         }
    4991       }
    4992     }
    4993   }
    4994 }
    4995 
    4996 /** Sample adaptive offset Process
    4997  * \param dLambda
    4998  */
    4999 Void TEncSampleAdaptiveOffset::SAOProcess( Double dLambda)
    5000 {
    5001   // set lambda
    5002   TComPicYuv* pcPicYuvOrg = m_pcPic->getPicYuvOrg();
    5003   TComPicYuv* pcPicYuvRec = m_pcPic->getPicYuvRec();
    5004 
    5005   TComPicYuv* pcPicYuvExt = NULL;
    5006 
    5007   m_eSliceType           =  m_pcPic->getSlice(0)->getSliceType();
    5008   m_iPicNalReferenceIdc  = (m_pcPic->getSlice(0)->isReferenced() ? 1 :0);
    5009 
    5010   m_dLambdaLuma    = dLambda;
    5011   m_dLambdaChroma  = dLambda;
    5012 
    5013   if (g_uiBitIncrement>1)
    5014   {
    5015     m_uiAoBitDepth = 1;
    5016   }
    5017   else
    5018   {
    5019     m_uiAoBitDepth = 0;
    5020   }
    5021 
    5022   Double dCostFinal = 0;
    5023 
    5024   xGetQAOStats(pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt);
    5025   xQuadTreeDecisionFunc(0, pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt, dCostFinal);
    5026   m_bSaoFlag = dCostFinal < m_iDistOrg[0] ? 1:0;
    5027 
    5028 
    5029   if(m_bSaoFlag)
    5030   {
    5031     xProcessQuadTreeAo( 0, pcPicYuvRec,  pcPicYuvExt);
    5032   }
    5033 
    5034 }
    5035 
    5036 
    5037 #endif
     7469    uiRate += m_uiNumSlicesInPic;
     7470  }
     7471  memcpy(pAlfParam->coeff_chroma, piTmpCoef, sizeof(int)*pAlfParam->num_coeff_chroma);
     7472  delete[] piTmpCoef;
     7473  piTmpCoef = NULL;
     7474
     7475  return uiRate;
     7476}
     7477#endif
     7478
     7479//! \}
Note: See TracChangeset for help on using the changeset viewer.