Changeset 56 in 3DVCSoftware for trunk/source/Lib/TLibEncoder


Ignore:
Timestamp:
11 May 2012, 21:20:17 (12 years ago)
Author:
hschwarz
Message:

updated trunk (move to HM6.1)

Location:
trunk/source/Lib/TLibEncoder
Files:
11 added
4 deleted
27 edited

Legend:

Unmodified
Added
Removed
  • trunk/source/Lib/TLibEncoder/SEIwrite.cpp

    r5 r56  
    44 * granted under this license.
    55 *
    6  * Copyright (c) 2010-2011, ISO/IEC
     6 * Copyright (c) 2010-2012, ITU/ISO/IEC
    77 * All rights reserved.
    88 *
     
    1515 *    this list of conditions and the following disclaimer in the documentation
    1616 *    and/or other materials provided with the distribution.
    17  *  * Neither the name of the ISO/IEC nor the names of its contributors may
     17 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
    1818 *    be used to endorse or promote products derived from this software without
    1919 *    specific prior written permission.
     
    3232 */
    3333
     34#include "TLibCommon/TComBitCounter.h"
     35#include "TLibCommon/TComBitStream.h"
     36#include "TLibCommon/SEI.h"
     37#include "SEIwrite.h"
    3438
    35 
    36 #include "../TLibCommon/TComBitCounter.h"
    37 #include "../TLibCommon/TComBitStream.h"
    38 #include "../TLibCommon/SEI.h"
    39 #include "SEIwrite.h"
     39//! \ingroup TLibEncoder
     40//! \{
    4041
    4142static void writeSEIuserDataUnregistered(TComBitIf& bs, const SEIuserDataUnregistered &sei);
     
    5859
    5960/**
    60  * marshal a single SEI message @sei, storing the marshalled representation
    61  * in bitstream @bs.
     61 * marshal a single SEI message sei, storing the marshalled representation
     62 * in bitstream bs.
    6263 */
    6364void writeSEImessage(TComBitIf& bs, const SEI& sei)
     
    8687
    8788/**
    88  * marshal a user_data_unregistered SEI message @sei, storing the marshalled
    89  * representation in bitstream @bs.
     89 * marshal a user_data_unregistered SEI message sei, storing the marshalled
     90 * representation in bitstream bs.
    9091 */
    9192static void writeSEIuserDataUnregistered(TComBitIf& bs, const SEIuserDataUnregistered &sei)
     
    104105/**
    105106 * marshal a picture_digest SEI message, storing the marshalled
    106  * representation in bitstream @bs.
     107 * representation in bitstream bs.
    107108 */
    108109static void writeSEIpictureDigest(TComBitIf& bs, const SEIpictureDigest& sei)
     
    114115  }
    115116}
     117//! \}
  • trunk/source/Lib/TLibEncoder/SEIwrite.h

    r5 r56  
    44 * granted under this license.
    55 *
    6  * Copyright (c) 2010-2011, ISO/IEC
     6 * Copyright (c) 2010-2012, ITU/ISO/IEC
    77 * All rights reserved.
    88 *
     
    1515 *    this list of conditions and the following disclaimer in the documentation
    1616 *    and/or other materials provided with the distribution.
    17  *  * Neither the name of the ISO/IEC nor the names of its contributors may
     17 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
    1818 *    be used to endorse or promote products derived from this software without
    1919 *    specific prior written permission.
     
    3232 */
    3333
    34 
    35 
    3634#pragma once
    3735
     
    3937class SEI;
    4038
     39//! \ingroup TLibEncoder
     40//! \{
     41
    4142void writeSEImessage(TComBitIf& bs, const SEI& sei);
     43
     44//! \}
  • trunk/source/Lib/TLibEncoder/TEncAdaptiveLoopFilter.cpp

    r5 r56  
    22 * License, included below. This software may be subject to other third party
    33 * and contributor rights, including patent rights, and no such rights are
    4  * granted under this license.
     4 * granted under this license. 
    55 *
    6  * Copyright (c) 2010-2011, ISO/IEC
     6 * Copyright (c) 2010-2012, ITU/ISO/IEC
    77 * All rights reserved.
    88 *
     
    1515 *    this list of conditions and the following disclaimer in the documentation
    1616 *    and/or other materials provided with the distribution.
    17  *  * Neither the name of the ISO/IEC nor the names of its contributors may
     17 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
    1818 *    be used to endorse or promote products derived from this software without
    1919 *    specific prior written permission.
     
    3232 */
    3333
    34 
    35 
    3634/** \file     TEncAdaptiveLoopFilter.cpp
    3735 \brief    estimation part of adaptive loop filter class
     
    4341#include <math.h>
    4442
     43//! \ingroup TLibEncoder
     44//! \{
     45
    4546// ====================================================================================================================
    4647// Constants
    4748// ====================================================================================================================
    48 
     49#if LCU_SYNTAX_ALF
     50#define ALF_NUM_OF_REDESIGN 1
     51#else
    4952#define ALF_NUM_OF_REDESIGN 3
    50 
     53#endif
    5154// ====================================================================================================================
    5255// Tables
    5356// ====================================================================================================================
    54 
    55 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x9[81] =
    56 {
    57    0,  1,  2,  3,  4,  5,  6,  7,  8,
    58    9, 10, 11, 12, 13, 14, 15, 16, 17,
    59   18, 19, 20, 21, 22, 23, 24, 25, 26,
    60   27, 28, 29, 30, 31, 32, 33, 34, 35,
    61   36, 37, 38, 39, 40, 39, 38, 37, 36,
    62   35, 34, 33, 32, 31, 30, 29, 28, 27,
    63   26, 25, 24, 23, 22, 21, 20, 19, 18,
    64   17, 16, 15, 14, 13, 12, 11, 10,  9,
    65    8,  7,  6,  5,  4,  3,  2,  1,  0
     57#if LCU_SYNTAX_ALF
     58const Int TEncAdaptiveLoopFilter::m_alfNumPartsInRowTab[5] =
     59{
     60  1,      //level 0
     61  2,      //level 1
     62  4,      //level 2
     63  8,      //level 3
     64  16      //level 4
    6665};
    6766
    68 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray7x7[49] =
    69 {
    70   0,  1,  2,  3,  4,  5,  6,
    71   7,  8,  9, 10, 11, 12, 13,
    72   14, 15, 16, 17, 18, 19, 20,
    73   21, 22, 23, 24, 23, 22, 21,
    74   20, 19, 18, 17, 16, 15, 14,
    75   13, 12, 11, 10,  9,  8,  7,
    76   6,  5,  4,  3,  2,  1,  0,
     67const Int TEncAdaptiveLoopFilter::m_alfNumPartsLevelTab[5] =
     68{
     69  1,      //level 0
     70  4,      //level 1
     71  16,     //level 2
     72  64,     //level 3
     73  256     //level 4
    7774};
    7875
    79 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray5x5[25] =
    80 {
    81   0,  1,  2,  3,  4,
    82   5,  6,  7,  8,  9,
    83   10, 11, 12, 11, 10,
    84   9,  8,  7,  6,  5,
    85   4,  3,  2,  1,  0,
     76const Int TEncAdaptiveLoopFilter::m_alfNumCulPartsLevelTab[5] =
     77{
     78  1,    //level 0
     79  5,    //level 1
     80  21,   //level 2
     81  85,   //level 3
     82  341,  //level 4
    8683};
    87 
    88 #if TI_ALF_MAX_VSIZE_7
    89 const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x7[63] =
    90 {
    91    0,  1,  2,  3,  4,  5,  6,  7,  8,
    92    9, 10, 11, 12, 13, 14, 15, 16, 17,
    93   18, 19, 20, 21, 22, 23, 24, 25, 26,
    94   27, 28, 29, 30, 31, 30, 29, 28, 27,
    95   26, 25, 24, 23, 22, 21, 20, 19, 18,
    96   17, 16, 15, 14, 13, 12, 11, 10,  9,
    97    8,  7,  6,  5,  4,  3,  2,  1,  0
    98 };
    99 #endif
    100 
    101 #if MQT_ALF_NPASS
    102 #if TI_ALF_MAX_VSIZE_7
    103 Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[21] =
    104 #else
    105 Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[22] =
    106 #endif
    107 {
    108 #if TI_ALF_MAX_VSIZE_7
    109                   0,  1,  2,
    110               3,  4,  5,  6,  7,
    111           8,  9, 10, 11, 12, 13, 14,
    112      15, 16, 17, 18, 19, 20
    113 #else
    114                    0,
    115                1,  2,  3,
    116            4,  5,  6,  7,  8,
    117        9, 10, 11, 12, 13, 14, 15,
    118   16, 17, 18, 19, 20, 21
    119 #endif
    120 };
    121 
    122 Int TEncAdaptiveLoopFilter::m_aiTapPos7x7_In9x9Sym[14] =
    123 {                 
    124 #if TI_ALF_MAX_VSIZE_7
    125                   1,   
    126               4,  5,  6,   
    127           9, 10, 11, 12, 13,   
    128      16, 17, 18, 19, 20
    129 
    130 #else
    131 
    132                2,
    133            5,  6,  7,
    134       10, 11, 12, 13, 14,
    135   17, 18, 19, 20, 21
    136 #endif
    137 };
    138 
    139 Int TEncAdaptiveLoopFilter::m_aiTapPos5x5_In9x9Sym[8]  =
    140 {
    141 
    142 #if TI_ALF_MAX_VSIZE_7
    143             5,
    144        10, 11, 12,
    145    17, 18, 19, 20
    146 #else
    147            6,
    148       11, 12, 13,
    149   18, 19, 20, 21
    150 
    151 #endif
    152 
    153 };
    154 
    155 Int* TEncAdaptiveLoopFilter::m_iTapPosTabIn9x9Sym[NO_TEST_FILT] =
    156 {
    157   m_aiTapPos9x9_In9x9Sym, m_aiTapPos7x7_In9x9Sym, m_aiTapPos5x5_In9x9Sym
    158 };
    159 #endif
    160 
     84#endif
    16185// ====================================================================================================================
    16286// Constructor / destructor
    16387// ====================================================================================================================
    16488
     89#if LCU_SYNTAX_ALF
     90///AlfCorrData
     91AlfCorrData::AlfCorrData()
     92{
     93  this->componentID = -1;
     94  this->ECorr  = NULL;
     95  this->yCorr  = NULL;
     96  this->pixAcc = NULL;
     97}
     98
     99AlfCorrData::AlfCorrData(Int cIdx)
     100{
     101  const Int numCoef = ALF_MAX_NUM_COEF;
     102  const Int maxNumGroups = NO_VAR_BINS;
     103
     104  Int numGroups = (cIdx == ALF_Y)?(maxNumGroups):(1);
     105
     106  this->componentID = cIdx;
     107
     108  this->ECorr = new Double**[numGroups];
     109  this->yCorr = new Double*[numGroups];
     110  this->pixAcc = new Double[numGroups];
     111  for(Int g= 0; g< numGroups; g++)
     112  {
     113    this->yCorr[g] = new Double[numCoef];
     114    for(Int j=0; j< numCoef; j++)
     115    {
     116      this->yCorr[g][j] = 0;
     117    }
     118
     119    this->ECorr[g] = new Double*[numCoef];
     120    for(Int i=0; i< numCoef; i++)
     121    {
     122      this->ECorr[g][i] = new Double[numCoef];
     123      for(Int j=0; j< numCoef; j++)
     124      {
     125        this->ECorr[g][i][j] = 0;
     126      }
     127    }
     128    this->pixAcc[g] = 0; 
     129  }
     130}
     131
     132AlfCorrData::~AlfCorrData()
     133{
     134  if(this->componentID >=0)
     135  {
     136    const Int numCoef = ALF_MAX_NUM_COEF;
     137    const Int maxNumGroups = NO_VAR_BINS;
     138
     139    Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1);
     140
     141    for(Int g= 0; g< numGroups; g++)
     142    {
     143      for(Int i=0; i< numCoef; i++)
     144      {
     145        delete[] this->ECorr[g][i];
     146      }
     147      delete[] this->ECorr[g];
     148      delete[] this->yCorr[g];
     149    }
     150    delete[] this->ECorr;
     151    delete[] this->yCorr;
     152    delete[] this->pixAcc;
     153  }
     154
     155}
     156
     157AlfCorrData& AlfCorrData::operator += (const AlfCorrData& src)
     158{
     159  if(this->componentID >=0)
     160  {
     161    const Int numCoef = ALF_MAX_NUM_COEF;
     162    const Int maxNumGroups = NO_VAR_BINS;
     163
     164    Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1);
     165    for(Int g=0; g< numGroups; g++)
     166    {
     167      this->pixAcc[g] += src.pixAcc[g];
     168
     169      for(Int j=0; j< numCoef; j++)
     170      {
     171        this->yCorr[g][j] += src.yCorr[g][j];
     172        for(Int i=0; i< numCoef; i++)
     173        {
     174          this->ECorr[g][j][i] += src.ECorr[g][j][i];
     175        }
     176      }
     177    }
     178  }
     179
     180  return *this;
     181}
     182
     183
     184Void AlfCorrData::reset()
     185{
     186  if(this->componentID >=0)
     187  {
     188    const Int numCoef = ALF_MAX_NUM_COEF;
     189    const Int maxNumGroups = NO_VAR_BINS;
     190
     191    Int numGroups = (this->componentID == ALF_Y)?(maxNumGroups):(1);
     192    for(Int g=0; g< numGroups; g++)
     193    {
     194      this->pixAcc[g] = 0;
     195
     196      for(Int j=0; j< numCoef; j++)
     197      {
     198        this->yCorr[g][j] = 0;
     199        for(Int i=0; i< numCoef; i++)
     200        {
     201          this->ECorr[g][j][i] = 0;
     202        }
     203      }
     204
     205
     206    }
     207  }
     208
     209}
     210
     211Void AlfCorrData::mergeFrom(const AlfCorrData& src, Int* mergeTable, Bool doPixAccMerge)
     212{
     213  assert(componentID == src.componentID);
     214
     215  reset();
     216
     217  const Int numCoef = ALF_MAX_NUM_COEF;
     218
     219  Double **srcE, **dstE;
     220  Double *srcy, *dsty;
     221
     222  switch(componentID)
     223  {
     224  case ALF_Cb:
     225  case ALF_Cr:
     226    {
     227      srcE = src.ECorr  [0];
     228      dstE = this->ECorr[0];
     229
     230      srcy  = src.yCorr[0];
     231      dsty  = this->yCorr[0];
     232
     233      for(Int j=0; j< numCoef; j++)
     234      {
     235        for(Int i=0; i< numCoef; i++)
     236        {
     237          dstE[j][i] += srcE[j][i];
     238        }
     239
     240        dsty[j] += srcy[j];
     241      }
     242      if(doPixAccMerge)
     243      {
     244        this->pixAcc[0] = src.pixAcc[0];
     245      }
     246    }
     247    break;
     248  case ALF_Y:
     249    {
     250      Int maxFilterSetSize = (Int)NO_VAR_BINS;
     251      for (Int varInd=0; varInd< maxFilterSetSize; varInd++)
     252      {
     253        Int filtIdx = (mergeTable == NULL)?(0):(mergeTable[varInd]);
     254        srcE = src.ECorr  [varInd];
     255        dstE = this->ECorr[ filtIdx ];
     256        srcy  = src.yCorr[varInd];
     257        dsty  = this->yCorr[ filtIdx ];
     258        for(Int j=0; j< numCoef; j++)
     259        {
     260          for(Int i=0; i< numCoef; i++)
     261          {
     262            dstE[j][i] += srcE[j][i];
     263          }
     264          dsty[j] += srcy[j];
     265        }
     266        if(doPixAccMerge)
     267        {
     268          this->pixAcc[filtIdx] += src.pixAcc[varInd];
     269        }
     270      }
     271    }
     272    break;
     273  default:
     274    {
     275      printf("not a legal component ID\n");
     276      assert(0);
     277      exit(-1);
     278    }
     279  }
     280}
     281
     282///AlfPicQTPart
     283AlfPicQTPart::AlfPicQTPart()
     284{
     285  componentID = -1;
     286  alfUnitParam = NULL;
     287  alfCorr = NULL;
     288}
     289
     290AlfPicQTPart::~AlfPicQTPart()
     291{
     292  if(alfUnitParam != NULL)
     293  {
     294    if(alfUnitParam->alfFiltParam != NULL)
     295    {
     296      delete alfUnitParam->alfFiltParam;
     297      alfUnitParam->alfFiltParam = NULL;
     298    }
     299    delete alfUnitParam;
     300    alfUnitParam = NULL;
     301  }
     302  if(alfCorr != NULL)
     303  {
     304    delete alfCorr;
     305    alfCorr = NULL;
     306  }
     307}
     308
     309AlfPicQTPart& AlfPicQTPart::operator= (const AlfPicQTPart& src)
     310{
     311  componentID = src.componentID;
     312  partCUXS    = src.partCUXS;
     313  partCUYS    = src.partCUYS;
     314  partCUXE    = src.partCUXE;
     315  partCUYE    = src.partCUYE;
     316  partIdx     = src.partIdx;
     317  partLevel   = src.partLevel;
     318  partCol     = src.partCol;
     319  partRow     = src.partRow;
     320  for(Int i=0; i<4; i++)
     321  {
     322    childPartIdx[i] = src.childPartIdx[i];
     323  }
     324  parentPartIdx = src.parentPartIdx;
     325
     326  isBottomLevel = src.isBottomLevel;
     327  isSplit       = src.isSplit;
     328
     329  isProcessed   = src.isProcessed;
     330  splitMinCost  = src.splitMinCost;
     331  splitMinDist  = src.splitMinDist;
     332  splitMinRate  = src.splitMinRate;
     333  selfMinCost   = src.selfMinCost;
     334  selfMinDist   = src.selfMinDist;
     335  selfMinRate   = src.selfMinRate;
     336
     337  numFilterBudget = src.numFilterBudget;
     338
     339  if(src.alfUnitParam != NULL)
     340  {
     341    if(alfUnitParam == NULL)
     342    {
     343      //create alfUnitparam
     344      alfUnitParam = new AlfUnitParam;
     345      alfUnitParam->alfFiltParam = new ALFParam(componentID);
     346    }
     347    //assign from src
     348    alfUnitParam->mergeType = src.alfUnitParam->mergeType;
     349    alfUnitParam->isEnabled = src.alfUnitParam->isEnabled;
     350    alfUnitParam->isNewFilt = src.alfUnitParam->isNewFilt;
     351    alfUnitParam->storedFiltIdx = src.alfUnitParam->storedFiltIdx;
     352    *(alfUnitParam->alfFiltParam) = *(src.alfUnitParam->alfFiltParam);   
     353  }
     354  else
     355  {
     356    printf("source quad-tree partition info is not complete\n");
     357    assert(0);
     358    exit(-1);
     359  }
     360
     361  if(src.alfCorr != NULL)
     362  {
     363    if(alfCorr == NULL)
     364    {
     365      alfCorr = new AlfCorrData(componentID);
     366    }
     367    alfCorr->reset();
     368    (*alfCorr) += (*(src.alfCorr));
     369  }
     370  else
     371  {
     372    printf("source quad-tree partition info is not complete\n");
     373    assert(0);
     374    exit(-1);
     375  }
     376  return *this;
     377}
     378#endif
     379
     380
    165381TEncAdaptiveLoopFilter::TEncAdaptiveLoopFilter()
    166382{
     383#if !LCU_SYNTAX_ALF
    167384  m_ppdAlfCorr = NULL;
     385  m_ppdAlfCorrCb = NULL;
     386  m_ppdAlfCorrCr = NULL;
    168387  m_pdDoubleAlfCoeff = NULL;
    169   m_pcPic = NULL;
     388#endif
    170389  m_pcEntropyCoder = NULL;
     390#if !LCU_SYNTAX_ALF
    171391  m_pcBestAlfParam = NULL;
    172392  m_pcTempAlfParam = NULL;
     393#endif
    173394  m_pcPicYuvBest = NULL;
    174395  m_pcPicYuvTmp = NULL;
    175 #if MTK_NONCROSS_INLOOP_FILTER
     396#if !LCU_SYNTAX_ALF
     397  pcAlfParamShape0 = NULL;
     398  pcAlfParamShape1 = NULL;
     399  pcPicYuvRecShape0 = NULL;
     400  pcPicYuvRecShape1 = NULL;
    176401  m_pcSliceYuvTmp = NULL;
    177402#endif
    178 #if MQT_BA_RA && MQT_ALF_NPASS
    179   m_aiFilterCoeffSaved = NULL;
    180 #endif
     403
     404  m_iALFMaxNumberFilters = NO_FILTERS;
     405
     406  m_bAlfCUCtrlEnabled = false;
    181407}
    182408
     
    185411// ====================================================================================================================
    186412
    187 #if MQT_BA_RA && MQT_ALF_NPASS
     413#if LCU_SYNTAX_ALF
     414/** convert Level Row Col to Idx
     415 * \param   level,  row,  col
     416 */
     417Int TEncAdaptiveLoopFilter::convertLevelRowCol2Idx(Int level, Int row, Int col)
     418{
     419  Int idx;
     420  if (level == 0)
     421  {
     422    idx = 0;
     423  }
     424  else if (level == 1)
     425  {
     426    idx = 1 + row*2 + col;
     427  }
     428  else if (level == 2)
     429  {
     430    idx = 5 + row*4 + col;
     431  }
     432  else if (level == 3)
     433  {
     434    idx = 21 + row*8 + col;
     435  }
     436  else // (level == 4)
     437  {
     438    idx = 85 + row*16 + col;
     439  }
     440  return idx;
     441}
     442
     443/** convert quadtree Idx to Level, Row, and Col
     444 * \param  idx,  *level,  *row,  *col
     445 */
     446Void TEncAdaptiveLoopFilter::convertIdx2LevelRowCol(Int idx, Int *level, Int *row, Int *col)
     447{
     448  if (idx == 0)
     449  {
     450    *level = 0;
     451    *row = 0;
     452    *col = 0;
     453  }
     454  else if (idx>=1 && idx<=4)
     455  {
     456    *level = 1;
     457    *row = (idx-1) / 2;
     458    *col = (idx-1) % 2;
     459  }
     460  else if (idx>=5 && idx<=20)
     461  {
     462    *level = 2;
     463    *row = (idx-5) / 4;
     464    *col = (idx-5) % 4;
     465  }
     466  else if (idx>=21 && idx<=84)
     467  {
     468    *level = 3;
     469    *row = (idx-21) / 8;
     470    *col = (idx-21) % 8;
     471  }
     472  else // (idx>=85 && idx<=340)
     473  {
     474    *level = 4;
     475    *row = (idx-85) / 16;
     476    *col = (idx-85) % 16;
     477  }
     478}
     479
     480/** Initial picture quad-tree
     481 * \param [in] isPicBasedEncode picture quad-tree encoding is enabled or disabled
     482 */
     483Void TEncAdaptiveLoopFilter::initPicQuadTreePartition(Bool isPicBasedEncode)
     484{
     485  if (!isPicBasedEncode)
     486  {
     487    return;
     488  }
     489 
     490  Int maxDepthInWidth   = (Int)(logf((float)(m_numLCUInPicWidth     ))/logf(2.0));
     491  Int maxDepthInHeight  = (Int)(logf((float)(m_numLCUInPicHeight    ))/logf(2.0));
     492  Int maxDepthInFilters = (Int)(logf((float)(m_iALFMaxNumberFilters ))/logf(2.0));
     493  m_alfPQTMaxDepth = (maxDepthInWidth  > maxDepthInHeight ) ? maxDepthInHeight  : maxDepthInWidth ;
     494  m_alfPQTMaxDepth = (m_alfPQTMaxDepth > maxDepthInFilters) ? maxDepthInFilters : m_alfPQTMaxDepth ;
     495
     496  for (Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     497  {
     498    m_alfPQTPart[compIdx] = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ];
     499    for (Int i = 0; i < m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++ )
     500    {
     501      m_alfPQTPart[compIdx][i].alfCorr = new AlfCorrData(compIdx);
     502      m_alfPQTPart[compIdx][i].alfUnitParam = new AlfUnitParam;
     503      m_alfPQTPart[compIdx][i].alfUnitParam->alfFiltParam = new ALFParam(compIdx);
     504    }
     505
     506  }
     507  creatPQTPart(0, 0, 0, -1, 0, m_numLCUInPicWidth-1, 0, m_numLCUInPicHeight-1);
     508}
     509
     510/** Reset picture quad-tree variables
     511 */
     512Void TEncAdaptiveLoopFilter::resetPQTPart()
     513{
     514  Int compIdx, i;
     515
     516  for (compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     517  {
     518    for (i = 0; i < m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++ )
     519    {
     520      m_alfPQTPart[compIdx][i].isProcessed  = false;
     521      m_alfPQTPart[compIdx][i].selfMinCost  = MAX_DOUBLE;
     522      m_alfPQTPart[compIdx][i].splitMinCost = MAX_DOUBLE;     
     523      //reset correlations
     524      m_alfPQTPart[compIdx][i].alfCorr->reset();
     525      //reset ALF unit param
     526      m_alfPQTPart[compIdx][i].alfUnitParam->mergeType = ALF_MERGE_DISABLED;
     527      m_alfPQTPart[compIdx][i].alfUnitParam->isEnabled = false;
     528      m_alfPQTPart[compIdx][i].alfUnitParam->alfFiltParam->alf_flag = 0;
     529    }
     530  }
     531}
     532
     533/** create picture quad-tree
     534 * \param [in] partLevel quad-tree level
     535 * \param [in] partRow row position at partLevel
     536 * \param [in] partCol column position at partLevel
     537 * \param [in] parentPartIdx parent partition index
     538 * \param [in] partCUXS starting LCU X position
     539 * \param [in] partCUXE ending LCU X position
     540 * \param [in] partCUYS starting LCU Y position
     541 * \param [in] partCUYE ending LCU Y position
     542 */
     543Void TEncAdaptiveLoopFilter::creatPQTPart(Int partLevel, Int partRow, Int partCol, Int parentPartIdx, Int partCUXS, Int partCUXE, Int partCUYS, Int partCUYE)
     544{
     545  Int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);
     546
     547  AlfPicQTPart *alfOnePartY, *alfOnePartCb, *alfOnePartCr;
     548
     549  alfOnePartY  = &(m_alfPQTPart[ALF_Y ][partIdx]);
     550  alfOnePartCb = &(m_alfPQTPart[ALF_Cb][partIdx]);
     551  alfOnePartCr = &(m_alfPQTPart[ALF_Cr][partIdx]);
     552
     553  // Y, Cb, Cr
     554  alfOnePartY->partIdx   = alfOnePartCb->partIdx   = alfOnePartCr->partIdx   = partIdx;
     555  alfOnePartY->partCol   = alfOnePartCb->partCol   = alfOnePartCr->partCol   = partCol;
     556  alfOnePartY->partRow   = alfOnePartCb->partRow   = alfOnePartCr->partRow   = partRow;
     557  alfOnePartY->partLevel = alfOnePartCb->partLevel = alfOnePartCr->partLevel = partLevel;
     558
     559  alfOnePartY->partCUXS  = alfOnePartCb->partCUXS  = alfOnePartCr->partCUXS  = partCUXS; 
     560  alfOnePartY->partCUXE  = alfOnePartCb->partCUXE  = alfOnePartCr->partCUXE  = partCUXE;
     561  alfOnePartY->partCUYS  = alfOnePartCb->partCUYS  = alfOnePartCr->partCUYS  = partCUYS;
     562  alfOnePartY->partCUYE  = alfOnePartCb->partCUYE  = alfOnePartCr->partCUYE  = partCUYE;
     563
     564  alfOnePartY->parentPartIdx = alfOnePartCb->parentPartIdx = alfOnePartCr->parentPartIdx = parentPartIdx; 
     565  alfOnePartY->isSplit       = alfOnePartCb->isSplit       = alfOnePartCr->isSplit       = false;
     566
     567#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     568  alfOnePartY->numFilterBudget = alfOnePartCb->numFilterBudget = alfOnePartCr->numFilterBudget = m_iALFMaxNumberFilters/m_alfNumPartsLevelTab[partLevel];
     569#else
     570  alfOnePartY->numFilterBudget = alfOnePartCb->numFilterBudget = alfOnePartCr->numFilterBudget = NO_VAR_BINS;
     571#endif
     572
     573  alfOnePartY->componentID  = ALF_Y;
     574  alfOnePartCb->componentID = ALF_Cb;
     575  alfOnePartCr->componentID = ALF_Cr;
     576
     577  if (alfOnePartY->partLevel != m_alfPQTMaxDepth)
     578  {
     579    alfOnePartY->isBottomLevel = alfOnePartCb->isBottomLevel = alfOnePartCr->isBottomLevel = false;
     580
     581    Int downLevel    = partLevel + 1;
     582    Int downRowStart = partRow << 1;
     583    Int downColStart = partCol << 1;
     584
     585    Int downRowIdx, downColIdx;
     586    Int numCULeft, numCUTop;
     587    Int downStartCUX, downStartCUY, downEndCUX, downEndCUY;
     588
     589    numCULeft = (partCUXE - partCUXS + 1) >> 1 ;
     590    numCUTop  = (partCUYE - partCUYS + 1) >> 1 ;
     591
     592    // ChildPart00
     593    downStartCUX = partCUXS;
     594    downEndCUX   = downStartCUX + numCULeft - 1;
     595    downStartCUY = partCUYS;
     596    downEndCUY   = downStartCUY + numCUTop  - 1;
     597    downRowIdx   = downRowStart + 0;
     598    downColIdx   = downColStart + 0;
     599
     600    alfOnePartY->childPartIdx[0] = alfOnePartCb->childPartIdx[0] = alfOnePartCr->childPartIdx[0] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     601    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     602
     603    // ChildPart01
     604    downStartCUX = partCUXS + numCULeft;
     605    downEndCUX   = partCUXE;
     606    downStartCUY = partCUYS;
     607    downEndCUY   = downStartCUY + numCUTop  - 1;
     608    downRowIdx   = downRowStart + 0;
     609    downColIdx   = downColStart + 1;
     610
     611    alfOnePartY->childPartIdx[1] = alfOnePartCb->childPartIdx[1] = alfOnePartCr->childPartIdx[1] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     612    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     613
     614    // ChildPart10
     615    downStartCUX = partCUXS;
     616    downEndCUX   = downStartCUX + numCULeft - 1;
     617    downStartCUY = partCUYS + numCUTop;
     618    downEndCUY   = partCUYE;
     619    downRowIdx   = downRowStart + 1;
     620    downColIdx   = downColStart + 0;
     621
     622    alfOnePartY->childPartIdx[2] = alfOnePartCb->childPartIdx[2] = alfOnePartCr->childPartIdx[2] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     623    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     624
     625    // ChildPart11
     626    downStartCUX = partCUXS + numCULeft;
     627    downEndCUX   = partCUXE;
     628    downStartCUY = partCUYS + numCUTop;
     629    downEndCUY   = partCUYE;
     630    downRowIdx   = downRowStart + 1;
     631    downColIdx   = downColStart + 1;
     632
     633    alfOnePartY->childPartIdx[3] = alfOnePartCb->childPartIdx[3] = alfOnePartCr->childPartIdx[3] = convertLevelRowCol2Idx(downLevel, downRowIdx, downColIdx);
     634    creatPQTPart(downLevel, downRowIdx, downColIdx, partIdx, downStartCUX, downEndCUX, downStartCUY, downEndCUY);
     635  }
     636  else
     637  {
     638    alfOnePartY->isBottomLevel = alfOnePartCb->isBottomLevel = alfOnePartCr->isBottomLevel = true;
     639
     640    alfOnePartY->childPartIdx[0] = alfOnePartCb->childPartIdx[0] = alfOnePartCr->childPartIdx[0] = -1;
     641    alfOnePartY->childPartIdx[1] = alfOnePartCb->childPartIdx[1] = alfOnePartCr->childPartIdx[1] = -1;
     642    alfOnePartY->childPartIdx[2] = alfOnePartCb->childPartIdx[2] = alfOnePartCr->childPartIdx[2] = -1;
     643    alfOnePartY->childPartIdx[3] = alfOnePartCb->childPartIdx[3] = alfOnePartCr->childPartIdx[3] = -1;
     644  }
     645}
     646
     647/** create global buffers for ALF encoding
     648 */
     649Void TEncAdaptiveLoopFilter::createAlfGlobalBuffers()
     650{
     651  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     652  {
     653    m_alfPicFiltUnits[compIdx] = new AlfUnitParam[m_uiNumCUsInFrame];
     654    m_alfCorr[compIdx] = new AlfCorrData*[m_uiNumCUsInFrame];
     655    for(Int n=0; n< m_uiNumCUsInFrame; n++)
     656    {
     657      m_alfCorr[compIdx][n]= new AlfCorrData(compIdx);
     658      m_alfCorr[compIdx][n]->reset();
     659    }
     660
     661    m_alfCorrMerged[compIdx] = new AlfCorrData(compIdx);
     662
     663  }
     664
     665
     666  const Int numCoef = (Int)ALF_MAX_NUM_COEF;
     667
     668  for(Int i=0; i< (Int)NO_VAR_BINS; i++)
     669  {
     670    m_coeffNoFilter[i] = new Int[numCoef];
     671  }
     672
     673  m_numSlicesDataInOneLCU = new Int[m_uiNumCUsInFrame];
     674
     675}
     676
     677/** destroy ALF global buffers
     678 * This function is used to destroy the global ALF encoder buffers
     679 */
     680Void TEncAdaptiveLoopFilter::destroyAlfGlobalBuffers()
     681{
     682  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     683  {
     684    delete[] m_alfPicFiltUnits[compIdx];
     685    for(Int n=0; n< m_uiNumCUsInFrame; n++)
     686    {
     687      delete m_alfCorr[compIdx][n];
     688    }
     689
     690    delete[] m_alfCorr[compIdx];
     691    m_alfCorr[compIdx] = NULL;
     692
     693    delete m_alfCorrMerged[compIdx];
     694  }
     695
     696  //const Int numCoef = (Int)ALF_MAX_NUM_COEF;
     697
     698  for(Int i=0; i< (Int)NO_VAR_BINS; i++)
     699  {
     700    delete[] m_coeffNoFilter[i];
     701  }
     702
     703  delete[] m_numSlicesDataInOneLCU;
     704
     705}
     706
     707/** initialize ALF encoder at picture level
     708 * \param [in] isAlfParamInSlice ALF parameters are coded in slice (true) or APS (false)
     709 * \param [in] isPicBasedEncode picture-based encoding (true) or LCU-based encoding (false)
     710 * \param [in] numSlices number of slices in current picture
     711 * \param [in, out] alfParams ALF parameter set
     712 * \param [in, out] alfCUCtrlParam ALF CU-on/off control parameters
     713 */
     714Void TEncAdaptiveLoopFilter::initALFEnc(Bool isAlfParamInSlice, Bool isPicBasedEncode, Int numSlices, AlfParamSet* & alfParams, std::vector<AlfCUCtrlInfo>* & alfCUCtrlParam)
     715{
     716  m_picBasedALFEncode = isPicBasedEncode;
     717
     718  if(isAlfParamInSlice)
     719  {
     720    alfParams = new AlfParamSet[m_uiNumSlicesInPic];
     721    Int numLCUs = m_uiNumCUsInFrame;
     722
     723    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     724    {
     725      numLCUs = (Int)(m_pcPic->getOneSliceCUDataForNDBFilter(s).size());
     726      alfParams[s].create(m_numLCUInPicWidth,m_numLCUInPicHeight, numLCUs );
     727      alfParams[s].createALFParam();
     728    }
     729    alfCUCtrlParam = NULL;
     730  }
     731  else //ALF parameter in APS
     732  {
     733    alfParams = NULL; //ALF parameters are handled by APS
     734    alfCUCtrlParam = new std::vector<AlfCUCtrlInfo>;
     735    alfCUCtrlParam->resize(numSlices);
     736  }
     737
     738  resetPicAlfUnit();
     739
     740  if(m_picBasedALFEncode)
     741  {
     742    resetPQTPart(); 
     743  }
     744
     745  const Int numCoef = (Int)ALF_MAX_NUM_COEF;
     746#if LCUALF_QP_DEPENDENT_BITS
     747  Int numBitShift = getAlfPrecisionBit( m_alfQP );
     748#else
     749  Int numBitShift = (Int)ALF_NUM_BIT_SHIFT;
     750#endif
     751  for(Int i=0; i< (Int)NO_VAR_BINS; i++)
     752  {
     753    ::memset(&(m_coeffNoFilter[i][0]), 0, sizeof(Int)*numCoef);
     754    m_coeffNoFilter[i][numCoef-1] = (1 << numBitShift);
     755  }
     756
     757}
     758
     759/** Uninitialize ALF encoder at picture level
     760 * \param [in, out] alfParams ALF parameter set
     761 * \param [in, out] alfCUCtrlParam ALF CU-on/off control parameters
     762 */
     763Void TEncAdaptiveLoopFilter::uninitALFEnc(AlfParamSet* & alfParams, std::vector<AlfCUCtrlInfo>* & alfCUCtrlParam)
     764{
     765  if(alfParams != NULL)
     766  {
     767    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     768    {
     769      alfParams[s].releaseALFParam();
     770    }
     771    delete[] alfParams;
     772    alfParams = NULL;
     773  }
     774
     775  if(alfCUCtrlParam != NULL)
     776  {
     777    delete alfCUCtrlParam;
     778    alfCUCtrlParam = NULL;
     779  }
     780}
     781
     782/** reset ALF unit parameters in current picture
     783 */
     784Void TEncAdaptiveLoopFilter::resetPicAlfUnit()
     785{
     786  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     787  {
     788    for(Int i=0; i< m_uiNumCUsInFrame; i++)
     789    {
     790      AlfUnitParam& alfUnit = m_alfPicFiltUnits[compIdx][i];
     791      alfUnit.mergeType = ALF_MERGE_DISABLED;
     792      alfUnit.isEnabled = false;
     793      alfUnit.isNewFilt = true;
     794      alfUnit.alfFiltParam = m_alfFiltInfo[compIdx][i];
     795
     796      alfUnit.alfFiltParam->alf_flag = 0;
     797    }
     798  }
     799}
     800
     801#else
     802
     803/** create ALF global buffers
     804 * \param iALFEncodePassReduction 0: 16-pass encoding, 1: 1-pass encoding, 2: 2-pass encoding
     805 * This function is used to create the filter buffers to perform time-delay filtering.
     806 */
    188807Void TEncAdaptiveLoopFilter::createAlfGlobalBuffers(Int iALFEncodePassReduction)
    189808{
    190809  if(iALFEncodePassReduction)
    191810  {
     811    Int iNumOfBuffer = m_iGOPSize +1;
     812
    192813    for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    193814    {
    194       m_aiFilterCoeffSavedMethods[i] = new Int**[9];
    195       for(Int j=0; j< 9; j++)
    196       {
     815      m_mergeTableSavedMethods[i] = new Int*[iNumOfBuffer];
     816      m_aiFilterCoeffSavedMethods[i] = new Int**[iNumOfBuffer];
     817      for(Int j=0; j< iNumOfBuffer; j++)
     818      {
     819        m_mergeTableSavedMethods[i][j] = new Int[NO_VAR_BINS];
    197820        m_aiFilterCoeffSavedMethods[i][j] = new Int*[NO_VAR_BINS];
    198821        for(Int k=0; k< NO_VAR_BINS; k++)
    199822        {
    200           m_aiFilterCoeffSavedMethods[i][j][k] = new Int[MAX_SQR_FILT_LENGTH];
     823          m_aiFilterCoeffSavedMethods[i][j][k] = new Int[ALF_MAX_NUM_COEF];
    201824        }
    202825      }
    203     }
    204 
    205   }
    206 }
     826      m_iPreviousFilterShapeMethods[i] = new Int[iNumOfBuffer];
     827    }
     828
     829  }
     830}
     831/** destroy ALF global buffers
     832 * This function is used to destroy the filter buffers.
     833 */
    207834
    208835Void TEncAdaptiveLoopFilter::destroyAlfGlobalBuffers()
     
    212839    for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    213840    {
    214       for(Int j=0; j< 9; j++)
     841      for(Int j=0; j< m_iGOPSize+1; j++)
    215842      {
    216843        for(Int k=0; k< NO_VAR_BINS; k++)
     
    219846        }
    220847        delete[] m_aiFilterCoeffSavedMethods[i][j];
     848        delete[] m_mergeTableSavedMethods[i][j];
    221849      }
    222850      delete[] m_aiFilterCoeffSavedMethods[i];
    223     }
    224 
    225   }
    226 
    227 }
    228 #endif
    229 
     851      delete[] m_iPreviousFilterShapeMethods[i];
     852      delete[] m_mergeTableSavedMethods[i];
     853
     854    }
     855
     856  }
     857
     858}
     859#endif
    230860/**
    231861 \param pcPic           picture (TComPic) pointer
     
    234864Void TEncAdaptiveLoopFilter::startALFEnc( TComPic* pcPic, TEncEntropy* pcEntropyCoder )
    235865{
    236   m_pcPic = pcPic;
    237866  m_pcEntropyCoder = pcEntropyCoder;
    238  
    239   m_eSliceType = pcPic->getSlice(0)->getSliceType();
    240   m_iPicNalReferenceIdc = (pcPic->getSlice(0)->isReferenced() ? 1 :0);
    241  
    242   m_uiNumSCUInCU = m_pcPic->getNumPartInCU();
    243  
     867#if !LCU_SYNTAX_ALF
    244868  xInitParam();
     869#endif
    245870  xCreateTmpAlfCtrlFlags();
    246871 
     
    251876  m_pcPicYuvTmp->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
    252877  m_pcPicYuvBest = pcPic->getPicYuvPred();
    253  
     878#if !LCU_SYNTAX_ALF 
    254879  m_pcBestAlfParam = new ALFParam;
    255880  m_pcTempAlfParam = new ALFParam;
    256881  allocALFParam(m_pcBestAlfParam);
    257882  allocALFParam(m_pcTempAlfParam);
    258   m_im_width = iWidth;
    259   m_im_height = iHeight;
    260  
     883  pcPicYuvRecShape0 = new TComPicYuv();
     884  pcPicYuvRecShape0->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
     885 
     886  pcPicYuvRecShape1 = new TComPicYuv();
     887  pcPicYuvRecShape1->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
     888
     889  pcAlfParamShape0 = new ALFParam;
     890  pcAlfParamShape1 = new ALFParam;
     891
     892  allocALFParam(pcAlfParamShape0); 
     893  allocALFParam(pcAlfParamShape1);
     894
    261895  // init qc_filter
    262   initMatrix4D_double(&m_EGlobalSym, NO_TEST_FILT,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
    263   initMatrix3D_double(&m_yGlobalSym, NO_TEST_FILT, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    264   initMatrix_int(&m_filterCoeffSymQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    265  
     896  initMatrix4D_double(&m_EGlobalSym, NUM_ALF_FILTER_SHAPE+1,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
     897  initMatrix3D_double(&m_yGlobalSym, NUM_ALF_FILTER_SHAPE+1, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
     898#endif
     899  initMatrix_int(&m_filterCoeffSymQuant, NO_VAR_BINS, ALF_MAX_NUM_COEF);
     900#if !LCU_SYNTAX_ALF
    266901  m_pixAcc = (double *) calloc(NO_VAR_BINS, sizeof(double));
    267 #if !MQT_BA_RA
    268   get_mem2Dpel(&m_varImg, m_im_height, m_im_width);
    269 #endif
    270   get_mem2Dpel(&m_maskImg, m_im_height, m_im_width);
    271  
     902#endif
     903  initMatrix_Pel(&m_maskImg, m_img_height, m_img_width);
    272904  initMatrix_double(&m_E_temp, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);//
    273905  m_y_temp = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));//
     
    275907  initMatrix_double(&m_y_merged, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); //
    276908  m_pixAcc_merged = (double *) calloc(NO_VAR_BINS, sizeof(double));//
    277  
    278   m_filterCoeffQuantMod = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));//
    279   m_filterCoeff = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));//
    280   m_filterCoeffQuant = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));//
    281   initMatrix_int(&m_diffFilterCoeffQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);//
    282   initMatrix_int(&m_FilterCoeffQuantTemp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);//
    283  
     909  m_filterCoeffQuantMod = (int *) calloc(ALF_MAX_NUM_COEF, sizeof(int));//
     910  m_filterCoeff = (double *) calloc(ALF_MAX_NUM_COEF, sizeof(double));//
     911  m_filterCoeffQuant = (int *) calloc(ALF_MAX_NUM_COEF, sizeof(int));//
     912  initMatrix_int(&m_diffFilterCoeffQuant, NO_VAR_BINS, ALF_MAX_NUM_COEF);//
     913  initMatrix_int(&m_FilterCoeffQuantTemp, NO_VAR_BINS, ALF_MAX_NUM_COEF);//
     914
     915#if LCU_SYNTAX_ALF
     916  m_tempALFp = new ALFParam(ALF_Y);
     917#else
    284918  m_tempALFp = new ALFParam;
    285919  allocALFParam(m_tempALFp);
    286   m_pcDummyEntropyCoder = m_pcEntropyCoder;
    287 
    288 #if MTK_NONCROSS_INLOOP_FILTER
    289920  if( m_bUseNonCrossALF )
    290921  {
     
    293924  }
    294925#endif
    295 
    296 
    297926}
    298927
    299928Void TEncAdaptiveLoopFilter::endALFEnc()
    300929{
     930#if !LCU_SYNTAX_ALF
    301931  xUninitParam();
     932#endif
    302933  xDestroyTmpAlfCtrlFlags();
    303934 
     
    307938  m_pcPic = NULL;
    308939  m_pcEntropyCoder = NULL;
    309  
     940#if !LCU_SYNTAX_ALF
    310941  freeALFParam(m_pcBestAlfParam);
    311942  freeALFParam(m_pcTempAlfParam);
    312943  delete m_pcBestAlfParam;
    313944  delete m_pcTempAlfParam;
     945
     946  pcPicYuvRecShape0->destroyLuma();
     947  delete pcPicYuvRecShape0;
     948  pcPicYuvRecShape0 = NULL;
     949
     950  pcPicYuvRecShape1->destroyLuma();
     951  delete pcPicYuvRecShape1;
     952  pcPicYuvRecShape1 = NULL;
     953
     954  freeALFParam(pcAlfParamShape0);
     955  freeALFParam(pcAlfParamShape1);
     956
     957  delete pcAlfParamShape0;
     958  delete pcAlfParamShape1;
     959
    314960  // delete qc filters
    315   destroyMatrix4D_double(m_EGlobalSym, NO_TEST_FILT,  NO_VAR_BINS);
    316   destroyMatrix3D_double(m_yGlobalSym, NO_TEST_FILT);
     961  destroyMatrix4D_double(m_EGlobalSym, NUM_ALF_FILTER_SHAPE+1,  NO_VAR_BINS);
     962  destroyMatrix3D_double(m_yGlobalSym, NUM_ALF_FILTER_SHAPE+1);
     963#endif
    317964  destroyMatrix_int(m_filterCoeffSymQuant);
    318  
     965#if !LCU_SYNTAX_ALF 
    319966  free(m_pixAcc);
    320 #if !MQT_BA_RA
    321   free_mem2Dpel(m_varImg);
    322 #endif
    323   free_mem2Dpel(m_maskImg);
    324  
     967#endif
     968  destroyMatrix_Pel(m_maskImg);
    325969  destroyMatrix3D_double(m_E_merged, NO_VAR_BINS);
    326970  destroyMatrix_double(m_y_merged);
     
    336980  destroyMatrix_int(m_FilterCoeffQuantTemp);
    337981 
     982#if LCU_SYNTAX_ALF
     983  delete m_tempALFp;
     984#else
    338985  freeALFParam(m_tempALFp);
    339986  delete m_tempALFp;
    340 
    341 #if MTK_NONCROSS_INLOOP_FILTER
    342987
    343988  if(m_bUseNonCrossALF)
     
    348993  }
    349994#endif
    350 
    351 }
    352 
     995}
     996
     997#if LCU_SYNTAX_ALF
     998
     999/** Assign output ALF parameters
     1000 * \param [in, out] alfParamSet ALF parameter set
     1001 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     1002 */
     1003Void TEncAdaptiveLoopFilter::assignALFEncoderParam(AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     1004{
     1005  //assign CU control parameters
     1006  if(m_bAlfCUCtrlEnabled)
     1007  {
     1008    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1009    {
     1010      (*alfCtrlParam)[s]= m_vBestAlfCUCtrlParam[s];
     1011    }
     1012  }
     1013
     1014  //assign RDO results to alfParamSet
     1015  if(m_alfCoefInSlice)
     1016  {
     1017    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1018    {
     1019      if(!m_pcPic->getValidSlice(s))
     1020      {
     1021        continue;
     1022      }
     1023
     1024      if( m_bestAlfParamSet[s].isEnabled[ALF_Y] || m_bestAlfParamSet[s].isEnabled[ALF_Cb] || m_bestAlfParamSet[s].isEnabled[ALF_Cr])
     1025      {
     1026        m_bestAlfParamSet[s].isEnabled[ALF_Y] = true;
     1027      }
     1028
     1029      copyAlfParamSet(&(alfParamSet[s]), &(m_bestAlfParamSet[s]));
     1030    }
     1031  }
     1032  else
     1033  {
     1034    if( m_bestAlfParamSet->isEnabled[ALF_Y] || m_bestAlfParamSet->isEnabled[ALF_Cb] || m_bestAlfParamSet->isEnabled[ALF_Cr])
     1035    {
     1036      m_bestAlfParamSet->isEnabled[ALF_Y] = true;
     1037    }
     1038
     1039    copyAlfParamSet(alfParamSet, m_bestAlfParamSet);
     1040  }
     1041
     1042  if(m_alfCoefInSlice)
     1043  {
     1044    delete[] m_bestAlfParamSet;
     1045  }
     1046  else
     1047  {
     1048    delete m_bestAlfParamSet;
     1049  }
     1050}
     1051
     1052/** initialize ALF encoder configurations
     1053 * \param [in, out] alfParamSet ALF parameter set
     1054 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     1055 */
     1056Void TEncAdaptiveLoopFilter::initALFEncoderParam(AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     1057{
     1058  //reset BA index map
     1059  memset(&m_varImg[0][0], 0, sizeof(Pel)*(m_img_height*m_img_width));
     1060
     1061  //reset mask
     1062  for(Int y=0; y< m_img_height; y++)
     1063  {
     1064    for(Int x=0; x< m_img_width; x++)
     1065    {
     1066      m_maskImg[y][x] = 1;
     1067    }
     1068  }
     1069  //get last valid slice index
     1070  for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1071  {
     1072    if(m_pcPic->getValidSlice(s))
     1073    {
     1074      m_lastSliceIdx = s;
     1075    }
     1076  }
     1077  //reset alf CU control flags
     1078  m_bAlfCUCtrlEnabled = (alfCtrlParam != NULL)?true:false;
     1079  if(m_bAlfCUCtrlEnabled)
     1080  {
     1081    m_vBestAlfCUCtrlParam.resize(m_uiNumSlicesInPic);
     1082    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1083    {
     1084      m_vBestAlfCUCtrlParam[s].reset();
     1085    }
     1086  }
     1087  else
     1088  {
     1089    m_vBestAlfCUCtrlParam.clear();
     1090  }
     1091  //get number slices in each LCU
     1092  if(m_uiNumSlicesInPic == 1 || m_iSGDepth == 0)
     1093  {
     1094    for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1095    {
     1096      m_numSlicesDataInOneLCU[n] = 1;
     1097    }
     1098  }
     1099  else
     1100  {
     1101    Int count;
     1102    Int prevSliceID = -1;
     1103
     1104    for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1105    {
     1106      std::vector<NDBFBlockInfo>& vNDBFBlock = *(m_pcPic->getCU(n)->getNDBFilterBlocks());
     1107
     1108      count = 0;
     1109
     1110      for(Int i=0; i< (Int)vNDBFBlock.size(); i++)
     1111      {
     1112        if(vNDBFBlock[i].sliceID != prevSliceID)
     1113        {
     1114          prevSliceID = vNDBFBlock[i].sliceID;
     1115          count++;
     1116        }
     1117      }
     1118
     1119      m_numSlicesDataInOneLCU[n] = count;
     1120    }
     1121  }
     1122  //set redesign number
     1123  if(m_iALFEncodePassReduction)
     1124  {
     1125    m_iALFNumOfRedesign = 0;
     1126  }
     1127  else
     1128  {
     1129    m_iALFNumOfRedesign = ALF_NUM_OF_REDESIGN;
     1130  }
     1131
     1132  //initialize m_bestAlfParamSet
     1133  if(m_alfCoefInSlice)
     1134  {
     1135    m_bestAlfParamSet = new AlfParamSet[m_uiNumSlicesInPic];
     1136    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1137    {
     1138      m_bestAlfParamSet[s].create( alfParamSet[s].numLCUInWidth, alfParamSet[s].numLCUInHeight, alfParamSet[s].numLCU);
     1139    }
     1140  }
     1141  else
     1142  {
     1143    m_bestAlfParamSet = new AlfParamSet;
     1144    m_bestAlfParamSet->create( alfParamSet->numLCUInWidth, alfParamSet->numLCUInHeight, alfParamSet->numLCU);
     1145  }
     1146
     1147}
     1148
     1149/** copy ALF parameter set
     1150 * \param [out] dst destination ALF parameter set
     1151 * \param [in] src source ALF parameter set
     1152 */
     1153Void TEncAdaptiveLoopFilter::copyAlfParamSet(AlfParamSet* dst, AlfParamSet* src)
     1154{
     1155  dst->numLCU = src->numLCU;
     1156  dst->numLCUInWidth = src->numLCUInWidth;
     1157  dst->numLCUInHeight = src->numLCUInHeight;
     1158
     1159  for(Int compIdx =0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     1160  {
     1161    dst->isEnabled[compIdx] = src->isEnabled[compIdx];
     1162    dst->isUniParam[compIdx] = src->isUniParam[compIdx];
     1163
     1164    for(Int n=0; n< src->numLCU; n++)
     1165    {
     1166      dst->alfUnitParam[compIdx][n].isEnabled = src->alfUnitParam[compIdx][n].isEnabled;
     1167      dst->alfUnitParam[compIdx][n].isNewFilt = src->alfUnitParam[compIdx][n].isNewFilt;
     1168      dst->alfUnitParam[compIdx][n].mergeType = src->alfUnitParam[compIdx][n].mergeType;
     1169      dst->alfUnitParam[compIdx][n].storedFiltIdx = src->alfUnitParam[compIdx][n].storedFiltIdx;
     1170      *(dst->alfUnitParam[compIdx][n].alfFiltParam) = *(src->alfUnitParam[compIdx][n].alfFiltParam);
     1171    }
     1172  }
     1173}
     1174
     1175
     1176/** ALF encoding process top function
     1177 * \param [in, out] alfParamSet ALF parameter set
     1178 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     1179 * \param [in] dLambdaLuma lambda value for luma RDO
     1180 * \param [in] dLambdaChroma lambda value for chroma RDO
     1181 */
     1182#if ALF_CHROMA_LAMBDA
     1183#if HHI_INTERVIEW_SKIP
     1184Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambdaLuma, Double lambdaChroma, Bool bInterviewSkip)
     1185#else
     1186Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambdaLuma, Double lambdaChroma)
     1187#endif
     1188#else
     1189#if HHI_INTERVIEW_SKIP
     1190#else
     1191Void TEncAdaptiveLoopFilter::ALFProcess( AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCtrlParam, Double lambda)
     1192#endif
     1193#endif
     1194{
     1195#if ALF_CHROMA_LAMBDA
     1196  m_dLambdaLuma   = lambdaLuma;
     1197  m_dLambdaChroma = lambdaChroma;
     1198#else
     1199  m_dLambdaLuma   = lambda;
     1200  m_dLambdaChroma = lambda;
     1201#endif
     1202  TComPicYuv* yuvOrg    = m_pcPic->getPicYuvOrg();
     1203  TComPicYuv* yuvRec    = m_pcPic->getPicYuvRec();
     1204  TComPicYuv* yuvExtRec = m_pcTempPicYuv;
     1205#if HHI_INTERVIEW_SKIP
     1206  TComPicYuv* pUsedPelMap = NULL;
     1207  if( bInterviewSkip )
     1208  {
     1209    pUsedPelMap = m_pcPic->getUsedPelsMap();
     1210  }
     1211#endif
     1212
     1213  //picture boundary padding
     1214  yuvRec->copyToPic(yuvExtRec);
     1215  yuvExtRec->setBorderExtension( false );
     1216  yuvExtRec->extendPicBorder   ();
     1217
     1218  //initialize encoder parameters
     1219  initALFEncoderParam(alfParamSet, alfCtrlParam);
     1220
     1221  //get LCU statistics
     1222  getStatistics(yuvOrg, yuvExtRec);
     1223
     1224  //decide ALF parameters
     1225#if HHI_INTERVIEW_SKIP
     1226  decideParameters(yuvOrg, yuvExtRec, yuvRec, pUsedPelMap, m_bestAlfParamSet, alfCtrlParam);
     1227#else
     1228  decideParameters(yuvOrg, yuvExtRec, yuvRec, m_bestAlfParamSet, alfCtrlParam);
     1229#endif
     1230
     1231  //assign best parameters
     1232  assignALFEncoderParam(alfParamSet, alfCtrlParam);
     1233}
     1234
     1235/** Check if the current LCU can be merged with neighboring LCU
     1236 * \param [in] compIdx luma/chroma component index
     1237 * \param [out] alfUnitPic ALF unit parameters for all LCUs in picture
     1238 */
     1239Void TEncAdaptiveLoopFilter::checkMerge(Int compIdx, AlfUnitParam* alfUnitPic)
     1240{
     1241  AlfUnitParam *alfUnitLeft, *alfUnitUp;
     1242
     1243  for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1244  {
     1245    Int lcuPosX = (Int)(n % m_numLCUInPicWidth);
     1246    Int lcuPosY = (Int)(n / m_numLCUInPicWidth);
     1247
     1248    AlfUnitParam& alfUnitCur = alfUnitPic[n];
     1249
     1250    //check merge left
     1251    if( lcuPosX != 0)
     1252    {
     1253      alfUnitLeft = &(alfUnitPic[n - 1]);
     1254      if(alfUnitCur == *alfUnitLeft)
     1255      {
     1256        alfUnitCur.mergeType = ALF_MERGE_LEFT;
     1257        alfUnitCur.isEnabled = alfUnitLeft->isEnabled;
     1258        alfUnitCur.isNewFilt = alfUnitLeft->isNewFilt;
     1259        alfUnitCur.storedFiltIdx = alfUnitLeft->storedFiltIdx;
     1260        *(alfUnitCur.alfFiltParam) = *(alfUnitLeft->alfFiltParam);
     1261        continue;
     1262      }
     1263    }
     1264
     1265    //check merge up
     1266    if(lcuPosY !=0 )
     1267    {
     1268      alfUnitUp = &(alfUnitPic[n - m_numLCUInPicWidth]);
     1269      if(alfUnitCur == *alfUnitUp)
     1270      {
     1271        alfUnitCur.mergeType = ALF_MERGE_UP;
     1272        alfUnitCur.isEnabled = alfUnitUp->isEnabled;
     1273        alfUnitCur.isNewFilt = alfUnitUp->isNewFilt;
     1274        alfUnitCur.storedFiltIdx = alfUnitUp->storedFiltIdx;
     1275        *(alfUnitCur.alfFiltParam) = *(alfUnitUp->alfFiltParam);
     1276        continue;
     1277      }
     1278    }
     1279  }
     1280
     1281}
     1282
     1283/** Transfer ALF unit parameters for LCUs to to-be-coded ALF parameter set
     1284 * \param [in] compIdx luma/chroma component index
     1285 * \param [in] alfUnitPic ALF unit parameters for all LCUs in picture
     1286 * \param [out] alfParamSet to-be-coded ALF parameter set
     1287 */
     1288Void TEncAdaptiveLoopFilter::transferToAlfParamSet(Int compIdx, AlfUnitParam* alfUnitPic, AlfParamSet* & alfParamSet)
     1289{
     1290
     1291  Int countFiltOffLCU = 0, countNewFilts = 0;
     1292
     1293  AlfUnitParam* alfUnitParams = alfParamSet->alfUnitParam[compIdx];
     1294  for(Int n=0; n< m_uiNumCUsInFrame; n++)
     1295  {
     1296    alfUnitParams[n] = alfUnitPic[n];
     1297
     1298
     1299    if(alfUnitParams[n].alfFiltParam->alf_flag == 0)
     1300    {
     1301      countFiltOffLCU++;
     1302    }
     1303    else
     1304    {
     1305      Bool isNewFiltInSlice =   (alfUnitParams[n].mergeType == ALF_MERGE_DISABLED && alfUnitParams[n].isEnabled && alfUnitParams[n].isNewFilt);
     1306      if( isNewFiltInSlice )
     1307      {
     1308        countNewFilts++;
     1309      }
     1310    }
     1311  }
     1312
     1313  //slice-level parameters
     1314  AlfUnitParam* firstAlfUnitInSlice = &(alfUnitParams[0]);
     1315  if( countFiltOffLCU == m_uiNumCUsInFrame ) //number of filter-off LCU is equal to the number of LCUs in slice
     1316  {
     1317    alfParamSet->isEnabled [compIdx] = false;   
     1318    alfParamSet->isUniParam[compIdx] = true; //uni-param, all off
     1319    assert(firstAlfUnitInSlice->alfFiltParam->alf_flag == 0);
     1320  }
     1321  else
     1322  {
     1323    alfParamSet->isEnabled[compIdx] = true;
     1324    if( countNewFilts == 1 && firstAlfUnitInSlice->alfFiltParam->alf_flag != 0 && countFiltOffLCU == 0)
     1325    {
     1326      alfParamSet->isUniParam[compIdx] = true;
     1327    }
     1328    else
     1329    {
     1330      alfParamSet->isUniParam[compIdx] = false;
     1331    }
     1332  }
     1333
     1334}
     1335
     1336/** Disable all ALF unit parameters in current component
     1337 * \param [in] compIdx luma/chroma component index
     1338 * \param [out] alfParamSet to-be-coded ALF parameter set
     1339 * \param [in] alfUnitPic ALF unit parameters for all LCUs in picture
     1340 */
     1341Void TEncAdaptiveLoopFilter::disableComponentAlfParam(Int compIdx, AlfParamSet* alfParamSet, AlfUnitParam* alfUnitPic)
     1342{
     1343  alfParamSet->isEnabled [compIdx] = false;
     1344  alfParamSet->isUniParam[compIdx] = true; //all off
     1345
     1346  for(Int lcuPos = 0; lcuPos < m_uiNumCUsInFrame; lcuPos++)
     1347  {
     1348    AlfUnitParam& alfunitParam = alfUnitPic[lcuPos];
     1349
     1350    alfunitParam.mergeType = ALF_MERGE_DISABLED;
     1351    alfunitParam.isEnabled = false;
     1352    alfunitParam.isNewFilt = false;
     1353    alfunitParam.storedFiltIdx = -1;
     1354    alfunitParam.alfFiltParam->alf_flag = 0;
     1355  }
     1356
     1357  //check merge-up and merge-left
     1358  checkMerge(compIdx, alfUnitPic);
     1359
     1360  //transfer to AlfParamSet
     1361  transferToAlfParamSet(compIdx, alfUnitPic, alfParamSet);
     1362
     1363}
     1364
     1365/** Picture-based encoding
     1366 * \param [out] alfParamSet to-be-coded ALF parameter set
     1367 * \param [in, out] alfPicQTPart picture quad-tree partition
     1368 * \param [in] compIdx luma/chroma component index
     1369 * \param [in] pOrg picture buffer for original picture
     1370 * \param [in] pDec picture buffer for un-filtered picture
     1371 * \param [out] pRest picture buffer for filtered picture
     1372 * \param [in] stride stride size for 1-D picture memory
     1373 * \param [in, out] alfCorrLCUs correlation values for LCUs
     1374 */
     1375#if HHI_INTERVIEW_SKIP
     1376Void TEncAdaptiveLoopFilter::executePicBasedModeDecision(AlfParamSet* alfParamSet
     1377                                                        , AlfPicQTPart* alfPicQTPart
     1378                                                        , Int compIdx
     1379                                                        , Pel* pOrg, Pel* pDec, Pel* pRest, Pel* pUsed, Int stride, Int formatShift
     1380                                                        , AlfCorrData** alfCorrLCUs
     1381                                                        )
     1382#else
     1383Void TEncAdaptiveLoopFilter::executePicBasedModeDecision(AlfParamSet* alfParamSet
     1384                                                        , AlfPicQTPart* alfPicQTPart
     1385                                                        , Int compIdx
     1386                                                        , Pel* pOrg, Pel* pDec, Pel* pRest, Int stride, Int formatShift
     1387                                                        , AlfCorrData** alfCorrLCUs
     1388                                                        )
     1389#endif
     1390{
     1391  if(compIdx != ALF_Y)
     1392  {
     1393    if(!alfParamSet->isEnabled[ALF_Y])
     1394    {
     1395      disableComponentAlfParam(compIdx, alfParamSet, m_alfPicFiltUnits[compIdx]);
     1396      return;
     1397    }
     1398  }
     1399
     1400  Int picWidth = (m_img_width >> formatShift);
     1401  Int picHeight= (m_img_height >> formatShift);
     1402
     1403  Int64  minDist = 0;
     1404  Int64  minRate = 0;
     1405  Double minCost = 0;
     1406
     1407  decideQTPartition(alfPicQTPart, alfCorrLCUs, 0, 0, minCost, minDist, minRate);
     1408
     1409  //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1410  patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[compIdx]);
     1411
     1412  //check merge-up and merge-left
     1413  checkMerge(compIdx, m_alfPicFiltUnits[compIdx]);
     1414
     1415  //transfer to AlfParamSet
     1416  transferToAlfParamSet(compIdx, m_alfPicFiltUnits[compIdx], alfParamSet);
     1417
     1418  //reconstruction
     1419  recALF(compIdx, m_alfFiltInfo[compIdx], pDec, pRest, stride, formatShift, NULL, false);
     1420
     1421  Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma);
     1422
     1423
     1424  std::vector<AlfCUCtrlInfo> alfCUCtrlParamTemp(m_vBestAlfCUCtrlParam);
     1425  minRate = calculateAlfParamSetRateRDO(compIdx, alfParamSet, &alfCUCtrlParamTemp);
     1426#if HHI_INTERVIEW_SKIP
     1427  minDist = xCalcSSD(pOrg, pRest, pUsed, picWidth, picHeight, stride);
     1428#else
     1429  minDist = xCalcSSD(pOrg, pRest, picWidth, picHeight, stride);
     1430#endif
     1431  minCost = (Double)minDist + lambda*((Double)minRate);
     1432
     1433  //block on/off control
     1434  if(compIdx == ALF_Y && m_bAlfCUCtrlEnabled)
     1435  {
     1436#if HHI_INTERVIEW_SKIP
     1437    decideBlockControl(pOrg, pDec, pRest, pUsed, stride, alfPicQTPart, alfParamSet, minRate, minDist, minCost);
     1438#else
     1439    decideBlockControl(pOrg, pDec, pRest, stride, alfPicQTPart, alfParamSet, minRate, minDist, minCost); 
     1440#endif
     1441  }
     1442
     1443  //get filter-off distortion, rate, cost
     1444  AlfParamSet alfParamSetOff;
     1445  for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1446  {
     1447    alfCUCtrlParamTemp[s].reset();
     1448  }
     1449  alfParamSetOff.isEnabled[compIdx] = false;
     1450  alfParamSetOff.isUniParam[compIdx] = true;
     1451#if HHI_INTERVIEW_SKIP
     1452  Int64  offDist = xCalcSSD(pOrg, pDec, pUsed, picWidth, picHeight, stride);
     1453#else
     1454  Int64  offDist = xCalcSSD(pOrg, pDec, picWidth, picHeight, stride);
     1455#endif
     1456  Int64  offRate = calculateAlfParamSetRateRDO(compIdx, &alfParamSetOff, &alfCUCtrlParamTemp);
     1457  Double offCost = (Double)offDist + lambda*((Double)offRate);
     1458
     1459  if(offCost < minCost  )
     1460  {
     1461    //revert to filter-off results
     1462    Pel* pelSrc = pDec;
     1463    Pel* pelDst = pRest;
     1464    for(Int y=0; y< picHeight; y++)
     1465    {
     1466      ::memcpy(pelDst, pelSrc, sizeof(Pel)*picWidth);
     1467      pelSrc += stride;
     1468      pelDst += stride;
     1469    }
     1470
     1471    alfParamSet->isEnabled[compIdx] = false;
     1472    alfParamSet->isUniParam[compIdx] = true; //all filter-off
     1473  }
     1474
     1475}
     1476
     1477/** copy picture quadtree infromation
     1478 * \param [out] alfPicQTPartDest destination part in picture quad tree
     1479 * \param [in ] alfPicQTPartSrc source part in picture quad tree
     1480 */
     1481Void TEncAdaptiveLoopFilter::copyPicQT(AlfPicQTPart* alfPicQTPartDest, AlfPicQTPart* alfPicQTPartSrc)
     1482{
     1483  for (Int i=0; i< m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth]; i++)
     1484  {
     1485    alfPicQTPartDest[i] = alfPicQTPartSrc[i];
     1486  }
     1487}
     1488
     1489/** copy pixel values for one rectangular region
     1490 * \param [out] imgDest destination part in picture quad tree
     1491 * \param [in ] imgSrc source part in picture quad tree
     1492 * \param [in ] stride source part in picture quad tree
     1493 * \param [in ] yPos starting y position
     1494 * \param [in ] height region height
     1495 * \param [in ] xPos starting x position
     1496 * \param [in ] width region width
     1497 */
     1498Void TEncAdaptiveLoopFilter::copyPixelsInOneRegion(Pel* imgDest, Pel* imgSrc, Int stride, Int yPos, Int height, Int xPos, Int width)
     1499{
     1500  Int offset = (yPos*stride) + xPos;
     1501  Pel *imgDestLine = imgDest + offset;
     1502  Pel *imgSrcLine  = imgSrc  + offset;
     1503
     1504  for (Int j=0; j<height; j++)
     1505  {
     1506    ::memcpy(imgDestLine, imgSrcLine, sizeof(Pel)*width);
     1507    imgDestLine += stride;
     1508    imgSrcLine  += stride;
     1509  }
     1510}
     1511
     1512/** Re-design ALF parameters for picture quad-tree partitions
     1513 * \param [out] alfPicQTPart picture quad-tree partition information
     1514 * \param [in ] partIdx partition index
     1515 * \param [in ] partLevel partition level
     1516 */
     1517Void TEncAdaptiveLoopFilter::reDesignQT(AlfPicQTPart *alfPicQTPart, Int partIdx, Int partLevel)
     1518{
     1519  AlfPicQTPart *alfPicQTOnePart = &(alfPicQTPart[partIdx]); 
     1520  Int nextPartLevel = partLevel + 1;
     1521
     1522  if (!alfPicQTOnePart->isSplit)
     1523  {
     1524    if (alfPicQTOnePart->alfUnitParam->alfFiltParam->alf_flag)
     1525    {
     1526      executeModeDecisionOnePart(alfPicQTPart, m_alfCorr[ALF_Y], partIdx, partLevel) ;     
     1527    }
     1528  }
     1529  else
     1530  {
     1531    for (Int i=0; i<4; i++)
     1532    {
     1533      reDesignQT(alfPicQTPart, alfPicQTOnePart->childPartIdx[i], nextPartLevel);
     1534    }
     1535  } 
     1536}
     1537
     1538/** CU-on/off control decision
     1539 * \param [in ] imgOrg picture buffer for original picture
     1540 * \param [in ] imgDec picture buffer for un-filtered picture
     1541 * \param [in ] imgRest picture buffer for filtered picture
     1542 * \param [in ] stride buffer stride size for 1-D picture memory
     1543 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1544 * \param [in, out] alfParamSet ALF parameter set
     1545 * \param [in, out ] minRate minimum rate
     1546 * \param [in, out ] minDist minimum distortion
     1547 * \param [in, out ] minCost minimum RD cost
     1548 */
     1549#if HHI_INTERVIEW_SKIP
     1550Void TEncAdaptiveLoopFilter::decideBlockControl(Pel* imgOrg, Pel* imgDec, Pel* imgRest, Pel* imgUsed, Int stride, AlfPicQTPart* alfPicQTPart, AlfParamSet* & alfParamSet, Int64 &minRate, Int64 &minDist, Double &minCost)
     1551#else
     1552Void TEncAdaptiveLoopFilter::decideBlockControl(Pel* imgOrg, Pel* imgDec, Pel* imgRest, Int stride, AlfPicQTPart* alfPicQTPart, AlfParamSet* & alfParamSet, Int64 &minRate, Int64 &minDist, Double &minCost)
     1553#endif
     1554{
     1555  Int    rate, ctrlDepth;
     1556  Double cost;
     1557  UInt64 dist;
     1558  Bool isChanged = false;
     1559  Pel *imgYtemp = getPicBuf(m_pcPicYuvTmp, ALF_Y);
     1560  Pel *imgYBest = getPicBuf(m_pcPicYuvBest, ALF_Y);
     1561  std::vector<AlfCUCtrlInfo> vAlfCUCtrlParamTemp(m_vBestAlfCUCtrlParam); 
     1562
     1563  AlfPicQTPart *alfPicQTPartNoCtrl = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ];
     1564  AlfPicQTPart *alfPicQTPartBest   = new AlfPicQTPart [ m_alfNumCulPartsLevelTab[m_alfPQTMaxDepth] ];
     1565
     1566  // backup data of PQT without block on/off
     1567  copyPicQT(alfPicQTPartNoCtrl, alfPicQTPart);
     1568
     1569  for (ctrlDepth=0; ctrlDepth<4; ctrlDepth++)
     1570  {       
     1571    // Restore data from PQT without block on/off
     1572    copyPixelsInOneRegion(imgYtemp, imgRest, stride, 0, m_img_height, 0, m_img_width);
     1573    copyPicQT(alfPicQTPart, alfPicQTPartNoCtrl);
     1574
     1575    for (Int reDesignRun=0; reDesignRun <= m_iALFNumOfRedesign; reDesignRun++)
     1576    {
     1577      // re-design filter
     1578      if (reDesignRun > 0)
     1579      {
     1580        // re-gather statistics
     1581        getOneCompStatistics(m_alfCorr[ALF_Y], ALF_Y, imgOrg, imgDec, stride, 0, true);
     1582
     1583        // reDesign in each QT partition
     1584        reDesignQT(alfPicQTPart, 0, 0);
     1585
     1586        //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1587        patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]);
     1588
     1589        //reconstruction
     1590        copyPixelsInOneRegion(imgYtemp, imgDec, stride, 0, m_img_height, 0, m_img_width);
     1591        recALF(ALF_Y, m_alfFiltInfo[ALF_Y], imgDec, imgYtemp, stride, 0, NULL, false);
     1592      }
     1593
     1594      // Gest distortion and decide on/off, Pel should be changed to TComPicYUV
     1595#if HHI_INTERVIEW_SKIP
     1596      setCUAlfCtrlFlags((UInt)ctrlDepth, imgOrg, imgDec, imgYtemp, imgUsed, stride, dist, vAlfCUCtrlParamTemp);
     1597#else
     1598      setCUAlfCtrlFlags((UInt)ctrlDepth, imgOrg, imgDec, imgYtemp, stride, dist, vAlfCUCtrlParamTemp);   
     1599#endif
     1600
     1601      //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1602      patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]);
     1603
     1604      //check merge-up and merge-left
     1605      checkMerge(ALF_Y, m_alfPicFiltUnits[ALF_Y]);
     1606
     1607      //transfer to AlfParamSet
     1608      transferToAlfParamSet(ALF_Y, m_alfPicFiltUnits[ALF_Y], alfParamSet);
     1609
     1610      rate = calculateAlfParamSetRateRDO(ALF_Y, alfParamSet, &vAlfCUCtrlParamTemp);
     1611      cost = (Double)dist + m_dLambdaLuma * ((Double)rate);
     1612
     1613      if (cost < minCost)
     1614      {
     1615        isChanged     = true;
     1616        minCost       = cost;
     1617        minDist       = (Int64) dist;
     1618        minRate       = rate;
     1619
     1620        m_vBestAlfCUCtrlParam = vAlfCUCtrlParamTemp;
     1621        copyPixelsInOneRegion(imgYBest, imgYtemp, stride, 0, m_img_height, 0, m_img_width);
     1622
     1623        copyPicQT(alfPicQTPartBest, alfPicQTPart);
     1624        xCopyTmpAlfCtrlFlagsFrom();
     1625      }
     1626
     1627    }
     1628  }
     1629
     1630  if (isChanged == true)
     1631  {
     1632    copyPicQT(alfPicQTPart, alfPicQTPartBest);
     1633    xCopyTmpAlfCtrlFlagsTo();
     1634
     1635    copyPixelsInOneRegion(imgRest, imgYBest, stride, 0, m_img_height, 0, m_img_width);
     1636    xCopyDecToRestCUs(imgDec, imgRest, stride);
     1637  }
     1638  else
     1639  {
     1640    copyPicQT(alfPicQTPart, alfPicQTPartNoCtrl);
     1641  }
     1642
     1643  //patch quad-tree decision to m_alfPicFiltUnits (m_alfFiltInfo[compIdx])
     1644  patchAlfUnitParams(alfPicQTPart, 0, m_alfPicFiltUnits[ALF_Y]);
     1645
     1646  //check merge-up and merge-left
     1647  checkMerge(ALF_Y, m_alfPicFiltUnits[ALF_Y]);
     1648
     1649  //transfer to AlfParamSet
     1650  transferToAlfParamSet(ALF_Y, m_alfPicFiltUnits[ALF_Y], alfParamSet);
     1651
     1652  delete [] alfPicQTPartNoCtrl;
     1653  alfPicQTPartNoCtrl = NULL;
     1654
     1655  delete [] alfPicQTPartBest;
     1656  alfPicQTPartBest = NULL;
     1657}
     1658
     1659/** Copy ALF unit parameters from quad-tree partition to LCUs
     1660 * \param [in] alfPicQTPart picture quad-tree partition information
     1661 * \param [in] partIdx partition index
     1662 * \param [out] alfUnitPic ALF unit parameters for LCUs
     1663 */
     1664Void TEncAdaptiveLoopFilter::patchAlfUnitParams(AlfPicQTPart* alfPicQTPart, Int partIdx, AlfUnitParam* alfUnitPic)
     1665{
     1666  AlfPicQTPart* alfQTPart = &(alfPicQTPart[partIdx]);
     1667  //Int compIdx = alfQTPart->componentID;
     1668
     1669  if(alfQTPart->isSplit == false)
     1670  {
     1671    AlfUnitParam* alfpartParam = alfQTPart->alfUnitParam;
     1672
     1673    Int lcuPos;
     1674    for(Int lcuPosY = alfQTPart->partCUYS; lcuPosY <= alfQTPart->partCUYE; lcuPosY++)
     1675    {
     1676      for(Int lcuPosX = alfQTPart->partCUXS; lcuPosX <= alfQTPart->partCUXE; lcuPosX++)
     1677      {
     1678        lcuPos = lcuPosY*m_numLCUInPicWidth + lcuPosX;
     1679        AlfUnitParam& alfunitParam = alfUnitPic[lcuPos];
     1680
     1681        alfunitParam.mergeType = alfpartParam->mergeType;
     1682        alfunitParam.isEnabled = alfpartParam->isEnabled;
     1683        alfunitParam.isNewFilt = alfpartParam->isNewFilt;
     1684        alfunitParam.storedFiltIdx = alfpartParam->storedFiltIdx; //not used
     1685        *(alfunitParam.alfFiltParam) = *(alfpartParam->alfFiltParam);
     1686      }
     1687    }
     1688  }
     1689  else
     1690  {
     1691    for(Int i=0; i< 4; i++)
     1692    {
     1693      patchAlfUnitParams(alfPicQTPart, alfQTPart->childPartIdx[i], alfUnitPic);     
     1694    }
     1695  }
     1696}
     1697
     1698/** Decide picture quad-tree partition
     1699 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1700 * \param [in, out] alfPicLCUCorr correlations for LCUs
     1701 * \param [int] partIdx partition index
     1702 * \param [int] partLevel partition level
     1703 * \param [in, out] cost cost for one partition
     1704 * \param [in, out] dist distortion for one partition
     1705 * \param [in, out] rate bitrate for one partition
     1706 */
     1707Void TEncAdaptiveLoopFilter::decideQTPartition(AlfPicQTPart* alfPicQTPart, AlfCorrData** alfPicLCUCorr, Int partIdx, Int partLevel, Double &cost, Int64 &dist, Int64 &rate)
     1708{
     1709  AlfPicQTPart* alfPicQTOnePart = &(alfPicQTPart[partIdx]);
     1710  Int nextPartLevel = partLevel + 1;
     1711  Int childPartIdx;
     1712  Double splitCost = 0;
     1713  Int64  splitRate = 0;
     1714  Int64  splitDist = 0; 
     1715
     1716  if (!alfPicQTOnePart->isProcessed)
     1717  {
     1718    executeModeDecisionOnePart(alfPicQTPart, alfPicLCUCorr, partIdx, partLevel);
     1719
     1720    alfPicQTOnePart->isProcessed = true;
     1721  }
     1722
     1723  if (!alfPicQTOnePart->isBottomLevel)
     1724  {   
     1725    for (Int i=0; i<4; i++)
     1726    {     
     1727      childPartIdx = alfPicQTOnePart->childPartIdx[i];
     1728      decideQTPartition(alfPicQTPart, alfPicLCUCorr, childPartIdx, nextPartLevel, splitCost, splitDist, splitRate);     
     1729    }
     1730
     1731    alfPicQTOnePart->splitMinCost = splitCost;
     1732    alfPicQTOnePart->splitMinDist = splitDist;
     1733    alfPicQTOnePart->splitMinRate = splitRate;
     1734
     1735    if (alfPicQTOnePart->splitMinCost < alfPicQTOnePart->selfMinCost)
     1736    {
     1737      alfPicQTOnePart->isSplit = true;
     1738    }
     1739    else
     1740    {
     1741      alfPicQTOnePart->isSplit = false;
     1742    }
     1743  }
     1744  else
     1745  {
     1746    alfPicQTOnePart->isSplit = false;
     1747    alfPicQTOnePart->splitMinCost = alfPicQTOnePart->selfMinCost;
     1748    alfPicQTOnePart->splitMinDist = alfPicQTOnePart->selfMinDist;
     1749    alfPicQTOnePart->splitMinRate = alfPicQTOnePart->selfMinRate;
     1750  }
     1751
     1752  if (alfPicQTOnePart->isSplit)
     1753  {
     1754    cost += alfPicQTOnePart->splitMinCost;
     1755    rate += alfPicQTOnePart->splitMinRate;
     1756    dist += alfPicQTOnePart->splitMinDist;
     1757  }
     1758  else
     1759  {
     1760    cost += alfPicQTOnePart->selfMinCost;
     1761    rate += alfPicQTOnePart->selfMinRate;
     1762    dist += alfPicQTOnePart->selfMinDist;
     1763  }
     1764
     1765}
     1766
     1767/** Mode decision process for one picture quad-tree partition
     1768 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1769 * \param [in, out] alfPicLCUCorr correlations for LCUs
     1770 * \param [int] partIdx partition index
     1771 * \param [int] partLevel partition level
     1772 */
     1773Void TEncAdaptiveLoopFilter::executeModeDecisionOnePart(AlfPicQTPart *alfPicQTPart, AlfCorrData** alfPicLCUCorr, Int partIdx, Int partLevel)
     1774{
     1775  AlfPicQTPart* alfQTPart = &(alfPicQTPart[partIdx]);
     1776  Int compIdx = alfQTPart->componentID;
     1777  Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma);
     1778
     1779  //gather correlations
     1780  alfQTPart->alfCorr->reset();
     1781  for(Int lcuPosY = alfQTPart->partCUYS; lcuPosY <= alfQTPart->partCUYE; lcuPosY++)
     1782  {
     1783    for(Int lcuPosX = alfQTPart->partCUXS; lcuPosX <= alfQTPart->partCUXE; lcuPosX++)
     1784    {
     1785      *(alfQTPart->alfCorr) +=  *(alfPicLCUCorr[lcuPosY*m_numLCUInPicWidth + lcuPosX]);
     1786    }
     1787  }
     1788
     1789  //test filter on
     1790  AlfUnitParam* alfPartUnitParam = alfQTPart->alfUnitParam;
     1791  alfPartUnitParam->mergeType = ALF_MERGE_DISABLED;
     1792  alfPartUnitParam->isEnabled = true;
     1793  alfPartUnitParam->isNewFilt = true;
     1794  alfPartUnitParam->storedFiltIdx = -1;
     1795  alfPartUnitParam->alfFiltParam->alf_flag = 1;
     1796  deriveFilterInfo(compIdx, alfQTPart->alfCorr, alfPartUnitParam->alfFiltParam, alfQTPart->numFilterBudget);
     1797
     1798  alfQTPart->selfMinDist = estimateFilterDistortion(compIdx, alfQTPart->alfCorr, m_filterCoeffSym, alfPartUnitParam->alfFiltParam->filters_per_group, m_varIndTab);
     1799  alfQTPart->selfMinRate = calculateAlfUnitRateRDO(alfPartUnitParam);
     1800  alfQTPart->selfMinCost = (Double)(alfQTPart->selfMinDist) + lambda*((Double)(alfQTPart->selfMinRate));
     1801 
     1802  alfQTPart->selfMinCost +=  ((lambda* 1.5)* ((Double)( (alfQTPart->partCUYE - alfQTPart->partCUYS+ 1)*(alfQTPart->partCUXE - alfQTPart->partCUXS +1) )));  //RDCO
     1803 
     1804
     1805  //test filter off
     1806  AlfUnitParam alfUnitParamTemp(*(alfQTPart->alfUnitParam));
     1807  alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED;
     1808  alfUnitParamTemp.isEnabled = false;
     1809  Int64  dist = estimateFilterDistortion(compIdx, alfQTPart->alfCorr);
     1810  Int64  rate = calculateAlfUnitRateRDO(&alfUnitParamTemp);
     1811  Double cost = (Double)dist + lambda*((Double)rate);
     1812  if(cost < alfQTPart->selfMinCost)
     1813  {
     1814    alfQTPart->selfMinCost = cost;
     1815    alfQTPart->selfMinDist = dist;
     1816    alfQTPart->selfMinRate = rate;
     1817    *(alfQTPart->alfUnitParam) = alfUnitParamTemp;
     1818
     1819    alfQTPart->alfUnitParam->alfFiltParam->alf_flag = 0;
     1820  }
     1821
     1822}
     1823
     1824/** Derive filter coefficients
     1825 * \param [in, out] alfPicQTPart picture quad-tree partition information
     1826 * \param [in, out] alfPicLCUCorr correlations for LCUs
     1827 * \param [int] partIdx partition index
     1828 * \param [int] partLevel partition level
     1829 */
     1830Void TEncAdaptiveLoopFilter::deriveFilterInfo(Int compIdx, AlfCorrData* alfCorr, ALFParam* alfFiltParam, Int maxNumFilters)
     1831{
     1832  const Int filtNo = 0;
     1833  const Int numCoeff = ALF_MAX_NUM_COEF;
     1834
     1835  switch(compIdx)
     1836  {
     1837  case ALF_Y:
     1838    {       
     1839      Int lambdaForMerge = ((Int) m_dLambdaLuma) * (1<<(2*g_uiBitIncrement));
     1840      Int numFilters;
     1841
     1842      ::memset(m_varIndTab, 0, sizeof(Int)*NO_VAR_BINS);
     1843
     1844      xfindBestFilterVarPred(alfCorr->yCorr, alfCorr->ECorr, alfCorr->pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &numFilters, m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambdaForMerge, maxNumFilters);
     1845      xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, numFilters, alfFiltParam);
     1846    }
     1847    break;
     1848  case ALF_Cb:
     1849  case ALF_Cr:
     1850    {
     1851      static Double coef[ALF_MAX_NUM_COEF];
     1852
     1853      alfFiltParam->filters_per_group = 1;
     1854
     1855      gnsSolveByChol(alfCorr->ECorr[0], alfCorr->yCorr[0], coef, numCoeff);
     1856      xQuantFilterCoef(coef, m_filterCoeffSym[0], filtNo, g_uiBitDepth + g_uiBitIncrement);
     1857      ::memcpy(alfFiltParam->coeffmulti[0], m_filterCoeffSym[0], sizeof(Int)*numCoeff);
     1858      predictALFCoeffChroma(alfFiltParam->coeffmulti[0]);
     1859    }
     1860    break;
     1861  default:
     1862    {
     1863      printf("Not a legal component ID\n");
     1864      assert(0);
     1865      exit(-1);
     1866    }
     1867  }
     1868
     1869
     1870}
     1871
     1872/** Estimate rate-distortion cost for ALF parameter set
     1873 * \param [in] compIdx luma/chroma component index
     1874 * \param [in] alfParamSet ALF parameter set
     1875 * \param [in] alfCUCtrlParam CU-on/off control parameters
     1876 */
     1877Int TEncAdaptiveLoopFilter::calculateAlfParamSetRateRDO(Int compIdx, AlfParamSet* alfParamSet, std::vector<AlfCUCtrlInfo>* alfCUCtrlParam)
     1878{
     1879  Int rate = 0;
     1880
     1881  m_pcEntropyCoder->resetEntropy();
     1882  m_pcEntropyCoder->resetBits();
     1883
     1884
     1885  m_pcEntropyCoder->encodeAlfParamSet(alfParamSet, m_numLCUInPicWidth, m_uiNumCUsInFrame, 0, true, compIdx, compIdx);
     1886
     1887  if(m_bAlfCUCtrlEnabled)
     1888  {
     1889    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     1890    {
     1891      m_pcEntropyCoder->encodeAlfCtrlParam( (*alfCUCtrlParam)[s], m_uiNumCUsInFrame);     
     1892    }
     1893  }
     1894
     1895  rate = m_pcEntropyCoder->getNumberOfWrittenBits();
     1896
     1897  return rate;
     1898}
     1899
     1900/** Estimate rate-distortion cost for ALF unit parameters
     1901 * \param [in] alfUnitParam ALF unit parameters
     1902 * \param [in] numStoredFilters number of stored filter (set)
     1903 */
     1904Int TEncAdaptiveLoopFilter::calculateAlfUnitRateRDO(AlfUnitParam* alfUnitParam, Int numStoredFilters)
     1905{
     1906  Int rate = 0;
     1907
     1908  if(alfUnitParam->mergeType != ALF_MERGE_LEFT)
     1909  {
     1910    m_pcEntropyCoder->resetEntropy();
     1911    m_pcEntropyCoder->resetBits();
     1912
     1913    m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->mergeType == ALF_MERGE_UP)?1:0);
     1914
     1915    if(alfUnitParam->mergeType != ALF_MERGE_UP)
     1916    {
     1917      m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->isEnabled)?1:0);
     1918
     1919      if(alfUnitParam->isEnabled)
     1920      {
     1921        if(numStoredFilters > 0)
     1922        {
     1923          m_pcEntropyCoder->encodeAlfFlag( (alfUnitParam->isNewFilt)?1:0);
     1924        }
     1925
     1926        if(!(alfUnitParam->isNewFilt) && numStoredFilters > 0)
     1927        {
     1928          m_pcEntropyCoder->encodeAlfStoredFilterSetIdx(alfUnitParam->storedFiltIdx, numStoredFilters);
     1929        }
     1930        else
     1931        {
     1932          m_pcEntropyCoder->encodeAlfParam(alfUnitParam->alfFiltParam);
     1933        }
     1934
     1935      }
     1936    }
     1937    rate = m_pcEntropyCoder->getNumberOfWrittenBits();
     1938  }
     1939  return rate;
     1940}
     1941
     1942/** Estimate filtering distortion
     1943 * \param [in] compIdx luma/chroma component index
     1944 * \param [in] alfCorr correlations
     1945 * \param [in] coeffSet filter coefficients
     1946 * \param [in] filterSetSize number of filter set
     1947 * \param [in] mergeTable merge table of filter set (only for luma BA)
     1948 * \param [in] doPixAccMerge calculate pixel squared value (true) or not (false)
     1949 */
     1950Int64 TEncAdaptiveLoopFilter::estimateFilterDistortion(Int compIdx, AlfCorrData* alfCorr, Int** coeffSet, Int filterSetSize, Int* mergeTable, Bool doPixAccMerge)
     1951{
     1952  const Int numCoeff = (Int)ALF_MAX_NUM_COEF;
     1953  AlfCorrData* alfMerged = m_alfCorrMerged[compIdx];
     1954
     1955  alfMerged->mergeFrom(*alfCorr, mergeTable, doPixAccMerge);
     1956
     1957  Int**     coeff = (coeffSet == NULL)?(m_coeffNoFilter):(coeffSet);
     1958  Int64     iDist = 0;
     1959  for(Int f=0; f< filterSetSize; f++)
     1960  {
     1961    iDist += xFastFiltDistEstimation(alfMerged->ECorr[f], alfMerged->yCorr[f], coeff[f], numCoeff);
     1962  }
     1963  return iDist;
     1964}
     1965
     1966/** Mode decision for ALF unit in LCU-based encoding
     1967 * \param [in] compIdx luma/chroma component index
     1968 * \param [in] alfUnitPic ALF unit parmeters for LCUs in picture
     1969 * \param [in] lcuIdx LCU index (order) in slice
     1970 * \param [in] lcuPos LCU position in picture
     1971 * \param [in] numLCUWidth number of width in LCU
     1972 * \param [in, out] alfUnitParams ALF unit parameters for LCUs in slice
     1973 * \param [in] alfCorr correlations
     1974 * \param [in] storedFilters stored-filter buffer
     1975 * \param [in] maxNumFilter constraint for number of filters
     1976 * \param [in] lambda lagrangian multiplier for RDO
     1977 * \param [in] isLeftUnitAvailable left ALF unit available (true) or not (false)
     1978 * \param [in] isUpUnitAvailable upper ALF unit available (true) or not (false)
     1979 */
     1980Void TEncAdaptiveLoopFilter::decideLCUALFUnitParam(Int compIdx, AlfUnitParam* alfUnitPic, Int lcuIdx, Int lcuPos, Int numLCUWidth, AlfUnitParam* alfUnitParams, AlfCorrData* alfCorr, std::vector<ALFParam*>& storedFilters, Int maxNumFilter, Double lambda, Bool isLeftUnitAvailable, Bool isUpUnitAvailable)
     1981{
     1982  Int    numSliceDataInCurrLCU = m_numSlicesDataInOneLCU[lcuPos];
     1983  Int    budgetNumFilters = (Int)(maxNumFilter/numSliceDataInCurrLCU);
     1984  Int    numStoredFilters = (Int)storedFilters.size();
     1985  Double cost, minCost = MAX_DOUBLE;
     1986  Int64  dist;
     1987  Int    rate;
     1988
     1989  AlfUnitParam& alfUnitParamCurr = alfUnitParams[lcuIdx];
     1990
     1991  ///--- new filter mode test ---
     1992  AlfUnitParam alfUnitParamTemp(alfUnitParamCurr);
     1993  alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED;
     1994  alfUnitParamTemp.isEnabled = true;
     1995  alfUnitParamTemp.isNewFilt = true;
     1996  alfUnitParamTemp.storedFiltIdx = -1;
     1997  deriveFilterInfo(compIdx, alfCorr, alfUnitParamTemp.alfFiltParam, budgetNumFilters);
     1998
     1999  dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2000  rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2001  cost = (Double)dist + lambda*((Double)rate);
     2002  if(cost < minCost)
     2003  {
     2004    minCost = cost;
     2005    alfUnitParamCurr = alfUnitParamTemp;
     2006
     2007    alfUnitParamCurr.alfFiltParam->alf_flag = 1;
     2008  }
     2009
     2010  if(numSliceDataInCurrLCU == 1)
     2011  {
     2012    if(numStoredFilters > 0)
     2013    {
     2014      ///--- stored filter mode test ---//
     2015      alfUnitParamTemp = alfUnitParamCurr;
     2016
     2017      alfUnitParamTemp.mergeType = ALF_MERGE_DISABLED;
     2018      alfUnitParamTemp.isEnabled = true;
     2019      alfUnitParamTemp.isNewFilt = false;
     2020
     2021      for(Int i=0; i< numStoredFilters; i++)
     2022      {
     2023        ALFParam* storedALFParam = storedFilters[i];
     2024
     2025        alfUnitParamTemp.storedFiltIdx = i;
     2026        alfUnitParamTemp.alfFiltParam  = storedALFParam;
     2027
     2028        assert(storedALFParam->alf_flag == 1);
     2029
     2030        reconstructCoefInfo(compIdx, storedALFParam, m_filterCoeffSym, m_varIndTab);
     2031
     2032        dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2033        rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2034        cost = (Double)dist + lambda*((Double)rate);
     2035
     2036        if(cost < minCost)
     2037        {
     2038          minCost = cost;
     2039          alfUnitParamCurr = alfUnitParamTemp;
     2040        }
     2041      }
     2042    }
     2043
     2044    /// merge-up test
     2045    if(isUpUnitAvailable)
     2046    {
     2047      Int addrUp = lcuPos - m_numLCUInPicWidth;
     2048      AlfUnitParam& alfUnitParamUp = alfUnitPic[addrUp];
     2049
     2050      if(alfUnitParamUp.alfFiltParam->alf_flag == 1)
     2051      {
     2052        alfUnitParamTemp = alfUnitParamUp;
     2053        alfUnitParamTemp.mergeType    = ALF_MERGE_UP;
     2054
     2055        reconstructCoefInfo(compIdx, alfUnitParamTemp.alfFiltParam, m_filterCoeffSym, m_varIndTab);
     2056        dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2057        rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2058        cost = (Double)dist + lambda*((Double)rate);
     2059
     2060        if(cost < minCost)
     2061        {
     2062          minCost = cost;
     2063
     2064          alfUnitParamCurr = alfUnitParamTemp;
     2065        }
     2066
     2067      }
     2068
     2069    } //upper unit available
     2070
     2071
     2072    /// merge-left test
     2073    if(isLeftUnitAvailable)
     2074    {
     2075      Int addrLeft = lcuPos - 1;
     2076      AlfUnitParam& alfUnitParamLeft = alfUnitPic[addrLeft];
     2077
     2078      if(alfUnitParamLeft.alfFiltParam->alf_flag == 1)
     2079      {
     2080        alfUnitParamTemp = alfUnitParamLeft;
     2081        alfUnitParamTemp.mergeType    = ALF_MERGE_LEFT;
     2082
     2083        reconstructCoefInfo(compIdx, alfUnitParamTemp.alfFiltParam, m_filterCoeffSym, m_varIndTab);
     2084        dist = estimateFilterDistortion(compIdx, alfCorr, m_filterCoeffSym, alfUnitParamTemp.alfFiltParam->filters_per_group, m_varIndTab);
     2085        rate = calculateAlfUnitRateRDO(&alfUnitParamTemp, numStoredFilters);
     2086        cost = (Double)dist + lambda*((Double)rate);
     2087
     2088        if(cost < minCost)
     2089        {
     2090          minCost = cost;
     2091
     2092          alfUnitParamCurr = alfUnitParamTemp;
     2093        }
     2094
     2095      }
     2096
     2097    } //left unit available
     2098
     2099  }
     2100}
     2101
     2102/** Choose the best ALF unit parameters when filter is not enabled.
     2103 * \param [out] alfFiltOffParam ALF unit parameters for filter-off case
     2104 * \param [in] lcuPos LCU position in picture
     2105 * \param [in] alfUnitPic ALF unit parmeters for LCUs in picture
     2106 * \param [in] isLeftUnitAvailable left ALF unit available (true) or not (false)
     2107 * \param [in] isUpUnitAvailable upper ALF unit available (true) or not (false)
     2108 */
     2109Void TEncAdaptiveLoopFilter::getFiltOffAlfUnitParam(AlfUnitParam* alfFiltOffParam, Int lcuPos, AlfUnitParam* alfUnitPic, Bool isLeftUnitAvailable, Bool isUpUnitAvailable)
     2110{
     2111  Int    numSliceDataInCurrLCU = m_numSlicesDataInOneLCU[lcuPos];
     2112
     2113  if(numSliceDataInCurrLCU == 1)
     2114  {
     2115    if(isLeftUnitAvailable)
     2116    {
     2117      Int addrLeft = lcuPos - 1;
     2118      AlfUnitParam& alfUnitParamLeft = alfUnitPic[addrLeft];
     2119
     2120      if(alfUnitParamLeft.alfFiltParam->alf_flag == 0)
     2121      {
     2122        alfFiltOffParam->mergeType    = ALF_MERGE_LEFT;
     2123        alfFiltOffParam->isEnabled    = false;
     2124        alfFiltOffParam->alfFiltParam = alfUnitParamLeft.alfFiltParam;
     2125
     2126        return;
     2127      }
     2128    }
     2129
     2130    if(isUpUnitAvailable)
     2131    {
     2132      Int addrUp = lcuPos - m_numLCUInPicWidth;
     2133      AlfUnitParam& alfUnitParamUp = alfUnitPic[addrUp];
     2134
     2135      if(alfUnitParamUp.alfFiltParam->alf_flag == 0)
     2136      {
     2137        alfFiltOffParam->mergeType    = ALF_MERGE_UP;
     2138        alfFiltOffParam->isEnabled    = false;
     2139        alfFiltOffParam->alfFiltParam = alfUnitParamUp.alfFiltParam;
     2140
     2141        return;
     2142      }
     2143
     2144    }
     2145  }
     2146
     2147
     2148  alfFiltOffParam->mergeType = ALF_MERGE_DISABLED;
     2149  alfFiltOffParam->isEnabled = false;
     2150  alfFiltOffParam->alfFiltParam = alfUnitPic[lcuPos].alfFiltParam;
     2151
     2152  return;
     2153}
     2154
     2155/** Calculate distortion for ALF LCU
     2156 * \param [in] skipLCUBottomLines true for considering skipping bottom LCU lines
     2157 * \param [in] compIdx luma/chroma component index
     2158 * \param [in] alfLCUInfo ALF LCU information
     2159 * \param [in] picSrc source picture buffer
     2160 * \param [in] picCmp to-be-compared picture buffer
     2161 * \param [in] stride buffer stride size for 1-D pictrue memory
     2162 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2163 * \return the distortion
     2164 */
     2165#if HHI_INTERVIEW_SKIP
     2166Int64 TEncAdaptiveLoopFilter::calcAlfLCUDist(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo& alfLCUInfo, Pel* picSrc, Pel* picCmp, Pel* picUsed, Int stride, Int formatShift)
     2167#else
     2168Int64 TEncAdaptiveLoopFilter::calcAlfLCUDist(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo& alfLCUInfo, Pel* picSrc, Pel* picCmp, Int stride, Int formatShift)
     2169#endif
     2170{
     2171  Int64 dist = 0; 
     2172  Int  posOffset, ypos, xpos, height, width;
     2173  Pel* pelCmp;
     2174  Pel* pelSrc;
     2175#if HHI_INTERVIEW_SKIP
     2176  Pel* pelUsed = NULL ;
     2177#endif
     2178#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2179  Int endypos;
     2180  Bool notSkipLinesBelowVB = true;
     2181  Int lcuAddr = alfLCUInfo.pcCU->getAddr();
     2182  if(skipLCUBottomLines)
     2183  {
     2184    if(lcuAddr + m_numLCUInPicWidth < m_uiNumCUsInFrame)
     2185    {
     2186      notSkipLinesBelowVB = false;
     2187    }
     2188  }
     2189#endif
     2190
     2191  switch(compIdx)
     2192  {
     2193  case ALF_Cb:
     2194  case ALF_Cr:
     2195    {
     2196      for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2197      {
     2198        ypos    = (Int)(alfLCUInfo[n].posY   >> formatShift);
     2199        xpos    = (Int)(alfLCUInfo[n].posX   >> formatShift);
     2200        height  = (Int)(alfLCUInfo[n].height >> formatShift);
     2201        width   = (Int)(alfLCUInfo[n].width  >> formatShift);
     2202
     2203#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2204        if(!notSkipLinesBelowVB )
     2205        {
     2206          endypos = ypos+ height -1;
     2207          Int iLineVBPos = m_lcuHeightChroma - 2;
     2208          Int yEndLineInLCU = endypos % m_lcuHeightChroma;
     2209          height = (yEndLineInLCU >= iLineVBPos) ? (height - 2) : height ;
     2210        }
     2211#endif
     2212
     2213        posOffset = (ypos * stride) + xpos;
     2214        pelCmp    = picCmp + posOffset;   
     2215        pelSrc    = picSrc + posOffset;   
     2216
     2217
     2218#if HHI_INTERVIEW_SKIP
     2219        if( picUsed)
     2220        {
     2221          pelUsed   = picUsed+ posOffset;
     2222        }
     2223        dist  += xCalcSSD( pelSrc, pelCmp,  pelUsed, width, height, stride );
     2224#else
     2225        dist  += xCalcSSD( pelSrc, pelCmp,  width, height, stride );
     2226#endif
     2227      }
     2228
     2229    }
     2230    break;
     2231  case ALF_Y:
     2232    {
     2233      for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2234      {
     2235        ypos    = (Int)(alfLCUInfo[n].posY);
     2236        xpos    = (Int)(alfLCUInfo[n].posX);
     2237        height  = (Int)(alfLCUInfo[n].height);
     2238        width   = (Int)(alfLCUInfo[n].width);
     2239
     2240#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2241        if(!notSkipLinesBelowVB)
     2242        {
     2243          endypos = ypos+ height -1;
     2244          Int iLineVBPos = m_lcuHeight - 4;
     2245          Int yEndLineInLCU = endypos % m_lcuHeight;
     2246          height = (yEndLineInLCU >= iLineVBPos) ? (height - 4) : height ;
     2247        }
     2248#endif
     2249
     2250        posOffset = (ypos * stride) + xpos;
     2251        pelCmp    = picCmp + posOffset;   
     2252        pelSrc    = picSrc + posOffset;   
     2253
     2254#if HHI_INTERVIEW_SKIP
     2255        if( picUsed )
     2256        {
     2257          pelUsed   = picUsed+ posOffset;
     2258        }
     2259        dist  += xCalcSSD( pelSrc, pelCmp,  pelUsed, width, height, stride );
     2260#else
     2261        dist  += xCalcSSD( pelSrc, pelCmp,  width, height, stride );
     2262#endif
     2263      }
     2264
     2265    }
     2266    break;
     2267  default:
     2268    {
     2269      printf("not a legal component ID for ALF \n");
     2270      assert(0);
     2271      exit(-1);
     2272    }
     2273  }
     2274
     2275  return dist;
     2276}
     2277
     2278/** Copy one ALF LCU region
     2279 * \param [in] alfLCUInfo ALF LCU information
     2280 * \param [out] picDst to-be-compared picture buffer
     2281 * \param [in] picSrc source picture buffer
     2282 * \param [in] stride buffer stride size for 1-D pictrue memory
     2283 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2284 */
     2285Void TEncAdaptiveLoopFilter::copyOneAlfLCU(AlfLCUInfo& alfLCUInfo, Pel* picDst, Pel* picSrc, Int stride, Int formatShift)
     2286{
     2287  Int posOffset, ypos, xpos, height, width;
     2288  Pel* pelDst;
     2289  Pel* pelSrc;
     2290
     2291  for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2292  {
     2293    ypos    = (Int)(alfLCUInfo[n].posY   >> formatShift);
     2294    xpos    = (Int)(alfLCUInfo[n].posX   >> formatShift);
     2295    height  = (Int)(alfLCUInfo[n].height >> formatShift);
     2296    width   = (Int)(alfLCUInfo[n].width  >> formatShift);
     2297
     2298    posOffset  = ( ypos * stride)+ xpos;
     2299    pelDst   = picDst  + posOffset;   
     2300    pelSrc   = picSrc  + posOffset;   
     2301
     2302    for(Int j=0; j< height; j++)
     2303    {
     2304      ::memcpy(pelDst, pelSrc, sizeof(Pel)*width);
     2305      pelDst += stride;
     2306      pelSrc += stride;
     2307    }
     2308  }
     2309
     2310}
     2311
     2312/** Reconstruct ALF LCU pixels
     2313 * \param [in] compIdx luma/chroma component index
     2314 * \param [in] alfLCUInfo ALF LCU information
     2315 * \param [in] alfUnitParam ALF unit parameters
     2316 * \param [in] picDec picture buffer for un-filtered picture
     2317 * \param [out] picRest picture buffer for reconstructed picture
     2318 * \param [in] stride buffer stride size for 1-D pictrue memory
     2319 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2320 */
     2321Void TEncAdaptiveLoopFilter::reconstructOneAlfLCU(Int compIdx, AlfLCUInfo& alfLCUInfo, AlfUnitParam* alfUnitParam, Pel* picDec, Pel* picRest, Int stride, Int formatShift)
     2322{
     2323  ALFParam* alfParam = alfUnitParam->alfFiltParam;
     2324  Int ypos, xpos, height, width;
     2325
     2326  if( alfUnitParam->isEnabled)
     2327  {
     2328    assert(alfParam->alf_flag == 1);
     2329
     2330    //reconstruct ALF coefficients & related parameters
     2331    reconstructCoefInfo(compIdx, alfParam, m_filterCoeffSym, m_varIndTab);
     2332
     2333    //filtering process
     2334    for(Int n=0; n< alfLCUInfo.numSGU; n++)
     2335    {
     2336      ypos    = (Int)(alfLCUInfo[n].posY   >> formatShift);
     2337      xpos    = (Int)(alfLCUInfo[n].posX   >> formatShift);
     2338      height  = (Int)(alfLCUInfo[n].height >> formatShift);
     2339      width   = (Int)(alfLCUInfo[n].width  >> formatShift);
     2340
     2341      filterOneCompRegion(picRest, picDec, stride, (compIdx!=ALF_Y), ypos, ypos+height, xpos, xpos+width, m_filterCoeffSym, m_varIndTab, m_varImg);
     2342    }
     2343  }
     2344  else
     2345  {
     2346    copyOneAlfLCU(alfLCUInfo, picRest, picDec, stride, formatShift);
     2347  }
     2348}
     2349
     2350/** LCU-based mode decision
     2351 * \param [in, out] alfParamSet ALF parameter set
     2352 * \param [in] compIdx luma/chroma component index
     2353 * \param [in] pOrg picture buffer for original picture
     2354 * \param [in] pDec picture buffer for un-filtered picture
     2355 * \param [out] pRest picture buffer for reconstructed picture
     2356 * \param [in] stride buffer stride size for 1-D pictrue memory
     2357 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2358 * \param [in] alfCorrLCUs correlations for LCUs
     2359 */
     2360#if HHI_INTERVIEW_SKIP
     2361Void TEncAdaptiveLoopFilter::executeLCUBasedModeDecision(AlfParamSet* alfParamSet
     2362                                                        ,Int compIdx, Pel* pOrg, Pel* pDec, Pel* pRest, Pel* pUsed, Int stride, Int formatShift
     2363                                                        ,AlfCorrData** alfCorrLCUs
     2364                                                        )
     2365#else
     2366Void TEncAdaptiveLoopFilter::executeLCUBasedModeDecision(AlfParamSet* alfParamSet
     2367                                                        ,Int compIdx, Pel* pOrg, Pel* pDec, Pel* pRest, Int stride, Int formatShift
     2368                                                        ,AlfCorrData** alfCorrLCUs
     2369                                                        )
     2370#endif
     2371{
     2372  Double lambda = (compIdx == ALF_Y)?(m_dLambdaLuma):(m_dLambdaChroma);
     2373  static Int* isProcessed = NULL;
     2374
     2375  AlfUnitParam* alfUnitPic = m_alfPicFiltUnits[compIdx];
     2376
     2377  Int64  distEnc, distOff;
     2378  Int    rateEnc, rateOff;
     2379  Double costEnc, costOff;
     2380  Bool isLeftUnitAvailable, isUpUnitAvailable;
     2381
     2382  isProcessed = new Int[m_uiNumCUsInFrame];
     2383  ::memset(isProcessed, 0, sizeof(Int)*m_uiNumCUsInFrame);
     2384
     2385#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2386  Int numProcessedLCU = 0;
     2387  m_alfFiltBudgetPerLcu = (Double)(m_iALFMaxNumberFilters) / (Double)(m_uiNumCUsInFrame);
     2388  m_alfUsedFilterNum = 0;
     2389#endif
     2390
     2391  for(Int s=0; s<= m_lastSliceIdx; s++)
     2392  {
     2393    if(!m_pcPic->getValidSlice(s))
     2394    {
     2395      continue;
     2396    }
     2397    Bool isAcrossSlice = (m_alfCoefInSlice)?(!m_isNonCrossSlice):(true);
     2398    Int  numLCUWidth   = alfParamSet[s].numLCUInWidth;
     2399
     2400    AlfUnitParam* alfSliceUnitParams = alfParamSet[s].alfUnitParam[compIdx];
     2401    std::vector<ALFParam*> storedFilters;
     2402    storedFilters.clear(); //reset stored filter buffer at the slice beginning
     2403
     2404    Int u =0; //counter for LCU index in slice
     2405    Int countFiltOffLCU = 0; //counter for number of LCU with filter-off mode
     2406    Int countNewFilts = 0; //counter for number of LCU with new filter inside slice
     2407
     2408    Int numTilesInSlice = (Int)m_pvpSliceTileAlfLCU[s].size();
     2409    for(Int t=0; t< numTilesInSlice; t++)
     2410    {
     2411      std::vector<AlfLCUInfo*> & vpAlfLCU = m_pvpSliceTileAlfLCU[s][t];
     2412      Pel* pSrc = pDec;
     2413
     2414      if(m_bUseNonCrossALF)
     2415      {
     2416        pSrc = getPicBuf(m_pcSliceYuvTmp, compIdx);
     2417        copyRegion(vpAlfLCU, pSrc, pDec, stride, formatShift);
     2418        extendRegionBorder(vpAlfLCU, pSrc, stride, formatShift);
     2419      }
     2420
     2421      Int numLCUs = (Int)vpAlfLCU.size();
     2422      for(Int n=0; n< numLCUs; n++)
     2423      {
     2424        AlfLCUInfo*   alfLCU       = vpAlfLCU[n];                  //ALF LCU information
     2425        TComDataCU*   pcCU         = alfLCU->pcCU;
     2426        Int           addr         = pcCU->getAddr();              //real LCU addr
     2427        AlfUnitParam* alfUnitParam = &(alfSliceUnitParams[u]);
     2428
     2429        if(isProcessed[addr] == 0)
     2430        {
     2431          Int           maxNumFilter = (Int)NO_VAR_BINS;   
     2432
     2433#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2434          Bool          isOutOfFilterBudget = true;
     2435          Double        usedFiltBudget = (numProcessedLCU == 0) ? 0.0 : (Double)m_alfUsedFilterNum / (Double)(numProcessedLCU);
     2436          if ( (m_alfFiltBudgetPerLcu >= usedFiltBudget) && (m_alfUsedFilterNum < m_iALFMaxNumberFilters) )
     2437          {
     2438            isOutOfFilterBudget = false;
     2439            Int leftNumFilt = m_iALFMaxNumberFilters - m_alfUsedFilterNum;
     2440            Int avgNumFilt  = leftNumFilt / (m_uiNumCUsInFrame - numProcessedLCU) + 1 ;
     2441            maxNumFilter = (leftNumFilt < avgNumFilt) ? leftNumFilt : avgNumFilt ;
     2442          }
     2443#endif
     2444
     2445          AlfCorrData*  alfCorr      = alfCorrLCUs[addr];            //ALF LCU correlation
     2446          alfUnitParam->alfFiltParam = alfUnitPic[addr].alfFiltParam;
     2447
     2448          //mode decision
     2449          isLeftUnitAvailable = (   (addr % m_numLCUInPicWidth != 0) && (u != 0));
     2450          isUpUnitAvailable   = (((Int)(addr/m_numLCUInPicWidth) > 0) && ( ( (u - numLCUWidth) >= 0) || isAcrossSlice ));
     2451
     2452          decideLCUALFUnitParam(compIdx, alfUnitPic, u, addr, numLCUWidth, alfSliceUnitParams, alfCorr, storedFilters, maxNumFilter, lambda, isLeftUnitAvailable, isUpUnitAvailable);
     2453          reconstructOneAlfLCU(compIdx, *alfLCU, alfUnitParam, pSrc, pRest, stride, formatShift);
     2454#if HHI_INTERVIEW_SKIP
     2455          distEnc = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pRest, pUsed, stride, formatShift);
     2456#else
     2457          distEnc = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pRest, stride, formatShift);
     2458#endif
     2459          rateEnc = calculateAlfUnitRateRDO(alfUnitParam, (Int)storedFilters.size());
     2460          costEnc = (Double)distEnc + lambda*((Double)rateEnc);
     2461          costEnc += ((lambda* 1.5)*1.0);  //RDCO
     2462
     2463          //v.s. filter off case
     2464          AlfUnitParam alfUnitParamOff;
     2465          getFiltOffAlfUnitParam(&alfUnitParamOff, addr, alfUnitPic, isLeftUnitAvailable, isUpUnitAvailable);
     2466#if HHI_INTERVIEW_SKIP
     2467          distOff = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pSrc, pUsed, stride, formatShift);
     2468#else
     2469          distOff = calcAlfLCUDist(!m_picBasedALFEncode, compIdx, *alfLCU, pOrg, pSrc, stride, formatShift);
     2470#endif
     2471          rateOff = calculateAlfUnitRateRDO(&alfUnitParamOff, (Int)storedFilters.size());
     2472          costOff = (Double)distOff + lambda*((Double)rateOff);
     2473
     2474#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2475          if( (costOff < costEnc)  ||  isOutOfFilterBudget)
     2476#else
     2477          if( costOff < costEnc)
     2478#endif
     2479          {
     2480            //filter off. set alf_flag = 0, copy pDest to pRest
     2481            *alfUnitParam = alfUnitParamOff;
     2482            alfUnitParam->alfFiltParam->alf_flag = 0;
     2483            copyOneAlfLCU(*alfLCU, pRest, pSrc, stride, formatShift);
     2484          }
     2485
     2486          if(alfUnitParam->mergeType == ALF_MERGE_DISABLED)
     2487          {
     2488            if(alfUnitParam->isEnabled)
     2489            {
     2490              if(alfUnitParam->isNewFilt)
     2491              {
     2492                //update stored filter buffer
     2493                storedFilters.push_back(alfUnitParam->alfFiltParam);
     2494                assert(alfUnitParam->alfFiltParam->alf_flag == 1);
     2495              }
     2496            }
     2497          }
     2498
     2499          alfUnitPic[addr] = *alfUnitParam;
     2500
     2501          isProcessed[addr] = 1;
     2502
     2503#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2504          numProcessedLCU++;
     2505          if(alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt)
     2506          {
     2507            m_alfUsedFilterNum += alfUnitParam->alfFiltParam->filters_per_group;
     2508          }
     2509#endif
     2510        }
     2511        else
     2512        {
     2513          //keep the ALF parameters in LCU are the same
     2514          *alfUnitParam = alfUnitPic[addr];
     2515          reconstructOneAlfLCU(compIdx, *alfLCU, alfUnitParam, pSrc, pRest, stride, formatShift);
     2516
     2517#if LCUALF_FILTER_BUDGET_CONTROL_ENC
     2518          if(alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt)
     2519          {
     2520            m_alfUsedFilterNum += alfUnitParam->alfFiltParam->filters_per_group;
     2521          }
     2522#endif
     2523        }
     2524
     2525        if(alfUnitParam->alfFiltParam->alf_flag == 0)
     2526        {
     2527          countFiltOffLCU++;
     2528        }
     2529        else
     2530        {
     2531          Bool isNewFiltInSlice =   (alfUnitParam->mergeType == ALF_MERGE_DISABLED && alfUnitParam->isEnabled && alfUnitParam->isNewFilt);
     2532          Bool isMergeAcrossSlice = ( alfUnitParam->mergeType == ALF_MERGE_UP && (u-numLCUWidth < 0) );
     2533
     2534          if( isNewFiltInSlice || isMergeAcrossSlice )
     2535          {
     2536            countNewFilts++;
     2537          }
     2538        }
     2539
     2540        u++;     
     2541
     2542      } //LCU
     2543    } //tile
     2544
     2545
     2546    //slice-level parameters
     2547    AlfUnitParam* firstAlfUnitInSlice = &(alfSliceUnitParams[0]);
     2548    if( countFiltOffLCU == u ) //number of filter-off LCU is equal to the number of LCUs in slice
     2549    {
     2550      alfParamSet[s].isEnabled [compIdx] = false;   
     2551      alfParamSet[s].isUniParam[compIdx] = true; //uni-param, all off
     2552      assert(firstAlfUnitInSlice->alfFiltParam->alf_flag == 0);
     2553    }
     2554    else
     2555    {
     2556      alfParamSet[s].isEnabled[compIdx] = true;
     2557      if( countNewFilts == 1 && firstAlfUnitInSlice->alfFiltParam->alf_flag != 0 && countFiltOffLCU == 0 )
     2558      {
     2559        alfParamSet[s].isUniParam[compIdx] = true;
     2560      }
     2561      else
     2562      {
     2563        alfParamSet[s].isUniParam[compIdx] = false;
     2564      }
     2565    }
     2566  } //slice
     2567
     2568
     2569  delete[] isProcessed;
     2570  isProcessed = NULL;
     2571}
     2572
     2573
     2574/** Decide ALF parameter set for luma/chroma components (top function)
     2575 * \param [in] pPicOrg picture buffer for original picture
     2576 * \param [in] pPicDec picture buffer for un-filtered picture
     2577 * \param [out] pPicRest picture buffer for reconstructed picture
     2578 * \param [in, out] alfParamSet ALF parameter set
     2579 * \param [in, out] alfCtrlParam ALF CU-on/off control parameters
     2580 */
     2581#if HHI_INTERVIEW_SKIP
     2582Void TEncAdaptiveLoopFilter::decideParameters(TComPicYuv* pPicOrg, TComPicYuv* pPicDec, TComPicYuv* pPicRest, TComPicYuv* pUsedPelMap
     2583                                            , AlfParamSet* alfParamSet
     2584                                            , std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     2585#else
     2586Void TEncAdaptiveLoopFilter::decideParameters(TComPicYuv* pPicOrg, TComPicYuv* pPicDec, TComPicYuv* pPicRest
     2587                                            , AlfParamSet* alfParamSet
     2588                                            , std::vector<AlfCUCtrlInfo>* alfCtrlParam)
     2589#endif
     2590{
     2591  static Int lumaStride        = pPicOrg->getStride();
     2592  static Int chromaStride      = pPicOrg->getCStride();
     2593
     2594  Pel *pOrg, *pDec, *pRest;
     2595  Int stride, formatShift;
     2596#if HHI_INTERVIEW_SKIP
     2597  Pel *pUsed = NULL ;
     2598#endif
     2599
     2600  for(Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     2601  {
     2602    pOrg        = getPicBuf(pPicOrg, compIdx);
     2603    pDec        = getPicBuf(pPicDec, compIdx);
     2604    pRest       = getPicBuf(pPicRest, compIdx);
     2605#if HHI_INTERVIEW_SKIP
     2606    if( pUsedPelMap )
     2607    {
     2608      pUsed        = getPicBuf(pUsedPelMap, compIdx);
     2609    }
     2610#endif
     2611    stride      = (compIdx == ALF_Y)?(lumaStride):(chromaStride);
     2612    formatShift = (compIdx == ALF_Y)?(0):(1);
     2613
     2614    AlfCorrData** alfCorrComp     = m_alfCorr[compIdx];
     2615
     2616    if(!m_picBasedALFEncode) //lcu-based optimization
     2617    {
     2618#if HHI_INTERVIEW_SKIP
     2619      executeLCUBasedModeDecision(alfParamSet, compIdx, pOrg, pDec, pRest, pUsed, stride, formatShift, alfCorrComp);
     2620#else
     2621      executeLCUBasedModeDecision(alfParamSet, compIdx, pOrg, pDec, pRest, stride, formatShift, alfCorrComp);
     2622#endif
     2623    }
     2624    else //picture-based optimization
     2625    {
     2626      AlfPicQTPart* alfPicQTPart = m_alfPQTPart[compIdx];
     2627#if HHI_INTERVIEW_SKIP
     2628      executePicBasedModeDecision(alfParamSet, alfPicQTPart, compIdx, pOrg, pDec, pRest, pUsed, stride, formatShift, alfCorrComp);
     2629#else
     2630      executePicBasedModeDecision(alfParamSet, alfPicQTPart, compIdx, pOrg, pDec, pRest, stride, formatShift, alfCorrComp);
     2631#endif
     2632    } 
     2633
     2634  } //component
     2635
     2636}
     2637
     2638/** Gather correlations for all LCUs in picture
     2639 * \param [in] pPicOrg picture buffer for original picture
     2640 * \param [in] pPicDec picture buffer for un-filtered picture
     2641 */
     2642Void TEncAdaptiveLoopFilter::getStatistics(TComPicYuv* pPicOrg, TComPicYuv* pPicDec)
     2643{
     2644  Int lumaStride   = pPicOrg->getStride();
     2645  Int chromaStride = pPicOrg->getCStride();
     2646  const  Int chromaFormatShift = 1;
     2647
     2648  //calculate BA index
     2649  calcOneRegionVar(m_varImg, getPicBuf(pPicDec, ALF_Y), lumaStride, false, 0, m_img_height, 0, m_img_width);
     2650  for(Int compIdx = 0; compIdx < NUM_ALF_COMPONENT; compIdx++)
     2651  {
     2652    AlfCorrData** alfCorrComp = m_alfCorr[compIdx];
     2653    Int          formatShift = (compIdx == ALF_Y)?(0):(chromaFormatShift);
     2654    Int          stride      = (compIdx == ALF_Y)?(lumaStride):(chromaStride);
     2655
     2656    getOneCompStatistics(alfCorrComp, compIdx, getPicBuf(pPicOrg, compIdx), getPicBuf(pPicDec, compIdx), stride, formatShift, false);
     2657  }
     2658}
     2659
     2660/** Gather correlations for all LCUs of one luma/chroma component in picture
     2661 * \param [out] alfCorrComp correlations for LCUs
     2662 * \param [in] compIdx luma/chroma component index
     2663 * \param [in] imgOrg picture buffer for original picture
     2664 * \param [in] imgDec picture buffer for un-filtered picture
     2665 * \param [in] stride buffer stride size for 1-D pictrue memory
     2666 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2667 * \param [in] isRedesignPhase at re-design filter stage (true) or not (false)
     2668 */
     2669Void TEncAdaptiveLoopFilter::getOneCompStatistics(AlfCorrData** alfCorrComp, Int compIdx, Pel* imgOrg, Pel* imgDec, Int stride, Int formatShift, Bool isRedesignPhase)
     2670{
     2671
     2672  // initialize to zero
     2673  for(Int n=0; n< m_uiNumCUsInFrame; n++)
     2674  {
     2675    alfCorrComp[n]->reset();
     2676  }
     2677
     2678  for(Int s=0; s<= m_lastSliceIdx; s++)
     2679  {
     2680    if(!m_pcPic->getValidSlice(s))
     2681    {
     2682      continue;
     2683    }
     2684    Int numTilesInSlice = (Int)m_pvpSliceTileAlfLCU[s].size();
     2685    for(Int t=0; t< numTilesInSlice; t++)
     2686    {
     2687      std::vector<AlfLCUInfo*> & vpAlfLCU = m_pvpSliceTileAlfLCU[s][t];
     2688      Pel* pSrc = imgDec;
     2689
     2690      if(m_bUseNonCrossALF)
     2691      {
     2692        pSrc = getPicBuf(m_pcSliceYuvTmp, compIdx);
     2693        copyRegion(vpAlfLCU, pSrc, imgDec, stride, formatShift);
     2694        extendRegionBorder(vpAlfLCU, pSrc, stride, formatShift);
     2695      }
     2696
     2697      Int numLCUs = (Int)vpAlfLCU.size();
     2698      for(Int n=0; n< numLCUs; n++)
     2699      {
     2700        AlfLCUInfo* alfLCU = vpAlfLCU[n];
     2701        Int addr = alfLCU->pcCU->getAddr();
     2702        getStatisticsOneLCU(!m_picBasedALFEncode, compIdx, alfLCU, alfCorrComp[addr], imgOrg, pSrc, stride, formatShift, isRedesignPhase);
     2703      } //LCU
     2704    } //tile
     2705  } //slice
     2706
     2707}
     2708
     2709/** Gather correlations for one LCU
     2710 * \param [out] alfCorrComp correlations for LCUs
     2711 * \param [in] compIdx luma/chroma component index
     2712 * \param [in] imgOrg picture buffer for original picture
     2713 * \param [in] imgDec picture buffer for un-filtered picture
     2714 * \param [in] stride buffer stride size for 1-D pictrue memory
     2715 * \param [in] formatShift 0 for luma and 1 for chroma (4:2:0)
     2716 * \param [in] isRedesignPhase at re-design filter stage (true) or not (false)
     2717 */
     2718Void TEncAdaptiveLoopFilter::getStatisticsOneLCU(Bool skipLCUBottomLines, Int compIdx, AlfLCUInfo* alfLCU, AlfCorrData* alfCorr, Pel* pPicOrg, Pel* pPicSrc, Int stride, Int formatShift, Bool isRedesignPhase)
     2719{
     2720  Int numBlocks = alfLCU->numSGU;
     2721#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2722  Int  lcuAddr = alfLCU->pcCU->getAddr();
     2723  Bool notSkipLinesBelowVB = true;
     2724  Int  endypos;
     2725#endif
     2726  Bool isLastBlock;
     2727  Int ypos, xpos, height, width;
     2728
     2729#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2730  if(skipLCUBottomLines)
     2731  {
     2732    if(lcuAddr + m_numLCUInPicWidth < m_uiNumCUsInFrame)
     2733    {
     2734      notSkipLinesBelowVB = false;
     2735    }
     2736  }
     2737#endif
     2738
     2739  switch(compIdx)
     2740  {
     2741  case ALF_Cb:
     2742  case ALF_Cr:
     2743    {
     2744      for(Int n=0; n< numBlocks; n++)
     2745      {
     2746        isLastBlock = (n== numBlocks-1);
     2747        NDBFBlockInfo& AlfSGU = (*alfLCU)[n];
     2748
     2749        ypos   = (Int)(AlfSGU.posY  >> formatShift);
     2750        xpos   = (Int)(AlfSGU.posX  >> formatShift);
     2751        height = (Int)(AlfSGU.height>> formatShift);
     2752        width  = (Int)(AlfSGU.width >> formatShift);
     2753
     2754#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2755        if(!notSkipLinesBelowVB )
     2756        {
     2757          endypos = ypos+ height -1;
     2758          Int iLineVBPos = m_lcuHeightChroma - 2;
     2759          Int yEndLineInLCU = endypos % m_lcuHeightChroma;
     2760          height = (yEndLineInLCU >= iLineVBPos) ? (height - 2) : height ;
     2761        }
     2762#endif
     2763
     2764#if ALF_SINGLE_FILTER_SHAPE
     2765        calcCorrOneCompRegionChma(pPicOrg, pPicSrc, stride, ypos, xpos, height, width, alfCorr->ECorr[0], alfCorr->yCorr[0], isLastBlock);
     2766#endif
     2767      }
     2768    }
     2769    break;
     2770  case ALF_Y:
     2771    {
     2772      Bool forceCollection = true;
     2773
     2774      if(isRedesignPhase)
     2775      {
     2776        Int numValidPels = 0;
     2777        for(Int n=0; n< numBlocks; n++)
     2778        {
     2779          NDBFBlockInfo& AlfSGU = (*alfLCU)[n];
     2780
     2781          ypos   = (Int)(AlfSGU.posY  );
     2782          xpos   = (Int)(AlfSGU.posX  );
     2783          height = (Int)(AlfSGU.height);
     2784          width  = (Int)(AlfSGU.width );
     2785
     2786          for (Int y = ypos; y < ypos+ height; y++)
     2787          {
     2788            for (Int x = xpos; x < xpos + width; x++)
     2789            {
     2790              if (m_maskImg[y][x] == 1)
     2791              {
     2792                numValidPels++;
     2793              }
     2794            }
     2795          }
     2796        }
     2797
     2798        if(numValidPels > 0)
     2799        {
     2800          forceCollection = false;
     2801        }
     2802      }
     2803
     2804      for(Int n=0; n< numBlocks; n++)
     2805      {
     2806        isLastBlock = (n== numBlocks-1);
     2807        NDBFBlockInfo& AlfSGU = (*alfLCU)[n];
     2808
     2809        ypos   = (Int)(AlfSGU.posY  );
     2810        xpos   = (Int)(AlfSGU.posX  );
     2811        height = (Int)(AlfSGU.height);
     2812        width  = (Int)(AlfSGU.width );
     2813
     2814#if LCUALF_AVOID_USING_BOTTOM_LINES_ENCODER
     2815        endypos = ypos+ height -1;
     2816        if(!notSkipLinesBelowVB)
     2817        {
     2818          Int iLineVBPos = m_lcuHeight - 4;
     2819          Int yEndLineInLCU = endypos % m_lcuHeight;
     2820          height = (yEndLineInLCU >= iLineVBPos) ? (height - 4) : height ;
     2821        }
     2822#endif
     2823
     2824#if ALF_SINGLE_FILTER_SHAPE
     2825        calcCorrOneCompRegionLuma(pPicOrg, pPicSrc, stride, ypos, xpos, height, width, alfCorr->ECorr, alfCorr->yCorr, alfCorr->pixAcc, forceCollection, isLastBlock);
     2826#endif       
     2827      }
     2828    }
     2829    break;
     2830  default:
     2831    {
     2832      printf("Not a legal component index for ALF\n");
     2833      assert(0);
     2834      exit(-1);
     2835    }
     2836  }
     2837}
     2838
     2839
     2840#if ALF_SINGLE_FILTER_SHAPE
     2841/** Gather correlations for one region for chroma component
     2842 * \param [in] imgOrg picture buffer for original picture
     2843 * \param [in] imgPad picture buffer for un-filtered picture
     2844 * \param [in] stride buffer stride size for 1-D pictrue memory
     2845 * \param [in] yPos region starting y position
     2846 * \param [in] xPos region starting x position
     2847 * \param [in] height region height
     2848 * \param [in] width region width
     2849 * \param [out] eCorr auto-correlation matrix
     2850 * \param [out] yCorr cross-correlation array
     2851 * \param [in] isSymmCopyBlockMatrix symmetrically copy correlation values in eCorr (true) or not (false)
     2852 */
     2853Void TEncAdaptiveLoopFilter::calcCorrOneCompRegionChma(Pel* imgOrg, Pel* imgPad, Int stride
     2854                                                     , Int yPos, Int xPos, Int height, Int width
     2855                                                     , Double **eCorr, Double *yCorr, Bool isSymmCopyBlockMatrix
     2856                                                      )
     2857{
     2858  Int yPosEnd = yPos + height;
     2859  Int xPosEnd = xPos + width;
     2860  Int N = ALF_MAX_NUM_COEF; //m_sqrFiltLengthTab[0];
     2861
     2862  Int imgHeightChroma = m_img_height>>1;
     2863
     2864  Int yLineInLCU, paddingLine;
     2865  Int ELocal[ALF_MAX_NUM_COEF];
     2866  Pel *imgPad1, *imgPad2, *imgPad3, *imgPad4, *imgPad5, *imgPad6;
     2867  Int i, j, k, l, yLocal;
     2868
     2869  imgPad += (yPos*stride);
     2870  imgOrg += (yPos*stride);
     2871
     2872  for (i= yPos; i< yPosEnd; i++)
     2873  {
     2874    yLineInLCU = i % m_lcuHeightChroma;
     2875
     2876    if (yLineInLCU==0 && i>0)
     2877    {
     2878      paddingLine = yLineInLCU + 2 ;
     2879      imgPad1 = imgPad + stride;
     2880      imgPad2 = imgPad - stride;
     2881      imgPad3 = imgPad + 2*stride;
     2882      imgPad4 = imgPad - 2*stride;
     2883      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride;
     2884      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - min(paddingLine, 3)*stride;;
     2885    }
     2886    else if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma )
     2887    {
     2888      imgPad1 = imgPad + stride;
     2889      imgPad2 = imgPad - stride;
     2890      imgPad3 = imgPad + 2*stride;
     2891      imgPad4 = imgPad - 2*stride;
     2892      imgPad5 = imgPad + 3*stride;
     2893      imgPad6 = imgPad - 3*stride;
     2894    }
     2895    else if (yLineInLCU < m_lineIdxPadTopChroma)
     2896    {
     2897      paddingLine = - yLineInLCU + m_lineIdxPadTopChroma - 1;
     2898      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + min(paddingLine, 1)*stride;
     2899      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - stride;
     2900      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + min(paddingLine, 2)*stride;
     2901      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - 2*stride;
     2902      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + min(paddingLine, 3)*stride;
     2903      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - 3*stride;
     2904    }
     2905    else
     2906    {
     2907      paddingLine = yLineInLCU - m_lineIdxPadTopChroma ;
     2908      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + stride;
     2909      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - min(paddingLine, 1)*stride;
     2910      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + 2*stride;
     2911      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - min(paddingLine, 2)*stride;
     2912      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride;
     2913      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - min(paddingLine, 3)*stride;
     2914    }
     2915
     2916    for (j= xPos; j< xPosEnd; j++)
     2917    {
     2918      memset(ELocal, 0, N*sizeof(Int));
     2919
     2920      ELocal[0] = (imgPad5[j] + imgPad6[j]);
     2921
     2922      ELocal[1] = (imgPad3[j] + imgPad4[j]);
     2923
     2924      ELocal[2] = (imgPad1[j-1] + imgPad2[j+1]);
     2925      ELocal[3] = (imgPad1[j  ] + imgPad2[j  ]);
     2926      ELocal[4] = (imgPad1[j+1] + imgPad2[j-1]);
     2927
     2928      ELocal[5] = (imgPad[j+4] + imgPad[j-4]);
     2929      ELocal[6] = (imgPad[j+3] + imgPad[j-3]);
     2930      ELocal[7] = (imgPad[j+2] + imgPad[j-2]);
     2931      ELocal[8] = (imgPad[j+1] + imgPad[j-1]);
     2932      ELocal[9] = (imgPad[j  ]);
     2933
     2934      yLocal= (Int)imgOrg[j];
     2935
     2936      for(k=0; k<N; k++)
     2937      {
     2938        eCorr[k][k] += ELocal[k]*ELocal[k];
     2939        for(l=k+1; l<N; l++)
     2940        {
     2941          eCorr[k][l] += ELocal[k]*ELocal[l];
     2942        }
     2943
     2944        yCorr[k] += yLocal*ELocal[k];
     2945      }
     2946    }
     2947
     2948    imgPad+= stride;
     2949    imgOrg+= stride;
     2950  }
     2951
     2952  if(isSymmCopyBlockMatrix)
     2953  {
     2954    for(j=0; j<N-1; j++)
     2955    {
     2956      for(i=j+1; i<N; i++)
     2957      {
     2958        eCorr[i][j] = eCorr[j][i];
     2959      }
     2960    }
     2961  }
     2962}
     2963
     2964/** Gather correlations for one region for luma component
     2965 * \param [in] imgOrg picture buffer for original picture
     2966 * \param [in] imgPad picture buffer for un-filtered picture
     2967 * \param [in] stride buffer stride size for 1-D picture memory (the same stride is applied to imgOrg and imgPad)
     2968 * \param [in] yPos region starting y position
     2969 * \param [in] xPos region starting x position
     2970 * \param [in] height region height
     2971 * \param [in] width region width
     2972 * \param [out] eCorr auto-correlation matrices, one per class index read from m_varImg; values are accumulated (+=), not reset here
     2973 * \param [out] yCorr cross-correlation arrays, one per class index; values are accumulated (+=), not reset here
     2974 * \param [out] pixAcc per-class accumulated sum of squared original pixel values
     2975 * \param [in] isforceCollection all pixels are used for correlation calculation (true) or only pixels with a non-zero m_maskImg entry (false)
     2976 * \param [in] isSymmCopyBlockMatrix symmetrically copy correlation values in eCorr (true) or not (false)
     2977 */
     2978Void TEncAdaptiveLoopFilter::calcCorrOneCompRegionLuma(Pel* imgOrg, Pel* imgPad, Int stride
     2979                                                      ,Int yPos, Int xPos, Int height, Int width
     2980                                                      ,Double ***eCorr, Double **yCorr, Double *pixAcc
     2981                                                      ,Bool isforceCollection, Bool isSymmCopyBlockMatrix
     2982                                                      )
     2983{
     2984  Int yPosEnd = yPos + height; // exclusive end row of the region
     2985  Int xPosEnd = xPos + width;  // exclusive end column of the region
     2986  Int yLineInLCU;
     2987  Int paddingLine ;
     2988  Int N = ALF_MAX_NUM_COEF; //m_sqrFiltLengthTab[0];
     2989
     2990  Int ELocal[ALF_MAX_NUM_COEF]; // per-pixel tap sums (one entry per filter coefficient)
     2991  Pel *imgPad1, *imgPad2, *imgPad3, *imgPad4, *imgPad5, *imgPad6; // neighbour rows: odd index = below (+1,+2,+3), even index = above (-1,-2,-3), possibly clamped
     2992  Int i, j, k, l, yLocal, varInd;
     2993  Double **E;
     2994  Double *yy;
     2995
     2996  imgPad += (yPos*stride); // move both buffers to the first row of the region
     2997  imgOrg += (yPos*stride);
     2998
     2999  for (i= yPos; i< yPosEnd; i++)
     3000  {
     3001    yLineInLCU = i % m_lcuHeight; // row offset of line i, modulo m_lcuHeight
     3002
     3003    if (yLineInLCU<m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height) // before line m_lineIdxPadBot, or the next m_lcuHeight-aligned row is at/after m_img_height: no clamping
     3004    {
     3005      imgPad1 = imgPad + stride;
     3006      imgPad2 = imgPad - stride;
     3007      imgPad3 = imgPad + 2*stride;
     3008      imgPad4 = imgPad - 2*stride;
     3009      imgPad5 = imgPad + 3*stride;
     3010      imgPad6 = imgPad - 3*stride;
     3011    }
     3012    else if (yLineInLCU<m_lineIdxPadTop) // lines in [m_lineIdxPadBot, m_lineIdxPadTop): limit how far below the current row is read
     3013    {
     3014      paddingLine = - yLineInLCU + m_lineIdxPadTop - 1; // rows between the current line and line m_lineIdxPadTop-1
     3015      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + min(paddingLine, 1)*stride; // if paddingLine < d, both taps of the +/-d pair use the current row
     3016      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - stride;
     3017      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + min(paddingLine, 2)*stride;
     3018      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - 2*stride;
     3019      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + min(paddingLine, 3)*stride;
     3020      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - 3*stride;
     3021    }
     3022    else // lines at or after m_lineIdxPadTop: limit how far above the current row is read
     3023    {
     3024      paddingLine = yLineInLCU - m_lineIdxPadTop; // rows between line m_lineIdxPadTop and the current line
     3025      imgPad1 = (paddingLine < 1) ? imgPad : imgPad + stride;
     3026      imgPad2 = (paddingLine < 1) ? imgPad : imgPad - min(paddingLine, 1)*stride;
     3027      imgPad3 = (paddingLine < 2) ? imgPad : imgPad + 2*stride;
     3028      imgPad4 = (paddingLine < 2) ? imgPad : imgPad - min(paddingLine, 2)*stride;
     3029      imgPad5 = (paddingLine < 3) ? imgPad : imgPad + 3*stride;
     3030      imgPad6 = (paddingLine < 3) ? imgPad : imgPad - min(paddingLine, 3)*stride;
     3031    }         
     3032
     3033    for (j= xPos; j< xPosEnd; j++)
     3034    {
     3035      if ( m_maskImg[i][j] || isforceCollection ) // skip pixels with a zero mask entry unless collection is forced
     3036      {
     3037        varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W]; // class index, stored per VAR_SIZE_H x VAR_SIZE_W block
     3038        memset(ELocal, 0, N*sizeof(Int));
     3039
     3040        ELocal[0] = (imgPad5[j] + imgPad6[j]); // vertical pair, nominally +/-3 rows
     3041        ELocal[1] = (imgPad3[j] + imgPad4[j]); // vertical pair, nominally +/-2 rows
     3042
     3043        ELocal[2] = (imgPad1[j-1] + imgPad2[j+1]); // point-symmetric pairs taken from the rows nominally +/-1
     3044        ELocal[3] = (imgPad1[j  ] + imgPad2[j  ]);
     3045        ELocal[4] = (imgPad1[j+1] + imgPad2[j-1]);
     3046
     3047        ELocal[5] = (imgPad[j+4] + imgPad[j-4]); // horizontal pairs on the current row
     3048        ELocal[6] = (imgPad[j+3] + imgPad[j-3]);
     3049        ELocal[7] = (imgPad[j+2] + imgPad[j-2]);
     3050        ELocal[8] = (imgPad[j+1] + imgPad[j-1]);
     3051        ELocal[9] = (imgPad[j  ]); // centre sample
     3052
     3053        yLocal= imgOrg[j];
     3054        pixAcc[varInd] += (yLocal*yLocal);
     3055        E  = eCorr[varInd];
     3056        yy = yCorr[varInd];
     3057
     3058        for (k=0; k<N; k++)
     3059        {
     3060          for (l=k; l<N; l++) // only the upper triangle (l >= k) is accumulated here
     3061          {
     3062            E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     3063          }
     3064          yy[k]+=(double)(ELocal[k]*yLocal);
     3065        }
     3066      }
     3067    }
     3068    imgPad += stride; // advance both buffers to the next row
     3069    imgOrg += stride;
     3070  }
     3071
     3072  if(isSymmCopyBlockMatrix) // mirror the upper triangle into the lower triangle for all NO_VAR_BINS matrices
     3073  {
     3074    for (varInd=0; varInd<NO_VAR_BINS; varInd++)
     3075    {
     3076      E = eCorr[varInd];
     3077      for (k=1; k<N; k++)
     3078      {
     3079        for (l=0; l<k; l++)
     3080        {
     3081          E[k][l] = E[l][k];
     3082        }
     3083      }
     3084    }
     3085  }
     3086
     3087}
     3088#endif
     3089
     3090#else
     3091
     3092
     3093#if ALF_CHROMA_LAMBDA 
     3094/**
     3095 \param pcAlfParam           ALF parameter
     3096 \param [out] pvAlfCtrlParam ALF CU control parameters container for slices
     3097 \param dLambdaLuma          luma lambda value for RD cost computation
     3098 \param dLambdaChroma        chroma lambda value for RD cost computation
     3099 \retval ruiDist             distortion
     3100 \retval ruiBits             required bits
     3101 \retval ruiMaxAlfCtrlDepth  optimal partition depth
     3102 */
     3103#if HHI_INTERVIEW_SKIP
     3104Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambdaLuma, Double dLambdaChroma, UInt64& ruiDist, UInt64& ruiBits, Bool bInterviewSkip)
     3105#else
     3106Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambdaLuma, Double dLambdaChroma, UInt64& ruiDist, UInt64& ruiBits)
     3107
     3108#endif
     3109#else
    3533110/**
    3543111 \param pcAlfParam           ALF parameter
     
    3583115 \retval ruiMaxAlfCtrlDepth  optimal partition depth
    3593116 */
    360 Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits, UInt& ruiMaxAlfCtrlDepth )
    361 {
    362   Int tap, num_coef;
    363  
    364   // set global variables
    365   tap         = ALF_MAX_NUM_TAP;
    366 #if TI_ALF_MAX_VSIZE_7
    367   Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap);
    368   num_coef = (tap * tapV + 1) >> 1;
    369 #else
    370   num_coef    = (tap*tap+1)>>1;
    371 #endif
    372   num_coef    = num_coef + 1; // DC offset
     3117#if HHI_INTERVIEW_SKIP
     3118Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits, Bool bInterviewSkip)
     3119#else
     3120Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, std::vector<AlfCUCtrlInfo>* pvAlfCtrlParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits)
     3121
     3122#endif
     3123#endif
     3124{
    3733125 
    3743126  // set lambda
     3127#if ALF_CHROMA_LAMBDA 
     3128  m_dLambdaLuma   = dLambdaLuma;
     3129  m_dLambdaChroma = dLambdaChroma;
     3130#else
    3753131  m_dLambdaLuma   = dLambda;
    3763132  m_dLambdaChroma = dLambda;
    377  
     3133#endif
     3134
     3135  m_lcuHeight = m_pcPic->getSlice(0)->getSPS()->getMaxCUHeight();
     3136
     3137#if ALF_SINGLE_FILTER_SHAPE
     3138  m_lineIdxPadBot = m_lcuHeight - 4 - 3; // DFRegion, Vertical Taps
     3139#else
     3140  m_lineIdxPadBot = m_lcuHeight - 4 - 4; // DFRegion, Vertical Taps
     3141#endif
     3142  m_lineIdxPadTop = m_lcuHeight - 4; // DFRegion
     3143
     3144  m_lcuHeightChroma = m_lcuHeight>>1;
     3145#if ALF_SINGLE_FILTER_SHAPE
     3146  m_lineIdxPadBotChroma = m_lcuHeightChroma - 2 - 3; // DFRegion, Vertical Taps
     3147#else
     3148  m_lineIdxPadBotChroma = m_lcuHeightChroma - 2 - 4; // DFRegion, Vertical Taps
     3149#endif
     3150  m_lineIdxPadTopChroma = m_lcuHeightChroma - 2 ; // DFRegion
     3151
    3783152  TComPicYuv* pcPicOrg = m_pcPic->getPicYuvOrg();
    3793153 
     
    3813155  TComPicYuv* pcPicYuvRec    = m_pcPic->getPicYuvRec();
    3823156  TComPicYuv* pcPicYuvExtRec = m_pcTempPicYuv;
     3157#if HHI_INTERVIEW_SKIP
     3158  TComPicYuv* pcUsedPelMap   = m_pcPic->getUsedPelsMap() ;
     3159  if(bInterviewSkip)
     3160    assert( pcUsedPelMap ) ;
     3161#endif
    3833162 
    3843163  pcPicYuvRec->copyToPic(pcPicYuvExtRec);
    385 #if MTK_NONCROSS_INLOOP_FILTER
    3863164  if(!m_bUseNonCrossALF)
    3873165  {
    388 #endif 
    3893166  pcPicYuvExtRec->setBorderExtension( false );
    3903167  pcPicYuvExtRec->extendPicBorder   ();
    391 #if MTK_NONCROSS_INLOOP_FILTER
    392   }
    393 #endif 
     3168  }
    3943169 
    3953170  // set min cost
     
    4033178 
    4043179  // calc original cost
     3180#if HHI_INTERVIEW_SKIP
     3181  xCalcRDCost( pcPicOrg, pcPicYuvRec, pcUsedPelMap, NULL, uiOrigRate, uiOrigDist, dOrigCost );
     3182#else
    4053183  xCalcRDCost( pcPicOrg, pcPicYuvRec, NULL, uiOrigRate, uiOrigDist, dOrigCost );
     3184#endif
    4063185  m_pcBestAlfParam->alf_flag = 0;
    407   m_pcBestAlfParam->cu_control_flag = 0;
    408  
    4093186  // initialize temp_alfps
    4103187  m_pcTempAlfParam->alf_flag        = 1;
    411   m_pcTempAlfParam->tap             = tap;
    412 #if TI_ALF_MAX_VSIZE_7
    413   m_pcTempAlfParam->tapV            = tapV;
    414 #endif
    415   m_pcTempAlfParam->num_coeff       = num_coef;
    4163188  m_pcTempAlfParam->chroma_idc      = 0;
    417   m_pcTempAlfParam->cu_control_flag = 0;
    418  
    419 #if MQT_ALF_NPASS
     3189
     3190  m_bAlfCUCtrlEnabled = (pvAlfCtrlParam != NULL)?true:false;
     3191  if(m_bAlfCUCtrlEnabled)
     3192  {
     3193    m_vBestAlfCUCtrlParam.resize(m_uiNumSlicesInPic);
     3194    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3195    {
     3196      m_vBestAlfCUCtrlParam[s].cu_control_flag = 0;
     3197    }
     3198  }
     3199  else
     3200  {
     3201    m_vBestAlfCUCtrlParam.clear();
     3202  }
     3203
    4203204  setALFEncodingParam(m_pcPic);
    421 #endif
    4223205
    4233206  // adaptive in-loop wiener filtering
    424   xEncALFLuma_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
    425  
     3207#if HHI_INTERVIEW_SKIP
     3208  xEncALFLuma( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost, bInterviewSkip );
     3209#else
     3210  xEncALFLuma( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
     3211#endif
     3212
    4263213  // cu-based filter on/off control
     3214#if HHI_INTERVIEW_SKIP
     3215  xCUAdaptiveControl_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, pcUsedPelMap, uiMinRate, uiMinDist, dMinCost );
     3216#else
    4273217  xCUAdaptiveControl_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
    428  
    429   // adaptive tap-length
    430   xFilterTapDecision_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
    431  
    432   // compute RD cost
    433   xCalcRDCost( pcPicOrg, pcPicYuvRec, m_pcBestAlfParam, uiMinRate, uiMinDist, dMinCost );
     3218#endif
    4343219 
    4353220  // compare RD cost to non-ALF case
     
    4443229  {
    4453230    m_pcBestAlfParam->alf_flag        = 0;
    446     m_pcBestAlfParam->cu_control_flag = 0;
    447    
     3231
    4483232    uiMinRate = uiOrigRate;
    4493233    uiMinDist = uiOrigDist;
    450     dMinCost = dMinCost;
    4513234   
    4523235    m_pcEntropyCoder->setAlfCtrl(false);
     3236    if(m_bAlfCUCtrlEnabled)
     3237    {
     3238      for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3239      {
     3240        m_vBestAlfCUCtrlParam[s].cu_control_flag = 0;
     3241      }
     3242    }
    4533243    pcPicYuvExtRec->copyToPicLuma(pcPicYuvRec);
    4543244   
     
    4563246    ruiDist = uiOrigDist;
    4573247  }
    458  
    4593248  // if ALF works
    4603249  if( m_pcBestAlfParam->alf_flag )
    4613250  {
    462     // predict ALF coefficients
    463     predictALFCoeff( m_pcBestAlfParam );
    464    
    4653251    // do additional ALF process for chroma
    466     xEncALFChroma( uiMinRate, pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, ruiDist, ruiBits );
     3252    xFilterTapDecisionChroma( uiMinRate, pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, ruiDist, ruiBits );
    4673253  }
    4683254 
     
    4703256  copyALFParam(pcAlfParam, m_pcBestAlfParam);
    4713257 
    472   // store best depth
    473   ruiMaxAlfCtrlDepth = m_pcEntropyCoder->getMaxAlfCtrlDepth();
     3258  if(m_bAlfCUCtrlEnabled)
     3259  {
     3260    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3261    {
     3262      (*pvAlfCtrlParam)[s]= m_vBestAlfCUCtrlParam[s];
     3263    }
     3264  }
     3265}
     3266#endif
     3267
     3268/** PCM LF disable process.
     3269 * \param pcPic picture (TComPic) pointer
     3270 * \returns Void
     3271 *
     3272 * \note Replace filtered sample values of PCM mode blocks with the transmitted and reconstructed ones.
     3273 */
     3274Void TEncAdaptiveLoopFilter::PCMLFDisableProcess (TComPic* pcPic)
     3275{
     3276  xPCMRestoration(pcPic); // thin wrapper: all of the work is delegated to xPCMRestoration
    4743277}
    4753278
     
    4783281// ====================================================================================================================
    4793282
    480 Void TEncAdaptiveLoopFilter::xEncALFChroma( UInt64 uiLumaRate, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, UInt64& ruiBits )
    481 {
    482   // restriction for non-referenced B-slice
    483   if (m_eSliceType == B_SLICE && m_iPicNalReferenceIdc == 0)
    484   {
    485     return;
    486   }
    487  
    488   Int tap, num_coef;
    489  
    490   // set global variables
    491   tap         = ALF_MAX_NUM_TAP_C;
    492   num_coef    = (tap*tap+1)>>1;
    493   num_coef    = num_coef + 1; // DC offset
    494  
    495   // set min cost
    496   UInt64 uiMinRate = uiLumaRate;
    497   UInt64 uiMinDist = MAX_INT;
    498   Double dMinCost  = MAX_DOUBLE;
    499  
    500   // calc original cost
    501   copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
    502   xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiMinRate, uiMinDist, dMinCost);
    503  
    504   // initialize temp_alfps
    505   m_pcTempAlfParam->chroma_idc = 3;
    506   m_pcTempAlfParam->tap_chroma       = tap;
    507   m_pcTempAlfParam->num_coeff_chroma = num_coef;
    508  
    509   // Adaptive in-loop wiener filtering for chroma
    510   xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);
    511  
    512   // filter on/off decision for chroma
    513   Int iCWidth = (pcPicOrg->getWidth()>>1);
    514   Int iCHeight = (pcPicOrg->getHeight()>>1);
    515   Int iCStride = pcPicOrg->getCStride();
    516   UInt64 uiFiltDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicRest->getCbAddr(), iCWidth, iCHeight, iCStride);
    517   UInt64 uiFiltDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicRest->getCrAddr(), iCWidth, iCHeight, iCStride);
    518   UInt64 uiOrgDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicDec->getCbAddr(), iCWidth, iCHeight, iCStride);
    519   UInt64 uiOrgDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicDec->getCrAddr(), iCWidth, iCHeight, iCStride);
    520  
    521   m_pcTempAlfParam->chroma_idc = 0;
    522   if(uiOrgDistCb > uiFiltDistCb)
    523     m_pcTempAlfParam->chroma_idc += 2;
    524   if(uiOrgDistCr  > uiFiltDistCr )
    525     m_pcTempAlfParam->chroma_idc += 1;
    526  
    527   if(m_pcTempAlfParam->chroma_idc)
    528   {
    529     if(m_pcTempAlfParam->chroma_idc!=3)
    530     {
    531       // chroma filter re-design
    532       xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);
    533     }
    534    
    535     UInt64 uiRate, uiDist;
    536     Double dCost;
    537     xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost);
    538    
    539     if( dCost < dMinCost )
    540     {
    541       copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    542       predictALFCoeffChroma(m_pcBestAlfParam);
    543      
    544       ruiBits += uiRate;
    545       ruiDist += uiDist;
    546     }
    547     else
    548     {
    549       m_pcBestAlfParam->chroma_idc = 0;
    550      
    551       if((m_pcTempAlfParam->chroma_idc>>1)&0x01)
    552         pcPicDec->copyToPicCb(pcPicRest);
    553       if(m_pcTempAlfParam->chroma_idc&0x01)
    554         pcPicDec->copyToPicCr(pcPicRest);
    555      
    556       ruiBits += uiMinRate;
    557       ruiDist += uiMinDist;
    558     }
    559   }
    560   else
    561   {
    562     m_pcBestAlfParam->chroma_idc = 0;
    563    
    564     ruiBits += uiMinRate;
    565     ruiDist += uiMinDist;
    566    
    567     pcPicDec->copyToPicCb(pcPicRest);
    568     pcPicDec->copyToPicCr(pcPicRest);
    569   }
    570 }
    571 
    5723283// ====================================================================================================================
    5733284// Private member functions
    5743285// ====================================================================================================================
    575 
     3286#if !LCU_SYNTAX_ALF
    5763287Void TEncAdaptiveLoopFilter::xInitParam()
    5773288{
     
    6163327    }
    6173328  }
     3329  if (m_ppdAlfCorrCb != NULL)
     3330  {
     3331    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3332    {
     3333      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3334      {
     3335        m_ppdAlfCorrCb[i][j] = 0;
     3336      }
     3337    }
     3338  }
     3339  else
     3340  {
     3341    m_ppdAlfCorrCb = new Double*[ALF_MAX_NUM_COEF];
     3342    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3343    {
     3344      m_ppdAlfCorrCb[i] = new Double[ALF_MAX_NUM_COEF+1];
     3345      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3346      {
     3347        m_ppdAlfCorrCb[i][j] = 0;
     3348      }
     3349    }
     3350  }
     3351 
     3352  if (m_ppdAlfCorrCr != NULL)
     3353  {
     3354    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3355    {
     3356      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3357      {
     3358        m_ppdAlfCorrCr[i][j] = 0;
     3359      }
     3360    }
     3361  }
     3362  else
     3363  {
     3364    m_ppdAlfCorrCr = new Double*[ALF_MAX_NUM_COEF];
     3365    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3366    {
     3367      m_ppdAlfCorrCr[i] = new Double[ALF_MAX_NUM_COEF+1];
     3368      for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
     3369      {
     3370        m_ppdAlfCorrCr[i][j] = 0;
     3371      }
     3372    }
     3373  }
    6183374}
    6193375
     
    6383394    m_pdDoubleAlfCoeff = NULL;
    6393395  }
    640 }
    641 
     3396  if (m_ppdAlfCorrCb != NULL)
     3397  {
     3398    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3399    {
     3400      delete[] m_ppdAlfCorrCb[i];
     3401      m_ppdAlfCorrCb[i] = NULL;
     3402    }
     3403    delete[] m_ppdAlfCorrCb;
     3404    m_ppdAlfCorrCb = NULL;
     3405  }
     3406 
     3407  if (m_ppdAlfCorrCr != NULL)
     3408  {
     3409    for (i = 0; i < ALF_MAX_NUM_COEF; i++)
     3410    {
     3411      delete[] m_ppdAlfCorrCr[i];
     3412      m_ppdAlfCorrCr[i] = NULL;
     3413    }
     3414    delete[] m_ppdAlfCorrCr;
     3415    m_ppdAlfCorrCr = NULL;
     3416  }
     3417}
     3418#endif
    6423419Void TEncAdaptiveLoopFilter::xCreateTmpAlfCtrlFlags()
    6433420{
     
    6763453}
    6773454
    678 Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlags()
    679 {
    680   for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
    681   {
    682     TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
    683     xEncodeCUAlfCtrlFlag(pcCU, 0, 0);
    684   }
    685 }
    686 
     3455/** Encode ALF CU control flags
          * \param [in] vAlfCUCtrlParam per-slice ALF CU control info, indexed by slice number
          *
          * \note Slices for which m_pcPic->getValidSlice() returns false are skipped, and flags are
          *       written only for slices whose cu_control_flag equals 1.
     3456 */
     3457Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlags(std::vector<AlfCUCtrlInfo> &vAlfCUCtrlParam)
     3458{
     3459  for(Int s=0; s< m_uiNumSlicesInPic; s++)
     3460  {
     3461    if(!m_pcPic->getValidSlice(s)) // nothing to encode for an invalid slice
     3462    {
     3463      continue;
     3464    }
     3465
     3466    AlfCUCtrlInfo& rCUCtrlInfo = vAlfCUCtrlParam[s];
     3467    if(rCUCtrlInfo.cu_control_flag == 1)
     3468    {
     3469      for(Int i=0; i< (Int)rCUCtrlInfo.alf_cu_flag.size(); i++) // pass every stored flag of this slice to the entropy coder, in order
     3470      {
     3471        m_pcEntropyCoder->encodeAlfCtrlFlag(rCUCtrlInfo.alf_cu_flag[i]);
     3472      }
     3473    }
     3474  }
     3475}
    6873476Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth)
    6883477{
     
    6943483 
    6953484#if AD_HOCS_SLICES 
    696   if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
     3485  if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    6973486#else 
    698   if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
     3487  if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    6993488#endif 
    7003489  {
     
    7113500     
    7123501#if AD_HOCS_SLICES     
    713       if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
    714 #else
    715       if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
     3502      if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
     3503#else
     3504      if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    7163505#endif     
    7173506        xEncodeCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1);
     
    7223511  m_pcEntropyCoder->encodeAlfCtrlFlag(pcCU, uiAbsPartIdx);
    7233512}
    724 #if MTK_NONCROSS_INLOOP_FILTER
    725 Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Int ypos, Int xpos, Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride, Bool bSymmCopyBlockMatrix)
    726 #else
    727 Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride)
    728 #endif
    729 {
    730   //Patch should be extended before this point................
    731   //ext_offset  = tap>>1;
    732  
    733 #if TI_ALF_MAX_VSIZE_7
    734   Int iTapV   = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap);
    735   Int N       = (iTap * iTapV + 1) >> 1;
    736   Int offsetV = iTapV >> 1;
    737 #else
    738   Int N      = (iTap*iTap+1)>>1;
    739 #endif
    740   Int offset = iTap>>1;
    741  
    742   const Int* pFiltPos;
    743  
    744   switch(iTap)
    745   {
    746     case 5:
    747       pFiltPos = m_aiSymmetricArray5x5;
    748       break;
    749     case 7:
    750       pFiltPos = m_aiSymmetricArray7x7;
    751       break;
    752     case 9:
    753 #if TI_ALF_MAX_VSIZE_7
    754       pFiltPos = m_aiSymmetricArray9x7;
    755 #else
    756       pFiltPos = m_aiSymmetricArray9x9;
    757 #endif
    758       break;
    759     default:
    760 #if TI_ALF_MAX_VSIZE_7
    761       pFiltPos = m_aiSymmetricArray9x7;
    762 #else
    763       pFiltPos = m_aiSymmetricArray9x9;
    764 #endif
     3513
     3514#if !LCU_SYNTAX_ALF
     3515
     3516Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Int ypos, Int xpos, Pel* pImgOrg, Pel* pImgPad, Int filtNo, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride, Bool bSymmCopyBlockMatrix)
     3517{
     3518  Int     yposEnd = ypos + iHeight -1;
     3519  Int     xposEnd = xpos + iWidth  -1;
     3520  Int     N       = m_sqrFiltLengthTab[filtNo];
     3521
     3522  Int imgHeightChroma = m_img_height>>1;
     3523  Int yLineInLCU;
     3524  Int paddingline ;
     3525
     3526  Int ELocal[ALF_MAX_NUM_COEF];
     3527  Pel *pImgPad1, *pImgPad2, *pImgPad3, *pImgPad4;
     3528  Int i, j, k, l;
     3529  Int yLocal;
     3530
     3531  pImgPad += (ypos*iCmpStride);
     3532  pImgOrg += (ypos*iOrgStride);
     3533
     3534  switch(filtNo)
     3535  {
     3536#if !ALF_SINGLE_FILTER_SHAPE
     3537  case ALF_STAR5x5:
     3538    {
     3539      for (i= ypos; i<= yposEnd; i++)
     3540      {
     3541        yLineInLCU = i % m_lcuHeightChroma;
     3542
     3543        if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma )
     3544        {
     3545          pImgPad1 = pImgPad +   iCmpStride;
     3546          pImgPad2 = pImgPad -   iCmpStride;
     3547          pImgPad3 = pImgPad + 2*iCmpStride;
     3548          pImgPad4 = pImgPad - 2*iCmpStride;
     3549        }
     3550        else if (yLineInLCU < m_lineIdxPadTopChroma)
     3551        {
     3552          paddingline = - yLineInLCU + m_lineIdxPadTopChroma - 1;
     3553          pImgPad1 = pImgPad + min(paddingline, 1)*iCmpStride;
     3554          pImgPad2 = pImgPad -   iCmpStride;
     3555          pImgPad3 = pImgPad + min(paddingline, 2)*iCmpStride;
     3556          pImgPad4 = pImgPad - 2*iCmpStride;
     3557        }
     3558        else
     3559        {
     3560          paddingline = yLineInLCU - m_lineIdxPadTopChroma ;
     3561          pImgPad1 = pImgPad +   iCmpStride;
     3562          pImgPad2 = pImgPad - min(paddingline, 1)*iCmpStride;
     3563          pImgPad3 = pImgPad + 2*iCmpStride;
     3564          pImgPad4 = pImgPad - min(paddingline, 2)*iCmpStride;
     3565        }
     3566
     3567        if ( (yLineInLCU == m_lineIdxPadTopChroma || yLineInLCU == m_lineIdxPadTopChroma-1) && i-yLineInLCU+m_lcuHeightChroma < imgHeightChroma )
     3568        {
     3569          pImgPad+= iCmpStride;
     3570          pImgOrg+= iOrgStride;
     3571          continue;
     3572        }
     3573        else
     3574        {
     3575        for (j= xpos; j<= xposEnd; j++)
     3576        {
     3577          memset(ELocal, 0, N*sizeof(Int));
     3578
     3579          ELocal[0] = (pImgPad3[j+2] + pImgPad4[j-2]);
     3580          ELocal[1] = (pImgPad3[j  ] + pImgPad4[j  ]);
     3581          ELocal[2] = (pImgPad3[j-2] + pImgPad4[j+2]);
     3582
     3583          ELocal[3] = (pImgPad1[j+1] + pImgPad2[j-1]);
     3584          ELocal[4] = (pImgPad1[j  ] + pImgPad2[j  ]);
     3585          ELocal[5] = (pImgPad1[j-1] + pImgPad2[j+1]);
     3586
     3587          ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     3588          ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     3589          ELocal[8] = (pImgPad[j  ]);
     3590
     3591          yLocal= (Int)pImgOrg[j];
     3592
     3593          for(k=0; k<N; k++)
     3594          {
     3595            m_ppdAlfCorr[k][k] += ELocal[k]*ELocal[k];
     3596            for(l=k+1; l<N; l++)
     3597            {
     3598              m_ppdAlfCorr[k][l] += ELocal[k]*ELocal[l];
     3599            }
     3600
     3601            m_ppdAlfCorr[k][N] += yLocal*ELocal[k];
     3602          }
     3603        }
     3604        pImgPad+= iCmpStride;
     3605        pImgOrg+= iOrgStride;
     3606      }
     3607
     3608      }
     3609    }
     3610    break;
     3611  case ALF_CROSS9x9:
     3612    {
     3613      Pel *pImgPad5, *pImgPad6, *pImgPad7, *pImgPad8;
     3614#else
     3615  case ALF_CROSS9x7_SQUARE3x3:
     3616    {
     3617      Pel *pImgPad5, *pImgPad6;
     3618#endif
     3619      for (i= ypos; i<= yposEnd; i++)
     3620      {
     3621        yLineInLCU = i % m_lcuHeightChroma;
     3622
     3623        if (yLineInLCU<2 && i> 2)
     3624        {
     3625          paddingline = yLineInLCU + 2 ;
     3626          pImgPad1 = pImgPad +   iCmpStride;
     3627          pImgPad2 = pImgPad -   iCmpStride;
     3628          pImgPad3 = pImgPad + 2*iCmpStride;
     3629          pImgPad4 = pImgPad - 2*iCmpStride;
     3630          pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + 3*iCmpStride;
     3631          pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - min(paddingline, 3)*iCmpStride;;
     3632#if !ALF_SINGLE_FILTER_SHAPE
     3633          pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + 4*iCmpStride;
     3634          pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - min(paddingline, 4)*iCmpStride;;     
     3635#endif
     3636        }
     3637        else if (yLineInLCU < m_lineIdxPadBotChroma || i-yLineInLCU+m_lcuHeightChroma >= imgHeightChroma )
     3638        {
     3639          pImgPad1 = pImgPad +   iCmpStride;
     3640          pImgPad2 = pImgPad -   iCmpStride;
     3641          pImgPad3 = pImgPad + 2*iCmpStride;
     3642          pImgPad4 = pImgPad - 2*iCmpStride;
     3643          pImgPad5 = pImgPad + 3*iCmpStride;
     3644          pImgPad6 = pImgPad - 3*iCmpStride;
     3645#if !ALF_SINGLE_FILTER_SHAPE
     3646          pImgPad7 = pImgPad + 4*iCmpStride;
     3647          pImgPad8 = pImgPad - 4*iCmpStride;
     3648#endif
     3649        }
     3650        else if (yLineInLCU < m_lineIdxPadTopChroma)
     3651        {
     3652          paddingline = - yLineInLCU + m_lineIdxPadTopChroma - 1;
     3653          pImgPad1 = (paddingline < 1) ? pImgPad : pImgPad + min(paddingline, 1)*iCmpStride;
     3654          pImgPad2 = (paddingline < 1) ? pImgPad : pImgPad -   iCmpStride;
     3655          pImgPad3 = (paddingline < 2) ? pImgPad : pImgPad + min(paddingline, 2)*iCmpStride;
     3656          pImgPad4 = (paddingline < 2) ? pImgPad : pImgPad - 2*iCmpStride;
     3657          pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + min(paddingline, 3)*iCmpStride;
     3658          pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - 3*iCmpStride;
     3659#if !ALF_SINGLE_FILTER_SHAPE
     3660          pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + min(paddingline, 4)*iCmpStride;
     3661          pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - 4*iCmpStride;
     3662#endif
     3663        }
     3664        else
     3665        {
     3666          paddingline = yLineInLCU - m_lineIdxPadTopChroma ;
     3667          pImgPad1 = (paddingline < 1) ? pImgPad : pImgPad +   iCmpStride;
     3668          pImgPad2 = (paddingline < 1) ? pImgPad : pImgPad - min(paddingline, 1)*iCmpStride;
     3669          pImgPad3 = (paddingline < 2) ? pImgPad : pImgPad + 2*iCmpStride;
     3670          pImgPad4 = (paddingline < 2) ? pImgPad : pImgPad - min(paddingline, 2)*iCmpStride;
     3671          pImgPad5 = (paddingline < 3) ? pImgPad : pImgPad + 3*iCmpStride;
     3672          pImgPad6 = (paddingline < 3) ? pImgPad : pImgPad - min(paddingline, 3)*iCmpStride;
     3673#if !ALF_SINGLE_FILTER_SHAPE
     3674          pImgPad7 = (paddingline < 4) ? pImgPad : pImgPad + 4*iCmpStride;
     3675          pImgPad8 = (paddingline < 4) ? pImgPad : pImgPad - min(paddingline, 4)*iCmpStride;
     3676#endif
     3677        }
     3678
     3679        for (j= xpos; j<= xposEnd; j++)
     3680        {
     3681          memset(ELocal, 0, N*sizeof(Int));
     3682#if ALF_SINGLE_FILTER_SHAPE
     3683          ELocal[0] = (pImgPad5[j]+pImgPad6[j]);
     3684          ELocal[1] = (pImgPad3[j]+pImgPad4[j]);
     3685          ELocal[2] = (pImgPad1[j-1]+pImgPad2[j+1]);
     3686          ELocal[3] = (pImgPad1[j]+pImgPad2[j]);
     3687          ELocal[4] = (pImgPad1[j+1]+pImgPad2[j-1]);
     3688          ELocal[5] = (pImgPad[j+4]+pImgPad[j-4]);
     3689          ELocal[6] = (pImgPad[j+3]+pImgPad[j-3]);
     3690          ELocal[7] = (pImgPad[j+2]+pImgPad[j-2]);
     3691          ELocal[8] = (pImgPad[j+1]+pImgPad[j-1]);
     3692          ELocal[9] = (pImgPad[j  ]);
     3693#else
     3694          ELocal[0] = (pImgPad7[j] + pImgPad8[j]);
     3695
     3696          ELocal[1] = (pImgPad5[j] + pImgPad6[j]);
     3697
     3698          ELocal[2] = (pImgPad3[j] + pImgPad4[j]);
     3699
     3700          ELocal[3] = (pImgPad1[j] + pImgPad2[j]);
     3701
     3702          ELocal[4] = (pImgPad[j+4] + pImgPad[j-4]);
     3703          ELocal[5] = (pImgPad[j+3] + pImgPad[j-3]);
     3704          ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     3705          ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     3706          ELocal[8] = (pImgPad[j  ] );
     3707#endif
     3708          yLocal= (Int)pImgOrg[j];
     3709
     3710          for(k=0; k<N; k++)
     3711          {
     3712            m_ppdAlfCorr[k][k] += ELocal[k]*ELocal[k];
     3713            for(l=k+1; l<N; l++)
     3714            {
     3715              m_ppdAlfCorr[k][l] += ELocal[k]*ELocal[l];
     3716            }
     3717
     3718            m_ppdAlfCorr[k][N] += yLocal*ELocal[k];
     3719          }
     3720        }
     3721        pImgPad+= iCmpStride;
     3722        pImgOrg+= iOrgStride;
     3723      }
     3724
     3725    }
     3726    break;
     3727  default:
     3728    {
     3729      printf("Not a supported filter shape\n");
    7653730      assert(0);
    766       break;
    767   }
    768  
    769   Pel* pTerm = new Pel[N];
    770  
    771   Int i, j;
    772 #if MTK_NONCROSS_INLOOP_FILTER
    773   for (Int y = ypos; y < ypos + iHeight; y++)
    774   {
    775     for (Int x = xpos; x < xpos + iWidth; x++)
    776     {
    777 #else   
    778   for (Int y = 0; y < iHeight; y++)
    779   {
    780     for (Int x = 0; x < iWidth; x++)
    781     {
    782 #endif
    783       i = 0;
    784       ::memset(pTerm, 0, sizeof(Pel)*N);
    785 #if TI_ALF_MAX_VSIZE_7
    786       for (Int yy = y - offsetV; yy <= y + offsetV; yy++)
    787 #else
    788       for(Int yy=y-offset; yy<=y+offset; yy++)
    789 #endif
    790       {
    791         for(Int xx=x-offset; xx<=x+offset; xx++)
    792         {
    793           pTerm[pFiltPos[i]] += pCmp[xx + yy*iCmpStride];
    794           i++;
    795         }
    796       }
    797      
    798       for(j=0; j<N; j++)
    799       {
    800         m_ppdAlfCorr[j][j] += pTerm[j]*pTerm[j];
    801         for(i=j+1; i<N; i++)
    802           m_ppdAlfCorr[j][i] += pTerm[j]*pTerm[i];
    803        
    804         // DC offset
    805         m_ppdAlfCorr[j][N]   += pTerm[j];
    806         m_ppdAlfCorr[j][N+1] += pOrg[x+y*iOrgStride]*pTerm[j];
    807       }
    808       // DC offset
    809       for(i=0; i<N; i++)
    810         m_ppdAlfCorr[N][i] += pTerm[i];
    811       m_ppdAlfCorr[N][N]   += 1;
    812       m_ppdAlfCorr[N][N+1] += pOrg[x+y*iOrgStride];
    813     }
    814   }
    815 #if MTK_NONCROSS_INLOOP_FILTER
     3731      exit(1);
     3732    }
     3733  }
     3734
    8163735  if(bSymmCopyBlockMatrix)
    8173736  {
    818 #endif
    819   for(j=0; j<N-1; j++)
    820   {
    821     for(i=j+1; i<N; i++)
    822       m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i];
    823   }
    824 #if MTK_NONCROSS_INLOOP_FILTER
    825   }
    826 #endif
    827 
    828   delete[] pTerm;
    829   pTerm = NULL;
    830 }
    831 
     3737    for(j=0; j<N-1; j++)
     3738    {
     3739      for(i=j+1; i<N; i++)
     3740      {
     3741        m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i];
     3742      }
     3743    }
     3744  }
     3745}
     3746
     3747#endif
    8323748#if IBDI_DISTORTION
    833 UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride )
     3749#if HHI_INTERVIEW_SKIP
     3750UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Pel* pUsed, Int iWidth, Int iHeight, Int iStride )
    8343751{
    8353752  UInt64 uiSSD = 0;
     
    8443761    for( x = 0; x < iWidth; x++ )
    8453762    {
     3763      if ( pUsed ) // interview skipped
     3764      {
     3765        if( pUsed[x] )
     3766        {
    8463767      iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp;
     3768        }
     3769      }
     3770      else         // no interview skip
     3771      {
     3772          iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp;
     3773      }
    8473774    }
    8483775    pOrg += iStride;
    8493776    pCmp += iStride;
     3777    if(pUsed)
     3778    {
     3779      pUsed+= iStride;
     3780    }
    8503781  }
    8513782
     
    8583789  Int x, y;
    8593790 
     3791  Int iShift = g_uiBitIncrement;
     3792  Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;
     3793  Int iTemp;
     3794
     3795  for( y = 0; y < iHeight; y++ )
     3796  {
     3797    for( x = 0; x < iWidth; x++ )
     3798    {
     3799      iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp;
     3800    }
     3801    pOrg += iStride;
     3802    pCmp += iStride;
     3803  }
     3804
     3805  return uiSSD;;
     3806}
     3807#endif
     3808#else
     3809#if HHI_INTERVIEW_SKIP
     3810UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Pel* pUsed, Int iWidth, Int iHeight, Int iStride )
     3811{
     3812  UInt64 uiSSD = 0;
     3813  Int x, y;
     3814
     3815  UInt uiShift = g_uiBitIncrement<<1;
     3816  Int iTemp =0 ;
     3817
     3818  for( y = 0; y < iHeight; y++ )
     3819  {
     3820    for( x = 0; x < iWidth; x++ )
     3821    {
     3822      if ( pUsed ) // interview skipped
     3823      {
     3824        if( pUsed[x] )
     3825        {
     3826          iTemp = pOrg[x] - pCmp[x]; uiSSD += ( iTemp * iTemp ) >> uiShift;
     3827        }
     3828      }
     3829      else         // no interview skip
     3830      {
     3831          iTemp = pOrg[x] - pCmp[x]; uiSSD += ( iTemp * iTemp ) >> uiShift;
     3832      }
     3833    }
     3834    pOrg += iStride;
     3835    pCmp += iStride;
     3836    if(pUsed)
     3837    {
     3838      pUsed+= iStride;
     3839    }
     3840  }
     3841
     3842  return uiSSD;;
     3843}
     3844#else
     3845UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride )
     3846{
     3847  UInt64 uiSSD = 0;
     3848  Int x, y;
    8603849  UInt uiShift = g_uiBitIncrement<<1;
    8613850  Int iTemp;
     
    8743863}
    8753864#endif
     3865#endif
    8763866
    8773867Int TEncAdaptiveLoopFilter::xGauss(Double **a, Int N)
     
    8833873  {
    8843874    if (a[k][k] <0.000001)
     3875    {
    8853876      return 1;
     3877    }
    8863878  }
    8873879 
     
    9023894    t = a[i][N];
    9033895    for(j=i+1; j<N; j++)
     3896    {
    9043897      t -= a[i][j] * a[j][N];
     3898    }
    9053899    a[i][N] = t / a[i][i];
    9063900  }
     
    9323926    }
    9333927  } while( i <= j );
    934   if ( upper < j ) xFilterCoefQuickSort(coef_data, coef_num, upper, j);
    935   if ( i < lower ) xFilterCoefQuickSort(coef_data, coef_num, i, lower);
     3928  if ( upper < j )
     3929  {
     3930    xFilterCoefQuickSort(coef_data, coef_num, upper, j);
     3931  }
     3932  if ( i < lower )
     3933  {
     3934    xFilterCoefQuickSort(coef_data, coef_num, i, lower);
     3935  }
    9363936}
    9373937
     
    9463946  Int    *nc;
    9473947  const Int    *pFiltMag;
    948  
    949   switch(tap)
    950   {
    951     case 5:
    952       pFiltMag = m_aiSymmetricMag5x5;
    953       break;
    954     case 7:
    955       pFiltMag = m_aiSymmetricMag7x7;
    956       break;
    957     case 9:
    958 #if TI_ALF_MAX_VSIZE_7
    959       pFiltMag = m_aiSymmetricMag9x7;
    960 #else
    961       pFiltMag = m_aiSymmetricMag9x9;
    962 #endif
    963       break;
    964     default:
    965 #if TI_ALF_MAX_VSIZE_7
    966       pFiltMag = m_aiSymmetricMag9x7;
    967 #else
    968       pFiltMag = m_aiSymmetricMag9x9;
    969 #endif
    970       assert(0);
    971       break;
    972   }
    973  
    974 #if TI_ALF_MAX_VSIZE_7
    975   Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap);
    976   N = (tap * tapV + 1) >> 1;
    977 #else
    978   N = (tap*tap+1)>>1;
    979 #endif
    980  
     3948#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     3949  Int alfPrecisionBit = getAlfPrecisionBit( m_alfQP );
     3950#endif
     3951
     3952  N = m_sqrFiltLengthTab[tap];
     3953#if ALF_SINGLE_FILTER_SHAPE
     3954  pFiltMag = weightsShape1Sym;
     3955#else
     3956  // star shape
     3957  if(tap == 0)
     3958  {
     3959    pFiltMag = weightsShape0Sym;
     3960  }
     3961  // cross shape
     3962  else
     3963  {
     3964    pFiltMag = weightsShape1Sym;
     3965  }
     3966#endif
     3967
    9813968  dh = new Double[N];
    9823969  nc = new Int[N];
    9833970 
     3971#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS 
     3972  max_value =   (1<<(1+alfPrecisionBit))-1;
     3973  min_value = 0-(1<<(1+alfPrecisionBit));
     3974#else
    9843975  max_value =   (1<<(1+ALF_NUM_BIT_SHIFT))-1;
    9853976  min_value = 0-(1<<(1+ALF_NUM_BIT_SHIFT));
    986  
     3977#endif
     3978
    9873979  dbl_total_gain=0.0;
    9883980  q_total_gain=0;
     
    9903982  {
    9913983    if(h[i]>=0.0)
     3984    {
     3985#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     3986      qh[i] =  (Int)( h[i]*(1<<alfPrecisionBit)+0.5);
     3987#else
    9923988      qh[i] =  (Int)( h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5);
     3989#endif
     3990    }
    9933991    else
     3992    {
     3993#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     3994      qh[i] = -(Int)(-h[i]*(1<<alfPrecisionBit)+0.5);
     3995#else
    9943996      qh[i] = -(Int)(-h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5);
    995    
     3997#endif
     3998    }
     3999
     4000#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4001    dh[i] = (Double)qh[i]/(Double)(1<<alfPrecisionBit) - h[i];
     4002#else
    9964003    dh[i] = (Double)qh[i]/(Double)(1<<ALF_NUM_BIT_SHIFT) - h[i];
     4004#endif
    9974005    dh[i]*=pFiltMag[i];
    9984006    dbl_total_gain += h[i]*pFiltMag[i];
     
    10024010 
    10034011  // modification of quantized filter coefficients
     4012#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4013  total_gain = (Int)(dbl_total_gain*(1<<alfPrecisionBit)+0.5);
     4014#else
    10044015  total_gain = (Int)(dbl_total_gain*(1<<ALF_NUM_BIT_SHIFT)+0.5);
    1005  
     4016#endif 
    10064017  if( q_total_gain != total_gain )
    10074018  {
     
    10204031      {
    10214032        if(dh[N-1]>0)
     4033        {
    10224034          qh[N-1]--;
     4035        }
    10234036        else
    10244037        {
     
    10424055      {
    10434056        if(dh[N-1]<0)
     4057        {
    10444058          qh[N-1]++;
     4059        }
    10454060        else
    10464061        {
     
    10564071  for(i=0; i<N; i++)
    10574072  {
    1058     qh[i] = Max(min_value,Min(max_value, qh[i]));
    1059   }
    1060  
    1061   // DC offset
    1062   //  max_value = Min(  (1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma)))-1, (1<<14)-1);
    1063   //  min_value = Max( -(1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma))),  -(1<<14)  );
    1064   max_value = Min(  (1<<(3+g_uiBitDepth + g_uiBitIncrement))-1, (1<<14)-1);
    1065   min_value = Max( -(1<<(3+g_uiBitDepth + g_uiBitIncrement)),  -(1<<14)  );
    1066  
    1067   qh[N] =  (h[N]>=0.0)? (Int)( h[N]*(1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5) : -(Int)(-h[N]*(1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5);
    1068   qh[N] = Max(min_value,Min(max_value, qh[N]));
    1069  
     4073    qh[i] = max(min_value,min(max_value, qh[i]));
     4074  }
     4075
     4076  checkFilterCoeffValue(qh, N, true);
     4077
    10704078  delete[] dh;
    10714079  dh = NULL;
     
    10744082  nc = NULL;
    10754083}
    1076 
     4084#if !LCU_SYNTAX_ALF
    10774085Void TEncAdaptiveLoopFilter::xClearFilterCoefInt(Int* qh, Int N)
    10784086{
     
    10814089 
    10824090  // center pos
    1083   qh[N-2]  = 1<<ALF_NUM_BIT_SHIFT;
    1084 }
    1085 
    1086 Void TEncAdaptiveLoopFilter::xCalcRDCost(ALFParam* pAlfParam, UInt64& ruiRate, UInt64 uiDist, Double& rdCost)
     4091  qh[N-1]  = 1<<ALF_NUM_BIT_SHIFT;
     4092}
     4093/** Calculate RD cost
     4094 * \param [in] pAlfParam ALF parameters
     4095 * \param [out] ruiRate coding bits
     4096 * \param [in] uiDist distortion
     4097 * \param [out] rdCost rate-distortion cost
     4098 * \param [in] pvAlfCUCtrlParam ALF CU control parameters
     4099 */
     4100Void TEncAdaptiveLoopFilter::xCalcRDCost(ALFParam* pAlfParam, UInt64& ruiRate, UInt64 uiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam)
    10874101{
    10884102  if(pAlfParam != NULL)
    10894103  {
    1090     Int* piTmpCoef;
    1091     piTmpCoef = new Int[ALF_MAX_NUM_COEF];
    1092    
    1093     memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff);
    1094    
    1095     predictALFCoeff(pAlfParam);
    1096    
     4104    m_pcEntropyCoder->resetEntropy();
     4105    m_pcEntropyCoder->resetBits();
     4106    m_pcEntropyCoder->encodeAlfParam(pAlfParam);
     4107
     4108    ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
     4109
     4110    if(pvAlfCUCtrlParam != NULL)
     4111    {
     4112      for(UInt s=0; s< m_uiNumSlicesInPic; s++)
     4113      {
     4114        if(!m_pcPic->getValidSlice(s))
     4115        {
     4116          continue;
     4117        }
     4118        m_pcEntropyCoder->resetEntropy();
     4119        m_pcEntropyCoder->resetBits();
     4120        m_pcEntropyCoder->encodeAlfCtrlParam( (*pvAlfCUCtrlParam)[s], m_uiNumCUsInFrame);
     4121        ruiRate += m_pcEntropyCoder->getNumberOfWrittenBits();
     4122      }
     4123    }
     4124    else
     4125    {
     4126      ruiRate += m_uiNumSlicesInPic;
     4127    }
     4128  }
     4129  else
     4130  {
     4131    ruiRate = 1;
     4132  }
     4133 
     4134  rdCost      = (Double)(ruiRate) * m_dLambdaLuma + (Double)(uiDist);
     4135}
     4136
     4137/** Calculate RD cost
     4138 * \param [in] pcPicOrg original picture buffer
     4139 * \param [in] pcPicCmp compared picture buffer
     4140 * \param [in] pAlfParam ALF parameters
     4141 * \param [out] ruiRate coding bits
     4142 * \param [out] ruiDist distortion
     4143 * \param [out] rdCost rate-distortion cost
     4144 * \param [in] pvAlfCUCtrlParam ALF CU control parameters
     4145 */
     4146#if HHI_INTERVIEW_SKIP
     4147Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, TComPicYuv* pcUsedPelMap, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam)
     4148#else
     4149Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost, std::vector<AlfCUCtrlInfo>* pvAlfCUCtrlParam)
     4150#endif
     4151{
     4152  if(pAlfParam != NULL)
     4153  {
    10974154    m_pcEntropyCoder->resetEntropy();
    10984155    m_pcEntropyCoder->resetBits();
    10994156    m_pcEntropyCoder->encodeAlfParam(pAlfParam);
    11004157   
    1101     if(pAlfParam->cu_control_flag)
    1102     {
    1103 #if TSB_ALF_HEADER
    1104       m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
    1105 #else
    1106       xEncodeCUAlfCtrlFlags();
    1107 #endif
    1108     }
    11094158    ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
    1110     memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff);
    1111     delete[] piTmpCoef;
    1112     piTmpCoef = NULL;
    1113   }
    1114   else
    1115   {
    1116     ruiRate = 1;
    1117   }
    1118  
    1119   rdCost      = (Double)(ruiRate) * m_dLambdaLuma + (Double)(uiDist);
    1120 }
    1121 
    1122 Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost)
    1123 {
    1124   if(pAlfParam != NULL)
    1125   {
    1126     Int* piTmpCoef;
    1127     piTmpCoef = new Int[ALF_MAX_NUM_COEF];
    1128    
    1129     memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff);
    1130    
    1131     predictALFCoeff(pAlfParam);
    1132    
    1133     m_pcEntropyCoder->resetEntropy();
    1134     m_pcEntropyCoder->resetBits();
    1135     m_pcEntropyCoder->encodeAlfParam(pAlfParam);
    1136    
    1137     if(pAlfParam->cu_control_flag)
    1138     {
    1139 #if TSB_ALF_HEADER
    1140       m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
    1141 #else
    1142       xEncodeCUAlfCtrlFlags();
    1143 #endif
    1144     }
    1145     ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
    1146     memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff);
    1147     delete[] piTmpCoef;
    1148     piTmpCoef = NULL;
     4159
     4160    if(pvAlfCUCtrlParam != NULL)
     4161    {
     4162      for(UInt s=0; s< m_uiNumSlicesInPic; s++)
     4163      {
     4164        if(! m_pcPic->getValidSlice(s))
     4165        {
     4166          continue;
     4167        }
     4168        m_pcEntropyCoder->resetEntropy();
     4169        m_pcEntropyCoder->resetBits();
     4170        m_pcEntropyCoder->encodeAlfCtrlParam( (*pvAlfCUCtrlParam)[s], m_uiNumCUsInFrame);
     4171        ruiRate += m_pcEntropyCoder->getNumberOfWrittenBits();
     4172      }
     4173
     4174    }
     4175    else
     4176    {
     4177      ruiRate += m_uiNumSlicesInPic;
     4178    }
    11494179  }
    11504180  else
     
    11564186  rdCost      = (Double)(ruiRate) * m_dLambdaLuma + (Double)(ruiDist);
    11574187}
    1158 
     4188/** Calculate RD cost for chroma ALF
     4189 * \param pcPicOrg original picture buffer
     4190 * \param pcPicCmp compared picture buffer
     4191 * \param pAlfParam ALF parameters
     4192 * \returns ruiRate bitrate
     4193 * \returns uiDist distortion
     4194 * \returns rdCost RD cost
     4195 */
    11594196Void TEncAdaptiveLoopFilter::xCalcRDCostChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost)
    11604197{
    11614198  if(pAlfParam->chroma_idc)
    11624199  {
    1163     Int* piTmpCoef;
    1164     piTmpCoef = new Int[ALF_MAX_NUM_COEF_C];
    1165    
    1166     memcpy(piTmpCoef, pAlfParam->coeff_chroma, sizeof(Int)*pAlfParam->num_coeff_chroma);
    1167    
    1168     predictALFCoeffChroma(pAlfParam);
    1169    
    1170     m_pcEntropyCoder->resetEntropy();
    1171     m_pcEntropyCoder->resetBits();
    1172     m_pcEntropyCoder->encodeAlfParam(pAlfParam);
    1173    
    1174     if(pAlfParam->cu_control_flag)
    1175     {
    1176 #if TSB_ALF_HEADER
    1177       m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
    1178 #else
    1179       xEncodeCUAlfCtrlFlags();
    1180 #endif
    1181     }
    1182     ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
    1183     memcpy(pAlfParam->coeff_chroma, piTmpCoef, sizeof(int)*pAlfParam->num_coeff_chroma);
    1184     delete[] piTmpCoef;
    1185     piTmpCoef = NULL;
     4200    ruiRate = xCalcRateChroma(pAlfParam);
    11864201  }
    11874202  ruiDist = 0;
     
    11914206}
    11924207
    1193 Void TEncAdaptiveLoopFilter::xFilteringFrameChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
    1194 {
    1195   Int    i, tap, N, err_code;
    1196   Int* qh;
    1197  
    1198   tap  = m_pcTempAlfParam->tap_chroma;
    1199   N    = m_pcTempAlfParam->num_coeff_chroma;
    1200   qh   = m_pcTempAlfParam->coeff_chroma;
    1201  
    1202   // initialize correlation
    1203   for(i=0; i<N; i++)
    1204     memset(m_ppdAlfCorr[i], 0, sizeof(Double)*(N+1));
    1205  
    1206   if ((m_pcTempAlfParam->chroma_idc>>1)&0x01)
    1207   {
    1208     Pel* pOrg = pcPicOrg->getCbAddr();
    1209     Pel* pCmp = pcPicDec->getCbAddr();
    1210 #if MTK_NONCROSS_INLOOP_FILTER
     4208Void TEncAdaptiveLoopFilter::xFilteringFrameChroma(ALFParam* pcAlfParam, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
     4209{
     4210  Int filtNo = pcAlfParam->filter_shape_chroma;
     4211  Int *coeff = pcAlfParam->coeff_chroma;
     4212  Int iChromaFormatShift = 1; //4:2:0
     4213
     4214  if ((pcAlfParam->chroma_idc>>1)&0x01)
     4215  {
    12114216    if(!m_bUseNonCrossALF)
    1212       xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
     4217    {
     4218      Int iStride   = pcPicRest->getCStride();
     4219      Pel* pDec  = pcPicDec->getCbAddr();
     4220      Pel* pRest = pcPicRest->getCbAddr();
     4221
     4222      filterChroma(pRest, pDec, iStride, 0, (Int)(m_img_height>>1) -1, 0, (Int)(m_img_width>>1)-1, filtNo,  coeff);
     4223    }
    12134224    else
    1214       xCalcCorrelationFuncforChromaSlices(ALF_Cb, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride());
    1215 #else       
    1216     xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride());
    1217 #endif
    1218   }
    1219   if ((m_pcTempAlfParam->chroma_idc)&0x01)
    1220   {
    1221     Pel* pOrg = pcPicOrg->getCrAddr();
    1222     Pel* pCmp = pcPicDec->getCrAddr();
    1223 #if MTK_NONCROSS_INLOOP_FILTER
     4225    {
     4226      xFilterChromaSlices(ALF_Cb, pcPicDec, pcPicRest, coeff, filtNo, iChromaFormatShift);
     4227    }
     4228  }
     4229  if ((pcAlfParam->chroma_idc)&0x01)
     4230  {
    12244231    if(!m_bUseNonCrossALF)
    1225       xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
     4232    {
     4233      Int iStride   = pcPicRest->getCStride();
     4234      Pel* pDec  = pcPicDec->getCrAddr();
     4235      Pel* pRest = pcPicRest->getCrAddr();
     4236
     4237      filterChroma(pRest, pDec, iStride, 0, (Int)(m_img_height>>1) -1, 0, (Int)(m_img_width>>1)-1, filtNo,  coeff);
     4238    }
    12264239    else
    1227       xCalcCorrelationFuncforChromaSlices(ALF_Cr, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride());
    1228 #else
    1229     xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride());
    1230 #endif
    1231   }
    1232  
    1233   err_code = xGauss(m_ppdAlfCorr, N);
    1234  
    1235   if(err_code)
    1236   {
    1237     xClearFilterCoefInt(qh, N);
    1238   }
    1239   else
    1240   {
    1241     for(i=0; i<N; i++)
    1242       m_pdDoubleAlfCoeff[i] = m_ppdAlfCorr[i][N];
    1243    
    1244     xQuantFilterCoef(m_pdDoubleAlfCoeff, qh, tap, g_uiBitDepth + g_uiBitIncrement);
    1245   }
    1246  
    1247  
    1248   if ((m_pcTempAlfParam->chroma_idc>>1)&0x01)
    1249   {
    1250 #if MTK_NONCROSS_INLOOP_FILTER
    1251     if(! m_bUseNonCrossALF)
    1252       xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 0);
    1253     else
    1254       xFrameChromaforSlices(ALF_Cb, pcPicDec, pcPicRest, qh, tap);
    1255 #else
    1256     xFrameChroma(pcPicDec, pcPicRest, qh, tap, 0);
    1257 #endif
    1258   }
    1259   if ((m_pcTempAlfParam->chroma_idc)&0x01)
    1260   {
    1261 #if MTK_NONCROSS_INLOOP_FILTER
    1262     if(! m_bUseNonCrossALF)
    1263       xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 1);
    1264     else
    1265       xFrameChromaforSlices(ALF_Cr, pcPicDec, pcPicRest, qh, tap);
    1266 #else
    1267     xFrameChroma(pcPicDec, pcPicRest, qh, tap, 1);
    1268 #endif
    1269   }
    1270  
    1271   if(m_pcTempAlfParam->chroma_idc<3)
    1272   {
    1273     if(m_pcTempAlfParam->chroma_idc==1)
     4240    {
     4241      xFilterChromaSlices(ALF_Cr, pcPicDec, pcPicRest, coeff, filtNo, iChromaFormatShift);
     4242    }
     4243  }
     4244
     4245  if(pcAlfParam->chroma_idc<3)
     4246  {
     4247    if(pcAlfParam->chroma_idc==1)
    12744248    {
    12754249      pcPicDec->copyToPicCb(pcPicRest);
    12764250    }
    1277     if(m_pcTempAlfParam->chroma_idc==2)
     4251    if(pcAlfParam->chroma_idc==2)
    12784252    {
    12794253      pcPicDec->copyToPicCr(pcPicRest);
    12804254    }
    12814255  }
    1282  
    1283 }
    1284 
     4256
     4257}
     4258#endif
     4259#if LCU_SYNTAX_ALF
     4260/** Restore the not-filtered pixels
     4261 * \param [in] imgDec picture buffer before filtering
     4262 * \param [out] imgRest picture buffer after filtering
     4263 * \param [in] stride stride size for 1-D picture memory
     4264 */
     4265Void TEncAdaptiveLoopFilter::xCopyDecToRestCUs(Pel* imgDec, Pel* imgRest, Int stride)
     4266#else
     4267/** Restore the not-filtered pixels
     4268 * \param pcPicDec picture buffer before filtering
     4269 * \param pcPicRest picture buffer after filtering
     4270 */
    12854271Void TEncAdaptiveLoopFilter::xCopyDecToRestCUs(TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
    1286 {
     4272#endif
     4273{
     4274
     4275  if(m_uiNumSlicesInPic > 1)
     4276  {
     4277#if LCU_SYNTAX_ALF
     4278    Pel* pPicDecLuma  = imgDec;
     4279    Pel* pPicRestLuma = imgRest;
     4280#else
     4281    Pel* pPicDecLuma  = pcPicDec->getLumaAddr();
     4282    Pel* pPicRestLuma = pcPicRest->getLumaAddr();
     4283    Int  stride       = pcPicDec->getStride();
     4284#endif
     4285    UInt SUWidth      = m_pcPic->getMinCUWidth();
     4286    UInt SUHeight     = m_pcPic->getMinCUHeight();
     4287
     4288    UInt startSU, endSU, LCUX, LCUY, currSU, LPelX, TPelY;
     4289    UInt posOffset;
     4290    Pel *pDec, *pRest;
     4291
     4292    for(Int s=0; s< m_uiNumSlicesInPic; s++)
     4293    {
     4294      if(!m_pcPic->getValidSlice(s))
     4295      {
     4296        continue;
     4297      }
     4298      std::vector< AlfLCUInfo* >&  vpSliceAlfLCU = m_pvpAlfLCU[s];
     4299      for(Int i=0; i< vpSliceAlfLCU.size(); i++)
     4300      {
     4301        AlfLCUInfo& rAlfLCU    = *(vpSliceAlfLCU[i]);
     4302        TComDataCU* pcCU       = rAlfLCU.pcCU;
     4303        startSU                = rAlfLCU.startSU;
     4304        endSU                  = rAlfLCU.endSU;
     4305        LCUX                 = pcCU->getCUPelX();
     4306        LCUY                 = pcCU->getCUPelY();
     4307
     4308        for(currSU= startSU; currSU<= endSU; currSU++)
     4309        {
     4310          LPelX   = LCUX + g_auiRasterToPelX[ g_auiZscanToRaster[currSU] ];
     4311          TPelY   = LCUY + g_auiRasterToPelY[ g_auiZscanToRaster[currSU] ];
     4312          if( !( LPelX < m_img_width )  || !( TPelY < m_img_height )  )
     4313          {
     4314            continue;
     4315          }
     4316          if(!pcCU->getAlfCtrlFlag(currSU))
     4317          {
     4318            posOffset = TPelY*stride + LPelX;
     4319            pDec = pPicDecLuma + posOffset;
     4320            pRest= pPicRestLuma+ posOffset;
     4321            for(Int y=0; y< SUHeight; y++)
     4322            {
     4323              ::memcpy(pRest, pDec, sizeof(Pel)*SUWidth);
     4324              pDec += stride;
     4325              pRest+= stride;
     4326            }
     4327          }
     4328        }
     4329      }
     4330    }
     4331    return;
     4332  }
     4333
    12874334  for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
    12884335  {
    12894336    TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
     4337#if LCU_SYNTAX_ALF
     4338    xCopyDecToRestCU(pcCU, 0, 0, imgDec, imgRest, stride);
     4339#else
    12904340    xCopyDecToRestCU(pcCU, 0, 0, pcPicDec, pcPicRest);
    1291   }
    1292 }
    1293 
     4341#endif
     4342  }
     4343}
     4344
     4345#if LCU_SYNTAX_ALF
     4346Void TEncAdaptiveLoopFilter::xCopyDecToRestCU(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, Pel* imgDec, Pel* imgRest, Int stride)
     4347#else
    12944348Void TEncAdaptiveLoopFilter::xCopyDecToRestCU(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
     4349#endif
    12954350{
    12964351  Bool bBoundary = false;
     
    13004355  UInt uiBPelY   = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1;
    13014356 
    1302   if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
     4357  if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) || ( uiBPelY >= pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
    13034358  {
    13044359    bBoundary = true;
     
    13134368      uiTPelY   = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
    13144369     
    1315       if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )     
     4370      if( ( uiLPelX < pcCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )     
     4371#if LCU_SYNTAX_ALF
     4372        xCopyDecToRestCU(pcCU, uiAbsPartIdx, uiDepth+1, imgDec, imgRest, stride);
     4373#else
    13164374        xCopyDecToRestCU(pcCU, uiAbsPartIdx, uiDepth+1, pcPicDec, pcPicRest);
     4375#endif
    13174376    }
    13184377    return;
     
    13214380  if (!pcCU->getAlfCtrlFlag(uiAbsPartIdx))
    13224381  {
     4382#if !LCU_SYNTAX_ALF
    13234383    UInt uiCUAddr = pcCU->getAddr();
    1324    
     4384#endif   
    13254385    Int iWidth = pcCU->getWidth(uiAbsPartIdx);
    13264386    Int iHeight = pcCU->getHeight(uiAbsPartIdx);
    1327    
     4387#if LCU_SYNTAX_ALF
     4388    copyPixelsInOneRegion(imgRest, imgDec, stride, (Int)uiTPelY, iHeight, (Int)uiLPelX, iWidth);
     4389#else
    13284390    Pel* pRec = pcPicDec->getLumaAddr(uiCUAddr, uiAbsPartIdx);
    13294391    Pel* pFilt = pcPicRest->getLumaAddr(uiCUAddr, uiAbsPartIdx);
     
    13414403      pFilt += iFiltStride;
    13424404    }
    1343   }
    1344 }
    1345 
    1346 Void TEncAdaptiveLoopFilter::xcollectStatCodeFilterCoeffForce0(int **pDiffQFilterCoeffIntPP, int fl, int sqrFiltLength,
    1347                                                                int filters_per_group, int bitsVarBin[])
    1348 {
    1349   int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal,
    1350   *pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
    1351   minKStart, minBitsKStart, bitsKStart;
    1352  
    1353   pDepthInt=pDepthIntTab[fl-2];
    1354  
    1355   maxScanVal=0;
    1356   for (i=0; i<sqrFiltLength; i++)
    1357   {
    1358     maxScanVal=max(maxScanVal, pDepthInt[i]);
    1359   }
    1360  
    1361   // vlc for all
    1362   memset(bitsCoeffScan, 0, MAX_SCAN_VAL * MAX_EXP_GOLOMB * sizeof(int));
    1363   for(ind=0; ind<filters_per_group; ++ind)
    1364   {
    1365     for(i = 0; i < sqrFiltLength; i++)
    1366     {     
    1367       scanPos=pDepthInt[i]-1;
    1368       coeffVal=abs(pDiffQFilterCoeffIntPP[ind][i]);
    1369       for (k=1; k<15; k++)
    1370       {
    1371         bitsCoeffScan[scanPos][k] += lengthGolomb(coeffVal, k);
    1372       }
    1373     }
    1374   }
    1375  
    1376   minBitsKStart=0;
    1377   minKStart = -1;
    1378   for (k=1; k<8; k++)
    1379   {
    1380     bitsKStart=0; kStart=k;
    1381     for (scanPos=0; scanPos<maxScanVal; scanPos++)
    1382     {
    1383       kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin];
    1384      
    1385       if (bitsCoeffScan[scanPos][kStart+1]<minBits)
    1386       {
    1387         kMin=kStart+1; minBits=bitsCoeffScan[scanPos][kMin];
    1388       }
    1389       kStart=kMin;
    1390       bitsKStart+=minBits;
    1391     }
    1392     if (bitsKStart<minBitsKStart || k==1)
    1393     {
    1394       minBitsKStart=bitsKStart;
    1395       minKStart=k;
    1396     }
    1397   }
    1398  
    1399   kStart = minKStart;
    1400   for (scanPos=0; scanPos<maxScanVal; scanPos++)
    1401   {
    1402     kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin];
    1403    
    1404     if (bitsCoeffScan[scanPos][kStart+1]<minBits)
    1405     {
    1406       kMin = kStart+1;
    1407       minBits = bitsCoeffScan[scanPos][kMin];
    1408     }
    1409    
    1410     kMinTab[scanPos] = kMin;
    1411     kStart = kMin;
    1412   }
    1413  
    1414   for(ind=0; ind<filters_per_group; ++ind)
    1415   {
    1416     bitsVarBin[ind]=0;
    1417     for(i = 0; i < sqrFiltLength; i++)
    1418     {
    1419       scanPos=pDepthInt[i]-1;
    1420       bitsVarBin[ind] += lengthGolomb(abs(pDiffQFilterCoeffIntPP[ind][i]), kMinTab[scanPos]);
    1421     }
    1422   }
    1423 }
    1424 
    1425 Void TEncAdaptiveLoopFilter::xdecideCoeffForce0(int codedVarBins[NO_VAR_BINS], double errorForce0Coeff[], double errorForce0CoeffTab[NO_VAR_BINS][2], int bitsVarBin[NO_VAR_BINS], double lambda, int filters_per_fr)
    1426 {
    1427   int filtNo;
    1428   double lagrangianDiff;
    1429   int ind;
    1430  
    1431   errorForce0Coeff[0]=errorForce0Coeff[1]=0;
    1432   for (ind=0; ind<16; ind++) codedVarBins[ind]=0;
    1433  
    1434   for(filtNo=0; filtNo<filters_per_fr; filtNo++)
    1435   {
    1436     // No coeffcient prediction bits used
    1437 #if ENABLE_FORCECOEFF0
    1438     lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]);
    1439     codedVarBins[filtNo]=(lagrangianDiff>0)? 1 : 0;
    1440     errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]];
    1441     errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1];
    1442 #else
    1443     lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]);
    1444     codedVarBins[filtNo]= 1;
    1445     errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]];
    1446     errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1];
    1447 #endif
    1448   }   
    1449 }
    1450 
    1451 double TEncAdaptiveLoopFilter::xfindBestCoeffCodMethod(int codedVarBins[NO_VAR_BINS], int *forceCoeff0,
    1452                                                        int **filterCoeffSymQuant, int fl, int sqrFiltLength,
    1453                                                        int filters_per_fr, double errorForce0CoeffTab[NO_VAR_BINS][2],
    1454                                                        double *errorQuant, double lambda)
    1455 
    1456 {
    1457   int bitsVarBin[NO_VAR_BINS], createBistream, coeffBits, coeffBitsForce0;
    1458   double errorForce0Coeff[2], lagrangianForce0, lagrangian;
    1459  
    1460   xcollectStatCodeFilterCoeffForce0(filterCoeffSymQuant, fl, sqrFiltLength, 
    1461                                     filters_per_fr, bitsVarBin);
    1462  
    1463   xdecideCoeffForce0(codedVarBins, errorForce0Coeff, errorForce0CoeffTab, bitsVarBin, lambda, filters_per_fr);
    1464  
    1465   coeffBitsForce0 = xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength,
    1466                                                 filters_per_fr, codedVarBins, createBistream=0, m_tempALFp);
    1467  
    1468   coeffBits = xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength, filters_per_fr,
    1469                                     createBistream=0, m_tempALFp);
    1470  
    1471   lagrangianForce0=errorForce0Coeff[0]+lambda*coeffBitsForce0;
    1472   lagrangian=errorForce0Coeff[1]+lambda*coeffBits;
    1473   if (lagrangianForce0<lagrangian)
    1474   {
    1475     *errorQuant=errorForce0Coeff[0];
    1476     *forceCoeff0=1;
    1477     return(lagrangianForce0);
    1478   }
    1479   else
    1480   {
    1481     *errorQuant=errorForce0Coeff[1];
    1482     *forceCoeff0=0;
    1483     return(lagrangian);
     4405#endif
     4406  }
     4407}
     4408
     4409double TEncAdaptiveLoopFilter::xfindBestCoeffCodMethod(int **filterCoeffSymQuant, int filter_shape, int sqrFiltLength, int filters_per_fr, double errorForce0CoeffTab[NO_VAR_BINS][2],
     4410  double lambda)
     4411{
     4412  Int coeffBits, i;
     4413  Double error=0, lagrangian;
     4414  coeffBits = xsendAllFiltersPPPred(filterCoeffSymQuant, filter_shape, sqrFiltLength, filters_per_fr,
     4415    0, m_tempALFp);
     4416  for(i=0;i<filters_per_fr;i++)
     4417  {
     4418    error += errorForce0CoeffTab[i][1];
     4419  }
     4420  lagrangian = error + lambda * coeffBits;
     4421  return (lagrangian);
     4422}
     4423
     4424/** Predict ALF luma filter coefficients. Centre coefficient is always predicted. Determines if left neighbour should be predicted.
     4425 */
     4426Void TEncAdaptiveLoopFilter::predictALFCoeffLumaEnc(ALFParam* pcAlfParam, Int **pfilterCoeffSym, Int filter_shape)
     4427{
     4428#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4429  Int alfPrecisionBit = getAlfPrecisionBit( m_alfQP );
     4430#endif
     4431  Int sum, coeffPred, ind;
     4432  const Int* pFiltMag = NULL;
     4433  pFiltMag = weightsTabShapes[filter_shape];
     4434  for(ind = 0; ind < pcAlfParam->filters_per_group; ++ind)
     4435  {
     4436    sum = 0;
     4437    for(Int i = 0; i < pcAlfParam->num_coeff-2; i++)
     4438    {
     4439      sum +=  pFiltMag[i]*pfilterCoeffSym[ind][i];
     4440    }
     4441
     4442    if((pcAlfParam->predMethod==0)|(ind==0))
     4443    {
     4444#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4445      coeffPred = ((1<<alfPrecisionBit)-sum) >> 2;
     4446#else
     4447      coeffPred = ((1<<ALF_NUM_BIT_SHIFT)-sum) >> 2;
     4448#endif
     4449    }
     4450    else
     4451    {
     4452      coeffPred = (0-sum) >> 2;
     4453    }
     4454    if(abs(pfilterCoeffSym[ind][pcAlfParam->num_coeff-2]-coeffPred) < abs(pfilterCoeffSym[ind][pcAlfParam->num_coeff-2]))
     4455    {
     4456      pcAlfParam->nbSPred[ind] = 0;
     4457    }
     4458    else
     4459    {
     4460      pcAlfParam->nbSPred[ind] = 1;
     4461      coeffPred = 0;
     4462    }
     4463    sum += pFiltMag[pcAlfParam->num_coeff-2]*pfilterCoeffSym[ind][pcAlfParam->num_coeff-2];
     4464    pfilterCoeffSym[ind][pcAlfParam->num_coeff-2] -= coeffPred;
     4465    if((pcAlfParam->predMethod==0)|(ind==0))
     4466    {
     4467#if LCU_SYNTAX_ALF && LCUALF_QP_DEPENDENT_BITS
     4468      coeffPred = (1<<alfPrecisionBit)-sum;
     4469#else
     4470      coeffPred = (1<<ALF_NUM_BIT_SHIFT)-sum;
     4471#endif
     4472    }
     4473    else
     4474    {
     4475      coeffPred = -sum;
     4476    }
     4477    pfilterCoeffSym[ind][pcAlfParam->num_coeff-1] -= coeffPred;
    14844478  }
    14854479}
     
    14934487  Int64 Newbit_ct;
    14944488 
    1495   bit_ct0 = xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0);
    1496  
     4489  for(ind = 0; ind < filters_per_group; ind++)
     4490  {
     4491    for(i = 0; i < sqrFiltLength; i++)
     4492    {
     4493      m_FilterCoeffQuantTemp[ind][i]=FilterCoeffQuant[ind][i];
     4494    }
     4495  }
     4496  ALFp->filters_per_group = filters_per_group;
     4497  ALFp->predMethod = 0;
     4498  ALFp->num_coeff = sqrFiltLength;
     4499  predictALFCoeffLumaEnc(ALFp, m_FilterCoeffQuantTemp, fl);
     4500  Int nbFlagIntra[16];
     4501  for(ind = 0; ind < filters_per_group; ind++)
     4502  {
     4503    nbFlagIntra[ind] = ALFp->nbSPred[ind];
     4504  }
     4505  bit_ct0 = xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group, 0);
    14974506  for(ind = 0; ind < filters_per_group; ++ind)
    14984507  {
     
    15084517    }
    15094518  }
     4519  ALFp->predMethod = 1;
     4520  predictALFCoeffLumaEnc(ALFp, m_diffFilterCoeffQuant, fl);
    15104521 
    15114522  if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0) >= bit_ct0)
     
    15134524    predMethod = 0; 
    15144525    if(filters_per_group > 1)
     4526    {
    15154527      bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, createBistream);
    1516     bit_ct += xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream);
     4528    }
     4529    bit_ct += xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group, createBistream);
    15174530  }
    15184531  else
     
    15204533    predMethod = 1;
    15214534    if(filters_per_group > 1)
     4535    {
    15224536      bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, createBistream);
     4537    }
    15234538    bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream);
    15244539  }
    1525  
    1526   ALFp->forceCoeff0 = 0;
    1527   ALFp->filters_per_group_diff = filters_per_group;
    15284540  ALFp->filters_per_group = filters_per_group;
    15294541  ALFp->predMethod = predMethod;
    15304542  ALFp->num_coeff = sqrFiltLength;
    1531   if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM)
    1532     ALFp->realfiltNo=2;
    1533   else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM)
    1534     ALFp->realfiltNo=1;
    1535   else
    1536     ALFp->realfiltNo=0;
    1537  
     4543  ALFp->filter_shape = fl;
    15384544  for(ind = 0; ind < filters_per_group; ++ind)
    15394545  {
     
    15414547    {
    15424548      if (predMethod) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i];
    1543       else ALFp->coeffmulti[ind][i] = FilterCoeffQuant[ind][i];
    1544     }
    1545   }
    1546   m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
    1547  
     4549      else
     4550      {
     4551        ALFp->coeffmulti[ind][i] = m_FilterCoeffQuantTemp[ind][i];
     4552      }
     4553    }
     4554    if(predMethod==0)
     4555    {
     4556      ALFp->nbSPred[ind] = nbFlagIntra[ind];
     4557    }
     4558  }
     4559  m_pcEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
    15484560 
    15494561  //  return(bit_ct);
     
    15514563}
    15524564
    1553 
    1554 Int TEncAdaptiveLoopFilter::xsendAllFiltersPPPredForce0(int **FilterCoeffQuant, int fl, int sqrFiltLength, int filters_per_group,
    1555                                                         int codedVarBins[NO_VAR_BINS], int createBistream, ALFParam* ALFp)
    1556 {
    1557   int ind, bit_ct=0, bit_ct0, i, j;
    1558   int filters_per_group_temp, filters_per_group_diff;
    1559   int chosenPred = 0;
    1560   int force0 = 1;
    1561   Int64 Newbit_ct;
    1562  
    1563   i = 0;
    1564   for(ind = 0; ind < filters_per_group; ind++)
    1565   {
    1566     if(codedVarBins[ind] == 1)
    1567     {
    1568       for(j = 0; j < sqrFiltLength; j++)
    1569         m_FilterCoeffQuantTemp[i][j]=FilterCoeffQuant[ind][j];
    1570       i++;
    1571     }
    1572   }
    1573   filters_per_group_diff = filters_per_group_temp = i;
    1574  
    1575   for(ind = 0; ind < filters_per_group; ++ind)
    1576   {
    1577     if(ind == 0)
    1578     {
    1579       for(i = 0; i < sqrFiltLength; i++)
    1580         m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i];
    1581     }
    1582     else
    1583     {
    1584       for(i = 0; i < sqrFiltLength; i++)
    1585         m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i] - m_FilterCoeffQuantTemp[ind-1][i];
    1586     }
    1587   }
    1588  
    1589   if(!((filters_per_group_temp == 0) && (filters_per_group == 1)))
    1590   {
    1591     bit_ct0 = xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, 0);
    1592    
    1593     if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_diff, 0) >= bit_ct0)
    1594     {
    1595       chosenPred = 0;
    1596       bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream);
    1597       bit_ct += xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, createBistream);
    1598     }
    1599     else
    1600     {
    1601       chosenPred = 1;
    1602       bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream);
    1603       bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_temp, createBistream);
    1604     }
    1605   }
    1606   ALFp->forceCoeff0 = 1;
    1607   ALFp->predMethod = chosenPred;
    1608   ALFp->filters_per_group_diff = filters_per_group_diff;
    1609   ALFp->filters_per_group = filters_per_group;
    1610   ALFp->num_coeff = sqrFiltLength;
    1611   if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM)
    1612     ALFp->realfiltNo=2;
    1613   else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM)
    1614     ALFp->realfiltNo=1;
    1615   else
    1616     ALFp->realfiltNo=0;
    1617  
    1618   for(ind = 0; ind < filters_per_group; ++ind)
    1619   {
    1620     ALFp->codedVarBins[ind] = codedVarBins[ind];
    1621   }
    1622   for(ind = 0; ind < filters_per_group_diff; ++ind)
    1623   {
    1624     for(i = 0; i < sqrFiltLength; i++)
    1625     {
    1626       if (chosenPred) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i];
    1627       else ALFp->coeffmulti[ind][i] = m_FilterCoeffQuantTemp[ind][i];
    1628     }
    1629   }
    1630   m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
    1631  
    1632   return ((Int)Newbit_ct);
    1633 }
    1634 
    1635 //filtNo==-1/realfiltNo, noFilters=filters_per_frames, realfiltNo=filtNo
    1636 Int TEncAdaptiveLoopFilter::xcodeAuxInfo(int filtNo, int noFilters, int varIndTab[NO_VAR_BINS], int frNo, int createBitstream,int realfiltNo, ALFParam* ALFp)
    1637 {
    1638   int i, filterPattern[NO_VAR_BINS], startSecondFilter=0, bitCt=0, codePrediction;
     4565Int TEncAdaptiveLoopFilter::xcodeAuxInfo(int filters_per_fr, int varIndTab[NO_VAR_BINS], int filter_shape, ALFParam* ALFp)
     4566{
     4567  int i, filterPattern[NO_VAR_BINS], startSecondFilter=0, bitCt=0;
    16394568  Int64 NewbitCt;
    1640  
    1641   codePrediction = 0;
    1642  
     4569
    16434570  //send realfiltNo (tap related)
    1644   ALFp->realfiltNo = realfiltNo;
    1645   ALFp->filtNo = filtNo;
    1646  
    1647   if(filtNo >= 0)
    1648   {
    1649     // decide startSecondFilter and filterPattern
    1650     if(noFilters > 1)
    1651     {
    1652       memset(filterPattern, 0, NO_VAR_BINS * sizeof(int));
    1653       for(i = 1; i < NO_VAR_BINS; ++i)
    1654       {
    1655         if(varIndTab[i] != varIndTab[i-1])
    1656         {
    1657           filterPattern[i] = 1;
    1658           startSecondFilter = i;
    1659         }
    1660       }
    1661       memcpy (ALFp->filterPattern, filterPattern, NO_VAR_BINS * sizeof(int));
    1662       ALFp->startSecondFilter = startSecondFilter;
    1663     }
    1664    
    1665     //send noFilters (filters_per_frame)
    1666     //0: filters_per_frame = 1
    1667     //1: filters_per_frame = 2
    1668     //2: filters_per_frame > 2 (exact number from filterPattern)
    1669 
    1670     ALFp->noFilters = min(noFilters-1,2);
    1671     if (noFilters<=0) printf("error\n");
    1672   }
    1673   m_pcDummyEntropyCoder->codeAuxCountBit(ALFp, &NewbitCt);
     4571  ALFp->filter_shape = filter_shape;
     4572
     4573  // decide startSecondFilter and filterPattern
     4574  memset(filterPattern, 0, NO_VAR_BINS * sizeof(int));
     4575  if(filters_per_fr > 1)
     4576  {
     4577    for(i = 1; i < NO_VAR_BINS; ++i)
     4578    {
     4579      if(varIndTab[i] != varIndTab[i-1])
     4580      {
     4581        filterPattern[i] = 1;
     4582        startSecondFilter = i;
     4583      }
     4584    }
     4585  }
     4586  memcpy (ALFp->filterPattern, filterPattern, NO_VAR_BINS * sizeof(int));
     4587  ALFp->startSecondFilter = startSecondFilter;
     4588
     4589  assert(filters_per_fr>0);
     4590  m_pcEntropyCoder->codeAuxCountBit(ALFp, &NewbitCt);
     4591
    16744592  bitCt = (int) NewbitCt;
    16754593  return(bitCt);
     
    16804598{
    16814599  int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal, len = 0,
    1682   *pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
     4600    *pDepthInt=NULL, kMinTab[MAX_SCAN_VAL], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
    16834601  minKStart, minBitsKStart, bitsKStart;
    1684  
    1685   pDepthInt = pDepthIntTab[fl-2];
     4602#if ALF_SINGLE_FILTER_SHAPE
     4603  Int minScanVal = MIN_SCAN_POS_CROSS;
     4604#else 
     4605  int minScanVal = (fl==ALF_STAR5x5) ? 0 : MIN_SCAN_POS_CROSS;
     4606#endif
     4607  pDepthInt = pDepthIntTabShapes[fl];
    16864608 
    16874609  maxScanVal = 0;
     
    17124634    bitsKStart = 0;
    17134635    kStart = k;
    1714     for(scanPos = 0; scanPos < maxScanVal; scanPos++)
     4636    for(scanPos = minScanVal; scanPos < maxScanVal; scanPos++)
    17154637    {
    17164638      kMin = kStart;
     
    17334655 
    17344656  kStart = minKStart;
    1735   for(scanPos = 0; scanPos < maxScanVal; scanPos++)
     4657  for(scanPos = minScanVal; scanPos < maxScanVal; scanPos++)
    17364658  {
    17374659    kMin = kStart;
     
    17504672  // Coding parameters
    17514673  //  len += lengthFilterCodingParams(minKStart, maxScanVal, kMinTab, createBitstream);
     4674#if LCU_SYNTAX_ALF
     4675  if (filters_per_group == 1)
     4676  {
     4677    len += lengthFilterCoeffs(sqrFiltLength, filters_per_group, pDepthInt, pDiffQFilterCoeffIntPP,
     4678      kTableTabShapes[ALF_CROSS9x7_SQUARE3x3], createBitstream);
     4679  }
     4680  else
     4681  {
     4682#endif
    17524683  len += (3 + maxScanVal);
    17534684 
     
    17554686  len += lengthFilterCoeffs(sqrFiltLength, filters_per_group, pDepthInt, pDiffQFilterCoeffIntPP,
    17564687                            kMinTab, createBitstream);
    1757  
     4688#if LCU_SYNTAX_ALF
     4689  }
     4690#endif
     4691
    17584692  return len;
    17594693}
     
    17644698  int q = coeffVal / m;
    17654699  if(coeffVal != 0)
     4700  {
    17664701    return(q + 2 + k);
     4702  }
    17674703  else
     4704  {
    17684705    return(q + 1 + k);
     4706  }
    17694707}
    17704708
     
    17754713 
    17764714  if(force0)
     4715  {
    17774716    bit_cnt = 2 + filters_per_group;
     4717  }
    17784718  else
     4719  {
    17794720    bit_cnt = 2;
     4721  }
    17804722  return bit_cnt;
    17814723 
     
    17934735    {
    17944736      scanPos = pDepthInt[i] - 1;
     4737#if LCU_SYNTAX_ALF
     4738      Int k = (filters_per_group == 1) ? kMinTab[i] : kMinTab[scanPos];
     4739      bit_cnt += lengthGolomb(abs(FilterCoeff[ind][i]), k);
     4740#else
    17954741      bit_cnt += lengthGolomb(abs(FilterCoeff[ind][i]), kMinTab[scanPos]);
     4742#endif
    17964743    }
    17974744  }
     
    17994746}
    18004747
    1801 Void   TEncAdaptiveLoopFilter::xEncALFLuma_qc ( TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost )
     4748#if !LCU_SYNTAX_ALF
     4749
     4750Void   TEncAdaptiveLoopFilter::xEncALFLuma ( TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost )
    18024751{
    18034752  //pcPicDec: extended decoded
    18044753  //pcPicRest: original decoded: filtered signal will be stored
    1805  
     4754
    18064755  UInt64  uiRate;
    18074756  UInt64  uiDist;
    18084757  Double dCost;
    1809 #if !MQT_ALF_NPASS
    1810   Int    Height = pcPicOrg->getHeight();
    1811   Int    Width = pcPicOrg->getWidth();
    1812 #endif
    18134758  Int    LumaStride = pcPicOrg->getStride();
    1814   imgpel* pOrg = (imgpel*) pcPicOrg->getLumaAddr();
    1815   imgpel* pRest = (imgpel*) pcPicRest->getLumaAddr();
    1816   imgpel* pDec = (imgpel*) pcPicDec->getLumaAddr();
    1817 
    1818   Int tap               = ALF_MIN_NUM_TAP;
    1819   m_pcTempAlfParam->tap = tap;
    1820 #if TI_ALF_MAX_VSIZE_7
    1821   m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(m_pcTempAlfParam->tap);
    1822   m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(m_pcTempAlfParam->tap);
    1823 #else
    1824   m_pcTempAlfParam->num_coeff = (Int)tap*tap/4 + 2;
    1825 #endif
    1826 
    1827 #if MQT_BA_RA
    1828 
    1829 #if MQT_ALF_NPASS
    1830 
    1831   static Bool   bFirst = true;
    1832   static Int*   apiVarIndTabBest[NUM_ALF_CLASS_METHOD];
    1833   static Int**  appiBestCoeffSet[NUM_ALF_CLASS_METHOD];
    1834 
    1835   static Double***  adBestySym;
    1836   static Double**** adBestESym;
    1837   static Double**   adBestpixAcc; 
    1838 
    1839   if(bFirst)
    1840   {
    1841     if(m_iALFEncodePassReduction)
    1842     {
    1843       initMatrix4D_double(&adBestESym,NUM_ALF_CLASS_METHOD,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
    1844       initMatrix3D_double(&adBestySym,NUM_ALF_CLASS_METHOD,  NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
    1845       initMatrix_double  (&adBestpixAcc,NUM_ALF_CLASS_METHOD,  NO_VAR_BINS );
    1846 
    1847       for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    1848       {
    1849 
    1850         apiVarIndTabBest[i] = new Int[NO_VAR_BINS];
    1851         appiBestCoeffSet[i] = new Int*[NO_VAR_BINS];
    1852         for(Int j=0; j< NO_VAR_BINS; j++)
    1853         {
    1854           appiBestCoeffSet[i][j]= new Int[MAX_SQR_FILT_LENGTH];
    1855         }
    1856       }
    1857     }
    1858 
    1859     bFirst = false;
    1860   }
    1861 
    1862   Int         ibestfiltNo[NUM_ALF_CLASS_METHOD];
    1863   Int         ibestfilters_per_fr[NUM_ALF_CLASS_METHOD];
    1864   Int64       iDist;
    1865   Int64       iMinMethodDist = MAX_INT;
    1866   UInt64      uiMinMethodRate;
    1867   Double      dMinMethodCost = MAX_DOUBLE;
    1868 #endif
     4759  Pel* pOrg  = pcPicOrg->getLumaAddr();
     4760  Pel* pRest = pcPicRest->getLumaAddr();
     4761  Pel* pDec  = pcPicDec->getLumaAddr();
     4762
     4763  Double    dMinMethodCost  = MAX_DOUBLE;
     4764  UInt64    uiMinMethodDist = MAX_UINT;
     4765  UInt64    uiMinMethodRate = MAX_UINT;
    18694766  Int       iBestClassMethod = ALF_RA;
    18704767  Double    adExtraCostReduction[NUM_ALF_CLASS_METHOD];
     
    18794776    pcAlfParam->alf_flag        = 1;
    18804777    pcAlfParam->chroma_idc      = 0;
    1881     pcAlfParam->cu_control_flag = 0;
    1882     pcAlfParam->tap = tap;
    1883 #if TI_ALF_MAX_VSIZE_7
    1884     pcAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap);
    1885     pcAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap);
    1886 #else
    1887     pcAlfParam->num_coeff = (Int)tap*tap/4 + 2;
    1888 #endif
    18894778
    18904779    switch(i)
     
    18924781    case ALF_RA:
    18934782      {
    1894         adExtraCostReduction[i] = (double)(m_im_height * m_im_width) * m_dLambdaLuma * 2.0 / 4096.0;
     4783        adExtraCostReduction[i] = (double)(m_img_height * m_img_width) * m_dLambdaLuma * 2.0 / 4096.0;
     4784      }
     4785      break;
     4786    case ALF_BA:
     4787      {
     4788        adExtraCostReduction[i] = 0.0;
    18954789      }
    18964790      break;
    18974791    default:
    18984792      {
    1899         adExtraCostReduction[i] = 0.0;
    1900       }
    1901       break;
    1902     }
    1903 
    1904   }
     4793        printf("Not a support adaptation method\n");
     4794        assert(0);
     4795        exit(-1);
     4796      }
     4797    }
     4798  }
     4799
    19054800
    19064801  for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    19074802  {
    1908     pcAlfParam       = &(cFrmAlfParam[i]);
    1909     m_varImg         = m_varImgMethods[i];
    1910     m_uiVarGenMethod = pcAlfParam->alf_pcr_region_flag = i;
    1911 #if MQT_ALF_NPASS
    1912     if(m_iALFEncodePassReduction)
    1913     {
    1914       m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[m_uiVarGenMethod];
    1915     }
     4803    m_uiVarGenMethod = i;
     4804
     4805    pcAlfParam       = &(cFrmAlfParam[m_uiVarGenMethod]);
     4806    m_varImg         = m_varImgMethods[m_uiVarGenMethod];
     4807
     4808    pcAlfParam->alf_pcr_region_flag = m_uiVarGenMethod;
     4809
    19164810    setInitialMask(pcPicOrg, pcPicDec);
    1917 #else
    1918     for (Int i=0; i<Height; i++)
    1919     {
    1920       for (Int j=0; j<Width; j++)
    1921       {
    1922         m_maskImg[i][j] = 1;
    1923       }
    1924     }
    1925 #if MTK_NONCROSS_INLOOP_FILTER
    1926     if(!m_bUseNonCrossALF)
    1927       calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
     4811
     4812    if(m_iALFEncodePassReduction == 0)
     4813    {
     4814      static Int best_filter_shape = 0;
     4815      if (m_uiVarGenMethod == 0)
     4816      {
     4817        UInt64 MinRate_Shape0 = MAX_INT;
     4818        UInt64 MinDist_Shape0 = MAX_INT;
     4819        Double MinCost_Shape0 = MAX_DOUBLE;
     4820
     4821        UInt64 MinRate_Shape1 = MAX_INT;
     4822        UInt64 MinDist_Shape1 = MAX_INT;
     4823        Double MinCost_Shape1 = MAX_DOUBLE;
     4824
     4825#if ALF_SINGLE_FILTER_SHAPE
     4826        Int filter_shape = 0;
     4827#else       
     4828        for (Int filter_shape = 0; filter_shape < 2 ;filter_shape ++)
     4829#endif
     4830        {
     4831          pcAlfParam->filter_shape = filter_shape;
     4832          pcAlfParam->num_coeff = m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[filter_shape];
     4833          xFirstFilteringFrameLuma(pOrg, pDec, m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, pcAlfParam->filter_shape, LumaStride);
     4834          xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost);
     4835          if (filter_shape == 0)
     4836          {
     4837            // copy Shape0
     4838            MinRate_Shape0 = uiRate;
     4839            MinDist_Shape0 = uiDist;
     4840            MinCost_Shape0 = dCost;
     4841            m_pcPicYuvTmp->copyToPicLuma(pcPicYuvRecShape0);
     4842            copyALFParam(pcAlfParamShape0, pcAlfParam);
     4843          }
     4844          else //if (filter_shape == 1)
     4845          {
     4846            // copy Shape1
     4847            MinRate_Shape1 = uiRate;
     4848            MinDist_Shape1 = uiDist;
     4849            MinCost_Shape1  = dCost;
     4850            m_pcPicYuvTmp->copyToPicLuma(pcPicYuvRecShape1);
     4851            copyALFParam(pcAlfParamShape1, pcAlfParam);
     4852          }
     4853        }
     4854
     4855        if (MinCost_Shape0 <= MinCost_Shape1)
     4856        {
     4857          pcPicYuvRecShape0->copyToPicLuma(m_pcPicYuvTmp);
     4858          copyALFParam(pcAlfParam, pcAlfParamShape0);
     4859          uiRate = MinRate_Shape0;
     4860          uiDist = MinDist_Shape0;
     4861          dCost = MinCost_Shape0;
     4862          best_filter_shape = 0;
     4863        }
     4864        else //if (MinCost_Shape1 < MinCost_Shape0)
     4865        {
     4866          pcPicYuvRecShape1->copyToPicLuma(m_pcPicYuvTmp);
     4867          copyALFParam(pcAlfParam, pcAlfParamShape1);
     4868          uiRate = MinRate_Shape1;
     4869          uiDist = MinDist_Shape1;
     4870          dCost = MinCost_Shape1;
     4871          best_filter_shape = 1;
     4872        }
     4873      }
     4874      else
     4875      {
     4876        pcAlfParam->filter_shape = best_filter_shape;
     4877        pcAlfParam->num_coeff = m_pcTempAlfParam->num_coeff = m_sqrFiltLengthTab[best_filter_shape];
     4878        xFirstFilteringFrameLuma(pOrg, pDec, m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, best_filter_shape, LumaStride);
     4879        xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost);
     4880      }
     4881    }
    19284882    else
    1929       calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
    1930 #else
    1931     calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
    1932 #endif
    1933 #endif
    1934 
    1935 #if MQT_ALF_NPASS
    1936     if(m_iALFEncodePassReduction)
    1937     {
    1938       xFirstEstimateFilteringFrameLumaAllTap(pOrg, pDec, LumaStride,
    1939         pcAlfParam, apiVarIndTabBest[i], appiBestCoeffSet[i],
    1940         ibestfiltNo[i], ibestfilters_per_fr[i],
    1941         adBestySym[i], adBestESym[i], adBestpixAcc[i],
    1942         uiRate, iDist, dCost);
    1943 
    1944     }
    1945     else
    1946     {
    1947 #endif
    1948       xFirstFilteringFrameLuma(pOrg, pDec, (imgpel*)m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, pcAlfParam->tap, LumaStride);
    1949 #if MQT_ALF_NPASS
    1950     }
    1951 #endif
    1952 
    1953 #if MQT_ALF_NPASS
    1954     if(!m_iALFEncodePassReduction)
    1955     {
    1956 #endif
    1957       xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost);
    1958 #if MQT_ALF_NPASS
    1959       iDist = (Int64)uiDist;
    1960     }
    1961 #endif
    1962 
    1963     dCost -= adExtraCostReduction[i];
     4883    {
     4884      decideFilterShapeLuma(pOrg, pDec, LumaStride, pcAlfParam, uiRate, uiDist, dCost);
     4885    }
     4886
     4887    dCost -= adExtraCostReduction[m_uiVarGenMethod];
    19644888
    19654889    if(dCost < dMinMethodCost)
    19664890    {
    1967       iBestClassMethod = i;
     4891      iBestClassMethod = m_uiVarGenMethod;
    19684892      dMinMethodCost = dCost;
    19694893      uiMinMethodRate= uiRate;
    1970       iMinMethodDist = iDist;
    1971 #if MQT_ALF_NPASS
    1972       if(!m_iALFEncodePassReduction)
    1973       {
    1974 #endif
     4894      uiMinMethodDist = uiDist;
     4895
     4896      if(m_iALFEncodePassReduction == 0)
     4897      {
    19754898        m_pcPicYuvTmp->copyToPicLuma(pcPicRest);
    1976 #if MQT_ALF_NPASS
    1977       }
    1978 #endif
    1979 
     4899      }
    19804900    } 
    1981 
    1982   }
    1983 
    1984   dMinMethodCost += adExtraCostReduction[iBestClassMethod];
    1985 
    1986 
    1987   m_varImg= m_varImgMethods[iBestClassMethod];
     4901  }
    19884902
    19894903  m_uiVarGenMethod = iBestClassMethod;
    1990 
    1991 #if MQT_ALF_NPASS
    1992   if(m_iALFEncodePassReduction)
    1993   {
    1994 
    1995     m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[iBestClassMethod];
    1996 
    1997     setInitialMask(pcPicOrg, pcPicDec);
    1998 
    1999     m_pcBestAlfParam->alf_flag = 1;
    2000     m_pcBestAlfParam->cu_control_flag = 0;
    2001     m_pcBestAlfParam->chroma_idc = 0;
    2002     m_pcBestAlfParam->alf_pcr_region_flag = iBestClassMethod;
    2003 
    2004     m_pcBestAlfParam->tap = cFrmAlfParam[iBestClassMethod].tap;
    2005 #if TI_ALF_MAX_VSIZE_7
    2006     m_pcBestAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap);
    2007     m_pcBestAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap);
    2008 #else
    2009     m_pcBestAlfParam->num_coeff = (Int)tap*tap/4 + 2;
    2010 #endif
    2011 
    2012     xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcBestAlfParam, m_pcBestAlfParam->tap, LumaStride);
    2013 
    2014 
    2015 
    2016     xCalcRDCost(pcPicOrg, pcPicRest, m_pcBestAlfParam, uiMinMethodRate, uiDist, dMinMethodCost);
    2017     iMinMethodDist = (Int64)uiDist;
     4904  dMinMethodCost += adExtraCostReduction[m_uiVarGenMethod];
     4905  m_varImg= m_varImgMethods[m_uiVarGenMethod];
     4906
     4907  pcAlfParam = &(cFrmAlfParam[m_uiVarGenMethod]);
     4908
     4909  ALFParam  cAlfParamWithBestMethod;
     4910  allocALFParam(&cAlfParamWithBestMethod); 
     4911
     4912
     4913  if(m_iALFEncodePassReduction ==0)
     4914  {
     4915    copyALFParam(&cAlfParamWithBestMethod, pcAlfParam);
    20184916  }
    20194917  else
    20204918  {
    2021 #endif
    2022     copyALFParam(m_pcBestAlfParam, &cFrmAlfParam[iBestClassMethod]);
    2023 #if MQT_ALF_NPASS
    2024 
    2025   }
    2026 #endif
    2027 
    2028   ruiMinRate = uiMinMethodRate;
    2029   ruiMinDist = (UInt64)iMinMethodDist;
    2030   rdMinCost =  dMinMethodCost;
    2031 
    2032 
     4919    cAlfParamWithBestMethod.alf_flag = 1;
     4920    cAlfParamWithBestMethod.chroma_idc = 0;
     4921    cAlfParamWithBestMethod.alf_pcr_region_flag = m_uiVarGenMethod;
     4922    cAlfParamWithBestMethod.filter_shape= pcAlfParam->filter_shape;
     4923    cAlfParamWithBestMethod.num_coeff = m_sqrFiltLengthTab[cAlfParamWithBestMethod.filter_shape];
     4924    decodeFilterSet(pcAlfParam, m_varIndTab, m_filterCoeffSym);
     4925    if(!m_bUseNonCrossALF)
     4926    {
     4927      filterLuma(pRest, pDec, LumaStride, 0, m_img_height-1, 0, m_img_width-1,  pcAlfParam->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg);
     4928    }
     4929    else
     4930    {
     4931      xfilterSlicesEncoder(pDec, pRest, LumaStride, pcAlfParam->filter_shape, m_filterCoeffSym, m_varIndTab, m_varImg);
     4932    }
     4933    xcodeFiltCoeff(m_filterCoeffSym, pcAlfParam->filter_shape, m_varIndTab, pcAlfParam->filters_per_group,&cAlfParamWithBestMethod);
     4934
     4935    xCalcRDCost(pcPicOrg, pcPicRest, &cAlfParamWithBestMethod, uiMinMethodRate, uiMinMethodDist, dMinMethodCost);
     4936
     4937  }
     4938
     4939  if(dMinMethodCost < rdMinCost )
     4940  {
     4941    ruiMinRate = uiMinMethodRate;
     4942    ruiMinDist = uiMinMethodDist;
     4943    rdMinCost =  dMinMethodCost;
     4944    copyALFParam(m_pcBestAlfParam, &cAlfParamWithBestMethod);
     4945  }
     4946
     4947  freeALFParam(&cAlfParamWithBestMethod);
    20334948  for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
    20344949  {
    20354950    freeALFParam(&cFrmAlfParam[i]);
    20364951  }
    2037 
    2038 #else 
    2039 
    2040 #if MQT_ALF_NPASS
    2041   setInitialMask(pcPicOrg, pcPicDec);
    2042 #else
    2043   for (Int i=0; i<Height; i++)
    2044   {
    2045     for (Int j=0; j<Width; j++)
    2046     {
    2047       m_maskImg[i][j] = 1;
    2048     }
    2049   }
    2050 #if MTK_NONCROSS_INLOOP_FILTER
     4952}
     4953
     4954
     4955
     4956Void   TEncAdaptiveLoopFilter::xFirstFilteringFrameLuma(Pel* imgOrg, Pel* imgDec, Pel* imgRest, ALFParam* ALFp, Int filtNo, Int stride)
     4957{
    20514958  if(!m_bUseNonCrossALF)
    2052     calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
     4959  {
     4960    xstoreInBlockMatrix(0, 0, m_img_height, m_img_width, true, true, imgOrg, imgDec, filtNo, stride);
     4961  }
    20534962  else
    2054     calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
    2055 #else
    2056   calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
    2057 #endif
    2058 #endif
    2059 
    2060 #if MQT_ALF_NPASS
    2061   if(m_iALFEncodePassReduction)
    2062   {
    2063     xFirstFilteringFrameLumaAllTap(pOrg, pDec, pRest, LumaStride);
    2064   }
    2065   else
    2066 #endif
    2067     xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcTempAlfParam, m_pcTempAlfParam->tap, LumaStride);
    2068 
    2069   xCalcRDCost(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost); // change this function final coding
    2070  
    2071   if( dCost < rdMinCost)
    2072   {
    2073     ruiMinRate = uiRate;
    2074     ruiMinDist = uiDist;
    2075     rdMinCost = dCost;
    2076     copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
    2077   }
    2078 
    2079 #endif
    2080 }
    2081 
    2082 Void   TEncAdaptiveLoopFilter::xFirstFilteringFrameLuma(imgpel* ImgOrg, imgpel* ImgDec, imgpel* ImgRest, ALFParam* ALFp, Int tap, Int Stride)
    2083 {
    2084 #if MTK_NONCROSS_INLOOP_FILTER
    2085   if(!m_bUseNonCrossALF)
    2086     xstoreInBlockMatrix(0, 0, m_im_height, m_im_width, true, true, ImgOrg, ImgDec, tap, Stride);
    2087   else
    2088     xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, tap, Stride);
    2089 #else
    2090   xstoreInBlockMatrix(ImgOrg, ImgDec, tap, Stride);
    2091 #endif
    2092 
    2093 
    2094   xFilteringFrameLuma_qc(ImgOrg, ImgDec, ImgRest, ALFp, tap, Stride);
    2095 }
    2096 
    2097 
    2098 #if MTK_NONCROSS_INLOOP_FILTER
    2099 Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrix(Int ypos, Int xpos, Int iheight, Int iwidth, Bool bResetBlockMatrix, Bool bSymmCopyBlockMatrix, imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride)
    2100 #else
    2101 Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrix(imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride)
    2102 #endif
    2103 {
    2104 #if MQT_BA_RA
    2105   Int var_step_size_w = VAR_SIZE_W;
    2106   Int var_step_size_h = VAR_SIZE_H;
    2107 #endif
    2108 
    2109   Int i,j,k,l,varInd,ii,jj;
    2110   Int x, y;
    2111   Int fl =tap/2;
    2112 #if TI_ALF_MAX_VSIZE_7
    2113   Int flV = TComAdaptiveLoopFilter::ALFFlHToFlV(fl);
    2114   Int sqrFiltLength = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(tap);
    2115 #else
    2116   Int sqrFiltLength=(((tap*tap)/4 + 1) + 1);
    2117 #endif
    2118   Int fl2=9/2; //extended size at each side of the frame
     4963  {
     4964    xstoreInBlockMatrixforSlices(imgOrg, imgDec, filtNo, stride);
     4965  }
     4966
     4967
     4968  xFilteringFrameLuma(imgOrg, imgDec, imgRest, ALFp, filtNo, stride);
     4969}
     4970
     4971Void   TEncAdaptiveLoopFilter::xstoreInBlockMatrix(Int ypos, Int xpos, Int iheight, Int iwidth, Bool bResetBlockMatrix, Bool bSymmCopyBlockMatrix, Pel* pImgOrg, Pel* pImgPad, Int filtNo, Int stride)
     4972{
     4973
     4974  Pel  regionOfInterested = (m_iDesignCurrentFilter ==1)?(1):(0);
     4975  Int     sqrFiltLength      = (filtNo == 2)?((Int)(MAX_SQR_FILT_LENGTH)):(m_sqrFiltLengthTab[filtNo]);
     4976  Int     yposEnd            = ypos + iheight -1;
     4977  Int     xposEnd            = xpos + iwidth  -1;
     4978  Double ***EShape           = m_EGlobalSym[filtNo];
     4979  Double **yShape            = m_yGlobalSym[filtNo];
     4980
    21194981  Int ELocal[MAX_SQR_FILT_LENGTH];
    2120   Int yLocal;
    2121   Int *p_pattern;
    2122   Int filtNo =2;
     4982  Pel *pImgPad1, *pImgPad2, *pImgPad3, *pImgPad4;
     4983  Int i,j,k,l,varInd, yLocal;
    21234984  double **E,*yy;
    2124 #if MTK_NONCROSS_INLOOP_FILTER
    2125   static Int count_valid;
    2126 #else
    2127   Int count_valid=0;
    2128 #endif
    2129   if (tap==9)
    2130     filtNo =0;
    2131   else if (tap==7)
    2132     filtNo =1;
    2133  
    2134   p_pattern= m_patternTab[filtNo];
    2135  
    2136 #if MTK_NONCROSS_INLOOP_FILTER
     4985
     4986  static Int numValidPels;
    21374987  if(bResetBlockMatrix)
    21384988  {
    2139     count_valid = 0;
    2140 #endif
    2141   memset( m_pixAcc, 0,sizeof(double)*NO_VAR_BINS);
    2142   for (varInd=0; varInd<NO_VAR_BINS; varInd++)
    2143   {
    2144     memset(m_yGlobalSym[filtNo][varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
    2145     for (k=0; k<sqrFiltLength; k++)
    2146     {
    2147       memset(m_EGlobalSym[filtNo][varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
    2148     }
    2149   }
    2150   for (i = fl2; i < m_im_height+fl2; i++)
    2151   {
    2152     for (j = fl2; j < m_im_width+fl2; j++)
    2153     {
    2154       if (m_maskImg[i-fl2][j-fl2] == 1)
    2155       {
    2156         count_valid++;
    2157       }
    2158     }
    2159   }
    2160 #if MTK_NONCROSS_INLOOP_FILTER
    2161   }
    2162 #endif
    2163 
    2164   {
    2165 #if MTK_NONCROSS_INLOOP_FILTER
    2166     x = y = fl2; //cytsai: shall x, y  be removed ?
    2167 
    2168     for (i= ypos; i< ypos + iheight; i++)
    2169     {
    2170       for (j= xpos; j< xpos + iwidth; j++)
    2171       {
    2172 #else
    2173     for (i=0,y=fl2; i<m_im_height; i++,y++)
    2174     {
    2175       for (j=0,x=fl2; j<m_im_width; j++,x++)
    2176       {
    2177 #endif
    2178 #if MQT_ALF_NPASS
    2179         Int condition = (m_maskImg[i][j] == 1);
    2180         if (m_iDesignCurrentFilter)
    2181         {
    2182           condition = (m_maskImg[i][j] == 0 && count_valid > 0);
     4989    numValidPels = 0;
     4990    memset( m_pixAcc, 0,sizeof(double)*NO_VAR_BINS);
     4991    for (varInd=0; varInd<NO_VAR_BINS; varInd++)
     4992    {
     4993      memset(yShape[varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
     4994      for (k=0; k<sqrFiltLength; k++)
     4995      {
     4996        memset(EShape[varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
     4997      }
     4998    }
     4999    for (i = 0; i < m_img_height; i++)
     5000    {
     5001      for (j = 0; j < m_img_width; j++)
     5002      {
     5003        if (m_maskImg[i][j] == regionOfInterested)
     5004        {
     5005          numValidPels++;
    21835006        }
    2184         if(!condition)
    2185         {
    2186 #else
    2187         if (m_maskImg[i][j] == 0 && count_valid > 0)
    2188         {
    2189 
     5007      }
     5008    }
     5009  }
     5010
     5011  Int yLineInLCU;
     5012  Int paddingLine ;
     5013
     5014  pImgPad += (ypos* stride);
     5015  pImgOrg += (ypos* stride);
     5016
     5017  switch(filtNo)
     5018  {
     5019#if !ALF_SINGLE_FILTER_SHAPE
     5020  case ALF_STAR5x5:
     5021    {
     5022      for (i= ypos; i<= yposEnd; i++)
     5023      {
     5024        yLineInLCU = i % m_lcuHeight;
     5025
     5026        if (yLineInLCU < m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height )
     5027        {
     5028          pImgPad1 = pImgPad +   stride;
     5029          pImgPad2 = pImgPad -   stride;
     5030          pImgPad3 = pImgPad + 2*stride;
     5031          pImgPad4 = pImgPad - 2*stride;
     5032        }
     5033        else if (yLineInLCU < m_lineIdxPadTop)
     5034        {
     5035          paddingLine = - yLineInLCU + m_lineIdxPadTop - 1;
     5036          pImgPad1 = pImgPad + min(paddingLine, 1)*stride;
     5037          pImgPad2 = pImgPad -   stride;
     5038          pImgPad3 = pImgPad + min(paddingLine, 2)*stride;
     5039          pImgPad4 = pImgPad - 2*stride;
    21905040        }
    21915041        else
    21925042        {
    2193 #endif
    2194 #if MQT_BA_RA
    2195           varInd = m_varImg[i/var_step_size_h][j/var_step_size_w];
    2196 #else
    2197           varInd=min(m_varImg[i][j], NO_VAR_BINS-1);
    2198 #endif
    2199           k=0;
    2200           memset(ELocal, 0, sqrFiltLength*sizeof(int));
    2201 #if TI_ALF_MAX_VSIZE_7
    2202           for (ii = -flV; ii < 0; ii++)
    2203 #else
    2204           for (ii=-fl; ii<0; ii++)
    2205 #endif
     5043          paddingLine = yLineInLCU - m_lineIdxPadTop;
     5044          pImgPad1 = pImgPad +   stride;
     5045          pImgPad2 = pImgPad - min(paddingLine, 1)*stride;
     5046          pImgPad3 = pImgPad + 2*stride;
     5047          pImgPad4 = pImgPad - min(paddingLine, 2)*stride;
     5048        }
     5049
     5050        if ( (yLineInLCU == m_lineIdxPadTop || yLineInLCU == m_lineIdxPadTop-1) && i-yLineInLCU+m_lcuHeight < m_img_height )
     5051        {
     5052          pImgPad+= stride;
     5053          pImgOrg+= stride;
     5054          continue;
     5055        }
     5056        else
     5057        {
     5058        for (j= xpos; j<= xposEnd; j++)
     5059        {
     5060          if ( (m_maskImg[i][j] == regionOfInterested) || (numValidPels == 0) )
    22065061          {
    2207             for (jj=-fl-ii; jj<=fl+ii; jj++)
    2208             { 
    2209               ELocal[p_pattern[k++]]+=(ImgDec[(i+ii)*Stride + (j+jj)]+ImgDec[(i-ii)*Stride + (j-jj)]);
     5062            varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W];
     5063            memset(ELocal, 0, 10*sizeof(Int));
     5064
     5065            ELocal[0] = (pImgPad3[j+2] + pImgPad4[j-2]);
     5066            ELocal[1] = (pImgPad3[j  ] + pImgPad4[j  ]);
     5067            ELocal[2] = (pImgPad3[j-2] + pImgPad4[j+2]);
     5068
     5069            ELocal[3] = (pImgPad1[j+1] + pImgPad2[j-1]);
     5070            ELocal[4] = (pImgPad1[j  ] + pImgPad2[j  ]);
     5071            ELocal[5] = (pImgPad1[j-1] + pImgPad2[j+1]);
     5072
     5073            ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     5074            ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     5075            ELocal[8] = (pImgPad[j  ]);
     5076
     5077            yLocal= pImgOrg[j];
     5078            m_pixAcc[varInd]+=(yLocal*yLocal);
     5079            E= EShape[varInd]; 
     5080            yy= yShape[varInd];
     5081
     5082            for (k=0; k<10; k++)
     5083            {
     5084              for (l=k; l<10; l++)
     5085              {
     5086                E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     5087              }
     5088              yy[k]+=(double)(ELocal[k]*yLocal);
    22105089            }
    2211           }
    2212           for (jj=-fl; jj<0; jj++)
    2213             ELocal[p_pattern[k++]]+=(ImgDec[(i)*Stride + (j+jj)]+ImgDec[(i)*Stride + (j-jj)]);
    2214           ELocal[p_pattern[k++]]+=ImgDec[(i)*Stride + (j)];
    2215           ELocal[sqrFiltLength-1]=1;
    2216           yLocal=ImgOrg[(i)*Stride + (j)];
    2217 
    2218           m_pixAcc[varInd]+=(yLocal*yLocal);
    2219           E= m_EGlobalSym[filtNo][varInd];
    2220           yy= m_yGlobalSym[filtNo][varInd];
    2221 
    2222           for (k=0; k<sqrFiltLength; k++)
    2223           {
    2224             for (l=k; l<sqrFiltLength; l++)
    2225               E[k][l]+=(double)(ELocal[k]*ELocal[l]);
    2226             yy[k]+=(double)(ELocal[k]*yLocal);
     5090
    22275091          }
    22285092        }
    2229       }
    2230     }
    2231   }
    2232 
    2233 #if MTK_NONCROSS_INLOOP_FILTER
     5093        pImgPad+= stride;
     5094        pImgOrg+= stride;
     5095        }
     5096      }
     5097    }
     5098    break;
     5099  case ALF_CROSS9x9:
     5100    {
     5101      Pel *pImgPad5, *pImgPad6, *pImgPad7, *pImgPad8;
     5102#else
     5103  case ALF_CROSS9x7_SQUARE3x3:
     5104    {
     5105      Pel *pImgPad5, *pImgPad6;
     5106#endif
     5107      for (i= ypos; i<= yposEnd; i++)
     5108      {
     5109        yLineInLCU = i % m_lcuHeight;
     5110
     5111        if (yLineInLCU<m_lineIdxPadBot || i-yLineInLCU+m_lcuHeight >= m_img_height)
     5112        {
     5113          pImgPad1 = pImgPad +   stride;
     5114          pImgPad2 = pImgPad -   stride;
     5115          pImgPad3 = pImgPad + 2*stride;
     5116          pImgPad4 = pImgPad - 2*stride;
     5117          pImgPad5 = pImgPad + 3*stride;
     5118          pImgPad6 = pImgPad - 3*stride;
     5119#if !ALF_SINGLE_FILTER_SHAPE
     5120          pImgPad7 = pImgPad + 4*stride;
     5121          pImgPad8 = pImgPad - 4*stride;
     5122#endif
     5123        }
     5124        else if (yLineInLCU<m_lineIdxPadTop)
     5125        {
     5126          paddingLine = - yLineInLCU + m_lineIdxPadTop - 1;
     5127          pImgPad1 = (paddingLine < 1) ? pImgPad : pImgPad + min(paddingLine, 1)*stride;
     5128          pImgPad2 = (paddingLine < 1) ? pImgPad : pImgPad -   stride;
     5129          pImgPad3 = (paddingLine < 2) ? pImgPad : pImgPad + min(paddingLine, 2)*stride;
     5130          pImgPad4 = (paddingLine < 2) ? pImgPad : pImgPad - 2*stride;
     5131          pImgPad5 = (paddingLine < 3) ? pImgPad : pImgPad + min(paddingLine, 3)*stride;
     5132          pImgPad6 = (paddingLine < 3) ? pImgPad : pImgPad - 3*stride;
     5133#if !ALF_SINGLE_FILTER_SHAPE
     5134          pImgPad7 = (paddingLine < 4) ? pImgPad : pImgPad + min(paddingLine, 4)*stride;
     5135          pImgPad8 = (paddingLine < 4) ? pImgPad : pImgPad - 4*stride;
     5136#endif
     5137        }
     5138        else
     5139        {
     5140          paddingLine = yLineInLCU - m_lineIdxPadTop;
     5141          pImgPad1 = (paddingLine < 1) ? pImgPad : pImgPad +   stride;
     5142          pImgPad2 = (paddingLine < 1) ? pImgPad : pImgPad - min(paddingLine, 1)*stride;
     5143          pImgPad3 = (paddingLine < 2) ? pImgPad : pImgPad + 2*stride;
     5144          pImgPad4 = (paddingLine < 2) ? pImgPad : pImgPad - min(paddingLine, 2)*stride;
     5145          pImgPad5 = (paddingLine < 3) ? pImgPad : pImgPad + 3*stride;
     5146          pImgPad6 = (paddingLine < 3) ? pImgPad : pImgPad - min(paddingLine, 3)*stride;
     5147#if !ALF_SINGLE_FILTER_SHAPE
     5148          pImgPad7 = (paddingLine < 4) ? pImgPad : pImgPad + 4*stride;
     5149          pImgPad8 = (paddingLine < 4) ? pImgPad : pImgPad - min(paddingLine, 4)*stride;
     5150#endif
     5151        }         
     5152
     5153        for (j= xpos; j<= xposEnd; j++)
     5154        {
     5155          if ( (m_maskImg[i][j] == regionOfInterested) || (numValidPels == 0) )
     5156          {
     5157            varInd = m_varImg[i/VAR_SIZE_H][j/VAR_SIZE_W];
     5158
     5159#if ALF_SINGLE_FILTER_SHAPE
     5160            memset(ELocal, 0, (sqrFiltLength+1)*sizeof(Int));
     5161
     5162            ELocal[0] = (pImgPad5[j]+pImgPad6[j]);
     5163            ELocal[1] = (pImgPad3[j]+pImgPad4[j]);
     5164            ELocal[2] = (pImgPad1[j-1]+pImgPad2[j+1]);
     5165            ELocal[3] = (pImgPad1[j]+pImgPad2[j]);
     5166            ELocal[4] = (pImgPad1[j+1]+pImgPad2[j-1]);
     5167            ELocal[5] = (pImgPad[j+4]+pImgPad[j-4]);
     5168            ELocal[6] = (pImgPad[j+3]+pImgPad[j-3]);
     5169            ELocal[7] = (pImgPad[j+2]+pImgPad[j-2]);
     5170            ELocal[8] = (pImgPad[j+1]+pImgPad[j-1]);
     5171            ELocal[9] = (pImgPad[j  ]);
     5172#else
     5173            memset(ELocal, 0, 10*sizeof(Int));
     5174
     5175            ELocal[0] = (pImgPad7[j] + pImgPad8[j]);
     5176
     5177            ELocal[1] = (pImgPad5[j] + pImgPad6[j]);
     5178
     5179            ELocal[2] = (pImgPad3[j] + pImgPad4[j]);
     5180
     5181            ELocal[3] = (pImgPad1[j] + pImgPad2[j]);
     5182
     5183            ELocal[4] = (pImgPad[j+4] + pImgPad[j-4]);
     5184            ELocal[5] = (pImgPad[j+3] + pImgPad[j-3]);
     5185            ELocal[6] = (pImgPad[j+2] + pImgPad[j-2]);
     5186            ELocal[7] = (pImgPad[j+1] + pImgPad[j-1]);
     5187            ELocal[8] = (pImgPad[j  ] );
     5188#endif
     5189            yLocal= pImgOrg[j];
     5190            m_pixAcc[varInd]+=(yLocal*yLocal);
     5191            E= EShape[varInd];
     5192            yy= yShape[varInd];
     5193
     5194#if ALF_SINGLE_FILTER_SHAPE
     5195            for (k=0; k<(sqrFiltLength+1); k++)
     5196            {
     5197              for (l=k; l<(sqrFiltLength+1); l++)
     5198              {
     5199                E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     5200              }
     5201              yy[k]+=(double)(ELocal[k]*yLocal);
     5202            }
     5203#else
     5204            for (k=0; k<10; k++)
     5205            {
     5206              for (l=k; l<10; l++)
     5207              {
     5208                E[k][l]+=(double)(ELocal[k]*ELocal[l]);
     5209              }
     5210              yy[k]+=(double)(ELocal[k]*yLocal);
     5211            }
     5212#endif
     5213          }
     5214        }
     5215        pImgPad+= stride;
     5216        pImgOrg+= stride;
     5217      }
     5218
     5219    }
     5220    break;
     5221  default:
     5222    {
     5223      printf("Not a supported filter shape\n");
     5224      assert(0);
     5225      exit(1);
     5226    }
     5227  }
     5228
    22345229  if(bSymmCopyBlockMatrix)
    22355230  {
    2236 #endif
    2237 
    2238   // Matrix EGlobalSeq is symmetric, only part of it is calculated
    2239   for (varInd=0; varInd<NO_VAR_BINS; varInd++)
    2240   {
    2241     double **pE = m_EGlobalSym[filtNo][varInd];
    2242     for (k=1; k<sqrFiltLength; k++)
    2243     {
    2244       for (l=0; l<k; l++)
    2245       {
    2246         pE[k][l]=pE[l][k];
    2247       }
    2248     }
    2249   }
    2250 #if MTK_NONCROSS_INLOOP_FILTER
    2251   }
    2252 #endif
    2253 
    2254 }
    2255 
    2256 Void   TEncAdaptiveLoopFilter::xFilteringFrameLuma_qc(imgpel* ImgOrg, imgpel* imgY_pad, imgpel* ImgFilt, ALFParam* ALFp, Int tap, Int Stride)
    2257 {
    2258   int  filtNo,filters_per_fr;
     5231    for (varInd=0; varInd<NO_VAR_BINS; varInd++)
     5232    {
     5233      double **pE = EShape[varInd];
     5234      for (k=1; k<sqrFiltLength; k++)
     5235      {
     5236        for (l=0; l<k; l++)
     5237        {
     5238          pE[k][l]=pE[l][k];
     5239        }
     5240      }
     5241    }
     5242  }
     5243}
     5244
     5245
     5246Void   TEncAdaptiveLoopFilter::xFilteringFrameLuma(Pel* imgOrg, Pel* imgPad, Pel* imgFilt, ALFParam* ALFp, Int filtNo, Int stride)
     5247{
    22595248  static double **ySym, ***ESym;
    2260   int lambda_val = (Int) m_dLambdaLuma;
    2261   lambda_val = lambda_val * (1<<(2*g_uiBitIncrement));
    2262   if (tap==9)
    2263     filtNo =0;
    2264   else if (tap==7)
    2265     filtNo =1;
    2266   else
    2267     filtNo=2;
    2268  
     5249  Int  filters_per_fr;
     5250  Int lambdaVal = (Int) m_dLambdaLuma;
     5251  lambdaVal = lambdaVal * (1<<(2*g_uiBitIncrement));
     5252
    22695253  ESym=m_EGlobalSym[filtNo]; 
    22705254  ySym=m_yGlobalSym[filtNo];
    2271  
    2272   xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr,
    2273                          m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val);
    2274  
    2275   // g_filterCoeffPrevSelected = g_filterCoeffSym