HEVC Test Model (HM)  HM-16.18
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TEncSearch.cpp
Go to the documentation of this file.
1 /* The copyright in this software is being made available under the BSD
2  * License, included below. This software may be subject to other third party
3  * and contributor rights, including patent rights, and no such rights are
4  * granted under this license.
5  *
6  * Copyright (c) 2010-2017, ITU/ISO/IEC
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  * this list of conditions and the following disclaimer in the documentation
16  * and/or other materials provided with the distribution.
17  * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18  * be used to endorse or promote products derived from this software without
19  * specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
38 #include "TLibCommon/CommonDef.h"
39 #include "TLibCommon/TComRom.h"
41 #include "TEncSearch.h"
42 #include "TLibCommon/TComTU.h"
43 #include "TLibCommon/Debug.h"
44 #include <math.h>
45 #include <limits>
46 
47 
50 
// Candidate offsets tried around a base motion vector by the sub-pel
// refinement code later in this file; this table is the one selected when
// iFrac == 2. Entry 0 is the zero offset, entries 1-8 are the eight
// neighbouring offsets with components in {-1, 0, 1}.
// NOTE(review): the TComMv arguments are presumably (hor, ver) -- confirm
// against the TComMv constructor.
51 static const TComMv s_acMvRefineH[9] =
52 {
53  TComMv( 0, 0 ), // 0
54  TComMv( 0, -1 ), // 1
55  TComMv( 0, 1 ), // 2
56  TComMv( -1, 0 ), // 3
57  TComMv( 1, 0 ), // 4
58  TComMv( -1, -1 ), // 5
59  TComMv( 1, -1 ), // 6
60  TComMv( -1, 1 ), // 7
61  TComMv( 1, 1 ) // 8
62 };
63 
// Same nine offsets as s_acMvRefineH but in a different visiting order; this
// table is the one selected when iFrac != 2 by the sub-pel refinement code
// later in this file. The trailing comment on each row gives the position of
// this table first, then the index of the identical offset in s_acMvRefineH.
64 static const TComMv s_acMvRefineQ[9] =
65 {
66  TComMv( 0, 0 ), // 0 (H-table entry 0)
67  TComMv( 0, -1 ), // 1 (H-table entry 1)
68  TComMv( 0, 1 ), // 2 (H-table entry 2)
69  TComMv( -1, -1 ), // 3 (H-table entry 5)
70  TComMv( 1, -1 ), // 4 (H-table entry 6)
71  TComMv( -1, 0 ), // 5 (H-table entry 3)
72  TComMv( 1, 0 ), // 6 (H-table entry 4)
73  TComMv( -1, 1 ), // 7 (H-table entry 7)
74  TComMv( 1, 1 ) // 8 (H-table entry 8)
75 };
76 
77 static Void offsetSubTUCBFs(TComTU &rTu, const ComponentID compID)
78 {
79  TComDataCU *pcCU = rTu.getCU();
80  const UInt uiTrDepth = rTu.GetTransformDepthRel();
81  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU(compID);
82  const UInt partIdxesPerSubTU = rTu.GetAbsPartIdxNumParts(compID) >> 1;
83 
84  //move the CBFs down a level and set the parent CBF
85 
86  UChar subTUCBF[2];
87  UChar combinedSubTUCBF = 0;
88 
89  for (UInt subTU = 0; subTU < 2; subTU++)
90  {
91  const UInt subTUAbsPartIdx = uiAbsPartIdx + (subTU * partIdxesPerSubTU);
92 
93  subTUCBF[subTU] = pcCU->getCbf(subTUAbsPartIdx, compID, uiTrDepth);
94  combinedSubTUCBF |= subTUCBF[subTU];
95  }
96 
97  for (UInt subTU = 0; subTU < 2; subTU++)
98  {
99  const UInt subTUAbsPartIdx = uiAbsPartIdx + (subTU * partIdxesPerSubTU);
100  const UChar compositeCBF = (subTUCBF[subTU] << 1) | combinedSubTUCBF;
101 
102  pcCU->setCbfPartRange((compositeCBF << uiTrDepth), compID, subTUAbsPartIdx, partIdxesPerSubTU);
103  }
104 }
105 
106 
// Constructor: member-initialiser list and body.
// NOTE(review): the line carrying the constructor's name and parameter list
// is not present in this listing (presumably TEncSearch::TEncSearch()), and
// some lines inside the body are missing as well (the listing numbers jump).
// Every pointer member named here starts as NULL, both search ranges start at
// 0, the search method starts as MESEARCH_FULL, and the object starts out not
// initialized.
108 : m_puhQTTempTrIdx(NULL)
109 , m_pcQTTempTComYuv(NULL)
110 , m_pcEncCfg (NULL)
111 , m_pcTrQuant (NULL)
112 , m_pcRdCost (NULL)
113 , m_pcEntropyCoder (NULL)
114 , m_iSearchRange (0)
115 , m_bipredSearchRange (0)
116 , m_motionEstimationSearchMethod (MESEARCH_FULL)
117 , m_pppcRDSbacCoder (NULL)
118 , m_pcRDGoOnSbacCoder (NULL)
119 , m_pTempPel (NULL)
120 , m_isInitialized (false)
121 {
  // Null the per-component buffer pointers.
122  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
123  {
124  m_ppcQTTempCoeff[ch] = NULL;
  // NOTE(review): the statement guarded by this #if is missing from the listing.
125 #if ADAPTIVE_QP_SELECTION
127 #endif
128  m_puhQTTempCbf[ch] = NULL;
131  m_pcQTTempTUCoeff[ch] = NULL;
  // NOTE(review): the statement guarded by this #if is missing from the listing.
132 #if ADAPTIVE_QP_SELECTION
134 #endif
136  }
137 
  // Zero each row of the adaptive search range table.
138  for (Int i=0; i<MAX_NUM_REF_LIST_ADAPT_SR; i++)
139  {
140  memset (m_aaiAdaptSR[i], 0, MAX_IDX_ADAPT_SR * sizeof (Int));
141  }
  // Zero each row of the MVP index cost table.
142  for (Int i=0; i<AMVP_MAX_NUM_CANDS+1; i++)
143  {
144  memset (m_auiMVPIdxCost[i], 0, (AMVP_MAX_NUM_CANDS+1) * sizeof (UInt) );
145  }
146 
148 }
149 
150 
// Releases the buffers held by this object and marks it as not initialized.
// NOTE(review): the function header line is not present in this listing;
// presumably this is TEncSearch::destroy(), since the block that follows calls
// destroy() while m_isInitialized is set. Some interior lines are also missing
// (the listing numbers jump), so the set of released buffers shown here may be
// incomplete.
152 {
  // Must only be called on an initialized object.
153  assert (m_isInitialized);
154  if ( m_pTempPel )
155  {
156  delete [] m_pTempPel;
157  m_pTempPel = NULL;
158  }
159 
  // The per-layer buffers can only be walked when the configuration is still
  // available, because the layer count is derived from it.
160  if ( m_pcEncCfg )
161  {
162  const UInt uiNumLayersAllocated = m_pcEncCfg->getQuadtreeTULog2MaxSize()-m_pcEncCfg->getQuadtreeTULog2MinSize()+1;
163 
164  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
165  {
  // Free the per-layer arrays before the array of pointers that holds them.
166  for (UInt layer = 0; layer < uiNumLayersAllocated; layer++)
167  {
168  delete[] m_ppcQTTempCoeff[ch][layer];
169 #if ADAPTIVE_QP_SELECTION
170  delete[] m_ppcQTTempArlCoeff[ch][layer];
171 #endif
172  }
173  delete[] m_ppcQTTempCoeff[ch];
174  delete[] m_puhQTTempCbf[ch];
175 #if ADAPTIVE_QP_SELECTION
176  delete[] m_ppcQTTempArlCoeff[ch];
177 #endif
178  }
179 
180  for( UInt layer = 0; layer < uiNumLayersAllocated; layer++ )
181  {
182  m_pcQTTempTComYuv[layer].destroy();
183  }
184  }
185 
186  delete[] m_puhQTTempTrIdx;
187  delete[] m_pcQTTempTComYuv;
188 
  // Per-component buffers that do not depend on the layer count.
189  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
190  {
191  delete[] m_pSharedPredTransformSkip[ch];
192  delete[] m_pcQTTempTUCoeff[ch];
193 #if ADAPTIVE_QP_SELECTION
194  delete[] m_ppcQTTempTUArlCoeff[ch];
195 #endif
197  delete[] m_puhQTTempTransformSkipFlag[ch];
198  }
200 
202  m_isInitialized = false;
203 }
204 
// Calls destroy() only when the object is still marked as initialized, so a
// prior explicit destroy() does not trip the assertion inside it.
// NOTE(review): the header line is not present in this listing; presumably
// this is the destructor TEncSearch::~TEncSearch() -- confirm.
206 {
207  if (m_isInitialized)
208  {
209  destroy();
210  }
211 }
212 
213 
214 
215 
// Stores the collaborating objects and search parameters, fills the adaptive
// search range and MVP index cost tables, and allocates the temporary buffers.
// Asserts that the object has not been initialized yet and marks it as
// initialized at the end.
// NOTE(review): the line with the function name and first parameter is not
// present in this listing (presumably "Void TEncSearch::init( TEncCfg*
// pcEncCfg," since pcEncCfg is used below); several allocation lines inside
// the component loop are missing as well.
217  TComTrQuant* pcTrQuant,
218  Int iSearchRange,
219  Int bipredSearchRange,
220  MESearchMethod motionEstimationSearchMethod,
221  const UInt maxCUWidth,
222  const UInt maxCUHeight,
223  const UInt maxTotalCUDepth,
224  TEncEntropy* pcEntropyCoder,
225  TComRdCost* pcRdCost,
226  TEncSbac*** pppcRDSbacCoder,
227  TEncSbac* pcRDGoOnSbacCoder
228  )
229 {
230  assert (!m_isInitialized);
231  m_pcEncCfg = pcEncCfg;
232  m_pcTrQuant = pcTrQuant;
233  m_iSearchRange = iSearchRange;
234  m_bipredSearchRange = bipredSearchRange;
235  m_motionEstimationSearchMethod = motionEstimationSearchMethod;
236  m_pcEntropyCoder = pcEntropyCoder;
237  m_pcRdCost = pcRdCost;
238 
239  m_pppcRDSbacCoder = pppcRDSbacCoder;
240  m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder;
241 
  // Every adaptive search range entry starts at the configured search range.
242  for (UInt iDir = 0; iDir < MAX_NUM_REF_LIST_ADAPT_SR; iDir++)
243  {
244  for (UInt iRefIdx = 0; iRefIdx < MAX_IDX_ADAPT_SR; iRefIdx++)
245  {
246  m_aaiAdaptSR[iDir][iRefIdx] = iSearchRange;
247  }
248  }
249 
250  // initialize motion cost
  // Entries with iIdx >= iNum are filled with MAX_INT; the others receive the
  // value returned by xGetMvpIdxBits(iIdx, iNum).
251  for( Int iNum = 0; iNum < AMVP_MAX_NUM_CANDS+1; iNum++)
252  {
253  for( Int iIdx = 0; iIdx < AMVP_MAX_NUM_CANDS; iIdx++)
254  {
255  if (iIdx < iNum)
256  {
257  m_auiMVPIdxCost[iIdx][iNum] = xGetMvpIdxBits(iIdx, iNum);
258  }
259  else
260  {
261  m_auiMVPIdxCost[iIdx][iNum] = MAX_INT;
262  }
263  }
264  }
265 
266  const ChromaFormat cform=pcEncCfg->getChromaFormatIdc();
267  initTempBuff(cform);
268 
269  m_pTempPel = new Pel[maxCUWidth*maxCUHeight];
270 
  // One layer per TU size between the configured minimum and maximum log2 size.
271  const UInt uiNumLayersToAllocate = pcEncCfg->getQuadtreeTULog2MaxSize()-pcEncCfg->getQuadtreeTULog2MinSize()+1;
  // 1 << (2 * maxTotalCUDepth), i.e. 4 to the power maxTotalCUDepth.
272  const UInt uiNumPartitions = 1<<(maxTotalCUDepth<<1);
273  for (UInt ch=0; ch<MAX_NUM_COMPONENT; ch++)
274  {
  // Per-component scale shifts for this chroma format; they shrink the
  // coefficient buffers allocated below.
275  const UInt csx=::getComponentScaleX(ComponentID(ch), cform);
276  const UInt csy=::getComponentScaleY(ComponentID(ch), cform);
277  m_ppcQTTempCoeff[ch] = new TCoeff* [uiNumLayersToAllocate];
278 #if ADAPTIVE_QP_SELECTION
279  m_ppcQTTempArlCoeff[ch] = new TCoeff*[uiNumLayersToAllocate];
280 #endif
281  m_puhQTTempCbf[ch] = new UChar [uiNumPartitions];
282 
283  for (UInt layer = 0; layer < uiNumLayersToAllocate; layer++)
284  {
285  m_ppcQTTempCoeff[ch][layer] = new TCoeff[(maxCUWidth*maxCUHeight)>>(csx+csy)];
286 #if ADAPTIVE_QP_SELECTION
287  m_ppcQTTempArlCoeff[ch][layer] = new TCoeff[(maxCUWidth*maxCUHeight)>>(csx+csy) ];
288 #endif
289  }
290 
291  m_phQTTempCrossComponentPredictionAlpha[ch] = new SChar [uiNumPartitions];
  // NOTE(review): lines are missing from the listing here, including the
  // statement guarded by the #if below.
294 #if ADAPTIVE_QP_SELECTION
296 #endif
297  m_puhQTTempTransformSkipFlag[ch] = new UChar [uiNumPartitions];
298  }
299  m_puhQTTempTrIdx = new UChar [uiNumPartitions];
300  m_pcQTTempTComYuv = new TComYuv[uiNumLayersToAllocate];
301  for( UInt ui = 0; ui < uiNumLayersToAllocate; ++ui )
302  {
303  m_pcQTTempTComYuv[ui].create( maxCUWidth, maxCUHeight, pcEncCfg->getChromaFormatIdc() );
304  }
305  m_pcQTTempTransformSkipTComYuv.create( maxCUWidth, maxCUHeight, pcEncCfg->getChromaFormatIdc() );
307  m_isInitialized = true;
308 }
309 
310 
// Evaluates one integer search position (iSearchX, iSearchY) and, when its
// cost (distortion plus motion vector cost) is lower than rcStruct.uiBestSad,
// records it in rcStruct as the new best: cost, position, uiDistance,
// ucPointNr, and uiBestRound reset to 0.
// NOTE(review): several lines of this function are missing from the listing
// (the listing numbers jump): the condition selecting between the two main
// branches, the declaration/initial value of uiTempSad, and the bodies of the
// row-count branches. The comments below describe only what is visible.
311 __inline Void TEncSearch::xTZSearchHelp( const TComPattern* const pcPatternKey, IntTZSearchStruct& rcStruct, const Int iSearchX, const Int iSearchY, const UChar ucPointNr, const UInt uiDistance )
312 {
313  Distortion uiSad = 0;
314 
  // Reference sample address for this candidate: base pointer offset by
  // iSearchY rows and iSearchX columns.
315  const Pel* const piRefSrch = rcStruct.piRefY + iSearchY * rcStruct.iYStride + iSearchX;
316 
317  //-- jclee for using the SAD function pointer
318  m_pcRdCost->setDistParam( pcPatternKey, piRefSrch, rcStruct.iYStride, m_cDistParam );
319 
321 
322  // distortion
323  m_cDistParam.bitDepth = pcPatternKey->getBitDepthY();
325 
  // NOTE(review): the "if" condition that opens this branch is missing.
327  {
328  Int isubShift = 0;
329  // motion cost
330  Distortion uiBitCost = m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY );
331 
332  // Skip search if bit cost is already larger than best SAD
333  if (uiBitCost < rcStruct.uiBestSad)
334  {
  // NOTE(review): the bodies of these row-count branches are missing.
335  if ( m_cDistParam.iRows > 32 )
336  {
338  }
339  else if ( m_cDistParam.iRows > 16 )
340  {
342  }
343  else if ( m_cDistParam.iRows > 8 )
344  {
346  }
347  else
348  {
350  }
351 
  // NOTE(review): uiTempSad is declared on a line missing from the listing.
353  if((uiTempSad + uiBitCost) < rcStruct.uiBestSad)
354  {
355  uiSad += uiTempSad >> m_cDistParam.iSubShift;
  // Accumulate further partial distortions while iSubShift is positive,
  // stopping early once the scaled running total plus the bit cost exceeds
  // the best cost so far.
356  while(m_cDistParam.iSubShift > 0)
357  {
358  isubShift = m_cDistParam.iSubShift -1;
359  m_cDistParam.pOrg = pcPatternKey->getROIY() + (pcPatternKey->getPatternLStride() << isubShift);
360  m_cDistParam.pCur = piRefSrch + (rcStruct.iYStride << isubShift);
361  uiTempSad = m_cDistParam.DistFunc( &m_cDistParam );
362  uiSad += uiTempSad >> m_cDistParam.iSubShift;
363  if(((uiSad << isubShift) + uiBitCost) > rcStruct.uiBestSad)
364  {
365  break;
366  }
367 
  // NOTE(review): a line is missing here; nothing visible in this loop
  // changes m_cDistParam.iSubShift.
369  }
370 
  // Only a candidate evaluated down to iSubShift == 0 may become the new best.
371  if(m_cDistParam.iSubShift == 0)
372  {
373  uiSad += uiBitCost;
374  if( uiSad < rcStruct.uiBestSad )
375  {
376  rcStruct.uiBestSad = uiSad;
377  rcStruct.iBestX = iSearchX;
378  rcStruct.iBestY = iSearchY;
379  rcStruct.uiBestDistance = uiDistance;
380  rcStruct.uiBestRound = 0;
381  rcStruct.ucPointNr = ucPointNr;
383  }
384  }
385  }
386  }
387  }
388  else
389  {
390  // fast encoder decision: use subsampled SAD when rows > 8 for integer ME
  // NOTE(review): the condition opening this block and the body of the
  // inner "if" are missing from the listing.
392  {
393  if ( m_cDistParam.iRows > 8 )
394  {
396  }
397  }
398 
399  uiSad = m_cDistParam.DistFunc( &m_cDistParam );
400 
401  // only add motion cost if uiSad is smaller than best. Otherwise pointless
402  // to add motion cost.
403  if( uiSad < rcStruct.uiBestSad )
404  {
405  // motion cost
406  uiSad += m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY );
407 
408  if( uiSad < rcStruct.uiBestSad )
409  {
410  rcStruct.uiBestSad = uiSad;
411  rcStruct.iBestX = iSearchX;
412  rcStruct.iBestY = iSearchY;
413  rcStruct.uiBestDistance = uiDistance;
414  rcStruct.uiBestRound = 0;
415  rcStruct.ucPointNr = ucPointNr;
417  }
418  }
419  }
420 }
421 
422 __inline Void TEncSearch::xTZ2PointSearch( const TComPattern* const pcPatternKey, IntTZSearchStruct& rcStruct, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB )
423 {
424  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
425  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
426  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
427  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
428 
429  // 2 point search, // 1 2 3
430  // check only the 2 untested points // 4 0 5
431  // around the start point // 6 7 8
432  Int iStartX = rcStruct.iBestX;
433  Int iStartY = rcStruct.iBestY;
434  switch( rcStruct.ucPointNr )
435  {
436  case 1:
437  {
438  if ( (iStartX - 1) >= iSrchRngHorLeft )
439  {
440  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY, 0, 2 );
441  }
442  if ( (iStartY - 1) >= iSrchRngVerTop )
443  {
444  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY - 1, 0, 2 );
445  }
446  }
447  break;
448  case 2:
449  {
450  if ( (iStartY - 1) >= iSrchRngVerTop )
451  {
452  if ( (iStartX - 1) >= iSrchRngHorLeft )
453  {
454  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY - 1, 0, 2 );
455  }
456  if ( (iStartX + 1) <= iSrchRngHorRight )
457  {
458  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY - 1, 0, 2 );
459  }
460  }
461  }
462  break;
463  case 3:
464  {
465  if ( (iStartY - 1) >= iSrchRngVerTop )
466  {
467  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY - 1, 0, 2 );
468  }
469  if ( (iStartX + 1) <= iSrchRngHorRight )
470  {
471  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY, 0, 2 );
472  }
473  }
474  break;
475  case 4:
476  {
477  if ( (iStartX - 1) >= iSrchRngHorLeft )
478  {
479  if ( (iStartY + 1) <= iSrchRngVerBottom )
480  {
481  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY + 1, 0, 2 );
482  }
483  if ( (iStartY - 1) >= iSrchRngVerTop )
484  {
485  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY - 1, 0, 2 );
486  }
487  }
488  }
489  break;
490  case 5:
491  {
492  if ( (iStartX + 1) <= iSrchRngHorRight )
493  {
494  if ( (iStartY - 1) >= iSrchRngVerTop )
495  {
496  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY - 1, 0, 2 );
497  }
498  if ( (iStartY + 1) <= iSrchRngVerBottom )
499  {
500  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY + 1, 0, 2 );
501  }
502  }
503  }
504  break;
505  case 6:
506  {
507  if ( (iStartX - 1) >= iSrchRngHorLeft )
508  {
509  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY , 0, 2 );
510  }
511  if ( (iStartY + 1) <= iSrchRngVerBottom )
512  {
513  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY + 1, 0, 2 );
514  }
515  }
516  break;
517  case 7:
518  {
519  if ( (iStartY + 1) <= iSrchRngVerBottom )
520  {
521  if ( (iStartX - 1) >= iSrchRngHorLeft )
522  {
523  xTZSearchHelp( pcPatternKey, rcStruct, iStartX - 1, iStartY + 1, 0, 2 );
524  }
525  if ( (iStartX + 1) <= iSrchRngHorRight )
526  {
527  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY + 1, 0, 2 );
528  }
529  }
530  }
531  break;
532  case 8:
533  {
534  if ( (iStartX + 1) <= iSrchRngHorRight )
535  {
536  xTZSearchHelp( pcPatternKey, rcStruct, iStartX + 1, iStartY, 0, 2 );
537  }
538  if ( (iStartY + 1) <= iSrchRngVerBottom )
539  {
540  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iStartY + 1, 0, 2 );
541  }
542  }
543  break;
544  default:
545  {
546  assert( false );
547  }
548  break;
549  } // switch( rcStruct.ucPointNr )
550 }
551 
552 
553 
554 
555 __inline Void TEncSearch::xTZ8PointSquareSearch( const TComPattern* const pcPatternKey, IntTZSearchStruct& rcStruct, const TComMv* const pcMvSrchRngLT, const TComMv* const pcMvSrchRngRB, const Int iStartX, const Int iStartY, const Int iDist )
556 {
557  const Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
558  const Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
559  const Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
560  const Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
561 
562  // 8 point search, // 1 2 3
563  // search around the start point // 4 0 5
564  // with the required distance // 6 7 8
565  assert( iDist != 0 );
566  const Int iTop = iStartY - iDist;
567  const Int iBottom = iStartY + iDist;
568  const Int iLeft = iStartX - iDist;
569  const Int iRight = iStartX + iDist;
570  rcStruct.uiBestRound += 1;
571 
572  if ( iTop >= iSrchRngVerTop ) // check top
573  {
574  if ( iLeft >= iSrchRngHorLeft ) // check top left
575  {
576  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iTop, 1, iDist );
577  }
578  // top middle
579  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
580 
581  if ( iRight <= iSrchRngHorRight ) // check top right
582  {
583  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iTop, 3, iDist );
584  }
585  } // check top
586  if ( iLeft >= iSrchRngHorLeft ) // check middle left
587  {
588  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
589  }
590  if ( iRight <= iSrchRngHorRight ) // check middle right
591  {
592  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
593  }
594  if ( iBottom <= iSrchRngVerBottom ) // check bottom
595  {
596  if ( iLeft >= iSrchRngHorLeft ) // check bottom left
597  {
598  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iBottom, 6, iDist );
599  }
600  // check bottom middle
601  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
602 
603  if ( iRight <= iSrchRngHorRight ) // check bottom right
604  {
605  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iBottom, 8, iDist );
606  }
607  } // check bottom
608 }
609 
610 
611 
612 
// Diamond search around (iStartX, iStartY) at distance iDist. Every tested
// position goes through xTZSearchHelp, which updates rcStruct when a position
// is better; rcStruct.uiBestRound is incremented once up front.
// Three patterns are used depending on iDist:
//   iDist == 1      : top, left, right, bottom; the four corners are added
//                     only when bCheckCornersAtDist1 is set.
//   1 < iDist <= 8  : top/left/right/bottom at iDist plus four diagonal points
//                     at iDist>>1 (reported with distance iDist>>1).
//   iDist > 8       : top/left/right/bottom at iDist plus, for index 1..3,
//                     four points stepped by (iDist>>2)*index along the
//                     diamond edges; all reported with point number 0.
// Positions outside the range given by pcMvSrchRngLT / pcMvSrchRngRB are
// skipped. iDist must be non-zero (asserted).
613 __inline Void TEncSearch::xTZ8PointDiamondSearch( const TComPattern*const pcPatternKey,
614  IntTZSearchStruct& rcStruct,
615  const TComMv*const pcMvSrchRngLT,
616  const TComMv*const pcMvSrchRngRB,
617  const Int iStartX,
618  const Int iStartY,
619  const Int iDist,
620  const Bool bCheckCornersAtDist1 )
621 {
622  const Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
623  const Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
624  const Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
625  const Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
626 
627  // 8 point search, // 1 2 3
628  // search around the start point // 4 0 5
629  // with the required distance // 6 7 8
630  assert ( iDist != 0 );
631  const Int iTop = iStartY - iDist;
632  const Int iBottom = iStartY + iDist;
633  const Int iLeft = iStartX - iDist;
634  const Int iRight = iStartX + iDist;
635  rcStruct.uiBestRound += 1;
636 
637  if ( iDist == 1 )
638  {
639  if ( iTop >= iSrchRngVerTop ) // check top
640  {
641  if (bCheckCornersAtDist1)
642  {
643  if ( iLeft >= iSrchRngHorLeft) // check top-left
644  {
645  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iTop, 1, iDist );
646  }
647  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
648  if ( iRight <= iSrchRngHorRight ) // check top-right
649  {
650  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iTop, 3, iDist );
651  }
652  }
653  else
654  {
655  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
656  }
657  }
658  if ( iLeft >= iSrchRngHorLeft ) // check middle left
659  {
660  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
661  }
662  if ( iRight <= iSrchRngHorRight ) // check middle right
663  {
664  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
665  }
666  if ( iBottom <= iSrchRngVerBottom ) // check bottom
667  {
668  if (bCheckCornersAtDist1)
669  {
670  if ( iLeft >= iSrchRngHorLeft) // check bottom-left
671  {
672  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iBottom, 6, iDist );
673  }
674  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
675  if ( iRight <= iSrchRngHorRight ) // check bottom-right
676  {
677  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iBottom, 8, iDist );
678  }
679  }
680  else
681  {
682  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
683  }
684  }
685  }
686  else
687  {
688  if ( iDist <= 8 )
689  {
  // Half-distance coordinates used for the four diagonal points.
690  const Int iTop_2 = iStartY - (iDist>>1);
691  const Int iBottom_2 = iStartY + (iDist>>1);
692  const Int iLeft_2 = iStartX - (iDist>>1);
693  const Int iRight_2 = iStartX + (iDist>>1);
694 
  // Fast path: the whole diamond is inside the search range, so no
  // per-point range checks are needed.
695  if ( iTop >= iSrchRngVerTop && iLeft >= iSrchRngHorLeft &&
696  iRight <= iSrchRngHorRight && iBottom <= iSrchRngVerBottom ) // check border
697  {
698  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
699  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iTop_2, 1, iDist>>1 );
700  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iTop_2, 3, iDist>>1 );
701  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
702  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
703  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iBottom_2, 6, iDist>>1 );
704  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iBottom_2, 8, iDist>>1 );
705  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
706  }
707  else // check border
708  {
709  if ( iTop >= iSrchRngVerTop ) // check top
710  {
711  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 2, iDist );
712  }
713  if ( iTop_2 >= iSrchRngVerTop ) // check half top
714  {
715  if ( iLeft_2 >= iSrchRngHorLeft ) // check half left
716  {
717  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iTop_2, 1, (iDist>>1) );
718  }
719  if ( iRight_2 <= iSrchRngHorRight ) // check half right
720  {
721  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iTop_2, 3, (iDist>>1) );
722  }
723  } // check half top
724  if ( iLeft >= iSrchRngHorLeft ) // check left
725  {
726  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 4, iDist );
727  }
728  if ( iRight <= iSrchRngHorRight ) // check right
729  {
730  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 5, iDist );
731  }
732  if ( iBottom_2 <= iSrchRngVerBottom ) // check half bottom
733  {
734  if ( iLeft_2 >= iSrchRngHorLeft ) // check half left
735  {
736  xTZSearchHelp( pcPatternKey, rcStruct, iLeft_2, iBottom_2, 6, (iDist>>1) );
737  }
738  if ( iRight_2 <= iSrchRngHorRight ) // check half right
739  {
740  xTZSearchHelp( pcPatternKey, rcStruct, iRight_2, iBottom_2, 8, (iDist>>1) );
741  }
742  } // check half bottom
743  if ( iBottom <= iSrchRngVerBottom ) // check bottom
744  {
745  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 7, iDist );
746  }
747  } // check border
748  }
749  else // iDist > 8
750  {
  // Fast path: the whole diamond is inside the search range.
751  if ( iTop >= iSrchRngVerTop && iLeft >= iSrchRngHorLeft &&
752  iRight <= iSrchRngHorRight && iBottom <= iSrchRngVerBottom ) // check border
753  {
754  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 0, iDist );
755  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 0, iDist );
756  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 0, iDist );
757  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 0, iDist );
  // Three intermediate points on each of the four diamond edges.
758  for ( Int index = 1; index < 4; index++ )
759  {
760  const Int iPosYT = iTop + ((iDist>>2) * index);
761  const Int iPosYB = iBottom - ((iDist>>2) * index);
762  const Int iPosXL = iStartX - ((iDist>>2) * index);
763  const Int iPosXR = iStartX + ((iDist>>2) * index);
764  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYT, 0, iDist );
765  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYT, 0, iDist );
766  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYB, 0, iDist );
767  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYB, 0, iDist );
768  }
769  }
770  else // check border
771  {
772  if ( iTop >= iSrchRngVerTop ) // check top
773  {
774  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iTop, 0, iDist );
775  }
776  if ( iLeft >= iSrchRngHorLeft ) // check left
777  {
778  xTZSearchHelp( pcPatternKey, rcStruct, iLeft, iStartY, 0, iDist );
779  }
780  if ( iRight <= iSrchRngHorRight ) // check right
781  {
782  xTZSearchHelp( pcPatternKey, rcStruct, iRight, iStartY, 0, iDist );
783  }
784  if ( iBottom <= iSrchRngVerBottom ) // check bottom
785  {
786  xTZSearchHelp( pcPatternKey, rcStruct, iStartX, iBottom, 0, iDist );
787  }
  // Same edge points as in the fast path, each range-checked individually.
788  for ( Int index = 1; index < 4; index++ )
789  {
790  const Int iPosYT = iTop + ((iDist>>2) * index);
791  const Int iPosYB = iBottom - ((iDist>>2) * index);
792  const Int iPosXL = iStartX - ((iDist>>2) * index);
793  const Int iPosXR = iStartX + ((iDist>>2) * index);
794 
795  if ( iPosYT >= iSrchRngVerTop ) // check top
796  {
797  if ( iPosXL >= iSrchRngHorLeft ) // check left
798  {
799  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYT, 0, iDist );
800  }
801  if ( iPosXR <= iSrchRngHorRight ) // check right
802  {
803  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYT, 0, iDist );
804  }
805  } // check top
806  if ( iPosYB <= iSrchRngVerBottom ) // check bottom
807  {
808  if ( iPosXL >= iSrchRngHorLeft ) // check left
809  {
810  xTZSearchHelp( pcPatternKey, rcStruct, iPosXL, iPosYB, 0, iDist );
811  }
812  if ( iPosXR <= iSrchRngHorRight ) // check right
813  {
814  xTZSearchHelp( pcPatternKey, rcStruct, iPosXR, iPosYB, 0, iDist );
815  }
816  } // check bottom
817  } // for ...
818  } // check border
819  } // iDist <= 8
820  } // iDist == 1
821 }
822 
// Sub-pel refinement step: evaluates up to nine candidate offsets around
// baseRefMv, taken from s_acMvRefineH when iFrac == 2 and from s_acMvRefineQ
// otherwise, and keeps the cheapest one.
// On return rcMvFrac holds the winning OFFSET from the table (not the sum with
// the incoming rcMvFrac), and the function returns that candidate's cost
// (distortion plus motion vector cost).
// NOTE(review): the line with the return type, function name and first
// parameter (pcPatternKey) is not present in this listing, and a few interior
// lines are missing as well (the listing numbers jump).
824  TComMv baseRefMv,
825  Int iFrac, TComMv& rcMvFrac,
826  Bool bAllowUseOfHadamard
827  )
828 {
829  Distortion uiDist;
830  Distortion uiDistBest = std::numeric_limits<Distortion>::max();
831  UInt uiDirecBest = 0;
832 
833  Pel* piRefPos;
834  Int iRefStride = m_filteredBlock[0][0].getStride(COMPONENT_Y);
835 
  // The last argument is true only when the encoder configuration reports
  // getUseHADME() and the caller allows it.
836  m_pcRdCost->setDistParam( pcPatternKey, m_filteredBlock[0][0].getAddr(COMPONENT_Y), iRefStride, 1, m_cDistParam, m_pcEncCfg->getUseHADME() && bAllowUseOfHadamard );
837 
838  const TComMv* pcMvRefine = (iFrac == 2 ? s_acMvRefineH : s_acMvRefineQ);
839 
840 #if MCTS_ENC_CHECK
841  UInt maxRefinements = 9;
842  Int mvShift = 2;
843 
844  // filter length of sub-sample generation filter to be considered
845  const UInt LumaLTSampleOffset = 3;
846  const UInt LumaRBSampleOffset = 4;
847 
  // NOTE(review): the condition that opens this block is missing.
849  {
850  // if close to tile borders
851  if ( pcPatternKey->getROIYPosX() + (baseRefMv.getHor() >> mvShift ) < pcPatternKey->getTileLeftTopPelPosX() + LumaLTSampleOffset ||
852  pcPatternKey->getROIYPosY() + (baseRefMv.getVer() >> mvShift ) < pcPatternKey->getTileLeftTopPelPosY() + LumaLTSampleOffset ||
853  pcPatternKey->getROIYPosX() + (baseRefMv.getHor() >> mvShift) > pcPatternKey->getTileRightBottomPelPosX() - pcPatternKey->getROIYWidth() - LumaRBSampleOffset ||
854  pcPatternKey->getROIYPosY() + (baseRefMv.getVer() >> mvShift) > pcPatternKey->getTileRightBottomPelPosY() - pcPatternKey->getROIYHeight() - LumaRBSampleOffset
855  )
856  {
857  // only allow full pel positions to avoid filter dependency
858  maxRefinements = 1;
859  }
860  }
861 
862  for (UInt i = 0; i < maxRefinements; i++)
863 #else
864  for (UInt i = 0; i < 9; i++)
865 #endif
866  {
  // Candidate relative to the base vector; used only to pick the filtered
  // block and the address within it.
867  TComMv cMvTest = pcMvRefine[i];
868  cMvTest += baseRefMv;
869 
870  Int horVal = cMvTest.getHor() * iFrac;
871  Int verVal = cMvTest.getVer() * iFrac;
  // The two low bits of each scaled component select the filtered block.
872  piRefPos = m_filteredBlock[ verVal & 3 ][ horVal & 3 ].getAddr(COMPONENT_Y);
  // When a scaled component equals 2 and the other one is even, the address
  // is advanced by one sample (horizontal) or one row (vertical).
873  if ( horVal == 2 && ( verVal & 1 ) == 0 )
874  {
875  piRefPos += 1;
876  }
877  if ( ( horVal & 1 ) == 0 && verVal == 2 )
878  {
879  piRefPos += iRefStride;
880  }
  // Candidate relative to rcMvFrac; this is the vector whose cost is added.
881  cMvTest = pcMvRefine[i];
882  cMvTest += rcMvFrac;
883 
885 
886  m_cDistParam.pCur = piRefPos;
887  m_cDistParam.bitDepth = pcPatternKey->getBitDepthY();
888  uiDist = m_cDistParam.DistFunc( &m_cDistParam );
889  uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cMvTest.getHor(), cMvTest.getVer() );
890 
  // Strict comparison: on a tie the earlier table entry is kept.
891  if ( uiDist < uiDistBest )
892  {
893  uiDistBest = uiDist;
894  uiDirecBest = i;
896  }
897  }
898 
899  rcMvFrac = pcMvRefine[uiDirecBest];
900 
901  return uiDistBest;
902 }
903 
904 
905 
// Writes, through m_pcEntropyCoder, the transform subdivision flag (when
// bLuma) and the chroma CBFs (when bChroma) for the TU rTu, then recurses into
// the child TUs if the TU is subdivided.
// NOTE(review): the line with the function name and first parameter is not
// present in this listing; presumably this is TEncSearch::xEncSubdivCbfQT(
// TComTU &rTu, since the body recurses through xEncSubdivCbfQT with the same
// trailing arguments. One line in the final luma branch is missing as well.
906 Void
908  Bool bLuma,
909  Bool bChroma )
910 {
911  TComDataCU* pcCU=rTu.getCU();
912  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
913  const UInt uiTrDepth = rTu.GetTransformDepthRel();
914  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
  // The TU is subdivided when the stored transform index exceeds the current depth.
915  const UInt uiSubdiv = ( uiTrMode > uiTrDepth ? 1 : 0 );
916  const UInt uiLog2LumaTrafoSize = rTu.GetLog2LumaTrSize();
917 
  // In the first four cases below no flag is written: the subdivision state
  // is only asserted to have the expected value.
918  if( pcCU->isIntra(0) && pcCU->getPartitionSize(0) == SIZE_NxN && uiTrDepth == 0 )
919  {
920  assert( uiSubdiv );
921  }
922  else if( uiLog2LumaTrafoSize > pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() )
923  {
924  assert( uiSubdiv );
925  }
926  else if( uiLog2LumaTrafoSize == pcCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() )
927  {
928  assert( !uiSubdiv );
929  }
930  else if( uiLog2LumaTrafoSize == pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) )
931  {
932  assert( !uiSubdiv );
933  }
934  else
935  {
936  assert( uiLog2LumaTrafoSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );
937  if( bLuma )
938  {
939  m_pcEntropyCoder->encodeTransformSubdivFlag( uiSubdiv, 5 - uiLog2LumaTrafoSize );
940  }
941  }
942 
943  if ( bChroma )
944  {
945  const UInt numberValidComponents = getNumberValidComponents(rTu.GetChromaFormat());
946  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
947  {
948  const ComponentID compID=ComponentID(ch);
  // A chroma CBF is written at depth 0, or deeper only when the CBF
  // read at the previous depth is non-zero.
949  if( rTu.ProcessingAllQuadrants(compID) && (uiTrDepth==0 || pcCU->getCbf( uiAbsPartIdx, compID, uiTrDepth-1 ) ))
950  {
951  m_pcEntropyCoder->encodeQtCbf(rTu, compID, (uiSubdiv == 0));
952  }
953  }
954  }
955 
956  if( uiSubdiv )
957  {
  // Visit every child section of this TU.
958  TComTURecurse tuRecurse(rTu, false);
959  do
960  {
961  xEncSubdivCbfQT( tuRecurse, bLuma, bChroma );
962  } while (tuRecurse.nextSection(rTu));
963  }
964  else
965  {
966  //===== Cbfs =====
  // NOTE(review): the statement inside this branch is missing from the listing.
967  if( bLuma )
968  {
970  }
971  }
972 }
973 
974 
975 
976 
// Writes the coefficients of one colour component for the TU rTu through
// m_pcEntropyCoder, recursing into child TUs when the TU is subdivided.
// bRealCoeff selects the coefficient source: the CU's own coefficient buffer
// when true, otherwise the temporary buffer m_ppcQTTempCoeff for this
// component and quadtree layer.
// NOTE(review): the line with the function name and first parameter is not
// present in this listing; presumably this is TEncSearch::xEncCoeffQT(
// TComTU &rTu, since the body recurses through xEncCoeffQT with the same
// trailing arguments.
977 Void
979  const ComponentID component,
980  Bool bRealCoeff )
981 {
982  TComDataCU* pcCU=rTu.getCU();
983  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
984  const UInt uiTrDepth=rTu.GetTransformDepthRel();
985 
986  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
  // The TU is subdivided when the stored transform index exceeds the current depth.
987  const UInt uiSubdiv = ( uiTrMode > uiTrDepth ? 1 : 0 );
988 
989  if( uiSubdiv )
990  {
991  TComTURecurse tuRecurseChild(rTu, false);
992  do
993  {
994  xEncCoeffQT( tuRecurseChild, component, bRealCoeff );
995  } while (tuRecurseChild.nextSection(rTu) );
996  }
997  else if (rTu.ProcessComponentSection(component))
998  {
999  //===== coefficients =====
1000  const UInt uiLog2TrafoSize = rTu.GetLog2LumaTrSize();
1001  UInt uiCoeffOffset = rTu.getCoefficientOffset(component);
  // Layer index counted down from the maximum TU size in the SPS.
1002  UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrafoSize;
1003  TCoeff* pcCoeff = bRealCoeff ? pcCU->getCoeff(component) : m_ppcQTTempCoeff[component][uiQTLayer];
1004 
  // NOTE(review): the statement inside this branch is missing from the listing.
1005  if (isChroma(component) && (pcCU->getCbf( rTu.GetAbsPartIdxTU(), COMPONENT_Y, uiTrMode ) != 0) && pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() )
1006  {
1008  }
1009 
1010  m_pcEntropyCoder->encodeCoeffNxN( rTu, pcCoeff+uiCoeffOffset, component );
1011  }
1012 }
1013 
1014 
1015 
1016 
// Writes the intra CU header information through m_pcEntropyCoder: for luma,
// the CU-level flags (only at uiAbsPartIdx == 0) and the luma intra direction;
// for chroma, the chroma intra direction.
// Returns early, right after the IPCM information, when the CU is an intra
// 2Nx2N CU whose IPCM flag is set.
// NOTE(review): the line with the function name and first parameter is not
// present in this listing; presumably this is TEncSearch::xEncIntraHeader(
// TComDataCU* pcCU, -- a later function calls xEncIntraHeader with a matching
// argument list. Several condition/statement lines inside are missing too.
1017 Void
1019  UInt uiTrDepth,
1020  UInt uiAbsPartIdx,
1021  Bool bLuma,
1022  Bool bChroma )
1023 {
1024  if( bLuma )
1025  {
1026  // CU header
1027  if( uiAbsPartIdx == 0 )
1028  {
  // Skip flag and prediction mode are written only for non-intra slices.
1029  if( !pcCU->getSlice()->isIntra() )
1030  {
  // NOTE(review): the condition and body of this block are missing.
1032  {
1034  }
1035  m_pcEntropyCoder->encodeSkipFlag( pcCU, 0, true );
1036  m_pcEntropyCoder->encodePredMode( pcCU, 0, true );
1037  }
1038  m_pcEntropyCoder ->encodePartSize( pcCU, 0, pcCU->getDepth(0), true );
1039 
1040  if (pcCU->isIntra(0) && pcCU->getPartitionSize(0) == SIZE_2Nx2N )
1041  {
1042  m_pcEntropyCoder->encodeIPCMInfo( pcCU, 0, true );
1043 
1044  if ( pcCU->getIPCMFlag (0))
1045  {
1046  return;
1047  }
1048  }
1049  }
1050  // luma prediction mode
1051  if( pcCU->getPartitionSize(0) == SIZE_2Nx2N )
1052  {
  // NOTE(review): the statement inside this branch is missing.
1053  if (uiAbsPartIdx==0)
1054  {
1056  }
1057  }
1058  else
1059  {
  // For other partition sizes the direction is written below depth 0, at
  // indices that are a multiple of a quarter of the CU's partitions.
1060  UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
1061  if (uiTrDepth>0 && (uiAbsPartIdx%uiQNumParts)==0)
1062  {
1063  m_pcEntropyCoder->encodeIntraDirModeLuma ( pcCU, uiAbsPartIdx );
1064  }
1065  }
1066  }
1067 
1068  if( bChroma )
1069  {
  // NOTE(review): the condition that opens this block is missing.
1071  {
1072  if(uiAbsPartIdx==0)
1073  {
1074  m_pcEntropyCoder->encodeIntraDirModeChroma ( pcCU, uiAbsPartIdx );
1075  }
1076  }
1077  else
1078  {
1079  UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
1080  assert(uiTrDepth>0);
1081  if ((uiAbsPartIdx%uiQNumParts)==0)
1082  {
1083  m_pcEntropyCoder->encodeIntraDirModeChroma ( pcCU, uiAbsPartIdx );
1084  }
1085  }
1086  }
1087 }
1088 
1089 
1090 
1091 
// Runs the intra header, the subdivision/CBF syntax and the coefficient syntax for
// the TU `rTu` through the entropy coder and returns `uiBits`.
//   bLuma      : include the luma header part and the COMPONENT_Y coefficients
//   bChroma    : include the chroma header part and the Cb and Cr coefficients
//   bRealCoeff : forwarded unchanged to xEncCoeffQT
// NOTE(review): the embedded listing numbers skip 1093, 1101 and 1114. The line with
// the function name and the `rTu` parameter, and the statements that declare and fill
// `uiBits`, are therefore not visible (presumably a bit-counter reset before coding
// and a read-back afterwards - confirm against upstream).
1092 UInt
1094  Bool bLuma,
1095  Bool bChroma,
1096  Bool bRealCoeff /* just for test */ )
1097 {
1098  TComDataCU* pcCU=rTu.getCU();
1099  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1100  const UInt uiTrDepth=rTu.GetTransformDepthRel();
1102  xEncIntraHeader ( pcCU, uiTrDepth, uiAbsPartIdx, bLuma, bChroma );
1103  xEncSubdivCbfQT ( rTu, bLuma, bChroma );
1104 
1105  if( bLuma )
1106  {
1107  xEncCoeffQT ( rTu, COMPONENT_Y, bRealCoeff );
1108  }
1109  if( bChroma )
1110  {
1111  xEncCoeffQT ( rTu, COMPONENT_Cb, bRealCoeff );
1112  xEncCoeffQT ( rTu, COMPONENT_Cr, bRealCoeff );
1113  }
1115 
1116  return uiBits;
1117 }
1118 
// Codes only the coefficients of one component (`compID`) of `rTu` via xEncCoeffQT and
// returns `uiBits`; unlike the luma/chroma variant above, no header or CBF syntax is coded here.
// NOTE(review): the embedded listing numbers skip 1119, 1123 and 1125, so the return
// type, function name, the `rTu` parameter and the statements that produce `uiBits`
// are not visible. The name is presumably xGetIntraBitsQTChroma - confirm against upstream.
1120  ComponentID compID,
1121  Bool bRealCoeff /* just for test */ )
1122 {
1124  xEncCoeffQT ( rTu, compID, bRealCoeff );
1126  return uiBits;
1127 }
1128 
// Intra-codes one component (`compID`) of the TU `rTu`: builds (or reloads) the intra
// prediction, forms the residual, optionally applies cross-component prediction,
// transforms/quantises, inverse-transforms, reconstructs, and adds the resulting
// distortion to `ruiDist`.
//   pcPredYuv              : receives the prediction and is then overwritten in place with the
//                            reconstruction (piPred and piReco are the same address)
//   pcResiYuv              : receives the residual
//   checkCrossCPrediction  : enables cross-component prediction for chroma when the chroma mode is DM_CHROMA_IDX
//   default0Save1Load2     : 1 = also save the prediction into m_pSharedPredTransformSkip[compID];
//                            2 = skip prediction and load it from that buffer (only when DEBUG_STRING is off);
//                            any other value = just predict
// Returns without coding anything when rTu.ProcessComponentSection(compID) is false, and
// returns after the residual step when the computed cross-component alpha is 0.
// NOTE(review): the embedded listing numbers skip 1129, 1132, 1264, 1302 and 1338. The
// line with the function name and the `pcOrgYuv` parameter, the `resiLuma` parameter,
// two block conditions and one preprocessor line are not visible here; the name is
// inferred from the xIntraCodingTUBlock calls elsewhere in this file.
1130  TComYuv* pcPredYuv,
1131  TComYuv* pcResiYuv,
1133  const Bool checkCrossCPrediction,
1134  Distortion& ruiDist,
1135  const ComponentID compID,
1136  TComTU& rTu
1137  DEBUG_STRING_FN_DECLARE(sDebug)
1138  ,Int default0Save1Load2
1139  )
1140 {
1141  if (!rTu.ProcessComponentSection(compID))
1142  {
1143  return;
1144  }
1145  const Bool bIsLuma = isLuma(compID);
1146  const TComRectangle &rect = rTu.getRect(compID);
1147  TComDataCU *pcCU = rTu.getCU();
1148  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1149  const TComSPS &sps = *(pcCU->getSlice()->getSPS());
1150 
1151  const UInt uiTrDepth = rTu.GetTransformDepthRelAdj(compID);
1152  const UInt uiFullDepth = rTu.GetTransformDepthTotal();
1153  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1154  const ChromaFormat chFmt = pcOrgYuv->getChromaFormat();
1155  const ChannelType chType = toChannelType(compID);
1156  const Int bitDepth = sps.getBitDepth(chType);
1157 
      // Org, pred and resi are all stepped with the stride taken from pcOrgYuv.
1158  const UInt uiWidth = rect.width;
1159  const UInt uiHeight = rect.height;
1160  const UInt uiStride = pcOrgYuv ->getStride (compID);
1161  Pel *piOrg = pcOrgYuv ->getAddr( compID, uiAbsPartIdx );
1162  Pel *piPred = pcPredYuv->getAddr( compID, uiAbsPartIdx );
1163  Pel *piResi = pcResiYuv->getAddr( compID, uiAbsPartIdx );
      // piReco is the same address as piPred: the reconstruction replaces the prediction in pcPredYuv.
1164  Pel *piReco = pcPredYuv->getAddr( compID, uiAbsPartIdx );
1165  const UInt uiQTLayer = sps.getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1166  Pel *piRecQt = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( compID, uiAbsPartIdx );
1167  const UInt uiRecQtStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride(compID);
1168  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx;
1169  Pel *piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder );
1170  UInt uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride ( compID );
1171  TCoeff *pcCoeff = m_ppcQTTempCoeff[compID][uiQTLayer] + rTu.getCoefficientOffset(compID);
1172  Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1173 
1174 #if ADAPTIVE_QP_SELECTION
1175  TCoeff *pcArlCoeff = m_ppcQTTempArlCoeff[compID][ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1176 #endif
1177 
      // Mode resolution: a chroma DM_CHROMA_IDX mode is replaced by the corresponding luma PU's
      // direction, and 4:2:2 chroma modes are remapped through g_chroma422IntraAngleMappingTable.
1178  const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1179  const UInt partsPerMinCU = 1<<(2*(sps.getMaxTotalCUDepth() - sps.getLog2DiffMaxMinCodingBlockSize()));
1180  const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && !bIsLuma) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt, partsPerMinCU)) : uiChPredMode;
1181  const UInt uiChFinalMode = ((chFmt == CHROMA_422) && !bIsLuma) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1182 
      // The stored luma residual planes are addressed with a row stride of MAX_CU_SIZE.
1183  const Int blkX = g_auiRasterToPelX[ g_auiZscanToRaster[ uiAbsPartIdx ] ];
1184  const Int blkY = g_auiRasterToPelY[ g_auiZscanToRaster[ uiAbsPartIdx ] ];
1185  const Int bufferOffset = blkX + (blkY * MAX_CU_SIZE);
1186  Pel *const encoderLumaResidual = resiLuma[RESIDUAL_ENCODER_SIDE ] + bufferOffset;
1187  Pel *const reconstructedLumaResidual = resiLuma[RESIDUAL_RECONSTRUCTED] + bufferOffset;
1188  const Bool bUseCrossCPrediction = isChroma(compID) && (uiChPredMode == DM_CHROMA_IDX) && checkCrossCPrediction;
1189  const Bool bUseReconstructedResidualForEstimate = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate();
1190  Pel *const lumaResidualForEstimate = bUseReconstructedResidualForEstimate ? reconstructedLumaResidual : encoderLumaResidual;
1191 
1192 #if DEBUG_STRING
1193  const Int debugPredModeMask=DebugStringGetPredModeMask(MODE_INTRA);
1194 #endif
1195 
1196  //===== init availability pattern =====
1197  DEBUG_STRING_NEW(sTemp)
1198 
      // With DEBUG_STRING enabled the `if`/`else` below are compiled out, so the prediction is
      // always recomputed and never loaded from the shared buffer.
1199 #if !DEBUG_STRING
1200  if( default0Save1Load2 != 2 )
1201 #endif
1202  {
1203  const Bool bUseFilteredPredictions=TComPrediction::filteringIntraReferenceSamples(compID, uiChFinalMode, uiWidth, uiHeight, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag());
1204 
1205  initIntraPatternChType( rTu, compID, bUseFilteredPredictions DEBUG_STRING_PASS_INTO(sDebug) );
1206 
1207  //===== get prediction signal =====
1208  predIntraAng( compID, uiChFinalMode, piOrg, uiStride, piPred, uiStride, rTu, bUseFilteredPredictions );
1209 
1210  // save prediction
1211  if( default0Save1Load2 == 1 )
1212  {
      // The shared buffer is written densely (row after row, no stride padding).
1213  Pel* pPred = piPred;
1214  Pel* pPredBuf = m_pSharedPredTransformSkip[compID];
1215  Int k = 0;
1216  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1217  {
1218  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1219  {
1220  pPredBuf[ k ++ ] = pPred[ uiX ];
1221  }
1222  pPred += uiStride;
1223  }
1224  }
1225  }
1226 #if !DEBUG_STRING
1227  else
1228  {
1229  // load prediction
1230  Pel* pPred = piPred;
1231  Pel* pPredBuf = m_pSharedPredTransformSkip[compID];
1232  Int k = 0;
1233  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1234  {
1235  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1236  {
1237  pPred[ uiX ] = pPredBuf[ k ++ ];
1238  }
1239  pPred += uiStride;
1240  }
1241  }
1242 #endif
1243 
1244  //===== get residual signal =====
1245  {
1246  // get residual
1247  Pel* pOrg = piOrg;
1248  Pel* pPred = piPred;
1249  Pel* pResi = piResi;
1250 
1251  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1252  {
1253  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1254  {
1255  pResi[ uiX ] = pOrg[ uiX ] - pPred[ uiX ];
1256  }
1257 
1258  pOrg += uiStride;
1259  pResi += uiStride;
1260  pPred += uiStride;
1261  }
1262  }
1263 
      // NOTE(review): the condition guarding this block (1264) is missing from the listing.
1265  {
1266  if (bUseCrossCPrediction)
1267  {
      // Alpha is estimated from lumaResidualForEstimate; an alpha of 0 abandons this
      // candidate before any transform, leaving ruiDist untouched.
1268  if (xCalcCrossComponentPredictionAlpha( rTu, compID, lumaResidualForEstimate, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride ) == 0)
1269  {
1270  return;
1271  }
1272  TComTrQuant::crossComponentPrediction ( rTu, compID, reconstructedLumaResidual, piResi, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride, uiStride, false );
1273  }
1274  else if (isLuma(compID) && !bUseReconstructedResidualForEstimate)
1275  {
      // Keep the pre-quantisation luma residual for later alpha estimation.
1276  xStoreCrossComponentPredictionResult( encoderLumaResidual, piResi, rTu, 0, 0, MAX_CU_SIZE, uiStride );
1277  }
1278  }
1279 
1280  //===== transform and quantization =====
1281  //--- init rate estimation arrays for RDOQ ---
1282  if( useTransformSkip ? m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ() )
1283  {
1284  COEFF_SCAN_TYPE scanType = COEFF_SCAN_TYPE(pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, uiHeight, compID));
1285  m_pcEntropyCoder->estimateBit( m_pcTrQuant->m_pcEstBitsSbac, uiWidth, uiHeight, chType, scanType );
1286  }
1287 
1288  //--- transform and quantization ---
1289  TCoeff uiAbsSum = 0;
1290  if (bIsLuma)
1291  {
1292  pcCU ->setTrIdxSubParts ( uiTrDepth, uiAbsPartIdx, uiFullDepth );
1293  }
1294 
1295  const QpParam cQP(*pcCU, compID);
1296 
1297 #if RDOQ_CHROMA_LAMBDA
1298  m_pcTrQuant->selectLambda (compID);
1299 #endif
1300 
1301  m_pcTrQuant->transformNxN ( rTu, compID, piResi, uiStride, pcCoeff,
      // NOTE(review): the preprocessor line opening this conditional argument (1302) is missing from
      // the listing - presumably `#if ADAPTIVE_QP_SELECTION`, which guards pcArlCoeff's declaration above.
1303  pcArlCoeff,
1304 #endif
1305  uiAbsSum, cQP
1306  );
1307 
1308  //--- inverse transform ---
1309 
1310 #if DEBUG_STRING
1311  if ( (uiAbsSum > 0) || (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask) )
1312 #else
1313  if ( uiAbsSum > 0 )
1314 #endif
1315  {
1316  m_pcTrQuant->invTransformNxN ( rTu, compID, piResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sDebug, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) );
1317  }
1318  else
1319  {
      // Nothing survived quantisation: clear the coefficients and the residual block.
1320  Pel* pResi = piResi;
1321  memset( pcCoeff, 0, sizeof( TCoeff ) * uiWidth * uiHeight );
1322  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1323  {
1324  memset( pResi, 0, sizeof( Pel ) * uiWidth );
1325  pResi += uiStride;
1326  }
1327  }
1328 
1329 
1330  //===== reconstruction =====
1331  {
1332  Pel* pPred = piPred;
1333  Pel* pResi = piResi;
1334  Pel* pReco = piReco;
1335  Pel* pRecQt = piRecQt;
1336  Pel* pRecIPred = piRecIPred;
1337 
      // NOTE(review): the condition guarding this block (1338) is missing from the listing.
1339  {
1340  if (bUseCrossCPrediction)
1341  {
      // Same call as before the transform, but with the final argument set to true.
1342  TComTrQuant::crossComponentPrediction( rTu, compID, reconstructedLumaResidual, piResi, piResi, uiWidth, uiHeight, MAX_CU_SIZE, uiStride, uiStride, true );
1343  }
1344  else if (isLuma(compID))
1345  {
1346  xStoreCrossComponentPredictionResult( reconstructedLumaResidual, piResi, rTu, 0, 0, MAX_CU_SIZE, uiStride );
1347  }
1348  }
1349 
1350  #if DEBUG_STRING
1351  std::stringstream ss(stringstream::out);
1352  const Bool bDebugPred=((DebugOptionList::DebugString_Pred.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID));
1353  const Bool bDebugResi=((DebugOptionList::DebugString_Resi.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID));
1354  const Bool bDebugReco=((DebugOptionList::DebugString_Reco.getInt()&debugPredModeMask) && DEBUG_STRING_CHANNEL_CONDITION(compID));
1355 
      // Debug variant of the reconstruction loop: same sample writes as the plain loop below,
      // interleaved with dumps of the prediction, residual and reconstruction rows.
1356  if (bDebugPred || bDebugResi || bDebugReco)
1357  {
1358  ss << "###: " << "CompID: " << compID << " pred mode (ch/fin): " << uiChPredMode << "/" << uiChFinalMode << " absPartIdx: " << rTu.GetAbsPartIdxTU() << "\n";
1359  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1360  {
1361  ss << "###: ";
1362  if (bDebugPred)
1363  {
1364  ss << " - pred: ";
1365  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1366  {
1367  ss << pPred[ uiX ] << ", ";
1368  }
1369  }
1370  if (bDebugResi)
1371  {
1372  ss << " - resi: ";
1373  }
1374  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1375  {
1376  if (bDebugResi)
1377  {
1378  ss << pResi[ uiX ] << ", ";
1379  }
1380  pReco [ uiX ] = Pel(ClipBD<Int>( Int(pPred[uiX]) + Int(pResi[uiX]), bitDepth ));
1381  pRecQt [ uiX ] = pReco[ uiX ];
1382  pRecIPred[ uiX ] = pReco[ uiX ];
1383  }
1384  if (bDebugReco)
1385  {
1386  ss << " - reco: ";
1387  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1388  {
1389  ss << pReco[ uiX ] << ", ";
1390  }
1391  }
1392  pPred += uiStride;
1393  pResi += uiStride;
1394  pReco += uiStride;
1395  pRecQt += uiRecQtStride;
1396  pRecIPred += uiRecIPredStride;
1397  ss << "\n";
1398  }
1399  DEBUG_STRING_APPEND(sDebug, ss.str())
1400  }
1401  else
1402 #endif
1403  {
1404 
      // reco = clip(pred + resi) to the component bit depth; the result is written to three places:
      // pcPredYuv (in place), the per-layer QT temp buffer, and the picture's reconstruction buffer.
1405  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1406  {
1407  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1408  {
1409  pReco [ uiX ] = Pel(ClipBD<Int>( Int(pPred[uiX]) + Int(pResi[uiX]), bitDepth ));
1410  pRecQt [ uiX ] = pReco[ uiX ];
1411  pRecIPred[ uiX ] = pReco[ uiX ];
1412  }
1413  pPred += uiStride;
1414  pResi += uiStride;
1415  pReco += uiStride;
1416  pRecQt += uiRecQtStride;
1417  pRecIPred += uiRecIPredStride;
1418  }
1419  }
1420  }
1421 
1422  //===== update distortion =====
      // Accumulates (does not overwrite) the distortion between the reconstruction and the original.
1423  ruiDist += m_pcRdCost->getDistPart( bitDepth, piReco, uiStride, piOrg, uiStride, uiWidth, uiHeight, compID );
1424 }
1425 
1426 
1427 
1428 
// Recursive luma residual-quadtree decision for the TU `rTu`: codes the TU as a single
// block (optionally testing transform skip) and/or as its child TUs, compares the two
// R-D costs, keeps the cheaper variant, and adds its distortion to `ruiDistY` and its
// cost to `dRDCost`.
//   resiLuma    : receives the luma residual planes of the chosen variant
//   bCheckFirst : with the speed-up enabled, suppresses the split test when the block may be coded whole
// NOTE(review): the embedded listing numbers skip 1430, 1435, 1459, 1496, 1502, 1512,
// 1560-1561, 1564, 1590, 1598, 1620, 1640-1641, 1645, 1679, 1693 and 1710. The line with
// the function name and the `pcOrgYuv` parameter, several block conditions and what are
// presumably the entropy-coder state save/restore calls are not visible here - restore
// them from upstream before compiling. The name is taken from the recursive call below.
1429 Void
1431  TComYuv* pcPredYuv,
1432  TComYuv* pcResiYuv,
1433  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],
1434  Distortion& ruiDistY,
      // NOTE(review): the preprocessor line opening this conditional parameter (1435) is missing -
      // presumably `#if HHI_RQT_INTRA_SPEEDUP`, which guards the matching argument in the recursive call.
1436  Bool bCheckFirst,
1437 #endif
1438  Double& dRDCost,
1439  TComTU& rTu
1440  DEBUG_STRING_FN_DECLARE(sDebug))
1441 {
1442  TComDataCU *pcCU = rTu.getCU();
1443  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1444  const UInt uiFullDepth = rTu.GetTransformDepthTotal();
1445  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1446  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
      // Whole-block coding is possible up to the maximum TU size; splitting is possible above the minimum.
1447  Bool bCheckFull = ( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() );
1448  Bool bCheckSplit = ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );
1449 
      // Scratch residual planes for the two candidates; the winner is copied into resiLuma.
1450  Pel resiLumaSplit [NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
1451  Pel resiLumaSingle[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
1452 
1453  Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];
1454  for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)
1455  {
1456  bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise
1457  }
1458 
      // NOTE(review): statement 1459 is missing from the listing (possibly the "specified otherwise" override of bMaintainResidual).
1460 
1461 #if HHI_RQT_INTRA_SPEEDUP
1462  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
1463  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);
1464  // don't check split if TU size is less or equal to max TU size
1465  Bool noSplitIntraMaxTuSize = bCheckFull;
1466  if(m_pcEncCfg->getRDpenalty() && ! isIntraSlice)
1467  {
1468  // in addition don't check split if TU size is less or equal to 16x16 TU size for non-intra slice
1469  noSplitIntraMaxTuSize = ( uiLog2TrSize <= min(maxTuSize,4) );
1470 
1471  // if maximum RD-penalty don't check TU size 32x32
1472  if(m_pcEncCfg->getRDpenalty()==2)
1473  {
1474  bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4));
1475  }
1476  }
1477  if( bCheckFirst && noSplitIntraMaxTuSize )
1478 
1479  {
1480  bCheckSplit = false;
1481  }
1482 #else
1483  Int maxTuSize = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize();
1484  Int isIntraSlice = (pcCU->getSlice()->getSliceType() == I_SLICE);
1485  // if maximum RD-penalty don't check TU size 32x32
1486  if((m_pcEncCfg->getRDpenalty()==2) && !isIntraSlice)
1487  {
1488  bCheckFull = ( uiLog2TrSize <= min(maxTuSize,4));
1489  }
1490 #endif
      // dSingleCost starts at MAX_DOUBLE so that, when the whole-block test is skipped, any split cost wins.
1491  Double dSingleCost = MAX_DOUBLE;
1492  Distortion uiSingleDistLuma = 0;
1493  UInt uiSingleCbfLuma = 0;
1494  Bool checkTransformSkip = pcCU->getSlice()->getPPS()->getUseTransformSkip();
1495  Int bestModeId[MAX_NUM_COMPONENT] = { 0, 0, 0};
      // NOTE(review): statement 1496 is missing from the listing.
      // Transform skip is never tested for transquant-bypass CUs.
1497  checkTransformSkip &= (!pcCU->getCUTransquantBypass(0));
1498 
1499  assert (rTu.ProcessComponentSection(COMPONENT_Y));
1500  const UInt totalAdjustedDepthChan = rTu.GetTransformDepthTotalAdj(COMPONENT_Y);
1501 
      // NOTE(review): the condition guarding this block (1502) is missing from the listing.
1503  {
1504  checkTransformSkip &= (pcCU->getPartitionSize(uiAbsPartIdx)==SIZE_NxN);
1505  }
1506 
1507  if( bCheckFull )
1508  {
1509  if(checkTransformSkip == true)
1510  {
1511  //----- store original entropy coding status -----
      // NOTE(review): statement 1512 is missing from the listing.
1513 
1514  Distortion singleDistTmpLuma = 0;
1515  UInt singleCbfTmpLuma = 0;
1516  Double singleCostTmp = 0;
1517  Int firstCheckId = 0;
1518 
      // modeId 0 = regular transform, modeId 1 = transform skip. The first pass saves the
      // prediction (default0Save1Load2 == 1) and the second pass reloads it (== 2).
1519  for(Int modeId = firstCheckId; modeId < 2; modeId ++)
1520  {
1521  DEBUG_STRING_NEW(sModeString)
1522  Int default0Save1Load2 = 0;
1523  singleDistTmpLuma=0;
1524  if(modeId == firstCheckId)
1525  {
1526  default0Save1Load2 = 1;
1527  }
1528  else
1529  {
1530  default0Save1Load2 = 2;
1531  }
1532 
1533 
1534  pcCU->setTransformSkipSubParts ( modeId, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1535  xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, singleDistTmpLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sModeString), default0Save1Load2 );
1536 
1537  singleCbfTmpLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );
1538 
1539  //----- determine rate and r-d cost -----
1540  if(modeId == 1 && singleCbfTmpLuma == 0)
1541  {
1542  //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
1543  singleCostTmp = MAX_DOUBLE;
1544  }
1545  else
1546  {
1547  UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false );
1548  singleCostTmp = m_pcRdCost->calcRdCost( uiSingleBits, singleDistTmpLuma );
1549  }
1550  if(singleCostTmp < dSingleCost)
1551  {
1552  DEBUG_STRING_SWAP(sDebug, sModeString)
1553  dSingleCost = singleCostTmp;
1554  uiSingleDistLuma = singleDistTmpLuma;
1555  uiSingleCbfLuma = singleCbfTmpLuma;
1556 
1557  bestModeId[COMPONENT_Y] = modeId;
1558  if(bestModeId[COMPONENT_Y] == firstCheckId)
1559  {
      // NOTE(review): statements 1560-1561 are missing from the listing (presumably they stash the
      // first mode's result and coder state, mirrored by the xLoadIntraResultQT call after the loop - confirm).
1562  }
1563 
      // NOTE(review): the condition guarding this block (1564) is missing from the listing.
1565  {
1566  const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;
1567  const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;
1568  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
1569  {
1570  if (bMaintainResidual[storedResidualIndex])
1571  {
1572  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);
1573  }
1574  }
1575  }
1576  }
      // Reload the QT root coder state after the first pass so the second pass starts from it too.
1577  if (modeId == firstCheckId)
1578  {
1579  m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
1580  }
1581  }
1582 
1583  pcCU ->setTransformSkipSubParts ( bestModeId[COMPONENT_Y], COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1584 
      // The buffers currently hold the second pass; if the first pass won, bring its result back.
1585  if(bestModeId[COMPONENT_Y] == firstCheckId)
1586  {
1587  xLoadIntraResultQT(COMPONENT_Y, rTu );
1588  pcCU->setCbfSubParts ( uiSingleCbfLuma << uiTrDepth, COMPONENT_Y, uiAbsPartIdx, rTu.GetTransformDepthTotalAdj(COMPONENT_Y) );
1589 
      // NOTE(review): statement 1590 is missing from the listing.
1591  }
1592  }
1593  else
1594  {
1595  //----- store original entropy coding status -----
1596  if( bCheckSplit )
1597  {
      // NOTE(review): statement 1598 is missing from the listing.
1599  }
1600  //----- code luma/chroma block with given intra prediction mode and store Cbf-----
1601  dSingleCost = 0.0;
1602 
1603  pcCU ->setTransformSkipSubParts ( 0, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1604  xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSingle, false, uiSingleDistLuma, COMPONENT_Y, rTu DEBUG_STRING_PASS_INTO(sDebug));
1605 
      // The CBF is only remembered when a split will be tried, since only then can it be overwritten.
1606  if( bCheckSplit )
1607  {
1608  uiSingleCbfLuma = pcCU->getCbf( uiAbsPartIdx, COMPONENT_Y, uiTrDepth );
1609  }
1610  //----- determine rate and r-d cost -----
1611  UInt uiSingleBits = xGetIntraBitsQT( rTu, true, false, false );
1612 
      // RD penalty: bits of a log2-size-5 (32x32) TU are counted four times in non-intra slices.
1613  if(m_pcEncCfg->getRDpenalty() && (uiLog2TrSize==5) && !isIntraSlice)
1614  {
1615  uiSingleBits=uiSingleBits*4;
1616  }
1617 
1618  dSingleCost = m_pcRdCost->calcRdCost( uiSingleBits, uiSingleDistLuma );
1619 
      // NOTE(review): the condition guarding this block (1620) is missing from the listing.
1621  {
1622  const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;
1623  const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;
1624  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
1625  {
1626  if (bMaintainResidual[storedResidualIndex])
1627  {
1628  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSingle[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);
1629  }
1630  }
1631  }
1632  }
1633  }
1634 
1635  if( bCheckSplit )
1636  {
1637  //----- store full entropy coding status, load original entropy coding status -----
1638  if( bCheckFull )
1639  {
      // NOTE(review): statements 1640-1641 are missing from the listing.
1642  }
1643  else
1644  {
      // NOTE(review): statement 1645 is missing from the listing.
1646  }
1647  //----- code splitted block -----
1648  Double dSplitCost = 0.0;
1649  Distortion uiSplitDistLuma = 0;
1650  UInt uiSplitCbfLuma = 0;
1651 
      // Recurse into every child TU, accumulating distortion and OR-ing the children's CBFs.
1652  TComTURecurse tuRecurseChild(rTu, false);
1653  DEBUG_STRING_NEW(sSplit)
1654  do
1655  {
1656  DEBUG_STRING_NEW(sChild)
1657 #if HHI_RQT_INTRA_SPEEDUP
1658  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, bCheckFirst, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
1659 #else
1660  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaSplit, uiSplitDistLuma, dSplitCost, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
1661 #endif
1662  DEBUG_STRING_APPEND(sSplit, sChild)
1663  uiSplitCbfLuma |= pcCU->getCbf( tuRecurseChild.GetAbsPartIdxTU(), COMPONENT_Y, tuRecurseChild.GetTransformDepthRel() );
1664  } while (tuRecurseChild.nextSection(rTu) );
1665 
1666  UInt uiPartsDiv = rTu.GetAbsPartIdxNumParts();
1667  {
      // If any child has coefficients, set this depth's CBF bit on every partition of the TU.
1668  if (uiSplitCbfLuma)
1669  {
1670  const UInt flag=1<<uiTrDepth;
1671  UChar *pBase=pcCU->getCbf( COMPONENT_Y );
1672  for( UInt uiOffs = 0; uiOffs < uiPartsDiv; uiOffs++ )
1673  {
1674  pBase[ uiAbsPartIdx + uiOffs ] |= flag;
1675  }
1676  }
1677  }
1678  //----- restore context states -----
      // NOTE(review): statement 1679 is missing from the listing.
1680 
1681  //----- determine rate and r-d cost -----
      // The cost accumulated by the children in dSplitCost is replaced by a cost recomputed
      // for the whole split tree.
1682  UInt uiSplitBits = xGetIntraBitsQT( rTu, true, false, false );
1683  dSplitCost = m_pcRdCost->calcRdCost( uiSplitBits, uiSplitDistLuma );
1684 
1685  //===== compare and set best =====
1686  if( dSplitCost < dSingleCost )
1687  {
1688  //--- update cost ---
1689  DEBUG_STRING_SWAP(sSplit, sDebug)
1690  ruiDistY += uiSplitDistLuma;
1691  dRDCost += dSplitCost;
1692 
      // NOTE(review): the condition guarding this block (1693) is missing from the listing.
1694  {
1695  const Int xOffset = rTu.getRect( COMPONENT_Y ).x0;
1696  const Int yOffset = rTu.getRect( COMPONENT_Y ).y0;
1697  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
1698  {
1699  if (bMaintainResidual[storedResidualIndex])
1700  {
1701  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaSplit[storedResidualIndex], rTu, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE);
1702  }
1703  }
1704  }
1705 
      // Split wins: the CU data and buffers already hold the split result, so stop here.
1706  return;
1707  }
1708 
1709  //----- set entropy coding status -----
      // NOTE(review): statement 1710 is missing from the listing.
1711 
      // Single block wins: undo what the children wrote (transform index, CBF, transform-skip flag, reconstruction).
1712  //--- set transform index and Cbf values ---
1713  pcCU->setTrIdxSubParts( uiTrDepth, uiAbsPartIdx, uiFullDepth );
1714  const TComRectangle &tuRect=rTu.getRect(COMPONENT_Y);
1715  pcCU->setCbfSubParts ( uiSingleCbfLuma << uiTrDepth, COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1716  pcCU ->setTransformSkipSubParts ( bestModeId[COMPONENT_Y], COMPONENT_Y, uiAbsPartIdx, totalAdjustedDepthChan );
1717 
1718  //--- set reconstruction for next intra prediction blocks ---
1719  const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1720  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx;
1721  const UInt uiWidth = tuRect.width;
1722  const UInt uiHeight = tuRect.height;
1723  Pel* piSrc = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( COMPONENT_Y, uiAbsPartIdx );
1724  UInt uiSrcStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride ( COMPONENT_Y );
1725  Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
1726  UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride ( COMPONENT_Y );
1727 
1728  for( UInt uiY = 0; uiY < uiHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
1729  {
1730  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1731  {
1732  piDes[ uiX ] = piSrc[ uiX ];
1733  }
1734  }
1735  }
      // Reached when the single-block variant is kept (or no split was tested).
1736  ruiDistY += uiSingleDistLuma;
1737  dRDCost += dSingleCost;
1738 }
1739 
1740 
// Walks the luma transform tree of `rTu` and, at each TU whose stored transform index
// equals the current relative depth, copies the luma coefficients (and the ARL
// coefficients when ADAPTIVE_QP_SELECTION is set) from the per-layer temp buffers into
// the CU, and the luma reconstruction from m_pcQTTempTComYuv[layer] into `pcRecoYuv`.
// Other TUs are recursed into child by child.
// NOTE(review): the embedded listing numbers skip 1742, so the line with the function
// name and parameter list is not visible; the name and the (pcRecoYuv, rTu) argument
// order are taken from the recursive call below.
1741 Void
1743 {
1744  TComDataCU *pcCU = rTu.getCU();
1745  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1746  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1747  UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1748  if( uiTrMode == uiTrDepth )
1749  {
1750  UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1751  UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1752 
1753  //===== copy transform coefficients =====
1754 
1755  const TComRectangle &tuRect=rTu.getRect(COMPONENT_Y);
1756  const UInt coeffOffset = rTu.getCoefficientOffset(COMPONENT_Y);
1757  const UInt numCoeffInBlock = tuRect.width * tuRect.height;
1758 
      // An empty rectangle skips both the coefficient and the reconstruction copy.
1759  if (numCoeffInBlock!=0)
1760  {
1761  const TCoeff* srcCoeff = m_ppcQTTempCoeff[COMPONENT_Y][uiQTLayer] + coeffOffset;
1762  TCoeff* destCoeff = pcCU->getCoeff(COMPONENT_Y) + coeffOffset;
1763  ::memcpy( destCoeff, srcCoeff, sizeof(TCoeff)*numCoeffInBlock );
1764 #if ADAPTIVE_QP_SELECTION
1765  const TCoeff* srcArlCoeff = m_ppcQTTempArlCoeff[COMPONENT_Y][ uiQTLayer ] + coeffOffset;
1766  TCoeff* destArlCoeff = pcCU->getArlCoeff (COMPONENT_Y) + coeffOffset;
1767  ::memcpy( destArlCoeff, srcArlCoeff, sizeof( TCoeff ) * numCoeffInBlock );
1768 #endif
1769  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Y, pcRecoYuv, uiAbsPartIdx, tuRect.width, tuRect.height );
1770  }
1771 
1772  }
1773  else
1774  {
1775  TComTURecurse tuRecurseChild(rTu, false);
1776  do
1777  {
1778  xSetIntraResultLumaQT( pcRecoYuv, tuRecurseChild );
1779  } while (tuRecurseChild.nextSection(rTu));
1780  }
1781 }
1782 
1783 
// Saves the current result for component `compID` of the TU `rTu`: copies its
// coefficients (and the ARL coefficients when ADAPTIVE_QP_SELECTION is set) from the
// per-layer temp buffers into the single-TU buffers m_pcQTTempTUCoeff /
// m_ppcQTTempTUArlCoeff, and its reconstruction from m_pcQTTempTComYuv[layer] into
// m_pcQTTempTransformSkipTComYuv.
// Does nothing for a chroma component whose transform index differs from the current
// depth, or when rTu.ProcessComponentSection(compID) is false.
// NOTE(review): the embedded listing numbers skip 1785, so the line with the function
// name and parameter list is not visible. This is presumably xStoreIntraResultQT with
// a (compID, rTu) parameter list like the xLoadIntraResultQT call in this file -
// confirm against upstream.
1784 Void
1786 {
1787  TComDataCU *pcCU=rTu.getCU();
1788  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1789  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1790  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1791  if ( compID==COMPONENT_Y || uiTrMode == uiTrDepth )
1792  {
      // For luma the depths are required to match (asserted); for chroma a mismatch simply skips the work.
1793  assert(uiTrMode == uiTrDepth);
1794  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1795  const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1796 
1797  if (rTu.ProcessComponentSection(compID))
1798  {
1799  const TComRectangle &tuRect=rTu.getRect(compID);
1800 
1801  //===== copy transform coefficients =====
1802  const UInt uiNumCoeff = tuRect.width * tuRect.height;
1803  TCoeff* pcCoeffSrc = m_ppcQTTempCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
      // The destination is written from its start, without the TU's coefficient offset.
1804  TCoeff* pcCoeffDst = m_pcQTTempTUCoeff[compID];
1805 
1806  ::memcpy( pcCoeffDst, pcCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1807 #if ADAPTIVE_QP_SELECTION
1808  TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1809  TCoeff* pcArlCoeffDst = m_ppcQTTempTUArlCoeff[compID];
1810  ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1811 #endif
1812  //===== copy reconstruction =====
1813  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( compID, &m_pcQTTempTransformSkipTComYuv, uiAbsPartIdx, tuRect.width, tuRect.height );
1814  }
1815  }
1816 }
1817 
1818 
// Restores a previously saved result for component `compID` of the TU `rTu`: copies
// the coefficients (and the ARL coefficients when ADAPTIVE_QP_SELECTION is set) from
// the single-TU buffers m_pcQTTempTUCoeff / m_ppcQTTempTUArlCoeff back into the
// per-layer temp buffers, the reconstruction from m_pcQTTempTransformSkipTComYuv back
// into m_pcQTTempTComYuv[layer], and then that reconstruction into the picture's
// reconstruction buffer.
// Does nothing for a chroma component whose transform index differs from the current
// depth, or when rTu.ProcessComponentSection(compID) is false.
// NOTE(review): the embedded listing numbers skip 1820, so the line with the function
// name and parameter list is not visible; the name and the (compID, rTu) argument order
// are inferred from the xLoadIntraResultQT(COMPONENT_Y, rTu) call in this file.
1819 Void
1821 {
1822  TComDataCU *pcCU=rTu.getCU();
1823  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1824  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1825  const UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1826  if ( compID==COMPONENT_Y || uiTrMode == uiTrDepth )
1827  {
      // For luma the depths are required to match (asserted); for chroma a mismatch simply skips the work.
1828  assert(uiTrMode == uiTrDepth);
1829  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
1830  const UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
1831  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiAbsPartIdx;
1832 
1833  if (rTu.ProcessComponentSection(compID))
1834  {
1835  const TComRectangle &tuRect=rTu.getRect(compID);
1836 
1837  //===== copy transform coefficients =====
1838  const UInt uiNumCoeff = tuRect.width * tuRect.height;
1839  TCoeff* pcCoeffDst = m_ppcQTTempCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1840  TCoeff* pcCoeffSrc = m_pcQTTempTUCoeff[compID];
1841 
1842  ::memcpy( pcCoeffDst, pcCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1843 #if ADAPTIVE_QP_SELECTION
1844  TCoeff* pcArlCoeffDst = m_ppcQTTempArlCoeff[compID] [ uiQTLayer ] + rTu.getCoefficientOffset(compID);
1845  TCoeff* pcArlCoeffSrc = m_ppcQTTempTUArlCoeff[compID];
1846  ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeff );
1847 #endif
1848  //===== copy reconstruction =====
1849  m_pcQTTempTransformSkipTComYuv.copyPartToPartComponent( compID, &m_pcQTTempTComYuv[ uiQTLayer ], uiAbsPartIdx, tuRect.width, tuRect.height );
1850 
      // Propagate the restored reconstruction to the picture buffer as well.
1851  Pel* piRecIPred = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder );
1852  UInt uiRecIPredStride = pcCU->getPic()->getPicYuvRec()->getStride (compID);
1853  Pel* piRecQt = m_pcQTTempTComYuv[ uiQTLayer ].getAddr( compID, uiAbsPartIdx );
1854  UInt uiRecQtStride = m_pcQTTempTComYuv[ uiQTLayer ].getStride (compID);
1855  UInt uiWidth = tuRect.width;
1856  UInt uiHeight = tuRect.height;
1857  Pel* pRecQt = piRecQt;
1858  Pel* pRecIPred = piRecIPred;
1859  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1860  {
1861  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1862  {
1863  pRecIPred[ uiX ] = pRecQt [ uiX ];
1864  }
1865  pRecQt += uiRecQtStride;
1866  pRecIPred += uiRecIPredStride;
1867  }
1868  }
1869  }
1870 }
1871 
// Copies a block of residual samples from `pResiSrc` to `pResiDst`, row by row.
// The block has the width and height of rTu's COMPONENT_Y rectangle, and both pointers
// are first advanced to (xOffset, yOffset) using their own strides.
//   strideDst / strideSrc : row strides, in Pel units, of the destination and source
// Uses memcpy per row, so source and destination rows must not overlap.
// NOTE(review): the embedded listing numbers skip 1873, so the line with the function
// name and the `pResiDst` parameter is not visible; the name is inferred from the
// xStoreCrossComponentPredictionResult calls in this file (destination first, source second).
1872 Void
1874  const Pel *pResiSrc,
1875  TComTU &rTu,
1876  const Int xOffset,
1877  const Int yOffset,
1878  const Int strideDst,
1879  const Int strideSrc )
1880 {
1881  const Pel *pSrc = pResiSrc + yOffset * strideSrc + xOffset;
1882  Pel *pDst = pResiDst + yOffset * strideDst + xOffset;
1883 
1884  for( Int y = 0; y < rTu.getRect( COMPONENT_Y ).height; y++ )
1885  {
1886  ::memcpy( pDst, pSrc, sizeof(Pel) * rTu.getRect( COMPONENT_Y ).width );
1887  pDst += strideDst;
1888  pSrc += strideSrc;
1889  }
1890 }
1891 
// Estimates the cross-component prediction weight `alpha` for component `compID`
// from a luma residual block and a chroma residual block, stores it in the CU for the
// TU's partition range, and returns it.
//   piResiL / strideL : luma residual and its row stride
//   piResiC / strideC : chroma residual and its row stride
//   width, height     : block size in samples
// alpha is a least-squares ratio sum(L*C)/sum(L*L), computed with the luma samples
// scaled via rightShift(..., diffBitDepth), then multiplied by 16, clipped to
// [-16, 16] and mapped through alphaQuant to one of {0, +/-1, +/-2, +/-4, +/-8}.
// It stays 0 when the luma energy sum is 0.
// NOTE(review): SSxy and SSxx are plain Int accumulators - confirm they cannot
// overflow for the largest block size and residual range in use.
// NOTE(review): the embedded listing numbers skip 1893, so the line with the function
// name and the `rTu` parameter is not visible; the name is inferred from the
// xCalcCrossComponentPredictionAlpha call in this file.
1892 SChar
1894  const ComponentID compID,
1895  const Pel* piResiL,
1896  const Pel* piResiC,
1897  const Int width,
1898  const Int height,
1899  const Int strideL,
1900  const Int strideC )
1901 {
1902  const Pel *pResiL = piResiL;
1903  const Pel *pResiC = piResiC;
1904 
1905  TComDataCU *pCU = rTu.getCU();
1906  const Int absPartIdx = rTu.GetAbsPartIdxTU( compID );
1907  const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
1908 
1909  SChar alpha = 0;
1910  Int SSxy = 0;
1911  Int SSxx = 0;
1912 
      // Accumulate the luma/chroma cross term and the luma energy over the block.
1913  for( UInt uiY = 0; uiY < height; uiY++ )
1914  {
1915  for( UInt uiX = 0; uiX < width; uiX++ )
1916  {
1917  const Pel scaledResiL = rightShift( pResiL[ uiX ], diffBitDepth );
1918  SSxy += ( scaledResiL * pResiC[ uiX ] );
1919  SSxx += ( scaledResiL * scaledResiL );
1920  }
1921 
1922  pResiL += strideL;
1923  pResiC += strideC;
1924  }
1925 
1926  if( SSxx != 0 )
1927  {
1928  Double dAlpha = SSxy / Double( SSxx );
      // The (Int) cast truncates toward zero before the clip.
1929  alpha = SChar(Clip3<Int>(-16, 16, (Int)(dAlpha * 16)));
1930 
      // Quantisation table indexed by |alpha| in 0..16; the sign is re-applied afterwards.
1931  static const SChar alphaQuant[17] = {0, 1, 1, 2, 2, 2, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8};
1932 
1933  alpha = (alpha < 0) ? -alphaQuant[Int(-alpha)] : alphaQuant[Int(alpha)];
1934  }
      // The alpha is recorded in the CU even when it is 0.
1935  pCU->setCrossComponentPredictionAlphaPartRange( alpha, compID, absPartIdx, rTu.GetAbsPartIdxNumParts( compID ) );
1936 
1937  return alpha;
1938 }
1939 
1940 Void
1942  TComYuv* pcPredYuv,
1943  TComYuv* pcResiYuv,
1944  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE],
1945  Distortion& ruiDist,
1946  TComTU& rTu
1947  DEBUG_STRING_FN_DECLARE(sDebug))
1948 {
1949  TComDataCU *pcCU = rTu.getCU();
1950  const UInt uiTrDepth = rTu.GetTransformDepthRel();
1951  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1952  const ChromaFormat format = rTu.GetChromaFormat();
1953  UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1954  const UInt numberValidComponents = getNumberValidComponents(format);
1955 
1956  if( uiTrMode == uiTrDepth )
1957  {
1958  if (!rTu.ProcessChannelSection(CHANNEL_TYPE_CHROMA))
1959  {
1960  return;
1961  }
1962 
1963  const UInt uiFullDepth = rTu.GetTransformDepthTotal();
1964 
1965  Bool checkTransformSkip = pcCU->getSlice()->getPPS()->getUseTransformSkip();
1967 
1969  {
1971 
1972  if (checkTransformSkip)
1973  {
1974  Int nbLumaSkip = 0;
1975  const UInt maxAbsPartIdxSub=uiAbsPartIdx + (rTu.ProcessingAllQuadrants(COMPONENT_Cb)?1:4);
1976  for(UInt absPartIdxSub = uiAbsPartIdx; absPartIdxSub < maxAbsPartIdxSub; absPartIdxSub ++)
1977  {
1978  nbLumaSkip += pcCU->getTransformSkip(absPartIdxSub, COMPONENT_Y);
1979  }
1980  checkTransformSkip &= (nbLumaSkip > 0);
1981  }
1982  }
1983 
1984 
1985  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
1986  {
1987  const ComponentID compID = ComponentID(ch);
1988  DEBUG_STRING_NEW(sDebugBestMode)
1989 
1990  //use RDO to decide whether Cr/Cb takes TS
1992 
1993  const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height;
1994 
1995  TComTURecurse TUIterator(rTu, false, (splitIntoSubTUs ? TComTU::VERTICAL_SPLIT : TComTU::DONT_SPLIT), true, compID);
1996 
1997  const UInt partIdxesPerSubTU = TUIterator.GetAbsPartIdxNumParts(compID);
1998 
1999  do
2000  {
2001  const UInt subTUAbsPartIdx = TUIterator.GetAbsPartIdxTU(compID);
2002 
2003  Double dSingleCost = MAX_DOUBLE;
2004  Int bestModeId = 0;
2005  Distortion singleDistC = 0;
2006  UInt singleCbfC = 0;
2007  Distortion singleDistCTmp = 0;
2008  Double singleCostTmp = 0;
2009  UInt singleCbfCTmp = 0;
2010  SChar bestCrossCPredictionAlpha = 0;
2011  Int bestTransformSkipMode = 0;
2012 
2013  const Bool checkCrossComponentPrediction = (pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, subTUAbsPartIdx) == DM_CHROMA_IDX)
2015  && (pcCU->getCbf(subTUAbsPartIdx, COMPONENT_Y, uiTrDepth) != 0);
2016 
2017  const Int crossCPredictionModesToTest = checkCrossComponentPrediction ? 2 : 1;
2018  const Int transformSkipModesToTest = checkTransformSkip ? 2 : 1;
2019  const Int totalModesToTest = crossCPredictionModesToTest * transformSkipModesToTest;
2020  Int currModeId = 0;
2021  Int default0Save1Load2 = 0;
2022 
2023  for(Int transformSkipModeId = 0; transformSkipModeId < transformSkipModesToTest; transformSkipModeId++)
2024  {
2025  for(Int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++)
2026  {
2027  pcCU->setCrossComponentPredictionAlphaPartRange(0, compID, subTUAbsPartIdx, partIdxesPerSubTU);
2028  DEBUG_STRING_NEW(sDebugMode)
2029  pcCU->setTransformSkipPartRange( transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2030  currModeId++;
2031 
2032  const Bool isOneMode = (totalModesToTest == 1);
2033  const Bool isLastMode = (currModeId == totalModesToTest); // currModeId is indexed from 1
2034 
2035  if (isOneMode)
2036  {
2037  default0Save1Load2 = 0;
2038  }
2039  else if (!isOneMode && (transformSkipModeId == 0) && (crossCPredictionModeId == 0))
2040  {
2041  default0Save1Load2 = 1; //save prediction on first mode
2042  }
2043  else
2044  {
2045  default0Save1Load2 = 2; //load it on subsequent modes
2046  }
2047 
2048  singleDistCTmp = 0;
2049 
2050  xIntraCodingTUBlock( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, (crossCPredictionModeId != 0), singleDistCTmp, compID, TUIterator DEBUG_STRING_PASS_INTO(sDebugMode), default0Save1Load2);
2051  singleCbfCTmp = pcCU->getCbf( subTUAbsPartIdx, compID, uiTrDepth);
2052 
2053  if ( ((crossCPredictionModeId == 1) && (pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) == 0))
2054  || ((transformSkipModeId == 1) && (singleCbfCTmp == 0))) //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
2055  {
2056  singleCostTmp = MAX_DOUBLE;
2057  }
2058  else if (!isOneMode)
2059  {
2060  UInt bitsTmp = xGetIntraBitsQTChroma( TUIterator, compID, false );
2061  singleCostTmp = m_pcRdCost->calcRdCost( bitsTmp, singleDistCTmp);
2062  }
2063 
2064  if(singleCostTmp < dSingleCost)
2065  {
2066  DEBUG_STRING_SWAP(sDebugBestMode, sDebugMode)
2067  dSingleCost = singleCostTmp;
2068  singleDistC = singleDistCTmp;
2069  bestCrossCPredictionAlpha = (crossCPredictionModeId != 0) ? pcCU->getCrossComponentPredictionAlpha(subTUAbsPartIdx, compID) : 0;
2070  bestTransformSkipMode = transformSkipModeId;
2071  bestModeId = currModeId;
2072  singleCbfC = singleCbfCTmp;
2073 
2074  if (!isOneMode && !isLastMode)
2075  {
2076  xStoreIntraResultQT(compID, TUIterator);
2078  }
2079  }
2080 
2081  if (!isOneMode && !isLastMode)
2082  {
2083  m_pcRDGoOnSbacCoder->load ( m_pppcRDSbacCoder[ uiFullDepth ][ CI_QT_TRAFO_ROOT ] );
2084  }
2085  }
2086  }
2087 
2088  if(bestModeId < totalModesToTest)
2089  {
2090  xLoadIntraResultQT(compID, TUIterator);
2091  pcCU->setCbfPartRange( singleCbfC << uiTrDepth, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2092 
2094  }
2095 
2096  DEBUG_STRING_APPEND(sDebug, sDebugBestMode)
2097  pcCU ->setTransformSkipPartRange ( bestTransformSkipMode, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2098  pcCU ->setCrossComponentPredictionAlphaPartRange( bestCrossCPredictionAlpha, compID, subTUAbsPartIdx, partIdxesPerSubTU );
2099  ruiDist += singleDistC;
2100  } while (TUIterator.nextSection(rTu));
2101 
2102  if (splitIntoSubTUs)
2103  {
2104  offsetSubTUCBFs(rTu, compID);
2105  }
2106  }
2107  }
2108  else
2109  {
2110  UInt uiSplitCbf[MAX_NUM_COMPONENT] = {0,0,0};
2111 
2112  TComTURecurse tuRecurseChild(rTu, false);
2113  const UInt uiTrDepthChild = tuRecurseChild.GetTransformDepthRel();
2114  do
2115  {
2116  DEBUG_STRING_NEW(sChild)
2117 
2118  xRecurIntraChromaCodingQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, ruiDist, tuRecurseChild DEBUG_STRING_PASS_INTO(sChild) );
2119 
2120  DEBUG_STRING_APPEND(sDebug, sChild)
2121  const UInt uiAbsPartIdxSub=tuRecurseChild.GetAbsPartIdxTU();
2122 
2123  for(UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2124  {
2125  uiSplitCbf[ch] |= pcCU->getCbf( uiAbsPartIdxSub, ComponentID(ch), uiTrDepthChild );
2126  }
2127  } while ( tuRecurseChild.nextSection(rTu) );
2128 
2129 
2130  UInt uiPartsDiv = rTu.GetAbsPartIdxNumParts();
2131  for(UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2132  {
2133  if (uiSplitCbf[ch])
2134  {
2135  const UInt flag=1<<uiTrDepth;
2136  ComponentID compID=ComponentID(ch);
2137  UChar *pBase=pcCU->getCbf( compID );
2138  for( UInt uiOffs = 0; uiOffs < uiPartsDiv; uiOffs++ )
2139  {
2140  pBase[ uiAbsPartIdx + uiOffs ] |= flag;
2141  }
2142  }
2143  }
2144  }
2145 }
2146 
2147 
2148 
2149 
// Copies the chroma results stored in the per-layer temporary buffers
// (m_ppcQTTempCoeff / m_ppcQTTempArlCoeff / m_pcQTTempTComYuv) for the transform
// quadtree rooted at rTu: coefficients go to the CU, reconstructed samples go to pcRecoYuv.
// Recurses over child sections until it reaches the TUs whose relative depth equals the
// transform index stored in the CU.
// NOTE(review): listing line 2151 (the signature) is missing from this extract; the name
// xSetIntraResultChromaQT and the (pcRecoYuv, rTu) argument order are taken from the
// recursive call at the bottom of this function.
2150 Void
2152 {
 // NOTE(review): listing line 2153 (the condition guarding this early return) is missing
 // from this extract, so what triggers the early exit cannot be seen here.
2154  {
2155  return;
2156  }
2157  TComDataCU *pcCU=rTu.getCU();
2158  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
2159  const UInt uiTrDepth = rTu.GetTransformDepthRel();
2160  UInt uiTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
 // The stored transform index equals this TU's relative depth: copy this TU directly.
 // Otherwise (else branch below) descend into the child sections.
2161  if( uiTrMode == uiTrDepth )
2162  {
2163  UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
 // Temporary buffers are indexed by (maximum TU log2 size - this TU's log2 luma size).
2164  UInt uiQTLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
2165 
2166  //===== copy transform coefficients =====
 // The coefficient count and offset are derived from the Cb rectangle and reused for
 // every chroma component handled in the loop below.
2167  const TComRectangle &tuRectCb=rTu.getRect(COMPONENT_Cb);
2168  UInt uiNumCoeffC = tuRectCb.width*tuRectCb.height;//( pcCU->getSlice()->getSPS()->getMaxCUWidth() * pcCU->getSlice()->getSPS()->getMaxCUHeight() ) >> ( uiFullDepth << 1 );
2169  const UInt offset = rTu.getCoefficientOffset(COMPONENT_Cb);
2170 
2171  const UInt numberValidComponents = getNumberValidComponents(rTu.GetChromaFormat());
2172  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2173  {
2174  const ComponentID component = ComponentID(ch);
2175  const TCoeff* src = m_ppcQTTempCoeff[component][uiQTLayer] + offset;//(uiNumCoeffIncC*uiAbsPartIdx);
2176  TCoeff* dest = pcCU->getCoeff(component) + offset;//(uiNumCoeffIncC*uiAbsPartIdx);
2177  ::memcpy( dest, src, sizeof(TCoeff)*uiNumCoeffC );
2178 #if ADAPTIVE_QP_SELECTION
 // Same copy for the second coefficient buffer, only built with ADAPTIVE_QP_SELECTION.
2179  TCoeff* pcArlCoeffSrc = m_ppcQTTempArlCoeff[component][ uiQTLayer ] + offset;//( uiNumCoeffIncC * uiAbsPartIdx );
2180  TCoeff* pcArlCoeffDst = pcCU->getArlCoeff(component) + offset;//( uiNumCoeffIncC * uiAbsPartIdx );
2181  ::memcpy( pcArlCoeffDst, pcArlCoeffSrc, sizeof( TCoeff ) * uiNumCoeffC );
2182 #endif
2183  }
2184 
2185  //===== copy reconstruction =====
2186 
 // Cb and Cr are copied explicitly (not via the component loop); the Cb rectangle's
 // width and height are used for both copies.
2187  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Cb, pcRecoYuv, uiAbsPartIdx, tuRectCb.width, tuRectCb.height );
2188  m_pcQTTempTComYuv[ uiQTLayer ].copyPartToPartComponent( COMPONENT_Cr, pcRecoYuv, uiAbsPartIdx, tuRectCb.width, tuRectCb.height );
2189  }
2190  else
2191  {
 // Not at the coded depth yet: visit every child section of rTu recursively.
2192  TComTURecurse tuRecurseChild(rTu, false);
2193  do
2194  {
2195  xSetIntraResultChromaQT( pcRecoYuv, tuRecurseChild );
2196  } while (tuRecurseChild.nextSection(rTu));
2197  }
2198 }
2199 
2200 
2201 
// Luma intra-mode decision for one CU.
// NOTE(review): listing line 2203 (the start of the signature) is missing from this
// extract, so the function name (presumably TEncSearch::estIntraPredLumaQT) and the
// declaration of the pcCU parameter used throughout the body are not visible here.
//
// For every luma PU section of the CU (one section for SIZE_2Nx2N, a quad split otherwise):
//  1. when fewer than all 35 modes get a full check, every mode is ranked by a first-pass
//     cost (distortion + mode bits * sqrtLambdaForFirstPass) and the best candidates are
//     kept in uiRdModeList;
//  2. each candidate is coded with xRecurIntraCodingLumaQT and the cheapest is remembered,
//     together with its transform indices, cbfs and transform-skip flags;
//  3. the winner's data is written back to the CU and, unless this is the last section,
//     its reconstruction is copied into the picture's reconstruction buffer.
// On exit the CU's total distortion is set to the sum of the best per-section distortions.
//
// pcOrgYuv, pcPredYuv, pcResiYuv : buffers handed on to the prediction / coding helpers.
// pcRecoYuv                      : receives the reconstruction of the best mode (xSetIntraResultLumaQT).
// resiLuma                       : filled from resiLumaPU via xStoreCrossComponentPredictionResult
//                                  whenever a new best mode is found.
2202 Void
2204  TComYuv* pcOrgYuv,
2205  TComYuv* pcPredYuv,
2206  TComYuv* pcResiYuv,
2207  TComYuv* pcRecoYuv,
2208  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
2209  DEBUG_STRING_FN_DECLARE(sDebug))
2210 {
2211  const UInt uiDepth = pcCU->getDepth(0);
 // SIZE_2Nx2N is handled as a single PU (initial depth 0); any other partition size
 // gives initial depth 1 and therefore 1<<(2*1) = 4 PUs.
2212  const UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
2213  const UInt uiNumPU = 1<<(2*uiInitTrDepth);
2214  const UInt uiQNumParts = pcCU->getTotalNumPart() >> 2;
2215  const UInt uiWidthBit = pcCU->getIntraSizeIdx(0);
2216  const ChromaFormat chFmt = pcCU->getPic()->getChromaFormat();
2217  const UInt numberValidComponents = getNumberValidComponents(chFmt);
2218  const TComSPS &sps = *(pcCU->getSlice()->getSPS());
2219  const TComPPS &pps = *(pcCU->getSlice()->getPPS());
2220  Distortion uiOverallDistY = 0;
2221  UInt CandNum;
2222  Double CandCostList[ FAST_UDI_MAX_RDMODE_NUM ];
 // Per-PU residual scratch area; copied into resiLuma only when a mode becomes the new best.
2223  Pel resiLumaPU[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
2224 
2225  Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];
2226  for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)
2227  {
2228  bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise
2229  }
2230 
 // NOTE(review): listing line 2231 is missing from this extract.
2232 
2233  // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantisation divisor is 1.
 // NOTE(review): the ':' alternative of both conditional expressions below (listing
 // lines 2237 and 2241) is missing from this extract, so only the value used for
 // transquant-bypass CUs in COST_MIXED_LOSSLESS_LOSSY_CODING mode is visible.
2234 #if FULL_NBIT
2235  const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
2236  sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0)))
2238 #else
2239  const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
2240  sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (sps.getBitDepth(CHANNEL_TYPE_LUMA) - 8)) / 3.0)))
2242 #endif
2243 
2244  //===== set QP and clear Cbf =====
 // With delta-QP enabled in the PPS the CU keeps its own QP; otherwise the slice QP is used.
2245  if ( pps.getUseDQP() == true)
2246  {
2247  pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
2248  }
2249  else
2250  {
2251  pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
2252  }
2253 
2254  //===== loop over partitions =====
 // tuRecurseWithPU iterates over the PU sections: no split for one PU, quad split for four.
2255  TComTURecurse tuRecurseCU(pcCU, 0);
2256  TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);
2257 
2258  do
2259  {
2260  const UInt uiPartOffset=tuRecurseWithPU.GetAbsPartIdxTU();
2261 // for( UInt uiPU = 0, uiPartOffset=0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )
2262  //{
2263  //===== init pattern for luma prediction =====
2264  DEBUG_STRING_NEW(sTemp2)
2265 
2266  //===== determine set of modes to be tested (using prediction signal only) =====
2267  Int numModesAvailable = 35; //total number of Intra modes
2268  UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
 // Number of candidates that get the full RD check, looked up by block size index from
 // one of two tables depending on the FastUDIUseMPM encoder setting.
2269  Int numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled()?g_aucIntraModeNumFast_UseMPM[ uiWidthBit ] : g_aucIntraModeNumFast_NotUseMPM[ uiWidthBit ];
2270 
2271  // this should always be true
2272  assert (tuRecurseWithPU.ProcessComponentSection(COMPONENT_Y));
2273  initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, true DEBUG_STRING_PASS_INTO(sTemp2) );
2274 
 // The first (pre-selection) pass is only needed when not all modes get the full RD check.
2275  Bool doFastSearch = (numModesForFullRD != numModesAvailable);
2276  if (doFastSearch)
2277  {
2278  assert(numModesForFullRD < numModesAvailable);
2279 
2280  for( Int i=0; i < numModesForFullRD; i++ )
2281  {
2282  CandCostList[ i ] = MAX_DOUBLE;
2283  }
2284  CandNum = 0;
2285 
2286  const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
2287  const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU();
2288 
2289  Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx );
2290  Pel* piPred = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx );
2291  UInt uiStride = pcPredYuv->getStride( COMPONENT_Y );
2292  DistParam distParam;
 // bUseHadamard is true unless the CU is transquant-bypass; it is passed to setDistParam,
 // presumably to select a Hadamard-based metric (confirm against TComRdCost).
2293  const Bool bUseHadamard=pcCU->getCUTransquantBypass(0) == 0;
2294  m_pcRdCost->setDistParam(distParam, sps.getBitDepth(CHANNEL_TYPE_LUMA), piOrg, uiStride, piPred, uiStride, puRect.width, puRect.height, bUseHadamard);
2295  distParam.bApplyWeight = false;
 // First pass: predict every mode, measure its distortion and rank it by
 // distortion + mode bits * sqrtLambdaForFirstPass.
2296  for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )
2297  {
2298  UInt uiMode = modeIdx;
2299  Distortion uiSad = 0;
2300 
 // NOTE(review): listing line 2301 is missing from this extract; bUseFilter, used in the
 // predIntraAng call below, has no visible declaration here.
2302 
2303  predIntraAng( COMPONENT_Y, uiMode, piOrg, uiStride, piPred, uiStride, tuRecurseWithPU, bUseFilter, TComPrediction::UseDPCMForFirstPassIntraEstimation(tuRecurseWithPU, uiMode) );
2304 
2305  // use hadamard transform here
2306  uiSad+=distParam.DistFunc(&distParam);
2307 
2308  UInt iModeBits = 0;
2309 
2310  // NB xModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
2311  iModeBits+=xModeBitsIntra( pcCU, uiMode, uiPartOffset, uiDepth, CHANNEL_TYPE_LUMA );
2312 
2313  Double cost = (Double)uiSad + (Double)iModeBits * sqrtLambdaForFirstPass;
2314 
2315 #if DEBUG_INTRA_SEARCH_COSTS
2316  std::cout << "1st pass mode " << uiMode << " SAD = " << uiSad << ", mode bits = " << iModeBits << ", cost = " << cost << "\n";
2317 #endif
2318 
2319  CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
2320  }
2321 
 // NOTE(review): listing line 2322 (the condition guarding the block below) is missing
 // from this extract.
 // The block appends to the candidate list any intra direction predictor returned by
 // getIntraDirPredictor that is not already in it.
2323  {
2324  Int uiPreds[NUM_MOST_PROBABLE_MODES] = {-1, -1, -1};
2325 
2326  Int iMode = -1;
2327  pcCU->getIntraDirPredictor( uiPartOffset, uiPreds, COMPONENT_Y, &iMode );
2328 
 // A non-negative iMode written by getIntraDirPredictor is used as the number of
 // predictors to consider; otherwise all NUM_MOST_PROBABLE_MODES entries are used.
2329  const Int numCand = ( iMode >= 0 ) ? iMode : Int(NUM_MOST_PROBABLE_MODES);
2330 
2331  for( Int j=0; j < numCand; j++)
2332  {
2333  Bool mostProbableModeIncluded = false;
2334  Int mostProbableMode = uiPreds[j];
2335 
2336  for( Int i=0; i < numModesForFullRD; i++)
2337  {
2338  mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
2339  }
2340  if (!mostProbableModeIncluded)
2341  {
 // NOTE(review): this grows the list past the table-derived count; it assumes
 // uiRdModeList (FAST_UDI_MAX_RDMODE_NUM entries) has room - confirm.
2342  uiRdModeList[numModesForFullRD++] = mostProbableMode;
2343  }
2344  }
2345  }
2346  }
2347  else
2348  {
 // No pre-selection: test modes 0 .. numModesForFullRD-1 in order.
2349  for( Int i=0; i < numModesForFullRD; i++)
2350  {
2351  uiRdModeList[i] = i;
2352  }
2353  }
2354 
2355  //===== check modes (using r-d costs) =====
2356 #if HHI_RQT_INTRA_SPEEDUP_MOD
2357  UInt uiSecondBestMode = MAX_UINT;
2358  Double dSecondBestPUCost = MAX_DOUBLE;
2359 #endif
2360  DEBUG_STRING_NEW(sPU)
2361  UInt uiBestPUMode = 0;
2362  Distortion uiBestPUDistY = 0;
2363  Double dBestPUCost = MAX_DOUBLE;
2364 
2365 #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
2366  UInt max=numModesForFullRD;
2367 
2368  if (DebugOptionList::ForceLumaMode.isSet())
2369  {
2370  max=0; // we are forcing a direction, so don't bother with mode check
2371  }
2372  for ( UInt uiMode = 0; uiMode < max; uiMode++)
2373 #else
2374  for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ )
2375 #endif
2376  {
2377  // set luma prediction mode
2378  UInt uiOrgMode = uiRdModeList[uiMode];
2379 
2380  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
2381 
2382  DEBUG_STRING_NEW(sMode)
2383  // set context models
 // NOTE(review): listing line 2384 (the statement this comment refers to) is missing
 // from this extract.
2385 
2386  // determine residual for partition
2387  Distortion uiPUDistY = 0;
2388  Double dPUCost = 0.0;
 // With HHI_RQT_INTRA_SPEEDUP the call takes an extra Bool, passed as true here and as
 // false in the re-check loop further down; its meaning is defined by
 // xRecurIntraCodingLumaQT and is not visible in this extract.
2389 #if HHI_RQT_INTRA_SPEEDUP
2390  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
2391 #else
2392  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
2393 #endif
2394 
2395 #if DEBUG_INTRA_SEARCH_COSTS
2396  std::cout << "2nd pass [luma,chroma] mode [" << Int(pcCU->getIntraDir(CHANNEL_TYPE_LUMA, uiPartOffset)) << "," << Int(pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, uiPartOffset)) << "] cost = " << dPUCost << "\n";
2397 #endif
2398 
2399  // check r-d cost
2400  if( dPUCost < dBestPUCost )
2401  {
2402  DEBUG_STRING_SWAP(sPU, sMode)
2403 #if HHI_RQT_INTRA_SPEEDUP_MOD
 // The previous best becomes the runner-up before it is overwritten.
2404  uiSecondBestMode = uiBestPUMode;
2405  dSecondBestPUCost = dBestPUCost;
2406 #endif
2407  uiBestPUMode = uiOrgMode;
2408  uiBestPUDistY = uiPUDistY;
2409  dBestPUCost = dPUCost;
2410 
2411  xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
2412 
 // NOTE(review): listing line 2413 (the condition guarding the block below) is missing
 // from this extract.
 // Copies the PU's residuals into the caller's resiLuma, at the PU's x0/y0 offset.
2414  {
2415  const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
2416  const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
2417  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
2418  {
2419  if (bMaintainResidual[storedResidualIndex])
2420  {
2421  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
2422  }
2423  }
2424  }
2425 
2426  UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
2427 
 // Snapshot the winner's transform indices, cbfs and transform-skip flags; later
 // candidates overwrite the CU's copies, which are restored from here after the loops.
2428  ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2429  for (UInt component = 0; component < numberValidComponents; component++)
2430  {
2431  const ComponentID compID = ComponentID(component);
2432  ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2433  ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2434  }
2435  }
2436 #if HHI_RQT_INTRA_SPEEDUP_MOD
2437  else if( dPUCost < dSecondBestPUCost )
2438  {
2439  uiSecondBestMode = uiOrgMode;
2440  dSecondBestPUCost = dPUCost;
2441  }
2442 #endif
2443  } // Mode loop
2444 
 // Re-check stage (HHI_RQT_INTRA_SPEEDUP only): the best mode, and with
 // HHI_RQT_INTRA_SPEEDUP_MOD also the runner-up, is coded again with the extra Bool
 // argument set to false; the result replaces the current best if it is cheaper.
2445 #if HHI_RQT_INTRA_SPEEDUP
2446 #if HHI_RQT_INTRA_SPEEDUP_MOD
2447  for( UInt ui =0; ui < 2; ++ui )
2448 #endif
2449  {
2450 #if HHI_RQT_INTRA_SPEEDUP_MOD
2451  UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode;
 // MAX_UINT is the initial value of uiSecondBestMode, i.e. no runner-up was recorded.
2452  if( uiOrgMode == MAX_UINT )
2453  {
2454  break;
2455  }
2456 #else
2457  UInt uiOrgMode = uiBestPUMode;
2458 #endif
2459 
2460 #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
2461  if (DebugOptionList::ForceLumaMode.isSet())
2462  {
2463  uiOrgMode = DebugOptionList::ForceLumaMode.getInt();
2464  }
2465 #endif
2466 
2467  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
2468  DEBUG_STRING_NEW(sModeTree)
2469 
2470  // set context models
 // NOTE(review): listing line 2471 (the statement this comment refers to) is missing
 // from this extract.
2472 
2473  // determine residual for partition
2474  Distortion uiPUDistY = 0;
2475  Double dPUCost = 0.0;
2476 
2477  xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, false, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sModeTree));
2478 
2479  // check r-d cost
2480  if( dPUCost < dBestPUCost )
2481  {
2482  DEBUG_STRING_SWAP(sPU, sModeTree)
2483  uiBestPUMode = uiOrgMode;
2484  uiBestPUDistY = uiPUDistY;
2485  dBestPUCost = dPUCost;
2486 
2487  xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
2488 
 // NOTE(review): listing line 2489 (the condition guarding the block below) is missing
 // from this extract.
2490  {
2491  const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
2492  const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
2493  for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
2494  {
2495  if (bMaintainResidual[storedResidualIndex])
2496  {
2497  xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
2498  }
2499  }
2500  }
2501 
2502  const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
2503  ::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2504 
2505  for (UInt component = 0; component < numberValidComponents; component++)
2506  {
2507  const ComponentID compID = ComponentID(component);
2508  ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2509  ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
2510  }
2511  }
2512  } // Mode loop
2513 #endif
2514 
2515  DEBUG_STRING_APPEND(sDebug, sPU)
2516 
2517  //--- update overall distortion ---
2518  uiOverallDistY += uiBestPUDistY;
2519 
2520  //--- update transform index and cbf ---
 // Restore the snapshot taken for the best mode into the CU.
2521  const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
2522  ::memcpy( pcCU->getTransformIdx() + uiPartOffset, m_puhQTTempTrIdx, uiQPartNum * sizeof( UChar ) );
2523  for (UInt component = 0; component < numberValidComponents; component++)
2524  {
2525  const ComponentID compID = ComponentID(component);
2526  ::memcpy( pcCU->getCbf( compID ) + uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) );
2527  ::memcpy( pcCU->getTransformSkip( compID ) + uiPartOffset, m_puhQTTempTransformSkipFlag[compID ], uiQPartNum * sizeof( UChar ) );
2528  }
2529 
2530  //--- set reconstruction for next intra prediction blocks ---
 // Skipped for the last section: no further PU of this CU follows it.
2531  if( !tuRecurseWithPU.IsLastSection() )
2532  {
2533  const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
2534  const UInt uiCompWidth = puRect.width;
2535  const UInt uiCompHeight = puRect.height;
2536 
2537  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + uiPartOffset;
2538  Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
2539  const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y);
2540  const Pel* piSrc = pcRecoYuv->getAddr( COMPONENT_Y, uiPartOffset );
2541  const UInt uiSrcStride = pcRecoYuv->getStride( COMPONENT_Y);
2542 
 // Row-by-row copy from pcRecoYuv into the picture's reconstruction buffer.
2543  for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
2544  {
2545  for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
2546  {
2547  piDes[ uiX ] = piSrc[ uiX ];
2548  }
2549  }
2550  }
2551 
2552  //=== update PU data ====
2553  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth );
2554  } while (tuRecurseWithPU.nextSection(tuRecurseCU));
2555 
2556 
 // With four PUs: OR together the cbf read with depth argument 1 at the first partition of
 // each quadrant, then OR the combined value into the cbf byte of every partition of the CU.
 // NOTE(review): the Cb/Cr cbf arrays are accessed unconditionally here, whereas the loops
 // above are bounded by numberValidComponents - confirm behaviour for formats without chroma.
2557  if( uiNumPU > 1 )
2558  { // set Cbf for all blocks
2559  UInt uiCombCbfY = 0;
2560  UInt uiCombCbfU = 0;
2561  UInt uiCombCbfV = 0;
2562  UInt uiPartIdx = 0;
2563  for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts )
2564  {
2565  uiCombCbfY |= pcCU->getCbf( uiPartIdx, COMPONENT_Y, 1 );
2566  uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 );
2567  uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 );
2568  }
2569  for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ )
2570  {
2571  pcCU->getCbf( COMPONENT_Y )[ uiOffs ] |= uiCombCbfY;
2572  pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU;
2573  pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV;
2574  }
2575  }
2576 
2577  //===== reset context models =====
 // NOTE(review): listing line 2578 (the statement this comment refers to) is missing
 // from this extract.
2579 
2580  //===== set distortion (rate and r-d costs are determined later) =====
2581  pcCU->getTotalDistortion() = uiOverallDistY;
2582 }
2583 
2584 
2585 
2586 
// Chroma intra-mode decision for one CU.
// NOTE(review): listing line 2588 (the start of the signature) is missing from this
// extract, so the function name (presumably TEncSearch::estIntraPredChromaQT) and the
// declaration of the pcCU parameter used throughout the body are not visible here.
//
// For every section that carries chroma (ProcessChannelSection(CHANNEL_TYPE_CHROMA)):
//  - each mode from getAllowedChromaDir is coded with xRecurIntraChromaCodingQT, its bits
//    are obtained from xGetIntraBitsQT and its cost from calcRdCost;
//  - the cheapest mode's reconstruction is stored in pcRecoYuv and its cbfs, transform-skip
//    flags and cross-component prediction alphas are snapshotted, then restored into the CU;
//  - unless this is the last section, the reconstruction is copied into the picture's
//    reconstruction buffer;
//  - the best mode is set on the CU and its distortion is added to the CU's total distortion.
//
// pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma : handed on to xRecurIntraChromaCodingQT.
// pcRecoYuv                                : receives the reconstruction of the best mode.
2587 Void
2589  TComYuv* pcOrgYuv,
2590  TComYuv* pcPredYuv,
2591  TComYuv* pcResiYuv,
2592  TComYuv* pcRecoYuv,
2593  Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
2594  DEBUG_STRING_FN_DECLARE(sDebug))
2595 {
 // Initial depth is 1 (quad split into four chroma PUs) only when the CU is not SIZE_2Nx2N
 // and enable4ChromaPUsInIntraNxNCU() allows it for this chroma format; otherwise 0.
2596  const UInt uiInitTrDepth = pcCU->getPartitionSize(0) != SIZE_2Nx2N && enable4ChromaPUsInIntraNxNCU(pcOrgYuv->getChromaFormat()) ? 1 : 0;
2597 
2598  TComTURecurse tuRecurseCU(pcCU, 0);
2599  TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);
2600  const UInt uiQNumParts = tuRecurseWithPU.GetAbsPartIdxNumParts();
2601  const UInt uiDepthCU=tuRecurseWithPU.getCUDepth();
2602  const UInt numberValidComponents = pcCU->getPic()->getNumberValidComponents();
2603 
2604  do
2605  {
2606  UInt uiBestMode = 0;
2607  Distortion uiBestDist = 0;
2608  Double dBestCost = MAX_DOUBLE;
2609 
2610  //----- init mode list -----
2611  if (tuRecurseWithPU.ProcessChannelSection(CHANNEL_TYPE_CHROMA))
2612  {
2613  UInt uiModeList[FAST_UDI_MAX_RDMODE_NUM];
2614  const UInt uiQPartNum = uiQNumParts;
2615  const UInt uiPartOffset = tuRecurseWithPU.GetAbsPartIdxTU();
2616  {
 // By default every entry uiModeList[0 .. NUM_CHROMA_MODE-1] is tested.
2617  UInt uiMinMode = 0;
2618  UInt uiMaxMode = NUM_CHROMA_MODE;
2619 
2620  //----- check chroma modes -----
2621  pcCU->getAllowedChromaDir( uiPartOffset, uiModeList );
2622 
2623 #if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
 // NOTE(review): listing lines 2624 and 2626 (the condition of this debug block and the
 // statement that sets uiMinMode) are missing from this extract. What remains narrows
 // the tested range to the single entry uiMinMode.
2625  {
2627  if (uiModeList[uiMinMode]==34)
2628  {
2629  uiMinMode=4; // if the fixed mode has been renumbered because DM_CHROMA covers it, use DM_CHROMA.
2630  }
2631  uiMaxMode=uiMinMode+1;
2632  }
2633 #endif
2634 
2635  DEBUG_STRING_NEW(sPU)
2636 
2637  for( UInt uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++ )
2638  {
2639  //----- restore context models -----
 // NOTE(review): listing line 2640 (the statement this comment refers to) is missing
 // from this extract.
2641 
2642  DEBUG_STRING_NEW(sMode)
2643  //----- chroma coding -----
2644  Distortion uiDist = 0;
2645  pcCU->setIntraDirSubParts ( CHANNEL_TYPE_CHROMA, uiModeList[uiMode], uiPartOffset, uiDepthCU+uiInitTrDepth );
2646  xRecurIntraChromaCodingQT ( pcOrgYuv, pcPredYuv, pcResiYuv, resiLuma, uiDist, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
2647 
 // When transform skip is enabled in the PPS, the coder state is reloaded from
 // CI_CURR_BEST before the bits are counted below.
2648  if( pcCU->getSlice()->getPPS()->getUseTransformSkip() )
2649  {
2650  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepthCU][CI_CURR_BEST] );
2651  }
2652 
2653  UInt uiBits = xGetIntraBitsQT( tuRecurseWithPU, false, true, false );
2654  Double dCost = m_pcRdCost->calcRdCost( uiBits, uiDist );
2655 
2656  //----- compare -----
2657  if( dCost < dBestCost )
2658  {
2659  DEBUG_STRING_SWAP(sPU, sMode);
2660  dBestCost = dCost;
2661  uiBestDist = uiDist;
2662  uiBestMode = uiModeList[uiMode];
2663 
2664  xSetIntraResultChromaQT( pcRecoYuv, tuRecurseWithPU );
 // Snapshot the winner's per-partition chroma data; later candidates overwrite the CU's
 // copies, which are restored from here after the mode loop.
2665  for (UInt componentIndex = COMPONENT_Cb; componentIndex < numberValidComponents; componentIndex++)
2666  {
2667  const ComponentID compID = ComponentID(componentIndex);
2668  ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID )+uiPartOffset, uiQPartNum * sizeof( UChar ) );
2669  ::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip( compID )+uiPartOffset, uiQPartNum * sizeof( UChar ) );
2670  ::memcpy( m_phQTTempCrossComponentPredictionAlpha[compID], pcCU->getCrossComponentPredictionAlpha(compID)+uiPartOffset, uiQPartNum * sizeof( SChar ) );
2671  }
2672  }
2673  }
2674 
2675  DEBUG_STRING_APPEND(sDebug, sPU)
2676 
2677  //----- set data -----
 // Restore the best mode's snapshot into the CU.
2678  for (UInt componentIndex = COMPONENT_Cb; componentIndex < numberValidComponents; componentIndex++)
2679  {
2680  const ComponentID compID = ComponentID(componentIndex);
2681  ::memcpy( pcCU->getCbf( compID )+uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) );
2682  ::memcpy( pcCU->getTransformSkip( compID )+uiPartOffset, m_puhQTTempTransformSkipFlag[compID], uiQPartNum * sizeof( UChar ) );
2683  ::memcpy( pcCU->getCrossComponentPredictionAlpha(compID)+uiPartOffset, m_phQTTempCrossComponentPredictionAlpha[compID], uiQPartNum * sizeof( SChar ) );
2684  }
2685  }
2686 
 // Unless this is the last section, copy the reconstruction of every chroma component from
 // pcRecoYuv into the picture's reconstruction buffer, row by row.
2687  if( ! tuRecurseWithPU.IsLastSection() )
2688  {
2689  for (UInt ch=COMPONENT_Cb; ch<numberValidComponents; ch++)
2690  {
2691  const ComponentID compID = ComponentID(ch);
2692  const TComRectangle &tuRect = tuRecurseWithPU.getRect(compID);
2693  const UInt uiCompWidth = tuRect.width;
2694  const UInt uiCompHeight = tuRect.height;
2695  const UInt uiZOrder = pcCU->getZorderIdxInCtu() + tuRecurseWithPU.GetAbsPartIdxTU();
2696  Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( compID, pcCU->getCtuRsAddr(), uiZOrder );
2697  const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( compID);
2698  const Pel* piSrc = pcRecoYuv->getAddr( compID, uiPartOffset );
2699  const UInt uiSrcStride = pcRecoYuv->getStride( compID);
2700 
2701  for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
2702  {
2703  for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
2704  {
2705  piDes[ uiX ] = piSrc[ uiX ];
2706  }
2707  }
2708  }
2709  }
2710 
 // Commit the winning mode and accumulate its distortion on top of the CU's current total.
2711  pcCU->setIntraDirSubParts( CHANNEL_TYPE_CHROMA, uiBestMode, uiPartOffset, uiDepthCU+uiInitTrDepth );
2712  pcCU->getTotalDistortion () += uiBestDist;
2713  }
2714 
2715  } while (tuRecurseWithPU.nextSection(tuRecurseCU));
2716 
2717  //----- restore context models -----
2718 
 // With four chroma PUs: OR together the Cb/Cr cbf read with depth argument 1 at the first
 // partition of each quadrant, then OR the combined value into every partition's cbf byte.
2719  if( uiInitTrDepth != 0 )
2720  { // set Cbf for all blocks
2721  UInt uiCombCbfU = 0;
2722  UInt uiCombCbfV = 0;
2723  UInt uiPartIdx = 0;
2724  for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts )
2725  {
2726  uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 );
2727  uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 );
2728  }
2729  for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ )
2730  {
2731  pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU;
2732  pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV;
2733  }
2734  }
2735 
 // NOTE(review): listing line 2736 is missing from this extract.
2737 }
2738 
2739 
2740 
2741 
2755 Void TEncSearch::xEncPCM (TComDataCU* pcCU, UInt uiAbsPartIdx, Pel* pOrg, Pel* pPCM, Pel* pPred, Pel* pResi, Pel* pReco, UInt uiStride, UInt uiWidth, UInt uiHeight, const ComponentID compID )
2756 {
2757  const UInt uiReconStride = pcCU->getPic()->getPicYuvRec()->getStride(compID);
2758  const UInt uiPCMBitDepth = pcCU->getSlice()->getSPS()->getPCMBitDepth(toChannelType(compID));
2759  const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
2760  Pel* pRecoPic = pcCU->getPic()->getPicYuvRec()->getAddr(compID, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu()+uiAbsPartIdx);
2761 
2762  const Int pcmShiftRight=(channelBitDepth - Int(uiPCMBitDepth));
2763 
2764  assert(pcmShiftRight >= 0);
2765 
2766  for( UInt uiY = 0; uiY < uiHeight; uiY++ )
2767  {
2768  for( UInt uiX = 0; uiX < uiWidth; uiX++ )
2769  {
2770  // Reset pred and residual
2771  pPred[uiX] = 0;
2772  pResi[uiX] = 0;
2773  // Encode
2774  pPCM[uiX] = (pOrg[uiX]>>pcmShiftRight);
2775  // Reconstruction
2776  pReco [uiX] = (pPCM[uiX]<<(pcmShiftRight));
2777  pRecoPic[uiX] = pReco[uiX];
2778  }
2779  pPred += uiStride;
2780  pResi += uiStride;
2781  pPCM += uiWidth;
2782  pOrg += uiStride;
2783  pReco += uiStride;
2784  pRecoPic += uiReconStride;
2785  }
2786 }
2787 
2788 
// Evaluates coding the whole CU in PCM mode: every valid component is PCM-coded with
// xEncPCM, then the CU's total bits, cost and distortion are set and copyToPic is called.
// The distortion is a constant 0 in this function.
//
// pcCU      : CU to code.
// pcOrgYuv  : original samples (read by xEncPCM).
// pcPredYuv, pcResiYuv, pcRecoYuv : buffers written by xEncPCM.
2790 Void TEncSearch::IPCMSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv )
2791 {
2792  UInt uiDepth = pcCU->getDepth(0);
2793  const Distortion uiDistortion = 0;
2794  UInt uiBits;
2795 
2796  Double dCost;
2797 
2798  for (UInt ch=0; ch < pcCU->getPic()->getNumberValidComponents(); ch++)
2799  {
2800  const ComponentID compID = ComponentID(ch);
 // Component block size: the CU size scaled down by the component's x/y scale shifts.
2801  const UInt width = pcCU->getWidth(0) >> pcCU->getPic()->getComponentScaleX(compID);
2802  const UInt height = pcCU->getHeight(0) >> pcCU->getPic()->getComponentScaleY(compID);
 // The prediction buffer's stride is passed to xEncPCM as the stride of all four
 // sample buffers (original, prediction, residual, reconstruction).
2803  const UInt stride = pcPredYuv->getStride(compID);
2804 
 // NOTE(review): these use a three-argument getAddr overload; the meaning of the trailing
 // width argument is defined in TComYuv and is not visible in this extract.
2805  Pel * pOrig = pcOrgYuv->getAddr (compID, 0, width);
2806  Pel * pResi = pcResiYuv->getAddr(compID, 0, width);
2807  Pel * pPred = pcPredYuv->getAddr(compID, 0, width);
2808  Pel * pReco = pcRecoYuv->getAddr(compID, 0, width);
2809  Pel * pPCM = pcCU->getPCMSample (compID);
2810 
2811  xEncPCM ( pcCU, 0, pOrig, pPCM, pPred, pResi, pReco, stride, width, height, compID );
2812 
2813  }
2814 
 // NOTE(review): listing lines 2815 and 2817 are missing from this extract. As shown,
 // uiBits has no visible assignment before it is read below - presumably the missing
 // lines bracket xEncIntraHeader and set it; verify against the full source.
2816  xEncIntraHeader ( pcCU, uiDepth, 0, true, false);
2818 
2819  dCost = m_pcRdCost->calcRdCost( uiBits, uiDistortion );
2820 
 // NOTE(review): listing line 2821 is missing from this extract.
2822 
2823  pcCU->getTotalBits() = uiBits;
2824  pcCU->getTotalCost() = dCost;
2825  pcCU->getTotalDistortion() = uiDistortion;
2826 
2827  pcCU->copyToPic(uiDepth);
2828 }
2829 
2830 
2831 
2832 
// Computes the prediction error of one inter partition: motion-compensates the partition
// into m_tmpYuvPred and returns, in ruiErr, the value of the configured distortion function.
//
// pcCU     : CU holding the motion data of the partition.
// pcYuvOrg : original samples.
// iPartIdx : partition (PU) index inside the CU.
// ruiErr   : output, the measured distortion.
// The trailing Bool parameter is unnamed and unused; the Hadamard flag passed below is
// derived from the encoder configuration instead.
2833 Void TEncSearch::xGetInterPredictionError( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPartIdx, Distortion& ruiErr, Bool /*bHadamard*/ )
2834 {
2835  motionCompensation( pcCU, &m_tmpYuvPred, REF_PIC_LIST_X, iPartIdx );
2836 
2837  UInt uiAbsPartIdx = 0;
2838  Int iWidth = 0;
2839  Int iHeight = 0;
 // Fills in the partition's start index and its width/height.
2840  pcCU->getPartIndexAndSize( iPartIdx, uiAbsPartIdx, iWidth, iHeight );
2841 
2842  DistParam cDistParam;
2843 
2844  cDistParam.bApplyWeight = false;
2845 
2846 
 // NOTE(review): listing lines 2847 and 2849 are missing from this extract, so the call
 // that the two argument lines below belong to starts and continues out of view
 // (presumably the m_pcRdCost->setDistParam call taking cDistParam and the m_tmpYuvPred
 // address/stride - verify against the full source).
 // The last visible argument is true when HADME is enabled in the encoder configuration
 // and getCUTransquantBypass() returns 0.
 // NOTE(review): getCUTransquantBypass is indexed with iPartIdx here, while other code in
 // this file passes a partition address (0) - confirm this is intended.
2848  pcYuvOrg->getAddr( COMPONENT_Y, uiAbsPartIdx ), pcYuvOrg->getStride(COMPONENT_Y),
2850  iWidth, iHeight, m_pcEncCfg->getUseHADME() && (pcCU->getCUTransquantBypass(iPartIdx) == 0) );
2851 
2852  ruiErr = cDistParam.DistFunc( &cDistParam );
2853 }
2854 
2856 Void TEncSearch::xMergeEstimation( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPUIdx, UInt& uiInterDir, TComMvField* pacMvField, UInt& uiMergeIndex, Distortion& ruiCost, TComMvField* cMvFieldNeighbours, UChar* uhInterDirNeighbours, Int& numValidMergeCand )
2857 {
2858  UInt uiAbsPartIdx = 0;
2859  Int iWidth = 0;
2860  Int iHeight = 0;
2861 
2862  pcCU->getPartIndexAndSize( iPUIdx, uiAbsPartIdx, iWidth, iHeight );
2863  UInt uiDepth = pcCU->getDepth( uiAbsPartIdx );
2864 
2865  PartSize partSize = pcCU->getPartitionSize( 0 );
2866  if ( pcCU->getSlice()->getPPS()->getLog2ParallelMergeLevelMinus2() && partSize != SIZE_2Nx2N && pcCU->getWidth( 0 ) <= 8 )
2867  {
2868  if ( iPUIdx == 0 )
2869  {
2870  pcCU->setPartSizeSubParts( SIZE_2Nx2N, 0, uiDepth ); // temporarily set
2871 #if MCTS_ENC_CHECK
2872  UInt numSpatialMergeCandidates = 0;
2873  pcCU->getInterMergeCandidates( 0, 0, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand, numSpatialMergeCandidates );
2875  {
2876  numValidMergeCand = numSpatialMergeCandidates;
2877  }
2878 #else
2879  pcCU->getInterMergeCandidates( 0, 0, cMvFieldNeighbours,uhInterDirNeighbours, numValidMergeCand );
2880 #endif
2881  pcCU->setPartSizeSubParts( partSize, 0, uiDepth ); // restore
2882  }
2883  }
2884  else
2885  {
2886 #if MCTS_ENC_CHECK
2887  UInt numSpatialMergeCandidates = 0;
2888  pcCU->getInterMergeCandidates( uiAbsPartIdx, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand, numSpatialMergeCandidates );
2890  {
2891  numValidMergeCand = numSpatialMergeCandidates;
2892  }
2893 #else
2894  pcCU->getInterMergeCandidates( uiAbsPartIdx, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand );
2895 #endif
2896  }
2897 
2898  xRestrictBipredMergeCand( pcCU, iPUIdx, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand );
2899 
2900  ruiCost = std::numeric_limits<Distortion>::max();
2901  for( UInt uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand )
2902  {
2903  Distortion uiCostCand = std::numeric_limits<Distortion>::max();
2904  UInt uiBitsCand = 0;
2905 
2906  PartSize ePartSize = pcCU->getPartitionSize( 0 );
2907 
2908  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField( cMvFieldNeighbours[0 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx );
2909  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField( cMvFieldNeighbours[1 + 2*uiMergeCand], ePartSize, uiAbsPartIdx, 0, iPUIdx );
2910 
2911  xGetInterPredictionError( pcCU, pcYuvOrg, iPUIdx, uiCostCand, m_pcEncCfg->getUseHADME() );
2912  uiBitsCand = uiMergeCand + 1;
2913  if (uiMergeCand == m_pcEncCfg->getMaxNumMergeCand() -1)
2914  {
2915  uiBitsCand--;
2916  }
2917  uiCostCand = uiCostCand + m_pcRdCost->getCost( uiBitsCand );
2918  if ( uiCostCand < ruiCost )
2919  {
2920  ruiCost = uiCostCand;
2921  pacMvField[0] = cMvFieldNeighbours[0 + 2*uiMergeCand];
2922  pacMvField[1] = cMvFieldNeighbours[1 + 2*uiMergeCand];
2923  uiInterDir = uhInterDirNeighbours[uiMergeCand];
2924  uiMergeIndex = uiMergeCand;
2925  }
2926  }
2927 }
2928 
2937 Void TEncSearch::xRestrictBipredMergeCand( TComDataCU* pcCU, UInt puIdx, TComMvField* mvFieldNeighbours, UChar* interDirNeighbours, Int numValidMergeCand )
2938 {
2939  if ( pcCU->isBipredRestriction(puIdx) )
2940  {
2941  for( UInt mergeCand = 0; mergeCand < numValidMergeCand; ++mergeCand )
2942  {
2943  if ( interDirNeighbours[mergeCand] == 3 )
2944  {
2945  interDirNeighbours[mergeCand] = 1;
2946  mvFieldNeighbours[(mergeCand << 1) + 1].setMvField(TComMv(0,0), -1);
2947  }
2948  }
2949  }
2950 }
2951 
/** Inter-prediction search over every PU of a CU.
 *
 * As far as this listing shows, for each partition the function
 *  - runs a uni-directional search over every reference index of each usable list
 *    (one list for P slices, two otherwise): an AMVP predictor is chosen with
 *    xEstimateMvPredAMVP, the vector is found with xMotionEstimation (or copied from
 *    list 0 in the fast generalized-B low-delay path) and the predictor is re-checked
 *    with xCheckBestMVP;
 *  - for B slices without bi-pred restriction, refines a bi-predictive pair iteratively;
 *  - writes the cheapest of list-0, list-1 and bi-prediction into the CU motion fields;
 *  - for partitions other than 2Nx2N, compares that result with the best merge candidate
 *    (xMergeEstimation) and keeps the cheaper of the two;
 *  - motion-compensates the PU into pcPredYuv.
 *
 * \param pcCU       CU being searched; its motion fields, MVP indices, inter direction and merge data are written
 * \param pcOrgYuv   original samples
 * \param pcPredYuv  cleared on entry; receives the final prediction of every PU
 * \param pcResiYuv  cleared on entry unless bUseRes is set
 * \param pcRecoYuv  cleared on entry
 * \param bUseRes    when false, pcResiYuv is cleared
 * \param bUseMRG    (AMP_MRG builds only) when set for a CU wider than 8 with two partitions,
 *                   the normal motion search is skipped and only merge candidates are evaluated
 *
 * NOTE(review): the embedded line numbering of this listing skips 2963, 3192, 3201 and
 * 3456; the statements on those lines are not visible here (see the notes in the body).
 */
2953 #if AMP_MRG
2954 Void TEncSearch::predInterSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv DEBUG_STRING_FN_DECLARE(sDebug), Bool bUseRes, Bool bUseMRG )
2955 #else
2956 Void TEncSearch::predInterSearch( TComDataCU* pcCU, TComYuv* pcOrgYuv, TComYuv* pcPredYuv, TComYuv* pcResiYuv, TComYuv* pcRecoYuv, Bool bUseRes )
2957 #endif
2958 {
2959  for(UInt i=0; i<NUM_REF_PIC_LIST_01; i++)
2960  {
2961  m_acYuvPred[i].clear();
2962  }
 // NOTE(review): embedded line 2963 is missing from this listing.
2964  pcPredYuv->clear();
2965 
2966  if ( !bUseRes )
2967  {
2968  pcResiYuv->clear();
2969  }
2970 
2971  pcRecoYuv->clear();
2972 
2973  TComMv cMvSrchRngLT;
2974  TComMv cMvSrchRngRB;
2975 
2976  TComMv cMvZero;
2977  TComMv TempMv; //kolya
2978 
 // Per-list results of the uni-directional search (cMv) and of the bi-predictive
 // refinement (cMvBi); cMvTemp holds the vector found for every (list, ref idx).
2979  TComMv cMv[2];
2980  TComMv cMvBi[2];
2981  TComMv cMvTemp[2][33];
2982 
2983  Int iNumPart = pcCU->getNumPartitions();
2984  Int iNumPredDir = pcCU->getSlice()->isInterP() ? 1 : 2;
2985 
2986  TComMv cMvPred[2][33];
2987 
2988  TComMv cMvPredBi[2][33];
2989  Int aaiMvpIdxBi[2][33];
2990 
2991  Int aaiMvpIdx[2][33];
2992  Int aaiMvpNum[2][33];
2993 
 // AMVP candidate lists saved per (list, ref idx) so they can be restored later.
2994  AMVPInfo aacAMVPInfo[2][33];
2995 
2996  Int iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
2997  Int iRefIdxBi[2];
2998 
2999  UInt uiPartAddr;
3000  Int iRoiWidth, iRoiHeight;
3001 
 // Mode bits for list 0, list 1 and bi-prediction; refreshed per PU by xGetBlkBits.
3002  UInt uiMbBits[3] = {1, 1, 0};
3003 
 // Mode chosen for the previous PU (0: list 0, 1: list 1, 2: bi); fed to xGetBlkBits.
3004  UInt uiLastMode = 0;
3005  Int iRefStart, iRefEnd;
3006 
3007  PartSize ePartSize = pcCU->getPartitionSize( 0 );
3008 
3009  Int bestBiPRefIdxL1 = 0;
3010  Int bestBiPMvpL1 = 0;
3011  Distortion biPDistTemp = std::numeric_limits<Distortion>::max();
3012 
3013  TComMvField cMvFieldNeighbours[MRG_MAX_NUM_CANDS << 1]; // double length for mv of both lists
3014  UChar uhInterDirNeighbours[MRG_MAX_NUM_CANDS];
3015  Int numValidMergeCand = 0 ;
3016 
3017  for ( Int iPartIdx = 0; iPartIdx < iNumPart; iPartIdx++ )
3018  {
3019  Distortion uiCost[2] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max() };
3020  Distortion uiCostBi = std::numeric_limits<Distortion>::max();
3021  Distortion uiCostTemp;
3022 
3023  UInt uiBits[3];
3024  UInt uiBitsTemp;
3025  Distortion bestBiPDist = std::numeric_limits<Distortion>::max();
3026 
 // List-0 cost/bits per reference index, reused by the fast list-1 path below.
3027  Distortion uiCostTempL0[MAX_NUM_REF];
3028  for (Int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++)
3029  {
3030  uiCostTempL0[iNumRef] = std::numeric_limits<Distortion>::max();
3031  }
3032  UInt uiBitsTempL0[MAX_NUM_REF];
3033 
 // Best list-1 result among reference indices for which getList1IdxToList0Idx() is
 // negative; these values replace the list-1 result before the final mode decision.
3034  TComMv mvValidList1;
3035  Int refIdxValidList1 = 0;
3036  UInt bitsValidList1 = MAX_UINT;
3037  Distortion costValidList1 = std::numeric_limits<Distortion>::max();
3038 
3039  xGetBlkBits( ePartSize, pcCU->getSlice()->isInterP(), iPartIdx, uiLastMode, uiMbBits);
3040 
3041  pcCU->getPartIndexAndSize( iPartIdx, uiPartAddr, iRoiWidth, iRoiHeight );
3042 
3043 #if AMP_MRG
3044  Bool bTestNormalMC = true;
3045 
3046  if ( bUseMRG && pcCU->getWidth( 0 ) > 8 && iNumPart == 2 )
3047  {
3048  bTestNormalMC = false;
3049  }
3050 
3051  if (bTestNormalMC)
3052  {
3053 #endif
3054 
3055  // Uni-directional prediction
3056  for ( Int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
3057  {
3058  RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
3059 
3060  for ( Int iRefIdxTemp = 0; iRefIdxTemp < pcCU->getSlice()->getNumRefIdx(eRefPicList); iRefIdxTemp++ )
3061  {
 // Reference index bits: index + 1, one less for the last index; nothing when the
 // list has a single entry.
3062  uiBitsTemp = uiMbBits[iRefList];
3063  if ( pcCU->getSlice()->getNumRefIdx(eRefPicList) > 1 )
3064  {
3065  uiBitsTemp += iRefIdxTemp+1;
3066  if ( iRefIdxTemp == pcCU->getSlice()->getNumRefIdx(eRefPicList)-1 )
3067  {
3068  uiBitsTemp--;
3069  }
3070  }
3071  xEstimateMvPredAMVP( pcCU, pcOrgYuv, iPartIdx, eRefPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], false, &biPDistTemp);
3072  aaiMvpIdx[iRefList][iRefIdxTemp] = pcCU->getMVPIdx(eRefPicList, uiPartAddr);
3073  aaiMvpNum[iRefList][iRefIdxTemp] = pcCU->getMVPNum(eRefPicList, uiPartAddr);
3074 
 // With MvdL1ZeroFlag, remember the list-1 predictor with the smallest template cost;
 // it is used directly as the list-1 vector in the bi-predictive stage.
3075  if(pcCU->getSlice()->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist)
3076  {
3077  bestBiPDist = biPDistTemp;
3078  bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
3079  bestBiPRefIdxL1 = iRefIdxTemp;
3080  }
3081 
3082  uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
3083 
3084  if ( m_pcEncCfg->getFastMEForGenBLowDelayEnabled() && iRefList == 1 ) // list 1
3085  {
 // Fast path: when this list-1 entry maps to a list-0 entry, reuse the list-0
 // vector and cost and only re-price the bits against the list-1 predictor.
3086  if ( pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp ) >= 0 )
3087  {
3088  cMvTemp[1][iRefIdxTemp] = cMvTemp[0][pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )];
3089  uiCostTemp = uiCostTempL0[pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )];
3090  /*first subtract the bit-rate part of the cost of the other list*/
3091  uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp )] );
3092  /*correct the bit-rate part of the current ref*/
3093  m_pcRdCost->setPredictor ( cMvPred[iRefList][iRefIdxTemp] );
3094  uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].getHor(), cMvTemp[1][iRefIdxTemp].getVer() );
3095  /*calculate the correct cost*/
3096  uiCostTemp += m_pcRdCost->getCost( uiBitsTemp );
3097  }
3098  else
3099  {
3100  xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
3101  }
3102  }
3103  else
3104  {
3105  xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
3106  }
3107  xCopyAMVPInfo(pcCU->getCUMvField(eRefPicList)->getAMVPInfo(), &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE )
3108  xCheckBestMVP(pcCU, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
3109 
3110  if ( iRefList == 0 )
3111  {
3112  uiCostTempL0[iRefIdxTemp] = uiCostTemp;
3113  uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
3114  }
3115  if ( uiCostTemp < uiCost[iRefList] )
3116  {
3117  uiCost[iRefList] = uiCostTemp;
3118  uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
3119 
3120  // set motion
3121  cMv[iRefList] = cMvTemp[iRefList][iRefIdxTemp];
3122  iRefIdx[iRefList] = iRefIdxTemp;
3123  }
3124 
3125  if ( iRefList == 1 && uiCostTemp < costValidList1 && pcCU->getSlice()->getList1IdxToList0Idx( iRefIdxTemp ) < 0 )
3126  {
3127  costValidList1 = uiCostTemp;
3128  bitsValidList1 = uiBitsTemp;
3129 
3130  // set motion
3131  mvValidList1 = cMvTemp[iRefList][iRefIdxTemp];
3132  refIdxValidList1 = iRefIdxTemp;
3133  }
3134  }
3135  }
3136 
3137  // Bi-predictive Motion estimation
3138  if ( (pcCU->getSlice()->isInterB()) && (pcCU->isBipredRestriction(iPartIdx) == false) )
3139  {
3140 
 // Start from the best uni-directional results of both lists.
3141  cMvBi[0] = cMv[0]; cMvBi[1] = cMv[1];
3142  iRefIdxBi[0] = iRefIdx[0]; iRefIdxBi[1] = iRefIdx[1];
3143 
3144  ::memcpy(cMvPredBi, cMvPred, sizeof(cMvPred));
3145  ::memcpy(aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx));
3146 
 // Motion bits (total minus mode bits) currently attributed to each list.
3147  UInt uiMotBits[2];
3148 
3149  if(pcCU->getSlice()->getMvdL1ZeroFlag())
3150  {
 // List-1 vector is fixed to the best predictor found above; its prediction is
 // generated once here and only list 0 is refined in the loop below.
3151  xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());
3152  pcCU->setMVPIdxSubParts( bestBiPMvpL1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3153  aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
3154  cMvPredBi[1][bestBiPRefIdxL1] = pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()->m_acMvCand[bestBiPMvpL1];
3155 
3156  cMvBi[1] = cMvPredBi[1][bestBiPRefIdxL1];
3157  iRefIdxBi[1] = bestBiPRefIdxL1;
3158  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMv( cMvBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3159  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllRefIdx( iRefIdxBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3160  TComYuv* pcYuvPred = &m_acYuvPred[REF_PIC_LIST_1];
3161  motionCompensation( pcCU, pcYuvPred, REF_PIC_LIST_1, iPartIdx );
3162 
3163  uiMotBits[0] = uiBits[0] - uiMbBits[0];
3164  uiMotBits[1] = uiMbBits[1];
3165 
3166  if ( pcCU->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 1 )
3167  {
3168  uiMotBits[1] += bestBiPRefIdxL1+1;
3169  if ( bestBiPRefIdxL1 == pcCU->getSlice()->getNumRefIdx(REF_PIC_LIST_1)-1 )
3170  {
3171  uiMotBits[1]--;
3172  }
3173  }
3174 
3175  uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
3176 
3177  uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
3178 
3179  cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1];
3180  }
3181  else
3182  {
3183  uiMotBits[0] = uiBits[0] - uiMbBits[0];
3184  uiMotBits[1] = uiBits[1] - uiMbBits[1];
3185  uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
3186  }
3187 
3188  // 4-times iteration (default)
3189  Int iNumIter = 4;
3190 
3191  // fast encoder setting: only one iteration
 // NOTE(review): embedded line 3192 (presumably the condition guarding the block
 // below) is missing from this listing.
3193  {
3194  iNumIter = 1;
3195  }
3196 
3197  for ( Int iIter = 0; iIter < iNumIter; iIter++ )
3198  {
 // By default the refined list alternates with the iteration number.
3199  Int iRefList = iIter % 2;
3200 
 // NOTE(review): embedded line 3201 (presumably the 'if' that the 'else if' below
 // pairs with) is missing from this listing.
3202  {
3203  if( uiCost[0] <= uiCost[1] )
3204  {
3205  iRefList = 1;
3206  }
3207  else
3208  {
3209  iRefList = 0;
3210  }
3211  }
3212  else if ( iIter == 0 )
3213  {
3214  iRefList = 0;
3215  }
 // First iteration: generate the prediction of the list that is held fixed.
3216  if ( iIter == 0 && !pcCU->getSlice()->getMvdL1ZeroFlag())
3217  {
3218  pcCU->getCUMvField(RefPicList(1-iRefList))->setAllMv( cMv[1-iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3219  pcCU->getCUMvField(RefPicList(1-iRefList))->setAllRefIdx( iRefIdx[1-iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3220  TComYuv* pcYuvPred = &m_acYuvPred[1-iRefList];
3221  motionCompensation ( pcCU, pcYuvPred, RefPicList(1-iRefList), iPartIdx );
3222  }
3223 
3224  RefPicList eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
3225 
3226  if(pcCU->getSlice()->getMvdL1ZeroFlag())
3227  {
3228  iRefList = 0;
3229  eRefPicList = REF_PIC_LIST_0;
3230  }
3231 
3232  Bool bChanged = false;
3233 
3234  iRefStart = 0;
3235  iRefEnd = pcCU->getSlice()->getNumRefIdx(eRefPicList)-1;
3236 
3237  for ( Int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ )
3238  {
3239  uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
3240  if ( pcCU->getSlice()->getNumRefIdx(eRefPicList) > 1 )
3241  {
3242  uiBitsTemp += iRefIdxTemp+1;
3243  if ( iRefIdxTemp == pcCU->getSlice()->getNumRefIdx(eRefPicList)-1 )
3244  {
3245  uiBitsTemp--;
3246  }
3247  }
3248  uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
3249  // call ME
3250  xMotionEstimation ( pcCU, pcOrgYuv, iPartIdx, eRefPicList, &cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, true );
3251 
 // Restore the AMVP list saved for this (list, ref idx) before re-checking the predictor.
3252  xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], pcCU->getCUMvField(eRefPicList)->getAMVPInfo());
3253  xCheckBestMVP(pcCU, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
3254 
3255  if ( uiCostTemp < uiCostBi )
3256  {
3257  bChanged = true;
3258 
3259  cMvBi[iRefList] = cMvTemp[iRefList][iRefIdxTemp];
3260  iRefIdxBi[iRefList] = iRefIdxTemp;
3261 
3262  uiCostBi = uiCostTemp;
3263  uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
3264  uiBits[2] = uiBitsTemp;
3265 
 // With more than one iteration the refined list becomes the fixed list next time,
 // so its prediction buffer has to be refreshed.
3266  if(iNumIter!=1)
3267  {
3268  // Set motion
3269  pcCU->getCUMvField( eRefPicList )->setAllMv( cMvBi[iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3270  pcCU->getCUMvField( eRefPicList )->setAllRefIdx( iRefIdxBi[iRefList], ePartSize, uiPartAddr, 0, iPartIdx );
3271 
3272  TComYuv* pcYuvPred = &m_acYuvPred[iRefList];
3273  motionCompensation( pcCU, pcYuvPred, eRefPicList, iPartIdx );
3274  }
3275  }
3276  } // for loop-iRefIdxTemp
3277 
 // No improvement in this iteration: stop iterating, after a final MVP re-check
 // when bi-prediction is at least as cheap as both uni-directional results.
3278  if ( !bChanged )
3279  {
3280  if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] )
3281  {
3282  xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], pcCU->getCUMvField(REF_PIC_LIST_0)->getAMVPInfo());
3283  xCheckBestMVP(pcCU, REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi);
3284  if(!pcCU->getSlice()->getMvdL1ZeroFlag())
3285  {
3286  xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], pcCU->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());
3287  xCheckBestMVP(pcCU, REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi);
3288  }
3289  }
3290  break;
3291  }
3292  } // for loop-iter
3293  } // if (B_SLICE)
3294 
3295 #if AMP_MRG
3296  } //end if bTestNormalMC
3297 #endif
3298  // Clear Motion Field
3299  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvField( TComMvField(), ePartSize, uiPartAddr, 0, iPartIdx );
3300  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField( TComMvField(), ePartSize, uiPartAddr, 0, iPartIdx );
3301  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3302  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3303 
3304  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3305  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3306  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3307  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3308 
3309  UInt uiMEBits = 0;
3310  // Set Motion Field_
 // The list-1 result used for the mode decision is the "valid list 1" one collected
 // during the uni-directional search.
3311  cMv[1] = mvValidList1;
3312  iRefIdx[1] = refIdxValidList1;
3313  uiBits[1] = bitsValidList1;
3314  uiCost[1] = costValidList1;
3315 
3316 #if AMP_MRG
3317  if (bTestNormalMC)
3318  {
3319 #endif
 // Mode decision: bi-prediction (inter dir 3), list 0 (inter dir 1) or list 1 (inter dir 2).
3320  if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
3321  {
3322  uiLastMode = 2;
3323  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMv( cMvBi[0], ePartSize, uiPartAddr, 0, iPartIdx );
3324  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllRefIdx( iRefIdxBi[0], ePartSize, uiPartAddr, 0, iPartIdx );
3325  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMv( cMvBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3326  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx( iRefIdxBi[1], ePartSize, uiPartAddr, 0, iPartIdx );
3327 
3328  TempMv = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]];
3329  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3330 
3331  TempMv = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]];
3332  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3333 
3334  pcCU->setInterDirSubParts( 3, uiPartAddr, iPartIdx, pcCU->getDepth(0) );
3335 
3336  pcCU->setMVPIdxSubParts( aaiMvpIdxBi[0][iRefIdxBi[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3337  pcCU->setMVPNumSubParts( aaiMvpNum[0][iRefIdxBi[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3338  pcCU->setMVPIdxSubParts( aaiMvpIdxBi[1][iRefIdxBi[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3339  pcCU->setMVPNumSubParts( aaiMvpNum[1][iRefIdxBi[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3340 
3341  uiMEBits = uiBits[2];
3342  }
3343  else if ( uiCost[0] <= uiCost[1] )
3344  {
3345  uiLastMode = 0;
3346  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMv( cMv[0], ePartSize, uiPartAddr, 0, iPartIdx );
3347  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllRefIdx( iRefIdx[0], ePartSize, uiPartAddr, 0, iPartIdx );
3348 
3349  TempMv = cMv[0] - cMvPred[0][iRefIdx[0]];
3350  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3351 
3352  pcCU->setInterDirSubParts( 1, uiPartAddr, iPartIdx, pcCU->getDepth(0) );
3353 
3354  pcCU->setMVPIdxSubParts( aaiMvpIdx[0][iRefIdx[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3355  pcCU->setMVPNumSubParts( aaiMvpNum[0][iRefIdx[0]], REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3356 
3357  uiMEBits = uiBits[0];
3358  }
3359  else
3360  {
3361  uiLastMode = 1;
3362  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMv( cMv[1], ePartSize, uiPartAddr, 0, iPartIdx );
3363  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx( iRefIdx[1], ePartSize, uiPartAddr, 0, iPartIdx );
3364 
3365  TempMv = cMv[1] - cMvPred[1][iRefIdx[1]];
3366  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( TempMv, ePartSize, uiPartAddr, 0, iPartIdx );
3367 
3368  pcCU->setInterDirSubParts( 2, uiPartAddr, iPartIdx, pcCU->getDepth(0) );
3369 
3370  pcCU->setMVPIdxSubParts( aaiMvpIdx[1][iRefIdx[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3371  pcCU->setMVPNumSubParts( aaiMvpNum[1][iRefIdx[1]], REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3372 
3373  uiMEBits = uiBits[1];
3374  }
3375 #if AMP_MRG
3376  } // end if bTestNormalMC
3377 #endif
3378 
 // Non-2Nx2N partitions: compare the motion-estimation result with the best merge candidate.
3379  if ( pcCU->getPartitionSize( uiPartAddr ) != SIZE_2Nx2N )
3380  {
3381  UInt uiMRGInterDir = 0;
3382  TComMvField cMRGMvField[2];
3383  UInt uiMRGIndex = 0;
3384 
3385  UInt uiMEInterDir = 0;
3386  TComMvField cMEMvField[2];
3387 
3388  m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) );
3389 
3390 #if AMP_MRG
3391  // calculate ME cost
 // When the normal search was skipped the ME cost stays at the maximum value, so any
 // merge candidate with a lower cost is selected.
3392  Distortion uiMEError = std::numeric_limits<Distortion>::max();
3393  Distortion uiMECost = std::numeric_limits<Distortion>::max();
3394 
3395  if (bTestNormalMC)
3396  {
3397  xGetInterPredictionError( pcCU, pcOrgYuv, iPartIdx, uiMEError, m_pcEncCfg->getUseHADME() );
3398  uiMECost = uiMEError + m_pcRdCost->getCost( uiMEBits );
3399  }
3400 #else
3401  // calculate ME cost
3402  Distortion uiMEError = std::numeric_limits<Distortion>::max();
3403  xGetInterPredictionError( pcCU, pcOrgYuv, iPartIdx, uiMEError, m_pcEncCfg->getUseHADME() );
3404  Distortion uiMECost = uiMEError + m_pcRdCost->getCost( uiMEBits );
3405 #endif
3406  // save ME result.
 // xMergeEstimation overwrites the CU motion fields while testing candidates.
3407  uiMEInterDir = pcCU->getInterDir( uiPartAddr );
3408  TComDataCU::getMvField( pcCU, uiPartAddr, REF_PIC_LIST_0, cMEMvField[0] );
3409  TComDataCU::getMvField( pcCU, uiPartAddr, REF_PIC_LIST_1, cMEMvField[1] );
3410 
3411  // find Merge result
3412  Distortion uiMRGCost = std::numeric_limits<Distortion>::max();
3413 
3414  xMergeEstimation( pcCU, pcOrgYuv, iPartIdx, uiMRGInterDir, cMRGMvField, uiMRGIndex, uiMRGCost, cMvFieldNeighbours, uhInterDirNeighbours, numValidMergeCand);
3415 
3416  if ( uiMRGCost < uiMECost )
3417  {
3418  // set Merge result
3419  pcCU->setMergeFlagSubParts ( true, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3420  pcCU->setMergeIndexSubParts( uiMRGIndex, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3421  pcCU->setInterDirSubParts ( uiMRGInterDir, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3422  pcCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMRGMvField[0], ePartSize, uiPartAddr, 0, iPartIdx );
3423  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMRGMvField[1], ePartSize, uiPartAddr, 0, iPartIdx );
3424 
3425  pcCU->getCUMvField(REF_PIC_LIST_0)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3426  pcCU->getCUMvField(REF_PIC_LIST_1)->setAllMvd ( cMvZero, ePartSize, uiPartAddr, 0, iPartIdx );
3427 
3428  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3429  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_0, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3430  pcCU->setMVPIdxSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3431  pcCU->setMVPNumSubParts( -1, REF_PIC_LIST_1, uiPartAddr, iPartIdx, pcCU->getDepth(uiPartAddr));
3432  }
3433  else
3434  {
3435  // set ME result
3436  pcCU->setMergeFlagSubParts( false, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3437  pcCU->setInterDirSubParts ( uiMEInterDir, uiPartAddr, iPartIdx, pcCU->getDepth( uiPartAddr ) );
3438  pcCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMEMvField[0], ePartSize, uiPartAddr, 0, iPartIdx );
3439  pcCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMEMvField[1], ePartSize, uiPartAddr, 0, iPartIdx );
3440  }
3441  }
3442 
3443 #if MCTS_ENC_CHECK
 // Abort the whole search (remaining PUs included) when the check fails for this PU.
3444  if (m_pcEncCfg->getTMCTSSEITileConstraint() && (!checkTMctsMvp(pcCU, iPartIdx)))
3445  {
3446  pcCU->setTMctsMvpIsValid(false);
3447  return;
3448  }
3449 #endif
3450 
3451  // MC
3452  motionCompensation ( pcCU, pcPredYuv, REF_PIC_LIST_X, iPartIdx );
3453 
3454  } // end of for ( Int iPartIdx = 0; iPartIdx < iNumPart; iPartIdx++ )
3455 
 // NOTE(review): embedded line 3456 is missing from this listing.
3457 
3458  return;
3459 }
3460 
3461 
3462 // AMVP
/** Choose the AMVP motion vector predictor for one PU, reference list and reference index.
 *
 * Unless bFilled is set, the candidate list of the CU's motion field for eRefPicList is
 * (re)filled with fillMvpCand. Then:
 *  - with at most one candidate, the initial candidate is returned and the MVP index/count
 *    are stored in the CU;
 *  - with bFilled set, the predictor at the MVP index already stored in the CU is returned;
 *  - otherwise every candidate in [minMVPCand, maxMVPCand) is priced with xGetTemplateCost
 *    and the cheapest one is returned and stored in the CU.
 *
 * \param pcCU        CU the PU belongs to
 * \param pcOrgYuv    original samples
 * \param uiPartIdx   index of the PU inside the CU
 * \param eRefPicList reference list
 * \param iRefIdx     reference index
 * \param rcMvPred    receives the selected predictor
 * \param bFilled     true when the candidate list and MVP index are already set
 * \param puiDistBiP  receives the template cost of the selected candidate; in the
 *                    single-candidate path it is only written for list 1 with MvdL1ZeroFlag.
 *                    It is dereferenced without a null check on those paths.
 *
 * NOTE(review): the embedded line numbering skips 3527 and 3542; whatever statements
 * were on those lines are not visible in this listing.
 */
3463 Void TEncSearch::xEstimateMvPredAMVP( TComDataCU* pcCU, TComYuv* pcOrgYuv, UInt uiPartIdx, RefPicList eRefPicList, Int iRefIdx, TComMv& rcMvPred, Bool bFilled, Distortion* puiDistBiP )
3464 {
3465  AMVPInfo* pcAMVPInfo = pcCU->getCUMvField(eRefPicList)->getAMVPInfo();
3466 
3467  TComMv cBestMv;
3468  Int iBestIdx = 0;
3469  TComMv cZeroMv;
3470  TComMv cMvPred;
3471  Distortion uiBestCost = std::numeric_limits<Distortion>::max();
3472  UInt uiPartAddr = 0;
3473  Int iRoiWidth, iRoiHeight;
3474  Int i;
3475  Int minMVPCand;
3476  Int maxMVPCand;
3477 
3478  pcCU->getPartIndexAndSize( uiPartIdx, uiPartAddr, iRoiWidth, iRoiHeight );
3479  // Fill the MV Candidates
3480  if (!bFilled)
3481  {
3482  pcCU->fillMvpCand( uiPartIdx, uiPartAddr, eRefPicList, iRefIdx, pcAMVPInfo );
3483  }
3484  // initialize Mvp index & Mvp
3485 #if MCTS_ENC_CHECK
 // Restricted candidate range: only candidate 0 when at least one spatial candidate
 // exists, otherwise candidates 1..iN-1.
3486  if (m_pcEncCfg->getTMCTSSEITileConstraint() && pcCU->isLastColumnCTUInTile() && (pcAMVPInfo->numSpatialMVPCandidates < pcAMVPInfo->iN))
3487  {
3488  iBestIdx = (pcAMVPInfo->numSpatialMVPCandidates == 0) ? 1 : 0;
3489  cBestMv = pcAMVPInfo->m_acMvCand[(pcAMVPInfo->numSpatialMVPCandidates == 0) ? 1 : 0];
3490  minMVPCand = (pcAMVPInfo->numSpatialMVPCandidates == 0) ? 1 : 0;
3491  maxMVPCand = (pcAMVPInfo->numSpatialMVPCandidates == 0) ? pcAMVPInfo->iN : 1;
3492  }
3493  else
3494  {
3495  iBestIdx = 0;
3496  cBestMv = pcAMVPInfo->m_acMvCand[0];
3497  minMVPCand = 0;
3498  maxMVPCand = pcAMVPInfo->iN;
3499  }
3500 #else
3501  iBestIdx = 0;
3502  cBestMv = pcAMVPInfo->m_acMvCand[0];
3503  minMVPCand = 0;
3504  maxMVPCand = pcAMVPInfo->iN;
3505 #endif
 // Nothing to choose from: keep the initial candidate.
3506  if (pcAMVPInfo->iN <= 1)
3507  {
3508  rcMvPred = cBestMv;
3509 
3510  pcCU->setMVPIdxSubParts( iBestIdx, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3511  pcCU->setMVPNumSubParts( pcAMVPInfo->iN, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3512 
3513  if(pcCU->getSlice()->getMvdL1ZeroFlag() && eRefPicList==REF_PIC_LIST_1)
3514  {
3515  (*puiDistBiP) = xGetTemplateCost( pcCU, uiPartAddr, pcOrgYuv, &m_cYuvPredTemp, rcMvPred, 0, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx, iRoiWidth, iRoiHeight);
3516  }
3517  return;
3518  }
3519 
 // Candidate list was prepared by the caller: just look up the stored index.
3520  if (bFilled)
3521  {
3522  assert(pcCU->getMVPIdx(eRefPicList,uiPartAddr) >= 0);
3523  rcMvPred = pcAMVPInfo->m_acMvCand[pcCU->getMVPIdx(eRefPicList,uiPartAddr)];
3524  return;
3525  }
3526 
3528  //-- Check Minimum Cost.
3529  for ( i = minMVPCand ; i < maxMVPCand; i++)
3530  {
3531  Distortion uiTmpCost;
3532  uiTmpCost = xGetTemplateCost( pcCU, uiPartAddr, pcOrgYuv, &m_cYuvPredTemp, pcAMVPInfo->m_acMvCand[i], i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx, iRoiWidth, iRoiHeight);
3533  if ( uiBestCost > uiTmpCost )
3534  {
3535  uiBestCost = uiTmpCost;
3536  cBestMv = pcAMVPInfo->m_acMvCand[i];
3537  iBestIdx = i;
3538  (*puiDistBiP) = uiTmpCost;
3539  }
3540  }
3541 
3543 
3544  // Setting Best MVP
3545  rcMvPred = cBestMv;
3546  pcCU->setMVPIdxSubParts( iBestIdx, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3547  pcCU->setMVPNumSubParts( pcAMVPInfo->iN, eRefPicList, uiPartAddr, uiPartIdx, pcCU->getDepth(uiPartAddr));
3548  return;
3549 }
3550 
/** Bit length of a predictor index iIdx out of iNum candidates.
 *
 * NOTE(review): the function header (embedded line 3551) is missing from this listing;
 * from the body it takes two Int values named iIdx and iNum and returns a UInt.
 * Presumably this is the helper that fills m_auiMVPIdxCost -- confirm against the class
 * declaration.
 *
 * Result: 0 when there is a single candidate, 1 for index 0, iIdx + 1 for any other
 * index except the last one (iNum - 1), which costs iIdx bits because no terminating
 * bit is needed.
 */
3552 {
3553  assert(iIdx >= 0 && iNum >= 0 && iIdx < iNum);
3554 
3555  if (iNum == 1)
3556  {
3557  return 0;
3558  }
3559 
3560  UInt uiLength = 1;
3561  Int iTemp = iIdx;
3562  if ( iTemp == 0 )
3563  {
3564  return uiLength;
3565  }
3566 
 // True when iIdx is not the last index, i.e. a terminating bit must be coded.
3567  Bool bCodeLast = ( iNum-1 > iTemp );
3568 
3569  uiLength += (iTemp-1);
3570 
3571  if( bCodeLast )
3572  {
3573  uiLength++;
3574  }
3575 
3576  return uiLength;
3577 }
3578 
3579 Void TEncSearch::xGetBlkBits( PartSize eCUMode, Bool bPSlice, Int iPartIdx, UInt uiLastMode, UInt uiBlkBit[3])
3580 {
3581  if ( eCUMode == SIZE_2Nx2N )
3582  {
3583  uiBlkBit[0] = (! bPSlice) ? 3 : 1;
3584  uiBlkBit[1] = 3;
3585  uiBlkBit[2] = 5;
3586  }
3587  else if ( (eCUMode == SIZE_2NxN || eCUMode == SIZE_2NxnU) || eCUMode == SIZE_2NxnD )
3588  {
3589  UInt aauiMbBits[2][3][3] = { { {0,0,3}, {0,0,0}, {0,0,0} } , { {5,7,7}, {7,5,7}, {9-3,9-3,9-3} } };
3590  if ( bPSlice )
3591  {
3592  uiBlkBit[0] = 3;
3593  uiBlkBit[1] = 0;
3594  uiBlkBit[2] = 0;
3595  }
3596  else
3597  {
3598  ::memcpy( uiBlkBit, aauiMbBits[iPartIdx][uiLastMode], 3*sizeof(UInt) );
3599  }
3600  }
3601  else if ( (eCUMode == SIZE_Nx2N || eCUMode == SIZE_nLx2N) || eCUMode == SIZE_nRx2N )
3602  {
3603  UInt aauiMbBits[2][3][3] = { { {0,2,3}, {0,0,0}, {0,0,0} } , { {5,7,7}, {7-2,7-2,9-2}, {9-3,9-3,9-3} } };
3604  if ( bPSlice )
3605  {
3606  uiBlkBit[0] = 3;
3607  uiBlkBit[1] = 0;
3608  uiBlkBit[2] = 0;
3609  }
3610  else
3611  {
3612  ::memcpy( uiBlkBit, aauiMbBits[iPartIdx][uiLastMode], 3*sizeof(UInt) );
3613  }
3614  }
3615  else if ( eCUMode == SIZE_NxN )
3616  {
3617  uiBlkBit[0] = (! bPSlice) ? 3 : 1;
3618  uiBlkBit[1] = 3;
3619  uiBlkBit[2] = 5;
3620  }
3621  else
3622  {
3623  printf("Wrong!\n");
3624  assert( 0 );
3625  }
3626 }
3627 
3629 {
3630  pDst->iN = pSrc->iN;
3631  for (Int i = 0; i < pSrc->iN; i++)
3632  {
3633  pDst->m_acMvCand[i] = pSrc->m_acMvCand[i];
3634  }
3635 }
3636 
3637 Void TEncSearch::xCheckBestMVP ( TComDataCU* pcCU, RefPicList eRefPicList, TComMv cMv, TComMv& rcMvPred, Int& riMVPIdx, UInt& ruiBits, Distortion& ruiCost )
3638 {
3639  AMVPInfo* pcAMVPInfo = pcCU->getCUMvField(eRefPicList)->getAMVPInfo();
3640 
3641  assert(pcAMVPInfo->m_acMvCand[riMVPIdx] == rcMvPred);
3642 
3643  if (pcAMVPInfo->iN < 2)
3644  {
3645  return;
3646  }
3647 
3648  m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(0) );
3649  m_pcRdCost->setCostScale ( 0 );
3650 
3651  Int iBestMVPIdx = riMVPIdx;
3652 
3653  m_pcRdCost->setPredictor( rcMvPred );
3654  Int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer());
3655  iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];
3656  Int iBestMvBits = iOrgMvBits;
3657 
3658 #if MCTS_ENC_CHECK
3659  Int minMVPCand = 0;
3660  Int maxMVPCand = pcAMVPInfo->iN;
3661 
3663  {
3664  minMVPCand = (pcAMVPInfo->numSpatialMVPCandidates == 0) ? 1 : 0;
3665  maxMVPCand = (pcAMVPInfo->numSpatialMVPCandidates == 0) ? pcAMVPInfo->iN : 1;
3666  }
3667  for (Int iMVPIdx = minMVPCand; iMVPIdx < maxMVPCand; iMVPIdx++)
3668 #else
3669  for (Int iMVPIdx = 0; iMVPIdx < pcAMVPInfo->iN; iMVPIdx++)
3670 #endif
3671  {
3672  if (iMVPIdx == riMVPIdx)
3673  {
3674  continue;
3675  }
3676 
3677  m_pcRdCost->setPredictor( pcAMVPInfo->m_acMvCand[iMVPIdx] );
3678 
3679  Int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer());
3680  iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
3681 
3682  if (iMvBits < iBestMvBits)
3683  {
3684  iBestMvBits = iMvBits;
3685  iBestMVPIdx = iMVPIdx;
3686  }
3687  }
3688 
3689  if (iBestMVPIdx != riMVPIdx) //if changed
3690  {
3691  rcMvPred = pcAMVPInfo->m_acMvCand[iBestMVPIdx];
3692 
3693  riMVPIdx = iBestMVPIdx;
3694  UInt uiOrgBits = ruiBits;
3695  ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits;
3696  ruiCost = (ruiCost - m_pcRdCost->getCost( uiOrgBits )) + m_pcRdCost->getCost( ruiBits );
3697  }
3698 }
3699 
3700 
3702  UInt uiPartAddr,
3703  TComYuv* pcOrgYuv,
3704  TComYuv* pcTemplateCand,
3705  TComMv cMvCand,
3706  Int iMVPIdx,
3707  Int iMVPNum,
3708  RefPicList eRefPicList,
3709  Int iRefIdx,
3710  Int iSizeX,
3711  Int iSizeY
3712  )
3713 {
3714  Distortion uiCost = std::numeric_limits<Distortion>::max();
3715 
3716  TComPicYuv* pcPicYuvRef = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdx )->getPicYuvRec();
3717 
3718  pcCU->clipMv( cMvCand );
3719 
3720  // prediction pattern
3721  if ( pcCU->getSlice()->testWeightPred() && pcCU->getSlice()->getSliceType()==P_SLICE )
3722  {
3723  xPredInterBlk( COMPONENT_Y, pcCU, pcPicYuvRef, uiPartAddr, &cMvCand, iSizeX, iSizeY, pcTemplateCand, true, pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) );
3724  }
3725  else
3726  {
3727  xPredInterBlk( COMPONENT_Y, pcCU, pcPicYuvRef, uiPartAddr, &cMvCand, iSizeX, iSizeY, pcTemplateCand, false, pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) );
3728  }
3729 
3730  if ( pcCU->getSlice()->testWeightPred() && pcCU->getSlice()->getSliceType()==P_SLICE )
3731  {
3732  xWeightedPredictionUni( pcCU, pcTemplateCand, uiPartAddr, iSizeX, iSizeY, eRefPicList, pcTemplateCand, iRefIdx );
3733  }
3734 
3735  // calc distortion
3736 
3737  uiCost = m_pcRdCost->getDistPart( pcCU->getSlice()->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA), pcTemplateCand->getAddr(COMPONENT_Y, uiPartAddr), pcTemplateCand->getStride(COMPONENT_Y), pcOrgYuv->getAddr(COMPONENT_Y, uiPartAddr), pcOrgYuv->getStride(COMPONENT_Y), iSizeX, iSizeY, COMPONENT_Y, DF_SAD );
3738  uiCost = (UInt) m_pcRdCost->calcRdCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum], uiCost, DF_SAD );
3739  return uiCost;
3740 }
3741 
3742 
// Motion estimation for one partition against one reference picture: an integer
// search (xPatternSearch or xPatternSearchFast) followed by fractional refinement
// (xPatternSearchFracDIF).
//
//   pcCU         CU being searched
//   pcYuvOrg     original samples
//   iPartIdx     partition index; resolved to uiPartAddr / iRoiWidth / iRoiHeight
//   eRefPicList, iRefIdxPred  select the reference picture and the adaptive range
//   pcMvPred     MV predictor (search centre for the non-bi case, cost predictor)
//   rcMv         in: start MV for the bi case; out: integer<<2 + half<<1 + quarter
//   ruiBits      incremented by the bits of the final MV
//   ruiCost      out: weighted distortion part plus cost of the updated ruiBits
//   bBi          true for the bi-predictive refinement pass
//
// Side effects visible here: m_iSearchRange is overwritten from m_aaiAdaptSR,
// m_cDistParam.bIsBiPred is set, the m_pcRdCost predictor/cost scale/lambda are
// changed, and m_integerMv2Nx2N is updated for 2Nx2N partitions on the fast path.
3743 Void TEncSearch::xMotionEstimation( TComDataCU* pcCU, TComYuv* pcYuvOrg, Int iPartIdx, RefPicList eRefPicList, TComMv* pcMvPred, Int iRefIdxPred, TComMv& rcMv, UInt& ruiBits, Distortion& ruiCost, Bool bBi )
3744 {
3745  UInt uiPartAddr;
3746  Int iRoiWidth;
3747  Int iRoiHeight;
3748 
3749  TComMv cMvHalf, cMvQter;
3750  TComMv cMvSrchRngLT;
3751  TComMv cMvSrchRngRB;
3752 
3753  TComYuv* pcYuv = pcYuvOrg;
3754 
3755  assert(eRefPicList < MAX_NUM_REF_LIST_ADAPT_SR && iRefIdxPred<Int(MAX_IDX_ADAPT_SR));
3756  m_iSearchRange = m_aaiAdaptSR[eRefPicList][iRefIdxPred];
3757 
3758  Int iSrchRng = ( bBi ? m_bipredSearchRange : m_iSearchRange );
3759  TComPattern cPattern;
3760 
3761  Double fWeight = 1.0;
3762 
3763  pcCU->getPartIndexAndSize( iPartIdx, uiPartAddr, iRoiWidth, iRoiHeight );
3764 
3765  if ( bBi ) // Bipredictive ME
3766  {
// Search against a modified copy of the original: the copy in m_cYuvPredTemp is
// passed through removeHighFreq together with the other list's prediction, and
// the distortion part of the final cost is later scaled by fWeight = 0.5.
3767  TComYuv* pcYuvOther = &m_acYuvPred[1-(Int)eRefPicList];
3768  pcYuv = &m_cYuvPredTemp;
3769 
3770  pcYuvOrg->copyPartToPartYuv( pcYuv, uiPartAddr, iRoiWidth, iRoiHeight );
3771 
3772  pcYuv->removeHighFreq( pcYuvOther, uiPartAddr, iRoiWidth, iRoiHeight, pcCU->getSlice()->getSPS()->getBitDepths().recon, m_pcEncCfg->getClipForBiPredMeEnabled() );
3773 
3774  fWeight = 0.5;
3775  }
3776  m_cDistParam.bIsBiPred = bBi;
3777 
3778  // Search key pattern initialization
// NOTE(review): listing lines 3789 and 3798 (one initPattern argument in each
// branch, and the closing of the call in the #else branch) are absent from this
// extract.
3779 #if MCTS_ENC_CHECK
3780  Int roiPosX, roiPosY;
3781  Int roiW, roiH;
3782  pcCU->getPartPosition(iPartIdx, roiPosX, roiPosY, roiW, roiH);
3783  assert(roiW == iRoiWidth);
3784  assert(roiH == iRoiHeight);
3785  cPattern.initPattern( pcYuv->getAddr(COMPONENT_Y, uiPartAddr),
3786  iRoiWidth,
3787  iRoiHeight,
3788  pcYuv->getStride(COMPONENT_Y),
3790  roiPosX,
3791  roiPosY);
3792  xInitTileBorders(pcCU, &cPattern);
3793 #else
3794  cPattern.initPattern( pcYuv->getAddr ( COMPONENT_Y, uiPartAddr ),
3795  iRoiWidth,
3796  iRoiHeight,
3797  pcYuv->getStride(COMPONENT_Y),
3799 #endif
3800 
3801  Pel* piRefY = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), pcCU->getZorderIdxInCtu() + uiPartAddr );
3802  Int iRefStride = pcCU->getSlice()->getRefPic( eRefPicList, iRefIdxPred )->getPicYuvRec()->getStride(COMPONENT_Y);
3803 
3804  TComMv cMvPred = *pcMvPred;
3805 
// Search window centre: the incoming rcMv for the bi case, the predictor otherwise.
3806  if ( bBi )
3807  {
3808 #if MCTS_ENC_CHECK
3809  xSetSearchRange(pcCU, rcMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB, &cPattern);
3810 #else
3811  xSetSearchRange(pcCU, rcMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB);
3812 #endif
3813  }
3814  else
3815  {
3816 #if MCTS_ENC_CHECK
3817  xSetSearchRange(pcCU, cMvPred, iSrchRng, cMvSrchRngLT, cMvSrchRngRB, &cPattern);
3818 #else
3819  xSetSearchRange(pcCU, cMvPred, iSrchRng, cMvSrchRngLT, cMvSrchRngRB);
3820 #endif
3821  }
3822 
3823  m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) );
3824 
3825  m_pcRdCost->setPredictor ( *pcMvPred );
3826  m_pcRdCost->setCostScale ( 2 );
3827 
3828  setWpScalingDistParam( pcCU, iRefIdxPred, eRefPicList );
3829  // Do integer search
// NOTE(review): listing line 3830 (the condition that selects the exhaustive
// xPatternSearch over the fast path) is absent from this extract.
3831  {
3832  xPatternSearch ( &cPattern, piRefY, iRefStride, &cMvSrchRngLT, &cMvSrchRngRB, rcMv, ruiCost );
3833  }
3834  else
3835  {
// Fast path: start from the predictor. The stored 2Nx2N integer MV is offered as
// an extra candidate unless this is a 2Nx2N partition at depth 0, and the result
// of every 2Nx2N search is stored for later use.
3836  rcMv = *pcMvPred;
3837  const TComMv *pIntegerMv2Nx2NPred=0;
3838  if (pcCU->getPartitionSize(0) != SIZE_2Nx2N || pcCU->getDepth(0) != 0)
3839  {
3840  pIntegerMv2Nx2NPred = &(m_integerMv2Nx2N[eRefPicList][iRefIdxPred]);
3841  }
3842  xPatternSearchFast ( pcCU, &cPattern, piRefY, iRefStride, &cMvSrchRngLT, &cMvSrchRngRB, rcMv, ruiCost, pIntegerMv2Nx2NPred );
3843  if (pcCU->getPartitionSize(0) == SIZE_2Nx2N)
3844  {
3845  m_integerMv2Nx2N[eRefPicList][iRefIdxPred] = rcMv;
3846  }
3847  }
3848 
3849  m_pcRdCost->selectMotionLambda( true, 0, pcCU->getCUTransquantBypass(uiPartAddr) );
3850  m_pcRdCost->setCostScale ( 1 );
3851 
3852  const Bool bIsLosslessCoded = pcCU->getCUTransquantBypass(uiPartAddr) != 0;
3853  xPatternSearchFracDIF( bIsLosslessCoded, &cPattern, piRefY, iRefStride, &rcMv, cMvHalf, cMvQter, ruiCost );
3854 
// Combine the three results into one vector: integer part scaled by 4, half-sample
// offset scaled by 2, quarter-sample offset as is.
3855  m_pcRdCost->setCostScale( 0 );
3856  rcMv <<= 2;
3857  rcMv += (cMvHalf <<= 1);
3858  rcMv += cMvQter;
3859 
3860  UInt uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.getHor(), rcMv.getVer() );
3861 
// Remove the MV cost from the search cost, weight what is left by fWeight
// (0.5 for bi-prediction), then add the cost of all bits including the MV.
3862  ruiBits += uiMvBits;
3863  ruiCost = (Distortion)( floor( fWeight * ( (Double)ruiCost - (Double)m_pcRdCost->getCost( uiMvBits ) ) ) + (Double)m_pcRdCost->getCost( ruiBits ) );
3864 }
3865 
3866 #if MCTS_ENC_CHECK
// Stores the full-pel bounds of the tile containing pcCU in pcPatternKey
// (via setTileBorders). Only compiled when MCTS_ENC_CHECK is enabled.
3867 Void TEncSearch::xInitTileBorders(const TComDataCU* const pcCU, TComPattern* pcPatternKey)
3868 {
// NOTE(review): listing line 3869 (the condition guarding the block below) is
// absent from this extract.
3870  {
3871  UInt tileXPosInCtus = 0;
3872  UInt tileYPosInCtus = 0;
3873  UInt tileWidthtInCtus = 0;
3874  UInt tileHeightInCtus = 0;
3875 
3876  getTilePosition(pcCU, tileXPosInCtus, tileYPosInCtus, tileWidthtInCtus, tileHeightInCtus);
3877 
// NOTE(review): the CTU width is used for both axes, i.e. the vertical bounds
// assume the CTU height equals getMaxCUWidth() -- confirm.
3878  const Int ctuLength = pcCU->getPic()->getPicSym()->getSPS().getMaxCUWidth();
3879 
3880  // tile position in full pels
// The right/bottom values are inclusive (last sample inside the tile), hence "- 1".
3881  const Int tileLeftTopPelPosX = ctuLength * tileXPosInCtus;
3882  const Int tileLeftTopPelPosY = ctuLength * tileYPosInCtus;
3883  const Int tileRightBottomPelPosX = ((tileWidthtInCtus + tileXPosInCtus) * ctuLength) - 1;
3884  const Int tileRightBottomPelPosY = ((tileHeightInCtus + tileYPosInCtus) * ctuLength) - 1;
3885 
3886  pcPatternKey->setTileBorders (tileLeftTopPelPosX,tileLeftTopPelPosY,tileRightBottomPelPosX,tileRightBottomPelPosY);
3887  }
3888 }
3889 #endif
3890 
3891 
// Computes the search window around cMvPred.
//
//   cMvPred        window centre; a clipped copy is used, the argument is untouched
//   iSrchRng       half-width of the window before the iMvShift scaling
//   rcMvSrchRngLT  out: left/top corner of the window
//   rcMvSrchRngRB  out: right/bottom corner of the window
//   pcPatternKey   (MCTS_ENC_CHECK only) supplies ROI position/size and tile borders
//
// The corners are built as centre -/+ (iSrchRng << iMvShift), clipped with
// pcCU->clipMv, and finally scaled down by iMvShift (shift or rounding divide,
// depending on ME_ENABLE_ROUNDING_OF_MVS).
3892 Void TEncSearch::xSetSearchRange ( const TComDataCU* const pcCU, const TComMv& cMvPred, const Int iSrchRng,
3893 #if MCTS_ENC_CHECK
3894  TComMv& rcMvSrchRngLT, TComMv& rcMvSrchRngRB, const TComPattern* const pcPatternKey )
3895 #else
3896  TComMv& rcMvSrchRngLT, TComMv& rcMvSrchRngRB )
3897 #endif
3898 {
3899  Int iMvShift = 2;
3900  TComMv cTmpMvPred = cMvPred;
3901  pcCU->clipMv( cTmpMvPred );
3902 
3903 #if MCTS_ENC_CHECK
// NOTE(review): listing line 3904 (the condition choosing the tile-restricted
// branch) is absent from this extract.
3905  {
// Tile-restricted window: each side is the tighter of the normal window edge and
// the distance from the ROI to the corresponding tile border, scaled by iMvShift.
3906  const Int lRangeXLeft = max(cTmpMvPred.getHor() - (iSrchRng << iMvShift), (pcPatternKey->getTileLeftTopPelPosX() - pcPatternKey->getROIYPosX()) << iMvShift);
3907  const Int lRangeYTop = max(cTmpMvPred.getVer() - (iSrchRng << iMvShift), (pcPatternKey->getTileLeftTopPelPosY() - pcPatternKey->getROIYPosY()) << iMvShift);
3908  const Int lRangeXRight = min(cTmpMvPred.getHor() + (iSrchRng << iMvShift), (pcPatternKey->getTileRightBottomPelPosX() - (pcPatternKey->getROIYPosX() + pcPatternKey->getROIYWidth())) << iMvShift);
3909  const Int lRangeYBottom = min(cTmpMvPred.getVer() + (iSrchRng << iMvShift), (pcPatternKey->getTileRightBottomPelPosY() - (pcPatternKey->getROIYPosY() + pcPatternKey->getROIYHeight())) << iMvShift);
3910 
3911  rcMvSrchRngLT.setHor(lRangeXLeft);
3912  rcMvSrchRngLT.setVer(lRangeYTop);
3913 
3914  rcMvSrchRngRB.setHor(lRangeXRight);
3915  rcMvSrchRngRB.setVer(lRangeYBottom);
3916  }
3917  else
3918  {
3919  rcMvSrchRngLT.setHor(cTmpMvPred.getHor() - (iSrchRng << iMvShift));
3920  rcMvSrchRngLT.setVer(cTmpMvPred.getVer() - (iSrchRng << iMvShift));
3921 
3922  rcMvSrchRngRB.setHor( cTmpMvPred.getHor() + (iSrchRng << iMvShift));
3923  rcMvSrchRngRB.setVer( cTmpMvPred.getVer() + (iSrchRng << iMvShift) );
3924  }
3925 #else
3926  rcMvSrchRngLT.setHor( cTmpMvPred.getHor() - (iSrchRng << iMvShift) );
3927  rcMvSrchRngLT.setVer( cTmpMvPred.getVer() - (iSrchRng << iMvShift) );
3928 
3929  rcMvSrchRngRB.setHor( cTmpMvPred.getHor() + (iSrchRng << iMvShift));
3930  rcMvSrchRngRB.setVer( cTmpMvPred.getVer() + (iSrchRng << iMvShift) );
3931 #endif
3932 
3933  pcCU->clipMv ( rcMvSrchRngLT );
3934  pcCU->clipMv ( rcMvSrchRngRB );
3935 
// Scale the corners back down by iMvShift so callers receive them in the coarser
// unit used by the integer search loops.
3936 #if ME_ENABLE_ROUNDING_OF_MVS
3937  rcMvSrchRngLT.divideByPowerOf2(iMvShift);
3938  rcMvSrchRngRB.divideByPowerOf2(iMvShift);
3939 #else
3940  rcMvSrchRngLT >>= iMvShift;
3941  rcMvSrchRngRB >>= iMvShift;
3942 #endif
3943 }
3944 
3945 
// Exhaustive integer search: evaluates every position of the window
// [pcMvSrchRngLT, pcMvSrchRngRB] (both corners inclusive) and keeps the cheapest.
//
//   pcPatternKey  block to match; used to set up m_cDistParam
//   piRefY        reference samples at zero displacement
//   iRefStride    stride of the reference plane
//   rcMv          out: best displacement (x, y)
//   ruiSAD        out: best cost with the motion-vector cost subtracted again
3946 Void TEncSearch::xPatternSearch( const TComPattern* const pcPatternKey,
3947  const Pel* piRefY,
3948  const Int iRefStride,
3949  const TComMv* const pcMvSrchRngLT,
3950  const TComMv* const pcMvSrchRngRB,
3951  TComMv& rcMv,
3952  Distortion& ruiSAD )
3953 {
3954  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
3955  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
3956  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
3957  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
3958 
3959  Distortion uiSad;
3960  Distortion uiSadBest = std::numeric_limits<Distortion>::max();
3961  Int iBestX = 0;
3962  Int iBestY = 0;
3963 
3964  //-- jclee for using the SAD function pointer
3965  m_pcRdCost->setDistParam( pcPatternKey, piRefY, iRefStride, m_cDistParam );
3966 
3967  // fast encoder decision: use subsampled SAD for integer ME
// NOTE(review): listing line 3968 (the condition enabling the subsampling below)
// is absent from this extract.
3969  {
3970  if ( m_cDistParam.iRows > 8 )
3971  {
3972  m_cDistParam.iSubShift = 1;
3973  }
3974  }
3975 
// Move the row pointer to the first searched row; it advances by one stride per y.
3976  piRefY += (iSrchRngVerTop * iRefStride);
3977  for ( Int y = iSrchRngVerTop; y <= iSrchRngVerBottom; y++ )
3978  {
3979  for ( Int x = iSrchRngHorLeft; x <= iSrchRngHorRight; x++ )
3980  {
3981  // find min. distortion position
3982  m_cDistParam.pCur = piRefY + x;
3983 
// NOTE(review): listing line 3984 is absent from this extract.
3986  m_cDistParam.bitDepth = pcPatternKey->getBitDepthY();
3987  uiSad = m_cDistParam.DistFunc( &m_cDistParam );
3988 
3989  // motion cost
3990  uiSad += m_pcRdCost->getCostOfVectorWithPredictor( x, y );
3991 
// Strict '<' keeps the first position reached in scan order when costs tie.
3992  if ( uiSad < uiSadBest )
3993  {
3994  uiSadBest = uiSad;
3995  iBestX = x;
3996  iBestY = y;
// NOTE(review): listing line 3997 is absent from this extract.
3998  }
3999  }
4000  piRefY += iRefStride;
4001  }
4002 
4003  rcMv.set( iBestX, iBestY );
4004 
// Report the distortion alone: take the MV cost of the winner back out.
4005  ruiSAD = uiSadBest - m_pcRdCost->getCostOfVectorWithPredictor( iBestX, iBestY );
4006  return;
4007 }
4008 
4009 
// Dispatches the fast integer search to one of the TZ search variants.
// NOTE(review): several listing lines are absent from this extract: 4010 (the
// opening of the signature with the function name and the pcCU parameter used
// below), 4021/4023/4025, 4027 (the switch header) and 4037 (the case label of the
// third branch).
// rcMv and ruiSAD are forwarded to the selected search; for MESEARCH_FULL and the
// default case nothing is called, so both are left unchanged.
4011  const TComPattern* const pcPatternKey,
4012  const Pel* const piRefY,
4013  const Int iRefStride,
4014  const TComMv* const pcMvSrchRngLT,
4015  const TComMv* const pcMvSrchRngRB,
4016  TComMv& rcMv,
4017  Distortion& ruiSAD,
4018  const TComMv* const pIntegerMv2Nx2NPred )
4019 {
4020  assert (MD_LEFT < NUM_MV_PREDICTORS);
4022  assert (MD_ABOVE < NUM_MV_PREDICTORS);
4024  assert (MD_ABOVE_RIGHT < NUM_MV_PREDICTORS);
4026 
4028  {
4029  case MESEARCH_DIAMOND:
// TZ search with bExtendedSettings = false.
4030  xTZSearch( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred, false );
4031  break;
4032 
4033  case MESEARCH_SELECTIVE:
4034  xTZSearchSelective( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred );
4035  break;
4036 
// TZ search with bExtendedSettings = true.
4038  xTZSearch( pcCU, pcPatternKey, piRefY, iRefStride, pcMvSrchRngLT, pcMvSrchRngRB, rcMv, ruiSAD, pIntegerMv2Nx2NPred, true );
4039  break;
4040 
4041  case MESEARCH_FULL: // shouldn't get here.
4042  default:
4043  break;
4044  }
4045 }
4046 
4047 
// TZ-style fast integer motion search.
// NOTE(review): listing line 4048 (the opening of the signature with the function
// name and the pcCU parameter used below) is absent from this extract.
//
//   pcPatternKey          block to match
//   piRefY, iRefStride    reference samples and their stride
//   pcMvSrchRngLT/RB      search window corners, forwarded to the point searches
//   rcMv                  in: start vector (clipped and scaled down by 4 here);
//                         out: best integer vector found
//   ruiSAD                out: best cost with the motion-vector cost subtracted
//   pIntegerMv2Nx2NPred   optional extra start candidate (may be null)
//   bExtendedSettings     switches on the options initialised from it below
//
// Stages, in order: start-point selection (rcMv, optional other predictors, zero
// vector, optional 2Nx2N vector), expanding first search around the best start,
// optional zero-neighbourhood test, 2-point completion, raster search, then
// raster and/or star refinement.
4049  const TComPattern* const pcPatternKey,
4050  const Pel* const piRefY,
4051  const Int iRefStride,
4052  const TComMv* const pcMvSrchRngLT,
4053  const TComMv* const pcMvSrchRngRB,
4054  TComMv& rcMv,
4055  Distortion& ruiSAD,
4056  const TComMv* const pIntegerMv2Nx2NPred,
4057  const Bool bExtendedSettings)
4058 {
4059  const Bool bUseAdaptiveRaster = bExtendedSettings;
4060  const Int iRaster = 5;
4061  const Bool bTestOtherPredictedMV = bExtendedSettings;
4062  const Bool bTestZeroVector = true;
4063  const Bool bTestZeroVectorStart = bExtendedSettings;
4064  const Bool bTestZeroVectorStop = false;
4065  const Bool bFirstSearchDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch
4066  const Bool bFirstCornersForDiamondDist1 = bExtendedSettings;
4067  const Bool bFirstSearchStop = m_pcEncCfg->getFastMEAssumingSmootherMVEnabled();
4068  const UInt uiFirstSearchRounds = 3; // first search stop X rounds after best match (must be >=1)
4069  const Bool bEnableRasterSearch = true;
4070  const Bool bAlwaysRasterSearch = bExtendedSettings; // true: BETTER but factor 2 slower
4071  const Bool bRasterRefinementEnable = false; // enable either raster refinement or star refinement
4072  const Bool bRasterRefinementDiamond = false; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch
4073  const Bool bRasterRefinementCornersForDiamondDist1 = bExtendedSettings;
4074  const Bool bStarRefinementEnable = true; // enable either star refinement or raster refinement
4075  const Bool bStarRefinementDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch
4076  const Bool bStarRefinementCornersForDiamondDist1 = bExtendedSettings;
4077  const Bool bStarRefinementStop = false;
4078  const UInt uiStarRefinementRounds = 2; // star refinement stop X rounds after best match (must be >=1)
4079  const Bool bNewZeroNeighbourhoodTest = bExtendedSettings;
4080 
4081  UInt uiSearchRange = m_iSearchRange;
4082  pcCU->clipMv( rcMv );
4083 #if ME_ENABLE_ROUNDING_OF_MVS
4084  rcMv.divideByPowerOf2(2);
4085 #else
4086  rcMv >>= 2;
4087 #endif
4088  // init TZSearchStruct
// NOTE(review): only iYStride, piRefY and uiBestSad are assigned here; iBestX and
// iBestY are read further down and, unlike in xTZSearchSelective, are not zeroed
// first. Confirm the first xTZSearchHelp call always sets them.
4089  IntTZSearchStruct cStruct;
4090  cStruct.iYStride = iRefStride;
4091  cStruct.piRefY = piRefY;
4092  cStruct.uiBestSad = MAX_UINT;
4093 
4094  // set rcMv (Median predictor) as start point and as best point
4095  xTZSearchHelp( pcPatternKey, cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 );
4096 
4097  // test whether one of PRED_A, PRED_B, PRED_C MV is better start point than Median predictor
4098  if ( bTestOtherPredictedMV )
4099  {
4100  for ( UInt index = 0; index < NUM_MV_PREDICTORS; index++ )
4101  {
4102  TComMv cMv = m_acMvPredictors[index];
4103  pcCU->clipMv( cMv );
4104 #if ME_ENABLE_ROUNDING_OF_MVS
4105  cMv.divideByPowerOf2(2);
4106 #else
4107  cMv >>= 2;
4108 #endif
// NOTE(review): the two coordinate comparisons are joined with '&&', so a
// predictor is skipped as soon as EITHER coordinate equals the current best. The
// zero-vector and 2Nx2N tests below use '||' for the equivalent "differs from the
// best" check -- confirm whether '&&' is intended here.
4109  if (cMv != rcMv && (cMv.getHor() != cStruct.iBestX && cMv.getVer() != cStruct.iBestY))
4110  {
4111  // only test cMV if not obviously previously tested.
4112  xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 );
4113  }
4114  }
4115  }
4116 
4117  // test whether zero Mv is better start point than Median predictor
4118  if ( bTestZeroVector )
4119  {
4120  if ((rcMv.getHor() != 0 || rcMv.getVer() != 0) &&
4121  (0 != cStruct.iBestX || 0 != cStruct.iBestY))
4122  {
4123  // only test 0-vector if not obviously previously tested.
4124  xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
4125  }
4126  }
4127 
4128  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
4129  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
4130  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
4131  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
4132 
4133  if (pIntegerMv2Nx2NPred != 0)
4134  {
// Scale the stored integer vector up by 4 so it can be clipped like the other
// candidates, then scale it back down.
4135  TComMv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred;
4136  integerMv2Nx2NPred <<= 2;
4137  pcCU->clipMv( integerMv2Nx2NPred );
4138 #if ME_ENABLE_ROUNDING_OF_MVS
4139  integerMv2Nx2NPred.divideByPowerOf2(2);
4140 #else
4141  integerMv2Nx2NPred >>= 2;
4142 #endif
4143  if ((rcMv != integerMv2Nx2NPred) &&
4144  (integerMv2Nx2NPred.getHor() != cStruct.iBestX || integerMv2Nx2NPred.getVer() != cStruct.iBestY))
4145  {
4146  // only test integerMv2Nx2NPred if not obviously previously tested.
4147  xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
4148  }
4149 
4150  // reset search range
// NOTE(review): the recomputed window only updates the local iSrchRng* variables,
// which are read by the raster loops below; the diamond/square/2-point searches
// are still given the original pcMvSrchRngLT / pcMvSrchRngRB.
4151  TComMv cMvSrchRngLT;
4152  TComMv cMvSrchRngRB;
4153  Int iSrchRng = m_iSearchRange;
4154  TComMv currBestMv(cStruct.iBestX, cStruct.iBestY );
4155  currBestMv <<= 2;
4156 #if MCTS_ENC_CHECK
4157  xSetSearchRange(pcCU, currBestMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB, pcPatternKey);
4158 #else
4159  xSetSearchRange(pcCU, currBestMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB);
4160 #endif
4161  iSrchRngHorLeft = cMvSrchRngLT.getHor();
4162  iSrchRngHorRight = cMvSrchRngRB.getHor();
4163  iSrchRngVerTop = cMvSrchRngLT.getVer();
4164  iSrchRngVerBottom = cMvSrchRngRB.getVer();
4165  }
4166 
4167  // start search
4168  Int iDist = 0;
4169  Int iStartX = cStruct.iBestX;
4170  Int iStartY = cStruct.iBestY;
4171 
// Captured before the first search so the zero-neighbourhood test below can tell
// whether the start point itself was the zero vector.
4172  const Bool bBestCandidateZero = (cStruct.iBestX == 0) && (cStruct.iBestY == 0);
4173 
4174  // first search around best position up to now.
4175  // The following works as a "subsampled/log" window search around the best candidate
4176  for ( iDist = 1; iDist <= (Int)uiSearchRange; iDist*=2 )
4177  {
4178  if ( bFirstSearchDiamond == 1 )
4179  {
4180  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, bFirstCornersForDiamondDist1 );
4181  }
4182  else
4183  {
4184  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4185  }
4186 
4187  if ( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion
4188  {
4189  break;
4190  }
4191  }
4192 
4193  if (!bNewZeroNeighbourhoodTest)
4194  {
4195  // test whether zero Mv is a better start point than Median predictor
4196  if ( bTestZeroVectorStart && ((cStruct.iBestX != 0) || (cStruct.iBestY != 0)) )
4197  {
4198  xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
4199  if ( (cStruct.iBestX == 0) && (cStruct.iBestY == 0) )
4200  {
4201  // test its neighborhood
4202  for ( iDist = 1; iDist <= (Int)uiSearchRange; iDist*=2 )
4203  {
4204  xTZ8PointDiamondSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, 0, 0, iDist, false );
4205  if ( bTestZeroVectorStop && (cStruct.uiBestRound > 0) ) // stop criterion
4206  {
4207  break;
4208  }
4209  }
4210  }
4211  }
4212  }
4213  else
4214  {
4215  // Test also zero neighbourhood but with half the range
4216  // It was reported that the original (above) search scheme using bTestZeroVectorStart did not
4217  // make sense since one would have already checked the zero candidate earlier
4218  // and thus the conditions for that test would have not been satisfied
4219  if (bTestZeroVectorStart == true && bBestCandidateZero != true)
4220  {
4221  for ( iDist = 1; iDist <= ((Int)uiSearchRange >> 1); iDist*=2 )
4222  {
4223  xTZ8PointDiamondSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, 0, 0, iDist, false );
4224  if ( bTestZeroVectorStop && (cStruct.uiBestRound > 2) ) // stop criterion
4225  {
4226  break;
4227  }
4228  }
4229  }
4230  }
4231 
4232  // calculate only 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
4233  if ( cStruct.uiBestDistance == 1 )
4234  {
4235  cStruct.uiBestDistance = 0;
4236  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
4237  }
4238 
4239  // raster search if distance is too big
4240  if (bUseAdaptiveRaster)
4241  {
// Adaptive variant: a raster pass always runs. When the best distance does not
// exceed iRaster, the step grows by one and each window bound is halved.
4242  Int iWindowSize = iRaster;
4243  Int iSrchRngRasterLeft = iSrchRngHorLeft;
4244  Int iSrchRngRasterRight = iSrchRngHorRight;
4245  Int iSrchRngRasterTop = iSrchRngVerTop;
4246  Int iSrchRngRasterBottom = iSrchRngVerBottom;
4247 
4248  if (!(bEnableRasterSearch && ( ((Int)(cStruct.uiBestDistance) > iRaster))))
4249  {
4250  iWindowSize ++;
4251  iSrchRngRasterLeft /= 2;
4252  iSrchRngRasterRight /= 2;
4253  iSrchRngRasterTop /= 2;
4254  iSrchRngRasterBottom /= 2;
4255  }
4256  cStruct.uiBestDistance = iWindowSize;
4257  for ( iStartY = iSrchRngRasterTop; iStartY <= iSrchRngRasterBottom; iStartY += iWindowSize )
4258  {
4259  for ( iStartX = iSrchRngRasterLeft; iStartX <= iSrchRngRasterRight; iStartX += iWindowSize )
4260  {
4261  xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, iWindowSize );
4262  }
4263  }
4264  }
4265  else
4266  {
4267  if ( bEnableRasterSearch && ( ((Int)(cStruct.uiBestDistance) > iRaster) || bAlwaysRasterSearch ) )
4268  {
4269  cStruct.uiBestDistance = iRaster;
4270  for ( iStartY = iSrchRngVerTop; iStartY <= iSrchRngVerBottom; iStartY += iRaster )
4271  {
4272  for ( iStartX = iSrchRngHorLeft; iStartX <= iSrchRngHorRight; iStartX += iRaster )
4273  {
4274  xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, iRaster );
4275  }
4276  }
4277  }
4278  }
4279 
4280  // raster refinement
4281 
// bRasterRefinementEnable is the constant false above, so this block never runs
// with the settings as written.
4282  if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 )
4283  {
4284  while ( cStruct.uiBestDistance > 0 )
4285  {
4286  iStartX = cStruct.iBestX;
4287  iStartY = cStruct.iBestY;
4288  if ( cStruct.uiBestDistance > 1 )
4289  {
4290  iDist = cStruct.uiBestDistance >>= 1;
4291  if ( bRasterRefinementDiamond == 1 )
4292  {
4293  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, bRasterRefinementCornersForDiamondDist1 );
4294  }
4295  else
4296  {
4297  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4298  }
4299  }
4300 
4301  // calculate only 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
4302  if ( cStruct.uiBestDistance == 1 )
4303  {
4304  cStruct.uiBestDistance = 0;
4305  if ( cStruct.ucPointNr != 0 )
4306  {
4307  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
4308  }
4309  }
4310  }
4311  }
4312 
4313  // star refinement
4314  if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
4315  {
// Repeat the expanding search from each new best position. uiBestDistance is
// cleared at the top of every pass, so the loop ends after a pass in which the
// point searches leave it at zero.
4316  while ( cStruct.uiBestDistance > 0 )
4317  {
4318  iStartX = cStruct.iBestX;
4319  iStartY = cStruct.iBestY;
4320  cStruct.uiBestDistance = 0;
4321  cStruct.ucPointNr = 0;
4322  for ( iDist = 1; iDist < (Int)uiSearchRange + 1; iDist*=2 )
4323  {
4324  if ( bStarRefinementDiamond == 1 )
4325  {
4326  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, bStarRefinementCornersForDiamondDist1 );
4327  }
4328  else
4329  {
4330  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4331  }
4332  if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
4333  {
4334  break;
4335  }
4336  }
4337 
4338  // calculate only 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
4339  if ( cStruct.uiBestDistance == 1 )
4340  {
4341  cStruct.uiBestDistance = 0;
4342  if ( cStruct.ucPointNr != 0 )
4343  {
4344  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
4345  }
4346  }
4347  }
4348  }
4349 
4350  // write out best match
// ruiSAD is the best cost with the motion-vector cost of the winner subtracted.
4351  rcMv.set( cStruct.iBestX, cStruct.iBestY );
4352  ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY );
4353 }
4354 
4355 
// "Selective" TZ search: start-point tests, a coarse grid search around the best
// start point, then either a full search of the window or a star refinement.
// NOTE(review): listing line 4356 (the opening of the signature with the function
// name and the pcCU parameter used below) is absent from this extract.
//
//   pcPatternKey          block to match
//   piRefY, iRefStride    reference samples and their stride
//   pcMvSrchRngLT/RB      search window corners
//   rcMv                  in: start vector (clipped and scaled down by 4 here);
//                         out: best integer vector found
//   ruiSAD                out: best cost with the motion-vector cost subtracted
//   pIntegerMv2Nx2NPred   optional extra start candidate (may be null)
4357  const TComPattern* const pcPatternKey,
4358  const Pel* const piRefY,
4359  const Int iRefStride,
4360  const TComMv* const pcMvSrchRngLT,
4361  const TComMv* const pcMvSrchRngRB,
4362  TComMv &rcMv,
4363  Distortion &ruiSAD,
4364  const TComMv* const pIntegerMv2Nx2NPred )
4365 {
4366  const Bool bTestOtherPredictedMV = true;
4367  const Bool bTestZeroVector = true;
4368  const Bool bEnableRasterSearch = true;
4369  const Bool bAlwaysRasterSearch = false; // 1: BETTER but factor 15x slower
4370  const Bool bStarRefinementEnable = true; // enable either star refinement or raster refinement
4371  const Bool bStarRefinementDiamond = true; // 1 = xTZ8PointDiamondSearch 0 = xTZ8PointSquareSearch
4372  const Bool bStarRefinementStop = false;
4373  const UInt uiStarRefinementRounds = 2; // star refinement stop X rounds after best match (must be >=1)
4374  const UInt uiSearchRange = m_iSearchRange;
4375  const Int uiSearchRangeInitial = m_iSearchRange >> 2;
4376  const Int uiSearchStep = 4;
4377  const Int iMVDistThresh = 8;
4378 
4379  Int iSrchRngHorLeft = pcMvSrchRngLT->getHor();
4380  Int iSrchRngHorRight = pcMvSrchRngRB->getHor();
4381  Int iSrchRngVerTop = pcMvSrchRngLT->getVer();
4382  Int iSrchRngVerBottom = pcMvSrchRngRB->getVer();
4383  Int iFirstSrchRngHorLeft = 0;
4384  Int iFirstSrchRngHorRight = 0;
4385  Int iFirstSrchRngVerTop = 0;
4386  Int iFirstSrchRngVerBottom = 0;
4387  Int iStartX = 0;
4388  Int iStartY = 0;
4389  Int iBestX = 0;
4390  Int iBestY = 0;
4391  Int iDist = 0;
4392 
4393  pcCU->clipMv( rcMv );
4394 #if ME_ENABLE_ROUNDING_OF_MVS
4395  rcMv.divideByPowerOf2(2);
4396 #else
4397  rcMv >>= 2;
4398 #endif
4399  // init TZSearchStruct
4400  IntTZSearchStruct cStruct;
4401  cStruct.iYStride = iRefStride;
4402  cStruct.piRefY = piRefY;
4403  cStruct.uiBestSad = MAX_UINT;
4404  cStruct.iBestX = 0;
4405  cStruct.iBestY = 0;
4406 
4407 
4408  // set rcMv (Median predictor) as start point and as best point
4409  xTZSearchHelp( pcPatternKey, cStruct, rcMv.getHor(), rcMv.getVer(), 0, 0 );
4410 
4411  // test whether one of PRED_A, PRED_B, PRED_C MV is better start point than Median predictor
// Every predictor is evaluated here; there is no "already tested" filter.
4412  if ( bTestOtherPredictedMV )
4413  {
4414  for ( UInt index = 0; index < NUM_MV_PREDICTORS; index++ )
4415  {
4416  TComMv cMv = m_acMvPredictors[index];
4417  pcCU->clipMv( cMv );
4418 #if ME_ENABLE_ROUNDING_OF_MVS
4419  cMv.divideByPowerOf2(2);
4420 #else
4421  cMv >>= 2;
4422 #endif
4423  xTZSearchHelp( pcPatternKey, cStruct, cMv.getHor(), cMv.getVer(), 0, 0 );
4424  }
4425  }
4426 
4427  // test whether zero Mv is better start point than Median predictor
4428  if ( bTestZeroVector )
4429  {
4430  xTZSearchHelp( pcPatternKey, cStruct, 0, 0, 0, 0 );
4431  }
4432 
4433  if ( pIntegerMv2Nx2NPred != 0 )
4434  {
// Scale the stored integer vector up by 4 so it can be clipped like the other
// candidates, then scale it back down.
4435  TComMv integerMv2Nx2NPred = *pIntegerMv2Nx2NPred;
4436  integerMv2Nx2NPred <<= 2;
4437  pcCU->clipMv( integerMv2Nx2NPred );
4438 #if ME_ENABLE_ROUNDING_OF_MVS
4439  integerMv2Nx2NPred.divideByPowerOf2(2);
4440 #else
4441  integerMv2Nx2NPred >>= 2;
4442 #endif
4443  xTZSearchHelp(pcPatternKey, cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
4444 
4445  // reset search range
// The recomputed window replaces the local iSrchRng* bounds used by the grid and
// full-search loops below; the point searches still receive pcMvSrchRngLT/RB.
4446  TComMv cMvSrchRngLT;
4447  TComMv cMvSrchRngRB;
4448  Int iSrchRng = m_iSearchRange;
4449  TComMv currBestMv(cStruct.iBestX, cStruct.iBestY );
4450  currBestMv <<= 2;
4451 #if MCTS_ENC_CHECK
4452  xSetSearchRange(pcCU, currBestMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB, pcPatternKey);
4453 #else
4454  xSetSearchRange(pcCU, currBestMv, iSrchRng, cMvSrchRngLT, cMvSrchRngRB);
4455 #endif
4456  iSrchRngHorLeft = cMvSrchRngLT.getHor();
4457  iSrchRngHorRight = cMvSrchRngRB.getHor();
4458  iSrchRngVerTop = cMvSrchRngLT.getVer();
4459  iSrchRngVerBottom = cMvSrchRngRB.getVer();
4460  }
4461 
4462  // Initial search
// Window of +/- uiSearchRangeInitial (m_iSearchRange >> 2) around the best start
// point, limited to the overall search window, sampled every uiSearchStep positions.
4463  iBestX = cStruct.iBestX;
4464  iBestY = cStruct.iBestY;
4465  iFirstSrchRngHorLeft = ((iBestX - uiSearchRangeInitial) > iSrchRngHorLeft) ? (iBestX - uiSearchRangeInitial) : iSrchRngHorLeft;
4466  iFirstSrchRngVerTop = ((iBestY - uiSearchRangeInitial) > iSrchRngVerTop) ? (iBestY - uiSearchRangeInitial) : iSrchRngVerTop;
4467  iFirstSrchRngHorRight = ((iBestX + uiSearchRangeInitial) < iSrchRngHorRight) ? (iBestX + uiSearchRangeInitial) : iSrchRngHorRight;
4468  iFirstSrchRngVerBottom = ((iBestY + uiSearchRangeInitial) < iSrchRngVerBottom) ? (iBestY + uiSearchRangeInitial) : iSrchRngVerBottom;
4469 
4470  for ( iStartY = iFirstSrchRngVerTop; iStartY <= iFirstSrchRngVerBottom; iStartY += uiSearchStep )
4471  {
4472  for ( iStartX = iFirstSrchRngHorLeft; iStartX <= iFirstSrchRngHorRight; iStartX += uiSearchStep )
4473  {
4474  xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, 0 );
4475  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, 1, false );
4476  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, 2, false );
4477  }
4478  }
4479 
// Despite its name this is a 0/1 flag: set when the best position moved more than
// iMVDistThresh from the pre-grid best in either axis.
4480  Int iMaxMVDistToPred = (abs(cStruct.iBestX - iBestX) > iMVDistThresh || abs(cStruct.iBestY - iBestY) > iMVDistThresh);
4481 
4482  //full search with early exit if MV is distant from predictors
4483  if ( bEnableRasterSearch && (iMaxMVDistToPred || bAlwaysRasterSearch) )
4484  {
4485  for ( iStartY = iSrchRngVerTop; iStartY <= iSrchRngVerBottom; iStartY += 1 )
4486  {
4487  for ( iStartX = iSrchRngHorLeft; iStartX <= iSrchRngHorRight; iStartX += 1 )
4488  {
4489  xTZSearchHelp( pcPatternKey, cStruct, iStartX, iStartY, 0, 1 );
4490  }
4491  }
4492  }
4493  //Smaller MV, refine around predictor
4494  else if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
4495  {
4496  // start refinement
// uiBestDistance is cleared at the top of every pass, so the loop ends after a
// pass in which the point searches leave it at zero.
4497  while ( cStruct.uiBestDistance > 0 )
4498  {
4499  iStartX = cStruct.iBestX;
4500  iStartY = cStruct.iBestY;
4501  cStruct.uiBestDistance = 0;
4502  cStruct.ucPointNr = 0;
4503  for ( iDist = 1; iDist < (Int)uiSearchRange + 1; iDist*=2 )
4504  {
4505  if ( bStarRefinementDiamond == 1 )
4506  {
4507  xTZ8PointDiamondSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist, false );
4508  }
4509  else
4510  {
4511  xTZ8PointSquareSearch ( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB, iStartX, iStartY, iDist );
4512  }
4513  if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
4514  {
4515  break;
4516  }
4517  }
4518 
4519  // calculate only 2 missing points instead of 8 points if cStruct.uiBestDistance == 1
4520  if ( cStruct.uiBestDistance == 1 )
4521  {
4522  cStruct.uiBestDistance = 0;
4523  if ( cStruct.ucPointNr != 0 )
4524  {
4525  xTZ2PointSearch( pcPatternKey, cStruct, pcMvSrchRngLT, pcMvSrchRngRB );
4526  }
4527  }
4528  }
4529  }
4530 
4531  // write out best match
// ruiSAD is the best cost with the motion-vector cost of the winner subtracted.
4532  rcMv.set( cStruct.iBestX, cStruct.iBestY );
4533  ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY );
4534 
4535 }
4536 
4537 
// Fractional refinement around an integer motion vector: a half-sample pass
// followed by a quarter-sample pass.
// NOTE(review): listing line 4538 (the opening of the signature with the return
// type and function name) is absent from this extract.
//
//   bIsLosslessCoded  its negation is passed as the last xPatternRefinement argument
//   pcPatternKey      block to match; also supplies ROI size, bit depth and
//                     (MCTS_ENC_CHECK) ROI position and tile borders
//   piRefY            reference samples at zero displacement
//   iRefStride        stride of the reference plane
//   pcMvInt           integer vector to refine around (read only here)
//   rcMvHalf          in/out for the half-sample pass
//   rcMvQter          in/out for the quarter-sample pass
//   ruiCost           out: cost returned by the quarter-sample pass (the
//                     half-sample result is overwritten)
4539  Bool bIsLosslessCoded,
4540  TComPattern* pcPatternKey,
4541  Pel* piRefY,
4542  Int iRefStride,
4543  TComMv* pcMvInt,
4544  TComMv& rcMvHalf,
4545  TComMv& rcMvQter,
4546  Distortion& ruiCost
4547  )
4548 {
4549  // Reference pattern initialization (integer scale)
// The ROI pattern points at the reference block displaced by the integer vector.
4550  TComPattern cPatternRoi;
4551  Int iOffset = pcMvInt->getHor() + pcMvInt->getVer() * iRefStride;
4552  cPatternRoi.initPattern(piRefY + iOffset,
4553  pcPatternKey->getROIYWidth(),
4554  pcPatternKey->getROIYHeight(),
4555  iRefStride,
4556 #if MCTS_ENC_CHECK
4557  pcPatternKey->getBitDepthY(),
4558  pcPatternKey->getROIYPosX(),
4559  pcPatternKey->getROIYPosY());
4560 #else
4561  pcPatternKey->getBitDepthY());
4562 #endif
4563 #if MCTS_ENC_CHECK
4564  cPatternRoi.setTileBorders(pcPatternKey->getTileLeftTopPelPosX(), pcPatternKey->getTileLeftTopPelPosY(), pcPatternKey->getTileRightBottomPelPosX(), pcPatternKey->getTileRightBottomPelPosY());
4565 #endif
4566 
4567  // Half-pel refinement
4568  xExtDIFUpSamplingH ( &cPatternRoi );
4569 
4570  rcMvHalf = *pcMvInt; rcMvHalf <<= 1; // for mv-cost
4571  TComMv baseRefMv(0, 0);
4572  ruiCost = xPatternRefinement( pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded );
4573 
// The cost scale is set to 0 before the quarter-sample pass.
4574  m_pcRdCost->setCostScale( 0 );
4575 
// The quarter-sample pass is based at twice the half-sample result.
4576  xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf );
4577  baseRefMv = rcMvHalf;
4578  baseRefMv <<= 1;
4579 
// rcMvQter starts as (2 * integer + half) * 2 for the mv-cost computation.
4580  rcMvQter = *pcMvInt; rcMvQter <<= 1; // for mv-cost
4581  rcMvQter += rcMvHalf; rcMvQter <<= 1;
4582  ruiCost = xPatternRefinement( pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded );
4583 }
4584 
4585 
4588  TComYuv* pcYuvResi, TComYuv* pcYuvResiBest, TComYuv* pcYuvRec,
4589  Bool bSkipResidual DEBUG_STRING_FN_DECLARE(sDebug) )
4590 {
 // Purpose: finalise an inter-predicted CU by producing its reconstruction in pcYuvRec and storing the
 // resulting bits, distortion and RD cost in pcCU (getTotalBits / getTotalDistortion / getTotalCost).
 //
 // Two paths are taken depending on bSkipResidual:
 //  - true : the CU is flagged as skip, the residual is cleared, and the distortion is measured on the
 //           reconstruction obtained from the prediction (no residual is coded).
 //  - false: the residual (original minus prediction) is estimated through the transform quadtree, compared
 //           against an all-zero residual, and the reconstruction is prediction + chosen residual, clipped.
 //
 // Visible parameters:
 //  pcYuvResi     - working residual buffer (cleared in the skip path, filled by subtract() otherwise).
 //  pcYuvResiBest - residual that is finally added to the prediction; cleared when no residual is coded.
 //  pcYuvRec      - receives the reconstruction.
 //  bSkipResidual - selects the skip path described above.
 //
 // NOTE(review): this listing has dropped source lines (the embedded numbering skips, e.g. 4585 -> 4588),
 // including the start of the signature. pcCU, pcYuvOrg and pcYuvPred are used below but declared on lines
 // not shown here - confirm against the full file.
4591  assert ( !pcCU->isIntra(0) );
4592 
4593  const UInt cuWidthPixels = pcCU->getWidth ( 0 );
4594  const UInt cuHeightPixels = pcCU->getHeight( 0 );
4595  const Int numValidComponents = pcCU->getPic()->getNumberValidComponents();
4596  const TComSPS &sps=*(pcCU->getSlice()->getSPS());
4597 
4598  // The pcCU is not marked as skip-mode at this point, and its m_pcTrCoeff, m_pcArlCoeff, m_puhCbf, m_puhTrIdx will all be 0.
4599  // due to prior calls to TComDataCU::initEstData( );
4600 
4601  if ( bSkipResidual ) // No residual coding : SKIP mode
4602  {
4603  pcCU->setSkipFlagSubParts( true, 0, pcCU->getDepth(0) );
4604 
4605  pcYuvResi->clear();
4606 
 // With no residual, the reconstruction is taken from the prediction.
4607  pcYuvPred->copyToPartYuv( pcYuvRec, 0 );
4608  Distortion distortion = 0;
4609 
 // Accumulate the distortion between reconstruction and original over every valid component.
 // The CU width/height are right-shifted by the per-component scale factors (csx, csy).
4610  for (Int comp=0; comp < numValidComponents; comp++)
4611  {
4612  const ComponentID compID=ComponentID(comp);
4613  const UInt csx=pcYuvOrg->getComponentScaleX(compID);
4614  const UInt csy=pcYuvOrg->getComponentScaleY(compID);
4615  distortion += m_pcRdCost->getDistPart( sps.getBitDepth(toChannelType(compID)), pcYuvRec->getAddr(compID), pcYuvRec->getStride(compID), pcYuvOrg->getAddr(compID),
4616  pcYuvOrg->getStride(compID), cuWidthPixels >> csx, cuHeightPixels >> csy, compID);
4617  }
4618 
4621 
 // NOTE(review): the statements around this block (embedded lines 4619-4620, 4622, 4624) are missing from
 // the listing, so the condition guarding these braces and their body cannot be seen here.
4623  {
4625  }
4626 
 // Only the skip flag and the merge index are written for a skipped CU.
4627  m_pcEntropyCoder->encodeSkipFlag(pcCU, 0, true);
4628  m_pcEntropyCoder->encodeMergeIndex( pcCU, 0, true );
4629 
 // NOTE(review): uiBits is declared on a line not present in this listing (embedded line 4630) -
 // presumably the bit count reported by the entropy coder after the two calls above; confirm.
4631  pcCU->getTotalBits() = uiBits;
4632  pcCU->getTotalDistortion() = distortion;
4633  pcCU->getTotalCost() = m_pcRdCost->calcRdCost( uiBits, distortion );
4634 
4636 
4637 #if DEBUG_STRING
4638  pcYuvResiBest->clear(); // Clear the residual image, if we didn't code it.
4639  for(UInt i=0; i<MAX_NUM_COMPONENT+1; i++)
4640  {
4641  sDebug+=debug_reorder_data_inter_token[i];
4642  }
4643 #endif
4644 
4645  return;
4646  }
4647 
4648  // Residual coding.
4649 
 // Residual = original - prediction, over the whole CU.
4650  pcYuvResi->subtract( pcYuvOrg, pcYuvPred, 0, cuWidthPixels );
4651 
 // TU recursion object for this CU, starting at partition index 0.
4652  TComTURecurse tuLevel0(pcCU, 0);
4653 
 // Outputs of the quadtree estimation: cost/bits/distortion when coefficients are coded, plus the
 // distortion obtained with an all-zero residual (accumulated through the pointer argument).
4654  Double nonZeroCost = 0;
4655  UInt nonZeroBits = 0;
4656  Distortion nonZeroDistortion = 0;
4657  Distortion zeroDistortion = 0;
4658 
4660 
4661  xEstimateInterResidualQT( pcYuvResi, nonZeroCost, nonZeroBits, nonZeroDistortion, &zeroDistortion, tuLevel0 DEBUG_STRING_PASS_INTO(sDebug) );
4662 
4663  // -------------------------------------------------------
4664  // set the coefficients in the pcCU, and also calculates the residual data.
4665  // If a block full of 0's is efficient, then just use 0's.
4666  // The costs at this point do not include header bits.
4667 
 // NOTE(review): embedded lines 4668-4669 are missing from the listing; the statements that generate the
 // bits read back here are not visible - presumably they signal a zero root CBF; confirm.
4670  const UInt zeroResiBits = m_pcEntropyCoder->getNumberOfWrittenBits();
 // For a lossless-coded CU the zero-residual cost is forced above nonZeroCost so it cannot win the cost comparison.
4671  const Double zeroCost = (pcCU->isLosslessCoded( 0 )) ? (nonZeroCost+1) : (m_pcRdCost->calcRdCost( zeroResiBits, zeroDistortion ));
4672 
 // Use the all-zero residual when it is cheaper, or when the estimation already left the root CBF at 0.
4673  if ( zeroCost < nonZeroCost || !pcCU->getQtRootCbf(0) )
4674  {
 // Reset the transform index, the CBFs, the cross-component prediction alphas and the transform-skip
 // flags of the CU to zero.
4675  const UInt uiQPartNum = tuLevel0.GetAbsPartIdxNumParts();
4676  ::memset( pcCU->getTransformIdx() , 0, uiQPartNum * sizeof(UChar) );
4677  for (Int comp=0; comp < numValidComponents; comp++)
4678  {
4679  const ComponentID component = ComponentID(comp);
4680  ::memset( pcCU->getCbf( component ) , 0, uiQPartNum * sizeof(UChar) );
4681  ::memset( pcCU->getCrossComponentPredictionAlpha(component), 0, ( uiQPartNum * sizeof(SChar) ) );
4682  }
4683  static const UInt useTS[MAX_NUM_COMPONENT]={0,0,0};
4684  pcCU->setTransformSkipSubParts ( useTS, 0, pcCU->getDepth(0) );
4685 #if DEBUG_STRING
4686  sDebug.clear();
4687  for(UInt i=0; i<MAX_NUM_COMPONENT+1; i++)
4688  {
4689  sDebug+=debug_reorder_data_inter_token[i];
4690  }
4691 #endif
4692  }
4693  else
4694  {
4695  xSetInterResidualQTData( NULL, false, tuLevel0); // Call first time to set coefficients.
4696  }
4697 
4698  // all decisions now made. Fully encode the CU, including the headers:
4700 
4701  UInt finalBits = 0;
4702  xAddSymbolBitsInter( pcCU, finalBits );
4703  // we've now encoded the pcCU, and so have a valid bit cost
4704 
4705  if ( !pcCU->getQtRootCbf( 0 ) )
4706  {
4707  pcYuvResiBest->clear(); // Clear the residual image, if we didn't code it.
4708  }
4709  else
4710  {
4711  xSetInterResidualQTData( pcYuvResiBest, true, tuLevel0 ); // else set the residual image data pcYUVResiBest from the various temp images.
4712  }
4714 
 // Reconstruction = prediction + best residual, clipped using the SPS bit depths.
4715  pcYuvRec->addClip ( pcYuvPred, pcYuvResiBest, 0, cuWidthPixels, sps.getBitDepths() );
4716 
4717  // update with clipped distortion and cost (previously unclipped reconstruction values were used)
4718 
4719  Distortion finalDistortion = 0;
4720  for(Int comp=0; comp<numValidComponents; comp++)
4721  {
4722  const ComponentID compID=ComponentID(comp);
4723  finalDistortion += m_pcRdCost->getDistPart( sps.getBitDepth(toChannelType(compID)), pcYuvRec->getAddr(compID ), pcYuvRec->getStride(compID ), pcYuvOrg->getAddr(compID ), pcYuvOrg->getStride(compID), cuWidthPixels >> pcYuvOrg->getComponentScaleX(compID), cuHeightPixels >> pcYuvOrg->getComponentScaleY(compID), compID);
4724  }
4725 
 // Publish the final figures for this CU; the cost is recomputed from the final bits and clipped distortion.
4726  pcCU->getTotalBits() = finalBits;
4727  pcCU->getTotalDistortion() = finalDistortion;
4728  pcCU->getTotalCost() = m_pcRdCost->calcRdCost( finalBits, finalDistortion );
4729 }
4730 
4731 
4732 
4734  Double &rdCost,
4735  UInt &ruiBits,
4736  Distortion &ruiDist,
4737  Distortion *puiZeroDist,
4738  TComTU &rTu
4739  DEBUG_STRING_FN_DECLARE(sDebug) )
4740 {
4741  TComDataCU *pcCU = rTu.getCU();
4742  const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
4743  const UInt uiDepth = rTu.GetTransformDepthTotal();
4744  const UInt uiTrMode = rTu.GetTransformDepthRel();
4745  const UInt subTUDepth = uiTrMode + 1;
4746  const UInt numValidComp = pcCU->getPic()->getNumberValidComponents();
4747  DEBUG_STRING_NEW(sSingleStringComp[MAX_NUM_COMPONENT])
4748 
4749  assert( pcCU->getDepth( 0 ) == pcCU->getDepth( uiAbsPartIdx ) );
4750  const UInt uiLog2TrSize = rTu.GetLog2LumaTrSize();
4751 
4752  UInt SplitFlag = ((pcCU->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && pcCU->isInter(uiAbsPartIdx) && ( pcCU->getPartitionSize(uiAbsPartIdx) != SIZE_2Nx2N ));
4753 #if DEBUG_STRING
4754  const Int debugPredModeMask = DebugStringGetPredModeMask(pcCU->getPredictionMode(uiAbsPartIdx));
4755 #endif
4756 
4757  Bool bCheckFull;
4758 
4759  if ( SplitFlag && uiDepth == pcCU->getDepth(uiAbsPartIdx) && ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) ) )
4760  {
4761  bCheckFull = false;
4762  }
4763  else
4764  {
4765  bCheckFull = ( uiLog2TrSize <= pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() );
4766  }
4767 
4768  const Bool bCheckSplit = ( uiLog2TrSize > pcCU->getQuadtreeTULog2MinSizeInCU(uiAbsPartIdx) );
4769 
4770  assert( bCheckFull || bCheckSplit );
4771 
4772  // code full block
4773  Double dSingleCost = MAX_DOUBLE;
4774  UInt uiSingleBits = 0;
4775  Distortion uiSingleDistComp [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4776  Distortion uiSingleDist = 0;
4777  TCoeff uiAbsSum [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4778  UInt uiBestTransformMode [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4779  // Stores the best explicit RDPCM mode for a TU encoded without split
4780  UInt bestExplicitRdpcmModeUnSplit[MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{3,3}, {3,3}, {3,3}};
4781  SChar bestCrossCPredictionAlpha [MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = {{0,0},{0,0},{0,0}};
4782 
4784 
4785  if( bCheckFull )
4786  {
4787  Double minCost[MAX_NUM_COMPONENT][2/*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/];
4788  Bool checkTransformSkip[MAX_NUM_COMPONENT];
4789  pcCU->setTrIdxSubParts( uiTrMode, uiAbsPartIdx, uiDepth );
4790 
4792 
4793  memset( m_pTempPel, 0, sizeof( Pel ) * rTu.getRect(COMPONENT_Y).width * rTu.getRect(COMPONENT_Y).height ); // not necessary needed for inside of recursion (only at the beginning)
4794 
4795  const UInt uiQTTempAccessLayer = pcCU->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - uiLog2TrSize;
4796  TCoeff *pcCoeffCurr[MAX_NUM_COMPONENT];
4797 #if ADAPTIVE_QP_SELECTION
4798  TCoeff *pcArlCoeffCurr[MAX_NUM_COMPONENT];
4799 #endif
4800 
4801  for(UInt i=0; i<numValidComp; i++)
4802  {
4803  minCost[i][0] = MAX_DOUBLE;
4804  minCost[i][1] = MAX_DOUBLE;
4805  }
4806 
4807  Pel crossCPredictedResidualBuffer[ MAX_TU_SIZE * MAX_TU_SIZE ];
4808 
4809  for(UInt i=0; i<numValidComp; i++)
4810  {
4811  checkTransformSkip[i]=false;
4812  const ComponentID compID=ComponentID(i);
4813  const Int channelBitDepth=pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
4814  pcCoeffCurr[compID] = m_ppcQTTempCoeff[compID][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID);
4815 #if ADAPTIVE_QP_SELECTION
4816  pcArlCoeffCurr[compID] = m_ppcQTTempArlCoeff[compID ][uiQTTempAccessLayer] + rTu.getCoefficientOffset(compID);
4817 #endif
4818 
4819  if(rTu.ProcessComponentSection(compID))
4820  {
4821  const QpParam cQP(*pcCU, compID);
4822 
4823  checkTransformSkip[compID] = pcCU->getSlice()->getPPS()->getUseTransformSkip() &&
4825  (!pcCU->isLosslessCoded(0));
4826 
4827  const Bool splitIntoSubTUs = rTu.getRect(compID).width != rTu.getRect(compID).height;
4828 
4829  TComTURecurse TUIterator(rTu, false, (splitIntoSubTUs ? TComTU::VERTICAL_SPLIT : TComTU::DONT_SPLIT), true, compID);
4830 
4831  const UInt partIdxesPerSubTU = TUIterator.GetAbsPartIdxNumParts(compID);
4832 
4833  do
4834  {
4835  const UInt subTUIndex = TUIterator.GetSectionNumber();
4836  const UInt subTUAbsPartIdx = TUIterator.GetAbsPartIdxTU(compID);
4837  const TComRectangle &tuCompRect = TUIterator.getRect(compID);
4838  const UInt subTUBufferOffset = tuCompRect.width * tuCompRect.height * subTUIndex;
4839 
4840  TCoeff *currentCoefficients = pcCoeffCurr[compID] + subTUBufferOffset;
4841 #if ADAPTIVE_QP_SELECTION
4842  TCoeff *currentARLCoefficients = pcArlCoeffCurr[compID] + subTUBufferOffset;
4843 #endif
4844  const Bool isCrossCPredictionAvailable = isChroma(compID)
4846  && (pcCU->getCbf(subTUAbsPartIdx, COMPONENT_Y, uiTrMode) != 0);
4847 
4848  SChar preCalcAlpha = 0;
4849  const Pel *pLumaResi = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( COMPONENT_Y, rTu.getRect( COMPONENT_Y ).x0, rTu.getRect( COMPONENT_Y ).y0 );
4850 
4851  if (isCrossCPredictionAvailable)
4852  {
4853  const Bool bUseReconstructedResidualForEstimate = m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate();
4854  const Pel *const lumaResidualForEstimate = bUseReconstructedResidualForEstimate ? pLumaResi : pcResi->getAddrPix(COMPONENT_Y, tuCompRect.x0, tuCompRect.y0);
4855  const UInt lumaResidualStrideForEstimate = bUseReconstructedResidualForEstimate ? m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y) : pcResi->getStride(COMPONENT_Y);
4856 
4857  preCalcAlpha = xCalcCrossComponentPredictionAlpha(TUIterator,
4858  compID,
4859  lumaResidualForEstimate,
4860  pcResi->getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4861  tuCompRect.width,
4862  tuCompRect.height,
4863  lumaResidualStrideForEstimate,
4864  pcResi->getStride(compID));
4865  }
4866 
4867  const Int transformSkipModesToTest = checkTransformSkip[compID] ? 2 : 1;
4868  const Int crossCPredictionModesToTest = (preCalcAlpha != 0) ? 2 : 1; // preCalcAlpha cannot be anything other than 0 if isCrossCPredictionAvailable is false
4869 
4870  const Bool isOneMode = (crossCPredictionModesToTest == 1) && (transformSkipModesToTest == 1);
4871 
4872  for (Int transformSkipModeId = 0; transformSkipModeId < transformSkipModesToTest; transformSkipModeId++)
4873  {
4874  pcCU->setTransformSkipPartRange(transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU);
4875 
4876  for (Int crossCPredictionModeId = 0; crossCPredictionModeId < crossCPredictionModesToTest; crossCPredictionModeId++)
4877  {
4878  const Bool isFirstMode = (transformSkipModeId == 0) && (crossCPredictionModeId == 0);
4879  const Bool bUseCrossCPrediction = crossCPredictionModeId != 0;
4880 
4881  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] );
4883 
4884  pcCU->setTransformSkipPartRange(transformSkipModeId, compID, subTUAbsPartIdx, partIdxesPerSubTU);
4885  pcCU->setCrossComponentPredictionAlphaPartRange((bUseCrossCPrediction ? preCalcAlpha : 0), compID, subTUAbsPartIdx, partIdxesPerSubTU );
4886 
4887  if ((compID != COMPONENT_Cr) && ((transformSkipModeId == 1) ? m_pcEncCfg->getUseRDOQTS() : m_pcEncCfg->getUseRDOQ()))
4888  {
4889  COEFF_SCAN_TYPE scanType = COEFF_SCAN_TYPE(pcCU->getCoefScanIdx(uiAbsPartIdx, tuCompRect.width, tuCompRect.height, compID));
4890  m_pcEntropyCoder->estimateBit(m_pcTrQuant->m_pcEstBitsSbac, tuCompRect.width, tuCompRect.height, toChannelType(compID), scanType);
4891  }
4892 
4893 #if RDOQ_CHROMA_LAMBDA
4894  m_pcTrQuant->selectLambda(compID);
4895 #endif
4896 
4897  Pel *pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0);
4898  UInt resiStride = m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID);
4899 
4900  TCoeff bestCoeffComp [MAX_TU_SIZE*MAX_TU_SIZE];
4901  Pel bestResiComp [MAX_TU_SIZE*MAX_TU_SIZE];
4902 
4903 #if ADAPTIVE_QP_SELECTION
4904  TCoeff bestArlCoeffComp[MAX_TU_SIZE*MAX_TU_SIZE];
4905 #endif
4906  TCoeff currAbsSum = 0;
4907  UInt currCompBits = 0;
4908  Distortion currCompDist = 0;
4909  Double currCompCost = 0;
4910  UInt nonCoeffBits = 0;
4911  Distortion nonCoeffDist = 0;
4912  Double nonCoeffCost = 0;
4913 
4914  if(!isOneMode && !isFirstMode)
4915  {
4916  memcpy(bestCoeffComp, currentCoefficients, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
4917 #if ADAPTIVE_QP_SELECTION
4918  memcpy(bestArlCoeffComp, currentARLCoefficients, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
4919 #endif
4920  for(Int y = 0; y < tuCompRect.height; y++)
4921  {
4922  memcpy(&bestResiComp[y * tuCompRect.width], (pcResiCurrComp + (y * resiStride)), (sizeof(Pel) * tuCompRect.width));
4923  }
4924  }
4925 
4926  if (bUseCrossCPrediction)
4927  {
4929  compID,
4930  pLumaResi,
4931  pcResi->getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4932  crossCPredictedResidualBuffer,
4933  tuCompRect.width,
4934  tuCompRect.height,
4935  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y),
4936  pcResi->getStride(compID),
4937  tuCompRect.width,
4938  false);
4939 
4940  m_pcTrQuant->transformNxN(TUIterator, compID, crossCPredictedResidualBuffer, tuCompRect.width, currentCoefficients,
4942  currentARLCoefficients,
4943 #endif
4944  currAbsSum, cQP);
4945  }
4946  else
4947  {
4948  m_pcTrQuant->transformNxN(TUIterator, compID, pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ), pcResi->getStride(compID), currentCoefficients,
4949 #if ADAPTIVE_QP_SELECTION
4950  currentARLCoefficients,
4951 #endif
4952  currAbsSum, cQP);
4953  }
4954 
4955  if(isFirstMode || (currAbsSum == 0))
4956  {
4957  if (bUseCrossCPrediction)
4958  {
4960  compID,
4961  pLumaResi,
4962  m_pTempPel,
4963  m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
4964  tuCompRect.width,
4965  tuCompRect.height,
4966  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y),
4967  tuCompRect.width,
4968  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID),
4969  true);
4970 
4971  nonCoeffDist = m_pcRdCost->getDistPart( channelBitDepth, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4972  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride( compID ), pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4973  pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, compID); // initialized with zero residual distortion
4974  }
4975  else
4976  {
4977  nonCoeffDist = m_pcRdCost->getDistPart( channelBitDepth, m_pTempPel, tuCompRect.width, pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
4978  pcResi->getStride(compID), tuCompRect.width, tuCompRect.height, compID); // initialized with zero residual distortion
4979  }
4980 
4981  m_pcEntropyCoder->encodeQtCbfZero( TUIterator, toChannelType(compID) );
4982 
4983  if ( isCrossCPredictionAvailable )
4984  {
4985  m_pcEntropyCoder->encodeCrossComponentPrediction( TUIterator, compID );
4986  }
4987 
4988  nonCoeffBits = m_pcEntropyCoder->getNumberOfWrittenBits();
4989  nonCoeffCost = m_pcRdCost->calcRdCost( nonCoeffBits, nonCoeffDist );
4990  }
4991 
4992  if((puiZeroDist != NULL) && isFirstMode)
4993  {
4994  *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion
4995  }
4996 
4997  DEBUG_STRING_NEW(sSingleStringTest)
4998 
4999  if( currAbsSum > 0 ) //if non-zero coefficients are present, a residual needs to be derived for further prediction
5000  {
5001  if (isFirstMode)
5002  {
5003  m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[ uiDepth ][ CI_QT_TRAFO_ROOT ] );
5005  }
5006 
5007  m_pcEntropyCoder->encodeQtCbf( TUIterator, compID, true );
5008 
5009  if (isCrossCPredictionAvailable)
5010  {
5011  m_pcEntropyCoder->encodeCrossComponentPrediction( TUIterator, compID );
5012  }
5013 
5014  m_pcEntropyCoder->encodeCoeffNxN( TUIterator, currentCoefficients, compID );
5015  currCompBits = m_pcEntropyCoder->getNumberOfWrittenBits();
5016 
5017  pcResiCurrComp = m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 );
5018 
5019  m_pcTrQuant->invTransformNxN( TUIterator, compID, pcResiCurrComp, m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID), currentCoefficients, cQP DEBUG_STRING_PASS_INTO_OPTIONAL(&sSingleStringTest, (DebugOptionList::DebugString_InvTran.getInt()&debugPredModeMask)) );
5020 
5021  if (bUseCrossCPrediction)
5022  {
5024  compID,
5025  pLumaResi,
5026  m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
5027  m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix(compID, tuCompRect.x0, tuCompRect.y0),
5028  tuCompRect.width,
5029  tuCompRect.height,
5030  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(COMPONENT_Y),
5031  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID ),
5032  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID ),
5033  true);
5034  }
5035 
5036  currCompDist = m_pcRdCost->getDistPart( channelBitDepth, m_pcQTTempTComYuv[uiQTTempAccessLayer].getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
5037  m_pcQTTempTComYuv[uiQTTempAccessLayer].getStride(compID),
5038  pcResi->getAddrPix( compID, tuCompRect.x0, tuCompRect.y0 ),
5039  pcResi->getStride(compID),
5040  tuCompRect.width, tuCompRect.height, compID);
5041 
5042  currCompCost = m_pcRdCost->calcRdCost(currCompBits, currCompDist);
5043 
5044  if (pcCU->isLosslessCoded(0))
5045  {
5046  nonCoeffCost = MAX_DOUBLE;
5047  }
5048  }
5049  else if ((transformSkipModeId == 1) && !bUseCrossCPrediction)
5050  {
5051  currCompCost = MAX_DOUBLE;
5052  }
5053  else
5054  {
5055  currCompBits = nonCoeffBits;
5056  currCompDist = nonCoeffDist;
5057  currCompCost = nonCoeffCost;
5058  }
5059 
5060  // evaluate
5061  if ((currCompCost < minCost[compID][subTUIndex]) || ((transformSkipModeId == 1) && (currCompCost == minCost[compID][subTUIndex])))
5062  {
5063  bestExplicitRdpcmModeUnSplit[compID][subTUIndex] = pcCU->getExplicitRdpcmMode(compID, subTUAbsPartIdx);
5064 
5065  if(isFirstMode) //check for forced null
5066  {
5067  if((nonCoeffCost < currCompCost) || (currAbsSum == 0))
5068  {
5069  memset(currentCoefficients, 0, (sizeof(TCoeff) * tuCompRect.width * tuCompRect.height));
5070 
5071  currAbsSum = 0;
5072  currCompBits = nonCoeffBits;
5073  currCompDist = nonCoeffDist;
5074  currCompCost = nonCoeffCost;
5075  }
5076  }
5077 
5078 #if DEBUG_STRING
5079  if (currAbsSum > 0)
5080  {
5081  DEBUG_STRING_SWAP(sSingleStringComp[compID], sSingleStringTest)
5082  }
5083  else
5084  {
5085  sSingleStringComp[compID].clear();
5086  }
5087 #endif
5088 
5089  uiAbsSum [compID][subTUIndex] = currAbsSum;
5090  uiSingleDistComp [compID][subTUIndex] = currCompDist;
5091  minCost [compID][subTUIndex] = currCompCost;
5092  uiBestTransformMode [compID][subTUIndex] = transformSkipModeId;
5093  bestCrossCPredictionAlpha[compID][subTUIndex] = (crossCPredictionModeId == 1) ? pcCU->