Context navigation

source: SHVCSoftware/branches/SHM-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 1245

Visit:

Last change on this file since 1245 was 1240, checked in by seregin, 10 years ago
port rev 4228
Property svn:eol-style set to `native`
File size: 123.8 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2014, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <limits>
41	#include <memory.h>
42	#include "TComTrQuant.h"
43	#include "TComPic.h"
44	#include "ContextTables.h"
45	#include "TComTU.h"
46	#include "Debug.h"
47
48	typedef struct
49	{
50	Int iNNZbeforePos0;
51	Double d64CodedLevelandDist; // distortion and level cost only
52	Double d64UncodedDist; // all zero coded block distortion
53	Double d64SigCost;
54	Double d64SigCost_0;
55	} coeffGroupRDStats;
56
57	//! \ingroup TLibCommon
58	//! \{
59
60	// ====================================================================================================================
61	// Constants
62	// ====================================================================================================================
63
64	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
65
66
67	// ====================================================================================================================
68	// QpParam constructor
69	// ====================================================================================================================
70
71	QpParam::QpParam(const Int qpy,
72	const ChannelType chType,
73	const Int qpBdOffset,
74	const Int chromaQPOffset,
75	const ChromaFormat chFmt )
76	{
77	Int baseQp;
78
79	if(isLuma(chType))
80	{
81	baseQp = qpy + qpBdOffset;
82	}
83	else
84	{
85	baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
86
87	if(baseQp < 0)
88	{
89	baseQp = baseQp + qpBdOffset;
90	}
91	else
92	{
93	baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
94	}
95	}
96
97	Qp =baseQp;
98	per=baseQp/6;
99	rem=baseQp%6;
100	}
101
102	QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
103	{
104	Int chromaQpOffset = 0;
105
106	if (isChroma(compID))
107	{
108	chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
109	chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
110
111	chromaQpOffset += cu.getSlice()->getPPS()->getChromaQpAdjTableAt(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
112	}
113
114	#if SVC_EXTENSION
115	TComSlice* slice = const_cast<TComSlice*> (cu.getSlice());
116	#endif
117
118	*this = QpParam(cu.getQP( 0 ),
119	toChannelType(compID),
120	#if SVC_EXTENSION
121	isLuma(compID) ? slice->getQpBDOffsetY() : slice->getQpBDOffsetC(),
122	#else
123	cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
124	#endif
125	chromaQpOffset,
126	cu.getPic()->getChromaFormat());
127	}
128
129
130	// ====================================================================================================================
131	// TComTrQuant class member functions
132	// ====================================================================================================================
133
134	TComTrQuant::TComTrQuant()
135	{
136	// allocate temporary buffers
137	m_plTempCoeff = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ];
138
139	// allocate bit estimation class (for RDOQ)
140	m_pcEstBitsSbac = new estBitsSbacStruct;
141	initScalingList();
142	}
143
144	TComTrQuant::~TComTrQuant()
145	{
146	// delete temporary buffers
147	if ( m_plTempCoeff )
148	{
149	delete [] m_plTempCoeff;
150	m_plTempCoeff = NULL;
151	}
152
153	// delete bit estimation class
154	if ( m_pcEstBitsSbac )
155	{
156	delete m_pcEstBitsSbac;
157	}
158	destroyScalingList();
159	}
160
161	#if ADAPTIVE_QP_SELECTION
162	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
163	{
164	// NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled?
165
166	Int qpBase = pcSlice->getSliceQpBase();
167	Int sliceQpused = pcSlice->getSliceQp();
168	Int sliceQpnext;
169	Double alpha = qpBase < 17 ? 0.5 : 1;
170
171	Int cnt=0;
172	for(Int u=1; u<=LEVEL_RANGE; u++)
173	{
174	cnt += m_sliceNsamples[u] ;
175	}
176
177	if( !m_useRDOQ )
178	{
179	sliceQpused = qpBase;
180	alpha = 0.5;
181	}
182
183	if( cnt > 120 )
184	{
185	Double sum = 0;
186	Int k = 0;
187	for(Int u=1; u<LEVEL_RANGE; u++)
188	{
189	sum += u*m_sliceSumC[u];
190	k += uum_sliceNsamples[u];
191	}
192
193	Int v;
194	Double q[MAX_QP+1] ;
195	for(v=0; v<=MAX_QP; v++)
196	{
197	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
198	}
199
200	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
201
202	for(v=0; v<MAX_QP; v++)
203	{
204	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
205	{
206	break;
207	}
208	}
209	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
210	}
211	else
212	{
213	sliceQpnext = sliceQpused;
214	}
215
216	m_qpDelta[qpBase] = sliceQpnext - qpBase;
217	}
218
219	Void TComTrQuant::initSliceQpDelta()
220	{
221	for(Int qp=0; qp<=MAX_QP; qp++)
222	{
223	m_qpDelta[qp] = qp < 17 ? 0 : 1;
224	}
225	}
226
227	Void TComTrQuant::clearSliceARLCnt()
228	{
229	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
230	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
231	}
232	#endif
233
234
235
236	#if MATRIX_MULT
237	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
238	* \param block pointer to input data (residual)
239	* \param coeff pointer to output data (transform coefficients)
240	* \param uiStride stride of input data
241	* \param uiTrSize transform size (uiTrSize x uiTrSize)
242	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
243	*/
244	Void xTr(Int bitDepth, Pel block, TCoeff coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
245	{
246	UInt i,j,k;
247	TCoeff iSum;
248	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
249	const TMatrixCoeff *iT;
250	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
251
252	if (uiTrSize==4)
253	{
254	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]);
255	}
256	else if (uiTrSize==8)
257	{
258	iT = g_aiT8[TRANSFORM_FORWARD][0];
259	}
260	else if (uiTrSize==16)
261	{
262	iT = g_aiT16[TRANSFORM_FORWARD][0];
263	}
264	else if (uiTrSize==32)
265	{
266	iT = g_aiT32[TRANSFORM_FORWARD][0];
267	}
268	else
269	{
270	assert(0);
271	}
272
273	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
274
275	const Int shift_1st = (uiLog2TrSize + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
276	const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT;
277	const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0;
278	const Int add_2nd = 1<<(shift_2nd-1);
279
280	/* Horizontal transform */
281
282	for (i=0; i<uiTrSize; i++)
283	{
284	for (j=0; j<uiTrSize; j++)
285	{
286	iSum = 0;
287	for (k=0; k<uiTrSize; k++)
288	{
289	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
290	}
291	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
292	}
293	}
294
295	/* Vertical transform */
296	for (i=0; i<uiTrSize; i++)
297	{
298	for (j=0; j<uiTrSize; j++)
299	{
300	iSum = 0;
301	for (k=0; k<uiTrSize; k++)
302	{
303	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
304	}
305	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
306	}
307	}
308	}
309
310	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
311	* \param coeff pointer to input data (transform coefficients)
312	* \param block pointer to output data (residual)
313	* \param uiStride stride of output data
314	* \param uiTrSize transform size (uiTrSize x uiTrSize)
315	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
316	*/
317	Void xITr(Int bitDepth, TCoeff coeff, Pel block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
318	{
319	UInt i,j,k;
320	TCoeff iSum;
321	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
322	const TMatrixCoeff *iT;
323
324	if (uiTrSize==4)
325	{
326	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]);
327	}
328	else if (uiTrSize==8)
329	{
330	iT = g_aiT8[TRANSFORM_INVERSE][0];
331	}
332	else if (uiTrSize==16)
333	{
334	iT = g_aiT16[TRANSFORM_INVERSE][0];
335	}
336	else if (uiTrSize==32)
337	{
338	iT = g_aiT32[TRANSFORM_INVERSE][0];
339	}
340	else
341	{
342	assert(0);
343	}
344
345	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
346
347	const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
348	const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
349	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
350	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
351	assert(shift_2nd>=0);
352	const Int add_1st = 1<<(shift_1st-1);
353	const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0;
354
355	/* Horizontal transform */
356	for (i=0; i<uiTrSize; i++)
357	{
358	for (j=0; j<uiTrSize; j++)
359	{
360	iSum = 0;
361	for (k=0; k<uiTrSize; k++)
362	{
363	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
364	}
365
366	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
367	tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st);
368	}
369	}
370
371	/* Vertical transform */
372	for (i=0; i<uiTrSize; i++)
373	{
374	for (j=0; j<uiTrSize; j++)
375	{
376	iSum = 0;
377	for (k=0; k<uiTrSize; k++)
378	{
379	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
380	}
381
382	block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd);
383	}
384	}
385	}
386
387	#endif //MATRIX_MULT
388
389
390	/** 4x4 forward transform implemented using partial butterfly structure (1D)
391	* \param src input data (residual)
392	* \param dst output data (transform coefficients)
393	* \param shift specifies right shift after 1D transform
394	*/
395	Void partialButterfly4(TCoeff src, TCoeff dst, Int shift, Int line)
396	{
397	Int j;
398	TCoeff E[2],O[2];
399	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
400
401	for (j=0; j<line; j++)
402	{
403	/* E and O */
404	E[0] = src[0] + src[3];
405	O[0] = src[0] - src[3];
406	E[1] = src[1] + src[2];
407	O[1] = src[1] - src[2];
408
409	dst[0] = (g_aiT4[TRANSFORM_FORWARD][0][0]E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]E[1] + add)>>shift;
410	dst[2line] = (g_aiT4[TRANSFORM_FORWARD][2][0]E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift;
411	dst[line] = (g_aiT4[TRANSFORM_FORWARD][1][0]O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]O[1] + add)>>shift;
412	dst[3line] = (g_aiT4[TRANSFORM_FORWARD][3][0]O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift;
413
414	src += 4;
415	dst ++;
416	}
417	}
418
419	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
420	// give identical results
421	Void fastForwardDst(TCoeff block, TCoeff coeff, Int shift) // input block, output coeff
422	{
423	Int i;
424	TCoeff c[4];
425	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
426	for (i=0; i<4; i++)
427	{
428	// Intermediate Variables
429	c[0] = block[4*i+0];
430	c[1] = block[4*i+1];
431	c[2] = block[4*i+2];
432	c[3] = block[4*i+3];
433
434	for (Int row = 0; row < 4; row++)
435	{
436	TCoeff result = 0;
437	for (Int column = 0; column < 4; column++)
438	result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers
439
440	coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift);
441	}
442	}
443	}
444
445	Void fastInverseDst(TCoeff tmp, TCoeff block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum) // input tmp, output block
446	{
447	Int i;
448	TCoeff c[4];
449	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
450	for (i=0; i<4; i++)
451	{
452	// Intermediate Variables
453	c[0] = tmp[ i];
454	c[1] = tmp[4 +i];
455	c[2] = tmp[8 +i];
456	c[3] = tmp[12+i];
457
458	for (Int column = 0; column < 4; column++)
459	{
460	TCoeff &result = block[(i * 4) + column];
461
462	result = 0;
463	for (Int row = 0; row < 4; row++)
464	result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers
465
466	result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift));
467	}
468	}
469	}
470
471	/** 4x4 inverse transform implemented using partial butterfly structure (1D)
472	* \param src input data (transform coefficients)
473	* \param dst output data (residual)
474	* \param shift specifies right shift after 1D transform
475	*/
476	Void partialButterflyInverse4(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
477	{
478	Int j;
479	TCoeff E[2],O[2];
480	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
481
482	for (j=0; j<line; j++)
483	{
484	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
485	O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]src[3*line];
486	O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]src[3*line];
487	E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]src[0] + g_aiT4[TRANSFORM_INVERSE][2][0]src[2*line];
488	E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]src[0] + g_aiT4[TRANSFORM_INVERSE][2][1]src[2*line];
489
490	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
491	dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift );
492	dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift );
493	dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift );
494	dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift );
495
496	src ++;
497	dst += 4;
498	}
499	}
500
501	/** 8x8 forward transform implemented using partial butterfly structure (1D)
502	* \param src input data (residual)
503	* \param dst output data (transform coefficients)
504	* \param shift specifies right shift after 1D transform
505	*/
506	Void partialButterfly8(TCoeff src, TCoeff dst, Int shift, Int line)
507	{
508	Int j,k;
509	TCoeff E[4],O[4];
510	TCoeff EE[2],EO[2];
511	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
512
513	for (j=0; j<line; j++)
514	{
515	/* E and O*/
516	for (k=0;k<4;k++)
517	{
518	E[k] = src[k] + src[7-k];
519	O[k] = src[k] - src[7-k];
520	}
521	/* EE and EO */
522	EE[0] = E[0] + E[3];
523	EO[0] = E[0] - E[3];
524	EE[1] = E[1] + E[2];
525	EO[1] = E[1] - E[2];
526
527	dst[0] = (g_aiT8[TRANSFORM_FORWARD][0][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]EE[1] + add)>>shift;
528	dst[4line] = (g_aiT8[TRANSFORM_FORWARD][4][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift;
529	dst[2line] = (g_aiT8[TRANSFORM_FORWARD][2][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift;
530	dst[6line] = (g_aiT8[TRANSFORM_FORWARD][6][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift;
531
532	dst[line] = (g_aiT8[TRANSFORM_FORWARD][1][0]O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]O[3] + add)>>shift;
533	dst[3line] = (g_aiT8[TRANSFORM_FORWARD][3][0]O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift;
534	dst[5line] = (g_aiT8[TRANSFORM_FORWARD][5][0]O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift;
535	dst[7line] = (g_aiT8[TRANSFORM_FORWARD][7][0]O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift;
536
537	src += 8;
538	dst ++;
539	}
540	}
541
542	/** 8x8 inverse transform implemented using partial butterfly structure (1D)
543	* \param src input data (transform coefficients)
544	* \param dst output data (residual)
545	* \param shift specifies right shift after 1D transform
546	*/
547	Void partialButterflyInverse8(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
548	{
549	Int j,k;
550	TCoeff E[4],O[4];
551	TCoeff EE[2],EO[2];
552	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
553
554	for (j=0; j<line; j++)
555	{
556	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
557	for (k=0;k<4;k++)
558	{
559	O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]src[line] + g_aiT8[TRANSFORM_INVERSE][ 3][k]src[3*line] +
560	g_aiT8[TRANSFORM_INVERSE][ 5][k]src[5line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]src[7line];
561	}
562
563	EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][0]src[ 6line ];
564	EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][1]src[ 6line ];
565	EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][0]src[ 4*line ];
566	EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][1]src[ 4*line ];
567
568	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
569	E[0] = EE[0] + EO[0];
570	E[3] = EE[0] - EO[0];
571	E[1] = EE[1] + EO[1];
572	E[2] = EE[1] - EO[1];
573	for (k=0;k<4;k++)
574	{
575	dst[ k ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
576	dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift );
577	}
578	src ++;
579	dst += 8;
580	}
581	}
582
583	/** 16x16 forward transform implemented using partial butterfly structure (1D)
584	* \param src input data (residual)
585	* \param dst output data (transform coefficients)
586	* \param shift specifies right shift after 1D transform
587	*/
588	Void partialButterfly16(TCoeff src, TCoeff dst, Int shift, Int line)
589	{
590	Int j,k;
591	TCoeff E[8],O[8];
592	TCoeff EE[4],EO[4];
593	TCoeff EEE[2],EEO[2];
594	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
595
596	for (j=0; j<line; j++)
597	{
598	/* E and O*/
599	for (k=0;k<8;k++)
600	{
601	E[k] = src[k] + src[15-k];
602	O[k] = src[k] - src[15-k];
603	}
604	/* EE and EO */
605	for (k=0;k<4;k++)
606	{
607	EE[k] = E[k] + E[7-k];
608	EO[k] = E[k] - E[7-k];
609	}
610	/* EEE and EEO */
611	EEE[0] = EE[0] + EE[3];
612	EEO[0] = EE[0] - EE[3];
613	EEE[1] = EE[1] + EE[2];
614	EEO[1] = EE[1] - EE[2];
615
616	dst[ 0 ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]EEE[1] + add)>>shift;
617	dst[ 8line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift;
618	dst[ 4line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift;
619	dst[ 12line] = (g_aiT16[TRANSFORM_FORWARD][12][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift;
620
621	for (k=2;k<16;k+=4)
622	{
623	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] +
624	g_aiT16[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]EO[3] + add)>>shift;
625	}
626
627	for (k=1;k<16;k+=2)
628	{
629	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] +
630	g_aiT16[TRANSFORM_FORWARD][k][2]O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]O[3] +
631	g_aiT16[TRANSFORM_FORWARD][k][4]O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]O[5] +
632	g_aiT16[TRANSFORM_FORWARD][k][6]O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]O[7] + add)>>shift;
633	}
634
635	src += 16;
636	dst ++;
637
638	}
639	}
640
641	/** 16x16 inverse transform implemented using partial butterfly structure (1D)
642	* \param src input data (transform coefficients)
643	* \param dst output data (residual)
644	* \param shift specifies right shift after 1D transform
645	*/
646	Void partialButterflyInverse16(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
647	{
648	Int j,k;
649	TCoeff E[8],O[8];
650	TCoeff EE[4],EO[4];
651	TCoeff EEE[2],EEO[2];
652	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
653
654	for (j=0; j<line; j++)
655	{
656	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
657	for (k=0;k<8;k++)
658	{
659	O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]src[ line] + g_aiT16[TRANSFORM_INVERSE][ 3][k]src[ 3*line] +
660	g_aiT16[TRANSFORM_INVERSE][ 5][k]src[ 5line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]src[ 7line] +
661	g_aiT16[TRANSFORM_INVERSE][ 9][k]src[ 9line] + g_aiT16[TRANSFORM_INVERSE][11][k]src[11line] +
662	g_aiT16[TRANSFORM_INVERSE][13][k]src[13line] + g_aiT16[TRANSFORM_INVERSE][15][k]src[15line];
663	}
664	for (k=0;k<4;k++)
665	{
666	EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]src[ 2line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]src[ 6line] +
667	g_aiT16[TRANSFORM_INVERSE][10][k]src[10line] + g_aiT16[TRANSFORM_INVERSE][14][k]src[14line];
668	}
669	EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][0]src[ 12line ];
670	EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]src[ 8*line ];
671	EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][1]src[ 12line ];
672	EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]src[ 8*line ];
673
674	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
675	for (k=0;k<2;k++)
676	{
677	EE[k] = EEE[k] + EEO[k];
678	EE[k+2] = EEE[1-k] - EEO[1-k];
679	}
680	for (k=0;k<4;k++)
681	{
682	E[k] = EE[k] + EO[k];
683	E[k+4] = EE[3-k] - EO[3-k];
684	}
685	for (k=0;k<8;k++)
686	{
687	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
688	dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift );
689	}
690	src ++;
691	dst += 16;
692	}
693	}
694
695	/** 32x32 forward transform implemented using partial butterfly structure (1D)
696	* \param src input data (residual)
697	* \param dst output data (transform coefficients)
698	* \param shift specifies right shift after 1D transform
699	*/
700	Void partialButterfly32(TCoeff src, TCoeff dst, Int shift, Int line)
701	{
702	Int j,k;
703	TCoeff E[16],O[16];
704	TCoeff EE[8],EO[8];
705	TCoeff EEE[4],EEO[4];
706	TCoeff EEEE[2],EEEO[2];
707	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
708
709	for (j=0; j<line; j++)
710	{
711	/* E and O*/
712	for (k=0;k<16;k++)
713	{
714	E[k] = src[k] + src[31-k];
715	O[k] = src[k] - src[31-k];
716	}
717	/* EE and EO */
718	for (k=0;k<8;k++)
719	{
720	EE[k] = E[k] + E[15-k];
721	EO[k] = E[k] - E[15-k];
722	}
723	/* EEE and EEO */
724	for (k=0;k<4;k++)
725	{
726	EEE[k] = EE[k] + EE[7-k];
727	EEO[k] = EE[k] - EE[7-k];
728	}
729	/* EEEE and EEEO */
730	EEEE[0] = EEE[0] + EEE[3];
731	EEEO[0] = EEE[0] - EEE[3];
732	EEEE[1] = EEE[1] + EEE[2];
733	EEEO[1] = EEE[1] - EEE[2];
734
735	dst[ 0 ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]EEEE[1] + add)>>shift;
736	dst[ 16line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift;
737	dst[ 8line ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift;
738	dst[ 24line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift;
739	for (k=4;k<32;k+=8)
740	{
741	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] +
742	g_aiT32[TRANSFORM_FORWARD][k][2]EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EEO[3] + add)>>shift;
743	}
744	for (k=2;k<32;k+=4)
745	{
746	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] +
747	g_aiT32[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EO[3] +
748	g_aiT32[TRANSFORM_FORWARD][k][4]EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]EO[5] +
749	g_aiT32[TRANSFORM_FORWARD][k][6]EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]EO[7] + add)>>shift;
750	}
751	for (k=1;k<32;k+=2)
752	{
753	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] +
754	g_aiT32[TRANSFORM_FORWARD][k][ 2]O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]O[ 3] +
755	g_aiT32[TRANSFORM_FORWARD][k][ 4]O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]O[ 5] +
756	g_aiT32[TRANSFORM_FORWARD][k][ 6]O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]O[ 7] +
757	g_aiT32[TRANSFORM_FORWARD][k][ 8]O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]O[ 9] +
758	g_aiT32[TRANSFORM_FORWARD][k][10]O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]O[11] +
759	g_aiT32[TRANSFORM_FORWARD][k][12]O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]O[13] +
760	g_aiT32[TRANSFORM_FORWARD][k][14]O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]O[15] + add)>>shift;
761	}
762
763	src += 32;
764	dst ++;
765	}
766	}
767
768	/** 32x32 inverse transform implemented using partial butterfly structure (1D)
769	* \param src input data (transform coefficients)
770	* \param dst output data (residual)
771	* \param shift specifies right shift after 1D transform
772	*/
773	Void partialButterflyInverse32(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
774	{
775	Int j,k;
776	TCoeff E[16],O[16];
777	TCoeff EE[8],EO[8];
778	TCoeff EEE[4],EEO[4];
779	TCoeff EEEE[2],EEEO[2];
780	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
781
782	for (j=0; j<line; j++)
783	{
784	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
785	for (k=0;k<16;k++)
786	{
787	O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]src[ line ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]src[ 3*line ] +
788	g_aiT32[TRANSFORM_INVERSE][ 5][k]src[ 5line ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]src[ 7line ] +
789	g_aiT32[TRANSFORM_INVERSE][ 9][k]src[ 9line ] + g_aiT32[TRANSFORM_INVERSE][11][k]src[ 11line ] +
790	g_aiT32[TRANSFORM_INVERSE][13][k]src[ 13line ] + g_aiT32[TRANSFORM_INVERSE][15][k]src[ 15line ] +
791	g_aiT32[TRANSFORM_INVERSE][17][k]src[ 17line ] + g_aiT32[TRANSFORM_INVERSE][19][k]src[ 19line ] +
792	g_aiT32[TRANSFORM_INVERSE][21][k]src[ 21line ] + g_aiT32[TRANSFORM_INVERSE][23][k]src[ 23line ] +
793	g_aiT32[TRANSFORM_INVERSE][25][k]src[ 25line ] + g_aiT32[TRANSFORM_INVERSE][27][k]src[ 27line ] +
794	g_aiT32[TRANSFORM_INVERSE][29][k]src[ 29line ] + g_aiT32[TRANSFORM_INVERSE][31][k]src[ 31line ];
795	}
796	for (k=0;k<8;k++)
797	{
798	EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]src[ 2line ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]src[ 6line ] +
799	g_aiT32[TRANSFORM_INVERSE][10][k]src[ 10line ] + g_aiT32[TRANSFORM_INVERSE][14][k]src[ 14line ] +
800	g_aiT32[TRANSFORM_INVERSE][18][k]src[ 18line ] + g_aiT32[TRANSFORM_INVERSE][22][k]src[ 22line ] +
801	g_aiT32[TRANSFORM_INVERSE][26][k]src[ 26line ] + g_aiT32[TRANSFORM_INVERSE][30][k]src[ 30line ];
802	}
803	for (k=0;k<4;k++)
804	{
805	EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]src[ 4line ] + g_aiT32[TRANSFORM_INVERSE][12][k]src[ 12line ] +
806	g_aiT32[TRANSFORM_INVERSE][20][k]src[ 20line ] + g_aiT32[TRANSFORM_INVERSE][28][k]src[ 28line ];
807	}
808	EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][0]src[ 24line ];
809	EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][1]src[ 24line ];
810	EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][0]src[ 16*line ];
811	EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][1]src[ 16*line ];
812
813	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
814	EEE[0] = EEEE[0] + EEEO[0];
815	EEE[3] = EEEE[0] - EEEO[0];
816	EEE[1] = EEEE[1] + EEEO[1];
817	EEE[2] = EEEE[1] - EEEO[1];
818	for (k=0;k<4;k++)
819	{
820	EE[k] = EEE[k] + EEO[k];
821	EE[k+4] = EEE[3-k] - EEO[3-k];
822	}
823	for (k=0;k<8;k++)
824	{
825	E[k] = EE[k] + EO[k];
826	E[k+8] = EE[7-k] - EO[7-k];
827	}
828	for (k=0;k<16;k++)
829	{
830	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
831	dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift );
832	}
833	src ++;
834	dst += 32;
835	}
836	}
837
838	/** MxN forward transform (2D)
839	* \param block input data (residual)
840	* \param coeff output data (transform coefficients)
841	* \param iWidth input data (width of transform)
842	* \param iHeight input data (height of transform)
843	*/
844	Void xTrMxN(Int bitDepth, TCoeff block, TCoeff coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
845	{
846	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
847
848	const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
849	const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;
850
851	assert(shift_1st >= 0);
852	assert(shift_2nd >= 0);
853
854	TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];
855
856	switch (iWidth)
857	{
858	case 4:
859	{
860	if ((iHeight == 4) && useDST) // Check for DCT or DST
861	{
862	fastForwardDst( block, tmp, shift_1st );
863	}
864	else partialButterfly4 ( block, tmp, shift_1st, iHeight );
865	}
866	break;
867
868	case 8: partialButterfly8 ( block, tmp, shift_1st, iHeight ); break;
869	case 16: partialButterfly16( block, tmp, shift_1st, iHeight ); break;
870	case 32: partialButterfly32( block, tmp, shift_1st, iHeight ); break;
871	default:
872	assert(0); exit (1); break;
873	}
874
875	switch (iHeight)
876	{
877	case 4:
878	{
879	if ((iWidth == 4) && useDST) // Check for DCT or DST
880	{
881	fastForwardDst( tmp, coeff, shift_2nd );
882	}
883	else partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
884	}
885	break;
886
887	case 8: partialButterfly8 ( tmp, coeff, shift_2nd, iWidth ); break;
888	case 16: partialButterfly16( tmp, coeff, shift_2nd, iWidth ); break;
889	case 32: partialButterfly32( tmp, coeff, shift_2nd, iWidth ); break;
890	default:
891	assert(0); exit (1); break;
892	}
893	}
894
895
896	/** MxN inverse transform (2D)
897	* \param coeff input data (transform coefficients)
898	* \param block output data (residual)
899	* \param iWidth input data (width of transform)
900	* \param iHeight input data (height of transform)
901	*/
902	Void xITrMxN(Int bitDepth, TCoeff coeff, TCoeff block, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
903	{
904	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
905
906	Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
907	Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
908	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
909	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
910
911	assert(shift_1st >= 0);
912	assert(shift_2nd >= 0);
913
914	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
915
916	switch (iHeight)
917	{
918	case 4:
919	{
920	if ((iWidth == 4) && useDST) // Check for DCT or DST
921	{
922	fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum);
923	}
924	else partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum);
925	}
926	break;
927
928	case 8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
929	case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
930	case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
931
932	default:
933	assert(0); exit (1); break;
934	}
935
936	switch (iWidth)
937	{
938	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
939	case 4:
940	{
941	if ((iHeight == 4) && useDST) // Check for DCT or DST
942	{
943	fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() );
944	}
945	else partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max());
946	}
947	break;
948
949	case 8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
950	case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
951	case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
952
953	default:
954	assert(0); exit (1); break;
955	}
956	}
957
958
959	// To minimize the distortion only. No rate is considered.
960	Void TComTrQuant::signBitHidingHDQ( const ComponentID compID, TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters )
961	{
962	const UInt width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
963	const UInt height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
964	const UInt groupSize = 1 << MLS_CG_SIZE;
965
966	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
967	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
968
969	Int lastCG = -1;
970	Int absSum = 0 ;
971	Int n ;
972
973	for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
974	{
975	Int subPos = subSet << MLS_CG_SIZE;
976	Int firstNZPosInCG=groupSize , lastNZPosInCG=-1 ;
977	absSum = 0 ;
978
979	for(n = groupSize-1; n >= 0; --n )
980	{
981	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
982	{
983	lastNZPosInCG = n;
984	break;
985	}
986	}
987
988	for(n = 0; n <groupSize; n++ )
989	{
990	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
991	{
992	firstNZPosInCG = n;
993	break;
994	}
995	}
996
997	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
998	{
999	absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]);
1000	}
1001
1002	if(lastNZPosInCG>=0 && lastCG==-1)
1003	{
1004	lastCG = 1 ;
1005	}
1006
1007	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1008	{
1009	UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ;
1010	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1011	{
1012	TCoeff curCost = std::numeric_limits<TCoeff>::max();
1013	TCoeff minCostInc = std::numeric_limits<TCoeff>::max();
1014	Int minPos =-1, finalChange=0, curChange=0;
1015
1016	for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n )
1017	{
1018	UInt blkPos = codingParameters.scan[ n+subPos ];
1019	if(pQCoef[ blkPos ] != 0 )
1020	{
1021	if(deltaU[blkPos]>0)
1022	{
1023	curCost = - deltaU[blkPos];
1024	curChange=1 ;
1025	}
1026	else
1027	{
1028	//curChange =-1;
1029	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1030	{
1031	curCost = std::numeric_limits<TCoeff>::max();
1032	}
1033	else
1034	{
1035	curCost = deltaU[blkPos];
1036	curChange =-1;
1037	}
1038	}
1039	}
1040	else
1041	{
1042	if(n<firstNZPosInCG)
1043	{
1044	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1045	if(thisSignBit != signbit )
1046	{
1047	curCost = std::numeric_limits<TCoeff>::max();
1048	}
1049	else
1050	{
1051	curCost = - (deltaU[blkPos]) ;
1052	curChange = 1 ;
1053	}
1054	}
1055	else
1056	{
1057	curCost = - (deltaU[blkPos]) ;
1058	curChange = 1 ;
1059	}
1060	}
1061
1062	if( curCost<minCostInc)
1063	{
1064	minCostInc = curCost ;
1065	finalChange = curChange ;
1066	minPos = blkPos ;
1067	}
1068	} //CG loop
1069
1070	if(pQCoef[minPos] == entropyCodingMaximum \|\| pQCoef[minPos] == entropyCodingMinimum)
1071	{
1072	finalChange = -1;
1073	}
1074
1075	if(pCoef[minPos]>=0)
1076	{
1077	pQCoef[minPos] += finalChange ;
1078	}
1079	else
1080	{
1081	pQCoef[minPos] -= finalChange ;
1082	}
1083	} // Hide
1084	}
1085	if(lastCG==1)
1086	{
1087	lastCG=0 ;
1088	}
1089	} // TU loop
1090
1091	return;
1092	}
1093
1094
1095	Void TComTrQuant::xQuant( TComTU &rTu,
1096	TCoeff * pSrc,
1097	TCoeff * pDes,
1098	#if ADAPTIVE_QP_SELECTION
1099	TCoeff *pArlDes,
1100	#endif
1101	TCoeff &uiAbsSum,
1102	const ComponentID compID,
1103	const QpParam &cQP )
1104	{
1105	const TComRectangle &rect = rTu.getRect(compID);
1106	const UInt uiWidth = rect.width;
1107	const UInt uiHeight = rect.height;
1108	TComDataCU* pcCU = rTu.getCU();
1109	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1110
1111	TCoeff* piCoef = pSrc;
1112	TCoeff* piQCoef = pDes;
1113	#if ADAPTIVE_QP_SELECTION
1114	TCoeff* piArlCCoef = pArlDes;
1115	#endif
1116
1117	const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1118
1119	Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
1120	if ( useRDOQ && (isLuma(compID) \|\| RDOQ_CHROMA) )
1121	{
1122	#if ADAPTIVE_QP_SELECTION
1123	xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );
1124	#else
1125	xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );
1126	#endif
1127	}
1128	else
1129	{
1130	TUEntropyCodingParameters codingParameters;
1131	getTUEntropyCodingParameters(codingParameters, rTu, compID);
1132
1133	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1134	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1135
1136	TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
1137
1138	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1139
1140	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1141	assert(scalingListType < SCALING_LIST_NUM);
1142	Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
1143
1144	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1145	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
1146
1147	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1148	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1149	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1150	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1151	*/
1152
1153	// Represents scaling through forward transform
1154	Int iTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1155	if (useTransformSkip && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
1156	{
1157	iTransformShift = std::max<Int>(0, iTransformShift);
1158	}
1159
1160	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
1161	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
1162
1163	#if ADAPTIVE_QP_SELECTION
1164	Int iQBitsC = MAX_INT;
1165	Int iAddC = MAX_INT;
1166
1167	if (m_bUseAdaptQpSelect)
1168	{
1169	iQBitsC = iQBits - ARL_C_PRECISION;
1170	iAddC = 1 << (iQBitsC-1);
1171	}
1172	#endif
1173
1174	const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1175	const Int qBits8 = iQBits - 8;
1176
1177	for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
1178	{
1179	const TCoeff iLevel = piCoef[uiBlockPos];
1180	const TCoeff iSign = (iLevel < 0 ? -1: 1);
1181
1182	const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
1183
1184	#if ADAPTIVE_QP_SELECTION
1185	if( m_bUseAdaptQpSelect )
1186	{
1187	piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
1188	}
1189	#endif
1190
1191	const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
1192	deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);
1193
1194	uiAbsSum += quantisedMagnitude;
1195	const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
1196
1197	piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
1198	} // for n
1199
1200	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1201	{
1202	if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
1203	{
1204	signBitHidingHDQ( compID, piQCoef, piCoef, deltaU, codingParameters ) ;
1205	}
1206	}
1207	} //if RDOQ
1208	//return;
1209	}
1210
1211	Void TComTrQuant::xDeQuant( TComTU &rTu,
1212	const TCoeff * pSrc,
1213	TCoeff * pDes,
1214	const ComponentID compID,
1215	const QpParam &cQP )
1216	{
1217	assert(compID<MAX_NUM_COMPONENT);
1218
1219	TComDataCU *pcCU = rTu.getCU();
1220	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1221	const TComRectangle &rect = rTu.getRect(compID);
1222	const UInt uiWidth = rect.width;
1223	const UInt uiHeight = rect.height;
1224	const TCoeff *const piQCoef = pSrc;
1225	TCoeff *const piCoef = pDes;
1226	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1227	const UInt numSamplesInBlock = uiWidth*uiHeight;
1228	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1229	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1230	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1231	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1232
1233	assert (scalingListType < SCALING_LIST_NUM);
1234	assert ( uiWidth <= m_uiMaxTrSize );
1235
1236	// Represents scaling through forward transform
1237	const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
1238	const Int originalTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1239	const Int iTransformShift = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift;
1240
1241	const Int QP_per = cQP.per;
1242	const Int QP_rem = cQP.rem;
1243
1244	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
1245
1246	if(enableScalingLists)
1247	{
1248	//from the dequantisation equation:
1249	//iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift
1250	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + dequantCoefBits - rightShift
1251	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
1252	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
1253
1254	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1255	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1256
1257	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
1258
1259	if(rightShift > 0)
1260	{
1261	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1262
1263	for( Int n = 0; n < numSamplesInBlock; n++ )
1264	{
1265	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1266	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
1267
1268	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1269	}
1270	}
1271	else
1272	{
1273	const Int leftShift = -rightShift;
1274
1275	for( Int n = 0; n < numSamplesInBlock; n++ )
1276	{
1277	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1278	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
1279
1280	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1281	}
1282	}
1283	}
1284	else
1285	{
1286	const Int scale = g_invQuantScales[QP_rem];
1287	const Int scaleBits = (IQUANT_SHIFT + 1) ;
1288
1289	//from the dequantisation equation:
1290	//iCoeffQ = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift);
1291	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + scaleBits - rightShift
1292	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
1293	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1294	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1295
1296	if (rightShift > 0)
1297	{
1298	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1299
1300	for( Int n = 0; n < numSamplesInBlock; n++ )
1301	{
1302	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1303	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
1304
1305	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1306	}
1307	}
1308	else
1309	{
1310	const Int leftShift = -rightShift;
1311
1312	for( Int n = 0; n < numSamplesInBlock; n++ )
1313	{
1314	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1315	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
1316
1317	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1318	}
1319	}
1320	}
1321	}
1322
1323
1324	Void TComTrQuant::init( UInt uiMaxTrSize,
1325	Bool bUseRDOQ,
1326	Bool bUseRDOQTS,
1327	Bool bEnc,
1328	Bool useTransformSkipFast
1329	#if ADAPTIVE_QP_SELECTION
1330	, Bool bUseAdaptQpSelect
1331	#endif
1332	)
1333	{
1334	m_uiMaxTrSize = uiMaxTrSize;
1335	m_bEnc = bEnc;
1336	m_useRDOQ = bUseRDOQ;
1337	m_useRDOQTS = bUseRDOQTS;
1338	#if ADAPTIVE_QP_SELECTION
1339	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1340	#endif
1341	m_useTransformSkipFast = useTransformSkipFast;
1342	}
1343
1344
1345	Void TComTrQuant::transformNxN( TComTU & rTu,
1346	const ComponentID compID,
1347	Pel * pcResidual,
1348	const UInt uiStride,
1349	TCoeff * rpcCoeff,
1350	#if ADAPTIVE_QP_SELECTION
1351	TCoeff * pcArlCoeff,
1352	#endif
1353	TCoeff & uiAbsSum,
1354	const QpParam & cQP
1355	)
1356	{
1357	const TComRectangle &rect = rTu.getRect(compID);
1358	const UInt uiWidth = rect.width;
1359	const UInt uiHeight = rect.height;
1360	TComDataCU* pcCU = rTu.getCU();
1361	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1362	const UInt uiOrgTrDepth = rTu.GetTransformDepthRel();
1363
1364	uiAbsSum=0;
1365
1366	RDPCMMode rdpcmMode = RDPCM_OFF;
1367	rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
1368
1369	if (rdpcmMode == RDPCM_OFF)
1370	{
1371	uiAbsSum = 0;
1372	//transform and quantise
1373	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1374	{
1375	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1376	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1377
1378	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1379	{
1380	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1381	{
1382	const Pel currentSample = pcResidual[(y * uiStride) + x];
1383
1384	rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
1385	uiAbsSum += TCoeff(abs(currentSample));
1386	}
1387	}
1388	}
1389	else
1390	{
1391	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1392	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
1393	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1394	#endif
1395
1396	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1397
1398	if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
1399	{
1400	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
1401	}
1402	else
1403	{
1404	xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1405	}
1406
1407	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1408	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
1409	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1410	#endif
1411
1412	xQuant( rTu, m_plTempCoeff, rpcCoeff,
1413
1414	#if ADAPTIVE_QP_SELECTION
1415	pcArlCoeff,
1416	#endif
1417	uiAbsSum, compID, cQP );
1418
1419	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1420	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
1421	printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
1422	#endif
1423	}
1424	}
1425
1426	//set the CBF
1427	pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1428	}
1429
1430
1431	Void TComTrQuant::invTransformNxN( TComTU &rTu,
1432	const ComponentID compID,
1433	Pel *pcResidual,
1434	const UInt uiStride,
1435	TCoeff * pcCoeff,
1436	const QpParam &cQP
1437	DEBUG_STRING_FN_DECLAREP(psDebug))
1438	{
1439	TComDataCU* pcCU=rTu.getCU();
1440	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1441	const TComRectangle &rect = rTu.getRect(compID);
1442	const UInt uiWidth = rect.width;
1443	const UInt uiHeight = rect.height;
1444
1445	if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
1446	{
1447	//------------------------------------------------
1448
1449	//recurse deeper
1450
1451	TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);
1452
1453	do
1454	{
1455	//------------------
1456
1457	const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;
1458
1459	Pel subTUResidual = pcResidual + (lineOffset uiStride);
1460	TCoeff subTUCoefficients = pcCoeff + (lineOffset subTURecurse.getRect(compID).width);
1461
1462	invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
1463
1464	//------------------
1465
1466	}
1467	while (subTURecurse.nextSection(rTu));
1468
1469	//------------------------------------------------
1470
1471	return;
1472	}
1473
1474	#if defined DEBUG_STRING
1475	if (psDebug)
1476	{
1477	std::stringstream ss(stringstream::out);
1478	printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
1479	DEBUG_STRING_APPEND((*psDebug), ss.str())
1480	}
1481	#endif
1482
1483	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1484	{
1485	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1486	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1487
1488	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1489	{
1490	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1491	{
1492	pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
1493	}
1494	}
1495	}
1496	else
1497	{
1498	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1499	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
1500	printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
1501	#endif
1502
1503	xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);
1504
1505	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1506	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
1507	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1508	#endif
1509
1510	#if defined DEBUG_STRING
1511	if (psDebug)
1512	{
1513	std::stringstream ss(stringstream::out);
1514	printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1515	(*psDebug)+=ss.str();
1516	}
1517	#endif
1518
1519	if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
1520	{
1521	xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );
1522
1523	#if defined DEBUG_STRING
1524	if (psDebug)
1525	{
1526	std::stringstream ss(stringstream::out);
1527	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1528	(*psDebug)+=ss.str();
1529	(*psDebug)+="(<- was a Transform-skipped block)\n";
1530	}
1531	#endif
1532	}
1533	else
1534	{
1535	xIT( compID, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight );
1536
1537	#if defined DEBUG_STRING
1538	if (psDebug)
1539	{
1540	std::stringstream ss(stringstream::out);
1541	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1542	(*psDebug)+=ss.str();
1543	(*psDebug)+="(<- was a Transformed block)\n";
1544	}
1545	#endif
1546	}
1547
1548	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1549	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
1550	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1551	g_debugCounter++;
1552	#endif
1553	}
1554
1555	invRdpcmNxN( rTu, compID, pcResidual, uiStride );
1556	}
1557
1558	Void TComTrQuant::invRecurTransformNxN( const ComponentID compID,
1559	TComYuv *pResidual,
1560	TComTU &rTu)
1561	{
1562	if (!rTu.ProcessComponentSection(compID)) return;
1563
1564	TComDataCU* pcCU = rTu.getCU();
1565	UInt absPartIdxTU = rTu.GetAbsPartIdxTU();
1566	UInt uiTrMode=rTu.GetTransformDepthRel();
1567	if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) \|\| !pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction()) )
1568	{
1569	return;
1570	}
1571
1572	if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) )
1573	{
1574	const TComRectangle &tuRect = rTu.getRect(compID);
1575	const Int uiStride = pResidual->getStride( compID );
1576	Pel *rpcResidual = pResidual->getAddr( compID );
1577	UInt uiAddr = (tuRect.x0 + uiStride*tuRect.y0);
1578	Pel *pResi = rpcResidual + uiAddr;
1579	TCoeff *pcCoeff = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID);
1580
1581	const QpParam cQP(*pcCU, compID);
1582
1583	if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0)
1584	{
1585	DEBUG_STRING_NEW(sTemp)
1586	#ifdef DEBUG_STRING
1587	std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0;
1588	#endif
1589
1590	invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
1591
1592	#ifdef DEBUG_STRING
1593	if (psDebug != 0)
1594	std::cout << (*psDebug);
1595	#endif
1596	}
1597
1598	if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0))
1599	{
1600	const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y );
1601	const Int strideLuma = pResidual->getStride( COMPONENT_Y );
1602	const Int tuWidth = rTu.getRect( compID ).width;
1603	const Int tuHeight = rTu.getRect( compID ).height;
1604
1605	if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0)
1606	{
1607	pResi = rpcResidual + uiAddr;
1608	const Pel *pResiLuma = piResiLuma + uiAddr;
1609
1610	crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true );
1611	}
1612	}
1613	}
1614	else
1615	{
1616	TComTURecurse tuRecurseChild(rTu, false);
1617	do
1618	{
1619	invRecurTransformNxN( compID, pResidual, tuRecurseChild );
1620	}
1621	while (tuRecurseChild.nextSection(rTu));
1622	}
1623	}
1624
1625	Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode )
1626	{
1627	TComDataCU *pcCU=rTu.getCU();
1628	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1629
1630	const Bool bLossless = pcCU->getCUTransquantBypass( uiAbsPartIdx );
1631	const UInt uiWidth = rTu.getRect(compID).width;
1632	const UInt uiHeight = rTu.getRect(compID).height;
1633	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1634	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1635
1636	Pel reconstructedResi[MAX_TU_SIZE * MAX_TU_SIZE];
1637
1638	UInt uiX = 0;
1639	UInt uiY = 0;
1640
1641	UInt &majorAxis = (mode == RDPCM_VER) ? uiX : uiY;
1642	UInt &minorAxis = (mode == RDPCM_VER) ? uiY : uiX;
1643	const UInt majorAxisLimit = (mode == RDPCM_VER) ? uiWidth : uiHeight;
1644	const UInt minorAxisLimit = (mode == RDPCM_VER) ? uiHeight : uiWidth;
1645	static const TCoeff pelMin=(Int) std::numeric_limits<Pel>::min();
1646	static const TCoeff pelMax=(Int) std::numeric_limits<Pel>::max();
1647
1648	const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF);
1649
1650	uiAbsSum = 0;
1651
1652	for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ )
1653	{
1654	TCoeff accumulatorValue = 0; // 32-bit accumulator
1655	for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ )
1656	{
1657	const UInt sampleIndex = (uiY * uiWidth) + uiX;
1658	const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex);
1659	const Pel currentSample = pcResidual[(uiY * uiStride) + uiX];
1660	const TCoeff encoderSideDelta = TCoeff(currentSample) - accumulatorValue;
1661
1662	Pel reconstructedDelta;
1663	if ( bLossless )
1664	{
1665	pcCoeff[coefficientIndex] = encoderSideDelta;
1666	reconstructedDelta = encoderSideDelta;
1667	}
1668	else
1669	{
1670	transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint);
1671	invTrSkipDeQuantOneSample (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex);
1672	}
1673
1674	uiAbsSum += abs(pcCoeff[coefficientIndex]);
1675
1676	if (mode == RDPCM_OFF)
1677	{
1678	reconstructedResi[sampleIndex] = reconstructedDelta;
1679	}
1680	else
1681	{
1682	accumulatorValue += reconstructedDelta;
1683	reconstructedResi[sampleIndex] = (Pel) Clip3<TCoeff>(pelMin, pelMax, accumulatorValue);
1684	}
1685	}
1686	}
1687	}
1688
1689	Void TComTrQuant::rdpcmNxN ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode )
1690	{
1691	TComDataCU *pcCU=rTu.getCU();
1692	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1693
1694	if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) \|\| ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1695	{
1696	rdpcmMode = RDPCM_OFF;
1697	}
1698	else if ( pcCU->isIntra( uiAbsPartIdx ) )
1699	{
1700	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat();
1701	const ChannelType chType = toChannelType(compID);
1702	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1703	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1704	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1705
1706	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1707	{
1708	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1709	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode );
1710	}
1711	else rdpcmMode = RDPCM_OFF;
1712	}
1713	else // not intra, need to select the best mode
1714	{
1715	const UInt uiWidth = rTu.getRect(compID).width;
1716	const UInt uiHeight = rTu.getRect(compID).height;
1717
1718	RDPCMMode bestMode = NUMBER_OF_RDPCM_MODES;
1719	TCoeff bestAbsSum = std::numeric_limits<TCoeff>::max();
1720	TCoeff bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE];
1721
1722	for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++)
1723	{
1724	const RDPCMMode mode = RDPCMMode(modeIndex);
1725
1726	TCoeff currAbsSum = 0;
1727
1728	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode );
1729
1730	if (currAbsSum < bestAbsSum)
1731	{
1732	bestMode = mode;
1733	bestAbsSum = currAbsSum;
1734	if (mode != RDPCM_OFF)
1735	{
1736	memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff)));
1737	}
1738	}
1739	}
1740
1741	rdpcmMode = bestMode;
1742	uiAbsSum = bestAbsSum;
1743
1744	if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here
1745	{
1746	memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff)));
1747	}
1748	}
1749
1750	pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1751	}
1752
1753	Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride )
1754	{
1755	TComDataCU *pcCU=rTu.getCU();
1756	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1757
1758	if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) \|\| pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1759	{
1760	const UInt uiWidth = rTu.getRect(compID).width;
1761	const UInt uiHeight = rTu.getRect(compID).height;
1762
1763	RDPCMMode rdpcmMode = RDPCM_OFF;
1764
1765	if ( pcCU->isIntra( uiAbsPartIdx ) )
1766	{
1767	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat();
1768	const ChannelType chType = toChannelType(compID);
1769	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1770	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1771	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1772
1773	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1774	{
1775	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1776	}
1777	}
1778	else // not intra case
1779	{
1780	rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx ));
1781	}
1782
1783	static const TCoeff pelMin=(TCoeff) std::numeric_limits<Pel>::min();
1784	static const TCoeff pelMax=(TCoeff) std::numeric_limits<Pel>::max();
1785	if (rdpcmMode == RDPCM_VER)
1786	{
1787	for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1788	{
1789	Pel *pcCurResidual = pcResidual+uiX;
1790	TCoeff accumulator = *pcCurResidual; // 32-bit accumulator
1791	pcCurResidual+=uiStride;
1792	for( UInt uiY = 1; uiY < uiHeight; uiY++, pcCurResidual+=uiStride )
1793	{
1794	accumulator += *(pcCurResidual);
1795	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
1796	}
1797	}
1798	}
1799	else if (rdpcmMode == RDPCM_HOR)
1800	{
1801	for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1802	{
1803	Pel pcCurResidual = pcResidual+uiYuiStride;
1804	TCoeff accumulator = *pcCurResidual;
1805	pcCurResidual++;
1806	for( UInt uiX = 1; uiX < uiWidth; uiX++, pcCurResidual++ )
1807	{
1808	accumulator += *(pcCurResidual);
1809	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
1810	}
1811	}
1812	}
1813	}
1814	}
1815
1816	// ------------------------------------------------------------------------------------------------
1817	// Logical transform
1818	// ------------------------------------------------------------------------------------------------
1819
1820	/** Wrapper function between HM interface and core NxN forward transform (2D)
1821	* \param piBlkResi input data (residual)
1822	* \param psCoeff output data (transform coefficients)
1823	* \param uiStride stride of input residual data
1824	* \param iSize transform size (iSize x iSize)
1825	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1826	*/
1827	Void TComTrQuant::xT( const ComponentID compID, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight )
1828	{
1829	#if MATRIX_MULT
1830	if( iWidth == iHeight)
1831	{
1832	xTr(g_bitDepth[toChannelType(compID)], piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1833	return;
1834	}
1835	#endif
1836
1837	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1838	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1839
1840	for (Int y = 0; y < iHeight; y++)
1841	for (Int x = 0; x < iWidth; x++)
1842	{
1843	block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x];
1844	}
1845
1846	xTrMxN( g_bitDepth[toChannelType(compID)], block, coeff, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1847
1848	memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff)));
1849	}
1850
1851	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1852	* \param plCoef input data (transform coefficients)
1853	* \param pResidual output data (residual)
1854	* \param uiStride stride of input residual data
1855	* \param iSize transform size (iSize x iSize)
1856	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1857	*/
1858	Void TComTrQuant::xIT( const ComponentID compID, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1859	{
1860	#if MATRIX_MULT
1861	if( iWidth == iHeight )
1862	{
1863	#if O0043_BEST_EFFORT_DECODING
1864	xITr(g_bitDepthInStream[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1865	#else
1866	xITr(g_bitDepth[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1867	#endif
1868	return;
1869	}
1870	#endif
1871
1872	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1873	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1874
1875	memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));
1876
1877	#if O0043_BEST_EFFORT_DECODING
1878	xITrMxN( g_bitDepthInStream[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1879	#else
1880	xITrMxN( g_bitDepth[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1881	#endif
1882
1883	for (Int y = 0; y < iHeight; y++)
1884	for (Int x = 0; x < iWidth; x++)
1885	{
1886	pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
1887	}
1888	}
1889
1890	/** Wrapper function between HM interface and core 4x4 transform skipping
1891	* \param piBlkResi input data (residual)
1892	* \param psCoeff output data (transform coefficients)
1893	* \param uiStride stride of input residual data
1894	* \param iSize transform size (iSize x iSize)
1895	*/
1896	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component )
1897	{
1898	const TComRectangle &rect = rTu.getRect(component);
1899	const Int width = rect.width;
1900	const Int height = rect.height;
1901
1902	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1903	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1904	{
1905	iTransformShift = std::max<Int>(0, iTransformShift);
1906	}
1907
1908	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1909	const UInt uiSizeMinus1 = (width * height) - 1;
1910
1911	if (iTransformShift >= 0)
1912	{
1913	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1914	{
1915	for (UInt x = 0; x < width; x++, coefficientIndex++)
1916	{
1917	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift;
1918	}
1919	}
1920	}
1921	else //for very high bit depths
1922	{
1923	iTransformShift = -iTransformShift;
1924	const TCoeff offset = 1 << (iTransformShift - 1);
1925
1926	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1927	{
1928	for (UInt x = 0; x < width; x++, coefficientIndex++)
1929	{
1930	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift;
1931	}
1932	}
1933	}
1934	}
1935
1936	/** Wrapper function between HM interface and core NxN transform skipping
1937	* \param plCoef input data (coefficients)
1938	* \param pResidual output data (residual)
1939	* \param uiStride stride of input residual data
1940	* \param iSize transform size (iSize x iSize)
1941	*/
1942	Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component )
1943	{
1944	const TComRectangle &rect = rTu.getRect(component);
1945	const Int width = rect.width;
1946	const Int height = rect.height;
1947
1948	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1949	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1950	{
1951	iTransformShift = std::max<Int>(0, iTransformShift);
1952	}
1953
1954	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1955	const UInt uiSizeMinus1 = (width * height) - 1;
1956
1957	if (iTransformShift >= 0)
1958	{
1959	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
1960
1961	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1962	{
1963	for (UInt x = 0; x < width; x++, coefficientIndex++)
1964	{
1965	pResidual[(y * uiStride) + x] = Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift);
1966	}
1967	}
1968	}
1969	else //for very high bit depths
1970	{
1971	iTransformShift = -iTransformShift;
1972
1973	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1974	{
1975	for (UInt x = 0; x < width; x++, coefficientIndex++)
1976	{
1977	pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift);
1978	}
1979	}
1980	}
1981	}
1982
1983	/** RDOQ with CABAC
1984	* \param pcCU pointer to coding unit structure
1985	* \param plSrcCoeff pointer to input buffer
1986	* \param piDstCoeff reference to pointer to output buffer
1987	* \param uiWidth block width
1988	* \param uiHeight block height
1989	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1990	* \param eTType plane type / luminance or chrominance
1991	* \param uiAbsPartIdx absolute partition index
1992	* \returns Void
1993	* Rate distortion optimized quantization for entropy
1994	* coding engines using probability models like CABAC
1995	*/
1996	Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu,
1997	TCoeff * plSrcCoeff,
1998	TCoeff * piDstCoeff,
1999	#if ADAPTIVE_QP_SELECTION
2000	TCoeff * piArlDstCoeff,
2001	#endif
2002	TCoeff &uiAbsSum,
2003	const ComponentID compID,
2004	const QpParam &cQP )
2005	{
2006	const TComRectangle & rect = rTu.getRect(compID);
2007	const UInt uiWidth = rect.width;
2008	const UInt uiHeight = rect.height;
2009	TComDataCU * pcCU = rTu.getCU();
2010	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
2011	const ChannelType channelType = toChannelType(compID);
2012	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
2013
2014	const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
2015
2016	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
2017	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
2018	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
2019	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
2020	*/
2021
2022	// Represents scaling through forward transform
2023	Int iTransformShift = getTransformShift(channelType, uiLog2TrSize);
2024	if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
2025	{
2026	iTransformShift = std::max<Int>(0, iTransformShift);
2027	}
2028
2029	const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getUseGolombRiceParameterAdaptation();
2030	const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
2031	UInt uiGoRiceParam = initialGolombRiceParameter;
2032	Double d64BlockUncodedCost = 0;
2033	const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2;
2034	const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2;
2035	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
2036	assert(compID<MAX_NUM_COMPONENT);
2037
2038	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
2039	assert(scalingListType < SCALING_LIST_NUM);
2040
2041	#if ADAPTIVE_QP_SELECTION
2042	memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff);
2043	#endif
2044
2045	Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
2046	Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ];
2047	Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
2048	memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
2049	memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
2050	Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ];
2051	Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
2052	Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
2053	TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ];
2054	memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff );
2055	memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff );
2056	memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff );
2057	memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff );
2058
2059	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
2060	const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
2061	const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
2062
2063	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
2064	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
2065	const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
2066
2067	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
2068	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
2069
2070	#if ADAPTIVE_QP_SELECTION
2071	Int iQBitsC = iQBits - ARL_C_PRECISION;
2072	Int iAddC = 1 << (iQBitsC-1);
2073	#endif
2074
2075	TUEntropyCodingParameters codingParameters;
2076	getTUEntropyCodingParameters(codingParameters, rTu, compID);
2077	const UInt uiCGSize = (1 << MLS_CG_SIZE);
2078
2079	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
2080	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
2081	Int iCGLastScanPos = -1;
2082
2083	UInt uiCtxSet = 0;
2084	Int c1 = 1;
2085	Int c2 = 0;
2086	Double d64BaseCost = 0;
2087	Int iLastScanPos = -1;
2088
2089	UInt c1Idx = 0;
2090	UInt c2Idx = 0;
2091	Int baseLevel;
2092
2093	memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
2094	memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
2095
2096	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
2097	Int iScanPos;
2098	coeffGroupRDStats rdStats;
2099
2100	const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
2101
2102	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
2103	{
2104	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2105	UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups;
2106	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
2107
2108	memset( &rdStats, 0, sizeof (coeffGroupRDStats));
2109
2110	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
2111
2112	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2113	{
2114	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2115	//===== quantization =====
2116	UInt uiBlkPos = codingParameters.scan[iScanPos];
2117	// set coeff
2118
2119	const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
2120	const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
2121
2122	const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
2123
2124	const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, MAX_INTERMEDIATE_INT - (Intermediate_Int(1) << (iQBits - 1)));
2125
2126	#if ADAPTIVE_QP_SELECTION
2127	if( m_bUseAdaptQpSelect )
2128	{
2129	piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
2130	}
2131	#endif
2132	const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
2133
2134	const Double dErr = Double( lLevelDouble );
2135	pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;
2136	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
2137	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
2138
2139	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2140	{
2141	iLastScanPos = iScanPos;
2142	uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
2143	iCGLastScanPos = iCGScanPos;
2144	}
2145
2146	if ( iLastScanPos >= 0 )
2147	{
2148	//===== coefficient level estimation =====
2149	UInt uiLevel;
2150	UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
2151	UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
2152
2153	if( iScanPos == iLastScanPos )
2154	{
2155	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2156	lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2157	c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, channelType
2158	);
2159	}
2160	else
2161	{
2162	UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
2163
2164	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2165	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2166	c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, channelType
2167	);
2168
2169	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2170	}
2171
2172	deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
2173
2174	if( uiLevel > 0 )
2175	{
2176	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType );
2177	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2178	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2179	}
2180	else // uiLevel == 0
2181	{
2182	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2183	}
2184	piDstCoeff[ uiBlkPos ] = uiLevel;
2185	d64BaseCost += pdCostCoeff [ iScanPos ];
2186
2187	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2188	if( uiLevel >= baseLevel )
2189	{
2190	if (uiLevel > 3*(1<<uiGoRiceParam))
2191	{
2192	uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
2193	}
2194	}
2195	if ( uiLevel >= 1)
2196	{
2197	c1Idx ++;
2198	}
2199
2200	//===== update bin model =====
2201	if( uiLevel > 1 )
2202	{
2203	c1 = 0;
2204	c2 += (c2 < 2);
2205	c2Idx ++;
2206	}
2207	else if( (c1 < 3) && (c1 > 0) && uiLevel)
2208	{
2209	c1++;
2210	}
2211
2212	//===== context set update =====
2213	if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
2214	{
2215	uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this before entering the final group
2216	c1 = 1;
2217	c2 = 0;
2218	c1Idx = 0;
2219	c2Idx = 0;
2220	uiGoRiceParam = initialGolombRiceParameter;
2221	}
2222	}
2223	else
2224	{
2225	d64BaseCost += pdCostCoeff0[ iScanPos ];
2226	}
2227	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2228	if (iScanPosinCG == 0 )
2229	{
2230	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2231	}
2232	if (piDstCoeff[ uiBlkPos ] )
2233	{
2234	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2235	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2236	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2237	if ( iScanPosinCG != 0 )
2238	{
2239	rdStats.iNNZbeforePos0++;
2240	}
2241	}
2242	} //end for (iScanPosinCG)
2243
2244	if (iCGLastScanPos >= 0)
2245	{
2246	if( iCGScanPos )
2247	{
2248	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2249	{
2250	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2251	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2252	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2253	}
2254	else
2255	{
2256	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2257	{
2258	if ( rdStats.iNNZbeforePos0 == 0 )
2259	{
2260	d64BaseCost -= rdStats.d64SigCost_0;
2261	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2262	}
2263	// rd-cost if SigCoeffGroupFlag = 0, initialization
2264	Double d64CostZeroCG = d64BaseCost;
2265
2266	// add SigCoeffGroupFlag cost to total cost
2267	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2268
2269	if (iCGScanPos < iCGLastScanPos)
2270	{
2271	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2272	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2273	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2274	}
2275
2276	// try to convert the current coeff group from non-zero to all-zero
2277	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2278	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2279	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2280
2281	// if we can save cost, change this block to all-zero block
2282	if ( d64CostZeroCG < d64BaseCost )
2283	{
2284	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2285	d64BaseCost = d64CostZeroCG;
2286	if (iCGScanPos < iCGLastScanPos)
2287	{
2288	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2289	}
2290	// reset coeffs to 0 in this block
2291	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2292	{
2293	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2294	UInt uiBlkPos = codingParameters.scan[ iScanPos ];
2295
2296	if (piDstCoeff[ uiBlkPos ])
2297	{
2298	piDstCoeff [ uiBlkPos ] = 0;
2299	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2300	pdCostSig [ iScanPos ] = 0;
2301	}
2302	}
2303	} // end if ( d64CostAllZeros < d64BaseCost )
2304	}
2305	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2306	}
2307	else
2308	{
2309	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2310	}
2311	}
2312	} //end for (iCGScanPos)
2313
2314	//===== estimate last position =====
2315	if ( iLastScanPos < 0 )
2316	{
2317	return;
2318	}
2319
2320	Double d64BestCost = 0;
2321	Int ui16CtxCbf = 0;
2322	Int iBestLastIdxP1 = 0;
2323	if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2324	{
2325	ui16CtxCbf = 0;
2326	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2327	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2328	}
2329	else
2330	{
2331	ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType );
2332	ui16CtxCbf += getCBFContextOffset(compID);
2333	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2334	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2335	}
2336
2337
2338	Bool bFoundLast = false;
2339	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2340	{
2341	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2342
2343	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2344	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2345	{
2346	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2347	{
2348	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2349
2350	if (iScanPos > iLastScanPos) continue;
2351	UInt uiBlkPos = codingParameters.scan[iScanPos];
2352
2353	if( piDstCoeff[ uiBlkPos ] )
2354	{
2355	UInt uiPosY = uiBlkPos >> uiLog2BlockWidth;
2356	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
2357
2358	Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
2359	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2360
2361	if( totalCost < d64BestCost )
2362	{
2363	iBestLastIdxP1 = iScanPos + 1;
2364	d64BestCost = totalCost;
2365	}
2366	if( piDstCoeff[ uiBlkPos ] > 1 )
2367	{
2368	bFoundLast = true;
2369	break;
2370	}
2371	d64BaseCost -= pdCostCoeff[ iScanPos ];
2372	d64BaseCost += pdCostCoeff0[ iScanPos ];
2373	}
2374	else
2375	{
2376	d64BaseCost -= pdCostSig[ iScanPos ];
2377	}
2378	} //end for
2379	if (bFoundLast)
2380	{
2381	break;
2382	}
2383	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2384	} // end for
2385
2386
2387	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2388	{
2389	Int blkPos = codingParameters.scan[ scanPos ];
2390	TCoeff level = piDstCoeff[ blkPos ];
2391	uiAbsSum += level;
2392	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2393	}
2394
2395	//===== clean uncoded coefficients =====
2396	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2397	{
2398	piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
2399	}
2400
2401
2402	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2403	{
2404	const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
2405	Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
2406	/ m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(g_bitDepth[channelType] - 8)))
2407	+ 0.5);
2408
2409	Int lastCG = -1;
2410	Int absSum = 0 ;
2411	Int n ;
2412
2413	for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
2414	{
2415	Int subPos = subSet << MLS_CG_SIZE;
2416	Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
2417	absSum = 0 ;
2418
2419	for(n = uiCGSize-1; n >= 0; --n )
2420	{
2421	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2422	{
2423	lastNZPosInCG = n;
2424	break;
2425	}
2426	}
2427
2428	for(n = 0; n <uiCGSize; n++ )
2429	{
2430	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2431	{
2432	firstNZPosInCG = n;
2433	break;
2434	}
2435	}
2436
2437	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2438	{
2439	absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
2440	}
2441
2442	if(lastNZPosInCG>=0 && lastCG==-1)
2443	{
2444	lastCG = 1;
2445	}
2446
2447	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2448	{
2449	UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
2450	if( signbit!=(absSum&0x1) ) // hide but need tune
2451	{
2452	// calculate the cost
2453	Int64 minCostInc = MAX_INT64, curCost = MAX_INT64;
2454	Int minPos = -1, finalChange = 0, curChange = 0;
2455
2456	for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
2457	{
2458	UInt uiBlkPos = codingParameters.scan[ n + subPos ];
2459	if(piDstCoeff[ uiBlkPos ] != 0 )
2460	{
2461	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
2462	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2463	- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
2464
2465	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2466	{
2467	costDown -= (4<<15);
2468	}
2469
2470	if(costUp<costDown)
2471	{
2472	curCost = costUp;
2473	curChange = 1;
2474	}
2475	else
2476	{
2477	curChange = -1;
2478	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2479	{
2480	curCost = MAX_INT64;
2481	}
2482	else
2483	{
2484	curCost = costDown;
2485	}
2486	}
2487	}
2488	else
2489	{
2490	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2491	curChange = 1 ;
2492
2493	if(n<firstNZPosInCG)
2494	{
2495	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2496	if(thissignbit != signbit )
2497	{
2498	curCost = MAX_INT64;
2499	}
2500	}
2501	}
2502
2503	if( curCost<minCostInc)
2504	{
2505	minCostInc = curCost;
2506	finalChange = curChange;
2507	minPos = uiBlkPos;
2508	}
2509	}
2510
2511	if(piDstCoeff[minPos] == entropyCodingMaximum \|\| piDstCoeff[minPos] == entropyCodingMinimum)
2512	{
2513	finalChange = -1;
2514	}
2515
2516	if(plSrcCoeff[minPos]>=0)
2517	{
2518	piDstCoeff[minPos] += finalChange ;
2519	}
2520	else
2521	{
2522	piDstCoeff[minPos] -= finalChange ;
2523	}
2524	}
2525	}
2526
2527	if(lastCG==1)
2528	{
2529	lastCG=0 ;
2530	}
2531	}
2532	}
2533	}
2534
2535
2536	/** Pattern decision for context derivation process of significant_coeff_flag
2537	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2538	* \param uiCGPosX column of current coefficient group
2539	* \param uiCGPosY row of current coefficient group
2540	* \param width width of the block
2541	* \param height height of the block
2542	* \returns pattern for current coefficient group
2543	*/
2544	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups )
2545	{
2546	if ((widthInGroups <= 1) && (heightInGroups <= 1)) return 0;
2547
2548	const Bool rightAvailable = uiCGPosX < (widthInGroups - 1);
2549	const Bool belowAvailable = uiCGPosY < (heightInGroups - 1);
2550
2551	UInt sigRight = 0;
2552	UInt sigLower = 0;
2553
2554	if (rightAvailable) sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2555	if (belowAvailable) sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2556
2557	return sigRight + (sigLower << 1);
2558	}
2559
2560
2561	/** Context derivation process of coeff_abs_significant_flag
2562	* \param patternSigCtx pattern for current coefficient group
2563	* \param codingParameters coding parmeters for the TU (includes the scan)
2564	* \param scanPosition current position in scan order
2565	* \param log2BlockWidth log2 width of the block
2566	* \param log2BlockHeight log2 height of the block
2567	* \param ChannelType channel type (CHANNEL_TYPE_LUMA/CHROMA)
2568	* \returns ctxInc for current scan position
2569	*/
2570	Int TComTrQuant::getSigCtxInc ( Int patternSigCtx,
2571	const TUEntropyCodingParameters &codingParameters,
2572	const Int scanPosition,
2573	const Int log2BlockWidth,
2574	const Int log2BlockHeight,
2575	const ChannelType chanType)
2576	{
2577	if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE])
2578	{
2579	//single context mode
2580	return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE];
2581	}
2582
2583	const UInt rasterPosition = codingParameters.scan[scanPosition];
2584	const UInt posY = rasterPosition >> log2BlockWidth;
2585	const UInt posX = rasterPosition - (posY << log2BlockWidth);
2586
2587	if ((posX + posY) == 0) return 0; //special case for the DC context variable
2588
2589	Int offset = MAX_INT;
2590
2591	if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4
2592	{
2593	offset = ctxIndMap4x4[ (4 * posY) + posX ];
2594	}
2595	else
2596	{
2597	Int cnt = 0;
2598
2599	switch (patternSigCtx)
2600	{
2601	//------------------
2602
2603	case 0: //neither neighbouring group is significant
2604	{
2605	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2606	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2607	const Int posTotalInSubset = posXinSubset + posYinSubset;
2608
2609	//first N coefficients in scan order use 2; the next few use 1; the rest use 0.
2610	const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4;
2611	const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4;
2612
2613	cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2);
2614	}
2615	break;
2616
2617	//------------------
2618
2619	case 1: //right group is significant, below is not
2620	{
2621	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2622	const Int groupHeight = 1 << MLS_CG_LOG2_HEIGHT;
2623
2624	cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0
2625	}
2626	break;
2627
2628	//------------------
2629
2630	case 2: //below group is significant, right is not
2631	{
2632	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2633	const Int groupWidth = 1 << MLS_CG_LOG2_WIDTH;
2634
2635	cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0
2636	}
2637	break;
2638
2639	//------------------
2640
2641	case 3: //both neighbouring groups are significant
2642	{
2643	cnt = 2;
2644	}
2645	break;
2646
2647	//------------------
2648
2649	default:
2650	std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl;
2651	exit(1);
2652	break;
2653	}
2654
2655	//------------------------------------------------
2656
2657	const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0;
2658
2659	offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt;
2660	}
2661
2662	return codingParameters.firstSignificanceMapContext + offset;
2663	}
2664
2665
2666	/** Get the best level in RD sense
2667	* \param rd64CodedCost reference to coded cost
2668	* \param rd64CodedCost0 reference to cost when coefficient is 0
2669	* \param rd64CodedCostSig reference to cost of significant coefficient
2670	* \param lLevelDouble reference to unscaled quantized level
2671	* \param uiMaxAbsLevel scaled quantized level
2672	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2673	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2674	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2675	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2676	* \param iQBits quantization step size
2677	* \param dTemp correction factor
2678	* \param bLast indicates if the coefficient is the last significant
2679	* \returns best quantized transform level for given scan position
2680	* This method calculates the best quantized transform level for a given scan position.
2681	*/
2682	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2683	Double& rd64CodedCost0,
2684	Double& rd64CodedCostSig,
2685	Intermediate_Int lLevelDouble,
2686	UInt uiMaxAbsLevel,
2687	UShort ui16CtxNumSig,
2688	UShort ui16CtxNumOne,
2689	UShort ui16CtxNumAbs,
2690	UShort ui16AbsGoRice,
2691	UInt c1Idx,
2692	UInt c2Idx,
2693	Int iQBits,
2694	Double errorScale,
2695	Bool bLast,
2696	Bool useLimitedPrefixLength,
2697	ChannelType channelType
2698	) const
2699	{
2700	Double dCurrCostSig = 0;
2701	UInt uiBestAbsLevel = 0;
2702
2703	if( !bLast && uiMaxAbsLevel < 3 )
2704	{
2705	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2706	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2707	if( uiMaxAbsLevel == 0 )
2708	{
2709	return uiBestAbsLevel;
2710	}
2711	}
2712	else
2713	{
2714	rd64CodedCost = MAX_DOUBLE;
2715	}
2716
2717	if( !bLast )
2718	{
2719	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2720	}
2721
2722	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2723	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2724	{
2725	Double dErr = Double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
2726	Double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, channelType ) );
2727	dCurrCost += dCurrCostSig;
2728
2729	if( dCurrCost < rd64CodedCost )
2730	{
2731	uiBestAbsLevel = uiAbsLevel;
2732	rd64CodedCost = dCurrCost;
2733	rd64CodedCostSig = dCurrCostSig;
2734	}
2735	}
2736
2737	return uiBestAbsLevel;
2738	}
2739
2740	/** Calculates the cost for specific absolute transform level
2741	* \param uiAbsLevel scaled quantized level
2742	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2743	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2744	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2745	* \returns cost of given absolute transform level
2746	*/
2747	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2748	UShort ui16CtxNumOne,
2749	UShort ui16CtxNumAbs,
2750	UShort ui16AbsGoRice,
2751	UInt c1Idx,
2752	UInt c2Idx,
2753	Bool useLimitedPrefixLength,
2754	ChannelType channelType
2755	) const
2756	{
2757	Int iRate = Int(xGetIEPRate()); // cost of sign bit
2758	UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2759
2760	if ( uiAbsLevel >= baseLevel )
2761	{
2762	UInt symbol = uiAbsLevel - baseLevel;
2763	UInt length;
2764	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2765	{
2766	length = symbol>>ui16AbsGoRice;
2767	iRate += (length+1+ui16AbsGoRice)<< 15;
2768	}
2769	else if (useLimitedPrefixLength)
2770	{
2771	const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + g_maxTrDynamicRange[channelType]));
2772
2773	UInt prefixLength = 0;
2774	UInt suffix = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION;
2775
2776	while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
2777	{
2778	prefixLength++;
2779	}
2780
2781	const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (g_maxTrDynamicRange[channelType] - ui16AbsGoRice) : (prefixLength + 1/separator/);
2782
2783	iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15;
2784	}
2785	else
2786	{
2787	length = ui16AbsGoRice;
2788	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2789	while (symbol >= (1<<length))
2790	{
2791	symbol -= (1<<(length++));
2792	}
2793	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2794	}
2795
2796	if (c1Idx < C1FLAG_NUMBER)
2797	{
2798	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2799
2800	if (c2Idx < C2FLAG_NUMBER)
2801	{
2802	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2803	}
2804	}
2805	}
2806	else if( uiAbsLevel == 1 )
2807	{
2808	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2809	}
2810	else if( uiAbsLevel == 2 )
2811	{
2812	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2813	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2814	}
2815	else
2816	{
2817	iRate = 0;
2818	}
2819
2820	return iRate;
2821	}
2822
2823	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2824	UShort ui16CtxNumSig ) const
2825	{
2826	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2827	}
2828
2829	/** Calculates the cost of signaling the last significant coefficient in the block
2830	* \param uiPosX X coordinate of the last significant coefficient
2831	* \param uiPosY Y coordinate of the last significant coefficient
2832	* \returns cost of last significant coefficient
2833	*/
2834	/*
2835	* \param uiWidth width of the transform unit (TU)
2836	*/
2837	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2838	const UInt uiPosY,
2839	const ComponentID component ) const
2840	{
2841	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2842	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2843
2844	Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ];
2845
2846	if( uiCtxX > 3 )
2847	{
2848	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2849	}
2850	if( uiCtxY > 3 )
2851	{
2852	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2853	}
2854	return xGetICost( uiCost );
2855	}
2856
2857	/** Calculates the cost for specific absolute transform level
2858	* \param uiAbsLevel scaled quantized level
2859	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2860	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2861	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2862	* \returns cost of given absolute transform level
2863	*/
2864	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2865	UShort ui16CtxNumSig ) const
2866	{
2867	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2868	}
2869
2870	/** Get the cost for a specific rate
2871	* \param dRate rate of a bit
2872	* \returns cost at the specific rate
2873	*/
2874	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2875	{
2876	return m_dLambda * dRate;
2877	}
2878
2879	/** Get the cost of an equal probable bit
2880	* \returns cost of equal probable bit
2881	*/
2882	__inline Double TComTrQuant::xGetIEPRate ( ) const
2883	{
2884	return 32768;
2885	}
2886
2887	/** Context derivation process of coeff_abs_significant_flag
2888	* \param uiSigCoeffGroupFlag significance map of L1
2889	* \param uiBlkX column of current scan position
2890	* \param uiBlkY row of current scan position
2891	* \param uiLog2BlkSize log2 value of block size
2892	* \returns ctxInc for current scan position
2893	*/
2894	UInt TComTrQuant::getSigCoeffGroupCtxInc (const UInt* uiSigCoeffGroupFlag,
2895	const UInt uiCGPosX,
2896	const UInt uiCGPosY,
2897	const UInt widthInGroups,
2898	const UInt heightInGroups)
2899	{
2900	UInt sigRight = 0;
2901	UInt sigLower = 0;
2902
2903	if (uiCGPosX < (widthInGroups - 1)) sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2904	if (uiCGPosY < (heightInGroups - 1)) sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2905
2906	return ((sigRight + sigLower) != 0) ? 1 : 0;
2907	}
2908
2909
2910	/** set quantized matrix coefficient for encode
2911	* \param scalingList quantaized matrix address
2912	*/
2913	Void TComTrQuant::setScalingList(TComScalingList *scalingList, const ChromaFormat format)
2914	{
2915	const Int minimumQp = 0;
2916	const Int maximumQp = SCALING_LIST_REM_NUM;
2917
2918	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2919	{
2920	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2921	{
2922	for(Int qp = minimumQp; qp < maximumQp; qp++)
2923	{
2924	xSetScalingListEnc(scalingList,list,size,qp,format);
2925	xSetScalingListDec(*scalingList,list,size,qp,format);
2926	setErrScaleCoeff(list,size,qp);
2927	}
2928	}
2929	}
2930	}
2931	/** set quantized matrix coefficient for decode
2932	* \param scalingList quantaized matrix address
2933	*/
2934	Void TComTrQuant::setScalingListDec(const TComScalingList &scalingList, const ChromaFormat format)
2935	{
2936	const Int minimumQp = 0;
2937	const Int maximumQp = SCALING_LIST_REM_NUM;
2938
2939	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2940	{
2941	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2942	{
2943	for(Int qp = minimumQp; qp < maximumQp; qp++)
2944	{
2945	xSetScalingListDec(scalingList,list,size,qp,format);
2946	}
2947	}
2948	}
2949	}
2950	/** set error scale coefficients
2951	* \param list List ID
2952	* \param uiSize Size
2953	* \param uiQP Quantization parameter
2954	*/
2955	Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp)
2956	{
2957	const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2958	const ChannelType channelType = ((list == 0) \|\| (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
2959
2960	const Int iTransformShift = getTransformShift(channelType, uiLog2TrSize); // Represents scaling through forward transform
2961
2962	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2963	Int *piQuantcoeff;
2964	Double *pdErrScale;
2965	piQuantcoeff = getQuantCoeff(list, qp,size);
2966	pdErrScale = getErrScaleCoeff(list, size, qp);
2967
2968	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2969	dErrScale = dErrScalepow(2.0,(-2.0iTransformShift)); // Compensate for scaling through forward transform
2970
2971	for(i=0;i<uiMaxNumCoeff;i++)
2972	{
2973	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2974	}
2975
2976	getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2977	}
2978
2979	/** set quantized matrix coefficient for encode
2980	* \param scalingList quantaized matrix address
2981	* \param listId List index
2982	* \param sizeId size index
2983	* \param uiQP Quantization parameter
2984	*/
2985	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
2986	{
2987	UInt width = g_scalingListSizeX[sizeId];
2988	UInt height = g_scalingListSizeX[sizeId];
2989	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2990	Int *quantcoeff;
2991	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2992	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2993
2994	Int quantScales = g_quantScales[qp];
2995
2996	processScalingListEnc(coeff,
2997	quantcoeff,
2998	(quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
2999	height, width, ratio,
3000	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3001	scalingList->getScalingListDC(sizeId,listId));
3002	}
3003
3004	/** set quantized matrix coefficient for decode
3005	* \param scalingList quantaized matrix address
3006	* \param list List index
3007	* \param size size index
3008	* \param uiQP Quantization parameter
3009	*/
3010	Void TComTrQuant::xSetScalingListDec(const TComScalingList &scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
3011	{
3012	UInt width = g_scalingListSizeX[sizeId];
3013	UInt height = g_scalingListSizeX[sizeId];
3014	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3015	Int *dequantcoeff;
3016	const Int *coeff = scalingList.getScalingListAddress(sizeId,listId);
3017
3018	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
3019
3020	Int invQuantScale = g_invQuantScales[qp];
3021
3022	processScalingListDec(coeff,
3023	dequantcoeff,
3024	invQuantScale,
3025	height, width, ratio,
3026	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3027	scalingList.getScalingListDC(sizeId,listId));
3028	}
3029
3030	/** set flat matrix value to quantized coefficient
3031	*/
3032	Void TComTrQuant::setFlatScalingList(const ChromaFormat format)
3033	{
3034	const Int minimumQp = 0;
3035	const Int maximumQp = SCALING_LIST_REM_NUM;
3036
3037	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3038	{
3039	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3040	{
3041	for(Int qp = minimumQp; qp < maximumQp; qp++)
3042	{
3043	xsetFlatScalingList(list,size,qp,format);
3044	setErrScaleCoeff(list,size,qp);
3045	}
3046	}
3047	}
3048	}
3049
3050	/** set flat matrix value to quantized coefficient
3051	* \param list List ID
3052	* \param uiQP Quantization parameter
3053	* \param uiSize Size
3054	*/
3055	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp, const ChromaFormat format)
3056	{
3057	UInt i,num = g_scalingListSize[size];
3058	Int *quantcoeff;
3059	Int *dequantcoeff;
3060
3061	Int quantScales = g_quantScales [qp];
3062	Int invQuantScales = g_invQuantScales[qp] << 4;
3063
3064	quantcoeff = getQuantCoeff(list, qp, size);
3065	dequantcoeff = getDequantCoeff(list, qp, size);
3066
3067	for(i=0;i<num;i++)
3068	{
3069	*quantcoeff++ = quantScales;
3070	*dequantcoeff++ = invQuantScales;
3071	}
3072	}
3073
3074	/** set quantized matrix coefficient for encode
3075	* \param coeff quantaized matrix address
3076	* \param quantcoeff quantaized matrix address
3077	* \param quantScales Q(QP%6)
3078	* \param height height
3079	* \param width width
3080	* \param ratio ratio for upscale
3081	* \param sizuNum matrix size
3082	* \param dc dc parameter
3083	*/
3084	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3085	{
3086	for(UInt j=0;j<height;j++)
3087	{
3088	for(UInt i=0;i<width;i++)
3089	{
3090	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j / ratio) + i / ratio];
3091	}
3092	}
3093
3094	if(ratio > 1)
3095	{
3096	quantcoeff[0] = quantScales / dc;
3097	}
3098	}
3099
3100	/** set quantized matrix coefficient for decode
3101	* \param coeff quantaized matrix address
3102	* \param dequantcoeff quantaized matrix address
3103	* \param invQuantScales IQ(QP%6))
3104	* \param height height
3105	* \param width width
3106	* \param ratio ratio for upscale
3107	* \param sizuNum matrix size
3108	* \param dc dc parameter
3109	*/
3110	Void TComTrQuant::processScalingListDec( const Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3111	{
3112	for(UInt j=0;j<height;j++)
3113	{
3114	for(UInt i=0;i<width;i++)
3115	{
3116	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
3117	}
3118	}
3119
3120	if(ratio > 1)
3121	{
3122	dequantcoeff[0] = invQuantScales * dc;
3123	}
3124	}
3125
3126	/** initialization process of scaling list array
3127	*/
3128	Void TComTrQuant::initScalingList()
3129	{
3130	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3131	{
3132	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3133	{
3134	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3135	{
3136	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3137	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3138	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
3139	} // listID loop
3140	}
3141	}
3142	}
3143
3144	/** destroy quantization matrix array
3145	*/
3146	Void TComTrQuant::destroyScalingList()
3147	{
3148	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3149	{
3150	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3151	{
3152	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3153	{
3154	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
3155	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
3156	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
3157	}
3158	}
3159	}
3160	}
3161
3162	Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const TCoeff resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint)
3163	{
3164	TComDataCU *pcCU = rTu.getCU();
3165	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3166	const TComRectangle &rect = rTu.getRect(compID);
3167	const UInt uiWidth = rect.width;
3168	const UInt uiHeight = rect.height;
3169	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3170	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3171	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3172	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
3173
3174	assert( scalingListType < SCALING_LIST_NUM );
3175	const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) );
3176
3177
3178	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
3179	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
3180	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
3181	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
3182	*/
3183
3184	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
3185	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
3186
3187	const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9);
3188
3189	TCoeff transformedCoefficient;
3190
3191	// transform-skip
3192	if (iTransformShift >= 0)
3193	{
3194	transformedCoefficient = resiDiff << iTransformShift;
3195	}
3196	else // for very high bit depths
3197	{
3198	const Int iTrShiftNeg = -iTransformShift;
3199	const Int offset = 1 << (iTrShiftNeg - 1);
3200	transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg;
3201	}
3202
3203	// quantization
3204	const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
3205
3206	const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
3207
3208	const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient;
3209
3210	const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
3211
3212	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3213	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3214	pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
3215	}
3216
3217
3218	Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos )
3219	{
3220	TComDataCU *pcCU = rTu.getCU();
3221	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3222	const TComRectangle &rect = rTu.getRect(compID);
3223	const UInt uiWidth = rect.width;
3224	const UInt uiHeight = rect.height;
3225	const Int QP_per = cQP.per;
3226	const Int QP_rem = cQP.rem;
3227	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3228	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3229	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3230	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
3231
3232	assert( scalingListType < SCALING_LIST_NUM );
3233
3234	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
3235
3236	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3237	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3238
3239	// Dequantisation
3240
3241	TCoeff dequantisedSample;
3242
3243	if(enableScalingLists)
3244	{
3245	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
3246	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
3247
3248	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3249	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3250
3251	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
3252
3253	if(rightShift > 0)
3254	{
3255	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3256	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3257	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift;
3258
3259	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3260	}
3261	else
3262	{
3263	const Int leftShift = -rightShift;
3264	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3265	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift;
3266
3267	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3268	}
3269	}
3270	else
3271	{
3272	const Int scale = g_invQuantScales[QP_rem];
3273	const Int scaleBits = (IQUANT_SHIFT + 1) ;
3274
3275	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
3276	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3277	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3278
3279	if (rightShift > 0)
3280	{
3281	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3282	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3283	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
3284
3285	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3286	}
3287	else
3288	{
3289	const Int leftShift = -rightShift;
3290	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3291	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
3292
3293	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3294	}
3295	}
3296
3297	// Inverse transform-skip
3298
3299	if (iTransformShift >= 0)
3300	{
3301	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
3302	reconSample = Pel(( dequantisedSample + offset ) >> iTransformShift);
3303	}
3304	else //for very high bit depths
3305	{
3306	const Int iTrShiftNeg = -iTransformShift;
3307	reconSample = Pel(dequantisedSample << iTrShiftNeg);
3308	}
3309	}
3310
3311
3312	Void TComTrQuant::crossComponentPrediction( TComTU & rTu,
3313	const ComponentID compID,
3314	const Pel * piResiL,
3315	const Pel * piResiC,
3316	Pel * piResiT,
3317	const Int width,
3318	const Int height,
3319	const Int strideL,
3320	const Int strideC,
3321	const Int strideT,
3322	const Bool reverse )
3323	{
3324	const Pel *pResiL = piResiL;
3325	const Pel *pResiC = piResiC;
3326	Pel *pResiT = piResiT;
3327
3328	TComDataCU *pCU = rTu.getCU();
3329	const Int alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID );
3330	const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
3331
3332	for( Int y = 0; y < height; y++ )
3333	{
3334	if (reverse)
3335	{
3336	// A constraint is to be added to the HEVC Standard to limit the size of pResiL and pResiC at this point.
3337	// The likely form of the constraint is to either restrict the values to CoeffMin to CoeffMax,
3338	// or to be representable in a bitDepthY+4 or bitDepthC+4 signed integer.
3339	// The result of the constraint is that for 8/10/12bit profiles, the input values
3340	// can be represented within a 16-bit Pel-type.
3341	#if RExt__HIGH_BIT_DEPTH_SUPPORT
3342	for( Int x = 0; x < width; x++ )
3343	{
3344	pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3);
3345	}
3346	#else
3347	const Int minPel=std::numeric_limits<Pel>::min();
3348	const Int maxPel=std::numeric_limits<Pel>::max();
3349	for( Int x = 0; x < width; x++ )
3350	{
3351	pResiT[x] = Clip3<Int>(minPel, maxPel, pResiC[x] + (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3));
3352	}
3353	#endif
3354	}
3355	else
3356	{
3357	// Forward does not need clipping. Pel type should always be big enough.
3358	for( Int x = 0; x < width; x++ )
3359	{
3360	pResiT[x] = pResiC[x] - (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3);
3361	}
3362	}
3363
3364	pResiL += strideL;
3365	pResiC += strideC;
3366	pResiT += strideT;
3367	}
3368	}
3369
3370	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: