Context navigation

source: SHVCSoftware/branches/SHM-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 1174

Visit:

Last change on this file since 1174 was 1029, checked in by seregin, 10 years ago
merge with SHM-upgrade branch
Property svn:eol-style set to `native`
File size: 122.4 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2014, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <limits>
41	#include <memory.h>
42	#include "TComTrQuant.h"
43	#include "TComPic.h"
44	#include "ContextTables.h"
45	#include "TComTU.h"
46	#include "Debug.h"
47
48	typedef struct
49	{
50	Int iNNZbeforePos0;
51	Double d64CodedLevelandDist; // distortion and level cost only
52	Double d64UncodedDist; // all zero coded block distortion
53	Double d64SigCost;
54	Double d64SigCost_0;
55	} coeffGroupRDStats;
56
57	//! \ingroup TLibCommon
58	//! \{
59
60	// ====================================================================================================================
61	// Constants
62	// ====================================================================================================================
63
64	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
65
66
67	// ====================================================================================================================
68	// QpParam constructor
69	// ====================================================================================================================
70
71	QpParam::QpParam(const Int qpy,
72	const ChannelType chType,
73	const Int qpBdOffset,
74	const Int chromaQPOffset,
75	const ChromaFormat chFmt )
76	{
77	Int baseQp;
78
79	if(isLuma(chType))
80	{
81	baseQp = qpy + qpBdOffset;
82	}
83	else
84	{
85	baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
86
87	if(baseQp < 0)
88	{
89	baseQp = baseQp + qpBdOffset;
90	}
91	else
92	{
93	baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
94	}
95	}
96
97	Qp =baseQp;
98	per=baseQp/6;
99	rem=baseQp%6;
100	}
101
102	QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
103	{
104	Int chromaQpOffset = 0;
105
106	if (isChroma(compID))
107	{
108	chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
109	chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
110
111	chromaQpOffset += cu.getSlice()->getPPS()->getChromaQpAdjTableAt(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
112	}
113
114	#if REPN_FORMAT_IN_VPS
115	TComSlice* slice = const_cast<TComSlice*> (cu.getSlice());
116	#endif
117
118	*this = QpParam(cu.getQP( 0 ),
119	toChannelType(compID),
120	#if REPN_FORMAT_IN_VPS
121	isLuma(compID) ? slice->getQpBDOffsetY() : slice->getQpBDOffsetC(),
122	#else
123	cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
124	#endif
125	chromaQpOffset,
126	cu.getPic()->getChromaFormat());
127	}
128
129
130	// ====================================================================================================================
131	// TComTrQuant class member functions
132	// ====================================================================================================================
133
134	TComTrQuant::TComTrQuant()
135	{
136	// allocate temporary buffers
137	m_plTempCoeff = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ];
138
139	// allocate bit estimation class (for RDOQ)
140	m_pcEstBitsSbac = new estBitsSbacStruct;
141	initScalingList();
142	}
143
144	TComTrQuant::~TComTrQuant()
145	{
146	// delete temporary buffers
147	if ( m_plTempCoeff )
148	{
149	delete [] m_plTempCoeff;
150	m_plTempCoeff = NULL;
151	}
152
153	// delete bit estimation class
154	if ( m_pcEstBitsSbac )
155	{
156	delete m_pcEstBitsSbac;
157	}
158	destroyScalingList();
159	}
160
161	#if ADAPTIVE_QP_SELECTION
162	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
163	{
164	// NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled?
165
166	Int qpBase = pcSlice->getSliceQpBase();
167	Int sliceQpused = pcSlice->getSliceQp();
168	Int sliceQpnext;
169	Double alpha = qpBase < 17 ? 0.5 : 1;
170
171	Int cnt=0;
172	for(Int u=1; u<=LEVEL_RANGE; u++)
173	{
174	cnt += m_sliceNsamples[u] ;
175	}
176
177	if( !m_useRDOQ )
178	{
179	sliceQpused = qpBase;
180	alpha = 0.5;
181	}
182
183	if( cnt > 120 )
184	{
185	Double sum = 0;
186	Int k = 0;
187	for(Int u=1; u<LEVEL_RANGE; u++)
188	{
189	sum += u*m_sliceSumC[u];
190	k += uum_sliceNsamples[u];
191	}
192
193	Int v;
194	Double q[MAX_QP+1] ;
195	for(v=0; v<=MAX_QP; v++)
196	{
197	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
198	}
199
200	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
201
202	for(v=0; v<MAX_QP; v++)
203	{
204	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
205	{
206	break;
207	}
208	}
209	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
210	}
211	else
212	{
213	sliceQpnext = sliceQpused;
214	}
215
216	m_qpDelta[qpBase] = sliceQpnext - qpBase;
217	}
218
219	Void TComTrQuant::initSliceQpDelta()
220	{
221	for(Int qp=0; qp<=MAX_QP; qp++)
222	{
223	m_qpDelta[qp] = qp < 17 ? 0 : 1;
224	}
225	}
226
227	Void TComTrQuant::clearSliceARLCnt()
228	{
229	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
230	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
231	}
232	#endif
233
234
235
236	#if MATRIX_MULT
237	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
238	* \param block pointer to input data (residual)
239	* \param coeff pointer to output data (transform coefficients)
240	* \param uiStride stride of input data
241	* \param uiTrSize transform size (uiTrSize x uiTrSize)
242	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
243	*/
244	Void xTr(Int bitDepth, Pel block, TCoeff coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
245	{
246	UInt i,j,k;
247	TCoeff iSum;
248	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
249	const TMatrixCoeff *iT;
250	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
251
252	if (uiTrSize==4)
253	{
254	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]);
255	}
256	else if (uiTrSize==8)
257	{
258	iT = g_aiT8[TRANSFORM_FORWARD][0];
259	}
260	else if (uiTrSize==16)
261	{
262	iT = g_aiT16[TRANSFORM_FORWARD][0];
263	}
264	else if (uiTrSize==32)
265	{
266	iT = g_aiT32[TRANSFORM_FORWARD][0];
267	}
268	else
269	{
270	assert(0);
271	}
272
273	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
274
275	const Int shift_1st = (uiLog2TrSize + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
276	const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT;
277	const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0;
278	const Int add_2nd = 1<<(shift_2nd-1);
279
280	/* Horizontal transform */
281
282	for (i=0; i<uiTrSize; i++)
283	{
284	for (j=0; j<uiTrSize; j++)
285	{
286	iSum = 0;
287	for (k=0; k<uiTrSize; k++)
288	{
289	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
290	}
291	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
292	}
293	}
294
295	/* Vertical transform */
296	for (i=0; i<uiTrSize; i++)
297	{
298	for (j=0; j<uiTrSize; j++)
299	{
300	iSum = 0;
301	for (k=0; k<uiTrSize; k++)
302	{
303	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
304	}
305	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
306	}
307	}
308	}
309
310	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
311	* \param coeff pointer to input data (transform coefficients)
312	* \param block pointer to output data (residual)
313	* \param uiStride stride of output data
314	* \param uiTrSize transform size (uiTrSize x uiTrSize)
315	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
316	*/
317	Void xITr(Int bitDepth, TCoeff coeff, Pel block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
318	{
319	UInt i,j,k;
320	TCoeff iSum;
321	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
322	const TMatrixCoeff *iT;
323
324	if (uiTrSize==4)
325	{
326	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]);
327	}
328	else if (uiTrSize==8)
329	{
330	iT = g_aiT8[TRANSFORM_INVERSE][0];
331	}
332	else if (uiTrSize==16)
333	{
334	iT = g_aiT16[TRANSFORM_INVERSE][0];
335	}
336	else if (uiTrSize==32)
337	{
338	iT = g_aiT32[TRANSFORM_INVERSE][0];
339	}
340	else
341	{
342	assert(0);
343	}
344
345	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
346
347	const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
348	const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
349	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
350	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
351	assert(shift_2nd>=0);
352	const Int add_1st = 1<<(shift_1st-1);
353	const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0;
354
355	/* Horizontal transform */
356	for (i=0; i<uiTrSize; i++)
357	{
358	for (j=0; j<uiTrSize; j++)
359	{
360	iSum = 0;
361	for (k=0; k<uiTrSize; k++)
362	{
363	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
364	}
365
366	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
367	tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st);
368	}
369	}
370
371	/* Vertical transform */
372	for (i=0; i<uiTrSize; i++)
373	{
374	for (j=0; j<uiTrSize; j++)
375	{
376	iSum = 0;
377	for (k=0; k<uiTrSize; k++)
378	{
379	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
380	}
381
382	block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd);
383	}
384	}
385	}
386
387	#endif //MATRIX_MULT
388
389
390	/** 4x4 forward transform implemented using partial butterfly structure (1D)
391	* \param src input data (residual)
392	* \param dst output data (transform coefficients)
393	* \param shift specifies right shift after 1D transform
394	*/
395	Void partialButterfly4(TCoeff src, TCoeff dst, Int shift, Int line)
396	{
397	Int j;
398	TCoeff E[2],O[2];
399	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
400
401	for (j=0; j<line; j++)
402	{
403	/* E and O */
404	E[0] = src[0] + src[3];
405	O[0] = src[0] - src[3];
406	E[1] = src[1] + src[2];
407	O[1] = src[1] - src[2];
408
409	dst[0] = (g_aiT4[TRANSFORM_FORWARD][0][0]E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]E[1] + add)>>shift;
410	dst[2line] = (g_aiT4[TRANSFORM_FORWARD][2][0]E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift;
411	dst[line] = (g_aiT4[TRANSFORM_FORWARD][1][0]O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]O[1] + add)>>shift;
412	dst[3line] = (g_aiT4[TRANSFORM_FORWARD][3][0]O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift;
413
414	src += 4;
415	dst ++;
416	}
417	}
418
419	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
420	// give identical results
421	Void fastForwardDst(TCoeff block, TCoeff coeff, Int shift) // input block, output coeff
422	{
423	Int i;
424	TCoeff c[4];
425	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
426	for (i=0; i<4; i++)
427	{
428	// Intermediate Variables
429	c[0] = block[4*i+0];
430	c[1] = block[4*i+1];
431	c[2] = block[4*i+2];
432	c[3] = block[4*i+3];
433
434	for (Int row = 0; row < 4; row++)
435	{
436	TCoeff result = 0;
437	for (Int column = 0; column < 4; column++)
438	result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers
439
440	coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift);
441	}
442	}
443	}
444
445	Void fastInverseDst(TCoeff tmp, TCoeff block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum) // input tmp, output block
446	{
447	Int i;
448	TCoeff c[4];
449	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
450	for (i=0; i<4; i++)
451	{
452	// Intermediate Variables
453	c[0] = tmp[ i];
454	c[1] = tmp[4 +i];
455	c[2] = tmp[8 +i];
456	c[3] = tmp[12+i];
457
458	for (Int column = 0; column < 4; column++)
459	{
460	TCoeff &result = block[(i * 4) + column];
461
462	result = 0;
463	for (Int row = 0; row < 4; row++)
464	result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers
465
466	result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift));
467	}
468	}
469	}
470
471	/** 4x4 inverse transform implemented using partial butterfly structure (1D)
472	* \param src input data (transform coefficients)
473	* \param dst output data (residual)
474	* \param shift specifies right shift after 1D transform
475	*/
476	Void partialButterflyInverse4(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
477	{
478	Int j;
479	TCoeff E[2],O[2];
480	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
481
482	for (j=0; j<line; j++)
483	{
484	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
485	O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]src[3*line];
486	O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]src[3*line];
487	E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]src[0] + g_aiT4[TRANSFORM_INVERSE][2][0]src[2*line];
488	E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]src[0] + g_aiT4[TRANSFORM_INVERSE][2][1]src[2*line];
489
490	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
491	dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift );
492	dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift );
493	dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift );
494	dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift );
495
496	src ++;
497	dst += 4;
498	}
499	}
500
501	/** 8x8 forward transform implemented using partial butterfly structure (1D)
502	* \param src input data (residual)
503	* \param dst output data (transform coefficients)
504	* \param shift specifies right shift after 1D transform
505	*/
506	Void partialButterfly8(TCoeff src, TCoeff dst, Int shift, Int line)
507	{
508	Int j,k;
509	TCoeff E[4],O[4];
510	TCoeff EE[2],EO[2];
511	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
512
513	for (j=0; j<line; j++)
514	{
515	/* E and O*/
516	for (k=0;k<4;k++)
517	{
518	E[k] = src[k] + src[7-k];
519	O[k] = src[k] - src[7-k];
520	}
521	/* EE and EO */
522	EE[0] = E[0] + E[3];
523	EO[0] = E[0] - E[3];
524	EE[1] = E[1] + E[2];
525	EO[1] = E[1] - E[2];
526
527	dst[0] = (g_aiT8[TRANSFORM_FORWARD][0][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]EE[1] + add)>>shift;
528	dst[4line] = (g_aiT8[TRANSFORM_FORWARD][4][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift;
529	dst[2line] = (g_aiT8[TRANSFORM_FORWARD][2][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift;
530	dst[6line] = (g_aiT8[TRANSFORM_FORWARD][6][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift;
531
532	dst[line] = (g_aiT8[TRANSFORM_FORWARD][1][0]O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]O[3] + add)>>shift;
533	dst[3line] = (g_aiT8[TRANSFORM_FORWARD][3][0]O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift;
534	dst[5line] = (g_aiT8[TRANSFORM_FORWARD][5][0]O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift;
535	dst[7line] = (g_aiT8[TRANSFORM_FORWARD][7][0]O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift;
536
537	src += 8;
538	dst ++;
539	}
540	}
541
542	/** 8x8 inverse transform implemented using partial butterfly structure (1D)
543	* \param src input data (transform coefficients)
544	* \param dst output data (residual)
545	* \param shift specifies right shift after 1D transform
546	*/
547	Void partialButterflyInverse8(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
548	{
549	Int j,k;
550	TCoeff E[4],O[4];
551	TCoeff EE[2],EO[2];
552	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
553
554	for (j=0; j<line; j++)
555	{
556	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
557	for (k=0;k<4;k++)
558	{
559	O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]src[line] + g_aiT8[TRANSFORM_INVERSE][ 3][k]src[3*line] +
560	g_aiT8[TRANSFORM_INVERSE][ 5][k]src[5line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]src[7line];
561	}
562
563	EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][0]src[ 6line ];
564	EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][1]src[ 6line ];
565	EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][0]src[ 4*line ];
566	EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][1]src[ 4*line ];
567
568	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
569	E[0] = EE[0] + EO[0];
570	E[3] = EE[0] - EO[0];
571	E[1] = EE[1] + EO[1];
572	E[2] = EE[1] - EO[1];
573	for (k=0;k<4;k++)
574	{
575	dst[ k ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
576	dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift );
577	}
578	src ++;
579	dst += 8;
580	}
581	}
582
583	/** 16x16 forward transform implemented using partial butterfly structure (1D)
584	* \param src input data (residual)
585	* \param dst output data (transform coefficients)
586	* \param shift specifies right shift after 1D transform
587	*/
588	Void partialButterfly16(TCoeff src, TCoeff dst, Int shift, Int line)
589	{
590	Int j,k;
591	TCoeff E[8],O[8];
592	TCoeff EE[4],EO[4];
593	TCoeff EEE[2],EEO[2];
594	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
595
596	for (j=0; j<line; j++)
597	{
598	/* E and O*/
599	for (k=0;k<8;k++)
600	{
601	E[k] = src[k] + src[15-k];
602	O[k] = src[k] - src[15-k];
603	}
604	/* EE and EO */
605	for (k=0;k<4;k++)
606	{
607	EE[k] = E[k] + E[7-k];
608	EO[k] = E[k] - E[7-k];
609	}
610	/* EEE and EEO */
611	EEE[0] = EE[0] + EE[3];
612	EEO[0] = EE[0] - EE[3];
613	EEE[1] = EE[1] + EE[2];
614	EEO[1] = EE[1] - EE[2];
615
616	dst[ 0 ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]EEE[1] + add)>>shift;
617	dst[ 8line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift;
618	dst[ 4line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift;
619	dst[ 12line] = (g_aiT16[TRANSFORM_FORWARD][12][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift;
620
621	for (k=2;k<16;k+=4)
622	{
623	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] +
624	g_aiT16[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]EO[3] + add)>>shift;
625	}
626
627	for (k=1;k<16;k+=2)
628	{
629	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] +
630	g_aiT16[TRANSFORM_FORWARD][k][2]O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]O[3] +
631	g_aiT16[TRANSFORM_FORWARD][k][4]O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]O[5] +
632	g_aiT16[TRANSFORM_FORWARD][k][6]O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]O[7] + add)>>shift;
633	}
634
635	src += 16;
636	dst ++;
637
638	}
639	}
640
641	/** 16x16 inverse transform implemented using partial butterfly structure (1D)
642	* \param src input data (transform coefficients)
643	* \param dst output data (residual)
644	* \param shift specifies right shift after 1D transform
645	*/
646	Void partialButterflyInverse16(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
647	{
648	Int j,k;
649	TCoeff E[8],O[8];
650	TCoeff EE[4],EO[4];
651	TCoeff EEE[2],EEO[2];
652	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
653
654	for (j=0; j<line; j++)
655	{
656	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
657	for (k=0;k<8;k++)
658	{
659	O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]src[ line] + g_aiT16[TRANSFORM_INVERSE][ 3][k]src[ 3*line] +
660	g_aiT16[TRANSFORM_INVERSE][ 5][k]src[ 5line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]src[ 7line] +
661	g_aiT16[TRANSFORM_INVERSE][ 9][k]src[ 9line] + g_aiT16[TRANSFORM_INVERSE][11][k]src[11line] +
662	g_aiT16[TRANSFORM_INVERSE][13][k]src[13line] + g_aiT16[TRANSFORM_INVERSE][15][k]src[15line];
663	}
664	for (k=0;k<4;k++)
665	{
666	EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]src[ 2line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]src[ 6line] +
667	g_aiT16[TRANSFORM_INVERSE][10][k]src[10line] + g_aiT16[TRANSFORM_INVERSE][14][k]src[14line];
668	}
669	EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][0]src[ 12line ];
670	EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]src[ 8*line ];
671	EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][1]src[ 12line ];
672	EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]src[ 8*line ];
673
674	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
675	for (k=0;k<2;k++)
676	{
677	EE[k] = EEE[k] + EEO[k];
678	EE[k+2] = EEE[1-k] - EEO[1-k];
679	}
680	for (k=0;k<4;k++)
681	{
682	E[k] = EE[k] + EO[k];
683	E[k+4] = EE[3-k] - EO[3-k];
684	}
685	for (k=0;k<8;k++)
686	{
687	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
688	dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift );
689	}
690	src ++;
691	dst += 16;
692	}
693	}
694
695	/** 32x32 forward transform implemented using partial butterfly structure (1D)
696	* \param src input data (residual)
697	* \param dst output data (transform coefficients)
698	* \param shift specifies right shift after 1D transform
699	*/
700	Void partialButterfly32(TCoeff src, TCoeff dst, Int shift, Int line)
701	{
702	Int j,k;
703	TCoeff E[16],O[16];
704	TCoeff EE[8],EO[8];
705	TCoeff EEE[4],EEO[4];
706	TCoeff EEEE[2],EEEO[2];
707	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
708
709	for (j=0; j<line; j++)
710	{
711	/* E and O*/
712	for (k=0;k<16;k++)
713	{
714	E[k] = src[k] + src[31-k];
715	O[k] = src[k] - src[31-k];
716	}
717	/* EE and EO */
718	for (k=0;k<8;k++)
719	{
720	EE[k] = E[k] + E[15-k];
721	EO[k] = E[k] - E[15-k];
722	}
723	/* EEE and EEO */
724	for (k=0;k<4;k++)
725	{
726	EEE[k] = EE[k] + EE[7-k];
727	EEO[k] = EE[k] - EE[7-k];
728	}
729	/* EEEE and EEEO */
730	EEEE[0] = EEE[0] + EEE[3];
731	EEEO[0] = EEE[0] - EEE[3];
732	EEEE[1] = EEE[1] + EEE[2];
733	EEEO[1] = EEE[1] - EEE[2];
734
735	dst[ 0 ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]EEEE[1] + add)>>shift;
736	dst[ 16line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift;
737	dst[ 8line ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift;
738	dst[ 24line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift;
739	for (k=4;k<32;k+=8)
740	{
741	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] +
742	g_aiT32[TRANSFORM_FORWARD][k][2]EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EEO[3] + add)>>shift;
743	}
744	for (k=2;k<32;k+=4)
745	{
746	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] +
747	g_aiT32[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EO[3] +
748	g_aiT32[TRANSFORM_FORWARD][k][4]EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]EO[5] +
749	g_aiT32[TRANSFORM_FORWARD][k][6]EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]EO[7] + add)>>shift;
750	}
751	for (k=1;k<32;k+=2)
752	{
753	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] +
754	g_aiT32[TRANSFORM_FORWARD][k][ 2]O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]O[ 3] +
755	g_aiT32[TRANSFORM_FORWARD][k][ 4]O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]O[ 5] +
756	g_aiT32[TRANSFORM_FORWARD][k][ 6]O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]O[ 7] +
757	g_aiT32[TRANSFORM_FORWARD][k][ 8]O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]O[ 9] +
758	g_aiT32[TRANSFORM_FORWARD][k][10]O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]O[11] +
759	g_aiT32[TRANSFORM_FORWARD][k][12]O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]O[13] +
760	g_aiT32[TRANSFORM_FORWARD][k][14]O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]O[15] + add)>>shift;
761	}
762
763	src += 32;
764	dst ++;
765	}
766	}
767
768	/** 32x32 inverse transform implemented using partial butterfly structure (1D)
769	* \param src input data (transform coefficients)
770	* \param dst output data (residual)
771	* \param shift specifies right shift after 1D transform
772	*/
773	Void partialButterflyInverse32(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
774	{
775	Int j,k;
776	TCoeff E[16],O[16];
777	TCoeff EE[8],EO[8];
778	TCoeff EEE[4],EEO[4];
779	TCoeff EEEE[2],EEEO[2];
780	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
781
782	for (j=0; j<line; j++)
783	{
784	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
785	for (k=0;k<16;k++)
786	{
787	O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]src[ line ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]src[ 3*line ] +
788	g_aiT32[TRANSFORM_INVERSE][ 5][k]src[ 5line ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]src[ 7line ] +
789	g_aiT32[TRANSFORM_INVERSE][ 9][k]src[ 9line ] + g_aiT32[TRANSFORM_INVERSE][11][k]src[ 11line ] +
790	g_aiT32[TRANSFORM_INVERSE][13][k]src[ 13line ] + g_aiT32[TRANSFORM_INVERSE][15][k]src[ 15line ] +
791	g_aiT32[TRANSFORM_INVERSE][17][k]src[ 17line ] + g_aiT32[TRANSFORM_INVERSE][19][k]src[ 19line ] +
792	g_aiT32[TRANSFORM_INVERSE][21][k]src[ 21line ] + g_aiT32[TRANSFORM_INVERSE][23][k]src[ 23line ] +
793	g_aiT32[TRANSFORM_INVERSE][25][k]src[ 25line ] + g_aiT32[TRANSFORM_INVERSE][27][k]src[ 27line ] +
794	g_aiT32[TRANSFORM_INVERSE][29][k]src[ 29line ] + g_aiT32[TRANSFORM_INVERSE][31][k]src[ 31line ];
795	}
796	for (k=0;k<8;k++)
797	{
798	EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]src[ 2line ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]src[ 6line ] +
799	g_aiT32[TRANSFORM_INVERSE][10][k]src[ 10line ] + g_aiT32[TRANSFORM_INVERSE][14][k]src[ 14line ] +
800	g_aiT32[TRANSFORM_INVERSE][18][k]src[ 18line ] + g_aiT32[TRANSFORM_INVERSE][22][k]src[ 22line ] +
801	g_aiT32[TRANSFORM_INVERSE][26][k]src[ 26line ] + g_aiT32[TRANSFORM_INVERSE][30][k]src[ 30line ];
802	}
803	for (k=0;k<4;k++)
804	{
805	EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]src[ 4line ] + g_aiT32[TRANSFORM_INVERSE][12][k]src[ 12line ] +
806	g_aiT32[TRANSFORM_INVERSE][20][k]src[ 20line ] + g_aiT32[TRANSFORM_INVERSE][28][k]src[ 28line ];
807	}
808	EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][0]src[ 24line ];
809	EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][1]src[ 24line ];
810	EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][0]src[ 16*line ];
811	EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][1]src[ 16*line ];
812
813	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
814	EEE[0] = EEEE[0] + EEEO[0];
815	EEE[3] = EEEE[0] - EEEO[0];
816	EEE[1] = EEEE[1] + EEEO[1];
817	EEE[2] = EEEE[1] - EEEO[1];
818	for (k=0;k<4;k++)
819	{
820	EE[k] = EEE[k] + EEO[k];
821	EE[k+4] = EEE[3-k] - EEO[3-k];
822	}
823	for (k=0;k<8;k++)
824	{
825	E[k] = EE[k] + EO[k];
826	E[k+8] = EE[7-k] - EO[7-k];
827	}
828	for (k=0;k<16;k++)
829	{
830	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
831	dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift );
832	}
833	src ++;
834	dst += 32;
835	}
836	}
837
838	/** MxN forward transform (2D)
839	* \param block input data (residual)
840	* \param coeff output data (transform coefficients)
841	* \param iWidth input data (width of transform)
842	* \param iHeight input data (height of transform)
843	*/
844	Void xTrMxN(Int bitDepth, TCoeff block, TCoeff coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
845	{
846	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
847
848	const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
849	const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;
850
851	assert(shift_1st >= 0);
852	assert(shift_2nd >= 0);
853
854	TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];
855
856	switch (iWidth)
857	{
858	case 4:
859	{
860	if ((iHeight == 4) && useDST) // Check for DCT or DST
861	{
862	fastForwardDst( block, tmp, shift_1st );
863	}
864	else partialButterfly4 ( block, tmp, shift_1st, iHeight );
865	}
866	break;
867
868	case 8: partialButterfly8 ( block, tmp, shift_1st, iHeight ); break;
869	case 16: partialButterfly16( block, tmp, shift_1st, iHeight ); break;
870	case 32: partialButterfly32( block, tmp, shift_1st, iHeight ); break;
871	default:
872	assert(0); exit (1); break;
873	}
874
875	switch (iHeight)
876	{
877	case 4:
878	{
879	if ((iWidth == 4) && useDST) // Check for DCT or DST
880	{
881	fastForwardDst( tmp, coeff, shift_2nd );
882	}
883	else partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
884	}
885	break;
886
887	case 8: partialButterfly8 ( tmp, coeff, shift_2nd, iWidth ); break;
888	case 16: partialButterfly16( tmp, coeff, shift_2nd, iWidth ); break;
889	case 32: partialButterfly32( tmp, coeff, shift_2nd, iWidth ); break;
890	default:
891	assert(0); exit (1); break;
892	}
893	}
894
895
896	/** MxN inverse transform (2D)
897	* \param coeff input data (transform coefficients)
898	* \param block output data (residual)
899	* \param iWidth input data (width of transform)
900	* \param iHeight input data (height of transform)
901	*/
902	Void xITrMxN(Int bitDepth, TCoeff coeff, TCoeff block, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
903	{
904	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
905
906	Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
907	Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
908	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
909	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
910
911	assert(shift_1st >= 0);
912	assert(shift_2nd >= 0);
913
914	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
915
916	switch (iHeight)
917	{
918	case 4:
919	{
920	if ((iWidth == 4) && useDST) // Check for DCT or DST
921	{
922	fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum);
923	}
924	else partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum);
925	}
926	break;
927
928	case 8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
929	case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
930	case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
931
932	default:
933	assert(0); exit (1); break;
934	}
935
936	switch (iWidth)
937	{
938	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
939	case 4:
940	{
941	if ((iHeight == 4) && useDST) // Check for DCT or DST
942	{
943	fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() );
944	}
945	else partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max());
946	}
947	break;
948
949	case 8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
950	case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
951	case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
952
953	default:
954	assert(0); exit (1); break;
955	}
956	}
957
958
959	// To minimize the distortion only. No rate is considered.
960	Void TComTrQuant::signBitHidingHDQ( const ComponentID compID, TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters )
961	{
962	const UInt width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
963	const UInt height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
964	const UInt groupSize = 1 << MLS_CG_SIZE;
965
966	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
967	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
968
969	Int lastCG = -1;
970	Int absSum = 0 ;
971	Int n ;
972
973	for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
974	{
975	Int subPos = subSet << MLS_CG_SIZE;
976	Int firstNZPosInCG=groupSize , lastNZPosInCG=-1 ;
977	absSum = 0 ;
978
979	for(n = groupSize-1; n >= 0; --n )
980	{
981	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
982	{
983	lastNZPosInCG = n;
984	break;
985	}
986	}
987
988	for(n = 0; n <groupSize; n++ )
989	{
990	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
991	{
992	firstNZPosInCG = n;
993	break;
994	}
995	}
996
997	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
998	{
999	absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]);
1000	}
1001
1002	if(lastNZPosInCG>=0 && lastCG==-1)
1003	{
1004	lastCG = 1 ;
1005	}
1006
1007	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1008	{
1009	UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ;
1010	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1011	{
1012	TCoeff curCost = std::numeric_limits<TCoeff>::max();
1013	TCoeff minCostInc = std::numeric_limits<TCoeff>::max();
1014	Int minPos =-1, finalChange=0, curChange=0;
1015
1016	for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n )
1017	{
1018	UInt blkPos = codingParameters.scan[ n+subPos ];
1019	if(pQCoef[ blkPos ] != 0 )
1020	{
1021	if(deltaU[blkPos]>0)
1022	{
1023	curCost = - deltaU[blkPos];
1024	curChange=1 ;
1025	}
1026	else
1027	{
1028	//curChange =-1;
1029	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1030	{
1031	curCost = std::numeric_limits<TCoeff>::max();
1032	}
1033	else
1034	{
1035	curCost = deltaU[blkPos];
1036	curChange =-1;
1037	}
1038	}
1039	}
1040	else
1041	{
1042	if(n<firstNZPosInCG)
1043	{
1044	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1045	if(thisSignBit != signbit )
1046	{
1047	curCost = std::numeric_limits<TCoeff>::max();
1048	}
1049	else
1050	{
1051	curCost = - (deltaU[blkPos]) ;
1052	curChange = 1 ;
1053	}
1054	}
1055	else
1056	{
1057	curCost = - (deltaU[blkPos]) ;
1058	curChange = 1 ;
1059	}
1060	}
1061
1062	if( curCost<minCostInc)
1063	{
1064	minCostInc = curCost ;
1065	finalChange = curChange ;
1066	minPos = blkPos ;
1067	}
1068	} //CG loop
1069
1070	if(pQCoef[minPos] == entropyCodingMaximum \|\| pQCoef[minPos] == entropyCodingMinimum)
1071	{
1072	finalChange = -1;
1073	}
1074
1075	if(pCoef[minPos]>=0)
1076	{
1077	pQCoef[minPos] += finalChange ;
1078	}
1079	else
1080	{
1081	pQCoef[minPos] -= finalChange ;
1082	}
1083	} // Hide
1084	}
1085	if(lastCG==1)
1086	{
1087	lastCG=0 ;
1088	}
1089	} // TU loop
1090
1091	return;
1092	}
1093
1094
1095	Void TComTrQuant::xQuant( TComTU &rTu,
1096	TCoeff * pSrc,
1097	TCoeff * pDes,
1098	#if ADAPTIVE_QP_SELECTION
1099	TCoeff *pArlDes,
1100	#endif
1101	TCoeff &uiAbsSum,
1102	const ComponentID compID,
1103	const QpParam &cQP )
1104	{
1105	const TComRectangle &rect = rTu.getRect(compID);
1106	const UInt uiWidth = rect.width;
1107	const UInt uiHeight = rect.height;
1108	TComDataCU* pcCU = rTu.getCU();
1109	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1110
1111	TCoeff* piCoef = pSrc;
1112	TCoeff* piQCoef = pDes;
1113	#if ADAPTIVE_QP_SELECTION
1114	TCoeff* piArlCCoef = pArlDes;
1115	#endif
1116
1117	const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1118
1119	Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
1120	if ( useRDOQ && (isLuma(compID) \|\| RDOQ_CHROMA) )
1121	{
1122	#if ADAPTIVE_QP_SELECTION
1123	xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );
1124	#else
1125	xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );
1126	#endif
1127	}
1128	else
1129	{
1130	TUEntropyCodingParameters codingParameters;
1131	getTUEntropyCodingParameters(codingParameters, rTu, compID);
1132
1133	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1134	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1135
1136	TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
1137
1138	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1139
1140	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1141	assert(scalingListType < SCALING_LIST_NUM);
1142	Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
1143
1144	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1145	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
1146
1147	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1148	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1149	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1150	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1151	*/
1152
1153	// Represents scaling through forward transform
1154	Int iTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1155	if (useTransformSkip && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
1156	{
1157	iTransformShift = std::max<Int>(0, iTransformShift);
1158	}
1159
1160	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
1161	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
1162
1163	#if ADAPTIVE_QP_SELECTION
1164	Int iQBitsC = MAX_INT;
1165	Int iAddC = MAX_INT;
1166
1167	if (m_bUseAdaptQpSelect)
1168	{
1169	iQBitsC = iQBits - ARL_C_PRECISION;
1170	iAddC = 1 << (iQBitsC-1);
1171	}
1172	#endif
1173
1174	const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1175	const Int qBits8 = iQBits - 8;
1176
1177	for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
1178	{
1179	const TCoeff iLevel = piCoef[uiBlockPos];
1180	const TCoeff iSign = (iLevel < 0 ? -1: 1);
1181
1182	const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
1183
1184	#if ADAPTIVE_QP_SELECTION
1185	if( m_bUseAdaptQpSelect )
1186	{
1187	piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
1188	}
1189	#endif
1190
1191	const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
1192	deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);
1193
1194	uiAbsSum += quantisedMagnitude;
1195	const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
1196
1197	piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
1198	} // for n
1199
1200	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1201	{
1202	if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
1203	{
1204	signBitHidingHDQ( compID, piQCoef, piCoef, deltaU, codingParameters ) ;
1205	}
1206	}
1207	} //if RDOQ
1208	//return;
1209	}
1210
1211	Void TComTrQuant::xDeQuant( TComTU &rTu,
1212	const TCoeff * pSrc,
1213	TCoeff * pDes,
1214	const ComponentID compID,
1215	const QpParam &cQP )
1216	{
1217	assert(compID<MAX_NUM_COMPONENT);
1218
1219	TComDataCU *pcCU = rTu.getCU();
1220	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1221	const TComRectangle &rect = rTu.getRect(compID);
1222	const UInt uiWidth = rect.width;
1223	const UInt uiHeight = rect.height;
1224	const TCoeff *const piQCoef = pSrc;
1225	TCoeff *const piCoef = pDes;
1226	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1227	const UInt numSamplesInBlock = uiWidth*uiHeight;
1228	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1229	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1230	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1231	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1232
1233	assert (scalingListType < SCALING_LIST_NUM);
1234	assert ( uiWidth <= m_uiMaxTrSize );
1235
1236	// Represents scaling through forward transform
1237	const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
1238	const Int originalTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1239	const Int iTransformShift = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift;
1240
1241	const Int QP_per = cQP.per;
1242	const Int QP_rem = cQP.rem;
1243
1244	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
1245
1246	if(enableScalingLists)
1247	{
1248	//from the dequantisation equation:
1249	//iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift
1250	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + dequantCoefBits - rightShift
1251	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
1252	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
1253
1254	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1255	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1256
1257	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
1258
1259	if(rightShift > 0)
1260	{
1261	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1262
1263	for( Int n = 0; n < numSamplesInBlock; n++ )
1264	{
1265	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1266	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
1267
1268	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1269	}
1270	}
1271	else
1272	{
1273	const Int leftShift = -rightShift;
1274
1275	for( Int n = 0; n < numSamplesInBlock; n++ )
1276	{
1277	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1278	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
1279
1280	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1281	}
1282	}
1283	}
1284	else
1285	{
1286	const Int scale = g_invQuantScales[QP_rem];
1287	const Int scaleBits = (IQUANT_SHIFT + 1) ;
1288
1289	//from the dequantisation equation:
1290	//iCoeffQ = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift);
1291	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + scaleBits - rightShift
1292	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
1293	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1294	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1295
1296	if (rightShift > 0)
1297	{
1298	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1299
1300	for( Int n = 0; n < numSamplesInBlock; n++ )
1301	{
1302	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1303	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
1304
1305	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1306	}
1307	}
1308	else
1309	{
1310	const Int leftShift = -rightShift;
1311
1312	for( Int n = 0; n < numSamplesInBlock; n++ )
1313	{
1314	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1315	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
1316
1317	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1318	}
1319	}
1320	}
1321	}
1322
1323
1324	Void TComTrQuant::init( UInt uiMaxTrSize,
1325	Bool bUseRDOQ,
1326	Bool bUseRDOQTS,
1327	Bool bEnc,
1328	Bool useTransformSkipFast
1329	#if ADAPTIVE_QP_SELECTION
1330	, Bool bUseAdaptQpSelect
1331	#endif
1332	)
1333	{
1334	m_uiMaxTrSize = uiMaxTrSize;
1335	m_bEnc = bEnc;
1336	m_useRDOQ = bUseRDOQ;
1337	m_useRDOQTS = bUseRDOQTS;
1338	#if ADAPTIVE_QP_SELECTION
1339	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1340	#endif
1341	m_useTransformSkipFast = useTransformSkipFast;
1342	}
1343
1344
1345	Void TComTrQuant::transformNxN( TComTU & rTu,
1346	const ComponentID compID,
1347	Pel * pcResidual,
1348	const UInt uiStride,
1349	TCoeff * rpcCoeff,
1350	#if ADAPTIVE_QP_SELECTION
1351	TCoeff * pcArlCoeff,
1352	#endif
1353	TCoeff & uiAbsSum,
1354	const QpParam & cQP
1355	)
1356	{
1357	const TComRectangle &rect = rTu.getRect(compID);
1358	const UInt uiWidth = rect.width;
1359	const UInt uiHeight = rect.height;
1360	TComDataCU* pcCU = rTu.getCU();
1361	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1362	const UInt uiOrgTrDepth = rTu.GetTransformDepthRel();
1363
1364	uiAbsSum=0;
1365
1366	RDPCMMode rdpcmMode = RDPCM_OFF;
1367	rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
1368
1369	if (rdpcmMode == RDPCM_OFF)
1370	{
1371	uiAbsSum = 0;
1372	//transform and quantise
1373	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1374	{
1375	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1376	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1377
1378	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1379	{
1380	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1381	{
1382	const Pel currentSample = pcResidual[(y * uiStride) + x];
1383
1384	rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
1385	uiAbsSum += TCoeff(abs(currentSample));
1386	}
1387	}
1388	}
1389	else
1390	{
1391	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1392	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
1393	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1394	#endif
1395
1396	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1397
1398	if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
1399	{
1400	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
1401	}
1402	else
1403	{
1404	xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1405	}
1406
1407	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1408	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
1409	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1410	#endif
1411
1412	xQuant( rTu, m_plTempCoeff, rpcCoeff,
1413
1414	#if ADAPTIVE_QP_SELECTION
1415	pcArlCoeff,
1416	#endif
1417	uiAbsSum, compID, cQP );
1418
1419	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1420	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
1421	printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
1422	#endif
1423	}
1424	}
1425
1426	//set the CBF
1427	pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1428	}
1429
1430
1431	Void TComTrQuant::invTransformNxN( TComTU &rTu,
1432	const ComponentID compID,
1433	Pel *pcResidual,
1434	const UInt uiStride,
1435	TCoeff * pcCoeff,
1436	const QpParam &cQP
1437	DEBUG_STRING_FN_DECLAREP(psDebug))
1438	{
1439	TComDataCU* pcCU=rTu.getCU();
1440	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1441	const TComRectangle &rect = rTu.getRect(compID);
1442	const UInt uiWidth = rect.width;
1443	const UInt uiHeight = rect.height;
1444
1445	if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
1446	{
1447	//------------------------------------------------
1448
1449	//recurse deeper
1450
1451	TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);
1452
1453	do
1454	{
1455	//------------------
1456
1457	const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;
1458
1459	Pel subTUResidual = pcResidual + (lineOffset uiStride);
1460	TCoeff subTUCoefficients = pcCoeff + (lineOffset subTURecurse.getRect(compID).width);
1461
1462	invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
1463
1464	//------------------
1465
1466	}
1467	while (subTURecurse.nextSection(rTu));
1468
1469	//------------------------------------------------
1470
1471	return;
1472	}
1473
1474	#if defined DEBUG_STRING
1475	if (psDebug)
1476	{
1477	std::stringstream ss(stringstream::out);
1478	printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
1479	DEBUG_STRING_APPEND((*psDebug), ss.str())
1480	}
1481	#endif
1482
1483	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1484	{
1485	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1486	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1487
1488	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1489	{
1490	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1491	{
1492	pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
1493	}
1494	}
1495	}
1496	else
1497	{
1498	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1499	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
1500	printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
1501	#endif
1502
1503	xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);
1504
1505	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1506	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
1507	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1508	#endif
1509
1510	#if defined DEBUG_STRING
1511	if (psDebug)
1512	{
1513	std::stringstream ss(stringstream::out);
1514	printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1515	(*psDebug)+=ss.str();
1516	}
1517	#endif
1518
1519	if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
1520	{
1521	xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );
1522
1523	#if defined DEBUG_STRING
1524	if (psDebug)
1525	{
1526	std::stringstream ss(stringstream::out);
1527	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1528	(*psDebug)+=ss.str();
1529	(*psDebug)+="(<- was a Transform-skipped block)\n";
1530	}
1531	#endif
1532	}
1533	else
1534	{
1535	xIT( compID, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight );
1536
1537	#if defined DEBUG_STRING
1538	if (psDebug)
1539	{
1540	std::stringstream ss(stringstream::out);
1541	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1542	(*psDebug)+=ss.str();
1543	(*psDebug)+="(<- was a Transformed block)\n";
1544	}
1545	#endif
1546	}
1547
1548	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1549	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
1550	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1551	g_debugCounter++;
1552	#endif
1553	}
1554
1555	invRdpcmNxN( rTu, compID, pcResidual, uiStride );
1556	}
1557
1558	Void TComTrQuant::invRecurTransformNxN( const ComponentID compID,
1559	TComYuv *pResidual,
1560	TComTU &rTu)
1561	{
1562	if (!rTu.ProcessComponentSection(compID)) return;
1563
1564	TComDataCU* pcCU = rTu.getCU();
1565	UInt absPartIdxTU = rTu.GetAbsPartIdxTU();
1566	UInt uiTrMode=rTu.GetTransformDepthRel();
1567	if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) \|\| !pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction()) )
1568	{
1569	return;
1570	}
1571
1572	if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) )
1573	{
1574	const TComRectangle &tuRect = rTu.getRect(compID);
1575	const Int uiStride = pResidual->getStride( compID );
1576	Pel *rpcResidual = pResidual->getAddr( compID );
1577	UInt uiAddr = (tuRect.x0 + uiStride*tuRect.y0);
1578	Pel *pResi = rpcResidual + uiAddr;
1579	TCoeff *pcCoeff = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID);
1580
1581	const QpParam cQP(*pcCU, compID);
1582
1583	if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0)
1584	{
1585	DEBUG_STRING_NEW(sTemp)
1586	#ifdef DEBUG_STRING
1587	std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0;
1588	#endif
1589
1590	invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
1591
1592	#ifdef DEBUG_STRING
1593	if (psDebug != 0)
1594	std::cout << (*psDebug);
1595	#endif
1596	}
1597
1598	if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0))
1599	{
1600	const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y );
1601	const Int strideLuma = pResidual->getStride( COMPONENT_Y );
1602	const Int tuWidth = rTu.getRect( compID ).width;
1603	const Int tuHeight = rTu.getRect( compID ).height;
1604
1605	if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0)
1606	{
1607	pResi = rpcResidual + uiAddr;
1608	const Pel *pResiLuma = piResiLuma + uiAddr;
1609
1610	crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true );
1611	}
1612	}
1613	}
1614	else
1615	{
1616	TComTURecurse tuRecurseChild(rTu, false);
1617	do
1618	{
1619	invRecurTransformNxN( compID, pResidual, tuRecurseChild );
1620	}
1621	while (tuRecurseChild.nextSection(rTu));
1622	}
1623	}
1624
1625	Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode )
1626	{
1627	TComDataCU *pcCU=rTu.getCU();
1628	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1629
1630	const Bool bLossless = pcCU->getCUTransquantBypass( uiAbsPartIdx );
1631	const UInt uiWidth = rTu.getRect(compID).width;
1632	const UInt uiHeight = rTu.getRect(compID).height;
1633	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1634	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1635
1636	Pel reconstructedResi[MAX_TU_SIZE * MAX_TU_SIZE];
1637
1638	UInt uiX = 0;
1639	UInt uiY = 0;
1640
1641	UInt &majorAxis = (mode == RDPCM_HOR) ? uiX : uiY;
1642	UInt &minorAxis = (mode == RDPCM_HOR) ? uiY : uiX;
1643	const UInt majorAxisLimit = (mode == RDPCM_HOR) ? uiWidth : uiHeight;
1644	const UInt minorAxisLimit = (mode == RDPCM_HOR) ? uiHeight : uiWidth;
1645	const UInt referenceSampleOffset = (mode == RDPCM_HOR) ? 1 : uiWidth;
1646
1647	const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF);
1648
1649	uiAbsSum = 0;
1650
1651	for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ )
1652	{
1653	for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ )
1654	{
1655	const UInt sampleIndex = (uiY * uiWidth) + uiX;
1656	const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex);
1657	const Pel currentSample = pcResidual[(uiY * uiStride) + uiX];
1658	const Pel referenceSample = ((mode != RDPCM_OFF) && (majorAxis > 0)) ? reconstructedResi[sampleIndex - referenceSampleOffset] : 0;
1659
1660	const Pel encoderSideDelta = currentSample - referenceSample;
1661
1662	Pel reconstructedDelta;
1663	if ( bLossless )
1664	{
1665	pcCoeff[coefficientIndex] = encoderSideDelta;
1666	reconstructedDelta = encoderSideDelta;
1667	}
1668	else
1669	{
1670	transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint);
1671	invTrSkipDeQuantOneSample (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex);
1672	}
1673
1674	uiAbsSum += abs(pcCoeff[coefficientIndex]);
1675
1676	reconstructedResi[sampleIndex] = reconstructedDelta + referenceSample;
1677	}
1678	}
1679	}
1680
1681	Void TComTrQuant::rdpcmNxN ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode )
1682	{
1683	TComDataCU *pcCU=rTu.getCU();
1684	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1685
1686	if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) \|\| ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1687	{
1688	rdpcmMode = RDPCM_OFF;
1689	}
1690	else if ( pcCU->isIntra( uiAbsPartIdx ) )
1691	{
1692	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat();
1693	const ChannelType chType = toChannelType(compID);
1694	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1695	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1696	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1697
1698	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1699	{
1700	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1701	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode );
1702	}
1703	else rdpcmMode = RDPCM_OFF;
1704	}
1705	else // not intra, need to select the best mode
1706	{
1707	const UInt uiWidth = rTu.getRect(compID).width;
1708	const UInt uiHeight = rTu.getRect(compID).height;
1709
1710	RDPCMMode bestMode = NUMBER_OF_RDPCM_MODES;
1711	TCoeff bestAbsSum = std::numeric_limits<TCoeff>::max();
1712	TCoeff bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE];
1713
1714	for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++)
1715	{
1716	const RDPCMMode mode = RDPCMMode(modeIndex);
1717
1718	TCoeff currAbsSum = 0;
1719
1720	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode );
1721
1722	if (currAbsSum < bestAbsSum)
1723	{
1724	bestMode = mode;
1725	bestAbsSum = currAbsSum;
1726	if (mode != RDPCM_OFF)
1727	{
1728	memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff)));
1729	}
1730	}
1731	}
1732
1733	rdpcmMode = bestMode;
1734	uiAbsSum = bestAbsSum;
1735
1736	if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here
1737	{
1738	memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff)));
1739	}
1740	}
1741
1742	pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1743	}
1744
1745	Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride )
1746	{
1747	TComDataCU *pcCU=rTu.getCU();
1748	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1749
1750	if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) \|\| pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1751	{
1752	const UInt uiWidth = rTu.getRect(compID).width;
1753	const UInt uiHeight = rTu.getRect(compID).height;
1754
1755	RDPCMMode rdpcmMode = RDPCM_OFF;
1756
1757	if ( pcCU->isIntra( uiAbsPartIdx ) )
1758	{
1759	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat();
1760	const ChannelType chType = toChannelType(compID);
1761	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1762	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1763	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1764
1765	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1766	{
1767	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1768	}
1769	}
1770	else // not intra case
1771	{
1772	rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx ));
1773	}
1774
1775	if (rdpcmMode == RDPCM_VER)
1776	{
1777	pcResidual += uiStride; //start from row 1
1778
1779	for( UInt uiY = 1; uiY < uiHeight; uiY++ )
1780	{
1781	for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1782	{
1783	pcResidual[ uiX ] = pcResidual[ uiX ] + pcResidual [ (Int)uiX - (Int)uiStride ];
1784	}
1785	pcResidual += uiStride;
1786	}
1787	}
1788	else if (rdpcmMode == RDPCM_HOR)
1789	{
1790	for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1791	{
1792	for( UInt uiX = 1; uiX < uiWidth; uiX++ )
1793	{
1794	pcResidual[ uiX ] = pcResidual[ uiX ] + pcResidual [ (Int)uiX-1 ];
1795	}
1796	pcResidual += uiStride;
1797	}
1798	}
1799	}
1800	}
1801
1802	// ------------------------------------------------------------------------------------------------
1803	// Logical transform
1804	// ------------------------------------------------------------------------------------------------
1805
1806	/** Wrapper function between HM interface and core NxN forward transform (2D)
1807	* \param piBlkResi input data (residual)
1808	* \param psCoeff output data (transform coefficients)
1809	* \param uiStride stride of input residual data
1810	* \param iSize transform size (iSize x iSize)
1811	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1812	*/
1813	Void TComTrQuant::xT( const ComponentID compID, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight )
1814	{
1815	#if MATRIX_MULT
1816	if( iWidth == iHeight)
1817	{
1818	xTr(g_bitDepth[toChannelType(compID)], piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1819	return;
1820	}
1821	#endif
1822
1823	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1824	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1825
1826	for (Int y = 0; y < iHeight; y++)
1827	for (Int x = 0; x < iWidth; x++)
1828	{
1829	block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x];
1830	}
1831
1832	xTrMxN( g_bitDepth[toChannelType(compID)], block, coeff, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1833
1834	memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff)));
1835	}
1836
1837	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1838	* \param plCoef input data (transform coefficients)
1839	* \param pResidual output data (residual)
1840	* \param uiStride stride of input residual data
1841	* \param iSize transform size (iSize x iSize)
1842	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1843	*/
1844	Void TComTrQuant::xIT( const ComponentID compID, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1845	{
1846	#if MATRIX_MULT
1847	if( iWidth == iHeight )
1848	{
1849	#if O0043_BEST_EFFORT_DECODING
1850	xITr(g_bitDepthInStream[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1851	#else
1852	xITr(g_bitDepth[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1853	#endif
1854	return;
1855	}
1856	#endif
1857
1858	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1859	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1860
1861	memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));
1862
1863	#if O0043_BEST_EFFORT_DECODING
1864	xITrMxN( g_bitDepthInStream[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1865	#else
1866	xITrMxN( g_bitDepth[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1867	#endif
1868
1869	for (Int y = 0; y < iHeight; y++)
1870	for (Int x = 0; x < iWidth; x++)
1871	{
1872	pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
1873	}
1874	}
1875
1876	/** Wrapper function between HM interface and core 4x4 transform skipping
1877	* \param piBlkResi input data (residual)
1878	* \param psCoeff output data (transform coefficients)
1879	* \param uiStride stride of input residual data
1880	* \param iSize transform size (iSize x iSize)
1881	*/
1882	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component )
1883	{
1884	const TComRectangle &rect = rTu.getRect(component);
1885	const Int width = rect.width;
1886	const Int height = rect.height;
1887
1888	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1889	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1890	{
1891	iTransformShift = std::max<Int>(0, iTransformShift);
1892	}
1893
1894	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1895	const UInt uiSizeMinus1 = (width * height) - 1;
1896
1897	if (iTransformShift >= 0)
1898	{
1899	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1900	{
1901	for (UInt x = 0; x < width; x++, coefficientIndex++)
1902	{
1903	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift;
1904	}
1905	}
1906	}
1907	else //for very high bit depths
1908	{
1909	iTransformShift = -iTransformShift;
1910	const TCoeff offset = 1 << (iTransformShift - 1);
1911
1912	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1913	{
1914	for (UInt x = 0; x < width; x++, coefficientIndex++)
1915	{
1916	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift;
1917	}
1918	}
1919	}
1920	}
1921
1922	/** Wrapper function between HM interface and core NxN transform skipping
1923	* \param plCoef input data (coefficients)
1924	* \param pResidual output data (residual)
1925	* \param uiStride stride of input residual data
1926	* \param iSize transform size (iSize x iSize)
1927	*/
1928	Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component )
1929	{
1930	const TComRectangle &rect = rTu.getRect(component);
1931	const Int width = rect.width;
1932	const Int height = rect.height;
1933
1934	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1935	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1936	{
1937	iTransformShift = std::max<Int>(0, iTransformShift);
1938	}
1939
1940	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1941	const UInt uiSizeMinus1 = (width * height) - 1;
1942
1943	if (iTransformShift >= 0)
1944	{
1945	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
1946
1947	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1948	{
1949	for (UInt x = 0; x < width; x++, coefficientIndex++)
1950	{
1951	pResidual[(y * uiStride) + x] = Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift);
1952	}
1953	}
1954	}
1955	else //for very high bit depths
1956	{
1957	iTransformShift = -iTransformShift;
1958
1959	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1960	{
1961	for (UInt x = 0; x < width; x++, coefficientIndex++)
1962	{
1963	pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift);
1964	}
1965	}
1966	}
1967	}
1968
1969	/** Rate-distortion optimised quantisation (RDOQ) of one transform block, with CABAC rate estimates
1970	* \param rTu transform unit; supplies the CU, the block rectangle and the TU partition index
1971	* \param plSrcCoeff pointer to input buffer (transform coefficients; output signs are taken from here)
1972	* \param piDstCoeff pointer to output buffer receiving the quantised levels
1973	* \param piArlDstCoeff only present with ADAPTIVE_QP_SELECTION; cleared here, then written only if m_bUseAdaptQpSelect is set
1974	* \param uiAbsSum reference to absolute sum of quantized transform coefficient; levels are added to it, it is not reset here
1975	* \param compID colour component being quantised
1976	* \param cQP quantisation parameter (its per and rem members are used)
1977	* \returns Void
1978	* Rate distortion optimized quantization for entropy
1979	* coding engines using probability models like CABAC.
1980	* Returns early, without touching uiAbsSum, when every coefficient quantises to zero.
1981	*/
1982	Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu,
1983	TCoeff * plSrcCoeff,
1984	TCoeff * piDstCoeff,
1985	#if ADAPTIVE_QP_SELECTION
1986	TCoeff * piArlDstCoeff,
1987	#endif
1988	TCoeff &uiAbsSum,
1989	const ComponentID compID,
1990	const QpParam &cQP )
1991	{
1992	const TComRectangle & rect = rTu.getRect(compID);
1993	const UInt uiWidth = rect.width;
1994	const UInt uiHeight = rect.height;
1995	TComDataCU * pcCU = rTu.getCU();
1996	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1997	const ChannelType channelType = toChannelType(compID);
1998	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1999
2000	const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
2001
2002	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
2003	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
2004	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
2005	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
2006	*/
2007
2008	// Represents scaling through forward transform
2009	Int iTransformShift = getTransformShift(channelType, uiLog2TrSize);
// for transform-skipped blocks with extended precision the shift is clamped to be non-negative
2010	if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
2011	{
2012	iTransformShift = std::max<Int>(0, iTransformShift);
2013	}
2014
2015	const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getUseGolombRiceParameterAdaptation();
2016	const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
2017	UInt uiGoRiceParam = initialGolombRiceParameter;
// running sum of the cost of quantising every coefficient to zero
2018	Double d64BlockUncodedCost = 0;
2019	const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2;
2020	const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2;
2021	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
2022	assert(compID<MAX_NUM_COMPONENT);
2023
2024	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
2025	assert(scalingListType < SCALING_LIST_NUM);
2026
2027	#if ADAPTIVE_QP_SELECTION
2028	memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff);
2029	#endif
2030
// Per-coefficient scratch data. The three cost arrays are indexed by scan position;
// rateIncUp, rateIncDown, sigRateDelta and deltaU are indexed by block (raster) position.
// pdCostCoeff0 is not cleared here; the first pass below writes every entry before it is read.
2031	Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
2032	Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ];
2033	Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
2034	memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
2035	memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
2036	Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ];
2037	Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
2038	Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
2039	TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ];
2040	memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff );
2041	memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff );
2042	memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff );
2043	memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff );
2044
2045	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
2046	const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
2047	const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
2048
// when scaling lists are not in use, one flat quantisation coefficient and error scale serve every position
2049	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
2050	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
2051	const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
2052
2053	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
2054	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
2055
2056	#if ADAPTIVE_QP_SELECTION
2057	Int iQBitsC = iQBits - ARL_C_PRECISION;
2058	Int iAddC = 1 << (iQBitsC-1);
2059	#endif
2060
2061	TUEntropyCodingParameters codingParameters;
2062	getTUEntropyCodingParameters(codingParameters, rTu, compID);
2063	const UInt uiCGSize = (1 << MLS_CG_SIZE);
2064
// pdCostCoeffGroupSig is indexed by coefficient-group scan position, uiSigCoeffGroupFlag by coefficient-group block position
2065	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
2066	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
2067	Int iCGLastScanPos = -1;
2068
2069	UInt uiCtxSet = 0;
2070	Int c1 = 1;
2071	Int c2 = 0;
2072	Double d64BaseCost = 0;
2073	Int iLastScanPos = -1;
2074
2075	UInt c1Idx = 0;
2076	UInt c2Idx = 0;
2077	Int baseLevel;
2078
2079	memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
2080	memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
2081
2082	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
2083	Int iScanPos;
2084	coeffGroupRDStats rdStats;
2085
2086	const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
2087
// ===== first pass: visit the coefficient groups, and the coefficients inside each group, in reverse scan order =====
2088	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
2089	{
2090	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2091	UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups;
2092	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
2093
2094	memset( &rdStats, 0, sizeof (coeffGroupRDStats));
2095
2096	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
2097
2098	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2099	{
2100	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2101	//===== quantization =====
2102	UInt uiBlkPos = codingParameters.scan[iScanPos];
2103	// set coeff
2104
2105	const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
2106	const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
2107
2108	const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
2109
// clamp so that adding the rounding term (1 << (iQBits - 1)) below cannot exceed MAX_INTERMEDIATE_INT
2110	const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, MAX_INTERMEDIATE_INT - (Intermediate_Int(1) << (iQBits - 1)));
2111
2112	#if ADAPTIVE_QP_SELECTION
2113	if( m_bUseAdaptQpSelect )
2114	{
2115	piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
2116	}
2117	#endif
// rounded quantised magnitude, capped at entropyCodingMaximum
2118	const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
2119
2120	const Double dErr = Double( lLevelDouble );
2121	pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;
2122	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
2123	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
2124
// the first non-zero level met in this reverse scan becomes the initial "last significant" position
2125	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2126	{
2127	iLastScanPos = iScanPos;
2128	uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
2129	iCGLastScanPos = iCGScanPos;
2130	}
2131
2132	if ( iLastScanPos >= 0 )
2133	{
2134	//===== coefficient level estimation =====
2135	UInt uiLevel;
2136	UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
2137	UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
2138
2139	if( iScanPos == iLastScanPos )
2140	{
2141	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2142	lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2143	c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, channelType
2144	);
2145	}
2146	else
2147	{
2148	UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
2149
2150	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2151	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2152	c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, channelType
2153	);
2154
2155	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2156	}
2157
// residual between the unrounded level and the chosen level, kept with 8 fractional bits; read by the sign-hiding stage
2158	deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
2159
2160	if( uiLevel > 0 )
2161	{
2162	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType );
2163	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2164	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2165	}
2166	else // uiLevel == 0
2167	{
2168	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2169	}
2170	piDstCoeff[ uiBlkPos ] = uiLevel;
2171	d64BaseCost += pdCostCoeff [ iScanPos ];
2172
2173	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2174	if( uiLevel >= baseLevel )
2175	{
2176	if (uiLevel > 3*(1<<uiGoRiceParam))
2177	{
// without Golomb-Rice parameter adaptation the Rice parameter is capped at 4
2178	uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
2179	}
2180	}
2181	if ( uiLevel >= 1)
2182	{
2183	c1Idx ++;
2184	}
2185
2186	//===== update bin model =====
2187	if( uiLevel > 1 )
2188	{
2189	c1 = 0;
2190	c2 += (c2 < 2);
2191	c2Idx ++;
2192	}
2193	else if( (c1 < 3) && (c1 > 0) && uiLevel)
2194	{
2195	c1++;
2196	}
2197
2198	//===== context set update =====
2199	if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
2200	{
2201	uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this before entering the final group
2202	c1 = 1;
2203	c2 = 0;
2204	c1Idx = 0;
2205	c2Idx = 0;
2206	uiGoRiceParam = initialGolombRiceParameter;
2207	}
2208	}
2209	else
2210	{
// no significant coefficient seen yet: this position contributes only its quantise-to-zero cost
2211	d64BaseCost += pdCostCoeff0[ iScanPos ];
2212	}
2213	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2214	if (iScanPosinCG == 0 )
2215	{
2216	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2217	}
2218	if (piDstCoeff[ uiBlkPos ] )
2219	{
2220	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2221	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2222	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2223	if ( iScanPosinCG != 0 )
2224	{
2225	rdStats.iNNZbeforePos0++;
2226	}
2227	}
2228	} //end for (iScanPosinCG)
2229
// ===== coefficient-group decision: account for the group flag and try zeroing the whole group =====
2230	if (iCGLastScanPos >= 0)
2231	{
2232	if( iCGScanPos )
2233	{
2234	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2235	{
2236	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2237	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2238	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2239	}
2240	else
2241	{
2242	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2243	{
2244	if ( rdStats.iNNZbeforePos0 == 0 )
2245	{
2246	d64BaseCost -= rdStats.d64SigCost_0;
2247	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2248	}
2249	// rd-cost if SigCoeffGroupFlag = 0, initialization
2250	Double d64CostZeroCG = d64BaseCost;
2251
2252	// add SigCoeffGroupFlag cost to total cost
2253	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2254
// NOTE(review): this condition repeats the enclosing test and is always true here
2255	if (iCGScanPos < iCGLastScanPos)
2256	{
2257	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2258	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2259	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2260	}
2261
2262	// try to convert the current coeff group from non-zero to all-zero
2263	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2264	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2265	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zero levels
2266
2267	// if we can save cost, change this block to all-zero block
2268	if ( d64CostZeroCG < d64BaseCost )
2269	{
2270	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2271	d64BaseCost = d64CostZeroCG;
2272	if (iCGScanPos < iCGLastScanPos)
2273	{
2274	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2275	}
2276	// reset coeffs to 0 in this block
2277	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2278	{
2279	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2280	UInt uiBlkPos = codingParameters.scan[ iScanPos ];
2281
2282	if (piDstCoeff[ uiBlkPos ])
2283	{
2284	piDstCoeff [ uiBlkPos ] = 0;
2285	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2286	pdCostSig [ iScanPos ] = 0;
2287	}
2288	}
2289	} // end if ( d64CostZeroCG < d64BaseCost )
2290	}
2291	} // end if-else (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2292	}
2293	else
2294	{
// the coefficient group at scan position 0 is always flagged significant
2295	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2296	}
2297	}
2298	} //end for (iCGScanPos)
2299
2300	//===== estimate last position =====
2301	if ( iLastScanPos < 0 )
2302	{
2303	return;
2304	}
2305
// d64BestCost starts as the cost of leaving the whole block uncoded (uncoded distortion plus the [0] entry of
// the cbf bit table); d64BaseCost gets the matching [1] entry added
2306	Double d64BestCost = 0;
2307	Int ui16CtxCbf = 0;
2308	Int iBestLastIdxP1 = 0;
2309	if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2310	{
2311	ui16CtxCbf = 0;
2312	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2313	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2314	}
2315	else
2316	{
2317	ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType );
2318	ui16CtxCbf += getCBFContextOffset(compID);
2319	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2320	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2321	}
2322
2323
// ===== second pass: walk back from the initial last position and keep the cheapest candidate last position =====
2324	Bool bFoundLast = false;
2325	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2326	{
2327	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2328
2329	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2330	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2331	{
2332	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2333	{
2334	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2335
2336	if (iScanPos > iLastScanPos) continue;
2337	UInt uiBlkPos = codingParameters.scan[iScanPos];
2338
2339	if( piDstCoeff[ uiBlkPos ] )
2340	{
2341	UInt uiPosY = uiBlkPos >> uiLog2BlockWidth;
2342	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
2343
// for vertical scans the X and Y coordinates are swapped when estimating the last-position rate
2344	Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
2345	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2346
2347	if( totalCost < d64BestCost )
2348	{
2349	iBestLastIdxP1 = iScanPos + 1;
2350	d64BestCost = totalCost;
2351	}
// a level greater than 1 ends the search: no earlier position is considered as last
2352	if( piDstCoeff[ uiBlkPos ] > 1 )
2353	{
2354	bFoundLast = true;
2355	break;
2356	}
2357	d64BaseCost -= pdCostCoeff[ iScanPos ];
2358	d64BaseCost += pdCostCoeff0[ iScanPos ];
2359	}
2360	else
2361	{
2362	d64BaseCost -= pdCostSig[ iScanPos ];
2363	}
2364	} //end for
2365	if (bFoundLast)
2366	{
2367	break;
2368	}
2369	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2370	} // end for
2371
2372
// ===== apply the source signs to the kept levels and accumulate their absolute sum =====
2373	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2374	{
2375	Int blkPos = codingParameters.scan[ scanPos ];
2376	TCoeff level = piDstCoeff[ blkPos ];
2377	uiAbsSum += level;
2378	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2379	}
2380
2381	//===== clean uncoded coefficients =====
2382	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2383	{
2384	piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
2385	}
2386
2387
// ===== sign-bit hiding: in each coefficient group whose first and last non-zero positions are at least
// SBH_THRESHOLD apart, if the sign of the first non-zero level does not match the parity of the group's
// level sum, one level is changed by +/-1 at the position with the lowest estimated cost =====
2388	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2389	{
2390	const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
2391	Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
2392	/ m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(g_bitDepth[channelType] - 8)))
2393	+ 0.5);
2394
// lastCG: -1 until the first group (in reverse scan order) holding a non-zero level is met, 1 while that group is processed, 0 afterwards
2395	Int lastCG = -1;
2396	Int absSum = 0 ;
2397	Int n ;
2398
2399	for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
2400	{
2401	Int subPos = subSet << MLS_CG_SIZE;
2402	Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
2403	absSum = 0 ;
2404
2405	for(n = uiCGSize-1; n >= 0; --n )
2406	{
2407	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2408	{
2409	lastNZPosInCG = n;
2410	break;
2411	}
2412	}
2413
2414	for(n = 0; n <uiCGSize; n++ )
2415	{
2416	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2417	{
2418	firstNZPosInCG = n;
2419	break;
2420	}
2421	}
2422
2423	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2424	{
2425	absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
2426	}
2427
2428	if(lastNZPosInCG>=0 && lastCG==-1)
2429	{
2430	lastCG = 1;
2431	}
2432
2433	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2434	{
2435	UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
2436	if( signbit!=(absSum&0x1) ) // hide but need tune
2437	{
2438	// calculate the cost
2439	Int64 minCostInc = MAX_INT64, curCost = MAX_INT64;
2440	Int minPos = -1, finalChange = 0, curChange = 0;
2441
// in the group holding the last coefficient only positions up to lastNZPosInCG are candidates; otherwise the whole group is
2442	for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
2443	{
2444	UInt uiBlkPos = codingParameters.scan[ n + subPos ];
2445	if(piDstCoeff[ uiBlkPos ] != 0 )
2446	{
2447	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
2448	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2449	- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
2450
2451	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2452	{
2453	costDown -= (4<<15);
2454	}
2455
2456	if(costUp<costDown)
2457	{
2458	curCost = costUp;
2459	curChange = 1;
2460	}
2461	else
2462	{
2463	curChange = -1;
// lowering the first non-zero level from magnitude 1 to 0 is ruled out
2464	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2465	{
2466	curCost = MAX_INT64;
2467	}
2468	else
2469	{
2470	curCost = costDown;
2471	}
2472	}
2473	}
2474	else
2475	{
2476	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2477	curChange = 1 ;
2478
// a zero ahead of the first non-zero position would become the new first non-zero level, so its source sign must equal signbit
2479	if(n<firstNZPosInCG)
2480	{
2481	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2482	if(thissignbit != signbit )
2483	{
2484	curCost = MAX_INT64;
2485	}
2486	}
2487	}
2488
2489	if( curCost<minCostInc)
2490	{
2491	minCostInc = curCost;
2492	finalChange = curChange;
2493	minPos = uiBlkPos;
2494	}
2495	}
2496
// a level already at either end of the entropy-coding range gets a change of -1, whatever the cost search chose
2497	if(piDstCoeff[minPos] == entropyCodingMaximum \|\| piDstCoeff[minPos] == entropyCodingMinimum)
2498	{
2499	finalChange = -1;
2500	}
2501
// finalChange applies to the magnitude, so it is subtracted when the source coefficient is negative
2502	if(plSrcCoeff[minPos]>=0)
2503	{
2504	piDstCoeff[minPos] += finalChange ;
2505	}
2506	else
2507	{
2508	piDstCoeff[minPos] -= finalChange ;
2509	}
2510	}
2511	}
2512
2513	if(lastCG==1)
2514	{
2515	lastCG=0 ;
2516	}
2517	}
2518	}
2519	}
2520
2521
2522	/** Pattern decision for context derivation process of significant_coeff_flag
2523	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2524	* \param uiCGPosX column of current coefficient group
2525	* \param uiCGPosY row of current coefficient group
2526	* \param width width of the block
2527	* \param height height of the block
2528	* \returns pattern for current coefficient group
2529	*/
2530	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups )
2531	{
2532	if ((widthInGroups <= 1) && (heightInGroups <= 1)) return 0;
2533
2534	const Bool rightAvailable = uiCGPosX < (widthInGroups - 1);
2535	const Bool belowAvailable = uiCGPosY < (heightInGroups - 1);
2536
2537	UInt sigRight = 0;
2538	UInt sigLower = 0;
2539
2540	if (rightAvailable) sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2541	if (belowAvailable) sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2542
2543	return sigRight + (sigLower << 1);
2544	}
2545
2546
2547	/** Context derivation process of coeff_abs_significant_flag
2548	* \param patternSigCtx pattern for current coefficient group
2549	* \param codingParameters coding parmeters for the TU (includes the scan)
2550	* \param scanPosition current position in scan order
2551	* \param log2BlockWidth log2 width of the block
2552	* \param log2BlockHeight log2 height of the block
2553	* \param ChannelType channel type (CHANNEL_TYPE_LUMA/CHROMA)
2554	* \returns ctxInc for current scan position
2555	*/
2556	Int TComTrQuant::getSigCtxInc ( Int patternSigCtx,
2557	const TUEntropyCodingParameters &codingParameters,
2558	const Int scanPosition,
2559	const Int log2BlockWidth,
2560	const Int log2BlockHeight,
2561	const ChannelType chanType)
2562	{
2563	if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE])
2564	{
2565	//single context mode
2566	return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE];
2567	}
2568
2569	const UInt rasterPosition = codingParameters.scan[scanPosition];
2570	const UInt posY = rasterPosition >> log2BlockWidth;
2571	const UInt posX = rasterPosition - (posY << log2BlockWidth);
2572
2573	if ((posX + posY) == 0) return 0; //special case for the DC context variable
2574
2575	Int offset = MAX_INT;
2576
2577	if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4
2578	{
2579	offset = ctxIndMap4x4[ (4 * posY) + posX ];
2580	}
2581	else
2582	{
2583	Int cnt = 0;
2584
2585	switch (patternSigCtx)
2586	{
2587	//------------------
2588
2589	case 0: //neither neighbouring group is significant
2590	{
2591	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2592	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2593	const Int posTotalInSubset = posXinSubset + posYinSubset;
2594
2595	//first N coefficients in scan order use 2; the next few use 1; the rest use 0.
2596	const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4;
2597	const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4;
2598
2599	cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2);
2600	}
2601	break;
2602
2603	//------------------
2604
2605	case 1: //right group is significant, below is not
2606	{
2607	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2608	const Int groupHeight = 1 << MLS_CG_LOG2_HEIGHT;
2609
2610	cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0
2611	}
2612	break;
2613
2614	//------------------
2615
2616	case 2: //below group is significant, right is not
2617	{
2618	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2619	const Int groupWidth = 1 << MLS_CG_LOG2_WIDTH;
2620
2621	cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0
2622	}
2623	break;
2624
2625	//------------------
2626
2627	case 3: //both neighbouring groups are significant
2628	{
2629	cnt = 2;
2630	}
2631	break;
2632
2633	//------------------
2634
2635	default:
2636	std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl;
2637	exit(1);
2638	break;
2639	}
2640
2641	//------------------------------------------------
2642
2643	const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0;
2644
2645	offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt;
2646	}
2647
2648	return codingParameters.firstSignificanceMapContext + offset;
2649	}
2650
2651
2652	/** Get the best level in RD sense
2653	* \param rd64CodedCost reference to coded cost
2654	* \param rd64CodedCost0 reference to cost when coefficient is 0
2655	* \param rd64CodedCostSig reference to cost of significant coefficient
2656	* \param lLevelDouble reference to unscaled quantized level
2657	* \param uiMaxAbsLevel scaled quantized level
2658	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2659	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2660	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2661	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2662	* \param iQBits quantization step size
2663	* \param dTemp correction factor
2664	* \param bLast indicates if the coefficient is the last significant
2665	* \returns best quantized transform level for given scan position
2666	* This method calculates the best quantized transform level for a given scan position.
2667	*/
2668	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2669	Double& rd64CodedCost0,
2670	Double& rd64CodedCostSig,
2671	Intermediate_Int lLevelDouble,
2672	UInt uiMaxAbsLevel,
2673	UShort ui16CtxNumSig,
2674	UShort ui16CtxNumOne,
2675	UShort ui16CtxNumAbs,
2676	UShort ui16AbsGoRice,
2677	UInt c1Idx,
2678	UInt c2Idx,
2679	Int iQBits,
2680	Double errorScale,
2681	Bool bLast,
2682	Bool useLimitedPrefixLength,
2683	ChannelType channelType
2684	) const
2685	{
2686	Double dCurrCostSig = 0;
2687	UInt uiBestAbsLevel = 0;
2688
2689	if( !bLast && uiMaxAbsLevel < 3 )
2690	{
2691	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2692	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2693	if( uiMaxAbsLevel == 0 )
2694	{
2695	return uiBestAbsLevel;
2696	}
2697	}
2698	else
2699	{
2700	rd64CodedCost = MAX_DOUBLE;
2701	}
2702
2703	if( !bLast )
2704	{
2705	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2706	}
2707
2708	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2709	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2710	{
2711	Double dErr = Double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
2712	Double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, channelType ) );
2713	dCurrCost += dCurrCostSig;
2714
2715	if( dCurrCost < rd64CodedCost )
2716	{
2717	uiBestAbsLevel = uiAbsLevel;
2718	rd64CodedCost = dCurrCost;
2719	rd64CodedCostSig = dCurrCostSig;
2720	}
2721	}
2722
2723	return uiBestAbsLevel;
2724	}
2725
2726	/** Calculates the cost for specific absolute transform level
2727	* \param uiAbsLevel scaled quantized level
2728	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2729	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2730	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2731	* \returns cost of given absolute transform level
2732	*/
2733	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2734	UShort ui16CtxNumOne,
2735	UShort ui16CtxNumAbs,
2736	UShort ui16AbsGoRice,
2737	UInt c1Idx,
2738	UInt c2Idx,
2739	Bool useLimitedPrefixLength,
2740	ChannelType channelType
2741	) const
2742	{
2743	Int iRate = Int(xGetIEPRate()); // cost of sign bit
2744	UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2745
2746	if ( uiAbsLevel >= baseLevel )
2747	{
2748	UInt symbol = uiAbsLevel - baseLevel;
2749	UInt length;
2750	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2751	{
2752	length = symbol>>ui16AbsGoRice;
2753	iRate += (length+1+ui16AbsGoRice)<< 15;
2754	}
2755	else if (useLimitedPrefixLength)
2756	{
2757	const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + g_maxTrDynamicRange[channelType]));
2758
2759	UInt prefixLength = 0;
2760	UInt suffix = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION;
2761
2762	while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
2763	{
2764	prefixLength++;
2765	}
2766
2767	const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (g_maxTrDynamicRange[channelType] - ui16AbsGoRice) : (prefixLength + 1/separator/);
2768
2769	iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15;
2770	}
2771	else
2772	{
2773	length = ui16AbsGoRice;
2774	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2775	while (symbol >= (1<<length))
2776	{
2777	symbol -= (1<<(length++));
2778	}
2779	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2780	}
2781
2782	if (c1Idx < C1FLAG_NUMBER)
2783	{
2784	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2785
2786	if (c2Idx < C2FLAG_NUMBER)
2787	{
2788	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2789	}
2790	}
2791	}
2792	else if( uiAbsLevel == 1 )
2793	{
2794	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2795	}
2796	else if( uiAbsLevel == 2 )
2797	{
2798	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2799	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2800	}
2801	else
2802	{
2803	iRate = 0;
2804	}
2805
2806	return iRate;
2807	}
2808
2809	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2810	UShort ui16CtxNumSig ) const
2811	{
2812	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2813	}
2814
2815	/** Calculates the cost of signaling the last significant coefficient in the block
2816	* \param uiPosX X coordinate of the last significant coefficient
2817	* \param uiPosY Y coordinate of the last significant coefficient
2818	* \returns cost of last significant coefficient
2819	*/
2820	/*
2821	* \param uiWidth width of the transform unit (TU)
2822	*/
2823	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2824	const UInt uiPosY,
2825	const ComponentID component ) const
2826	{
2827	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2828	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2829
2830	Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ];
2831
2832	if( uiCtxX > 3 )
2833	{
2834	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2835	}
2836	if( uiCtxY > 3 )
2837	{
2838	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2839	}
2840	return xGetICost( uiCost );
2841	}
2842
2843	/** Calculates the cost for specific absolute transform level
2844	* \param uiAbsLevel scaled quantized level
2845	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2846	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2847	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2848	* \returns cost of given absolute transform level
2849	*/
2850	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2851	UShort ui16CtxNumSig ) const
2852	{
2853	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2854	}
2855
2856	/** Get the cost for a specific rate
2857	* \param dRate rate of a bit
2858	* \returns cost at the specific rate
2859	*/
2860	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2861	{
2862	return m_dLambda * dRate;
2863	}
2864
2865	/** Get the cost of an equal probable bit
2866	* \returns cost of equal probable bit
2867	*/
2868	__inline Double TComTrQuant::xGetIEPRate ( ) const
2869	{
2870	return 32768;
2871	}
2872
2873	/** Context derivation process of coeff_abs_significant_flag
2874	* \param uiSigCoeffGroupFlag significance map of L1
2875	* \param uiBlkX column of current scan position
2876	* \param uiBlkY row of current scan position
2877	* \param uiLog2BlkSize log2 value of block size
2878	* \returns ctxInc for current scan position
2879	*/
2880	UInt TComTrQuant::getSigCoeffGroupCtxInc (const UInt* uiSigCoeffGroupFlag,
2881	const UInt uiCGPosX,
2882	const UInt uiCGPosY,
2883	const UInt widthInGroups,
2884	const UInt heightInGroups)
2885	{
2886	UInt sigRight = 0;
2887	UInt sigLower = 0;
2888
2889	if (uiCGPosX < (widthInGroups - 1)) sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2890	if (uiCGPosY < (heightInGroups - 1)) sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2891
2892	return ((sigRight + sigLower) != 0) ? 1 : 0;
2893	}
2894
2895
2896	/** set quantized matrix coefficient for encode
2897	* \param scalingList quantaized matrix address
2898	*/
2899	Void TComTrQuant::setScalingList(TComScalingList *scalingList, const ChromaFormat format)
2900	{
2901	const Int minimumQp = 0;
2902	const Int maximumQp = SCALING_LIST_REM_NUM;
2903
2904	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2905	{
2906	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2907	{
2908	for(Int qp = minimumQp; qp < maximumQp; qp++)
2909	{
2910	xSetScalingListEnc(scalingList,list,size,qp,format);
2911	xSetScalingListDec(scalingList,list,size,qp,format);
2912	setErrScaleCoeff(list,size,qp);
2913	}
2914	}
2915	}
2916	}
2917	/** set quantized matrix coefficient for decode
2918	* \param scalingList quantaized matrix address
2919	*/
2920	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList, const ChromaFormat format)
2921	{
2922	const Int minimumQp = 0;
2923	const Int maximumQp = SCALING_LIST_REM_NUM;
2924
2925	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2926	{
2927	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2928	{
2929	for(Int qp = minimumQp; qp < maximumQp; qp++)
2930	{
2931	xSetScalingListDec(scalingList,list,size,qp,format);
2932	}
2933	}
2934	}
2935	}
2936	/** set error scale coefficients
2937	* \param list List ID
2938	* \param uiSize Size
2939	* \param uiQP Quantization parameter
2940	*/
2941	Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp)
2942	{
2943	const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2944	const ChannelType channelType = ((list == 0) \|\| (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
2945
2946	const Int iTransformShift = getTransformShift(channelType, uiLog2TrSize); // Represents scaling through forward transform
2947
2948	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2949	Int *piQuantcoeff;
2950	Double *pdErrScale;
2951	piQuantcoeff = getQuantCoeff(list, qp,size);
2952	pdErrScale = getErrScaleCoeff(list, size, qp);
2953
2954	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2955	dErrScale = dErrScalepow(2.0,(-2.0iTransformShift)); // Compensate for scaling through forward transform
2956
2957	for(i=0;i<uiMaxNumCoeff;i++)
2958	{
2959	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2960	}
2961
2962	getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2963	}
2964
2965	/** set quantized matrix coefficient for encode
2966	* \param scalingList quantaized matrix address
2967	* \param listId List index
2968	* \param sizeId size index
2969	* \param uiQP Quantization parameter
2970	*/
2971	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
2972	{
2973	UInt width = g_scalingListSizeX[sizeId];
2974	UInt height = g_scalingListSizeX[sizeId];
2975	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2976	Int *quantcoeff;
2977	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2978	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2979
2980	Int quantScales = g_quantScales[qp];
2981
2982	processScalingListEnc(coeff,
2983	quantcoeff,
2984	(quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
2985	height, width, ratio,
2986	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
2987	scalingList->getScalingListDC(sizeId,listId));
2988	}
2989
2990	/** set quantized matrix coefficient for decode
2991	* \param scalingList quantaized matrix address
2992	* \param list List index
2993	* \param size size index
2994	* \param uiQP Quantization parameter
2995	*/
2996	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
2997	{
2998	UInt width = g_scalingListSizeX[sizeId];
2999	UInt height = g_scalingListSizeX[sizeId];
3000	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3001	Int *dequantcoeff;
3002	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
3003
3004	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
3005
3006	Int invQuantScale = g_invQuantScales[qp];
3007
3008	processScalingListDec(coeff,
3009	dequantcoeff,
3010	invQuantScale,
3011	height, width, ratio,
3012	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3013	scalingList->getScalingListDC(sizeId,listId));
3014	}
3015
3016	/** set flat matrix value to quantized coefficient
3017	*/
3018	Void TComTrQuant::setFlatScalingList(const ChromaFormat format)
3019	{
3020	const Int minimumQp = 0;
3021	const Int maximumQp = SCALING_LIST_REM_NUM;
3022
3023	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3024	{
3025	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3026	{
3027	for(Int qp = minimumQp; qp < maximumQp; qp++)
3028	{
3029	xsetFlatScalingList(list,size,qp,format);
3030	setErrScaleCoeff(list,size,qp);
3031	}
3032	}
3033	}
3034	}
3035
3036	/** set flat matrix value to quantized coefficient
3037	* \param list List ID
3038	* \param uiQP Quantization parameter
3039	* \param uiSize Size
3040	*/
3041	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp, const ChromaFormat format)
3042	{
3043	UInt i,num = g_scalingListSize[size];
3044	Int *quantcoeff;
3045	Int *dequantcoeff;
3046
3047	Int quantScales = g_quantScales [qp];
3048	Int invQuantScales = g_invQuantScales[qp] << 4;
3049
3050	quantcoeff = getQuantCoeff(list, qp, size);
3051	dequantcoeff = getDequantCoeff(list, qp, size);
3052
3053	for(i=0;i<num;i++)
3054	{
3055	*quantcoeff++ = quantScales;
3056	*dequantcoeff++ = invQuantScales;
3057	}
3058	}
3059
3060	/** set quantized matrix coefficient for encode
3061	* \param coeff quantaized matrix address
3062	* \param quantcoeff quantaized matrix address
3063	* \param quantScales Q(QP%6)
3064	* \param height height
3065	* \param width width
3066	* \param ratio ratio for upscale
3067	* \param sizuNum matrix size
3068	* \param dc dc parameter
3069	*/
3070	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3071	{
3072	for(UInt j=0;j<height;j++)
3073	{
3074	for(UInt i=0;i<width;i++)
3075	{
3076	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j / ratio) + i / ratio];
3077	}
3078	}
3079
3080	if(ratio > 1)
3081	{
3082	quantcoeff[0] = quantScales / dc;
3083	}
3084	}
3085
3086	/** set quantized matrix coefficient for decode
3087	* \param coeff quantaized matrix address
3088	* \param dequantcoeff quantaized matrix address
3089	* \param invQuantScales IQ(QP%6))
3090	* \param height height
3091	* \param width width
3092	* \param ratio ratio for upscale
3093	* \param sizuNum matrix size
3094	* \param dc dc parameter
3095	*/
3096	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3097	{
3098	for(UInt j=0;j<height;j++)
3099	{
3100	for(UInt i=0;i<width;i++)
3101	{
3102	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
3103	}
3104	}
3105
3106	if(ratio > 1)
3107	{
3108	dequantcoeff[0] = invQuantScales * dc;
3109	}
3110	}
3111
3112	/** initialization process of scaling list array
3113	*/
3114	Void TComTrQuant::initScalingList()
3115	{
3116	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3117	{
3118	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3119	{
3120	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3121	{
3122	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3123	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3124	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
3125	} // listID loop
3126	}
3127	}
3128	}
3129
3130	/** destroy quantization matrix array
3131	*/
3132	Void TComTrQuant::destroyScalingList()
3133	{
3134	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3135	{
3136	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3137	{
3138	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3139	{
3140	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
3141	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
3142	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
3143	}
3144	}
3145	}
3146	}
3147
3148	Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const Pel resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint)
3149	{
3150	TComDataCU *pcCU = rTu.getCU();
3151	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3152	const TComRectangle &rect = rTu.getRect(compID);
3153	const UInt uiWidth = rect.width;
3154	const UInt uiHeight = rect.height;
3155	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3156	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3157	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3158	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
3159
3160	assert( scalingListType < SCALING_LIST_NUM );
3161	const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) );
3162
3163
3164	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
3165	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
3166	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
3167	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
3168	*/
3169
3170	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
3171	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
3172
3173	const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9);
3174
3175	TCoeff transformedCoefficient;
3176
3177	// transform-skip
3178	if (iTransformShift >= 0)
3179	{
3180	transformedCoefficient = resiDiff << iTransformShift;
3181	}
3182	else // for very high bit depths
3183	{
3184	const Int iTrShiftNeg = -iTransformShift;
3185	const Int offset = 1 << (iTrShiftNeg - 1);
3186	transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg;
3187	}
3188
3189	// quantization
3190	const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
3191
3192	const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
3193
3194	const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient;
3195
3196	const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
3197
3198	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3199	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3200	pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
3201	}
3202
3203
3204	Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos )
3205	{
3206	TComDataCU *pcCU = rTu.getCU();
3207	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3208	const TComRectangle &rect = rTu.getRect(compID);
3209	const UInt uiWidth = rect.width;
3210	const UInt uiHeight = rect.height;
3211	const Int QP_per = cQP.per;
3212	const Int QP_rem = cQP.rem;
3213	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3214	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3215	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3216	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
3217
3218	assert( scalingListType < SCALING_LIST_NUM );
3219
3220	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
3221
3222	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3223	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3224
3225	// Dequantisation
3226
3227	TCoeff dequantisedSample;
3228
3229	if(enableScalingLists)
3230	{
3231	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
3232	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
3233
3234	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3235	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3236
3237	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
3238
3239	if(rightShift > 0)
3240	{
3241	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3242	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3243	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift;
3244
3245	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3246	}
3247	else
3248	{
3249	const Int leftShift = -rightShift;
3250	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3251	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift;
3252
3253	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3254	}
3255	}
3256	else
3257	{
3258	const Int scale = g_invQuantScales[QP_rem];
3259	const Int scaleBits = (IQUANT_SHIFT + 1) ;
3260
3261	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
3262	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3263	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3264
3265	if (rightShift > 0)
3266	{
3267	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3268	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3269	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
3270
3271	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3272	}
3273	else
3274	{
3275	const Int leftShift = -rightShift;
3276	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3277	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
3278
3279	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3280	}
3281	}
3282
3283	// Inverse transform-skip
3284
3285	if (iTransformShift >= 0)
3286	{
3287	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
3288	reconSample = Pel(( dequantisedSample + offset ) >> iTransformShift);
3289	}
3290	else //for very high bit depths
3291	{
3292	const Int iTrShiftNeg = -iTransformShift;
3293	reconSample = Pel(dequantisedSample << iTrShiftNeg);
3294	}
3295	}
3296
3297
3298	Void TComTrQuant::crossComponentPrediction( TComTU & rTu,
3299	const ComponentID compID,
3300	const Pel * piResiL,
3301	const Pel * piResiC,
3302	Pel * piResiT,
3303	const Int width,
3304	const Int height,
3305	const Int strideL,
3306	const Int strideC,
3307	const Int strideT,
3308	const Bool reverse )
3309	{
3310	const Pel *pResiL = piResiL;
3311	const Pel *pResiC = piResiC;
3312	Pel *pResiT = piResiT;
3313
3314	TComDataCU *pCU = rTu.getCU();
3315	const Char alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID );
3316	const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
3317
3318	for( Int y = 0; y < height; y++ )
3319	{
3320	if (reverse)
3321	{
3322	for( Int x = 0; x < width; x++ )
3323	{
3324	pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3);
3325	}
3326	}
3327	else
3328	{
3329	for( Int x = 0; x < width; x++ )
3330	{
3331	pResiT[x] = pResiC[x] - (( alpha * rightShift(pResiL[x], diffBitDepth) ) >> 3);
3332	}
3333	}
3334
3335	pResiL += strideL;
3336	pResiC += strideC;
3337	pResiT += strideT;
3338	}
3339	}
3340
3341	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: