Context navigation

source: SHVCSoftware/branches/SHM-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 1263

Visit:

Last change on this file since 1263 was 1260, checked in by seregin, 9 years ago
port rev 4257
Property svn:eol-style set to `native`
File size: 124.6 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2015, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <limits>
41	#include <memory.h>
42	#include "TComTrQuant.h"
43	#include "TComPic.h"
44	#include "ContextTables.h"
45	#include "TComTU.h"
46	#include "Debug.h"
47
48	typedef struct
49	{
50	Int iNNZbeforePos0;
51	Double d64CodedLevelandDist; // distortion and level cost only
52	Double d64UncodedDist; // all zero coded block distortion
53	Double d64SigCost;
54	Double d64SigCost_0;
55	} coeffGroupRDStats;
56
57	//! \ingroup TLibCommon
58	//! \{
59
60	// ====================================================================================================================
61	// Constants
62	// ====================================================================================================================
63
64	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
65
66
67	// ====================================================================================================================
68	// QpParam constructor
69	// ====================================================================================================================
70
71	QpParam::QpParam(const Int qpy,
72	const ChannelType chType,
73	const Int qpBdOffset,
74	const Int chromaQPOffset,
75	const ChromaFormat chFmt )
76	{
77	Int baseQp;
78
79	if(isLuma(chType))
80	{
81	baseQp = qpy + qpBdOffset;
82	}
83	else
84	{
85	baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
86
87	if(baseQp < 0)
88	{
89	baseQp = baseQp + qpBdOffset;
90	}
91	else
92	{
93	baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
94	}
95	}
96
97	Qp =baseQp;
98	per=baseQp/6;
99	rem=baseQp%6;
100	}
101
102	QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
103	{
104	Int chromaQpOffset = 0;
105
106	if (isChroma(compID))
107	{
108	chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
109	chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
110
111	chromaQpOffset += cu.getSlice()->getPPS()->getChromaQpAdjTableAt(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
112	}
113
114	#if SVC_EXTENSION
115	TComSlice* slice = const_cast<TComSlice*> (cu.getSlice());
116	#endif
117
118	*this = QpParam(cu.getQP( 0 ),
119	toChannelType(compID),
120	#if SVC_EXTENSION
121	isLuma(compID) ? slice->getQpBDOffsetY() : slice->getQpBDOffsetC(),
122	#else
123	cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
124	#endif
125	chromaQpOffset,
126	cu.getPic()->getChromaFormat());
127	}
128
129
130	// ====================================================================================================================
131	// TComTrQuant class member functions
132	// ====================================================================================================================
133
134	TComTrQuant::TComTrQuant()
135	{
136	// allocate temporary buffers
137	m_plTempCoeff = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ];
138
139	// allocate bit estimation class (for RDOQ)
140	m_pcEstBitsSbac = new estBitsSbacStruct;
141	initScalingList();
142	}
143
144	TComTrQuant::~TComTrQuant()
145	{
146	// delete temporary buffers
147	if ( m_plTempCoeff )
148	{
149	delete [] m_plTempCoeff;
150	m_plTempCoeff = NULL;
151	}
152
153	// delete bit estimation class
154	if ( m_pcEstBitsSbac )
155	{
156	delete m_pcEstBitsSbac;
157	}
158	destroyScalingList();
159	}
160
161	#if ADAPTIVE_QP_SELECTION
162	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
163	{
164	// NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled?
165
166	Int qpBase = pcSlice->getSliceQpBase();
167	Int sliceQpused = pcSlice->getSliceQp();
168	Int sliceQpnext;
169	Double alpha = qpBase < 17 ? 0.5 : 1;
170
171	Int cnt=0;
172	for(Int u=1; u<=LEVEL_RANGE; u++)
173	{
174	cnt += m_sliceNsamples[u] ;
175	}
176
177	if( !m_useRDOQ )
178	{
179	sliceQpused = qpBase;
180	alpha = 0.5;
181	}
182
183	if( cnt > 120 )
184	{
185	Double sum = 0;
186	Int k = 0;
187	for(Int u=1; u<LEVEL_RANGE; u++)
188	{
189	sum += u*m_sliceSumC[u];
190	k += uum_sliceNsamples[u];
191	}
192
193	Int v;
194	Double q[MAX_QP+1] ;
195	for(v=0; v<=MAX_QP; v++)
196	{
197	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
198	}
199
200	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
201
202	for(v=0; v<MAX_QP; v++)
203	{
204	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
205	{
206	break;
207	}
208	}
209	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
210	}
211	else
212	{
213	sliceQpnext = sliceQpused;
214	}
215
216	m_qpDelta[qpBase] = sliceQpnext - qpBase;
217	}
218
219	Void TComTrQuant::initSliceQpDelta()
220	{
221	for(Int qp=0; qp<=MAX_QP; qp++)
222	{
223	m_qpDelta[qp] = qp < 17 ? 0 : 1;
224	}
225	}
226
227	Void TComTrQuant::clearSliceARLCnt()
228	{
229	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
230	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
231	}
232	#endif
233
234
235
236	#if MATRIX_MULT
237	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
238	* \param block pointer to input data (residual)
239	* \param coeff pointer to output data (transform coefficients)
240	* \param uiStride stride of input data
241	* \param uiTrSize transform size (uiTrSize x uiTrSize)
242	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
243	*/
244	Void xTr(Int bitDepth, Pel block, TCoeff coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
245	{
246	UInt i,j,k;
247	TCoeff iSum;
248	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
249	const TMatrixCoeff *iT;
250	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
251
252	if (uiTrSize==4)
253	{
254	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]);
255	}
256	else if (uiTrSize==8)
257	{
258	iT = g_aiT8[TRANSFORM_FORWARD][0];
259	}
260	else if (uiTrSize==16)
261	{
262	iT = g_aiT16[TRANSFORM_FORWARD][0];
263	}
264	else if (uiTrSize==32)
265	{
266	iT = g_aiT32[TRANSFORM_FORWARD][0];
267	}
268	else
269	{
270	assert(0);
271	}
272
273	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
274
275	const Int shift_1st = (uiLog2TrSize + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
276	const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT;
277	const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0;
278	const Int add_2nd = 1<<(shift_2nd-1);
279
280	/* Horizontal transform */
281
282	for (i=0; i<uiTrSize; i++)
283	{
284	for (j=0; j<uiTrSize; j++)
285	{
286	iSum = 0;
287	for (k=0; k<uiTrSize; k++)
288	{
289	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
290	}
291	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
292	}
293	}
294
295	/* Vertical transform */
296	for (i=0; i<uiTrSize; i++)
297	{
298	for (j=0; j<uiTrSize; j++)
299	{
300	iSum = 0;
301	for (k=0; k<uiTrSize; k++)
302	{
303	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
304	}
305	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
306	}
307	}
308	}
309
310	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
311	* \param coeff pointer to input data (transform coefficients)
312	* \param block pointer to output data (residual)
313	* \param uiStride stride of output data
314	* \param uiTrSize transform size (uiTrSize x uiTrSize)
315	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
316	*/
317	Void xITr(Int bitDepth, TCoeff coeff, Pel block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
318	{
319	UInt i,j,k;
320	TCoeff iSum;
321	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
322	const TMatrixCoeff *iT;
323
324	if (uiTrSize==4)
325	{
326	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]);
327	}
328	else if (uiTrSize==8)
329	{
330	iT = g_aiT8[TRANSFORM_INVERSE][0];
331	}
332	else if (uiTrSize==16)
333	{
334	iT = g_aiT16[TRANSFORM_INVERSE][0];
335	}
336	else if (uiTrSize==32)
337	{
338	iT = g_aiT32[TRANSFORM_INVERSE][0];
339	}
340	else
341	{
342	assert(0);
343	}
344
345	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
346
347	const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
348	const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
349	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
350	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
351	assert(shift_2nd>=0);
352	const Int add_1st = 1<<(shift_1st-1);
353	const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0;
354
355	/* Horizontal transform */
356	for (i=0; i<uiTrSize; i++)
357	{
358	for (j=0; j<uiTrSize; j++)
359	{
360	iSum = 0;
361	for (k=0; k<uiTrSize; k++)
362	{
363	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
364	}
365
366	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
367	tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st);
368	}
369	}
370
371	/* Vertical transform */
372	for (i=0; i<uiTrSize; i++)
373	{
374	for (j=0; j<uiTrSize; j++)
375	{
376	iSum = 0;
377	for (k=0; k<uiTrSize; k++)
378	{
379	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
380	}
381
382	block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd);
383	}
384	}
385	}
386
387	#endif //MATRIX_MULT
388
389
390	/** 4x4 forward transform implemented using partial butterfly structure (1D)
391	* \param src input data (residual)
392	* \param dst output data (transform coefficients)
393	* \param shift specifies right shift after 1D transform
394	* \param line
395	*/
396	Void partialButterfly4(TCoeff src, TCoeff dst, Int shift, Int line)
397	{
398	Int j;
399	TCoeff E[2],O[2];
400	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
401
402	for (j=0; j<line; j++)
403	{
404	/* E and O */
405	E[0] = src[0] + src[3];
406	O[0] = src[0] - src[3];
407	E[1] = src[1] + src[2];
408	O[1] = src[1] - src[2];
409
410	dst[0] = (g_aiT4[TRANSFORM_FORWARD][0][0]E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]E[1] + add)>>shift;
411	dst[2line] = (g_aiT4[TRANSFORM_FORWARD][2][0]E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift;
412	dst[line] = (g_aiT4[TRANSFORM_FORWARD][1][0]O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]O[1] + add)>>shift;
413	dst[3line] = (g_aiT4[TRANSFORM_FORWARD][3][0]O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift;
414
415	src += 4;
416	dst ++;
417	}
418	}
419
420	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
421	// give identical results
422	Void fastForwardDst(TCoeff block, TCoeff coeff, Int shift) // input block, output coeff
423	{
424	Int i;
425	TCoeff c[4];
426	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
427	for (i=0; i<4; i++)
428	{
429	// Intermediate Variables
430	c[0] = block[4*i+0];
431	c[1] = block[4*i+1];
432	c[2] = block[4*i+2];
433	c[3] = block[4*i+3];
434
435	for (Int row = 0; row < 4; row++)
436	{
437	TCoeff result = 0;
438	for (Int column = 0; column < 4; column++)
439	{
440	result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers
441	}
442
443	coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift);
444	}
445	}
446	}
447
448	Void fastInverseDst(TCoeff tmp, TCoeff block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum) // input tmp, output block
449	{
450	Int i;
451	TCoeff c[4];
452	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
453	for (i=0; i<4; i++)
454	{
455	// Intermediate Variables
456	c[0] = tmp[ i];
457	c[1] = tmp[4 +i];
458	c[2] = tmp[8 +i];
459	c[3] = tmp[12+i];
460
461	for (Int column = 0; column < 4; column++)
462	{
463	TCoeff &result = block[(i * 4) + column];
464
465	result = 0;
466	for (Int row = 0; row < 4; row++)
467	{
468	result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers
469	}
470
471	result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift));
472	}
473	}
474	}
475
476	/** 4x4 inverse transform implemented using partial butterfly structure (1D)
477	* \param src input data (transform coefficients)
478	* \param dst output data (residual)
479	* \param shift specifies right shift after 1D transform
480	* \param line
481	* \param outputMinimum minimum for clipping
482	* \param outputMaximum maximum for clipping
483	*/
484	Void partialButterflyInverse4(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
485	{
486	Int j;
487	TCoeff E[2],O[2];
488	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
489
490	for (j=0; j<line; j++)
491	{
492	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
493	O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]src[3*line];
494	O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]src[3*line];
495	E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]src[0] + g_aiT4[TRANSFORM_INVERSE][2][0]src[2*line];
496	E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]src[0] + g_aiT4[TRANSFORM_INVERSE][2][1]src[2*line];
497
498	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
499	dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift );
500	dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift );
501	dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift );
502	dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift );
503
504	src ++;
505	dst += 4;
506	}
507	}
508
509	/** 8x8 forward transform implemented using partial butterfly structure (1D)
510	* \param src input data (residual)
511	* \param dst output data (transform coefficients)
512	* \param shift specifies right shift after 1D transform
513	* \param line
514	*/
515	Void partialButterfly8(TCoeff src, TCoeff dst, Int shift, Int line)
516	{
517	Int j,k;
518	TCoeff E[4],O[4];
519	TCoeff EE[2],EO[2];
520	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
521
522	for (j=0; j<line; j++)
523	{
524	/* E and O*/
525	for (k=0;k<4;k++)
526	{
527	E[k] = src[k] + src[7-k];
528	O[k] = src[k] - src[7-k];
529	}
530	/* EE and EO */
531	EE[0] = E[0] + E[3];
532	EO[0] = E[0] - E[3];
533	EE[1] = E[1] + E[2];
534	EO[1] = E[1] - E[2];
535
536	dst[0] = (g_aiT8[TRANSFORM_FORWARD][0][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]EE[1] + add)>>shift;
537	dst[4line] = (g_aiT8[TRANSFORM_FORWARD][4][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift;
538	dst[2line] = (g_aiT8[TRANSFORM_FORWARD][2][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift;
539	dst[6line] = (g_aiT8[TRANSFORM_FORWARD][6][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift;
540
541	dst[line] = (g_aiT8[TRANSFORM_FORWARD][1][0]O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]O[3] + add)>>shift;
542	dst[3line] = (g_aiT8[TRANSFORM_FORWARD][3][0]O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift;
543	dst[5line] = (g_aiT8[TRANSFORM_FORWARD][5][0]O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift;
544	dst[7line] = (g_aiT8[TRANSFORM_FORWARD][7][0]O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift;
545
546	src += 8;
547	dst ++;
548	}
549	}
550
551	/** 8x8 inverse transform implemented using partial butterfly structure (1D)
552	* \param src input data (transform coefficients)
553	* \param dst output data (residual)
554	* \param shift specifies right shift after 1D transform
555	* \param line
556	* \param outputMinimum minimum for clipping
557	* \param outputMaximum maximum for clipping
558	*/
559	Void partialButterflyInverse8(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
560	{
561	Int j,k;
562	TCoeff E[4],O[4];
563	TCoeff EE[2],EO[2];
564	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
565
566	for (j=0; j<line; j++)
567	{
568	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
569	for (k=0;k<4;k++)
570	{
571	O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]src[line] + g_aiT8[TRANSFORM_INVERSE][ 3][k]src[3*line] +
572	g_aiT8[TRANSFORM_INVERSE][ 5][k]src[5line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]src[7line];
573	}
574
575	EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][0]src[ 6line ];
576	EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][1]src[ 6line ];
577	EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][0]src[ 4*line ];
578	EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][1]src[ 4*line ];
579
580	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
581	E[0] = EE[0] + EO[0];
582	E[3] = EE[0] - EO[0];
583	E[1] = EE[1] + EO[1];
584	E[2] = EE[1] - EO[1];
585	for (k=0;k<4;k++)
586	{
587	dst[ k ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
588	dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift );
589	}
590	src ++;
591	dst += 8;
592	}
593	}
594
595	/** 16x16 forward transform implemented using partial butterfly structure (1D)
596	* \param src input data (residual)
597	* \param dst output data (transform coefficients)
598	* \param shift specifies right shift after 1D transform
599	* \param line
600	*/
601	Void partialButterfly16(TCoeff src, TCoeff dst, Int shift, Int line)
602	{
603	Int j,k;
604	TCoeff E[8],O[8];
605	TCoeff EE[4],EO[4];
606	TCoeff EEE[2],EEO[2];
607	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
608
609	for (j=0; j<line; j++)
610	{
611	/* E and O*/
612	for (k=0;k<8;k++)
613	{
614	E[k] = src[k] + src[15-k];
615	O[k] = src[k] - src[15-k];
616	}
617	/* EE and EO */
618	for (k=0;k<4;k++)
619	{
620	EE[k] = E[k] + E[7-k];
621	EO[k] = E[k] - E[7-k];
622	}
623	/* EEE and EEO */
624	EEE[0] = EE[0] + EE[3];
625	EEO[0] = EE[0] - EE[3];
626	EEE[1] = EE[1] + EE[2];
627	EEO[1] = EE[1] - EE[2];
628
629	dst[ 0 ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]EEE[1] + add)>>shift;
630	dst[ 8line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift;
631	dst[ 4line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift;
632	dst[ 12line] = (g_aiT16[TRANSFORM_FORWARD][12][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift;
633
634	for (k=2;k<16;k+=4)
635	{
636	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] +
637	g_aiT16[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]EO[3] + add)>>shift;
638	}
639
640	for (k=1;k<16;k+=2)
641	{
642	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] +
643	g_aiT16[TRANSFORM_FORWARD][k][2]O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]O[3] +
644	g_aiT16[TRANSFORM_FORWARD][k][4]O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]O[5] +
645	g_aiT16[TRANSFORM_FORWARD][k][6]O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]O[7] + add)>>shift;
646	}
647
648	src += 16;
649	dst ++;
650
651	}
652	}
653
654	/** 16x16 inverse transform implemented using partial butterfly structure (1D)
655	* \param src input data (transform coefficients)
656	* \param dst output data (residual)
657	* \param shift specifies right shift after 1D transform
658	* \param line
659	* \param outputMinimum minimum for clipping
660	* \param outputMaximum maximum for clipping
661	*/
662	Void partialButterflyInverse16(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
663	{
664	Int j,k;
665	TCoeff E[8],O[8];
666	TCoeff EE[4],EO[4];
667	TCoeff EEE[2],EEO[2];
668	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
669
670	for (j=0; j<line; j++)
671	{
672	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
673	for (k=0;k<8;k++)
674	{
675	O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]src[ line] + g_aiT16[TRANSFORM_INVERSE][ 3][k]src[ 3*line] +
676	g_aiT16[TRANSFORM_INVERSE][ 5][k]src[ 5line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]src[ 7line] +
677	g_aiT16[TRANSFORM_INVERSE][ 9][k]src[ 9line] + g_aiT16[TRANSFORM_INVERSE][11][k]src[11line] +
678	g_aiT16[TRANSFORM_INVERSE][13][k]src[13line] + g_aiT16[TRANSFORM_INVERSE][15][k]src[15line];
679	}
680	for (k=0;k<4;k++)
681	{
682	EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]src[ 2line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]src[ 6line] +
683	g_aiT16[TRANSFORM_INVERSE][10][k]src[10line] + g_aiT16[TRANSFORM_INVERSE][14][k]src[14line];
684	}
685	EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][0]src[ 12line ];
686	EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]src[ 8*line ];
687	EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][1]src[ 12line ];
688	EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]src[ 8*line ];
689
690	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
691	for (k=0;k<2;k++)
692	{
693	EE[k] = EEE[k] + EEO[k];
694	EE[k+2] = EEE[1-k] - EEO[1-k];
695	}
696	for (k=0;k<4;k++)
697	{
698	E[k] = EE[k] + EO[k];
699	E[k+4] = EE[3-k] - EO[3-k];
700	}
701	for (k=0;k<8;k++)
702	{
703	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
704	dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift );
705	}
706	src ++;
707	dst += 16;
708	}
709	}
710
711	/** 32x32 forward transform implemented using partial butterfly structure (1D)
712	* \param src input data (residual)
713	* \param dst output data (transform coefficients)
714	* \param shift specifies right shift after 1D transform
715	* \param line
716	*/
717	Void partialButterfly32(TCoeff src, TCoeff dst, Int shift, Int line)
718	{
719	Int j,k;
720	TCoeff E[16],O[16];
721	TCoeff EE[8],EO[8];
722	TCoeff EEE[4],EEO[4];
723	TCoeff EEEE[2],EEEO[2];
724	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
725
726	for (j=0; j<line; j++)
727	{
728	/* E and O*/
729	for (k=0;k<16;k++)
730	{
731	E[k] = src[k] + src[31-k];
732	O[k] = src[k] - src[31-k];
733	}
734	/* EE and EO */
735	for (k=0;k<8;k++)
736	{
737	EE[k] = E[k] + E[15-k];
738	EO[k] = E[k] - E[15-k];
739	}
740	/* EEE and EEO */
741	for (k=0;k<4;k++)
742	{
743	EEE[k] = EE[k] + EE[7-k];
744	EEO[k] = EE[k] - EE[7-k];
745	}
746	/* EEEE and EEEO */
747	EEEE[0] = EEE[0] + EEE[3];
748	EEEO[0] = EEE[0] - EEE[3];
749	EEEE[1] = EEE[1] + EEE[2];
750	EEEO[1] = EEE[1] - EEE[2];
751
752	dst[ 0 ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]EEEE[1] + add)>>shift;
753	dst[ 16line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift;
754	dst[ 8line ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift;
755	dst[ 24line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift;
756	for (k=4;k<32;k+=8)
757	{
758	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] +
759	g_aiT32[TRANSFORM_FORWARD][k][2]EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EEO[3] + add)>>shift;
760	}
761	for (k=2;k<32;k+=4)
762	{
763	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] +
764	g_aiT32[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EO[3] +
765	g_aiT32[TRANSFORM_FORWARD][k][4]EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]EO[5] +
766	g_aiT32[TRANSFORM_FORWARD][k][6]EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]EO[7] + add)>>shift;
767	}
768	for (k=1;k<32;k+=2)
769	{
770	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] +
771	g_aiT32[TRANSFORM_FORWARD][k][ 2]O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]O[ 3] +
772	g_aiT32[TRANSFORM_FORWARD][k][ 4]O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]O[ 5] +
773	g_aiT32[TRANSFORM_FORWARD][k][ 6]O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]O[ 7] +
774	g_aiT32[TRANSFORM_FORWARD][k][ 8]O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]O[ 9] +
775	g_aiT32[TRANSFORM_FORWARD][k][10]O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]O[11] +
776	g_aiT32[TRANSFORM_FORWARD][k][12]O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]O[13] +
777	g_aiT32[TRANSFORM_FORWARD][k][14]O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]O[15] + add)>>shift;
778	}
779
780	src += 32;
781	dst ++;
782	}
783	}
784
785	/** 32x32 inverse transform implemented using partial butterfly structure (1D)
786	* \param src input data (transform coefficients)
787	* \param dst output data (residual)
788	* \param shift specifies right shift after 1D transform
789	* \param line
790	* \param outputMinimum minimum for clipping
791	* \param outputMaximum maximum for clipping
792	*/
793	Void partialButterflyInverse32(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
794	{
795	Int j,k;
796	TCoeff E[16],O[16];
797	TCoeff EE[8],EO[8];
798	TCoeff EEE[4],EEO[4];
799	TCoeff EEEE[2],EEEO[2];
800	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
801
802	for (j=0; j<line; j++)
803	{
804	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
805	for (k=0;k<16;k++)
806	{
807	O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]src[ line ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]src[ 3*line ] +
808	g_aiT32[TRANSFORM_INVERSE][ 5][k]src[ 5line ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]src[ 7line ] +
809	g_aiT32[TRANSFORM_INVERSE][ 9][k]src[ 9line ] + g_aiT32[TRANSFORM_INVERSE][11][k]src[ 11line ] +
810	g_aiT32[TRANSFORM_INVERSE][13][k]src[ 13line ] + g_aiT32[TRANSFORM_INVERSE][15][k]src[ 15line ] +
811	g_aiT32[TRANSFORM_INVERSE][17][k]src[ 17line ] + g_aiT32[TRANSFORM_INVERSE][19][k]src[ 19line ] +
812	g_aiT32[TRANSFORM_INVERSE][21][k]src[ 21line ] + g_aiT32[TRANSFORM_INVERSE][23][k]src[ 23line ] +
813	g_aiT32[TRANSFORM_INVERSE][25][k]src[ 25line ] + g_aiT32[TRANSFORM_INVERSE][27][k]src[ 27line ] +
814	g_aiT32[TRANSFORM_INVERSE][29][k]src[ 29line ] + g_aiT32[TRANSFORM_INVERSE][31][k]src[ 31line ];
815	}
816	for (k=0;k<8;k++)
817	{
818	EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]src[ 2line ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]src[ 6line ] +
819	g_aiT32[TRANSFORM_INVERSE][10][k]src[ 10line ] + g_aiT32[TRANSFORM_INVERSE][14][k]src[ 14line ] +
820	g_aiT32[TRANSFORM_INVERSE][18][k]src[ 18line ] + g_aiT32[TRANSFORM_INVERSE][22][k]src[ 22line ] +
821	g_aiT32[TRANSFORM_INVERSE][26][k]src[ 26line ] + g_aiT32[TRANSFORM_INVERSE][30][k]src[ 30line ];
822	}
823	for (k=0;k<4;k++)
824	{
825	EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]src[ 4line ] + g_aiT32[TRANSFORM_INVERSE][12][k]src[ 12line ] +
826	g_aiT32[TRANSFORM_INVERSE][20][k]src[ 20line ] + g_aiT32[TRANSFORM_INVERSE][28][k]src[ 28line ];
827	}
828	EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][0]src[ 24line ];
829	EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][1]src[ 24line ];
830	EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][0]src[ 16*line ];
831	EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][1]src[ 16*line ];
832
833	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
834	EEE[0] = EEEE[0] + EEEO[0];
835	EEE[3] = EEEE[0] - EEEO[0];
836	EEE[1] = EEEE[1] + EEEO[1];
837	EEE[2] = EEEE[1] - EEEO[1];
838	for (k=0;k<4;k++)
839	{
840	EE[k] = EEE[k] + EEO[k];
841	EE[k+4] = EEE[3-k] - EEO[3-k];
842	}
843	for (k=0;k<8;k++)
844	{
845	E[k] = EE[k] + EO[k];
846	E[k+8] = EE[7-k] - EO[7-k];
847	}
848	for (k=0;k<16;k++)
849	{
850	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
851	dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift );
852	}
853	src ++;
854	dst += 32;
855	}
856	}
857
858	/** MxN forward transform (2D)
859	* \param bitDepth [in] bit depth
860	* \param block [in] residual block
861	* \param coeff [out] transform coefficients
862	* \param iWidth [in] width of transform
863	* \param iHeight [in] height of transform
864	* \param useDST [in]
865	* \param maxTrDynamicRange [in]
866
867	*/
868	Void xTrMxN(Int bitDepth, TCoeff block, TCoeff coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
869	{
870	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
871
872	const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
873	const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;
874
875	assert(shift_1st >= 0);
876	assert(shift_2nd >= 0);
877
878	TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];
879
880	switch (iWidth)
881	{
882	case 4:
883	{
884	if ((iHeight == 4) && useDST) // Check for DCT or DST
885	{
886	fastForwardDst( block, tmp, shift_1st );
887	}
888	else
889	{
890	partialButterfly4 ( block, tmp, shift_1st, iHeight );
891	}
892	}
893	break;
894
895	case 8: partialButterfly8 ( block, tmp, shift_1st, iHeight ); break;
896	case 16: partialButterfly16( block, tmp, shift_1st, iHeight ); break;
897	case 32: partialButterfly32( block, tmp, shift_1st, iHeight ); break;
898	default:
899	assert(0); exit (1); break;
900	}
901
902	switch (iHeight)
903	{
904	case 4:
905	{
906	if ((iWidth == 4) && useDST) // Check for DCT or DST
907	{
908	fastForwardDst( tmp, coeff, shift_2nd );
909	}
910	else
911	{
912	partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
913	}
914	}
915	break;
916
917	case 8: partialButterfly8 ( tmp, coeff, shift_2nd, iWidth ); break;
918	case 16: partialButterfly16( tmp, coeff, shift_2nd, iWidth ); break;
919	case 32: partialButterfly32( tmp, coeff, shift_2nd, iWidth ); break;
920	default:
921	assert(0); exit (1); break;
922	}
923	}
924
925
926	/** MxN inverse transform (2D)
927	* \param bitDepth [in] bit depth
928	* \param coeff [in] transform coefficients
929	* \param block [out] residual block
930	* \param iWidth [in] width of transform
931	* \param iHeight [in] height of transform
932	* \param useDST [in]
933	* \param maxTrDynamicRange [in]
934	*/
935	Void xITrMxN(Int bitDepth, TCoeff coeff, TCoeff block, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
936	{
937	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
938
939	Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
940	Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
941	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
942	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
943
944	assert(shift_1st >= 0);
945	assert(shift_2nd >= 0);
946
947	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
948
949	switch (iHeight)
950	{
951	case 4:
952	{
953	if ((iWidth == 4) && useDST) // Check for DCT or DST
954	{
955	fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum);
956	}
957	else
958	{
959	partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum);
960	}
961	}
962	break;
963
964	case 8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
965	case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
966	case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
967
968	default:
969	assert(0); exit (1); break;
970	}
971
972	switch (iWidth)
973	{
974	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
975	case 4:
976	{
977	if ((iHeight == 4) && useDST) // Check for DCT or DST
978	{
979	fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() );
980	}
981	else
982	{
983	partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max());
984	}
985	}
986	break;
987
988	case 8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
989	case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
990	case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
991
992	default:
993	assert(0); exit (1); break;
994	}
995	}
996
997
998	// To minimize the distortion only. No rate is considered.
999	Void TComTrQuant::signBitHidingHDQ( const ComponentID compID, TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters )
1000	{
1001	const UInt width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
1002	const UInt height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
1003	const UInt groupSize = 1 << MLS_CG_SIZE;
1004
1005	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1006	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1007
1008	Int lastCG = -1;
1009	Int absSum = 0 ;
1010	Int n ;
1011
1012	for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
1013	{
1014	Int subPos = subSet << MLS_CG_SIZE;
1015	Int firstNZPosInCG=groupSize , lastNZPosInCG=-1 ;
1016	absSum = 0 ;
1017
1018	for(n = groupSize-1; n >= 0; --n )
1019	{
1020	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
1021	{
1022	lastNZPosInCG = n;
1023	break;
1024	}
1025	}
1026
1027	for(n = 0; n <groupSize; n++ )
1028	{
1029	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
1030	{
1031	firstNZPosInCG = n;
1032	break;
1033	}
1034	}
1035
1036	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1037	{
1038	absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]);
1039	}
1040
1041	if(lastNZPosInCG>=0 && lastCG==-1)
1042	{
1043	lastCG = 1 ;
1044	}
1045
1046	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1047	{
1048	UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ;
1049	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1050	{
1051	TCoeff curCost = std::numeric_limits<TCoeff>::max();
1052	TCoeff minCostInc = std::numeric_limits<TCoeff>::max();
1053	Int minPos =-1, finalChange=0, curChange=0;
1054
1055	for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n )
1056	{
1057	UInt blkPos = codingParameters.scan[ n+subPos ];
1058	if(pQCoef[ blkPos ] != 0 )
1059	{
1060	if(deltaU[blkPos]>0)
1061	{
1062	curCost = - deltaU[blkPos];
1063	curChange=1 ;
1064	}
1065	else
1066	{
1067	//curChange =-1;
1068	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1069	{
1070	curCost = std::numeric_limits<TCoeff>::max();
1071	}
1072	else
1073	{
1074	curCost = deltaU[blkPos];
1075	curChange =-1;
1076	}
1077	}
1078	}
1079	else
1080	{
1081	if(n<firstNZPosInCG)
1082	{
1083	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1084	if(thisSignBit != signbit )
1085	{
1086	curCost = std::numeric_limits<TCoeff>::max();
1087	}
1088	else
1089	{
1090	curCost = - (deltaU[blkPos]) ;
1091	curChange = 1 ;
1092	}
1093	}
1094	else
1095	{
1096	curCost = - (deltaU[blkPos]) ;
1097	curChange = 1 ;
1098	}
1099	}
1100
1101	if( curCost<minCostInc)
1102	{
1103	minCostInc = curCost ;
1104	finalChange = curChange ;
1105	minPos = blkPos ;
1106	}
1107	} //CG loop
1108
1109	if(pQCoef[minPos] == entropyCodingMaximum \|\| pQCoef[minPos] == entropyCodingMinimum)
1110	{
1111	finalChange = -1;
1112	}
1113
1114	if(pCoef[minPos]>=0)
1115	{
1116	pQCoef[minPos] += finalChange ;
1117	}
1118	else
1119	{
1120	pQCoef[minPos] -= finalChange ;
1121	}
1122	} // Hide
1123	}
1124	if(lastCG==1)
1125	{
1126	lastCG=0 ;
1127	}
1128	} // TU loop
1129
1130	return;
1131	}
1132
1133
1134	Void TComTrQuant::xQuant( TComTU &rTu,
1135	TCoeff * pSrc,
1136	TCoeff * pDes,
1137	#if ADAPTIVE_QP_SELECTION
1138	TCoeff *pArlDes,
1139	#endif
1140	TCoeff &uiAbsSum,
1141	const ComponentID compID,
1142	const QpParam &cQP )
1143	{
1144	const TComRectangle &rect = rTu.getRect(compID);
1145	const UInt uiWidth = rect.width;
1146	const UInt uiHeight = rect.height;
1147	TComDataCU* pcCU = rTu.getCU();
1148	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1149
1150	TCoeff* piCoef = pSrc;
1151	TCoeff* piQCoef = pDes;
1152	#if ADAPTIVE_QP_SELECTION
1153	TCoeff* piArlCCoef = pArlDes;
1154	#endif
1155
1156	const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1157
1158	Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
1159	if ( useRDOQ && (isLuma(compID) \|\| RDOQ_CHROMA) )
1160	{
1161	#if ADAPTIVE_QP_SELECTION
1162	xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );
1163	#else
1164	xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );
1165	#endif
1166	}
1167	else
1168	{
1169	TUEntropyCodingParameters codingParameters;
1170	getTUEntropyCodingParameters(codingParameters, rTu, compID);
1171
1172	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1173	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1174
1175	TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
1176
1177	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1178
1179	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1180	assert(scalingListType < SCALING_LIST_NUM);
1181	Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
1182
1183	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1184	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
1185
1186	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1187	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1188	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1189	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1190	*/
1191
1192	// Represents scaling through forward transform
1193	Int iTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1194	if (useTransformSkip && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
1195	{
1196	iTransformShift = std::max<Int>(0, iTransformShift);
1197	}
1198
1199	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
1200	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
1201
1202	#if ADAPTIVE_QP_SELECTION
1203	Int iQBitsC = MAX_INT;
1204	Int iAddC = MAX_INT;
1205
1206	if (m_bUseAdaptQpSelect)
1207	{
1208	iQBitsC = iQBits - ARL_C_PRECISION;
1209	iAddC = 1 << (iQBitsC-1);
1210	}
1211	#endif
1212
1213	const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1214	const Int qBits8 = iQBits - 8;
1215
1216	for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
1217	{
1218	const TCoeff iLevel = piCoef[uiBlockPos];
1219	const TCoeff iSign = (iLevel < 0 ? -1: 1);
1220
1221	const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
1222
1223	#if ADAPTIVE_QP_SELECTION
1224	if( m_bUseAdaptQpSelect )
1225	{
1226	piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
1227	}
1228	#endif
1229
1230	const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
1231	deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);
1232
1233	uiAbsSum += quantisedMagnitude;
1234	const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
1235
1236	piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
1237	} // for n
1238
1239	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1240	{
1241	if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
1242	{
1243	signBitHidingHDQ( compID, piQCoef, piCoef, deltaU, codingParameters ) ;
1244	}
1245	}
1246	} //if RDOQ
1247	//return;
1248	}
1249
1250	Void TComTrQuant::xDeQuant( TComTU &rTu,
1251	const TCoeff * pSrc,
1252	TCoeff * pDes,
1253	const ComponentID compID,
1254	const QpParam &cQP )
1255	{
1256	assert(compID<MAX_NUM_COMPONENT);
1257
1258	TComDataCU *pcCU = rTu.getCU();
1259	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1260	const TComRectangle &rect = rTu.getRect(compID);
1261	const UInt uiWidth = rect.width;
1262	const UInt uiHeight = rect.height;
1263	const TCoeff *const piQCoef = pSrc;
1264	TCoeff *const piCoef = pDes;
1265	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1266	const UInt numSamplesInBlock = uiWidth*uiHeight;
1267	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1268	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1269	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1270	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1271
1272	assert (scalingListType < SCALING_LIST_NUM);
1273	assert ( uiWidth <= m_uiMaxTrSize );
1274
1275	// Represents scaling through forward transform
1276	const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
1277	const Int originalTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1278	const Int iTransformShift = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift;
1279
1280	const Int QP_per = cQP.per;
1281	const Int QP_rem = cQP.rem;
1282
1283	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
1284
1285	if(enableScalingLists)
1286	{
1287	//from the dequantisation equation:
1288	//iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift
1289	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + dequantCoefBits - rightShift
1290	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
1291	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
1292
1293	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1294	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1295
1296	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
1297
1298	if(rightShift > 0)
1299	{
1300	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1301
1302	for( Int n = 0; n < numSamplesInBlock; n++ )
1303	{
1304	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1305	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
1306
1307	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1308	}
1309	}
1310	else
1311	{
1312	const Int leftShift = -rightShift;
1313
1314	for( Int n = 0; n < numSamplesInBlock; n++ )
1315	{
1316	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1317	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
1318
1319	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1320	}
1321	}
1322	}
1323	else
1324	{
1325	const Int scale = g_invQuantScales[QP_rem];
1326	const Int scaleBits = (IQUANT_SHIFT + 1) ;
1327
1328	//from the dequantisation equation:
1329	//iCoeffQ = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift);
1330	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + scaleBits - rightShift
1331	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
1332	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1333	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1334
1335	if (rightShift > 0)
1336	{
1337	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1338
1339	for( Int n = 0; n < numSamplesInBlock; n++ )
1340	{
1341	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1342	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
1343
1344	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1345	}
1346	}
1347	else
1348	{
1349	const Int leftShift = -rightShift;
1350
1351	for( Int n = 0; n < numSamplesInBlock; n++ )
1352	{
1353	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1354	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
1355
1356	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1357	}
1358	}
1359	}
1360	}
1361
1362
1363	Void TComTrQuant::init( UInt uiMaxTrSize,
1364	Bool bUseRDOQ,
1365	Bool bUseRDOQTS,
1366	Bool bEnc,
1367	Bool useTransformSkipFast
1368	#if ADAPTIVE_QP_SELECTION
1369	, Bool bUseAdaptQpSelect
1370	#endif
1371	)
1372	{
1373	m_uiMaxTrSize = uiMaxTrSize;
1374	m_bEnc = bEnc;
1375	m_useRDOQ = bUseRDOQ;
1376	m_useRDOQTS = bUseRDOQTS;
1377	#if ADAPTIVE_QP_SELECTION
1378	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1379	#endif
1380	m_useTransformSkipFast = useTransformSkipFast;
1381	}
1382
1383
1384	Void TComTrQuant::transformNxN( TComTU & rTu,
1385	const ComponentID compID,
1386	Pel * pcResidual,
1387	const UInt uiStride,
1388	TCoeff * rpcCoeff,
1389	#if ADAPTIVE_QP_SELECTION
1390	TCoeff * pcArlCoeff,
1391	#endif
1392	TCoeff & uiAbsSum,
1393	const QpParam & cQP
1394	)
1395	{
1396	const TComRectangle &rect = rTu.getRect(compID);
1397	const UInt uiWidth = rect.width;
1398	const UInt uiHeight = rect.height;
1399	TComDataCU* pcCU = rTu.getCU();
1400	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1401	const UInt uiOrgTrDepth = rTu.GetTransformDepthRel();
1402
1403	uiAbsSum=0;
1404
1405	RDPCMMode rdpcmMode = RDPCM_OFF;
1406	rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
1407
1408	if (rdpcmMode == RDPCM_OFF)
1409	{
1410	uiAbsSum = 0;
1411	//transform and quantise
1412	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1413	{
1414	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1415	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1416
1417	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1418	{
1419	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1420	{
1421	const Pel currentSample = pcResidual[(y * uiStride) + x];
1422
1423	rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
1424	uiAbsSum += TCoeff(abs(currentSample));
1425	}
1426	}
1427	}
1428	else
1429	{
1430	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1431	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
1432	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1433	#endif
1434
1435	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1436
1437	if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
1438	{
1439	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
1440	}
1441	else
1442	{
1443	xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1444	}
1445
1446	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1447	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
1448	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1449	#endif
1450
1451	xQuant( rTu, m_plTempCoeff, rpcCoeff,
1452
1453	#if ADAPTIVE_QP_SELECTION
1454	pcArlCoeff,
1455	#endif
1456	uiAbsSum, compID, cQP );
1457
1458	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1459	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
1460	printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
1461	#endif
1462	}
1463	}
1464
1465	//set the CBF
1466	pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1467	}
1468
1469
1470	Void TComTrQuant::invTransformNxN( TComTU &rTu,
1471	const ComponentID compID,
1472	Pel *pcResidual,
1473	const UInt uiStride,
1474	TCoeff * pcCoeff,
1475	const QpParam &cQP
1476	DEBUG_STRING_FN_DECLAREP(psDebug))
1477	{
1478	TComDataCU* pcCU=rTu.getCU();
1479	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1480	const TComRectangle &rect = rTu.getRect(compID);
1481	const UInt uiWidth = rect.width;
1482	const UInt uiHeight = rect.height;
1483
1484	if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
1485	{
1486	//------------------------------------------------
1487
1488	//recurse deeper
1489
1490	TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);
1491
1492	do
1493	{
1494	//------------------
1495
1496	const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;
1497
1498	Pel subTUResidual = pcResidual + (lineOffset uiStride);
1499	TCoeff subTUCoefficients = pcCoeff + (lineOffset subTURecurse.getRect(compID).width);
1500
1501	invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
1502
1503	//------------------
1504
1505	} while (subTURecurse.nextSection(rTu));
1506
1507	//------------------------------------------------
1508
1509	return;
1510	}
1511
1512	#if defined DEBUG_STRING
1513	if (psDebug)
1514	{
1515	std::stringstream ss(stringstream::out);
1516	printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
1517	DEBUG_STRING_APPEND((*psDebug), ss.str())
1518	}
1519	#endif
1520
1521	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1522	{
1523	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1524	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1525
1526	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1527	{
1528	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1529	{
1530	pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
1531	}
1532	}
1533	}
1534	else
1535	{
1536	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1537	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
1538	printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
1539	#endif
1540
1541	xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);
1542
1543	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1544	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
1545	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1546	#endif
1547
1548	#if defined DEBUG_STRING
1549	if (psDebug)
1550	{
1551	std::stringstream ss(stringstream::out);
1552	printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1553	(*psDebug)+=ss.str();
1554	}
1555	#endif
1556
1557	if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
1558	{
1559	xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );
1560
1561	#if defined DEBUG_STRING
1562	if (psDebug)
1563	{
1564	std::stringstream ss(stringstream::out);
1565	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1566	(*psDebug)+=ss.str();
1567	(*psDebug)+="(<- was a Transform-skipped block)\n";
1568	}
1569	#endif
1570	}
1571	else
1572	{
1573	xIT( compID, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight );
1574
1575	#if defined DEBUG_STRING
1576	if (psDebug)
1577	{
1578	std::stringstream ss(stringstream::out);
1579	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1580	(*psDebug)+=ss.str();
1581	(*psDebug)+="(<- was a Transformed block)\n";
1582	}
1583	#endif
1584	}
1585
1586	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
1587	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
1588	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1589	g_debugCounter++;
1590	#endif
1591	}
1592
1593	invRdpcmNxN( rTu, compID, pcResidual, uiStride );
1594	}
1595
1596	Void TComTrQuant::invRecurTransformNxN( const ComponentID compID,
1597	TComYuv *pResidual,
1598	TComTU &rTu)
1599	{
1600	if (!rTu.ProcessComponentSection(compID))
1601	{
1602	return;
1603	}
1604
1605	TComDataCU* pcCU = rTu.getCU();
1606	UInt absPartIdxTU = rTu.GetAbsPartIdxTU();
1607	UInt uiTrMode=rTu.GetTransformDepthRel();
1608	if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) \|\| !pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction()) )
1609	{
1610	return;
1611	}
1612
1613	if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) )
1614	{
1615	const TComRectangle &tuRect = rTu.getRect(compID);
1616	const Int uiStride = pResidual->getStride( compID );
1617	Pel *rpcResidual = pResidual->getAddr( compID );
1618	UInt uiAddr = (tuRect.x0 + uiStride*tuRect.y0);
1619	Pel *pResi = rpcResidual + uiAddr;
1620	TCoeff *pcCoeff = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID);
1621
1622	const QpParam cQP(*pcCU, compID);
1623
1624	if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0)
1625	{
1626	DEBUG_STRING_NEW(sTemp)
1627	#ifdef DEBUG_STRING
1628	std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0;
1629	#endif
1630
1631	invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
1632
1633	#ifdef DEBUG_STRING
1634	if (psDebug != 0)
1635	{
1636	std::cout << (*psDebug);
1637	}
1638	#endif
1639	}
1640
1641	if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0))
1642	{
1643	const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y );
1644	const Int strideLuma = pResidual->getStride( COMPONENT_Y );
1645	const Int tuWidth = rTu.getRect( compID ).width;
1646	const Int tuHeight = rTu.getRect( compID ).height;
1647
1648	if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0)
1649	{
1650	pResi = rpcResidual + uiAddr;
1651	const Pel *pResiLuma = piResiLuma + uiAddr;
1652
1653	crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true );
1654	}
1655	}
1656	}
1657	else
1658	{
1659	TComTURecurse tuRecurseChild(rTu, false);
1660	do
1661	{
1662	invRecurTransformNxN( compID, pResidual, tuRecurseChild );
1663	} while (tuRecurseChild.nextSection(rTu));
1664	}
1665	}
1666
1667	Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode )
1668	{
1669	TComDataCU *pcCU=rTu.getCU();
1670	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1671
1672	const Bool bLossless = pcCU->getCUTransquantBypass( uiAbsPartIdx );
1673	const UInt uiWidth = rTu.getRect(compID).width;
1674	const UInt uiHeight = rTu.getRect(compID).height;
1675	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1676	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1677
1678	UInt uiX = 0;
1679	UInt uiY = 0;
1680
1681	UInt &majorAxis = (mode == RDPCM_VER) ? uiX : uiY;
1682	UInt &minorAxis = (mode == RDPCM_VER) ? uiY : uiX;
1683	const UInt majorAxisLimit = (mode == RDPCM_VER) ? uiWidth : uiHeight;
1684	const UInt minorAxisLimit = (mode == RDPCM_VER) ? uiHeight : uiWidth;
1685
1686	const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF);
1687
1688	uiAbsSum = 0;
1689
1690	for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ )
1691	{
1692	TCoeff accumulatorValue = 0; // 32-bit accumulator
1693	for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ )
1694	{
1695	const UInt sampleIndex = (uiY * uiWidth) + uiX;
1696	const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex);
1697	const Pel currentSample = pcResidual[(uiY * uiStride) + uiX];
1698	const TCoeff encoderSideDelta = TCoeff(currentSample) - accumulatorValue;
1699
1700	Pel reconstructedDelta;
1701	if ( bLossless )
1702	{
1703	pcCoeff[coefficientIndex] = encoderSideDelta;
1704	reconstructedDelta = encoderSideDelta;
1705	}
1706	else
1707	{
1708	transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint);
1709	invTrSkipDeQuantOneSample (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex);
1710	}
1711
1712	uiAbsSum += abs(pcCoeff[coefficientIndex]);
1713
1714	if (mode != RDPCM_OFF)
1715	{
1716	accumulatorValue += reconstructedDelta;
1717	}
1718	}
1719	}
1720	}
1721
1722	Void TComTrQuant::rdpcmNxN ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode )
1723	{
1724	TComDataCU *pcCU=rTu.getCU();
1725	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1726
1727	if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) \|\| ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1728	{
1729	rdpcmMode = RDPCM_OFF;
1730	}
1731	else if ( pcCU->isIntra( uiAbsPartIdx ) )
1732	{
1733	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat();
1734	const ChannelType chType = toChannelType(compID);
1735	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1736	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1737	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1738
1739	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1740	{
1741	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1742	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode );
1743	}
1744	else
1745	{
1746	rdpcmMode = RDPCM_OFF;
1747	}
1748	}
1749	else // not intra, need to select the best mode
1750	{
1751	const UInt uiWidth = rTu.getRect(compID).width;
1752	const UInt uiHeight = rTu.getRect(compID).height;
1753
1754	RDPCMMode bestMode = NUMBER_OF_RDPCM_MODES;
1755	TCoeff bestAbsSum = std::numeric_limits<TCoeff>::max();
1756	TCoeff bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE];
1757
1758	for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++)
1759	{
1760	const RDPCMMode mode = RDPCMMode(modeIndex);
1761
1762	TCoeff currAbsSum = 0;
1763
1764	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode );
1765
1766	if (currAbsSum < bestAbsSum)
1767	{
1768	bestMode = mode;
1769	bestAbsSum = currAbsSum;
1770	if (mode != RDPCM_OFF)
1771	{
1772	memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff)));
1773	}
1774	}
1775	}
1776
1777	rdpcmMode = bestMode;
1778	uiAbsSum = bestAbsSum;
1779
1780	if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here
1781	{
1782	memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff)));
1783	}
1784	}
1785
1786	pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1787	}
1788
1789	Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride )
1790	{
1791	TComDataCU *pcCU=rTu.getCU();
1792	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1793
1794	if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) \|\| pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1795	{
1796	const UInt uiWidth = rTu.getRect(compID).width;
1797	const UInt uiHeight = rTu.getRect(compID).height;
1798
1799	RDPCMMode rdpcmMode = RDPCM_OFF;
1800
1801	if ( pcCU->isIntra( uiAbsPartIdx ) )
1802	{
1803	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat();
1804	const ChannelType chType = toChannelType(compID);
1805	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1806	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1807	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1808
1809	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1810	{
1811	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1812	}
1813	}
1814	else // not intra case
1815	{
1816	rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx ));
1817	}
1818
1819	static const TCoeff pelMin=(TCoeff) std::numeric_limits<Pel>::min();
1820	static const TCoeff pelMax=(TCoeff) std::numeric_limits<Pel>::max();
1821	if (rdpcmMode == RDPCM_VER)
1822	{
1823	for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1824	{
1825	Pel *pcCurResidual = pcResidual+uiX;
1826	TCoeff accumulator = *pcCurResidual; // 32-bit accumulator
1827	pcCurResidual+=uiStride;
1828	for( UInt uiY = 1; uiY < uiHeight; uiY++, pcCurResidual+=uiStride )
1829	{
1830	accumulator += *(pcCurResidual);
1831	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
1832	}
1833	}
1834	}
1835	else if (rdpcmMode == RDPCM_HOR)
1836	{
1837	for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1838	{
1839	Pel pcCurResidual = pcResidual+uiYuiStride;
1840	TCoeff accumulator = *pcCurResidual;
1841	pcCurResidual++;
1842	for( UInt uiX = 1; uiX < uiWidth; uiX++, pcCurResidual++ )
1843	{
1844	accumulator += *(pcCurResidual);
1845	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
1846	}
1847	}
1848	}
1849	}
1850	}
1851
1852	// ------------------------------------------------------------------------------------------------
1853	// Logical transform
1854	// ------------------------------------------------------------------------------------------------
1855
1856	/** Wrapper function between HM interface and core NxN forward transform (2D)
1857	* \param compID colour component ID
1858	* \param useDST
1859	* \param piBlkResi input data (residual)
1860	* \param uiStride stride of input residual data
1861	* \param psCoeff output data (transform coefficients)
1862	* \param iWidth transform width
1863	* \param iHeight transform height
1864	*/
1865	Void TComTrQuant::xT( const ComponentID compID, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight )
1866	{
1867	#if MATRIX_MULT
1868	if( iWidth == iHeight)
1869	{
1870	xTr(g_bitDepth[toChannelType(compID)], piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1871	return;
1872	}
1873	#endif
1874
1875	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1876	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1877
1878	for (Int y = 0; y < iHeight; y++)
1879	{
1880	for (Int x = 0; x < iWidth; x++)
1881	{
1882	block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x];
1883	}
1884	}
1885
1886	xTrMxN( g_bitDepth[toChannelType(compID)], block, coeff, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1887
1888	memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff)));
1889	}
1890
1891	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1892	* \param compID colour component ID
1893	* \param useDST
1894	* \param plCoef input data (transform coefficients)
1895	* \param pResidual output data (residual)
1896	* \param uiStride stride of input residual data
1897	* \param iWidth transform width
1898	* \param iHeight transform height
1899	*/
1900	Void TComTrQuant::xIT( const ComponentID compID, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1901	{
1902	#if MATRIX_MULT
1903	if( iWidth == iHeight )
1904	{
1905	#if O0043_BEST_EFFORT_DECODING
1906	xITr(g_bitDepthInStream[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1907	#else
1908	xITr(g_bitDepth[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1909	#endif
1910	return;
1911	}
1912	#endif
1913
1914	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1915	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1916
1917	memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));
1918
1919	#if O0043_BEST_EFFORT_DECODING
1920	xITrMxN( g_bitDepthInStream[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1921	#else
1922	xITrMxN( g_bitDepth[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1923	#endif
1924
1925	for (Int y = 0; y < iHeight; y++)
1926	{
1927	for (Int x = 0; x < iWidth; x++)
1928	{
1929	pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
1930	}
1931	}
1932	}
1933
1934	/** Wrapper function between HM interface and core 4x4 transform skipping
1935	* \param piBlkResi input data (residual)
1936	* \param uiStride stride of input residual data
1937	* \param psCoeff output data (transform coefficients)
1938	* \param rTu reference to transform data
1939	* \param component colour component
1940	*/
1941	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component )
1942	{
1943	const TComRectangle &rect = rTu.getRect(component);
1944	const Int width = rect.width;
1945	const Int height = rect.height;
1946
1947	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1948	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1949	{
1950	iTransformShift = std::max<Int>(0, iTransformShift);
1951	}
1952
1953	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1954	const UInt uiSizeMinus1 = (width * height) - 1;
1955
1956	if (iTransformShift >= 0)
1957	{
1958	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1959	{
1960	for (UInt x = 0; x < width; x++, coefficientIndex++)
1961	{
1962	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift;
1963	}
1964	}
1965	}
1966	else //for very high bit depths
1967	{
1968	iTransformShift = -iTransformShift;
1969	const TCoeff offset = 1 << (iTransformShift - 1);
1970
1971	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1972	{
1973	for (UInt x = 0; x < width; x++, coefficientIndex++)
1974	{
1975	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift;
1976	}
1977	}
1978	}
1979	}
1980
1981	/** Wrapper function between HM interface and core NxN transform skipping
1982	* \param plCoef input data (coefficients)
1983	* \param pResidual output data (residual)
1984	* \param uiStride stride of input residual data
1985	* \param rTu reference to transform data
1986	* \param component colour component ID
1987	*/
1988	Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component )
1989	{
1990	const TComRectangle &rect = rTu.getRect(component);
1991	const Int width = rect.width;
1992	const Int height = rect.height;
1993
1994	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1995	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1996	{
1997	iTransformShift = std::max<Int>(0, iTransformShift);
1998	}
1999
2000	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
2001	const UInt uiSizeMinus1 = (width * height) - 1;
2002
2003	if (iTransformShift >= 0)
2004	{
2005	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
2006
2007	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
2008	{
2009	for (UInt x = 0; x < width; x++, coefficientIndex++)
2010	{
2011	pResidual[(y * uiStride) + x] = Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift);
2012	}
2013	}
2014	}
2015	else //for very high bit depths
2016	{
2017	iTransformShift = -iTransformShift;
2018
2019	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
2020	{
2021	for (UInt x = 0; x < width; x++, coefficientIndex++)
2022	{
2023	pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift);
2024	}
2025	}
2026	}
2027	}
2028
2029	/** RDOQ with CABAC
2030	* \param rTu reference to transform data
2031	* \param plSrcCoeff pointer to input buffer
2032	* \param piDstCoeff reference to pointer to output buffer
2033	* \param piArlDstCoeff
2034	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
2035	* \param compID colour component ID
2036	* \param cQP reference to quantization parameters
2037
2038	* Rate distortion optimized quantization for entropy
2039	* coding engines using probability models like CABAC
2040	*/
2041	Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu,
2042	TCoeff * plSrcCoeff,
2043	TCoeff * piDstCoeff,
2044	#if ADAPTIVE_QP_SELECTION
2045	TCoeff * piArlDstCoeff,
2046	#endif
2047	TCoeff &uiAbsSum,
2048	const ComponentID compID,
2049	const QpParam &cQP )
2050	{
2051	const TComRectangle & rect = rTu.getRect(compID);
2052	const UInt uiWidth = rect.width;
2053	const UInt uiHeight = rect.height;
2054	TComDataCU * pcCU = rTu.getCU();
2055	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
2056	const ChannelType channelType = toChannelType(compID);
2057	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
2058
2059	const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
2060
2061	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
2062	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
2063	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
2064	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
2065	*/
2066
2067	// Represents scaling through forward transform
2068	Int iTransformShift = getTransformShift(channelType, uiLog2TrSize);
2069	if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
2070	{
2071	iTransformShift = std::max<Int>(0, iTransformShift);
2072	}
2073
2074	const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getUseGolombRiceParameterAdaptation();
2075	const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
2076	UInt uiGoRiceParam = initialGolombRiceParameter;
2077	Double d64BlockUncodedCost = 0;
2078	const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2;
2079	const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2;
2080	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
2081	assert(compID<MAX_NUM_COMPONENT);
2082
2083	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
2084	assert(scalingListType < SCALING_LIST_NUM);
2085
2086	#if ADAPTIVE_QP_SELECTION
2087	memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff);
2088	#endif
2089
2090	Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
2091	Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ];
2092	Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
2093	memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
2094	memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
2095	Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ];
2096	Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
2097	Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
2098	TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ];
2099	memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff );
2100	memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff );
2101	memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff );
2102	memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff );
2103
2104	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
2105	const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
2106	const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
2107
2108	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
2109	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
2110	const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
2111
2112	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
2113	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
2114
2115	#if ADAPTIVE_QP_SELECTION
2116	Int iQBitsC = iQBits - ARL_C_PRECISION;
2117	Int iAddC = 1 << (iQBitsC-1);
2118	#endif
2119
2120	TUEntropyCodingParameters codingParameters;
2121	getTUEntropyCodingParameters(codingParameters, rTu, compID);
2122	const UInt uiCGSize = (1 << MLS_CG_SIZE);
2123
2124	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
2125	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
2126	Int iCGLastScanPos = -1;
2127
2128	UInt uiCtxSet = 0;
2129	Int c1 = 1;
2130	Int c2 = 0;
2131	Double d64BaseCost = 0;
2132	Int iLastScanPos = -1;
2133
2134	UInt c1Idx = 0;
2135	UInt c2Idx = 0;
2136	Int baseLevel;
2137
2138	memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
2139	memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
2140
2141	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
2142	Int iScanPos;
2143	coeffGroupRDStats rdStats;
2144
2145	const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
2146
2147	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
2148	{
2149	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2150	UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups;
2151	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
2152
2153	memset( &rdStats, 0, sizeof (coeffGroupRDStats));
2154
2155	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
2156
2157	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2158	{
2159	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2160	//===== quantization =====
2161	UInt uiBlkPos = codingParameters.scan[iScanPos];
2162	// set coeff
2163
2164	const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
2165	const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
2166
2167	const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
2168
2169	const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, MAX_INTERMEDIATE_INT - (Intermediate_Int(1) << (iQBits - 1)));
2170
2171	#if ADAPTIVE_QP_SELECTION
2172	if( m_bUseAdaptQpSelect )
2173	{
2174	piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
2175	}
2176	#endif
2177	const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
2178
2179	const Double dErr = Double( lLevelDouble );
2180	pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;
2181	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
2182	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
2183
2184	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2185	{
2186	iLastScanPos = iScanPos;
2187	uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
2188	iCGLastScanPos = iCGScanPos;
2189	}
2190
2191	if ( iLastScanPos >= 0 )
2192	{
2193	//===== coefficient level estimation =====
2194	UInt uiLevel;
2195	UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
2196	UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
2197
2198	if( iScanPos == iLastScanPos )
2199	{
2200	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2201	lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2202	c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, channelType
2203	);
2204	}
2205	else
2206	{
2207	UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
2208
2209	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2210	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2211	c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, channelType
2212	);
2213
2214	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2215	}
2216
2217	deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
2218
2219	if( uiLevel > 0 )
2220	{
2221	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType );
2222	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2223	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2224	}
2225	else // uiLevel == 0
2226	{
2227	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2228	}
2229	piDstCoeff[ uiBlkPos ] = uiLevel;
2230	d64BaseCost += pdCostCoeff [ iScanPos ];
2231
2232	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2233	if( uiLevel >= baseLevel )
2234	{
2235	if (uiLevel > 3*(1<<uiGoRiceParam))
2236	{
2237	uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
2238	}
2239	}
2240	if ( uiLevel >= 1)
2241	{
2242	c1Idx ++;
2243	}
2244
2245	//===== update bin model =====
2246	if( uiLevel > 1 )
2247	{
2248	c1 = 0;
2249	c2 += (c2 < 2);
2250	c2Idx ++;
2251	}
2252	else if( (c1 < 3) && (c1 > 0) && uiLevel)
2253	{
2254	c1++;
2255	}
2256
2257	//===== context set update =====
2258	if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
2259	{
2260	uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this before entering the final group
2261	c1 = 1;
2262	c2 = 0;
2263	c1Idx = 0;
2264	c2Idx = 0;
2265	uiGoRiceParam = initialGolombRiceParameter;
2266	}
2267	}
2268	else
2269	{
2270	d64BaseCost += pdCostCoeff0[ iScanPos ];
2271	}
2272	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2273	if (iScanPosinCG == 0 )
2274	{
2275	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2276	}
2277	if (piDstCoeff[ uiBlkPos ] )
2278	{
2279	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2280	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2281	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2282	if ( iScanPosinCG != 0 )
2283	{
2284	rdStats.iNNZbeforePos0++;
2285	}
2286	}
2287	} //end for (iScanPosinCG)
2288
2289	if (iCGLastScanPos >= 0)
2290	{
2291	if( iCGScanPos )
2292	{
2293	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2294	{
2295	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2296	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2297	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2298	}
2299	else
2300	{
2301	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2302	{
2303	if ( rdStats.iNNZbeforePos0 == 0 )
2304	{
2305	d64BaseCost -= rdStats.d64SigCost_0;
2306	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2307	}
2308	// rd-cost if SigCoeffGroupFlag = 0, initialization
2309	Double d64CostZeroCG = d64BaseCost;
2310
2311	// add SigCoeffGroupFlag cost to total cost
2312	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2313
2314	if (iCGScanPos < iCGLastScanPos)
2315	{
2316	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2317	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2318	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2319	}
2320
2321	// try to convert the current coeff group from non-zero to all-zero
2322	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2323	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2324	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2325
2326	// if we can save cost, change this block to all-zero block
2327	if ( d64CostZeroCG < d64BaseCost )
2328	{
2329	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2330	d64BaseCost = d64CostZeroCG;
2331	if (iCGScanPos < iCGLastScanPos)
2332	{
2333	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2334	}
2335	// reset coeffs to 0 in this block
2336	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2337	{
2338	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2339	UInt uiBlkPos = codingParameters.scan[ iScanPos ];
2340
2341	if (piDstCoeff[ uiBlkPos ])
2342	{
2343	piDstCoeff [ uiBlkPos ] = 0;
2344	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2345	pdCostSig [ iScanPos ] = 0;
2346	}
2347	}
2348	} // end if ( d64CostAllZeros < d64BaseCost )
2349	}
2350	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2351	}
2352	else
2353	{
2354	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2355	}
2356	}
2357	} //end for (iCGScanPos)
2358
2359	//===== estimate last position =====
2360	if ( iLastScanPos < 0 )
2361	{
2362	return;
2363	}
2364
2365	Double d64BestCost = 0;
2366	Int ui16CtxCbf = 0;
2367	Int iBestLastIdxP1 = 0;
2368	if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2369	{
2370	ui16CtxCbf = 0;
2371	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2372	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2373	}
2374	else
2375	{
2376	ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType );
2377	ui16CtxCbf += getCBFContextOffset(compID);
2378	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2379	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2380	}
2381
2382
2383	Bool bFoundLast = false;
2384	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2385	{
2386	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2387
2388	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2389	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2390	{
2391	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2392	{
2393	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2394
2395	if (iScanPos > iLastScanPos)
2396	{
2397	continue;
2398	}
2399	UInt uiBlkPos = codingParameters.scan[iScanPos];
2400
2401	if( piDstCoeff[ uiBlkPos ] )
2402	{
2403	UInt uiPosY = uiBlkPos >> uiLog2BlockWidth;
2404	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
2405
2406	Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
2407	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2408
2409	if( totalCost < d64BestCost )
2410	{
2411	iBestLastIdxP1 = iScanPos + 1;
2412	d64BestCost = totalCost;
2413	}
2414	if( piDstCoeff[ uiBlkPos ] > 1 )
2415	{
2416	bFoundLast = true;
2417	break;
2418	}
2419	d64BaseCost -= pdCostCoeff[ iScanPos ];
2420	d64BaseCost += pdCostCoeff0[ iScanPos ];
2421	}
2422	else
2423	{
2424	d64BaseCost -= pdCostSig[ iScanPos ];
2425	}
2426	} //end for
2427	if (bFoundLast)
2428	{
2429	break;
2430	}
2431	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2432	} // end for
2433
2434
2435	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2436	{
2437	Int blkPos = codingParameters.scan[ scanPos ];
2438	TCoeff level = piDstCoeff[ blkPos ];
2439	uiAbsSum += level;
2440	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2441	}
2442
2443	//===== clean uncoded coefficients =====
2444	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2445	{
2446	piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
2447	}
2448
2449
2450	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2451	{
2452	const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
2453	Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
2454	/ m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(g_bitDepth[channelType] - 8)))
2455	+ 0.5);
2456
2457	Int lastCG = -1;
2458	Int absSum = 0 ;
2459	Int n ;
2460
2461	for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
2462	{
2463	Int subPos = subSet << MLS_CG_SIZE;
2464	Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
2465	absSum = 0 ;
2466
2467	for(n = uiCGSize-1; n >= 0; --n )
2468	{
2469	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2470	{
2471	lastNZPosInCG = n;
2472	break;
2473	}
2474	}
2475
2476	for(n = 0; n <uiCGSize; n++ )
2477	{
2478	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2479	{
2480	firstNZPosInCG = n;
2481	break;
2482	}
2483	}
2484
2485	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2486	{
2487	absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
2488	}
2489
2490	if(lastNZPosInCG>=0 && lastCG==-1)
2491	{
2492	lastCG = 1;
2493	}
2494
2495	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2496	{
2497	UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
2498	if( signbit!=(absSum&0x1) ) // hide but need tune
2499	{
2500	// calculate the cost
2501	Int64 minCostInc = MAX_INT64, curCost = MAX_INT64;
2502	Int minPos = -1, finalChange = 0, curChange = 0;
2503
2504	for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
2505	{
2506	UInt uiBlkPos = codingParameters.scan[ n + subPos ];
2507	if(piDstCoeff[ uiBlkPos ] != 0 )
2508	{
2509	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
2510	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2511	- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
2512
2513	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2514	{
2515	costDown -= (4<<15);
2516	}
2517
2518	if(costUp<costDown)
2519	{
2520	curCost = costUp;
2521	curChange = 1;
2522	}
2523	else
2524	{
2525	curChange = -1;
2526	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2527	{
2528	curCost = MAX_INT64;
2529	}
2530	else
2531	{
2532	curCost = costDown;
2533	}
2534	}
2535	}
2536	else
2537	{
2538	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2539	curChange = 1 ;
2540
2541	if(n<firstNZPosInCG)
2542	{
2543	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2544	if(thissignbit != signbit )
2545	{
2546	curCost = MAX_INT64;
2547	}
2548	}
2549	}
2550
2551	if( curCost<minCostInc)
2552	{
2553	minCostInc = curCost;
2554	finalChange = curChange;
2555	minPos = uiBlkPos;
2556	}
2557	}
2558
2559	if(piDstCoeff[minPos] == entropyCodingMaximum \|\| piDstCoeff[minPos] == entropyCodingMinimum)
2560	{
2561	finalChange = -1;
2562	}
2563
2564	if(plSrcCoeff[minPos]>=0)
2565	{
2566	piDstCoeff[minPos] += finalChange ;
2567	}
2568	else
2569	{
2570	piDstCoeff[minPos] -= finalChange ;
2571	}
2572	}
2573	}
2574
2575	if(lastCG==1)
2576	{
2577	lastCG=0 ;
2578	}
2579	}
2580	}
2581	}
2582
2583
2584	/** Pattern decision for context derivation process of significant_coeff_flag
2585	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2586	* \param uiCGPosX column of current coefficient group
2587	* \param uiCGPosY row of current coefficient group
2588	* \param widthInGroups width of the block
2589	* \param heightInGroups height of the block
2590	* \returns pattern for current coefficient group
2591	*/
2592	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups )
2593	{
2594	if ((widthInGroups <= 1) && (heightInGroups <= 1))
2595	{
2596	return 0;
2597	}
2598
2599	const Bool rightAvailable = uiCGPosX < (widthInGroups - 1);
2600	const Bool belowAvailable = uiCGPosY < (heightInGroups - 1);
2601
2602	UInt sigRight = 0;
2603	UInt sigLower = 0;
2604
2605	if (rightAvailable)
2606	{
2607	sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2608	}
2609	if (belowAvailable)
2610	{
2611	sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2612	}
2613
2614	return sigRight + (sigLower << 1);
2615	}
2616
2617
2618	/** Context derivation process of coeff_abs_significant_flag
2619	* \param patternSigCtx pattern for current coefficient group
2620	* \param codingParameters coding parameters for the TU (includes the scan)
2621	* \param scanPosition current position in scan order
2622	* \param log2BlockWidth log2 width of the block
2623	* \param log2BlockHeight log2 height of the block
2624	* \param chanType channel type (CHANNEL_TYPE_LUMA/CHROMA)
2625	* \returns ctxInc for current scan position
2626	*/
2627	Int TComTrQuant::getSigCtxInc ( Int patternSigCtx,
2628	const TUEntropyCodingParameters &codingParameters,
2629	const Int scanPosition,
2630	const Int log2BlockWidth,
2631	const Int log2BlockHeight,
2632	const ChannelType chanType)
2633	{
2634	if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE])
2635	{
2636	//single context mode
2637	return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE];
2638	}
2639
2640	const UInt rasterPosition = codingParameters.scan[scanPosition];
2641	const UInt posY = rasterPosition >> log2BlockWidth;
2642	const UInt posX = rasterPosition - (posY << log2BlockWidth);
2643
2644	if ((posX + posY) == 0)
2645	{
2646	return 0; //special case for the DC context variable
2647	}
2648
2649	Int offset = MAX_INT;
2650
2651	if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4
2652	{
2653	offset = ctxIndMap4x4[ (4 * posY) + posX ];
2654	}
2655	else
2656	{
2657	Int cnt = 0;
2658
2659	switch (patternSigCtx)
2660	{
2661	//------------------
2662
2663	case 0: //neither neighbouring group is significant
2664	{
2665	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2666	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2667	const Int posTotalInSubset = posXinSubset + posYinSubset;
2668
2669	//first N coefficients in scan order use 2; the next few use 1; the rest use 0.
2670	const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4;
2671	const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4;
2672
2673	cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2);
2674	}
2675	break;
2676
2677	//------------------
2678
2679	case 1: //right group is significant, below is not
2680	{
2681	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2682	const Int groupHeight = 1 << MLS_CG_LOG2_HEIGHT;
2683
2684	cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0
2685	}
2686	break;
2687
2688	//------------------
2689
2690	case 2: //below group is significant, right is not
2691	{
2692	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2693	const Int groupWidth = 1 << MLS_CG_LOG2_WIDTH;
2694
2695	cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0
2696	}
2697	break;
2698
2699	//------------------
2700
2701	case 3: //both neighbouring groups are significant
2702	{
2703	cnt = 2;
2704	}
2705	break;
2706
2707	//------------------
2708
2709	default:
2710	std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl;
2711	exit(1);
2712	break;
2713	}
2714
2715	//------------------------------------------------
2716
2717	const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0;
2718
2719	offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt;
2720	}
2721
2722	return codingParameters.firstSignificanceMapContext + offset;
2723	}
2724
2725
2726	/** Get the best level in RD sense
2727	* \param rd64CodedCost reference to coded cost
2728	* \param rd64CodedCost0 reference to cost when coefficient is 0
2729	* \param rd64CodedCostSig reference to cost of significant coefficient
2730	* \param lLevelDouble reference to unscaled quantized level
2731	* \param uiMaxAbsLevel scaled quantized level
2732	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2733	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2734	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2735	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2736	* \param c1Idx
2737	* \param c2Idx
2738	* \param iQBits quantization step size
2739	* \param errorScale
2740	* \param bLast indicates if the coefficient is the last significant
2741	* \param useLimitedPrefixLength
2742	* \param channelType texture channel type (luma/chroma)
2743	* \returns best quantized transform level for given scan position
2744	* This method calculates the best quantized transform level for a given scan position.
2745	*/
2746	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2747	Double& rd64CodedCost0,
2748	Double& rd64CodedCostSig,
2749	Intermediate_Int lLevelDouble,
2750	UInt uiMaxAbsLevel,
2751	UShort ui16CtxNumSig,
2752	UShort ui16CtxNumOne,
2753	UShort ui16CtxNumAbs,
2754	UShort ui16AbsGoRice,
2755	UInt c1Idx,
2756	UInt c2Idx,
2757	Int iQBits,
2758	Double errorScale,
2759	Bool bLast,
2760	Bool useLimitedPrefixLength,
2761	ChannelType channelType
2762	) const
2763	{
2764	Double dCurrCostSig = 0;
2765	UInt uiBestAbsLevel = 0;
2766
2767	if( !bLast && uiMaxAbsLevel < 3 )
2768	{
2769	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2770	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2771	if( uiMaxAbsLevel == 0 )
2772	{
2773	return uiBestAbsLevel;
2774	}
2775	}
2776	else
2777	{
2778	rd64CodedCost = MAX_DOUBLE;
2779	}
2780
2781	if( !bLast )
2782	{
2783	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2784	}
2785
2786	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2787	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2788	{
2789	Double dErr = Double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
2790	Double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, channelType ) );
2791	dCurrCost += dCurrCostSig;
2792
2793	if( dCurrCost < rd64CodedCost )
2794	{
2795	uiBestAbsLevel = uiAbsLevel;
2796	rd64CodedCost = dCurrCost;
2797	rd64CodedCostSig = dCurrCostSig;
2798	}
2799	}
2800
2801	return uiBestAbsLevel;
2802	}
2803
2804	/** Calculates the cost for specific absolute transform level
2805	* \param uiAbsLevel scaled quantized level
2806	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2807	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2808	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2809	* \param c1Idx
2810	* \param c2Idx
2811	* \param useLimitedPrefixLength
2812	* \param channelType texture channel type (luma/chroma)
2813	* \returns cost of given absolute transform level
2814	*/
2815	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2816	UShort ui16CtxNumOne,
2817	UShort ui16CtxNumAbs,
2818	UShort ui16AbsGoRice,
2819	UInt c1Idx,
2820	UInt c2Idx,
2821	Bool useLimitedPrefixLength,
2822	ChannelType channelType
2823	) const
2824	{
2825	Int iRate = Int(xGetIEPRate()); // cost of sign bit
2826	UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2827
2828	if ( uiAbsLevel >= baseLevel )
2829	{
2830	UInt symbol = uiAbsLevel - baseLevel;
2831	UInt length;
2832	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2833	{
2834	length = symbol>>ui16AbsGoRice;
2835	iRate += (length+1+ui16AbsGoRice)<< 15;
2836	}
2837	else if (useLimitedPrefixLength)
2838	{
2839	const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + g_maxTrDynamicRange[channelType]));
2840
2841	UInt prefixLength = 0;
2842	UInt suffix = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION;
2843
2844	while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
2845	{
2846	prefixLength++;
2847	}
2848
2849	const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (g_maxTrDynamicRange[channelType] - ui16AbsGoRice) : (prefixLength + 1/separator/);
2850
2851	iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15;
2852	}
2853	else
2854	{
2855	length = ui16AbsGoRice;
2856	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2857	while (symbol >= (1<<length))
2858	{
2859	symbol -= (1<<(length++));
2860	}
2861	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2862	}
2863
2864	if (c1Idx < C1FLAG_NUMBER)
2865	{
2866	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2867
2868	if (c2Idx < C2FLAG_NUMBER)
2869	{
2870	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2871	}
2872	}
2873	}
2874	else if( uiAbsLevel == 1 )
2875	{
2876	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2877	}
2878	else if( uiAbsLevel == 2 )
2879	{
2880	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2881	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2882	}
2883	else
2884	{
2885	iRate = 0;
2886	}
2887
2888	return iRate;
2889	}
2890
2891	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2892	UShort ui16CtxNumSig ) const
2893	{
2894	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2895	}
2896
2897	/** Calculates the cost of signaling the last significant coefficient in the block
2898	* \param uiPosX X coordinate of the last significant coefficient
2899	* \param uiPosY Y coordinate of the last significant coefficient
2900	* \param component colour component ID
2901	* \returns cost of last significant coefficient
2902	*/
2903	/*
2904	* \param uiWidth width of the transform unit (TU)
2905	*/
2906	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2907	const UInt uiPosY,
2908	const ComponentID component ) const
2909	{
2910	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2911	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2912
2913	Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ];
2914
2915	if( uiCtxX > 3 )
2916	{
2917	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2918	}
2919	if( uiCtxY > 3 )
2920	{
2921	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2922	}
2923	return xGetICost( uiCost );
2924	}
2925
2926	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2927	UShort ui16CtxNumSig ) const
2928	{
2929	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2930	}
2931
2932	/** Get the cost for a specific rate
2933	* \param dRate rate of a bit
2934	* \returns cost at the specific rate
2935	*/
2936	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2937	{
2938	return m_dLambda * dRate;
2939	}
2940
2941	/** Get the cost of an equal probable bit
2942	* \returns cost of equal probable bit
2943	*/
2944	__inline Double TComTrQuant::xGetIEPRate ( ) const
2945	{
2946	return 32768;
2947	}
2948
2949	/** Context derivation process of coeff_abs_significant_flag
2950	* \param uiSigCoeffGroupFlag significance map of L1
2951	* \param uiCGPosX column of current scan position
2952	* \param uiCGPosY row of current scan position
2953	* \param widthInGroups width of the block
2954	* \param heightInGroups height of the block
2955	* \returns ctxInc for current scan position
2956	*/
2957	UInt TComTrQuant::getSigCoeffGroupCtxInc (const UInt* uiSigCoeffGroupFlag,
2958	const UInt uiCGPosX,
2959	const UInt uiCGPosY,
2960	const UInt widthInGroups,
2961	const UInt heightInGroups)
2962	{
2963	UInt sigRight = 0;
2964	UInt sigLower = 0;
2965
2966	if (uiCGPosX < (widthInGroups - 1))
2967	{
2968	sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2969	}
2970	if (uiCGPosY < (heightInGroups - 1))
2971	{
2972	sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2973	}
2974
2975	return ((sigRight + sigLower) != 0) ? 1 : 0;
2976	}
2977
2978
2979	/** set quantized matrix coefficient for encode
2980	* \param scalingList quantized matrix address
2981	* \param format chroma format
2982	*/
2983	Void TComTrQuant::setScalingList(TComScalingList *scalingList, const ChromaFormat format)
2984	{
2985	const Int minimumQp = 0;
2986	const Int maximumQp = SCALING_LIST_REM_NUM;
2987
2988	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2989	{
2990	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2991	{
2992	for(Int qp = minimumQp; qp < maximumQp; qp++)
2993	{
2994	xSetScalingListEnc(scalingList,list,size,qp,format);
2995	xSetScalingListDec(*scalingList,list,size,qp,format);
2996	setErrScaleCoeff(list,size,qp);
2997	}
2998	}
2999	}
3000	}
3001	/** set quantized matrix coefficient for decode
3002	* \param scalingList quantized matrix address
3003	* \param format chroma format
3004	*/
3005	Void TComTrQuant::setScalingListDec(const TComScalingList &scalingList, const ChromaFormat format)
3006	{
3007	const Int minimumQp = 0;
3008	const Int maximumQp = SCALING_LIST_REM_NUM;
3009
3010	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3011	{
3012	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3013	{
3014	for(Int qp = minimumQp; qp < maximumQp; qp++)
3015	{
3016	xSetScalingListDec(scalingList,list,size,qp,format);
3017	}
3018	}
3019	}
3020	}
3021	/** set error scale coefficients
3022	* \param list List ID
3023	* \param size Size
3024	* \param qp Quantization parameter
3025	*/
3026	Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp)
3027	{
3028	const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
3029	const ChannelType channelType = ((list == 0) \|\| (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
3030
3031	const Int iTransformShift = getTransformShift(channelType, uiLog2TrSize); // Represents scaling through forward transform
3032
3033	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
3034	Int *piQuantcoeff;
3035	Double *pdErrScale;
3036	piQuantcoeff = getQuantCoeff(list, qp,size);
3037	pdErrScale = getErrScaleCoeff(list, size, qp);
3038
3039	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
3040	dErrScale = dErrScalepow(2.0,(-2.0iTransformShift)); // Compensate for scaling through forward transform
3041
3042	for(i=0;i<uiMaxNumCoeff;i++)
3043	{
3044	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
3045	}
3046
3047	getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
3048	}
3049
3050	/** set quantized matrix coefficient for encode
3051	* \param scalingList quantized matrix address
3052	* \param listId List index
3053	* \param sizeId size index
3054	* \param qp Quantization parameter
3055	* \param format chroma format
3056	*/
3057	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
3058	{
3059	UInt width = g_scalingListSizeX[sizeId];
3060	UInt height = g_scalingListSizeX[sizeId];
3061	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3062	Int *quantcoeff;
3063	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
3064	quantcoeff = getQuantCoeff(listId, qp, sizeId);
3065
3066	Int quantScales = g_quantScales[qp];
3067
3068	processScalingListEnc(coeff,
3069	quantcoeff,
3070	(quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
3071	height, width, ratio,
3072	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3073	scalingList->getScalingListDC(sizeId,listId));
3074	}
3075
3076	/** set quantized matrix coefficient for decode
3077	* \param scalingList quantaized matrix address
3078	* \param listId List index
3079	* \param sizeId size index
3080	* \param qp Quantization parameter
3081	* \param format chroma format
3082	*/
3083	Void TComTrQuant::xSetScalingListDec(const TComScalingList &scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
3084	{
3085	UInt width = g_scalingListSizeX[sizeId];
3086	UInt height = g_scalingListSizeX[sizeId];
3087	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3088	Int *dequantcoeff;
3089	const Int *coeff = scalingList.getScalingListAddress(sizeId,listId);
3090
3091	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
3092
3093	Int invQuantScale = g_invQuantScales[qp];
3094
3095	processScalingListDec(coeff,
3096	dequantcoeff,
3097	invQuantScale,
3098	height, width, ratio,
3099	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3100	scalingList.getScalingListDC(sizeId,listId));
3101	}
3102
3103	/** set flat matrix value to quantized coefficient
3104	*/
3105	Void TComTrQuant::setFlatScalingList(const ChromaFormat format)
3106	{
3107	const Int minimumQp = 0;
3108	const Int maximumQp = SCALING_LIST_REM_NUM;
3109
3110	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3111	{
3112	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3113	{
3114	for(Int qp = minimumQp; qp < maximumQp; qp++)
3115	{
3116	xsetFlatScalingList(list,size,qp,format);
3117	setErrScaleCoeff(list,size,qp);
3118	}
3119	}
3120	}
3121	}
3122
3123	/** set flat matrix value to quantized coefficient
3124	* \param list List ID
3125	* \param size size index
3126	* \param qp Quantization parameter
3127	* \param format chroma format
3128	*/
3129	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp, const ChromaFormat format)
3130	{
3131	UInt i,num = g_scalingListSize[size];
3132	Int *quantcoeff;
3133	Int *dequantcoeff;
3134
3135	Int quantScales = g_quantScales [qp];
3136	Int invQuantScales = g_invQuantScales[qp] << 4;
3137
3138	quantcoeff = getQuantCoeff(list, qp, size);
3139	dequantcoeff = getDequantCoeff(list, qp, size);
3140
3141	for(i=0;i<num;i++)
3142	{
3143	*quantcoeff++ = quantScales;
3144	*dequantcoeff++ = invQuantScales;
3145	}
3146	}
3147
3148	/** set quantized matrix coefficient for encode
3149	* \param coeff quantaized matrix address
3150	* \param quantcoeff quantaized matrix address
3151	* \param quantScales Q(QP%6)
3152	* \param height height
3153	* \param width width
3154	* \param ratio ratio for upscale
3155	* \param sizuNum matrix size
3156	* \param dc dc parameter
3157	*/
3158	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3159	{
3160	for(UInt j=0;j<height;j++)
3161	{
3162	for(UInt i=0;i<width;i++)
3163	{
3164	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j / ratio) + i / ratio];
3165	}
3166	}
3167
3168	if(ratio > 1)
3169	{
3170	quantcoeff[0] = quantScales / dc;
3171	}
3172	}
3173
3174	/** set quantized matrix coefficient for decode
3175	* \param coeff quantaized matrix address
3176	* \param dequantcoeff quantaized matrix address
3177	* \param invQuantScales IQ(QP%6))
3178	* \param height height
3179	* \param width width
3180	* \param ratio ratio for upscale
3181	* \param sizuNum matrix size
3182	* \param dc dc parameter
3183	*/
3184	Void TComTrQuant::processScalingListDec( const Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3185	{
3186	for(UInt j=0;j<height;j++)
3187	{
3188	for(UInt i=0;i<width;i++)
3189	{
3190	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
3191	}
3192	}
3193
3194	if(ratio > 1)
3195	{
3196	dequantcoeff[0] = invQuantScales * dc;
3197	}
3198	}
3199
3200	/** initialization process of scaling list array
3201	*/
3202	Void TComTrQuant::initScalingList()
3203	{
3204	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3205	{
3206	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3207	{
3208	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3209	{
3210	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3211	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3212	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
3213	} // listID loop
3214	}
3215	}
3216	}
3217
3218	/** destroy quantization matrix array
3219	*/
3220	Void TComTrQuant::destroyScalingList()
3221	{
3222	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3223	{
3224	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3225	{
3226	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3227	{
3228	if(m_quantCoef[sizeId][listId][qp])
3229	{
3230	delete [] m_quantCoef[sizeId][listId][qp];
3231	}
3232	if(m_dequantCoef[sizeId][listId][qp])
3233	{
3234	delete [] m_dequantCoef[sizeId][listId][qp];
3235	}
3236	if(m_errScale[sizeId][listId][qp])
3237	{
3238	delete [] m_errScale[sizeId][listId][qp];
3239	}
3240	}
3241	}
3242	}
3243	}
3244
3245	Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const TCoeff resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint)
3246	{
3247	TComDataCU *pcCU = rTu.getCU();
3248	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3249	const TComRectangle &rect = rTu.getRect(compID);
3250	const UInt uiWidth = rect.width;
3251	const UInt uiHeight = rect.height;
3252	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3253	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3254	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3255	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
3256
3257	assert( scalingListType < SCALING_LIST_NUM );
3258	const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) );
3259
3260
3261	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
3262	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
3263	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
3264	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
3265	*/
3266
3267	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
3268	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
3269
3270	const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9);
3271
3272	TCoeff transformedCoefficient;
3273
3274	// transform-skip
3275	if (iTransformShift >= 0)
3276	{
3277	transformedCoefficient = resiDiff << iTransformShift;
3278	}
3279	else // for very high bit depths
3280	{
3281	const Int iTrShiftNeg = -iTransformShift;
3282	const Int offset = 1 << (iTrShiftNeg - 1);
3283	transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg;
3284	}
3285
3286	// quantization
3287	const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
3288
3289	const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
3290
3291	const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient;
3292
3293	const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
3294
3295	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3296	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3297	pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
3298	}
3299
3300
3301	Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos )
3302	{
3303	TComDataCU *pcCU = rTu.getCU();
3304	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3305	const TComRectangle &rect = rTu.getRect(compID);
3306	const UInt uiWidth = rect.width;
3307	const UInt uiHeight = rect.height;
3308	const Int QP_per = cQP.per;
3309	const Int QP_rem = cQP.rem;
3310	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3311	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3312	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3313	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
3314
3315	assert( scalingListType < SCALING_LIST_NUM );
3316
3317	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
3318
3319	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3320	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3321
3322	// Dequantisation
3323
3324	TCoeff dequantisedSample;
3325
3326	if(enableScalingLists)
3327	{
3328	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
3329	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
3330
3331	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3332	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3333
3334	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
3335
3336	if(rightShift > 0)
3337	{
3338	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3339	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3340	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift;
3341
3342	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3343	}
3344	else
3345	{
3346	const Int leftShift = -rightShift;
3347	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3348	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift;
3349
3350	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3351	}
3352	}
3353	else
3354	{
3355	const Int scale = g_invQuantScales[QP_rem];
3356	const Int scaleBits = (IQUANT_SHIFT + 1) ;
3357
3358	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
3359	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3360	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3361
3362	if (rightShift > 0)
3363	{
3364	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3365	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3366	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
3367
3368	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3369	}
3370	else
3371	{
3372	const Int leftShift = -rightShift;
3373	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3374	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
3375
3376	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3377	}
3378	}
3379
3380	// Inverse transform-skip
3381
3382	if (iTransformShift >= 0)
3383	{
3384	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
3385	reconSample = Pel(( dequantisedSample + offset ) >> iTransformShift);
3386	}
3387	else //for very high bit depths
3388	{
3389	const Int iTrShiftNeg = -iTransformShift;
3390	reconSample = Pel(dequantisedSample << iTrShiftNeg);
3391	}
3392	}
3393
3394
3395	Void TComTrQuant::crossComponentPrediction( TComTU & rTu,
3396	const ComponentID compID,
3397	const Pel * piResiL,
3398	const Pel * piResiC,
3399	Pel * piResiT,
3400	const Int width,
3401	const Int height,
3402	const Int strideL,
3403	const Int strideC,
3404	const Int strideT,
3405	const Bool reverse )
3406	{
3407	const Pel *pResiL = piResiL;
3408	const Pel *pResiC = piResiC;
3409	Pel *pResiT = piResiT;
3410
3411	TComDataCU *pCU = rTu.getCU();
3412	const Int alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID );
3413	const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
3414
3415	for( Int y = 0; y < height; y++ )
3416	{
3417	if (reverse)
3418	{
3419	// A constraint is to be added to the HEVC Standard to limit the size of pResiL and pResiC at this point.
3420	// The likely form of the constraint is to either restrict the values to CoeffMin to CoeffMax,
3421	// or to be representable in a bitDepthY+4 or bitDepthC+4 signed integer.
3422	// The result of the constraint is that for 8/10/12bit profiles, the input values
3423	// can be represented within a 16-bit Pel-type.
3424	#if RExt__HIGH_BIT_DEPTH_SUPPORT
3425	for( Int x = 0; x < width; x++ )
3426	{
3427	pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3);
3428	}
3429	#else
3430	const Int minPel=std::numeric_limits<Pel>::min();
3431	const Int maxPel=std::numeric_limits<Pel>::max();
3432	for( Int x = 0; x < width; x++ )
3433	{
3434	pResiT[x] = Clip3<Int>(minPel, maxPel, pResiC[x] + (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3));
3435	}
3436	#endif
3437	}
3438	else
3439	{
3440	// Forward does not need clipping. Pel type should always be big enough.
3441	for( Int x = 0; x < width; x++ )
3442	{
3443	pResiT[x] = pResiC[x] - (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3);
3444	}
3445	}
3446
3447	pResiL += strideL;
3448	pResiC += strideC;
3449	pResiT += strideT;
3450	}
3451	}
3452
3453	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: