Context navigation

← Previous revision
Next revision →
Blame
Revision log

TComTrQuant.cpp

Visit:

Last change on this file was 1413, checked in by tech, 6 years ago
Merged HTM-16.2-dev@1412
Property svn:eol-style set to `native`
File size: 130.3 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2017, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <limits>
41	#include <memory.h>
42	#include "TComTrQuant.h"
43	#include "TComPic.h"
44	#include "ContextTables.h"
45	#include "TComTU.h"
46	#include "Debug.h"
47
48	typedef struct
49	{
50	Int iNNZbeforePos0;
51	Double d64CodedLevelandDist; // distortion and level cost only
52	Double d64UncodedDist; // all zero coded block distortion
53	Double d64SigCost;
54	Double d64SigCost_0;
55	} coeffGroupRDStats;
56
57	//! \ingroup TLibCommon
58	//! \{
59
60	// ====================================================================================================================
61	// Constants
62	// ====================================================================================================================
63
64	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
65
66
67	// ====================================================================================================================
68	// QpParam constructor
69	// ====================================================================================================================
70
71	QpParam::QpParam(const Int qpy,
72	const ChannelType chType,
73	const Int qpBdOffset,
74	const Int chromaQPOffset,
75	const ChromaFormat chFmt )
76	{
77	Int baseQp;
78
79	if(isLuma(chType))
80	{
81	baseQp = qpy + qpBdOffset;
82	}
83	else
84	{
85	baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
86
87	if(baseQp < 0)
88	{
89	baseQp = baseQp + qpBdOffset;
90	}
91	else
92	{
93	baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
94	}
95	}
96
97	Qp =baseQp;
98	per=baseQp/6;
99	rem=baseQp%6;
100	}
101
102	QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
103	{
104	Int chromaQpOffset = 0;
105
106	if (isChroma(compID))
107	{
108	chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
109	chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
110
111	chromaQpOffset += cu.getSlice()->getPPS()->getPpsRangeExtension().getChromaQpOffsetListEntry(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
112	}
113
114	*this = QpParam(cu.getQP( 0 ),
115	toChannelType(compID),
116	cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
117	chromaQpOffset,
118	cu.getPic()->getChromaFormat());
119	}
120
121
122	// ====================================================================================================================
123	// TComTrQuant class member functions
124	// ====================================================================================================================
125
126	TComTrQuant::TComTrQuant()
127	{
128	// allocate temporary buffers
129	m_plTempCoeff = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ];
130
131	// allocate bit estimation class (for RDOQ)
132	m_pcEstBitsSbac = new estBitsSbacStruct;
133	initScalingList();
134	}
135
136	TComTrQuant::~TComTrQuant()
137	{
138	// delete temporary buffers
139	if ( m_plTempCoeff )
140	{
141	delete [] m_plTempCoeff;
142	m_plTempCoeff = NULL;
143	}
144
145	// delete bit estimation class
146	if ( m_pcEstBitsSbac )
147	{
148	delete m_pcEstBitsSbac;
149	}
150	destroyScalingList();
151	}
152
153	#if ADAPTIVE_QP_SELECTION
154	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
155	{
156	// NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled?
157
158	Int qpBase = pcSlice->getSliceQpBase();
159	Int sliceQpused = pcSlice->getSliceQp();
160	Int sliceQpnext;
161	Double alpha = qpBase < 17 ? 0.5 : 1;
162
163	Int cnt=0;
164	for(Int u=1; u<=LEVEL_RANGE; u++)
165	{
166	cnt += m_sliceNsamples[u] ;
167	}
168
169	if( !m_useRDOQ )
170	{
171	sliceQpused = qpBase;
172	alpha = 0.5;
173	}
174
175	if( cnt > 120 )
176	{
177	Double sum = 0;
178	Int k = 0;
179	for(Int u=1; u<LEVEL_RANGE; u++)
180	{
181	sum += u*m_sliceSumC[u];
182	k += uum_sliceNsamples[u];
183	}
184
185	Int v;
186	Double q[MAX_QP+1] ;
187	for(v=0; v<=MAX_QP; v++)
188	{
189	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
190	}
191
192	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
193
194	for(v=0; v<MAX_QP; v++)
195	{
196	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
197	{
198	break;
199	}
200	}
201	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
202	}
203	else
204	{
205	sliceQpnext = sliceQpused;
206	}
207
208	m_qpDelta[qpBase] = sliceQpnext - qpBase;
209	}
210
211	Void TComTrQuant::initSliceQpDelta()
212	{
213	for(Int qp=0; qp<=MAX_QP; qp++)
214	{
215	m_qpDelta[qp] = qp < 17 ? 0 : 1;
216	}
217	}
218
219	Void TComTrQuant::clearSliceARLCnt()
220	{
221	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
222	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
223	}
224	#endif
225
226
227
228	#if MATRIX_MULT
229	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
230	* \param block pointer to input data (residual)
231	* \param coeff pointer to output data (transform coefficients)
232	* \param uiStride stride of input data
233	* \param uiTrSize transform size (uiTrSize x uiTrSize)
234	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
235	*/
236	Void xTr(Int bitDepth, Pel block, TCoeff coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxLog2TrDynamicRange)
237	{
238	UInt i,j,k;
239	TCoeff iSum;
240	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
241	const TMatrixCoeff *iT;
242	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
243
244	if (uiTrSize==4)
245	{
246	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]);
247	}
248	else if (uiTrSize==8)
249	{
250	iT = g_aiT8[TRANSFORM_FORWARD][0];
251	}
252	else if (uiTrSize==16)
253	{
254	iT = g_aiT16[TRANSFORM_FORWARD][0];
255	}
256	else if (uiTrSize==32)
257	{
258	iT = g_aiT32[TRANSFORM_FORWARD][0];
259	}
260	else
261	{
262	assert(0);
263	}
264
265	const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
266
267	const Int shift_1st = (uiLog2TrSize + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
268	const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT;
269	const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0;
270	const Int add_2nd = 1<<(shift_2nd-1);
271
272	/* Horizontal transform */
273
274	for (i=0; i<uiTrSize; i++)
275	{
276	for (j=0; j<uiTrSize; j++)
277	{
278	iSum = 0;
279	for (k=0; k<uiTrSize; k++)
280	{
281	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
282	}
283	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
284	}
285	}
286
287	/* Vertical transform */
288	for (i=0; i<uiTrSize; i++)
289	{
290	for (j=0; j<uiTrSize; j++)
291	{
292	iSum = 0;
293	for (k=0; k<uiTrSize; k++)
294	{
295	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
296	}
297	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
298	}
299	}
300	}
301
302	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
303	* \param coeff pointer to input data (transform coefficients)
304	* \param block pointer to output data (residual)
305	* \param uiStride stride of output data
306	* \param uiTrSize transform size (uiTrSize x uiTrSize)
307	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
308	*/
309	Void xITr(Int bitDepth, TCoeff coeff, Pel block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxLog2TrDynamicRange)
310	{
311	UInt i,j,k;
312	TCoeff iSum;
313	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
314	const TMatrixCoeff *iT;
315
316	if (uiTrSize==4)
317	{
318	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]);
319	}
320	else if (uiTrSize==8)
321	{
322	iT = g_aiT8[TRANSFORM_INVERSE][0];
323	}
324	else if (uiTrSize==16)
325	{
326	iT = g_aiT16[TRANSFORM_INVERSE][0];
327	}
328	else if (uiTrSize==32)
329	{
330	iT = g_aiT32[TRANSFORM_INVERSE][0];
331	}
332	else
333	{
334	assert(0);
335	}
336
337	const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
338
339	const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
340	const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
341	const TCoeff clipMinimum = -(1 << maxLog2TrDynamicRange);
342	const TCoeff clipMaximum = (1 << maxLog2TrDynamicRange) - 1;
343	assert(shift_2nd>=0);
344	const Int add_1st = 1<<(shift_1st-1);
345	const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0;
346
347	/* Horizontal transform */
348	for (i=0; i<uiTrSize; i++)
349	{
350	for (j=0; j<uiTrSize; j++)
351	{
352	iSum = 0;
353	for (k=0; k<uiTrSize; k++)
354	{
355	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
356	}
357
358	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
359	tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st);
360	}
361	}
362
363	/* Vertical transform */
364	for (i=0; i<uiTrSize; i++)
365	{
366	for (j=0; j<uiTrSize; j++)
367	{
368	iSum = 0;
369	for (k=0; k<uiTrSize; k++)
370	{
371	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
372	}
373
374	block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd);
375	}
376	}
377	}
378
379	#endif //MATRIX_MULT
380
381
382	/** 4x4 forward transform implemented using partial butterfly structure (1D)
383	* \param src input data (residual)
384	* \param dst output data (transform coefficients)
385	* \param shift specifies right shift after 1D transform
386	* \param line
387	*/
388	Void partialButterfly4(TCoeff src, TCoeff dst, Int shift, Int line)
389	{
390	Int j;
391	TCoeff E[2],O[2];
392	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
393
394	for (j=0; j<line; j++)
395	{
396	/* E and O */
397	E[0] = src[0] + src[3];
398	O[0] = src[0] - src[3];
399	E[1] = src[1] + src[2];
400	O[1] = src[1] - src[2];
401
402	dst[0] = (g_aiT4[TRANSFORM_FORWARD][0][0]E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]E[1] + add)>>shift;
403	dst[2line] = (g_aiT4[TRANSFORM_FORWARD][2][0]E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift;
404	dst[line] = (g_aiT4[TRANSFORM_FORWARD][1][0]O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]O[1] + add)>>shift;
405	dst[3line] = (g_aiT4[TRANSFORM_FORWARD][3][0]O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift;
406
407	src += 4;
408	dst ++;
409	}
410	}
411
412	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
413	// give identical results
414	Void fastForwardDst(TCoeff block, TCoeff coeff, Int shift) // input block, output coeff
415	{
416	Int i;
417	TCoeff c[4];
418	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
419	for (i=0; i<4; i++)
420	{
421	// Intermediate Variables
422	c[0] = block[4*i+0];
423	c[1] = block[4*i+1];
424	c[2] = block[4*i+2];
425	c[3] = block[4*i+3];
426
427	for (Int row = 0; row < 4; row++)
428	{
429	TCoeff result = 0;
430	for (Int column = 0; column < 4; column++)
431	{
432	result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers
433	}
434
435	coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift);
436	}
437	}
438	}
439
440	Void fastInverseDst(TCoeff tmp, TCoeff block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum) // input tmp, output block
441	{
442	Int i;
443	TCoeff c[4];
444	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
445	for (i=0; i<4; i++)
446	{
447	// Intermediate Variables
448	c[0] = tmp[ i];
449	c[1] = tmp[4 +i];
450	c[2] = tmp[8 +i];
451	c[3] = tmp[12+i];
452
453	for (Int column = 0; column < 4; column++)
454	{
455	TCoeff &result = block[(i * 4) + column];
456
457	result = 0;
458	for (Int row = 0; row < 4; row++)
459	{
460	result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers
461	}
462
463	result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift));
464	}
465	}
466	}
467
468	/** 4x4 inverse transform implemented using partial butterfly structure (1D)
469	* \param src input data (transform coefficients)
470	* \param dst output data (residual)
471	* \param shift specifies right shift after 1D transform
472	* \param line
473	* \param outputMinimum minimum for clipping
474	* \param outputMaximum maximum for clipping
475	*/
476	Void partialButterflyInverse4(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
477	{
478	Int j;
479	TCoeff E[2],O[2];
480	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
481
482	for (j=0; j<line; j++)
483	{
484	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
485	O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]src[3*line];
486	O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]src[3*line];
487	E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]src[0] + g_aiT4[TRANSFORM_INVERSE][2][0]src[2*line];
488	E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]src[0] + g_aiT4[TRANSFORM_INVERSE][2][1]src[2*line];
489
490	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
491	dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift );
492	dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift );
493	dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift );
494	dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift );
495
496	src ++;
497	dst += 4;
498	}
499	}
500
501	/** 8x8 forward transform implemented using partial butterfly structure (1D)
502	* \param src input data (residual)
503	* \param dst output data (transform coefficients)
504	* \param shift specifies right shift after 1D transform
505	* \param line
506	*/
507	Void partialButterfly8(TCoeff src, TCoeff dst, Int shift, Int line)
508	{
509	Int j,k;
510	TCoeff E[4],O[4];
511	TCoeff EE[2],EO[2];
512	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
513
514	for (j=0; j<line; j++)
515	{
516	/* E and O*/
517	for (k=0;k<4;k++)
518	{
519	E[k] = src[k] + src[7-k];
520	O[k] = src[k] - src[7-k];
521	}
522	/* EE and EO */
523	EE[0] = E[0] + E[3];
524	EO[0] = E[0] - E[3];
525	EE[1] = E[1] + E[2];
526	EO[1] = E[1] - E[2];
527
528	dst[0] = (g_aiT8[TRANSFORM_FORWARD][0][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]EE[1] + add)>>shift;
529	dst[4line] = (g_aiT8[TRANSFORM_FORWARD][4][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift;
530	dst[2line] = (g_aiT8[TRANSFORM_FORWARD][2][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift;
531	dst[6line] = (g_aiT8[TRANSFORM_FORWARD][6][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift;
532
533	dst[line] = (g_aiT8[TRANSFORM_FORWARD][1][0]O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]O[3] + add)>>shift;
534	dst[3line] = (g_aiT8[TRANSFORM_FORWARD][3][0]O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift;
535	dst[5line] = (g_aiT8[TRANSFORM_FORWARD][5][0]O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift;
536	dst[7line] = (g_aiT8[TRANSFORM_FORWARD][7][0]O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift;
537
538	src += 8;
539	dst ++;
540	}
541	}
542
543	/** 8x8 inverse transform implemented using partial butterfly structure (1D)
544	* \param src input data (transform coefficients)
545	* \param dst output data (residual)
546	* \param shift specifies right shift after 1D transform
547	* \param line
548	* \param outputMinimum minimum for clipping
549	* \param outputMaximum maximum for clipping
550	*/
551	Void partialButterflyInverse8(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
552	{
553	Int j,k;
554	TCoeff E[4],O[4];
555	TCoeff EE[2],EO[2];
556	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
557
558	for (j=0; j<line; j++)
559	{
560	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
561	for (k=0;k<4;k++)
562	{
563	O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]src[line] + g_aiT8[TRANSFORM_INVERSE][ 3][k]src[3*line] +
564	g_aiT8[TRANSFORM_INVERSE][ 5][k]src[5line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]src[7line];
565	}
566
567	EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][0]src[ 6line ];
568	EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][1]src[ 6line ];
569	EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][0]src[ 4*line ];
570	EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][1]src[ 4*line ];
571
572	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
573	E[0] = EE[0] + EO[0];
574	E[3] = EE[0] - EO[0];
575	E[1] = EE[1] + EO[1];
576	E[2] = EE[1] - EO[1];
577	for (k=0;k<4;k++)
578	{
579	dst[ k ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
580	dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift );
581	}
582	src ++;
583	dst += 8;
584	}
585	}
586
587	/** 16x16 forward transform implemented using partial butterfly structure (1D)
588	* \param src input data (residual)
589	* \param dst output data (transform coefficients)
590	* \param shift specifies right shift after 1D transform
591	* \param line
592	*/
593	Void partialButterfly16(TCoeff src, TCoeff dst, Int shift, Int line)
594	{
595	Int j,k;
596	TCoeff E[8],O[8];
597	TCoeff EE[4],EO[4];
598	TCoeff EEE[2],EEO[2];
599	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
600
601	for (j=0; j<line; j++)
602	{
603	/* E and O*/
604	for (k=0;k<8;k++)
605	{
606	E[k] = src[k] + src[15-k];
607	O[k] = src[k] - src[15-k];
608	}
609	/* EE and EO */
610	for (k=0;k<4;k++)
611	{
612	EE[k] = E[k] + E[7-k];
613	EO[k] = E[k] - E[7-k];
614	}
615	/* EEE and EEO */
616	EEE[0] = EE[0] + EE[3];
617	EEO[0] = EE[0] - EE[3];
618	EEE[1] = EE[1] + EE[2];
619	EEO[1] = EE[1] - EE[2];
620
621	dst[ 0 ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]EEE[1] + add)>>shift;
622	dst[ 8line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift;
623	dst[ 4line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift;
624	dst[ 12line] = (g_aiT16[TRANSFORM_FORWARD][12][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift;
625
626	for (k=2;k<16;k+=4)
627	{
628	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] +
629	g_aiT16[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]EO[3] + add)>>shift;
630	}
631
632	for (k=1;k<16;k+=2)
633	{
634	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] +
635	g_aiT16[TRANSFORM_FORWARD][k][2]O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]O[3] +
636	g_aiT16[TRANSFORM_FORWARD][k][4]O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]O[5] +
637	g_aiT16[TRANSFORM_FORWARD][k][6]O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]O[7] + add)>>shift;
638	}
639
640	src += 16;
641	dst ++;
642
643	}
644	}
645
646	/** 16x16 inverse transform implemented using partial butterfly structure (1D)
647	* \param src input data (transform coefficients)
648	* \param dst output data (residual)
649	* \param shift specifies right shift after 1D transform
650	* \param line
651	* \param outputMinimum minimum for clipping
652	* \param outputMaximum maximum for clipping
653	*/
654	Void partialButterflyInverse16(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
655	{
656	Int j,k;
657	TCoeff E[8],O[8];
658	TCoeff EE[4],EO[4];
659	TCoeff EEE[2],EEO[2];
660	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
661
662	for (j=0; j<line; j++)
663	{
664	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
665	for (k=0;k<8;k++)
666	{
667	O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]src[ line] + g_aiT16[TRANSFORM_INVERSE][ 3][k]src[ 3*line] +
668	g_aiT16[TRANSFORM_INVERSE][ 5][k]src[ 5line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]src[ 7line] +
669	g_aiT16[TRANSFORM_INVERSE][ 9][k]src[ 9line] + g_aiT16[TRANSFORM_INVERSE][11][k]src[11line] +
670	g_aiT16[TRANSFORM_INVERSE][13][k]src[13line] + g_aiT16[TRANSFORM_INVERSE][15][k]src[15line];
671	}
672	for (k=0;k<4;k++)
673	{
674	EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]src[ 2line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]src[ 6line] +
675	g_aiT16[TRANSFORM_INVERSE][10][k]src[10line] + g_aiT16[TRANSFORM_INVERSE][14][k]src[14line];
676	}
677	EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][0]src[ 12line ];
678	EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]src[ 8*line ];
679	EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][1]src[ 12line ];
680	EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]src[ 8*line ];
681
682	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
683	for (k=0;k<2;k++)
684	{
685	EE[k] = EEE[k] + EEO[k];
686	EE[k+2] = EEE[1-k] - EEO[1-k];
687	}
688	for (k=0;k<4;k++)
689	{
690	E[k] = EE[k] + EO[k];
691	E[k+4] = EE[3-k] - EO[3-k];
692	}
693	for (k=0;k<8;k++)
694	{
695	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
696	dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift );
697	}
698	src ++;
699	dst += 16;
700	}
701	}
702
703	/** 32x32 forward transform implemented using partial butterfly structure (1D)
704	* \param src input data (residual)
705	* \param dst output data (transform coefficients)
706	* \param shift specifies right shift after 1D transform
707	* \param line
708	*/
709	Void partialButterfly32(TCoeff src, TCoeff dst, Int shift, Int line)
710	{
711	Int j,k;
712	TCoeff E[16],O[16];
713	TCoeff EE[8],EO[8];
714	TCoeff EEE[4],EEO[4];
715	TCoeff EEEE[2],EEEO[2];
716	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
717
718	for (j=0; j<line; j++)
719	{
720	/* E and O*/
721	for (k=0;k<16;k++)
722	{
723	E[k] = src[k] + src[31-k];
724	O[k] = src[k] - src[31-k];
725	}
726	/* EE and EO */
727	for (k=0;k<8;k++)
728	{
729	EE[k] = E[k] + E[15-k];
730	EO[k] = E[k] - E[15-k];
731	}
732	/* EEE and EEO */
733	for (k=0;k<4;k++)
734	{
735	EEE[k] = EE[k] + EE[7-k];
736	EEO[k] = EE[k] - EE[7-k];
737	}
738	/* EEEE and EEEO */
739	EEEE[0] = EEE[0] + EEE[3];
740	EEEO[0] = EEE[0] - EEE[3];
741	EEEE[1] = EEE[1] + EEE[2];
742	EEEO[1] = EEE[1] - EEE[2];
743
744	dst[ 0 ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]EEEE[1] + add)>>shift;
745	dst[ 16line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift;
746	dst[ 8line ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift;
747	dst[ 24line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift;
748	for (k=4;k<32;k+=8)
749	{
750	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] +
751	g_aiT32[TRANSFORM_FORWARD][k][2]EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EEO[3] + add)>>shift;
752	}
753	for (k=2;k<32;k+=4)
754	{
755	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] +
756	g_aiT32[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EO[3] +
757	g_aiT32[TRANSFORM_FORWARD][k][4]EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]EO[5] +
758	g_aiT32[TRANSFORM_FORWARD][k][6]EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]EO[7] + add)>>shift;
759	}
760	for (k=1;k<32;k+=2)
761	{
762	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] +
763	g_aiT32[TRANSFORM_FORWARD][k][ 2]O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]O[ 3] +
764	g_aiT32[TRANSFORM_FORWARD][k][ 4]O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]O[ 5] +
765	g_aiT32[TRANSFORM_FORWARD][k][ 6]O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]O[ 7] +
766	g_aiT32[TRANSFORM_FORWARD][k][ 8]O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]O[ 9] +
767	g_aiT32[TRANSFORM_FORWARD][k][10]O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]O[11] +
768	g_aiT32[TRANSFORM_FORWARD][k][12]O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]O[13] +
769	g_aiT32[TRANSFORM_FORWARD][k][14]O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]O[15] + add)>>shift;
770	}
771
772	src += 32;
773	dst ++;
774	}
775	}
776
777	/** 32x32 inverse transform implemented using partial butterfly structure (1D)
778	* \param src input data (transform coefficients)
779	* \param dst output data (residual)
780	* \param shift specifies right shift after 1D transform
781	* \param line
782	* \param outputMinimum minimum for clipping
783	* \param outputMaximum maximum for clipping
784	*/
785	Void partialButterflyInverse32(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
786	{
787	Int j,k;
788	TCoeff E[16],O[16];
789	TCoeff EE[8],EO[8];
790	TCoeff EEE[4],EEO[4];
791	TCoeff EEEE[2],EEEO[2];
792	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
793
794	for (j=0; j<line; j++)
795	{
796	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
797	for (k=0;k<16;k++)
798	{
799	O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]src[ line ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]src[ 3*line ] +
800	g_aiT32[TRANSFORM_INVERSE][ 5][k]src[ 5line ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]src[ 7line ] +
801	g_aiT32[TRANSFORM_INVERSE][ 9][k]src[ 9line ] + g_aiT32[TRANSFORM_INVERSE][11][k]src[ 11line ] +
802	g_aiT32[TRANSFORM_INVERSE][13][k]src[ 13line ] + g_aiT32[TRANSFORM_INVERSE][15][k]src[ 15line ] +
803	g_aiT32[TRANSFORM_INVERSE][17][k]src[ 17line ] + g_aiT32[TRANSFORM_INVERSE][19][k]src[ 19line ] +
804	g_aiT32[TRANSFORM_INVERSE][21][k]src[ 21line ] + g_aiT32[TRANSFORM_INVERSE][23][k]src[ 23line ] +
805	g_aiT32[TRANSFORM_INVERSE][25][k]src[ 25line ] + g_aiT32[TRANSFORM_INVERSE][27][k]src[ 27line ] +
806	g_aiT32[TRANSFORM_INVERSE][29][k]src[ 29line ] + g_aiT32[TRANSFORM_INVERSE][31][k]src[ 31line ];
807	}
808	for (k=0;k<8;k++)
809	{
810	EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]src[ 2line ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]src[ 6line ] +
811	g_aiT32[TRANSFORM_INVERSE][10][k]src[ 10line ] + g_aiT32[TRANSFORM_INVERSE][14][k]src[ 14line ] +
812	g_aiT32[TRANSFORM_INVERSE][18][k]src[ 18line ] + g_aiT32[TRANSFORM_INVERSE][22][k]src[ 22line ] +
813	g_aiT32[TRANSFORM_INVERSE][26][k]src[ 26line ] + g_aiT32[TRANSFORM_INVERSE][30][k]src[ 30line ];
814	}
815	for (k=0;k<4;k++)
816	{
817	EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]src[ 4line ] + g_aiT32[TRANSFORM_INVERSE][12][k]src[ 12line ] +
818	g_aiT32[TRANSFORM_INVERSE][20][k]src[ 20line ] + g_aiT32[TRANSFORM_INVERSE][28][k]src[ 28line ];
819	}
820	EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][0]src[ 24line ];
821	EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][1]src[ 24line ];
822	EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][0]src[ 16*line ];
823	EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][1]src[ 16*line ];
824
825	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
826	EEE[0] = EEEE[0] + EEEO[0];
827	EEE[3] = EEEE[0] - EEEO[0];
828	EEE[1] = EEEE[1] + EEEO[1];
829	EEE[2] = EEEE[1] - EEEO[1];
830	for (k=0;k<4;k++)
831	{
832	EE[k] = EEE[k] + EEO[k];
833	EE[k+4] = EEE[3-k] - EEO[3-k];
834	}
835	for (k=0;k<8;k++)
836	{
837	E[k] = EE[k] + EO[k];
838	E[k+8] = EE[7-k] - EO[7-k];
839	}
840	for (k=0;k<16;k++)
841	{
842	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
843	dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift );
844	}
845	src ++;
846	dst += 32;
847	}
848	}
849
850	/** MxN forward transform (2D)
851	* \param bitDepth [in] bit depth
852	* \param block [in] residual block
853	* \param coeff [out] transform coefficients
854	* \param iWidth [in] width of transform
855	* \param iHeight [in] height of transform
856	* \param useDST [in]
857	* \param maxLog2TrDynamicRange [in]
858
859	*/
860	Void xTrMxN(Int bitDepth, TCoeff block, TCoeff coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxLog2TrDynamicRange)
861	{
862	const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
863
864	const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
865	const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;
866
867	assert(shift_1st >= 0);
868	assert(shift_2nd >= 0);
869
870	TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];
871
872	switch (iWidth)
873	{
874	case 4:
875	{
876	if ((iHeight == 4) && useDST) // Check for DCT or DST
877	{
878	fastForwardDst( block, tmp, shift_1st );
879	}
880	else
881	{
882	partialButterfly4 ( block, tmp, shift_1st, iHeight );
883	}
884	}
885	break;
886
887	case 8: partialButterfly8 ( block, tmp, shift_1st, iHeight ); break;
888	case 16: partialButterfly16( block, tmp, shift_1st, iHeight ); break;
889	case 32: partialButterfly32( block, tmp, shift_1st, iHeight ); break;
890	default:
891	assert(0); exit (1); break;
892	}
893
894	switch (iHeight)
895	{
896	case 4:
897	{
898	if ((iWidth == 4) && useDST) // Check for DCT or DST
899	{
900	fastForwardDst( tmp, coeff, shift_2nd );
901	}
902	else
903	{
904	partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
905	}
906	}
907	break;
908
909	case 8: partialButterfly8 ( tmp, coeff, shift_2nd, iWidth ); break;
910	case 16: partialButterfly16( tmp, coeff, shift_2nd, iWidth ); break;
911	case 32: partialButterfly32( tmp, coeff, shift_2nd, iWidth ); break;
912	default:
913	assert(0); exit (1); break;
914	}
915	}
916
917
918	/** MxN inverse transform (2D)
919	* \param bitDepth [in] bit depth
920	* \param coeff [in] transform coefficients
921	* \param block [out] residual block
922	* \param iWidth [in] width of transform
923	* \param iHeight [in] height of transform
924	* \param useDST [in]
925	* \param maxLog2TrDynamicRange [in]
926	*/
927	Void xITrMxN(Int bitDepth, TCoeff coeff, TCoeff block, Int iWidth, Int iHeight, Bool useDST, const Int maxLog2TrDynamicRange)
928	{
929	const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
930
931	Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
932	Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
933	const TCoeff clipMinimum = -(1 << maxLog2TrDynamicRange);
934	const TCoeff clipMaximum = (1 << maxLog2TrDynamicRange) - 1;
935
936	assert(shift_1st >= 0);
937	assert(shift_2nd >= 0);
938
939	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
940
941	switch (iHeight)
942	{
943	case 4:
944	{
945	if ((iWidth == 4) && useDST) // Check for DCT or DST
946	{
947	fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum);
948	}
949	else
950	{
951	partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum);
952	}
953	}
954	break;
955
956	case 8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
957	case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
958	case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
959
960	default:
961	assert(0); exit (1); break;
962	}
963
964	switch (iWidth)
965	{
966	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
967	case 4:
968	{
969	if ((iHeight == 4) && useDST) // Check for DCT or DST
970	{
971	fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() );
972	}
973	else
974	{
975	partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max());
976	}
977	}
978	break;
979
980	case 8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
981	case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
982	case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
983
984	default:
985	assert(0); exit (1); break;
986	}
987	}
988
989
990	// To minimize the distortion only. No rate is considered.
991	Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters, const Int maxLog2TrDynamicRange )
992	{
993	const UInt width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
994	const UInt height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
995	const UInt groupSize = 1 << MLS_CG_SIZE;
996
997	const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
998	const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
999
1000	Int lastCG = -1;
1001	Int absSum = 0 ;
1002	Int n ;
1003
1004	for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
1005	{
1006	Int subPos = subSet << MLS_CG_SIZE;
1007	Int firstNZPosInCG=groupSize , lastNZPosInCG=-1 ;
1008	absSum = 0 ;
1009
1010	for(n = groupSize-1; n >= 0; --n )
1011	{
1012	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
1013	{
1014	lastNZPosInCG = n;
1015	break;
1016	}
1017	}
1018
1019	for(n = 0; n <groupSize; n++ )
1020	{
1021	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
1022	{
1023	firstNZPosInCG = n;
1024	break;
1025	}
1026	}
1027
1028	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1029	{
1030	absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]);
1031	}
1032
1033	if(lastNZPosInCG>=0 && lastCG==-1)
1034	{
1035	lastCG = 1 ;
1036	}
1037
1038	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1039	{
1040	UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ;
1041	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1042	{
1043	TCoeff curCost = std::numeric_limits<TCoeff>::max();
1044	TCoeff minCostInc = std::numeric_limits<TCoeff>::max();
1045	Int minPos =-1, finalChange=0, curChange=0;
1046
1047	for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n )
1048	{
1049	UInt blkPos = codingParameters.scan[ n+subPos ];
1050	if(pQCoef[ blkPos ] != 0 )
1051	{
1052	if(deltaU[blkPos]>0)
1053	{
1054	curCost = - deltaU[blkPos];
1055	curChange=1 ;
1056	}
1057	else
1058	{
1059	//curChange =-1;
1060	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1061	{
1062	curCost = std::numeric_limits<TCoeff>::max();
1063	}
1064	else
1065	{
1066	curCost = deltaU[blkPos];
1067	curChange =-1;
1068	}
1069	}
1070	}
1071	else
1072	{
1073	if(n<firstNZPosInCG)
1074	{
1075	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1076	if(thisSignBit != signbit )
1077	{
1078	curCost = std::numeric_limits<TCoeff>::max();
1079	}
1080	else
1081	{
1082	curCost = - (deltaU[blkPos]) ;
1083	curChange = 1 ;
1084	}
1085	}
1086	else
1087	{
1088	curCost = - (deltaU[blkPos]) ;
1089	curChange = 1 ;
1090	}
1091	}
1092
1093	if( curCost<minCostInc)
1094	{
1095	minCostInc = curCost ;
1096	finalChange = curChange ;
1097	minPos = blkPos ;
1098	}
1099	} //CG loop
1100
1101	if(pQCoef[minPos] == entropyCodingMaximum \|\| pQCoef[minPos] == entropyCodingMinimum)
1102	{
1103	finalChange = -1;
1104	}
1105
1106	if(pCoef[minPos]>=0)
1107	{
1108	pQCoef[minPos] += finalChange ;
1109	}
1110	else
1111	{
1112	pQCoef[minPos] -= finalChange ;
1113	}
1114	} // Hide
1115	}
1116	if(lastCG==1)
1117	{
1118	lastCG=0 ;
1119	}
1120	} // TU loop
1121
1122	return;
1123	}
1124
1125
1126	Void TComTrQuant::xQuant( TComTU &rTu,
1127	TCoeff * pSrc,
1128	TCoeff * pDes,
1129	#if ADAPTIVE_QP_SELECTION
1130	TCoeff *pArlDes,
1131	#endif
1132	TCoeff &uiAbsSum,
1133	const ComponentID compID,
1134	const QpParam &cQP )
1135	{
1136	const TComRectangle &rect = rTu.getRect(compID);
1137	const UInt uiWidth = rect.width;
1138	const UInt uiHeight = rect.height;
1139	TComDataCU* pcCU = rTu.getCU();
1140	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1141	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
1142
1143	TCoeff* piCoef = pSrc;
1144	TCoeff* piQCoef = pDes;
1145	#if ADAPTIVE_QP_SELECTION
1146	TCoeff* piArlCCoef = pArlDes;
1147	#endif
1148
1149	const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1150	const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
1151
1152	Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
1153	if ( useRDOQ && (isLuma(compID) \|\| RDOQ_CHROMA) )
1154	{
1155	if ( !m_useSelectiveRDOQ \|\| xNeedRDOQ( rTu, piCoef, compID, cQP ) )
1156	{
1157	#if ADAPTIVE_QP_SELECTION
1158	xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );
1159	#else
1160	xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );
1161	#endif
1162	}
1163	else
1164	{
1165	memset( pDes, 0, sizeof( TCoeff ) * uiWidth *uiHeight );
1166	uiAbsSum = 0;
1167	}
1168	}
1169	else
1170	{
1171	TUEntropyCodingParameters codingParameters;
1172	getTUEntropyCodingParameters(codingParameters, rTu, compID);
1173
1174	const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
1175	const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
1176
1177	TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
1178
1179	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1180
1181	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1182	assert(scalingListType < SCALING_LIST_NUM);
1183	Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
1184
1185	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1186	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
1187
1188	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1189	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1190	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1191	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1192	*/
1193
1194	// Represents scaling through forward transform
1195	Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
1196	if (useTransformSkip && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
1197	{
1198	iTransformShift = std::max<Int>(0, iTransformShift);
1199	}
1200
1201	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
1202	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
1203
1204	#if ADAPTIVE_QP_SELECTION
1205	Int iQBitsC = MAX_INT;
1206	Int iAddC = MAX_INT;
1207
1208	if (m_bUseAdaptQpSelect)
1209	{
1210	iQBitsC = iQBits - ARL_C_PRECISION;
1211	iAddC = 1 << (iQBitsC-1);
1212	}
1213	#endif
1214
1215	const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1216	const Int qBits8 = iQBits - 8;
1217
1218	for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
1219	{
1220	const TCoeff iLevel = piCoef[uiBlockPos];
1221	const TCoeff iSign = (iLevel < 0 ? -1: 1);
1222
1223	const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
1224
1225	#if ADAPTIVE_QP_SELECTION
1226	if( m_bUseAdaptQpSelect )
1227	{
1228	piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
1229	}
1230	#endif
1231
1232	const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
1233	deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);
1234
1235	uiAbsSum += quantisedMagnitude;
1236	const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
1237
1238	piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
1239	} // for n
1240
1241	if( pcCU->getSlice()->getPPS()->getSignDataHidingEnabledFlag() )
1242	{
1243	if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
1244	{
1245	signBitHidingHDQ( piQCoef, piCoef, deltaU, codingParameters, maxLog2TrDynamicRange ) ;
1246	}
1247	}
1248	} //if RDOQ
1249	//return;
1250	}
1251
1252	Bool TComTrQuant::xNeedRDOQ( TComTU &rTu, TCoeff * pSrc, const ComponentID compID, const QpParam &cQP )
1253	{
1254	const TComRectangle &rect = rTu.getRect(compID);
1255	const UInt uiWidth = rect.width;
1256	const UInt uiHeight = rect.height;
1257	TComDataCU* pcCU = rTu.getCU();
1258	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1259	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
1260
1261	TCoeff* piCoef = pSrc;
1262
1263	const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1264	const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
1265
1266	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1267
1268	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1269	assert(scalingListType < SCALING_LIST_NUM);
1270	Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
1271
1272	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1273	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
1274
1275	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1276	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1277	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1278	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1279	*/
1280
1281	// Represents scaling through forward transform
1282	Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
1283	if (useTransformSkip && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
1284	{
1285	iTransformShift = std::max<Int>(0, iTransformShift);
1286	}
1287
1288	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
1289	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
1290
1291	// iAdd is different from the iAdd used in normal quantization
1292	const Int iAdd = (compID == COMPONENT_Y ? 171 : 256) << (iQBits-9);
1293
1294	for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
1295	{
1296	const TCoeff iLevel = piCoef[uiBlockPos];
1297	const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
1298	const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
1299
1300	if ( quantisedMagnitude != 0 )
1301	{
1302	return true;
1303	}
1304	} // for n
1305	return false;
1306	}
1307
1308	Void TComTrQuant::xDeQuant( TComTU &rTu,
1309	const TCoeff * pSrc,
1310	TCoeff * pDes,
1311	const ComponentID compID,
1312	const QpParam &cQP )
1313	{
1314	assert(compID<MAX_NUM_COMPONENT);
1315
1316	TComDataCU *pcCU = rTu.getCU();
1317	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1318	const TComRectangle &rect = rTu.getRect(compID);
1319	const UInt uiWidth = rect.width;
1320	const UInt uiHeight = rect.height;
1321	const TCoeff *const piQCoef = pSrc;
1322	TCoeff *const piCoef = pDes;
1323	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1324	const UInt numSamplesInBlock = uiWidth*uiHeight;
1325	const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
1326	const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange);
1327	const TCoeff transformMaximum = (1 << maxLog2TrDynamicRange) - 1;
1328	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1329	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1330	#if O0043_BEST_EFFORT_DECODING
1331	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getStreamBitDepth(toChannelType(compID));
1332	#else
1333	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
1334	#endif
1335
1336	assert (scalingListType < SCALING_LIST_NUM);
1337	assert ( uiWidth <= m_uiMaxTrSize );
1338
1339	// Represents scaling through forward transform
1340	const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
1341	const Int originalTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
1342	const Int iTransformShift = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift;
1343
1344	const Int QP_per = cQP.per;
1345	const Int QP_rem = cQP.rem;
1346
1347	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
1348
1349	if(enableScalingLists)
1350	{
1351	//from the dequantisation equation:
1352	//iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift
1353	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + dequantCoefBits - rightShift
1354	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
1355	const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
1356
1357	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1358	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1359
1360	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
1361
1362	if(rightShift > 0)
1363	{
1364	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1365
1366	for( Int n = 0; n < numSamplesInBlock; n++ )
1367	{
1368	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1369	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
1370
1371	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1372	}
1373	}
1374	else
1375	{
1376	const Int leftShift = -rightShift;
1377
1378	for( Int n = 0; n < numSamplesInBlock; n++ )
1379	{
1380	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1381	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
1382
1383	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1384	}
1385	}
1386	}
1387	else
1388	{
1389	const Int scale = g_invQuantScales[QP_rem];
1390	const Int scaleBits = (IQUANT_SHIFT + 1) ;
1391
1392	//from the dequantisation equation:
1393	//iCoeffQ = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift);
1394	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + scaleBits - rightShift
1395	const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
1396	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1397	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1398
1399	if (rightShift > 0)
1400	{
1401	const Intermediate_Int iAdd = 1 << (rightShift - 1);
1402
1403	for( Int n = 0; n < numSamplesInBlock; n++ )
1404	{
1405	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1406	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
1407
1408	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1409	}
1410	}
1411	else
1412	{
1413	const Int leftShift = -rightShift;
1414
1415	for( Int n = 0; n < numSamplesInBlock; n++ )
1416	{
1417	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1418	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
1419
1420	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1421	}
1422	}
1423	}
1424	}
1425
1426
1427	Void TComTrQuant::init( UInt uiMaxTrSize,
1428	Bool bUseRDOQ,
1429	Bool bUseRDOQTS,
1430	Bool useSelectiveRDOQ,
1431	Bool bEnc,
1432	Bool useTransformSkipFast
1433	#if ADAPTIVE_QP_SELECTION
1434	, Bool bUseAdaptQpSelect
1435	#endif
1436	)
1437	{
1438	m_uiMaxTrSize = uiMaxTrSize;
1439	m_bEnc = bEnc;
1440	m_useRDOQ = bUseRDOQ;
1441	m_useRDOQTS = bUseRDOQTS;
1442	m_useSelectiveRDOQ = useSelectiveRDOQ;
1443	#if ADAPTIVE_QP_SELECTION
1444	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1445	#endif
1446	m_useTransformSkipFast = useTransformSkipFast;
1447	}
1448
1449
1450	Void TComTrQuant::transformNxN( TComTU & rTu,
1451	const ComponentID compID,
1452	Pel * pcResidual,
1453	const UInt uiStride,
1454	TCoeff * rpcCoeff,
1455	#if ADAPTIVE_QP_SELECTION
1456	TCoeff * pcArlCoeff,
1457	#endif
1458	TCoeff & uiAbsSum,
1459	const QpParam & cQP
1460	)
1461	{
1462	const TComRectangle &rect = rTu.getRect(compID);
1463	const UInt uiWidth = rect.width;
1464	const UInt uiHeight = rect.height;
1465	TComDataCU* pcCU = rTu.getCU();
1466	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1467	const UInt uiOrgTrDepth = rTu.GetTransformDepthRel();
1468
1469	uiAbsSum=0;
1470
1471	RDPCMMode rdpcmMode = RDPCM_OFF;
1472	rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
1473
1474	if (rdpcmMode == RDPCM_OFF)
1475	{
1476	uiAbsSum = 0;
1477	//transform and quantise
1478	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1479	{
1480	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1481	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1482
1483	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1484	{
1485	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1486	{
1487	const Pel currentSample = pcResidual[(y * uiStride) + x];
1488
1489	rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
1490	uiAbsSum += TCoeff(abs(currentSample));
1491	}
1492	}
1493	}
1494	else
1495	{
1496	#if DEBUG_TRANSFORM_AND_QUANTISE
1497	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
1498	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1499	#endif
1500
1501	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1502
1503	if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
1504	{
1505	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
1506	}
1507	else
1508	{
1509	const Int channelBitDepth=pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
1510	xT( channelBitDepth, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight, pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)) );
1511	}
1512
1513	#if DEBUG_TRANSFORM_AND_QUANTISE
1514	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
1515	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1516	#endif
1517
1518	xQuant( rTu, m_plTempCoeff, rpcCoeff,
1519
1520	#if ADAPTIVE_QP_SELECTION
1521	pcArlCoeff,
1522	#endif
1523	uiAbsSum, compID, cQP );
1524
1525	#if DEBUG_TRANSFORM_AND_QUANTISE
1526	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
1527	printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
1528	#endif
1529	}
1530	}
1531
1532	//set the CBF
1533	pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1534	}
1535
1536
1537	Void TComTrQuant::invTransformNxN( TComTU &rTu,
1538	const ComponentID compID,
1539	Pel *pcResidual,
1540	const UInt uiStride,
1541	TCoeff * pcCoeff,
1542	const QpParam &cQP
1543	DEBUG_STRING_FN_DECLAREP(psDebug))
1544	{
1545	TComDataCU* pcCU=rTu.getCU();
1546	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1547	const TComRectangle &rect = rTu.getRect(compID);
1548	const UInt uiWidth = rect.width;
1549	const UInt uiHeight = rect.height;
1550
1551	if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
1552	{
1553	//------------------------------------------------
1554
1555	//recurse deeper
1556
1557	TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);
1558
1559	do
1560	{
1561	//------------------
1562
1563	const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;
1564
1565	Pel subTUResidual = pcResidual + (lineOffset uiStride);
1566	TCoeff subTUCoefficients = pcCoeff + (lineOffset subTURecurse.getRect(compID).width);
1567
1568	invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
1569
1570	//------------------
1571
1572	} while (subTURecurse.nextSection(rTu));
1573
1574	//------------------------------------------------
1575
1576	return;
1577	}
1578
1579	#if DEBUG_STRING
1580	if (psDebug)
1581	{
1582	std::stringstream ss(stringstream::out);
1583	printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
1584	DEBUG_STRING_APPEND((*psDebug), ss.str())
1585	}
1586	#endif
1587
1588	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1589	{
1590	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1591	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1592
1593	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1594	{
1595	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1596	{
1597	pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
1598	}
1599	}
1600	}
1601	else
1602	{
1603	#if DEBUG_TRANSFORM_AND_QUANTISE
1604	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
1605	printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
1606	#endif
1607
1608	xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);
1609
1610	#if DEBUG_TRANSFORM_AND_QUANTISE
1611	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
1612	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1613	#endif
1614
1615	#if DEBUG_STRING
1616	if (psDebug)
1617	{
1618	std::stringstream ss(stringstream::out);
1619	printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1620	(*psDebug)+=ss.str();
1621	}
1622	#endif
1623
1624	if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
1625	{
1626	xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );
1627
1628	#if DEBUG_STRING
1629	if (psDebug)
1630	{
1631	std::stringstream ss(stringstream::out);
1632	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1633	(*psDebug)+=ss.str();
1634	(*psDebug)+="(<- was a Transform-skipped block)\n";
1635	}
1636	#endif
1637	}
1638	else
1639	{
1640	#if O0043_BEST_EFFORT_DECODING
1641	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getStreamBitDepth(toChannelType(compID));
1642	#else
1643	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
1644	#endif
1645	xIT( channelBitDepth, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight, pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)) );
1646
1647	#if DEBUG_STRING
1648	if (psDebug)
1649	{
1650	std::stringstream ss(stringstream::out);
1651	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1652	(*psDebug)+=ss.str();
1653	(*psDebug)+="(<- was a Transformed block)\n";
1654	}
1655	#endif
1656	}
1657
1658	#if DEBUG_TRANSFORM_AND_QUANTISE
1659	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
1660	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1661	g_debugCounter++;
1662	#endif
1663	}
1664
1665	invRdpcmNxN( rTu, compID, pcResidual, uiStride );
1666	}
1667
1668	Void TComTrQuant::invRecurTransformNxN( const ComponentID compID,
1669	TComYuv *pResidual,
1670	TComTU &rTu)
1671	{
1672	if (!rTu.ProcessComponentSection(compID))
1673	{
1674	return;
1675	}
1676
1677	TComDataCU* pcCU = rTu.getCU();
1678	UInt absPartIdxTU = rTu.GetAbsPartIdxTU();
1679	UInt uiTrMode=rTu.GetTransformDepthRel();
1680	if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) \|\| !pcCU->getSlice()->getPPS()->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag()) )
1681	{
1682	return;
1683	}
1684
1685	if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) )
1686	{
1687	const TComRectangle &tuRect = rTu.getRect(compID);
1688	const Int uiStride = pResidual->getStride( compID );
1689	Pel *rpcResidual = pResidual->getAddr( compID );
1690	UInt uiAddr = (tuRect.x0 + uiStride*tuRect.y0);
1691	Pel *pResi = rpcResidual + uiAddr;
1692	TCoeff *pcCoeff = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID);
1693
1694	const QpParam cQP(*pcCU, compID);
1695
1696	if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0)
1697	{
1698	DEBUG_STRING_NEW(sTemp)
1699	#if DEBUG_STRING
1700	std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0;
1701	#endif
1702
1703	invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
1704
1705	#if DEBUG_STRING
1706	if (psDebug != 0)
1707	{
1708	std::cout << (*psDebug);
1709	}
1710	#endif
1711	}
1712
1713	if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0))
1714	{
1715	const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y );
1716	const Int strideLuma = pResidual->getStride( COMPONENT_Y );
1717	const Int tuWidth = rTu.getRect( compID ).width;
1718	const Int tuHeight = rTu.getRect( compID ).height;
1719
1720	if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0)
1721	{
1722	pResi = rpcResidual + uiAddr;
1723	const Pel *pResiLuma = piResiLuma + uiAddr;
1724
1725	crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true );
1726	}
1727	}
1728	}
1729	else
1730	{
1731	TComTURecurse tuRecurseChild(rTu, false);
1732	do
1733	{
1734	invRecurTransformNxN( compID, pResidual, tuRecurseChild );
1735	} while (tuRecurseChild.nextSection(rTu));
1736	}
1737	}
1738
1739	Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode )
1740	{
1741	TComDataCU *pcCU=rTu.getCU();
1742	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1743
1744	const Bool bLossless = pcCU->getCUTransquantBypass( uiAbsPartIdx );
1745	const UInt uiWidth = rTu.getRect(compID).width;
1746	const UInt uiHeight = rTu.getRect(compID).height;
1747	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1748	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1749
1750	UInt uiX = 0;
1751	UInt uiY = 0;
1752
1753	UInt &majorAxis = (mode == RDPCM_VER) ? uiX : uiY;
1754	UInt &minorAxis = (mode == RDPCM_VER) ? uiY : uiX;
1755	const UInt majorAxisLimit = (mode == RDPCM_VER) ? uiWidth : uiHeight;
1756	const UInt minorAxisLimit = (mode == RDPCM_VER) ? uiHeight : uiWidth;
1757
1758	const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF);
1759
1760	uiAbsSum = 0;
1761
1762	for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ )
1763	{
1764	TCoeff accumulatorValue = 0; // 32-bit accumulator
1765	for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ )
1766	{
1767	const UInt sampleIndex = (uiY * uiWidth) + uiX;
1768	const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex);
1769	const Pel currentSample = pcResidual[(uiY * uiStride) + uiX];
1770	const TCoeff encoderSideDelta = TCoeff(currentSample) - accumulatorValue;
1771
1772	Pel reconstructedDelta;
1773	if ( bLossless )
1774	{
1775	pcCoeff[coefficientIndex] = encoderSideDelta;
1776	reconstructedDelta = (Pel) encoderSideDelta;
1777	}
1778	else
1779	{
1780	transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint);
1781	invTrSkipDeQuantOneSample (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex);
1782	}
1783
1784	uiAbsSum += abs(pcCoeff[coefficientIndex]);
1785
1786	if (mode != RDPCM_OFF)
1787	{
1788	accumulatorValue += reconstructedDelta;
1789	}
1790	}
1791	}
1792	}
1793
1794	Void TComTrQuant::rdpcmNxN ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode )
1795	{
1796	TComDataCU *pcCU=rTu.getCU();
1797	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1798
1799	if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) \|\| ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1800	{
1801	rdpcmMode = RDPCM_OFF;
1802	}
1803	else if ( pcCU->isIntra( uiAbsPartIdx ) )
1804	{
1805	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat();
1806	const ChannelType chType = toChannelType(compID);
1807	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1808	const TComSPS *sps=pcCU->getSlice()->getSPS();
1809	const UInt partsPerMinCU = 1<<(2*(sps->getMaxTotalCUDepth() - sps->getLog2DiffMaxMinCodingBlockSize()));
1810	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt, partsPerMinCU)) : uiChPredMode;
1811	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1812
1813	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1814	{
1815	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1816	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode );
1817	}
1818	else
1819	{
1820	rdpcmMode = RDPCM_OFF;
1821	}
1822	}
1823	else // not intra, need to select the best mode
1824	{
1825	const UInt uiWidth = rTu.getRect(compID).width;
1826	const UInt uiHeight = rTu.getRect(compID).height;
1827
1828	RDPCMMode bestMode = NUMBER_OF_RDPCM_MODES;
1829	TCoeff bestAbsSum = std::numeric_limits<TCoeff>::max();
1830	TCoeff bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE];
1831
1832	for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++)
1833	{
1834	const RDPCMMode mode = RDPCMMode(modeIndex);
1835
1836	TCoeff currAbsSum = 0;
1837
1838	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode );
1839
1840	if (currAbsSum < bestAbsSum)
1841	{
1842	bestMode = mode;
1843	bestAbsSum = currAbsSum;
1844	if (mode != RDPCM_OFF)
1845	{
1846	memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff)));
1847	}
1848	}
1849	}
1850
1851	rdpcmMode = bestMode;
1852	uiAbsSum = bestAbsSum;
1853
1854	if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here
1855	{
1856	memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff)));
1857	}
1858	}
1859
1860	pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1861	}
1862
1863	Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride )
1864	{
1865	TComDataCU *pcCU=rTu.getCU();
1866	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1867
1868	if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) \|\| pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1869	{
1870	const UInt uiWidth = rTu.getRect(compID).width;
1871	const UInt uiHeight = rTu.getRect(compID).height;
1872
1873	RDPCMMode rdpcmMode = RDPCM_OFF;
1874
1875	if ( pcCU->isIntra( uiAbsPartIdx ) )
1876	{
1877	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat();
1878	const ChannelType chType = toChannelType(compID);
1879	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1880	const TComSPS *sps=pcCU->getSlice()->getSPS();
1881	const UInt partsPerMinCU = 1<<(2*(sps->getMaxTotalCUDepth() - sps->getLog2DiffMaxMinCodingBlockSize()));
1882	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt, partsPerMinCU)) : uiChPredMode;
1883	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1884
1885	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
1886	{
1887	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1888	}
1889	}
1890	else // not intra case
1891	{
1892	rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx ));
1893	}
1894
1895	const TCoeff pelMin=(TCoeff) std::numeric_limits<Pel>::min();
1896	const TCoeff pelMax=(TCoeff) std::numeric_limits<Pel>::max();
1897	if (rdpcmMode == RDPCM_VER)
1898	{
1899	for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1900	{
1901	Pel *pcCurResidual = pcResidual+uiX;
1902	TCoeff accumulator = *pcCurResidual; // 32-bit accumulator
1903	pcCurResidual+=uiStride;
1904	for( UInt uiY = 1; uiY < uiHeight; uiY++, pcCurResidual+=uiStride )
1905	{
1906	accumulator += *(pcCurResidual);
1907	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
1908	}
1909	}
1910	}
1911	else if (rdpcmMode == RDPCM_HOR)
1912	{
1913	for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1914	{
1915	Pel pcCurResidual = pcResidual+uiYuiStride;
1916	TCoeff accumulator = *pcCurResidual;
1917	pcCurResidual++;
1918	for( UInt uiX = 1; uiX < uiWidth; uiX++, pcCurResidual++ )
1919	{
1920	accumulator += *(pcCurResidual);
1921	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
1922	}
1923	}
1924	}
1925	}
1926	}
1927
1928	// ------------------------------------------------------------------------------------------------
1929	// Logical transform
1930	// ------------------------------------------------------------------------------------------------
1931
1932	/** Wrapper function between HM interface and core NxN forward transform (2D)
1933	* \param channelBitDepth bit depth of channel
1934	* \param useDST
1935	* \param piBlkResi input data (residual)
1936	* \param uiStride stride of input residual data
1937	* \param psCoeff output data (transform coefficients)
1938	* \param iWidth transform width
1939	* \param iHeight transform height
1940	* \param maxLog2TrDynamicRange
1941	*/
1942	Void TComTrQuant::xT( const Int channelBitDepth, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight, const Int maxLog2TrDynamicRange )
1943	{
1944	#if MATRIX_MULT
1945	if( iWidth == iHeight)
1946	{
1947	xTr(channelBitDepth, piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, maxLog2TrDynamicRange);
1948	return;
1949	}
1950	#endif
1951
1952	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1953	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1954
1955	for (Int y = 0; y < iHeight; y++)
1956	{
1957	for (Int x = 0; x < iWidth; x++)
1958	{
1959	block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x];
1960	}
1961	}
1962
1963	xTrMxN( channelBitDepth, block, coeff, iWidth, iHeight, useDST, maxLog2TrDynamicRange );
1964
1965	memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff)));
1966	}
1967
1968	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1969	* \param channelBitDepth bit depth of channel
1970	* \param useDST
1971	* \param plCoef input data (transform coefficients)
1972	* \param pResidual output data (residual)
1973	* \param uiStride stride of input residual data
1974	* \param iWidth transform width
1975	* \param iHeight transform height
1976	* \param maxLog2TrDynamicRange
1977	*/
1978	Void TComTrQuant::xIT( const Int channelBitDepth, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight, const Int maxLog2TrDynamicRange )
1979	{
1980	#if MATRIX_MULT
1981	if( iWidth == iHeight )
1982	{
1983	xITr(channelBitDepth, plCoef, pResidual, uiStride, (UInt)iWidth, useDST, maxLog2TrDynamicRange);
1984	return;
1985	}
1986	#endif
1987
1988	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1989	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1990
1991	memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));
1992
1993	xITrMxN( channelBitDepth, coeff, block, iWidth, iHeight, useDST, maxLog2TrDynamicRange );
1994
1995	for (Int y = 0; y < iHeight; y++)
1996	{
1997	for (Int x = 0; x < iWidth; x++)
1998	{
1999	pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
2000	}
2001	}
2002	}
2003
2004	/** Wrapper function between HM interface and core 4x4 transform skipping
2005	* \param piBlkResi input data (residual)
2006	* \param uiStride stride of input residual data
2007	* \param psCoeff output data (transform coefficients)
2008	* \param rTu reference to transform data
2009	* \param component colour component
2010	*/
2011	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component )
2012	{
2013	const TComRectangle &rect = rTu.getRect(component);
2014	const Int width = rect.width;
2015	const Int height = rect.height;
2016	const Int maxLog2TrDynamicRange = rTu.getCU()->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(component));
2017	const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(toChannelType(component));
2018
2019	Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(component), maxLog2TrDynamicRange);
2020	if (rTu.getCU()->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
2021	{
2022	iTransformShift = std::max<Int>(0, iTransformShift);
2023	}
2024
2025	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
2026	const UInt uiSizeMinus1 = (width * height) - 1;
2027
2028	if (iTransformShift >= 0)
2029	{
2030	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
2031	{
2032	for (UInt x = 0; x < width; x++, coefficientIndex++)
2033	{
2034	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift;
2035	}
2036	}
2037	}
2038	else //for very high bit depths
2039	{
2040	iTransformShift = -iTransformShift;
2041	const TCoeff offset = 1 << (iTransformShift - 1);
2042
2043	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
2044	{
2045	for (UInt x = 0; x < width; x++, coefficientIndex++)
2046	{
2047	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift;
2048	}
2049	}
2050	}
2051	}
2052
2053	/** Wrapper function between HM interface and core NxN transform skipping
2054	* \param plCoef input data (coefficients)
2055	* \param pResidual output data (residual)
2056	* \param uiStride stride of input residual data
2057	* \param rTu reference to transform data
2058	* \param component colour component ID
2059	*/
2060	Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component )
2061	{
2062	const TComRectangle &rect = rTu.getRect(component);
2063	const Int width = rect.width;
2064	const Int height = rect.height;
2065	const Int maxLog2TrDynamicRange = rTu.getCU()->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(component));
2066	#if O0043_BEST_EFFORT_DECODING
2067	const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getStreamBitDepth(toChannelType(component));
2068	#else
2069	const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(toChannelType(component));
2070	#endif
2071
2072	Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(component), maxLog2TrDynamicRange);
2073	if (rTu.getCU()->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
2074	{
2075	iTransformShift = std::max<Int>(0, iTransformShift);
2076	}
2077
2078	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
2079	const UInt uiSizeMinus1 = (width * height) - 1;
2080
2081	if (iTransformShift >= 0)
2082	{
2083	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
2084
2085	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
2086	{
2087	for (UInt x = 0; x < width; x++, coefficientIndex++)
2088	{
2089	pResidual[(y * uiStride) + x] = Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift);
2090	}
2091	}
2092	}
2093	else //for very high bit depths
2094	{
2095	iTransformShift = -iTransformShift;
2096
2097	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
2098	{
2099	for (UInt x = 0; x < width; x++, coefficientIndex++)
2100	{
2101	pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift);
2102	}
2103	}
2104	}
2105	}
2106
2107	/** RDOQ with CABAC
2108	* \param rTu reference to transform data
2109	* \param plSrcCoeff pointer to input buffer (transform coefficients; only read)
2110	* \param piDstCoeff pointer to output buffer (receives the signed quantized levels)
2111	* \param piArlDstCoeff only present when ADAPTIVE_QP_SELECTION is set; zeroed on entry and filled when m_bUseAdaptQpSelect is true
2112	* \param uiAbsSum reference to absolute sum of quantized transform coefficient (levels are added to it, it is not reset here)
2113	* \param compID colour component ID
2114	* \param cQP reference to quantization parameters
2115
2116	* Rate distortion optimized quantization for entropy
2117	* coding engines using probability models like CABAC
*
* Outline of the steps performed below:
*  1. every coefficient is visited in reverse scan order, coefficient group by coefficient group;
*     a level is chosen with xGetCodedLevel and running costs are accumulated;
*  2. after each coefficient group, zeroing the whole group is evaluated;
*  3. the best last significant position (or an all-zero block) is chosen;
*  4. signs are restored from plSrcCoeff and positions past the chosen last position are cleared;
*  5. if sign data hiding is enabled, one level per qualifying group is adjusted by +/-1.
2118	*/
2119	Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu,
2120	TCoeff * plSrcCoeff,
2121	TCoeff * piDstCoeff,
2122	#if ADAPTIVE_QP_SELECTION
2123	TCoeff * piArlDstCoeff,
2124	#endif
2125	TCoeff &uiAbsSum,
2126	const ComponentID compID,
2127	const QpParam &cQP )
2128	{
2129	const TComRectangle & rect = rTu.getRect(compID);
2130	const UInt uiWidth = rect.width;
2131	const UInt uiHeight = rect.height;
2132	TComDataCU * pcCU = rTu.getCU();
2133	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
2134	const ChannelType channelType = toChannelType(compID);
2135	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
2136
2137	const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getExtendedPrecisionProcessingFlag();
2138	const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
2139	const Int channelBitDepth = rTu.getCU()->getSlice()->getSPS()->getBitDepth(channelType);
2140
2141	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
2142	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
2143	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
2144	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
2145	*/
2146
2147	// Represents scaling through forward transform
2148	Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange);
// For transform-skipped blocks with extended precision the shift is clamped to be non-negative.
2149	if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && extendedPrecision)
2150	{
2151	iTransformShift = std::max<Int>(0, iTransformShift);
2152	}
2153
2154	const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag();
2155	const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
2156	UInt uiGoRiceParam = initialGolombRiceParameter;
// Sum of the zero-level cost (pdCostCoeff0) over all coefficients of the block.
2157	Double d64BlockUncodedCost = 0;
2158	const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2;
2159	const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2;
2160	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
2161	assert(compID<MAX_NUM_COMPONENT);
2162
2163	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
2164	assert(scalingListType < SCALING_LIST_NUM);
2165
2166	#if ADAPTIVE_QP_SELECTION
2167	memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff);
2168	#endif
2169
// Per-coefficient bookkeeping. pdCostCoeff, pdCostSig and pdCostCoeff0 are indexed by scan position;
// rateIncUp, rateIncDown, sigRateDelta and deltaU are indexed by block position.
// pdCostCoeff0 is not cleared here; each entry is written in the main loop before it is read.
2170	Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
2171	Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ];
2172	Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
2173	memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
2174	memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
2175	Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ];
2176	Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
2177	Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
2178	TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ];
2179	memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff );
2180	memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff );
2181	memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff );
2182	memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff );
2183
2184	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
2185	const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
2186	const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
2187
// When scaling lists are not in use, one quantisation coefficient and one error scale serve the whole block.
2188	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
2189	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
2190	const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
2191
2192	const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
2193	const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
2194
2195	#if ADAPTIVE_QP_SELECTION
2196	Int iQBitsC = iQBits - ARL_C_PRECISION;
2197	Int iAddC = 1 << (iQBitsC-1);
2198	#endif
2199
2200	TUEntropyCodingParameters codingParameters;
2201	getTUEntropyCodingParameters(codingParameters, rTu, compID);
2202	const UInt uiCGSize = (1 << MLS_CG_SIZE);
2203
2204	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
2205	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
2206	Int iCGLastScanPos = -1;
2207
// Context state carried from coefficient to coefficient while scanning.
2208	UInt uiCtxSet = 0;
2209	Int c1 = 1;
2210	Int c2 = 0;
2211	Double d64BaseCost = 0;
2212	Int iLastScanPos = -1;
2213
2214	UInt c1Idx = 0;
2215	UInt c2Idx = 0;
2216	Int baseLevel;
2217
2218	memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
2219	memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
2220
2221	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
2222	Int iScanPos;
2223	coeffGroupRDStats rdStats;
2224
2225	const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
2226
// Main pass: coefficient groups, and the coefficients inside each group, are visited in reverse scan order.
2227	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
2228	{
2229	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2230	UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups;
2231	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
2232
2233	memset( &rdStats, 0, sizeof (coeffGroupRDStats));
2234
2235	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
2236
2237	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2238	{
2239	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2240	//===== quantization =====
2241	UInt uiBlkPos = codingParameters.scan[iScanPos];
2242	// set coeff
2243
2244	const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
2245	const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
2246
2247	const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
2248
// Clamp so that adding the rounding offset (1 << (iQBits - 1)) below cannot exceed the Intermediate_Int maximum.
2249	const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
2250
2251	#if ADAPTIVE_QP_SELECTION
2252	if( m_bUseAdaptQpSelect )
2253	{
2254	piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
2255	}
2256	#endif
// Rounded quantized magnitude, capped at entropyCodingMaximum; this is the upper candidate for the level search.
2257	const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
2258
2259	const Double dErr = Double( lLevelDouble );
2260	pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;
2261	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
2262	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
2263
// The first non-zero candidate met while scanning backwards is the highest scan position that can be significant.
2264	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2265	{
2266	iLastScanPos = iScanPos;
2267	uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
2268	iCGLastScanPos = iCGScanPos;
2269	}
2270
2271	if ( iLastScanPos >= 0 )
2272	{
2273	//===== coefficient level estimation =====
2274	UInt uiLevel;
2275	UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
2276	UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
2277
2278	if( iScanPos == iLastScanPos )
2279	{
2280	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2281	lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2282	c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange
2283	);
2284	}
2285	else
2286	{
2287	UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
2288
2289	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2290	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2291	c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange
2292	);
2293
2294	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2295	}
2296
// Quantisation remainder for the chosen level, kept with 8 fractional bits; consumed by the sign data hiding stage.
2297	deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
2298
// Rate change for moving the level up or down by one; also consumed by the sign data hiding stage.
2299	if( uiLevel > 0 )
2300	{
2301	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange );
2302	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
2303	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, maxLog2TrDynamicRange ) - rateNow;
2304	}
2305	else // uiLevel == 0
2306	{
2307	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2308	}
2309	piDstCoeff[ uiBlkPos ] = uiLevel;
2310	d64BaseCost += pdCostCoeff [ iScanPos ];
2311
// Golomb-Rice parameter update; without persistent adaptation the parameter is capped at 4.
2312	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2313	if( uiLevel >= baseLevel )
2314	{
2315	if (uiLevel > 3*(1<<uiGoRiceParam))
2316	{
2317	uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
2318	}
2319	}
2320	if ( uiLevel >= 1)
2321	{
2322	c1Idx ++;
2323	}
2324
2325	//===== update bin model =====
2326	if( uiLevel > 1 )
2327	{
2328	c1 = 0;
2329	c2 += (c2 < 2);
2330	c2Idx ++;
2331	}
2332	else if( (c1 < 3) && (c1 > 0) && uiLevel)
2333	{
2334	c1++;
2335	}
2336
2337	//===== context set update =====
2338	if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
2339	{
2340	uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this before entering the final group
2341	c1 = 1;
2342	c2 = 0;
2343	c1Idx = 0;
2344	c2Idx = 0;
2345	uiGoRiceParam = initialGolombRiceParameter;
2346	}
2347	}
2348	else
2349	{
// No non-zero candidate has been met yet: this position only contributes its zero-level cost.
2350	d64BaseCost += pdCostCoeff0[ iScanPos ];
2351	}
// Statistics of the current coefficient group, used by the group-level decision after this loop.
2352	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2353	if (iScanPosinCG == 0 )
2354	{
2355	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2356	}
2357	if (piDstCoeff[ uiBlkPos ] )
2358	{
2359	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2360	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2361	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2362	if ( iScanPosinCG != 0 )
2363	{
2364	rdStats.iNNZbeforePos0++;
2365	}
2366	}
2367	} //end for (iScanPosinCG)
2368
// Coefficient-group decision; it only applies once a group containing a non-zero candidate has been reached.
2369	if (iCGLastScanPos >= 0)
2370	{
2371	if( iCGScanPos )
2372	{
2373	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2374	{
2375	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2376	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2377	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2378	}
2379	else
2380	{
2381	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2382	{
2383	if ( rdStats.iNNZbeforePos0 == 0 )
2384	{
2385	d64BaseCost -= rdStats.d64SigCost_0;
2386	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2387	}
2388	// rd-cost if SigCoeffGroupFlag = 0, initialization
2389	Double d64CostZeroCG = d64BaseCost;
2390
2391	// add SigCoeffGroupFlag cost to total cost
2392	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2393
2394	if (iCGScanPos < iCGLastScanPos)
2395	{
2396	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2397	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2398	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2399	}
2400
2401	// try to convert the current coeff group from non-zero to all-zero
2402	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2403	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2404	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2405
2406	// if we can save cost, change this block to all-zero block
2407	if ( d64CostZeroCG < d64BaseCost )
2408	{
2409	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2410	d64BaseCost = d64CostZeroCG;
2411	if (iCGScanPos < iCGLastScanPos)
2412	{
2413	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2414	}
2415	// reset coeffs to 0 in this block
2416	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2417	{
2418	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2419	UInt uiBlkPos = codingParameters.scan[ iScanPos ];
2420
2421	if (piDstCoeff[ uiBlkPos ])
2422	{
2423	piDstCoeff [ uiBlkPos ] = 0;
2424	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2425	pdCostSig [ iScanPos ] = 0;
2426	}
2427	}
2428	} // end if ( d64CostAllZeros < d64BaseCost )
2429	}
2430	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2431	}
2432	else
2433	{
// Group at scan position 0: its flag is always set to 1.
2434	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2435	}
2436	}
2437	} //end for (iCGScanPos)
2438
2439	//===== estimate last position =====
// No coefficient had a non-zero candidate: piDstCoeff already holds zeros everywhere and uiAbsSum is left untouched.
2440	if ( iLastScanPos < 0 )
2441	{
2442	return;
2443	}
2444
// d64BestCost starts as the cost of an all-zero block (uncoded distortion plus the cost of a zero cbf);
// d64BaseCost is charged the cost of a non-zero cbf.
2445	Double d64BestCost = 0;
2446	Int ui16CtxCbf = 0;
2447	Int iBestLastIdxP1 = 0;
2448	if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2449	{
2450	ui16CtxCbf = 0;
2451	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2452	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2453	}
2454	else
2455	{
2456	ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType );
2457	ui16CtxCbf += getCBFContextOffset(compID);
2458	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2459	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2460	}
2461
2462
// Walk backwards from the last candidate position; each non-zero level is tried as the last significant
// coefficient. The search stops at the first level greater than 1.
2463	Bool bFoundLast = false;
2464	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2465	{
2466	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2467
2468	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2469	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2470	{
2471	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2472	{
2473	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2474
2475	if (iScanPos > iLastScanPos)
2476	{
2477	continue;
2478	}
2479	UInt uiBlkPos = codingParameters.scan[iScanPos];
2480
2481	if( piDstCoeff[ uiBlkPos ] )
2482	{
2483	UInt uiPosY = uiBlkPos >> uiLog2BlockWidth;
2484	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
2485
2486	Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
2487	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2488
2489	if( totalCost < d64BestCost )
2490	{
2491	iBestLastIdxP1 = iScanPos + 1;
2492	d64BestCost = totalCost;
2493	}
2494	if( piDstCoeff[ uiBlkPos ] > 1 )
2495	{
2496	bFoundLast = true;
2497	break;
2498	}
2499	d64BaseCost -= pdCostCoeff[ iScanPos ];
2500	d64BaseCost += pdCostCoeff0[ iScanPos ];
2501	}
2502	else
2503	{
2504	d64BaseCost -= pdCostSig[ iScanPos ];
2505	}
2506	} //end for
2507	if (bFoundLast)
2508	{
2509	break;
2510	}
2511	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2512	} // end for
2513
2514
// Scan positions [0, iBestLastIdxP1) are kept: the sign is taken from the source coefficient and the
// magnitude is added to uiAbsSum. NOTE(review): uiAbsSum is only added to here; presumably the caller zeroes it - confirm.
2515	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2516	{
2517	Int blkPos = codingParameters.scan[ scanPos ];
2518	TCoeff level = piDstCoeff[ blkPos ];
2519	uiAbsSum += level;
2520	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2521	}
2522
2523	//===== clean uncoded coefficients =====
2524	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2525	{
2526	piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
2527	}
2528
2529
// Sign data hiding. For each coefficient group whose first and last non-zero positions are at least
// SBH_THRESHOLD apart, the parity of the sum of levels is compared with the sign of the first non-zero
// coefficient; on a mismatch the cheapest +/-1 change of a single level in the group is applied.
2530	if( pcCU->getSlice()->getPPS()->getSignDataHidingEnabledFlag() && uiAbsSum>=2)
2531	{
2532	const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
2533	Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
2534	/ m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth - 8)))
2535	+ 0.5);
2536
// lastCG: -1 until the first group with a non-zero level is met, 1 while processing that group, 0 afterwards.
2537	Int lastCG = -1;
2538	Int absSum = 0 ;
2539	Int n ;
2540
2541	for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
2542	{
2543	Int subPos = subSet << MLS_CG_SIZE;
2544	Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
2545	absSum = 0 ;
2546
2547	for(n = uiCGSize-1; n >= 0; --n )
2548	{
2549	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2550	{
2551	lastNZPosInCG = n;
2552	break;
2553	}
2554	}
2555
2556	for(n = 0; n <uiCGSize; n++ )
2557	{
2558	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2559	{
2560	firstNZPosInCG = n;
2561	break;
2562	}
2563	}
2564
2565	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2566	{
2567	absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
2568	}
2569
2570	if(lastNZPosInCG>=0 && lastCG==-1)
2571	{
2572	lastCG = 1;
2573	}
2574
2575	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2576	{
2577	UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
2578	if( signbit!=(absSum&0x1) ) // hide but need tune
2579	{
2580	// calculate the cost
2581	Int64 minCostInc = std::numeric_limits<Int64>::max(), curCost = std::numeric_limits<Int64>::max();
2582	Int minPos = -1, finalChange = 0, curChange = 0;
2583
// In the group holding the last significant coefficient only positions up to lastNZPosInCG are candidates.
2584	for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
2585	{
2586	UInt uiBlkPos = codingParameters.scan[ n + subPos ];
2587	if(piDstCoeff[ uiBlkPos ] != 0 )
2588	{
2589	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
2590	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2591	- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
2592
2593	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2594	{
2595	costDown -= (4<<15);
2596	}
2597
2598	if(costUp<costDown)
2599	{
2600	curCost = costUp;
2601	curChange = 1;
2602	}
2603	else
2604	{
2605	curChange = -1;
// Lowering the first non-zero coefficient from magnitude 1 to 0 is ruled out by giving it the maximum cost.
2606	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2607	{
2608	curCost = std::numeric_limits<Int64>::max();
2609	}
2610	else
2611	{
2612	curCost = costDown;
2613	}
2614	}
2615	}
2616	else
2617	{
2618	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2619	curChange = 1 ;
2620
// A zero ahead of the first non-zero position would become the new first non-zero coefficient,
// so it is only allowed when its source sign matches signbit.
2621	if(n<firstNZPosInCG)
2622	{
2623	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2624	if(thissignbit != signbit )
2625	{
2626	curCost = std::numeric_limits<Int64>::max();
2627	}
2628	}
2629	}
2630
2631	if( curCost<minCostInc)
2632	{
2633	minCostInc = curCost;
2634	finalChange = curChange;
2635	minPos = uiBlkPos;
2636	}
2637	}
2638
// NOTE(review): minPos starts at -1 and is used as an index below; this relies on at least one
// candidate above having a cost below the Int64 maximum - confirm.
// A level already at an entropy-coding limit is always moved down in magnitude.
2639	if(piDstCoeff[minPos] == entropyCodingMaximum \|\| piDstCoeff[minPos] == entropyCodingMinimum)
2640	{
2641	finalChange = -1;
2642	}
2643
// finalChange is expressed on the magnitude; apply it in the direction of the source coefficient's sign.
2644	if(plSrcCoeff[minPos]>=0)
2645	{
2646	piDstCoeff[minPos] += finalChange ;
2647	}
2648	else
2649	{
2650	piDstCoeff[minPos] -= finalChange ;
2651	}
2652	}
2653	}
2654
2655	if(lastCG==1)
2656	{
2657	lastCG=0 ;
2658	}
2659	}
2660	}
2661	}
2662
2663
2664	/** Pattern decision for context derivation process of significant_coeff_flag
2665	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2666	* \param uiCGPosX column of current coefficient group
2667	* \param uiCGPosY row of current coefficient group
2668	* \param widthInGroups width of the block
2669	* \param heightInGroups height of the block
2670	* \returns pattern for current coefficient group
2671	*/
2672	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups )
2673	{
2674	if ((widthInGroups <= 1) && (heightInGroups <= 1))
2675	{
2676	return 0;
2677	}
2678
2679	const Bool rightAvailable = uiCGPosX < (widthInGroups - 1);
2680	const Bool belowAvailable = uiCGPosY < (heightInGroups - 1);
2681
2682	UInt sigRight = 0;
2683	UInt sigLower = 0;
2684
2685	if (rightAvailable)
2686	{
2687	sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2688	}
2689	if (belowAvailable)
2690	{
2691	sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2692	}
2693
2694	return sigRight + (sigLower << 1);
2695	}
2696
2697
2698	/** Context derivation process of coeff_abs_significant_flag
2699	* \param patternSigCtx pattern for current coefficient group
2700	* \param codingParameters coding parameters for the TU (includes the scan)
2701	* \param scanPosition current position in scan order
2702	* \param log2BlockWidth log2 width of the block
2703	* \param log2BlockHeight log2 height of the block
2704	* \param chanType channel type (CHANNEL_TYPE_LUMA/CHROMA)
2705	* \returns ctxInc for current scan position
2706	*/
2707	Int TComTrQuant::getSigCtxInc ( Int patternSigCtx,
2708	const TUEntropyCodingParameters &codingParameters,
2709	const Int scanPosition,
2710	const Int log2BlockWidth,
2711	const Int log2BlockHeight,
2712	const ChannelType chanType)
2713	{
2714	if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE])
2715	{
2716	//single context mode
2717	return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE];
2718	}
2719
2720	const UInt rasterPosition = codingParameters.scan[scanPosition];
2721	const UInt posY = rasterPosition >> log2BlockWidth;
2722	const UInt posX = rasterPosition - (posY << log2BlockWidth);
2723
2724	if ((posX + posY) == 0)
2725	{
2726	return 0; //special case for the DC context variable
2727	}
2728
2729	Int offset = MAX_INT;
2730
2731	if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4
2732	{
2733	offset = ctxIndMap4x4[ (4 * posY) + posX ];
2734	}
2735	else
2736	{
2737	Int cnt = 0;
2738
2739	switch (patternSigCtx)
2740	{
2741	//------------------
2742
2743	case 0: //neither neighbouring group is significant
2744	{
2745	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2746	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2747	const Int posTotalInSubset = posXinSubset + posYinSubset;
2748
2749	//first N coefficients in scan order use 2; the next few use 1; the rest use 0.
2750	const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4;
2751	const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4;
2752
2753	cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2);
2754	}
2755	break;
2756
2757	//------------------
2758
2759	case 1: //right group is significant, below is not
2760	{
2761	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2762	const Int groupHeight = 1 << MLS_CG_LOG2_HEIGHT;
2763
2764	cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0
2765	}
2766	break;
2767
2768	//------------------
2769
2770	case 2: //below group is significant, right is not
2771	{
2772	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2773	const Int groupWidth = 1 << MLS_CG_LOG2_WIDTH;
2774
2775	cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0
2776	}
2777	break;
2778
2779	//------------------
2780
2781	case 3: //both neighbouring groups are significant
2782	{
2783	cnt = 2;
2784	}
2785	break;
2786
2787	//------------------
2788
2789	default:
2790	std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl;
2791	exit(1);
2792	break;
2793	}
2794
2795	//------------------------------------------------
2796
2797	const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0;
2798
2799	offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt;
2800	}
2801
2802	return codingParameters.firstSignificanceMapContext + offset;
2803	}
2804
2805
2806	/** Get the best level in RD sense
2807	*
2808	* \returns best quantized transform level for given scan position
2809	*
2810	* This method calculates the best quantized transform level for a given scan position.
2811	*/
2812	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost, //< reference to coded cost
2813	Double& rd64CodedCost0, //< reference to cost when coefficient is 0
2814	Double& rd64CodedCostSig, //< rd64CodedCostSig reference to cost of significant coefficient
2815	Intermediate_Int lLevelDouble, //< reference to unscaled quantized level
2816	UInt uiMaxAbsLevel, //< scaled quantized level
2817	UShort ui16CtxNumSig, //< current ctxInc for coeff_abs_significant_flag
2818	UShort ui16CtxNumOne, //< current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2819	UShort ui16CtxNumAbs, //< current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2820	UShort ui16AbsGoRice, //< current Rice parameter for coeff_abs_level_minus3
2821	UInt c1Idx, //<
2822	UInt c2Idx, //<
2823	Int iQBits, //< quantization step size
2824	Double errorScale, //<
2825	Bool bLast, //< indicates if the coefficient is the last significant
2826	Bool useLimitedPrefixLength, //<
2827	const Int maxLog2TrDynamicRange //<
2828	) const
2829	{
2830	Double dCurrCostSig = 0;
2831	UInt uiBestAbsLevel = 0;
2832
2833	if( !bLast && uiMaxAbsLevel < 3 )
2834	{
2835	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2836	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2837	if( uiMaxAbsLevel == 0 )
2838	{
2839	return uiBestAbsLevel;
2840	}
2841	}
2842	else
2843	{
2844	rd64CodedCost = MAX_DOUBLE;
2845	}
2846
2847	if( !bLast )
2848	{
2849	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2850	}
2851
2852	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2853	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2854	{
2855	Double dErr = Double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
2856	Double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, maxLog2TrDynamicRange ) );
2857	dCurrCost += dCurrCostSig;
2858
2859	if( dCurrCost < rd64CodedCost )
2860	{
2861	uiBestAbsLevel = uiAbsLevel;
2862	rd64CodedCost = dCurrCost;
2863	rd64CodedCostSig = dCurrCostSig;
2864	}
2865	}
2866
2867	return uiBestAbsLevel;
2868	}
2869
2870	/** Calculates the cost for specific absolute transform level
2871	* \param uiAbsLevel scaled quantized level
2872	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2873	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2874	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2875	* \param c1Idx
2876	* \param c2Idx
2877	* \param useLimitedPrefixLength
2878	* \param maxLog2TrDynamicRange
2879	* \returns cost of given absolute transform level
2880	*/
2881	__inline Int TComTrQuant::xGetICRate ( const UInt uiAbsLevel,
2882	const UShort ui16CtxNumOne,
2883	const UShort ui16CtxNumAbs,
2884	const UShort ui16AbsGoRice,
2885	const UInt c1Idx,
2886	const UInt c2Idx,
2887	const Bool useLimitedPrefixLength,
2888	const Int maxLog2TrDynamicRange
2889	) const
2890	{
2891	Int iRate = Int(xGetIEPRate()); // cost of sign bit
2892	UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2893
2894	if ( uiAbsLevel >= baseLevel )
2895	{
2896	UInt symbol = uiAbsLevel - baseLevel;
2897	UInt length;
2898	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2899	{
2900	length = symbol>>ui16AbsGoRice;
2901	iRate += (length+1+ui16AbsGoRice)<< 15;
2902	}
2903	else if (useLimitedPrefixLength)
2904	{
2905	const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange));
2906
2907	UInt prefixLength = 0;
2908	UInt suffix = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION;
2909
2910	while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
2911	{
2912	prefixLength++;
2913	}
2914
2915	const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ui16AbsGoRice) : (prefixLength + 1/separator/);
2916
2917	iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15;
2918	}
2919	else
2920	{
2921	length = ui16AbsGoRice;
2922	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2923	while (symbol >= (1<<length))
2924	{
2925	symbol -= (1<<(length++));
2926	}
2927	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2928	}
2929
2930	if (c1Idx < C1FLAG_NUMBER)
2931	{
2932	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2933
2934	if (c2Idx < C2FLAG_NUMBER)
2935	{
2936	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2937	}
2938	}
2939	}
2940	else if( uiAbsLevel == 1 )
2941	{
2942	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2943	}
2944	else if( uiAbsLevel == 2 )
2945	{
2946	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2947	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2948	}
2949	else
2950	{
2951	iRate = 0;
2952	}
2953
2954	return iRate;
2955	}
2956
2957	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2958	UShort ui16CtxNumSig ) const
2959	{
2960	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2961	}
2962
2963	/** Calculates the cost of signaling the last significant coefficient in the block
2964	* \param uiPosX X coordinate of the last significant coefficient
2965	* \param uiPosY Y coordinate of the last significant coefficient
2966	* \param component colour component ID
2967	* \returns cost of last significant coefficient
2968	*/
2969	/*
2970	* \param uiWidth width of the transform unit (TU)
2971	*/
2972	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2973	const UInt uiPosY,
2974	const ComponentID component ) const
2975	{
2976	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2977	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2978
2979	Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ];
2980
2981	if( uiCtxX > 3 )
2982	{
2983	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2984	}
2985	if( uiCtxY > 3 )
2986	{
2987	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2988	}
2989	return xGetICost( uiCost );
2990	}
2991
2992	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2993	UShort ui16CtxNumSig ) const
2994	{
2995	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2996	}
2997
2998	/** Get the cost for a specific rate
2999	* \param dRate rate of a bit
3000	* \returns cost at the specific rate
3001	*/
3002	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
3003	{
3004	return m_dLambda * dRate;
3005	}
3006
3007	/** Get the cost of an equal probable bit
3008	* \returns cost of equal probable bit
3009	*/
3010	__inline Double TComTrQuant::xGetIEPRate ( ) const
3011	{
3012	return 32768;
3013	}
3014
3015	/** Context derivation process of coeff_abs_significant_flag
3016	* \param uiSigCoeffGroupFlag significance map of L1
3017	* \param uiCGPosX column of current scan position
3018	* \param uiCGPosY row of current scan position
3019	* \param widthInGroups width of the block
3020	* \param heightInGroups height of the block
3021	* \returns ctxInc for current scan position
3022	*/
3023	UInt TComTrQuant::getSigCoeffGroupCtxInc (const UInt* uiSigCoeffGroupFlag,
3024	const UInt uiCGPosX,
3025	const UInt uiCGPosY,
3026	const UInt widthInGroups,
3027	const UInt heightInGroups)
3028	{
3029	UInt sigRight = 0;
3030	UInt sigLower = 0;
3031
3032	if (uiCGPosX < (widthInGroups - 1))
3033	{
3034	sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
3035	}
3036	if (uiCGPosY < (heightInGroups - 1))
3037	{
3038	sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
3039	}
3040
3041	return ((sigRight + sigLower) != 0) ? 1 : 0;
3042	}
3043
3044
3045	/** set quantized matrix coefficient for encode
3046	* \param scalingList quantized matrix address
3047	* \param format chroma format
3048	* \param maxLog2TrDynamicRange
3049	* \param bitDepths reference to bit depth array for all channels
3050	*/
3051	Void TComTrQuant::setScalingList(TComScalingList *scalingList, const Int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
3052	{
3053	const Int minimumQp = 0;
3054	const Int maximumQp = SCALING_LIST_REM_NUM;
3055
3056	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3057	{
3058	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3059	{
3060	for(Int qp = minimumQp; qp < maximumQp; qp++)
3061	{
3062	xSetScalingListEnc(scalingList,list,size,qp);
3063	xSetScalingListDec(*scalingList,list,size,qp);
3064	setErrScaleCoeff(list,size,qp,maxLog2TrDynamicRange, bitDepths);
3065	}
3066	}
3067	}
3068	}
3069	/** set quantized matrix coefficient for decode
3070	* \param scalingList quantized matrix address
3071	* \param format chroma format
3072	*/
3073	Void TComTrQuant::setScalingListDec(const TComScalingList &scalingList)
3074	{
3075	const Int minimumQp = 0;
3076	const Int maximumQp = SCALING_LIST_REM_NUM;
3077
3078	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3079	{
3080	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3081	{
3082	for(Int qp = minimumQp; qp < maximumQp; qp++)
3083	{
3084	xSetScalingListDec(scalingList,list,size,qp);
3085	}
3086	}
3087	}
3088	}
3089	/** set error scale coefficients
3090	* \param list list ID
3091	* \param size
3092	* \param qp quantization parameter
3093	* \param maxLog2TrDynamicRange
3094	* \param bitDepths reference to bit depth array for all channels
3095	*/
3096	Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp, const Int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
3097	{
3098	const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
3099	const ChannelType channelType = ((list == 0) \|\| (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
3100
3101	const Int channelBitDepth = bitDepths.recon[channelType];
3102	const Int iTransformShift = getTransformShift(channelBitDepth, uiLog2TrSize, maxLog2TrDynamicRange[channelType]); // Represents scaling through forward transform
3103
3104	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
3105	Int *piQuantcoeff;
3106	Double *pdErrScale;
3107	piQuantcoeff = getQuantCoeff(list, qp,size);
3108	pdErrScale = getErrScaleCoeff(list, size, qp);
3109
3110	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
3111	dErrScale = dErrScalepow(2.0,(-2.0iTransformShift)); // Compensate for scaling through forward transform
3112
3113	for(i=0;i<uiMaxNumCoeff;i++)
3114	{
3115	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepths.recon[channelType] - 8)));
3116	}
3117
3118	getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepths.recon[channelType] - 8)));
3119	}
3120
3121	/** set quantized matrix coefficient for encode
3122	* \param scalingList quantized matrix address
3123	* \param listId List index
3124	* \param sizeId size index
3125	* \param qp Quantization parameter
3126	* \param format chroma format
3127	*/
3128	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp)
3129	{
3130	UInt width = g_scalingListSizeX[sizeId];
3131	UInt height = g_scalingListSizeX[sizeId];
3132	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3133	Int *quantcoeff;
3134	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
3135	quantcoeff = getQuantCoeff(listId, qp, sizeId);
3136
3137	Int quantScales = g_quantScales[qp];
3138
3139	processScalingListEnc(coeff,
3140	quantcoeff,
3141	(quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
3142	height, width, ratio,
3143	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3144	scalingList->getScalingListDC(sizeId,listId));
3145	}
3146
3147	/** set quantized matrix coefficient for decode
3148	* \param scalingList quantaized matrix address
3149	* \param listId List index
3150	* \param sizeId size index
3151	* \param qp Quantization parameter
3152	* \param format chroma format
3153	*/
3154	Void TComTrQuant::xSetScalingListDec(const TComScalingList &scalingList, UInt listId, UInt sizeId, Int qp)
3155	{
3156	UInt width = g_scalingListSizeX[sizeId];
3157	UInt height = g_scalingListSizeX[sizeId];
3158	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3159	Int *dequantcoeff;
3160	const Int *coeff = scalingList.getScalingListAddress(sizeId,listId);
3161
3162	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
3163
3164	Int invQuantScale = g_invQuantScales[qp];
3165
3166	processScalingListDec(coeff,
3167	dequantcoeff,
3168	invQuantScale,
3169	height, width, ratio,
3170	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3171	scalingList.getScalingListDC(sizeId,listId));
3172	}
3173
3174	/** set flat matrix value to quantized coefficient
3175	*/
3176	Void TComTrQuant::setFlatScalingList(const Int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths)
3177	{
3178	const Int minimumQp = 0;
3179	const Int maximumQp = SCALING_LIST_REM_NUM;
3180
3181	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3182	{
3183	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3184	{
3185	for(Int qp = minimumQp; qp < maximumQp; qp++)
3186	{
3187	xsetFlatScalingList(list,size,qp);
3188	setErrScaleCoeff(list,size,qp,maxLog2TrDynamicRange, bitDepths);
3189	}
3190	}
3191	}
3192	}
3193
3194	/** set flat matrix value to quantized coefficient
3195	* \param list List ID
3196	* \param size size index
3197	* \param qp Quantization parameter
3198	* \param format chroma format
3199	*/
3200	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp)
3201	{
3202	UInt i,num = g_scalingListSize[size];
3203	Int *quantcoeff;
3204	Int *dequantcoeff;
3205
3206	Int quantScales = g_quantScales [qp];
3207	Int invQuantScales = g_invQuantScales[qp] << 4;
3208
3209	quantcoeff = getQuantCoeff(list, qp, size);
3210	dequantcoeff = getDequantCoeff(list, qp, size);
3211
3212	for(i=0;i<num;i++)
3213	{
3214	*quantcoeff++ = quantScales;
3215	*dequantcoeff++ = invQuantScales;
3216	}
3217	}
3218
3219	/** set quantized matrix coefficient for encode
3220	* \param coeff quantaized matrix address
3221	* \param quantcoeff quantaized matrix address
3222	* \param quantScales Q(QP%6)
3223	* \param height height
3224	* \param width width
3225	* \param ratio ratio for upscale
3226	* \param sizuNum matrix size
3227	* \param dc dc parameter
3228	*/
3229	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3230	{
3231	for(UInt j=0;j<height;j++)
3232	{
3233	for(UInt i=0;i<width;i++)
3234	{
3235	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j / ratio) + i / ratio];
3236	}
3237	}
3238
3239	if(ratio > 1)
3240	{
3241	quantcoeff[0] = quantScales / dc;
3242	}
3243	}
3244
3245	/** set quantized matrix coefficient for decode
3246	* \param coeff quantaized matrix address
3247	* \param dequantcoeff quantaized matrix address
3248	* \param invQuantScales IQ(QP%6))
3249	* \param height height
3250	* \param width width
3251	* \param ratio ratio for upscale
3252	* \param sizuNum matrix size
3253	* \param dc dc parameter
3254	*/
3255	Void TComTrQuant::processScalingListDec( const Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3256	{
3257	for(UInt j=0;j<height;j++)
3258	{
3259	for(UInt i=0;i<width;i++)
3260	{
3261	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
3262	}
3263	}
3264
3265	if(ratio > 1)
3266	{
3267	dequantcoeff[0] = invQuantScales * dc;
3268	}
3269	}
3270
3271	/** initialization process of scaling list array
3272	*/
3273	Void TComTrQuant::initScalingList()
3274	{
3275	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3276	{
3277	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3278	{
3279	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3280	{
3281	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3282	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3283	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
3284	} // listID loop
3285	}
3286	}
3287	}
3288
3289	/** destroy quantization matrix array
3290	*/
3291	Void TComTrQuant::destroyScalingList()
3292	{
3293	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3294	{
3295	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3296	{
3297	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3298	{
3299	if(m_quantCoef[sizeId][listId][qp])
3300	{
3301	delete [] m_quantCoef[sizeId][listId][qp];
3302	}
3303	if(m_dequantCoef[sizeId][listId][qp])
3304	{
3305	delete [] m_dequantCoef[sizeId][listId][qp];
3306	}
3307	if(m_errScale[sizeId][listId][qp])
3308	{
3309	delete [] m_errScale[sizeId][listId][qp];
3310	}
3311	}
3312	}
3313	}
3314	}
3315
3316	Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const TCoeff resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint)
3317	{
3318	TComDataCU *pcCU = rTu.getCU();
3319	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3320	const TComRectangle &rect = rTu.getRect(compID);
3321	const UInt uiWidth = rect.width;
3322	const UInt uiHeight = rect.height;
3323	const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
3324	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
3325	const Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(compID), maxLog2TrDynamicRange);
3326	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3327	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3328	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
3329
3330	assert( scalingListType < SCALING_LIST_NUM );
3331	const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) );
3332
3333
3334	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
3335	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
3336	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
3337	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
3338	*/
3339
3340	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
3341	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
3342
3343	const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9);
3344
3345	TCoeff transformedCoefficient;
3346
3347	// transform-skip
3348	if (iTransformShift >= 0)
3349	{
3350	transformedCoefficient = resiDiff << iTransformShift;
3351	}
3352	else // for very high bit depths
3353	{
3354	const Int iTrShiftNeg = -iTransformShift;
3355	const Int offset = 1 << (iTrShiftNeg - 1);
3356	transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg;
3357	}
3358
3359	// quantization
3360	const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
3361
3362	const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
3363
3364	const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient;
3365
3366	const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
3367
3368	const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
3369	const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
3370	pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
3371	}
3372
3373
3374	Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos )
3375	{
3376	TComDataCU *pcCU = rTu.getCU();
3377	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3378	const TComRectangle &rect = rTu.getRect(compID);
3379	const UInt uiWidth = rect.width;
3380	const UInt uiHeight = rect.height;
3381	const Int QP_per = cQP.per;
3382	const Int QP_rem = cQP.rem;
3383	const Int maxLog2TrDynamicRange = pcCU->getSlice()->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID));
3384	#if O0043_BEST_EFFORT_DECODING
3385	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getStreamBitDepth(toChannelType(compID));
3386	#else
3387	const Int channelBitDepth = pcCU->getSlice()->getSPS()->getBitDepth(toChannelType(compID));
3388	#endif
3389	const Int iTransformShift = getTransformShift(channelBitDepth, rTu.GetEquivalentLog2TrSize(compID), maxLog2TrDynamicRange);
3390	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3391	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3392	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
3393
3394	assert( scalingListType < SCALING_LIST_NUM );
3395
3396	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
3397
3398	const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange);
3399	const TCoeff transformMaximum = (1 << maxLog2TrDynamicRange) - 1;
3400
3401	// Dequantisation
3402
3403	TCoeff dequantisedSample;
3404
3405	if(enableScalingLists)
3406	{
3407	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
3408	const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
3409
3410	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3411	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3412
3413	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
3414
3415	if(rightShift > 0)
3416	{
3417	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3418	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3419	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift;
3420
3421	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3422	}
3423	else
3424	{
3425	const Int leftShift = -rightShift;
3426	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3427	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift;
3428
3429	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3430	}
3431	}
3432	else
3433	{
3434	const Int scale = g_invQuantScales[QP_rem];
3435	const Int scaleBits = (IQUANT_SHIFT + 1) ;
3436
3437	const UInt targetInputBitDepth = std::min<UInt>((maxLog2TrDynamicRange + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
3438	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3439	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3440
3441	if (rightShift > 0)
3442	{
3443	const Intermediate_Int iAdd = 1 << (rightShift - 1);
3444	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3445	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
3446
3447	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3448	}
3449	else
3450	{
3451	const Int leftShift = -rightShift;
3452	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3453	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
3454
3455	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3456	}
3457	}
3458
3459	// Inverse transform-skip
3460
3461	if (iTransformShift >= 0)
3462	{
3463	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
3464	reconSample = Pel(( dequantisedSample + offset ) >> iTransformShift);
3465	}
3466	else //for very high bit depths
3467	{
3468	const Int iTrShiftNeg = -iTransformShift;
3469	reconSample = Pel(dequantisedSample << iTrShiftNeg);
3470	}
3471	}
3472
3473
3474	Void TComTrQuant::crossComponentPrediction( TComTU & rTu,
3475	const ComponentID compID,
3476	const Pel * piResiL,
3477	const Pel * piResiC,
3478	Pel * piResiT,
3479	const Int width,
3480	const Int height,
3481	const Int strideL,
3482	const Int strideC,
3483	const Int strideT,
3484	const Bool reverse )
3485	{
3486	const Pel *pResiL = piResiL;
3487	const Pel *pResiC = piResiC;
3488	Pel *pResiT = piResiT;
3489
3490	TComDataCU *pCU = rTu.getCU();
3491	const Int alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID );
3492	const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
3493
3494	for( Int y = 0; y < height; y++ )
3495	{
3496	if (reverse)
3497	{
3498	// A constraint is to be added to the HEVC Standard to limit the size of pResiL and pResiC at this point.
3499	// The likely form of the constraint is to either restrict the values to CoeffMin to CoeffMax,
3500	// or to be representable in a bitDepthY+4 or bitDepthC+4 signed integer.
3501	// The result of the constraint is that for 8/10/12bit profiles, the input values
3502	// can be represented within a 16-bit Pel-type.
3503	#if RExt__HIGH_BIT_DEPTH_SUPPORT
3504	for( Int x = 0; x < width; x++ )
3505	{
3506	pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3);
3507	}
3508	#else
3509	const Int minPel=std::numeric_limits<Pel>::min();
3510	const Int maxPel=std::numeric_limits<Pel>::max();
3511	for( Int x = 0; x < width; x++ )
3512	{
3513	pResiT[x] = Clip3<Int>(minPel, maxPel, pResiC[x] + (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3));
3514	}
3515	#endif
3516	}
3517	else
3518	{
3519	// Forward does not need clipping. Pel type should always be big enough.
3520	for( Int x = 0; x < width; x++ )
3521	{
3522	pResiT[x] = pResiC[x] - (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3);
3523	}
3524	}
3525
3526	pResiL += strideL;
3527	pResiC += strideC;
3528	pResiT += strideT;
3529	}
3530	}
3531
3532	//! \}

Note: See TracBrowser for help on using the repository browser.

JCT-3V 3D-HEVC

Context navigation

source: 3DVCSoftware/trunk/source/Lib/TLibCommon/TComTrQuant.cpp

Download in other formats: