Context navigation

source: SHVCSoftware/trunk/source/Lib/TLibCommon/TComTrQuant.cpp @ 16

Visit:

Last change on this file since 16 was 16, checked in by seregin, 13 years ago
INTRA_BL_DST4x4: DST4x4 for luma IntraBL (L0067/L0204)
File size: 101.1 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2012, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_bUseRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	#if CHROMA_QP_EXTENSION
203	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
204	#else
205	qpScaled = Clip3( -qpBdOffset, 51, qpy + chromaQPOffset );
206	#endif
207
208	if(qpScaled < 0)
209	{
210	qpScaled = qpScaled + qpBdOffset;
211	}
212	else
213	{
214	#if CHROMA_QP_EXTENSION
215	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
216	#else
217	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBdOffset;
218	#endif
219	}
220	}
221	m_cQP.setQpParam( qpScaled );
222	}
223
224	#if MATRIX_MULT
225	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
226	* \param block pointer to input data (residual)
227	* \param coeff pointer to output data (transform coefficients)
228	* \param uiStride stride of input data
229	* \param uiTrSize transform size (uiTrSize x uiTrSize)
230	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
231	*/
232	void xTr(Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
233	{
234	Int i,j,k,iSum;
235	Int tmp[32*32];
236	const short *iT;
237	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
238
239	if (uiTrSize==4)
240	{
241	iT = g_aiT4[0];
242	}
243	else if (uiTrSize==8)
244	{
245	iT = g_aiT8[0];
246	}
247	else if (uiTrSize==16)
248	{
249	iT = g_aiT16[0];
250	}
251	else if (uiTrSize==32)
252	{
253	iT = g_aiT32[0];
254	}
255	else
256	{
257	assert(0);
258	}
259
260	#if FULL_NBIT
261	int shift_1st = uiLog2TrSize - 1 + g_uiBitDepth - 8; // log2(N) - 1 + g_uiBitDepth - 8
262	#else
263	int shift_1st = uiLog2TrSize - 1 + g_uiBitIncrement; // log2(N) - 1 + g_uiBitIncrement
264	#endif
265
266	int add_1st = 1<<(shift_1st-1);
267	int shift_2nd = uiLog2TrSize + 6;
268	int add_2nd = 1<<(shift_2nd-1);
269
270	/* Horizontal transform */
271
272	if (uiTrSize==4)
273	{
274	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
275	{
276	iT = g_as_DST_MAT_4[0];
277	}
278	}
279	for (i=0; i<uiTrSize; i++)
280	{
281	for (j=0; j<uiTrSize; j++)
282	{
283	iSum = 0;
284	for (k=0; k<uiTrSize; k++)
285	{
286	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
287	}
288	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
289	}
290	}
291
292	/* Vertical transform */
293	if (uiTrSize==4)
294	{
295	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
296	{
297	iT = g_as_DST_MAT_4[0];
298	}
299	else
300	{
301	iT = g_aiT4[0];
302	}
303	}
304	for (i=0; i<uiTrSize; i++)
305	{
306	for (j=0; j<uiTrSize; j++)
307	{
308	iSum = 0;
309	for (k=0; k<uiTrSize; k++)
310	{
311	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
312	}
313	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
314	}
315	}
316	}
317
318	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
319	* \param coeff pointer to input data (transform coefficients)
320	* \param block pointer to output data (residual)
321	* \param uiStride stride of output data
322	* \param uiTrSize transform size (uiTrSize x uiTrSize)
323	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
324	*/
325	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
326	{
327	int i,j,k,iSum;
328	Int tmp[32*32];
329	const short *iT;
330
331	if (uiTrSize==4)
332	{
333	iT = g_aiT4[0];
334	}
335	else if (uiTrSize==8)
336	{
337	iT = g_aiT8[0];
338	}
339	else if (uiTrSize==16)
340	{
341	iT = g_aiT16[0];
342	}
343	else if (uiTrSize==32)
344	{
345	iT = g_aiT32[0];
346	}
347	else
348	{
349	assert(0);
350	}
351
352	int shift_1st = SHIFT_INV_1ST;
353	int add_1st = 1<<(shift_1st-1);
354	#if FULL_NBIT
355	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
356	#else
357	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
358	#endif
359	int add_2nd = 1<<(shift_2nd-1);
360	if (uiTrSize==4)
361	{
362	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
363	{
364	iT = g_as_DST_MAT_4[0];
365	}
366	}
367
368	/* Horizontal transform */
369	for (i=0; i<uiTrSize; i++)
370	{
371	for (j=0; j<uiTrSize; j++)
372	{
373	iSum = 0;
374	for (k=0; k<uiTrSize; k++)
375	{
376	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
377	}
378	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
379	}
380	}
381
382	if (uiTrSize==4)
383	{
384	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
385	{
386	iT = g_as_DST_MAT_4[0];
387	}
388	else
389	{
390	iT = g_aiT4[0];
391	}
392	}
393
394	/* Vertical transform */
395	for (i=0; i<uiTrSize; i++)
396	{
397	for (j=0; j<uiTrSize; j++)
398	{
399	iSum = 0;
400	for (k=0; k<uiTrSize; k++)
401	{
402	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
403	}
404	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
405	}
406	}
407	}
408
409	#else //MATRIX_MULT
410
411	/** 4x4 forward transform implemented using partial butterfly structure (1D)
412	* \param src input data (residual)
413	* \param dst output data (transform coefficients)
414	* \param shift specifies right shift after 1D transform
415	*/
416
417	void partialButterfly4(short src,short dst,int shift, int line)
418	{
419	int j;
420	int E[2],O[2];
421	int add = 1<<(shift-1);
422
423	for (j=0; j<line; j++)
424	{
425	/* E and O */
426	E[0] = src[0] + src[3];
427	O[0] = src[0] - src[3];
428	E[1] = src[1] + src[2];
429	O[1] = src[1] - src[2];
430
431	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
432	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
433	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
434	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
435
436	src += 4;
437	dst ++;
438	}
439	}
440
// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
// give identical results
/** 4x4 forward DST (1D pass over four rows).
 *  \param block input data, 16 samples; row i occupies block[4*i .. 4*i+3]
 *  \param coeff output data, 16 values written transposed: coefficient c of
 *               row i goes to coeff[4*c + i]
 *  \param shift right shift applied after the 1D transform (rounding offset
 *               1<<(shift-1) is added first)
 */
void fastForwardDst(short *block,short *coeff,int shift)  // input block, output coeff
{
  int i, c[4];
  int rnd_factor = 1<<(shift-1);
  for (i=0; i<4; i++)
  {
    // Intermediate Variables
    c[0] = block[4*i+0] + block[4*i+3];
    c[1] = block[4*i+1] + block[4*i+3];
    c[2] = block[4*i+0] - block[4*i+1];
    c[3] = 74* block[4*i+2];

    coeff[   i] =  ( 29 * c[0] + 55 * c[1]         + c[3]               + rnd_factor ) >> shift;
    coeff[ 4+i] =  ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3])   + rnd_factor ) >> shift;
    coeff[ 8+i] =  ( 29 * c[2] + 55 * c[0]         - c[3]               + rnd_factor ) >> shift;
    coeff[12+i] =  ( 55 * c[2] - 29 * c[1]         + c[3]               + rnd_factor ) >> shift;
  }
}
461
462	void fastInverseDst(short tmp,short block,int shift) // input tmp, output block
463	{
464	int i, c[4];
465	int rnd_factor = 1<<(shift-1);
466	for (i=0; i<4; i++)
467	{
468	// Intermediate Variables
469	c[0] = tmp[ i] + tmp[ 8+i];
470	c[1] = tmp[8+i] + tmp[12+i];
471	c[2] = tmp[ i] - tmp[12+i];
472	c[3] = 74* tmp[4+i];
473
474	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
475	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
476	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
477	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
478	}
479	}
480
481	void partialButterflyInverse4(short src,short dst,int shift, int line)
482	{
483	int j;
484	int E[2],O[2];
485	int add = 1<<(shift-1);
486
487	for (j=0; j<line; j++)
488	{
489	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
490	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
491	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
492	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
493	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
494
495	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
496	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
497	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
498	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
499	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
500
501	src ++;
502	dst += 4;
503	}
504	}
505
506
507	void partialButterfly8(short src,short dst,int shift, int line)
508	{
509	int j,k;
510	int E[4],O[4];
511	int EE[2],EO[2];
512	int add = 1<<(shift-1);
513
514	for (j=0; j<line; j++)
515	{
516	/* E and O*/
517	for (k=0;k<4;k++)
518	{
519	E[k] = src[k] + src[7-k];
520	O[k] = src[k] - src[7-k];
521	}
522	/* EE and EO */
523	EE[0] = E[0] + E[3];
524	EO[0] = E[0] - E[3];
525	EE[1] = E[1] + E[2];
526	EO[1] = E[1] - E[2];
527
528	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
529	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
530	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
531	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
532
533	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
534	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
535	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
536	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
537
538	src += 8;
539	dst ++;
540	}
541	}
542
543
544	void partialButterflyInverse8(short src,short dst,int shift, int line)
545	{
546	int j,k;
547	int E[4],O[4];
548	int EE[2],EO[2];
549	int add = 1<<(shift-1);
550
551	for (j=0; j<line; j++)
552	{
553	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
554	for (k=0;k<4;k++)
555	{
556	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
557	}
558
559	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
560	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
561	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
562	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
563
564	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
565	E[0] = EE[0] + EO[0];
566	E[3] = EE[0] - EO[0];
567	E[1] = EE[1] + EO[1];
568	E[2] = EE[1] - EO[1];
569	for (k=0;k<4;k++)
570	{
571	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
572	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
573	}
574	src ++;
575	dst += 8;
576	}
577	}
578
579
580	void partialButterfly16(short src,short dst,int shift, int line)
581	{
582	int j,k;
583	int E[8],O[8];
584	int EE[4],EO[4];
585	int EEE[2],EEO[2];
586	int add = 1<<(shift-1);
587
588	for (j=0; j<line; j++)
589	{
590	/* E and O*/
591	for (k=0;k<8;k++)
592	{
593	E[k] = src[k] + src[15-k];
594	O[k] = src[k] - src[15-k];
595	}
596	/* EE and EO */
597	for (k=0;k<4;k++)
598	{
599	EE[k] = E[k] + E[7-k];
600	EO[k] = E[k] - E[7-k];
601	}
602	/* EEE and EEO */
603	EEE[0] = EE[0] + EE[3];
604	EEO[0] = EE[0] - EE[3];
605	EEE[1] = EE[1] + EE[2];
606	EEO[1] = EE[1] - EE[2];
607
608	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
609	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
610	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
611	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
612
613	for (k=2;k<16;k+=4)
614	{
615	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
616	}
617
618	for (k=1;k<16;k+=2)
619	{
620	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
621	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
622	}
623
624	src += 16;
625	dst ++;
626
627	}
628	}
629
630
631	void partialButterflyInverse16(short src,short dst,int shift, int line)
632	{
633	int j,k;
634	int E[8],O[8];
635	int EE[4],EO[4];
636	int EEE[2],EEO[2];
637	int add = 1<<(shift-1);
638
639	for (j=0; j<line; j++)
640	{
641	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
642	for (k=0;k<8;k++)
643	{
644	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
645	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
646	}
647	for (k=0;k<4;k++)
648	{
649	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
650	}
651	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
652	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
653	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
654	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
655
656	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
657	for (k=0;k<2;k++)
658	{
659	EE[k] = EEE[k] + EEO[k];
660	EE[k+2] = EEE[1-k] - EEO[1-k];
661	}
662	for (k=0;k<4;k++)
663	{
664	E[k] = EE[k] + EO[k];
665	E[k+4] = EE[3-k] - EO[3-k];
666	}
667	for (k=0;k<8;k++)
668	{
669	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
670	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
671	}
672	src ++;
673	dst += 16;
674	}
675	}
676
677
678	void partialButterfly32(short src,short dst,int shift, int line)
679	{
680	int j,k;
681	int E[16],O[16];
682	int EE[8],EO[8];
683	int EEE[4],EEO[4];
684	int EEEE[2],EEEO[2];
685	int add = 1<<(shift-1);
686
687	for (j=0; j<line; j++)
688	{
689	/* E and O*/
690	for (k=0;k<16;k++)
691	{
692	E[k] = src[k] + src[31-k];
693	O[k] = src[k] - src[31-k];
694	}
695	/* EE and EO */
696	for (k=0;k<8;k++)
697	{
698	EE[k] = E[k] + E[15-k];
699	EO[k] = E[k] - E[15-k];
700	}
701	/* EEE and EEO */
702	for (k=0;k<4;k++)
703	{
704	EEE[k] = EE[k] + EE[7-k];
705	EEO[k] = EE[k] - EE[7-k];
706	}
707	/* EEEE and EEEO */
708	EEEE[0] = EEE[0] + EEE[3];
709	EEEO[0] = EEE[0] - EEE[3];
710	EEEE[1] = EEE[1] + EEE[2];
711	EEEO[1] = EEE[1] - EEE[2];
712
713	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
714	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
715	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
716	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
717	for (k=4;k<32;k+=8)
718	{
719	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
720	}
721	for (k=2;k<32;k+=4)
722	{
723	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
724	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
725	}
726	for (k=1;k<32;k+=2)
727	{
728	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
729	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
730	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
731	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
732	}
733	src += 32;
734	dst ++;
735	}
736	}
737
738
739	void partialButterflyInverse32(short src,short dst,int shift, int line)
740	{
741	int j,k;
742	int E[16],O[16];
743	int EE[8],EO[8];
744	int EEE[4],EEO[4];
745	int EEEE[2],EEEO[2];
746	int add = 1<<(shift-1);
747
748	for (j=0; j<line; j++)
749	{
750	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
751	for (k=0;k<16;k++)
752	{
753	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
754	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
755	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
756	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
757	}
758	for (k=0;k<8;k++)
759	{
760	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
761	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
762	}
763	for (k=0;k<4;k++)
764	{
765	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
766	}
767	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
768	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
769	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
770	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
771
772	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
773	EEE[0] = EEEE[0] + EEEO[0];
774	EEE[3] = EEEE[0] - EEEO[0];
775	EEE[1] = EEEE[1] + EEEO[1];
776	EEE[2] = EEEE[1] - EEEO[1];
777	for (k=0;k<4;k++)
778	{
779	EE[k] = EEE[k] + EEO[k];
780	EE[k+4] = EEE[3-k] - EEO[3-k];
781	}
782	for (k=0;k<8;k++)
783	{
784	E[k] = EE[k] + EO[k];
785	E[k+8] = EE[7-k] - EO[7-k];
786	}
787	for (k=0;k<16;k++)
788	{
789	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
790	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
791	}
792	src ++;
793	dst += 32;
794	}
795	}
796
797	/** MxN forward transform (2D)
798	* \param block input data (residual)
799	* \param coeff output data (transform coefficients)
800	* \param iWidth input data (width of transform)
801	* \param iHeight input data (height of transform)
802	*/
803	void xTrMxN(short block,short coeff, int iWidth, int iHeight, UInt uiMode)
804	{
805	#if FULL_NBIT
806	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitDepth - 8; // log2(iWidth) - 1 + g_uiBitDepth - 8
807	#else
808	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitIncrement; // log2(iWidth) - 1 + g_uiBitIncrement
809	#endif
810	int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
811
812	short tmp[ 64 * 64 ];
813
814	#if !REMOVE_NSQT
815	if( iWidth == 16 && iHeight == 4)
816	{
817	partialButterfly16( block, tmp, shift_1st, iHeight );
818	partialButterfly4( tmp, coeff, shift_2nd, iWidth );
819	}
820	else if( iWidth == 32 && iHeight == 8 )
821	{
822	partialButterfly32( block, tmp, shift_1st, iHeight );
823	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
824	}
825	else if( iWidth == 4 && iHeight == 16)
826	{
827	partialButterfly4( block, tmp, shift_1st, iHeight );
828	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
829	}
830	else if( iWidth == 8 && iHeight == 32 )
831	{
832	partialButterfly8( block, tmp, shift_1st, iHeight );
833	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
834	}
835	else
836	#endif
837	if( iWidth == 4 && iHeight == 4)
838	{
839	#if INTRA_TRANS_SIMP
840	if (uiMode != REG_DCT)
841	{
842	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
843	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
844	}
845	else
846	{
847	partialButterfly4(block, tmp, shift_1st, iHeight);
848	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
849	}
850
851	#else
852	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
853	{
854	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
855	}
856	else
857	{
858	partialButterfly4(block, tmp, shift_1st, iHeight);
859	}
860	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
861	{
862	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
863	}
864	else
865	{
866	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
867	}
868	#endif
869	}
870	else if( iWidth == 8 && iHeight == 8)
871	{
872	partialButterfly8( block, tmp, shift_1st, iHeight );
873	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
874	}
875	else if( iWidth == 16 && iHeight == 16)
876	{
877	partialButterfly16( block, tmp, shift_1st, iHeight );
878	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
879	}
880	else if( iWidth == 32 && iHeight == 32)
881	{
882	partialButterfly32( block, tmp, shift_1st, iHeight );
883	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
884	}
885	}
886	/** MxN inverse transform (2D)
887	* \param coeff input data (transform coefficients)
888	* \param block output data (residual)
889	* \param iWidth input data (width of transform)
890	* \param iHeight input data (height of transform)
891	*/
892	void xITrMxN(short coeff,short block, int iWidth, int iHeight, UInt uiMode)
893	{
894	int shift_1st = SHIFT_INV_1ST;
895	#if FULL_NBIT
896	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
897	#else
898	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
899	#endif
900
901	short tmp[ 64*64];
902	#if !REMOVE_NSQT
903	if( iWidth == 16 && iHeight == 4)
904	{
905	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
906	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
907	}
908	else if( iWidth == 32 && iHeight == 8)
909	{
910	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
911	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
912	}
913	else if( iWidth == 4 && iHeight == 16)
914	{
915	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
916	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
917	}
918	else if( iWidth == 8 && iHeight == 32)
919	{
920	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
921	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
922	}
923	else
924	#endif
925	if( iWidth == 4 && iHeight == 4)
926	{
927	#if INTRA_TRANS_SIMP
928	if (uiMode != REG_DCT)
929	{
930	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
931	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
932	}
933	else
934	{
935	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
936	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
937	}
938	#else
939	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
940	{
941	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
942	}
943	else
944	{
945	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
946	}
947	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
948	{
949	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
950	}
951	else
952	{
953	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
954	}
955	#endif
956	}
957	else if( iWidth == 8 && iHeight == 8)
958	{
959	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
960	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
961	}
962	else if( iWidth == 16 && iHeight == 16)
963	{
964	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
965	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
966	}
967	else if( iWidth == 32 && iHeight == 32)
968	{
969	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
970	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
971	}
972	}
973
974	#endif //MATRIX_MULT
975
976	// To minimize the distortion only. No rate is considered.
977	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
978	{
979	Int lastCG = -1;
980	Int absSum = 0 ;
981	Int n ;
982
983	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
984	{
985	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
986	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
987	absSum = 0 ;
988
989	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
990	{
991	if( pQCoef[ scan[ n + subPos ]] )
992	{
993	lastNZPosInCG = n;
994	break;
995	}
996	}
997
998	for(n = 0; n <SCAN_SET_SIZE; n++ )
999	{
1000	if( pQCoef[ scan[ n + subPos ]] )
1001	{
1002	firstNZPosInCG = n;
1003	break;
1004	}
1005	}
1006
1007	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1008	{
1009	absSum += pQCoef[ scan[ n + subPos ]];
1010	}
1011
1012	if(lastNZPosInCG>=0 && lastCG==-1)
1013	{
1014	lastCG = 1 ;
1015	}
1016
1017	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1018	{
1019	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
1020	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1021	{
1022	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
1023
1024	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1025	{
1026	UInt blkPos = scan[ n+subPos ];
1027	if(pQCoef[ blkPos ] != 0 )
1028	{
1029	if(deltaU[blkPos]>0)
1030	{
1031	curCost = - deltaU[blkPos];
1032	curChange=1 ;
1033	}
1034	else
1035	{
1036	//curChange =-1;
1037	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1038	{
1039	curCost=MAX_INT ;
1040	}
1041	else
1042	{
1043	curCost = deltaU[blkPos];
1044	curChange =-1;
1045	}
1046	}
1047	}
1048	else
1049	{
1050	if(n<firstNZPosInCG)
1051	{
1052	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1053	if(thisSignBit != signbit )
1054	{
1055	curCost = MAX_INT;
1056	}
1057	else
1058	{
1059	curCost = - (deltaU[blkPos]) ;
1060	curChange = 1 ;
1061	}
1062	}
1063	else
1064	{
1065	curCost = - (deltaU[blkPos]) ;
1066	curChange = 1 ;
1067	}
1068	}
1069
1070	if( curCost<minCostInc)
1071	{
1072	minCostInc = curCost ;
1073	finalChange = curChange ;
1074	minPos = blkPos ;
1075	}
1076	} //CG loop
1077
1078	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
1079	{
1080	finalChange = -1;
1081	}
1082
1083	if(pCoef[minPos]>=0)
1084	{
1085	pQCoef[minPos] += finalChange ;
1086	}
1087	else
1088	{
1089	pQCoef[minPos] -= finalChange ;
1090	}
1091	} // Hide
1092	}
1093	if(lastCG==1)
1094	{
1095	lastCG=0 ;
1096	}
1097	} // TU loop
1098
1099	return;
1100	}
1101
1102	Void TComTrQuant::xQuant( TComDataCU* pcCU,
1103	Int* pSrc,
1104	TCoeff* pDes,
1105	#if ADAPTIVE_QP_SELECTION
1106	Int*& pArlDes,
1107	#endif
1108	Int iWidth,
1109	Int iHeight,
1110	UInt& uiAcSum,
1111	TextType eTType,
1112	UInt uiAbsPartIdx )
1113	{
1114	Int* piCoef = pSrc;
1115	TCoeff* piQCoef = pDes;
1116	#if ADAPTIVE_QP_SELECTION
1117	Int* piArlCCoef = pArlDes;
1118	#endif
1119	Int iAdd = 0;
1120
1121	Bool useRDOQForTransformSkip = !(m_useTransformSkipFast && pcCU->getTransformSkip(uiAbsPartIdx,eTType));
1122	if ( m_bUseRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) && useRDOQForTransformSkip)
1123	{
1124	#if ADAPTIVE_QP_SELECTION
1125	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1126	#else
1127	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1128	#endif
1129	}
1130	else
1131	{
1132	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1133
1134	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1135	if (scanIdx == SCAN_ZIGZAG)
1136	{
1137	scanIdx = SCAN_DIAG;
1138	}
1139
1140	#if REMOVE_NSQT
1141	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1142	#else
1143	if (iWidth != iHeight)
1144	{
1145	scanIdx = SCAN_DIAG;
1146	}
1147
1148	const UInt * scan;
1149	if (iWidth == iHeight)
1150	{
1151	scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1152	}
1153	else
1154	{
1155	scan = g_sigScanNSQT[ log2BlockSize - 2 ];
1156	}
1157	#endif
1158
1159	Int deltaU[32*32] ;
1160
1161	#if ADAPTIVE_QP_SELECTION
1162	QpParam cQpBase;
1163	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1164
1165	Int qpScaled;
1166	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1167
1168	if(eTType == TEXT_LUMA)
1169	{
1170	qpScaled = iQpBase + qpBDOffset;
1171	}
1172	else
1173	{
1174	#if CHROMA_QP_EXTENSION
1175	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1176	#else
1177	qpScaled = Clip3( -qpBDOffset, 51, iQpBase);
1178	#endif
1179
1180	if(qpScaled < 0)
1181	{
1182	qpScaled = qpScaled + qpBDOffset;
1183	}
1184	else
1185	{
1186	#if CHROMA_QP_EXTENSION
1187	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1188	#else
1189	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBDOffset;
1190	#endif
1191	}
1192	}
1193	cQpBase.setQpParam(qpScaled);
1194	#endif
1195
1196	#if !REMOVE_NSQT
1197	Bool bNonSqureFlag = ( iWidth != iHeight );
1198	#endif
1199	UInt dir = SCALING_LIST_SQT;
1200	#if !REMOVE_NSQT
1201	if( bNonSqureFlag )
1202	{
1203	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1204	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1205	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1206	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1207	iHeight = iWidth;
1208	}
1209	#endif
1210
1211	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1212	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1213	assert(scalingListType < 6);
1214	Int *piQuantCoeff = 0;
1215	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2, dir);
1216
1217	#if FULL_NBIT
1218	UInt uiBitDepth = g_uiBitDepth;
1219	#else
1220	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1221	#endif
1222	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1223
1224	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1225
1226	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1227
1228	#if ADAPTIVE_QP_SELECTION
1229	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1230	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1231	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1232	Int iAddC = 1 << (iQBitsC-1);
1233	#endif
1234
1235	Int qBits8 = iQBits-8;
1236	for( Int n = 0; n < iWidth*iHeight; n++ )
1237	{
1238	Int iLevel;
1239	Int iSign;
1240	UInt uiBlockPos = n;
1241	iLevel = piCoef[uiBlockPos];
1242	iSign = (iLevel < 0 ? -1: 1);
1243
1244	#if ADAPTIVE_QP_SELECTION
1245	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1246	if( m_bUseAdaptQpSelect )
1247	{
1248	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1249	}
1250	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1251	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1252	#else
1253	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1254	deltaU[uiBlockPos] = (Int)( ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1255	#endif
1256	uiAcSum += iLevel;
1257	iLevel *= iSign;
1258	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1259	} // for n
1260	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1261	{
1262	if(uiAcSum>=2)
1263	{
1264	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1265	}
1266	}
1267	} //if RDOQ
1268	//return;
1269
1270	}
1271
1272	Void TComTrQuant::xDeQuant( const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1273	{
1274
1275	const TCoeff* piQCoef = pSrc;
1276	Int* piCoef = pDes;
1277	UInt dir = SCALING_LIST_SQT;
1278	#if !REMOVE_NSQT
1279	if( iWidth != iHeight )
1280	{
1281	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1282	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1283	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1284	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1285	iHeight = iWidth;
1286	}
1287	#endif
1288
1289	if ( iWidth > (Int)m_uiMaxTrSize )
1290	{
1291	iWidth = m_uiMaxTrSize;
1292	iHeight = m_uiMaxTrSize;
1293	}
1294
1295	Int iShift,iAdd,iCoeffQ;
1296	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1297
1298	#if FULL_NBIT
1299	UInt uiBitDepth = g_uiBitDepth;
1300	#else
1301	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1302	#endif
1303	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1304
1305	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1306
1307	TCoeff clipQCoef;
1308	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + uiBitDepth - m_cQP.m_iPer) );
1309	const Int levelLimit = 1 << bitRange;
1310
1311	if(getUseScalingList())
1312	{
1313	iShift += 4;
1314	if(iShift > m_cQP.m_iPer)
1315	{
1316	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1317	}
1318	else
1319	{
1320	iAdd = 0;
1321	}
1322	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1323
1324	if(iShift > m_cQP.m_iPer)
1325	{
1326	for( Int n = 0; n < iWidth*iHeight; n++ )
1327	{
1328	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1329	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1330	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1331	}
1332	}
1333	else
1334	{
1335	for( Int n = 0; n < iWidth*iHeight; n++ )
1336	{
1337	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
1338	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1339	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1340	}
1341	}
1342	}
1343	else
1344	{
1345	iAdd = 1 << (iShift-1);
1346	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1347
1348	for( Int n = 0; n < iWidth*iHeight; n++ )
1349	{
1350	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1351	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1352	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1353	}
1354	}
1355	}
1356
1357	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
1358	Bool bUseRDOQ, Bool bEnc, Bool useTransformSkipFast
1359	#if ADAPTIVE_QP_SELECTION
1360	, Bool bUseAdaptQpSelect
1361	#endif
1362	)
1363	{
1364	m_uiMaxTrSize = uiMaxTrSize;
1365	m_bEnc = bEnc;
1366	m_bUseRDOQ = bUseRDOQ;
1367	#if ADAPTIVE_QP_SELECTION
1368	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1369	#endif
1370	m_useTransformSkipFast = useTransformSkipFast;
1371	}
1372
1373	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1374	Pel* pcResidual,
1375	UInt uiStride,
1376	TCoeff* rpcCoeff,
1377	#if ADAPTIVE_QP_SELECTION
1378	Int*& rpcArlCoeff,
1379	#endif
1380	UInt uiWidth,
1381	UInt uiHeight,
1382	UInt& uiAbsSum,
1383	TextType eTType,
1384	UInt uiAbsPartIdx,
1385	Bool useTransformSkip
1386	)
1387	{
1388	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1389	{
1390	uiAbsSum=0;
1391	for (UInt k = 0; k<uiHeight; k++)
1392	{
1393	for (UInt j = 0; j<uiWidth; j++)
1394	{
1395	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1396	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1397	}
1398	}
1399	return;
1400	}
1401	UInt uiMode; //luma intra pred
1402	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1403	{
1404	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1405	}
1406	#if INTRA_BL_DST4x4
1407	else if(eTType == TEXT_LUMA && pcCU->isIntraBL(uiAbsPartIdx) )
1408	{
1409	uiMode = DC_IDX; //Using DST
1410	}
1411	#endif
1412	else
1413	{
1414	uiMode = REG_DCT;
1415	}
1416
1417	uiAbsSum = 0;
1418	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1419	if(useTransformSkip)
1420	{
1421	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1422	}
1423	else
1424	{
1425	xT( uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1426	}
1427	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1428	#if ADAPTIVE_QP_SELECTION
1429	rpcArlCoeff,
1430	#endif
1431	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1432	}
1433
1434	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1435	{
1436	if(transQuantBypass)
1437	{
1438	for (UInt k = 0; k<uiHeight; k++)
1439	{
1440	for (UInt j = 0; j<uiWidth; j++)
1441	{
1442	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1443	}
1444	}
1445	return;
1446	}
1447	xDeQuant( pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1448	if(useTransformSkip == true)
1449	{
1450	xITransformSkip( m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1451	}
1452	else
1453	{
1454	xIT( uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1455	}
1456	}
1457
1458	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1459	{
1460	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1461	{
1462	return;
1463	}
1464
1465	UInt uiLumaTrMode, uiChromaTrMode;
1466	pcCU->convertTransIdx( uiAbsPartIdx, pcCU->getTransformIdx( uiAbsPartIdx ), uiLumaTrMode, uiChromaTrMode );
1467	const UInt uiStopTrMode = eTxt == TEXT_LUMA ? uiLumaTrMode : uiChromaTrMode;
1468
1469	if( uiTrMode == uiStopTrMode )
1470	{
1471	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1472	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1473	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1474	{
1475	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1476	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1477	{
1478	return;
1479	}
1480	uiWidth <<= 1;
1481	uiHeight <<= 1;
1482	}
1483	Pel* pResi = rpcResidual + uiAddr;
1484	#if !REMOVE_NSQT
1485	if( pcCU->useNonSquareTrans( uiTrMode, uiAbsPartIdx ) )
1486	{
1487	Int trWidth = uiWidth;
1488	Int trHeight = uiHeight;
1489	pcCU->getNSQTSize( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1490
1491	uiWidth = trWidth;
1492	uiHeight = trHeight;
1493	}
1494	#endif
1495	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1496	assert(scalingListType < 6);
1497	#if INTER_TRANSFORMSKIP
1498	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1499	#else
1500	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
1501	#endif
1502	}
1503	else
1504	{
1505	uiTrMode++;
1506	uiWidth >>= 1;
1507	uiHeight >>= 1;
1508	Int trWidth = uiWidth, trHeight = uiHeight;
1509	#if !REMOVE_NSQT
1510	Int trLastWidth = uiWidth << 1, trLastHeight = uiHeight << 1;
1511	pcCU->getNSQTSize ( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1512	pcCU->getNSQTSize ( uiTrMode - 1, uiAbsPartIdx, trLastWidth, trLastHeight );
1513	#endif
1514	UInt uiAddrOffset = trHeight * uiStride;
1515	UInt uiCoefOffset = trWidth * trHeight;
1516	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1517	#if !REMOVE_NSQT
1518	UInt uiInterTUSplitDirection = pcCU->getInterTUSplitDirection ( trWidth, trHeight, trLastWidth, trLastHeight );
1519	if( uiInterTUSplitDirection != 2 )
1520	{
1521	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1522	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth * uiInterTUSplitDirection + uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1523	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 2 * trWidth * uiInterTUSplitDirection + 2 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1524	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 3 * trWidth * uiInterTUSplitDirection + 3 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1525	}
1526	else
1527	#endif
1528	{
1529	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1530	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1531	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1532	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1533	}
1534	}
1535	}
1536
1537	// ------------------------------------------------------------------------------------------------
1538	// Logical transform
1539	// ------------------------------------------------------------------------------------------------
1540
1541	/** Wrapper function between HM interface and core NxN forward transform (2D)
1542	* \param piBlkResi input data (residual)
1543	* \param psCoeff output data (transform coefficients)
1544	* \param uiStride stride of input residual data
1545	* \param iSize transform size (iSize x iSize)
1546	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1547	*/
1548	Void TComTrQuant::xT( UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1549	{
1550	#if MATRIX_MULT
1551	Int iSize = iWidth;
1552	#if !REMOVE_NSQT
1553	if( iWidth != iHeight)
1554	{
1555	xTrMxN( piBlkResi, psCoeff, uiStride, (UInt)iWidth, (UInt)iHeight );
1556	return;
1557	}
1558	#endif
1559	xTr(piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1560	#else
1561	Int j;
1562	{
1563	short block[ 64 * 64 ];
1564	short coeff[ 64 * 64 ];
1565	{
1566	for (j = 0; j < iHeight; j++)
1567	{
1568	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( short ) );
1569	}
1570	}
1571	xTrMxN( block, coeff, iWidth, iHeight, uiMode );
1572	for ( j = 0; j < iHeight * iWidth; j++ )
1573	{
1574	psCoeff[ j ] = coeff[ j ];
1575	}
1576	return ;
1577	}
1578	#endif
1579	}
1580
1581
1582	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1583	* \param plCoef input data (transform coefficients)
1584	* \param pResidual output data (residual)
1585	* \param uiStride stride of input residual data
1586	* \param iSize transform size (iSize x iSize)
1587	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1588	*/
1589	Void TComTrQuant::xIT( UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1590	{
1591	#if MATRIX_MULT
1592	Int iSize = iWidth;
1593	#if !REMOVE_NSQT
1594	if( iWidth != iHeight )
1595	{
1596	xITrMxN( plCoef, pResidual, uiStride, (UInt)iWidth, (UInt)iHeight );
1597	return;
1598	}
1599	#endif
1600	xITr(plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1601	#else
1602	Int j;
1603	{
1604	short block[ 64 * 64 ];
1605	short coeff[ 64 * 64 ];
1606	for ( j = 0; j < iHeight * iWidth; j++ )
1607	{
1608	coeff[j] = (short)plCoef[j];
1609	}
1610	xITrMxN( coeff, block, iWidth, iHeight, uiMode );
1611	{
1612	for ( j = 0; j < iHeight; j++ )
1613	{
1614	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(short) );
1615	}
1616	}
1617	return ;
1618	}
1619	#endif
1620	}
1621
1622	/** Wrapper function between HM interface and core 4x4 transform skipping
1623	* \param piBlkResi input data (residual)
1624	* \param psCoeff output data (transform coefficients)
1625	* \param uiStride stride of input residual data
1626	* \param iSize transform size (iSize x iSize)
1627	*/
1628	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1629	{
1630	assert( width == height );
1631	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1632	#if FULL_NBIT
1633	UInt uiBitDepth = g_uiBitDepth;
1634	#else
1635	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1636	#endif
1637	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1638	UInt transformSkipShift;
1639	Int j,k;
1640	if(shift >= 0)
1641	{
1642	transformSkipShift = shift;
1643	for (j = 0; j < height; j++)
1644	{
1645	for(k = 0; k < width; k ++)
1646	{
1647	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1648	}
1649	}
1650	}
1651	else
1652	{
1653	//The case when uiBitDepth > 13
1654	Int offset;
1655	transformSkipShift = -shift;
1656	offset = (1 << (transformSkipShift - 1));
1657	for (j = 0; j < height; j++)
1658	{
1659	for(k = 0; k < width; k ++)
1660	{
1661	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1662	}
1663	}
1664	}
1665	}
1666
1667	/** Wrapper function between HM interface and core NxN transform skipping
1668	* \param plCoef input data (coefficients)
1669	* \param pResidual output data (residual)
1670	* \param uiStride stride of input residual data
1671	* \param iSize transform size (iSize x iSize)
1672	*/
1673	Void TComTrQuant::xITransformSkip( Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1674	{
1675	assert( width == height );
1676	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1677	#if FULL_NBIT
1678	UInt uiBitDepth = g_uiBitDepth;
1679	#else
1680	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1681	#endif
1682	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1683	UInt transformSkipShift;
1684	Int j,k;
1685	if(shift > 0)
1686	{
1687	Int offset;
1688	transformSkipShift = shift;
1689	offset = (1 << (transformSkipShift -1));
1690	for ( j = 0; j < height; j++ )
1691	{
1692	for(k = 0; k < width; k ++)
1693	{
1694	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1695	}
1696	}
1697	}
1698	else
1699	{
1700	//The case when uiBitDepth >= 13
1701	transformSkipShift = - shift;
1702	for ( j = 0; j < height; j++ )
1703	{
1704	for(k = 0; k < width; k ++)
1705	{
1706	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1707	}
1708	}
1709	}
1710	}
1711
1712	/** RDOQ with CABAC
1713	* \param pcCU pointer to coding unit structure
1714	* \param plSrcCoeff pointer to input buffer
1715	* \param piDstCoeff reference to pointer to output buffer
1716	* \param uiWidth block width
1717	* \param uiHeight block height
1718	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1719	* \param eTType plane type / luminance or chrominance
1720	* \param uiAbsPartIdx absolute partition index
1721	* \returns Void
1722	* Rate distortion optimized quantization for entropy
1723	* coding engines using probability models like CABAC
1724	*/
1725	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1726	Int* plSrcCoeff,
1727	TCoeff* piDstCoeff,
1728	#if ADAPTIVE_QP_SELECTION
1729	Int*& piArlDstCoeff,
1730	#endif
1731	UInt uiWidth,
1732	UInt uiHeight,
1733	UInt& uiAbsSum,
1734	TextType eTType,
1735	UInt uiAbsPartIdx )
1736	{
1737	Int iQBits = m_cQP.m_iBits;
1738	Double dTemp = 0;
1739	UInt dir = SCALING_LIST_SQT;
1740	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1741	Int uiQ = g_quantScales[m_cQP.rem()];
1742	#if !REMOVE_NSQT
1743	if (uiWidth != uiHeight)
1744	{
1745	uiLog2TrSize += (uiWidth > uiHeight) ? -1 : 1;
1746	dir = ( uiWidth < uiHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1747	}
1748	#endif
1749
1750	#if FULL_NBIT
1751	UInt uiBitDepth = g_uiBitDepth;
1752	#else
1753	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1754	#endif
1755	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1756	UInt uiGoRiceParam = 0;
1757	Double d64BlockUncodedCost = 0;
1758	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1759	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1760	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1761	assert(scalingListType < 6);
1762
1763	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1764	double dErrScale = 0;
1765	double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem,dir);
1766	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1767	Int *piQCoef = piQCoefOrg;
1768	double *pdErrScale = pdErrScaleOrg;
1769	#if ADAPTIVE_QP_SELECTION
1770	Int iQBitsC = iQBits - ARL_C_PRECISION;
1771	Int iAddC = 1 << (iQBitsC-1);
1772	#endif
1773	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1774	if (uiScanIdx == SCAN_ZIGZAG)
1775	{
1776	// Map value zigzag to diagonal scan
1777	uiScanIdx = SCAN_DIAG;
1778	}
1779	Int blockType = uiLog2BlkSize;
1780	#if !REMOVE_NSQT
1781	if (uiWidth != uiHeight)
1782	{
1783	uiScanIdx = SCAN_DIAG;
1784	blockType = 4;
1785	}
1786	#endif
1787
1788	#if ADAPTIVE_QP_SELECTION
1789	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1790	#endif
1791
1792	Double pdCostCoeff [ 32 * 32 ];
1793	Double pdCostSig [ 32 * 32 ];
1794	Double pdCostCoeff0[ 32 * 32 ];
1795	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1796	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1797	Int rateIncUp [ 32 * 32 ];
1798	Int rateIncDown [ 32 * 32 ];
1799	Int sigRateDelta[ 32 * 32 ];
1800	Int deltaU [ 32 * 32 ];
1801	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1802	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1803	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1804	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1805
1806	const UInt * scanCG;
1807	#if !REMOVE_NSQT
1808	if (uiWidth == uiHeight)
1809	#endif
1810	{
1811	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1812	if( uiLog2BlkSize == 3 )
1813	{
1814	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1815	}
1816	else if( uiLog2BlkSize == 5 )
1817	{
1818	scanCG = g_sigLastScanCG32x32;
1819	}
1820	}
1821	#if !REMOVE_NSQT
1822	else
1823	{
1824	scanCG = g_sigCGScanNSQT[ uiLog2BlkSize - 2 ];
1825	}
1826	#endif
1827	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1828	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1829	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1830	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1831	Int iCGLastScanPos = -1;
1832
1833	UInt uiCtxSet = 0;
1834	Int c1 = 1;
1835	Int c2 = 0;
1836	#if !REMOVE_NUM_GREATER1
1837	UInt uiNumOne = 0;
1838	#endif
1839	Double d64BaseCost = 0;
1840	Int iLastScanPos = -1;
1841	dTemp = dErrScale;
1842
1843	UInt c1Idx = 0;
1844	UInt c2Idx = 0;
1845	Int baseLevel;
1846
1847	#if REMOVE_NSQT
1848	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1849	#else
1850	const UInt * scan;
1851	if (uiWidth == uiHeight)
1852	{
1853	scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1854	}
1855	else
1856	{
1857	scan = g_sigScanNSQT[ uiLog2BlkSize - 2 ];
1858	}
1859	#endif
1860
1861	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1862	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1863
1864	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1865	Int iScanPos;
1866	coeffGroupRDStats rdStats;
1867
1868	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1869	{
1870	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1871	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1872	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1873	#if !REMOVAL_8x2_2x8_CG
1874	if( uiWidth == 8 && uiHeight == 8 && (uiScanIdx == SCAN_HOR \|\| uiScanIdx == SCAN_VER) )
1875	{
1876	uiCGPosY = (uiScanIdx == SCAN_HOR ? uiCGBlkPos : 0);
1877	uiCGPosX = (uiScanIdx == SCAN_VER ? uiCGBlkPos : 0);
1878	}
1879	#endif
1880	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1881
1882	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1883	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1884	{
1885	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1886	//===== quantization =====
1887	UInt uiBlkPos = scan[iScanPos];
1888	// set coeff
1889	uiQ = piQCoef[uiBlkPos];
1890	dTemp = pdErrScale[uiBlkPos];
1891	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1892	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1893	#if ADAPTIVE_QP_SELECTION
1894	if( m_bUseAdaptQpSelect )
1895	{
1896	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1897	}
1898	#endif
1899	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1900
1901	Double dErr = Double( lLevelDouble );
1902	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1903	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1904	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1905
1906	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1907	{
1908	iLastScanPos = iScanPos;
1909	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1910	iCGLastScanPos = iCGScanPos;
1911	}
1912
1913	if ( iLastScanPos >= 0 )
1914	{
1915	//===== coefficient level estimation =====
1916	UInt uiLevel;
1917	UInt uiOneCtx = 4 * uiCtxSet + c1;
1918	UInt uiAbsCtx = uiCtxSet + c2;
1919
1920	if( iScanPos == iLastScanPos )
1921	{
1922	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1923	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1924	c1Idx, c2Idx, iQBits, dTemp, 1 );
1925	}
1926	else
1927	{
1928	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1929	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1930	#if REMOVAL_8x2_2x8_CG
1931	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1932	#else
1933	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1934	#endif
1935	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1936	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1937	c1Idx, c2Idx, iQBits, dTemp, 0 );
1938	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1939	}
1940	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1941	if( uiLevel > 0 )
1942	{
1943	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1944	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1945	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1946	}
1947	else // uiLevel == 0
1948	{
1949	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1950	}
1951	piDstCoeff[ uiBlkPos ] = uiLevel;
1952	d64BaseCost += pdCostCoeff [ iScanPos ];
1953
1954
1955	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1956	if( uiLevel >= baseLevel )
1957	{
1958	if(uiLevel > 3*(1<<uiGoRiceParam))
1959	{
1960	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1961	}
1962	}
1963	if ( uiLevel >= 1)
1964	{
1965	c1Idx ++;
1966	}
1967
1968	//===== update bin model =====
1969	if( uiLevel > 1 )
1970	{
1971	c1 = 0;
1972	c2 += (c2 < 2);
1973	#if !REMOVE_NUM_GREATER1
1974	uiNumOne++;
1975	#endif
1976	c2Idx ++;
1977	}
1978	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1979	{
1980	c1++;
1981	}
1982
1983	//===== context set update =====
1984	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1985	{
1986	#if !REMOVE_NUM_GREATER1
1987	c1 = 1;
1988	#endif
1989	c2 = 0;
1990	uiGoRiceParam = 0;
1991
1992	c1Idx = 0;
1993	c2Idx = 0;
1994	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1995	#if REMOVE_NUM_GREATER1
1996	if( c1 == 0 )
1997	#else
1998	if( uiNumOne > 0 )
1999	#endif
2000	{
2001	uiCtxSet++;
2002	}
2003	#if REMOVE_NUM_GREATER1
2004	c1 = 1;
2005	#else
2006	uiNumOne >>= 1;
2007	#endif
2008	}
2009	}
2010	else
2011	{
2012	d64BaseCost += pdCostCoeff0[ iScanPos ];
2013	}
2014	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2015	if (iScanPosinCG == 0 )
2016	{
2017	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2018	}
2019	if (piDstCoeff[ uiBlkPos ] )
2020	{
2021	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2022	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2023	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2024	if ( iScanPosinCG != 0 )
2025	{
2026	rdStats.iNNZbeforePos0++;
2027	}
2028	}
2029	} //end for (iScanPosinCG)
2030
2031	if (iCGLastScanPos >= 0)
2032	{
2033	if( iCGScanPos )
2034	{
2035	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2036	{
2037	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2038	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2039	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2040	}
2041	else
2042	{
2043	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2044	{
2045	if ( rdStats.iNNZbeforePos0 == 0 )
2046	{
2047	d64BaseCost -= rdStats.d64SigCost_0;
2048	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2049	}
2050	// rd-cost if SigCoeffGroupFlag = 0, initialization
2051	Double d64CostZeroCG = d64BaseCost;
2052
2053	// add SigCoeffGroupFlag cost to total cost
2054	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2055	if (iCGScanPos < iCGLastScanPos)
2056	{
2057	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2058	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2059	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2060	}
2061
2062	// try to convert the current coeff group from non-zero to all-zero
2063	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2064	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2065	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2066
2067	// if we can save cost, change this block to all-zero block
2068	if ( d64CostZeroCG < d64BaseCost )
2069	{
2070	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2071	d64BaseCost = d64CostZeroCG;
2072	if (iCGScanPos < iCGLastScanPos)
2073	{
2074	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2075	}
2076	// reset coeffs to 0 in this block
2077	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2078	{
2079	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2080	UInt uiBlkPos = scan[ iScanPos ];
2081
2082	if (piDstCoeff[ uiBlkPos ])
2083	{
2084	piDstCoeff [ uiBlkPos ] = 0;
2085	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2086	pdCostSig [ iScanPos ] = 0;
2087	}
2088	}
2089	} // end if ( d64CostAllZeros < d64BaseCost )
2090	}
2091	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2092	}
2093	else
2094	{
2095	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2096	}
2097	}
2098	} //end for (iCGScanPos)
2099
2100	//===== estimate last position =====
2101	if ( iLastScanPos < 0 )
2102	{
2103	return;
2104	}
2105
2106	Double d64BestCost = 0;
2107	Int ui16CtxCbf = 0;
2108	Int iBestLastIdxP1 = 0;
2109	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2110	{
2111	ui16CtxCbf = 0;
2112	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2113	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2114	}
2115	else
2116	{
2117	ui16CtxCbf = pcCU->getCtxQtCbf( uiAbsPartIdx, eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
2118	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
2119	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2120	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2121	}
2122
2123	Bool bFoundLast = false;
2124	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2125	{
2126	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
2127
2128	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2129	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2130	{
2131	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2132	{
2133	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2134	if (iScanPos > iLastScanPos) continue;
2135	UInt uiBlkPos = scan[iScanPos];
2136
2137	if( piDstCoeff[ uiBlkPos ] )
2138	{
2139	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
2140	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
2141
2142	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
2143	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2144
2145	if( totalCost < d64BestCost )
2146	{
2147	iBestLastIdxP1 = iScanPos + 1;
2148	d64BestCost = totalCost;
2149	}
2150	if( piDstCoeff[ uiBlkPos ] > 1 )
2151	{
2152	bFoundLast = true;
2153	break;
2154	}
2155	d64BaseCost -= pdCostCoeff[ iScanPos ];
2156	d64BaseCost += pdCostCoeff0[ iScanPos ];
2157	}
2158	else
2159	{
2160	d64BaseCost -= pdCostSig[ iScanPos ];
2161	}
2162	} //end for
2163	if (bFoundLast)
2164	{
2165	break;
2166	}
2167	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2168	} // end for
2169
2170	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2171	{
2172	Int blkPos = scan[ scanPos ];
2173	Int level = piDstCoeff[ blkPos ];
2174	uiAbsSum += level;
2175	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2176	}
2177
2178	//===== clean uncoded coefficients =====
2179	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2180	{
2181	piDstCoeff[ scan[ scanPos ] ] = 0;
2182	}
2183
2184	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2185	{
2186	Int64 rdFactor = (Int64)((Double)(g_invQuantScales[m_cQP.rem()])(Double)(g_invQuantScales[m_cQP.rem()])(Double)(1<<(2m_cQP.m_iPer))/m_dLambda/16/(Double)(1<<(2g_uiBitIncrement)) + 0.5);
2187	Int lastCG = -1;
2188	Int absSum = 0 ;
2189	Int n ;
2190
2191	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
2192	{
2193	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
2194	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
2195	absSum = 0 ;
2196
2197	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
2198	{
2199	if( piDstCoeff[ scan[ n + subPos ]] )
2200	{
2201	lastNZPosInCG = n;
2202	break;
2203	}
2204	}
2205
2206	for(n = 0; n <SCAN_SET_SIZE; n++ )
2207	{
2208	if( piDstCoeff[ scan[ n + subPos ]] )
2209	{
2210	firstNZPosInCG = n;
2211	break;
2212	}
2213	}
2214
2215	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2216	{
2217	absSum += piDstCoeff[ scan[ n + subPos ]];
2218	}
2219
2220	if(lastNZPosInCG>=0 && lastCG==-1)
2221	{
2222	lastCG = 1;
2223	}
2224
2225	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2226	{
2227	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
2228	if( signbit!=(absSum&0x1) ) // hide but need tune
2229	{
2230	// calculate the cost
2231	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
2232	Int minPos =-1, finalChange=0, curChange=0;
2233
2234	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
2235	{
2236	UInt uiBlkPos = scan[ n + subPos ];
2237	if(piDstCoeff[ uiBlkPos ] != 0 )
2238	{
2239	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
2240	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2241	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
2242
2243	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2244	{
2245	costDown -= (4<<15) ;
2246	}
2247
2248	if(costUp<costDown)
2249	{
2250	curCost = costUp;
2251	curChange = 1 ;
2252	}
2253	else
2254	{
2255	curChange = -1 ;
2256	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2257	{
2258	curCost = MAX_INT64 ;
2259	}
2260	else
2261	{
2262	curCost = costDown ;
2263	}
2264	}
2265	}
2266	else
2267	{
2268	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2269	curChange = 1 ;
2270
2271	if(n<firstNZPosInCG)
2272	{
2273	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2274	if(thissignbit != signbit )
2275	{
2276	curCost = MAX_INT64;
2277	}
2278	}
2279	}
2280
2281	if( curCost<minCostInc)
2282	{
2283	minCostInc = curCost ;
2284	finalChange = curChange ;
2285	minPos = uiBlkPos ;
2286	}
2287	}
2288
2289	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
2290	{
2291	finalChange = -1;
2292	}
2293
2294	if(plSrcCoeff[minPos]>=0)
2295	{
2296	piDstCoeff[minPos] += finalChange ;
2297	}
2298	else
2299	{
2300	piDstCoeff[minPos] -= finalChange ;
2301	}
2302	}
2303	}
2304
2305	if(lastCG==1)
2306	{
2307	lastCG=0 ;
2308	}
2309	}
2310	}
2311	}
2312
2313	/** Pattern decision for context derivation process of significant_coeff_flag
2314	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2315	* \param posXCG column of current coefficient group
2316	* \param posYCG row of current coefficient group
2317	* \param width width of the block
2318	* \param height height of the block
2319	* \returns pattern for current coefficient group
2320	*/
2321	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2322	{
2323	#if REMOVAL_8x2_2x8_CG
2324	if( width == 4 && height == 4 ) return -1;
2325	#else
2326	if( width == height && width <= 8 ) return -1;
2327	#endif
2328
2329	UInt sigRight = 0;
2330	UInt sigLower = 0;
2331
2332	width >>= 2;
2333	height >>= 2;
2334	if( posXCG < width - 1 )
2335	{
2336	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2337	}
2338	if (posYCG < height - 1 )
2339	{
2340	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2341	}
2342	return sigRight + (sigLower<<1);
2343	}
2344
2345	/** Context derivation process of coeff_abs_significant_flag
2346	* \param patternSigCtx pattern for current coefficient group
2347	* \param posX column of current scan position
2348	* \param posY row of current scan position
2349	* \param blockType log2 value of block size if square block, or 4 otherwise
2350	* \param width width of the block
2351	* \param height height of the block
2352	* \param textureType texture type (TEXT_LUMA...)
2353	* \returns ctxInc for current scan position
2354	*/
2355	Int TComTrQuant::getSigCtxInc (
2356	Int patternSigCtx,
2357	#if REMOVAL_8x2_2x8_CG
2358	UInt scanIdx,
2359	#endif
2360	Int posX,
2361	Int posY,
2362	Int blockType,
2363	Int width
2364	,Int height
2365	,TextType textureType
2366	)
2367	{
2368	const Int ctxIndMap[16] =
2369	{
2370	0, 1, 4, 5,
2371	2, 3, 4, 5,
2372	6, 6, 8, 8,
2373	7, 7, 8, 8
2374	};
2375
2376	if( posX + posY == 0 )
2377	{
2378	return 0;
2379	}
2380
2381	if ( blockType == 2 )
2382	{
2383	return ctxIndMap[ 4 * posY + posX ];
2384	}
2385
2386	#if !REMOVAL_8x2_2x8_CG
2387	if ( blockType == 3 )
2388	{
2389	return 9 + ctxIndMap[ 4 * (posY >> 1) + (posX >> 1) ];
2390	}
2391
2392	Int offset = 18;
2393	#else
2394	Int offset = blockType == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2395	#endif
2396
2397	Int posXinSubset = posX-((posX>>2)<<2);
2398	Int posYinSubset = posY-((posY>>2)<<2);
2399	Int cnt = 0;
2400	if(patternSigCtx==0)
2401	{
2402	#if REMOVAL_8x2_2x8_CG
2403	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2404	#else
2405	cnt = posXinSubset+posYinSubset<=2 ? 1 : 0;
2406	#endif
2407	}
2408	else if(patternSigCtx==1)
2409	{
2410	#if REMOVAL_8x2_2x8_CG
2411	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2412	#else
2413	cnt = posYinSubset<=1 ? 1 : 0;
2414	#endif
2415	}
2416	else if(patternSigCtx==2)
2417	{
2418	#if REMOVAL_8x2_2x8_CG
2419	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2420	#else
2421	cnt = posXinSubset<=1 ? 1 : 0;
2422	#endif
2423	}
2424	else
2425	{
2426	#if REMOVAL_8x2_2x8_CG
2427	cnt = 2;
2428	#else
2429	cnt = posXinSubset+posYinSubset<=4 ? 2 : 1;
2430	#endif
2431	}
2432
2433	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2434	}
2435
2436	/** Get the best level in RD sense
2437	* \param rd64CodedCost reference to coded cost
2438	* \param rd64CodedCost0 reference to cost when coefficient is 0
2439	* \param rd64CodedCostSig reference to cost of significant coefficient
2440	* \param lLevelDouble reference to unscaled quantized level
2441	* \param uiMaxAbsLevel scaled quantized level
2442	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2443	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2444	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2445	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2446	* \param iQBits quantization step size
2447	* \param dTemp correction factor
2448	* \param bLast indicates if the coefficient is the last significant
2449	* \returns best quantized transform level for given scan position
2450	* This method calculates the best quantized transform level for a given scan position.
2451	*/
2452	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2453	Double& rd64CodedCost0,
2454	Double& rd64CodedCostSig,
2455	Int lLevelDouble,
2456	UInt uiMaxAbsLevel,
2457	UShort ui16CtxNumSig,
2458	UShort ui16CtxNumOne,
2459	UShort ui16CtxNumAbs,
2460	UShort ui16AbsGoRice,
2461	UInt c1Idx,
2462	UInt c2Idx,
2463	Int iQBits,
2464	Double dTemp,
2465	Bool bLast ) const
2466	{
2467	Double dCurrCostSig = 0;
2468	UInt uiBestAbsLevel = 0;
2469
2470	if( !bLast && uiMaxAbsLevel < 3 )
2471	{
2472	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2473	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2474	if( uiMaxAbsLevel == 0 )
2475	{
2476	return uiBestAbsLevel;
2477	}
2478	}
2479	else
2480	{
2481	rd64CodedCost = MAX_DOUBLE;
2482	}
2483
2484	if( !bLast )
2485	{
2486	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2487	}
2488
2489	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2490	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2491	{
2492	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2493	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2494	dCurrCost += dCurrCostSig;
2495
2496	if( dCurrCost < rd64CodedCost )
2497	{
2498	uiBestAbsLevel = uiAbsLevel;
2499	rd64CodedCost = dCurrCost;
2500	rd64CodedCostSig = dCurrCostSig;
2501	}
2502	}
2503
2504	return uiBestAbsLevel;
2505	}
2506
2507	/** Calculates the cost for specific absolute transform level
2508	* \param uiAbsLevel scaled quantized level
2509	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2510	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2511	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2512	* \returns cost of given absolute transform level
2513	*/
2514	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2515	UShort ui16CtxNumOne,
2516	UShort ui16CtxNumAbs,
2517	UShort ui16AbsGoRice
2518	, UInt c1Idx,
2519	UInt c2Idx
2520	) const
2521	{
2522	Double iRate = xGetIEPRate();
2523	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2524
2525	if ( uiAbsLevel >= baseLevel )
2526	{
2527	UInt symbol = uiAbsLevel - baseLevel;
2528	UInt length;
2529	#if COEF_REMAIN_BIN_REDUCTION
2530	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2531	#else
2532	if (symbol < (8 << ui16AbsGoRice))
2533	#endif
2534	{
2535	length = symbol>>ui16AbsGoRice;
2536	iRate += (length+1+ui16AbsGoRice)<< 15;
2537	}
2538	else
2539	{
2540	length = ui16AbsGoRice;
2541	#if COEF_REMAIN_BIN_REDUCTION
2542	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2543	#else
2544	symbol = symbol - ( 8 << ui16AbsGoRice);
2545	#endif
2546	while (symbol >= (1<<length))
2547	{
2548	symbol -= (1<<(length++));
2549	}
2550	#if COEF_REMAIN_BIN_REDUCTION
2551	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2552	#else
2553	iRate += (8+length+1-ui16AbsGoRice+length)<< 15;
2554	#endif
2555	}
2556	if (c1Idx < C1FLAG_NUMBER)
2557	{
2558	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2559
2560	if (c2Idx < C2FLAG_NUMBER)
2561	{
2562	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2563	}
2564	}
2565	}
2566	else
2567	if( uiAbsLevel == 1 )
2568	{
2569	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2570	}
2571	else if( uiAbsLevel == 2 )
2572	{
2573	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2574	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2575	}
2576	else
2577	{
2578	assert (0);
2579	}
2580	return xGetICost( iRate );
2581	}
2582
2583	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2584	UShort ui16CtxNumOne,
2585	UShort ui16CtxNumAbs,
2586	UShort ui16AbsGoRice
2587	, UInt c1Idx,
2588	UInt c2Idx
2589	) const
2590	{
2591	Int iRate = 0;
2592	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2593
2594	if ( uiAbsLevel >= baseLevel )
2595	{
2596	UInt uiSymbol = uiAbsLevel - baseLevel;
2597	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2598	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2599
2600	if( bExpGolomb )
2601	{
2602	uiAbsLevel = uiSymbol - uiMaxVlc;
2603	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2604	iRate += iEGS << 15;
2605	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2606	}
2607
2608	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2609	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2610
2611	iRate += ui16NumBins << 15;
2612
2613	if (c1Idx < C1FLAG_NUMBER)
2614	{
2615	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2616
2617	if (c2Idx < C2FLAG_NUMBER)
2618	{
2619	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2620	}
2621	}
2622	}
2623	else
2624	if( uiAbsLevel == 0 )
2625	{
2626	return 0;
2627	}
2628	else if( uiAbsLevel == 1 )
2629	{
2630	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2631	}
2632	else if( uiAbsLevel == 2 )
2633	{
2634	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2635	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2636	}
2637	else
2638	{
2639	assert(0);
2640	}
2641	return iRate;
2642	}
2643
2644	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2645	UShort ui16CtxNumSig ) const
2646	{
2647	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2648	}
2649
2650	/** Calculates the cost of signaling the last significant coefficient in the block
2651	* \param uiPosX X coordinate of the last significant coefficient
2652	* \param uiPosY Y coordinate of the last significant coefficient
2653	* \returns cost of last significant coefficient
2654	*/
2655	/*
2656	* \param uiWidth width of the transform unit (TU)
2657	*/
2658	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2659	const UInt uiPosY,
2660	const UInt uiBlkWdth ) const
2661	{
2662	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2663	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2664	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2665	if( uiCtxX > 3 )
2666	{
2667	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2668	}
2669	if( uiCtxY > 3 )
2670	{
2671	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2672	}
2673	return xGetICost( uiCost );
2674	}
2675
2676	/** Calculates the cost for specific absolute transform level
2677	* \param uiAbsLevel scaled quantized level
2678	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2679	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2680	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2681	* \returns cost of given absolute transform level
2682	*/
2683	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2684	UShort ui16CtxNumSig ) const
2685	{
2686	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2687	}
2688
2689	/** Get the cost for a specific rate
2690	* \param dRate rate of a bit
2691	* \returns cost at the specific rate
2692	*/
2693	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2694	{
2695	return m_dLambda * dRate;
2696	}
2697
2698	/** Get the cost of an equal probable bit
2699	* \returns cost of equal probable bit
2700	*/
2701	__inline Double TComTrQuant::xGetIEPRate ( ) const
2702	{
2703	return 32768;
2704	}
2705
2706	/** Context derivation process of coeff_abs_significant_flag
2707	* \param uiSigCoeffGroupFlag significance map of L1
2708	* \param uiBlkX column of current scan position
2709	* \param uiBlkY row of current scan position
2710	* \param uiLog2BlkSize log2 value of block size
2711	* \returns ctxInc for current scan position
2712	*/
2713	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2714	const UInt uiCGPosX,
2715	const UInt uiCGPosY,
2716	const UInt scanIdx,
2717	Int width, Int height)
2718	{
2719	UInt uiRight = 0;
2720	UInt uiLower = 0;
2721
2722	width >>= 2;
2723	height >>= 2;
2724	#if !REMOVAL_8x2_2x8_CG
2725	if( width == 2 && height == 2 ) // 8x8
2726	{
2727	if( scanIdx == SCAN_HOR )
2728	{
2729	width = 1;
2730	height = 4;
2731	}
2732	else if( scanIdx == SCAN_VER )
2733	{
2734	width = 4;
2735	height = 1;
2736	}
2737	}
2738	#endif
2739	if( uiCGPosX < width - 1 )
2740	{
2741	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2742	}
2743	if (uiCGPosY < height - 1 )
2744	{
2745	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2746	}
2747	return (uiRight \|\| uiLower);
2748
2749	}
2750	/** set quantized matrix coefficient for encode
2751	* \param scalingList quantaized matrix address
2752	*/
2753	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2754	{
2755	UInt size,list;
2756	UInt qp;
2757
2758	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2759	{
2760	for(list = 0; list < g_scalingListNum[size]; list++)
2761	{
2762	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2763	{
2764	xSetScalingListEnc(scalingList,list,size,qp);
2765	xSetScalingListDec(scalingList,list,size,qp);
2766	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2767	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2768	{
2769	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2770	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2771	}
2772	}
2773	}
2774	}
2775	}
2776	/** set quantized matrix coefficient for decode
2777	* \param scalingList quantaized matrix address
2778	*/
2779	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2780	{
2781	UInt size,list;
2782	UInt qp;
2783
2784	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2785	{
2786	for(list = 0; list < g_scalingListNum[size]; list++)
2787	{
2788	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2789	{
2790	xSetScalingListDec(scalingList,list,size,qp);
2791	}
2792	}
2793	}
2794	}
2795	/** set error scale coefficients
2796	* \param list List ID
2797	* \param uiSize Size
2798	* \param uiQP Quantization parameter
2799	*/
2800	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp, UInt dir)
2801	{
2802
2803	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2804	#if FULL_NBIT
2805	UInt uiBitDepth = g_uiBitDepth;
2806	#else
2807	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
2808	#endif
2809
2810	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
2811
2812	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2813	Int *piQuantcoeff;
2814	double *pdErrScale;
2815	piQuantcoeff = getQuantCoeff(list, qp,size,dir);
2816	pdErrScale = getErrScaleCoeff(list, size, qp,dir);
2817
2818	double dErrScale = (double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2819	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2820	for(i=0;i<uiMaxNumCoeff;i++)
2821	{
2822	pdErrScale[i] = dErrScale/(double)piQuantcoeff[i]/(double)piQuantcoeff[i]/(double)(1<<(2*g_uiBitIncrement));
2823	}
2824	}
2825
2826	/** set quantized matrix coefficient for encode
2827	* \param scalingList quantaized matrix address
2828	* \param listId List index
2829	* \param sizeId size index
2830	* \param uiQP Quantization parameter
2831	*/
2832	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2833	{
2834	UInt width = g_scalingListSizeX[sizeId];
2835	UInt height = g_scalingListSizeX[sizeId];
2836	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2837	Int *quantcoeff;
2838	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2839	quantcoeff = getQuantCoeff(listId, qp, sizeId, SCALING_LIST_SQT);
2840
2841	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2842
2843	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16) //for NSQT
2844	{
2845	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2846	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2847
2848	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2849	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2850	}
2851	}
2852	/** set quantized matrix coefficient for decode
2853	* \param scalingList quantaized matrix address
2854	* \param list List index
2855	* \param size size index
2856	* \param uiQP Quantization parameter
2857	*/
2858	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2859	{
2860	UInt width = g_scalingListSizeX[sizeId];
2861	UInt height = g_scalingListSizeX[sizeId];
2862	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2863	Int *dequantcoeff;
2864	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2865
2866	dequantcoeff = getDequantCoeff(listId, qp, sizeId,SCALING_LIST_SQT);
2867	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2868
2869	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16)
2870	{
2871	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2872	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2873
2874	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2875
2876	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2877	}
2878	}
2879
2880	/** set flat matrix value to quantized coefficient
2881	*/
2882	Void TComTrQuant::setFlatScalingList()
2883	{
2884	UInt size,list;
2885	UInt qp;
2886
2887	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2888	{
2889	for(list = 0; list < g_scalingListNum[size]; list++)
2890	{
2891	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2892	{
2893	xsetFlatScalingList(list,size,qp);
2894	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2895	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2896	{
2897	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2898	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2899	}
2900	}
2901	}
2902	}
2903	}
2904
2905	/** set flat matrix value to quantized coefficient
2906	* \param list List ID
2907	* \param uiQP Quantization parameter
2908	* \param uiSize Size
2909	*/
2910	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2911	{
2912	UInt i,num = g_scalingListSize[size];
2913	UInt numDiv4 = num>>2;
2914	Int *quantcoeff;
2915	Int *dequantcoeff;
2916	Int quantScales = g_quantScales[qp];
2917	Int invQuantScales = g_invQuantScales[qp]<<4;
2918
2919	quantcoeff = getQuantCoeff(list, qp, size,SCALING_LIST_SQT);
2920	dequantcoeff = getDequantCoeff(list, qp, size,SCALING_LIST_SQT);
2921
2922	for(i=0;i<num;i++)
2923	{
2924	*quantcoeff++ = quantScales;
2925	*dequantcoeff++ = invQuantScales;
2926	}
2927
2928	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2929	{
2930	quantcoeff = getQuantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2931	dequantcoeff = getDequantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2932
2933	for(i=0;i<numDiv4;i++)
2934	{
2935	*quantcoeff++ = quantScales;
2936	*dequantcoeff++ = invQuantScales;
2937	}
2938	quantcoeff = getQuantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2939	dequantcoeff = getDequantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2940
2941	for(i=0;i<numDiv4;i++)
2942	{
2943	*quantcoeff++ = quantScales;
2944	*dequantcoeff++ = invQuantScales;
2945	}
2946	}
2947	}
2948
2949	/** set quantized matrix coefficient for encode
2950	* \param coeff quantaized matrix address
2951	* \param quantcoeff quantaized matrix address
2952	* \param quantScales Q(QP%6)
2953	* \param height height
2954	* \param width width
2955	* \param ratio ratio for upscale
2956	* \param sizuNum matrix size
2957	* \param dc dc parameter
2958	*/
2959	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2960	{
2961	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2962	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2963	for(UInt j=0;j<height;j++)
2964	{
2965	for(UInt i=0;i<width;i++)
2966	{
2967	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2968	}
2969	}
2970	if(ratio > 1)
2971	{
2972	quantcoeff[0] = quantScales / dc;
2973	}
2974	}
2975	/** set quantized matrix coefficient for decode
2976	* \param coeff quantaized matrix address
2977	* \param dequantcoeff quantaized matrix address
2978	* \param invQuantScales IQ(QP%6))
2979	* \param height height
2980	* \param width width
2981	* \param ratio ratio for upscale
2982	* \param sizuNum matrix size
2983	* \param dc dc parameter
2984	*/
2985	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2986	{
2987	#if !REMOVE_NSQT
2988	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2989	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2990	#endif
2991	for(UInt j=0;j<height;j++)
2992	{
2993	for(UInt i=0;i<width;i++)
2994	{
2995	#if REMOVE_NSQT
2996	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2997	#else
2998	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio];
2999	#endif
3000	}
3001	}
3002	if(ratio > 1)
3003	{
3004	dequantcoeff[0] = invQuantScales * dc;
3005	}
3006	}
3007
3008	/** initialization process of scaling list array
3009	*/
3010	Void TComTrQuant::initScalingList()
3011	{
3012	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3013	{
3014	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3015	{
3016	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3017	{
3018	m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3019	m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3020	m_errScale [sizeId][listId][qp][SCALING_LIST_SQT] = new double [g_scalingListSize[sizeId]];
3021
3022	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3023	{
3024	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3025	{
3026	m_quantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3027	m_dequantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3028	m_errScale [sizeId][listId][qp][dir] = new double [g_scalingListSize[sizeId]];
3029	}
3030	}
3031	}
3032	}
3033	}
3034	//copy for NSQT
3035	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3036	{
3037	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3038	{
3039	m_quantCoef [SCALING_LIST_16x16][3][qp][dir] = m_quantCoef [SCALING_LIST_16x16][1][qp][dir];
3040	m_dequantCoef [SCALING_LIST_16x16][3][qp][dir] = m_dequantCoef [SCALING_LIST_16x16][1][qp][dir];
3041	m_errScale [SCALING_LIST_16x16][3][qp][dir] = m_errScale [SCALING_LIST_16x16][1][qp][dir];
3042	}
3043	m_quantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_quantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3044	m_dequantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_dequantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3045	m_errScale [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_errScale [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3046	}
3047	}
3048	/** destroy quantization matrix array
3049	*/
3050	Void TComTrQuant::destroyScalingList()
3051	{
3052	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3053	{
3054	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3055	{
3056	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3057	{
3058	if(m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3059	if(m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3060	if(m_errScale [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_errScale [sizeId][listId][qp][SCALING_LIST_SQT];
3061	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3062	{
3063	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3064	{
3065	if(m_quantCoef [sizeId][listId][qp][dir]) delete [] m_quantCoef [sizeId][listId][qp][dir];
3066	if(m_dequantCoef [sizeId][listId][qp][dir]) delete [] m_dequantCoef [sizeId][listId][qp][dir];
3067	if(m_errScale [sizeId][listId][qp][dir]) delete [] m_errScale [sizeId][listId][qp][dir];
3068	}
3069	}
3070	}
3071	}
3072	}
3073	}
3074
3075	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: