Context navigation

← Previous revision
Next revision →
Blame
Revision log

source: SHVCSoftware/branches/0.1.1-bugfix/source/Lib/TLibCommon/TComTrQuant.cpp

Visit:

Last change on this file was 2, checked in by seregin, 12 years ago
Initial import by Vadim Seregin <vseregin@…>
File size: 101.0 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2012, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_bUseRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	#if CHROMA_QP_EXTENSION
203	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
204	#else
205	qpScaled = Clip3( -qpBdOffset, 51, qpy + chromaQPOffset );
206	#endif
207
208	if(qpScaled < 0)
209	{
210	qpScaled = qpScaled + qpBdOffset;
211	}
212	else
213	{
214	#if CHROMA_QP_EXTENSION
215	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
216	#else
217	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBdOffset;
218	#endif
219	}
220	}
221	m_cQP.setQpParam( qpScaled );
222	}
223
224	#if MATRIX_MULT
225	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
226	* \param block pointer to input data (residual)
227	* \param coeff pointer to output data (transform coefficients)
228	* \param uiStride stride of input data
229	* \param uiTrSize transform size (uiTrSize x uiTrSize)
230	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
231	*/
232	void xTr(Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
233	{
234	Int i,j,k,iSum;
235	Int tmp[32*32];
236	const short *iT;
237	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
238
239	if (uiTrSize==4)
240	{
241	iT = g_aiT4[0];
242	}
243	else if (uiTrSize==8)
244	{
245	iT = g_aiT8[0];
246	}
247	else if (uiTrSize==16)
248	{
249	iT = g_aiT16[0];
250	}
251	else if (uiTrSize==32)
252	{
253	iT = g_aiT32[0];
254	}
255	else
256	{
257	assert(0);
258	}
259
260	#if FULL_NBIT
261	int shift_1st = uiLog2TrSize - 1 + g_uiBitDepth - 8; // log2(N) - 1 + g_uiBitDepth - 8
262	#else
263	int shift_1st = uiLog2TrSize - 1 + g_uiBitIncrement; // log2(N) - 1 + g_uiBitIncrement
264	#endif
265
266	int add_1st = 1<<(shift_1st-1);
267	int shift_2nd = uiLog2TrSize + 6;
268	int add_2nd = 1<<(shift_2nd-1);
269
270	/* Horizontal transform */
271
272	if (uiTrSize==4)
273	{
274	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
275	{
276	iT = g_as_DST_MAT_4[0];
277	}
278	}
279	for (i=0; i<uiTrSize; i++)
280	{
281	for (j=0; j<uiTrSize; j++)
282	{
283	iSum = 0;
284	for (k=0; k<uiTrSize; k++)
285	{
286	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
287	}
288	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
289	}
290	}
291
292	/* Vertical transform */
293	if (uiTrSize==4)
294	{
295	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
296	{
297	iT = g_as_DST_MAT_4[0];
298	}
299	else
300	{
301	iT = g_aiT4[0];
302	}
303	}
304	for (i=0; i<uiTrSize; i++)
305	{
306	for (j=0; j<uiTrSize; j++)
307	{
308	iSum = 0;
309	for (k=0; k<uiTrSize; k++)
310	{
311	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
312	}
313	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
314	}
315	}
316	}
317
318	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
319	* \param coeff pointer to input data (transform coefficients)
320	* \param block pointer to output data (residual)
321	* \param uiStride stride of output data
322	* \param uiTrSize transform size (uiTrSize x uiTrSize)
323	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
324	*/
325	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
326	{
327	int i,j,k,iSum;
328	Int tmp[32*32];
329	const short *iT;
330
331	if (uiTrSize==4)
332	{
333	iT = g_aiT4[0];
334	}
335	else if (uiTrSize==8)
336	{
337	iT = g_aiT8[0];
338	}
339	else if (uiTrSize==16)
340	{
341	iT = g_aiT16[0];
342	}
343	else if (uiTrSize==32)
344	{
345	iT = g_aiT32[0];
346	}
347	else
348	{
349	assert(0);
350	}
351
352	int shift_1st = SHIFT_INV_1ST;
353	int add_1st = 1<<(shift_1st-1);
354	#if FULL_NBIT
355	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
356	#else
357	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
358	#endif
359	int add_2nd = 1<<(shift_2nd-1);
360	if (uiTrSize==4)
361	{
362	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
363	{
364	iT = g_as_DST_MAT_4[0];
365	}
366	}
367
368	/* Horizontal transform */
369	for (i=0; i<uiTrSize; i++)
370	{
371	for (j=0; j<uiTrSize; j++)
372	{
373	iSum = 0;
374	for (k=0; k<uiTrSize; k++)
375	{
376	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
377	}
378	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
379	}
380	}
381
382	if (uiTrSize==4)
383	{
384	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
385	{
386	iT = g_as_DST_MAT_4[0];
387	}
388	else
389	{
390	iT = g_aiT4[0];
391	}
392	}
393
394	/* Vertical transform */
395	for (i=0; i<uiTrSize; i++)
396	{
397	for (j=0; j<uiTrSize; j++)
398	{
399	iSum = 0;
400	for (k=0; k<uiTrSize; k++)
401	{
402	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
403	}
404	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
405	}
406	}
407	}
408
409	#else //MATRIX_MULT
410
411	/** 4x4 forward transform implemented using partial butterfly structure (1D)
412	* \param src input data (residual)
413	* \param dst output data (transform coefficients)
414	* \param shift specifies right shift after 1D transform
415	*/
416
417	void partialButterfly4(short src,short dst,int shift, int line)
418	{
419	int j;
420	int E[2],O[2];
421	int add = 1<<(shift-1);
422
423	for (j=0; j<line; j++)
424	{
425	/* E and O */
426	E[0] = src[0] + src[3];
427	O[0] = src[0] - src[3];
428	E[1] = src[1] + src[2];
429	O[1] = src[1] - src[2];
430
431	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
432	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
433	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
434	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
435
436	src += 4;
437	dst ++;
438	}
439	}
440
/** Fast 4x4 forward DST (1D pass over four rows).
 *  Full matrix multiplication for DST and this fast DST algorithm give identical results.
 * \param block input, 16 samples read as four rows of four (block[4*i + 0..3])
 * \param coeff output, 16 coefficients; results for row i are written to coeff[i], coeff[4+i], coeff[8+i], coeff[12+i]
 * \param shift right shift applied after rounding
 */
void fastForwardDst(short *block,short *coeff,int shift)  // input block, output coeff
{
  int i, c[4];
  int rnd_factor = 1<<(shift-1);
  for (i=0; i<4; i++)
  {
    // Intermediate Variables
    c[0] = block[4*i+0] + block[4*i+3];
    c[1] = block[4*i+1] + block[4*i+3];
    c[2] = block[4*i+0] - block[4*i+1];
    c[3] = 74* block[4*i+2];

    coeff[   i] =  ( 29 * c[0] + 55 * c[1]         + c[3]               + rnd_factor ) >> shift;
    coeff[ 4+i] =  ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3])   + rnd_factor ) >> shift;
    coeff[ 8+i] =  ( 29 * c[2] + 55 * c[0]         - c[3]               + rnd_factor ) >> shift;
    coeff[12+i] =  ( 55 * c[2] - 29 * c[1]         + c[3]               + rnd_factor ) >> shift;
  }
}
461
462	void fastInverseDst(short tmp,short block,int shift) // input tmp, output block
463	{
464	int i, c[4];
465	int rnd_factor = 1<<(shift-1);
466	for (i=0; i<4; i++)
467	{
468	// Intermediate Variables
469	c[0] = tmp[ i] + tmp[ 8+i];
470	c[1] = tmp[8+i] + tmp[12+i];
471	c[2] = tmp[ i] - tmp[12+i];
472	c[3] = 74* tmp[4+i];
473
474	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
475	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
476	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
477	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
478	}
479	}
480
481	void partialButterflyInverse4(short src,short dst,int shift, int line)
482	{
483	int j;
484	int E[2],O[2];
485	int add = 1<<(shift-1);
486
487	for (j=0; j<line; j++)
488	{
489	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
490	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
491	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
492	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
493	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
494
495	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
496	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
497	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
498	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
499	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
500
501	src ++;
502	dst += 4;
503	}
504	}
505
506
507	void partialButterfly8(short src,short dst,int shift, int line)
508	{
509	int j,k;
510	int E[4],O[4];
511	int EE[2],EO[2];
512	int add = 1<<(shift-1);
513
514	for (j=0; j<line; j++)
515	{
516	/* E and O*/
517	for (k=0;k<4;k++)
518	{
519	E[k] = src[k] + src[7-k];
520	O[k] = src[k] - src[7-k];
521	}
522	/* EE and EO */
523	EE[0] = E[0] + E[3];
524	EO[0] = E[0] - E[3];
525	EE[1] = E[1] + E[2];
526	EO[1] = E[1] - E[2];
527
528	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
529	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
530	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
531	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
532
533	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
534	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
535	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
536	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
537
538	src += 8;
539	dst ++;
540	}
541	}
542
543
544	void partialButterflyInverse8(short src,short dst,int shift, int line)
545	{
546	int j,k;
547	int E[4],O[4];
548	int EE[2],EO[2];
549	int add = 1<<(shift-1);
550
551	for (j=0; j<line; j++)
552	{
553	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
554	for (k=0;k<4;k++)
555	{
556	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
557	}
558
559	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
560	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
561	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
562	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
563
564	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
565	E[0] = EE[0] + EO[0];
566	E[3] = EE[0] - EO[0];
567	E[1] = EE[1] + EO[1];
568	E[2] = EE[1] - EO[1];
569	for (k=0;k<4;k++)
570	{
571	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
572	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
573	}
574	src ++;
575	dst += 8;
576	}
577	}
578
579
580	void partialButterfly16(short src,short dst,int shift, int line)
581	{
582	int j,k;
583	int E[8],O[8];
584	int EE[4],EO[4];
585	int EEE[2],EEO[2];
586	int add = 1<<(shift-1);
587
588	for (j=0; j<line; j++)
589	{
590	/* E and O*/
591	for (k=0;k<8;k++)
592	{
593	E[k] = src[k] + src[15-k];
594	O[k] = src[k] - src[15-k];
595	}
596	/* EE and EO */
597	for (k=0;k<4;k++)
598	{
599	EE[k] = E[k] + E[7-k];
600	EO[k] = E[k] - E[7-k];
601	}
602	/* EEE and EEO */
603	EEE[0] = EE[0] + EE[3];
604	EEO[0] = EE[0] - EE[3];
605	EEE[1] = EE[1] + EE[2];
606	EEO[1] = EE[1] - EE[2];
607
608	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
609	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
610	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
611	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
612
613	for (k=2;k<16;k+=4)
614	{
615	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
616	}
617
618	for (k=1;k<16;k+=2)
619	{
620	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
621	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
622	}
623
624	src += 16;
625	dst ++;
626
627	}
628	}
629
630
631	void partialButterflyInverse16(short src,short dst,int shift, int line)
632	{
633	int j,k;
634	int E[8],O[8];
635	int EE[4],EO[4];
636	int EEE[2],EEO[2];
637	int add = 1<<(shift-1);
638
639	for (j=0; j<line; j++)
640	{
641	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
642	for (k=0;k<8;k++)
643	{
644	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
645	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
646	}
647	for (k=0;k<4;k++)
648	{
649	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
650	}
651	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
652	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
653	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
654	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
655
656	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
657	for (k=0;k<2;k++)
658	{
659	EE[k] = EEE[k] + EEO[k];
660	EE[k+2] = EEE[1-k] - EEO[1-k];
661	}
662	for (k=0;k<4;k++)
663	{
664	E[k] = EE[k] + EO[k];
665	E[k+4] = EE[3-k] - EO[3-k];
666	}
667	for (k=0;k<8;k++)
668	{
669	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
670	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
671	}
672	src ++;
673	dst += 16;
674	}
675	}
676
677
678	void partialButterfly32(short src,short dst,int shift, int line)
679	{
680	int j,k;
681	int E[16],O[16];
682	int EE[8],EO[8];
683	int EEE[4],EEO[4];
684	int EEEE[2],EEEO[2];
685	int add = 1<<(shift-1);
686
687	for (j=0; j<line; j++)
688	{
689	/* E and O*/
690	for (k=0;k<16;k++)
691	{
692	E[k] = src[k] + src[31-k];
693	O[k] = src[k] - src[31-k];
694	}
695	/* EE and EO */
696	for (k=0;k<8;k++)
697	{
698	EE[k] = E[k] + E[15-k];
699	EO[k] = E[k] - E[15-k];
700	}
701	/* EEE and EEO */
702	for (k=0;k<4;k++)
703	{
704	EEE[k] = EE[k] + EE[7-k];
705	EEO[k] = EE[k] - EE[7-k];
706	}
707	/* EEEE and EEEO */
708	EEEE[0] = EEE[0] + EEE[3];
709	EEEO[0] = EEE[0] - EEE[3];
710	EEEE[1] = EEE[1] + EEE[2];
711	EEEO[1] = EEE[1] - EEE[2];
712
713	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
714	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
715	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
716	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
717	for (k=4;k<32;k+=8)
718	{
719	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
720	}
721	for (k=2;k<32;k+=4)
722	{
723	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
724	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
725	}
726	for (k=1;k<32;k+=2)
727	{
728	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
729	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
730	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
731	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
732	}
733	src += 32;
734	dst ++;
735	}
736	}
737
738
739	void partialButterflyInverse32(short src,short dst,int shift, int line)
740	{
741	int j,k;
742	int E[16],O[16];
743	int EE[8],EO[8];
744	int EEE[4],EEO[4];
745	int EEEE[2],EEEO[2];
746	int add = 1<<(shift-1);
747
748	for (j=0; j<line; j++)
749	{
750	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
751	for (k=0;k<16;k++)
752	{
753	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
754	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
755	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
756	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
757	}
758	for (k=0;k<8;k++)
759	{
760	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
761	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
762	}
763	for (k=0;k<4;k++)
764	{
765	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
766	}
767	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
768	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
769	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
770	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
771
772	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
773	EEE[0] = EEEE[0] + EEEO[0];
774	EEE[3] = EEEE[0] - EEEO[0];
775	EEE[1] = EEEE[1] + EEEO[1];
776	EEE[2] = EEEE[1] - EEEO[1];
777	for (k=0;k<4;k++)
778	{
779	EE[k] = EEE[k] + EEO[k];
780	EE[k+4] = EEE[3-k] - EEO[3-k];
781	}
782	for (k=0;k<8;k++)
783	{
784	E[k] = EE[k] + EO[k];
785	E[k+8] = EE[7-k] - EO[7-k];
786	}
787	for (k=0;k<16;k++)
788	{
789	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
790	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
791	}
792	src ++;
793	dst += 32;
794	}
795	}
796
797	/** MxN forward transform (2D)
798	* \param block input data (residual)
799	* \param coeff output data (transform coefficients)
800	* \param iWidth input data (width of transform)
801	* \param iHeight input data (height of transform)
802	*/
803	void xTrMxN(short block,short coeff, int iWidth, int iHeight, UInt uiMode)
804	{
805	#if FULL_NBIT
806	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitDepth - 8; // log2(iWidth) - 1 + g_uiBitDepth - 8
807	#else
808	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitIncrement; // log2(iWidth) - 1 + g_uiBitIncrement
809	#endif
810	int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
811
812	short tmp[ 64 * 64 ];
813
814	#if !REMOVE_NSQT
815	if( iWidth == 16 && iHeight == 4)
816	{
817	partialButterfly16( block, tmp, shift_1st, iHeight );
818	partialButterfly4( tmp, coeff, shift_2nd, iWidth );
819	}
820	else if( iWidth == 32 && iHeight == 8 )
821	{
822	partialButterfly32( block, tmp, shift_1st, iHeight );
823	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
824	}
825	else if( iWidth == 4 && iHeight == 16)
826	{
827	partialButterfly4( block, tmp, shift_1st, iHeight );
828	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
829	}
830	else if( iWidth == 8 && iHeight == 32 )
831	{
832	partialButterfly8( block, tmp, shift_1st, iHeight );
833	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
834	}
835	else
836	#endif
837	if( iWidth == 4 && iHeight == 4)
838	{
839	#if INTRA_TRANS_SIMP
840	if (uiMode != REG_DCT)
841	{
842	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
843	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
844	}
845	else
846	{
847	partialButterfly4(block, tmp, shift_1st, iHeight);
848	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
849	}
850
851	#else
852	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
853	{
854	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
855	}
856	else
857	{
858	partialButterfly4(block, tmp, shift_1st, iHeight);
859	}
860	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
861	{
862	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
863	}
864	else
865	{
866	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
867	}
868	#endif
869	}
870	else if( iWidth == 8 && iHeight == 8)
871	{
872	partialButterfly8( block, tmp, shift_1st, iHeight );
873	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
874	}
875	else if( iWidth == 16 && iHeight == 16)
876	{
877	partialButterfly16( block, tmp, shift_1st, iHeight );
878	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
879	}
880	else if( iWidth == 32 && iHeight == 32)
881	{
882	partialButterfly32( block, tmp, shift_1st, iHeight );
883	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
884	}
885	}
886	/** MxN inverse transform (2D)
887	* \param coeff input data (transform coefficients)
888	* \param block output data (residual)
889	* \param iWidth input data (width of transform)
890	* \param iHeight input data (height of transform)
891	*/
892	void xITrMxN(short coeff,short block, int iWidth, int iHeight, UInt uiMode)
893	{
894	int shift_1st = SHIFT_INV_1ST;
895	#if FULL_NBIT
896	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
897	#else
898	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
899	#endif
900
901	short tmp[ 64*64];
902	#if !REMOVE_NSQT
903	if( iWidth == 16 && iHeight == 4)
904	{
905	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
906	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
907	}
908	else if( iWidth == 32 && iHeight == 8)
909	{
910	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
911	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
912	}
913	else if( iWidth == 4 && iHeight == 16)
914	{
915	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
916	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
917	}
918	else if( iWidth == 8 && iHeight == 32)
919	{
920	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
921	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
922	}
923	else
924	#endif
925	if( iWidth == 4 && iHeight == 4)
926	{
927	#if INTRA_TRANS_SIMP
928	if (uiMode != REG_DCT)
929	{
930	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
931	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
932	}
933	else
934	{
935	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
936	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
937	}
938	#else
939	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
940	{
941	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
942	}
943	else
944	{
945	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
946	}
947	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
948	{
949	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
950	}
951	else
952	{
953	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
954	}
955	#endif
956	}
957	else if( iWidth == 8 && iHeight == 8)
958	{
959	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
960	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
961	}
962	else if( iWidth == 16 && iHeight == 16)
963	{
964	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
965	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
966	}
967	else if( iWidth == 32 && iHeight == 32)
968	{
969	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
970	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
971	}
972	}
973
974	#endif //MATRIX_MULT
975
976	// To minimize the distortion only. No rate is considered.
977	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
978	{
979	Int lastCG = -1;
980	Int absSum = 0 ;
981	Int n ;
982
983	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
984	{
985	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
986	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
987	absSum = 0 ;
988
989	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
990	{
991	if( pQCoef[ scan[ n + subPos ]] )
992	{
993	lastNZPosInCG = n;
994	break;
995	}
996	}
997
998	for(n = 0; n <SCAN_SET_SIZE; n++ )
999	{
1000	if( pQCoef[ scan[ n + subPos ]] )
1001	{
1002	firstNZPosInCG = n;
1003	break;
1004	}
1005	}
1006
1007	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1008	{
1009	absSum += pQCoef[ scan[ n + subPos ]];
1010	}
1011
1012	if(lastNZPosInCG>=0 && lastCG==-1)
1013	{
1014	lastCG = 1 ;
1015	}
1016
1017	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1018	{
1019	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
1020	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1021	{
1022	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
1023
1024	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1025	{
1026	UInt blkPos = scan[ n+subPos ];
1027	if(pQCoef[ blkPos ] != 0 )
1028	{
1029	if(deltaU[blkPos]>0)
1030	{
1031	curCost = - deltaU[blkPos];
1032	curChange=1 ;
1033	}
1034	else
1035	{
1036	//curChange =-1;
1037	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1038	{
1039	curCost=MAX_INT ;
1040	}
1041	else
1042	{
1043	curCost = deltaU[blkPos];
1044	curChange =-1;
1045	}
1046	}
1047	}
1048	else
1049	{
1050	if(n<firstNZPosInCG)
1051	{
1052	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1053	if(thisSignBit != signbit )
1054	{
1055	curCost = MAX_INT;
1056	}
1057	else
1058	{
1059	curCost = - (deltaU[blkPos]) ;
1060	curChange = 1 ;
1061	}
1062	}
1063	else
1064	{
1065	curCost = - (deltaU[blkPos]) ;
1066	curChange = 1 ;
1067	}
1068	}
1069
1070	if( curCost<minCostInc)
1071	{
1072	minCostInc = curCost ;
1073	finalChange = curChange ;
1074	minPos = blkPos ;
1075	}
1076	} //CG loop
1077
1078	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
1079	{
1080	finalChange = -1;
1081	}
1082
1083	if(pCoef[minPos]>=0)
1084	{
1085	pQCoef[minPos] += finalChange ;
1086	}
1087	else
1088	{
1089	pQCoef[minPos] -= finalChange ;
1090	}
1091	} // Hide
1092	}
1093	if(lastCG==1)
1094	{
1095	lastCG=0 ;
1096	}
1097	} // TU loop
1098
1099	return;
1100	}
1101
1102	Void TComTrQuant::xQuant( TComDataCU* pcCU,
1103	Int* pSrc,
1104	TCoeff* pDes,
1105	#if ADAPTIVE_QP_SELECTION
1106	Int*& pArlDes,
1107	#endif
1108	Int iWidth,
1109	Int iHeight,
1110	UInt& uiAcSum,
1111	TextType eTType,
1112	UInt uiAbsPartIdx )
1113	{
1114	Int* piCoef = pSrc;
1115	TCoeff* piQCoef = pDes;
1116	#if ADAPTIVE_QP_SELECTION
1117	Int* piArlCCoef = pArlDes;
1118	#endif
1119	Int iAdd = 0;
1120
1121	Bool useRDOQForTransformSkip = !(m_useTransformSkipFast && pcCU->getTransformSkip(uiAbsPartIdx,eTType));
1122	if ( m_bUseRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) && useRDOQForTransformSkip)
1123	{
1124	#if ADAPTIVE_QP_SELECTION
1125	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1126	#else
1127	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1128	#endif
1129	}
1130	else
1131	{
1132	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1133
1134	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1135	if (scanIdx == SCAN_ZIGZAG)
1136	{
1137	scanIdx = SCAN_DIAG;
1138	}
1139
1140	#if REMOVE_NSQT
1141	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1142	#else
1143	if (iWidth != iHeight)
1144	{
1145	scanIdx = SCAN_DIAG;
1146	}
1147
1148	const UInt * scan;
1149	if (iWidth == iHeight)
1150	{
1151	scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1152	}
1153	else
1154	{
1155	scan = g_sigScanNSQT[ log2BlockSize - 2 ];
1156	}
1157	#endif
1158
1159	Int deltaU[32*32] ;
1160
1161	#if ADAPTIVE_QP_SELECTION
1162	QpParam cQpBase;
1163	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1164
1165	Int qpScaled;
1166	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1167
1168	if(eTType == TEXT_LUMA)
1169	{
1170	qpScaled = iQpBase + qpBDOffset;
1171	}
1172	else
1173	{
1174	#if CHROMA_QP_EXTENSION
1175	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1176	#else
1177	qpScaled = Clip3( -qpBDOffset, 51, iQpBase);
1178	#endif
1179
1180	if(qpScaled < 0)
1181	{
1182	qpScaled = qpScaled + qpBDOffset;
1183	}
1184	else
1185	{
1186	#if CHROMA_QP_EXTENSION
1187	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1188	#else
1189	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBDOffset;
1190	#endif
1191	}
1192	}
1193	cQpBase.setQpParam(qpScaled);
1194	#endif
1195
1196	#if !REMOVE_NSQT
1197	Bool bNonSqureFlag = ( iWidth != iHeight );
1198	#endif
1199	UInt dir = SCALING_LIST_SQT;
1200	#if !REMOVE_NSQT
1201	if( bNonSqureFlag )
1202	{
1203	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1204	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1205	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1206	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1207	iHeight = iWidth;
1208	}
1209	#endif
1210
1211	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1212	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1213	assert(scalingListType < 6);
1214	Int *piQuantCoeff = 0;
1215	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2, dir);
1216
1217	#if FULL_NBIT
1218	UInt uiBitDepth = g_uiBitDepth;
1219	#else
1220	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1221	#endif
1222	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1223
1224	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1225
1226	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1227
1228	#if ADAPTIVE_QP_SELECTION
1229	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1230	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1231	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1232	Int iAddC = 1 << (iQBitsC-1);
1233	#endif
1234
1235	Int qBits8 = iQBits-8;
1236	for( Int n = 0; n < iWidth*iHeight; n++ )
1237	{
1238	Int iLevel;
1239	Int iSign;
1240	UInt uiBlockPos = n;
1241	iLevel = piCoef[uiBlockPos];
1242	iSign = (iLevel < 0 ? -1: 1);
1243
1244	#if ADAPTIVE_QP_SELECTION
1245	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1246	if( m_bUseAdaptQpSelect )
1247	{
1248	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1249	}
1250	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1251	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1252	#else
1253	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1254	deltaU[uiBlockPos] = (Int)( ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1255	#endif
1256	uiAcSum += iLevel;
1257	iLevel *= iSign;
1258	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1259	} // for n
1260	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1261	{
1262	if(uiAcSum>=2)
1263	{
1264	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1265	}
1266	}
1267	} //if RDOQ
1268	//return;
1269
1270	}
1271
1272	Void TComTrQuant::xDeQuant( const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1273	{
1274
1275	const TCoeff* piQCoef = pSrc;
1276	Int* piCoef = pDes;
1277	UInt dir = SCALING_LIST_SQT;
1278	#if !REMOVE_NSQT
1279	if( iWidth != iHeight )
1280	{
1281	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1282	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1283	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1284	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1285	iHeight = iWidth;
1286	}
1287	#endif
1288
1289	if ( iWidth > (Int)m_uiMaxTrSize )
1290	{
1291	iWidth = m_uiMaxTrSize;
1292	iHeight = m_uiMaxTrSize;
1293	}
1294
1295	Int iShift,iAdd,iCoeffQ;
1296	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1297
1298	#if FULL_NBIT
1299	UInt uiBitDepth = g_uiBitDepth;
1300	#else
1301	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1302	#endif
1303	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1304
1305	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1306
1307	TCoeff clipQCoef;
1308	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + uiBitDepth - m_cQP.m_iPer) );
1309	const Int levelLimit = 1 << bitRange;
1310
1311	if(getUseScalingList())
1312	{
1313	iShift += 4;
1314	if(iShift > m_cQP.m_iPer)
1315	{
1316	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1317	}
1318	else
1319	{
1320	iAdd = 0;
1321	}
1322	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1323
1324	if(iShift > m_cQP.m_iPer)
1325	{
1326	for( Int n = 0; n < iWidth*iHeight; n++ )
1327	{
1328	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1329	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1330	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1331	}
1332	}
1333	else
1334	{
1335	for( Int n = 0; n < iWidth*iHeight; n++ )
1336	{
1337	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
1338	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1339	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1340	}
1341	}
1342	}
1343	else
1344	{
1345	iAdd = 1 << (iShift-1);
1346	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1347
1348	for( Int n = 0; n < iWidth*iHeight; n++ )
1349	{
1350	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1351	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1352	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1353	}
1354	}
1355	}
1356
1357	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
1358	Bool bUseRDOQ, Bool bEnc, Bool useTransformSkipFast
1359	#if ADAPTIVE_QP_SELECTION
1360	, Bool bUseAdaptQpSelect
1361	#endif
1362	)
1363	{
1364	m_uiMaxTrSize = uiMaxTrSize;
1365	m_bEnc = bEnc;
1366	m_bUseRDOQ = bUseRDOQ;
1367	#if ADAPTIVE_QP_SELECTION
1368	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1369	#endif
1370	m_useTransformSkipFast = useTransformSkipFast;
1371	}
1372
1373	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1374	Pel* pcResidual,
1375	UInt uiStride,
1376	TCoeff* rpcCoeff,
1377	#if ADAPTIVE_QP_SELECTION
1378	Int*& rpcArlCoeff,
1379	#endif
1380	UInt uiWidth,
1381	UInt uiHeight,
1382	UInt& uiAbsSum,
1383	TextType eTType,
1384	UInt uiAbsPartIdx,
1385	Bool useTransformSkip
1386	)
1387	{
1388	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1389	{
1390	uiAbsSum=0;
1391	for (UInt k = 0; k<uiHeight; k++)
1392	{
1393	for (UInt j = 0; j<uiWidth; j++)
1394	{
1395	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1396	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1397	}
1398	}
1399	return;
1400	}
1401	UInt uiMode; //luma intra pred
1402	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1403	{
1404	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1405	}
1406	else
1407	{
1408	uiMode = REG_DCT;
1409	}
1410
1411	uiAbsSum = 0;
1412	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1413	if(useTransformSkip)
1414	{
1415	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1416	}
1417	else
1418	{
1419	xT( uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1420	}
1421	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1422	#if ADAPTIVE_QP_SELECTION
1423	rpcArlCoeff,
1424	#endif
1425	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1426	}
1427
1428	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1429	{
1430	if(transQuantBypass)
1431	{
1432	for (UInt k = 0; k<uiHeight; k++)
1433	{
1434	for (UInt j = 0; j<uiWidth; j++)
1435	{
1436	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1437	}
1438	}
1439	return;
1440	}
1441	xDeQuant( pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1442	if(useTransformSkip == true)
1443	{
1444	xITransformSkip( m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1445	}
1446	else
1447	{
1448	xIT( uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1449	}
1450	}
1451
1452	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1453	{
1454	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1455	{
1456	return;
1457	}
1458
1459	UInt uiLumaTrMode, uiChromaTrMode;
1460	pcCU->convertTransIdx( uiAbsPartIdx, pcCU->getTransformIdx( uiAbsPartIdx ), uiLumaTrMode, uiChromaTrMode );
1461	const UInt uiStopTrMode = eTxt == TEXT_LUMA ? uiLumaTrMode : uiChromaTrMode;
1462
1463	if( uiTrMode == uiStopTrMode )
1464	{
1465	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1466	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1467	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1468	{
1469	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1470	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1471	{
1472	return;
1473	}
1474	uiWidth <<= 1;
1475	uiHeight <<= 1;
1476	}
1477	Pel* pResi = rpcResidual + uiAddr;
1478	#if !REMOVE_NSQT
1479	if( pcCU->useNonSquareTrans( uiTrMode, uiAbsPartIdx ) )
1480	{
1481	Int trWidth = uiWidth;
1482	Int trHeight = uiHeight;
1483	pcCU->getNSQTSize( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1484
1485	uiWidth = trWidth;
1486	uiHeight = trHeight;
1487	}
1488	#endif
1489	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1490	assert(scalingListType < 6);
1491	#if INTER_TRANSFORMSKIP
1492	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1493	#else
1494	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
1495	#endif
1496	}
1497	else
1498	{
1499	uiTrMode++;
1500	uiWidth >>= 1;
1501	uiHeight >>= 1;
1502	Int trWidth = uiWidth, trHeight = uiHeight;
1503	#if !REMOVE_NSQT
1504	Int trLastWidth = uiWidth << 1, trLastHeight = uiHeight << 1;
1505	pcCU->getNSQTSize ( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1506	pcCU->getNSQTSize ( uiTrMode - 1, uiAbsPartIdx, trLastWidth, trLastHeight );
1507	#endif
1508	UInt uiAddrOffset = trHeight * uiStride;
1509	UInt uiCoefOffset = trWidth * trHeight;
1510	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1511	#if !REMOVE_NSQT
1512	UInt uiInterTUSplitDirection = pcCU->getInterTUSplitDirection ( trWidth, trHeight, trLastWidth, trLastHeight );
1513	if( uiInterTUSplitDirection != 2 )
1514	{
1515	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1516	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth * uiInterTUSplitDirection + uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1517	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 2 * trWidth * uiInterTUSplitDirection + 2 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1518	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 3 * trWidth * uiInterTUSplitDirection + 3 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1519	}
1520	else
1521	#endif
1522	{
1523	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1524	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1525	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1526	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1527	}
1528	}
1529	}
1530
1531	// ------------------------------------------------------------------------------------------------
1532	// Logical transform
1533	// ------------------------------------------------------------------------------------------------
1534
1535	/** Wrapper function between HM interface and core NxN forward transform (2D)
1536	* \param piBlkResi input data (residual)
1537	* \param psCoeff output data (transform coefficients)
1538	* \param uiStride stride of input residual data
1539	* \param iSize transform size (iSize x iSize)
1540	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1541	*/
1542	Void TComTrQuant::xT( UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1543	{
1544	#if MATRIX_MULT
1545	Int iSize = iWidth;
1546	#if !REMOVE_NSQT
1547	if( iWidth != iHeight)
1548	{
1549	xTrMxN( piBlkResi, psCoeff, uiStride, (UInt)iWidth, (UInt)iHeight );
1550	return;
1551	}
1552	#endif
1553	xTr(piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1554	#else
1555	Int j;
1556	{
1557	short block[ 64 * 64 ];
1558	short coeff[ 64 * 64 ];
1559	{
1560	for (j = 0; j < iHeight; j++)
1561	{
1562	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( short ) );
1563	}
1564	}
1565	xTrMxN( block, coeff, iWidth, iHeight, uiMode );
1566	for ( j = 0; j < iHeight * iWidth; j++ )
1567	{
1568	psCoeff[ j ] = coeff[ j ];
1569	}
1570	return ;
1571	}
1572	#endif
1573	}
1574
1575
1576	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1577	* \param plCoef input data (transform coefficients)
1578	* \param pResidual output data (residual)
1579	* \param uiStride stride of input residual data
1580	* \param iSize transform size (iSize x iSize)
1581	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1582	*/
1583	Void TComTrQuant::xIT( UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1584	{
1585	#if MATRIX_MULT
1586	Int iSize = iWidth;
1587	#if !REMOVE_NSQT
1588	if( iWidth != iHeight )
1589	{
1590	xITrMxN( plCoef, pResidual, uiStride, (UInt)iWidth, (UInt)iHeight );
1591	return;
1592	}
1593	#endif
1594	xITr(plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1595	#else
1596	Int j;
1597	{
1598	short block[ 64 * 64 ];
1599	short coeff[ 64 * 64 ];
1600	for ( j = 0; j < iHeight * iWidth; j++ )
1601	{
1602	coeff[j] = (short)plCoef[j];
1603	}
1604	xITrMxN( coeff, block, iWidth, iHeight, uiMode );
1605	{
1606	for ( j = 0; j < iHeight; j++ )
1607	{
1608	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(short) );
1609	}
1610	}
1611	return ;
1612	}
1613	#endif
1614	}
1615
1616	/** Wrapper function between HM interface and core 4x4 transform skipping
1617	* \param piBlkResi input data (residual)
1618	* \param psCoeff output data (transform coefficients)
1619	* \param uiStride stride of input residual data
1620	* \param iSize transform size (iSize x iSize)
1621	*/
1622	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1623	{
1624	assert( width == height );
1625	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1626	#if FULL_NBIT
1627	UInt uiBitDepth = g_uiBitDepth;
1628	#else
1629	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1630	#endif
1631	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1632	UInt transformSkipShift;
1633	Int j,k;
1634	if(shift >= 0)
1635	{
1636	transformSkipShift = shift;
1637	for (j = 0; j < height; j++)
1638	{
1639	for(k = 0; k < width; k ++)
1640	{
1641	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1642	}
1643	}
1644	}
1645	else
1646	{
1647	//The case when uiBitDepth > 13
1648	Int offset;
1649	transformSkipShift = -shift;
1650	offset = (1 << (transformSkipShift - 1));
1651	for (j = 0; j < height; j++)
1652	{
1653	for(k = 0; k < width; k ++)
1654	{
1655	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1656	}
1657	}
1658	}
1659	}
1660
1661	/** Wrapper function between HM interface and core NxN transform skipping
1662	* \param plCoef input data (coefficients)
1663	* \param pResidual output data (residual)
1664	* \param uiStride stride of input residual data
1665	* \param iSize transform size (iSize x iSize)
1666	*/
1667	Void TComTrQuant::xITransformSkip( Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1668	{
1669	assert( width == height );
1670	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1671	#if FULL_NBIT
1672	UInt uiBitDepth = g_uiBitDepth;
1673	#else
1674	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1675	#endif
1676	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1677	UInt transformSkipShift;
1678	Int j,k;
1679	if(shift > 0)
1680	{
1681	Int offset;
1682	transformSkipShift = shift;
1683	offset = (1 << (transformSkipShift -1));
1684	for ( j = 0; j < height; j++ )
1685	{
1686	for(k = 0; k < width; k ++)
1687	{
1688	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1689	}
1690	}
1691	}
1692	else
1693	{
1694	//The case when uiBitDepth >= 13
1695	transformSkipShift = - shift;
1696	for ( j = 0; j < height; j++ )
1697	{
1698	for(k = 0; k < width; k ++)
1699	{
1700	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1701	}
1702	}
1703	}
1704	}
1705
1706	/** RDOQ with CABAC
1707	* \param pcCU pointer to coding unit structure
1708	* \param plSrcCoeff pointer to input buffer
1709	* \param piDstCoeff reference to pointer to output buffer
1710	* \param uiWidth block width
1711	* \param uiHeight block height
1712	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1713	* \param eTType plane type / luminance or chrominance
1714	* \param uiAbsPartIdx absolute partition index
1715	* \returns Void
1716	* Rate distortion optimized quantization for entropy
1717	* coding engines using probability models like CABAC
1718	*/
1719	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1720	Int* plSrcCoeff,
1721	TCoeff* piDstCoeff,
1722	#if ADAPTIVE_QP_SELECTION
1723	Int*& piArlDstCoeff,
1724	#endif
1725	UInt uiWidth,
1726	UInt uiHeight,
1727	UInt& uiAbsSum,
1728	TextType eTType,
1729	UInt uiAbsPartIdx )
1730	{
1731	Int iQBits = m_cQP.m_iBits;
1732	Double dTemp = 0;
1733	UInt dir = SCALING_LIST_SQT;
1734	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1735	Int uiQ = g_quantScales[m_cQP.rem()];
1736	#if !REMOVE_NSQT
1737	if (uiWidth != uiHeight)
1738	{
1739	uiLog2TrSize += (uiWidth > uiHeight) ? -1 : 1;
1740	dir = ( uiWidth < uiHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1741	}
1742	#endif
1743
1744	#if FULL_NBIT
1745	UInt uiBitDepth = g_uiBitDepth;
1746	#else
1747	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1748	#endif
1749	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1750	UInt uiGoRiceParam = 0;
1751	Double d64BlockUncodedCost = 0;
1752	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1753	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1754	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1755	assert(scalingListType < 6);
1756
1757	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1758	double dErrScale = 0;
1759	double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem,dir);
1760	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1761	Int *piQCoef = piQCoefOrg;
1762	double *pdErrScale = pdErrScaleOrg;
1763	#if ADAPTIVE_QP_SELECTION
1764	Int iQBitsC = iQBits - ARL_C_PRECISION;
1765	Int iAddC = 1 << (iQBitsC-1);
1766	#endif
1767	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1768	if (uiScanIdx == SCAN_ZIGZAG)
1769	{
1770	// Map value zigzag to diagonal scan
1771	uiScanIdx = SCAN_DIAG;
1772	}
1773	Int blockType = uiLog2BlkSize;
1774	#if !REMOVE_NSQT
1775	if (uiWidth != uiHeight)
1776	{
1777	uiScanIdx = SCAN_DIAG;
1778	blockType = 4;
1779	}
1780	#endif
1781
1782	#if ADAPTIVE_QP_SELECTION
1783	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1784	#endif
1785
1786	Double pdCostCoeff [ 32 * 32 ];
1787	Double pdCostSig [ 32 * 32 ];
1788	Double pdCostCoeff0[ 32 * 32 ];
1789	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1790	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1791	Int rateIncUp [ 32 * 32 ];
1792	Int rateIncDown [ 32 * 32 ];
1793	Int sigRateDelta[ 32 * 32 ];
1794	Int deltaU [ 32 * 32 ];
1795	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1796	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1797	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1798	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1799
1800	const UInt * scanCG;
1801	#if !REMOVE_NSQT
1802	if (uiWidth == uiHeight)
1803	#endif
1804	{
1805	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1806	if( uiLog2BlkSize == 3 )
1807	{
1808	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1809	}
1810	else if( uiLog2BlkSize == 5 )
1811	{
1812	scanCG = g_sigLastScanCG32x32;
1813	}
1814	}
1815	#if !REMOVE_NSQT
1816	else
1817	{
1818	scanCG = g_sigCGScanNSQT[ uiLog2BlkSize - 2 ];
1819	}
1820	#endif
1821	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1822	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1823	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1824	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1825	Int iCGLastScanPos = -1;
1826
1827	UInt uiCtxSet = 0;
1828	Int c1 = 1;
1829	Int c2 = 0;
1830	#if !REMOVE_NUM_GREATER1
1831	UInt uiNumOne = 0;
1832	#endif
1833	Double d64BaseCost = 0;
1834	Int iLastScanPos = -1;
1835	dTemp = dErrScale;
1836
1837	UInt c1Idx = 0;
1838	UInt c2Idx = 0;
1839	Int baseLevel;
1840
1841	#if REMOVE_NSQT
1842	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1843	#else
1844	const UInt * scan;
1845	if (uiWidth == uiHeight)
1846	{
1847	scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1848	}
1849	else
1850	{
1851	scan = g_sigScanNSQT[ uiLog2BlkSize - 2 ];
1852	}
1853	#endif
1854
1855	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1856	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1857
1858	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1859	Int iScanPos;
1860	coeffGroupRDStats rdStats;
1861
1862	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1863	{
1864	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1865	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1866	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1867	#if !REMOVAL_8x2_2x8_CG
1868	if( uiWidth == 8 && uiHeight == 8 && (uiScanIdx == SCAN_HOR \|\| uiScanIdx == SCAN_VER) )
1869	{
1870	uiCGPosY = (uiScanIdx == SCAN_HOR ? uiCGBlkPos : 0);
1871	uiCGPosX = (uiScanIdx == SCAN_VER ? uiCGBlkPos : 0);
1872	}
1873	#endif
1874	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1875
1876	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1877	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1878	{
1879	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1880	//===== quantization =====
1881	UInt uiBlkPos = scan[iScanPos];
1882	// set coeff
1883	uiQ = piQCoef[uiBlkPos];
1884	dTemp = pdErrScale[uiBlkPos];
1885	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1886	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1887	#if ADAPTIVE_QP_SELECTION
1888	if( m_bUseAdaptQpSelect )
1889	{
1890	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1891	}
1892	#endif
1893	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1894
1895	Double dErr = Double( lLevelDouble );
1896	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1897	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1898	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1899
1900	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1901	{
1902	iLastScanPos = iScanPos;
1903	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1904	iCGLastScanPos = iCGScanPos;
1905	}
1906
1907	if ( iLastScanPos >= 0 )
1908	{
1909	//===== coefficient level estimation =====
1910	UInt uiLevel;
1911	UInt uiOneCtx = 4 * uiCtxSet + c1;
1912	UInt uiAbsCtx = uiCtxSet + c2;
1913
1914	if( iScanPos == iLastScanPos )
1915	{
1916	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1917	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1918	c1Idx, c2Idx, iQBits, dTemp, 1 );
1919	}
1920	else
1921	{
1922	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1923	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1924	#if REMOVAL_8x2_2x8_CG
1925	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1926	#else
1927	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1928	#endif
1929	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1930	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1931	c1Idx, c2Idx, iQBits, dTemp, 0 );
1932	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1933	}
1934	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1935	if( uiLevel > 0 )
1936	{
1937	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1938	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1939	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1940	}
1941	else // uiLevel == 0
1942	{
1943	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1944	}
1945	piDstCoeff[ uiBlkPos ] = uiLevel;
1946	d64BaseCost += pdCostCoeff [ iScanPos ];
1947
1948
1949	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1950	if( uiLevel >= baseLevel )
1951	{
1952	if(uiLevel > 3*(1<<uiGoRiceParam))
1953	{
1954	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1955	}
1956	}
1957	if ( uiLevel >= 1)
1958	{
1959	c1Idx ++;
1960	}
1961
1962	//===== update bin model =====
1963	if( uiLevel > 1 )
1964	{
1965	c1 = 0;
1966	c2 += (c2 < 2);
1967	#if !REMOVE_NUM_GREATER1
1968	uiNumOne++;
1969	#endif
1970	c2Idx ++;
1971	}
1972	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1973	{
1974	c1++;
1975	}
1976
1977	//===== context set update =====
1978	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1979	{
1980	#if !REMOVE_NUM_GREATER1
1981	c1 = 1;
1982	#endif
1983	c2 = 0;
1984	uiGoRiceParam = 0;
1985
1986	c1Idx = 0;
1987	c2Idx = 0;
1988	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1989	#if REMOVE_NUM_GREATER1
1990	if( c1 == 0 )
1991	#else
1992	if( uiNumOne > 0 )
1993	#endif
1994	{
1995	uiCtxSet++;
1996	}
1997	#if REMOVE_NUM_GREATER1
1998	c1 = 1;
1999	#else
2000	uiNumOne >>= 1;
2001	#endif
2002	}
2003	}
2004	else
2005	{
2006	d64BaseCost += pdCostCoeff0[ iScanPos ];
2007	}
2008	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2009	if (iScanPosinCG == 0 )
2010	{
2011	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2012	}
2013	if (piDstCoeff[ uiBlkPos ] )
2014	{
2015	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2016	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2017	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2018	if ( iScanPosinCG != 0 )
2019	{
2020	rdStats.iNNZbeforePos0++;
2021	}
2022	}
2023	} //end for (iScanPosinCG)
2024
2025	if (iCGLastScanPos >= 0)
2026	{
2027	if( iCGScanPos )
2028	{
2029	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2030	{
2031	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2032	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2033	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2034	}
2035	else
2036	{
2037	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2038	{
2039	if ( rdStats.iNNZbeforePos0 == 0 )
2040	{
2041	d64BaseCost -= rdStats.d64SigCost_0;
2042	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2043	}
2044	// rd-cost if SigCoeffGroupFlag = 0, initialization
2045	Double d64CostZeroCG = d64BaseCost;
2046
2047	// add SigCoeffGroupFlag cost to total cost
2048	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2049	if (iCGScanPos < iCGLastScanPos)
2050	{
2051	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2052	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2053	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2054	}
2055
2056	// try to convert the current coeff group from non-zero to all-zero
2057	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2058	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2059	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2060
2061	// if we can save cost, change this block to all-zero block
2062	if ( d64CostZeroCG < d64BaseCost )
2063	{
2064	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2065	d64BaseCost = d64CostZeroCG;
2066	if (iCGScanPos < iCGLastScanPos)
2067	{
2068	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2069	}
2070	// reset coeffs to 0 in this block
2071	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2072	{
2073	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2074	UInt uiBlkPos = scan[ iScanPos ];
2075
2076	if (piDstCoeff[ uiBlkPos ])
2077	{
2078	piDstCoeff [ uiBlkPos ] = 0;
2079	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2080	pdCostSig [ iScanPos ] = 0;
2081	}
2082	}
2083	} // end if ( d64CostAllZeros < d64BaseCost )
2084	}
2085	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2086	}
2087	else
2088	{
2089	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2090	}
2091	}
2092	} //end for (iCGScanPos)
2093
2094	//===== estimate last position =====
2095	if ( iLastScanPos < 0 )
2096	{
2097	return;
2098	}
2099
2100	Double d64BestCost = 0;
2101	Int ui16CtxCbf = 0;
2102	Int iBestLastIdxP1 = 0;
2103	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2104	{
2105	ui16CtxCbf = 0;
2106	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2107	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2108	}
2109	else
2110	{
2111	ui16CtxCbf = pcCU->getCtxQtCbf( uiAbsPartIdx, eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
2112	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
2113	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2114	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2115	}
2116
2117	Bool bFoundLast = false;
2118	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2119	{
2120	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
2121
2122	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2123	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2124	{
2125	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2126	{
2127	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2128	if (iScanPos > iLastScanPos) continue;
2129	UInt uiBlkPos = scan[iScanPos];
2130
2131	if( piDstCoeff[ uiBlkPos ] )
2132	{
2133	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
2134	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
2135
2136	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
2137	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2138
2139	if( totalCost < d64BestCost )
2140	{
2141	iBestLastIdxP1 = iScanPos + 1;
2142	d64BestCost = totalCost;
2143	}
2144	if( piDstCoeff[ uiBlkPos ] > 1 )
2145	{
2146	bFoundLast = true;
2147	break;
2148	}
2149	d64BaseCost -= pdCostCoeff[ iScanPos ];
2150	d64BaseCost += pdCostCoeff0[ iScanPos ];
2151	}
2152	else
2153	{
2154	d64BaseCost -= pdCostSig[ iScanPos ];
2155	}
2156	} //end for
2157	if (bFoundLast)
2158	{
2159	break;
2160	}
2161	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2162	} // end for
2163
2164	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2165	{
2166	Int blkPos = scan[ scanPos ];
2167	Int level = piDstCoeff[ blkPos ];
2168	uiAbsSum += level;
2169	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2170	}
2171
2172	//===== clean uncoded coefficients =====
2173	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2174	{
2175	piDstCoeff[ scan[ scanPos ] ] = 0;
2176	}
2177
2178	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2179	{
2180	Int64 rdFactor = (Int64)((Double)(g_invQuantScales[m_cQP.rem()])(Double)(g_invQuantScales[m_cQP.rem()])(Double)(1<<(2m_cQP.m_iPer))/m_dLambda/16/(Double)(1<<(2g_uiBitIncrement)) + 0.5);
2181	Int lastCG = -1;
2182	Int absSum = 0 ;
2183	Int n ;
2184
2185	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
2186	{
2187	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
2188	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
2189	absSum = 0 ;
2190
2191	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
2192	{
2193	if( piDstCoeff[ scan[ n + subPos ]] )
2194	{
2195	lastNZPosInCG = n;
2196	break;
2197	}
2198	}
2199
2200	for(n = 0; n <SCAN_SET_SIZE; n++ )
2201	{
2202	if( piDstCoeff[ scan[ n + subPos ]] )
2203	{
2204	firstNZPosInCG = n;
2205	break;
2206	}
2207	}
2208
2209	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2210	{
2211	absSum += piDstCoeff[ scan[ n + subPos ]];
2212	}
2213
2214	if(lastNZPosInCG>=0 && lastCG==-1)
2215	{
2216	lastCG = 1;
2217	}
2218
2219	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2220	{
2221	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
2222	if( signbit!=(absSum&0x1) ) // hide but need tune
2223	{
2224	// calculate the cost
2225	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
2226	Int minPos =-1, finalChange=0, curChange=0;
2227
2228	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
2229	{
2230	UInt uiBlkPos = scan[ n + subPos ];
2231	if(piDstCoeff[ uiBlkPos ] != 0 )
2232	{
2233	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
2234	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2235	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
2236
2237	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2238	{
2239	costDown -= (4<<15) ;
2240	}
2241
2242	if(costUp<costDown)
2243	{
2244	curCost = costUp;
2245	curChange = 1 ;
2246	}
2247	else
2248	{
2249	curChange = -1 ;
2250	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2251	{
2252	curCost = MAX_INT64 ;
2253	}
2254	else
2255	{
2256	curCost = costDown ;
2257	}
2258	}
2259	}
2260	else
2261	{
2262	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2263	curChange = 1 ;
2264
2265	if(n<firstNZPosInCG)
2266	{
2267	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2268	if(thissignbit != signbit )
2269	{
2270	curCost = MAX_INT64;
2271	}
2272	}
2273	}
2274
2275	if( curCost<minCostInc)
2276	{
2277	minCostInc = curCost ;
2278	finalChange = curChange ;
2279	minPos = uiBlkPos ;
2280	}
2281	}
2282
2283	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
2284	{
2285	finalChange = -1;
2286	}
2287
2288	if(plSrcCoeff[minPos]>=0)
2289	{
2290	piDstCoeff[minPos] += finalChange ;
2291	}
2292	else
2293	{
2294	piDstCoeff[minPos] -= finalChange ;
2295	}
2296	}
2297	}
2298
2299	if(lastCG==1)
2300	{
2301	lastCG=0 ;
2302	}
2303	}
2304	}
2305	}
2306
2307	/** Pattern decision for context derivation process of significant_coeff_flag
2308	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2309	* \param posXCG column of current coefficient group
2310	* \param posYCG row of current coefficient group
2311	* \param width width of the block
2312	* \param height height of the block
2313	* \returns pattern for current coefficient group
2314	*/
2315	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2316	{
2317	#if REMOVAL_8x2_2x8_CG
2318	if( width == 4 && height == 4 ) return -1;
2319	#else
2320	if( width == height && width <= 8 ) return -1;
2321	#endif
2322
2323	UInt sigRight = 0;
2324	UInt sigLower = 0;
2325
2326	width >>= 2;
2327	height >>= 2;
2328	if( posXCG < width - 1 )
2329	{
2330	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2331	}
2332	if (posYCG < height - 1 )
2333	{
2334	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2335	}
2336	return sigRight + (sigLower<<1);
2337	}
2338
2339	/** Context derivation process of coeff_abs_significant_flag
2340	* \param patternSigCtx pattern for current coefficient group
2341	* \param posX column of current scan position
2342	* \param posY row of current scan position
2343	* \param blockType log2 value of block size if square block, or 4 otherwise
2344	* \param width width of the block
2345	* \param height height of the block
2346	* \param textureType texture type (TEXT_LUMA...)
2347	* \returns ctxInc for current scan position
2348	*/
2349	Int TComTrQuant::getSigCtxInc (
2350	Int patternSigCtx,
2351	#if REMOVAL_8x2_2x8_CG
2352	UInt scanIdx,
2353	#endif
2354	Int posX,
2355	Int posY,
2356	Int blockType,
2357	Int width
2358	,Int height
2359	,TextType textureType
2360	)
2361	{
2362	const Int ctxIndMap[16] =
2363	{
2364	0, 1, 4, 5,
2365	2, 3, 4, 5,
2366	6, 6, 8, 8,
2367	7, 7, 8, 8
2368	};
2369
2370	if( posX + posY == 0 )
2371	{
2372	return 0;
2373	}
2374
2375	if ( blockType == 2 )
2376	{
2377	return ctxIndMap[ 4 * posY + posX ];
2378	}
2379
2380	#if !REMOVAL_8x2_2x8_CG
2381	if ( blockType == 3 )
2382	{
2383	return 9 + ctxIndMap[ 4 * (posY >> 1) + (posX >> 1) ];
2384	}
2385
2386	Int offset = 18;
2387	#else
2388	Int offset = blockType == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2389	#endif
2390
2391	Int posXinSubset = posX-((posX>>2)<<2);
2392	Int posYinSubset = posY-((posY>>2)<<2);
2393	Int cnt = 0;
2394	if(patternSigCtx==0)
2395	{
2396	#if REMOVAL_8x2_2x8_CG
2397	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2398	#else
2399	cnt = posXinSubset+posYinSubset<=2 ? 1 : 0;
2400	#endif
2401	}
2402	else if(patternSigCtx==1)
2403	{
2404	#if REMOVAL_8x2_2x8_CG
2405	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2406	#else
2407	cnt = posYinSubset<=1 ? 1 : 0;
2408	#endif
2409	}
2410	else if(patternSigCtx==2)
2411	{
2412	#if REMOVAL_8x2_2x8_CG
2413	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2414	#else
2415	cnt = posXinSubset<=1 ? 1 : 0;
2416	#endif
2417	}
2418	else
2419	{
2420	#if REMOVAL_8x2_2x8_CG
2421	cnt = 2;
2422	#else
2423	cnt = posXinSubset+posYinSubset<=4 ? 2 : 1;
2424	#endif
2425	}
2426
2427	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2428	}
2429
2430	/** Get the best level in RD sense
2431	* \param rd64CodedCost reference to coded cost
2432	* \param rd64CodedCost0 reference to cost when coefficient is 0
2433	* \param rd64CodedCostSig reference to cost of significant coefficient
2434	* \param lLevelDouble reference to unscaled quantized level
2435	* \param uiMaxAbsLevel scaled quantized level
2436	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2437	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2438	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2439	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2440	* \param iQBits quantization step size
2441	* \param dTemp correction factor
2442	* \param bLast indicates if the coefficient is the last significant
2443	* \returns best quantized transform level for given scan position
2444	* This method calculates the best quantized transform level for a given scan position.
2445	*/
2446	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2447	Double& rd64CodedCost0,
2448	Double& rd64CodedCostSig,
2449	Int lLevelDouble,
2450	UInt uiMaxAbsLevel,
2451	UShort ui16CtxNumSig,
2452	UShort ui16CtxNumOne,
2453	UShort ui16CtxNumAbs,
2454	UShort ui16AbsGoRice,
2455	UInt c1Idx,
2456	UInt c2Idx,
2457	Int iQBits,
2458	Double dTemp,
2459	Bool bLast ) const
2460	{
2461	Double dCurrCostSig = 0;
2462	UInt uiBestAbsLevel = 0;
2463
2464	if( !bLast && uiMaxAbsLevel < 3 )
2465	{
2466	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2467	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2468	if( uiMaxAbsLevel == 0 )
2469	{
2470	return uiBestAbsLevel;
2471	}
2472	}
2473	else
2474	{
2475	rd64CodedCost = MAX_DOUBLE;
2476	}
2477
2478	if( !bLast )
2479	{
2480	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2481	}
2482
2483	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2484	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2485	{
2486	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2487	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2488	dCurrCost += dCurrCostSig;
2489
2490	if( dCurrCost < rd64CodedCost )
2491	{
2492	uiBestAbsLevel = uiAbsLevel;
2493	rd64CodedCost = dCurrCost;
2494	rd64CodedCostSig = dCurrCostSig;
2495	}
2496	}
2497
2498	return uiBestAbsLevel;
2499	}
2500
2501	/** Calculates the cost for specific absolute transform level
2502	* \param uiAbsLevel scaled quantized level
2503	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2504	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2505	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2506	* \returns cost of given absolute transform level
2507	*/
2508	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2509	UShort ui16CtxNumOne,
2510	UShort ui16CtxNumAbs,
2511	UShort ui16AbsGoRice
2512	, UInt c1Idx,
2513	UInt c2Idx
2514	) const
2515	{
2516	Double iRate = xGetIEPRate();
2517	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2518
2519	if ( uiAbsLevel >= baseLevel )
2520	{
2521	UInt symbol = uiAbsLevel - baseLevel;
2522	UInt length;
2523	#if COEF_REMAIN_BIN_REDUCTION
2524	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2525	#else
2526	if (symbol < (8 << ui16AbsGoRice))
2527	#endif
2528	{
2529	length = symbol>>ui16AbsGoRice;
2530	iRate += (length+1+ui16AbsGoRice)<< 15;
2531	}
2532	else
2533	{
2534	length = ui16AbsGoRice;
2535	#if COEF_REMAIN_BIN_REDUCTION
2536	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2537	#else
2538	symbol = symbol - ( 8 << ui16AbsGoRice);
2539	#endif
2540	while (symbol >= (1<<length))
2541	{
2542	symbol -= (1<<(length++));
2543	}
2544	#if COEF_REMAIN_BIN_REDUCTION
2545	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2546	#else
2547	iRate += (8+length+1-ui16AbsGoRice+length)<< 15;
2548	#endif
2549	}
2550	if (c1Idx < C1FLAG_NUMBER)
2551	{
2552	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2553
2554	if (c2Idx < C2FLAG_NUMBER)
2555	{
2556	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2557	}
2558	}
2559	}
2560	else
2561	if( uiAbsLevel == 1 )
2562	{
2563	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2564	}
2565	else if( uiAbsLevel == 2 )
2566	{
2567	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2568	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2569	}
2570	else
2571	{
2572	assert (0);
2573	}
2574	return xGetICost( iRate );
2575	}
2576
2577	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2578	UShort ui16CtxNumOne,
2579	UShort ui16CtxNumAbs,
2580	UShort ui16AbsGoRice
2581	, UInt c1Idx,
2582	UInt c2Idx
2583	) const
2584	{
2585	Int iRate = 0;
2586	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2587
2588	if ( uiAbsLevel >= baseLevel )
2589	{
2590	UInt uiSymbol = uiAbsLevel - baseLevel;
2591	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2592	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2593
2594	if( bExpGolomb )
2595	{
2596	uiAbsLevel = uiSymbol - uiMaxVlc;
2597	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2598	iRate += iEGS << 15;
2599	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2600	}
2601
2602	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2603	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2604
2605	iRate += ui16NumBins << 15;
2606
2607	if (c1Idx < C1FLAG_NUMBER)
2608	{
2609	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2610
2611	if (c2Idx < C2FLAG_NUMBER)
2612	{
2613	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2614	}
2615	}
2616	}
2617	else
2618	if( uiAbsLevel == 0 )
2619	{
2620	return 0;
2621	}
2622	else if( uiAbsLevel == 1 )
2623	{
2624	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2625	}
2626	else if( uiAbsLevel == 2 )
2627	{
2628	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2629	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2630	}
2631	else
2632	{
2633	assert(0);
2634	}
2635	return iRate;
2636	}
2637
2638	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2639	UShort ui16CtxNumSig ) const
2640	{
2641	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2642	}
2643
2644	/** Calculates the cost of signaling the last significant coefficient in the block
2645	* \param uiPosX X coordinate of the last significant coefficient
2646	* \param uiPosY Y coordinate of the last significant coefficient
2647	* \returns cost of last significant coefficient
2648	*/
2649	/*
2650	* \param uiWidth width of the transform unit (TU)
2651	*/
2652	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2653	const UInt uiPosY,
2654	const UInt uiBlkWdth ) const
2655	{
2656	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2657	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2658	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2659	if( uiCtxX > 3 )
2660	{
2661	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2662	}
2663	if( uiCtxY > 3 )
2664	{
2665	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2666	}
2667	return xGetICost( uiCost );
2668	}
2669
2670	/** Calculates the cost for specific absolute transform level
2671	* \param uiAbsLevel scaled quantized level
2672	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2673	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2674	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2675	* \returns cost of given absolute transform level
2676	*/
2677	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2678	UShort ui16CtxNumSig ) const
2679	{
2680	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2681	}
2682
2683	/** Get the cost for a specific rate
2684	* \param dRate rate of a bit
2685	* \returns cost at the specific rate
2686	*/
2687	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2688	{
2689	return m_dLambda * dRate;
2690	}
2691
2692	/** Get the cost of an equal probable bit
2693	* \returns cost of equal probable bit
2694	*/
2695	__inline Double TComTrQuant::xGetIEPRate ( ) const
2696	{
2697	return 32768;
2698	}
2699
2700	/** Context derivation process of coeff_abs_significant_flag
2701	* \param uiSigCoeffGroupFlag significance map of L1
2702	* \param uiBlkX column of current scan position
2703	* \param uiBlkY row of current scan position
2704	* \param uiLog2BlkSize log2 value of block size
2705	* \returns ctxInc for current scan position
2706	*/
2707	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2708	const UInt uiCGPosX,
2709	const UInt uiCGPosY,
2710	const UInt scanIdx,
2711	Int width, Int height)
2712	{
2713	UInt uiRight = 0;
2714	UInt uiLower = 0;
2715
2716	width >>= 2;
2717	height >>= 2;
2718	#if !REMOVAL_8x2_2x8_CG
2719	if( width == 2 && height == 2 ) // 8x8
2720	{
2721	if( scanIdx == SCAN_HOR )
2722	{
2723	width = 1;
2724	height = 4;
2725	}
2726	else if( scanIdx == SCAN_VER )
2727	{
2728	width = 4;
2729	height = 1;
2730	}
2731	}
2732	#endif
2733	if( uiCGPosX < width - 1 )
2734	{
2735	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2736	}
2737	if (uiCGPosY < height - 1 )
2738	{
2739	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2740	}
2741	return (uiRight \|\| uiLower);
2742
2743	}
2744	/** set quantized matrix coefficient for encode
2745	* \param scalingList quantaized matrix address
2746	*/
2747	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2748	{
2749	UInt size,list;
2750	UInt qp;
2751
2752	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2753	{
2754	for(list = 0; list < g_scalingListNum[size]; list++)
2755	{
2756	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2757	{
2758	xSetScalingListEnc(scalingList,list,size,qp);
2759	xSetScalingListDec(scalingList,list,size,qp);
2760	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2761	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2762	{
2763	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2764	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2765	}
2766	}
2767	}
2768	}
2769	}
2770	/** set quantized matrix coefficient for decode
2771	* \param scalingList quantaized matrix address
2772	*/
2773	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2774	{
2775	UInt size,list;
2776	UInt qp;
2777
2778	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2779	{
2780	for(list = 0; list < g_scalingListNum[size]; list++)
2781	{
2782	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2783	{
2784	xSetScalingListDec(scalingList,list,size,qp);
2785	}
2786	}
2787	}
2788	}
2789	/** set error scale coefficients
2790	* \param list List ID
2791	* \param uiSize Size
2792	* \param uiQP Quantization parameter
2793	*/
2794	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp, UInt dir)
2795	{
2796
2797	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2798	#if FULL_NBIT
2799	UInt uiBitDepth = g_uiBitDepth;
2800	#else
2801	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
2802	#endif
2803
2804	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
2805
2806	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2807	Int *piQuantcoeff;
2808	double *pdErrScale;
2809	piQuantcoeff = getQuantCoeff(list, qp,size,dir);
2810	pdErrScale = getErrScaleCoeff(list, size, qp,dir);
2811
2812	double dErrScale = (double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2813	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2814	for(i=0;i<uiMaxNumCoeff;i++)
2815	{
2816	pdErrScale[i] = dErrScale/(double)piQuantcoeff[i]/(double)piQuantcoeff[i]/(double)(1<<(2*g_uiBitIncrement));
2817	}
2818	}
2819
2820	/** set quantized matrix coefficient for encode
2821	* \param scalingList quantaized matrix address
2822	* \param listId List index
2823	* \param sizeId size index
2824	* \param uiQP Quantization parameter
2825	*/
2826	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2827	{
2828	UInt width = g_scalingListSizeX[sizeId];
2829	UInt height = g_scalingListSizeX[sizeId];
2830	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2831	Int *quantcoeff;
2832	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2833	quantcoeff = getQuantCoeff(listId, qp, sizeId, SCALING_LIST_SQT);
2834
2835	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2836
2837	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16) //for NSQT
2838	{
2839	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2840	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2841
2842	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2843	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2844	}
2845	}
2846	/** set quantized matrix coefficient for decode
2847	* \param scalingList quantaized matrix address
2848	* \param list List index
2849	* \param size size index
2850	* \param uiQP Quantization parameter
2851	*/
2852	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2853	{
2854	UInt width = g_scalingListSizeX[sizeId];
2855	UInt height = g_scalingListSizeX[sizeId];
2856	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2857	Int *dequantcoeff;
2858	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2859
2860	dequantcoeff = getDequantCoeff(listId, qp, sizeId,SCALING_LIST_SQT);
2861	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2862
2863	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16)
2864	{
2865	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2866	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2867
2868	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2869
2870	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2871	}
2872	}
2873
2874	/** set flat matrix value to quantized coefficient
2875	*/
2876	Void TComTrQuant::setFlatScalingList()
2877	{
2878	UInt size,list;
2879	UInt qp;
2880
2881	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2882	{
2883	for(list = 0; list < g_scalingListNum[size]; list++)
2884	{
2885	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2886	{
2887	xsetFlatScalingList(list,size,qp);
2888	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2889	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2890	{
2891	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2892	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2893	}
2894	}
2895	}
2896	}
2897	}
2898
2899	/** set flat matrix value to quantized coefficient
2900	* \param list List ID
2901	* \param uiQP Quantization parameter
2902	* \param uiSize Size
2903	*/
2904	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2905	{
2906	UInt i,num = g_scalingListSize[size];
2907	UInt numDiv4 = num>>2;
2908	Int *quantcoeff;
2909	Int *dequantcoeff;
2910	Int quantScales = g_quantScales[qp];
2911	Int invQuantScales = g_invQuantScales[qp]<<4;
2912
2913	quantcoeff = getQuantCoeff(list, qp, size,SCALING_LIST_SQT);
2914	dequantcoeff = getDequantCoeff(list, qp, size,SCALING_LIST_SQT);
2915
2916	for(i=0;i<num;i++)
2917	{
2918	*quantcoeff++ = quantScales;
2919	*dequantcoeff++ = invQuantScales;
2920	}
2921
2922	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2923	{
2924	quantcoeff = getQuantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2925	dequantcoeff = getDequantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2926
2927	for(i=0;i<numDiv4;i++)
2928	{
2929	*quantcoeff++ = quantScales;
2930	*dequantcoeff++ = invQuantScales;
2931	}
2932	quantcoeff = getQuantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2933	dequantcoeff = getDequantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2934
2935	for(i=0;i<numDiv4;i++)
2936	{
2937	*quantcoeff++ = quantScales;
2938	*dequantcoeff++ = invQuantScales;
2939	}
2940	}
2941	}
2942
2943	/** set quantized matrix coefficient for encode
2944	* \param coeff quantaized matrix address
2945	* \param quantcoeff quantaized matrix address
2946	* \param quantScales Q(QP%6)
2947	* \param height height
2948	* \param width width
2949	* \param ratio ratio for upscale
2950	* \param sizuNum matrix size
2951	* \param dc dc parameter
2952	*/
2953	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2954	{
2955	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2956	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2957	for(UInt j=0;j<height;j++)
2958	{
2959	for(UInt i=0;i<width;i++)
2960	{
2961	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2962	}
2963	}
2964	if(ratio > 1)
2965	{
2966	quantcoeff[0] = quantScales / dc;
2967	}
2968	}
2969	/** set quantized matrix coefficient for decode
2970	* \param coeff quantaized matrix address
2971	* \param dequantcoeff quantaized matrix address
2972	* \param invQuantScales IQ(QP%6))
2973	* \param height height
2974	* \param width width
2975	* \param ratio ratio for upscale
2976	* \param sizuNum matrix size
2977	* \param dc dc parameter
2978	*/
2979	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2980	{
2981	#if !REMOVE_NSQT
2982	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2983	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2984	#endif
2985	for(UInt j=0;j<height;j++)
2986	{
2987	for(UInt i=0;i<width;i++)
2988	{
2989	#if REMOVE_NSQT
2990	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2991	#else
2992	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio];
2993	#endif
2994	}
2995	}
2996	if(ratio > 1)
2997	{
2998	dequantcoeff[0] = invQuantScales * dc;
2999	}
3000	}
3001
3002	/** initialization process of scaling list array
3003	*/
3004	Void TComTrQuant::initScalingList()
3005	{
3006	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3007	{
3008	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3009	{
3010	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3011	{
3012	m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3013	m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3014	m_errScale [sizeId][listId][qp][SCALING_LIST_SQT] = new double [g_scalingListSize[sizeId]];
3015
3016	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3017	{
3018	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3019	{
3020	m_quantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3021	m_dequantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3022	m_errScale [sizeId][listId][qp][dir] = new double [g_scalingListSize[sizeId]];
3023	}
3024	}
3025	}
3026	}
3027	}
3028	//copy for NSQT
3029	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3030	{
3031	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3032	{
3033	m_quantCoef [SCALING_LIST_16x16][3][qp][dir] = m_quantCoef [SCALING_LIST_16x16][1][qp][dir];
3034	m_dequantCoef [SCALING_LIST_16x16][3][qp][dir] = m_dequantCoef [SCALING_LIST_16x16][1][qp][dir];
3035	m_errScale [SCALING_LIST_16x16][3][qp][dir] = m_errScale [SCALING_LIST_16x16][1][qp][dir];
3036	}
3037	m_quantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_quantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3038	m_dequantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_dequantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3039	m_errScale [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_errScale [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3040	}
3041	}
3042	/** destroy quantization matrix array
3043	*/
3044	Void TComTrQuant::destroyScalingList()
3045	{
3046	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3047	{
3048	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3049	{
3050	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3051	{
3052	if(m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3053	if(m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3054	if(m_errScale [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_errScale [sizeId][listId][qp][SCALING_LIST_SQT];
3055	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3056	{
3057	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3058	{
3059	if(m_quantCoef [sizeId][listId][qp][dir]) delete [] m_quantCoef [sizeId][listId][qp][dir];
3060	if(m_dequantCoef [sizeId][listId][qp][dir]) delete [] m_dequantCoef [sizeId][listId][qp][dir];
3061	if(m_errScale [sizeId][listId][qp][dir]) delete [] m_errScale [sizeId][listId][qp][dir];
3062	}
3063	}
3064	}
3065	}
3066	}
3067	}
3068
3069	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: