Context navigation

source: SHVCSoftware/branches/SHM-1.1-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 47

Visit:

Last change on this file since 47 was 30, checked in by seregin, 12 years ago
Adding braces and indents
File size: 101.8 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2012, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_bUseRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	#if CHROMA_QP_EXTENSION
203	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
204	#else
205	qpScaled = Clip3( -qpBdOffset, 51, qpy + chromaQPOffset );
206	#endif
207
208	if(qpScaled < 0)
209	{
210	qpScaled = qpScaled + qpBdOffset;
211	}
212	else
213	{
214	#if CHROMA_QP_EXTENSION
215	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
216	#else
217	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBdOffset;
218	#endif
219	}
220	}
221	m_cQP.setQpParam( qpScaled );
222	}
223
224	#if MATRIX_MULT
225	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
226	* \param block pointer to input data (residual)
227	* \param coeff pointer to output data (transform coefficients)
228	* \param uiStride stride of input data
229	* \param uiTrSize transform size (uiTrSize x uiTrSize)
230	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
231	*/
232	void xTr(Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
233	{
234	Int i,j,k,iSum;
235	Int tmp[32*32];
236	const short *iT;
237	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
238
239	if (uiTrSize==4)
240	{
241	iT = g_aiT4[0];
242	}
243	else if (uiTrSize==8)
244	{
245	iT = g_aiT8[0];
246	}
247	else if (uiTrSize==16)
248	{
249	iT = g_aiT16[0];
250	}
251	else if (uiTrSize==32)
252	{
253	iT = g_aiT32[0];
254	}
255	else
256	{
257	assert(0);
258	}
259
260	#if FULL_NBIT
261	int shift_1st = uiLog2TrSize - 1 + g_uiBitDepth - 8; // log2(N) - 1 + g_uiBitDepth - 8
262	#else
263	int shift_1st = uiLog2TrSize - 1 + g_uiBitIncrement; // log2(N) - 1 + g_uiBitIncrement
264	#endif
265
266	int add_1st = 1<<(shift_1st-1);
267	int shift_2nd = uiLog2TrSize + 6;
268	int add_2nd = 1<<(shift_2nd-1);
269
270	/* Horizontal transform */
271
272	if (uiTrSize==4)
273	{
274	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
275	{
276	iT = g_as_DST_MAT_4[0];
277	}
278	}
279	for (i=0; i<uiTrSize; i++)
280	{
281	for (j=0; j<uiTrSize; j++)
282	{
283	iSum = 0;
284	for (k=0; k<uiTrSize; k++)
285	{
286	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
287	}
288	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
289	}
290	}
291
292	/* Vertical transform */
293	if (uiTrSize==4)
294	{
295	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
296	{
297	iT = g_as_DST_MAT_4[0];
298	}
299	else
300	{
301	iT = g_aiT4[0];
302	}
303	}
304	for (i=0; i<uiTrSize; i++)
305	{
306	for (j=0; j<uiTrSize; j++)
307	{
308	iSum = 0;
309	for (k=0; k<uiTrSize; k++)
310	{
311	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
312	}
313	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
314	}
315	}
316	}
317
318	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
319	* \param coeff pointer to input data (transform coefficients)
320	* \param block pointer to output data (residual)
321	* \param uiStride stride of output data
322	* \param uiTrSize transform size (uiTrSize x uiTrSize)
323	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
324	*/
325	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
326	{
327	int i,j,k,iSum;
328	Int tmp[32*32];
329	const short *iT;
330
331	if (uiTrSize==4)
332	{
333	iT = g_aiT4[0];
334	}
335	else if (uiTrSize==8)
336	{
337	iT = g_aiT8[0];
338	}
339	else if (uiTrSize==16)
340	{
341	iT = g_aiT16[0];
342	}
343	else if (uiTrSize==32)
344	{
345	iT = g_aiT32[0];
346	}
347	else
348	{
349	assert(0);
350	}
351
352	int shift_1st = SHIFT_INV_1ST;
353	int add_1st = 1<<(shift_1st-1);
354	#if FULL_NBIT
355	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
356	#else
357	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
358	#endif
359	int add_2nd = 1<<(shift_2nd-1);
360	if (uiTrSize==4)
361	{
362	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
363	{
364	iT = g_as_DST_MAT_4[0];
365	}
366	}
367
368	/* Horizontal transform */
369	for (i=0; i<uiTrSize; i++)
370	{
371	for (j=0; j<uiTrSize; j++)
372	{
373	iSum = 0;
374	for (k=0; k<uiTrSize; k++)
375	{
376	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
377	}
378	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
379	}
380	}
381
382	if (uiTrSize==4)
383	{
384	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
385	{
386	iT = g_as_DST_MAT_4[0];
387	}
388	else
389	{
390	iT = g_aiT4[0];
391	}
392	}
393
394	/* Vertical transform */
395	for (i=0; i<uiTrSize; i++)
396	{
397	for (j=0; j<uiTrSize; j++)
398	{
399	iSum = 0;
400	for (k=0; k<uiTrSize; k++)
401	{
402	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
403	}
404	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
405	}
406	}
407	}
408
409	#else //MATRIX_MULT
410
411	/** 4x4 forward transform implemented using partial butterfly structure (1D)
412	* \param src input data (residual)
413	* \param dst output data (transform coefficients)
414	* \param shift specifies right shift after 1D transform
415	*/
416
417	void partialButterfly4(short src,short dst,int shift, int line)
418	{
419	int j;
420	int E[2],O[2];
421	int add = 1<<(shift-1);
422
423	for (j=0; j<line; j++)
424	{
425	/* E and O */
426	E[0] = src[0] + src[3];
427	O[0] = src[0] - src[3];
428	E[1] = src[1] + src[2];
429	O[1] = src[1] - src[2];
430
431	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
432	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
433	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
434	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
435
436	src += 4;
437	dst ++;
438	}
439	}
440
// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
// give identical results
/** 4x4 forward DST, one 1D pass.
 *  \param block input data, 16 values; each group of 4 consecutive values is one input line
 *  \param coeff output data, 16 values; line i writes coeff[i], coeff[4+i], coeff[8+i], coeff[12+i]
 *  \param shift right shift applied after rounding
 */
void fastForwardDst(short *block,short *coeff,int shift)  // input block, output coeff
{
  int i, c[4];
  int rnd_factor = 1<<(shift-1);   // rounding offset applied before the shift
  for (i=0; i<4; i++)
  {
    // Intermediate Variables
    c[0] = block[4*i+0] + block[4*i+3];
    c[1] = block[4*i+1] + block[4*i+3];
    c[2] = block[4*i+0] - block[4*i+1];
    c[3] = 74* block[4*i+2];

    coeff[   i] =  ( 29 * c[0] + 55 * c[1]         + c[3]               + rnd_factor ) >> shift;
    coeff[ 4+i] =  ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3])   + rnd_factor ) >> shift;
    coeff[ 8+i] =  ( 29 * c[2] + 55 * c[0]         - c[3]               + rnd_factor ) >> shift;
    coeff[12+i] =  ( 55 * c[2] - 29 * c[1]         + c[3]               + rnd_factor ) >> shift;
  }
}
461
462	void fastInverseDst(short tmp,short block,int shift) // input tmp, output block
463	{
464	int i, c[4];
465	int rnd_factor = 1<<(shift-1);
466	for (i=0; i<4; i++)
467	{
468	// Intermediate Variables
469	c[0] = tmp[ i] + tmp[ 8+i];
470	c[1] = tmp[8+i] + tmp[12+i];
471	c[2] = tmp[ i] - tmp[12+i];
472	c[3] = 74* tmp[4+i];
473
474	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
475	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
476	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
477	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
478	}
479	}
480
481	void partialButterflyInverse4(short src,short dst,int shift, int line)
482	{
483	int j;
484	int E[2],O[2];
485	int add = 1<<(shift-1);
486
487	for (j=0; j<line; j++)
488	{
489	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
490	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
491	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
492	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
493	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
494
495	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
496	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
497	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
498	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
499	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
500
501	src ++;
502	dst += 4;
503	}
504	}
505
506
507	void partialButterfly8(short src,short dst,int shift, int line)
508	{
509	int j,k;
510	int E[4],O[4];
511	int EE[2],EO[2];
512	int add = 1<<(shift-1);
513
514	for (j=0; j<line; j++)
515	{
516	/* E and O*/
517	for (k=0;k<4;k++)
518	{
519	E[k] = src[k] + src[7-k];
520	O[k] = src[k] - src[7-k];
521	}
522	/* EE and EO */
523	EE[0] = E[0] + E[3];
524	EO[0] = E[0] - E[3];
525	EE[1] = E[1] + E[2];
526	EO[1] = E[1] - E[2];
527
528	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
529	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
530	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
531	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
532
533	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
534	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
535	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
536	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
537
538	src += 8;
539	dst ++;
540	}
541	}
542
543
544	void partialButterflyInverse8(short src,short dst,int shift, int line)
545	{
546	int j,k;
547	int E[4],O[4];
548	int EE[2],EO[2];
549	int add = 1<<(shift-1);
550
551	for (j=0; j<line; j++)
552	{
553	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
554	for (k=0;k<4;k++)
555	{
556	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
557	}
558
559	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
560	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
561	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
562	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
563
564	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
565	E[0] = EE[0] + EO[0];
566	E[3] = EE[0] - EO[0];
567	E[1] = EE[1] + EO[1];
568	E[2] = EE[1] - EO[1];
569	for (k=0;k<4;k++)
570	{
571	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
572	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
573	}
574	src ++;
575	dst += 8;
576	}
577	}
578
579
580	void partialButterfly16(short src,short dst,int shift, int line)
581	{
582	int j,k;
583	int E[8],O[8];
584	int EE[4],EO[4];
585	int EEE[2],EEO[2];
586	int add = 1<<(shift-1);
587
588	for (j=0; j<line; j++)
589	{
590	/* E and O*/
591	for (k=0;k<8;k++)
592	{
593	E[k] = src[k] + src[15-k];
594	O[k] = src[k] - src[15-k];
595	}
596	/* EE and EO */
597	for (k=0;k<4;k++)
598	{
599	EE[k] = E[k] + E[7-k];
600	EO[k] = E[k] - E[7-k];
601	}
602	/* EEE and EEO */
603	EEE[0] = EE[0] + EE[3];
604	EEO[0] = EE[0] - EE[3];
605	EEE[1] = EE[1] + EE[2];
606	EEO[1] = EE[1] - EE[2];
607
608	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
609	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
610	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
611	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
612
613	for (k=2;k<16;k+=4)
614	{
615	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
616	}
617
618	for (k=1;k<16;k+=2)
619	{
620	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
621	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
622	}
623
624	src += 16;
625	dst ++;
626
627	}
628	}
629
630
631	void partialButterflyInverse16(short src,short dst,int shift, int line)
632	{
633	int j,k;
634	int E[8],O[8];
635	int EE[4],EO[4];
636	int EEE[2],EEO[2];
637	int add = 1<<(shift-1);
638
639	for (j=0; j<line; j++)
640	{
641	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
642	for (k=0;k<8;k++)
643	{
644	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
645	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
646	}
647	for (k=0;k<4;k++)
648	{
649	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
650	}
651	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
652	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
653	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
654	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
655
656	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
657	for (k=0;k<2;k++)
658	{
659	EE[k] = EEE[k] + EEO[k];
660	EE[k+2] = EEE[1-k] - EEO[1-k];
661	}
662	for (k=0;k<4;k++)
663	{
664	E[k] = EE[k] + EO[k];
665	E[k+4] = EE[3-k] - EO[3-k];
666	}
667	for (k=0;k<8;k++)
668	{
669	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
670	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
671	}
672	src ++;
673	dst += 16;
674	}
675	}
676
677
678	void partialButterfly32(short src,short dst,int shift, int line)
679	{
680	int j,k;
681	int E[16],O[16];
682	int EE[8],EO[8];
683	int EEE[4],EEO[4];
684	int EEEE[2],EEEO[2];
685	int add = 1<<(shift-1);
686
687	for (j=0; j<line; j++)
688	{
689	/* E and O*/
690	for (k=0;k<16;k++)
691	{
692	E[k] = src[k] + src[31-k];
693	O[k] = src[k] - src[31-k];
694	}
695	/* EE and EO */
696	for (k=0;k<8;k++)
697	{
698	EE[k] = E[k] + E[15-k];
699	EO[k] = E[k] - E[15-k];
700	}
701	/* EEE and EEO */
702	for (k=0;k<4;k++)
703	{
704	EEE[k] = EE[k] + EE[7-k];
705	EEO[k] = EE[k] - EE[7-k];
706	}
707	/* EEEE and EEEO */
708	EEEE[0] = EEE[0] + EEE[3];
709	EEEO[0] = EEE[0] - EEE[3];
710	EEEE[1] = EEE[1] + EEE[2];
711	EEEO[1] = EEE[1] - EEE[2];
712
713	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
714	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
715	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
716	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
717	for (k=4;k<32;k+=8)
718	{
719	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
720	}
721	for (k=2;k<32;k+=4)
722	{
723	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
724	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
725	}
726	for (k=1;k<32;k+=2)
727	{
728	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
729	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
730	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
731	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
732	}
733	src += 32;
734	dst ++;
735	}
736	}
737
738
739	void partialButterflyInverse32(short src,short dst,int shift, int line)
740	{
741	int j,k;
742	int E[16],O[16];
743	int EE[8],EO[8];
744	int EEE[4],EEO[4];
745	int EEEE[2],EEEO[2];
746	int add = 1<<(shift-1);
747
748	for (j=0; j<line; j++)
749	{
750	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
751	for (k=0;k<16;k++)
752	{
753	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
754	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
755	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
756	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
757	}
758	for (k=0;k<8;k++)
759	{
760	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
761	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
762	}
763	for (k=0;k<4;k++)
764	{
765	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
766	}
767	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
768	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
769	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
770	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
771
772	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
773	EEE[0] = EEEE[0] + EEEO[0];
774	EEE[3] = EEEE[0] - EEEO[0];
775	EEE[1] = EEEE[1] + EEEO[1];
776	EEE[2] = EEEE[1] - EEEO[1];
777	for (k=0;k<4;k++)
778	{
779	EE[k] = EEE[k] + EEO[k];
780	EE[k+4] = EEE[3-k] - EEO[3-k];
781	}
782	for (k=0;k<8;k++)
783	{
784	E[k] = EE[k] + EO[k];
785	E[k+8] = EE[7-k] - EO[7-k];
786	}
787	for (k=0;k<16;k++)
788	{
789	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
790	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
791	}
792	src ++;
793	dst += 32;
794	}
795	}
796
797	/** MxN forward transform (2D)
798	* \param block input data (residual)
799	* \param coeff output data (transform coefficients)
800	* \param iWidth input data (width of transform)
801	* \param iHeight input data (height of transform)
802	*/
803	void xTrMxN(short block,short coeff, int iWidth, int iHeight, UInt uiMode)
804	{
805	#if FULL_NBIT
806	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitDepth - 8; // log2(iWidth) - 1 + g_uiBitDepth - 8
807	#else
808	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitIncrement; // log2(iWidth) - 1 + g_uiBitIncrement
809	#endif
810	int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
811
812	short tmp[ 64 * 64 ];
813
814	#if !REMOVE_NSQT
815	if( iWidth == 16 && iHeight == 4)
816	{
817	partialButterfly16( block, tmp, shift_1st, iHeight );
818	partialButterfly4( tmp, coeff, shift_2nd, iWidth );
819	}
820	else if( iWidth == 32 && iHeight == 8 )
821	{
822	partialButterfly32( block, tmp, shift_1st, iHeight );
823	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
824	}
825	else if( iWidth == 4 && iHeight == 16)
826	{
827	partialButterfly4( block, tmp, shift_1st, iHeight );
828	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
829	}
830	else if( iWidth == 8 && iHeight == 32 )
831	{
832	partialButterfly8( block, tmp, shift_1st, iHeight );
833	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
834	}
835	else
836	#endif
837	if( iWidth == 4 && iHeight == 4)
838	{
839	#if INTRA_TRANS_SIMP
840	if (uiMode != REG_DCT)
841	{
842	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
843	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
844	}
845	else
846	{
847	partialButterfly4(block, tmp, shift_1st, iHeight);
848	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
849	}
850
851	#else
852	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
853	{
854	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
855	}
856	else
857	{
858	partialButterfly4(block, tmp, shift_1st, iHeight);
859	}
860	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
861	{
862	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
863	}
864	else
865	{
866	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
867	}
868	#endif
869	}
870	else if( iWidth == 8 && iHeight == 8)
871	{
872	partialButterfly8( block, tmp, shift_1st, iHeight );
873	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
874	}
875	else if( iWidth == 16 && iHeight == 16)
876	{
877	partialButterfly16( block, tmp, shift_1st, iHeight );
878	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
879	}
880	else if( iWidth == 32 && iHeight == 32)
881	{
882	partialButterfly32( block, tmp, shift_1st, iHeight );
883	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
884	}
885	}
886	/** MxN inverse transform (2D)
887	* \param coeff input data (transform coefficients)
888	* \param block output data (residual)
889	* \param iWidth input data (width of transform)
890	* \param iHeight input data (height of transform)
891	*/
892	void xITrMxN(short coeff,short block, int iWidth, int iHeight, UInt uiMode)
893	{
894	int shift_1st = SHIFT_INV_1ST;
895	#if FULL_NBIT
896	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
897	#else
898	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
899	#endif
900
901	short tmp[ 64*64];
902	#if !REMOVE_NSQT
903	if( iWidth == 16 && iHeight == 4)
904	{
905	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
906	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
907	}
908	else if( iWidth == 32 && iHeight == 8)
909	{
910	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
911	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
912	}
913	else if( iWidth == 4 && iHeight == 16)
914	{
915	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
916	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
917	}
918	else if( iWidth == 8 && iHeight == 32)
919	{
920	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
921	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
922	}
923	else
924	#endif
925	if( iWidth == 4 && iHeight == 4)
926	{
927	#if INTRA_TRANS_SIMP
928	if (uiMode != REG_DCT)
929	{
930	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
931	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
932	}
933	else
934	{
935	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
936	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
937	}
938	#else
939	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
940	{
941	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
942	}
943	else
944	{
945	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
946	}
947	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
948	{
949	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
950	}
951	else
952	{
953	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
954	}
955	#endif
956	}
957	else if( iWidth == 8 && iHeight == 8)
958	{
959	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
960	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
961	}
962	else if( iWidth == 16 && iHeight == 16)
963	{
964	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
965	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
966	}
967	else if( iWidth == 32 && iHeight == 32)
968	{
969	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
970	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
971	}
972	}
973
974	#endif //MATRIX_MULT
975
976	// To minimize the distortion only. No rate is considered.
977	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
978	{
979	Int lastCG = -1;
980	Int absSum = 0 ;
981	Int n ;
982
983	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
984	{
985	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
986	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
987	absSum = 0 ;
988
989	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
990	{
991	if( pQCoef[ scan[ n + subPos ]] )
992	{
993	lastNZPosInCG = n;
994	break;
995	}
996	}
997
998	for(n = 0; n <SCAN_SET_SIZE; n++ )
999	{
1000	if( pQCoef[ scan[ n + subPos ]] )
1001	{
1002	firstNZPosInCG = n;
1003	break;
1004	}
1005	}
1006
1007	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1008	{
1009	absSum += pQCoef[ scan[ n + subPos ]];
1010	}
1011
1012	if(lastNZPosInCG>=0 && lastCG==-1)
1013	{
1014	lastCG = 1 ;
1015	}
1016
1017	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1018	{
1019	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
1020	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1021	{
1022	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
1023
1024	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1025	{
1026	UInt blkPos = scan[ n+subPos ];
1027	if(pQCoef[ blkPos ] != 0 )
1028	{
1029	if(deltaU[blkPos]>0)
1030	{
1031	curCost = - deltaU[blkPos];
1032	curChange=1 ;
1033	}
1034	else
1035	{
1036	//curChange =-1;
1037	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1038	{
1039	curCost=MAX_INT ;
1040	}
1041	else
1042	{
1043	curCost = deltaU[blkPos];
1044	curChange =-1;
1045	}
1046	}
1047	}
1048	else
1049	{
1050	if(n<firstNZPosInCG)
1051	{
1052	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1053	if(thisSignBit != signbit )
1054	{
1055	curCost = MAX_INT;
1056	}
1057	else
1058	{
1059	curCost = - (deltaU[blkPos]) ;
1060	curChange = 1 ;
1061	}
1062	}
1063	else
1064	{
1065	curCost = - (deltaU[blkPos]) ;
1066	curChange = 1 ;
1067	}
1068	}
1069
1070	if( curCost<minCostInc)
1071	{
1072	minCostInc = curCost ;
1073	finalChange = curChange ;
1074	minPos = blkPos ;
1075	}
1076	} //CG loop
1077
1078	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
1079	{
1080	finalChange = -1;
1081	}
1082
1083	if(pCoef[minPos]>=0)
1084	{
1085	pQCoef[minPos] += finalChange ;
1086	}
1087	else
1088	{
1089	pQCoef[minPos] -= finalChange ;
1090	}
1091	} // Hide
1092	}
1093	if(lastCG==1)
1094	{
1095	lastCG=0 ;
1096	}
1097	} // TU loop
1098
1099	return;
1100	}
1101
1102	Void TComTrQuant::xQuant( TComDataCU* pcCU,
1103	Int* pSrc,
1104	TCoeff* pDes,
1105	#if ADAPTIVE_QP_SELECTION
1106	Int*& pArlDes,
1107	#endif
1108	Int iWidth,
1109	Int iHeight,
1110	UInt& uiAcSum,
1111	TextType eTType,
1112	UInt uiAbsPartIdx )
1113	{
1114	Int* piCoef = pSrc;
1115	TCoeff* piQCoef = pDes;
1116	#if ADAPTIVE_QP_SELECTION
1117	Int* piArlCCoef = pArlDes;
1118	#endif
1119	Int iAdd = 0;
1120
1121	Bool useRDOQForTransformSkip = !(m_useTransformSkipFast && pcCU->getTransformSkip(uiAbsPartIdx,eTType));
1122	if ( m_bUseRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) && useRDOQForTransformSkip)
1123	{
1124	#if ADAPTIVE_QP_SELECTION
1125	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1126	#else
1127	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1128	#endif
1129	}
1130	else
1131	{
1132	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1133
1134	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1135	if (scanIdx == SCAN_ZIGZAG)
1136	{
1137	scanIdx = SCAN_DIAG;
1138	}
1139
1140	#if REMOVE_NSQT
1141	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1142	#else
1143	if (iWidth != iHeight)
1144	{
1145	scanIdx = SCAN_DIAG;
1146	}
1147
1148	const UInt * scan;
1149	if (iWidth == iHeight)
1150	{
1151	scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1152	}
1153	else
1154	{
1155	scan = g_sigScanNSQT[ log2BlockSize - 2 ];
1156	}
1157	#endif
1158
1159	Int deltaU[32*32] ;
1160
1161	#if ADAPTIVE_QP_SELECTION
1162	QpParam cQpBase;
1163	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1164
1165	Int qpScaled;
1166	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1167
1168	if(eTType == TEXT_LUMA)
1169	{
1170	qpScaled = iQpBase + qpBDOffset;
1171	}
1172	else
1173	{
1174	#if CHROMA_QP_EXTENSION
1175	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1176	#else
1177	qpScaled = Clip3( -qpBDOffset, 51, iQpBase);
1178	#endif
1179
1180	if(qpScaled < 0)
1181	{
1182	qpScaled = qpScaled + qpBDOffset;
1183	}
1184	else
1185	{
1186	#if CHROMA_QP_EXTENSION
1187	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1188	#else
1189	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBDOffset;
1190	#endif
1191	}
1192	}
1193	cQpBase.setQpParam(qpScaled);
1194	#endif
1195
1196	#if !REMOVE_NSQT
1197	Bool bNonSqureFlag = ( iWidth != iHeight );
1198	#endif
1199	UInt dir = SCALING_LIST_SQT;
1200	#if !REMOVE_NSQT
1201	if( bNonSqureFlag )
1202	{
1203	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1204	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1205	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1206	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1207	iHeight = iWidth;
1208	}
1209	#endif
1210
1211	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1212	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1213	assert(scalingListType < 6);
1214	Int *piQuantCoeff = 0;
1215	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2, dir);
1216
1217	#if FULL_NBIT
1218	UInt uiBitDepth = g_uiBitDepth;
1219	#else
1220	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1221	#endif
1222	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1223
1224	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1225
1226	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1227
1228	#if ADAPTIVE_QP_SELECTION
1229	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1230	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1231	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1232	Int iAddC = 1 << (iQBitsC-1);
1233	#endif
1234
1235	Int qBits8 = iQBits-8;
1236	for( Int n = 0; n < iWidth*iHeight; n++ )
1237	{
1238	Int iLevel;
1239	Int iSign;
1240	UInt uiBlockPos = n;
1241	iLevel = piCoef[uiBlockPos];
1242	iSign = (iLevel < 0 ? -1: 1);
1243
1244	#if ADAPTIVE_QP_SELECTION
1245	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1246	if( m_bUseAdaptQpSelect )
1247	{
1248	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1249	}
1250	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1251	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1252	#else
1253	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1254	deltaU[uiBlockPos] = (Int)( ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1255	#endif
1256	uiAcSum += iLevel;
1257	iLevel *= iSign;
1258	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1259	} // for n
1260	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1261	{
1262	if(uiAcSum>=2)
1263	{
1264	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1265	}
1266	}
1267	} //if RDOQ
1268	//return;
1269
1270	}
1271
1272	Void TComTrQuant::xDeQuant( const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1273	{
1274
1275	const TCoeff* piQCoef = pSrc;
1276	Int* piCoef = pDes;
1277	UInt dir = SCALING_LIST_SQT;
1278	#if !REMOVE_NSQT
1279	if( iWidth != iHeight )
1280	{
1281	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1282	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1283	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1284	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1285	iHeight = iWidth;
1286	}
1287	#endif
1288
1289	if ( iWidth > (Int)m_uiMaxTrSize )
1290	{
1291	iWidth = m_uiMaxTrSize;
1292	iHeight = m_uiMaxTrSize;
1293	}
1294
1295	Int iShift,iAdd,iCoeffQ;
1296	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1297
1298	#if FULL_NBIT
1299	UInt uiBitDepth = g_uiBitDepth;
1300	#else
1301	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1302	#endif
1303	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1304
1305	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1306
1307	TCoeff clipQCoef;
1308	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + uiBitDepth - m_cQP.m_iPer) );
1309	const Int levelLimit = 1 << bitRange;
1310
1311	if(getUseScalingList())
1312	{
1313	iShift += 4;
1314	if(iShift > m_cQP.m_iPer)
1315	{
1316	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1317	}
1318	else
1319	{
1320	iAdd = 0;
1321	}
1322	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1323
1324	if(iShift > m_cQP.m_iPer)
1325	{
1326	for( Int n = 0; n < iWidth*iHeight; n++ )
1327	{
1328	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1329	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1330	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1331	}
1332	}
1333	else
1334	{
1335	for( Int n = 0; n < iWidth*iHeight; n++ )
1336	{
1337	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
1338	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1339	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1340	}
1341	}
1342	}
1343	else
1344	{
1345	iAdd = 1 << (iShift-1);
1346	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1347
1348	for( Int n = 0; n < iWidth*iHeight; n++ )
1349	{
1350	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1351	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1352	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1353	}
1354	}
1355	}
1356
1357	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
1358	Bool bUseRDOQ, Bool bEnc, Bool useTransformSkipFast
1359	#if ADAPTIVE_QP_SELECTION
1360	, Bool bUseAdaptQpSelect
1361	#endif
1362	)
1363	{
1364	m_uiMaxTrSize = uiMaxTrSize;
1365	m_bEnc = bEnc;
1366	m_bUseRDOQ = bUseRDOQ;
1367	#if ADAPTIVE_QP_SELECTION
1368	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1369	#endif
1370	m_useTransformSkipFast = useTransformSkipFast;
1371	}
1372
1373	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1374	Pel* pcResidual,
1375	UInt uiStride,
1376	TCoeff* rpcCoeff,
1377	#if ADAPTIVE_QP_SELECTION
1378	Int*& rpcArlCoeff,
1379	#endif
1380	UInt uiWidth,
1381	UInt uiHeight,
1382	UInt& uiAbsSum,
1383	TextType eTType,
1384	UInt uiAbsPartIdx,
1385	Bool useTransformSkip
1386	)
1387	{
1388	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1389	{
1390	uiAbsSum=0;
1391	for (UInt k = 0; k<uiHeight; k++)
1392	{
1393	for (UInt j = 0; j<uiWidth; j++)
1394	{
1395	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1396	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1397	}
1398	}
1399	return;
1400	}
1401	UInt uiMode; //luma intra pred
1402	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1403	{
1404	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1405	}
1406	#if INTRA_BL_DST4x4
1407	else if(eTType == TEXT_LUMA && pcCU->isIntraBL(uiAbsPartIdx) )
1408	{
1409	uiMode = DC_IDX; //Using DST
1410	}
1411	#endif
1412	else
1413	{
1414	uiMode = REG_DCT;
1415	}
1416
1417	uiAbsSum = 0;
1418	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1419	if(useTransformSkip)
1420	{
1421	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1422	}
1423	else
1424	{
1425	xT( uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1426	}
1427	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1428	#if ADAPTIVE_QP_SELECTION
1429	rpcArlCoeff,
1430	#endif
1431	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1432	}
1433
1434	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1435	{
1436	if(transQuantBypass)
1437	{
1438	for (UInt k = 0; k<uiHeight; k++)
1439	{
1440	for (UInt j = 0; j<uiWidth; j++)
1441	{
1442	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1443	}
1444	}
1445	return;
1446	}
1447	xDeQuant( pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1448	if(useTransformSkip == true)
1449	{
1450	xITransformSkip( m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1451	}
1452	else
1453	{
1454	xIT( uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1455	}
1456	}
1457
1458	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1459	{
1460	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1461	{
1462	return;
1463	}
1464
1465	UInt uiLumaTrMode, uiChromaTrMode;
1466	pcCU->convertTransIdx( uiAbsPartIdx, pcCU->getTransformIdx( uiAbsPartIdx ), uiLumaTrMode, uiChromaTrMode );
1467	const UInt uiStopTrMode = eTxt == TEXT_LUMA ? uiLumaTrMode : uiChromaTrMode;
1468
1469	if( uiTrMode == uiStopTrMode )
1470	{
1471	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1472	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1473	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1474	{
1475	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1476	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1477	{
1478	return;
1479	}
1480	uiWidth <<= 1;
1481	uiHeight <<= 1;
1482	}
1483	Pel* pResi = rpcResidual + uiAddr;
1484	#if !REMOVE_NSQT
1485	if( pcCU->useNonSquareTrans( uiTrMode, uiAbsPartIdx ) )
1486	{
1487	Int trWidth = uiWidth;
1488	Int trHeight = uiHeight;
1489	pcCU->getNSQTSize( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1490
1491	uiWidth = trWidth;
1492	uiHeight = trHeight;
1493	}
1494	#endif
1495	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1496	assert(scalingListType < 6);
1497	#if INTER_TRANSFORMSKIP
1498	#if NO_RESIDUAL_FLAG_FOR_BLPRED
1499	if(pcCU->isIntraBL(uiAbsPartIdx) && eTxt == TEXT_LUMA)
1500	{
1501	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, DC_IDX, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1502	}
1503	else
1504	{
1505	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1506	}
1507	#else
1508	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1509	#endif
1510	#else
1511	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
1512	#endif
1513	}
1514	else
1515	{
1516	uiTrMode++;
1517	uiWidth >>= 1;
1518	uiHeight >>= 1;
1519	Int trWidth = uiWidth, trHeight = uiHeight;
1520	#if !REMOVE_NSQT
1521	Int trLastWidth = uiWidth << 1, trLastHeight = uiHeight << 1;
1522	pcCU->getNSQTSize ( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1523	pcCU->getNSQTSize ( uiTrMode - 1, uiAbsPartIdx, trLastWidth, trLastHeight );
1524	#endif
1525	UInt uiAddrOffset = trHeight * uiStride;
1526	UInt uiCoefOffset = trWidth * trHeight;
1527	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1528	#if !REMOVE_NSQT
1529	UInt uiInterTUSplitDirection = pcCU->getInterTUSplitDirection ( trWidth, trHeight, trLastWidth, trLastHeight );
1530	if( uiInterTUSplitDirection != 2 )
1531	{
1532	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1533	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth * uiInterTUSplitDirection + uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1534	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 2 * trWidth * uiInterTUSplitDirection + 2 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1535	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 3 * trWidth * uiInterTUSplitDirection + 3 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1536	}
1537	else
1538	#endif
1539	{
1540	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1541	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1542	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1543	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1544	}
1545	}
1546	}
1547
1548	// ------------------------------------------------------------------------------------------------
1549	// Logical transform
1550	// ------------------------------------------------------------------------------------------------
1551
1552	/** Wrapper function between HM interface and core NxN forward transform (2D)
1553	* \param piBlkResi input data (residual)
1554	* \param psCoeff output data (transform coefficients)
1555	* \param uiStride stride of input residual data
1556	* \param iSize transform size (iSize x iSize)
1557	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1558	*/
1559	Void TComTrQuant::xT( UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1560	{
1561	#if MATRIX_MULT
1562	Int iSize = iWidth;
1563	#if !REMOVE_NSQT
1564	if( iWidth != iHeight)
1565	{
1566	xTrMxN( piBlkResi, psCoeff, uiStride, (UInt)iWidth, (UInt)iHeight );
1567	return;
1568	}
1569	#endif
1570	xTr(piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1571	#else
1572	Int j;
1573	{
1574	short block[ 64 * 64 ];
1575	short coeff[ 64 * 64 ];
1576	{
1577	for (j = 0; j < iHeight; j++)
1578	{
1579	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( short ) );
1580	}
1581	}
1582	xTrMxN( block, coeff, iWidth, iHeight, uiMode );
1583	for ( j = 0; j < iHeight * iWidth; j++ )
1584	{
1585	psCoeff[ j ] = coeff[ j ];
1586	}
1587	return ;
1588	}
1589	#endif
1590	}
1591
1592
1593	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1594	* \param plCoef input data (transform coefficients)
1595	* \param pResidual output data (residual)
1596	* \param uiStride stride of input residual data
1597	* \param iSize transform size (iSize x iSize)
1598	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1599	*/
1600	Void TComTrQuant::xIT( UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1601	{
1602	#if MATRIX_MULT
1603	Int iSize = iWidth;
1604	#if !REMOVE_NSQT
1605	if( iWidth != iHeight )
1606	{
1607	xITrMxN( plCoef, pResidual, uiStride, (UInt)iWidth, (UInt)iHeight );
1608	return;
1609	}
1610	#endif
1611	xITr(plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1612	#else
1613	Int j;
1614	{
1615	short block[ 64 * 64 ];
1616	short coeff[ 64 * 64 ];
1617	for ( j = 0; j < iHeight * iWidth; j++ )
1618	{
1619	coeff[j] = (short)plCoef[j];
1620	}
1621	xITrMxN( coeff, block, iWidth, iHeight, uiMode );
1622	{
1623	for ( j = 0; j < iHeight; j++ )
1624	{
1625	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(short) );
1626	}
1627	}
1628	return ;
1629	}
1630	#endif
1631	}
1632
1633	/** Wrapper function between HM interface and core 4x4 transform skipping
1634	* \param piBlkResi input data (residual)
1635	* \param psCoeff output data (transform coefficients)
1636	* \param uiStride stride of input residual data
1637	* \param iSize transform size (iSize x iSize)
1638	*/
1639	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1640	{
1641	assert( width == height );
1642	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1643	#if FULL_NBIT
1644	UInt uiBitDepth = g_uiBitDepth;
1645	#else
1646	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1647	#endif
1648	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1649	UInt transformSkipShift;
1650	Int j,k;
1651	if(shift >= 0)
1652	{
1653	transformSkipShift = shift;
1654	for (j = 0; j < height; j++)
1655	{
1656	for(k = 0; k < width; k ++)
1657	{
1658	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1659	}
1660	}
1661	}
1662	else
1663	{
1664	//The case when uiBitDepth > 13
1665	Int offset;
1666	transformSkipShift = -shift;
1667	offset = (1 << (transformSkipShift - 1));
1668	for (j = 0; j < height; j++)
1669	{
1670	for(k = 0; k < width; k ++)
1671	{
1672	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1673	}
1674	}
1675	}
1676	}
1677
1678	/** Wrapper function between HM interface and core NxN transform skipping
1679	* \param plCoef input data (coefficients)
1680	* \param pResidual output data (residual)
1681	* \param uiStride stride of input residual data
1682	* \param iSize transform size (iSize x iSize)
1683	*/
1684	Void TComTrQuant::xITransformSkip( Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1685	{
1686	assert( width == height );
1687	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1688	#if FULL_NBIT
1689	UInt uiBitDepth = g_uiBitDepth;
1690	#else
1691	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1692	#endif
1693	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1694	UInt transformSkipShift;
1695	Int j,k;
1696	if(shift > 0)
1697	{
1698	Int offset;
1699	transformSkipShift = shift;
1700	offset = (1 << (transformSkipShift -1));
1701	for ( j = 0; j < height; j++ )
1702	{
1703	for(k = 0; k < width; k ++)
1704	{
1705	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1706	}
1707	}
1708	}
1709	else
1710	{
1711	//The case when uiBitDepth >= 13
1712	transformSkipShift = - shift;
1713	for ( j = 0; j < height; j++ )
1714	{
1715	for(k = 0; k < width; k ++)
1716	{
1717	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1718	}
1719	}
1720	}
1721	}
1722
1723	/** RDOQ with CABAC
1724	* \param pcCU pointer to coding unit structure
1725	* \param plSrcCoeff pointer to input buffer
1726	* \param piDstCoeff reference to pointer to output buffer
1727	* \param uiWidth block width
1728	* \param uiHeight block height
1729	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1730	* \param eTType plane type / luminance or chrominance
1731	* \param uiAbsPartIdx absolute partition index
1732	* \returns Void
1733	* Rate distortion optimized quantization for entropy
1734	* coding engines using probability models like CABAC
1735	*/
1736	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1737	Int* plSrcCoeff,
1738	TCoeff* piDstCoeff,
1739	#if ADAPTIVE_QP_SELECTION
1740	Int*& piArlDstCoeff,
1741	#endif
1742	UInt uiWidth,
1743	UInt uiHeight,
1744	UInt& uiAbsSum,
1745	TextType eTType,
1746	UInt uiAbsPartIdx )
1747	{
1748	Int iQBits = m_cQP.m_iBits;
1749	Double dTemp = 0;
1750	UInt dir = SCALING_LIST_SQT;
1751	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1752	Int uiQ = g_quantScales[m_cQP.rem()];
1753	#if !REMOVE_NSQT
1754	if (uiWidth != uiHeight)
1755	{
1756	uiLog2TrSize += (uiWidth > uiHeight) ? -1 : 1;
1757	dir = ( uiWidth < uiHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1758	}
1759	#endif
1760
1761	#if FULL_NBIT
1762	UInt uiBitDepth = g_uiBitDepth;
1763	#else
1764	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1765	#endif
1766	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1767	UInt uiGoRiceParam = 0;
1768	Double d64BlockUncodedCost = 0;
1769	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1770	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1771	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1772	assert(scalingListType < 6);
1773
1774	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1775	double dErrScale = 0;
1776	double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem,dir);
1777	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1778	Int *piQCoef = piQCoefOrg;
1779	double *pdErrScale = pdErrScaleOrg;
1780	#if ADAPTIVE_QP_SELECTION
1781	Int iQBitsC = iQBits - ARL_C_PRECISION;
1782	Int iAddC = 1 << (iQBitsC-1);
1783	#endif
1784	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1785	if (uiScanIdx == SCAN_ZIGZAG)
1786	{
1787	// Map value zigzag to diagonal scan
1788	uiScanIdx = SCAN_DIAG;
1789	}
1790	Int blockType = uiLog2BlkSize;
1791	#if !REMOVE_NSQT
1792	if (uiWidth != uiHeight)
1793	{
1794	uiScanIdx = SCAN_DIAG;
1795	blockType = 4;
1796	}
1797	#endif
1798
1799	#if ADAPTIVE_QP_SELECTION
1800	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1801	#endif
1802
1803	Double pdCostCoeff [ 32 * 32 ];
1804	Double pdCostSig [ 32 * 32 ];
1805	Double pdCostCoeff0[ 32 * 32 ];
1806	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1807	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1808	Int rateIncUp [ 32 * 32 ];
1809	Int rateIncDown [ 32 * 32 ];
1810	Int sigRateDelta[ 32 * 32 ];
1811	Int deltaU [ 32 * 32 ];
1812	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1813	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1814	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1815	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1816
1817	const UInt * scanCG;
1818	#if !REMOVE_NSQT
1819	if (uiWidth == uiHeight)
1820	#endif
1821	{
1822	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1823	if( uiLog2BlkSize == 3 )
1824	{
1825	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1826	}
1827	else if( uiLog2BlkSize == 5 )
1828	{
1829	scanCG = g_sigLastScanCG32x32;
1830	}
1831	}
1832	#if !REMOVE_NSQT
1833	else
1834	{
1835	scanCG = g_sigCGScanNSQT[ uiLog2BlkSize - 2 ];
1836	}
1837	#endif
1838	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1839	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1840	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1841	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1842	Int iCGLastScanPos = -1;
1843
1844	UInt uiCtxSet = 0;
1845	Int c1 = 1;
1846	Int c2 = 0;
1847	#if !REMOVE_NUM_GREATER1
1848	UInt uiNumOne = 0;
1849	#endif
1850	Double d64BaseCost = 0;
1851	Int iLastScanPos = -1;
1852	dTemp = dErrScale;
1853
1854	UInt c1Idx = 0;
1855	UInt c2Idx = 0;
1856	Int baseLevel;
1857
1858	#if REMOVE_NSQT
1859	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1860	#else
1861	const UInt * scan;
1862	if (uiWidth == uiHeight)
1863	{
1864	scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1865	}
1866	else
1867	{
1868	scan = g_sigScanNSQT[ uiLog2BlkSize - 2 ];
1869	}
1870	#endif
1871
1872	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1873	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1874
1875	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1876	Int iScanPos;
1877	coeffGroupRDStats rdStats;
1878
1879	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1880	{
1881	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1882	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1883	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1884	#if !REMOVAL_8x2_2x8_CG
1885	if( uiWidth == 8 && uiHeight == 8 && (uiScanIdx == SCAN_HOR \|\| uiScanIdx == SCAN_VER) )
1886	{
1887	uiCGPosY = (uiScanIdx == SCAN_HOR ? uiCGBlkPos : 0);
1888	uiCGPosX = (uiScanIdx == SCAN_VER ? uiCGBlkPos : 0);
1889	}
1890	#endif
1891	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1892
1893	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1894	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1895	{
1896	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1897	//===== quantization =====
1898	UInt uiBlkPos = scan[iScanPos];
1899	// set coeff
1900	uiQ = piQCoef[uiBlkPos];
1901	dTemp = pdErrScale[uiBlkPos];
1902	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1903	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1904	#if ADAPTIVE_QP_SELECTION
1905	if( m_bUseAdaptQpSelect )
1906	{
1907	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1908	}
1909	#endif
1910	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1911
1912	Double dErr = Double( lLevelDouble );
1913	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1914	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1915	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1916
1917	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1918	{
1919	iLastScanPos = iScanPos;
1920	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1921	iCGLastScanPos = iCGScanPos;
1922	}
1923
1924	if ( iLastScanPos >= 0 )
1925	{
1926	//===== coefficient level estimation =====
1927	UInt uiLevel;
1928	UInt uiOneCtx = 4 * uiCtxSet + c1;
1929	UInt uiAbsCtx = uiCtxSet + c2;
1930
1931	if( iScanPos == iLastScanPos )
1932	{
1933	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1934	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1935	c1Idx, c2Idx, iQBits, dTemp, 1 );
1936	}
1937	else
1938	{
1939	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1940	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1941	#if REMOVAL_8x2_2x8_CG
1942	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1943	#else
1944	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1945	#endif
1946	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1947	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1948	c1Idx, c2Idx, iQBits, dTemp, 0 );
1949	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1950	}
1951	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1952	if( uiLevel > 0 )
1953	{
1954	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1955	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1956	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1957	}
1958	else // uiLevel == 0
1959	{
1960	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1961	}
1962	piDstCoeff[ uiBlkPos ] = uiLevel;
1963	d64BaseCost += pdCostCoeff [ iScanPos ];
1964
1965
1966	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1967	if( uiLevel >= baseLevel )
1968	{
1969	if(uiLevel > 3*(1<<uiGoRiceParam))
1970	{
1971	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1972	}
1973	}
1974	if ( uiLevel >= 1)
1975	{
1976	c1Idx ++;
1977	}
1978
1979	//===== update bin model =====
1980	if( uiLevel > 1 )
1981	{
1982	c1 = 0;
1983	c2 += (c2 < 2);
1984	#if !REMOVE_NUM_GREATER1
1985	uiNumOne++;
1986	#endif
1987	c2Idx ++;
1988	}
1989	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1990	{
1991	c1++;
1992	}
1993
1994	//===== context set update =====
1995	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1996	{
1997	#if !REMOVE_NUM_GREATER1
1998	c1 = 1;
1999	#endif
2000	c2 = 0;
2001	uiGoRiceParam = 0;
2002
2003	c1Idx = 0;
2004	c2Idx = 0;
2005	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
2006	#if REMOVE_NUM_GREATER1
2007	if( c1 == 0 )
2008	#else
2009	if( uiNumOne > 0 )
2010	#endif
2011	{
2012	uiCtxSet++;
2013	}
2014	#if REMOVE_NUM_GREATER1
2015	c1 = 1;
2016	#else
2017	uiNumOne >>= 1;
2018	#endif
2019	}
2020	}
2021	else
2022	{
2023	d64BaseCost += pdCostCoeff0[ iScanPos ];
2024	}
2025	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2026	if (iScanPosinCG == 0 )
2027	{
2028	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2029	}
2030	if (piDstCoeff[ uiBlkPos ] )
2031	{
2032	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2033	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2034	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2035	if ( iScanPosinCG != 0 )
2036	{
2037	rdStats.iNNZbeforePos0++;
2038	}
2039	}
2040	} //end for (iScanPosinCG)
2041
2042	if (iCGLastScanPos >= 0)
2043	{
2044	if( iCGScanPos )
2045	{
2046	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2047	{
2048	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2049	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2050	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2051	}
2052	else
2053	{
2054	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2055	{
2056	if ( rdStats.iNNZbeforePos0 == 0 )
2057	{
2058	d64BaseCost -= rdStats.d64SigCost_0;
2059	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2060	}
2061	// rd-cost if SigCoeffGroupFlag = 0, initialization
2062	Double d64CostZeroCG = d64BaseCost;
2063
2064	// add SigCoeffGroupFlag cost to total cost
2065	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2066	if (iCGScanPos < iCGLastScanPos)
2067	{
2068	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2069	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2070	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2071	}
2072
2073	// try to convert the current coeff group from non-zero to all-zero
2074	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2075	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2076	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2077
2078	// if we can save cost, change this block to all-zero block
2079	if ( d64CostZeroCG < d64BaseCost )
2080	{
2081	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2082	d64BaseCost = d64CostZeroCG;
2083	if (iCGScanPos < iCGLastScanPos)
2084	{
2085	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2086	}
2087	// reset coeffs to 0 in this block
2088	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2089	{
2090	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2091	UInt uiBlkPos = scan[ iScanPos ];
2092
2093	if (piDstCoeff[ uiBlkPos ])
2094	{
2095	piDstCoeff [ uiBlkPos ] = 0;
2096	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2097	pdCostSig [ iScanPos ] = 0;
2098	}
2099	}
2100	} // end if ( d64CostAllZeros < d64BaseCost )
2101	}
2102	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2103	}
2104	else
2105	{
2106	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2107	}
2108	}
2109	} //end for (iCGScanPos)
2110
2111	//===== estimate last position =====
2112	if ( iLastScanPos < 0 )
2113	{
2114	return;
2115	}
2116
2117	Double d64BestCost = 0;
2118	Int ui16CtxCbf = 0;
2119	Int iBestLastIdxP1 = 0;
2120	#if NO_RESIDUAL_FLAG_FOR_BLPRED
2121	if( (!pcCU->isIntra( uiAbsPartIdx ) \|\| pcCU->isIntraBL( uiAbsPartIdx )) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2122	#else
2123	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2124	#endif
2125	{
2126	ui16CtxCbf = 0;
2127	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2128	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2129	}
2130	else
2131	{
2132	ui16CtxCbf = pcCU->getCtxQtCbf( uiAbsPartIdx, eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
2133	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
2134	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2135	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2136	}
2137
2138	Bool bFoundLast = false;
2139	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2140	{
2141	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
2142
2143	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2144	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2145	{
2146	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2147	{
2148	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2149	if (iScanPos > iLastScanPos) continue;
2150	UInt uiBlkPos = scan[iScanPos];
2151
2152	if( piDstCoeff[ uiBlkPos ] )
2153	{
2154	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
2155	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
2156
2157	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
2158	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2159
2160	if( totalCost < d64BestCost )
2161	{
2162	iBestLastIdxP1 = iScanPos + 1;
2163	d64BestCost = totalCost;
2164	}
2165	if( piDstCoeff[ uiBlkPos ] > 1 )
2166	{
2167	bFoundLast = true;
2168	break;
2169	}
2170	d64BaseCost -= pdCostCoeff[ iScanPos ];
2171	d64BaseCost += pdCostCoeff0[ iScanPos ];
2172	}
2173	else
2174	{
2175	d64BaseCost -= pdCostSig[ iScanPos ];
2176	}
2177	} //end for
2178	if (bFoundLast)
2179	{
2180	break;
2181	}
2182	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2183	} // end for
2184
2185	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2186	{
2187	Int blkPos = scan[ scanPos ];
2188	Int level = piDstCoeff[ blkPos ];
2189	uiAbsSum += level;
2190	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2191	}
2192
2193	//===== clean uncoded coefficients =====
2194	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2195	{
2196	piDstCoeff[ scan[ scanPos ] ] = 0;
2197	}
2198
2199	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2200	{
2201	Int64 rdFactor = (Int64)((Double)(g_invQuantScales[m_cQP.rem()])(Double)(g_invQuantScales[m_cQP.rem()])(Double)(1<<(2m_cQP.m_iPer))/m_dLambda/16/(Double)(1<<(2g_uiBitIncrement)) + 0.5);
2202	Int lastCG = -1;
2203	Int absSum = 0 ;
2204	Int n ;
2205
2206	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
2207	{
2208	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
2209	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
2210	absSum = 0 ;
2211
2212	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
2213	{
2214	if( piDstCoeff[ scan[ n + subPos ]] )
2215	{
2216	lastNZPosInCG = n;
2217	break;
2218	}
2219	}
2220
2221	for(n = 0; n <SCAN_SET_SIZE; n++ )
2222	{
2223	if( piDstCoeff[ scan[ n + subPos ]] )
2224	{
2225	firstNZPosInCG = n;
2226	break;
2227	}
2228	}
2229
2230	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2231	{
2232	absSum += piDstCoeff[ scan[ n + subPos ]];
2233	}
2234
2235	if(lastNZPosInCG>=0 && lastCG==-1)
2236	{
2237	lastCG = 1;
2238	}
2239
2240	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2241	{
2242	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
2243	if( signbit!=(absSum&0x1) ) // hide but need tune
2244	{
2245	// calculate the cost
2246	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
2247	Int minPos =-1, finalChange=0, curChange=0;
2248
2249	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
2250	{
2251	UInt uiBlkPos = scan[ n + subPos ];
2252	if(piDstCoeff[ uiBlkPos ] != 0 )
2253	{
2254	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
2255	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2256	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
2257
2258	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2259	{
2260	costDown -= (4<<15) ;
2261	}
2262
2263	if(costUp<costDown)
2264	{
2265	curCost = costUp;
2266	curChange = 1 ;
2267	}
2268	else
2269	{
2270	curChange = -1 ;
2271	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2272	{
2273	curCost = MAX_INT64 ;
2274	}
2275	else
2276	{
2277	curCost = costDown ;
2278	}
2279	}
2280	}
2281	else
2282	{
2283	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2284	curChange = 1 ;
2285
2286	if(n<firstNZPosInCG)
2287	{
2288	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2289	if(thissignbit != signbit )
2290	{
2291	curCost = MAX_INT64;
2292	}
2293	}
2294	}
2295
2296	if( curCost<minCostInc)
2297	{
2298	minCostInc = curCost ;
2299	finalChange = curChange ;
2300	minPos = uiBlkPos ;
2301	}
2302	}
2303
2304	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
2305	{
2306	finalChange = -1;
2307	}
2308
2309	if(plSrcCoeff[minPos]>=0)
2310	{
2311	piDstCoeff[minPos] += finalChange ;
2312	}
2313	else
2314	{
2315	piDstCoeff[minPos] -= finalChange ;
2316	}
2317	}
2318	}
2319
2320	if(lastCG==1)
2321	{
2322	lastCG=0 ;
2323	}
2324	}
2325	}
2326	}
2327
2328	/** Pattern decision for context derivation process of significant_coeff_flag
2329	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2330	* \param posXCG column of current coefficient group
2331	* \param posYCG row of current coefficient group
2332	* \param width width of the block
2333	* \param height height of the block
2334	* \returns pattern for current coefficient group
2335	*/
2336	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2337	{
2338	#if REMOVAL_8x2_2x8_CG
2339	if( width == 4 && height == 4 ) return -1;
2340	#else
2341	if( width == height && width <= 8 ) return -1;
2342	#endif
2343
2344	UInt sigRight = 0;
2345	UInt sigLower = 0;
2346
2347	width >>= 2;
2348	height >>= 2;
2349	if( posXCG < width - 1 )
2350	{
2351	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2352	}
2353	if (posYCG < height - 1 )
2354	{
2355	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2356	}
2357	return sigRight + (sigLower<<1);
2358	}
2359
2360	/** Context derivation process of coeff_abs_significant_flag
2361	* \param patternSigCtx pattern for current coefficient group
2362	* \param posX column of current scan position
2363	* \param posY row of current scan position
2364	* \param blockType log2 value of block size if square block, or 4 otherwise
2365	* \param width width of the block
2366	* \param height height of the block
2367	* \param textureType texture type (TEXT_LUMA...)
2368	* \returns ctxInc for current scan position
2369	*/
2370	Int TComTrQuant::getSigCtxInc (
2371	Int patternSigCtx,
2372	#if REMOVAL_8x2_2x8_CG
2373	UInt scanIdx,
2374	#endif
2375	Int posX,
2376	Int posY,
2377	Int blockType,
2378	Int width
2379	,Int height
2380	,TextType textureType
2381	)
2382	{
2383	const Int ctxIndMap[16] =
2384	{
2385	0, 1, 4, 5,
2386	2, 3, 4, 5,
2387	6, 6, 8, 8,
2388	7, 7, 8, 8
2389	};
2390
2391	if( posX + posY == 0 )
2392	{
2393	return 0;
2394	}
2395
2396	if ( blockType == 2 )
2397	{
2398	return ctxIndMap[ 4 * posY + posX ];
2399	}
2400
2401	#if !REMOVAL_8x2_2x8_CG
2402	if ( blockType == 3 )
2403	{
2404	return 9 + ctxIndMap[ 4 * (posY >> 1) + (posX >> 1) ];
2405	}
2406
2407	Int offset = 18;
2408	#else
2409	Int offset = blockType == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2410	#endif
2411
2412	Int posXinSubset = posX-((posX>>2)<<2);
2413	Int posYinSubset = posY-((posY>>2)<<2);
2414	Int cnt = 0;
2415	if(patternSigCtx==0)
2416	{
2417	#if REMOVAL_8x2_2x8_CG
2418	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2419	#else
2420	cnt = posXinSubset+posYinSubset<=2 ? 1 : 0;
2421	#endif
2422	}
2423	else if(patternSigCtx==1)
2424	{
2425	#if REMOVAL_8x2_2x8_CG
2426	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2427	#else
2428	cnt = posYinSubset<=1 ? 1 : 0;
2429	#endif
2430	}
2431	else if(patternSigCtx==2)
2432	{
2433	#if REMOVAL_8x2_2x8_CG
2434	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2435	#else
2436	cnt = posXinSubset<=1 ? 1 : 0;
2437	#endif
2438	}
2439	else
2440	{
2441	#if REMOVAL_8x2_2x8_CG
2442	cnt = 2;
2443	#else
2444	cnt = posXinSubset+posYinSubset<=4 ? 2 : 1;
2445	#endif
2446	}
2447
2448	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2449	}
2450
2451	/** Get the best level in RD sense
2452	* \param rd64CodedCost reference to coded cost
2453	* \param rd64CodedCost0 reference to cost when coefficient is 0
2454	* \param rd64CodedCostSig reference to cost of significant coefficient
2455	* \param lLevelDouble reference to unscaled quantized level
2456	* \param uiMaxAbsLevel scaled quantized level
2457	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2458	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2459	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2460	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2461	* \param iQBits quantization step size
2462	* \param dTemp correction factor
2463	* \param bLast indicates if the coefficient is the last significant
2464	* \returns best quantized transform level for given scan position
2465	* This method calculates the best quantized transform level for a given scan position.
2466	*/
2467	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2468	Double& rd64CodedCost0,
2469	Double& rd64CodedCostSig,
2470	Int lLevelDouble,
2471	UInt uiMaxAbsLevel,
2472	UShort ui16CtxNumSig,
2473	UShort ui16CtxNumOne,
2474	UShort ui16CtxNumAbs,
2475	UShort ui16AbsGoRice,
2476	UInt c1Idx,
2477	UInt c2Idx,
2478	Int iQBits,
2479	Double dTemp,
2480	Bool bLast ) const
2481	{
2482	Double dCurrCostSig = 0;
2483	UInt uiBestAbsLevel = 0;
2484
2485	if( !bLast && uiMaxAbsLevel < 3 )
2486	{
2487	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2488	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2489	if( uiMaxAbsLevel == 0 )
2490	{
2491	return uiBestAbsLevel;
2492	}
2493	}
2494	else
2495	{
2496	rd64CodedCost = MAX_DOUBLE;
2497	}
2498
2499	if( !bLast )
2500	{
2501	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2502	}
2503
2504	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2505	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2506	{
2507	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2508	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2509	dCurrCost += dCurrCostSig;
2510
2511	if( dCurrCost < rd64CodedCost )
2512	{
2513	uiBestAbsLevel = uiAbsLevel;
2514	rd64CodedCost = dCurrCost;
2515	rd64CodedCostSig = dCurrCostSig;
2516	}
2517	}
2518
2519	return uiBestAbsLevel;
2520	}
2521
2522	/** Calculates the cost for specific absolute transform level
2523	* \param uiAbsLevel scaled quantized level
2524	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2525	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2526	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2527	* \returns cost of given absolute transform level
2528	*/
2529	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2530	UShort ui16CtxNumOne,
2531	UShort ui16CtxNumAbs,
2532	UShort ui16AbsGoRice
2533	, UInt c1Idx,
2534	UInt c2Idx
2535	) const
2536	{
2537	Double iRate = xGetIEPRate();
2538	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2539
2540	if ( uiAbsLevel >= baseLevel )
2541	{
2542	UInt symbol = uiAbsLevel - baseLevel;
2543	UInt length;
2544	#if COEF_REMAIN_BIN_REDUCTION
2545	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2546	#else
2547	if (symbol < (8 << ui16AbsGoRice))
2548	#endif
2549	{
2550	length = symbol>>ui16AbsGoRice;
2551	iRate += (length+1+ui16AbsGoRice)<< 15;
2552	}
2553	else
2554	{
2555	length = ui16AbsGoRice;
2556	#if COEF_REMAIN_BIN_REDUCTION
2557	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2558	#else
2559	symbol = symbol - ( 8 << ui16AbsGoRice);
2560	#endif
2561	while (symbol >= (1<<length))
2562	{
2563	symbol -= (1<<(length++));
2564	}
2565	#if COEF_REMAIN_BIN_REDUCTION
2566	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2567	#else
2568	iRate += (8+length+1-ui16AbsGoRice+length)<< 15;
2569	#endif
2570	}
2571	if (c1Idx < C1FLAG_NUMBER)
2572	{
2573	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2574
2575	if (c2Idx < C2FLAG_NUMBER)
2576	{
2577	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2578	}
2579	}
2580	}
2581	else
2582	if( uiAbsLevel == 1 )
2583	{
2584	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2585	}
2586	else if( uiAbsLevel == 2 )
2587	{
2588	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2589	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2590	}
2591	else
2592	{
2593	assert (0);
2594	}
2595	return xGetICost( iRate );
2596	}
2597
2598	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2599	UShort ui16CtxNumOne,
2600	UShort ui16CtxNumAbs,
2601	UShort ui16AbsGoRice
2602	, UInt c1Idx,
2603	UInt c2Idx
2604	) const
2605	{
2606	Int iRate = 0;
2607	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2608
2609	if ( uiAbsLevel >= baseLevel )
2610	{
2611	UInt uiSymbol = uiAbsLevel - baseLevel;
2612	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2613	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2614
2615	if( bExpGolomb )
2616	{
2617	uiAbsLevel = uiSymbol - uiMaxVlc;
2618	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2619	iRate += iEGS << 15;
2620	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2621	}
2622
2623	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2624	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2625
2626	iRate += ui16NumBins << 15;
2627
2628	if (c1Idx < C1FLAG_NUMBER)
2629	{
2630	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2631
2632	if (c2Idx < C2FLAG_NUMBER)
2633	{
2634	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2635	}
2636	}
2637	}
2638	else
2639	if( uiAbsLevel == 0 )
2640	{
2641	return 0;
2642	}
2643	else if( uiAbsLevel == 1 )
2644	{
2645	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2646	}
2647	else if( uiAbsLevel == 2 )
2648	{
2649	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2650	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2651	}
2652	else
2653	{
2654	assert(0);
2655	}
2656	return iRate;
2657	}
2658
2659	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2660	UShort ui16CtxNumSig ) const
2661	{
2662	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2663	}
2664
2665	/** Calculates the cost of signaling the last significant coefficient in the block
2666	* \param uiPosX X coordinate of the last significant coefficient
2667	* \param uiPosY Y coordinate of the last significant coefficient
2668	* \returns cost of last significant coefficient
2669	*/
2670	/*
2671	* \param uiWidth width of the transform unit (TU)
2672	*/
2673	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2674	const UInt uiPosY,
2675	const UInt uiBlkWdth ) const
2676	{
2677	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2678	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2679	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2680	if( uiCtxX > 3 )
2681	{
2682	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2683	}
2684	if( uiCtxY > 3 )
2685	{
2686	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2687	}
2688	return xGetICost( uiCost );
2689	}
2690
2691	/** Calculates the cost for specific absolute transform level
2692	* \param uiAbsLevel scaled quantized level
2693	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2694	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2695	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2696	* \returns cost of given absolute transform level
2697	*/
2698	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2699	UShort ui16CtxNumSig ) const
2700	{
2701	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2702	}
2703
2704	/** Get the cost for a specific rate
2705	* \param dRate rate of a bit
2706	* \returns cost at the specific rate
2707	*/
2708	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2709	{
2710	return m_dLambda * dRate;
2711	}
2712
2713	/** Get the cost of an equal probable bit
2714	* \returns cost of equal probable bit
2715	*/
2716	__inline Double TComTrQuant::xGetIEPRate ( ) const
2717	{
2718	return 32768;
2719	}
2720
2721	/** Context derivation process of coeff_abs_significant_flag
2722	* \param uiSigCoeffGroupFlag significance map of L1
2723	* \param uiBlkX column of current scan position
2724	* \param uiBlkY row of current scan position
2725	* \param uiLog2BlkSize log2 value of block size
2726	* \returns ctxInc for current scan position
2727	*/
2728	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2729	const UInt uiCGPosX,
2730	const UInt uiCGPosY,
2731	const UInt scanIdx,
2732	Int width, Int height)
2733	{
2734	UInt uiRight = 0;
2735	UInt uiLower = 0;
2736
2737	width >>= 2;
2738	height >>= 2;
2739	#if !REMOVAL_8x2_2x8_CG
2740	if( width == 2 && height == 2 ) // 8x8
2741	{
2742	if( scanIdx == SCAN_HOR )
2743	{
2744	width = 1;
2745	height = 4;
2746	}
2747	else if( scanIdx == SCAN_VER )
2748	{
2749	width = 4;
2750	height = 1;
2751	}
2752	}
2753	#endif
2754	if( uiCGPosX < width - 1 )
2755	{
2756	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2757	}
2758	if (uiCGPosY < height - 1 )
2759	{
2760	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2761	}
2762	return (uiRight \|\| uiLower);
2763
2764	}
2765	/** set quantized matrix coefficient for encode
2766	* \param scalingList quantaized matrix address
2767	*/
2768	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2769	{
2770	UInt size,list;
2771	UInt qp;
2772
2773	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2774	{
2775	for(list = 0; list < g_scalingListNum[size]; list++)
2776	{
2777	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2778	{
2779	xSetScalingListEnc(scalingList,list,size,qp);
2780	xSetScalingListDec(scalingList,list,size,qp);
2781	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2782	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2783	{
2784	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2785	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2786	}
2787	}
2788	}
2789	}
2790	}
2791	/** set quantized matrix coefficient for decode
2792	* \param scalingList quantaized matrix address
2793	*/
2794	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2795	{
2796	UInt size,list;
2797	UInt qp;
2798
2799	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2800	{
2801	for(list = 0; list < g_scalingListNum[size]; list++)
2802	{
2803	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2804	{
2805	xSetScalingListDec(scalingList,list,size,qp);
2806	}
2807	}
2808	}
2809	}
2810	/** set error scale coefficients
2811	* \param list List ID
2812	* \param uiSize Size
2813	* \param uiQP Quantization parameter
2814	*/
2815	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp, UInt dir)
2816	{
2817
2818	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2819	#if FULL_NBIT
2820	UInt uiBitDepth = g_uiBitDepth;
2821	#else
2822	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
2823	#endif
2824
2825	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
2826
2827	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2828	Int *piQuantcoeff;
2829	double *pdErrScale;
2830	piQuantcoeff = getQuantCoeff(list, qp,size,dir);
2831	pdErrScale = getErrScaleCoeff(list, size, qp,dir);
2832
2833	double dErrScale = (double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2834	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2835	for(i=0;i<uiMaxNumCoeff;i++)
2836	{
2837	pdErrScale[i] = dErrScale/(double)piQuantcoeff[i]/(double)piQuantcoeff[i]/(double)(1<<(2*g_uiBitIncrement));
2838	}
2839	}
2840
2841	/** set quantized matrix coefficient for encode
2842	* \param scalingList quantaized matrix address
2843	* \param listId List index
2844	* \param sizeId size index
2845	* \param uiQP Quantization parameter
2846	*/
2847	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2848	{
2849	UInt width = g_scalingListSizeX[sizeId];
2850	UInt height = g_scalingListSizeX[sizeId];
2851	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2852	Int *quantcoeff;
2853	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2854	quantcoeff = getQuantCoeff(listId, qp, sizeId, SCALING_LIST_SQT);
2855
2856	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2857
2858	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16) //for NSQT
2859	{
2860	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2861	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2862
2863	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2864	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2865	}
2866	}
2867	/** set quantized matrix coefficient for decode
2868	* \param scalingList quantaized matrix address
2869	* \param list List index
2870	* \param size size index
2871	* \param uiQP Quantization parameter
2872	*/
2873	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2874	{
2875	UInt width = g_scalingListSizeX[sizeId];
2876	UInt height = g_scalingListSizeX[sizeId];
2877	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2878	Int *dequantcoeff;
2879	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2880
2881	dequantcoeff = getDequantCoeff(listId, qp, sizeId,SCALING_LIST_SQT);
2882	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2883
2884	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16)
2885	{
2886	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2887	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2888
2889	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2890
2891	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2892	}
2893	}
2894
2895	/** set flat matrix value to quantized coefficient
2896	*/
2897	Void TComTrQuant::setFlatScalingList()
2898	{
2899	UInt size,list;
2900	UInt qp;
2901
2902	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2903	{
2904	for(list = 0; list < g_scalingListNum[size]; list++)
2905	{
2906	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2907	{
2908	xsetFlatScalingList(list,size,qp);
2909	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2910	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2911	{
2912	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2913	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2914	}
2915	}
2916	}
2917	}
2918	}
2919
2920	/** set flat matrix value to quantized coefficient
2921	* \param list List ID
2922	* \param uiQP Quantization parameter
2923	* \param uiSize Size
2924	*/
2925	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2926	{
2927	UInt i,num = g_scalingListSize[size];
2928	UInt numDiv4 = num>>2;
2929	Int *quantcoeff;
2930	Int *dequantcoeff;
2931	Int quantScales = g_quantScales[qp];
2932	Int invQuantScales = g_invQuantScales[qp]<<4;
2933
2934	quantcoeff = getQuantCoeff(list, qp, size,SCALING_LIST_SQT);
2935	dequantcoeff = getDequantCoeff(list, qp, size,SCALING_LIST_SQT);
2936
2937	for(i=0;i<num;i++)
2938	{
2939	*quantcoeff++ = quantScales;
2940	*dequantcoeff++ = invQuantScales;
2941	}
2942
2943	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2944	{
2945	quantcoeff = getQuantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2946	dequantcoeff = getDequantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2947
2948	for(i=0;i<numDiv4;i++)
2949	{
2950	*quantcoeff++ = quantScales;
2951	*dequantcoeff++ = invQuantScales;
2952	}
2953	quantcoeff = getQuantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2954	dequantcoeff = getDequantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2955
2956	for(i=0;i<numDiv4;i++)
2957	{
2958	*quantcoeff++ = quantScales;
2959	*dequantcoeff++ = invQuantScales;
2960	}
2961	}
2962	}
2963
2964	/** set quantized matrix coefficient for encode
2965	* \param coeff quantaized matrix address
2966	* \param quantcoeff quantaized matrix address
2967	* \param quantScales Q(QP%6)
2968	* \param height height
2969	* \param width width
2970	* \param ratio ratio for upscale
2971	* \param sizuNum matrix size
2972	* \param dc dc parameter
2973	*/
2974	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2975	{
2976	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2977	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2978	for(UInt j=0;j<height;j++)
2979	{
2980	for(UInt i=0;i<width;i++)
2981	{
2982	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2983	}
2984	}
2985	if(ratio > 1)
2986	{
2987	quantcoeff[0] = quantScales / dc;
2988	}
2989	}
2990	/** set quantized matrix coefficient for decode
2991	* \param coeff quantaized matrix address
2992	* \param dequantcoeff quantaized matrix address
2993	* \param invQuantScales IQ(QP%6))
2994	* \param height height
2995	* \param width width
2996	* \param ratio ratio for upscale
2997	* \param sizuNum matrix size
2998	* \param dc dc parameter
2999	*/
3000	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3001	{
3002	#if !REMOVE_NSQT
3003	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
3004	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
3005	#endif
3006	for(UInt j=0;j<height;j++)
3007	{
3008	for(UInt i=0;i<width;i++)
3009	{
3010	#if REMOVE_NSQT
3011	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
3012	#else
3013	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio];
3014	#endif
3015	}
3016	}
3017	if(ratio > 1)
3018	{
3019	dequantcoeff[0] = invQuantScales * dc;
3020	}
3021	}
3022
3023	/** initialization process of scaling list array
3024	*/
3025	Void TComTrQuant::initScalingList()
3026	{
3027	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3028	{
3029	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3030	{
3031	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3032	{
3033	m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3034	m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3035	m_errScale [sizeId][listId][qp][SCALING_LIST_SQT] = new double [g_scalingListSize[sizeId]];
3036
3037	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3038	{
3039	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3040	{
3041	m_quantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3042	m_dequantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3043	m_errScale [sizeId][listId][qp][dir] = new double [g_scalingListSize[sizeId]];
3044	}
3045	}
3046	}
3047	}
3048	}
3049	//copy for NSQT
3050	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3051	{
3052	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3053	{
3054	m_quantCoef [SCALING_LIST_16x16][3][qp][dir] = m_quantCoef [SCALING_LIST_16x16][1][qp][dir];
3055	m_dequantCoef [SCALING_LIST_16x16][3][qp][dir] = m_dequantCoef [SCALING_LIST_16x16][1][qp][dir];
3056	m_errScale [SCALING_LIST_16x16][3][qp][dir] = m_errScale [SCALING_LIST_16x16][1][qp][dir];
3057	}
3058	m_quantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_quantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3059	m_dequantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_dequantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3060	m_errScale [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_errScale [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3061	}
3062	}
3063	/** destroy quantization matrix array
3064	*/
3065	Void TComTrQuant::destroyScalingList()
3066	{
3067	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3068	{
3069	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3070	{
3071	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3072	{
3073	if(m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3074	if(m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3075	if(m_errScale [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_errScale [sizeId][listId][qp][SCALING_LIST_SQT];
3076	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3077	{
3078	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3079	{
3080	if(m_quantCoef [sizeId][listId][qp][dir]) delete [] m_quantCoef [sizeId][listId][qp][dir];
3081	if(m_dequantCoef [sizeId][listId][qp][dir]) delete [] m_dequantCoef [sizeId][listId][qp][dir];
3082	if(m_errScale [sizeId][listId][qp][dir]) delete [] m_errScale [sizeId][listId][qp][dir];
3083	}
3084	}
3085	}
3086	}
3087	}
3088	}
3089
3090	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: