Context navigation

source: SHVCSoftware/trunk/source/Lib/TLibCommon/TComTrQuant.cpp @ 19

Visit:

Last change on this file since 19 was 17, checked in by seregin, 12 years ago
NO_RESIDUAL_FLAG_FOR_BLPRED: Root cbf for Intra_BL (L0437)
File size: 101.8 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2012, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_bUseRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	#if CHROMA_QP_EXTENSION
203	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
204	#else
205	qpScaled = Clip3( -qpBdOffset, 51, qpy + chromaQPOffset );
206	#endif
207
208	if(qpScaled < 0)
209	{
210	qpScaled = qpScaled + qpBdOffset;
211	}
212	else
213	{
214	#if CHROMA_QP_EXTENSION
215	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
216	#else
217	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBdOffset;
218	#endif
219	}
220	}
221	m_cQP.setQpParam( qpScaled );
222	}
223
224	#if MATRIX_MULT
225	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
226	* \param block pointer to input data (residual)
227	* \param coeff pointer to output data (transform coefficients)
228	* \param uiStride stride of input data
229	* \param uiTrSize transform size (uiTrSize x uiTrSize)
230	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
231	*/
232	void xTr(Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
233	{
234	Int i,j,k,iSum;
235	Int tmp[32*32];
236	const short *iT;
237	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
238
239	if (uiTrSize==4)
240	{
241	iT = g_aiT4[0];
242	}
243	else if (uiTrSize==8)
244	{
245	iT = g_aiT8[0];
246	}
247	else if (uiTrSize==16)
248	{
249	iT = g_aiT16[0];
250	}
251	else if (uiTrSize==32)
252	{
253	iT = g_aiT32[0];
254	}
255	else
256	{
257	assert(0);
258	}
259
260	#if FULL_NBIT
261	int shift_1st = uiLog2TrSize - 1 + g_uiBitDepth - 8; // log2(N) - 1 + g_uiBitDepth - 8
262	#else
263	int shift_1st = uiLog2TrSize - 1 + g_uiBitIncrement; // log2(N) - 1 + g_uiBitIncrement
264	#endif
265
266	int add_1st = 1<<(shift_1st-1);
267	int shift_2nd = uiLog2TrSize + 6;
268	int add_2nd = 1<<(shift_2nd-1);
269
270	/* Horizontal transform */
271
272	if (uiTrSize==4)
273	{
274	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
275	{
276	iT = g_as_DST_MAT_4[0];
277	}
278	}
279	for (i=0; i<uiTrSize; i++)
280	{
281	for (j=0; j<uiTrSize; j++)
282	{
283	iSum = 0;
284	for (k=0; k<uiTrSize; k++)
285	{
286	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
287	}
288	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
289	}
290	}
291
292	/* Vertical transform */
293	if (uiTrSize==4)
294	{
295	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
296	{
297	iT = g_as_DST_MAT_4[0];
298	}
299	else
300	{
301	iT = g_aiT4[0];
302	}
303	}
304	for (i=0; i<uiTrSize; i++)
305	{
306	for (j=0; j<uiTrSize; j++)
307	{
308	iSum = 0;
309	for (k=0; k<uiTrSize; k++)
310	{
311	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
312	}
313	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
314	}
315	}
316	}
317
318	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
319	* \param coeff pointer to input data (transform coefficients)
320	* \param block pointer to output data (residual)
321	* \param uiStride stride of output data
322	* \param uiTrSize transform size (uiTrSize x uiTrSize)
323	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
324	*/
325	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
326	{
327	int i,j,k,iSum;
328	Int tmp[32*32];
329	const short *iT;
330
331	if (uiTrSize==4)
332	{
333	iT = g_aiT4[0];
334	}
335	else if (uiTrSize==8)
336	{
337	iT = g_aiT8[0];
338	}
339	else if (uiTrSize==16)
340	{
341	iT = g_aiT16[0];
342	}
343	else if (uiTrSize==32)
344	{
345	iT = g_aiT32[0];
346	}
347	else
348	{
349	assert(0);
350	}
351
352	int shift_1st = SHIFT_INV_1ST;
353	int add_1st = 1<<(shift_1st-1);
354	#if FULL_NBIT
355	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
356	#else
357	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
358	#endif
359	int add_2nd = 1<<(shift_2nd-1);
360	if (uiTrSize==4)
361	{
362	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
363	{
364	iT = g_as_DST_MAT_4[0];
365	}
366	}
367
368	/* Horizontal transform */
369	for (i=0; i<uiTrSize; i++)
370	{
371	for (j=0; j<uiTrSize; j++)
372	{
373	iSum = 0;
374	for (k=0; k<uiTrSize; k++)
375	{
376	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
377	}
378	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
379	}
380	}
381
382	if (uiTrSize==4)
383	{
384	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
385	{
386	iT = g_as_DST_MAT_4[0];
387	}
388	else
389	{
390	iT = g_aiT4[0];
391	}
392	}
393
394	/* Vertical transform */
395	for (i=0; i<uiTrSize; i++)
396	{
397	for (j=0; j<uiTrSize; j++)
398	{
399	iSum = 0;
400	for (k=0; k<uiTrSize; k++)
401	{
402	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
403	}
404	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
405	}
406	}
407	}
408
409	#else //MATRIX_MULT
410
411	/** 4x4 forward transform implemented using partial butterfly structure (1D)
412	* \param src input data (residual)
413	* \param dst output data (transform coefficients)
414	* \param shift specifies right shift after 1D transform
415	*/
416
417	void partialButterfly4(short src,short dst,int shift, int line)
418	{
419	int j;
420	int E[2],O[2];
421	int add = 1<<(shift-1);
422
423	for (j=0; j<line; j++)
424	{
425	/* E and O */
426	E[0] = src[0] + src[3];
427	O[0] = src[0] - src[3];
428	E[1] = src[1] + src[2];
429	O[1] = src[1] - src[2];
430
431	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
432	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
433	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
434	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
435
436	src += 4;
437	dst ++;
438	}
439	}
440
/** Fast 4-point forward DST applied to each of the four rows of a 4x4 block.
 *  Full matrix multiplication for DST and this fast algorithm give identical results.
 * \param block input data, 16 samples stored row by row
 * \param coeff output data, 16 coefficients; row i of the input fills column i
 *              of the output (the result is written transposed)
 * \param shift right shift applied after the transform, with rounding
 */
void fastForwardDst(short *block,short *coeff,int shift)  // input block, output coeff
{
  int i, c[4];
  int rnd_factor = 1<<(shift-1);
  for (i=0; i<4; i++)
  {
    // Intermediate Variables
    c[0] = block[4*i+0] + block[4*i+3];
    c[1] = block[4*i+1] + block[4*i+3];
    c[2] = block[4*i+0] - block[4*i+1];
    c[3] = 74* block[4*i+2];

    coeff[   i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
    coeff[ 4+i] = ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3]) + rnd_factor ) >> shift;
    coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
    coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
  }
}
461
462	void fastInverseDst(short tmp,short block,int shift) // input tmp, output block
463	{
464	int i, c[4];
465	int rnd_factor = 1<<(shift-1);
466	for (i=0; i<4; i++)
467	{
468	// Intermediate Variables
469	c[0] = tmp[ i] + tmp[ 8+i];
470	c[1] = tmp[8+i] + tmp[12+i];
471	c[2] = tmp[ i] - tmp[12+i];
472	c[3] = 74* tmp[4+i];
473
474	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
475	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
476	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
477	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
478	}
479	}
480
481	void partialButterflyInverse4(short src,short dst,int shift, int line)
482	{
483	int j;
484	int E[2],O[2];
485	int add = 1<<(shift-1);
486
487	for (j=0; j<line; j++)
488	{
489	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
490	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
491	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
492	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
493	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
494
495	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
496	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
497	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
498	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
499	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
500
501	src ++;
502	dst += 4;
503	}
504	}
505
506
507	void partialButterfly8(short src,short dst,int shift, int line)
508	{
509	int j,k;
510	int E[4],O[4];
511	int EE[2],EO[2];
512	int add = 1<<(shift-1);
513
514	for (j=0; j<line; j++)
515	{
516	/* E and O*/
517	for (k=0;k<4;k++)
518	{
519	E[k] = src[k] + src[7-k];
520	O[k] = src[k] - src[7-k];
521	}
522	/* EE and EO */
523	EE[0] = E[0] + E[3];
524	EO[0] = E[0] - E[3];
525	EE[1] = E[1] + E[2];
526	EO[1] = E[1] - E[2];
527
528	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
529	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
530	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
531	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
532
533	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
534	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
535	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
536	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
537
538	src += 8;
539	dst ++;
540	}
541	}
542
543
544	void partialButterflyInverse8(short src,short dst,int shift, int line)
545	{
546	int j,k;
547	int E[4],O[4];
548	int EE[2],EO[2];
549	int add = 1<<(shift-1);
550
551	for (j=0; j<line; j++)
552	{
553	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
554	for (k=0;k<4;k++)
555	{
556	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
557	}
558
559	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
560	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
561	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
562	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
563
564	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
565	E[0] = EE[0] + EO[0];
566	E[3] = EE[0] - EO[0];
567	E[1] = EE[1] + EO[1];
568	E[2] = EE[1] - EO[1];
569	for (k=0;k<4;k++)
570	{
571	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
572	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
573	}
574	src ++;
575	dst += 8;
576	}
577	}
578
579
580	void partialButterfly16(short src,short dst,int shift, int line)
581	{
582	int j,k;
583	int E[8],O[8];
584	int EE[4],EO[4];
585	int EEE[2],EEO[2];
586	int add = 1<<(shift-1);
587
588	for (j=0; j<line; j++)
589	{
590	/* E and O*/
591	for (k=0;k<8;k++)
592	{
593	E[k] = src[k] + src[15-k];
594	O[k] = src[k] - src[15-k];
595	}
596	/* EE and EO */
597	for (k=0;k<4;k++)
598	{
599	EE[k] = E[k] + E[7-k];
600	EO[k] = E[k] - E[7-k];
601	}
602	/* EEE and EEO */
603	EEE[0] = EE[0] + EE[3];
604	EEO[0] = EE[0] - EE[3];
605	EEE[1] = EE[1] + EE[2];
606	EEO[1] = EE[1] - EE[2];
607
608	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
609	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
610	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
611	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
612
613	for (k=2;k<16;k+=4)
614	{
615	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
616	}
617
618	for (k=1;k<16;k+=2)
619	{
620	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
621	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
622	}
623
624	src += 16;
625	dst ++;
626
627	}
628	}
629
630
631	void partialButterflyInverse16(short src,short dst,int shift, int line)
632	{
633	int j,k;
634	int E[8],O[8];
635	int EE[4],EO[4];
636	int EEE[2],EEO[2];
637	int add = 1<<(shift-1);
638
639	for (j=0; j<line; j++)
640	{
641	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
642	for (k=0;k<8;k++)
643	{
644	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
645	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
646	}
647	for (k=0;k<4;k++)
648	{
649	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
650	}
651	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
652	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
653	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
654	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
655
656	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
657	for (k=0;k<2;k++)
658	{
659	EE[k] = EEE[k] + EEO[k];
660	EE[k+2] = EEE[1-k] - EEO[1-k];
661	}
662	for (k=0;k<4;k++)
663	{
664	E[k] = EE[k] + EO[k];
665	E[k+4] = EE[3-k] - EO[3-k];
666	}
667	for (k=0;k<8;k++)
668	{
669	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
670	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
671	}
672	src ++;
673	dst += 16;
674	}
675	}
676
677
678	void partialButterfly32(short src,short dst,int shift, int line)
679	{
680	int j,k;
681	int E[16],O[16];
682	int EE[8],EO[8];
683	int EEE[4],EEO[4];
684	int EEEE[2],EEEO[2];
685	int add = 1<<(shift-1);
686
687	for (j=0; j<line; j++)
688	{
689	/* E and O*/
690	for (k=0;k<16;k++)
691	{
692	E[k] = src[k] + src[31-k];
693	O[k] = src[k] - src[31-k];
694	}
695	/* EE and EO */
696	for (k=0;k<8;k++)
697	{
698	EE[k] = E[k] + E[15-k];
699	EO[k] = E[k] - E[15-k];
700	}
701	/* EEE and EEO */
702	for (k=0;k<4;k++)
703	{
704	EEE[k] = EE[k] + EE[7-k];
705	EEO[k] = EE[k] - EE[7-k];
706	}
707	/* EEEE and EEEO */
708	EEEE[0] = EEE[0] + EEE[3];
709	EEEO[0] = EEE[0] - EEE[3];
710	EEEE[1] = EEE[1] + EEE[2];
711	EEEO[1] = EEE[1] - EEE[2];
712
713	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
714	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
715	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
716	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
717	for (k=4;k<32;k+=8)
718	{
719	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
720	}
721	for (k=2;k<32;k+=4)
722	{
723	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
724	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
725	}
726	for (k=1;k<32;k+=2)
727	{
728	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
729	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
730	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
731	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
732	}
733	src += 32;
734	dst ++;
735	}
736	}
737
738
739	void partialButterflyInverse32(short src,short dst,int shift, int line)
740	{
741	int j,k;
742	int E[16],O[16];
743	int EE[8],EO[8];
744	int EEE[4],EEO[4];
745	int EEEE[2],EEEO[2];
746	int add = 1<<(shift-1);
747
748	for (j=0; j<line; j++)
749	{
750	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
751	for (k=0;k<16;k++)
752	{
753	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
754	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
755	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
756	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
757	}
758	for (k=0;k<8;k++)
759	{
760	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
761	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
762	}
763	for (k=0;k<4;k++)
764	{
765	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
766	}
767	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
768	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
769	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
770	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
771
772	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
773	EEE[0] = EEEE[0] + EEEO[0];
774	EEE[3] = EEEE[0] - EEEO[0];
775	EEE[1] = EEEE[1] + EEEO[1];
776	EEE[2] = EEEE[1] - EEEO[1];
777	for (k=0;k<4;k++)
778	{
779	EE[k] = EEE[k] + EEO[k];
780	EE[k+4] = EEE[3-k] - EEO[3-k];
781	}
782	for (k=0;k<8;k++)
783	{
784	E[k] = EE[k] + EO[k];
785	E[k+8] = EE[7-k] - EO[7-k];
786	}
787	for (k=0;k<16;k++)
788	{
789	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
790	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
791	}
792	src ++;
793	dst += 32;
794	}
795	}
796
797	/** MxN forward transform (2D)
798	* \param block input data (residual)
799	* \param coeff output data (transform coefficients)
800	* \param iWidth input data (width of transform)
801	* \param iHeight input data (height of transform)
802	*/
803	void xTrMxN(short block,short coeff, int iWidth, int iHeight, UInt uiMode)
804	{
805	#if FULL_NBIT
806	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitDepth - 8; // log2(iWidth) - 1 + g_uiBitDepth - 8
807	#else
808	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitIncrement; // log2(iWidth) - 1 + g_uiBitIncrement
809	#endif
810	int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
811
812	short tmp[ 64 * 64 ];
813
814	#if !REMOVE_NSQT
815	if( iWidth == 16 && iHeight == 4)
816	{
817	partialButterfly16( block, tmp, shift_1st, iHeight );
818	partialButterfly4( tmp, coeff, shift_2nd, iWidth );
819	}
820	else if( iWidth == 32 && iHeight == 8 )
821	{
822	partialButterfly32( block, tmp, shift_1st, iHeight );
823	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
824	}
825	else if( iWidth == 4 && iHeight == 16)
826	{
827	partialButterfly4( block, tmp, shift_1st, iHeight );
828	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
829	}
830	else if( iWidth == 8 && iHeight == 32 )
831	{
832	partialButterfly8( block, tmp, shift_1st, iHeight );
833	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
834	}
835	else
836	#endif
837	if( iWidth == 4 && iHeight == 4)
838	{
839	#if INTRA_TRANS_SIMP
840	if (uiMode != REG_DCT)
841	{
842	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
843	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
844	}
845	else
846	{
847	partialButterfly4(block, tmp, shift_1st, iHeight);
848	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
849	}
850
851	#else
852	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
853	{
854	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
855	}
856	else
857	{
858	partialButterfly4(block, tmp, shift_1st, iHeight);
859	}
860	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
861	{
862	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
863	}
864	else
865	{
866	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
867	}
868	#endif
869	}
870	else if( iWidth == 8 && iHeight == 8)
871	{
872	partialButterfly8( block, tmp, shift_1st, iHeight );
873	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
874	}
875	else if( iWidth == 16 && iHeight == 16)
876	{
877	partialButterfly16( block, tmp, shift_1st, iHeight );
878	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
879	}
880	else if( iWidth == 32 && iHeight == 32)
881	{
882	partialButterfly32( block, tmp, shift_1st, iHeight );
883	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
884	}
885	}
886	/** MxN inverse transform (2D)
887	* \param coeff input data (transform coefficients)
888	* \param block output data (residual)
889	* \param iWidth input data (width of transform)
890	* \param iHeight input data (height of transform)
891	*/
892	void xITrMxN(short coeff,short block, int iWidth, int iHeight, UInt uiMode)
893	{
894	int shift_1st = SHIFT_INV_1ST;
895	#if FULL_NBIT
896	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
897	#else
898	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
899	#endif
900
901	short tmp[ 64*64];
902	#if !REMOVE_NSQT
903	if( iWidth == 16 && iHeight == 4)
904	{
905	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
906	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
907	}
908	else if( iWidth == 32 && iHeight == 8)
909	{
910	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
911	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
912	}
913	else if( iWidth == 4 && iHeight == 16)
914	{
915	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
916	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
917	}
918	else if( iWidth == 8 && iHeight == 32)
919	{
920	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
921	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
922	}
923	else
924	#endif
925	if( iWidth == 4 && iHeight == 4)
926	{
927	#if INTRA_TRANS_SIMP
928	if (uiMode != REG_DCT)
929	{
930	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
931	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
932	}
933	else
934	{
935	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
936	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
937	}
938	#else
939	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
940	{
941	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
942	}
943	else
944	{
945	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
946	}
947	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
948	{
949	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
950	}
951	else
952	{
953	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
954	}
955	#endif
956	}
957	else if( iWidth == 8 && iHeight == 8)
958	{
959	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
960	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
961	}
962	else if( iWidth == 16 && iHeight == 16)
963	{
964	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
965	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
966	}
967	else if( iWidth == 32 && iHeight == 32)
968	{
969	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
970	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
971	}
972	}
973
974	#endif //MATRIX_MULT
975
976	// To minimize the distortion only. No rate is considered.
977	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
978	{
979	Int lastCG = -1;
980	Int absSum = 0 ;
981	Int n ;
982
983	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
984	{
985	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
986	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
987	absSum = 0 ;
988
989	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
990	{
991	if( pQCoef[ scan[ n + subPos ]] )
992	{
993	lastNZPosInCG = n;
994	break;
995	}
996	}
997
998	for(n = 0; n <SCAN_SET_SIZE; n++ )
999	{
1000	if( pQCoef[ scan[ n + subPos ]] )
1001	{
1002	firstNZPosInCG = n;
1003	break;
1004	}
1005	}
1006
1007	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1008	{
1009	absSum += pQCoef[ scan[ n + subPos ]];
1010	}
1011
1012	if(lastNZPosInCG>=0 && lastCG==-1)
1013	{
1014	lastCG = 1 ;
1015	}
1016
1017	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1018	{
1019	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
1020	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1021	{
1022	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
1023
1024	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1025	{
1026	UInt blkPos = scan[ n+subPos ];
1027	if(pQCoef[ blkPos ] != 0 )
1028	{
1029	if(deltaU[blkPos]>0)
1030	{
1031	curCost = - deltaU[blkPos];
1032	curChange=1 ;
1033	}
1034	else
1035	{
1036	//curChange =-1;
1037	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1038	{
1039	curCost=MAX_INT ;
1040	}
1041	else
1042	{
1043	curCost = deltaU[blkPos];
1044	curChange =-1;
1045	}
1046	}
1047	}
1048	else
1049	{
1050	if(n<firstNZPosInCG)
1051	{
1052	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1053	if(thisSignBit != signbit )
1054	{
1055	curCost = MAX_INT;
1056	}
1057	else
1058	{
1059	curCost = - (deltaU[blkPos]) ;
1060	curChange = 1 ;
1061	}
1062	}
1063	else
1064	{
1065	curCost = - (deltaU[blkPos]) ;
1066	curChange = 1 ;
1067	}
1068	}
1069
1070	if( curCost<minCostInc)
1071	{
1072	minCostInc = curCost ;
1073	finalChange = curChange ;
1074	minPos = blkPos ;
1075	}
1076	} //CG loop
1077
1078	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
1079	{
1080	finalChange = -1;
1081	}
1082
1083	if(pCoef[minPos]>=0)
1084	{
1085	pQCoef[minPos] += finalChange ;
1086	}
1087	else
1088	{
1089	pQCoef[minPos] -= finalChange ;
1090	}
1091	} // Hide
1092	}
1093	if(lastCG==1)
1094	{
1095	lastCG=0 ;
1096	}
1097	} // TU loop
1098
1099	return;
1100	}
1101
1102	Void TComTrQuant::xQuant( TComDataCU* pcCU,
1103	Int* pSrc,
1104	TCoeff* pDes,
1105	#if ADAPTIVE_QP_SELECTION
1106	Int*& pArlDes,
1107	#endif
1108	Int iWidth,
1109	Int iHeight,
1110	UInt& uiAcSum,
1111	TextType eTType,
1112	UInt uiAbsPartIdx )
1113	{
1114	Int* piCoef = pSrc;
1115	TCoeff* piQCoef = pDes;
1116	#if ADAPTIVE_QP_SELECTION
1117	Int* piArlCCoef = pArlDes;
1118	#endif
1119	Int iAdd = 0;
1120
1121	Bool useRDOQForTransformSkip = !(m_useTransformSkipFast && pcCU->getTransformSkip(uiAbsPartIdx,eTType));
1122	if ( m_bUseRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) && useRDOQForTransformSkip)
1123	{
1124	#if ADAPTIVE_QP_SELECTION
1125	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1126	#else
1127	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1128	#endif
1129	}
1130	else
1131	{
1132	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1133
1134	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1135	if (scanIdx == SCAN_ZIGZAG)
1136	{
1137	scanIdx = SCAN_DIAG;
1138	}
1139
1140	#if REMOVE_NSQT
1141	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1142	#else
1143	if (iWidth != iHeight)
1144	{
1145	scanIdx = SCAN_DIAG;
1146	}
1147
1148	const UInt * scan;
1149	if (iWidth == iHeight)
1150	{
1151	scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1152	}
1153	else
1154	{
1155	scan = g_sigScanNSQT[ log2BlockSize - 2 ];
1156	}
1157	#endif
1158
1159	Int deltaU[32*32] ;
1160
1161	#if ADAPTIVE_QP_SELECTION
1162	QpParam cQpBase;
1163	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1164
1165	Int qpScaled;
1166	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1167
1168	if(eTType == TEXT_LUMA)
1169	{
1170	qpScaled = iQpBase + qpBDOffset;
1171	}
1172	else
1173	{
1174	#if CHROMA_QP_EXTENSION
1175	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1176	#else
1177	qpScaled = Clip3( -qpBDOffset, 51, iQpBase);
1178	#endif
1179
1180	if(qpScaled < 0)
1181	{
1182	qpScaled = qpScaled + qpBDOffset;
1183	}
1184	else
1185	{
1186	#if CHROMA_QP_EXTENSION
1187	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1188	#else
1189	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBDOffset;
1190	#endif
1191	}
1192	}
1193	cQpBase.setQpParam(qpScaled);
1194	#endif
1195
1196	#if !REMOVE_NSQT
1197	Bool bNonSqureFlag = ( iWidth != iHeight );
1198	#endif
1199	UInt dir = SCALING_LIST_SQT;
1200	#if !REMOVE_NSQT
1201	if( bNonSqureFlag )
1202	{
1203	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1204	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1205	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1206	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1207	iHeight = iWidth;
1208	}
1209	#endif
1210
1211	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1212	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1213	assert(scalingListType < 6);
1214	Int *piQuantCoeff = 0;
1215	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2, dir);
1216
1217	#if FULL_NBIT
1218	UInt uiBitDepth = g_uiBitDepth;
1219	#else
1220	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1221	#endif
1222	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1223
1224	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1225
1226	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1227
1228	#if ADAPTIVE_QP_SELECTION
1229	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1230	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1231	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1232	Int iAddC = 1 << (iQBitsC-1);
1233	#endif
1234
1235	Int qBits8 = iQBits-8;
1236	for( Int n = 0; n < iWidth*iHeight; n++ )
1237	{
1238	Int iLevel;
1239	Int iSign;
1240	UInt uiBlockPos = n;
1241	iLevel = piCoef[uiBlockPos];
1242	iSign = (iLevel < 0 ? -1: 1);
1243
1244	#if ADAPTIVE_QP_SELECTION
1245	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1246	if( m_bUseAdaptQpSelect )
1247	{
1248	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1249	}
1250	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1251	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1252	#else
1253	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1254	deltaU[uiBlockPos] = (Int)( ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1255	#endif
1256	uiAcSum += iLevel;
1257	iLevel *= iSign;
1258	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1259	} // for n
1260	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1261	{
1262	if(uiAcSum>=2)
1263	{
1264	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1265	}
1266	}
1267	} //if RDOQ
1268	//return;
1269
1270	}
1271
1272	Void TComTrQuant::xDeQuant( const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1273	{
1274
1275	const TCoeff* piQCoef = pSrc;
1276	Int* piCoef = pDes;
1277	UInt dir = SCALING_LIST_SQT;
1278	#if !REMOVE_NSQT
1279	if( iWidth != iHeight )
1280	{
1281	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1282	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1283	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1284	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1285	iHeight = iWidth;
1286	}
1287	#endif
1288
1289	if ( iWidth > (Int)m_uiMaxTrSize )
1290	{
1291	iWidth = m_uiMaxTrSize;
1292	iHeight = m_uiMaxTrSize;
1293	}
1294
1295	Int iShift,iAdd,iCoeffQ;
1296	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1297
1298	#if FULL_NBIT
1299	UInt uiBitDepth = g_uiBitDepth;
1300	#else
1301	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1302	#endif
1303	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1304
1305	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1306
1307	TCoeff clipQCoef;
1308	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + uiBitDepth - m_cQP.m_iPer) );
1309	const Int levelLimit = 1 << bitRange;
1310
1311	if(getUseScalingList())
1312	{
1313	iShift += 4;
1314	if(iShift > m_cQP.m_iPer)
1315	{
1316	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1317	}
1318	else
1319	{
1320	iAdd = 0;
1321	}
1322	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1323
1324	if(iShift > m_cQP.m_iPer)
1325	{
1326	for( Int n = 0; n < iWidth*iHeight; n++ )
1327	{
1328	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1329	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1330	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1331	}
1332	}
1333	else
1334	{
1335	for( Int n = 0; n < iWidth*iHeight; n++ )
1336	{
1337	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
1338	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1339	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1340	}
1341	}
1342	}
1343	else
1344	{
1345	iAdd = 1 << (iShift-1);
1346	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1347
1348	for( Int n = 0; n < iWidth*iHeight; n++ )
1349	{
1350	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1351	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1352	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1353	}
1354	}
1355	}
1356
1357	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
1358	Bool bUseRDOQ, Bool bEnc, Bool useTransformSkipFast
1359	#if ADAPTIVE_QP_SELECTION
1360	, Bool bUseAdaptQpSelect
1361	#endif
1362	)
1363	{
1364	m_uiMaxTrSize = uiMaxTrSize;
1365	m_bEnc = bEnc;
1366	m_bUseRDOQ = bUseRDOQ;
1367	#if ADAPTIVE_QP_SELECTION
1368	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1369	#endif
1370	m_useTransformSkipFast = useTransformSkipFast;
1371	}
1372
1373	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1374	Pel* pcResidual,
1375	UInt uiStride,
1376	TCoeff* rpcCoeff,
1377	#if ADAPTIVE_QP_SELECTION
1378	Int*& rpcArlCoeff,
1379	#endif
1380	UInt uiWidth,
1381	UInt uiHeight,
1382	UInt& uiAbsSum,
1383	TextType eTType,
1384	UInt uiAbsPartIdx,
1385	Bool useTransformSkip
1386	)
1387	{
1388	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1389	{
1390	uiAbsSum=0;
1391	for (UInt k = 0; k<uiHeight; k++)
1392	{
1393	for (UInt j = 0; j<uiWidth; j++)
1394	{
1395	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1396	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1397	}
1398	}
1399	return;
1400	}
1401	UInt uiMode; //luma intra pred
1402	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1403	{
1404	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1405	}
1406	#if INTRA_BL_DST4x4
1407	else if(eTType == TEXT_LUMA && pcCU->isIntraBL(uiAbsPartIdx) )
1408	{
1409	uiMode = DC_IDX; //Using DST
1410	}
1411	#endif
1412	else
1413	{
1414	uiMode = REG_DCT;
1415	}
1416
1417	uiAbsSum = 0;
1418	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1419	if(useTransformSkip)
1420	{
1421	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1422	}
1423	else
1424	{
1425	xT( uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1426	}
1427	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1428	#if ADAPTIVE_QP_SELECTION
1429	rpcArlCoeff,
1430	#endif
1431	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1432	}
1433
1434	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1435	{
1436	if(transQuantBypass)
1437	{
1438	for (UInt k = 0; k<uiHeight; k++)
1439	{
1440	for (UInt j = 0; j<uiWidth; j++)
1441	{
1442	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1443	}
1444	}
1445	return;
1446	}
1447	xDeQuant( pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1448	if(useTransformSkip == true)
1449	{
1450	xITransformSkip( m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1451	}
1452	else
1453	{
1454	xIT( uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1455	}
1456	}
1457
1458	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1459	{
1460	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1461	{
1462	return;
1463	}
1464
1465	UInt uiLumaTrMode, uiChromaTrMode;
1466	pcCU->convertTransIdx( uiAbsPartIdx, pcCU->getTransformIdx( uiAbsPartIdx ), uiLumaTrMode, uiChromaTrMode );
1467	const UInt uiStopTrMode = eTxt == TEXT_LUMA ? uiLumaTrMode : uiChromaTrMode;
1468
1469	if( uiTrMode == uiStopTrMode )
1470	{
1471	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1472	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1473	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1474	{
1475	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1476	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1477	{
1478	return;
1479	}
1480	uiWidth <<= 1;
1481	uiHeight <<= 1;
1482	}
1483	Pel* pResi = rpcResidual + uiAddr;
1484	#if !REMOVE_NSQT
1485	if( pcCU->useNonSquareTrans( uiTrMode, uiAbsPartIdx ) )
1486	{
1487	Int trWidth = uiWidth;
1488	Int trHeight = uiHeight;
1489	pcCU->getNSQTSize( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1490
1491	uiWidth = trWidth;
1492	uiHeight = trHeight;
1493	}
1494	#endif
1495	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1496	assert(scalingListType < 6);
1497	#if INTER_TRANSFORMSKIP
1498	#if NO_RESIDUAL_FLAG_FOR_BLPRED
1499	if(pcCU->isIntraBL(uiAbsPartIdx) && eTxt == TEXT_LUMA)
1500	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, DC_IDX, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1501	else
1502	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1503	#else
1504	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1505	#endif
1506	#else
1507	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
1508	#endif
1509	}
1510	else
1511	{
1512	uiTrMode++;
1513	uiWidth >>= 1;
1514	uiHeight >>= 1;
1515	Int trWidth = uiWidth, trHeight = uiHeight;
1516	#if !REMOVE_NSQT
1517	Int trLastWidth = uiWidth << 1, trLastHeight = uiHeight << 1;
1518	pcCU->getNSQTSize ( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1519	pcCU->getNSQTSize ( uiTrMode - 1, uiAbsPartIdx, trLastWidth, trLastHeight );
1520	#endif
1521	UInt uiAddrOffset = trHeight * uiStride;
1522	UInt uiCoefOffset = trWidth * trHeight;
1523	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1524	#if !REMOVE_NSQT
1525	UInt uiInterTUSplitDirection = pcCU->getInterTUSplitDirection ( trWidth, trHeight, trLastWidth, trLastHeight );
1526	if( uiInterTUSplitDirection != 2 )
1527	{
1528	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1529	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth * uiInterTUSplitDirection + uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1530	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 2 * trWidth * uiInterTUSplitDirection + 2 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1531	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 3 * trWidth * uiInterTUSplitDirection + 3 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1532	}
1533	else
1534	#endif
1535	{
1536	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1537	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1538	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1539	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1540	}
1541	}
1542	}
1543
1544	// ------------------------------------------------------------------------------------------------
1545	// Logical transform
1546	// ------------------------------------------------------------------------------------------------
1547
1548	/** Wrapper function between HM interface and core NxN forward transform (2D)
1549	* \param piBlkResi input data (residual)
1550	* \param psCoeff output data (transform coefficients)
1551	* \param uiStride stride of input residual data
1552	* \param iSize transform size (iSize x iSize)
1553	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1554	*/
1555	Void TComTrQuant::xT( UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1556	{
1557	#if MATRIX_MULT
1558	Int iSize = iWidth;
1559	#if !REMOVE_NSQT
1560	if( iWidth != iHeight)
1561	{
1562	xTrMxN( piBlkResi, psCoeff, uiStride, (UInt)iWidth, (UInt)iHeight );
1563	return;
1564	}
1565	#endif
1566	xTr(piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1567	#else
1568	Int j;
1569	{
1570	short block[ 64 * 64 ];
1571	short coeff[ 64 * 64 ];
1572	{
1573	for (j = 0; j < iHeight; j++)
1574	{
1575	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( short ) );
1576	}
1577	}
1578	xTrMxN( block, coeff, iWidth, iHeight, uiMode );
1579	for ( j = 0; j < iHeight * iWidth; j++ )
1580	{
1581	psCoeff[ j ] = coeff[ j ];
1582	}
1583	return ;
1584	}
1585	#endif
1586	}
1587
1588
1589	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1590	* \param plCoef input data (transform coefficients)
1591	* \param pResidual output data (residual)
1592	* \param uiStride stride of input residual data
1593	* \param iSize transform size (iSize x iSize)
1594	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1595	*/
1596	Void TComTrQuant::xIT( UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1597	{
1598	#if MATRIX_MULT
1599	Int iSize = iWidth;
1600	#if !REMOVE_NSQT
1601	if( iWidth != iHeight )
1602	{
1603	xITrMxN( plCoef, pResidual, uiStride, (UInt)iWidth, (UInt)iHeight );
1604	return;
1605	}
1606	#endif
1607	xITr(plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1608	#else
1609	Int j;
1610	{
1611	short block[ 64 * 64 ];
1612	short coeff[ 64 * 64 ];
1613	for ( j = 0; j < iHeight * iWidth; j++ )
1614	{
1615	coeff[j] = (short)plCoef[j];
1616	}
1617	xITrMxN( coeff, block, iWidth, iHeight, uiMode );
1618	{
1619	for ( j = 0; j < iHeight; j++ )
1620	{
1621	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(short) );
1622	}
1623	}
1624	return ;
1625	}
1626	#endif
1627	}
1628
1629	/** Wrapper function between HM interface and core 4x4 transform skipping
1630	* \param piBlkResi input data (residual)
1631	* \param psCoeff output data (transform coefficients)
1632	* \param uiStride stride of input residual data
1633	* \param iSize transform size (iSize x iSize)
1634	*/
1635	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1636	{
1637	assert( width == height );
1638	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1639	#if FULL_NBIT
1640	UInt uiBitDepth = g_uiBitDepth;
1641	#else
1642	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1643	#endif
1644	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1645	UInt transformSkipShift;
1646	Int j,k;
1647	if(shift >= 0)
1648	{
1649	transformSkipShift = shift;
1650	for (j = 0; j < height; j++)
1651	{
1652	for(k = 0; k < width; k ++)
1653	{
1654	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1655	}
1656	}
1657	}
1658	else
1659	{
1660	//The case when uiBitDepth > 13
1661	Int offset;
1662	transformSkipShift = -shift;
1663	offset = (1 << (transformSkipShift - 1));
1664	for (j = 0; j < height; j++)
1665	{
1666	for(k = 0; k < width; k ++)
1667	{
1668	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1669	}
1670	}
1671	}
1672	}
1673
1674	/** Wrapper function between HM interface and core NxN transform skipping
1675	* \param plCoef input data (coefficients)
1676	* \param pResidual output data (residual)
1677	* \param uiStride stride of input residual data
1678	* \param iSize transform size (iSize x iSize)
1679	*/
1680	Void TComTrQuant::xITransformSkip( Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1681	{
1682	assert( width == height );
1683	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1684	#if FULL_NBIT
1685	UInt uiBitDepth = g_uiBitDepth;
1686	#else
1687	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1688	#endif
1689	Int shift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1690	UInt transformSkipShift;
1691	Int j,k;
1692	if(shift > 0)
1693	{
1694	Int offset;
1695	transformSkipShift = shift;
1696	offset = (1 << (transformSkipShift -1));
1697	for ( j = 0; j < height; j++ )
1698	{
1699	for(k = 0; k < width; k ++)
1700	{
1701	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1702	}
1703	}
1704	}
1705	else
1706	{
1707	//The case when uiBitDepth >= 13
1708	transformSkipShift = - shift;
1709	for ( j = 0; j < height; j++ )
1710	{
1711	for(k = 0; k < width; k ++)
1712	{
1713	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1714	}
1715	}
1716	}
1717	}
1718
1719	/** RDOQ with CABAC
1720	* \param pcCU pointer to coding unit structure
1721	* \param plSrcCoeff pointer to input buffer
1722	* \param piDstCoeff reference to pointer to output buffer
1723	* \param uiWidth block width
1724	* \param uiHeight block height
1725	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1726	* \param eTType plane type / luminance or chrominance
1727	* \param uiAbsPartIdx absolute partition index
1728	* \returns Void
1729	* Rate distortion optimized quantization for entropy
1730	* coding engines using probability models like CABAC
1731	*/
1732	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1733	Int* plSrcCoeff,
1734	TCoeff* piDstCoeff,
1735	#if ADAPTIVE_QP_SELECTION
1736	Int*& piArlDstCoeff,
1737	#endif
1738	UInt uiWidth,
1739	UInt uiHeight,
1740	UInt& uiAbsSum,
1741	TextType eTType,
1742	UInt uiAbsPartIdx )
1743	{
1744	Int iQBits = m_cQP.m_iBits;
1745	Double dTemp = 0;
1746	UInt dir = SCALING_LIST_SQT;
1747	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1748	Int uiQ = g_quantScales[m_cQP.rem()];
1749	#if !REMOVE_NSQT
1750	if (uiWidth != uiHeight)
1751	{
1752	uiLog2TrSize += (uiWidth > uiHeight) ? -1 : 1;
1753	dir = ( uiWidth < uiHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1754	}
1755	#endif
1756
1757	#if FULL_NBIT
1758	UInt uiBitDepth = g_uiBitDepth;
1759	#else
1760	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1761	#endif
1762	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1763	UInt uiGoRiceParam = 0;
1764	Double d64BlockUncodedCost = 0;
1765	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1766	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1767	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1768	assert(scalingListType < 6);
1769
1770	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1771	double dErrScale = 0;
1772	double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem,dir);
1773	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1774	Int *piQCoef = piQCoefOrg;
1775	double *pdErrScale = pdErrScaleOrg;
1776	#if ADAPTIVE_QP_SELECTION
1777	Int iQBitsC = iQBits - ARL_C_PRECISION;
1778	Int iAddC = 1 << (iQBitsC-1);
1779	#endif
1780	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1781	if (uiScanIdx == SCAN_ZIGZAG)
1782	{
1783	// Map value zigzag to diagonal scan
1784	uiScanIdx = SCAN_DIAG;
1785	}
1786	Int blockType = uiLog2BlkSize;
1787	#if !REMOVE_NSQT
1788	if (uiWidth != uiHeight)
1789	{
1790	uiScanIdx = SCAN_DIAG;
1791	blockType = 4;
1792	}
1793	#endif
1794
1795	#if ADAPTIVE_QP_SELECTION
1796	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1797	#endif
1798
1799	Double pdCostCoeff [ 32 * 32 ];
1800	Double pdCostSig [ 32 * 32 ];
1801	Double pdCostCoeff0[ 32 * 32 ];
1802	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1803	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1804	Int rateIncUp [ 32 * 32 ];
1805	Int rateIncDown [ 32 * 32 ];
1806	Int sigRateDelta[ 32 * 32 ];
1807	Int deltaU [ 32 * 32 ];
1808	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1809	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1810	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1811	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1812
1813	const UInt * scanCG;
1814	#if !REMOVE_NSQT
1815	if (uiWidth == uiHeight)
1816	#endif
1817	{
1818	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1819	if( uiLog2BlkSize == 3 )
1820	{
1821	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1822	}
1823	else if( uiLog2BlkSize == 5 )
1824	{
1825	scanCG = g_sigLastScanCG32x32;
1826	}
1827	}
1828	#if !REMOVE_NSQT
1829	else
1830	{
1831	scanCG = g_sigCGScanNSQT[ uiLog2BlkSize - 2 ];
1832	}
1833	#endif
1834	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1835	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1836	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1837	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1838	Int iCGLastScanPos = -1;
1839
1840	UInt uiCtxSet = 0;
1841	Int c1 = 1;
1842	Int c2 = 0;
1843	#if !REMOVE_NUM_GREATER1
1844	UInt uiNumOne = 0;
1845	#endif
1846	Double d64BaseCost = 0;
1847	Int iLastScanPos = -1;
1848	dTemp = dErrScale;
1849
1850	UInt c1Idx = 0;
1851	UInt c2Idx = 0;
1852	Int baseLevel;
1853
1854	#if REMOVE_NSQT
1855	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1856	#else
1857	const UInt * scan;
1858	if (uiWidth == uiHeight)
1859	{
1860	scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1861	}
1862	else
1863	{
1864	scan = g_sigScanNSQT[ uiLog2BlkSize - 2 ];
1865	}
1866	#endif
1867
1868	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1869	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1870
1871	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1872	Int iScanPos;
1873	coeffGroupRDStats rdStats;
1874
1875	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1876	{
1877	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1878	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1879	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1880	#if !REMOVAL_8x2_2x8_CG
1881	if( uiWidth == 8 && uiHeight == 8 && (uiScanIdx == SCAN_HOR \|\| uiScanIdx == SCAN_VER) )
1882	{
1883	uiCGPosY = (uiScanIdx == SCAN_HOR ? uiCGBlkPos : 0);
1884	uiCGPosX = (uiScanIdx == SCAN_VER ? uiCGBlkPos : 0);
1885	}
1886	#endif
1887	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1888
1889	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1890	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1891	{
1892	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1893	//===== quantization =====
1894	UInt uiBlkPos = scan[iScanPos];
1895	// set coeff
1896	uiQ = piQCoef[uiBlkPos];
1897	dTemp = pdErrScale[uiBlkPos];
1898	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1899	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1900	#if ADAPTIVE_QP_SELECTION
1901	if( m_bUseAdaptQpSelect )
1902	{
1903	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1904	}
1905	#endif
1906	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1907
1908	Double dErr = Double( lLevelDouble );
1909	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1910	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1911	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1912
1913	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1914	{
1915	iLastScanPos = iScanPos;
1916	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1917	iCGLastScanPos = iCGScanPos;
1918	}
1919
1920	if ( iLastScanPos >= 0 )
1921	{
1922	//===== coefficient level estimation =====
1923	UInt uiLevel;
1924	UInt uiOneCtx = 4 * uiCtxSet + c1;
1925	UInt uiAbsCtx = uiCtxSet + c2;
1926
1927	if( iScanPos == iLastScanPos )
1928	{
1929	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1930	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1931	c1Idx, c2Idx, iQBits, dTemp, 1 );
1932	}
1933	else
1934	{
1935	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1936	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1937	#if REMOVAL_8x2_2x8_CG
1938	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1939	#else
1940	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1941	#endif
1942	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1943	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1944	c1Idx, c2Idx, iQBits, dTemp, 0 );
1945	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1946	}
1947	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1948	if( uiLevel > 0 )
1949	{
1950	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1951	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1952	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1953	}
1954	else // uiLevel == 0
1955	{
1956	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1957	}
1958	piDstCoeff[ uiBlkPos ] = uiLevel;
1959	d64BaseCost += pdCostCoeff [ iScanPos ];
1960
1961
1962	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1963	if( uiLevel >= baseLevel )
1964	{
1965	if(uiLevel > 3*(1<<uiGoRiceParam))
1966	{
1967	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1968	}
1969	}
1970	if ( uiLevel >= 1)
1971	{
1972	c1Idx ++;
1973	}
1974
1975	//===== update bin model =====
1976	if( uiLevel > 1 )
1977	{
1978	c1 = 0;
1979	c2 += (c2 < 2);
1980	#if !REMOVE_NUM_GREATER1
1981	uiNumOne++;
1982	#endif
1983	c2Idx ++;
1984	}
1985	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1986	{
1987	c1++;
1988	}
1989
1990	//===== context set update =====
1991	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1992	{
1993	#if !REMOVE_NUM_GREATER1
1994	c1 = 1;
1995	#endif
1996	c2 = 0;
1997	uiGoRiceParam = 0;
1998
1999	c1Idx = 0;
2000	c2Idx = 0;
2001	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
2002	#if REMOVE_NUM_GREATER1
2003	if( c1 == 0 )
2004	#else
2005	if( uiNumOne > 0 )
2006	#endif
2007	{
2008	uiCtxSet++;
2009	}
2010	#if REMOVE_NUM_GREATER1
2011	c1 = 1;
2012	#else
2013	uiNumOne >>= 1;
2014	#endif
2015	}
2016	}
2017	else
2018	{
2019	d64BaseCost += pdCostCoeff0[ iScanPos ];
2020	}
2021	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2022	if (iScanPosinCG == 0 )
2023	{
2024	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2025	}
2026	if (piDstCoeff[ uiBlkPos ] )
2027	{
2028	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2029	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2030	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2031	if ( iScanPosinCG != 0 )
2032	{
2033	rdStats.iNNZbeforePos0++;
2034	}
2035	}
2036	} //end for (iScanPosinCG)
2037
2038	if (iCGLastScanPos >= 0)
2039	{
2040	if( iCGScanPos )
2041	{
2042	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2043	{
2044	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2045	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2046	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2047	}
2048	else
2049	{
2050	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2051	{
2052	if ( rdStats.iNNZbeforePos0 == 0 )
2053	{
2054	d64BaseCost -= rdStats.d64SigCost_0;
2055	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2056	}
2057	// rd-cost if SigCoeffGroupFlag = 0, initialization
2058	Double d64CostZeroCG = d64BaseCost;
2059
2060	// add SigCoeffGroupFlag cost to total cost
2061	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2062	if (iCGScanPos < iCGLastScanPos)
2063	{
2064	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2065	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2066	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2067	}
2068
2069	// try to convert the current coeff group from non-zero to all-zero
2070	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2071	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2072	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2073
2074	// if we can save cost, change this block to all-zero block
2075	if ( d64CostZeroCG < d64BaseCost )
2076	{
2077	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2078	d64BaseCost = d64CostZeroCG;
2079	if (iCGScanPos < iCGLastScanPos)
2080	{
2081	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2082	}
2083	// reset coeffs to 0 in this block
2084	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2085	{
2086	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2087	UInt uiBlkPos = scan[ iScanPos ];
2088
2089	if (piDstCoeff[ uiBlkPos ])
2090	{
2091	piDstCoeff [ uiBlkPos ] = 0;
2092	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2093	pdCostSig [ iScanPos ] = 0;
2094	}
2095	}
2096	} // end if ( d64CostAllZeros < d64BaseCost )
2097	}
2098	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2099	}
2100	else
2101	{
2102	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2103	}
2104	}
2105	} //end for (iCGScanPos)
2106
2107	//===== estimate last position =====
2108	if ( iLastScanPos < 0 )
2109	{
2110	return;
2111	}
2112
2113	Double d64BestCost = 0;
2114	Int ui16CtxCbf = 0;
2115	Int iBestLastIdxP1 = 0;
2116	#if NO_RESIDUAL_FLAG_FOR_BLPRED
2117	if( (!pcCU->isIntra( uiAbsPartIdx ) \|\| pcCU->isIntraBL( uiAbsPartIdx )) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2118	#else
2119	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2120	#endif
2121	{
2122	ui16CtxCbf = 0;
2123	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2124	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2125	}
2126	else
2127	{
2128	ui16CtxCbf = pcCU->getCtxQtCbf( uiAbsPartIdx, eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
2129	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
2130	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2131	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2132	}
2133
2134	Bool bFoundLast = false;
2135	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2136	{
2137	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
2138
2139	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2140	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2141	{
2142	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2143	{
2144	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2145	if (iScanPos > iLastScanPos) continue;
2146	UInt uiBlkPos = scan[iScanPos];
2147
2148	if( piDstCoeff[ uiBlkPos ] )
2149	{
2150	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
2151	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
2152
2153	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
2154	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2155
2156	if( totalCost < d64BestCost )
2157	{
2158	iBestLastIdxP1 = iScanPos + 1;
2159	d64BestCost = totalCost;
2160	}
2161	if( piDstCoeff[ uiBlkPos ] > 1 )
2162	{
2163	bFoundLast = true;
2164	break;
2165	}
2166	d64BaseCost -= pdCostCoeff[ iScanPos ];
2167	d64BaseCost += pdCostCoeff0[ iScanPos ];
2168	}
2169	else
2170	{
2171	d64BaseCost -= pdCostSig[ iScanPos ];
2172	}
2173	} //end for
2174	if (bFoundLast)
2175	{
2176	break;
2177	}
2178	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2179	} // end for
2180
2181	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2182	{
2183	Int blkPos = scan[ scanPos ];
2184	Int level = piDstCoeff[ blkPos ];
2185	uiAbsSum += level;
2186	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2187	}
2188
2189	//===== clean uncoded coefficients =====
2190	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2191	{
2192	piDstCoeff[ scan[ scanPos ] ] = 0;
2193	}
2194
2195	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2196	{
2197	Int64 rdFactor = (Int64)((Double)(g_invQuantScales[m_cQP.rem()])(Double)(g_invQuantScales[m_cQP.rem()])(Double)(1<<(2m_cQP.m_iPer))/m_dLambda/16/(Double)(1<<(2g_uiBitIncrement)) + 0.5);
2198	Int lastCG = -1;
2199	Int absSum = 0 ;
2200	Int n ;
2201
2202	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
2203	{
2204	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
2205	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
2206	absSum = 0 ;
2207
2208	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
2209	{
2210	if( piDstCoeff[ scan[ n + subPos ]] )
2211	{
2212	lastNZPosInCG = n;
2213	break;
2214	}
2215	}
2216
2217	for(n = 0; n <SCAN_SET_SIZE; n++ )
2218	{
2219	if( piDstCoeff[ scan[ n + subPos ]] )
2220	{
2221	firstNZPosInCG = n;
2222	break;
2223	}
2224	}
2225
2226	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2227	{
2228	absSum += piDstCoeff[ scan[ n + subPos ]];
2229	}
2230
2231	if(lastNZPosInCG>=0 && lastCG==-1)
2232	{
2233	lastCG = 1;
2234	}
2235
2236	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2237	{
2238	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
2239	if( signbit!=(absSum&0x1) ) // hide but need tune
2240	{
2241	// calculate the cost
2242	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
2243	Int minPos =-1, finalChange=0, curChange=0;
2244
2245	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
2246	{
2247	UInt uiBlkPos = scan[ n + subPos ];
2248	if(piDstCoeff[ uiBlkPos ] != 0 )
2249	{
2250	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
2251	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2252	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
2253
2254	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2255	{
2256	costDown -= (4<<15) ;
2257	}
2258
2259	if(costUp<costDown)
2260	{
2261	curCost = costUp;
2262	curChange = 1 ;
2263	}
2264	else
2265	{
2266	curChange = -1 ;
2267	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2268	{
2269	curCost = MAX_INT64 ;
2270	}
2271	else
2272	{
2273	curCost = costDown ;
2274	}
2275	}
2276	}
2277	else
2278	{
2279	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2280	curChange = 1 ;
2281
2282	if(n<firstNZPosInCG)
2283	{
2284	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2285	if(thissignbit != signbit )
2286	{
2287	curCost = MAX_INT64;
2288	}
2289	}
2290	}
2291
2292	if( curCost<minCostInc)
2293	{
2294	minCostInc = curCost ;
2295	finalChange = curChange ;
2296	minPos = uiBlkPos ;
2297	}
2298	}
2299
2300	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
2301	{
2302	finalChange = -1;
2303	}
2304
2305	if(plSrcCoeff[minPos]>=0)
2306	{
2307	piDstCoeff[minPos] += finalChange ;
2308	}
2309	else
2310	{
2311	piDstCoeff[minPos] -= finalChange ;
2312	}
2313	}
2314	}
2315
2316	if(lastCG==1)
2317	{
2318	lastCG=0 ;
2319	}
2320	}
2321	}
2322	}
2323
2324	/** Pattern decision for context derivation process of significant_coeff_flag
2325	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2326	* \param posXCG column of current coefficient group
2327	* \param posYCG row of current coefficient group
2328	* \param width width of the block
2329	* \param height height of the block
2330	* \returns pattern for current coefficient group
2331	*/
2332	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2333	{
2334	#if REMOVAL_8x2_2x8_CG
2335	if( width == 4 && height == 4 ) return -1;
2336	#else
2337	if( width == height && width <= 8 ) return -1;
2338	#endif
2339
2340	UInt sigRight = 0;
2341	UInt sigLower = 0;
2342
2343	width >>= 2;
2344	height >>= 2;
2345	if( posXCG < width - 1 )
2346	{
2347	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2348	}
2349	if (posYCG < height - 1 )
2350	{
2351	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2352	}
2353	return sigRight + (sigLower<<1);
2354	}
2355
2356	/** Context derivation process of coeff_abs_significant_flag
2357	* \param patternSigCtx pattern for current coefficient group
2358	* \param posX column of current scan position
2359	* \param posY row of current scan position
2360	* \param blockType log2 value of block size if square block, or 4 otherwise
2361	* \param width width of the block
2362	* \param height height of the block
2363	* \param textureType texture type (TEXT_LUMA...)
2364	* \returns ctxInc for current scan position
2365	*/
2366	Int TComTrQuant::getSigCtxInc (
2367	Int patternSigCtx,
2368	#if REMOVAL_8x2_2x8_CG
2369	UInt scanIdx,
2370	#endif
2371	Int posX,
2372	Int posY,
2373	Int blockType,
2374	Int width
2375	,Int height
2376	,TextType textureType
2377	)
2378	{
2379	const Int ctxIndMap[16] =
2380	{
2381	0, 1, 4, 5,
2382	2, 3, 4, 5,
2383	6, 6, 8, 8,
2384	7, 7, 8, 8
2385	};
2386
2387	if( posX + posY == 0 )
2388	{
2389	return 0;
2390	}
2391
2392	if ( blockType == 2 )
2393	{
2394	return ctxIndMap[ 4 * posY + posX ];
2395	}
2396
2397	#if !REMOVAL_8x2_2x8_CG
2398	if ( blockType == 3 )
2399	{
2400	return 9 + ctxIndMap[ 4 * (posY >> 1) + (posX >> 1) ];
2401	}
2402
2403	Int offset = 18;
2404	#else
2405	Int offset = blockType == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2406	#endif
2407
2408	Int posXinSubset = posX-((posX>>2)<<2);
2409	Int posYinSubset = posY-((posY>>2)<<2);
2410	Int cnt = 0;
2411	if(patternSigCtx==0)
2412	{
2413	#if REMOVAL_8x2_2x8_CG
2414	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2415	#else
2416	cnt = posXinSubset+posYinSubset<=2 ? 1 : 0;
2417	#endif
2418	}
2419	else if(patternSigCtx==1)
2420	{
2421	#if REMOVAL_8x2_2x8_CG
2422	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2423	#else
2424	cnt = posYinSubset<=1 ? 1 : 0;
2425	#endif
2426	}
2427	else if(patternSigCtx==2)
2428	{
2429	#if REMOVAL_8x2_2x8_CG
2430	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2431	#else
2432	cnt = posXinSubset<=1 ? 1 : 0;
2433	#endif
2434	}
2435	else
2436	{
2437	#if REMOVAL_8x2_2x8_CG
2438	cnt = 2;
2439	#else
2440	cnt = posXinSubset+posYinSubset<=4 ? 2 : 1;
2441	#endif
2442	}
2443
2444	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2445	}
2446
2447	/** Get the best level in RD sense
2448	* \param rd64CodedCost reference to coded cost
2449	* \param rd64CodedCost0 reference to cost when coefficient is 0
2450	* \param rd64CodedCostSig reference to cost of significant coefficient
2451	* \param lLevelDouble reference to unscaled quantized level
2452	* \param uiMaxAbsLevel scaled quantized level
2453	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2454	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2455	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2456	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2457	* \param iQBits quantization step size
2458	* \param dTemp correction factor
2459	* \param bLast indicates if the coefficient is the last significant
2460	* \returns best quantized transform level for given scan position
2461	* This method calculates the best quantized transform level for a given scan position.
2462	*/
2463	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2464	Double& rd64CodedCost0,
2465	Double& rd64CodedCostSig,
2466	Int lLevelDouble,
2467	UInt uiMaxAbsLevel,
2468	UShort ui16CtxNumSig,
2469	UShort ui16CtxNumOne,
2470	UShort ui16CtxNumAbs,
2471	UShort ui16AbsGoRice,
2472	UInt c1Idx,
2473	UInt c2Idx,
2474	Int iQBits,
2475	Double dTemp,
2476	Bool bLast ) const
2477	{
2478	Double dCurrCostSig = 0;
2479	UInt uiBestAbsLevel = 0;
2480
2481	if( !bLast && uiMaxAbsLevel < 3 )
2482	{
2483	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2484	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2485	if( uiMaxAbsLevel == 0 )
2486	{
2487	return uiBestAbsLevel;
2488	}
2489	}
2490	else
2491	{
2492	rd64CodedCost = MAX_DOUBLE;
2493	}
2494
2495	if( !bLast )
2496	{
2497	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2498	}
2499
2500	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2501	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2502	{
2503	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2504	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2505	dCurrCost += dCurrCostSig;
2506
2507	if( dCurrCost < rd64CodedCost )
2508	{
2509	uiBestAbsLevel = uiAbsLevel;
2510	rd64CodedCost = dCurrCost;
2511	rd64CodedCostSig = dCurrCostSig;
2512	}
2513	}
2514
2515	return uiBestAbsLevel;
2516	}
2517
2518	/** Calculates the cost for specific absolute transform level
2519	* \param uiAbsLevel scaled quantized level
2520	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2521	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2522	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2523	* \returns cost of given absolute transform level
2524	*/
2525	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2526	UShort ui16CtxNumOne,
2527	UShort ui16CtxNumAbs,
2528	UShort ui16AbsGoRice
2529	, UInt c1Idx,
2530	UInt c2Idx
2531	) const
2532	{
2533	Double iRate = xGetIEPRate();
2534	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2535
2536	if ( uiAbsLevel >= baseLevel )
2537	{
2538	UInt symbol = uiAbsLevel - baseLevel;
2539	UInt length;
2540	#if COEF_REMAIN_BIN_REDUCTION
2541	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2542	#else
2543	if (symbol < (8 << ui16AbsGoRice))
2544	#endif
2545	{
2546	length = symbol>>ui16AbsGoRice;
2547	iRate += (length+1+ui16AbsGoRice)<< 15;
2548	}
2549	else
2550	{
2551	length = ui16AbsGoRice;
2552	#if COEF_REMAIN_BIN_REDUCTION
2553	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2554	#else
2555	symbol = symbol - ( 8 << ui16AbsGoRice);
2556	#endif
2557	while (symbol >= (1<<length))
2558	{
2559	symbol -= (1<<(length++));
2560	}
2561	#if COEF_REMAIN_BIN_REDUCTION
2562	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2563	#else
2564	iRate += (8+length+1-ui16AbsGoRice+length)<< 15;
2565	#endif
2566	}
2567	if (c1Idx < C1FLAG_NUMBER)
2568	{
2569	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2570
2571	if (c2Idx < C2FLAG_NUMBER)
2572	{
2573	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2574	}
2575	}
2576	}
2577	else
2578	if( uiAbsLevel == 1 )
2579	{
2580	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2581	}
2582	else if( uiAbsLevel == 2 )
2583	{
2584	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2585	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2586	}
2587	else
2588	{
2589	assert (0);
2590	}
2591	return xGetICost( iRate );
2592	}
2593
2594	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2595	UShort ui16CtxNumOne,
2596	UShort ui16CtxNumAbs,
2597	UShort ui16AbsGoRice
2598	, UInt c1Idx,
2599	UInt c2Idx
2600	) const
2601	{
2602	Int iRate = 0;
2603	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2604
2605	if ( uiAbsLevel >= baseLevel )
2606	{
2607	UInt uiSymbol = uiAbsLevel - baseLevel;
2608	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2609	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2610
2611	if( bExpGolomb )
2612	{
2613	uiAbsLevel = uiSymbol - uiMaxVlc;
2614	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2615	iRate += iEGS << 15;
2616	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2617	}
2618
2619	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2620	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2621
2622	iRate += ui16NumBins << 15;
2623
2624	if (c1Idx < C1FLAG_NUMBER)
2625	{
2626	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2627
2628	if (c2Idx < C2FLAG_NUMBER)
2629	{
2630	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2631	}
2632	}
2633	}
2634	else
2635	if( uiAbsLevel == 0 )
2636	{
2637	return 0;
2638	}
2639	else if( uiAbsLevel == 1 )
2640	{
2641	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2642	}
2643	else if( uiAbsLevel == 2 )
2644	{
2645	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2646	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2647	}
2648	else
2649	{
2650	assert(0);
2651	}
2652	return iRate;
2653	}
2654
2655	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2656	UShort ui16CtxNumSig ) const
2657	{
2658	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2659	}
2660
2661	/** Calculates the cost of signaling the last significant coefficient in the block
2662	* \param uiPosX X coordinate of the last significant coefficient
2663	* \param uiPosY Y coordinate of the last significant coefficient
2664	* \returns cost of last significant coefficient
2665	*/
2666	/*
2667	* \param uiWidth width of the transform unit (TU)
2668	*/
2669	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2670	const UInt uiPosY,
2671	const UInt uiBlkWdth ) const
2672	{
2673	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2674	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2675	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2676	if( uiCtxX > 3 )
2677	{
2678	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2679	}
2680	if( uiCtxY > 3 )
2681	{
2682	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2683	}
2684	return xGetICost( uiCost );
2685	}
2686
2687	/** Calculates the cost for specific absolute transform level
2688	* \param uiAbsLevel scaled quantized level
2689	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2690	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2691	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2692	* \returns cost of given absolute transform level
2693	*/
2694	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2695	UShort ui16CtxNumSig ) const
2696	{
2697	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2698	}
2699
2700	/** Get the cost for a specific rate
2701	* \param dRate rate of a bit
2702	* \returns cost at the specific rate
2703	*/
2704	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2705	{
2706	return m_dLambda * dRate;
2707	}
2708
2709	/** Get the cost of an equal probable bit
2710	* \returns cost of equal probable bit
2711	*/
2712	__inline Double TComTrQuant::xGetIEPRate ( ) const
2713	{
2714	return 32768;
2715	}
2716
2717	/** Context derivation process of coeff_abs_significant_flag
2718	* \param uiSigCoeffGroupFlag significance map of L1
2719	* \param uiBlkX column of current scan position
2720	* \param uiBlkY row of current scan position
2721	* \param uiLog2BlkSize log2 value of block size
2722	* \returns ctxInc for current scan position
2723	*/
2724	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2725	const UInt uiCGPosX,
2726	const UInt uiCGPosY,
2727	const UInt scanIdx,
2728	Int width, Int height)
2729	{
2730	UInt uiRight = 0;
2731	UInt uiLower = 0;
2732
2733	width >>= 2;
2734	height >>= 2;
2735	#if !REMOVAL_8x2_2x8_CG
2736	if( width == 2 && height == 2 ) // 8x8
2737	{
2738	if( scanIdx == SCAN_HOR )
2739	{
2740	width = 1;
2741	height = 4;
2742	}
2743	else if( scanIdx == SCAN_VER )
2744	{
2745	width = 4;
2746	height = 1;
2747	}
2748	}
2749	#endif
2750	if( uiCGPosX < width - 1 )
2751	{
2752	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2753	}
2754	if (uiCGPosY < height - 1 )
2755	{
2756	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2757	}
2758	return (uiRight \|\| uiLower);
2759
2760	}
2761	/** set quantized matrix coefficient for encode
2762	* \param scalingList quantaized matrix address
2763	*/
2764	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2765	{
2766	UInt size,list;
2767	UInt qp;
2768
2769	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2770	{
2771	for(list = 0; list < g_scalingListNum[size]; list++)
2772	{
2773	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2774	{
2775	xSetScalingListEnc(scalingList,list,size,qp);
2776	xSetScalingListDec(scalingList,list,size,qp);
2777	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2778	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2779	{
2780	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2781	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2782	}
2783	}
2784	}
2785	}
2786	}
2787	/** set quantized matrix coefficient for decode
2788	* \param scalingList quantaized matrix address
2789	*/
2790	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2791	{
2792	UInt size,list;
2793	UInt qp;
2794
2795	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2796	{
2797	for(list = 0; list < g_scalingListNum[size]; list++)
2798	{
2799	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2800	{
2801	xSetScalingListDec(scalingList,list,size,qp);
2802	}
2803	}
2804	}
2805	}
2806	/** set error scale coefficients
2807	* \param list List ID
2808	* \param uiSize Size
2809	* \param uiQP Quantization parameter
2810	*/
2811	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp, UInt dir)
2812	{
2813
2814	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2815	#if FULL_NBIT
2816	UInt uiBitDepth = g_uiBitDepth;
2817	#else
2818	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
2819	#endif
2820
2821	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
2822
2823	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2824	Int *piQuantcoeff;
2825	double *pdErrScale;
2826	piQuantcoeff = getQuantCoeff(list, qp,size,dir);
2827	pdErrScale = getErrScaleCoeff(list, size, qp,dir);
2828
2829	double dErrScale = (double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2830	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2831	for(i=0;i<uiMaxNumCoeff;i++)
2832	{
2833	pdErrScale[i] = dErrScale/(double)piQuantcoeff[i]/(double)piQuantcoeff[i]/(double)(1<<(2*g_uiBitIncrement));
2834	}
2835	}
2836
2837	/** set quantized matrix coefficient for encode
2838	* \param scalingList quantaized matrix address
2839	* \param listId List index
2840	* \param sizeId size index
2841	* \param uiQP Quantization parameter
2842	*/
2843	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2844	{
2845	UInt width = g_scalingListSizeX[sizeId];
2846	UInt height = g_scalingListSizeX[sizeId];
2847	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2848	Int *quantcoeff;
2849	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2850	quantcoeff = getQuantCoeff(listId, qp, sizeId, SCALING_LIST_SQT);
2851
2852	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2853
2854	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16) //for NSQT
2855	{
2856	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2857	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2858
2859	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2860	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2861	}
2862	}
2863	/** set quantized matrix coefficient for decode
2864	* \param scalingList quantaized matrix address
2865	* \param list List index
2866	* \param size size index
2867	* \param uiQP Quantization parameter
2868	*/
2869	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2870	{
2871	UInt width = g_scalingListSizeX[sizeId];
2872	UInt height = g_scalingListSizeX[sizeId];
2873	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2874	Int *dequantcoeff;
2875	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2876
2877	dequantcoeff = getDequantCoeff(listId, qp, sizeId,SCALING_LIST_SQT);
2878	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2879
2880	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16)
2881	{
2882	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2883	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2884
2885	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2886
2887	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2888	}
2889	}
2890
2891	/** set flat matrix value to quantized coefficient
2892	*/
2893	Void TComTrQuant::setFlatScalingList()
2894	{
2895	UInt size,list;
2896	UInt qp;
2897
2898	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2899	{
2900	for(list = 0; list < g_scalingListNum[size]; list++)
2901	{
2902	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2903	{
2904	xsetFlatScalingList(list,size,qp);
2905	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2906	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2907	{
2908	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2909	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2910	}
2911	}
2912	}
2913	}
2914	}
2915
2916	/** set flat matrix value to quantized coefficient
2917	* \param list List ID
2918	* \param uiQP Quantization parameter
2919	* \param uiSize Size
2920	*/
2921	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2922	{
2923	UInt i,num = g_scalingListSize[size];
2924	UInt numDiv4 = num>>2;
2925	Int *quantcoeff;
2926	Int *dequantcoeff;
2927	Int quantScales = g_quantScales[qp];
2928	Int invQuantScales = g_invQuantScales[qp]<<4;
2929
2930	quantcoeff = getQuantCoeff(list, qp, size,SCALING_LIST_SQT);
2931	dequantcoeff = getDequantCoeff(list, qp, size,SCALING_LIST_SQT);
2932
2933	for(i=0;i<num;i++)
2934	{
2935	*quantcoeff++ = quantScales;
2936	*dequantcoeff++ = invQuantScales;
2937	}
2938
2939	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2940	{
2941	quantcoeff = getQuantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2942	dequantcoeff = getDequantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2943
2944	for(i=0;i<numDiv4;i++)
2945	{
2946	*quantcoeff++ = quantScales;
2947	*dequantcoeff++ = invQuantScales;
2948	}
2949	quantcoeff = getQuantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2950	dequantcoeff = getDequantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2951
2952	for(i=0;i<numDiv4;i++)
2953	{
2954	*quantcoeff++ = quantScales;
2955	*dequantcoeff++ = invQuantScales;
2956	}
2957	}
2958	}
2959
2960	/** set quantized matrix coefficient for encode
2961	* \param coeff quantaized matrix address
2962	* \param quantcoeff quantaized matrix address
2963	* \param quantScales Q(QP%6)
2964	* \param height height
2965	* \param width width
2966	* \param ratio ratio for upscale
2967	* \param sizuNum matrix size
2968	* \param dc dc parameter
2969	*/
2970	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2971	{
2972	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2973	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2974	for(UInt j=0;j<height;j++)
2975	{
2976	for(UInt i=0;i<width;i++)
2977	{
2978	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2979	}
2980	}
2981	if(ratio > 1)
2982	{
2983	quantcoeff[0] = quantScales / dc;
2984	}
2985	}
2986	/** set quantized matrix coefficient for decode
2987	* \param coeff quantaized matrix address
2988	* \param dequantcoeff quantaized matrix address
2989	* \param invQuantScales IQ(QP%6))
2990	* \param height height
2991	* \param width width
2992	* \param ratio ratio for upscale
2993	* \param sizuNum matrix size
2994	* \param dc dc parameter
2995	*/
2996	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2997	{
2998	#if !REMOVE_NSQT
2999	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
3000	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
3001	#endif
3002	for(UInt j=0;j<height;j++)
3003	{
3004	for(UInt i=0;i<width;i++)
3005	{
3006	#if REMOVE_NSQT
3007	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
3008	#else
3009	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio];
3010	#endif
3011	}
3012	}
3013	if(ratio > 1)
3014	{
3015	dequantcoeff[0] = invQuantScales * dc;
3016	}
3017	}
3018
3019	/** initialization process of scaling list array
3020	*/
3021	Void TComTrQuant::initScalingList()
3022	{
3023	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3024	{
3025	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3026	{
3027	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3028	{
3029	m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3030	m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
3031	m_errScale [sizeId][listId][qp][SCALING_LIST_SQT] = new double [g_scalingListSize[sizeId]];
3032
3033	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3034	{
3035	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3036	{
3037	m_quantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3038	m_dequantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
3039	m_errScale [sizeId][listId][qp][dir] = new double [g_scalingListSize[sizeId]];
3040	}
3041	}
3042	}
3043	}
3044	}
3045	//copy for NSQT
3046	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3047	{
3048	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3049	{
3050	m_quantCoef [SCALING_LIST_16x16][3][qp][dir] = m_quantCoef [SCALING_LIST_16x16][1][qp][dir];
3051	m_dequantCoef [SCALING_LIST_16x16][3][qp][dir] = m_dequantCoef [SCALING_LIST_16x16][1][qp][dir];
3052	m_errScale [SCALING_LIST_16x16][3][qp][dir] = m_errScale [SCALING_LIST_16x16][1][qp][dir];
3053	}
3054	m_quantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_quantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3055	m_dequantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_dequantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3056	m_errScale [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_errScale [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
3057	}
3058	}
3059	/** destroy quantization matrix array
3060	*/
3061	Void TComTrQuant::destroyScalingList()
3062	{
3063	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3064	{
3065	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
3066	{
3067	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3068	{
3069	if(m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3070	if(m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
3071	if(m_errScale [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_errScale [sizeId][listId][qp][SCALING_LIST_SQT];
3072	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
3073	{
3074	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
3075	{
3076	if(m_quantCoef [sizeId][listId][qp][dir]) delete [] m_quantCoef [sizeId][listId][qp][dir];
3077	if(m_dequantCoef [sizeId][listId][qp][dir]) delete [] m_dequantCoef [sizeId][listId][qp][dir];
3078	if(m_errScale [sizeId][listId][qp][dir]) delete [] m_errScale [sizeId][listId][qp][dir];
3079	}
3080	}
3081	}
3082	}
3083	}
3084	}
3085
3086	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: