Context navigation

source: 3DVCSoftware/trunk/source/Lib/TLibCommon/TComTrQuant.cpp @ 399

Visit:

Last change on this file since 399 was 296, checked in by tech, 12 years ago
Reintegrated branch 5.1-dev0 rev. 295.
Property svn:eol-style set to `native`
File size: 92.2 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2012, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_bUseRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, Bool bLowpass, SliceType eSliceType, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	qpScaled = Clip3( -qpBdOffset, 51, qpy + chromaQPOffset );
203
204	if(qpScaled < 0)
205	{
206	qpScaled = qpScaled + qpBdOffset;
207	}
208	else
209	{
210	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBdOffset;
211	}
212	}
213	m_cQP.setQpParam( qpScaled, bLowpass, eSliceType );
214	}
215
216	#if MATRIX_MULT
217	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
218	* \param block pointer to input data (residual)
219	* \param coeff pointer to output data (transform coefficients)
220	* \param uiStride stride of input data
221	* \param uiTrSize transform size (uiTrSize x uiTrSize)
222	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
223	*/
224	void xTr(Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
225	{
226	Int i,j,k,iSum;
227	Int tmp[32*32];
228	const short *iT;
229	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
230
231	if (uiTrSize==4)
232	{
233	iT = g_aiT4[0];
234	}
235	else if (uiTrSize==8)
236	{
237	iT = g_aiT8[0];
238	}
239	else if (uiTrSize==16)
240	{
241	iT = g_aiT16[0];
242	}
243	else if (uiTrSize==32)
244	{
245	iT = g_aiT32[0];
246	}
247	else
248	{
249	assert(0);
250	}
251
252	#if FULL_NBIT
253	int shift_1st = uiLog2TrSize - 1 + g_uiBitDepth - 8; // log2(N) - 1 + g_uiBitDepth - 8
254	#else
255	int shift_1st = uiLog2TrSize - 1 + g_uiBitIncrement; // log2(N) - 1 + g_uiBitIncrement
256	#endif
257
258	int add_1st = 1<<(shift_1st-1);
259	int shift_2nd = uiLog2TrSize + 6;
260	int add_2nd = 1<<(shift_2nd-1);
261
262	/* Horizontal transform */
263
264	if (uiTrSize==4)
265	{
266	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
267	{
268	iT = g_as_DST_MAT_4[0];
269	}
270	}
271	for (i=0; i<uiTrSize; i++)
272	{
273	for (j=0; j<uiTrSize; j++)
274	{
275	iSum = 0;
276	for (k=0; k<uiTrSize; k++)
277	{
278	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
279	}
280	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
281	}
282	}
283
284	/* Vertical transform */
285	if (uiTrSize==4)
286	{
287	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
288	{
289	iT = g_as_DST_MAT_4[0];
290	}
291	else
292	{
293	iT = g_aiT4[0];
294	}
295	}
296	for (i=0; i<uiTrSize; i++)
297	{
298	for (j=0; j<uiTrSize; j++)
299	{
300	iSum = 0;
301	for (k=0; k<uiTrSize; k++)
302	{
303	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
304	}
305	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
306	}
307	}
308	}
309
310	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
311	* \param coeff pointer to input data (transform coefficients)
312	* \param block pointer to output data (residual)
313	* \param uiStride stride of output data
314	* \param uiTrSize transform size (uiTrSize x uiTrSize)
315	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
316	*/
317	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
318	{
319	int i,j,k,iSum;
320	Int tmp[32*32];
321	const short *iT;
322
323	if (uiTrSize==4)
324	{
325	iT = g_aiT4[0];
326	}
327	else if (uiTrSize==8)
328	{
329	iT = g_aiT8[0];
330	}
331	else if (uiTrSize==16)
332	{
333	iT = g_aiT16[0];
334	}
335	else if (uiTrSize==32)
336	{
337	iT = g_aiT32[0];
338	}
339	else
340	{
341	assert(0);
342	}
343
344	int shift_1st = SHIFT_INV_1ST;
345	int add_1st = 1<<(shift_1st-1);
346	#if FULL_NBIT
347	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
348	#else
349	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
350	#endif
351	int add_2nd = 1<<(shift_2nd-1);
352	if (uiTrSize==4)
353	{
354	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
355	{
356	iT = g_as_DST_MAT_4[0];
357	}
358	}
359
360	/* Horizontal transform */
361	for (i=0; i<uiTrSize; i++)
362	{
363	for (j=0; j<uiTrSize; j++)
364	{
365	iSum = 0;
366	for (k=0; k<uiTrSize; k++)
367	{
368	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
369	}
370	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
371	}
372	}
373
374	if (uiTrSize==4)
375	{
376	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
377	{
378	iT = g_as_DST_MAT_4[0];
379	}
380	else
381	{
382	iT = g_aiT4[0];
383	}
384	}
385
386	/* Vertical transform */
387	for (i=0; i<uiTrSize; i++)
388	{
389	for (j=0; j<uiTrSize; j++)
390	{
391	iSum = 0;
392	for (k=0; k<uiTrSize; k++)
393	{
394	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
395	}
396	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
397	}
398	}
399	}
400
401	#else //MATRIX_MULT
402
403	/** 4x4 forward transform implemented using partial butterfly structure (1D)
404	* \param src input data (residual)
405	* \param dst output data (transform coefficients)
406	* \param shift specifies right shift after 1D transform
407	*/
408
409	void partialButterfly4(short src,short dst,int shift, int line)
410	{
411	int j;
412	int E[2],O[2];
413	int add = 1<<(shift-1);
414
415	for (j=0; j<line; j++)
416	{
417	/* E and O */
418	E[0] = src[0] + src[3];
419	O[0] = src[0] - src[3];
420	E[1] = src[1] + src[2];
421	O[1] = src[1] - src[2];
422
423	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
424	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
425	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
426	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
427
428	src += 4;
429	dst ++;
430	}
431	}
432
// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
// give identical results
/** 4x4 forward DST (1D pass over four rows).
 *  \param block input data, 16 samples read row by row (block[4*i+0..3])
 *  \param coeff output data, 16 coefficients written column-wise (coeff[i], coeff[4+i], ...)
 *  \param shift right shift applied after rounding; must be >= 1
 */
void fastForwardDst(short *block,short *coeff,int shift)  // input block, output coeff
{
  int i, c[4];
  int rnd_factor = 1<<(shift-1);
  for (i=0; i<4; i++)
  {
    // Intermediate Variables
    c[0] = block[4*i+0] + block[4*i+3];
    c[1] = block[4*i+1] + block[4*i+3];
    c[2] = block[4*i+0] - block[4*i+1];
    c[3] = 74* block[4*i+2];

    coeff[   i] =  ( 29 * c[0] + 55 * c[1]         + c[3]               + rnd_factor ) >> shift;
    coeff[ 4+i] =  ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3])   + rnd_factor ) >> shift;
    coeff[ 8+i] =  ( 29 * c[2] + 55 * c[0]         - c[3]               + rnd_factor ) >> shift;
    coeff[12+i] =  ( 55 * c[2] - 29 * c[1]         + c[3]               + rnd_factor ) >> shift;
  }
}
453
454	void fastInverseDst(short tmp,short block,int shift) // input tmp, output block
455	{
456	int i, c[4];
457	int rnd_factor = 1<<(shift-1);
458	for (i=0; i<4; i++)
459	{
460	// Intermediate Variables
461	c[0] = tmp[ i] + tmp[ 8+i];
462	c[1] = tmp[8+i] + tmp[12+i];
463	c[2] = tmp[ i] - tmp[12+i];
464	c[3] = 74* tmp[4+i];
465
466	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
467	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
468	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
469	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
470	}
471	}
472
473	void partialButterflyInverse4(short src,short dst,int shift, int line)
474	{
475	int j;
476	int E[2],O[2];
477	int add = 1<<(shift-1);
478
479	for (j=0; j<line; j++)
480	{
481	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
482	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
483	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
484	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
485	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
486
487	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
488	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
489	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
490	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
491	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
492
493	src ++;
494	dst += 4;
495	}
496	}
497
498
499	void partialButterfly8(short src,short dst,int shift, int line)
500	{
501	int j,k;
502	int E[4],O[4];
503	int EE[2],EO[2];
504	int add = 1<<(shift-1);
505
506	for (j=0; j<line; j++)
507	{
508	/* E and O*/
509	for (k=0;k<4;k++)
510	{
511	E[k] = src[k] + src[7-k];
512	O[k] = src[k] - src[7-k];
513	}
514	/* EE and EO */
515	EE[0] = E[0] + E[3];
516	EO[0] = E[0] - E[3];
517	EE[1] = E[1] + E[2];
518	EO[1] = E[1] - E[2];
519
520	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
521	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
522	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
523	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
524
525	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
526	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
527	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
528	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
529
530	src += 8;
531	dst ++;
532	}
533	}
534
535
536	void partialButterflyInverse8(short src,short dst,int shift, int line)
537	{
538	int j,k;
539	int E[4],O[4];
540	int EE[2],EO[2];
541	int add = 1<<(shift-1);
542
543	for (j=0; j<line; j++)
544	{
545	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
546	for (k=0;k<4;k++)
547	{
548	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
549	}
550
551	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
552	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
553	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
554	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
555
556	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
557	E[0] = EE[0] + EO[0];
558	E[3] = EE[0] - EO[0];
559	E[1] = EE[1] + EO[1];
560	E[2] = EE[1] - EO[1];
561	for (k=0;k<4;k++)
562	{
563	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
564	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
565	}
566	src ++;
567	dst += 8;
568	}
569	}
570
571
572	void partialButterfly16(short src,short dst,int shift, int line)
573	{
574	int j,k;
575	int E[8],O[8];
576	int EE[4],EO[4];
577	int EEE[2],EEO[2];
578	int add = 1<<(shift-1);
579
580	for (j=0; j<line; j++)
581	{
582	/* E and O*/
583	for (k=0;k<8;k++)
584	{
585	E[k] = src[k] + src[15-k];
586	O[k] = src[k] - src[15-k];
587	}
588	/* EE and EO */
589	for (k=0;k<4;k++)
590	{
591	EE[k] = E[k] + E[7-k];
592	EO[k] = E[k] - E[7-k];
593	}
594	/* EEE and EEO */
595	EEE[0] = EE[0] + EE[3];
596	EEO[0] = EE[0] - EE[3];
597	EEE[1] = EE[1] + EE[2];
598	EEO[1] = EE[1] - EE[2];
599
600	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
601	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
602	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
603	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
604
605	for (k=2;k<16;k+=4)
606	{
607	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
608	}
609
610	for (k=1;k<16;k+=2)
611	{
612	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
613	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
614	}
615
616	src += 16;
617	dst ++;
618
619	}
620	}
621
622
623	void partialButterflyInverse16(short src,short dst,int shift, int line)
624	{
625	int j,k;
626	int E[8],O[8];
627	int EE[4],EO[4];
628	int EEE[2],EEO[2];
629	int add = 1<<(shift-1);
630
631	for (j=0; j<line; j++)
632	{
633	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
634	for (k=0;k<8;k++)
635	{
636	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
637	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
638	}
639	for (k=0;k<4;k++)
640	{
641	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
642	}
643	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
644	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
645	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
646	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
647
648	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
649	for (k=0;k<2;k++)
650	{
651	EE[k] = EEE[k] + EEO[k];
652	EE[k+2] = EEE[1-k] - EEO[1-k];
653	}
654	for (k=0;k<4;k++)
655	{
656	E[k] = EE[k] + EO[k];
657	E[k+4] = EE[3-k] - EO[3-k];
658	}
659	for (k=0;k<8;k++)
660	{
661	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
662	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
663	}
664	src ++;
665	dst += 16;
666	}
667	}
668
669
670	void partialButterfly32(short src,short dst,int shift, int line)
671	{
672	int j,k;
673	int E[16],O[16];
674	int EE[8],EO[8];
675	int EEE[4],EEO[4];
676	int EEEE[2],EEEO[2];
677	int add = 1<<(shift-1);
678
679	for (j=0; j<line; j++)
680	{
681	/* E and O*/
682	for (k=0;k<16;k++)
683	{
684	E[k] = src[k] + src[31-k];
685	O[k] = src[k] - src[31-k];
686	}
687	/* EE and EO */
688	for (k=0;k<8;k++)
689	{
690	EE[k] = E[k] + E[15-k];
691	EO[k] = E[k] - E[15-k];
692	}
693	/* EEE and EEO */
694	for (k=0;k<4;k++)
695	{
696	EEE[k] = EE[k] + EE[7-k];
697	EEO[k] = EE[k] - EE[7-k];
698	}
699	/* EEEE and EEEO */
700	EEEE[0] = EEE[0] + EEE[3];
701	EEEO[0] = EEE[0] - EEE[3];
702	EEEE[1] = EEE[1] + EEE[2];
703	EEEO[1] = EEE[1] - EEE[2];
704
705	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
706	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
707	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
708	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
709	for (k=4;k<32;k+=8)
710	{
711	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
712	}
713	for (k=2;k<32;k+=4)
714	{
715	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
716	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
717	}
718	for (k=1;k<32;k+=2)
719	{
720	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
721	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
722	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
723	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
724	}
725	src += 32;
726	dst ++;
727	}
728	}
729
730
731	void partialButterflyInverse32(short src,short dst,int shift, int line)
732	{
733	int j,k;
734	int E[16],O[16];
735	int EE[8],EO[8];
736	int EEE[4],EEO[4];
737	int EEEE[2],EEEO[2];
738	int add = 1<<(shift-1);
739
740	for (j=0; j<line; j++)
741	{
742	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
743	for (k=0;k<16;k++)
744	{
745	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
746	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
747	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
748	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
749	}
750	for (k=0;k<8;k++)
751	{
752	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
753	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
754	}
755	for (k=0;k<4;k++)
756	{
757	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
758	}
759	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
760	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
761	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
762	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
763
764	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
765	EEE[0] = EEEE[0] + EEEO[0];
766	EEE[3] = EEEE[0] - EEEO[0];
767	EEE[1] = EEEE[1] + EEEO[1];
768	EEE[2] = EEEE[1] - EEEO[1];
769	for (k=0;k<4;k++)
770	{
771	EE[k] = EEE[k] + EEO[k];
772	EE[k+4] = EEE[3-k] - EEO[3-k];
773	}
774	for (k=0;k<8;k++)
775	{
776	E[k] = EE[k] + EO[k];
777	E[k+8] = EE[7-k] - EO[7-k];
778	}
779	for (k=0;k<16;k++)
780	{
781	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
782	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
783	}
784	src ++;
785	dst += 32;
786	}
787	}
788
789
790	/** MxN forward transform (2D)
791	* \param block input data (residual)
792	* \param coeff output data (transform coefficients)
793	* \param iWidth input data (width of transform)
794	* \param iHeight input data (height of transform)
795	*/
796	void xTrMxN(short block,short coeff, int iWidth, int iHeight, UInt uiMode)
797	{
798	#if FULL_NBIT
799	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitDepth - 8; // log2(iWidth) - 1 + g_uiBitDepth - 8
800	#else
801	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitIncrement; // log2(iWidth) - 1 + g_uiBitIncrement
802	#endif
803	int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
804
805	short tmp[ 64 * 64 ];
806
807	if( iWidth == 16 && iHeight == 4)
808	{
809	partialButterfly16( block, tmp, shift_1st, iHeight );
810	partialButterfly4( tmp, coeff, shift_2nd, iWidth );
811	}
812	else if( iWidth == 32 && iHeight == 8 )
813	{
814	partialButterfly32( block, tmp, shift_1st, iHeight );
815	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
816	}
817	else if( iWidth == 4 && iHeight == 16)
818	{
819	partialButterfly4( block, tmp, shift_1st, iHeight );
820	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
821	}
822	else if( iWidth == 8 && iHeight == 32 )
823	{
824	partialButterfly8( block, tmp, shift_1st, iHeight );
825	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
826	}
827	else if( iWidth == 4 && iHeight == 4)
828	{
829	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
830	{
831	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
832	}
833	else
834	{
835	partialButterfly4(block, tmp, shift_1st, iHeight);
836	}
837	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
838	{
839	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
840	}
841	else
842	{
843	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
844	}
845	}
846	else if( iWidth == 8 && iHeight == 8)
847	{
848	partialButterfly8( block, tmp, shift_1st, iHeight );
849	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
850	}
851	else if( iWidth == 16 && iHeight == 16)
852	{
853	partialButterfly16( block, tmp, shift_1st, iHeight );
854	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
855	}
856	else if( iWidth == 32 && iHeight == 32)
857	{
858	partialButterfly32( block, tmp, shift_1st, iHeight );
859	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
860	}
861	}
862	/** MxN inverse transform (2D)
863	* \param coeff input data (transform coefficients)
864	* \param block output data (residual)
865	* \param iWidth input data (width of transform)
866	* \param iHeight input data (height of transform)
867	*/
868	void xITrMxN(short coeff,short block, int iWidth, int iHeight, UInt uiMode)
869	{
870	int shift_1st = SHIFT_INV_1ST;
871	#if FULL_NBIT
872	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
873	#else
874	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
875	#endif
876
877	short tmp[ 64*64];
878	if( iWidth == 16 && iHeight == 4)
879	{
880	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
881	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
882	}
883	else if( iWidth == 32 && iHeight == 8)
884	{
885	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
886	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
887	}
888	else if( iWidth == 4 && iHeight == 16)
889	{
890	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
891	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
892	}
893	else if( iWidth == 8 && iHeight == 32)
894	{
895	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
896	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
897	}
898	else if( iWidth == 4 && iHeight == 4)
899	{
900	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
901	{
902	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
903	}
904	else
905	{
906	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
907	}
908	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
909	{
910	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
911	}
912	else
913	{
914	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
915	}
916	}
917	else if( iWidth == 8 && iHeight == 8)
918	{
919	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
920	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
921	}
922	else if( iWidth == 16 && iHeight == 16)
923	{
924	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
925	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
926	}
927	else if( iWidth == 32 && iHeight == 32)
928	{
929	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
930	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
931	}
932	}
933
934	#endif //MATRIX_MULT
935
936	// To minimize the distortion only. No rate is considered.
937	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
938	{
939	Int tsig = pcCU->getSlice()->getPPS()->getTSIG() ;
940	Int lastCG = -1;
941	Int absSum = 0 ;
942	Int n ;
943
944	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
945	{
946	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
947	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
948	absSum = 0 ;
949
950	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
951	{
952	if( pQCoef[ scan[ n + subPos ]] )
953	{
954	lastNZPosInCG = n;
955	break;
956	}
957	}
958
959	for(n = 0; n <SCAN_SET_SIZE; n++ )
960	{
961	if( pQCoef[ scan[ n + subPos ]] )
962	{
963	firstNZPosInCG = n;
964	break;
965	}
966	}
967
968	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
969	{
970	absSum += pQCoef[ scan[ n + subPos ]];
971	}
972
973	if(lastNZPosInCG>=0 && lastCG==-1)
974	{
975	lastCG = 1 ;
976	}
977
978	if( lastNZPosInCG-firstNZPosInCG>=tsig )
979	{
980	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
981	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
982	{
983	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
984
985	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
986	{
987	UInt blkPos = scan[ n+subPos ];
988	if(pQCoef[ blkPos ] != 0 )
989	{
990	if(deltaU[blkPos]>0)
991	{
992	curCost = - deltaU[blkPos];
993	curChange=1 ;
994	}
995	else
996	{
997	//curChange =-1;
998	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
999	{
1000	curCost=MAX_INT ;
1001	}
1002	else
1003	{
1004	curCost = deltaU[blkPos];
1005	curChange =-1;
1006	}
1007	}
1008	}
1009	else
1010	{
1011	if(n<firstNZPosInCG)
1012	{
1013	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1014	if(thisSignBit != signbit )
1015	{
1016	curCost = MAX_INT;
1017	}
1018	else
1019	{
1020	curCost = - (deltaU[blkPos]) ;
1021	curChange = 1 ;
1022	}
1023	}
1024	else
1025	{
1026	curCost = - (deltaU[blkPos]) ;
1027	curChange = 1 ;
1028	}
1029	}
1030
1031	if( curCost<minCostInc)
1032	{
1033	minCostInc = curCost ;
1034	finalChange = curChange ;
1035	minPos = blkPos ;
1036	}
1037	} //CG loop
1038
1039	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
1040	{
1041	finalChange = -1;
1042	}
1043
1044	if(pCoef[minPos]>=0)
1045	{
1046	pQCoef[minPos] += finalChange ;
1047	}
1048	else
1049	{
1050	pQCoef[minPos] -= finalChange ;
1051	}
1052	} // Hide
1053	}
1054	if(lastCG==1)
1055	{
1056	lastCG=0 ;
1057	}
1058	} // TU loop
1059
1060	return;
1061	}
1062
1063	Void TComTrQuant::xQuant( TComDataCU* pcCU,
1064	Int* pSrc,
1065	TCoeff* pDes,
1066	#if ADAPTIVE_QP_SELECTION
1067	Int*& pArlDes,
1068	#endif
1069	Int iWidth,
1070	Int iHeight,
1071	UInt& uiAcSum,
1072	TextType eTType,
1073	UInt uiAbsPartIdx )
1074	{
1075	Int* piCoef = pSrc;
1076	TCoeff* piQCoef = pDes;
1077	#if ADAPTIVE_QP_SELECTION
1078	Int* piArlCCoef = pArlDes;
1079	#endif
1080	Int iAdd = 0;
1081
1082	if ( m_bUseRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) )
1083	{
1084	#if ADAPTIVE_QP_SELECTION
1085	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1086	#else
1087	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1088	#endif
1089	}
1090	else
1091	{
1092	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1093
1094	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1095	if (scanIdx == SCAN_ZIGZAG)
1096	{
1097	scanIdx = SCAN_DIAG;
1098	}
1099
1100	if (iWidth != iHeight)
1101	{
1102	scanIdx = SCAN_DIAG;
1103	}
1104
1105	const UInt * scan;
1106	if (iWidth == iHeight)
1107	{
1108	scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1109	}
1110	else
1111	{
1112	scan = g_sigScanNSQT[ log2BlockSize - 2 ];
1113	}
1114
1115	Int deltaU[32*32] ;
1116
1117	#if ADAPTIVE_QP_SELECTION
1118	QpParam cQpBase;
1119	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1120
1121	Int qpScaled;
1122	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1123
1124	if(eTType == TEXT_LUMA)
1125	{
1126	qpScaled = iQpBase + qpBDOffset;
1127	}
1128	else
1129	{
1130	qpScaled = Clip3( -qpBDOffset, 51, iQpBase);
1131
1132	if(qpScaled < 0)
1133	{
1134	qpScaled = qpScaled + qpBDOffset;
1135	}
1136	else
1137	{
1138	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBDOffset;
1139	}
1140	}
1141	cQpBase.setQpParam(qpScaled, false, pcCU->getSlice()->getSliceType());
1142	#endif
1143
1144	Bool bNonSqureFlag = ( iWidth != iHeight );
1145	UInt dir = SCALING_LIST_SQT;
1146	if( bNonSqureFlag )
1147	{
1148	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1149	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1150	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1151	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1152	iHeight = iWidth;
1153	}
1154
1155	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1156	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1157	assert(scalingListType < 6);
1158	Int *piQuantCoeff = 0;
1159	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2, dir);
1160
1161	#if FULL_NBIT
1162	UInt uiBitDepth = g_uiBitDepth;
1163	#else
1164	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1165	#endif
1166	UInt iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1167	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1168
1169	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1170
1171	#if ADAPTIVE_QP_SELECTION
1172	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1173	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1174	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1175	Int iAddC = 1 << (iQBitsC-1);
1176	#endif
1177
1178	Int qBits8 = iQBits-8;
1179	for( Int n = 0; n < iWidth*iHeight; n++ )
1180	{
1181	Int iLevel;
1182	Int iSign;
1183	UInt uiBlockPos = n;
1184	iLevel = piCoef[uiBlockPos];
1185	iSign = (iLevel < 0 ? -1: 1);
1186
1187	#if ADAPTIVE_QP_SELECTION
1188	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1189	if( m_bUseAdaptQpSelect )
1190	{
1191	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1192	}
1193	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1194	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1195	#else
1196	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1197	deltaU[uiBlockPos] = (Int)( ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1198	#endif
1199	uiAcSum += iLevel;
1200	iLevel *= iSign;
1201	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1202	} // for n
1203	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1204	{
1205	if(uiAcSum>=2)
1206	{
1207	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1208	}
1209	}
1210	} //if RDOQ
1211	//return;
1212
1213	}
1214
1215	Void TComTrQuant::xDeQuant( const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1216	{
1217
1218	const TCoeff* piQCoef = pSrc;
1219	Int* piCoef = pDes;
1220	UInt dir = SCALING_LIST_SQT;
1221	if( iWidth != iHeight )
1222	{
1223	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1224	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1225	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1226	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1227	iHeight = iWidth;
1228	}
1229
1230	if ( iWidth > (Int)m_uiMaxTrSize )
1231	{
1232	iWidth = m_uiMaxTrSize;
1233	iHeight = m_uiMaxTrSize;
1234	}
1235
1236	Int iShift,iAdd,iCoeffQ;
1237	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1238
1239	#if FULL_NBIT
1240	UInt uiBitDepth = g_uiBitDepth;
1241	#else
1242	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1243	#endif
1244	UInt iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1245	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1246
1247	TCoeff clipQCoef;
1248	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + uiBitDepth - m_cQP.m_iPer) );
1249	const Int levelLimit = 1 << bitRange;
1250
1251	if(getUseScalingList())
1252	{
1253	iShift += 4;
1254	if(iShift > m_cQP.m_iPer)
1255	{
1256	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1257	}
1258	else
1259	{
1260	iAdd = 0;
1261	}
1262	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1263
1264	if(iShift > m_cQP.m_iPer)
1265	{
1266	for( Int n = 0; n < iWidth*iHeight; n++ )
1267	{
1268	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1269	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1270	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1271	}
1272	}
1273	else
1274	{
1275	for( Int n = 0; n < iWidth*iHeight; n++ )
1276	{
1277	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
1278	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1279	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1280	}
1281	}
1282	}
1283	else
1284	{
1285	iAdd = 1 << (iShift-1);
1286	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1287
1288	for( Int n = 0; n < iWidth*iHeight; n++ )
1289	{
1290	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1291	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1292	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1293	}
1294	}
1295	}
1296
1297	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
1298	Bool bUseRDOQ, Bool bEnc
1299	#if ADAPTIVE_QP_SELECTION
1300	, Bool bUseAdaptQpSelect
1301	#endif
1302	)
1303	{
1304	m_uiMaxTrSize = uiMaxTrSize;
1305	m_bEnc = bEnc;
1306	m_bUseRDOQ = bUseRDOQ;
1307	#if ADAPTIVE_QP_SELECTION
1308	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1309	#endif
1310	}
1311
1312	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1313	Pel* pcResidual,
1314	UInt uiStride,
1315	TCoeff* rpcCoeff,
1316	#if ADAPTIVE_QP_SELECTION
1317	Int*& rpcArlCoeff,
1318	#endif
1319	UInt uiWidth,
1320	UInt uiHeight,
1321	UInt& uiAbsSum,
1322	TextType eTType,
1323	UInt uiAbsPartIdx )
1324	{
1325	#if LOSSLESS_CODING
1326	if((m_cQP.qp() == 0) && (pcCU->getSlice()->getSPS()->getUseLossless()))
1327	{
1328	uiAbsSum=0;
1329	for (UInt k = 0; k<uiHeight; k++)
1330	{
1331	for (UInt j = 0; j<uiWidth; j++)
1332	{
1333	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1334	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1335	}
1336	}
1337	return;
1338	}
1339	#endif
1340	UInt uiMode; //luma intra pred
1341	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1342	{
1343	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1344	}
1345	else
1346	{
1347	uiMode = REG_DCT;
1348	}
1349
1350	uiAbsSum = 0;
1351	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1352
1353	xT( uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1354	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1355	#if ADAPTIVE_QP_SELECTION
1356	rpcArlCoeff,
1357	#endif
1358	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1359	}
1360
1361	#if LOSSLESS_CODING
1362	Void TComTrQuant::invtransformNxN( TComDataCU* pcCU, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType)
1363	#else
1364	Void TComTrQuant::invtransformNxN( TextType eText, UInt uiMode,Pel& rpcResidual, UInt uiStride, TCoeff pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType)
1365	#endif
1366	{
1367	#if LOSSLESS_CODING
1368	if((m_cQP.qp() == 0) && (pcCU->getSlice()->getSPS()->getUseLossless()))
1369	{
1370	for (UInt k = 0; k<uiHeight; k++)
1371	{
1372	for (UInt j = 0; j<uiWidth; j++)
1373	{
1374	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1375	}
1376	}
1377	return;
1378	}
1379	#endif
1380	xDeQuant( pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1381	xIT( uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1382	}
1383
1384	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1385	{
1386	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1387	{
1388	return;
1389	}
1390
1391	UInt uiLumaTrMode, uiChromaTrMode;
1392	pcCU->convertTransIdx( uiAbsPartIdx, pcCU->getTransformIdx( uiAbsPartIdx ), uiLumaTrMode, uiChromaTrMode );
1393	const UInt uiStopTrMode = eTxt == TEXT_LUMA ? uiLumaTrMode : uiChromaTrMode;
1394
1395	if( uiTrMode == uiStopTrMode )
1396	{
1397	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1398	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1399	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1400	{
1401	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1402	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1403	{
1404	return;
1405	}
1406	uiWidth <<= 1;
1407	uiHeight <<= 1;
1408	}
1409	Pel* pResi = rpcResidual + uiAddr;
1410	if( pcCU->useNonSquareTrans( uiTrMode, uiAbsPartIdx ) )
1411	{
1412	Int trWidth = uiWidth;
1413	Int trHeight = uiHeight;
1414	pcCU->getNSQTSize( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1415
1416	uiWidth = trWidth;
1417	uiHeight = trHeight;
1418	}
1419	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1420	assert(scalingListType < 6);
1421	#if LOSSLESS_CODING
1422	invtransformNxN( pcCU, eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
1423	#else
1424	invtransformNxN( eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
1425	#endif
1426	}
1427	else
1428	{
1429	uiTrMode++;
1430	uiWidth >>= 1;
1431	uiHeight >>= 1;
1432	Int trWidth = uiWidth, trHeight = uiHeight;
1433	Int trLastWidth = uiWidth << 1, trLastHeight = uiHeight << 1;
1434	pcCU->getNSQTSize ( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
1435	pcCU->getNSQTSize ( uiTrMode - 1, uiAbsPartIdx, trLastWidth, trLastHeight );
1436	UInt uiAddrOffset = trHeight * uiStride;
1437	UInt uiCoefOffset = trWidth * trHeight;
1438	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1439	UInt uiInterTUSplitDirection = pcCU->getInterTUSplitDirection ( trWidth, trHeight, trLastWidth, trLastHeight );
1440	if( uiInterTUSplitDirection != 2 )
1441	{
1442	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1443	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth * uiInterTUSplitDirection + uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1444	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 2 * trWidth * uiInterTUSplitDirection + 2 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1445	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 3 * trWidth * uiInterTUSplitDirection + 3 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1446	}
1447	else
1448	{
1449	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1450	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1451	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1452	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1453	}
1454	}
1455	}
1456
1457	// ------------------------------------------------------------------------------------------------
1458	// Logical transform
1459	// ------------------------------------------------------------------------------------------------
1460
1461	/** Wrapper function between HM interface and core NxN forward transform (2D)
1462	* \param piBlkResi input data (residual)
1463	* \param psCoeff output data (transform coefficients)
1464	* \param uiStride stride of input residual data
1465	* \param iSize transform size (iSize x iSize)
1466	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1467	*/
1468	Void TComTrQuant::xT( UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1469	{
1470	#if MATRIX_MULT
1471	Int iSize = iWidth;
1472	if( iWidth != iHeight)
1473	{
1474	xTrMxN( piBlkResi, psCoeff, uiStride, (UInt)iWidth, (UInt)iHeight );
1475	return;
1476	}
1477	xTr(piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1478	#else
1479	Int j;
1480	{
1481	short block[ 64 * 64 ];
1482	short coeff[ 64 * 64 ];
1483	{
1484	for (j = 0; j < iHeight; j++)
1485	{
1486	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( short ) );
1487	}
1488	}
1489	xTrMxN( block, coeff, iWidth, iHeight, uiMode );
1490	for ( j = 0; j < iHeight * iWidth; j++ )
1491	{
1492	psCoeff[ j ] = coeff[ j ];
1493	}
1494	return ;
1495	}
1496	#endif
1497	}
1498
1499	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1500	* \param plCoef input data (transform coefficients)
1501	* \param pResidual output data (residual)
1502	* \param uiStride stride of input residual data
1503	* \param iSize transform size (iSize x iSize)
1504	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1505	*/
1506	Void TComTrQuant::xIT( UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1507	{
1508	#if MATRIX_MULT
1509	Int iSize = iWidth;
1510	if( iWidth != iHeight )
1511	{
1512	xITrMxN( plCoef, pResidual, uiStride, (UInt)iWidth, (UInt)iHeight );
1513	return;
1514	}
1515	xITr(plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1516	#else
1517	Int j;
1518	{
1519	short block[ 64 * 64 ];
1520	short coeff[ 64 * 64 ];
1521	for ( j = 0; j < iHeight * iWidth; j++ )
1522	{
1523	coeff[j] = (short)plCoef[j];
1524	}
1525	xITrMxN( coeff, block, iWidth, iHeight, uiMode );
1526	{
1527	for ( j = 0; j < iHeight; j++ )
1528	{
1529	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(short) );
1530	}
1531	}
1532	return ;
1533	}
1534	#endif
1535	}
1536
1537	/** RDOQ with CABAC
1538	* \param pcCU pointer to coding unit structure
1539	* \param plSrcCoeff pointer to input buffer
1540	* \param piDstCoeff reference to pointer to output buffer
1541	* \param uiWidth block width
1542	* \param uiHeight block height
1543	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1544	* \param eTType plane type / luminance or chrominance
1545	* \param uiAbsPartIdx absolute partition index
1546	* \returns Void
1547	* Rate distortion optimized quantization for entropy
1548	* coding engines using probability models like CABAC
1549	*/
1550	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1551	Int* plSrcCoeff,
1552	TCoeff* piDstCoeff,
1553	#if ADAPTIVE_QP_SELECTION
1554	Int*& piArlDstCoeff,
1555	#endif
1556	UInt uiWidth,
1557	UInt uiHeight,
1558	UInt& uiAbsSum,
1559	TextType eTType,
1560	UInt uiAbsPartIdx )
1561	{
1562	Int iQBits = m_cQP.m_iBits;
1563	Double dTemp = 0;
1564
1565	UInt dir = SCALING_LIST_SQT;
1566	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1567	Int uiQ = g_quantScales[m_cQP.rem()];
1568	if (uiWidth != uiHeight)
1569	{
1570	uiLog2TrSize += (uiWidth > uiHeight) ? -1 : 1;
1571	dir = ( uiWidth < uiHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1572	}
1573
1574	#if FULL_NBIT
1575	UInt uiBitDepth = g_uiBitDepth;
1576	#else
1577	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1578	#endif
1579	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1580	UInt uiGoRiceParam = 0;
1581	Double d64BlockUncodedCost = 0;
1582	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1583	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1584	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1585	assert(scalingListType < 6);
1586
1587	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1588	double dErrScale = 0;
1589	double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem,dir);
1590	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1591	Int *piQCoef = piQCoefOrg;
1592	double *pdErrScale = pdErrScaleOrg;
1593	#if ADAPTIVE_QP_SELECTION
1594	Int iQBitsC = iQBits - ARL_C_PRECISION;
1595	Int iAddC = 1 << (iQBitsC-1);
1596	#endif
1597	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1598	if (uiScanIdx == SCAN_ZIGZAG)
1599	{
1600	// Map value zigzag to diagonal scan
1601	uiScanIdx = SCAN_DIAG;
1602	}
1603	Int blockType = uiLog2BlkSize;
1604	if (uiWidth != uiHeight)
1605	{
1606	uiScanIdx = SCAN_DIAG;
1607	blockType = 4;
1608	}
1609
1610	#if ADAPTIVE_QP_SELECTION
1611	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1612	#endif
1613
1614	Double pdCostCoeff [ 32 * 32 ];
1615	Double pdCostSig [ 32 * 32 ];
1616	Double pdCostCoeff0[ 32 * 32 ];
1617	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1618	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1619	Int rateIncUp [ 32 * 32 ];
1620	Int rateIncDown [ 32 * 32 ];
1621	Int sigRateDelta[ 32 * 32 ];
1622	Int deltaU [ 32 * 32 ];
1623	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1624	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1625	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1626	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1627
1628	const UInt * scanCG;
1629	if (uiWidth == uiHeight)
1630	{
1631	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1632	if( uiLog2BlkSize == 3 )
1633	{
1634	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1635	}
1636	else if( uiLog2BlkSize == 5 )
1637	{
1638	scanCG = g_sigLastScanCG32x32;
1639	}
1640	}
1641	else
1642	{
1643	scanCG = g_sigCGScanNSQT[ uiLog2BlkSize - 2 ];
1644	}
1645	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1646	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1647	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1648	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1649	Int iCGLastScanPos = -1;
1650
1651	UInt uiCtxSet = 0;
1652	Int c1 = 1;
1653	Int c2 = 0;
1654	UInt uiNumOne = 0;
1655	Double d64BaseCost = 0;
1656	Int iLastScanPos = -1;
1657	dTemp = dErrScale;
1658
1659	UInt c1Idx = 0;
1660	UInt c2Idx = 0;
1661	Int baseLevel;
1662
1663	const UInt * scan;
1664	if (uiWidth == uiHeight)
1665	{
1666	scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1667	}
1668	else
1669	{
1670	scan = g_sigScanNSQT[ uiLog2BlkSize - 2 ];
1671	}
1672
1673	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1674	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1675
1676	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1677	Int iScanPos;
1678	coeffGroupRDStats rdStats;
1679
1680	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1681	{
1682	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1683	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1684	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1685	if( uiWidth == 8 && uiHeight == 8 && (uiScanIdx == SCAN_HOR \|\| uiScanIdx == SCAN_VER) )
1686	{
1687	uiCGPosY = (uiScanIdx == SCAN_HOR ? uiCGBlkPos : 0);
1688	uiCGPosX = (uiScanIdx == SCAN_VER ? uiCGBlkPos : 0);
1689	}
1690	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1691
1692	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1693	{
1694	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1695	//===== quantization =====
1696	UInt uiBlkPos = scan[iScanPos];
1697	// set coeff
1698	uiQ = piQCoef[uiBlkPos];
1699	dTemp = pdErrScale[uiBlkPos];
1700	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1701	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1702	#if ADAPTIVE_QP_SELECTION
1703	if( m_bUseAdaptQpSelect )
1704	{
1705	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1706	}
1707	#endif
1708	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1709
1710	Double dErr = Double( lLevelDouble );
1711	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1712	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1713	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1714
1715	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1716	{
1717	iLastScanPos = iScanPos;
1718	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1719	iCGLastScanPos = iCGScanPos;
1720	}
1721
1722	if ( iLastScanPos >= 0 )
1723	{
1724	//===== coefficient level estimation =====
1725	UInt uiLevel;
1726	UInt uiOneCtx = 4 * uiCtxSet + c1;
1727	UInt uiAbsCtx = uiCtxSet + c2;
1728
1729	if( iScanPos == iLastScanPos )
1730	{
1731	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1732	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1733	c1Idx, c2Idx, iQBits, dTemp, 1 );
1734	}
1735	else
1736	{
1737	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1738	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1739	UShort uiCtxSig = getSigCtxInc( piDstCoeff, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
1740	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1741	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1742	c1Idx, c2Idx, iQBits, dTemp, 0 );
1743	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1744	}
1745	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1746	if( uiLevel > 0 )
1747	{
1748	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1749	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1750	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1751	}
1752	else // uiLevel == 0
1753	{
1754	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1755	}
1756	piDstCoeff[ uiBlkPos ] = uiLevel;
1757	d64BaseCost += pdCostCoeff [ iScanPos ];
1758
1759
1760	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1761	if( uiLevel >= baseLevel )
1762	{
1763	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - baseLevel , 23 ) ];
1764	}
1765	if ( uiLevel >= 1)
1766	{
1767	c1Idx ++;
1768	}
1769
1770	//===== update bin model =====
1771	if( uiLevel > 1 )
1772	{
1773	c1 = 0;
1774	c2 += (c2 < 2);
1775	uiNumOne++;
1776	c2Idx ++;
1777	}
1778	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1779	{
1780	c1++;
1781	}
1782
1783	//===== context set update =====
1784	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1785	{
1786	c1 = 1;
1787	c2 = 0;
1788	uiGoRiceParam = 0;
1789
1790	c1Idx = 0;
1791	c2Idx = 0;
1792	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1793	if( uiNumOne > 0 )
1794	{
1795	uiCtxSet++;
1796	}
1797	uiNumOne >>= 1;
1798	}
1799	}
1800	else
1801	{
1802	d64BaseCost += pdCostCoeff0[ iScanPos ];
1803	}
1804	rdStats.d64SigCost += pdCostSig[ iScanPos ];
1805	if (iScanPosinCG == 0 )
1806	{
1807	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
1808	}
1809	if (piDstCoeff[ uiBlkPos ] )
1810	{
1811	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1812	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
1813	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
1814	if ( iScanPosinCG != 0 )
1815	{
1816	rdStats.iNNZbeforePos0++;
1817	}
1818	}
1819	} //end for (iScanPosinCG)
1820
1821	if (iCGLastScanPos >= 0)
1822	{
1823	if( iCGScanPos )
1824	{
1825	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1826	{
1827	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
1828	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
1829	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1830	}
1831	else
1832	{
1833	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
1834	{
1835	if ( rdStats.iNNZbeforePos0 == 0 )
1836	{
1837	d64BaseCost -= rdStats.d64SigCost_0;
1838	rdStats.d64SigCost -= rdStats.d64SigCost_0;
1839	}
1840	// rd-cost if SigCoeffGroupFlag = 0, initialization
1841	Double d64CostZeroCG = d64BaseCost;
1842
1843	// add SigCoeffGroupFlag cost to total cost
1844	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
1845	if (iCGScanPos < iCGLastScanPos)
1846	{
1847	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
1848	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
1849	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
1850	}
1851
1852	// try to convert the current coeff group from non-zero to all-zero
1853	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
1854	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
1855	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
1856
1857	// if we can save cost, change this block to all-zero block
1858	if ( d64CostZeroCG < d64BaseCost )
1859	{
1860	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
1861	d64BaseCost = d64CostZeroCG;
1862	if (iCGScanPos < iCGLastScanPos)
1863	{
1864	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1865	}
1866	// reset coeffs to 0 in this block
1867	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1868	{
1869	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1870	UInt uiBlkPos = scan[ iScanPos ];
1871
1872	if (piDstCoeff[ uiBlkPos ])
1873	{
1874	piDstCoeff [ uiBlkPos ] = 0;
1875	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
1876	pdCostSig [ iScanPos ] = 0;
1877	}
1878	}
1879	} // end if ( d64CostAllZeros < d64BaseCost )
1880	}
1881	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1882	}
1883	else
1884	{
1885	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1886	}
1887	}
1888	} //end for (iCGScanPos)
1889
1890	//===== estimate last position =====
1891	if ( iLastScanPos < 0 )
1892	{
1893	return;
1894	}
1895
1896	Double d64BestCost = 0;
1897	Int ui16CtxCbf = 0;
1898	Int iBestLastIdxP1 = 0;
1899	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1900	{
1901	ui16CtxCbf = 0;
1902	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
1903	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
1904	}
1905	else
1906	{
1907	ui16CtxCbf = pcCU->getCtxQtCbf( uiAbsPartIdx, eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
1908	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
1909	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
1910	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
1911	}
1912
1913	Bool bFoundLast = false;
1914	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
1915	{
1916	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1917
1918	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
1919	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1920	{
1921	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1922	{
1923	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1924	if (iScanPos > iLastScanPos) continue;
1925	UInt uiBlkPos = scan[iScanPos];
1926
1927	if( piDstCoeff[ uiBlkPos ] )
1928	{
1929	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1930	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1931
1932	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
1933	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
1934
1935	if( totalCost < d64BestCost )
1936	{
1937	iBestLastIdxP1 = iScanPos + 1;
1938	d64BestCost = totalCost;
1939	}
1940	if( piDstCoeff[ uiBlkPos ] > 1 )
1941	{
1942	bFoundLast = true;
1943	break;
1944	}
1945	d64BaseCost -= pdCostCoeff[ iScanPos ];
1946	d64BaseCost += pdCostCoeff0[ iScanPos ];
1947	}
1948	else
1949	{
1950	d64BaseCost -= pdCostSig[ iScanPos ];
1951	}
1952	} //end for
1953	if (bFoundLast)
1954	{
1955	break;
1956	}
1957	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1958	} // end for
1959
1960	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
1961	{
1962	Int blkPos = scan[ scanPos ];
1963	Int level = piDstCoeff[ blkPos ];
1964	uiAbsSum += level;
1965	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
1966	}
1967
1968	//===== clean uncoded coefficients =====
1969	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
1970	{
1971	piDstCoeff[ scan[ scanPos ] ] = 0;
1972	}
1973
1974	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
1975	{
1976	Int rdFactor = (Int)((Double)(g_invQuantScales[m_cQP.rem()]g_invQuantScales[m_cQP.rem()]<<(2m_cQP.m_iPer))/m_dLambda/16 + 0.5) ;
1977
1978	Int tsig = pcCU->getSlice()->getPPS()->getTSIG() ;
1979
1980	Int lastCG = -1;
1981	Int absSum = 0 ;
1982	Int n ;
1983
1984	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1985	{
1986	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1987	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1988	absSum = 0 ;
1989
1990	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1991	{
1992	if( piDstCoeff[ scan[ n + subPos ]] )
1993	{
1994	lastNZPosInCG = n;
1995	break;
1996	}
1997	}
1998
1999	for(n = 0; n <SCAN_SET_SIZE; n++ )
2000	{
2001	if( piDstCoeff[ scan[ n + subPos ]] )
2002	{
2003	firstNZPosInCG = n;
2004	break;
2005	}
2006	}
2007
2008	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2009	{
2010	absSum += piDstCoeff[ scan[ n + subPos ]];
2011	}
2012
2013	if(lastNZPosInCG>=0 && lastCG==-1) lastCG =1 ;
2014
2015	if( lastNZPosInCG-firstNZPosInCG>=tsig )
2016	{
2017	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
2018	if( signbit!=(absSum&0x1) ) // hide but need tune
2019	{
2020	// calculate the cost
2021	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
2022
2023	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
2024	{
2025	UInt uiBlkPos = scan[ n + subPos ];
2026	if(piDstCoeff[ uiBlkPos ] != 0 )
2027	{
2028	Int costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
2029	Int costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2030	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
2031
2032	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2033	{
2034	costDown -= (4<<15) ;
2035	}
2036
2037	if(costUp<costDown)
2038	{
2039	curCost = costUp;
2040	curChange = 1 ;
2041	}
2042	else
2043	{
2044	curChange = -1 ;
2045	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2046	{
2047	curCost = MAX_INT ;
2048	}
2049	else
2050	{
2051	curCost = costDown ;
2052	}
2053	}
2054	}
2055	else
2056	{
2057	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2058	curChange = 1 ;
2059
2060	if(n<firstNZPosInCG)
2061	{
2062	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2063	if(thissignbit != signbit )
2064	{
2065	curCost = MAX_INT;
2066	}
2067	}
2068	}
2069
2070	if( curCost<minCostInc)
2071	{
2072	minCostInc = curCost ;
2073	finalChange = curChange ;
2074	minPos = uiBlkPos ;
2075	}
2076	}
2077
2078	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
2079	{
2080	finalChange = -1;
2081	}
2082
2083	if(plSrcCoeff[minPos]>=0)
2084	{
2085	piDstCoeff[minPos] += finalChange ;
2086	}
2087	else
2088	{
2089	piDstCoeff[minPos] -= finalChange ;
2090	}
2091	}
2092	}
2093
2094	if(lastCG==1)
2095	{
2096	lastCG=0 ;
2097	}
2098	}
2099	}
2100	}
2101
2102	/** Context derivation process of coeff_abs_significant_flag
2103	* \param pcCoeff pointer to prior coded transform coefficients
2104	* \param posX column of current scan position
2105	* \param posY row of current scan position
2106	* \param blockType log2 value of block size if square block, or 4 otherwise
2107	* \param width width of the block
2108	* \param height height of the block
2109	* \param textureType texture type (TEXT_LUMA...)
2110	* \returns ctxInc for current scan position
2111	*/
2112	Int TComTrQuant::getSigCtxInc ( TCoeff* pcCoeff,
2113	Int posX,
2114	Int posY,
2115	Int blockType,
2116	Int width
2117	,Int height
2118	,TextType textureType
2119	)
2120	{
2121	if ( blockType == 2 )
2122	{
2123	//LUMA map
2124	const Int ctxIndMap4x4Luma[15] =
2125	{
2126	0, 1, 4, 5,
2127	2, 3, 4, 5,
2128	6, 6, 8, 8,
2129	7, 7, 8
2130	};
2131	//CHROMA map
2132	const Int ctxIndMap4x4Chroma[15] =
2133	{
2134	0, 1, 2, 4,
2135	1, 1, 2, 4,
2136	3, 3, 5, 5,
2137	4, 4, 5
2138	};
2139
2140	if (textureType == TEXT_LUMA)
2141	{
2142	return ctxIndMap4x4Luma[ 4 * posY + posX ];
2143	}
2144	else
2145	{
2146	return ctxIndMap4x4Chroma[ 4 * posY + posX ];
2147	}
2148	}
2149
2150	if ( blockType == 3 )
2151	{
2152	const Int map8x8[16] =
2153	{
2154	0, 1, 2, 3,
2155	4, 5, 6, 3,
2156	8, 6, 6, 7,
2157	9, 9, 7, 7
2158	};
2159
2160	Int offset = (textureType == TEXT_LUMA) ? 9 : 6;
2161
2162	if ( posX + posY == 0 )
2163	{
2164	return offset + 10;
2165	}
2166	return offset + map8x8[4 * (posY >> 1) + (posX >> 1)];
2167	}
2168
2169	Int offset = (textureType == TEXT_LUMA) ? 20 : 17;
2170	if( posX + posY == 0 )
2171	{
2172	return offset;
2173	}
2174	#if SIGMAP_CONST_AT_HIGH_FREQUENCY
2175	Int thredHighFreq = 3*(std::max(width, height)>>4);
2176	if ((posX>>2) + (posY>>2) >= thredHighFreq)
2177	{
2178	return (textureType == TEXT_LUMA) ? 24 : 18;
2179	}
2180	#endif
2181
2182	const TCoeff pData = pcCoeff + posX + posY width;
2183
2184
2185	Int cnt = 0;
2186	if( posX < width - 1 )
2187	{
2188	cnt += pData[1] != 0;
2189	if( posY < height - 1 )
2190	{
2191	cnt += pData[width+1] != 0;
2192	}
2193	if( posX < width - 2 )
2194	{
2195	cnt += pData[2] != 0;
2196	}
2197	}
2198	if ( posY < height - 1 )
2199	{
2200	if( ( ( posX & 3 ) \|\| ( posY & 3 ) ) && ( ( (posX+1) & 3 ) \|\| ( (posY+2) & 3 ) ) )
2201	{
2202	cnt += pData[width] != 0;
2203	}
2204	if ( posY < height - 2 && cnt < 4 )
2205	{
2206	cnt += pData[2*width] != 0;
2207	}
2208	}
2209
2210	cnt = ( cnt + 1 ) >> 1;
2211	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 4 : 1) + offset + cnt;
2212	}
2213
2214	/** Get the best level in RD sense
2215	* \param rd64CodedCost reference to coded cost
2216	* \param rd64CodedCost0 reference to cost when coefficient is 0
2217	* \param rd64CodedCostSig reference to cost of significant coefficient
2218	* \param lLevelDouble reference to unscaled quantized level
2219	* \param uiMaxAbsLevel scaled quantized level
2220	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2221	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2222	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2223	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2224	* \param iQBits quantization step size
2225	* \param dTemp correction factor
2226	* \param bLast indicates if the coefficient is the last significant
2227	* \returns best quantized transform level for given scan position
2228	* This method calculates the best quantized transform level for a given scan position.
2229	*/
2230	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2231	Double& rd64CodedCost0,
2232	Double& rd64CodedCostSig,
2233	Int lLevelDouble,
2234	UInt uiMaxAbsLevel,
2235	UShort ui16CtxNumSig,
2236	UShort ui16CtxNumOne,
2237	UShort ui16CtxNumAbs,
2238	UShort ui16AbsGoRice,
2239	UInt c1Idx,
2240	UInt c2Idx,
2241	Int iQBits,
2242	Double dTemp,
2243	Bool bLast ) const
2244	{
2245	Double dCurrCostSig = 0;
2246	UInt uiBestAbsLevel = 0;
2247
2248	if( !bLast && uiMaxAbsLevel < 3 )
2249	{
2250	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2251	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2252	if( uiMaxAbsLevel == 0 )
2253	{
2254	return uiBestAbsLevel;
2255	}
2256	}
2257	else
2258	{
2259	rd64CodedCost = MAX_DOUBLE;
2260	}
2261
2262	if( !bLast )
2263	{
2264	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2265	}
2266
2267	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2268	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2269	{
2270	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2271	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2272	dCurrCost += dCurrCostSig;
2273
2274	if( dCurrCost < rd64CodedCost )
2275	{
2276	uiBestAbsLevel = uiAbsLevel;
2277	rd64CodedCost = dCurrCost;
2278	rd64CodedCostSig = dCurrCostSig;
2279	}
2280	}
2281
2282	return uiBestAbsLevel;
2283	}
2284
2285	/** Calculates the cost for specific absolute transform level
2286	* \param uiAbsLevel scaled quantized level
2287	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2288	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2289	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2290	* \returns cost of given absolute transform level
2291	*/
2292	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2293	UShort ui16CtxNumOne,
2294	UShort ui16CtxNumAbs,
2295	UShort ui16AbsGoRice
2296	, UInt c1Idx,
2297	UInt c2Idx
2298	) const
2299	{
2300	Double iRate = xGetIEPRate();
2301	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2302
2303	if ( uiAbsLevel >= baseLevel )
2304	{
2305	UInt uiSymbol = uiAbsLevel - baseLevel;
2306	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2307	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2308
2309	if( bExpGolomb )
2310	{
2311	uiAbsLevel = uiSymbol - uiMaxVlc;
2312	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2313	iRate += iEGS << 15;
2314	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2315	}
2316
2317	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2318	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2319
2320	iRate += ui16NumBins << 15;
2321
2322	if (c1Idx < C1FLAG_NUMBER)
2323	{
2324	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2325
2326	if (c2Idx < C2FLAG_NUMBER)
2327	{
2328	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2329	}
2330	}
2331	}
2332	else
2333	if( uiAbsLevel == 1 )
2334	{
2335	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2336	}
2337	else if( uiAbsLevel == 2 )
2338	{
2339	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2340	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2341	}
2342	else
2343	{
2344	assert (0);
2345	}
2346	return xGetICost( iRate );
2347	}
2348
2349	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2350	UShort ui16CtxNumOne,
2351	UShort ui16CtxNumAbs,
2352	UShort ui16AbsGoRice
2353	, UInt c1Idx,
2354	UInt c2Idx
2355	) const
2356	{
2357	Int iRate = 0;
2358	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2359
2360	if ( uiAbsLevel >= baseLevel )
2361	{
2362	UInt uiSymbol = uiAbsLevel - baseLevel;
2363	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2364	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2365
2366	if( bExpGolomb )
2367	{
2368	uiAbsLevel = uiSymbol - uiMaxVlc;
2369	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2370	iRate += iEGS << 15;
2371	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2372	}
2373
2374	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2375	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2376
2377	iRate += ui16NumBins << 15;
2378
2379	if (c1Idx < C1FLAG_NUMBER)
2380	{
2381	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2382
2383	if (c2Idx < C2FLAG_NUMBER)
2384	{
2385	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2386	}
2387	}
2388	}
2389	else
2390	if( uiAbsLevel == 0 )
2391	{
2392	return 0;
2393	}
2394	else if( uiAbsLevel == 1 )
2395	{
2396	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2397	}
2398	else if( uiAbsLevel == 2 )
2399	{
2400	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2401	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2402	}
2403	else
2404	{
2405	assert(0);
2406	}
2407	return iRate;
2408	}
2409
2410	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2411	UShort ui16CtxNumSig ) const
2412	{
2413	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2414	}
2415
2416	/** Calculates the cost of signaling the last significant coefficient in the block
2417	* \param uiPosX X coordinate of the last significant coefficient
2418	* \param uiPosY Y coordinate of the last significant coefficient
2419	* \returns cost of last significant coefficient
2420	*/
2421	/*
2422	* \param uiWidth width of the transform unit (TU)
2423	*/
2424	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2425	const UInt uiPosY,
2426	const UInt uiBlkWdth ) const
2427	{
2428	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2429	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2430	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2431	if( uiCtxX > 3 )
2432	{
2433	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2434	}
2435	if( uiCtxY > 3 )
2436	{
2437	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2438	}
2439	return xGetICost( uiCost );
2440	}
2441
2442	/** Calculates the cost for specific absolute transform level
2443	* \param uiAbsLevel scaled quantized level
2444	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2445	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2446	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2447	* \returns cost of given absolute transform level
2448	*/
2449	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2450	UShort ui16CtxNumSig ) const
2451	{
2452	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2453	}
2454
2455	/** Get the cost for a specific rate
2456	* \param dRate rate of a bit
2457	* \returns cost at the specific rate
2458	*/
2459	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2460	{
2461	return m_dLambda * dRate;
2462	}
2463
2464	/** Get the cost of an equal probable bit
2465	* \returns cost of equal probable bit
2466	*/
2467	__inline Double TComTrQuant::xGetIEPRate ( ) const
2468	{
2469	return 32768;
2470	}
2471
2472	/** Context derivation process of coeff_abs_significant_flag
2473	* \param uiSigCoeffGroupFlag significance map of L1
2474	* \param uiBlkX column of current scan position
2475	* \param uiBlkY row of current scan position
2476	* \param uiLog2BlkSize log2 value of block size
2477	* \returns ctxInc for current scan position
2478	*/
2479	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2480	const UInt uiCGPosX,
2481	const UInt uiCGPosY,
2482	const UInt scanIdx,
2483	Int width, Int height)
2484	{
2485	UInt uiRight = 0;
2486	UInt uiLower = 0;
2487
2488	width >>= 2;
2489	height >>= 2;
2490	if( width == 2 && height == 2 ) // 8x8
2491	{
2492	if( scanIdx == SCAN_HOR )
2493	{
2494	width = 1;
2495	height = 4;
2496	}
2497	else if( scanIdx == SCAN_VER )
2498	{
2499	width = 4;
2500	height = 1;
2501	}
2502	}
2503	if( uiCGPosX < width - 1 )
2504	{
2505	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2506	}
2507	if (uiCGPosY < height - 1 )
2508	{
2509	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2510	}
2511	return (uiRight \|\| uiLower);
2512
2513	}
2514	/** set quantized matrix coefficient for encode
2515	* \param scalingList quantaized matrix address
2516	*/
2517	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2518	{
2519	UInt size,list;
2520	UInt qp;
2521
2522	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2523	{
2524	for(list = 0; list < g_scalingListNum[size]; list++)
2525	{
2526	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2527	{
2528	xSetScalingListEnc(scalingList,list,size,qp);
2529	xSetScalingListDec(scalingList,list,size,qp);
2530	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2531	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2532	{
2533	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2534	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2535	}
2536	}
2537	}
2538	}
2539	}
2540	/** set quantized matrix coefficient for decode
2541	* \param scalingList quantaized matrix address
2542	*/
2543	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2544	{
2545	UInt size,list;
2546	UInt qp;
2547
2548	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2549	{
2550	for(list = 0; list < g_scalingListNum[size]; list++)
2551	{
2552	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2553	{
2554	xSetScalingListDec(scalingList,list,size,qp);
2555	}
2556	}
2557	}
2558	}
2559	/** set error scale coefficients
2560	* \param list List ID
2561	* \param uiSize Size
2562	* \param uiQP Quantization parameter
2563	*/
2564	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp, UInt dir)
2565	{
2566
2567	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2568	#if FULL_NBIT
2569	UInt uiBitDepth = g_uiBitDepth;
2570	#else
2571	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
2572	#endif
2573
2574	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
2575
2576	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2577	Int *piQuantcoeff;
2578	double *pdErrScale;
2579	piQuantcoeff = getQuantCoeff(list, qp,size,dir);
2580	pdErrScale = getErrScaleCoeff(list, size, qp,dir);
2581
2582	double dErrScale = (double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2583	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2584	for(i=0;i<uiMaxNumCoeff;i++)
2585	{
2586	pdErrScale[i] = dErrScale/(double)piQuantcoeff[i]/(double)piQuantcoeff[i]/(double)(1<<(2*g_uiBitIncrement));
2587	}
2588	}
2589
2590	/** set quantized matrix coefficient for encode
2591	* \param scalingList quantaized matrix address
2592	* \param listId List index
2593	* \param sizeId size index
2594	* \param uiQP Quantization parameter
2595	*/
2596	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2597	{
2598	UInt width = g_scalingListSizeX[sizeId];
2599	UInt height = g_scalingListSizeX[sizeId];
2600	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2601	Int *quantcoeff;
2602	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2603	quantcoeff = getQuantCoeff(listId, qp, sizeId, SCALING_LIST_SQT);
2604
2605	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2606
2607	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16) //for NSQT
2608	{
2609	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2610	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2611
2612	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2613	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2614	}
2615	}
2616	/** set quantized matrix coefficient for decode
2617	* \param scalingList quantaized matrix address
2618	* \param list List index
2619	* \param size size index
2620	* \param uiQP Quantization parameter
2621	*/
2622	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2623	{
2624	UInt width = g_scalingListSizeX[sizeId];
2625	UInt height = g_scalingListSizeX[sizeId];
2626	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2627	Int *dequantcoeff;
2628	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2629
2630	dequantcoeff = getDequantCoeff(listId, qp, sizeId,SCALING_LIST_SQT);
2631	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2632
2633	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16)
2634	{
2635	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
2636	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2637
2638	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
2639
2640	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2641	}
2642	}
2643
2644	/** set flat matrix value to quantized coefficient
2645	*/
2646	Void TComTrQuant::setFlatScalingList()
2647	{
2648	UInt size,list;
2649	UInt qp;
2650
2651	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2652	{
2653	for(list = 0; list < g_scalingListNum[size]; list++)
2654	{
2655	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2656	{
2657	xsetFlatScalingList(list,size,qp);
2658	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
2659	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2660	{
2661	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
2662	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
2663	}
2664	}
2665	}
2666	}
2667	}
2668
2669	/** set flat matrix value to quantized coefficient
2670	* \param list List ID
2671	* \param uiQP Quantization parameter
2672	* \param uiSize Size
2673	*/
2674	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2675	{
2676	UInt i,num = g_scalingListSize[size];
2677	UInt numDiv4 = num>>2;
2678	Int *quantcoeff;
2679	Int *dequantcoeff;
2680	Int quantScales = g_quantScales[qp];
2681	Int invQuantScales = g_invQuantScales[qp]<<4;
2682
2683	quantcoeff = getQuantCoeff(list, qp, size,SCALING_LIST_SQT);
2684	dequantcoeff = getDequantCoeff(list, qp, size,SCALING_LIST_SQT);
2685
2686	for(i=0;i<num;i++)
2687	{
2688	*quantcoeff++ = quantScales;
2689	*dequantcoeff++ = invQuantScales;
2690	}
2691
2692	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
2693	{
2694	quantcoeff = getQuantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2695	dequantcoeff = getDequantCoeff(list, qp, size-1, SCALING_LIST_HOR);
2696
2697	for(i=0;i<numDiv4;i++)
2698	{
2699	*quantcoeff++ = quantScales;
2700	*dequantcoeff++ = invQuantScales;
2701	}
2702	quantcoeff = getQuantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2703	dequantcoeff = getDequantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
2704
2705	for(i=0;i<numDiv4;i++)
2706	{
2707	*quantcoeff++ = quantScales;
2708	*dequantcoeff++ = invQuantScales;
2709	}
2710	}
2711	}
2712
2713	/** set quantized matrix coefficient for encode
2714	* \param coeff quantaized matrix address
2715	* \param quantcoeff quantaized matrix address
2716	* \param quantScales Q(QP%6)
2717	* \param height height
2718	* \param width width
2719	* \param ratio ratio for upscale
2720	* \param sizuNum matrix size
2721	* \param dc dc parameter
2722	*/
2723	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2724	{
2725	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2726	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2727	for(UInt j=0;j<height;j++)
2728	{
2729	for(UInt i=0;i<width;i++)
2730	{
2731	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2732	}
2733	}
2734	if(ratio > 1)
2735	{
2736	quantcoeff[0] = quantScales / dc;
2737	}
2738	}
2739	/** set quantized matrix coefficient for decode
2740	* \param coeff quantaized matrix address
2741	* \param dequantcoeff quantaized matrix address
2742	* \param invQuantScales IQ(QP%6))
2743	* \param height height
2744	* \param width width
2745	* \param ratio ratio for upscale
2746	* \param sizuNum matrix size
2747	* \param dc dc parameter
2748	*/
2749	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2750	{
2751	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2752	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2753	for(UInt j=0;j<height;j++)
2754	{
2755	for(UInt i=0;i<width;i++)
2756	{
2757	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio];
2758	}
2759	}
2760	if(ratio > 1)
2761	{
2762	dequantcoeff[0] = invQuantScales * dc;
2763	}
2764	}
2765
2766	/** initialization process of scaling list array
2767	*/
2768	Void TComTrQuant::initScalingList()
2769	{
2770	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2771	{
2772	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2773	{
2774	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2775	{
2776	m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
2777	m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
2778	m_errScale [sizeId][listId][qp][SCALING_LIST_SQT] = new double [g_scalingListSize[sizeId]];
2779
2780	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
2781	{
2782	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
2783	{
2784	m_quantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
2785	m_dequantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
2786	m_errScale [sizeId][listId][qp][dir] = new double [g_scalingListSize[sizeId]];
2787	}
2788	}
2789	}
2790	}
2791	}
2792	//copy for NSQT
2793	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2794	{
2795	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
2796	{
2797	m_quantCoef [SCALING_LIST_16x16][3][qp][dir] = m_quantCoef [SCALING_LIST_16x16][1][qp][dir];
2798	m_dequantCoef [SCALING_LIST_16x16][3][qp][dir] = m_dequantCoef [SCALING_LIST_16x16][1][qp][dir];
2799	m_errScale [SCALING_LIST_16x16][3][qp][dir] = m_errScale [SCALING_LIST_16x16][1][qp][dir];
2800	}
2801	m_quantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_quantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
2802	m_dequantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_dequantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
2803	m_errScale [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_errScale [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
2804	}
2805	}
2806	/** destroy quantization matrix array
2807	*/
2808	Void TComTrQuant::destroyScalingList()
2809	{
2810	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2811	{
2812	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2813	{
2814	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2815	{
2816	if(m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
2817	if(m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
2818	if(m_errScale [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_errScale [sizeId][listId][qp][SCALING_LIST_SQT];
2819	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
2820	{
2821	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
2822	{
2823	if(m_quantCoef [sizeId][listId][qp][dir]) delete [] m_quantCoef [sizeId][listId][qp][dir];
2824	if(m_dequantCoef [sizeId][listId][qp][dir]) delete [] m_dequantCoef [sizeId][listId][qp][dir];
2825	if(m_errScale [sizeId][listId][qp][dir]) delete [] m_errScale [sizeId][listId][qp][dir];
2826	}
2827	}
2828	}
2829	}
2830	}
2831	}
2832
2833	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: