Context Navigation

Back to Ticket #919

Ticket #919: TComTrQuant.cpp

File TComTrQuant.cpp, 87.3 KB (added by lkerofsky, 13 years ago)
patch to xQuant() for ChromaQPOffsets

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2012, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(Int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_useRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
203
204	if(qpScaled < 0)
205	{
206	qpScaled = qpScaled + qpBdOffset;
207	}
208	else
209	{
210	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
211	}
212	}
213	m_cQP.setQpParam( qpScaled );
214	}
215
216	#if MATRIX_MULT
217	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
218	* \param block pointer to input data (residual)
219	* \param coeff pointer to output data (transform coefficients)
220	* \param uiStride stride of input data
221	* \param uiTrSize transform size (uiTrSize x uiTrSize)
222	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
223	*/
224	void xTr(Int bitDepth, Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
225	{
226	Int i,j,k,iSum;
227	Int tmp[32*32];
228	const Short *iT;
229	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
230
231	if (uiTrSize==4)
232	{
233	iT = g_aiT4[0];
234	}
235	else if (uiTrSize==8)
236	{
237	iT = g_aiT8[0];
238	}
239	else if (uiTrSize==16)
240	{
241	iT = g_aiT16[0];
242	}
243	else if (uiTrSize==32)
244	{
245	iT = g_aiT32[0];
246	}
247	else
248	{
249	assert(0);
250	}
251
252	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
253	Int add_1st = 1<<(shift_1st-1);
254	Int shift_2nd = uiLog2TrSize + 6;
255	Int add_2nd = 1<<(shift_2nd-1);
256
257	/* Horizontal transform */
258
259	if (uiTrSize==4)
260	{
261	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
262	{
263	iT = g_as_DST_MAT_4[0];
264	}
265	}
266	for (i=0; i<uiTrSize; i++)
267	{
268	for (j=0; j<uiTrSize; j++)
269	{
270	iSum = 0;
271	for (k=0; k<uiTrSize; k++)
272	{
273	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
274	}
275	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
276	}
277	}
278
279	/* Vertical transform */
280	if (uiTrSize==4)
281	{
282	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
283	{
284	iT = g_as_DST_MAT_4[0];
285	}
286	else
287	{
288	iT = g_aiT4[0];
289	}
290	}
291	for (i=0; i<uiTrSize; i++)
292	{
293	for (j=0; j<uiTrSize; j++)
294	{
295	iSum = 0;
296	for (k=0; k<uiTrSize; k++)
297	{
298	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
299	}
300	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
301	}
302	}
303	}
304
305	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
306	* \param coeff pointer to input data (transform coefficients)
307	* \param block pointer to output data (residual)
308	* \param uiStride stride of output data
309	* \param uiTrSize transform size (uiTrSize x uiTrSize)
310	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
311	*/
312	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
313	{
314	Int i,j,k,iSum;
315	Int tmp[32*32];
316	const Short *iT;
317
318	if (uiTrSize==4)
319	{
320	iT = g_aiT4[0];
321	}
322	else if (uiTrSize==8)
323	{
324	iT = g_aiT8[0];
325	}
326	else if (uiTrSize==16)
327	{
328	iT = g_aiT16[0];
329	}
330	else if (uiTrSize==32)
331	{
332	iT = g_aiT32[0];
333	}
334	else
335	{
336	assert(0);
337	}
338
339	Int shift_1st = SHIFT_INV_1ST;
340	Int add_1st = 1<<(shift_1st-1);
341	Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8;
342	Int add_2nd = 1<<(shift_2nd-1);
343	if (uiTrSize==4)
344	{
345	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
346	{
347	iT = g_as_DST_MAT_4[0];
348	}
349	}
350
351	/* Horizontal transform */
352	for (i=0; i<uiTrSize; i++)
353	{
354	for (j=0; j<uiTrSize; j++)
355	{
356	iSum = 0;
357	for (k=0; k<uiTrSize; k++)
358	{
359	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
360	}
361	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
362	}
363	}
364
365	if (uiTrSize==4)
366	{
367	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
368	{
369	iT = g_as_DST_MAT_4[0];
370	}
371	else
372	{
373	iT = g_aiT4[0];
374	}
375	}
376
377	/* Vertical transform */
378	for (i=0; i<uiTrSize; i++)
379	{
380	for (j=0; j<uiTrSize; j++)
381	{
382	iSum = 0;
383	for (k=0; k<uiTrSize; k++)
384	{
385	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
386	}
387	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
388	}
389	}
390	}
391
392	#else //MATRIX_MULT
393
394	/** 4x4 forward transform implemented using partial butterfly structure (1D)
395	* \param src input data (residual)
396	* \param dst output data (transform coefficients)
397	* \param shift specifies right shift after 1D transform
398	*/
399
400	void partialButterfly4(Short src,Short dst,Int shift, Int line)
401	{
402	Int j;
403	Int E[2],O[2];
404	Int add = 1<<(shift-1);
405
406	for (j=0; j<line; j++)
407	{
408	/* E and O */
409	E[0] = src[0] + src[3];
410	O[0] = src[0] - src[3];
411	E[1] = src[1] + src[2];
412	O[1] = src[1] - src[2];
413
414	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
415	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
416	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
417	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
418
419	src += 4;
420	dst ++;
421	}
422	}
423
424	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
425	// give identical results
426	void fastForwardDst(Short block,Short coeff,Int shift) // input block, output coeff
427	{
428	Int i, c[4];
429	Int rnd_factor = 1<<(shift-1);
430	for (i=0; i<4; i++)
431	{
432	// Intermediate Variables
433	c[0] = block[4i+0] + block[4i+3];
434	c[1] = block[4i+1] + block[4i+3];
435	c[2] = block[4i+0] - block[4i+1];
436	c[3] = 74* block[4*i+2];
437
438	coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
439	coeff[ 4+i] = ( 74 * (block[4i+0]+ block[4i+1] - block[4*i+3]) + rnd_factor ) >> shift;
440	coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
441	coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
442	}
443	}
444
445	void fastInverseDst(Short tmp,Short block,Int shift) // input tmp, output block
446	{
447	Int i, c[4];
448	Int rnd_factor = 1<<(shift-1);
449	for (i=0; i<4; i++)
450	{
451	// Intermediate Variables
452	c[0] = tmp[ i] + tmp[ 8+i];
453	c[1] = tmp[8+i] + tmp[12+i];
454	c[2] = tmp[ i] - tmp[12+i];
455	c[3] = 74* tmp[4+i];
456
457	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
458	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
459	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
460	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
461	}
462	}
463
464	void partialButterflyInverse4(Short src,Short dst,Int shift, Int line)
465	{
466	Int j;
467	Int E[2],O[2];
468	Int add = 1<<(shift-1);
469
470	for (j=0; j<line; j++)
471	{
472	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
473	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
474	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
475	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
476	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
477
478	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
479	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
480	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
481	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
482	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
483
484	src ++;
485	dst += 4;
486	}
487	}
488
489
490	void partialButterfly8(Short src,Short dst,Int shift, Int line)
491	{
492	Int j,k;
493	Int E[4],O[4];
494	Int EE[2],EO[2];
495	Int add = 1<<(shift-1);
496
497	for (j=0; j<line; j++)
498	{
499	/* E and O*/
500	for (k=0;k<4;k++)
501	{
502	E[k] = src[k] + src[7-k];
503	O[k] = src[k] - src[7-k];
504	}
505	/* EE and EO */
506	EE[0] = E[0] + E[3];
507	EO[0] = E[0] - E[3];
508	EE[1] = E[1] + E[2];
509	EO[1] = E[1] - E[2];
510
511	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
512	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
513	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
514	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
515
516	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
517	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
518	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
519	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
520
521	src += 8;
522	dst ++;
523	}
524	}
525
526
527	void partialButterflyInverse8(Short src,Short dst,Int shift, Int line)
528	{
529	Int j,k;
530	Int E[4],O[4];
531	Int EE[2],EO[2];
532	Int add = 1<<(shift-1);
533
534	for (j=0; j<line; j++)
535	{
536	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
537	for (k=0;k<4;k++)
538	{
539	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
540	}
541
542	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
543	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
544	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
545	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
546
547	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
548	E[0] = EE[0] + EO[0];
549	E[3] = EE[0] - EO[0];
550	E[1] = EE[1] + EO[1];
551	E[2] = EE[1] - EO[1];
552	for (k=0;k<4;k++)
553	{
554	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
555	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
556	}
557	src ++;
558	dst += 8;
559	}
560	}
561
562
563	void partialButterfly16(Short src,Short dst,Int shift, Int line)
564	{
565	Int j,k;
566	Int E[8],O[8];
567	Int EE[4],EO[4];
568	Int EEE[2],EEO[2];
569	Int add = 1<<(shift-1);
570
571	for (j=0; j<line; j++)
572	{
573	/* E and O*/
574	for (k=0;k<8;k++)
575	{
576	E[k] = src[k] + src[15-k];
577	O[k] = src[k] - src[15-k];
578	}
579	/* EE and EO */
580	for (k=0;k<4;k++)
581	{
582	EE[k] = E[k] + E[7-k];
583	EO[k] = E[k] - E[7-k];
584	}
585	/* EEE and EEO */
586	EEE[0] = EE[0] + EE[3];
587	EEO[0] = EE[0] - EE[3];
588	EEE[1] = EE[1] + EE[2];
589	EEO[1] = EE[1] - EE[2];
590
591	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
592	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
593	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
594	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
595
596	for (k=2;k<16;k+=4)
597	{
598	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
599	}
600
601	for (k=1;k<16;k+=2)
602	{
603	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
604	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
605	}
606
607	src += 16;
608	dst ++;
609
610	}
611	}
612
613
614	void partialButterflyInverse16(Short src,Short dst,Int shift, Int line)
615	{
616	Int j,k;
617	Int E[8],O[8];
618	Int EE[4],EO[4];
619	Int EEE[2],EEO[2];
620	Int add = 1<<(shift-1);
621
622	for (j=0; j<line; j++)
623	{
624	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
625	for (k=0;k<8;k++)
626	{
627	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
628	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
629	}
630	for (k=0;k<4;k++)
631	{
632	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
633	}
634	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
635	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
636	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
637	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
638
639	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
640	for (k=0;k<2;k++)
641	{
642	EE[k] = EEE[k] + EEO[k];
643	EE[k+2] = EEE[1-k] - EEO[1-k];
644	}
645	for (k=0;k<4;k++)
646	{
647	E[k] = EE[k] + EO[k];
648	E[k+4] = EE[3-k] - EO[3-k];
649	}
650	for (k=0;k<8;k++)
651	{
652	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
653	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
654	}
655	src ++;
656	dst += 16;
657	}
658	}
659
660
661	void partialButterfly32(Short src,Short dst,Int shift, Int line)
662	{
663	Int j,k;
664	Int E[16],O[16];
665	Int EE[8],EO[8];
666	Int EEE[4],EEO[4];
667	Int EEEE[2],EEEO[2];
668	Int add = 1<<(shift-1);
669
670	for (j=0; j<line; j++)
671	{
672	/* E and O*/
673	for (k=0;k<16;k++)
674	{
675	E[k] = src[k] + src[31-k];
676	O[k] = src[k] - src[31-k];
677	}
678	/* EE and EO */
679	for (k=0;k<8;k++)
680	{
681	EE[k] = E[k] + E[15-k];
682	EO[k] = E[k] - E[15-k];
683	}
684	/* EEE and EEO */
685	for (k=0;k<4;k++)
686	{
687	EEE[k] = EE[k] + EE[7-k];
688	EEO[k] = EE[k] - EE[7-k];
689	}
690	/* EEEE and EEEO */
691	EEEE[0] = EEE[0] + EEE[3];
692	EEEO[0] = EEE[0] - EEE[3];
693	EEEE[1] = EEE[1] + EEE[2];
694	EEEO[1] = EEE[1] - EEE[2];
695
696	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
697	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
698	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
699	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
700	for (k=4;k<32;k+=8)
701	{
702	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
703	}
704	for (k=2;k<32;k+=4)
705	{
706	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
707	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
708	}
709	for (k=1;k<32;k+=2)
710	{
711	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
712	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
713	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
714	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
715	}
716	src += 32;
717	dst ++;
718	}
719	}
720
721
722	void partialButterflyInverse32(Short src,Short dst,Int shift, Int line)
723	{
724	Int j,k;
725	Int E[16],O[16];
726	Int EE[8],EO[8];
727	Int EEE[4],EEO[4];
728	Int EEEE[2],EEEO[2];
729	Int add = 1<<(shift-1);
730
731	for (j=0; j<line; j++)
732	{
733	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
734	for (k=0;k<16;k++)
735	{
736	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
737	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
738	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
739	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
740	}
741	for (k=0;k<8;k++)
742	{
743	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
744	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
745	}
746	for (k=0;k<4;k++)
747	{
748	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
749	}
750	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
751	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
752	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
753	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
754
755	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
756	EEE[0] = EEEE[0] + EEEO[0];
757	EEE[3] = EEEE[0] - EEEO[0];
758	EEE[1] = EEEE[1] + EEEO[1];
759	EEE[2] = EEEE[1] - EEEO[1];
760	for (k=0;k<4;k++)
761	{
762	EE[k] = EEE[k] + EEO[k];
763	EE[k+4] = EEE[3-k] - EEO[3-k];
764	}
765	for (k=0;k<8;k++)
766	{
767	E[k] = EE[k] + EO[k];
768	E[k+8] = EE[7-k] - EO[7-k];
769	}
770	for (k=0;k<16;k++)
771	{
772	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
773	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
774	}
775	src ++;
776	dst += 32;
777	}
778	}
779
780	/** MxN forward transform (2D)
781	* \param block input data (residual)
782	* \param coeff output data (transform coefficients)
783	* \param iWidth input data (width of transform)
784	* \param iHeight input data (height of transform)
785	*/
786	void xTrMxN(Int bitDepth, Short block,Short coeff, Int iWidth, Int iHeight, UInt uiMode)
787	{
788	Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
789	Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
790
791	Short tmp[ 64 * 64 ];
792
793	if( iWidth == 4 && iHeight == 4)
794	{
795	if (uiMode != REG_DCT)
796	{
797	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
798	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
799	}
800	else
801	{
802	partialButterfly4(block, tmp, shift_1st, iHeight);
803	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
804	}
805
806	}
807	else if( iWidth == 8 && iHeight == 8)
808	{
809	partialButterfly8( block, tmp, shift_1st, iHeight );
810	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
811	}
812	else if( iWidth == 16 && iHeight == 16)
813	{
814	partialButterfly16( block, tmp, shift_1st, iHeight );
815	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
816	}
817	else if( iWidth == 32 && iHeight == 32)
818	{
819	partialButterfly32( block, tmp, shift_1st, iHeight );
820	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
821	}
822	}
823	/** MxN inverse transform (2D)
824	* \param coeff input data (transform coefficients)
825	* \param block output data (residual)
826	* \param iWidth input data (width of transform)
827	* \param iHeight input data (height of transform)
828	*/
829	void xITrMxN(Int bitDepth, Short coeff,Short block, Int iWidth, Int iHeight, UInt uiMode)
830	{
831	Int shift_1st = SHIFT_INV_1ST;
832	Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8);
833
834	Short tmp[ 64*64];
835	if( iWidth == 4 && iHeight == 4)
836	{
837	if (uiMode != REG_DCT)
838	{
839	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
840	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
841	}
842	else
843	{
844	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
845	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
846	}
847	}
848	else if( iWidth == 8 && iHeight == 8)
849	{
850	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
851	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
852	}
853	else if( iWidth == 16 && iHeight == 16)
854	{
855	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
856	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
857	}
858	else if( iWidth == 32 && iHeight == 32)
859	{
860	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
861	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
862	}
863	}
864
865	#endif //MATRIX_MULT
866
867	// To minimize the distortion only. No rate is considered.
868	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
869	{
870	Int lastCG = -1;
871	Int absSum = 0 ;
872	Int n ;
873
874	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
875	{
876	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
877	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
878	absSum = 0 ;
879
880	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
881	{
882	if( pQCoef[ scan[ n + subPos ]] )
883	{
884	lastNZPosInCG = n;
885	break;
886	}
887	}
888
889	for(n = 0; n <SCAN_SET_SIZE; n++ )
890	{
891	if( pQCoef[ scan[ n + subPos ]] )
892	{
893	firstNZPosInCG = n;
894	break;
895	}
896	}
897
898	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
899	{
900	absSum += pQCoef[ scan[ n + subPos ]];
901	}
902
903	if(lastNZPosInCG>=0 && lastCG==-1)
904	{
905	lastCG = 1 ;
906	}
907
908	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
909	{
910	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
911	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
912	{
913	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
914
915	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
916	{
917	UInt blkPos = scan[ n+subPos ];
918	if(pQCoef[ blkPos ] != 0 )
919	{
920	if(deltaU[blkPos]>0)
921	{
922	curCost = - deltaU[blkPos];
923	curChange=1 ;
924	}
925	else
926	{
927	//curChange =-1;
928	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
929	{
930	curCost=MAX_INT ;
931	}
932	else
933	{
934	curCost = deltaU[blkPos];
935	curChange =-1;
936	}
937	}
938	}
939	else
940	{
941	if(n<firstNZPosInCG)
942	{
943	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
944	if(thisSignBit != signbit )
945	{
946	curCost = MAX_INT;
947	}
948	else
949	{
950	curCost = - (deltaU[blkPos]) ;
951	curChange = 1 ;
952	}
953	}
954	else
955	{
956	curCost = - (deltaU[blkPos]) ;
957	curChange = 1 ;
958	}
959	}
960
961	if( curCost<minCostInc)
962	{
963	minCostInc = curCost ;
964	finalChange = curChange ;
965	minPos = blkPos ;
966	}
967	} //CG loop
968
969	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
970	{
971	finalChange = -1;
972	}
973
974	if(pCoef[minPos]>=0)
975	{
976	pQCoef[minPos] += finalChange ;
977	}
978	else
979	{
980	pQCoef[minPos] -= finalChange ;
981	}
982	} // Hide
983	}
984	if(lastCG==1)
985	{
986	lastCG=0 ;
987	}
988	} // TU loop
989
990	return;
991	}
992
993	Void TComTrQuant::xQuant( TComDataCU* pcCU,
994	Int* pSrc,
995	TCoeff* pDes,
996	#if ADAPTIVE_QP_SELECTION
997	Int*& pArlDes,
998	#endif
999	Int iWidth,
1000	Int iHeight,
1001	UInt& uiAcSum,
1002	TextType eTType,
1003	UInt uiAbsPartIdx )
1004	{
1005	Int* piCoef = pSrc;
1006	TCoeff* piQCoef = pDes;
1007	#if ADAPTIVE_QP_SELECTION
1008	Int* piArlCCoef = pArlDes;
1009	#endif
1010	Int iAdd = 0;
1011
1012	#if RDOQ_TRANSFORMSKIP
1013	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
1014	if ( useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA))
1015	#else
1016	Bool useRDOQForTransformSkip = !(m_useTransformSkipFast && pcCU->getTransformSkip(uiAbsPartIdx,eTType));
1017	if ( m_useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) && useRDOQForTransformSkip)
1018	#endif
1019	{
1020	#if ADAPTIVE_QP_SELECTION
1021	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1022	#else
1023	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1024	#endif
1025	}
1026	else
1027	{
1028	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1029
1030	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1031	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1032
1033	Int deltaU[32*32] ;
1034
1035	#if ADAPTIVE_QP_SELECTION
1036	QpParam cQpBase;
1037	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1038
1039	Int qpScaled;
1040	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1041
1042	if(eTType == TEXT_LUMA)
1043	{
1044	qpScaled = iQpBase + qpBDOffset;
1045	}
1046	else
1047	{
1048	Int chromaQPOffset;
1049	if(eTType == TEXT_CHROMA_U)
1050	{
1051	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
1052	}
1053	else
1054	{
1055	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
1056	}
1057	iQpBase = iQpBase + chromaQPOffset;
1058
1059	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1060
1061	if(qpScaled < 0)
1062	{
1063	qpScaled = qpScaled + qpBDOffset;
1064	}
1065	else
1066	{
1067	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1068	}
1069	}
1070	cQpBase.setQpParam(qpScaled);
1071	#endif
1072
1073	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1074	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1075	assert(scalingListType < 6);
1076	Int *piQuantCoeff = 0;
1077	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1078
1079	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1080	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1081
1082	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1083
1084	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1085
1086	#if ADAPTIVE_QP_SELECTION
1087	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1088	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1089	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1090	Int iAddC = 1 << (iQBitsC-1);
1091	#endif
1092
1093	Int qBits8 = iQBits-8;
1094	for( Int n = 0; n < iWidth*iHeight; n++ )
1095	{
1096	Int iLevel;
1097	Int iSign;
1098	UInt uiBlockPos = n;
1099	iLevel = piCoef[uiBlockPos];
1100	iSign = (iLevel < 0 ? -1: 1);
1101
1102	#if ADAPTIVE_QP_SELECTION
1103	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1104	if( m_bUseAdaptQpSelect )
1105	{
1106	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1107	}
1108	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1109	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1110	#else
1111	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1112	deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1113	#endif
1114	uiAcSum += iLevel;
1115	iLevel *= iSign;
1116	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1117	} // for n
1118	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1119	{
1120	if(uiAcSum>=2)
1121	{
1122	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1123	}
1124	}
1125	} //if RDOQ
1126	//return;
1127
1128	}
1129
1130	Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1131	{
1132
1133	const TCoeff* piQCoef = pSrc;
1134	Int* piCoef = pDes;
1135
1136	if ( iWidth > (Int)m_uiMaxTrSize )
1137	{
1138	iWidth = m_uiMaxTrSize;
1139	iHeight = m_uiMaxTrSize;
1140	}
1141
1142	Int iShift,iAdd,iCoeffQ;
1143	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1144
1145	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1146
1147	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1148
1149	TCoeff clipQCoef;
1150	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + bitDepth - m_cQP.m_iPer) );
1151	const Int levelLimit = 1 << bitRange;
1152
1153	if(getUseScalingList())
1154	{
1155	iShift += 4;
1156	if(iShift > m_cQP.m_iPer)
1157	{
1158	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1159	}
1160	else
1161	{
1162	iAdd = 0;
1163	}
1164	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1165
1166	if(iShift > m_cQP.m_iPer)
1167	{
1168	for( Int n = 0; n < iWidth*iHeight; n++ )
1169	{
1170	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1171	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1172	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1173	}
1174	}
1175	else
1176	{
1177	for( Int n = 0; n < iWidth*iHeight; n++ )
1178	{
1179	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
1180	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1181	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1182	}
1183	}
1184	}
1185	else
1186	{
1187	iAdd = 1 << (iShift-1);
1188	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1189
1190	for( Int n = 0; n < iWidth*iHeight; n++ )
1191	{
1192	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1193	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1194	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1195	}
1196	}
1197	}
1198
1199	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
1200	Bool bUseRDOQ,
1201	#if RDOQ_TRANSFORMSKIP
1202	Bool bUseRDOQTS,
1203	#endif
1204	Bool bEnc, Bool useTransformSkipFast
1205	#if ADAPTIVE_QP_SELECTION
1206	, Bool bUseAdaptQpSelect
1207	#endif
1208	)
1209	{
1210	m_uiMaxTrSize = uiMaxTrSize;
1211	m_bEnc = bEnc;
1212	m_useRDOQ = bUseRDOQ;
1213	#if RDOQ_TRANSFORMSKIP
1214	m_useRDOQTS = bUseRDOQTS;
1215	#endif
1216	#if ADAPTIVE_QP_SELECTION
1217	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1218	#endif
1219	m_useTransformSkipFast = useTransformSkipFast;
1220	}
1221
1222	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1223	Pel* pcResidual,
1224	UInt uiStride,
1225	TCoeff* rpcCoeff,
1226	#if ADAPTIVE_QP_SELECTION
1227	Int*& rpcArlCoeff,
1228	#endif
1229	UInt uiWidth,
1230	UInt uiHeight,
1231	UInt& uiAbsSum,
1232	TextType eTType,
1233	UInt uiAbsPartIdx,
1234	Bool useTransformSkip
1235	)
1236	{
1237	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1238	{
1239	uiAbsSum=0;
1240	for (UInt k = 0; k<uiHeight; k++)
1241	{
1242	for (UInt j = 0; j<uiWidth; j++)
1243	{
1244	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1245	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1246	}
1247	}
1248	return;
1249	}
1250	UInt uiMode; //luma intra pred
1251	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1252	{
1253	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1254	}
1255	else
1256	{
1257	uiMode = REG_DCT;
1258	}
1259
1260	uiAbsSum = 0;
1261	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1262	Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1263	if(useTransformSkip)
1264	{
1265	xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1266	}
1267	else
1268	{
1269	xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1270	}
1271	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1272	#if ADAPTIVE_QP_SELECTION
1273	rpcArlCoeff,
1274	#endif
1275	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1276	}
1277
1278	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1279	{
1280	if(transQuantBypass)
1281	{
1282	for (UInt k = 0; k<uiHeight; k++)
1283	{
1284	for (UInt j = 0; j<uiWidth; j++)
1285	{
1286	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1287	}
1288	}
1289	return;
1290	}
1291	Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1292	xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1293	if(useTransformSkip == true)
1294	{
1295	xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1296	}
1297	else
1298	{
1299	xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1300	}
1301	}
1302
1303	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1304	{
1305	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1306	{
1307	return;
1308	}
1309	const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1310
1311	if( uiTrMode == stopTrMode )
1312	{
1313	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1314	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1315	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1316	{
1317	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1318	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1319	{
1320	return;
1321	}
1322	uiWidth <<= 1;
1323	uiHeight <<= 1;
1324	}
1325	Pel* pResi = rpcResidual + uiAddr;
1326	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1327	assert(scalingListType < 6);
1328	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1329	}
1330	else
1331	{
1332	uiTrMode++;
1333	uiWidth >>= 1;
1334	uiHeight >>= 1;
1335	Int trWidth = uiWidth, trHeight = uiHeight;
1336	UInt uiAddrOffset = trHeight * uiStride;
1337	UInt uiCoefOffset = trWidth * trHeight;
1338	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1339	{
1340	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1341	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1342	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1343	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1344	}
1345	}
1346	}
1347
1348	// ------------------------------------------------------------------------------------------------
1349	// Logical transform
1350	// ------------------------------------------------------------------------------------------------
1351
1352	/** Wrapper function between HM interface and core NxN forward transform (2D)
1353	* \param piBlkResi input data (residual)
1354	* \param psCoeff output data (transform coefficients)
1355	* \param uiStride stride of input residual data
1356	* \param iSize transform size (iSize x iSize)
1357	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1358	*/
1359	Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1360	{
1361	#if MATRIX_MULT
1362	Int iSize = iWidth;
1363	xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1364	#else
1365	Int j;
1366	{
1367	Short block[ 64 * 64 ];
1368	Short coeff[ 64 * 64 ];
1369	{
1370	for (j = 0; j < iHeight; j++)
1371	{
1372	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) );
1373	}
1374	}
1375	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );
1376	for ( j = 0; j < iHeight * iWidth; j++ )
1377	{
1378	psCoeff[ j ] = coeff[ j ];
1379	}
1380	return ;
1381	}
1382	#endif
1383	}
1384
1385
1386	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1387	* \param plCoef input data (transform coefficients)
1388	* \param pResidual output data (residual)
1389	* \param uiStride stride of input residual data
1390	* \param iSize transform size (iSize x iSize)
1391	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1392	*/
1393	Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1394	{
1395	#if MATRIX_MULT
1396	Int iSize = iWidth;
1397	xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1398	#else
1399	Int j;
1400	{
1401	Short block[ 64 * 64 ];
1402	Short coeff[ 64 * 64 ];
1403	for ( j = 0; j < iHeight * iWidth; j++ )
1404	{
1405	coeff[j] = (Short)plCoef[j];
1406	}
1407	xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode );
1408	{
1409	for ( j = 0; j < iHeight; j++ )
1410	{
1411	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );
1412	}
1413	}
1414	return ;
1415	}
1416	#endif
1417	}
1418
1419	/** Wrapper function between HM interface and core 4x4 transform skipping
1420	* \param piBlkResi input data (residual)
1421	* \param psCoeff output data (transform coefficients)
1422	* \param uiStride stride of input residual data
1423	* \param iSize transform size (iSize x iSize)
1424	*/
1425	Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1426	{
1427	assert( width == height );
1428	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1429	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1430	UInt transformSkipShift;
1431	Int j,k;
1432	if(shift >= 0)
1433	{
1434	transformSkipShift = shift;
1435	for (j = 0; j < height; j++)
1436	{
1437	for(k = 0; k < width; k ++)
1438	{
1439	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1440	}
1441	}
1442	}
1443	else
1444	{
1445	//The case when uiBitDepth > 13
1446	Int offset;
1447	transformSkipShift = -shift;
1448	offset = (1 << (transformSkipShift - 1));
1449	for (j = 0; j < height; j++)
1450	{
1451	for(k = 0; k < width; k ++)
1452	{
1453	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1454	}
1455	}
1456	}
1457	}
1458
1459	/** Wrapper function between HM interface and core NxN transform skipping
1460	* \param plCoef input data (coefficients)
1461	* \param pResidual output data (residual)
1462	* \param uiStride stride of input residual data
1463	* \param iSize transform size (iSize x iSize)
1464	*/
1465	Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1466	{
1467	assert( width == height );
1468	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1469	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1470	UInt transformSkipShift;
1471	Int j,k;
1472	if(shift > 0)
1473	{
1474	Int offset;
1475	transformSkipShift = shift;
1476	offset = (1 << (transformSkipShift -1));
1477	for ( j = 0; j < height; j++ )
1478	{
1479	for(k = 0; k < width; k ++)
1480	{
1481	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1482	}
1483	}
1484	}
1485	else
1486	{
1487	//The case when uiBitDepth >= 13
1488	transformSkipShift = - shift;
1489	for ( j = 0; j < height; j++ )
1490	{
1491	for(k = 0; k < width; k ++)
1492	{
1493	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1494	}
1495	}
1496	}
1497	}
1498
1499	/** RDOQ with CABAC
1500	* \param pcCU pointer to coding unit structure
1501	* \param plSrcCoeff pointer to input buffer
1502	* \param piDstCoeff reference to pointer to output buffer
1503	* \param uiWidth block width
1504	* \param uiHeight block height
1505	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1506	* \param eTType plane type / luminance or chrominance
1507	* \param uiAbsPartIdx absolute partition index
1508	* \returns Void
1509	* Rate distortion optimized quantization for entropy
1510	* coding engines using probability models like CABAC
1511	*/
1512	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1513	Int* plSrcCoeff,
1514	TCoeff* piDstCoeff,
1515	#if ADAPTIVE_QP_SELECTION
1516	Int*& piArlDstCoeff,
1517	#endif
1518	UInt uiWidth,
1519	UInt uiHeight,
1520	UInt& uiAbsSum,
1521	TextType eTType,
1522	UInt uiAbsPartIdx )
1523	{
1524	Int iQBits = m_cQP.m_iBits;
1525	Double dTemp = 0;
1526	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1527	Int uiQ = g_quantScales[m_cQP.rem()];
1528
1529	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1530	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1531	UInt uiGoRiceParam = 0;
1532	Double d64BlockUncodedCost = 0;
1533	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1534	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1535	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1536	assert(scalingListType < 6);
1537
1538	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1539	Double dErrScale = 0;
1540	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
1541	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1542	Int *piQCoef = piQCoefOrg;
1543	Double *pdErrScale = pdErrScaleOrg;
1544	#if ADAPTIVE_QP_SELECTION
1545	Int iQBitsC = iQBits - ARL_C_PRECISION;
1546	Int iAddC = 1 << (iQBitsC-1);
1547	#endif
1548	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1549
1550	#if ADAPTIVE_QP_SELECTION
1551	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1552	#endif
1553
1554	Double pdCostCoeff [ 32 * 32 ];
1555	Double pdCostSig [ 32 * 32 ];
1556	Double pdCostCoeff0[ 32 * 32 ];
1557	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1558	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1559	Int rateIncUp [ 32 * 32 ];
1560	Int rateIncDown [ 32 * 32 ];
1561	Int sigRateDelta[ 32 * 32 ];
1562	Int deltaU [ 32 * 32 ];
1563	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1564	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1565	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1566	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1567
1568	const UInt * scanCG;
1569	{
1570	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1571	if( uiLog2BlkSize == 3 )
1572	{
1573	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1574	}
1575	else if( uiLog2BlkSize == 5 )
1576	{
1577	scanCG = g_sigLastScanCG32x32;
1578	}
1579	}
1580	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1581	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1582	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1583	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1584	Int iCGLastScanPos = -1;
1585
1586	UInt uiCtxSet = 0;
1587	Int c1 = 1;
1588	Int c2 = 0;
1589	Double d64BaseCost = 0;
1590	Int iLastScanPos = -1;
1591	dTemp = dErrScale;
1592
1593	UInt c1Idx = 0;
1594	UInt c2Idx = 0;
1595	Int baseLevel;
1596
1597	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1598
1599	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1600	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1601
1602	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1603	Int iScanPos;
1604	coeffGroupRDStats rdStats;
1605
1606	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1607	{
1608	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1609	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1610	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1611	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1612
1613	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1614	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1615	{
1616	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1617	//===== quantization =====
1618	UInt uiBlkPos = scan[iScanPos];
1619	// set coeff
1620	uiQ = piQCoef[uiBlkPos];
1621	dTemp = pdErrScale[uiBlkPos];
1622	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1623	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1624	#if ADAPTIVE_QP_SELECTION
1625	if( m_bUseAdaptQpSelect )
1626	{
1627	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1628	}
1629	#endif
1630	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1631
1632	Double dErr = Double( lLevelDouble );
1633	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1634	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1635	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1636
1637	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1638	{
1639	iLastScanPos = iScanPos;
1640	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1641	iCGLastScanPos = iCGScanPos;
1642	}
1643
1644	if ( iLastScanPos >= 0 )
1645	{
1646	//===== coefficient level estimation =====
1647	UInt uiLevel;
1648	UInt uiOneCtx = 4 * uiCtxSet + c1;
1649	UInt uiAbsCtx = uiCtxSet + c2;
1650
1651	if( iScanPos == iLastScanPos )
1652	{
1653	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1654	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1655	c1Idx, c2Idx, iQBits, dTemp, 1 );
1656	}
1657	else
1658	{
1659	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1660	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1661	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, uiWidth, uiHeight, eTType );
1662	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1663	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1664	c1Idx, c2Idx, iQBits, dTemp, 0 );
1665	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1666	}
1667	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1668	if( uiLevel > 0 )
1669	{
1670	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1671	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1672	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1673	}
1674	else // uiLevel == 0
1675	{
1676	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1677	}
1678	piDstCoeff[ uiBlkPos ] = uiLevel;
1679	d64BaseCost += pdCostCoeff [ iScanPos ];
1680
1681
1682	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1683	if( uiLevel >= baseLevel )
1684	{
1685	if(uiLevel > 3*(1<<uiGoRiceParam))
1686	{
1687	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1688	}
1689	}
1690	if ( uiLevel >= 1)
1691	{
1692	c1Idx ++;
1693	}
1694
1695	//===== update bin model =====
1696	if( uiLevel > 1 )
1697	{
1698	c1 = 0;
1699	c2 += (c2 < 2);
1700	c2Idx ++;
1701	}
1702	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1703	{
1704	c1++;
1705	}
1706
1707	//===== context set update =====
1708	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1709	{
1710	c2 = 0;
1711	uiGoRiceParam = 0;
1712
1713	c1Idx = 0;
1714	c2Idx = 0;
1715	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1716	if( c1 == 0 )
1717	{
1718	uiCtxSet++;
1719	}
1720	c1 = 1;
1721	}
1722	}
1723	else
1724	{
1725	d64BaseCost += pdCostCoeff0[ iScanPos ];
1726	}
1727	rdStats.d64SigCost += pdCostSig[ iScanPos ];
1728	if (iScanPosinCG == 0 )
1729	{
1730	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
1731	}
1732	if (piDstCoeff[ uiBlkPos ] )
1733	{
1734	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1735	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
1736	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
1737	if ( iScanPosinCG != 0 )
1738	{
1739	rdStats.iNNZbeforePos0++;
1740	}
1741	}
1742	} //end for (iScanPosinCG)
1743
1744	if (iCGLastScanPos >= 0)
1745	{
1746	if( iCGScanPos )
1747	{
1748	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1749	{
1750	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
1751	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
1752	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1753	}
1754	else
1755	{
1756	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
1757	{
1758	if ( rdStats.iNNZbeforePos0 == 0 )
1759	{
1760	d64BaseCost -= rdStats.d64SigCost_0;
1761	rdStats.d64SigCost -= rdStats.d64SigCost_0;
1762	}
1763	// rd-cost if SigCoeffGroupFlag = 0, initialization
1764	Double d64CostZeroCG = d64BaseCost;
1765
1766	// add SigCoeffGroupFlag cost to total cost
1767	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
1768	if (iCGScanPos < iCGLastScanPos)
1769	{
1770	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
1771	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
1772	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
1773	}
1774
1775	// try to convert the current coeff group from non-zero to all-zero
1776	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
1777	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
1778	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
1779
1780	// if we can save cost, change this block to all-zero block
1781	if ( d64CostZeroCG < d64BaseCost )
1782	{
1783	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
1784	d64BaseCost = d64CostZeroCG;
1785	if (iCGScanPos < iCGLastScanPos)
1786	{
1787	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1788	}
1789	// reset coeffs to 0 in this block
1790	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1791	{
1792	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1793	UInt uiBlkPos = scan[ iScanPos ];
1794
1795	if (piDstCoeff[ uiBlkPos ])
1796	{
1797	piDstCoeff [ uiBlkPos ] = 0;
1798	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
1799	pdCostSig [ iScanPos ] = 0;
1800	}
1801	}
1802	} // end if ( d64CostAllZeros < d64BaseCost )
1803	}
1804	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1805	}
1806	else
1807	{
1808	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1809	}
1810	}
1811	} //end for (iCGScanPos)
1812
1813	//===== estimate last position =====
1814	if ( iLastScanPos < 0 )
1815	{
1816	return;
1817	}
1818
1819	Double d64BestCost = 0;
1820	Int ui16CtxCbf = 0;
1821	Int iBestLastIdxP1 = 0;
1822	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1823	{
1824	ui16CtxCbf = 0;
1825	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
1826	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
1827	}
1828	else
1829	{
1830	ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
1831	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
1832	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
1833	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
1834	}
1835
1836	Bool bFoundLast = false;
1837	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
1838	{
1839	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1840
1841	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
1842	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1843	{
1844	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1845	{
1846	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1847	if (iScanPos > iLastScanPos) continue;
1848	UInt uiBlkPos = scan[iScanPos];
1849
1850	if( piDstCoeff[ uiBlkPos ] )
1851	{
1852	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1853	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1854
1855	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
1856	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
1857
1858	if( totalCost < d64BestCost )
1859	{
1860	iBestLastIdxP1 = iScanPos + 1;
1861	d64BestCost = totalCost;
1862	}
1863	if( piDstCoeff[ uiBlkPos ] > 1 )
1864	{
1865	bFoundLast = true;
1866	break;
1867	}
1868	d64BaseCost -= pdCostCoeff[ iScanPos ];
1869	d64BaseCost += pdCostCoeff0[ iScanPos ];
1870	}
1871	else
1872	{
1873	d64BaseCost -= pdCostSig[ iScanPos ];
1874	}
1875	} //end for
1876	if (bFoundLast)
1877	{
1878	break;
1879	}
1880	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1881	} // end for
1882
1883	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
1884	{
1885	Int blkPos = scan[ scanPos ];
1886	Int level = piDstCoeff[ blkPos ];
1887	uiAbsSum += level;
1888	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
1889	}
1890
1891	//===== clean uncoded coefficients =====
1892	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
1893	{
1894	piDstCoeff[ scan[ scanPos ] ] = 0;
1895	}
1896
1897	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
1898	{
1899	Int64 rdFactor = (Int64) (
1900	g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
1901	/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
1902	+ 0.5);
1903	Int lastCG = -1;
1904	Int absSum = 0 ;
1905	Int n ;
1906
1907	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1908	{
1909	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1910	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1911	absSum = 0 ;
1912
1913	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1914	{
1915	if( piDstCoeff[ scan[ n + subPos ]] )
1916	{
1917	lastNZPosInCG = n;
1918	break;
1919	}
1920	}
1921
1922	for(n = 0; n <SCAN_SET_SIZE; n++ )
1923	{
1924	if( piDstCoeff[ scan[ n + subPos ]] )
1925	{
1926	firstNZPosInCG = n;
1927	break;
1928	}
1929	}
1930
1931	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1932	{
1933	absSum += piDstCoeff[ scan[ n + subPos ]];
1934	}
1935
1936	if(lastNZPosInCG>=0 && lastCG==-1)
1937	{
1938	lastCG = 1;
1939	}
1940
1941	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1942	{
1943	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
1944	if( signbit!=(absSum&0x1) ) // hide but need tune
1945	{
1946	// calculate the cost
1947	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
1948	Int minPos =-1, finalChange=0, curChange=0;
1949
1950	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1951	{
1952	UInt uiBlkPos = scan[ n + subPos ];
1953	if(piDstCoeff[ uiBlkPos ] != 0 )
1954	{
1955	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
1956	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1957	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
1958
1959	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1960	{
1961	costDown -= (4<<15) ;
1962	}
1963
1964	if(costUp<costDown)
1965	{
1966	curCost = costUp;
1967	curChange = 1 ;
1968	}
1969	else
1970	{
1971	curChange = -1 ;
1972	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1973	{
1974	curCost = MAX_INT64 ;
1975	}
1976	else
1977	{
1978	curCost = costDown ;
1979	}
1980	}
1981	}
1982	else
1983	{
1984	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1985	curChange = 1 ;
1986
1987	if(n<firstNZPosInCG)
1988	{
1989	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1990	if(thissignbit != signbit )
1991	{
1992	curCost = MAX_INT64;
1993	}
1994	}
1995	}
1996
1997	if( curCost<minCostInc)
1998	{
1999	minCostInc = curCost ;
2000	finalChange = curChange ;
2001	minPos = uiBlkPos ;
2002	}
2003	}
2004
2005	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
2006	{
2007	finalChange = -1;
2008	}
2009
2010	if(plSrcCoeff[minPos]>=0)
2011	{
2012	piDstCoeff[minPos] += finalChange ;
2013	}
2014	else
2015	{
2016	piDstCoeff[minPos] -= finalChange ;
2017	}
2018	}
2019	}
2020
2021	if(lastCG==1)
2022	{
2023	lastCG=0 ;
2024	}
2025	}
2026	}
2027	}
2028
2029	/** Pattern decision for context derivation process of significant_coeff_flag
2030	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2031	* \param posXCG column of current coefficient group
2032	* \param posYCG row of current coefficient group
2033	* \param width width of the block
2034	* \param height height of the block
2035	* \returns pattern for current coefficient group
2036	*/
2037	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2038	{
2039	if( width == 4 && height == 4 ) return -1;
2040
2041	UInt sigRight = 0;
2042	UInt sigLower = 0;
2043
2044	width >>= 2;
2045	height >>= 2;
2046	if( posXCG < width - 1 )
2047	{
2048	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2049	}
2050	if (posYCG < height - 1 )
2051	{
2052	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2053	}
2054	return sigRight + (sigLower<<1);
2055	}
2056
2057	/** Context derivation process of coeff_abs_significant_flag
2058	* \param patternSigCtx pattern for current coefficient group
2059	* \param posX column of current scan position
2060	* \param posY row of current scan position
2061	* \param log2BlockSize log2 value of block size (square block)
2062	* \param width width of the block
2063	* \param height height of the block
2064	* \param textureType texture type (TEXT_LUMA...)
2065	* \returns ctxInc for current scan position
2066	*/
2067	Int TComTrQuant::getSigCtxInc (
2068	Int patternSigCtx,
2069	UInt scanIdx,
2070	Int posX,
2071	Int posY,
2072	Int log2BlockSize,
2073	Int width
2074	,Int height
2075	,TextType textureType
2076	)
2077	{
2078	const Int ctxIndMap[16] =
2079	{
2080	0, 1, 4, 5,
2081	2, 3, 4, 5,
2082	6, 6, 8, 8,
2083	7, 7, 8, 8
2084	};
2085
2086	if( posX + posY == 0 )
2087	{
2088	return 0;
2089	}
2090
2091	if ( log2BlockSize == 2 )
2092	{
2093	return ctxIndMap[ 4 * posY + posX ];
2094	}
2095
2096	Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2097
2098	Int posXinSubset = posX-((posX>>2)<<2);
2099	Int posYinSubset = posY-((posY>>2)<<2);
2100	Int cnt = 0;
2101	if(patternSigCtx==0)
2102	{
2103	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2104	}
2105	else if(patternSigCtx==1)
2106	{
2107	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2108	}
2109	else if(patternSigCtx==2)
2110	{
2111	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2112	}
2113	else
2114	{
2115	cnt = 2;
2116	}
2117
2118	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2119	}
2120
2121	/** Get the best level in RD sense
2122	* \param rd64CodedCost reference to coded cost
2123	* \param rd64CodedCost0 reference to cost when coefficient is 0
2124	* \param rd64CodedCostSig reference to cost of significant coefficient
2125	* \param lLevelDouble reference to unscaled quantized level
2126	* \param uiMaxAbsLevel scaled quantized level
2127	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2128	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2129	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2130	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2131	* \param iQBits quantization step size
2132	* \param dTemp correction factor
2133	* \param bLast indicates if the coefficient is the last significant
2134	* \returns best quantized transform level for given scan position
2135	* This method calculates the best quantized transform level for a given scan position.
2136	*/
2137	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2138	Double& rd64CodedCost0,
2139	Double& rd64CodedCostSig,
2140	Int lLevelDouble,
2141	UInt uiMaxAbsLevel,
2142	UShort ui16CtxNumSig,
2143	UShort ui16CtxNumOne,
2144	UShort ui16CtxNumAbs,
2145	UShort ui16AbsGoRice,
2146	UInt c1Idx,
2147	UInt c2Idx,
2148	Int iQBits,
2149	Double dTemp,
2150	Bool bLast ) const
2151	{
2152	Double dCurrCostSig = 0;
2153	UInt uiBestAbsLevel = 0;
2154
2155	if( !bLast && uiMaxAbsLevel < 3 )
2156	{
2157	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2158	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2159	if( uiMaxAbsLevel == 0 )
2160	{
2161	return uiBestAbsLevel;
2162	}
2163	}
2164	else
2165	{
2166	rd64CodedCost = MAX_DOUBLE;
2167	}
2168
2169	if( !bLast )
2170	{
2171	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2172	}
2173
2174	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2175	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2176	{
2177	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2178	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2179	dCurrCost += dCurrCostSig;
2180
2181	if( dCurrCost < rd64CodedCost )
2182	{
2183	uiBestAbsLevel = uiAbsLevel;
2184	rd64CodedCost = dCurrCost;
2185	rd64CodedCostSig = dCurrCostSig;
2186	}
2187	}
2188
2189	return uiBestAbsLevel;
2190	}
2191
2192	/** Calculates the cost for specific absolute transform level
2193	* \param uiAbsLevel scaled quantized level
2194	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2195	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2196	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2197	* \returns cost of given absolute transform level
2198	*/
2199	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2200	UShort ui16CtxNumOne,
2201	UShort ui16CtxNumAbs,
2202	UShort ui16AbsGoRice
2203	, UInt c1Idx,
2204	UInt c2Idx
2205	) const
2206	{
2207	Double iRate = xGetIEPRate();
2208	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2209
2210	if ( uiAbsLevel >= baseLevel )
2211	{
2212	UInt symbol = uiAbsLevel - baseLevel;
2213	UInt length;
2214	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2215	{
2216	length = symbol>>ui16AbsGoRice;
2217	iRate += (length+1+ui16AbsGoRice)<< 15;
2218	}
2219	else
2220	{
2221	length = ui16AbsGoRice;
2222	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2223	while (symbol >= (1<<length))
2224	{
2225	symbol -= (1<<(length++));
2226	}
2227	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2228	}
2229	if (c1Idx < C1FLAG_NUMBER)
2230	{
2231	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2232
2233	if (c2Idx < C2FLAG_NUMBER)
2234	{
2235	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2236	}
2237	}
2238	}
2239	else
2240	if( uiAbsLevel == 1 )
2241	{
2242	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2243	}
2244	else if( uiAbsLevel == 2 )
2245	{
2246	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2247	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2248	}
2249	else
2250	{
2251	assert (0);
2252	}
2253	return xGetICost( iRate );
2254	}
2255
2256	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2257	UShort ui16CtxNumOne,
2258	UShort ui16CtxNumAbs,
2259	UShort ui16AbsGoRice
2260	, UInt c1Idx,
2261	UInt c2Idx
2262	) const
2263	{
2264	Int iRate = 0;
2265	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2266
2267	if ( uiAbsLevel >= baseLevel )
2268	{
2269	UInt uiSymbol = uiAbsLevel - baseLevel;
2270	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2271	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2272
2273	if( bExpGolomb )
2274	{
2275	uiAbsLevel = uiSymbol - uiMaxVlc;
2276	Int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2277	iRate += iEGS << 15;
2278	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2279	}
2280
2281	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2282	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2283
2284	iRate += ui16NumBins << 15;
2285
2286	if (c1Idx < C1FLAG_NUMBER)
2287	{
2288	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2289
2290	if (c2Idx < C2FLAG_NUMBER)
2291	{
2292	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2293	}
2294	}
2295	}
2296	else
2297	if( uiAbsLevel == 0 )
2298	{
2299	return 0;
2300	}
2301	else if( uiAbsLevel == 1 )
2302	{
2303	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2304	}
2305	else if( uiAbsLevel == 2 )
2306	{
2307	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2308	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2309	}
2310	else
2311	{
2312	assert(0);
2313	}
2314	return iRate;
2315	}
2316
2317	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2318	UShort ui16CtxNumSig ) const
2319	{
2320	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2321	}
2322
2323	/** Calculates the cost of signaling the last significant coefficient in the block
2324	* \param uiPosX X coordinate of the last significant coefficient
2325	* \param uiPosY Y coordinate of the last significant coefficient
2326	* \returns cost of last significant coefficient
2327	*/
2328	/*
2329	* \param uiWidth width of the transform unit (TU)
2330	*/
2331	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2332	const UInt uiPosY,
2333	const UInt uiBlkWdth ) const
2334	{
2335	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2336	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2337	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2338	if( uiCtxX > 3 )
2339	{
2340	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2341	}
2342	if( uiCtxY > 3 )
2343	{
2344	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2345	}
2346	return xGetICost( uiCost );
2347	}
2348
2349	/** Calculates the cost for specific absolute transform level
2350	* \param uiAbsLevel scaled quantized level
2351	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2352	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2353	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2354	* \returns cost of given absolute transform level
2355	*/
2356	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2357	UShort ui16CtxNumSig ) const
2358	{
2359	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2360	}
2361
2362	/** Get the cost for a specific rate
2363	* \param dRate rate of a bit
2364	* \returns cost at the specific rate
2365	*/
2366	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2367	{
2368	return m_dLambda * dRate;
2369	}
2370
2371	/** Get the cost of an equal probable bit
2372	* \returns cost of equal probable bit
2373	*/
2374	__inline Double TComTrQuant::xGetIEPRate ( ) const
2375	{
2376	return 32768;
2377	}
2378
2379	/** Context derivation process of coeff_abs_significant_flag
2380	* \param uiSigCoeffGroupFlag significance map of L1
2381	* \param uiBlkX column of current scan position
2382	* \param uiBlkY row of current scan position
2383	* \param uiLog2BlkSize log2 value of block size
2384	* \returns ctxInc for current scan position
2385	*/
2386	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2387	const UInt uiCGPosX,
2388	const UInt uiCGPosY,
2389	const UInt scanIdx,
2390	Int width, Int height)
2391	{
2392	UInt uiRight = 0;
2393	UInt uiLower = 0;
2394
2395	width >>= 2;
2396	height >>= 2;
2397	if( uiCGPosX < width - 1 )
2398	{
2399	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2400	}
2401	if (uiCGPosY < height - 1 )
2402	{
2403	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2404	}
2405	return (uiRight \|\| uiLower);
2406
2407	}
2408	/** set quantized matrix coefficient for encode
2409	* \param scalingList quantaized matrix address
2410	*/
2411	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2412	{
2413	UInt size,list;
2414	UInt qp;
2415
2416	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2417	{
2418	for(list = 0; list < g_scalingListNum[size]; list++)
2419	{
2420	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2421	{
2422	xSetScalingListEnc(scalingList,list,size,qp);
2423	xSetScalingListDec(scalingList,list,size,qp);
2424	setErrScaleCoeff(list,size,qp);
2425	}
2426	}
2427	}
2428	}
2429	/** set quantized matrix coefficient for decode
2430	* \param scalingList quantaized matrix address
2431	*/
2432	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2433	{
2434	UInt size,list;
2435	UInt qp;
2436
2437	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2438	{
2439	for(list = 0; list < g_scalingListNum[size]; list++)
2440	{
2441	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2442	{
2443	xSetScalingListDec(scalingList,list,size,qp);
2444	}
2445	}
2446	}
2447	}
2448	/** set error scale coefficients
2449	* \param list List ID
2450	* \param uiSize Size
2451	* \param uiQP Quantization parameter
2452	*/
2453	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp)
2454	{
2455
2456	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2457	Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
2458	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
2459
2460	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2461	Int *piQuantcoeff;
2462	Double *pdErrScale;
2463	piQuantcoeff = getQuantCoeff(list, qp,size);
2464	pdErrScale = getErrScaleCoeff(list, size, qp);
2465
2466	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2467	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2468	for(i=0;i<uiMaxNumCoeff;i++)
2469	{
2470	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8)));
2471	}
2472	}
2473
2474	/** set quantized matrix coefficient for encode
2475	* \param scalingList quantaized matrix address
2476	* \param listId List index
2477	* \param sizeId size index
2478	* \param uiQP Quantization parameter
2479	*/
2480	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2481	{
2482	UInt width = g_scalingListSizeX[sizeId];
2483	UInt height = g_scalingListSizeX[sizeId];
2484	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2485	Int *quantcoeff;
2486	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2487	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2488
2489	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2490	}
2491	/** set quantized matrix coefficient for decode
2492	* \param scalingList quantaized matrix address
2493	* \param list List index
2494	* \param size size index
2495	* \param uiQP Quantization parameter
2496	*/
2497	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2498	{
2499	UInt width = g_scalingListSizeX[sizeId];
2500	UInt height = g_scalingListSizeX[sizeId];
2501	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2502	Int *dequantcoeff;
2503	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2504
2505	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2506	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2507	}
2508
2509	/** set flat matrix value to quantized coefficient
2510	*/
2511	Void TComTrQuant::setFlatScalingList()
2512	{
2513	UInt size,list;
2514	UInt qp;
2515
2516	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2517	{
2518	for(list = 0; list < g_scalingListNum[size]; list++)
2519	{
2520	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2521	{
2522	xsetFlatScalingList(list,size,qp);
2523	setErrScaleCoeff(list,size,qp);
2524	}
2525	}
2526	}
2527	}
2528
2529	/** set flat matrix value to quantized coefficient
2530	* \param list List ID
2531	* \param uiQP Quantization parameter
2532	* \param uiSize Size
2533	*/
2534	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2535	{
2536	UInt i,num = g_scalingListSize[size];
2537	Int *quantcoeff;
2538	Int *dequantcoeff;
2539	Int quantScales = g_quantScales[qp];
2540	Int invQuantScales = g_invQuantScales[qp]<<4;
2541
2542	quantcoeff = getQuantCoeff(list, qp, size);
2543	dequantcoeff = getDequantCoeff(list, qp, size);
2544
2545	for(i=0;i<num;i++)
2546	{
2547	*quantcoeff++ = quantScales;
2548	*dequantcoeff++ = invQuantScales;
2549	}
2550	}
2551
2552	/** set quantized matrix coefficient for encode
2553	* \param coeff quantaized matrix address
2554	* \param quantcoeff quantaized matrix address
2555	* \param quantScales Q(QP%6)
2556	* \param height height
2557	* \param width width
2558	* \param ratio ratio for upscale
2559	* \param sizuNum matrix size
2560	* \param dc dc parameter
2561	*/
2562	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2563	{
2564	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2565	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2566	for(UInt j=0;j<height;j++)
2567	{
2568	for(UInt i=0;i<width;i++)
2569	{
2570	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2571	}
2572	}
2573	if(ratio > 1)
2574	{
2575	quantcoeff[0] = quantScales / dc;
2576	}
2577	}
2578	/** set quantized matrix coefficient for decode
2579	* \param coeff quantaized matrix address
2580	* \param dequantcoeff quantaized matrix address
2581	* \param invQuantScales IQ(QP%6))
2582	* \param height height
2583	* \param width width
2584	* \param ratio ratio for upscale
2585	* \param sizuNum matrix size
2586	* \param dc dc parameter
2587	*/
2588	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2589	{
2590	for(UInt j=0;j<height;j++)
2591	{
2592	for(UInt i=0;i<width;i++)
2593	{
2594	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2595	}
2596	}
2597	if(ratio > 1)
2598	{
2599	dequantcoeff[0] = invQuantScales * dc;
2600	}
2601	}
2602
2603	/** initialization process of scaling list array
2604	*/
2605	Void TComTrQuant::initScalingList()
2606	{
2607	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2608	{
2609	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2610	{
2611	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2612	{
2613	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2614	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2615	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
2616	}
2617	}
2618	}
2619	// alias list [1] as [3].
2620	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2621	{
2622	m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp];
2623	m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp];
2624	m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp];
2625	}
2626	}
2627	/** destroy quantization matrix array
2628	*/
2629	Void TComTrQuant::destroyScalingList()
2630	{
2631	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2632	{
2633	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2634	{
2635	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2636	{
2637	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
2638	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
2639	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
2640	}
2641	}
2642	}
2643	}
2644
2645	//! \}

Download in other formats:

Original Format

JVET HEVC

Context Navigation

Ticket #919: TComTrQuant.cpp

Download in other formats: