Context navigation

TComTrQuant.cpp @ 1430

Visit:

Last change on this file since 1430 was 54, checked in by seregin, 12 years ago
port simulcast
File size: 84.7 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2013, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(Int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_useRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
203
204	if(qpScaled < 0)
205	{
206	qpScaled = qpScaled + qpBdOffset;
207	}
208	else
209	{
210	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
211	}
212	}
213	m_cQP.setQpParam( qpScaled );
214	}
215
216	#if MATRIX_MULT
217	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
218	* \param block pointer to input data (residual)
219	* \param coeff pointer to output data (transform coefficients)
220	* \param uiStride stride of input data
221	* \param uiTrSize transform size (uiTrSize x uiTrSize)
222	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
223	*/
224	void xTr(Int bitDepth, Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
225	{
226	Int i,j,k,iSum;
227	Int tmp[32*32];
228	const Short *iT;
229	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
230
231	if (uiTrSize==4)
232	{
233	iT = g_aiT4[0];
234	}
235	else if (uiTrSize==8)
236	{
237	iT = g_aiT8[0];
238	}
239	else if (uiTrSize==16)
240	{
241	iT = g_aiT16[0];
242	}
243	else if (uiTrSize==32)
244	{
245	iT = g_aiT32[0];
246	}
247	else
248	{
249	assert(0);
250	}
251
252	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
253	Int add_1st = 1<<(shift_1st-1);
254	Int shift_2nd = uiLog2TrSize + 6;
255	Int add_2nd = 1<<(shift_2nd-1);
256
257	/* Horizontal transform */
258
259	if (uiTrSize==4)
260	{
261	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
262	{
263	iT = g_as_DST_MAT_4[0];
264	}
265	}
266	for (i=0; i<uiTrSize; i++)
267	{
268	for (j=0; j<uiTrSize; j++)
269	{
270	iSum = 0;
271	for (k=0; k<uiTrSize; k++)
272	{
273	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
274	}
275	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
276	}
277	}
278
279	/* Vertical transform */
280	if (uiTrSize==4)
281	{
282	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
283	{
284	iT = g_as_DST_MAT_4[0];
285	}
286	else
287	{
288	iT = g_aiT4[0];
289	}
290	}
291	for (i=0; i<uiTrSize; i++)
292	{
293	for (j=0; j<uiTrSize; j++)
294	{
295	iSum = 0;
296	for (k=0; k<uiTrSize; k++)
297	{
298	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
299	}
300	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
301	}
302	}
303	}
304
305	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
306	* \param coeff pointer to input data (transform coefficients)
307	* \param block pointer to output data (residual)
308	* \param uiStride stride of output data
309	* \param uiTrSize transform size (uiTrSize x uiTrSize)
310	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
311	*/
312	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
313	{
314	Int i,j,k,iSum;
315	Int tmp[32*32];
316	const Short *iT;
317
318	if (uiTrSize==4)
319	{
320	iT = g_aiT4[0];
321	}
322	else if (uiTrSize==8)
323	{
324	iT = g_aiT8[0];
325	}
326	else if (uiTrSize==16)
327	{
328	iT = g_aiT16[0];
329	}
330	else if (uiTrSize==32)
331	{
332	iT = g_aiT32[0];
333	}
334	else
335	{
336	assert(0);
337	}
338
339	Int shift_1st = SHIFT_INV_1ST;
340	Int add_1st = 1<<(shift_1st-1);
341	Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8;
342	Int add_2nd = 1<<(shift_2nd-1);
343	if (uiTrSize==4)
344	{
345	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
346	{
347	iT = g_as_DST_MAT_4[0];
348	}
349	}
350
351	/* Horizontal transform */
352	for (i=0; i<uiTrSize; i++)
353	{
354	for (j=0; j<uiTrSize; j++)
355	{
356	iSum = 0;
357	for (k=0; k<uiTrSize; k++)
358	{
359	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
360	}
361	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
362	}
363	}
364
365	if (uiTrSize==4)
366	{
367	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
368	{
369	iT = g_as_DST_MAT_4[0];
370	}
371	else
372	{
373	iT = g_aiT4[0];
374	}
375	}
376
377	/* Vertical transform */
378	for (i=0; i<uiTrSize; i++)
379	{
380	for (j=0; j<uiTrSize; j++)
381	{
382	iSum = 0;
383	for (k=0; k<uiTrSize; k++)
384	{
385	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
386	}
387	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
388	}
389	}
390	}
391
392	#else //MATRIX_MULT
393
394	/** 4x4 forward transform implemented using partial butterfly structure (1D)
395	* \param src input data (residual)
396	* \param dst output data (transform coefficients)
397	* \param shift specifies right shift after 1D transform
398	*/
399
400	void partialButterfly4(Short src,Short dst,Int shift, Int line)
401	{
402	Int j;
403	Int E[2],O[2];
404	Int add = 1<<(shift-1);
405
406	for (j=0; j<line; j++)
407	{
408	/* E and O */
409	E[0] = src[0] + src[3];
410	O[0] = src[0] - src[3];
411	E[1] = src[1] + src[2];
412	O[1] = src[1] - src[2];
413
414	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
415	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
416	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
417	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
418
419	src += 4;
420	dst ++;
421	}
422	}
423
424	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
425	// give identical results
426	void fastForwardDst(Short block,Short coeff,Int shift) // input block, output coeff
427	{
428	Int i, c[4];
429	Int rnd_factor = 1<<(shift-1);
430	for (i=0; i<4; i++)
431	{
432	// Intermediate Variables
433	c[0] = block[4i+0] + block[4i+3];
434	c[1] = block[4i+1] + block[4i+3];
435	c[2] = block[4i+0] - block[4i+1];
436	c[3] = 74* block[4*i+2];
437
438	coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
439	coeff[ 4+i] = ( 74 * (block[4i+0]+ block[4i+1] - block[4*i+3]) + rnd_factor ) >> shift;
440	coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
441	coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
442	}
443	}
444
445	void fastInverseDst(Short tmp,Short block,Int shift) // input tmp, output block
446	{
447	Int i, c[4];
448	Int rnd_factor = 1<<(shift-1);
449	for (i=0; i<4; i++)
450	{
451	// Intermediate Variables
452	c[0] = tmp[ i] + tmp[ 8+i];
453	c[1] = tmp[8+i] + tmp[12+i];
454	c[2] = tmp[ i] - tmp[12+i];
455	c[3] = 74* tmp[4+i];
456
457	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
458	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
459	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
460	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
461	}
462	}
463
464	void partialButterflyInverse4(Short src,Short dst,Int shift, Int line)
465	{
466	Int j;
467	Int E[2],O[2];
468	Int add = 1<<(shift-1);
469
470	for (j=0; j<line; j++)
471	{
472	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
473	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
474	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
475	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
476	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
477
478	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
479	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
480	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
481	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
482	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
483
484	src ++;
485	dst += 4;
486	}
487	}
488
489
490	void partialButterfly8(Short src,Short dst,Int shift, Int line)
491	{
492	Int j,k;
493	Int E[4],O[4];
494	Int EE[2],EO[2];
495	Int add = 1<<(shift-1);
496
497	for (j=0; j<line; j++)
498	{
499	/* E and O*/
500	for (k=0;k<4;k++)
501	{
502	E[k] = src[k] + src[7-k];
503	O[k] = src[k] - src[7-k];
504	}
505	/* EE and EO */
506	EE[0] = E[0] + E[3];
507	EO[0] = E[0] - E[3];
508	EE[1] = E[1] + E[2];
509	EO[1] = E[1] - E[2];
510
511	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
512	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
513	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
514	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
515
516	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
517	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
518	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
519	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
520
521	src += 8;
522	dst ++;
523	}
524	}
525
526
527	void partialButterflyInverse8(Short src,Short dst,Int shift, Int line)
528	{
529	Int j,k;
530	Int E[4],O[4];
531	Int EE[2],EO[2];
532	Int add = 1<<(shift-1);
533
534	for (j=0; j<line; j++)
535	{
536	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
537	for (k=0;k<4;k++)
538	{
539	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
540	}
541
542	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
543	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
544	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
545	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
546
547	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
548	E[0] = EE[0] + EO[0];
549	E[3] = EE[0] - EO[0];
550	E[1] = EE[1] + EO[1];
551	E[2] = EE[1] - EO[1];
552	for (k=0;k<4;k++)
553	{
554	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
555	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
556	}
557	src ++;
558	dst += 8;
559	}
560	}
561
562
563	void partialButterfly16(Short src,Short dst,Int shift, Int line)
564	{
565	Int j,k;
566	Int E[8],O[8];
567	Int EE[4],EO[4];
568	Int EEE[2],EEO[2];
569	Int add = 1<<(shift-1);
570
571	for (j=0; j<line; j++)
572	{
573	/* E and O*/
574	for (k=0;k<8;k++)
575	{
576	E[k] = src[k] + src[15-k];
577	O[k] = src[k] - src[15-k];
578	}
579	/* EE and EO */
580	for (k=0;k<4;k++)
581	{
582	EE[k] = E[k] + E[7-k];
583	EO[k] = E[k] - E[7-k];
584	}
585	/* EEE and EEO */
586	EEE[0] = EE[0] + EE[3];
587	EEO[0] = EE[0] - EE[3];
588	EEE[1] = EE[1] + EE[2];
589	EEO[1] = EE[1] - EE[2];
590
591	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
592	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
593	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
594	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
595
596	for (k=2;k<16;k+=4)
597	{
598	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
599	}
600
601	for (k=1;k<16;k+=2)
602	{
603	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
604	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
605	}
606
607	src += 16;
608	dst ++;
609
610	}
611	}
612
613
614	void partialButterflyInverse16(Short src,Short dst,Int shift, Int line)
615	{
616	Int j,k;
617	Int E[8],O[8];
618	Int EE[4],EO[4];
619	Int EEE[2],EEO[2];
620	Int add = 1<<(shift-1);
621
622	for (j=0; j<line; j++)
623	{
624	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
625	for (k=0;k<8;k++)
626	{
627	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
628	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
629	}
630	for (k=0;k<4;k++)
631	{
632	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
633	}
634	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
635	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
636	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
637	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
638
639	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
640	for (k=0;k<2;k++)
641	{
642	EE[k] = EEE[k] + EEO[k];
643	EE[k+2] = EEE[1-k] - EEO[1-k];
644	}
645	for (k=0;k<4;k++)
646	{
647	E[k] = EE[k] + EO[k];
648	E[k+4] = EE[3-k] - EO[3-k];
649	}
650	for (k=0;k<8;k++)
651	{
652	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
653	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
654	}
655	src ++;
656	dst += 16;
657	}
658	}
659
660
661	void partialButterfly32(Short src,Short dst,Int shift, Int line)
662	{
663	Int j,k;
664	Int E[16],O[16];
665	Int EE[8],EO[8];
666	Int EEE[4],EEO[4];
667	Int EEEE[2],EEEO[2];
668	Int add = 1<<(shift-1);
669
670	for (j=0; j<line; j++)
671	{
672	/* E and O*/
673	for (k=0;k<16;k++)
674	{
675	E[k] = src[k] + src[31-k];
676	O[k] = src[k] - src[31-k];
677	}
678	/* EE and EO */
679	for (k=0;k<8;k++)
680	{
681	EE[k] = E[k] + E[15-k];
682	EO[k] = E[k] - E[15-k];
683	}
684	/* EEE and EEO */
685	for (k=0;k<4;k++)
686	{
687	EEE[k] = EE[k] + EE[7-k];
688	EEO[k] = EE[k] - EE[7-k];
689	}
690	/* EEEE and EEEO */
691	EEEE[0] = EEE[0] + EEE[3];
692	EEEO[0] = EEE[0] - EEE[3];
693	EEEE[1] = EEE[1] + EEE[2];
694	EEEO[1] = EEE[1] - EEE[2];
695
696	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
697	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
698	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
699	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
700	for (k=4;k<32;k+=8)
701	{
702	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
703	}
704	for (k=2;k<32;k+=4)
705	{
706	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
707	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
708	}
709	for (k=1;k<32;k+=2)
710	{
711	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
712	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
713	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
714	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
715	}
716	src += 32;
717	dst ++;
718	}
719	}
720
721
722	void partialButterflyInverse32(Short src,Short dst,Int shift, Int line)
723	{
724	Int j,k;
725	Int E[16],O[16];
726	Int EE[8],EO[8];
727	Int EEE[4],EEO[4];
728	Int EEEE[2],EEEO[2];
729	Int add = 1<<(shift-1);
730
731	for (j=0; j<line; j++)
732	{
733	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
734	for (k=0;k<16;k++)
735	{
736	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
737	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
738	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
739	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
740	}
741	for (k=0;k<8;k++)
742	{
743	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
744	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
745	}
746	for (k=0;k<4;k++)
747	{
748	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
749	}
750	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
751	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
752	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
753	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
754
755	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
756	EEE[0] = EEEE[0] + EEEO[0];
757	EEE[3] = EEEE[0] - EEEO[0];
758	EEE[1] = EEEE[1] + EEEO[1];
759	EEE[2] = EEEE[1] - EEEO[1];
760	for (k=0;k<4;k++)
761	{
762	EE[k] = EEE[k] + EEO[k];
763	EE[k+4] = EEE[3-k] - EEO[3-k];
764	}
765	for (k=0;k<8;k++)
766	{
767	E[k] = EE[k] + EO[k];
768	E[k+8] = EE[7-k] - EO[7-k];
769	}
770	for (k=0;k<16;k++)
771	{
772	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
773	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
774	}
775	src ++;
776	dst += 32;
777	}
778	}
779
780	/** MxN forward transform (2D)
781	* \param block input data (residual)
782	* \param coeff output data (transform coefficients)
783	* \param iWidth input data (width of transform)
784	* \param iHeight input data (height of transform)
785	*/
786	void xTrMxN(Int bitDepth, Short block,Short coeff, Int iWidth, Int iHeight, UInt uiMode)
787	{
788	Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
789	Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
790
791	Short tmp[ 64 * 64 ];
792
793	if( iWidth == 4 && iHeight == 4)
794	{
795	if (uiMode != REG_DCT)
796	{
797	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
798	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
799	}
800	else
801	{
802	partialButterfly4(block, tmp, shift_1st, iHeight);
803	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
804	}
805
806	}
807	else if( iWidth == 8 && iHeight == 8)
808	{
809	partialButterfly8( block, tmp, shift_1st, iHeight );
810	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
811	}
812	else if( iWidth == 16 && iHeight == 16)
813	{
814	partialButterfly16( block, tmp, shift_1st, iHeight );
815	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
816	}
817	else if( iWidth == 32 && iHeight == 32)
818	{
819	partialButterfly32( block, tmp, shift_1st, iHeight );
820	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
821	}
822	}
823	/** MxN inverse transform (2D)
824	* \param coeff input data (transform coefficients)
825	* \param block output data (residual)
826	* \param iWidth input data (width of transform)
827	* \param iHeight input data (height of transform)
828	*/
829	void xITrMxN(Int bitDepth, Short coeff,Short block, Int iWidth, Int iHeight, UInt uiMode)
830	{
831	Int shift_1st = SHIFT_INV_1ST;
832	Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8);
833
834	Short tmp[ 64*64];
835	if( iWidth == 4 && iHeight == 4)
836	{
837	if (uiMode != REG_DCT)
838	{
839	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
840	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
841	}
842	else
843	{
844	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
845	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
846	}
847	}
848	else if( iWidth == 8 && iHeight == 8)
849	{
850	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
851	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
852	}
853	else if( iWidth == 16 && iHeight == 16)
854	{
855	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
856	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
857	}
858	else if( iWidth == 32 && iHeight == 32)
859	{
860	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
861	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
862	}
863	}
864
865	#endif //MATRIX_MULT
866
867	// To minimize the distortion only. No rate is considered.
868	Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
869	{
870	Int lastCG = -1;
871	Int absSum = 0 ;
872	Int n ;
873
874	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
875	{
876	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
877	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
878	absSum = 0 ;
879
880	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
881	{
882	if( pQCoef[ scan[ n + subPos ]] )
883	{
884	lastNZPosInCG = n;
885	break;
886	}
887	}
888
889	for(n = 0; n <SCAN_SET_SIZE; n++ )
890	{
891	if( pQCoef[ scan[ n + subPos ]] )
892	{
893	firstNZPosInCG = n;
894	break;
895	}
896	}
897
898	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
899	{
900	absSum += pQCoef[ scan[ n + subPos ]];
901	}
902
903	if(lastNZPosInCG>=0 && lastCG==-1)
904	{
905	lastCG = 1 ;
906	}
907
908	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
909	{
910	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
911	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
912	{
913	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
914
915	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
916	{
917	UInt blkPos = scan[ n+subPos ];
918	if(pQCoef[ blkPos ] != 0 )
919	{
920	if(deltaU[blkPos]>0)
921	{
922	curCost = - deltaU[blkPos];
923	curChange=1 ;
924	}
925	else
926	{
927	//curChange =-1;
928	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
929	{
930	curCost=MAX_INT ;
931	}
932	else
933	{
934	curCost = deltaU[blkPos];
935	curChange =-1;
936	}
937	}
938	}
939	else
940	{
941	if(n<firstNZPosInCG)
942	{
943	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
944	if(thisSignBit != signbit )
945	{
946	curCost = MAX_INT;
947	}
948	else
949	{
950	curCost = - (deltaU[blkPos]) ;
951	curChange = 1 ;
952	}
953	}
954	else
955	{
956	curCost = - (deltaU[blkPos]) ;
957	curChange = 1 ;
958	}
959	}
960
961	if( curCost<minCostInc)
962	{
963	minCostInc = curCost ;
964	finalChange = curChange ;
965	minPos = blkPos ;
966	}
967	} //CG loop
968
969	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
970	{
971	finalChange = -1;
972	}
973
974	if(pCoef[minPos]>=0)
975	{
976	pQCoef[minPos] += finalChange ;
977	}
978	else
979	{
980	pQCoef[minPos] -= finalChange ;
981	}
982	} // Hide
983	}
984	if(lastCG==1)
985	{
986	lastCG=0 ;
987	}
988	} // TU loop
989
990	return;
991	}
992
993	Void TComTrQuant::xQuant( TComDataCU* pcCU,
994	Int* pSrc,
995	TCoeff* pDes,
996	#if ADAPTIVE_QP_SELECTION
997	Int*& pArlDes,
998	#endif
999	Int iWidth,
1000	Int iHeight,
1001	UInt& uiAcSum,
1002	TextType eTType,
1003	UInt uiAbsPartIdx )
1004	{
1005	Int* piCoef = pSrc;
1006	TCoeff* piQCoef = pDes;
1007	#if ADAPTIVE_QP_SELECTION
1008	Int* piArlCCoef = pArlDes;
1009	#endif
1010	Int iAdd = 0;
1011
1012	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
1013	if ( useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA))
1014	{
1015	#if ADAPTIVE_QP_SELECTION
1016	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1017	#else
1018	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1019	#endif
1020	}
1021	else
1022	{
1023	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1024
1025	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1026	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1027
1028	Int deltaU[32*32] ;
1029
1030	#if ADAPTIVE_QP_SELECTION
1031	QpParam cQpBase;
1032	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1033
1034	Int qpScaled;
1035	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1036
1037	if(eTType == TEXT_LUMA)
1038	{
1039	qpScaled = iQpBase + qpBDOffset;
1040	}
1041	else
1042	{
1043	Int chromaQPOffset;
1044	if(eTType == TEXT_CHROMA_U)
1045	{
1046	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
1047	}
1048	else
1049	{
1050	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
1051	}
1052	iQpBase = iQpBase + chromaQPOffset;
1053
1054	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1055
1056	if(qpScaled < 0)
1057	{
1058	qpScaled = qpScaled + qpBDOffset;
1059	}
1060	else
1061	{
1062	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1063	}
1064	}
1065	cQpBase.setQpParam(qpScaled);
1066	#endif
1067
1068	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1069	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1070	assert(scalingListType < 6);
1071	Int *piQuantCoeff = 0;
1072	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1073
1074	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1075	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1076
1077	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1078
1079	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1080
1081	#if ADAPTIVE_QP_SELECTION
1082	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1083	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1084	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1085	Int iAddC = 1 << (iQBitsC-1);
1086	#endif
1087
1088	Int qBits8 = iQBits-8;
1089	for( Int n = 0; n < iWidth*iHeight; n++ )
1090	{
1091	Int iLevel;
1092	Int iSign;
1093	UInt uiBlockPos = n;
1094	iLevel = piCoef[uiBlockPos];
1095	iSign = (iLevel < 0 ? -1: 1);
1096
1097	#if ADAPTIVE_QP_SELECTION
1098	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1099	if( m_bUseAdaptQpSelect )
1100	{
1101	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1102	}
1103	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1104	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1105	#else
1106	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1107	deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1108	#endif
1109	uiAcSum += iLevel;
1110	iLevel *= iSign;
1111	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1112	} // for n
1113	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1114	{
1115	if(uiAcSum>=2)
1116	{
1117	signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1118	}
1119	}
1120	} //if RDOQ
1121	//return;
1122
1123	}
1124
1125	Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1126	{
1127
1128	const TCoeff* piQCoef = pSrc;
1129	Int* piCoef = pDes;
1130
1131	if ( iWidth > (Int)m_uiMaxTrSize )
1132	{
1133	iWidth = m_uiMaxTrSize;
1134	iHeight = m_uiMaxTrSize;
1135	}
1136
1137	Int iShift,iAdd,iCoeffQ;
1138	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1139
1140	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1141
1142	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1143
1144	TCoeff clipQCoef;
1145
1146	if(getUseScalingList())
1147	{
1148	iShift += 4;
1149	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1150
1151	if(iShift > m_cQP.m_iPer)
1152	{
1153	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1154
1155	for( Int n = 0; n < iWidth*iHeight; n++ )
1156	{
1157	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1158	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1159	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1160	}
1161	}
1162	else
1163	{
1164	for( Int n = 0; n < iWidth*iHeight; n++ )
1165	{
1166	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1167	iCoeffQ = Clip3( -32768, 32767, clipQCoef * piDequantCoef[n] ); // Clip to avoid possible overflow in following shift left operation
1168	piCoef[n] = Clip3( -32768, 32767, iCoeffQ << ( m_cQP.m_iPer - iShift ) );
1169	}
1170	}
1171	}
1172	else
1173	{
1174	iAdd = 1 << (iShift-1);
1175	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1176
1177	for( Int n = 0; n < iWidth*iHeight; n++ )
1178	{
1179	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1180	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1181	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1182	}
1183	}
1184	}
1185
1186	Void TComTrQuant::init( UInt uiMaxTrSize,
1187	Bool bUseRDOQ,
1188	Bool bUseRDOQTS,
1189	Bool bEnc, Bool useTransformSkipFast
1190	#if ADAPTIVE_QP_SELECTION
1191	, Bool bUseAdaptQpSelect
1192	#endif
1193	)
1194	{
1195	m_uiMaxTrSize = uiMaxTrSize;
1196	m_bEnc = bEnc;
1197	m_useRDOQ = bUseRDOQ;
1198	m_useRDOQTS = bUseRDOQTS;
1199	#if ADAPTIVE_QP_SELECTION
1200	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1201	#endif
1202	m_useTransformSkipFast = useTransformSkipFast;
1203	}
1204
1205	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1206	Pel* pcResidual,
1207	UInt uiStride,
1208	TCoeff* rpcCoeff,
1209	#if ADAPTIVE_QP_SELECTION
1210	Int*& rpcArlCoeff,
1211	#endif
1212	UInt uiWidth,
1213	UInt uiHeight,
1214	UInt& uiAbsSum,
1215	TextType eTType,
1216	UInt uiAbsPartIdx,
1217	Bool useTransformSkip
1218	)
1219	{
1220	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1221	{
1222	uiAbsSum=0;
1223	for (UInt k = 0; k<uiHeight; k++)
1224	{
1225	for (UInt j = 0; j<uiWidth; j++)
1226	{
1227	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1228	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1229	}
1230	}
1231	return;
1232	}
1233	UInt uiMode; //luma intra pred
1234	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1235	{
1236	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1237	}
1238	#if INTRA_BL_DST4x4
1239	else if(eTType == TEXT_LUMA && pcCU->isIntraBL(uiAbsPartIdx) )
1240	{
1241	uiMode = DC_IDX; //Using DST
1242	}
1243	#endif
1244	else
1245	{
1246	uiMode = REG_DCT;
1247	}
1248
1249	uiAbsSum = 0;
1250	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1251	Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1252	if(useTransformSkip)
1253	{
1254	xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1255	}
1256	else
1257	{
1258	xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1259	}
1260	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1261	#if ADAPTIVE_QP_SELECTION
1262	rpcArlCoeff,
1263	#endif
1264	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1265	}
1266
1267	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1268	{
1269	if(transQuantBypass)
1270	{
1271	for (UInt k = 0; k<uiHeight; k++)
1272	{
1273	for (UInt j = 0; j<uiWidth; j++)
1274	{
1275	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1276	}
1277	}
1278	return;
1279	}
1280	Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1281	xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1282	if(useTransformSkip == true)
1283	{
1284	xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1285	}
1286	else
1287	{
1288	xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1289	}
1290	}
1291
1292	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1293	{
1294	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1295	{
1296	return;
1297	}
1298	const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1299
1300	if( uiTrMode == stopTrMode )
1301	{
1302	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1303	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1304	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1305	{
1306	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1307	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1308	{
1309	return;
1310	}
1311	uiWidth <<= 1;
1312	uiHeight <<= 1;
1313	}
1314	Pel* pResi = rpcResidual + uiAddr;
1315	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1316	assert(scalingListType < 6);
1317	#if NO_RESIDUAL_FLAG_FOR_BLPRED
1318	if(pcCU->isIntraBL(uiAbsPartIdx) && eTxt == TEXT_LUMA)
1319	{
1320	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, DC_IDX, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1321	}
1322	else
1323	{
1324	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1325	}
1326	#else
1327	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1328	#endif
1329	}
1330	else
1331	{
1332	uiTrMode++;
1333	uiWidth >>= 1;
1334	uiHeight >>= 1;
1335	Int trWidth = uiWidth, trHeight = uiHeight;
1336	UInt uiAddrOffset = trHeight * uiStride;
1337	UInt uiCoefOffset = trWidth * trHeight;
1338	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1339	{
1340	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1341	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1342	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1343	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1344	}
1345	}
1346	}
1347
1348	// ------------------------------------------------------------------------------------------------
1349	// Logical transform
1350	// ------------------------------------------------------------------------------------------------
1351
1352	/** Wrapper function between HM interface and core NxN forward transform (2D)
1353	* \param piBlkResi input data (residual)
1354	* \param psCoeff output data (transform coefficients)
1355	* \param uiStride stride of input residual data
1356	* \param iSize transform size (iSize x iSize)
1357	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1358	*/
1359	Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1360	{
1361	#if MATRIX_MULT
1362	Int iSize = iWidth;
1363	xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1364	#else
1365	Int j;
1366	{
1367	Short block[ 64 * 64 ];
1368	Short coeff[ 64 * 64 ];
1369	{
1370	for (j = 0; j < iHeight; j++)
1371	{
1372	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) );
1373	}
1374	}
1375	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );
1376	for ( j = 0; j < iHeight * iWidth; j++ )
1377	{
1378	psCoeff[ j ] = coeff[ j ];
1379	}
1380	return ;
1381	}
1382	#endif
1383	}
1384
1385
1386	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1387	* \param plCoef input data (transform coefficients)
1388	* \param pResidual output data (residual)
1389	* \param uiStride stride of input residual data
1390	* \param iSize transform size (iSize x iSize)
1391	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1392	*/
1393	Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1394	{
1395	#if MATRIX_MULT
1396	Int iSize = iWidth;
1397	xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1398	#else
1399	Int j;
1400	{
1401	Short block[ 64 * 64 ];
1402	Short coeff[ 64 * 64 ];
1403	for ( j = 0; j < iHeight * iWidth; j++ )
1404	{
1405	coeff[j] = (Short)plCoef[j];
1406	}
1407	xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode );
1408	{
1409	for ( j = 0; j < iHeight; j++ )
1410	{
1411	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );
1412	}
1413	}
1414	return ;
1415	}
1416	#endif
1417	}
1418
1419	/** Wrapper function between HM interface and core 4x4 transform skipping
1420	* \param piBlkResi input data (residual)
1421	* \param psCoeff output data (transform coefficients)
1422	* \param uiStride stride of input residual data
1423	* \param iSize transform size (iSize x iSize)
1424	*/
1425	Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1426	{
1427	assert( width == height );
1428	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1429	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1430	UInt transformSkipShift;
1431	Int j,k;
1432	if(shift >= 0)
1433	{
1434	transformSkipShift = shift;
1435	for (j = 0; j < height; j++)
1436	{
1437	for(k = 0; k < width; k ++)
1438	{
1439	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1440	}
1441	}
1442	}
1443	else
1444	{
1445	//The case when uiBitDepth > 13
1446	Int offset;
1447	transformSkipShift = -shift;
1448	offset = (1 << (transformSkipShift - 1));
1449	for (j = 0; j < height; j++)
1450	{
1451	for(k = 0; k < width; k ++)
1452	{
1453	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1454	}
1455	}
1456	}
1457	}
1458
1459	/** Wrapper function between HM interface and core NxN transform skipping
1460	* \param plCoef input data (coefficients)
1461	* \param pResidual output data (residual)
1462	* \param uiStride stride of input residual data
1463	* \param iSize transform size (iSize x iSize)
1464	*/
1465	Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1466	{
1467	assert( width == height );
1468	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1469	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1470	UInt transformSkipShift;
1471	Int j,k;
1472	if(shift > 0)
1473	{
1474	Int offset;
1475	transformSkipShift = shift;
1476	offset = (1 << (transformSkipShift -1));
1477	for ( j = 0; j < height; j++ )
1478	{
1479	for(k = 0; k < width; k ++)
1480	{
1481	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1482	}
1483	}
1484	}
1485	else
1486	{
1487	//The case when uiBitDepth >= 13
1488	transformSkipShift = - shift;
1489	for ( j = 0; j < height; j++ )
1490	{
1491	for(k = 0; k < width; k ++)
1492	{
1493	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1494	}
1495	}
1496	}
1497	}
1498
1499	/** RDOQ with CABAC
1500	* \param pcCU pointer to coding unit structure
1501	* \param plSrcCoeff pointer to input buffer
1502	* \param piDstCoeff reference to pointer to output buffer
1503	* \param uiWidth block width
1504	* \param uiHeight block height
1505	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1506	* \param eTType plane type / luminance or chrominance
1507	* \param uiAbsPartIdx absolute partition index
1508	* \returns Void
1509	* Rate distortion optimized quantization for entropy
1510	* coding engines using probability models like CABAC
1511	*/
1512	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1513	Int* plSrcCoeff,
1514	TCoeff* piDstCoeff,
1515	#if ADAPTIVE_QP_SELECTION
1516	Int*& piArlDstCoeff,
1517	#endif
1518	UInt uiWidth,
1519	UInt uiHeight,
1520	UInt& uiAbsSum,
1521	TextType eTType,
1522	UInt uiAbsPartIdx )
1523	{
1524	Int iQBits = m_cQP.m_iBits;
1525	Double dTemp = 0;
1526	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1527	Int uiQ = g_quantScales[m_cQP.rem()];
1528
1529	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1530	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1531	UInt uiGoRiceParam = 0;
1532	Double d64BlockUncodedCost = 0;
1533	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1534	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1535	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1536	assert(scalingListType < 6);
1537
1538	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1539	Double dErrScale = 0;
1540	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
1541	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1542	Int *piQCoef = piQCoefOrg;
1543	Double *pdErrScale = pdErrScaleOrg;
1544	#if ADAPTIVE_QP_SELECTION
1545	Int iQBitsC = iQBits - ARL_C_PRECISION;
1546	Int iAddC = 1 << (iQBitsC-1);
1547	#endif
1548	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1549
1550	#if ADAPTIVE_QP_SELECTION
1551	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1552	#endif
1553
1554	Double pdCostCoeff [ 32 * 32 ];
1555	Double pdCostSig [ 32 * 32 ];
1556	Double pdCostCoeff0[ 32 * 32 ];
1557	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1558	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1559	Int rateIncUp [ 32 * 32 ];
1560	Int rateIncDown [ 32 * 32 ];
1561	Int sigRateDelta[ 32 * 32 ];
1562	Int deltaU [ 32 * 32 ];
1563	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1564	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1565	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1566	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1567
1568	const UInt * scanCG;
1569	{
1570	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1571	if( uiLog2BlkSize == 3 )
1572	{
1573	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1574	}
1575	else if( uiLog2BlkSize == 5 )
1576	{
1577	scanCG = g_sigLastScanCG32x32;
1578	}
1579	}
1580	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1581	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1582	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1583	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1584	Int iCGLastScanPos = -1;
1585
1586	UInt uiCtxSet = 0;
1587	Int c1 = 1;
1588	Int c2 = 0;
1589	Double d64BaseCost = 0;
1590	Int iLastScanPos = -1;
1591	dTemp = dErrScale;
1592
1593	UInt c1Idx = 0;
1594	UInt c2Idx = 0;
1595	Int baseLevel;
1596
1597	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1598
1599	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1600	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1601
1602	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1603	Int iScanPos;
1604	coeffGroupRDStats rdStats;
1605
1606	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1607	{
1608	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1609	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1610	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1611	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1612
1613	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1614	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1615	{
1616	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1617	//===== quantization =====
1618	UInt uiBlkPos = scan[iScanPos];
1619	// set coeff
1620	uiQ = piQCoef[uiBlkPos];
1621	dTemp = pdErrScale[uiBlkPos];
1622	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1623	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1624	#if ADAPTIVE_QP_SELECTION
1625	if( m_bUseAdaptQpSelect )
1626	{
1627	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1628	}
1629	#endif
1630	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1631
1632	Double dErr = Double( lLevelDouble );
1633	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1634	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1635	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1636
1637	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1638	{
1639	iLastScanPos = iScanPos;
1640	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1641	iCGLastScanPos = iCGScanPos;
1642	}
1643
1644	if ( iLastScanPos >= 0 )
1645	{
1646	//===== coefficient level estimation =====
1647	UInt uiLevel;
1648	UInt uiOneCtx = 4 * uiCtxSet + c1;
1649	UInt uiAbsCtx = uiCtxSet + c2;
1650
1651	if( iScanPos == iLastScanPos )
1652	{
1653	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1654	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1655	c1Idx, c2Idx, iQBits, dTemp, 1 );
1656	}
1657	else
1658	{
1659	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1660	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1661	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
1662	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1663	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1664	c1Idx, c2Idx, iQBits, dTemp, 0 );
1665	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1666	}
1667	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1668	if( uiLevel > 0 )
1669	{
1670	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1671	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1672	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1673	}
1674	else // uiLevel == 0
1675	{
1676	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1677	}
1678	piDstCoeff[ uiBlkPos ] = uiLevel;
1679	d64BaseCost += pdCostCoeff [ iScanPos ];
1680
1681
1682	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1683	if( uiLevel >= baseLevel )
1684	{
1685	if(uiLevel > 3*(1<<uiGoRiceParam))
1686	{
1687	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1688	}
1689	}
1690	if ( uiLevel >= 1)
1691	{
1692	c1Idx ++;
1693	}
1694
1695	//===== update bin model =====
1696	if( uiLevel > 1 )
1697	{
1698	c1 = 0;
1699	c2 += (c2 < 2);
1700	c2Idx ++;
1701	}
1702	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1703	{
1704	c1++;
1705	}
1706
1707	//===== context set update =====
1708	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1709	{
1710	c2 = 0;
1711	uiGoRiceParam = 0;
1712
1713	c1Idx = 0;
1714	c2Idx = 0;
1715	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1716	if( c1 == 0 )
1717	{
1718	uiCtxSet++;
1719	}
1720	c1 = 1;
1721	}
1722	}
1723	else
1724	{
1725	d64BaseCost += pdCostCoeff0[ iScanPos ];
1726	}
1727	rdStats.d64SigCost += pdCostSig[ iScanPos ];
1728	if (iScanPosinCG == 0 )
1729	{
1730	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
1731	}
1732	if (piDstCoeff[ uiBlkPos ] )
1733	{
1734	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1735	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
1736	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
1737	if ( iScanPosinCG != 0 )
1738	{
1739	rdStats.iNNZbeforePos0++;
1740	}
1741	}
1742	} //end for (iScanPosinCG)
1743
1744	if (iCGLastScanPos >= 0)
1745	{
1746	if( iCGScanPos )
1747	{
1748	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1749	{
1750	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1751	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
1752	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1753	}
1754	else
1755	{
1756	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
1757	{
1758	if ( rdStats.iNNZbeforePos0 == 0 )
1759	{
1760	d64BaseCost -= rdStats.d64SigCost_0;
1761	rdStats.d64SigCost -= rdStats.d64SigCost_0;
1762	}
1763	// rd-cost if SigCoeffGroupFlag = 0, initialization
1764	Double d64CostZeroCG = d64BaseCost;
1765
1766	// add SigCoeffGroupFlag cost to total cost
1767	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1768	if (iCGScanPos < iCGLastScanPos)
1769	{
1770	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
1771	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
1772	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
1773	}
1774
1775	// try to convert the current coeff group from non-zero to all-zero
1776	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
1777	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
1778	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
1779
1780	// if we can save cost, change this block to all-zero block
1781	if ( d64CostZeroCG < d64BaseCost )
1782	{
1783	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
1784	d64BaseCost = d64CostZeroCG;
1785	if (iCGScanPos < iCGLastScanPos)
1786	{
1787	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1788	}
1789	// reset coeffs to 0 in this block
1790	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1791	{
1792	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1793	UInt uiBlkPos = scan[ iScanPos ];
1794
1795	if (piDstCoeff[ uiBlkPos ])
1796	{
1797	piDstCoeff [ uiBlkPos ] = 0;
1798	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
1799	pdCostSig [ iScanPos ] = 0;
1800	}
1801	}
1802	} // end if ( d64CostAllZeros < d64BaseCost )
1803	}
1804	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1805	}
1806	else
1807	{
1808	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1809	}
1810	}
1811	} //end for (iCGScanPos)
1812
1813	//===== estimate last position =====
1814	if ( iLastScanPos < 0 )
1815	{
1816	return;
1817	}
1818
1819	Double d64BestCost = 0;
1820	Int ui16CtxCbf = 0;
1821	Int iBestLastIdxP1 = 0;
1822	#if NO_RESIDUAL_FLAG_FOR_BLPRED
1823	if( (!pcCU->isIntra( uiAbsPartIdx ) \|\| pcCU->isIntraBL( uiAbsPartIdx )) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1824	#else
1825	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1826	#endif
1827	{
1828	ui16CtxCbf = 0;
1829	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
1830	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
1831	}
1832	else
1833	{
1834	ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
1835	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
1836	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
1837	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
1838	}
1839
1840	Bool bFoundLast = false;
1841	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
1842	{
1843	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1844
1845	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
1846	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1847	{
1848	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1849	{
1850	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1851	if (iScanPos > iLastScanPos) continue;
1852	UInt uiBlkPos = scan[iScanPos];
1853
1854	if( piDstCoeff[ uiBlkPos ] )
1855	{
1856	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1857	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1858
1859	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
1860	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
1861
1862	if( totalCost < d64BestCost )
1863	{
1864	iBestLastIdxP1 = iScanPos + 1;
1865	d64BestCost = totalCost;
1866	}
1867	if( piDstCoeff[ uiBlkPos ] > 1 )
1868	{
1869	bFoundLast = true;
1870	break;
1871	}
1872	d64BaseCost -= pdCostCoeff[ iScanPos ];
1873	d64BaseCost += pdCostCoeff0[ iScanPos ];
1874	}
1875	else
1876	{
1877	d64BaseCost -= pdCostSig[ iScanPos ];
1878	}
1879	} //end for
1880	if (bFoundLast)
1881	{
1882	break;
1883	}
1884	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1885	} // end for
1886
1887	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
1888	{
1889	Int blkPos = scan[ scanPos ];
1890	Int level = piDstCoeff[ blkPos ];
1891	uiAbsSum += level;
1892	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
1893	}
1894
1895	//===== clean uncoded coefficients =====
1896	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
1897	{
1898	piDstCoeff[ scan[ scanPos ] ] = 0;
1899	}
1900
1901	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
1902	{
1903	Int64 rdFactor = (Int64) (
1904	g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
1905	/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
1906	+ 0.5);
1907	Int lastCG = -1;
1908	Int absSum = 0 ;
1909	Int n ;
1910
1911	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1912	{
1913	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1914	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1915	absSum = 0 ;
1916
1917	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1918	{
1919	if( piDstCoeff[ scan[ n + subPos ]] )
1920	{
1921	lastNZPosInCG = n;
1922	break;
1923	}
1924	}
1925
1926	for(n = 0; n <SCAN_SET_SIZE; n++ )
1927	{
1928	if( piDstCoeff[ scan[ n + subPos ]] )
1929	{
1930	firstNZPosInCG = n;
1931	break;
1932	}
1933	}
1934
1935	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1936	{
1937	absSum += piDstCoeff[ scan[ n + subPos ]];
1938	}
1939
1940	if(lastNZPosInCG>=0 && lastCG==-1)
1941	{
1942	lastCG = 1;
1943	}
1944
1945	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1946	{
1947	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
1948	if( signbit!=(absSum&0x1) ) // hide but need tune
1949	{
1950	// calculate the cost
1951	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
1952	Int minPos =-1, finalChange=0, curChange=0;
1953
1954	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1955	{
1956	UInt uiBlkPos = scan[ n + subPos ];
1957	if(piDstCoeff[ uiBlkPos ] != 0 )
1958	{
1959	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
1960	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1961	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
1962
1963	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1964	{
1965	costDown -= (4<<15) ;
1966	}
1967
1968	if(costUp<costDown)
1969	{
1970	curCost = costUp;
1971	curChange = 1 ;
1972	}
1973	else
1974	{
1975	curChange = -1 ;
1976	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1977	{
1978	curCost = MAX_INT64 ;
1979	}
1980	else
1981	{
1982	curCost = costDown ;
1983	}
1984	}
1985	}
1986	else
1987	{
1988	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1989	curChange = 1 ;
1990
1991	if(n<firstNZPosInCG)
1992	{
1993	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1994	if(thissignbit != signbit )
1995	{
1996	curCost = MAX_INT64;
1997	}
1998	}
1999	}
2000
2001	if( curCost<minCostInc)
2002	{
2003	minCostInc = curCost ;
2004	finalChange = curChange ;
2005	minPos = uiBlkPos ;
2006	}
2007	}
2008
2009	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
2010	{
2011	finalChange = -1;
2012	}
2013
2014	if(plSrcCoeff[minPos]>=0)
2015	{
2016	piDstCoeff[minPos] += finalChange ;
2017	}
2018	else
2019	{
2020	piDstCoeff[minPos] -= finalChange ;
2021	}
2022	}
2023	}
2024
2025	if(lastCG==1)
2026	{
2027	lastCG=0 ;
2028	}
2029	}
2030	}
2031	}
2032
2033	/** Pattern decision for context derivation process of significant_coeff_flag
2034	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2035	* \param posXCG column of current coefficient group
2036	* \param posYCG row of current coefficient group
2037	* \param width width of the block
2038	* \param height height of the block
2039	* \returns pattern for current coefficient group
2040	*/
2041	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2042	{
2043	if( width == 4 && height == 4 ) return -1;
2044
2045	UInt sigRight = 0;
2046	UInt sigLower = 0;
2047
2048	width >>= 2;
2049	height >>= 2;
2050	if( posXCG < width - 1 )
2051	{
2052	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2053	}
2054	if (posYCG < height - 1 )
2055	{
2056	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2057	}
2058	return sigRight + (sigLower<<1);
2059	}
2060
2061	/** Context derivation process of coeff_abs_significant_flag
2062	* \param patternSigCtx pattern for current coefficient group
2063	* \param posX column of current scan position
2064	* \param posY row of current scan position
2065	* \param log2BlockSize log2 value of block size (square block)
2066	* \param width width of the block
2067	* \param height height of the block
2068	* \param textureType texture type (TEXT_LUMA...)
2069	* \returns ctxInc for current scan position
2070	*/
2071	Int TComTrQuant::getSigCtxInc (
2072	Int patternSigCtx,
2073	UInt scanIdx,
2074	Int posX,
2075	Int posY,
2076	Int log2BlockSize,
2077	TextType textureType
2078	)
2079	{
2080	const Int ctxIndMap[16] =
2081	{
2082	0, 1, 4, 5,
2083	2, 3, 4, 5,
2084	6, 6, 8, 8,
2085	7, 7, 8, 8
2086	};
2087
2088	if( posX + posY == 0 )
2089	{
2090	return 0;
2091	}
2092
2093	if ( log2BlockSize == 2 )
2094	{
2095	return ctxIndMap[ 4 * posY + posX ];
2096	}
2097
2098	Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2099
2100	Int posXinSubset = posX-((posX>>2)<<2);
2101	Int posYinSubset = posY-((posY>>2)<<2);
2102	Int cnt = 0;
2103	if(patternSigCtx==0)
2104	{
2105	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2106	}
2107	else if(patternSigCtx==1)
2108	{
2109	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2110	}
2111	else if(patternSigCtx==2)
2112	{
2113	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2114	}
2115	else
2116	{
2117	cnt = 2;
2118	}
2119
2120	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2121	}
2122
2123	/** Get the best level in RD sense
2124	* \param rd64CodedCost reference to coded cost
2125	* \param rd64CodedCost0 reference to cost when coefficient is 0
2126	* \param rd64CodedCostSig reference to cost of significant coefficient
2127	* \param lLevelDouble reference to unscaled quantized level
2128	* \param uiMaxAbsLevel scaled quantized level
2129	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2130	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2131	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2132	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2133	* \param iQBits quantization step size
2134	* \param dTemp correction factor
2135	* \param bLast indicates if the coefficient is the last significant
2136	* \returns best quantized transform level for given scan position
2137	* This method calculates the best quantized transform level for a given scan position.
2138	*/
2139	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2140	Double& rd64CodedCost0,
2141	Double& rd64CodedCostSig,
2142	Int lLevelDouble,
2143	UInt uiMaxAbsLevel,
2144	UShort ui16CtxNumSig,
2145	UShort ui16CtxNumOne,
2146	UShort ui16CtxNumAbs,
2147	UShort ui16AbsGoRice,
2148	UInt c1Idx,
2149	UInt c2Idx,
2150	Int iQBits,
2151	Double dTemp,
2152	Bool bLast ) const
2153	{
2154	Double dCurrCostSig = 0;
2155	UInt uiBestAbsLevel = 0;
2156
2157	if( !bLast && uiMaxAbsLevel < 3 )
2158	{
2159	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2160	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2161	if( uiMaxAbsLevel == 0 )
2162	{
2163	return uiBestAbsLevel;
2164	}
2165	}
2166	else
2167	{
2168	rd64CodedCost = MAX_DOUBLE;
2169	}
2170
2171	if( !bLast )
2172	{
2173	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2174	}
2175
2176	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2177	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2178	{
2179	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2180	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2181	dCurrCost += dCurrCostSig;
2182
2183	if( dCurrCost < rd64CodedCost )
2184	{
2185	uiBestAbsLevel = uiAbsLevel;
2186	rd64CodedCost = dCurrCost;
2187	rd64CodedCostSig = dCurrCostSig;
2188	}
2189	}
2190
2191	return uiBestAbsLevel;
2192	}
2193
2194	/** Calculates the cost for specific absolute transform level
2195	* \param uiAbsLevel scaled quantized level
2196	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2197	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2198	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2199	* \returns cost of given absolute transform level
2200	*/
2201	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2202	UShort ui16CtxNumOne,
2203	UShort ui16CtxNumAbs,
2204	UShort ui16AbsGoRice
2205	, UInt c1Idx,
2206	UInt c2Idx
2207	) const
2208	{
2209	Double iRate = xGetIEPRate();
2210	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2211
2212	if ( uiAbsLevel >= baseLevel )
2213	{
2214	UInt symbol = uiAbsLevel - baseLevel;
2215	UInt length;
2216	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2217	{
2218	length = symbol>>ui16AbsGoRice;
2219	iRate += (length+1+ui16AbsGoRice)<< 15;
2220	}
2221	else
2222	{
2223	length = ui16AbsGoRice;
2224	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2225	while (symbol >= (1<<length))
2226	{
2227	symbol -= (1<<(length++));
2228	}
2229	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2230	}
2231	if (c1Idx < C1FLAG_NUMBER)
2232	{
2233	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2234
2235	if (c2Idx < C2FLAG_NUMBER)
2236	{
2237	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2238	}
2239	}
2240	}
2241	else
2242	if( uiAbsLevel == 1 )
2243	{
2244	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2245	}
2246	else if( uiAbsLevel == 2 )
2247	{
2248	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2249	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2250	}
2251	else
2252	{
2253	assert (0);
2254	}
2255	return xGetICost( iRate );
2256	}
2257
2258	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2259	UShort ui16CtxNumOne,
2260	UShort ui16CtxNumAbs,
2261	UShort ui16AbsGoRice
2262	, UInt c1Idx,
2263	UInt c2Idx
2264	) const
2265	{
2266	Int iRate = 0;
2267	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2268
2269	if ( uiAbsLevel >= baseLevel )
2270	{
2271	UInt uiSymbol = uiAbsLevel - baseLevel;
2272	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2273	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2274
2275	if( bExpGolomb )
2276	{
2277	uiAbsLevel = uiSymbol - uiMaxVlc;
2278	Int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2279	iRate += iEGS << 15;
2280	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2281	}
2282
2283	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2284	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2285
2286	iRate += ui16NumBins << 15;
2287
2288	if (c1Idx < C1FLAG_NUMBER)
2289	{
2290	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2291
2292	if (c2Idx < C2FLAG_NUMBER)
2293	{
2294	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2295	}
2296	}
2297	}
2298	else
2299	if( uiAbsLevel == 0 )
2300	{
2301	return 0;
2302	}
2303	else if( uiAbsLevel == 1 )
2304	{
2305	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2306	}
2307	else if( uiAbsLevel == 2 )
2308	{
2309	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2310	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2311	}
2312	else
2313	{
2314	assert(0);
2315	}
2316	return iRate;
2317	}
2318
2319	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2320	UShort ui16CtxNumSig ) const
2321	{
2322	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2323	}
2324
2325	/** Calculates the cost of signaling the last significant coefficient in the block
2326	* \param uiPosX X coordinate of the last significant coefficient
2327	* \param uiPosY Y coordinate of the last significant coefficient
2328	* \returns cost of last significant coefficient
2329	*/
2330	/*
2331	* \param uiWidth width of the transform unit (TU)
2332	*/
2333	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2334	const UInt uiPosY ) const
2335	{
2336	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2337	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2338	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2339	if( uiCtxX > 3 )
2340	{
2341	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2342	}
2343	if( uiCtxY > 3 )
2344	{
2345	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2346	}
2347	return xGetICost( uiCost );
2348	}
2349
2350	/** Calculates the cost for specific absolute transform level
2351	* \param uiAbsLevel scaled quantized level
2352	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2353	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2354	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2355	* \returns cost of given absolute transform level
2356	*/
2357	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2358	UShort ui16CtxNumSig ) const
2359	{
2360	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2361	}
2362
2363	/** Get the cost for a specific rate
2364	* \param dRate rate of a bit
2365	* \returns cost at the specific rate
2366	*/
2367	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2368	{
2369	return m_dLambda * dRate;
2370	}
2371
2372	/** Get the cost of an equal probable bit
2373	* \returns cost of equal probable bit
2374	*/
2375	__inline Double TComTrQuant::xGetIEPRate ( ) const
2376	{
2377	return 32768;
2378	}
2379
2380	/** Context derivation process of coeff_abs_significant_flag
2381	* \param uiSigCoeffGroupFlag significance map of L1
2382	* \param uiBlkX column of current scan position
2383	* \param uiBlkY row of current scan position
2384	* \param uiLog2BlkSize log2 value of block size
2385	* \returns ctxInc for current scan position
2386	*/
2387	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2388	const UInt uiCGPosX,
2389	const UInt uiCGPosY,
2390	Int width, Int height)
2391	{
2392	UInt uiRight = 0;
2393	UInt uiLower = 0;
2394
2395	width >>= 2;
2396	height >>= 2;
2397	if( uiCGPosX < width - 1 )
2398	{
2399	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2400	}
2401	if (uiCGPosY < height - 1 )
2402	{
2403	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2404	}
2405	return (uiRight \|\| uiLower);
2406
2407	}
2408	/** set quantized matrix coefficient for encode
2409	* \param scalingList quantaized matrix address
2410	*/
2411	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2412	{
2413	UInt size,list;
2414	UInt qp;
2415
2416	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2417	{
2418	for(list = 0; list < g_scalingListNum[size]; list++)
2419	{
2420	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2421	{
2422	xSetScalingListEnc(scalingList,list,size,qp);
2423	xSetScalingListDec(scalingList,list,size,qp);
2424	setErrScaleCoeff(list,size,qp);
2425	}
2426	}
2427	}
2428	}
2429	/** set quantized matrix coefficient for decode
2430	* \param scalingList quantaized matrix address
2431	*/
2432	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2433	{
2434	UInt size,list;
2435	UInt qp;
2436
2437	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2438	{
2439	for(list = 0; list < g_scalingListNum[size]; list++)
2440	{
2441	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2442	{
2443	xSetScalingListDec(scalingList,list,size,qp);
2444	}
2445	}
2446	}
2447	}
2448	/** set error scale coefficients
2449	* \param list List ID
2450	* \param uiSize Size
2451	* \param uiQP Quantization parameter
2452	*/
2453	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp)
2454	{
2455
2456	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2457	Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
2458	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
2459
2460	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2461	Int *piQuantcoeff;
2462	Double *pdErrScale;
2463	piQuantcoeff = getQuantCoeff(list, qp,size);
2464	pdErrScale = getErrScaleCoeff(list, size, qp);
2465
2466	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2467	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2468	for(i=0;i<uiMaxNumCoeff;i++)
2469	{
2470	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8)));
2471	}
2472	}
2473
2474	/** set quantized matrix coefficient for encode
2475	* \param scalingList quantaized matrix address
2476	* \param listId List index
2477	* \param sizeId size index
2478	* \param uiQP Quantization parameter
2479	*/
2480	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2481	{
2482	UInt width = g_scalingListSizeX[sizeId];
2483	UInt height = g_scalingListSizeX[sizeId];
2484	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2485	Int *quantcoeff;
2486	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2487	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2488
2489	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2490	}
2491	/** set quantized matrix coefficient for decode
2492	* \param scalingList quantaized matrix address
2493	* \param list List index
2494	* \param size size index
2495	* \param uiQP Quantization parameter
2496	*/
2497	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2498	{
2499	UInt width = g_scalingListSizeX[sizeId];
2500	UInt height = g_scalingListSizeX[sizeId];
2501	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2502	Int *dequantcoeff;
2503	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2504
2505	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2506	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2507	}
2508
2509	/** set flat matrix value to quantized coefficient
2510	*/
2511	Void TComTrQuant::setFlatScalingList()
2512	{
2513	UInt size,list;
2514	UInt qp;
2515
2516	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2517	{
2518	for(list = 0; list < g_scalingListNum[size]; list++)
2519	{
2520	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2521	{
2522	xsetFlatScalingList(list,size,qp);
2523	setErrScaleCoeff(list,size,qp);
2524	}
2525	}
2526	}
2527	}
2528
2529	/** set flat matrix value to quantized coefficient
2530	* \param list List ID
2531	* \param uiQP Quantization parameter
2532	* \param uiSize Size
2533	*/
2534	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2535	{
2536	UInt i,num = g_scalingListSize[size];
2537	Int *quantcoeff;
2538	Int *dequantcoeff;
2539	Int quantScales = g_quantScales[qp];
2540	Int invQuantScales = g_invQuantScales[qp]<<4;
2541
2542	quantcoeff = getQuantCoeff(list, qp, size);
2543	dequantcoeff = getDequantCoeff(list, qp, size);
2544
2545	for(i=0;i<num;i++)
2546	{
2547	*quantcoeff++ = quantScales;
2548	*dequantcoeff++ = invQuantScales;
2549	}
2550	}
2551
2552	/** set quantized matrix coefficient for encode
2553	* \param coeff quantaized matrix address
2554	* \param quantcoeff quantaized matrix address
2555	* \param quantScales Q(QP%6)
2556	* \param height height
2557	* \param width width
2558	* \param ratio ratio for upscale
2559	* \param sizuNum matrix size
2560	* \param dc dc parameter
2561	*/
2562	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2563	{
2564	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2565	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2566	for(UInt j=0;j<height;j++)
2567	{
2568	for(UInt i=0;i<width;i++)
2569	{
2570	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2571	}
2572	}
2573	if(ratio > 1)
2574	{
2575	quantcoeff[0] = quantScales / dc;
2576	}
2577	}
2578	/** set quantized matrix coefficient for decode
2579	* \param coeff quantaized matrix address
2580	* \param dequantcoeff quantaized matrix address
2581	* \param invQuantScales IQ(QP%6))
2582	* \param height height
2583	* \param width width
2584	* \param ratio ratio for upscale
2585	* \param sizuNum matrix size
2586	* \param dc dc parameter
2587	*/
2588	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2589	{
2590	for(UInt j=0;j<height;j++)
2591	{
2592	for(UInt i=0;i<width;i++)
2593	{
2594	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2595	}
2596	}
2597	if(ratio > 1)
2598	{
2599	dequantcoeff[0] = invQuantScales * dc;
2600	}
2601	}
2602
2603	/** initialization process of scaling list array
2604	*/
2605	Void TComTrQuant::initScalingList()
2606	{
2607	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2608	{
2609	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2610	{
2611	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2612	{
2613	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2614	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2615	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
2616	}
2617	}
2618	}
2619	// alias list [1] as [3].
2620	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2621	{
2622	m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp];
2623	m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp];
2624	m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp];
2625	}
2626	}
2627	/** destroy quantization matrix array
2628	*/
2629	Void TComTrQuant::destroyScalingList()
2630	{
2631	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2632	{
2633	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2634	{
2635	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2636	{
2637	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
2638	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
2639	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
2640	}
2641	}
2642	}
2643	}
2644
2645	//! \}

Note: See TracBrowser for help on using the repository browser.

JCT-VC SHVC

Context navigation

source: SHVCSoftware/branches/HM-10.0-dev-SHM/source/Lib/TLibCommon/TComTrQuant.cpp @ 1430

Download in other formats: