Context navigation

TComTrQuant.cpp @ 1606

Visit:

Last change on this file since 1606 was 320, checked in by seregin, 11 years ago
remove INTRA_BL_DST4x4
Property svn:eol-style set to `native`
File size: 83.7 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2013, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(Int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_useRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
203
204	if(qpScaled < 0)
205	{
206	qpScaled = qpScaled + qpBdOffset;
207	}
208	else
209	{
210	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
211	}
212	}
213	m_cQP.setQpParam( qpScaled );
214	}
215
216	#if MATRIX_MULT
217	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
218	* \param block pointer to input data (residual)
219	* \param coeff pointer to output data (transform coefficients)
220	* \param uiStride stride of input data
221	* \param uiTrSize transform size (uiTrSize x uiTrSize)
222	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
223	*/
224	void xTr(Int bitDepth, Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
225	{
226	Int i,j,k,iSum;
227	Int tmp[32*32];
228	const Short *iT;
229	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
230
231	if (uiTrSize==4)
232	{
233	iT = g_aiT4[0];
234	}
235	else if (uiTrSize==8)
236	{
237	iT = g_aiT8[0];
238	}
239	else if (uiTrSize==16)
240	{
241	iT = g_aiT16[0];
242	}
243	else if (uiTrSize==32)
244	{
245	iT = g_aiT32[0];
246	}
247	else
248	{
249	assert(0);
250	}
251
252	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
253	Int add_1st = 1<<(shift_1st-1);
254	Int shift_2nd = uiLog2TrSize + 6;
255	Int add_2nd = 1<<(shift_2nd-1);
256
257	/* Horizontal transform */
258
259	if (uiTrSize==4)
260	{
261	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
262	{
263	iT = g_as_DST_MAT_4[0];
264	}
265	}
266	for (i=0; i<uiTrSize; i++)
267	{
268	for (j=0; j<uiTrSize; j++)
269	{
270	iSum = 0;
271	for (k=0; k<uiTrSize; k++)
272	{
273	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
274	}
275	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
276	}
277	}
278
279	/* Vertical transform */
280	if (uiTrSize==4)
281	{
282	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
283	{
284	iT = g_as_DST_MAT_4[0];
285	}
286	else
287	{
288	iT = g_aiT4[0];
289	}
290	}
291	for (i=0; i<uiTrSize; i++)
292	{
293	for (j=0; j<uiTrSize; j++)
294	{
295	iSum = 0;
296	for (k=0; k<uiTrSize; k++)
297	{
298	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
299	}
300	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
301	}
302	}
303	}
304
305	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
306	* \param coeff pointer to input data (transform coefficients)
307	* \param block pointer to output data (residual)
308	* \param uiStride stride of output data
309	* \param uiTrSize transform size (uiTrSize x uiTrSize)
310	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
311	*/
312	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
313	{
314	Int i,j,k,iSum;
315	Int tmp[32*32];
316	const Short *iT;
317
318	if (uiTrSize==4)
319	{
320	iT = g_aiT4[0];
321	}
322	else if (uiTrSize==8)
323	{
324	iT = g_aiT8[0];
325	}
326	else if (uiTrSize==16)
327	{
328	iT = g_aiT16[0];
329	}
330	else if (uiTrSize==32)
331	{
332	iT = g_aiT32[0];
333	}
334	else
335	{
336	assert(0);
337	}
338
339	Int shift_1st = SHIFT_INV_1ST;
340	Int add_1st = 1<<(shift_1st-1);
341	Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8;
342	Int add_2nd = 1<<(shift_2nd-1);
343	if (uiTrSize==4)
344	{
345	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
346	{
347	iT = g_as_DST_MAT_4[0];
348	}
349	}
350
351	/* Horizontal transform */
352	for (i=0; i<uiTrSize; i++)
353	{
354	for (j=0; j<uiTrSize; j++)
355	{
356	iSum = 0;
357	for (k=0; k<uiTrSize; k++)
358	{
359	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
360	}
361	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
362	}
363	}
364
365	if (uiTrSize==4)
366	{
367	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
368	{
369	iT = g_as_DST_MAT_4[0];
370	}
371	else
372	{
373	iT = g_aiT4[0];
374	}
375	}
376
377	/* Vertical transform */
378	for (i=0; i<uiTrSize; i++)
379	{
380	for (j=0; j<uiTrSize; j++)
381	{
382	iSum = 0;
383	for (k=0; k<uiTrSize; k++)
384	{
385	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
386	}
387	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
388	}
389	}
390	}
391
392	#else //MATRIX_MULT
393
394	/** 4x4 forward transform implemented using partial butterfly structure (1D)
395	* \param src input data (residual)
396	* \param dst output data (transform coefficients)
397	* \param shift specifies right shift after 1D transform
398	*/
399
400	void partialButterfly4(Short src,Short dst,Int shift, Int line)
401	{
402	Int j;
403	Int E[2],O[2];
404	Int add = 1<<(shift-1);
405
406	for (j=0; j<line; j++)
407	{
408	/* E and O */
409	E[0] = src[0] + src[3];
410	O[0] = src[0] - src[3];
411	E[1] = src[1] + src[2];
412	O[1] = src[1] - src[2];
413
414	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
415	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
416	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
417	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
418
419	src += 4;
420	dst ++;
421	}
422	}
423
424	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
425	// give identical results
426	void fastForwardDst(Short block,Short coeff,Int shift) // input block, output coeff
427	{
428	Int i, c[4];
429	Int rnd_factor = 1<<(shift-1);
430	for (i=0; i<4; i++)
431	{
432	// Intermediate Variables
433	c[0] = block[4i+0] + block[4i+3];
434	c[1] = block[4i+1] + block[4i+3];
435	c[2] = block[4i+0] - block[4i+1];
436	c[3] = 74* block[4*i+2];
437
438	coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
439	coeff[ 4+i] = ( 74 * (block[4i+0]+ block[4i+1] - block[4*i+3]) + rnd_factor ) >> shift;
440	coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
441	coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
442	}
443	}
444
445	void fastInverseDst(Short tmp,Short block,Int shift) // input tmp, output block
446	{
447	Int i, c[4];
448	Int rnd_factor = 1<<(shift-1);
449	for (i=0; i<4; i++)
450	{
451	// Intermediate Variables
452	c[0] = tmp[ i] + tmp[ 8+i];
453	c[1] = tmp[8+i] + tmp[12+i];
454	c[2] = tmp[ i] - tmp[12+i];
455	c[3] = 74* tmp[4+i];
456
457	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
458	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
459	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
460	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
461	}
462	}
463
464	void partialButterflyInverse4(Short src,Short dst,Int shift, Int line)
465	{
466	Int j;
467	Int E[2],O[2];
468	Int add = 1<<(shift-1);
469
470	for (j=0; j<line; j++)
471	{
472	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
473	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
474	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
475	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
476	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
477
478	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
479	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
480	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
481	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
482	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
483
484	src ++;
485	dst += 4;
486	}
487	}
488
489
490	void partialButterfly8(Short src,Short dst,Int shift, Int line)
491	{
492	Int j,k;
493	Int E[4],O[4];
494	Int EE[2],EO[2];
495	Int add = 1<<(shift-1);
496
497	for (j=0; j<line; j++)
498	{
499	/* E and O*/
500	for (k=0;k<4;k++)
501	{
502	E[k] = src[k] + src[7-k];
503	O[k] = src[k] - src[7-k];
504	}
505	/* EE and EO */
506	EE[0] = E[0] + E[3];
507	EO[0] = E[0] - E[3];
508	EE[1] = E[1] + E[2];
509	EO[1] = E[1] - E[2];
510
511	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
512	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
513	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
514	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
515
516	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
517	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
518	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
519	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
520
521	src += 8;
522	dst ++;
523	}
524	}
525
526
527	void partialButterflyInverse8(Short src,Short dst,Int shift, Int line)
528	{
529	Int j,k;
530	Int E[4],O[4];
531	Int EE[2],EO[2];
532	Int add = 1<<(shift-1);
533
534	for (j=0; j<line; j++)
535	{
536	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
537	for (k=0;k<4;k++)
538	{
539	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
540	}
541
542	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
543	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
544	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
545	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
546
547	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
548	E[0] = EE[0] + EO[0];
549	E[3] = EE[0] - EO[0];
550	E[1] = EE[1] + EO[1];
551	E[2] = EE[1] - EO[1];
552	for (k=0;k<4;k++)
553	{
554	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
555	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
556	}
557	src ++;
558	dst += 8;
559	}
560	}
561
562
563	void partialButterfly16(Short src,Short dst,Int shift, Int line)
564	{
565	Int j,k;
566	Int E[8],O[8];
567	Int EE[4],EO[4];
568	Int EEE[2],EEO[2];
569	Int add = 1<<(shift-1);
570
571	for (j=0; j<line; j++)
572	{
573	/* E and O*/
574	for (k=0;k<8;k++)
575	{
576	E[k] = src[k] + src[15-k];
577	O[k] = src[k] - src[15-k];
578	}
579	/* EE and EO */
580	for (k=0;k<4;k++)
581	{
582	EE[k] = E[k] + E[7-k];
583	EO[k] = E[k] - E[7-k];
584	}
585	/* EEE and EEO */
586	EEE[0] = EE[0] + EE[3];
587	EEO[0] = EE[0] - EE[3];
588	EEE[1] = EE[1] + EE[2];
589	EEO[1] = EE[1] - EE[2];
590
591	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
592	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
593	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
594	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
595
596	for (k=2;k<16;k+=4)
597	{
598	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
599	}
600
601	for (k=1;k<16;k+=2)
602	{
603	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
604	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
605	}
606
607	src += 16;
608	dst ++;
609
610	}
611	}
612
613
614	void partialButterflyInverse16(Short src,Short dst,Int shift, Int line)
615	{
616	Int j,k;
617	Int E[8],O[8];
618	Int EE[4],EO[4];
619	Int EEE[2],EEO[2];
620	Int add = 1<<(shift-1);
621
622	for (j=0; j<line; j++)
623	{
624	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
625	for (k=0;k<8;k++)
626	{
627	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
628	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
629	}
630	for (k=0;k<4;k++)
631	{
632	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
633	}
634	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
635	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
636	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
637	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
638
639	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
640	for (k=0;k<2;k++)
641	{
642	EE[k] = EEE[k] + EEO[k];
643	EE[k+2] = EEE[1-k] - EEO[1-k];
644	}
645	for (k=0;k<4;k++)
646	{
647	E[k] = EE[k] + EO[k];
648	E[k+4] = EE[3-k] - EO[3-k];
649	}
650	for (k=0;k<8;k++)
651	{
652	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
653	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
654	}
655	src ++;
656	dst += 16;
657	}
658	}
659
660
661	void partialButterfly32(Short src,Short dst,Int shift, Int line)
662	{
663	Int j,k;
664	Int E[16],O[16];
665	Int EE[8],EO[8];
666	Int EEE[4],EEO[4];
667	Int EEEE[2],EEEO[2];
668	Int add = 1<<(shift-1);
669
670	for (j=0; j<line; j++)
671	{
672	/* E and O*/
673	for (k=0;k<16;k++)
674	{
675	E[k] = src[k] + src[31-k];
676	O[k] = src[k] - src[31-k];
677	}
678	/* EE and EO */
679	for (k=0;k<8;k++)
680	{
681	EE[k] = E[k] + E[15-k];
682	EO[k] = E[k] - E[15-k];
683	}
684	/* EEE and EEO */
685	for (k=0;k<4;k++)
686	{
687	EEE[k] = EE[k] + EE[7-k];
688	EEO[k] = EE[k] - EE[7-k];
689	}
690	/* EEEE and EEEO */
691	EEEE[0] = EEE[0] + EEE[3];
692	EEEO[0] = EEE[0] - EEE[3];
693	EEEE[1] = EEE[1] + EEE[2];
694	EEEO[1] = EEE[1] - EEE[2];
695
696	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
697	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
698	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
699	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
700	for (k=4;k<32;k+=8)
701	{
702	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
703	}
704	for (k=2;k<32;k+=4)
705	{
706	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
707	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
708	}
709	for (k=1;k<32;k+=2)
710	{
711	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
712	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
713	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
714	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
715	}
716	src += 32;
717	dst ++;
718	}
719	}
720
721
722	void partialButterflyInverse32(Short src,Short dst,Int shift, Int line)
723	{
724	Int j,k;
725	Int E[16],O[16];
726	Int EE[8],EO[8];
727	Int EEE[4],EEO[4];
728	Int EEEE[2],EEEO[2];
729	Int add = 1<<(shift-1);
730
731	for (j=0; j<line; j++)
732	{
733	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
734	for (k=0;k<16;k++)
735	{
736	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
737	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
738	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
739	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
740	}
741	for (k=0;k<8;k++)
742	{
743	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
744	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
745	}
746	for (k=0;k<4;k++)
747	{
748	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
749	}
750	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
751	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
752	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
753	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
754
755	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
756	EEE[0] = EEEE[0] + EEEO[0];
757	EEE[3] = EEEE[0] - EEEO[0];
758	EEE[1] = EEEE[1] + EEEO[1];
759	EEE[2] = EEEE[1] - EEEO[1];
760	for (k=0;k<4;k++)
761	{
762	EE[k] = EEE[k] + EEO[k];
763	EE[k+4] = EEE[3-k] - EEO[3-k];
764	}
765	for (k=0;k<8;k++)
766	{
767	E[k] = EE[k] + EO[k];
768	E[k+8] = EE[7-k] - EO[7-k];
769	}
770	for (k=0;k<16;k++)
771	{
772	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
773	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
774	}
775	src ++;
776	dst += 32;
777	}
778	}
779
780	/** MxN forward transform (2D)
781	* \param block input data (residual)
782	* \param coeff output data (transform coefficients)
783	* \param iWidth input data (width of transform)
784	* \param iHeight input data (height of transform)
785	*/
786	void xTrMxN(Int bitDepth, Short block,Short coeff, Int iWidth, Int iHeight, UInt uiMode)
787	{
788	Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
789	Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
790
791	Short tmp[ 64 * 64 ];
792
793	if( iWidth == 4 && iHeight == 4)
794	{
795	if (uiMode != REG_DCT)
796	{
797	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
798	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
799	}
800	else
801	{
802	partialButterfly4(block, tmp, shift_1st, iHeight);
803	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
804	}
805
806	}
807	else if( iWidth == 8 && iHeight == 8)
808	{
809	partialButterfly8( block, tmp, shift_1st, iHeight );
810	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
811	}
812	else if( iWidth == 16 && iHeight == 16)
813	{
814	partialButterfly16( block, tmp, shift_1st, iHeight );
815	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
816	}
817	else if( iWidth == 32 && iHeight == 32)
818	{
819	partialButterfly32( block, tmp, shift_1st, iHeight );
820	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
821	}
822	}
823	/** MxN inverse transform (2D)
824	* \param coeff input data (transform coefficients)
825	* \param block output data (residual)
826	* \param iWidth input data (width of transform)
827	* \param iHeight input data (height of transform)
828	*/
829	void xITrMxN(Int bitDepth, Short coeff,Short block, Int iWidth, Int iHeight, UInt uiMode)
830	{
831	Int shift_1st = SHIFT_INV_1ST;
832	Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8);
833
834	Short tmp[ 64*64];
835	if( iWidth == 4 && iHeight == 4)
836	{
837	if (uiMode != REG_DCT)
838	{
839	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
840	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
841	}
842	else
843	{
844	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
845	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
846	}
847	}
848	else if( iWidth == 8 && iHeight == 8)
849	{
850	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
851	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
852	}
853	else if( iWidth == 16 && iHeight == 16)
854	{
855	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
856	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
857	}
858	else if( iWidth == 32 && iHeight == 32)
859	{
860	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
861	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
862	}
863	}
864
865	#endif //MATRIX_MULT
866
867	// To minimize the distortion only. No rate is considered.
868	Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
869	{
870	Int lastCG = -1;
871	Int absSum = 0 ;
872	Int n ;
873
874	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
875	{
876	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
877	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
878	absSum = 0 ;
879
880	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
881	{
882	if( pQCoef[ scan[ n + subPos ]] )
883	{
884	lastNZPosInCG = n;
885	break;
886	}
887	}
888
889	for(n = 0; n <SCAN_SET_SIZE; n++ )
890	{
891	if( pQCoef[ scan[ n + subPos ]] )
892	{
893	firstNZPosInCG = n;
894	break;
895	}
896	}
897
898	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
899	{
900	absSum += pQCoef[ scan[ n + subPos ]];
901	}
902
903	if(lastNZPosInCG>=0 && lastCG==-1)
904	{
905	lastCG = 1 ;
906	}
907
908	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
909	{
910	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
911	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
912	{
913	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
914
915	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
916	{
917	UInt blkPos = scan[ n+subPos ];
918	if(pQCoef[ blkPos ] != 0 )
919	{
920	if(deltaU[blkPos]>0)
921	{
922	curCost = - deltaU[blkPos];
923	curChange=1 ;
924	}
925	else
926	{
927	//curChange =-1;
928	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
929	{
930	curCost=MAX_INT ;
931	}
932	else
933	{
934	curCost = deltaU[blkPos];
935	curChange =-1;
936	}
937	}
938	}
939	else
940	{
941	if(n<firstNZPosInCG)
942	{
943	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
944	if(thisSignBit != signbit )
945	{
946	curCost = MAX_INT;
947	}
948	else
949	{
950	curCost = - (deltaU[blkPos]) ;
951	curChange = 1 ;
952	}
953	}
954	else
955	{
956	curCost = - (deltaU[blkPos]) ;
957	curChange = 1 ;
958	}
959	}
960
961	if( curCost<minCostInc)
962	{
963	minCostInc = curCost ;
964	finalChange = curChange ;
965	minPos = blkPos ;
966	}
967	} //CG loop
968
969	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
970	{
971	finalChange = -1;
972	}
973
974	if(pCoef[minPos]>=0)
975	{
976	pQCoef[minPos] += finalChange ;
977	}
978	else
979	{
980	pQCoef[minPos] -= finalChange ;
981	}
982	} // Hide
983	}
984	if(lastCG==1)
985	{
986	lastCG=0 ;
987	}
988	} // TU loop
989
990	return;
991	}
992
993	Void TComTrQuant::xQuant( TComDataCU* pcCU,
994	Int* pSrc,
995	TCoeff* pDes,
996	#if ADAPTIVE_QP_SELECTION
997	Int*& pArlDes,
998	#endif
999	Int iWidth,
1000	Int iHeight,
1001	UInt& uiAcSum,
1002	TextType eTType,
1003	UInt uiAbsPartIdx )
1004	{
1005	Int* piCoef = pSrc;
1006	TCoeff* piQCoef = pDes;
1007	#if ADAPTIVE_QP_SELECTION
1008	Int* piArlCCoef = pArlDes;
1009	#endif
1010	Int iAdd = 0;
1011
1012	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
1013	if ( useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA))
1014	{
1015	#if ADAPTIVE_QP_SELECTION
1016	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1017	#else
1018	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1019	#endif
1020	}
1021	else
1022	{
1023	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1024
1025	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1026	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1027
1028	Int deltaU[32*32] ;
1029
1030	#if ADAPTIVE_QP_SELECTION
1031	QpParam cQpBase;
1032	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1033
1034	Int qpScaled;
1035	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1036
1037	if(eTType == TEXT_LUMA)
1038	{
1039	qpScaled = iQpBase + qpBDOffset;
1040	}
1041	else
1042	{
1043	Int chromaQPOffset;
1044	if(eTType == TEXT_CHROMA_U)
1045	{
1046	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
1047	}
1048	else
1049	{
1050	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
1051	}
1052	iQpBase = iQpBase + chromaQPOffset;
1053
1054	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1055
1056	if(qpScaled < 0)
1057	{
1058	qpScaled = qpScaled + qpBDOffset;
1059	}
1060	else
1061	{
1062	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1063	}
1064	}
1065	cQpBase.setQpParam(qpScaled);
1066	#endif
1067
1068	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1069	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1070	assert(scalingListType < 6);
1071	Int *piQuantCoeff = 0;
1072	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1073
1074	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1075	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1076
1077	#if ADAPTIVE_QP_SELECTION
1078	Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1079	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1080	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1081	Int iAddC = 1 << (iQBitsC-1);
1082	#else
1083	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1084	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1085	#endif
1086
1087	Int qBits8 = iQBits-8;
1088	for( Int n = 0; n < iWidth*iHeight; n++ )
1089	{
1090	Int iLevel;
1091	Int iSign;
1092	UInt uiBlockPos = n;
1093	iLevel = piCoef[uiBlockPos];
1094	iSign = (iLevel < 0 ? -1: 1);
1095
1096	#if ADAPTIVE_QP_SELECTION
1097	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1098	if( m_bUseAdaptQpSelect )
1099	{
1100	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1101	}
1102	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1103	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1104	#else
1105	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1106	deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1107	#endif
1108	uiAcSum += iLevel;
1109	iLevel *= iSign;
1110	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1111	} // for n
1112	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1113	{
1114	if(uiAcSum>=2)
1115	{
1116	signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1117	}
1118	}
1119	} //if RDOQ
1120	//return;
1121
1122	}
1123
1124	Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1125	{
1126
1127	const TCoeff* piQCoef = pSrc;
1128	Int* piCoef = pDes;
1129
1130	if ( iWidth > (Int)m_uiMaxTrSize )
1131	{
1132	iWidth = m_uiMaxTrSize;
1133	iHeight = m_uiMaxTrSize;
1134	}
1135
1136	Int iShift,iAdd,iCoeffQ;
1137	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1138
1139	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1140
1141	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1142
1143	TCoeff clipQCoef;
1144
1145	if(getUseScalingList())
1146	{
1147	iShift += 4;
1148	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1149
1150	if(iShift > m_cQP.m_iPer)
1151	{
1152	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1153
1154	for( Int n = 0; n < iWidth*iHeight; n++ )
1155	{
1156	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1157	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1158	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1159	}
1160	}
1161	else
1162	{
1163	for( Int n = 0; n < iWidth*iHeight; n++ )
1164	{
1165	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1166	iCoeffQ = Clip3( -32768, 32767, clipQCoef * piDequantCoef[n] ); // Clip to avoid possible overflow in following shift left operation
1167	piCoef[n] = Clip3( -32768, 32767, iCoeffQ << ( m_cQP.m_iPer - iShift ) );
1168	}
1169	}
1170	}
1171	else
1172	{
1173	iAdd = 1 << (iShift-1);
1174	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1175
1176	for( Int n = 0; n < iWidth*iHeight; n++ )
1177	{
1178	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1179	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1180	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1181	}
1182	}
1183	}
1184
1185	Void TComTrQuant::init( UInt uiMaxTrSize,
1186	Bool bUseRDOQ,
1187	Bool bUseRDOQTS,
1188	Bool bEnc, Bool useTransformSkipFast
1189	#if ADAPTIVE_QP_SELECTION
1190	, Bool bUseAdaptQpSelect
1191	#endif
1192	)
1193	{
1194	m_uiMaxTrSize = uiMaxTrSize;
1195	m_bEnc = bEnc;
1196	m_useRDOQ = bUseRDOQ;
1197	m_useRDOQTS = bUseRDOQTS;
1198	#if ADAPTIVE_QP_SELECTION
1199	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1200	#endif
1201	m_useTransformSkipFast = useTransformSkipFast;
1202	}
1203
1204	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1205	Pel* pcResidual,
1206	UInt uiStride,
1207	TCoeff* rpcCoeff,
1208	#if ADAPTIVE_QP_SELECTION
1209	Int*& rpcArlCoeff,
1210	#endif
1211	UInt uiWidth,
1212	UInt uiHeight,
1213	UInt& uiAbsSum,
1214	TextType eTType,
1215	UInt uiAbsPartIdx,
1216	Bool useTransformSkip
1217	)
1218	{
1219	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1220	{
1221	uiAbsSum=0;
1222	for (UInt k = 0; k<uiHeight; k++)
1223	{
1224	for (UInt j = 0; j<uiWidth; j++)
1225	{
1226	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1227	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1228	}
1229	}
1230	return;
1231	}
1232	UInt uiMode; //luma intra pred
1233	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1234	{
1235	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1236	}
1237	else
1238	{
1239	uiMode = REG_DCT;
1240	}
1241
1242	uiAbsSum = 0;
1243	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1244	Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1245	if(useTransformSkip)
1246	{
1247	xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1248	}
1249	else
1250	{
1251	xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1252	}
1253	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1254	#if ADAPTIVE_QP_SELECTION
1255	rpcArlCoeff,
1256	#endif
1257	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1258	}
1259
1260	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1261	{
1262	if(transQuantBypass)
1263	{
1264	for (UInt k = 0; k<uiHeight; k++)
1265	{
1266	for (UInt j = 0; j<uiWidth; j++)
1267	{
1268	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1269	}
1270	}
1271	return;
1272	}
1273	Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1274	xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1275	if(useTransformSkip == true)
1276	{
1277	xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1278	}
1279	else
1280	{
1281	xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1282	}
1283	}
1284
1285	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1286	{
1287	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1288	{
1289	return;
1290	}
1291	const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1292
1293	if( uiTrMode == stopTrMode )
1294	{
1295	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1296	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1297	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1298	{
1299	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1300	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1301	{
1302	return;
1303	}
1304	uiWidth <<= 1;
1305	uiHeight <<= 1;
1306	}
1307	Pel* pResi = rpcResidual + uiAddr;
1308	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1309	assert(scalingListType < 6);
1310	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1311	}
1312	else
1313	{
1314	uiTrMode++;
1315	uiWidth >>= 1;
1316	uiHeight >>= 1;
1317	Int trWidth = uiWidth, trHeight = uiHeight;
1318	UInt uiAddrOffset = trHeight * uiStride;
1319	UInt uiCoefOffset = trWidth * trHeight;
1320	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1321	{
1322	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1323	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1324	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1325	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1326	}
1327	}
1328	}
1329
1330	// ------------------------------------------------------------------------------------------------
1331	// Logical transform
1332	// ------------------------------------------------------------------------------------------------
1333
1334	/** Wrapper function between HM interface and core NxN forward transform (2D)
1335	* \param piBlkResi input data (residual)
1336	* \param psCoeff output data (transform coefficients)
1337	* \param uiStride stride of input residual data
1338	* \param iSize transform size (iSize x iSize)
1339	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1340	*/
1341	Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1342	{
1343	#if MATRIX_MULT
1344	Int iSize = iWidth;
1345	xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1346	#else
1347	Int j;
1348	Short block[ 32 * 32 ];
1349	Short coeff[ 32 * 32 ];
1350	for (j = 0; j < iHeight; j++)
1351	{
1352	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) );
1353	}
1354	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );
1355	for ( j = 0; j < iHeight * iWidth; j++ )
1356	{
1357	psCoeff[ j ] = coeff[ j ];
1358	}
1359	#endif
1360	}
1361
1362
1363	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1364	* \param plCoef input data (transform coefficients)
1365	* \param pResidual output data (residual)
1366	* \param uiStride stride of input residual data
1367	* \param iSize transform size (iSize x iSize)
1368	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1369	*/
1370	Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1371	{
1372	#if MATRIX_MULT
1373	Int iSize = iWidth;
1374	xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1375	#else
1376	Int j;
1377	{
1378	Short block[ 32 * 32 ];
1379	Short coeff[ 32 * 32 ];
1380	for ( j = 0; j < iHeight * iWidth; j++ )
1381	{
1382	coeff[j] = (Short)plCoef[j];
1383	}
1384	xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode );
1385	{
1386	for ( j = 0; j < iHeight; j++ )
1387	{
1388	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );
1389	}
1390	}
1391	return ;
1392	}
1393	#endif
1394	}
1395
1396	/** Wrapper function between HM interface and core 4x4 transform skipping
1397	* \param piBlkResi input data (residual)
1398	* \param psCoeff output data (transform coefficients)
1399	* \param uiStride stride of input residual data
1400	* \param iSize transform size (iSize x iSize)
1401	*/
1402	Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1403	{
1404	assert( width == height );
1405	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1406	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1407	UInt transformSkipShift;
1408	Int j,k;
1409	if(shift >= 0)
1410	{
1411	transformSkipShift = shift;
1412	for (j = 0; j < height; j++)
1413	{
1414	for(k = 0; k < width; k ++)
1415	{
1416	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1417	}
1418	}
1419	}
1420	else
1421	{
1422	//The case when uiBitDepth > 13
1423	Int offset;
1424	transformSkipShift = -shift;
1425	offset = (1 << (transformSkipShift - 1));
1426	for (j = 0; j < height; j++)
1427	{
1428	for(k = 0; k < width; k ++)
1429	{
1430	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1431	}
1432	}
1433	}
1434	}
1435
1436	/** Wrapper function between HM interface and core NxN transform skipping
1437	* \param plCoef input data (coefficients)
1438	* \param pResidual output data (residual)
1439	* \param uiStride stride of input residual data
1440	* \param iSize transform size (iSize x iSize)
1441	*/
1442	Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1443	{
1444	assert( width == height );
1445	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1446	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1447	UInt transformSkipShift;
1448	Int j,k;
1449	if(shift > 0)
1450	{
1451	Int offset;
1452	transformSkipShift = shift;
1453	offset = (1 << (transformSkipShift -1));
1454	for ( j = 0; j < height; j++ )
1455	{
1456	for(k = 0; k < width; k ++)
1457	{
1458	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1459	}
1460	}
1461	}
1462	else
1463	{
1464	//The case when uiBitDepth >= 13
1465	transformSkipShift = - shift;
1466	for ( j = 0; j < height; j++ )
1467	{
1468	for(k = 0; k < width; k ++)
1469	{
1470	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1471	}
1472	}
1473	}
1474	}
1475
1476	/** RDOQ with CABAC
1477	* \param pcCU pointer to coding unit structure
1478	* \param plSrcCoeff pointer to input buffer
1479	* \param piDstCoeff reference to pointer to output buffer
1480	* \param uiWidth block width
1481	* \param uiHeight block height
1482	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1483	* \param eTType plane type / luminance or chrominance
1484	* \param uiAbsPartIdx absolute partition index
1485	* \returns Void
1486	* Rate distortion optimized quantization for entropy
1487	* coding engines using probability models like CABAC
1488	*/
1489	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1490	Int* plSrcCoeff,
1491	TCoeff* piDstCoeff,
1492	#if ADAPTIVE_QP_SELECTION
1493	Int*& piArlDstCoeff,
1494	#endif
1495	UInt uiWidth,
1496	UInt uiHeight,
1497	UInt& uiAbsSum,
1498	TextType eTType,
1499	UInt uiAbsPartIdx )
1500	{
1501	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1502
1503	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1504	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1505	UInt uiGoRiceParam = 0;
1506	Double d64BlockUncodedCost = 0;
1507	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1508	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1509	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1510	assert(scalingListType < 6);
1511
1512	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1513	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
1514	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1515	Int *piQCoef = piQCoefOrg;
1516	Double *pdErrScale = pdErrScaleOrg;
1517	#if ADAPTIVE_QP_SELECTION
1518	Int iQBitsC = iQBits - ARL_C_PRECISION;
1519	Int iAddC = 1 << (iQBitsC-1);
1520	#endif
1521	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1522
1523	#if ADAPTIVE_QP_SELECTION
1524	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1525	#endif
1526
1527	Double pdCostCoeff [ 32 * 32 ];
1528	Double pdCostSig [ 32 * 32 ];
1529	Double pdCostCoeff0[ 32 * 32 ];
1530	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1531	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1532	Int rateIncUp [ 32 * 32 ];
1533	Int rateIncDown [ 32 * 32 ];
1534	Int sigRateDelta[ 32 * 32 ];
1535	Int deltaU [ 32 * 32 ];
1536	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1537	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1538	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1539	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1540
1541	const UInt * scanCG;
1542	{
1543	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1544	if( uiLog2BlkSize == 3 )
1545	{
1546	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1547	}
1548	else if( uiLog2BlkSize == 5 )
1549	{
1550	scanCG = g_sigLastScanCG32x32;
1551	}
1552	}
1553	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1554	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1555	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1556	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1557	Int iCGLastScanPos = -1;
1558
1559	UInt uiCtxSet = 0;
1560	Int c1 = 1;
1561	Int c2 = 0;
1562	Double d64BaseCost = 0;
1563	Int iLastScanPos = -1;
1564
1565	UInt c1Idx = 0;
1566	UInt c2Idx = 0;
1567	Int baseLevel;
1568
1569	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1570
1571	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1572	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1573
1574	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1575	Int iScanPos;
1576	coeffGroupRDStats rdStats;
1577
1578	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1579	{
1580	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1581	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1582	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1583	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1584
1585	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1586	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1587	{
1588	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1589	//===== quantization =====
1590	UInt uiBlkPos = scan[iScanPos];
1591	// set coeff
1592	Int uiQ = piQCoef[uiBlkPos];
1593	Double dTemp = pdErrScale[uiBlkPos];
1594	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1595	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1596	#if ADAPTIVE_QP_SELECTION
1597	if( m_bUseAdaptQpSelect )
1598	{
1599	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1600	}
1601	#endif
1602	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1603
1604	Double dErr = Double( lLevelDouble );
1605	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1606	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1607	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1608
1609	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1610	{
1611	iLastScanPos = iScanPos;
1612	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1613	iCGLastScanPos = iCGScanPos;
1614	}
1615
1616	if ( iLastScanPos >= 0 )
1617	{
1618	//===== coefficient level estimation =====
1619	UInt uiLevel;
1620	UInt uiOneCtx = 4 * uiCtxSet + c1;
1621	UInt uiAbsCtx = uiCtxSet + c2;
1622
1623	if( iScanPos == iLastScanPos )
1624	{
1625	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1626	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1627	c1Idx, c2Idx, iQBits, dTemp, 1 );
1628	}
1629	else
1630	{
1631	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1632	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1633	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
1634	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1635	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1636	c1Idx, c2Idx, iQBits, dTemp, 0 );
1637	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1638	}
1639	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1640	if( uiLevel > 0 )
1641	{
1642	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1643	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1644	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1645	}
1646	else // uiLevel == 0
1647	{
1648	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1649	}
1650	piDstCoeff[ uiBlkPos ] = uiLevel;
1651	d64BaseCost += pdCostCoeff [ iScanPos ];
1652
1653
1654	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1655	if( uiLevel >= baseLevel )
1656	{
1657	if(uiLevel > 3*(1<<uiGoRiceParam))
1658	{
1659	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1660	}
1661	}
1662	if ( uiLevel >= 1)
1663	{
1664	c1Idx ++;
1665	}
1666
1667	//===== update bin model =====
1668	if( uiLevel > 1 )
1669	{
1670	c1 = 0;
1671	c2 += (c2 < 2);
1672	c2Idx ++;
1673	}
1674	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1675	{
1676	c1++;
1677	}
1678
1679	//===== context set update =====
1680	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1681	{
1682	c2 = 0;
1683	uiGoRiceParam = 0;
1684
1685	c1Idx = 0;
1686	c2Idx = 0;
1687	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1688	if( c1 == 0 )
1689	{
1690	uiCtxSet++;
1691	}
1692	c1 = 1;
1693	}
1694	}
1695	else
1696	{
1697	d64BaseCost += pdCostCoeff0[ iScanPos ];
1698	}
1699	rdStats.d64SigCost += pdCostSig[ iScanPos ];
1700	if (iScanPosinCG == 0 )
1701	{
1702	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
1703	}
1704	if (piDstCoeff[ uiBlkPos ] )
1705	{
1706	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1707	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
1708	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
1709	if ( iScanPosinCG != 0 )
1710	{
1711	rdStats.iNNZbeforePos0++;
1712	}
1713	}
1714	} //end for (iScanPosinCG)
1715
1716	if (iCGLastScanPos >= 0)
1717	{
1718	if( iCGScanPos )
1719	{
1720	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1721	{
1722	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1723	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
1724	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1725	}
1726	else
1727	{
1728	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
1729	{
1730	if ( rdStats.iNNZbeforePos0 == 0 )
1731	{
1732	d64BaseCost -= rdStats.d64SigCost_0;
1733	rdStats.d64SigCost -= rdStats.d64SigCost_0;
1734	}
1735	// rd-cost if SigCoeffGroupFlag = 0, initialization
1736	Double d64CostZeroCG = d64BaseCost;
1737
1738	// add SigCoeffGroupFlag cost to total cost
1739	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1740	if (iCGScanPos < iCGLastScanPos)
1741	{
1742	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
1743	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
1744	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
1745	}
1746
1747	// try to convert the current coeff group from non-zero to all-zero
1748	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
1749	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
1750	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
1751
1752	// if we can save cost, change this block to all-zero block
1753	if ( d64CostZeroCG < d64BaseCost )
1754	{
1755	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
1756	d64BaseCost = d64CostZeroCG;
1757	if (iCGScanPos < iCGLastScanPos)
1758	{
1759	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1760	}
1761	// reset coeffs to 0 in this block
1762	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1763	{
1764	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1765	UInt uiBlkPos = scan[ iScanPos ];
1766
1767	if (piDstCoeff[ uiBlkPos ])
1768	{
1769	piDstCoeff [ uiBlkPos ] = 0;
1770	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
1771	pdCostSig [ iScanPos ] = 0;
1772	}
1773	}
1774	} // end if ( d64CostAllZeros < d64BaseCost )
1775	}
1776	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1777	}
1778	else
1779	{
1780	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1781	}
1782	}
1783	} //end for (iCGScanPos)
1784
1785	//===== estimate last position =====
1786	if ( iLastScanPos < 0 )
1787	{
1788	return;
1789	}
1790
1791	Double d64BestCost = 0;
1792	Int ui16CtxCbf = 0;
1793	Int iBestLastIdxP1 = 0;
1794	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1795	{
1796	ui16CtxCbf = 0;
1797	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
1798	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
1799	}
1800	else
1801	{
1802	ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
1803	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
1804	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
1805	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
1806	}
1807
1808	Bool bFoundLast = false;
1809	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
1810	{
1811	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1812
1813	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
1814	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1815	{
1816	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1817	{
1818	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1819	if (iScanPos > iLastScanPos) continue;
1820	UInt uiBlkPos = scan[iScanPos];
1821
1822	if( piDstCoeff[ uiBlkPos ] )
1823	{
1824	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1825	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1826
1827	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
1828	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
1829
1830	if( totalCost < d64BestCost )
1831	{
1832	iBestLastIdxP1 = iScanPos + 1;
1833	d64BestCost = totalCost;
1834	}
1835	if( piDstCoeff[ uiBlkPos ] > 1 )
1836	{
1837	bFoundLast = true;
1838	break;
1839	}
1840	d64BaseCost -= pdCostCoeff[ iScanPos ];
1841	d64BaseCost += pdCostCoeff0[ iScanPos ];
1842	}
1843	else
1844	{
1845	d64BaseCost -= pdCostSig[ iScanPos ];
1846	}
1847	} //end for
1848	if (bFoundLast)
1849	{
1850	break;
1851	}
1852	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1853	} // end for
1854
1855	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
1856	{
1857	Int blkPos = scan[ scanPos ];
1858	Int level = piDstCoeff[ blkPos ];
1859	uiAbsSum += level;
1860	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
1861	}
1862
1863	//===== clean uncoded coefficients =====
1864	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
1865	{
1866	piDstCoeff[ scan[ scanPos ] ] = 0;
1867	}
1868
1869	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
1870	{
1871	Int64 rdFactor = (Int64) (
1872	g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
1873	/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
1874	+ 0.5);
1875	Int lastCG = -1;
1876	Int absSum = 0 ;
1877	Int n ;
1878
1879	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1880	{
1881	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1882	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1883	absSum = 0 ;
1884
1885	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1886	{
1887	if( piDstCoeff[ scan[ n + subPos ]] )
1888	{
1889	lastNZPosInCG = n;
1890	break;
1891	}
1892	}
1893
1894	for(n = 0; n <SCAN_SET_SIZE; n++ )
1895	{
1896	if( piDstCoeff[ scan[ n + subPos ]] )
1897	{
1898	firstNZPosInCG = n;
1899	break;
1900	}
1901	}
1902
1903	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1904	{
1905	absSum += piDstCoeff[ scan[ n + subPos ]];
1906	}
1907
1908	if(lastNZPosInCG>=0 && lastCG==-1)
1909	{
1910	lastCG = 1;
1911	}
1912
1913	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1914	{
1915	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
1916	if( signbit!=(absSum&0x1) ) // hide but need tune
1917	{
1918	// calculate the cost
1919	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
1920	Int minPos =-1, finalChange=0, curChange=0;
1921
1922	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1923	{
1924	UInt uiBlkPos = scan[ n + subPos ];
1925	if(piDstCoeff[ uiBlkPos ] != 0 )
1926	{
1927	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
1928	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1929	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
1930
1931	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1932	{
1933	costDown -= (4<<15) ;
1934	}
1935
1936	if(costUp<costDown)
1937	{
1938	curCost = costUp;
1939	curChange = 1 ;
1940	}
1941	else
1942	{
1943	curChange = -1 ;
1944	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1945	{
1946	curCost = MAX_INT64 ;
1947	}
1948	else
1949	{
1950	curCost = costDown ;
1951	}
1952	}
1953	}
1954	else
1955	{
1956	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1957	curChange = 1 ;
1958
1959	if(n<firstNZPosInCG)
1960	{
1961	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1962	if(thissignbit != signbit )
1963	{
1964	curCost = MAX_INT64;
1965	}
1966	}
1967	}
1968
1969	if( curCost<minCostInc)
1970	{
1971	minCostInc = curCost ;
1972	finalChange = curChange ;
1973	minPos = uiBlkPos ;
1974	}
1975	}
1976
1977	if(piDstCoeff[minPos] == 32767 \|\| piDstCoeff[minPos] == -32768)
1978	{
1979	finalChange = -1;
1980	}
1981
1982	if(plSrcCoeff[minPos]>=0)
1983	{
1984	piDstCoeff[minPos] += finalChange ;
1985	}
1986	else
1987	{
1988	piDstCoeff[minPos] -= finalChange ;
1989	}
1990	}
1991	}
1992
1993	if(lastCG==1)
1994	{
1995	lastCG=0 ;
1996	}
1997	}
1998	}
1999	}
2000
2001	/** Pattern decision for context derivation process of significant_coeff_flag
2002	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2003	* \param posXCG column of current coefficient group
2004	* \param posYCG row of current coefficient group
2005	* \param width width of the block
2006	* \param height height of the block
2007	* \returns pattern for current coefficient group
2008	*/
2009	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2010	{
2011	if( width == 4 && height == 4 ) return -1;
2012
2013	UInt sigRight = 0;
2014	UInt sigLower = 0;
2015
2016	width >>= 2;
2017	height >>= 2;
2018	if( posXCG < width - 1 )
2019	{
2020	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2021	}
2022	if (posYCG < height - 1 )
2023	{
2024	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2025	}
2026	return sigRight + (sigLower<<1);
2027	}
2028
2029	/** Context derivation process of coeff_abs_significant_flag
2030	* \param patternSigCtx pattern for current coefficient group
2031	* \param posX column of current scan position
2032	* \param posY row of current scan position
2033	* \param log2BlockSize log2 value of block size (square block)
2034	* \param width width of the block
2035	* \param height height of the block
2036	* \param textureType texture type (TEXT_LUMA...)
2037	* \returns ctxInc for current scan position
2038	*/
2039	Int TComTrQuant::getSigCtxInc (
2040	Int patternSigCtx,
2041	UInt scanIdx,
2042	Int posX,
2043	Int posY,
2044	Int log2BlockSize,
2045	TextType textureType
2046	)
2047	{
2048	const Int ctxIndMap[16] =
2049	{
2050	0, 1, 4, 5,
2051	2, 3, 4, 5,
2052	6, 6, 8, 8,
2053	7, 7, 8, 8
2054	};
2055
2056	if( posX + posY == 0 )
2057	{
2058	return 0;
2059	}
2060
2061	if ( log2BlockSize == 2 )
2062	{
2063	return ctxIndMap[ 4 * posY + posX ];
2064	}
2065
2066	Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2067
2068	Int posXinSubset = posX-((posX>>2)<<2);
2069	Int posYinSubset = posY-((posY>>2)<<2);
2070	Int cnt = 0;
2071	if(patternSigCtx==0)
2072	{
2073	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2074	}
2075	else if(patternSigCtx==1)
2076	{
2077	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2078	}
2079	else if(patternSigCtx==2)
2080	{
2081	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2082	}
2083	else
2084	{
2085	cnt = 2;
2086	}
2087
2088	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2089	}
2090
2091	/** Get the best level in RD sense
2092	* \param rd64CodedCost reference to coded cost
2093	* \param rd64CodedCost0 reference to cost when coefficient is 0
2094	* \param rd64CodedCostSig reference to cost of significant coefficient
2095	* \param lLevelDouble reference to unscaled quantized level
2096	* \param uiMaxAbsLevel scaled quantized level
2097	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2098	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2099	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2100	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2101	* \param iQBits quantization step size
2102	* \param dTemp correction factor
2103	* \param bLast indicates if the coefficient is the last significant
2104	* \returns best quantized transform level for given scan position
2105	* This method calculates the best quantized transform level for a given scan position.
2106	*/
2107	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2108	Double& rd64CodedCost0,
2109	Double& rd64CodedCostSig,
2110	Int lLevelDouble,
2111	UInt uiMaxAbsLevel,
2112	UShort ui16CtxNumSig,
2113	UShort ui16CtxNumOne,
2114	UShort ui16CtxNumAbs,
2115	UShort ui16AbsGoRice,
2116	UInt c1Idx,
2117	UInt c2Idx,
2118	Int iQBits,
2119	Double dTemp,
2120	Bool bLast ) const
2121	{
2122	Double dCurrCostSig = 0;
2123	UInt uiBestAbsLevel = 0;
2124
2125	if( !bLast && uiMaxAbsLevel < 3 )
2126	{
2127	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2128	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2129	if( uiMaxAbsLevel == 0 )
2130	{
2131	return uiBestAbsLevel;
2132	}
2133	}
2134	else
2135	{
2136	rd64CodedCost = MAX_DOUBLE;
2137	}
2138
2139	if( !bLast )
2140	{
2141	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2142	}
2143
2144	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2145	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2146	{
2147	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2148	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2149	dCurrCost += dCurrCostSig;
2150
2151	if( dCurrCost < rd64CodedCost )
2152	{
2153	uiBestAbsLevel = uiAbsLevel;
2154	rd64CodedCost = dCurrCost;
2155	rd64CodedCostSig = dCurrCostSig;
2156	}
2157	}
2158
2159	return uiBestAbsLevel;
2160	}
2161
2162	/** Calculates the cost for specific absolute transform level
2163	* \param uiAbsLevel scaled quantized level
2164	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2165	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2166	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2167	* \returns cost of given absolute transform level
2168	*/
2169	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2170	UShort ui16CtxNumOne,
2171	UShort ui16CtxNumAbs,
2172	UShort ui16AbsGoRice
2173	, UInt c1Idx,
2174	UInt c2Idx
2175	) const
2176	{
2177	Double iRate = xGetIEPRate();
2178	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2179
2180	if ( uiAbsLevel >= baseLevel )
2181	{
2182	UInt symbol = uiAbsLevel - baseLevel;
2183	UInt length;
2184	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2185	{
2186	length = symbol>>ui16AbsGoRice;
2187	iRate += (length+1+ui16AbsGoRice)<< 15;
2188	}
2189	else
2190	{
2191	length = ui16AbsGoRice;
2192	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2193	while (symbol >= (1<<length))
2194	{
2195	symbol -= (1<<(length++));
2196	}
2197	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2198	}
2199	if (c1Idx < C1FLAG_NUMBER)
2200	{
2201	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2202
2203	if (c2Idx < C2FLAG_NUMBER)
2204	{
2205	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2206	}
2207	}
2208	}
2209	else
2210	if( uiAbsLevel == 1 )
2211	{
2212	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2213	}
2214	else if( uiAbsLevel == 2 )
2215	{
2216	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2217	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2218	}
2219	else
2220	{
2221	assert (0);
2222	}
2223	return xGetICost( iRate );
2224	}
2225
2226	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2227	UShort ui16CtxNumOne,
2228	UShort ui16CtxNumAbs,
2229	UShort ui16AbsGoRice
2230	, UInt c1Idx,
2231	UInt c2Idx
2232	) const
2233	{
2234	Int iRate = 0;
2235	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2236
2237	if ( uiAbsLevel >= baseLevel )
2238	{
2239	UInt uiSymbol = uiAbsLevel - baseLevel;
2240	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2241	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2242
2243	if( bExpGolomb )
2244	{
2245	uiAbsLevel = uiSymbol - uiMaxVlc;
2246	Int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2247	iRate += iEGS << 15;
2248	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2249	}
2250
2251	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2252	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2253
2254	iRate += ui16NumBins << 15;
2255
2256	if (c1Idx < C1FLAG_NUMBER)
2257	{
2258	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2259
2260	if (c2Idx < C2FLAG_NUMBER)
2261	{
2262	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2263	}
2264	}
2265	}
2266	else
2267	if( uiAbsLevel == 0 )
2268	{
2269	return 0;
2270	}
2271	else if( uiAbsLevel == 1 )
2272	{
2273	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2274	}
2275	else if( uiAbsLevel == 2 )
2276	{
2277	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2278	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2279	}
2280	else
2281	{
2282	assert(0);
2283	}
2284	return iRate;
2285	}
2286
2287	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2288	UShort ui16CtxNumSig ) const
2289	{
2290	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2291	}
2292
2293	/** Calculates the cost of signaling the last significant coefficient in the block
2294	* \param uiPosX X coordinate of the last significant coefficient
2295	* \param uiPosY Y coordinate of the last significant coefficient
2296	* \returns cost of last significant coefficient
2297	*/
2298	/*
2299	* \param uiWidth width of the transform unit (TU)
2300	*/
2301	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2302	const UInt uiPosY ) const
2303	{
2304	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2305	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2306	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2307	if( uiCtxX > 3 )
2308	{
2309	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2310	}
2311	if( uiCtxY > 3 )
2312	{
2313	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2314	}
2315	return xGetICost( uiCost );
2316	}
2317
2318	/** Calculates the cost for specific absolute transform level
2319	* \param uiAbsLevel scaled quantized level
2320	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2321	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2322	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2323	* \returns cost of given absolute transform level
2324	*/
2325	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2326	UShort ui16CtxNumSig ) const
2327	{
2328	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2329	}
2330
2331	/** Get the cost for a specific rate
2332	* \param dRate rate of a bit
2333	* \returns cost at the specific rate
2334	*/
2335	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2336	{
2337	return m_dLambda * dRate;
2338	}
2339
2340	/** Get the cost of an equal probable bit
2341	* \returns cost of equal probable bit
2342	*/
2343	__inline Double TComTrQuant::xGetIEPRate ( ) const
2344	{
2345	return 32768;
2346	}
2347
2348	/** Context derivation process of coeff_abs_significant_flag
2349	* \param uiSigCoeffGroupFlag significance map of L1
2350	* \param uiBlkX column of current scan position
2351	* \param uiBlkY row of current scan position
2352	* \param uiLog2BlkSize log2 value of block size
2353	* \returns ctxInc for current scan position
2354	*/
2355	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2356	const UInt uiCGPosX,
2357	const UInt uiCGPosY,
2358	Int width, Int height)
2359	{
2360	UInt uiRight = 0;
2361	UInt uiLower = 0;
2362
2363	width >>= 2;
2364	height >>= 2;
2365	if( uiCGPosX < width - 1 )
2366	{
2367	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2368	}
2369	if (uiCGPosY < height - 1 )
2370	{
2371	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2372	}
2373	return (uiRight \|\| uiLower);
2374
2375	}
2376	/** set quantized matrix coefficient for encode
2377	* \param scalingList quantaized matrix address
2378	*/
2379	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2380	{
2381	UInt size,list;
2382	UInt qp;
2383
2384	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2385	{
2386	for(list = 0; list < g_scalingListNum[size]; list++)
2387	{
2388	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2389	{
2390	xSetScalingListEnc(scalingList,list,size,qp);
2391	xSetScalingListDec(scalingList,list,size,qp);
2392	setErrScaleCoeff(list,size,qp);
2393	}
2394	}
2395	}
2396	}
2397	/** set quantized matrix coefficient for decode
2398	* \param scalingList quantaized matrix address
2399	*/
2400	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2401	{
2402	UInt size,list;
2403	UInt qp;
2404
2405	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2406	{
2407	for(list = 0; list < g_scalingListNum[size]; list++)
2408	{
2409	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2410	{
2411	xSetScalingListDec(scalingList,list,size,qp);
2412	}
2413	}
2414	}
2415	}
2416	/** set error scale coefficients
2417	* \param list List ID
2418	* \param uiSize Size
2419	* \param uiQP Quantization parameter
2420	*/
2421	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp)
2422	{
2423
2424	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2425	Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
2426	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
2427
2428	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2429	Int *piQuantcoeff;
2430	Double *pdErrScale;
2431	piQuantcoeff = getQuantCoeff(list, qp,size);
2432	pdErrScale = getErrScaleCoeff(list, size, qp);
2433
2434	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2435	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2436	for(i=0;i<uiMaxNumCoeff;i++)
2437	{
2438	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8)));
2439	}
2440	}
2441
2442	/** set quantized matrix coefficient for encode
2443	* \param scalingList quantaized matrix address
2444	* \param listId List index
2445	* \param sizeId size index
2446	* \param uiQP Quantization parameter
2447	*/
2448	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2449	{
2450	UInt width = g_scalingListSizeX[sizeId];
2451	UInt height = g_scalingListSizeX[sizeId];
2452	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2453	Int *quantcoeff;
2454	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2455	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2456
2457	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2458	}
2459	/** set quantized matrix coefficient for decode
2460	* \param scalingList quantaized matrix address
2461	* \param list List index
2462	* \param size size index
2463	* \param uiQP Quantization parameter
2464	*/
2465	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2466	{
2467	UInt width = g_scalingListSizeX[sizeId];
2468	UInt height = g_scalingListSizeX[sizeId];
2469	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2470	Int *dequantcoeff;
2471	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2472
2473	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2474	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2475	}
2476
2477	/** set flat matrix value to quantized coefficient
2478	*/
2479	Void TComTrQuant::setFlatScalingList()
2480	{
2481	UInt size,list;
2482	UInt qp;
2483
2484	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2485	{
2486	for(list = 0; list < g_scalingListNum[size]; list++)
2487	{
2488	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2489	{
2490	xsetFlatScalingList(list,size,qp);
2491	setErrScaleCoeff(list,size,qp);
2492	}
2493	}
2494	}
2495	}
2496
2497	/** set flat matrix value to quantized coefficient
2498	* \param list List ID
2499	* \param uiQP Quantization parameter
2500	* \param uiSize Size
2501	*/
2502	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2503	{
2504	UInt i,num = g_scalingListSize[size];
2505	Int *quantcoeff;
2506	Int *dequantcoeff;
2507	Int quantScales = g_quantScales[qp];
2508	Int invQuantScales = g_invQuantScales[qp]<<4;
2509
2510	quantcoeff = getQuantCoeff(list, qp, size);
2511	dequantcoeff = getDequantCoeff(list, qp, size);
2512
2513	for(i=0;i<num;i++)
2514	{
2515	*quantcoeff++ = quantScales;
2516	*dequantcoeff++ = invQuantScales;
2517	}
2518	}
2519
2520	/** set quantized matrix coefficient for encode
2521	* \param coeff quantaized matrix address
2522	* \param quantcoeff quantaized matrix address
2523	* \param quantScales Q(QP%6)
2524	* \param height height
2525	* \param width width
2526	* \param ratio ratio for upscale
2527	* \param sizuNum matrix size
2528	* \param dc dc parameter
2529	*/
2530	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2531	{
2532	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2533	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2534	for(UInt j=0;j<height;j++)
2535	{
2536	for(UInt i=0;i<width;i++)
2537	{
2538	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2539	}
2540	}
2541	if(ratio > 1)
2542	{
2543	quantcoeff[0] = quantScales / dc;
2544	}
2545	}
2546	/** set quantized matrix coefficient for decode
2547	* \param coeff quantaized matrix address
2548	* \param dequantcoeff quantaized matrix address
2549	* \param invQuantScales IQ(QP%6))
2550	* \param height height
2551	* \param width width
2552	* \param ratio ratio for upscale
2553	* \param sizuNum matrix size
2554	* \param dc dc parameter
2555	*/
2556	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2557	{
2558	for(UInt j=0;j<height;j++)
2559	{
2560	for(UInt i=0;i<width;i++)
2561	{
2562	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2563	}
2564	}
2565	if(ratio > 1)
2566	{
2567	dequantcoeff[0] = invQuantScales * dc;
2568	}
2569	}
2570
2571	/** initialization process of scaling list array
2572	*/
2573	Void TComTrQuant::initScalingList()
2574	{
2575	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2576	{
2577	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2578	{
2579	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2580	{
2581	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2582	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2583	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
2584	}
2585	}
2586	}
2587	// alias list [1] as [3].
2588	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2589	{
2590	m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp];
2591	m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp];
2592	m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp];
2593	}
2594	}
2595	/** destroy quantization matrix array
2596	*/
2597	Void TComTrQuant::destroyScalingList()
2598	{
2599	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2600	{
2601	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2602	{
2603	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2604	{
2605	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
2606	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
2607	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
2608	}
2609	}
2610	}
2611	}
2612
2613	//! \}

Note: See TracBrowser for help on using the repository browser.

JCT-VC SHVC

Context navigation

source: SHVCSoftware/branches/SHM-3.0-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 1606

Download in other formats: