Context navigation

TComTrQuant.cpp @ 757

Visit:

Last change on this file since 757 was 302, checked in by seregin, 11 years ago
update to HM11.0
File size: 87.0 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2013, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(Int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_useRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
203
204	if(qpScaled < 0)
205	{
206	qpScaled = qpScaled + qpBdOffset;
207	}
208	else
209	{
210	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
211	}
212	}
213	m_cQP.setQpParam( qpScaled );
214	}
215
216	#if MATRIX_MULT
217	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
218	* \param block pointer to input data (residual)
219	* \param coeff pointer to output data (transform coefficients)
220	* \param uiStride stride of input data
221	* \param uiTrSize transform size (uiTrSize x uiTrSize)
222	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
223	*/
224	void xTr(Int bitDepth, Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
225	{
226	Int i,j,k,iSum;
227	Int tmp[32*32];
228	const Short *iT;
229	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
230
231	if (uiTrSize==4)
232	{
233	iT = g_aiT4[0];
234	}
235	else if (uiTrSize==8)
236	{
237	iT = g_aiT8[0];
238	}
239	else if (uiTrSize==16)
240	{
241	iT = g_aiT16[0];
242	}
243	else if (uiTrSize==32)
244	{
245	iT = g_aiT32[0];
246	}
247	else
248	{
249	assert(0);
250	}
251
252	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
253	Int add_1st = 1<<(shift_1st-1);
254	Int shift_2nd = uiLog2TrSize + 6;
255	Int add_2nd = 1<<(shift_2nd-1);
256
257	/* Horizontal transform */
258
259	if (uiTrSize==4)
260	{
261	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
262	{
263	iT = g_as_DST_MAT_4[0];
264	}
265	}
266	for (i=0; i<uiTrSize; i++)
267	{
268	for (j=0; j<uiTrSize; j++)
269	{
270	iSum = 0;
271	for (k=0; k<uiTrSize; k++)
272	{
273	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
274	}
275	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
276	}
277	}
278
279	/* Vertical transform */
280	if (uiTrSize==4)
281	{
282	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
283	{
284	iT = g_as_DST_MAT_4[0];
285	}
286	else
287	{
288	iT = g_aiT4[0];
289	}
290	}
291	for (i=0; i<uiTrSize; i++)
292	{
293	for (j=0; j<uiTrSize; j++)
294	{
295	iSum = 0;
296	for (k=0; k<uiTrSize; k++)
297	{
298	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
299	}
300	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
301	}
302	}
303	}
304
305	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
306	* \param coeff pointer to input data (transform coefficients)
307	* \param block pointer to output data (residual)
308	* \param uiStride stride of output data
309	* \param uiTrSize transform size (uiTrSize x uiTrSize)
310	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
311	*/
312	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
313	{
314	Int i,j,k,iSum;
315	Int tmp[32*32];
316	const Short *iT;
317
318	if (uiTrSize==4)
319	{
320	iT = g_aiT4[0];
321	}
322	else if (uiTrSize==8)
323	{
324	iT = g_aiT8[0];
325	}
326	else if (uiTrSize==16)
327	{
328	iT = g_aiT16[0];
329	}
330	else if (uiTrSize==32)
331	{
332	iT = g_aiT32[0];
333	}
334	else
335	{
336	assert(0);
337	}
338
339	Int shift_1st = SHIFT_INV_1ST;
340	Int add_1st = 1<<(shift_1st-1);
341	Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8;
342	Int add_2nd = 1<<(shift_2nd-1);
343	if (uiTrSize==4)
344	{
345	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
346	{
347	iT = g_as_DST_MAT_4[0];
348	}
349	}
350
351	/* Horizontal transform */
352	for (i=0; i<uiTrSize; i++)
353	{
354	for (j=0; j<uiTrSize; j++)
355	{
356	iSum = 0;
357	for (k=0; k<uiTrSize; k++)
358	{
359	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
360	}
361	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
362	}
363	}
364
365	if (uiTrSize==4)
366	{
367	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
368	{
369	iT = g_as_DST_MAT_4[0];
370	}
371	else
372	{
373	iT = g_aiT4[0];
374	}
375	}
376
377	/* Vertical transform */
378	for (i=0; i<uiTrSize; i++)
379	{
380	for (j=0; j<uiTrSize; j++)
381	{
382	iSum = 0;
383	for (k=0; k<uiTrSize; k++)
384	{
385	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
386	}
387	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
388	}
389	}
390	}
391
392	#else //MATRIX_MULT
393
394	/** 4x4 forward transform implemented using partial butterfly structure (1D)
395	* \param src input data (residual)
396	* \param dst output data (transform coefficients)
397	* \param shift specifies right shift after 1D transform
398	*/
399
400	void partialButterfly4(Short src,Short dst,Int shift, Int line)
401	{
402	Int j;
403	Int E[2],O[2];
404	Int add = 1<<(shift-1);
405
406	for (j=0; j<line; j++)
407	{
408	/* E and O */
409	E[0] = src[0] + src[3];
410	O[0] = src[0] - src[3];
411	E[1] = src[1] + src[2];
412	O[1] = src[1] - src[2];
413
414	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
415	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
416	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
417	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
418
419	src += 4;
420	dst ++;
421	}
422	}
423
424	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
425	// give identical results
426	void fastForwardDst(Short block,Short coeff,Int shift) // input block, output coeff
427	{
428	Int i, c[4];
429	Int rnd_factor = 1<<(shift-1);
430	for (i=0; i<4; i++)
431	{
432	// Intermediate Variables
433	c[0] = block[4i+0] + block[4i+3];
434	c[1] = block[4i+1] + block[4i+3];
435	c[2] = block[4i+0] - block[4i+1];
436	c[3] = 74* block[4*i+2];
437
438	coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
439	coeff[ 4+i] = ( 74 * (block[4i+0]+ block[4i+1] - block[4*i+3]) + rnd_factor ) >> shift;
440	coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
441	coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
442	}
443	}
444
445	void fastInverseDst(Short tmp,Short block,Int shift) // input tmp, output block
446	{
447	Int i, c[4];
448	Int rnd_factor = 1<<(shift-1);
449	for (i=0; i<4; i++)
450	{
451	// Intermediate Variables
452	c[0] = tmp[ i] + tmp[ 8+i];
453	c[1] = tmp[8+i] + tmp[12+i];
454	c[2] = tmp[ i] - tmp[12+i];
455	c[3] = 74* tmp[4+i];
456
457	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
458	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
459	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
460	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
461	}
462	}
463
464	void partialButterflyInverse4(Short src,Short dst,Int shift, Int line)
465	{
466	Int j;
467	Int E[2],O[2];
468	Int add = 1<<(shift-1);
469
470	for (j=0; j<line; j++)
471	{
472	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
473	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
474	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
475	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
476	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
477
478	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
479	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
480	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
481	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
482	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
483
484	src ++;
485	dst += 4;
486	}
487	}
488
489
490	void partialButterfly8(Short src,Short dst,Int shift, Int line)
491	{
492	Int j,k;
493	Int E[4],O[4];
494	Int EE[2],EO[2];
495	Int add = 1<<(shift-1);
496
497	for (j=0; j<line; j++)
498	{
499	/* E and O*/
500	for (k=0;k<4;k++)
501	{
502	E[k] = src[k] + src[7-k];
503	O[k] = src[k] - src[7-k];
504	}
505	/* EE and EO */
506	EE[0] = E[0] + E[3];
507	EO[0] = E[0] - E[3];
508	EE[1] = E[1] + E[2];
509	EO[1] = E[1] - E[2];
510
511	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
512	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
513	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
514	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
515
516	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
517	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
518	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
519	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
520
521	src += 8;
522	dst ++;
523	}
524	}
525
526
527	void partialButterflyInverse8(Short src,Short dst,Int shift, Int line)
528	{
529	Int j,k;
530	Int E[4],O[4];
531	Int EE[2],EO[2];
532	Int add = 1<<(shift-1);
533
534	for (j=0; j<line; j++)
535	{
536	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
537	for (k=0;k<4;k++)
538	{
539	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
540	}
541
542	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
543	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
544	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
545	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
546
547	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
548	E[0] = EE[0] + EO[0];
549	E[3] = EE[0] - EO[0];
550	E[1] = EE[1] + EO[1];
551	E[2] = EE[1] - EO[1];
552	for (k=0;k<4;k++)
553	{
554	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
555	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
556	}
557	src ++;
558	dst += 8;
559	}
560	}
561
562
563	void partialButterfly16(Short src,Short dst,Int shift, Int line)
564	{
565	Int j,k;
566	Int E[8],O[8];
567	Int EE[4],EO[4];
568	Int EEE[2],EEO[2];
569	Int add = 1<<(shift-1);
570
571	for (j=0; j<line; j++)
572	{
573	/* E and O*/
574	for (k=0;k<8;k++)
575	{
576	E[k] = src[k] + src[15-k];
577	O[k] = src[k] - src[15-k];
578	}
579	/* EE and EO */
580	for (k=0;k<4;k++)
581	{
582	EE[k] = E[k] + E[7-k];
583	EO[k] = E[k] - E[7-k];
584	}
585	/* EEE and EEO */
586	EEE[0] = EE[0] + EE[3];
587	EEO[0] = EE[0] - EE[3];
588	EEE[1] = EE[1] + EE[2];
589	EEO[1] = EE[1] - EE[2];
590
591	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
592	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
593	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
594	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
595
596	for (k=2;k<16;k+=4)
597	{
598	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
599	}
600
601	for (k=1;k<16;k+=2)
602	{
603	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
604	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
605	}
606
607	src += 16;
608	dst ++;
609
610	}
611	}
612
613
614	void partialButterflyInverse16(Short src,Short dst,Int shift, Int line)
615	{
616	Int j,k;
617	Int E[8],O[8];
618	Int EE[4],EO[4];
619	Int EEE[2],EEO[2];
620	Int add = 1<<(shift-1);
621
622	for (j=0; j<line; j++)
623	{
624	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
625	for (k=0;k<8;k++)
626	{
627	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
628	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
629	}
630	for (k=0;k<4;k++)
631	{
632	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
633	}
634	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
635	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
636	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
637	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
638
639	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
640	for (k=0;k<2;k++)
641	{
642	EE[k] = EEE[k] + EEO[k];
643	EE[k+2] = EEE[1-k] - EEO[1-k];
644	}
645	for (k=0;k<4;k++)
646	{
647	E[k] = EE[k] + EO[k];
648	E[k+4] = EE[3-k] - EO[3-k];
649	}
650	for (k=0;k<8;k++)
651	{
652	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
653	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
654	}
655	src ++;
656	dst += 16;
657	}
658	}
659
660
661	void partialButterfly32(Short src,Short dst,Int shift, Int line)
662	{
663	Int j,k;
664	Int E[16],O[16];
665	Int EE[8],EO[8];
666	Int EEE[4],EEO[4];
667	Int EEEE[2],EEEO[2];
668	Int add = 1<<(shift-1);
669
670	for (j=0; j<line; j++)
671	{
672	/* E and O*/
673	for (k=0;k<16;k++)
674	{
675	E[k] = src[k] + src[31-k];
676	O[k] = src[k] - src[31-k];
677	}
678	/* EE and EO */
679	for (k=0;k<8;k++)
680	{
681	EE[k] = E[k] + E[15-k];
682	EO[k] = E[k] - E[15-k];
683	}
684	/* EEE and EEO */
685	for (k=0;k<4;k++)
686	{
687	EEE[k] = EE[k] + EE[7-k];
688	EEO[k] = EE[k] - EE[7-k];
689	}
690	/* EEEE and EEEO */
691	EEEE[0] = EEE[0] + EEE[3];
692	EEEO[0] = EEE[0] - EEE[3];
693	EEEE[1] = EEE[1] + EEE[2];
694	EEEO[1] = EEE[1] - EEE[2];
695
696	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
697	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
698	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
699	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
700	for (k=4;k<32;k+=8)
701	{
702	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
703	}
704	for (k=2;k<32;k+=4)
705	{
706	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
707	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
708	}
709	for (k=1;k<32;k+=2)
710	{
711	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
712	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
713	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
714	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
715	}
716	src += 32;
717	dst ++;
718	}
719	}
720
721
722	void partialButterflyInverse32(Short src,Short dst,Int shift, Int line)
723	{
724	Int j,k;
725	Int E[16],O[16];
726	Int EE[8],EO[8];
727	Int EEE[4],EEO[4];
728	Int EEEE[2],EEEO[2];
729	Int add = 1<<(shift-1);
730
731	for (j=0; j<line; j++)
732	{
733	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
734	for (k=0;k<16;k++)
735	{
736	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
737	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
738	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
739	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
740	}
741	for (k=0;k<8;k++)
742	{
743	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
744	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
745	}
746	for (k=0;k<4;k++)
747	{
748	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
749	}
750	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
751	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
752	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
753	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
754
755	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
756	EEE[0] = EEEE[0] + EEEO[0];
757	EEE[3] = EEEE[0] - EEEO[0];
758	EEE[1] = EEEE[1] + EEEO[1];
759	EEE[2] = EEEE[1] - EEEO[1];
760	for (k=0;k<4;k++)
761	{
762	EE[k] = EEE[k] + EEO[k];
763	EE[k+4] = EEE[3-k] - EEO[3-k];
764	}
765	for (k=0;k<8;k++)
766	{
767	E[k] = EE[k] + EO[k];
768	E[k+8] = EE[7-k] - EO[7-k];
769	}
770	for (k=0;k<16;k++)
771	{
772	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
773	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
774	}
775	src ++;
776	dst += 32;
777	}
778	}
779
780	/** MxN forward transform (2D)
781	* \param block input data (residual)
782	* \param coeff output data (transform coefficients)
783	* \param iWidth input data (width of transform)
784	* \param iHeight input data (height of transform)
785	*/
786	void xTrMxN(Int bitDepth, Short block,Short coeff, Int iWidth, Int iHeight, UInt uiMode)
787	{
788	Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
789	Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
790
791	Short tmp[ 64 * 64 ];
792
793	if( iWidth == 4 && iHeight == 4)
794	{
795	if (uiMode != REG_DCT)
796	{
797	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
798	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
799	}
800	else
801	{
802	partialButterfly4(block, tmp, shift_1st, iHeight);
803	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
804	}
805
806	}
807	else if( iWidth == 8 && iHeight == 8)
808	{
809	partialButterfly8( block, tmp, shift_1st, iHeight );
810	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
811	}
812	else if( iWidth == 16 && iHeight == 16)
813	{
814	partialButterfly16( block, tmp, shift_1st, iHeight );
815	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
816	}
817	else if( iWidth == 32 && iHeight == 32)
818	{
819	partialButterfly32( block, tmp, shift_1st, iHeight );
820	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
821	}
822	}
823	/** MxN inverse transform (2D)
824	* \param coeff input data (transform coefficients)
825	* \param block output data (residual)
826	* \param iWidth input data (width of transform)
827	* \param iHeight input data (height of transform)
828	*/
829	void xITrMxN(Int bitDepth, Short coeff,Short block, Int iWidth, Int iHeight, UInt uiMode)
830	{
831	Int shift_1st = SHIFT_INV_1ST;
832	Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8);
833
834	Short tmp[ 64*64];
835	if( iWidth == 4 && iHeight == 4)
836	{
837	if (uiMode != REG_DCT)
838	{
839	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
840	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
841	}
842	else
843	{
844	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
845	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
846	}
847	}
848	else if( iWidth == 8 && iHeight == 8)
849	{
850	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
851	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
852	}
853	else if( iWidth == 16 && iHeight == 16)
854	{
855	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
856	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
857	}
858	else if( iWidth == 32 && iHeight == 32)
859	{
860	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
861	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
862	}
863	}
864
865	#endif //MATRIX_MULT
866
867	// To minimize the distortion only. No rate is considered.
868	Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
869	{
870	Int lastCG = -1;
871	Int absSum = 0 ;
872	Int n ;
873
874	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
875	{
876	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
877	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
878	absSum = 0 ;
879
880	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
881	{
882	if( pQCoef[ scan[ n + subPos ]] )
883	{
884	lastNZPosInCG = n;
885	break;
886	}
887	}
888
889	for(n = 0; n <SCAN_SET_SIZE; n++ )
890	{
891	if( pQCoef[ scan[ n + subPos ]] )
892	{
893	firstNZPosInCG = n;
894	break;
895	}
896	}
897
898	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
899	{
900	absSum += pQCoef[ scan[ n + subPos ]];
901	}
902
903	if(lastNZPosInCG>=0 && lastCG==-1)
904	{
905	lastCG = 1 ;
906	}
907
908	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
909	{
910	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
911	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
912	{
913	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
914
915	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
916	{
917	UInt blkPos = scan[ n+subPos ];
918	if(pQCoef[ blkPos ] != 0 )
919	{
920	if(deltaU[blkPos]>0)
921	{
922	curCost = - deltaU[blkPos];
923	curChange=1 ;
924	}
925	else
926	{
927	//curChange =-1;
928	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
929	{
930	curCost=MAX_INT ;
931	}
932	else
933	{
934	curCost = deltaU[blkPos];
935	curChange =-1;
936	}
937	}
938	}
939	else
940	{
941	if(n<firstNZPosInCG)
942	{
943	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
944	if(thisSignBit != signbit )
945	{
946	curCost = MAX_INT;
947	}
948	else
949	{
950	curCost = - (deltaU[blkPos]) ;
951	curChange = 1 ;
952	}
953	}
954	else
955	{
956	curCost = - (deltaU[blkPos]) ;
957	curChange = 1 ;
958	}
959	}
960
961	if( curCost<minCostInc)
962	{
963	minCostInc = curCost ;
964	finalChange = curChange ;
965	minPos = blkPos ;
966	}
967	} //CG loop
968
969	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
970	{
971	finalChange = -1;
972	}
973
974	if(pCoef[minPos]>=0)
975	{
976	pQCoef[minPos] += finalChange ;
977	}
978	else
979	{
980	pQCoef[minPos] -= finalChange ;
981	}
982	} // Hide
983	}
984	if(lastCG==1)
985	{
986	lastCG=0 ;
987	}
988	} // TU loop
989
990	return;
991	}
992
993	Void TComTrQuant::xQuant( TComDataCU* pcCU,
994	Int* pSrc,
995	TCoeff* pDes,
996	#if ADAPTIVE_QP_SELECTION
997	Int*& pArlDes,
998	#endif
999	Int iWidth,
1000	Int iHeight,
1001	UInt& uiAcSum,
1002	TextType eTType,
1003	UInt uiAbsPartIdx )
1004	{
1005	Int* piCoef = pSrc;
1006	TCoeff* piQCoef = pDes;
1007	#if ADAPTIVE_QP_SELECTION
1008	Int* piArlCCoef = pArlDes;
1009	#endif
1010	Int iAdd = 0;
1011
1012	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
1013	if ( useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA))
1014	{
1015	#if ADAPTIVE_QP_SELECTION
1016	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1017	#else
1018	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1019	#endif
1020	}
1021	else
1022	{
1023	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1024
1025	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1026	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1027
1028	Int deltaU[32*32] ;
1029
1030	#if ADAPTIVE_QP_SELECTION
1031	QpParam cQpBase;
1032	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1033
1034	Int qpScaled;
1035	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1036
1037	if(eTType == TEXT_LUMA)
1038	{
1039	qpScaled = iQpBase + qpBDOffset;
1040	}
1041	else
1042	{
1043	Int chromaQPOffset;
1044	if(eTType == TEXT_CHROMA_U)
1045	{
1046	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
1047	}
1048	else
1049	{
1050	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
1051	}
1052	iQpBase = iQpBase + chromaQPOffset;
1053
1054	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1055
1056	if(qpScaled < 0)
1057	{
1058	qpScaled = qpScaled + qpBDOffset;
1059	}
1060	else
1061	{
1062	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1063	}
1064	}
1065	cQpBase.setQpParam(qpScaled);
1066	#endif
1067
1068	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1069	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1070	assert(scalingListType < 6);
1071	Int *piQuantCoeff = 0;
1072	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1073
1074	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1075	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1076
1077	#if ADAPTIVE_QP_SELECTION
1078	Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1079	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1080	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1081	Int iAddC = 1 << (iQBitsC-1);
1082	#else
1083	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1084	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1085	#endif
1086
1087	Int qBits8 = iQBits-8;
1088	for( Int n = 0; n < iWidth*iHeight; n++ )
1089	{
1090	Int iLevel;
1091	Int iSign;
1092	UInt uiBlockPos = n;
1093	iLevel = piCoef[uiBlockPos];
1094	iSign = (iLevel < 0 ? -1: 1);
1095
1096	#if ADAPTIVE_QP_SELECTION
1097	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1098	if( m_bUseAdaptQpSelect )
1099	{
1100	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1101	}
1102	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1103	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1104	#else
1105	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1106	deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1107	#endif
1108	uiAcSum += iLevel;
1109	iLevel *= iSign;
1110	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1111	} // for n
1112	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1113	{
1114	if(uiAcSum>=2)
1115	{
1116	signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1117	}
1118	}
1119	} //if RDOQ
1120	//return;
1121
1122	}
1123
1124	Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1125	{
1126
1127	const TCoeff* piQCoef = pSrc;
1128	Int* piCoef = pDes;
1129
1130	if ( iWidth > (Int)m_uiMaxTrSize )
1131	{
1132	iWidth = m_uiMaxTrSize;
1133	iHeight = m_uiMaxTrSize;
1134	}
1135
1136	Int iShift,iAdd,iCoeffQ;
1137	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1138
1139	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1140
1141	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1142
1143	TCoeff clipQCoef;
1144
1145	if(getUseScalingList())
1146	{
1147	iShift += 4;
1148	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1149
1150	if(iShift > m_cQP.m_iPer)
1151	{
1152	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1153
1154	for( Int n = 0; n < iWidth*iHeight; n++ )
1155	{
1156	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1157	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1158	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1159	}
1160	}
1161	else
1162	{
1163	for( Int n = 0; n < iWidth*iHeight; n++ )
1164	{
1165	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1166	iCoeffQ = Clip3( -32768, 32767, clipQCoef * piDequantCoef[n] ); // Clip to avoid possible overflow in following shift left operation
1167	piCoef[n] = Clip3( -32768, 32767, iCoeffQ << ( m_cQP.m_iPer - iShift ) );
1168	}
1169	}
1170	}
1171	else
1172	{
1173	iAdd = 1 << (iShift-1);
1174	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1175
1176	for( Int n = 0; n < iWidth*iHeight; n++ )
1177	{
1178	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1179	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1180	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1181	}
1182	}
1183	}
1184
1185	Void TComTrQuant::init( UInt uiMaxTrSize,
1186	Bool bUseRDOQ,
1187	Bool bUseRDOQTS,
1188	Bool bEnc, Bool useTransformSkipFast
1189	#if ADAPTIVE_QP_SELECTION
1190	, Bool bUseAdaptQpSelect
1191	#endif
1192	)
1193	{
1194	m_uiMaxTrSize = uiMaxTrSize;
1195	m_bEnc = bEnc;
1196	m_useRDOQ = bUseRDOQ;
1197	m_useRDOQTS = bUseRDOQTS;
1198	#if ADAPTIVE_QP_SELECTION
1199	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1200	#endif
1201	m_useTransformSkipFast = useTransformSkipFast;
1202	}
1203
1204	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1205	Pel* pcResidual,
1206	UInt uiStride,
1207	TCoeff* rpcCoeff,
1208	#if ADAPTIVE_QP_SELECTION
1209	Int*& rpcArlCoeff,
1210	#endif
1211	UInt uiWidth,
1212	UInt uiHeight,
1213	UInt& uiAbsSum,
1214	TextType eTType,
1215	UInt uiAbsPartIdx,
1216	Bool useTransformSkip
1217	)
1218	{
1219	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1220	{
1221	uiAbsSum=0;
1222	for (UInt k = 0; k<uiHeight; k++)
1223	{
1224	for (UInt j = 0; j<uiWidth; j++)
1225	{
1226	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1227	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1228	}
1229	}
1230	return;
1231	}
1232	UInt uiMode; //luma intra pred
1233	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1234	{
1235	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1236	}
1237	#if INTRA_BL_DST4x4
1238	else if(eTType == TEXT_LUMA && pcCU->isIntraBL(uiAbsPartIdx) )
1239	{
1240	uiMode = DC_IDX; //Using DST
1241	}
1242	#endif
1243	else
1244	{
1245	uiMode = REG_DCT;
1246	}
1247
1248	uiAbsSum = 0;
1249	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1250	Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1251	if(useTransformSkip)
1252	{
1253	xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1254	}
1255	else
1256	{
1257	xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1258	}
1259	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1260	#if ADAPTIVE_QP_SELECTION
1261	rpcArlCoeff,
1262	#endif
1263	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1264	}
1265
1266	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1267	{
1268	if(transQuantBypass)
1269	{
1270	for (UInt k = 0; k<uiHeight; k++)
1271	{
1272	for (UInt j = 0; j<uiWidth; j++)
1273	{
1274	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1275	}
1276	}
1277	return;
1278	}
1279	Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1280	xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1281	if(useTransformSkip == true)
1282	{
1283	xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1284	}
1285	else
1286	{
1287	xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1288	}
1289	}
1290
1291	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1292	{
1293	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1294	{
1295	return;
1296	}
1297	const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1298
1299	if( uiTrMode == stopTrMode )
1300	{
1301	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1302	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1303	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1304	{
1305	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1306	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1307	{
1308	return;
1309	}
1310	uiWidth <<= 1;
1311	uiHeight <<= 1;
1312	}
1313	Pel* pResi = rpcResidual + uiAddr;
1314	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1315	assert(scalingListType < 6);
1316	#if NO_RESIDUAL_FLAG_FOR_BLPRED
1317	if(pcCU->isIntraBL(uiAbsPartIdx) && eTxt == TEXT_LUMA)
1318	{
1319	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, DC_IDX, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1320	}
1321	else
1322	{
1323	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1324	}
1325	#else
1326	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1327	#endif
1328	}
1329	else
1330	{
1331	uiTrMode++;
1332	uiWidth >>= 1;
1333	uiHeight >>= 1;
1334	Int trWidth = uiWidth, trHeight = uiHeight;
1335	UInt uiAddrOffset = trHeight * uiStride;
1336	UInt uiCoefOffset = trWidth * trHeight;
1337	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1338	{
1339	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1340	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1341	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1342	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1343	}
1344	}
1345	}
1346
1347	// ------------------------------------------------------------------------------------------------
1348	// Logical transform
1349	// ------------------------------------------------------------------------------------------------
1350
1351	/** Wrapper function between HM interface and core NxN forward transform (2D)
1352	* \param piBlkResi input data (residual)
1353	* \param psCoeff output data (transform coefficients)
1354	* \param uiStride stride of input residual data
1355	* \param iSize transform size (iSize x iSize)
1356	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1357	*/
1358	Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1359	{
1360	#if MATRIX_MULT
1361	Int iSize = iWidth;
1362	xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1363	#else
1364	Int j;
1365	Short block[ 32 * 32 ];
1366	Short coeff[ 32 * 32 ];
1367	for (j = 0; j < iHeight; j++)
1368	{
1369	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) );
1370	}
1371	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );
1372	for ( j = 0; j < iHeight * iWidth; j++ )
1373	{
1374	psCoeff[ j ] = coeff[ j ];
1375	}
1376	#endif
1377	}
1378
1379
1380	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1381	* \param plCoef input data (transform coefficients)
1382	* \param pResidual output data (residual)
1383	* \param uiStride stride of input residual data
1384	* \param iSize transform size (iSize x iSize)
1385	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1386	*/
1387	Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1388	{
1389	#if MATRIX_MULT
1390	Int iSize = iWidth;
1391	xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1392	#else
1393	Int j;
1394	{
1395	Short block[ 32 * 32 ];
1396	Short coeff[ 32 * 32 ];
1397	for ( j = 0; j < iHeight * iWidth; j++ )
1398	{
1399	coeff[j] = (Short)plCoef[j];
1400	}
1401	xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode );
1402	{
1403	for ( j = 0; j < iHeight; j++ )
1404	{
1405	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );
1406	}
1407	}
1408	return ;
1409	}
1410	#endif
1411	}
1412
1413	/** Wrapper function between HM interface and core 4x4 transform skipping
1414	* \param piBlkResi input data (residual)
1415	* \param psCoeff output data (transform coefficients)
1416	* \param uiStride stride of input residual data
1417	* \param iSize transform size (iSize x iSize)
1418	*/
1419	Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1420	{
1421	assert( width == height );
1422	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1423	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1424	UInt transformSkipShift;
1425	Int j,k;
1426	if(shift >= 0)
1427	{
1428	transformSkipShift = shift;
1429	for (j = 0; j < height; j++)
1430	{
1431	for(k = 0; k < width; k ++)
1432	{
1433	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1434	}
1435	}
1436	}
1437	else
1438	{
1439	//The case when uiBitDepth > 13
1440	Int offset;
1441	transformSkipShift = -shift;
1442	offset = (1 << (transformSkipShift - 1));
1443	for (j = 0; j < height; j++)
1444	{
1445	for(k = 0; k < width; k ++)
1446	{
1447	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1448	}
1449	}
1450	}
1451	}
1452
1453	/** Wrapper function between HM interface and core NxN transform skipping
1454	* \param plCoef input data (coefficients)
1455	* \param pResidual output data (residual)
1456	* \param uiStride stride of input residual data
1457	* \param iSize transform size (iSize x iSize)
1458	*/
1459	Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1460	{
1461	assert( width == height );
1462	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1463	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1464	UInt transformSkipShift;
1465	Int j,k;
1466	if(shift > 0)
1467	{
1468	Int offset;
1469	transformSkipShift = shift;
1470	offset = (1 << (transformSkipShift -1));
1471	for ( j = 0; j < height; j++ )
1472	{
1473	for(k = 0; k < width; k ++)
1474	{
1475	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1476	}
1477	}
1478	}
1479	else
1480	{
1481	//The case when uiBitDepth >= 13
1482	transformSkipShift = - shift;
1483	for ( j = 0; j < height; j++ )
1484	{
1485	for(k = 0; k < width; k ++)
1486	{
1487	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1488	}
1489	}
1490	}
1491	}
1492
1493	/** RDOQ with CABAC
1494	* \param pcCU pointer to coding unit structure
1495	* \param plSrcCoeff pointer to input buffer
1496	* \param piDstCoeff reference to pointer to output buffer
1497	* \param uiWidth block width
1498	* \param uiHeight block height
1499	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1500	* \param eTType plane type / luminance or chrominance
1501	* \param uiAbsPartIdx absolute partition index
1502	* \returns Void
1503	* Rate distortion optimized quantization for entropy
1504	* coding engines using probability models like CABAC
1505	*/
1506	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1507	Int* plSrcCoeff,
1508	TCoeff* piDstCoeff,
1509	#if ADAPTIVE_QP_SELECTION
1510	Int*& piArlDstCoeff,
1511	#endif
1512	UInt uiWidth,
1513	UInt uiHeight,
1514	UInt& uiAbsSum,
1515	TextType eTType,
1516	UInt uiAbsPartIdx )
1517	{
1518	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1519
1520	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1521	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1522	UInt uiGoRiceParam = 0;
1523	Double d64BlockUncodedCost = 0;
1524	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1525	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1526	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1527	assert(scalingListType < 6);
1528
1529	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1530	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
1531	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1532	Int *piQCoef = piQCoefOrg;
1533	Double *pdErrScale = pdErrScaleOrg;
1534	#if ADAPTIVE_QP_SELECTION
1535	Int iQBitsC = iQBits - ARL_C_PRECISION;
1536	Int iAddC = 1 << (iQBitsC-1);
1537	#endif
1538	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1539
1540	#if ADAPTIVE_QP_SELECTION
1541	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1542	#endif
1543
1544	Double pdCostCoeff [ 32 * 32 ];
1545	Double pdCostSig [ 32 * 32 ];
1546	Double pdCostCoeff0[ 32 * 32 ];
1547	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1548	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1549	Int rateIncUp [ 32 * 32 ];
1550	Int rateIncDown [ 32 * 32 ];
1551	Int sigRateDelta[ 32 * 32 ];
1552	Int deltaU [ 32 * 32 ];
1553	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1554	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1555	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1556	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1557
1558	const UInt * scanCG;
1559	{
1560	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1561	if( uiLog2BlkSize == 3 )
1562	{
1563	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1564	}
1565	else if( uiLog2BlkSize == 5 )
1566	{
1567	scanCG = g_sigLastScanCG32x32;
1568	}
1569	}
1570	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1571	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1572	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1573	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1574	Int iCGLastScanPos = -1;
1575
1576	UInt uiCtxSet = 0;
1577	Int c1 = 1;
1578	Int c2 = 0;
1579	Double d64BaseCost = 0;
1580	Int iLastScanPos = -1;
1581
1582	UInt c1Idx = 0;
1583	UInt c2Idx = 0;
1584	Int baseLevel;
1585
1586	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1587
1588	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1589	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1590
1591	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1592	Int iScanPos;
1593	coeffGroupRDStats rdStats;
1594
1595	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1596	{
1597	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1598	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1599	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1600	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1601
1602	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1603	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1604	{
1605	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1606	//===== quantization =====
1607	UInt uiBlkPos = scan[iScanPos];
1608	// set coeff
1609	Int uiQ = piQCoef[uiBlkPos];
1610	Double dTemp = pdErrScale[uiBlkPos];
1611	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1612	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1613	#if ADAPTIVE_QP_SELECTION
1614	if( m_bUseAdaptQpSelect )
1615	{
1616	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1617	}
1618	#endif
1619	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1620
1621	Double dErr = Double( lLevelDouble );
1622	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1623	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1624	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1625
1626	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1627	{
1628	iLastScanPos = iScanPos;
1629	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1630	iCGLastScanPos = iCGScanPos;
1631	}
1632
1633	if ( iLastScanPos >= 0 )
1634	{
1635	//===== coefficient level estimation =====
1636	UInt uiLevel;
1637	UInt uiOneCtx = 4 * uiCtxSet + c1;
1638	UInt uiAbsCtx = uiCtxSet + c2;
1639
1640	if( iScanPos == iLastScanPos )
1641	{
1642	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1643	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1644	c1Idx, c2Idx, iQBits, dTemp, 1 );
1645	}
1646	else
1647	{
1648	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1649	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1650	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
1651	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1652	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1653	c1Idx, c2Idx, iQBits, dTemp, 0 );
1654	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1655	}
1656	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1657	if( uiLevel > 0 )
1658	{
1659	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1660	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1661	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1662	}
1663	else // uiLevel == 0
1664	{
1665	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1666	}
1667	piDstCoeff[ uiBlkPos ] = uiLevel;
1668	d64BaseCost += pdCostCoeff [ iScanPos ];
1669
1670
1671	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1672	if( uiLevel >= baseLevel )
1673	{
1674	if(uiLevel > 3*(1<<uiGoRiceParam))
1675	{
1676	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1677	}
1678	}
1679	if ( uiLevel >= 1)
1680	{
1681	c1Idx ++;
1682	}
1683
1684	//===== update bin model =====
1685	if( uiLevel > 1 )
1686	{
1687	c1 = 0;
1688	c2 += (c2 < 2);
1689	c2Idx ++;
1690	}
1691	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1692	{
1693	c1++;
1694	}
1695
1696	//===== context set update =====
1697	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1698	{
1699	c2 = 0;
1700	uiGoRiceParam = 0;
1701
1702	c1Idx = 0;
1703	c2Idx = 0;
1704	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1705	if( c1 == 0 )
1706	{
1707	uiCtxSet++;
1708	}
1709	c1 = 1;
1710	}
1711	}
1712	else
1713	{
1714	d64BaseCost += pdCostCoeff0[ iScanPos ];
1715	}
1716	rdStats.d64SigCost += pdCostSig[ iScanPos ];
1717	if (iScanPosinCG == 0 )
1718	{
1719	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
1720	}
1721	if (piDstCoeff[ uiBlkPos ] )
1722	{
1723	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1724	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
1725	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
1726	if ( iScanPosinCG != 0 )
1727	{
1728	rdStats.iNNZbeforePos0++;
1729	}
1730	}
1731	} //end for (iScanPosinCG)
1732
1733	if (iCGLastScanPos >= 0)
1734	{
1735	if( iCGScanPos )
1736	{
1737	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1738	{
1739	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1740	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
1741	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1742	}
1743	else
1744	{
1745	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
1746	{
1747	if ( rdStats.iNNZbeforePos0 == 0 )
1748	{
1749	d64BaseCost -= rdStats.d64SigCost_0;
1750	rdStats.d64SigCost -= rdStats.d64SigCost_0;
1751	}
1752	// rd-cost if SigCoeffGroupFlag = 0, initialization
1753	Double d64CostZeroCG = d64BaseCost;
1754
1755	// add SigCoeffGroupFlag cost to total cost
1756	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1757	if (iCGScanPos < iCGLastScanPos)
1758	{
1759	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
1760	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
1761	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
1762	}
1763
1764	// try to convert the current coeff group from non-zero to all-zero
1765	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
1766	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
1767	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
1768
1769	// if we can save cost, change this block to all-zero block
1770	if ( d64CostZeroCG < d64BaseCost )
1771	{
1772	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
1773	d64BaseCost = d64CostZeroCG;
1774	if (iCGScanPos < iCGLastScanPos)
1775	{
1776	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1777	}
1778	// reset coeffs to 0 in this block
1779	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1780	{
1781	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1782	UInt uiBlkPos = scan[ iScanPos ];
1783
1784	if (piDstCoeff[ uiBlkPos ])
1785	{
1786	piDstCoeff [ uiBlkPos ] = 0;
1787	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
1788	pdCostSig [ iScanPos ] = 0;
1789	}
1790	}
1791	} // end if ( d64CostAllZeros < d64BaseCost )
1792	}
1793	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1794	}
1795	else
1796	{
1797	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1798	}
1799	}
1800	} //end for (iCGScanPos)
1801
1802	//===== estimate last position =====
1803	if ( iLastScanPos < 0 )
1804	{
1805	return;
1806	}
1807
1808	Double d64BestCost = 0;
1809	Int ui16CtxCbf = 0;
1810	Int iBestLastIdxP1 = 0;
1811	#if NO_RESIDUAL_FLAG_FOR_BLPRED
1812	if( (!pcCU->isIntra( uiAbsPartIdx ) \|\| pcCU->isIntraBL( uiAbsPartIdx )) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1813	#else
1814	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1815	#endif
1816	{
1817	ui16CtxCbf = 0;
1818	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
1819	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
1820	}
1821	else
1822	{
1823	ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
1824	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
1825	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
1826	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
1827	}
1828
1829	Bool bFoundLast = false;
1830	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
1831	{
1832	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1833
1834	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
1835	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1836	{
1837	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1838	{
1839	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1840	if (iScanPos > iLastScanPos) continue;
1841	UInt uiBlkPos = scan[iScanPos];
1842
1843	if( piDstCoeff[ uiBlkPos ] )
1844	{
1845	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1846	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1847
1848	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
1849	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
1850
1851	if( totalCost < d64BestCost )
1852	{
1853	iBestLastIdxP1 = iScanPos + 1;
1854	d64BestCost = totalCost;
1855	}
1856	if( piDstCoeff[ uiBlkPos ] > 1 )
1857	{
1858	bFoundLast = true;
1859	break;
1860	}
1861	d64BaseCost -= pdCostCoeff[ iScanPos ];
1862	d64BaseCost += pdCostCoeff0[ iScanPos ];
1863	}
1864	else
1865	{
1866	d64BaseCost -= pdCostSig[ iScanPos ];
1867	}
1868	} //end for
1869	if (bFoundLast)
1870	{
1871	break;
1872	}
1873	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1874	} // end for
1875
1876	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
1877	{
1878	Int blkPos = scan[ scanPos ];
1879	Int level = piDstCoeff[ blkPos ];
1880	uiAbsSum += level;
1881	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
1882	}
1883
1884	//===== clean uncoded coefficients =====
1885	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
1886	{
1887	piDstCoeff[ scan[ scanPos ] ] = 0;
1888	}
1889
1890	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
1891	{
1892	Int64 rdFactor = (Int64) (
1893	g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
1894	/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
1895	+ 0.5);
1896	Int lastCG = -1;
1897	Int absSum = 0 ;
1898	Int n ;
1899
1900	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1901	{
1902	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1903	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1904	absSum = 0 ;
1905
1906	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1907	{
1908	if( piDstCoeff[ scan[ n + subPos ]] )
1909	{
1910	lastNZPosInCG = n;
1911	break;
1912	}
1913	}
1914
1915	for(n = 0; n <SCAN_SET_SIZE; n++ )
1916	{
1917	if( piDstCoeff[ scan[ n + subPos ]] )
1918	{
1919	firstNZPosInCG = n;
1920	break;
1921	}
1922	}
1923
1924	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1925	{
1926	absSum += piDstCoeff[ scan[ n + subPos ]];
1927	}
1928
1929	if(lastNZPosInCG>=0 && lastCG==-1)
1930	{
1931	lastCG = 1;
1932	}
1933
1934	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1935	{
1936	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
1937	if( signbit!=(absSum&0x1) ) // hide but need tune
1938	{
1939	// calculate the cost
1940	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
1941	Int minPos =-1, finalChange=0, curChange=0;
1942
1943	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1944	{
1945	UInt uiBlkPos = scan[ n + subPos ];
1946	if(piDstCoeff[ uiBlkPos ] != 0 )
1947	{
1948	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
1949	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1950	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
1951
1952	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1953	{
1954	costDown -= (4<<15) ;
1955	}
1956
1957	if(costUp<costDown)
1958	{
1959	curCost = costUp;
1960	curChange = 1 ;
1961	}
1962	else
1963	{
1964	curChange = -1 ;
1965	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1966	{
1967	curCost = MAX_INT64 ;
1968	}
1969	else
1970	{
1971	curCost = costDown ;
1972	}
1973	}
1974	}
1975	else
1976	{
1977	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1978	curChange = 1 ;
1979
1980	if(n<firstNZPosInCG)
1981	{
1982	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1983	if(thissignbit != signbit )
1984	{
1985	curCost = MAX_INT64;
1986	}
1987	}
1988	}
1989
1990	if( curCost<minCostInc)
1991	{
1992	minCostInc = curCost ;
1993	finalChange = curChange ;
1994	minPos = uiBlkPos ;
1995	}
1996	}
1997
1998	if(piDstCoeff[minPos] == 32767 \|\| piDstCoeff[minPos] == -32768)
1999	{
2000	finalChange = -1;
2001	}
2002
2003	if(plSrcCoeff[minPos]>=0)
2004	{
2005	piDstCoeff[minPos] += finalChange ;
2006	}
2007	else
2008	{
2009	piDstCoeff[minPos] -= finalChange ;
2010	}
2011	}
2012	}
2013
2014	if(lastCG==1)
2015	{
2016	lastCG=0 ;
2017	}
2018	}
2019	}
2020	}
2021
2022	/** Pattern decision for context derivation process of significant_coeff_flag
2023	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2024	* \param posXCG column of current coefficient group
2025	* \param posYCG row of current coefficient group
2026	* \param width width of the block
2027	* \param height height of the block
2028	* \returns pattern for current coefficient group
2029	*/
2030	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2031	{
2032	if( width == 4 && height == 4 ) return -1;
2033
2034	UInt sigRight = 0;
2035	UInt sigLower = 0;
2036
2037	width >>= 2;
2038	height >>= 2;
2039	if( posXCG < width - 1 )
2040	{
2041	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2042	}
2043	if (posYCG < height - 1 )
2044	{
2045	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2046	}
2047	return sigRight + (sigLower<<1);
2048	}
2049
2050	/** Context derivation process of coeff_abs_significant_flag
2051	* \param patternSigCtx pattern for current coefficient group
2052	* \param posX column of current scan position
2053	* \param posY row of current scan position
2054	* \param log2BlockSize log2 value of block size (square block)
2055	* \param width width of the block
2056	* \param height height of the block
2057	* \param textureType texture type (TEXT_LUMA...)
2058	* \returns ctxInc for current scan position
2059	*/
2060	Int TComTrQuant::getSigCtxInc (
2061	Int patternSigCtx,
2062	UInt scanIdx,
2063	Int posX,
2064	Int posY,
2065	Int log2BlockSize,
2066	TextType textureType
2067	)
2068	{
2069	const Int ctxIndMap[16] =
2070	{
2071	0, 1, 4, 5,
2072	2, 3, 4, 5,
2073	6, 6, 8, 8,
2074	7, 7, 8, 8
2075	};
2076
2077	if( posX + posY == 0 )
2078	{
2079	return 0;
2080	}
2081
2082	if ( log2BlockSize == 2 )
2083	{
2084	return ctxIndMap[ 4 * posY + posX ];
2085	}
2086
2087	Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2088
2089	Int posXinSubset = posX-((posX>>2)<<2);
2090	Int posYinSubset = posY-((posY>>2)<<2);
2091	Int cnt = 0;
2092	if(patternSigCtx==0)
2093	{
2094	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2095	}
2096	else if(patternSigCtx==1)
2097	{
2098	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2099	}
2100	else if(patternSigCtx==2)
2101	{
2102	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2103	}
2104	else
2105	{
2106	cnt = 2;
2107	}
2108
2109	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2110	}
2111
2112	/** Get the best level in RD sense
2113	* \param rd64CodedCost reference to coded cost
2114	* \param rd64CodedCost0 reference to cost when coefficient is 0
2115	* \param rd64CodedCostSig reference to cost of significant coefficient
2116	* \param lLevelDouble reference to unscaled quantized level
2117	* \param uiMaxAbsLevel scaled quantized level
2118	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2119	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2120	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2121	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2122	* \param iQBits quantization step size
2123	* \param dTemp correction factor
2124	* \param bLast indicates if the coefficient is the last significant
2125	* \returns best quantized transform level for given scan position
2126	* This method calculates the best quantized transform level for a given scan position.
2127	*/
2128	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2129	Double& rd64CodedCost0,
2130	Double& rd64CodedCostSig,
2131	Int lLevelDouble,
2132	UInt uiMaxAbsLevel,
2133	UShort ui16CtxNumSig,
2134	UShort ui16CtxNumOne,
2135	UShort ui16CtxNumAbs,
2136	UShort ui16AbsGoRice,
2137	UInt c1Idx,
2138	UInt c2Idx,
2139	Int iQBits,
2140	Double dTemp,
2141	Bool bLast ) const
2142	{
2143	Double dCurrCostSig = 0;
2144	UInt uiBestAbsLevel = 0;
2145
2146	if( !bLast && uiMaxAbsLevel < 3 )
2147	{
2148	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2149	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2150	if( uiMaxAbsLevel == 0 )
2151	{
2152	return uiBestAbsLevel;
2153	}
2154	}
2155	else
2156	{
2157	rd64CodedCost = MAX_DOUBLE;
2158	}
2159
2160	if( !bLast )
2161	{
2162	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2163	}
2164
2165	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2166	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2167	{
2168	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2169	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2170	dCurrCost += dCurrCostSig;
2171
2172	if( dCurrCost < rd64CodedCost )
2173	{
2174	uiBestAbsLevel = uiAbsLevel;
2175	rd64CodedCost = dCurrCost;
2176	rd64CodedCostSig = dCurrCostSig;
2177	}
2178	}
2179
2180	return uiBestAbsLevel;
2181	}
2182
2183	/** Calculates the cost for specific absolute transform level
2184	* \param uiAbsLevel scaled quantized level
2185	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2186	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2187	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2188	* \returns cost of given absolute transform level
2189	*/
2190	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2191	UShort ui16CtxNumOne,
2192	UShort ui16CtxNumAbs,
2193	UShort ui16AbsGoRice
2194	, UInt c1Idx,
2195	UInt c2Idx
2196	) const
2197	{
2198	Double iRate = xGetIEPRate();
2199	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2200
2201	if ( uiAbsLevel >= baseLevel )
2202	{
2203	UInt symbol = uiAbsLevel - baseLevel;
2204	UInt length;
2205	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2206	{
2207	length = symbol>>ui16AbsGoRice;
2208	iRate += (length+1+ui16AbsGoRice)<< 15;
2209	}
2210	else
2211	{
2212	length = ui16AbsGoRice;
2213	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2214	while (symbol >= (1<<length))
2215	{
2216	symbol -= (1<<(length++));
2217	}
2218	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2219	}
2220	if (c1Idx < C1FLAG_NUMBER)
2221	{
2222	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2223
2224	if (c2Idx < C2FLAG_NUMBER)
2225	{
2226	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2227	}
2228	}
2229	}
2230	else
2231	if( uiAbsLevel == 1 )
2232	{
2233	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2234	}
2235	else if( uiAbsLevel == 2 )
2236	{
2237	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2238	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2239	}
2240	else
2241	{
2242	assert (0);
2243	}
2244	return xGetICost( iRate );
2245	}
2246
2247	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2248	UShort ui16CtxNumOne,
2249	UShort ui16CtxNumAbs,
2250	UShort ui16AbsGoRice
2251	, UInt c1Idx,
2252	UInt c2Idx
2253	) const
2254	{
2255	Int iRate = 0;
2256	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2257
2258	if ( uiAbsLevel >= baseLevel )
2259	{
2260	UInt uiSymbol = uiAbsLevel - baseLevel;
2261	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2262	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2263
2264	if( bExpGolomb )
2265	{
2266	uiAbsLevel = uiSymbol - uiMaxVlc;
2267	Int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2268	iRate += iEGS << 15;
2269	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2270	}
2271
2272	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2273	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2274
2275	iRate += ui16NumBins << 15;
2276
2277	if (c1Idx < C1FLAG_NUMBER)
2278	{
2279	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2280
2281	if (c2Idx < C2FLAG_NUMBER)
2282	{
2283	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2284	}
2285	}
2286	}
2287	else
2288	if( uiAbsLevel == 0 )
2289	{
2290	return 0;
2291	}
2292	else if( uiAbsLevel == 1 )
2293	{
2294	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2295	}
2296	else if( uiAbsLevel == 2 )
2297	{
2298	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2299	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2300	}
2301	else
2302	{
2303	assert(0);
2304	}
2305	return iRate;
2306	}
2307
2308	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2309	UShort ui16CtxNumSig ) const
2310	{
2311	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2312	}
2313
2314	/** Calculates the cost of signaling the last significant coefficient in the block
2315	* \param uiPosX X coordinate of the last significant coefficient
2316	* \param uiPosY Y coordinate of the last significant coefficient
2317	* \returns cost of last significant coefficient
2318	*/
2319	/*
2320	* \param uiWidth width of the transform unit (TU)
2321	*/
2322	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2323	const UInt uiPosY ) const
2324	{
2325	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2326	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2327	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2328	if( uiCtxX > 3 )
2329	{
2330	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2331	}
2332	if( uiCtxY > 3 )
2333	{
2334	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2335	}
2336	return xGetICost( uiCost );
2337	}
2338
2339	/** Calculates the cost for specific absolute transform level
2340	* \param uiAbsLevel scaled quantized level
2341	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2342	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2343	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2344	* \returns cost of given absolute transform level
2345	*/
2346	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2347	UShort ui16CtxNumSig ) const
2348	{
2349	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2350	}
2351
2352	/** Get the cost for a specific rate
2353	* \param dRate rate of a bit
2354	* \returns cost at the specific rate
2355	*/
2356	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2357	{
2358	return m_dLambda * dRate;
2359	}
2360
2361	/** Get the cost of an equal probable bit
2362	* \returns cost of equal probable bit
2363	*/
2364	__inline Double TComTrQuant::xGetIEPRate ( ) const
2365	{
2366	return 32768;
2367	}
2368
2369	/** Context derivation process of coeff_abs_significant_flag
2370	* \param uiSigCoeffGroupFlag significance map of L1
2371	* \param uiBlkX column of current scan position
2372	* \param uiBlkY row of current scan position
2373	* \param uiLog2BlkSize log2 value of block size
2374	* \returns ctxInc for current scan position
2375	*/
2376	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2377	const UInt uiCGPosX,
2378	const UInt uiCGPosY,
2379	Int width, Int height)
2380	{
2381	UInt uiRight = 0;
2382	UInt uiLower = 0;
2383
2384	width >>= 2;
2385	height >>= 2;
2386	if( uiCGPosX < width - 1 )
2387	{
2388	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2389	}
2390	if (uiCGPosY < height - 1 )
2391	{
2392	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2393	}
2394	return (uiRight \|\| uiLower);
2395
2396	}
2397	/** set quantized matrix coefficient for encode
2398	* \param scalingList quantaized matrix address
2399	*/
2400	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2401	{
2402	UInt size,list;
2403	UInt qp;
2404
2405	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2406	{
2407	for(list = 0; list < g_scalingListNum[size]; list++)
2408	{
2409	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2410	{
2411	xSetScalingListEnc(scalingList,list,size,qp);
2412	xSetScalingListDec(scalingList,list,size,qp);
2413	setErrScaleCoeff(list,size,qp);
2414	}
2415	}
2416	}
2417	}
2418	/** set quantized matrix coefficient for decode
2419	* \param scalingList quantaized matrix address
2420	*/
2421	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2422	{
2423	UInt size,list;
2424	UInt qp;
2425
2426	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2427	{
2428	for(list = 0; list < g_scalingListNum[size]; list++)
2429	{
2430	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2431	{
2432	xSetScalingListDec(scalingList,list,size,qp);
2433	}
2434	}
2435	}
2436	}
2437	/** set error scale coefficients
2438	* \param list List ID
2439	* \param uiSize Size
2440	* \param uiQP Quantization parameter
2441	*/
2442	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp)
2443	{
2444
2445	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2446	Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
2447	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
2448
2449	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2450	Int *piQuantcoeff;
2451	Double *pdErrScale;
2452	piQuantcoeff = getQuantCoeff(list, qp,size);
2453	pdErrScale = getErrScaleCoeff(list, size, qp);
2454
2455	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2456	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2457	for(i=0;i<uiMaxNumCoeff;i++)
2458	{
2459	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8)));
2460	}
2461	}
2462
2463	/** set quantized matrix coefficient for encode
2464	* \param scalingList quantaized matrix address
2465	* \param listId List index
2466	* \param sizeId size index
2467	* \param uiQP Quantization parameter
2468	*/
2469	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2470	{
2471	UInt width = g_scalingListSizeX[sizeId];
2472	UInt height = g_scalingListSizeX[sizeId];
2473	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2474	Int *quantcoeff;
2475	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2476	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2477
2478	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2479	}
2480	/** set quantized matrix coefficient for decode
2481	* \param scalingList quantaized matrix address
2482	* \param list List index
2483	* \param size size index
2484	* \param uiQP Quantization parameter
2485	*/
2486	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2487	{
2488	UInt width = g_scalingListSizeX[sizeId];
2489	UInt height = g_scalingListSizeX[sizeId];
2490	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2491	Int *dequantcoeff;
2492	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2493
2494	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2495	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2496	}
2497
2498	/** set flat matrix value to quantized coefficient
2499	*/
2500	Void TComTrQuant::setFlatScalingList()
2501	{
2502	UInt size,list;
2503	UInt qp;
2504
2505	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2506	{
2507	for(list = 0; list < g_scalingListNum[size]; list++)
2508	{
2509	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2510	{
2511	xsetFlatScalingList(list,size,qp);
2512	setErrScaleCoeff(list,size,qp);
2513	}
2514	}
2515	}
2516	}
2517
2518	/** set flat matrix value to quantized coefficient
2519	* \param list List ID
2520	* \param uiQP Quantization parameter
2521	* \param uiSize Size
2522	*/
2523	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2524	{
2525	UInt i,num = g_scalingListSize[size];
2526	Int *quantcoeff;
2527	Int *dequantcoeff;
2528	Int quantScales = g_quantScales[qp];
2529	Int invQuantScales = g_invQuantScales[qp]<<4;
2530
2531	quantcoeff = getQuantCoeff(list, qp, size);
2532	dequantcoeff = getDequantCoeff(list, qp, size);
2533
2534	for(i=0;i<num;i++)
2535	{
2536	*quantcoeff++ = quantScales;
2537	*dequantcoeff++ = invQuantScales;
2538	}
2539	}
2540
2541	/** set quantized matrix coefficient for encode
2542	* \param coeff quantaized matrix address
2543	* \param quantcoeff quantaized matrix address
2544	* \param quantScales Q(QP%6)
2545	* \param height height
2546	* \param width width
2547	* \param ratio ratio for upscale
2548	* \param sizuNum matrix size
2549	* \param dc dc parameter
2550	*/
2551	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2552	{
2553	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2554	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2555	for(UInt j=0;j<height;j++)
2556	{
2557	for(UInt i=0;i<width;i++)
2558	{
2559	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2560	}
2561	}
2562	if(ratio > 1)
2563	{
2564	quantcoeff[0] = quantScales / dc;
2565	}
2566	}
2567	/** set quantized matrix coefficient for decode
2568	* \param coeff quantaized matrix address
2569	* \param dequantcoeff quantaized matrix address
2570	* \param invQuantScales IQ(QP%6))
2571	* \param height height
2572	* \param width width
2573	* \param ratio ratio for upscale
2574	* \param sizuNum matrix size
2575	* \param dc dc parameter
2576	*/
2577	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2578	{
2579	for(UInt j=0;j<height;j++)
2580	{
2581	for(UInt i=0;i<width;i++)
2582	{
2583	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2584	}
2585	}
2586	if(ratio > 1)
2587	{
2588	dequantcoeff[0] = invQuantScales * dc;
2589	}
2590	}
2591
2592	/** initialization process of scaling list array
2593	*/
2594	Void TComTrQuant::initScalingList()
2595	{
2596	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2597	{
2598	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2599	{
2600	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2601	{
2602	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2603	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2604	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
2605	}
2606	}
2607	}
2608	// alias list [1] as [3].
2609	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2610	{
2611	m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp];
2612	m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp];
2613	m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp];
2614	}
2615	}
2616	/** destroy quantization matrix array
2617	*/
2618	Void TComTrQuant::destroyScalingList()
2619	{
2620	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2621	{
2622	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2623	{
2624	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2625	{
2626	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
2627	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
2628	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
2629	}
2630	}
2631	}
2632	}
2633
2634	//! \}

Note: See TracBrowser for help on using the repository browser.

JCT-VC SHVC

Context navigation

source: SHVCSoftware/branches/SHM-2.1-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 757

Download in other formats: