Context navigation

TComTrQuant.cpp @ 1164

Visit:

Last change on this file since 1164 was 532, checked in by seregin, 11 years ago
update to HM-12.1 base
Property svn:eol-style set to `native`
File size: 82.1 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2013, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44	#define MAYBE_BUGFIX 1
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	#if MAYBE_BUGFIX
53	Void init()
54	{
55	iNNZbeforePos0 = 0;
56	d64CodedLevelandDist = 0;
57	d64UncodedDist = 0;
58	d64SigCost = 0;
59	d64SigCost_0 = 0;
60	}
61	#endif
62	} coeffGroupRDStats;
63
64	//! \ingroup TLibCommon
65	//! \{
66
67	// ====================================================================================================================
68	// Constants
69	// ====================================================================================================================
70
71	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
72
73	// ====================================================================================================================
74	// Tables
75	// ====================================================================================================================
76
77	// RDOQ parameter
78
79	// ====================================================================================================================
80	// Qp class member functions
81	// ====================================================================================================================
82
83	QpParam::QpParam()
84	{
85	}
86
87	// ====================================================================================================================
88	// TComTrQuant class member functions
89	// ====================================================================================================================
90
91	TComTrQuant::TComTrQuant()
92	{
93	m_cQP.clear();
94
95	// allocate temporary buffers
96	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
97
98	// allocate bit estimation class (for RDOQ)
99	m_pcEstBitsSbac = new estBitsSbacStruct;
100	initScalingList();
101	}
102
103	TComTrQuant::~TComTrQuant()
104	{
105	// delete temporary buffers
106	if ( m_plTempCoeff )
107	{
108	delete [] m_plTempCoeff;
109	m_plTempCoeff = NULL;
110	}
111
112	// delete bit estimation class
113	if ( m_pcEstBitsSbac )
114	{
115	delete m_pcEstBitsSbac;
116	}
117	destroyScalingList();
118	}
119
120	#if ADAPTIVE_QP_SELECTION
121	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
122	{
123	Int qpBase = pcSlice->getSliceQpBase();
124	Int sliceQpused = pcSlice->getSliceQp();
125	Int sliceQpnext;
126	Double alpha = qpBase < 17 ? 0.5 : 1;
127
128	Int cnt=0;
129	for(Int u=1; u<=LEVEL_RANGE; u++)
130	{
131	cnt += m_sliceNsamples[u] ;
132	}
133
134	if( !m_useRDOQ )
135	{
136	sliceQpused = qpBase;
137	alpha = 0.5;
138	}
139
140	if( cnt > 120 )
141	{
142	Double sum = 0;
143	Int k = 0;
144	for(Int u=1; u<LEVEL_RANGE; u++)
145	{
146	sum += u*m_sliceSumC[u];
147	k += uum_sliceNsamples[u];
148	}
149
150	Int v;
151	Double q[MAX_QP+1] ;
152	for(v=0; v<=MAX_QP; v++)
153	{
154	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
155	}
156
157	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
158
159	for(v=0; v<MAX_QP; v++)
160	{
161	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
162	{
163	break;
164	}
165	}
166	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
167	}
168	else
169	{
170	sliceQpnext = sliceQpused;
171	}
172
173	m_qpDelta[qpBase] = sliceQpnext - qpBase;
174	}
175
176	Void TComTrQuant::initSliceQpDelta()
177	{
178	for(Int qp=0; qp<=MAX_QP; qp++)
179	{
180	m_qpDelta[qp] = qp < 17 ? 0 : 1;
181	}
182	}
183
184	Void TComTrQuant::clearSliceARLCnt()
185	{
186	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
187	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
188	}
189	#endif
190
191
192	/** Set qP for Quantization.
193	* \param qpy QPy
194	* \param bLowpass
195	* \param eSliceType
196	* \param eTxtType
197	* \param qpBdOffset
198	* \param chromaQPOffset
199	*
200	* return void
201	*/
202	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
203	{
204	Int qpScaled;
205
206	if(eTxtType == TEXT_LUMA)
207	{
208	qpScaled = qpy + qpBdOffset;
209	}
210	else
211	{
212	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
213
214	if(qpScaled < 0)
215	{
216	qpScaled = qpScaled + qpBdOffset;
217	}
218	else
219	{
220	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
221	}
222	}
223	m_cQP.setQpParam( qpScaled );
224	}
225
226	#if MATRIX_MULT
227	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
228	* \param block pointer to input data (residual)
229	* \param coeff pointer to output data (transform coefficients)
230	* \param uiStride stride of input data
231	* \param uiTrSize transform size (uiTrSize x uiTrSize)
232	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
233	*/
234	void xTr(Int bitDepth, Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
235	{
236	Int i,j,k,iSum;
237	Int tmp[32*32];
238	const Short *iT;
239	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
240
241	if (uiTrSize==4)
242	{
243	iT = g_aiT4[0];
244	}
245	else if (uiTrSize==8)
246	{
247	iT = g_aiT8[0];
248	}
249	else if (uiTrSize==16)
250	{
251	iT = g_aiT16[0];
252	}
253	else if (uiTrSize==32)
254	{
255	iT = g_aiT32[0];
256	}
257	else
258	{
259	assert(0);
260	}
261
262	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
263	Int add_1st = 1<<(shift_1st-1);
264	Int shift_2nd = uiLog2TrSize + 6;
265	Int add_2nd = 1<<(shift_2nd-1);
266
267	/* Horizontal transform */
268
269	if (uiTrSize==4)
270	{
271	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
272	{
273	iT = g_as_DST_MAT_4[0];
274	}
275	}
276	for (i=0; i<uiTrSize; i++)
277	{
278	for (j=0; j<uiTrSize; j++)
279	{
280	iSum = 0;
281	for (k=0; k<uiTrSize; k++)
282	{
283	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
284	}
285	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
286	}
287	}
288
289	/* Vertical transform */
290	if (uiTrSize==4)
291	{
292	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
293	{
294	iT = g_as_DST_MAT_4[0];
295	}
296	else
297	{
298	iT = g_aiT4[0];
299	}
300	}
301	for (i=0; i<uiTrSize; i++)
302	{
303	for (j=0; j<uiTrSize; j++)
304	{
305	iSum = 0;
306	for (k=0; k<uiTrSize; k++)
307	{
308	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
309	}
310	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
311	}
312	}
313	}
314
315	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
316	* \param coeff pointer to input data (transform coefficients)
317	* \param block pointer to output data (residual)
318	* \param uiStride stride of output data
319	* \param uiTrSize transform size (uiTrSize x uiTrSize)
320	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
321	*/
322	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
323	{
324	Int i,j,k,iSum;
325	Int tmp[32*32];
326	const Short *iT;
327
328	if (uiTrSize==4)
329	{
330	iT = g_aiT4[0];
331	}
332	else if (uiTrSize==8)
333	{
334	iT = g_aiT8[0];
335	}
336	else if (uiTrSize==16)
337	{
338	iT = g_aiT16[0];
339	}
340	else if (uiTrSize==32)
341	{
342	iT = g_aiT32[0];
343	}
344	else
345	{
346	assert(0);
347	}
348
349	Int shift_1st = SHIFT_INV_1ST;
350	Int add_1st = 1<<(shift_1st-1);
351	Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8;
352	Int add_2nd = 1<<(shift_2nd-1);
353	if (uiTrSize==4)
354	{
355	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
356	{
357	iT = g_as_DST_MAT_4[0];
358	}
359	}
360
361	/* Horizontal transform */
362	for (i=0; i<uiTrSize; i++)
363	{
364	for (j=0; j<uiTrSize; j++)
365	{
366	iSum = 0;
367	for (k=0; k<uiTrSize; k++)
368	{
369	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
370	}
371	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
372	}
373	}
374
375	if (uiTrSize==4)
376	{
377	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
378	{
379	iT = g_as_DST_MAT_4[0];
380	}
381	else
382	{
383	iT = g_aiT4[0];
384	}
385	}
386
387	/* Vertical transform */
388	for (i=0; i<uiTrSize; i++)
389	{
390	for (j=0; j<uiTrSize; j++)
391	{
392	iSum = 0;
393	for (k=0; k<uiTrSize; k++)
394	{
395	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
396	}
397	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
398	}
399	}
400	}
401
402	#else //MATRIX_MULT
403
404	/** 4x4 forward transform implemented using partial butterfly structure (1D)
405	* \param src input data (residual)
406	* \param dst output data (transform coefficients)
407	* \param shift specifies right shift after 1D transform
408	*/
409
410	void partialButterfly4(Short src,Short dst,Int shift, Int line)
411	{
412	Int j;
413	Int E[2],O[2];
414	Int add = 1<<(shift-1);
415
416	for (j=0; j<line; j++)
417	{
418	/* E and O */
419	E[0] = src[0] + src[3];
420	O[0] = src[0] - src[3];
421	E[1] = src[1] + src[2];
422	O[1] = src[1] - src[2];
423
424	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
425	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
426	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
427	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
428
429	src += 4;
430	dst ++;
431	}
432	}
433
434	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
435	// give identical results
436	void fastForwardDst(Short block,Short coeff,Int shift) // input block, output coeff
437	{
438	Int i, c[4];
439	Int rnd_factor = 1<<(shift-1);
440	for (i=0; i<4; i++)
441	{
442	// Intermediate Variables
443	c[0] = block[4i+0] + block[4i+3];
444	c[1] = block[4i+1] + block[4i+3];
445	c[2] = block[4i+0] - block[4i+1];
446	c[3] = 74* block[4*i+2];
447
448	coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
449	coeff[ 4+i] = ( 74 * (block[4i+0]+ block[4i+1] - block[4*i+3]) + rnd_factor ) >> shift;
450	coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
451	coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
452	}
453	}
454
455	void fastInverseDst(Short tmp,Short block,Int shift) // input tmp, output block
456	{
457	Int i, c[4];
458	Int rnd_factor = 1<<(shift-1);
459	for (i=0; i<4; i++)
460	{
461	// Intermediate Variables
462	c[0] = tmp[ i] + tmp[ 8+i];
463	c[1] = tmp[8+i] + tmp[12+i];
464	c[2] = tmp[ i] - tmp[12+i];
465	c[3] = 74* tmp[4+i];
466
467	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
468	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
469	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
470	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
471	}
472	}
473
474	void partialButterflyInverse4(Short src,Short dst,Int shift, Int line)
475	{
476	Int j;
477	Int E[2],O[2];
478	Int add = 1<<(shift-1);
479
480	for (j=0; j<line; j++)
481	{
482	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
483	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
484	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
485	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
486	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
487
488	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
489	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
490	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
491	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
492	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
493
494	src ++;
495	dst += 4;
496	}
497	}
498
499
500	void partialButterfly8(Short src,Short dst,Int shift, Int line)
501	{
502	Int j,k;
503	Int E[4],O[4];
504	Int EE[2],EO[2];
505	Int add = 1<<(shift-1);
506
507	for (j=0; j<line; j++)
508	{
509	/* E and O*/
510	for (k=0;k<4;k++)
511	{
512	E[k] = src[k] + src[7-k];
513	O[k] = src[k] - src[7-k];
514	}
515	/* EE and EO */
516	EE[0] = E[0] + E[3];
517	EO[0] = E[0] - E[3];
518	EE[1] = E[1] + E[2];
519	EO[1] = E[1] - E[2];
520
521	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
522	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
523	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
524	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
525
526	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
527	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
528	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
529	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
530
531	src += 8;
532	dst ++;
533	}
534	}
535
536
537	void partialButterflyInverse8(Short src,Short dst,Int shift, Int line)
538	{
539	Int j,k;
540	Int E[4],O[4];
541	Int EE[2],EO[2];
542	Int add = 1<<(shift-1);
543
544	for (j=0; j<line; j++)
545	{
546	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
547	for (k=0;k<4;k++)
548	{
549	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
550	}
551
552	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
553	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
554	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
555	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
556
557	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
558	E[0] = EE[0] + EO[0];
559	E[3] = EE[0] - EO[0];
560	E[1] = EE[1] + EO[1];
561	E[2] = EE[1] - EO[1];
562	for (k=0;k<4;k++)
563	{
564	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
565	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
566	}
567	src ++;
568	dst += 8;
569	}
570	}
571
572
573	void partialButterfly16(Short src,Short dst,Int shift, Int line)
574	{
575	Int j,k;
576	Int E[8],O[8];
577	Int EE[4],EO[4];
578	Int EEE[2],EEO[2];
579	Int add = 1<<(shift-1);
580
581	for (j=0; j<line; j++)
582	{
583	/* E and O*/
584	for (k=0;k<8;k++)
585	{
586	E[k] = src[k] + src[15-k];
587	O[k] = src[k] - src[15-k];
588	}
589	/* EE and EO */
590	for (k=0;k<4;k++)
591	{
592	EE[k] = E[k] + E[7-k];
593	EO[k] = E[k] - E[7-k];
594	}
595	/* EEE and EEO */
596	EEE[0] = EE[0] + EE[3];
597	EEO[0] = EE[0] - EE[3];
598	EEE[1] = EE[1] + EE[2];
599	EEO[1] = EE[1] - EE[2];
600
601	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
602	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
603	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
604	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
605
606	for (k=2;k<16;k+=4)
607	{
608	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
609	}
610
611	for (k=1;k<16;k+=2)
612	{
613	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
614	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
615	}
616
617	src += 16;
618	dst ++;
619
620	}
621	}
622
623
624	void partialButterflyInverse16(Short src,Short dst,Int shift, Int line)
625	{
626	Int j,k;
627	Int E[8],O[8];
628	Int EE[4],EO[4];
629	Int EEE[2],EEO[2];
630	Int add = 1<<(shift-1);
631
632	for (j=0; j<line; j++)
633	{
634	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
635	for (k=0;k<8;k++)
636	{
637	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
638	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
639	}
640	for (k=0;k<4;k++)
641	{
642	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
643	}
644	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
645	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
646	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
647	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
648
649	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
650	for (k=0;k<2;k++)
651	{
652	EE[k] = EEE[k] + EEO[k];
653	EE[k+2] = EEE[1-k] - EEO[1-k];
654	}
655	for (k=0;k<4;k++)
656	{
657	E[k] = EE[k] + EO[k];
658	E[k+4] = EE[3-k] - EO[3-k];
659	}
660	for (k=0;k<8;k++)
661	{
662	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
663	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
664	}
665	src ++;
666	dst += 16;
667	}
668	}
669
670
671	void partialButterfly32(Short src,Short dst,Int shift, Int line)
672	{
673	Int j,k;
674	Int E[16],O[16];
675	Int EE[8],EO[8];
676	Int EEE[4],EEO[4];
677	Int EEEE[2],EEEO[2];
678	Int add = 1<<(shift-1);
679
680	for (j=0; j<line; j++)
681	{
682	/* E and O*/
683	for (k=0;k<16;k++)
684	{
685	E[k] = src[k] + src[31-k];
686	O[k] = src[k] - src[31-k];
687	}
688	/* EE and EO */
689	for (k=0;k<8;k++)
690	{
691	EE[k] = E[k] + E[15-k];
692	EO[k] = E[k] - E[15-k];
693	}
694	/* EEE and EEO */
695	for (k=0;k<4;k++)
696	{
697	EEE[k] = EE[k] + EE[7-k];
698	EEO[k] = EE[k] - EE[7-k];
699	}
700	/* EEEE and EEEO */
701	EEEE[0] = EEE[0] + EEE[3];
702	EEEO[0] = EEE[0] - EEE[3];
703	EEEE[1] = EEE[1] + EEE[2];
704	EEEO[1] = EEE[1] - EEE[2];
705
706	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
707	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
708	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
709	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
710	for (k=4;k<32;k+=8)
711	{
712	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
713	}
714	for (k=2;k<32;k+=4)
715	{
716	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
717	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
718	}
719	for (k=1;k<32;k+=2)
720	{
721	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
722	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
723	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
724	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
725	}
726	src += 32;
727	dst ++;
728	}
729	}
730
731
732	void partialButterflyInverse32(Short src,Short dst,Int shift, Int line)
733	{
734	Int j,k;
735	Int E[16],O[16];
736	Int EE[8],EO[8];
737	Int EEE[4],EEO[4];
738	Int EEEE[2],EEEO[2];
739	Int add = 1<<(shift-1);
740
741	for (j=0; j<line; j++)
742	{
743	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
744	for (k=0;k<16;k++)
745	{
746	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
747	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
748	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
749	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
750	}
751	for (k=0;k<8;k++)
752	{
753	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
754	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
755	}
756	for (k=0;k<4;k++)
757	{
758	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
759	}
760	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
761	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
762	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
763	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
764
765	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
766	EEE[0] = EEEE[0] + EEEO[0];
767	EEE[3] = EEEE[0] - EEEO[0];
768	EEE[1] = EEEE[1] + EEEO[1];
769	EEE[2] = EEEE[1] - EEEO[1];
770	for (k=0;k<4;k++)
771	{
772	EE[k] = EEE[k] + EEO[k];
773	EE[k+4] = EEE[3-k] - EEO[3-k];
774	}
775	for (k=0;k<8;k++)
776	{
777	E[k] = EE[k] + EO[k];
778	E[k+8] = EE[7-k] - EO[7-k];
779	}
780	for (k=0;k<16;k++)
781	{
782	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
783	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
784	}
785	src ++;
786	dst += 32;
787	}
788	}
789
790	/** MxN forward transform (2D)
791	* \param block input data (residual)
792	* \param coeff output data (transform coefficients)
793	* \param iWidth input data (width of transform)
794	* \param iHeight input data (height of transform)
795	*/
796	void xTrMxN(Int bitDepth, Short block,Short coeff, Int iWidth, Int iHeight, UInt uiMode)
797	{
798	Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
799	Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
800
801	Short tmp[ 64 * 64 ];
802
803	if( iWidth == 4 && iHeight == 4)
804	{
805	if (uiMode != REG_DCT)
806	{
807	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
808	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
809	}
810	else
811	{
812	partialButterfly4(block, tmp, shift_1st, iHeight);
813	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
814	}
815
816	}
817	else if( iWidth == 8 && iHeight == 8)
818	{
819	partialButterfly8( block, tmp, shift_1st, iHeight );
820	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
821	}
822	else if( iWidth == 16 && iHeight == 16)
823	{
824	partialButterfly16( block, tmp, shift_1st, iHeight );
825	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
826	}
827	else if( iWidth == 32 && iHeight == 32)
828	{
829	partialButterfly32( block, tmp, shift_1st, iHeight );
830	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
831	}
832	}
833	/** MxN inverse transform (2D)
834	* \param coeff input data (transform coefficients)
835	* \param block output data (residual)
836	* \param iWidth input data (width of transform)
837	* \param iHeight input data (height of transform)
838	*/
839	void xITrMxN(Int bitDepth, Short coeff,Short block, Int iWidth, Int iHeight, UInt uiMode)
840	{
841	Int shift_1st = SHIFT_INV_1ST;
842	Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8);
843
844	Short tmp[ 64*64];
845	if( iWidth == 4 && iHeight == 4)
846	{
847	if (uiMode != REG_DCT)
848	{
849	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
850	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
851	}
852	else
853	{
854	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
855	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
856	}
857	}
858	else if( iWidth == 8 && iHeight == 8)
859	{
860	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
861	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
862	}
863	else if( iWidth == 16 && iHeight == 16)
864	{
865	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
866	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
867	}
868	else if( iWidth == 32 && iHeight == 32)
869	{
870	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
871	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
872	}
873	}
874
875	#endif //MATRIX_MULT
876
877	// To minimize the distortion only. No rate is considered.
878	Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
879	{
880	Int lastCG = -1;
881	Int absSum = 0 ;
882	Int n ;
883
884	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
885	{
886	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
887	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
888	absSum = 0 ;
889
890	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
891	{
892	if( pQCoef[ scan[ n + subPos ]] )
893	{
894	lastNZPosInCG = n;
895	break;
896	}
897	}
898
899	for(n = 0; n <SCAN_SET_SIZE; n++ )
900	{
901	if( pQCoef[ scan[ n + subPos ]] )
902	{
903	firstNZPosInCG = n;
904	break;
905	}
906	}
907
908	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
909	{
910	absSum += pQCoef[ scan[ n + subPos ]];
911	}
912
913	if(lastNZPosInCG>=0 && lastCG==-1)
914	{
915	lastCG = 1 ;
916	}
917
918	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
919	{
920	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
921	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
922	{
923	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
924
925	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
926	{
927	UInt blkPos = scan[ n+subPos ];
928	if(pQCoef[ blkPos ] != 0 )
929	{
930	if(deltaU[blkPos]>0)
931	{
932	curCost = - deltaU[blkPos];
933	curChange=1 ;
934	}
935	else
936	{
937	//curChange =-1;
938	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
939	{
940	curCost=MAX_INT ;
941	}
942	else
943	{
944	curCost = deltaU[blkPos];
945	curChange =-1;
946	}
947	}
948	}
949	else
950	{
951	if(n<firstNZPosInCG)
952	{
953	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
954	if(thisSignBit != signbit )
955	{
956	curCost = MAX_INT;
957	}
958	else
959	{
960	curCost = - (deltaU[blkPos]) ;
961	curChange = 1 ;
962	}
963	}
964	else
965	{
966	curCost = - (deltaU[blkPos]) ;
967	curChange = 1 ;
968	}
969	}
970
971	if( curCost<minCostInc)
972	{
973	minCostInc = curCost ;
974	finalChange = curChange ;
975	minPos = blkPos ;
976	}
977	} //CG loop
978
979	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
980	{
981	finalChange = -1;
982	}
983
984	if(pCoef[minPos]>=0)
985	{
986	pQCoef[minPos] += finalChange ;
987	}
988	else
989	{
990	pQCoef[minPos] -= finalChange ;
991	}
992	} // Hide
993	}
994	if(lastCG==1)
995	{
996	lastCG=0 ;
997	}
998	} // TU loop
999
1000	return;
1001	}
1002
1003	Void TComTrQuant::xQuant( TComDataCU* pcCU,
1004	Int* pSrc,
1005	TCoeff* pDes,
1006	#if ADAPTIVE_QP_SELECTION
1007	Int*& pArlDes,
1008	#endif
1009	Int iWidth,
1010	Int iHeight,
1011	UInt& uiAcSum,
1012	TextType eTType,
1013	UInt uiAbsPartIdx )
1014	{
1015	Int* piCoef = pSrc;
1016	TCoeff* piQCoef = pDes;
1017	#if ADAPTIVE_QP_SELECTION
1018	Int* piArlCCoef = pArlDes;
1019	#endif
1020	Int iAdd = 0;
1021
1022	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
1023	if ( useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA))
1024	{
1025	#if ADAPTIVE_QP_SELECTION
1026	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1027	#else
1028	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1029	#endif
1030	}
1031	else
1032	{
1033	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1034
1035	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1036	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1037
1038	Int deltaU[32*32] ;
1039
1040	#if ADAPTIVE_QP_SELECTION
1041	QpParam cQpBase;
1042	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1043
1044	Int qpScaled;
1045	#if REPN_FORMAT_IN_VPS
1046	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getQpBDOffsetY() : pcCU->getSlice()->getQpBDOffsetC();
1047	#else
1048	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1049	#endif
1050
1051	if(eTType == TEXT_LUMA)
1052	{
1053	qpScaled = iQpBase + qpBDOffset;
1054	}
1055	else
1056	{
1057	Int chromaQPOffset;
1058	if(eTType == TEXT_CHROMA_U)
1059	{
1060	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
1061	}
1062	else
1063	{
1064	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
1065	}
1066	iQpBase = iQpBase + chromaQPOffset;
1067
1068	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1069
1070	if(qpScaled < 0)
1071	{
1072	qpScaled = qpScaled + qpBDOffset;
1073	}
1074	else
1075	{
1076	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1077	}
1078	}
1079	cQpBase.setQpParam(qpScaled);
1080	#endif
1081
1082	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1083	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1084	assert(scalingListType < 6);
1085	Int *piQuantCoeff = 0;
1086	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1087
1088	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1089	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1090
1091	#if ADAPTIVE_QP_SELECTION
1092	Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1093	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1094	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1095	Int iAddC = 1 << (iQBitsC-1);
1096	#else
1097	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1098	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1099	#endif
1100
1101	Int qBits8 = iQBits-8;
1102	for( Int n = 0; n < iWidth*iHeight; n++ )
1103	{
1104	Int iLevel;
1105	Int iSign;
1106	UInt uiBlockPos = n;
1107	iLevel = piCoef[uiBlockPos];
1108	iSign = (iLevel < 0 ? -1: 1);
1109
1110	#if ADAPTIVE_QP_SELECTION
1111	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1112	if( m_bUseAdaptQpSelect )
1113	{
1114	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1115	}
1116	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1117	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1118	#else
1119	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1120	deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1121	#endif
1122	uiAcSum += iLevel;
1123	iLevel *= iSign;
1124	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1125	} // for n
1126	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1127	{
1128	if(uiAcSum>=2)
1129	{
1130	signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1131	}
1132	}
1133	} //if RDOQ
1134	//return;
1135
1136	}
1137
1138	Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1139	{
1140
1141	const TCoeff* piQCoef = pSrc;
1142	Int* piCoef = pDes;
1143
1144	if ( iWidth > (Int)m_uiMaxTrSize )
1145	{
1146	iWidth = m_uiMaxTrSize;
1147	iHeight = m_uiMaxTrSize;
1148	}
1149
1150	Int iShift,iAdd,iCoeffQ;
1151	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1152
1153	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1154
1155	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1156
1157	TCoeff clipQCoef;
1158
1159	if(getUseScalingList())
1160	{
1161	iShift += 4;
1162	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1163
1164	if(iShift > m_cQP.m_iPer)
1165	{
1166	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1167
1168	for( Int n = 0; n < iWidth*iHeight; n++ )
1169	{
1170	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1171	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1172	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1173	}
1174	}
1175	else
1176	{
1177	for( Int n = 0; n < iWidth*iHeight; n++ )
1178	{
1179	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1180	iCoeffQ = Clip3( -32768, 32767, clipQCoef * piDequantCoef[n] ); // Clip to avoid possible overflow in following shift left operation
1181	piCoef[n] = Clip3( -32768, 32767, iCoeffQ << ( m_cQP.m_iPer - iShift ) );
1182	}
1183	}
1184	}
1185	else
1186	{
1187	iAdd = 1 << (iShift-1);
1188	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1189
1190	for( Int n = 0; n < iWidth*iHeight; n++ )
1191	{
1192	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1193	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1194	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1195	}
1196	}
1197	}
1198
1199	Void TComTrQuant::init( UInt uiMaxTrSize,
1200	Bool bUseRDOQ,
1201	Bool bUseRDOQTS,
1202	Bool bEnc, Bool useTransformSkipFast
1203	#if ADAPTIVE_QP_SELECTION
1204	, Bool bUseAdaptQpSelect
1205	#endif
1206	)
1207	{
1208	m_uiMaxTrSize = uiMaxTrSize;
1209	m_bEnc = bEnc;
1210	m_useRDOQ = bUseRDOQ;
1211	m_useRDOQTS = bUseRDOQTS;
1212	#if ADAPTIVE_QP_SELECTION
1213	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1214	#endif
1215	m_useTransformSkipFast = useTransformSkipFast;
1216	}
1217
1218	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1219	Pel* pcResidual,
1220	UInt uiStride,
1221	TCoeff* rpcCoeff,
1222	#if ADAPTIVE_QP_SELECTION
1223	Int*& rpcArlCoeff,
1224	#endif
1225	UInt uiWidth,
1226	UInt uiHeight,
1227	UInt& uiAbsSum,
1228	TextType eTType,
1229	UInt uiAbsPartIdx,
1230	Bool useTransformSkip
1231	)
1232	{
1233	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1234	{
1235	uiAbsSum=0;
1236	for (UInt k = 0; k<uiHeight; k++)
1237	{
1238	for (UInt j = 0; j<uiWidth; j++)
1239	{
1240	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1241	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1242	}
1243	}
1244	return;
1245	}
1246	UInt uiMode; //luma intra pred
1247	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1248	{
1249	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1250	}
1251	else
1252	{
1253	uiMode = REG_DCT;
1254	}
1255
1256	uiAbsSum = 0;
1257	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1258	Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1259	if(useTransformSkip)
1260	{
1261	xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1262	}
1263	else
1264	{
1265	xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1266	}
1267	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1268	#if ADAPTIVE_QP_SELECTION
1269	rpcArlCoeff,
1270	#endif
1271	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1272	}
1273
1274	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1275	{
1276	if(transQuantBypass)
1277	{
1278	for (UInt k = 0; k<uiHeight; k++)
1279	{
1280	for (UInt j = 0; j<uiWidth; j++)
1281	{
1282	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1283	}
1284	}
1285	return;
1286	}
1287	Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1288	xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1289	if(useTransformSkip == true)
1290	{
1291	xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1292	}
1293	else
1294	{
1295	xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1296	}
1297	}
1298
1299	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1300	{
1301	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1302	{
1303	return;
1304	}
1305	const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1306
1307	if( uiTrMode == stopTrMode )
1308	{
1309	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1310	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1311	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1312	{
1313	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1314	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1315	{
1316	return;
1317	}
1318	uiWidth <<= 1;
1319	uiHeight <<= 1;
1320	}
1321	Pel* pResi = rpcResidual + uiAddr;
1322	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1323	assert(scalingListType < 6);
1324	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1325	}
1326	else
1327	{
1328	uiTrMode++;
1329	uiWidth >>= 1;
1330	uiHeight >>= 1;
1331	Int trWidth = uiWidth, trHeight = uiHeight;
1332	UInt uiAddrOffset = trHeight * uiStride;
1333	UInt uiCoefOffset = trWidth * trHeight;
1334	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1335	{
1336	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1337	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1338	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1339	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1340	}
1341	}
1342	}
1343
1344	// ------------------------------------------------------------------------------------------------
1345	// Logical transform
1346	// ------------------------------------------------------------------------------------------------
1347
1348	/** Wrapper function between HM interface and core NxN forward transform (2D)
1349	* \param piBlkResi input data (residual)
1350	* \param psCoeff output data (transform coefficients)
1351	* \param uiStride stride of input residual data
1352	* \param iSize transform size (iSize x iSize)
1353	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1354	*/
1355	Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1356	{
1357	#if MATRIX_MULT
1358	Int iSize = iWidth;
1359	xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1360	#else
1361	Int j;
1362	Short block[ 32 * 32 ];
1363	Short coeff[ 32 * 32 ];
1364	for (j = 0; j < iHeight; j++)
1365	{
1366	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) );
1367	}
1368	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );
1369	for ( j = 0; j < iHeight * iWidth; j++ )
1370	{
1371	psCoeff[ j ] = coeff[ j ];
1372	}
1373	#endif
1374	}
1375
1376
1377	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1378	* \param plCoef input data (transform coefficients)
1379	* \param pResidual output data (residual)
1380	* \param uiStride stride of input residual data
1381	* \param iSize transform size (iSize x iSize)
1382	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1383	*/
1384	Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1385	{
1386	#if MATRIX_MULT
1387	Int iSize = iWidth;
1388	xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1389	#else
1390	Int j;
1391	{
1392	Short block[ 32 * 32 ];
1393	Short coeff[ 32 * 32 ];
1394	for ( j = 0; j < iHeight * iWidth; j++ )
1395	{
1396	coeff[j] = (Short)plCoef[j];
1397	}
1398	xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode );
1399	{
1400	for ( j = 0; j < iHeight; j++ )
1401	{
1402	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );
1403	}
1404	}
1405	return ;
1406	}
1407	#endif
1408	}
1409
1410	/** Wrapper function between HM interface and core 4x4 transform skipping
1411	* \param piBlkResi input data (residual)
1412	* \param psCoeff output data (transform coefficients)
1413	* \param uiStride stride of input residual data
1414	* \param iSize transform size (iSize x iSize)
1415	*/
1416	Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1417	{
1418	assert( width == height );
1419	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1420	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1421	UInt transformSkipShift;
1422	Int j,k;
1423	if(shift >= 0)
1424	{
1425	transformSkipShift = shift;
1426	for (j = 0; j < height; j++)
1427	{
1428	for(k = 0; k < width; k ++)
1429	{
1430	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1431	}
1432	}
1433	}
1434	else
1435	{
1436	//The case when uiBitDepth > 13
1437	Int offset;
1438	transformSkipShift = -shift;
1439	offset = (1 << (transformSkipShift - 1));
1440	for (j = 0; j < height; j++)
1441	{
1442	for(k = 0; k < width; k ++)
1443	{
1444	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1445	}
1446	}
1447	}
1448	}
1449
1450	/** Wrapper function between HM interface and core NxN transform skipping
1451	* \param plCoef input data (coefficients)
1452	* \param pResidual output data (residual)
1453	* \param uiStride stride of input residual data
1454	* \param iSize transform size (iSize x iSize)
1455	*/
1456	Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1457	{
1458	assert( width == height );
1459	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1460	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1461	UInt transformSkipShift;
1462	Int j,k;
1463	if(shift > 0)
1464	{
1465	Int offset;
1466	transformSkipShift = shift;
1467	offset = (1 << (transformSkipShift -1));
1468	for ( j = 0; j < height; j++ )
1469	{
1470	for(k = 0; k < width; k ++)
1471	{
1472	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1473	}
1474	}
1475	}
1476	else
1477	{
1478	//The case when uiBitDepth >= 13
1479	transformSkipShift = - shift;
1480	for ( j = 0; j < height; j++ )
1481	{
1482	for(k = 0; k < width; k ++)
1483	{
1484	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1485	}
1486	}
1487	}
1488	}
1489
1490	/** RDOQ with CABAC
1491	* \param pcCU pointer to coding unit structure
1492	* \param plSrcCoeff pointer to input buffer
1493	* \param piDstCoeff reference to pointer to output buffer
1494	* \param uiWidth block width
1495	* \param uiHeight block height
1496	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1497	* \param eTType plane type / luminance or chrominance
1498	* \param uiAbsPartIdx absolute partition index
1499	* \returns Void
1500	* Rate distortion optimized quantization for entropy
1501	* coding engines using probability models like CABAC
1502	*/
1503	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1504	Int* plSrcCoeff,
1505	TCoeff* piDstCoeff,
1506	#if ADAPTIVE_QP_SELECTION
1507	Int*& piArlDstCoeff,
1508	#endif
1509	UInt uiWidth,
1510	UInt uiHeight,
1511	UInt& uiAbsSum,
1512	TextType eTType,
1513	UInt uiAbsPartIdx )
1514	{
1515	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1516
1517	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1518	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1519	UInt uiGoRiceParam = 0;
1520	Double d64BlockUncodedCost = 0;
1521	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1522	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1523	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1524	assert(scalingListType < 6);
1525
1526	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1527	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
1528	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1529	Int *piQCoef = piQCoefOrg;
1530	Double *pdErrScale = pdErrScaleOrg;
1531	#if ADAPTIVE_QP_SELECTION
1532	Int iQBitsC = iQBits - ARL_C_PRECISION;
1533	Int iAddC = 1 << (iQBitsC-1);
1534	#endif
1535	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1536
1537	#if ADAPTIVE_QP_SELECTION
1538	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1539	#endif
1540
1541	Double pdCostCoeff [ 32 * 32 ];
1542	Double pdCostSig [ 32 * 32 ];
1543	Double pdCostCoeff0[ 32 * 32 ];
1544	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1545	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1546	Int rateIncUp [ 32 * 32 ];
1547	Int rateIncDown [ 32 * 32 ];
1548	Int sigRateDelta[ 32 * 32 ];
1549	Int deltaU [ 32 * 32 ];
1550	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1551	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1552	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1553	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1554
1555	const UInt * scanCG;
1556	{
1557	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1558	if( uiLog2BlkSize == 3 )
1559	{
1560	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1561	}
1562	else if( uiLog2BlkSize == 5 )
1563	{
1564	scanCG = g_sigLastScanCG32x32;
1565	}
1566	}
1567	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1568	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1569	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1570	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1571	Int iCGLastScanPos = -1;
1572
1573	UInt uiCtxSet = 0;
1574	Int c1 = 1;
1575	Int c2 = 0;
1576	Double d64BaseCost = 0;
1577	Int iLastScanPos = -1;
1578
1579	UInt c1Idx = 0;
1580	UInt c2Idx = 0;
1581	Int baseLevel;
1582
1583	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1584
1585	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1586	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1587
1588	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1589	Int iScanPos;
1590	coeffGroupRDStats rdStats;
1591
1592	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1593	{
1594	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1595	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1596	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1597	#if MAYBE_BUGFIX
1598	rdStats.init();
1599	#else
1600	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1601	#endif
1602	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1603	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1604	{
1605	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1606	//===== quantization =====
1607	UInt uiBlkPos = scan[iScanPos];
1608	// set coeff
1609	Int uiQ = piQCoef[uiBlkPos];
1610	Double dTemp = pdErrScale[uiBlkPos];
1611	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1612	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1613	#if ADAPTIVE_QP_SELECTION
1614	if( m_bUseAdaptQpSelect )
1615	{
1616	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1617	}
1618	#endif
1619	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1620
1621	Double dErr = Double( lLevelDouble );
1622	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1623	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1624	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1625
1626	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1627	{
1628	iLastScanPos = iScanPos;
1629	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1630	iCGLastScanPos = iCGScanPos;
1631	}
1632
1633	if ( iLastScanPos >= 0 )
1634	{
1635	//===== coefficient level estimation =====
1636	UInt uiLevel;
1637	UInt uiOneCtx = 4 * uiCtxSet + c1;
1638	UInt uiAbsCtx = uiCtxSet + c2;
1639
1640	if( iScanPos == iLastScanPos )
1641	{
1642	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1643	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1644	c1Idx, c2Idx, iQBits, dTemp, 1 );
1645	}
1646	else
1647	{
1648	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1649	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1650	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
1651	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1652	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1653	c1Idx, c2Idx, iQBits, dTemp, 0 );
1654	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1655	}
1656	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1657	if( uiLevel > 0 )
1658	{
1659	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1660	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1661	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1662	}
1663	else // uiLevel == 0
1664	{
1665	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1666	}
1667	piDstCoeff[ uiBlkPos ] = uiLevel;
1668	d64BaseCost += pdCostCoeff [ iScanPos ];
1669
1670
1671	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1672	if( uiLevel >= baseLevel )
1673	{
1674	if(uiLevel > 3*(1<<uiGoRiceParam))
1675	{
1676	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1677	}
1678	}
1679	if ( uiLevel >= 1)
1680	{
1681	c1Idx ++;
1682	}
1683
1684	//===== update bin model =====
1685	if( uiLevel > 1 )
1686	{
1687	c1 = 0;
1688	c2 += (c2 < 2);
1689	c2Idx ++;
1690	}
1691	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1692	{
1693	c1++;
1694	}
1695
1696	//===== context set update =====
1697	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1698	{
1699	c2 = 0;
1700	uiGoRiceParam = 0;
1701
1702	c1Idx = 0;
1703	c2Idx = 0;
1704	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1705	if( c1 == 0 )
1706	{
1707	uiCtxSet++;
1708	}
1709	c1 = 1;
1710	}
1711	}
1712	else
1713	{
1714	d64BaseCost += pdCostCoeff0[ iScanPos ];
1715	}
1716	rdStats.d64SigCost += pdCostSig[ iScanPos ];
1717	if (iScanPosinCG == 0 )
1718	{
1719	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
1720	}
1721	if (piDstCoeff[ uiBlkPos ] )
1722	{
1723	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1724	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
1725	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
1726	if ( iScanPosinCG != 0 )
1727	{
1728	rdStats.iNNZbeforePos0++;
1729	}
1730	}
1731	} //end for (iScanPosinCG)
1732
1733	if (iCGLastScanPos >= 0)
1734	{
1735	if( iCGScanPos )
1736	{
1737	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1738	{
1739	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1740	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
1741	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1742	}
1743	else
1744	{
1745	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
1746	{
1747	if ( rdStats.iNNZbeforePos0 == 0 )
1748	{
1749	d64BaseCost -= rdStats.d64SigCost_0;
1750	rdStats.d64SigCost -= rdStats.d64SigCost_0;
1751	}
1752	// rd-cost if SigCoeffGroupFlag = 0, initialization
1753	Double d64CostZeroCG = d64BaseCost;
1754
1755	// add SigCoeffGroupFlag cost to total cost
1756	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1757	if (iCGScanPos < iCGLastScanPos)
1758	{
1759	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
1760	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
1761	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
1762	}
1763
1764	// try to convert the current coeff group from non-zero to all-zero
1765	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
1766	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
1767	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
1768
1769	// if we can save cost, change this block to all-zero block
1770	if ( d64CostZeroCG < d64BaseCost )
1771	{
1772	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
1773	d64BaseCost = d64CostZeroCG;
1774	if (iCGScanPos < iCGLastScanPos)
1775	{
1776	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1777	}
1778	// reset coeffs to 0 in this block
1779	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1780	{
1781	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1782	UInt uiBlkPos = scan[ iScanPos ];
1783
1784	if (piDstCoeff[ uiBlkPos ])
1785	{
1786	piDstCoeff [ uiBlkPos ] = 0;
1787	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
1788	pdCostSig [ iScanPos ] = 0;
1789	}
1790	}
1791	} // end if ( d64CostAllZeros < d64BaseCost )
1792	}
1793	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1794	}
1795	else
1796	{
1797	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1798	}
1799	}
1800	} //end for (iCGScanPos)
1801
1802	//===== estimate last position =====
1803	if ( iLastScanPos < 0 )
1804	{
1805	return;
1806	}
1807
1808	Double d64BestCost = 0;
1809	Int ui16CtxCbf = 0;
1810	Int iBestLastIdxP1 = 0;
1811	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1812	{
1813	ui16CtxCbf = 0;
1814	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
1815	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
1816	}
1817	else
1818	{
1819	ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
1820	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
1821	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
1822	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
1823	}
1824
1825	Bool bFoundLast = false;
1826	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
1827	{
1828	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1829
1830	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
1831	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1832	{
1833	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1834	{
1835	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1836	if (iScanPos > iLastScanPos) continue;
1837	UInt uiBlkPos = scan[iScanPos];
1838
1839	if( piDstCoeff[ uiBlkPos ] )
1840	{
1841	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1842	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1843
1844	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
1845	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
1846
1847	if( totalCost < d64BestCost )
1848	{
1849	iBestLastIdxP1 = iScanPos + 1;
1850	d64BestCost = totalCost;
1851	}
1852	if( piDstCoeff[ uiBlkPos ] > 1 )
1853	{
1854	bFoundLast = true;
1855	break;
1856	}
1857	d64BaseCost -= pdCostCoeff[ iScanPos ];
1858	d64BaseCost += pdCostCoeff0[ iScanPos ];
1859	}
1860	else
1861	{
1862	d64BaseCost -= pdCostSig[ iScanPos ];
1863	}
1864	} //end for
1865	if (bFoundLast)
1866	{
1867	break;
1868	}
1869	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1870	} // end for
1871
1872	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
1873	{
1874	Int blkPos = scan[ scanPos ];
1875	Int level = piDstCoeff[ blkPos ];
1876	uiAbsSum += level;
1877	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
1878	}
1879
1880	//===== clean uncoded coefficients =====
1881	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
1882	{
1883	piDstCoeff[ scan[ scanPos ] ] = 0;
1884	}
1885
1886	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
1887	{
1888	Int64 rdFactor = (Int64) (
1889	g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
1890	/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
1891	+ 0.5);
1892	Int lastCG = -1;
1893	Int absSum = 0 ;
1894	Int n ;
1895
1896	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1897	{
1898	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1899	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1900	absSum = 0 ;
1901
1902	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1903	{
1904	if( piDstCoeff[ scan[ n + subPos ]] )
1905	{
1906	lastNZPosInCG = n;
1907	break;
1908	}
1909	}
1910
1911	for(n = 0; n <SCAN_SET_SIZE; n++ )
1912	{
1913	if( piDstCoeff[ scan[ n + subPos ]] )
1914	{
1915	firstNZPosInCG = n;
1916	break;
1917	}
1918	}
1919
1920	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1921	{
1922	absSum += piDstCoeff[ scan[ n + subPos ]];
1923	}
1924
1925	if(lastNZPosInCG>=0 && lastCG==-1)
1926	{
1927	lastCG = 1;
1928	}
1929
1930	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1931	{
1932	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
1933	if( signbit!=(absSum&0x1) ) // hide but need tune
1934	{
1935	// calculate the cost
1936	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
1937	Int minPos =-1, finalChange=0, curChange=0;
1938
1939	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1940	{
1941	UInt uiBlkPos = scan[ n + subPos ];
1942	if(piDstCoeff[ uiBlkPos ] != 0 )
1943	{
1944	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
1945	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1946	- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
1947
1948	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1949	{
1950	costDown -= (4<<15) ;
1951	}
1952
1953	if(costUp<costDown)
1954	{
1955	curCost = costUp;
1956	curChange = 1 ;
1957	}
1958	else
1959	{
1960	curChange = -1 ;
1961	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1962	{
1963	curCost = MAX_INT64 ;
1964	}
1965	else
1966	{
1967	curCost = costDown ;
1968	}
1969	}
1970	}
1971	else
1972	{
1973	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1974	curChange = 1 ;
1975
1976	if(n<firstNZPosInCG)
1977	{
1978	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1979	if(thissignbit != signbit )
1980	{
1981	curCost = MAX_INT64;
1982	}
1983	}
1984	}
1985
1986	if( curCost<minCostInc)
1987	{
1988	minCostInc = curCost ;
1989	finalChange = curChange ;
1990	minPos = uiBlkPos ;
1991	}
1992	}
1993
1994	if(piDstCoeff[minPos] == 32767 \|\| piDstCoeff[minPos] == -32768)
1995	{
1996	finalChange = -1;
1997	}
1998
1999	if(plSrcCoeff[minPos]>=0)
2000	{
2001	piDstCoeff[minPos] += finalChange ;
2002	}
2003	else
2004	{
2005	piDstCoeff[minPos] -= finalChange ;
2006	}
2007	}
2008	}
2009
2010	if(lastCG==1)
2011	{
2012	lastCG=0 ;
2013	}
2014	}
2015	}
2016	}
2017
2018	/** Pattern decision for context derivation process of significant_coeff_flag
2019	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2020	* \param posXCG column of current coefficient group
2021	* \param posYCG row of current coefficient group
2022	* \param width width of the block
2023	* \param height height of the block
2024	* \returns pattern for current coefficient group
2025	*/
2026	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2027	{
2028	if( width == 4 && height == 4 ) return -1;
2029
2030	UInt sigRight = 0;
2031	UInt sigLower = 0;
2032
2033	width >>= 2;
2034	height >>= 2;
2035	if( posXCG < width - 1 )
2036	{
2037	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2038	}
2039	if (posYCG < height - 1 )
2040	{
2041	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2042	}
2043	return sigRight + (sigLower<<1);
2044	}
2045
2046	/** Context derivation process of coeff_abs_significant_flag
2047	* \param patternSigCtx pattern for current coefficient group
2048	* \param posX column of current scan position
2049	* \param posY row of current scan position
2050	* \param log2BlockSize log2 value of block size (square block)
2051	* \param width width of the block
2052	* \param height height of the block
2053	* \param textureType texture type (TEXT_LUMA...)
2054	* \returns ctxInc for current scan position
2055	*/
2056	Int TComTrQuant::getSigCtxInc (
2057	Int patternSigCtx,
2058	UInt scanIdx,
2059	Int posX,
2060	Int posY,
2061	Int log2BlockSize,
2062	TextType textureType
2063	)
2064	{
2065	const Int ctxIndMap[16] =
2066	{
2067	0, 1, 4, 5,
2068	2, 3, 4, 5,
2069	6, 6, 8, 8,
2070	7, 7, 8, 8
2071	};
2072
2073	if( posX + posY == 0 )
2074	{
2075	return 0;
2076	}
2077
2078	if ( log2BlockSize == 2 )
2079	{
2080	return ctxIndMap[ 4 * posY + posX ];
2081	}
2082
2083	Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2084
2085	Int posXinSubset = posX-((posX>>2)<<2);
2086	Int posYinSubset = posY-((posY>>2)<<2);
2087	Int cnt = 0;
2088	if(patternSigCtx==0)
2089	{
2090	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2091	}
2092	else if(patternSigCtx==1)
2093	{
2094	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2095	}
2096	else if(patternSigCtx==2)
2097	{
2098	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2099	}
2100	else
2101	{
2102	cnt = 2;
2103	}
2104
2105	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2106	}
2107
2108	/** Get the best level in RD sense
2109	* \param rd64CodedCost reference to coded cost
2110	* \param rd64CodedCost0 reference to cost when coefficient is 0
2111	* \param rd64CodedCostSig reference to cost of significant coefficient
2112	* \param lLevelDouble reference to unscaled quantized level
2113	* \param uiMaxAbsLevel scaled quantized level
2114	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2115	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2116	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2117	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2118	* \param iQBits quantization step size
2119	* \param dTemp correction factor
2120	* \param bLast indicates if the coefficient is the last significant
2121	* \returns best quantized transform level for given scan position
2122	* This method calculates the best quantized transform level for a given scan position.
2123	*/
2124	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2125	Double& rd64CodedCost0,
2126	Double& rd64CodedCostSig,
2127	Int lLevelDouble,
2128	UInt uiMaxAbsLevel,
2129	UShort ui16CtxNumSig,
2130	UShort ui16CtxNumOne,
2131	UShort ui16CtxNumAbs,
2132	UShort ui16AbsGoRice,
2133	UInt c1Idx,
2134	UInt c2Idx,
2135	Int iQBits,
2136	Double dTemp,
2137	Bool bLast ) const
2138	{
2139	Double dCurrCostSig = 0;
2140	UInt uiBestAbsLevel = 0;
2141
2142	if( !bLast && uiMaxAbsLevel < 3 )
2143	{
2144	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2145	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2146	if( uiMaxAbsLevel == 0 )
2147	{
2148	return uiBestAbsLevel;
2149	}
2150	}
2151	else
2152	{
2153	rd64CodedCost = MAX_DOUBLE;
2154	}
2155
2156	if( !bLast )
2157	{
2158	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2159	}
2160
2161	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2162	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2163	{
2164	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2165	Double dCurrCost = dErr * dErr * dTemp + xGetICost(xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx ));
2166	dCurrCost += dCurrCostSig;
2167
2168	if( dCurrCost < rd64CodedCost )
2169	{
2170	uiBestAbsLevel = uiAbsLevel;
2171	rd64CodedCost = dCurrCost;
2172	rd64CodedCostSig = dCurrCostSig;
2173	}
2174	}
2175
2176	return uiBestAbsLevel;
2177	}
2178
2179	/** Calculates the cost for specific absolute transform level
2180	* \param uiAbsLevel scaled quantized level
2181	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2182	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2183	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2184	* \returns cost of given absolute transform level
2185	*/
2186	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2187	UShort ui16CtxNumOne,
2188	UShort ui16CtxNumAbs,
2189	UShort ui16AbsGoRice
2190	, UInt c1Idx,
2191	UInt c2Idx
2192	) const
2193	{
2194	Int iRate = Int(xGetIEPRate());
2195	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2196
2197	if ( uiAbsLevel >= baseLevel )
2198	{
2199	UInt symbol = uiAbsLevel - baseLevel;
2200	UInt length;
2201	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2202	{
2203	length = symbol>>ui16AbsGoRice;
2204	iRate += (length+1+ui16AbsGoRice)<< 15;
2205	}
2206	else
2207	{
2208	length = ui16AbsGoRice;
2209	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2210	while (symbol >= (1<<length))
2211	{
2212	symbol -= (1<<(length++));
2213	}
2214	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2215	}
2216	if (c1Idx < C1FLAG_NUMBER)
2217	{
2218	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2219
2220	if (c2Idx < C2FLAG_NUMBER)
2221	{
2222	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2223	}
2224	}
2225	}
2226	else
2227	if( uiAbsLevel == 1 )
2228	{
2229	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2230	}
2231	else if( uiAbsLevel == 2 )
2232	{
2233	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2234	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2235	}
2236	else
2237	{
2238	iRate = 0;
2239	}
2240	return iRate;
2241	}
2242
2243	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2244	UShort ui16CtxNumSig ) const
2245	{
2246	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2247	}
2248
2249	/** Calculates the cost of signaling the last significant coefficient in the block
2250	* \param uiPosX X coordinate of the last significant coefficient
2251	* \param uiPosY Y coordinate of the last significant coefficient
2252	* \returns cost of last significant coefficient
2253	*/
2254	/*
2255	* \param uiWidth width of the transform unit (TU)
2256	*/
2257	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2258	const UInt uiPosY ) const
2259	{
2260	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2261	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2262	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2263	if( uiCtxX > 3 )
2264	{
2265	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2266	}
2267	if( uiCtxY > 3 )
2268	{
2269	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2270	}
2271	return xGetICost( uiCost );
2272	}
2273
2274	/** Calculates the cost for specific absolute transform level
2275	* \param uiAbsLevel scaled quantized level
2276	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2277	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2278	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2279	* \returns cost of given absolute transform level
2280	*/
2281	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2282	UShort ui16CtxNumSig ) const
2283	{
2284	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2285	}
2286
2287	/** Get the cost for a specific rate
2288	* \param dRate rate of a bit
2289	* \returns cost at the specific rate
2290	*/
2291	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2292	{
2293	return m_dLambda * dRate;
2294	}
2295
2296	/** Get the cost of an equal probable bit
2297	* \returns cost of equal probable bit
2298	*/
2299	__inline Double TComTrQuant::xGetIEPRate ( ) const
2300	{
2301	return 32768;
2302	}
2303
2304	/** Context derivation process of coeff_abs_significant_flag
2305	* \param uiSigCoeffGroupFlag significance map of L1
2306	* \param uiBlkX column of current scan position
2307	* \param uiBlkY row of current scan position
2308	* \param uiLog2BlkSize log2 value of block size
2309	* \returns ctxInc for current scan position
2310	*/
2311	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2312	const UInt uiCGPosX,
2313	const UInt uiCGPosY,
2314	Int width, Int height)
2315	{
2316	UInt uiRight = 0;
2317	UInt uiLower = 0;
2318
2319	width >>= 2;
2320	height >>= 2;
2321	if( uiCGPosX < width - 1 )
2322	{
2323	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2324	}
2325	if (uiCGPosY < height - 1 )
2326	{
2327	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2328	}
2329	return (uiRight \|\| uiLower);
2330
2331	}
2332	/** set quantized matrix coefficient for encode
2333	* \param scalingList quantaized matrix address
2334	*/
2335	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2336	{
2337	UInt size,list;
2338	UInt qp;
2339
2340	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2341	{
2342	for(list = 0; list < g_scalingListNum[size]; list++)
2343	{
2344	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2345	{
2346	xSetScalingListEnc(scalingList,list,size,qp);
2347	xSetScalingListDec(scalingList,list,size,qp);
2348	setErrScaleCoeff(list,size,qp);
2349	}
2350	}
2351	}
2352	}
2353	/** set quantized matrix coefficient for decode
2354	* \param scalingList quantaized matrix address
2355	*/
2356	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2357	{
2358	UInt size,list;
2359	UInt qp;
2360
2361	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2362	{
2363	for(list = 0; list < g_scalingListNum[size]; list++)
2364	{
2365	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2366	{
2367	xSetScalingListDec(scalingList,list,size,qp);
2368	}
2369	}
2370	}
2371	}
2372	/** set error scale coefficients
2373	* \param list List ID
2374	* \param uiSize Size
2375	* \param uiQP Quantization parameter
2376	*/
2377	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp)
2378	{
2379
2380	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2381	Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
2382	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
2383
2384	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2385	Int *piQuantcoeff;
2386	Double *pdErrScale;
2387	piQuantcoeff = getQuantCoeff(list, qp,size);
2388	pdErrScale = getErrScaleCoeff(list, size, qp);
2389
2390	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2391	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2392	for(i=0;i<uiMaxNumCoeff;i++)
2393	{
2394	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8)));
2395	}
2396	}
2397
2398	/** set quantized matrix coefficient for encode
2399	* \param scalingList quantaized matrix address
2400	* \param listId List index
2401	* \param sizeId size index
2402	* \param uiQP Quantization parameter
2403	*/
2404	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2405	{
2406	UInt width = g_scalingListSizeX[sizeId];
2407	UInt height = g_scalingListSizeX[sizeId];
2408	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2409	Int *quantcoeff;
2410	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2411	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2412
2413	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2414	}
2415	/** set quantized matrix coefficient for decode
2416	* \param scalingList quantaized matrix address
2417	* \param list List index
2418	* \param size size index
2419	* \param uiQP Quantization parameter
2420	*/
2421	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2422	{
2423	UInt width = g_scalingListSizeX[sizeId];
2424	UInt height = g_scalingListSizeX[sizeId];
2425	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2426	Int *dequantcoeff;
2427	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2428
2429	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2430	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2431	}
2432
2433	/** set flat matrix value to quantized coefficient
2434	*/
2435	Void TComTrQuant::setFlatScalingList()
2436	{
2437	UInt size,list;
2438	UInt qp;
2439
2440	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2441	{
2442	for(list = 0; list < g_scalingListNum[size]; list++)
2443	{
2444	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2445	{
2446	xsetFlatScalingList(list,size,qp);
2447	setErrScaleCoeff(list,size,qp);
2448	}
2449	}
2450	}
2451	}
2452
2453	/** set flat matrix value to quantized coefficient
2454	* \param list List ID
2455	* \param uiQP Quantization parameter
2456	* \param uiSize Size
2457	*/
2458	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2459	{
2460	UInt i,num = g_scalingListSize[size];
2461	Int *quantcoeff;
2462	Int *dequantcoeff;
2463	Int quantScales = g_quantScales[qp];
2464	Int invQuantScales = g_invQuantScales[qp]<<4;
2465
2466	quantcoeff = getQuantCoeff(list, qp, size);
2467	dequantcoeff = getDequantCoeff(list, qp, size);
2468
2469	for(i=0;i<num;i++)
2470	{
2471	*quantcoeff++ = quantScales;
2472	*dequantcoeff++ = invQuantScales;
2473	}
2474	}
2475
2476	/** set quantized matrix coefficient for encode
2477	* \param coeff quantaized matrix address
2478	* \param quantcoeff quantaized matrix address
2479	* \param quantScales Q(QP%6)
2480	* \param height height
2481	* \param width width
2482	* \param ratio ratio for upscale
2483	* \param sizuNum matrix size
2484	* \param dc dc parameter
2485	*/
2486	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2487	{
2488	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2489	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2490	for(UInt j=0;j<height;j++)
2491	{
2492	for(UInt i=0;i<width;i++)
2493	{
2494	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2495	}
2496	}
2497	if(ratio > 1)
2498	{
2499	quantcoeff[0] = quantScales / dc;
2500	}
2501	}
2502	/** set quantized matrix coefficient for decode
2503	* \param coeff quantaized matrix address
2504	* \param dequantcoeff quantaized matrix address
2505	* \param invQuantScales IQ(QP%6))
2506	* \param height height
2507	* \param width width
2508	* \param ratio ratio for upscale
2509	* \param sizuNum matrix size
2510	* \param dc dc parameter
2511	*/
2512	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2513	{
2514	for(UInt j=0;j<height;j++)
2515	{
2516	for(UInt i=0;i<width;i++)
2517	{
2518	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2519	}
2520	}
2521	if(ratio > 1)
2522	{
2523	dequantcoeff[0] = invQuantScales * dc;
2524	}
2525	}
2526
2527	/** initialization process of scaling list array
2528	*/
2529	Void TComTrQuant::initScalingList()
2530	{
2531	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2532	{
2533	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2534	{
2535	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2536	{
2537	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2538	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2539	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
2540	}
2541	}
2542	}
2543	// alias list [1] as [3].
2544	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2545	{
2546	m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp];
2547	m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp];
2548	m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp];
2549	}
2550	}
2551	/** destroy quantization matrix array
2552	*/
2553	Void TComTrQuant::destroyScalingList()
2554	{
2555	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2556	{
2557	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2558	{
2559	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2560	{
2561	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
2562	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
2563	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
2564	}
2565	}
2566	}
2567	}
2568
2569	//! \}

Note: See TracBrowser for help on using the repository browser.

JCT-VC SHVC

Context navigation

source: SHVCSoftware/branches/SHM-4.1-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 1164

Download in other formats: