Context navigation

TComTrQuant.cpp @ 446

Visit:

Last change on this file since 446 was 446, checked in by tech, 11 years ago
Added missing parts.
Property svn:eol-style set to `native`
File size: 83.9 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2013, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(Int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_useRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	/** Set qP for Quantization.
183	* \param qpy QPy
184	* \param bLowpass
185	* \param eSliceType
186	* \param eTxtType
187	* \param qpBdOffset
188	* \param chromaQPOffset
189	*
190	* return void
191	*/
192	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
193	{
194	Int qpScaled;
195
196	if(eTxtType == TEXT_LUMA)
197	{
198	qpScaled = qpy + qpBdOffset;
199	}
200	else
201	{
202	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
203
204	if(qpScaled < 0)
205	{
206	qpScaled = qpScaled + qpBdOffset;
207	}
208	else
209	{
210	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
211	}
212	}
213	m_cQP.setQpParam( qpScaled );
214	}
215
216	#if MATRIX_MULT
217	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
218	* \param block pointer to input data (residual)
219	* \param coeff pointer to output data (transform coefficients)
220	* \param uiStride stride of input data
221	* \param uiTrSize transform size (uiTrSize x uiTrSize)
222	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
223	*/
224	void xTr(Int bitDepth, Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
225	{
226	Int i,j,k,iSum;
227	Int tmp[32*32];
228	const Short *iT;
229	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
230
231	if (uiTrSize==4)
232	{
233	iT = g_aiT4[0];
234	}
235	else if (uiTrSize==8)
236	{
237	iT = g_aiT8[0];
238	}
239	else if (uiTrSize==16)
240	{
241	iT = g_aiT16[0];
242	}
243	else if (uiTrSize==32)
244	{
245	iT = g_aiT32[0];
246	}
247	else
248	{
249	assert(0);
250	}
251
252	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
253	Int add_1st = 1<<(shift_1st-1);
254	Int shift_2nd = uiLog2TrSize + 6;
255	Int add_2nd = 1<<(shift_2nd-1);
256
257	/* Horizontal transform */
258
259	if (uiTrSize==4)
260	{
261	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
262	{
263	iT = g_as_DST_MAT_4[0];
264	}
265	}
266	for (i=0; i<uiTrSize; i++)
267	{
268	for (j=0; j<uiTrSize; j++)
269	{
270	iSum = 0;
271	for (k=0; k<uiTrSize; k++)
272	{
273	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
274	}
275	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
276	}
277	}
278
279	/* Vertical transform */
280	if (uiTrSize==4)
281	{
282	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
283	{
284	iT = g_as_DST_MAT_4[0];
285	}
286	else
287	{
288	iT = g_aiT4[0];
289	}
290	}
291	for (i=0; i<uiTrSize; i++)
292	{
293	for (j=0; j<uiTrSize; j++)
294	{
295	iSum = 0;
296	for (k=0; k<uiTrSize; k++)
297	{
298	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
299	}
300	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
301	}
302	}
303	}
304
305	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
306	* \param coeff pointer to input data (transform coefficients)
307	* \param block pointer to output data (residual)
308	* \param uiStride stride of output data
309	* \param uiTrSize transform size (uiTrSize x uiTrSize)
310	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
311	*/
312	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
313	{
314	Int i,j,k,iSum;
315	Int tmp[32*32];
316	const Short *iT;
317
318	if (uiTrSize==4)
319	{
320	iT = g_aiT4[0];
321	}
322	else if (uiTrSize==8)
323	{
324	iT = g_aiT8[0];
325	}
326	else if (uiTrSize==16)
327	{
328	iT = g_aiT16[0];
329	}
330	else if (uiTrSize==32)
331	{
332	iT = g_aiT32[0];
333	}
334	else
335	{
336	assert(0);
337	}
338
339	Int shift_1st = SHIFT_INV_1ST;
340	Int add_1st = 1<<(shift_1st-1);
341	Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8;
342	Int add_2nd = 1<<(shift_2nd-1);
343	if (uiTrSize==4)
344	{
345	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
346	{
347	iT = g_as_DST_MAT_4[0];
348	}
349	}
350
351	/* Horizontal transform */
352	for (i=0; i<uiTrSize; i++)
353	{
354	for (j=0; j<uiTrSize; j++)
355	{
356	iSum = 0;
357	for (k=0; k<uiTrSize; k++)
358	{
359	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
360	}
361	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
362	}
363	}
364
365	if (uiTrSize==4)
366	{
367	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
368	{
369	iT = g_as_DST_MAT_4[0];
370	}
371	else
372	{
373	iT = g_aiT4[0];
374	}
375	}
376
377	/* Vertical transform */
378	for (i=0; i<uiTrSize; i++)
379	{
380	for (j=0; j<uiTrSize; j++)
381	{
382	iSum = 0;
383	for (k=0; k<uiTrSize; k++)
384	{
385	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
386	}
387	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
388	}
389	}
390	}
391
392	#else //MATRIX_MULT
393
394	/** 4x4 forward transform implemented using partial butterfly structure (1D)
395	* \param src input data (residual)
396	* \param dst output data (transform coefficients)
397	* \param shift specifies right shift after 1D transform
398	*/
399
400	void partialButterfly4(Short src,Short dst,Int shift, Int line)
401	{
402	Int j;
403	Int E[2],O[2];
404	Int add = 1<<(shift-1);
405
406	for (j=0; j<line; j++)
407	{
408	/* E and O */
409	E[0] = src[0] + src[3];
410	O[0] = src[0] - src[3];
411	E[1] = src[1] + src[2];
412	O[1] = src[1] - src[2];
413
414	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
415	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
416	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
417	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
418
419	src += 4;
420	dst ++;
421	}
422	}
423
424	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
425	// give identical results
426	void fastForwardDst(Short block,Short coeff,Int shift) // input block, output coeff
427	{
428	Int i, c[4];
429	Int rnd_factor = 1<<(shift-1);
430	for (i=0; i<4; i++)
431	{
432	// Intermediate Variables
433	c[0] = block[4i+0] + block[4i+3];
434	c[1] = block[4i+1] + block[4i+3];
435	c[2] = block[4i+0] - block[4i+1];
436	c[3] = 74* block[4*i+2];
437
438	coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
439	coeff[ 4+i] = ( 74 * (block[4i+0]+ block[4i+1] - block[4*i+3]) + rnd_factor ) >> shift;
440	coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
441	coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
442	}
443	}
444
445	void fastInverseDst(Short tmp,Short block,Int shift) // input tmp, output block
446	{
447	Int i, c[4];
448	Int rnd_factor = 1<<(shift-1);
449	for (i=0; i<4; i++)
450	{
451	// Intermediate Variables
452	c[0] = tmp[ i] + tmp[ 8+i];
453	c[1] = tmp[8+i] + tmp[12+i];
454	c[2] = tmp[ i] - tmp[12+i];
455	c[3] = 74* tmp[4+i];
456
457	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
458	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
459	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
460	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
461	}
462	}
463
464	void partialButterflyInverse4(Short src,Short dst,Int shift, Int line)
465	{
466	Int j;
467	Int E[2],O[2];
468	Int add = 1<<(shift-1);
469
470	for (j=0; j<line; j++)
471	{
472	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
473	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
474	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
475	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
476	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
477
478	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
479	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
480	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
481	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
482	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
483
484	src ++;
485	dst += 4;
486	}
487	}
488
489
490	void partialButterfly8(Short src,Short dst,Int shift, Int line)
491	{
492	Int j,k;
493	Int E[4],O[4];
494	Int EE[2],EO[2];
495	Int add = 1<<(shift-1);
496
497	for (j=0; j<line; j++)
498	{
499	/* E and O*/
500	for (k=0;k<4;k++)
501	{
502	E[k] = src[k] + src[7-k];
503	O[k] = src[k] - src[7-k];
504	}
505	/* EE and EO */
506	EE[0] = E[0] + E[3];
507	EO[0] = E[0] - E[3];
508	EE[1] = E[1] + E[2];
509	EO[1] = E[1] - E[2];
510
511	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
512	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
513	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
514	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
515
516	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
517	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
518	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
519	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
520
521	src += 8;
522	dst ++;
523	}
524	}
525
526
527	void partialButterflyInverse8(Short src,Short dst,Int shift, Int line)
528	{
529	Int j,k;
530	Int E[4],O[4];
531	Int EE[2],EO[2];
532	Int add = 1<<(shift-1);
533
534	for (j=0; j<line; j++)
535	{
536	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
537	for (k=0;k<4;k++)
538	{
539	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
540	}
541
542	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
543	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
544	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
545	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
546
547	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
548	E[0] = EE[0] + EO[0];
549	E[3] = EE[0] - EO[0];
550	E[1] = EE[1] + EO[1];
551	E[2] = EE[1] - EO[1];
552	for (k=0;k<4;k++)
553	{
554	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
555	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
556	}
557	src ++;
558	dst += 8;
559	}
560	}
561
562
563	void partialButterfly16(Short src,Short dst,Int shift, Int line)
564	{
565	Int j,k;
566	Int E[8],O[8];
567	Int EE[4],EO[4];
568	Int EEE[2],EEO[2];
569	Int add = 1<<(shift-1);
570
571	for (j=0; j<line; j++)
572	{
573	/* E and O*/
574	for (k=0;k<8;k++)
575	{
576	E[k] = src[k] + src[15-k];
577	O[k] = src[k] - src[15-k];
578	}
579	/* EE and EO */
580	for (k=0;k<4;k++)
581	{
582	EE[k] = E[k] + E[7-k];
583	EO[k] = E[k] - E[7-k];
584	}
585	/* EEE and EEO */
586	EEE[0] = EE[0] + EE[3];
587	EEO[0] = EE[0] - EE[3];
588	EEE[1] = EE[1] + EE[2];
589	EEO[1] = EE[1] - EE[2];
590
591	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
592	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
593	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
594	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
595
596	for (k=2;k<16;k+=4)
597	{
598	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
599	}
600
601	for (k=1;k<16;k+=2)
602	{
603	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
604	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
605	}
606
607	src += 16;
608	dst ++;
609
610	}
611	}
612
613
614	void partialButterflyInverse16(Short src,Short dst,Int shift, Int line)
615	{
616	Int j,k;
617	Int E[8],O[8];
618	Int EE[4],EO[4];
619	Int EEE[2],EEO[2];
620	Int add = 1<<(shift-1);
621
622	for (j=0; j<line; j++)
623	{
624	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
625	for (k=0;k<8;k++)
626	{
627	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
628	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
629	}
630	for (k=0;k<4;k++)
631	{
632	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
633	}
634	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
635	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
636	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
637	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
638
639	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
640	for (k=0;k<2;k++)
641	{
642	EE[k] = EEE[k] + EEO[k];
643	EE[k+2] = EEE[1-k] - EEO[1-k];
644	}
645	for (k=0;k<4;k++)
646	{
647	E[k] = EE[k] + EO[k];
648	E[k+4] = EE[3-k] - EO[3-k];
649	}
650	for (k=0;k<8;k++)
651	{
652	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
653	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
654	}
655	src ++;
656	dst += 16;
657	}
658	}
659
660
661	void partialButterfly32(Short src,Short dst,Int shift, Int line)
662	{
663	Int j,k;
664	Int E[16],O[16];
665	Int EE[8],EO[8];
666	Int EEE[4],EEO[4];
667	Int EEEE[2],EEEO[2];
668	Int add = 1<<(shift-1);
669
670	for (j=0; j<line; j++)
671	{
672	/* E and O*/
673	for (k=0;k<16;k++)
674	{
675	E[k] = src[k] + src[31-k];
676	O[k] = src[k] - src[31-k];
677	}
678	/* EE and EO */
679	for (k=0;k<8;k++)
680	{
681	EE[k] = E[k] + E[15-k];
682	EO[k] = E[k] - E[15-k];
683	}
684	/* EEE and EEO */
685	for (k=0;k<4;k++)
686	{
687	EEE[k] = EE[k] + EE[7-k];
688	EEO[k] = EE[k] - EE[7-k];
689	}
690	/* EEEE and EEEO */
691	EEEE[0] = EEE[0] + EEE[3];
692	EEEO[0] = EEE[0] - EEE[3];
693	EEEE[1] = EEE[1] + EEE[2];
694	EEEO[1] = EEE[1] - EEE[2];
695
696	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
697	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
698	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
699	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
700	for (k=4;k<32;k+=8)
701	{
702	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
703	}
704	for (k=2;k<32;k+=4)
705	{
706	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
707	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
708	}
709	for (k=1;k<32;k+=2)
710	{
711	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
712	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
713	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
714	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
715	}
716	src += 32;
717	dst ++;
718	}
719	}
720
721
722	void partialButterflyInverse32(Short src,Short dst,Int shift, Int line)
723	{
724	Int j,k;
725	Int E[16],O[16];
726	Int EE[8],EO[8];
727	Int EEE[4],EEO[4];
728	Int EEEE[2],EEEO[2];
729	Int add = 1<<(shift-1);
730
731	for (j=0; j<line; j++)
732	{
733	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
734	for (k=0;k<16;k++)
735	{
736	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
737	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
738	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
739	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
740	}
741	for (k=0;k<8;k++)
742	{
743	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
744	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
745	}
746	for (k=0;k<4;k++)
747	{
748	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
749	}
750	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
751	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
752	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
753	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
754
755	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
756	EEE[0] = EEEE[0] + EEEO[0];
757	EEE[3] = EEEE[0] - EEEO[0];
758	EEE[1] = EEEE[1] + EEEO[1];
759	EEE[2] = EEEE[1] - EEEO[1];
760	for (k=0;k<4;k++)
761	{
762	EE[k] = EEE[k] + EEO[k];
763	EE[k+4] = EEE[3-k] - EEO[3-k];
764	}
765	for (k=0;k<8;k++)
766	{
767	E[k] = EE[k] + EO[k];
768	E[k+8] = EE[7-k] - EO[7-k];
769	}
770	for (k=0;k<16;k++)
771	{
772	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
773	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
774	}
775	src ++;
776	dst += 32;
777	}
778	}
779
780	/** MxN forward transform (2D)
781	* \param block input data (residual)
782	* \param coeff output data (transform coefficients)
783	* \param iWidth input data (width of transform)
784	* \param iHeight input data (height of transform)
785	*/
786	void xTrMxN(Int bitDepth, Short block,Short coeff, Int iWidth, Int iHeight, UInt uiMode)
787	{
788	Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
789	Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
790
791	Short tmp[ 64 * 64 ];
792
793	if( iWidth == 4 && iHeight == 4)
794	{
795	if (uiMode != REG_DCT)
796	{
797	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
798	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
799	}
800	else
801	{
802	partialButterfly4(block, tmp, shift_1st, iHeight);
803	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
804	}
805
806	}
807	else if( iWidth == 8 && iHeight == 8)
808	{
809	partialButterfly8( block, tmp, shift_1st, iHeight );
810	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
811	}
812	else if( iWidth == 16 && iHeight == 16)
813	{
814	partialButterfly16( block, tmp, shift_1st, iHeight );
815	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
816	}
817	else if( iWidth == 32 && iHeight == 32)
818	{
819	partialButterfly32( block, tmp, shift_1st, iHeight );
820	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
821	}
822	}
823	/** MxN inverse transform (2D)
824	* \param coeff input data (transform coefficients)
825	* \param block output data (residual)
826	* \param iWidth input data (width of transform)
827	* \param iHeight input data (height of transform)
828	*/
829	void xITrMxN(Int bitDepth, Short coeff,Short block, Int iWidth, Int iHeight, UInt uiMode)
830	{
831	Int shift_1st = SHIFT_INV_1ST;
832	Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8);
833
834	Short tmp[ 64*64];
835	if( iWidth == 4 && iHeight == 4)
836	{
837	if (uiMode != REG_DCT)
838	{
839	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
840	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
841	}
842	else
843	{
844	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
845	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
846	}
847	}
848	else if( iWidth == 8 && iHeight == 8)
849	{
850	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
851	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
852	}
853	else if( iWidth == 16 && iHeight == 16)
854	{
855	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
856	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
857	}
858	else if( iWidth == 32 && iHeight == 32)
859	{
860	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
861	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
862	}
863	}
864
865	#endif //MATRIX_MULT
866
867	// To minimize the distortion only. No rate is considered.
868	Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
869	{
870	Int lastCG = -1;
871	Int absSum = 0 ;
872	Int n ;
873
874	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
875	{
876	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
877	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
878	absSum = 0 ;
879
880	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
881	{
882	if( pQCoef[ scan[ n + subPos ]] )
883	{
884	lastNZPosInCG = n;
885	break;
886	}
887	}
888
889	for(n = 0; n <SCAN_SET_SIZE; n++ )
890	{
891	if( pQCoef[ scan[ n + subPos ]] )
892	{
893	firstNZPosInCG = n;
894	break;
895	}
896	}
897
898	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
899	{
900	absSum += pQCoef[ scan[ n + subPos ]];
901	}
902
903	if(lastNZPosInCG>=0 && lastCG==-1)
904	{
905	lastCG = 1 ;
906	}
907
908	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
909	{
910	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
911	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
912	{
913	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
914
915	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
916	{
917	UInt blkPos = scan[ n+subPos ];
918	if(pQCoef[ blkPos ] != 0 )
919	{
920	if(deltaU[blkPos]>0)
921	{
922	curCost = - deltaU[blkPos];
923	curChange=1 ;
924	}
925	else
926	{
927	//curChange =-1;
928	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
929	{
930	curCost=MAX_INT ;
931	}
932	else
933	{
934	curCost = deltaU[blkPos];
935	curChange =-1;
936	}
937	}
938	}
939	else
940	{
941	if(n<firstNZPosInCG)
942	{
943	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
944	if(thisSignBit != signbit )
945	{
946	curCost = MAX_INT;
947	}
948	else
949	{
950	curCost = - (deltaU[blkPos]) ;
951	curChange = 1 ;
952	}
953	}
954	else
955	{
956	curCost = - (deltaU[blkPos]) ;
957	curChange = 1 ;
958	}
959	}
960
961	if( curCost<minCostInc)
962	{
963	minCostInc = curCost ;
964	finalChange = curChange ;
965	minPos = blkPos ;
966	}
967	} //CG loop
968
969	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
970	{
971	finalChange = -1;
972	}
973
974	if(pCoef[minPos]>=0)
975	{
976	pQCoef[minPos] += finalChange ;
977	}
978	else
979	{
980	pQCoef[minPos] -= finalChange ;
981	}
982	} // Hide
983	}
984	if(lastCG==1)
985	{
986	lastCG=0 ;
987	}
988	} // TU loop
989
990	return;
991	}
992
993	Void TComTrQuant::xQuant( TComDataCU* pcCU,
994	Int* pSrc,
995	TCoeff* pDes,
996	#if ADAPTIVE_QP_SELECTION
997	Int*& pArlDes,
998	#endif
999	Int iWidth,
1000	Int iHeight,
1001	UInt& uiAcSum,
1002	TextType eTType,
1003	UInt uiAbsPartIdx )
1004	{
1005	Int* piCoef = pSrc;
1006	TCoeff* piQCoef = pDes;
1007	#if ADAPTIVE_QP_SELECTION
1008	Int* piArlCCoef = pArlDes;
1009	#endif
1010	Int iAdd = 0;
1011
1012	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
1013	if ( useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA))
1014	{
1015	#if ADAPTIVE_QP_SELECTION
1016	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1017	#else
1018	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1019	#endif
1020	}
1021	else
1022	{
1023	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1024
1025	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1026	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1027
1028	Int deltaU[32*32] ;
1029
1030	#if ADAPTIVE_QP_SELECTION
1031	QpParam cQpBase;
1032	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1033
1034	Int qpScaled;
1035	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1036
1037	if(eTType == TEXT_LUMA)
1038	{
1039	qpScaled = iQpBase + qpBDOffset;
1040	}
1041	else
1042	{
1043	Int chromaQPOffset;
1044	if(eTType == TEXT_CHROMA_U)
1045	{
1046	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
1047	}
1048	else
1049	{
1050	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
1051	}
1052	iQpBase = iQpBase + chromaQPOffset;
1053
1054	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
1055
1056	if(qpScaled < 0)
1057	{
1058	qpScaled = qpScaled + qpBDOffset;
1059	}
1060	else
1061	{
1062	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
1063	}
1064	}
1065	cQpBase.setQpParam(qpScaled);
1066	#endif
1067
1068	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1069	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1070	assert(scalingListType < 6);
1071	Int *piQuantCoeff = 0;
1072	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1073
1074	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1075	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1076
1077	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1078
1079	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1080
1081	#if ADAPTIVE_QP_SELECTION
1082	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1083	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1084	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1085	Int iAddC = 1 << (iQBitsC-1);
1086	#endif
1087
1088	Int qBits8 = iQBits-8;
1089	for( Int n = 0; n < iWidth*iHeight; n++ )
1090	{
1091	Int iLevel;
1092	Int iSign;
1093	UInt uiBlockPos = n;
1094	iLevel = piCoef[uiBlockPos];
1095	iSign = (iLevel < 0 ? -1: 1);
1096
1097	#if ADAPTIVE_QP_SELECTION
1098	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1099	if( m_bUseAdaptQpSelect )
1100	{
1101	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1102	}
1103	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1104	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1105	#else
1106	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1107	deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1108	#endif
1109	uiAcSum += iLevel;
1110	iLevel *= iSign;
1111	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1112	} // for n
1113	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1114	{
1115	if(uiAcSum>=2)
1116	{
1117	signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1118	}
1119	}
1120	} //if RDOQ
1121	//return;
1122
1123	}
1124
1125	Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1126	{
1127
1128	const TCoeff* piQCoef = pSrc;
1129	Int* piCoef = pDes;
1130
1131	if ( iWidth > (Int)m_uiMaxTrSize )
1132	{
1133	iWidth = m_uiMaxTrSize;
1134	iHeight = m_uiMaxTrSize;
1135	}
1136
1137	Int iShift,iAdd,iCoeffQ;
1138	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1139
1140	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1141
1142	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1143
1144	TCoeff clipQCoef;
1145
1146	if(getUseScalingList())
1147	{
1148	iShift += 4;
1149	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1150
1151	if(iShift > m_cQP.m_iPer)
1152	{
1153	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1154
1155	for( Int n = 0; n < iWidth*iHeight; n++ )
1156	{
1157	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1158	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1159	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1160	}
1161	}
1162	else
1163	{
1164	for( Int n = 0; n < iWidth*iHeight; n++ )
1165	{
1166	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1167	iCoeffQ = Clip3( -32768, 32767, clipQCoef * piDequantCoef[n] ); // Clip to avoid possible overflow in following shift left operation
1168	piCoef[n] = Clip3( -32768, 32767, iCoeffQ << ( m_cQP.m_iPer - iShift ) );
1169	}
1170	}
1171	}
1172	else
1173	{
1174	iAdd = 1 << (iShift-1);
1175	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1176
1177	for( Int n = 0; n < iWidth*iHeight; n++ )
1178	{
1179	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1180	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1181	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1182	}
1183	}
1184	}
1185
1186	Void TComTrQuant::init( UInt uiMaxTrSize,
1187	Bool bUseRDOQ,
1188	Bool bUseRDOQTS,
1189	Bool bEnc, Bool useTransformSkipFast
1190	#if ADAPTIVE_QP_SELECTION
1191	, Bool bUseAdaptQpSelect
1192	#endif
1193	)
1194	{
1195	m_uiMaxTrSize = uiMaxTrSize;
1196	m_bEnc = bEnc;
1197	m_useRDOQ = bUseRDOQ;
1198	m_useRDOQTS = bUseRDOQTS;
1199	#if ADAPTIVE_QP_SELECTION
1200	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1201	#endif
1202	m_useTransformSkipFast = useTransformSkipFast;
1203	}
1204
1205	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1206	Pel* pcResidual,
1207	UInt uiStride,
1208	TCoeff* rpcCoeff,
1209	#if ADAPTIVE_QP_SELECTION
1210	Int*& rpcArlCoeff,
1211	#endif
1212	UInt uiWidth,
1213	UInt uiHeight,
1214	UInt& uiAbsSum,
1215	TextType eTType,
1216	UInt uiAbsPartIdx,
1217	Bool useTransformSkip
1218	)
1219	{
1220	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
1221	{
1222	uiAbsSum=0;
1223	for (UInt k = 0; k<uiHeight; k++)
1224	{
1225	for (UInt j = 0; j<uiWidth; j++)
1226	{
1227	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1228	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1229	}
1230	}
1231	return;
1232	}
1233	UInt uiMode; //luma intra pred
1234	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1235	{
1236	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
1237	}
1238	else
1239	{
1240	uiMode = REG_DCT;
1241	}
1242
1243	uiAbsSum = 0;
1244	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1245	Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1246	if(useTransformSkip)
1247	{
1248	xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1249	}
1250	else
1251	{
1252	xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1253	}
1254	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
1255	#if ADAPTIVE_QP_SELECTION
1256	rpcArlCoeff,
1257	#endif
1258	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
1259	}
1260
1261	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
1262	{
1263	if(transQuantBypass)
1264	{
1265	for (UInt k = 0; k<uiHeight; k++)
1266	{
1267	for (UInt j = 0; j<uiWidth; j++)
1268	{
1269	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
1270	}
1271	}
1272	return;
1273	}
1274	Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1275	xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
1276	if(useTransformSkip == true)
1277	{
1278	xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1279	}
1280	else
1281	{
1282	xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
1283	}
1284	}
1285
1286	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
1287	{
1288	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
1289	{
1290	return;
1291	}
1292	const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
1293
1294	if( uiTrMode == stopTrMode )
1295	{
1296	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
1297	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
1298	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
1299	{
1300	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
1301	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
1302	{
1303	return;
1304	}
1305	uiWidth <<= 1;
1306	uiHeight <<= 1;
1307	}
1308	Pel* pResi = rpcResidual + uiAddr;
1309	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
1310	assert(scalingListType < 6);
1311	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
1312	}
1313	else
1314	{
1315	uiTrMode++;
1316	uiWidth >>= 1;
1317	uiHeight >>= 1;
1318	Int trWidth = uiWidth, trHeight = uiHeight;
1319	UInt uiAddrOffset = trHeight * uiStride;
1320	UInt uiCoefOffset = trWidth * trHeight;
1321	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
1322	{
1323	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1324	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1325	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
1326	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
1327	}
1328	}
1329	}
1330
1331	// ------------------------------------------------------------------------------------------------
1332	// Logical transform
1333	// ------------------------------------------------------------------------------------------------
1334
1335	/** Wrapper function between HM interface and core NxN forward transform (2D)
1336	* \param piBlkResi input data (residual)
1337	* \param psCoeff output data (transform coefficients)
1338	* \param uiStride stride of input residual data
1339	* \param iSize transform size (iSize x iSize)
1340	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1341	*/
1342	Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
1343	{
1344	#if MATRIX_MULT
1345	Int iSize = iWidth;
1346	xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
1347	#else
1348	Int j;
1349	{
1350	Short block[ 64 * 64 ];
1351	Short coeff[ 64 * 64 ];
1352	{
1353	for (j = 0; j < iHeight; j++)
1354	{
1355	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) );
1356	}
1357	}
1358	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );
1359	for ( j = 0; j < iHeight * iWidth; j++ )
1360	{
1361	psCoeff[ j ] = coeff[ j ];
1362	}
1363	return ;
1364	}
1365	#endif
1366	}
1367
1368
1369	/** Wrapper function between HM interface and core NxN inverse transform (2D)
1370	* \param plCoef input data (transform coefficients)
1371	* \param pResidual output data (residual)
1372	* \param uiStride stride of input residual data
1373	* \param iSize transform size (iSize x iSize)
1374	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1375	*/
1376	Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1377	{
1378	#if MATRIX_MULT
1379	Int iSize = iWidth;
1380	xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
1381	#else
1382	Int j;
1383	{
1384	Short block[ 64 * 64 ];
1385	Short coeff[ 64 * 64 ];
1386	for ( j = 0; j < iHeight * iWidth; j++ )
1387	{
1388	coeff[j] = (Short)plCoef[j];
1389	}
1390	xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode );
1391	{
1392	for ( j = 0; j < iHeight; j++ )
1393	{
1394	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );
1395	}
1396	}
1397	return ;
1398	}
1399	#endif
1400	}
1401
1402	/** Wrapper function between HM interface and core 4x4 transform skipping
1403	* \param piBlkResi input data (residual)
1404	* \param psCoeff output data (transform coefficients)
1405	* \param uiStride stride of input residual data
1406	* \param iSize transform size (iSize x iSize)
1407	*/
1408	Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
1409	{
1410	assert( width == height );
1411	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1412	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1413	UInt transformSkipShift;
1414	Int j,k;
1415	if(shift >= 0)
1416	{
1417	transformSkipShift = shift;
1418	for (j = 0; j < height; j++)
1419	{
1420	for(k = 0; k < width; k ++)
1421	{
1422	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
1423	}
1424	}
1425	}
1426	else
1427	{
1428	//The case when uiBitDepth > 13
1429	Int offset;
1430	transformSkipShift = -shift;
1431	offset = (1 << (transformSkipShift - 1));
1432	for (j = 0; j < height; j++)
1433	{
1434	for(k = 0; k < width; k ++)
1435	{
1436	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
1437	}
1438	}
1439	}
1440	}
1441
1442	/** Wrapper function between HM interface and core NxN transform skipping
1443	* \param plCoef input data (coefficients)
1444	* \param pResidual output data (residual)
1445	* \param uiStride stride of input residual data
1446	* \param iSize transform size (iSize x iSize)
1447	*/
1448	Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
1449	{
1450	assert( width == height );
1451	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
1452	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
1453	UInt transformSkipShift;
1454	Int j,k;
1455	if(shift > 0)
1456	{
1457	Int offset;
1458	transformSkipShift = shift;
1459	offset = (1 << (transformSkipShift -1));
1460	for ( j = 0; j < height; j++ )
1461	{
1462	for(k = 0; k < width; k ++)
1463	{
1464	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
1465	}
1466	}
1467	}
1468	else
1469	{
1470	//The case when uiBitDepth >= 13
1471	transformSkipShift = - shift;
1472	for ( j = 0; j < height; j++ )
1473	{
1474	for(k = 0; k < width; k ++)
1475	{
1476	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
1477	}
1478	}
1479	}
1480	}
1481
1482	/** RDOQ with CABAC
1483	* \param pcCU pointer to coding unit structure
1484	* \param plSrcCoeff pointer to input buffer
1485	* \param piDstCoeff reference to pointer to output buffer
1486	* \param uiWidth block width
1487	* \param uiHeight block height
1488	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
1489	* \param eTType plane type / luminance or chrominance
1490	* \param uiAbsPartIdx absolute partition index
1491	* \returns Void
1492	* Rate distortion optimized quantization for entropy
1493	* coding engines using probability models like CABAC
1494	*/
1495	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
1496	Int* plSrcCoeff,
1497	TCoeff* piDstCoeff,
1498	#if ADAPTIVE_QP_SELECTION
1499	Int*& piArlDstCoeff,
1500	#endif
1501	UInt uiWidth,
1502	UInt uiHeight,
1503	UInt& uiAbsSum,
1504	TextType eTType,
1505	UInt uiAbsPartIdx )
1506	{
1507	Int iQBits = m_cQP.m_iBits;
1508	Double dTemp = 0;
1509	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
1510	Int uiQ = g_quantScales[m_cQP.rem()];
1511
1512	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
1513	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1514	UInt uiGoRiceParam = 0;
1515	Double d64BlockUncodedCost = 0;
1516	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
1517	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
1518	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1519	assert(scalingListType < 6);
1520
1521	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1522	Double dErrScale = 0;
1523	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
1524	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
1525	Int *piQCoef = piQCoefOrg;
1526	Double *pdErrScale = pdErrScaleOrg;
1527	#if ADAPTIVE_QP_SELECTION
1528	Int iQBitsC = iQBits - ARL_C_PRECISION;
1529	Int iAddC = 1 << (iQBitsC-1);
1530	#endif
1531	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1532
1533	#if ADAPTIVE_QP_SELECTION
1534	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
1535	#endif
1536
1537	Double pdCostCoeff [ 32 * 32 ];
1538	Double pdCostSig [ 32 * 32 ];
1539	Double pdCostCoeff0[ 32 * 32 ];
1540	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
1541	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
1542	Int rateIncUp [ 32 * 32 ];
1543	Int rateIncDown [ 32 * 32 ];
1544	Int sigRateDelta[ 32 * 32 ];
1545	Int deltaU [ 32 * 32 ];
1546	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
1547	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
1548	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
1549	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
1550
1551	const UInt * scanCG;
1552	{
1553	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
1554	if( uiLog2BlkSize == 3 )
1555	{
1556	scanCG = g_sigLastScan8x8[ uiScanIdx ];
1557	}
1558	else if( uiLog2BlkSize == 5 )
1559	{
1560	scanCG = g_sigLastScanCG32x32;
1561	}
1562	}
1563	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
1564	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
1565	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
1566	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
1567	Int iCGLastScanPos = -1;
1568
1569	UInt uiCtxSet = 0;
1570	Int c1 = 1;
1571	Int c2 = 0;
1572	Double d64BaseCost = 0;
1573	Int iLastScanPos = -1;
1574	dTemp = dErrScale;
1575
1576	UInt c1Idx = 0;
1577	UInt c2Idx = 0;
1578	Int baseLevel;
1579
1580	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
1581
1582	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
1583	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
1584
1585	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
1586	Int iScanPos;
1587	coeffGroupRDStats rdStats;
1588
1589	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
1590	{
1591	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1592	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
1593	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
1594	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1595
1596	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1597	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1598	{
1599	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1600	//===== quantization =====
1601	UInt uiBlkPos = scan[iScanPos];
1602	// set coeff
1603	uiQ = piQCoef[uiBlkPos];
1604	dTemp = pdErrScale[uiBlkPos];
1605	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
1606	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
1607	#if ADAPTIVE_QP_SELECTION
1608	if( m_bUseAdaptQpSelect )
1609	{
1610	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
1611	}
1612	#endif
1613	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
1614
1615	Double dErr = Double( lLevelDouble );
1616	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
1617	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
1618	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
1619
1620	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
1621	{
1622	iLastScanPos = iScanPos;
1623	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1624	iCGLastScanPos = iCGScanPos;
1625	}
1626
1627	if ( iLastScanPos >= 0 )
1628	{
1629	//===== coefficient level estimation =====
1630	UInt uiLevel;
1631	UInt uiOneCtx = 4 * uiCtxSet + c1;
1632	UInt uiAbsCtx = uiCtxSet + c2;
1633
1634	if( iScanPos == iLastScanPos )
1635	{
1636	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1637	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1638	c1Idx, c2Idx, iQBits, dTemp, 1 );
1639	}
1640	else
1641	{
1642	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1643	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1644	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
1645	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
1646	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
1647	c1Idx, c2Idx, iQBits, dTemp, 0 );
1648	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
1649	}
1650	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
1651	if( uiLevel > 0 )
1652	{
1653	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
1654	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1655	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
1656	}
1657	else // uiLevel == 0
1658	{
1659	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
1660	}
1661	piDstCoeff[ uiBlkPos ] = uiLevel;
1662	d64BaseCost += pdCostCoeff [ iScanPos ];
1663
1664
1665	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
1666	if( uiLevel >= baseLevel )
1667	{
1668	if(uiLevel > 3*(1<<uiGoRiceParam))
1669	{
1670	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
1671	}
1672	}
1673	if ( uiLevel >= 1)
1674	{
1675	c1Idx ++;
1676	}
1677
1678	//===== update bin model =====
1679	if( uiLevel > 1 )
1680	{
1681	c1 = 0;
1682	c2 += (c2 < 2);
1683	c2Idx ++;
1684	}
1685	else if( (c1 < 3) && (c1 > 0) && uiLevel)
1686	{
1687	c1++;
1688	}
1689
1690	//===== context set update =====
1691	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
1692	{
1693	c2 = 0;
1694	uiGoRiceParam = 0;
1695
1696	c1Idx = 0;
1697	c2Idx = 0;
1698	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
1699	if( c1 == 0 )
1700	{
1701	uiCtxSet++;
1702	}
1703	c1 = 1;
1704	}
1705	}
1706	else
1707	{
1708	d64BaseCost += pdCostCoeff0[ iScanPos ];
1709	}
1710	rdStats.d64SigCost += pdCostSig[ iScanPos ];
1711	if (iScanPosinCG == 0 )
1712	{
1713	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
1714	}
1715	if (piDstCoeff[ uiBlkPos ] )
1716	{
1717	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1718	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
1719	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
1720	if ( iScanPosinCG != 0 )
1721	{
1722	rdStats.iNNZbeforePos0++;
1723	}
1724	}
1725	} //end for (iScanPosinCG)
1726
1727	if (iCGLastScanPos >= 0)
1728	{
1729	if( iCGScanPos )
1730	{
1731	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1732	{
1733	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1734	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
1735	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1736	}
1737	else
1738	{
1739	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
1740	{
1741	if ( rdStats.iNNZbeforePos0 == 0 )
1742	{
1743	d64BaseCost -= rdStats.d64SigCost_0;
1744	rdStats.d64SigCost -= rdStats.d64SigCost_0;
1745	}
1746	// rd-cost if SigCoeffGroupFlag = 0, initialization
1747	Double d64CostZeroCG = d64BaseCost;
1748
1749	// add SigCoeffGroupFlag cost to total cost
1750	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
1751	if (iCGScanPos < iCGLastScanPos)
1752	{
1753	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
1754	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
1755	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
1756	}
1757
1758	// try to convert the current coeff group from non-zero to all-zero
1759	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
1760	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
1761	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
1762
1763	// if we can save cost, change this block to all-zero block
1764	if ( d64CostZeroCG < d64BaseCost )
1765	{
1766	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
1767	d64BaseCost = d64CostZeroCG;
1768	if (iCGScanPos < iCGLastScanPos)
1769	{
1770	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
1771	}
1772	// reset coeffs to 0 in this block
1773	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1774	{
1775	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1776	UInt uiBlkPos = scan[ iScanPos ];
1777
1778	if (piDstCoeff[ uiBlkPos ])
1779	{
1780	piDstCoeff [ uiBlkPos ] = 0;
1781	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
1782	pdCostSig [ iScanPos ] = 0;
1783	}
1784	}
1785	} // end if ( d64CostAllZeros < d64BaseCost )
1786	}
1787	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
1788	}
1789	else
1790	{
1791	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
1792	}
1793	}
1794	} //end for (iCGScanPos)
1795
1796	//===== estimate last position =====
1797	if ( iLastScanPos < 0 )
1798	{
1799	return;
1800	}
1801
1802	Double d64BestCost = 0;
1803	Int ui16CtxCbf = 0;
1804	Int iBestLastIdxP1 = 0;
1805	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
1806	{
1807	ui16CtxCbf = 0;
1808	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
1809	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
1810	}
1811	else
1812	{
1813	ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
1814	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
1815	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
1816	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
1817	}
1818
1819	Bool bFoundLast = false;
1820	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
1821	{
1822	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
1823
1824	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
1825	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1826	{
1827	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
1828	{
1829	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
1830	if (iScanPos > iLastScanPos) continue;
1831	UInt uiBlkPos = scan[iScanPos];
1832
1833	if( piDstCoeff[ uiBlkPos ] )
1834	{
1835	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
1836	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
1837
1838	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
1839	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
1840
1841	if( totalCost < d64BestCost )
1842	{
1843	iBestLastIdxP1 = iScanPos + 1;
1844	d64BestCost = totalCost;
1845	}
1846	if( piDstCoeff[ uiBlkPos ] > 1 )
1847	{
1848	bFoundLast = true;
1849	break;
1850	}
1851	d64BaseCost -= pdCostCoeff[ iScanPos ];
1852	d64BaseCost += pdCostCoeff0[ iScanPos ];
1853	}
1854	else
1855	{
1856	d64BaseCost -= pdCostSig[ iScanPos ];
1857	}
1858	} //end for
1859	if (bFoundLast)
1860	{
1861	break;
1862	}
1863	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
1864	} // end for
1865
1866	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
1867	{
1868	Int blkPos = scan[ scanPos ];
1869	Int level = piDstCoeff[ blkPos ];
1870	uiAbsSum += level;
1871	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
1872	}
1873
1874	//===== clean uncoded coefficients =====
1875	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
1876	{
1877	piDstCoeff[ scan[ scanPos ] ] = 0;
1878	}
1879
1880	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
1881	{
1882	Int64 rdFactor = (Int64) (
1883	g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
1884	/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
1885	+ 0.5);
1886	Int lastCG = -1;
1887	Int absSum = 0 ;
1888	Int n ;
1889
1890	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1891	{
1892	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1893	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1894	absSum = 0 ;
1895
1896	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1897	{
1898	if( piDstCoeff[ scan[ n + subPos ]] )
1899	{
1900	lastNZPosInCG = n;
1901	break;
1902	}
1903	}
1904
1905	for(n = 0; n <SCAN_SET_SIZE; n++ )
1906	{
1907	if( piDstCoeff[ scan[ n + subPos ]] )
1908	{
1909	firstNZPosInCG = n;
1910	break;
1911	}
1912	}
1913
1914	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1915	{
1916	absSum += piDstCoeff[ scan[ n + subPos ]];
1917	}
1918
1919	if(lastNZPosInCG>=0 && lastCG==-1)
1920	{
1921	lastCG = 1;
1922	}
1923
1924	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1925	{
1926	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
1927	if( signbit!=(absSum&0x1) ) // hide but need tune
1928	{
1929	// calculate the cost
1930	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
1931	Int minPos =-1, finalChange=0, curChange=0;
1932
1933	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1934	{
1935	UInt uiBlkPos = scan[ n + subPos ];
1936	if(piDstCoeff[ uiBlkPos ] != 0 )
1937	{
1938	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
1939	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1940	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
1941
1942	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1943	{
1944	costDown -= (4<<15) ;
1945	}
1946
1947	if(costUp<costDown)
1948	{
1949	curCost = costUp;
1950	curChange = 1 ;
1951	}
1952	else
1953	{
1954	curChange = -1 ;
1955	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1956	{
1957	curCost = MAX_INT64 ;
1958	}
1959	else
1960	{
1961	curCost = costDown ;
1962	}
1963	}
1964	}
1965	else
1966	{
1967	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1968	curChange = 1 ;
1969
1970	if(n<firstNZPosInCG)
1971	{
1972	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1973	if(thissignbit != signbit )
1974	{
1975	curCost = MAX_INT64;
1976	}
1977	}
1978	}
1979
1980	if( curCost<minCostInc)
1981	{
1982	minCostInc = curCost ;
1983	finalChange = curChange ;
1984	minPos = uiBlkPos ;
1985	}
1986	}
1987
1988	if(piDstCoeff[minPos] == 32767 \|\| piDstCoeff[minPos] == -32768)
1989	{
1990	finalChange = -1;
1991	}
1992
1993	if(plSrcCoeff[minPos]>=0)
1994	{
1995	piDstCoeff[minPos] += finalChange ;
1996	}
1997	else
1998	{
1999	piDstCoeff[minPos] -= finalChange ;
2000	}
2001	}
2002	}
2003
2004	if(lastCG==1)
2005	{
2006	lastCG=0 ;
2007	}
2008	}
2009	}
2010	}
2011
2012	/** Pattern decision for context derivation process of significant_coeff_flag
2013	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2014	* \param posXCG column of current coefficient group
2015	* \param posYCG row of current coefficient group
2016	* \param width width of the block
2017	* \param height height of the block
2018	* \returns pattern for current coefficient group
2019	*/
2020	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
2021	{
2022	if( width == 4 && height == 4 ) return -1;
2023
2024	UInt sigRight = 0;
2025	UInt sigLower = 0;
2026
2027	width >>= 2;
2028	height >>= 2;
2029	if( posXCG < width - 1 )
2030	{
2031	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
2032	}
2033	if (posYCG < height - 1 )
2034	{
2035	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
2036	}
2037	return sigRight + (sigLower<<1);
2038	}
2039
2040	/** Context derivation process of coeff_abs_significant_flag
2041	* \param patternSigCtx pattern for current coefficient group
2042	* \param posX column of current scan position
2043	* \param posY row of current scan position
2044	* \param log2BlockSize log2 value of block size (square block)
2045	* \param width width of the block
2046	* \param height height of the block
2047	* \param textureType texture type (TEXT_LUMA...)
2048	* \returns ctxInc for current scan position
2049	*/
2050	Int TComTrQuant::getSigCtxInc (
2051	Int patternSigCtx,
2052	UInt scanIdx,
2053	Int posX,
2054	Int posY,
2055	Int log2BlockSize,
2056	TextType textureType
2057	)
2058	{
2059	const Int ctxIndMap[16] =
2060	{
2061	0, 1, 4, 5,
2062	2, 3, 4, 5,
2063	6, 6, 8, 8,
2064	7, 7, 8, 8
2065	};
2066
2067	if( posX + posY == 0 )
2068	{
2069	return 0;
2070	}
2071
2072	if ( log2BlockSize == 2 )
2073	{
2074	return ctxIndMap[ 4 * posY + posX ];
2075	}
2076
2077	Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
2078
2079	Int posXinSubset = posX-((posX>>2)<<2);
2080	Int posYinSubset = posY-((posY>>2)<<2);
2081	Int cnt = 0;
2082	if(patternSigCtx==0)
2083	{
2084	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
2085	}
2086	else if(patternSigCtx==1)
2087	{
2088	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
2089	}
2090	else if(patternSigCtx==2)
2091	{
2092	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
2093	}
2094	else
2095	{
2096	cnt = 2;
2097	}
2098
2099	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
2100	}
2101
2102	/** Get the best level in RD sense
2103	* \param rd64CodedCost reference to coded cost
2104	* \param rd64CodedCost0 reference to cost when coefficient is 0
2105	* \param rd64CodedCostSig reference to cost of significant coefficient
2106	* \param lLevelDouble reference to unscaled quantized level
2107	* \param uiMaxAbsLevel scaled quantized level
2108	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2109	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2110	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2111	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2112	* \param iQBits quantization step size
2113	* \param dTemp correction factor
2114	* \param bLast indicates if the coefficient is the last significant
2115	* \returns best quantized transform level for given scan position
2116	* This method calculates the best quantized transform level for a given scan position.
2117	*/
2118	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2119	Double& rd64CodedCost0,
2120	Double& rd64CodedCostSig,
2121	Int lLevelDouble,
2122	UInt uiMaxAbsLevel,
2123	UShort ui16CtxNumSig,
2124	UShort ui16CtxNumOne,
2125	UShort ui16CtxNumAbs,
2126	UShort ui16AbsGoRice,
2127	UInt c1Idx,
2128	UInt c2Idx,
2129	Int iQBits,
2130	Double dTemp,
2131	Bool bLast ) const
2132	{
2133	Double dCurrCostSig = 0;
2134	UInt uiBestAbsLevel = 0;
2135
2136	if( !bLast && uiMaxAbsLevel < 3 )
2137	{
2138	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2139	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2140	if( uiMaxAbsLevel == 0 )
2141	{
2142	return uiBestAbsLevel;
2143	}
2144	}
2145	else
2146	{
2147	rd64CodedCost = MAX_DOUBLE;
2148	}
2149
2150	if( !bLast )
2151	{
2152	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2153	}
2154
2155	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2156	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2157	{
2158	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
2159	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
2160	dCurrCost += dCurrCostSig;
2161
2162	if( dCurrCost < rd64CodedCost )
2163	{
2164	uiBestAbsLevel = uiAbsLevel;
2165	rd64CodedCost = dCurrCost;
2166	rd64CodedCostSig = dCurrCostSig;
2167	}
2168	}
2169
2170	return uiBestAbsLevel;
2171	}
2172
2173	/** Calculates the cost for specific absolute transform level
2174	* \param uiAbsLevel scaled quantized level
2175	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2176	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2177	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2178	* \returns cost of given absolute transform level
2179	*/
2180	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
2181	UShort ui16CtxNumOne,
2182	UShort ui16CtxNumAbs,
2183	UShort ui16AbsGoRice
2184	, UInt c1Idx,
2185	UInt c2Idx
2186	) const
2187	{
2188	Double iRate = xGetIEPRate();
2189	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2190
2191	if ( uiAbsLevel >= baseLevel )
2192	{
2193	UInt symbol = uiAbsLevel - baseLevel;
2194	UInt length;
2195	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2196	{
2197	length = symbol>>ui16AbsGoRice;
2198	iRate += (length+1+ui16AbsGoRice)<< 15;
2199	}
2200	else
2201	{
2202	length = ui16AbsGoRice;
2203	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2204	while (symbol >= (1<<length))
2205	{
2206	symbol -= (1<<(length++));
2207	}
2208	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2209	}
2210	if (c1Idx < C1FLAG_NUMBER)
2211	{
2212	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2213
2214	if (c2Idx < C2FLAG_NUMBER)
2215	{
2216	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2217	}
2218	}
2219	}
2220	else
2221	if( uiAbsLevel == 1 )
2222	{
2223	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2224	}
2225	else if( uiAbsLevel == 2 )
2226	{
2227	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2228	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2229	}
2230	else
2231	{
2232	assert (0);
2233	}
2234	return xGetICost( iRate );
2235	}
2236
2237	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2238	UShort ui16CtxNumOne,
2239	UShort ui16CtxNumAbs,
2240	UShort ui16AbsGoRice
2241	, UInt c1Idx,
2242	UInt c2Idx
2243	) const
2244	{
2245	Int iRate = 0;
2246	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2247
2248	if ( uiAbsLevel >= baseLevel )
2249	{
2250	UInt uiSymbol = uiAbsLevel - baseLevel;
2251	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
2252	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
2253
2254	if( bExpGolomb )
2255	{
2256	uiAbsLevel = uiSymbol - uiMaxVlc;
2257	Int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
2258	iRate += iEGS << 15;
2259	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
2260	}
2261
2262	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
2263	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
2264
2265	iRate += ui16NumBins << 15;
2266
2267	if (c1Idx < C1FLAG_NUMBER)
2268	{
2269	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2270
2271	if (c2Idx < C2FLAG_NUMBER)
2272	{
2273	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2274	}
2275	}
2276	}
2277	else
2278	if( uiAbsLevel == 0 )
2279	{
2280	return 0;
2281	}
2282	else if( uiAbsLevel == 1 )
2283	{
2284	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2285	}
2286	else if( uiAbsLevel == 2 )
2287	{
2288	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2289	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2290	}
2291	else
2292	{
2293	assert(0);
2294	}
2295	return iRate;
2296	}
2297
2298	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2299	UShort ui16CtxNumSig ) const
2300	{
2301	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2302	}
2303
2304	/** Calculates the cost of signaling the last significant coefficient in the block
2305	* \param uiPosX X coordinate of the last significant coefficient
2306	* \param uiPosY Y coordinate of the last significant coefficient
2307	* \returns cost of last significant coefficient
2308	*/
2309	/*
2310	* \param uiWidth width of the transform unit (TU)
2311	*/
2312	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2313	const UInt uiPosY ) const
2314	{
2315	UInt uiCtxX = g_uiGroupIdx[uiPosX];
2316	UInt uiCtxY = g_uiGroupIdx[uiPosY];
2317	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
2318	if( uiCtxX > 3 )
2319	{
2320	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2321	}
2322	if( uiCtxY > 3 )
2323	{
2324	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2325	}
2326	return xGetICost( uiCost );
2327	}
2328
2329	/** Calculates the cost for specific absolute transform level
2330	* \param uiAbsLevel scaled quantized level
2331	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2332	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2333	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2334	* \returns cost of given absolute transform level
2335	*/
2336	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2337	UShort ui16CtxNumSig ) const
2338	{
2339	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2340	}
2341
2342	/** Get the cost for a specific rate
2343	* \param dRate rate of a bit
2344	* \returns cost at the specific rate
2345	*/
2346	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
2347	{
2348	return m_dLambda * dRate;
2349	}
2350
2351	/** Get the cost of an equal probable bit
2352	* \returns cost of equal probable bit
2353	*/
2354	__inline Double TComTrQuant::xGetIEPRate ( ) const
2355	{
2356	return 32768;
2357	}
2358
2359	/** Context derivation process of coeff_abs_significant_flag
2360	* \param uiSigCoeffGroupFlag significance map of L1
2361	* \param uiBlkX column of current scan position
2362	* \param uiBlkY row of current scan position
2363	* \param uiLog2BlkSize log2 value of block size
2364	* \returns ctxInc for current scan position
2365	*/
2366	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
2367	const UInt uiCGPosX,
2368	const UInt uiCGPosY,
2369	Int width, Int height)
2370	{
2371	UInt uiRight = 0;
2372	UInt uiLower = 0;
2373
2374	width >>= 2;
2375	height >>= 2;
2376	if( uiCGPosX < width - 1 )
2377	{
2378	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
2379	}
2380	if (uiCGPosY < height - 1 )
2381	{
2382	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
2383	}
2384	return (uiRight \|\| uiLower);
2385
2386	}
2387	/** set quantized matrix coefficient for encode
2388	* \param scalingList quantaized matrix address
2389	*/
2390	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
2391	{
2392	UInt size,list;
2393	UInt qp;
2394
2395	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2396	{
2397	for(list = 0; list < g_scalingListNum[size]; list++)
2398	{
2399	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2400	{
2401	xSetScalingListEnc(scalingList,list,size,qp);
2402	xSetScalingListDec(scalingList,list,size,qp);
2403	setErrScaleCoeff(list,size,qp);
2404	}
2405	}
2406	}
2407	}
2408	/** set quantized matrix coefficient for decode
2409	* \param scalingList quantaized matrix address
2410	*/
2411	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
2412	{
2413	UInt size,list;
2414	UInt qp;
2415
2416	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2417	{
2418	for(list = 0; list < g_scalingListNum[size]; list++)
2419	{
2420	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2421	{
2422	xSetScalingListDec(scalingList,list,size,qp);
2423	}
2424	}
2425	}
2426	}
2427	/** set error scale coefficients
2428	* \param list List ID
2429	* \param uiSize Size
2430	* \param uiQP Quantization parameter
2431	*/
2432	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp)
2433	{
2434
2435	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2436	Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
2437	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
2438
2439	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2440	Int *piQuantcoeff;
2441	Double *pdErrScale;
2442	piQuantcoeff = getQuantCoeff(list, qp,size);
2443	pdErrScale = getErrScaleCoeff(list, size, qp);
2444
2445	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2446	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
2447	for(i=0;i<uiMaxNumCoeff;i++)
2448	{
2449	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8)));
2450	}
2451	}
2452
2453	/** set quantized matrix coefficient for encode
2454	* \param scalingList quantaized matrix address
2455	* \param listId List index
2456	* \param sizeId size index
2457	* \param uiQP Quantization parameter
2458	*/
2459	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2460	{
2461	UInt width = g_scalingListSizeX[sizeId];
2462	UInt height = g_scalingListSizeX[sizeId];
2463	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2464	Int *quantcoeff;
2465	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2466	quantcoeff = getQuantCoeff(listId, qp, sizeId);
2467
2468	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2469	}
2470	/** set quantized matrix coefficient for decode
2471	* \param scalingList quantaized matrix address
2472	* \param list List index
2473	* \param size size index
2474	* \param uiQP Quantization parameter
2475	*/
2476	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
2477	{
2478	UInt width = g_scalingListSizeX[sizeId];
2479	UInt height = g_scalingListSizeX[sizeId];
2480	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2481	Int *dequantcoeff;
2482	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2483
2484	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2485	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
2486	}
2487
2488	/** set flat matrix value to quantized coefficient
2489	*/
2490	Void TComTrQuant::setFlatScalingList()
2491	{
2492	UInt size,list;
2493	UInt qp;
2494
2495	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
2496	{
2497	for(list = 0; list < g_scalingListNum[size]; list++)
2498	{
2499	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
2500	{
2501	xsetFlatScalingList(list,size,qp);
2502	setErrScaleCoeff(list,size,qp);
2503	}
2504	}
2505	}
2506	}
2507
2508	/** set flat matrix value to quantized coefficient
2509	* \param list List ID
2510	* \param uiQP Quantization parameter
2511	* \param uiSize Size
2512	*/
2513	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
2514	{
2515	UInt i,num = g_scalingListSize[size];
2516	Int *quantcoeff;
2517	Int *dequantcoeff;
2518	Int quantScales = g_quantScales[qp];
2519	Int invQuantScales = g_invQuantScales[qp]<<4;
2520
2521	quantcoeff = getQuantCoeff(list, qp, size);
2522	dequantcoeff = getDequantCoeff(list, qp, size);
2523
2524	for(i=0;i<num;i++)
2525	{
2526	*quantcoeff++ = quantScales;
2527	*dequantcoeff++ = invQuantScales;
2528	}
2529	}
2530
2531	/** set quantized matrix coefficient for encode
2532	* \param coeff quantaized matrix address
2533	* \param quantcoeff quantaized matrix address
2534	* \param quantScales Q(QP%6)
2535	* \param height height
2536	* \param width width
2537	* \param ratio ratio for upscale
2538	* \param sizuNum matrix size
2539	* \param dc dc parameter
2540	*/
2541	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2542	{
2543	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
2544	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
2545	for(UInt j=0;j<height;j++)
2546	{
2547	for(UInt i=0;i<width;i++)
2548	{
2549	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
2550	}
2551	}
2552	if(ratio > 1)
2553	{
2554	quantcoeff[0] = quantScales / dc;
2555	}
2556	}
2557	/** set quantized matrix coefficient for decode
2558	* \param coeff quantaized matrix address
2559	* \param dequantcoeff quantaized matrix address
2560	* \param invQuantScales IQ(QP%6))
2561	* \param height height
2562	* \param width width
2563	* \param ratio ratio for upscale
2564	* \param sizuNum matrix size
2565	* \param dc dc parameter
2566	*/
2567	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
2568	{
2569	for(UInt j=0;j<height;j++)
2570	{
2571	for(UInt i=0;i<width;i++)
2572	{
2573	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
2574	}
2575	}
2576	if(ratio > 1)
2577	{
2578	dequantcoeff[0] = invQuantScales * dc;
2579	}
2580	}
2581
2582	/** initialization process of scaling list array
2583	*/
2584	Void TComTrQuant::initScalingList()
2585	{
2586	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2587	{
2588	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2589	{
2590	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2591	{
2592	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2593	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
2594	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
2595	}
2596	}
2597	}
2598	// alias list [1] as [3].
2599	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2600	{
2601	m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp];
2602	m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp];
2603	m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp];
2604	}
2605	}
2606	/** destroy quantization matrix array
2607	*/
2608	Void TComTrQuant::destroyScalingList()
2609	{
2610	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
2611	{
2612	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
2613	{
2614	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
2615	{
2616	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
2617	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
2618	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
2619	}
2620	}
2621	}
2622	}
2623
2624	//! \}

Note: See TracBrowser for help on using the repository browser.

JCT-3V 3D-HEVC

Context navigation

source: 3DVCSoftware/branches/HTM-DEV-0.2-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 446

Download in other formats: