Context navigation

source: 3DVCSoftware/branches/HTM-3.0-Vidyo/source/Lib/TLibCommon/TComTrQuant.cpp @ 272

Visit:

Last change on this file since 272 was 56, checked in by hschwarz, 13 years ago
updated trunk (move to HM6.1)
Property svn:eol-style set to `native`
File size: 131.2 KB

Line
1	/* The copyright in this software is being made available under the BSD
2	* License, included below. This software may be subject to other third party
3	* and contributor rights, including patent rights, and no such rights are
4	* granted under this license.
5	*
6	* Copyright (c) 2010-2012, ITU/ISO/IEC
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions are met:
11	*
12	* * Redistributions of source code must retain the above copyright notice,
13	* this list of conditions and the following disclaimer.
14	* * Redistributions in binary form must reproduce the above copyright notice,
15	* this list of conditions and the following disclaimer in the documentation
16	* and/or other materials provided with the distribution.
17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18	* be used to endorse or promote products derived from this software without
19	* specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31	* THE POSSIBILITY OF SUCH DAMAGE.
32	*/
33
34	/** \file TComTrQuant.cpp
35	\brief transform and quantization class
36	*/
37
38	#include <stdlib.h>
39	#include <math.h>
40	#include <memory.h>
41	#include "TComTrQuant.h"
42	#include "TComPic.h"
43	#include "ContextTables.h"
44
45	typedef struct
46	{
47	Int iNNZbeforePos0;
48	Double d64CodedLevelandDist; // distortion and level cost only
49	Double d64UncodedDist; // all zero coded block distortion
50	Double d64SigCost;
51	Double d64SigCost_0;
52	} coeffGroupRDStats;
53
54	//! \ingroup TLibCommon
55	//! \{
56
57	// ====================================================================================================================
58	// Constants
59	// ====================================================================================================================
60
61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
62
63	// ====================================================================================================================
64	// Tables
65	// ====================================================================================================================
66
67	// RDOQ parameter
68
69	// ====================================================================================================================
70	// Qp class member functions
71	// ====================================================================================================================
72
73	QpParam::QpParam()
74	{
75	}
76
77	// ====================================================================================================================
78	// TComTrQuant class member functions
79	// ====================================================================================================================
80
81	TComTrQuant::TComTrQuant()
82	{
83	m_cQP.clear();
84
85	// allocate temporary buffers
86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
87
88	// allocate bit estimation class (for RDOQ)
89	m_pcEstBitsSbac = new estBitsSbacStruct;
90	initScalingList();
91	}
92
93	TComTrQuant::~TComTrQuant()
94	{
95	// delete temporary buffers
96	if ( m_plTempCoeff )
97	{
98	delete [] m_plTempCoeff;
99	m_plTempCoeff = NULL;
100	}
101
102	// delete bit estimation class
103	if ( m_pcEstBitsSbac )
104	{
105	delete m_pcEstBitsSbac;
106	}
107	destroyScalingList();
108	}
109
110	#if ADAPTIVE_QP_SELECTION
111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
112	{
113	Int qpBase = pcSlice->getSliceQpBase();
114	Int sliceQpused = pcSlice->getSliceQp();
115	Int sliceQpnext;
116	Double alpha = qpBase < 17 ? 0.5 : 1;
117
118	Int cnt=0;
119	for(int u=1; u<=LEVEL_RANGE; u++)
120	{
121	cnt += m_sliceNsamples[u] ;
122	}
123
124	if( !m_bUseRDOQ )
125	{
126	sliceQpused = qpBase;
127	alpha = 0.5;
128	}
129
130	if( cnt > 120 )
131	{
132	Double sum = 0;
133	Int k = 0;
134	for(Int u=1; u<LEVEL_RANGE; u++)
135	{
136	sum += u*m_sliceSumC[u];
137	k += uum_sliceNsamples[u];
138	}
139
140	Int v;
141	Double q[MAX_QP+1] ;
142	for(v=0; v<=MAX_QP; v++)
143	{
144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
145	}
146
147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
148
149	for(v=0; v<MAX_QP; v++)
150	{
151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
152	{
153	break;
154	}
155	}
156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
157	}
158	else
159	{
160	sliceQpnext = sliceQpused;
161	}
162
163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
164	}
165
166	Void TComTrQuant::initSliceQpDelta()
167	{
168	for(Int qp=0; qp<=MAX_QP; qp++)
169	{
170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
171	}
172	}
173
174	Void TComTrQuant::clearSliceARLCnt()
175	{
176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
178	}
179	#endif
180
181
182	#if H0736_AVC_STYLE_QP_RANGE
183	/** Set qP for Quantization.
184	* \param qpy QPy
185	* \param bLowpass
186	* \param eSliceType
187	* \param eTxtType
188	* \param qpBdOffset
189	* \param chromaQPOffset
190	*
191	* return void
192	*/
193	Void TComTrQuant::setQPforQuant( Int qpy, Bool bLowpass, SliceType eSliceType, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
194	{
195	Int qpScaled;
196
197	if(eTxtType == TEXT_LUMA)
198	{
199	qpScaled = qpy + qpBdOffset;
200	}
201	else
202	{
203	qpScaled = Clip3( -qpBdOffset, 51, qpy + chromaQPOffset );
204
205	if(qpScaled < 0)
206	{
207	qpScaled = qpScaled + qpBdOffset;
208	}
209	else
210	{
211	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBdOffset;
212	}
213	}
214	m_cQP.setQpParam( qpScaled, bLowpass, eSliceType );
215	}
216	#else
217	/// Including Chroma QP Parameter setting
218	Void TComTrQuant::setQPforQuant( Int iQP, Bool bLowpass, SliceType eSliceType, TextType eTxtType, Int Shift)
219	{
220	iQP = Clip3( MIN_QP, MAX_QP, iQP + Shift );
221
222	if(eTxtType != TEXT_LUMA) //Chroma
223	{
224	iQP = g_aucChromaScale[ iQP ];
225	}
226
227	m_cQP.setQpParam( iQP, bLowpass, eSliceType );
228	}
229	#endif
230
231	#if MATRIX_MULT
232	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
233	* \param block pointer to input data (residual)
234	* \param coeff pointer to output data (transform coefficients)
235	* \param uiStride stride of input data
236	* \param uiTrSize transform size (uiTrSize x uiTrSize)
237	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
238	*/
239	void xTr(Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
240	{
241	Int i,j,k,iSum;
242	Int tmp[32*32];
243	const short *iT;
244	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
245
246	if (uiTrSize==4)
247	{
248	iT = g_aiT4[0];
249	}
250	else if (uiTrSize==8)
251	{
252	iT = g_aiT8[0];
253	}
254	else if (uiTrSize==16)
255	{
256	iT = g_aiT16[0];
257	}
258	else if (uiTrSize==32)
259	{
260	iT = g_aiT32[0];
261	}
262	else
263	{
264	assert(0);
265	}
266
267	#if FULL_NBIT
268	int shift_1st = uiLog2TrSize - 1 + g_uiBitDepth - 8; // log2(N) - 1 + g_uiBitDepth - 8
269	#else
270	int shift_1st = uiLog2TrSize - 1 + g_uiBitIncrement; // log2(N) - 1 + g_uiBitIncrement
271	#endif
272
273	int add_1st = 1<<(shift_1st-1);
274	int shift_2nd = uiLog2TrSize + 6;
275	int add_2nd = 1<<(shift_2nd-1);
276
277	/* Horizontal transform */
278
279	if (uiTrSize==4)
280	{
281	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
282	{
283	iT = g_as_DST_MAT_4[0];
284	}
285	}
286	for (i=0; i<uiTrSize; i++)
287	{
288	for (j=0; j<uiTrSize; j++)
289	{
290	iSum = 0;
291	for (k=0; k<uiTrSize; k++)
292	{
293	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
294	}
295	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
296	}
297	}
298
299	/* Vertical transform */
300	if (uiTrSize==4)
301	{
302	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
303	{
304	iT = g_as_DST_MAT_4[0];
305	}
306	else
307	{
308	iT = g_aiT4[0];
309	}
310	}
311	for (i=0; i<uiTrSize; i++)
312	{
313	for (j=0; j<uiTrSize; j++)
314	{
315	iSum = 0;
316	for (k=0; k<uiTrSize; k++)
317	{
318	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
319	}
320	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
321	}
322	}
323	}
324
325	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
326	* \param coeff pointer to input data (transform coefficients)
327	* \param block pointer to output data (residual)
328	* \param uiStride stride of output data
329	* \param uiTrSize transform size (uiTrSize x uiTrSize)
330	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
331	*/
332	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
333	{
334	int i,j,k,iSum;
335	Int tmp[32*32];
336	const short *iT;
337
338	if (uiTrSize==4)
339	{
340	iT = g_aiT4[0];
341	}
342	else if (uiTrSize==8)
343	{
344	iT = g_aiT8[0];
345	}
346	else if (uiTrSize==16)
347	{
348	iT = g_aiT16[0];
349	}
350	else if (uiTrSize==32)
351	{
352	iT = g_aiT32[0];
353	}
354	else
355	{
356	assert(0);
357	}
358
359	int shift_1st = SHIFT_INV_1ST;
360	int add_1st = 1<<(shift_1st-1);
361	#if FULL_NBIT
362	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
363	#else
364	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
365	#endif
366	int add_2nd = 1<<(shift_2nd-1);
367	if (uiTrSize==4)
368	{
369	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
370	{
371	iT = g_as_DST_MAT_4[0];
372	}
373	}
374
375	/* Horizontal transform */
376	for (i=0; i<uiTrSize; i++)
377	{
378	for (j=0; j<uiTrSize; j++)
379	{
380	iSum = 0;
381	for (k=0; k<uiTrSize; k++)
382	{
383	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
384	}
385	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
386	}
387	}
388
389	if (uiTrSize==4)
390	{
391	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
392	{
393	iT = g_as_DST_MAT_4[0];
394	}
395	else
396	{
397	iT = g_aiT4[0];
398	}
399	}
400
401	/* Vertical transform */
402	for (i=0; i<uiTrSize; i++)
403	{
404	for (j=0; j<uiTrSize; j++)
405	{
406	iSum = 0;
407	for (k=0; k<uiTrSize; k++)
408	{
409	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
410	}
411	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
412	}
413	}
414	}
415
416	#else //MATRIX_MULT
417
418	/** 4x4 forward transform implemented using partial butterfly structure (1D)
419	* \param src input data (residual)
420	* \param dst output data (transform coefficients)
421	* \param shift specifies right shift after 1D transform
422	*/
423	#if !UNIFIED_TRANSFORM
424	void partialButterfly4(short src[4][4],short dst[4][4],int shift)
425	{
426	int j;
427	int E[2],O[2];
428	int add = 1<<(shift-1);
429
430	for (j=0; j<4; j++)
431	{
432	/* E and O */
433	E[0] = src[j][0] + src[j][3];
434	O[0] = src[j][0] - src[j][3];
435	E[1] = src[j][1] + src[j][2];
436	O[1] = src[j][1] - src[j][2];
437
438	dst[0][j] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
439	dst[2][j] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]E[1] + add)>>shift;
440	dst[1][j] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
441	dst[3][j] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]O[1] + add)>>shift;
442	}
443	}
444	#endif
445
446	void partialButterfly4(short src,short dst,int shift, int line)
447	{
448	int j;
449	int E[2],O[2];
450	int add = 1<<(shift-1);
451
452	for (j=0; j<line; j++)
453	{
454	/* E and O */
455	E[0] = src[0] + src[3];
456	O[0] = src[0] - src[3];
457	E[1] = src[1] + src[2];
458	O[1] = src[1] - src[2];
459
460	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
461	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
462	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
463	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
464
465	src += 4;
466	dst ++;
467	}
468	}
469
// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
// give identical results
/** 4x4 forward DST (1D pass): input row i produces output column i.
 *  \param block input data (4x4, row-major when passed as a flat pointer)
 *  \param coeff output data (4x4, same layout as block)
 *  \param shift right shift applied after the 1D transform (rounded with 1<<(shift-1))
 */
#if UNIFIED_TRANSFORM
void fastForwardDst(short *block,short *coeff,int shift) // input block, output coeff
#else
void fastForwardDst(short block[4][4],short coeff[4][4],int shift) // input block, output coeff
#endif
{
  int i, c[4];
  int rnd_factor = 1<<(shift-1);
  for (i=0; i<4; i++)
  {
    // Intermediate Variables
#if UNIFIED_TRANSFORM
    c[0] = block[4*i+0] + block[4*i+3];
    c[1] = block[4*i+1] + block[4*i+3];
    c[2] = block[4*i+0] - block[4*i+1];
    c[3] = 74* block[4*i+2];

    coeff[   i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
    coeff[ 4+i] = ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3]) + rnd_factor ) >> shift;
    coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
    coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
#else
    c[0] = block[i][0] + block[i][3];
    c[1] = block[i][1] + block[i][3];
    c[2] = block[i][0] - block[i][1];
    c[3] = 74* block[i][2];

    coeff[0][i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
    coeff[1][i] = ( 74 * (block[i][0]+ block[i][1] - block[i][3]) + rnd_factor ) >> shift;
    coeff[2][i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
    coeff[3][i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
#endif
  }
}
506
507	#if UNIFIED_TRANSFORM
508	void fastInverseDst(short tmp,short block,int shift) // input tmp, output block
509	#else
510	void fastInverseDst(short tmp[4][4],short block[4][4],int shift) // input tmp, output block
511	#endif
512	{
513	int i, c[4];
514	int rnd_factor = 1<<(shift-1);
515	for (i=0; i<4; i++)
516	{
517	// Intermediate Variables
518	#if UNIFIED_TRANSFORM
519	c[0] = tmp[ i] + tmp[ 8+i];
520	c[1] = tmp[8+i] + tmp[12+i];
521	c[2] = tmp[ i] - tmp[12+i];
522	c[3] = 74* tmp[4+i];
523
524	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
525	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
526	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
527	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
528	#else
529	c[0] = tmp[0][i] + tmp[2][i];
530	c[1] = tmp[2][i] + tmp[3][i];
531	c[2] = tmp[0][i] - tmp[3][i];
532	c[3] = 74* tmp[1][i];
533
534	block[i][0] = Clip3( -32768, 32767, ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
535	block[i][1] = Clip3( -32768, 32767, ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
536	block[i][2] = Clip3( -32768, 32767, ( 74 * (tmp[0][i] - tmp[2][i] + tmp[3][i]) + rnd_factor ) >> shift );
537	block[i][3] = Clip3( -32768, 32767, ( 55 * c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
538	#endif
539	}
540	}
541	#if !UNIFIED_TRANSFORM
542	/** 4x4 forward transform (2D)
543	* \param block input data (residual)
544	* \param coeff output data (transform coefficients)
545	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
546	*/
547	void xTr4(short block[4][4],short coeff[4][4],UInt uiMode)
548	{
549	#if FULL_NBIT
550	int shift_1st = 1 + g_uiBitDepth - 8; // log2(4) - 1 + g_uiBitDepth - 8
551	#else
552	int shift_1st = 1 + g_uiBitIncrement; // log2(4) - 1 + g_uiBitIncrement
553	#endif
554	int shift_2nd = 8; // log2(4) + 6
555	short tmp[4][4];
556	#if LOGI_INTRA_NAME_3MPM
557	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
558	#else
559	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])// Check for DCT or DST
560	#endif
561	{
562	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
563	}
564	else
565	{
566	partialButterfly4(block,tmp,shift_1st);
567	}
568
569	#if LOGI_INTRA_NAME_3MPM
570	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
571	#else
572	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
573	#endif
574	{
575	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
576	}
577	else
578	{
579	partialButterfly4(tmp,coeff,shift_2nd);
580	}
581	}
582
583	/** 4x4 inverse transform implemented using partial butterfly structure (1D)
584	* \param src input data (transform coefficients)
585	* \param dst output data (residual)
586	* \param shift specifies right shift after 1D transform
587	*/
588	void partialButterflyInverse4(short src[4][4],short dst[4][4],int shift)
589	{
590	int j;
591	int E[2],O[2];
592	int add = 1<<(shift-1);
593
594	for (j=0; j<4; j++)
595	{
596	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
597	O[0] = g_aiT4[1][0]src[1][j] + g_aiT4[3][0]src[3][j];
598	O[1] = g_aiT4[1][1]src[1][j] + g_aiT4[3][1]src[3][j];
599	E[0] = g_aiT4[0][0]src[0][j] + g_aiT4[2][0]src[2][j];
600	E[1] = g_aiT4[0][1]src[0][j] + g_aiT4[2][1]src[2][j];
601
602	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
603	dst[j][0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
604	dst[j][1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
605	dst[j][2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
606	dst[j][3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
607	}
608	}
609	#endif
610
611	void partialButterflyInverse4(short src,short dst,int shift, int line)
612	{
613	int j;
614	int E[2],O[2];
615	int add = 1<<(shift-1);
616
617	for (j=0; j<line; j++)
618	{
619	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
620	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
621	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
622	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
623	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
624
625	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
626	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
627	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
628	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
629	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
630
631	src ++;
632	dst += 4;
633	}
634	}
635
636	#if !UNIFIED_TRANSFORM
637	/** 4x4 inverse transform (2D)
638	* \param coeff input data (transform coefficients)
639	* \param block output data (residual)
640	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
641	*/
642	void xITr4(short coeff[4][4],short block[4][4], UInt uiMode)
643	{
644	int shift_1st = SHIFT_INV_1ST;
645	#if FULL_NBIT
646	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
647	#else
648	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
649	#endif
650	short tmp[4][4];
651
652	#if LOGI_INTRA_NAME_3MPM
653	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
654	#else
655	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
656	#endif
657	{
658	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
659	}
660	else
661	{
662	partialButterflyInverse4(coeff,tmp,shift_1st);
663	}
664	#if LOGI_INTRA_NAME_3MPM
665	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
666	#else
667	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
668	#endif
669	{
670	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
671	}
672	else
673	{
674	partialButterflyInverse4(tmp,block,shift_2nd);
675	}
676	}
677
678	/** 8x8 forward transform implemented using partial butterfly structure (1D)
679	* \param src input data (residual)
680	* \param dst output data (transform coefficients)
681	* \param shift specifies right shift after 1D transform
682	*/
683	void partialButterfly8(short src[8][8],short dst[8][8],int shift)
684	{
685	int j,k;
686	int E[4],O[4];
687	int EE[2],EO[2];
688	int add = 1<<(shift-1);
689
690	for (j=0; j<8; j++)
691	{
692	/* E and O*/
693	for (k=0;k<4;k++)
694	{
695	E[k] = src[j][k] + src[j][7-k];
696	O[k] = src[j][k] - src[j][7-k];
697	}
698	/* EE and EO */
699	EE[0] = E[0] + E[3];
700	EO[0] = E[0] - E[3];
701	EE[1] = E[1] + E[2];
702	EO[1] = E[1] - E[2];
703
704	dst[0][j] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
705	dst[4][j] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]EE[1] + add)>>shift;
706	dst[2][j] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]EO[1] + add)>>shift;
707	dst[6][j] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]EO[1] + add)>>shift;
708
709	dst[1][j] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
710	dst[3][j] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]O[3] + add)>>shift;
711	dst[5][j] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]O[3] + add)>>shift;
712	dst[7][j] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]O[3] + add)>>shift;
713	}
714	}
715	#endif
716
717	void partialButterfly8(short src,short dst,int shift, int line)
718	{
719	int j,k;
720	int E[4],O[4];
721	int EE[2],EO[2];
722	int add = 1<<(shift-1);
723
724	for (j=0; j<line; j++)
725	{
726	/* E and O*/
727	for (k=0;k<4;k++)
728	{
729	E[k] = src[k] + src[7-k];
730	O[k] = src[k] - src[7-k];
731	}
732	/* EE and EO */
733	EE[0] = E[0] + E[3];
734	EO[0] = E[0] - E[3];
735	EE[1] = E[1] + E[2];
736	EO[1] = E[1] - E[2];
737
738	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
739	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
740	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
741	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
742
743	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
744	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
745	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
746	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
747
748	src += 8;
749	dst ++;
750	}
751	}
752
753	#if !UNIFIED_TRANSFORM
754	/** 8x8 forward transform (2D)
755	* \param block input data (residual)
756	* \param coeff output data (transform coefficients)
757	*/
758	void xTr8(short block[8][8],short coeff[8][8])
759	{
760	#if FULL_NBIT
761	int shift_1st = 2 + g_uiBitDepth - 8; // log2(8) - 1 + g_uiBitDepth - 8
762	#else
763	int shift_1st = 2 + g_uiBitIncrement; // log2(8) - 1 + g_uiBitIncrement
764	#endif
765	int shift_2nd = 9; // log2(8) + 6
766	short tmp[8][8];
767
768	partialButterfly8(block,tmp,shift_1st);
769	partialButterfly8(tmp,coeff,shift_2nd);
770	}
771
772	/** 8x8 inverse transform implemented using partial butterfly structure (1D)
773	* \param src input data (transform coefficients)
774	* \param dst output data (residual)
775	* \param shift specifies right shift after 1D transform
776	*/
777	void partialButterflyInverse8(short src[8][8],short dst[8][8],int shift)
778	{
779	int j,k;
780	int E[4],O[4];
781	int EE[2],EO[2];
782	int add = 1<<(shift-1);
783
784	for (j=0; j<8; j++)
785	{
786	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
787	for (k=0;k<4;k++)
788	{
789	O[k] = g_aiT8[ 1][k]src[ 1][j] + g_aiT8[ 3][k]src[ 3][j] + g_aiT8[ 5][k]src[ 5][j] + g_aiT8[ 7][k]src[ 7][j];
790	}
791
792	EO[0] = g_aiT8[2][0]src[2][j] + g_aiT8[6][0]src[6][j];
793	EO[1] = g_aiT8[2][1]src[2][j] + g_aiT8[6][1]src[6][j];
794	EE[0] = g_aiT8[0][0]src[0][j] + g_aiT8[4][0]src[4][j];
795	EE[1] = g_aiT8[0][1]src[0][j] + g_aiT8[4][1]src[4][j];
796
797	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
798	E[0] = EE[0] + EO[0];
799	E[3] = EE[0] - EO[0];
800	E[1] = EE[1] + EO[1];
801	E[2] = EE[1] - EO[1];
802	for (k=0;k<4;k++)
803	{
804	dst[j][k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
805	dst[j][k+4] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
806	}
807	}
808	}
809	#endif
810
811	void partialButterflyInverse8(short src,short dst,int shift, int line)
812	{
813	int j,k;
814	int E[4],O[4];
815	int EE[2],EO[2];
816	int add = 1<<(shift-1);
817
818	for (j=0; j<line; j++)
819	{
820	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
821	for (k=0;k<4;k++)
822	{
823	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
824	}
825
826	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
827	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
828	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
829	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
830
831	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
832	E[0] = EE[0] + EO[0];
833	E[3] = EE[0] - EO[0];
834	E[1] = EE[1] + EO[1];
835	E[2] = EE[1] - EO[1];
836	for (k=0;k<4;k++)
837	{
838	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
839	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
840	}
841	src ++;
842	dst += 8;
843	}
844	}
845
846	#if !UNIFIED_TRANSFORM
847	/** 8x8 inverse transform (2D)
848	* \param coeff input data (transform coefficients)
849	* \param block output data (residual)
850	*/
851	void xITr8(short coeff[8][8],short block[8][8])
852	{
853	int shift_1st = SHIFT_INV_1ST;
854	#if FULL_NBIT
855	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
856	#else
857	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
858	#endif
859	short tmp[8][8];
860
861	partialButterflyInverse8(coeff,tmp,shift_1st);
862	partialButterflyInverse8(tmp,block,shift_2nd);
863	}
864
865	/** 16x16 forward transform implemented using partial butterfly structure (1D)
866	* \param src input data (residual)
867	* \param dst output data (transform coefficients)
868	* \param shift specifies right shift after 1D transform
869	*/
870	void partialButterfly16(short src[16][16],short dst[16][16],int shift)
871	{
872	int j,k;
873	int E[8],O[8];
874	int EE[4],EO[4];
875	int EEE[2],EEO[2];
876	int add = 1<<(shift-1);
877
878	for (j=0; j<16; j++)
879	{
880	/* E and O*/
881	for (k=0;k<8;k++)
882	{
883	E[k] = src[j][k] + src[j][15-k];
884	O[k] = src[j][k] - src[j][15-k];
885	}
886	/* EE and EO */
887	for (k=0;k<4;k++)
888	{
889	EE[k] = E[k] + E[7-k];
890	EO[k] = E[k] - E[7-k];
891	}
892	/* EEE and EEO */
893	EEE[0] = EE[0] + EE[3];
894	EEO[0] = EE[0] - EE[3];
895	EEE[1] = EE[1] + EE[2];
896	EEO[1] = EE[1] - EE[2];
897
898	dst[ 0][j] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
899	dst[ 8][j] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]EEE[1] + add)>>shift;
900	dst[ 4][j] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]EEO[1] + add)>>shift;
901	dst[12][j] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]EEO[1] + add)>>shift;
902
903	for (k=2;k<16;k+=4)
904	{
905	dst[k][j] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]EO[3] + add)>>shift;
906	}
907
908	for (k=1;k<16;k+=2)
909	{
910	dst[k][j] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]O[3] +
911	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
912	}
913
914	}
915	}
916	#endif
917
918	void partialButterfly16(short src,short dst,int shift, int line)
919	{
920	int j,k;
921	int E[8],O[8];
922	int EE[4],EO[4];
923	int EEE[2],EEO[2];
924	int add = 1<<(shift-1);
925
926	for (j=0; j<line; j++)
927	{
928	/* E and O*/
929	for (k=0;k<8;k++)
930	{
931	E[k] = src[k] + src[15-k];
932	O[k] = src[k] - src[15-k];
933	}
934	/* EE and EO */
935	for (k=0;k<4;k++)
936	{
937	EE[k] = E[k] + E[7-k];
938	EO[k] = E[k] - E[7-k];
939	}
940	/* EEE and EEO */
941	EEE[0] = EE[0] + EE[3];
942	EEO[0] = EE[0] - EE[3];
943	EEE[1] = EE[1] + EE[2];
944	EEO[1] = EE[1] - EE[2];
945
946	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
947	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
948	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
949	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
950
951	for (k=2;k<16;k+=4)
952	{
953	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
954	}
955
956	for (k=1;k<16;k+=2)
957	{
958	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
959	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
960	}
961
962	src += 16;
963	dst ++;
964
965	}
966	}
967
968	#if !UNIFIED_TRANSFORM
969	/** 16x16 forward transform (2D)
970	* \param block input data (residual)
971	* \param coeff output data (transform coefficients)
972	*/
973	void xTr16(short block[16][16],short coeff[16][16])
974	{
975	#if FULL_NBIT
976	int shift_1st = 3 + g_uiBitDepth - 8; // log2(16) - 1 + g_uiBitDepth - 8
977	#else
978	int shift_1st = 3 + g_uiBitIncrement; // log2(16) - 1 + g_uiBitIncrement
979	#endif
980	int shift_2nd = 10; // log2(16) + 6
981	short tmp[16][16];
982
983	partialButterfly16(block,tmp,shift_1st);
984	partialButterfly16(tmp,coeff,shift_2nd);
985	}
986
987	/** 16x16 inverse transform implemented using partial butterfly structure (1D)
988	* \param src input data (transform coefficients)
989	* \param dst output data (residual)
990	* \param shift specifies right shift after 1D transform
991	*/
992	void partialButterflyInverse16(short src[16][16],short dst[16][16],int shift)
993	{
994	int j,k;
995	int E[8],O[8];
996	int EE[4],EO[4];
997	int EEE[2],EEO[2];
998	int add = 1<<(shift-1);
999
1000	for (j=0; j<16; j++)
1001	{
1002	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
1003	for (k=0;k<8;k++)
1004	{
1005	O[k] = g_aiT16[ 1][k]src[ 1][j] + g_aiT16[ 3][k]src[ 3][j] + g_aiT16[ 5][k]src[ 5][j] + g_aiT16[ 7][k]src[ 7][j] +
1006	g_aiT16[ 9][k]src[ 9][j] + g_aiT16[11][k]src[11][j] + g_aiT16[13][k]src[13][j] + g_aiT16[15][k]src[15][j];
1007	}
1008	for (k=0;k<4;k++)
1009	{
1010	EO[k] = g_aiT16[ 2][k]src[ 2][j] + g_aiT16[ 6][k]src[ 6][j] + g_aiT16[10][k]src[10][j] + g_aiT16[14][k]src[14][j];
1011	}
1012	EEO[0] = g_aiT16[4][0]src[4][j] + g_aiT16[12][0]src[12][j];
1013	EEE[0] = g_aiT16[0][0]src[0][j] + g_aiT16[ 8][0]src[ 8][j];
1014	EEO[1] = g_aiT16[4][1]src[4][j] + g_aiT16[12][1]src[12][j];
1015	EEE[1] = g_aiT16[0][1]src[0][j] + g_aiT16[ 8][1]src[ 8][j];
1016
1017	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
1018	for (k=0;k<2;k++)
1019	{
1020	EE[k] = EEE[k] + EEO[k];
1021	EE[k+2] = EEE[1-k] - EEO[1-k];
1022	}
1023	for (k=0;k<4;k++)
1024	{
1025	E[k] = EE[k] + EO[k];
1026	E[k+4] = EE[3-k] - EO[3-k];
1027	}
1028	for (k=0;k<8;k++)
1029	{
1030	dst[j][k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
1031	dst[j][k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
1032	}
1033	}
1034	}
1035	#endif
1036
1037	void partialButterflyInverse16(short src,short dst,int shift, int line)
1038	{
1039	int j,k;
1040	int E[8],O[8];
1041	int EE[4],EO[4];
1042	int EEE[2],EEO[2];
1043	int add = 1<<(shift-1);
1044
1045	for (j=0; j<line; j++)
1046	{
1047	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
1048	for (k=0;k<8;k++)
1049	{
1050	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
1051	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
1052	}
1053	for (k=0;k<4;k++)
1054	{
1055	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
1056	}
1057	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
1058	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
1059	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
1060	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
1061
1062	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
1063	for (k=0;k<2;k++)
1064	{
1065	EE[k] = EEE[k] + EEO[k];
1066	EE[k+2] = EEE[1-k] - EEO[1-k];
1067	}
1068	for (k=0;k<4;k++)
1069	{
1070	E[k] = EE[k] + EO[k];
1071	E[k+4] = EE[3-k] - EO[3-k];
1072	}
1073	for (k=0;k<8;k++)
1074	{
1075	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
1076	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
1077	}
1078	src ++;
1079	dst += 16;
1080	}
1081	}
1082
1083	#if !UNIFIED_TRANSFORM
1084	/** 16x16 inverse transform (2D)
1085	* \param coeff input data (transform coefficients)
1086	* \param block output data (residual)
1087	*/
1088	void xITr16(short coeff[16][16],short block[16][16])
1089	{
1090	int shift_1st = SHIFT_INV_1ST;
1091	#if FULL_NBIT
1092	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
1093	#else
1094	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
1095	#endif
1096	short tmp[16][16];
1097
1098	partialButterflyInverse16(coeff,tmp,shift_1st);
1099	partialButterflyInverse16(tmp,block,shift_2nd);
1100	}
1101
1102	/** 32x32 forward transform implemented using partial butterfly structure (1D)
1103	* \param src input data (residual)
1104	* \param dst output data (transform coefficients)
1105	* \param shift specifies right shift after 1D transform
1106	*/
1107	void partialButterfly32(short src[32][32],short dst[32][32],int shift)
1108	{
1109	int j,k;
1110	int E[16],O[16];
1111	int EE[8],EO[8];
1112	int EEE[4],EEO[4];
1113	int EEEE[2],EEEO[2];
1114	int add = 1<<(shift-1);
1115
1116	for (j=0; j<32; j++)
1117	{
1118	/* E and O*/
1119	for (k=0;k<16;k++)
1120	{
1121	E[k] = src[j][k] + src[j][31-k];
1122	O[k] = src[j][k] - src[j][31-k];
1123	}
1124	/* EE and EO */
1125	for (k=0;k<8;k++)
1126	{
1127	EE[k] = E[k] + E[15-k];
1128	EO[k] = E[k] - E[15-k];
1129	}
1130	/* EEE and EEO */
1131	for (k=0;k<4;k++)
1132	{
1133	EEE[k] = EE[k] + EE[7-k];
1134	EEO[k] = EE[k] - EE[7-k];
1135	}
1136	/* EEEE and EEEO */
1137	EEEE[0] = EEE[0] + EEE[3];
1138	EEEO[0] = EEE[0] - EEE[3];
1139	EEEE[1] = EEE[1] + EEE[2];
1140	EEEO[1] = EEE[1] - EEE[2];
1141
1142	dst[ 0][j] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
1143	dst[16][j] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]EEEE[1] + add)>>shift;
1144	dst[ 8][j] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]EEEO[1] + add)>>shift;
1145	dst[24][j] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]EEEO[1] + add)>>shift;
1146	for (k=4;k<32;k+=8)
1147	{
1148	dst[k][j] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]EEO[3] + add)>>shift;
1149	}
1150	for (k=2;k<32;k+=4)
1151	{
1152	dst[k][j] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]EO[3] +
1153	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
1154	}
1155	for (k=1;k<32;k+=2)
1156	{
1157	dst[k][j] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]O[ 3] +
1158	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
1159	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
1160	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
1161	}
1162	}
1163	}
1164	#endif
1165
1166	void partialButterfly32(short src,short dst,int shift, int line)
1167	{
1168	int j,k;
1169	int E[16],O[16];
1170	int EE[8],EO[8];
1171	int EEE[4],EEO[4];
1172	int EEEE[2],EEEO[2];
1173	int add = 1<<(shift-1);
1174
1175	for (j=0; j<line; j++)
1176	{
1177	/* E and O*/
1178	for (k=0;k<16;k++)
1179	{
1180	E[k] = src[k] + src[31-k];
1181	O[k] = src[k] - src[31-k];
1182	}
1183	/* EE and EO */
1184	for (k=0;k<8;k++)
1185	{
1186	EE[k] = E[k] + E[15-k];
1187	EO[k] = E[k] - E[15-k];
1188	}
1189	/* EEE and EEO */
1190	for (k=0;k<4;k++)
1191	{
1192	EEE[k] = EE[k] + EE[7-k];
1193	EEO[k] = EE[k] - EE[7-k];
1194	}
1195	/* EEEE and EEEO */
1196	EEEE[0] = EEE[0] + EEE[3];
1197	EEEO[0] = EEE[0] - EEE[3];
1198	EEEE[1] = EEE[1] + EEE[2];
1199	EEEO[1] = EEE[1] - EEE[2];
1200
1201	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
1202	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
1203	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
1204	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
1205	for (k=4;k<32;k+=8)
1206	{
1207	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
1208	}
1209	for (k=2;k<32;k+=4)
1210	{
1211	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
1212	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
1213	}
1214	for (k=1;k<32;k+=2)
1215	{
1216	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
1217	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
1218	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
1219	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
1220	}
1221	src += 32;
1222	dst ++;
1223	}
1224	}
1225
1226	#if !UNIFIED_TRANSFORM
1227	/** 32x32 forward transform (2D)
1228	* \param block input data (residual)
1229	* \param coeff output data (transform coefficients)
1230	*/
1231	void xTr32(short block[32][32],short coeff[32][32])
1232	{
1233	#if FULL_NBIT
1234	int shift_1st = 4 + g_uiBitDepth - 8; // log2(32) - 1 + g_uiBitDepth - 8
1235	#else
1236	int shift_1st = 4 + g_uiBitIncrement; // log2(32) - 1 + g_uiBitIncrement
1237	#endif
1238	int shift_2nd = 11; // log2(32) + 6
1239	short tmp[32][32];
1240
1241	partialButterfly32(block,tmp,shift_1st);
1242	partialButterfly32(tmp,coeff,shift_2nd);
1243	}
1244
1245	/** 32x32 inverse transform implemented using partial butterfly structure (1D)
1246	* \param src input data (transform coefficients)
1247	* \param dst output data (residual)
1248	* \param shift specifies right shift after 1D transform
1249	*/
1250	void partialButterflyInverse32(short src[32][32],short dst[32][32],int shift)
1251	{
1252	int j,k;
1253	int E[16],O[16];
1254	int EE[8],EO[8];
1255	int EEE[4],EEO[4];
1256	int EEEE[2],EEEO[2];
1257	int add = 1<<(shift-1);
1258
1259	for (j=0; j<32; j++)
1260	{
1261	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
1262	for (k=0;k<16;k++)
1263	{
1264	O[k] = g_aiT32[ 1][k]src[ 1][j] + g_aiT32[ 3][k]src[ 3][j] + g_aiT32[ 5][k]src[ 5][j] + g_aiT32[ 7][k]src[ 7][j] +
1265	g_aiT32[ 9][k]src[ 9][j] + g_aiT32[11][k]src[11][j] + g_aiT32[13][k]src[13][j] + g_aiT32[15][k]src[15][j] +
1266	g_aiT32[17][k]src[17][j] + g_aiT32[19][k]src[19][j] + g_aiT32[21][k]src[21][j] + g_aiT32[23][k]src[23][j] +
1267	g_aiT32[25][k]src[25][j] + g_aiT32[27][k]src[27][j] + g_aiT32[29][k]src[29][j] + g_aiT32[31][k]src[31][j];
1268	}
1269	for (k=0;k<8;k++)
1270	{
1271	EO[k] = g_aiT32[ 2][k]src[ 2][j] + g_aiT32[ 6][k]src[ 6][j] + g_aiT32[10][k]src[10][j] + g_aiT32[14][k]src[14][j] +
1272	g_aiT32[18][k]src[18][j] + g_aiT32[22][k]src[22][j] + g_aiT32[26][k]src[26][j] + g_aiT32[30][k]src[30][j];
1273	}
1274	for (k=0;k<4;k++)
1275	{
1276	EEO[k] = g_aiT32[4][k]src[4][j] + g_aiT32[12][k]src[12][j] + g_aiT32[20][k]src[20][j] + g_aiT32[28][k]src[28][j];
1277	}
1278	EEEO[0] = g_aiT32[8][0]src[8][j] + g_aiT32[24][0]src[24][j];
1279	EEEO[1] = g_aiT32[8][1]src[8][j] + g_aiT32[24][1]src[24][j];
1280	EEEE[0] = g_aiT32[0][0]src[0][j] + g_aiT32[16][0]src[16][j];
1281	EEEE[1] = g_aiT32[0][1]src[0][j] + g_aiT32[16][1]src[16][j];
1282
1283	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
1284	EEE[0] = EEEE[0] + EEEO[0];
1285	EEE[3] = EEEE[0] - EEEO[0];
1286	EEE[1] = EEEE[1] + EEEO[1];
1287	EEE[2] = EEEE[1] - EEEO[1];
1288	for (k=0;k<4;k++)
1289	{
1290	EE[k] = EEE[k] + EEO[k];
1291	EE[k+4] = EEE[3-k] - EEO[3-k];
1292	}
1293	for (k=0;k<8;k++)
1294	{
1295	E[k] = EE[k] + EO[k];
1296	E[k+8] = EE[7-k] - EO[7-k];
1297	}
1298	for (k=0;k<16;k++)
1299	{
1300	dst[j][k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
1301	dst[j][k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
1302	}
1303	}
1304	}
1305	#endif
1306
1307	void partialButterflyInverse32(short src,short dst,int shift, int line)
1308	{
1309	int j,k;
1310	int E[16],O[16];
1311	int EE[8],EO[8];
1312	int EEE[4],EEO[4];
1313	int EEEE[2],EEEO[2];
1314	int add = 1<<(shift-1);
1315
1316	for (j=0; j<line; j++)
1317	{
1318	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
1319	for (k=0;k<16;k++)
1320	{
1321	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
1322	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
1323	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
1324	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
1325	}
1326	for (k=0;k<8;k++)
1327	{
1328	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
1329	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
1330	}
1331	for (k=0;k<4;k++)
1332	{
1333	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
1334	}
1335	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
1336	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
1337	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
1338	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
1339
1340	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
1341	EEE[0] = EEEE[0] + EEEO[0];
1342	EEE[3] = EEEE[0] - EEEO[0];
1343	EEE[1] = EEEE[1] + EEEO[1];
1344	EEE[2] = EEEE[1] - EEEO[1];
1345	for (k=0;k<4;k++)
1346	{
1347	EE[k] = EEE[k] + EEO[k];
1348	EE[k+4] = EEE[3-k] - EEO[3-k];
1349	}
1350	for (k=0;k<8;k++)
1351	{
1352	E[k] = EE[k] + EO[k];
1353	E[k+8] = EE[7-k] - EO[7-k];
1354	}
1355	for (k=0;k<16;k++)
1356	{
1357	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
1358	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
1359	}
1360	src ++;
1361	dst += 32;
1362	}
1363	}
1364
1365	#if !UNIFIED_TRANSFORM
1366	/** 32x32 inverse transform (2D)
1367	* \param coeff input data (transform coefficients)
1368	* \param block output data (residual)
1369	*/
1370	void xITr32(short coeff[32][32],short block[32][32])
1371	{
1372	int shift_1st = SHIFT_INV_1ST;
1373	#if FULL_NBIT
1374	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
1375	#else
1376	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
1377	#endif
1378	short tmp[32][32];
1379
1380	partialButterflyInverse32(coeff,tmp,shift_1st);
1381	partialButterflyInverse32(tmp,block,shift_2nd);
1382	}
1383	#endif
1384
1385	/** MxN forward transform (2D)
1386	* \param block input data (residual)
1387	* \param coeff output data (transform coefficients)
1388	* \param iWidth input data (width of transform)
1389	* \param iHeight input data (height of transform)
1390	*/
1391	#if UNIFIED_TRANSFORM
1392	void xTrMxN(short block,short coeff, int iWidth, int iHeight, UInt uiMode)
1393	#else
1394	void xTrMxN(short block,short coeff, int iWidth, int iHeight)
1395	#endif
1396	{
1397	#if FULL_NBIT
1398	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitDepth - 8; // log2(iWidth) - 1 + g_uiBitDepth - 8
1399	#else
1400	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitIncrement; // log2(iWidth) - 1 + g_uiBitIncrement
1401	#endif
1402	int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
1403
1404	short tmp[ 64 * 64 ];
1405
1406	if( iWidth == 16 && iHeight == 4)
1407	{
1408	partialButterfly16( block, tmp, shift_1st, iHeight );
1409	partialButterfly4( tmp, coeff, shift_2nd, iWidth );
1410	}
1411	else if( iWidth == 32 && iHeight == 8 )
1412	{
1413	partialButterfly32( block, tmp, shift_1st, iHeight );
1414	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
1415	}
1416	else if( iWidth == 4 && iHeight == 16)
1417	{
1418	partialButterfly4( block, tmp, shift_1st, iHeight );
1419	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
1420	}
1421	else if( iWidth == 8 && iHeight == 32 )
1422	{
1423	partialButterfly8( block, tmp, shift_1st, iHeight );
1424	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
1425	}
1426	#if UNIFIED_TRANSFORM
1427	else if( iWidth == 4 && iHeight == 4)
1428	{
1429	#if LOGI_INTRA_NAME_3MPM
1430	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
1431	#else
1432	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])// Check for DCT or DST
1433	#endif
1434	{
1435	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
1436	}
1437	else
1438	{
1439	partialButterfly4(block, tmp, shift_1st, iHeight);
1440	}
1441	#if LOGI_INTRA_NAME_3MPM
1442	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
1443	#else
1444	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
1445	#endif
1446	{
1447	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
1448	}
1449	else
1450	{
1451	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
1452	}
1453	}
1454	else if( iWidth == 8 && iHeight == 8)
1455	{
1456	partialButterfly8( block, tmp, shift_1st, iHeight );
1457	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
1458	}
1459	else if( iWidth == 16 && iHeight == 16)
1460	{
1461	partialButterfly16( block, tmp, shift_1st, iHeight );
1462	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
1463	}
1464	else if( iWidth == 32 && iHeight == 32)
1465	{
1466	partialButterfly32( block, tmp, shift_1st, iHeight );
1467	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
1468	}
1469	#endif
1470	}
1471	/** MxN inverse transform (2D)
1472	* \param coeff input data (transform coefficients)
1473	* \param block output data (residual)
1474	* \param iWidth input data (width of transform)
1475	* \param iHeight input data (height of transform)
1476	*/
1477	#if UNIFIED_TRANSFORM
1478	void xITrMxN(short coeff,short block, int iWidth, int iHeight, UInt uiMode)
1479	#else
1480	void xITrMxN(short coeff,short block, int iWidth, int iHeight)
1481	#endif
1482	{
1483	int shift_1st = SHIFT_INV_1ST;
1484	#if FULL_NBIT
1485	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
1486	#else
1487	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
1488	#endif
1489
1490	short tmp[ 64*64];
1491	if( iWidth == 16 && iHeight == 4)
1492	{
1493	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
1494	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
1495	}
1496	else if( iWidth == 32 && iHeight == 8)
1497	{
1498	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
1499	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
1500	}
1501	else if( iWidth == 4 && iHeight == 16)
1502	{
1503	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
1504	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
1505	}
1506	else if( iWidth == 8 && iHeight == 32)
1507	{
1508	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
1509	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
1510	}
1511	#if UNIFIED_TRANSFORM
1512	else if( iWidth == 4 && iHeight == 4)
1513	{
1514	#if LOGI_INTRA_NAME_3MPM
1515	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
1516	#else
1517	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
1518	#endif
1519	{
1520	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
1521	}
1522	else
1523	{
1524	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
1525	}
1526	#if LOGI_INTRA_NAME_3MPM
1527	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
1528	#else
1529	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
1530	#endif
1531	{
1532	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
1533	}
1534	else
1535	{
1536	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
1537	}
1538	}
1539	else if( iWidth == 8 && iHeight == 8)
1540	{
1541	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
1542	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
1543	}
1544	else if( iWidth == 16 && iHeight == 16)
1545	{
1546	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
1547	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
1548	}
1549	else if( iWidth == 32 && iHeight == 32)
1550	{
1551	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
1552	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
1553	}
1554	#endif
1555	}
1556
1557	#endif //MATRIX_MULT
1558
1559	#if MULTIBITS_DATA_HIDING
1560	// To minimize the distortion only. No rate is considered.
1561	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
1562	{
1563	Int tsig = pcCU->getSlice()->getPPS()->getTSIG() ;
1564	Int lastCG = -1;
1565	Int absSum = 0 ;
1566	Int n ;
1567
1568	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
1569	{
1570	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
1571	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
1572	absSum = 0 ;
1573
1574	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
1575	{
1576	if( pQCoef[ scan[ n + subPos ]] )
1577	{
1578	lastNZPosInCG = n;
1579	break;
1580	}
1581	}
1582
1583	for(n = 0; n <SCAN_SET_SIZE; n++ )
1584	{
1585	if( pQCoef[ scan[ n + subPos ]] )
1586	{
1587	firstNZPosInCG = n;
1588	break;
1589	}
1590	}
1591
1592	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
1593	{
1594	absSum += pQCoef[ scan[ n + subPos ]];
1595	}
1596
1597	if(lastNZPosInCG>=0 && lastCG==-1)
1598	{
1599	lastCG = 1 ;
1600	}
1601
1602	if( lastNZPosInCG-firstNZPosInCG>=tsig )
1603	{
1604	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
1605	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1606	{
1607	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
1608
1609	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
1610	{
1611	UInt blkPos = scan[ n+subPos ];
1612	if(pQCoef[ blkPos ] != 0 )
1613	{
1614	if(deltaU[blkPos]>0)
1615	{
1616	curCost = - deltaU[blkPos];
1617	curChange=1 ;
1618	}
1619	else
1620	{
1621	//curChange =-1;
1622	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1623	{
1624	curCost=MAX_INT ;
1625	}
1626	else
1627	{
1628	curCost = deltaU[blkPos];
1629	curChange =-1;
1630	}
1631	}
1632	}
1633	else
1634	{
1635	if(n<firstNZPosInCG)
1636	{
1637	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1638	if(thisSignBit != signbit )
1639	{
1640	curCost = MAX_INT;
1641	}
1642	else
1643	{
1644	curCost = - (deltaU[blkPos]) ;
1645	curChange = 1 ;
1646	}
1647	}
1648	else
1649	{
1650	curCost = - (deltaU[blkPos]) ;
1651	curChange = 1 ;
1652	}
1653	}
1654
1655	if( curCost<minCostInc)
1656	{
1657	minCostInc = curCost ;
1658	finalChange = curChange ;
1659	minPos = blkPos ;
1660	}
1661	} //CG loop
1662
1663	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
1664	{
1665	finalChange = -1;
1666	}
1667
1668	if(pCoef[minPos]>=0)
1669	{
1670	pQCoef[minPos] += finalChange ;
1671	}
1672	else
1673	{
1674	pQCoef[minPos] -= finalChange ;
1675	}
1676	} // Hide
1677	}
1678	if(lastCG==1)
1679	{
1680	lastCG=0 ;
1681	}
1682	} // TU loop
1683
1684	return;
1685	}
1686	#endif
1687
1688	Void TComTrQuant::xQuant( TComDataCU* pcCU,
1689	Int* pSrc,
1690	TCoeff* pDes,
1691	#if ADAPTIVE_QP_SELECTION
1692	Int*& pArlDes,
1693	#endif
1694	Int iWidth,
1695	Int iHeight,
1696	UInt& uiAcSum,
1697	TextType eTType,
1698	UInt uiAbsPartIdx )
1699	{
1700	Int* piCoef = pSrc;
1701	TCoeff* piQCoef = pDes;
1702	#if ADAPTIVE_QP_SELECTION
1703	Int* piArlCCoef = pArlDes;
1704	#endif
1705	Int iAdd = 0;
1706
1707	if ( m_bUseRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) )
1708	{
1709	#if ADAPTIVE_QP_SELECTION
1710	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1711	#else
1712	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
1713	#endif
1714	}
1715	else
1716	{
1717	#if MULTIBITS_DATA_HIDING
1718	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
1719
1720	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
1721	if (scanIdx == SCAN_ZIGZAG)
1722	{
1723	scanIdx = SCAN_DIAG;
1724	}
1725
1726	if (iWidth != iHeight)
1727	{
1728	scanIdx = SCAN_DIAG;
1729	}
1730
1731	const UInt * scan;
1732	if (iWidth == iHeight)
1733	{
1734	scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
1735	}
1736	else
1737	{
1738	scan = g_sigScanNSQT[ log2BlockSize - 2 ];
1739	}
1740
1741	Int deltaU[32*32] ;
1742	#endif
1743
1744	#if ADAPTIVE_QP_SELECTION
1745	QpParam cQpBase;
1746	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
1747
1748	#if H0736_AVC_STYLE_QP_RANGE
1749	Int qpScaled;
1750	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
1751
1752	if(eTType == TEXT_LUMA)
1753	{
1754	qpScaled = iQpBase + qpBDOffset;
1755	}
1756	else
1757	{
1758	qpScaled = Clip3( -qpBDOffset, 51, iQpBase);
1759
1760	if(qpScaled < 0)
1761	{
1762	qpScaled = qpScaled + qpBDOffset;
1763	}
1764	else
1765	{
1766	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBDOffset;
1767	}
1768	}
1769	cQpBase.setQpParam(qpScaled, false, pcCU->getSlice()->getSliceType());
1770	#else
1771	if(eTType != TEXT_LUMA)
1772	{
1773	iQpBase = g_aucChromaScale[iQpBase];
1774	}
1775	cQpBase.setQpParam(iQpBase, false, pcCU->getSlice()->getSliceType());
1776	#endif
1777	#endif
1778
1779	Bool bNonSqureFlag = ( iWidth != iHeight );
1780	UInt dir = SCALING_LIST_SQT;
1781	if( bNonSqureFlag )
1782	{
1783	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1784	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1785	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1786	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1787	iHeight = iWidth;
1788	}
1789
1790	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1791	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
1792	assert(scalingListType < 6);
1793	Int *piQuantCoeff = 0;
1794	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2, dir);
1795
1796	#if FULL_NBIT
1797	UInt uiBitDepth = g_uiBitDepth;
1798	#else
1799	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1800	#endif
1801	UInt iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
1802	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
1803
1804	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1805
1806	#if ADAPTIVE_QP_SELECTION
1807	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
1808	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1809	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
1810	Int iAddC = 1 << (iQBitsC-1);
1811	#endif
1812
1813	#if MULTIBITS_DATA_HIDING
1814	Int qBits8 = iQBits-8;
1815	#endif
1816	for( Int n = 0; n < iWidth*iHeight; n++ )
1817	{
1818	Int iLevel;
1819	Int iSign;
1820	UInt uiBlockPos = n;
1821	iLevel = piCoef[uiBlockPos];
1822	iSign = (iLevel < 0 ? -1: 1);
1823
1824	#if ADAPTIVE_QP_SELECTION
1825	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
1826	if( m_bUseAdaptQpSelect )
1827	{
1828	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
1829	}
1830	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
1831	#if MULTIBITS_DATA_HIDING
1832	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
1833	#endif
1834	#else
1835	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
1836	#if MULTIBITS_DATA_HIDING
1837	deltaU[uiBlockPos] = (Int)( ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
1838	#endif
1839	#endif
1840	uiAcSum += iLevel;
1841	iLevel *= iSign;
1842	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
1843	} // for n
1844	#if MULTIBITS_DATA_HIDING
1845	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1846	{
1847	if(uiAcSum>=2)
1848	{
1849	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
1850	}
1851	}
1852	#endif
1853	} //if RDOQ
1854	//return;
1855
1856	}
1857
1858	Void TComTrQuant::xDeQuant( const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
1859	{
1860
1861	const TCoeff* piQCoef = pSrc;
1862	Int* piCoef = pDes;
1863	UInt dir = SCALING_LIST_SQT;
1864	if( iWidth != iHeight )
1865	{
1866	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
1867	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
1868	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
1869	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
1870	iHeight = iWidth;
1871	}
1872
1873	if ( iWidth > (Int)m_uiMaxTrSize )
1874	{
1875	iWidth = m_uiMaxTrSize;
1876	iHeight = m_uiMaxTrSize;
1877	}
1878
1879	Int iShift,iAdd,iCoeffQ;
1880	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
1881
1882	#if FULL_NBIT
1883	UInt uiBitDepth = g_uiBitDepth;
1884	#else
1885	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
1886	#endif
1887	UInt iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
1888	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
1889
1890	#if DEQUANT_CLIPPING
1891	TCoeff clipQCoef;
1892	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + uiBitDepth - m_cQP.m_iPer) );
1893	const Int levelLimit = 1 << bitRange;
1894	#endif
1895
1896	if(getUseScalingList())
1897	{
1898	iShift += 4;
1899	if(iShift > m_cQP.m_iPer)
1900	{
1901	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
1902	}
1903	else
1904	{
1905	iAdd = 0;
1906	}
1907	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
1908
1909	if(iShift > m_cQP.m_iPer)
1910	{
1911	for( Int n = 0; n < iWidth*iHeight; n++ )
1912	{
1913	#if DEQUANT_CLIPPING
1914	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1915	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1916	#else
1917	iCoeffQ = ((piQCoef[n] * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
1918	#endif
1919	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1920	}
1921	}
1922	else
1923	{
1924	for( Int n = 0; n < iWidth*iHeight; n++ )
1925	{
1926	#if DEQUANT_CLIPPING
1927	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
1928	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1929	#else
1930	iCoeffQ = (piQCoef[n] * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
1931	#endif
1932	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1933	}
1934	}
1935	}
1936	else
1937	{
1938	iAdd = 1 << (iShift-1);
1939	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
1940
1941	for( Int n = 0; n < iWidth*iHeight; n++ )
1942	{
1943	#if DEQUANT_CLIPPING
1944	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
1945	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
1946	#else
1947	iCoeffQ = ( piQCoef[n] * scale + iAdd ) >> iShift;
1948	#endif
1949	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
1950	}
1951	}
1952	}
1953
1954	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
1955	Bool bUseRDOQ, Bool bEnc
1956	#if ADAPTIVE_QP_SELECTION
1957	, Bool bUseAdaptQpSelect
1958	#endif
1959	)
1960	{
1961	m_uiMaxTrSize = uiMaxTrSize;
1962	m_bEnc = bEnc;
1963	m_bUseRDOQ = bUseRDOQ;
1964	#if ADAPTIVE_QP_SELECTION
1965	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1966	#endif
1967	}
1968
1969	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
1970	Pel* pcResidual,
1971	UInt uiStride,
1972	TCoeff* rpcCoeff,
1973	#if ADAPTIVE_QP_SELECTION
1974	Int*& rpcArlCoeff,
1975	#endif
1976	UInt uiWidth,
1977	UInt uiHeight,
1978	UInt& uiAbsSum,
1979	TextType eTType,
1980	UInt uiAbsPartIdx )
1981	{
1982	#if LOSSLESS_CODING
1983	if((m_cQP.qp() == 0) && (pcCU->getSlice()->getSPS()->getUseLossless()))
1984	{
1985	uiAbsSum=0;
1986	for (UInt k = 0; k<uiHeight; k++)
1987	{
1988	for (UInt j = 0; j<uiWidth; j++)
1989	{
1990	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
1991	uiAbsSum += abs(pcResidual[k*uiStride+j]);
1992	}
1993	}
1994	return;
1995	}
1996	#endif
1997	UInt uiMode; //luma intra pred
1998	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
1999	{
2000	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
2001	}
2002	else
2003	{
2004	uiMode = REG_DCT;
2005	}
2006
2007	uiAbsSum = 0;
2008	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
2009
2010	xT( uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
2011	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
2012	#if ADAPTIVE_QP_SELECTION
2013	rpcArlCoeff,
2014	#endif
2015	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
2016	}
2017
2018	#if LOSSLESS_CODING
2019	Void TComTrQuant::invtransformNxN( TComDataCU* pcCU, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType)
2020	#else
2021	Void TComTrQuant::invtransformNxN( TextType eText, UInt uiMode,Pel& rpcResidual, UInt uiStride, TCoeff pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType)
2022	#endif
2023	{
2024	#if LOSSLESS_CODING
2025	if((m_cQP.qp() == 0) && (pcCU->getSlice()->getSPS()->getUseLossless()))
2026	{
2027	for (UInt k = 0; k<uiHeight; k++)
2028	{
2029	for (UInt j = 0; j<uiWidth; j++)
2030	{
2031	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
2032	}
2033	}
2034	return;
2035	}
2036	#endif
2037	xDeQuant( pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
2038	xIT( uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
2039	}
2040
2041	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
2042	{
2043	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
2044	{
2045	return;
2046	}
2047
2048	UInt uiLumaTrMode, uiChromaTrMode;
2049	pcCU->convertTransIdx( uiAbsPartIdx, pcCU->getTransformIdx( uiAbsPartIdx ), uiLumaTrMode, uiChromaTrMode );
2050	const UInt uiStopTrMode = eTxt == TEXT_LUMA ? uiLumaTrMode : uiChromaTrMode;
2051
2052	if( uiTrMode == uiStopTrMode )
2053	{
2054	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
2055	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
2056	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
2057	{
2058	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
2059	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
2060	{
2061	return;
2062	}
2063	uiWidth <<= 1;
2064	uiHeight <<= 1;
2065	}
2066	Pel* pResi = rpcResidual + uiAddr;
2067	if( pcCU->useNonSquareTrans( uiTrMode, uiAbsPartIdx ) )
2068	{
2069	Int trWidth = uiWidth;
2070	Int trHeight = uiHeight;
2071	pcCU->getNSQTSize( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
2072
2073	uiWidth = trWidth;
2074	uiHeight = trHeight;
2075	}
2076	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
2077	assert(scalingListType < 6);
2078	#if LOSSLESS_CODING
2079	invtransformNxN( pcCU, eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
2080	#else
2081	invtransformNxN( eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
2082	#endif
2083	}
2084	else
2085	{
2086	uiTrMode++;
2087	uiWidth >>= 1;
2088	uiHeight >>= 1;
2089	Int trWidth = uiWidth, trHeight = uiHeight;
2090	Int trLastWidth = uiWidth << 1, trLastHeight = uiHeight << 1;
2091	pcCU->getNSQTSize ( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
2092	pcCU->getNSQTSize ( uiTrMode - 1, uiAbsPartIdx, trLastWidth, trLastHeight );
2093	UInt uiAddrOffset = trHeight * uiStride;
2094	UInt uiCoefOffset = trWidth * trHeight;
2095	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
2096	UInt uiInterTUSplitDirection = pcCU->getInterTUSplitDirection ( trWidth, trHeight, trLastWidth, trLastHeight );
2097	if( uiInterTUSplitDirection != 2 )
2098	{
2099	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
2100	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth * uiInterTUSplitDirection + uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
2101	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 2 * trWidth * uiInterTUSplitDirection + 2 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
2102	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 3 * trWidth * uiInterTUSplitDirection + 3 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
2103	}
2104	else
2105	{
2106	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
2107	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
2108	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
2109	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
2110	}
2111	}
2112	}
2113
2114	// ------------------------------------------------------------------------------------------------
2115	// Logical transform
2116	// ------------------------------------------------------------------------------------------------
2117
2118	/** Wrapper function between HM interface and core NxN forward transform (2D)
2119	* \param piBlkResi input data (residual)
2120	* \param psCoeff output data (transform coefficients)
2121	* \param uiStride stride of input residual data
2122	* \param iSize transform size (iSize x iSize)
2123	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
2124	*/
2125	Void TComTrQuant::xT( UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
2126	{
2127	#if MATRIX_MULT
2128	Int iSize = iWidth;
2129	if( iWidth != iHeight)
2130	{
2131	xTrMxN( piBlkResi, psCoeff, uiStride, (UInt)iWidth, (UInt)iHeight );
2132	return;
2133	}
2134	xTr(piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
2135	#else
2136	#if UNIFIED_TRANSFORM
2137	Int j;
2138	#else
2139	Int iSize = iWidth;
2140	if( iWidth != iHeight)
2141	#endif
2142	{
2143	short block[ 64 * 64 ];
2144	short coeff[ 64 * 64 ];
2145	{
2146	for (j = 0; j < iHeight; j++)
2147	{
2148	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( short ) );
2149	}
2150	}
2151	#if UNIFIED_TRANSFORM
2152	xTrMxN( block, coeff, iWidth, iHeight, uiMode );
2153	#else
2154	xTrMxN( block, coeff, iWidth, iHeight );
2155	#endif
2156	for ( j = 0; j < iHeight * iWidth; j++ )
2157	{
2158	psCoeff[ j ] = coeff[ j ];
2159	}
2160	return ;
2161	}
2162	#if !UNIFIED_TRANSFORM
2163	if (iSize==4)
2164	{
2165	short block[4][4];
2166	short coeff[4][4];
2167	for (j=0; j<4; j++)
2168	{
2169	memcpy(block[j],piBlkResi+juiStride,4sizeof(short));
2170	}
2171	xTr4(block,coeff,uiMode);
2172	for (j=0; j<4; j++)
2173	{
2174	for (k=0; k<4; k++)
2175	{
2176	psCoeff[j*4+k] = coeff[j][k];
2177	}
2178	}
2179	}
2180	else if (iSize==8)
2181	{
2182	short block[8][8];
2183	short coeff[8][8];
2184
2185	for (j=0; j<8; j++)
2186	{
2187	memcpy(block[j],piBlkResi+juiStride,8sizeof(short));
2188	}
2189
2190	xTr8(block,coeff);
2191	for (j=0; j<8; j++)
2192	{
2193	for (k=0; k<8; k++)
2194	{
2195	psCoeff[j*8+k] = coeff[j][k];
2196	}
2197	}
2198	}
2199	else if (iSize==16)
2200	{
2201	short block[16][16];
2202	short coeff[16][16];
2203
2204	for (j=0; j<16; j++)
2205	{
2206	memcpy(block[j],piBlkResi+juiStride,16sizeof(short));
2207	}
2208	xTr16(block,coeff);
2209	for (j=0; j<16; j++)
2210	{
2211	for (k=0; k<16; k++)
2212	{
2213	psCoeff[j*16+k] = coeff[j][k];
2214	}
2215	}
2216	}
2217	else if (iSize==32)
2218	{
2219	short block[32][32];
2220	short coeff[32][32];
2221
2222	for (j=0; j<32; j++)
2223	{
2224	memcpy(block[j],piBlkResi+juiStride,32sizeof(short));
2225	}
2226	xTr32(block,coeff);
2227	for (j=0; j<32; j++)
2228	{
2229	for (k=0; k<32; k++)
2230	{
2231	psCoeff[j*32+k] = coeff[j][k];
2232	}
2233	}
2234	}
2235	#endif
2236	#endif
2237	}
2238
2239	/** Wrapper function between HM interface and core NxN inverse transform (2D)
2240	* \param plCoef input data (transform coefficients)
2241	* \param pResidual output data (residual)
2242	* \param uiStride stride of input residual data
2243	* \param iSize transform size (iSize x iSize)
2244	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
2245	*/
2246	Void TComTrQuant::xIT( UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
2247	{
2248	#if MATRIX_MULT
2249	Int iSize = iWidth;
2250	if( iWidth != iHeight )
2251	{
2252	xITrMxN( plCoef, pResidual, uiStride, (UInt)iWidth, (UInt)iHeight );
2253	return;
2254	}
2255	xITr(plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
2256	#else
2257	#if UNIFIED_TRANSFORM
2258	Int j;
2259	#else
2260	Int j,k;
2261	Int iSize = iWidth;
2262	if( iWidth != iHeight )
2263	#endif
2264	{
2265	short block[ 64 * 64 ];
2266	short coeff[ 64 * 64 ];
2267	for ( j = 0; j < iHeight * iWidth; j++ )
2268	{
2269	coeff[j] = (short)plCoef[j];
2270	}
2271	#if UNIFIED_TRANSFORM
2272	xITrMxN( coeff, block, iWidth, iHeight, uiMode );
2273	#else
2274	xITrMxN( coeff, block, iWidth, iHeight );
2275	#endif
2276	{
2277	for ( j = 0; j < iHeight; j++ )
2278	{
2279	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(short) );
2280	}
2281	}
2282	return ;
2283	}
2284	#if !UNIFIED_TRANSFORM
2285	if (iSize==4)
2286	{
2287	short block[4][4];
2288	short coeff[4][4];
2289
2290	for (j=0; j<4; j++)
2291	{
2292	for (k=0; k<4; k++)
2293	{
2294	coeff[j][k] = (short)plCoef[j*4+k];
2295	}
2296	}
2297	xITr4(coeff,block,uiMode);
2298	for (j=0; j<4; j++)
2299	{
2300	memcpy(pResidual+juiStride,block[j],4sizeof(short));
2301	}
2302	}
2303	else if (iSize==8)
2304	{
2305	short block[8][8];
2306	short coeff[8][8];
2307
2308	for (j=0; j<8; j++)
2309	{
2310	for (k=0; k<8; k++)
2311	{
2312	coeff[j][k] = (short)plCoef[j*8+k];
2313	}
2314	}
2315	xITr8(coeff,block);
2316	for (j=0; j<8; j++)
2317	{
2318	memcpy(pResidual+juiStride,block[j],8sizeof(short));
2319	}
2320	}
2321	else if (iSize==16)
2322	{
2323	short block[16][16];
2324	short coeff[16][16];
2325
2326	for (j=0; j<16; j++)
2327	{
2328	for (k=0; k<16; k++)
2329	{
2330	coeff[j][k] = (short)plCoef[j*16+k];
2331	}
2332	}
2333	xITr16(coeff,block);
2334	for (j=0; j<16; j++)
2335	{
2336	memcpy(pResidual+juiStride,block[j],16sizeof(short));
2337	}
2338	}
2339
2340	else if (iSize==32)
2341	{
2342	short block[32][32];
2343	short coeff[32][32];
2344
2345	for (j=0; j<32; j++)
2346	{
2347	for (k=0; k<32; k++)
2348	{
2349	coeff[j][k] = (short)plCoef[j*32+k];
2350	}
2351	}
2352	xITr32(coeff,block);
2353	for (j=0; j<32; j++)
2354	{
2355	memcpy(pResidual+juiStride,block[j],32sizeof(short));
2356	}
2357	}
2358	#endif
2359	#endif
2360	}
2361
2362	/** RDOQ with CABAC
2363	* \param pcCU pointer to coding unit structure
2364	* \param plSrcCoeff pointer to input buffer
2365	* \param piDstCoeff reference to pointer to output buffer
2366	* \param uiWidth block width
2367	* \param uiHeight block height
2368	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
2369	* \param eTType plane type / luminance or chrominance
2370	* \param uiAbsPartIdx absolute partition index
2371	* \returns Void
2372	* Rate distortion optimized quantization for entropy
2373	* coding engines using probability models like CABAC
2374	*/
2375	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
2376	Int* plSrcCoeff,
2377	TCoeff* piDstCoeff,
2378	#if ADAPTIVE_QP_SELECTION
2379	Int*& piArlDstCoeff,
2380	#endif
2381	UInt uiWidth,
2382	UInt uiHeight,
2383	UInt& uiAbsSum,
2384	TextType eTType,
2385	UInt uiAbsPartIdx )
2386	{
2387	Int iQBits = m_cQP.m_iBits;
2388	Double dTemp = 0;
2389
2390	UInt dir = SCALING_LIST_SQT;
2391	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
2392	Int uiQ = g_quantScales[m_cQP.rem()];
2393	if (uiWidth != uiHeight)
2394	{
2395	uiLog2TrSize += (uiWidth > uiHeight) ? -1 : 1;
2396	dir = ( uiWidth < uiHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
2397	}
2398
2399	#if FULL_NBIT
2400	UInt uiBitDepth = g_uiBitDepth;
2401	#else
2402	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
2403	#endif
2404	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
2405	UInt uiGoRiceParam = 0;
2406	Double d64BlockUncodedCost = 0;
2407	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
2408	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
2409	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
2410	assert(scalingListType < 6);
2411
2412	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
2413	double dErrScale = 0;
2414	double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem,dir);
2415	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
2416	Int *piQCoef = piQCoefOrg;
2417	double *pdErrScale = pdErrScaleOrg;
2418	#if ADAPTIVE_QP_SELECTION
2419	Int iQBitsC = iQBits - ARL_C_PRECISION;
2420	Int iAddC = 1 << (iQBitsC-1);
2421	#endif
2422	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
2423	if (uiScanIdx == SCAN_ZIGZAG)
2424	{
2425	// Map value zigzag to diagonal scan
2426	uiScanIdx = SCAN_DIAG;
2427	}
2428	Int blockType = uiLog2BlkSize;
2429	if (uiWidth != uiHeight)
2430	{
2431	uiScanIdx = SCAN_DIAG;
2432	blockType = 4;
2433	}
2434
2435	#if ADAPTIVE_QP_SELECTION
2436	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
2437	#endif
2438
2439	Double pdCostCoeff [ 32 * 32 ];
2440	Double pdCostSig [ 32 * 32 ];
2441	Double pdCostCoeff0[ 32 * 32 ];
2442	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
2443	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
2444	#if MULTIBITS_DATA_HIDING
2445	Int rateIncUp [ 32 * 32 ];
2446	Int rateIncDown [ 32 * 32 ];
2447	Int sigRateDelta[ 32 * 32 ];
2448	Int deltaU [ 32 * 32 ];
2449	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
2450	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
2451	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
2452	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
2453	#endif
2454
2455	const UInt * scanCG;
2456	if (uiWidth == uiHeight)
2457	{
2458	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
2459	#if MULTILEVEL_SIGMAP_EXT
2460	if( uiLog2BlkSize == 3 )
2461	{
2462	scanCG = g_sigLastScan8x8[ uiScanIdx ];
2463	}
2464	else if( uiLog2BlkSize == 5 )
2465	{
2466	scanCG = g_sigLastScanCG32x32;
2467	}
2468	#endif
2469	}
2470	else
2471	{
2472	scanCG = g_sigCGScanNSQT[ uiLog2BlkSize - 2 ];
2473	}
2474	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
2475	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
2476	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
2477	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
2478	Int iCGLastScanPos = -1;
2479
2480	UInt uiCtxSet = 0;
2481	Int c1 = 1;
2482	Int c2 = 0;
2483	UInt uiNumOne = 0;
2484	Double d64BaseCost = 0;
2485	Int iLastScanPos = -1;
2486	dTemp = dErrScale;
2487
2488	#if RESTRICT_GR1GR2FLAG_NUMBER
2489	UInt c1Idx = 0;
2490	UInt c2Idx = 0;
2491	Int baseLevel;
2492	#endif
2493
2494	const UInt * scan;
2495	if (uiWidth == uiHeight)
2496	{
2497	scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
2498	}
2499	else
2500	{
2501	scan = g_sigScanNSQT[ uiLog2BlkSize - 2 ];
2502	}
2503
2504	#if !MULTILEVEL_SIGMAP_EXT
2505	if (blockType < 4)
2506	{
2507	for( Int iScanPos = (Int) uiMaxNumCoeff-1; iScanPos >= 0; iScanPos-- )
2508	{
2509	//===== quantization =====
2510	UInt uiBlkPos = scan[iScanPos];
2511	// set coeff
2512	uiQ = piQCoef[uiBlkPos];
2513	dTemp = pdErrScale[uiBlkPos];
2514	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
2515	lLevelDouble = (Int)min<Int64>(((Int64)abs(lLevelDouble) * uiQ), MAX_INT-(1 << (iQBits - 1)));
2516	#if ADAPTIVE_QP_SELECTION
2517	if( m_bUseAdaptQpSelect )
2518	{
2519	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
2520	}
2521	#endif
2522	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
2523	uiMaxAbsLevel=plSrcCoeff[ uiBlkPos ]>=0 ? min<UInt>(uiMaxAbsLevel,32767): min<UInt>(uiMaxAbsLevel,32768);
2524	Double dErr = Double( lLevelDouble );
2525	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
2526	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
2527	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
2528
2529	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2530	{
2531	iLastScanPos = iScanPos;
2532	#if LEVEL_CTX_LUMA_RED
2533	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
2534	#else
2535	uiCtxSet = iScanPos < SCAN_SET_SIZE ? 0 : 3;
2536	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 3;
2537	#endif
2538	}
2539
2540	if ( iLastScanPos >= 0 )
2541	{
2542	//===== coefficient level estimation =====
2543	UInt uiLevel;
2544	UInt uiOneCtx = 4 * uiCtxSet + c1;
2545	#if RESTRICT_GR1GR2FLAG_NUMBER
2546	UInt uiAbsCtx = uiCtxSet + c2;
2547	#else
2548	UInt uiAbsCtx = 3 * uiCtxSet + c2;
2549	#endif
2550
2551	if( iScanPos == iLastScanPos )
2552	{
2553	#if RESTRICT_GR1GR2FLAG_NUMBER
2554	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, iQBits, dTemp, 1 );
2555	#else
2556	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam, iQBits, dTemp, 1 );
2557	#endif
2558	}
2559	else
2560	{
2561	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
2562	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
2563	UShort uiCtxSig = getSigCtxInc( piDstCoeff, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
2564	#if RESTRICT_GR1GR2FLAG_NUMBER
2565	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, iQBits, dTemp, 0 );
2566	#else
2567	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam, iQBits, dTemp, 0 );
2568	#endif
2569	#if MULTIBITS_DATA_HIDING
2570	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2571	#endif
2572	}
2573	#if MULTIBITS_DATA_HIDING
2574	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
2575	if( uiLevel > 0 )
2576	{
2577	#if RESTRICT_GR1GR2FLAG_NUMBER
2578	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
2579	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
2580	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
2581	#else
2582	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam );
2583	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam ) - rateNow;
2584	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam ) - rateNow;
2585	#endif
2586	}
2587	else // uiLevel == 0
2588	{
2589	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2590	}
2591	#endif
2592	piDstCoeff[ uiBlkPos ] = uiLevel;
2593	d64BaseCost += pdCostCoeff [ iScanPos ];
2594
2595	#if RESTRICT_GR1GR2FLAG_NUMBER
2596	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2597	if( uiLevel >= baseLevel )
2598	{
2599	#if EIGHT_BITS_RICE_CODE
2600	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - baseLevel, 23 ) ];
2601	#else
2602	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - baseLevel, 15 ) ];
2603	#endif
2604	}
2605	if ( uiLevel >= 1)
2606	{
2607	c1Idx ++;
2608	}
2609	#endif
2610
2611	//===== update bin model =====
2612	if( uiLevel > 1 )
2613	{
2614	c1 = 0;
2615	c2 += (c2 < 2);
2616	uiNumOne++;
2617	#if RESTRICT_GR1GR2FLAG_NUMBER
2618	c2Idx ++;
2619	#else
2620	if( uiLevel > 2 )
2621	{
2622	#if EIGHT_BITS_RICE_CODE
2623	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - 3, 23 ) ];
2624	#else
2625	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - 3, 15 ) ];
2626	#endif
2627	}
2628	#endif
2629	}
2630	else if( (c1 < 3) && (c1 > 0) && uiLevel)
2631	{
2632	c1++;
2633	}
2634
2635	//===== context set update =====
2636	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
2637	{
2638	c1 = 1;
2639	c2 = 0;
2640	uiGoRiceParam = 0;
2641
2642	#if RESTRICT_GR1GR2FLAG_NUMBER
2643	c1Idx = 0;
2644	c2Idx = 0;
2645	#endif
2646	#if LEVEL_CTX_LUMA_RED
2647	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
2648	#else
2649	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 3;
2650	#endif
2651	if( uiNumOne > 0 )
2652	{
2653	uiCtxSet++;
2654	#if !LEVEL_CTX_LUMA_RED
2655	if(uiNumOne > 3 && eTType==TEXT_LUMA)
2656	{
2657	uiCtxSet++;
2658	}
2659	#endif
2660	}
2661	uiNumOne >>= 1;
2662	}
2663	}
2664	else
2665	{
2666	d64BaseCost += pdCostCoeff0[ iScanPos ];
2667	}
2668	}
2669	}
2670	else //(uiLog2BlkSize > 3), for 16x16 and 32x32 TU
2671	{
2672	#endif
2673	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
2674	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
2675
2676	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
2677	Int iScanPos;
2678	coeffGroupRDStats rdStats;
2679
2680	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
2681	{
2682	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
2683	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
2684	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
2685	#if MULTILEVEL_SIGMAP_EXT
2686	if( uiWidth == 8 && uiHeight == 8 && (uiScanIdx == SCAN_HOR \|\| uiScanIdx == SCAN_VER) )
2687	{
2688	uiCGPosY = (uiScanIdx == SCAN_HOR ? uiCGBlkPos : 0);
2689	uiCGPosX = (uiScanIdx == SCAN_VER ? uiCGBlkPos : 0);
2690	}
2691	#endif
2692	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
2693
2694	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2695	{
2696	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2697	//===== quantization =====
2698	UInt uiBlkPos = scan[iScanPos];
2699	// set coeff
2700	uiQ = piQCoef[uiBlkPos];
2701	dTemp = pdErrScale[uiBlkPos];
2702	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
2703	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
2704	#if ADAPTIVE_QP_SELECTION
2705	if( m_bUseAdaptQpSelect )
2706	{
2707	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
2708	}
2709	#endif
2710	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
2711
2712	Double dErr = Double( lLevelDouble );
2713	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
2714	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
2715	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
2716
2717	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2718	{
2719	iLastScanPos = iScanPos;
2720	#if LEVEL_CTX_LUMA_RED
2721	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
2722	#else
2723	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 3;
2724	#endif
2725	iCGLastScanPos = iCGScanPos;
2726	}
2727
2728	if ( iLastScanPos >= 0 )
2729	{
2730	//===== coefficient level estimation =====
2731	UInt uiLevel;
2732	UInt uiOneCtx = 4 * uiCtxSet + c1;
2733	#if RESTRICT_GR1GR2FLAG_NUMBER
2734	UInt uiAbsCtx = uiCtxSet + c2;
2735	#else
2736	UInt uiAbsCtx = 3 * uiCtxSet + c2;
2737	#endif
2738
2739	if( iScanPos == iLastScanPos )
2740	{
2741	#if RESTRICT_GR1GR2FLAG_NUMBER
2742	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2743	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2744	c1Idx, c2Idx, iQBits, dTemp, 1 );
2745	#else
2746	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2747	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2748	iQBits, dTemp, 1 );
2749	#endif
2750	}
2751	else
2752	{
2753	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
2754	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
2755	UShort uiCtxSig = getSigCtxInc( piDstCoeff, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
2756	#if RESTRICT_GR1GR2FLAG_NUMBER
2757	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2758	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2759	c1Idx, c2Idx, iQBits, dTemp, 0 );
2760	#else
2761	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2762	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2763	iQBits, dTemp, 0 );
2764	#endif
2765	#if MULTIBITS_DATA_HIDING
2766	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2767	#endif
2768	}
2769	#if MULTIBITS_DATA_HIDING
2770	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
2771	if( uiLevel > 0 )
2772	{
2773	#if RESTRICT_GR1GR2FLAG_NUMBER
2774	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
2775	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
2776	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
2777	#else
2778	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam );
2779	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam ) - rateNow;
2780	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam ) - rateNow;
2781	#endif
2782	}
2783	else // uiLevel == 0
2784	{
2785	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2786	}
2787	#endif
2788	piDstCoeff[ uiBlkPos ] = uiLevel;
2789	d64BaseCost += pdCostCoeff [ iScanPos ];
2790
2791
2792	#if RESTRICT_GR1GR2FLAG_NUMBER
2793	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2794	if( uiLevel >= baseLevel )
2795	{
2796	#if EIGHT_BITS_RICE_CODE
2797	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - baseLevel , 23 ) ];
2798	#else
2799	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - baseLevel, 15 ) ];
2800	#endif
2801	}
2802	if ( uiLevel >= 1)
2803	{
2804	c1Idx ++;
2805	}
2806	#endif
2807
2808	//===== update bin model =====
2809	if( uiLevel > 1 )
2810	{
2811	c1 = 0;
2812	c2 += (c2 < 2);
2813	uiNumOne++;
2814	#if RESTRICT_GR1GR2FLAG_NUMBER
2815	c2Idx ++;
2816	#else
2817	if( uiLevel > 2 )
2818	{
2819	#if EIGHT_BITS_RICE_CODE
2820	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - 3, 23 ) ];
2821	#else
2822	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - 3, 15 ) ];
2823	#endif
2824	}
2825	#endif
2826	}
2827	else if( (c1 < 3) && (c1 > 0) && uiLevel)
2828	{
2829	c1++;
2830	}
2831
2832	//===== context set update =====
2833	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
2834	{
2835	c1 = 1;
2836	c2 = 0;
2837	uiGoRiceParam = 0;
2838
2839	#if RESTRICT_GR1GR2FLAG_NUMBER
2840	c1Idx = 0;
2841	c2Idx = 0;
2842	#endif
2843	#if LEVEL_CTX_LUMA_RED
2844	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
2845	#else
2846	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 3;
2847	#endif
2848	if( uiNumOne > 0 )
2849	{
2850	uiCtxSet++;
2851	#if !LEVEL_CTX_LUMA_RED
2852	if( uiNumOne > 3 && eTType==TEXT_LUMA)
2853	{
2854	uiCtxSet++;
2855	}
2856	#endif
2857	}
2858	uiNumOne >>= 1;
2859	}
2860	}
2861	else
2862	{
2863	d64BaseCost += pdCostCoeff0[ iScanPos ];
2864	}
2865	rdStats.d64SigCost += pdCostSig[ iScanPos ];
2866	if (iScanPosinCG == 0 )
2867	{
2868	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2869	}
2870	if (piDstCoeff[ uiBlkPos ] )
2871	{
2872	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2873	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2874	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2875	if ( iScanPosinCG != 0 )
2876	{
2877	rdStats.iNNZbeforePos0++;
2878	}
2879	}
2880	} //end for (iScanPosinCG)
2881
2882	if (iCGLastScanPos >= 0)
2883	{
2884	#if REMOVE_INFER_SIGGRP
2885	if( iCGScanPos )
2886	#else
2887	#if MULTILEVEL_SIGMAP_EXT
2888	if ( !bothCGNeighboursOne( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight ) && (iCGScanPos != 0) )
2889	#else
2890	if ( !bothCGNeighboursOne( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight ) && (iCGScanPos != 0) )
2891	#endif
2892	#endif
2893	{
2894	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2895	{
2896	#if MULTILEVEL_SIGMAP_EXT
2897	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2898	#else
2899	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
2900	#endif
2901	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2902	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2903	}
2904	else
2905	{
2906	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2907	{
2908	if ( rdStats.iNNZbeforePos0 == 0 )
2909	{
2910	d64BaseCost -= rdStats.d64SigCost_0;
2911	rdStats.d64SigCost -= rdStats.d64SigCost_0;
2912	}
2913	// rd-cost if SigCoeffGroupFlag = 0, initialization
2914	Double d64CostZeroCG = d64BaseCost;
2915
2916	// add SigCoeffGroupFlag cost to total cost
2917	#if MULTILEVEL_SIGMAP_EXT
2918	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
2919	#else
2920	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
2921	#endif
2922	if (iCGScanPos < iCGLastScanPos)
2923	{
2924	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2925	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2926	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2927	}
2928
2929	// try to convert the current coeff group from non-zero to all-zero
2930	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2931	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2932	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
2933
2934	// if we can save cost, change this block to all-zero block
2935	if ( d64CostZeroCG < d64BaseCost )
2936	{
2937	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2938	d64BaseCost = d64CostZeroCG;
2939	if (iCGScanPos < iCGLastScanPos)
2940	{
2941	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2942	}
2943	// reset coeffs to 0 in this block
2944	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2945	{
2946	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2947	UInt uiBlkPos = scan[ iScanPos ];
2948
2949	if (piDstCoeff[ uiBlkPos ])
2950	{
2951	piDstCoeff [ uiBlkPos ] = 0;
2952	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2953	pdCostSig [ iScanPos ] = 0;
2954	}
2955	}
2956	} // end if ( d64CostAllZeros < d64BaseCost )
2957	}
2958	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2959	}
2960	#if REMOVE_INFER_SIGGRP
2961	else
2962	{
2963	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2964	}
2965	#else
2966	else // if ( !bothCGNeighboursOne( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY ) && (uiCGScanPos != 0) && (uiSigCoeffGroupFlag[ uiCGBlkPos ] != 0) )
2967	{
2968	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2969	} // end if ( !bothCGNeighboursOne( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY ) && (uiCGScanPos != 0) && (uiSigCoeffGroupFlag[ uiCGBlkPos ] != 0) )
2970	#endif
2971	}
2972	} //end for (iCGScanPos)
2973	#if !MULTILEVEL_SIGMAP_EXT
2974	}
2975	#endif
2976
2977	//===== estimate last position =====
2978	if ( iLastScanPos < 0 )
2979	{
2980	return;
2981	}
2982
2983	Double d64BestCost = 0;
2984	Int ui16CtxCbf = 0;
2985	Int iBestLastIdxP1 = 0;
2986	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2987	{
2988	ui16CtxCbf = 0;
2989	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2990	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2991	}
2992	else
2993	{
2994	ui16CtxCbf = pcCU->getCtxQtCbf( uiAbsPartIdx, eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
2995	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
2996	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2997	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2998	}
2999
3000	#if !MULTILEVEL_SIGMAP_EXT
3001	if (blockType < 4)
3002	{
3003	for( Int iScanPos = iLastScanPos; iScanPos >= 0; iScanPos-- )
3004	{
3005	UInt uiBlkPos = scan[iScanPos];
3006	if( piDstCoeff[ uiBlkPos ] )
3007	{
3008	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
3009	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
3010	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
3011	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
3012	if( totalCost < d64BestCost )
3013	{
3014	iBestLastIdxP1 = iScanPos + 1;
3015	d64BestCost = totalCost;
3016	}
3017	if( piDstCoeff[ uiBlkPos ] > 1 )
3018	{
3019	break;
3020	}
3021	d64BaseCost -= pdCostCoeff[ iScanPos ];
3022	d64BaseCost += pdCostCoeff0[ iScanPos ];
3023	}
3024	else
3025	{
3026	d64BaseCost -= pdCostSig[ iScanPos ];
3027	}
3028	}
3029	}
3030	else //if (uiLog2BlkSize < 4)
3031	{
3032	#endif
3033	Bool bFoundLast = false;
3034	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
3035	{
3036	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
3037
3038	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
3039	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
3040	{
3041	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
3042	{
3043	#if MULTILEVEL_SIGMAP_EXT
3044	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
3045	#else
3046	Int iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
3047	#endif
3048	if (iScanPos > iLastScanPos) continue;
3049	UInt uiBlkPos = scan[iScanPos];
3050
3051	if( piDstCoeff[ uiBlkPos ] )
3052	{
3053	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
3054	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
3055
3056	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
3057	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
3058
3059	if( totalCost < d64BestCost )
3060	{
3061	iBestLastIdxP1 = iScanPos + 1;
3062	d64BestCost = totalCost;
3063	}
3064	if( piDstCoeff[ uiBlkPos ] > 1 )
3065	{
3066	bFoundLast = true;
3067	break;
3068	}
3069	d64BaseCost -= pdCostCoeff[ iScanPos ];
3070	d64BaseCost += pdCostCoeff0[ iScanPos ];
3071	}
3072	else
3073	{
3074	d64BaseCost -= pdCostSig[ iScanPos ];
3075	}
3076	} //end for
3077	if (bFoundLast)
3078	{
3079	break;
3080	}
3081	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
3082	} // end for
3083	#if !MULTILEVEL_SIGMAP_EXT
3084	} //if (uiLog2BlkSize < 4)
3085	#endif
3086
3087	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
3088	{
3089	Int blkPos = scan[ scanPos ];
3090	Int level = piDstCoeff[ blkPos ];
3091	uiAbsSum += level;
3092	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
3093	}
3094
3095	//===== clean uncoded coefficients =====
3096	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
3097	{
3098	piDstCoeff[ scan[ scanPos ] ] = 0;
3099	}
3100
3101	#if MULTIBITS_DATA_HIDING
3102	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
3103	{
3104	Int rdFactor = (Int)((Double)(g_invQuantScales[m_cQP.rem()]g_invQuantScales[m_cQP.rem()]<<(2m_cQP.m_iPer))/m_dLambda/16 + 0.5) ;
3105
3106	Int tsig = pcCU->getSlice()->getPPS()->getTSIG() ;
3107
3108	Int lastCG = -1;
3109	Int absSum = 0 ;
3110	Int n ;
3111
3112	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
3113	{
3114	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
3115	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
3116	absSum = 0 ;
3117
3118	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
3119	{
3120	if( piDstCoeff[ scan[ n + subPos ]] )
3121	{
3122	lastNZPosInCG = n;
3123	break;
3124	}
3125	}
3126
3127	for(n = 0; n <SCAN_SET_SIZE; n++ )
3128	{
3129	if( piDstCoeff[ scan[ n + subPos ]] )
3130	{
3131	firstNZPosInCG = n;
3132	break;
3133	}
3134	}
3135
3136	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
3137	{
3138	absSum += piDstCoeff[ scan[ n + subPos ]];
3139	}
3140
3141	if(lastNZPosInCG>=0 && lastCG==-1) lastCG =1 ;
3142
3143	if( lastNZPosInCG-firstNZPosInCG>=tsig )
3144	{
3145	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
3146	if( signbit!=(absSum&0x1) ) // hide but need tune
3147	{
3148	// calculate the cost
3149	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
3150
3151	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
3152	{
3153	UInt uiBlkPos = scan[ n + subPos ];
3154	if(piDstCoeff[ uiBlkPos ] != 0 )
3155	{
3156	Int costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
3157	Int costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
3158	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
3159
3160	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
3161	{
3162	costDown -= (4<<15) ;
3163	}
3164
3165	if(costUp<costDown)
3166	{
3167	curCost = costUp;
3168	curChange = 1 ;
3169	}
3170	else
3171	{
3172	curChange = -1 ;
3173	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
3174	{
3175	curCost = MAX_INT ;
3176	}
3177	else
3178	{
3179	curCost = costDown ;
3180	}
3181	}
3182	}
3183	else
3184	{
3185	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
3186	curChange = 1 ;
3187
3188	if(n<firstNZPosInCG)
3189	{
3190	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
3191	if(thissignbit != signbit )
3192	{
3193	curCost = MAX_INT;
3194	}
3195	}
3196	}
3197
3198	if( curCost<minCostInc)
3199	{
3200	minCostInc = curCost ;
3201	finalChange = curChange ;
3202	minPos = uiBlkPos ;
3203	}
3204	}
3205
3206	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
3207	{
3208	finalChange = -1;
3209	}
3210
3211	if(plSrcCoeff[minPos]>=0)
3212	{
3213	piDstCoeff[minPos] += finalChange ;
3214	}
3215	else
3216	{
3217	piDstCoeff[minPos] -= finalChange ;
3218	}
3219	}
3220	}
3221
3222	if(lastCG==1)
3223	{
3224	lastCG=0 ;
3225	}
3226	}
3227	}
3228	#endif
3229	}
3230
3231	/** Context derivation process of coeff_abs_significant_flag
3232	* \param pcCoeff pointer to prior coded transform coefficients
3233	* \param posX column of current scan position
3234	* \param posY row of current scan position
3235	* \param blockType log2 value of block size if square block, or 4 otherwise
3236	* \param width width of the block
3237	* \param height height of the block
3238	* \param textureType texture type (TEXT_LUMA...)
3239	* \returns ctxInc for current scan position
3240	*/
3241	Int TComTrQuant::getSigCtxInc ( TCoeff* pcCoeff,
3242	Int posX,
3243	Int posY,
3244	Int blockType,
3245	Int width
3246	,Int height
3247	,TextType textureType
3248	)
3249	{
3250	if ( blockType == 2 )
3251	{
3252	//LUMA map
3253	const Int ctxIndMap4x4Luma[15] =
3254	{
3255	0, 1, 4, 5,
3256	2, 3, 4, 5,
3257	6, 6, 8, 8,
3258	7, 7, 8
3259	};
3260	//CHROMA map
3261	const Int ctxIndMap4x4Chroma[15] =
3262	{
3263	0, 1, 2, 4,
3264	1, 1, 2, 4,
3265	3, 3, 5, 5,
3266	4, 4, 5
3267	};
3268
3269	if (textureType == TEXT_LUMA)
3270	{
3271	return ctxIndMap4x4Luma[ 4 * posY + posX ];
3272	}
3273	else
3274	{
3275	return ctxIndMap4x4Chroma[ 4 * posY + posX ];
3276	}
3277	}
3278
3279	if ( blockType == 3 )
3280	{
3281	const Int map8x8[16] =
3282	{
3283	0, 1, 2, 3,
3284	4, 5, 6, 3,
3285	8, 6, 6, 7,
3286	9, 9, 7, 7
3287	};
3288
3289	Int offset = (textureType == TEXT_LUMA) ? 9 : 6;
3290
3291	if ( posX + posY == 0 )
3292	{
3293	return offset + 10;
3294	}
3295	return offset + map8x8[4 * (posY >> 1) + (posX >> 1)];
3296	}
3297
3298	Int offset = (textureType == TEXT_LUMA) ? 20 : 17;
3299	if( posX + posY == 0 )
3300	{
3301	return offset;
3302	}
3303	#if SIGMAP_CONST_AT_HIGH_FREQUENCY
3304	Int thredHighFreq = 3*(std::max(width, height)>>4);
3305	if ((posX>>2) + (posY>>2) >= thredHighFreq)
3306	{
3307	return (textureType == TEXT_LUMA) ? 24 : 18;
3308	}
3309	#endif
3310
3311	const TCoeff pData = pcCoeff + posX + posY width;
3312
3313	#if !SIGMAP_CTX_SUBBLOCK
3314	Int thred = std::max(height, width) >> 2;
3315	#endif
3316
3317	Int cnt = 0;
3318	if( posX < width - 1 )
3319	{
3320	cnt += pData[1] != 0;
3321	if( posY < height - 1 )
3322	{
3323	cnt += pData[width+1] != 0;
3324	}
3325	if( posX < width - 2 )
3326	{
3327	cnt += pData[2] != 0;
3328	}
3329	}
3330	if ( posY < height - 1 )
3331	{
3332	if( ( ( posX & 3 ) \|\| ( posY & 3 ) ) && ( ( (posX+1) & 3 ) \|\| ( (posY+2) & 3 ) ) )
3333	{
3334	cnt += pData[width] != 0;
3335	}
3336	if ( posY < height - 2 && cnt < 4 )
3337	{
3338	cnt += pData[2*width] != 0;
3339	}
3340	}
3341
3342	cnt = ( cnt + 1 ) >> 1;
3343	#if SIGMAP_CTX_SUBBLOCK
3344	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 4 : 1) + offset + cnt;
3345	#else
3346	return (( textureType == TEXT_LUMA && posX + posY >= thred ) ? 4 : 1) + offset + cnt;
3347	#endif
3348	}
3349
3350	/** Get the best level in RD sense
3351	* \param rd64CodedCost reference to coded cost
3352	* \param rd64CodedCost0 reference to cost when coefficient is 0
3353	* \param rd64CodedCostSig reference to cost of significant coefficient
3354	* \param lLevelDouble reference to unscaled quantized level
3355	* \param uiMaxAbsLevel scaled quantized level
3356	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
3357	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
3358	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
3359	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
3360	* \param iQBits quantization step size
3361	* \param dTemp correction factor
3362	* \param bLast indicates if the coefficient is the last significant
3363	* \returns best quantized transform level for given scan position
3364	* This method calculates the best quantized transform level for a given scan position.
3365	*/
3366	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
3367	Double& rd64CodedCost0,
3368	Double& rd64CodedCostSig,
3369	Int lLevelDouble,
3370	UInt uiMaxAbsLevel,
3371	UShort ui16CtxNumSig,
3372	UShort ui16CtxNumOne,
3373	UShort ui16CtxNumAbs,
3374	UShort ui16AbsGoRice,
3375	#if RESTRICT_GR1GR2FLAG_NUMBER
3376	UInt c1Idx,
3377	UInt c2Idx,
3378	#endif
3379	Int iQBits,
3380	Double dTemp,
3381	Bool bLast ) const
3382	{
3383	Double dCurrCostSig = 0;
3384	UInt uiBestAbsLevel = 0;
3385
3386	if( !bLast && uiMaxAbsLevel < 3 )
3387	{
3388	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
3389	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
3390	if( uiMaxAbsLevel == 0 )
3391	{
3392	return uiBestAbsLevel;
3393	}
3394	}
3395	else
3396	{
3397	rd64CodedCost = MAX_DOUBLE;
3398	}
3399
3400	if( !bLast )
3401	{
3402	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
3403	}
3404
3405	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
3406	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
3407	{
3408	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
3409	#if RESTRICT_GR1GR2FLAG_NUMBER
3410	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
3411	#else
3412	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice );
3413	#endif
3414	dCurrCost += dCurrCostSig;
3415
3416	if( dCurrCost < rd64CodedCost )
3417	{
3418	uiBestAbsLevel = uiAbsLevel;
3419	rd64CodedCost = dCurrCost;
3420	rd64CodedCostSig = dCurrCostSig;
3421	}
3422	}
3423
3424	return uiBestAbsLevel;
3425	}
3426
3427	/** Calculates the cost for specific absolute transform level
3428	* \param uiAbsLevel scaled quantized level
3429	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
3430	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
3431	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
3432	* \returns cost of given absolute transform level
3433	*/
3434	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
3435	UShort ui16CtxNumOne,
3436	UShort ui16CtxNumAbs,
3437	UShort ui16AbsGoRice
3438	#if RESTRICT_GR1GR2FLAG_NUMBER
3439	, UInt c1Idx,
3440	UInt c2Idx
3441	#endif
3442	) const
3443	{
3444	Double iRate = xGetIEPRate();
3445	#if RESTRICT_GR1GR2FLAG_NUMBER
3446	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
3447
3448	if ( uiAbsLevel >= baseLevel )
3449	{
3450	UInt uiSymbol = uiAbsLevel - baseLevel;
3451	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
3452	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
3453
3454	if( bExpGolomb )
3455	{
3456	uiAbsLevel = uiSymbol - uiMaxVlc;
3457	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
3458	iRate += iEGS << 15;
3459	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
3460	}
3461
3462	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
3463	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
3464
3465	iRate += ui16NumBins << 15;
3466
3467	if (c1Idx < C1FLAG_NUMBER)
3468	{
3469	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
3470
3471	if (c2Idx < C2FLAG_NUMBER)
3472	{
3473	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
3474	}
3475	}
3476	}
3477	else
3478	#endif
3479	if( uiAbsLevel == 1 )
3480	{
3481	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
3482	}
3483	else if( uiAbsLevel == 2 )
3484	{
3485	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
3486	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
3487	}
3488	else
3489	{
3490	#if RESTRICT_GR1GR2FLAG_NUMBER
3491	assert (0);
3492	#else
3493	UInt uiSymbol = uiAbsLevel - 3;
3494	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
3495	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
3496
3497	if( bExpGolomb )
3498	{
3499	uiAbsLevel = uiSymbol - uiMaxVlc;
3500	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
3501	iRate += iEGS << 15;
3502	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
3503	}
3504
3505	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
3506	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
3507
3508	iRate += ui16NumBins << 15;
3509	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
3510	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
3511	#endif
3512	}
3513	return xGetICost( iRate );
3514	}
3515
#if MULTIBITS_DATA_HIDING
/** Calculates the rate for specific absolute transform level
 * \param uiAbsLevel    scaled quantized level
 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1
 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2
 * \param ui16AbsGoRice Rice parameter for the level remainder
 * \param c1Idx         (RESTRICT_GR1GR2FLAG_NUMBER only) compared against C1FLAG_NUMBER to decide whether the greater1 flag is counted
 * \param c2Idx         (RESTRICT_GR1GR2FLAG_NUMBER only) compared against C2FLAG_NUMBER to decide whether the greater2 flag is counted
 * \returns integer rate of the given absolute transform level
 * Unlike xGetICRateCost, the result is a plain rate: no equal-probability bit is added up front
 * and the value is not passed through xGetICost.
 */
__inline Int TComTrQuant::xGetICRate  ( UInt                            uiAbsLevel,
                                       UShort                          ui16CtxNumOne,
                                       UShort                          ui16CtxNumAbs,
                                       UShort                          ui16AbsGoRice
#if RESTRICT_GR1GR2FLAG_NUMBER
                                     , UInt                            c1Idx,
                                       UInt                            c2Idx
#endif
                                       ) const
{
  Int iBits = 0;
#if RESTRICT_GR1GR2FLAG_NUMBER
  // smallest level that needs a remainder: 3 with both flags, 2 with greater1 only, 1 with none
  UInt uiBase = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;

  if ( uiAbsLevel >= uiBase )
  {
    UInt uiRemainder = uiAbsLevel - uiBase;
    UInt uiVlcLimit  = g_auiGoRiceRange[ ui16AbsGoRice ];

    if( uiRemainder > uiVlcLimit )
    {
      // escape part: 1 bit, plus 2 bits for every doubling of the bound needed to exceed the escape value
      UInt uiEscape    = uiRemainder - uiVlcLimit;
      int  iEscapeBits = 1;
      UInt uiBound     = 2;
      while( uiEscape >= uiBound )
      {
        uiBound     <<= 1;
        iEscapeBits  += 2;
      }
      iBits       += iEscapeBits << 15;
      uiRemainder  = min<UInt>( uiRemainder, ( uiVlcLimit + 1 ) );
    }

    UShort usPrefixLen = UShort( uiRemainder >> ui16AbsGoRice ) + 1;
    UShort usBinCount  = min<UInt>( usPrefixLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;

    iBits += usBinCount << 15;

    if (c1Idx < C1FLAG_NUMBER)
    {
      iBits += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];

      if (c2Idx < C2FLAG_NUMBER)
      {
        iBits += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
      }
    }
  }
  else
#endif
  if( uiAbsLevel == 0 )
  {
    return 0;
  }
  else if( uiAbsLevel == 1 )
  {
    iBits += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
  }
  else if( uiAbsLevel == 2 )
  {
    iBits += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
    iBits += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
  }
  else
  {
#if RESTRICT_GR1GR2FLAG_NUMBER
    assert(0);
#else
    UInt uiRemainder = uiAbsLevel - 3;
    UInt uiVlcLimit  = g_auiGoRiceRange[ ui16AbsGoRice ];

    if( uiRemainder > uiVlcLimit )
    {
      // escape part: 1 bit, plus 2 bits for every doubling of the bound needed to exceed the escape value
      UInt uiEscape    = uiRemainder - uiVlcLimit;
      int  iEscapeBits = 1;
      UInt uiBound     = 2;
      while( uiEscape >= uiBound )
      {
        uiBound     <<= 1;
        iEscapeBits  += 2;
      }
      iBits       += iEscapeBits << 15;
      uiRemainder  = min<UInt>( uiRemainder, ( uiVlcLimit + 1 ) );
    }

    UShort usPrefixLen = UShort( uiRemainder >> ui16AbsGoRice ) + 1;
    UShort usBinCount  = min<UInt>( usPrefixLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;

    iBits += usBinCount << 15;
    iBits += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
    iBits += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
#endif
  }
  return iBits;
}
#endif
3603
3604	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
3605	UShort ui16CtxNumSig ) const
3606	{
3607	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
3608	}
3609
3610	/** Calculates the cost of signaling the last significant coefficient in the block
3611	* \param uiPosX X coordinate of the last significant coefficient
3612	* \param uiPosY Y coordinate of the last significant coefficient
3613	* \returns cost of last significant coefficient
3614	*/
3615	/*
3616	* \param uiWidth width of the transform unit (TU)
3617	*/
3618	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
3619	const UInt uiPosY,
3620	const UInt uiBlkWdth ) const
3621	{
3622	UInt uiCtxX = g_uiGroupIdx[uiPosX];
3623	UInt uiCtxY = g_uiGroupIdx[uiPosY];
3624	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
3625	if( uiCtxX > 3 )
3626	{
3627	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
3628	}
3629	if( uiCtxY > 3 )
3630	{
3631	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
3632	}
3633	return xGetICost( uiCost );
3634	}
3635
3636	/** Calculates the cost for specific absolute transform level
3637	* \param uiAbsLevel scaled quantized level
3638	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
3639	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
3640	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
3641	* \returns cost of given absolute transform level
3642	*/
3643	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
3644	UShort ui16CtxNumSig ) const
3645	{
3646	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
3647	}
3648
3649	/** Get the cost for a specific rate
3650	* \param dRate rate of a bit
3651	* \returns cost at the specific rate
3652	*/
3653	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
3654	{
3655	return m_dLambda * dRate;
3656	}
3657
3658	/** Get the cost of an equal probable bit
3659	* \returns cost of equal probable bit
3660	*/
3661	__inline Double TComTrQuant::xGetIEPRate ( ) const
3662	{
3663	return 32768;
3664	}
3665
3666	/** Context derivation process of coeff_abs_significant_flag
3667	* \param uiSigCoeffGroupFlag significance map of L1
3668	* \param uiBlkX column of current scan position
3669	* \param uiBlkY row of current scan position
3670	* \param uiLog2BlkSize log2 value of block size
3671	* \returns ctxInc for current scan position
3672	*/
3673	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
3674	const UInt uiCGPosX,
3675	const UInt uiCGPosY,
3676	#if MULTILEVEL_SIGMAP_EXT
3677	const UInt scanIdx,
3678	#endif
3679	Int width, Int height)
3680	{
3681	UInt uiRight = 0;
3682	UInt uiLower = 0;
3683
3684	width >>= 2;
3685	height >>= 2;
3686	#if MULTILEVEL_SIGMAP_EXT
3687	if( width == 2 && height == 2 ) // 8x8
3688	{
3689	if( scanIdx == SCAN_HOR )
3690	{
3691	width = 1;
3692	height = 4;
3693	}
3694	else if( scanIdx == SCAN_VER )
3695	{
3696	width = 4;
3697	height = 1;
3698	}
3699	}
3700	#endif
3701	if( uiCGPosX < width - 1 )
3702	{
3703	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
3704	}
3705	if (uiCGPosY < height - 1 )
3706	{
3707	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
3708	}
3709	#if REMOVE_INFER_SIGGRP
3710	return (uiRight \|\| uiLower);
3711	#else
3712	return uiRight + uiLower;
3713	#endif
3714
3715	}
3716	#if !REMOVE_INFER_SIGGRP
3717	// return 1 if both right neighbour and lower neighour are 1's
3718	Bool TComTrQuant::bothCGNeighboursOne ( const UInt* uiSigCoeffGroupFlag,
3719	const UInt uiCGPosX,
3720	const UInt uiCGPosY,
3721	#if MULTILEVEL_SIGMAP_EXT
3722	const UInt scanIdx,
3723	#endif
3724	Int width, Int height)
3725	{
3726	UInt uiRight = 0;
3727	UInt uiLower = 0;
3728
3729	width >>= 2;
3730	height >>= 2;
3731	#if MULTILEVEL_SIGMAP_EXT
3732	if( width == 2 && height == 2 ) // 8x8
3733	{
3734	if( scanIdx == SCAN_HOR )
3735	{
3736	width = 1;
3737	height = 4;
3738	}
3739	else if( scanIdx == SCAN_VER )
3740	{
3741	width = 4;
3742	height = 1;
3743	}
3744	}
3745	#endif
3746	if( uiCGPosX < width - 1 )
3747	{
3748	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
3749	}
3750	if (uiCGPosY < height - 1 )
3751	{
3752	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
3753	}
3754
3755	return (uiRight & uiLower);
3756	}
3757	#endif
3758	/** set quantized matrix coefficient for encode
3759	* \param scalingList quantaized matrix address
3760	*/
3761	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
3762	{
3763	UInt size,list;
3764	UInt qp;
3765
3766	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
3767	{
3768	for(list = 0; list < g_scalingListNum[size]; list++)
3769	{
3770	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
3771	{
3772	xSetScalingListEnc(scalingList,list,size,qp);
3773	xSetScalingListDec(scalingList,list,size,qp);
3774	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
3775	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
3776	{
3777	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
3778	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
3779	}
3780	}
3781	}
3782	}
3783	}
3784	/** set quantized matrix coefficient for decode
3785	* \param scalingList quantaized matrix address
3786	*/
3787	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
3788	{
3789	UInt size,list;
3790	UInt qp;
3791
3792	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
3793	{
3794	for(list = 0; list < g_scalingListNum[size]; list++)
3795	{
3796	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
3797	{
3798	xSetScalingListDec(scalingList,list,size,qp);
3799	}
3800	}
3801	}
3802	}
3803	/** set error scale coefficients
3804	* \param list List ID
3805	* \param uiSize Size
3806	* \param uiQP Quantization parameter
3807	*/
3808	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp, UInt dir)
3809	{
3810
3811	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
3812	#if FULL_NBIT
3813	UInt uiBitDepth = g_uiBitDepth;
3814	#else
3815	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
3816	#endif
3817
3818	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
3819
3820	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
3821	Int *piQuantcoeff;
3822	double *pdErrScale;
3823	piQuantcoeff = getQuantCoeff(list, qp,size,dir);
3824	pdErrScale = getErrScaleCoeff(list, size, qp,dir);
3825
3826	double dErrScale = (double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
3827	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
3828	for(i=0;i<uiMaxNumCoeff;i++)
3829	{
3830	pdErrScale[i] = dErrScale/(double)piQuantcoeff[i]/(double)piQuantcoeff[i]/(double)(1<<(2*g_uiBitIncrement));
3831	}
3832	}
3833
3834	/** set quantized matrix coefficient for encode
3835	* \param scalingList quantaized matrix address
3836	* \param listId List index
3837	* \param sizeId size index
3838	* \param uiQP Quantization parameter
3839	*/
3840	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
3841	{
3842	UInt width = g_scalingListSizeX[sizeId];
3843	UInt height = g_scalingListSizeX[sizeId];
3844	#if SCALING_LIST
3845	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3846	#endif
3847	Int *quantcoeff;
3848	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
3849	quantcoeff = getQuantCoeff(listId, qp, sizeId, SCALING_LIST_SQT);
3850
3851	#if SCALING_LIST
3852	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
3853	#else
3854	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,1,(Int)g_scalingListSizeX[sizeId],0);
3855	#endif
3856
3857	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16) //for NSQT
3858	{
3859	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
3860	#if SCALING_LIST
3861	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
3862	#else
3863	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,1,(Int)g_scalingListSizeX[sizeId],0);
3864	#endif
3865
3866	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
3867	#if SCALING_LIST
3868	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
3869	#else
3870	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,1,(Int)g_scalingListSizeX[sizeId],0);
3871	#endif
3872	}
3873	}
3874	/** set quantized matrix coefficient for decode
3875	* \param scalingList quantaized matrix address
3876	* \param list List index
3877	* \param size size index
3878	* \param uiQP Quantization parameter
3879	*/
3880	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
3881	{
3882	UInt width = g_scalingListSizeX[sizeId];
3883	UInt height = g_scalingListSizeX[sizeId];
3884	#if SCALING_LIST
3885	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
3886	#endif
3887	Int *dequantcoeff;
3888	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
3889
3890	dequantcoeff = getDequantCoeff(listId, qp, sizeId,SCALING_LIST_SQT);
3891	#if SCALING_LIST
3892	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
3893	#else
3894	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,1,(Int)g_scalingListSizeX[sizeId],0);
3895	#endif
3896
3897	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16)
3898	{
3899	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
3900	#if SCALING_LIST
3901	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
3902	#else
3903	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,1,(Int)g_scalingListSizeX[sizeId],0);
3904	#endif
3905
3906	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
3907
3908	#if SCALING_LIST
3909	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
3910	#else
3911	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,1,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),0);
3912	#endif
3913	}
3914	}
3915
3916	/** set flat matrix value to quantized coefficient
3917	*/
3918	Void TComTrQuant::setFlatScalingList()
3919	{
3920	UInt size,list;
3921	UInt qp;
3922
3923	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
3924	{
3925	for(list = 0; list < g_scalingListNum[size]; list++)
3926	{
3927	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
3928	{
3929	xsetFlatScalingList(list,size,qp);
3930	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
3931	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
3932	{
3933	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
3934	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
3935	}
3936	}
3937	}
3938	}
3939	}
3940
3941	/** set flat matrix value to quantized coefficient
3942	* \param list List ID
3943	* \param uiQP Quantization parameter
3944	* \param uiSize Size
3945	*/
3946	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
3947	{
3948	UInt i,num = g_scalingListSize[size];
3949	UInt numDiv4 = num>>2;
3950	Int *quantcoeff;
3951	Int *dequantcoeff;
3952	Int quantScales = g_quantScales[qp];
3953	Int invQuantScales = g_invQuantScales[qp]<<4;
3954
3955	quantcoeff = getQuantCoeff(list, qp, size,SCALING_LIST_SQT);
3956	dequantcoeff = getDequantCoeff(list, qp, size,SCALING_LIST_SQT);
3957
3958	for(i=0;i<num;i++)
3959	{
3960	*quantcoeff++ = quantScales;
3961	*dequantcoeff++ = invQuantScales;
3962	}
3963
3964	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
3965	{
3966	quantcoeff = getQuantCoeff(list, qp, size-1, SCALING_LIST_HOR);
3967	dequantcoeff = getDequantCoeff(list, qp, size-1, SCALING_LIST_HOR);
3968
3969	for(i=0;i<numDiv4;i++)
3970	{
3971	*quantcoeff++ = quantScales;
3972	*dequantcoeff++ = invQuantScales;
3973	}
3974	quantcoeff = getQuantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
3975	dequantcoeff = getDequantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
3976
3977	for(i=0;i<numDiv4;i++)
3978	{
3979	*quantcoeff++ = quantScales;
3980	*dequantcoeff++ = invQuantScales;
3981	}
3982	}
3983	}
3984
3985	/** set quantized matrix coefficient for encode
3986	* \param coeff quantaized matrix address
3987	* \param quantcoeff quantaized matrix address
3988	* \param quantScales Q(QP%6)
3989	* \param height height
3990	* \param width width
3991	* \param ratio ratio for upscale
3992	* \param sizuNum matrix size
3993	* \param dc dc parameter
3994	*/
3995	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3996	{
3997	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
3998	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
3999	for(UInt j=0;j<height;j++)
4000	{
4001	for(UInt i=0;i<width;i++)
4002	{
4003	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
4004	}
4005	}
4006	#if SCALING_LIST
4007	if(ratio > 1)
4008	{
4009	quantcoeff[0] = quantScales / dc;
4010	}
4011	#endif
4012	}
4013	/** set quantized matrix coefficient for decode
4014	* \param coeff quantaized matrix address
4015	* \param dequantcoeff quantaized matrix address
4016	* \param invQuantScales IQ(QP%6))
4017	* \param height height
4018	* \param width width
4019	* \param ratio ratio for upscale
4020	* \param sizuNum matrix size
4021	* \param dc dc parameter
4022	*/
4023	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
4024	{
4025	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
4026	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
4027	for(UInt j=0;j<height;j++)
4028	{
4029	for(UInt i=0;i<width;i++)
4030	{
4031	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio];
4032	}
4033	}
4034	#if SCALING_LIST
4035	if(ratio > 1)
4036	{
4037	dequantcoeff[0] = invQuantScales * dc;
4038	}
4039	#endif
4040	}
4041
4042	/** initialization process of scaling list array
4043	*/
4044	Void TComTrQuant::initScalingList()
4045	{
4046	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
4047	{
4048	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
4049	{
4050	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
4051	{
4052	m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
4053	m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
4054	m_errScale [sizeId][listId][qp][SCALING_LIST_SQT] = new double [g_scalingListSize[sizeId]];
4055
4056	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
4057	{
4058	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
4059	{
4060	m_quantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
4061	m_dequantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
4062	m_errScale [sizeId][listId][qp][dir] = new double [g_scalingListSize[sizeId]];
4063	}
4064	}
4065	}
4066	}
4067	}
4068	//copy for NSQT
4069	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
4070	{
4071	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
4072	{
4073	m_quantCoef [SCALING_LIST_16x16][3][qp][dir] = m_quantCoef [SCALING_LIST_16x16][1][qp][dir];
4074	m_dequantCoef [SCALING_LIST_16x16][3][qp][dir] = m_dequantCoef [SCALING_LIST_16x16][1][qp][dir];
4075	m_errScale [SCALING_LIST_16x16][3][qp][dir] = m_errScale [SCALING_LIST_16x16][1][qp][dir];
4076	}
4077	m_quantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_quantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
4078	m_dequantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_dequantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
4079	m_errScale [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_errScale [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
4080	}
4081	}
4082	/** destroy quantization matrix array
4083	*/
4084	Void TComTrQuant::destroyScalingList()
4085	{
4086	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
4087	{
4088	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
4089	{
4090	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
4091	{
4092	if(m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
4093	if(m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
4094	if(m_errScale [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_errScale [sizeId][listId][qp][SCALING_LIST_SQT];
4095	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
4096	{
4097	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
4098	{
4099	if(m_quantCoef [sizeId][listId][qp][dir]) delete [] m_quantCoef [sizeId][listId][qp][dir];
4100	if(m_dequantCoef [sizeId][listId][qp][dir]) delete [] m_dequantCoef [sizeId][listId][qp][dir];
4101	if(m_errScale [sizeId][listId][qp][dir]) delete [] m_errScale [sizeId][listId][qp][dir];
4102	}
4103	}
4104	}
4105	}
4106	}
4107	}
4108
4109	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: