Context navigation

source: 3DVCSoftware/trunk/source/Lib/TLibCommon/TComTrQuant.cpp @ 605

Visit:

Last change on this file since 605 was 296, checked in by tech, 12 years ago
Reintegrated branch 5.1-dev0 rev. 295.
Property svn:eol-style set to `native`
File size: 92.2 KB

Rev	Line
[5]	1	/* The copyright in this software is being made available under the BSD
	2	* License, included below. This software may be subject to other third party
	3	* and contributor rights, including patent rights, and no such rights are
[56]	4	* granted under this license.
[5]	5	*
[56]	6	* Copyright (c) 2010-2012, ITU/ISO/IEC
[5]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions are met:
	11	*
	12	* * Redistributions of source code must retain the above copyright notice,
	13	* this list of conditions and the following disclaimer.
	14	* * Redistributions in binary form must reproduce the above copyright notice,
	15	* this list of conditions and the following disclaimer in the documentation
	16	* and/or other materials provided with the distribution.
[56]	17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
[5]	18	* be used to endorse or promote products derived from this software without
	19	* specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
	25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	31	* THE POSSIBILITY OF SUCH DAMAGE.
	32	*/
[2]	33
	34	/** \file TComTrQuant.cpp
	35	\brief transform and quantization class
	36	*/
	37
	38	#include <stdlib.h>
	39	#include <math.h>
	40	#include <memory.h>
	41	#include "TComTrQuant.h"
	42	#include "TComPic.h"
	43	#include "ContextTables.h"
	44
[56]	45	typedef struct
	46	{
	47	Int iNNZbeforePos0;
	48	Double d64CodedLevelandDist; // distortion and level cost only
	49	Double d64UncodedDist; // all zero coded block distortion
	50	Double d64SigCost;
	51	Double d64SigCost_0;
	52	} coeffGroupRDStats;
	53
	54	//! \ingroup TLibCommon
	55	//! \{
	56
[2]	57	// ====================================================================================================================
	58	// Constants
	59	// ====================================================================================================================
	60
	61	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
	62
	63	// ====================================================================================================================
	64	// Tables
	65	// ====================================================================================================================
	66
	67	// RDOQ parameter
	68
	69	// ====================================================================================================================
	70	// Qp class member functions
	71	// ====================================================================================================================
	72
	73	QpParam::QpParam()
	74	{
	75	}
	76
	77	// ====================================================================================================================
	78	// TComTrQuant class member functions
	79	// ====================================================================================================================
	80
	81	TComTrQuant::TComTrQuant()
	82	{
	83	m_cQP.clear();
	84
	85	// allocate temporary buffers
[56]	86	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
[2]	87
	88	// allocate bit estimation class (for RDOQ)
	89	m_pcEstBitsSbac = new estBitsSbacStruct;
[56]	90	initScalingList();
[2]	91	}
	92
	93	TComTrQuant::~TComTrQuant()
	94	{
	95	// delete temporary buffers
	96	if ( m_plTempCoeff )
	97	{
	98	delete [] m_plTempCoeff;
	99	m_plTempCoeff = NULL;
	100	}
	101
	102	// delete bit estimation class
[56]	103	if ( m_pcEstBitsSbac )
	104	{
	105	delete m_pcEstBitsSbac;
	106	}
	107	destroyScalingList();
[2]	108	}
	109
[56]	110	#if ADAPTIVE_QP_SELECTION
	111	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
	112	{
	113	Int qpBase = pcSlice->getSliceQpBase();
	114	Int sliceQpused = pcSlice->getSliceQp();
	115	Int sliceQpnext;
	116	Double alpha = qpBase < 17 ? 0.5 : 1;
	117
	118	Int cnt=0;
	119	for(int u=1; u<=LEVEL_RANGE; u++)
	120	{
	121	cnt += m_sliceNsamples[u] ;
	122	}
	123
	124	if( !m_bUseRDOQ )
	125	{
	126	sliceQpused = qpBase;
	127	alpha = 0.5;
	128	}
	129
	130	if( cnt > 120 )
	131	{
	132	Double sum = 0;
	133	Int k = 0;
	134	for(Int u=1; u<LEVEL_RANGE; u++)
	135	{
	136	sum += u*m_sliceSumC[u];
	137	k += uum_sliceNsamples[u];
	138	}
	139
	140	Int v;
	141	Double q[MAX_QP+1] ;
	142	for(v=0; v<=MAX_QP; v++)
	143	{
	144	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
	145	}
	146
	147	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
	148
	149	for(v=0; v<MAX_QP; v++)
	150	{
	151	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
	152	{
	153	break;
	154	}
	155	}
	156	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
	157	}
	158	else
	159	{
	160	sliceQpnext = sliceQpused;
	161	}
	162
	163	m_qpDelta[qpBase] = sliceQpnext - qpBase;
	164	}
	165
	166	Void TComTrQuant::initSliceQpDelta()
	167	{
	168	for(Int qp=0; qp<=MAX_QP; qp++)
	169	{
	170	m_qpDelta[qp] = qp < 17 ? 0 : 1;
	171	}
	172	}
	173
	174	Void TComTrQuant::clearSliceARLCnt()
	175	{
	176	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
	177	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
	178	}
	179	#endif
	180
	181
	182	/** Set qP for Quantization.
	183	* \param qpy QPy
	184	* \param bLowpass
	185	* \param eSliceType
	186	* \param eTxtType
	187	* \param qpBdOffset
	188	* \param chromaQPOffset
	189	*
	190	* return void
	191	*/
	192	Void TComTrQuant::setQPforQuant( Int qpy, Bool bLowpass, SliceType eSliceType, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
	193	{
	194	Int qpScaled;
	195
	196	if(eTxtType == TEXT_LUMA)
	197	{
	198	qpScaled = qpy + qpBdOffset;
	199	}
	200	else
	201	{
	202	qpScaled = Clip3( -qpBdOffset, 51, qpy + chromaQPOffset );
	203
	204	if(qpScaled < 0)
	205	{
	206	qpScaled = qpScaled + qpBdOffset;
	207	}
	208	else
	209	{
	210	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBdOffset;
	211	}
	212	}
	213	m_cQP.setQpParam( qpScaled, bLowpass, eSliceType );
	214	}
[2]	215
	216	#if MATRIX_MULT
	217	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
	218	* \param block pointer to input data (residual)
	219	* \param coeff pointer to output data (transform coefficients)
	220	* \param uiStride stride of input data
	221	* \param uiTrSize transform size (uiTrSize x uiTrSize)
	222	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	223	*/
[56]	224	void xTr(Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
[2]	225	{
	226	Int i,j,k,iSum;
	227	Int tmp[32*32];
	228	const short *iT;
	229	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
	230
	231	if (uiTrSize==4)
	232	{
	233	iT = g_aiT4[0];
	234	}
	235	else if (uiTrSize==8)
	236	{
	237	iT = g_aiT8[0];
	238	}
	239	else if (uiTrSize==16)
	240	{
	241	iT = g_aiT16[0];
	242	}
	243	else if (uiTrSize==32)
	244	{
	245	iT = g_aiT32[0];
	246	}
[56]	247	else
	248	{
[2]	249	assert(0);
	250	}
	251
	252	#if FULL_NBIT
	253	int shift_1st = uiLog2TrSize - 1 + g_uiBitDepth - 8; // log2(N) - 1 + g_uiBitDepth - 8
	254	#else
	255	int shift_1st = uiLog2TrSize - 1 + g_uiBitIncrement; // log2(N) - 1 + g_uiBitIncrement
	256	#endif
	257
	258	int add_1st = 1<<(shift_1st-1);
	259	int shift_2nd = uiLog2TrSize + 6;
	260	int add_2nd = 1<<(shift_2nd-1);
	261
	262	/* Horizontal transform */
	263
	264	if (uiTrSize==4)
	265	{
	266	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
	267	{
	268	iT = g_as_DST_MAT_4[0];
	269	}
	270	}
	271	for (i=0; i<uiTrSize; i++)
	272	{
	273	for (j=0; j<uiTrSize; j++)
	274	{
	275	iSum = 0;
	276	for (k=0; k<uiTrSize; k++)
	277	{
	278	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
	279	}
	280	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
	281	}
	282	}
[56]	283
	284	/* Vertical transform */
[2]	285	if (uiTrSize==4)
	286	{
	287	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
	288	{
	289	iT = g_as_DST_MAT_4[0];
	290	}
	291	else
	292	{
	293	iT = g_aiT4[0];
	294	}
	295	}
	296	for (i=0; i<uiTrSize; i++)
	297	{
	298	for (j=0; j<uiTrSize; j++)
	299	{
	300	iSum = 0;
	301	for (k=0; k<uiTrSize; k++)
	302	{
	303	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
	304	}
	305	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
	306	}
[56]	307	}
[2]	308	}
	309
	310	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
	311	* \param coeff pointer to input data (transform coefficients)
	312	* \param block pointer to output data (residual)
	313	* \param uiStride stride of output data
	314	* \param uiTrSize transform size (uiTrSize x uiTrSize)
	315	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	316	*/
[56]	317	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
[2]	318	{
	319	int i,j,k,iSum;
	320	Int tmp[32*32];
	321	const short *iT;
[56]	322
[2]	323	if (uiTrSize==4)
	324	{
	325	iT = g_aiT4[0];
	326	}
	327	else if (uiTrSize==8)
	328	{
	329	iT = g_aiT8[0];
	330	}
	331	else if (uiTrSize==16)
	332	{
	333	iT = g_aiT16[0];
	334	}
	335	else if (uiTrSize==32)
	336	{
	337	iT = g_aiT32[0];
	338	}
[56]	339	else
	340	{
[2]	341	assert(0);
	342	}
[56]	343
[2]	344	int shift_1st = SHIFT_INV_1ST;
	345	int add_1st = 1<<(shift_1st-1);
	346	#if FULL_NBIT
	347	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
	348	#else
	349	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
	350	#endif
	351	int add_2nd = 1<<(shift_2nd-1);
	352	if (uiTrSize==4)
	353	{
	354	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
	355	{
	356	iT = g_as_DST_MAT_4[0];
	357	}
	358	}
[56]	359
[2]	360	/* Horizontal transform */
	361	for (i=0; i<uiTrSize; i++)
	362	{
	363	for (j=0; j<uiTrSize; j++)
	364	{
	365	iSum = 0;
	366	for (k=0; k<uiTrSize; k++)
	367	{
	368	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
	369	}
[56]	370	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
[2]	371	}
	372	}
[56]	373
[2]	374	if (uiTrSize==4)
	375	{
	376	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
	377	{
	378	iT = g_as_DST_MAT_4[0];
	379	}
	380	else
	381	{
	382	iT = g_aiT4[0];
	383	}
	384	}
[56]	385
[2]	386	/* Vertical transform */
	387	for (i=0; i<uiTrSize; i++)
	388	{
	389	for (j=0; j<uiTrSize; j++)
	390	{
	391	iSum = 0;
	392	for (k=0; k<uiTrSize; k++)
	393	{
	394	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
	395	}
[56]	396	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
[2]	397	}
	398	}
	399	}
	400
	401	#else //MATRIX_MULT
	402
	403	/** 4x4 forward transform implemented using partial butterfly structure (1D)
[56]	404	* \param src input data (residual)
	405	* \param dst output data (transform coefficients)
[2]	406	* \param shift specifies right shift after 1D transform
	407	*/
	408
[56]	409	void partialButterfly4(short src,short dst,int shift, int line)
	410	{
	411	int j;
	412	int E[2],O[2];
	413	int add = 1<<(shift-1);
	414
	415	for (j=0; j<line; j++)
	416	{
	417	/* E and O */
	418	E[0] = src[0] + src[3];
	419	O[0] = src[0] - src[3];
	420	E[1] = src[1] + src[2];
	421	O[1] = src[1] - src[2];
	422
	423	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
	424	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
	425	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
	426	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
	427
	428	src += 4;
	429	dst ++;
	430	}
	431	}
	432
// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
// give identical results
/** 4x4 forward DST, one 1D pass.
 *  \param block input: 16 values, row i occupies block[4*i .. 4*i+3]
 *  \param coeff output: 16 values, written transposed (output k of row i goes to coeff[4*k + i])
 *  \param shift right shift applied after rounding; must be >= 1 because the
 *               rounding offset is 1<<(shift-1)
 */
void fastForwardDst(short *block,short *coeff,int shift)  // input block, output coeff
{
  int i, c[4];
  int rnd_factor = 1<<(shift-1);
  for (i=0; i<4; i++)
  {
    // Intermediate Variables
    c[0] = block[4*i+0] + block[4*i+3];
    c[1] = block[4*i+1] + block[4*i+3];
    c[2] = block[4*i+0] - block[4*i+1];
    c[3] = 74* block[4*i+2];

    coeff[   i] =  ( 29 * c[0] + 55 * c[1]         + c[3]               + rnd_factor ) >> shift;
    coeff[ 4+i] =  ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3])   + rnd_factor ) >> shift;
    coeff[ 8+i] =  ( 29 * c[2] + 55 * c[0]         - c[3]               + rnd_factor ) >> shift;
    coeff[12+i] =  ( 55 * c[2] - 29 * c[1]         + c[3]               + rnd_factor ) >> shift;
  }
}
[56]	453
	454	void fastInverseDst(short tmp,short block,int shift) // input tmp, output block
[2]	455	{
	456	int i, c[4];
	457	int rnd_factor = 1<<(shift-1);
	458	for (i=0; i<4; i++)
	459	{
	460	// Intermediate Variables
[56]	461	c[0] = tmp[ i] + tmp[ 8+i];
	462	c[1] = tmp[8+i] + tmp[12+i];
	463	c[2] = tmp[ i] - tmp[12+i];
	464	c[3] = 74* tmp[4+i];
	465
	466	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
	467	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
	468	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
	469	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
[2]	470	}
	471	}
[56]	472
	473	void partialButterflyInverse4(short src,short dst,int shift, int line)
	474	{
	475	int j;
	476	int E[2],O[2];
	477	int add = 1<<(shift-1);
	478
	479	for (j=0; j<line; j++)
	480	{
	481	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	482	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
	483	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
	484	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
	485	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
	486
	487	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	488	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
	489	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
	490	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
	491	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
	492
	493	src ++;
	494	dst += 4;
	495	}
	496	}
	497
[2]	498
[56]	499	void partialButterfly8(short src,short dst,int shift, int line)
	500	{
	501	int j,k;
	502	int E[4],O[4];
	503	int EE[2],EO[2];
	504	int add = 1<<(shift-1);
	505
	506	for (j=0; j<line; j++)
	507	{
	508	/* E and O*/
	509	for (k=0;k<4;k++)
	510	{
	511	E[k] = src[k] + src[7-k];
	512	O[k] = src[k] - src[7-k];
	513	}
	514	/* EE and EO */
	515	EE[0] = E[0] + E[3];
	516	EO[0] = E[0] - E[3];
	517	EE[1] = E[1] + E[2];
	518	EO[1] = E[1] - E[2];
	519
	520	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
	521	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
	522	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
	523	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
	524
	525	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
	526	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
	527	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
	528	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
	529
	530	src += 8;
	531	dst ++;
	532	}
	533	}
	534
[2]	535
[56]	536	void partialButterflyInverse8(short src,short dst,int shift, int line)
	537	{
	538	int j,k;
	539	int E[4],O[4];
	540	int EE[2],EO[2];
	541	int add = 1<<(shift-1);
	542
	543	for (j=0; j<line; j++)
	544	{
	545	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	546	for (k=0;k<4;k++)
	547	{
	548	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
	549	}
	550
	551	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
	552	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
	553	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
	554	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
	555
	556	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	557	E[0] = EE[0] + EO[0];
	558	E[3] = EE[0] - EO[0];
	559	E[1] = EE[1] + EO[1];
	560	E[2] = EE[1] - EO[1];
	561	for (k=0;k<4;k++)
	562	{
	563	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
	564	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
	565	}
	566	src ++;
	567	dst += 8;
	568	}
	569	}
	570
[2]	571
[56]	572	void partialButterfly16(short src,short dst,int shift, int line)
	573	{
	574	int j,k;
	575	int E[8],O[8];
	576	int EE[4],EO[4];
	577	int EEE[2],EEO[2];
	578	int add = 1<<(shift-1);
	579
	580	for (j=0; j<line; j++)
	581	{
	582	/* E and O*/
	583	for (k=0;k<8;k++)
	584	{
	585	E[k] = src[k] + src[15-k];
	586	O[k] = src[k] - src[15-k];
	587	}
	588	/* EE and EO */
	589	for (k=0;k<4;k++)
	590	{
	591	EE[k] = E[k] + E[7-k];
	592	EO[k] = E[k] - E[7-k];
	593	}
	594	/* EEE and EEO */
	595	EEE[0] = EE[0] + EE[3];
	596	EEO[0] = EE[0] - EE[3];
	597	EEE[1] = EE[1] + EE[2];
	598	EEO[1] = EE[1] - EE[2];
	599
	600	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
	601	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
	602	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
	603	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
	604
	605	for (k=2;k<16;k+=4)
	606	{
	607	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
	608	}
	609
	610	for (k=1;k<16;k+=2)
	611	{
	612	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
	613	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
	614	}
	615
	616	src += 16;
	617	dst ++;
	618
	619	}
	620	}
	621
[2]	622
[56]	623	void partialButterflyInverse16(short src,short dst,int shift, int line)
	624	{
	625	int j,k;
	626	int E[8],O[8];
	627	int EE[4],EO[4];
	628	int EEE[2],EEO[2];
	629	int add = 1<<(shift-1);
	630
	631	for (j=0; j<line; j++)
	632	{
	633	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	634	for (k=0;k<8;k++)
	635	{
	636	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
	637	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
	638	}
	639	for (k=0;k<4;k++)
	640	{
	641	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
	642	}
	643	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
	644	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
	645	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
	646	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
	647
	648	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	649	for (k=0;k<2;k++)
	650	{
	651	EE[k] = EEE[k] + EEO[k];
	652	EE[k+2] = EEE[1-k] - EEO[1-k];
	653	}
	654	for (k=0;k<4;k++)
	655	{
	656	E[k] = EE[k] + EO[k];
	657	E[k+4] = EE[3-k] - EO[3-k];
	658	}
	659	for (k=0;k<8;k++)
	660	{
	661	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
	662	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
	663	}
	664	src ++;
	665	dst += 16;
	666	}
	667	}
	668
[2]	669
[56]	670	void partialButterfly32(short src,short dst,int shift, int line)
	671	{
	672	int j,k;
	673	int E[16],O[16];
	674	int EE[8],EO[8];
	675	int EEE[4],EEO[4];
	676	int EEEE[2],EEEO[2];
	677	int add = 1<<(shift-1);
	678
	679	for (j=0; j<line; j++)
	680	{
	681	/* E and O*/
	682	for (k=0;k<16;k++)
	683	{
	684	E[k] = src[k] + src[31-k];
	685	O[k] = src[k] - src[31-k];
	686	}
	687	/* EE and EO */
	688	for (k=0;k<8;k++)
	689	{
	690	EE[k] = E[k] + E[15-k];
	691	EO[k] = E[k] - E[15-k];
	692	}
	693	/* EEE and EEO */
	694	for (k=0;k<4;k++)
	695	{
	696	EEE[k] = EE[k] + EE[7-k];
	697	EEO[k] = EE[k] - EE[7-k];
	698	}
	699	/* EEEE and EEEO */
	700	EEEE[0] = EEE[0] + EEE[3];
	701	EEEO[0] = EEE[0] - EEE[3];
	702	EEEE[1] = EEE[1] + EEE[2];
	703	EEEO[1] = EEE[1] - EEE[2];
	704
	705	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
	706	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
	707	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
	708	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
	709	for (k=4;k<32;k+=8)
	710	{
	711	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
	712	}
	713	for (k=2;k<32;k+=4)
	714	{
	715	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
	716	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
	717	}
	718	for (k=1;k<32;k+=2)
	719	{
	720	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
	721	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
	722	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
	723	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
	724	}
	725	src += 32;
	726	dst ++;
	727	}
	728	}
	729
[2]	730
[56]	731	void partialButterflyInverse32(short src,short dst,int shift, int line)
	732	{
	733	int j,k;
	734	int E[16],O[16];
	735	int EE[8],EO[8];
	736	int EEE[4],EEO[4];
	737	int EEEE[2],EEEO[2];
	738	int add = 1<<(shift-1);
	739
	740	for (j=0; j<line; j++)
	741	{
	742	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	743	for (k=0;k<16;k++)
	744	{
	745	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
	746	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
	747	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
	748	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
	749	}
	750	for (k=0;k<8;k++)
	751	{
	752	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
	753	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
	754	}
	755	for (k=0;k<4;k++)
	756	{
	757	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
	758	}
	759	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
	760	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
	761	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
	762	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
	763
	764	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	765	EEE[0] = EEEE[0] + EEEO[0];
	766	EEE[3] = EEEE[0] - EEEO[0];
	767	EEE[1] = EEEE[1] + EEEO[1];
	768	EEE[2] = EEEE[1] - EEEO[1];
	769	for (k=0;k<4;k++)
	770	{
	771	EE[k] = EEE[k] + EEO[k];
	772	EE[k+4] = EEE[3-k] - EEO[3-k];
	773	}
	774	for (k=0;k<8;k++)
	775	{
	776	E[k] = EE[k] + EO[k];
	777	E[k+8] = EE[7-k] - EO[7-k];
	778	}
	779	for (k=0;k<16;k++)
	780	{
	781	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
	782	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
	783	}
	784	src ++;
	785	dst += 32;
	786	}
	787	}
	788
[2]	789
[56]	790	/** MxN forward transform (2D)
	791	* \param block input data (residual)
	792	* \param coeff output data (transform coefficients)
	793	* \param iWidth input data (width of transform)
	794	* \param iHeight input data (height of transform)
	795	*/
	796	void xTrMxN(short block,short coeff, int iWidth, int iHeight, UInt uiMode)
[2]	797	{
[56]	798	#if FULL_NBIT
	799	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitDepth - 8; // log2(iWidth) - 1 + g_uiBitDepth - 8
[2]	800	#else
[56]	801	int shift_1st = g_aucConvertToBit[iWidth] + 1 + g_uiBitIncrement; // log2(iWidth) - 1 + g_uiBitIncrement
[2]	802	#endif
[56]	803	int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
[2]	804
[56]	805	short tmp[ 64 * 64 ];
[2]	806
[56]	807	if( iWidth == 16 && iHeight == 4)
[2]	808	{
[56]	809	partialButterfly16( block, tmp, shift_1st, iHeight );
	810	partialButterfly4( tmp, coeff, shift_2nd, iWidth );
[2]	811	}
[56]	812	else if( iWidth == 32 && iHeight == 8 )
[2]	813	{
[56]	814	partialButterfly32( block, tmp, shift_1st, iHeight );
	815	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
[2]	816	}
[56]	817	else if( iWidth == 4 && iHeight == 16)
[2]	818	{
[56]	819	partialButterfly4( block, tmp, shift_1st, iHeight );
	820	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
[2]	821	}
[56]	822	else if( iWidth == 8 && iHeight == 32 )
[2]	823	{
[56]	824	partialButterfly8( block, tmp, shift_1st, iHeight );
	825	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
[2]	826	}
[56]	827	else if( iWidth == 4 && iHeight == 4)
[2]	828	{
[56]	829	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
	830	{
	831	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
[2]	832	}
[56]	833	else
	834	{
	835	partialButterfly4(block, tmp, shift_1st, iHeight);
[2]	836	}
[56]	837	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
	838	{
	839	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
[2]	840	}
[56]	841	else
	842	{
	843	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
	844	}
[2]	845	}
[56]	846	else if( iWidth == 8 && iHeight == 8)
[2]	847	{
[56]	848	partialButterfly8( block, tmp, shift_1st, iHeight );
	849	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
[2]	850	}
[56]	851	else if( iWidth == 16 && iHeight == 16)
[2]	852	{
[56]	853	partialButterfly16( block, tmp, shift_1st, iHeight );
	854	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
[2]	855	}
[56]	856	else if( iWidth == 32 && iHeight == 32)
[2]	857	{
[56]	858	partialButterfly32( block, tmp, shift_1st, iHeight );
	859	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
[2]	860	}
[56]	861	}
	862	/** MxN inverse transform (2D)
	863	* \param coeff input data (transform coefficients)
	864	* \param block output data (residual)
	865	* \param iWidth input data (width of transform)
	866	* \param iHeight input data (height of transform)
	867	*/
	868	void xITrMxN(short coeff,short block, int iWidth, int iHeight, UInt uiMode)
	869	{
	870	int shift_1st = SHIFT_INV_1ST;
	871	#if FULL_NBIT
	872	int shift_2nd = SHIFT_INV_2ND - ((short)g_uiBitDepth - 8);
[2]	873	#else
[56]	874	int shift_2nd = SHIFT_INV_2ND - g_uiBitIncrement;
[2]	875	#endif
	876
[56]	877	short tmp[ 64*64];
	878	if( iWidth == 16 && iHeight == 4)
	879	{
	880	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
	881	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
[2]	882	}
[56]	883	else if( iWidth == 32 && iHeight == 8)
	884	{
	885	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
	886	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
[2]	887	}
[56]	888	else if( iWidth == 4 && iHeight == 16)
[2]	889	{
[56]	890	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
	891	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
	892	}
	893	else if( iWidth == 8 && iHeight == 32)
[2]	894	{
[56]	895	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
	896	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
[2]	897	}
[56]	898	else if( iWidth == 4 && iHeight == 4)
[2]	899	{
[56]	900	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=11 && uiMode <= 34))) // Check for DCT or DST
	901	{
	902	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
[2]	903	}
	904	else
	905	{
[56]	906	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
	907	}
	908	if (uiMode != REG_DCT && (!uiMode \|\| (uiMode>=2 && uiMode <= 25))) // Check for DCT or DST
	909	{
	910	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
[2]	911	}
	912	else
[56]	913	{
	914	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
	915	}
[2]	916	}
[56]	917	else if( iWidth == 8 && iHeight == 8)
[2]	918	{
[56]	919	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
	920	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
[2]	921	}
[56]	922	else if( iWidth == 16 && iHeight == 16)
[2]	923	{
[56]	924	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
	925	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
[2]	926	}
[56]	927	else if( iWidth == 32 && iHeight == 32)
[2]	928	{
[56]	929	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
	930	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
[2]	931	}
[56]	932	}
[2]	933
[56]	934	#endif //MATRIX_MULT
[2]	935
[56]	936	// To minimize the distortion only. No rate is considered.
	937	Void TComTrQuant::signBitHidingHDQ( TComDataCU* pcCU, TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
	938	{
	939	Int tsig = pcCU->getSlice()->getPPS()->getTSIG() ;
	940	Int lastCG = -1;
	941	Int absSum = 0 ;
	942	Int n ;
[2]	943
[56]	944	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
	945	{
	946	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
	947	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
	948	absSum = 0 ;
[2]	949
[56]	950	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
	951	{
	952	if( pQCoef[ scan[ n + subPos ]] )
	953	{
	954	lastNZPosInCG = n;
	955	break;
	956	}
	957	}
[2]	958
[56]	959	for(n = 0; n <SCAN_SET_SIZE; n++ )
	960	{
	961	if( pQCoef[ scan[ n + subPos ]] )
	962	{
	963	firstNZPosInCG = n;
	964	break;
	965	}
	966	}
[2]	967
[56]	968	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
	969	{
	970	absSum += pQCoef[ scan[ n + subPos ]];
	971	}
[2]	972
[56]	973	if(lastNZPosInCG>=0 && lastCG==-1)
	974	{
	975	lastCG = 1 ;
	976	}
	977
	978	if( lastNZPosInCG-firstNZPosInCG>=tsig )
	979	{
	980	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
	981	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
	982	{
	983	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
	984
	985	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
[2]	986	{
[56]	987	UInt blkPos = scan[ n+subPos ];
	988	if(pQCoef[ blkPos ] != 0 )
[2]	989	{
[56]	990	if(deltaU[blkPos]>0)
[2]	991	{
[56]	992	curCost = - deltaU[blkPos];
	993	curChange=1 ;
[2]	994	}
[56]	995	else
[2]	996	{
[56]	997	//curChange =-1;
	998	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
	999	{
	1000	curCost=MAX_INT ;
	1001	}
	1002	else
	1003	{
	1004	curCost = deltaU[blkPos];
	1005	curChange =-1;
	1006	}
[2]	1007	}
	1008	}
	1009	else
	1010	{
[56]	1011	if(n<firstNZPosInCG)
	1012	{
	1013	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
	1014	if(thisSignBit != signbit )
	1015	{
	1016	curCost = MAX_INT;
[2]	1017	}
[56]	1018	else
	1019	{
	1020	curCost = - (deltaU[blkPos]) ;
	1021	curChange = 1 ;
[2]	1022	}
	1023	}
	1024	else
	1025	{
[56]	1026	curCost = - (deltaU[blkPos]) ;
	1027	curChange = 1 ;
[2]	1028	}
	1029	}
[56]	1030
	1031	if( curCost<minCostInc)
[2]	1032	{
[56]	1033	minCostInc = curCost ;
	1034	finalChange = curChange ;
	1035	minPos = blkPos ;
[2]	1036	}
[56]	1037	} //CG loop
[2]	1038
[56]	1039	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
	1040	{
	1041	finalChange = -1;
[2]	1042	}
	1043
[56]	1044	if(pCoef[minPos]>=0)
[2]	1045	{
[56]	1046	pQCoef[minPos] += finalChange ;
[2]	1047	}
[56]	1048	else
	1049	{
	1050	pQCoef[minPos] -= finalChange ;
	1051	}
	1052	} // Hide
	1053	}
	1054	if(lastCG==1)
	1055	{
	1056	lastCG=0 ;
	1057	}
	1058	} // TU loop
	1059
	1060	return;
	1061	}
	1062
	1063	Void TComTrQuant::xQuant( TComDataCU* pcCU,
	1064	Int* pSrc,
	1065	TCoeff* pDes,
	1066	#if ADAPTIVE_QP_SELECTION
	1067	Int*& pArlDes,
	1068	#endif
	1069	Int iWidth,
	1070	Int iHeight,
	1071	UInt& uiAcSum,
	1072	TextType eTType,
	1073	UInt uiAbsPartIdx )
[2]	1074	{
[56]	1075	Int* piCoef = pSrc;
	1076	TCoeff* piQCoef = pDes;
	1077	#if ADAPTIVE_QP_SELECTION
	1078	Int* piArlCCoef = pArlDes;
	1079	#endif
	1080	Int iAdd = 0;
[2]	1081
[56]	1082	if ( m_bUseRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA) )
[2]	1083	{
[56]	1084	#if ADAPTIVE_QP_SELECTION
	1085	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
[2]	1086	#else
[56]	1087	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
[2]	1088	#endif
	1089	}
	1090	else
	1091	{
[56]	1092	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
	1093
	1094	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
	1095	if (scanIdx == SCAN_ZIGZAG)
[2]	1096	{
[56]	1097	scanIdx = SCAN_DIAG;
[2]	1098	}
[56]	1099
	1100	if (iWidth != iHeight)
[2]	1101	{
[56]	1102	scanIdx = SCAN_DIAG;
[2]	1103	}
[56]	1104
	1105	const UInt * scan;
	1106	if (iWidth == iHeight)
[2]	1107	{
[56]	1108	scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
[2]	1109	}
	1110	else
	1111	{
[56]	1112	scan = g_sigScanNSQT[ log2BlockSize - 2 ];
[2]	1113	}
[56]	1114
	1115	Int deltaU[32*32] ;
	1116
	1117	#if ADAPTIVE_QP_SELECTION
	1118	QpParam cQpBase;
	1119	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
	1120
	1121	Int qpScaled;
	1122	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
	1123
	1124	if(eTType == TEXT_LUMA)
[2]	1125	{
[56]	1126	qpScaled = iQpBase + qpBDOffset;
[2]	1127	}
[56]	1128	else
	1129	{
	1130	qpScaled = Clip3( -qpBDOffset, 51, iQpBase);
	1131
	1132	if(qpScaled < 0)
	1133	{
	1134	qpScaled = qpScaled + qpBDOffset;
[2]	1135	}
	1136	else
	1137	{
[56]	1138	qpScaled = g_aucChromaScale[ Clip3(0, 51, qpScaled) ] + qpBDOffset;
[2]	1139	}
	1140	}
[56]	1141	cQpBase.setQpParam(qpScaled, false, pcCU->getSlice()->getSliceType());
[2]	1142	#endif
	1143
[56]	1144	Bool bNonSqureFlag = ( iWidth != iHeight );
	1145	UInt dir = SCALING_LIST_SQT;
	1146	if( bNonSqureFlag )
	1147	{
	1148	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
	1149	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
	1150	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
	1151	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
	1152	iHeight = iWidth;
	1153	}
[2]	1154
	1155	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
[56]	1156	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
	1157	assert(scalingListType < 6);
	1158	Int *piQuantCoeff = 0;
	1159	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2, dir);
[2]	1160
	1161	#if FULL_NBIT
	1162	UInt uiBitDepth = g_uiBitDepth;
	1163	#else
	1164	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
	1165	#endif
	1166	UInt iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
	1167	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
	1168
	1169	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
	1170
[56]	1171	#if ADAPTIVE_QP_SELECTION
	1172	iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
	1173	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
	1174	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
	1175	Int iAddC = 1 << (iQBitsC-1);
	1176	#endif
	1177
	1178	Int qBits8 = iQBits-8;
[2]	1179	for( Int n = 0; n < iWidth*iHeight; n++ )
	1180	{
[56]	1181	Int iLevel;
[2]	1182	Int iSign;
[56]	1183	UInt uiBlockPos = n;
	1184	iLevel = piCoef[uiBlockPos];
[2]	1185	iSign = (iLevel < 0 ? -1: 1);
	1186
[56]	1187	#if ADAPTIVE_QP_SELECTION
	1188	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
	1189	if( m_bUseAdaptQpSelect )
[2]	1190	{
[56]	1191	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
[2]	1192	}
[56]	1193	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
	1194	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
[2]	1195	#else
[56]	1196	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
	1197	deltaU[uiBlockPos] = (Int)( ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
[2]	1198	#endif
	1199	uiAcSum += iLevel;
	1200	iLevel *= iSign;
[56]	1201	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
[2]	1202	} // for n
[56]	1203	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
[2]	1204	{
[56]	1205	if(uiAcSum>=2)
[2]	1206	{
[56]	1207	signBitHidingHDQ( pcCU, piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
[2]	1208	}
	1209	}
[56]	1210	} //if RDOQ
	1211	//return;
[2]	1212
	1213	}
	1214
[56]	1215	Void TComTrQuant::xDeQuant( const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
[2]	1216	{
	1217
[56]	1218	const TCoeff* piQCoef = pSrc;
	1219	Int* piCoef = pDes;
	1220	UInt dir = SCALING_LIST_SQT;
	1221	if( iWidth != iHeight )
	1222	{
	1223	dir = ( iWidth < iHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
	1224	UInt uiWidthBit = g_aucConvertToBit[ iWidth ] + 2;
	1225	UInt uiHeightBit = g_aucConvertToBit[ iHeight ] + 2;
	1226	iWidth = 1 << ( ( uiWidthBit + uiHeightBit) >> 1 );
	1227	iHeight = iWidth;
	1228	}
	1229
[2]	1230	if ( iWidth > (Int)m_uiMaxTrSize )
	1231	{
	1232	iWidth = m_uiMaxTrSize;
	1233	iHeight = m_uiMaxTrSize;
	1234	}
	1235
	1236	Int iShift,iAdd,iCoeffQ;
	1237	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
	1238
	1239	#if FULL_NBIT
	1240	UInt uiBitDepth = g_uiBitDepth;
	1241	#else
	1242	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
	1243	#endif
	1244	UInt iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;
	1245	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
	1246
[56]	1247	TCoeff clipQCoef;
	1248	const Int bitRange = min( 15, ( Int )( 12 + uiLog2TrSize + uiBitDepth - m_cQP.m_iPer) );
	1249	const Int levelLimit = 1 << bitRange;
	1250
	1251	if(getUseScalingList())
[2]	1252	{
[56]	1253	iShift += 4;
	1254	if(iShift > m_cQP.m_iPer)
[2]	1255	{
[56]	1256	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
[2]	1257	}
[56]	1258	else
[2]	1259	{
[56]	1260	iAdd = 0;
[2]	1261	}
[56]	1262	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
	1263
	1264	if(iShift > m_cQP.m_iPer)
[2]	1265	{
[56]	1266	for( Int n = 0; n < iWidth*iHeight; n++ )
	1267	{
	1268	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
	1269	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
	1270	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
	1271	}
[2]	1272	}
	1273	else
	1274	{
[56]	1275	for( Int n = 0; n < iWidth*iHeight; n++ )
	1276	{
	1277	clipQCoef = Clip3( -levelLimit, levelLimit - 1, piQCoef[n] );
	1278	iCoeffQ = (clipQCoef * piDequantCoef[n]) << (m_cQP.m_iPer - iShift);
	1279	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
	1280	}
	1281	}
[2]	1282	}
[56]	1283	else
[2]	1284	{
[56]	1285	iAdd = 1 << (iShift-1);
	1286	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
[2]	1287
[56]	1288	for( Int n = 0; n < iWidth*iHeight; n++ )
	1289	{
	1290	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
	1291	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
	1292	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
	1293	}
[2]	1294	}
	1295	}
[56]	1296
[2]	1297	Void TComTrQuant::init( UInt uiMaxWidth, UInt uiMaxHeight, UInt uiMaxTrSize, Int iSymbolMode, UInt aTableLP4, UInt aTableLP8, UInt *aTableLastPosVlcIndex,
[56]	1298	Bool bUseRDOQ, Bool bEnc
	1299	#if ADAPTIVE_QP_SELECTION
	1300	, Bool bUseAdaptQpSelect
[2]	1301	#endif
[56]	1302	)
[2]	1303	{
	1304	m_uiMaxTrSize = uiMaxTrSize;
	1305	m_bEnc = bEnc;
	1306	m_bUseRDOQ = bUseRDOQ;
[56]	1307	#if ADAPTIVE_QP_SELECTION
	1308	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
[2]	1309	#endif
	1310	}
	1311
[56]	1312	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
	1313	Pel* pcResidual,
	1314	UInt uiStride,
	1315	TCoeff* rpcCoeff,
	1316	#if ADAPTIVE_QP_SELECTION
	1317	Int*& rpcArlCoeff,
[2]	1318	#endif
[56]	1319	UInt uiWidth,
	1320	UInt uiHeight,
	1321	UInt& uiAbsSum,
	1322	TextType eTType,
	1323	UInt uiAbsPartIdx )
[2]	1324	{
[56]	1325	#if LOSSLESS_CODING
	1326	if((m_cQP.qp() == 0) && (pcCU->getSlice()->getSPS()->getUseLossless()))
[2]	1327	{
[56]	1328	uiAbsSum=0;
	1329	for (UInt k = 0; k<uiHeight; k++)
[2]	1330	{
[56]	1331	for (UInt j = 0; j<uiWidth; j++)
[2]	1332	{
[56]	1333	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
	1334	uiAbsSum += abs(pcResidual[k*uiStride+j]);
[2]	1335	}
	1336	}
[56]	1337	return;
[2]	1338	}
[56]	1339	#endif
	1340	UInt uiMode; //luma intra pred
	1341	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
[2]	1342	{
[56]	1343	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
[2]	1344	}
	1345	else
	1346	{
[56]	1347	uiMode = REG_DCT;
[2]	1348	}
	1349
[56]	1350	uiAbsSum = 0;
	1351	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
[2]	1352
[56]	1353	xT( uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	1354	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
	1355	#if ADAPTIVE_QP_SELECTION
	1356	rpcArlCoeff,
	1357	#endif
	1358	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
[2]	1359	}
	1360
[56]	1361	#if LOSSLESS_CODING
	1362	Void TComTrQuant::invtransformNxN( TComDataCU* pcCU, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType)
[2]	1363	#else
[56]	1364	Void TComTrQuant::invtransformNxN( TextType eText, UInt uiMode,Pel& rpcResidual, UInt uiStride, TCoeff pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType)
[2]	1365	#endif
	1366	{
[56]	1367	#if LOSSLESS_CODING
	1368	if((m_cQP.qp() == 0) && (pcCU->getSlice()->getSPS()->getUseLossless()))
[2]	1369	{
[56]	1370	for (UInt k = 0; k<uiHeight; k++)
[2]	1371	{
[56]	1372	for (UInt j = 0; j<uiWidth; j++)
	1373	{
	1374	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
	1375	}
	1376	}
	1377	return;
[2]	1378	}
[56]	1379	#endif
	1380	xDeQuant( pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
	1381	xIT( uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
[2]	1382	}
	1383
[56]	1384	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
[2]	1385	{
[56]	1386	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
[2]	1387	{
[56]	1388	return;
[2]	1389	}
	1390
	1391	UInt uiLumaTrMode, uiChromaTrMode;
	1392	pcCU->convertTransIdx( uiAbsPartIdx, pcCU->getTransformIdx( uiAbsPartIdx ), uiLumaTrMode, uiChromaTrMode );
	1393	const UInt uiStopTrMode = eTxt == TEXT_LUMA ? uiLumaTrMode : uiChromaTrMode;
	1394
	1395	if( uiTrMode == uiStopTrMode )
	1396	{
	1397	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
	1398	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
[56]	1399	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
[2]	1400	{
	1401	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
	1402	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
	1403	{
	1404	return;
	1405	}
	1406	uiWidth <<= 1;
	1407	uiHeight <<= 1;
	1408	}
	1409	Pel* pResi = rpcResidual + uiAddr;
[56]	1410	if( pcCU->useNonSquareTrans( uiTrMode, uiAbsPartIdx ) )
	1411	{
	1412	Int trWidth = uiWidth;
	1413	Int trHeight = uiHeight;
	1414	pcCU->getNSQTSize( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
	1415
	1416	uiWidth = trWidth;
	1417	uiHeight = trHeight;
	1418	}
	1419	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
	1420	assert(scalingListType < 6);
	1421	#if LOSSLESS_CODING
	1422	invtransformNxN( pcCU, eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
	1423	#else
	1424	invtransformNxN( eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType );
[2]	1425	#endif
	1426	}
	1427	else
	1428	{
	1429	uiTrMode++;
	1430	uiWidth >>= 1;
	1431	uiHeight >>= 1;
[56]	1432	Int trWidth = uiWidth, trHeight = uiHeight;
	1433	Int trLastWidth = uiWidth << 1, trLastHeight = uiHeight << 1;
	1434	pcCU->getNSQTSize ( uiTrMode, uiAbsPartIdx, trWidth, trHeight );
	1435	pcCU->getNSQTSize ( uiTrMode - 1, uiAbsPartIdx, trLastWidth, trLastHeight );
	1436	UInt uiAddrOffset = trHeight * uiStride;
	1437	UInt uiCoefOffset = trWidth * trHeight;
	1438	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
	1439	UInt uiInterTUSplitDirection = pcCU->getInterTUSplitDirection ( trWidth, trHeight, trLastWidth, trLastHeight );
	1440	if( uiInterTUSplitDirection != 2 )
	1441	{
	1442	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1443	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth * uiInterTUSplitDirection + uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1444	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 2 * trWidth * uiInterTUSplitDirection + 2 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1445	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + 3 * trWidth * uiInterTUSplitDirection + 3 * uiAddrOffset * ( 1 - uiInterTUSplitDirection), uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
	1446	}
	1447	else
	1448	{
	1449	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1450	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1451	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1452	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
	1453	}
[2]	1454	}
	1455	}
	1456
// ------------------------------------------------------------------------------------------------
// Logical transform
// ------------------------------------------------------------------------------------------------
	1460
	1461	/** Wrapper function between HM interface and core NxN forward transform (2D)
	1462	* \param piBlkResi input data (residual)
	1463	* \param psCoeff output data (transform coefficients)
	1464	* \param uiStride stride of input residual data
	1465	* \param iSize transform size (iSize x iSize)
	1466	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	1467	*/
[56]	1468	Void TComTrQuant::xT( UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
[2]	1469	{
	1470	#if MATRIX_MULT
[56]	1471	Int iSize = iWidth;
	1472	if( iWidth != iHeight)
	1473	{
	1474	xTrMxN( piBlkResi, psCoeff, uiStride, (UInt)iWidth, (UInt)iHeight );
	1475	return;
	1476	}
[2]	1477	xTr(piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
	1478	#else
[56]	1479	Int j;
	1480	{
	1481	short block[ 64 * 64 ];
	1482	short coeff[ 64 * 64 ];
	1483	{
	1484	for (j = 0; j < iHeight; j++)
	1485	{
	1486	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( short ) );
	1487	}
	1488	}
	1489	xTrMxN( block, coeff, iWidth, iHeight, uiMode );
	1490	for ( j = 0; j < iHeight * iWidth; j++ )
	1491	{
	1492	psCoeff[ j ] = coeff[ j ];
	1493	}
	1494	return ;
	1495	}
[2]	1496	#endif
	1497	}
	1498
	1499	/** Wrapper function between HM interface and core NxN inverse transform (2D)
	1500	* \param plCoef input data (transform coefficients)
	1501	* \param pResidual output data (residual)
	1502	* \param uiStride stride of input residual data
	1503	* \param iSize transform size (iSize x iSize)
	1504	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	1505	*/
[56]	1506	Void TComTrQuant::xIT( UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
[2]	1507	{
	1508	#if MATRIX_MULT
[56]	1509	Int iSize = iWidth;
	1510	if( iWidth != iHeight )
	1511	{
	1512	xITrMxN( plCoef, pResidual, uiStride, (UInt)iWidth, (UInt)iHeight );
	1513	return;
	1514	}
[2]	1515	xITr(plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
	1516	#else
[56]	1517	Int j;
	1518	{
	1519	short block[ 64 * 64 ];
	1520	short coeff[ 64 * 64 ];
	1521	for ( j = 0; j < iHeight * iWidth; j++ )
	1522	{
	1523	coeff[j] = (short)plCoef[j];
	1524	}
	1525	xITrMxN( coeff, block, iWidth, iHeight, uiMode );
	1526	{
	1527	for ( j = 0; j < iHeight; j++ )
	1528	{
	1529	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(short) );
	1530	}
	1531	}
	1532	return ;
	1533	}
[2]	1534	#endif
	1535	}
[56]	1536
[2]	1537	/** RDOQ with CABAC
	1538	* \param pcCU pointer to coding unit structure
	1539	* \param plSrcCoeff pointer to input buffer
	1540	* \param piDstCoeff reference to pointer to output buffer
	1541	* \param uiWidth block width
	1542	* \param uiHeight block height
	1543	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
	1544	* \param eTType plane type / luminance or chrominance
	1545	* \param uiAbsPartIdx absolute partition index
	1546	* \returns Void
	1547	* Rate distortion optimized quantization for entropy
	1548	* coding engines using probability models like CABAC
	1549	*/
	1550	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
[56]	1551	Int* plSrcCoeff,
	1552	TCoeff* piDstCoeff,
	1553	#if ADAPTIVE_QP_SELECTION
	1554	Int*& piArlDstCoeff,
	1555	#endif
[2]	1556	UInt uiWidth,
	1557	UInt uiHeight,
	1558	UInt& uiAbsSum,
	1559	TextType eTType,
	1560	UInt uiAbsPartIdx )
	1561	{
	1562	Int iQBits = m_cQP.m_iBits;
	1563	Double dTemp = 0;
	1564
[56]	1565	UInt dir = SCALING_LIST_SQT;
[2]	1566	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
[56]	1567	Int uiQ = g_quantScales[m_cQP.rem()];
	1568	if (uiWidth != uiHeight)
	1569	{
	1570	uiLog2TrSize += (uiWidth > uiHeight) ? -1 : 1;
	1571	dir = ( uiWidth < uiHeight )? SCALING_LIST_VER: SCALING_LIST_HOR;
	1572	}
	1573
[2]	1574	#if FULL_NBIT
	1575	UInt uiBitDepth = g_uiBitDepth;
	1576	#else
	1577	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
	1578	#endif
	1579	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
[56]	1580	UInt uiGoRiceParam = 0;
	1581	Double d64BlockUncodedCost = 0;
	1582	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
	1583	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
	1584	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
	1585	assert(scalingListType < 6);
	1586
[2]	1587	iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
[56]	1588	double dErrScale = 0;
	1589	double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem,dir);
	1590	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2,dir);
	1591	Int *piQCoef = piQCoefOrg;
	1592	double *pdErrScale = pdErrScaleOrg;
	1593	#if ADAPTIVE_QP_SELECTION
	1594	Int iQBitsC = iQBits - ARL_C_PRECISION;
	1595	Int iAddC = 1 << (iQBitsC-1);
	1596	#endif
	1597	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
	1598	if (uiScanIdx == SCAN_ZIGZAG)
[2]	1599	{
[56]	1600	// Map value zigzag to diagonal scan
	1601	uiScanIdx = SCAN_DIAG;
[2]	1602	}
[56]	1603	Int blockType = uiLog2BlkSize;
	1604	if (uiWidth != uiHeight)
[2]	1605	{
[56]	1606	uiScanIdx = SCAN_DIAG;
	1607	blockType = 4;
[2]	1608	}
[56]	1609
	1610	#if ADAPTIVE_QP_SELECTION
	1611	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
	1612	#endif
	1613
	1614	Double pdCostCoeff [ 32 * 32 ];
	1615	Double pdCostSig [ 32 * 32 ];
	1616	Double pdCostCoeff0[ 32 * 32 ];
	1617	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
	1618	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
	1619	Int rateIncUp [ 32 * 32 ];
	1620	Int rateIncDown [ 32 * 32 ];
	1621	Int sigRateDelta[ 32 * 32 ];
	1622	Int deltaU [ 32 * 32 ];
	1623	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
	1624	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
	1625	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
	1626	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
	1627
	1628	const UInt * scanCG;
	1629	if (uiWidth == uiHeight)
[2]	1630	{
[56]	1631	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
	1632	if( uiLog2BlkSize == 3 )
[2]	1633	{
[56]	1634	scanCG = g_sigLastScan8x8[ uiScanIdx ];
[2]	1635	}
[56]	1636	else if( uiLog2BlkSize == 5 )
[2]	1637	{
[56]	1638	scanCG = g_sigLastScanCG32x32;
[2]	1639	}
	1640	}
	1641	else
	1642	{
[56]	1643	scanCG = g_sigCGScanNSQT[ uiLog2BlkSize - 2 ];
[2]	1644	}
[56]	1645	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
	1646	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
	1647	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
	1648	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
	1649	Int iCGLastScanPos = -1;
[2]	1650
[56]	1651	UInt uiCtxSet = 0;
	1652	Int c1 = 1;
	1653	Int c2 = 0;
	1654	UInt uiNumOne = 0;
	1655	Double d64BaseCost = 0;
	1656	Int iLastScanPos = -1;
	1657	dTemp = dErrScale;
	1658
	1659	UInt c1Idx = 0;
	1660	UInt c2Idx = 0;
	1661	Int baseLevel;
[2]	1662
[56]	1663	const UInt * scan;
	1664	if (uiWidth == uiHeight)
	1665	{
	1666	scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
	1667	}
	1668	else
	1669	{
	1670	scan = g_sigScanNSQT[ uiLog2BlkSize - 2 ];
	1671	}
[2]	1672
[56]	1673	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
	1674	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
	1675
	1676	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
	1677	Int iScanPos;
	1678	coeffGroupRDStats rdStats;
	1679
	1680	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
	1681	{
	1682	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
	1683	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
	1684	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
	1685	if( uiWidth == 8 && uiHeight == 8 && (uiScanIdx == SCAN_HOR \|\| uiScanIdx == SCAN_VER) )
[2]	1686	{
[56]	1687	uiCGPosY = (uiScanIdx == SCAN_HOR ? uiCGBlkPos : 0);
	1688	uiCGPosX = (uiScanIdx == SCAN_VER ? uiCGBlkPos : 0);
	1689	}
	1690	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
[2]	1691
[56]	1692	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	1693	{
	1694	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
	1695	//===== quantization =====
	1696	UInt uiBlkPos = scan[iScanPos];
	1697	// set coeff
	1698	uiQ = piQCoef[uiBlkPos];
	1699	dTemp = pdErrScale[uiBlkPos];
	1700	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
	1701	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
	1702	#if ADAPTIVE_QP_SELECTION
	1703	if( m_bUseAdaptQpSelect )
[2]	1704	{
[56]	1705	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
[2]	1706	}
[56]	1707	#endif
	1708	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
	1709
	1710	Double dErr = Double( lLevelDouble );
	1711	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
	1712	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
	1713	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
	1714
	1715	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
[2]	1716	{
[56]	1717	iLastScanPos = iScanPos;
	1718	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
	1719	iCGLastScanPos = iCGScanPos;
[2]	1720	}
[56]	1721
	1722	if ( iLastScanPos >= 0 )
[2]	1723	{
[56]	1724	//===== coefficient level estimation =====
	1725	UInt uiLevel;
	1726	UInt uiOneCtx = 4 * uiCtxSet + c1;
	1727	UInt uiAbsCtx = uiCtxSet + c2;
	1728
	1729	if( iScanPos == iLastScanPos )
[2]	1730	{
[56]	1731	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
	1732	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
	1733	c1Idx, c2Idx, iQBits, dTemp, 1 );
	1734	}
	1735	else
	1736	{
	1737	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
	1738	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
	1739	UShort uiCtxSig = getSigCtxInc( piDstCoeff, uiPosX, uiPosY, blockType, uiWidth, uiHeight, eTType );
	1740	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
	1741	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
	1742	c1Idx, c2Idx, iQBits, dTemp, 0 );
	1743	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
	1744	}
	1745	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
	1746	if( uiLevel > 0 )
	1747	{
	1748	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
	1749	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
	1750	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
	1751	}
	1752	else // uiLevel == 0
	1753	{
	1754	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
	1755	}
	1756	piDstCoeff[ uiBlkPos ] = uiLevel;
	1757	d64BaseCost += pdCostCoeff [ iScanPos ];
	1758
	1759
	1760	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
	1761	if( uiLevel >= baseLevel )
	1762	{
	1763	uiGoRiceParam = g_aauiGoRiceUpdate[ uiGoRiceParam ][ min<UInt>( uiLevel - baseLevel , 23 ) ];
	1764	}
	1765	if ( uiLevel >= 1)
	1766	{
	1767	c1Idx ++;
	1768	}
	1769
	1770	//===== update bin model =====
	1771	if( uiLevel > 1 )
	1772	{
	1773	c1 = 0;
	1774	c2 += (c2 < 2);
	1775	uiNumOne++;
	1776	c2Idx ++;
	1777	}
	1778	else if( (c1 < 3) && (c1 > 0) && uiLevel)
	1779	{
	1780	c1++;
	1781	}
	1782
	1783	//===== context set update =====
	1784	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
	1785	{
	1786	c1 = 1;
	1787	c2 = 0;
	1788	uiGoRiceParam = 0;
	1789
	1790	c1Idx = 0;
	1791	c2Idx = 0;
	1792	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
	1793	if( uiNumOne > 0 )
	1794	{
	1795	uiCtxSet++;
[2]	1796	}
[56]	1797	uiNumOne >>= 1;
[2]	1798	}
	1799	}
[56]	1800	else
	1801	{
	1802	d64BaseCost += pdCostCoeff0[ iScanPos ];
	1803	}
	1804	rdStats.d64SigCost += pdCostSig[ iScanPos ];
	1805	if (iScanPosinCG == 0 )
	1806	{
	1807	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
	1808	}
	1809	if (piDstCoeff[ uiBlkPos ] )
	1810	{
	1811	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
	1812	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
	1813	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
	1814	if ( iScanPosinCG != 0 )
	1815	{
	1816	rdStats.iNNZbeforePos0++;
	1817	}
	1818	}
	1819	} //end for (iScanPosinCG)
[2]	1820
[56]	1821	if (iCGLastScanPos >= 0)
[2]	1822	{
[56]	1823	if( iCGScanPos )
[2]	1824	{
[56]	1825	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
[2]	1826	{
[56]	1827	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
	1828	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
	1829	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
	1830	}
	1831	else
	1832	{
	1833	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
	1834	{
	1835	if ( rdStats.iNNZbeforePos0 == 0 )
	1836	{
	1837	d64BaseCost -= rdStats.d64SigCost_0;
	1838	rdStats.d64SigCost -= rdStats.d64SigCost_0;
	1839	}
	1840	// rd-cost if SigCoeffGroupFlag = 0, initialization
	1841	Double d64CostZeroCG = d64BaseCost;
	1842
	1843	// add SigCoeffGroupFlag cost to total cost
	1844	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiScanIdx, uiWidth, uiHeight);
	1845	if (iCGScanPos < iCGLastScanPos)
	1846	{
	1847	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
	1848	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
	1849	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
	1850	}
	1851
	1852	// try to convert the current coeff group from non-zero to all-zero
	1853	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
	1854	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
	1855	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
	1856
	1857	// if we can save cost, change this block to all-zero block
	1858	if ( d64CostZeroCG < d64BaseCost )
	1859	{
	1860	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
	1861	d64BaseCost = d64CostZeroCG;
	1862	if (iCGScanPos < iCGLastScanPos)
	1863	{
	1864	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
	1865	}
	1866	// reset coeffs to 0 in this block
	1867	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	1868	{
	1869	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
	1870	UInt uiBlkPos = scan[ iScanPos ];
	1871
	1872	if (piDstCoeff[ uiBlkPos ])
	1873	{
	1874	piDstCoeff [ uiBlkPos ] = 0;
	1875	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
	1876	pdCostSig [ iScanPos ] = 0;
	1877	}
	1878	}
	1879	} // end if ( d64CostAllZeros < d64BaseCost )
	1880	}
	1881	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
[2]	1882	}
[56]	1883	else
[2]	1884	{
[56]	1885	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
[2]	1886	}
	1887	}
[56]	1888	} //end for (iCGScanPos)
[2]	1889
[56]	1890	//===== estimate last position =====
	1891	if ( iLastScanPos < 0 )
	1892	{
	1893	return;
	1894	}
	1895
	1896	Double d64BestCost = 0;
	1897	Int ui16CtxCbf = 0;
	1898	Int iBestLastIdxP1 = 0;
[2]	1899	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
	1900	{
[56]	1901	ui16CtxCbf = 0;
	1902	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
	1903	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
[2]	1904	}
	1905	else
	1906	{
[56]	1907	ui16CtxCbf = pcCU->getCtxQtCbf( uiAbsPartIdx, eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
	1908	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
	1909	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
	1910	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
[2]	1911	}
	1912
[56]	1913	Bool bFoundLast = false;
	1914	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
[2]	1915	{
[56]	1916	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
	1917
	1918	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
	1919	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
	1920	{
	1921	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	1922	{
	1923	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
	1924	if (iScanPos > iLastScanPos) continue;
	1925	UInt uiBlkPos = scan[iScanPos];
	1926
	1927	if( piDstCoeff[ uiBlkPos ] )
	1928	{
	1929	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
	1930	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
	1931
	1932	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, uiWidth ) : xGetRateLast( uiPosX, uiPosY, uiWidth );
	1933	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
	1934
	1935	if( totalCost < d64BestCost )
	1936	{
	1937	iBestLastIdxP1 = iScanPos + 1;
	1938	d64BestCost = totalCost;
	1939	}
	1940	if( piDstCoeff[ uiBlkPos ] > 1 )
	1941	{
	1942	bFoundLast = true;
	1943	break;
	1944	}
	1945	d64BaseCost -= pdCostCoeff[ iScanPos ];
	1946	d64BaseCost += pdCostCoeff0[ iScanPos ];
	1947	}
	1948	else
	1949	{
	1950	d64BaseCost -= pdCostSig[ iScanPos ];
	1951	}
	1952	} //end for
	1953	if (bFoundLast)
	1954	{
	1955	break;
	1956	}
	1957	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
	1958	} // end for
	1959
	1960	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
	1961	{
	1962	Int blkPos = scan[ scanPos ];
	1963	Int level = piDstCoeff[ blkPos ];
	1964	uiAbsSum += level;
	1965	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
[2]	1966	}
	1967
	1968	//===== clean uncoded coefficients =====
[56]	1969	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
[2]	1970	{
[56]	1971	piDstCoeff[ scan[ scanPos ] ] = 0;
	1972	}
	1973
	1974	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
	1975	{
	1976	Int rdFactor = (Int)((Double)(g_invQuantScales[m_cQP.rem()]g_invQuantScales[m_cQP.rem()]<<(2m_cQP.m_iPer))/m_dLambda/16 + 0.5) ;
	1977
	1978	Int tsig = pcCU->getSlice()->getPPS()->getTSIG() ;
	1979
	1980	Int lastCG = -1;
	1981	Int absSum = 0 ;
	1982	Int n ;
	1983
	1984	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
[2]	1985	{
[56]	1986	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
	1987	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
	1988	absSum = 0 ;
	1989
	1990	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
	1991	{
	1992	if( piDstCoeff[ scan[ n + subPos ]] )
	1993	{
	1994	lastNZPosInCG = n;
	1995	break;
	1996	}
	1997	}
	1998
	1999	for(n = 0; n <SCAN_SET_SIZE; n++ )
	2000	{
	2001	if( piDstCoeff[ scan[ n + subPos ]] )
	2002	{
	2003	firstNZPosInCG = n;
	2004	break;
	2005	}
	2006	}
	2007
	2008	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
	2009	{
	2010	absSum += piDstCoeff[ scan[ n + subPos ]];
	2011	}
	2012
	2013	if(lastNZPosInCG>=0 && lastCG==-1) lastCG =1 ;
[2]	2014
[56]	2015	if( lastNZPosInCG-firstNZPosInCG>=tsig )
[2]	2016	{
[56]	2017	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
	2018	if( signbit!=(absSum&0x1) ) // hide but need tune
	2019	{
	2020	// calculate the cost
	2021	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
	2022
	2023	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
	2024	{
	2025	UInt uiBlkPos = scan[ n + subPos ];
	2026	if(piDstCoeff[ uiBlkPos ] != 0 )
	2027	{
	2028	Int costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
	2029	Int costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
	2030	- ( abs(piDstCoeff[uiBlkPos])==1?((1<<15)+sigRateDelta[uiBlkPos]):0 );
	2031
	2032	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
	2033	{
	2034	costDown -= (4<<15) ;
	2035	}
	2036
	2037	if(costUp<costDown)
	2038	{
	2039	curCost = costUp;
	2040	curChange = 1 ;
	2041	}
	2042	else
	2043	{
	2044	curChange = -1 ;
	2045	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
	2046	{
	2047	curCost = MAX_INT ;
	2048	}
	2049	else
	2050	{
	2051	curCost = costDown ;
	2052	}
	2053	}
	2054	}
	2055	else
	2056	{
	2057	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
	2058	curChange = 1 ;
	2059
	2060	if(n<firstNZPosInCG)
	2061	{
	2062	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
	2063	if(thissignbit != signbit )
	2064	{
	2065	curCost = MAX_INT;
	2066	}
	2067	}
	2068	}
	2069
	2070	if( curCost<minCostInc)
	2071	{
	2072	minCostInc = curCost ;
	2073	finalChange = curChange ;
	2074	minPos = uiBlkPos ;
	2075	}
	2076	}
	2077
	2078	if(piQCoef[minPos] == 32767 \|\| piQCoef[minPos] == -32768)
	2079	{
	2080	finalChange = -1;
	2081	}
	2082
	2083	if(plSrcCoeff[minPos]>=0)
	2084	{
	2085	piDstCoeff[minPos] += finalChange ;
	2086	}
	2087	else
	2088	{
	2089	piDstCoeff[minPos] -= finalChange ;
	2090	}
	2091	}
[2]	2092	}
[56]	2093
	2094	if(lastCG==1)
[2]	2095	{
[56]	2096	lastCG=0 ;
	2097	}
[2]	2098	}
	2099	}
	2100	}
	2101
	2102	/** Context derivation process of coeff_abs_significant_flag
	2103	* \param pcCoeff pointer to prior coded transform coefficients
[56]	2104	* \param posX column of current scan position
	2105	* \param posY row of current scan position
	2106	* \param blockType log2 value of block size if square block, or 4 otherwise
	2107	* \param width width of the block
	2108	* \param height height of the block
	2109	* \param textureType texture type (TEXT_LUMA...)
[2]	2110	* \returns ctxInc for current scan position
	2111	*/
[56]	2112	Int TComTrQuant::getSigCtxInc ( TCoeff* pcCoeff,
	2113	Int posX,
	2114	Int posY,
	2115	Int blockType,
	2116	Int width
	2117	,Int height
	2118	,TextType textureType
	2119	)
[2]	2120	{
[56]	2121	if ( blockType == 2 )
[2]	2122	{
[56]	2123	//LUMA map
	2124	const Int ctxIndMap4x4Luma[15] =
[2]	2125	{
[56]	2126	0, 1, 4, 5,
	2127	2, 3, 4, 5,
	2128	6, 6, 8, 8,
	2129	7, 7, 8
	2130	};
	2131	//CHROMA map
	2132	const Int ctxIndMap4x4Chroma[15] =
[2]	2133	{
[56]	2134	0, 1, 2, 4,
	2135	1, 1, 2, 4,
	2136	3, 3, 5, 5,
	2137	4, 4, 5
	2138	};
	2139
	2140	if (textureType == TEXT_LUMA)
[2]	2141	{
[56]	2142	return ctxIndMap4x4Luma[ 4 * posY + posX ];
[2]	2143	}
	2144	else
	2145	{
[56]	2146	return ctxIndMap4x4Chroma[ 4 * posY + posX ];
[2]	2147	}
	2148	}
	2149
[56]	2150	if ( blockType == 3 )
[2]	2151	{
[56]	2152	const Int map8x8[16] =
	2153	{
	2154	0, 1, 2, 3,
	2155	4, 5, 6, 3,
	2156	8, 6, 6, 7,
	2157	9, 9, 7, 7
	2158	};
	2159
	2160	Int offset = (textureType == TEXT_LUMA) ? 9 : 6;
	2161
	2162	if ( posX + posY == 0 )
	2163	{
	2164	return offset + 10;
	2165	}
	2166	return offset + map8x8[4 * (posY >> 1) + (posX >> 1)];
[2]	2167	}
[56]	2168
	2169	Int offset = (textureType == TEXT_LUMA) ? 20 : 17;
	2170	if( posX + posY == 0 )
[2]	2171	{
[56]	2172	return offset;
[2]	2173	}
[56]	2174	#if SIGMAP_CONST_AT_HIGH_FREQUENCY
	2175	Int thredHighFreq = 3*(std::max(width, height)>>4);
	2176	if ((posX>>2) + (posY>>2) >= thredHighFreq)
[2]	2177	{
[56]	2178	return (textureType == TEXT_LUMA) ? 24 : 18;
[2]	2179	}
[56]	2180	#endif
	2181
	2182	const TCoeff pData = pcCoeff + posX + posY width;
	2183
	2184
	2185	Int cnt = 0;
	2186	if( posX < width - 1 )
[2]	2187	{
[56]	2188	cnt += pData[1] != 0;
	2189	if( posY < height - 1 )
	2190	{
	2191	cnt += pData[width+1] != 0;
	2192	}
	2193	if( posX < width - 2 )
	2194	{
	2195	cnt += pData[2] != 0;
	2196	}
[2]	2197	}
[56]	2198	if ( posY < height - 1 )
[2]	2199	{
[56]	2200	if( ( ( posX & 3 ) \|\| ( posY & 3 ) ) && ( ( (posX+1) & 3 ) \|\| ( (posY+2) & 3 ) ) )
[2]	2201	{
[56]	2202	cnt += pData[width] != 0;
[2]	2203	}
[56]	2204	if ( posY < height - 2 && cnt < 4 )
[2]	2205	{
[56]	2206	cnt += pData[2*width] != 0;
[2]	2207	}
	2208	}
[56]	2209
	2210	cnt = ( cnt + 1 ) >> 1;
	2211	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 4 : 1) + offset + cnt;
[2]	2212	}
	2213
	2214	/** Get the best level in RD sense
[56]	2215	* \param rd64CodedCost reference to coded cost
	2216	* \param rd64CodedCost0 reference to cost when coefficient is 0
	2217	* \param rd64CodedCostSig reference to cost of significant coefficient
[2]	2218	* \param lLevelDouble reference to unscaled quantized level
	2219	* \param uiMaxAbsLevel scaled quantized level
	2220	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
	2221	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2222	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2223	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
	2224	* \param iQBits quantization step size
	2225	* \param dTemp correction factor
[56]	2226	* \param bLast indicates if the coefficient is the last significant
[2]	2227	* \returns best quantized transform level for given scan position
	2228	* This method calculates the best quantized transform level for a given scan position.
	2229	*/
[56]	2230	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
	2231	Double& rd64CodedCost0,
	2232	Double& rd64CodedCostSig,
	2233	Int lLevelDouble,
	2234	UInt uiMaxAbsLevel,
	2235	UShort ui16CtxNumSig,
	2236	UShort ui16CtxNumOne,
	2237	UShort ui16CtxNumAbs,
	2238	UShort ui16AbsGoRice,
	2239	UInt c1Idx,
	2240	UInt c2Idx,
	2241	Int iQBits,
	2242	Double dTemp,
	2243	Bool bLast ) const
[2]	2244	{
[56]	2245	Double dCurrCostSig = 0;
[2]	2246	UInt uiBestAbsLevel = 0;
	2247
[56]	2248	if( !bLast && uiMaxAbsLevel < 3 )
[2]	2249	{
[56]	2250	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
	2251	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
	2252	if( uiMaxAbsLevel == 0 )
	2253	{
	2254	return uiBestAbsLevel;
	2255	}
[2]	2256	}
	2257	else
	2258	{
[56]	2259	rd64CodedCost = MAX_DOUBLE;
[2]	2260	}
	2261
[56]	2262	if( !bLast )
[2]	2263	{
[56]	2264	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
[2]	2265	}
	2266
[56]	2267	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
	2268	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
[2]	2269	{
[56]	2270	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
	2271	Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx );
	2272	dCurrCost += dCurrCostSig;
[2]	2273
	2274	if( dCurrCost < rd64CodedCost )
	2275	{
[56]	2276	uiBestAbsLevel = uiAbsLevel;
	2277	rd64CodedCost = dCurrCost;
	2278	rd64CodedCostSig = dCurrCostSig;
[2]	2279	}
	2280	}
[56]	2281
[2]	2282	return uiBestAbsLevel;
	2283	}
	2284
	2285	/** Calculates the cost for specific absolute transform level
	2286	* \param uiAbsLevel scaled quantized level
	2287	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2288	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2289	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
	2290	* \returns cost of given absolute transform level
	2291	*/
	2292	__inline Double TComTrQuant::xGetICRateCost ( UInt uiAbsLevel,
	2293	UShort ui16CtxNumOne,
	2294	UShort ui16CtxNumAbs,
[56]	2295	UShort ui16AbsGoRice
	2296	, UInt c1Idx,
	2297	UInt c2Idx
	2298	) const
[2]	2299	{
	2300	Double iRate = xGetIEPRate();
[56]	2301	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
	2302
	2303	if ( uiAbsLevel >= baseLevel )
[2]	2304	{
[56]	2305	UInt uiSymbol = uiAbsLevel - baseLevel;
	2306	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
	2307	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
	2308
	2309	if( bExpGolomb )
[2]	2310	{
[56]	2311	uiAbsLevel = uiSymbol - uiMaxVlc;
	2312	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
	2313	iRate += iEGS << 15;
	2314	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
[2]	2315	}
[56]	2316
	2317	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
	2318	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
	2319
	2320	iRate += ui16NumBins << 15;
	2321
	2322	if (c1Idx < C1FLAG_NUMBER)
	2323	{
	2324	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2325
	2326	if (c2Idx < C2FLAG_NUMBER)
	2327	{
	2328	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
	2329	}
	2330	}
[2]	2331	}
[56]	2332	else
[2]	2333	if( uiAbsLevel == 1 )
	2334	{
[56]	2335	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
[2]	2336	}
	2337	else if( uiAbsLevel == 2 )
	2338	{
[56]	2339	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2340	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
[2]	2341	}
	2342	else
	2343	{
[56]	2344	assert (0);
[2]	2345	}
[56]	2346	return xGetICost( iRate );
	2347	}
	2348
	2349	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
	2350	UShort ui16CtxNumOne,
	2351	UShort ui16CtxNumAbs,
	2352	UShort ui16AbsGoRice
	2353	, UInt c1Idx,
	2354	UInt c2Idx
	2355	) const
	2356	{
	2357	Int iRate = 0;
	2358	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
	2359
	2360	if ( uiAbsLevel >= baseLevel )
[2]	2361	{
[56]	2362	UInt uiSymbol = uiAbsLevel - baseLevel;
	2363	UInt uiMaxVlc = g_auiGoRiceRange[ ui16AbsGoRice ];
	2364	Bool bExpGolomb = ( uiSymbol > uiMaxVlc );
	2365
	2366	if( bExpGolomb )
	2367	{
	2368	uiAbsLevel = uiSymbol - uiMaxVlc;
	2369	int iEGS = 1; for( UInt uiMax = 2; uiAbsLevel >= uiMax; uiMax <<= 1, iEGS += 2 );
	2370	iRate += iEGS << 15;
	2371	uiSymbol = min<UInt>( uiSymbol, ( uiMaxVlc + 1 ) );
	2372	}
	2373
	2374	UShort ui16PrefLen = UShort( uiSymbol >> ui16AbsGoRice ) + 1;
	2375	UShort ui16NumBins = min<UInt>( ui16PrefLen, g_auiGoRicePrefixLen[ ui16AbsGoRice ] ) + ui16AbsGoRice;
	2376
	2377	iRate += ui16NumBins << 15;
	2378
	2379	if (c1Idx < C1FLAG_NUMBER)
	2380	{
	2381	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2382
	2383	if (c2Idx < C2FLAG_NUMBER)
	2384	{
	2385	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
	2386	}
	2387	}
[2]	2388	}
	2389	else
[56]	2390	if( uiAbsLevel == 0 )
[2]	2391	{
[56]	2392	return 0;
[2]	2393	}
[56]	2394	else if( uiAbsLevel == 1 )
	2395	{
	2396	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
	2397	}
	2398	else if( uiAbsLevel == 2 )
	2399	{
	2400	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2401	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
	2402	}
	2403	else
	2404	{
	2405	assert(0);
	2406	}
	2407	return iRate;
[2]	2408	}
	2409
[56]	2410	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
	2411	UShort ui16CtxNumSig ) const
	2412	{
	2413	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
	2414	}
	2415
[2]	2416	/** Calculates the cost of signaling the last significant coefficient in the block
	2417	* \param uiPosX X coordinate of the last significant coefficient
	2418	* \param uiPosY Y coordinate of the last significant coefficient
	2419	* \returns cost of last significant coefficient
	2420	*/
[56]	2421	/*
	2422	* \param uiWidth width of the transform unit (TU)
	2423	*/
	2424	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
	2425	const UInt uiPosY,
	2426	const UInt uiBlkWdth ) const
[2]	2427	{
[56]	2428	UInt uiCtxX = g_uiGroupIdx[uiPosX];
	2429	UInt uiCtxY = g_uiGroupIdx[uiPosY];
	2430	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
	2431	if( uiCtxX > 3 )
	2432	{
	2433	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
	2434	}
	2435	if( uiCtxY > 3 )
	2436	{
	2437	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
	2438	}
	2439	return xGetICost( uiCost );
[2]	2440	}
	2441
	2442	/** Calculates the cost for specific absolute transform level
	2443	* \param uiAbsLevel scaled quantized level
	2444	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2445	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2446	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
	2447	* \returns cost of given absolute transform level
	2448	*/
	2449	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
	2450	UShort ui16CtxNumSig ) const
	2451	{
	2452	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
	2453	}
	2454
	2455	/** Get the cost for a specific rate
	2456	* \param dRate rate of a bit
	2457	* \returns cost at the specific rate
	2458	*/
	2459	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
	2460	{
	2461	return m_dLambda * dRate;
	2462	}
	2463
	2464	/** Get the cost of an equal probable bit
	2465	* \returns cost of equal probable bit
	2466	*/
	2467	__inline Double TComTrQuant::xGetIEPRate ( ) const
	2468	{
	2469	return 32768;
	2470	}
[56]	2471
	2472	/** Context derivation process of coeff_abs_significant_flag
	2473	* \param uiSigCoeffGroupFlag significance map of L1
	2474	* \param uiBlkX column of current scan position
	2475	* \param uiBlkY row of current scan position
	2476	* \param uiLog2BlkSize log2 value of block size
	2477	* \returns ctxInc for current scan position
	2478	*/
	2479	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
	2480	const UInt uiCGPosX,
	2481	const UInt uiCGPosY,
	2482	const UInt scanIdx,
	2483	Int width, Int height)
	2484	{
	2485	UInt uiRight = 0;
	2486	UInt uiLower = 0;
	2487
	2488	width >>= 2;
	2489	height >>= 2;
	2490	if( width == 2 && height == 2 ) // 8x8
	2491	{
	2492	if( scanIdx == SCAN_HOR )
	2493	{
	2494	width = 1;
	2495	height = 4;
	2496	}
	2497	else if( scanIdx == SCAN_VER )
	2498	{
	2499	width = 4;
	2500	height = 1;
	2501	}
	2502	}
	2503	if( uiCGPosX < width - 1 )
	2504	{
	2505	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
	2506	}
	2507	if (uiCGPosY < height - 1 )
	2508	{
	2509	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
	2510	}
	2511	return (uiRight \|\| uiLower);
	2512
	2513	}
	2514	/** set quantized matrix coefficient for encode
	2515	* \param scalingList quantaized matrix address
	2516	*/
	2517	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
	2518	{
	2519	UInt size,list;
	2520	UInt qp;
	2521
	2522	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
	2523	{
	2524	for(list = 0; list < g_scalingListNum[size]; list++)
	2525	{
	2526	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
	2527	{
	2528	xSetScalingListEnc(scalingList,list,size,qp);
	2529	xSetScalingListDec(scalingList,list,size,qp);
	2530	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
	2531	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
	2532	{
	2533	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
	2534	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
	2535	}
	2536	}
	2537	}
	2538	}
	2539	}
	2540	/** set quantized matrix coefficient for decode
	2541	* \param scalingList quantaized matrix address
	2542	*/
	2543	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
	2544	{
	2545	UInt size,list;
	2546	UInt qp;
	2547
	2548	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
	2549	{
	2550	for(list = 0; list < g_scalingListNum[size]; list++)
	2551	{
	2552	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
	2553	{
	2554	xSetScalingListDec(scalingList,list,size,qp);
	2555	}
	2556	}
	2557	}
	2558	}
	2559	/** set error scale coefficients
	2560	* \param list List ID
	2561	* \param uiSize Size
	2562	* \param uiQP Quantization parameter
	2563	*/
	2564	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp, UInt dir)
	2565	{
	2566
	2567	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
	2568	#if FULL_NBIT
	2569	UInt uiBitDepth = g_uiBitDepth;
	2570	#else
	2571	UInt uiBitDepth = g_uiBitDepth + g_uiBitIncrement;
	2572	#endif
	2573
	2574	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
	2575
	2576	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
	2577	Int *piQuantcoeff;
	2578	double *pdErrScale;
	2579	piQuantcoeff = getQuantCoeff(list, qp,size,dir);
	2580	pdErrScale = getErrScaleCoeff(list, size, qp,dir);
	2581
	2582	double dErrScale = (double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
	2583	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
	2584	for(i=0;i<uiMaxNumCoeff;i++)
	2585	{
	2586	pdErrScale[i] = dErrScale/(double)piQuantcoeff[i]/(double)piQuantcoeff[i]/(double)(1<<(2*g_uiBitIncrement));
	2587	}
	2588	}
	2589
	2590	/** set quantized matrix coefficient for encode
	2591	* \param scalingList quantaized matrix address
	2592	* \param listId List index
	2593	* \param sizeId size index
	2594	* \param uiQP Quantization parameter
	2595	*/
	2596	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
	2597	{
	2598	UInt width = g_scalingListSizeX[sizeId];
	2599	UInt height = g_scalingListSizeX[sizeId];
	2600	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
	2601	Int *quantcoeff;
	2602	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
	2603	quantcoeff = getQuantCoeff(listId, qp, sizeId, SCALING_LIST_SQT);
	2604
	2605	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2606
	2607	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16) //for NSQT
	2608	{
	2609	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
	2610	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2611
	2612	quantcoeff = getQuantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
	2613	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2614	}
	2615	}
	2616	/** set quantized matrix coefficient for decode
	2617	* \param scalingList quantaized matrix address
	2618	* \param list List index
	2619	* \param size size index
	2620	* \param uiQP Quantization parameter
	2621	*/
	2622	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
	2623	{
	2624	UInt width = g_scalingListSizeX[sizeId];
	2625	UInt height = g_scalingListSizeX[sizeId];
	2626	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
	2627	Int *dequantcoeff;
	2628	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
	2629
	2630	dequantcoeff = getDequantCoeff(listId, qp, sizeId,SCALING_LIST_SQT);
	2631	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2632
	2633	if(sizeId == SCALING_LIST_32x32 \|\| sizeId == SCALING_LIST_16x16)
	2634	{
	2635	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_VER);
	2636	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width>>2,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2637
	2638	dequantcoeff = getDequantCoeff(listId, qp, sizeId-1,SCALING_LIST_HOR);
	2639
	2640	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height>>2,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2641	}
	2642	}
	2643
	2644	/** set flat matrix value to quantized coefficient
	2645	*/
	2646	Void TComTrQuant::setFlatScalingList()
	2647	{
	2648	UInt size,list;
	2649	UInt qp;
	2650
	2651	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
	2652	{
	2653	for(list = 0; list < g_scalingListNum[size]; list++)
	2654	{
	2655	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
	2656	{
	2657	xsetFlatScalingList(list,size,qp);
	2658	setErrScaleCoeff(list,size,qp,SCALING_LIST_SQT);
	2659	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
	2660	{
	2661	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_HOR);
	2662	setErrScaleCoeff(list,size-1,qp,SCALING_LIST_VER);
	2663	}
	2664	}
	2665	}
	2666	}
	2667	}
	2668
	2669	/** set flat matrix value to quantized coefficient
	2670	* \param list List ID
	2671	* \param uiQP Quantization parameter
	2672	* \param uiSize Size
	2673	*/
	2674	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
	2675	{
	2676	UInt i,num = g_scalingListSize[size];
	2677	UInt numDiv4 = num>>2;
	2678	Int *quantcoeff;
	2679	Int *dequantcoeff;
	2680	Int quantScales = g_quantScales[qp];
	2681	Int invQuantScales = g_invQuantScales[qp]<<4;
	2682
	2683	quantcoeff = getQuantCoeff(list, qp, size,SCALING_LIST_SQT);
	2684	dequantcoeff = getDequantCoeff(list, qp, size,SCALING_LIST_SQT);
	2685
	2686	for(i=0;i<num;i++)
	2687	{
	2688	*quantcoeff++ = quantScales;
	2689	*dequantcoeff++ = invQuantScales;
	2690	}
	2691
	2692	if(size == SCALING_LIST_32x32 \|\| size == SCALING_LIST_16x16)
	2693	{
	2694	quantcoeff = getQuantCoeff(list, qp, size-1, SCALING_LIST_HOR);
	2695	dequantcoeff = getDequantCoeff(list, qp, size-1, SCALING_LIST_HOR);
	2696
	2697	for(i=0;i<numDiv4;i++)
	2698	{
	2699	*quantcoeff++ = quantScales;
	2700	*dequantcoeff++ = invQuantScales;
	2701	}
	2702	quantcoeff = getQuantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
	2703	dequantcoeff = getDequantCoeff(list, qp, size-1 ,SCALING_LIST_VER);
	2704
	2705	for(i=0;i<numDiv4;i++)
	2706	{
	2707	*quantcoeff++ = quantScales;
	2708	*dequantcoeff++ = invQuantScales;
	2709	}
	2710	}
	2711	}
	2712
	2713	/** set quantized matrix coefficient for encode
	2714	* \param coeff quantaized matrix address
	2715	* \param quantcoeff quantaized matrix address
	2716	* \param quantScales Q(QP%6)
	2717	* \param height height
	2718	* \param width width
	2719	* \param ratio ratio for upscale
	2720	* \param sizuNum matrix size
	2721	* \param dc dc parameter
	2722	*/
	2723	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
	2724	{
	2725	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
	2726	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
	2727	for(UInt j=0;j<height;j++)
	2728	{
	2729	for(UInt i=0;i<width;i++)
	2730	{
	2731	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
	2732	}
	2733	}
	2734	if(ratio > 1)
	2735	{
	2736	quantcoeff[0] = quantScales / dc;
	2737	}
	2738	}
	2739	/** set quantized matrix coefficient for decode
	2740	* \param coeff quantaized matrix address
	2741	* \param dequantcoeff quantaized matrix address
	2742	* \param invQuantScales IQ(QP%6))
	2743	* \param height height
	2744	* \param width width
	2745	* \param ratio ratio for upscale
	2746	* \param sizuNum matrix size
	2747	* \param dc dc parameter
	2748	*/
	2749	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
	2750	{
	2751	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
	2752	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
	2753	for(UInt j=0;j<height;j++)
	2754	{
	2755	for(UInt i=0;i<width;i++)
	2756	{
	2757	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j * nsqth / ratio) + i * nsqtw /ratio];
	2758	}
	2759	}
	2760	if(ratio > 1)
	2761	{
	2762	dequantcoeff[0] = invQuantScales * dc;
	2763	}
	2764	}
	2765
	2766	/** initialization process of scaling list array
	2767	*/
	2768	Void TComTrQuant::initScalingList()
	2769	{
	2770	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
	2771	{
	2772	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
	2773	{
	2774	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
	2775	{
	2776	m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
	2777	m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT] = new Int [g_scalingListSize[sizeId]];
	2778	m_errScale [sizeId][listId][qp][SCALING_LIST_SQT] = new double [g_scalingListSize[sizeId]];
	2779
	2780	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
	2781	{
	2782	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
	2783	{
	2784	m_quantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
	2785	m_dequantCoef [sizeId][listId][qp][dir] = new Int [g_scalingListSize[sizeId]];
	2786	m_errScale [sizeId][listId][qp][dir] = new double [g_scalingListSize[sizeId]];
	2787	}
	2788	}
	2789	}
	2790	}
	2791	}
	2792	//copy for NSQT
	2793	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
	2794	{
	2795	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
	2796	{
	2797	m_quantCoef [SCALING_LIST_16x16][3][qp][dir] = m_quantCoef [SCALING_LIST_16x16][1][qp][dir];
	2798	m_dequantCoef [SCALING_LIST_16x16][3][qp][dir] = m_dequantCoef [SCALING_LIST_16x16][1][qp][dir];
	2799	m_errScale [SCALING_LIST_16x16][3][qp][dir] = m_errScale [SCALING_LIST_16x16][1][qp][dir];
	2800	}
	2801	m_quantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_quantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
	2802	m_dequantCoef [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_dequantCoef [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
	2803	m_errScale [SCALING_LIST_32x32][3][qp][SCALING_LIST_SQT] = m_errScale [SCALING_LIST_32x32][1][qp][SCALING_LIST_SQT];
	2804	}
	2805	}
	2806	/** destroy quantization matrix array
	2807	*/
	2808	Void TComTrQuant::destroyScalingList()
	2809	{
	2810	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
	2811	{
	2812	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
	2813	{
	2814	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
	2815	{
	2816	if(m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_quantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
	2817	if(m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_dequantCoef [sizeId][listId][qp][SCALING_LIST_SQT];
	2818	if(m_errScale [sizeId][listId][qp][SCALING_LIST_SQT]) delete [] m_errScale [sizeId][listId][qp][SCALING_LIST_SQT];
	2819	if(sizeId == SCALING_LIST_8x8 \|\| (sizeId == SCALING_LIST_16x16 && listId < 2))
	2820	{
	2821	for(UInt dir = SCALING_LIST_VER; dir < SCALING_LIST_DIR_NUM; dir++)
	2822	{
	2823	if(m_quantCoef [sizeId][listId][qp][dir]) delete [] m_quantCoef [sizeId][listId][qp][dir];
	2824	if(m_dequantCoef [sizeId][listId][qp][dir]) delete [] m_dequantCoef [sizeId][listId][qp][dir];
	2825	if(m_errScale [sizeId][listId][qp][dir]) delete [] m_errScale [sizeId][listId][qp][dir];
	2826	}
	2827	}
	2828	}
	2829	}
	2830	}
	2831	}
	2832
	2833	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: