Context navigation

TComTrQuant.cpp @ 844

Visit:

Last change on this file since 844 was 595, checked in by seregin, 11 years ago
merge with SHM-5.0-dev branch
Property svn:eol-style set to `native`
File size: 82.2 KB

Rev	Line
[313]	1	/* The copyright in this software is being made available under the BSD
	2	* License, included below. This software may be subject to other third party
	3	* and contributor rights, including patent rights, and no such rights are
	4	* granted under this license.
	5	*
[595]	6	* Copyright (c) 2010-2014, ITU/ISO/IEC
[313]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions are met:
	11	*
	12	* * Redistributions of source code must retain the above copyright notice,
	13	* this list of conditions and the following disclaimer.
	14	* * Redistributions in binary form must reproduce the above copyright notice,
	15	* this list of conditions and the following disclaimer in the documentation
	16	* and/or other materials provided with the distribution.
	17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
	18	* be used to endorse or promote products derived from this software without
	19	* specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
	25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	31	* THE POSSIBILITY OF SUCH DAMAGE.
	32	*/
	33
	34	/** \file TComTrQuant.cpp
	35	\brief transform and quantization class
	36	*/
	37
	38	#include <stdlib.h>
	39	#include <math.h>
	40	#include <memory.h>
	41	#include "TComTrQuant.h"
	42	#include "TComPic.h"
	43	#include "ContextTables.h"
[442]	44	#define MAYBE_BUGFIX 1
[313]	45	typedef struct
	46	{
	47	Int iNNZbeforePos0;
	48	Double d64CodedLevelandDist; // distortion and level cost only
	49	Double d64UncodedDist; // all zero coded block distortion
	50	Double d64SigCost;
	51	Double d64SigCost_0;
[442]	52	#if MAYBE_BUGFIX
	53	Void init()
	54	{
	55	iNNZbeforePos0 = 0;
[540]	56	d64CodedLevelandDist = 0;
	57	d64UncodedDist = 0;
	58	d64SigCost = 0;
	59	d64SigCost_0 = 0;
[442]	60	}
	61	#endif
[313]	62	} coeffGroupRDStats;
	63
	64	//! \ingroup TLibCommon
	65	//! \{
	66
	67	// ====================================================================================================================
	68	// Constants
	69	// ====================================================================================================================
	70
	71	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
	72
	73	// ====================================================================================================================
	74	// Tables
	75	// ====================================================================================================================
	76
	77	// RDOQ parameter
	78
	79	// ====================================================================================================================
	80	// Qp class member functions
	81	// ====================================================================================================================
	82
	83	QpParam::QpParam()
	84	{
	85	}
	86
	87	// ====================================================================================================================
	88	// TComTrQuant class member functions
	89	// ====================================================================================================================
	90
	91	TComTrQuant::TComTrQuant()
	92	{
	93	m_cQP.clear();
	94
	95	// allocate temporary buffers
	96	m_plTempCoeff = new Int[ MAX_CU_SIZE*MAX_CU_SIZE ];
	97
	98	// allocate bit estimation class (for RDOQ)
	99	m_pcEstBitsSbac = new estBitsSbacStruct;
	100	initScalingList();
	101	}
	102
	103	TComTrQuant::~TComTrQuant()
	104	{
	105	// delete temporary buffers
	106	if ( m_plTempCoeff )
	107	{
	108	delete [] m_plTempCoeff;
	109	m_plTempCoeff = NULL;
	110	}
	111
	112	// delete bit estimation class
	113	if ( m_pcEstBitsSbac )
	114	{
	115	delete m_pcEstBitsSbac;
	116	}
	117	destroyScalingList();
	118	}
	119
	120	#if ADAPTIVE_QP_SELECTION
	121	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
	122	{
	123	Int qpBase = pcSlice->getSliceQpBase();
	124	Int sliceQpused = pcSlice->getSliceQp();
	125	Int sliceQpnext;
	126	Double alpha = qpBase < 17 ? 0.5 : 1;
	127
	128	Int cnt=0;
	129	for(Int u=1; u<=LEVEL_RANGE; u++)
	130	{
	131	cnt += m_sliceNsamples[u] ;
	132	}
	133
	134	if( !m_useRDOQ )
	135	{
	136	sliceQpused = qpBase;
	137	alpha = 0.5;
	138	}
	139
	140	if( cnt > 120 )
	141	{
	142	Double sum = 0;
	143	Int k = 0;
	144	for(Int u=1; u<LEVEL_RANGE; u++)
	145	{
	146	sum += u*m_sliceSumC[u];
	147	k += uum_sliceNsamples[u];
	148	}
	149
	150	Int v;
	151	Double q[MAX_QP+1] ;
	152	for(v=0; v<=MAX_QP; v++)
	153	{
	154	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
	155	}
	156
	157	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
	158
	159	for(v=0; v<MAX_QP; v++)
	160	{
	161	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
	162	{
	163	break;
	164	}
	165	}
	166	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
	167	}
	168	else
	169	{
	170	sliceQpnext = sliceQpused;
	171	}
	172
	173	m_qpDelta[qpBase] = sliceQpnext - qpBase;
	174	}
	175
	176	Void TComTrQuant::initSliceQpDelta()
	177	{
	178	for(Int qp=0; qp<=MAX_QP; qp++)
	179	{
	180	m_qpDelta[qp] = qp < 17 ? 0 : 1;
	181	}
	182	}
	183
	184	Void TComTrQuant::clearSliceARLCnt()
	185	{
	186	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
	187	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
	188	}
	189	#endif
	190
	191
	192	/** Set qP for Quantization.
	193	* \param qpy QPy
	194	* \param bLowpass
	195	* \param eSliceType
	196	* \param eTxtType
	197	* \param qpBdOffset
	198	* \param chromaQPOffset
	199	*
	200	* return void
	201	*/
	202	Void TComTrQuant::setQPforQuant( Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset)
	203	{
	204	Int qpScaled;
	205
	206	if(eTxtType == TEXT_LUMA)
	207	{
	208	qpScaled = qpy + qpBdOffset;
	209	}
	210	else
	211	{
	212	qpScaled = Clip3( -qpBdOffset, 57, qpy + chromaQPOffset );
	213
	214	if(qpScaled < 0)
	215	{
	216	qpScaled = qpScaled + qpBdOffset;
	217	}
	218	else
	219	{
	220	qpScaled = g_aucChromaScale[ qpScaled ] + qpBdOffset;
	221	}
	222	}
	223	m_cQP.setQpParam( qpScaled );
	224	}
	225
	226	#if MATRIX_MULT
	227	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
	228	* \param block pointer to input data (residual)
	229	* \param coeff pointer to output data (transform coefficients)
	230	* \param uiStride stride of input data
	231	* \param uiTrSize transform size (uiTrSize x uiTrSize)
	232	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	233	*/
	234	void xTr(Int bitDepth, Pel block, Int coeff, UInt uiStride, UInt uiTrSize, UInt uiMode)
	235	{
	236	Int i,j,k,iSum;
	237	Int tmp[32*32];
	238	const Short *iT;
	239	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
	240
	241	if (uiTrSize==4)
	242	{
	243	iT = g_aiT4[0];
	244	}
	245	else if (uiTrSize==8)
	246	{
	247	iT = g_aiT8[0];
	248	}
	249	else if (uiTrSize==16)
	250	{
	251	iT = g_aiT16[0];
	252	}
	253	else if (uiTrSize==32)
	254	{
	255	iT = g_aiT32[0];
	256	}
	257	else
	258	{
	259	assert(0);
	260	}
	261
	262	Int shift_1st = uiLog2TrSize - 1 + bitDepth-8; // log2(N) - 1 + g_bitDepth-8
	263	Int add_1st = 1<<(shift_1st-1);
	264	Int shift_2nd = uiLog2TrSize + 6;
	265	Int add_2nd = 1<<(shift_2nd-1);
	266
	267	/* Horizontal transform */
	268
	269	if (uiTrSize==4)
	270	{
	271	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode])
	272	{
	273	iT = g_as_DST_MAT_4[0];
	274	}
	275	}
	276	for (i=0; i<uiTrSize; i++)
	277	{
	278	for (j=0; j<uiTrSize; j++)
	279	{
	280	iSum = 0;
	281	for (k=0; k<uiTrSize; k++)
	282	{
	283	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
	284	}
	285	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
	286	}
	287	}
	288
	289	/* Vertical transform */
	290	if (uiTrSize==4)
	291	{
	292	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode])
	293	{
	294	iT = g_as_DST_MAT_4[0];
	295	}
	296	else
	297	{
	298	iT = g_aiT4[0];
	299	}
	300	}
	301	for (i=0; i<uiTrSize; i++)
	302	{
	303	for (j=0; j<uiTrSize; j++)
	304	{
	305	iSum = 0;
	306	for (k=0; k<uiTrSize; k++)
	307	{
	308	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
	309	}
	310	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
	311	}
	312	}
	313	}
	314
	315	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
	316	* \param coeff pointer to input data (transform coefficients)
	317	* \param block pointer to output data (residual)
	318	* \param uiStride stride of output data
	319	* \param uiTrSize transform size (uiTrSize x uiTrSize)
	320	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	321	*/
	322	void xITr(Int coeff, Pel block, UInt uiStride, UInt uiTrSize, UInt uiMode)
	323	{
	324	Int i,j,k,iSum;
	325	Int tmp[32*32];
	326	const Short *iT;
	327
	328	if (uiTrSize==4)
	329	{
	330	iT = g_aiT4[0];
	331	}
	332	else if (uiTrSize==8)
	333	{
	334	iT = g_aiT8[0];
	335	}
	336	else if (uiTrSize==16)
	337	{
	338	iT = g_aiT16[0];
	339	}
	340	else if (uiTrSize==32)
	341	{
	342	iT = g_aiT32[0];
	343	}
	344	else
	345	{
	346	assert(0);
	347	}
	348
	349	Int shift_1st = SHIFT_INV_1ST;
	350	Int add_1st = 1<<(shift_1st-1);
	351	Int shift_2nd = SHIFT_INV_2ND - g_bitDepth-8;
	352	Int add_2nd = 1<<(shift_2nd-1);
	353	if (uiTrSize==4)
	354	{
	355	if (uiMode != REG_DCT && g_aucDCTDSTMode_Vert[uiMode] ) // Check for DCT or DST
	356	{
	357	iT = g_as_DST_MAT_4[0];
	358	}
	359	}
	360
	361	/* Horizontal transform */
	362	for (i=0; i<uiTrSize; i++)
	363	{
	364	for (j=0; j<uiTrSize; j++)
	365	{
	366	iSum = 0;
	367	for (k=0; k<uiTrSize; k++)
	368	{
	369	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
	370	}
	371	tmp[i*uiTrSize+j] = Clip3(-32768, 32767, (iSum + add_1st)>>shift_1st); // Clipping is normative
	372	}
	373	}
	374
	375	if (uiTrSize==4)
	376	{
	377	if (uiMode != REG_DCT && g_aucDCTDSTMode_Hor[uiMode] ) // Check for DCT or DST
	378	{
	379	iT = g_as_DST_MAT_4[0];
	380	}
	381	else
	382	{
	383	iT = g_aiT4[0];
	384	}
	385	}
	386
	387	/* Vertical transform */
	388	for (i=0; i<uiTrSize; i++)
	389	{
	390	for (j=0; j<uiTrSize; j++)
	391	{
	392	iSum = 0;
	393	for (k=0; k<uiTrSize; k++)
	394	{
	395	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
	396	}
	397	block[i*uiStride+j] = Clip3(-32768, 32767, (iSum + add_2nd)>>shift_2nd); // Clipping is non-normative
	398	}
	399	}
	400	}
	401
	402	#else //MATRIX_MULT
	403
	404	/** 4x4 forward transform implemented using partial butterfly structure (1D)
	405	* \param src input data (residual)
	406	* \param dst output data (transform coefficients)
	407	* \param shift specifies right shift after 1D transform
	408	*/
	409
	410	void partialButterfly4(Short src,Short dst,Int shift, Int line)
	411	{
	412	Int j;
	413	Int E[2],O[2];
	414	Int add = 1<<(shift-1);
	415
	416	for (j=0; j<line; j++)
	417	{
	418	/* E and O */
	419	E[0] = src[0] + src[3];
	420	O[0] = src[0] - src[3];
	421	E[1] = src[1] + src[2];
	422	O[1] = src[1] - src[2];
	423
	424	dst[0] = (g_aiT4[0][0]E[0] + g_aiT4[0][1]E[1] + add)>>shift;
	425	dst[2line] = (g_aiT4[2][0]E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
	426	dst[line] = (g_aiT4[1][0]O[0] + g_aiT4[1][1]O[1] + add)>>shift;
	427	dst[3line] = (g_aiT4[3][0]O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
	428
	429	src += 4;
	430	dst ++;
	431	}
	432	}
	433
	434	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
	435	// give identical results
	436	void fastForwardDst(Short block,Short coeff,Int shift) // input block, output coeff
	437	{
	438	Int i, c[4];
	439	Int rnd_factor = 1<<(shift-1);
	440	for (i=0; i<4; i++)
	441	{
	442	// Intermediate Variables
	443	c[0] = block[4i+0] + block[4i+3];
	444	c[1] = block[4i+1] + block[4i+3];
	445	c[2] = block[4i+0] - block[4i+1];
	446	c[3] = 74* block[4*i+2];
	447
	448	coeff[ i] = ( 29 * c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift;
	449	coeff[ 4+i] = ( 74 * (block[4i+0]+ block[4i+1] - block[4*i+3]) + rnd_factor ) >> shift;
	450	coeff[ 8+i] = ( 29 * c[2] + 55 * c[0] - c[3] + rnd_factor ) >> shift;
	451	coeff[12+i] = ( 55 * c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift;
	452	}
	453	}
	454
	455	void fastInverseDst(Short tmp,Short block,Int shift) // input tmp, output block
	456	{
	457	Int i, c[4];
	458	Int rnd_factor = 1<<(shift-1);
	459	for (i=0; i<4; i++)
	460	{
	461	// Intermediate Variables
	462	c[0] = tmp[ i] + tmp[ 8+i];
	463	c[1] = tmp[8+i] + tmp[12+i];
	464	c[2] = tmp[ i] - tmp[12+i];
	465	c[3] = 74* tmp[4+i];
	466
	467	block[4i+0] = Clip3( -32768, 32767, ( 29 c[0] + 55 * c[1] + c[3] + rnd_factor ) >> shift );
	468	block[4i+1] = Clip3( -32768, 32767, ( 55 c[2] - 29 * c[1] + c[3] + rnd_factor ) >> shift );
	469	block[4i+2] = Clip3( -32768, 32767, ( 74 (tmp[i] - tmp[8+i] + tmp[12+i]) + rnd_factor ) >> shift );
	470	block[4i+3] = Clip3( -32768, 32767, ( 55 c[0] + 29 * c[2] - c[3] + rnd_factor ) >> shift );
	471	}
	472	}
	473
	474	void partialButterflyInverse4(Short src,Short dst,Int shift, Int line)
	475	{
	476	Int j;
	477	Int E[2],O[2];
	478	Int add = 1<<(shift-1);
	479
	480	for (j=0; j<line; j++)
	481	{
	482	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	483	O[0] = g_aiT4[1][0]src[line] + g_aiT4[3][0]src[3*line];
	484	O[1] = g_aiT4[1][1]src[line] + g_aiT4[3][1]src[3*line];
	485	E[0] = g_aiT4[0][0]src[0] + g_aiT4[2][0]src[2*line];
	486	E[1] = g_aiT4[0][1]src[0] + g_aiT4[2][1]src[2*line];
	487
	488	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	489	dst[0] = Clip3( -32768, 32767, (E[0] + O[0] + add)>>shift );
	490	dst[1] = Clip3( -32768, 32767, (E[1] + O[1] + add)>>shift );
	491	dst[2] = Clip3( -32768, 32767, (E[1] - O[1] + add)>>shift );
	492	dst[3] = Clip3( -32768, 32767, (E[0] - O[0] + add)>>shift );
	493
	494	src ++;
	495	dst += 4;
	496	}
	497	}
	498
	499
	500	void partialButterfly8(Short src,Short dst,Int shift, Int line)
	501	{
	502	Int j,k;
	503	Int E[4],O[4];
	504	Int EE[2],EO[2];
	505	Int add = 1<<(shift-1);
	506
	507	for (j=0; j<line; j++)
	508	{
	509	/* E and O*/
	510	for (k=0;k<4;k++)
	511	{
	512	E[k] = src[k] + src[7-k];
	513	O[k] = src[k] - src[7-k];
	514	}
	515	/* EE and EO */
	516	EE[0] = E[0] + E[3];
	517	EO[0] = E[0] - E[3];
	518	EE[1] = E[1] + E[2];
	519	EO[1] = E[1] - E[2];
	520
	521	dst[0] = (g_aiT8[0][0]EE[0] + g_aiT8[0][1]EE[1] + add)>>shift;
	522	dst[4line] = (g_aiT8[4][0]EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift;
	523	dst[2line] = (g_aiT8[2][0]EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift;
	524	dst[6line] = (g_aiT8[6][0]EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift;
	525
	526	dst[line] = (g_aiT8[1][0]O[0] + g_aiT8[1][1]O[1] + g_aiT8[1][2]O[2] + g_aiT8[1][3]O[3] + add)>>shift;
	527	dst[3line] = (g_aiT8[3][0]O[0] + g_aiT8[3][1]O[1] + g_aiT8[3][2]O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
	528	dst[5line] = (g_aiT8[5][0]O[0] + g_aiT8[5][1]O[1] + g_aiT8[5][2]O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
	529	dst[7line] = (g_aiT8[7][0]O[0] + g_aiT8[7][1]O[1] + g_aiT8[7][2]O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
	530
	531	src += 8;
	532	dst ++;
	533	}
	534	}
	535
	536
	537	void partialButterflyInverse8(Short src,Short dst,Int shift, Int line)
	538	{
	539	Int j,k;
	540	Int E[4],O[4];
	541	Int EE[2],EO[2];
	542	Int add = 1<<(shift-1);
	543
	544	for (j=0; j<line; j++)
	545	{
	546	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	547	for (k=0;k<4;k++)
	548	{
	549	O[k] = g_aiT8[ 1][k]src[line] + g_aiT8[ 3][k]src[3line] + g_aiT8[ 5][k]src[5line] + g_aiT8[ 7][k]src[7*line];
	550	}
	551
	552	EO[0] = g_aiT8[2][0]src[ 2line ] + g_aiT8[6][0]src[ 6line ];
	553	EO[1] = g_aiT8[2][1]src[ 2line ] + g_aiT8[6][1]src[ 6line ];
	554	EE[0] = g_aiT8[0][0]src[ 0 ] + g_aiT8[4][0]src[ 4*line ];
	555	EE[1] = g_aiT8[0][1]src[ 0 ] + g_aiT8[4][1]src[ 4*line ];
	556
	557	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	558	E[0] = EE[0] + EO[0];
	559	E[3] = EE[0] - EO[0];
	560	E[1] = EE[1] + EO[1];
	561	E[2] = EE[1] - EO[1];
	562	for (k=0;k<4;k++)
	563	{
	564	dst[ k ] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
	565	dst[ k+4 ] = Clip3( -32768, 32767, (E[3-k] - O[3-k] + add)>>shift );
	566	}
	567	src ++;
	568	dst += 8;
	569	}
	570	}
	571
	572
	573	void partialButterfly16(Short src,Short dst,Int shift, Int line)
	574	{
	575	Int j,k;
	576	Int E[8],O[8];
	577	Int EE[4],EO[4];
	578	Int EEE[2],EEO[2];
	579	Int add = 1<<(shift-1);
	580
	581	for (j=0; j<line; j++)
	582	{
	583	/* E and O*/
	584	for (k=0;k<8;k++)
	585	{
	586	E[k] = src[k] + src[15-k];
	587	O[k] = src[k] - src[15-k];
	588	}
	589	/* EE and EO */
	590	for (k=0;k<4;k++)
	591	{
	592	EE[k] = E[k] + E[7-k];
	593	EO[k] = E[k] - E[7-k];
	594	}
	595	/* EEE and EEO */
	596	EEE[0] = EE[0] + EE[3];
	597	EEO[0] = EE[0] - EE[3];
	598	EEE[1] = EE[1] + EE[2];
	599	EEO[1] = EE[1] - EE[2];
	600
	601	dst[ 0 ] = (g_aiT16[ 0][0]EEE[0] + g_aiT16[ 0][1]EEE[1] + add)>>shift;
	602	dst[ 8line ] = (g_aiT16[ 8][0]EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;
	603	dst[ 4line ] = (g_aiT16[ 4][0]EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;
	604	dst[ 12line] = (g_aiT16[12][0]EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
	605
	606	for (k=2;k<16;k+=4)
	607	{
	608	dst[ kline ] = (g_aiT16[k][0]EO[0] + g_aiT16[k][1]EO[1] + g_aiT16[k][2]EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;
	609	}
	610
	611	for (k=1;k<16;k+=2)
	612	{
	613	dst[ kline ] = (g_aiT16[k][0]O[0] + g_aiT16[k][1]O[1] + g_aiT16[k][2]O[2] + g_aiT16[k][3]*O[3] +
	614	g_aiT16[k][4]O[4] + g_aiT16[k][5]O[5] + g_aiT16[k][6]O[6] + g_aiT16[k][7]O[7] + add)>>shift;
	615	}
	616
	617	src += 16;
	618	dst ++;
	619
	620	}
	621	}
	622
	623
	624	void partialButterflyInverse16(Short src,Short dst,Int shift, Int line)
	625	{
	626	Int j,k;
	627	Int E[8],O[8];
	628	Int EE[4],EO[4];
	629	Int EEE[2],EEO[2];
	630	Int add = 1<<(shift-1);
	631
	632	for (j=0; j<line; j++)
	633	{
	634	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	635	for (k=0;k<8;k++)
	636	{
	637	O[k] = g_aiT16[ 1][k]src[ line] + g_aiT16[ 3][k]src[ 3line] + g_aiT16[ 5][k]src[ 5line] + g_aiT16[ 7][k]src[ 7*line] +
	638	g_aiT16[ 9][k]src[ 9line] + g_aiT16[11][k]src[11line] + g_aiT16[13][k]src[13line] + g_aiT16[15][k]src[15line];
	639	}
	640	for (k=0;k<4;k++)
	641	{
	642	EO[k] = g_aiT16[ 2][k]src[ 2line] + g_aiT16[ 6][k]src[ 6line] + g_aiT16[10][k]src[10line] + g_aiT16[14][k]src[14line];
	643	}
	644	EEO[0] = g_aiT16[4][0]src[ 4line ] + g_aiT16[12][0]src[ 12line ];
	645	EEE[0] = g_aiT16[0][0]src[ 0 ] + g_aiT16[ 8][0]src[ 8*line ];
	646	EEO[1] = g_aiT16[4][1]src[ 4line ] + g_aiT16[12][1]src[ 12line ];
	647	EEE[1] = g_aiT16[0][1]src[ 0 ] + g_aiT16[ 8][1]src[ 8*line ];
	648
	649	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	650	for (k=0;k<2;k++)
	651	{
	652	EE[k] = EEE[k] + EEO[k];
	653	EE[k+2] = EEE[1-k] - EEO[1-k];
	654	}
	655	for (k=0;k<4;k++)
	656	{
	657	E[k] = EE[k] + EO[k];
	658	E[k+4] = EE[3-k] - EO[3-k];
	659	}
	660	for (k=0;k<8;k++)
	661	{
	662	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
	663	dst[k+8] = Clip3( -32768, 32767, (E[7-k] - O[7-k] + add)>>shift );
	664	}
	665	src ++;
	666	dst += 16;
	667	}
	668	}
	669
	670
	671	void partialButterfly32(Short src,Short dst,Int shift, Int line)
	672	{
	673	Int j,k;
	674	Int E[16],O[16];
	675	Int EE[8],EO[8];
	676	Int EEE[4],EEO[4];
	677	Int EEEE[2],EEEO[2];
	678	Int add = 1<<(shift-1);
	679
	680	for (j=0; j<line; j++)
	681	{
	682	/* E and O*/
	683	for (k=0;k<16;k++)
	684	{
	685	E[k] = src[k] + src[31-k];
	686	O[k] = src[k] - src[31-k];
	687	}
	688	/* EE and EO */
	689	for (k=0;k<8;k++)
	690	{
	691	EE[k] = E[k] + E[15-k];
	692	EO[k] = E[k] - E[15-k];
	693	}
	694	/* EEE and EEO */
	695	for (k=0;k<4;k++)
	696	{
	697	EEE[k] = EE[k] + EE[7-k];
	698	EEO[k] = EE[k] - EE[7-k];
	699	}
	700	/* EEEE and EEEO */
	701	EEEE[0] = EEE[0] + EEE[3];
	702	EEEO[0] = EEE[0] - EEE[3];
	703	EEEE[1] = EEE[1] + EEE[2];
	704	EEEO[1] = EEE[1] - EEE[2];
	705
	706	dst[ 0 ] = (g_aiT32[ 0][0]EEEE[0] + g_aiT32[ 0][1]EEEE[1] + add)>>shift;
	707	dst[ 16line ] = (g_aiT32[16][0]EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
	708	dst[ 8line ] = (g_aiT32[ 8][0]EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift;
	709	dst[ 24line ] = (g_aiT32[24][0]EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
	710	for (k=4;k<32;k+=8)
	711	{
	712	dst[ kline ] = (g_aiT32[k][0]EEO[0] + g_aiT32[k][1]EEO[1] + g_aiT32[k][2]EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
	713	}
	714	for (k=2;k<32;k+=4)
	715	{
	716	dst[ kline ] = (g_aiT32[k][0]EO[0] + g_aiT32[k][1]EO[1] + g_aiT32[k][2]EO[2] + g_aiT32[k][3]*EO[3] +
	717	g_aiT32[k][4]EO[4] + g_aiT32[k][5]EO[5] + g_aiT32[k][6]EO[6] + g_aiT32[k][7]EO[7] + add)>>shift;
	718	}
	719	for (k=1;k<32;k+=2)
	720	{
	721	dst[ kline ] = (g_aiT32[k][ 0]O[ 0] + g_aiT32[k][ 1]O[ 1] + g_aiT32[k][ 2]O[ 2] + g_aiT32[k][ 3]*O[ 3] +
	722	g_aiT32[k][ 4]O[ 4] + g_aiT32[k][ 5]O[ 5] + g_aiT32[k][ 6]O[ 6] + g_aiT32[k][ 7]O[ 7] +
	723	g_aiT32[k][ 8]O[ 8] + g_aiT32[k][ 9]O[ 9] + g_aiT32[k][10]O[10] + g_aiT32[k][11]O[11] +
	724	g_aiT32[k][12]O[12] + g_aiT32[k][13]O[13] + g_aiT32[k][14]O[14] + g_aiT32[k][15]O[15] + add)>>shift;
	725	}
	726	src += 32;
	727	dst ++;
	728	}
	729	}
	730
	731
	732	void partialButterflyInverse32(Short src,Short dst,Int shift, Int line)
	733	{
	734	Int j,k;
	735	Int E[16],O[16];
	736	Int EE[8],EO[8];
	737	Int EEE[4],EEO[4];
	738	Int EEEE[2],EEEO[2];
	739	Int add = 1<<(shift-1);
	740
	741	for (j=0; j<line; j++)
	742	{
	743	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	744	for (k=0;k<16;k++)
	745	{
	746	O[k] = g_aiT32[ 1][k]src[ line ] + g_aiT32[ 3][k]src[ 3line ] + g_aiT32[ 5][k]src[ 5line ] + g_aiT32[ 7][k]src[ 7*line ] +
	747	g_aiT32[ 9][k]src[ 9line ] + g_aiT32[11][k]src[ 11line ] + g_aiT32[13][k]src[ 13line ] + g_aiT32[15][k]src[ 15line ] +
	748	g_aiT32[17][k]src[ 17line ] + g_aiT32[19][k]src[ 19line ] + g_aiT32[21][k]src[ 21line ] + g_aiT32[23][k]src[ 23line ] +
	749	g_aiT32[25][k]src[ 25line ] + g_aiT32[27][k]src[ 27line ] + g_aiT32[29][k]src[ 29line ] + g_aiT32[31][k]src[ 31line ];
	750	}
	751	for (k=0;k<8;k++)
	752	{
	753	EO[k] = g_aiT32[ 2][k]src[ 2line ] + g_aiT32[ 6][k]src[ 6line ] + g_aiT32[10][k]src[ 10line ] + g_aiT32[14][k]src[ 14line ] +
	754	g_aiT32[18][k]src[ 18line ] + g_aiT32[22][k]src[ 22line ] + g_aiT32[26][k]src[ 26line ] + g_aiT32[30][k]src[ 30line ];
	755	}
	756	for (k=0;k<4;k++)
	757	{
	758	EEO[k] = g_aiT32[4][k]src[ 4line ] + g_aiT32[12][k]src[ 12line ] + g_aiT32[20][k]src[ 20line ] + g_aiT32[28][k]src[ 28line ];
	759	}
	760	EEEO[0] = g_aiT32[8][0]src[ 8line ] + g_aiT32[24][0]src[ 24line ];
	761	EEEO[1] = g_aiT32[8][1]src[ 8line ] + g_aiT32[24][1]src[ 24line ];
	762	EEEE[0] = g_aiT32[0][0]src[ 0 ] + g_aiT32[16][0]src[ 16*line ];
	763	EEEE[1] = g_aiT32[0][1]src[ 0 ] + g_aiT32[16][1]src[ 16*line ];
	764
	765	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	766	EEE[0] = EEEE[0] + EEEO[0];
	767	EEE[3] = EEEE[0] - EEEO[0];
	768	EEE[1] = EEEE[1] + EEEO[1];
	769	EEE[2] = EEEE[1] - EEEO[1];
	770	for (k=0;k<4;k++)
	771	{
	772	EE[k] = EEE[k] + EEO[k];
	773	EE[k+4] = EEE[3-k] - EEO[3-k];
	774	}
	775	for (k=0;k<8;k++)
	776	{
	777	E[k] = EE[k] + EO[k];
	778	E[k+8] = EE[7-k] - EO[7-k];
	779	}
	780	for (k=0;k<16;k++)
	781	{
	782	dst[k] = Clip3( -32768, 32767, (E[k] + O[k] + add)>>shift );
	783	dst[k+16] = Clip3( -32768, 32767, (E[15-k] - O[15-k] + add)>>shift );
	784	}
	785	src ++;
	786	dst += 32;
	787	}
	788	}
	789
	790	/** MxN forward transform (2D)
	791	* \param block input data (residual)
	792	* \param coeff output data (transform coefficients)
	793	* \param iWidth input data (width of transform)
	794	* \param iHeight input data (height of transform)
	795	*/
	796	void xTrMxN(Int bitDepth, Short block,Short coeff, Int iWidth, Int iHeight, UInt uiMode)
	797	{
	798	Int shift_1st = g_aucConvertToBit[iWidth] + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
	799	Int shift_2nd = g_aucConvertToBit[iHeight] + 8; // log2(iHeight) + 6
	800
	801	Short tmp[ 64 * 64 ];
	802
	803	if( iWidth == 4 && iHeight == 4)
	804	{
	805	if (uiMode != REG_DCT)
	806	{
	807	fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
	808	fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
	809	}
	810	else
	811	{
	812	partialButterfly4(block, tmp, shift_1st, iHeight);
	813	partialButterfly4(tmp, coeff, shift_2nd, iWidth);
	814	}
	815
	816	}
	817	else if( iWidth == 8 && iHeight == 8)
	818	{
	819	partialButterfly8( block, tmp, shift_1st, iHeight );
	820	partialButterfly8( tmp, coeff, shift_2nd, iWidth );
	821	}
	822	else if( iWidth == 16 && iHeight == 16)
	823	{
	824	partialButterfly16( block, tmp, shift_1st, iHeight );
	825	partialButterfly16( tmp, coeff, shift_2nd, iWidth );
	826	}
	827	else if( iWidth == 32 && iHeight == 32)
	828	{
	829	partialButterfly32( block, tmp, shift_1st, iHeight );
	830	partialButterfly32( tmp, coeff, shift_2nd, iWidth );
	831	}
	832	}
	833	/** MxN inverse transform (2D)
	834	* \param coeff input data (transform coefficients)
	835	* \param block output data (residual)
	836	* \param iWidth input data (width of transform)
	837	* \param iHeight input data (height of transform)
	838	*/
	839	void xITrMxN(Int bitDepth, Short coeff,Short block, Int iWidth, Int iHeight, UInt uiMode)
	840	{
	841	Int shift_1st = SHIFT_INV_1ST;
	842	Int shift_2nd = SHIFT_INV_2ND - (bitDepth-8);
	843
	844	Short tmp[ 64*64];
	845	if( iWidth == 4 && iHeight == 4)
	846	{
	847	if (uiMode != REG_DCT)
	848	{
	849	fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
	850	fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
	851	}
	852	else
	853	{
	854	partialButterflyInverse4(coeff,tmp,shift_1st,iWidth);
	855	partialButterflyInverse4(tmp,block,shift_2nd,iHeight);
	856	}
	857	}
	858	else if( iWidth == 8 && iHeight == 8)
	859	{
	860	partialButterflyInverse8(coeff,tmp,shift_1st,iWidth);
	861	partialButterflyInverse8(tmp,block,shift_2nd,iHeight);
	862	}
	863	else if( iWidth == 16 && iHeight == 16)
	864	{
	865	partialButterflyInverse16(coeff,tmp,shift_1st,iWidth);
	866	partialButterflyInverse16(tmp,block,shift_2nd,iHeight);
	867	}
	868	else if( iWidth == 32 && iHeight == 32)
	869	{
	870	partialButterflyInverse32(coeff,tmp,shift_1st,iWidth);
	871	partialButterflyInverse32(tmp,block,shift_2nd,iHeight);
	872	}
	873	}
	874
	875	#endif //MATRIX_MULT
	876
	877	// To minimize the distortion only. No rate is considered.
	878	Void TComTrQuant::signBitHidingHDQ( TCoeff* pQCoef, TCoeff* pCoef, UInt const scan, Int deltaU, Int width, Int height )
	879	{
	880	Int lastCG = -1;
	881	Int absSum = 0 ;
	882	Int n ;
	883
	884	for( Int subSet = (width*height-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
	885	{
	886	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
	887	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
	888	absSum = 0 ;
	889
	890	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
	891	{
	892	if( pQCoef[ scan[ n + subPos ]] )
	893	{
	894	lastNZPosInCG = n;
	895	break;
	896	}
	897	}
	898
	899	for(n = 0; n <SCAN_SET_SIZE; n++ )
	900	{
	901	if( pQCoef[ scan[ n + subPos ]] )
	902	{
	903	firstNZPosInCG = n;
	904	break;
	905	}
	906	}
	907
	908	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
	909	{
	910	absSum += pQCoef[ scan[ n + subPos ]];
	911	}
	912
	913	if(lastNZPosInCG>=0 && lastCG==-1)
	914	{
	915	lastCG = 1 ;
	916	}
	917
	918	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
	919	{
	920	UInt signbit = (pQCoef[scan[subPos+firstNZPosInCG]]>0?0:1) ;
	921	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
	922	{
	923	Int minCostInc = MAX_INT, minPos =-1, finalChange=0, curCost=MAX_INT, curChange=0;
	924
	925	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
	926	{
	927	UInt blkPos = scan[ n+subPos ];
	928	if(pQCoef[ blkPos ] != 0 )
	929	{
	930	if(deltaU[blkPos]>0)
	931	{
	932	curCost = - deltaU[blkPos];
	933	curChange=1 ;
	934	}
	935	else
	936	{
	937	//curChange =-1;
	938	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
	939	{
	940	curCost=MAX_INT ;
	941	}
	942	else
	943	{
	944	curCost = deltaU[blkPos];
	945	curChange =-1;
	946	}
	947	}
	948	}
	949	else
	950	{
	951	if(n<firstNZPosInCG)
	952	{
	953	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
	954	if(thisSignBit != signbit )
	955	{
	956	curCost = MAX_INT;
	957	}
	958	else
	959	{
	960	curCost = - (deltaU[blkPos]) ;
	961	curChange = 1 ;
	962	}
	963	}
	964	else
	965	{
	966	curCost = - (deltaU[blkPos]) ;
	967	curChange = 1 ;
	968	}
	969	}
	970
	971	if( curCost<minCostInc)
	972	{
	973	minCostInc = curCost ;
	974	finalChange = curChange ;
	975	minPos = blkPos ;
	976	}
	977	} //CG loop
	978
	979	if(pQCoef[minPos] == 32767 \|\| pQCoef[minPos] == -32768)
	980	{
	981	finalChange = -1;
	982	}
	983
	984	if(pCoef[minPos]>=0)
	985	{
	986	pQCoef[minPos] += finalChange ;
	987	}
	988	else
	989	{
	990	pQCoef[minPos] -= finalChange ;
	991	}
	992	} // Hide
	993	}
	994	if(lastCG==1)
	995	{
	996	lastCG=0 ;
	997	}
	998	} // TU loop
	999
	1000	return;
	1001	}
	1002
	1003	Void TComTrQuant::xQuant( TComDataCU* pcCU,
	1004	Int* pSrc,
	1005	TCoeff* pDes,
	1006	#if ADAPTIVE_QP_SELECTION
	1007	Int*& pArlDes,
	1008	#endif
	1009	Int iWidth,
	1010	Int iHeight,
	1011	UInt& uiAcSum,
	1012	TextType eTType,
	1013	UInt uiAbsPartIdx )
	1014	{
	1015	Int* piCoef = pSrc;
	1016	TCoeff* piQCoef = pDes;
	1017	#if ADAPTIVE_QP_SELECTION
	1018	Int* piArlCCoef = pArlDes;
	1019	#endif
	1020	Int iAdd = 0;
	1021
	1022	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
	1023	if ( useRDOQ && (eTType == TEXT_LUMA \|\| RDOQ_CHROMA))
	1024	{
	1025	#if ADAPTIVE_QP_SELECTION
	1026	xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
	1027	#else
	1028	xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
	1029	#endif
	1030	}
	1031	else
	1032	{
	1033	const UInt log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
	1034
	1035	UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
	1036	const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
	1037
	1038	Int deltaU[32*32] ;
	1039
	1040	#if ADAPTIVE_QP_SELECTION
	1041	QpParam cQpBase;
	1042	Int iQpBase = pcCU->getSlice()->getSliceQpBase();
	1043
	1044	Int qpScaled;
[442]	1045	#if REPN_FORMAT_IN_VPS
	1046	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getQpBDOffsetY() : pcCU->getSlice()->getQpBDOffsetC();
	1047	#else
[313]	1048	Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
[442]	1049	#endif
[313]	1050
	1051	if(eTType == TEXT_LUMA)
	1052	{
	1053	qpScaled = iQpBase + qpBDOffset;
	1054	}
	1055	else
	1056	{
	1057	Int chromaQPOffset;
	1058	if(eTType == TEXT_CHROMA_U)
	1059	{
	1060	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
	1061	}
	1062	else
	1063	{
	1064	chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
	1065	}
	1066	iQpBase = iQpBase + chromaQPOffset;
	1067
	1068	qpScaled = Clip3( -qpBDOffset, 57, iQpBase);
	1069
	1070	if(qpScaled < 0)
	1071	{
	1072	qpScaled = qpScaled + qpBDOffset;
	1073	}
	1074	else
	1075	{
	1076	qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
	1077	}
	1078	}
	1079	cQpBase.setQpParam(qpScaled);
	1080	#endif
	1081
	1082	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
	1083	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
[595]	1084	assert(scalingListType < SCALING_LIST_NUM);
[313]	1085	Int *piQuantCoeff = 0;
	1086	piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
	1087
	1088	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
	1089	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
	1090
	1091	#if ADAPTIVE_QP_SELECTION
	1092	Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
	1093	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
	1094	Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;
	1095	Int iAddC = 1 << (iQBitsC-1);
	1096	#else
	1097	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
	1098	iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
	1099	#endif
	1100
	1101	Int qBits8 = iQBits-8;
	1102	for( Int n = 0; n < iWidth*iHeight; n++ )
	1103	{
	1104	Int iLevel;
	1105	Int iSign;
	1106	UInt uiBlockPos = n;
	1107	iLevel = piCoef[uiBlockPos];
	1108	iSign = (iLevel < 0 ? -1: 1);
	1109
	1110	#if ADAPTIVE_QP_SELECTION
	1111	Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
	1112	if( m_bUseAdaptQpSelect )
	1113	{
	1114	piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
	1115	}
	1116	iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
	1117	deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
	1118	#else
	1119	iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
	1120	deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
	1121	#endif
	1122	uiAcSum += iLevel;
	1123	iLevel *= iSign;
	1124	piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
	1125	} // for n
	1126	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
	1127	{
	1128	if(uiAcSum>=2)
	1129	{
	1130	signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
	1131	}
	1132	}
	1133	} //if RDOQ
	1134	//return;
	1135
	1136	}
	1137
	1138	Void TComTrQuant::xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType )
	1139	{
	1140
	1141	const TCoeff* piQCoef = pSrc;
	1142	Int* piCoef = pDes;
	1143
	1144	if ( iWidth > (Int)m_uiMaxTrSize )
	1145	{
	1146	iWidth = m_uiMaxTrSize;
	1147	iHeight = m_uiMaxTrSize;
	1148	}
	1149
	1150	Int iShift,iAdd,iCoeffQ;
	1151	UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
	1152
	1153	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
	1154
	1155	iShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - iTransformShift;
	1156
	1157	TCoeff clipQCoef;
	1158
	1159	if(getUseScalingList())
	1160	{
	1161	iShift += 4;
	1162	Int *piDequantCoef = getDequantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
	1163
	1164	if(iShift > m_cQP.m_iPer)
	1165	{
	1166	iAdd = 1 << (iShift - m_cQP.m_iPer - 1);
	1167
	1168	for( Int n = 0; n < iWidth*iHeight; n++ )
	1169	{
	1170	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
	1171	iCoeffQ = ((clipQCoef * piDequantCoef[n]) + iAdd ) >> (iShift - m_cQP.m_iPer);
	1172	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
	1173	}
	1174	}
	1175	else
	1176	{
	1177	for( Int n = 0; n < iWidth*iHeight; n++ )
	1178	{
	1179	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
	1180	iCoeffQ = Clip3( -32768, 32767, clipQCoef * piDequantCoef[n] ); // Clip to avoid possible overflow in following shift left operation
	1181	piCoef[n] = Clip3( -32768, 32767, iCoeffQ << ( m_cQP.m_iPer - iShift ) );
	1182	}
	1183	}
	1184	}
	1185	else
	1186	{
	1187	iAdd = 1 << (iShift-1);
	1188	Int scale = g_invQuantScales[m_cQP.m_iRem] << m_cQP.m_iPer;
	1189
	1190	for( Int n = 0; n < iWidth*iHeight; n++ )
	1191	{
	1192	clipQCoef = Clip3( -32768, 32767, piQCoef[n] );
	1193	iCoeffQ = ( clipQCoef * scale + iAdd ) >> iShift;
	1194	piCoef[n] = Clip3(-32768,32767,iCoeffQ);
	1195	}
	1196	}
	1197	}
	1198
	1199	Void TComTrQuant::init( UInt uiMaxTrSize,
	1200	Bool bUseRDOQ,
	1201	Bool bUseRDOQTS,
	1202	Bool bEnc, Bool useTransformSkipFast
	1203	#if ADAPTIVE_QP_SELECTION
	1204	, Bool bUseAdaptQpSelect
	1205	#endif
	1206	)
	1207	{
	1208	m_uiMaxTrSize = uiMaxTrSize;
	1209	m_bEnc = bEnc;
	1210	m_useRDOQ = bUseRDOQ;
	1211	m_useRDOQTS = bUseRDOQTS;
	1212	#if ADAPTIVE_QP_SELECTION
	1213	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
	1214	#endif
	1215	m_useTransformSkipFast = useTransformSkipFast;
	1216	}
	1217
	1218	Void TComTrQuant::transformNxN( TComDataCU* pcCU,
	1219	Pel* pcResidual,
	1220	UInt uiStride,
	1221	TCoeff* rpcCoeff,
	1222	#if ADAPTIVE_QP_SELECTION
	1223	Int*& rpcArlCoeff,
	1224	#endif
	1225	UInt uiWidth,
	1226	UInt uiHeight,
	1227	UInt& uiAbsSum,
	1228	TextType eTType,
	1229	UInt uiAbsPartIdx,
	1230	Bool useTransformSkip
	1231	)
	1232	{
	1233	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
	1234	{
	1235	uiAbsSum=0;
	1236	for (UInt k = 0; k<uiHeight; k++)
	1237	{
	1238	for (UInt j = 0; j<uiWidth; j++)
	1239	{
	1240	rpcCoeff[kuiWidth+j]= pcResidual[kuiStride+j];
	1241	uiAbsSum += abs(pcResidual[k*uiStride+j]);
	1242	}
	1243	}
	1244	return;
	1245	}
	1246	UInt uiMode; //luma intra pred
	1247	if(eTType == TEXT_LUMA && pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA )
	1248	{
	1249	uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
	1250	}
	1251	else
	1252	{
	1253	uiMode = REG_DCT;
	1254	}
	1255
	1256	uiAbsSum = 0;
	1257	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
	1258	Int bitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
	1259	if(useTransformSkip)
	1260	{
	1261	xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	1262	}
	1263	else
	1264	{
	1265	xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	1266	}
	1267	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
	1268	#if ADAPTIVE_QP_SELECTION
	1269	rpcArlCoeff,
	1270	#endif
	1271	uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
	1272	}
	1273
	1274	Void TComTrQuant::invtransformNxN( Bool transQuantBypass, TextType eText, UInt uiMode,Pel* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip )
	1275	{
	1276	if(transQuantBypass)
	1277	{
	1278	for (UInt k = 0; k<uiHeight; k++)
	1279	{
	1280	for (UInt j = 0; j<uiWidth; j++)
	1281	{
	1282	rpcResidual[kuiStride+j] = pcCoeff[kuiWidth+j];
	1283	}
	1284	}
	1285	return;
	1286	}
	1287	Int bitDepth = eText == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
	1288	xDeQuant(bitDepth, pcCoeff, m_plTempCoeff, uiWidth, uiHeight, scalingListType);
	1289	if(useTransformSkip == true)
	1290	{
	1291	xITransformSkip(bitDepth, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
	1292	}
	1293	else
	1294	{
	1295	xIT(bitDepth, uiMode, m_plTempCoeff, rpcResidual, uiStride, uiWidth, uiHeight );
	1296	}
	1297	}
	1298
	1299	Void TComTrQuant::invRecurTransformNxN( TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Pel* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight, UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff )
	1300	{
	1301	if( !pcCU->getCbf(uiAbsPartIdx, eTxt, uiTrMode) )
	1302	{
	1303	return;
	1304	}
	1305	const UInt stopTrMode = pcCU->getTransformIdx( uiAbsPartIdx );
	1306
	1307	if( uiTrMode == stopTrMode )
	1308	{
	1309	UInt uiDepth = pcCU->getDepth( uiAbsPartIdx ) + uiTrMode;
	1310	UInt uiLog2TrSize = g_aucConvertToBit[ pcCU->getSlice()->getSPS()->getMaxCUWidth() >> uiDepth ] + 2;
	1311	if( eTxt != TEXT_LUMA && uiLog2TrSize == 2 )
	1312	{
	1313	UInt uiQPDiv = pcCU->getPic()->getNumPartInCU() >> ( ( uiDepth - 1 ) << 1 );
	1314	if( ( uiAbsPartIdx % uiQPDiv ) != 0 )
	1315	{
	1316	return;
	1317	}
	1318	uiWidth <<= 1;
	1319	uiHeight <<= 1;
	1320	}
	1321	Pel* pResi = rpcResidual + uiAddr;
	1322	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTxt];
[595]	1323	assert(scalingListType < SCALING_LIST_NUM);
[313]	1324	invtransformNxN( pcCU->getCUTransquantBypass(uiAbsPartIdx), eTxt, REG_DCT, pResi, uiStride, rpcCoeff, uiWidth, uiHeight, scalingListType, pcCU->getTransformSkip(uiAbsPartIdx, eTxt) );
	1325	}
	1326	else
	1327	{
	1328	uiTrMode++;
	1329	uiWidth >>= 1;
	1330	uiHeight >>= 1;
	1331	Int trWidth = uiWidth, trHeight = uiHeight;
	1332	UInt uiAddrOffset = trHeight * uiStride;
	1333	UInt uiCoefOffset = trWidth * trHeight;
	1334	UInt uiPartOffset = pcCU->getTotalNumPart() >> ( uiTrMode << 1 );
	1335	{
	1336	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1337	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + trWidth , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1338	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset , uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff ); rpcCoeff += uiCoefOffset; uiAbsPartIdx += uiPartOffset;
	1339	invRecurTransformNxN( pcCU, uiAbsPartIdx, eTxt, rpcResidual, uiAddr + uiAddrOffset + trWidth, uiStride, uiWidth, uiHeight, uiMaxTrMode, uiTrMode, rpcCoeff );
	1340	}
	1341	}
	1342	}
	1343
	1344	// ------------------------------------------------------------------------------------------------
	1345	// Logical transform
	1346	// ------------------------------------------------------------------------------------------------
	1347
	1348	/** Wrapper function between HM interface and core NxN forward transform (2D)
	1349	* \param piBlkResi input data (residual)
	1350	* \param psCoeff output data (transform coefficients)
	1351	* \param uiStride stride of input residual data
	1352	* \param iSize transform size (iSize x iSize)
	1353	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	1354	*/
	1355	Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
	1356	{
	1357	#if MATRIX_MULT
	1358	Int iSize = iWidth;
	1359	xTr(bitDepth, piBlkResi,psCoeff,uiStride,(UInt)iSize,uiMode);
	1360	#else
	1361	Int j;
	1362	Short block[ 32 * 32 ];
	1363	Short coeff[ 32 * 32 ];
	1364	for (j = 0; j < iHeight; j++)
	1365	{
	1366	memcpy( block + j * iWidth, piBlkResi + j * uiStride, iWidth * sizeof( Short ) );
	1367	}
	1368	xTrMxN(bitDepth, block, coeff, iWidth, iHeight, uiMode );
	1369	for ( j = 0; j < iHeight * iWidth; j++ )
	1370	{
	1371	psCoeff[ j ] = coeff[ j ];
	1372	}
	1373	#endif
	1374	}
	1375
	1376
	1377	/** Wrapper function between HM interface and core NxN inverse transform (2D)
	1378	* \param plCoef input data (transform coefficients)
	1379	* \param pResidual output data (residual)
	1380	* \param uiStride stride of input residual data
	1381	* \param iSize transform size (iSize x iSize)
	1382	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	1383	*/
	1384	Void TComTrQuant::xIT(Int bitDepth, UInt uiMode, Int* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
	1385	{
	1386	#if MATRIX_MULT
	1387	Int iSize = iWidth;
	1388	xITr(bitDepth, plCoef,pResidual,uiStride,(UInt)iSize,uiMode);
	1389	#else
	1390	Int j;
	1391	{
	1392	Short block[ 32 * 32 ];
	1393	Short coeff[ 32 * 32 ];
	1394	for ( j = 0; j < iHeight * iWidth; j++ )
	1395	{
	1396	coeff[j] = (Short)plCoef[j];
	1397	}
	1398	xITrMxN(bitDepth, coeff, block, iWidth, iHeight, uiMode );
	1399	{
	1400	for ( j = 0; j < iHeight; j++ )
	1401	{
	1402	memcpy( pResidual + j * uiStride, block + j * iWidth, iWidth * sizeof(Short) );
	1403	}
	1404	}
	1405	return ;
	1406	}
	1407	#endif
	1408	}
	1409
	1410	/** Wrapper function between HM interface and core 4x4 transform skipping
	1411	* \param piBlkResi input data (residual)
	1412	* \param psCoeff output data (transform coefficients)
	1413	* \param uiStride stride of input residual data
	1414	* \param iSize transform size (iSize x iSize)
	1415	*/
	1416	Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
	1417	{
	1418	assert( width == height );
	1419	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
	1420	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
	1421	UInt transformSkipShift;
	1422	Int j,k;
	1423	if(shift >= 0)
	1424	{
	1425	transformSkipShift = shift;
	1426	for (j = 0; j < height; j++)
	1427	{
	1428	for(k = 0; k < width; k ++)
	1429	{
	1430	psCoeff[jheight + k] = piBlkResi[j uiStride + k] << transformSkipShift;
	1431	}
	1432	}
	1433	}
	1434	else
	1435	{
	1436	//The case when uiBitDepth > 13
	1437	Int offset;
	1438	transformSkipShift = -shift;
	1439	offset = (1 << (transformSkipShift - 1));
	1440	for (j = 0; j < height; j++)
	1441	{
	1442	for(k = 0; k < width; k ++)
	1443	{
	1444	psCoeff[jheight + k] = (piBlkResi[j uiStride + k] + offset) >> transformSkipShift;
	1445	}
	1446	}
	1447	}
	1448	}
	1449
	1450	/** Wrapper function between HM interface and core NxN transform skipping
	1451	* \param plCoef input data (coefficients)
	1452	* \param pResidual output data (residual)
	1453	* \param uiStride stride of input residual data
	1454	* \param iSize transform size (iSize x iSize)
	1455	*/
	1456	Void TComTrQuant::xITransformSkip(Int bitDepth, Int* plCoef, Pel* pResidual, UInt uiStride, Int width, Int height )
	1457	{
	1458	assert( width == height );
	1459	UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
	1460	Int shift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
	1461	UInt transformSkipShift;
	1462	Int j,k;
	1463	if(shift > 0)
	1464	{
	1465	Int offset;
	1466	transformSkipShift = shift;
	1467	offset = (1 << (transformSkipShift -1));
	1468	for ( j = 0; j < height; j++ )
	1469	{
	1470	for(k = 0; k < width; k ++)
	1471	{
	1472	pResidual[j * uiStride + k] = (plCoef[j*width+k] + offset) >> transformSkipShift;
	1473	}
	1474	}
	1475	}
	1476	else
	1477	{
	1478	//The case when uiBitDepth >= 13
	1479	transformSkipShift = - shift;
	1480	for ( j = 0; j < height; j++ )
	1481	{
	1482	for(k = 0; k < width; k ++)
	1483	{
	1484	pResidual[j * uiStride + k] = plCoef[j*width+k] << transformSkipShift;
	1485	}
	1486	}
	1487	}
	1488	}
	1489
	1490	/** RDOQ with CABAC
	1491	* \param pcCU pointer to coding unit structure
	1492	* \param plSrcCoeff pointer to input buffer
	1493	* \param piDstCoeff reference to pointer to output buffer
	1494	* \param uiWidth block width
	1495	* \param uiHeight block height
	1496	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
	1497	* \param eTType plane type / luminance or chrominance
	1498	* \param uiAbsPartIdx absolute partition index
	1499	* \returns Void
	1500	* Rate distortion optimized quantization for entropy
	1501	* coding engines using probability models like CABAC
	1502	*/
	1503	Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
	1504	Int* plSrcCoeff,
	1505	TCoeff* piDstCoeff,
	1506	#if ADAPTIVE_QP_SELECTION
	1507	Int*& piArlDstCoeff,
	1508	#endif
	1509	UInt uiWidth,
	1510	UInt uiHeight,
	1511	UInt& uiAbsSum,
	1512	TextType eTType,
	1513	UInt uiAbsPartIdx )
	1514	{
	1515	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
	1516
	1517	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
	1518	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
	1519	UInt uiGoRiceParam = 0;
	1520	Double d64BlockUncodedCost = 0;
	1521	const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
	1522	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
	1523	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
[595]	1524	assert(scalingListType < SCALING_LIST_NUM);
[313]	1525
	1526	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
	1527	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
	1528	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
	1529	Int *piQCoef = piQCoefOrg;
	1530	Double *pdErrScale = pdErrScaleOrg;
	1531	#if ADAPTIVE_QP_SELECTION
	1532	Int iQBitsC = iQBits - ARL_C_PRECISION;
	1533	Int iAddC = 1 << (iQBitsC-1);
	1534	#endif
	1535	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
	1536
	1537	#if ADAPTIVE_QP_SELECTION
	1538	memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
	1539	#endif
	1540
	1541	Double pdCostCoeff [ 32 * 32 ];
	1542	Double pdCostSig [ 32 * 32 ];
	1543	Double pdCostCoeff0[ 32 * 32 ];
	1544	::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
	1545	::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
	1546	Int rateIncUp [ 32 * 32 ];
	1547	Int rateIncDown [ 32 * 32 ];
	1548	Int sigRateDelta[ 32 * 32 ];
	1549	Int deltaU [ 32 * 32 ];
	1550	::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
	1551	::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
	1552	::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
	1553	::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
	1554
	1555	const UInt * scanCG;
	1556	{
	1557	scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
	1558	if( uiLog2BlkSize == 3 )
	1559	{
	1560	scanCG = g_sigLastScan8x8[ uiScanIdx ];
	1561	}
	1562	else if( uiLog2BlkSize == 5 )
	1563	{
	1564	scanCG = g_sigLastScanCG32x32;
	1565	}
	1566	}
	1567	const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
	1568	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
	1569	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
	1570	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
	1571	Int iCGLastScanPos = -1;
	1572
	1573	UInt uiCtxSet = 0;
	1574	Int c1 = 1;
	1575	Int c2 = 0;
	1576	Double d64BaseCost = 0;
	1577	Int iLastScanPos = -1;
	1578
	1579	UInt c1Idx = 0;
	1580	UInt c2Idx = 0;
	1581	Int baseLevel;
	1582
	1583	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
	1584
	1585	::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
	1586	::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
	1587
	1588	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
	1589	Int iScanPos;
	1590	coeffGroupRDStats rdStats;
	1591
	1592	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
	1593	{
	1594	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
	1595	UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
	1596	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
[442]	1597	#if MAYBE_BUGFIX
	1598	rdStats.init();
	1599	#else
[313]	1600	::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
[442]	1601	#endif
[313]	1602	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
	1603	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	1604	{
	1605	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
	1606	//===== quantization =====
	1607	UInt uiBlkPos = scan[iScanPos];
	1608	// set coeff
	1609	Int uiQ = piQCoef[uiBlkPos];
	1610	Double dTemp = pdErrScale[uiBlkPos];
	1611	Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
	1612	lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
	1613	#if ADAPTIVE_QP_SELECTION
	1614	if( m_bUseAdaptQpSelect )
	1615	{
	1616	piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
	1617	}
	1618	#endif
	1619	UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
	1620
	1621	Double dErr = Double( lLevelDouble );
	1622	pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
	1623	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
	1624	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
	1625
	1626	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
	1627	{
	1628	iLastScanPos = iScanPos;
	1629	uiCtxSet = (iScanPos < SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
	1630	iCGLastScanPos = iCGScanPos;
	1631	}
	1632
	1633	if ( iLastScanPos >= 0 )
	1634	{
	1635	//===== coefficient level estimation =====
	1636	UInt uiLevel;
	1637	UInt uiOneCtx = 4 * uiCtxSet + c1;
	1638	UInt uiAbsCtx = uiCtxSet + c2;
	1639
	1640	if( iScanPos == iLastScanPos )
	1641	{
	1642	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
	1643	lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
	1644	c1Idx, c2Idx, iQBits, dTemp, 1 );
	1645	}
	1646	else
	1647	{
	1648	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
	1649	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
	1650	UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
	1651	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
	1652	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
	1653	c1Idx, c2Idx, iQBits, dTemp, 0 );
	1654	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
	1655	}
	1656	deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
	1657	if( uiLevel > 0 )
	1658	{
	1659	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
	1660	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
	1661	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
	1662	}
	1663	else // uiLevel == 0
	1664	{
	1665	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
	1666	}
	1667	piDstCoeff[ uiBlkPos ] = uiLevel;
	1668	d64BaseCost += pdCostCoeff [ iScanPos ];
	1669
	1670
	1671	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
	1672	if( uiLevel >= baseLevel )
	1673	{
	1674	if(uiLevel > 3*(1<<uiGoRiceParam))
	1675	{
	1676	uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
	1677	}
	1678	}
	1679	if ( uiLevel >= 1)
	1680	{
	1681	c1Idx ++;
	1682	}
	1683
	1684	//===== update bin model =====
	1685	if( uiLevel > 1 )
	1686	{
	1687	c1 = 0;
	1688	c2 += (c2 < 2);
	1689	c2Idx ++;
	1690	}
	1691	else if( (c1 < 3) && (c1 > 0) && uiLevel)
	1692	{
	1693	c1++;
	1694	}
	1695
	1696	//===== context set update =====
	1697	if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
	1698	{
	1699	c2 = 0;
	1700	uiGoRiceParam = 0;
	1701
	1702	c1Idx = 0;
	1703	c2Idx = 0;
	1704	uiCtxSet = (iScanPos == SCAN_SET_SIZE \|\| eTType!=TEXT_LUMA) ? 0 : 2;
	1705	if( c1 == 0 )
	1706	{
	1707	uiCtxSet++;
	1708	}
	1709	c1 = 1;
	1710	}
	1711	}
	1712	else
	1713	{
	1714	d64BaseCost += pdCostCoeff0[ iScanPos ];
	1715	}
	1716	rdStats.d64SigCost += pdCostSig[ iScanPos ];
	1717	if (iScanPosinCG == 0 )
	1718	{
	1719	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
	1720	}
	1721	if (piDstCoeff[ uiBlkPos ] )
	1722	{
	1723	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
	1724	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
	1725	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
	1726	if ( iScanPosinCG != 0 )
	1727	{
	1728	rdStats.iNNZbeforePos0++;
	1729	}
	1730	}
	1731	} //end for (iScanPosinCG)
	1732
	1733	if (iCGLastScanPos >= 0)
	1734	{
	1735	if( iCGScanPos )
	1736	{
	1737	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
	1738	{
	1739	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
	1740	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
	1741	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
	1742	}
	1743	else
	1744	{
	1745	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
	1746	{
	1747	if ( rdStats.iNNZbeforePos0 == 0 )
	1748	{
	1749	d64BaseCost -= rdStats.d64SigCost_0;
	1750	rdStats.d64SigCost -= rdStats.d64SigCost_0;
	1751	}
	1752	// rd-cost if SigCoeffGroupFlag = 0, initialization
	1753	Double d64CostZeroCG = d64BaseCost;
	1754
	1755	// add SigCoeffGroupFlag cost to total cost
	1756	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
	1757	if (iCGScanPos < iCGLastScanPos)
	1758	{
	1759	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
	1760	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
	1761	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
	1762	}
	1763
	1764	// try to convert the current coeff group from non-zero to all-zero
	1765	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
	1766	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
	1767	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
	1768
	1769	// if we can save cost, change this block to all-zero block
	1770	if ( d64CostZeroCG < d64BaseCost )
	1771	{
	1772	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
	1773	d64BaseCost = d64CostZeroCG;
	1774	if (iCGScanPos < iCGLastScanPos)
	1775	{
	1776	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
	1777	}
	1778	// reset coeffs to 0 in this block
	1779	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	1780	{
	1781	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
	1782	UInt uiBlkPos = scan[ iScanPos ];
	1783
	1784	if (piDstCoeff[ uiBlkPos ])
	1785	{
	1786	piDstCoeff [ uiBlkPos ] = 0;
	1787	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
	1788	pdCostSig [ iScanPos ] = 0;
	1789	}
	1790	}
	1791	} // end if ( d64CostAllZeros < d64BaseCost )
	1792	}
	1793	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
	1794	}
	1795	else
	1796	{
	1797	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
	1798	}
	1799	}
	1800	} //end for (iCGScanPos)
	1801
	1802	//===== estimate last position =====
	1803	if ( iLastScanPos < 0 )
	1804	{
	1805	return;
	1806	}
	1807
	1808	Double d64BestCost = 0;
	1809	Int ui16CtxCbf = 0;
	1810	Int iBestLastIdxP1 = 0;
	1811	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
	1812	{
	1813	ui16CtxCbf = 0;
	1814	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
	1815	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
	1816	}
	1817	else
	1818	{
	1819	ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
	1820	ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
	1821	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
	1822	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
	1823	}
	1824
	1825	Bool bFoundLast = false;
	1826	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
	1827	{
	1828	UInt uiCGBlkPos = scanCG[ iCGScanPos ];
	1829
	1830	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
	1831	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
	1832	{
	1833	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	1834	{
	1835	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
	1836	if (iScanPos > iLastScanPos) continue;
	1837	UInt uiBlkPos = scan[iScanPos];
	1838
	1839	if( piDstCoeff[ uiBlkPos ] )
	1840	{
	1841	UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
	1842	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
	1843
	1844	Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
	1845	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
	1846
	1847	if( totalCost < d64BestCost )
	1848	{
	1849	iBestLastIdxP1 = iScanPos + 1;
	1850	d64BestCost = totalCost;
	1851	}
	1852	if( piDstCoeff[ uiBlkPos ] > 1 )
	1853	{
	1854	bFoundLast = true;
	1855	break;
	1856	}
	1857	d64BaseCost -= pdCostCoeff[ iScanPos ];
	1858	d64BaseCost += pdCostCoeff0[ iScanPos ];
	1859	}
	1860	else
	1861	{
	1862	d64BaseCost -= pdCostSig[ iScanPos ];
	1863	}
	1864	} //end for
	1865	if (bFoundLast)
	1866	{
	1867	break;
	1868	}
	1869	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
	1870	} // end for
	1871
	1872	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
	1873	{
	1874	Int blkPos = scan[ scanPos ];
	1875	Int level = piDstCoeff[ blkPos ];
	1876	uiAbsSum += level;
	1877	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
	1878	}
	1879
	1880	//===== clean uncoded coefficients =====
	1881	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
	1882	{
	1883	piDstCoeff[ scan[ scanPos ] ] = 0;
	1884	}
	1885
	1886	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
	1887	{
	1888	Int64 rdFactor = (Int64) (
	1889	g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
	1890	/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
	1891	+ 0.5);
	1892	Int lastCG = -1;
	1893	Int absSum = 0 ;
	1894	Int n ;
	1895
	1896	for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
	1897	{
	1898	Int subPos = subSet << LOG2_SCAN_SET_SIZE;
	1899	Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
	1900	absSum = 0 ;
	1901
	1902	for(n = SCAN_SET_SIZE-1; n >= 0; --n )
	1903	{
	1904	if( piDstCoeff[ scan[ n + subPos ]] )
	1905	{
	1906	lastNZPosInCG = n;
	1907	break;
	1908	}
	1909	}
	1910
	1911	for(n = 0; n <SCAN_SET_SIZE; n++ )
	1912	{
	1913	if( piDstCoeff[ scan[ n + subPos ]] )
	1914	{
	1915	firstNZPosInCG = n;
	1916	break;
	1917	}
	1918	}
	1919
	1920	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
	1921	{
	1922	absSum += piDstCoeff[ scan[ n + subPos ]];
	1923	}
	1924
	1925	if(lastNZPosInCG>=0 && lastCG==-1)
	1926	{
	1927	lastCG = 1;
	1928	}
	1929
	1930	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
	1931	{
	1932	UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
	1933	if( signbit!=(absSum&0x1) ) // hide but need tune
	1934	{
	1935	// calculate the cost
	1936	Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
	1937	Int minPos =-1, finalChange=0, curChange=0;
	1938
	1939	for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
	1940	{
	1941	UInt uiBlkPos = scan[ n + subPos ];
	1942	if(piDstCoeff[ uiBlkPos ] != 0 )
	1943	{
	1944	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
	1945	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
[540]	1946	- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
[313]	1947
	1948	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
	1949	{
	1950	costDown -= (4<<15) ;
	1951	}
	1952
	1953	if(costUp<costDown)
	1954	{
	1955	curCost = costUp;
	1956	curChange = 1 ;
	1957	}
	1958	else
	1959	{
	1960	curChange = -1 ;
	1961	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
	1962	{
	1963	curCost = MAX_INT64 ;
	1964	}
	1965	else
	1966	{
	1967	curCost = costDown ;
	1968	}
	1969	}
	1970	}
	1971	else
	1972	{
	1973	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
	1974	curChange = 1 ;
	1975
	1976	if(n<firstNZPosInCG)
	1977	{
	1978	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
	1979	if(thissignbit != signbit )
	1980	{
	1981	curCost = MAX_INT64;
	1982	}
	1983	}
	1984	}
	1985
	1986	if( curCost<minCostInc)
	1987	{
	1988	minCostInc = curCost ;
	1989	finalChange = curChange ;
	1990	minPos = uiBlkPos ;
	1991	}
	1992	}
	1993
	1994	if(piDstCoeff[minPos] == 32767 \|\| piDstCoeff[minPos] == -32768)
	1995	{
	1996	finalChange = -1;
	1997	}
	1998
	1999	if(plSrcCoeff[minPos]>=0)
	2000	{
	2001	piDstCoeff[minPos] += finalChange ;
	2002	}
	2003	else
	2004	{
	2005	piDstCoeff[minPos] -= finalChange ;
	2006	}
	2007	}
	2008	}
	2009
	2010	if(lastCG==1)
	2011	{
	2012	lastCG=0 ;
	2013	}
	2014	}
	2015	}
	2016	}
	2017
	2018	/** Pattern decision for context derivation process of significant_coeff_flag
	2019	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
	2020	* \param posXCG column of current coefficient group
	2021	* \param posYCG row of current coefficient group
	2022	* \param width width of the block
	2023	* \param height height of the block
	2024	* \returns pattern for current coefficient group
	2025	*/
	2026	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height )
	2027	{
	2028	if( width == 4 && height == 4 ) return -1;
	2029
	2030	UInt sigRight = 0;
	2031	UInt sigLower = 0;
	2032
	2033	width >>= 2;
	2034	height >>= 2;
	2035	if( posXCG < width - 1 )
	2036	{
	2037	sigRight = (sigCoeffGroupFlag[ posYCG * width + posXCG + 1 ] != 0);
	2038	}
	2039	if (posYCG < height - 1 )
	2040	{
	2041	sigLower = (sigCoeffGroupFlag[ (posYCG + 1 ) * width + posXCG ] != 0);
	2042	}
	2043	return sigRight + (sigLower<<1);
	2044	}
	2045
	2046	/** Context derivation process of coeff_abs_significant_flag
	2047	* \param patternSigCtx pattern for current coefficient group
	2048	* \param posX column of current scan position
	2049	* \param posY row of current scan position
	2050	* \param log2BlockSize log2 value of block size (square block)
	2051	* \param width width of the block
	2052	* \param height height of the block
	2053	* \param textureType texture type (TEXT_LUMA...)
	2054	* \returns ctxInc for current scan position
	2055	*/
	2056	Int TComTrQuant::getSigCtxInc (
	2057	Int patternSigCtx,
	2058	UInt scanIdx,
	2059	Int posX,
	2060	Int posY,
	2061	Int log2BlockSize,
	2062	TextType textureType
	2063	)
	2064	{
	2065	const Int ctxIndMap[16] =
	2066	{
	2067	0, 1, 4, 5,
	2068	2, 3, 4, 5,
	2069	6, 6, 8, 8,
	2070	7, 7, 8, 8
	2071	};
	2072
	2073	if( posX + posY == 0 )
	2074	{
	2075	return 0;
	2076	}
	2077
	2078	if ( log2BlockSize == 2 )
	2079	{
	2080	return ctxIndMap[ 4 * posY + posX ];
	2081	}
	2082
	2083	Int offset = log2BlockSize == 3 ? (scanIdx==SCAN_DIAG ? 9 : 15) : (textureType == TEXT_LUMA ? 21 : 12);
	2084
	2085	Int posXinSubset = posX-((posX>>2)<<2);
	2086	Int posYinSubset = posY-((posY>>2)<<2);
	2087	Int cnt = 0;
	2088	if(patternSigCtx==0)
	2089	{
	2090	cnt = posXinSubset+posYinSubset<=2 ? (posXinSubset+posYinSubset==0 ? 2 : 1) : 0;
	2091	}
	2092	else if(patternSigCtx==1)
	2093	{
	2094	cnt = posYinSubset<=1 ? (posYinSubset==0 ? 2 : 1) : 0;
	2095	}
	2096	else if(patternSigCtx==2)
	2097	{
	2098	cnt = posXinSubset<=1 ? (posXinSubset==0 ? 2 : 1) : 0;
	2099	}
	2100	else
	2101	{
	2102	cnt = 2;
	2103	}
	2104
	2105	return (( textureType == TEXT_LUMA && ((posX>>2) + (posY>>2)) > 0 ) ? 3 : 0) + offset + cnt;
	2106	}
	2107
	2108	/** Get the best level in RD sense
	2109	* \param rd64CodedCost reference to coded cost
	2110	* \param rd64CodedCost0 reference to cost when coefficient is 0
	2111	* \param rd64CodedCostSig reference to cost of significant coefficient
	2112	* \param lLevelDouble reference to unscaled quantized level
	2113	* \param uiMaxAbsLevel scaled quantized level
	2114	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
	2115	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2116	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2117	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
	2118	* \param iQBits quantization step size
	2119	* \param dTemp correction factor
	2120	* \param bLast indicates if the coefficient is the last significant
	2121	* \returns best quantized transform level for given scan position
	2122	* This method calculates the best quantized transform level for a given scan position.
	2123	*/
	2124	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
	2125	Double& rd64CodedCost0,
	2126	Double& rd64CodedCostSig,
	2127	Int lLevelDouble,
	2128	UInt uiMaxAbsLevel,
	2129	UShort ui16CtxNumSig,
	2130	UShort ui16CtxNumOne,
	2131	UShort ui16CtxNumAbs,
	2132	UShort ui16AbsGoRice,
	2133	UInt c1Idx,
	2134	UInt c2Idx,
	2135	Int iQBits,
	2136	Double dTemp,
	2137	Bool bLast ) const
	2138	{
	2139	Double dCurrCostSig = 0;
	2140	UInt uiBestAbsLevel = 0;
	2141
	2142	if( !bLast && uiMaxAbsLevel < 3 )
	2143	{
	2144	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
	2145	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
	2146	if( uiMaxAbsLevel == 0 )
	2147	{
	2148	return uiBestAbsLevel;
	2149	}
	2150	}
	2151	else
	2152	{
	2153	rd64CodedCost = MAX_DOUBLE;
	2154	}
	2155
	2156	if( !bLast )
	2157	{
	2158	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
	2159	}
	2160
	2161	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
	2162	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
	2163	{
	2164	Double dErr = Double( lLevelDouble - ( uiAbsLevel << iQBits ) );
[540]	2165	Double dCurrCost = dErr * dErr * dTemp + xGetICost(xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx ));
[313]	2166	dCurrCost += dCurrCostSig;
	2167
	2168	if( dCurrCost < rd64CodedCost )
	2169	{
	2170	uiBestAbsLevel = uiAbsLevel;
	2171	rd64CodedCost = dCurrCost;
	2172	rd64CodedCostSig = dCurrCostSig;
	2173	}
	2174	}
	2175
	2176	return uiBestAbsLevel;
	2177	}
	2178
	2179	/** Calculates the cost for specific absolute transform level
	2180	* \param uiAbsLevel scaled quantized level
	2181	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2182	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2183	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
	2184	* \returns cost of given absolute transform level
	2185	*/
[540]	2186	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
[313]	2187	UShort ui16CtxNumOne,
	2188	UShort ui16CtxNumAbs,
	2189	UShort ui16AbsGoRice
	2190	, UInt c1Idx,
	2191	UInt c2Idx
	2192	) const
	2193	{
[540]	2194	Int iRate = Int(xGetIEPRate());
[313]	2195	UInt baseLevel = (c1Idx < C1FLAG_NUMBER)? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
	2196
	2197	if ( uiAbsLevel >= baseLevel )
	2198	{
	2199	UInt symbol = uiAbsLevel - baseLevel;
	2200	UInt length;
	2201	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
	2202	{
	2203	length = symbol>>ui16AbsGoRice;
	2204	iRate += (length+1+ui16AbsGoRice)<< 15;
	2205	}
	2206	else
	2207	{
	2208	length = ui16AbsGoRice;
	2209	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
	2210	while (symbol >= (1<<length))
	2211	{
	2212	symbol -= (1<<(length++));
	2213	}
	2214	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
	2215	}
	2216	if (c1Idx < C1FLAG_NUMBER)
	2217	{
	2218	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2219
	2220	if (c2Idx < C2FLAG_NUMBER)
	2221	{
	2222	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
	2223	}
	2224	}
	2225	}
	2226	else
	2227	if( uiAbsLevel == 1 )
	2228	{
	2229	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
	2230	}
	2231	else if( uiAbsLevel == 2 )
	2232	{
	2233	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2234	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
	2235	}
	2236	else
	2237	{
[540]	2238	iRate = 0;
[313]	2239	}
	2240	return iRate;
	2241	}
	2242
	2243	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
	2244	UShort ui16CtxNumSig ) const
	2245	{
	2246	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
	2247	}
	2248
	2249	/** Calculates the cost of signaling the last significant coefficient in the block
	2250	* \param uiPosX X coordinate of the last significant coefficient
	2251	* \param uiPosY Y coordinate of the last significant coefficient
	2252	* \returns cost of last significant coefficient
	2253	*/
	2254	/*
	2255	* \param uiWidth width of the transform unit (TU)
	2256	*/
	2257	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
	2258	const UInt uiPosY ) const
	2259	{
	2260	UInt uiCtxX = g_uiGroupIdx[uiPosX];
	2261	UInt uiCtxY = g_uiGroupIdx[uiPosY];
	2262	Double uiCost = m_pcEstBitsSbac->lastXBits[ uiCtxX ] + m_pcEstBitsSbac->lastYBits[ uiCtxY ];
	2263	if( uiCtxX > 3 )
	2264	{
	2265	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
	2266	}
	2267	if( uiCtxY > 3 )
	2268	{
	2269	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
	2270	}
	2271	return xGetICost( uiCost );
	2272	}
	2273
	2274	/** Calculates the cost for specific absolute transform level
	2275	* \param uiAbsLevel scaled quantized level
	2276	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2277	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2278	* \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
	2279	* \returns cost of given absolute transform level
	2280	*/
	2281	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
	2282	UShort ui16CtxNumSig ) const
	2283	{
	2284	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
	2285	}
	2286
	2287	/** Get the cost for a specific rate
	2288	* \param dRate rate of a bit
	2289	* \returns cost at the specific rate
	2290	*/
	2291	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
	2292	{
	2293	return m_dLambda * dRate;
	2294	}
	2295
	2296	/** Get the cost of an equal probable bit
	2297	* \returns cost of equal probable bit
	2298	*/
	2299	__inline Double TComTrQuant::xGetIEPRate ( ) const
	2300	{
	2301	return 32768;
	2302	}
	2303
	2304	/** Context derivation process of coeff_abs_significant_flag
	2305	* \param uiSigCoeffGroupFlag significance map of L1
	2306	* \param uiBlkX column of current scan position
	2307	* \param uiBlkY row of current scan position
	2308	* \param uiLog2BlkSize log2 value of block size
	2309	* \returns ctxInc for current scan position
	2310	*/
	2311	UInt TComTrQuant::getSigCoeffGroupCtxInc ( const UInt* uiSigCoeffGroupFlag,
	2312	const UInt uiCGPosX,
	2313	const UInt uiCGPosY,
	2314	Int width, Int height)
	2315	{
	2316	UInt uiRight = 0;
	2317	UInt uiLower = 0;
	2318
	2319	width >>= 2;
	2320	height >>= 2;
	2321	if( uiCGPosX < width - 1 )
	2322	{
	2323	uiRight = (uiSigCoeffGroupFlag[ uiCGPosY * width + uiCGPosX + 1 ] != 0);
	2324	}
	2325	if (uiCGPosY < height - 1 )
	2326	{
	2327	uiLower = (uiSigCoeffGroupFlag[ (uiCGPosY + 1 ) * width + uiCGPosX ] != 0);
	2328	}
	2329	return (uiRight \|\| uiLower);
	2330
	2331	}
	2332	/** set quantized matrix coefficient for encode
	2333	* \param scalingList quantaized matrix address
	2334	*/
	2335	Void TComTrQuant::setScalingList(TComScalingList *scalingList)
	2336	{
	2337	UInt size,list;
	2338	UInt qp;
	2339
	2340	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
	2341	{
	2342	for(list = 0; list < g_scalingListNum[size]; list++)
	2343	{
	2344	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
	2345	{
	2346	xSetScalingListEnc(scalingList,list,size,qp);
	2347	xSetScalingListDec(scalingList,list,size,qp);
	2348	setErrScaleCoeff(list,size,qp);
	2349	}
	2350	}
	2351	}
	2352	}
	2353	/** set quantized matrix coefficient for decode
	2354	* \param scalingList quantaized matrix address
	2355	*/
	2356	Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
	2357	{
	2358	UInt size,list;
	2359	UInt qp;
	2360
	2361	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
	2362	{
	2363	for(list = 0; list < g_scalingListNum[size]; list++)
	2364	{
	2365	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
	2366	{
	2367	xSetScalingListDec(scalingList,list,size,qp);
	2368	}
	2369	}
	2370	}
	2371	}
	2372	/** set error scale coefficients
	2373	* \param list List ID
	2374	* \param uiSize Size
	2375	* \param uiQP Quantization parameter
	2376	*/
	2377	Void TComTrQuant::setErrScaleCoeff(UInt list,UInt size, UInt qp)
	2378	{
	2379
	2380	UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
	2381	Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
	2382	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
	2383
	2384	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
	2385	Int *piQuantcoeff;
	2386	Double *pdErrScale;
	2387	piQuantcoeff = getQuantCoeff(list, qp,size);
	2388	pdErrScale = getErrScaleCoeff(list, size, qp);
	2389
	2390	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
	2391	dErrScale = dErrScalepow(2.0,-2.0iTransformShift); // Compensate for scaling through forward transform
	2392	for(i=0;i<uiMaxNumCoeff;i++)
	2393	{
	2394	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(bitDepth-8)));
	2395	}
	2396	}
	2397
	2398	/** set quantized matrix coefficient for encode
	2399	* \param scalingList quantaized matrix address
	2400	* \param listId List index
	2401	* \param sizeId size index
	2402	* \param uiQP Quantization parameter
	2403	*/
	2404	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
	2405	{
	2406	UInt width = g_scalingListSizeX[sizeId];
	2407	UInt height = g_scalingListSizeX[sizeId];
	2408	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
	2409	Int *quantcoeff;
	2410	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
	2411	quantcoeff = getQuantCoeff(listId, qp, sizeId);
	2412
	2413	processScalingListEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2414	}
	2415	/** set quantized matrix coefficient for decode
	2416	* \param scalingList quantaized matrix address
	2417	* \param list List index
	2418	* \param size size index
	2419	* \param uiQP Quantization parameter
	2420	*/
	2421	Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
	2422	{
	2423	UInt width = g_scalingListSizeX[sizeId];
	2424	UInt height = g_scalingListSizeX[sizeId];
	2425	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
	2426	Int *dequantcoeff;
	2427	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
	2428
	2429	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
	2430	processScalingListDec(coeff,dequantcoeff,g_invQuantScales[qp],height,width,ratio,min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]),scalingList->getScalingListDC(sizeId,listId));
	2431	}
	2432
	2433	/** set flat matrix value to quantized coefficient
	2434	*/
	2435	Void TComTrQuant::setFlatScalingList()
	2436	{
	2437	UInt size,list;
	2438	UInt qp;
	2439
	2440	for(size=0;size<SCALING_LIST_SIZE_NUM;size++)
	2441	{
	2442	for(list = 0; list < g_scalingListNum[size]; list++)
	2443	{
	2444	for(qp=0;qp<SCALING_LIST_REM_NUM;qp++)
	2445	{
	2446	xsetFlatScalingList(list,size,qp);
	2447	setErrScaleCoeff(list,size,qp);
	2448	}
	2449	}
	2450	}
	2451	}
	2452
	2453	/** set flat matrix value to quantized coefficient
	2454	* \param list List ID
	2455	* \param uiQP Quantization parameter
	2456	* \param uiSize Size
	2457	*/
	2458	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, UInt qp)
	2459	{
	2460	UInt i,num = g_scalingListSize[size];
	2461	Int *quantcoeff;
	2462	Int *dequantcoeff;
	2463	Int quantScales = g_quantScales[qp];
	2464	Int invQuantScales = g_invQuantScales[qp]<<4;
	2465
	2466	quantcoeff = getQuantCoeff(list, qp, size);
	2467	dequantcoeff = getDequantCoeff(list, qp, size);
	2468
	2469	for(i=0;i<num;i++)
	2470	{
	2471	*quantcoeff++ = quantScales;
	2472	*dequantcoeff++ = invQuantScales;
	2473	}
	2474	}
	2475
	2476	/** set quantized matrix coefficient for encode
	2477	* \param coeff quantaized matrix address
	2478	* \param quantcoeff quantaized matrix address
	2479	* \param quantScales Q(QP%6)
	2480	* \param height height
	2481	* \param width width
	2482	* \param ratio ratio for upscale
	2483	* \param sizuNum matrix size
	2484	* \param dc dc parameter
	2485	*/
	2486	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
	2487	{
	2488	Int nsqth = (height < width) ? 4: 1; //height ratio for NSQT
	2489	Int nsqtw = (width < height) ? 4: 1; //width ratio for NSQT
	2490	for(UInt j=0;j<height;j++)
	2491	{
	2492	for(UInt i=0;i<width;i++)
	2493	{
	2494	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j * nsqth / ratio) + i * nsqtw /ratio];
	2495	}
	2496	}
	2497	if(ratio > 1)
	2498	{
	2499	quantcoeff[0] = quantScales / dc;
	2500	}
	2501	}
	2502	/** set quantized matrix coefficient for decode
	2503	* \param coeff quantaized matrix address
	2504	* \param dequantcoeff quantaized matrix address
	2505	* \param invQuantScales IQ(QP%6))
	2506	* \param height height
	2507	* \param width width
	2508	* \param ratio ratio for upscale
	2509	* \param sizuNum matrix size
	2510	* \param dc dc parameter
	2511	*/
	2512	Void TComTrQuant::processScalingListDec( Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
	2513	{
	2514	for(UInt j=0;j<height;j++)
	2515	{
	2516	for(UInt i=0;i<width;i++)
	2517	{
	2518	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
	2519	}
	2520	}
	2521	if(ratio > 1)
	2522	{
	2523	dequantcoeff[0] = invQuantScales * dc;
	2524	}
	2525	}
	2526
	2527	/** initialization process of scaling list array
	2528	*/
	2529	Void TComTrQuant::initScalingList()
	2530	{
	2531	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
	2532	{
	2533	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
	2534	{
	2535	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
	2536	{
	2537	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
	2538	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
	2539	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
	2540	}
	2541	}
	2542	}
	2543	// alias list [1] as [3].
	2544	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
	2545	{
	2546	m_quantCoef [SCALING_LIST_32x32][3][qp] = m_quantCoef [SCALING_LIST_32x32][1][qp];
	2547	m_dequantCoef [SCALING_LIST_32x32][3][qp] = m_dequantCoef [SCALING_LIST_32x32][1][qp];
	2548	m_errScale [SCALING_LIST_32x32][3][qp] = m_errScale [SCALING_LIST_32x32][1][qp];
	2549	}
	2550	}
	2551	/** destroy quantization matrix array
	2552	*/
	2553	Void TComTrQuant::destroyScalingList()
	2554	{
	2555	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
	2556	{
	2557	for(UInt listId = 0; listId < g_scalingListNum[sizeId]; listId++)
	2558	{
	2559	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
	2560	{
	2561	if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
	2562	if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
	2563	if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
	2564	}
	2565	}
	2566	}
	2567	}
	2568
	2569	//! \}

Note: See TracBrowser for help on using the repository browser.

JCT-VC SHVC

Context navigation

source: SHVCSoftware/trunk/source/Lib/TLibCommon/TComTrQuant.cpp @ 844

Download in other formats: