Context navigation

source: SHVCSoftware/branches/SHM-dev/source/Lib/TLibCommon/TComTrQuant.cpp @ 1263

Visit:

Last change on this file since 1263 was 1260, checked in by seregin, 9 years ago
port rev 4257
Property svn:eol-style set to `native`
File size: 124.6 KB

Rev	Line
[313]	1	/* The copyright in this software is being made available under the BSD
	2	* License, included below. This software may be subject to other third party
	3	* and contributor rights, including patent rights, and no such rights are
[1029]	4	* granted under this license.
[313]	5	*
[1259]	6	* Copyright (c) 2010-2015, ITU/ISO/IEC
[313]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions are met:
	11	*
	12	* * Redistributions of source code must retain the above copyright notice,
	13	* this list of conditions and the following disclaimer.
	14	* * Redistributions in binary form must reproduce the above copyright notice,
	15	* this list of conditions and the following disclaimer in the documentation
	16	* and/or other materials provided with the distribution.
	17	* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
	18	* be used to endorse or promote products derived from this software without
	19	* specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	22	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
	25	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	26	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	27	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	28	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	29	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	30	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	31	* THE POSSIBILITY OF SUCH DAMAGE.
	32	*/
	33
	34	/** \file TComTrQuant.cpp
	35	\brief transform and quantization class
	36	*/
	37
	38	#include <stdlib.h>
	39	#include <math.h>
[1029]	40	#include <limits>
[313]	41	#include <memory.h>
	42	#include "TComTrQuant.h"
	43	#include "TComPic.h"
	44	#include "ContextTables.h"
[1029]	45	#include "TComTU.h"
	46	#include "Debug.h"
	47
[313]	48	typedef struct
	49	{
	50	Int iNNZbeforePos0;
	51	Double d64CodedLevelandDist; // distortion and level cost only
	52	Double d64UncodedDist; // all zero coded block distortion
	53	Double d64SigCost;
	54	Double d64SigCost_0;
	55	} coeffGroupRDStats;
	56
	57	//! \ingroup TLibCommon
	58	//! \{
	59
	60	// ====================================================================================================================
	61	// Constants
	62	// ====================================================================================================================
	63
	64	#define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
	65
[1029]	66
[313]	67	// ====================================================================================================================
[1029]	68	// QpParam constructor
[313]	69	// ====================================================================================================================
	70
[1029]	71	QpParam::QpParam(const Int qpy,
	72	const ChannelType chType,
	73	const Int qpBdOffset,
	74	const Int chromaQPOffset,
	75	const ChromaFormat chFmt )
	76	{
	77	Int baseQp;
[313]	78
[1029]	79	if(isLuma(chType))
	80	{
	81	baseQp = qpy + qpBdOffset;
	82	}
	83	else
	84	{
	85	baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
[313]	86
[1029]	87	if(baseQp < 0)
	88	{
	89	baseQp = baseQp + qpBdOffset;
	90	}
	91	else
	92	{
	93	baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
	94	}
	95	}
	96
	97	Qp =baseQp;
	98	per=baseQp/6;
	99	rem=baseQp%6;
	100	}
	101
	102	QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
[313]	103	{
[1029]	104	Int chromaQpOffset = 0;
	105
	106	if (isChroma(compID))
	107	{
	108	chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
	109	chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
	110
	111	chromaQpOffset += cu.getSlice()->getPPS()->getChromaQpAdjTableAt(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
	112	}
	113
[1203]	114	#if SVC_EXTENSION
[1029]	115	TComSlice* slice = const_cast<TComSlice*> (cu.getSlice());
	116	#endif
	117
	118	*this = QpParam(cu.getQP( 0 ),
	119	toChannelType(compID),
[1203]	120	#if SVC_EXTENSION
[1029]	121	isLuma(compID) ? slice->getQpBDOffsetY() : slice->getQpBDOffsetC(),
	122	#else
	123	cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
	124	#endif
	125	chromaQpOffset,
	126	cu.getPic()->getChromaFormat());
[313]	127	}
	128
[1029]	129
[313]	130	// ====================================================================================================================
	131	// TComTrQuant class member functions
	132	// ====================================================================================================================
	133
	134	TComTrQuant::TComTrQuant()
	135	{
	136	// allocate temporary buffers
[1029]	137	m_plTempCoeff = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ];
	138
[313]	139	// allocate bit estimation class (for RDOQ)
	140	m_pcEstBitsSbac = new estBitsSbacStruct;
	141	initScalingList();
	142	}
	143
	144	TComTrQuant::~TComTrQuant()
	145	{
	146	// delete temporary buffers
	147	if ( m_plTempCoeff )
	148	{
	149	delete [] m_plTempCoeff;
	150	m_plTempCoeff = NULL;
	151	}
[1029]	152
[313]	153	// delete bit estimation class
	154	if ( m_pcEstBitsSbac )
	155	{
	156	delete m_pcEstBitsSbac;
	157	}
	158	destroyScalingList();
	159	}
	160
	161	#if ADAPTIVE_QP_SELECTION
	162	Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
	163	{
[1029]	164	// NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled?
	165
[313]	166	Int qpBase = pcSlice->getSliceQpBase();
	167	Int sliceQpused = pcSlice->getSliceQp();
	168	Int sliceQpnext;
	169	Double alpha = qpBase < 17 ? 0.5 : 1;
[1029]	170
[313]	171	Int cnt=0;
	172	for(Int u=1; u<=LEVEL_RANGE; u++)
[1029]	173	{
[313]	174	cnt += m_sliceNsamples[u] ;
	175	}
	176
	177	if( !m_useRDOQ )
	178	{
	179	sliceQpused = qpBase;
	180	alpha = 0.5;
	181	}
	182
	183	if( cnt > 120 )
	184	{
	185	Double sum = 0;
	186	Int k = 0;
	187	for(Int u=1; u<LEVEL_RANGE; u++)
	188	{
	189	sum += u*m_sliceSumC[u];
	190	k += uum_sliceNsamples[u];
	191	}
	192
	193	Int v;
	194	Double q[MAX_QP+1] ;
	195	for(v=0; v<=MAX_QP; v++)
	196	{
	197	q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
	198	}
	199
	200	Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
	201
	202	for(v=0; v<MAX_QP; v++)
	203	{
	204	if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
	205	{
	206	break;
	207	}
	208	}
	209	sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
	210	}
	211	else
	212	{
	213	sliceQpnext = sliceQpused;
	214	}
	215
[1029]	216	m_qpDelta[qpBase] = sliceQpnext - qpBase;
[313]	217	}
	218
	219	Void TComTrQuant::initSliceQpDelta()
	220	{
	221	for(Int qp=0; qp<=MAX_QP; qp++)
	222	{
	223	m_qpDelta[qp] = qp < 17 ? 0 : 1;
	224	}
	225	}
	226
	227	Void TComTrQuant::clearSliceARLCnt()
[1029]	228	{
[313]	229	memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
	230	memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
	231	}
	232	#endif
	233
	234
	235
	236	#if MATRIX_MULT
	237	/** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
	238	* \param block pointer to input data (residual)
	239	* \param coeff pointer to output data (transform coefficients)
	240	* \param uiStride stride of input data
	241	* \param uiTrSize transform size (uiTrSize x uiTrSize)
	242	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	243	*/
[1029]	244	Void xTr(Int bitDepth, Pel block, TCoeff coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
[313]	245	{
[1029]	246	UInt i,j,k;
	247	TCoeff iSum;
	248	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
	249	const TMatrixCoeff *iT;
[313]	250	UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
	251
	252	if (uiTrSize==4)
	253	{
[1029]	254	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]);
[313]	255	}
	256	else if (uiTrSize==8)
	257	{
[1029]	258	iT = g_aiT8[TRANSFORM_FORWARD][0];
[313]	259	}
	260	else if (uiTrSize==16)
	261	{
[1029]	262	iT = g_aiT16[TRANSFORM_FORWARD][0];
[313]	263	}
	264	else if (uiTrSize==32)
	265	{
[1029]	266	iT = g_aiT32[TRANSFORM_FORWARD][0];
[313]	267	}
	268	else
	269	{
	270	assert(0);
	271	}
	272
[1029]	273	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
[313]	274
[1029]	275	const Int shift_1st = (uiLog2TrSize + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
	276	const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT;
	277	const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0;
	278	const Int add_2nd = 1<<(shift_2nd-1);
	279
[313]	280	/* Horizontal transform */
	281
	282	for (i=0; i<uiTrSize; i++)
	283	{
	284	for (j=0; j<uiTrSize; j++)
	285	{
	286	iSum = 0;
	287	for (k=0; k<uiTrSize; k++)
	288	{
	289	iSum += iT[iuiTrSize+k]block[j*uiStride+k];
	290	}
	291	tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
	292	}
	293	}
[1029]	294
[313]	295	/* Vertical transform */
[1029]	296	for (i=0; i<uiTrSize; i++)
[313]	297	{
	298	for (j=0; j<uiTrSize; j++)
	299	{
	300	iSum = 0;
	301	for (k=0; k<uiTrSize; k++)
	302	{
[1029]	303	iSum += iT[iuiTrSize+k]tmp[j*uiTrSize+k];
[313]	304	}
[1029]	305	coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
[313]	306	}
	307	}
	308	}
	309
	310	/** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
	311	* \param coeff pointer to input data (transform coefficients)
	312	* \param block pointer to output data (residual)
	313	* \param uiStride stride of output data
	314	* \param uiTrSize transform size (uiTrSize x uiTrSize)
	315	* \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
	316	*/
[1029]	317	Void xITr(Int bitDepth, TCoeff coeff, Pel block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
[313]	318	{
[1029]	319	UInt i,j,k;
	320	TCoeff iSum;
	321	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
	322	const TMatrixCoeff *iT;
	323
[313]	324	if (uiTrSize==4)
	325	{
[1029]	326	iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]);
[313]	327	}
	328	else if (uiTrSize==8)
	329	{
[1029]	330	iT = g_aiT8[TRANSFORM_INVERSE][0];
[313]	331	}
	332	else if (uiTrSize==16)
	333	{
[1029]	334	iT = g_aiT16[TRANSFORM_INVERSE][0];
[313]	335	}
	336	else if (uiTrSize==32)
	337	{
[1029]	338	iT = g_aiT32[TRANSFORM_INVERSE][0];
[313]	339	}
	340	else
	341	{
	342	assert(0);
	343	}
[1029]	344
	345	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
	346
	347	const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
	348	const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
	349	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
	350	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
	351	assert(shift_2nd>=0);
	352	const Int add_1st = 1<<(shift_1st-1);
	353	const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0;
	354
[313]	355	/* Horizontal transform */
	356	for (i=0; i<uiTrSize; i++)
[1029]	357	{
[313]	358	for (j=0; j<uiTrSize; j++)
	359	{
	360	iSum = 0;
	361	for (k=0; k<uiTrSize; k++)
[1029]	362	{
	363	iSum += iT[kuiTrSize+i]coeff[k*uiTrSize+j];
[313]	364	}
[1029]	365
	366	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
	367	tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st);
[313]	368	}
	369	}
[1029]	370
[313]	371	/* Vertical transform */
	372	for (i=0; i<uiTrSize; i++)
[1029]	373	{
[313]	374	for (j=0; j<uiTrSize; j++)
	375	{
	376	iSum = 0;
	377	for (k=0; k<uiTrSize; k++)
[1029]	378	{
[313]	379	iSum += iT[kuiTrSize+j]tmp[i*uiTrSize+k];
	380	}
[1029]	381
	382	block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd);
[313]	383	}
	384	}
	385	}
	386
[1029]	387	#endif //MATRIX_MULT
[313]	388
[1029]	389
[313]	390	/** 4x4 forward transform implemented using partial butterfly structure (1D)
	391	* \param src input data (residual)
	392	* \param dst output data (transform coefficients)
	393	* \param shift specifies right shift after 1D transform
[1260]	394	* \param line
[313]	395	*/
[1029]	396	Void partialButterfly4(TCoeff src, TCoeff dst, Int shift, Int line)
[313]	397	{
	398	Int j;
[1029]	399	TCoeff E[2],O[2];
	400	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	401
	402	for (j=0; j<line; j++)
[1029]	403	{
[313]	404	/* E and O */
	405	E[0] = src[0] + src[3];
	406	O[0] = src[0] - src[3];
	407	E[1] = src[1] + src[2];
	408	O[1] = src[1] - src[2];
	409
[1029]	410	dst[0] = (g_aiT4[TRANSFORM_FORWARD][0][0]E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]E[1] + add)>>shift;
	411	dst[2line] = (g_aiT4[TRANSFORM_FORWARD][2][0]E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift;
	412	dst[line] = (g_aiT4[TRANSFORM_FORWARD][1][0]O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]O[1] + add)>>shift;
	413	dst[3line] = (g_aiT4[TRANSFORM_FORWARD][3][0]O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift;
[313]	414
	415	src += 4;
	416	dst ++;
	417	}
	418	}
	419
[1029]	420	// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
[313]	421	// give identical results
[1029]	422	Void fastForwardDst(TCoeff block, TCoeff coeff, Int shift) // input block, output coeff
[313]	423	{
[1029]	424	Int i;
	425	TCoeff c[4];
	426	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	427	for (i=0; i<4; i++)
	428	{
	429	// Intermediate Variables
[1029]	430	c[0] = block[4*i+0];
	431	c[1] = block[4*i+1];
	432	c[2] = block[4*i+2];
	433	c[3] = block[4*i+3];
[313]	434
[1029]	435	for (Int row = 0; row < 4; row++)
	436	{
	437	TCoeff result = 0;
	438	for (Int column = 0; column < 4; column++)
[1246]	439	{
[1029]	440	result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers
[1246]	441	}
[1029]	442
	443	coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift);
	444	}
[313]	445	}
	446	}
	447
[1029]	448	Void fastInverseDst(TCoeff tmp, TCoeff block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum) // input tmp, output block
[313]	449	{
[1029]	450	Int i;
	451	TCoeff c[4];
	452	TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	453	for (i=0; i<4; i++)
[1029]	454	{
[313]	455	// Intermediate Variables
[1029]	456	c[0] = tmp[ i];
	457	c[1] = tmp[4 +i];
	458	c[2] = tmp[8 +i];
	459	c[3] = tmp[12+i];
[313]	460
[1029]	461	for (Int column = 0; column < 4; column++)
	462	{
	463	TCoeff &result = block[(i * 4) + column];
	464
	465	result = 0;
	466	for (Int row = 0; row < 4; row++)
[1246]	467	{
[1029]	468	result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers
[1246]	469	}
[1029]	470
	471	result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift));
	472	}
[313]	473	}
	474	}
	475
[1029]	476	/** 4x4 inverse transform implemented using partial butterfly structure (1D)
	477	* \param src input data (transform coefficients)
	478	* \param dst output data (residual)
	479	* \param shift specifies right shift after 1D transform
[1260]	480	* \param line
	481	* \param outputMinimum minimum for clipping
	482	* \param outputMaximum maximum for clipping
[1029]	483	*/
	484	Void partialButterflyInverse4(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
[313]	485	{
	486	Int j;
[1029]	487	TCoeff E[2],O[2];
	488	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	489
	490	for (j=0; j<line; j++)
[1029]	491	{
	492	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	493	O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]src[3*line];
	494	O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]src[3*line];
	495	E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]src[0] + g_aiT4[TRANSFORM_INVERSE][2][0]src[2*line];
	496	E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]src[0] + g_aiT4[TRANSFORM_INVERSE][2][1]src[2*line];
[313]	497
	498	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
[1029]	499	dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift );
	500	dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift );
	501	dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift );
	502	dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift );
	503
[313]	504	src ++;
	505	dst += 4;
	506	}
	507	}
	508
[1029]	509	/** 8x8 forward transform implemented using partial butterfly structure (1D)
	510	* \param src input data (residual)
	511	* \param dst output data (transform coefficients)
	512	* \param shift specifies right shift after 1D transform
[1260]	513	* \param line
[1029]	514	*/
	515	Void partialButterfly8(TCoeff src, TCoeff dst, Int shift, Int line)
[313]	516	{
	517	Int j,k;
[1029]	518	TCoeff E[4],O[4];
	519	TCoeff EE[2],EO[2];
	520	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	521
	522	for (j=0; j<line; j++)
[1029]	523	{
[313]	524	/* E and O*/
	525	for (k=0;k<4;k++)
	526	{
	527	E[k] = src[k] + src[7-k];
	528	O[k] = src[k] - src[7-k];
[1029]	529	}
[313]	530	/* EE and EO */
[1029]	531	EE[0] = E[0] + E[3];
[313]	532	EO[0] = E[0] - E[3];
	533	EE[1] = E[1] + E[2];
	534	EO[1] = E[1] - E[2];
	535
[1029]	536	dst[0] = (g_aiT8[TRANSFORM_FORWARD][0][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]EE[1] + add)>>shift;
	537	dst[4line] = (g_aiT8[TRANSFORM_FORWARD][4][0]EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift;
	538	dst[2line] = (g_aiT8[TRANSFORM_FORWARD][2][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift;
	539	dst[6line] = (g_aiT8[TRANSFORM_FORWARD][6][0]EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift;
[313]	540
[1029]	541	dst[line] = (g_aiT8[TRANSFORM_FORWARD][1][0]O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]O[3] + add)>>shift;
	542	dst[3line] = (g_aiT8[TRANSFORM_FORWARD][3][0]O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift;
	543	dst[5line] = (g_aiT8[TRANSFORM_FORWARD][5][0]O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift;
	544	dst[7line] = (g_aiT8[TRANSFORM_FORWARD][7][0]O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift;
[313]	545
	546	src += 8;
	547	dst ++;
	548	}
	549	}
	550
[1029]	551	/** 8x8 inverse transform implemented using partial butterfly structure (1D)
	552	* \param src input data (transform coefficients)
	553	* \param dst output data (residual)
	554	* \param shift specifies right shift after 1D transform
[1260]	555	* \param line
	556	* \param outputMinimum minimum for clipping
	557	* \param outputMaximum maximum for clipping
[1029]	558	*/
	559	Void partialButterflyInverse8(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
[313]	560	{
	561	Int j,k;
[1029]	562	TCoeff E[4],O[4];
	563	TCoeff EE[2],EO[2];
	564	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	565
[1029]	566	for (j=0; j<line; j++)
	567	{
[313]	568	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	569	for (k=0;k<4;k++)
	570	{
[1029]	571	O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]src[line] + g_aiT8[TRANSFORM_INVERSE][ 3][k]src[3*line] +
	572	g_aiT8[TRANSFORM_INVERSE][ 5][k]src[5line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]src[7line];
[313]	573	}
	574
[1029]	575	EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][0]src[ 6line ];
	576	EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]src[ 2line ] + g_aiT8[TRANSFORM_INVERSE][6][1]src[ 6line ];
	577	EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][0]src[ 4*line ];
	578	EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][1]src[ 4*line ];
[313]	579
[1029]	580	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
[313]	581	E[0] = EE[0] + EO[0];
	582	E[3] = EE[0] - EO[0];
	583	E[1] = EE[1] + EO[1];
	584	E[2] = EE[1] - EO[1];
	585	for (k=0;k<4;k++)
	586	{
[1029]	587	dst[ k ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
	588	dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift );
	589	}
[313]	590	src ++;
	591	dst += 8;
	592	}
	593	}
	594
[1029]	595	/** 16x16 forward transform implemented using partial butterfly structure (1D)
	596	* \param src input data (residual)
	597	* \param dst output data (transform coefficients)
	598	* \param shift specifies right shift after 1D transform
[1260]	599	* \param line
[1029]	600	*/
	601	Void partialButterfly16(TCoeff src, TCoeff dst, Int shift, Int line)
[313]	602	{
	603	Int j,k;
[1029]	604	TCoeff E[8],O[8];
	605	TCoeff EE[4],EO[4];
	606	TCoeff EEE[2],EEO[2];
	607	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	608
[1029]	609	for (j=0; j<line; j++)
	610	{
[313]	611	/* E and O*/
	612	for (k=0;k<8;k++)
	613	{
	614	E[k] = src[k] + src[15-k];
	615	O[k] = src[k] - src[15-k];
[1029]	616	}
[313]	617	/* EE and EO */
	618	for (k=0;k<4;k++)
	619	{
	620	EE[k] = E[k] + E[7-k];
	621	EO[k] = E[k] - E[7-k];
	622	}
	623	/* EEE and EEO */
[1029]	624	EEE[0] = EE[0] + EE[3];
[313]	625	EEO[0] = EE[0] - EE[3];
	626	EEE[1] = EE[1] + EE[2];
	627	EEO[1] = EE[1] - EE[2];
	628
[1029]	629	dst[ 0 ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]EEE[1] + add)>>shift;
	630	dst[ 8line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift;
	631	dst[ 4line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift;
	632	dst[ 12line] = (g_aiT16[TRANSFORM_FORWARD][12][0]EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift;
[313]	633
	634	for (k=2;k<16;k+=4)
	635	{
[1029]	636	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] +
	637	g_aiT16[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]EO[3] + add)>>shift;
[313]	638	}
	639
	640	for (k=1;k<16;k+=2)
	641	{
[1029]	642	dst[ kline ] = (g_aiT16[TRANSFORM_FORWARD][k][0]O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] +
	643	g_aiT16[TRANSFORM_FORWARD][k][2]O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]O[3] +
	644	g_aiT16[TRANSFORM_FORWARD][k][4]O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]O[5] +
	645	g_aiT16[TRANSFORM_FORWARD][k][6]O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]O[7] + add)>>shift;
[313]	646	}
	647
	648	src += 16;
[1029]	649	dst ++;
[313]	650
	651	}
	652	}
	653
[1029]	654	/** 16x16 inverse transform implemented using partial butterfly structure (1D)
[1260]	655	* \param src input data (transform coefficients)
	656	* \param dst output data (residual)
	657	* \param shift specifies right shift after 1D transform
	658	* \param line
	659	* \param outputMinimum minimum for clipping
	660	* \param outputMaximum maximum for clipping
[1029]	661	*/
	662	Void partialButterflyInverse16(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
[313]	663	{
	664	Int j,k;
[1029]	665	TCoeff E[8],O[8];
	666	TCoeff EE[4],EO[4];
	667	TCoeff EEE[2],EEO[2];
	668	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	669
	670	for (j=0; j<line; j++)
[1029]	671	{
[313]	672	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	673	for (k=0;k<8;k++)
	674	{
[1029]	675	O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]src[ line] + g_aiT16[TRANSFORM_INVERSE][ 3][k]src[ 3*line] +
	676	g_aiT16[TRANSFORM_INVERSE][ 5][k]src[ 5line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]src[ 7line] +
	677	g_aiT16[TRANSFORM_INVERSE][ 9][k]src[ 9line] + g_aiT16[TRANSFORM_INVERSE][11][k]src[11line] +
	678	g_aiT16[TRANSFORM_INVERSE][13][k]src[13line] + g_aiT16[TRANSFORM_INVERSE][15][k]src[15line];
[313]	679	}
	680	for (k=0;k<4;k++)
	681	{
[1029]	682	EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]src[ 2line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]src[ 6line] +
	683	g_aiT16[TRANSFORM_INVERSE][10][k]src[10line] + g_aiT16[TRANSFORM_INVERSE][14][k]src[14line];
[313]	684	}
[1029]	685	EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][0]src[ 12line ];
	686	EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]src[ 8*line ];
	687	EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]src[ 4line ] + g_aiT16[TRANSFORM_INVERSE][12][1]src[ 12line ];
	688	EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]src[ 8*line ];
[313]	689
[1029]	690	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
[313]	691	for (k=0;k<2;k++)
	692	{
	693	EE[k] = EEE[k] + EEO[k];
	694	EE[k+2] = EEE[1-k] - EEO[1-k];
[1029]	695	}
[313]	696	for (k=0;k<4;k++)
	697	{
	698	E[k] = EE[k] + EO[k];
	699	E[k+4] = EE[3-k] - EO[3-k];
[1029]	700	}
[313]	701	for (k=0;k<8;k++)
	702	{
[1029]	703	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
	704	dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift );
	705	}
	706	src ++;
[313]	707	dst += 16;
	708	}
	709	}
	710
[1029]	711	/** 32x32 forward transform implemented using partial butterfly structure (1D)
	712	* \param src input data (residual)
	713	* \param dst output data (transform coefficients)
	714	* \param shift specifies right shift after 1D transform
[1260]	715	* \param line
[1029]	716	*/
	717	Void partialButterfly32(TCoeff src, TCoeff dst, Int shift, Int line)
[313]	718	{
	719	Int j,k;
[1029]	720	TCoeff E[16],O[16];
	721	TCoeff EE[8],EO[8];
	722	TCoeff EEE[4],EEO[4];
	723	TCoeff EEEE[2],EEEO[2];
	724	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	725
	726	for (j=0; j<line; j++)
[1029]	727	{
[313]	728	/* E and O*/
	729	for (k=0;k<16;k++)
	730	{
	731	E[k] = src[k] + src[31-k];
	732	O[k] = src[k] - src[31-k];
[1029]	733	}
[313]	734	/* EE and EO */
	735	for (k=0;k<8;k++)
	736	{
	737	EE[k] = E[k] + E[15-k];
	738	EO[k] = E[k] - E[15-k];
	739	}
	740	/* EEE and EEO */
	741	for (k=0;k<4;k++)
	742	{
	743	EEE[k] = EE[k] + EE[7-k];
	744	EEO[k] = EE[k] - EE[7-k];
	745	}
	746	/* EEEE and EEEO */
[1029]	747	EEEE[0] = EEE[0] + EEE[3];
[313]	748	EEEO[0] = EEE[0] - EEE[3];
	749	EEEE[1] = EEE[1] + EEE[2];
	750	EEEO[1] = EEE[1] - EEE[2];
	751
[1029]	752	dst[ 0 ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]EEEE[1] + add)>>shift;
	753	dst[ 16line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift;
	754	dst[ 8line ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift;
	755	dst[ 24line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift;
[313]	756	for (k=4;k<32;k+=8)
	757	{
[1029]	758	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] +
	759	g_aiT32[TRANSFORM_FORWARD][k][2]EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EEO[3] + add)>>shift;
	760	}
[313]	761	for (k=2;k<32;k+=4)
	762	{
[1029]	763	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][0]EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] +
	764	g_aiT32[TRANSFORM_FORWARD][k][2]EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]EO[3] +
	765	g_aiT32[TRANSFORM_FORWARD][k][4]EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]EO[5] +
	766	g_aiT32[TRANSFORM_FORWARD][k][6]EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]EO[7] + add)>>shift;
	767	}
[313]	768	for (k=1;k<32;k+=2)
	769	{
[1029]	770	dst[ kline ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] +
	771	g_aiT32[TRANSFORM_FORWARD][k][ 2]O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]O[ 3] +
	772	g_aiT32[TRANSFORM_FORWARD][k][ 4]O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]O[ 5] +
	773	g_aiT32[TRANSFORM_FORWARD][k][ 6]O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]O[ 7] +
	774	g_aiT32[TRANSFORM_FORWARD][k][ 8]O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]O[ 9] +
	775	g_aiT32[TRANSFORM_FORWARD][k][10]O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]O[11] +
	776	g_aiT32[TRANSFORM_FORWARD][k][12]O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]O[13] +
	777	g_aiT32[TRANSFORM_FORWARD][k][14]O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]O[15] + add)>>shift;
[313]	778	}
[1029]	779
[313]	780	src += 32;
	781	dst ++;
	782	}
	783	}
	784
[1029]	785	/** 32x32 inverse transform implemented using partial butterfly structure (1D)
	786	* \param src input data (transform coefficients)
	787	* \param dst output data (residual)
	788	* \param shift specifies right shift after 1D transform
[1260]	789	* \param line
	790	* \param outputMinimum minimum for clipping
	791	* \param outputMaximum maximum for clipping
[1029]	792	*/
	793	Void partialButterflyInverse32(TCoeff src, TCoeff dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
[313]	794	{
	795	Int j,k;
[1029]	796	TCoeff E[16],O[16];
	797	TCoeff EE[8],EO[8];
	798	TCoeff EEE[4],EEO[4];
	799	TCoeff EEEE[2],EEEO[2];
	800	TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
[313]	801
	802	for (j=0; j<line; j++)
[1029]	803	{
[313]	804	/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
	805	for (k=0;k<16;k++)
	806	{
[1029]	807	O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]src[ line ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]src[ 3*line ] +
	808	g_aiT32[TRANSFORM_INVERSE][ 5][k]src[ 5line ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]src[ 7line ] +
	809	g_aiT32[TRANSFORM_INVERSE][ 9][k]src[ 9line ] + g_aiT32[TRANSFORM_INVERSE][11][k]src[ 11line ] +
	810	g_aiT32[TRANSFORM_INVERSE][13][k]src[ 13line ] + g_aiT32[TRANSFORM_INVERSE][15][k]src[ 15line ] +
	811	g_aiT32[TRANSFORM_INVERSE][17][k]src[ 17line ] + g_aiT32[TRANSFORM_INVERSE][19][k]src[ 19line ] +
	812	g_aiT32[TRANSFORM_INVERSE][21][k]src[ 21line ] + g_aiT32[TRANSFORM_INVERSE][23][k]src[ 23line ] +
	813	g_aiT32[TRANSFORM_INVERSE][25][k]src[ 25line ] + g_aiT32[TRANSFORM_INVERSE][27][k]src[ 27line ] +
	814	g_aiT32[TRANSFORM_INVERSE][29][k]src[ 29line ] + g_aiT32[TRANSFORM_INVERSE][31][k]src[ 31line ];
[313]	815	}
	816	for (k=0;k<8;k++)
	817	{
[1029]	818	EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]src[ 2line ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]src[ 6line ] +
	819	g_aiT32[TRANSFORM_INVERSE][10][k]src[ 10line ] + g_aiT32[TRANSFORM_INVERSE][14][k]src[ 14line ] +
	820	g_aiT32[TRANSFORM_INVERSE][18][k]src[ 18line ] + g_aiT32[TRANSFORM_INVERSE][22][k]src[ 22line ] +
	821	g_aiT32[TRANSFORM_INVERSE][26][k]src[ 26line ] + g_aiT32[TRANSFORM_INVERSE][30][k]src[ 30line ];
[313]	822	}
	823	for (k=0;k<4;k++)
	824	{
[1029]	825	EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]src[ 4line ] + g_aiT32[TRANSFORM_INVERSE][12][k]src[ 12line ] +
	826	g_aiT32[TRANSFORM_INVERSE][20][k]src[ 20line ] + g_aiT32[TRANSFORM_INVERSE][28][k]src[ 28line ];
[313]	827	}
[1029]	828	EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][0]src[ 24line ];
	829	EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]src[ 8line ] + g_aiT32[TRANSFORM_INVERSE][24][1]src[ 24line ];
	830	EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][0]src[ 16*line ];
	831	EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][1]src[ 16*line ];
[313]	832
	833	/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
	834	EEE[0] = EEEE[0] + EEEO[0];
	835	EEE[3] = EEEE[0] - EEEO[0];
	836	EEE[1] = EEEE[1] + EEEO[1];
[1029]	837	EEE[2] = EEEE[1] - EEEO[1];
[313]	838	for (k=0;k<4;k++)
	839	{
	840	EE[k] = EEE[k] + EEO[k];
	841	EE[k+4] = EEE[3-k] - EEO[3-k];
[1029]	842	}
[313]	843	for (k=0;k<8;k++)
	844	{
	845	E[k] = EE[k] + EO[k];
	846	E[k+8] = EE[7-k] - EO[7-k];
[1029]	847	}
[313]	848	for (k=0;k<16;k++)
	849	{
[1029]	850	dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
	851	dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift );
[313]	852	}
	853	src ++;
	854	dst += 32;
	855	}
	856	}
	857
	858	/** MxN forward transform (2D)
[1260]	859	* \param bitDepth [in] bit depth
	860	* \param block [in] residual block
	861	* \param coeff [out] transform coefficients
	862	* \param iWidth [in] width of transform
	863	* \param iHeight [in] height of transform
	864	* \param useDST [in]
	865	* \param maxTrDynamicRange [in]
	866
[313]	867	*/
[1029]	868	Void xTrMxN(Int bitDepth, TCoeff block, TCoeff coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
[313]	869	{
[1029]	870	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
[313]	871
[1029]	872	const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
	873	const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;
[313]	874
[1029]	875	assert(shift_1st >= 0);
	876	assert(shift_2nd >= 0);
	877
	878	TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];
	879
	880	switch (iWidth)
[313]	881	{
[1029]	882	case 4:
	883	{
	884	if ((iHeight == 4) && useDST) // Check for DCT or DST
	885	{
	886	fastForwardDst( block, tmp, shift_1st );
	887	}
[1246]	888	else
	889	{
	890	partialButterfly4 ( block, tmp, shift_1st, iHeight );
	891	}
[1029]	892	}
	893	break;
[313]	894
[1029]	895	case 8: partialButterfly8 ( block, tmp, shift_1st, iHeight ); break;
	896	case 16: partialButterfly16( block, tmp, shift_1st, iHeight ); break;
	897	case 32: partialButterfly32( block, tmp, shift_1st, iHeight ); break;
	898	default:
	899	assert(0); exit (1); break;
[313]	900	}
[1029]	901
	902	switch (iHeight)
[313]	903	{
[1029]	904	case 4:
	905	{
	906	if ((iWidth == 4) && useDST) // Check for DCT or DST
	907	{
	908	fastForwardDst( tmp, coeff, shift_2nd );
	909	}
[1246]	910	else
	911	{
	912	partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
	913	}
[1029]	914	}
	915	break;
	916
	917	case 8: partialButterfly8 ( tmp, coeff, shift_2nd, iWidth ); break;
	918	case 16: partialButterfly16( tmp, coeff, shift_2nd, iWidth ); break;
	919	case 32: partialButterfly32( tmp, coeff, shift_2nd, iWidth ); break;
	920	default:
	921	assert(0); exit (1); break;
[313]	922	}
	923	}
[1029]	924
	925
[313]	926	/** MxN inverse transform (2D)
[1260]	927	* \param bitDepth [in] bit depth
	928	* \param coeff [in] transform coefficients
	929	* \param block [out] residual block
	930	* \param iWidth [in] width of transform
	931	* \param iHeight [in] height of transform
	932	* \param useDST [in]
	933	* \param maxTrDynamicRange [in]
[313]	934	*/
[1029]	935	Void xITrMxN(Int bitDepth, TCoeff coeff, TCoeff block, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
[313]	936	{
[1029]	937	static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
[313]	938
[1029]	939	Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
	940	Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
	941	const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
	942	const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
	943
	944	assert(shift_1st >= 0);
	945	assert(shift_2nd >= 0);
	946
	947	TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
	948
	949	switch (iHeight)
[313]	950	{
[1029]	951	case 4:
	952	{
	953	if ((iWidth == 4) && useDST) // Check for DCT or DST
	954	{
	955	fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum);
	956	}
[1246]	957	else
	958	{
	959	partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum);
	960	}
[1029]	961	}
	962	break;
	963
	964	case 8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
	965	case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
	966	case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
	967
	968	default:
	969	assert(0); exit (1); break;
[313]	970	}
[1029]	971
	972	switch (iWidth)
[313]	973	{
[1029]	974	// Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
	975	case 4:
	976	{
	977	if ((iHeight == 4) && useDST) // Check for DCT or DST
	978	{
	979	fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() );
	980	}
[1246]	981	else
	982	{
	983	partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max());
	984	}
[1029]	985	}
	986	break;
	987
	988	case 8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
	989	case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
	990	case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
	991
	992	default:
	993	assert(0); exit (1); break;
[313]	994	}
	995	}
	996
	997
[1029]	998	// To minimize the distortion only. No rate is considered.
	999	Void TComTrQuant::signBitHidingHDQ( const ComponentID compID, TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters )
[313]	1000	{
[1029]	1001	const UInt width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
	1002	const UInt height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
	1003	const UInt groupSize = 1 << MLS_CG_SIZE;
	1004
	1005	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
	1006	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
	1007
[313]	1008	Int lastCG = -1;
	1009	Int absSum = 0 ;
	1010	Int n ;
	1011
[1029]	1012	for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
[313]	1013	{
[1029]	1014	Int subPos = subSet << MLS_CG_SIZE;
	1015	Int firstNZPosInCG=groupSize , lastNZPosInCG=-1 ;
[313]	1016	absSum = 0 ;
	1017
[1029]	1018	for(n = groupSize-1; n >= 0; --n )
[313]	1019	{
[1029]	1020	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
[313]	1021	{
	1022	lastNZPosInCG = n;
	1023	break;
	1024	}
	1025	}
	1026
[1029]	1027	for(n = 0; n <groupSize; n++ )
[313]	1028	{
[1029]	1029	if( pQCoef[ codingParameters.scan[ n + subPos ]] )
[313]	1030	{
	1031	firstNZPosInCG = n;
	1032	break;
	1033	}
	1034	}
	1035
	1036	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
	1037	{
[1029]	1038	absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]);
[313]	1039	}
	1040
[1029]	1041	if(lastNZPosInCG>=0 && lastCG==-1)
[313]	1042	{
[1029]	1043	lastCG = 1 ;
[313]	1044	}
	1045
	1046	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
	1047	{
[1029]	1048	UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ;
[313]	1049	if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
	1050	{
[1029]	1051	TCoeff curCost = std::numeric_limits<TCoeff>::max();
	1052	TCoeff minCostInc = std::numeric_limits<TCoeff>::max();
	1053	Int minPos =-1, finalChange=0, curChange=0;
	1054
	1055	for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n )
[313]	1056	{
[1029]	1057	UInt blkPos = codingParameters.scan[ n+subPos ];
[313]	1058	if(pQCoef[ blkPos ] != 0 )
	1059	{
	1060	if(deltaU[blkPos]>0)
	1061	{
[1029]	1062	curCost = - deltaU[blkPos];
[313]	1063	curChange=1 ;
	1064	}
[1029]	1065	else
[313]	1066	{
	1067	//curChange =-1;
	1068	if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
	1069	{
[1029]	1070	curCost = std::numeric_limits<TCoeff>::max();
[313]	1071	}
	1072	else
	1073	{
[1029]	1074	curCost = deltaU[blkPos];
[313]	1075	curChange =-1;
	1076	}
	1077	}
	1078	}
	1079	else
	1080	{
	1081	if(n<firstNZPosInCG)
	1082	{
	1083	UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
	1084	if(thisSignBit != signbit )
	1085	{
[1029]	1086	curCost = std::numeric_limits<TCoeff>::max();
[313]	1087	}
	1088	else
[1029]	1089	{
[313]	1090	curCost = - (deltaU[blkPos]) ;
	1091	curChange = 1 ;
	1092	}
	1093	}
	1094	else
	1095	{
	1096	curCost = - (deltaU[blkPos]) ;
	1097	curChange = 1 ;
	1098	}
	1099	}
	1100
	1101	if( curCost<minCostInc)
	1102	{
	1103	minCostInc = curCost ;
	1104	finalChange = curChange ;
	1105	minPos = blkPos ;
	1106	}
	1107	} //CG loop
	1108
[1029]	1109	if(pQCoef[minPos] == entropyCodingMaximum \|\| pQCoef[minPos] == entropyCodingMinimum)
[313]	1110	{
	1111	finalChange = -1;
	1112	}
	1113
	1114	if(pCoef[minPos]>=0)
	1115	{
[1029]	1116	pQCoef[minPos] += finalChange ;
[313]	1117	}
[1029]	1118	else
	1119	{
[313]	1120	pQCoef[minPos] -= finalChange ;
[1029]	1121	}
[313]	1122	} // Hide
	1123	}
[1029]	1124	if(lastCG==1)
[313]	1125	{
	1126	lastCG=0 ;
	1127	}
	1128	} // TU loop
	1129
	1130	return;
	1131	}
	1132
[1029]	1133
	1134	Void TComTrQuant::xQuant( TComTU &rTu,
	1135	TCoeff * pSrc,
	1136	TCoeff * pDes,
[313]	1137	#if ADAPTIVE_QP_SELECTION
[1029]	1138	TCoeff *pArlDes,
[313]	1139	#endif
[1029]	1140	TCoeff &uiAbsSum,
	1141	const ComponentID compID,
	1142	const QpParam &cQP )
[313]	1143	{
[1029]	1144	const TComRectangle &rect = rTu.getRect(compID);
	1145	const UInt uiWidth = rect.width;
	1146	const UInt uiHeight = rect.height;
	1147	TComDataCU* pcCU = rTu.getCU();
	1148	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
	1149
	1150	TCoeff* piCoef = pSrc;
[313]	1151	TCoeff* piQCoef = pDes;
	1152	#if ADAPTIVE_QP_SELECTION
[1029]	1153	TCoeff* piArlCCoef = pArlDes;
[313]	1154	#endif
[1029]	1155
	1156	const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
	1157
	1158	Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
	1159	if ( useRDOQ && (isLuma(compID) \|\| RDOQ_CHROMA) )
[313]	1160	{
	1161	#if ADAPTIVE_QP_SELECTION
[1029]	1162	xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );
[313]	1163	#else
[1029]	1164	xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );
[313]	1165	#endif
	1166	}
	1167	else
	1168	{
[1029]	1169	TUEntropyCodingParameters codingParameters;
	1170	getTUEntropyCodingParameters(codingParameters, rTu, compID);
[313]	1171
[1029]	1172	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
	1173	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
[313]	1174
[1029]	1175	TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
[313]	1176
[1029]	1177	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
[313]	1178
[1029]	1179	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
	1180	assert(scalingListType < SCALING_LIST_NUM);
	1181	Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
	1182
	1183	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
	1184	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
	1185
	1186	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
	1187	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
	1188	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
	1189	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
	1190	*/
	1191
	1192	// Represents scaling through forward transform
	1193	Int iTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
	1194	if (useTransformSkip && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
[313]	1195	{
[1029]	1196	iTransformShift = std::max<Int>(0, iTransformShift);
[313]	1197	}
[1029]	1198
	1199	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
	1200	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
	1201
	1202	#if ADAPTIVE_QP_SELECTION
	1203	Int iQBitsC = MAX_INT;
	1204	Int iAddC = MAX_INT;
	1205
	1206	if (m_bUseAdaptQpSelect)
[313]	1207	{
[1029]	1208	iQBitsC = iQBits - ARL_C_PRECISION;
	1209	iAddC = 1 << (iQBitsC-1);
[313]	1210	}
	1211	#endif
	1212
[1029]	1213	const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
	1214	const Int qBits8 = iQBits - 8;
[313]	1215
[1029]	1216	for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
	1217	{
	1218	const TCoeff iLevel = piCoef[uiBlockPos];
	1219	const TCoeff iSign = (iLevel < 0 ? -1: 1);
[313]	1220
[1029]	1221	const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
[313]	1222
	1223	#if ADAPTIVE_QP_SELECTION
	1224	if( m_bUseAdaptQpSelect )
	1225	{
[1029]	1226	piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
[313]	1227	}
	1228	#endif
[1029]	1229
	1230	const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
	1231	deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);
	1232
	1233	uiAbsSum += quantisedMagnitude;
	1234	const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
	1235
	1236	piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
[313]	1237	} // for n
[1029]	1238
[313]	1239	if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
	1240	{
[1029]	1241	if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
[313]	1242	{
[1029]	1243	signBitHidingHDQ( compID, piQCoef, piCoef, deltaU, codingParameters ) ;
[313]	1244	}
	1245	}
	1246	} //if RDOQ
	1247	//return;
	1248	}
	1249
[1029]	1250	Void TComTrQuant::xDeQuant( TComTU &rTu,
	1251	const TCoeff * pSrc,
	1252	TCoeff * pDes,
	1253	const ComponentID compID,
	1254	const QpParam &cQP )
[313]	1255	{
[1029]	1256	assert(compID<MAX_NUM_COMPONENT);
[313]	1257
[1029]	1258	TComDataCU *pcCU = rTu.getCU();
	1259	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
	1260	const TComRectangle &rect = rTu.getRect(compID);
	1261	const UInt uiWidth = rect.width;
	1262	const UInt uiHeight = rect.height;
	1263	const TCoeff *const piQCoef = pSrc;
	1264	TCoeff *const piCoef = pDes;
	1265	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
	1266	const UInt numSamplesInBlock = uiWidth*uiHeight;
	1267	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
	1268	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
	1269	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
	1270	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
[313]	1271
[1029]	1272	assert (scalingListType < SCALING_LIST_NUM);
	1273	assert ( uiWidth <= m_uiMaxTrSize );
[313]	1274
[1029]	1275	// Represents scaling through forward transform
	1276	const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
	1277	const Int originalTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
	1278	const Int iTransformShift = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift;
[313]	1279
[1029]	1280	const Int QP_per = cQP.per;
	1281	const Int QP_rem = cQP.rem;
	1282
	1283	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
	1284
	1285	if(enableScalingLists)
[313]	1286	{
[1029]	1287	//from the dequantisation equation:
	1288	//iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift
	1289	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + dequantCoefBits - rightShift
	1290	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
	1291	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
[313]	1292
[1029]	1293	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
	1294	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
	1295
	1296	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
	1297
	1298	if(rightShift > 0)
[313]	1299	{
[1029]	1300	const Intermediate_Int iAdd = 1 << (rightShift - 1);
	1301
	1302	for( Int n = 0; n < numSamplesInBlock; n++ )
[313]	1303	{
[1029]	1304	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
	1305	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
	1306
	1307	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
[313]	1308	}
	1309	}
	1310	else
	1311	{
[1029]	1312	const Int leftShift = -rightShift;
	1313
	1314	for( Int n = 0; n < numSamplesInBlock; n++ )
[313]	1315	{
[1029]	1316	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
	1317	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
	1318
	1319	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
[313]	1320	}
	1321	}
	1322	}
	1323	else
	1324	{
[1029]	1325	const Int scale = g_invQuantScales[QP_rem];
	1326	const Int scaleBits = (IQUANT_SHIFT + 1) ;
[313]	1327
[1029]	1328	//from the dequantisation equation:
	1329	//iCoeffQ = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift);
	1330	//(sizeof(Intermediate_Int) * 8) = inputBitDepth + scaleBits - rightShift
	1331	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
	1332	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
	1333	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
	1334
	1335	if (rightShift > 0)
[313]	1336	{
[1029]	1337	const Intermediate_Int iAdd = 1 << (rightShift - 1);
	1338
	1339	for( Int n = 0; n < numSamplesInBlock; n++ )
	1340	{
	1341	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
	1342	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
	1343
	1344	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
	1345	}
[313]	1346	}
[1029]	1347	else
	1348	{
	1349	const Int leftShift = -rightShift;
	1350
	1351	for( Int n = 0; n < numSamplesInBlock; n++ )
	1352	{
	1353	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
	1354	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
	1355
	1356	piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
	1357	}
	1358	}
[313]	1359	}
	1360	}
	1361
[1029]	1362
	1363	Void TComTrQuant::init( UInt uiMaxTrSize,
	1364	Bool bUseRDOQ,
	1365	Bool bUseRDOQTS,
	1366	Bool bEnc,
	1367	Bool useTransformSkipFast
[313]	1368	#if ADAPTIVE_QP_SELECTION
[1029]	1369	, Bool bUseAdaptQpSelect
[313]	1370	#endif
	1371	)
	1372	{
	1373	m_uiMaxTrSize = uiMaxTrSize;
	1374	m_bEnc = bEnc;
[1029]	1375	m_useRDOQ = bUseRDOQ;
	1376	m_useRDOQTS = bUseRDOQTS;
[313]	1377	#if ADAPTIVE_QP_SELECTION
	1378	m_bUseAdaptQpSelect = bUseAdaptQpSelect;
	1379	#endif
	1380	m_useTransformSkipFast = useTransformSkipFast;
	1381	}
	1382
[1029]	1383
	1384	Void TComTrQuant::transformNxN( TComTU & rTu,
	1385	const ComponentID compID,
	1386	Pel * pcResidual,
	1387	const UInt uiStride,
	1388	TCoeff * rpcCoeff,
[313]	1389	#if ADAPTIVE_QP_SELECTION
[1029]	1390	TCoeff * pcArlCoeff,
[313]	1391	#endif
[1029]	1392	TCoeff & uiAbsSum,
	1393	const QpParam & cQP
	1394	)
[313]	1395	{
[1029]	1396	const TComRectangle &rect = rTu.getRect(compID);
	1397	const UInt uiWidth = rect.width;
	1398	const UInt uiHeight = rect.height;
	1399	TComDataCU* pcCU = rTu.getCU();
	1400	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
	1401	const UInt uiOrgTrDepth = rTu.GetTransformDepthRel();
	1402
	1403	uiAbsSum=0;
	1404
	1405	RDPCMMode rdpcmMode = RDPCM_OFF;
	1406	rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
	1407
	1408	if (rdpcmMode == RDPCM_OFF)
[313]	1409	{
[1029]	1410	uiAbsSum = 0;
	1411	//transform and quantise
	1412	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
[313]	1413	{
[1029]	1414	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
	1415	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
	1416
	1417	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
[313]	1418	{
[1029]	1419	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
	1420	{
	1421	const Pel currentSample = pcResidual[(y * uiStride) + x];
	1422
	1423	rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
	1424	uiAbsSum += TCoeff(abs(currentSample));
	1425	}
[313]	1426	}
	1427	}
[1029]	1428	else
	1429	{
	1430	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
	1431	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
	1432	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
	1433	#endif
	1434
	1435	assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
	1436
	1437	if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
	1438	{
	1439	xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
	1440	}
	1441	else
	1442	{
	1443	xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	1444	}
	1445
	1446	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
	1447	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
	1448	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
	1449	#endif
	1450
	1451	xQuant( rTu, m_plTempCoeff, rpcCoeff,
	1452
	1453	#if ADAPTIVE_QP_SELECTION
	1454	pcArlCoeff,
	1455	#endif
	1456	uiAbsSum, compID, cQP );
	1457
	1458	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
	1459	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
	1460	printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
	1461	#endif
	1462	}
[313]	1463	}
[1029]	1464
	1465	//set the CBF
	1466	pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
	1467	}
	1468
	1469
	1470	Void TComTrQuant::invTransformNxN( TComTU &rTu,
	1471	const ComponentID compID,
	1472	Pel *pcResidual,
	1473	const UInt uiStride,
	1474	TCoeff * pcCoeff,
	1475	const QpParam &cQP
	1476	DEBUG_STRING_FN_DECLAREP(psDebug))
	1477	{
	1478	TComDataCU* pcCU=rTu.getCU();
	1479	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
	1480	const TComRectangle &rect = rTu.getRect(compID);
	1481	const UInt uiWidth = rect.width;
	1482	const UInt uiHeight = rect.height;
	1483
	1484	if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
[313]	1485	{
[1029]	1486	//------------------------------------------------
	1487
	1488	//recurse deeper
	1489
	1490	TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);
	1491
	1492	do
	1493	{
	1494	//------------------
	1495
	1496	const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;
	1497
	1498	Pel subTUResidual = pcResidual + (lineOffset uiStride);
	1499	TCoeff subTUCoefficients = pcCoeff + (lineOffset subTURecurse.getRect(compID).width);
	1500
	1501	invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
	1502
	1503	//------------------
	1504
[1246]	1505	} while (subTURecurse.nextSection(rTu));
[1029]	1506
	1507	//------------------------------------------------
	1508
	1509	return;
[313]	1510	}
[1029]	1511
	1512	#if defined DEBUG_STRING
	1513	if (psDebug)
[313]	1514	{
[1029]	1515	std::stringstream ss(stringstream::out);
	1516	printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
	1517	DEBUG_STRING_APPEND((*psDebug), ss.str())
[313]	1518	}
[1029]	1519	#endif
	1520
	1521	if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
[313]	1522	{
[1029]	1523	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
	1524	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
	1525
	1526	for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
	1527	{
	1528	for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
	1529	{
	1530	pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
	1531	}
	1532	}
[313]	1533	}
	1534	else
	1535	{
[1029]	1536	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
	1537	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
	1538	printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
	1539	#endif
	1540
	1541	xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);
	1542
	1543	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
	1544	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
	1545	printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
	1546	#endif
	1547
	1548	#if defined DEBUG_STRING
	1549	if (psDebug)
	1550	{
	1551	std::stringstream ss(stringstream::out);
	1552	printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
	1553	(*psDebug)+=ss.str();
	1554	}
	1555	#endif
	1556
	1557	if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
	1558	{
	1559	xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );
	1560
	1561	#if defined DEBUG_STRING
	1562	if (psDebug)
	1563	{
	1564	std::stringstream ss(stringstream::out);
	1565	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
	1566	(*psDebug)+=ss.str();
	1567	(*psDebug)+="(<- was a Transform-skipped block)\n";
	1568	}
	1569	#endif
	1570	}
	1571	else
	1572	{
	1573	xIT( compID, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight );
	1574
	1575	#if defined DEBUG_STRING
	1576	if (psDebug)
	1577	{
	1578	std::stringstream ss(stringstream::out);
	1579	printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
	1580	(*psDebug)+=ss.str();
	1581	(*psDebug)+="(<- was a Transformed block)\n";
	1582	}
	1583	#endif
	1584	}
	1585
	1586	#ifdef DEBUG_TRANSFORM_AND_QUANTISE
	1587	std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
	1588	printBlock(pcResidual, uiWidth, uiHeight, uiStride);
	1589	g_debugCounter++;
	1590	#endif
[313]	1591	}
[1029]	1592
	1593	invRdpcmNxN( rTu, compID, pcResidual, uiStride );
[313]	1594	}
	1595
[1029]	1596	Void TComTrQuant::invRecurTransformNxN( const ComponentID compID,
	1597	TComYuv *pResidual,
	1598	TComTU &rTu)
[313]	1599	{
[1246]	1600	if (!rTu.ProcessComponentSection(compID))
	1601	{
	1602	return;
	1603	}
[1029]	1604
	1605	TComDataCU* pcCU = rTu.getCU();
	1606	UInt absPartIdxTU = rTu.GetAbsPartIdxTU();
	1607	UInt uiTrMode=rTu.GetTransformDepthRel();
	1608	if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) \|\| !pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction()) )
[313]	1609	{
[1029]	1610	return;
	1611	}
	1612
	1613	if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) )
	1614	{
	1615	const TComRectangle &tuRect = rTu.getRect(compID);
	1616	const Int uiStride = pResidual->getStride( compID );
	1617	Pel *rpcResidual = pResidual->getAddr( compID );
	1618	UInt uiAddr = (tuRect.x0 + uiStride*tuRect.y0);
	1619	Pel *pResi = rpcResidual + uiAddr;
	1620	TCoeff *pcCoeff = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID);
	1621
	1622	const QpParam cQP(*pcCU, compID);
	1623
	1624	if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0)
[313]	1625	{
[1029]	1626	DEBUG_STRING_NEW(sTemp)
	1627	#ifdef DEBUG_STRING
	1628	std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0;
	1629	#endif
	1630
	1631	invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
	1632
	1633	#ifdef DEBUG_STRING
	1634	if (psDebug != 0)
[1246]	1635	{
[1029]	1636	std::cout << (*psDebug);
[1246]	1637	}
[1029]	1638	#endif
	1639	}
	1640
	1641	if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0))
	1642	{
	1643	const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y );
	1644	const Int strideLuma = pResidual->getStride( COMPONENT_Y );
	1645	const Int tuWidth = rTu.getRect( compID ).width;
	1646	const Int tuHeight = rTu.getRect( compID ).height;
	1647
	1648	if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0)
[313]	1649	{
[1029]	1650	pResi = rpcResidual + uiAddr;
	1651	const Pel *pResiLuma = piResiLuma + uiAddr;
	1652
	1653	crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true );
[313]	1654	}
[1029]	1655	}
[313]	1656	}
[1029]	1657	else
[313]	1658	{
[1029]	1659	TComTURecurse tuRecurseChild(rTu, false);
	1660	do
	1661	{
	1662	invRecurTransformNxN( compID, pResidual, tuRecurseChild );
[1246]	1663	} while (tuRecurseChild.nextSection(rTu));
[313]	1664	}
[1029]	1665	}
	1666
	1667	Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode )
	1668	{
	1669	TComDataCU *pcCU=rTu.getCU();
	1670	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
	1671
	1672	const Bool bLossless = pcCU->getCUTransquantBypass( uiAbsPartIdx );
	1673	const UInt uiWidth = rTu.getRect(compID).width;
	1674	const UInt uiHeight = rTu.getRect(compID).height;
	1675	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
	1676	const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
	1677
	1678	UInt uiX = 0;
	1679	UInt uiY = 0;
	1680
[1240]	1681	UInt &majorAxis = (mode == RDPCM_VER) ? uiX : uiY;
	1682	UInt &minorAxis = (mode == RDPCM_VER) ? uiY : uiX;
	1683	const UInt majorAxisLimit = (mode == RDPCM_VER) ? uiWidth : uiHeight;
	1684	const UInt minorAxisLimit = (mode == RDPCM_VER) ? uiHeight : uiWidth;
[1029]	1685
[1240]	1686	const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF);
[1029]	1687
	1688	uiAbsSum = 0;
	1689
	1690	for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ )
[313]	1691	{
[1240]	1692	TCoeff accumulatorValue = 0; // 32-bit accumulator
[1029]	1693	for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ )
	1694	{
	1695	const UInt sampleIndex = (uiY * uiWidth) + uiX;
	1696	const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex);
	1697	const Pel currentSample = pcResidual[(uiY * uiStride) + uiX];
[1240]	1698	const TCoeff encoderSideDelta = TCoeff(currentSample) - accumulatorValue;
[1029]	1699
	1700	Pel reconstructedDelta;
	1701	if ( bLossless )
	1702	{
	1703	pcCoeff[coefficientIndex] = encoderSideDelta;
	1704	reconstructedDelta = encoderSideDelta;
	1705	}
	1706	else
	1707	{
	1708	transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint);
	1709	invTrSkipDeQuantOneSample (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex);
	1710	}
	1711
	1712	uiAbsSum += abs(pcCoeff[coefficientIndex]);
	1713
[1253]	1714	if (mode != RDPCM_OFF)
[1240]	1715	{
	1716	accumulatorValue += reconstructedDelta;
	1717	}
[1029]	1718	}
[313]	1719	}
	1720	}
	1721
[1029]	1722	Void TComTrQuant::rdpcmNxN ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode )
[313]	1723	{
[1029]	1724	TComDataCU *pcCU=rTu.getCU();
	1725	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
	1726
	1727	if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) \|\| ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx)))
[313]	1728	{
[1029]	1729	rdpcmMode = RDPCM_OFF;
	1730	}
	1731	else if ( pcCU->isIntra( uiAbsPartIdx ) )
[313]	1732	{
[1029]	1733	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat();
	1734	const ChannelType chType = toChannelType(compID);
	1735	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
	1736	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
	1737	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
	1738
	1739	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
[313]	1740	{
[1029]	1741	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
	1742	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode );
	1743	}
[1246]	1744	else
	1745	{
	1746	rdpcmMode = RDPCM_OFF;
	1747	}
[1029]	1748	}
	1749	else // not intra, need to select the best mode
	1750	{
	1751	const UInt uiWidth = rTu.getRect(compID).width;
	1752	const UInt uiHeight = rTu.getRect(compID).height;
	1753
	1754	RDPCMMode bestMode = NUMBER_OF_RDPCM_MODES;
	1755	TCoeff bestAbsSum = std::numeric_limits<TCoeff>::max();
	1756	TCoeff bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE];
	1757
	1758	for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++)
	1759	{
	1760	const RDPCMMode mode = RDPCMMode(modeIndex);
	1761
	1762	TCoeff currAbsSum = 0;
	1763
	1764	applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode );
	1765
	1766	if (currAbsSum < bestAbsSum)
[313]	1767	{
[1029]	1768	bestMode = mode;
	1769	bestAbsSum = currAbsSum;
	1770	if (mode != RDPCM_OFF)
	1771	{
	1772	memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff)));
	1773	}
[313]	1774	}
	1775	}
[1029]	1776
	1777	rdpcmMode = bestMode;
	1778	uiAbsSum = bestAbsSum;
	1779
	1780	if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here
	1781	{
	1782	memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff)));
	1783	}
[313]	1784	}
[1029]	1785
	1786	pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
	1787	}
	1788
	1789	Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride )
	1790	{
	1791	TComDataCU *pcCU=rTu.getCU();
	1792	const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
	1793
	1794	if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) \|\| pcCU->getCUTransquantBypass(uiAbsPartIdx)))
[313]	1795	{
[1029]	1796	const UInt uiWidth = rTu.getRect(compID).width;
	1797	const UInt uiHeight = rTu.getRect(compID).height;
	1798
	1799	RDPCMMode rdpcmMode = RDPCM_OFF;
	1800
	1801	if ( pcCU->isIntra( uiAbsPartIdx ) )
[313]	1802	{
[1029]	1803	const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat();
	1804	const ChannelType chType = toChannelType(compID);
	1805	const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
	1806	const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
	1807	const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
	1808
	1809	if (uiChFinalMode == VER_IDX \|\| uiChFinalMode == HOR_IDX)
	1810	{
	1811	rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
	1812	}
[313]	1813	}
[1029]	1814	else // not intra case
	1815	{
	1816	rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx ));
	1817	}
	1818
[1240]	1819	static const TCoeff pelMin=(TCoeff) std::numeric_limits<Pel>::min();
	1820	static const TCoeff pelMax=(TCoeff) std::numeric_limits<Pel>::max();
[1029]	1821	if (rdpcmMode == RDPCM_VER)
	1822	{
[1240]	1823	for( UInt uiX = 0; uiX < uiWidth; uiX++ )
[1029]	1824	{
[1240]	1825	Pel *pcCurResidual = pcResidual+uiX;
	1826	TCoeff accumulator = *pcCurResidual; // 32-bit accumulator
	1827	pcCurResidual+=uiStride;
	1828	for( UInt uiY = 1; uiY < uiHeight; uiY++, pcCurResidual+=uiStride )
[1029]	1829	{
[1240]	1830	accumulator += *(pcCurResidual);
	1831	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
[1029]	1832	}
	1833	}
	1834	}
	1835	else if (rdpcmMode == RDPCM_HOR)
	1836	{
	1837	for( UInt uiY = 0; uiY < uiHeight; uiY++ )
	1838	{
[1240]	1839	Pel pcCurResidual = pcResidual+uiYuiStride;
	1840	TCoeff accumulator = *pcCurResidual;
	1841	pcCurResidual++;
	1842	for( UInt uiX = 1; uiX < uiWidth; uiX++, pcCurResidual++ )
[1029]	1843	{
[1240]	1844	accumulator += *(pcCurResidual);
	1845	*pcCurResidual = (Pel)Clip3<TCoeff>(pelMin, pelMax, accumulator);
[1029]	1846	}
	1847	}
	1848	}
[313]	1849	}
	1850	}
	1851
	1852	// ------------------------------------------------------------------------------------------------
	1853	// Logical transform
	1854	// ------------------------------------------------------------------------------------------------
	1855
[1029]	1856	/** Wrapper function between HM interface and core NxN forward transform (2D)
[1260]	1857	* \param compID colour component ID
	1858	* \param useDST
[313]	1859	* \param piBlkResi input data (residual)
[1260]	1860	* \param uiStride stride of input residual data
[313]	1861	* \param psCoeff output data (transform coefficients)
[1260]	1862	* \param iWidth transform width
	1863	* \param iHeight transform height
[313]	1864	*/
[1029]	1865	Void TComTrQuant::xT( const ComponentID compID, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight )
[313]	1866	{
[1029]	1867	#if MATRIX_MULT
	1868	if( iWidth == iHeight)
	1869	{
	1870	xTr(g_bitDepth[toChannelType(compID)], piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
	1871	return;
[313]	1872	}
[1029]	1873	#endif
	1874
	1875	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
	1876	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
	1877
	1878	for (Int y = 0; y < iHeight; y++)
[1246]	1879	{
[1029]	1880	for (Int x = 0; x < iWidth; x++)
	1881	{
	1882	block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x];
	1883	}
[1246]	1884	}
[1029]	1885
	1886	xTrMxN( g_bitDepth[toChannelType(compID)], block, coeff, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
	1887
	1888	memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff)));
[313]	1889	}
	1890
[1029]	1891	/** Wrapper function between HM interface and core NxN inverse transform (2D)
[1260]	1892	* \param compID colour component ID
	1893	* \param useDST
[313]	1894	* \param plCoef input data (transform coefficients)
	1895	* \param pResidual output data (residual)
	1896	* \param uiStride stride of input residual data
[1260]	1897	* \param iWidth transform width
	1898	* \param iHeight transform height
[313]	1899	*/
[1029]	1900	Void TComTrQuant::xIT( const ComponentID compID, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
[313]	1901	{
[1029]	1902	#if MATRIX_MULT
	1903	if( iWidth == iHeight )
	1904	{
	1905	#if O0043_BEST_EFFORT_DECODING
	1906	xITr(g_bitDepthInStream[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
[313]	1907	#else
[1029]	1908	xITr(g_bitDepth[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
	1909	#endif
	1910	return;
	1911	}
	1912	#endif
	1913
	1914	TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
	1915	TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
	1916
	1917	memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));
	1918
	1919	#if O0043_BEST_EFFORT_DECODING
	1920	xITrMxN( g_bitDepthInStream[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
	1921	#else
	1922	xITrMxN( g_bitDepth[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
	1923	#endif
	1924
	1925	for (Int y = 0; y < iHeight; y++)
[1246]	1926	{
[1029]	1927	for (Int x = 0; x < iWidth; x++)
[313]	1928	{
[1029]	1929	pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
[313]	1930	}
[1246]	1931	}
[313]	1932	}
[1029]	1933
[313]	1934	/** Wrapper function between HM interface and core 4x4 transform skipping
	1935	* \param piBlkResi input data (residual)
[1260]	1936	* \param uiStride stride of input residual data
[313]	1937	* \param psCoeff output data (transform coefficients)
[1260]	1938	* \param rTu reference to transform data
	1939	* \param component colour component
[313]	1940	*/
[1029]	1941	Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component )
[313]	1942	{
[1029]	1943	const TComRectangle &rect = rTu.getRect(component);
	1944	const Int width = rect.width;
	1945	const Int height = rect.height;
	1946
	1947	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
	1948	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
[313]	1949	{
[1029]	1950	iTransformShift = std::max<Int>(0, iTransformShift);
	1951	}
	1952
	1953	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
	1954	const UInt uiSizeMinus1 = (width * height) - 1;
	1955
	1956	if (iTransformShift >= 0)
	1957	{
	1958	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
	1959	{
	1960	for (UInt x = 0; x < width; x++, coefficientIndex++)
[313]	1961	{
[1029]	1962	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift;
[313]	1963	}
	1964	}
	1965	}
[1029]	1966	else //for very high bit depths
[313]	1967	{
[1029]	1968	iTransformShift = -iTransformShift;
	1969	const TCoeff offset = 1 << (iTransformShift - 1);
	1970
	1971	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
	1972	{
	1973	for (UInt x = 0; x < width; x++, coefficientIndex++)
[313]	1974	{
[1029]	1975	psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift;
[313]	1976	}
	1977	}
	1978	}
	1979	}
	1980
[1029]	1981	/** Wrapper function between HM interface and core NxN transform skipping
[313]	1982	* \param plCoef input data (coefficients)
	1983	* \param pResidual output data (residual)
	1984	* \param uiStride stride of input residual data
[1260]	1985	* \param rTu reference to transform data
	1986	* \param component colour component ID
[313]	1987	*/
[1029]	1988	Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component )
[313]	1989	{
[1029]	1990	const TComRectangle &rect = rTu.getRect(component);
	1991	const Int width = rect.width;
	1992	const Int height = rect.height;
	1993
	1994	Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
	1995	if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
[313]	1996	{
[1029]	1997	iTransformShift = std::max<Int>(0, iTransformShift);
	1998	}
	1999
	2000	const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
	2001	const UInt uiSizeMinus1 = (width * height) - 1;
	2002
	2003	if (iTransformShift >= 0)
	2004	{
	2005	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
	2006
	2007	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
	2008	{
	2009	for (UInt x = 0; x < width; x++, coefficientIndex++)
[313]	2010	{
[1029]	2011	pResidual[(y * uiStride) + x] = Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift);
	2012	}
[313]	2013	}
	2014	}
[1029]	2015	else //for very high bit depths
[313]	2016	{
[1029]	2017	iTransformShift = -iTransformShift;
	2018
	2019	for (UInt y = 0, coefficientIndex = 0; y < height; y++)
	2020	{
	2021	for (UInt x = 0; x < width; x++, coefficientIndex++)
[313]	2022	{
[1029]	2023	pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift);
[313]	2024	}
	2025	}
	2026	}
	2027	}
	2028
	2029	/** RDOQ with CABAC
[1260]	2030	* \param rTu reference to transform data
[313]	2031	* \param plSrcCoeff pointer to input buffer
	2032	* \param piDstCoeff reference to pointer to output buffer
[1260]	2033	* \param piArlDstCoeff
[313]	2034	* \param uiAbsSum reference to absolute sum of quantized transform coefficient
[1260]	2035	* \param compID colour component ID
	2036	* \param cQP reference to quantization parameters
	2037
[313]	2038	* Rate distortion optimized quantization for entropy
	2039	* coding engines using probability models like CABAC
	2040	*/
[1029]	2041	Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu,
	2042	TCoeff * plSrcCoeff,
	2043	TCoeff * piDstCoeff,
[313]	2044	#if ADAPTIVE_QP_SELECTION
[1029]	2045	TCoeff * piArlDstCoeff,
[313]	2046	#endif
[1029]	2047	TCoeff &uiAbsSum,
	2048	const ComponentID compID,
	2049	const QpParam &cQP )
[313]	2050	{
[1029]	2051	const TComRectangle & rect = rTu.getRect(compID);
	2052	const UInt uiWidth = rect.width;
	2053	const UInt uiHeight = rect.height;
	2054	TComDataCU * pcCU = rTu.getCU();
	2055	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
	2056	const ChannelType channelType = toChannelType(compID);
	2057	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
	2058
	2059	const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
	2060
	2061	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
	2062	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
	2063	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
	2064	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
	2065	*/
	2066
	2067	// Represents scaling through forward transform
	2068	Int iTransformShift = getTransformShift(channelType, uiLog2TrSize);
	2069	if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
	2070	{
	2071	iTransformShift = std::max<Int>(0, iTransformShift);
	2072	}
	2073
	2074	const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getUseGolombRiceParameterAdaptation();
	2075	const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
	2076	UInt uiGoRiceParam = initialGolombRiceParameter;
	2077	Double d64BlockUncodedCost = 0;
	2078	const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2;
	2079	const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2;
	2080	const UInt uiMaxNumCoeff = uiWidth * uiHeight;
	2081	assert(compID<MAX_NUM_COMPONENT);
	2082
	2083	Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
[595]	2084	assert(scalingListType < SCALING_LIST_NUM);
[1029]	2085
[313]	2086	#if ADAPTIVE_QP_SELECTION
[1029]	2087	memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff);
	2088	#endif
	2089
	2090	Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
	2091	Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ];
	2092	Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
	2093	memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
	2094	memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
	2095	Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ];
	2096	Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
	2097	Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
	2098	TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ];
	2099	memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff );
	2100	memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff );
	2101	memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff );
	2102	memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff );
	2103
	2104	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
	2105	const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
	2106	const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
	2107
	2108	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
	2109	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
	2110	const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
	2111
	2112	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
	2113	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
	2114
	2115	#if ADAPTIVE_QP_SELECTION
[313]	2116	Int iQBitsC = iQBits - ARL_C_PRECISION;
	2117	Int iAddC = 1 << (iQBitsC-1);
	2118	#endif
[1029]	2119
	2120	TUEntropyCodingParameters codingParameters;
	2121	getTUEntropyCodingParameters(codingParameters, rTu, compID);
	2122	const UInt uiCGSize = (1 << MLS_CG_SIZE);
	2123
[313]	2124	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
	2125	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
	2126	Int iCGLastScanPos = -1;
[1029]	2127
[313]	2128	UInt uiCtxSet = 0;
	2129	Int c1 = 1;
	2130	Int c2 = 0;
	2131	Double d64BaseCost = 0;
	2132	Int iLastScanPos = -1;
[1029]	2133
[313]	2134	UInt c1Idx = 0;
	2135	UInt c2Idx = 0;
	2136	Int baseLevel;
[1029]	2137
	2138	memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
	2139	memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
	2140
[313]	2141	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
	2142	Int iScanPos;
[1029]	2143	coeffGroupRDStats rdStats;
	2144
	2145	const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
	2146
[313]	2147	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
	2148	{
[1029]	2149	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
	2150	UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups;
	2151	UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
	2152
	2153	memset( &rdStats, 0, sizeof (coeffGroupRDStats));
	2154
	2155	const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
	2156
[313]	2157	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	2158	{
	2159	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
	2160	//===== quantization =====
[1029]	2161	UInt uiBlkPos = codingParameters.scan[iScanPos];
[313]	2162	// set coeff
[1029]	2163
	2164	const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
	2165	const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
	2166
	2167	const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
	2168
	2169	const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, MAX_INTERMEDIATE_INT - (Intermediate_Int(1) << (iQBits - 1)));
	2170
[313]	2171	#if ADAPTIVE_QP_SELECTION
	2172	if( m_bUseAdaptQpSelect )
	2173	{
[1029]	2174	piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
[313]	2175	}
	2176	#endif
[1029]	2177	const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
	2178
	2179	const Double dErr = Double( lLevelDouble );
	2180	pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;
[313]	2181	d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
	2182	piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
[1029]	2183
[313]	2184	if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
	2185	{
	2186	iLastScanPos = iScanPos;
[1029]	2187	uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
[313]	2188	iCGLastScanPos = iCGScanPos;
	2189	}
[1029]	2190
[313]	2191	if ( iLastScanPos >= 0 )
	2192	{
	2193	//===== coefficient level estimation =====
	2194	UInt uiLevel;
[1029]	2195	UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
	2196	UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
	2197
[313]	2198	if( iScanPos == iLastScanPos )
	2199	{
[1029]	2200	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
	2201	lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
	2202	c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, channelType
	2203	);
[313]	2204	}
	2205	else
	2206	{
[1029]	2207	UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
	2208
[313]	2209	uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
[1029]	2210	lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
	2211	c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, channelType
	2212	);
	2213
[313]	2214	sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
	2215	}
[1029]	2216
	2217	deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
	2218
[313]	2219	if( uiLevel > 0 )
	2220	{
[1029]	2221	Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType );
	2222	rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
	2223	rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
[313]	2224	}
	2225	else // uiLevel == 0
	2226	{
	2227	rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
	2228	}
	2229	piDstCoeff[ uiBlkPos ] = uiLevel;
	2230	d64BaseCost += pdCostCoeff [ iScanPos ];
[1029]	2231
[313]	2232	baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
	2233	if( uiLevel >= baseLevel )
	2234	{
[1029]	2235	if (uiLevel > 3*(1<<uiGoRiceParam))
[313]	2236	{
[1029]	2237	uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
[313]	2238	}
	2239	}
	2240	if ( uiLevel >= 1)
	2241	{
	2242	c1Idx ++;
	2243	}
[1029]	2244
[313]	2245	//===== update bin model =====
	2246	if( uiLevel > 1 )
	2247	{
[1029]	2248	c1 = 0;
[313]	2249	c2 += (c2 < 2);
	2250	c2Idx ++;
	2251	}
	2252	else if( (c1 < 3) && (c1 > 0) && uiLevel)
	2253	{
	2254	c1++;
	2255	}
[1029]	2256
[313]	2257	//===== context set update =====
[1029]	2258	if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
[313]	2259	{
[1029]	2260	uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this before entering the final group
	2261	c1 = 1;
[313]	2262	c2 = 0;
[1029]	2263	c1Idx = 0;
	2264	c2Idx = 0;
	2265	uiGoRiceParam = initialGolombRiceParameter;
[313]	2266	}
	2267	}
	2268	else
	2269	{
	2270	d64BaseCost += pdCostCoeff0[ iScanPos ];
	2271	}
	2272	rdStats.d64SigCost += pdCostSig[ iScanPos ];
	2273	if (iScanPosinCG == 0 )
	2274	{
	2275	rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
	2276	}
	2277	if (piDstCoeff[ uiBlkPos ] )
	2278	{
	2279	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
	2280	rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
	2281	rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
	2282	if ( iScanPosinCG != 0 )
	2283	{
	2284	rdStats.iNNZbeforePos0++;
	2285	}
	2286	}
	2287	} //end for (iScanPosinCG)
[1029]	2288
	2289	if (iCGLastScanPos >= 0)
[313]	2290	{
	2291	if( iCGScanPos )
	2292	{
	2293	if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
	2294	{
[1029]	2295	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
	2296	d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
	2297	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
	2298	}
[313]	2299	else
	2300	{
	2301	if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
	2302	{
[1029]	2303	if ( rdStats.iNNZbeforePos0 == 0 )
[313]	2304	{
	2305	d64BaseCost -= rdStats.d64SigCost_0;
	2306	rdStats.d64SigCost -= rdStats.d64SigCost_0;
	2307	}
	2308	// rd-cost if SigCoeffGroupFlag = 0, initialization
	2309	Double d64CostZeroCG = d64BaseCost;
[1029]	2310
[313]	2311	// add SigCoeffGroupFlag cost to total cost
[1029]	2312	UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
	2313
[313]	2314	if (iCGScanPos < iCGLastScanPos)
	2315	{
[1029]	2316	d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
	2317	d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
	2318	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
[313]	2319	}
[1029]	2320
[313]	2321	// try to convert the current coeff group from non-zero to all-zero
	2322	d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
	2323	d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
	2324	d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
[1029]	2325
[313]	2326	// if we can save cost, change this block to all-zero block
[1029]	2327	if ( d64CostZeroCG < d64BaseCost )
[313]	2328	{
	2329	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
	2330	d64BaseCost = d64CostZeroCG;
	2331	if (iCGScanPos < iCGLastScanPos)
	2332	{
[1029]	2333	pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
[313]	2334	}
[1029]	2335	// reset coeffs to 0 in this block
[313]	2336	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	2337	{
	2338	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
[1029]	2339	UInt uiBlkPos = codingParameters.scan[ iScanPos ];
	2340
[313]	2341	if (piDstCoeff[ uiBlkPos ])
	2342	{
	2343	piDstCoeff [ uiBlkPos ] = 0;
	2344	pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
	2345	pdCostSig [ iScanPos ] = 0;
	2346	}
	2347	}
[1029]	2348	} // end if ( d64CostAllZeros < d64BaseCost )
[313]	2349	}
	2350	} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
	2351	}
	2352	else
	2353	{
	2354	uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
	2355	}
	2356	}
	2357	} //end for (iCGScanPos)
[1029]	2358
[313]	2359	//===== estimate last position =====
	2360	if ( iLastScanPos < 0 )
	2361	{
	2362	return;
	2363	}
[1029]	2364
[313]	2365	Double d64BestCost = 0;
	2366	Int ui16CtxCbf = 0;
	2367	Int iBestLastIdxP1 = 0;
[1029]	2368	if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
[313]	2369	{
	2370	ui16CtxCbf = 0;
	2371	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
	2372	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
	2373	}
	2374	else
	2375	{
[1029]	2376	ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType );
	2377	ui16CtxCbf += getCBFContextOffset(compID);
[313]	2378	d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
	2379	d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
	2380	}
[1029]	2381
	2382
[313]	2383	Bool bFoundLast = false;
	2384	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
	2385	{
[1029]	2386	UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
	2387
	2388	d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
[313]	2389	if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
[1029]	2390	{
[313]	2391	for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
	2392	{
	2393	iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
[1029]	2394
[1246]	2395	if (iScanPos > iLastScanPos)
	2396	{
	2397	continue;
	2398	}
[1029]	2399	UInt uiBlkPos = codingParameters.scan[iScanPos];
	2400
[313]	2401	if( piDstCoeff[ uiBlkPos ] )
	2402	{
[1029]	2403	UInt uiPosY = uiBlkPos >> uiLog2BlockWidth;
	2404	UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
	2405
	2406	Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
[313]	2407	Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
[1029]	2408
[313]	2409	if( totalCost < d64BestCost )
	2410	{
	2411	iBestLastIdxP1 = iScanPos + 1;
	2412	d64BestCost = totalCost;
	2413	}
	2414	if( piDstCoeff[ uiBlkPos ] > 1 )
	2415	{
	2416	bFoundLast = true;
	2417	break;
	2418	}
	2419	d64BaseCost -= pdCostCoeff[ iScanPos ];
	2420	d64BaseCost += pdCostCoeff0[ iScanPos ];
	2421	}
	2422	else
	2423	{
	2424	d64BaseCost -= pdCostSig[ iScanPos ];
	2425	}
[1029]	2426	} //end for
[313]	2427	if (bFoundLast)
	2428	{
	2429	break;
	2430	}
	2431	} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
[1029]	2432	} // end for
	2433
	2434
[313]	2435	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
	2436	{
[1029]	2437	Int blkPos = codingParameters.scan[ scanPos ];
	2438	TCoeff level = piDstCoeff[ blkPos ];
[313]	2439	uiAbsSum += level;
	2440	piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
	2441	}
[1029]	2442
[313]	2443	//===== clean uncoded coefficients =====
	2444	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
	2445	{
[1029]	2446	piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
[313]	2447	}
[1029]	2448
	2449
[313]	2450	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
	2451	{
[1029]	2452	const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
	2453	Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
	2454	/ m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(g_bitDepth[channelType] - 8)))
	2455	+ 0.5);
	2456
[313]	2457	Int lastCG = -1;
	2458	Int absSum = 0 ;
	2459	Int n ;
[1029]	2460
	2461	for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
[313]	2462	{
[1029]	2463	Int subPos = subSet << MLS_CG_SIZE;
	2464	Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
[313]	2465	absSum = 0 ;
[1029]	2466
	2467	for(n = uiCGSize-1; n >= 0; --n )
[313]	2468	{
[1029]	2469	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
[313]	2470	{
	2471	lastNZPosInCG = n;
	2472	break;
	2473	}
	2474	}
[1029]	2475
	2476	for(n = 0; n <uiCGSize; n++ )
[313]	2477	{
[1029]	2478	if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
[313]	2479	{
	2480	firstNZPosInCG = n;
	2481	break;
	2482	}
	2483	}
[1029]	2484
[313]	2485	for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
	2486	{
[1029]	2487	absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
[313]	2488	}
[1029]	2489
[313]	2490	if(lastNZPosInCG>=0 && lastCG==-1)
	2491	{
[1029]	2492	lastCG = 1;
	2493	}
	2494
[313]	2495	if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
	2496	{
[1029]	2497	UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
[313]	2498	if( signbit!=(absSum&0x1) ) // hide but need tune
	2499	{
[1029]	2500	// calculate the cost
	2501	Int64 minCostInc = MAX_INT64, curCost = MAX_INT64;
	2502	Int minPos = -1, finalChange = 0, curChange = 0;
	2503
	2504	for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
[313]	2505	{
[1029]	2506	UInt uiBlkPos = codingParameters.scan[ n + subPos ];
[313]	2507	if(piDstCoeff[ uiBlkPos ] != 0 )
	2508	{
[1029]	2509	Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
	2510	Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
	2511	- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
	2512
[313]	2513	if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
	2514	{
[1029]	2515	costDown -= (4<<15);
[313]	2516	}
[1029]	2517
[313]	2518	if(costUp<costDown)
[1029]	2519	{
[313]	2520	curCost = costUp;
[1029]	2521	curChange = 1;
[313]	2522	}
[1029]	2523	else
[313]	2524	{
[1029]	2525	curChange = -1;
[313]	2526	if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
	2527	{
[1029]	2528	curCost = MAX_INT64;
[313]	2529	}
	2530	else
	2531	{
[1029]	2532	curCost = costDown;
[313]	2533	}
	2534	}
	2535	}
	2536	else
	2537	{
[1029]	2538	curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
[313]	2539	curChange = 1 ;
[1029]	2540
[313]	2541	if(n<firstNZPosInCG)
	2542	{
	2543	UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
	2544	if(thissignbit != signbit )
	2545	{
	2546	curCost = MAX_INT64;
	2547	}
	2548	}
	2549	}
[1029]	2550
[313]	2551	if( curCost<minCostInc)
	2552	{
[1029]	2553	minCostInc = curCost;
	2554	finalChange = curChange;
	2555	minPos = uiBlkPos;
[313]	2556	}
	2557	}
[1029]	2558
	2559	if(piDstCoeff[minPos] == entropyCodingMaximum \|\| piDstCoeff[minPos] == entropyCodingMinimum)
[313]	2560	{
	2561	finalChange = -1;
	2562	}
[1029]	2563
[313]	2564	if(plSrcCoeff[minPos]>=0)
	2565	{
	2566	piDstCoeff[minPos] += finalChange ;
	2567	}
	2568	else
	2569	{
[1029]	2570	piDstCoeff[minPos] -= finalChange ;
	2571	}
[313]	2572	}
	2573	}
[1029]	2574
[313]	2575	if(lastCG==1)
	2576	{
[1029]	2577	lastCG=0 ;
[313]	2578	}
	2579	}
	2580	}
	2581	}
	2582
[1029]	2583
[313]	2584	/** Pattern decision for context derivation process of significant_coeff_flag
	2585	* \param sigCoeffGroupFlag pointer to prior coded significant coeff group
[1029]	2586	* \param uiCGPosX column of current coefficient group
	2587	* \param uiCGPosY row of current coefficient group
[1260]	2588	* \param widthInGroups width of the block
	2589	* \param heightInGroups height of the block
[313]	2590	* \returns pattern for current coefficient group
	2591	*/
[1029]	2592	Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups )
[313]	2593	{
[1246]	2594	if ((widthInGroups <= 1) && (heightInGroups <= 1))
	2595	{
	2596	return 0;
	2597	}
[313]	2598
[1029]	2599	const Bool rightAvailable = uiCGPosX < (widthInGroups - 1);
	2600	const Bool belowAvailable = uiCGPosY < (heightInGroups - 1);
	2601
[313]	2602	UInt sigRight = 0;
	2603	UInt sigLower = 0;
	2604
[1246]	2605	if (rightAvailable)
	2606	{
	2607	sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
	2608	}
	2609	if (belowAvailable)
	2610	{
	2611	sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
	2612	}
[1029]	2613
	2614	return sigRight + (sigLower << 1);
[313]	2615	}
	2616
[1029]	2617
[313]	2618	/** Context derivation process of coeff_abs_significant_flag
	2619	* \param patternSigCtx pattern for current coefficient group
[1260]	2620	* \param codingParameters coding parameters for the TU (includes the scan)
[1029]	2621	* \param scanPosition current position in scan order
	2622	* \param log2BlockWidth log2 width of the block
	2623	* \param log2BlockHeight log2 height of the block
[1260]	2624	* \param chanType channel type (CHANNEL_TYPE_LUMA/CHROMA)
[313]	2625	* \returns ctxInc for current scan position
	2626	*/
[1029]	2627	Int TComTrQuant::getSigCtxInc ( Int patternSigCtx,
	2628	const TUEntropyCodingParameters &codingParameters,
	2629	const Int scanPosition,
	2630	const Int log2BlockWidth,
	2631	const Int log2BlockHeight,
	2632	const ChannelType chanType)
[313]	2633	{
[1029]	2634	if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE])
[313]	2635	{
[1029]	2636	//single context mode
	2637	return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE];
[313]	2638	}
	2639
[1029]	2640	const UInt rasterPosition = codingParameters.scan[scanPosition];
	2641	const UInt posY = rasterPosition >> log2BlockWidth;
	2642	const UInt posX = rasterPosition - (posY << log2BlockWidth);
[313]	2643
[1246]	2644	if ((posX + posY) == 0)
	2645	{
	2646	return 0; //special case for the DC context variable
	2647	}
[313]	2648
[1029]	2649	Int offset = MAX_INT;
	2650
	2651	if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4
[313]	2652	{
[1029]	2653	offset = ctxIndMap4x4[ (4 * posY) + posX ];
[313]	2654	}
	2655	else
	2656	{
[1029]	2657	Int cnt = 0;
	2658
	2659	switch (patternSigCtx)
	2660	{
	2661	//------------------
	2662
	2663	case 0: //neither neighbouring group is significant
	2664	{
	2665	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
	2666	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
	2667	const Int posTotalInSubset = posXinSubset + posYinSubset;
	2668
	2669	//first N coefficients in scan order use 2; the next few use 1; the rest use 0.
	2670	const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4;
	2671	const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4;
	2672
	2673	cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2);
	2674	}
	2675	break;
	2676
	2677	//------------------
	2678
	2679	case 1: //right group is significant, below is not
	2680	{
	2681	const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
	2682	const Int groupHeight = 1 << MLS_CG_LOG2_HEIGHT;
	2683
	2684	cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0
	2685	}
	2686	break;
	2687
	2688	//------------------
	2689
	2690	case 2: //below group is significant, right is not
	2691	{
	2692	const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
	2693	const Int groupWidth = 1 << MLS_CG_LOG2_WIDTH;
	2694
	2695	cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0
	2696	}
	2697	break;
	2698
	2699	//------------------
	2700
	2701	case 3: //both neighbouring groups are significant
	2702	{
	2703	cnt = 2;
	2704	}
	2705	break;
	2706
	2707	//------------------
	2708
	2709	default:
	2710	std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl;
	2711	exit(1);
	2712	break;
	2713	}
	2714
	2715	//------------------------------------------------
	2716
	2717	const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0;
	2718
	2719	offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt;
[313]	2720	}
	2721
[1029]	2722	return codingParameters.firstSignificanceMapContext + offset;
[313]	2723	}
	2724
[1029]	2725
[313]	2726	/** Get the best level in RD sense
	2727	* \param rd64CodedCost reference to coded cost
	2728	* \param rd64CodedCost0 reference to cost when coefficient is 0
	2729	* \param rd64CodedCostSig reference to cost of significant coefficient
	2730	* \param lLevelDouble reference to unscaled quantized level
	2731	* \param uiMaxAbsLevel scaled quantized level
	2732	* \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
	2733	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2734	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2735	* \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
[1260]	2736	* \param c1Idx
	2737	* \param c2Idx
[313]	2738	* \param iQBits quantization step size
[1260]	2739	* \param errorScale
[313]	2740	* \param bLast indicates if the coefficient is the last significant
[1260]	2741	* \param useLimitedPrefixLength
	2742	* \param channelType texture channel type (luma/chroma)
[313]	2743	* \returns best quantized transform level for given scan position
	2744	* This method calculates the best quantized transform level for a given scan position.
	2745	*/
[1029]	2746	__inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
	2747	Double& rd64CodedCost0,
	2748	Double& rd64CodedCostSig,
	2749	Intermediate_Int lLevelDouble,
	2750	UInt uiMaxAbsLevel,
	2751	UShort ui16CtxNumSig,
	2752	UShort ui16CtxNumOne,
	2753	UShort ui16CtxNumAbs,
	2754	UShort ui16AbsGoRice,
	2755	UInt c1Idx,
	2756	UInt c2Idx,
	2757	Int iQBits,
	2758	Double errorScale,
	2759	Bool bLast,
	2760	Bool useLimitedPrefixLength,
	2761	ChannelType channelType
	2762	) const
[313]	2763	{
[1029]	2764	Double dCurrCostSig = 0;
[313]	2765	UInt uiBestAbsLevel = 0;
[1029]	2766
[313]	2767	if( !bLast && uiMaxAbsLevel < 3 )
	2768	{
[1029]	2769	rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
[313]	2770	rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
	2771	if( uiMaxAbsLevel == 0 )
	2772	{
	2773	return uiBestAbsLevel;
	2774	}
	2775	}
	2776	else
	2777	{
	2778	rd64CodedCost = MAX_DOUBLE;
	2779	}
	2780
	2781	if( !bLast )
	2782	{
	2783	dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
	2784	}
	2785
	2786	UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
	2787	for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
	2788	{
[1029]	2789	Double dErr = Double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
	2790	Double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, channelType ) );
[313]	2791	dCurrCost += dCurrCostSig;
	2792
	2793	if( dCurrCost < rd64CodedCost )
	2794	{
	2795	uiBestAbsLevel = uiAbsLevel;
	2796	rd64CodedCost = dCurrCost;
	2797	rd64CodedCostSig = dCurrCostSig;
	2798	}
	2799	}
	2800
	2801	return uiBestAbsLevel;
	2802	}
	2803
	2804	/** Calculates the cost for specific absolute transform level
	2805	* \param uiAbsLevel scaled quantized level
	2806	* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
	2807	* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
	2808	* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
[1260]	2809	* \param c1Idx
	2810	* \param c2Idx
	2811	* \param useLimitedPrefixLength
	2812	* \param channelType texture channel type (luma/chroma)
[313]	2813	* \returns cost of given absolute transform level
	2814	*/
[1029]	2815	__inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
[313]	2816	UShort ui16CtxNumOne,
	2817	UShort ui16CtxNumAbs,
[1029]	2818	UShort ui16AbsGoRice,
	2819	UInt c1Idx,
	2820	UInt c2Idx,
	2821	Bool useLimitedPrefixLength,
	2822	ChannelType channelType
[313]	2823	) const
	2824	{
[1029]	2825	Int iRate = Int(xGetIEPRate()); // cost of sign bit
	2826	UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
[313]	2827
	2828	if ( uiAbsLevel >= baseLevel )
[1029]	2829	{
[313]	2830	UInt symbol = uiAbsLevel - baseLevel;
	2831	UInt length;
	2832	if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
	2833	{
	2834	length = symbol>>ui16AbsGoRice;
	2835	iRate += (length+1+ui16AbsGoRice)<< 15;
	2836	}
[1029]	2837	else if (useLimitedPrefixLength)
	2838	{
	2839	const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + g_maxTrDynamicRange[channelType]));
	2840
	2841	UInt prefixLength = 0;
	2842	UInt suffix = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION;
	2843
	2844	while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
	2845	{
	2846	prefixLength++;
	2847	}
	2848
	2849	const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (g_maxTrDynamicRange[channelType] - ui16AbsGoRice) : (prefixLength + 1/separator/);
	2850
	2851	iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15;
	2852	}
[313]	2853	else
	2854	{
	2855	length = ui16AbsGoRice;
	2856	symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
	2857	while (symbol >= (1<<length))
	2858	{
[1029]	2859	symbol -= (1<<(length++));
[313]	2860	}
	2861	iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
	2862	}
[1029]	2863
[313]	2864	if (c1Idx < C1FLAG_NUMBER)
	2865	{
	2866	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2867
	2868	if (c2Idx < C2FLAG_NUMBER)
	2869	{
	2870	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
	2871	}
	2872	}
	2873	}
[1029]	2874	else if( uiAbsLevel == 1 )
[313]	2875	{
	2876	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
	2877	}
	2878	else if( uiAbsLevel == 2 )
	2879	{
	2880	iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
	2881	iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
	2882	}
	2883	else
	2884	{
[540]	2885	iRate = 0;
[313]	2886	}
[1029]	2887
	2888	return iRate;
[313]	2889	}
	2890
	2891	__inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
	2892	UShort ui16CtxNumSig ) const
	2893	{
	2894	return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
	2895	}
	2896
	2897	/** Calculates the cost of signaling the last significant coefficient in the block
	2898	* \param uiPosX X coordinate of the last significant coefficient
	2899	* \param uiPosY Y coordinate of the last significant coefficient
[1260]	2900	* \param component colour component ID
[313]	2901	* \returns cost of last significant coefficient
	2902	*/
	2903	/*
	2904	* \param uiWidth width of the transform unit (TU)
	2905	*/
	2906	__inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
[1029]	2907	const UInt uiPosY,
	2908	const ComponentID component ) const
[313]	2909	{
	2910	UInt uiCtxX = g_uiGroupIdx[uiPosX];
	2911	UInt uiCtxY = g_uiGroupIdx[uiPosY];
[1029]	2912
	2913	Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ];
	2914
[313]	2915	if( uiCtxX > 3 )
	2916	{
	2917	uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
	2918	}
	2919	if( uiCtxY > 3 )
	2920	{
	2921	uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
	2922	}
	2923	return xGetICost( uiCost );
	2924	}
	2925
	2926	__inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
	2927	UShort ui16CtxNumSig ) const
	2928	{
	2929	return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
	2930	}
	2931
	2932	/** Get the cost for a specific rate
	2933	* \param dRate rate of a bit
	2934	* \returns cost at the specific rate
	2935	*/
	2936	__inline Double TComTrQuant::xGetICost ( Double dRate ) const
	2937	{
	2938	return m_dLambda * dRate;
	2939	}
	2940
	2941	/** Get the cost of an equal probable bit
	2942	* \returns cost of equal probable bit
	2943	*/
	2944	__inline Double TComTrQuant::xGetIEPRate ( ) const
	2945	{
	2946	return 32768;
	2947	}
	2948
	2949	/** Context derivation process of coeff_abs_significant_flag
	2950	* \param uiSigCoeffGroupFlag significance map of L1
[1260]	2951	* \param uiCGPosX column of current scan position
	2952	* \param uiCGPosY row of current scan position
	2953	* \param widthInGroups width of the block
	2954	* \param heightInGroups height of the block
[313]	2955	* \returns ctxInc for current scan position
	2956	*/
[1029]	2957	UInt TComTrQuant::getSigCoeffGroupCtxInc (const UInt* uiSigCoeffGroupFlag,
	2958	const UInt uiCGPosX,
	2959	const UInt uiCGPosY,
	2960	const UInt widthInGroups,
	2961	const UInt heightInGroups)
[313]	2962	{
[1029]	2963	UInt sigRight = 0;
	2964	UInt sigLower = 0;
[313]	2965
[1246]	2966	if (uiCGPosX < (widthInGroups - 1))
	2967	{
	2968	sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
	2969	}
	2970	if (uiCGPosY < (heightInGroups - 1))
	2971	{
	2972	sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
	2973	}
[313]	2974
[1029]	2975	return ((sigRight + sigLower) != 0) ? 1 : 0;
[313]	2976	}
[1029]	2977
	2978
[313]	2979	/** set quantized matrix coefficient for encode
[1260]	2980	* \param scalingList quantized matrix address
	2981	* \param format chroma format
[313]	2982	*/
[1029]	2983	Void TComTrQuant::setScalingList(TComScalingList *scalingList, const ChromaFormat format)
[313]	2984	{
[1029]	2985	const Int minimumQp = 0;
	2986	const Int maximumQp = SCALING_LIST_REM_NUM;
[313]	2987
[1029]	2988	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
[313]	2989	{
[1029]	2990	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
[313]	2991	{
[1029]	2992	for(Int qp = minimumQp; qp < maximumQp; qp++)
[313]	2993	{
[1029]	2994	xSetScalingListEnc(scalingList,list,size,qp,format);
[1235]	2995	xSetScalingListDec(*scalingList,list,size,qp,format);
[313]	2996	setErrScaleCoeff(list,size,qp);
	2997	}
	2998	}
	2999	}
	3000	}
	3001	/** set quantized matrix coefficient for decode
[1260]	3002	* \param scalingList quantized matrix address
	3003	* \param format chroma format
[313]	3004	*/
[1235]	3005	Void TComTrQuant::setScalingListDec(const TComScalingList &scalingList, const ChromaFormat format)
[313]	3006	{
[1029]	3007	const Int minimumQp = 0;
	3008	const Int maximumQp = SCALING_LIST_REM_NUM;
[313]	3009
[1029]	3010	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
[313]	3011	{
[1029]	3012	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
[313]	3013	{
[1029]	3014	for(Int qp = minimumQp; qp < maximumQp; qp++)
[313]	3015	{
[1029]	3016	xSetScalingListDec(scalingList,list,size,qp,format);
[313]	3017	}
	3018	}
	3019	}
	3020	}
	3021	/** set error scale coefficients
	3022	* \param list List ID
[1260]	3023	* \param size Size
	3024	* \param qp Quantization parameter
[313]	3025	*/
[1029]	3026	Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp)
[313]	3027	{
[1029]	3028	const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
	3029	const ChannelType channelType = ((list == 0) \|\| (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
[313]	3030
[1029]	3031	const Int iTransformShift = getTransformShift(channelType, uiLog2TrSize); // Represents scaling through forward transform
[313]	3032
	3033	UInt i,uiMaxNumCoeff = g_scalingListSize[size];
	3034	Int *piQuantcoeff;
	3035	Double *pdErrScale;
	3036	piQuantcoeff = getQuantCoeff(list, qp,size);
	3037	pdErrScale = getErrScaleCoeff(list, size, qp);
	3038
[1029]	3039	Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
	3040	dErrScale = dErrScalepow(2.0,(-2.0iTransformShift)); // Compensate for scaling through forward transform
	3041
[313]	3042	for(i=0;i<uiMaxNumCoeff;i++)
	3043	{
[1029]	3044	pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
[313]	3045	}
[1029]	3046
	3047	getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
[313]	3048	}
	3049
	3050	/** set quantized matrix coefficient for encode
[1260]	3051	* \param scalingList quantized matrix address
[313]	3052	* \param listId List index
	3053	* \param sizeId size index
[1260]	3054	* \param qp Quantization parameter
	3055	* \param format chroma format
[313]	3056	*/
[1029]	3057	Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
[313]	3058	{
[1029]	3059	UInt width = g_scalingListSizeX[sizeId];
[313]	3060	UInt height = g_scalingListSizeX[sizeId];
[1029]	3061	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
[313]	3062	Int *quantcoeff;
[1029]	3063	Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
	3064	quantcoeff = getQuantCoeff(listId, qp, sizeId);
[313]	3065
[1029]	3066	Int quantScales = g_quantScales[qp];
	3067
	3068	processScalingListEnc(coeff,
	3069	quantcoeff,
	3070	(quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
	3071	height, width, ratio,
	3072	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
	3073	scalingList->getScalingListDC(sizeId,listId));
[313]	3074	}
[1029]	3075
[313]	3076	/** set quantized matrix coefficient for decode
	3077	* \param scalingList quantaized matrix address
[1260]	3078	* \param listId List index
	3079	* \param sizeId size index
	3080	* \param qp Quantization parameter
	3081	* \param format chroma format
[313]	3082	*/
[1235]	3083	Void TComTrQuant::xSetScalingListDec(const TComScalingList &scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
[313]	3084	{
[1029]	3085	UInt width = g_scalingListSizeX[sizeId];
[313]	3086	UInt height = g_scalingListSizeX[sizeId];
[1029]	3087	UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
[313]	3088	Int *dequantcoeff;
[1235]	3089	const Int *coeff = scalingList.getScalingListAddress(sizeId,listId);
[313]	3090
	3091	dequantcoeff = getDequantCoeff(listId, qp, sizeId);
[1029]	3092
	3093	Int invQuantScale = g_invQuantScales[qp];
	3094
	3095	processScalingListDec(coeff,
	3096	dequantcoeff,
	3097	invQuantScale,
	3098	height, width, ratio,
	3099	min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
[1235]	3100	scalingList.getScalingListDC(sizeId,listId));
[313]	3101	}
	3102
	3103	/** set flat matrix value to quantized coefficient
	3104	*/
[1029]	3105	Void TComTrQuant::setFlatScalingList(const ChromaFormat format)
[313]	3106	{
[1029]	3107	const Int minimumQp = 0;
	3108	const Int maximumQp = SCALING_LIST_REM_NUM;
[313]	3109
[1029]	3110	for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
[313]	3111	{
[1029]	3112	for(UInt list = 0; list < SCALING_LIST_NUM; list++)
[313]	3113	{
[1029]	3114	for(Int qp = minimumQp; qp < maximumQp; qp++)
[313]	3115	{
[1029]	3116	xsetFlatScalingList(list,size,qp,format);
[313]	3117	setErrScaleCoeff(list,size,qp);
	3118	}
	3119	}
	3120	}
	3121	}
	3122
	3123	/** set flat matrix value to quantized coefficient
	3124	* \param list List ID
[1260]	3125	* \param size size index
	3126	* \param qp Quantization parameter
	3127	* \param format chroma format
[313]	3128	*/
[1029]	3129	Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp, const ChromaFormat format)
[313]	3130	{
	3131	UInt i,num = g_scalingListSize[size];
	3132	Int *quantcoeff;
	3133	Int *dequantcoeff;
	3134
[1029]	3135	Int quantScales = g_quantScales [qp];
	3136	Int invQuantScales = g_invQuantScales[qp] << 4;
	3137
[313]	3138	quantcoeff = getQuantCoeff(list, qp, size);
	3139	dequantcoeff = getDequantCoeff(list, qp, size);
	3140
	3141	for(i=0;i<num;i++)
[1029]	3142	{
[313]	3143	*quantcoeff++ = quantScales;
	3144	*dequantcoeff++ = invQuantScales;
	3145	}
	3146	}
	3147
	3148	/** set quantized matrix coefficient for encode
	3149	* \param coeff quantaized matrix address
	3150	* \param quantcoeff quantaized matrix address
	3151	* \param quantScales Q(QP%6)
	3152	* \param height height
	3153	* \param width width
	3154	* \param ratio ratio for upscale
	3155	* \param sizuNum matrix size
	3156	* \param dc dc parameter
	3157	*/
	3158	Void TComTrQuant::processScalingListEnc( Int coeff, Int quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
	3159	{
	3160	for(UInt j=0;j<height;j++)
	3161	{
	3162	for(UInt i=0;i<width;i++)
	3163	{
[1029]	3164	quantcoeff[jwidth + i] = quantScales / coeff[sizuNum (j / ratio) + i / ratio];
[313]	3165	}
	3166	}
[1029]	3167
[313]	3168	if(ratio > 1)
	3169	{
	3170	quantcoeff[0] = quantScales / dc;
	3171	}
	3172	}
[1029]	3173
[313]	3174	/** set quantized matrix coefficient for decode
	3175	* \param coeff quantaized matrix address
	3176	* \param dequantcoeff quantaized matrix address
	3177	* \param invQuantScales IQ(QP%6))
	3178	* \param height height
	3179	* \param width width
	3180	* \param ratio ratio for upscale
	3181	* \param sizuNum matrix size
	3182	* \param dc dc parameter
	3183	*/
[1235]	3184	Void TComTrQuant::processScalingListDec( const Int coeff, Int dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
[313]	3185	{
	3186	for(UInt j=0;j<height;j++)
	3187	{
	3188	for(UInt i=0;i<width;i++)
	3189	{
	3190	dequantcoeff[jwidth + i] = invQuantScales coeff[sizuNum * (j / ratio) + i / ratio];
	3191	}
	3192	}
[1029]	3193
[313]	3194	if(ratio > 1)
	3195	{
	3196	dequantcoeff[0] = invQuantScales * dc;
	3197	}
	3198	}
	3199
	3200	/** initialization process of scaling list array
	3201	*/
	3202	Void TComTrQuant::initScalingList()
	3203	{
	3204	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
	3205	{
[1029]	3206	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
[313]	3207	{
[1029]	3208	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
[313]	3209	{
[1029]	3210	m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
	3211	m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
[313]	3212	m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
[1029]	3213	} // listID loop
[313]	3214	}
	3215	}
	3216	}
[1029]	3217
[313]	3218	/** destroy quantization matrix array
	3219	*/
	3220	Void TComTrQuant::destroyScalingList()
	3221	{
	3222	for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
	3223	{
[1029]	3224	for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
[313]	3225	{
	3226	for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
	3227	{
[1246]	3228	if(m_quantCoef[sizeId][listId][qp])
	3229	{
	3230	delete [] m_quantCoef[sizeId][listId][qp];
	3231	}
	3232	if(m_dequantCoef[sizeId][listId][qp])
	3233	{
	3234	delete [] m_dequantCoef[sizeId][listId][qp];
	3235	}
	3236	if(m_errScale[sizeId][listId][qp])
	3237	{
	3238	delete [] m_errScale[sizeId][listId][qp];
	3239	}
[313]	3240	}
	3241	}
	3242	}
	3243	}
	3244
[1240]	3245	Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const TCoeff resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint)
[1029]	3246	{
	3247	TComDataCU *pcCU = rTu.getCU();
	3248	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
	3249	const TComRectangle &rect = rTu.getRect(compID);
	3250	const UInt uiWidth = rect.width;
	3251	const UInt uiHeight = rect.height;
	3252	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
	3253	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
	3254	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
	3255	const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
	3256
	3257	assert( scalingListType < SCALING_LIST_NUM );
	3258	const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) );
	3259
	3260
	3261	/* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
	3262	* implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
	3263	* uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
	3264	* Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
	3265	*/
	3266
	3267	const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
	3268	// QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
	3269
	3270	const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9);
	3271
	3272	TCoeff transformedCoefficient;
	3273
	3274	// transform-skip
	3275	if (iTransformShift >= 0)
	3276	{
	3277	transformedCoefficient = resiDiff << iTransformShift;
	3278	}
	3279	else // for very high bit depths
	3280	{
	3281	const Int iTrShiftNeg = -iTransformShift;
	3282	const Int offset = 1 << (iTrShiftNeg - 1);
	3283	transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg;
	3284	}
	3285
	3286	// quantization
	3287	const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
	3288
	3289	const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
	3290
	3291	const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient;
	3292
	3293	const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
	3294
	3295	const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
	3296	const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
	3297	pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
	3298	}
	3299
	3300
	3301	Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos )
	3302	{
	3303	TComDataCU *pcCU = rTu.getCU();
	3304	const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
	3305	const TComRectangle &rect = rTu.getRect(compID);
	3306	const UInt uiWidth = rect.width;
	3307	const UInt uiHeight = rect.height;
	3308	const Int QP_per = cQP.per;
	3309	const Int QP_rem = cQP.rem;
	3310	const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
	3311	const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
	3312	const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
	3313	const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
	3314
	3315	assert( scalingListType < SCALING_LIST_NUM );
	3316
	3317	const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
	3318
	3319	const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
	3320	const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
	3321
	3322	// Dequantisation
	3323
	3324	TCoeff dequantisedSample;
	3325
	3326	if(enableScalingLists)
	3327	{
	3328	const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
	3329	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
	3330
	3331	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
	3332	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
	3333
	3334	Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
	3335
	3336	if(rightShift > 0)
	3337	{
	3338	const Intermediate_Int iAdd = 1 << (rightShift - 1);
	3339	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
	3340	const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift;
	3341
	3342	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
	3343	}
	3344	else
	3345	{
	3346	const Int leftShift = -rightShift;
	3347	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
	3348	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift;
	3349
	3350	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
	3351	}
	3352	}
	3353	else
	3354	{
	3355	const Int scale = g_invQuantScales[QP_rem];
	3356	const Int scaleBits = (IQUANT_SHIFT + 1) ;
	3357
	3358	const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
	3359	const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
	3360	const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
	3361
	3362	if (rightShift > 0)
	3363	{
	3364	const Intermediate_Int iAdd = 1 << (rightShift - 1);
	3365	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
	3366	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
	3367
	3368	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
	3369	}
	3370	else
	3371	{
	3372	const Int leftShift = -rightShift;
	3373	const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
	3374	const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
	3375
	3376	dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
	3377	}
	3378	}
	3379
	3380	// Inverse transform-skip
	3381
	3382	if (iTransformShift >= 0)
	3383	{
	3384	const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
	3385	reconSample = Pel(( dequantisedSample + offset ) >> iTransformShift);
	3386	}
	3387	else //for very high bit depths
	3388	{
	3389	const Int iTrShiftNeg = -iTransformShift;
	3390	reconSample = Pel(dequantisedSample << iTrShiftNeg);
	3391	}
	3392	}
	3393
	3394
	3395	Void TComTrQuant::crossComponentPrediction( TComTU & rTu,
	3396	const ComponentID compID,
	3397	const Pel * piResiL,
	3398	const Pel * piResiC,
	3399	Pel * piResiT,
	3400	const Int width,
	3401	const Int height,
	3402	const Int strideL,
	3403	const Int strideC,
	3404	const Int strideT,
	3405	const Bool reverse )
	3406	{
	3407	const Pel *pResiL = piResiL;
	3408	const Pel *pResiC = piResiC;
	3409	Pel *pResiT = piResiT;
	3410
	3411	TComDataCU *pCU = rTu.getCU();
[1239]	3412	const Int alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID );
[1029]	3413	const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
	3414
	3415	for( Int y = 0; y < height; y++ )
	3416	{
	3417	if (reverse)
	3418	{
[1239]	3419	// A constraint is to be added to the HEVC Standard to limit the size of pResiL and pResiC at this point.
	3420	// The likely form of the constraint is to either restrict the values to CoeffMin to CoeffMax,
	3421	// or to be representable in a bitDepthY+4 or bitDepthC+4 signed integer.
	3422	// The result of the constraint is that for 8/10/12bit profiles, the input values
	3423	// can be represented within a 16-bit Pel-type.
	3424	#if RExt__HIGH_BIT_DEPTH_SUPPORT
[1029]	3425	for( Int x = 0; x < width; x++ )
	3426	{
	3427	pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3);
	3428	}
[1239]	3429	#else
	3430	const Int minPel=std::numeric_limits<Pel>::min();
	3431	const Int maxPel=std::numeric_limits<Pel>::max();
	3432	for( Int x = 0; x < width; x++ )
	3433	{
	3434	pResiT[x] = Clip3<Int>(minPel, maxPel, pResiC[x] + (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3));
	3435	}
	3436	#endif
[1029]	3437	}
	3438	else
	3439	{
[1239]	3440	// Forward does not need clipping. Pel type should always be big enough.
[1029]	3441	for( Int x = 0; x < width; x++ )
	3442	{
[1239]	3443	pResiT[x] = pResiC[x] - (( alpha * rightShift<Int>(Int(pResiL[x]), diffBitDepth) ) >> 3);
[1029]	3444	}
	3445	}
	3446
	3447	pResiL += strideL;
	3448	pResiC += strideC;
	3449	pResiT += strideT;
	3450	}
	3451	}
	3452
[313]	3453	//! \}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: