Context navigation

source: 3DVCSoftware/trunk/source/Lib/TLibEncoder/TEncAdaptiveLoopFilter.cpp @ 2

Visit:

Last change on this file since 2 was 2, checked in by hhi, 14 years ago
initial import
Property svn:eol-style set to `native`
File size: 141.9 KB

Line
1
2
3	/** \file TEncAdaptiveLoopFilter.cpp
4	\brief estimation part of adaptive loop filter class
5	*/
6	#include "TEncAdaptiveLoopFilter.h"
7	#include <string.h>
8	#include <stdlib.h>
9	#include <stdio.h>
10	#include <math.h>
11
12	// ====================================================================================================================
13	// Constants
14	// ====================================================================================================================
15
16	#define ALF_NUM_OF_REDESIGN 3
17
18	// ====================================================================================================================
19	// Tables
20	// ====================================================================================================================
21
22	const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x9[81] =
23	{
24	0, 1, 2, 3, 4, 5, 6, 7, 8,
25	9, 10, 11, 12, 13, 14, 15, 16, 17,
26	18, 19, 20, 21, 22, 23, 24, 25, 26,
27	27, 28, 29, 30, 31, 32, 33, 34, 35,
28	36, 37, 38, 39, 40, 39, 38, 37, 36,
29	35, 34, 33, 32, 31, 30, 29, 28, 27,
30	26, 25, 24, 23, 22, 21, 20, 19, 18,
31	17, 16, 15, 14, 13, 12, 11, 10, 9,
32	8, 7, 6, 5, 4, 3, 2, 1, 0
33	};
34
35	const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray7x7[49] =
36	{
37	0, 1, 2, 3, 4, 5, 6,
38	7, 8, 9, 10, 11, 12, 13,
39	14, 15, 16, 17, 18, 19, 20,
40	21, 22, 23, 24, 23, 22, 21,
41	20, 19, 18, 17, 16, 15, 14,
42	13, 12, 11, 10, 9, 8, 7,
43	6, 5, 4, 3, 2, 1, 0,
44	};
45
46	const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray5x5[25] =
47	{
48	0, 1, 2, 3, 4,
49	5, 6, 7, 8, 9,
50	10, 11, 12, 11, 10,
51	9, 8, 7, 6, 5,
52	4, 3, 2, 1, 0,
53	};
54
#if TI_ALF_MAX_VSIZE_7
/// Coefficient index of every tap in the 9-wide, 7-high window, listed row by
/// row. Entries mirror about the centre (index 31).
const Int TEncAdaptiveLoopFilter::m_aiSymmetricArray9x7[63] =
{
   0,  1,  2,  3,  4,  5,  6,  7,  8,
   9, 10, 11, 12, 13, 14, 15, 16, 17,
  18, 19, 20, 21, 22, 23, 24, 25, 26,
  27, 28, 29, 30, 31, 30, 29, 28, 27,
  26, 25, 24, 23, 22, 21, 20, 19, 18,
  17, 16, 15, 14, 13, 12, 11, 10,  9,
   8,  7,  6,  5,  4,  3,  2,  1,  0
};
#endif
67
68	#if MQT_ALF_NPASS
69	#if TI_ALF_MAX_VSIZE_7
70	Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[21] =
71	#else
72	Int TEncAdaptiveLoopFilter::m_aiTapPos9x9_In9x9Sym[22] =
73	#endif
74	{
75	#if TI_ALF_MAX_VSIZE_7
76	0, 1, 2,
77	3, 4, 5, 6, 7,
78	8, 9, 10, 11, 12, 13, 14,
79	15, 16, 17, 18, 19, 20
80	#else
81	0,
82	1, 2, 3,
83	4, 5, 6, 7, 8,
84	9, 10, 11, 12, 13, 14, 15,
85	16, 17, 18, 19, 20, 21
86	#endif
87	};
88
89	Int TEncAdaptiveLoopFilter::m_aiTapPos7x7_In9x9Sym[14] =
90	{
91	#if TI_ALF_MAX_VSIZE_7
92	1,
93	4, 5, 6,
94	9, 10, 11, 12, 13,
95	16, 17, 18, 19, 20
96
97	#else
98
99	2,
100	5, 6, 7,
101	10, 11, 12, 13, 14,
102	17, 18, 19, 20, 21
103	#endif
104	};
105
106	Int TEncAdaptiveLoopFilter::m_aiTapPos5x5_In9x9Sym[8] =
107	{
108
109	#if TI_ALF_MAX_VSIZE_7
110	5,
111	10, 11, 12,
112	17, 18, 19, 20
113	#else
114	6,
115	11, 12, 13,
116	18, 19, 20, 21
117
118	#endif
119
120	};
121
122	Int* TEncAdaptiveLoopFilter::m_iTapPosTabIn9x9Sym[NO_TEST_FILT] =
123	{
124	m_aiTapPos9x9_In9x9Sym, m_aiTapPos7x7_In9x9Sym, m_aiTapPos5x5_In9x9Sym
125	};
126	#endif
127
128	// ====================================================================================================================
129	// Constructor / destructor
130	// ====================================================================================================================
131
132	TEncAdaptiveLoopFilter::TEncAdaptiveLoopFilter()
133	{
134	m_ppdAlfCorr = NULL;
135	m_pdDoubleAlfCoeff = NULL;
136	m_pcPic = NULL;
137	m_pcEntropyCoder = NULL;
138	m_pcBestAlfParam = NULL;
139	m_pcTempAlfParam = NULL;
140	m_pcPicYuvBest = NULL;
141	m_pcPicYuvTmp = NULL;
142	#if MTK_NONCROSS_INLOOP_FILTER
143	m_pcSliceYuvTmp = NULL;
144	#endif
145	#if MQT_BA_RA && MQT_ALF_NPASS
146	m_aiFilterCoeffSaved = NULL;
147	#endif
148	}
149
150	// ====================================================================================================================
151	// Public member functions
152	// ====================================================================================================================
153
154	#if MQT_BA_RA && MQT_ALF_NPASS
155	Void TEncAdaptiveLoopFilter::createAlfGlobalBuffers(Int iALFEncodePassReduction)
156	{
157	if(iALFEncodePassReduction)
158	{
159	for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
160	{
161	m_aiFilterCoeffSavedMethods[i] = new Int**[9];
162	for(Int j=0; j< 9; j++)
163	{
164	m_aiFilterCoeffSavedMethods[i][j] = new Int*[NO_VAR_BINS];
165	for(Int k=0; k< NO_VAR_BINS; k++)
166	{
167	m_aiFilterCoeffSavedMethods[i][j][k] = new Int[MAX_SQR_FILT_LENGTH];
168	}
169	}
170	}
171
172	}
173	}
174
175	Void TEncAdaptiveLoopFilter::destroyAlfGlobalBuffers()
176	{
177	if(m_iALFEncodePassReduction)
178	{
179	for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
180	{
181	for(Int j=0; j< 9; j++)
182	{
183	for(Int k=0; k< NO_VAR_BINS; k++)
184	{
185	delete[] m_aiFilterCoeffSavedMethods[i][j][k];
186	}
187	delete[] m_aiFilterCoeffSavedMethods[i][j];
188	}
189	delete[] m_aiFilterCoeffSavedMethods[i];
190	}
191
192	}
193
194	}
195	#endif
196
197	/**
198	\param pcPic picture (TComPic) pointer
199	\param pcEntropyCoder entropy coder class
200	*/
201	Void TEncAdaptiveLoopFilter::startALFEnc( TComPic* pcPic, TEncEntropy* pcEntropyCoder )
202	{
203	m_pcPic = pcPic;
204	m_pcEntropyCoder = pcEntropyCoder;
205
206	m_eSliceType = pcPic->getSlice(0)->getSliceType();
207	m_iPicNalReferenceIdc = (pcPic->getSlice(0)->isReferenced() ? 1 :0);
208
209	m_uiNumSCUInCU = m_pcPic->getNumPartInCU();
210
211	xInitParam();
212	xCreateTmpAlfCtrlFlags();
213
214	Int iWidth = pcPic->getPicYuvOrg()->getWidth();
215	Int iHeight = pcPic->getPicYuvOrg()->getHeight();
216
217	m_pcPicYuvTmp = new TComPicYuv();
218	m_pcPicYuvTmp->createLuma(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
219	m_pcPicYuvBest = pcPic->getPicYuvPred();
220
221	m_pcBestAlfParam = new ALFParam;
222	m_pcTempAlfParam = new ALFParam;
223	allocALFParam(m_pcBestAlfParam);
224	allocALFParam(m_pcTempAlfParam);
225	m_im_width = iWidth;
226	m_im_height = iHeight;
227
228	// init qc_filter
229	initMatrix4D_double(&m_EGlobalSym, NO_TEST_FILT, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
230	initMatrix3D_double(&m_yGlobalSym, NO_TEST_FILT, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
231	initMatrix_int(&m_filterCoeffSymQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
232
233	m_pixAcc = (double *) calloc(NO_VAR_BINS, sizeof(double));
234	#if !MQT_BA_RA
235	get_mem2Dpel(&m_varImg, m_im_height, m_im_width);
236	#endif
237	get_mem2Dpel(&m_maskImg, m_im_height, m_im_width);
238
239	initMatrix_double(&m_E_temp, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);//
240	m_y_temp = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));//
241	initMatrix3D_double(&m_E_merged, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);//
242	initMatrix_double(&m_y_merged, NO_VAR_BINS, MAX_SQR_FILT_LENGTH); //
243	m_pixAcc_merged = (double *) calloc(NO_VAR_BINS, sizeof(double));//
244
245	m_filterCoeffQuantMod = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));//
246	m_filterCoeff = (double *) calloc(MAX_SQR_FILT_LENGTH, sizeof(double));//
247	m_filterCoeffQuant = (int *) calloc(MAX_SQR_FILT_LENGTH, sizeof(int));//
248	initMatrix_int(&m_diffFilterCoeffQuant, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);//
249	initMatrix_int(&m_FilterCoeffQuantTemp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);//
250
251	m_tempALFp = new ALFParam;
252	allocALFParam(m_tempALFp);
253	m_pcDummyEntropyCoder = m_pcEntropyCoder;
254
255	#if MTK_NONCROSS_INLOOP_FILTER
256	if( m_bUseNonCrossALF )
257	{
258	m_pcSliceYuvTmp = new TComPicYuv();
259	m_pcSliceYuvTmp->create(iWidth, iHeight, g_uiMaxCUWidth, g_uiMaxCUHeight, g_uiMaxCUDepth);
260	}
261	#endif
262
263
264	}
265
266	Void TEncAdaptiveLoopFilter::endALFEnc()
267	{
268	xUninitParam();
269	xDestroyTmpAlfCtrlFlags();
270
271	m_pcPicYuvTmp->destroyLuma();
272	delete m_pcPicYuvTmp;
273	m_pcPicYuvTmp = NULL;
274	m_pcPic = NULL;
275	m_pcEntropyCoder = NULL;
276
277	freeALFParam(m_pcBestAlfParam);
278	freeALFParam(m_pcTempAlfParam);
279	delete m_pcBestAlfParam;
280	delete m_pcTempAlfParam;
281	// delete qc filters
282	destroyMatrix4D_double(m_EGlobalSym, NO_TEST_FILT, NO_VAR_BINS);
283	destroyMatrix3D_double(m_yGlobalSym, NO_TEST_FILT);
284	destroyMatrix_int(m_filterCoeffSymQuant);
285
286	free(m_pixAcc);
287	#if !MQT_BA_RA
288	free_mem2Dpel(m_varImg);
289	#endif
290	free_mem2Dpel(m_maskImg);
291
292	destroyMatrix3D_double(m_E_merged, NO_VAR_BINS);
293	destroyMatrix_double(m_y_merged);
294	destroyMatrix_double(m_E_temp);
295	free(m_pixAcc_merged);
296
297	free(m_filterCoeffQuantMod);
298	free(m_y_temp);
299
300	free(m_filterCoeff);
301	free(m_filterCoeffQuant);
302	destroyMatrix_int(m_diffFilterCoeffQuant);
303	destroyMatrix_int(m_FilterCoeffQuantTemp);
304
305	freeALFParam(m_tempALFp);
306	delete m_tempALFp;
307
308	#if MTK_NONCROSS_INLOOP_FILTER
309
310	if(m_bUseNonCrossALF)
311	{
312	m_pcSliceYuvTmp->destroy();
313	delete m_pcSliceYuvTmp;
314	m_pcSliceYuvTmp = NULL;
315	}
316	#endif
317
318	}
319
320	/**
321	\param pcAlfParam ALF parameter
322	\param dLambda lambda value for RD cost computation
323	\retval ruiDist distortion
324	\retval ruiBits required bits
325	\retval ruiMaxAlfCtrlDepth optimal partition depth
326	*/
327	Void TEncAdaptiveLoopFilter::ALFProcess( ALFParam* pcAlfParam, Double dLambda, UInt64& ruiDist, UInt64& ruiBits, UInt& ruiMaxAlfCtrlDepth )
328	{
329	Int tap, num_coef;
330
331	// set global variables
332	tap = ALF_MAX_NUM_TAP;
333	#if TI_ALF_MAX_VSIZE_7
334	Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap);
335	num_coef = (tap * tapV + 1) >> 1;
336	#else
337	num_coef = (tap*tap+1)>>1;
338	#endif
339	num_coef = num_coef + 1; // DC offset
340
341	// set lambda
342	m_dLambdaLuma = dLambda;
343	m_dLambdaChroma = dLambda;
344
345	TComPicYuv* pcPicOrg = m_pcPic->getPicYuvOrg();
346
347	// extend image for filtering
348	TComPicYuv* pcPicYuvRec = m_pcPic->getPicYuvRec();
349	TComPicYuv* pcPicYuvExtRec = m_pcTempPicYuv;
350
351	pcPicYuvRec->copyToPic(pcPicYuvExtRec);
352	#if MTK_NONCROSS_INLOOP_FILTER
353	if(!m_bUseNonCrossALF)
354	{
355	#endif
356	pcPicYuvExtRec->setBorderExtension( false );
357	pcPicYuvExtRec->extendPicBorder ();
358	#if MTK_NONCROSS_INLOOP_FILTER
359	}
360	#endif
361
362	// set min cost
363	UInt64 uiMinRate = MAX_INT;
364	UInt64 uiMinDist = MAX_INT;
365	Double dMinCost = MAX_DOUBLE;
366
367	UInt64 uiOrigRate;
368	UInt64 uiOrigDist;
369	Double dOrigCost;
370
371	// calc original cost
372	xCalcRDCost( pcPicOrg, pcPicYuvRec, NULL, uiOrigRate, uiOrigDist, dOrigCost );
373	m_pcBestAlfParam->alf_flag = 0;
374	m_pcBestAlfParam->cu_control_flag = 0;
375
376	// initialize temp_alfps
377	m_pcTempAlfParam->alf_flag = 1;
378	m_pcTempAlfParam->tap = tap;
379	#if TI_ALF_MAX_VSIZE_7
380	m_pcTempAlfParam->tapV = tapV;
381	#endif
382	m_pcTempAlfParam->num_coeff = num_coef;
383	m_pcTempAlfParam->chroma_idc = 0;
384	m_pcTempAlfParam->cu_control_flag = 0;
385
386	#if MQT_ALF_NPASS
387	setALFEncodingParam(m_pcPic);
388	#endif
389
390	// adaptive in-loop wiener filtering
391	xEncALFLuma_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
392
393	// cu-based filter on/off control
394	xCUAdaptiveControl_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
395
396	// adaptive tap-length
397	xFilterTapDecision_qc( pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, uiMinRate, uiMinDist, dMinCost );
398
399	// compute RD cost
400	xCalcRDCost( pcPicOrg, pcPicYuvRec, m_pcBestAlfParam, uiMinRate, uiMinDist, dMinCost );
401
402	// compare RD cost to non-ALF case
403	if( dMinCost < dOrigCost )
404	{
405	m_pcBestAlfParam->alf_flag = 1;
406
407	ruiBits = uiMinRate;
408	ruiDist = uiMinDist;
409	}
410	else
411	{
412	m_pcBestAlfParam->alf_flag = 0;
413	m_pcBestAlfParam->cu_control_flag = 0;
414
415	uiMinRate = uiOrigRate;
416	uiMinDist = uiOrigDist;
417	dMinCost = dMinCost;
418
419	m_pcEntropyCoder->setAlfCtrl(false);
420	pcPicYuvExtRec->copyToPicLuma(pcPicYuvRec);
421
422	ruiBits = uiOrigRate;
423	ruiDist = uiOrigDist;
424	}
425
426	// if ALF works
427	if( m_pcBestAlfParam->alf_flag )
428	{
429	// predict ALF coefficients
430	predictALFCoeff( m_pcBestAlfParam );
431
432	// do additional ALF process for chroma
433	xEncALFChroma( uiMinRate, pcPicOrg, pcPicYuvExtRec, pcPicYuvRec, ruiDist, ruiBits );
434	}
435
436	// copy to best storage
437	copyALFParam(pcAlfParam, m_pcBestAlfParam);
438
439	// store best depth
440	ruiMaxAlfCtrlDepth = m_pcEntropyCoder->getMaxAlfCtrlDepth();
441	}
442
443	// ====================================================================================================================
444	// Protected member functions
445	// ====================================================================================================================
446
447	Void TEncAdaptiveLoopFilter::xEncALFChroma( UInt64 uiLumaRate, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, UInt64& ruiBits )
448	{
449	// restriction for non-referenced B-slice
450	if (m_eSliceType == B_SLICE && m_iPicNalReferenceIdc == 0)
451	{
452	return;
453	}
454
455	Int tap, num_coef;
456
457	// set global variables
458	tap = ALF_MAX_NUM_TAP_C;
459	num_coef = (tap*tap+1)>>1;
460	num_coef = num_coef + 1; // DC offset
461
462	// set min cost
463	UInt64 uiMinRate = uiLumaRate;
464	UInt64 uiMinDist = MAX_INT;
465	Double dMinCost = MAX_DOUBLE;
466
467	// calc original cost
468	copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
469	xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiMinRate, uiMinDist, dMinCost);
470
471	// initialize temp_alfps
472	m_pcTempAlfParam->chroma_idc = 3;
473	m_pcTempAlfParam->tap_chroma = tap;
474	m_pcTempAlfParam->num_coeff_chroma = num_coef;
475
476	// Adaptive in-loop wiener filtering for chroma
477	xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);
478
479	// filter on/off decision for chroma
480	Int iCWidth = (pcPicOrg->getWidth()>>1);
481	Int iCHeight = (pcPicOrg->getHeight()>>1);
482	Int iCStride = pcPicOrg->getCStride();
483	UInt64 uiFiltDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicRest->getCbAddr(), iCWidth, iCHeight, iCStride);
484	UInt64 uiFiltDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicRest->getCrAddr(), iCWidth, iCHeight, iCStride);
485	UInt64 uiOrgDistCb = xCalcSSD(pcPicOrg->getCbAddr(), pcPicDec->getCbAddr(), iCWidth, iCHeight, iCStride);
486	UInt64 uiOrgDistCr = xCalcSSD(pcPicOrg->getCrAddr(), pcPicDec->getCrAddr(), iCWidth, iCHeight, iCStride);
487
488	m_pcTempAlfParam->chroma_idc = 0;
489	if(uiOrgDistCb > uiFiltDistCb)
490	m_pcTempAlfParam->chroma_idc += 2;
491	if(uiOrgDistCr > uiFiltDistCr )
492	m_pcTempAlfParam->chroma_idc += 1;
493
494	if(m_pcTempAlfParam->chroma_idc)
495	{
496	if(m_pcTempAlfParam->chroma_idc!=3)
497	{
498	// chroma filter re-design
499	xFilteringFrameChroma(pcPicOrg, pcPicDec, pcPicRest);
500	}
501
502	UInt64 uiRate, uiDist;
503	Double dCost;
504	xCalcRDCostChroma(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost);
505
506	if( dCost < dMinCost )
507	{
508	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
509	predictALFCoeffChroma(m_pcBestAlfParam);
510
511	ruiBits += uiRate;
512	ruiDist += uiDist;
513	}
514	else
515	{
516	m_pcBestAlfParam->chroma_idc = 0;
517
518	if((m_pcTempAlfParam->chroma_idc>>1)&0x01)
519	pcPicDec->copyToPicCb(pcPicRest);
520	if(m_pcTempAlfParam->chroma_idc&0x01)
521	pcPicDec->copyToPicCr(pcPicRest);
522
523	ruiBits += uiMinRate;
524	ruiDist += uiMinDist;
525	}
526	}
527	else
528	{
529	m_pcBestAlfParam->chroma_idc = 0;
530
531	ruiBits += uiMinRate;
532	ruiDist += uiMinDist;
533
534	pcPicDec->copyToPicCb(pcPicRest);
535	pcPicDec->copyToPicCr(pcPicRest);
536	}
537	}
538
539	// ====================================================================================================================
540	// Private member functions
541	// ====================================================================================================================
542
543	Void TEncAdaptiveLoopFilter::xInitParam()
544	{
545	Int i, j;
546
547	if (m_ppdAlfCorr != NULL)
548	{
549	for (i = 0; i < ALF_MAX_NUM_COEF; i++)
550	{
551	for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
552	{
553	m_ppdAlfCorr[i][j] = 0;
554	}
555	}
556	}
557	else
558	{
559	m_ppdAlfCorr = new Double*[ALF_MAX_NUM_COEF];
560	for (i = 0; i < ALF_MAX_NUM_COEF; i++)
561	{
562	m_ppdAlfCorr[i] = new Double[ALF_MAX_NUM_COEF+1];
563	for (j = 0; j < ALF_MAX_NUM_COEF+1; j++)
564	{
565	m_ppdAlfCorr[i][j] = 0;
566	}
567	}
568	}
569
570	if (m_pdDoubleAlfCoeff != NULL)
571	{
572	for (i = 0; i < ALF_MAX_NUM_COEF; i++)
573	{
574	m_pdDoubleAlfCoeff[i] = 0;
575	}
576	}
577	else
578	{
579	m_pdDoubleAlfCoeff = new Double[ALF_MAX_NUM_COEF];
580	for (i = 0; i < ALF_MAX_NUM_COEF; i++)
581	{
582	m_pdDoubleAlfCoeff[i] = 0;
583	}
584	}
585	}
586
587	Void TEncAdaptiveLoopFilter::xUninitParam()
588	{
589	Int i;
590
591	if (m_ppdAlfCorr != NULL)
592	{
593	for (i = 0; i < ALF_MAX_NUM_COEF; i++)
594	{
595	delete[] m_ppdAlfCorr[i];
596	m_ppdAlfCorr[i] = NULL;
597	}
598	delete[] m_ppdAlfCorr;
599	m_ppdAlfCorr = NULL;
600	}
601
602	if (m_pdDoubleAlfCoeff != NULL)
603	{
604	delete[] m_pdDoubleAlfCoeff;
605	m_pdDoubleAlfCoeff = NULL;
606	}
607	}
608
609	Void TEncAdaptiveLoopFilter::xCreateTmpAlfCtrlFlags()
610	{
611	for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
612	{
613	TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
614	pcCU->createTmpAlfCtrlFlag();
615	}
616	}
617
618	Void TEncAdaptiveLoopFilter::xDestroyTmpAlfCtrlFlags()
619	{
620	for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
621	{
622	TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
623	pcCU->destroyTmpAlfCtrlFlag();
624	}
625	}
626
627	Void TEncAdaptiveLoopFilter::xCopyTmpAlfCtrlFlagsTo()
628	{
629	for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
630	{
631	TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
632	pcCU->copyAlfCtrlFlagFromTmp();
633	}
634	}
635
636	Void TEncAdaptiveLoopFilter::xCopyTmpAlfCtrlFlagsFrom()
637	{
638	for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
639	{
640	TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
641	pcCU->copyAlfCtrlFlagToTmp();
642	}
643	}
644
645	Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlags()
646	{
647	for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
648	{
649	TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
650	xEncodeCUAlfCtrlFlag(pcCU, 0, 0);
651	}
652	}
653
654	Void TEncAdaptiveLoopFilter::xEncodeCUAlfCtrlFlag(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth)
655	{
656	Bool bBoundary = false;
657	UInt uiLPelX = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
658	UInt uiRPelX = uiLPelX + (g_uiMaxCUWidth>>uiDepth) - 1;
659	UInt uiTPelY = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
660	UInt uiBPelY = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1;
661
662	#if AD_HOCS_SLICES
663	if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) \|\| ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
664	#else
665	if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) \|\| ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
666	#endif
667	{
668	bBoundary = true;
669	}
670
671	if( ( ( uiDepth < pcCU->getDepth( uiAbsPartIdx ) ) && ( uiDepth < (g_uiMaxCUDepth-g_uiAddCUDepth) ) ) \|\| bBoundary )
672	{
673	UInt uiQNumParts = ( m_pcPic->getNumPartInCU() >> (uiDepth<<1) )>>2;
674	for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++, uiAbsPartIdx+=uiQNumParts )
675	{
676	uiLPelX = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
677	uiTPelY = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
678
679	#if AD_HOCS_SLICES
680	if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
681	#else
682	if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
683	#endif
684	xEncodeCUAlfCtrlFlag(pcCU, uiAbsPartIdx, uiDepth+1);
685	}
686	return;
687	}
688
689	m_pcEntropyCoder->encodeAlfCtrlFlag(pcCU, uiAbsPartIdx);
690	}
691	#if MTK_NONCROSS_INLOOP_FILTER
692	Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Int ypos, Int xpos, Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride, Bool bSymmCopyBlockMatrix)
693	#else
694	Void TEncAdaptiveLoopFilter::xCalcCorrelationFunc(Pel* pOrg, Pel* pCmp, Int iTap, Int iWidth, Int iHeight, Int iOrgStride, Int iCmpStride)
695	#endif
696	{
697	//Patch should be extended before this point................
698	//ext_offset = tap>>1;
699
700	#if TI_ALF_MAX_VSIZE_7
701	Int iTapV = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap);
702	Int N = (iTap * iTapV + 1) >> 1;
703	Int offsetV = iTapV >> 1;
704	#else
705	Int N = (iTap*iTap+1)>>1;
706	#endif
707	Int offset = iTap>>1;
708
709	const Int* pFiltPos;
710
711	switch(iTap)
712	{
713	case 5:
714	pFiltPos = m_aiSymmetricArray5x5;
715	break;
716	case 7:
717	pFiltPos = m_aiSymmetricArray7x7;
718	break;
719	case 9:
720	#if TI_ALF_MAX_VSIZE_7
721	pFiltPos = m_aiSymmetricArray9x7;
722	#else
723	pFiltPos = m_aiSymmetricArray9x9;
724	#endif
725	break;
726	default:
727	#if TI_ALF_MAX_VSIZE_7
728	pFiltPos = m_aiSymmetricArray9x7;
729	#else
730	pFiltPos = m_aiSymmetricArray9x9;
731	#endif
732	assert(0);
733	break;
734	}
735
736	Pel* pTerm = new Pel[N];
737
738	Int i, j;
739	#if MTK_NONCROSS_INLOOP_FILTER
740	for (Int y = ypos; y < ypos + iHeight; y++)
741	{
742	for (Int x = xpos; x < xpos + iWidth; x++)
743	{
744	#else
745	for (Int y = 0; y < iHeight; y++)
746	{
747	for (Int x = 0; x < iWidth; x++)
748	{
749	#endif
750	i = 0;
751	::memset(pTerm, 0, sizeof(Pel)*N);
752	#if TI_ALF_MAX_VSIZE_7
753	for (Int yy = y - offsetV; yy <= y + offsetV; yy++)
754	#else
755	for(Int yy=y-offset; yy<=y+offset; yy++)
756	#endif
757	{
758	for(Int xx=x-offset; xx<=x+offset; xx++)
759	{
760	pTerm[pFiltPos[i]] += pCmp[xx + yy*iCmpStride];
761	i++;
762	}
763	}
764
765	for(j=0; j<N; j++)
766	{
767	m_ppdAlfCorr[j][j] += pTerm[j]*pTerm[j];
768	for(i=j+1; i<N; i++)
769	m_ppdAlfCorr[j][i] += pTerm[j]*pTerm[i];
770
771	// DC offset
772	m_ppdAlfCorr[j][N] += pTerm[j];
773	m_ppdAlfCorr[j][N+1] += pOrg[x+yiOrgStride]pTerm[j];
774	}
775	// DC offset
776	for(i=0; i<N; i++)
777	m_ppdAlfCorr[N][i] += pTerm[i];
778	m_ppdAlfCorr[N][N] += 1;
779	m_ppdAlfCorr[N][N+1] += pOrg[x+y*iOrgStride];
780	}
781	}
782	#if MTK_NONCROSS_INLOOP_FILTER
783	if(bSymmCopyBlockMatrix)
784	{
785	#endif
786	for(j=0; j<N-1; j++)
787	{
788	for(i=j+1; i<N; i++)
789	m_ppdAlfCorr[i][j] = m_ppdAlfCorr[j][i];
790	}
791	#if MTK_NONCROSS_INLOOP_FILTER
792	}
793	#endif
794
795	delete[] pTerm;
796	pTerm = NULL;
797	}
798
799	#if IBDI_DISTORTION
800	UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride )
801	{
802	UInt64 uiSSD = 0;
803	Int x, y;
804
805	Int iShift = g_uiBitIncrement;
806	Int iOffset = (g_uiBitIncrement>0)? (1<<(g_uiBitIncrement-1)):0;
807	Int iTemp;
808
809	for( y = 0; y < iHeight; y++ )
810	{
811	for( x = 0; x < iWidth; x++ )
812	{
813	iTemp = ((pOrg[x]+iOffset)>>iShift) - ((pCmp[x]+iOffset)>>iShift); uiSSD += iTemp * iTemp;
814	}
815	pOrg += iStride;
816	pCmp += iStride;
817	}
818
819	return uiSSD;;
820	}
821	#else
822	UInt64 TEncAdaptiveLoopFilter::xCalcSSD(Pel* pOrg, Pel* pCmp, Int iWidth, Int iHeight, Int iStride )
823	{
824	UInt64 uiSSD = 0;
825	Int x, y;
826
827	UInt uiShift = g_uiBitIncrement<<1;
828	Int iTemp;
829
830	for( y = 0; y < iHeight; y++ )
831	{
832	for( x = 0; x < iWidth; x++ )
833	{
834	iTemp = pOrg[x] - pCmp[x]; uiSSD += ( iTemp * iTemp ) >> uiShift;
835	}
836	pOrg += iStride;
837	pCmp += iStride;
838	}
839
840	return uiSSD;;
841	}
842	#endif
843
844	Int TEncAdaptiveLoopFilter::xGauss(Double **a, Int N)
845	{
846	Int i, j, k;
847	Double t;
848
849	for(k=0; k<N; k++)
850	{
851	if (a[k][k] <0.000001)
852	return 1;
853	}
854
855	for(k=0; k<N-1; k++)
856	{
857	for(i=k+1;i<N; i++)
858	{
859	t=a[i][k]/a[k][k];
860	for(j=k+1; j<=N; j++)
861	{
862	a[i][j] -= t * a[k][j];
863	if(i==j && fabs(a[i][j])<0.000001) return 1;
864	}
865	}
866	}
867	for(i=N-1; i>=0; i--)
868	{
869	t = a[i][N];
870	for(j=i+1; j<N; j++)
871	t -= a[i][j] * a[j][N];
872	a[i][N] = t / a[i][i];
873	}
874	return 0;
875	}
876
877	Void TEncAdaptiveLoopFilter::xFilterCoefQuickSort( Double coef_data, Int coef_num, Int upper, Int lower )
878	{
879	Double mid, tmp_data;
880	Int i, j, tmp_num;
881
882	i = upper;
883	j = lower;
884	mid = coef_data[(lower+upper)>>1];
885	do
886	{
887	while( coef_data[i] < mid ) i++;
888	while( mid < coef_data[j] ) j--;
889	if( i <= j )
890	{
891	tmp_data = coef_data[i];
892	tmp_num = coef_num[i];
893	coef_data[i] = coef_data[j];
894	coef_num[i] = coef_num[j];
895	coef_data[j] = tmp_data;
896	coef_num[j] = tmp_num;
897	i++;
898	j--;
899	}
900	} while( i <= j );
901	if ( upper < j ) xFilterCoefQuickSort(coef_data, coef_num, upper, j);
902	if ( i < lower ) xFilterCoefQuickSort(coef_data, coef_num, i, lower);
903	}
904
905	Void TEncAdaptiveLoopFilter::xQuantFilterCoef(Double* h, Int* qh, Int tap, int bit_depth)
906	{
907	Int i, N;
908	Int max_value, min_value;
909	Double dbl_total_gain;
910	Int total_gain, q_total_gain;
911	Int upper, lower;
912	Double *dh;
913	Int *nc;
914	const Int *pFiltMag;
915
916	switch(tap)
917	{
918	case 5:
919	pFiltMag = m_aiSymmetricMag5x5;
920	break;
921	case 7:
922	pFiltMag = m_aiSymmetricMag7x7;
923	break;
924	case 9:
925	#if TI_ALF_MAX_VSIZE_7
926	pFiltMag = m_aiSymmetricMag9x7;
927	#else
928	pFiltMag = m_aiSymmetricMag9x9;
929	#endif
930	break;
931	default:
932	#if TI_ALF_MAX_VSIZE_7
933	pFiltMag = m_aiSymmetricMag9x7;
934	#else
935	pFiltMag = m_aiSymmetricMag9x9;
936	#endif
937	assert(0);
938	break;
939	}
940
941	#if TI_ALF_MAX_VSIZE_7
942	Int tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(tap);
943	N = (tap * tapV + 1) >> 1;
944	#else
945	N = (tap*tap+1)>>1;
946	#endif
947
948	dh = new Double[N];
949	nc = new Int[N];
950
951	max_value = (1<<(1+ALF_NUM_BIT_SHIFT))-1;
952	min_value = 0-(1<<(1+ALF_NUM_BIT_SHIFT));
953
954	dbl_total_gain=0.0;
955	q_total_gain=0;
956	for(i=0; i<N; i++)
957	{
958	if(h[i]>=0.0)
959	qh[i] = (Int)( h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5);
960	else
961	qh[i] = -(Int)(-h[i]*(1<<ALF_NUM_BIT_SHIFT)+0.5);
962
963	dh[i] = (Double)qh[i]/(Double)(1<<ALF_NUM_BIT_SHIFT) - h[i];
964	dh[i]*=pFiltMag[i];
965	dbl_total_gain += h[i]*pFiltMag[i];
966	q_total_gain += qh[i]*pFiltMag[i];
967	nc[i] = i;
968	}
969
970	// modification of quantized filter coefficients
971	total_gain = (Int)(dbl_total_gain*(1<<ALF_NUM_BIT_SHIFT)+0.5);
972
973	if( q_total_gain != total_gain )
974	{
975	xFilterCoefQuickSort(dh, nc, 0, N-1);
976	if( q_total_gain > total_gain )
977	{
978	upper = N-1;
979	while( q_total_gain > total_gain+1 )
980	{
981	i = nc[upper%N];
982	qh[i]--;
983	q_total_gain -= pFiltMag[i];
984	upper--;
985	}
986	if( q_total_gain == total_gain+1 )
987	{
988	if(dh[N-1]>0)
989	qh[N-1]--;
990	else
991	{
992	i=nc[upper%N];
993	qh[i]--;
994	qh[N-1]++;
995	}
996	}
997	}
998	else if( q_total_gain < total_gain )
999	{
1000	lower = 0;
1001	while( q_total_gain < total_gain-1 )
1002	{
1003	i=nc[lower%N];
1004	qh[i]++;
1005	q_total_gain += pFiltMag[i];
1006	lower++;
1007	}
1008	if( q_total_gain == total_gain-1 )
1009	{
1010	if(dh[N-1]<0)
1011	qh[N-1]++;
1012	else
1013	{
1014	i=nc[lower%N];
1015	qh[i]++;
1016	qh[N-1]--;
1017	}
1018	}
1019	}
1020	}
1021
1022	// set of filter coefficients
1023	for(i=0; i<N; i++)
1024	{
1025	qh[i] = Max(min_value,Min(max_value, qh[i]));
1026	}
1027
1028	// DC offset
1029	// max_value = Min( (1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma)))-1, (1<<14)-1);
1030	// min_value = Max( -(1<<(3+Max(img_bitdepth_luma,img_bitdepth_chroma))), -(1<<14) );
1031	max_value = Min( (1<<(3+g_uiBitDepth + g_uiBitIncrement))-1, (1<<14)-1);
1032	min_value = Max( -(1<<(3+g_uiBitDepth + g_uiBitIncrement)), -(1<<14) );
1033
1034	qh[N] = (h[N]>=0.0)? (Int)( h[N](1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5) : -(Int)(-h[N](1<<(ALF_NUM_BIT_SHIFT-bit_depth+8)) + 0.5);
1035	qh[N] = Max(min_value,Min(max_value, qh[N]));
1036
1037	delete[] dh;
1038	dh = NULL;
1039
1040	delete[] nc;
1041	nc = NULL;
1042	}
1043
1044	Void TEncAdaptiveLoopFilter::xClearFilterCoefInt(Int* qh, Int N)
1045	{
1046	// clear
1047	memset( qh, 0, sizeof( Int ) * N );
1048
1049	// center pos
1050	qh[N-2] = 1<<ALF_NUM_BIT_SHIFT;
1051	}
1052
1053	Void TEncAdaptiveLoopFilter::xCalcRDCost(ALFParam* pAlfParam, UInt64& ruiRate, UInt64 uiDist, Double& rdCost)
1054	{
1055	if(pAlfParam != NULL)
1056	{
1057	Int* piTmpCoef;
1058	piTmpCoef = new Int[ALF_MAX_NUM_COEF];
1059
1060	memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff);
1061
1062	predictALFCoeff(pAlfParam);
1063
1064	m_pcEntropyCoder->resetEntropy();
1065	m_pcEntropyCoder->resetBits();
1066	m_pcEntropyCoder->encodeAlfParam(pAlfParam);
1067
1068	if(pAlfParam->cu_control_flag)
1069	{
1070	#if TSB_ALF_HEADER
1071	m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
1072	#else
1073	xEncodeCUAlfCtrlFlags();
1074	#endif
1075	}
1076	ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
1077	memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff);
1078	delete[] piTmpCoef;
1079	piTmpCoef = NULL;
1080	}
1081	else
1082	{
1083	ruiRate = 1;
1084	}
1085
1086	rdCost = (Double)(ruiRate) * m_dLambdaLuma + (Double)(uiDist);
1087	}
1088
1089	Void TEncAdaptiveLoopFilter::xCalcRDCost(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost)
1090	{
1091	if(pAlfParam != NULL)
1092	{
1093	Int* piTmpCoef;
1094	piTmpCoef = new Int[ALF_MAX_NUM_COEF];
1095
1096	memcpy(piTmpCoef, pAlfParam->coeff, sizeof(Int)*pAlfParam->num_coeff);
1097
1098	predictALFCoeff(pAlfParam);
1099
1100	m_pcEntropyCoder->resetEntropy();
1101	m_pcEntropyCoder->resetBits();
1102	m_pcEntropyCoder->encodeAlfParam(pAlfParam);
1103
1104	if(pAlfParam->cu_control_flag)
1105	{
1106	#if TSB_ALF_HEADER
1107	m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
1108	#else
1109	xEncodeCUAlfCtrlFlags();
1110	#endif
1111	}
1112	ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
1113	memcpy(pAlfParam->coeff, piTmpCoef, sizeof(int)*pAlfParam->num_coeff);
1114	delete[] piTmpCoef;
1115	piTmpCoef = NULL;
1116	}
1117	else
1118	{
1119	ruiRate = 1;
1120	}
1121
1122	ruiDist = xCalcSSD(pcPicOrg->getLumaAddr(), pcPicCmp->getLumaAddr(), pcPicOrg->getWidth(), pcPicOrg->getHeight(), pcPicOrg->getStride());
1123	rdCost = (Double)(ruiRate) * m_dLambdaLuma + (Double)(ruiDist);
1124	}
1125
1126	Void TEncAdaptiveLoopFilter::xCalcRDCostChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicCmp, ALFParam* pAlfParam, UInt64& ruiRate, UInt64& ruiDist, Double& rdCost)
1127	{
1128	if(pAlfParam->chroma_idc)
1129	{
1130	Int* piTmpCoef;
1131	piTmpCoef = new Int[ALF_MAX_NUM_COEF_C];
1132
1133	memcpy(piTmpCoef, pAlfParam->coeff_chroma, sizeof(Int)*pAlfParam->num_coeff_chroma);
1134
1135	predictALFCoeffChroma(pAlfParam);
1136
1137	m_pcEntropyCoder->resetEntropy();
1138	m_pcEntropyCoder->resetBits();
1139	m_pcEntropyCoder->encodeAlfParam(pAlfParam);
1140
1141	if(pAlfParam->cu_control_flag)
1142	{
1143	#if TSB_ALF_HEADER
1144	m_pcEntropyCoder->encodeAlfCtrlParam(pAlfParam);
1145	#else
1146	xEncodeCUAlfCtrlFlags();
1147	#endif
1148	}
1149	ruiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
1150	memcpy(pAlfParam->coeff_chroma, piTmpCoef, sizeof(int)*pAlfParam->num_coeff_chroma);
1151	delete[] piTmpCoef;
1152	piTmpCoef = NULL;
1153	}
1154	ruiDist = 0;
1155	ruiDist += xCalcSSD(pcPicOrg->getCbAddr(), pcPicCmp->getCbAddr(), (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride());
1156	ruiDist += xCalcSSD(pcPicOrg->getCrAddr(), pcPicCmp->getCrAddr(), (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride());
1157	rdCost = (Double)(ruiRate) * m_dLambdaChroma + (Double)(ruiDist);
1158	}
1159
1160	Void TEncAdaptiveLoopFilter::xFilteringFrameChroma(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
1161	{
1162	Int i, tap, N, err_code;
1163	Int* qh;
1164
1165	tap = m_pcTempAlfParam->tap_chroma;
1166	N = m_pcTempAlfParam->num_coeff_chroma;
1167	qh = m_pcTempAlfParam->coeff_chroma;
1168
1169	// initialize correlation
1170	for(i=0; i<N; i++)
1171	memset(m_ppdAlfCorr[i], 0, sizeof(Double)*(N+1));
1172
1173	if ((m_pcTempAlfParam->chroma_idc>>1)&0x01)
1174	{
1175	Pel* pOrg = pcPicOrg->getCbAddr();
1176	Pel* pCmp = pcPicDec->getCbAddr();
1177	#if MTK_NONCROSS_INLOOP_FILTER
1178	if(!m_bUseNonCrossALF)
1179	xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
1180	else
1181	xCalcCorrelationFuncforChromaSlices(ALF_Cb, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride());
1182	#else
1183	xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride());
1184	#endif
1185	}
1186	if ((m_pcTempAlfParam->chroma_idc)&0x01)
1187	{
1188	Pel* pOrg = pcPicOrg->getCrAddr();
1189	Pel* pCmp = pcPicDec->getCrAddr();
1190	#if MTK_NONCROSS_INLOOP_FILTER
1191	if(!m_bUseNonCrossALF)
1192	xCalcCorrelationFunc(0, 0, pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride(), true);
1193	else
1194	xCalcCorrelationFuncforChromaSlices(ALF_Cr, pOrg, pCmp, tap, pcPicOrg->getCStride(), pcPicDec->getCStride());
1195	#else
1196	xCalcCorrelationFunc(pOrg, pCmp, tap, (pcPicOrg->getWidth()>>1), (pcPicOrg->getHeight()>>1), pcPicOrg->getCStride(), pcPicDec->getCStride());
1197	#endif
1198	}
1199
1200	err_code = xGauss(m_ppdAlfCorr, N);
1201
1202	if(err_code)
1203	{
1204	xClearFilterCoefInt(qh, N);
1205	}
1206	else
1207	{
1208	for(i=0; i<N; i++)
1209	m_pdDoubleAlfCoeff[i] = m_ppdAlfCorr[i][N];
1210
1211	xQuantFilterCoef(m_pdDoubleAlfCoeff, qh, tap, g_uiBitDepth + g_uiBitIncrement);
1212	}
1213
1214
1215	if ((m_pcTempAlfParam->chroma_idc>>1)&0x01)
1216	{
1217	#if MTK_NONCROSS_INLOOP_FILTER
1218	if(! m_bUseNonCrossALF)
1219	xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 0);
1220	else
1221	xFrameChromaforSlices(ALF_Cb, pcPicDec, pcPicRest, qh, tap);
1222	#else
1223	xFrameChroma(pcPicDec, pcPicRest, qh, tap, 0);
1224	#endif
1225	}
1226	if ((m_pcTempAlfParam->chroma_idc)&0x01)
1227	{
1228	#if MTK_NONCROSS_INLOOP_FILTER
1229	if(! m_bUseNonCrossALF)
1230	xFrameChroma(0, 0, (pcPicRest->getHeight() >> 1), (pcPicRest->getWidth() >>1), pcPicDec, pcPicRest, qh, tap, 1);
1231	else
1232	xFrameChromaforSlices(ALF_Cr, pcPicDec, pcPicRest, qh, tap);
1233	#else
1234	xFrameChroma(pcPicDec, pcPicRest, qh, tap, 1);
1235	#endif
1236	}
1237
1238	if(m_pcTempAlfParam->chroma_idc<3)
1239	{
1240	if(m_pcTempAlfParam->chroma_idc==1)
1241	{
1242	pcPicDec->copyToPicCb(pcPicRest);
1243	}
1244	if(m_pcTempAlfParam->chroma_idc==2)
1245	{
1246	pcPicDec->copyToPicCr(pcPicRest);
1247	}
1248	}
1249
1250	}
1251
1252	Void TEncAdaptiveLoopFilter::xCopyDecToRestCUs(TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
1253	{
1254	for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
1255	{
1256	TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
1257	xCopyDecToRestCU(pcCU, 0, 0, pcPicDec, pcPicRest);
1258	}
1259	}
1260
1261	Void TEncAdaptiveLoopFilter::xCopyDecToRestCU(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest)
1262	{
1263	Bool bBoundary = false;
1264	UInt uiLPelX = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
1265	UInt uiRPelX = uiLPelX + (g_uiMaxCUWidth>>uiDepth) - 1;
1266	UInt uiTPelY = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
1267	UInt uiBPelY = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1;
1268
1269	if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) \|\| ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
1270	{
1271	bBoundary = true;
1272	}
1273
1274	if( ( ( uiDepth < pcCU->getDepth( uiAbsPartIdx ) ) && ( uiDepth < (g_uiMaxCUDepth-g_uiAddCUDepth) ) ) \|\| bBoundary )
1275	{
1276	UInt uiQNumParts = ( m_pcPic->getNumPartInCU() >> (uiDepth<<1) )>>2;
1277	for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++, uiAbsPartIdx+=uiQNumParts )
1278	{
1279	uiLPelX = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
1280	uiTPelY = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
1281
1282	if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
1283	xCopyDecToRestCU(pcCU, uiAbsPartIdx, uiDepth+1, pcPicDec, pcPicRest);
1284	}
1285	return;
1286	}
1287
1288	if (!pcCU->getAlfCtrlFlag(uiAbsPartIdx))
1289	{
1290	UInt uiCUAddr = pcCU->getAddr();
1291
1292	Int iWidth = pcCU->getWidth(uiAbsPartIdx);
1293	Int iHeight = pcCU->getHeight(uiAbsPartIdx);
1294
1295	Pel* pRec = pcPicDec->getLumaAddr(uiCUAddr, uiAbsPartIdx);
1296	Pel* pFilt = pcPicRest->getLumaAddr(uiCUAddr, uiAbsPartIdx);
1297
1298	Int iRecStride = pcPicDec->getStride();
1299	Int iFiltStride = pcPicRest->getStride();
1300
1301	for (Int y = 0; y < iHeight; y++)
1302	{
1303	for (Int x = 0; x < iWidth; x++)
1304	{
1305	pFilt[x] = pRec[x];
1306	}
1307	pRec += iRecStride;
1308	pFilt += iFiltStride;
1309	}
1310	}
1311	}
1312
1313	Void TEncAdaptiveLoopFilter::xcollectStatCodeFilterCoeffForce0(int **pDiffQFilterCoeffIntPP, int fl, int sqrFiltLength,
1314	int filters_per_group, int bitsVarBin[])
1315	{
1316	int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal,
1317	*pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
1318	minKStart, minBitsKStart, bitsKStart;
1319
1320	pDepthInt=pDepthIntTab[fl-2];
1321
1322	maxScanVal=0;
1323	for (i=0; i<sqrFiltLength; i++)
1324	{
1325	maxScanVal=max(maxScanVal, pDepthInt[i]);
1326	}
1327
1328	// vlc for all
1329	memset(bitsCoeffScan, 0, MAX_SCAN_VAL * MAX_EXP_GOLOMB * sizeof(int));
1330	for(ind=0; ind<filters_per_group; ++ind)
1331	{
1332	for(i = 0; i < sqrFiltLength; i++)
1333	{
1334	scanPos=pDepthInt[i]-1;
1335	coeffVal=abs(pDiffQFilterCoeffIntPP[ind][i]);
1336	for (k=1; k<15; k++)
1337	{
1338	bitsCoeffScan[scanPos][k] += lengthGolomb(coeffVal, k);
1339	}
1340	}
1341	}
1342
1343	minBitsKStart=0;
1344	minKStart = -1;
1345	for (k=1; k<8; k++)
1346	{
1347	bitsKStart=0; kStart=k;
1348	for (scanPos=0; scanPos<maxScanVal; scanPos++)
1349	{
1350	kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin];
1351
1352	if (bitsCoeffScan[scanPos][kStart+1]<minBits)
1353	{
1354	kMin=kStart+1; minBits=bitsCoeffScan[scanPos][kMin];
1355	}
1356	kStart=kMin;
1357	bitsKStart+=minBits;
1358	}
1359	if (bitsKStart<minBitsKStart \|\| k==1)
1360	{
1361	minBitsKStart=bitsKStart;
1362	minKStart=k;
1363	}
1364	}
1365
1366	kStart = minKStart;
1367	for (scanPos=0; scanPos<maxScanVal; scanPos++)
1368	{
1369	kMin=kStart; minBits=bitsCoeffScan[scanPos][kMin];
1370
1371	if (bitsCoeffScan[scanPos][kStart+1]<minBits)
1372	{
1373	kMin = kStart+1;
1374	minBits = bitsCoeffScan[scanPos][kMin];
1375	}
1376
1377	kMinTab[scanPos] = kMin;
1378	kStart = kMin;
1379	}
1380
1381	for(ind=0; ind<filters_per_group; ++ind)
1382	{
1383	bitsVarBin[ind]=0;
1384	for(i = 0; i < sqrFiltLength; i++)
1385	{
1386	scanPos=pDepthInt[i]-1;
1387	bitsVarBin[ind] += lengthGolomb(abs(pDiffQFilterCoeffIntPP[ind][i]), kMinTab[scanPos]);
1388	}
1389	}
1390	}
1391
1392	Void TEncAdaptiveLoopFilter::xdecideCoeffForce0(int codedVarBins[NO_VAR_BINS], double errorForce0Coeff[], double errorForce0CoeffTab[NO_VAR_BINS][2], int bitsVarBin[NO_VAR_BINS], double lambda, int filters_per_fr)
1393	{
1394	int filtNo;
1395	double lagrangianDiff;
1396	int ind;
1397
1398	errorForce0Coeff[0]=errorForce0Coeff[1]=0;
1399	for (ind=0; ind<16; ind++) codedVarBins[ind]=0;
1400
1401	for(filtNo=0; filtNo<filters_per_fr; filtNo++)
1402	{
1403	// No coeffcient prediction bits used
1404	#if ENABLE_FORCECOEFF0
1405	lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]);
1406	codedVarBins[filtNo]=(lagrangianDiff>0)? 1 : 0;
1407	errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]];
1408	errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1];
1409	#else
1410	lagrangianDiff=errorForce0CoeffTab[filtNo][0]-(errorForce0CoeffTab[filtNo][1]+lambda*bitsVarBin[filtNo]);
1411	codedVarBins[filtNo]= 1;
1412	errorForce0Coeff[0]+=errorForce0CoeffTab[filtNo][codedVarBins[filtNo]];
1413	errorForce0Coeff[1]+=errorForce0CoeffTab[filtNo][1];
1414	#endif
1415	}
1416	}
1417
1418	double TEncAdaptiveLoopFilter::xfindBestCoeffCodMethod(int codedVarBins[NO_VAR_BINS], int *forceCoeff0,
1419	int **filterCoeffSymQuant, int fl, int sqrFiltLength,
1420	int filters_per_fr, double errorForce0CoeffTab[NO_VAR_BINS][2],
1421	double *errorQuant, double lambda)
1422
1423	{
1424	int bitsVarBin[NO_VAR_BINS], createBistream, coeffBits, coeffBitsForce0;
1425	double errorForce0Coeff[2], lagrangianForce0, lagrangian;
1426
1427	xcollectStatCodeFilterCoeffForce0(filterCoeffSymQuant, fl, sqrFiltLength,
1428	filters_per_fr, bitsVarBin);
1429
1430	xdecideCoeffForce0(codedVarBins, errorForce0Coeff, errorForce0CoeffTab, bitsVarBin, lambda, filters_per_fr);
1431
1432	coeffBitsForce0 = xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength,
1433	filters_per_fr, codedVarBins, createBistream=0, m_tempALFp);
1434
1435	coeffBits = xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength, filters_per_fr,
1436	createBistream=0, m_tempALFp);
1437
1438	lagrangianForce0=errorForce0Coeff[0]+lambda*coeffBitsForce0;
1439	lagrangian=errorForce0Coeff[1]+lambda*coeffBits;
1440	if (lagrangianForce0<lagrangian)
1441	{
1442	*errorQuant=errorForce0Coeff[0];
1443	*forceCoeff0=1;
1444	return(lagrangianForce0);
1445	}
1446	else
1447	{
1448	*errorQuant=errorForce0Coeff[1];
1449	*forceCoeff0=0;
1450	return(lagrangian);
1451	}
1452	}
1453
1454	Int TEncAdaptiveLoopFilter::xsendAllFiltersPPPred(int **FilterCoeffQuant, int fl, int sqrFiltLength,
1455	int filters_per_group, int createBistream, ALFParam* ALFp)
1456	{
1457	int ind, bit_ct = 0, bit_ct0 = 0, i;
1458	int predMethod = 0;
1459	int force0 = 0;
1460	Int64 Newbit_ct;
1461
1462	bit_ct0 = xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0);
1463
1464	for(ind = 0; ind < filters_per_group; ++ind)
1465	{
1466	if(ind == 0)
1467	{
1468	for(i = 0; i < sqrFiltLength; i++)
1469	m_diffFilterCoeffQuant[ind][i] = FilterCoeffQuant[ind][i];
1470	}
1471	else
1472	{
1473	for(i = 0; i < sqrFiltLength; i++)
1474	m_diffFilterCoeffQuant[ind][i] = FilterCoeffQuant[ind][i] - FilterCoeffQuant[ind-1][i];
1475	}
1476	}
1477
1478	if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, 0) >= bit_ct0)
1479	{
1480	predMethod = 0;
1481	if(filters_per_group > 1)
1482	bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, createBistream);
1483	bit_ct += xcodeFilterCoeff(FilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream);
1484	}
1485	else
1486	{
1487	predMethod = 1;
1488	if(filters_per_group > 1)
1489	bit_ct += lengthPredFlags(force0, predMethod, NULL, 0, createBistream);
1490	bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group, createBistream);
1491	}
1492
1493	ALFp->forceCoeff0 = 0;
1494	ALFp->filters_per_group_diff = filters_per_group;
1495	ALFp->filters_per_group = filters_per_group;
1496	ALFp->predMethod = predMethod;
1497	ALFp->num_coeff = sqrFiltLength;
1498	if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM)
1499	ALFp->realfiltNo=2;
1500	else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM)
1501	ALFp->realfiltNo=1;
1502	else
1503	ALFp->realfiltNo=0;
1504
1505	for(ind = 0; ind < filters_per_group; ++ind)
1506	{
1507	for(i = 0; i < sqrFiltLength; i++)
1508	{
1509	if (predMethod) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i];
1510	else ALFp->coeffmulti[ind][i] = FilterCoeffQuant[ind][i];
1511	}
1512	}
1513	m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
1514
1515
1516	// return(bit_ct);
1517	return ((Int)Newbit_ct);
1518	}
1519
1520
1521	Int TEncAdaptiveLoopFilter::xsendAllFiltersPPPredForce0(int **FilterCoeffQuant, int fl, int sqrFiltLength, int filters_per_group,
1522	int codedVarBins[NO_VAR_BINS], int createBistream, ALFParam* ALFp)
1523	{
1524	int ind, bit_ct=0, bit_ct0, i, j;
1525	int filters_per_group_temp, filters_per_group_diff;
1526	int chosenPred = 0;
1527	int force0 = 1;
1528	Int64 Newbit_ct;
1529
1530	i = 0;
1531	for(ind = 0; ind < filters_per_group; ind++)
1532	{
1533	if(codedVarBins[ind] == 1)
1534	{
1535	for(j = 0; j < sqrFiltLength; j++)
1536	m_FilterCoeffQuantTemp[i][j]=FilterCoeffQuant[ind][j];
1537	i++;
1538	}
1539	}
1540	filters_per_group_diff = filters_per_group_temp = i;
1541
1542	for(ind = 0; ind < filters_per_group; ++ind)
1543	{
1544	if(ind == 0)
1545	{
1546	for(i = 0; i < sqrFiltLength; i++)
1547	m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i];
1548	}
1549	else
1550	{
1551	for(i = 0; i < sqrFiltLength; i++)
1552	m_diffFilterCoeffQuant[ind][i] = m_FilterCoeffQuantTemp[ind][i] - m_FilterCoeffQuantTemp[ind-1][i];
1553	}
1554	}
1555
1556	if(!((filters_per_group_temp == 0) && (filters_per_group == 1)))
1557	{
1558	bit_ct0 = xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, 0);
1559
1560	if(xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_diff, 0) >= bit_ct0)
1561	{
1562	chosenPred = 0;
1563	bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream);
1564	bit_ct += xcodeFilterCoeff(m_FilterCoeffQuantTemp, fl, sqrFiltLength, filters_per_group_temp, createBistream);
1565	}
1566	else
1567	{
1568	chosenPred = 1;
1569	bit_ct += lengthPredFlags(force0, chosenPred, codedVarBins, filters_per_group, createBistream);
1570	bit_ct += xcodeFilterCoeff(m_diffFilterCoeffQuant, fl, sqrFiltLength, filters_per_group_temp, createBistream);
1571	}
1572	}
1573	ALFp->forceCoeff0 = 1;
1574	ALFp->predMethod = chosenPred;
1575	ALFp->filters_per_group_diff = filters_per_group_diff;
1576	ALFp->filters_per_group = filters_per_group;
1577	ALFp->num_coeff = sqrFiltLength;
1578	if (ALFp->num_coeff == SQR_FILT_LENGTH_5SYM)
1579	ALFp->realfiltNo=2;
1580	else if (ALFp->num_coeff == SQR_FILT_LENGTH_7SYM)
1581	ALFp->realfiltNo=1;
1582	else
1583	ALFp->realfiltNo=0;
1584
1585	for(ind = 0; ind < filters_per_group; ++ind)
1586	{
1587	ALFp->codedVarBins[ind] = codedVarBins[ind];
1588	}
1589	for(ind = 0; ind < filters_per_group_diff; ++ind)
1590	{
1591	for(i = 0; i < sqrFiltLength; i++)
1592	{
1593	if (chosenPred) ALFp->coeffmulti[ind][i] = m_diffFilterCoeffQuant[ind][i];
1594	else ALFp->coeffmulti[ind][i] = m_FilterCoeffQuantTemp[ind][i];
1595	}
1596	}
1597	m_pcDummyEntropyCoder->codeFiltCountBit(ALFp, &Newbit_ct);
1598
1599	return ((Int)Newbit_ct);
1600	}
1601
1602	//filtNo==-1/realfiltNo, noFilters=filters_per_frames, realfiltNo=filtNo
1603	Int TEncAdaptiveLoopFilter::xcodeAuxInfo(int filtNo, int noFilters, int varIndTab[NO_VAR_BINS], int frNo, int createBitstream,int realfiltNo, ALFParam* ALFp)
1604	{
1605	int i, filterPattern[NO_VAR_BINS], startSecondFilter=0, bitCt=0, codePrediction;
1606	Int64 NewbitCt;
1607
1608	codePrediction = 0;
1609
1610	//send realfiltNo (tap related)
1611	ALFp->realfiltNo = realfiltNo;
1612	ALFp->filtNo = filtNo;
1613
1614	if(filtNo >= 0)
1615	{
1616	// decide startSecondFilter and filterPattern
1617	if(noFilters > 1)
1618	{
1619	memset(filterPattern, 0, NO_VAR_BINS * sizeof(int));
1620	for(i = 1; i < NO_VAR_BINS; ++i)
1621	{
1622	if(varIndTab[i] != varIndTab[i-1])
1623	{
1624	filterPattern[i] = 1;
1625	startSecondFilter = i;
1626	}
1627	}
1628	memcpy (ALFp->filterPattern, filterPattern, NO_VAR_BINS * sizeof(int));
1629	ALFp->startSecondFilter = startSecondFilter;
1630	}
1631
1632	//send noFilters (filters_per_frame)
1633	//0: filters_per_frame = 1
1634	//1: filters_per_frame = 2
1635	//2: filters_per_frame > 2 (exact number from filterPattern)
1636
1637	ALFp->noFilters = min(noFilters-1,2);
1638	if (noFilters<=0) printf("error\n");
1639	}
1640	m_pcDummyEntropyCoder->codeAuxCountBit(ALFp, &NewbitCt);
1641	bitCt = (int) NewbitCt;
1642	return(bitCt);
1643	}
1644
1645	Int TEncAdaptiveLoopFilter::xcodeFilterCoeff(int **pDiffQFilterCoeffIntPP, int fl, int sqrFiltLength,
1646	int filters_per_group, int createBitstream)
1647	{
1648	int i, k, kMin, kStart, minBits, ind, scanPos, maxScanVal, coeffVal, len = 0,
1649	*pDepthInt=NULL, kMinTab[MAX_SQR_FILT_LENGTH], bitsCoeffScan[MAX_SCAN_VAL][MAX_EXP_GOLOMB],
1650	minKStart, minBitsKStart, bitsKStart;
1651
1652	pDepthInt = pDepthIntTab[fl-2];
1653
1654	maxScanVal = 0;
1655	for(i = 0; i < sqrFiltLength; i++)
1656	{
1657	maxScanVal = max(maxScanVal, pDepthInt[i]);
1658	}
1659
1660	// vlc for all
1661	memset(bitsCoeffScan, 0, MAX_SCAN_VAL * MAX_EXP_GOLOMB * sizeof(int));
1662	for(ind=0; ind<filters_per_group; ++ind)
1663	{
1664	for(i = 0; i < sqrFiltLength; i++)
1665	{
1666	scanPos=pDepthInt[i]-1;
1667	coeffVal=abs(pDiffQFilterCoeffIntPP[ind][i]);
1668	for (k=1; k<15; k++)
1669	{
1670	bitsCoeffScan[scanPos][k]+=lengthGolomb(coeffVal, k);
1671	}
1672	}
1673	}
1674
1675	minBitsKStart = 0;
1676	minKStart = -1;
1677	for(k = 1; k < 8; k++)
1678	{
1679	bitsKStart = 0;
1680	kStart = k;
1681	for(scanPos = 0; scanPos < maxScanVal; scanPos++)
1682	{
1683	kMin = kStart;
1684	minBits = bitsCoeffScan[scanPos][kMin];
1685
1686	if(bitsCoeffScan[scanPos][kStart+1] < minBits)
1687	{
1688	kMin = kStart + 1;
1689	minBits = bitsCoeffScan[scanPos][kMin];
1690	}
1691	kStart = kMin;
1692	bitsKStart += minBits;
1693	}
1694	if((bitsKStart < minBitsKStart) \|\| (k == 1))
1695	{
1696	minBitsKStart = bitsKStart;
1697	minKStart = k;
1698	}
1699	}
1700
1701	kStart = minKStart;
1702	for(scanPos = 0; scanPos < maxScanVal; scanPos++)
1703	{
1704	kMin = kStart;
1705	minBits = bitsCoeffScan[scanPos][kMin];
1706
1707	if(bitsCoeffScan[scanPos][kStart+1] < minBits)
1708	{
1709	kMin = kStart + 1;
1710	minBits = bitsCoeffScan[scanPos][kMin];
1711	}
1712
1713	kMinTab[scanPos] = kMin;
1714	kStart = kMin;
1715	}
1716
1717	// Coding parameters
1718	// len += lengthFilterCodingParams(minKStart, maxScanVal, kMinTab, createBitstream);
1719	len += (3 + maxScanVal);
1720
1721	// Filter coefficients
1722	len += lengthFilterCoeffs(sqrFiltLength, filters_per_group, pDepthInt, pDiffQFilterCoeffIntPP,
1723	kMinTab, createBitstream);
1724
1725	return len;
1726	}
1727
1728	Int TEncAdaptiveLoopFilter::lengthGolomb(int coeffVal, int k)
1729	{
1730	int m = 2 << (k - 1);
1731	int q = coeffVal / m;
1732	if(coeffVal != 0)
1733	return(q + 2 + k);
1734	else
1735	return(q + 1 + k);
1736	}
1737
1738	Int TEncAdaptiveLoopFilter::lengthPredFlags(int force0, int predMethod, int codedVarBins[NO_VAR_BINS],
1739	int filters_per_group, int createBitstream)
1740	{
1741	int bit_cnt = 0;
1742
1743	if(force0)
1744	bit_cnt = 2 + filters_per_group;
1745	else
1746	bit_cnt = 2;
1747	return bit_cnt;
1748
1749	}
1750	//important
1751	Int TEncAdaptiveLoopFilter::lengthFilterCoeffs(int sqrFiltLength, int filters_per_group, int pDepthInt[],
1752	int **FilterCoeff, int kMinTab[], int createBitstream)
1753	{
1754	int ind, scanPos, i;
1755	int bit_cnt = 0;
1756
1757	for(ind = 0; ind < filters_per_group; ++ind)
1758	{
1759	for(i = 0; i < sqrFiltLength; i++)
1760	{
1761	scanPos = pDepthInt[i] - 1;
1762	bit_cnt += lengthGolomb(abs(FilterCoeff[ind][i]), kMinTab[scanPos]);
1763	}
1764	}
1765	return bit_cnt;
1766	}
1767
1768	Void TEncAdaptiveLoopFilter::xEncALFLuma_qc ( TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost )
1769	{
1770	//pcPicDec: extended decoded
1771	//pcPicRest: original decoded: filtered signal will be stored
1772
1773	UInt64 uiRate;
1774	UInt64 uiDist;
1775	Double dCost;
1776	#if !MQT_ALF_NPASS
1777	Int Height = pcPicOrg->getHeight();
1778	Int Width = pcPicOrg->getWidth();
1779	#endif
1780	Int LumaStride = pcPicOrg->getStride();
1781	imgpel* pOrg = (imgpel*) pcPicOrg->getLumaAddr();
1782	imgpel* pRest = (imgpel*) pcPicRest->getLumaAddr();
1783	imgpel* pDec = (imgpel*) pcPicDec->getLumaAddr();
1784
1785	Int tap = ALF_MIN_NUM_TAP;
1786	m_pcTempAlfParam->tap = tap;
1787	#if TI_ALF_MAX_VSIZE_7
1788	m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(m_pcTempAlfParam->tap);
1789	m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(m_pcTempAlfParam->tap);
1790	#else
1791	m_pcTempAlfParam->num_coeff = (Int)tap*tap/4 + 2;
1792	#endif
1793
1794	#if MQT_BA_RA
1795
1796	#if MQT_ALF_NPASS
1797
1798	static Bool bFirst = true;
1799	static Int* apiVarIndTabBest[NUM_ALF_CLASS_METHOD];
1800	static Int** appiBestCoeffSet[NUM_ALF_CLASS_METHOD];
1801
1802	static Double*** adBestySym;
1803	static Double**** adBestESym;
1804	static Double** adBestpixAcc;
1805
1806	if(bFirst)
1807	{
1808	if(m_iALFEncodePassReduction)
1809	{
1810	initMatrix4D_double(&adBestESym,NUM_ALF_CLASS_METHOD, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
1811	initMatrix3D_double(&adBestySym,NUM_ALF_CLASS_METHOD, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
1812	initMatrix_double (&adBestpixAcc,NUM_ALF_CLASS_METHOD, NO_VAR_BINS );
1813
1814	for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
1815	{
1816
1817	apiVarIndTabBest[i] = new Int[NO_VAR_BINS];
1818	appiBestCoeffSet[i] = new Int*[NO_VAR_BINS];
1819	for(Int j=0; j< NO_VAR_BINS; j++)
1820	{
1821	appiBestCoeffSet[i][j]= new Int[MAX_SQR_FILT_LENGTH];
1822	}
1823	}
1824	}
1825
1826	bFirst = false;
1827	}
1828
1829	Int ibestfiltNo[NUM_ALF_CLASS_METHOD];
1830	Int ibestfilters_per_fr[NUM_ALF_CLASS_METHOD];
1831	Int64 iDist;
1832	Int64 iMinMethodDist = MAX_INT;
1833	UInt64 uiMinMethodRate;
1834	Double dMinMethodCost = MAX_DOUBLE;
1835	#endif
1836	Int iBestClassMethod = ALF_RA;
1837	Double adExtraCostReduction[NUM_ALF_CLASS_METHOD];
1838	ALFParam cFrmAlfParam [NUM_ALF_CLASS_METHOD];
1839	ALFParam* pcAlfParam = NULL;
1840
1841	for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
1842	{
1843	pcAlfParam = &(cFrmAlfParam[i]);
1844	allocALFParam(pcAlfParam);
1845
1846	pcAlfParam->alf_flag = 1;
1847	pcAlfParam->chroma_idc = 0;
1848	pcAlfParam->cu_control_flag = 0;
1849	pcAlfParam->tap = tap;
1850	#if TI_ALF_MAX_VSIZE_7
1851	pcAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap);
1852	pcAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap);
1853	#else
1854	pcAlfParam->num_coeff = (Int)tap*tap/4 + 2;
1855	#endif
1856
1857	switch(i)
1858	{
1859	case ALF_RA:
1860	{
1861	adExtraCostReduction[i] = (double)(m_im_height * m_im_width) * m_dLambdaLuma * 2.0 / 4096.0;
1862	}
1863	break;
1864	default:
1865	{
1866	adExtraCostReduction[i] = 0.0;
1867	}
1868	break;
1869	}
1870
1871	}
1872
1873	for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
1874	{
1875	pcAlfParam = &(cFrmAlfParam[i]);
1876	m_varImg = m_varImgMethods[i];
1877	m_uiVarGenMethod = pcAlfParam->alf_pcr_region_flag = i;
1878	#if MQT_ALF_NPASS
1879	if(m_iALFEncodePassReduction)
1880	{
1881	m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[m_uiVarGenMethod];
1882	}
1883	setInitialMask(pcPicOrg, pcPicDec);
1884	#else
1885	for (Int i=0; i<Height; i++)
1886	{
1887	for (Int j=0; j<Width; j++)
1888	{
1889	m_maskImg[i][j] = 1;
1890	}
1891	}
1892	#if MTK_NONCROSS_INLOOP_FILTER
1893	if(!m_bUseNonCrossALF)
1894	calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
1895	else
1896	calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
1897	#else
1898	calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
1899	#endif
1900	#endif
1901
1902	#if MQT_ALF_NPASS
1903	if(m_iALFEncodePassReduction)
1904	{
1905	xFirstEstimateFilteringFrameLumaAllTap(pOrg, pDec, LumaStride,
1906	pcAlfParam, apiVarIndTabBest[i], appiBestCoeffSet[i],
1907	ibestfiltNo[i], ibestfilters_per_fr[i],
1908	adBestySym[i], adBestESym[i], adBestpixAcc[i],
1909	uiRate, iDist, dCost);
1910
1911	}
1912	else
1913	{
1914	#endif
1915	xFirstFilteringFrameLuma(pOrg, pDec, (imgpel*)m_pcPicYuvTmp->getLumaAddr(), pcAlfParam, pcAlfParam->tap, LumaStride);
1916	#if MQT_ALF_NPASS
1917	}
1918	#endif
1919
1920	#if MQT_ALF_NPASS
1921	if(!m_iALFEncodePassReduction)
1922	{
1923	#endif
1924	xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, pcAlfParam, uiRate, uiDist, dCost);
1925	#if MQT_ALF_NPASS
1926	iDist = (Int64)uiDist;
1927	}
1928	#endif
1929
1930	dCost -= adExtraCostReduction[i];
1931
1932	if(dCost < dMinMethodCost)
1933	{
1934	iBestClassMethod = i;
1935	dMinMethodCost = dCost;
1936	uiMinMethodRate= uiRate;
1937	iMinMethodDist = iDist;
1938	#if MQT_ALF_NPASS
1939	if(!m_iALFEncodePassReduction)
1940	{
1941	#endif
1942	m_pcPicYuvTmp->copyToPicLuma(pcPicRest);
1943	#if MQT_ALF_NPASS
1944	}
1945	#endif
1946
1947	}
1948
1949	}
1950
1951	dMinMethodCost += adExtraCostReduction[iBestClassMethod];
1952
1953
1954	m_varImg= m_varImgMethods[iBestClassMethod];
1955
1956	m_uiVarGenMethod = iBestClassMethod;
1957
1958	#if MQT_ALF_NPASS
1959	if(m_iALFEncodePassReduction)
1960	{
1961
1962	m_aiFilterCoeffSaved = m_aiFilterCoeffSavedMethods[iBestClassMethod];
1963
1964	setInitialMask(pcPicOrg, pcPicDec);
1965
1966	m_pcBestAlfParam->alf_flag = 1;
1967	m_pcBestAlfParam->cu_control_flag = 0;
1968	m_pcBestAlfParam->chroma_idc = 0;
1969	m_pcBestAlfParam->alf_pcr_region_flag = iBestClassMethod;
1970
1971	m_pcBestAlfParam->tap = cFrmAlfParam[iBestClassMethod].tap;
1972	#if TI_ALF_MAX_VSIZE_7
1973	m_pcBestAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(pcAlfParam->tap);
1974	m_pcBestAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(pcAlfParam->tap);
1975	#else
1976	m_pcBestAlfParam->num_coeff = (Int)tap*tap/4 + 2;
1977	#endif
1978
1979	xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcBestAlfParam, m_pcBestAlfParam->tap, LumaStride);
1980
1981
1982
1983	xCalcRDCost(pcPicOrg, pcPicRest, m_pcBestAlfParam, uiMinMethodRate, uiDist, dMinMethodCost);
1984	iMinMethodDist = (Int64)uiDist;
1985	}
1986	else
1987	{
1988	#endif
1989	copyALFParam(m_pcBestAlfParam, &cFrmAlfParam[iBestClassMethod]);
1990	#if MQT_ALF_NPASS
1991
1992	}
1993	#endif
1994
1995	ruiMinRate = uiMinMethodRate;
1996	ruiMinDist = (UInt64)iMinMethodDist;
1997	rdMinCost = dMinMethodCost;
1998
1999
2000	for(Int i=0; i< NUM_ALF_CLASS_METHOD; i++)
2001	{
2002	freeALFParam(&cFrmAlfParam[i]);
2003	}
2004
2005	#else
2006
2007	#if MQT_ALF_NPASS
2008	setInitialMask(pcPicOrg, pcPicDec);
2009	#else
2010	for (Int i=0; i<Height; i++)
2011	{
2012	for (Int j=0; j<Width; j++)
2013	{
2014	m_maskImg[i][j] = 1;
2015	}
2016	}
2017	#if MTK_NONCROSS_INLOOP_FILTER
2018	if(!m_bUseNonCrossALF)
2019	calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
2020	else
2021	calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
2022	#else
2023	calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
2024	#endif
2025	#endif
2026
2027	#if MQT_ALF_NPASS
2028	if(m_iALFEncodePassReduction)
2029	{
2030	xFirstFilteringFrameLumaAllTap(pOrg, pDec, pRest, LumaStride);
2031	}
2032	else
2033	#endif
2034	xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcTempAlfParam, m_pcTempAlfParam->tap, LumaStride);
2035
2036	xCalcRDCost(pcPicOrg, pcPicRest, m_pcTempAlfParam, uiRate, uiDist, dCost); // change this function final coding
2037
2038	if( dCost < rdMinCost)
2039	{
2040	ruiMinRate = uiRate;
2041	ruiMinDist = uiDist;
2042	rdMinCost = dCost;
2043	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
2044	}
2045
2046	#endif
2047	}
2048
2049	Void TEncAdaptiveLoopFilter::xFirstFilteringFrameLuma(imgpel* ImgOrg, imgpel* ImgDec, imgpel* ImgRest, ALFParam* ALFp, Int tap, Int Stride)
2050	{
2051	#if MTK_NONCROSS_INLOOP_FILTER
2052	if(!m_bUseNonCrossALF)
2053	xstoreInBlockMatrix(0, 0, m_im_height, m_im_width, true, true, ImgOrg, ImgDec, tap, Stride);
2054	else
2055	xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, tap, Stride);
2056	#else
2057	xstoreInBlockMatrix(ImgOrg, ImgDec, tap, Stride);
2058	#endif
2059
2060
2061	xFilteringFrameLuma_qc(ImgOrg, ImgDec, ImgRest, ALFp, tap, Stride);
2062	}
2063
2064
2065	#if MTK_NONCROSS_INLOOP_FILTER
2066	Void TEncAdaptiveLoopFilter::xstoreInBlockMatrix(Int ypos, Int xpos, Int iheight, Int iwidth, Bool bResetBlockMatrix, Bool bSymmCopyBlockMatrix, imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride)
2067	#else
2068	Void TEncAdaptiveLoopFilter::xstoreInBlockMatrix(imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int Stride)
2069	#endif
2070	{
2071	#if MQT_BA_RA
2072	Int var_step_size_w = VAR_SIZE_W;
2073	Int var_step_size_h = VAR_SIZE_H;
2074	#endif
2075
2076	Int i,j,k,l,varInd,ii,jj;
2077	Int x, y;
2078	Int fl =tap/2;
2079	#if TI_ALF_MAX_VSIZE_7
2080	Int flV = TComAdaptiveLoopFilter::ALFFlHToFlV(fl);
2081	Int sqrFiltLength = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(tap);
2082	#else
2083	Int sqrFiltLength=(((tap*tap)/4 + 1) + 1);
2084	#endif
2085	Int fl2=9/2; //extended size at each side of the frame
2086	Int ELocal[MAX_SQR_FILT_LENGTH];
2087	Int yLocal;
2088	Int *p_pattern;
2089	Int filtNo =2;
2090	double *E,yy;
2091	#if MTK_NONCROSS_INLOOP_FILTER
2092	static Int count_valid;
2093	#else
2094	Int count_valid=0;
2095	#endif
2096	if (tap==9)
2097	filtNo =0;
2098	else if (tap==7)
2099	filtNo =1;
2100
2101	p_pattern= m_patternTab[filtNo];
2102
2103	#if MTK_NONCROSS_INLOOP_FILTER
2104	if(bResetBlockMatrix)
2105	{
2106	count_valid = 0;
2107	#endif
2108	memset( m_pixAcc, 0,sizeof(double)*NO_VAR_BINS);
2109	for (varInd=0; varInd<NO_VAR_BINS; varInd++)
2110	{
2111	memset(m_yGlobalSym[filtNo][varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
2112	for (k=0; k<sqrFiltLength; k++)
2113	{
2114	memset(m_EGlobalSym[filtNo][varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
2115	}
2116	}
2117	for (i = fl2; i < m_im_height+fl2; i++)
2118	{
2119	for (j = fl2; j < m_im_width+fl2; j++)
2120	{
2121	if (m_maskImg[i-fl2][j-fl2] == 1)
2122	{
2123	count_valid++;
2124	}
2125	}
2126	}
2127	#if MTK_NONCROSS_INLOOP_FILTER
2128	}
2129	#endif
2130
2131	{
2132	#if MTK_NONCROSS_INLOOP_FILTER
2133	x = y = fl2; //cytsai: shall x, y be removed ?
2134
2135	for (i= ypos; i< ypos + iheight; i++)
2136	{
2137	for (j= xpos; j< xpos + iwidth; j++)
2138	{
2139	#else
2140	for (i=0,y=fl2; i<m_im_height; i++,y++)
2141	{
2142	for (j=0,x=fl2; j<m_im_width; j++,x++)
2143	{
2144	#endif
2145	#if MQT_ALF_NPASS
2146	Int condition = (m_maskImg[i][j] == 1);
2147	if (m_iDesignCurrentFilter)
2148	{
2149	condition = (m_maskImg[i][j] == 0 && count_valid > 0);
2150	}
2151	if(!condition)
2152	{
2153	#else
2154	if (m_maskImg[i][j] == 0 && count_valid > 0)
2155	{
2156
2157	}
2158	else
2159	{
2160	#endif
2161	#if MQT_BA_RA
2162	varInd = m_varImg[i/var_step_size_h][j/var_step_size_w];
2163	#else
2164	varInd=min(m_varImg[i][j], NO_VAR_BINS-1);
2165	#endif
2166	k=0;
2167	memset(ELocal, 0, sqrFiltLength*sizeof(int));
2168	#if TI_ALF_MAX_VSIZE_7
2169	for (ii = -flV; ii < 0; ii++)
2170	#else
2171	for (ii=-fl; ii<0; ii++)
2172	#endif
2173	{
2174	for (jj=-fl-ii; jj<=fl+ii; jj++)
2175	{
2176	ELocal[p_pattern[k++]]+=(ImgDec[(i+ii)Stride + (j+jj)]+ImgDec[(i-ii)Stride + (j-jj)]);
2177	}
2178	}
2179	for (jj=-fl; jj<0; jj++)
2180	ELocal[p_pattern[k++]]+=(ImgDec[(i)Stride + (j+jj)]+ImgDec[(i)Stride + (j-jj)]);
2181	ELocal[p_pattern[k++]]+=ImgDec[(i)*Stride + (j)];
2182	ELocal[sqrFiltLength-1]=1;
2183	yLocal=ImgOrg[(i)*Stride + (j)];
2184
2185	m_pixAcc[varInd]+=(yLocal*yLocal);
2186	E= m_EGlobalSym[filtNo][varInd];
2187	yy= m_yGlobalSym[filtNo][varInd];
2188
2189	for (k=0; k<sqrFiltLength; k++)
2190	{
2191	for (l=k; l<sqrFiltLength; l++)
2192	E[k][l]+=(double)(ELocal[k]*ELocal[l]);
2193	yy[k]+=(double)(ELocal[k]*yLocal);
2194	}
2195	}
2196	}
2197	}
2198	}
2199
2200	#if MTK_NONCROSS_INLOOP_FILTER
2201	if(bSymmCopyBlockMatrix)
2202	{
2203	#endif
2204
2205	// Matrix EGlobalSeq is symmetric, only part of it is calculated
2206	for (varInd=0; varInd<NO_VAR_BINS; varInd++)
2207	{
2208	double **pE = m_EGlobalSym[filtNo][varInd];
2209	for (k=1; k<sqrFiltLength; k++)
2210	{
2211	for (l=0; l<k; l++)
2212	{
2213	pE[k][l]=pE[l][k];
2214	}
2215	}
2216	}
2217	#if MTK_NONCROSS_INLOOP_FILTER
2218	}
2219	#endif
2220
2221	}
2222
2223	Void TEncAdaptiveLoopFilter::xFilteringFrameLuma_qc(imgpel* ImgOrg, imgpel* imgY_pad, imgpel* ImgFilt, ALFParam* ALFp, Int tap, Int Stride)
2224	{
2225	int filtNo,filters_per_fr;
2226	static double ySym, *ESym;
2227	int lambda_val = (Int) m_dLambdaLuma;
2228	lambda_val = lambda_val * (1<<(2*g_uiBitIncrement));
2229	if (tap==9)
2230	filtNo =0;
2231	else if (tap==7)
2232	filtNo =1;
2233	else
2234	filtNo=2;
2235
2236	ESym=m_EGlobalSym[filtNo];
2237	ySym=m_yGlobalSym[filtNo];
2238
2239	xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr,
2240	m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val);
2241
2242	// g_filterCoeffPrevSelected = g_filterCoeffSym
2243	xcalcPredFilterCoeff(filtNo);
2244
2245	//filter the frame with g_filterCoeffPrevSelected
2246	#if MTK_NONCROSS_INLOOP_FILTER
2247	if(!m_bUseNonCrossALF)
2248	xfilterFrame_en(0, 0, m_im_height, m_im_width, imgY_pad, ImgFilt, filtNo, Stride);
2249	else
2250	xfilterSlices_en(imgY_pad, ImgFilt, filtNo, Stride);
2251	#else
2252	xfilterFrame_en(imgY_pad, ImgFilt, filtNo, Stride);
2253	#endif
2254
2255	xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,0, ALFp);
2256	}
2257
2258	#if MTK_NONCROSS_INLOOP_FILTER
2259	Void TEncAdaptiveLoopFilter::xfilterFrame_en(int ypos, int xpos, int iheight, int iwidth, imgpel* ImgDec, imgpel* ImgRest,int filtNo, int Stride)
2260	#else
2261	Void TEncAdaptiveLoopFilter::xfilterFrame_en(imgpel* ImgDec, imgpel* ImgRest,int filtNo, int Stride)
2262	#endif
2263	{
2264	#if MQT_BA_RA
2265	imgpel *imgY_rec = ImgDec;
2266	imgpel p_imgY_pad, p_imgY_pad0;
2267	int var_step_size_w = VAR_SIZE_W;
2268	int var_step_size_h = VAR_SIZE_H;
2269	int i,j,y,x;
2270	#else
2271	int i,j,ii,jj,y,x;
2272	#endif
2273	int *pattern;
2274	int fl, fl_temp, sqrFiltLength;
2275	int pixelInt;
2276	int offset = (1<<(NUM_BITS - 2));
2277
2278	pattern=m_patternTab_filt[filtNo];
2279	fl_temp=m_flTab[filtNo];
2280	#if !MQT_BA_RA
2281	#if TI_ALF_MAX_VSIZE_7
2282	Int fl_tempV = TComAdaptiveLoopFilter::ALFFlHToFlV(fl_temp);
2283	#endif
2284	#endif
2285	sqrFiltLength=MAX_SQR_FILT_LENGTH; fl=FILTER_LENGTH/2;
2286
2287	#if MTK_NONCROSS_INLOOP_FILTER
2288	for (y= ypos, i = fl+ ypos; i < ypos+ iheight+ fl; i++, y++)
2289	{
2290	for (x= xpos, j = fl+ xpos; j < xpos+ iwidth+ fl; j++, x++)
2291	{
2292	#else
2293	for (y=0, i = fl; i < m_im_height+fl; i++, y++)
2294	{
2295	for (x=0, j = fl; j < m_im_width+fl; j++, x++)
2296	{
2297	#endif
2298	#if MQT_BA_RA
2299	int varInd=m_varImg[y/var_step_size_h][x/var_step_size_w];
2300	#else
2301	int varInd=m_varImg[i-fl][j-fl];
2302	imgpel im1,im2;
2303	#endif
2304	int *coef = m_filterCoeffPrevSelected[varInd];
2305	pattern=m_patternTab_filt[filtNo];
2306	pixelInt= m_filterCoeffPrevSelected[varInd][sqrFiltLength-1];
2307
2308	#if MQT_BA_RA
2309	if (filtNo == 2) //5x5
2310	{
2311	pixelInt += coef[22]* (imgY_rec[(i-fl+2)Stride + j-fl]+imgY_rec[(i-fl-2)Stride + j-fl]);
2312
2313	pixelInt += coef[30]* (imgY_rec[(i-fl+1)Stride + j-fl+1]+imgY_rec[(i-fl-1)Stride + j-fl-1]);
2314	pixelInt += coef[31]* (imgY_rec[(i-fl+1)Stride + j-fl] +imgY_rec[(i-fl-1)Stride + j-fl]);
2315	pixelInt += coef[32]* (imgY_rec[(i-fl+1)Stride + j-fl-1]+imgY_rec[(i-fl-1)Stride + j-fl+1]);
2316
2317	pixelInt += coef[38]* (imgY_rec[(i-fl)Stride + j-fl-2]+imgY_rec[(i-fl)Stride + j-fl+2]);
2318	pixelInt += coef[39]* (imgY_rec[(i-fl)Stride + j-fl-1]+imgY_rec[(i-fl)Stride + j-fl+1]);
2319	pixelInt += coef[40]* (imgY_rec[(i-fl)*Stride + j-fl]);
2320	}
2321	else if (filtNo == 1) //7x7
2322	{
2323	pixelInt += coef[13]* (imgY_rec[(i-fl+3)Stride + j-fl]+imgY_rec[(i-fl-3)Stride + j-fl]);
2324
2325	p_imgY_pad = imgY_rec + (i-fl+2)*Stride;
2326	p_imgY_pad0 = imgY_rec + (i-fl-2)*Stride;
2327	pixelInt += coef[21]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
2328	pixelInt += coef[22]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
2329	pixelInt += coef[23]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
2330
2331	p_imgY_pad = imgY_rec + (i-fl+1)*Stride;
2332	p_imgY_pad0 = imgY_rec + (i-fl-1)*Stride;
2333	pixelInt += coef[29]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]);
2334	pixelInt += coef[30]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
2335	pixelInt += coef[31]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
2336	pixelInt += coef[32]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
2337	pixelInt += coef[33]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]);
2338
2339	p_imgY_pad = imgY_rec + (i-fl)*Stride;
2340	pixelInt += coef[37]* (p_imgY_pad[j-fl+3]+p_imgY_pad[j-fl-3]);
2341	pixelInt += coef[38]* (p_imgY_pad[j-fl+2]+p_imgY_pad[j-fl-2]);
2342	pixelInt += coef[39]* (p_imgY_pad[j-fl+1]+p_imgY_pad[j-fl-1]);
2343	pixelInt += coef[40]* (p_imgY_pad[j-fl]);
2344
2345	}
2346	else
2347	{
2348	#if !TI_ALF_MAX_VSIZE_7
2349	pixelInt += coef[4]* (imgY_rec[(i-fl+4)Stride + j-fl]+imgY_rec[(i-fl-4)Stride + j-fl]);
2350	#endif
2351	p_imgY_pad = imgY_rec + (i-fl+3)*Stride;
2352	p_imgY_pad0 = imgY_rec + (i-fl-3)*Stride;
2353	pixelInt += coef[12]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
2354	pixelInt += coef[13]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
2355	pixelInt += coef[14]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
2356
2357	p_imgY_pad = imgY_rec + (i-fl+2)*Stride;
2358	p_imgY_pad0 = imgY_rec + (i-fl-2)*Stride;
2359	pixelInt += coef[20]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]);
2360	pixelInt += coef[21]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
2361	pixelInt += coef[22]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
2362	pixelInt += coef[23]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
2363	pixelInt += coef[24]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]);
2364
2365	p_imgY_pad = imgY_rec + (i-fl+1)*Stride;
2366	p_imgY_pad0 = imgY_rec + (i-fl-1)*Stride;
2367	pixelInt += coef[28]* (p_imgY_pad[j-fl+3]+p_imgY_pad0[j-fl-3]);
2368	pixelInt += coef[29]* (p_imgY_pad[j-fl+2]+p_imgY_pad0[j-fl-2]);
2369	pixelInt += coef[30]* (p_imgY_pad[j-fl+1]+p_imgY_pad0[j-fl-1]);
2370	pixelInt += coef[31]* (p_imgY_pad[j-fl]+p_imgY_pad0[j-fl]);
2371	pixelInt += coef[32]* (p_imgY_pad[j-fl-1]+p_imgY_pad0[j-fl+1]);
2372	pixelInt += coef[33]* (p_imgY_pad[j-fl-2]+p_imgY_pad0[j-fl+2]);
2373	pixelInt += coef[34]* (p_imgY_pad[j-fl-3]+p_imgY_pad0[j-fl+3]);
2374
2375	p_imgY_pad = imgY_rec + (i-fl)*Stride;
2376	pixelInt += coef[36]* (p_imgY_pad[j-fl+4]+p_imgY_pad[j-fl-4]);
2377	pixelInt += coef[37]* (p_imgY_pad[j-fl+3]+p_imgY_pad[j-fl-3]);
2378	pixelInt += coef[38]* (p_imgY_pad[j-fl+2]+p_imgY_pad[j-fl-2]);
2379	pixelInt += coef[39]* (p_imgY_pad[j-fl+1]+p_imgY_pad[j-fl-1]);
2380	pixelInt += coef[40]* (p_imgY_pad[j-fl]);
2381
2382	}
2383	#else
2384
2385	#if TI_ALF_MAX_VSIZE_7
2386	for (ii = -fl_tempV; ii < 0; ii++)
2387	#else
2388	for (ii=-fl_temp; ii<0; ii++)
2389	#endif
2390	{
2391	im1= &(ImgDec[(y+ii)*Stride + x-fl_temp-ii]);
2392	im2= &(ImgDec[(y-ii)*Stride + x+fl_temp+ii]);
2393	for (jj=-fl_temp-ii; jj<=fl_temp+ii; jj++,im1++,im2--)
2394	pixelInt+=((im1+ im2)coef[(pattern++)]);
2395	}
2396	im1= &(ImgDec[y*Stride + x-fl_temp]);
2397	im2= &(ImgDec[y*Stride + x+fl_temp]);
2398	for (jj=-fl_temp; jj<0; jj++,im1++,im2--)
2399	pixelInt+=((im1+ im2)coef[(pattern++)]);
2400	pixelInt+=(ImgDec[yStride + x]coef[*(pattern++)]);
2401	#endif
2402
2403	pixelInt=(int)((pixelInt+offset) >> (NUM_BITS - 1));
2404	ImgRest[y*Stride + x] = Clip3(0, g_uiIBDI_MAX, pixelInt);
2405	}
2406	}
2407	}
2408
2409	Void TEncAdaptiveLoopFilter::xfindBestFilterVarPred(double ySym, double ESym, double pixAcc, int filterCoeffSym, int filterCoeffSymQuant, int filtNo, int filters_per_fr_best, int varIndTab[], imgpel imgY_rec, imgpel varImg, imgpel maskImg, imgpel *imgY_pad, double lambda_val)
2410	{
2411	int filters_per_fr, firstFilt, coded, forceCoeff0,
2412	interval[NO_VAR_BINS][2], intervalBest[NO_VAR_BINS][2];
2413	int i, k, varInd;
2414	static double *E_temp, y_temp, *pixAcc_temp;
2415	static int **FilterCoeffQuantTemp;
2416	double error, lambda, lagrangian, lagrangianMin;
2417
2418	int sqrFiltLength;
2419	int pattern, patternMap, *weights;
2420	int numBits, coeffBits;
2421	double errorForce0CoeffTab[NO_VAR_BINS][2];
2422	int codedVarBins[NO_VAR_BINS], createBistream /, forceCoeff0 /;
2423	int usePrevFilt[NO_VAR_BINS], usePrevFiltDefault[NO_VAR_BINS];
2424	static int first=0;
2425
2426	for (i = 0; i < NO_VAR_BINS; i++)
2427	usePrevFiltDefault[i]=usePrevFilt[i]=1;
2428	lambda = lambda_val;
2429	sqrFiltLength=MAX_SQR_FILT_LENGTH;
2430
2431	if (first==0)
2432	{
2433	initMatrix3D_double(&E_temp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH, MAX_SQR_FILT_LENGTH);
2434	initMatrix_double(&y_temp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
2435	pixAcc_temp = (double *) calloc(NO_VAR_BINS, sizeof(double));
2436	initMatrix_int(&FilterCoeffQuantTemp, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
2437	first=1;
2438	}
2439
2440	sqrFiltLength=m_sqrFiltLengthTab[filtNo];
2441	Int fl = m_flTab[filtNo];
2442	weights=m_weightsTab[filtNo];
2443	patternMap=m_patternMapTab[filtNo];
2444	pattern=m_patternTab[filtNo];
2445
2446	memcpy(pixAcc_temp,pixAcc,sizeof(double)*NO_VAR_BINS);
2447	for (varInd=0; varInd<NO_VAR_BINS; varInd++)
2448	{
2449	memcpy(y_temp[varInd],ySym[varInd],sizeof(double)*sqrFiltLength);
2450	for (k=0; k<sqrFiltLength; k++)
2451	memcpy(E_temp[varInd][k],ESym[varInd][k],sizeof(double)*sqrFiltLength);
2452	}
2453
2454	// zero all variables
2455	memset(varIndTab,0,sizeof(int)*NO_VAR_BINS);
2456
2457	for(i = 0; i < NO_VAR_BINS; i++)
2458	{
2459	memset(filterCoeffSym[i],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
2460	memset(filterCoeffSymQuant[i],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
2461	}
2462
2463	firstFilt=1; lagrangianMin=0;
2464	filters_per_fr=NO_FILTERS;
2465
2466	while(filters_per_fr>=1)
2467	{
2468	findFilterGroupingError(E_temp, y_temp, pixAcc_temp, interval, sqrFiltLength, filters_per_fr);
2469	findFilterCoeff(E_temp, y_temp, pixAcc_temp, filterCoeffSym, filterCoeffSymQuant, interval,
2470	varIndTab, sqrFiltLength, filters_per_fr, weights, numBits=NUM_BITS, errorForce0CoeffTab);
2471	lagrangian=xfindBestCoeffCodMethod(codedVarBins, &forceCoeff0, filterCoeffSymQuant, fl,
2472	sqrFiltLength, filters_per_fr, errorForce0CoeffTab, &error, lambda);
2473
2474	if (lagrangian<lagrangianMin \|\| firstFilt==1)
2475	{
2476	firstFilt=0;
2477	lagrangianMin=lagrangian;
2478
2479	(*filters_per_fr_best)=filters_per_fr;
2480	memcpy(intervalBest, interval, NO_VAR_BINS2sizeof(int));
2481	}
2482	filters_per_fr--;
2483	}
2484
2485	findFilterCoeff(E_temp, y_temp, pixAcc_temp, filterCoeffSym, filterCoeffSymQuant, intervalBest,
2486	varIndTab, sqrFiltLength, (*filters_per_fr_best), weights, numBits=NUM_BITS, errorForce0CoeffTab);
2487
2488	xfindBestCoeffCodMethod(codedVarBins, &forceCoeff0, filterCoeffSymQuant, fl, sqrFiltLength,
2489	(*filters_per_fr_best), errorForce0CoeffTab, &error, lambda);
2490
2491	coded=1;
2492	if (forceCoeff0==1 && (*filters_per_fr_best)==1)
2493	{
2494	coded=0;
2495	coeffBits = xcodeAuxInfo(-1, (*filters_per_fr_best), varIndTab, 0, createBistream=0,filtNo, m_tempALFp);
2496	}
2497	else
2498	{
2499	coeffBits = xcodeAuxInfo(filtNo, (*filters_per_fr_best), varIndTab, 0, createBistream=0,filtNo, m_tempALFp);
2500	}
2501
2502	if (forceCoeff0==0)
2503	{
2504	coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength,
2505	(*filters_per_fr_best), createBistream=0, m_tempALFp);
2506	}
2507	else
2508	{
2509	if ((*filters_per_fr_best)==1)
2510	{
2511	for(varInd=0; varInd<(*filters_per_fr_best); varInd++)
2512	{
2513	memset(filterCoeffSym[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
2514	memset(filterCoeffSymQuant[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
2515	}
2516	}
2517	else
2518	{
2519	coeffBits += xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength,
2520	(*filters_per_fr_best), codedVarBins, createBistream=0, m_tempALFp);
2521
2522	for(varInd=0; varInd<(*filters_per_fr_best); varInd++)
2523	{
2524	if (codedVarBins[varInd]==0)
2525	{
2526	memset(filterCoeffSym[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
2527	memset(filterCoeffSymQuant[varInd],0,sizeof(int)*MAX_SQR_FILT_LENGTH);
2528	}
2529	}
2530	}
2531	}
2532	}
2533
2534
2535	Void TEncAdaptiveLoopFilter::xcalcPredFilterCoeff(int filtNo)
2536	{
2537	int *patternMap, varInd, i, k;
2538
2539	patternMap=m_patternMapTab[filtNo];
2540	for(varInd=0; varInd<NO_VAR_BINS; ++varInd)
2541	{
2542	k=0;
2543	for(i = 0; i < MAX_SQR_FILT_LENGTH; i++)
2544	{
2545	if (patternMap[i]>0)
2546	{
2547	m_filterCoeffPrevSelected[varInd][i]=m_filterCoeffSym[m_varIndTab[varInd]][k];
2548	k++;
2549	}
2550	else
2551	{
2552	m_filterCoeffPrevSelected[varInd][i]=0;
2553	}
2554	#if MQT_ALF_NPASS
2555	if (m_iALFEncodePassReduction && (!m_iUsePreviousFilter \|\| !m_iDesignCurrentFilter))
2556	{
2557	if((m_iCurrentPOC%m_iGOPSize) == 0)
2558	{
2559	m_aiFilterCoeffSaved[0][varInd][i] = m_aiFilterCoeffSaved[m_iGOPSize][varInd][i];
2560	m_aiFilterCoeffSaved[m_iGOPSize][varInd][i] = m_filterCoeffPrevSelected[varInd][i];
2561	}
2562	else
2563	{
2564	m_aiFilterCoeffSaved[m_iCurrentPOC%m_iGOPSize][varInd][i] = m_filterCoeffPrevSelected[varInd][i];
2565	}
2566	}
2567	#endif
2568	}
2569	}
2570	}
2571
2572	#if MQT_ALF_NPASS
2573	UInt TEncAdaptiveLoopFilter::xcodeFiltCoeff(int *filterCoeffSymQuant, int filtNo, int varIndTab[], int filters_per_fr_best, int frNo, ALFParam ALFp)
2574	#else
2575	Void TEncAdaptiveLoopFilter::xcodeFiltCoeff(int *filterCoeffSymQuant, int filtNo, int varIndTab[], int filters_per_fr_best, int frNo, ALFParam ALFp)
2576	#endif
2577	{
2578	int varInd, forceCoeff0, codedVarBins[NO_VAR_BINS], coeffBits, createBistream, sqrFiltLength=m_sqrFiltLengthTab[filtNo],
2579	fl=m_flTab[filtNo], coded;
2580
2581	ALFp->filters_per_group_diff = filters_per_fr_best;
2582	ALFp->filters_per_group = filters_per_fr_best;
2583
2584	for(varInd=0; varInd<filters_per_fr_best; varInd++)
2585	{
2586	codedVarBins[varInd] = 1;
2587	}
2588	memcpy (ALFp->codedVarBins, codedVarBins, sizeof(int)*NO_VAR_BINS);
2589	forceCoeff0=0;
2590	for(varInd=0; varInd<filters_per_fr_best; varInd++)
2591	{
2592	if (codedVarBins[varInd] == 0)
2593	{
2594	forceCoeff0=1;
2595	break;
2596	}
2597	}
2598
2599	coded=1;
2600	if (forceCoeff0==1 && filters_per_fr_best==1)
2601	{
2602	coded=0;
2603	coeffBits = xcodeAuxInfo(-1, filters_per_fr_best, varIndTab, frNo, createBistream=1,filtNo, ALFp);
2604	}
2605	else
2606	{
2607	coeffBits = xcodeAuxInfo(filtNo, filters_per_fr_best, varIndTab, frNo, createBistream=1,filtNo, ALFp);
2608	}
2609
2610	ALFp->forceCoeff0 = forceCoeff0;
2611	ALFp->predMethod = 0;
2612	ALFp->num_coeff = sqrFiltLength;
2613	ALFp->realfiltNo=filtNo;
2614	if (filters_per_fr_best <= 1)
2615	{
2616	ALFp->forceCoeff0 = 0;
2617	ALFp->predMethod = 0;
2618	}
2619
2620	if (forceCoeff0==0)
2621	{
2622	coeffBits += xsendAllFiltersPPPred(filterCoeffSymQuant, fl, sqrFiltLength,
2623	filters_per_fr_best, createBistream=1, ALFp);
2624	}
2625	else if (filters_per_fr_best>1)
2626	{
2627	coeffBits += xsendAllFiltersPPPredForce0(filterCoeffSymQuant, fl, sqrFiltLength,
2628	filters_per_fr_best, codedVarBins, createBistream=1, ALFp);
2629	}
2630
2631	#if MQT_ALF_NPASS
2632	return (UInt)coeffBits;
2633	#endif
2634	}
2635
2636
2637
2638	#if TSB_ALF_HEADER
2639	Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, ALFParam *pAlfParam)
2640	#else
2641	Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlags_qc(UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist)
2642	#endif
2643	{
2644	ruiDist = 0;
2645	#if TSB_ALF_HEADER
2646	pAlfParam->num_alf_cu_flag = 0;
2647	#endif
2648
2649	for( UInt uiCUAddr = 0; uiCUAddr < m_pcPic->getNumCUsInFrame() ; uiCUAddr++ )
2650	{
2651	TComDataCU* pcCU = m_pcPic->getCU( uiCUAddr );
2652	#if TSB_ALF_HEADER
2653	xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, pAlfParam);
2654	#else
2655	xSetCUAlfCtrlFlag_qc(pcCU, 0, 0, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist);
2656	#endif
2657	}
2658	}
2659
2660	#if TSB_ALF_HEADER
2661	Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist, ALFParam *pAlfParam)
2662	#else
2663	Void TEncAdaptiveLoopFilter::xSetCUAlfCtrlFlag_qc(TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth, UInt uiAlfCtrlDepth, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiDist)
2664	#endif
2665	{
2666	Bool bBoundary = false;
2667	UInt uiLPelX = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
2668	UInt uiRPelX = uiLPelX + (g_uiMaxCUWidth>>uiDepth) - 1;
2669	UInt uiTPelY = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
2670	UInt uiBPelY = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1;
2671
2672	if( ( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() ) \|\| ( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() ) )
2673	{
2674	bBoundary = true;
2675	}
2676
2677	if( ( ( uiDepth < pcCU->getDepth( uiAbsPartIdx ) ) && ( uiDepth < (g_uiMaxCUDepth-g_uiAddCUDepth) ) ) \|\| bBoundary )
2678	{
2679	UInt uiQNumParts = ( m_pcPic->getNumPartInCU() >> (uiDepth<<1) )>>2;
2680	for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++, uiAbsPartIdx+=uiQNumParts )
2681	{
2682	uiLPelX = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
2683	uiTPelY = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
2684
2685	if( ( uiLPelX < pcCU->getSlice()->getSPS()->getWidth() ) && ( uiTPelY < pcCU->getSlice()->getSPS()->getHeight() ) )
2686	#if TSB_ALF_HEADER
2687	xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist, pAlfParam);
2688	#else
2689	xSetCUAlfCtrlFlag_qc(pcCU, uiAbsPartIdx, uiDepth+1, uiAlfCtrlDepth, pcPicOrg, pcPicDec, pcPicRest, ruiDist);
2690	#endif
2691	}
2692	return;
2693	}
2694
2695	if( uiDepth > uiAlfCtrlDepth && !pcCU->isFirstAbsZorderIdxInDepth(uiAbsPartIdx, uiAlfCtrlDepth))
2696	{
2697	return;
2698	}
2699
2700	UInt uiCUAddr = pcCU->getAddr();
2701	UInt64 uiRecSSD = 0;
2702	UInt64 uiFiltSSD = 0;
2703
2704	Int iWidth;
2705	Int iHeight;
2706	UInt uiSetDepth;
2707
2708	if (uiDepth > uiAlfCtrlDepth && pcCU->isFirstAbsZorderIdxInDepth(uiAbsPartIdx, uiAlfCtrlDepth))
2709	{
2710	iWidth = g_uiMaxCUWidth >> uiAlfCtrlDepth;
2711	iHeight = g_uiMaxCUHeight >> uiAlfCtrlDepth;
2712
2713	uiRPelX = uiLPelX + iWidth - 1;
2714	uiBPelY = uiTPelY + iHeight - 1;
2715
2716	if( uiRPelX >= pcCU->getSlice()->getSPS()->getWidth() )
2717	{
2718	iWidth = pcCU->getSlice()->getSPS()->getWidth() - uiLPelX;
2719	}
2720
2721	if( uiBPelY >= pcCU->getSlice()->getSPS()->getHeight() )
2722	{
2723	iHeight = pcCU->getSlice()->getSPS()->getHeight() - uiTPelY;
2724	}
2725
2726	uiSetDepth = uiAlfCtrlDepth;
2727	}
2728	else
2729	{
2730	iWidth = pcCU->getWidth(uiAbsPartIdx);
2731	iHeight = pcCU->getHeight(uiAbsPartIdx);
2732	uiSetDepth = uiDepth;
2733	}
2734
2735	Pel* pOrg = pcPicOrg->getLumaAddr(uiCUAddr, uiAbsPartIdx);
2736	Pel* pRec = pcPicDec->getLumaAddr(uiCUAddr, uiAbsPartIdx);
2737	Pel* pFilt = pcPicRest->getLumaAddr(uiCUAddr, uiAbsPartIdx);
2738
2739	uiRecSSD += xCalcSSD( pOrg, pRec, iWidth, iHeight, pcPicOrg->getStride() );
2740	uiFiltSSD += xCalcSSD( pOrg, pFilt, iWidth, iHeight, pcPicOrg->getStride() );
2741
2742	if (uiFiltSSD < uiRecSSD)
2743	{
2744	ruiDist += uiFiltSSD;
2745	pcCU->setAlfCtrlFlagSubParts(1, uiAbsPartIdx, uiSetDepth);
2746	#if TSB_ALF_HEADER
2747	pAlfParam->alf_cu_flag[pAlfParam->num_alf_cu_flag]=1;
2748	#endif
2749	for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(pcPicOrg->getHeight()-1)) ;i++)
2750	{
2751	for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(pcPicOrg->getWidth()-1)) ;j++)
2752	{
2753	m_maskImg[i][j]=1;
2754	}
2755	}
2756	}
2757	else
2758	{
2759	ruiDist += uiRecSSD;
2760	pcCU->setAlfCtrlFlagSubParts(0, uiAbsPartIdx, uiSetDepth);
2761	#if TSB_ALF_HEADER
2762	pAlfParam->alf_cu_flag[pAlfParam->num_alf_cu_flag]=0;
2763	#endif
2764	for (int i=uiTPelY ;i<=min(uiBPelY,(unsigned int)(pcPicOrg->getHeight()-1)) ;i++)
2765	{
2766	for (int j=uiLPelX ;j<=min(uiRPelX,(unsigned int)(pcPicOrg->getWidth()-1)) ;j++)
2767	{
2768	m_maskImg[i][j]=0;
2769	}
2770	}
2771	}
2772	#if TSB_ALF_HEADER
2773	pAlfParam->num_alf_cu_flag++;
2774	#endif
2775	}
2776
2777	Void TEncAdaptiveLoopFilter::xReDesignFilterCoeff_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, Bool bReadCorr)
2778	{
2779
2780	Int tap = m_pcTempAlfParam->tap;
2781	Int LumaStride = pcPicOrg->getStride();
2782	imgpel* pOrg = (imgpel*)pcPicOrg->getLumaAddr();
2783	imgpel* pDec = (imgpel*)pcPicDec->getLumaAddr();
2784	imgpel* pRest = (imgpel*)pcPicRest->getLumaAddr();
2785	xFirstFilteringFrameLuma(pOrg, pDec, pRest, m_pcTempAlfParam, tap, LumaStride);
2786
2787	}
2788	Void TEncAdaptiveLoopFilter::xCUAdaptiveControl_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost)
2789	{
2790	#if MQT_ALF_NPASS
2791	imgpel** maskImgTemp;
2792
2793	if(m_iALFEncodePassReduction == 2)
2794	{
2795	get_mem2Dpel(&maskImgTemp, m_im_height, m_im_width);
2796	}
2797	#endif
2798
2799	m_pcEntropyCoder->setAlfCtrl(true);
2800
2801	UInt uiBestDepth = 0;
2802
2803	ALFParam cFrmAlfParam;
2804	allocALFParam(&cFrmAlfParam);
2805	copyALFParam(&cFrmAlfParam, m_pcBestAlfParam);
2806
2807	for (UInt uiDepth = 0; uiDepth < g_uiMaxCUDepth; uiDepth++)
2808	{
2809	m_pcEntropyCoder->setMaxAlfCtrlDepth(uiDepth);
2810	pcPicRest->copyToPicLuma(m_pcPicYuvTmp);
2811	copyALFParam(m_pcTempAlfParam, &cFrmAlfParam);
2812	m_pcTempAlfParam->cu_control_flag = 1;
2813
2814	#if MQT_ALF_NPASS
2815	for (UInt uiRD = 0; uiRD <= m_iALFNumOfRedesign; uiRD++)
2816	#else
2817	for (UInt uiRD = 0; uiRD <= ALF_NUM_OF_REDESIGN; uiRD++)
2818	#endif
2819	{
2820	if (uiRD)
2821	{
2822	// re-design filter coefficients
2823	xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, true); //use filtering of mine
2824	}
2825
2826	UInt64 uiRate, uiDist;
2827	Double dCost;
2828	//m_pcPicYuvTmp: filtered signal, pcPicDec: orig reconst
2829	#if TSB_ALF_HEADER
2830	xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here
2831	#else
2832	xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here
2833	#endif
2834
2835	xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost);
2836
2837	if (dCost < rdMinCost)
2838	{
2839	uiBestDepth = uiDepth;
2840	rdMinCost = dCost;
2841	ruiMinDist = uiDist;
2842	ruiMinRate = uiRate;
2843	m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest);
2844	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
2845	//save maskImg
2846	xCopyTmpAlfCtrlFlagsFrom();
2847	#if MQT_ALF_NPASS
2848	if(m_iALFEncodePassReduction == 2)
2849	{
2850	::memcpy(maskImgTemp[0], m_maskImg[0], sizeof(imgpel)m_im_height m_im_width);
2851	}
2852	#endif
2853	}
2854	}
2855	}
2856
2857	if (m_pcBestAlfParam->cu_control_flag)
2858	{
2859	#if MQT_ALF_NPASS
2860	if(m_iALFEncodePassReduction == 2)
2861	{
2862	UInt uiDepth = uiBestDepth;
2863	::memcpy(m_maskImg[0], maskImgTemp[0], sizeof(imgpel)m_im_height m_im_width);
2864	xCopyTmpAlfCtrlFlagsTo();
2865
2866	copyALFParam(&cFrmAlfParam, m_pcBestAlfParam);
2867
2868	m_pcEntropyCoder->setAlfCtrl(true);
2869	m_pcEntropyCoder->setMaxAlfCtrlDepth(uiDepth);
2870	copyALFParam(m_pcTempAlfParam, &cFrmAlfParam);
2871
2872	xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, true); //use filtering of mine
2873
2874	UInt64 uiRate, uiDist;
2875	Double dCost;
2876
2877	#if TSB_ALF_HEADER
2878	xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here
2879	#else
2880	xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here
2881	#endif
2882
2883	xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost);
2884
2885	if (dCost < rdMinCost)
2886	{
2887	rdMinCost = dCost;
2888	ruiMinDist = uiDist;
2889	ruiMinRate = uiRate;
2890	m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest);
2891	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
2892	xCopyTmpAlfCtrlFlagsFrom();
2893	}
2894	}
2895	#endif
2896
2897	m_pcEntropyCoder->setAlfCtrl(true);
2898	m_pcEntropyCoder->setMaxAlfCtrlDepth(uiBestDepth);
2899	xCopyTmpAlfCtrlFlagsTo();
2900	m_pcPicYuvBest->copyToPicLuma(pcPicRest);//copy m_pcPicYuvBest to pcPicRest
2901	xCopyDecToRestCUs(pcPicDec, pcPicRest); //pcPicRest = pcPicDec
2902	}
2903	else
2904	{
2905	m_pcEntropyCoder->setAlfCtrl(false);
2906	m_pcEntropyCoder->setMaxAlfCtrlDepth(0);
2907	}
2908	freeALFParam(&cFrmAlfParam);
2909
2910	#if MQT_ALF_NPASS
2911	if(m_iALFEncodePassReduction == 2)
2912	{
2913	free_mem2Dpel(maskImgTemp);
2914	}
2915	#endif
2916	}
2917
2918
2919	Void TEncAdaptiveLoopFilter::xFilterTapDecision_qc(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, UInt64& ruiMinRate, UInt64& ruiMinDist, Double& rdMinCost)
2920	{
2921	#if MQT_ALF_NPASS
2922	if(m_iALFEncodePassReduction)
2923	{
2924	return; // filter tap has been decided in xEncALFLuma_qc
2925	}
2926	#endif
2927
2928	// restriction for non-referenced B-slice
2929	if (m_eSliceType == B_SLICE && m_iPicNalReferenceIdc == 0)
2930	{
2931	return;
2932	}
2933
2934	UInt64 uiRate, uiDist;
2935	Double dCost;
2936
2937	if (m_pcBestAlfParam->cu_control_flag)
2938	{
2939	xCopyTmpAlfCtrlFlagsFrom();
2940	}
2941
2942	Bool bChanged = false;
2943	for (Int iTap = ALF_MIN_NUM_TAP; iTap <= ALF_MAX_NUM_TAP; iTap += 2)
2944	{
2945	copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
2946	m_pcTempAlfParam->tap = iTap;
2947	#if TI_ALF_MAX_VSIZE_7
2948	m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(m_pcTempAlfParam->tap);
2949	m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(m_pcTempAlfParam->tap);
2950	#else
2951	m_pcTempAlfParam->num_coeff = (Int)(iTap*iTap/4) + 2;
2952	#endif
2953
2954	if (m_pcTempAlfParam->cu_control_flag)
2955	{
2956	xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, false);
2957	#if TSB_ALF_HEADER
2958	xSetCUAlfCtrlFlags_qc(m_pcEntropyCoder->getMaxAlfCtrlDepth(), pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam);
2959	#else
2960	xSetCUAlfCtrlFlags_qc(m_pcEntropyCoder->getMaxAlfCtrlDepth(), pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist);
2961	#endif
2962	xCalcRDCost(m_pcTempAlfParam, uiRate, uiDist, dCost);
2963	}
2964
2965	else
2966	{
2967	Int Height = pcPicOrg->getHeight();
2968	Int Width = pcPicOrg->getWidth();
2969	for (Int i=0; i<Height; i++)
2970	{
2971	for (Int j=0; j<Width; j++)
2972	{
2973	m_maskImg[i][j] = 1;
2974	}
2975	}
2976	xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, false);
2977
2978	xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, m_pcTempAlfParam, uiRate, uiDist, dCost);
2979	}
2980
2981	if (dCost < rdMinCost)
2982	{
2983	rdMinCost = dCost;
2984	ruiMinDist = uiDist;
2985	ruiMinRate = uiRate;
2986	m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest);
2987	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
2988	bChanged = true;
2989	if (m_pcTempAlfParam->cu_control_flag)
2990	{
2991	xCopyTmpAlfCtrlFlagsFrom();
2992	}
2993	}
2994	}
2995
2996	if (m_pcBestAlfParam->cu_control_flag)
2997	{
2998	xCopyTmpAlfCtrlFlagsTo();
2999	if (bChanged)
3000	{
3001	m_pcPicYuvBest->copyToPicLuma(pcPicRest);
3002	xCopyDecToRestCUs(pcPicDec, pcPicRest);
3003	}
3004	}
3005	else if (m_pcBestAlfParam->tap > ALF_MIN_NUM_TAP)
3006	{
3007	m_pcPicYuvBest->copyToPicLuma(pcPicRest);
3008	}
3009
3010	copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
3011	}
3012
3013
// Convert a floating-point expression to int, rounding half away from zero.
#define ROUND(a)  ( ((a) < 0) ? (int)((a) - 0.5) : (int)((a) + 0.5) )
// Amount added to every diagonal element when gnsSolveByChol regularises a system.
#define REG       0.0001
// gnsCholeskyDec reports failure when a diagonal pivot is at or below this value.
#define REG_SQR   0.0000001
3017
3018	//Find filter coeff related
3019	Int TEncAdaptiveLoopFilter::gnsCholeskyDec(double **inpMatr, double outMatr[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], int noEq)
3020	{
3021	int
3022	i, j, k; /* Looping Variables */
3023	double
3024	scale; /* scaling factor for each row */
3025	double
3026	invDiag[MAX_SQR_FILT_LENGTH]; /* Vector of the inverse of diagonal entries of outMatr */
3027
3028
3029	/*
3030	* Cholesky decomposition starts
3031	*/
3032
3033	for(i = 0; i < noEq; i++)
3034	{
3035	for(j = i; j < noEq; j++)
3036	{
3037	/* Compute the scaling factor */
3038	scale=inpMatr[i][j];
3039	if ( i > 0) for( k = i - 1 ; k >= 0 ; k--)
3040	scale -= outMatr[k][j] * outMatr[k][i];
3041
3042	/* Compute i'th row of outMatr */
3043	if(i==j)
3044	{
3045	if(scale <= REG_SQR ) // if(scale <= 0 ) /* If inpMatr is singular */
3046	{
3047	return(0);
3048	}
3049	else /* Normal operation */
3050	invDiag[i] = 1.0/(outMatr[i][i]=sqrt(scale));
3051	}
3052	else
3053	{
3054	outMatr[i][j] = scaleinvDiag[i]; / Upper triangular part */
3055	outMatr[j][i] = 0.0; /* Lower triangular part set to 0 */
3056	}
3057	}
3058	}
3059	return(1); /* Signal that Cholesky factorization is successfully performed */
3060	}
3061
3062
3063	Void TEncAdaptiveLoopFilter::gnsTransposeBacksubstitution(double U[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], double rhs[], double x[], int order)
3064	{
3065	int
3066	i,j; /* Looping variables */
3067	double
3068	sum; /* Holds backsubstitution from already handled rows */
3069
3070	/* Backsubstitution starts */
3071	x[0] = rhs[0]/U[0][0]; /* First row of U' */
3072	for (i = 1; i < order; i++)
3073	{ /* For the rows 1..order-1 */
3074
3075	for (j = 0, sum = 0.0; j < i; j++) /* Backsubst already solved unknowns */
3076	sum += x[j]*U[j][i];
3077
3078	x[i]=(rhs[i] - sum)/U[i][i]; /* i'th component of solution vect. */
3079	}
3080	}
3081
3082
3083
3084	Void TEncAdaptiveLoopFilter::gnsBacksubstitution(double R[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH], double z[MAX_SQR_FILT_LENGTH], int R_size, double A[MAX_SQR_FILT_LENGTH])
3085	{
3086	int i, j;
3087	double sum;
3088
3089	R_size--;
3090
3091	A[R_size] = z[R_size] / R[R_size][R_size];
3092
3093	for (i = R_size-1; i >= 0; i--)
3094	{
3095	for (j = i+1, sum = 0.0; j <= R_size; j++)
3096	sum += R[i][j] * A[j];
3097
3098	A[i] = (z[i] - sum) / R[i][i];
3099	}
3100	}
3101
3102
3103	Int TEncAdaptiveLoopFilter::gnsSolveByChol(double *LHS, double rhs, double *x, int noEq)
3104	{
3105	double aux[MAX_SQR_FILT_LENGTH]; /* Auxiliary vector */
3106	double U[MAX_SQR_FILT_LENGTH][MAX_SQR_FILT_LENGTH]; /* Upper triangular Cholesky factor of LHS */
3107	int i, singular; /* Looping variable */
3108
3109	/* The equation to be solved is LHSx = rhs */
3110
3111	/* Compute upper triangular U such that U'U = LHS /
3112	if(gnsCholeskyDec(LHS, U, noEq)) /* If Cholesky decomposition has been successful */
3113	{
3114	singular=1;
3115	/* Now, the equation is U'Ux = rhs, where U is upper triangular
3116	* Solve U'*aux = rhs for aux
3117	*/
3118	gnsTransposeBacksubstitution(U, rhs, aux, noEq);
3119
3120	/* The equation is now Ux = aux, solve it for x (new motion coefficients) /
3121	gnsBacksubstitution(U, aux, noEq, x);
3122
3123	}
3124	else /* LHS was singular */
3125	{
3126	singular=0;
3127
3128	/* Regularize LHS */
3129	for(i=0; i<noEq; i++)
3130	LHS[i][i] += REG;
3131	/* Compute upper triangular U such that U'U = regularized LHS /
3132	singular = gnsCholeskyDec(LHS, U, noEq);
3133	/* Solve U'aux = rhs for aux /
3134	gnsTransposeBacksubstitution(U, rhs, aux, noEq);
3135
3136	/* Solve Ux = aux for x /
3137	gnsBacksubstitution(U, aux, noEq, x);
3138	}
3139	return(singular);
3140	}
3141
3142
3143	//////////////////////////////////////////////////////////////////////////////////////////
3144
3145
3146	Void TEncAdaptiveLoopFilter::add_A(double Amerged, double *A, int start, int stop, int size)
3147	{
3148	int
3149	i, j, ind; /* Looping variable */
3150
3151	for (i=0; i<size; i++)
3152	{
3153	for (j=0; j<size; j++)
3154	{
3155	Amerged[i][j]=0;
3156	for (ind=start; ind<=stop; ind++)
3157	{
3158	Amerged[i][j]+=A[ind][i][j];
3159	}
3160	}
3161	}
3162	}
3163
3164	Void TEncAdaptiveLoopFilter::add_b(double bmerged, double *b, int start, int stop, int size)
3165	{
3166	int
3167	i, ind; /* Looping variable */
3168
3169	for (i=0; i<size; i++)
3170	{
3171	bmerged[i]=0;
3172	for (ind=start; ind<=stop; ind++)
3173	{
3174	bmerged[i]+=b[ind][i];
3175	}
3176	}
3177	}
3178
3179	double TEncAdaptiveLoopFilter::calculateErrorCoeffProvided(double *A, double b, double *c, int size)
3180	{
3181	int i, j;
3182	double error, sum=0;
3183
3184	error=0;
3185	for (i=0; i<size; i++) //diagonal
3186	{
3187	sum=0;
3188	for (j=i+1; j<size; j++)
3189	sum+=(A[j][i]+A[i][j])*c[j];
3190	error+=(A[i][i]c[i]+sum-2b[i])*c[i];
3191	}
3192
3193	return(error);
3194	}
3195
3196	double TEncAdaptiveLoopFilter::calculateErrorAbs(double *A, double b, double y, int size)
3197	{
3198	int i;
3199	double error, sum;
3200	double c[MAX_SQR_FILT_LENGTH];
3201
3202	gnsSolveByChol(A, b, c, size);
3203
3204	sum=0;
3205	for (i=0; i<size; i++)
3206	{
3207	sum+=c[i]*b[i];
3208	}
3209	error=y-sum;
3210
3211	return(error);
3212	}
3213
3214	double TEncAdaptiveLoopFilter::mergeFiltersGreedy(double yGlobalSeq, double EGlobalSeq, double pixAccGlobalSeq, int intervalBest[NO_VAR_BINS][2], int sqrFiltLength, int noIntervals)
3215	{
3216	int first, ind, ind1, ind2, i, j, bestToMerge ;
3217	double error, error1, error2, errorMin;
3218	static double pixAcc_temp, error_tab[NO_VAR_BINS],error_comb_tab[NO_VAR_BINS];
3219	static int indexList[NO_VAR_BINS], available[NO_VAR_BINS], noRemaining;
3220	if (noIntervals == NO_FILTERS)
3221	{
3222	noRemaining=NO_VAR_BINS;
3223	for (ind=0; ind<NO_VAR_BINS; ind++)
3224	{
3225	indexList[ind]=ind;
3226	available[ind]=1;
3227	m_pixAcc_merged[ind]=pixAccGlobalSeq[ind];
3228	memcpy(m_y_merged[ind],yGlobalSeq[ind],sizeof(double)*sqrFiltLength);
3229	for (i=0; i<sqrFiltLength; i++)
3230	{
3231	memcpy(m_E_merged[ind][i],EGlobalSeq[ind][i],sizeof(double)*sqrFiltLength);
3232	}
3233	}
3234	}
3235	// Try merging different matrices
3236	if (noIntervals == NO_FILTERS)
3237	{
3238	for (ind=0; ind<NO_VAR_BINS; ind++)
3239	{
3240	error_tab[ind]=calculateErrorAbs(m_E_merged[ind], m_y_merged[ind], m_pixAcc_merged[ind], sqrFiltLength);
3241	}
3242	for (ind=0; ind<NO_VAR_BINS-1; ind++)
3243	{
3244	ind1=indexList[ind];
3245	ind2=indexList[ind+1];
3246
3247	error1=error_tab[ind1];
3248	error2=error_tab[ind2];
3249
3250	pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2];
3251	for (i=0; i<sqrFiltLength; i++)
3252	{
3253	m_y_temp[i]=m_y_merged[ind1][i]+m_y_merged[ind2][i];
3254	for (j=0; j<sqrFiltLength; j++)
3255	{
3256	m_E_temp[i][j]=m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];
3257	}
3258	}
3259	error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2;
3260	}
3261	}
3262	while (noRemaining>noIntervals)
3263	{
3264	errorMin=0; first=1;
3265	bestToMerge = 0;
3266	for (ind=0; ind<noRemaining-1; ind++)
3267	{
3268	error = error_comb_tab[indexList[ind]];
3269	if ((error<errorMin \|\| first==1))
3270	{
3271	errorMin=error;
3272	bestToMerge=ind;
3273	first=0;
3274	}
3275	}
3276	ind1=indexList[bestToMerge];
3277	ind2=indexList[bestToMerge+1];
3278	m_pixAcc_merged[ind1]+=m_pixAcc_merged[ind2];
3279	for (i=0; i<sqrFiltLength; i++)
3280	{
3281	m_y_merged[ind1][i]+=m_y_merged[ind2][i];
3282	for (j=0; j<sqrFiltLength; j++)
3283	{
3284	m_E_merged[ind1][i][j]+=m_E_merged[ind2][i][j];
3285	}
3286	}
3287	available[ind2]=0;
3288
3289	//update error tables
3290	error_tab[ind1]=error_comb_tab[ind1]+error_tab[ind1]+error_tab[ind2];
3291	if (indexList[bestToMerge] > 0)
3292	{
3293	ind1=indexList[bestToMerge-1];
3294	ind2=indexList[bestToMerge];
3295	error1=error_tab[ind1];
3296	error2=error_tab[ind2];
3297	pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2];
3298	for (i=0; i<sqrFiltLength; i++)
3299	{
3300	m_y_temp[i]=m_y_merged[ind1][i]+m_y_merged[ind2][i];
3301	for (j=0; j<sqrFiltLength; j++)
3302	{
3303	m_E_temp[i][j]=m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];
3304	}
3305	}
3306	error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2;
3307	}
3308	if (indexList[bestToMerge+1] < NO_VAR_BINS-1)
3309	{
3310	ind1=indexList[bestToMerge];
3311	ind2=indexList[bestToMerge+2];
3312	error1=error_tab[ind1];
3313	error2=error_tab[ind2];
3314	pixAcc_temp=m_pixAcc_merged[ind1]+m_pixAcc_merged[ind2];
3315	for (i=0; i<sqrFiltLength; i++)
3316	{
3317	m_y_temp[i]=m_y_merged[ind1][i]+m_y_merged[ind2][i];
3318	for (j=0; j<sqrFiltLength; j++)
3319	{
3320	m_E_temp[i][j]=m_E_merged[ind1][i][j]+m_E_merged[ind2][i][j];
3321	}
3322	}
3323	error_comb_tab[ind1]=calculateErrorAbs(m_E_temp, m_y_temp, pixAcc_temp, sqrFiltLength)-error1-error2;
3324	}
3325
3326	ind=0;
3327	for (i=0; i<NO_VAR_BINS; i++)
3328	{
3329	if (available[i]==1)
3330	{
3331	indexList[ind]=i;
3332	ind++;
3333	}
3334	}
3335	noRemaining--;
3336	}
3337
3338
3339	errorMin=0;
3340	for (ind=0; ind<noIntervals; ind++)
3341	{
3342	errorMin+=error_tab[indexList[ind]];
3343	}
3344
3345	for (ind=0; ind<noIntervals-1; ind++)
3346	{
3347	intervalBest[ind][0]=indexList[ind]; intervalBest[ind][1]=indexList[ind+1]-1;
3348	}
3349
3350	intervalBest[noIntervals-1][0]=indexList[noIntervals-1];
3351	intervalBest[noIntervals-1][1]=NO_VAR_BINS-1;
3352
3353	return(errorMin);
3354	}
3355
3356
3357
3358	double TEncAdaptiveLoopFilter::findFilterGroupingError(double *EGlobalSeq, double yGlobalSeq, double *pixAccGlobalSeq, int intervalBest[NO_VAR_BINS][2], int sqrFiltLength, int filters_per_fr)
3359	{
3360	double error;
3361
3362	// find best filters for each frame group
3363	error = 0;
3364	error += mergeFiltersGreedy(yGlobalSeq, EGlobalSeq, pixAccGlobalSeq, intervalBest, sqrFiltLength, filters_per_fr);
3365
3366	return(error);
3367	}
3368
3369
3370	Void TEncAdaptiveLoopFilter::roundFiltCoeff(int FilterCoeffQuan, double FilterCoeff, int sqrFiltLength, int factor)
3371	{
3372	int i;
3373	double diff;
3374	int diffInt, sign;
3375
3376	for(i = 0; i < sqrFiltLength; i++)
3377	{
3378	sign = (FilterCoeff[i]>0) ? 1: -1;
3379	diff = FilterCoeff[i]*sign;
3380	diffInt = (int)(diff*(double)factor+0.5);
3381	FilterCoeffQuan[i] = diffInt*sign;
3382	}
3383	}
3384
3385	Double TEncAdaptiveLoopFilter::QuantizeIntegerFilterPP(double filterCoeff, int filterCoeffQuant, double *E, double y, int sqrFiltLength, int *weights, int bit_depth)
3386	{
3387	double error;
3388
3389	int factor = (1<<(bit_depth-1)), i;
3390	int quantCoeffSum, minInd, targetCoeffSumInt, k, diff;
3391	double targetCoeffSum, errMin;
3392
3393	gnsSolveByChol(E, y, filterCoeff, sqrFiltLength);
3394	targetCoeffSum=0;
3395	for (i=0; i<sqrFiltLength; i++)
3396	{
3397	targetCoeffSum+=(weights[i]filterCoeff[i]factor);
3398	}
3399	targetCoeffSumInt=ROUND(targetCoeffSum);
3400	roundFiltCoeff(filterCoeffQuant, filterCoeff, sqrFiltLength, factor);
3401	quantCoeffSum=0;
3402	for (i=0; i<sqrFiltLength; i++)
3403	{
3404	quantCoeffSum+=weights[i]*filterCoeffQuant[i];
3405	}
3406
3407	int count=0;
3408	while(quantCoeffSum!=targetCoeffSumInt && count < 10)
3409	{
3410	if (quantCoeffSum>targetCoeffSumInt)
3411	{
3412	diff=quantCoeffSum-targetCoeffSumInt;
3413	errMin=0; minInd=-1;
3414	for (k=0; k<sqrFiltLength; k++)
3415	{
3416	if (weights[k]<=diff)
3417	{
3418	for (i=0; i<sqrFiltLength; i++)
3419	{
3420	m_filterCoeffQuantMod[i]=filterCoeffQuant[i];
3421	}
3422	m_filterCoeffQuantMod[k]--;
3423	for (i=0; i<sqrFiltLength; i++)
3424	{
3425	filterCoeff[i]=(double)m_filterCoeffQuantMod[i]/(double)factor;
3426	}
3427	error=calculateErrorCoeffProvided(E, y, filterCoeff, sqrFiltLength);
3428	if (error<errMin \|\| minInd==-1)
3429	{
3430	errMin=error;
3431	minInd=k;
3432	}
3433	} // if (weights(k)<=diff){
3434	} // for (k=0; k<sqrFiltLength; k++){
3435	filterCoeffQuant[minInd]--;
3436	}
3437	else
3438	{
3439	diff=targetCoeffSumInt-quantCoeffSum;
3440	errMin=0; minInd=-1;
3441	for (k=0; k<sqrFiltLength; k++)
3442	{
3443	if (weights[k]<=diff)
3444	{
3445	for (i=0; i<sqrFiltLength; i++)
3446	{
3447	m_filterCoeffQuantMod[i]=filterCoeffQuant[i];
3448	}
3449	m_filterCoeffQuantMod[k]++;
3450	for (i=0; i<sqrFiltLength; i++)
3451	{
3452	filterCoeff[i]=(double)m_filterCoeffQuantMod[i]/(double)factor;
3453	}
3454	error=calculateErrorCoeffProvided(E, y, filterCoeff, sqrFiltLength);
3455	if (error<errMin \|\| minInd==-1)
3456	{
3457	errMin=error;
3458	minInd=k;
3459	}
3460	} // if (weights(k)<=diff){
3461	} // for (k=0; k<sqrFiltLength; k++){
3462	filterCoeffQuant[minInd]++;
3463	}
3464
3465	quantCoeffSum=0;
3466	for (i=0; i<sqrFiltLength; i++)
3467	{
3468	quantCoeffSum+=weights[i]*filterCoeffQuant[i];
3469	}
3470	}
3471	if( count == 10 )
3472	{
3473	for (i=0; i<sqrFiltLength; i++)
3474	{
3475	filterCoeffQuant[i] = 0;
3476	}
3477	}
3478
3479	for (i=0; i<sqrFiltLength; i++)
3480	{
3481	filterCoeff[i]=(double)filterCoeffQuant[i]/(double)factor;
3482	}
3483
3484	error=calculateErrorCoeffProvided(E, y, filterCoeff, sqrFiltLength);
3485	return(error);
3486	}
3487
3488	Double TEncAdaptiveLoopFilter::findFilterCoeff(double *EGlobalSeq, double yGlobalSeq, double pixAccGlobalSeq, int filterCoeffSeq, int filterCoeffQuantSeq, int intervalBest[NO_VAR_BINS][2], int varIndTab[NO_VAR_BINS], int sqrFiltLength, int filters_per_fr, int weights, int bit_depth, double errorTabForce0Coeff[NO_VAR_BINS][2])
3489	{
3490	static double pixAcc_temp;
3491	double error;
3492	int k, filtNo;
3493
3494	error = 0;
3495	for(filtNo = 0; filtNo < filters_per_fr; filtNo++)
3496	{
3497	add_A(m_E_temp, EGlobalSeq, intervalBest[filtNo][0], intervalBest[filtNo][1], sqrFiltLength);
3498	add_b(m_y_temp, yGlobalSeq, intervalBest[filtNo][0], intervalBest[filtNo][1], sqrFiltLength);
3499
3500	pixAcc_temp = 0;
3501	for(k = intervalBest[filtNo][0]; k <= intervalBest[filtNo][1]; k++)
3502	pixAcc_temp += pixAccGlobalSeq[k];
3503
3504	// Find coeffcients
3505	errorTabForce0Coeff[filtNo][1] = pixAcc_temp + QuantizeIntegerFilterPP(m_filterCoeff, m_filterCoeffQuant, m_E_temp, m_y_temp, sqrFiltLength, weights, bit_depth);
3506	errorTabForce0Coeff[filtNo][0] = pixAcc_temp;
3507	error += errorTabForce0Coeff[filtNo][1];
3508
3509	for(k = 0; k < sqrFiltLength; k++)
3510	{
3511	filterCoeffSeq[filtNo][k] = m_filterCoeffQuant[k];
3512	filterCoeffQuantSeq[filtNo][k] = m_filterCoeffQuant[k];
3513	}
3514	}
3515
3516	for(filtNo = 0; filtNo < filters_per_fr; filtNo++)
3517	{
3518	for(k = intervalBest[filtNo][0]; k <= intervalBest[filtNo][1]; k++)
3519	varIndTab[k] = filtNo;
3520	}
3521
3522	return(error);
3523	}
3524
3525	#if MQT_ALF_NPASS
3526	Void TEncAdaptiveLoopFilter::setALFEncodingParam(TComPic *pcPic)
3527	{
3528	if(m_iALFEncodePassReduction)
3529	{
3530	m_iALFNumOfRedesign = 0;
3531	m_iCurrentPOC = m_pcPic->getPOC();
3532
3533	if((m_eSliceType == I_SLICE) \|\| (m_iGOPSize==8 && (m_iCurrentPOC % 4 == 0)))
3534	{
3535	m_iUsePreviousFilter = 0;
3536	}
3537	else
3538	{
3539	m_iUsePreviousFilter = 1;
3540	}
3541	}
3542	else
3543	{
3544	m_iALFNumOfRedesign = ALF_NUM_OF_REDESIGN;
3545	}
3546	m_iDesignCurrentFilter = 1;
3547
3548	}
3549
3550	Void TEncAdaptiveLoopFilter::xcalcPredFilterCoeffPrev(Int filtNo)
3551	{
3552	int varInd, i;
3553
3554	for(varInd=0; varInd<NO_VAR_BINS; ++varInd)
3555	{
3556	for(i = 0; i < MAX_SQR_FILT_LENGTH; i++)
3557	{
3558	m_filterCoeffPrevSelected[varInd][i]=m_aiFilterCoeffSaved[m_iFilterIdx][varInd][i];
3559	}
3560	}
3561	}
3562
3563	Void TEncAdaptiveLoopFilter::setFilterIdx(Int index)
3564	{
3565	if (m_iGOPSize == 8)
3566	{
3567	if ((m_iCurrentPOC % m_iGOPSize) == 0)
3568	{
3569	Int FiltTable[2] = {0, m_iGOPSize};
3570	m_iFilterIdx = FiltTable[index];
3571	}
3572	if ((m_iCurrentPOC % m_iGOPSize) == 4)
3573	{
3574	Int FiltTable[2] = {0, m_iGOPSize};
3575	m_iFilterIdx = FiltTable[index];
3576	}
3577	if ((m_iCurrentPOC % m_iGOPSize) == 2)
3578	{
3579	Int FiltTable[2] = {0, 4};
3580	m_iFilterIdx = FiltTable[index];
3581	}
3582	if ((m_iCurrentPOC % m_iGOPSize) == 6)
3583	{
3584	Int FiltTable[2] = {4, m_iGOPSize};
3585	m_iFilterIdx = FiltTable[index];
3586	}
3587	if ((m_iCurrentPOC % m_iGOPSize) == 1)
3588	{
3589	Int FiltTable[2] = {0, 2};
3590	m_iFilterIdx = FiltTable[index];
3591	}
3592	if ((m_iCurrentPOC % m_iGOPSize) == 3)
3593	{
3594	Int FiltTable[2] = {2, 4};
3595	m_iFilterIdx = FiltTable[index];
3596	}
3597	if ((m_iCurrentPOC % m_iGOPSize) == 5)
3598	{
3599	Int FiltTable[2] = {4, 6};
3600	m_iFilterIdx = FiltTable[index];
3601	}
3602	if ((m_iCurrentPOC % m_iGOPSize) == 7)
3603	{
3604	Int FiltTable[2] = {6, m_iGOPSize};
3605	m_iFilterIdx = FiltTable[index];
3606	}
3607	}
3608	else
3609	{
3610	Int FiltTable[2] = {0, m_iGOPSize};
3611	m_iFilterIdx = FiltTable[index];
3612	}
3613	}
3614
3615	Void TEncAdaptiveLoopFilter::setInitialMask(TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec)
3616	{
3617	Int Height = pcPicOrg->getHeight();
3618	Int Width = pcPicOrg->getWidth();
3619	Int LumaStride = pcPicOrg->getStride();
3620	imgpel* pDec = (imgpel*)pcPicDec->getLumaAddr();
3621
3622	#if MTK_NONCROSS_INLOOP_FILTER
3623	if(!m_bUseNonCrossALF)
3624	calcVar(0, 0, m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
3625	else
3626	calcVarforSlices(m_varImg, pDec, 9/2, VAR_SIZE, LumaStride);
3627	#else
3628	calcVar(m_varImg, pDec, 9/2, VAR_SIZE, Height, Width, LumaStride);
3629	#endif
3630
3631	if(!m_iALFEncodePassReduction \|\| !m_iUsePreviousFilter)
3632	{
3633	for(Int y=0; y<Height; y++)
3634	{
3635	for(Int x=0; x<Width; x++)
3636	{
3637	m_maskImg[y][x] = 1;
3638	}
3639	}
3640	}
3641	else
3642	{
3643	Int uiBestDepth=0;
3644	UInt64 uiRate, uiDist, uiMinRate, uiMinDist;
3645	Double dCost, dMinCost = MAX_DOUBLE;
3646	//imgpel* pOrg = (imgpel*)pcPicOrg->getLumaAddr();
3647	imgpel* pRest = (imgpel*)m_pcPicYuvTmp->getLumaAddr();
3648
3649	Int iTap = 9;
3650	Int filtNo = 0;
3651	m_pcTempAlfParam->cu_control_flag = 0;
3652	m_pcTempAlfParam->tap = iTap;
3653	#if TI_ALF_MAX_VSIZE_7
3654	m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap);
3655	m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(iTap);
3656	#else
3657	m_pcTempAlfParam->num_coeff = iTap*iTap/4 + 2;
3658	#endif
3659
3660	for (Int index=0; index<2; index++)
3661	{
3662	setFilterIdx(index);
3663	xcalcPredFilterCoeffPrev(filtNo);
3664	#if MTK_NONCROSS_INLOOP_FILTER
3665	if(!m_bUseNonCrossALF)
3666	xfilterFrame_en(0, 0, Height, Width, pDec, pRest, filtNo, LumaStride);
3667	else
3668	xfilterSlices_en(pDec, pRest, filtNo, LumaStride);
3669	#else
3670	xfilterFrame_en(pDec, pRest, filtNo, LumaStride);
3671	#endif
3672	xCalcRDCost(pcPicOrg, m_pcPicYuvTmp, NULL, uiRate, uiDist, dCost);
3673	if (dCost < dMinCost)
3674	{
3675	dMinCost = dCost;
3676	uiMinDist = uiDist;
3677	uiMinRate = uiRate;
3678	m_pcPicYuvTmp->copyToPicLuma(m_pcPicYuvBest);
3679	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
3680	}
3681	}
3682	m_pcPicYuvBest->copyToPicLuma(m_pcPicYuvTmp);
3683
3684	m_pcEntropyCoder->setAlfCtrl(true);
3685	Int maxDepth = g_uiMaxCUDepth;
3686	if (pcPicOrg->getWidth() < 1000) maxDepth = 2;
3687	for (UInt uiDepth = 0; uiDepth < maxDepth; uiDepth++)
3688	{
3689	m_pcEntropyCoder->setMaxAlfCtrlDepth(uiDepth);
3690	copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
3691	m_pcTempAlfParam->cu_control_flag = 1;
3692
3693	#if TSB_ALF_HEADER
3694	xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist, m_pcTempAlfParam); //set up varImg here
3695	#else
3696	xSetCUAlfCtrlFlags_qc(uiDepth, pcPicOrg, pcPicDec, m_pcPicYuvTmp, uiDist); //set up varImg here
3697	#endif
3698	m_pcEntropyCoder->resetEntropy();
3699	m_pcEntropyCoder->resetBits();
3700	xEncodeCUAlfCtrlFlags();
3701	uiRate = m_pcEntropyCoder->getNumberOfWrittenBits();
3702	dCost = (Double)(uiRate) * m_dLambdaLuma + (Double)(uiDist);
3703
3704	if (dCost < dMinCost)
3705	{
3706	uiBestDepth = uiDepth;
3707	dMinCost = dCost;
3708	uiMinDist = uiDist;
3709	uiMinRate = uiRate;
3710	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
3711	//save maskImg
3712	xCopyTmpAlfCtrlFlagsFrom();
3713	}
3714	}
3715
3716	copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
3717	m_iDesignCurrentFilter = 0; // design filter for subsequent slices
3718	xReDesignFilterCoeff_qc(pcPicOrg, pcPicDec, m_pcPicYuvTmp, true);
3719	m_iDesignCurrentFilter = 1;
3720
3721	if (m_pcBestAlfParam->cu_control_flag)
3722	{
3723	m_pcEntropyCoder->setAlfCtrl(true);
3724	m_pcEntropyCoder->setMaxAlfCtrlDepth(uiBestDepth);
3725	xCopyTmpAlfCtrlFlagsTo();
3726	}
3727	else
3728	{
3729	m_pcEntropyCoder->setAlfCtrl(false);
3730	m_pcEntropyCoder->setMaxAlfCtrlDepth(0);
3731	for(Int y=0; y<Height; y++)
3732	{
3733	for(Int x=0; x<Width; x++)
3734	{
3735	m_maskImg[y][x] = 1;
3736	}
3737	}
3738	}
3739	}
3740	}
3741
3742	#if MQT_BA_RA
3743	Void TEncAdaptiveLoopFilter::xFirstEstimateFilteringFrameLumaAllTap(imgpel* ImgOrg, imgpel* ImgDec, Int Stride,
3744	ALFParam* pcAlfSaved,
3745	Int* aiVarIndTabBest,
3746	Int** ppiBestCoeffSet,
3747	Int& ibestfiltNo,
3748	Int& ibestfilters_per_fr,
3749	Double** ppdBesty,
3750	Double*** pppdBestE,
3751	Double* pdBestpixAcc,
3752	UInt64& ruiRate,
3753	Int64& riDist,
3754	Double& rdCost
3755	)
3756	#else
3757	Void TEncAdaptiveLoopFilter::xFirstFilteringFrameLumaAllTap(imgpel* ImgOrg, imgpel* ImgDec, imgpel* ImgRest, Int Stride)
3758	#endif
3759	{
3760	#if !MQT_BA_RA
3761	static Bool bFirst = true;
3762	static Int aiVarIndTabBest[NO_VAR_BINS];
3763	#endif
3764	static Double ySym, *ESym;
3765	#if !MQT_BA_RA
3766	static Int** ppiBestCoeffSet;
3767
3768	if(bFirst)
3769	{
3770	initMatrix_int(&ppiBestCoeffSet, NO_VAR_BINS, MAX_SQR_FILT_LENGTH);
3771	bFirst = false;
3772	}
3773	#endif
3774
3775	Int lambda_val = ((Int) m_dLambdaLuma) * (1<<(2*g_uiBitIncrement));
3776	#if MQT_BA_RA
3777	Int filtNo, filters_per_fr;
3778	#else
3779	Int filtNo, ibestfiltNo=0, filters_per_fr, ibestfilters_per_fr=0;
3780	#endif
3781	Int64 iEstimatedDist;
3782	UInt64 uiRate;
3783	Double dEstimatedCost, dEstimatedMinCost = MAX_DOUBLE;;
3784	Bool bMatrixBaseReady = false;
3785	m_iMatrixBaseFiltNo = 0;
3786	#if MQT_BA_RA
3787	m_pcTempAlfParam->alf_flag = 1;
3788	m_pcTempAlfParam->cu_control_flag = 0;
3789	m_pcTempAlfParam->chroma_idc = 0;
3790	#endif
3791	for(Int iTap = ALF_MAX_NUM_TAP; iTap>=ALF_MIN_NUM_TAP; iTap -= 2)
3792	{
3793	m_pcTempAlfParam->tap = iTap;
3794	#if TI_ALF_MAX_VSIZE_7
3795	m_pcTempAlfParam->tapV = TComAdaptiveLoopFilter::ALFTapHToTapV(iTap);
3796	m_pcTempAlfParam->num_coeff = TComAdaptiveLoopFilter::ALFTapHToNumCoeff(iTap);
3797	#else
3798	m_pcTempAlfParam->num_coeff = iTap*iTap/4 + 2;
3799	#endif
3800	if (iTap==9)
3801	{
3802	filtNo = 0;
3803	}
3804	else if (iTap==7)
3805	{
3806	filtNo = 1;
3807	}
3808	else
3809	{
3810	filtNo = 2;
3811	}
3812
3813	ESym = m_EGlobalSym [filtNo];
3814	ySym = m_yGlobalSym [filtNo];
3815
3816	if( bMatrixBaseReady )
3817	{
3818	xretriveBlockMatrix(m_pcTempAlfParam->num_coeff, m_iTapPosTabIn9x9Sym[filtNo],
3819	m_EGlobalSym[m_iMatrixBaseFiltNo], ESym,
3820	m_yGlobalSym[m_iMatrixBaseFiltNo], ySym);
3821
3822	}
3823	else
3824	#if MTK_NONCROSS_INLOOP_FILTER
3825	{
3826	if(!m_bUseNonCrossALF)
3827	xstoreInBlockMatrix(0, 0, m_im_height, m_im_width, true, true, ImgOrg, ImgDec, iTap, Stride);
3828	else
3829	xstoreInBlockMatrixforSlices(ImgOrg, ImgDec, iTap, Stride);
3830	}
3831	#else
3832	xstoreInBlockMatrix(ImgOrg, ImgDec, iTap, Stride);
3833	#endif
3834	if(filtNo == m_iMatrixBaseFiltNo)
3835	{
3836	bMatrixBaseReady = true;
3837	}
3838
3839	xfindBestFilterVarPred(ySym, ESym, m_pixAcc, m_filterCoeffSym, m_filterCoeffSymQuant, filtNo, &filters_per_fr,
3840	m_varIndTab, NULL, m_varImg, m_maskImg, NULL, lambda_val);
3841
3842	uiRate = xcodeFiltCoeff(m_filterCoeffSymQuant, filtNo, m_varIndTab, filters_per_fr,0, m_pcTempAlfParam);
3843	iEstimatedDist = xEstimateFiltDist(filters_per_fr, m_varIndTab, ESym, ySym, m_filterCoeffSym, m_pcTempAlfParam->num_coeff);
3844	dEstimatedCost = (Double)(uiRate) * m_dLambdaLuma + (Double)(iEstimatedDist);
3845
3846	if(dEstimatedCost < dEstimatedMinCost)
3847	{
3848	dEstimatedMinCost = dEstimatedCost;
3849	ibestfiltNo = filtNo;
3850	ibestfilters_per_fr = filters_per_fr;
3851	#if MQT_BA_RA
3852	ruiRate = uiRate;
3853	riDist = iEstimatedDist;
3854	rdCost = dEstimatedMinCost;
3855
3856	copyALFParam(pcAlfSaved, m_pcTempAlfParam);
3857	#else
3858	copyALFParam(m_pcBestAlfParam, m_pcTempAlfParam);
3859	#endif
3860	::memcpy(aiVarIndTabBest, m_varIndTab, sizeof(Int)*NO_VAR_BINS);
3861	for(Int i=0; i< ibestfilters_per_fr; i++ )
3862	{
3863	::memcpy( ppiBestCoeffSet[i], m_filterCoeffSym[i], sizeof(Int) * m_pcTempAlfParam->num_coeff);
3864	}
3865	}
3866	}
3867
3868	filtNo = ibestfiltNo;
3869	filters_per_fr = ibestfilters_per_fr;
3870	::memcpy(m_varIndTab, aiVarIndTabBest, sizeof(Int)*NO_VAR_BINS);
3871	for(Int i=0; i< filters_per_fr; i++ )
3872	{
3873	#if MQT_BA_RA
3874	::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * pcAlfSaved->num_coeff);
3875	#else
3876	::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * m_pcTempAlfParam->num_coeff);
3877	#endif
3878	}
3879
3880	xcalcPredFilterCoeff(filtNo);
3881
3882	#if MQT_BA_RA
3883
3884
3885	::memset( pdBestpixAcc, 0,sizeof(double)*NO_VAR_BINS);
3886	for (Int varInd=0; varInd<NO_VAR_BINS; varInd++)
3887	{
3888	::memset(ppdBesty[varInd],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
3889	for (Int k=0; k< pcAlfSaved->num_coeff; k++)
3890	{
3891	::memset(pppdBestE[varInd][k],0,sizeof(double)*MAX_SQR_FILT_LENGTH);
3892	}
3893	}
3894	ESym = m_EGlobalSym [filtNo];
3895	ySym = m_yGlobalSym [filtNo];
3896
3897	::memcpy( pdBestpixAcc, m_pixAcc ,sizeof(double)*NO_VAR_BINS);
3898	for (Int varInd=0; varInd<NO_VAR_BINS; varInd++)
3899	{
3900	::memcpy(ppdBesty[varInd],ySym[varInd],sizeof(double)*MAX_SQR_FILT_LENGTH);
3901	for (Int k=0; k< pcAlfSaved->num_coeff; k++)
3902	{
3903	::memcpy(pppdBestE[varInd][k],ESym[varInd][k],sizeof(double)*MAX_SQR_FILT_LENGTH);
3904	}
3905	}
3906
3907
3908	#else
3909
3910	filtNo = ibestfiltNo;
3911	filters_per_fr = ibestfilters_per_fr;
3912	copyALFParam(m_pcTempAlfParam, m_pcBestAlfParam);
3913	::memcpy(m_varIndTab, aiVarIndTabBest, sizeof(Int)*NO_VAR_BINS);
3914	for(Int i=0; i< filters_per_fr; i++ )
3915	{
3916	::memcpy(m_filterCoeffSym[i], ppiBestCoeffSet[i], sizeof(Int) * m_pcTempAlfParam->num_coeff);
3917	}
3918
3919	xcalcPredFilterCoeff(filtNo);
3920	#if MTK_NONCROSS_INLOOP_FILTER
3921	if(!m_bUseNonCrossALF)
3922	xfilterFrame_en(0, 0, m_im_height, m_im_width, ImgDec, ImgRest, filtNo, Stride);
3923	else
3924	xfilterSlices_en(ImgDec, ImgRest, filtNo, Stride);
3925	#else
3926	xfilterFrame_en(ImgDec, ImgRest, filtNo, Stride);
3927	#endif
3928
3929	#endif
3930	}
3931
3932	Void TEncAdaptiveLoopFilter::xretriveBlockMatrix(Int iNumTaps,
3933	Int* piTapPosInMaxFilter,
3934	Double* pppdEBase, Double* pppdETarget,
3935	Double ppdyBase, Double ppdyTarget )
3936	{
3937	Int varInd;
3938	Int i, j, r, c;
3939
3940	Double** ppdSrcE;
3941	Double** ppdDstE;
3942	Double* pdSrcy;
3943	Double* pdDsty;
3944
3945	for (varInd=0; varInd< NO_VAR_BINS; varInd++)
3946	{
3947	ppdSrcE = pppdEBase [varInd];
3948	ppdDstE = pppdETarget[varInd];
3949	pdSrcy = ppdyBase [varInd];
3950	pdDsty = ppdyTarget [varInd];
3951
3952	for(j=0; j< iNumTaps; j++)
3953	{
3954	r = piTapPosInMaxFilter[j];
3955
3956	for(i=j; i< iNumTaps; i++)
3957	{
3958	c = piTapPosInMaxFilter[i];
3959
3960	//auto-correlation retrieval
3961	ppdDstE[j][i] = ppdSrcE[r][c];
3962
3963	}
3964
3965	//cross-correlation retrieval
3966	pdDsty[j] = pdSrcy[r];
3967
3968	}
3969
3970	//symmetric copy
3971	for(j=1; j< iNumTaps; j++)
3972	for(i=0; i< j; i++)
3973	ppdDstE[j][i] = ppdDstE[i][j];
3974
3975	}
3976
3977	}
3978
3979	Int64 TEncAdaptiveLoopFilter::xFastFiltDistEstimation(Double** ppdE, Double* pdy, Int* piCoeff, Int iFiltLength)
3980	{
3981	//static memory
3982	static Bool bFirst = true;
3983	static Double* pdcoeff;
3984	if(bFirst)
3985	{
3986	pdcoeff= new Double[MAX_SQR_FILT_LENGTH];
3987	bFirst= false;
3988	}
3989
3990	//variable
3991	Int i,j;
3992	Int64 iDist;
3993	Double dDist, dsum;
3994
3995
3996	for(i=0; i< iFiltLength; i++)
3997	pdcoeff[i]= (Double)piCoeff[i] / (Double)(1<< (NUM_BITS - 1) );
3998
3999
4000	dDist =0;
4001	for(i=0; i< iFiltLength; i++)
4002	{
4003	dsum= ((Double)ppdE[i][i]) * pdcoeff[i];
4004	for(j=i+1; j< iFiltLength; j++)
4005	dsum += (Double)(2ppdE[i][j]) pdcoeff[j];
4006
4007	dDist += ((dsum - 2.0 * pdy[i])* pdcoeff[i] );
4008	}
4009
4010
4011	UInt uiShift = g_uiBitIncrement<<1;
4012	if(dDist < 0)
4013	{
4014	iDist = -(((Int64)(-dDist + 0.5)) >> uiShift);
4015	}
4016	else //dDist >=0
4017	{
4018	iDist= ((Int64)(dDist+0.5)) >> uiShift;
4019	}
4020
4021	return iDist;
4022
4023	}
4024
4025	Int64 TEncAdaptiveLoopFilter::xEstimateFiltDist(Int filters_per_fr, Int* VarIndTab,
4026	Double* pppdE, Double ppdy,
4027	Int** ppiCoeffSet, Int iFiltLength)
4028
4029	{
4030	Int64 iDist;
4031	Double** ppdDstE;
4032	Double** ppdSrcE;
4033	Double* pdDsty;
4034	Double* pdSrcy;
4035	Int f, j, i, varInd;
4036	Int* piCoeff;
4037
4038	//clean m_E_merged & m_y_merged
4039	for(f=0; f< filters_per_fr; f++)
4040	{
4041	for(j =0; j < iFiltLength; j++)
4042	{
4043	//clean m_E_merged one line
4044	for(i=0; i < iFiltLength; i++)
4045	m_E_merged[f][j][i] = 0;
4046
4047	//clean m_y_merged
4048	m_y_merged[f][j] = 0;
4049	}
4050	}
4051
4052
4053	//merge correlation values
4054	for (varInd=0; varInd< NO_VAR_BINS; varInd++)
4055	{
4056	ppdSrcE = pppdE[varInd];
4057	ppdDstE = m_E_merged[ VarIndTab[varInd] ];
4058
4059	pdSrcy = ppdy[varInd];
4060	pdDsty = m_y_merged[ VarIndTab[varInd] ];
4061
4062	for(j=0; j< iFiltLength; j++)
4063	{
4064	for(i=0; i< iFiltLength; i++)
4065	ppdDstE[j][i] += ppdSrcE[j][i];
4066
4067	pdDsty[j] += pdSrcy[j];
4068	}
4069	}
4070
4071	//estimate distortion reduction by using FFDE (JCTVC-C143)
4072	iDist = 0;
4073	for(f=0; f< filters_per_fr; f++)
4074	{
4075	piCoeff = ppiCoeffSet[f];
4076	ppdDstE = m_E_merged [f];
4077	pdDsty = m_y_merged [f];
4078
4079	iDist += xFastFiltDistEstimation(ppdDstE, pdDsty, piCoeff, iFiltLength);
4080	}
4081
4082
4083	return iDist;
4084
4085	}
4086	#endif
4087
4088	#if MTK_NONCROSS_INLOOP_FILTER
4089
4090	Void TEncAdaptiveLoopFilter::calcVarforSlices(imgpel *varmap, imgpel imgY_Dec, Int pad_size, Int fl, Int img_stride)
4091	{
4092	#if MQT_BA_RA
4093	if(m_uiVarGenMethod == ALF_RA)
4094	{
4095	return;
4096	}
4097	#endif
4098
4099	Pel* pPicSrc = (Pel *)imgY_Dec;
4100	Pel* pPicSlice = m_pcSliceYuvTmp->getLumaAddr();
4101
4102	for(UInt s=0; s< m_uiNumSlicesInPic; s++)
4103	{
4104	CAlfSlice* pSlice = &(m_pSlice[s]);
4105
4106	pSlice->copySliceLuma(pPicSlice, pPicSrc, img_stride);
4107	pSlice->extendSliceBorderLuma(pPicSlice, img_stride, (UInt)EXTEND_NUM_PEL);
4108	calcVarforOneSlice(pSlice, varmap, (imgpel*)pPicSlice, pad_size, fl, img_stride);
4109	}
4110	}
4111
4112
4113
4114	Void TEncAdaptiveLoopFilter::xfilterSlices_en(imgpel* ImgDec, imgpel* ImgRest,int filtNo, int iStride)
4115	{
4116	Pel* pPicSrc = (Pel *)ImgDec;
4117	Pel* pPicSlice = m_pcSliceYuvTmp->getLumaAddr();
4118
4119	for(UInt s=0; s< m_uiNumSlicesInPic; s++)
4120	{
4121	CAlfSlice* pSlice = &(m_pSlice[s]);
4122
4123	pSlice->copySliceLuma(pPicSlice, pPicSrc, iStride);
4124	pSlice->extendSliceBorderLuma(pPicSlice, iStride, EXTEND_NUM_PEL);
4125
4126	xfilterOneSlice_en(pSlice, (imgpel*)pPicSlice, ImgRest, filtNo, iStride);
4127	}
4128	}
4129
4130
4131	Void TEncAdaptiveLoopFilter::xfilterOneSlice_en(CAlfSlice* pSlice, imgpel* ImgDec, imgpel* ImgRest,int filtNo, int iStride)
4132	{
4133	UInt uiNumLCUs = pSlice->getNumLCUs();
4134
4135	Int iHeight, iWidth;
4136	Int ypos, xpos;
4137
4138	for(UInt i=0; i< uiNumLCUs; i++)
4139	{
4140	CAlfCU* pcAlfCU = &((*pSlice)[i]);
4141
4142	ypos = pcAlfCU->getCU()->getCUPelY();
4143	xpos = pcAlfCU->getCU()->getCUPelX();
4144	iHeight = pcAlfCU->getHeight();
4145	iWidth = pcAlfCU->getWidth();
4146
4147	xfilterFrame_en(ypos, xpos, iHeight, iWidth, ImgDec, ImgRest, filtNo, iStride);
4148	}
4149	}
4150
4151
4152
4153	Void TEncAdaptiveLoopFilter::xstoreInBlockMatrixforSlices(imgpel* ImgOrg, imgpel* ImgDec, Int tap, Int iStride)
4154	{
4155	Pel* pPicSrc = (Pel *)ImgDec;
4156	Pel* pPicSlice = m_pcSliceYuvTmp->getLumaAddr();
4157
4158	for(UInt s=0; s< m_uiNumSlicesInPic; s++)
4159	{
4160	CAlfSlice* pSlice = &(m_pSlice[s]);
4161	pSlice->copySliceLuma(pPicSlice, pPicSrc, iStride);
4162	pSlice->extendSliceBorderLuma(pPicSlice, iStride, (UInt)EXTEND_NUM_PEL);
4163	xstoreInBlockMatrixforOneSlice(pSlice, ImgOrg, (imgpel*)pPicSlice, tap, iStride, (s==0), (s== m_uiNumSlicesInPic-1));
4164	}
4165	}
4166
4167	Void TEncAdaptiveLoopFilter::xstoreInBlockMatrixforOneSlice(CAlfSlice* pSlice,
4168	imgpel* ImgOrg, imgpel* ImgDec,
4169	Int tap, Int iStride,
4170	Bool bFirstSlice,
4171	Bool bLastSlice
4172	)
4173	{
4174
4175
4176	UInt uiNumLCUs = pSlice->getNumLCUs();
4177
4178	Int iHeight, iWidth;
4179	Int ypos, xpos;
4180	Bool bFirstLCU, bLastLCU;
4181
4182	for(UInt i=0; i< uiNumLCUs; i++)
4183	{
4184	bFirstLCU = (i==0);
4185	bLastLCU = (i== uiNumLCUs -1);
4186
4187	CAlfCU* pcAlfCU = &((*pSlice)[i]);
4188	ypos = pcAlfCU->getCU()->getCUPelY();
4189	xpos = pcAlfCU->getCU()->getCUPelX();
4190	iHeight = pcAlfCU->getHeight();
4191	iWidth = pcAlfCU->getWidth();
4192
4193	xstoreInBlockMatrix(ypos, xpos, iHeight, iWidth,
4194	(bFirstSlice && bFirstLCU),(bLastSlice && bLastLCU),
4195	ImgOrg, ImgDec,tap, iStride);
4196	}
4197	}
4198
4199
4200
4201	Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaSlices(Int ComponentID, Pel* pOrg, Pel* pCmp, Int iTap, Int iOrgStride, Int iCmpStride)
4202	{
4203
4204	assert(iOrgStride == iCmpStride);
4205
4206	Pel* pPicSrc = pCmp;
4207	Pel* pPicSlice = (ComponentID == ALF_Cb)?(m_pcSliceYuvTmp->getCbAddr()):(m_pcSliceYuvTmp->getCrAddr());
4208
4209	for(UInt s=0; s< m_uiNumSlicesInPic; s++)
4210	{
4211	CAlfSlice* pSlice = &(m_pSlice[s]);
4212
4213	pSlice->copySliceChroma(pPicSlice, pPicSrc, iCmpStride);
4214	pSlice->extendSliceBorderChroma(pPicSlice, iCmpStride, (UInt)EXTEND_NUM_PEL_C);
4215
4216	xCalcCorrelationFuncforChromaOneSlice(pSlice, pOrg, pPicSlice, iTap, iCmpStride,(s==m_uiNumSlicesInPic-1));
4217	}
4218	}
4219
4220	Void TEncAdaptiveLoopFilter::xCalcCorrelationFuncforChromaOneSlice(CAlfSlice* pSlice, Pel* pOrg, Pel* pCmp, Int iTap, Int iStride, Bool bLastSlice)
4221	{
4222	UInt uiNumLCUs = pSlice->getNumLCUs();
4223
4224	Int iHeight, iWidth;
4225	Int ypos, xpos;
4226	Bool bLastLCU;
4227
4228	for(UInt i=0; i< uiNumLCUs; i++)
4229	{
4230	bLastLCU = (i== uiNumLCUs -1);
4231
4232	CAlfCU* pcAlfCU = &((*pSlice)[i]);
4233	ypos = ( pcAlfCU->getCU()->getCUPelY() >> 1 );
4234	xpos = ( pcAlfCU->getCU()->getCUPelX() >> 1 );
4235	iHeight = (Int)( pcAlfCU->getHeight() >> 1);
4236	iWidth = (Int)( pcAlfCU->getWidth() >> 1);
4237
4238	xCalcCorrelationFunc(ypos, xpos, pOrg, pCmp, iTap, iWidth, iHeight, iStride, iStride, (bLastSlice && bLastLCU ) );
4239	}
4240	}
4241
4242	Void TEncAdaptiveLoopFilter::xFrameChromaforSlices(Int ComponentID, TComPicYuv* pcPicDecYuv, TComPicYuv* pcPicRestYuv, Int *qh, Int iTap )
4243	{
4244	Pel* pPicDec = (ComponentID == ALF_Cb)?( pcPicDecYuv->getCbAddr()):( pcPicDecYuv->getCrAddr());
4245	// Pel* pPicRest = (ComponentID == ALF_Cb)?( pcPicRestYuv->getCbAddr()):( pcPicRestYuv->getCrAddr());
4246	Pel* pPicSlice = (ComponentID == ALF_Cb)?(m_pcSliceYuvTmp->getCbAddr()):(m_pcSliceYuvTmp->getCrAddr());
4247
4248	Int iStride = pcPicDecYuv->getCStride();
4249
4250	assert(iStride == pcPicRestYuv->getCStride());
4251
4252	for(UInt s=0; s< m_uiNumSlicesInPic; s++)
4253	{
4254	CAlfSlice* pSlice = &(m_pSlice[s]);
4255
4256	pSlice->copySliceChroma(pPicSlice, pPicDec, iStride);
4257	pSlice->extendSliceBorderChroma(pPicSlice, iStride, (UInt)EXTEND_NUM_PEL_C);
4258
4259	xFrameChromaforOneSlice(pSlice, ComponentID, m_pcSliceYuvTmp, pcPicRestYuv, qh, iTap);
4260	}
4261	}
4262
4263	#endif
4264
4265
4266	#if MTK_SAO
4267	inline Double xRoundIbdi2(Double x)
4268	{
4269	return ((x)>0) ? (Int)(((Int)(x)+(1<<(g_uiBitIncrement-1)))/(1<<g_uiBitIncrement)) : ((Int)(((Int)(x)-(1<<(g_uiBitIncrement-1)))/(1<<g_uiBitIncrement)));
4270	}
4271
4272	inline Double xRoundIbdi(Double x)
4273	{
4274	return (g_uiBitIncrement >0 ? xRoundIbdi2((x)) : ((x)>=0 ? ((Int)((x)+0.5)) : ((Int)((x)-0.5)))) ;
4275	}
4276
4277	/** run QAO One Part.
4278	* \param pQAOOnePart, iPartIdx
4279	*/
4280	Void TEncSampleAdaptiveOffset::xQAOOnePart(SAOQTPart* pQAOOnePart, Int iPartIdx)
4281	{
4282	Int iTypeIdx;
4283	Int iNumTotalType = MAX_NUM_SAO_TYPE;
4284
4285	Int64 iEstDist;
4286	Int64 iOffsetOrg;
4287	Int64 iOffset;
4288	Int64 iCount;
4289	Int iClassIdx;
4290	Int uiShift = g_uiBitIncrement << 1;
4291	Double dAreaWeight = (pQAOOnePart->part_xe - pQAOOnePart->part_xs + 1) * (pQAOOnePart->part_ye - pQAOOnePart->part_ys + 1);
4292	Double dComplexityCost = 0;
4293	Int iQaoPara1 = SAO_RDCO;
4294
4295	UInt uiDepth = pQAOOnePart->PartLevel;
4296
4297	// m_iDistOrg [iPartIdx] = 0;
4298
4299	m_iDistOrg [iPartIdx] = (Int64)((Double)(iQaoPara1)/10000 * m_dLambdaLuma * dAreaWeight);
4300
4301	for (iTypeIdx=-1; iTypeIdx<iNumTotalType; iTypeIdx++)
4302	{
4303	if( m_bUseSBACRD )
4304	{
4305	m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]); // pcCU->getDepth(0) ==> m_puhDepth[uiIdx]
4306	m_pcRDGoOnSbacCoder->resetBits();
4307	}
4308	else
4309	{
4310	m_pcEntropyCoder->resetEntropy();
4311	m_pcEntropyCoder->resetBits();
4312	}
4313
4314	iEstDist = 0;
4315
4316	m_pcEntropyCoder->m_pcEntropyCoderIf->codeAoUvlc(iTypeIdx+1);
4317
4318	if (iTypeIdx>=0)
4319	{
4320
4321	for(iClassIdx=1; iClassIdx < m_iNumClass[iTypeIdx]+1; iClassIdx++)
4322	{
4323	if(m_iCount [iPartIdx][iTypeIdx][iClassIdx])
4324	{
4325	m_iOffset[iPartIdx][iTypeIdx][iClassIdx] = (Int64) xRoundIbdi((Double)(m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx]<<m_uiAoBitDepth) / (Double)m_iCount [iPartIdx][iTypeIdx][iClassIdx]);
4326	}
4327	else
4328	{
4329	m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx] = 0;
4330	m_iOffset[iPartIdx][iTypeIdx][iClassIdx] = 0;
4331	}
4332
4333	iCount = m_iCount [iPartIdx][iTypeIdx][iClassIdx];
4334	iOffset = m_iOffset[iPartIdx][iTypeIdx][iClassIdx] << (g_uiBitIncrement-m_uiAoBitDepth);
4335	iOffsetOrg = m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx];
4336	iEstDist += (( iCountiOffsetiOffset-iOffsetOrgiOffset2 ) >> uiShift);
4337	m_pcEntropyCoder->m_pcEntropyCoderIf->codeAoSvlc((Int)m_iOffset[iPartIdx][iTypeIdx][iClassIdx]);
4338	}
4339	m_iDist[iPartIdx][iTypeIdx] = iEstDist;
4340	m_iRate[iPartIdx][iTypeIdx] = m_pcEntropyCoder->getNumberOfWrittenBits();
4341
4342	m_dCost[iPartIdx][iTypeIdx] = (Double)((Double)m_iDist[iPartIdx][iTypeIdx] + m_dLambdaLuma * (Double) m_iRate[iPartIdx][iTypeIdx]);
4343	dComplexityCost = (Double)(iQaoPara1)/10000 * m_dLambdaLuma * (Double)m_iWeightAO[iTypeIdx] * dAreaWeight;
4344	m_dCost[iPartIdx][iTypeIdx] = (Double)((Double)m_iDist[iPartIdx][iTypeIdx] + m_dLambdaLuma * (Double) m_iRate[iPartIdx][iTypeIdx]) + dComplexityCost;
4345
4346	// printf("\n%3d:%10.f, %10.0f, %10.0f",iPartIdx,(Double)m_iDist[iPartIdx][iTypeIdx], dComplexityCost);
4347
4348	// printf("\n%d, %d, %6d, %6d, %f", iPartIdx, iTypeIdx, (Int)m_iDist[iPartIdx][iTypeIdx], (Int)m_iRate[iPartIdx][iTypeIdx], m_dCost[iPartIdx][iTypeIdx]);
4349	if(m_dCost[iPartIdx][iTypeIdx] < m_dCostPartBest[iPartIdx])
4350	{
4351	m_iDistOrg [iPartIdx] = (Int64)dComplexityCost;
4352	m_dCostPartBest[iPartIdx] = m_dCost[iPartIdx][iTypeIdx];
4353	m_iTypePartBest[iPartIdx] = iTypeIdx;
4354	if( m_bUseSBACRD )
4355	m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[pQAOOnePart->PartLevel][CI_TEMP_BEST] );
4356	}
4357	}
4358	else
4359	{
4360
4361	if(m_iDistOrg[iPartIdx] < m_dCostPartBest[iPartIdx] )
4362	{
4363	m_dCostPartBest[iPartIdx] = (Double) m_iDistOrg[iPartIdx] + m_pcEntropyCoder->getNumberOfWrittenBits()*m_dLambdaLuma ;
4364	m_iTypePartBest[iPartIdx] = -1;
4365	if( m_bUseSBACRD )
4366	m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[pQAOOnePart->PartLevel][CI_TEMP_BEST] );
4367	}
4368	}
4369	}
4370
4371	pQAOOnePart->bProcessed = true;
4372	pQAOOnePart->bSplit = false;
4373	pQAOOnePart->iMinDist = m_iTypePartBest[iPartIdx] >= 0 ? m_iDist[iPartIdx][m_iTypePartBest[iPartIdx]] : m_iDistOrg[iPartIdx];
4374	pQAOOnePart->iMinRate = (Int) (m_iTypePartBest[iPartIdx] >= 0 ? m_iRate[iPartIdx][m_iTypePartBest[iPartIdx]] : 0);
4375	pQAOOnePart->dMinCost = pQAOOnePart->iMinDist + m_dLambdaLuma * pQAOOnePart->iMinRate;
4376	pQAOOnePart->iBestType = m_iTypePartBest[iPartIdx];
4377	if (pQAOOnePart->iBestType != -1)
4378	{
4379	pQAOOnePart->bEnableFlag = 1;
4380	pQAOOnePart->iLength = m_iNumClass[m_psQAOPart[iPartIdx].iBestType];
4381	for (Int i=0; i<pQAOOnePart->iLength ; i++)
4382	pQAOOnePart->iOffset[i] = (Int) m_iOffset[iPartIdx][pQAOOnePart->iBestType][i+1];
4383	}
4384	else
4385	{
4386	pQAOOnePart->bEnableFlag = 0;
4387	pQAOOnePart->iLength = 0;
4388	}
4389
4390	}
4391
4392	/** run Part Tree Disable.
4393	* \param pQAOOnePart, iPartIdx
4394	*/
4395	Void TEncSampleAdaptiveOffset::xPartTreeDisable(Int iPartIdx)
4396	{
4397	SAOQTPart* pQAOPart= &(m_psQAOPart[iPartIdx]);
4398
4399	pQAOPart->bEnableFlag = false;
4400	pQAOPart->bSplit = false;
4401	pQAOPart->iLength = 0;
4402	pQAOPart->iBestType = -1;
4403
4404	if (pQAOPart->PartLevel < m_uiMaxSplitLevel)
4405	{
4406	for (Int i=0; i<NUM_DOWN_PART; i++)
4407	{
4408	xPartTreeDisable(pQAOPart->DownPartsIdx[i]);
4409	}
4410	}
4411
4412	}
4413
4414	/** run QuadTree Decision Function.
4415	* \param iPartIdx, pcPicOrg, pcPicDec, pcPicRest, &dCostFinal
4416	*/
4417	Void TEncSampleAdaptiveOffset::xQuadTreeDecisionFunc(Int iPartIdx, TComPicYuv* pcPicOrg, TComPicYuv* pcPicDec, TComPicYuv* pcPicRest, Double &dCostFinal)
4418	{
4419	SAOQTPart* pQAOPart= &(m_psQAOPart[iPartIdx]);
4420	UInt uiDepth = pQAOPart->PartLevel;
4421	UInt uhNextDepth = uiDepth+1;
4422
4423	if (iPartIdx == 0)
4424	{
4425	dCostFinal = 0;
4426	}
4427
4428	//QAO for this part
4429	if(!pQAOPart->bProcessed)
4430	{
4431	xQAOOnePart (pQAOPart, iPartIdx);
4432	}
4433
4434	//QAO for sub 4 parts
4435	if (pQAOPart->PartLevel < m_uiMaxSplitLevel)
4436	{
4437	Double dCostNotSplit = m_dLambdaLuma + pQAOPart->dMinCost;
4438	Double dCostSplit = m_dLambdaLuma;
4439
4440	for (Int i=0; i< NUM_DOWN_PART ;i++)
4441	{
4442	if( m_bUseSBACRD )
4443	{
4444	if ( 0 == iPartIdx) //initialize RD with previous depth buffer
4445	m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
4446	else
4447	m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]);
4448	}
4449	xQuadTreeDecisionFunc(pQAOPart->DownPartsIdx[i], pcPicOrg, pcPicDec, pcPicRest, dCostFinal);
4450	dCostSplit += dCostFinal;
4451	if( m_bUseSBACRD )
4452	{
4453	m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
4454	}
4455	}
4456
4457
4458	if(dCostSplit < dCostNotSplit)
4459	{
4460	dCostFinal = dCostSplit;
4461	pQAOPart->bSplit = true;
4462	pQAOPart->bEnableFlag = false;
4463	pQAOPart->iLength = 0;
4464	pQAOPart->iBestType = -1;
4465	}
4466	else
4467	{
4468	dCostFinal = dCostNotSplit;
4469	pQAOPart->bSplit = false;
4470	for (Int i=0; i<NUM_DOWN_PART; i++)
4471	{
4472	xPartTreeDisable(pQAOPart->DownPartsIdx[i]);
4473	}
4474	}
4475	}
4476	else
4477	{
4478	dCostFinal = pQAOPart->dMinCost;
4479	}
4480	}
4481	/** destory TEncSampleAdaptiveOffset class.
4482	* \param
4483	*/
4484	Void TEncSampleAdaptiveOffset::destoryEncBuffer()
4485	{
4486
4487	for (Int i=0;i<m_iNumTotalParts;i++)
4488	{
4489	for (Int j=0;j<MAX_NUM_SAO_TYPE;j++)
4490	{
4491	if (m_iCount [i][j])
4492	{
4493	delete [] m_iCount [i][j];
4494	}
4495	if (m_iOffset[i][j])
4496	{
4497	delete [] m_iOffset[i][j];
4498	}
4499	if (m_iOffsetOrg[i][j])
4500	{
4501	delete [] m_iOffsetOrg[i][j];
4502	}
4503	}
4504	if (m_iRate[i])
4505	{
4506	delete [] m_iRate[i];
4507	}
4508	if (m_iDist[i])
4509	{
4510	delete [] m_iDist[i];
4511	}
4512	if (m_dCost[i])
4513	{
4514	delete [] m_dCost[i];
4515	}
4516	if (m_iCount [i])
4517	{
4518	delete [] m_iCount [i];
4519	}
4520	if (m_iOffset[i])
4521	{
4522	delete [] m_iOffset[i];
4523	}
4524	if (m_iOffsetOrg[i])
4525	{
4526	delete [] m_iOffsetOrg[i];
4527	}
4528
4529	}
4530	if (m_iDistOrg)
4531	{
4532	delete [] m_iDistOrg ; m_iDistOrg = NULL;
4533	}
4534	if (m_dCostPartBest)
4535	{
4536	delete [] m_dCostPartBest ; m_dCostPartBest = NULL;
4537	}
4538	if (m_iTypePartBest)
4539	{
4540	delete [] m_iTypePartBest ; m_iTypePartBest = NULL;
4541	}
4542	if (m_iRate)
4543	{
4544	delete [] m_iRate ; m_iRate = NULL;
4545	}
4546	if (m_iDist)
4547	{
4548	delete [] m_iDist ; m_iDist = NULL;
4549	}
4550	if (m_dCost)
4551	{
4552	delete [] m_dCost ; m_dCost = NULL;
4553	}
4554	if (m_iCount)
4555	{
4556	delete [] m_iCount ; m_iCount = NULL;
4557	}
4558	if (m_iOffset)
4559	{
4560	delete [] m_iOffset ; m_iOffset = NULL;
4561	}
4562	if (m_iOffsetOrg)
4563	{
4564	delete [] m_iOffsetOrg ; m_iOffsetOrg = NULL;
4565	}
4566
4567
4568	}
4569	Void TEncSampleAdaptiveOffset::createEncBuffer()
4570	{
4571	m_iDistOrg = new Int64 [m_iNumTotalParts];
4572	m_dCostPartBest = new Double [m_iNumTotalParts];
4573	m_iTypePartBest = new Int [m_iNumTotalParts];
4574
4575	m_iRate = new Int64* [m_iNumTotalParts];
4576	m_iDist = new Int64* [m_iNumTotalParts];
4577	m_dCost = new Double*[m_iNumTotalParts];
4578
4579	m_iCount = new Int64 **[m_iNumTotalParts];
4580	m_iOffset = new Int64 **[m_iNumTotalParts];
4581	m_iOffsetOrg = new Int64 **[m_iNumTotalParts];
4582
4583	for (Int i=0;i<m_iNumTotalParts;i++)
4584	{
4585	m_iRate[i] = new Int64 [MAX_NUM_SAO_TYPE];
4586	m_iDist[i] = new Int64 [MAX_NUM_SAO_TYPE];
4587	m_dCost[i] = new Double [MAX_NUM_SAO_TYPE];
4588
4589	m_iCount [i] = new Int64 *[MAX_NUM_SAO_TYPE];
4590	m_iOffset[i] = new Int64 *[MAX_NUM_SAO_TYPE];
4591	m_iOffsetOrg[i] = new Int64 *[MAX_NUM_SAO_TYPE];
4592
4593	for (Int j=0;j<MAX_NUM_SAO_TYPE;j++)
4594	{
4595	m_iCount [i][j] = new Int64 [MAX_NUM_QAO_CLASS];
4596	m_iOffset[i][j] = new Int64 [MAX_NUM_QAO_CLASS];
4597	m_iOffsetOrg[i][j]= new Int64 [MAX_NUM_QAO_CLASS];
4598	}
4599	}
4600
4601	}
4602
4603	/** start Sao Encoder.
4604	* \param pcPic, pcEntropyCoder, pppcRDSbacCoder, pcRDGoOnSbacCoder
4605	*/
4606	Void TEncSampleAdaptiveOffset::startSaoEnc( TComPic* pcPic, TEncEntropy* pcEntropyCoder, TEncSbac*** pppcRDSbacCoder, TEncSbac* pcRDGoOnSbacCoder)
4607	{
4608	if( pcRDGoOnSbacCoder )
4609	m_bUseSBACRD = true;
4610	else
4611	m_bUseSBACRD = false;
4612
4613	m_pcPic = pcPic;
4614	m_pcEntropyCoder = pcEntropyCoder;
4615
4616	m_pppcRDSbacCoder = pppcRDSbacCoder;
4617	m_pcRDGoOnSbacCoder = pcRDGoOnSbacCoder;
4618	m_pcEntropyCoder->resetEntropy();
4619	m_pcEntropyCoder->resetBits();
4620
4621	if( m_bUseSBACRD )
4622	{
4623	m_pcRDGoOnSbacCoder->store( m_pppcRDSbacCoder[0][CI_NEXT_BEST]);
4624	m_pppcRDSbacCoder[0][CI_CURR_BEST]->load( m_pppcRDSbacCoder[0][CI_NEXT_BEST]);
4625	}
4626
4627	m_bSaoFlag = 0;
4628	for (Int i=0;i<m_iNumTotalParts;i++)
4629	{
4630	m_dCostPartBest[i] = MAX_DOUBLE;
4631	m_iTypePartBest[i] = -1;
4632	m_iDistOrg[i] = 0;
4633	for (Int j=0;j<MAX_NUM_SAO_TYPE;j++)
4634	{
4635	m_iDist[i][j] = 0;
4636	m_iRate[i][j] = 0;
4637	m_dCost[i][j] = 0;
4638	for (Int k=0;k<MAX_NUM_QAO_CLASS;k++)
4639	{
4640	m_iCount [i][j][k] = 0;
4641	m_iOffset[i][j][k] = 0;
4642	m_iOffsetOrg[i][j][k] = 0;
4643	}
4644	}
4645	}
4646
4647	for(Int i=0; i< m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; i++)
4648	{
4649	m_psQAOPart[i].bEnableFlag = 0;
4650	m_psQAOPart[i].iBestType = -1;
4651	m_psQAOPart[i].iLength = 0;
4652	m_psQAOPart[i].bSplit = false;
4653	m_psQAOPart[i].bProcessed = false;
4654	m_psQAOPart[i].dMinCost = MAX_DOUBLE;
4655	m_psQAOPart[i].iMinDist = MAX_INT;
4656	m_psQAOPart[i].iMinRate = MAX_INT;
4657
4658	for (Int j=0;j<MAX_NUM_QAO_CLASS;j++)
4659	{
4660	m_psQAOPart[i].iOffset[j] = 0;
4661	}
4662	}
4663
4664	for(Int i=0; i< m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; i++)
4665	{
4666	m_psQAOPart[i].bEnableFlag = 0;
4667	m_psQAOPart[i].iBestType = -1;
4668	m_psQAOPart[i].iLength = 0;
4669	for (Int j=0;j<MAX_NUM_QAO_CLASS;j++)
4670	{
4671	m_psQAOPart[i].iOffset[j] = 0;
4672	}
4673	}
4674
4675	}
4676
4677	/** end Sao Encoder.
4678	* \param
4679	*/
4680	Void TEncSampleAdaptiveOffset::endSaoEnc()
4681	{
4682	m_pcPic = NULL;
4683	m_pcEntropyCoder = NULL;
4684	}
4685
/** Sign of an integer: -1 for negative, 0 for zero, +1 for positive.
 *  Written with comparisons so the result does not depend on the representation of
 *  negative numbers or on negating the most negative value.
 */
inline int xSign(int x)
{
  return (x > 0) - (x < 0);
}
4690	/** calculate Ao Stats Cu
4691	* \param iAddr, iPartIdx
4692	*/
4693	Void TEncSampleAdaptiveOffset::calcAoStatsCu(Int iAddr, Int iPartIdx)
4694	{
4695	Int x,y;
4696	TComDataCU *pTmpCu = m_pcPic->getCU(iAddr);
4697	TComSPS *pTmpSPS = m_pcPic->getSlice(0)->getSPS();
4698
4699
4700	Pel* pOrg ;
4701	Pel* pRec ;
4702	Int iStride = m_pcPic->getStride();
4703	Int iLcuWidth = pTmpSPS->getMaxCUHeight();
4704	Int iLcuHeight = pTmpSPS->getMaxCUWidth();
4705	Int iPicWidth = pTmpSPS->getWidth();
4706	Int iPicHeight = pTmpSPS->getHeight();
4707	UInt uiLPelX = pTmpCu->getCUPelX();
4708	UInt uiRPelX = uiLPelX + iLcuWidth;
4709	UInt uiTPelY = pTmpCu->getCUPelY();
4710	UInt uiBPelY = uiTPelY + iLcuHeight;
4711	uiRPelX = uiRPelX > iPicWidth ? iPicWidth : uiRPelX;
4712	uiBPelY = uiBPelY > iPicHeight? iPicHeight: uiBPelY;
4713	iLcuWidth = uiRPelX - uiLPelX;
4714	iLcuHeight = uiBPelY - uiTPelY;
4715	Int64* iStats ;
4716	Int64* iCount ;
4717	Int iClassIdx;
4718
4719
4720	// if(m_iAoType == BO_0 \|\| m_iAoType == BO_1)
4721	{
4722	iStats = m_iOffsetOrg[iPartIdx][SAO_BO_0];
4723	iCount = m_iCount [iPartIdx][SAO_BO_0];
4724
4725	pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
4726	pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
4727
4728	for (y=0; y<iLcuHeight; y++)
4729	{
4730	for (x=0; x<iLcuWidth; x++)
4731	{
4732	iClassIdx = m_ppLumaTableBo0[pRec[x]];
4733	if (iClassIdx)
4734	{
4735	iStats[iClassIdx] += (pOrg[x] - pRec[x]);
4736	iCount[iClassIdx] ++;
4737	}
4738	}
4739	pOrg += iStride;
4740	pRec += iStride;
4741	}
4742
4743	iStats = m_iOffsetOrg[iPartIdx][SAO_BO_1];
4744	iCount = m_iCount [iPartIdx][SAO_BO_1];
4745
4746	pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
4747	pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
4748
4749	for (y=0; y<iLcuHeight; y++)
4750	{
4751	for (x=0; x<iLcuWidth; x++)
4752	{
4753	iClassIdx = m_ppLumaTableBo1[pRec[x]];
4754	if (iClassIdx)
4755	{
4756	iStats[iClassIdx] += (pOrg[x] - pRec[x]);
4757	iCount[iClassIdx] ++;
4758	}
4759	}
4760	pOrg += iStride;
4761	pRec += iStride;
4762	}
4763	}
4764
4765	Int iSignLeft;
4766	Int iSignRight;
4767	Int iSignDown;
4768	Int iSignDown1;
4769	Int iSignDown2;
4770
4771	UInt uiEdgeType;
4772
4773	// if (m_iAoType == EO_0 \|\| m_iAoType == EO_1 \|\| m_iAoType == EO_2 \|\| m_iAoType == EO_3)
4774	{
4775	// if (m_iAoType == EO_0 )
4776	{
4777	iStats = m_iOffsetOrg[iPartIdx][SAO_EO_0];
4778	iCount = m_iCount [iPartIdx][SAO_EO_0];
4779
4780	pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
4781	pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
4782	for (y=0; y<iLcuHeight; y++)
4783	{
4784	iSignLeft = xSign(pRec[1] - pRec[0]);
4785	for (x=1; x<iLcuWidth-1; x++)
4786	{
4787	iSignRight = xSign(pRec[x] - pRec[x+1]);
4788	uiEdgeType = iSignRight + iSignLeft + 2;
4789	iSignLeft = -iSignRight;
4790
4791	iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
4792	iCount[m_auiEoTable[uiEdgeType]] ++;
4793	}
4794	pOrg += iStride;
4795	pRec += iStride;
4796	}
4797	}
4798
4799	// if (m_iAoType == EO_1 )
4800	{
4801	iStats = m_iOffsetOrg[iPartIdx][SAO_EO_1];
4802	iCount = m_iCount [iPartIdx][SAO_EO_1];
4803
4804	pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
4805	pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
4806	pOrg += iStride;
4807	pRec += iStride;
4808
4809	for (x=0; x< iLcuWidth; x++)
4810	{
4811	m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride]);
4812	}
4813
4814	for (y=1; y<iLcuHeight-1; y++)
4815	{
4816	for (x=0; x<iLcuWidth; x++)
4817	{
4818
4819	iSignDown = xSign(pRec[x] - pRec[x+iStride]);
4820	uiEdgeType = iSignDown + m_iUpBuff1[x] + 2;
4821	m_iUpBuff1[x]= -iSignDown;
4822
4823	iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
4824	iCount[m_auiEoTable[uiEdgeType]] ++;
4825
4826	}
4827	pOrg += iStride;
4828	pRec += iStride;
4829	}
4830	}
4831	// if (m_iAoType == EO_2 )
4832	{
4833	iStats = m_iOffsetOrg[iPartIdx][SAO_EO_2];
4834	iCount = m_iCount [iPartIdx][SAO_EO_2];
4835
4836	pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
4837	pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
4838	pOrg += iStride;
4839	pRec += iStride;
4840	for (x=1; x<iLcuWidth; x++)
4841	{
4842	m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride-1]);
4843	}
4844	for (y=1; y<iLcuHeight-1; y++)
4845	{
4846	iSignDown2 = xSign(pRec[iStride] - pRec[0]);
4847	for (x=1; x<iLcuWidth-1; x++)
4848	{
4849	iSignDown1 = xSign(pRec[x] - pRec[x+iStride+1]) ;
4850	uiEdgeType = iSignDown1 + m_iUpBuff1[x] + 2;
4851	m_iUpBufft[x+1] = -iSignDown1;
4852	iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
4853	iCount[m_auiEoTable[uiEdgeType]] ++;
4854	}
4855	m_iUpBufft[1] = iSignDown2;
4856	ipSwap = m_iUpBuff1;
4857	m_iUpBuff1 = m_iUpBufft;
4858	m_iUpBufft = ipSwap;
4859
4860	pRec += iStride;
4861	pOrg += iStride;
4862	}
4863	}
4864	// if (m_iAoType == EO_3 )
4865	{
4866	iStats = m_iOffsetOrg[iPartIdx][SAO_EO_3];
4867	iCount = m_iCount [iPartIdx][SAO_EO_3];
4868
4869	pOrg = m_pcPic->getPicYuvOrg()->getLumaAddr(iAddr);
4870	pRec = m_pcPic->getPicYuvRec()->getLumaAddr(iAddr);
4871	pOrg += iStride;
4872	pRec += iStride;
4873	for (x=0; x<iLcuWidth-1; x++)
4874	{
4875	m_iUpBuff1[x] = xSign(pRec[x] - pRec[x-iStride+1]);
4876	}
4877
4878	for (y=1; y<iLcuHeight-1; y++)
4879	{
4880	for (x=1; x<iLcuWidth-1; x++)
4881	{
4882	iSignDown1 = xSign(pRec[x] - pRec[x+iStride-1]) ;
4883	uiEdgeType = iSignDown1 + m_iUpBuff1[x] + 2;
4884	m_iUpBuff1[x-1] = -iSignDown1;
4885	iStats[m_auiEoTable[uiEdgeType]] += (pOrg[x] - pRec[x]);
4886	iCount[m_auiEoTable[uiEdgeType]] ++;
4887	}
4888	m_iUpBuff1[iLcuWidth-2] = xSign(pRec[iLcuWidth-2 + iStride] - pRec[iLcuWidth-1]);
4889
4890	pRec += iStride;
4891	pOrg += iStride;
4892	}
4893	}
4894	}
4895
4896	}
4897
4898	/** run get QAO Stats
4899	* \param pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt
4900	*/
4901	Void TEncSampleAdaptiveOffset::xGetQAOStats(TComPicYuv* pcPicYuvOrg, TComPicYuv* pcPicYuvRec, TComPicYuv* pcPicYuvExt)
4902	{
4903	Int iLevelIdx, iPartIdx, iTypeIdx, iClassIdx;
4904	Int i;
4905	Int iNumTotalType = MAX_NUM_SAO_TYPE;
4906	Int LcuIdxX;
4907	Int LcuIdxY;
4908	Int iAddr;
4909	Int iFrameWidthInCU = m_pcPic->getFrameWidthInCU();
4910	Int iDownPartIdx;
4911	Int iPartStart;
4912	Int iPartEnd;
4913
4914	if (m_uiMaxSplitLevel == 0)
4915	{
4916	iPartIdx = 0;
4917	for (LcuIdxY = m_psQAOPart[iPartIdx].StartCUY; LcuIdxY<= m_psQAOPart[iPartIdx].EndCUY; LcuIdxY++)
4918	{
4919	for (LcuIdxX = m_psQAOPart[iPartIdx].StartCUX; LcuIdxX<= m_psQAOPart[iPartIdx].EndCUX; LcuIdxX++)
4920	{
4921	iAddr = LcuIdxY*iFrameWidthInCU + LcuIdxX;
4922	calcAoStatsCu(iAddr, iPartIdx);
4923	}
4924	}
4925
4926	}
4927	else
4928	{
4929	for(iPartIdx=m_aiNumCulPartsLevel[m_uiMaxSplitLevel-1]; iPartIdx<m_aiNumCulPartsLevel[m_uiMaxSplitLevel]; iPartIdx++)
4930	{
4931	for (LcuIdxY = m_psQAOPart[iPartIdx].StartCUY; LcuIdxY<= m_psQAOPart[iPartIdx].EndCUY; LcuIdxY++)
4932	{
4933	for (LcuIdxX = m_psQAOPart[iPartIdx].StartCUX; LcuIdxX<= m_psQAOPart[iPartIdx].EndCUX; LcuIdxX++)
4934	{
4935	iAddr = LcuIdxY*iFrameWidthInCU + LcuIdxX;
4936	calcAoStatsCu(iAddr, iPartIdx);
4937	}
4938	}
4939	}
4940	for (iLevelIdx=m_uiMaxSplitLevel-1; iLevelIdx>=0; iLevelIdx--)
4941	{
4942	iPartStart = (iLevelIdx > 0) ? m_aiNumCulPartsLevel[iLevelIdx-1] : 0;
4943	iPartEnd = m_aiNumCulPartsLevel[iLevelIdx];
4944	for(iPartIdx = iPartStart; iPartIdx < iPartEnd; iPartIdx++)
4945	{
4946	for (i=0; i<NUM_DOWN_PART; i++)
4947	{
4948	iDownPartIdx = m_psQAOPart[iPartIdx].DownPartsIdx[i];
4949	for (iTypeIdx=0; iTypeIdx<iNumTotalType; iTypeIdx++)
4950	{
4951	for (iClassIdx=0; iClassIdx<m_iNumClass[iTypeIdx]+1; iClassIdx++)
4952	{
4953	m_iOffsetOrg[iPartIdx][iTypeIdx][iClassIdx] += m_iOffsetOrg[iDownPartIdx][iTypeIdx][iClassIdx];
4954	m_iCount [iPartIdx][iTypeIdx][iClassIdx] += m_iCount [iDownPartIdx][iTypeIdx][iClassIdx];
4955	}
4956	}
4957	}
4958	}
4959	}
4960	}
4961	}
4962
4963	/** Sample adaptive offset Process
4964	* \param dLambda
4965	*/
4966	Void TEncSampleAdaptiveOffset::SAOProcess( Double dLambda)
4967	{
4968	// set lambda
4969	TComPicYuv* pcPicYuvOrg = m_pcPic->getPicYuvOrg();
4970	TComPicYuv* pcPicYuvRec = m_pcPic->getPicYuvRec();
4971
4972	TComPicYuv* pcPicYuvExt = NULL;
4973
4974	m_eSliceType = m_pcPic->getSlice(0)->getSliceType();
4975	m_iPicNalReferenceIdc = (m_pcPic->getSlice(0)->isReferenced() ? 1 :0);
4976
4977	m_dLambdaLuma = dLambda;
4978	m_dLambdaChroma = dLambda;
4979
4980	if (g_uiBitIncrement>1)
4981	{
4982	m_uiAoBitDepth = 1;
4983	}
4984	else
4985	{
4986	m_uiAoBitDepth = 0;
4987	}
4988
4989	Double dCostFinal = 0;
4990
4991	xGetQAOStats(pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt);
4992	xQuadTreeDecisionFunc(0, pcPicYuvOrg, pcPicYuvRec, pcPicYuvExt, dCostFinal);
4993	m_bSaoFlag = dCostFinal < m_iDistOrg[0] ? 1:0;
4994
4995
4996	if(m_bSaoFlag)
4997	{
4998	xProcessQuadTreeAo( 0, pcPicYuvRec, pcPicYuvExt);
4999	}
5000
5001	}
5002
5003
5004	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: