1 /*****************************************************************************
2  *
3  *  XVID MPEG-4 VIDEO CODEC
4  *  - PSNR-HVS-M plugin: computes the PSNR-HVS-M metric -
5  *
6  *  Copyright(C) 2010 Michael Militzer <michael@xvid.org>
7  *
8  *  This program is free software ; you can redistribute it and/or modify
9  *  it under the terms of the GNU General Public License as published by
10  *  the Free Software Foundation ; either version 2 of the License, or
11  *  (at your option) any later version.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program ; if not, write to the Free Software
20  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
21  *
22  * $Id: plugin_psnrhvsm.c 1985 2011-05-18 09:02:35Z Isibaar $
23  *
24  ****************************************************************************/
25 
26 /*****************************************************************************
27  *
28  * The PSNR-HVS-M metric is described in the following paper:
29  *
30  * "On between-coefficient contrast masking of DCT basis functions", by
31  * N. Ponomarenko, F. Silvestri, K. Egiazarian, M. Carli, J. Astola, V. Lukin,
32  * in Proceedings of the Third International Workshop on Video Processing and
33  * Quality Metrics for Consumer Electronics VPQM-07, January, 2007, 4 p.
34  *
35  * http://www.ponomarenko.info/psnrhvsm.htm
36  *
37  ****************************************************************************/
38 
39 #include <stdlib.h>
40 #include <stdio.h>
41 #include <math.h>
42 #include "../portab.h"
43 #include "../xvid.h"
44 #include "../dct/fdct.h"
45 #include "../image/image.h"
46 #include "../motion/sad.h"
47 #include "../utils/mem_transfer.h"
48 #include "../utils/emms.h"
49 
50 typedef struct {
51 
52 	uint64_t mse_sum_y; /* for avrg psnr-hvs-m */
53 	uint64_t mse_sum_u;
54 	uint64_t mse_sum_v;
55 
56 	long frame_cnt;
57 
58 } psnrhvsm_data_t; /* internal plugin data */
59 
60 
61 #if 0 /* Floating-point implementation: Slow but accurate */
62 
63 static const float CSF_Coeff[64] = {
64 	1.608443f, 2.339554f, 2.573509f, 1.608443f, 1.072295f, 0.643377f, 0.504610f, 0.421887f,
65 	2.144591f, 2.144591f, 1.838221f, 1.354478f, 0.989811f, 0.443708f, 0.428918f, 0.467911f,
66 	1.838221f, 1.979622f, 1.608443f, 1.072295f, 0.643377f, 0.451493f, 0.372972f, 0.459555f,
67 	1.838221f, 1.513829f, 1.169777f, 0.887417f, 0.504610f, 0.295806f, 0.321689f, 0.415082f,
68 	1.429727f, 1.169777f, 0.695543f, 0.459555f, 0.378457f, 0.236102f, 0.249855f, 0.334222f,
69 	1.072295f, 0.735288f, 0.467911f, 0.402111f, 0.317717f, 0.247453f, 0.227744f, 0.279729f,
70 	0.525206f, 0.402111f, 0.329937f, 0.295806f, 0.249855f, 0.212687f, 0.214459f, 0.254803f,
71 	0.357432f, 0.279729f, 0.270896f, 0.262603f, 0.229778f, 0.257351f, 0.249855f, 0.259950f
72 };
73 
74 static const float Mask_Coeff[64] = {
75 	0.000000f, 0.826446f, 1.000000f, 0.390625f, 0.173611f, 0.062500f, 0.038447f, 0.026874f,
76 	0.694444f, 0.694444f, 0.510204f, 0.277008f, 0.147929f, 0.029727f, 0.027778f, 0.033058f,
77 	0.510204f, 0.591716f, 0.390625f, 0.173611f, 0.062500f, 0.030779f, 0.021004f, 0.031888f,
78 	0.510204f, 0.346021f, 0.206612f, 0.118906f, 0.038447f, 0.013212f, 0.015625f, 0.026015f,
79 	0.308642f, 0.206612f, 0.073046f, 0.031888f, 0.021626f, 0.008417f, 0.009426f, 0.016866f,
80 	0.173611f, 0.081633f, 0.033058f, 0.024414f, 0.015242f, 0.009246f, 0.007831f, 0.011815f,
81 	0.041649f, 0.024414f, 0.016437f, 0.013212f, 0.009426f, 0.006830f, 0.006944f, 0.009803f,
82 	0.019290f, 0.011815f, 0.011080f, 0.010412f, 0.007972f, 0.010000f, 0.009426f, 0.010203f
83 };
84 
85 static uint32_t calc_SSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride)
86 {
87 	int x, y, i, j;
88 	uint32_t Global_A, Global_B, Sum_A = 0, Sum_B = 0;
89 	uint32_t Local[8] = {0, 0, 0, 0, 0, 0, 0, 0};
90 	uint32_t Local_Square[8] = {0, 0, 0, 0, 0, 0, 0, 0};
91 	float MASK_A = 0.f, MASK_B = 0.f;
92 	float Mult1 = 1.f, Mult2 = 1.f;
93 	uint32_t MSE_H = 0;
94 
95 	/* Step 1: Calculate CSF weighted energy of DCT coefficients */
96 	for (y = 0; y < 8; y++) {
97 		for (x = 0; x < 8; x++) {
98 			MASK_A += (float)(DCT_A[y*8 + x]*DCT_A[y*8 + x])*Mask_Coeff[y*8 + x];
99 			MASK_B += (float)(DCT_B[y*8 + x]*DCT_B[y*8 + x])*Mask_Coeff[y*8 + x];
100 		}
101 	}
102 
103 	/* Step 2: Determine local variances compared to entire block variance */
104 	for (y = 0; y < 2; y++) {
105 		for (x = 0; x < 2; x++) {
106 			for (j = 0; j < 4; j++) {
107 				for (i = 0; i < 4; i++) {
108 					uint8_t A = IMG_A[(y*4+j)*stride + 4*x + i];
109 					uint8_t B = IMG_B[(y*4+j)*stride + 4*x + i];
110 
111 					Local[y*2 + x] += A;
112 					Local[y*2 + x + 4] += B;
113 					Local_Square[y*2 + x] += A*A;
114 					Local_Square[y*2 + x + 4] += B*B;
115 				}
116 			}
117 		}
118 	}
119 
120 	Global_A = Local[0] + Local[1] + Local[2] + Local[3];
121 	Global_B = Local[4] + Local[5] + Local[6] + Local[7];
122 
123 	for (i = 0; i < 8; i++)
124 		Local[i] = (Local_Square[i]<<4) - (Local[i]*Local[i]); /* 16*Var(Di) */
125 
126 	Local_Square[0] += (Local_Square[1] + Local_Square[2] + Local_Square[3]);
127 	Local_Square[4] += (Local_Square[5] + Local_Square[6] + Local_Square[7]);
128 
129 	Global_A = (Local_Square[0]<<6) - Global_A*Global_A; /* 64*Var(D) */
130 	Global_B = (Local_Square[4]<<6) - Global_B*Global_B; /* 64*Var(D) */
131 
132 	/* Step 3: Calculate contrast masking threshold */
133 	if (Global_A)
134 		Mult1 = (float)(Local[0]+Local[1]+Local[2]+Local[3])/((float)(Global_A)/4.f);
135 
136 	if (Global_B)
137 		Mult2 = (float)(Local[4]+Local[5]+Local[6]+Local[7])/((float)(Global_B)/4.f);
138 
139 	MASK_A = (float)sqrt(MASK_A * Mult1) / 32.f;
140 	MASK_B = (float)sqrt(MASK_B * Mult2) / 32.f;
141 
142 	if (MASK_B > MASK_A) MASK_A = MASK_B; /* MAX(MASK_A, MASK_B) */
143 
144 	/* Step 4: Calculate MSE of DCT coeffs reduced by masking effect */
145 	for (j = 0; j < 8; j++) {
146 		for (i = 0; i < 8; i++) {
147 			float u = (float)abs(DCT_A[j*8 + i] - DCT_B[j*8 + i]);
148 
149 			if ((i|j)>0) {
150 				if (u < (MASK_A / Mask_Coeff[j*8 + i]))
151 					u = 0; /* The error is not perceivable */
152 				else
153 					u -= (MASK_A / Mask_Coeff[j*8 + i]);
154 			}
155 
156 			MSE_H += (uint32_t) ((16.f*(u * CSF_Coeff[j*8 + i])*(u * CSF_Coeff[j*8 + i])) + 0.5f);
157 		}
158 	}
159 	return MSE_H; /* Fixed-point value right-shifted by four */
160 }
161 
162 #else
163 
calc_SSE_H(int16_t * DCT_A,int16_t * DCT_B,uint8_t * IMG_A,uint8_t * IMG_B,int stride)164 static uint32_t calc_SSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride)
165 {
166 	DECLARE_ALIGNED_MATRIX(sums, 1, 8, uint16_t, CACHE_LINE);
167 	DECLARE_ALIGNED_MATRIX(squares, 1, 8, uint32_t, CACHE_LINE);
168 	uint32_t i, Global_A, Global_B, Sum_A = 0, Sum_B = 0;
169 	uint32_t local[8], MASK_A, MASK_B, Mult1 = 64, Mult2 = 64;
170 
171 	/* Step 1: Calculate CSF weighted energy of DCT coefficients */
172 
173 	Sum_A = coeff8_energy(DCT_A);
174 	Sum_B = coeff8_energy(DCT_B);
175 
176 	/* Step 2: Determine local variances compared to entire block variance */
177 
178 	Global_A = blocksum8(IMG_A, stride, sums, squares);
179 	Global_B = blocksum8(IMG_B, stride, &sums[4], &squares[4]);
180 
181 	for (i = 0; i < 8; i++)
182 		local[i] = (squares[i]<<4) - (sums[i]*sums[i]); /* 16*Var(Di) */
183 
184 	squares[0] += (squares[1] + squares[2] + squares[3]);
185 	squares[4] += (squares[5] + squares[6] + squares[7]);
186 
187 	Global_A = (squares[0]<<6) - Global_A*Global_A; /* 64*Var(D) */
188 	Global_B = (squares[4]<<6) - Global_B*Global_B; /* 64*Var(D) */
189 
190 	/* Step 3: Calculate contrast masking threshold */
191 
192 	if (Global_A)
193 		Mult1 = ((local[0]+local[1]+local[2]+local[3])<<8) / Global_A;
194 
195 	if (Global_B)
196 		Mult2 = ((local[4]+local[5]+local[6]+local[7])<<8) / Global_B;
197 
198 	MASK_A = isqrt(2*Sum_A*Mult1) + 16;
199 	MASK_B = isqrt(2*Sum_B*Mult2) + 16;
200 
201 	if (MASK_B > MASK_A)  /* MAX(MASK_A, MASK_B) */
202 		MASK_A = ((MASK_B + 32) >> 6);
203 	else
204 		MASK_A = ((MASK_A + 32) >> 6);
205 
206 	/* Step 4: Calculate MSE of DCT coeffs reduced by masking effect */
207 
208 	return sseh8_16bit(DCT_A, DCT_B, (uint16_t) MASK_A);
209 }
210 
211 #endif
212 
psnrhvsm_after(xvid_plg_data_t * data,psnrhvsm_data_t * psnrhvsm)213 static void psnrhvsm_after(xvid_plg_data_t *data, psnrhvsm_data_t *psnrhvsm)
214 {
215 	DECLARE_ALIGNED_MATRIX(DCT, 2, 64, int16_t, CACHE_LINE);
216 	int32_t x, y, u, v;
217 	int16_t *DCT_A = &DCT[0], *DCT_B = &DCT[64];
218 	uint64_t sse_y = 0, sse_u = 0, sse_v = 0;
219 
220 	for (y = 0; y < data->height>>3; y++) {
221 		uint8_t *IMG_A = (uint8_t *) data->original.plane[0];
222 		uint8_t *IMG_B = (uint8_t *) data->current.plane[0];
223 		uint32_t stride = data->original.stride[0];
224 
225 		for (x = 0; x < data->width>>3; x++) { /* non multiple of 8 handling ?? */
226 			int offset = (y<<3)*stride + (x<<3);
227 
228 			emms();
229 
230 			/* Transfer data */
231 			transfer_8to16copy(DCT_A, IMG_A + offset, stride);
232 			transfer_8to16copy(DCT_B, IMG_B + offset, stride);
233 
234 			/* Perform DCT */
235 			fdct(DCT_A);
236 			fdct(DCT_B);
237 
238 			emms();
239 
240 			/* Calculate SSE_H reduced by contrast masking effect */
241 			sse_y += calc_SSE_H(DCT_A, DCT_B, IMG_A + offset, IMG_B + offset, stride);
242 		}
243 	}
244 
245 	for (y = 0; y < data->height>>4; y++) {
246 		uint8_t *U_A = (uint8_t *) data->original.plane[1];
247 		uint8_t *V_A = (uint8_t *) data->original.plane[2];
248 		uint8_t *U_B = (uint8_t *) data->current.plane[1];
249 		uint8_t *V_B = (uint8_t *) data->current.plane[2];
250 		uint32_t stride_uv = data->current.stride[1];
251 
252 		for (x = 0; x < data->width>>4; x++) { /* non multiple of 8 handling ?? */
253 			int offset = (y<<3)*stride_uv + (x<<3);
254 
255 			emms();
256 
257 			/* Transfer data */
258 			transfer_8to16copy(DCT_A, U_A + offset, stride_uv);
259 			transfer_8to16copy(DCT_B, U_B + offset, stride_uv);
260 
261 			/* Perform DCT */
262 			fdct(DCT_A);
263 			fdct(DCT_B);
264 
265 			emms();
266 
267 			/* Calculate SSE_H reduced by contrast masking effect */
268 			sse_u += calc_SSE_H(DCT_A, DCT_B, U_A + offset, U_B + offset, stride_uv);
269 
270 			emms();
271 
272 			/* Transfer data */
273 			transfer_8to16copy(DCT_A, V_A + offset, stride_uv);
274 			transfer_8to16copy(DCT_B, V_B + offset, stride_uv);
275 
276 			/* Perform DCT */
277 			fdct(DCT_A);
278 			fdct(DCT_B);
279 
280 			emms();
281 
282 			/* Calculate SSE_H reduced by contrast masking effect */
283 			sse_v += calc_SSE_H(DCT_A, DCT_B, V_A + offset, V_B + offset, stride_uv);
284 		}
285 	}
286 
287 	y = (int32_t) ( 4*16*sse_y / (data->width * data->height));
288 	u = (int32_t) (16*16*sse_u / (data->width * data->height));
289 	v = (int32_t) (16*16*sse_v / (data->width * data->height));
290 
291 	psnrhvsm->mse_sum_y += y;
292 	psnrhvsm->mse_sum_u += u;
293 	psnrhvsm->mse_sum_v += v;
294 	psnrhvsm->frame_cnt++;
295 
296 	printf("       psnrhvsm y: %2.2f, psnrhvsm u: %2.2f, psnrhvsm v: %2.2f\n", sse_to_PSNR(y, 1024), sse_to_PSNR(u, 1024), sse_to_PSNR(v, 1024));
297 }
298 
psnrhvsm_create(xvid_plg_create_t * create,void ** handle)299 static int psnrhvsm_create(xvid_plg_create_t *create, void **handle)
300 {
301 	psnrhvsm_data_t *psnrhvsm;
302 	psnrhvsm = (psnrhvsm_data_t *) malloc(sizeof(psnrhvsm_data_t));
303 
304 	psnrhvsm->mse_sum_y = 0;
305 	psnrhvsm->mse_sum_u = 0;
306 	psnrhvsm->mse_sum_v = 0;
307 
308 	psnrhvsm->frame_cnt = 0;
309 
310 	*(handle) = (void*) psnrhvsm;
311 	return 0;
312 }
313 
xvid_plugin_psnrhvsm(void * handle,int opt,void * param1,void * param2)314 int xvid_plugin_psnrhvsm(void *handle, int opt, void *param1, void *param2)
315 {
316 	switch(opt) {
317 		case(XVID_PLG_INFO):
318  			((xvid_plg_info_t *)param1)->flags = XVID_REQORIGINAL;
319 			break;
320 		case(XVID_PLG_CREATE):
321 			psnrhvsm_create((xvid_plg_create_t *)param1,(void **)param2);
322 			break;
323 		case(XVID_PLG_BEFORE):
324 		case(XVID_PLG_FRAME):
325 			break;
326 		case(XVID_PLG_AFTER):
327 			psnrhvsm_after((xvid_plg_data_t *)param1, (psnrhvsm_data_t *)handle);
328 			break;
329 		case(XVID_PLG_DESTROY):
330 			{
331 				uint32_t y, u, v;
332 				psnrhvsm_data_t *psnrhvsm = (psnrhvsm_data_t *)handle;
333 
334 				if (psnrhvsm) {
335 					y = (uint32_t) (psnrhvsm->mse_sum_y / psnrhvsm->frame_cnt);
336 					u = (uint32_t) (psnrhvsm->mse_sum_u / psnrhvsm->frame_cnt);
337 					v = (uint32_t) (psnrhvsm->mse_sum_v / psnrhvsm->frame_cnt);
338 
339 					emms();
340 					printf("Average psnrhvsm y: %2.2f, psnrhvsm u: %2.2f, psnrhvsm v: %2.2f\n",
341 						sse_to_PSNR(y, 1024), sse_to_PSNR(u, 1024), sse_to_PSNR(v, 1024));
342 					free(psnrhvsm);
343 				}
344 			}
345 			break;
346 		default:
347 			break;
348 	}
349 	return 0;
350 };
351