/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include <stdlib.h>
12 #include <time.h>
13
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17
18 #ifdef ENABLE_ROW_TESTS
19 #include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
20 #endif
21
// STRINGIZE turns its argument into a string literal exactly as written.
// FILELINESTR builds the literal "<file>:<line>"; because the line argument is
// forwarded through a second macro it is expanded first, so passing __LINE__
// yields the line number rather than the text "__LINE__".
#define STRINGIZE(token) #token
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
24
25 namespace libyuv {
26
27 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I420TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)28 static int I420TestFilter(int src_width,
29 int src_height,
30 int dst_width,
31 int dst_height,
32 FilterMode f,
33 int benchmark_iterations,
34 int disable_cpu_flags,
35 int benchmark_cpu_info) {
36 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
37 return 0;
38 }
39
40 int i, j;
41 int src_width_uv = (Abs(src_width) + 1) >> 1;
42 int src_height_uv = (Abs(src_height) + 1) >> 1;
43
44 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
45 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
46
47 int src_stride_y = Abs(src_width);
48 int src_stride_uv = src_width_uv;
49
50 align_buffer_page_end(src_y, src_y_plane_size);
51 align_buffer_page_end(src_u, src_uv_plane_size);
52 align_buffer_page_end(src_v, src_uv_plane_size);
53 if (!src_y || !src_u || !src_v) {
54 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
55 return 0;
56 }
57 MemRandomize(src_y, src_y_plane_size);
58 MemRandomize(src_u, src_uv_plane_size);
59 MemRandomize(src_v, src_uv_plane_size);
60
61 int dst_width_uv = (dst_width + 1) >> 1;
62 int dst_height_uv = (dst_height + 1) >> 1;
63
64 int64_t dst_y_plane_size = (dst_width) * (dst_height);
65 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
66
67 int dst_stride_y = dst_width;
68 int dst_stride_uv = dst_width_uv;
69
70 align_buffer_page_end(dst_y_c, dst_y_plane_size);
71 align_buffer_page_end(dst_u_c, dst_uv_plane_size);
72 align_buffer_page_end(dst_v_c, dst_uv_plane_size);
73 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
74 align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
75 align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
76 if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
77 !dst_v_opt) {
78 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
79 return 0;
80 }
81
82 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
83 double c_time = get_time();
84 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
85 src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
86 dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
87 c_time = (get_time() - c_time);
88
89 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
90 double opt_time = get_time();
91 for (i = 0; i < benchmark_iterations; ++i) {
92 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
93 src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
94 dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
95 f);
96 }
97 opt_time = (get_time() - opt_time) / benchmark_iterations;
98 // Report performance of C vs OPT.
99 printf("filter %d - %8d us C - %8d us OPT\n", f,
100 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
101
102 // C version may be a little off from the optimized. Order of
103 // operations may introduce rounding somewhere. So do a difference
104 // of the buffers and look to see that the max difference is not
105 // over 3.
106 int max_diff = 0;
107 for (i = 0; i < (dst_height); ++i) {
108 for (j = 0; j < (dst_width); ++j) {
109 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
110 dst_y_opt[(i * dst_stride_y) + j]);
111 if (abs_diff > max_diff) {
112 max_diff = abs_diff;
113 }
114 }
115 }
116
117 for (i = 0; i < (dst_height_uv); ++i) {
118 for (j = 0; j < (dst_width_uv); ++j) {
119 int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
120 dst_u_opt[(i * dst_stride_uv) + j]);
121 if (abs_diff > max_diff) {
122 max_diff = abs_diff;
123 }
124 abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
125 dst_v_opt[(i * dst_stride_uv) + j]);
126 if (abs_diff > max_diff) {
127 max_diff = abs_diff;
128 }
129 }
130 }
131
132 free_aligned_buffer_page_end(dst_y_c);
133 free_aligned_buffer_page_end(dst_u_c);
134 free_aligned_buffer_page_end(dst_v_c);
135 free_aligned_buffer_page_end(dst_y_opt);
136 free_aligned_buffer_page_end(dst_u_opt);
137 free_aligned_buffer_page_end(dst_v_opt);
138 free_aligned_buffer_page_end(src_y);
139 free_aligned_buffer_page_end(src_u);
140 free_aligned_buffer_page_end(src_v);
141
142 return max_diff;
143 }
144
145 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
146 // 0 = exact.
I420TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)147 static int I420TestFilter_16(int src_width,
148 int src_height,
149 int dst_width,
150 int dst_height,
151 FilterMode f,
152 int benchmark_iterations,
153 int disable_cpu_flags,
154 int benchmark_cpu_info) {
155 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156 return 0;
157 }
158
159 int i;
160 int src_width_uv = (Abs(src_width) + 1) >> 1;
161 int src_height_uv = (Abs(src_height) + 1) >> 1;
162
163 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
164 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
165
166 int src_stride_y = Abs(src_width);
167 int src_stride_uv = src_width_uv;
168
169 align_buffer_page_end(src_y, src_y_plane_size);
170 align_buffer_page_end(src_u, src_uv_plane_size);
171 align_buffer_page_end(src_v, src_uv_plane_size);
172 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
173 align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
174 align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
175 if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
176 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
177 return 0;
178 }
179 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
180 uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
181 uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
182
183 MemRandomize(src_y, src_y_plane_size);
184 MemRandomize(src_u, src_uv_plane_size);
185 MemRandomize(src_v, src_uv_plane_size);
186
187 for (i = 0; i < src_y_plane_size; ++i) {
188 p_src_y_16[i] = src_y[i];
189 }
190 for (i = 0; i < src_uv_plane_size; ++i) {
191 p_src_u_16[i] = src_u[i];
192 p_src_v_16[i] = src_v[i];
193 }
194
195 int dst_width_uv = (dst_width + 1) >> 1;
196 int dst_height_uv = (dst_height + 1) >> 1;
197
198 int dst_y_plane_size = (dst_width) * (dst_height);
199 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
200
201 int dst_stride_y = dst_width;
202 int dst_stride_uv = dst_width_uv;
203
204 align_buffer_page_end(dst_y_8, dst_y_plane_size);
205 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
206 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
207 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
208 align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
209 align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
210
211 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
212 uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
213 uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
214
215 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
216 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
217 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
218 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
219 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
220 for (i = 0; i < benchmark_iterations; ++i) {
221 I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
222 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
223 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
224 dst_stride_uv, dst_width, dst_height, f);
225 }
226
227 // Expect an exact match.
228 int max_diff = 0;
229 for (i = 0; i < dst_y_plane_size; ++i) {
230 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
231 if (abs_diff > max_diff) {
232 max_diff = abs_diff;
233 }
234 }
235 for (i = 0; i < dst_uv_plane_size; ++i) {
236 int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
237 if (abs_diff > max_diff) {
238 max_diff = abs_diff;
239 }
240 abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
241 if (abs_diff > max_diff) {
242 max_diff = abs_diff;
243 }
244 }
245
246 free_aligned_buffer_page_end(dst_y_8);
247 free_aligned_buffer_page_end(dst_u_8);
248 free_aligned_buffer_page_end(dst_v_8);
249 free_aligned_buffer_page_end(dst_y_16);
250 free_aligned_buffer_page_end(dst_u_16);
251 free_aligned_buffer_page_end(dst_v_16);
252 free_aligned_buffer_page_end(src_y);
253 free_aligned_buffer_page_end(src_u);
254 free_aligned_buffer_page_end(src_v);
255 free_aligned_buffer_page_end(src_y_16);
256 free_aligned_buffer_page_end(src_u_16);
257 free_aligned_buffer_page_end(src_v_16);
258
259 return max_diff;
260 }
261
262 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I444TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)263 static int I444TestFilter(int src_width,
264 int src_height,
265 int dst_width,
266 int dst_height,
267 FilterMode f,
268 int benchmark_iterations,
269 int disable_cpu_flags,
270 int benchmark_cpu_info) {
271 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
272 return 0;
273 }
274
275 int i, j;
276 int src_width_uv = Abs(src_width);
277 int src_height_uv = Abs(src_height);
278
279 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
280 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
281
282 int src_stride_y = Abs(src_width);
283 int src_stride_uv = src_width_uv;
284
285 align_buffer_page_end(src_y, src_y_plane_size);
286 align_buffer_page_end(src_u, src_uv_plane_size);
287 align_buffer_page_end(src_v, src_uv_plane_size);
288 if (!src_y || !src_u || !src_v) {
289 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
290 return 0;
291 }
292 MemRandomize(src_y, src_y_plane_size);
293 MemRandomize(src_u, src_uv_plane_size);
294 MemRandomize(src_v, src_uv_plane_size);
295
296 int dst_width_uv = dst_width;
297 int dst_height_uv = dst_height;
298
299 int64_t dst_y_plane_size = (dst_width) * (dst_height);
300 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
301
302 int dst_stride_y = dst_width;
303 int dst_stride_uv = dst_width_uv;
304
305 align_buffer_page_end(dst_y_c, dst_y_plane_size);
306 align_buffer_page_end(dst_u_c, dst_uv_plane_size);
307 align_buffer_page_end(dst_v_c, dst_uv_plane_size);
308 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
309 align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
310 align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
311 if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
312 !dst_v_opt) {
313 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
314 return 0;
315 }
316
317 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
318 double c_time = get_time();
319 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
320 src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
321 dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
322 c_time = (get_time() - c_time);
323
324 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
325 double opt_time = get_time();
326 for (i = 0; i < benchmark_iterations; ++i) {
327 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
328 src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
329 dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
330 f);
331 }
332 opt_time = (get_time() - opt_time) / benchmark_iterations;
333 // Report performance of C vs OPT.
334 printf("filter %d - %8d us C - %8d us OPT\n", f,
335 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
336
337 // C version may be a little off from the optimized. Order of
338 // operations may introduce rounding somewhere. So do a difference
339 // of the buffers and look to see that the max difference is not
340 // over 3.
341 int max_diff = 0;
342 for (i = 0; i < (dst_height); ++i) {
343 for (j = 0; j < (dst_width); ++j) {
344 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
345 dst_y_opt[(i * dst_stride_y) + j]);
346 if (abs_diff > max_diff) {
347 max_diff = abs_diff;
348 }
349 }
350 }
351
352 for (i = 0; i < (dst_height_uv); ++i) {
353 for (j = 0; j < (dst_width_uv); ++j) {
354 int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
355 dst_u_opt[(i * dst_stride_uv) + j]);
356 if (abs_diff > max_diff) {
357 max_diff = abs_diff;
358 }
359 abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
360 dst_v_opt[(i * dst_stride_uv) + j]);
361 if (abs_diff > max_diff) {
362 max_diff = abs_diff;
363 }
364 }
365 }
366
367 free_aligned_buffer_page_end(dst_y_c);
368 free_aligned_buffer_page_end(dst_u_c);
369 free_aligned_buffer_page_end(dst_v_c);
370 free_aligned_buffer_page_end(dst_y_opt);
371 free_aligned_buffer_page_end(dst_u_opt);
372 free_aligned_buffer_page_end(dst_v_opt);
373 free_aligned_buffer_page_end(src_y);
374 free_aligned_buffer_page_end(src_u);
375 free_aligned_buffer_page_end(src_v);
376
377 return max_diff;
378 }
379
380 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
381 // 0 = exact.
I444TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)382 static int I444TestFilter_16(int src_width,
383 int src_height,
384 int dst_width,
385 int dst_height,
386 FilterMode f,
387 int benchmark_iterations,
388 int disable_cpu_flags,
389 int benchmark_cpu_info) {
390 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
391 return 0;
392 }
393
394 int i;
395 int src_width_uv = Abs(src_width);
396 int src_height_uv = Abs(src_height);
397
398 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
399 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
400
401 int src_stride_y = Abs(src_width);
402 int src_stride_uv = src_width_uv;
403
404 align_buffer_page_end(src_y, src_y_plane_size);
405 align_buffer_page_end(src_u, src_uv_plane_size);
406 align_buffer_page_end(src_v, src_uv_plane_size);
407 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
408 align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
409 align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
410 if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
411 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
412 return 0;
413 }
414 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
415 uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
416 uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
417
418 MemRandomize(src_y, src_y_plane_size);
419 MemRandomize(src_u, src_uv_plane_size);
420 MemRandomize(src_v, src_uv_plane_size);
421
422 for (i = 0; i < src_y_plane_size; ++i) {
423 p_src_y_16[i] = src_y[i];
424 }
425 for (i = 0; i < src_uv_plane_size; ++i) {
426 p_src_u_16[i] = src_u[i];
427 p_src_v_16[i] = src_v[i];
428 }
429
430 int dst_width_uv = dst_width;
431 int dst_height_uv = dst_height;
432
433 int dst_y_plane_size = (dst_width) * (dst_height);
434 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
435
436 int dst_stride_y = dst_width;
437 int dst_stride_uv = dst_width_uv;
438
439 align_buffer_page_end(dst_y_8, dst_y_plane_size);
440 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
441 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
442 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
443 align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
444 align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
445
446 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
447 uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
448 uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
449
450 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
451 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
452 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
453 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
454 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
455 for (i = 0; i < benchmark_iterations; ++i) {
456 I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
457 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
458 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
459 dst_stride_uv, dst_width, dst_height, f);
460 }
461
462 // Expect an exact match.
463 int max_diff = 0;
464 for (i = 0; i < dst_y_plane_size; ++i) {
465 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
466 if (abs_diff > max_diff) {
467 max_diff = abs_diff;
468 }
469 }
470 for (i = 0; i < dst_uv_plane_size; ++i) {
471 int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
472 if (abs_diff > max_diff) {
473 max_diff = abs_diff;
474 }
475 abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
476 if (abs_diff > max_diff) {
477 max_diff = abs_diff;
478 }
479 }
480
481 free_aligned_buffer_page_end(dst_y_8);
482 free_aligned_buffer_page_end(dst_u_8);
483 free_aligned_buffer_page_end(dst_v_8);
484 free_aligned_buffer_page_end(dst_y_16);
485 free_aligned_buffer_page_end(dst_u_16);
486 free_aligned_buffer_page_end(dst_v_16);
487 free_aligned_buffer_page_end(src_y);
488 free_aligned_buffer_page_end(src_u);
489 free_aligned_buffer_page_end(src_v);
490 free_aligned_buffer_page_end(src_y_16);
491 free_aligned_buffer_page_end(src_u_16);
492 free_aligned_buffer_page_end(src_v_16);
493
494 return max_diff;
495 }
496
497 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
NV12TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)498 static int NV12TestFilter(int src_width,
499 int src_height,
500 int dst_width,
501 int dst_height,
502 FilterMode f,
503 int benchmark_iterations,
504 int disable_cpu_flags,
505 int benchmark_cpu_info) {
506 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
507 return 0;
508 }
509
510 int i, j;
511 int src_width_uv = (Abs(src_width) + 1) >> 1;
512 int src_height_uv = (Abs(src_height) + 1) >> 1;
513
514 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
515 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2;
516
517 int src_stride_y = Abs(src_width);
518 int src_stride_uv = src_width_uv * 2;
519
520 align_buffer_page_end(src_y, src_y_plane_size);
521 align_buffer_page_end(src_uv, src_uv_plane_size);
522 if (!src_y || !src_uv) {
523 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
524 return 0;
525 }
526 MemRandomize(src_y, src_y_plane_size);
527 MemRandomize(src_uv, src_uv_plane_size);
528
529 int dst_width_uv = (dst_width + 1) >> 1;
530 int dst_height_uv = (dst_height + 1) >> 1;
531
532 int64_t dst_y_plane_size = (dst_width) * (dst_height);
533 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2;
534
535 int dst_stride_y = dst_width;
536 int dst_stride_uv = dst_width_uv * 2;
537
538 align_buffer_page_end(dst_y_c, dst_y_plane_size);
539 align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
540 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
541 align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
542 if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) {
543 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
544 return 0;
545 }
546
547 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
548 double c_time = get_time();
549 NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
550 dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width,
551 dst_height, f);
552 c_time = (get_time() - c_time);
553
554 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
555 double opt_time = get_time();
556 for (i = 0; i < benchmark_iterations; ++i) {
557 NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
558 dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv, dst_width,
559 dst_height, f);
560 }
561 opt_time = (get_time() - opt_time) / benchmark_iterations;
562 // Report performance of C vs OPT.
563 printf("filter %d - %8d us C - %8d us OPT\n", f,
564 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
565
566 // C version may be a little off from the optimized. Order of
567 // operations may introduce rounding somewhere. So do a difference
568 // of the buffers and look to see that the max difference is not
569 // over 3.
570 int max_diff = 0;
571 for (i = 0; i < (dst_height); ++i) {
572 for (j = 0; j < (dst_width); ++j) {
573 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
574 dst_y_opt[(i * dst_stride_y) + j]);
575 if (abs_diff > max_diff) {
576 max_diff = abs_diff;
577 }
578 }
579 }
580
581 for (i = 0; i < (dst_height_uv); ++i) {
582 for (j = 0; j < (dst_width_uv * 2); ++j) {
583 int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
584 dst_uv_opt[(i * dst_stride_uv) + j]);
585 if (abs_diff > max_diff) {
586 max_diff = abs_diff;
587 }
588 }
589 }
590
591 free_aligned_buffer_page_end(dst_y_c);
592 free_aligned_buffer_page_end(dst_uv_c);
593 free_aligned_buffer_page_end(dst_y_opt);
594 free_aligned_buffer_page_end(dst_uv_opt);
595 free_aligned_buffer_page_end(src_y);
596 free_aligned_buffer_page_end(src_uv);
597
598 return max_diff;
599 }
600
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
//
// DX(x, nom, denom): destination size, |x| rounded to a multiple of 2 * nom.
//   denom is accepted for symmetry with SX but is not used.
// SX(x, nom, denom): source size, the same rounding of x scaled by denom.
//   Unlike DX, the sign of x is not removed.
// Every parameter is parenthesized so expression arguments such as
// SX(w + 1, 3, 4) group as intended.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
606
607 #define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
608 TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
609 int diff = I420TestFilter( \
610 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
611 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
612 kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
613 benchmark_cpu_info_); \
614 EXPECT_LE(diff, max_diff); \
615 } \
616 TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
617 int diff = I444TestFilter( \
618 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
619 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
620 kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
621 benchmark_cpu_info_); \
622 EXPECT_LE(diff, max_diff); \
623 } \
624 TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_16) { \
625 int diff = I420TestFilter_16( \
626 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
627 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
628 kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
629 benchmark_cpu_info_); \
630 EXPECT_LE(diff, max_diff); \
631 } \
632 TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_16) { \
633 int diff = I444TestFilter_16( \
634 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
635 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
636 kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
637 benchmark_cpu_info_); \
638 EXPECT_LE(diff, max_diff); \
639 } \
640 TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) { \
641 int diff = NV12TestFilter( \
642 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
643 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
644 kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
645 benchmark_cpu_info_); \
646 EXPECT_LE(diff, max_diff); \
647 }
648
649 // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
650 // filtering is different fixed point implementations for SSSE3, Neon and C.
651 #ifdef ENABLE_SLOW_TESTS
652 #define TEST_FACTOR(name, nom, denom, boxdiff) \
653 TEST_FACTOR1(, name, None, nom, denom, 0) \
654 TEST_FACTOR1(, name, Linear, nom, denom, 3) \
655 TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
656 TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
657 #else
658 #define TEST_FACTOR(name, nom, denom, boxdiff) \
659 TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
660 TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
661 TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
662 TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
663 #endif
664
665 TEST_FACTOR(2, 1, 2, 0)
666 TEST_FACTOR(4, 1, 4, 0)
667 // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
668 TEST_FACTOR(3by4, 3, 4, 1)
669 TEST_FACTOR(3by8, 3, 8, 1)
670 TEST_FACTOR(3, 1, 3, 0)
671 #undef TEST_FACTOR1
672 #undef TEST_FACTOR
673 #undef SX
674 #undef DX
675
676 #define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
677 TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
678 int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
679 height, kFilter##filter, benchmark_iterations_, \
680 disable_cpu_flags_, benchmark_cpu_info_); \
681 EXPECT_LE(diff, max_diff); \
682 } \
683 TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
684 int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
685 height, kFilter##filter, benchmark_iterations_, \
686 disable_cpu_flags_, benchmark_cpu_info_); \
687 EXPECT_LE(diff, max_diff); \
688 } \
689 TEST_F(LibYUVScaleTest, \
690 DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
691 int diff = I420TestFilter_16( \
692 benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
693 benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
694 EXPECT_LE(diff, max_diff); \
695 } \
696 TEST_F(LibYUVScaleTest, \
697 DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
698 int diff = I444TestFilter_16( \
699 benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
700 benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
701 EXPECT_LE(diff, max_diff); \
702 } \
703 TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \
704 int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \
705 height, kFilter##filter, benchmark_iterations_, \
706 disable_cpu_flags_, benchmark_cpu_info_); \
707 EXPECT_LE(diff, max_diff); \
708 } \
709 TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
710 int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
711 Abs(benchmark_height_), kFilter##filter, \
712 benchmark_iterations_, disable_cpu_flags_, \
713 benchmark_cpu_info_); \
714 EXPECT_LE(diff, max_diff); \
715 } \
716 TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
717 int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
718 Abs(benchmark_height_), kFilter##filter, \
719 benchmark_iterations_, disable_cpu_flags_, \
720 benchmark_cpu_info_); \
721 EXPECT_LE(diff, max_diff); \
722 } \
723 TEST_F(LibYUVScaleTest, \
724 DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
725 int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
726 Abs(benchmark_height_), kFilter##filter, \
727 benchmark_iterations_, disable_cpu_flags_, \
728 benchmark_cpu_info_); \
729 EXPECT_LE(diff, max_diff); \
730 } \
731 TEST_F(LibYUVScaleTest, \
732 DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
733 int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
734 Abs(benchmark_height_), kFilter##filter, \
735 benchmark_iterations_, disable_cpu_flags_, \
736 benchmark_cpu_info_); \
737 EXPECT_LE(diff, max_diff); \
738 } \
739 TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) { \
740 int diff = NV12TestFilter(width, height, Abs(benchmark_width_), \
741 Abs(benchmark_height_), kFilter##filter, \
742 benchmark_iterations_, disable_cpu_flags_, \
743 benchmark_cpu_info_); \
744 EXPECT_LE(diff, max_diff); \
745 }
746
747 #ifdef ENABLE_SLOW_TESTS
748 // Test scale to a specified size with all 4 filters.
749 #define TEST_SCALETO(name, width, height) \
750 TEST_SCALETO1(, name, width, height, None, 0) \
751 TEST_SCALETO1(, name, width, height, Linear, 3) \
752 TEST_SCALETO1(, name, width, height, Bilinear, 3) \
753 TEST_SCALETO1(, name, width, height, Box, 3)
754 #else
755 // Test scale to a specified size with all 4 filters.
756 #define TEST_SCALETO(name, width, height) \
757 TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
758 TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
759 TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
760 TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
761 #endif
762
763 TEST_SCALETO(Scale, 1, 1)
764 TEST_SCALETO(Scale, 320, 240)
765 TEST_SCALETO(Scale, 569, 480)
766 TEST_SCALETO(Scale, 640, 360)
767 TEST_SCALETO(Scale, 1280, 720)
768 #ifdef ENABLE_SLOW_TESTS
769 TEST_SCALETO(Scale, 1920, 1080)
770 #endif // ENABLE_SLOW_TESTS
771 #undef TEST_SCALETO1
772 #undef TEST_SCALETO
773
774 #ifdef ENABLE_ROW_TESTS
775 #ifdef HAS_SCALEROWDOWN2_SSSE3
// Checks the 2x2 box down-scaling row functions on a hand-built two row image
// (row stride 128): the plain C version, the odd-width C version, and the
// SSSE3 versions of both. Prints a warning and checks nothing when SSSE3 is
// not detected.
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));

  if (!TestCpuFlag(kCpuHasSSSE3)) {
    printf("Warning SSSE3 not detected; Skipping test.\n");
    return;
  }

  // The image is already zeroed, so only the non-zero samples are written.
  // Each of the first four 2x2 blocks has a single sample set, in a different
  // corner of the block.
  orig_pixels[0] = 255u;       // Block 0: top left.
  orig_pixels[3] = 100u;       // Block 1: top right.
  orig_pixels[128 + 4] = 50u;  // Block 2: bottom left.
  orig_pixels[128 + 7] = 20u;  // Block 3: bottom right.
  // Last 2x2 block, used for the odd-width checks.
  orig_pixels[126] = 4u;
  orig_pixels[127] = 255u;
  orig_pixels[128 + 126] = 16u;
  orig_pixels[128 + 127] = 255u;

  // Outputs expected for blocks 0..4 from every variant under test.
  const unsigned int kExpectedHead[5] = {64u, 25u, 13u, 5u, 0u};

  // Test regular half size.
  ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
  for (int k = 0; k < 5; ++k) {
    EXPECT_EQ(kExpectedHead[k], dst_pixels_c[k]);
  }
  EXPECT_EQ(133u, dst_pixels_c[63]);

  // Test Odd width version - Last pixel is just 1 horizontal pixel.
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  for (int k = 0; k < 5; ++k) {
    EXPECT_EQ(kExpectedHead[k], dst_pixels_c[k]);
  }
  EXPECT_EQ(10u, dst_pixels_c[63]);

  // Test one pixel less, should skip the last pixel.
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
  for (int k = 0; k < 5; ++k) {
    EXPECT_EQ(kExpectedHead[k], dst_pixels_c[k]);
  }
  EXPECT_EQ(0u, dst_pixels_c[63]);

  // Test regular half size SSSE3.
  ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  for (int k = 0; k < 5; ++k) {
    EXPECT_EQ(kExpectedHead[k], dst_pixels_opt[k]);
  }
  EXPECT_EQ(133u, dst_pixels_opt[63]);

  // Compare C and SSSE3 match.
  ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  for (int k = 0; k < 64; ++k) {
    EXPECT_EQ(dst_pixels_c[k], dst_pixels_opt[k]);
  }
}
863 #endif // HAS_SCALEROWDOWN2_SSSE3
864
// C-linkage declarations of the 16-bit ScaleRowUp2 row functions called by
// TestScaleRowUp2_16 below.
extern "C" {
void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width);
void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width);
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width);
}  // extern "C"
877
// Scales a two-row ramp of 640-pixel rows up to 1280 pixels with
// ScaleRowUp2_16_C, repeats the scale benchmark_pixels_div1280_ times with the
// row function selected for this build and CPU (NEON, MMI, or C), and expects
// both outputs to match. The first and last C outputs are also spot-checked.
TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
  SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]);  // 2 rows + 1 pixel overrun.
  SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // The two outputs start with different fill bytes, so they do not compare
  // equal if neither is written.
  memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
  memset(dst_pixels_c, 2, sizeof(dst_pixels_c));

  // Source is a ramp: every pixel holds its own index.
  const int kNumSrcPixels = 640 * 2 + 1;
  for (int n = 0; n < kNumSrcPixels; ++n) {
    orig_pixels[n] = static_cast<uint16_t>(n);
  }

  // Reference result.
  ScaleRowUp2_16_C(orig_pixels, 640, dst_pixels_c, 1280);

  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleRowUp2_16_NEON(orig_pixels, 640, dst_pixels_opt, 1280);
      continue;
    }
#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
    if (TestCpuFlag(kCpuHasMMI)) {
      ScaleRowUp2_16_MMI(orig_pixels, 640, dst_pixels_opt, 1280);
      continue;
    }
#endif
    // No optimized row function compiled in or detected: use the C version.
    ScaleRowUp2_16_C(orig_pixels, 640, dst_pixels_opt, 1280);
  }

  for (int x = 0; x < 1280; ++x) {
    EXPECT_EQ(dst_pixels_c[x], dst_pixels_opt[x]);
  }
  EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
  EXPECT_EQ(dst_pixels_c[1279], 800);
}
917
// C-linkage declaration of the NEON row function called by
// TestScaleRowDown2Box_16 below.
extern "C" {
void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width);
}  // extern "C"
922
// Scales a two-row ramp of 2560-pixel rows down to 1280 pixels with
// ScaleRowDown2Box_16_C, repeats the scale benchmark_pixels_div1280_ times
// with the row function selected for this build and CPU (NEON or C), and
// expects both outputs to match. The first and last C outputs are also
// spot-checked.
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
  SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
  SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // The two outputs start with different fill bytes, so they do not compare
  // equal if neither is written.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Source is a ramp: every pixel holds its own index.
  const int kNumSrcPixels = 2560 * 2;
  for (int n = 0; n < kNumSrcPixels; ++n) {
    orig_pixels[n] = static_cast<uint16_t>(n);
  }

  // Reference result.
  ScaleRowDown2Box_16_C(orig_pixels, 2560, dst_pixels_c, 1280);

  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      ScaleRowDown2Box_16_NEON(orig_pixels, 2560, dst_pixels_opt, 1280);
      continue;
    }
#endif
    // No NEON row function compiled in or detected: use the C version.
    ScaleRowDown2Box_16_C(orig_pixels, 2560, dst_pixels_opt, 1280);
  }

  for (int x = 0; x < 1280; ++x) {
    EXPECT_EQ(dst_pixels_c[x], dst_pixels_opt[x]);
  }

  EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
  EXPECT_EQ(dst_pixels_c[1279], 3839);
}
956 #endif // ENABLE_ROW_TESTS
957
958 // Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
959 // difference.
960 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)961 static int TestPlaneFilter_16(int src_width,
962 int src_height,
963 int dst_width,
964 int dst_height,
965 FilterMode f,
966 int benchmark_iterations,
967 int disable_cpu_flags,
968 int benchmark_cpu_info) {
969 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
970 return 0;
971 }
972
973 int i;
974 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
975 int src_stride_y = Abs(src_width);
976 int dst_y_plane_size = dst_width * dst_height;
977 int dst_stride_y = dst_width;
978
979 align_buffer_page_end(src_y, src_y_plane_size);
980 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
981 align_buffer_page_end(dst_y_8, dst_y_plane_size);
982 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
983 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
984 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
985
986 MemRandomize(src_y, src_y_plane_size);
987 memset(dst_y_8, 0, dst_y_plane_size);
988 memset(dst_y_16, 1, dst_y_plane_size * 2);
989
990 for (i = 0; i < src_y_plane_size; ++i) {
991 p_src_y_16[i] = src_y[i] & 255;
992 }
993
994 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
995 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
996 dst_width, dst_height, f);
997 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
998
999 for (i = 0; i < benchmark_iterations; ++i) {
1000 ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
1001 dst_stride_y, dst_width, dst_height, f);
1002 }
1003
1004 // Expect an exact match.
1005 int max_diff = 0;
1006 for (i = 0; i < dst_y_plane_size; ++i) {
1007 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
1008 if (abs_diff > max_diff) {
1009 max_diff = abs_diff;
1010 }
1011 }
1012
1013 free_aligned_buffer_page_end(dst_y_8);
1014 free_aligned_buffer_page_end(dst_y_16);
1015 free_aligned_buffer_page_end(src_y);
1016 free_aligned_buffer_page_end(src_y_16);
1017
1018 return max_diff;
1019 }
1020
1021 // The following adjustments in dimensions ensure the scale factor will be
1022 // exactly achieved.
1023 // 2 is chroma subsample.
1024 #define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
1025 #define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
1026
1027 #define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
1028 TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
1029 int diff = TestPlaneFilter_16( \
1030 SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
1031 DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
1032 kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
1033 benchmark_cpu_info_); \
1034 EXPECT_LE(diff, max_diff); \
1035 }
1036
1037 // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
1038 // filtering is different fixed point implementations for SSSE3, Neon and C.
1039 #define TEST_FACTOR(name, nom, denom, boxdiff) \
1040 TEST_FACTOR1(name, None, nom, denom, 0) \
1041 TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
1042 TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
1043 TEST_FACTOR1(name, Box, nom, denom, boxdiff)
1044
1045 TEST_FACTOR(2, 1, 2, 0)
1046 TEST_FACTOR(4, 1, 4, 0)
1047 // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
1048 TEST_FACTOR(3by4, 3, 4, 1)
1049 TEST_FACTOR(3by8, 3, 8, 1)
1050 TEST_FACTOR(3, 1, 3, 0)
1051 #undef TEST_FACTOR1
1052 #undef TEST_FACTOR
1053 #undef SX
1054 #undef DX
1055 } // namespace libyuv
1056