1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <stdlib.h>
12 #include <time.h>
13
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17
18 #ifdef ENABLE_ROW_TESTS
19 #include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
20 #endif
21
// Helpers that build a "file:line" string literal during preprocessing.
// FILELINESTR hands its line argument to STRINGIZE instead of applying '#'
// itself, so a macro argument such as __LINE__ is replaced by its value before
// it is turned into a string.
#define STRINGIZE(token) #token
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
24
25 namespace libyuv {
26
27 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I420TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)28 static int I420TestFilter(int src_width,
29 int src_height,
30 int dst_width,
31 int dst_height,
32 FilterMode f,
33 int benchmark_iterations,
34 int disable_cpu_flags,
35 int benchmark_cpu_info) {
36 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
37 return 0;
38 }
39
40 int i, j;
41 int src_width_uv = (Abs(src_width) + 1) >> 1;
42 int src_height_uv = (Abs(src_height) + 1) >> 1;
43
44 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
45 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
46
47 int src_stride_y = Abs(src_width);
48 int src_stride_uv = src_width_uv;
49
50 align_buffer_page_end(src_y, src_y_plane_size);
51 align_buffer_page_end(src_u, src_uv_plane_size);
52 align_buffer_page_end(src_v, src_uv_plane_size);
53 if (!src_y || !src_u || !src_v) {
54 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
55 return 0;
56 }
57 MemRandomize(src_y, src_y_plane_size);
58 MemRandomize(src_u, src_uv_plane_size);
59 MemRandomize(src_v, src_uv_plane_size);
60
61 int dst_width_uv = (dst_width + 1) >> 1;
62 int dst_height_uv = (dst_height + 1) >> 1;
63
64 int64_t dst_y_plane_size = (dst_width) * (dst_height);
65 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
66
67 int dst_stride_y = dst_width;
68 int dst_stride_uv = dst_width_uv;
69
70 align_buffer_page_end(dst_y_c, dst_y_plane_size);
71 align_buffer_page_end(dst_u_c, dst_uv_plane_size);
72 align_buffer_page_end(dst_v_c, dst_uv_plane_size);
73 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
74 align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
75 align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
76 if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
77 !dst_v_opt) {
78 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
79 return 0;
80 }
81
82 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
83 double c_time = get_time();
84 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
85 src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
86 dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
87 c_time = (get_time() - c_time);
88
89 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
90 double opt_time = get_time();
91 for (i = 0; i < benchmark_iterations; ++i) {
92 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
93 src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
94 dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
95 f);
96 }
97 opt_time = (get_time() - opt_time) / benchmark_iterations;
98 // Report performance of C vs OPT.
99 printf("filter %d - %8d us C - %8d us OPT\n", f,
100 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
101
102 // C version may be a little off from the optimized. Order of
103 // operations may introduce rounding somewhere. So do a difference
104 // of the buffers and look to see that the max difference is not
105 // over 3.
106 int max_diff = 0;
107 for (i = 0; i < (dst_height); ++i) {
108 for (j = 0; j < (dst_width); ++j) {
109 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
110 dst_y_opt[(i * dst_stride_y) + j]);
111 if (abs_diff > max_diff) {
112 max_diff = abs_diff;
113 }
114 }
115 }
116
117 for (i = 0; i < (dst_height_uv); ++i) {
118 for (j = 0; j < (dst_width_uv); ++j) {
119 int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
120 dst_u_opt[(i * dst_stride_uv) + j]);
121 if (abs_diff > max_diff) {
122 max_diff = abs_diff;
123 }
124 abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
125 dst_v_opt[(i * dst_stride_uv) + j]);
126 if (abs_diff > max_diff) {
127 max_diff = abs_diff;
128 }
129 }
130 }
131
132 free_aligned_buffer_page_end(dst_y_c);
133 free_aligned_buffer_page_end(dst_u_c);
134 free_aligned_buffer_page_end(dst_v_c);
135 free_aligned_buffer_page_end(dst_y_opt);
136 free_aligned_buffer_page_end(dst_u_opt);
137 free_aligned_buffer_page_end(dst_v_opt);
138 free_aligned_buffer_page_end(src_y);
139 free_aligned_buffer_page_end(src_u);
140 free_aligned_buffer_page_end(src_v);
141
142 return max_diff;
143 }
144
145 // Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
146 // 0 = exact.
I420TestFilter_12(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)147 static int I420TestFilter_12(int src_width,
148 int src_height,
149 int dst_width,
150 int dst_height,
151 FilterMode f,
152 int benchmark_iterations,
153 int disable_cpu_flags,
154 int benchmark_cpu_info) {
155 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156 return 0;
157 }
158
159 int i;
160 int src_width_uv = (Abs(src_width) + 1) >> 1;
161 int src_height_uv = (Abs(src_height) + 1) >> 1;
162
163 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
164 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
165
166 int src_stride_y = Abs(src_width);
167 int src_stride_uv = src_width_uv;
168
169 align_buffer_page_end(src_y, src_y_plane_size);
170 align_buffer_page_end(src_u, src_uv_plane_size);
171 align_buffer_page_end(src_v, src_uv_plane_size);
172 align_buffer_page_end(src_y_12, src_y_plane_size * 2);
173 align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
174 align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
175 if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
176 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
177 return 0;
178 }
179 uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
180 uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
181 uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
182
183 MemRandomize(src_y, src_y_plane_size);
184 MemRandomize(src_u, src_uv_plane_size);
185 MemRandomize(src_v, src_uv_plane_size);
186
187 for (i = 0; i < src_y_plane_size; ++i) {
188 p_src_y_12[i] = src_y[i];
189 }
190 for (i = 0; i < src_uv_plane_size; ++i) {
191 p_src_u_12[i] = src_u[i];
192 p_src_v_12[i] = src_v[i];
193 }
194
195 int dst_width_uv = (dst_width + 1) >> 1;
196 int dst_height_uv = (dst_height + 1) >> 1;
197
198 int dst_y_plane_size = (dst_width) * (dst_height);
199 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
200
201 int dst_stride_y = dst_width;
202 int dst_stride_uv = dst_width_uv;
203
204 align_buffer_page_end(dst_y_8, dst_y_plane_size);
205 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
206 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
207 align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
208 align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
209 align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
210
211 uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
212 uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
213 uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
214
215 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
216 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
217 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
218 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
219 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
220 for (i = 0; i < benchmark_iterations; ++i) {
221 I420Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
222 p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
223 dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
224 dst_stride_uv, dst_width, dst_height, f);
225 }
226
227 // Expect an exact match.
228 int max_diff = 0;
229 for (i = 0; i < dst_y_plane_size; ++i) {
230 int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
231 if (abs_diff > max_diff) {
232 max_diff = abs_diff;
233 }
234 }
235 for (i = 0; i < dst_uv_plane_size; ++i) {
236 int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
237 if (abs_diff > max_diff) {
238 max_diff = abs_diff;
239 }
240 abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
241 if (abs_diff > max_diff) {
242 max_diff = abs_diff;
243 }
244 }
245
246 free_aligned_buffer_page_end(dst_y_8);
247 free_aligned_buffer_page_end(dst_u_8);
248 free_aligned_buffer_page_end(dst_v_8);
249 free_aligned_buffer_page_end(dst_y_12);
250 free_aligned_buffer_page_end(dst_u_12);
251 free_aligned_buffer_page_end(dst_v_12);
252 free_aligned_buffer_page_end(src_y);
253 free_aligned_buffer_page_end(src_u);
254 free_aligned_buffer_page_end(src_v);
255 free_aligned_buffer_page_end(src_y_12);
256 free_aligned_buffer_page_end(src_u_12);
257 free_aligned_buffer_page_end(src_v_12);
258
259 return max_diff;
260 }
261
262 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
263 // 0 = exact.
I420TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)264 static int I420TestFilter_16(int src_width,
265 int src_height,
266 int dst_width,
267 int dst_height,
268 FilterMode f,
269 int benchmark_iterations,
270 int disable_cpu_flags,
271 int benchmark_cpu_info) {
272 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
273 return 0;
274 }
275
276 int i;
277 int src_width_uv = (Abs(src_width) + 1) >> 1;
278 int src_height_uv = (Abs(src_height) + 1) >> 1;
279
280 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
281 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
282
283 int src_stride_y = Abs(src_width);
284 int src_stride_uv = src_width_uv;
285
286 align_buffer_page_end(src_y, src_y_plane_size);
287 align_buffer_page_end(src_u, src_uv_plane_size);
288 align_buffer_page_end(src_v, src_uv_plane_size);
289 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
290 align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
291 align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
292 if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
293 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
294 return 0;
295 }
296 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
297 uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
298 uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
299
300 MemRandomize(src_y, src_y_plane_size);
301 MemRandomize(src_u, src_uv_plane_size);
302 MemRandomize(src_v, src_uv_plane_size);
303
304 for (i = 0; i < src_y_plane_size; ++i) {
305 p_src_y_16[i] = src_y[i];
306 }
307 for (i = 0; i < src_uv_plane_size; ++i) {
308 p_src_u_16[i] = src_u[i];
309 p_src_v_16[i] = src_v[i];
310 }
311
312 int dst_width_uv = (dst_width + 1) >> 1;
313 int dst_height_uv = (dst_height + 1) >> 1;
314
315 int dst_y_plane_size = (dst_width) * (dst_height);
316 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
317
318 int dst_stride_y = dst_width;
319 int dst_stride_uv = dst_width_uv;
320
321 align_buffer_page_end(dst_y_8, dst_y_plane_size);
322 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
323 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
324 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
325 align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
326 align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
327
328 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
329 uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
330 uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
331
332 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
333 I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
334 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
335 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
336 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
337 for (i = 0; i < benchmark_iterations; ++i) {
338 I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
339 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
340 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
341 dst_stride_uv, dst_width, dst_height, f);
342 }
343
344 // Expect an exact match.
345 int max_diff = 0;
346 for (i = 0; i < dst_y_plane_size; ++i) {
347 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
348 if (abs_diff > max_diff) {
349 max_diff = abs_diff;
350 }
351 }
352 for (i = 0; i < dst_uv_plane_size; ++i) {
353 int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
354 if (abs_diff > max_diff) {
355 max_diff = abs_diff;
356 }
357 abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
358 if (abs_diff > max_diff) {
359 max_diff = abs_diff;
360 }
361 }
362
363 free_aligned_buffer_page_end(dst_y_8);
364 free_aligned_buffer_page_end(dst_u_8);
365 free_aligned_buffer_page_end(dst_v_8);
366 free_aligned_buffer_page_end(dst_y_16);
367 free_aligned_buffer_page_end(dst_u_16);
368 free_aligned_buffer_page_end(dst_v_16);
369 free_aligned_buffer_page_end(src_y);
370 free_aligned_buffer_page_end(src_u);
371 free_aligned_buffer_page_end(src_v);
372 free_aligned_buffer_page_end(src_y_16);
373 free_aligned_buffer_page_end(src_u_16);
374 free_aligned_buffer_page_end(src_v_16);
375
376 return max_diff;
377 }
378
379 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I444TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)380 static int I444TestFilter(int src_width,
381 int src_height,
382 int dst_width,
383 int dst_height,
384 FilterMode f,
385 int benchmark_iterations,
386 int disable_cpu_flags,
387 int benchmark_cpu_info) {
388 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
389 return 0;
390 }
391
392 int i, j;
393 int src_width_uv = Abs(src_width);
394 int src_height_uv = Abs(src_height);
395
396 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
397 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
398
399 int src_stride_y = Abs(src_width);
400 int src_stride_uv = src_width_uv;
401
402 align_buffer_page_end(src_y, src_y_plane_size);
403 align_buffer_page_end(src_u, src_uv_plane_size);
404 align_buffer_page_end(src_v, src_uv_plane_size);
405 if (!src_y || !src_u || !src_v) {
406 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
407 return 0;
408 }
409 MemRandomize(src_y, src_y_plane_size);
410 MemRandomize(src_u, src_uv_plane_size);
411 MemRandomize(src_v, src_uv_plane_size);
412
413 int dst_width_uv = dst_width;
414 int dst_height_uv = dst_height;
415
416 int64_t dst_y_plane_size = (dst_width) * (dst_height);
417 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
418
419 int dst_stride_y = dst_width;
420 int dst_stride_uv = dst_width_uv;
421
422 align_buffer_page_end(dst_y_c, dst_y_plane_size);
423 align_buffer_page_end(dst_u_c, dst_uv_plane_size);
424 align_buffer_page_end(dst_v_c, dst_uv_plane_size);
425 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
426 align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
427 align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
428 if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
429 !dst_v_opt) {
430 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
431 return 0;
432 }
433
434 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
435 double c_time = get_time();
436 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
437 src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
438 dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
439 c_time = (get_time() - c_time);
440
441 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
442 double opt_time = get_time();
443 for (i = 0; i < benchmark_iterations; ++i) {
444 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
445 src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
446 dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
447 f);
448 }
449 opt_time = (get_time() - opt_time) / benchmark_iterations;
450 // Report performance of C vs OPT.
451 printf("filter %d - %8d us C - %8d us OPT\n", f,
452 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
453
454 // C version may be a little off from the optimized. Order of
455 // operations may introduce rounding somewhere. So do a difference
456 // of the buffers and look to see that the max difference is not
457 // over 3.
458 int max_diff = 0;
459 for (i = 0; i < (dst_height); ++i) {
460 for (j = 0; j < (dst_width); ++j) {
461 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
462 dst_y_opt[(i * dst_stride_y) + j]);
463 if (abs_diff > max_diff) {
464 max_diff = abs_diff;
465 }
466 }
467 }
468
469 for (i = 0; i < (dst_height_uv); ++i) {
470 for (j = 0; j < (dst_width_uv); ++j) {
471 int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
472 dst_u_opt[(i * dst_stride_uv) + j]);
473 if (abs_diff > max_diff) {
474 max_diff = abs_diff;
475 }
476 abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
477 dst_v_opt[(i * dst_stride_uv) + j]);
478 if (abs_diff > max_diff) {
479 max_diff = abs_diff;
480 }
481 }
482 }
483
484 free_aligned_buffer_page_end(dst_y_c);
485 free_aligned_buffer_page_end(dst_u_c);
486 free_aligned_buffer_page_end(dst_v_c);
487 free_aligned_buffer_page_end(dst_y_opt);
488 free_aligned_buffer_page_end(dst_u_opt);
489 free_aligned_buffer_page_end(dst_v_opt);
490 free_aligned_buffer_page_end(src_y);
491 free_aligned_buffer_page_end(src_u);
492 free_aligned_buffer_page_end(src_v);
493
494 return max_diff;
495 }
496
497 // Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
498 // 0 = exact.
I444TestFilter_12(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)499 static int I444TestFilter_12(int src_width,
500 int src_height,
501 int dst_width,
502 int dst_height,
503 FilterMode f,
504 int benchmark_iterations,
505 int disable_cpu_flags,
506 int benchmark_cpu_info) {
507 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
508 return 0;
509 }
510
511 int i;
512 int src_width_uv = Abs(src_width);
513 int src_height_uv = Abs(src_height);
514
515 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
516 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
517
518 int src_stride_y = Abs(src_width);
519 int src_stride_uv = src_width_uv;
520
521 align_buffer_page_end(src_y, src_y_plane_size);
522 align_buffer_page_end(src_u, src_uv_plane_size);
523 align_buffer_page_end(src_v, src_uv_plane_size);
524 align_buffer_page_end(src_y_12, src_y_plane_size * 2);
525 align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
526 align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
527 if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
528 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
529 return 0;
530 }
531 uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
532 uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
533 uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
534
535 MemRandomize(src_y, src_y_plane_size);
536 MemRandomize(src_u, src_uv_plane_size);
537 MemRandomize(src_v, src_uv_plane_size);
538
539 for (i = 0; i < src_y_plane_size; ++i) {
540 p_src_y_12[i] = src_y[i];
541 }
542 for (i = 0; i < src_uv_plane_size; ++i) {
543 p_src_u_12[i] = src_u[i];
544 p_src_v_12[i] = src_v[i];
545 }
546
547 int dst_width_uv = dst_width;
548 int dst_height_uv = dst_height;
549
550 int dst_y_plane_size = (dst_width) * (dst_height);
551 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
552
553 int dst_stride_y = dst_width;
554 int dst_stride_uv = dst_width_uv;
555
556 align_buffer_page_end(dst_y_8, dst_y_plane_size);
557 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
558 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
559 align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
560 align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
561 align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
562
563 uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
564 uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
565 uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
566
567 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
568 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
569 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
570 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
571 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
572 for (i = 0; i < benchmark_iterations; ++i) {
573 I444Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
574 p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
575 dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
576 dst_stride_uv, dst_width, dst_height, f);
577 }
578
579 // Expect an exact match.
580 int max_diff = 0;
581 for (i = 0; i < dst_y_plane_size; ++i) {
582 int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
583 if (abs_diff > max_diff) {
584 max_diff = abs_diff;
585 }
586 }
587 for (i = 0; i < dst_uv_plane_size; ++i) {
588 int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
589 if (abs_diff > max_diff) {
590 max_diff = abs_diff;
591 }
592 abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
593 if (abs_diff > max_diff) {
594 max_diff = abs_diff;
595 }
596 }
597
598 free_aligned_buffer_page_end(dst_y_8);
599 free_aligned_buffer_page_end(dst_u_8);
600 free_aligned_buffer_page_end(dst_v_8);
601 free_aligned_buffer_page_end(dst_y_12);
602 free_aligned_buffer_page_end(dst_u_12);
603 free_aligned_buffer_page_end(dst_v_12);
604 free_aligned_buffer_page_end(src_y);
605 free_aligned_buffer_page_end(src_u);
606 free_aligned_buffer_page_end(src_v);
607 free_aligned_buffer_page_end(src_y_12);
608 free_aligned_buffer_page_end(src_u_12);
609 free_aligned_buffer_page_end(src_v_12);
610
611 return max_diff;
612 }
613
614 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
615 // 0 = exact.
I444TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)616 static int I444TestFilter_16(int src_width,
617 int src_height,
618 int dst_width,
619 int dst_height,
620 FilterMode f,
621 int benchmark_iterations,
622 int disable_cpu_flags,
623 int benchmark_cpu_info) {
624 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
625 return 0;
626 }
627
628 int i;
629 int src_width_uv = Abs(src_width);
630 int src_height_uv = Abs(src_height);
631
632 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
633 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
634
635 int src_stride_y = Abs(src_width);
636 int src_stride_uv = src_width_uv;
637
638 align_buffer_page_end(src_y, src_y_plane_size);
639 align_buffer_page_end(src_u, src_uv_plane_size);
640 align_buffer_page_end(src_v, src_uv_plane_size);
641 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
642 align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
643 align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
644 if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
645 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
646 return 0;
647 }
648 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
649 uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
650 uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
651
652 MemRandomize(src_y, src_y_plane_size);
653 MemRandomize(src_u, src_uv_plane_size);
654 MemRandomize(src_v, src_uv_plane_size);
655
656 for (i = 0; i < src_y_plane_size; ++i) {
657 p_src_y_16[i] = src_y[i];
658 }
659 for (i = 0; i < src_uv_plane_size; ++i) {
660 p_src_u_16[i] = src_u[i];
661 p_src_v_16[i] = src_v[i];
662 }
663
664 int dst_width_uv = dst_width;
665 int dst_height_uv = dst_height;
666
667 int dst_y_plane_size = (dst_width) * (dst_height);
668 int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
669
670 int dst_stride_y = dst_width;
671 int dst_stride_uv = dst_width_uv;
672
673 align_buffer_page_end(dst_y_8, dst_y_plane_size);
674 align_buffer_page_end(dst_u_8, dst_uv_plane_size);
675 align_buffer_page_end(dst_v_8, dst_uv_plane_size);
676 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
677 align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
678 align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
679
680 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
681 uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
682 uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
683
684 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
685 I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
686 src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
687 dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
688 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
689 for (i = 0; i < benchmark_iterations; ++i) {
690 I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
691 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
692 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
693 dst_stride_uv, dst_width, dst_height, f);
694 }
695
696 // Expect an exact match.
697 int max_diff = 0;
698 for (i = 0; i < dst_y_plane_size; ++i) {
699 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
700 if (abs_diff > max_diff) {
701 max_diff = abs_diff;
702 }
703 }
704 for (i = 0; i < dst_uv_plane_size; ++i) {
705 int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
706 if (abs_diff > max_diff) {
707 max_diff = abs_diff;
708 }
709 abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
710 if (abs_diff > max_diff) {
711 max_diff = abs_diff;
712 }
713 }
714
715 free_aligned_buffer_page_end(dst_y_8);
716 free_aligned_buffer_page_end(dst_u_8);
717 free_aligned_buffer_page_end(dst_v_8);
718 free_aligned_buffer_page_end(dst_y_16);
719 free_aligned_buffer_page_end(dst_u_16);
720 free_aligned_buffer_page_end(dst_v_16);
721 free_aligned_buffer_page_end(src_y);
722 free_aligned_buffer_page_end(src_u);
723 free_aligned_buffer_page_end(src_v);
724 free_aligned_buffer_page_end(src_y_16);
725 free_aligned_buffer_page_end(src_u_16);
726 free_aligned_buffer_page_end(src_v_16);
727
728 return max_diff;
729 }
730
731 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
NV12TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)732 static int NV12TestFilter(int src_width,
733 int src_height,
734 int dst_width,
735 int dst_height,
736 FilterMode f,
737 int benchmark_iterations,
738 int disable_cpu_flags,
739 int benchmark_cpu_info) {
740 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
741 return 0;
742 }
743
744 int i, j;
745 int src_width_uv = (Abs(src_width) + 1) >> 1;
746 int src_height_uv = (Abs(src_height) + 1) >> 1;
747
748 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
749 int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2;
750
751 int src_stride_y = Abs(src_width);
752 int src_stride_uv = src_width_uv * 2;
753
754 align_buffer_page_end(src_y, src_y_plane_size);
755 align_buffer_page_end(src_uv, src_uv_plane_size);
756 if (!src_y || !src_uv) {
757 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
758 return 0;
759 }
760 MemRandomize(src_y, src_y_plane_size);
761 MemRandomize(src_uv, src_uv_plane_size);
762
763 int dst_width_uv = (dst_width + 1) >> 1;
764 int dst_height_uv = (dst_height + 1) >> 1;
765
766 int64_t dst_y_plane_size = (dst_width) * (dst_height);
767 int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2;
768
769 int dst_stride_y = dst_width;
770 int dst_stride_uv = dst_width_uv * 2;
771
772 align_buffer_page_end(dst_y_c, dst_y_plane_size);
773 align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
774 align_buffer_page_end(dst_y_opt, dst_y_plane_size);
775 align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
776 if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) {
777 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
778 return 0;
779 }
780
781 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
782 double c_time = get_time();
783 NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
784 dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width,
785 dst_height, f);
786 c_time = (get_time() - c_time);
787
788 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
789 double opt_time = get_time();
790 for (i = 0; i < benchmark_iterations; ++i) {
791 NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
792 dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv, dst_width,
793 dst_height, f);
794 }
795 opt_time = (get_time() - opt_time) / benchmark_iterations;
796 // Report performance of C vs OPT.
797 printf("filter %d - %8d us C - %8d us OPT\n", f,
798 static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
799
800 // C version may be a little off from the optimized. Order of
801 // operations may introduce rounding somewhere. So do a difference
802 // of the buffers and look to see that the max difference is not
803 // over 3.
804 int max_diff = 0;
805 for (i = 0; i < (dst_height); ++i) {
806 for (j = 0; j < (dst_width); ++j) {
807 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
808 dst_y_opt[(i * dst_stride_y) + j]);
809 if (abs_diff > max_diff) {
810 max_diff = abs_diff;
811 }
812 }
813 }
814
815 for (i = 0; i < (dst_height_uv); ++i) {
816 for (j = 0; j < (dst_width_uv * 2); ++j) {
817 int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
818 dst_uv_opt[(i * dst_stride_uv) + j]);
819 if (abs_diff > max_diff) {
820 max_diff = abs_diff;
821 }
822 }
823 }
824
825 free_aligned_buffer_page_end(dst_y_c);
826 free_aligned_buffer_page_end(dst_uv_c);
827 free_aligned_buffer_page_end(dst_y_opt);
828 free_aligned_buffer_page_end(dst_uv_opt);
829 free_aligned_buffer_page_end(src_y);
830 free_aligned_buffer_page_end(src_uv);
831
832 return max_diff;
833 }
834
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// DX rounds |x| to a multiple of nom * 2; SX applies the same rounding to x
// as given (no Abs) and multiplies by denom * 2 instead.
// Every argument is parenthesized so that an expression such as `w + 1` can be
// passed without changing operator precedence. DX does not use denom; it is
// accepted so that both macros take the same argument list.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
840
// Defines five LibYUVScaleTest cases for one scale factor (name, nom/denom)
// and one filter: I420, I444 and NV12 scaling plus the 12 bit I420 and I444
// variants. Each case scales from SX(...) source dimensions to DX(...)
// destination dimensions derived from benchmark_width_ / benchmark_height_ and
// expects the difference returned by the test helper to be at most max_diff.
//
// The first parameter is pasted in front of the names of the two _12 tests
// only: passing DISABLED_ gives them a DISABLED_ name prefix (which gtest
// treats as "do not run by default"), passing nothing leaves them unprefixed.
// Comments are kept outside the macro body because of the line continuations.
#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
    int diff = I420TestFilter( \
        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
    int diff = I444TestFilter( \
        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_12) { \
    int diff = I420TestFilter_12( \
        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_12) { \
    int diff = I444TestFilter_12( \
        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) { \
    int diff = NV12TestFilter( \
        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  }
882
// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
// Allowed difference: 0 for None, 3 for Linear and Bilinear, boxdiff for Box.
// The first argument handed to TEST_FACTOR1 is empty when ENABLE_SLOW_TESTS is
// defined and DISABLED_ otherwise.
#ifdef ENABLE_SLOW_TESTS
#define TEST_FACTOR(name, nom, denom, boxdiff) \
  TEST_FACTOR1(, name, None, nom, denom, 0) \
  TEST_FACTOR1(, name, Linear, nom, denom, 3) \
  TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
  TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
#else
#define TEST_FACTOR(name, nom, denom, boxdiff) \
  TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
  TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
  TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
  TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
#endif

// Arguments are (test name suffix, nom, denom, boxdiff); the scale factor is
// nom / denom.
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
// Remove the helper macros so the names can be reused later in the file.
#undef TEST_FACTOR1
#undef TEST_FACTOR
#undef SX
#undef DX
909
// Generates the fixed-size scaling tests for one (width x height, filter)
// pair.
//   "To" tests scale from benchmark_width_ x benchmark_height_ to
//   width x height.
//   "From" tests scale from width x height to
//   Abs(benchmark_width_) x Abs(benchmark_height_).
// Each direction gets I420, I444 and NV12 tests plus I420/I444 "_12" and "_16"
// variants, and every test expects the value returned by the matching
// *TestFilter helper to be <= max_diff.
// The first macro parameter is itself named DISABLED_, so `DISABLED_##...`
// pastes the caller's first argument in front of the "_12" and "_16" test
// names: pass DISABLED_ to disable those tests or an empty argument to enable
// them. The I420, I444 and NV12 tests do not use that parameter.
#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
  TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
    int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
                              height, kFilter##filter, benchmark_iterations_, \
                              disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
    int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
                              height, kFilter##filter, benchmark_iterations_, \
                              disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I420##name##To##width##x##height##_##filter##_12) { \
    int diff = I420TestFilter_12( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I444##name##To##width##x##height##_##filter##_12) { \
    int diff = I444TestFilter_12( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
    int diff = I420TestFilter_16( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
    int diff = I444TestFilter_16( \
        benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
        benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \
    int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \
                              height, kFilter##filter, benchmark_iterations_, \
                              disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
    int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
                              Abs(benchmark_height_), kFilter##filter, \
                              benchmark_iterations_, disable_cpu_flags_, \
                              benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
    int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
                              Abs(benchmark_height_), kFilter##filter, \
                              benchmark_iterations_, disable_cpu_flags_, \
                              benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I420##name##From##width##x##height##_##filter##_12) { \
    int diff = I420TestFilter_12(width, height, Abs(benchmark_width_), \
                                 Abs(benchmark_height_), kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I444##name##From##width##x##height##_##filter##_12) { \
    int diff = I444TestFilter_12(width, height, Abs(benchmark_width_), \
                                 Abs(benchmark_height_), kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
    int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
                                 Abs(benchmark_height_), kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, \
         DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
    int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
                                 Abs(benchmark_height_), kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_, \
                                 benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) { \
    int diff = NV12TestFilter(width, height, Abs(benchmark_width_), \
                              Abs(benchmark_height_), kFilter##filter, \
                              benchmark_iterations_, disable_cpu_flags_, \
                              benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  }
1010
// TEST_SCALETO expands TEST_SCALETO1 once per filter. Allowed difference: 0
// for None, 3 for Linear, Bilinear and Box. The first argument handed to
// TEST_SCALETO1 is empty when ENABLE_SLOW_TESTS is defined and DISABLED_
// otherwise.
#ifdef ENABLE_SLOW_TESTS
// Test scale to a specified size with all 4 filters.
#define TEST_SCALETO(name, width, height) \
  TEST_SCALETO1(, name, width, height, None, 0) \
  TEST_SCALETO1(, name, width, height, Linear, 3) \
  TEST_SCALETO1(, name, width, height, Bilinear, 3) \
  TEST_SCALETO1(, name, width, height, Box, 3)
#else
// Test scale to a specified size with all 4 filters.
#define TEST_SCALETO(name, width, height) \
  TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
  TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
  TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
  TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
#endif

// Arguments are (test name infix, width, height).
TEST_SCALETO(Scale, 1, 1)
TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */
TEST_SCALETO(Scale, 320, 240)
TEST_SCALETO(Scale, 569, 480)
TEST_SCALETO(Scale, 640, 360)
TEST_SCALETO(Scale, 1280, 720)
// The 1920x1080 tests are only generated for slow test builds.
#ifdef ENABLE_SLOW_TESTS
TEST_SCALETO(Scale, 1920, 1080)
#endif  // ENABLE_SLOW_TESTS
#undef TEST_SCALETO1
#undef TEST_SCALETO
1038
// Generates tests that scale from benchmark_width_ x benchmark_height_ to the
// swapped size benchmark_height_ x benchmark_width_ for one filter: I420, I444
// and NV12 tests plus I420/I444 "_12" and "_16" variants. Every test expects
// the value returned by the matching *TestFilter helper to be <= max_diff.
// The first macro parameter is itself named DISABLED_, so `DISABLED_##...`
// pastes the caller's first argument in front of the "_12" and "_16" test
// names: pass DISABLED_ to disable those tests or an empty argument to enable
// them. The I420, I444 and NV12 tests do not use that parameter.
#define TEST_SCALESWAPXY1(DISABLED_, name, filter, max_diff) \
  TEST_F(LibYUVScaleTest, I420##name##SwapXY_##filter) { \
    int diff = I420TestFilter(benchmark_width_, benchmark_height_, \
                              benchmark_height_, benchmark_width_, \
                              kFilter##filter, benchmark_iterations_, \
                              disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) { \
    int diff = I444TestFilter(benchmark_width_, benchmark_height_, \
                              benchmark_height_, benchmark_width_, \
                              kFilter##filter, benchmark_iterations_, \
                              disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_12) { \
    int diff = I420TestFilter_12(benchmark_width_, benchmark_height_, \
                                 benchmark_height_, benchmark_width_, \
                                 kFilter##filter, benchmark_iterations_, \
                                 disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_12) { \
    int diff = I444TestFilter_12(benchmark_width_, benchmark_height_, \
                                 benchmark_height_, benchmark_width_, \
                                 kFilter##filter, benchmark_iterations_, \
                                 disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) { \
    int diff = I420TestFilter_16(benchmark_width_, benchmark_height_, \
                                 benchmark_height_, benchmark_width_, \
                                 kFilter##filter, benchmark_iterations_, \
                                 disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) { \
    int diff = I444TestFilter_16(benchmark_width_, benchmark_height_, \
                                 benchmark_height_, benchmark_width_, \
                                 kFilter##filter, benchmark_iterations_, \
                                 disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  } \
  TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) { \
    int diff = NV12TestFilter(benchmark_width_, benchmark_height_, \
                              benchmark_height_, benchmark_width_, \
                              kFilter##filter, benchmark_iterations_, \
                              disable_cpu_flags_, benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  }
1089
// Test scaling to the swapped benchmark size (width x height to
// height x width) with all 4 filters. Allowed difference: 0 for None, 3 for
// Linear, Bilinear and Box. The first argument is empty when ENABLE_SLOW_TESTS
// is defined and DISABLED_ otherwise.
#ifdef ENABLE_SLOW_TESTS
TEST_SCALESWAPXY1(, Scale, None, 0)
TEST_SCALESWAPXY1(, Scale, Linear, 3)
TEST_SCALESWAPXY1(, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(, Scale, Box, 3)
#else
TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0)
TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
#endif

#undef TEST_SCALESWAPXY1
1104
1105 #ifdef ENABLE_ROW_TESTS
1106 #ifdef HAS_SCALEROWDOWN2_SSSE3
// Exercises ScaleRowDown2Box_C, ScaleRowDown2Box_Odd_C and their SSSE3
// versions on a two row source with a 128 byte stride, comparing the output
// against fixed expected values and the C output against the SSSE3 output.
// Only prints a warning when SSSE3 is not detected.
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
  SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));

  if (!TestCpuFlag(kCpuHasSSSE3)) {
    printf("Warning SSSE3 not detected; Skipping test.\n");
    return;
  }

  const int kStride = 128;
  uint8_t* const top_row = orig_pixels;
  uint8_t* const bottom_row = orig_pixels + kStride;

  // First box: only the top-left sample is set.
  top_row[0] = 255u;
  top_row[1] = 0u;
  bottom_row[0] = 0u;
  bottom_row[1] = 0u;
  // Second box: only the top-right sample is set.
  top_row[2] = 0u;
  top_row[3] = 100u;
  bottom_row[2] = 0u;
  bottom_row[3] = 0u;
  // Third box: only the bottom-left sample is set.
  top_row[4] = 0u;
  top_row[5] = 0u;
  bottom_row[4] = 50u;
  bottom_row[5] = 0u;
  // Fourth box: only the bottom-right sample is set.
  top_row[6] = 0u;
  top_row[7] = 0u;
  bottom_row[6] = 0u;
  bottom_row[7] = 20u;
  // Last box of the row, used by the odd-width checks.
  top_row[126] = 4u;
  top_row[127] = 255u;
  bottom_row[126] = 16u;
  bottom_row[127] = 255u;

  // The first five outputs are expected to be the same in every run below.
  static const unsigned kLeadingExpected[5] = {64u, 25u, 13u, 5u, 0u};
  const int kLeadingCount = 5;

  // Regular half size, C version.
  ScaleRowDown2Box_C(orig_pixels, kStride, dst_pixels_c, 64);
  for (int k = 0; k < kLeadingCount; ++k) {
    EXPECT_EQ(kLeadingExpected[k], dst_pixels_c[k]);
  }
  EXPECT_EQ(133u, dst_pixels_c[63]);

  // Odd width version - last pixel is just 1 horizontal pixel.
  ScaleRowDown2Box_Odd_C(orig_pixels, kStride, dst_pixels_c, 64);
  for (int k = 0; k < kLeadingCount; ++k) {
    EXPECT_EQ(kLeadingExpected[k], dst_pixels_c[k]);
  }
  EXPECT_EQ(10u, dst_pixels_c[63]);

  // One pixel less: the last output is expected to stay at its cleared value.
  memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  ScaleRowDown2Box_Odd_C(orig_pixels, kStride, dst_pixels_c, 63);
  for (int k = 0; k < kLeadingCount; ++k) {
    EXPECT_EQ(kLeadingExpected[k], dst_pixels_c[k]);
  }
  EXPECT_EQ(0u, dst_pixels_c[63]);

  // Regular half size, SSSE3 version.
  ScaleRowDown2Box_SSSE3(orig_pixels, kStride, dst_pixels_opt, 64);
  for (int k = 0; k < kLeadingCount; ++k) {
    EXPECT_EQ(kLeadingExpected[k], dst_pixels_opt[k]);
  }
  EXPECT_EQ(133u, dst_pixels_opt[63]);

  // The odd-width C and SSSE3 versions must agree on every output pixel.
  ScaleRowDown2Box_Odd_C(orig_pixels, kStride, dst_pixels_c, 64);
  ScaleRowDown2Box_Odd_SSSE3(orig_pixels, kStride, dst_pixels_opt, 64);
  for (int k = 0; k < 64; ++k) {
    EXPECT_EQ(dst_pixels_c[k], dst_pixels_opt[k]);
  }
}
1194 #endif // HAS_SCALEROWDOWN2_SSSE3
1195
1196 extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
1197 ptrdiff_t src_stride,
1198 uint16_t* dst,
1199 int dst_width);
1200 extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
1201 ptrdiff_t src_stride,
1202 uint16_t* dst,
1203 int dst_width);
1204 extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
1205 ptrdiff_t src_stride,
1206 uint16_t* dst,
1207 int dst_width);
1208
TEST_F(LibYUVScaleTest,TestScaleRowUp2_16)1209 TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
1210 SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
1211 SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
1212 SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
1213
1214 memset(orig_pixels, 0, sizeof(orig_pixels));
1215 memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
1216 memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
1217
1218 for (int i = 0; i < 640 * 2 + 1; ++i) {
1219 orig_pixels[i] = i;
1220 }
1221 ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
1222 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1223 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
1224 int has_neon = TestCpuFlag(kCpuHasNEON);
1225 if (has_neon) {
1226 ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1227 } else {
1228 ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1229 }
1230 #elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
1231 int has_mmi = TestCpuFlag(kCpuHasMMI);
1232 if (has_mmi) {
1233 ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1234 } else {
1235 ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1236 }
1237 #else
1238 ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1239 #endif
1240 }
1241
1242 for (int i = 0; i < 1280; ++i) {
1243 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
1244 }
1245 EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
1246 EXPECT_EQ(dst_pixels_c[1279], 800);
1247 }
1248
1249 extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
1250 ptrdiff_t src_stride,
1251 uint16_t* dst,
1252 int dst_width);
1253
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_16)1254 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
1255 SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
1256 SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
1257 SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
1258
1259 memset(orig_pixels, 0, sizeof(orig_pixels));
1260 memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
1261 memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
1262
1263 for (int i = 0; i < 2560 * 2; ++i) {
1264 orig_pixels[i] = i;
1265 }
1266 ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
1267 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1268 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
1269 int has_neon = TestCpuFlag(kCpuHasNEON);
1270 if (has_neon) {
1271 ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
1272 } else {
1273 ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
1274 }
1275 #else
1276 ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
1277 #endif
1278 }
1279
1280 for (int i = 0; i < 1280; ++i) {
1281 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
1282 }
1283
1284 EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
1285 EXPECT_EQ(dst_pixels_c[1279], 3839);
1286 }
1287 #endif // ENABLE_ROW_TESTS
1288
1289 // Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
1290 // difference.
1291 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)1292 static int TestPlaneFilter_16(int src_width,
1293 int src_height,
1294 int dst_width,
1295 int dst_height,
1296 FilterMode f,
1297 int benchmark_iterations,
1298 int disable_cpu_flags,
1299 int benchmark_cpu_info) {
1300 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
1301 return 0;
1302 }
1303
1304 int i;
1305 int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
1306 int src_stride_y = Abs(src_width);
1307 int dst_y_plane_size = dst_width * dst_height;
1308 int dst_stride_y = dst_width;
1309
1310 align_buffer_page_end(src_y, src_y_plane_size);
1311 align_buffer_page_end(src_y_16, src_y_plane_size * 2);
1312 align_buffer_page_end(dst_y_8, dst_y_plane_size);
1313 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
1314 uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
1315 uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
1316
1317 MemRandomize(src_y, src_y_plane_size);
1318 memset(dst_y_8, 0, dst_y_plane_size);
1319 memset(dst_y_16, 1, dst_y_plane_size * 2);
1320
1321 for (i = 0; i < src_y_plane_size; ++i) {
1322 p_src_y_16[i] = src_y[i] & 255;
1323 }
1324
1325 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
1326 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
1327 dst_width, dst_height, f);
1328 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
1329
1330 for (i = 0; i < benchmark_iterations; ++i) {
1331 ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
1332 dst_stride_y, dst_width, dst_height, f);
1333 }
1334
1335 // Expect an exact match.
1336 int max_diff = 0;
1337 for (i = 0; i < dst_y_plane_size; ++i) {
1338 int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
1339 if (abs_diff > max_diff) {
1340 max_diff = abs_diff;
1341 }
1342 }
1343
1344 free_aligned_buffer_page_end(dst_y_8);
1345 free_aligned_buffer_page_end(dst_y_16);
1346 free_aligned_buffer_page_end(src_y);
1347 free_aligned_buffer_page_end(src_y_16);
1348
1349 return max_diff;
1350 }
1351
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// DX produces a destination size that is a multiple of (2 * nom) and SX a
// source size that is a multiple of (2 * denom); the 2 is chroma subsample.
// DX uses the magnitude of x while SX does not take the absolute value.
// `denom` is unused by DX; it is accepted so both macros share one signature.
// Every parameter is parenthesized so expression arguments expand correctly.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
1357
// Generates one test, DISABLED_ScalePlaneDownBy<name>_<filter>_16, that calls
// TestPlaneFilter_16 with the SX-adjusted benchmark size as the source and the
// DX-adjusted benchmark size as the destination, and expects the returned
// difference to be <= max_diff.
// This macro has no parameter named DISABLED_, so the DISABLED_ prefix is
// always part of the generated test name.
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
    int diff = TestPlaneFilter_16( \
        SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
        DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
        kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
        benchmark_cpu_info_); \
    EXPECT_LE(diff, max_diff); \
  }
1367
// Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
// filtering is different fixed point implementations for SSSE3, Neon and C.
// Allowed difference: 0 for None, boxdiff for Linear, Bilinear and Box.
#define TEST_FACTOR(name, nom, denom, boxdiff) \
  TEST_FACTOR1(name, None, nom, denom, 0) \
  TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
  TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
  TEST_FACTOR1(name, Box, nom, denom, boxdiff)

// Arguments are (test name suffix, nom, denom, boxdiff); the scale factor is
// nom / denom.
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
// Remove the helper macros once all tests have been generated.
#undef TEST_FACTOR1
#undef TEST_FACTOR
#undef SX
#undef DX
1386
// Scales a 48x3 plane whose bytes hold 0, 1, 2, ... down to 16x1 and checks
// the first output pixel, first with kFilterBilinear (run in a benchmark loop)
// and then with kFilterNone.
TEST_F(LibYUVScaleTest, PlaneTest3x) {
  const int kSrcStride = 48;
  const int kDstStride = 16;
  const int kSize = kSrcStride * 3;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < kSize; ++i) {
    orig_pixels[i] = i;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // Scale the iteration count by the benchmark size in units of 16 pixels.
  // Round up and clamp to one so the scale always runs before dest_pixels is
  // checked; truncating division gave zero iterations for fewer than 16
  // benchmark pixels. 64 bit math keeps large benchmark settings from
  // overflowing int.
  int64_t iterations16 =
      (static_cast<int64_t>(benchmark_width_) * benchmark_height_ + (16 - 1)) /
      16 * benchmark_iterations_;
  if (iterations16 < 1) {
    iterations16 = 1;
  }
  for (int64_t i = 0; i < iterations16; ++i) {
    ScalePlane(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
               kFilterBilinear);
  }

  EXPECT_EQ(49, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
             kFilterNone);

  EXPECT_EQ(49, dest_pixels[0]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
1414
// Scales a 64x4 plane whose bytes hold 0, 1, 2, ... down to 16x1 and checks
// the first output pixel, first with kFilterBilinear (run in a benchmark loop)
// and then with kFilterNone.
TEST_F(LibYUVScaleTest, PlaneTest4x) {
  const int kSrcStride = 64;
  const int kDstStride = 16;
  const int kSize = kSrcStride * 4;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < kSize; ++i) {
    orig_pixels[i] = i;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // Scale the iteration count by the benchmark size in units of 16 pixels.
  // Round up and clamp to one so the scale always runs before dest_pixels is
  // checked; truncating division gave zero iterations for fewer than 16
  // benchmark pixels. 64 bit math keeps large benchmark settings from
  // overflowing int.
  int64_t iterations16 =
      (static_cast<int64_t>(benchmark_width_) * benchmark_height_ + (16 - 1)) /
      16 * benchmark_iterations_;
  if (iterations16 < 1) {
    iterations16 = 1;
  }
  for (int64_t i = 0; i < iterations16; ++i) {
    ScalePlane(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
               kFilterBilinear);
  }

  // Rounded average of the 2x2 samples at rows 1-2, columns 1-2.
  EXPECT_EQ((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
             kFilterNone);

  EXPECT_EQ(130, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
1442
1443 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_None)1444 TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
1445 const int kSize = benchmark_width_ * benchmark_height_;
1446 align_buffer_page_end(orig_pixels, kSize);
1447 for (int i = 0; i < kSize; ++i) {
1448 orig_pixels[i] = i;
1449 }
1450 align_buffer_page_end(dest_opt_pixels, kSize);
1451 align_buffer_page_end(dest_c_pixels, kSize);
1452
1453 MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
1454 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
1455 dest_c_pixels, benchmark_height_, benchmark_height_,
1456 benchmark_width_, kFilterNone);
1457 MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
1458
1459 for (int i = 0; i < benchmark_iterations_; ++i) {
1460 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
1461 benchmark_height_, dest_opt_pixels, benchmark_height_,
1462 benchmark_height_, benchmark_width_, kFilterNone);
1463 }
1464
1465 for (int i = 0; i < kSize; ++i) {
1466 EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
1467 }
1468
1469 free_aligned_buffer_page_end(dest_c_pixels);
1470 free_aligned_buffer_page_end(dest_opt_pixels);
1471 free_aligned_buffer_page_end(orig_pixels);
1472 }
1473
// Scales a benchmark_width_ x benchmark_height_ plane to the swapped size
// benchmark_height_ x benchmark_width_ with kFilterBilinear, once with the CPU
// flags masked by disable_cpu_flags_ and benchmark_iterations_ times with
// them masked by benchmark_cpu_info_, and expects identical output.
TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
  const int src_w = benchmark_width_;
  const int src_h = benchmark_height_;
  const int dst_w = src_h;  // Destination swaps the two dimensions.
  const int dst_h = src_w;
  const int kSize = src_w * src_h;

  align_buffer_page_end(orig_pixels, kSize);
  for (int n = 0; n < kSize; ++n) {
    orig_pixels[n] = n;
  }
  align_buffer_page_end(dest_opt_pixels, kSize);
  align_buffer_page_end(dest_c_pixels, kSize);

  // Reference result.
  MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
  ScalePlane(orig_pixels, src_w, src_w, src_h, dest_c_pixels, dst_w, dst_w,
             dst_h, kFilterBilinear);
  MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.

  // Result under test, repeated for benchmarking.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ScalePlane(orig_pixels, src_w, src_w, src_h, dest_opt_pixels, dst_w, dst_w,
               dst_h, kFilterBilinear);
  }

  for (int n = 0; n < kSize; ++n) {
    EXPECT_EQ(dest_c_pixels[n], dest_opt_pixels[n]);
  }

  free_aligned_buffer_page_end(dest_c_pixels);
  free_aligned_buffer_page_end(dest_opt_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
1503
1504 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_Box)1505 TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
1506 const int kSize = benchmark_width_ * benchmark_height_;
1507 align_buffer_page_end(orig_pixels, kSize);
1508 for (int i = 0; i < kSize; ++i) {
1509 orig_pixels[i] = i;
1510 }
1511 align_buffer_page_end(dest_opt_pixels, kSize);
1512 align_buffer_page_end(dest_c_pixels, kSize);
1513
1514 MaskCpuFlags(disable_cpu_flags_); // Disable all CPU optimization.
1515 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
1516 dest_c_pixels, benchmark_height_, benchmark_height_,
1517 benchmark_width_, kFilterBox);
1518 MaskCpuFlags(benchmark_cpu_info_); // Enable all CPU optimization.
1519
1520 for (int i = 0; i < benchmark_iterations_; ++i) {
1521 ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
1522 benchmark_height_, dest_opt_pixels, benchmark_height_,
1523 benchmark_height_, benchmark_width_, kFilterBox);
1524 }
1525
1526 for (int i = 0; i < kSize; ++i) {
1527 EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
1528 }
1529
1530 free_aligned_buffer_page_end(dest_c_pixels);
1531 free_aligned_buffer_page_end(dest_opt_pixels);
1532 free_aligned_buffer_page_end(orig_pixels);
1533 }
1534
1535 } // namespace libyuv
1536