1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdlib.h>
12 #include <time.h>
13 
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17 
18 #ifdef ENABLE_ROW_TESTS
19 #include "libyuv/scale_row.h"  // For ScaleRowDown2Box_Odd_C
20 #endif
21 
22 #define STRINGIZE(line) #line
23 #define FILELINESTR(file, line) file ":" STRINGIZE(line)
24 
25 namespace libyuv {
26 
27 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I420TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)28 static int I420TestFilter(int src_width,
29                           int src_height,
30                           int dst_width,
31                           int dst_height,
32                           FilterMode f,
33                           int benchmark_iterations,
34                           int disable_cpu_flags,
35                           int benchmark_cpu_info) {
36   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
37     return 0;
38   }
39 
40   int i, j;
41   int src_width_uv = (Abs(src_width) + 1) >> 1;
42   int src_height_uv = (Abs(src_height) + 1) >> 1;
43 
44   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
45   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
46 
47   int src_stride_y = Abs(src_width);
48   int src_stride_uv = src_width_uv;
49 
50   align_buffer_page_end(src_y, src_y_plane_size);
51   align_buffer_page_end(src_u, src_uv_plane_size);
52   align_buffer_page_end(src_v, src_uv_plane_size);
53   if (!src_y || !src_u || !src_v) {
54     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
55     return 0;
56   }
57   MemRandomize(src_y, src_y_plane_size);
58   MemRandomize(src_u, src_uv_plane_size);
59   MemRandomize(src_v, src_uv_plane_size);
60 
61   int dst_width_uv = (dst_width + 1) >> 1;
62   int dst_height_uv = (dst_height + 1) >> 1;
63 
64   int64_t dst_y_plane_size = (dst_width) * (dst_height);
65   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
66 
67   int dst_stride_y = dst_width;
68   int dst_stride_uv = dst_width_uv;
69 
70   align_buffer_page_end(dst_y_c, dst_y_plane_size);
71   align_buffer_page_end(dst_u_c, dst_uv_plane_size);
72   align_buffer_page_end(dst_v_c, dst_uv_plane_size);
73   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
74   align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
75   align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
76   if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
77       !dst_v_opt) {
78     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
79     return 0;
80   }
81 
82   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
83   double c_time = get_time();
84   I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
85             src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
86             dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
87   c_time = (get_time() - c_time);
88 
89   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
90   double opt_time = get_time();
91   for (i = 0; i < benchmark_iterations; ++i) {
92     I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
93               src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
94               dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
95               f);
96   }
97   opt_time = (get_time() - opt_time) / benchmark_iterations;
98   // Report performance of C vs OPT.
99   printf("filter %d - %8d us C - %8d us OPT\n", f,
100          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
101 
102   // C version may be a little off from the optimized. Order of
103   //  operations may introduce rounding somewhere. So do a difference
104   //  of the buffers and look to see that the max difference is not
105   //  over 3.
106   int max_diff = 0;
107   for (i = 0; i < (dst_height); ++i) {
108     for (j = 0; j < (dst_width); ++j) {
109       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
110                          dst_y_opt[(i * dst_stride_y) + j]);
111       if (abs_diff > max_diff) {
112         max_diff = abs_diff;
113       }
114     }
115   }
116 
117   for (i = 0; i < (dst_height_uv); ++i) {
118     for (j = 0; j < (dst_width_uv); ++j) {
119       int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
120                          dst_u_opt[(i * dst_stride_uv) + j]);
121       if (abs_diff > max_diff) {
122         max_diff = abs_diff;
123       }
124       abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
125                      dst_v_opt[(i * dst_stride_uv) + j]);
126       if (abs_diff > max_diff) {
127         max_diff = abs_diff;
128       }
129     }
130   }
131 
132   free_aligned_buffer_page_end(dst_y_c);
133   free_aligned_buffer_page_end(dst_u_c);
134   free_aligned_buffer_page_end(dst_v_c);
135   free_aligned_buffer_page_end(dst_y_opt);
136   free_aligned_buffer_page_end(dst_u_opt);
137   free_aligned_buffer_page_end(dst_v_opt);
138   free_aligned_buffer_page_end(src_y);
139   free_aligned_buffer_page_end(src_u);
140   free_aligned_buffer_page_end(src_v);
141 
142   return max_diff;
143 }
144 
145 // Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
146 // 0 = exact.
I420TestFilter_12(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)147 static int I420TestFilter_12(int src_width,
148                              int src_height,
149                              int dst_width,
150                              int dst_height,
151                              FilterMode f,
152                              int benchmark_iterations,
153                              int disable_cpu_flags,
154                              int benchmark_cpu_info) {
155   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156     return 0;
157   }
158 
159   int i;
160   int src_width_uv = (Abs(src_width) + 1) >> 1;
161   int src_height_uv = (Abs(src_height) + 1) >> 1;
162 
163   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
164   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
165 
166   int src_stride_y = Abs(src_width);
167   int src_stride_uv = src_width_uv;
168 
169   align_buffer_page_end(src_y, src_y_plane_size);
170   align_buffer_page_end(src_u, src_uv_plane_size);
171   align_buffer_page_end(src_v, src_uv_plane_size);
172   align_buffer_page_end(src_y_12, src_y_plane_size * 2);
173   align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
174   align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
175   if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
176     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
177     return 0;
178   }
179   uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
180   uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
181   uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
182 
183   MemRandomize(src_y, src_y_plane_size);
184   MemRandomize(src_u, src_uv_plane_size);
185   MemRandomize(src_v, src_uv_plane_size);
186 
187   for (i = 0; i < src_y_plane_size; ++i) {
188     p_src_y_12[i] = src_y[i];
189   }
190   for (i = 0; i < src_uv_plane_size; ++i) {
191     p_src_u_12[i] = src_u[i];
192     p_src_v_12[i] = src_v[i];
193   }
194 
195   int dst_width_uv = (dst_width + 1) >> 1;
196   int dst_height_uv = (dst_height + 1) >> 1;
197 
198   int dst_y_plane_size = (dst_width) * (dst_height);
199   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
200 
201   int dst_stride_y = dst_width;
202   int dst_stride_uv = dst_width_uv;
203 
204   align_buffer_page_end(dst_y_8, dst_y_plane_size);
205   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
206   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
207   align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
208   align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
209   align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
210 
211   uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
212   uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
213   uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
214 
215   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
216   I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
217             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
218             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
219   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
220   for (i = 0; i < benchmark_iterations; ++i) {
221     I420Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
222                  p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
223                  dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
224                  dst_stride_uv, dst_width, dst_height, f);
225   }
226 
227   // Expect an exact match.
228   int max_diff = 0;
229   for (i = 0; i < dst_y_plane_size; ++i) {
230     int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
231     if (abs_diff > max_diff) {
232       max_diff = abs_diff;
233     }
234   }
235   for (i = 0; i < dst_uv_plane_size; ++i) {
236     int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
237     if (abs_diff > max_diff) {
238       max_diff = abs_diff;
239     }
240     abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
241     if (abs_diff > max_diff) {
242       max_diff = abs_diff;
243     }
244   }
245 
246   free_aligned_buffer_page_end(dst_y_8);
247   free_aligned_buffer_page_end(dst_u_8);
248   free_aligned_buffer_page_end(dst_v_8);
249   free_aligned_buffer_page_end(dst_y_12);
250   free_aligned_buffer_page_end(dst_u_12);
251   free_aligned_buffer_page_end(dst_v_12);
252   free_aligned_buffer_page_end(src_y);
253   free_aligned_buffer_page_end(src_u);
254   free_aligned_buffer_page_end(src_v);
255   free_aligned_buffer_page_end(src_y_12);
256   free_aligned_buffer_page_end(src_u_12);
257   free_aligned_buffer_page_end(src_v_12);
258 
259   return max_diff;
260 }
261 
262 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
263 // 0 = exact.
I420TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)264 static int I420TestFilter_16(int src_width,
265                              int src_height,
266                              int dst_width,
267                              int dst_height,
268                              FilterMode f,
269                              int benchmark_iterations,
270                              int disable_cpu_flags,
271                              int benchmark_cpu_info) {
272   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
273     return 0;
274   }
275 
276   int i;
277   int src_width_uv = (Abs(src_width) + 1) >> 1;
278   int src_height_uv = (Abs(src_height) + 1) >> 1;
279 
280   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
281   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
282 
283   int src_stride_y = Abs(src_width);
284   int src_stride_uv = src_width_uv;
285 
286   align_buffer_page_end(src_y, src_y_plane_size);
287   align_buffer_page_end(src_u, src_uv_plane_size);
288   align_buffer_page_end(src_v, src_uv_plane_size);
289   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
290   align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
291   align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
292   if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
293     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
294     return 0;
295   }
296   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
297   uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
298   uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
299 
300   MemRandomize(src_y, src_y_plane_size);
301   MemRandomize(src_u, src_uv_plane_size);
302   MemRandomize(src_v, src_uv_plane_size);
303 
304   for (i = 0; i < src_y_plane_size; ++i) {
305     p_src_y_16[i] = src_y[i];
306   }
307   for (i = 0; i < src_uv_plane_size; ++i) {
308     p_src_u_16[i] = src_u[i];
309     p_src_v_16[i] = src_v[i];
310   }
311 
312   int dst_width_uv = (dst_width + 1) >> 1;
313   int dst_height_uv = (dst_height + 1) >> 1;
314 
315   int dst_y_plane_size = (dst_width) * (dst_height);
316   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
317 
318   int dst_stride_y = dst_width;
319   int dst_stride_uv = dst_width_uv;
320 
321   align_buffer_page_end(dst_y_8, dst_y_plane_size);
322   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
323   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
324   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
325   align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
326   align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
327 
328   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
329   uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
330   uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
331 
332   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
333   I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
334             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
335             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
336   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
337   for (i = 0; i < benchmark_iterations; ++i) {
338     I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
339                  p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
340                  dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
341                  dst_stride_uv, dst_width, dst_height, f);
342   }
343 
344   // Expect an exact match.
345   int max_diff = 0;
346   for (i = 0; i < dst_y_plane_size; ++i) {
347     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
348     if (abs_diff > max_diff) {
349       max_diff = abs_diff;
350     }
351   }
352   for (i = 0; i < dst_uv_plane_size; ++i) {
353     int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
354     if (abs_diff > max_diff) {
355       max_diff = abs_diff;
356     }
357     abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
358     if (abs_diff > max_diff) {
359       max_diff = abs_diff;
360     }
361   }
362 
363   free_aligned_buffer_page_end(dst_y_8);
364   free_aligned_buffer_page_end(dst_u_8);
365   free_aligned_buffer_page_end(dst_v_8);
366   free_aligned_buffer_page_end(dst_y_16);
367   free_aligned_buffer_page_end(dst_u_16);
368   free_aligned_buffer_page_end(dst_v_16);
369   free_aligned_buffer_page_end(src_y);
370   free_aligned_buffer_page_end(src_u);
371   free_aligned_buffer_page_end(src_v);
372   free_aligned_buffer_page_end(src_y_16);
373   free_aligned_buffer_page_end(src_u_16);
374   free_aligned_buffer_page_end(src_v_16);
375 
376   return max_diff;
377 }
378 
379 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I444TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)380 static int I444TestFilter(int src_width,
381                           int src_height,
382                           int dst_width,
383                           int dst_height,
384                           FilterMode f,
385                           int benchmark_iterations,
386                           int disable_cpu_flags,
387                           int benchmark_cpu_info) {
388   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
389     return 0;
390   }
391 
392   int i, j;
393   int src_width_uv = Abs(src_width);
394   int src_height_uv = Abs(src_height);
395 
396   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
397   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
398 
399   int src_stride_y = Abs(src_width);
400   int src_stride_uv = src_width_uv;
401 
402   align_buffer_page_end(src_y, src_y_plane_size);
403   align_buffer_page_end(src_u, src_uv_plane_size);
404   align_buffer_page_end(src_v, src_uv_plane_size);
405   if (!src_y || !src_u || !src_v) {
406     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
407     return 0;
408   }
409   MemRandomize(src_y, src_y_plane_size);
410   MemRandomize(src_u, src_uv_plane_size);
411   MemRandomize(src_v, src_uv_plane_size);
412 
413   int dst_width_uv = dst_width;
414   int dst_height_uv = dst_height;
415 
416   int64_t dst_y_plane_size = (dst_width) * (dst_height);
417   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
418 
419   int dst_stride_y = dst_width;
420   int dst_stride_uv = dst_width_uv;
421 
422   align_buffer_page_end(dst_y_c, dst_y_plane_size);
423   align_buffer_page_end(dst_u_c, dst_uv_plane_size);
424   align_buffer_page_end(dst_v_c, dst_uv_plane_size);
425   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
426   align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
427   align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
428   if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
429       !dst_v_opt) {
430     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
431     return 0;
432   }
433 
434   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
435   double c_time = get_time();
436   I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
437             src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
438             dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
439   c_time = (get_time() - c_time);
440 
441   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
442   double opt_time = get_time();
443   for (i = 0; i < benchmark_iterations; ++i) {
444     I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
445               src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
446               dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
447               f);
448   }
449   opt_time = (get_time() - opt_time) / benchmark_iterations;
450   // Report performance of C vs OPT.
451   printf("filter %d - %8d us C - %8d us OPT\n", f,
452          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
453 
454   // C version may be a little off from the optimized. Order of
455   //  operations may introduce rounding somewhere. So do a difference
456   //  of the buffers and look to see that the max difference is not
457   //  over 3.
458   int max_diff = 0;
459   for (i = 0; i < (dst_height); ++i) {
460     for (j = 0; j < (dst_width); ++j) {
461       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
462                          dst_y_opt[(i * dst_stride_y) + j]);
463       if (abs_diff > max_diff) {
464         max_diff = abs_diff;
465       }
466     }
467   }
468 
469   for (i = 0; i < (dst_height_uv); ++i) {
470     for (j = 0; j < (dst_width_uv); ++j) {
471       int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
472                          dst_u_opt[(i * dst_stride_uv) + j]);
473       if (abs_diff > max_diff) {
474         max_diff = abs_diff;
475       }
476       abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
477                      dst_v_opt[(i * dst_stride_uv) + j]);
478       if (abs_diff > max_diff) {
479         max_diff = abs_diff;
480       }
481     }
482   }
483 
484   free_aligned_buffer_page_end(dst_y_c);
485   free_aligned_buffer_page_end(dst_u_c);
486   free_aligned_buffer_page_end(dst_v_c);
487   free_aligned_buffer_page_end(dst_y_opt);
488   free_aligned_buffer_page_end(dst_u_opt);
489   free_aligned_buffer_page_end(dst_v_opt);
490   free_aligned_buffer_page_end(src_y);
491   free_aligned_buffer_page_end(src_u);
492   free_aligned_buffer_page_end(src_v);
493 
494   return max_diff;
495 }
496 
497 // Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
498 // 0 = exact.
I444TestFilter_12(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)499 static int I444TestFilter_12(int src_width,
500                              int src_height,
501                              int dst_width,
502                              int dst_height,
503                              FilterMode f,
504                              int benchmark_iterations,
505                              int disable_cpu_flags,
506                              int benchmark_cpu_info) {
507   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
508     return 0;
509   }
510 
511   int i;
512   int src_width_uv = Abs(src_width);
513   int src_height_uv = Abs(src_height);
514 
515   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
516   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
517 
518   int src_stride_y = Abs(src_width);
519   int src_stride_uv = src_width_uv;
520 
521   align_buffer_page_end(src_y, src_y_plane_size);
522   align_buffer_page_end(src_u, src_uv_plane_size);
523   align_buffer_page_end(src_v, src_uv_plane_size);
524   align_buffer_page_end(src_y_12, src_y_plane_size * 2);
525   align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
526   align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
527   if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
528     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
529     return 0;
530   }
531   uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
532   uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
533   uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
534 
535   MemRandomize(src_y, src_y_plane_size);
536   MemRandomize(src_u, src_uv_plane_size);
537   MemRandomize(src_v, src_uv_plane_size);
538 
539   for (i = 0; i < src_y_plane_size; ++i) {
540     p_src_y_12[i] = src_y[i];
541   }
542   for (i = 0; i < src_uv_plane_size; ++i) {
543     p_src_u_12[i] = src_u[i];
544     p_src_v_12[i] = src_v[i];
545   }
546 
547   int dst_width_uv = dst_width;
548   int dst_height_uv = dst_height;
549 
550   int dst_y_plane_size = (dst_width) * (dst_height);
551   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
552 
553   int dst_stride_y = dst_width;
554   int dst_stride_uv = dst_width_uv;
555 
556   align_buffer_page_end(dst_y_8, dst_y_plane_size);
557   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
558   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
559   align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
560   align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
561   align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
562 
563   uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
564   uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
565   uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
566 
567   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
568   I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
569             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
570             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
571   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
572   for (i = 0; i < benchmark_iterations; ++i) {
573     I444Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
574                  p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
575                  dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
576                  dst_stride_uv, dst_width, dst_height, f);
577   }
578 
579   // Expect an exact match.
580   int max_diff = 0;
581   for (i = 0; i < dst_y_plane_size; ++i) {
582     int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
583     if (abs_diff > max_diff) {
584       max_diff = abs_diff;
585     }
586   }
587   for (i = 0; i < dst_uv_plane_size; ++i) {
588     int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
589     if (abs_diff > max_diff) {
590       max_diff = abs_diff;
591     }
592     abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
593     if (abs_diff > max_diff) {
594       max_diff = abs_diff;
595     }
596   }
597 
598   free_aligned_buffer_page_end(dst_y_8);
599   free_aligned_buffer_page_end(dst_u_8);
600   free_aligned_buffer_page_end(dst_v_8);
601   free_aligned_buffer_page_end(dst_y_12);
602   free_aligned_buffer_page_end(dst_u_12);
603   free_aligned_buffer_page_end(dst_v_12);
604   free_aligned_buffer_page_end(src_y);
605   free_aligned_buffer_page_end(src_u);
606   free_aligned_buffer_page_end(src_v);
607   free_aligned_buffer_page_end(src_y_12);
608   free_aligned_buffer_page_end(src_u_12);
609   free_aligned_buffer_page_end(src_v_12);
610 
611   return max_diff;
612 }
613 
614 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
615 // 0 = exact.
I444TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)616 static int I444TestFilter_16(int src_width,
617                              int src_height,
618                              int dst_width,
619                              int dst_height,
620                              FilterMode f,
621                              int benchmark_iterations,
622                              int disable_cpu_flags,
623                              int benchmark_cpu_info) {
624   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
625     return 0;
626   }
627 
628   int i;
629   int src_width_uv = Abs(src_width);
630   int src_height_uv = Abs(src_height);
631 
632   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
633   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
634 
635   int src_stride_y = Abs(src_width);
636   int src_stride_uv = src_width_uv;
637 
638   align_buffer_page_end(src_y, src_y_plane_size);
639   align_buffer_page_end(src_u, src_uv_plane_size);
640   align_buffer_page_end(src_v, src_uv_plane_size);
641   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
642   align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
643   align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
644   if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
645     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
646     return 0;
647   }
648   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
649   uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
650   uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
651 
652   MemRandomize(src_y, src_y_plane_size);
653   MemRandomize(src_u, src_uv_plane_size);
654   MemRandomize(src_v, src_uv_plane_size);
655 
656   for (i = 0; i < src_y_plane_size; ++i) {
657     p_src_y_16[i] = src_y[i];
658   }
659   for (i = 0; i < src_uv_plane_size; ++i) {
660     p_src_u_16[i] = src_u[i];
661     p_src_v_16[i] = src_v[i];
662   }
663 
664   int dst_width_uv = dst_width;
665   int dst_height_uv = dst_height;
666 
667   int dst_y_plane_size = (dst_width) * (dst_height);
668   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
669 
670   int dst_stride_y = dst_width;
671   int dst_stride_uv = dst_width_uv;
672 
673   align_buffer_page_end(dst_y_8, dst_y_plane_size);
674   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
675   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
676   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
677   align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
678   align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
679 
680   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
681   uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
682   uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
683 
684   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
685   I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
686             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
687             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
688   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
689   for (i = 0; i < benchmark_iterations; ++i) {
690     I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
691                  p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
692                  dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
693                  dst_stride_uv, dst_width, dst_height, f);
694   }
695 
696   // Expect an exact match.
697   int max_diff = 0;
698   for (i = 0; i < dst_y_plane_size; ++i) {
699     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
700     if (abs_diff > max_diff) {
701       max_diff = abs_diff;
702     }
703   }
704   for (i = 0; i < dst_uv_plane_size; ++i) {
705     int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
706     if (abs_diff > max_diff) {
707       max_diff = abs_diff;
708     }
709     abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
710     if (abs_diff > max_diff) {
711       max_diff = abs_diff;
712     }
713   }
714 
715   free_aligned_buffer_page_end(dst_y_8);
716   free_aligned_buffer_page_end(dst_u_8);
717   free_aligned_buffer_page_end(dst_v_8);
718   free_aligned_buffer_page_end(dst_y_16);
719   free_aligned_buffer_page_end(dst_u_16);
720   free_aligned_buffer_page_end(dst_v_16);
721   free_aligned_buffer_page_end(src_y);
722   free_aligned_buffer_page_end(src_u);
723   free_aligned_buffer_page_end(src_v);
724   free_aligned_buffer_page_end(src_y_16);
725   free_aligned_buffer_page_end(src_u_16);
726   free_aligned_buffer_page_end(src_v_16);
727 
728   return max_diff;
729 }
730 
731 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
NV12TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)732 static int NV12TestFilter(int src_width,
733                           int src_height,
734                           int dst_width,
735                           int dst_height,
736                           FilterMode f,
737                           int benchmark_iterations,
738                           int disable_cpu_flags,
739                           int benchmark_cpu_info) {
740   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
741     return 0;
742   }
743 
744   int i, j;
745   int src_width_uv = (Abs(src_width) + 1) >> 1;
746   int src_height_uv = (Abs(src_height) + 1) >> 1;
747 
748   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
749   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2;
750 
751   int src_stride_y = Abs(src_width);
752   int src_stride_uv = src_width_uv * 2;
753 
754   align_buffer_page_end(src_y, src_y_plane_size);
755   align_buffer_page_end(src_uv, src_uv_plane_size);
756   if (!src_y || !src_uv) {
757     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
758     return 0;
759   }
760   MemRandomize(src_y, src_y_plane_size);
761   MemRandomize(src_uv, src_uv_plane_size);
762 
763   int dst_width_uv = (dst_width + 1) >> 1;
764   int dst_height_uv = (dst_height + 1) >> 1;
765 
766   int64_t dst_y_plane_size = (dst_width) * (dst_height);
767   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2;
768 
769   int dst_stride_y = dst_width;
770   int dst_stride_uv = dst_width_uv * 2;
771 
772   align_buffer_page_end(dst_y_c, dst_y_plane_size);
773   align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
774   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
775   align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
776   if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) {
777     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
778     return 0;
779   }
780 
781   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
782   double c_time = get_time();
783   NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
784             dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width,
785             dst_height, f);
786   c_time = (get_time() - c_time);
787 
788   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
789   double opt_time = get_time();
790   for (i = 0; i < benchmark_iterations; ++i) {
791     NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
792               dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv, dst_width,
793               dst_height, f);
794   }
795   opt_time = (get_time() - opt_time) / benchmark_iterations;
796   // Report performance of C vs OPT.
797   printf("filter %d - %8d us C - %8d us OPT\n", f,
798          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
799 
800   // C version may be a little off from the optimized. Order of
801   //  operations may introduce rounding somewhere. So do a difference
802   //  of the buffers and look to see that the max difference is not
803   //  over 3.
804   int max_diff = 0;
805   for (i = 0; i < (dst_height); ++i) {
806     for (j = 0; j < (dst_width); ++j) {
807       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
808                          dst_y_opt[(i * dst_stride_y) + j]);
809       if (abs_diff > max_diff) {
810         max_diff = abs_diff;
811       }
812     }
813   }
814 
815   for (i = 0; i < (dst_height_uv); ++i) {
816     for (j = 0; j < (dst_width_uv * 2); ++j) {
817       int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
818                          dst_uv_opt[(i * dst_stride_uv) + j]);
819       if (abs_diff > max_diff) {
820         max_diff = abs_diff;
821       }
822     }
823   }
824 
825   free_aligned_buffer_page_end(dst_y_c);
826   free_aligned_buffer_page_end(dst_uv_c);
827   free_aligned_buffer_page_end(dst_y_opt);
828   free_aligned_buffer_page_end(dst_uv_opt);
829   free_aligned_buffer_page_end(src_y);
830   free_aligned_buffer_page_end(src_uv);
831 
832   return max_diff;
833 }
834 
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// DX(x, nom, denom) uses Abs(x) and multiplies by |nom|; SX(x, nom, denom)
// uses |x| as given and multiplies by |denom|. Every parameter is
// parenthesized so that expression arguments (e.g. `1 + 2`) expand with the
// intended precedence. DX does not use |denom|; the parameter is kept so the
// two macros share a signature.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
840 
841 #define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff)           \
842   TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) {                 \
843     int diff = I420TestFilter(                                                \
844         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
845         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
846         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
847         benchmark_cpu_info_);                                                 \
848     EXPECT_LE(diff, max_diff);                                                \
849   }                                                                           \
850   TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) {                 \
851     int diff = I444TestFilter(                                                \
852         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
853         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
854         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
855         benchmark_cpu_info_);                                                 \
856     EXPECT_LE(diff, max_diff);                                                \
857   }                                                                           \
858   TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_12) { \
859     int diff = I420TestFilter_12(                                             \
860         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
861         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
862         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
863         benchmark_cpu_info_);                                                 \
864     EXPECT_LE(diff, max_diff);                                                \
865   }                                                                           \
866   TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_12) { \
867     int diff = I444TestFilter_12(                                             \
868         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
869         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
870         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
871         benchmark_cpu_info_);                                                 \
872     EXPECT_LE(diff, max_diff);                                                \
873   }                                                                           \
874   TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) {                 \
875     int diff = NV12TestFilter(                                                \
876         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),  \
877         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),  \
878         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,           \
879         benchmark_cpu_info_);                                                 \
880     EXPECT_LE(diff, max_diff);                                                \
881   }
882 
883 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
884 // filtering is different fixed point implementations for SSSE3, Neon and C.
885 #ifdef ENABLE_SLOW_TESTS
886 #define TEST_FACTOR(name, nom, denom, boxdiff)  \
887   TEST_FACTOR1(, name, None, nom, denom, 0)     \
888   TEST_FACTOR1(, name, Linear, nom, denom, 3)   \
889   TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
890   TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
891 #else
892 #define TEST_FACTOR(name, nom, denom, boxdiff)           \
893   TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0)     \
894   TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3)   \
895   TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
896   TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
897 #endif
898 
899 TEST_FACTOR(2, 1, 2, 0)
900 TEST_FACTOR(4, 1, 4, 0)
901 // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
902 TEST_FACTOR(3by4, 3, 4, 1)
903 TEST_FACTOR(3by8, 3, 8, 1)
904 TEST_FACTOR(3, 1, 3, 0)
905 #undef TEST_FACTOR1
906 #undef TEST_FACTOR
907 #undef SX
908 #undef DX
909 
910 #define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff)       \
911   TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) {      \
912     int diff = I420TestFilter(benchmark_width_, benchmark_height_, width,     \
913                               height, kFilter##filter, benchmark_iterations_, \
914                               disable_cpu_flags_, benchmark_cpu_info_);       \
915     EXPECT_LE(diff, max_diff);                                                \
916   }                                                                           \
917   TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) {      \
918     int diff = I444TestFilter(benchmark_width_, benchmark_height_, width,     \
919                               height, kFilter##filter, benchmark_iterations_, \
920                               disable_cpu_flags_, benchmark_cpu_info_);       \
921     EXPECT_LE(diff, max_diff);                                                \
922   }                                                                           \
923   TEST_F(LibYUVScaleTest,                                                     \
924          DISABLED_##I420##name##To##width##x##height##_##filter##_12) {       \
925     int diff = I420TestFilter_12(                                             \
926         benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
927         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
928     EXPECT_LE(diff, max_diff);                                                \
929   }                                                                           \
930   TEST_F(LibYUVScaleTest,                                                     \
931          DISABLED_##I444##name##To##width##x##height##_##filter##_12) {       \
932     int diff = I444TestFilter_12(                                             \
933         benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
934         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
935     EXPECT_LE(diff, max_diff);                                                \
936   }                                                                           \
937   TEST_F(LibYUVScaleTest,                                                     \
938          DISABLED_##I420##name##To##width##x##height##_##filter##_16) {       \
939     int diff = I420TestFilter_16(                                             \
940         benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
941         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
942     EXPECT_LE(diff, max_diff);                                                \
943   }                                                                           \
944   TEST_F(LibYUVScaleTest,                                                     \
945          DISABLED_##I444##name##To##width##x##height##_##filter##_16) {       \
946     int diff = I444TestFilter_16(                                             \
947         benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
948         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
949     EXPECT_LE(diff, max_diff);                                                \
950   }                                                                           \
951   TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) {      \
952     int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width,     \
953                               height, kFilter##filter, benchmark_iterations_, \
954                               disable_cpu_flags_, benchmark_cpu_info_);       \
955     EXPECT_LE(diff, max_diff);                                                \
956   }                                                                           \
957   TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) {    \
958     int diff = I420TestFilter(width, height, Abs(benchmark_width_),           \
959                               Abs(benchmark_height_), kFilter##filter,        \
960                               benchmark_iterations_, disable_cpu_flags_,      \
961                               benchmark_cpu_info_);                           \
962     EXPECT_LE(diff, max_diff);                                                \
963   }                                                                           \
964   TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) {    \
965     int diff = I444TestFilter(width, height, Abs(benchmark_width_),           \
966                               Abs(benchmark_height_), kFilter##filter,        \
967                               benchmark_iterations_, disable_cpu_flags_,      \
968                               benchmark_cpu_info_);                           \
969     EXPECT_LE(diff, max_diff);                                                \
970   }                                                                           \
971   TEST_F(LibYUVScaleTest,                                                     \
972          DISABLED_##I420##name##From##width##x##height##_##filter##_12) {     \
973     int diff = I420TestFilter_12(width, height, Abs(benchmark_width_),        \
974                                  Abs(benchmark_height_), kFilter##filter,     \
975                                  benchmark_iterations_, disable_cpu_flags_,   \
976                                  benchmark_cpu_info_);                        \
977     EXPECT_LE(diff, max_diff);                                                \
978   }                                                                           \
979   TEST_F(LibYUVScaleTest,                                                     \
980          DISABLED_##I444##name##From##width##x##height##_##filter##_12) {     \
981     int diff = I444TestFilter_12(width, height, Abs(benchmark_width_),        \
982                                  Abs(benchmark_height_), kFilter##filter,     \
983                                  benchmark_iterations_, disable_cpu_flags_,   \
984                                  benchmark_cpu_info_);                        \
985     EXPECT_LE(diff, max_diff);                                                \
986   }                                                                           \
987   TEST_F(LibYUVScaleTest,                                                     \
988          DISABLED_##I420##name##From##width##x##height##_##filter##_16) {     \
989     int diff = I420TestFilter_16(width, height, Abs(benchmark_width_),        \
990                                  Abs(benchmark_height_), kFilter##filter,     \
991                                  benchmark_iterations_, disable_cpu_flags_,   \
992                                  benchmark_cpu_info_);                        \
993     EXPECT_LE(diff, max_diff);                                                \
994   }                                                                           \
995   TEST_F(LibYUVScaleTest,                                                     \
996          DISABLED_##I444##name##From##width##x##height##_##filter##_16) {     \
997     int diff = I444TestFilter_16(width, height, Abs(benchmark_width_),        \
998                                  Abs(benchmark_height_), kFilter##filter,     \
999                                  benchmark_iterations_, disable_cpu_flags_,   \
1000                                  benchmark_cpu_info_);                        \
1001     EXPECT_LE(diff, max_diff);                                                \
1002   }                                                                           \
1003   TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) {    \
1004     int diff = NV12TestFilter(width, height, Abs(benchmark_width_),           \
1005                               Abs(benchmark_height_), kFilter##filter,        \
1006                               benchmark_iterations_, disable_cpu_flags_,      \
1007                               benchmark_cpu_info_);                           \
1008     EXPECT_LE(diff, max_diff);                                                \
1009   }
1010 
1011 #ifdef ENABLE_SLOW_TESTS
1012 // Test scale to a specified size with all 4 filters.
1013 #define TEST_SCALETO(name, width, height)           \
1014   TEST_SCALETO1(, name, width, height, None, 0)     \
1015   TEST_SCALETO1(, name, width, height, Linear, 3)   \
1016   TEST_SCALETO1(, name, width, height, Bilinear, 3) \
1017   TEST_SCALETO1(, name, width, height, Box, 3)
1018 #else
1019 // Test scale to a specified size with all 4 filters.
1020 #define TEST_SCALETO(name, width, height)                    \
1021   TEST_SCALETO1(DISABLED_, name, width, height, None, 0)     \
1022   TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3)   \
1023   TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
1024   TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
1025 #endif
1026 
1027 TEST_SCALETO(Scale, 1, 1)
1028 TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */
1029 TEST_SCALETO(Scale, 320, 240)
1030 TEST_SCALETO(Scale, 569, 480)
1031 TEST_SCALETO(Scale, 640, 360)
1032 TEST_SCALETO(Scale, 1280, 720)
1033 #ifdef ENABLE_SLOW_TESTS
1034 TEST_SCALETO(Scale, 1920, 1080)
1035 #endif  // ENABLE_SLOW_TESTS
1036 #undef TEST_SCALETO1
1037 #undef TEST_SCALETO
1038 
1039 #define TEST_SCALESWAPXY1(DISABLED_, name, filter, max_diff)               \
1040   TEST_F(LibYUVScaleTest, I420##name##SwapXY_##filter) {                   \
1041     int diff = I420TestFilter(benchmark_width_, benchmark_height_,         \
1042                               benchmark_height_, benchmark_width_,         \
1043                               kFilter##filter, benchmark_iterations_,      \
1044                               disable_cpu_flags_, benchmark_cpu_info_);    \
1045     EXPECT_LE(diff, max_diff);                                             \
1046   }                                                                        \
1047   TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) {                   \
1048     int diff = I444TestFilter(benchmark_width_, benchmark_height_,         \
1049                               benchmark_height_, benchmark_width_,         \
1050                               kFilter##filter, benchmark_iterations_,      \
1051                               disable_cpu_flags_, benchmark_cpu_info_);    \
1052     EXPECT_LE(diff, max_diff);                                             \
1053   }                                                                        \
1054   TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_12) {   \
1055     int diff = I420TestFilter_12(benchmark_width_, benchmark_height_,      \
1056                                  benchmark_height_, benchmark_width_,      \
1057                                  kFilter##filter, benchmark_iterations_,   \
1058                                  disable_cpu_flags_, benchmark_cpu_info_); \
1059     EXPECT_LE(diff, max_diff);                                             \
1060   }                                                                        \
1061   TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_12) {   \
1062     int diff = I444TestFilter_12(benchmark_width_, benchmark_height_,      \
1063                                  benchmark_height_, benchmark_width_,      \
1064                                  kFilter##filter, benchmark_iterations_,   \
1065                                  disable_cpu_flags_, benchmark_cpu_info_); \
1066     EXPECT_LE(diff, max_diff);                                             \
1067   }                                                                        \
1068   TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) {   \
1069     int diff = I420TestFilter_16(benchmark_width_, benchmark_height_,      \
1070                                  benchmark_height_, benchmark_width_,      \
1071                                  kFilter##filter, benchmark_iterations_,   \
1072                                  disable_cpu_flags_, benchmark_cpu_info_); \
1073     EXPECT_LE(diff, max_diff);                                             \
1074   }                                                                        \
1075   TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) {   \
1076     int diff = I444TestFilter_16(benchmark_width_, benchmark_height_,      \
1077                                  benchmark_height_, benchmark_width_,      \
1078                                  kFilter##filter, benchmark_iterations_,   \
1079                                  disable_cpu_flags_, benchmark_cpu_info_); \
1080     EXPECT_LE(diff, max_diff);                                             \
1081   }                                                                        \
1082   TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) {                   \
1083     int diff = NV12TestFilter(benchmark_width_, benchmark_height_,         \
1084                               benchmark_height_, benchmark_width_,         \
1085                               kFilter##filter, benchmark_iterations_,      \
1086                               disable_cpu_flags_, benchmark_cpu_info_);    \
1087     EXPECT_LE(diff, max_diff);                                             \
1088   }
1089 
1090 // Test scale to a specified size with all 4 filters.
1091 #ifdef ENABLE_SLOW_TESTS
1092 TEST_SCALESWAPXY1(, Scale, None, 0)
1093 TEST_SCALESWAPXY1(, Scale, Linear, 3)
1094 TEST_SCALESWAPXY1(, Scale, Bilinear, 3)
1095 TEST_SCALESWAPXY1(, Scale, Box, 3)
1096 #else
1097 TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0)
1098 TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3)
1099 TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
1100 TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
1101 #endif
1102 
1103 #undef TEST_SCALESWAPXY1
1104 
1105 #ifdef ENABLE_ROW_TESTS
1106 #ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_Odd_SSSE3)1107 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
1108   SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
1109   SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
1110   SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
1111   memset(orig_pixels, 0, sizeof(orig_pixels));
1112   memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
1113   memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
1114 
1115   int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
1116   if (!has_ssse3) {
1117     printf("Warning SSSE3 not detected; Skipping test.\n");
1118   } else {
1119     // TL.
1120     orig_pixels[0] = 255u;
1121     orig_pixels[1] = 0u;
1122     orig_pixels[128 + 0] = 0u;
1123     orig_pixels[128 + 1] = 0u;
1124     // TR.
1125     orig_pixels[2] = 0u;
1126     orig_pixels[3] = 100u;
1127     orig_pixels[128 + 2] = 0u;
1128     orig_pixels[128 + 3] = 0u;
1129     // BL.
1130     orig_pixels[4] = 0u;
1131     orig_pixels[5] = 0u;
1132     orig_pixels[128 + 4] = 50u;
1133     orig_pixels[128 + 5] = 0u;
1134     // BR.
1135     orig_pixels[6] = 0u;
1136     orig_pixels[7] = 0u;
1137     orig_pixels[128 + 6] = 0u;
1138     orig_pixels[128 + 7] = 20u;
1139     // Odd.
1140     orig_pixels[126] = 4u;
1141     orig_pixels[127] = 255u;
1142     orig_pixels[128 + 126] = 16u;
1143     orig_pixels[128 + 127] = 255u;
1144 
1145     // Test regular half size.
1146     ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
1147 
1148     EXPECT_EQ(64u, dst_pixels_c[0]);
1149     EXPECT_EQ(25u, dst_pixels_c[1]);
1150     EXPECT_EQ(13u, dst_pixels_c[2]);
1151     EXPECT_EQ(5u, dst_pixels_c[3]);
1152     EXPECT_EQ(0u, dst_pixels_c[4]);
1153     EXPECT_EQ(133u, dst_pixels_c[63]);
1154 
1155     // Test Odd width version - Last pixel is just 1 horizontal pixel.
1156     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
1157 
1158     EXPECT_EQ(64u, dst_pixels_c[0]);
1159     EXPECT_EQ(25u, dst_pixels_c[1]);
1160     EXPECT_EQ(13u, dst_pixels_c[2]);
1161     EXPECT_EQ(5u, dst_pixels_c[3]);
1162     EXPECT_EQ(0u, dst_pixels_c[4]);
1163     EXPECT_EQ(10u, dst_pixels_c[63]);
1164 
1165     // Test one pixel less, should skip the last pixel.
1166     memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
1167     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
1168 
1169     EXPECT_EQ(64u, dst_pixels_c[0]);
1170     EXPECT_EQ(25u, dst_pixels_c[1]);
1171     EXPECT_EQ(13u, dst_pixels_c[2]);
1172     EXPECT_EQ(5u, dst_pixels_c[3]);
1173     EXPECT_EQ(0u, dst_pixels_c[4]);
1174     EXPECT_EQ(0u, dst_pixels_c[63]);
1175 
1176     // Test regular half size SSSE3.
1177     ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
1178 
1179     EXPECT_EQ(64u, dst_pixels_opt[0]);
1180     EXPECT_EQ(25u, dst_pixels_opt[1]);
1181     EXPECT_EQ(13u, dst_pixels_opt[2]);
1182     EXPECT_EQ(5u, dst_pixels_opt[3]);
1183     EXPECT_EQ(0u, dst_pixels_opt[4]);
1184     EXPECT_EQ(133u, dst_pixels_opt[63]);
1185 
1186     // Compare C and SSSE3 match.
1187     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
1188     ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
1189     for (int i = 0; i < 64; ++i) {
1190       EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
1191     }
1192   }
1193 }
1194 #endif  // HAS_SCALEROWDOWN2_SSSE3
1195 
1196 extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
1197                                     ptrdiff_t src_stride,
1198                                     uint16_t* dst,
1199                                     int dst_width);
1200 extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
1201                                    ptrdiff_t src_stride,
1202                                    uint16_t* dst,
1203                                    int dst_width);
1204 extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
1205                                  ptrdiff_t src_stride,
1206                                  uint16_t* dst,
1207                                  int dst_width);
1208 
TEST_F(LibYUVScaleTest,TestScaleRowUp2_16)1209 TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
1210   SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]);  // 2 rows + 1 pixel overrun.
1211   SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
1212   SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
1213 
1214   memset(orig_pixels, 0, sizeof(orig_pixels));
1215   memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
1216   memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
1217 
1218   for (int i = 0; i < 640 * 2 + 1; ++i) {
1219     orig_pixels[i] = i;
1220   }
1221   ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
1222   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1223 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
1224     int has_neon = TestCpuFlag(kCpuHasNEON);
1225     if (has_neon) {
1226       ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1227     } else {
1228       ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1229     }
1230 #elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
1231     int has_mmi = TestCpuFlag(kCpuHasMMI);
1232     if (has_mmi) {
1233       ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1234     } else {
1235       ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1236     }
1237 #else
1238     ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
1239 #endif
1240   }
1241 
1242   for (int i = 0; i < 1280; ++i) {
1243     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
1244   }
1245   EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
1246   EXPECT_EQ(dst_pixels_c[1279], 800);
1247 }
1248 
1249 extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
1250                                          ptrdiff_t src_stride,
1251                                          uint16_t* dst,
1252                                          int dst_width);
1253 
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_16)1254 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
1255   SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
1256   SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
1257   SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
1258 
1259   memset(orig_pixels, 0, sizeof(orig_pixels));
1260   memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
1261   memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
1262 
1263   for (int i = 0; i < 2560 * 2; ++i) {
1264     orig_pixels[i] = i;
1265   }
1266   ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
1267   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1268 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
1269     int has_neon = TestCpuFlag(kCpuHasNEON);
1270     if (has_neon) {
1271       ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
1272     } else {
1273       ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
1274     }
1275 #else
1276     ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
1277 #endif
1278   }
1279 
1280   for (int i = 0; i < 1280; ++i) {
1281     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
1282   }
1283 
1284   EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
1285   EXPECT_EQ(dst_pixels_c[1279], 3839);
1286 }
1287 #endif  // ENABLE_ROW_TESTS
1288 
1289 // Test scaling plane with 8 bit C vs 12 bit C and return maximum pixel
1290 // difference.
1291 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)1292 static int TestPlaneFilter_16(int src_width,
1293                               int src_height,
1294                               int dst_width,
1295                               int dst_height,
1296                               FilterMode f,
1297                               int benchmark_iterations,
1298                               int disable_cpu_flags,
1299                               int benchmark_cpu_info) {
1300   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
1301     return 0;
1302   }
1303 
1304   int i;
1305   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
1306   int src_stride_y = Abs(src_width);
1307   int dst_y_plane_size = dst_width * dst_height;
1308   int dst_stride_y = dst_width;
1309 
1310   align_buffer_page_end(src_y, src_y_plane_size);
1311   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
1312   align_buffer_page_end(dst_y_8, dst_y_plane_size);
1313   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
1314   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
1315   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
1316 
1317   MemRandomize(src_y, src_y_plane_size);
1318   memset(dst_y_8, 0, dst_y_plane_size);
1319   memset(dst_y_16, 1, dst_y_plane_size * 2);
1320 
1321   for (i = 0; i < src_y_plane_size; ++i) {
1322     p_src_y_16[i] = src_y[i] & 255;
1323   }
1324 
1325   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
1326   ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
1327              dst_width, dst_height, f);
1328   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
1329 
1330   for (i = 0; i < benchmark_iterations; ++i) {
1331     ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
1332                   dst_stride_y, dst_width, dst_height, f);
1333   }
1334 
1335   // Expect an exact match.
1336   int max_diff = 0;
1337   for (i = 0; i < dst_y_plane_size; ++i) {
1338     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
1339     if (abs_diff > max_diff) {
1340       max_diff = abs_diff;
1341     }
1342   }
1343 
1344   free_aligned_buffer_page_end(dst_y_8);
1345   free_aligned_buffer_page_end(dst_y_16);
1346   free_aligned_buffer_page_end(src_y);
1347   free_aligned_buffer_page_end(src_y_16);
1348 
1349   return max_diff;
1350 }
1351 
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// DX(x, nom, denom) uses Abs(x) and multiplies by |nom|; SX(x, nom, denom)
// uses |x| as given and multiplies by |denom|. Every parameter is
// parenthesized so that expression arguments (e.g. `1 + 2`) expand with the
// intended precedence. DX does not use |denom|; the parameter is kept so the
// two macros share a signature.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
1357 
// Defines a single test named DISABLED_ScalePlaneDownBy<name>_<filter>_16.
// The test derives source dimensions with SX and destination dimensions with
// DX from the benchmark width and height, runs TestPlaneFilter_16 with
// kFilter<filter>, and expects the returned difference to be no larger than
// max_diff. GoogleTest skips tests whose name starts with DISABLED_ unless
// disabled tests are explicitly requested.
#define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
  TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
    const int src_w = SX(benchmark_width_, nom, denom);                        \
    const int src_h = SX(benchmark_height_, nom, denom);                       \
    const int dst_w = DX(benchmark_width_, nom, denom);                        \
    const int dst_h = DX(benchmark_height_, nom, denom);                       \
    int diff = TestPlaneFilter_16(src_w, src_h, dst_w, dst_h, kFilter##filter, \
                                  benchmark_iterations_, disable_cpu_flags_,   \
                                  benchmark_cpu_info_);                        \
    EXPECT_LE(diff, max_diff);                                                 \
  }
1367 
// Expands to four TEST_FACTOR1 tests for one scale factor, one per filter
// (None, Linear, Bilinear, Box). The unfiltered test must match exactly, so
// its tolerance is fixed at 0. The three filtered tests share
// filtered_max_diff, because the SSSE3, Neon and C paths use different
// fixed-point implementations of the filters.
#define TEST_FACTOR(name, nom, denom, filtered_max_diff)      \
  TEST_FACTOR1(name, None, nom, denom, 0)                     \
  TEST_FACTOR1(name, Linear, nom, denom, filtered_max_diff)   \
  TEST_FACTOR1(name, Bilinear, nom, denom, filtered_max_diff) \
  TEST_FACTOR1(name, Box, nom, denom, filtered_max_diff)
1375 
// Instantiate the 16-bit plane downscale tests. Arguments are
// (name suffix, nom, denom, allowed difference for the filtered variants);
// the destination:source size ratio is nom:denom.
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
// TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
// The helper macros are only needed for the instantiations above; undefine
// them so they do not leak into the rest of the file.
#undef TEST_FACTOR1
#undef TEST_FACTOR
#undef SX
#undef DX
1386 
// Scales a 48x3 plane whose pixel values equal their linear index down by 3
// in each direction to 16x1, then checks the first output pixel for both the
// bilinear and the unfiltered path. The bilinear call is repeated in
// proportion to the benchmark size so the test can also serve as a benchmark.
TEST_F(LibYUVScaleTest, PlaneTest3x) {
  const int kSrcStride = 48;
  const int kDstStride = 16;
  const int kSize = kSrcStride * 3;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < kSize; ++i) {
    orig_pixels[i] = i;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  int iterations16 =
      benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
  // With fewer than 16 benchmark pixels the count above truncates to zero, and
  // the expectation below would inspect a destination the scaler never wrote.
  // Always scale at least once.
  if (iterations16 < 1) {
    iterations16 = 1;
  }
  for (int i = 0; i < iterations16; ++i) {
    ScalePlane(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
               kFilterBilinear);
  }

  // 49 is the source value at row 1, column 1 (1 * 48 + 1): the middle of the
  // first 3x3 block.
  EXPECT_EQ(49, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
             kFilterNone);

  // The unfiltered path is expected to pick the same middle pixel.
  EXPECT_EQ(49, dest_pixels[0]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
1414 
// Scales a 64x4 plane whose pixel values equal their linear index down by 4
// in each direction to 16x1, then checks the first output pixel for both the
// bilinear and the unfiltered path. The bilinear call is repeated in
// proportion to the benchmark size so the test can also serve as a benchmark.
TEST_F(LibYUVScaleTest, PlaneTest4x) {
  const int kSrcStride = 64;
  const int kDstStride = 16;
  const int kSize = kSrcStride * 4;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < kSize; ++i) {
    orig_pixels[i] = i;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  int iterations16 =
      benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
  // With fewer than 16 benchmark pixels the count above truncates to zero, and
  // the expectation below would inspect a destination the scaler never wrote.
  // Always scale at least once.
  if (iterations16 < 1) {
    iterations16 = 1;
  }
  for (int i = 0; i < iterations16; ++i) {
    ScalePlane(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
               kFilterBilinear);
  }

  // Rounded average of the central 2x2 pixels of the first 4x4 block:
  // rows 1-2, columns 1-2 hold 65, 66, 129 and 130; the +2 rounds to nearest.
  EXPECT_EQ((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0]);

  ScalePlane(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
             kFilterNone);

  EXPECT_EQ(130, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
1442 
1443 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_None)1444 TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
1445   const int kSize = benchmark_width_ * benchmark_height_;
1446   align_buffer_page_end(orig_pixels, kSize);
1447   for (int i = 0; i < kSize; ++i) {
1448     orig_pixels[i] = i;
1449   }
1450   align_buffer_page_end(dest_opt_pixels, kSize);
1451   align_buffer_page_end(dest_c_pixels, kSize);
1452 
1453   MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
1454   ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
1455              dest_c_pixels, benchmark_height_, benchmark_height_,
1456              benchmark_width_, kFilterNone);
1457   MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
1458 
1459   for (int i = 0; i < benchmark_iterations_; ++i) {
1460     ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
1461                benchmark_height_, dest_opt_pixels, benchmark_height_,
1462                benchmark_height_, benchmark_width_, kFilterNone);
1463   }
1464 
1465   for (int i = 0; i < kSize; ++i) {
1466     EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
1467   }
1468 
1469   free_aligned_buffer_page_end(dest_c_pixels);
1470   free_aligned_buffer_page_end(dest_opt_pixels);
1471   free_aligned_buffer_page_end(orig_pixels);
1472 }
1473 
// Bilinear variant of the swapped-dimension scale test: a
// benchmark_width_ x benchmark_height_ plane is scaled to
// benchmark_height_ x benchmark_width_ with kFilterBilinear. The output with
// CPU optimizations masked off must equal the output with them enabled.
TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixels = kWidth * kHeight;

  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dest_opt_pixels, kPixels);
  align_buffer_page_end(dest_c_pixels, kPixels);

  // Source content is a ramp of the pixel index.
  for (int idx = 0; idx < kPixels; ++idx) {
    orig_pixels[idx] = idx;
  }

  // Source is kWidth x kHeight with stride kWidth; destination is
  // kHeight x kWidth with stride kHeight.
  MaskCpuFlags(disable_cpu_flags_);  // Reference: optimizations off.
  ScalePlane(orig_pixels, kWidth, kWidth, kHeight, dest_c_pixels, kHeight,
             kHeight, kWidth, kFilterBilinear);

  MaskCpuFlags(benchmark_cpu_info_);  // Timed runs: optimizations on.
  for (int run = 0; run < benchmark_iterations_; ++run) {
    ScalePlane(orig_pixels, kWidth, kWidth, kHeight, dest_opt_pixels, kHeight,
               kHeight, kWidth, kFilterBilinear);
  }

  // Both code paths are expected to agree exactly.
  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
  }

  free_aligned_buffer_page_end(dest_c_pixels);
  free_aligned_buffer_page_end(dest_opt_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
1503 
1504 // Intent is to test 200x50 to 50x200 but width and height can be parameters.
TEST_F(LibYUVScaleTest,PlaneTestRotate_Box)1505 TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
1506   const int kSize = benchmark_width_ * benchmark_height_;
1507   align_buffer_page_end(orig_pixels, kSize);
1508   for (int i = 0; i < kSize; ++i) {
1509     orig_pixels[i] = i;
1510   }
1511   align_buffer_page_end(dest_opt_pixels, kSize);
1512   align_buffer_page_end(dest_c_pixels, kSize);
1513 
1514   MaskCpuFlags(disable_cpu_flags_);  // Disable all CPU optimization.
1515   ScalePlane(orig_pixels, benchmark_width_, benchmark_width_, benchmark_height_,
1516              dest_c_pixels, benchmark_height_, benchmark_height_,
1517              benchmark_width_, kFilterBox);
1518   MaskCpuFlags(benchmark_cpu_info_);  // Enable all CPU optimization.
1519 
1520   for (int i = 0; i < benchmark_iterations_; ++i) {
1521     ScalePlane(orig_pixels, benchmark_width_, benchmark_width_,
1522                benchmark_height_, dest_opt_pixels, benchmark_height_,
1523                benchmark_height_, benchmark_width_, kFilterBox);
1524   }
1525 
1526   for (int i = 0; i < kSize; ++i) {
1527     EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
1528   }
1529 
1530   free_aligned_buffer_page_end(dest_c_pixels);
1531   free_aligned_buffer_page_end(dest_opt_pixels);
1532   free_aligned_buffer_page_end(orig_pixels);
1533 }
1534 
1535 }  // namespace libyuv
1536