1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdlib.h>
12 #include <time.h>
13 
14 #include "../unit_test/unit_test.h"
15 #include "libyuv/cpu_id.h"
16 #include "libyuv/scale.h"
17 
18 #ifdef ENABLE_ROW_TESTS
19 #include "libyuv/scale_row.h"  // For ScaleRowDown2Box_Odd_C
20 #endif
21 
// Converts its argument, exactly as written, into a string literal.
#define STRINGIZE(token) #token
// Builds the string literal "<path>:<lineno>". Because `lineno` is not an
// operand of # here, a macro argument such as __LINE__ is expanded to its
// number before STRINGIZE turns it into text.
#define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
24 
25 namespace libyuv {
26 
27 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I420TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)28 static int I420TestFilter(int src_width,
29                           int src_height,
30                           int dst_width,
31                           int dst_height,
32                           FilterMode f,
33                           int benchmark_iterations,
34                           int disable_cpu_flags,
35                           int benchmark_cpu_info) {
36   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
37     return 0;
38   }
39 
40   int i, j;
41   int src_width_uv = (Abs(src_width) + 1) >> 1;
42   int src_height_uv = (Abs(src_height) + 1) >> 1;
43 
44   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
45   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
46 
47   int src_stride_y = Abs(src_width);
48   int src_stride_uv = src_width_uv;
49 
50   align_buffer_page_end(src_y, src_y_plane_size);
51   align_buffer_page_end(src_u, src_uv_plane_size);
52   align_buffer_page_end(src_v, src_uv_plane_size);
53   if (!src_y || !src_u || !src_v) {
54     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
55     return 0;
56   }
57   MemRandomize(src_y, src_y_plane_size);
58   MemRandomize(src_u, src_uv_plane_size);
59   MemRandomize(src_v, src_uv_plane_size);
60 
61   int dst_width_uv = (dst_width + 1) >> 1;
62   int dst_height_uv = (dst_height + 1) >> 1;
63 
64   int64_t dst_y_plane_size = (dst_width) * (dst_height);
65   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
66 
67   int dst_stride_y = dst_width;
68   int dst_stride_uv = dst_width_uv;
69 
70   align_buffer_page_end(dst_y_c, dst_y_plane_size);
71   align_buffer_page_end(dst_u_c, dst_uv_plane_size);
72   align_buffer_page_end(dst_v_c, dst_uv_plane_size);
73   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
74   align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
75   align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
76   if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
77       !dst_v_opt) {
78     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
79     return 0;
80   }
81 
82   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
83   double c_time = get_time();
84   I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
85             src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
86             dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
87   c_time = (get_time() - c_time);
88 
89   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
90   double opt_time = get_time();
91   for (i = 0; i < benchmark_iterations; ++i) {
92     I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
93               src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
94               dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
95               f);
96   }
97   opt_time = (get_time() - opt_time) / benchmark_iterations;
98   // Report performance of C vs OPT.
99   printf("filter %d - %8d us C - %8d us OPT\n", f,
100          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
101 
102   // C version may be a little off from the optimized. Order of
103   //  operations may introduce rounding somewhere. So do a difference
104   //  of the buffers and look to see that the max difference is not
105   //  over 3.
106   int max_diff = 0;
107   for (i = 0; i < (dst_height); ++i) {
108     for (j = 0; j < (dst_width); ++j) {
109       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
110                          dst_y_opt[(i * dst_stride_y) + j]);
111       if (abs_diff > max_diff) {
112         max_diff = abs_diff;
113       }
114     }
115   }
116 
117   for (i = 0; i < (dst_height_uv); ++i) {
118     for (j = 0; j < (dst_width_uv); ++j) {
119       int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
120                          dst_u_opt[(i * dst_stride_uv) + j]);
121       if (abs_diff > max_diff) {
122         max_diff = abs_diff;
123       }
124       abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
125                      dst_v_opt[(i * dst_stride_uv) + j]);
126       if (abs_diff > max_diff) {
127         max_diff = abs_diff;
128       }
129     }
130   }
131 
132   free_aligned_buffer_page_end(dst_y_c);
133   free_aligned_buffer_page_end(dst_u_c);
134   free_aligned_buffer_page_end(dst_v_c);
135   free_aligned_buffer_page_end(dst_y_opt);
136   free_aligned_buffer_page_end(dst_u_opt);
137   free_aligned_buffer_page_end(dst_v_opt);
138   free_aligned_buffer_page_end(src_y);
139   free_aligned_buffer_page_end(src_u);
140   free_aligned_buffer_page_end(src_v);
141 
142   return max_diff;
143 }
144 
145 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
146 // 0 = exact.
I420TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)147 static int I420TestFilter_16(int src_width,
148                              int src_height,
149                              int dst_width,
150                              int dst_height,
151                              FilterMode f,
152                              int benchmark_iterations,
153                              int disable_cpu_flags,
154                              int benchmark_cpu_info) {
155   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156     return 0;
157   }
158 
159   int i;
160   int src_width_uv = (Abs(src_width) + 1) >> 1;
161   int src_height_uv = (Abs(src_height) + 1) >> 1;
162 
163   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
164   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
165 
166   int src_stride_y = Abs(src_width);
167   int src_stride_uv = src_width_uv;
168 
169   align_buffer_page_end(src_y, src_y_plane_size);
170   align_buffer_page_end(src_u, src_uv_plane_size);
171   align_buffer_page_end(src_v, src_uv_plane_size);
172   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
173   align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
174   align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
175   if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
176     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
177     return 0;
178   }
179   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
180   uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
181   uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
182 
183   MemRandomize(src_y, src_y_plane_size);
184   MemRandomize(src_u, src_uv_plane_size);
185   MemRandomize(src_v, src_uv_plane_size);
186 
187   for (i = 0; i < src_y_plane_size; ++i) {
188     p_src_y_16[i] = src_y[i];
189   }
190   for (i = 0; i < src_uv_plane_size; ++i) {
191     p_src_u_16[i] = src_u[i];
192     p_src_v_16[i] = src_v[i];
193   }
194 
195   int dst_width_uv = (dst_width + 1) >> 1;
196   int dst_height_uv = (dst_height + 1) >> 1;
197 
198   int dst_y_plane_size = (dst_width) * (dst_height);
199   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
200 
201   int dst_stride_y = dst_width;
202   int dst_stride_uv = dst_width_uv;
203 
204   align_buffer_page_end(dst_y_8, dst_y_plane_size);
205   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
206   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
207   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
208   align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
209   align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
210 
211   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
212   uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
213   uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
214 
215   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
216   I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
217             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
218             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
219   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
220   for (i = 0; i < benchmark_iterations; ++i) {
221     I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
222                  p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
223                  dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
224                  dst_stride_uv, dst_width, dst_height, f);
225   }
226 
227   // Expect an exact match.
228   int max_diff = 0;
229   for (i = 0; i < dst_y_plane_size; ++i) {
230     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
231     if (abs_diff > max_diff) {
232       max_diff = abs_diff;
233     }
234   }
235   for (i = 0; i < dst_uv_plane_size; ++i) {
236     int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
237     if (abs_diff > max_diff) {
238       max_diff = abs_diff;
239     }
240     abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
241     if (abs_diff > max_diff) {
242       max_diff = abs_diff;
243     }
244   }
245 
246   free_aligned_buffer_page_end(dst_y_8);
247   free_aligned_buffer_page_end(dst_u_8);
248   free_aligned_buffer_page_end(dst_v_8);
249   free_aligned_buffer_page_end(dst_y_16);
250   free_aligned_buffer_page_end(dst_u_16);
251   free_aligned_buffer_page_end(dst_v_16);
252   free_aligned_buffer_page_end(src_y);
253   free_aligned_buffer_page_end(src_u);
254   free_aligned_buffer_page_end(src_v);
255   free_aligned_buffer_page_end(src_y_16);
256   free_aligned_buffer_page_end(src_u_16);
257   free_aligned_buffer_page_end(src_v_16);
258 
259   return max_diff;
260 }
261 
262 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I444TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)263 static int I444TestFilter(int src_width,
264                           int src_height,
265                           int dst_width,
266                           int dst_height,
267                           FilterMode f,
268                           int benchmark_iterations,
269                           int disable_cpu_flags,
270                           int benchmark_cpu_info) {
271   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
272     return 0;
273   }
274 
275   int i, j;
276   int src_width_uv = Abs(src_width);
277   int src_height_uv = Abs(src_height);
278 
279   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
280   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
281 
282   int src_stride_y = Abs(src_width);
283   int src_stride_uv = src_width_uv;
284 
285   align_buffer_page_end(src_y, src_y_plane_size);
286   align_buffer_page_end(src_u, src_uv_plane_size);
287   align_buffer_page_end(src_v, src_uv_plane_size);
288   if (!src_y || !src_u || !src_v) {
289     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
290     return 0;
291   }
292   MemRandomize(src_y, src_y_plane_size);
293   MemRandomize(src_u, src_uv_plane_size);
294   MemRandomize(src_v, src_uv_plane_size);
295 
296   int dst_width_uv = dst_width;
297   int dst_height_uv = dst_height;
298 
299   int64_t dst_y_plane_size = (dst_width) * (dst_height);
300   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
301 
302   int dst_stride_y = dst_width;
303   int dst_stride_uv = dst_width_uv;
304 
305   align_buffer_page_end(dst_y_c, dst_y_plane_size);
306   align_buffer_page_end(dst_u_c, dst_uv_plane_size);
307   align_buffer_page_end(dst_v_c, dst_uv_plane_size);
308   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
309   align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
310   align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
311   if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
312       !dst_v_opt) {
313     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
314     return 0;
315   }
316 
317   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
318   double c_time = get_time();
319   I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
320             src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
321             dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
322   c_time = (get_time() - c_time);
323 
324   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
325   double opt_time = get_time();
326   for (i = 0; i < benchmark_iterations; ++i) {
327     I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
328               src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
329               dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
330               f);
331   }
332   opt_time = (get_time() - opt_time) / benchmark_iterations;
333   // Report performance of C vs OPT.
334   printf("filter %d - %8d us C - %8d us OPT\n", f,
335          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
336 
337   // C version may be a little off from the optimized. Order of
338   //  operations may introduce rounding somewhere. So do a difference
339   //  of the buffers and look to see that the max difference is not
340   //  over 3.
341   int max_diff = 0;
342   for (i = 0; i < (dst_height); ++i) {
343     for (j = 0; j < (dst_width); ++j) {
344       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
345                          dst_y_opt[(i * dst_stride_y) + j]);
346       if (abs_diff > max_diff) {
347         max_diff = abs_diff;
348       }
349     }
350   }
351 
352   for (i = 0; i < (dst_height_uv); ++i) {
353     for (j = 0; j < (dst_width_uv); ++j) {
354       int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
355                          dst_u_opt[(i * dst_stride_uv) + j]);
356       if (abs_diff > max_diff) {
357         max_diff = abs_diff;
358       }
359       abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
360                      dst_v_opt[(i * dst_stride_uv) + j]);
361       if (abs_diff > max_diff) {
362         max_diff = abs_diff;
363       }
364     }
365   }
366 
367   free_aligned_buffer_page_end(dst_y_c);
368   free_aligned_buffer_page_end(dst_u_c);
369   free_aligned_buffer_page_end(dst_v_c);
370   free_aligned_buffer_page_end(dst_y_opt);
371   free_aligned_buffer_page_end(dst_u_opt);
372   free_aligned_buffer_page_end(dst_v_opt);
373   free_aligned_buffer_page_end(src_y);
374   free_aligned_buffer_page_end(src_u);
375   free_aligned_buffer_page_end(src_v);
376 
377   return max_diff;
378 }
379 
380 // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
381 // 0 = exact.
I444TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)382 static int I444TestFilter_16(int src_width,
383                              int src_height,
384                              int dst_width,
385                              int dst_height,
386                              FilterMode f,
387                              int benchmark_iterations,
388                              int disable_cpu_flags,
389                              int benchmark_cpu_info) {
390   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
391     return 0;
392   }
393 
394   int i;
395   int src_width_uv = Abs(src_width);
396   int src_height_uv = Abs(src_height);
397 
398   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
399   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
400 
401   int src_stride_y = Abs(src_width);
402   int src_stride_uv = src_width_uv;
403 
404   align_buffer_page_end(src_y, src_y_plane_size);
405   align_buffer_page_end(src_u, src_uv_plane_size);
406   align_buffer_page_end(src_v, src_uv_plane_size);
407   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
408   align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
409   align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
410   if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
411     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
412     return 0;
413   }
414   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
415   uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
416   uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
417 
418   MemRandomize(src_y, src_y_plane_size);
419   MemRandomize(src_u, src_uv_plane_size);
420   MemRandomize(src_v, src_uv_plane_size);
421 
422   for (i = 0; i < src_y_plane_size; ++i) {
423     p_src_y_16[i] = src_y[i];
424   }
425   for (i = 0; i < src_uv_plane_size; ++i) {
426     p_src_u_16[i] = src_u[i];
427     p_src_v_16[i] = src_v[i];
428   }
429 
430   int dst_width_uv = dst_width;
431   int dst_height_uv = dst_height;
432 
433   int dst_y_plane_size = (dst_width) * (dst_height);
434   int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
435 
436   int dst_stride_y = dst_width;
437   int dst_stride_uv = dst_width_uv;
438 
439   align_buffer_page_end(dst_y_8, dst_y_plane_size);
440   align_buffer_page_end(dst_u_8, dst_uv_plane_size);
441   align_buffer_page_end(dst_v_8, dst_uv_plane_size);
442   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
443   align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
444   align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
445 
446   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
447   uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
448   uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
449 
450   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
451   I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
452             src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
453             dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
454   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
455   for (i = 0; i < benchmark_iterations; ++i) {
456     I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
457                  p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
458                  dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
459                  dst_stride_uv, dst_width, dst_height, f);
460   }
461 
462   // Expect an exact match.
463   int max_diff = 0;
464   for (i = 0; i < dst_y_plane_size; ++i) {
465     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
466     if (abs_diff > max_diff) {
467       max_diff = abs_diff;
468     }
469   }
470   for (i = 0; i < dst_uv_plane_size; ++i) {
471     int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
472     if (abs_diff > max_diff) {
473       max_diff = abs_diff;
474     }
475     abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
476     if (abs_diff > max_diff) {
477       max_diff = abs_diff;
478     }
479   }
480 
481   free_aligned_buffer_page_end(dst_y_8);
482   free_aligned_buffer_page_end(dst_u_8);
483   free_aligned_buffer_page_end(dst_v_8);
484   free_aligned_buffer_page_end(dst_y_16);
485   free_aligned_buffer_page_end(dst_u_16);
486   free_aligned_buffer_page_end(dst_v_16);
487   free_aligned_buffer_page_end(src_y);
488   free_aligned_buffer_page_end(src_u);
489   free_aligned_buffer_page_end(src_v);
490   free_aligned_buffer_page_end(src_y_16);
491   free_aligned_buffer_page_end(src_u_16);
492   free_aligned_buffer_page_end(src_v_16);
493 
494   return max_diff;
495 }
496 
497 // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
NV12TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)498 static int NV12TestFilter(int src_width,
499                           int src_height,
500                           int dst_width,
501                           int dst_height,
502                           FilterMode f,
503                           int benchmark_iterations,
504                           int disable_cpu_flags,
505                           int benchmark_cpu_info) {
506   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
507     return 0;
508   }
509 
510   int i, j;
511   int src_width_uv = (Abs(src_width) + 1) >> 1;
512   int src_height_uv = (Abs(src_height) + 1) >> 1;
513 
514   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
515   int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2;
516 
517   int src_stride_y = Abs(src_width);
518   int src_stride_uv = src_width_uv * 2;
519 
520   align_buffer_page_end(src_y, src_y_plane_size);
521   align_buffer_page_end(src_uv, src_uv_plane_size);
522   if (!src_y || !src_uv) {
523     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
524     return 0;
525   }
526   MemRandomize(src_y, src_y_plane_size);
527   MemRandomize(src_uv, src_uv_plane_size);
528 
529   int dst_width_uv = (dst_width + 1) >> 1;
530   int dst_height_uv = (dst_height + 1) >> 1;
531 
532   int64_t dst_y_plane_size = (dst_width) * (dst_height);
533   int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2;
534 
535   int dst_stride_y = dst_width;
536   int dst_stride_uv = dst_width_uv * 2;
537 
538   align_buffer_page_end(dst_y_c, dst_y_plane_size);
539   align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
540   align_buffer_page_end(dst_y_opt, dst_y_plane_size);
541   align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
542   if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) {
543     printf("Skipped.  Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
544     return 0;
545   }
546 
547   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
548   double c_time = get_time();
549   NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
550             dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width,
551             dst_height, f);
552   c_time = (get_time() - c_time);
553 
554   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
555   double opt_time = get_time();
556   for (i = 0; i < benchmark_iterations; ++i) {
557     NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
558               dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv, dst_width,
559               dst_height, f);
560   }
561   opt_time = (get_time() - opt_time) / benchmark_iterations;
562   // Report performance of C vs OPT.
563   printf("filter %d - %8d us C - %8d us OPT\n", f,
564          static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
565 
566   // C version may be a little off from the optimized. Order of
567   //  operations may introduce rounding somewhere. So do a difference
568   //  of the buffers and look to see that the max difference is not
569   //  over 3.
570   int max_diff = 0;
571   for (i = 0; i < (dst_height); ++i) {
572     for (j = 0; j < (dst_width); ++j) {
573       int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
574                          dst_y_opt[(i * dst_stride_y) + j]);
575       if (abs_diff > max_diff) {
576         max_diff = abs_diff;
577       }
578     }
579   }
580 
581   for (i = 0; i < (dst_height_uv); ++i) {
582     for (j = 0; j < (dst_width_uv * 2); ++j) {
583       int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
584                          dst_uv_opt[(i * dst_stride_uv) + j]);
585       if (abs_diff > max_diff) {
586         max_diff = abs_diff;
587       }
588     }
589   }
590 
591   free_aligned_buffer_page_end(dst_y_c);
592   free_aligned_buffer_page_end(dst_uv_c);
593   free_aligned_buffer_page_end(dst_y_opt);
594   free_aligned_buffer_page_end(dst_uv_opt);
595   free_aligned_buffer_page_end(src_y);
596   free_aligned_buffer_page_end(src_uv);
597 
598   return max_diff;
599 }
600 
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// DX gives the destination size, a multiple of 2 * nom; SX gives the source
// size, a multiple of 2 * denom. DX takes denom only so that both macros are
// called with the same argument list. Every argument is parenthesized so
// that expression arguments (for example SX(w + pad, 3, 4)) keep their
// intended precedence.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
606 
// Defines the scale-down tests for one ratio (nom / denom) and one filter
// mode. Source sizes come from SX() and destination sizes from DX() applied
// to the fixture's benchmark dimensions, and each test expects the reported
// difference to be at most max_diff. The DISABLED_ argument is pasted only in
// front of the two 16 bit test names; the 8 bit I420, I444 and NV12 tests are
// always defined under their plain names.
#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff)           \
  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) {                 \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    const int diff = I420TestFilter(                                          \
        src_w, src_h, dst_w, dst_h, kFilter##filter, benchmark_iterations_,   \
        disable_cpu_flags_, benchmark_cpu_info_);                             \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) {                 \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    const int diff = I444TestFilter(                                          \
        src_w, src_h, dst_w, dst_h, kFilter##filter, benchmark_iterations_,   \
        disable_cpu_flags_, benchmark_cpu_info_);                             \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_16) { \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    const int diff = I420TestFilter_16(                                       \
        src_w, src_h, dst_w, dst_h, kFilter##filter, benchmark_iterations_,   \
        disable_cpu_flags_, benchmark_cpu_info_);                             \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_16) { \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    const int diff = I444TestFilter_16(                                       \
        src_w, src_h, dst_w, dst_h, kFilter##filter, benchmark_iterations_,   \
        disable_cpu_flags_, benchmark_cpu_info_);                             \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) {                 \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    const int diff = NV12TestFilter(                                          \
        src_w, src_h, dst_w, dst_h, kFilter##filter, benchmark_iterations_,   \
        disable_cpu_flags_, benchmark_cpu_info_);                             \
    EXPECT_LE(diff, max_diff);                                                \
  }
648 
// Test a scale factor with all 4 filters (None, Linear, Bilinear, Box).
// Unfiltered scaling is expected to be exact; Linear and Bilinear may differ
// by up to 3 because SSSE3, Neon and C use different fixed point
// implementations, and the Box tolerance is supplied per factor as boxdiff.
// Unless ENABLE_SLOW_TESTS is defined, DISABLED_ is forwarded to TEST_FACTOR1
// as its name prefix argument.
#ifndef ENABLE_SLOW_TESTS
#define TEST_FACTOR(name, nom, denom, boxdiff)           \
  TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0)     \
  TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3)   \
  TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
  TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
#else
#define TEST_FACTOR(name, nom, denom, boxdiff)  \
  TEST_FACTOR1(, name, None, nom, denom, 0)     \
  TEST_FACTOR1(, name, Linear, nom, denom, 3)   \
  TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
  TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
#endif
664 
// Instantiate the scale-down tests. Arguments are (name, nom, denom, boxdiff):
// the test-name suffix, the nom/denom scale ratio, and the maximum difference
// allowed for the Box filter.
TEST_FACTOR(2, 1, 2, 0)
TEST_FACTOR(4, 1, 4, 0)
// TEST_FACTOR(8, 1, 8, 0)  Disabled for benchmark performance: it takes
// about 90 seconds.
TEST_FACTOR(3by4, 3, 4, 1)
TEST_FACTOR(3by8, 3, 8, 1)
TEST_FACTOR(3, 1, 3, 0)
// The helper macros are no longer needed once the tests are instantiated.
#undef TEST_FACTOR1
#undef TEST_FACTOR
#undef SX
#undef DX
675 
676 #define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff)       \
677   TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) {      \
678     int diff = I420TestFilter(benchmark_width_, benchmark_height_, width,     \
679                               height, kFilter##filter, benchmark_iterations_, \
680                               disable_cpu_flags_, benchmark_cpu_info_);       \
681     EXPECT_LE(diff, max_diff);                                                \
682   }                                                                           \
683   TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) {      \
684     int diff = I444TestFilter(benchmark_width_, benchmark_height_, width,     \
685                               height, kFilter##filter, benchmark_iterations_, \
686                               disable_cpu_flags_, benchmark_cpu_info_);       \
687     EXPECT_LE(diff, max_diff);                                                \
688   }                                                                           \
689   TEST_F(LibYUVScaleTest,                                                     \
690          DISABLED_##I420##name##To##width##x##height##_##filter##_16) {       \
691     int diff = I420TestFilter_16(                                             \
692         benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
693         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
694     EXPECT_LE(diff, max_diff);                                                \
695   }                                                                           \
696   TEST_F(LibYUVScaleTest,                                                     \
697          DISABLED_##I444##name##To##width##x##height##_##filter##_16) {       \
698     int diff = I444TestFilter_16(                                             \
699         benchmark_width_, benchmark_height_, width, height, kFilter##filter,  \
700         benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_);      \
701     EXPECT_LE(diff, max_diff);                                                \
702   }                                                                           \
703   TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) {      \
704     int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width,     \
705                               height, kFilter##filter, benchmark_iterations_, \
706                               disable_cpu_flags_, benchmark_cpu_info_);       \
707     EXPECT_LE(diff, max_diff);                                                \
708   }                                                                           \
709   TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) {    \
710     int diff = I420TestFilter(width, height, Abs(benchmark_width_),           \
711                               Abs(benchmark_height_), kFilter##filter,        \
712                               benchmark_iterations_, disable_cpu_flags_,      \
713                               benchmark_cpu_info_);                           \
714     EXPECT_LE(diff, max_diff);                                                \
715   }                                                                           \
716   TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) {    \
717     int diff = I444TestFilter(width, height, Abs(benchmark_width_),           \
718                               Abs(benchmark_height_), kFilter##filter,        \
719                               benchmark_iterations_, disable_cpu_flags_,      \
720                               benchmark_cpu_info_);                           \
721     EXPECT_LE(diff, max_diff);                                                \
722   }                                                                           \
723   TEST_F(LibYUVScaleTest,                                                     \
724          DISABLED_##I420##name##From##width##x##height##_##filter##_16) {     \
725     int diff = I420TestFilter_16(width, height, Abs(benchmark_width_),        \
726                                  Abs(benchmark_height_), kFilter##filter,     \
727                                  benchmark_iterations_, disable_cpu_flags_,   \
728                                  benchmark_cpu_info_);                        \
729     EXPECT_LE(diff, max_diff);                                                \
730   }                                                                           \
731   TEST_F(LibYUVScaleTest,                                                     \
732          DISABLED_##I444##name##From##width##x##height##_##filter##_16) {     \
733     int diff = I444TestFilter_16(width, height, Abs(benchmark_width_),        \
734                                  Abs(benchmark_height_), kFilter##filter,     \
735                                  benchmark_iterations_, disable_cpu_flags_,   \
736                                  benchmark_cpu_info_);                        \
737     EXPECT_LE(diff, max_diff);                                                \
738   }                                                                           \
739   TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) {    \
740     int diff = NV12TestFilter(width, height, Abs(benchmark_width_),           \
741                               Abs(benchmark_height_), kFilter##filter,        \
742                               benchmark_iterations_, disable_cpu_flags_,      \
743                               benchmark_cpu_info_);                           \
744     EXPECT_LE(diff, max_diff);                                                \
745   }
746 
747 #ifdef ENABLE_SLOW_TESTS
748 // Test scale to a specified size with all 4 filters.
749 #define TEST_SCALETO(name, width, height)           \
750   TEST_SCALETO1(, name, width, height, None, 0)     \
751   TEST_SCALETO1(, name, width, height, Linear, 3)   \
752   TEST_SCALETO1(, name, width, height, Bilinear, 3) \
753   TEST_SCALETO1(, name, width, height, Box, 3)
754 #else
755 // Test scale to a specified size with all 4 filters.
756 #define TEST_SCALETO(name, width, height)                    \
757   TEST_SCALETO1(DISABLED_, name, width, height, None, 0)     \
758   TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3)   \
759   TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
760   TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
761 #endif
762 
763 TEST_SCALETO(Scale, 1, 1)
764 TEST_SCALETO(Scale, 320, 240)
765 TEST_SCALETO(Scale, 569, 480)
766 TEST_SCALETO(Scale, 640, 360)
767 TEST_SCALETO(Scale, 1280, 720)
768 #ifdef ENABLE_SLOW_TESTS
769 TEST_SCALETO(Scale, 1920, 1080)
770 #endif  // ENABLE_SLOW_TESTS
771 #undef TEST_SCALETO1
772 #undef TEST_SCALETO
773 
774 #ifdef ENABLE_ROW_TESTS
775 #ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_Odd_SSSE3)776 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
777   SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
778   SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
779   SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
780   memset(orig_pixels, 0, sizeof(orig_pixels));
781   memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
782   memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
783 
784   int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
785   if (!has_ssse3) {
786     printf("Warning SSSE3 not detected; Skipping test.\n");
787   } else {
788     // TL.
789     orig_pixels[0] = 255u;
790     orig_pixels[1] = 0u;
791     orig_pixels[128 + 0] = 0u;
792     orig_pixels[128 + 1] = 0u;
793     // TR.
794     orig_pixels[2] = 0u;
795     orig_pixels[3] = 100u;
796     orig_pixels[128 + 2] = 0u;
797     orig_pixels[128 + 3] = 0u;
798     // BL.
799     orig_pixels[4] = 0u;
800     orig_pixels[5] = 0u;
801     orig_pixels[128 + 4] = 50u;
802     orig_pixels[128 + 5] = 0u;
803     // BR.
804     orig_pixels[6] = 0u;
805     orig_pixels[7] = 0u;
806     orig_pixels[128 + 6] = 0u;
807     orig_pixels[128 + 7] = 20u;
808     // Odd.
809     orig_pixels[126] = 4u;
810     orig_pixels[127] = 255u;
811     orig_pixels[128 + 126] = 16u;
812     orig_pixels[128 + 127] = 255u;
813 
814     // Test regular half size.
815     ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
816 
817     EXPECT_EQ(64u, dst_pixels_c[0]);
818     EXPECT_EQ(25u, dst_pixels_c[1]);
819     EXPECT_EQ(13u, dst_pixels_c[2]);
820     EXPECT_EQ(5u, dst_pixels_c[3]);
821     EXPECT_EQ(0u, dst_pixels_c[4]);
822     EXPECT_EQ(133u, dst_pixels_c[63]);
823 
824     // Test Odd width version - Last pixel is just 1 horizontal pixel.
825     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
826 
827     EXPECT_EQ(64u, dst_pixels_c[0]);
828     EXPECT_EQ(25u, dst_pixels_c[1]);
829     EXPECT_EQ(13u, dst_pixels_c[2]);
830     EXPECT_EQ(5u, dst_pixels_c[3]);
831     EXPECT_EQ(0u, dst_pixels_c[4]);
832     EXPECT_EQ(10u, dst_pixels_c[63]);
833 
834     // Test one pixel less, should skip the last pixel.
835     memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
836     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
837 
838     EXPECT_EQ(64u, dst_pixels_c[0]);
839     EXPECT_EQ(25u, dst_pixels_c[1]);
840     EXPECT_EQ(13u, dst_pixels_c[2]);
841     EXPECT_EQ(5u, dst_pixels_c[3]);
842     EXPECT_EQ(0u, dst_pixels_c[4]);
843     EXPECT_EQ(0u, dst_pixels_c[63]);
844 
845     // Test regular half size SSSE3.
846     ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
847 
848     EXPECT_EQ(64u, dst_pixels_opt[0]);
849     EXPECT_EQ(25u, dst_pixels_opt[1]);
850     EXPECT_EQ(13u, dst_pixels_opt[2]);
851     EXPECT_EQ(5u, dst_pixels_opt[3]);
852     EXPECT_EQ(0u, dst_pixels_opt[4]);
853     EXPECT_EQ(133u, dst_pixels_opt[63]);
854 
855     // Compare C and SSSE3 match.
856     ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
857     ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
858     for (int i = 0; i < 64; ++i) {
859       EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
860     }
861   }
862 }
863 #endif  // HAS_SCALEROWDOWN2_SSSE3
864 
865 extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
866                                     ptrdiff_t src_stride,
867                                     uint16_t* dst,
868                                     int dst_width);
869 extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
870                                    ptrdiff_t src_stride,
871                                    uint16_t* dst,
872                                    int dst_width);
873 extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
874                                  ptrdiff_t src_stride,
875                                  uint16_t* dst,
876                                  int dst_width);
877 
TEST_F(LibYUVScaleTest,TestScaleRowUp2_16)878 TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
879   SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]);  // 2 rows + 1 pixel overrun.
880   SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
881   SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
882 
883   memset(orig_pixels, 0, sizeof(orig_pixels));
884   memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
885   memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
886 
887   for (int i = 0; i < 640 * 2 + 1; ++i) {
888     orig_pixels[i] = i;
889   }
890   ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
891   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
892 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
893     int has_neon = TestCpuFlag(kCpuHasNEON);
894     if (has_neon) {
895       ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
896     } else {
897       ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
898     }
899 #elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
900     int has_mmi = TestCpuFlag(kCpuHasMMI);
901     if (has_mmi) {
902       ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
903     } else {
904       ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
905     }
906 #else
907     ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
908 #endif
909   }
910 
911   for (int i = 0; i < 1280; ++i) {
912     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
913   }
914   EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
915   EXPECT_EQ(dst_pixels_c[1279], 800);
916 }
917 
918 extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
919                                          ptrdiff_t src_stride,
920                                          uint16_t* dst,
921                                          int dst_width);
922 
TEST_F(LibYUVScaleTest,TestScaleRowDown2Box_16)923 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
924   SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
925   SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
926   SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
927 
928   memset(orig_pixels, 0, sizeof(orig_pixels));
929   memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
930   memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
931 
932   for (int i = 0; i < 2560 * 2; ++i) {
933     orig_pixels[i] = i;
934   }
935   ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
936   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
937 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
938     int has_neon = TestCpuFlag(kCpuHasNEON);
939     if (has_neon) {
940       ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
941     } else {
942       ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
943     }
944 #else
945     ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
946 #endif
947   }
948 
949   for (int i = 0; i < 1280; ++i) {
950     EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
951   }
952 
953   EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
954   EXPECT_EQ(dst_pixels_c[1279], 3839);
955 }
956 #endif  // ENABLE_ROW_TESTS
957 
958 // Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
959 // difference.
960 // 0 = exact.
TestPlaneFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)961 static int TestPlaneFilter_16(int src_width,
962                               int src_height,
963                               int dst_width,
964                               int dst_height,
965                               FilterMode f,
966                               int benchmark_iterations,
967                               int disable_cpu_flags,
968                               int benchmark_cpu_info) {
969   if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
970     return 0;
971   }
972 
973   int i;
974   int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
975   int src_stride_y = Abs(src_width);
976   int dst_y_plane_size = dst_width * dst_height;
977   int dst_stride_y = dst_width;
978 
979   align_buffer_page_end(src_y, src_y_plane_size);
980   align_buffer_page_end(src_y_16, src_y_plane_size * 2);
981   align_buffer_page_end(dst_y_8, dst_y_plane_size);
982   align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
983   uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
984   uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
985 
986   MemRandomize(src_y, src_y_plane_size);
987   memset(dst_y_8, 0, dst_y_plane_size);
988   memset(dst_y_16, 1, dst_y_plane_size * 2);
989 
990   for (i = 0; i < src_y_plane_size; ++i) {
991     p_src_y_16[i] = src_y[i] & 255;
992   }
993 
994   MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
995   ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
996              dst_width, dst_height, f);
997   MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
998 
999   for (i = 0; i < benchmark_iterations; ++i) {
1000     ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
1001                   dst_stride_y, dst_width, dst_height, f);
1002   }
1003 
1004   // Expect an exact match.
1005   int max_diff = 0;
1006   for (i = 0; i < dst_y_plane_size; ++i) {
1007     int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
1008     if (abs_diff > max_diff) {
1009       max_diff = abs_diff;
1010     }
1011   }
1012 
1013   free_aligned_buffer_page_end(dst_y_8);
1014   free_aligned_buffer_page_end(dst_y_16);
1015   free_aligned_buffer_page_end(src_y);
1016   free_aligned_buffer_page_end(src_y_16);
1017 
1018   return max_diff;
1019 }
1020 
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// With k = (x / nom + 1) / 2 (integer division), DX yields k * nom * 2 for the
// destination and SX yields k * denom * 2 for the source, so for non-negative
// x the two are in the exact ratio nom : denom. DX takes Abs(x); SX uses x as
// given. DX does not use denom; it is kept so both macros take the same
// arguments.
// Every argument is parenthesized so compound expressions expand correctly.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
1026 
1027 #define TEST_FACTOR1(name, filter, nom, denom, max_diff)                       \
1028   TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) { \
1029     int diff = TestPlaneFilter_16(                                             \
1030         SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom),   \
1031         DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom),   \
1032         kFilter##filter, benchmark_iterations_, disable_cpu_flags_,            \
1033         benchmark_cpu_info_);                                                  \
1034     EXPECT_LE(diff, max_diff);                                                 \
1035   }
1036 
1037 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
1038 // filtering is different fixed point implementations for SSSE3, Neon and C.
1039 #define TEST_FACTOR(name, nom, denom, boxdiff)      \
1040   TEST_FACTOR1(name, None, nom, denom, 0)           \
1041   TEST_FACTOR1(name, Linear, nom, denom, boxdiff)   \
1042   TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
1043   TEST_FACTOR1(name, Box, nom, denom, boxdiff)
1044 
1045 TEST_FACTOR(2, 1, 2, 0)
1046 TEST_FACTOR(4, 1, 4, 0)
1047 // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance.  Takes 90 seconds.
1048 TEST_FACTOR(3by4, 3, 4, 1)
1049 TEST_FACTOR(3by8, 3, 8, 1)
1050 TEST_FACTOR(3, 1, 3, 0)
1051 #undef TEST_FACTOR1
1052 #undef TEST_FACTOR
1053 #undef SX
1054 #undef DX
1055 }  // namespace libyuv
1056