1 // Copyright 2021 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/convolve.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstdio>
22 #include <cstring>
23 #include <ostream>
24 #include <string>
25 #include <tuple>
26 
27 #include "absl/strings/match.h"
28 #include "absl/strings/str_format.h"
29 #include "absl/strings/string_view.h"
30 #include "absl/time/clock.h"
31 #include "absl/time/time.h"
32 #include "gtest/gtest.h"
33 #include "src/dsp/constants.h"
34 #include "src/dsp/dsp.h"
35 #include "src/utils/common.h"
36 #include "src/utils/compiler_attributes.h"
37 #include "src/utils/constants.h"
38 #include "src/utils/cpu.h"
39 #include "src/utils/memory.h"
40 #include "tests/block_utils.h"
41 #include "tests/third_party/libvpx/acm_random.h"
42 #include "tests/third_party/libvpx/md5_helper.h"
43 #include "tests/utils.h"
44 
45 namespace libgav1 {
46 namespace dsp {
47 namespace {
48 
49 // The convolve function will access at most (block_height + 7) rows/columns
50 // from the beginning.
51 constexpr int kMaxBlockWidth = kMaxSuperBlockSizeInPixels + kSubPixelTaps;
52 constexpr int kMaxBlockHeight = kMaxSuperBlockSizeInPixels + kSubPixelTaps;
53 
54 // Test all the filters in |kSubPixelFilters|. There are 6 different filters but
55 // filters [4] and [5] are only reached through GetFilterIndex().
56 constexpr int kMinimumViableRuns = 4 * 16;
57 
58 struct ConvolveTestParam {
59   enum BlockSize {
60     kBlockSize2x2,
61     kBlockSize2x4,
62     kBlockSize4x2,
63     kBlockSize4x4,
64     kBlockSize4x8,
65     kBlockSize8x2,
66     kBlockSize8x4,
67     kBlockSize8x8,
68     kBlockSize8x16,
69     kBlockSize16x8,
70     kBlockSize16x16,
71     kBlockSize16x32,
72     kBlockSize32x16,
73     kBlockSize32x32,
74     kBlockSize32x64,
75     kBlockSize64x32,
76     kBlockSize64x64,
77     kBlockSize64x128,
78     kBlockSize128x64,
79     kBlockSize128x128,
80     kNumBlockSizes
81   };
82 
83   static constexpr int kBlockWidth[kNumBlockSizes] = {
84       2, 2, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64, 128, 128};
85   static constexpr int kBlockHeight[kNumBlockSizes] = {
86       2, 4, 2, 4, 8, 2, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64, 128, 64, 128};
87 
ConvolveTestParamlibgav1::dsp::__anon5015b8ab0111::ConvolveTestParam88   explicit ConvolveTestParam(BlockSize block_size)
89       : block_size(block_size),
90         width(kBlockWidth[block_size]),
91         height(kBlockHeight[block_size]) {}
92 
93   BlockSize block_size;
94   int width;
95   int height;
96 };
97 
98 #if !LIBGAV1_CXX17
99 constexpr int ConvolveTestParam::kBlockWidth[kNumBlockSizes];   // static.
100 constexpr int ConvolveTestParam::kBlockHeight[kNumBlockSizes];  // static.
101 #endif
102 
GetConvolveDigest8bpp(int id)103 const char* GetConvolveDigest8bpp(int id) {
104   // Entries containing 'XXXXX...' are skipped. See the test for details.
105   static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 16] = {
106       "ae5977a4ceffbac0cde72a04a43a9d57", "6cf5f791fe0d8dcd3526be3c6b814035",
107       "d905dfcad930aded7718587c05b48aaf", "6baf153feff04cc5b7e87c0bb60a905d",
108       "871ed5a69ca31e6444faa720895949bf", "c9cf1deba08dac5972b3b0a43eff8f98",
109       "68e2f90eaa0ab5da7e6f5776993f7eea", "f1f8282fb33c30eb68c0c315b7a4bc01",
110       "9412064b0eebf8123f23d74147d04dff", "cc08936effe309ab9a4fa1bf7e28e24e",
111       "36cbef36fa21b98df03536c918bf752a", "9d0da6321cf5311ea0bdd41271763030",
112       "55a10165ee8a660d7dddacf7de558cdd", "ac7fc9f9ea7213743fae5a023faaaf08",
113       "077e1b7b355c7ab3ca40230ee8efd8ea", "7a3e8de2a1caae206cf3e51a86dfd15a",
114       "1ddf9020f18fa7883355cf8c0881186a", "2377dd167ef2707978bed6f10ffd4e76",
115       "f918e0e4422967c6a7e47298135c7ae9", "b2264e129636368b5496760b39e64b7a",
116       "1168251e6261e2ff1fa69a93226dbd76", "4821befdf63f8c6da6440afeb57f320f",
117       "c30fc44d83821141e84cc4793e127301", "a8293b933d9f2e5d7f922ea40111d643",
118       "354a54861a94e8b027afd9931e61f997", "b384e9e3d81f9f4f9024028fbe451d8b",
119       "eeeb8589c1b31cbb565154736ca939ec", "f49dab626ddd977ed171f79295c24935",
120       "78d2f27e0d4708cb16856d7d40dc16fb", "9d2393ea156a1c2083f5b4207793064b",
121       "a9c62745b95c66fa497a524886af57e2", "2c614ec4463386ec075a0f1dbb587933",
122       "7a8856480d752153370240b066b90f6a", "beaef1dbffadc701fccb7c18a03e3a41",
123       "72b1e700c949d06eaf62d664dafdb5b6", "684f5c3a25a080edaf79add6e9137a8e",
124       "3be970f49e4288988818b087201d54da", "d2b9dba2968894a414756bb510ac389a",
125       "9a3215eb97aedbbddd76c7440837d040", "4e317feac6da46addf0e8b9d8d54304b",
126       "d2f5ca2b7958c332a3fb771f66da01f0", "7aec92c3b65e456b64ae285c12b03b0d",
127       "f72a99ad63f6a88c23724e898b705d21", "07a1f07f114c4a38ba08d2f44e1e1132",
128       "26b9de95edb45b31ac5aa19825831c7a", "4e4677a0623d44237eb8d6a622cdc526",
129       "c1b836a6ce023663b90db0e320389414", "5befcf222152ebc8d779fcc10b95320a",
130       "62adf407fc27d8682ced4dd7b55af14e", "35be0786a072bf2f1286989261bf6580",
131       "90562fc42dc5d879ae74c4909c1dec30", "a1427352f9e413975a0949e2b300c657",
132       "bcbc418bc2beb243e463851cd95335a9", "cb8fedcbecee3947358dc61f95e56530",
133       "0d0154a7d573685285a83a4cf201ac57", "b14bd8068f108905682b83cc15778065",
134       "c96c867d998473197dde9b587be14e3a", "f596c63c7b14cada0174e17124c83942",
135       "eb2822ad8204ed4ecbf0f30fcb210498", "538ce869ffd23b6963e61badfab7712b",
136       "6bbcc075f8b768a02cdc9149f150326d", "4ae70d9db2ec36885394db7d59bdd4f7",
137       "5fee162fe52c11c823db4d5ede370654", "9365186c59ef66d9def40f437022ad93",
138       "0f95fb0276c9c7910937fbdf75f2811d", "356d4003477283e157c8d2b5a79d913c",
139       "b355dab2dbb6f5869018563eece22862", "cf6ff8c43d8059cea6090a23ab66a0ef",
140       "a336f8b7bcf188840ca65c0d0e66518a", "de953f03895923359c6a719e6a537b89",
141       "8463ade9347ed602663e2cec5c4c3fe6", "392de11ffcd5c2ecf3db3480ee135340",
142       "bddd31e3e852712e6244b616622af83d", "30a36245c40d978fc8976b442a8600c3",
143       "93aa662b988b8502e5ea95659eafde59", "70440ba9ee7f9d16d297dbb49e54a56e",
144       "1eb2be4c05b50e427e29c72fa566bff5", "52c0980bae63e8459e82eee7d8af2334",
145       "75e57104d6058cd2bce1d3d8142d273d", "b4c735269ade44419169adbd852d5ddc",
146       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
147       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a7305087fae23de53d21a6909009ff69",
148       "8dcce009395264379c1a51239f4bb22c", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
149       "8dcce009395264379c1a51239f4bb22c", "d90a69e7bae8aa46ed0e1e5f911d7a07",
150       "6ab4dc87be03be1dcc5d956ca819d938", "6ab4dc87be03be1dcc5d956ca819d938",
151       "8f2afdb2f03cd04ffacd421b958caaa0", "710ccecc103033088d898a2b924551fb",
152       "710ccecc103033088d898a2b924551fb", "a4093e3e5902dd659407ce6471635a4e",
153       "375d7f5358d7a088a498b8b3aaecc0d5", "375d7f5358d7a088a498b8b3aaecc0d5",
154       "08867ea5cc38c705ec52af821bc4736a", "2afb540e8063f58d1b03896486c5e89b",
155       "2afb540e8063f58d1b03896486c5e89b", "6ce47b11d2e60c5d183c84ce9f2e46cc",
156       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
157       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a5a1ac658d7ce4a846a32b9fcfaa3475",
158       "2370f4e4a83edf91b7f504bbe4b00e90", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
159       "ae5464066a049622a7a264cdf9394b55", "45368b6db3d1fee739a64b0bc823ea9c",
160       "8dff0f28192d9f8c0bf7fb5405719dd8", "632738ef3ff3021cff45045c41978849",
161       "f7ec43384037e8d6c618e0df826ec029", "a6bc648197781a2dc99c487e66464320",
162       "1112ebd509007154c72c5a485b220b62", "9714c4ce636b6fb0ad05cba246d48c76",
163       "2c93dde8884f09fb5bb5ad6d95cde86d", "a49e6160b5d1b56bc2046963101cd606",
164       "7f084953976111e9f65b57876e7552b1", "0846ec82555b66197c5c45b08240fbcc",
165       "ca7471c126ccd22189e874f0a6e41960", "0802b6318fbd0969a33de8fdfcd07f10",
166       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
167       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "3b1ceebf0579fcbbfd6136938c595b91",
168       "ecafabcad1045f15d31ce2f3b13132f2", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
169       "5f211eba020e256a5781b203c5aa1d2e", "3b04497634364dd2cd3f2482b5d4b32f",
170       "a8ac7b5dc65ffb758b0643508a0e744e", "561ed8be43c221a561f8885a0d74c7ef",
171       "8159619fc234598c8c75154d80021fd4", "8f43645dce92cf7594aa4822aa53b17d",
172       "b6ccddb7dfa4eddc87b4eff08b5a3195", "b4e605327b28db573d88844a1a09db8d",
173       "15b00a15d1cc6cc96ca85d00b167e4dd", "7bf911888c11a9fefd604b8b9c82e9a1",
174       "bfb69b4d7d4aed73cfa75a0f55b66440", "034d1d62581bd0d840c4cf1e28227931",
175       "8cba849640e9e2859d509bc81ca94acd", "bc79acf2a0fe419194cdb4529bc7dcc8",
176       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
177       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "3bfad931bce82335219e0e29c15f2b21",
178       "68a701313d2247d2b32636ebc1f2a008", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
179       "05afe1f40d37a45a97a5e0aadd5066fb", "9e1f0e0bddb58d15d0925eeaede9b84c",
180       "03313cdaa593a1a7b4869010dcc7b241", "88a50d2b4107ee5b5074b2520183f8ac",
181       "ac50ea9f7306da95a5092709442989cf", "739b17591437edffd36799237b962658",
182       "b8a7eb7dd9c216e240517edfc6489397", "75b755f199dbf4a0e5ebbb86c2bd871d",
183       "31b0017ba1110e3d70b020901bc15564", "0a1aa8f5ecfd11ddba080af0051c576a",
184       "536181ee90de883cc383787aec089221", "29f82b0f3e4113944bd28aacd9b8489a",
185       "ee3e76371240d1f1ff811cea6a7d4f63", "17a20dbbf09feae557d40aa5818fbe76",
186       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
187       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "6baf153feff04cc5b7e87c0bb60a905d",
188       "871ed5a69ca31e6444faa720895949bf", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
189       "68e2f90eaa0ab5da7e6f5776993f7eea", "f1f8282fb33c30eb68c0c315b7a4bc01",
190       "9412064b0eebf8123f23d74147d04dff", "cc08936effe309ab9a4fa1bf7e28e24e",
191       "36cbef36fa21b98df03536c918bf752a", "9d0da6321cf5311ea0bdd41271763030",
192       "55a10165ee8a660d7dddacf7de558cdd", "ac7fc9f9ea7213743fae5a023faaaf08",
193       "077e1b7b355c7ab3ca40230ee8efd8ea", "7a3e8de2a1caae206cf3e51a86dfd15a",
194       "1ddf9020f18fa7883355cf8c0881186a", "2377dd167ef2707978bed6f10ffd4e76",
195       "f918e0e4422967c6a7e47298135c7ae9", "b2264e129636368b5496760b39e64b7a",
196       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
197       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "4cfad2c437084a93ea76913e21c2dd89",
198       "d372f0c17bce98855d6d59fbee814c3d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
199       "d99ffd2579eb781c30bc0df7b76ad61e", "4e139e57cbb049a0f4ef816adc48d026",
200       "be53b2507048e7ff50226d15c0b28865", "b73f3c1a10405de89d1f9e812ff73b5a",
201       "c7d51b1f2df49ab83962257e8a5934e5", "159e443d79cc59b11ca4a80aa7aa09be",
202       "6ef14b14882e1465b0482b0e0b16d8ce", "22a8d287b425c870f40c64a50f91ce54",
203       "f1d96db5a2e0a2160df38bd96d28d19b", "637d1e5221422dfe9a6dbcfd7f62ebdd",
204       "f275af4f1f350ffaaf650310cb5dddec", "f81c4d6b001a14584528880fa6988a87",
205       "a5a2f9c2e7759d8a3dec1bc4b56be587", "2317c57ab69a36eb3bf278cf8a8795a3",
206       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
207       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "1a0bdfc96a3b9fd904e658f238ab1076",
208       "56d16e54afe205e97527902770e71c71", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
209       "1f7b5b8282ff3cf4d8e8c52d80ef5b4d", "79e9e260a2028c5fe320005c272064b9",
210       "2418ebcdf85551b9ae6e3725f04aae6d", "98bdf907ebacacb734c9eef1ee727c6e",
211       "4dd5672d53c8f359e8f80badaa843dfc", "a1bef519bbf07138e2eec5a91694de46",
212       "df1cb51fe1a937cd7834e973dc5cb814", "317fe65abf81ef3ea07976ef8667baeb",
213       "2da29da97806ae0ee300c5e69c35a4aa", "555475f5d1685638169ab904447e4f13",
214       "b3e3a6234e8045e6182cf90a09f767b2", "849dfeca59074525dea59681a7f88ab4",
215       "39a68af80be11e1682b6f3c4ede33530", "b22d765af176d87e7d3048b4b89b86ad",
216       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
217       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "b8a710baa6a9fc784909671d450ecd99",
218       "f9e6a56382d8d12da676d6631bb6ef75", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
219       "3bf8e11e18527b16f0d7c0361d74a52d", "b9ff54c6f1e3b41fc7fc0f3fa0e75cf2",
220       "06ef1504f31af5f173d3317866ca57cb", "635e8ee11cf04d73598549234ad732a0",
221       "fab693410d59ee88aa2895527efc31ac", "3041eb26c23a63a587fbec623919e2d2",
222       "c61d99d5daf575664fb7ad64976f4b03", "822f6c4eb5db760468d822b21f48d94d",
223       "3f6fcb9fae3666e085b9e29002a802fc", "d9b9fecd195736a6049c528d4cb886b5",
224       "fed17fc391e6c3db4aa14ea1d6596c87", "d0d3482d981989e117cbb32fc4550267",
225       "39561688bf6680054edbfae6035316ce", "087c5992ca6f829e1ba4ba5332d67947",
226   };
227   assert(id >= 0);
228   assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
229   return kDigest[id];
230 }
231 
GetConvolveScaleDigest8bpp(int id)232 const char* GetConvolveScaleDigest8bpp(int id) {
233   // Entries containing 'XXXXX...' are skipped. See the test for details.
234   static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 2] = {
235       "0291a23f2ac4c40b5d8e957e63769904", "1d48447857472d6455af10d5526f6827",
236       "409b2278d6d372248f1891ca0dd12760", "9e416606a3f82fe5bb3f7182e4f42c2d",
237       "e126563f859ddd5c5ffde6f641168fad", "9bad4f1b7e1865f814b6fd5620816ebd",
238       "50e5e5a57185477cb2af83490c33b47c", "3d2fb301c61d7fbd0e21ac263f7ac552",
239       "5920032c6432c80c6e5e61b684018d13", "07ada64d24339488cdce492e6e0c6b0d",
240       "aaf1589aff6d062a87c627ab9ba20e3e", "91adf91bb24d2c4ea3f882bdf7396e33",
241       "1d17a932a68bb1f199f709e7725fe44b", "07716c63afda034cb386511ea25a63b5",
242       "cca17ef3324c41d189e674a059ef1255", "37d17e70619823a606c0b5f74bf2e33b",
243       "ba8ed5474c187c8e8d7f82a6a29ee860", "27663f037973ebe82ec10252a4d91299",
244       "24c27e187e8d5a2bbfa0fef9046d3eb0", "9854fdc91a48e3bd4639edcc940e5c09",
245       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
246       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a71907c60a9f1f81972a2859ae54a805",
247       "817bc3bf0c77abc4186eac39f2320184", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
248       "4e7182a8b226982e2678abcf5f83325d", "50cef7c6e57544135f102226bb95bed9",
249       "225e054dbcfff05b1c8b0792c731449e", "16eb63f03839159f3af0e08be857170f",
250       "c8e5d111a2e3f4487330a8bd893cb894", "4fd99eaf9c160442aab35b9bdc5d275b",
251       "8b0f61bfb30747d4c9215618ac42557c", "1df78022da202cefb9a8100b114152d9",
252       "378466e1eda63dbc03565b78af8e723f", "28ea721411fbf5fc805035be9a384140",
253       "4fed5d4163a3bfcc6726a42f20410b0a", "55abfca0c820771bd926e4b94f66a499",
254       "6c8b8ef0a78859c768e629e1decc0019", "d0ead286b5ba3841d24dd114efbfef0a",
255   };
256   assert(id >= 0);
257   assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
258   return kDigest[id];
259 }
260 
261 #if LIBGAV1_MAX_BITDEPTH >= 10
GetConvolveDigest10bpp(int id)262 const char* GetConvolveDigest10bpp(int id) {
263   // Entries containing 'XXXXX...' are skipped. See the test for details.
264   static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 16] = {
265       "b1b6903d60501c7bc11e5285beb26a52", "a7855ed75772d7fa815978a202bbcd9f",
266       "bde291a4e8087c085fe8b3632f4d7351", "238980eebc9e63ae3eea2771c7a70f12",
267       "0eac13431bd7d8a573318408a72246d5", "d05a237ed7a9ca877256b71555b1b8e4",
268       "73438155feb62595e3e406921102d748", "5871e0e88a776840d619670fbf107858",
269       "1c6376ce55c9ee9e35d432edb1ffb3b7", "d675e0195c9feca956e637f3f1959f40",
270       "b5681673903ade13d69e295f82fdd009", "3c43020105ae93a301404b4cd6238654",
271       "dd2c5880a94ed3758bfea0b0e8c78286", "4ebb1a7b25a39d8b9868ec8a1243103f",
272       "d34ec07845cd8523651e5f5112984a14", "2ce55308d873f4cd244f16da2b06e06e",
273       "a4bb5d5ff4b25f391265b5231049a09a", "c9106e0c820b03bcdde3aa94efc11a3e",
274       "7ec2eae9e118506da8b33440b399511a", "78de867c8ee947ed6d29055747f26949",
275       "a693b4bd0334a3b98d45e67d3985bb63", "156de3172d9acf3c7f251cd7a18ad461",
276       "e545b8a3ff958f8363c7968cbae96732", "7842b2047356c1417d9d88219707f1a1",
277       "1a487c658d684314d91bb6d961a94672", "94b3e5bcd6b849b66a4571ec3d23f9be",
278       "0635a296be01b7e641de98ee27c33cd2", "82dc120bf8c2043bc5eee81007309ebf",
279       "58c826cad3c14cdf26a649265758c58b", "f166254037c0dfb140f54cd7b08bddfe",
280       "74ab206f14ac5f62653cd3dd71a7916d", "5621caef7cc1d6522903290ccc5c2cb8",
281       "78ec6cf42cce4b1feb65e076c78ca241", "42188e2dbb4e02cd353552ea147ad03f",
282       "f9813870fc27941a7c00a0443d7c2fe7", "20b14a6b5af7aa356963bcaaf23d230d",
283       "9c9c41435697f75fa118b6d6464ee7cb", "38816245ed832ba313fefafcbed1e5c8",
284       "5d34137cc8ddba75347b0fa1d0a91791", "465dcb046a0449b9dfb3e0b297aa3863",
285       "3e787534dff83c22b3033750e448865a", "4c91f676a054d582bcae1ca9adb87a31",
286       "eab5894046a99ad0a1a12c91b0f37bd7", "765b4cfbfc1a4988878c412d53bcb597",
287       "bc63b29ec78c1efec5543885a45bb822", "91d6bdbc62d4bb80c9b371d9704e3c9e",
288       "cecd57396a0033456408f3f3554c6912", "5b37f94ef136c1eb9a6181c19491459c",
289       "716ba3a25b454e44b46caa42622c128c", "9076f58c4ab20f2f06d701a6b53b1c4f",
290       "d3212ab3922f147c3cf126c3b1aa17f6", "b55fea77f0e14a8bf8b6562b766fe91f",
291       "59b578268ff26a1e21c5b4273f73f852", "16761e7c8ba2645718153bed83ae78f6",
292       "a9e9805769fe1baf5c7933793ccca0d8", "553a2c24939dff18ec5833c77f556cfb",
293       "5c1ec75a160c444fa90abf106fa1140e", "2266840f11ac4c066d941ec473b1a54f",
294       "9e194755b2a37b615a517d5f8746dfbb", "bbf86f8174334f0b8d869fd8d58bf92d",
295       "fd1da8d197cb385f7917cd296d67afb9", "a984202c527b757337c605443f376915",
296       "c347f4a58fd784c5e88c1a23e4ff15d2", "29cbaadbff9adf4a3d49bd9900a9dd0b",
297       "c5997b802a6ba1cf5ba1057ddc5baa7e", "4f750f6375524311d260306deb233861",
298       "59f33727e5beeb783a057770bec7b4cd", "0654d72f22306b28d9ae42515845240c",
299       "6c9d7d9e6ef81d76e775a85c53abe209", "a35f435ccc67717a49251a07e62ae204",
300       "c5325015cb0b7c42839ac4aa21803fa0", "f81f31f1585c0f70438c09e829416f20",
301       "ab10b22fb8dd8199040745565b28595d", "0d928d6111f86c60ccefc6c6604d5659",
302       "4ed1a6200912995d4f571bdb7822aa83", "92e31a45513582f386dc9c22a57bbbbd",
303       "6dbf310a9c8d85f76306d6a35545f8af", "80fce29dc82d5857c1ed5ef2aea16835",
304       "14f2c5b9d2cd621c178a39f1ec0c38eb", "da54cfb4530841bda29966cfa05f4879",
305       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
306       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "7e3fa9c03bc3dfbdeb67f24c5d9a49cd",
307       "f3454ca93cbb0c8c09b0695d90a0df3d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
308       "f3454ca93cbb0c8c09b0695d90a0df3d", "1a77d2af4d2b6cf8737cfbcacacdc4e4",
309       "89bec831efea2f88129dedcad06bb3fa", "89bec831efea2f88129dedcad06bb3fa",
310       "dead0fe4030085c22e92d16bb110de9d", "306a2f5dfd675df4ed9af44fd5cac8c0",
311       "306a2f5dfd675df4ed9af44fd5cac8c0", "9d01c946a12f5ef9d9cebd9816e06014",
312       "768f63912e43148c13688d7f23281531", "768f63912e43148c13688d7f23281531",
313       "2e7927158e7b8e40e7269fc909fb584b", "123028e18c2bfb334e34adb5a4f67de4",
314       "123028e18c2bfb334e34adb5a4f67de4", "2c979c2bddef79a760e72a802f83cc76",
315       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
316       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "da1a6ff2be03ec8acde4cb1cd519a6f0",
317       "a4ca37cb869a0dbd1c4a2dcc449a8f31", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
318       "1b5d1d4c7be8d5ec00a42a49eecf918f", "98b77e88b0784baaea64c98c8707fe46",
319       "8148788044522edc3c497e1017efe2ce", "acf60abeda98bbea161139b915317423",
320       "262c96b1f2c4f85c86c0e9c77fedff1e", "f35a3d13516440f9168076d9b07c9e98",
321       "13782526fc2726100cb3cf375b3150ed", "13c07441b47b0c1ed80f015ac302d220",
322       "02880fde51ac991ad18d8986f4e5145c", "aa25073115bad49432953254e7dce0bc",
323       "69e3361b7199e10e75685b90fb0df623", "2f8ab35f6e7030e82ca922a68b29af4a",
324       "452f91b01833c57db4e909575a029ff6", "1fabf0655bedb671e4d7287fec8119ba",
325       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
326       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "d54206c34785cc3d8a06c2ceac46378c",
327       "85a11892ed884e3e74968435f6b16e64", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
328       "16434230d24b9522ae2680e8c37e1b95", "963dea92f3efbb99137d1de9c56728d3",
329       "b72fb6a9a073c2fe65013af1842dc9b0", "86fa0c299737eb499cbcdce94abe2d33",
330       "6b80af04470b83673d98f46925e678a5", "65baca6167fe5249f7a839ce5b2fd591",
331       "e47ded6c0eec1d5baadd02aff172f2b1", "c0950e609f278efb7050d319a9756bb3",
332       "9051290279237f9fb1389989b142d2dd", "34cdc1be291c95981c98812c5c343a15",
333       "5b64a6911cb7c3d60bb8f961ed9782a2", "7133de9d03a4b07716a12226b5e493e8",
334       "3594eff52d5ed875bd9655ddbf106fae", "90d7e13aa2f9a064493ff2b3b5b12109",
335       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
336       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "b1f26ee13df2e14a757416ba8a682278",
337       "996b6c166f9ed25bd07ea6acdf7597ff", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
338       "34895d4c69a6c3303693e6f431bcd5d8", "c9497b00cb1bc3363dd126ffdddadc8e",
339       "1e461869bb2ee9b6069c5e52cf817291", "8d7f1d7ea6a0dcc922ad5d2e77bc74dd",
340       "138855d9bf0ccd0c62ac14c7bff4fd37", "64035142864914d05a48ef8e013631d0",
341       "205904fa3c644433b46e01c11dd2fe40", "291425aaf8206b20e88db8ebf3cf7e7f",
342       "cb6238b8eb6b72980958e6fcceb2f2eb", "626321a6dfac542d0fc70321fac13ff3",
343       "1c6fda7501e0f8bdad972f7857cd9354", "4fd485dadcb570e5a0a5addaf9ba84da",
344       "d3f140aea9e8eabf4e1e5190e0148288", "e4938219593bbed5ae638a93f2f4a580",
345       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
346       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "238980eebc9e63ae3eea2771c7a70f12",
347       "0eac13431bd7d8a573318408a72246d5", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
348       "73438155feb62595e3e406921102d748", "5871e0e88a776840d619670fbf107858",
349       "1c6376ce55c9ee9e35d432edb1ffb3b7", "d675e0195c9feca956e637f3f1959f40",
350       "b5681673903ade13d69e295f82fdd009", "3c43020105ae93a301404b4cd6238654",
351       "dd2c5880a94ed3758bfea0b0e8c78286", "4ebb1a7b25a39d8b9868ec8a1243103f",
352       "d34ec07845cd8523651e5f5112984a14", "2ce55308d873f4cd244f16da2b06e06e",
353       "a4bb5d5ff4b25f391265b5231049a09a", "c9106e0c820b03bcdde3aa94efc11a3e",
354       "7ec2eae9e118506da8b33440b399511a", "78de867c8ee947ed6d29055747f26949",
355       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
356       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "e552466a4e7ff187251b8914b084d404",
357       "981b7c44b6f7b7ac2acf0cc4096e6bf4", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
358       "a4c75372af36162831cb872e24e1088c", "497271227a70a72f9ad25b415d41563f",
359       "c48bd7e11ec44ba7b2bc8b6a04592439", "0960a9af91250e9faa1eaac32227bf6f",
360       "746c2e0f96ae2246d534d67102be068c", "d6f6db079da9b8909a153c07cc9d0e63",
361       "7c8928a0d769f4264d195f39cb68a772", "db645c96fc8be04015e0eb538afec9ae",
362       "946af3a8f5362def5f4e27cb0fd4e754", "7ad78dfe7bbedf696dd58d9ad01bcfba",
363       "f0fd9c09d454e4ce918faa97e9ac10be", "af6ae5c0eb28417bd251184baf2eaba7",
364       "866f8df540dd3b58ab1339314d139cbd", "72803589b453a29501540aeddc23e6f4",
365       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
366       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "aba5d5ef5e96fe418e65d20e506ea834",
367       "d70bf16e2a31e90b7b3cdeaef1494cf9", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
368       "6df80bb7f264f4f285d09a4d61533fae", "c8831118d1004a7cca015a4fca140018",
369       "b7f82c140369067c105c7967c75b6f9e", "130f47aae365aabfec4360fa5b5ff554",
370       "92483ed631de21b685ffe6ccadbbec8f", "cbb6ab31547df6b91cfb48630fdffb48",
371       "1eea5e8a24d6aa11778eb3e5e5e9c9f2", "9e193b6b28ce798c44c744efde19eee9",
372       "885c384d90aaa34acd8303958033c252", "8110ed10e7234851dff3c7e4a51108a2",
373       "6fb9383302eb7e7a13387464d2634e03", "864d51fcc737bc73a3f588b67515039a",
374       "2ecb7890f00234bcb28c1d969f489012", "c4793d431dbf2d88826bb440bf027512",
375       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
376       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "972aeba65e8a6d20dd0f95279be2aa75",
377       "34165457282e2af2e9b3f5840e4dec5d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
378       "b8c5582b9bbb789c45471f93be83b41f", "257bf5467db570974d7cf2356bacf116",
379       "5255dded79f56b0078543b5a1814a668", "ef745100f5f34c8ff841b2b0b57eb33f",
380       "edae8ed67286ca6a31573a541b3deb6f", "01adcd8bf15fbf70df47fbf3a953aa14",
381       "ba539808a8501609ce052a1562a62b25", "ac8e6391200cec2abdebb00744a2ba82",
382       "54b17120f7d71ddb4d70590ecd231cc1", "f6e36446a97611a4db4425df926974b2",
383       "a82f4080699300b659bbe1b5c4463147", "ecedb178f7cad3dc1b921eca67f9efb6",
384       "0609ca0ff3ca90069e8b48829b4b0891", "839e86c681e97359f7819c766000dd1c",
385   };
386   assert(id >= 0);
387   assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
388   return kDigest[id];
389 }
390 
GetConvolveScaleDigest10bpp(int id)391 const char* GetConvolveScaleDigest10bpp(int id) {
392   // Entries containing 'XXXXX...' are skipped. See the test for details.
393   static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 2] = {
394       "27e21eb31687f9fbd0a66865fa8d7c8a", "9bff726c8e1d0998451a3b9cf2b3d8c8",
395       "661d74cfef36f12ed8d9b4c3ccb7fe0d", "5fc365fd1fcc9599dd97a885ba0c2eec",
396       "acdba2c82a6268e3c0ae8fc32be1b41f", "a5db60bbeaf56ab030ed21c42d553cf3",
397       "1228bb633f9fd63fdb998b775ca79e98", "07812c97f9f43a2a8ae07329dc488699",
398       "903525fb782119c4dfaf61b98a310c9f", "f38b51cef38b929e317861ccbc73ecd8",
399       "b78b05138e1d5fbf089144c42ce03058", "f2e227664cbf2d821b242a34fcbc9835",
400       "cb992dac70591e7d3663588ae13b9adc", "f2292d33657d939fa85ea5bacdfe39a3",
401       "7049dc742d6d8ad6f5d4309968ff281c", "e4beebde1ac335a4d92e4af94653a2ce",
402       "cc77875f98f54b9b26b5f7d9fcbc828d", "fb623f7b9e1ffcf2ae361599728a5589",
403       "c33847e47a7eda214734084640818df9", "ab3e1aec3d720c0c89c46a8d5b161b44",
404       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
405       "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "efe4de861dcf0f7458b6208cae7e3584",
406       "814751c55fa84f0fed94ff15fc30fc24", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
407       "31a63fe47297102937acbe7a328588b7", "b804a0a24633243f7da48d7a5f51c0bf",
408       "cb492672b005fc378cccc8c03003cd4a", "1d18732bcf2ea487e84579489cc59a22",
409       "457c4b3ec38a8d6c210584ade1a9fae2", "a3afdd468e6a5238a3dbd2cc21c11c9e",
410       "6ff8a16f21d6e8a9741dacf0734ae563", "3ffa29ef7e54e51f6849c9a3d3c79d03",
411       "af89899b083cf269ac1bd988aeb15b15", "3365d8411c11081fb228436238b9a671",
412       "3ba56d30f5f81d7098f356635a58b9af", "b3013776900c6520bd30f868e8c963b6",
413       "81febaa7342692483040f500ba2e5e2b", "4a51ff1d9a4a68687d590b41aa7835a3",
414   };
415   assert(id >= 0);
416   assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
417   return kDigest[id];
418 }
419 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
420 
421 struct ConvolveTypeParam {
ConvolveTypeParamlibgav1::dsp::__anon5015b8ab0111::ConvolveTypeParam422   ConvolveTypeParam(bool is_intra_block_copy, bool is_compound,
423                     bool has_vertical_filter, bool has_horizontal_filter)
424       : is_intra_block_copy(is_intra_block_copy),
425         is_compound(is_compound),
426         has_vertical_filter(has_vertical_filter),
427         has_horizontal_filter(has_horizontal_filter) {}
428   bool is_intra_block_copy;
429   bool is_compound;
430   bool has_vertical_filter;
431   bool has_horizontal_filter;
432 };
433 
operator <<(std::ostream & os,const ConvolveTestParam & param)434 std::ostream& operator<<(std::ostream& os, const ConvolveTestParam& param) {
435   return os << "BlockSize" << param.width << "x" << param.height;
436 }
437 
operator <<(std::ostream & os,const ConvolveTypeParam & param)438 std::ostream& operator<<(std::ostream& os, const ConvolveTypeParam& param) {
439   return os << "is_intra_block_copy: " << param.is_intra_block_copy
440             << ", is_compound: " << param.is_compound
441             << ", has_(vertical/horizontal)_filter: "
442             << param.has_vertical_filter << "/" << param.has_horizontal_filter;
443 }
444 
445 //------------------------------------------------------------------------------
446 template <int bitdepth, typename Pixel>
447 class ConvolveTest : public testing::TestWithParam<
448                          std::tuple<ConvolveTypeParam, ConvolveTestParam>> {
449  public:
450   ConvolveTest() = default;
451   ~ConvolveTest() override = default;
452 
SetUp()453   void SetUp() override {
454     ConvolveInit_C();
455 
456     const Dsp* const dsp = GetDspTable(bitdepth);
457     ASSERT_NE(dsp, nullptr);
458     GetConvolveFunc(dsp, &base_convolve_func_);
459 
460     const testing::TestInfo* const test_info =
461         testing::UnitTest::GetInstance()->current_test_info();
462     const absl::string_view test_case = test_info->test_suite_name();
463     if (absl::StartsWith(test_case, "C/")) {
464       base_convolve_func_ = nullptr;
465     } else if (absl::StartsWith(test_case, "SSE41/")) {
466       if ((GetCpuInfo() & kSSE4_1) != 0) {
467         ConvolveInit_SSE4_1();
468       }
469     } else if (absl::StartsWith(test_case, "AVX2/")) {
470       if ((GetCpuInfo() & kAVX2) != 0) {
471         ConvolveInit_AVX2();
472       }
473     } else if (absl::StartsWith(test_case, "NEON/")) {
474       ConvolveInit_NEON();
475 #if LIBGAV1_MAX_BITDEPTH >= 10
476       ConvolveInit10bpp_NEON();
477 #endif
478     } else {
479       FAIL() << "Unrecognized architecture prefix in test case name: "
480              << test_case;
481     }
482 
483     GetConvolveFunc(dsp, &cur_convolve_func_);
484 
485     // Skip functions that have not been specialized for this particular
486     // architecture.
487     if (cur_convolve_func_ == base_convolve_func_) {
488       cur_convolve_func_ = nullptr;
489     }
490   }
491 
492  protected:
GetDigestId() const493   int GetDigestId() const {
494     int id = param_.block_size;
495     id += param_.kNumBlockSizes *
496           static_cast<int>(type_param_.has_horizontal_filter);
497     id += 2 * param_.kNumBlockSizes *
498           static_cast<int>(type_param_.has_vertical_filter);
499     id += 4 * param_.kNumBlockSizes * static_cast<int>(type_param_.is_compound);
500     id += 8 * param_.kNumBlockSizes *
501           static_cast<int>(type_param_.is_intra_block_copy);
502     return id;
503   }
504 
505   void GetConvolveFunc(const Dsp* dsp, ConvolveFunc* func);
506   void SetInputData(bool use_fixed_values, int value);
507   void Check(bool use_fixed_values, const Pixel* src, const Pixel* dest,
508              libvpx_test::MD5* md5_digest);
509   void Check16Bit(bool use_fixed_values, const uint16_t* src,
510                   const uint16_t* dest, libvpx_test::MD5* md5_digest);
511   // |num_runs| covers the categories of filters (6) and the number of filters
512   // under each category (16).
513   void Test(bool use_fixed_values, int value,
514             int num_runs = kMinimumViableRuns);
515 
516   const ConvolveTypeParam type_param_ = std::get<0>(GetParam());
517   const ConvolveTestParam param_ = std::get<1>(GetParam());
518 
519  private:
520   ConvolveFunc base_convolve_func_;
521   ConvolveFunc cur_convolve_func_;
522   // Convolve filters are 7-tap, which need 3 pixels
523   // (kRestorationHorizontalBorder) padding.
524   Pixel source_[kMaxBlockHeight * kMaxBlockWidth] = {};
525   uint16_t source_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
526   uint16_t dest_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
527   Pixel dest_clipped_[kMaxBlockHeight * kMaxBlockWidth] = {};
528 
529   const int source_stride_ = kMaxBlockWidth;
530   const int source_height_ = kMaxBlockHeight;
531 };
532 
533 template <int bitdepth, typename Pixel>
GetConvolveFunc(const Dsp * const dsp,ConvolveFunc * func)534 void ConvolveTest<bitdepth, Pixel>::GetConvolveFunc(const Dsp* const dsp,
535                                                     ConvolveFunc* func) {
536   *func =
537       dsp->convolve[type_param_.is_intra_block_copy][type_param_.is_compound]
538                    [type_param_.has_vertical_filter]
539                    [type_param_.has_horizontal_filter];
540 }
541 
542 template <int bitdepth, typename Pixel>
SetInputData(bool use_fixed_values,int value)543 void ConvolveTest<bitdepth, Pixel>::SetInputData(bool use_fixed_values,
544                                                  int value) {
545   if (use_fixed_values) {
546     std::fill(source_, source_ + source_height_ * source_stride_, value);
547   } else {
548     const int offset =
549         kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
550     const int mask = (1 << bitdepth) - 1;
551     libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
552     const int height = param_.height;
553     const int width = param_.width;
554     for (int y = 0; y < height; ++y) {
555       for (int x = 0; x < width; ++x) {
556         source_[y * source_stride_ + x + offset] = rnd.Rand16() & mask;
557       }
558     }
559     // Copy border pixels to the left and right borders.
560     for (int y = 0; y < height; ++y) {
561       Memset(&source_[(y + kConvolveBorderLeftTop) * source_stride_],
562              source_[y * source_stride_ + offset], kConvolveBorderLeftTop);
563       Memset(&source_[y * source_stride_ + offset + width],
564              source_[y * source_stride_ + offset + width - 1],
565              kConvolveBorderLeftTop);
566     }
567     // Copy border pixels to the top and bottom borders.
568     for (int y = 0; y < kConvolveBorderLeftTop; ++y) {
569       memcpy(&source_[y * source_stride_],
570              &source_[kConvolveBorderLeftTop * source_stride_],
571              source_stride_ * sizeof(Pixel));
572       memcpy(&source_[(y + kConvolveBorderLeftTop + height) * source_stride_],
573              &source_[(kConvolveBorderLeftTop + height - 1) * source_stride_],
574              source_stride_ * sizeof(Pixel));
575     }
576   }
577 }
578 
579 template <int bitdepth, typename Pixel>
Check(bool use_fixed_values,const Pixel * src,const Pixel * dest,libvpx_test::MD5 * md5_digest)580 void ConvolveTest<bitdepth, Pixel>::Check(bool use_fixed_values,
581                                           const Pixel* src, const Pixel* dest,
582                                           libvpx_test::MD5* md5_digest) {
583   if (use_fixed_values) {
584     // For fixed values, input and output are identical.
585     const bool success =
586         test_utils::CompareBlocks(src, dest, param_.width, param_.height,
587                                   kMaxBlockWidth, kMaxBlockWidth, false, false);
588     EXPECT_TRUE(success);
589   } else {
590     // For random input, compare md5.
591     const int offset =
592         kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
593     const size_t size = sizeof(dest_clipped_) - offset * sizeof(Pixel);
594     md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
595   }
596 }
597 
598 template <int bitdepth, typename Pixel>
Check16Bit(bool use_fixed_values,const uint16_t * src,const uint16_t * dest,libvpx_test::MD5 * md5_digest)599 void ConvolveTest<bitdepth, Pixel>::Check16Bit(bool use_fixed_values,
600                                                const uint16_t* src,
601                                                const uint16_t* dest,
602                                                libvpx_test::MD5* md5_digest) {
603   if (use_fixed_values) {
604     // For fixed values, input and output are identical.
605     const bool success =
606         test_utils::CompareBlocks(src, dest, param_.width, param_.height,
607                                   kMaxBlockWidth, kMaxBlockWidth, false);
608     EXPECT_TRUE(success);
609   } else {
610     // For random input, compare md5.
611     const int offset =
612         kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
613     const size_t size = sizeof(dest_16bit_) - offset * sizeof(uint16_t);
614     md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
615   }
616 }
617 
618 template <int bitdepth, typename Pixel>
Test(bool use_fixed_values,int value,int num_runs)619 void ConvolveTest<bitdepth, Pixel>::Test(
620     bool use_fixed_values, int value, int num_runs /*= kMinimumViableRuns*/) {
621   // There's no meaning testing fixed input in compound convolve.
622   if (type_param_.is_compound && use_fixed_values) return;
623 
624   // There should not be any function set for this combination.
625   if (type_param_.is_intra_block_copy && type_param_.is_compound) {
626     ASSERT_EQ(cur_convolve_func_, nullptr);
627     return;
628   }
629 
630   // Compound and intra block copy functions are only used for blocks 4x4 or
631   // greater.
632   if (type_param_.is_compound || type_param_.is_intra_block_copy) {
633     if (param_.width < 4 || param_.height < 4) {
634       GTEST_SKIP();
635     }
636   }
637 
638   // Skip unspecialized functions.
639   if (cur_convolve_func_ == nullptr) {
640     GTEST_SKIP();
641   }
642 
643   SetInputData(use_fixed_values, value);
644   int subpixel_x = 0;
645   int subpixel_y = 0;
646   int vertical_index = 0;
647   int horizontal_index = 0;
648   const int offset =
649       kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
650   const Pixel* const src = source_ + offset;
651   const ptrdiff_t src_stride = source_stride_ * sizeof(Pixel);
652   const ptrdiff_t src_stride_16 = source_stride_;
653   const ptrdiff_t dst_stride = kMaxBlockWidth * sizeof(Pixel);
654   // Pack Compound output since we control the predictor buffer.
655   const ptrdiff_t dst_stride_compound = param_.width;
656 
657   // Output is always 16 bits regardless of |bitdepth|.
658   uint16_t* dst_16 = dest_16bit_ + offset;
659   // Output depends on |bitdepth|.
660   Pixel* dst_pixel = dest_clipped_ + offset;
661 
662   // Collect the first |kMinimumViableRuns| into one md5 buffer.
663   libvpx_test::MD5 md5_digest;
664 
665   absl::Duration elapsed_time;
666   for (int i = 0; i < num_runs; ++i) {
667     // Test every filter.
668     // Because of masking |subpixel_{x,y}| values roll over every 16 iterations.
669     subpixel_x += 1 << 6;
670     subpixel_y += 1 << 6;
671 
672     const int horizontal_filter_id = (subpixel_x >> 6) & 0xF;
673     const int vertical_filter_id = (subpixel_y >> 6) & 0xF;
674 
675     // |filter_id| == 0 (copy) must be handled by the appropriate 1D or copy
676     // function.
677     if (horizontal_filter_id == 0 || vertical_filter_id == 0) {
678       continue;
679     }
680 
681     // For focused speed testing these can be set to the desired filter. Want
682     // only 8 tap filters? Set |{vertical,horizontal}_index| to 2.
683     vertical_index += static_cast<int>(i % 16 == 0);
684     vertical_index %= 4;
685     horizontal_index += static_cast<int>(i % 16 == 0);
686     horizontal_index %= 4;
687 
688     if (type_param_.is_compound) {
689       // Output type is uint16_t.
690       const absl::Time start = absl::Now();
691       cur_convolve_func_(src, src_stride, horizontal_index, vertical_index,
692                          horizontal_filter_id, vertical_filter_id, param_.width,
693                          param_.height, dst_16, dst_stride_compound);
694       elapsed_time += absl::Now() - start;
695     } else {
696       // Output type is Pixel.
697       const absl::Time start = absl::Now();
698       cur_convolve_func_(src, src_stride, horizontal_index, vertical_index,
699                          horizontal_filter_id, vertical_filter_id, param_.width,
700                          param_.height, dst_pixel, dst_stride);
701       elapsed_time += absl::Now() - start;
702     }
703 
704     // Only check the output for the first set. After that it's just repeated
705     // runs for speed timing.
706     if (i >= kMinimumViableRuns) continue;
707 
708     if (type_param_.is_compound) {
709       // Need to copy source to a uint16_t buffer for comparison.
710       Pixel* src_ptr = source_;
711       uint16_t* src_ptr_16 = source_16bit_;
712       for (int y = 0; y < kMaxBlockHeight; ++y) {
713         for (int x = 0; x < kMaxBlockWidth; ++x) {
714           src_ptr_16[x] = src_ptr[x];
715         }
716         src_ptr += src_stride_16;
717         src_ptr_16 += src_stride_16;
718       }
719 
720       Check16Bit(use_fixed_values, source_16bit_ + offset, dst_16, &md5_digest);
721     } else {
722       Check(use_fixed_values, src, dst_pixel, &md5_digest);
723     }
724   }
725 
726   if (!use_fixed_values) {
727     // md5 sums are only calculated for random input.
728     const char* ref_digest;
729     if (bitdepth == 8) {
730       ref_digest = GetConvolveDigest8bpp(GetDigestId());
731     } else {
732 #if LIBGAV1_MAX_BITDEPTH >= 10
733       ref_digest = GetConvolveDigest10bpp(GetDigestId());
734 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
735     }
736     const char* direction;
737     if (type_param_.has_vertical_filter && type_param_.has_horizontal_filter) {
738       direction = "2D";
739     } else if (type_param_.has_vertical_filter) {
740       direction = "Vertical";
741     } else if (type_param_.has_horizontal_filter) {
742       direction = "Horizontal";
743     } else {
744       direction = "Copy";
745     }
746     const auto elapsed_time_us =
747         static_cast<int>(absl::ToInt64Microseconds(elapsed_time));
748     printf("Mode Convolve%s%s%s[%25s]: %5d us MD5: %s\n",
749            type_param_.is_compound ? "Compound" : "",
750            type_param_.is_intra_block_copy ? "IntraBlockCopy" : "", direction,
751            absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
752            elapsed_time_us, md5_digest.Get());
753     EXPECT_STREQ(ref_digest, md5_digest.Get());
754   }
755 }
756 
ApplyFilterToSignedInput(const int min_input,const int max_input,const int8_t filter[kSubPixelTaps],int * min_output,int * max_output)757 void ApplyFilterToSignedInput(const int min_input, const int max_input,
758                               const int8_t filter[kSubPixelTaps],
759                               int* min_output, int* max_output) {
760   int min = 0, max = 0;
761   for (int i = 0; i < kSubPixelTaps; ++i) {
762     const int tap = filter[i];
763     if (tap > 0) {
764       max += max_input * tap;
765       min += min_input * tap;
766     } else {
767       min += max_input * tap;
768       max += min_input * tap;
769     }
770   }
771   *min_output = min;
772   *max_output = max;
773 }
774 
ApplyFilterToUnsignedInput(const int max_input,const int8_t filter[kSubPixelTaps],int * min_output,int * max_output)775 void ApplyFilterToUnsignedInput(const int max_input,
776                                 const int8_t filter[kSubPixelTaps],
777                                 int* min_output, int* max_output) {
778   ApplyFilterToSignedInput(0, max_input, filter, min_output, max_output);
779 }
780 
781 // Validate the maximum ranges for different parts of the Convolve process.
782 template <int bitdepth>
ShowRange()783 void ShowRange() {
784   // Subtract one from the shift bits because the filter is pre-shifted by 1.
785   constexpr int horizontal_bits = (bitdepth == kBitdepth12)
786                                       ? kInterRoundBitsHorizontal12bpp - 1
787                                       : kInterRoundBitsHorizontal - 1;
788   constexpr int vertical_bits = (bitdepth == kBitdepth12)
789                                     ? kInterRoundBitsVertical12bpp - 1
790                                     : kInterRoundBitsVertical - 1;
791   constexpr int compound_vertical_bits = kInterRoundBitsCompoundVertical - 1;
792 
793   constexpr int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;
794 
795   constexpr int max_input = (1 << bitdepth) - 1;
796 
797   const int8_t* worst_convolve_filter = kHalfSubPixelFilters[2][8];
798 
799   // First pass.
800   printf("Bitdepth: %2d Input range:            [%8d, %8d]\n", bitdepth, 0,
801          max_input);
802 
803   int min, max;
804   ApplyFilterToUnsignedInput(max_input, worst_convolve_filter, &min, &max);
805 
806   if (bitdepth == 8) {
807     // 8bpp can use int16_t for sums.
808     assert(min > INT16_MIN);
809     assert(max < INT16_MAX);
810   } else {
811     // 10bpp and 12bpp require int32_t.
812     assert(min > INT32_MIN);
813     assert(max > INT16_MAX && max < INT32_MAX);
814   }
815 
816   printf("  Horizontal upscaled range:         [%8d, %8d]\n", min, max);
817 
818   const int first_pass_min = RightShiftWithRounding(min, horizontal_bits);
819   const int first_pass_max = RightShiftWithRounding(max, horizontal_bits);
820 
821   // All bitdepths can use int16_t for first pass output.
822   assert(first_pass_min > INT16_MIN);
823   assert(first_pass_max < INT16_MAX);
824 
825   printf("  Horizontal downscaled range:       [%8d, %8d]\n", first_pass_min,
826          first_pass_max);
827 
828   // Second pass.
829   ApplyFilterToSignedInput(first_pass_min, first_pass_max,
830                            worst_convolve_filter, &min, &max);
831 
832   // All bitdepths require int32_t for second pass sums.
833   assert(min < INT16_MIN && min > INT32_MIN);
834   assert(max > INT16_MAX && max < INT32_MAX);
835 
836   printf("  Vertical upscaled range:           [%8d, %8d]\n", min, max);
837 
838   // Second pass non-compound output is clipped to Pixel values.
839   const int second_pass_min =
840       Clip3(RightShiftWithRounding(min, vertical_bits), 0, max_input);
841   const int second_pass_max =
842       Clip3(RightShiftWithRounding(max, vertical_bits), 0, max_input);
843   printf("  Pixel output range:                [%8d, %8d]\n", second_pass_min,
844          second_pass_max);
845 
846   // Output is Pixel so matches Pixel values.
847   assert(second_pass_min == 0);
848   assert(second_pass_max == max_input);
849 
850   const int compound_second_pass_min =
851       RightShiftWithRounding(min, compound_vertical_bits) + compound_offset;
852   const int compound_second_pass_max =
853       RightShiftWithRounding(max, compound_vertical_bits) + compound_offset;
854 
855   printf("  Compound output range:             [%8d, %8d]\n",
856          compound_second_pass_min, compound_second_pass_max);
857 
858   if (bitdepth == 8) {
859     // 8bpp output is int16_t without an offset.
860     assert(compound_second_pass_min > INT16_MIN);
861     assert(compound_second_pass_max < INT16_MAX);
862   } else {
863     // 10bpp and 12bpp use the offset to fit inside uint16_t.
864     assert(compound_second_pass_min > 0);
865     assert(compound_second_pass_max < UINT16_MAX);
866   }
867 
868   printf("\n");
869 }
870 
TEST(ConvolveTest,ShowRange)871 TEST(ConvolveTest, ShowRange) {
872   ShowRange<kBitdepth8>();
873   ShowRange<kBitdepth10>();
874   ShowRange<kBitdepth12>();
875 }
876 
877 using ConvolveTest8bpp = ConvolveTest<8, uint8_t>;
878 
TEST_P(ConvolveTest8bpp,FixedValues)879 TEST_P(ConvolveTest8bpp, FixedValues) {
880   Test(true, 0);
881   Test(true, 1);
882   Test(true, 128);
883   Test(true, 255);
884 }
885 
TEST_P(ConvolveTest8bpp,RandomValues)886 TEST_P(ConvolveTest8bpp, RandomValues) { Test(false, 0); }
887 
TEST_P(ConvolveTest8bpp,DISABLED_Speed)888 TEST_P(ConvolveTest8bpp, DISABLED_Speed) {
889   const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
890   Test(false, 0, num_runs);
891 }
892 
893 //------------------------------------------------------------------------------
894 template <int bitdepth, typename Pixel>
895 class ConvolveScaleTest
896     : public testing::TestWithParam<
897           std::tuple<bool /*is_compound*/, ConvolveTestParam>> {
898  public:
899   ConvolveScaleTest() = default;
900   ~ConvolveScaleTest() override = default;
901 
SetUp()902   void SetUp() override {
903     ConvolveInit_C();
904 
905     const Dsp* const dsp = GetDspTable(bitdepth);
906     ASSERT_NE(dsp, nullptr);
907     base_convolve_scale_func_ = dsp->convolve_scale[is_compound_];
908 
909     const testing::TestInfo* const test_info =
910         testing::UnitTest::GetInstance()->current_test_info();
911     const absl::string_view test_case = test_info->test_suite_name();
912     if (absl::StartsWith(test_case, "C/")) {
913       base_convolve_scale_func_ = nullptr;
914     } else if (absl::StartsWith(test_case, "SSE41/")) {
915       if ((GetCpuInfo() & kSSE4_1) != 0) {
916         ConvolveInit_SSE4_1();
917       }
918     } else if (absl::StartsWith(test_case, "AVX2/")) {
919       if ((GetCpuInfo() & kAVX2) != 0) {
920         ConvolveInit_AVX2();
921       }
922     } else if (absl::StartsWith(test_case, "NEON/")) {
923       ConvolveInit_NEON();
924 #if LIBGAV1_MAX_BITDEPTH >= 10
925       ConvolveInit10bpp_NEON();
926 #endif
927     } else {
928       FAIL() << "Unrecognized architecture prefix in test case name: "
929              << test_case;
930     }
931 
932     cur_convolve_scale_func_ = dsp->convolve_scale[is_compound_];
933 
934     // Skip functions that have not been specialized for this particular
935     // architecture.
936     if (cur_convolve_scale_func_ == base_convolve_scale_func_) {
937       cur_convolve_scale_func_ = nullptr;
938     }
939   }
940 
941  protected:
GetDigestId() const942   int GetDigestId() const {
943     return param_.block_size +
944            param_.kNumBlockSizes * static_cast<int>(is_compound_);
945   }
946 
947   void SetInputData(bool use_fixed_values, int value);
948   void Check(bool use_fixed_values, const Pixel* src, const Pixel* dest,
949              libvpx_test::MD5* md5_digest);
950   void Check16Bit(bool use_fixed_values, const uint16_t* src,
951                   const uint16_t* dest, libvpx_test::MD5* md5_digest);
952   // |num_runs| covers the categories of filters (6) and the number of filters
953   // under each category (16).
954   void Test(bool use_fixed_values, int value,
955             int num_runs = kMinimumViableRuns);
956 
957   const bool is_compound_ = std::get<0>(GetParam());
958   const ConvolveTestParam param_ = std::get<1>(GetParam());
959 
960  private:
961   ConvolveScaleFunc base_convolve_scale_func_;
962   ConvolveScaleFunc cur_convolve_scale_func_;
963   // Convolve filters are 7-tap, which need 3 pixels
964   // (kRestorationHorizontalBorder) padding.
965   // The source can be at most 2 times of max width/height.
966   Pixel source_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
967   uint16_t source_16bit_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
968   uint16_t dest_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
969   Pixel dest_clipped_[kMaxBlockHeight * kMaxBlockWidth] = {};
970 
971   const int source_stride_ = kMaxBlockWidth * 2;
972   const int source_height_ = kMaxBlockHeight * 2;
973 };
974 
975 template <int bitdepth, typename Pixel>
SetInputData(bool use_fixed_values,int value)976 void ConvolveScaleTest<bitdepth, Pixel>::SetInputData(bool use_fixed_values,
977                                                       int value) {
978   if (use_fixed_values) {
979     std::fill(source_, source_ + source_height_ * source_stride_, value);
980   } else {
981     const int offset =
982         kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
983     const int mask = (1 << bitdepth) - 1;
984     libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
985     const int height = param_.height * 2;
986     const int width = param_.width * 2;
987     for (int y = 0; y < height; ++y) {
988       for (int x = 0; x < width; ++x) {
989         source_[y * source_stride_ + x + offset] = rnd.Rand16() & mask;
990       }
991     }
992     // Copy border pixels to the left and right borders.
993     for (int y = 0; y < height; ++y) {
994       Memset(&source_[(y + kConvolveBorderLeftTop) * source_stride_],
995              source_[y * source_stride_ + offset], kConvolveBorderLeftTop);
996       Memset(&source_[y * source_stride_ + offset + width],
997              source_[y * source_stride_ + offset + width - 1],
998              kConvolveBorderLeftTop);
999     }
1000     // Copy border pixels to the top and bottom borders.
1001     for (int y = 0; y < kConvolveBorderLeftTop; ++y) {
1002       memcpy(&source_[y * source_stride_],
1003              &source_[kConvolveBorderLeftTop * source_stride_],
1004              source_stride_ * sizeof(Pixel));
1005       memcpy(&source_[(y + kConvolveBorderLeftTop + height) * source_stride_],
1006              &source_[(kConvolveBorderLeftTop + height - 1) * source_stride_],
1007              source_stride_ * sizeof(Pixel));
1008     }
1009   }
1010 }
1011 
1012 template <int bitdepth, typename Pixel>
Check(bool use_fixed_values,const Pixel * src,const Pixel * dest,libvpx_test::MD5 * md5_digest)1013 void ConvolveScaleTest<bitdepth, Pixel>::Check(bool use_fixed_values,
1014                                                const Pixel* src,
1015                                                const Pixel* dest,
1016                                                libvpx_test::MD5* md5_digest) {
1017   if (use_fixed_values) {
1018     // For fixed values, input and output are identical.
1019     const bool success =
1020         test_utils::CompareBlocks(src, dest, param_.width, param_.height,
1021                                   kMaxBlockWidth, kMaxBlockWidth, false, false);
1022     EXPECT_TRUE(success);
1023   } else {
1024     // For random input, compare md5.
1025     const int offset =
1026         kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
1027     const size_t size = sizeof(dest_clipped_) - offset * sizeof(Pixel);
1028     md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
1029   }
1030 }
1031 
1032 template <int bitdepth, typename Pixel>
Check16Bit(bool use_fixed_values,const uint16_t * src,const uint16_t * dest,libvpx_test::MD5 * md5_digest)1033 void ConvolveScaleTest<bitdepth, Pixel>::Check16Bit(
1034     bool use_fixed_values, const uint16_t* src, const uint16_t* dest,
1035     libvpx_test::MD5* md5_digest) {
1036   if (use_fixed_values) {
1037     // For fixed values, input and output are identical.
1038     const bool success =
1039         test_utils::CompareBlocks(src, dest, param_.width, param_.height,
1040                                   kMaxBlockWidth, kMaxBlockWidth, false);
1041     EXPECT_TRUE(success);
1042   } else {
1043     // For random input, compare md5.
1044     const int offset =
1045         kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
1046     const size_t size = sizeof(dest_16bit_) - offset * sizeof(uint16_t);
1047     md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
1048   }
1049 }
1050 
1051 template <int bitdepth, typename Pixel>
Test(bool use_fixed_values,int value,int num_runs)1052 void ConvolveScaleTest<bitdepth, Pixel>::Test(
1053     bool use_fixed_values, int value, int num_runs /*= kMinimumViableRuns*/) {
1054   // There's no meaning testing fixed input in compound convolve.
1055   if (is_compound_ && use_fixed_values) return;
1056 
1057   // The compound function is only used for blocks 4x4 or greater.
1058   if (is_compound_) {
1059     if (param_.width < 4 || param_.height < 4) {
1060       GTEST_SKIP();
1061     }
1062   }
1063 
1064   // Skip unspecialized functions.
1065   if (cur_convolve_scale_func_ == nullptr) {
1066     GTEST_SKIP();
1067   }
1068 
1069   SetInputData(use_fixed_values, value);
1070   libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed() +
1071                              GetDigestId());
1072   // [1,2048] for |step_[xy]|. This covers a scaling range of 1/1024 to 2x.
1073   const int step_x = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
1074   const int step_y = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
1075   int subpixel_x = 0;
1076   int subpixel_y = 0;
1077   int vertical_index = 0;
1078   int horizontal_index = 0;
1079   const int offset =
1080       kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
1081   const int offset_scale =
1082       kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
1083   const Pixel* const src_scale = source_ + offset_scale;
1084   const ptrdiff_t src_stride = source_stride_ * sizeof(Pixel);
1085   const ptrdiff_t dst_stride = kMaxBlockWidth * sizeof(Pixel);
1086   // Pack Compound output since we control the predictor buffer.
1087   const ptrdiff_t dst_stride_compound = param_.width;
1088 
1089   // Output is always 16 bits regardless of |bitdepth|.
1090   uint16_t* dst_16 = dest_16bit_ + offset;
1091   // Output depends on |bitdepth|.
1092   Pixel* dst_pixel = dest_clipped_ + offset;
1093 
1094   // Collect the first |kMinimumViableRuns| into one md5 buffer.
1095   libvpx_test::MD5 md5_digest;
1096 
1097   absl::Duration elapsed_time;
1098   for (int i = 0; i < num_runs; ++i) {
1099     // Test every filter.
1100     // Because of masking |subpixel_{x,y}| values roll over every 16 iterations.
1101     subpixel_x += 1 << 6;
1102     subpixel_y += 1 << 6;
1103 
1104     const int horizontal_filter_id = (subpixel_x >> 6) & 0xF;
1105     const int vertical_filter_id = (subpixel_y >> 6) & 0xF;
1106 
1107     // |filter_id| == 0 (copy) must be handled by the appropriate 1D or copy
1108     // function.
1109     if (horizontal_filter_id == 0 || vertical_filter_id == 0) {
1110       continue;
1111     }
1112 
1113     // For focused speed testing these can be set to the desired filter. Want
1114     // only 8 tap filters? Set |{vertical,horizontal}_index| to 2.
1115     vertical_index += static_cast<int>(i % 16 == 0);
1116     vertical_index %= 4;
1117     horizontal_index += static_cast<int>(i % 16 == 0);
1118     horizontal_index %= 4;
1119 
1120     // Output type is uint16_t.
1121     const absl::Time start = absl::Now();
1122     if (is_compound_) {
1123       cur_convolve_scale_func_(
1124           source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
1125           step_y, param_.width, param_.height, dst_16, dst_stride_compound);
1126     } else {
1127       cur_convolve_scale_func_(
1128           source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
1129           step_y, param_.width, param_.height, dst_pixel, dst_stride);
1130     }
1131     elapsed_time += absl::Now() - start;
1132 
1133     // Only check the output for the first set. After that it's just repeated
1134     // runs for speed timing.
1135     if (i >= kMinimumViableRuns) continue;
1136 
1137     // Convolve function does not clip the output. The clipping is applied
1138     // later, but libaom clips the output. So we apply clipping to match
1139     // libaom in tests.
1140     if (is_compound_) {
1141       const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
1142       Pixel* dest_row = dest_clipped_;
1143       for (int y = 0; y < kMaxBlockHeight; ++y) {
1144         for (int x = 0; x < kMaxBlockWidth; ++x) {
1145           dest_row[x] = static_cast<Pixel>(Clip3(
1146               dest_16bit_[y * dst_stride_compound + x] - single_round_offset, 0,
1147               (1 << bitdepth) - 1));
1148         }
1149         dest_row += kMaxBlockWidth;
1150       }
1151     }
1152 
1153     if (is_compound_) {
1154       Check16Bit(use_fixed_values, source_16bit_ + offset_scale, dst_16,
1155                  &md5_digest);
1156     } else {
1157       Check(use_fixed_values, src_scale, dst_pixel, &md5_digest);
1158     }
1159   }
1160 
1161   if (!use_fixed_values) {
1162     // md5 sums are only calculated for random input.
1163     const char* ref_digest;
1164     if (bitdepth == 8) {
1165       ref_digest = GetConvolveScaleDigest8bpp(GetDigestId());
1166     } else {
1167 #if LIBGAV1_MAX_BITDEPTH >= 10
1168       ref_digest = GetConvolveScaleDigest10bpp(GetDigestId());
1169 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
1170     }
1171 
1172     const auto elapsed_time_us =
1173         static_cast<int>(absl::ToInt64Microseconds(elapsed_time));
1174     printf("Mode Convolve%sScale2D[%25s]: %5d us MD5: %s\n",
1175            is_compound_ ? "Compound" : "",
1176            absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
1177            elapsed_time_us, md5_digest.Get());
1178     EXPECT_STREQ(ref_digest, md5_digest.Get());
1179   }
1180 }
1181 
1182 using ConvolveScaleTest8bpp = ConvolveScaleTest<8, uint8_t>;
1183 
TEST_P(ConvolveScaleTest8bpp,FixedValues)1184 TEST_P(ConvolveScaleTest8bpp, FixedValues) {
1185   Test(true, 0);
1186   Test(true, 1);
1187   Test(true, 128);
1188   Test(true, 255);
1189 }
1190 
TEST_P(ConvolveScaleTest8bpp,RandomValues)1191 TEST_P(ConvolveScaleTest8bpp, RandomValues) { Test(false, 0); }
1192 
TEST_P(ConvolveScaleTest8bpp,DISABLED_Speed)1193 TEST_P(ConvolveScaleTest8bpp, DISABLED_Speed) {
1194   const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
1195   Test(false, 0, num_runs);
1196 }
1197 
1198 //------------------------------------------------------------------------------
1199 const ConvolveTestParam kConvolveParam[] = {
1200     ConvolveTestParam(ConvolveTestParam::kBlockSize2x2),
1201     ConvolveTestParam(ConvolveTestParam::kBlockSize2x4),
1202     ConvolveTestParam(ConvolveTestParam::kBlockSize4x2),
1203     ConvolveTestParam(ConvolveTestParam::kBlockSize4x4),
1204     ConvolveTestParam(ConvolveTestParam::kBlockSize4x8),
1205     ConvolveTestParam(ConvolveTestParam::kBlockSize8x2),
1206     ConvolveTestParam(ConvolveTestParam::kBlockSize8x4),
1207     ConvolveTestParam(ConvolveTestParam::kBlockSize8x8),
1208     ConvolveTestParam(ConvolveTestParam::kBlockSize8x16),
1209     ConvolveTestParam(ConvolveTestParam::kBlockSize16x8),
1210     ConvolveTestParam(ConvolveTestParam::kBlockSize16x16),
1211     ConvolveTestParam(ConvolveTestParam::kBlockSize16x32),
1212     ConvolveTestParam(ConvolveTestParam::kBlockSize32x16),
1213     ConvolveTestParam(ConvolveTestParam::kBlockSize32x32),
1214     ConvolveTestParam(ConvolveTestParam::kBlockSize32x64),
1215     ConvolveTestParam(ConvolveTestParam::kBlockSize64x32),
1216     ConvolveTestParam(ConvolveTestParam::kBlockSize64x64),
1217     ConvolveTestParam(ConvolveTestParam::kBlockSize64x128),
1218     ConvolveTestParam(ConvolveTestParam::kBlockSize128x64),
1219     ConvolveTestParam(ConvolveTestParam::kBlockSize128x128),
1220 };
1221 
1222 const ConvolveTypeParam kConvolveTypeParam[] = {
1223     ConvolveTypeParam(false, false, false, false),
1224     ConvolveTypeParam(false, false, false, true),
1225     ConvolveTypeParam(false, false, true, false),
1226     ConvolveTypeParam(false, false, true, true),
1227     ConvolveTypeParam(false, true, false, false),
1228     ConvolveTypeParam(false, true, false, true),
1229     ConvolveTypeParam(false, true, true, false),
1230     ConvolveTypeParam(false, true, true, true),
1231     ConvolveTypeParam(true, false, false, false),
1232     ConvolveTypeParam(true, false, false, true),
1233     ConvolveTypeParam(true, false, true, false),
1234     ConvolveTypeParam(true, false, true, true),
1235     // This is left to ensure no function exists for |intra_block_copy| when
1236     // |is_compound| is true; all combinations aren't necessary.
1237     ConvolveTypeParam(true, true, false, false),
1238 };
1239 
1240 INSTANTIATE_TEST_SUITE_P(C, ConvolveTest8bpp,
1241                          testing::Combine(testing::ValuesIn(kConvolveTypeParam),
1242                                           testing::ValuesIn(kConvolveParam)));
1243 INSTANTIATE_TEST_SUITE_P(C, ConvolveScaleTest8bpp,
1244                          testing::Combine(testing::Bool(),
1245                                           testing::ValuesIn(kConvolveParam)));
1246 
1247 #if LIBGAV1_ENABLE_NEON
1248 INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest8bpp,
1249                          testing::Combine(testing::ValuesIn(kConvolveTypeParam),
1250                                           testing::ValuesIn(kConvolveParam)));
1251 INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScaleTest8bpp,
1252                          testing::Combine(testing::Bool(),
1253                                           testing::ValuesIn(kConvolveParam)));
1254 #endif  // LIBGAV1_ENABLE_NEON
1255 
1256 #if LIBGAV1_ENABLE_SSE4_1
1257 INSTANTIATE_TEST_SUITE_P(SSE41, ConvolveTest8bpp,
1258                          testing::Combine(testing::ValuesIn(kConvolveTypeParam),
1259                                           testing::ValuesIn(kConvolveParam)));
1260 INSTANTIATE_TEST_SUITE_P(SSE41, ConvolveScaleTest8bpp,
1261                          testing::Combine(testing::Bool(),
1262                                           testing::ValuesIn(kConvolveParam)));
1263 #endif  // LIBGAV1_ENABLE_SSE4_1
1264 
1265 #if LIBGAV1_ENABLE_AVX2
1266 INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveTest8bpp,
1267                          testing::Combine(testing::ValuesIn(kConvolveTypeParam),
1268                                           testing::ValuesIn(kConvolveParam)));
1269 INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveScaleTest8bpp,
1270                          testing::Combine(testing::Bool(),
1271                                           testing::ValuesIn(kConvolveParam)));
1272 #endif  // LIBGAV1_ENABLE_AVX2
1273 
1274 #if LIBGAV1_MAX_BITDEPTH >= 10
1275 using ConvolveTest10bpp = ConvolveTest<10, uint16_t>;
1276 
TEST_P(ConvolveTest10bpp,FixedValues)1277 TEST_P(ConvolveTest10bpp, FixedValues) {
1278   Test(true, 0);
1279   Test(true, 1);
1280   Test(true, 128);
1281   Test(true, (1 << 10) - 1);
1282 }
1283 
TEST_P(ConvolveTest10bpp,RandomValues)1284 TEST_P(ConvolveTest10bpp, RandomValues) { Test(false, 0); }
1285 
TEST_P(ConvolveTest10bpp,DISABLED_Speed)1286 TEST_P(ConvolveTest10bpp, DISABLED_Speed) {
1287   const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
1288   Test(false, 0, num_runs);
1289 }
1290 
1291 using ConvolveScaleTest10bpp = ConvolveScaleTest<10, uint16_t>;
1292 
TEST_P(ConvolveScaleTest10bpp,FixedValues)1293 TEST_P(ConvolveScaleTest10bpp, FixedValues) {
1294   Test(true, 0);
1295   Test(true, 1);
1296   Test(true, 128);
1297   Test(true, (1 << 10) - 1);
1298 }
1299 
TEST_P(ConvolveScaleTest10bpp,RandomValues)1300 TEST_P(ConvolveScaleTest10bpp, RandomValues) { Test(false, 0); }
1301 
TEST_P(ConvolveScaleTest10bpp,DISABLED_Speed)1302 TEST_P(ConvolveScaleTest10bpp, DISABLED_Speed) {
1303   const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
1304   Test(false, 0, num_runs);
1305 }
1306 
1307 INSTANTIATE_TEST_SUITE_P(C, ConvolveTest10bpp,
1308                          testing::Combine(testing::ValuesIn(kConvolveTypeParam),
1309                                           testing::ValuesIn(kConvolveParam)));
1310 INSTANTIATE_TEST_SUITE_P(C, ConvolveScaleTest10bpp,
1311                          testing::Combine(testing::Bool(),
1312                                           testing::ValuesIn(kConvolveParam)));
1313 
1314 #if LIBGAV1_ENABLE_NEON
1315 INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest10bpp,
1316                          testing::Combine(testing::ValuesIn(kConvolveTypeParam),
1317                                           testing::ValuesIn(kConvolveParam)));
1318 INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScaleTest10bpp,
1319                          testing::Combine(testing::Bool(),
1320                                           testing::ValuesIn(kConvolveParam)));
1321 #endif  // LIBGAV1_ENABLE_NEON
1322 
1323 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
1324 
1325 }  // namespace
1326 }  // namespace dsp
1327 }  // namespace libgav1
1328