1 // Copyright 2021 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/convolve.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstdio>
22 #include <cstring>
23 #include <ostream>
24 #include <string>
25 #include <tuple>
26
27 #include "absl/strings/match.h"
28 #include "absl/strings/str_format.h"
29 #include "absl/strings/string_view.h"
30 #include "absl/time/clock.h"
31 #include "absl/time/time.h"
32 #include "gtest/gtest.h"
33 #include "src/dsp/constants.h"
34 #include "src/dsp/dsp.h"
35 #include "src/utils/common.h"
36 #include "src/utils/compiler_attributes.h"
37 #include "src/utils/constants.h"
38 #include "src/utils/cpu.h"
39 #include "src/utils/memory.h"
40 #include "tests/block_utils.h"
41 #include "tests/third_party/libvpx/acm_random.h"
42 #include "tests/third_party/libvpx/md5_helper.h"
43 #include "tests/utils.h"
44
45 namespace libgav1 {
46 namespace dsp {
47 namespace {
48
49 // The convolve function will access at most (block_height + 7) rows/columns
50 // from the beginning.
51 constexpr int kMaxBlockWidth = kMaxSuperBlockSizeInPixels + kSubPixelTaps;
52 constexpr int kMaxBlockHeight = kMaxSuperBlockSizeInPixels + kSubPixelTaps;
53
54 // Test all the filters in |kSubPixelFilters|. There are 6 different filters but
55 // filters [4] and [5] are only reached through GetFilterIndex().
56 constexpr int kMinimumViableRuns = 4 * 16;
57
// Block-size parameter for the convolve tests. Bundles the BlockSize enum
// value with the pixel dimensions it stands for.
struct ConvolveTestParam {
  // All block sizes under test, named kBlockSize<width>x<height>. The final
  // enumerator, kNumBlockSizes, is the number of block sizes and sizes the
  // lookup tables below.
  enum BlockSize {
    kBlockSize2x2,
    kBlockSize2x4,
    kBlockSize4x2,
    kBlockSize4x4,
    kBlockSize4x8,
    kBlockSize8x2,
    kBlockSize8x4,
    kBlockSize8x8,
    kBlockSize8x16,
    kBlockSize16x8,
    kBlockSize16x16,
    kBlockSize16x32,
    kBlockSize32x16,
    kBlockSize32x32,
    kBlockSize32x64,
    kBlockSize64x32,
    kBlockSize64x64,
    kBlockSize64x128,
    kBlockSize128x64,
    kBlockSize128x128,
    kNumBlockSizes
  };

  // Width in pixels of each block size, indexed by BlockSize.
  static constexpr int kBlockWidth[kNumBlockSizes] = {
      2,  2,  4,  4,  4,  8,  8,  8,  8,   16,
      16, 16, 32, 32, 32, 64, 64, 64, 128, 128};
  // Height in pixels of each block size, indexed by BlockSize.
  static constexpr int kBlockHeight[kNumBlockSizes] = {
      2,  4,  2,  4,  8,  2,  4,  8,   16, 8,
      16, 32, 16, 32, 64, 32, 64, 128, 64, 128};

  // Looks up the dimensions that correspond to |block_size|.
  explicit ConvolveTestParam(BlockSize block_size)
      : block_size{block_size},
        width{kBlockWidth[block_size]},
        height{kBlockHeight[block_size]} {}

  BlockSize block_size;
  int width;
  int height;
};

// Before C++17, static constexpr data members that are odr-used still need a
// namespace-scope definition.
#if !LIBGAV1_CXX17
constexpr int ConvolveTestParam::kBlockWidth[kNumBlockSizes];   // static.
constexpr int ConvolveTestParam::kBlockHeight[kNumBlockSizes];  // static.
#endif
102
GetConvolveDigest8bpp(int id)103 const char* GetConvolveDigest8bpp(int id) {
104 // Entries containing 'XXXXX...' are skipped. See the test for details.
105 static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 16] = {
106 "ae5977a4ceffbac0cde72a04a43a9d57", "6cf5f791fe0d8dcd3526be3c6b814035",
107 "d905dfcad930aded7718587c05b48aaf", "6baf153feff04cc5b7e87c0bb60a905d",
108 "871ed5a69ca31e6444faa720895949bf", "c9cf1deba08dac5972b3b0a43eff8f98",
109 "68e2f90eaa0ab5da7e6f5776993f7eea", "f1f8282fb33c30eb68c0c315b7a4bc01",
110 "9412064b0eebf8123f23d74147d04dff", "cc08936effe309ab9a4fa1bf7e28e24e",
111 "36cbef36fa21b98df03536c918bf752a", "9d0da6321cf5311ea0bdd41271763030",
112 "55a10165ee8a660d7dddacf7de558cdd", "ac7fc9f9ea7213743fae5a023faaaf08",
113 "077e1b7b355c7ab3ca40230ee8efd8ea", "7a3e8de2a1caae206cf3e51a86dfd15a",
114 "1ddf9020f18fa7883355cf8c0881186a", "2377dd167ef2707978bed6f10ffd4e76",
115 "f918e0e4422967c6a7e47298135c7ae9", "b2264e129636368b5496760b39e64b7a",
116 "1168251e6261e2ff1fa69a93226dbd76", "4821befdf63f8c6da6440afeb57f320f",
117 "c30fc44d83821141e84cc4793e127301", "a8293b933d9f2e5d7f922ea40111d643",
118 "354a54861a94e8b027afd9931e61f997", "b384e9e3d81f9f4f9024028fbe451d8b",
119 "eeeb8589c1b31cbb565154736ca939ec", "f49dab626ddd977ed171f79295c24935",
120 "78d2f27e0d4708cb16856d7d40dc16fb", "9d2393ea156a1c2083f5b4207793064b",
121 "a9c62745b95c66fa497a524886af57e2", "2c614ec4463386ec075a0f1dbb587933",
122 "7a8856480d752153370240b066b90f6a", "beaef1dbffadc701fccb7c18a03e3a41",
123 "72b1e700c949d06eaf62d664dafdb5b6", "684f5c3a25a080edaf79add6e9137a8e",
124 "3be970f49e4288988818b087201d54da", "d2b9dba2968894a414756bb510ac389a",
125 "9a3215eb97aedbbddd76c7440837d040", "4e317feac6da46addf0e8b9d8d54304b",
126 "d2f5ca2b7958c332a3fb771f66da01f0", "7aec92c3b65e456b64ae285c12b03b0d",
127 "f72a99ad63f6a88c23724e898b705d21", "07a1f07f114c4a38ba08d2f44e1e1132",
128 "26b9de95edb45b31ac5aa19825831c7a", "4e4677a0623d44237eb8d6a622cdc526",
129 "c1b836a6ce023663b90db0e320389414", "5befcf222152ebc8d779fcc10b95320a",
130 "62adf407fc27d8682ced4dd7b55af14e", "35be0786a072bf2f1286989261bf6580",
131 "90562fc42dc5d879ae74c4909c1dec30", "a1427352f9e413975a0949e2b300c657",
132 "bcbc418bc2beb243e463851cd95335a9", "cb8fedcbecee3947358dc61f95e56530",
133 "0d0154a7d573685285a83a4cf201ac57", "b14bd8068f108905682b83cc15778065",
134 "c96c867d998473197dde9b587be14e3a", "f596c63c7b14cada0174e17124c83942",
135 "eb2822ad8204ed4ecbf0f30fcb210498", "538ce869ffd23b6963e61badfab7712b",
136 "6bbcc075f8b768a02cdc9149f150326d", "4ae70d9db2ec36885394db7d59bdd4f7",
137 "5fee162fe52c11c823db4d5ede370654", "9365186c59ef66d9def40f437022ad93",
138 "0f95fb0276c9c7910937fbdf75f2811d", "356d4003477283e157c8d2b5a79d913c",
139 "b355dab2dbb6f5869018563eece22862", "cf6ff8c43d8059cea6090a23ab66a0ef",
140 "a336f8b7bcf188840ca65c0d0e66518a", "de953f03895923359c6a719e6a537b89",
141 "8463ade9347ed602663e2cec5c4c3fe6", "392de11ffcd5c2ecf3db3480ee135340",
142 "bddd31e3e852712e6244b616622af83d", "30a36245c40d978fc8976b442a8600c3",
143 "93aa662b988b8502e5ea95659eafde59", "70440ba9ee7f9d16d297dbb49e54a56e",
144 "1eb2be4c05b50e427e29c72fa566bff5", "52c0980bae63e8459e82eee7d8af2334",
145 "75e57104d6058cd2bce1d3d8142d273d", "b4c735269ade44419169adbd852d5ddc",
146 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
147 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a7305087fae23de53d21a6909009ff69",
148 "8dcce009395264379c1a51239f4bb22c", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
149 "8dcce009395264379c1a51239f4bb22c", "d90a69e7bae8aa46ed0e1e5f911d7a07",
150 "6ab4dc87be03be1dcc5d956ca819d938", "6ab4dc87be03be1dcc5d956ca819d938",
151 "8f2afdb2f03cd04ffacd421b958caaa0", "710ccecc103033088d898a2b924551fb",
152 "710ccecc103033088d898a2b924551fb", "a4093e3e5902dd659407ce6471635a4e",
153 "375d7f5358d7a088a498b8b3aaecc0d5", "375d7f5358d7a088a498b8b3aaecc0d5",
154 "08867ea5cc38c705ec52af821bc4736a", "2afb540e8063f58d1b03896486c5e89b",
155 "2afb540e8063f58d1b03896486c5e89b", "6ce47b11d2e60c5d183c84ce9f2e46cc",
156 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
157 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a5a1ac658d7ce4a846a32b9fcfaa3475",
158 "2370f4e4a83edf91b7f504bbe4b00e90", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
159 "ae5464066a049622a7a264cdf9394b55", "45368b6db3d1fee739a64b0bc823ea9c",
160 "8dff0f28192d9f8c0bf7fb5405719dd8", "632738ef3ff3021cff45045c41978849",
161 "f7ec43384037e8d6c618e0df826ec029", "a6bc648197781a2dc99c487e66464320",
162 "1112ebd509007154c72c5a485b220b62", "9714c4ce636b6fb0ad05cba246d48c76",
163 "2c93dde8884f09fb5bb5ad6d95cde86d", "a49e6160b5d1b56bc2046963101cd606",
164 "7f084953976111e9f65b57876e7552b1", "0846ec82555b66197c5c45b08240fbcc",
165 "ca7471c126ccd22189e874f0a6e41960", "0802b6318fbd0969a33de8fdfcd07f10",
166 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
167 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "3b1ceebf0579fcbbfd6136938c595b91",
168 "ecafabcad1045f15d31ce2f3b13132f2", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
169 "5f211eba020e256a5781b203c5aa1d2e", "3b04497634364dd2cd3f2482b5d4b32f",
170 "a8ac7b5dc65ffb758b0643508a0e744e", "561ed8be43c221a561f8885a0d74c7ef",
171 "8159619fc234598c8c75154d80021fd4", "8f43645dce92cf7594aa4822aa53b17d",
172 "b6ccddb7dfa4eddc87b4eff08b5a3195", "b4e605327b28db573d88844a1a09db8d",
173 "15b00a15d1cc6cc96ca85d00b167e4dd", "7bf911888c11a9fefd604b8b9c82e9a1",
174 "bfb69b4d7d4aed73cfa75a0f55b66440", "034d1d62581bd0d840c4cf1e28227931",
175 "8cba849640e9e2859d509bc81ca94acd", "bc79acf2a0fe419194cdb4529bc7dcc8",
176 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
177 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "3bfad931bce82335219e0e29c15f2b21",
178 "68a701313d2247d2b32636ebc1f2a008", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
179 "05afe1f40d37a45a97a5e0aadd5066fb", "9e1f0e0bddb58d15d0925eeaede9b84c",
180 "03313cdaa593a1a7b4869010dcc7b241", "88a50d2b4107ee5b5074b2520183f8ac",
181 "ac50ea9f7306da95a5092709442989cf", "739b17591437edffd36799237b962658",
182 "b8a7eb7dd9c216e240517edfc6489397", "75b755f199dbf4a0e5ebbb86c2bd871d",
183 "31b0017ba1110e3d70b020901bc15564", "0a1aa8f5ecfd11ddba080af0051c576a",
184 "536181ee90de883cc383787aec089221", "29f82b0f3e4113944bd28aacd9b8489a",
185 "ee3e76371240d1f1ff811cea6a7d4f63", "17a20dbbf09feae557d40aa5818fbe76",
186 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
187 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "6baf153feff04cc5b7e87c0bb60a905d",
188 "871ed5a69ca31e6444faa720895949bf", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
189 "68e2f90eaa0ab5da7e6f5776993f7eea", "f1f8282fb33c30eb68c0c315b7a4bc01",
190 "9412064b0eebf8123f23d74147d04dff", "cc08936effe309ab9a4fa1bf7e28e24e",
191 "36cbef36fa21b98df03536c918bf752a", "9d0da6321cf5311ea0bdd41271763030",
192 "55a10165ee8a660d7dddacf7de558cdd", "ac7fc9f9ea7213743fae5a023faaaf08",
193 "077e1b7b355c7ab3ca40230ee8efd8ea", "7a3e8de2a1caae206cf3e51a86dfd15a",
194 "1ddf9020f18fa7883355cf8c0881186a", "2377dd167ef2707978bed6f10ffd4e76",
195 "f918e0e4422967c6a7e47298135c7ae9", "b2264e129636368b5496760b39e64b7a",
196 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
197 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "4cfad2c437084a93ea76913e21c2dd89",
198 "d372f0c17bce98855d6d59fbee814c3d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
199 "d99ffd2579eb781c30bc0df7b76ad61e", "4e139e57cbb049a0f4ef816adc48d026",
200 "be53b2507048e7ff50226d15c0b28865", "b73f3c1a10405de89d1f9e812ff73b5a",
201 "c7d51b1f2df49ab83962257e8a5934e5", "159e443d79cc59b11ca4a80aa7aa09be",
202 "6ef14b14882e1465b0482b0e0b16d8ce", "22a8d287b425c870f40c64a50f91ce54",
203 "f1d96db5a2e0a2160df38bd96d28d19b", "637d1e5221422dfe9a6dbcfd7f62ebdd",
204 "f275af4f1f350ffaaf650310cb5dddec", "f81c4d6b001a14584528880fa6988a87",
205 "a5a2f9c2e7759d8a3dec1bc4b56be587", "2317c57ab69a36eb3bf278cf8a8795a3",
206 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
207 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "1a0bdfc96a3b9fd904e658f238ab1076",
208 "56d16e54afe205e97527902770e71c71", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
209 "1f7b5b8282ff3cf4d8e8c52d80ef5b4d", "79e9e260a2028c5fe320005c272064b9",
210 "2418ebcdf85551b9ae6e3725f04aae6d", "98bdf907ebacacb734c9eef1ee727c6e",
211 "4dd5672d53c8f359e8f80badaa843dfc", "a1bef519bbf07138e2eec5a91694de46",
212 "df1cb51fe1a937cd7834e973dc5cb814", "317fe65abf81ef3ea07976ef8667baeb",
213 "2da29da97806ae0ee300c5e69c35a4aa", "555475f5d1685638169ab904447e4f13",
214 "b3e3a6234e8045e6182cf90a09f767b2", "849dfeca59074525dea59681a7f88ab4",
215 "39a68af80be11e1682b6f3c4ede33530", "b22d765af176d87e7d3048b4b89b86ad",
216 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
217 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "b8a710baa6a9fc784909671d450ecd99",
218 "f9e6a56382d8d12da676d6631bb6ef75", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
219 "3bf8e11e18527b16f0d7c0361d74a52d", "b9ff54c6f1e3b41fc7fc0f3fa0e75cf2",
220 "06ef1504f31af5f173d3317866ca57cb", "635e8ee11cf04d73598549234ad732a0",
221 "fab693410d59ee88aa2895527efc31ac", "3041eb26c23a63a587fbec623919e2d2",
222 "c61d99d5daf575664fb7ad64976f4b03", "822f6c4eb5db760468d822b21f48d94d",
223 "3f6fcb9fae3666e085b9e29002a802fc", "d9b9fecd195736a6049c528d4cb886b5",
224 "fed17fc391e6c3db4aa14ea1d6596c87", "d0d3482d981989e117cbb32fc4550267",
225 "39561688bf6680054edbfae6035316ce", "087c5992ca6f829e1ba4ba5332d67947",
226 };
227 assert(id >= 0);
228 assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
229 return kDigest[id];
230 }
231
GetConvolveScaleDigest8bpp(int id)232 const char* GetConvolveScaleDigest8bpp(int id) {
233 // Entries containing 'XXXXX...' are skipped. See the test for details.
234 static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 2] = {
235 "0291a23f2ac4c40b5d8e957e63769904", "1d48447857472d6455af10d5526f6827",
236 "409b2278d6d372248f1891ca0dd12760", "9e416606a3f82fe5bb3f7182e4f42c2d",
237 "e126563f859ddd5c5ffde6f641168fad", "9bad4f1b7e1865f814b6fd5620816ebd",
238 "50e5e5a57185477cb2af83490c33b47c", "3d2fb301c61d7fbd0e21ac263f7ac552",
239 "5920032c6432c80c6e5e61b684018d13", "07ada64d24339488cdce492e6e0c6b0d",
240 "aaf1589aff6d062a87c627ab9ba20e3e", "91adf91bb24d2c4ea3f882bdf7396e33",
241 "1d17a932a68bb1f199f709e7725fe44b", "07716c63afda034cb386511ea25a63b5",
242 "cca17ef3324c41d189e674a059ef1255", "37d17e70619823a606c0b5f74bf2e33b",
243 "ba8ed5474c187c8e8d7f82a6a29ee860", "27663f037973ebe82ec10252a4d91299",
244 "24c27e187e8d5a2bbfa0fef9046d3eb0", "9854fdc91a48e3bd4639edcc940e5c09",
245 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
246 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "a71907c60a9f1f81972a2859ae54a805",
247 "817bc3bf0c77abc4186eac39f2320184", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
248 "4e7182a8b226982e2678abcf5f83325d", "50cef7c6e57544135f102226bb95bed9",
249 "225e054dbcfff05b1c8b0792c731449e", "16eb63f03839159f3af0e08be857170f",
250 "c8e5d111a2e3f4487330a8bd893cb894", "4fd99eaf9c160442aab35b9bdc5d275b",
251 "8b0f61bfb30747d4c9215618ac42557c", "1df78022da202cefb9a8100b114152d9",
252 "378466e1eda63dbc03565b78af8e723f", "28ea721411fbf5fc805035be9a384140",
253 "4fed5d4163a3bfcc6726a42f20410b0a", "55abfca0c820771bd926e4b94f66a499",
254 "6c8b8ef0a78859c768e629e1decc0019", "d0ead286b5ba3841d24dd114efbfef0a",
255 };
256 assert(id >= 0);
257 assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
258 return kDigest[id];
259 }
260
261 #if LIBGAV1_MAX_BITDEPTH >= 10
GetConvolveDigest10bpp(int id)262 const char* GetConvolveDigest10bpp(int id) {
263 // Entries containing 'XXXXX...' are skipped. See the test for details.
264 static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 16] = {
265 "b1b6903d60501c7bc11e5285beb26a52", "a7855ed75772d7fa815978a202bbcd9f",
266 "bde291a4e8087c085fe8b3632f4d7351", "238980eebc9e63ae3eea2771c7a70f12",
267 "0eac13431bd7d8a573318408a72246d5", "d05a237ed7a9ca877256b71555b1b8e4",
268 "73438155feb62595e3e406921102d748", "5871e0e88a776840d619670fbf107858",
269 "1c6376ce55c9ee9e35d432edb1ffb3b7", "d675e0195c9feca956e637f3f1959f40",
270 "b5681673903ade13d69e295f82fdd009", "3c43020105ae93a301404b4cd6238654",
271 "dd2c5880a94ed3758bfea0b0e8c78286", "4ebb1a7b25a39d8b9868ec8a1243103f",
272 "d34ec07845cd8523651e5f5112984a14", "2ce55308d873f4cd244f16da2b06e06e",
273 "a4bb5d5ff4b25f391265b5231049a09a", "c9106e0c820b03bcdde3aa94efc11a3e",
274 "7ec2eae9e118506da8b33440b399511a", "78de867c8ee947ed6d29055747f26949",
275 "a693b4bd0334a3b98d45e67d3985bb63", "156de3172d9acf3c7f251cd7a18ad461",
276 "e545b8a3ff958f8363c7968cbae96732", "7842b2047356c1417d9d88219707f1a1",
277 "1a487c658d684314d91bb6d961a94672", "94b3e5bcd6b849b66a4571ec3d23f9be",
278 "0635a296be01b7e641de98ee27c33cd2", "82dc120bf8c2043bc5eee81007309ebf",
279 "58c826cad3c14cdf26a649265758c58b", "f166254037c0dfb140f54cd7b08bddfe",
280 "74ab206f14ac5f62653cd3dd71a7916d", "5621caef7cc1d6522903290ccc5c2cb8",
281 "78ec6cf42cce4b1feb65e076c78ca241", "42188e2dbb4e02cd353552ea147ad03f",
282 "f9813870fc27941a7c00a0443d7c2fe7", "20b14a6b5af7aa356963bcaaf23d230d",
283 "9c9c41435697f75fa118b6d6464ee7cb", "38816245ed832ba313fefafcbed1e5c8",
284 "5d34137cc8ddba75347b0fa1d0a91791", "465dcb046a0449b9dfb3e0b297aa3863",
285 "3e787534dff83c22b3033750e448865a", "4c91f676a054d582bcae1ca9adb87a31",
286 "eab5894046a99ad0a1a12c91b0f37bd7", "765b4cfbfc1a4988878c412d53bcb597",
287 "bc63b29ec78c1efec5543885a45bb822", "91d6bdbc62d4bb80c9b371d9704e3c9e",
288 "cecd57396a0033456408f3f3554c6912", "5b37f94ef136c1eb9a6181c19491459c",
289 "716ba3a25b454e44b46caa42622c128c", "9076f58c4ab20f2f06d701a6b53b1c4f",
290 "d3212ab3922f147c3cf126c3b1aa17f6", "b55fea77f0e14a8bf8b6562b766fe91f",
291 "59b578268ff26a1e21c5b4273f73f852", "16761e7c8ba2645718153bed83ae78f6",
292 "a9e9805769fe1baf5c7933793ccca0d8", "553a2c24939dff18ec5833c77f556cfb",
293 "5c1ec75a160c444fa90abf106fa1140e", "2266840f11ac4c066d941ec473b1a54f",
294 "9e194755b2a37b615a517d5f8746dfbb", "bbf86f8174334f0b8d869fd8d58bf92d",
295 "fd1da8d197cb385f7917cd296d67afb9", "a984202c527b757337c605443f376915",
296 "c347f4a58fd784c5e88c1a23e4ff15d2", "29cbaadbff9adf4a3d49bd9900a9dd0b",
297 "c5997b802a6ba1cf5ba1057ddc5baa7e", "4f750f6375524311d260306deb233861",
298 "59f33727e5beeb783a057770bec7b4cd", "0654d72f22306b28d9ae42515845240c",
299 "6c9d7d9e6ef81d76e775a85c53abe209", "a35f435ccc67717a49251a07e62ae204",
300 "c5325015cb0b7c42839ac4aa21803fa0", "f81f31f1585c0f70438c09e829416f20",
301 "ab10b22fb8dd8199040745565b28595d", "0d928d6111f86c60ccefc6c6604d5659",
302 "4ed1a6200912995d4f571bdb7822aa83", "92e31a45513582f386dc9c22a57bbbbd",
303 "6dbf310a9c8d85f76306d6a35545f8af", "80fce29dc82d5857c1ed5ef2aea16835",
304 "14f2c5b9d2cd621c178a39f1ec0c38eb", "da54cfb4530841bda29966cfa05f4879",
305 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
306 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "7e3fa9c03bc3dfbdeb67f24c5d9a49cd",
307 "f3454ca93cbb0c8c09b0695d90a0df3d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
308 "f3454ca93cbb0c8c09b0695d90a0df3d", "1a77d2af4d2b6cf8737cfbcacacdc4e4",
309 "89bec831efea2f88129dedcad06bb3fa", "89bec831efea2f88129dedcad06bb3fa",
310 "dead0fe4030085c22e92d16bb110de9d", "306a2f5dfd675df4ed9af44fd5cac8c0",
311 "306a2f5dfd675df4ed9af44fd5cac8c0", "9d01c946a12f5ef9d9cebd9816e06014",
312 "768f63912e43148c13688d7f23281531", "768f63912e43148c13688d7f23281531",
313 "2e7927158e7b8e40e7269fc909fb584b", "123028e18c2bfb334e34adb5a4f67de4",
314 "123028e18c2bfb334e34adb5a4f67de4", "2c979c2bddef79a760e72a802f83cc76",
315 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
316 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "da1a6ff2be03ec8acde4cb1cd519a6f0",
317 "a4ca37cb869a0dbd1c4a2dcc449a8f31", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
318 "1b5d1d4c7be8d5ec00a42a49eecf918f", "98b77e88b0784baaea64c98c8707fe46",
319 "8148788044522edc3c497e1017efe2ce", "acf60abeda98bbea161139b915317423",
320 "262c96b1f2c4f85c86c0e9c77fedff1e", "f35a3d13516440f9168076d9b07c9e98",
321 "13782526fc2726100cb3cf375b3150ed", "13c07441b47b0c1ed80f015ac302d220",
322 "02880fde51ac991ad18d8986f4e5145c", "aa25073115bad49432953254e7dce0bc",
323 "69e3361b7199e10e75685b90fb0df623", "2f8ab35f6e7030e82ca922a68b29af4a",
324 "452f91b01833c57db4e909575a029ff6", "1fabf0655bedb671e4d7287fec8119ba",
325 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
326 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "d54206c34785cc3d8a06c2ceac46378c",
327 "85a11892ed884e3e74968435f6b16e64", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
328 "16434230d24b9522ae2680e8c37e1b95", "963dea92f3efbb99137d1de9c56728d3",
329 "b72fb6a9a073c2fe65013af1842dc9b0", "86fa0c299737eb499cbcdce94abe2d33",
330 "6b80af04470b83673d98f46925e678a5", "65baca6167fe5249f7a839ce5b2fd591",
331 "e47ded6c0eec1d5baadd02aff172f2b1", "c0950e609f278efb7050d319a9756bb3",
332 "9051290279237f9fb1389989b142d2dd", "34cdc1be291c95981c98812c5c343a15",
333 "5b64a6911cb7c3d60bb8f961ed9782a2", "7133de9d03a4b07716a12226b5e493e8",
334 "3594eff52d5ed875bd9655ddbf106fae", "90d7e13aa2f9a064493ff2b3b5b12109",
335 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
336 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "b1f26ee13df2e14a757416ba8a682278",
337 "996b6c166f9ed25bd07ea6acdf7597ff", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
338 "34895d4c69a6c3303693e6f431bcd5d8", "c9497b00cb1bc3363dd126ffdddadc8e",
339 "1e461869bb2ee9b6069c5e52cf817291", "8d7f1d7ea6a0dcc922ad5d2e77bc74dd",
340 "138855d9bf0ccd0c62ac14c7bff4fd37", "64035142864914d05a48ef8e013631d0",
341 "205904fa3c644433b46e01c11dd2fe40", "291425aaf8206b20e88db8ebf3cf7e7f",
342 "cb6238b8eb6b72980958e6fcceb2f2eb", "626321a6dfac542d0fc70321fac13ff3",
343 "1c6fda7501e0f8bdad972f7857cd9354", "4fd485dadcb570e5a0a5addaf9ba84da",
344 "d3f140aea9e8eabf4e1e5190e0148288", "e4938219593bbed5ae638a93f2f4a580",
345 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
346 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "238980eebc9e63ae3eea2771c7a70f12",
347 "0eac13431bd7d8a573318408a72246d5", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
348 "73438155feb62595e3e406921102d748", "5871e0e88a776840d619670fbf107858",
349 "1c6376ce55c9ee9e35d432edb1ffb3b7", "d675e0195c9feca956e637f3f1959f40",
350 "b5681673903ade13d69e295f82fdd009", "3c43020105ae93a301404b4cd6238654",
351 "dd2c5880a94ed3758bfea0b0e8c78286", "4ebb1a7b25a39d8b9868ec8a1243103f",
352 "d34ec07845cd8523651e5f5112984a14", "2ce55308d873f4cd244f16da2b06e06e",
353 "a4bb5d5ff4b25f391265b5231049a09a", "c9106e0c820b03bcdde3aa94efc11a3e",
354 "7ec2eae9e118506da8b33440b399511a", "78de867c8ee947ed6d29055747f26949",
355 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
356 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "e552466a4e7ff187251b8914b084d404",
357 "981b7c44b6f7b7ac2acf0cc4096e6bf4", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
358 "a4c75372af36162831cb872e24e1088c", "497271227a70a72f9ad25b415d41563f",
359 "c48bd7e11ec44ba7b2bc8b6a04592439", "0960a9af91250e9faa1eaac32227bf6f",
360 "746c2e0f96ae2246d534d67102be068c", "d6f6db079da9b8909a153c07cc9d0e63",
361 "7c8928a0d769f4264d195f39cb68a772", "db645c96fc8be04015e0eb538afec9ae",
362 "946af3a8f5362def5f4e27cb0fd4e754", "7ad78dfe7bbedf696dd58d9ad01bcfba",
363 "f0fd9c09d454e4ce918faa97e9ac10be", "af6ae5c0eb28417bd251184baf2eaba7",
364 "866f8df540dd3b58ab1339314d139cbd", "72803589b453a29501540aeddc23e6f4",
365 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
366 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "aba5d5ef5e96fe418e65d20e506ea834",
367 "d70bf16e2a31e90b7b3cdeaef1494cf9", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
368 "6df80bb7f264f4f285d09a4d61533fae", "c8831118d1004a7cca015a4fca140018",
369 "b7f82c140369067c105c7967c75b6f9e", "130f47aae365aabfec4360fa5b5ff554",
370 "92483ed631de21b685ffe6ccadbbec8f", "cbb6ab31547df6b91cfb48630fdffb48",
371 "1eea5e8a24d6aa11778eb3e5e5e9c9f2", "9e193b6b28ce798c44c744efde19eee9",
372 "885c384d90aaa34acd8303958033c252", "8110ed10e7234851dff3c7e4a51108a2",
373 "6fb9383302eb7e7a13387464d2634e03", "864d51fcc737bc73a3f588b67515039a",
374 "2ecb7890f00234bcb28c1d969f489012", "c4793d431dbf2d88826bb440bf027512",
375 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
376 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "972aeba65e8a6d20dd0f95279be2aa75",
377 "34165457282e2af2e9b3f5840e4dec5d", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
378 "b8c5582b9bbb789c45471f93be83b41f", "257bf5467db570974d7cf2356bacf116",
379 "5255dded79f56b0078543b5a1814a668", "ef745100f5f34c8ff841b2b0b57eb33f",
380 "edae8ed67286ca6a31573a541b3deb6f", "01adcd8bf15fbf70df47fbf3a953aa14",
381 "ba539808a8501609ce052a1562a62b25", "ac8e6391200cec2abdebb00744a2ba82",
382 "54b17120f7d71ddb4d70590ecd231cc1", "f6e36446a97611a4db4425df926974b2",
383 "a82f4080699300b659bbe1b5c4463147", "ecedb178f7cad3dc1b921eca67f9efb6",
384 "0609ca0ff3ca90069e8b48829b4b0891", "839e86c681e97359f7819c766000dd1c",
385 };
386 assert(id >= 0);
387 assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
388 return kDigest[id];
389 }
390
GetConvolveScaleDigest10bpp(int id)391 const char* GetConvolveScaleDigest10bpp(int id) {
392 // Entries containing 'XXXXX...' are skipped. See the test for details.
393 static const char* const kDigest[ConvolveTestParam::kNumBlockSizes * 2] = {
394 "27e21eb31687f9fbd0a66865fa8d7c8a", "9bff726c8e1d0998451a3b9cf2b3d8c8",
395 "661d74cfef36f12ed8d9b4c3ccb7fe0d", "5fc365fd1fcc9599dd97a885ba0c2eec",
396 "acdba2c82a6268e3c0ae8fc32be1b41f", "a5db60bbeaf56ab030ed21c42d553cf3",
397 "1228bb633f9fd63fdb998b775ca79e98", "07812c97f9f43a2a8ae07329dc488699",
398 "903525fb782119c4dfaf61b98a310c9f", "f38b51cef38b929e317861ccbc73ecd8",
399 "b78b05138e1d5fbf089144c42ce03058", "f2e227664cbf2d821b242a34fcbc9835",
400 "cb992dac70591e7d3663588ae13b9adc", "f2292d33657d939fa85ea5bacdfe39a3",
401 "7049dc742d6d8ad6f5d4309968ff281c", "e4beebde1ac335a4d92e4af94653a2ce",
402 "cc77875f98f54b9b26b5f7d9fcbc828d", "fb623f7b9e1ffcf2ae361599728a5589",
403 "c33847e47a7eda214734084640818df9", "ab3e1aec3d720c0c89c46a8d5b161b44",
404 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
405 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", "efe4de861dcf0f7458b6208cae7e3584",
406 "814751c55fa84f0fed94ff15fc30fc24", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
407 "31a63fe47297102937acbe7a328588b7", "b804a0a24633243f7da48d7a5f51c0bf",
408 "cb492672b005fc378cccc8c03003cd4a", "1d18732bcf2ea487e84579489cc59a22",
409 "457c4b3ec38a8d6c210584ade1a9fae2", "a3afdd468e6a5238a3dbd2cc21c11c9e",
410 "6ff8a16f21d6e8a9741dacf0734ae563", "3ffa29ef7e54e51f6849c9a3d3c79d03",
411 "af89899b083cf269ac1bd988aeb15b15", "3365d8411c11081fb228436238b9a671",
412 "3ba56d30f5f81d7098f356635a58b9af", "b3013776900c6520bd30f868e8c963b6",
413 "81febaa7342692483040f500ba2e5e2b", "4a51ff1d9a4a68687d590b41aa7835a3",
414 };
415 assert(id >= 0);
416 assert(id < sizeof(kDigest) / sizeof(kDigest[0]));
417 return kDigest[id];
418 }
419 #endif // LIBGAV1_MAX_BITDEPTH >= 10
420
// Convolve-type parameter for the tests: four flags that together pick one
// entry of the Dsp convolve function table.
struct ConvolveTypeParam {
  // Stores each flag in the member of the same name.
  ConvolveTypeParam(bool is_intra_block_copy, bool is_compound,
                    bool has_vertical_filter, bool has_horizontal_filter)
      : is_intra_block_copy{is_intra_block_copy},
        is_compound{is_compound},
        has_vertical_filter{has_vertical_filter},
        has_horizontal_filter{has_horizontal_filter} {}

  bool is_intra_block_copy;
  bool is_compound;
  bool has_vertical_filter;
  bool has_horizontal_filter;
};
433
operator <<(std::ostream & os,const ConvolveTestParam & param)434 std::ostream& operator<<(std::ostream& os, const ConvolveTestParam& param) {
435 return os << "BlockSize" << param.width << "x" << param.height;
436 }
437
operator <<(std::ostream & os,const ConvolveTypeParam & param)438 std::ostream& operator<<(std::ostream& os, const ConvolveTypeParam& param) {
439 return os << "is_intra_block_copy: " << param.is_intra_block_copy
440 << ", is_compound: " << param.is_compound
441 << ", has_(vertical/horizontal)_filter: "
442 << param.has_vertical_filter << "/" << param.has_horizontal_filter;
443 }
444
445 //------------------------------------------------------------------------------
446 template <int bitdepth, typename Pixel>
447 class ConvolveTest : public testing::TestWithParam<
448 std::tuple<ConvolveTypeParam, ConvolveTestParam>> {
449 public:
450 ConvolveTest() = default;
451 ~ConvolveTest() override = default;
452
SetUp()453 void SetUp() override {
454 ConvolveInit_C();
455
456 const Dsp* const dsp = GetDspTable(bitdepth);
457 ASSERT_NE(dsp, nullptr);
458 GetConvolveFunc(dsp, &base_convolve_func_);
459
460 const testing::TestInfo* const test_info =
461 testing::UnitTest::GetInstance()->current_test_info();
462 const absl::string_view test_case = test_info->test_suite_name();
463 if (absl::StartsWith(test_case, "C/")) {
464 base_convolve_func_ = nullptr;
465 } else if (absl::StartsWith(test_case, "SSE41/")) {
466 if ((GetCpuInfo() & kSSE4_1) != 0) {
467 ConvolveInit_SSE4_1();
468 }
469 } else if (absl::StartsWith(test_case, "AVX2/")) {
470 if ((GetCpuInfo() & kAVX2) != 0) {
471 ConvolveInit_AVX2();
472 }
473 } else if (absl::StartsWith(test_case, "NEON/")) {
474 ConvolveInit_NEON();
475 #if LIBGAV1_MAX_BITDEPTH >= 10
476 ConvolveInit10bpp_NEON();
477 #endif
478 } else {
479 FAIL() << "Unrecognized architecture prefix in test case name: "
480 << test_case;
481 }
482
483 GetConvolveFunc(dsp, &cur_convolve_func_);
484
485 // Skip functions that have not been specialized for this particular
486 // architecture.
487 if (cur_convolve_func_ == base_convolve_func_) {
488 cur_convolve_func_ = nullptr;
489 }
490 }
491
492 protected:
GetDigestId() const493 int GetDigestId() const {
494 int id = param_.block_size;
495 id += param_.kNumBlockSizes *
496 static_cast<int>(type_param_.has_horizontal_filter);
497 id += 2 * param_.kNumBlockSizes *
498 static_cast<int>(type_param_.has_vertical_filter);
499 id += 4 * param_.kNumBlockSizes * static_cast<int>(type_param_.is_compound);
500 id += 8 * param_.kNumBlockSizes *
501 static_cast<int>(type_param_.is_intra_block_copy);
502 return id;
503 }
504
505 void GetConvolveFunc(const Dsp* dsp, ConvolveFunc* func);
506 void SetInputData(bool use_fixed_values, int value);
507 void Check(bool use_fixed_values, const Pixel* src, const Pixel* dest,
508 libvpx_test::MD5* md5_digest);
509 void Check16Bit(bool use_fixed_values, const uint16_t* src,
510 const uint16_t* dest, libvpx_test::MD5* md5_digest);
511 // |num_runs| covers the categories of filters (6) and the number of filters
512 // under each category (16).
513 void Test(bool use_fixed_values, int value,
514 int num_runs = kMinimumViableRuns);
515
516 const ConvolveTypeParam type_param_ = std::get<0>(GetParam());
517 const ConvolveTestParam param_ = std::get<1>(GetParam());
518
519 private:
520 ConvolveFunc base_convolve_func_;
521 ConvolveFunc cur_convolve_func_;
522 // Convolve filters are 7-tap, which need 3 pixels
523 // (kRestorationHorizontalBorder) padding.
524 Pixel source_[kMaxBlockHeight * kMaxBlockWidth] = {};
525 uint16_t source_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
526 uint16_t dest_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
527 Pixel dest_clipped_[kMaxBlockHeight * kMaxBlockWidth] = {};
528
529 const int source_stride_ = kMaxBlockWidth;
530 const int source_height_ = kMaxBlockHeight;
531 };
532
533 template <int bitdepth, typename Pixel>
GetConvolveFunc(const Dsp * const dsp,ConvolveFunc * func)534 void ConvolveTest<bitdepth, Pixel>::GetConvolveFunc(const Dsp* const dsp,
535 ConvolveFunc* func) {
536 *func =
537 dsp->convolve[type_param_.is_intra_block_copy][type_param_.is_compound]
538 [type_param_.has_vertical_filter]
539 [type_param_.has_horizontal_filter];
540 }
541
542 template <int bitdepth, typename Pixel>
SetInputData(bool use_fixed_values,int value)543 void ConvolveTest<bitdepth, Pixel>::SetInputData(bool use_fixed_values,
544 int value) {
545 if (use_fixed_values) {
546 std::fill(source_, source_ + source_height_ * source_stride_, value);
547 } else {
548 const int offset =
549 kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
550 const int mask = (1 << bitdepth) - 1;
551 libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
552 const int height = param_.height;
553 const int width = param_.width;
554 for (int y = 0; y < height; ++y) {
555 for (int x = 0; x < width; ++x) {
556 source_[y * source_stride_ + x + offset] = rnd.Rand16() & mask;
557 }
558 }
559 // Copy border pixels to the left and right borders.
560 for (int y = 0; y < height; ++y) {
561 Memset(&source_[(y + kConvolveBorderLeftTop) * source_stride_],
562 source_[y * source_stride_ + offset], kConvolveBorderLeftTop);
563 Memset(&source_[y * source_stride_ + offset + width],
564 source_[y * source_stride_ + offset + width - 1],
565 kConvolveBorderLeftTop);
566 }
567 // Copy border pixels to the top and bottom borders.
568 for (int y = 0; y < kConvolveBorderLeftTop; ++y) {
569 memcpy(&source_[y * source_stride_],
570 &source_[kConvolveBorderLeftTop * source_stride_],
571 source_stride_ * sizeof(Pixel));
572 memcpy(&source_[(y + kConvolveBorderLeftTop + height) * source_stride_],
573 &source_[(kConvolveBorderLeftTop + height - 1) * source_stride_],
574 source_stride_ * sizeof(Pixel));
575 }
576 }
577 }
578
579 template <int bitdepth, typename Pixel>
Check(bool use_fixed_values,const Pixel * src,const Pixel * dest,libvpx_test::MD5 * md5_digest)580 void ConvolveTest<bitdepth, Pixel>::Check(bool use_fixed_values,
581 const Pixel* src, const Pixel* dest,
582 libvpx_test::MD5* md5_digest) {
583 if (use_fixed_values) {
584 // For fixed values, input and output are identical.
585 const bool success =
586 test_utils::CompareBlocks(src, dest, param_.width, param_.height,
587 kMaxBlockWidth, kMaxBlockWidth, false, false);
588 EXPECT_TRUE(success);
589 } else {
590 // For random input, compare md5.
591 const int offset =
592 kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
593 const size_t size = sizeof(dest_clipped_) - offset * sizeof(Pixel);
594 md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
595 }
596 }
597
598 template <int bitdepth, typename Pixel>
Check16Bit(bool use_fixed_values,const uint16_t * src,const uint16_t * dest,libvpx_test::MD5 * md5_digest)599 void ConvolveTest<bitdepth, Pixel>::Check16Bit(bool use_fixed_values,
600 const uint16_t* src,
601 const uint16_t* dest,
602 libvpx_test::MD5* md5_digest) {
603 if (use_fixed_values) {
604 // For fixed values, input and output are identical.
605 const bool success =
606 test_utils::CompareBlocks(src, dest, param_.width, param_.height,
607 kMaxBlockWidth, kMaxBlockWidth, false);
608 EXPECT_TRUE(success);
609 } else {
610 // For random input, compare md5.
611 const int offset =
612 kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
613 const size_t size = sizeof(dest_16bit_) - offset * sizeof(uint16_t);
614 md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
615 }
616 }
617
// Runs |cur_convolve_func_| up to |num_runs| times on the block described by
// |param_|, cycling through the filter ids and filter indices.
//
// |use_fixed_values| selects the input: the constant |value|, or deterministic
// pseudo-random pixels. For constant input each checked output is compared
// with the source block. For random input the outputs of the first
// |kMinimumViableRuns| iterations are accumulated into one MD5, which is
// printed together with the elapsed time and compared with the reference
// digest for GetDigestId().
template <int bitdepth, typename Pixel>
void ConvolveTest<bitdepth, Pixel>::Test(
    bool use_fixed_values, int value, int num_runs /*= kMinimumViableRuns*/) {
  // There's no meaning testing fixed input in compound convolve.
  if (type_param_.is_compound && use_fixed_values) return;

  // There should not be any function set for this combination.
  if (type_param_.is_intra_block_copy && type_param_.is_compound) {
    ASSERT_EQ(cur_convolve_func_, nullptr);
    return;
  }

  // Compound and intra block copy functions are only used for blocks 4x4 or
  // greater.
  if (type_param_.is_compound || type_param_.is_intra_block_copy) {
    if (param_.width < 4 || param_.height < 4) {
      GTEST_SKIP();
    }
  }

  // Skip unspecialized functions.
  if (cur_convolve_func_ == nullptr) {
    GTEST_SKIP();
  }

  SetInputData(use_fixed_values, value);
  int subpixel_x = 0;
  int subpixel_y = 0;
  int vertical_index = 0;
  int horizontal_index = 0;
  // Offset of the first block pixel past the top and left borders.
  const int offset =
      kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
  const Pixel* const src = source_ + offset;
  // |src_stride| and |dst_stride| are in bytes; |src_stride_16| and
  // |dst_stride_compound| are in elements.
  const ptrdiff_t src_stride = source_stride_ * sizeof(Pixel);
  const ptrdiff_t src_stride_16 = source_stride_;
  const ptrdiff_t dst_stride = kMaxBlockWidth * sizeof(Pixel);
  // Pack Compound output since we control the predictor buffer.
  const ptrdiff_t dst_stride_compound = param_.width;

  // Output is always 16 bits regardless of |bitdepth|.
  uint16_t* dst_16 = dest_16bit_ + offset;
  // Output depends on |bitdepth|.
  Pixel* dst_pixel = dest_clipped_ + offset;

  // Collect the first |kMinimumViableRuns| into one md5 buffer.
  libvpx_test::MD5 md5_digest;

  absl::Duration elapsed_time;
  for (int i = 0; i < num_runs; ++i) {
    // Test every filter.
    // Because of masking |subpixel_{x,y}| values roll over every 16 iterations.
    subpixel_x += 1 << 6;
    subpixel_y += 1 << 6;

    const int horizontal_filter_id = (subpixel_x >> 6) & 0xF;
    const int vertical_filter_id = (subpixel_y >> 6) & 0xF;

    // |filter_id| == 0 (copy) must be handled by the appropriate 1D or copy
    // function.
    // Both ids advance together, so this skips every 16th iteration.
    if (horizontal_filter_id == 0 || vertical_filter_id == 0) {
      continue;
    }

    // For focused speed testing these can be set to the desired filter. Want
    // only 8 tap filters? Set |{vertical,horizontal}_index| to 2.
    // Otherwise the indices advance once every 16 iterations and wrap at 4.
    vertical_index += static_cast<int>(i % 16 == 0);
    vertical_index %= 4;
    horizontal_index += static_cast<int>(i % 16 == 0);
    horizontal_index %= 4;

    // Only the convolve call itself is timed.
    if (type_param_.is_compound) {
      // Output type is uint16_t.
      const absl::Time start = absl::Now();
      cur_convolve_func_(src, src_stride, horizontal_index, vertical_index,
                         horizontal_filter_id, vertical_filter_id, param_.width,
                         param_.height, dst_16, dst_stride_compound);
      elapsed_time += absl::Now() - start;
    } else {
      // Output type is Pixel.
      const absl::Time start = absl::Now();
      cur_convolve_func_(src, src_stride, horizontal_index, vertical_index,
                         horizontal_filter_id, vertical_filter_id, param_.width,
                         param_.height, dst_pixel, dst_stride);
      elapsed_time += absl::Now() - start;
    }

    // Only check the output for the first set. After that it's just repeated
    // runs for speed timing.
    if (i >= kMinimumViableRuns) continue;

    if (type_param_.is_compound) {
      // Need to copy source to a uint16_t buffer for comparison.
      Pixel* src_ptr = source_;
      uint16_t* src_ptr_16 = source_16bit_;
      for (int y = 0; y < kMaxBlockHeight; ++y) {
        for (int x = 0; x < kMaxBlockWidth; ++x) {
          src_ptr_16[x] = src_ptr[x];
        }
        src_ptr += src_stride_16;
        src_ptr_16 += src_stride_16;
      }

      Check16Bit(use_fixed_values, source_16bit_ + offset, dst_16, &md5_digest);
    } else {
      Check(use_fixed_values, src, dst_pixel, &md5_digest);
    }
  }

  if (!use_fixed_values) {
    // md5 sums are only calculated for random input.
    const char* ref_digest;
    if (bitdepth == 8) {
      ref_digest = GetConvolveDigest8bpp(GetDigestId());
    } else {
      // NOTE(review): |ref_digest| is only assigned here when
      // LIBGAV1_MAX_BITDEPTH >= 10.
#if LIBGAV1_MAX_BITDEPTH >= 10
      ref_digest = GetConvolveDigest10bpp(GetDigestId());
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
    }
    // Label for the log line, chosen from the filter directions in use.
    const char* direction;
    if (type_param_.has_vertical_filter && type_param_.has_horizontal_filter) {
      direction = "2D";
    } else if (type_param_.has_vertical_filter) {
      direction = "Vertical";
    } else if (type_param_.has_horizontal_filter) {
      direction = "Horizontal";
    } else {
      direction = "Copy";
    }
    const auto elapsed_time_us =
        static_cast<int>(absl::ToInt64Microseconds(elapsed_time));
    printf("Mode Convolve%s%s%s[%25s]: %5d us MD5: %s\n",
           type_param_.is_compound ? "Compound" : "",
           type_param_.is_intra_block_copy ? "IntraBlockCopy" : "", direction,
           absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
           elapsed_time_us, md5_digest.Get());
    EXPECT_STREQ(ref_digest, md5_digest.Get());
  }
}
756
// Computes the extreme sums that |filter| can produce when every input sample
// lies in [|min_input|, |max_input|]. The smallest sum is written to
// |*min_output| and the largest to |*max_output|.
void ApplyFilterToSignedInput(const int min_input, const int max_input,
                              const int8_t filter[kSubPixelTaps],
                              int* min_output, int* max_output) {
  int lowest_sum = 0;
  int highest_sum = 0;
  for (int i = 0; i < kSubPixelTaps; ++i) {
    const int tap = filter[i];
    // A positive tap keeps the ordering of the bounds; any other tap swaps
    // which bound feeds which extreme.
    const bool positive_tap = tap > 0;
    const int input_for_lowest = positive_tap ? min_input : max_input;
    const int input_for_highest = positive_tap ? max_input : min_input;
    lowest_sum += input_for_lowest * tap;
    highest_sum += input_for_highest * tap;
  }
  *min_output = lowest_sum;
  *max_output = highest_sum;
}
774
// Same as ApplyFilterToSignedInput() for input samples in [0, |max_input|].
void ApplyFilterToUnsignedInput(const int max_input,
                                const int8_t filter[kSubPixelTaps],
                                int* min_output, int* max_output) {
  // The lower bound of the input range is zero.
  const int min_input = 0;
  ApplyFilterToSignedInput(min_input, max_input, filter, min_output,
                           max_output);
}
780
781 // Validate the maximum ranges for different parts of the Convolve process.
782 template <int bitdepth>
ShowRange()783 void ShowRange() {
784 // Subtract one from the shift bits because the filter is pre-shifted by 1.
785 constexpr int horizontal_bits = (bitdepth == kBitdepth12)
786 ? kInterRoundBitsHorizontal12bpp - 1
787 : kInterRoundBitsHorizontal - 1;
788 constexpr int vertical_bits = (bitdepth == kBitdepth12)
789 ? kInterRoundBitsVertical12bpp - 1
790 : kInterRoundBitsVertical - 1;
791 constexpr int compound_vertical_bits = kInterRoundBitsCompoundVertical - 1;
792
793 constexpr int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;
794
795 constexpr int max_input = (1 << bitdepth) - 1;
796
797 const int8_t* worst_convolve_filter = kHalfSubPixelFilters[2][8];
798
799 // First pass.
800 printf("Bitdepth: %2d Input range: [%8d, %8d]\n", bitdepth, 0,
801 max_input);
802
803 int min, max;
804 ApplyFilterToUnsignedInput(max_input, worst_convolve_filter, &min, &max);
805
806 if (bitdepth == 8) {
807 // 8bpp can use int16_t for sums.
808 assert(min > INT16_MIN);
809 assert(max < INT16_MAX);
810 } else {
811 // 10bpp and 12bpp require int32_t.
812 assert(min > INT32_MIN);
813 assert(max > INT16_MAX && max < INT32_MAX);
814 }
815
816 printf(" Horizontal upscaled range: [%8d, %8d]\n", min, max);
817
818 const int first_pass_min = RightShiftWithRounding(min, horizontal_bits);
819 const int first_pass_max = RightShiftWithRounding(max, horizontal_bits);
820
821 // All bitdepths can use int16_t for first pass output.
822 assert(first_pass_min > INT16_MIN);
823 assert(first_pass_max < INT16_MAX);
824
825 printf(" Horizontal downscaled range: [%8d, %8d]\n", first_pass_min,
826 first_pass_max);
827
828 // Second pass.
829 ApplyFilterToSignedInput(first_pass_min, first_pass_max,
830 worst_convolve_filter, &min, &max);
831
832 // All bitdepths require int32_t for second pass sums.
833 assert(min < INT16_MIN && min > INT32_MIN);
834 assert(max > INT16_MAX && max < INT32_MAX);
835
836 printf(" Vertical upscaled range: [%8d, %8d]\n", min, max);
837
838 // Second pass non-compound output is clipped to Pixel values.
839 const int second_pass_min =
840 Clip3(RightShiftWithRounding(min, vertical_bits), 0, max_input);
841 const int second_pass_max =
842 Clip3(RightShiftWithRounding(max, vertical_bits), 0, max_input);
843 printf(" Pixel output range: [%8d, %8d]\n", second_pass_min,
844 second_pass_max);
845
846 // Output is Pixel so matches Pixel values.
847 assert(second_pass_min == 0);
848 assert(second_pass_max == max_input);
849
850 const int compound_second_pass_min =
851 RightShiftWithRounding(min, compound_vertical_bits) + compound_offset;
852 const int compound_second_pass_max =
853 RightShiftWithRounding(max, compound_vertical_bits) + compound_offset;
854
855 printf(" Compound output range: [%8d, %8d]\n",
856 compound_second_pass_min, compound_second_pass_max);
857
858 if (bitdepth == 8) {
859 // 8bpp output is int16_t without an offset.
860 assert(compound_second_pass_min > INT16_MIN);
861 assert(compound_second_pass_max < INT16_MAX);
862 } else {
863 // 10bpp and 12bpp use the offset to fit inside uint16_t.
864 assert(compound_second_pass_min > 0);
865 assert(compound_second_pass_max < UINT16_MAX);
866 }
867
868 printf("\n");
869 }
870
// Prints and checks the intermediate value ranges for each bitdepth constant.
// The checks inside ShowRange() are assert()s.
TEST(ConvolveTest, ShowRange) {
  ShowRange<kBitdepth8>();
  ShowRange<kBitdepth10>();
  ShowRange<kBitdepth12>();
}
876
877 using ConvolveTest8bpp = ConvolveTest<8, uint8_t>;
878
TEST_P(ConvolveTest8bpp,FixedValues)879 TEST_P(ConvolveTest8bpp, FixedValues) {
880 Test(true, 0);
881 Test(true, 1);
882 Test(true, 128);
883 Test(true, 255);
884 }
885
TEST_P(ConvolveTest8bpp,RandomValues)886 TEST_P(ConvolveTest8bpp, RandomValues) { Test(false, 0); }
887
TEST_P(ConvolveTest8bpp,DISABLED_Speed)888 TEST_P(ConvolveTest8bpp, DISABLED_Speed) {
889 const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
890 Test(false, 0, num_runs);
891 }
892
893 //------------------------------------------------------------------------------
894 template <int bitdepth, typename Pixel>
895 class ConvolveScaleTest
896 : public testing::TestWithParam<
897 std::tuple<bool /*is_compound*/, ConvolveTestParam>> {
898 public:
899 ConvolveScaleTest() = default;
900 ~ConvolveScaleTest() override = default;
901
SetUp()902 void SetUp() override {
903 ConvolveInit_C();
904
905 const Dsp* const dsp = GetDspTable(bitdepth);
906 ASSERT_NE(dsp, nullptr);
907 base_convolve_scale_func_ = dsp->convolve_scale[is_compound_];
908
909 const testing::TestInfo* const test_info =
910 testing::UnitTest::GetInstance()->current_test_info();
911 const absl::string_view test_case = test_info->test_suite_name();
912 if (absl::StartsWith(test_case, "C/")) {
913 base_convolve_scale_func_ = nullptr;
914 } else if (absl::StartsWith(test_case, "SSE41/")) {
915 if ((GetCpuInfo() & kSSE4_1) != 0) {
916 ConvolveInit_SSE4_1();
917 }
918 } else if (absl::StartsWith(test_case, "AVX2/")) {
919 if ((GetCpuInfo() & kAVX2) != 0) {
920 ConvolveInit_AVX2();
921 }
922 } else if (absl::StartsWith(test_case, "NEON/")) {
923 ConvolveInit_NEON();
924 #if LIBGAV1_MAX_BITDEPTH >= 10
925 ConvolveInit10bpp_NEON();
926 #endif
927 } else {
928 FAIL() << "Unrecognized architecture prefix in test case name: "
929 << test_case;
930 }
931
932 cur_convolve_scale_func_ = dsp->convolve_scale[is_compound_];
933
934 // Skip functions that have not been specialized for this particular
935 // architecture.
936 if (cur_convolve_scale_func_ == base_convolve_scale_func_) {
937 cur_convolve_scale_func_ = nullptr;
938 }
939 }
940
941 protected:
GetDigestId() const942 int GetDigestId() const {
943 return param_.block_size +
944 param_.kNumBlockSizes * static_cast<int>(is_compound_);
945 }
946
947 void SetInputData(bool use_fixed_values, int value);
948 void Check(bool use_fixed_values, const Pixel* src, const Pixel* dest,
949 libvpx_test::MD5* md5_digest);
950 void Check16Bit(bool use_fixed_values, const uint16_t* src,
951 const uint16_t* dest, libvpx_test::MD5* md5_digest);
952 // |num_runs| covers the categories of filters (6) and the number of filters
953 // under each category (16).
954 void Test(bool use_fixed_values, int value,
955 int num_runs = kMinimumViableRuns);
956
957 const bool is_compound_ = std::get<0>(GetParam());
958 const ConvolveTestParam param_ = std::get<1>(GetParam());
959
960 private:
961 ConvolveScaleFunc base_convolve_scale_func_;
962 ConvolveScaleFunc cur_convolve_scale_func_;
963 // Convolve filters are 7-tap, which need 3 pixels
964 // (kRestorationHorizontalBorder) padding.
965 // The source can be at most 2 times of max width/height.
966 Pixel source_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
967 uint16_t source_16bit_[kMaxBlockHeight * kMaxBlockWidth * 4] = {};
968 uint16_t dest_16bit_[kMaxBlockHeight * kMaxBlockWidth] = {};
969 Pixel dest_clipped_[kMaxBlockHeight * kMaxBlockWidth] = {};
970
971 const int source_stride_ = kMaxBlockWidth * 2;
972 const int source_height_ = kMaxBlockHeight * 2;
973 };
974
975 template <int bitdepth, typename Pixel>
SetInputData(bool use_fixed_values,int value)976 void ConvolveScaleTest<bitdepth, Pixel>::SetInputData(bool use_fixed_values,
977 int value) {
978 if (use_fixed_values) {
979 std::fill(source_, source_ + source_height_ * source_stride_, value);
980 } else {
981 const int offset =
982 kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
983 const int mask = (1 << bitdepth) - 1;
984 libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
985 const int height = param_.height * 2;
986 const int width = param_.width * 2;
987 for (int y = 0; y < height; ++y) {
988 for (int x = 0; x < width; ++x) {
989 source_[y * source_stride_ + x + offset] = rnd.Rand16() & mask;
990 }
991 }
992 // Copy border pixels to the left and right borders.
993 for (int y = 0; y < height; ++y) {
994 Memset(&source_[(y + kConvolveBorderLeftTop) * source_stride_],
995 source_[y * source_stride_ + offset], kConvolveBorderLeftTop);
996 Memset(&source_[y * source_stride_ + offset + width],
997 source_[y * source_stride_ + offset + width - 1],
998 kConvolveBorderLeftTop);
999 }
1000 // Copy border pixels to the top and bottom borders.
1001 for (int y = 0; y < kConvolveBorderLeftTop; ++y) {
1002 memcpy(&source_[y * source_stride_],
1003 &source_[kConvolveBorderLeftTop * source_stride_],
1004 source_stride_ * sizeof(Pixel));
1005 memcpy(&source_[(y + kConvolveBorderLeftTop + height) * source_stride_],
1006 &source_[(kConvolveBorderLeftTop + height - 1) * source_stride_],
1007 source_stride_ * sizeof(Pixel));
1008 }
1009 }
1010 }
1011
1012 template <int bitdepth, typename Pixel>
Check(bool use_fixed_values,const Pixel * src,const Pixel * dest,libvpx_test::MD5 * md5_digest)1013 void ConvolveScaleTest<bitdepth, Pixel>::Check(bool use_fixed_values,
1014 const Pixel* src,
1015 const Pixel* dest,
1016 libvpx_test::MD5* md5_digest) {
1017 if (use_fixed_values) {
1018 // For fixed values, input and output are identical.
1019 const bool success =
1020 test_utils::CompareBlocks(src, dest, param_.width, param_.height,
1021 kMaxBlockWidth, kMaxBlockWidth, false, false);
1022 EXPECT_TRUE(success);
1023 } else {
1024 // For random input, compare md5.
1025 const int offset =
1026 kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
1027 const size_t size = sizeof(dest_clipped_) - offset * sizeof(Pixel);
1028 md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
1029 }
1030 }
1031
1032 template <int bitdepth, typename Pixel>
Check16Bit(bool use_fixed_values,const uint16_t * src,const uint16_t * dest,libvpx_test::MD5 * md5_digest)1033 void ConvolveScaleTest<bitdepth, Pixel>::Check16Bit(
1034 bool use_fixed_values, const uint16_t* src, const uint16_t* dest,
1035 libvpx_test::MD5* md5_digest) {
1036 if (use_fixed_values) {
1037 // For fixed values, input and output are identical.
1038 const bool success =
1039 test_utils::CompareBlocks(src, dest, param_.width, param_.height,
1040 kMaxBlockWidth, kMaxBlockWidth, false);
1041 EXPECT_TRUE(success);
1042 } else {
1043 // For random input, compare md5.
1044 const int offset =
1045 kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
1046 const size_t size = sizeof(dest_16bit_) - offset * sizeof(uint16_t);
1047 md5_digest->Add(reinterpret_cast<const uint8_t*>(dest), size);
1048 }
1049 }
1050
// Runs |cur_convolve_scale_func_| up to |num_runs| times on the block
// described by |param_|, using pseudo-random step sizes that are fixed for the
// whole call.
//
// |use_fixed_values| selects the input: the constant |value|, or deterministic
// pseudo-random pixels. For constant input each checked output is compared
// with the source. For random input the outputs of the first
// |kMinimumViableRuns| iterations are accumulated into one MD5, which is
// printed together with the elapsed time and compared with the reference
// digest for GetDigestId().
template <int bitdepth, typename Pixel>
void ConvolveScaleTest<bitdepth, Pixel>::Test(
    bool use_fixed_values, int value, int num_runs /*= kMinimumViableRuns*/) {
  // There's no meaning testing fixed input in compound convolve.
  if (is_compound_ && use_fixed_values) return;

  // The compound function is only used for blocks 4x4 or greater.
  if (is_compound_) {
    if (param_.width < 4 || param_.height < 4) {
      GTEST_SKIP();
    }
  }

  // Skip unspecialized functions.
  if (cur_convolve_scale_func_ == nullptr) {
    GTEST_SKIP();
  }

  SetInputData(use_fixed_values, value);
  // Seeded per digest id, so each parameter combination gets its own steps.
  libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed() +
                             GetDigestId());
  // [1,2048] for |step_[xy]|. This covers a scaling range of 1/1024 to 2x.
  const int step_x = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
  const int step_y = (rnd.Rand16() & ((1 << 11) - 1)) + 1;
  int subpixel_x = 0;
  int subpixel_y = 0;
  int vertical_index = 0;
  int horizontal_index = 0;
  // |offset| locates the first block pixel in the destination buffers;
  // |offset_scale| does the same in the wider source buffers.
  const int offset =
      kConvolveBorderLeftTop * kMaxBlockWidth + kConvolveBorderLeftTop;
  const int offset_scale =
      kConvolveBorderLeftTop * source_stride_ + kConvolveBorderLeftTop;
  const Pixel* const src_scale = source_ + offset_scale;
  // |src_stride| and |dst_stride| are in bytes; |dst_stride_compound| is in
  // elements.
  const ptrdiff_t src_stride = source_stride_ * sizeof(Pixel);
  const ptrdiff_t dst_stride = kMaxBlockWidth * sizeof(Pixel);
  // Pack Compound output since we control the predictor buffer.
  const ptrdiff_t dst_stride_compound = param_.width;

  // Output is always 16 bits regardless of |bitdepth|.
  uint16_t* dst_16 = dest_16bit_ + offset;
  // Output depends on |bitdepth|.
  Pixel* dst_pixel = dest_clipped_ + offset;

  // Collect the first |kMinimumViableRuns| into one md5 buffer.
  libvpx_test::MD5 md5_digest;

  absl::Duration elapsed_time;
  for (int i = 0; i < num_runs; ++i) {
    // Test every filter.
    // Because of masking |subpixel_{x,y}| values roll over every 16 iterations.
    subpixel_x += 1 << 6;
    subpixel_y += 1 << 6;

    const int horizontal_filter_id = (subpixel_x >> 6) & 0xF;
    const int vertical_filter_id = (subpixel_y >> 6) & 0xF;

    // |filter_id| == 0 (copy) must be handled by the appropriate 1D or copy
    // function.
    // In this test the filter ids are only used to skip every 16th iteration;
    // they are not passed to the scale function.
    if (horizontal_filter_id == 0 || vertical_filter_id == 0) {
      continue;
    }

    // For focused speed testing these can be set to the desired filter. Want
    // only 8 tap filters? Set |{vertical,horizontal}_index| to 2.
    // Otherwise the indices advance once every 16 iterations and wrap at 4.
    vertical_index += static_cast<int>(i % 16 == 0);
    vertical_index %= 4;
    horizontal_index += static_cast<int>(i % 16 == 0);
    horizontal_index %= 4;

    // Only the convolve call is timed. The compound variant writes uint16_t
    // to |dst_16|; the other variant writes Pixel to |dst_pixel|.
    const absl::Time start = absl::Now();
    if (is_compound_) {
      cur_convolve_scale_func_(
          source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
          step_y, param_.width, param_.height, dst_16, dst_stride_compound);
    } else {
      cur_convolve_scale_func_(
          source_, src_stride, horizontal_index, vertical_index, 0, 0, step_x,
          step_y, param_.width, param_.height, dst_pixel, dst_stride);
    }
    elapsed_time += absl::Now() - start;

    // Only check the output for the first set. After that it's just repeated
    // runs for speed timing.
    if (i >= kMinimumViableRuns) continue;

    // Convolve function does not clip the output. The clipping is applied
    // later, but libaom clips the output. So we apply clipping to match
    // libaom in tests.
    // NOTE(review): the clipped values land in |dest_clipped_|, but the
    // compound check below hashes |dst_16| - confirm this loop is still
    // needed.
    if (is_compound_) {
      const int single_round_offset = (1 << bitdepth) + (1 << (bitdepth - 1));
      Pixel* dest_row = dest_clipped_;
      for (int y = 0; y < kMaxBlockHeight; ++y) {
        for (int x = 0; x < kMaxBlockWidth; ++x) {
          dest_row[x] = static_cast<Pixel>(Clip3(
              dest_16bit_[y * dst_stride_compound + x] - single_round_offset, 0,
              (1 << bitdepth) - 1));
        }
        dest_row += kMaxBlockWidth;
      }
    }

    if (is_compound_) {
      Check16Bit(use_fixed_values, source_16bit_ + offset_scale, dst_16,
                 &md5_digest);
    } else {
      Check(use_fixed_values, src_scale, dst_pixel, &md5_digest);
    }
  }

  if (!use_fixed_values) {
    // md5 sums are only calculated for random input.
    const char* ref_digest;
    if (bitdepth == 8) {
      ref_digest = GetConvolveScaleDigest8bpp(GetDigestId());
    } else {
      // NOTE(review): |ref_digest| is only assigned here when
      // LIBGAV1_MAX_BITDEPTH >= 10.
#if LIBGAV1_MAX_BITDEPTH >= 10
      ref_digest = GetConvolveScaleDigest10bpp(GetDigestId());
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
    }

    const auto elapsed_time_us =
        static_cast<int>(absl::ToInt64Microseconds(elapsed_time));
    printf("Mode Convolve%sScale2D[%25s]: %5d us MD5: %s\n",
           is_compound_ ? "Compound" : "",
           absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
           elapsed_time_us, md5_digest.Get());
    EXPECT_STREQ(ref_digest, md5_digest.Get());
  }
}
1181
1182 using ConvolveScaleTest8bpp = ConvolveScaleTest<8, uint8_t>;
1183
TEST_P(ConvolveScaleTest8bpp,FixedValues)1184 TEST_P(ConvolveScaleTest8bpp, FixedValues) {
1185 Test(true, 0);
1186 Test(true, 1);
1187 Test(true, 128);
1188 Test(true, 255);
1189 }
1190
TEST_P(ConvolveScaleTest8bpp,RandomValues)1191 TEST_P(ConvolveScaleTest8bpp, RandomValues) { Test(false, 0); }
1192
TEST_P(ConvolveScaleTest8bpp,DISABLED_Speed)1193 TEST_P(ConvolveScaleTest8bpp, DISABLED_Speed) {
1194 const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
1195 Test(false, 0, num_runs);
1196 }
1197
1198 //------------------------------------------------------------------------------
// Block sizes exercised by every convolve suite, from 2x2 up to 128x128.
// Each entry becomes the ConvolveTestParam part of a test parameter tuple.
const ConvolveTestParam kConvolveParam[] = {
    ConvolveTestParam(ConvolveTestParam::kBlockSize2x2),
    ConvolveTestParam(ConvolveTestParam::kBlockSize2x4),
    ConvolveTestParam(ConvolveTestParam::kBlockSize4x2),
    ConvolveTestParam(ConvolveTestParam::kBlockSize4x4),
    ConvolveTestParam(ConvolveTestParam::kBlockSize4x8),
    ConvolveTestParam(ConvolveTestParam::kBlockSize8x2),
    ConvolveTestParam(ConvolveTestParam::kBlockSize8x4),
    ConvolveTestParam(ConvolveTestParam::kBlockSize8x8),
    ConvolveTestParam(ConvolveTestParam::kBlockSize8x16),
    ConvolveTestParam(ConvolveTestParam::kBlockSize16x8),
    ConvolveTestParam(ConvolveTestParam::kBlockSize16x16),
    ConvolveTestParam(ConvolveTestParam::kBlockSize16x32),
    ConvolveTestParam(ConvolveTestParam::kBlockSize32x16),
    ConvolveTestParam(ConvolveTestParam::kBlockSize32x32),
    ConvolveTestParam(ConvolveTestParam::kBlockSize32x64),
    ConvolveTestParam(ConvolveTestParam::kBlockSize64x32),
    ConvolveTestParam(ConvolveTestParam::kBlockSize64x64),
    ConvolveTestParam(ConvolveTestParam::kBlockSize64x128),
    ConvolveTestParam(ConvolveTestParam::kBlockSize128x64),
    ConvolveTestParam(ConvolveTestParam::kBlockSize128x128),
};
1221
// Convolve function variants exercised by the non-scaled suites.
// NOTE(review): the constructor is not visible here; the arguments are
// presumably (is_intra_block_copy, is_compound, has_vertical_filter,
// has_horizontal_filter) - confirm against ConvolveTypeParam.
const ConvolveTypeParam kConvolveTypeParam[] = {
    ConvolveTypeParam(false, false, false, false),
    ConvolveTypeParam(false, false, false, true),
    ConvolveTypeParam(false, false, true, false),
    ConvolveTypeParam(false, false, true, true),
    ConvolveTypeParam(false, true, false, false),
    ConvolveTypeParam(false, true, false, true),
    ConvolveTypeParam(false, true, true, false),
    ConvolveTypeParam(false, true, true, true),
    ConvolveTypeParam(true, false, false, false),
    ConvolveTypeParam(true, false, false, true),
    ConvolveTypeParam(true, false, true, false),
    ConvolveTypeParam(true, false, true, true),
    // This is left to ensure no function exists for |intra_block_copy| when
    // |is_compound| is true; all combinations aren't necessary.
    ConvolveTypeParam(true, true, false, false),
};
1239
// Instantiate the 8bpp suites once per implementation. The instantiation name
// (C, NEON, SSE41, AVX2) becomes the prefix of the suite name, which
// ConvolveScaleTest::SetUp() matches to choose the init function to call.
// ConvolveTest runs every type/block-size pair; ConvolveScaleTest runs every
// block size with and without compound.
INSTANTIATE_TEST_SUITE_P(C, ConvolveTest8bpp,
                         testing::Combine(testing::ValuesIn(kConvolveTypeParam),
                                          testing::ValuesIn(kConvolveParam)));
INSTANTIATE_TEST_SUITE_P(C, ConvolveScaleTest8bpp,
                         testing::Combine(testing::Bool(),
                                          testing::ValuesIn(kConvolveParam)));

// The SIMD instantiations are only compiled when the matching
// LIBGAV1_ENABLE_* macro is set.
#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest8bpp,
                         testing::Combine(testing::ValuesIn(kConvolveTypeParam),
                                          testing::ValuesIn(kConvolveParam)));
INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScaleTest8bpp,
                         testing::Combine(testing::Bool(),
                                          testing::ValuesIn(kConvolveParam)));
#endif  // LIBGAV1_ENABLE_NEON

#if LIBGAV1_ENABLE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE41, ConvolveTest8bpp,
                         testing::Combine(testing::ValuesIn(kConvolveTypeParam),
                                          testing::ValuesIn(kConvolveParam)));
INSTANTIATE_TEST_SUITE_P(SSE41, ConvolveScaleTest8bpp,
                         testing::Combine(testing::Bool(),
                                          testing::ValuesIn(kConvolveParam)));
#endif  // LIBGAV1_ENABLE_SSE4_1

#if LIBGAV1_ENABLE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveTest8bpp,
                         testing::Combine(testing::ValuesIn(kConvolveTypeParam),
                                          testing::ValuesIn(kConvolveParam)));
INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveScaleTest8bpp,
                         testing::Combine(testing::Bool(),
                                          testing::ValuesIn(kConvolveParam)));
#endif  // LIBGAV1_ENABLE_AVX2
1273
1274 #if LIBGAV1_MAX_BITDEPTH >= 10
1275 using ConvolveTest10bpp = ConvolveTest<10, uint16_t>;
1276
TEST_P(ConvolveTest10bpp,FixedValues)1277 TEST_P(ConvolveTest10bpp, FixedValues) {
1278 Test(true, 0);
1279 Test(true, 1);
1280 Test(true, 128);
1281 Test(true, (1 << 10) - 1);
1282 }
1283
TEST_P(ConvolveTest10bpp,RandomValues)1284 TEST_P(ConvolveTest10bpp, RandomValues) { Test(false, 0); }
1285
TEST_P(ConvolveTest10bpp,DISABLED_Speed)1286 TEST_P(ConvolveTest10bpp, DISABLED_Speed) {
1287 const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
1288 Test(false, 0, num_runs);
1289 }
1290
1291 using ConvolveScaleTest10bpp = ConvolveScaleTest<10, uint16_t>;
1292
TEST_P(ConvolveScaleTest10bpp,FixedValues)1293 TEST_P(ConvolveScaleTest10bpp, FixedValues) {
1294 Test(true, 0);
1295 Test(true, 1);
1296 Test(true, 128);
1297 Test(true, (1 << 10) - 1);
1298 }
1299
TEST_P(ConvolveScaleTest10bpp,RandomValues)1300 TEST_P(ConvolveScaleTest10bpp, RandomValues) { Test(false, 0); }
1301
TEST_P(ConvolveScaleTest10bpp,DISABLED_Speed)1302 TEST_P(ConvolveScaleTest10bpp, DISABLED_Speed) {
1303 const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
1304 Test(false, 0, num_runs);
1305 }
1306
// Instantiate the 10bpp suites for the C implementation and, when enabled,
// NEON. No SSE4.1 or AVX2 instantiations are declared for 10bpp here.
INSTANTIATE_TEST_SUITE_P(C, ConvolveTest10bpp,
                         testing::Combine(testing::ValuesIn(kConvolveTypeParam),
                                          testing::ValuesIn(kConvolveParam)));
INSTANTIATE_TEST_SUITE_P(C, ConvolveScaleTest10bpp,
                         testing::Combine(testing::Bool(),
                                          testing::ValuesIn(kConvolveParam)));

#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, ConvolveTest10bpp,
                         testing::Combine(testing::ValuesIn(kConvolveTypeParam),
                                          testing::ValuesIn(kConvolveParam)));
INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScaleTest10bpp,
                         testing::Combine(testing::Bool(),
                                          testing::ValuesIn(kConvolveParam)));
#endif  // LIBGAV1_ENABLE_NEON
1322
1323 #endif // LIBGAV1_MAX_BITDEPTH >= 10
1324
1325 } // namespace
1326 } // namespace dsp
1327 } // namespace libgav1
1328