1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/obmc.h"
16 #include "src/utils/cpu.h"
17
18 #if LIBGAV1_ENABLE_NEON
19
20 #include <arm_neon.h>
21
22 #include <algorithm>
23 #include <cassert>
24 #include <cstddef>
25 #include <cstdint>
26 #include <cstring>
27
28 #include "src/dsp/arm/common_neon.h"
29 #include "src/dsp/constants.h"
30 #include "src/dsp/dsp.h"
31 #include "src/utils/common.h"
32
33 namespace libgav1 {
34 namespace dsp {
35 namespace {
36 #include "src/dsp/obmc.inc"
37
38 } // namespace
39
40 namespace low_bitdepth {
41 namespace {
42
WriteObmcLine4(uint8_t * LIBGAV1_RESTRICT const pred,const uint8_t * LIBGAV1_RESTRICT const obmc_pred,const uint8x8_t pred_mask,const uint8x8_t obmc_pred_mask)43 inline void WriteObmcLine4(uint8_t* LIBGAV1_RESTRICT const pred,
44 const uint8_t* LIBGAV1_RESTRICT const obmc_pred,
45 const uint8x8_t pred_mask,
46 const uint8x8_t obmc_pred_mask) {
47 const uint8x8_t pred_val = Load4(pred);
48 const uint8x8_t obmc_pred_val = Load4(obmc_pred);
49 const uint16x8_t weighted_pred = vmull_u8(pred_mask, pred_val);
50 const uint8x8_t result =
51 vrshrn_n_u16(vmlal_u8(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
52 StoreLo4(pred, result);
53 }
54
OverlapBlendFromLeft2xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)55 inline void OverlapBlendFromLeft2xH_NEON(
56 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
57 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
58 const ptrdiff_t obmc_prediction_stride) {
59 const uint8x8_t mask_inverter = vdup_n_u8(64);
60 const uint8x8_t pred_mask = Load2(kObmcMask);
61 const uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
62 uint8x8_t pred_val = vdup_n_u8(0);
63 uint8x8_t obmc_pred_val = vdup_n_u8(0);
64 int y = 0;
65 do {
66 pred_val = Load2<0>(pred, pred_val);
67 const uint16x8_t weighted_pred = vmull_u8(pred_mask, pred_val);
68 obmc_pred_val = Load2<0>(obmc_pred, obmc_pred_val);
69 const uint8x8_t result =
70 vrshrn_n_u16(vmlal_u8(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
71 Store2<0>(pred, result);
72
73 pred += prediction_stride;
74 obmc_pred += obmc_prediction_stride;
75 } while (++y != height);
76 }
77
OverlapBlendFromLeft4xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)78 inline void OverlapBlendFromLeft4xH_NEON(
79 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
80 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
81 const ptrdiff_t obmc_prediction_stride) {
82 const uint8x8_t mask_inverter = vdup_n_u8(64);
83 const uint8x8_t pred_mask = Load4(kObmcMask + 2);
84 // 64 - mask
85 const uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
86 int y = 0;
87 do {
88 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
89 pred += prediction_stride;
90 obmc_pred += obmc_prediction_stride;
91
92 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
93 pred += prediction_stride;
94 obmc_pred += obmc_prediction_stride;
95
96 y += 2;
97 } while (y != height);
98 }
99
OverlapBlendFromLeft8xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)100 inline void OverlapBlendFromLeft8xH_NEON(
101 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
102 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
103 const ptrdiff_t obmc_prediction_stride) {
104 const uint8x8_t mask_inverter = vdup_n_u8(64);
105 const uint8x8_t pred_mask = vld1_u8(kObmcMask + 6);
106 // 64 - mask
107 const uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
108 int y = 0;
109 do {
110 const uint8x8_t pred_val = vld1_u8(pred);
111 const uint16x8_t weighted_pred = vmull_u8(pred_mask, pred_val);
112 const uint8x8_t obmc_pred_val = vld1_u8(obmc_pred);
113 const uint8x8_t result =
114 vrshrn_n_u16(vmlal_u8(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
115
116 vst1_u8(pred, result);
117 pred += prediction_stride;
118 obmc_pred += obmc_prediction_stride;
119 } while (++y != height);
120 }
121
OverlapBlendFromLeft_NEON(void * LIBGAV1_RESTRICT const prediction,const ptrdiff_t prediction_stride,const int width,const int height,const void * LIBGAV1_RESTRICT const obmc_prediction,const ptrdiff_t obmc_prediction_stride)122 void OverlapBlendFromLeft_NEON(
123 void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
124 const int width, const int height,
125 const void* LIBGAV1_RESTRICT const obmc_prediction,
126 const ptrdiff_t obmc_prediction_stride) {
127 auto* pred = static_cast<uint8_t*>(prediction);
128 const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
129 assert(width >= 2);
130 assert(height >= 4);
131
132 if (width == 2) {
133 OverlapBlendFromLeft2xH_NEON(pred, prediction_stride, height, obmc_pred,
134 obmc_prediction_stride);
135 return;
136 }
137 if (width == 4) {
138 OverlapBlendFromLeft4xH_NEON(pred, prediction_stride, height, obmc_pred,
139 obmc_prediction_stride);
140 return;
141 }
142 if (width == 8) {
143 OverlapBlendFromLeft8xH_NEON(pred, prediction_stride, height, obmc_pred,
144 obmc_prediction_stride);
145 return;
146 }
147 const uint8x16_t mask_inverter = vdupq_n_u8(64);
148 const uint8_t* mask = kObmcMask + width - 2;
149 int x = 0;
150 do {
151 pred = static_cast<uint8_t*>(prediction) + x;
152 obmc_pred = static_cast<const uint8_t*>(obmc_prediction) + x;
153 const uint8x16_t pred_mask = vld1q_u8(mask + x);
154 // 64 - mask
155 const uint8x16_t obmc_pred_mask = vsubq_u8(mask_inverter, pred_mask);
156 int y = 0;
157 do {
158 const uint8x16_t pred_val = vld1q_u8(pred);
159 const uint8x16_t obmc_pred_val = vld1q_u8(obmc_pred);
160 const uint16x8_t weighted_pred_lo =
161 vmull_u8(vget_low_u8(pred_mask), vget_low_u8(pred_val));
162 const uint8x8_t result_lo =
163 vrshrn_n_u16(vmlal_u8(weighted_pred_lo, vget_low_u8(obmc_pred_mask),
164 vget_low_u8(obmc_pred_val)),
165 6);
166 const uint16x8_t weighted_pred_hi =
167 vmull_u8(vget_high_u8(pred_mask), vget_high_u8(pred_val));
168 const uint8x8_t result_hi =
169 vrshrn_n_u16(vmlal_u8(weighted_pred_hi, vget_high_u8(obmc_pred_mask),
170 vget_high_u8(obmc_pred_val)),
171 6);
172 vst1q_u8(pred, vcombine_u8(result_lo, result_hi));
173
174 pred += prediction_stride;
175 obmc_pred += obmc_prediction_stride;
176 } while (++y < height);
177 x += 16;
178 } while (x < width);
179 }
180
OverlapBlendFromTop4x4_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride,const int height)181 inline void OverlapBlendFromTop4x4_NEON(
182 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
183 const uint8_t* LIBGAV1_RESTRICT obmc_pred,
184 const ptrdiff_t obmc_prediction_stride, const int height) {
185 uint8x8_t pred_mask = vdup_n_u8(kObmcMask[height - 2]);
186 const uint8x8_t mask_inverter = vdup_n_u8(64);
187 uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
188 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
189 pred += prediction_stride;
190 obmc_pred += obmc_prediction_stride;
191
192 if (height == 2) {
193 return;
194 }
195
196 pred_mask = vdup_n_u8(kObmcMask[3]);
197 obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
198 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
199 pred += prediction_stride;
200 obmc_pred += obmc_prediction_stride;
201
202 pred_mask = vdup_n_u8(kObmcMask[4]);
203 obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
204 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
205 }
206
OverlapBlendFromTop4xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)207 inline void OverlapBlendFromTop4xH_NEON(
208 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
209 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
210 const ptrdiff_t obmc_prediction_stride) {
211 if (height < 8) {
212 OverlapBlendFromTop4x4_NEON(pred, prediction_stride, obmc_pred,
213 obmc_prediction_stride, height);
214 return;
215 }
216 const uint8_t* mask = kObmcMask + height - 2;
217 const uint8x8_t mask_inverter = vdup_n_u8(64);
218 int y = 0;
219 // Compute 6 lines for height 8, or 12 lines for height 16. The remaining
220 // lines are unchanged as the corresponding mask value is 64.
221 do {
222 uint8x8_t pred_mask = vdup_n_u8(mask[y]);
223 uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
224 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
225 pred += prediction_stride;
226 obmc_pred += obmc_prediction_stride;
227
228 pred_mask = vdup_n_u8(mask[y + 1]);
229 obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
230 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
231 pred += prediction_stride;
232 obmc_pred += obmc_prediction_stride;
233
234 pred_mask = vdup_n_u8(mask[y + 2]);
235 obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
236 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
237 pred += prediction_stride;
238 obmc_pred += obmc_prediction_stride;
239
240 pred_mask = vdup_n_u8(mask[y + 3]);
241 obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
242 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
243 pred += prediction_stride;
244 obmc_pred += obmc_prediction_stride;
245
246 pred_mask = vdup_n_u8(mask[y + 4]);
247 obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
248 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
249 pred += prediction_stride;
250 obmc_pred += obmc_prediction_stride;
251
252 pred_mask = vdup_n_u8(mask[y + 5]);
253 obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
254 WriteObmcLine4(pred, obmc_pred, pred_mask, obmc_pred_mask);
255 pred += prediction_stride;
256 obmc_pred += obmc_prediction_stride;
257
258 // Increment for the right mask index.
259 y += 6;
260 } while (y < height - 4);
261 }
262
OverlapBlendFromTop8xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)263 inline void OverlapBlendFromTop8xH_NEON(
264 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
265 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
266 const ptrdiff_t obmc_prediction_stride) {
267 const uint8x8_t mask_inverter = vdup_n_u8(64);
268 const uint8_t* mask = kObmcMask + height - 2;
269 const int compute_height = height - (height >> 2);
270 int y = 0;
271 do {
272 const uint8x8_t pred_mask = vdup_n_u8(mask[y]);
273 // 64 - mask
274 const uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
275 const uint8x8_t pred_val = vld1_u8(pred);
276 const uint16x8_t weighted_pred = vmull_u8(pred_mask, pred_val);
277 const uint8x8_t obmc_pred_val = vld1_u8(obmc_pred);
278 const uint8x8_t result =
279 vrshrn_n_u16(vmlal_u8(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
280
281 vst1_u8(pred, result);
282 pred += prediction_stride;
283 obmc_pred += obmc_prediction_stride;
284 } while (++y != compute_height);
285 }
286
OverlapBlendFromTop_NEON(void * LIBGAV1_RESTRICT const prediction,const ptrdiff_t prediction_stride,const int width,const int height,const void * LIBGAV1_RESTRICT const obmc_prediction,const ptrdiff_t obmc_prediction_stride)287 void OverlapBlendFromTop_NEON(
288 void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
289 const int width, const int height,
290 const void* LIBGAV1_RESTRICT const obmc_prediction,
291 const ptrdiff_t obmc_prediction_stride) {
292 auto* pred = static_cast<uint8_t*>(prediction);
293 const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
294 assert(width >= 4);
295 assert(height >= 2);
296
297 if (width == 4) {
298 OverlapBlendFromTop4xH_NEON(pred, prediction_stride, height, obmc_pred,
299 obmc_prediction_stride);
300 return;
301 }
302
303 if (width == 8) {
304 OverlapBlendFromTop8xH_NEON(pred, prediction_stride, height, obmc_pred,
305 obmc_prediction_stride);
306 return;
307 }
308
309 const uint8_t* mask = kObmcMask + height - 2;
310 const uint8x8_t mask_inverter = vdup_n_u8(64);
311 // Stop when mask value becomes 64. This is inferred for 4xH.
312 const int compute_height = height - (height >> 2);
313 int y = 0;
314 do {
315 const uint8x8_t pred_mask = vdup_n_u8(mask[y]);
316 // 64 - mask
317 const uint8x8_t obmc_pred_mask = vsub_u8(mask_inverter, pred_mask);
318 int x = 0;
319 do {
320 const uint8x16_t pred_val = vld1q_u8(pred + x);
321 const uint8x16_t obmc_pred_val = vld1q_u8(obmc_pred + x);
322 const uint16x8_t weighted_pred_lo =
323 vmull_u8(pred_mask, vget_low_u8(pred_val));
324 const uint8x8_t result_lo =
325 vrshrn_n_u16(vmlal_u8(weighted_pred_lo, obmc_pred_mask,
326 vget_low_u8(obmc_pred_val)),
327 6);
328 const uint16x8_t weighted_pred_hi =
329 vmull_u8(pred_mask, vget_high_u8(pred_val));
330 const uint8x8_t result_hi =
331 vrshrn_n_u16(vmlal_u8(weighted_pred_hi, obmc_pred_mask,
332 vget_high_u8(obmc_pred_val)),
333 6);
334 vst1q_u8(pred + x, vcombine_u8(result_lo, result_hi));
335
336 x += 16;
337 } while (x < width);
338 pred += prediction_stride;
339 obmc_pred += obmc_prediction_stride;
340 } while (++y < compute_height);
341 }
342
Init8bpp()343 void Init8bpp() {
344 Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth8);
345 assert(dsp != nullptr);
346 dsp->obmc_blend[kObmcDirectionVertical] = OverlapBlendFromTop_NEON;
347 dsp->obmc_blend[kObmcDirectionHorizontal] = OverlapBlendFromLeft_NEON;
348 }
349
350 } // namespace
351 } // namespace low_bitdepth
352
353 #if LIBGAV1_MAX_BITDEPTH >= 10
354 namespace high_bitdepth {
355 namespace {
356
// This is a flat array of masks for each block dimension from 2 to 32. The
// starting index for each length is length-2. The value 64 leaves the result
// equal to |pred| and may be ignored if convenient. Vector loads may overread
// values meant for larger sizes, but these values will be unused.
constexpr uint16_t kObmcMask[62] = {
    // Length 2 (index 0).
    45, 64,
    // Length 4 (index 2).
    39, 50, 59, 64,
    // Length 8 (index 6).
    36, 42, 48, 53, 57, 61, 64, 64,
    // Length 16 (index 14).
    34, 37, 40, 43, 46, 49, 52, 54,
    56, 58, 60, 61, 64, 64, 64, 64,
    // Length 32 (index 30).
    33, 35, 36, 38, 40, 41, 43, 44,
    45, 47, 48, 50, 51, 52, 53, 55,
    56, 57, 58, 59, 60, 60, 61, 62,
    64, 64, 64, 64, 64, 64, 64, 64};
373
BlendObmc2Or4(uint8_t * LIBGAV1_RESTRICT const pred,const uint8_t * LIBGAV1_RESTRICT const obmc_pred,const uint16x4_t pred_mask,const uint16x4_t obmc_pred_mask)374 inline uint16x4_t BlendObmc2Or4(uint8_t* LIBGAV1_RESTRICT const pred,
375 const uint8_t* LIBGAV1_RESTRICT const obmc_pred,
376 const uint16x4_t pred_mask,
377 const uint16x4_t obmc_pred_mask) {
378 const uint16x4_t pred_val = vld1_u16(reinterpret_cast<uint16_t*>(pred));
379 const uint16x4_t obmc_pred_val =
380 vld1_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
381 const uint16x4_t weighted_pred = vmul_u16(pred_mask, pred_val);
382 const uint16x4_t result =
383 vrshr_n_u16(vmla_u16(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
384 return result;
385 }
386
BlendObmc8(uint8_t * LIBGAV1_RESTRICT const pred,const uint8_t * LIBGAV1_RESTRICT const obmc_pred,const uint16x8_t pred_mask,const uint16x8_t obmc_pred_mask)387 inline uint16x8_t BlendObmc8(uint8_t* LIBGAV1_RESTRICT const pred,
388 const uint8_t* LIBGAV1_RESTRICT const obmc_pred,
389 const uint16x8_t pred_mask,
390 const uint16x8_t obmc_pred_mask) {
391 const uint16x8_t pred_val = vld1q_u16(reinterpret_cast<uint16_t*>(pred));
392 const uint16x8_t obmc_pred_val =
393 vld1q_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
394 const uint16x8_t weighted_pred = vmulq_u16(pred_mask, pred_val);
395 const uint16x8_t result =
396 vrshrq_n_u16(vmlaq_u16(weighted_pred, obmc_pred_mask, obmc_pred_val), 6);
397 return result;
398 }
399
OverlapBlendFromLeft2xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)400 inline void OverlapBlendFromLeft2xH_NEON(
401 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
402 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
403 const ptrdiff_t obmc_prediction_stride) {
404 const uint16x4_t mask_inverter = vdup_n_u16(64);
405 // Second two lanes unused.
406 const uint16x4_t pred_mask = vld1_u16(kObmcMask);
407 const uint16x4_t obmc_pred_mask = vsub_u16(mask_inverter, pred_mask);
408 int y = 0;
409 do {
410 const uint16x4_t result_0 =
411 BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
412 Store2<0>(reinterpret_cast<uint16_t*>(pred), result_0);
413
414 pred += prediction_stride;
415 obmc_pred += obmc_prediction_stride;
416
417 const uint16x4_t result_1 =
418 BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
419 Store2<0>(reinterpret_cast<uint16_t*>(pred), result_1);
420
421 pred += prediction_stride;
422 obmc_pred += obmc_prediction_stride;
423
424 y += 2;
425 } while (y != height);
426 }
427
OverlapBlendFromLeft4xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)428 inline void OverlapBlendFromLeft4xH_NEON(
429 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
430 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
431 const ptrdiff_t obmc_prediction_stride) {
432 const uint16x4_t mask_inverter = vdup_n_u16(64);
433 const uint16x4_t pred_mask = vld1_u16(kObmcMask + 2);
434 // 64 - mask
435 const uint16x4_t obmc_pred_mask = vsub_u16(mask_inverter, pred_mask);
436 int y = 0;
437 do {
438 const uint16x4_t result_0 =
439 BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
440 vst1_u16(reinterpret_cast<uint16_t*>(pred), result_0);
441 pred += prediction_stride;
442 obmc_pred += obmc_prediction_stride;
443
444 const uint16x4_t result_1 =
445 BlendObmc2Or4(pred, obmc_pred, pred_mask, obmc_pred_mask);
446 vst1_u16(reinterpret_cast<uint16_t*>(pred), result_1);
447 pred += prediction_stride;
448 obmc_pred += obmc_prediction_stride;
449
450 y += 2;
451 } while (y != height);
452 }
453
OverlapBlendFromLeft_NEON(void * LIBGAV1_RESTRICT const prediction,const ptrdiff_t prediction_stride,const int width,const int height,const void * LIBGAV1_RESTRICT const obmc_prediction,const ptrdiff_t obmc_prediction_stride)454 void OverlapBlendFromLeft_NEON(
455 void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
456 const int width, const int height,
457 const void* LIBGAV1_RESTRICT const obmc_prediction,
458 const ptrdiff_t obmc_prediction_stride) {
459 auto* pred = static_cast<uint8_t*>(prediction);
460 const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
461 assert(width >= 2);
462 assert(height >= 4);
463
464 if (width == 2) {
465 OverlapBlendFromLeft2xH_NEON(pred, prediction_stride, height, obmc_pred,
466 obmc_prediction_stride);
467 return;
468 }
469 if (width == 4) {
470 OverlapBlendFromLeft4xH_NEON(pred, prediction_stride, height, obmc_pred,
471 obmc_prediction_stride);
472 return;
473 }
474 const uint16x8_t mask_inverter = vdupq_n_u16(64);
475 const uint16_t* mask = kObmcMask + width - 2;
476 int x = 0;
477 do {
478 pred = reinterpret_cast<uint8_t*>(static_cast<uint16_t*>(prediction) + x);
479 obmc_pred = reinterpret_cast<const uint8_t*>(
480 static_cast<const uint16_t*>(obmc_prediction) + x);
481 const uint16x8_t pred_mask = vld1q_u16(mask + x);
482 // 64 - mask
483 const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
484 int y = 0;
485 do {
486 const uint16x8_t result =
487 BlendObmc8(pred, obmc_pred, pred_mask, obmc_pred_mask);
488 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
489
490 pred += prediction_stride;
491 obmc_pred += obmc_prediction_stride;
492 } while (++y < height);
493 x += 8;
494 } while (x < width);
495 }
496
497 template <int lane>
BlendObmcFromTop4(uint8_t * LIBGAV1_RESTRICT const pred,const uint8_t * LIBGAV1_RESTRICT const obmc_pred,const uint16x8_t pred_mask,const uint16x8_t obmc_pred_mask)498 inline uint16x4_t BlendObmcFromTop4(
499 uint8_t* LIBGAV1_RESTRICT const pred,
500 const uint8_t* LIBGAV1_RESTRICT const obmc_pred, const uint16x8_t pred_mask,
501 const uint16x8_t obmc_pred_mask) {
502 const uint16x4_t pred_val = vld1_u16(reinterpret_cast<uint16_t*>(pred));
503 const uint16x4_t obmc_pred_val =
504 vld1_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
505 const uint16x4_t weighted_pred = VMulLaneQU16<lane>(pred_val, pred_mask);
506 const uint16x4_t result = vrshr_n_u16(
507 VMlaLaneQU16<lane>(weighted_pred, obmc_pred_val, obmc_pred_mask), 6);
508 return result;
509 }
510
511 template <int lane>
BlendObmcFromTop8(uint8_t * LIBGAV1_RESTRICT const pred,const uint8_t * LIBGAV1_RESTRICT const obmc_pred,const uint16x8_t pred_mask,const uint16x8_t obmc_pred_mask)512 inline uint16x8_t BlendObmcFromTop8(
513 uint8_t* LIBGAV1_RESTRICT const pred,
514 const uint8_t* LIBGAV1_RESTRICT const obmc_pred, const uint16x8_t pred_mask,
515 const uint16x8_t obmc_pred_mask) {
516 const uint16x8_t pred_val = vld1q_u16(reinterpret_cast<uint16_t*>(pred));
517 const uint16x8_t obmc_pred_val =
518 vld1q_u16(reinterpret_cast<const uint16_t*>(obmc_pred));
519 const uint16x8_t weighted_pred = VMulQLaneQU16<lane>(pred_val, pred_mask);
520 const uint16x8_t result = vrshrq_n_u16(
521 VMlaQLaneQU16<lane>(weighted_pred, obmc_pred_val, obmc_pred_mask), 6);
522 return result;
523 }
524
OverlapBlendFromTop4x2Or4_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride,const int height)525 inline void OverlapBlendFromTop4x2Or4_NEON(
526 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
527 const uint8_t* LIBGAV1_RESTRICT obmc_pred,
528 const ptrdiff_t obmc_prediction_stride, const int height) {
529 const uint16x8_t pred_mask = vld1q_u16(&kObmcMask[height - 2]);
530 const uint16x8_t mask_inverter = vdupq_n_u16(64);
531 const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
532 uint16x4_t result =
533 BlendObmcFromTop4<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
534 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
535 pred += prediction_stride;
536 obmc_pred += obmc_prediction_stride;
537
538 if (height == 2) {
539 // Mask value is 64, meaning |pred| is unchanged.
540 return;
541 }
542
543 result = BlendObmcFromTop4<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
544 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
545 pred += prediction_stride;
546 obmc_pred += obmc_prediction_stride;
547
548 result = BlendObmcFromTop4<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
549 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
550 }
551
OverlapBlendFromTop4xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const int height,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride)552 inline void OverlapBlendFromTop4xH_NEON(
553 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
554 const int height, const uint8_t* LIBGAV1_RESTRICT obmc_pred,
555 const ptrdiff_t obmc_prediction_stride) {
556 if (height < 8) {
557 OverlapBlendFromTop4x2Or4_NEON(pred, prediction_stride, obmc_pred,
558 obmc_prediction_stride, height);
559 return;
560 }
561 const uint16_t* mask = kObmcMask + height - 2;
562 const uint16x8_t mask_inverter = vdupq_n_u16(64);
563 int y = 0;
564 // Compute 6 lines for height 8, or 12 lines for height 16. The remaining
565 // lines are unchanged as the corresponding mask value is 64.
566 do {
567 const uint16x8_t pred_mask = vld1q_u16(&mask[y]);
568 const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
569 uint16x4_t result =
570 BlendObmcFromTop4<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
571 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
572 pred += prediction_stride;
573 obmc_pred += obmc_prediction_stride;
574
575 result = BlendObmcFromTop4<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
576 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
577 pred += prediction_stride;
578 obmc_pred += obmc_prediction_stride;
579
580 result = BlendObmcFromTop4<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
581 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
582 pred += prediction_stride;
583 obmc_pred += obmc_prediction_stride;
584
585 result = BlendObmcFromTop4<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
586 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
587 pred += prediction_stride;
588 obmc_pred += obmc_prediction_stride;
589
590 result = BlendObmcFromTop4<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
591 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
592 pred += prediction_stride;
593 obmc_pred += obmc_prediction_stride;
594
595 result = BlendObmcFromTop4<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
596 vst1_u16(reinterpret_cast<uint16_t*>(pred), result);
597 pred += prediction_stride;
598 obmc_pred += obmc_prediction_stride;
599
600 // Increment for the right mask index.
601 y += 6;
602 } while (y < height - 4);
603 }
604
OverlapBlendFromTop8xH_NEON(uint8_t * LIBGAV1_RESTRICT pred,const ptrdiff_t prediction_stride,const uint8_t * LIBGAV1_RESTRICT obmc_pred,const ptrdiff_t obmc_prediction_stride,const int height)605 inline void OverlapBlendFromTop8xH_NEON(
606 uint8_t* LIBGAV1_RESTRICT pred, const ptrdiff_t prediction_stride,
607 const uint8_t* LIBGAV1_RESTRICT obmc_pred,
608 const ptrdiff_t obmc_prediction_stride, const int height) {
609 const uint16_t* mask = kObmcMask + height - 2;
610 const uint16x8_t mask_inverter = vdupq_n_u16(64);
611 uint16x8_t pred_mask = vld1q_u16(mask);
612 uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
613 uint16x8_t result =
614 BlendObmcFromTop8<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
615 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
616 if (height == 2) return;
617
618 pred += prediction_stride;
619 obmc_pred += obmc_prediction_stride;
620
621 result = BlendObmcFromTop8<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
622 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
623 pred += prediction_stride;
624 obmc_pred += obmc_prediction_stride;
625
626 result = BlendObmcFromTop8<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
627 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
628 pred += prediction_stride;
629 obmc_pred += obmc_prediction_stride;
630
631 result = BlendObmcFromTop8<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
632 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
633 if (height == 4) return;
634
635 pred += prediction_stride;
636 obmc_pred += obmc_prediction_stride;
637
638 result = BlendObmcFromTop8<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
639 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
640 pred += prediction_stride;
641 obmc_pred += obmc_prediction_stride;
642
643 result = BlendObmcFromTop8<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
644 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
645
646 if (height == 8) return;
647
648 pred += prediction_stride;
649 obmc_pred += obmc_prediction_stride;
650
651 result = BlendObmcFromTop8<6>(pred, obmc_pred, pred_mask, obmc_pred_mask);
652 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
653 pred += prediction_stride;
654 obmc_pred += obmc_prediction_stride;
655
656 result = BlendObmcFromTop8<7>(pred, obmc_pred, pred_mask, obmc_pred_mask);
657 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
658 pred += prediction_stride;
659 obmc_pred += obmc_prediction_stride;
660
661 pred_mask = vld1q_u16(&mask[8]);
662 obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
663
664 result = BlendObmcFromTop8<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
665 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
666 pred += prediction_stride;
667 obmc_pred += obmc_prediction_stride;
668
669 result = BlendObmcFromTop8<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
670 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
671 pred += prediction_stride;
672 obmc_pred += obmc_prediction_stride;
673
674 result = BlendObmcFromTop8<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
675 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
676 pred += prediction_stride;
677 obmc_pred += obmc_prediction_stride;
678
679 result = BlendObmcFromTop8<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
680 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
681
682 if (height == 16) return;
683
684 pred += prediction_stride;
685 obmc_pred += obmc_prediction_stride;
686
687 result = BlendObmcFromTop8<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
688 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
689 pred += prediction_stride;
690 obmc_pred += obmc_prediction_stride;
691
692 result = BlendObmcFromTop8<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
693 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
694 pred += prediction_stride;
695 obmc_pred += obmc_prediction_stride;
696
697 result = BlendObmcFromTop8<6>(pred, obmc_pred, pred_mask, obmc_pred_mask);
698 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
699 pred += prediction_stride;
700 obmc_pred += obmc_prediction_stride;
701
702 result = BlendObmcFromTop8<7>(pred, obmc_pred, pred_mask, obmc_pred_mask);
703 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
704 pred += prediction_stride;
705 obmc_pred += obmc_prediction_stride;
706
707 pred_mask = vld1q_u16(&mask[16]);
708 obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
709
710 result = BlendObmcFromTop8<0>(pred, obmc_pred, pred_mask, obmc_pred_mask);
711 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
712 pred += prediction_stride;
713 obmc_pred += obmc_prediction_stride;
714
715 result = BlendObmcFromTop8<1>(pred, obmc_pred, pred_mask, obmc_pred_mask);
716 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
717 pred += prediction_stride;
718 obmc_pred += obmc_prediction_stride;
719
720 result = BlendObmcFromTop8<2>(pred, obmc_pred, pred_mask, obmc_pred_mask);
721 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
722 pred += prediction_stride;
723 obmc_pred += obmc_prediction_stride;
724
725 result = BlendObmcFromTop8<3>(pred, obmc_pred, pred_mask, obmc_pred_mask);
726 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
727 pred += prediction_stride;
728 obmc_pred += obmc_prediction_stride;
729
730 result = BlendObmcFromTop8<4>(pred, obmc_pred, pred_mask, obmc_pred_mask);
731 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
732 pred += prediction_stride;
733 obmc_pred += obmc_prediction_stride;
734
735 result = BlendObmcFromTop8<5>(pred, obmc_pred, pred_mask, obmc_pred_mask);
736 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
737 pred += prediction_stride;
738 obmc_pred += obmc_prediction_stride;
739
740 result = BlendObmcFromTop8<6>(pred, obmc_pred, pred_mask, obmc_pred_mask);
741 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
742 pred += prediction_stride;
743 obmc_pred += obmc_prediction_stride;
744
745 result = BlendObmcFromTop8<7>(pred, obmc_pred, pred_mask, obmc_pred_mask);
746 vst1q_u16(reinterpret_cast<uint16_t*>(pred), result);
747 }
748
OverlapBlendFromTop_NEON(void * LIBGAV1_RESTRICT const prediction,const ptrdiff_t prediction_stride,const int width,const int height,const void * LIBGAV1_RESTRICT const obmc_prediction,const ptrdiff_t obmc_prediction_stride)749 void OverlapBlendFromTop_NEON(
750 void* LIBGAV1_RESTRICT const prediction, const ptrdiff_t prediction_stride,
751 const int width, const int height,
752 const void* LIBGAV1_RESTRICT const obmc_prediction,
753 const ptrdiff_t obmc_prediction_stride) {
754 auto* pred = static_cast<uint8_t*>(prediction);
755 const auto* obmc_pred = static_cast<const uint8_t*>(obmc_prediction);
756 assert(width >= 4);
757 assert(height >= 2);
758
759 if (width == 4) {
760 OverlapBlendFromTop4xH_NEON(pred, prediction_stride, height, obmc_pred,
761 obmc_prediction_stride);
762 return;
763 }
764
765 if (width == 8) {
766 OverlapBlendFromTop8xH_NEON(pred, prediction_stride, obmc_pred,
767 obmc_prediction_stride, height);
768 return;
769 }
770
771 const uint16_t* mask = kObmcMask + height - 2;
772 const uint16x8_t mask_inverter = vdupq_n_u16(64);
773 const uint16x8_t pred_mask = vld1q_u16(mask);
774 // 64 - mask
775 const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
776 #define OBMC_ROW_FROM_TOP(n) \
777 do { \
778 int x = 0; \
779 do { \
780 const uint16x8_t result = BlendObmcFromTop8<n>( \
781 reinterpret_cast<uint8_t*>(reinterpret_cast<uint16_t*>(pred) + x), \
782 reinterpret_cast<const uint8_t*>( \
783 reinterpret_cast<const uint16_t*>(obmc_pred) + x), \
784 pred_mask, obmc_pred_mask); \
785 vst1q_u16(reinterpret_cast<uint16_t*>(pred) + x, result); \
786 \
787 x += 8; \
788 } while (x < width); \
789 } while (false)
790
791 // Compute 1 row.
792 if (height == 2) {
793 OBMC_ROW_FROM_TOP(0);
794 return;
795 }
796
797 // Compute 3 rows.
798 if (height == 4) {
799 OBMC_ROW_FROM_TOP(0);
800 pred += prediction_stride;
801 obmc_pred += obmc_prediction_stride;
802 OBMC_ROW_FROM_TOP(1);
803 pred += prediction_stride;
804 obmc_pred += obmc_prediction_stride;
805 OBMC_ROW_FROM_TOP(2);
806 return;
807 }
808
809 // Compute 6 rows.
810 if (height == 8) {
811 OBMC_ROW_FROM_TOP(0);
812 pred += prediction_stride;
813 obmc_pred += obmc_prediction_stride;
814 OBMC_ROW_FROM_TOP(1);
815 pred += prediction_stride;
816 obmc_pred += obmc_prediction_stride;
817 OBMC_ROW_FROM_TOP(2);
818 pred += prediction_stride;
819 obmc_pred += obmc_prediction_stride;
820 OBMC_ROW_FROM_TOP(3);
821 pred += prediction_stride;
822 obmc_pred += obmc_prediction_stride;
823 OBMC_ROW_FROM_TOP(4);
824 pred += prediction_stride;
825 obmc_pred += obmc_prediction_stride;
826 OBMC_ROW_FROM_TOP(5);
827 return;
828 }
829
830 // Compute 12 rows.
831 if (height == 16) {
832 OBMC_ROW_FROM_TOP(0);
833 pred += prediction_stride;
834 obmc_pred += obmc_prediction_stride;
835 OBMC_ROW_FROM_TOP(1);
836 pred += prediction_stride;
837 obmc_pred += obmc_prediction_stride;
838 OBMC_ROW_FROM_TOP(2);
839 pred += prediction_stride;
840 obmc_pred += obmc_prediction_stride;
841 OBMC_ROW_FROM_TOP(3);
842 pred += prediction_stride;
843 obmc_pred += obmc_prediction_stride;
844 OBMC_ROW_FROM_TOP(4);
845 pred += prediction_stride;
846 obmc_pred += obmc_prediction_stride;
847 OBMC_ROW_FROM_TOP(5);
848 pred += prediction_stride;
849 obmc_pred += obmc_prediction_stride;
850 OBMC_ROW_FROM_TOP(6);
851 pred += prediction_stride;
852 obmc_pred += obmc_prediction_stride;
853 OBMC_ROW_FROM_TOP(7);
854 pred += prediction_stride;
855 obmc_pred += obmc_prediction_stride;
856
857 const uint16x8_t pred_mask = vld1q_u16(&mask[8]);
858 // 64 - mask
859 const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
860 OBMC_ROW_FROM_TOP(0);
861 pred += prediction_stride;
862 obmc_pred += obmc_prediction_stride;
863 OBMC_ROW_FROM_TOP(1);
864 pred += prediction_stride;
865 obmc_pred += obmc_prediction_stride;
866 OBMC_ROW_FROM_TOP(2);
867 pred += prediction_stride;
868 obmc_pred += obmc_prediction_stride;
869 OBMC_ROW_FROM_TOP(3);
870 return;
871 }
872
873 // Stop when mask value becomes 64. This is a multiple of 8 for height 32
874 // and 64.
875 const int compute_height = height - (height >> 2);
876 int y = 0;
877 do {
878 const uint16x8_t pred_mask = vld1q_u16(&mask[y]);
879 // 64 - mask
880 const uint16x8_t obmc_pred_mask = vsubq_u16(mask_inverter, pred_mask);
881 OBMC_ROW_FROM_TOP(0);
882 pred += prediction_stride;
883 obmc_pred += obmc_prediction_stride;
884 OBMC_ROW_FROM_TOP(1);
885 pred += prediction_stride;
886 obmc_pred += obmc_prediction_stride;
887 OBMC_ROW_FROM_TOP(2);
888 pred += prediction_stride;
889 obmc_pred += obmc_prediction_stride;
890 OBMC_ROW_FROM_TOP(3);
891 pred += prediction_stride;
892 obmc_pred += obmc_prediction_stride;
893 OBMC_ROW_FROM_TOP(4);
894 pred += prediction_stride;
895 obmc_pred += obmc_prediction_stride;
896 OBMC_ROW_FROM_TOP(5);
897 pred += prediction_stride;
898 obmc_pred += obmc_prediction_stride;
899 OBMC_ROW_FROM_TOP(6);
900 pred += prediction_stride;
901 obmc_pred += obmc_prediction_stride;
902 OBMC_ROW_FROM_TOP(7);
903 pred += prediction_stride;
904 obmc_pred += obmc_prediction_stride;
905
906 y += 8;
907 } while (y < compute_height);
908 }
909
Init10bpp()910 void Init10bpp() {
911 Dsp* const dsp = dsp_internal::GetWritableDspTable(kBitdepth10);
912 assert(dsp != nullptr);
913 dsp->obmc_blend[kObmcDirectionVertical] = OverlapBlendFromTop_NEON;
914 dsp->obmc_blend[kObmcDirectionHorizontal] = OverlapBlendFromLeft_NEON;
915 }
916
917 } // namespace
918 } // namespace high_bitdepth
919 #endif // LIBGAV1_MAX_BITDEPTH >= 10
920
// NEON build of the OBMC init entry point. Calls the per-bitdepth
// initializers: the 8bpp one unconditionally, and the 10bpp one only when the
// build is configured with LIBGAV1_MAX_BITDEPTH >= 10 (high_bitdepth is not
// compiled otherwise).
void ObmcInit_NEON() {
  low_bitdepth::Init8bpp();
#if LIBGAV1_MAX_BITDEPTH >= 10
  high_bitdepth::Init10bpp();
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
}
927
928 } // namespace dsp
929 } // namespace libgav1
930
931 #else // !LIBGAV1_ENABLE_NEON
932
933 namespace libgav1 {
934 namespace dsp {
935
// Variant compiled when LIBGAV1_ENABLE_NEON is false: no NEON functions exist
// in this build, so the function is defined with an empty body.
void ObmcInit_NEON() {}
937
938 } // namespace dsp
939 } // namespace libgav1
940 #endif // LIBGAV1_ENABLE_NEON
941