1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <algorithm>
16 #include <array>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22 #include <memory>
23
24 #include "src/buffer_pool.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/motion_vector.h"
28 #include "src/obu_parser.h"
29 #include "src/prediction_mask.h"
30 #include "src/tile.h"
31 #include "src/utils/array_2d.h"
32 #include "src/utils/bit_mask_set.h"
33 #include "src/utils/block_parameters_holder.h"
34 #include "src/utils/common.h"
35 #include "src/utils/constants.h"
36 #include "src/utils/logging.h"
37 #include "src/utils/memory.h"
38 #include "src/utils/types.h"
39 #include "src/warp_prediction.h"
40 #include "src/yuv_buffer.h"
41
42 namespace libgav1 {
43 namespace {
44
45 // Import all the constants in the anonymous namespace.
46 #include "src/inter_intra_masks.inc"
47
48 // Precision bits when scaling reference frames.
49 constexpr int kReferenceScaleShift = 14;
50 constexpr int kAngleStep = 3;
51 constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = {
52 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0};
53
54 // The following modes need both the left_column and top_row for intra
55 // prediction. For directional modes left/top requirement is inferred based on
56 // the prediction angle. For Dc modes, left/top requirement is inferred based on
57 // whether or not left/top is available.
58 constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth,
59 kPredictionModeSmoothHorizontal,
60 kPredictionModeSmoothVertical,
61 kPredictionModePaeth);
62
GetDirectionalIntraPredictorDerivative(const int angle)63 int16_t GetDirectionalIntraPredictorDerivative(const int angle) {
64 assert(angle >= 3);
65 assert(angle <= 87);
66 return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1];
67 }
68
69 // Maps the block_size to an index as follows:
70 // kBlock8x8 => 0.
71 // kBlock8x16 => 1.
72 // kBlock8x32 => 2.
73 // kBlock16x8 => 3.
74 // kBlock16x16 => 4.
75 // kBlock16x32 => 5.
76 // kBlock32x8 => 6.
77 // kBlock32x16 => 7.
78 // kBlock32x32 => 8.
GetWedgeBlockSizeIndex(BlockSize block_size)79 int GetWedgeBlockSizeIndex(BlockSize block_size) {
80 assert(block_size >= kBlock8x8);
81 return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) -
82 static_cast<int>(block_size >= kBlock32x8);
83 }
84
85 // Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively.
GetInterIntraMaskLookupIndex(int dimension)86 int GetInterIntraMaskLookupIndex(int dimension) {
87 assert(dimension == 4 || dimension == 8 || dimension == 16 ||
88 dimension == 32);
89 return FloorLog2(dimension) - 2;
90 }
91
// 7.11.2.9.
// Returns the intra edge filter strength (0 to 3, where 0 means "do not
// filter") for a block of |width| x |height|. The result depends on the sum
// width + height, on |filter_type| (0 selects the first table, any other value
// the second) and on the magnitude of |delta|; the sign of |delta| is ignored.
int GetIntraEdgeFilterStrength(int width, int height, int filter_type,
                               int delta) {
  const int block_wh = width + height;
  const int abs_delta = std::abs(delta);

  if (filter_type == 0) {
    if (block_wh <= 8) return (abs_delta >= 56) ? 1 : 0;
    if (block_wh <= 16) return (abs_delta >= 40) ? 1 : 0;
    if (block_wh <= 24) {
      if (abs_delta >= 32) return 3;
      if (abs_delta >= 16) return 2;
      return (abs_delta >= 8) ? 1 : 0;
    }
    if (block_wh <= 32) {
      // Blocks in this range are always filtered; strength is at least 1.
      if (abs_delta >= 32) return 3;
      return (abs_delta >= 4) ? 2 : 1;
    }
    return 3;
  }

  if (block_wh <= 8) {
    if (abs_delta >= 64) return 2;
    return (abs_delta >= 40) ? 1 : 0;
  }
  if (block_wh <= 16) {
    if (abs_delta >= 48) return 2;
    return (abs_delta >= 20) ? 1 : 0;
  }
  if (block_wh <= 24) return (abs_delta >= 4) ? 3 : 0;
  return 3;
}
128
// 7.11.2.10.
// Decides whether the intra edge should be upsampled for a block of
// |width| x |height|. Upsampling is used only when the magnitude of |delta| is
// below 40 and width + height is at most 8 (|filter_type| == 1) or at most 16
// (any other |filter_type|).
bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) {
  // This function should not be called when the prediction angle is 90 or 180,
  // i.e. when |delta| is zero.
  assert(delta != 0);
  const int abs_delta = std::abs(delta);
  if (abs_delta >= 40) return false;
  const int max_block_wh = (filter_type == 1) ? 8 : 16;
  return width + height <= max_block_wh;
}
138
139 constexpr uint8_t kQuantizedDistanceWeight[4][2] = {
140 {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}};
141
142 constexpr uint8_t kQuantizedDistanceLookup[4][2] = {
143 {9, 7}, {11, 5}, {12, 4}, {13, 3}};
144
GetDistanceWeights(const int distance[2],int weight[2])145 void GetDistanceWeights(const int distance[2], int weight[2]) {
146 // Note: distance[0] and distance[1] correspond to relative distance
147 // between current frame and reference frame [1] and [0], respectively.
148 const int order = static_cast<int>(distance[0] <= distance[1]);
149 if (distance[0] == 0 || distance[1] == 0) {
150 weight[0] = kQuantizedDistanceLookup[3][order];
151 weight[1] = kQuantizedDistanceLookup[3][1 - order];
152 } else {
153 int i;
154 for (i = 0; i < 3; ++i) {
155 const int weight_0 = kQuantizedDistanceWeight[i][order];
156 const int weight_1 = kQuantizedDistanceWeight[i][1 - order];
157 if (order == 0) {
158 if (distance[0] * weight_0 < distance[1] * weight_1) break;
159 } else {
160 if (distance[0] * weight_0 > distance[1] * weight_1) break;
161 }
162 }
163 weight[0] = kQuantizedDistanceLookup[i][order];
164 weight[1] = kQuantizedDistanceLookup[i][1 - order];
165 }
166 }
167
GetIntraPredictor(PredictionMode mode,bool has_left,bool has_top)168 dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left,
169 bool has_top) {
170 if (mode == kPredictionModeDc) {
171 if (has_left && has_top) {
172 return dsp::kIntraPredictorDc;
173 }
174 if (has_left) {
175 return dsp::kIntraPredictorDcLeft;
176 }
177 if (has_top) {
178 return dsp::kIntraPredictorDcTop;
179 }
180 return dsp::kIntraPredictorDcFill;
181 }
182 switch (mode) {
183 case kPredictionModePaeth:
184 return dsp::kIntraPredictorPaeth;
185 case kPredictionModeSmooth:
186 return dsp::kIntraPredictorSmooth;
187 case kPredictionModeSmoothVertical:
188 return dsp::kIntraPredictorSmoothVertical;
189 case kPredictionModeSmoothHorizontal:
190 return dsp::kIntraPredictorSmoothHorizontal;
191 default:
192 return dsp::kNumIntraPredictors;
193 }
194 }
195
GetStartPoint(Array2DView<uint8_t> * const buffer,const int plane,const int x,const int y,const int bitdepth)196 uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane,
197 const int x, const int y, const int bitdepth) {
198 #if LIBGAV1_MAX_BITDEPTH >= 10
199 if (bitdepth > 8) {
200 Array2DView<uint16_t> buffer16(
201 buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t),
202 reinterpret_cast<uint16_t*>(&buffer[plane][0][0]));
203 return reinterpret_cast<uint8_t*>(&buffer16[y][x]);
204 }
205 #endif // LIBGAV1_MAX_BITDEPTH >= 10
206 static_cast<void>(bitdepth);
207 return &buffer[plane][y][x];
208 }
209
GetPixelPositionFromHighScale(int start,int step,int offset)210 int GetPixelPositionFromHighScale(int start, int step, int offset) {
211 return (start + step * offset) >> kScaleSubPixelBits;
212 }
213
// Picks the mask blend function from |dsp|. Inter-intra blending without a
// wedge mask always uses the entry at subsampling index 0; every other case
// indexes the table by |subsampling_x| + |subsampling_y| and by
// |is_inter_intra|.
dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra,
                                    bool is_wedge_inter_intra,
                                    int subsampling_x, int subsampling_y) {
  if (is_inter_intra && !is_wedge_inter_intra) {
    return dsp.mask_blend[0][/*is_inter_intra=*/true];
  }
  const int subsampling_index = subsampling_x + subsampling_y;
  return dsp.mask_blend[subsampling_index][is_inter_intra];
}
221
222 } // namespace
223
224 template <typename Pixel>
IntraPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool has_top_right,bool has_bottom_left,PredictionMode mode,TransformSize tx_size)225 void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y,
226 bool has_left, bool has_top, bool has_top_right,
227 bool has_bottom_left, PredictionMode mode,
228 TransformSize tx_size) {
229 const int width = kTransformWidth[tx_size];
230 const int height = kTransformHeight[tx_size];
231 const int x_shift = subsampling_x_[plane];
232 const int y_shift = subsampling_y_[plane];
233 const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1;
234 const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1;
235 // For performance reasons, do not initialize the following two buffers.
236 alignas(kMaxAlignment) Pixel top_row_data[160];
237 alignas(kMaxAlignment) Pixel left_column_data[160];
238 #if LIBGAV1_MSAN
239 if (IsDirectionalMode(mode)) {
240 memset(top_row_data, 0, sizeof(top_row_data));
241 memset(left_column_data, 0, sizeof(left_column_data));
242 }
243 #endif
244 // Some predictors use |top_row_data| and |left_column_data| with a negative
245 // offset to access pixels to the top-left of the current block. So have some
246 // space before the arrays to allow populating those without having to move
247 // the rest of the array.
248 Pixel* const top_row = top_row_data + 16;
249 Pixel* const left_column = left_column_data + 16;
250 const int bitdepth = sequence_header_.color_config.bitdepth;
251 const int top_and_left_size = width + height;
252 const bool is_directional_mode = IsDirectionalMode(mode);
253 const PredictionParameters& prediction_parameters =
254 *block.bp->prediction_parameters;
255 const bool use_filter_intra =
256 (plane == kPlaneY && prediction_parameters.use_filter_intra);
257 const int prediction_angle =
258 is_directional_mode
259 ? kPredictionModeToAngle[mode] +
260 prediction_parameters.angle_delta[GetPlaneType(plane)] *
261 kAngleStep
262 : 0;
263 // Directional prediction requires buffers larger than the width or height.
264 const int top_size = is_directional_mode ? top_and_left_size : width;
265 const int left_size = is_directional_mode ? top_and_left_size : height;
266 const int top_right_size =
267 is_directional_mode ? (has_top_right ? 2 : 1) * width : width;
268 const int bottom_left_size =
269 is_directional_mode ? (has_bottom_left ? 2 : 1) * height : height;
270
271 Array2DView<Pixel> buffer(buffer_[plane].rows(),
272 buffer_[plane].columns() / sizeof(Pixel),
273 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
274 const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
275 (is_directional_mode && prediction_angle < 180) ||
276 (mode == kPredictionModeDc && has_top);
277 const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
278 (is_directional_mode && prediction_angle > 90) ||
279 (mode == kPredictionModeDc && has_left);
280
281 const Pixel* top_row_src = buffer[y - 1];
282
283 // Determine if we need to retrieve the top row from
284 // |intra_prediction_buffer_|.
285 if ((needs_top || needs_left) && use_intra_prediction_buffer_) {
286 // Superblock index of block.row4x4. block.row4x4 is always in luma
287 // dimension (no subsampling).
288 const int current_superblock_index =
289 block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4);
290 // Superblock index of y - 1. y is in the plane dimension (chroma planes
291 // could be subsampled).
292 const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) -
293 subsampling_y_[plane];
294 const int top_row_superblock_index = (y - 1) >> plane_shift;
295 // If the superblock index of y - 1 is not that of the current superblock,
296 // then we will have to retrieve the top row from the
297 // |intra_prediction_buffer_|.
298 if (current_superblock_index != top_row_superblock_index) {
299 top_row_src = reinterpret_cast<const Pixel*>(
300 (*intra_prediction_buffer_)[plane].get());
301 }
302 }
303
304 if (needs_top) {
305 // Compute top_row.
306 if (has_top || has_left) {
307 const int left_index = has_left ? x - 1 : x;
308 top_row[-1] = has_top ? top_row_src[left_index] : buffer[y][left_index];
309 } else {
310 top_row[-1] = 1 << (bitdepth - 1);
311 }
312 if (!has_top && has_left) {
313 Memset(top_row, buffer[y][x - 1], top_size);
314 } else if (!has_top && !has_left) {
315 Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size);
316 } else {
317 const int top_limit = std::min(max_x - x + 1, top_right_size);
318 memcpy(top_row, &top_row_src[x], top_limit * sizeof(Pixel));
319 // Even though it is safe to call Memset with a size of 0, accessing
320 // top_row_src[top_limit - x + 1] is not allowed when this condition is
321 // false.
322 if (top_size - top_limit > 0) {
323 Memset(top_row + top_limit, top_row_src[top_limit + x - 1],
324 top_size - top_limit);
325 }
326 }
327 }
328 if (needs_left) {
329 // Compute left_column.
330 if (has_top || has_left) {
331 const int left_index = has_left ? x - 1 : x;
332 left_column[-1] =
333 has_top ? top_row_src[left_index] : buffer[y][left_index];
334 } else {
335 left_column[-1] = 1 << (bitdepth - 1);
336 }
337 if (!has_left && has_top) {
338 Memset(left_column, top_row_src[x], left_size);
339 } else if (!has_left && !has_top) {
340 Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size);
341 } else {
342 const int left_limit = std::min(max_y - y + 1, bottom_left_size);
343 for (int i = 0; i < left_limit; ++i) {
344 left_column[i] = buffer[y + i][x - 1];
345 }
346 // Even though it is safe to call Memset with a size of 0, accessing
347 // buffer[left_limit - y + 1][x - 1] is not allowed when this condition is
348 // false.
349 if (left_size - left_limit > 0) {
350 Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1],
351 left_size - left_limit);
352 }
353 }
354 }
355 Pixel* const dest = &buffer[y][x];
356 const ptrdiff_t dest_stride = buffer_[plane].columns();
357 if (use_filter_intra) {
358 dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column,
359 prediction_parameters.filter_intra_mode, width,
360 height);
361 } else if (is_directional_mode) {
362 DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left,
363 needs_top, prediction_angle, width, height, max_x,
364 max_y, tx_size, top_row, left_column);
365 } else {
366 const dsp::IntraPredictor predictor =
367 GetIntraPredictor(mode, has_left, has_top);
368 assert(predictor != dsp::kNumIntraPredictors);
369 dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row,
370 left_column);
371 }
372 }
373
374 template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane,
375 int x, int y, bool has_left,
376 bool has_top, bool has_top_right,
377 bool has_bottom_left,
378 PredictionMode mode,
379 TransformSize tx_size);
380 #if LIBGAV1_MAX_BITDEPTH >= 10
381 template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane,
382 int x, int y, bool has_left,
383 bool has_top, bool has_top_right,
384 bool has_bottom_left,
385 PredictionMode mode,
386 TransformSize tx_size);
387 #endif
388
GetIntraEdgeFilterType(const Block & block,Plane plane) const389 int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const {
390 bool top;
391 bool left;
392 if (plane == kPlaneY) {
393 top = block.top_available[kPlaneY] &&
394 kPredictionModeSmoothMask.Contains(block.bp_top->y_mode);
395 left = block.left_available[kPlaneY] &&
396 kPredictionModeSmoothMask.Contains(block.bp_left->y_mode);
397 } else {
398 top = block.top_available[plane] &&
399 block.bp->prediction_parameters->chroma_top_uses_smooth_prediction;
400 left = block.left_available[plane] &&
401 block.bp->prediction_parameters->chroma_left_uses_smooth_prediction;
402 }
403 return static_cast<int>(top || left);
404 }
405
406 template <typename Pixel>
DirectionalPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool needs_left,bool needs_top,int prediction_angle,int width,int height,int max_x,int max_y,TransformSize tx_size,Pixel * const top_row,Pixel * const left_column)407 void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y,
408 bool has_left, bool has_top, bool needs_left,
409 bool needs_top, int prediction_angle,
410 int width, int height, int max_x, int max_y,
411 TransformSize tx_size, Pixel* const top_row,
412 Pixel* const left_column) {
413 Array2DView<Pixel> buffer(buffer_[plane].rows(),
414 buffer_[plane].columns() / sizeof(Pixel),
415 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
416 Pixel* const dest = &buffer[y][x];
417 const ptrdiff_t stride = buffer_[plane].columns();
418 if (prediction_angle == 90) {
419 dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical](
420 dest, stride, top_row, left_column);
421 return;
422 }
423 if (prediction_angle == 180) {
424 dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal](
425 dest, stride, top_row, left_column);
426 return;
427 }
428
429 bool upsampled_top = false;
430 bool upsampled_left = false;
431 if (sequence_header_.enable_intra_edge_filter) {
432 const int filter_type = GetIntraEdgeFilterType(block, plane);
433 if (prediction_angle > 90 && prediction_angle < 180 &&
434 (width + height) >= 24) {
435 // 7.11.2.7.
436 left_column[-1] = top_row[-1] = RightShiftWithRounding(
437 left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4);
438 }
439 if (has_top && needs_top) {
440 const int strength = GetIntraEdgeFilterStrength(
441 width, height, filter_type, prediction_angle - 90);
442 if (strength > 0) {
443 const int num_pixels = std::min(width, max_x - x + 1) +
444 ((prediction_angle < 90) ? height : 0) + 1;
445 dsp_.intra_edge_filter(top_row - 1, num_pixels, strength);
446 }
447 }
448 if (has_left && needs_left) {
449 const int strength = GetIntraEdgeFilterStrength(
450 width, height, filter_type, prediction_angle - 180);
451 if (strength > 0) {
452 const int num_pixels = std::min(height, max_y - y + 1) +
453 ((prediction_angle > 180) ? width : 0) + 1;
454 dsp_.intra_edge_filter(left_column - 1, num_pixels, strength);
455 }
456 }
457 upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type,
458 prediction_angle - 90);
459 if (upsampled_top && needs_top) {
460 const int num_pixels = width + ((prediction_angle < 90) ? height : 0);
461 dsp_.intra_edge_upsampler(top_row, num_pixels);
462 }
463 upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type,
464 prediction_angle - 180);
465 if (upsampled_left && needs_left) {
466 const int num_pixels = height + ((prediction_angle > 180) ? width : 0);
467 dsp_.intra_edge_upsampler(left_column, num_pixels);
468 }
469 }
470
471 if (prediction_angle < 90) {
472 const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle);
473 dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height,
474 dx, upsampled_top);
475 } else if (prediction_angle < 180) {
476 const int dx =
477 GetDirectionalIntraPredictorDerivative(180 - prediction_angle);
478 const int dy =
479 GetDirectionalIntraPredictorDerivative(prediction_angle - 90);
480 dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column,
481 width, height, dx, dy, upsampled_top,
482 upsampled_left);
483 } else {
484 assert(prediction_angle < 270);
485 const int dy =
486 GetDirectionalIntraPredictorDerivative(270 - prediction_angle);
487 dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width,
488 height, dy, upsampled_left);
489 }
490 }
491
492 template <typename Pixel>
PalettePrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const int x,const int y,const TransformSize tx_size)493 void Tile::PalettePrediction(const Block& block, const Plane plane,
494 const int start_x, const int start_y, const int x,
495 const int y, const TransformSize tx_size) {
496 const int tx_width = kTransformWidth[tx_size];
497 const int tx_height = kTransformHeight[tx_size];
498 const uint16_t* const palette =
499 block.bp->prediction_parameters->palette_mode_info.color[plane];
500 const PlaneType plane_type = GetPlaneType(plane);
501 const int x4 = MultiplyBy4(x);
502 const int y4 = MultiplyBy4(y);
503 Array2DView<Pixel> buffer(buffer_[plane].rows(),
504 buffer_[plane].columns() / sizeof(Pixel),
505 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
506 for (int row = 0; row < tx_height; ++row) {
507 assert(block.bp->prediction_parameters
508 ->color_index_map[plane_type][y4 + row] != nullptr);
509 for (int column = 0; column < tx_width; ++column) {
510 buffer[start_y + row][start_x + column] =
511 palette[block.bp->prediction_parameters
512 ->color_index_map[plane_type][y4 + row][x4 + column]];
513 }
514 }
515 }
516
517 template void Tile::PalettePrediction<uint8_t>(
518 const Block& block, const Plane plane, const int start_x, const int start_y,
519 const int x, const int y, const TransformSize tx_size);
520 #if LIBGAV1_MAX_BITDEPTH >= 10
521 template void Tile::PalettePrediction<uint16_t>(
522 const Block& block, const Plane plane, const int start_x, const int start_y,
523 const int x, const int y, const TransformSize tx_size);
524 #endif
525
526 template <typename Pixel>
ChromaFromLumaPrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const TransformSize tx_size)527 void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane,
528 const int start_x, const int start_y,
529 const TransformSize tx_size) {
530 const int subsampling_x = subsampling_x_[plane];
531 const int subsampling_y = subsampling_y_[plane];
532 const PredictionParameters& prediction_parameters =
533 *block.bp->prediction_parameters;
534 Array2DView<Pixel> y_buffer(
535 buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel),
536 reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0]));
537 if (!block.scratch_buffer->cfl_luma_buffer_valid) {
538 const int luma_x = start_x << subsampling_x;
539 const int luma_y = start_y << subsampling_y;
540 dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y](
541 block.scratch_buffer->cfl_luma_buffer,
542 prediction_parameters.max_luma_width - luma_x,
543 prediction_parameters.max_luma_height - luma_y,
544 reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]),
545 buffer_[kPlaneY].columns());
546 block.scratch_buffer->cfl_luma_buffer_valid = true;
547 }
548 Array2DView<Pixel> buffer(buffer_[plane].rows(),
549 buffer_[plane].columns() / sizeof(Pixel),
550 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
551 dsp_.cfl_intra_predictors[tx_size](
552 reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]),
553 buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer,
554 (plane == kPlaneU) ? prediction_parameters.cfl_alpha_u
555 : prediction_parameters.cfl_alpha_v);
556 }
557
558 template void Tile::ChromaFromLumaPrediction<uint8_t>(
559 const Block& block, const Plane plane, const int start_x, const int start_y,
560 const TransformSize tx_size);
561 #if LIBGAV1_MAX_BITDEPTH >= 10
562 template void Tile::ChromaFromLumaPrediction<uint16_t>(
563 const Block& block, const Plane plane, const int start_x, const int start_y,
564 const TransformSize tx_size);
565 #endif
566
InterIntraPrediction(uint16_t * const prediction_0,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const PredictionParameters & prediction_parameters,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,uint8_t * const dest,const ptrdiff_t dest_stride)567 void Tile::InterIntraPrediction(
568 uint16_t* const prediction_0, const uint8_t* const prediction_mask,
569 const ptrdiff_t prediction_mask_stride,
570 const PredictionParameters& prediction_parameters,
571 const int prediction_width, const int prediction_height,
572 const int subsampling_x, const int subsampling_y, uint8_t* const dest,
573 const ptrdiff_t dest_stride) {
574 assert(prediction_mask != nullptr);
575 assert(prediction_parameters.compound_prediction_type ==
576 kCompoundPredictionTypeIntra ||
577 prediction_parameters.compound_prediction_type ==
578 kCompoundPredictionTypeWedge);
579 // The first buffer of InterIntra is from inter prediction.
580 // The second buffer is from intra prediction.
581 #if LIBGAV1_MAX_BITDEPTH >= 10
582 if (sequence_header_.color_config.bitdepth > 8) {
583 GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true,
584 prediction_parameters.is_wedge_inter_intra, subsampling_x,
585 subsampling_y)(
586 prediction_0, reinterpret_cast<uint16_t*>(dest),
587 dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride,
588 prediction_width, prediction_height, dest, dest_stride);
589 return;
590 }
591 #endif
592 const int function_index = prediction_parameters.is_wedge_inter_intra
593 ? subsampling_x + subsampling_y
594 : 0;
595 // |is_inter_intra| prediction values are stored in a Pixel buffer but it is
596 // currently declared as a uint16_t buffer.
597 // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and
598 // remove the reinterpret_cast.
599 dsp_.inter_intra_mask_blend_8bpp[function_index](
600 reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride,
601 prediction_mask, prediction_mask_stride, prediction_width,
602 prediction_height);
603 }
604
CompoundInterPrediction(const Block & block,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,const int candidate_row,const int candidate_column,uint8_t * dest,const ptrdiff_t dest_stride)605 void Tile::CompoundInterPrediction(
606 const Block& block, const uint8_t* const prediction_mask,
607 const ptrdiff_t prediction_mask_stride, const int prediction_width,
608 const int prediction_height, const int subsampling_x,
609 const int subsampling_y, const int candidate_row,
610 const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) {
611 const PredictionParameters& prediction_parameters =
612 *block.bp->prediction_parameters;
613
614 void* prediction[2];
615 #if LIBGAV1_MAX_BITDEPTH >= 10
616 const int bitdepth = sequence_header_.color_config.bitdepth;
617 if (bitdepth > 8) {
618 prediction[0] = block.scratch_buffer->prediction_buffer[0];
619 prediction[1] = block.scratch_buffer->prediction_buffer[1];
620 } else {
621 #endif
622 prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0];
623 prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1];
624 #if LIBGAV1_MAX_BITDEPTH >= 10
625 }
626 #endif
627
628 switch (prediction_parameters.compound_prediction_type) {
629 case kCompoundPredictionTypeWedge:
630 case kCompoundPredictionTypeDiffWeighted:
631 GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false,
632 prediction_parameters.is_wedge_inter_intra,
633 subsampling_x, subsampling_y)(
634 prediction[0], prediction[1],
635 /*prediction_stride=*/prediction_width, prediction_mask,
636 prediction_mask_stride, prediction_width, prediction_height, dest,
637 dest_stride);
638 break;
639 case kCompoundPredictionTypeDistance:
640 DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width,
641 prediction_height, candidate_row,
642 candidate_column, dest, dest_stride);
643 break;
644 default:
645 assert(prediction_parameters.compound_prediction_type ==
646 kCompoundPredictionTypeAverage);
647 dsp_.average_blend(prediction[0], prediction[1], prediction_width,
648 prediction_height, dest, dest_stride);
649 break;
650 }
651 }
652
GetWarpParams(const Block & block,const Plane plane,const int prediction_width,const int prediction_height,const PredictionParameters & prediction_parameters,const ReferenceFrameType reference_type,bool * const is_local_valid,GlobalMotion * const global_motion_params,GlobalMotion * const local_warp_params) const653 GlobalMotion* Tile::GetWarpParams(
654 const Block& block, const Plane plane, const int prediction_width,
655 const int prediction_height,
656 const PredictionParameters& prediction_parameters,
657 const ReferenceFrameType reference_type, bool* const is_local_valid,
658 GlobalMotion* const global_motion_params,
659 GlobalMotion* const local_warp_params) const {
660 if (prediction_width < 8 || prediction_height < 8 ||
661 frame_header_.force_integer_mv == 1) {
662 return nullptr;
663 }
664 if (plane == kPlaneY) {
665 *is_local_valid =
666 prediction_parameters.motion_mode == kMotionModeLocalWarp &&
667 WarpEstimation(
668 prediction_parameters.num_warp_samples, DivideBy4(prediction_width),
669 DivideBy4(prediction_height), block.row4x4, block.column4x4,
670 block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates,
671 local_warp_params) &&
672 SetupShear(local_warp_params);
673 }
674 if (prediction_parameters.motion_mode == kMotionModeLocalWarp &&
675 *is_local_valid) {
676 return local_warp_params;
677 }
678 if (!IsScaled(reference_type)) {
679 GlobalMotionTransformationType global_motion_type =
680 (reference_type != kReferenceFrameIntra)
681 ? global_motion_params->type
682 : kNumGlobalMotionTransformationTypes;
683 const bool is_global_valid =
684 IsGlobalMvBlock(*block.bp, global_motion_type) &&
685 SetupShear(global_motion_params);
686 // Valid global motion type implies reference type can't be intra.
687 assert(!is_global_valid || reference_type != kReferenceFrameIntra);
688 if (is_global_valid) return global_motion_params;
689 }
690 return nullptr;
691 }
692
InterPrediction(const Block & block,const Plane plane,const int x,const int y,const int prediction_width,const int prediction_height,int candidate_row,int candidate_column,bool * const is_local_valid,GlobalMotion * const local_warp_params)693 bool Tile::InterPrediction(const Block& block, const Plane plane, const int x,
694 const int y, const int prediction_width,
695 const int prediction_height, int candidate_row,
696 int candidate_column, bool* const is_local_valid,
697 GlobalMotion* const local_warp_params) {
698 const int bitdepth = sequence_header_.color_config.bitdepth;
699 const BlockParameters& bp = *block.bp;
700 const BlockParameters& bp_reference =
701 *block_parameters_holder_.Find(candidate_row, candidate_column);
702 const bool is_compound =
703 bp_reference.reference_frame[1] > kReferenceFrameIntra;
704 assert(bp.is_inter);
705 const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra;
706
707 const PredictionParameters& prediction_parameters =
708 *block.bp->prediction_parameters;
709 uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth);
710 const ptrdiff_t dest_stride = buffer_[plane].columns(); // In bytes.
711 for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) {
712 const ReferenceFrameType reference_type =
713 bp_reference.reference_frame[index];
714 GlobalMotion global_motion_params =
715 frame_header_.global_motion[reference_type];
716 GlobalMotion* warp_params =
717 GetWarpParams(block, plane, prediction_width, prediction_height,
718 prediction_parameters, reference_type, is_local_valid,
719 &global_motion_params, local_warp_params);
720 if (warp_params != nullptr) {
721 if (!BlockWarpProcess(block, plane, index, x, y, prediction_width,
722 prediction_height, warp_params, is_compound,
723 is_inter_intra, dest, dest_stride)) {
724 return false;
725 }
726 } else {
727 const int reference_index =
728 prediction_parameters.use_intra_block_copy
729 ? -1
730 : frame_header_.reference_frame_index[reference_type -
731 kReferenceFrameLast];
732 if (!BlockInterPrediction(
733 block, plane, reference_index, bp_reference.mv.mv[index], x, y,
734 prediction_width, prediction_height, candidate_row,
735 candidate_column, block.scratch_buffer->prediction_buffer[index],
736 is_compound, is_inter_intra, dest, dest_stride)) {
737 return false;
738 }
739 }
740 }
741
742 const int subsampling_x = subsampling_x_[plane];
743 const int subsampling_y = subsampling_y_[plane];
744 ptrdiff_t prediction_mask_stride = 0;
745 const uint8_t* prediction_mask = nullptr;
746 if (prediction_parameters.compound_prediction_type ==
747 kCompoundPredictionTypeWedge) {
748 const Array2D<uint8_t>& wedge_mask =
749 wedge_masks_[GetWedgeBlockSizeIndex(block.size)]
750 [prediction_parameters.wedge_sign]
751 [prediction_parameters.wedge_index];
752 prediction_mask = wedge_mask[0];
753 prediction_mask_stride = wedge_mask.columns();
754 } else if (prediction_parameters.compound_prediction_type ==
755 kCompoundPredictionTypeIntra) {
756 // 7.11.3.13. The inter intra masks are precomputed and stored as a set of
757 // look up tables.
758 assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes);
759 prediction_mask =
760 kInterIntraMasks[prediction_parameters.inter_intra_mode]
761 [GetInterIntraMaskLookupIndex(prediction_width)]
762 [GetInterIntraMaskLookupIndex(prediction_height)];
763 prediction_mask_stride = prediction_width;
764 } else if (prediction_parameters.compound_prediction_type ==
765 kCompoundPredictionTypeDiffWeighted) {
766 if (plane == kPlaneY) {
767 assert(prediction_width >= 8);
768 assert(prediction_height >= 8);
769 dsp_.weight_mask[FloorLog2(prediction_width) - 3]
770 [FloorLog2(prediction_height) - 3]
771 [static_cast<int>(prediction_parameters.mask_is_inverse)](
772 block.scratch_buffer->prediction_buffer[0],
773 block.scratch_buffer->prediction_buffer[1],
774 block.scratch_buffer->weight_mask,
775 kMaxSuperBlockSizeInPixels);
776 }
777 prediction_mask = block.scratch_buffer->weight_mask;
778 prediction_mask_stride = kMaxSuperBlockSizeInPixels;
779 }
780
781 if (is_compound) {
782 CompoundInterPrediction(block, prediction_mask, prediction_mask_stride,
783 prediction_width, prediction_height, subsampling_x,
784 subsampling_y, candidate_row, candidate_column,
785 dest, dest_stride);
786 } else if (prediction_parameters.motion_mode == kMotionModeObmc) {
787 // Obmc mode is allowed only for single reference (!is_compound).
788 return ObmcPrediction(block, plane, prediction_width, prediction_height);
789 } else if (is_inter_intra) {
790 // InterIntra and obmc must be mutually exclusive.
791 InterIntraPrediction(
792 block.scratch_buffer->prediction_buffer[0], prediction_mask,
793 prediction_mask_stride, prediction_parameters, prediction_width,
794 prediction_height, subsampling_x, subsampling_y, dest, dest_stride);
795 }
796 return true;
797 }
798
ObmcBlockPrediction(const Block & block,const MotionVector & mv,const Plane plane,const int reference_frame_index,const int width,const int height,const int x,const int y,const int candidate_row,const int candidate_column,const ObmcDirection blending_direction)799 bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv,
800 const Plane plane,
801 const int reference_frame_index, const int width,
802 const int height, const int x, const int y,
803 const int candidate_row,
804 const int candidate_column,
805 const ObmcDirection blending_direction) {
806 const int bitdepth = sequence_header_.color_config.bitdepth;
807 // Obmc's prediction needs to be clipped before blending with above/left
808 // prediction blocks.
809 // Obmc prediction is used only when is_compound is false. So it is safe to
810 // use prediction_buffer[1] as a temporary buffer for the Obmc prediction.
811 static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >=
812 64 * 64 * sizeof(uint16_t),
813 "");
814 auto* const obmc_buffer =
815 reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]);
816 const ptrdiff_t obmc_buffer_stride =
817 (bitdepth == 8) ? width : width * sizeof(uint16_t);
818 if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y,
819 width, height, candidate_row, candidate_column,
820 nullptr, false, false, obmc_buffer,
821 obmc_buffer_stride)) {
822 return false;
823 }
824
825 uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth);
826 const ptrdiff_t prediction_stride = buffer_[plane].columns();
827 dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width,
828 height, obmc_buffer, obmc_buffer_stride);
829 return true;
830 }
831
ObmcPrediction(const Block & block,const Plane plane,const int width,const int height)832 bool Tile::ObmcPrediction(const Block& block, const Plane plane,
833 const int width, const int height) {
834 const int subsampling_x = subsampling_x_[plane];
835 const int subsampling_y = subsampling_y_[plane];
836 if (block.top_available[kPlaneY] &&
837 !IsBlockSmallerThan8x8(block.residual_size[plane])) {
838 const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]);
839 const int column4x4_max =
840 std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
841 const int candidate_row = block.row4x4 - 1;
842 const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y;
843 int column4x4 = block.column4x4;
844 const int prediction_height = std::min(height >> 1, 32 >> subsampling_y);
845 for (int i = 0, step; i < num_limit && column4x4 < column4x4_max;
846 column4x4 += step) {
847 const int candidate_column = column4x4 | 1;
848 const BlockParameters& bp_top =
849 *block_parameters_holder_.Find(candidate_row, candidate_column);
850 const int candidate_block_size = bp_top.size;
851 step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16);
852 if (bp_top.reference_frame[0] > kReferenceFrameIntra) {
853 i++;
854 const int candidate_reference_frame_index =
855 frame_header_.reference_frame_index[bp_top.reference_frame[0] -
856 kReferenceFrameLast];
857 const int prediction_width =
858 std::min(width, MultiplyBy4(step) >> subsampling_x);
859 if (!ObmcBlockPrediction(
860 block, bp_top.mv.mv[0], plane, candidate_reference_frame_index,
861 prediction_width, prediction_height,
862 MultiplyBy4(column4x4) >> subsampling_x, block_start_y,
863 candidate_row, candidate_column, kObmcDirectionVertical)) {
864 return false;
865 }
866 }
867 }
868 }
869
870 if (block.left_available[kPlaneY]) {
871 const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]);
872 const int row4x4_max =
873 std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
874 const int candidate_column = block.column4x4 - 1;
875 int row4x4 = block.row4x4;
876 const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x;
877 const int prediction_width = std::min(width >> 1, 32 >> subsampling_x);
878 for (int i = 0, step; i < num_limit && row4x4 < row4x4_max;
879 row4x4 += step) {
880 const int candidate_row = row4x4 | 1;
881 const BlockParameters& bp_left =
882 *block_parameters_holder_.Find(candidate_row, candidate_column);
883 const int candidate_block_size = bp_left.size;
884 step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16);
885 if (bp_left.reference_frame[0] > kReferenceFrameIntra) {
886 i++;
887 const int candidate_reference_frame_index =
888 frame_header_.reference_frame_index[bp_left.reference_frame[0] -
889 kReferenceFrameLast];
890 const int prediction_height =
891 std::min(height, MultiplyBy4(step) >> subsampling_y);
892 if (!ObmcBlockPrediction(
893 block, bp_left.mv.mv[0], plane, candidate_reference_frame_index,
894 prediction_width, prediction_height, block_start_x,
895 MultiplyBy4(row4x4) >> subsampling_y, candidate_row,
896 candidate_column, kObmcDirectionHorizontal)) {
897 return false;
898 }
899 }
900 }
901 }
902 return true;
903 }
904
DistanceWeightedPrediction(void * prediction_0,void * prediction_1,const int width,const int height,const int candidate_row,const int candidate_column,uint8_t * dest,ptrdiff_t dest_stride)905 void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
906 const int width, const int height,
907 const int candidate_row,
908 const int candidate_column, uint8_t* dest,
909 ptrdiff_t dest_stride) {
910 int distance[2];
911 int weight[2];
912 for (int reference = 0; reference < 2; ++reference) {
913 const BlockParameters& bp =
914 *block_parameters_holder_.Find(candidate_row, candidate_column);
915 // Note: distance[0] and distance[1] correspond to relative distance
916 // between current frame and reference frame [1] and [0], respectively.
917 distance[1 - reference] = std::min(
918 std::abs(static_cast<int>(
919 current_frame_.reference_info()
920 ->relative_distance_from[bp.reference_frame[reference]])),
921 static_cast<int>(kMaxFrameDistance));
922 }
923 GetDistanceWeights(distance, weight);
924
925 dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1],
926 width, height, dest, dest_stride);
927 }
928
ScaleMotionVector(const MotionVector & mv,const Plane plane,const int reference_frame_index,const int x,const int y,int * const start_x,int * const start_y,int * const step_x,int * const step_y)929 void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane,
930 const int reference_frame_index, const int x,
931 const int y, int* const start_x,
932 int* const start_y, int* const step_x,
933 int* const step_y) {
934 const int reference_upscaled_width =
935 (reference_frame_index == -1)
936 ? frame_header_.upscaled_width
937 : reference_frames_[reference_frame_index]->upscaled_width();
938 const int reference_height =
939 (reference_frame_index == -1)
940 ? frame_header_.height
941 : reference_frames_[reference_frame_index]->frame_height();
942 assert(2 * frame_header_.width >= reference_upscaled_width &&
943 2 * frame_header_.height >= reference_height &&
944 frame_header_.width <= 16 * reference_upscaled_width &&
945 frame_header_.height <= 16 * reference_height);
946 const bool is_scaled_x = reference_upscaled_width != frame_header_.width;
947 const bool is_scaled_y = reference_height != frame_header_.height;
948 const int half_sample = 1 << (kSubPixelBits - 1);
949 int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]);
950 int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]);
951 const int rounding_offset =
952 DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits));
953 if (is_scaled_x) {
954 const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) +
955 DivideBy2(frame_header_.width)) /
956 frame_header_.width;
957 *step_x = RightShiftWithRoundingSigned(
958 scale_x, kReferenceScaleShift - kScaleSubPixelBits);
959 orig_x += half_sample;
960 // When frame size is 4k and above, orig_x can be above 16 bits, scale_x can
961 // be up to 15 bits. So we use int64_t to hold base_x.
962 const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x -
963 (half_sample << kReferenceScaleShift);
964 *start_x =
965 RightShiftWithRoundingSigned(
966 base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
967 rounding_offset;
968 } else {
969 *step_x = 1 << kScaleSubPixelBits;
970 *start_x = LeftShift(orig_x, 6) + rounding_offset;
971 }
972 if (is_scaled_y) {
973 const int scale_y = ((reference_height << kReferenceScaleShift) +
974 DivideBy2(frame_header_.height)) /
975 frame_header_.height;
976 *step_y = RightShiftWithRoundingSigned(
977 scale_y, kReferenceScaleShift - kScaleSubPixelBits);
978 orig_y += half_sample;
979 const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y -
980 (half_sample << kReferenceScaleShift);
981 *start_y =
982 RightShiftWithRoundingSigned(
983 base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
984 rounding_offset;
985 } else {
986 *step_y = 1 << kScaleSubPixelBits;
987 *start_y = LeftShift(orig_y, 6) + rounding_offset;
988 }
989 }
990
991 // static.
GetReferenceBlockPosition(const int reference_frame_index,const bool is_scaled,const int width,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int start_x,const int start_y,const int step_x,const int step_y,const int left_border,const int right_border,const int top_border,const int bottom_border,int * ref_block_start_x,int * ref_block_start_y,int * ref_block_end_x)992 bool Tile::GetReferenceBlockPosition(
993 const int reference_frame_index, const bool is_scaled, const int width,
994 const int height, const int ref_start_x, const int ref_last_x,
995 const int ref_start_y, const int ref_last_y, const int start_x,
996 const int start_y, const int step_x, const int step_y,
997 const int left_border, const int right_border, const int top_border,
998 const int bottom_border, int* ref_block_start_x, int* ref_block_start_y,
999 int* ref_block_end_x) {
1000 *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0);
1001 *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0);
1002 if (reference_frame_index == -1) {
1003 return false;
1004 }
1005 *ref_block_start_x -= kConvolveBorderLeftTop;
1006 *ref_block_start_y -= kConvolveBorderLeftTop;
1007 *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) +
1008 kConvolveBorderRight;
1009 int ref_block_end_y =
1010 GetPixelPositionFromHighScale(start_y, step_y, height - 1) +
1011 kConvolveBorderBottom;
1012 if (is_scaled) {
1013 const int block_height =
1014 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1015 kScaleSubPixelBits) +
1016 kSubPixelTaps;
1017 *ref_block_end_x += kConvolveScaleBorderRight - kConvolveBorderRight;
1018 ref_block_end_y = *ref_block_start_y + block_height - 1;
1019 }
1020 // Determines if we need to extend beyond the left/right/top/bottom border.
1021 return *ref_block_start_x < (ref_start_x - left_border) ||
1022 *ref_block_end_x > (ref_last_x + right_border) ||
1023 *ref_block_start_y < (ref_start_y - top_border) ||
1024 ref_block_end_y > (ref_last_y + bottom_border);
1025 }
1026
1027 // Builds a block as the input for convolve, by copying the content of
1028 // reference frame (either a decoded reference frame, or current frame).
1029 // |block_extended_width| is the combined width of the block and its borders.
1030 template <typename Pixel>
BuildConvolveBlock(const Plane plane,const int reference_frame_index,const bool is_scaled,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int step_y,const int ref_block_start_x,const int ref_block_end_x,const int ref_block_start_y,uint8_t * block_buffer,ptrdiff_t convolve_buffer_stride,ptrdiff_t block_extended_width)1031 void Tile::BuildConvolveBlock(
1032 const Plane plane, const int reference_frame_index, const bool is_scaled,
1033 const int height, const int ref_start_x, const int ref_last_x,
1034 const int ref_start_y, const int ref_last_y, const int step_y,
1035 const int ref_block_start_x, const int ref_block_end_x,
1036 const int ref_block_start_y, uint8_t* block_buffer,
1037 ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) {
1038 const YuvBuffer* const reference_buffer =
1039 (reference_frame_index == -1)
1040 ? current_frame_.buffer()
1041 : reference_frames_[reference_frame_index]->buffer();
1042 Array2DView<const Pixel> reference_block(
1043 reference_buffer->height(plane),
1044 reference_buffer->stride(plane) / sizeof(Pixel),
1045 reinterpret_cast<const Pixel*>(reference_buffer->data(plane)));
1046 auto* const block_head = reinterpret_cast<Pixel*>(block_buffer);
1047 convolve_buffer_stride /= sizeof(Pixel);
1048 int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom;
1049 if (is_scaled) {
1050 block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1051 kScaleSubPixelBits) +
1052 kSubPixelTaps;
1053 }
1054 const int copy_start_x = Clip3(ref_block_start_x, ref_start_x, ref_last_x);
1055 const int copy_start_y = Clip3(ref_block_start_y, ref_start_y, ref_last_y);
1056 const int copy_end_x = Clip3(ref_block_end_x, copy_start_x, ref_last_x);
1057 const int block_width = copy_end_x - copy_start_x + 1;
1058 const bool extend_left = ref_block_start_x < ref_start_x;
1059 const bool extend_right = ref_block_end_x > ref_last_x;
1060 const bool out_of_left = copy_start_x > ref_block_end_x;
1061 const bool out_of_right = copy_end_x < ref_block_start_x;
1062 if (out_of_left || out_of_right) {
1063 const int ref_x = out_of_left ? copy_start_x : copy_end_x;
1064 Pixel* buf_ptr = block_head;
1065 for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1066 Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width);
1067 if (ref_block_start_y + y >= ref_start_y &&
1068 ref_block_start_y + y < ref_last_y) {
1069 ++ref_y;
1070 }
1071 buf_ptr += convolve_buffer_stride;
1072 }
1073 } else {
1074 Pixel* buf_ptr = block_head;
1075 const int left_width = copy_start_x - ref_block_start_x;
1076 for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1077 if (extend_left) {
1078 Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width);
1079 }
1080 memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x],
1081 block_width * sizeof(Pixel));
1082 if (extend_right) {
1083 Memset(buf_ptr + left_width + block_width,
1084 reference_block[ref_y][copy_end_x],
1085 block_extended_width - left_width - block_width);
1086 }
1087 if (ref_block_start_y + y >= ref_start_y &&
1088 ref_block_start_y + y < ref_last_y) {
1089 ++ref_y;
1090 }
1091 buf_ptr += convolve_buffer_stride;
1092 }
1093 }
1094 }
1095
BlockInterPrediction(const Block & block,const Plane plane,const int reference_frame_index,const MotionVector & mv,const int x,const int y,const int width,const int height,const int candidate_row,const int candidate_column,uint16_t * const prediction,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1096 bool Tile::BlockInterPrediction(
1097 const Block& block, const Plane plane, const int reference_frame_index,
1098 const MotionVector& mv, const int x, const int y, const int width,
1099 const int height, const int candidate_row, const int candidate_column,
1100 uint16_t* const prediction, const bool is_compound,
1101 const bool is_inter_intra, uint8_t* const dest,
1102 const ptrdiff_t dest_stride) {
1103 const BlockParameters& bp =
1104 *block_parameters_holder_.Find(candidate_row, candidate_column);
1105 int start_x;
1106 int start_y;
1107 int step_x;
1108 int step_y;
1109 ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y,
1110 &step_x, &step_y);
1111 const int horizontal_filter_index = bp.interpolation_filter[1];
1112 const int vertical_filter_index = bp.interpolation_filter[0];
1113 const int subsampling_x = subsampling_x_[plane];
1114 const int subsampling_y = subsampling_y_[plane];
1115 // reference_frame_index equal to -1 indicates using current frame as
1116 // reference.
1117 const YuvBuffer* const reference_buffer =
1118 (reference_frame_index == -1)
1119 ? current_frame_.buffer()
1120 : reference_frames_[reference_frame_index]->buffer();
1121 const int reference_upscaled_width =
1122 (reference_frame_index == -1)
1123 ? MultiplyBy4(frame_header_.columns4x4)
1124 : reference_frames_[reference_frame_index]->upscaled_width();
1125 const int reference_height =
1126 (reference_frame_index == -1)
1127 ? MultiplyBy4(frame_header_.rows4x4)
1128 : reference_frames_[reference_frame_index]->frame_height();
1129 const int ref_start_x = 0;
1130 const int ref_last_x =
1131 SubsampledValue(reference_upscaled_width, subsampling_x) - 1;
1132 const int ref_start_y = 0;
1133 const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1;
1134
1135 const bool is_scaled = (reference_frame_index != -1) &&
1136 (frame_header_.width != reference_upscaled_width ||
1137 frame_header_.height != reference_height);
1138 const int bitdepth = sequence_header_.color_config.bitdepth;
1139 const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
1140 int ref_block_start_x;
1141 int ref_block_start_y;
1142 int ref_block_end_x;
1143 const bool extend_block = GetReferenceBlockPosition(
1144 reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x,
1145 ref_start_y, ref_last_y, start_x, start_y, step_x, step_y,
1146 reference_buffer->left_border(plane),
1147 reference_buffer->right_border(plane),
1148 reference_buffer->top_border(plane),
1149 reference_buffer->bottom_border(plane), &ref_block_start_x,
1150 &ref_block_start_y, &ref_block_end_x);
1151
1152 // In frame parallel mode, ensure that the reference block has been decoded
1153 // and available for referencing.
1154 if (reference_frame_index != -1 && frame_parallel_) {
1155 int reference_y_max;
1156 if (is_scaled) {
1157 // TODO(vigneshv): For now, we wait for the entire reference frame to be
1158 // decoded if we are using scaled references. This will eventually be
1159 // fixed.
1160 reference_y_max = reference_height;
1161 } else {
1162 reference_y_max =
1163 std::min(ref_block_start_y + height + kSubPixelTaps, ref_last_y);
1164 // For U and V planes with subsampling, we need to multiply
1165 // reference_y_max by 2 since we only track the progress of Y planes.
1166 reference_y_max = LeftShift(reference_y_max, subsampling_y);
1167 }
1168 if (reference_frame_progress_cache_[reference_frame_index] <
1169 reference_y_max &&
1170 !reference_frames_[reference_frame_index]->WaitUntil(
1171 reference_y_max,
1172 &reference_frame_progress_cache_[reference_frame_index])) {
1173 return false;
1174 }
1175 }
1176
1177 const uint8_t* block_start = nullptr;
1178 ptrdiff_t convolve_buffer_stride;
1179 if (!extend_block) {
1180 const YuvBuffer* const reference_buffer =
1181 (reference_frame_index == -1)
1182 ? current_frame_.buffer()
1183 : reference_frames_[reference_frame_index]->buffer();
1184 convolve_buffer_stride = reference_buffer->stride(plane);
1185 if (reference_frame_index == -1 || is_scaled) {
1186 block_start = reference_buffer->data(plane) +
1187 ref_block_start_y * reference_buffer->stride(plane) +
1188 ref_block_start_x * pixel_size;
1189 } else {
1190 block_start = reference_buffer->data(plane) +
1191 (ref_block_start_y + kConvolveBorderLeftTop) *
1192 reference_buffer->stride(plane) +
1193 (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
1194 }
1195 } else {
1196 const int border_right =
1197 is_scaled ? kConvolveScaleBorderRight : kConvolveBorderRight;
1198 // The block width can be at most 2 times as much as current
1199 // block's width because of scaling.
1200 auto block_extended_width = Align<ptrdiff_t>(
1201 (2 * width + kConvolveBorderLeftTop + border_right) * pixel_size,
1202 kMaxAlignment);
1203 convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
1204 #if LIBGAV1_MAX_BITDEPTH >= 10
1205 if (bitdepth > 8) {
1206 BuildConvolveBlock<uint16_t>(
1207 plane, reference_frame_index, is_scaled, height, ref_start_x,
1208 ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1209 ref_block_end_x, ref_block_start_y,
1210 block.scratch_buffer->convolve_block_buffer.get(),
1211 convolve_buffer_stride, block_extended_width);
1212 } else {
1213 #endif
1214 BuildConvolveBlock<uint8_t>(
1215 plane, reference_frame_index, is_scaled, height, ref_start_x,
1216 ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1217 ref_block_end_x, ref_block_start_y,
1218 block.scratch_buffer->convolve_block_buffer.get(),
1219 convolve_buffer_stride, block_extended_width);
1220 #if LIBGAV1_MAX_BITDEPTH >= 10
1221 }
1222 #endif
1223 block_start = block.scratch_buffer->convolve_block_buffer.get() +
1224 (is_scaled ? 0
1225 : kConvolveBorderLeftTop * convolve_buffer_stride +
1226 kConvolveBorderLeftTop * pixel_size);
1227 }
1228
1229 void* const output =
1230 (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest);
1231 ptrdiff_t output_stride = (is_compound || is_inter_intra)
1232 ? /*prediction_stride=*/width
1233 : dest_stride;
1234 #if LIBGAV1_MAX_BITDEPTH >= 10
1235 // |is_inter_intra| calculations are written to the |prediction| buffer.
1236 // Unlike the |is_compound| calculations the output is Pixel and not uint16_t.
1237 // convolve_func() expects |output_stride| to be in bytes and not Pixels.
1238 // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to
1239 // account for this.
1240 if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1241 output_stride *= 2;
1242 }
1243 #endif
1244 assert(output != nullptr);
1245 if (is_scaled) {
1246 dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound];
1247 assert(convolve_func != nullptr);
1248
1249 convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1250 vertical_filter_index, start_x, start_y, step_x, step_y,
1251 width, height, output, output_stride);
1252 } else {
1253 const int horizontal_filter_id = (start_x >> 6) & kSubPixelMask;
1254 const int vertical_filter_id = (start_y >> 6) & kSubPixelMask;
1255
1256 dsp::ConvolveFunc convolve_func =
1257 dsp_.convolve[reference_frame_index == -1][is_compound]
1258 [vertical_filter_id != 0][horizontal_filter_id != 0];
1259 assert(convolve_func != nullptr);
1260
1261 convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1262 vertical_filter_index, horizontal_filter_id,
1263 vertical_filter_id, width, height, output, output_stride);
1264 }
1265 return true;
1266 }
1267
BlockWarpProcess(const Block & block,const Plane plane,const int index,const int block_start_x,const int block_start_y,const int width,const int height,GlobalMotion * const warp_params,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1268 bool Tile::BlockWarpProcess(const Block& block, const Plane plane,
1269 const int index, const int block_start_x,
1270 const int block_start_y, const int width,
1271 const int height, GlobalMotion* const warp_params,
1272 const bool is_compound, const bool is_inter_intra,
1273 uint8_t* const dest, const ptrdiff_t dest_stride) {
1274 assert(width >= 8 && height >= 8);
1275 const BlockParameters& bp = *block.bp;
1276 const int reference_frame_index =
1277 frame_header_.reference_frame_index[bp.reference_frame[index] -
1278 kReferenceFrameLast];
1279 const uint8_t* const source =
1280 reference_frames_[reference_frame_index]->buffer()->data(plane);
1281 ptrdiff_t source_stride =
1282 reference_frames_[reference_frame_index]->buffer()->stride(plane);
1283 const int source_width =
1284 reference_frames_[reference_frame_index]->buffer()->width(plane);
1285 const int source_height =
1286 reference_frames_[reference_frame_index]->buffer()->height(plane);
1287 uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index];
1288
1289 // In frame parallel mode, ensure that the reference block has been decoded
1290 // and available for referencing.
1291 if (frame_parallel_) {
1292 int reference_y_max = -1;
1293 // Find out the maximum y-coordinate for warping.
1294 for (int start_y = block_start_y; start_y < block_start_y + height;
1295 start_y += 8) {
1296 for (int start_x = block_start_x; start_x < block_start_x + width;
1297 start_x += 8) {
1298 const int src_x = (start_x + 4) << subsampling_x_[plane];
1299 const int src_y = (start_y + 4) << subsampling_y_[plane];
1300 const int dst_y = src_x * warp_params->params[4] +
1301 src_y * warp_params->params[5] +
1302 warp_params->params[1];
1303 const int y4 = dst_y >> subsampling_y_[plane];
1304 const int iy4 = y4 >> kWarpedModelPrecisionBits;
1305 reference_y_max = std::max(iy4 + 8, reference_y_max);
1306 }
1307 }
1308 // For U and V planes with subsampling, we need to multiply reference_y_max
1309 // by 2 since we only track the progress of Y planes.
1310 reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]);
1311 if (reference_frame_progress_cache_[reference_frame_index] <
1312 reference_y_max &&
1313 !reference_frames_[reference_frame_index]->WaitUntil(
1314 reference_y_max,
1315 &reference_frame_progress_cache_[reference_frame_index])) {
1316 return false;
1317 }
1318 }
1319 if (is_compound) {
1320 dsp_.warp_compound(source, source_stride, source_width, source_height,
1321 warp_params->params, subsampling_x_[plane],
1322 subsampling_y_[plane], block_start_x, block_start_y,
1323 width, height, warp_params->alpha, warp_params->beta,
1324 warp_params->gamma, warp_params->delta, prediction,
1325 /*prediction_stride=*/width);
1326 } else {
1327 void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest;
1328 ptrdiff_t output_stride =
1329 is_inter_intra ? /*prediction_stride=*/width : dest_stride;
1330 #if LIBGAV1_MAX_BITDEPTH >= 10
1331 // |is_inter_intra| calculations are written to the |prediction| buffer.
1332 // Unlike the |is_compound| calculations the output is Pixel and not
1333 // uint16_t. warp_clip() expects |output_stride| to be in bytes and not
1334 // Pixels. |prediction_stride| is in units of uint16_t. Adjust
1335 // |output_stride| to account for this.
1336 if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1337 output_stride *= 2;
1338 }
1339 #endif
1340 dsp_.warp(source, source_stride, source_width, source_height,
1341 warp_params->params, subsampling_x_[plane], subsampling_y_[plane],
1342 block_start_x, block_start_y, width, height, warp_params->alpha,
1343 warp_params->beta, warp_params->gamma, warp_params->delta, output,
1344 output_stride);
1345 }
1346 return true;
1347 }
1348
1349 } // namespace libgav1
1350