1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <algorithm>
16 #include <array>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22 #include <memory>
23
24 #include "src/buffer_pool.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/motion_vector.h"
28 #include "src/obu_parser.h"
29 #include "src/prediction_mask.h"
30 #include "src/tile.h"
31 #include "src/utils/array_2d.h"
32 #include "src/utils/bit_mask_set.h"
33 #include "src/utils/block_parameters_holder.h"
34 #include "src/utils/common.h"
35 #include "src/utils/constants.h"
36 #include "src/utils/logging.h"
37 #include "src/utils/memory.h"
38 #include "src/utils/types.h"
39 #include "src/warp_prediction.h"
40 #include "src/yuv_buffer.h"
41
42 namespace libgav1 {
43 namespace {
44
45 // Import all the constants in the anonymous namespace.
46 #include "src/inter_intra_masks.inc"
47
48 constexpr int kAngleStep = 3;
49 constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = {
50 0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0};
51
52 // The following modes need both the left_column and top_row for intra
53 // prediction. For directional modes left/top requirement is inferred based on
54 // the prediction angle. For Dc modes, left/top requirement is inferred based on
55 // whether or not left/top is available.
56 constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth,
57 kPredictionModeSmoothHorizontal,
58 kPredictionModeSmoothVertical,
59 kPredictionModePaeth);
60
GetDirectionalIntraPredictorDerivative(const int angle)61 int16_t GetDirectionalIntraPredictorDerivative(const int angle) {
62 assert(angle >= 3);
63 assert(angle <= 87);
64 return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1];
65 }
66
67 // Maps the block_size to an index as follows:
68 // kBlock8x8 => 0.
69 // kBlock8x16 => 1.
70 // kBlock8x32 => 2.
71 // kBlock16x8 => 3.
72 // kBlock16x16 => 4.
73 // kBlock16x32 => 5.
74 // kBlock32x8 => 6.
75 // kBlock32x16 => 7.
76 // kBlock32x32 => 8.
GetWedgeBlockSizeIndex(BlockSize block_size)77 int GetWedgeBlockSizeIndex(BlockSize block_size) {
78 assert(block_size >= kBlock8x8);
79 return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) -
80 static_cast<int>(block_size >= kBlock32x8);
81 }
82
83 // Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively.
GetInterIntraMaskLookupIndex(int dimension)84 int GetInterIntraMaskLookupIndex(int dimension) {
85 assert(dimension == 4 || dimension == 8 || dimension == 16 ||
86 dimension == 32);
87 return FloorLog2(dimension) - 2;
88 }
89
// 7.11.2.9.
// Returns the intra edge filter strength (0 to 3) for a block of
// |width| x |height|. Only the magnitude of |delta| is used. |filter_type| 0
// selects the first set of thresholds, any other value selects the second.
int GetIntraEdgeFilterStrength(int width, int height, int filter_type,
                               int delta) {
  const int block_wh = width + height;
  const int abs_delta = std::abs(delta);
  if (filter_type != 0) {
    if (block_wh > 24) return 3;
    if (block_wh > 16) return (abs_delta >= 4) ? 3 : 0;
    if (block_wh > 8) {
      if (abs_delta >= 48) return 2;
      return (abs_delta >= 20) ? 1 : 0;
    }
    if (abs_delta >= 64) return 2;
    return (abs_delta >= 40) ? 1 : 0;
  }
  if (block_wh > 32) return 3;
  if (block_wh > 24) {
    if (abs_delta >= 32) return 3;
    return (abs_delta >= 4) ? 2 : 1;
  }
  if (block_wh > 16) {
    if (abs_delta >= 32) return 3;
    if (abs_delta >= 16) return 2;
    return (abs_delta >= 8) ? 1 : 0;
  }
  // The two smallest size classes only differ in the threshold for strength 1.
  const int threshold = (block_wh > 8) ? 40 : 56;
  return (abs_delta >= threshold) ? 1 : 0;
}
126
// 7.11.2.10.
// Returns true if the edge of a |width| x |height| block should be upsampled.
// Only the magnitude of |delta| is used, and it must be non-zero.
bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) {
  // This function should not be called when the prediction angle is 90 or 180.
  assert(delta != 0);
  if (std::abs(delta) >= 40) return false;
  // Filter type 1 allows upsampling only for the smallest blocks.
  const int max_block_wh = (filter_type == 1) ? 8 : 16;
  return width + height <= max_block_wh;
}
136
137 constexpr uint8_t kQuantizedDistanceWeight[4][2] = {
138 {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}};
139
140 constexpr uint8_t kQuantizedDistanceLookup[4][2] = {
141 {9, 7}, {11, 5}, {12, 4}, {13, 3}};
142
GetDistanceWeights(const int distance[2],int weight[2])143 void GetDistanceWeights(const int distance[2], int weight[2]) {
144 // Note: distance[0] and distance[1] correspond to relative distance
145 // between current frame and reference frame [1] and [0], respectively.
146 const int order = static_cast<int>(distance[0] <= distance[1]);
147 if (distance[0] == 0 || distance[1] == 0) {
148 weight[0] = kQuantizedDistanceLookup[3][order];
149 weight[1] = kQuantizedDistanceLookup[3][1 - order];
150 } else {
151 int i;
152 for (i = 0; i < 3; ++i) {
153 const int weight_0 = kQuantizedDistanceWeight[i][order];
154 const int weight_1 = kQuantizedDistanceWeight[i][1 - order];
155 if (order == 0) {
156 if (distance[0] * weight_0 < distance[1] * weight_1) break;
157 } else {
158 if (distance[0] * weight_0 > distance[1] * weight_1) break;
159 }
160 }
161 weight[0] = kQuantizedDistanceLookup[i][order];
162 weight[1] = kQuantizedDistanceLookup[i][1 - order];
163 }
164 }
165
GetIntraPredictor(PredictionMode mode,bool has_left,bool has_top)166 dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left,
167 bool has_top) {
168 if (mode == kPredictionModeDc) {
169 if (has_left && has_top) {
170 return dsp::kIntraPredictorDc;
171 }
172 if (has_left) {
173 return dsp::kIntraPredictorDcLeft;
174 }
175 if (has_top) {
176 return dsp::kIntraPredictorDcTop;
177 }
178 return dsp::kIntraPredictorDcFill;
179 }
180 switch (mode) {
181 case kPredictionModePaeth:
182 return dsp::kIntraPredictorPaeth;
183 case kPredictionModeSmooth:
184 return dsp::kIntraPredictorSmooth;
185 case kPredictionModeSmoothVertical:
186 return dsp::kIntraPredictorSmoothVertical;
187 case kPredictionModeSmoothHorizontal:
188 return dsp::kIntraPredictorSmoothHorizontal;
189 default:
190 return dsp::kNumIntraPredictors;
191 }
192 }
193
GetStartPoint(Array2DView<uint8_t> * const buffer,const int plane,const int x,const int y,const int bitdepth)194 uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane,
195 const int x, const int y, const int bitdepth) {
196 #if LIBGAV1_MAX_BITDEPTH >= 10
197 if (bitdepth > 8) {
198 Array2DView<uint16_t> buffer16(
199 buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t),
200 reinterpret_cast<uint16_t*>(&buffer[plane][0][0]));
201 return reinterpret_cast<uint8_t*>(&buffer16[y][x]);
202 }
203 #endif // LIBGAV1_MAX_BITDEPTH >= 10
204 static_cast<void>(bitdepth);
205 return &buffer[plane][y][x];
206 }
207
GetPixelPositionFromHighScale(int start,int step,int offset)208 int GetPixelPositionFromHighScale(int start, int step, int offset) {
209 return (start + step * offset) >> kScaleSubPixelBits;
210 }
211
// Picks the entry of |dsp.mask_blend| to use. Inter intra blending without a
// wedge always uses row 0; every other case selects the row by
// |subsampling_x| + |subsampling_y|.
dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra,
                                    bool is_wedge_inter_intra,
                                    int subsampling_x, int subsampling_y) {
  const bool is_non_wedge_inter_intra = is_inter_intra && !is_wedge_inter_intra;
  const int row = is_non_wedge_inter_intra ? 0 : subsampling_x + subsampling_y;
  return dsp.mask_blend[row][is_inter_intra];
}
219
220 } // namespace
221
222 template <typename Pixel>
IntraPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool has_top_right,bool has_bottom_left,PredictionMode mode,TransformSize tx_size)223 void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y,
224 bool has_left, bool has_top, bool has_top_right,
225 bool has_bottom_left, PredictionMode mode,
226 TransformSize tx_size) {
227 const int width = 1 << kTransformWidthLog2[tx_size];
228 const int height = 1 << kTransformHeightLog2[tx_size];
229 const int x_shift = subsampling_x_[plane];
230 const int y_shift = subsampling_y_[plane];
231 const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1;
232 const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1;
233 // For performance reasons, do not initialize the following two buffers.
234 alignas(kMaxAlignment) Pixel top_row_data[160];
235 alignas(kMaxAlignment) Pixel left_column_data[160];
236 #if LIBGAV1_MSAN
237 if (IsDirectionalMode(mode)) {
238 memset(top_row_data, 0, sizeof(top_row_data));
239 memset(left_column_data, 0, sizeof(left_column_data));
240 }
241 #endif
242 // Some predictors use |top_row_data| and |left_column_data| with a negative
243 // offset to access pixels to the top-left of the current block. So have some
244 // space before the arrays to allow populating those without having to move
245 // the rest of the array.
246 Pixel* const top_row = top_row_data + 16;
247 Pixel* const left_column = left_column_data + 16;
248 const int bitdepth = sequence_header_.color_config.bitdepth;
249 const int top_and_left_size = width + height;
250 const bool is_directional_mode = IsDirectionalMode(mode);
251 const PredictionParameters& prediction_parameters =
252 *block.bp->prediction_parameters;
253 const bool use_filter_intra =
254 (plane == kPlaneY && prediction_parameters.use_filter_intra);
255 const int prediction_angle =
256 is_directional_mode
257 ? kPredictionModeToAngle[mode] +
258 prediction_parameters.angle_delta[GetPlaneType(plane)] *
259 kAngleStep
260 : 0;
261 // Directional prediction requires buffers larger than the width or height.
262 const int top_size = is_directional_mode ? top_and_left_size : width;
263 const int left_size = is_directional_mode ? top_and_left_size : height;
264 const int top_right_size =
265 is_directional_mode ? (has_top_right ? 2 : 1) * width : width;
266 const int bottom_left_size =
267 is_directional_mode ? (has_bottom_left ? 2 : 1) * height : height;
268
269 Array2DView<Pixel> buffer(buffer_[plane].rows(),
270 buffer_[plane].columns() / sizeof(Pixel),
271 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
272 const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
273 (is_directional_mode && prediction_angle < 180) ||
274 (mode == kPredictionModeDc && has_top);
275 const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
276 (is_directional_mode && prediction_angle > 90) ||
277 (mode == kPredictionModeDc && has_left);
278
279 const Pixel* top_row_src = buffer[y - 1];
280 int top_row_offset = 0;
281
282 // Determine if we need to retrieve the top row from
283 // |intra_prediction_buffer_|.
284 if ((needs_top || needs_left) && use_intra_prediction_buffer_) {
285 // Superblock index of block.row4x4. block.row4x4 is always in luma
286 // dimension (no subsampling).
287 const int current_superblock_index =
288 block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4);
289 // Superblock index of y - 1. y is in the plane dimension (chroma planes
290 // could be subsampled).
291 const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) -
292 subsampling_y_[plane];
293 const int top_row_superblock_index = (y - 1) >> plane_shift;
294 // If the superblock index of y - 1 is not that of the current superblock,
295 // then we will have to retrieve the top row from the
296 // |intra_prediction_buffer_|.
297 if (current_superblock_index != top_row_superblock_index) {
298 top_row_src =
299 reinterpret_cast<const Pixel*>(intra_prediction_buffer_[plane].get());
300 // The |intra_prediction_buffer_| only stores the top row for this Tile.
301 // The |x| value in this function is absolute to the frame. So in order to
302 // make it relative to this Tile, all acccesses into top_row_src must be
303 // offset by negative |top_row_offset|.
304 top_row_offset = MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
305 }
306 }
307
308 if (needs_top) {
309 // Compute top_row.
310 if (has_top || has_left) {
311 const int left_index = has_left ? x - 1 : x;
312 top_row[-1] = has_top ? top_row_src[left_index - top_row_offset]
313 : buffer[y][left_index];
314 } else {
315 top_row[-1] = 1 << (bitdepth - 1);
316 }
317 if (!has_top && has_left) {
318 Memset(top_row, buffer[y][x - 1], top_size);
319 } else if (!has_top && !has_left) {
320 Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size);
321 } else {
322 const int top_limit = std::min(max_x - x + 1, top_right_size);
323 memcpy(top_row, &top_row_src[x - top_row_offset],
324 top_limit * sizeof(Pixel));
325 // Even though it is safe to call Memset with a size of 0, accessing
326 // top_row_src[top_limit - x + 1] is not allowed when this condition is
327 // false.
328 if (top_size - top_limit > 0) {
329 Memset(top_row + top_limit,
330 top_row_src[top_limit + x - 1 - top_row_offset],
331 top_size - top_limit);
332 }
333 }
334 }
335 if (needs_left) {
336 // Compute left_column.
337 if (has_top || has_left) {
338 const int left_index = has_left ? x - 1 : x;
339 left_column[-1] = has_top ? top_row_src[left_index - top_row_offset]
340 : buffer[y][left_index];
341 } else {
342 left_column[-1] = 1 << (bitdepth - 1);
343 }
344 if (!has_left && has_top) {
345 Memset(left_column, top_row_src[x - top_row_offset], left_size);
346 } else if (!has_left && !has_top) {
347 Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size);
348 } else {
349 const int left_limit = std::min(max_y - y + 1, bottom_left_size);
350 for (int i = 0; i < left_limit; ++i) {
351 left_column[i] = buffer[y + i][x - 1];
352 }
353 // Even though it is safe to call Memset with a size of 0, accessing
354 // buffer[left_limit - y + 1][x - 1] is not allowed when this condition is
355 // false.
356 if (left_size - left_limit > 0) {
357 Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1],
358 left_size - left_limit);
359 }
360 }
361 }
362 Pixel* const dest = &buffer[y][x];
363 const ptrdiff_t dest_stride = buffer_[plane].columns();
364 if (use_filter_intra) {
365 dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column,
366 prediction_parameters.filter_intra_mode, width,
367 height);
368 } else if (is_directional_mode) {
369 DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left,
370 needs_top, prediction_angle, width, height, max_x,
371 max_y, tx_size, top_row, left_column);
372 } else {
373 const dsp::IntraPredictor predictor =
374 GetIntraPredictor(mode, has_left, has_top);
375 assert(predictor != dsp::kNumIntraPredictors);
376 dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row,
377 left_column);
378 }
379 }
380
381 template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane,
382 int x, int y, bool has_left,
383 bool has_top, bool has_top_right,
384 bool has_bottom_left,
385 PredictionMode mode,
386 TransformSize tx_size);
387 #if LIBGAV1_MAX_BITDEPTH >= 10
388 template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane,
389 int x, int y, bool has_left,
390 bool has_top, bool has_top_right,
391 bool has_bottom_left,
392 PredictionMode mode,
393 TransformSize tx_size);
394 #endif
395
396 constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth,
397 kPredictionModeSmoothHorizontal,
398 kPredictionModeSmoothVertical);
399
IsSmoothPrediction(int row,int column,Plane plane) const400 bool Tile::IsSmoothPrediction(int row, int column, Plane plane) const {
401 const BlockParameters& bp = *block_parameters_holder_.Find(row, column);
402 PredictionMode mode;
403 if (plane == kPlaneY) {
404 mode = bp.y_mode;
405 } else {
406 if (bp.reference_frame[0] > kReferenceFrameIntra) return false;
407 mode = bp.uv_mode;
408 }
409 return kPredictionModeSmoothMask.Contains(mode);
410 }
411
GetIntraEdgeFilterType(const Block & block,Plane plane) const412 int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const {
413 const int subsampling_x = subsampling_x_[plane];
414 const int subsampling_y = subsampling_y_[plane];
415 if (block.top_available[plane]) {
416 const int row =
417 block.row4x4 - 1 -
418 static_cast<int>(subsampling_y != 0 && (block.row4x4 & 1) != 0);
419 const int column =
420 block.column4x4 +
421 static_cast<int>(subsampling_x != 0 && (block.column4x4 & 1) == 0);
422 if (IsSmoothPrediction(row, column, plane)) return 1;
423 }
424 if (block.left_available[plane]) {
425 const int row = block.row4x4 + static_cast<int>(subsampling_y != 0 &&
426 (block.row4x4 & 1) == 0);
427 const int column =
428 block.column4x4 - 1 -
429 static_cast<int>(subsampling_x != 0 && (block.column4x4 & 1) != 0);
430 if (IsSmoothPrediction(row, column, plane)) return 1;
431 }
432 return 0;
433 }
434
435 template <typename Pixel>
DirectionalPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool needs_left,bool needs_top,int prediction_angle,int width,int height,int max_x,int max_y,TransformSize tx_size,Pixel * const top_row,Pixel * const left_column)436 void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y,
437 bool has_left, bool has_top, bool needs_left,
438 bool needs_top, int prediction_angle,
439 int width, int height, int max_x, int max_y,
440 TransformSize tx_size, Pixel* const top_row,
441 Pixel* const left_column) {
442 Array2DView<Pixel> buffer(buffer_[plane].rows(),
443 buffer_[plane].columns() / sizeof(Pixel),
444 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
445 Pixel* const dest = &buffer[y][x];
446 const ptrdiff_t stride = buffer_[plane].columns();
447 if (prediction_angle == 90) {
448 dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical](
449 dest, stride, top_row, left_column);
450 return;
451 }
452 if (prediction_angle == 180) {
453 dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal](
454 dest, stride, top_row, left_column);
455 return;
456 }
457
458 bool upsampled_top = false;
459 bool upsampled_left = false;
460 if (sequence_header_.enable_intra_edge_filter) {
461 const int filter_type = GetIntraEdgeFilterType(block, plane);
462 if (prediction_angle > 90 && prediction_angle < 180 &&
463 (width + height) >= 24) {
464 // 7.11.2.7.
465 left_column[-1] = top_row[-1] = RightShiftWithRounding(
466 left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4);
467 }
468 if (has_top && needs_top) {
469 const int strength = GetIntraEdgeFilterStrength(
470 width, height, filter_type, prediction_angle - 90);
471 if (strength > 0) {
472 const int num_pixels = std::min(width, max_x - x + 1) +
473 ((prediction_angle < 90) ? height : 0) + 1;
474 dsp_.intra_edge_filter(top_row - 1, num_pixels, strength);
475 }
476 }
477 if (has_left && needs_left) {
478 const int strength = GetIntraEdgeFilterStrength(
479 width, height, filter_type, prediction_angle - 180);
480 if (strength > 0) {
481 const int num_pixels = std::min(height, max_y - y + 1) +
482 ((prediction_angle > 180) ? width : 0) + 1;
483 dsp_.intra_edge_filter(left_column - 1, num_pixels, strength);
484 }
485 }
486 upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type,
487 prediction_angle - 90);
488 if (upsampled_top && needs_top) {
489 const int num_pixels = width + ((prediction_angle < 90) ? height : 0);
490 dsp_.intra_edge_upsampler(top_row, num_pixels);
491 }
492 upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type,
493 prediction_angle - 180);
494 if (upsampled_left && needs_left) {
495 const int num_pixels = height + ((prediction_angle > 180) ? width : 0);
496 dsp_.intra_edge_upsampler(left_column, num_pixels);
497 }
498 }
499
500 if (prediction_angle < 90) {
501 const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle);
502 dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height,
503 dx, upsampled_top);
504 } else if (prediction_angle < 180) {
505 const int dx =
506 GetDirectionalIntraPredictorDerivative(180 - prediction_angle);
507 const int dy =
508 GetDirectionalIntraPredictorDerivative(prediction_angle - 90);
509 dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column,
510 width, height, dx, dy, upsampled_top,
511 upsampled_left);
512 } else {
513 assert(prediction_angle < 270);
514 const int dy =
515 GetDirectionalIntraPredictorDerivative(270 - prediction_angle);
516 dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width,
517 height, dy, upsampled_left);
518 }
519 }
520
521 template <typename Pixel>
PalettePrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const int x,const int y,const TransformSize tx_size)522 void Tile::PalettePrediction(const Block& block, const Plane plane,
523 const int start_x, const int start_y, const int x,
524 const int y, const TransformSize tx_size) {
525 const int tx_width = kTransformWidth[tx_size];
526 const int tx_height = kTransformHeight[tx_size];
527 const uint16_t* const palette = block.bp->palette_mode_info.color[plane];
528 const PlaneType plane_type = GetPlaneType(plane);
529 const int x4 = MultiplyBy4(x);
530 const int y4 = MultiplyBy4(y);
531 Array2DView<Pixel> buffer(buffer_[plane].rows(),
532 buffer_[plane].columns() / sizeof(Pixel),
533 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
534 for (int row = 0; row < tx_height; ++row) {
535 assert(block.bp->prediction_parameters
536 ->color_index_map[plane_type][y4 + row] != nullptr);
537 for (int column = 0; column < tx_width; ++column) {
538 buffer[start_y + row][start_x + column] =
539 palette[block.bp->prediction_parameters
540 ->color_index_map[plane_type][y4 + row][x4 + column]];
541 }
542 }
543 }
544
545 template void Tile::PalettePrediction<uint8_t>(
546 const Block& block, const Plane plane, const int start_x, const int start_y,
547 const int x, const int y, const TransformSize tx_size);
548 #if LIBGAV1_MAX_BITDEPTH >= 10
549 template void Tile::PalettePrediction<uint16_t>(
550 const Block& block, const Plane plane, const int start_x, const int start_y,
551 const int x, const int y, const TransformSize tx_size);
552 #endif
553
554 template <typename Pixel>
ChromaFromLumaPrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const TransformSize tx_size)555 void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane,
556 const int start_x, const int start_y,
557 const TransformSize tx_size) {
558 const int subsampling_x = subsampling_x_[plane];
559 const int subsampling_y = subsampling_y_[plane];
560 const PredictionParameters& prediction_parameters =
561 *block.bp->prediction_parameters;
562 Array2DView<Pixel> y_buffer(
563 buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel),
564 reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0]));
565 if (!block.scratch_buffer->cfl_luma_buffer_valid) {
566 const int luma_x = start_x << subsampling_x;
567 const int luma_y = start_y << subsampling_y;
568 dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y](
569 block.scratch_buffer->cfl_luma_buffer,
570 prediction_parameters.max_luma_width - luma_x,
571 prediction_parameters.max_luma_height - luma_y,
572 reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]),
573 buffer_[kPlaneY].columns());
574 block.scratch_buffer->cfl_luma_buffer_valid = true;
575 }
576 Array2DView<Pixel> buffer(buffer_[plane].rows(),
577 buffer_[plane].columns() / sizeof(Pixel),
578 reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
579 dsp_.cfl_intra_predictors[tx_size](
580 reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]),
581 buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer,
582 (plane == kPlaneU) ? prediction_parameters.cfl_alpha_u
583 : prediction_parameters.cfl_alpha_v);
584 }
585
586 template void Tile::ChromaFromLumaPrediction<uint8_t>(
587 const Block& block, const Plane plane, const int start_x, const int start_y,
588 const TransformSize tx_size);
589 #if LIBGAV1_MAX_BITDEPTH >= 10
590 template void Tile::ChromaFromLumaPrediction<uint16_t>(
591 const Block& block, const Plane plane, const int start_x, const int start_y,
592 const TransformSize tx_size);
593 #endif
594
InterIntraPrediction(uint16_t * const prediction_0,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const PredictionParameters & prediction_parameters,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,uint8_t * const dest,const ptrdiff_t dest_stride)595 void Tile::InterIntraPrediction(
596 uint16_t* const prediction_0, const uint8_t* const prediction_mask,
597 const ptrdiff_t prediction_mask_stride,
598 const PredictionParameters& prediction_parameters,
599 const int prediction_width, const int prediction_height,
600 const int subsampling_x, const int subsampling_y, uint8_t* const dest,
601 const ptrdiff_t dest_stride) {
602 assert(prediction_mask != nullptr);
603 assert(prediction_parameters.compound_prediction_type ==
604 kCompoundPredictionTypeIntra ||
605 prediction_parameters.compound_prediction_type ==
606 kCompoundPredictionTypeWedge);
607 // The first buffer of InterIntra is from inter prediction.
608 // The second buffer is from intra prediction.
609 #if LIBGAV1_MAX_BITDEPTH >= 10
610 if (sequence_header_.color_config.bitdepth > 8) {
611 GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true,
612 prediction_parameters.is_wedge_inter_intra, subsampling_x,
613 subsampling_y)(
614 prediction_0, reinterpret_cast<uint16_t*>(dest),
615 dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride,
616 prediction_width, prediction_height, dest, dest_stride);
617 return;
618 }
619 #endif
620 const int function_index = prediction_parameters.is_wedge_inter_intra
621 ? subsampling_x + subsampling_y
622 : 0;
623 // |is_inter_intra| prediction values are stored in a Pixel buffer but it is
624 // currently declared as a uint16_t buffer.
625 // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and
626 // remove the reinterpret_cast.
627 dsp_.inter_intra_mask_blend_8bpp[function_index](
628 reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride,
629 prediction_mask, prediction_mask_stride, prediction_width,
630 prediction_height);
631 }
632
CompoundInterPrediction(const Block & block,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,const int candidate_row,const int candidate_column,uint8_t * dest,const ptrdiff_t dest_stride)633 void Tile::CompoundInterPrediction(
634 const Block& block, const uint8_t* const prediction_mask,
635 const ptrdiff_t prediction_mask_stride, const int prediction_width,
636 const int prediction_height, const int subsampling_x,
637 const int subsampling_y, const int candidate_row,
638 const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) {
639 const PredictionParameters& prediction_parameters =
640 *block.bp->prediction_parameters;
641
642 void* prediction[2];
643 #if LIBGAV1_MAX_BITDEPTH >= 10
644 const int bitdepth = sequence_header_.color_config.bitdepth;
645 if (bitdepth > 8) {
646 prediction[0] = block.scratch_buffer->prediction_buffer[0];
647 prediction[1] = block.scratch_buffer->prediction_buffer[1];
648 } else {
649 #endif
650 prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0];
651 prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1];
652 #if LIBGAV1_MAX_BITDEPTH >= 10
653 }
654 #endif
655
656 switch (prediction_parameters.compound_prediction_type) {
657 case kCompoundPredictionTypeWedge:
658 case kCompoundPredictionTypeDiffWeighted:
659 GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false,
660 prediction_parameters.is_wedge_inter_intra,
661 subsampling_x, subsampling_y)(
662 prediction[0], prediction[1],
663 /*prediction_stride=*/prediction_width, prediction_mask,
664 prediction_mask_stride, prediction_width, prediction_height, dest,
665 dest_stride);
666 break;
667 case kCompoundPredictionTypeDistance:
668 DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width,
669 prediction_height, candidate_row,
670 candidate_column, dest, dest_stride);
671 break;
672 default:
673 assert(prediction_parameters.compound_prediction_type ==
674 kCompoundPredictionTypeAverage);
675 dsp_.average_blend(prediction[0], prediction[1], prediction_width,
676 prediction_height, dest, dest_stride);
677 break;
678 }
679 }
680
GetWarpParams(const Block & block,const Plane plane,const int prediction_width,const int prediction_height,const PredictionParameters & prediction_parameters,const ReferenceFrameType reference_type,bool * const is_local_valid,GlobalMotion * const global_motion_params,GlobalMotion * const local_warp_params) const681 GlobalMotion* Tile::GetWarpParams(
682 const Block& block, const Plane plane, const int prediction_width,
683 const int prediction_height,
684 const PredictionParameters& prediction_parameters,
685 const ReferenceFrameType reference_type, bool* const is_local_valid,
686 GlobalMotion* const global_motion_params,
687 GlobalMotion* const local_warp_params) const {
688 if (prediction_width < 8 || prediction_height < 8 ||
689 frame_header_.force_integer_mv == 1) {
690 return nullptr;
691 }
692 if (plane == kPlaneY) {
693 *is_local_valid =
694 prediction_parameters.motion_mode == kMotionModeLocalWarp &&
695 WarpEstimation(
696 prediction_parameters.num_warp_samples, DivideBy4(prediction_width),
697 DivideBy4(prediction_height), block.row4x4, block.column4x4,
698 block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates,
699 local_warp_params) &&
700 SetupShear(local_warp_params);
701 }
702 if (prediction_parameters.motion_mode == kMotionModeLocalWarp &&
703 *is_local_valid) {
704 return local_warp_params;
705 }
706 if (!IsScaled(reference_type)) {
707 GlobalMotionTransformationType global_motion_type =
708 (reference_type != kReferenceFrameIntra)
709 ? global_motion_params->type
710 : kNumGlobalMotionTransformationTypes;
711 const bool is_global_valid =
712 IsGlobalMvBlock(block.bp->is_global_mv_block, global_motion_type) &&
713 SetupShear(global_motion_params);
714 // Valid global motion type implies reference type can't be intra.
715 assert(!is_global_valid || reference_type != kReferenceFrameIntra);
716 if (is_global_valid) return global_motion_params;
717 }
718 return nullptr;
719 }
720
InterPrediction(const Block & block,const Plane plane,const int x,const int y,const int prediction_width,const int prediction_height,int candidate_row,int candidate_column,bool * const is_local_valid,GlobalMotion * const local_warp_params)721 bool Tile::InterPrediction(const Block& block, const Plane plane, const int x,
722 const int y, const int prediction_width,
723 const int prediction_height, int candidate_row,
724 int candidate_column, bool* const is_local_valid,
725 GlobalMotion* const local_warp_params) {
726 const int bitdepth = sequence_header_.color_config.bitdepth;
727 const BlockParameters& bp = *block.bp;
728 const BlockParameters& bp_reference =
729 *block_parameters_holder_.Find(candidate_row, candidate_column);
730 const bool is_compound =
731 bp_reference.reference_frame[1] > kReferenceFrameIntra;
732 assert(bp.is_inter);
733 const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra;
734
735 const PredictionParameters& prediction_parameters =
736 *block.bp->prediction_parameters;
737 uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth);
738 const ptrdiff_t dest_stride = buffer_[plane].columns(); // In bytes.
739 for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) {
740 const ReferenceFrameType reference_type =
741 bp_reference.reference_frame[index];
742 GlobalMotion global_motion_params =
743 frame_header_.global_motion[reference_type];
744 GlobalMotion* warp_params =
745 GetWarpParams(block, plane, prediction_width, prediction_height,
746 prediction_parameters, reference_type, is_local_valid,
747 &global_motion_params, local_warp_params);
748 if (warp_params != nullptr) {
749 if (!BlockWarpProcess(block, plane, index, x, y, prediction_width,
750 prediction_height, warp_params, is_compound,
751 is_inter_intra, dest, dest_stride)) {
752 return false;
753 }
754 } else {
755 const int reference_index =
756 prediction_parameters.use_intra_block_copy
757 ? -1
758 : frame_header_.reference_frame_index[reference_type -
759 kReferenceFrameLast];
760 if (!BlockInterPrediction(
761 block, plane, reference_index, bp_reference.mv.mv[index], x, y,
762 prediction_width, prediction_height, candidate_row,
763 candidate_column, block.scratch_buffer->prediction_buffer[index],
764 is_compound, is_inter_intra, dest, dest_stride)) {
765 return false;
766 }
767 }
768 }
769
770 const int subsampling_x = subsampling_x_[plane];
771 const int subsampling_y = subsampling_y_[plane];
772 ptrdiff_t prediction_mask_stride = 0;
773 const uint8_t* prediction_mask = nullptr;
774 if (prediction_parameters.compound_prediction_type ==
775 kCompoundPredictionTypeWedge) {
776 const Array2D<uint8_t>& wedge_mask =
777 wedge_masks_[GetWedgeBlockSizeIndex(block.size)]
778 [prediction_parameters.wedge_sign]
779 [prediction_parameters.wedge_index];
780 prediction_mask = wedge_mask[0];
781 prediction_mask_stride = wedge_mask.columns();
782 } else if (prediction_parameters.compound_prediction_type ==
783 kCompoundPredictionTypeIntra) {
784 // 7.11.3.13. The inter intra masks are precomputed and stored as a set of
785 // look up tables.
786 assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes);
787 prediction_mask =
788 kInterIntraMasks[prediction_parameters.inter_intra_mode]
789 [GetInterIntraMaskLookupIndex(prediction_width)]
790 [GetInterIntraMaskLookupIndex(prediction_height)];
791 prediction_mask_stride = prediction_width;
792 } else if (prediction_parameters.compound_prediction_type ==
793 kCompoundPredictionTypeDiffWeighted) {
794 if (plane == kPlaneY) {
795 assert(prediction_width >= 8);
796 assert(prediction_height >= 8);
797 dsp_.weight_mask[FloorLog2(prediction_width) - 3]
798 [FloorLog2(prediction_height) - 3]
799 [static_cast<int>(prediction_parameters.mask_is_inverse)](
800 block.scratch_buffer->prediction_buffer[0],
801 block.scratch_buffer->prediction_buffer[1],
802 block.scratch_buffer->weight_mask,
803 kMaxSuperBlockSizeInPixels);
804 }
805 prediction_mask = block.scratch_buffer->weight_mask;
806 prediction_mask_stride = kMaxSuperBlockSizeInPixels;
807 }
808
809 if (is_compound) {
810 CompoundInterPrediction(block, prediction_mask, prediction_mask_stride,
811 prediction_width, prediction_height, subsampling_x,
812 subsampling_y, candidate_row, candidate_column,
813 dest, dest_stride);
814 } else if (prediction_parameters.motion_mode == kMotionModeObmc) {
815 // Obmc mode is allowed only for single reference (!is_compound).
816 if (!ObmcPrediction(block, plane, prediction_width, prediction_height)) {
817 return false;
818 }
819 } else if (is_inter_intra) {
820 // InterIntra and obmc must be mutually exclusive.
821 InterIntraPrediction(
822 block.scratch_buffer->prediction_buffer[0], prediction_mask,
823 prediction_mask_stride, prediction_parameters, prediction_width,
824 prediction_height, subsampling_x, subsampling_y, dest, dest_stride);
825 }
826 return true;
827 }
828
ObmcBlockPrediction(const Block & block,const MotionVector & mv,const Plane plane,const int reference_frame_index,const int width,const int height,const int x,const int y,const int candidate_row,const int candidate_column,const ObmcDirection blending_direction)829 bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv,
830 const Plane plane,
831 const int reference_frame_index, const int width,
832 const int height, const int x, const int y,
833 const int candidate_row,
834 const int candidate_column,
835 const ObmcDirection blending_direction) {
836 const int bitdepth = sequence_header_.color_config.bitdepth;
837 // Obmc's prediction needs to be clipped before blending with above/left
838 // prediction blocks.
839 // Obmc prediction is used only when is_compound is false. So it is safe to
840 // use prediction_buffer[1] as a temporary buffer for the Obmc prediction.
841 static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >=
842 64 * 64 * sizeof(uint16_t),
843 "");
844 auto* const obmc_buffer =
845 reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]);
846 const ptrdiff_t obmc_buffer_stride =
847 (bitdepth == 8) ? width : width * sizeof(uint16_t);
848 if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y,
849 width, height, candidate_row, candidate_column,
850 nullptr, false, false, obmc_buffer,
851 obmc_buffer_stride)) {
852 return false;
853 }
854
855 uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth);
856 const ptrdiff_t prediction_stride = buffer_[plane].columns();
857 dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width,
858 height, obmc_buffer, obmc_buffer_stride);
859 return true;
860 }
861
ObmcPrediction(const Block & block,const Plane plane,const int width,const int height)862 bool Tile::ObmcPrediction(const Block& block, const Plane plane,
863 const int width, const int height) {
864 const int subsampling_x = subsampling_x_[plane];
865 const int subsampling_y = subsampling_y_[plane];
866 if (block.top_available[kPlaneY] &&
867 !IsBlockSmallerThan8x8(block.residual_size[plane])) {
868 const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]);
869 const int column4x4_max =
870 std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
871 const int candidate_row = block.row4x4 - 1;
872 const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y;
873 int column4x4 = block.column4x4;
874 const int prediction_height = std::min(height >> 1, 32 >> subsampling_y);
875 for (int i = 0, step; i < num_limit && column4x4 < column4x4_max;
876 column4x4 += step) {
877 const int candidate_column = column4x4 | 1;
878 const BlockParameters& bp_top =
879 *block_parameters_holder_.Find(candidate_row, candidate_column);
880 const int candidate_block_size = bp_top.size;
881 step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16);
882 if (bp_top.reference_frame[0] > kReferenceFrameIntra) {
883 i++;
884 const int candidate_reference_frame_index =
885 frame_header_.reference_frame_index[bp_top.reference_frame[0] -
886 kReferenceFrameLast];
887 const int prediction_width =
888 std::min(width, MultiplyBy4(step) >> subsampling_x);
889 if (!ObmcBlockPrediction(
890 block, bp_top.mv.mv[0], plane, candidate_reference_frame_index,
891 prediction_width, prediction_height,
892 MultiplyBy4(column4x4) >> subsampling_x, block_start_y,
893 candidate_row, candidate_column, kObmcDirectionVertical)) {
894 return false;
895 }
896 }
897 }
898 }
899
900 if (block.left_available[kPlaneY]) {
901 const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]);
902 const int row4x4_max =
903 std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
904 const int candidate_column = block.column4x4 - 1;
905 int row4x4 = block.row4x4;
906 const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x;
907 const int prediction_width = std::min(width >> 1, 32 >> subsampling_x);
908 for (int i = 0, step; i < num_limit && row4x4 < row4x4_max;
909 row4x4 += step) {
910 const int candidate_row = row4x4 | 1;
911 const BlockParameters& bp_left =
912 *block_parameters_holder_.Find(candidate_row, candidate_column);
913 const int candidate_block_size = bp_left.size;
914 step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16);
915 if (bp_left.reference_frame[0] > kReferenceFrameIntra) {
916 i++;
917 const int candidate_reference_frame_index =
918 frame_header_.reference_frame_index[bp_left.reference_frame[0] -
919 kReferenceFrameLast];
920 const int prediction_height =
921 std::min(height, MultiplyBy4(step) >> subsampling_y);
922 if (!ObmcBlockPrediction(
923 block, bp_left.mv.mv[0], plane, candidate_reference_frame_index,
924 prediction_width, prediction_height, block_start_x,
925 MultiplyBy4(row4x4) >> subsampling_y, candidate_row,
926 candidate_column, kObmcDirectionHorizontal)) {
927 return false;
928 }
929 }
930 }
931 }
932 return true;
933 }
934
DistanceWeightedPrediction(void * prediction_0,void * prediction_1,const int width,const int height,const int candidate_row,const int candidate_column,uint8_t * dest,ptrdiff_t dest_stride)935 void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
936 const int width, const int height,
937 const int candidate_row,
938 const int candidate_column, uint8_t* dest,
939 ptrdiff_t dest_stride) {
940 int distance[2];
941 int weight[2];
942 for (int reference = 0; reference < 2; ++reference) {
943 const BlockParameters& bp =
944 *block_parameters_holder_.Find(candidate_row, candidate_column);
945 const unsigned int reference_hint =
946 current_frame_.order_hint(bp.reference_frame[reference]);
947 // Note: distance[0] and distance[1] correspond to relative distance
948 // between current frame and reference frame [1] and [0], respectively.
949 distance[1 - reference] = Clip3(
950 std::abs(GetRelativeDistance(reference_hint, frame_header_.order_hint,
951 sequence_header_.order_hint_shift_bits)),
952 0, kMaxFrameDistance);
953 }
954 GetDistanceWeights(distance, weight);
955
956 dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1],
957 width, height, dest, dest_stride);
958 }
959
960 // static.
GetReferenceBlockPosition(const int reference_frame_index,const bool is_scaled,const int width,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int start_x,const int start_y,const int step_x,const int step_y,const int left_border,const int right_border,const int top_border,const int bottom_border,int * ref_block_start_x,int * ref_block_start_y,int * ref_block_end_x)961 bool Tile::GetReferenceBlockPosition(
962 const int reference_frame_index, const bool is_scaled, const int width,
963 const int height, const int ref_start_x, const int ref_last_x,
964 const int ref_start_y, const int ref_last_y, const int start_x,
965 const int start_y, const int step_x, const int step_y,
966 const int left_border, const int right_border, const int top_border,
967 const int bottom_border, int* ref_block_start_x, int* ref_block_start_y,
968 int* ref_block_end_x) {
969 *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0);
970 *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0);
971 if (reference_frame_index == -1) {
972 return false;
973 }
974 *ref_block_start_x -= kConvolveBorderLeftTop;
975 *ref_block_start_y -= kConvolveBorderLeftTop;
976 *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) +
977 kConvolveBorderRight;
978 int ref_block_end_y =
979 GetPixelPositionFromHighScale(start_y, step_y, height - 1) +
980 kConvolveBorderBottom;
981 if (is_scaled) {
982 const int block_height =
983 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
984 kScaleSubPixelBits) +
985 kSubPixelTaps;
986 ref_block_end_y = *ref_block_start_y + block_height - 1;
987 }
988 // Determines if we need to extend beyond the left/right/top/bottom border.
989 return *ref_block_start_x < (ref_start_x - left_border) ||
990 *ref_block_end_x > (ref_last_x + right_border) ||
991 *ref_block_start_y < (ref_start_y - top_border) ||
992 ref_block_end_y > (ref_last_y + bottom_border);
993 }
994
995 // Builds a block as the input for convolve, by copying the content of
996 // reference frame (either a decoded reference frame, or current frame).
997 // |block_extended_width| is the combined width of the block and its borders.
998 template <typename Pixel>
BuildConvolveBlock(const Plane plane,const int reference_frame_index,const bool is_scaled,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int step_y,const int ref_block_start_x,const int ref_block_end_x,const int ref_block_start_y,uint8_t * block_buffer,ptrdiff_t convolve_buffer_stride,ptrdiff_t block_extended_width)999 void Tile::BuildConvolveBlock(
1000 const Plane plane, const int reference_frame_index, const bool is_scaled,
1001 const int height, const int ref_start_x, const int ref_last_x,
1002 const int ref_start_y, const int ref_last_y, const int step_y,
1003 const int ref_block_start_x, const int ref_block_end_x,
1004 const int ref_block_start_y, uint8_t* block_buffer,
1005 ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) {
1006 const YuvBuffer* const reference_buffer =
1007 (reference_frame_index == -1)
1008 ? current_frame_.buffer()
1009 : reference_frames_[reference_frame_index]->buffer();
1010 Array2DView<const Pixel> reference_block(
1011 reference_buffer->height(plane),
1012 reference_buffer->stride(plane) / sizeof(Pixel),
1013 reinterpret_cast<const Pixel*>(reference_buffer->data(plane)));
1014 auto* const block_head = reinterpret_cast<Pixel*>(block_buffer);
1015 convolve_buffer_stride /= sizeof(Pixel);
1016 int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom;
1017 if (is_scaled) {
1018 block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1019 kScaleSubPixelBits) +
1020 kSubPixelTaps;
1021 }
1022 const int copy_start_x =
1023 std::min(std::max(ref_block_start_x, ref_start_x), ref_last_x);
1024 const int copy_end_x =
1025 std::max(std::min(ref_block_end_x, ref_last_x), copy_start_x);
1026 const int copy_start_y =
1027 std::min(std::max(ref_block_start_y, ref_start_y), ref_last_y);
1028 const int block_width = copy_end_x - copy_start_x + 1;
1029 const bool extend_left = ref_block_start_x < ref_start_x;
1030 const bool extend_right = ref_block_end_x > ref_last_x;
1031 const bool out_of_left = copy_start_x > ref_block_end_x;
1032 const bool out_of_right = copy_end_x < ref_block_start_x;
1033 if (out_of_left || out_of_right) {
1034 const int ref_x = out_of_left ? copy_start_x : copy_end_x;
1035 Pixel* buf_ptr = block_head;
1036 for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1037 Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width);
1038 if (ref_block_start_y + y >= ref_start_y &&
1039 ref_block_start_y + y < ref_last_y) {
1040 ++ref_y;
1041 }
1042 buf_ptr += convolve_buffer_stride;
1043 }
1044 } else {
1045 Pixel* buf_ptr = block_head;
1046 const int left_width = copy_start_x - ref_block_start_x;
1047 for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1048 if (extend_left) {
1049 Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width);
1050 }
1051 memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x],
1052 block_width * sizeof(Pixel));
1053 if (extend_right) {
1054 Memset(buf_ptr + left_width + block_width,
1055 reference_block[ref_y][copy_end_x],
1056 block_extended_width - left_width - block_width);
1057 }
1058 if (ref_block_start_y + y >= ref_start_y &&
1059 ref_block_start_y + y < ref_last_y) {
1060 ++ref_y;
1061 }
1062 buf_ptr += convolve_buffer_stride;
1063 }
1064 }
1065 }
1066
BlockInterPrediction(const Block & block,const Plane plane,const int reference_frame_index,const MotionVector & mv,const int x,const int y,const int width,const int height,const int candidate_row,const int candidate_column,uint16_t * const prediction,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1067 bool Tile::BlockInterPrediction(
1068 const Block& block, const Plane plane, const int reference_frame_index,
1069 const MotionVector& mv, const int x, const int y, const int width,
1070 const int height, const int candidate_row, const int candidate_column,
1071 uint16_t* const prediction, const bool is_compound,
1072 const bool is_inter_intra, uint8_t* const dest,
1073 const ptrdiff_t dest_stride) {
1074 const BlockParameters& bp =
1075 *block_parameters_holder_.Find(candidate_row, candidate_column);
1076 int start_x;
1077 int start_y;
1078 int step_x;
1079 int step_y;
1080 ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y,
1081 &step_x, &step_y);
1082 const int horizontal_filter_index = bp.interpolation_filter[1];
1083 const int vertical_filter_index = bp.interpolation_filter[0];
1084 const int subsampling_x = subsampling_x_[plane];
1085 const int subsampling_y = subsampling_y_[plane];
1086 // reference_frame_index equal to -1 indicates using current frame as
1087 // reference.
1088 const YuvBuffer* const reference_buffer =
1089 (reference_frame_index == -1)
1090 ? current_frame_.buffer()
1091 : reference_frames_[reference_frame_index]->buffer();
1092 const int reference_upscaled_width =
1093 (reference_frame_index == -1)
1094 ? MultiplyBy4(frame_header_.columns4x4)
1095 : reference_frames_[reference_frame_index]->upscaled_width();
1096 const int reference_height =
1097 (reference_frame_index == -1)
1098 ? MultiplyBy4(frame_header_.rows4x4)
1099 : reference_frames_[reference_frame_index]->frame_height();
1100 const int ref_start_x = 0;
1101 const int ref_last_x =
1102 SubsampledValue(reference_upscaled_width, subsampling_x) - 1;
1103 const int ref_start_y = 0;
1104 const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1;
1105
1106 const bool is_scaled = (reference_frame_index != -1) &&
1107 (frame_header_.width != reference_upscaled_width ||
1108 frame_header_.height != reference_height);
1109 const int bitdepth = sequence_header_.color_config.bitdepth;
1110 const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
1111 int ref_block_start_x;
1112 int ref_block_start_y;
1113 int ref_block_end_x;
1114 const bool extend_block = GetReferenceBlockPosition(
1115 reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x,
1116 ref_start_y, ref_last_y, start_x, start_y, step_x, step_y,
1117 reference_buffer->left_border(plane),
1118 reference_buffer->right_border(plane),
1119 reference_buffer->top_border(plane),
1120 reference_buffer->bottom_border(plane), &ref_block_start_x,
1121 &ref_block_start_y, &ref_block_end_x);
1122
1123 // In frame parallel mode, ensure that the reference block has been decoded
1124 // and available for referencing.
1125 if (reference_frame_index != -1 && frame_parallel_) {
1126 int reference_y_max;
1127 if (is_scaled) {
1128 // TODO(vigneshv): For now, we wait for the entire reference frame to be
1129 // decoded if we are using scaled references. This will eventually be
1130 // fixed.
1131 reference_y_max = reference_height;
1132 } else {
1133 reference_y_max =
1134 std::min(ref_block_start_y + height + kSubPixelTaps, ref_last_y);
1135 // For U and V planes with subsampling, we need to multiply
1136 // reference_y_max by 2 since we only track the progress of Y planes.
1137 reference_y_max = LeftShift(reference_y_max, subsampling_y);
1138 }
1139 if (!reference_frames_[reference_frame_index]->WaitUntil(reference_y_max)) {
1140 return false;
1141 }
1142 }
1143
1144 const uint8_t* block_start = nullptr;
1145 ptrdiff_t convolve_buffer_stride;
1146 if (!extend_block) {
1147 const YuvBuffer* const reference_buffer =
1148 (reference_frame_index == -1)
1149 ? current_frame_.buffer()
1150 : reference_frames_[reference_frame_index]->buffer();
1151 convolve_buffer_stride = reference_buffer->stride(plane);
1152 if (reference_frame_index == -1 || is_scaled) {
1153 block_start = reference_buffer->data(plane) +
1154 ref_block_start_y * reference_buffer->stride(plane) +
1155 ref_block_start_x * pixel_size;
1156 } else {
1157 block_start = reference_buffer->data(plane) +
1158 (ref_block_start_y + kConvolveBorderLeftTop) *
1159 reference_buffer->stride(plane) +
1160 (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
1161 }
1162 } else {
1163 // The block width can be at most 2 times as much as current
1164 // block's width because of scaling.
1165 auto block_extended_width = Align<ptrdiff_t>(
1166 (2 * width + kConvolveBorderLeftTop + kConvolveBorderRight) *
1167 pixel_size,
1168 kMaxAlignment);
1169 convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
1170 #if LIBGAV1_MAX_BITDEPTH >= 10
1171 if (bitdepth > 8) {
1172 BuildConvolveBlock<uint16_t>(
1173 plane, reference_frame_index, is_scaled, height, ref_start_x,
1174 ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1175 ref_block_end_x, ref_block_start_y,
1176 block.scratch_buffer->convolve_block_buffer.get(),
1177 convolve_buffer_stride, block_extended_width);
1178 } else {
1179 #endif
1180 BuildConvolveBlock<uint8_t>(
1181 plane, reference_frame_index, is_scaled, height, ref_start_x,
1182 ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1183 ref_block_end_x, ref_block_start_y,
1184 block.scratch_buffer->convolve_block_buffer.get(),
1185 convolve_buffer_stride, block_extended_width);
1186 #if LIBGAV1_MAX_BITDEPTH >= 10
1187 }
1188 #endif
1189 block_start = block.scratch_buffer->convolve_block_buffer.get() +
1190 (is_scaled ? 0
1191 : kConvolveBorderLeftTop * convolve_buffer_stride +
1192 kConvolveBorderLeftTop * pixel_size);
1193 }
1194
1195 const int has_horizontal_filter = static_cast<int>(
1196 ((mv.mv[MotionVector::kColumn] * (1 << (1 - subsampling_x))) & 15) != 0);
1197 const int has_vertical_filter = static_cast<int>(
1198 ((mv.mv[MotionVector::kRow] * (1 << (1 - subsampling_y))) & 15) != 0);
1199 void* const output =
1200 (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest);
1201 ptrdiff_t output_stride = (is_compound || is_inter_intra)
1202 ? /*prediction_stride=*/width
1203 : dest_stride;
1204 #if LIBGAV1_MAX_BITDEPTH >= 10
1205 // |is_inter_intra| calculations are written to the |prediction| buffer.
1206 // Unlike the |is_compound| calculations the output is Pixel and not uint16_t.
1207 // convolve_func() expects |output_stride| to be in bytes and not Pixels.
1208 // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to
1209 // account for this.
1210 if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1211 output_stride *= 2;
1212 }
1213 #endif
1214 assert(output != nullptr);
1215 if (is_scaled) {
1216 dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound];
1217 assert(convolve_func != nullptr);
1218
1219 convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1220 vertical_filter_index, start_x, start_y, step_x, step_y,
1221 width, height, output, output_stride);
1222 } else {
1223 dsp::ConvolveFunc convolve_func =
1224 dsp_.convolve[reference_frame_index == -1][is_compound]
1225 [has_vertical_filter][has_horizontal_filter];
1226 assert(convolve_func != nullptr);
1227
1228 convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1229 vertical_filter_index, start_x, start_y, width, height,
1230 output, output_stride);
1231 }
1232 return true;
1233 }
1234
BlockWarpProcess(const Block & block,const Plane plane,const int index,const int block_start_x,const int block_start_y,const int width,const int height,GlobalMotion * const warp_params,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1235 bool Tile::BlockWarpProcess(const Block& block, const Plane plane,
1236 const int index, const int block_start_x,
1237 const int block_start_y, const int width,
1238 const int height, GlobalMotion* const warp_params,
1239 const bool is_compound, const bool is_inter_intra,
1240 uint8_t* const dest, const ptrdiff_t dest_stride) {
1241 assert(width >= 8 && height >= 8);
1242 const BlockParameters& bp = *block.bp;
1243 const int reference_frame_index =
1244 frame_header_.reference_frame_index[bp.reference_frame[index] -
1245 kReferenceFrameLast];
1246 const uint8_t* const source =
1247 reference_frames_[reference_frame_index]->buffer()->data(plane);
1248 ptrdiff_t source_stride =
1249 reference_frames_[reference_frame_index]->buffer()->stride(plane);
1250 const int source_width =
1251 reference_frames_[reference_frame_index]->buffer()->width(plane);
1252 const int source_height =
1253 reference_frames_[reference_frame_index]->buffer()->height(plane);
1254 uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index];
1255
1256 // In frame parallel mode, ensure that the reference block has been decoded
1257 // and available for referencing.
1258 if (frame_parallel_) {
1259 int reference_y_max = -1;
1260 // Find out the maximum y-coordinate for warping.
1261 for (int start_y = block_start_y; start_y < block_start_y + height;
1262 start_y += 8) {
1263 for (int start_x = block_start_x; start_x < block_start_x + width;
1264 start_x += 8) {
1265 const int src_x = (start_x + 4) << subsampling_x_[plane];
1266 const int src_y = (start_y + 4) << subsampling_y_[plane];
1267 const int dst_y = src_x * warp_params->params[4] +
1268 src_y * warp_params->params[5] +
1269 warp_params->params[1];
1270 const int y4 = dst_y >> subsampling_y_[plane];
1271 const int iy4 = y4 >> kWarpedModelPrecisionBits;
1272 reference_y_max = std::max(iy4 + 8, reference_y_max);
1273 }
1274 }
1275 // For U and V planes with subsampling, we need to multiply reference_y_max
1276 // by 2 since we only track the progress of Y planes.
1277 reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]);
1278 if (!reference_frames_[reference_frame_index]->WaitUntil(reference_y_max)) {
1279 return false;
1280 }
1281 }
1282 if (is_compound) {
1283 dsp_.warp_compound(source, source_stride, source_width, source_height,
1284 warp_params->params, subsampling_x_[plane],
1285 subsampling_y_[plane], block_start_x, block_start_y,
1286 width, height, warp_params->alpha, warp_params->beta,
1287 warp_params->gamma, warp_params->delta, prediction,
1288 /*prediction_stride=*/width);
1289 } else {
1290 void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest;
1291 ptrdiff_t output_stride =
1292 is_inter_intra ? /*prediction_stride=*/width : dest_stride;
1293 #if LIBGAV1_MAX_BITDEPTH >= 10
1294 // |is_inter_intra| calculations are written to the |prediction| buffer.
1295 // Unlike the |is_compound| calculations the output is Pixel and not
1296 // uint16_t. warp_clip() expects |output_stride| to be in bytes and not
1297 // Pixels. |prediction_stride| is in units of uint16_t. Adjust
1298 // |output_stride| to account for this.
1299 if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1300 output_stride *= 2;
1301 }
1302 #endif
1303 dsp_.warp(source, source_stride, source_width, source_height,
1304 warp_params->params, subsampling_x_[plane], subsampling_y_[plane],
1305 block_start_x, block_start_y, width, height, warp_params->alpha,
1306 warp_params->beta, warp_params->gamma, warp_params->delta, output,
1307 output_stride);
1308 }
1309 return true;
1310 }
1311
1312 } // namespace libgav1
1313