1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <algorithm>
16 #include <array>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22 #include <memory>
23 
24 #include "src/buffer_pool.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/motion_vector.h"
28 #include "src/obu_parser.h"
29 #include "src/prediction_mask.h"
30 #include "src/tile.h"
31 #include "src/utils/array_2d.h"
32 #include "src/utils/bit_mask_set.h"
33 #include "src/utils/block_parameters_holder.h"
34 #include "src/utils/common.h"
35 #include "src/utils/constants.h"
36 #include "src/utils/logging.h"
37 #include "src/utils/memory.h"
38 #include "src/utils/types.h"
39 #include "src/warp_prediction.h"
40 #include "src/yuv_buffer.h"
41 
42 namespace libgav1 {
43 namespace {
44 
45 // Import all the constants in the anonymous namespace.
46 #include "src/inter_intra_masks.inc"
47 
48 constexpr int kAngleStep = 3;
49 constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = {
50     0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0};
51 
52 // The following modes need both the left_column and top_row for intra
53 // prediction. For directional modes left/top requirement is inferred based on
54 // the prediction angle. For Dc modes, left/top requirement is inferred based on
55 // whether or not left/top is available.
56 constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth,
57                                       kPredictionModeSmoothHorizontal,
58                                       kPredictionModeSmoothVertical,
59                                       kPredictionModePaeth);
60 
GetDirectionalIntraPredictorDerivative(const int angle)61 int16_t GetDirectionalIntraPredictorDerivative(const int angle) {
62   assert(angle >= 3);
63   assert(angle <= 87);
64   return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1];
65 }
66 
67 // Maps the block_size to an index as follows:
68 //  kBlock8x8 => 0.
69 //  kBlock8x16 => 1.
70 //  kBlock8x32 => 2.
71 //  kBlock16x8 => 3.
72 //  kBlock16x16 => 4.
73 //  kBlock16x32 => 5.
74 //  kBlock32x8 => 6.
75 //  kBlock32x16 => 7.
76 //  kBlock32x32 => 8.
GetWedgeBlockSizeIndex(BlockSize block_size)77 int GetWedgeBlockSizeIndex(BlockSize block_size) {
78   assert(block_size >= kBlock8x8);
79   return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) -
80          static_cast<int>(block_size >= kBlock32x8);
81 }
82 
83 // Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively.
GetInterIntraMaskLookupIndex(int dimension)84 int GetInterIntraMaskLookupIndex(int dimension) {
85   assert(dimension == 4 || dimension == 8 || dimension == 16 ||
86          dimension == 32);
87   return FloorLog2(dimension) - 2;
88 }
89 
// 7.11.2.9.
// Returns the intra edge filter strength (0 to 3) for a block of |width| x
// |height|. Only the sum of the two dimensions and the absolute value of
// |delta| are used. |filter_type| == 0 selects the first threshold table; any
// other value selects the second. A return value of 0 means no threshold was
// reached.
int GetIntraEdgeFilterStrength(int width, int height, int filter_type,
                               int delta) {
  const int sum = width + height;
  delta = std::abs(delta);
  if (filter_type == 0) {
    if (sum <= 8) {
      if (delta >= 56) return 1;
    } else if (sum <= 16) {
      if (delta >= 40) return 1;
    } else if (sum <= 24) {
      // Thresholds are tested from the strongest to the weakest.
      if (delta >= 32) return 3;
      if (delta >= 16) return 2;
      if (delta >= 8) return 1;
    } else if (sum <= 32) {
      if (delta >= 32) return 3;
      if (delta >= 4) return 2;
      return 1;
    } else {
      return 3;
    }
  } else {
    if (sum <= 8) {
      if (delta >= 64) return 2;
      if (delta >= 40) return 1;
    } else if (sum <= 16) {
      if (delta >= 48) return 2;
      if (delta >= 20) return 1;
    } else if (sum <= 24) {
      if (delta >= 4) return 3;
    } else {
      return 3;
    }
  }
  return 0;
}
126 
// 7.11.2.10.
// Returns true if the intra edge should be upsampled for a block of |width| x
// |height|. Only the absolute value of |delta| is used, and it must be
// non-zero. Upsampling is never done once that value reaches 40; below that,
// the block must be small enough: width + height <= 8 when |filter_type| is 1,
// and <= 16 otherwise.
bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) {
  const int sum = width + height;
  delta = std::abs(delta);
  // This function should not be called when the prediction angle is 90 or 180.
  assert(delta != 0);
  if (delta >= 40) return false;
  return (filter_type == 1) ? sum <= 8 : sum <= 16;
}
136 
137 constexpr uint8_t kQuantizedDistanceWeight[4][2] = {
138     {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}};
139 
140 constexpr uint8_t kQuantizedDistanceLookup[4][2] = {
141     {9, 7}, {11, 5}, {12, 4}, {13, 3}};
142 
GetDistanceWeights(const int distance[2],int weight[2])143 void GetDistanceWeights(const int distance[2], int weight[2]) {
144   // Note: distance[0] and distance[1] correspond to relative distance
145   // between current frame and reference frame [1] and [0], respectively.
146   const int order = static_cast<int>(distance[0] <= distance[1]);
147   if (distance[0] == 0 || distance[1] == 0) {
148     weight[0] = kQuantizedDistanceLookup[3][order];
149     weight[1] = kQuantizedDistanceLookup[3][1 - order];
150   } else {
151     int i;
152     for (i = 0; i < 3; ++i) {
153       const int weight_0 = kQuantizedDistanceWeight[i][order];
154       const int weight_1 = kQuantizedDistanceWeight[i][1 - order];
155       if (order == 0) {
156         if (distance[0] * weight_0 < distance[1] * weight_1) break;
157       } else {
158         if (distance[0] * weight_0 > distance[1] * weight_1) break;
159       }
160     }
161     weight[0] = kQuantizedDistanceLookup[i][order];
162     weight[1] = kQuantizedDistanceLookup[i][1 - order];
163   }
164 }
165 
GetIntraPredictor(PredictionMode mode,bool has_left,bool has_top)166 dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left,
167                                       bool has_top) {
168   if (mode == kPredictionModeDc) {
169     if (has_left && has_top) {
170       return dsp::kIntraPredictorDc;
171     }
172     if (has_left) {
173       return dsp::kIntraPredictorDcLeft;
174     }
175     if (has_top) {
176       return dsp::kIntraPredictorDcTop;
177     }
178     return dsp::kIntraPredictorDcFill;
179   }
180   switch (mode) {
181     case kPredictionModePaeth:
182       return dsp::kIntraPredictorPaeth;
183     case kPredictionModeSmooth:
184       return dsp::kIntraPredictorSmooth;
185     case kPredictionModeSmoothVertical:
186       return dsp::kIntraPredictorSmoothVertical;
187     case kPredictionModeSmoothHorizontal:
188       return dsp::kIntraPredictorSmoothHorizontal;
189     default:
190       return dsp::kNumIntraPredictors;
191   }
192 }
193 
GetStartPoint(Array2DView<uint8_t> * const buffer,const int plane,const int x,const int y,const int bitdepth)194 uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane,
195                        const int x, const int y, const int bitdepth) {
196 #if LIBGAV1_MAX_BITDEPTH >= 10
197   if (bitdepth > 8) {
198     Array2DView<uint16_t> buffer16(
199         buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t),
200         reinterpret_cast<uint16_t*>(&buffer[plane][0][0]));
201     return reinterpret_cast<uint8_t*>(&buffer16[y][x]);
202   }
203 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
204   static_cast<void>(bitdepth);
205   return &buffer[plane][y][x];
206 }
207 
GetPixelPositionFromHighScale(int start,int step,int offset)208 int GetPixelPositionFromHighScale(int start, int step, int offset) {
209   return (start + step * offset) >> kScaleSubPixelBits;
210 }
211 
// Selects the mask blend function from |dsp|. Inter-intra blending without a
// wedge mask always uses the entry at index 0 with is_inter_intra set; every
// other case is indexed by |subsampling_x| + |subsampling_y| and
// |is_inter_intra|.
dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra,
                                    bool is_wedge_inter_intra,
                                    int subsampling_x, int subsampling_y) {
  if (is_inter_intra && !is_wedge_inter_intra) {
    return dsp.mask_blend[0][/*is_inter_intra=*/true];
  }
  return dsp.mask_blend[subsampling_x + subsampling_y][is_inter_intra];
}
219 
220 }  // namespace
221 
222 template <typename Pixel>
IntraPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool has_top_right,bool has_bottom_left,PredictionMode mode,TransformSize tx_size)223 void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y,
224                            bool has_left, bool has_top, bool has_top_right,
225                            bool has_bottom_left, PredictionMode mode,
226                            TransformSize tx_size) {
227   const int width = 1 << kTransformWidthLog2[tx_size];
228   const int height = 1 << kTransformHeightLog2[tx_size];
229   const int x_shift = subsampling_x_[plane];
230   const int y_shift = subsampling_y_[plane];
231   const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1;
232   const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1;
233   // For performance reasons, do not initialize the following two buffers.
234   alignas(kMaxAlignment) Pixel top_row_data[160];
235   alignas(kMaxAlignment) Pixel left_column_data[160];
236 #if LIBGAV1_MSAN
237   if (IsDirectionalMode(mode)) {
238     memset(top_row_data, 0, sizeof(top_row_data));
239     memset(left_column_data, 0, sizeof(left_column_data));
240   }
241 #endif
242   // Some predictors use |top_row_data| and |left_column_data| with a negative
243   // offset to access pixels to the top-left of the current block. So have some
244   // space before the arrays to allow populating those without having to move
245   // the rest of the array.
246   Pixel* const top_row = top_row_data + 16;
247   Pixel* const left_column = left_column_data + 16;
248   const int bitdepth = sequence_header_.color_config.bitdepth;
249   const int top_and_left_size = width + height;
250   const bool is_directional_mode = IsDirectionalMode(mode);
251   const PredictionParameters& prediction_parameters =
252       *block.bp->prediction_parameters;
253   const bool use_filter_intra =
254       (plane == kPlaneY && prediction_parameters.use_filter_intra);
255   const int prediction_angle =
256       is_directional_mode
257           ? kPredictionModeToAngle[mode] +
258                 prediction_parameters.angle_delta[GetPlaneType(plane)] *
259                     kAngleStep
260           : 0;
261   // Directional prediction requires buffers larger than the width or height.
262   const int top_size = is_directional_mode ? top_and_left_size : width;
263   const int left_size = is_directional_mode ? top_and_left_size : height;
264   const int top_right_size =
265       is_directional_mode ? (has_top_right ? 2 : 1) * width : width;
266   const int bottom_left_size =
267       is_directional_mode ? (has_bottom_left ? 2 : 1) * height : height;
268 
269   Array2DView<Pixel> buffer(buffer_[plane].rows(),
270                             buffer_[plane].columns() / sizeof(Pixel),
271                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
272   const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
273                          (is_directional_mode && prediction_angle < 180) ||
274                          (mode == kPredictionModeDc && has_top);
275   const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
276                           (is_directional_mode && prediction_angle > 90) ||
277                           (mode == kPredictionModeDc && has_left);
278 
279   const Pixel* top_row_src = buffer[y - 1];
280 
281   // Determine if we need to retrieve the top row from
282   // |intra_prediction_buffer_|.
283   if ((needs_top || needs_left) && use_intra_prediction_buffer_) {
284     // Superblock index of block.row4x4. block.row4x4 is always in luma
285     // dimension (no subsampling).
286     const int current_superblock_index =
287         block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4);
288     // Superblock index of y - 1. y is in the plane dimension (chroma planes
289     // could be subsampled).
290     const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) -
291                             subsampling_y_[plane];
292     const int top_row_superblock_index = (y - 1) >> plane_shift;
293     // If the superblock index of y - 1 is not that of the current superblock,
294     // then we will have to retrieve the top row from the
295     // |intra_prediction_buffer_|.
296     if (current_superblock_index != top_row_superblock_index) {
297       top_row_src = reinterpret_cast<const Pixel*>(
298           (*intra_prediction_buffer_)[plane].get());
299     }
300   }
301 
302   if (needs_top) {
303     // Compute top_row.
304     if (has_top || has_left) {
305       const int left_index = has_left ? x - 1 : x;
306       top_row[-1] = has_top ? top_row_src[left_index] : buffer[y][left_index];
307     } else {
308       top_row[-1] = 1 << (bitdepth - 1);
309     }
310     if (!has_top && has_left) {
311       Memset(top_row, buffer[y][x - 1], top_size);
312     } else if (!has_top && !has_left) {
313       Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size);
314     } else {
315       const int top_limit = std::min(max_x - x + 1, top_right_size);
316       memcpy(top_row, &top_row_src[x], top_limit * sizeof(Pixel));
317       // Even though it is safe to call Memset with a size of 0, accessing
318       // top_row_src[top_limit - x + 1] is not allowed when this condition is
319       // false.
320       if (top_size - top_limit > 0) {
321         Memset(top_row + top_limit, top_row_src[top_limit + x - 1],
322                top_size - top_limit);
323       }
324     }
325   }
326   if (needs_left) {
327     // Compute left_column.
328     if (has_top || has_left) {
329       const int left_index = has_left ? x - 1 : x;
330       left_column[-1] =
331           has_top ? top_row_src[left_index] : buffer[y][left_index];
332     } else {
333       left_column[-1] = 1 << (bitdepth - 1);
334     }
335     if (!has_left && has_top) {
336       Memset(left_column, top_row_src[x], left_size);
337     } else if (!has_left && !has_top) {
338       Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size);
339     } else {
340       const int left_limit = std::min(max_y - y + 1, bottom_left_size);
341       for (int i = 0; i < left_limit; ++i) {
342         left_column[i] = buffer[y + i][x - 1];
343       }
344       // Even though it is safe to call Memset with a size of 0, accessing
345       // buffer[left_limit - y + 1][x - 1] is not allowed when this condition is
346       // false.
347       if (left_size - left_limit > 0) {
348         Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1],
349                left_size - left_limit);
350       }
351     }
352   }
353   Pixel* const dest = &buffer[y][x];
354   const ptrdiff_t dest_stride = buffer_[plane].columns();
355   if (use_filter_intra) {
356     dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column,
357                                 prediction_parameters.filter_intra_mode, width,
358                                 height);
359   } else if (is_directional_mode) {
360     DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left,
361                           needs_top, prediction_angle, width, height, max_x,
362                           max_y, tx_size, top_row, left_column);
363   } else {
364     const dsp::IntraPredictor predictor =
365         GetIntraPredictor(mode, has_left, has_top);
366     assert(predictor != dsp::kNumIntraPredictors);
367     dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row,
368                                               left_column);
369   }
370 }
371 
372 template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane,
373                                              int x, int y, bool has_left,
374                                              bool has_top, bool has_top_right,
375                                              bool has_bottom_left,
376                                              PredictionMode mode,
377                                              TransformSize tx_size);
378 #if LIBGAV1_MAX_BITDEPTH >= 10
379 template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane,
380                                               int x, int y, bool has_left,
381                                               bool has_top, bool has_top_right,
382                                               bool has_bottom_left,
383                                               PredictionMode mode,
384                                               TransformSize tx_size);
385 #endif
386 
387 constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth,
388                                                kPredictionModeSmoothHorizontal,
389                                                kPredictionModeSmoothVertical);
390 
IsSmoothPrediction(int row,int column,Plane plane) const391 bool Tile::IsSmoothPrediction(int row, int column, Plane plane) const {
392   const BlockParameters& bp = *block_parameters_holder_.Find(row, column);
393   PredictionMode mode;
394   if (plane == kPlaneY) {
395     mode = bp.y_mode;
396   } else {
397     if (bp.reference_frame[0] > kReferenceFrameIntra) return false;
398     mode = bp.uv_mode;
399   }
400   return kPredictionModeSmoothMask.Contains(mode);
401 }
402 
GetIntraEdgeFilterType(const Block & block,Plane plane) const403 int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const {
404   const int subsampling_x = subsampling_x_[plane];
405   const int subsampling_y = subsampling_y_[plane];
406   if (block.top_available[plane]) {
407     const int row =
408         block.row4x4 - 1 -
409         static_cast<int>(subsampling_y != 0 && (block.row4x4 & 1) != 0);
410     const int column =
411         block.column4x4 +
412         static_cast<int>(subsampling_x != 0 && (block.column4x4 & 1) == 0);
413     if (IsSmoothPrediction(row, column, plane)) return 1;
414   }
415   if (block.left_available[plane]) {
416     const int row = block.row4x4 + static_cast<int>(subsampling_y != 0 &&
417                                                     (block.row4x4 & 1) == 0);
418     const int column =
419         block.column4x4 - 1 -
420         static_cast<int>(subsampling_x != 0 && (block.column4x4 & 1) != 0);
421     if (IsSmoothPrediction(row, column, plane)) return 1;
422   }
423   return 0;
424 }
425 
426 template <typename Pixel>
DirectionalPrediction(const Block & block,Plane plane,int x,int y,bool has_left,bool has_top,bool needs_left,bool needs_top,int prediction_angle,int width,int height,int max_x,int max_y,TransformSize tx_size,Pixel * const top_row,Pixel * const left_column)427 void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y,
428                                  bool has_left, bool has_top, bool needs_left,
429                                  bool needs_top, int prediction_angle,
430                                  int width, int height, int max_x, int max_y,
431                                  TransformSize tx_size, Pixel* const top_row,
432                                  Pixel* const left_column) {
433   Array2DView<Pixel> buffer(buffer_[plane].rows(),
434                             buffer_[plane].columns() / sizeof(Pixel),
435                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
436   Pixel* const dest = &buffer[y][x];
437   const ptrdiff_t stride = buffer_[plane].columns();
438   if (prediction_angle == 90) {
439     dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical](
440         dest, stride, top_row, left_column);
441     return;
442   }
443   if (prediction_angle == 180) {
444     dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal](
445         dest, stride, top_row, left_column);
446     return;
447   }
448 
449   bool upsampled_top = false;
450   bool upsampled_left = false;
451   if (sequence_header_.enable_intra_edge_filter) {
452     const int filter_type = GetIntraEdgeFilterType(block, plane);
453     if (prediction_angle > 90 && prediction_angle < 180 &&
454         (width + height) >= 24) {
455       // 7.11.2.7.
456       left_column[-1] = top_row[-1] = RightShiftWithRounding(
457           left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4);
458     }
459     if (has_top && needs_top) {
460       const int strength = GetIntraEdgeFilterStrength(
461           width, height, filter_type, prediction_angle - 90);
462       if (strength > 0) {
463         const int num_pixels = std::min(width, max_x - x + 1) +
464                                ((prediction_angle < 90) ? height : 0) + 1;
465         dsp_.intra_edge_filter(top_row - 1, num_pixels, strength);
466       }
467     }
468     if (has_left && needs_left) {
469       const int strength = GetIntraEdgeFilterStrength(
470           width, height, filter_type, prediction_angle - 180);
471       if (strength > 0) {
472         const int num_pixels = std::min(height, max_y - y + 1) +
473                                ((prediction_angle > 180) ? width : 0) + 1;
474         dsp_.intra_edge_filter(left_column - 1, num_pixels, strength);
475       }
476     }
477     upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type,
478                                           prediction_angle - 90);
479     if (upsampled_top && needs_top) {
480       const int num_pixels = width + ((prediction_angle < 90) ? height : 0);
481       dsp_.intra_edge_upsampler(top_row, num_pixels);
482     }
483     upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type,
484                                            prediction_angle - 180);
485     if (upsampled_left && needs_left) {
486       const int num_pixels = height + ((prediction_angle > 180) ? width : 0);
487       dsp_.intra_edge_upsampler(left_column, num_pixels);
488     }
489   }
490 
491   if (prediction_angle < 90) {
492     const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle);
493     dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height,
494                                            dx, upsampled_top);
495   } else if (prediction_angle < 180) {
496     const int dx =
497         GetDirectionalIntraPredictorDerivative(180 - prediction_angle);
498     const int dy =
499         GetDirectionalIntraPredictorDerivative(prediction_angle - 90);
500     dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column,
501                                            width, height, dx, dy, upsampled_top,
502                                            upsampled_left);
503   } else {
504     assert(prediction_angle < 270);
505     const int dy =
506         GetDirectionalIntraPredictorDerivative(270 - prediction_angle);
507     dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width,
508                                            height, dy, upsampled_left);
509   }
510 }
511 
512 template <typename Pixel>
PalettePrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const int x,const int y,const TransformSize tx_size)513 void Tile::PalettePrediction(const Block& block, const Plane plane,
514                              const int start_x, const int start_y, const int x,
515                              const int y, const TransformSize tx_size) {
516   const int tx_width = kTransformWidth[tx_size];
517   const int tx_height = kTransformHeight[tx_size];
518   const uint16_t* const palette = block.bp->palette_mode_info.color[plane];
519   const PlaneType plane_type = GetPlaneType(plane);
520   const int x4 = MultiplyBy4(x);
521   const int y4 = MultiplyBy4(y);
522   Array2DView<Pixel> buffer(buffer_[plane].rows(),
523                             buffer_[plane].columns() / sizeof(Pixel),
524                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
525   for (int row = 0; row < tx_height; ++row) {
526     assert(block.bp->prediction_parameters
527                ->color_index_map[plane_type][y4 + row] != nullptr);
528     for (int column = 0; column < tx_width; ++column) {
529       buffer[start_y + row][start_x + column] =
530           palette[block.bp->prediction_parameters
531                       ->color_index_map[plane_type][y4 + row][x4 + column]];
532     }
533   }
534 }
535 
536 template void Tile::PalettePrediction<uint8_t>(
537     const Block& block, const Plane plane, const int start_x, const int start_y,
538     const int x, const int y, const TransformSize tx_size);
539 #if LIBGAV1_MAX_BITDEPTH >= 10
540 template void Tile::PalettePrediction<uint16_t>(
541     const Block& block, const Plane plane, const int start_x, const int start_y,
542     const int x, const int y, const TransformSize tx_size);
543 #endif
544 
545 template <typename Pixel>
ChromaFromLumaPrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const TransformSize tx_size)546 void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane,
547                                     const int start_x, const int start_y,
548                                     const TransformSize tx_size) {
549   const int subsampling_x = subsampling_x_[plane];
550   const int subsampling_y = subsampling_y_[plane];
551   const PredictionParameters& prediction_parameters =
552       *block.bp->prediction_parameters;
553   Array2DView<Pixel> y_buffer(
554       buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel),
555       reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0]));
556   if (!block.scratch_buffer->cfl_luma_buffer_valid) {
557     const int luma_x = start_x << subsampling_x;
558     const int luma_y = start_y << subsampling_y;
559     dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y](
560         block.scratch_buffer->cfl_luma_buffer,
561         prediction_parameters.max_luma_width - luma_x,
562         prediction_parameters.max_luma_height - luma_y,
563         reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]),
564         buffer_[kPlaneY].columns());
565     block.scratch_buffer->cfl_luma_buffer_valid = true;
566   }
567   Array2DView<Pixel> buffer(buffer_[plane].rows(),
568                             buffer_[plane].columns() / sizeof(Pixel),
569                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
570   dsp_.cfl_intra_predictors[tx_size](
571       reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]),
572       buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer,
573       (plane == kPlaneU) ? prediction_parameters.cfl_alpha_u
574                          : prediction_parameters.cfl_alpha_v);
575 }
576 
577 template void Tile::ChromaFromLumaPrediction<uint8_t>(
578     const Block& block, const Plane plane, const int start_x, const int start_y,
579     const TransformSize tx_size);
580 #if LIBGAV1_MAX_BITDEPTH >= 10
581 template void Tile::ChromaFromLumaPrediction<uint16_t>(
582     const Block& block, const Plane plane, const int start_x, const int start_y,
583     const TransformSize tx_size);
584 #endif
585 
InterIntraPrediction(uint16_t * const prediction_0,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const PredictionParameters & prediction_parameters,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,uint8_t * const dest,const ptrdiff_t dest_stride)586 void Tile::InterIntraPrediction(
587     uint16_t* const prediction_0, const uint8_t* const prediction_mask,
588     const ptrdiff_t prediction_mask_stride,
589     const PredictionParameters& prediction_parameters,
590     const int prediction_width, const int prediction_height,
591     const int subsampling_x, const int subsampling_y, uint8_t* const dest,
592     const ptrdiff_t dest_stride) {
593   assert(prediction_mask != nullptr);
594   assert(prediction_parameters.compound_prediction_type ==
595              kCompoundPredictionTypeIntra ||
596          prediction_parameters.compound_prediction_type ==
597              kCompoundPredictionTypeWedge);
598   // The first buffer of InterIntra is from inter prediction.
599   // The second buffer is from intra prediction.
600 #if LIBGAV1_MAX_BITDEPTH >= 10
601   if (sequence_header_.color_config.bitdepth > 8) {
602     GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true,
603                      prediction_parameters.is_wedge_inter_intra, subsampling_x,
604                      subsampling_y)(
605         prediction_0, reinterpret_cast<uint16_t*>(dest),
606         dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride,
607         prediction_width, prediction_height, dest, dest_stride);
608     return;
609   }
610 #endif
611   const int function_index = prediction_parameters.is_wedge_inter_intra
612                                  ? subsampling_x + subsampling_y
613                                  : 0;
614   // |is_inter_intra| prediction values are stored in a Pixel buffer but it is
615   // currently declared as a uint16_t buffer.
616   // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and
617   // remove the reinterpret_cast.
618   dsp_.inter_intra_mask_blend_8bpp[function_index](
619       reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride,
620       prediction_mask, prediction_mask_stride, prediction_width,
621       prediction_height);
622 }
623 
CompoundInterPrediction(const Block & block,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,const int candidate_row,const int candidate_column,uint8_t * dest,const ptrdiff_t dest_stride)624 void Tile::CompoundInterPrediction(
625     const Block& block, const uint8_t* const prediction_mask,
626     const ptrdiff_t prediction_mask_stride, const int prediction_width,
627     const int prediction_height, const int subsampling_x,
628     const int subsampling_y, const int candidate_row,
629     const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) {
630   const PredictionParameters& prediction_parameters =
631       *block.bp->prediction_parameters;
632 
633   void* prediction[2];
634 #if LIBGAV1_MAX_BITDEPTH >= 10
635   const int bitdepth = sequence_header_.color_config.bitdepth;
636   if (bitdepth > 8) {
637     prediction[0] = block.scratch_buffer->prediction_buffer[0];
638     prediction[1] = block.scratch_buffer->prediction_buffer[1];
639   } else {
640 #endif
641     prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0];
642     prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1];
643 #if LIBGAV1_MAX_BITDEPTH >= 10
644   }
645 #endif
646 
647   switch (prediction_parameters.compound_prediction_type) {
648     case kCompoundPredictionTypeWedge:
649     case kCompoundPredictionTypeDiffWeighted:
650       GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false,
651                        prediction_parameters.is_wedge_inter_intra,
652                        subsampling_x, subsampling_y)(
653           prediction[0], prediction[1],
654           /*prediction_stride=*/prediction_width, prediction_mask,
655           prediction_mask_stride, prediction_width, prediction_height, dest,
656           dest_stride);
657       break;
658     case kCompoundPredictionTypeDistance:
659       DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width,
660                                  prediction_height, candidate_row,
661                                  candidate_column, dest, dest_stride);
662       break;
663     default:
664       assert(prediction_parameters.compound_prediction_type ==
665              kCompoundPredictionTypeAverage);
666       dsp_.average_blend(prediction[0], prediction[1], prediction_width,
667                          prediction_height, dest, dest_stride);
668       break;
669   }
670 }
671 
GetWarpParams(const Block & block,const Plane plane,const int prediction_width,const int prediction_height,const PredictionParameters & prediction_parameters,const ReferenceFrameType reference_type,bool * const is_local_valid,GlobalMotion * const global_motion_params,GlobalMotion * const local_warp_params) const672 GlobalMotion* Tile::GetWarpParams(
673     const Block& block, const Plane plane, const int prediction_width,
674     const int prediction_height,
675     const PredictionParameters& prediction_parameters,
676     const ReferenceFrameType reference_type, bool* const is_local_valid,
677     GlobalMotion* const global_motion_params,
678     GlobalMotion* const local_warp_params) const {
679   if (prediction_width < 8 || prediction_height < 8 ||
680       frame_header_.force_integer_mv == 1) {
681     return nullptr;
682   }
683   if (plane == kPlaneY) {
684     *is_local_valid =
685         prediction_parameters.motion_mode == kMotionModeLocalWarp &&
686         WarpEstimation(
687             prediction_parameters.num_warp_samples, DivideBy4(prediction_width),
688             DivideBy4(prediction_height), block.row4x4, block.column4x4,
689             block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates,
690             local_warp_params) &&
691         SetupShear(local_warp_params);
692   }
693   if (prediction_parameters.motion_mode == kMotionModeLocalWarp &&
694       *is_local_valid) {
695     return local_warp_params;
696   }
697   if (!IsScaled(reference_type)) {
698     GlobalMotionTransformationType global_motion_type =
699         (reference_type != kReferenceFrameIntra)
700             ? global_motion_params->type
701             : kNumGlobalMotionTransformationTypes;
702     const bool is_global_valid =
703         IsGlobalMvBlock(block.bp->is_global_mv_block, global_motion_type) &&
704         SetupShear(global_motion_params);
705     // Valid global motion type implies reference type can't be intra.
706     assert(!is_global_valid || reference_type != kReferenceFrameIntra);
707     if (is_global_valid) return global_motion_params;
708   }
709   return nullptr;
710 }
711 
InterPrediction(const Block & block,const Plane plane,const int x,const int y,const int prediction_width,const int prediction_height,int candidate_row,int candidate_column,bool * const is_local_valid,GlobalMotion * const local_warp_params)712 bool Tile::InterPrediction(const Block& block, const Plane plane, const int x,
713                            const int y, const int prediction_width,
714                            const int prediction_height, int candidate_row,
715                            int candidate_column, bool* const is_local_valid,
716                            GlobalMotion* const local_warp_params) {
717   const int bitdepth = sequence_header_.color_config.bitdepth;
718   const BlockParameters& bp = *block.bp;
719   const BlockParameters& bp_reference =
720       *block_parameters_holder_.Find(candidate_row, candidate_column);
721   const bool is_compound =
722       bp_reference.reference_frame[1] > kReferenceFrameIntra;
723   assert(bp.is_inter);
724   const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra;
725 
726   const PredictionParameters& prediction_parameters =
727       *block.bp->prediction_parameters;
728   uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth);
729   const ptrdiff_t dest_stride = buffer_[plane].columns();  // In bytes.
730   for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) {
731     const ReferenceFrameType reference_type =
732         bp_reference.reference_frame[index];
733     GlobalMotion global_motion_params =
734         frame_header_.global_motion[reference_type];
735     GlobalMotion* warp_params =
736         GetWarpParams(block, plane, prediction_width, prediction_height,
737                       prediction_parameters, reference_type, is_local_valid,
738                       &global_motion_params, local_warp_params);
739     if (warp_params != nullptr) {
740       if (!BlockWarpProcess(block, plane, index, x, y, prediction_width,
741                             prediction_height, warp_params, is_compound,
742                             is_inter_intra, dest, dest_stride)) {
743         return false;
744       }
745     } else {
746       const int reference_index =
747           prediction_parameters.use_intra_block_copy
748               ? -1
749               : frame_header_.reference_frame_index[reference_type -
750                                                     kReferenceFrameLast];
751       if (!BlockInterPrediction(
752               block, plane, reference_index, bp_reference.mv.mv[index], x, y,
753               prediction_width, prediction_height, candidate_row,
754               candidate_column, block.scratch_buffer->prediction_buffer[index],
755               is_compound, is_inter_intra, dest, dest_stride)) {
756         return false;
757       }
758     }
759   }
760 
761   const int subsampling_x = subsampling_x_[plane];
762   const int subsampling_y = subsampling_y_[plane];
763   ptrdiff_t prediction_mask_stride = 0;
764   const uint8_t* prediction_mask = nullptr;
765   if (prediction_parameters.compound_prediction_type ==
766       kCompoundPredictionTypeWedge) {
767     const Array2D<uint8_t>& wedge_mask =
768         wedge_masks_[GetWedgeBlockSizeIndex(block.size)]
769                     [prediction_parameters.wedge_sign]
770                     [prediction_parameters.wedge_index];
771     prediction_mask = wedge_mask[0];
772     prediction_mask_stride = wedge_mask.columns();
773   } else if (prediction_parameters.compound_prediction_type ==
774              kCompoundPredictionTypeIntra) {
775     // 7.11.3.13. The inter intra masks are precomputed and stored as a set of
776     // look up tables.
777     assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes);
778     prediction_mask =
779         kInterIntraMasks[prediction_parameters.inter_intra_mode]
780                         [GetInterIntraMaskLookupIndex(prediction_width)]
781                         [GetInterIntraMaskLookupIndex(prediction_height)];
782     prediction_mask_stride = prediction_width;
783   } else if (prediction_parameters.compound_prediction_type ==
784              kCompoundPredictionTypeDiffWeighted) {
785     if (plane == kPlaneY) {
786       assert(prediction_width >= 8);
787       assert(prediction_height >= 8);
788       dsp_.weight_mask[FloorLog2(prediction_width) - 3]
789                       [FloorLog2(prediction_height) - 3]
790                       [static_cast<int>(prediction_parameters.mask_is_inverse)](
791                           block.scratch_buffer->prediction_buffer[0],
792                           block.scratch_buffer->prediction_buffer[1],
793                           block.scratch_buffer->weight_mask,
794                           kMaxSuperBlockSizeInPixels);
795     }
796     prediction_mask = block.scratch_buffer->weight_mask;
797     prediction_mask_stride = kMaxSuperBlockSizeInPixels;
798   }
799 
800   if (is_compound) {
801     CompoundInterPrediction(block, prediction_mask, prediction_mask_stride,
802                             prediction_width, prediction_height, subsampling_x,
803                             subsampling_y, candidate_row, candidate_column,
804                             dest, dest_stride);
805   } else if (prediction_parameters.motion_mode == kMotionModeObmc) {
806     // Obmc mode is allowed only for single reference (!is_compound).
807     return ObmcPrediction(block, plane, prediction_width, prediction_height);
808   } else if (is_inter_intra) {
809     // InterIntra and obmc must be mutually exclusive.
810     InterIntraPrediction(
811         block.scratch_buffer->prediction_buffer[0], prediction_mask,
812         prediction_mask_stride, prediction_parameters, prediction_width,
813         prediction_height, subsampling_x, subsampling_y, dest, dest_stride);
814   }
815   return true;
816 }
817 
ObmcBlockPrediction(const Block & block,const MotionVector & mv,const Plane plane,const int reference_frame_index,const int width,const int height,const int x,const int y,const int candidate_row,const int candidate_column,const ObmcDirection blending_direction)818 bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv,
819                                const Plane plane,
820                                const int reference_frame_index, const int width,
821                                const int height, const int x, const int y,
822                                const int candidate_row,
823                                const int candidate_column,
824                                const ObmcDirection blending_direction) {
825   const int bitdepth = sequence_header_.color_config.bitdepth;
826   // Obmc's prediction needs to be clipped before blending with above/left
827   // prediction blocks.
828   // Obmc prediction is used only when is_compound is false. So it is safe to
829   // use prediction_buffer[1] as a temporary buffer for the Obmc prediction.
830   static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >=
831                     64 * 64 * sizeof(uint16_t),
832                 "");
833   auto* const obmc_buffer =
834       reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]);
835   const ptrdiff_t obmc_buffer_stride =
836       (bitdepth == 8) ? width : width * sizeof(uint16_t);
837   if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y,
838                             width, height, candidate_row, candidate_column,
839                             nullptr, false, false, obmc_buffer,
840                             obmc_buffer_stride)) {
841     return false;
842   }
843 
844   uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth);
845   const ptrdiff_t prediction_stride = buffer_[plane].columns();
846   dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width,
847                                       height, obmc_buffer, obmc_buffer_stride);
848   return true;
849 }
850 
ObmcPrediction(const Block & block,const Plane plane,const int width,const int height)851 bool Tile::ObmcPrediction(const Block& block, const Plane plane,
852                           const int width, const int height) {
853   const int subsampling_x = subsampling_x_[plane];
854   const int subsampling_y = subsampling_y_[plane];
855   if (block.top_available[kPlaneY] &&
856       !IsBlockSmallerThan8x8(block.residual_size[plane])) {
857     const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]);
858     const int column4x4_max =
859         std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
860     const int candidate_row = block.row4x4 - 1;
861     const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y;
862     int column4x4 = block.column4x4;
863     const int prediction_height = std::min(height >> 1, 32 >> subsampling_y);
864     for (int i = 0, step; i < num_limit && column4x4 < column4x4_max;
865          column4x4 += step) {
866       const int candidate_column = column4x4 | 1;
867       const BlockParameters& bp_top =
868           *block_parameters_holder_.Find(candidate_row, candidate_column);
869       const int candidate_block_size = bp_top.size;
870       step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16);
871       if (bp_top.reference_frame[0] > kReferenceFrameIntra) {
872         i++;
873         const int candidate_reference_frame_index =
874             frame_header_.reference_frame_index[bp_top.reference_frame[0] -
875                                                 kReferenceFrameLast];
876         const int prediction_width =
877             std::min(width, MultiplyBy4(step) >> subsampling_x);
878         if (!ObmcBlockPrediction(
879                 block, bp_top.mv.mv[0], plane, candidate_reference_frame_index,
880                 prediction_width, prediction_height,
881                 MultiplyBy4(column4x4) >> subsampling_x, block_start_y,
882                 candidate_row, candidate_column, kObmcDirectionVertical)) {
883           return false;
884         }
885       }
886     }
887   }
888 
889   if (block.left_available[kPlaneY]) {
890     const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]);
891     const int row4x4_max =
892         std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
893     const int candidate_column = block.column4x4 - 1;
894     int row4x4 = block.row4x4;
895     const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x;
896     const int prediction_width = std::min(width >> 1, 32 >> subsampling_x);
897     for (int i = 0, step; i < num_limit && row4x4 < row4x4_max;
898          row4x4 += step) {
899       const int candidate_row = row4x4 | 1;
900       const BlockParameters& bp_left =
901           *block_parameters_holder_.Find(candidate_row, candidate_column);
902       const int candidate_block_size = bp_left.size;
903       step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16);
904       if (bp_left.reference_frame[0] > kReferenceFrameIntra) {
905         i++;
906         const int candidate_reference_frame_index =
907             frame_header_.reference_frame_index[bp_left.reference_frame[0] -
908                                                 kReferenceFrameLast];
909         const int prediction_height =
910             std::min(height, MultiplyBy4(step) >> subsampling_y);
911         if (!ObmcBlockPrediction(
912                 block, bp_left.mv.mv[0], plane, candidate_reference_frame_index,
913                 prediction_width, prediction_height, block_start_x,
914                 MultiplyBy4(row4x4) >> subsampling_y, candidate_row,
915                 candidate_column, kObmcDirectionHorizontal)) {
916           return false;
917         }
918       }
919     }
920   }
921   return true;
922 }
923 
DistanceWeightedPrediction(void * prediction_0,void * prediction_1,const int width,const int height,const int candidate_row,const int candidate_column,uint8_t * dest,ptrdiff_t dest_stride)924 void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
925                                       const int width, const int height,
926                                       const int candidate_row,
927                                       const int candidate_column, uint8_t* dest,
928                                       ptrdiff_t dest_stride) {
929   int distance[2];
930   int weight[2];
931   for (int reference = 0; reference < 2; ++reference) {
932     const BlockParameters& bp =
933         *block_parameters_holder_.Find(candidate_row, candidate_column);
934     // Note: distance[0] and distance[1] correspond to relative distance
935     // between current frame and reference frame [1] and [0], respectively.
936     distance[1 - reference] = std::min(
937         std::abs(static_cast<int>(
938             current_frame_.reference_info()
939                 ->relative_distance_from[bp.reference_frame[reference]])),
940         static_cast<int>(kMaxFrameDistance));
941   }
942   GetDistanceWeights(distance, weight);
943 
944   dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1],
945                                width, height, dest, dest_stride);
946 }
947 
948 // static.
GetReferenceBlockPosition(const int reference_frame_index,const bool is_scaled,const int width,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int start_x,const int start_y,const int step_x,const int step_y,const int left_border,const int right_border,const int top_border,const int bottom_border,int * ref_block_start_x,int * ref_block_start_y,int * ref_block_end_x)949 bool Tile::GetReferenceBlockPosition(
950     const int reference_frame_index, const bool is_scaled, const int width,
951     const int height, const int ref_start_x, const int ref_last_x,
952     const int ref_start_y, const int ref_last_y, const int start_x,
953     const int start_y, const int step_x, const int step_y,
954     const int left_border, const int right_border, const int top_border,
955     const int bottom_border, int* ref_block_start_x, int* ref_block_start_y,
956     int* ref_block_end_x) {
957   *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0);
958   *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0);
959   if (reference_frame_index == -1) {
960     return false;
961   }
962   *ref_block_start_x -= kConvolveBorderLeftTop;
963   *ref_block_start_y -= kConvolveBorderLeftTop;
964   *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) +
965                      kConvolveBorderRight;
966   int ref_block_end_y =
967       GetPixelPositionFromHighScale(start_y, step_y, height - 1) +
968       kConvolveBorderBottom;
969   if (is_scaled) {
970     const int block_height =
971         (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
972          kScaleSubPixelBits) +
973         kSubPixelTaps;
974     ref_block_end_y = *ref_block_start_y + block_height - 1;
975   }
976   // Determines if we need to extend beyond the left/right/top/bottom border.
977   return *ref_block_start_x < (ref_start_x - left_border) ||
978          *ref_block_end_x > (ref_last_x + right_border) ||
979          *ref_block_start_y < (ref_start_y - top_border) ||
980          ref_block_end_y > (ref_last_y + bottom_border);
981 }
982 
983 // Builds a block as the input for convolve, by copying the content of
984 // reference frame (either a decoded reference frame, or current frame).
985 // |block_extended_width| is the combined width of the block and its borders.
986 template <typename Pixel>
BuildConvolveBlock(const Plane plane,const int reference_frame_index,const bool is_scaled,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int step_y,const int ref_block_start_x,const int ref_block_end_x,const int ref_block_start_y,uint8_t * block_buffer,ptrdiff_t convolve_buffer_stride,ptrdiff_t block_extended_width)987 void Tile::BuildConvolveBlock(
988     const Plane plane, const int reference_frame_index, const bool is_scaled,
989     const int height, const int ref_start_x, const int ref_last_x,
990     const int ref_start_y, const int ref_last_y, const int step_y,
991     const int ref_block_start_x, const int ref_block_end_x,
992     const int ref_block_start_y, uint8_t* block_buffer,
993     ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) {
994   const YuvBuffer* const reference_buffer =
995       (reference_frame_index == -1)
996           ? current_frame_.buffer()
997           : reference_frames_[reference_frame_index]->buffer();
998   Array2DView<const Pixel> reference_block(
999       reference_buffer->height(plane),
1000       reference_buffer->stride(plane) / sizeof(Pixel),
1001       reinterpret_cast<const Pixel*>(reference_buffer->data(plane)));
1002   auto* const block_head = reinterpret_cast<Pixel*>(block_buffer);
1003   convolve_buffer_stride /= sizeof(Pixel);
1004   int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom;
1005   if (is_scaled) {
1006     block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1007                     kScaleSubPixelBits) +
1008                    kSubPixelTaps;
1009   }
1010   const int copy_start_x = Clip3(ref_block_start_x, ref_start_x, ref_last_x);
1011   const int copy_start_y = Clip3(ref_block_start_y, ref_start_y, ref_last_y);
1012   const int copy_end_x = Clip3(ref_block_end_x, copy_start_x, ref_last_x);
1013   const int block_width = copy_end_x - copy_start_x + 1;
1014   const bool extend_left = ref_block_start_x < ref_start_x;
1015   const bool extend_right = ref_block_end_x > ref_last_x;
1016   const bool out_of_left = copy_start_x > ref_block_end_x;
1017   const bool out_of_right = copy_end_x < ref_block_start_x;
1018   if (out_of_left || out_of_right) {
1019     const int ref_x = out_of_left ? copy_start_x : copy_end_x;
1020     Pixel* buf_ptr = block_head;
1021     for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1022       Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width);
1023       if (ref_block_start_y + y >= ref_start_y &&
1024           ref_block_start_y + y < ref_last_y) {
1025         ++ref_y;
1026       }
1027       buf_ptr += convolve_buffer_stride;
1028     }
1029   } else {
1030     Pixel* buf_ptr = block_head;
1031     const int left_width = copy_start_x - ref_block_start_x;
1032     for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1033       if (extend_left) {
1034         Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width);
1035       }
1036       memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x],
1037              block_width * sizeof(Pixel));
1038       if (extend_right) {
1039         Memset(buf_ptr + left_width + block_width,
1040                reference_block[ref_y][copy_end_x],
1041                block_extended_width - left_width - block_width);
1042       }
1043       if (ref_block_start_y + y >= ref_start_y &&
1044           ref_block_start_y + y < ref_last_y) {
1045         ++ref_y;
1046       }
1047       buf_ptr += convolve_buffer_stride;
1048     }
1049   }
1050 }
1051 
BlockInterPrediction(const Block & block,const Plane plane,const int reference_frame_index,const MotionVector & mv,const int x,const int y,const int width,const int height,const int candidate_row,const int candidate_column,uint16_t * const prediction,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1052 bool Tile::BlockInterPrediction(
1053     const Block& block, const Plane plane, const int reference_frame_index,
1054     const MotionVector& mv, const int x, const int y, const int width,
1055     const int height, const int candidate_row, const int candidate_column,
1056     uint16_t* const prediction, const bool is_compound,
1057     const bool is_inter_intra, uint8_t* const dest,
1058     const ptrdiff_t dest_stride) {
1059   const BlockParameters& bp =
1060       *block_parameters_holder_.Find(candidate_row, candidate_column);
1061   int start_x;
1062   int start_y;
1063   int step_x;
1064   int step_y;
1065   ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y,
1066                     &step_x, &step_y);
1067   const int horizontal_filter_index = bp.interpolation_filter[1];
1068   const int vertical_filter_index = bp.interpolation_filter[0];
1069   const int subsampling_x = subsampling_x_[plane];
1070   const int subsampling_y = subsampling_y_[plane];
1071   // reference_frame_index equal to -1 indicates using current frame as
1072   // reference.
1073   const YuvBuffer* const reference_buffer =
1074       (reference_frame_index == -1)
1075           ? current_frame_.buffer()
1076           : reference_frames_[reference_frame_index]->buffer();
1077   const int reference_upscaled_width =
1078       (reference_frame_index == -1)
1079           ? MultiplyBy4(frame_header_.columns4x4)
1080           : reference_frames_[reference_frame_index]->upscaled_width();
1081   const int reference_height =
1082       (reference_frame_index == -1)
1083           ? MultiplyBy4(frame_header_.rows4x4)
1084           : reference_frames_[reference_frame_index]->frame_height();
1085   const int ref_start_x = 0;
1086   const int ref_last_x =
1087       SubsampledValue(reference_upscaled_width, subsampling_x) - 1;
1088   const int ref_start_y = 0;
1089   const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1;
1090 
1091   const bool is_scaled = (reference_frame_index != -1) &&
1092                          (frame_header_.width != reference_upscaled_width ||
1093                           frame_header_.height != reference_height);
1094   const int bitdepth = sequence_header_.color_config.bitdepth;
1095   const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
1096   int ref_block_start_x;
1097   int ref_block_start_y;
1098   int ref_block_end_x;
1099   const bool extend_block = GetReferenceBlockPosition(
1100       reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x,
1101       ref_start_y, ref_last_y, start_x, start_y, step_x, step_y,
1102       reference_buffer->left_border(plane),
1103       reference_buffer->right_border(plane),
1104       reference_buffer->top_border(plane),
1105       reference_buffer->bottom_border(plane), &ref_block_start_x,
1106       &ref_block_start_y, &ref_block_end_x);
1107 
1108   // In frame parallel mode, ensure that the reference block has been decoded
1109   // and available for referencing.
1110   if (reference_frame_index != -1 && frame_parallel_) {
1111     int reference_y_max;
1112     if (is_scaled) {
1113       // TODO(vigneshv): For now, we wait for the entire reference frame to be
1114       // decoded if we are using scaled references. This will eventually be
1115       // fixed.
1116       reference_y_max = reference_height;
1117     } else {
1118       reference_y_max =
1119           std::min(ref_block_start_y + height + kSubPixelTaps, ref_last_y);
1120       // For U and V planes with subsampling, we need to multiply
1121       // reference_y_max by 2 since we only track the progress of Y planes.
1122       reference_y_max = LeftShift(reference_y_max, subsampling_y);
1123     }
1124     if (reference_frame_progress_cache_[reference_frame_index] <
1125             reference_y_max &&
1126         !reference_frames_[reference_frame_index]->WaitUntil(
1127             reference_y_max,
1128             &reference_frame_progress_cache_[reference_frame_index])) {
1129       return false;
1130     }
1131   }
1132 
1133   const uint8_t* block_start = nullptr;
1134   ptrdiff_t convolve_buffer_stride;
1135   if (!extend_block) {
1136     const YuvBuffer* const reference_buffer =
1137         (reference_frame_index == -1)
1138             ? current_frame_.buffer()
1139             : reference_frames_[reference_frame_index]->buffer();
1140     convolve_buffer_stride = reference_buffer->stride(plane);
1141     if (reference_frame_index == -1 || is_scaled) {
1142       block_start = reference_buffer->data(plane) +
1143                     ref_block_start_y * reference_buffer->stride(plane) +
1144                     ref_block_start_x * pixel_size;
1145     } else {
1146       block_start = reference_buffer->data(plane) +
1147                     (ref_block_start_y + kConvolveBorderLeftTop) *
1148                         reference_buffer->stride(plane) +
1149                     (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
1150     }
1151   } else {
1152     // The block width can be at most 2 times as much as current
1153     // block's width because of scaling.
1154     auto block_extended_width = Align<ptrdiff_t>(
1155         (2 * width + kConvolveBorderLeftTop + kConvolveBorderRight) *
1156             pixel_size,
1157         kMaxAlignment);
1158     convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
1159 #if LIBGAV1_MAX_BITDEPTH >= 10
1160     if (bitdepth > 8) {
1161       BuildConvolveBlock<uint16_t>(
1162           plane, reference_frame_index, is_scaled, height, ref_start_x,
1163           ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1164           ref_block_end_x, ref_block_start_y,
1165           block.scratch_buffer->convolve_block_buffer.get(),
1166           convolve_buffer_stride, block_extended_width);
1167     } else {
1168 #endif
1169       BuildConvolveBlock<uint8_t>(
1170           plane, reference_frame_index, is_scaled, height, ref_start_x,
1171           ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1172           ref_block_end_x, ref_block_start_y,
1173           block.scratch_buffer->convolve_block_buffer.get(),
1174           convolve_buffer_stride, block_extended_width);
1175 #if LIBGAV1_MAX_BITDEPTH >= 10
1176     }
1177 #endif
1178     block_start = block.scratch_buffer->convolve_block_buffer.get() +
1179                   (is_scaled ? 0
1180                              : kConvolveBorderLeftTop * convolve_buffer_stride +
1181                                    kConvolveBorderLeftTop * pixel_size);
1182   }
1183 
1184   void* const output =
1185       (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest);
1186   ptrdiff_t output_stride = (is_compound || is_inter_intra)
1187                                 ? /*prediction_stride=*/width
1188                                 : dest_stride;
1189 #if LIBGAV1_MAX_BITDEPTH >= 10
1190   // |is_inter_intra| calculations are written to the |prediction| buffer.
1191   // Unlike the |is_compound| calculations the output is Pixel and not uint16_t.
1192   // convolve_func() expects |output_stride| to be in bytes and not Pixels.
1193   // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to
1194   // account for this.
1195   if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1196     output_stride *= 2;
1197   }
1198 #endif
1199   assert(output != nullptr);
1200   if (is_scaled) {
1201     dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound];
1202     assert(convolve_func != nullptr);
1203 
1204     convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1205                   vertical_filter_index, start_x, start_y, step_x, step_y,
1206                   width, height, output, output_stride);
1207   } else {
1208     const int horizontal_filter_id = (start_x >> 6) & kSubPixelMask;
1209     const int vertical_filter_id = (start_y >> 6) & kSubPixelMask;
1210 
1211     dsp::ConvolveFunc convolve_func =
1212         dsp_.convolve[reference_frame_index == -1][is_compound]
1213                      [vertical_filter_id != 0][horizontal_filter_id != 0];
1214     assert(convolve_func != nullptr);
1215 
1216     convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1217                   vertical_filter_index, horizontal_filter_id,
1218                   vertical_filter_id, width, height, output, output_stride);
1219   }
1220   return true;
1221 }
1222 
BlockWarpProcess(const Block & block,const Plane plane,const int index,const int block_start_x,const int block_start_y,const int width,const int height,GlobalMotion * const warp_params,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1223 bool Tile::BlockWarpProcess(const Block& block, const Plane plane,
1224                             const int index, const int block_start_x,
1225                             const int block_start_y, const int width,
1226                             const int height, GlobalMotion* const warp_params,
1227                             const bool is_compound, const bool is_inter_intra,
1228                             uint8_t* const dest, const ptrdiff_t dest_stride) {
1229   assert(width >= 8 && height >= 8);
1230   const BlockParameters& bp = *block.bp;
1231   const int reference_frame_index =
1232       frame_header_.reference_frame_index[bp.reference_frame[index] -
1233                                           kReferenceFrameLast];
1234   const uint8_t* const source =
1235       reference_frames_[reference_frame_index]->buffer()->data(plane);
1236   ptrdiff_t source_stride =
1237       reference_frames_[reference_frame_index]->buffer()->stride(plane);
1238   const int source_width =
1239       reference_frames_[reference_frame_index]->buffer()->width(plane);
1240   const int source_height =
1241       reference_frames_[reference_frame_index]->buffer()->height(plane);
1242   uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index];
1243 
1244   // In frame parallel mode, ensure that the reference block has been decoded
1245   // and available for referencing.
1246   if (frame_parallel_) {
1247     int reference_y_max = -1;
1248     // Find out the maximum y-coordinate for warping.
1249     for (int start_y = block_start_y; start_y < block_start_y + height;
1250          start_y += 8) {
1251       for (int start_x = block_start_x; start_x < block_start_x + width;
1252            start_x += 8) {
1253         const int src_x = (start_x + 4) << subsampling_x_[plane];
1254         const int src_y = (start_y + 4) << subsampling_y_[plane];
1255         const int dst_y = src_x * warp_params->params[4] +
1256                           src_y * warp_params->params[5] +
1257                           warp_params->params[1];
1258         const int y4 = dst_y >> subsampling_y_[plane];
1259         const int iy4 = y4 >> kWarpedModelPrecisionBits;
1260         reference_y_max = std::max(iy4 + 8, reference_y_max);
1261       }
1262     }
1263     // For U and V planes with subsampling, we need to multiply reference_y_max
1264     // by 2 since we only track the progress of Y planes.
1265     reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]);
1266     if (reference_frame_progress_cache_[reference_frame_index] <
1267             reference_y_max &&
1268         !reference_frames_[reference_frame_index]->WaitUntil(
1269             reference_y_max,
1270             &reference_frame_progress_cache_[reference_frame_index])) {
1271       return false;
1272     }
1273   }
1274   if (is_compound) {
1275     dsp_.warp_compound(source, source_stride, source_width, source_height,
1276                        warp_params->params, subsampling_x_[plane],
1277                        subsampling_y_[plane], block_start_x, block_start_y,
1278                        width, height, warp_params->alpha, warp_params->beta,
1279                        warp_params->gamma, warp_params->delta, prediction,
1280                        /*prediction_stride=*/width);
1281   } else {
1282     void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest;
1283     ptrdiff_t output_stride =
1284         is_inter_intra ? /*prediction_stride=*/width : dest_stride;
1285 #if LIBGAV1_MAX_BITDEPTH >= 10
1286     // |is_inter_intra| calculations are written to the |prediction| buffer.
1287     // Unlike the |is_compound| calculations the output is Pixel and not
1288     // uint16_t. warp_clip() expects |output_stride| to be in bytes and not
1289     // Pixels. |prediction_stride| is in units of uint16_t. Adjust
1290     // |output_stride| to account for this.
1291     if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1292       output_stride *= 2;
1293     }
1294 #endif
1295     dsp_.warp(source, source_stride, source_width, source_height,
1296               warp_params->params, subsampling_x_[plane], subsampling_y_[plane],
1297               block_start_x, block_start_y, width, height, warp_params->alpha,
1298               warp_params->beta, warp_params->gamma, warp_params->delta, output,
1299               output_stride);
1300   }
1301   return true;
1302 }
1303 
1304 }  // namespace libgav1
1305