1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_TILE_H_
18 #define LIBGAV1_SRC_TILE_H_
19 
20 #include <algorithm>
21 #include <array>
22 #include <cassert>
23 #include <condition_variable>  // NOLINT (unapproved c++11 header)
24 #include <cstddef>
25 #include <cstdint>
26 #include <memory>
27 #include <mutex>  // NOLINT (unapproved c++11 header)
28 #include <vector>
29 
30 #include "src/buffer_pool.h"
31 #include "src/decoder_state.h"
32 #include "src/dsp/common.h"
33 #include "src/dsp/constants.h"
34 #include "src/dsp/dsp.h"
35 #include "src/frame_scratch_buffer.h"
36 #include "src/loop_filter_mask.h"
37 #include "src/loop_restoration_info.h"
38 #include "src/obu_parser.h"
39 #include "src/post_filter.h"
40 #include "src/quantizer.h"
41 #include "src/residual_buffer_pool.h"
42 #include "src/symbol_decoder_context.h"
43 #include "src/tile_scratch_buffer.h"
44 #include "src/utils/array_2d.h"
45 #include "src/utils/block_parameters_holder.h"
46 #include "src/utils/blocking_counter.h"
47 #include "src/utils/common.h"
48 #include "src/utils/compiler_attributes.h"
49 #include "src/utils/constants.h"
50 #include "src/utils/entropy_decoder.h"
51 #include "src/utils/memory.h"
52 #include "src/utils/parameter_tree.h"
53 #include "src/utils/segmentation_map.h"
54 #include "src/utils/threadpool.h"
55 #include "src/utils/types.h"
56 #include "src/yuv_buffer.h"
57 
58 namespace libgav1 {
59 
60 // Indicates what the ProcessSuperBlock() and TransformBlock() functions should
61 // do. "Parse" refers to consuming the bitstream, reading the transform
62 // coefficients and performing the dequantization. "Decode" refers to computing
63 // the prediction, applying the inverse transforms and adding the residual.
enum ProcessingMode {
  // Only consume the bitstream: read the transform coefficients and perform
  // the dequantization.
  kProcessingModeParseOnly = 0,
  // Only reconstruct: compute the prediction, apply the inverse transforms and
  // add the residual.
  kProcessingModeDecodeOnly = 1,
  // Perform both the "parse" and the "decode" steps.
  kProcessingModeParseAndDecode = 2,
};
69 
70 class Tile : public Allocable {
71  public:
Create(int tile_number,const uint8_t * const data,size_t size,const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,RefCountedBuffer * const current_frame,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,const WedgeMaskArray & wedge_masks,SymbolDecoderContext * const saved_symbol_decoder_context,const SegmentationMap * prev_segment_ids,PostFilter * const post_filter,BlockParametersHolder * const block_parameters_holder,const dsp::Dsp * const dsp,ThreadPool * const thread_pool,BlockingCounterWithStatus * const pending_tiles,bool frame_parallel,bool use_intra_prediction_buffer)72   static std::unique_ptr<Tile> Create(
73       int tile_number, const uint8_t* const data, size_t size,
74       const ObuSequenceHeader& sequence_header,
75       const ObuFrameHeader& frame_header, RefCountedBuffer* const current_frame,
76       const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer,
77       const WedgeMaskArray& wedge_masks,
78       SymbolDecoderContext* const saved_symbol_decoder_context,
79       const SegmentationMap* prev_segment_ids, PostFilter* const post_filter,
80       BlockParametersHolder* const block_parameters_holder,
81       const dsp::Dsp* const dsp, ThreadPool* const thread_pool,
82       BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
83       bool use_intra_prediction_buffer) {
84     std::unique_ptr<Tile> tile(new (std::nothrow) Tile(
85         tile_number, data, size, sequence_header, frame_header, current_frame,
86         state, frame_scratch_buffer, wedge_masks, saved_symbol_decoder_context,
87         prev_segment_ids, post_filter, block_parameters_holder, dsp,
88         thread_pool, pending_tiles, frame_parallel,
89         use_intra_prediction_buffer));
90     return (tile != nullptr && tile->Init()) ? std::move(tile) : nullptr;
91   }
92 
93   // Move only.
94   Tile(Tile&& tile) noexcept;
95   Tile& operator=(Tile&& tile) noexcept;
96   Tile(const Tile&) = delete;
97   Tile& operator=(const Tile&) = delete;
98 
99   struct Block;  // Defined after this class.
100 
101   // Parses the entire tile.
102   bool Parse();
103   // Parses and decodes the entire tile. Depending on the configuration of this
104   // Tile, this function may do multithreaded decoding.
105   bool ParseAndDecode(bool is_main_thread);  // 5.11.2.
106   // Processes all the columns of the superblock row at |row4x4| that are within
107   // this Tile. If |save_symbol_decoder_context| is true, then
108   // SaveSymbolDecoderContext() is invoked for the last superblock row.
109   template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
110   bool ProcessSuperBlockRow(int row4x4, TileScratchBuffer* scratch_buffer);
111 
sequence_header()112   const ObuSequenceHeader& sequence_header() const { return sequence_header_; }
frame_header()113   const ObuFrameHeader& frame_header() const { return frame_header_; }
current_frame()114   const RefCountedBuffer& current_frame() const { return current_frame_; }
motion_field()115   const TemporalMotionField& motion_field() const { return motion_field_; }
reference_frame_sign_bias()116   const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias()
117       const {
118     return reference_frame_sign_bias_;
119   }
120 
121   // 5.11.51.
IsInside(int row4x4,int column4x4)122   bool IsInside(int row4x4, int column4x4) const {
123     return row4x4 >= row4x4_start_ && row4x4 < row4x4_end_ &&
124            column4x4 >= column4x4_start_ && column4x4 < column4x4_end_;
125   }
126 
IsLeftInside(int column4x4)127   bool IsLeftInside(int column4x4) const {
128     // We use "larger than" as the condition. Don't pass in the left column
129     // offset column4x4 - 1.
130     assert(column4x4 <= column4x4_end_);
131     return column4x4 > column4x4_start_;
132   }
133 
IsTopInside(int row4x4)134   bool IsTopInside(int row4x4) const {
135     // We use "larger than" as the condition. Don't pass in the top row offset
136     // row4x4 - 1.
137     assert(row4x4 <= row4x4_end_);
138     return row4x4 > row4x4_start_;
139   }
140 
IsTopLeftInside(int row4x4,int column4x4)141   bool IsTopLeftInside(int row4x4, int column4x4) const {
142     // We use "larger than" as the condition. Don't pass in the top row offset
143     // row4x4 - 1 or the left column offset column4x4 - 1.
144     assert(row4x4 <= row4x4_end_);
145     assert(column4x4 <= column4x4_end_);
146     return row4x4 > row4x4_start_ && column4x4 > column4x4_start_;
147   }
148 
IsBottomRightInside(int row4x4,int column4x4)149   bool IsBottomRightInside(int row4x4, int column4x4) const {
150     assert(row4x4 >= row4x4_start_);
151     assert(column4x4 >= column4x4_start_);
152     return row4x4 < row4x4_end_ && column4x4 < column4x4_end_;
153   }
154 
BlockParametersAddress(int row4x4,int column4x4)155   BlockParameters** BlockParametersAddress(int row4x4, int column4x4) const {
156     return block_parameters_holder_.Address(row4x4, column4x4);
157   }
158 
BlockParametersStride()159   int BlockParametersStride() const {
160     return block_parameters_holder_.columns4x4();
161   }
162 
163   // Returns true if Parameters() can be called with |row| and |column| as
164   // inputs, false otherwise.
HasParameters(int row,int column)165   bool HasParameters(int row, int column) const {
166     return block_parameters_holder_.Find(row, column) != nullptr;
167   }
Parameters(int row,int column)168   const BlockParameters& Parameters(int row, int column) const {
169     return *block_parameters_holder_.Find(row, column);
170   }
number()171   int number() const { return number_; }
superblock_rows()172   int superblock_rows() const { return superblock_rows_; }
superblock_columns()173   int superblock_columns() const { return superblock_columns_; }
174 
175  private:
176   Tile(int tile_number, const uint8_t* data, size_t size,
177        const ObuSequenceHeader& sequence_header,
178        const ObuFrameHeader& frame_header, RefCountedBuffer* current_frame,
179        const DecoderState& state, FrameScratchBuffer* frame_scratch_buffer,
180        const WedgeMaskArray& wedge_masks,
181        SymbolDecoderContext* saved_symbol_decoder_context,
182        const SegmentationMap* prev_segment_ids, PostFilter* post_filter,
183        BlockParametersHolder* block_parameters_holder, const dsp::Dsp* dsp,
184        ThreadPool* thread_pool, BlockingCounterWithStatus* pending_tiles,
185        bool frame_parallel, bool use_intra_prediction_buffer);
186 
187   // Stores the transform tree state when reading variable size transform trees
188   // and when applying the transform tree. When applying the transform tree,
189   // |depth| is not used.
190   struct TransformTreeNode {
191     // The default constructor is invoked by the Stack<TransformTreeNode, n>
192     // constructor. Stack<> does not use the default-constructed elements, so it
193     // is safe for the default constructor to not initialize the members.
194     TransformTreeNode() = default;
195     TransformTreeNode(int x, int y, TransformSize tx_size, int depth = -1)
xTransformTreeNode196         : x(x), y(y), tx_size(tx_size), depth(depth) {}
197 
198     int x;
199     int y;
200     TransformSize tx_size;
201     int depth;
202   };
203 
204   // Parameters used to facilitate multi-threading within the Tile.
  struct ThreadingParameters {
    // Guards the members annotated with LIBGAV1_GUARDED_BY(mutex) below.
    std::mutex mutex;
    // Array2DView of size |superblock_rows_| by |superblock_columns_|
    // containing the processing state of each superblock. The code in this
    // class uses relative indexing of superblocks with respect to this Tile.
    // The memory for this comes from the caller. The memory is for the whole
    // frame whereas the |sb_state| array in this struct points to the
    // beginning of this Tile.
    // NOTE(review): the Tile constructor has no |super_block_state| parameter;
    // presumably the memory is obtained through |frame_scratch_buffer| --
    // confirm.
    Array2DView<SuperBlockState> sb_state LIBGAV1_GUARDED_BY(mutex);
    // Variable used to indicate either parse or decode failure.
    bool abort LIBGAV1_GUARDED_BY(mutex) = false;
    // Counter of pending jobs; starts at zero.
    int pending_jobs LIBGAV1_GUARDED_BY(mutex) = 0;
    // Presumably signaled when |pending_jobs| drops to zero -- TODO confirm
    // against the implementation.
    std::condition_variable pending_jobs_zero_condvar;
  };
219 
220   // The residual pointer is used to traverse the |residual_buffer_|. It is
221   // used in two different ways.
222   // If |split_parse_and_decode_| is true:
223   //    The pointer points to the beginning of the |residual_buffer_| when the
224   //    "parse" and "decode" steps begin. It is then moved forward tx_size in
225   //    each iteration of the "parse" and the "decode" steps. In this case, the
226   //    ResidualPtr variable passed into various functions starting from
227   //    ProcessSuperBlock is used as an in/out parameter to keep track of the
228   //    residual pointer.
229   // If |split_parse_and_decode_| is false:
230   //    The pointer is reset to the beginning of the |residual_buffer_| for
231   //    every transform block.
232   using ResidualPtr = uint8_t*;
233 
234   // Performs member initializations that may fail. Helper function used by
235   // Create().
236   LIBGAV1_MUST_USE_RESULT bool Init();
237 
238   // Saves the symbol decoder context of this tile into
239   // |saved_symbol_decoder_context_| if necessary.
240   void SaveSymbolDecoderContext();
241 
242   // Entry point for multi-threaded decoding. This function performs the same
243   // functionality as ParseAndDecode(). The current thread does the "parse" step
244   // while the worker threads do the "decode" step.
245   bool ThreadedParseAndDecode();
246 
247   // Returns whether or not the prerequisites for decoding the superblock at
248   // |row_index| and |column_index| are satisfied. |threading_.mutex| must be
249   // held when calling this function.
250   bool CanDecode(int row_index, int column_index) const;
251 
252   // This function is run by the worker threads when multi-threaded decoding is
253   // enabled. Once a superblock is decoded, this function will set the
254   // corresponding |threading_.sb_state| entry to kSuperBlockStateDecoded. On
255   // failure, |threading_.abort| will be set to true. If at any point
256   // |threading_.abort| becomes true, this function will return as early as it
257   // can. If the decoding succeeds, this function will also schedule the
258   // decoding jobs for the superblock to the bottom-left and the superblock to
259   // the right of this superblock (if it is allowed).
260   void DecodeSuperBlock(int row_index, int column_index, int block_width4x4);
261 
  // If |use_intra_prediction_buffer_| is true, then this function copies the
  // last row of the superblock row starting at |row4x4| into the
  // |intra_prediction_buffer_| (which may be used by the intra prediction
  // process for the next superblock row).
266   void PopulateIntraPredictionBuffer(int row4x4);
267 
268   uint16_t* GetPartitionCdf(int row4x4, int column4x4, BlockSize block_size);
269   bool ReadPartition(int row4x4, int column4x4, BlockSize block_size,
270                      bool has_rows, bool has_columns, Partition* partition);
271   // Processes the Partition starting at |row4x4_start|, |column4x4_start|
272   // iteratively. It performs a DFS traversal over the partition tree to process
273   // the blocks in the right order.
274   bool ProcessPartition(
275       int row4x4_start, int column4x4_start, ParameterTree* root,
276       TileScratchBuffer* scratch_buffer,
277       ResidualPtr* residual);  // Iterative implementation of 5.11.4.
278   bool ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
279                     ParameterTree* tree, TileScratchBuffer* scratch_buffer,
280                     ResidualPtr* residual);   // 5.11.5.
281   void ResetCdef(int row4x4, int column4x4);  // 5.11.55.
282 
283   // This function is used to decode a superblock when the parsing has already
284   // been done for that superblock.
285   bool DecodeSuperBlock(ParameterTree* tree, TileScratchBuffer* scratch_buffer,
286                         ResidualPtr* residual);
287   // Helper function used by DecodeSuperBlock(). Note that the decode_block()
288   // function in the spec is equivalent to ProcessBlock() in the code.
289   bool DecodeBlock(ParameterTree* tree, TileScratchBuffer* scratch_buffer,
290                    ResidualPtr* residual);
291 
292   void ClearBlockDecoded(TileScratchBuffer* scratch_buffer, int row4x4,
293                          int column4x4);  // 5.11.3.
294   bool ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4,
295                          TileScratchBuffer* scratch_buffer,
296                          ProcessingMode mode);
297   void ResetLoopRestorationParams();
298   void ReadLoopRestorationCoefficients(int row4x4, int column4x4,
299                                        BlockSize block_size);  // 5.11.57.
300   // Build bit masks for vertical edges followed by horizontal edges.
301   // Traverse through each transform edge in the current coding block, and
302   // determine if a 4x4 edge needs filtering. If filtering is needed, determine
303   // filter length. Set corresponding bit mask to 1.
304   void BuildBitMask(const Block& block);
305   void BuildBitMaskHelper(const Block& block, int row4x4, int column4x4,
306                           BlockSize block_size, bool is_vertical_block_border,
307                           bool is_horizontal_block_border);
308 
309   // Helper functions for DecodeBlock.
310   bool ReadSegmentId(const Block& block);       // 5.11.9.
311   bool ReadIntraSegmentId(const Block& block);  // 5.11.8.
312   void ReadSkip(const Block& block);            // 5.11.11.
313   void ReadSkipMode(const Block& block);        // 5.11.10.
314   void ReadCdef(const Block& block);            // 5.11.56.
315   // Returns the new value. |cdf| is an array of size kDeltaSymbolCount + 1.
316   int ReadAndClipDelta(uint16_t* cdf, int delta_small, int scale, int min_value,
317                        int max_value, int value);
318   void ReadQuantizerIndexDelta(const Block& block);  // 5.11.12.
319   void ReadLoopFilterDelta(const Block& block);      // 5.11.13.
320   // Populates |BlockParameters::deblock_filter_level| for the given |block|
321   // using |deblock_filter_levels_|.
322   void PopulateDeblockFilterLevel(const Block& block);
323   void ReadPredictionModeY(const Block& block, bool intra_y_mode);
324   void ReadIntraAngleInfo(const Block& block,
325                           PlaneType plane_type);  // 5.11.42 and 5.11.43.
326   void ReadPredictionModeUV(const Block& block);
327   void ReadCflAlpha(const Block& block);  // 5.11.45.
328   int GetPaletteCache(const Block& block, PlaneType plane_type,
329                       uint16_t* cache);
330   void ReadPaletteColors(const Block& block, Plane plane);
331   void ReadPaletteModeInfo(const Block& block);      // 5.11.46.
332   void ReadFilterIntraModeInfo(const Block& block);  // 5.11.24.
333   int ReadMotionVectorComponent(const Block& block,
334                                 int component);                // 5.11.32.
335   void ReadMotionVector(const Block& block, int index);        // 5.11.31.
336   bool DecodeIntraModeInfo(const Block& block);                // 5.11.7.
337   int8_t ComputePredictedSegmentId(const Block& block) const;  // 5.11.21.
338   bool ReadInterSegmentId(const Block& block, bool pre_skip);  // 5.11.19.
339   void ReadIsInter(const Block& block);                        // 5.11.20.
340   bool ReadIntraBlockModeInfo(const Block& block,
341                               bool intra_y_mode);  // 5.11.22.
342   int GetUseCompoundReferenceContext(const Block& block);
343   CompoundReferenceType ReadCompoundReferenceType(const Block& block);
344   // Calculates count0 by calling block.CountReferences() on the frame types
345   // from type0_start to type0_end, inclusive, and summing the results.
346   // Calculates count1 by calling block.CountReferences() on the frame types
347   // from type1_start to type1_end, inclusive, and summing the results.
348   // Compares count0 with count1 and returns 0, 1 or 2.
349   //
350   // See count_refs and ref_count_ctx in 8.3.2.
351   int GetReferenceContext(const Block& block, ReferenceFrameType type0_start,
352                           ReferenceFrameType type0_end,
353                           ReferenceFrameType type1_start,
354                           ReferenceFrameType type1_end) const;
355   template <bool is_single, bool is_backward, int index>
356   uint16_t* GetReferenceCdf(const Block& block, CompoundReferenceType type =
357                                                     kNumCompoundReferenceTypes);
358   void ReadReferenceFrames(const Block& block);  // 5.11.25.
359   void ReadInterPredictionModeY(const Block& block,
360                                 const MvContexts& mode_contexts);
361   void ReadRefMvIndex(const Block& block);
362   void ReadInterIntraMode(const Block& block, bool is_compound);  // 5.11.28.
363   bool IsScaled(ReferenceFrameType type) const;  // Part of 5.11.27.
364   void ReadMotionMode(const Block& block, bool is_compound);  // 5.11.27.
365   uint16_t* GetIsExplicitCompoundTypeCdf(const Block& block);
366   uint16_t* GetIsCompoundTypeAverageCdf(const Block& block);
367   void ReadCompoundType(const Block& block, bool is_compound);  // 5.11.29.
368   uint16_t* GetInterpolationFilterCdf(const Block& block, int direction);
369   void ReadInterpolationFilter(const Block& block);
370   bool ReadInterBlockModeInfo(const Block& block);             // 5.11.23.
371   bool DecodeInterModeInfo(const Block& block);                // 5.11.18.
372   bool DecodeModeInfo(const Block& block);                     // 5.11.6.
373   bool IsMvValid(const Block& block, bool is_compound) const;  // 6.10.25.
374   bool AssignInterMv(const Block& block, bool is_compound);    // 5.11.26.
375   bool AssignIntraMv(const Block& block);                      // 5.11.26.
376   int GetTopTransformWidth(const Block& block, int row4x4, int column4x4,
377                            bool ignore_skip);
378   int GetLeftTransformHeight(const Block& block, int row4x4, int column4x4,
379                              bool ignore_skip);
380   TransformSize ReadFixedTransformSize(const Block& block);  // 5.11.15.
381   // Iterative implementation of 5.11.17.
382   void ReadVariableTransformTree(const Block& block, int row4x4, int column4x4,
383                                  TransformSize tx_size);
384   void DecodeTransformSize(const Block& block);  // 5.11.16.
385   bool ComputePrediction(const Block& block);    // 5.11.33.
386   // |x4| and |y4| are the column and row positions of the 4x4 block. |w4| and
387   // |h4| are the width and height in 4x4 units of |tx_size|.
388   int GetTransformAllZeroContext(const Block& block, Plane plane,
389                                  TransformSize tx_size, int x4, int y4, int w4,
390                                  int h4);
391   TransformSet GetTransformSet(TransformSize tx_size,
392                                bool is_inter) const;  // 5.11.48.
393   TransformType ComputeTransformType(const Block& block, Plane plane,
394                                      TransformSize tx_size, int block_x,
395                                      int block_y);  // 5.11.40.
396   void ReadTransformType(const Block& block, int x4, int y4,
397                          TransformSize tx_size);  // 5.11.47.
398   int GetCoeffBaseContextEob(TransformSize tx_size, int index);
399   int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
400                                   TransformClass tx_class);
401   template <typename ResidualType>
402   void ReadCoeffBase2D(
403       const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
404       int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
405       uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
406       ResidualType* quantized_buffer);
407   template <typename ResidualType>
408   void ReadCoeffBaseHorizontal(
409       const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
410       int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
411       uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
412       ResidualType* quantized_buffer);
413   template <typename ResidualType>
414   void ReadCoeffBaseVertical(
415       const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
416       int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
417       uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
418       ResidualType* quantized_buffer);
419   int GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane);
420   void SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
421                           uint8_t coefficient_level, int8_t dc_category);
422   void InterIntraPrediction(
423       uint16_t* prediction_0, const uint8_t* prediction_mask,
424       ptrdiff_t prediction_mask_stride,
425       const PredictionParameters& prediction_parameters, int prediction_width,
426       int prediction_height, int subsampling_x, int subsampling_y,
427       uint8_t* dest,
428       ptrdiff_t dest_stride);  // Part of section 7.11.3.1 in the spec.
429   void CompoundInterPrediction(
430       const Block& block, const uint8_t* prediction_mask,
431       ptrdiff_t prediction_mask_stride, int prediction_width,
432       int prediction_height, int subsampling_x, int subsampling_y,
433       int candidate_row, int candidate_column, uint8_t* dest,
434       ptrdiff_t dest_stride);  // Part of section 7.11.3.1 in the spec.
435   GlobalMotion* GetWarpParams(const Block& block, Plane plane,
436                               int prediction_width, int prediction_height,
437                               const PredictionParameters& prediction_parameters,
438                               ReferenceFrameType reference_type,
439                               bool* is_local_valid,
440                               GlobalMotion* global_motion_params,
441                               GlobalMotion* local_warp_params)
442       const;  // Part of section 7.11.3.1 in the spec.
443   bool InterPrediction(const Block& block, Plane plane, int x, int y,
444                        int prediction_width, int prediction_height,
445                        int candidate_row, int candidate_column,
446                        bool* is_local_valid,
447                        GlobalMotion* local_warp_params);  // 7.11.3.1.
448   void ScaleMotionVector(const MotionVector& mv, Plane plane,
449                          int reference_frame_index, int x, int y, int* start_x,
450                          int* start_y, int* step_x, int* step_y);  // 7.11.3.3.
451   // If the method returns false, the caller only uses the output parameters
452   // *ref_block_start_x and *ref_block_start_y. If the method returns true, the
453   // caller uses all three output parameters.
454   static bool GetReferenceBlockPosition(
455       int reference_frame_index, bool is_scaled, int width, int height,
456       int ref_start_x, int ref_last_x, int ref_start_y, int ref_last_y,
457       int start_x, int start_y, int step_x, int step_y, int left_border,
458       int right_border, int top_border, int bottom_border,
459       int* ref_block_start_x, int* ref_block_start_y, int* ref_block_end_x);
460 
461   template <typename Pixel>
462   void BuildConvolveBlock(Plane plane, int reference_frame_index,
463                           bool is_scaled, int height, int ref_start_x,
464                           int ref_last_x, int ref_start_y, int ref_last_y,
465                           int step_y, int ref_block_start_x,
466                           int ref_block_end_x, int ref_block_start_y,
467                           uint8_t* block_buffer,
468                           ptrdiff_t convolve_buffer_stride,
469                           ptrdiff_t block_extended_width);
470   bool BlockInterPrediction(const Block& block, Plane plane,
471                             int reference_frame_index, const MotionVector& mv,
472                             int x, int y, int width, int height,
473                             int candidate_row, int candidate_column,
474                             uint16_t* prediction, bool is_compound,
475                             bool is_inter_intra, uint8_t* dest,
476                             ptrdiff_t dest_stride);  // 7.11.3.4.
477   bool BlockWarpProcess(const Block& block, Plane plane, int index,
478                         int block_start_x, int block_start_y, int width,
479                         int height, GlobalMotion* warp_params, bool is_compound,
480                         bool is_inter_intra, uint8_t* dest,
481                         ptrdiff_t dest_stride);  // 7.11.3.5.
482   bool ObmcBlockPrediction(const Block& block, const MotionVector& mv,
483                            Plane plane, int reference_frame_index, int width,
484                            int height, int x, int y, int candidate_row,
485                            int candidate_column,
486                            ObmcDirection blending_direction);
487   bool ObmcPrediction(const Block& block, Plane plane, int width,
488                       int height);  // 7.11.3.9.
489   void DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
490                                   int width, int height, int candidate_row,
491                                   int candidate_column, uint8_t* dest,
492                                   ptrdiff_t dest_stride);  // 7.11.3.15.
493   // This function specializes the parsing of DC coefficient by removing some of
494   // the branches when i == 0 (since scan[0] is always 0 and scan[i] is always
495   // non-zero for all other possible values of i). |dc_category| is an output
496   // parameter that is populated when |is_dc_coefficient| is true.
497   // |coefficient_level| is an output parameter which accumulates the
498   // coefficient level.
499   template <typename ResidualType, bool is_dc_coefficient>
500   LIBGAV1_ALWAYS_INLINE bool ReadSignAndApplyDequantization(
501       const uint16_t* scan, int i, int q_value, const uint8_t* quantizer_matrix,
502       int shift, int max_value, uint16_t* dc_sign_cdf, int8_t* dc_category,
503       int* coefficient_level,
504       ResidualType* residual_buffer);  // Part of 5.11.39.
505   int ReadCoeffBaseRange(int clamped_tx_size_context, int cdf_context,
506                          int plane_type);  // Part of 5.11.39.
507   // Returns the number of non-zero coefficients that were read. |tx_type| is an
508   // output parameter that stores the computed transform type for the plane
509   // whose coefficients were read. Returns -1 on failure.
510   template <typename ResidualType>
511   int ReadTransformCoefficients(const Block& block, Plane plane, int start_x,
512                                 int start_y, TransformSize tx_size,
513                                 TransformType* tx_type);  // 5.11.39.
514   bool TransformBlock(const Block& block, Plane plane, int base_x, int base_y,
515                       TransformSize tx_size, int x, int y,
516                       ProcessingMode mode);  // 5.11.35.
517   // Iterative implementation of 5.11.36.
518   bool TransformTree(const Block& block, int start_x, int start_y,
519                      BlockSize plane_size, ProcessingMode mode);
520   void ReconstructBlock(const Block& block, Plane plane, int start_x,
521                         int start_y, TransformSize tx_size,
522                         TransformType tx_type,
523                         int non_zero_coeff_count);         // Part of 7.12.3.
524   bool Residual(const Block& block, ProcessingMode mode);  // 5.11.34.
  // Part of 5.11.5 (reset_block_context() in the spec).
526   void ResetEntropyContext(const Block& block);
527   // Populates the |color_context| and |color_order| for the |i|th iteration
528   // with entries counting down from |start| to |end| (|start| > |end|).
529   void PopulatePaletteColorContexts(
530       const Block& block, PlaneType plane_type, int i, int start, int end,
531       uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize],
532       uint8_t color_context[kMaxPaletteSquare]);  // 5.11.50.
533   bool ReadPaletteTokens(const Block& block);     // 5.11.49.
534   template <typename Pixel>
535   void IntraPrediction(const Block& block, Plane plane, int x, int y,
536                        bool has_left, bool has_top, bool has_top_right,
537                        bool has_bottom_left, PredictionMode mode,
538                        TransformSize tx_size);
539   bool IsSmoothPrediction(int row, int column, Plane plane) const;
540   int GetIntraEdgeFilterType(const Block& block,
541                              Plane plane) const;  // 7.11.2.8.
542   template <typename Pixel>
543   void DirectionalPrediction(const Block& block, Plane plane, int x, int y,
544                              bool has_left, bool has_top, bool needs_left,
545                              bool needs_top, int prediction_angle, int width,
546                              int height, int max_x, int max_y,
547                              TransformSize tx_size, Pixel* top_row,
548                              Pixel* left_column);  // 7.11.2.4.
549   template <typename Pixel>
550   void PalettePrediction(const Block& block, Plane plane, int start_x,
551                          int start_y, int x, int y,
552                          TransformSize tx_size);  // 7.11.4.
553   template <typename Pixel>
554   void ChromaFromLumaPrediction(const Block& block, Plane plane, int start_x,
555                                 int start_y,
556                                 TransformSize tx_size);  // 7.11.5.
557   // Section 7.19. Applies some filtering and reordering to the motion vectors
558   // for the given |block| and stores them into |current_frame_|.
559   void StoreMotionFieldMvsIntoCurrentFrame(const Block& block);
560 
561   // Returns the zero-based index of the super block that contains |row4x4|
562   // relative to the start of this tile.
SuperBlockRowIndex(int row4x4)563   int SuperBlockRowIndex(int row4x4) const {
564     return (row4x4 - row4x4_start_) >>
565            (sequence_header_.use_128x128_superblock ? 5 : 4);
566   }
567 
568   // Returns the zero-based index of the super block that contains |column4x4|
569   // relative to the start of this tile.
SuperBlockColumnIndex(int column4x4)570   int SuperBlockColumnIndex(int column4x4) const {
571     return (column4x4 - column4x4_start_) >>
572            (sequence_header_.use_128x128_superblock ? 5 : 4);
573   }
574 
SuperBlockSize()575   BlockSize SuperBlockSize() const {
576     return sequence_header_.use_128x128_superblock ? kBlock128x128
577                                                    : kBlock64x64;
578   }
PlaneCount()579   int PlaneCount() const {
580     return sequence_header_.color_config.is_monochrome ? kMaxPlanesMonochrome
581                                                        : kMaxPlanes;
582   }
583 
  // NOTE(review): presumably |number_| is the index of this tile within the
  // frame and |row_|/|column_| its position in the tile grid - confirm against
  // the constructor.
  const int number_;
  int row_;
  int column_;
  // NOTE(review): presumably the coded bytes of this tile and their length -
  // confirm against the constructor.
  const uint8_t* const data_;
  size_t size_;
  // Tile extent in units of 4x4 blocks. The *_start_ values are the origin
  // that SuperBlockRowIndex() and SuperBlockColumnIndex() measure from.
  // NOTE(review): whether the *_end_ values are inclusive or exclusive is not
  // visible here - confirm.
  int row4x4_start_;
  int row4x4_end_;
  int column4x4_start_;
  int column4x4_end_;
  // Dimensions of |residual_buffer_threaded_|, which holds one entry per super
  // block.
  int superblock_rows_;
  int superblock_columns_;
  bool read_deltas_;
  // Chroma subsampling, indexed by plane.
  const int8_t subsampling_x_[kMaxPlanes];
  const int8_t subsampling_y_[kMaxPlanes];
  // NOTE(review): presumably the per-plane row/column bounds beyond which
  // deblocking is not applied - confirm against the definition.
  int deblock_row_limit_[kMaxPlanes];
  int deblock_column_limit_[kMaxPlanes];
600 
  // The dimensions (in order) are: segment_id, level_index (based on plane and
  // direction), reference_frame and mode_id.
  uint8_t deblock_filter_levels_[kMaxSegments][kFrameLfCount]
                                [kNumReferenceFrameTypes][2];

  // current_quantizer_index_ is in the range [0, 255].
  uint8_t current_quantizer_index_;
  // These two arrays (|coefficient_levels_| and |dc_categories_|) are used to
  // store the entropy context. Their dimensions are as follows: First -
  // left/top; Second - plane; Third - row4x4 (if first dimension is
  // left)/column4x4 (if first dimension is top).
  //
  // This is equivalent to the LeftLevelContext and AboveLevelContext arrays in
  // the spec. In the spec, it stores values from 0 through 63 (inclusive). The
  // stored values are used to compute the left and top contexts in
  // GetTransformAllZeroContext. In that function, we only care about the
  // following values: 0, 1, 2, 3 and >= 4. So instead of clamping to 63, we
  // clamp to 4, i.e., all the values greater than 4 are stored as 4.
  std::array<Array2D<uint8_t>, 2> coefficient_levels_;
  // This is equivalent to the LeftDcContext and AboveDcContext arrays in the
  // spec. In the spec, it can store 3 possible values: 0, 1 and 2 (where 1
  // means the value is < 0, 2 means the value is > 0 and 0 means the value is
  // equal to 0).
  //
  // The stored values are used in two places:
  //  * GetTransformAllZeroContext: Here, we only care about whether the
  //  value is 0 or not (whether it is 1 or 2 is irrelevant).
  //  * GetDcSignContext: Here, we do the following computation: if the
  //  stored value is 1, we decrement a counter. If the stored value is 2
  //  we increment a counter.
  //
  // Based on this usage, we can simply replace 1 with -1 and 2 with 1 and
  // use that value to compute the counter.
  //
  // The usage in GetTransformAllZeroContext is unaffected since there we
  // only care about whether it is 0 or not.
  std::array<Array2D<int8_t>, 2> dc_categories_;
  const ObuSequenceHeader& sequence_header_;
  const ObuFrameHeader& frame_header_;
  const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias_;
  const std::array<RefCountedBufferPtr, kNumReferenceFrameTypes>&
      reference_frames_;
  TemporalMotionField& motion_field_;
  const std::array<uint8_t, kNumReferenceFrameTypes>& reference_order_hint_;
  const WedgeMaskArray& wedge_masks_;
  DaalaBitReader reader_;
  SymbolDecoderContext symbol_decoder_context_;
  SymbolDecoderContext* const saved_symbol_decoder_context_;
  const SegmentationMap* prev_segment_ids_;
  const dsp::Dsp& dsp_;
  PostFilter& post_filter_;
  BlockParametersHolder& block_parameters_holder_;
  Quantizer quantizer_;
  // When there is no multi-threading within the Tile, |residual_buffer_| is
  // used. When there is multi-threading within the Tile,
  // |residual_buffer_threaded_| is used. In the following comment,
  // |residual_buffer| refers to either |residual_buffer_| or
  // |residual_buffer_threaded_| depending on whether multi-threading is enabled
  // within the Tile or not.
  // The |residual_buffer| is used to help with the dequantization and the
  // inverse transform processes. It is declared as a uint8_t, but is always
  // accessed either as an int16_t or int32_t depending on |bitdepth|. Here is
  // what it stores at various stages of the decoding process (in the order in
  // which they happen):
  //   1) In ReadTransformCoefficients(), this buffer is used to store the
  //   dequantized values.
  //   2) In Reconstruct(), this buffer is used as the input to the row
  //   transform process.
  // The size of this buffer would be:
  //    For |residual_buffer_|: (4096 + 32 * |kResidualPaddingVertical|) *
  //        |residual_size_|. Where 4096 = 64x64 which is the maximum transform
  //        size, and 32 * |kResidualPaddingVertical| is the padding to avoid
  //        bottom boundary checks when parsing quantized coefficients. This
  //        memory is allocated and owned by the Tile class.
  //    For |residual_buffer_threaded_|: See the comment below. This memory is
  //        not allocated or owned by the Tile class.
  AlignedUniquePtr<uint8_t> residual_buffer_;
  // This is a 2d array of pointers of size |superblock_rows_| by
  // |superblock_columns_| where each pointer points to a ResidualBuffer for a
  // single super block. The array is populated when the parsing process begins
  // by calling |residual_buffer_pool_->Get()| and the memory is released back
  // to the pool by calling |residual_buffer_pool_->Release()| when the decoding
  // process is complete.
  Array2D<std::unique_ptr<ResidualBuffer>> residual_buffer_threaded_;
  // sizeof(int16_t or int32_t) depending on |bitdepth|.
  const size_t residual_size_;
  // Number of superblocks on the top-right that will have to be decoded before
  // the current superblock can be decoded. This will be 1 if allow_intrabc is
  // false. If allow_intrabc is true, then this value will be
  // use_128x128_superblock ? 3 : 5. This is the allowed range of reference for
  // the top rows for intrabc.
  const int intra_block_copy_lag_;
693 
  // In the Tile class, we use the "current_frame" in two ways:
  //   1) To write the decoded output into (using the |buffer_| view).
  //   2) To read the pixels for intra block copy (using the |current_frame_|
  //      reference).
  //
  // When intra block copy is off, |buffer_| and |current_frame_| may or may not
  // point to the same plane pointers. But it is okay since |current_frame_| is
  // never used in this case.
  //
  // When intra block copy is on, |buffer_| and |current_frame_| always point to
  // the same plane pointers (since post filtering is disabled). So the usage in
  // both case 1 and case 2 remains valid.
  Array2DView<uint8_t> buffer_[kMaxPlanes];
  RefCountedBuffer& current_frame_;

  Array2D<int16_t>& cdef_index_;
  Array2D<TransformSize>& inter_transform_sizes_;
  std::array<RestorationUnitInfo, kMaxPlanes> reference_unit_info_;
  // If |thread_pool_| is nullptr, the calling thread will do the parsing and
  // the decoding in one pass. If |thread_pool_| is not nullptr, then the main
  // thread will do the parsing while the thread pool workers will do the
  // decoding.
  ThreadPool* const thread_pool_;
  ThreadingParameters threading_;
  ResidualBufferPool* const residual_buffer_pool_;
  TileScratchBufferPool* const tile_scratch_buffer_pool_;
  BlockingCounterWithStatus* const pending_tiles_;
  bool split_parse_and_decode_;
  // This is used only when |split_parse_and_decode_| is false.
  std::unique_ptr<PredictionParameters> prediction_parameters_ = nullptr;
  // Stores the |transform_type| for the super block being decoded at a 4x4
  // granularity. The spec uses absolute indices for this array but it is
  // sufficient to use indices relative to the super block being decoded.
  // 32 entries per dimension cover the largest (128x128) super block, which
  // spans 128 / 4 = 32 units of 4x4.
  TransformType transform_types_[32][32];
  // delta_lf_[i] is in the range [-63, 63].
  int8_t delta_lf_[kFrameLfCount];
  // True if all the values in |delta_lf_| are zero. False otherwise.
  bool delta_lf_all_zero_;
  bool build_bit_mask_when_parsing_;
  const bool frame_parallel_;
  const bool use_intra_prediction_buffer_;
  // Buffer used to store the unfiltered pixels that are necessary for decoding
  // the next superblock row (for the intra prediction process). Used only if
  // |use_intra_prediction_buffer_| is true.
  std::array<AlignedDynamicBuffer<uint8_t, kMaxAlignment>, kMaxPlanes>
      intra_prediction_buffer_;
740 };
741 
742 struct Tile::Block {
BlockBlock743   Block(const Tile& tile, BlockSize size, int row4x4, int column4x4,
744         TileScratchBuffer* const scratch_buffer, ResidualPtr* residual)
745       : tile(tile),
746         size(size),
747         row4x4(row4x4),
748         column4x4(column4x4),
749         width(kBlockWidthPixels[size]),
750         height(kBlockHeightPixels[size]),
751         width4x4(width >> 2),
752         height4x4(height >> 2),
753         scratch_buffer(scratch_buffer),
754         residual(residual) {
755     assert(size != kBlockInvalid);
756     residual_size[kPlaneY] = kPlaneResidualSize[size][0][0];
757     residual_size[kPlaneU] = residual_size[kPlaneV] =
758         kPlaneResidualSize[size][tile.subsampling_x_[kPlaneU]]
759                           [tile.subsampling_y_[kPlaneU]];
760     assert(residual_size[kPlaneY] != kBlockInvalid);
761     if (tile.PlaneCount() > 1) {
762       assert(residual_size[kPlaneU] != kBlockInvalid);
763     }
764     if ((row4x4 & 1) == 0 &&
765         (tile.sequence_header_.color_config.subsampling_y & height4x4) == 1) {
766       has_chroma = false;
767     } else if ((column4x4 & 1) == 0 &&
768                (tile.sequence_header_.color_config.subsampling_x & width4x4) ==
769                    1) {
770       has_chroma = false;
771     } else {
772       has_chroma = !tile.sequence_header_.color_config.is_monochrome;
773     }
774     top_available[kPlaneY] = tile.IsTopInside(row4x4);
775     left_available[kPlaneY] = tile.IsLeftInside(column4x4);
776     if (has_chroma) {
777       // top_available[kPlaneU] and top_available[kPlaneV] are valid only if
778       // has_chroma is true.
779       // The next 3 lines are equivalent to:
780       // top_available[kPlaneU] = top_available[kPlaneV] =
781       //     top_available[kPlaneY] &&
782       //     ((tile.sequence_header_.color_config.subsampling_y & height4x4) ==
783       //     0 || tile.IsTopInside(row4x4 - 1));
784       top_available[kPlaneU] = top_available[kPlaneV] = tile.IsTopInside(
785           row4x4 -
786           (tile.sequence_header_.color_config.subsampling_y & height4x4));
787       // left_available[kPlaneU] and left_available[kPlaneV] are valid only if
788       // has_chroma is true.
789       // The next 3 lines are equivalent to:
790       // left_available[kPlaneU] = left_available[kPlaneV] =
791       //     left_available[kPlaneY] &&
792       //     ((tile.sequence_header_.color_config.subsampling_x & width4x4) == 0
793       //      || tile.IsLeftInside(column4x4 - 1));
794       left_available[kPlaneU] = left_available[kPlaneV] = tile.IsLeftInside(
795           column4x4 -
796           (tile.sequence_header_.color_config.subsampling_x & width4x4));
797     }
798     const ptrdiff_t stride = tile.BlockParametersStride();
799     BlockParameters** const bps =
800         tile.BlockParametersAddress(row4x4, column4x4);
801     bp = *bps;
802     // bp_top is valid only if top_available[kPlaneY] is true.
803     if (top_available[kPlaneY]) {
804       bp_top = *(bps - stride);
805     }
806     // bp_left is valid only if left_available[kPlaneY] is true.
807     if (left_available[kPlaneY]) {
808       bp_left = *(bps - 1);
809     }
810   }
811 
HasChromaBlock812   bool HasChroma() const { return has_chroma; }
813 
814   // These return values of these group of functions are valid only if the
815   // corresponding top_available or left_available is true.
TopReferenceBlock816   ReferenceFrameType TopReference(int index) const {
817     return bp_top->reference_frame[index];
818   }
819 
LeftReferenceBlock820   ReferenceFrameType LeftReference(int index) const {
821     return bp_left->reference_frame[index];
822   }
823 
IsTopIntraBlock824   bool IsTopIntra() const { return TopReference(0) <= kReferenceFrameIntra; }
IsLeftIntraBlock825   bool IsLeftIntra() const { return LeftReference(0) <= kReferenceFrameIntra; }
826 
IsTopSingleBlock827   bool IsTopSingle() const { return TopReference(1) <= kReferenceFrameIntra; }
IsLeftSingleBlock828   bool IsLeftSingle() const { return LeftReference(1) <= kReferenceFrameIntra; }
829 
CountReferencesBlock830   int CountReferences(ReferenceFrameType type) const {
831     return static_cast<int>(top_available[kPlaneY] &&
832                             bp_top->reference_frame[0] == type) +
833            static_cast<int>(top_available[kPlaneY] &&
834                             bp_top->reference_frame[1] == type) +
835            static_cast<int>(left_available[kPlaneY] &&
836                             bp_left->reference_frame[0] == type) +
837            static_cast<int>(left_available[kPlaneY] &&
838                             bp_left->reference_frame[1] == type);
839   }
840 
841   // 7.10.3.
842   // Checks if there are any inter blocks to the left or above. If so, it
843   // returns true indicating that the block has neighbors that are suitable for
844   // use by overlapped motion compensation.
HasOverlappableCandidatesBlock845   bool HasOverlappableCandidates() const {
846     const ptrdiff_t stride = tile.BlockParametersStride();
847     BlockParameters** const bps = tile.BlockParametersAddress(0, 0);
848     if (top_available[kPlaneY]) {
849       BlockParameters** bps_top = bps + (row4x4 - 1) * stride + (column4x4 | 1);
850       const int columns = std::min(tile.frame_header_.columns4x4 - column4x4,
851                                    static_cast<int>(width4x4));
852       BlockParameters** const bps_top_end = bps_top + columns;
853       do {
854         if ((*bps_top)->reference_frame[0] > kReferenceFrameIntra) {
855           return true;
856         }
857         bps_top += 2;
858       } while (bps_top < bps_top_end);
859     }
860     if (left_available[kPlaneY]) {
861       BlockParameters** bps_left = bps + (row4x4 | 1) * stride + column4x4 - 1;
862       const int rows = std::min(tile.frame_header_.rows4x4 - row4x4,
863                                 static_cast<int>(height4x4));
864       BlockParameters** const bps_left_end = bps_left + rows * stride;
865       do {
866         if ((*bps_left)->reference_frame[0] > kReferenceFrameIntra) {
867           return true;
868         }
869         bps_left += 2 * stride;
870       } while (bps_left < bps_left_end);
871     }
872     return false;
873   }
874 
875   const Tile& tile;
876   bool has_chroma;
877   const BlockSize size;
878   bool top_available[kMaxPlanes];
879   bool left_available[kMaxPlanes];
880   BlockSize residual_size[kMaxPlanes];
881   const int row4x4;
882   const int column4x4;
883   const int width;
884   const int height;
885   const int width4x4;
886   const int height4x4;
887   const BlockParameters* bp_top;
888   const BlockParameters* bp_left;
889   BlockParameters* bp;
890   TileScratchBuffer* const scratch_buffer;
891   ResidualPtr* const residual;
892 };
893 
894 }  // namespace libgav1
895 
896 #endif  // LIBGAV1_SRC_TILE_H_
897