1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_TILE_H_ 18 #define LIBGAV1_SRC_TILE_H_ 19 20 #include <algorithm> 21 #include <array> 22 #include <cassert> 23 #include <condition_variable> // NOLINT (unapproved c++11 header) 24 #include <cstddef> 25 #include <cstdint> 26 #include <memory> 27 #include <mutex> // NOLINT (unapproved c++11 header) 28 #include <vector> 29 30 #include "src/buffer_pool.h" 31 #include "src/decoder_state.h" 32 #include "src/dsp/common.h" 33 #include "src/dsp/constants.h" 34 #include "src/dsp/dsp.h" 35 #include "src/frame_scratch_buffer.h" 36 #include "src/loop_filter_mask.h" 37 #include "src/loop_restoration_info.h" 38 #include "src/obu_parser.h" 39 #include "src/post_filter.h" 40 #include "src/quantizer.h" 41 #include "src/residual_buffer_pool.h" 42 #include "src/symbol_decoder_context.h" 43 #include "src/tile_scratch_buffer.h" 44 #include "src/utils/array_2d.h" 45 #include "src/utils/block_parameters_holder.h" 46 #include "src/utils/blocking_counter.h" 47 #include "src/utils/common.h" 48 #include "src/utils/compiler_attributes.h" 49 #include "src/utils/constants.h" 50 #include "src/utils/entropy_decoder.h" 51 #include "src/utils/memory.h" 52 #include "src/utils/parameter_tree.h" 53 #include "src/utils/segmentation_map.h" 54 #include "src/utils/threadpool.h" 55 #include "src/utils/types.h" 56 #include "src/yuv_buffer.h" 57 58 namespace libgav1 { 59 60 
// Indicates what the ProcessSuperBlock() and TransformBlock() functions should 61 // do. "Parse" refers to consuming the bitstream, reading the transform 62 // coefficients and performing the dequantization. "Decode" refers to computing 63 // the prediction, applying the inverse transforms and adding the residual. 64 enum ProcessingMode { 65 kProcessingModeParseOnly, 66 kProcessingModeDecodeOnly, 67 kProcessingModeParseAndDecode, 68 }; 69 70 class Tile : public Allocable { 71 public: Create(int tile_number,const uint8_t * const data,size_t size,const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,RefCountedBuffer * const current_frame,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,const WedgeMaskArray & wedge_masks,SymbolDecoderContext * const saved_symbol_decoder_context,const SegmentationMap * prev_segment_ids,PostFilter * const post_filter,BlockParametersHolder * const block_parameters_holder,const dsp::Dsp * const dsp,ThreadPool * const thread_pool,BlockingCounterWithStatus * const pending_tiles,bool frame_parallel,bool use_intra_prediction_buffer)72 static std::unique_ptr<Tile> Create( 73 int tile_number, const uint8_t* const data, size_t size, 74 const ObuSequenceHeader& sequence_header, 75 const ObuFrameHeader& frame_header, RefCountedBuffer* const current_frame, 76 const DecoderState& state, FrameScratchBuffer* const frame_scratch_buffer, 77 const WedgeMaskArray& wedge_masks, 78 SymbolDecoderContext* const saved_symbol_decoder_context, 79 const SegmentationMap* prev_segment_ids, PostFilter* const post_filter, 80 BlockParametersHolder* const block_parameters_holder, 81 const dsp::Dsp* const dsp, ThreadPool* const thread_pool, 82 BlockingCounterWithStatus* const pending_tiles, bool frame_parallel, 83 bool use_intra_prediction_buffer) { 84 std::unique_ptr<Tile> tile(new (std::nothrow) Tile( 85 tile_number, data, size, sequence_header, frame_header, current_frame, 86 state, frame_scratch_buffer, wedge_masks, 
saved_symbol_decoder_context, 87 prev_segment_ids, post_filter, block_parameters_holder, dsp, 88 thread_pool, pending_tiles, frame_parallel, 89 use_intra_prediction_buffer)); 90 return (tile != nullptr && tile->Init()) ? std::move(tile) : nullptr; 91 } 92 93 // Move only. 94 Tile(Tile&& tile) noexcept; 95 Tile& operator=(Tile&& tile) noexcept; 96 Tile(const Tile&) = delete; 97 Tile& operator=(const Tile&) = delete; 98 99 struct Block; // Defined after this class. 100 101 // Parses the entire tile. 102 bool Parse(); 103 // Parses and decodes the entire tile. Depending on the configuration of this 104 // Tile, this function may do multithreaded decoding. 105 bool ParseAndDecode(bool is_main_thread); // 5.11.2. 106 // Processes all the columns of the superblock row at |row4x4| that are within 107 // this Tile. If |save_symbol_decoder_context| is true, then 108 // SaveSymbolDecoderContext() is invoked for the last superblock row. 109 template <ProcessingMode processing_mode, bool save_symbol_decoder_context> 110 bool ProcessSuperBlockRow(int row4x4, TileScratchBuffer* scratch_buffer); 111 sequence_header()112 const ObuSequenceHeader& sequence_header() const { return sequence_header_; } frame_header()113 const ObuFrameHeader& frame_header() const { return frame_header_; } current_frame()114 const RefCountedBuffer& current_frame() const { return current_frame_; } motion_field()115 const TemporalMotionField& motion_field() const { return motion_field_; } reference_frame_sign_bias()116 const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias() 117 const { 118 return reference_frame_sign_bias_; 119 } 120 121 // 5.11.51. IsInside(int row4x4,int column4x4)122 bool IsInside(int row4x4, int column4x4) const { 123 return row4x4 >= row4x4_start_ && row4x4 < row4x4_end_ && 124 column4x4 >= column4x4_start_ && column4x4 < column4x4_end_; 125 } 126 IsLeftInside(int column4x4)127 bool IsLeftInside(int column4x4) const { 128 // We use "larger than" as the condition. 
Don't pass in the left column 129 // offset column4x4 - 1. 130 assert(column4x4 <= column4x4_end_); 131 return column4x4 > column4x4_start_; 132 } 133 IsTopInside(int row4x4)134 bool IsTopInside(int row4x4) const { 135 // We use "larger than" as the condition. Don't pass in the top row offset 136 // row4x4 - 1. 137 assert(row4x4 <= row4x4_end_); 138 return row4x4 > row4x4_start_; 139 } 140 IsTopLeftInside(int row4x4,int column4x4)141 bool IsTopLeftInside(int row4x4, int column4x4) const { 142 // We use "larger than" as the condition. Don't pass in the top row offset 143 // row4x4 - 1 or the left column offset column4x4 - 1. 144 assert(row4x4 <= row4x4_end_); 145 assert(column4x4 <= column4x4_end_); 146 return row4x4 > row4x4_start_ && column4x4 > column4x4_start_; 147 } 148 IsBottomRightInside(int row4x4,int column4x4)149 bool IsBottomRightInside(int row4x4, int column4x4) const { 150 assert(row4x4 >= row4x4_start_); 151 assert(column4x4 >= column4x4_start_); 152 return row4x4 < row4x4_end_ && column4x4 < column4x4_end_; 153 } 154 BlockParametersAddress(int row4x4,int column4x4)155 BlockParameters** BlockParametersAddress(int row4x4, int column4x4) const { 156 return block_parameters_holder_.Address(row4x4, column4x4); 157 } 158 BlockParametersStride()159 int BlockParametersStride() const { 160 return block_parameters_holder_.columns4x4(); 161 } 162 163 // Returns true if Parameters() can be called with |row| and |column| as 164 // inputs, false otherwise. 
bool HasParameters(int row, int column) const {
  return block_parameters_holder_.Find(row, column) != nullptr;
}
// Returns the BlockParameters found for |row| and |column|. The result of
// Find() is dereferenced without a null check, so callers must make sure
// HasParameters(row, column) is true first.
const BlockParameters& Parameters(int row, int column) const {
  return *block_parameters_holder_.Find(row, column);
}
// Simple read-only accessors.
int number() const { return number_; }
int superblock_rows() const { return superblock_rows_; }
int superblock_columns() const { return superblock_columns_; }

private:
// Private constructor; Create() is the only caller visible in this header and
// passes the same arguments in the same order.
Tile(int tile_number, const uint8_t* data, size_t size,
     const ObuSequenceHeader& sequence_header,
     const ObuFrameHeader& frame_header, RefCountedBuffer* current_frame,
     const DecoderState& state, FrameScratchBuffer* frame_scratch_buffer,
     const WedgeMaskArray& wedge_masks,
     SymbolDecoderContext* saved_symbol_decoder_context,
     const SegmentationMap* prev_segment_ids, PostFilter* post_filter,
     BlockParametersHolder* block_parameters_holder, const dsp::Dsp* dsp,
     ThreadPool* thread_pool, BlockingCounterWithStatus* pending_tiles,
     bool frame_parallel, bool use_intra_prediction_buffer);

// Stores the transform tree state when reading variable size transform trees
// and when applying the transform tree. When applying the transform tree,
// |depth| is not used.
struct TransformTreeNode {
  // The default constructor is invoked by the Stack<TransformTreeNode, n>
  // constructor. Stack<> does not use the default-constructed elements, so it
  // is safe for the default constructor to not initialize the members.
  TransformTreeNode() = default;
  // |depth| defaults to -1 for callers that do not track it.
  TransformTreeNode(int x, int y, TransformSize tx_size, int depth = -1)
      : x(x), y(y), tx_size(tx_size), depth(depth) {}

  int x;
  int y;
  TransformSize tx_size;
  int depth;
};

// Parameters used to facilitate multi-threading within the Tile.
struct ThreadingParameters {
  // Guards the members annotated with LIBGAV1_GUARDED_BY(mutex) below.
  std::mutex mutex;
  // Array2DView of size |superblock_rows_| by |superblock_columns_|
  // containing the processing state of each superblock. The code in this
  // class uses relative indexing of superblocks with respect to this Tile.
  // The memory for this comes from the caller. The memory is for the whole
  // frame whereas the |sb_state| array in this struct points to the beginning
  // of this Tile.
  // NOTE(review): this comment used to name a |super_block_state| constructor
  // parameter, but the constructor declared in this class has no such
  // parameter; presumably the storage now arrives via |frame_scratch_buffer|
  // -- confirm against the constructor definition.
  Array2DView<SuperBlockState> sb_state LIBGAV1_GUARDED_BY(mutex);
  // Variable used to indicate either parse or decode failure.
  bool abort LIBGAV1_GUARDED_BY(mutex) = false;
  // NOTE(review): presumably the number of decode jobs that have been
  // scheduled but have not finished yet, with |pending_jobs_zero_condvar|
  // signaled once it drops to zero -- confirm against the implementation.
  int pending_jobs LIBGAV1_GUARDED_BY(mutex) = 0;
  std::condition_variable pending_jobs_zero_condvar;
};

// The residual pointer is used to traverse the |residual_buffer_|. It is
// used in two different ways.
// If |split_parse_and_decode_| is true:
//    The pointer points to the beginning of the |residual_buffer_| when the
//    "parse" and "decode" steps begin. It is then moved forward tx_size in
//    each iteration of the "parse" and the "decode" steps. In this case, the
//    ResidualPtr variable passed into various functions starting from
//    ProcessSuperBlock is used as an in/out parameter to keep track of the
//    residual pointer.
// If |split_parse_and_decode_| is false:
//    The pointer is reset to the beginning of the |residual_buffer_| for
//    every transform block.
// NOTE(review): ProcessSuperBlock() as declared in this class takes no
// ResidualPtr argument; the functions that take a |ResidualPtr*| are
// ProcessPartition(), ProcessBlock(), DecodeSuperBlock() and DecodeBlock().
using ResidualPtr = uint8_t*;

// Performs member initializations that may fail. Helper function used by
// Create(), which returns nullptr when this returns false.
LIBGAV1_MUST_USE_RESULT bool Init();

// Saves the symbol decoder context of this tile into
// |saved_symbol_decoder_context_| if necessary.
void SaveSymbolDecoderContext();

// Entry point for multi-threaded decoding. This function performs the same
// functionality as ParseAndDecode(). The current thread does the "parse" step
// while the worker threads do the "decode" step.
bool ThreadedParseAndDecode();

// Returns whether or not the prerequisites for decoding the superblock at
// |row_index| and |column_index| are satisfied. |threading_.mutex| must be
// held when calling this function.
bool CanDecode(int row_index, int column_index) const;

// This function is run by the worker threads when multi-threaded decoding is
// enabled. Once a superblock is decoded, this function will set the
// corresponding |threading_.sb_state| entry to kSuperBlockStateDecoded. On
// failure, |threading_.abort| will be set to true. If at any point
// |threading_.abort| becomes true, this function will return as early as it
// can. If the decoding succeeds, this function will also schedule the
// decoding jobs for the superblock to the bottom-left and the superblock to
// the right of this superblock (if it is allowed).
void DecodeSuperBlock(int row_index, int column_index, int block_width4x4);

// If |use_intra_prediction_buffer_| is true, then this function copies the
// last row of the superblock row starting at |row4x4| into the
// |intra_prediction_buffer_| (which may be used by the intra prediction
// process for the next superblock row).
void PopulateIntraPredictionBuffer(int row4x4);

// NOTE(review): the next two declarations carry no contract in this header.
// Presumably GetPartitionCdf() returns the CDF used to read the partition
// symbol, and ReadPartition() writes the result to |*partition| and returns
// false on failure -- confirm against the definitions.
uint16_t* GetPartitionCdf(int row4x4, int column4x4, BlockSize block_size);
bool ReadPartition(int row4x4, int column4x4, BlockSize block_size,
                   bool has_rows, bool has_columns, Partition* partition);
// Processes the Partition starting at |row4x4_start|, |column4x4_start|
// iteratively. It performs a DFS traversal over the partition tree to process
// the blocks in the right order.
// In the declarations below, |residual| is an in/out pointer to a
// ResidualPtr. Trailing comments such as "5.11.4" are section numbers in the
// spec.
bool ProcessPartition(
    int row4x4_start, int column4x4_start, ParameterTree* root,
    TileScratchBuffer* scratch_buffer,
    ResidualPtr* residual);  // Iterative implementation of 5.11.4.
bool ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
                  ParameterTree* tree, TileScratchBuffer* scratch_buffer,
                  ResidualPtr* residual);   // 5.11.5.
void ResetCdef(int row4x4, int column4x4);  // 5.11.55.

// This function is used to decode a superblock when the parsing has already
// been done for that superblock. It is a different overload from
// DecodeSuperBlock(int row_index, int column_index, int block_width4x4),
// which is the function run by the worker threads.
bool DecodeSuperBlock(ParameterTree* tree, TileScratchBuffer* scratch_buffer,
                      ResidualPtr* residual);
// Helper function used by DecodeSuperBlock(). Note that the decode_block()
// function in the spec is equivalent to ProcessBlock() in the code.
bool DecodeBlock(ParameterTree* tree, TileScratchBuffer* scratch_buffer,
                 ResidualPtr* residual);

void ClearBlockDecoded(TileScratchBuffer* scratch_buffer, int row4x4,
                       int column4x4);  // 5.11.3.
// Processes the superblock at |row4x4|, |column4x4|; |mode| selects the
// "parse" and/or "decode" work (see ProcessingMode).
bool ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4,
                       TileScratchBuffer* scratch_buffer,
                       ProcessingMode mode);
void ResetLoopRestorationParams();
void ReadLoopRestorationCoefficients(int row4x4, int column4x4,
                                     BlockSize block_size);  // 5.11.57.
// Build bit masks for vertical edges followed by horizontal edges.
// Traverse through each transform edge in the current coding block, and
// determine if a 4x4 edge needs filtering. If filtering is needed, determine
// filter length. Set corresponding bit mask to 1.
void BuildBitMask(const Block& block);
void BuildBitMaskHelper(const Block& block, int row4x4, int column4x4,
                        BlockSize block_size, bool is_vertical_block_border,
                        bool is_horizontal_block_border);

// Helper functions for DecodeBlock.
// NOTE(review): the meaning of the bool return values is not documented here;
// presumably false signals a failure -- confirm against the definitions.
bool ReadSegmentId(const Block& block);       // 5.11.9.
bool ReadIntraSegmentId(const Block& block);  // 5.11.8.
// Per-block helper declarations. A trailing "5.11.x" comment is the section
// of the spec that the function corresponds to.
void ReadSkip(const Block& block);      // 5.11.11.
void ReadSkipMode(const Block& block);  // 5.11.10.
void ReadCdef(const Block& block);      // 5.11.56.
// Returns the new value. |cdf| is an array of size kDeltaSymbolCount + 1.
// NOTE(review): judging by the name and parameters, the result is presumably
// clipped to [|min_value|, |max_value|] -- confirm against the definition.
int ReadAndClipDelta(uint16_t* cdf, int delta_small, int scale, int min_value,
                     int max_value, int value);
void ReadQuantizerIndexDelta(const Block& block);  // 5.11.12.
void ReadLoopFilterDelta(const Block& block);      // 5.11.13.
// Populates |BlockParameters::deblock_filter_level| for the given |block|
// using |deblock_filter_levels_|.
void PopulateDeblockFilterLevel(const Block& block);
void ReadPredictionModeY(const Block& block, bool intra_y_mode);
void ReadIntraAngleInfo(const Block& block,
                        PlaneType plane_type);  // 5.11.42 and 5.11.43.
void ReadPredictionModeUV(const Block& block);
void ReadCflAlpha(const Block& block);  // 5.11.45.
// NOTE(review): no contract is documented here. Presumably |cache| is an
// output array and the return value is the number of entries written to it
// -- confirm against the definition.
int GetPaletteCache(const Block& block, PlaneType plane_type,
                    uint16_t* cache);
void ReadPaletteColors(const Block& block, Plane plane);
void ReadPaletteModeInfo(const Block& block);      // 5.11.46.
void ReadFilterIntraModeInfo(const Block& block);  // 5.11.24.
int ReadMotionVectorComponent(const Block& block,
                              int component);          // 5.11.32.
void ReadMotionVector(const Block& block, int index);  // 5.11.31.
bool DecodeIntraModeInfo(const Block& block);          // 5.11.7.
int8_t ComputePredictedSegmentId(const Block& block) const;  // 5.11.21.
bool ReadInterSegmentId(const Block& block, bool pre_skip);  // 5.11.19.
void ReadIsInter(const Block& block);                        // 5.11.20.
bool ReadIntraBlockModeInfo(const Block& block,
                            bool intra_y_mode);  // 5.11.22.
// Declarations of the reference frame and inter mode info helpers. Functions
// whose names start with "Get" and end with "Cdf" return a uint16_t pointer;
// NOTE(review): presumably into |symbol_decoder_context_| -- confirm against
// the definitions.
int GetUseCompoundReferenceContext(const Block& block);
CompoundReferenceType ReadCompoundReferenceType(const Block& block);
// Calculates count0 by calling block.CountReferences() on the frame types
// from type0_start to type0_end, inclusive, and summing the results.
// Calculates count1 by calling block.CountReferences() on the frame types
// from type1_start to type1_end, inclusive, and summing the results.
// Compares count0 with count1 and returns 0, 1 or 2.
//
// See count_refs and ref_count_ctx in 8.3.2.
int GetReferenceContext(const Block& block, ReferenceFrameType type0_start,
                        ReferenceFrameType type0_end,
                        ReferenceFrameType type1_start,
                        ReferenceFrameType type1_end) const;
// |type| defaults to kNumCompoundReferenceTypes.
// NOTE(review): presumably that default means "no compound reference type"
// and is only meaningful for some template instantiations -- confirm.
template <bool is_single, bool is_backward, int index>
uint16_t* GetReferenceCdf(const Block& block, CompoundReferenceType type =
                                                  kNumCompoundReferenceTypes);
void ReadReferenceFrames(const Block& block);  // 5.11.25.
void ReadInterPredictionModeY(const Block& block,
                              const MvContexts& mode_contexts);
void ReadRefMvIndex(const Block& block);
void ReadInterIntraMode(const Block& block, bool is_compound);  // 5.11.28.
bool IsScaled(ReferenceFrameType type) const;  // Part of 5.11.27.
void ReadMotionMode(const Block& block, bool is_compound);  // 5.11.27.
uint16_t* GetIsExplicitCompoundTypeCdf(const Block& block);
uint16_t* GetIsCompoundTypeAverageCdf(const Block& block);
void ReadCompoundType(const Block& block, bool is_compound);  // 5.11.29.
uint16_t* GetInterpolationFilterCdf(const Block& block, int direction);
void ReadInterpolationFilter(const Block& block);
bool ReadInterBlockModeInfo(const Block& block);             // 5.11.23.
bool DecodeInterModeInfo(const Block& block);                // 5.11.18.
bool DecodeModeInfo(const Block& block);                     // 5.11.6.
bool IsMvValid(const Block& block, bool is_compound) const;  // 6.10.25.
// Declarations of the motion vector assignment, transform size and transform
// type helpers.
// Both AssignInterMv() and AssignIntraMv() are tagged with section 5.11.26.
bool AssignInterMv(const Block& block, bool is_compound);    // 5.11.26.
bool AssignIntraMv(const Block& block);                      // 5.11.26.
// NOTE(review): the role of |ignore_skip| in the next two functions is not
// documented in this header -- confirm against the definitions.
int GetTopTransformWidth(const Block& block, int row4x4, int column4x4,
                         bool ignore_skip);
int GetLeftTransformHeight(const Block& block, int row4x4, int column4x4,
                           bool ignore_skip);
TransformSize ReadFixedTransformSize(const Block& block);  // 5.11.15.
// Iterative implementation of 5.11.17.
void ReadVariableTransformTree(const Block& block, int row4x4, int column4x4,
                               TransformSize tx_size);
void DecodeTransformSize(const Block& block);  // 5.11.16.
bool ComputePrediction(const Block& block);    // 5.11.33.
// |x4| and |y4| are the column and row positions of the 4x4 block. |w4| and
// |h4| are the width and height in 4x4 units of |tx_size|.
int GetTransformAllZeroContext(const Block& block, Plane plane,
                               TransformSize tx_size, int x4, int y4, int w4,
                               int h4);
TransformSet GetTransformSet(TransformSize tx_size,
                             bool is_inter) const;  // 5.11.48.
TransformType ComputeTransformType(const Block& block, Plane plane,
                                   TransformSize tx_size, int block_x,
                                   int block_y);  // 5.11.40.
void ReadTransformType(const Block& block, int x4, int y4,
                       TransformSize tx_size);  // 5.11.47.
// Declarations of the coefficient context and coefficient base helpers.
int GetCoeffBaseContextEob(TransformSize tx_size, int index);
int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
                                TransformClass tx_class);
// The three ReadCoeffBase*() function templates below share one parameter
// list. |ResidualType| is the element type of |quantized_buffer|.
// NOTE(review): the 2D / Horizontal / Vertical variants presumably correspond
// to the transform class of the block being read -- confirm against the
// definitions.
template <typename ResidualType>
void ReadCoeffBase2D(
    const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
    int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    ResidualType* quantized_buffer);
template <typename ResidualType>
void ReadCoeffBaseHorizontal(
    const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
    int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    ResidualType* quantized_buffer);
template <typename ResidualType>
void ReadCoeffBaseVertical(
    const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
    int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    ResidualType* quantized_buffer);
int GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane);
// NOTE(review): presumably stores |coefficient_level| and |dc_category| into
// |coefficient_levels_| and |dc_categories_| for the given 4x4 range --
// confirm against the definition.
void SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
                        uint8_t coefficient_level, int8_t dc_category);
void InterIntraPrediction(
    uint16_t* prediction_0, const uint8_t* prediction_mask,
    ptrdiff_t prediction_mask_stride,
    const PredictionParameters& prediction_parameters, int prediction_width,
    int prediction_height, int subsampling_x, int subsampling_y,
    uint8_t* dest,
    ptrdiff_t dest_stride);  // Part of section 7.11.3.1 in the spec.
// Declarations of the inter prediction helpers. CompoundInterPrediction(),
// GetWarpParams() and InterPrediction() all belong to section 7.11.3.1 of the
// spec.
void CompoundInterPrediction(
    const Block& block, const uint8_t* prediction_mask,
    ptrdiff_t prediction_mask_stride, int prediction_width,
    int prediction_height, int subsampling_x, int subsampling_y,
    int candidate_row, int candidate_column, uint8_t* dest,
    ptrdiff_t dest_stride);  // Part of section 7.11.3.1 in the spec.
// NOTE(review): no contract is documented here. Presumably the returned
// pointer is either |global_motion_params|, |local_warp_params| or nullptr,
// and |*is_local_valid| is both read and written -- confirm against the
// definition.
GlobalMotion* GetWarpParams(const Block& block, Plane plane,
                            int prediction_width, int prediction_height,
                            const PredictionParameters& prediction_parameters,
                            ReferenceFrameType reference_type,
                            bool* is_local_valid,
                            GlobalMotion* global_motion_params,
                            GlobalMotion* local_warp_params)
    const;  // Part of section 7.11.3.1 in the spec.
bool InterPrediction(const Block& block, Plane plane, int x, int y,
                     int prediction_width, int prediction_height,
                     int candidate_row, int candidate_column,
                     bool* is_local_valid,
                     GlobalMotion* local_warp_params);  // 7.11.3.1.
// |start_x|, |start_y|, |step_x| and |step_y| are non-const pointers.
// NOTE(review): presumably all four are output parameters -- confirm.
void ScaleMotionVector(const MotionVector& mv, Plane plane,
                       int reference_frame_index, int x, int y, int* start_x,
                       int* start_y, int* step_x, int* step_y);  // 7.11.3.3.
// If the method returns false, the caller only uses the output parameters
// *ref_block_start_x and *ref_block_start_y. If the method returns true, the
// caller uses all three output parameters.
// Static: does not use any Tile member. The three output parameters are
// |ref_block_start_x|, |ref_block_start_y| and |ref_block_end_x|.
static bool GetReferenceBlockPosition(
    int reference_frame_index, bool is_scaled, int width, int height,
    int ref_start_x, int ref_last_x, int ref_start_y, int ref_last_y,
    int start_x, int start_y, int step_x, int step_y, int left_border,
    int right_border, int top_border, int bottom_border,
    int* ref_block_start_x, int* ref_block_start_y, int* ref_block_end_x);

// |Pixel| is a template parameter; |block_buffer| is passed as uint8_t*.
// NOTE(review): presumably |block_buffer| is reinterpreted as Pixel* in the
// definition, with |Pixel| chosen by bitdepth -- confirm.
template <typename Pixel>
void BuildConvolveBlock(Plane plane, int reference_frame_index,
                        bool is_scaled, int height, int ref_start_x,
                        int ref_last_x, int ref_start_y, int ref_last_y,
                        int step_y, int ref_block_start_x,
                        int ref_block_end_x, int ref_block_start_y,
                        uint8_t* block_buffer,
                        ptrdiff_t convolve_buffer_stride,
                        ptrdiff_t block_extended_width);
bool BlockInterPrediction(const Block& block, Plane plane,
                          int reference_frame_index, const MotionVector& mv,
                          int x, int y, int width, int height,
                          int candidate_row, int candidate_column,
                          uint16_t* prediction, bool is_compound,
                          bool is_inter_intra, uint8_t* dest,
                          ptrdiff_t dest_stride);  // 7.11.3.4.
bool BlockWarpProcess(const Block& block, Plane plane, int index,
                      int block_start_x, int block_start_y, int width,
                      int height, GlobalMotion* warp_params, bool is_compound,
                      bool is_inter_intra, uint8_t* dest,
                      ptrdiff_t dest_stride);  // 7.11.3.5.
// NOTE(review): presumably a helper of ObmcPrediction() that handles a single
// neighboring block in the given |blending_direction| -- confirm.
bool ObmcBlockPrediction(const Block& block, const MotionVector& mv,
                         Plane plane, int reference_frame_index, int width,
                         int height, int x, int y, int candidate_row,
                         int candidate_column,
                         ObmcDirection blending_direction);
bool ObmcPrediction(const Block& block, Plane plane, int width,
                    int height);  // 7.11.3.9.
// |prediction_0| and |prediction_1| are passed as void*.
// NOTE(review): their element type is not stated in this header -- confirm
// against the definition.
void DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
                                int width, int height, int candidate_row,
                                int candidate_column, uint8_t* dest,
                                ptrdiff_t dest_stride);  // 7.11.3.15.
// This function specializes the parsing of DC coefficient by removing some of
// the branches when i == 0 (since scan[0] is always 0 and scan[i] is always
// non-zero for all other possible values of i). |dc_category| is an output
// parameter that is populated when |is_dc_coefficient| is true.
// |coefficient_level| is an output parameter which accumulates the
// coefficient level.
// NOTE(review): the meaning of the bool return value is not documented in
// this header -- confirm against the definition.
template <typename ResidualType, bool is_dc_coefficient>
LIBGAV1_ALWAYS_INLINE bool ReadSignAndApplyDequantization(
    const uint16_t* scan, int i, int q_value, const uint8_t* quantizer_matrix,
    int shift, int max_value, uint16_t* dc_sign_cdf, int8_t* dc_category,
    int* coefficient_level,
    ResidualType* residual_buffer);  // Part of 5.11.39.
int ReadCoeffBaseRange(int clamped_tx_size_context, int cdf_context,
                       int plane_type);  // Part of 5.11.39.
// Returns the number of non-zero coefficients that were read. |tx_type| is an
// output parameter that stores the computed transform type for the plane
// whose coefficients were read. Returns -1 on failure.
template <typename ResidualType>
int ReadTransformCoefficients(const Block& block, Plane plane, int start_x,
                              int start_y, TransformSize tx_size,
                              TransformType* tx_type);  // 5.11.39.
// |mode| selects the "parse" and/or "decode" work (see ProcessingMode).
bool TransformBlock(const Block& block, Plane plane, int base_x, int base_y,
                    TransformSize tx_size, int x, int y,
                    ProcessingMode mode);  // 5.11.35.
// Iterative implementation of 5.11.36.
bool TransformTree(const Block& block, int start_x, int start_y,
                   BlockSize plane_size, ProcessingMode mode);
// NOTE(review): |non_zero_coeff_count| is presumably the value returned by
// ReadTransformCoefficients() for the same transform block -- confirm.
void ReconstructBlock(const Block& block, Plane plane, int start_x,
                      int start_y, TransformSize tx_size,
                      TransformType tx_type,
                      int non_zero_coeff_count);       // Part of 7.12.3.
bool Residual(const Block& block, ProcessingMode mode);  // 5.11.34.
// Part of 5.11.5 (reset_block_context() in the spec).
void ResetEntropyContext(const Block& block);
// Populates the |color_context| and |color_order| for the |i|th iteration
// with entries counting down from |start| to |end| (|start| > |end|).
void PopulatePaletteColorContexts(
    const Block& block, PlaneType plane_type, int i, int start, int end,
    uint8_t color_order[kMaxPaletteSquare][kMaxPaletteSize],
    uint8_t color_context[kMaxPaletteSquare]);  // 5.11.50.
bool ReadPaletteTokens(const Block& block);     // 5.11.49.
// The function templates below are parameterized on |Pixel|.
// NOTE(review): |Pixel| is presumably uint8_t or uint16_t depending on
// bitdepth -- confirm against the call sites.
template <typename Pixel>
void IntraPrediction(const Block& block, Plane plane, int x, int y,
                     bool has_left, bool has_top, bool has_top_right,
                     bool has_bottom_left, PredictionMode mode,
                     TransformSize tx_size);
bool IsSmoothPrediction(int row, int column, Plane plane) const;
int GetIntraEdgeFilterType(const Block& block,
                           Plane plane) const;  // 7.11.2.8.
template <typename Pixel>
void DirectionalPrediction(const Block& block, Plane plane, int x, int y,
                           bool has_left, bool has_top, bool needs_left,
                           bool needs_top, int prediction_angle, int width,
                           int height, int max_x, int max_y,
                           TransformSize tx_size, Pixel* top_row,
                           Pixel* left_column);  // 7.11.2.4.
template <typename Pixel>
void PalettePrediction(const Block& block, Plane plane, int start_x,
                       int start_y, int x, int y,
                       TransformSize tx_size);  // 7.11.4.
template <typename Pixel>
void ChromaFromLumaPrediction(const Block& block, Plane plane, int start_x,
                              int start_y,
                              TransformSize tx_size);  // 7.11.5.
// Section 7.19. Applies some filtering and reordering to the motion vectors
// for the given |block| and stores them into |current_frame_|.
void StoreMotionFieldMvsIntoCurrentFrame(const Block& block);

// Returns the zero-based index of the super block that contains |row4x4|
// relative to the start of this tile.
SuperBlockRowIndex(int row4x4)563 int SuperBlockRowIndex(int row4x4) const { 564 return (row4x4 - row4x4_start_) >> 565 (sequence_header_.use_128x128_superblock ? 5 : 4); 566 } 567 568 // Returns the zero-based index of the super block that contains |column4x4| 569 // relative to the start of this tile. SuperBlockColumnIndex(int column4x4)570 int SuperBlockColumnIndex(int column4x4) const { 571 return (column4x4 - column4x4_start_) >> 572 (sequence_header_.use_128x128_superblock ? 5 : 4); 573 } 574 SuperBlockSize()575 BlockSize SuperBlockSize() const { 576 return sequence_header_.use_128x128_superblock ? kBlock128x128 577 : kBlock64x64; 578 } PlaneCount()579 int PlaneCount() const { 580 return sequence_header_.color_config.is_monochrome ? kMaxPlanesMonochrome 581 : kMaxPlanes; 582 } 583 584 const int number_; 585 int row_; 586 int column_; 587 const uint8_t* const data_; 588 size_t size_; 589 int row4x4_start_; 590 int row4x4_end_; 591 int column4x4_start_; 592 int column4x4_end_; 593 int superblock_rows_; 594 int superblock_columns_; 595 bool read_deltas_; 596 const int8_t subsampling_x_[kMaxPlanes]; 597 const int8_t subsampling_y_[kMaxPlanes]; 598 int deblock_row_limit_[kMaxPlanes]; 599 int deblock_column_limit_[kMaxPlanes]; 600 601 // The dimensions (in order) are: segment_id, level_index (based on plane and 602 // direction), reference_frame and mode_id. 603 uint8_t deblock_filter_levels_[kMaxSegments][kFrameLfCount] 604 [kNumReferenceFrameTypes][2]; 605 606 // current_quantizer_index_ is in the range [0, 255]. 607 uint8_t current_quantizer_index_; 608 // These two arrays (|coefficient_levels_| and |dc_categories_|) are used to 609 // store the entropy context. Their dimensions are as follows: First - 610 // left/top; Second - plane; Third - row4x4 (if first dimension is 611 // left)/column4x4 (if first dimension is top). 612 // 613 // This is equivalent to the LeftLevelContext and AboveLevelContext arrays in 614 // the spec. 
In the spec, it stores values from 0 through 63 (inclusive). The 615 // stored values are used to compute the left and top contexts in 616 // GetTransformAllZeroContext. In that function, we only care about the 617 // following values: 0, 1, 2, 3 and >= 4. So instead of clamping to 63, we 618 // clamp to 4 (i.e.) all the values greater than 4 are stored as 4. 619 std::array<Array2D<uint8_t>, 2> coefficient_levels_; 620 // This is equivalent to the LeftDcContext and AboveDcContext arrays in the 621 // spec. In the spec, it can store 3 possible values: 0, 1 and 2 (where 1 622 // means the value is < 0, 2 means the value is > 0 and 0 means the value is 623 // equal to 0). 624 // 625 // The stored values are used in two places: 626 // * GetTransformAllZeroContext: Here, we only care about whether the 627 // value is 0 or not (whether it is 1 or 2 is irrelevant). 628 // * GetDcSignContext: Here, we do the following computation: if the 629 // stored value is 1, we decrement a counter. If the stored value is 2 630 // we increment a counter. 631 // 632 // Based on this usage, we can simply replace 1 with -1 and 2 with 1 and 633 // use that value to compute the counter. 634 // 635 // The usage on GetTransformAllZeroContext is unaffected since there we 636 // only care about whether it is 0 or not. 
637 std::array<Array2D<int8_t>, 2> dc_categories_; 638 const ObuSequenceHeader& sequence_header_; 639 const ObuFrameHeader& frame_header_; 640 const std::array<bool, kNumReferenceFrameTypes>& reference_frame_sign_bias_; 641 const std::array<RefCountedBufferPtr, kNumReferenceFrameTypes>& 642 reference_frames_; 643 TemporalMotionField& motion_field_; 644 const std::array<uint8_t, kNumReferenceFrameTypes>& reference_order_hint_; 645 const WedgeMaskArray& wedge_masks_; 646 DaalaBitReader reader_; 647 SymbolDecoderContext symbol_decoder_context_; 648 SymbolDecoderContext* const saved_symbol_decoder_context_; 649 const SegmentationMap* prev_segment_ids_; 650 const dsp::Dsp& dsp_; 651 PostFilter& post_filter_; 652 BlockParametersHolder& block_parameters_holder_; 653 Quantizer quantizer_; 654 // When there is no multi-threading within the Tile, |residual_buffer_| is 655 // used. When there is multi-threading within the Tile, 656 // |residual_buffer_threaded_| is used. In the following comment, 657 // |residual_buffer| refers to either |residual_buffer_| or 658 // |residual_buffer_threaded_| depending on whether multi-threading is enabled 659 // within the Tile or not. 660 // The |residual_buffer| is used to help with the dequantization and the 661 // inverse transform processes. It is declared as a uint8_t, but is always 662 // accessed either as an int16_t or int32_t depending on |bitdepth|. Here is 663 // what it stores at various stages of the decoding process (in the order 664 // which they happen): 665 // 1) In ReadTransformCoefficients(), this buffer is used to store the 666 // dequantized values. 667 // 2) In Reconstruct(), this buffer is used as the input to the row 668 // transform process. 669 // The size of this buffer would be: 670 // For |residual_buffer_|: (4096 + 32 * |kResidualPaddingVertical|) * 671 // |residual_size_|. 
Where 4096 = 64x64 which is the maximum transform 672 // size, and 32 * |kResidualPaddingVertical| is the padding to avoid 673 // bottom boundary checks when parsing quantized coefficients. This 674 // memory is allocated and owned by the Tile class. 675 // For |residual_buffer_threaded_|: See the comment below. This memory is 676 // not allocated or owned by the Tile class. 677 AlignedUniquePtr<uint8_t> residual_buffer_; 678 // This is a 2d array of pointers of size |superblock_rows_| by 679 // |superblock_columns_| where each pointer points to a ResidualBuffer for a 680 // single super block. The array is populated when the parsing process begins 681 // by calling |residual_buffer_pool_->Get()| and the memory is released back 682 // to the pool by calling |residual_buffer_pool_->Release()| when the decoding 683 // process is complete. 684 Array2D<std::unique_ptr<ResidualBuffer>> residual_buffer_threaded_; 685 // sizeof(int16_t or int32_t) depending on |bitdepth|. 686 const size_t residual_size_; 687 // Number of superblocks on the top-right that will have to be decoded before 688 // the current superblock can be decoded. This will be 1 if allow_intrabc is 689 // false. If allow_intrabc is true, then this value will be 690 // use_128x128_superblock ? 3 : 5. This is the allowed range of reference for 691 // the top rows for intrabc. 692 const int intra_block_copy_lag_; 693 694 // In the Tile class, we use the "current_frame" in two ways: 695 // 1) To write the decoded output into (using the |buffer_| view). 696 // 2) To read the pixels for intra block copy (using the |current_frame_| 697 // reference). 698 // 699 // When intra block copy is off, |buffer_| and |current_frame_| may or may not 700 // point to the same plane pointers. But it is okay since |current_frame_| is 701 // never used in this case. 702 // 703 // When intra block copy is on, |buffer_| and |current_frame_| always point to 704 // the same plane pointers (since post filtering is disabled). 
So the usage in 705 // both case 1 and case 2 remain valid. 706 Array2DView<uint8_t> buffer_[kMaxPlanes]; 707 RefCountedBuffer& current_frame_; 708 709 Array2D<int16_t>& cdef_index_; 710 Array2D<TransformSize>& inter_transform_sizes_; 711 std::array<RestorationUnitInfo, kMaxPlanes> reference_unit_info_; 712 // If |thread_pool_| is nullptr, the calling thread will do the parsing and 713 // the decoding in one pass. If |thread_pool_| is not nullptr, then the main 714 // thread will do the parsing while the thread pool workers will do the 715 // decoding. 716 ThreadPool* const thread_pool_; 717 ThreadingParameters threading_; 718 ResidualBufferPool* const residual_buffer_pool_; 719 TileScratchBufferPool* const tile_scratch_buffer_pool_; 720 BlockingCounterWithStatus* const pending_tiles_; 721 bool split_parse_and_decode_; 722 // This is used only when |split_parse_and_decode_| is false. 723 std::unique_ptr<PredictionParameters> prediction_parameters_ = nullptr; 724 // Stores the |transform_type| for the super block being decoded at a 4x4 725 // granularity. The spec uses absolute indices for this array but it is 726 // sufficient to use indices relative to the super block being decoded. 727 TransformType transform_types_[32][32]; 728 // delta_lf_[i] is in the range [-63, 63]. 729 int8_t delta_lf_[kFrameLfCount]; 730 // True if all the values in |delta_lf_| are zero. False otherwise. 731 bool delta_lf_all_zero_; 732 bool build_bit_mask_when_parsing_; 733 const bool frame_parallel_; 734 const bool use_intra_prediction_buffer_; 735 // Buffer used to store the unfiltered pixels that are necessary for decoding 736 // the next superblock row (for the intra prediction process). Used only if 737 // |use_intra_prediction_buffer_| is true. 
738 std::array<AlignedDynamicBuffer<uint8_t, kMaxAlignment>, kMaxPlanes> 739 intra_prediction_buffer_; 740 }; 741 742 struct Tile::Block { BlockBlock743 Block(const Tile& tile, BlockSize size, int row4x4, int column4x4, 744 TileScratchBuffer* const scratch_buffer, ResidualPtr* residual) 745 : tile(tile), 746 size(size), 747 row4x4(row4x4), 748 column4x4(column4x4), 749 width(kBlockWidthPixels[size]), 750 height(kBlockHeightPixels[size]), 751 width4x4(width >> 2), 752 height4x4(height >> 2), 753 scratch_buffer(scratch_buffer), 754 residual(residual) { 755 assert(size != kBlockInvalid); 756 residual_size[kPlaneY] = kPlaneResidualSize[size][0][0]; 757 residual_size[kPlaneU] = residual_size[kPlaneV] = 758 kPlaneResidualSize[size][tile.subsampling_x_[kPlaneU]] 759 [tile.subsampling_y_[kPlaneU]]; 760 assert(residual_size[kPlaneY] != kBlockInvalid); 761 if (tile.PlaneCount() > 1) { 762 assert(residual_size[kPlaneU] != kBlockInvalid); 763 } 764 if ((row4x4 & 1) == 0 && 765 (tile.sequence_header_.color_config.subsampling_y & height4x4) == 1) { 766 has_chroma = false; 767 } else if ((column4x4 & 1) == 0 && 768 (tile.sequence_header_.color_config.subsampling_x & width4x4) == 769 1) { 770 has_chroma = false; 771 } else { 772 has_chroma = !tile.sequence_header_.color_config.is_monochrome; 773 } 774 top_available[kPlaneY] = tile.IsTopInside(row4x4); 775 left_available[kPlaneY] = tile.IsLeftInside(column4x4); 776 if (has_chroma) { 777 // top_available[kPlaneU] and top_available[kPlaneV] are valid only if 778 // has_chroma is true. 
779 // The next 3 lines are equivalent to: 780 // top_available[kPlaneU] = top_available[kPlaneV] = 781 // top_available[kPlaneY] && 782 // ((tile.sequence_header_.color_config.subsampling_y & height4x4) == 783 // 0 || tile.IsTopInside(row4x4 - 1)); 784 top_available[kPlaneU] = top_available[kPlaneV] = tile.IsTopInside( 785 row4x4 - 786 (tile.sequence_header_.color_config.subsampling_y & height4x4)); 787 // left_available[kPlaneU] and left_available[kPlaneV] are valid only if 788 // has_chroma is true. 789 // The next 3 lines are equivalent to: 790 // left_available[kPlaneU] = left_available[kPlaneV] = 791 // left_available[kPlaneY] && 792 // ((tile.sequence_header_.color_config.subsampling_x & width4x4) == 0 793 // || tile.IsLeftInside(column4x4 - 1)); 794 left_available[kPlaneU] = left_available[kPlaneV] = tile.IsLeftInside( 795 column4x4 - 796 (tile.sequence_header_.color_config.subsampling_x & width4x4)); 797 } 798 const ptrdiff_t stride = tile.BlockParametersStride(); 799 BlockParameters** const bps = 800 tile.BlockParametersAddress(row4x4, column4x4); 801 bp = *bps; 802 // bp_top is valid only if top_available[kPlaneY] is true. 803 if (top_available[kPlaneY]) { 804 bp_top = *(bps - stride); 805 } 806 // bp_left is valid only if left_available[kPlaneY] is true. 807 if (left_available[kPlaneY]) { 808 bp_left = *(bps - 1); 809 } 810 } 811 HasChromaBlock812 bool HasChroma() const { return has_chroma; } 813 814 // These return values of these group of functions are valid only if the 815 // corresponding top_available or left_available is true. 
TopReferenceBlock816 ReferenceFrameType TopReference(int index) const { 817 return bp_top->reference_frame[index]; 818 } 819 LeftReferenceBlock820 ReferenceFrameType LeftReference(int index) const { 821 return bp_left->reference_frame[index]; 822 } 823 IsTopIntraBlock824 bool IsTopIntra() const { return TopReference(0) <= kReferenceFrameIntra; } IsLeftIntraBlock825 bool IsLeftIntra() const { return LeftReference(0) <= kReferenceFrameIntra; } 826 IsTopSingleBlock827 bool IsTopSingle() const { return TopReference(1) <= kReferenceFrameIntra; } IsLeftSingleBlock828 bool IsLeftSingle() const { return LeftReference(1) <= kReferenceFrameIntra; } 829 CountReferencesBlock830 int CountReferences(ReferenceFrameType type) const { 831 return static_cast<int>(top_available[kPlaneY] && 832 bp_top->reference_frame[0] == type) + 833 static_cast<int>(top_available[kPlaneY] && 834 bp_top->reference_frame[1] == type) + 835 static_cast<int>(left_available[kPlaneY] && 836 bp_left->reference_frame[0] == type) + 837 static_cast<int>(left_available[kPlaneY] && 838 bp_left->reference_frame[1] == type); 839 } 840 841 // 7.10.3. 842 // Checks if there are any inter blocks to the left or above. If so, it 843 // returns true indicating that the block has neighbors that are suitable for 844 // use by overlapped motion compensation. 
HasOverlappableCandidatesBlock845 bool HasOverlappableCandidates() const { 846 const ptrdiff_t stride = tile.BlockParametersStride(); 847 BlockParameters** const bps = tile.BlockParametersAddress(0, 0); 848 if (top_available[kPlaneY]) { 849 BlockParameters** bps_top = bps + (row4x4 - 1) * stride + (column4x4 | 1); 850 const int columns = std::min(tile.frame_header_.columns4x4 - column4x4, 851 static_cast<int>(width4x4)); 852 BlockParameters** const bps_top_end = bps_top + columns; 853 do { 854 if ((*bps_top)->reference_frame[0] > kReferenceFrameIntra) { 855 return true; 856 } 857 bps_top += 2; 858 } while (bps_top < bps_top_end); 859 } 860 if (left_available[kPlaneY]) { 861 BlockParameters** bps_left = bps + (row4x4 | 1) * stride + column4x4 - 1; 862 const int rows = std::min(tile.frame_header_.rows4x4 - row4x4, 863 static_cast<int>(height4x4)); 864 BlockParameters** const bps_left_end = bps_left + rows * stride; 865 do { 866 if ((*bps_left)->reference_frame[0] > kReferenceFrameIntra) { 867 return true; 868 } 869 bps_left += 2 * stride; 870 } while (bps_left < bps_left_end); 871 } 872 return false; 873 } 874 875 const Tile& tile; 876 bool has_chroma; 877 const BlockSize size; 878 bool top_available[kMaxPlanes]; 879 bool left_available[kMaxPlanes]; 880 BlockSize residual_size[kMaxPlanes]; 881 const int row4x4; 882 const int column4x4; 883 const int width; 884 const int height; 885 const int width4x4; 886 const int height4x4; 887 const BlockParameters* bp_top; 888 const BlockParameters* bp_left; 889 BlockParameters* bp; 890 TileScratchBuffer* const scratch_buffer; 891 ResidualPtr* const residual; 892 }; 893 894 } // namespace libgav1 895 896 #endif // LIBGAV1_SRC_TILE_H_ 897