1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/tile.h"
16
17 #include <algorithm>
18 #include <array>
19 #include <cassert>
20 #include <climits>
21 #include <cstdlib>
22 #include <cstring>
23 #include <memory>
24 #include <new>
25 #include <numeric>
26 #include <type_traits>
27 #include <utility>
28
29 #include "src/frame_scratch_buffer.h"
30 #include "src/motion_vector.h"
31 #include "src/reconstruction.h"
32 #include "src/utils/bit_mask_set.h"
33 #include "src/utils/common.h"
34 #include "src/utils/constants.h"
35 #include "src/utils/logging.h"
36 #include "src/utils/segmentation.h"
37 #include "src/utils/stack.h"
38
39 namespace libgav1 {
40 namespace {
41
42 // Import all the constants in the anonymous namespace.
43 #include "src/quantizer_tables.inc"
44 #include "src/scan_tables.inc"
45
// Precision bits when scaling reference frames.
constexpr int kReferenceScaleShift = 14;
// Range above kNumQuantizerBaseLevels which the exponential golomb coding
// process is activated.
constexpr int kQuantizerCoefficientBaseRange = 12;
constexpr int kNumQuantizerBaseLevels = 2;
// Derived from the base range above and |kCoeffBaseRangeSymbolCount|, which is
// not defined in this part of the file.
constexpr int kCoeffBaseRangeMaxIterations =
    kQuantizerCoefficientBaseRange / (kCoeffBaseRangeSymbolCount - 1);
// Indices that select between the left and the top entropy contexts. In
// Tile::Init() the array at index |kEntropyContextLeft| is sized with rows4x4
// and the other one with columns4x4.
constexpr int kEntropyContextLeft = 0;
constexpr int kEntropyContextTop = 1;

// NOTE(review): judging by the name, this is looked up with a (top, left) pair
// of values in [0, 4] to obtain the all-zero context; the table is symmetric,
// but the index order should be confirmed against the code that reads it.
constexpr uint8_t kAllZeroContextsByTopLeft[5][5] = {{1, 2, 2, 2, 3},
                                                     {2, 4, 4, 4, 5},
                                                     {2, 4, 4, 4, 5},
                                                     {2, 4, 4, 4, 5},
                                                     {3, 5, 5, 5, 6}};
62
63 // The space complexity of DFS is O(branching_factor * max_depth). For the
64 // parameter tree, branching_factor = 4 (there could be up to 4 children for
65 // every node) and max_depth (excluding the root) = 5 (to go from a 128x128
// block all the way to a 4x4 block). The worst-case stack size is 16, by
67 // counting the number of 'o' nodes in the diagram:
68 //
69 // | 128x128 The highest level (corresponding to the
70 // | root of the tree) has no node in the stack.
71 // |-----------------+
72 // | | | |
73 // | o o o 64x64
74 // |
75 // |-----------------+
76 // | | | |
77 // | o o o 32x32 Higher levels have three nodes in the stack,
78 // | because we pop one node off the stack before
79 // |-----------------+ pushing its four children onto the stack.
80 // | | | |
81 // | o o o 16x16
82 // |
83 // |-----------------+
84 // | | | |
85 // | o o o 8x8
86 // |
87 // |-----------------+
88 // | | | |
89 // o o o o 4x4 Only the lowest level has four nodes in the
90 // stack.
//
// That is 3 nodes for each of the four upper levels (64x64 through 8x8) plus 4
// nodes for the 4x4 level: 3 * 4 + 4 = 16.
constexpr int kDfsStackSize = 16;
92
// Mask indicating whether the transform sets contain a particular transform
// type. If |tx_type| is present in |tx_set|, then the |tx_type|th LSB is set.
// There is one mask per transform set.
constexpr BitMaskSet kTransformTypeInSetMask[kNumTransformSets] = {
    BitMaskSet(0x1),    BitMaskSet(0xE0F), BitMaskSet(0x20F),
    BitMaskSet(0xFFFF), BitMaskSet(0xFFF), BitMaskSet(0x201)};

// One prediction mode per filter intra predictor.
// NOTE(review): presumably indexed by the filter intra predictor value; confirm
// against the code that reads the table.
constexpr PredictionMode
    kFilterIntraModeToIntraPredictor[kNumFilterIntraPredictors] = {
        kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
        kPredictionModeD157, kPredictionModeDc};

// Mask used to determine the index for mode_deltas lookup. Only the modes
// listed below are in the mask; kPredictionModeGlobalMv is not one of them.
constexpr BitMaskSet kPredictionModeDeltasMask(
    kPredictionModeNearestMv, kPredictionModeNearMv, kPredictionModeNewMv,
    kPredictionModeNearestNearestMv, kPredictionModeNearNearMv,
    kPredictionModeNearestNewMv, kPredictionModeNewNearestMv,
    kPredictionModeNearNewMv, kPredictionModeNewNearMv,
    kPredictionModeNewNewMv);
111
// This is computed as:
// min(transform_width_log2, 5) + min(transform_height_log2, 5) - 4.
// There is one entry per transform size, so the values range from 0 (for 4x4)
// to 6 (when both dimensions are at least 32).
constexpr uint8_t kEobMultiSizeLookup[kNumTransformSizes] = {
    0, 1, 2, 1, 2, 3, 4, 2, 3, 4, 5, 5, 4, 5, 6, 6, 5, 6, 6};
116
// One 5x5 table of context offsets per transform size (the first dimension is
// |kNumTransformSizes|).
// NOTE(review): the meaning of the two inner indices is not visible in this
// part of the file; presumably they are a coefficient's row and column clamped
// to 4. Confirm against the code that reads the table.
/* clang-format off */
constexpr uint8_t kCoeffBaseContextOffset[kNumTransformSizes][5][5] = {
    {{0, 1, 6, 6, 0}, {1, 6, 6, 21, 0}, {6, 6, 21, 21, 0}, {6, 21, 21, 21, 0},
     {0, 0, 0, 0, 0}},
    {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
     {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
    {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
     {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
    {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
     {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
    {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
     {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
    {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
     {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
    {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
     {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
    {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
     {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
    {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
    {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
     {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
    {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
     {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
    {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
     {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}};
/* clang-format on */
158
// Extended the table size from 3 to 16 by repeating the last element to avoid
// the clips to row or column indices. Entries 2 through 15 all hold 36.
constexpr uint8_t kCoeffBasePositionContextOffset[16] = {
    26, 31, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};

// One intra prediction mode per inter-intra mode.
// NOTE(review): presumably indexed by the inter-intra mode value; confirm
// against the code that reads the table.
constexpr PredictionMode kInterIntraToIntraMode[kNumInterIntraModes] = {
    kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
    kPredictionModeSmooth};

// Number of horizontal luma samples before intra block copy can be used.
constexpr int kIntraBlockCopyDelayPixels = 256;
// Number of 64 by 64 blocks before intra block copy can be used. With the
// value above this evaluates to 4.
constexpr int kIntraBlockCopyDelay64x64Blocks = kIntraBlockCopyDelayPixels / 64;
172
// Index [i][j] corresponds to the transform size of width 1 << (i + 2) and
// height 1 << (j + 2). Combinations that have no TransformSize enumerator
// (4x32, 4x64, 8x64, 32x4, 64x4 and 64x8) are filled with
// |kNumTransformSizes|.
constexpr TransformSize k4x4SizeToTransformSize[5][5] = {
    {kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
     kNumTransformSizes, kNumTransformSizes},
    {kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
     kTransformSize8x32, kNumTransformSizes},
    {kTransformSize16x4, kTransformSize16x8, kTransformSize16x16,
     kTransformSize16x32, kTransformSize16x64},
    {kNumTransformSizes, kTransformSize32x8, kTransformSize32x16,
     kTransformSize32x32, kTransformSize32x64},
    {kNumTransformSizes, kNumTransformSizes, kTransformSize64x16,
     kTransformSize64x32, kTransformSize64x64}};

// Defined in section 9.3 of the spec. There is one transform type per entry of
// |kIntraPredictionModesUV|.
// NOTE(review): presumably indexed by the intra prediction mode; confirm
// against the code that reads the table.
constexpr TransformType kModeToTransformType[kIntraPredictionModesUV] = {
    kTransformTypeDctDct,   kTransformTypeDctAdst,  kTransformTypeAdstDct,
    kTransformTypeDctDct,   kTransformTypeAdstAdst, kTransformTypeDctAdst,
    kTransformTypeAdstDct,  kTransformTypeAdstDct,  kTransformTypeDctAdst,
    kTransformTypeAdstAdst, kTransformTypeDctAdst,  kTransformTypeAdstDct,
    kTransformTypeAdstAdst, kTransformTypeDctDct};
194
// Defined in section 5.11.47 of the spec. This array does not contain an entry
// for kTransformSetDctOnly, so the first dimension needs to be
// |kNumTransformSets| - 1.
//
// Rows list fewer than 16 transform types where the set is smaller; the
// unlisted trailing elements of such a row are value-initialized by the
// aggregate initialization below.
constexpr TransformType kInverseTransformTypeBySet[kNumTransformSets - 1][16] =
    {{kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeIdentityDct, kTransformTypeDctIdentity,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeIdentityAdst,
      kTransformTypeAdstIdentity, kTransformTypeIdentityFlipadst,
      kTransformTypeFlipadstIdentity, kTransformTypeDctDct,
      kTransformTypeDctAdst, kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeDctDct, kTransformTypeDctAdst,
      kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct}};
219
// Replaces all occurrences of 64x* and *x64 with 32x* and *x32 respectively.
// There is one entry per transform size.
constexpr TransformSize kAdjustedTransformSize[kNumTransformSizes] = {
    kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
    kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
    kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32};

// This is the same as Max_Tx_Size_Rect array in the spec but with *x64 and 64x*
// transforms replaced with *x32 and 32x* respectively. There is one entry per
// block size (|kMaxBlockSizes| entries).
constexpr TransformSize kUVTransformSize[kMaxBlockSizes] = {
    kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
    kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
    kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
    kTransformSize32x32};

// ith entry of this array is computed as:
// DivideBy2(TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[i]) +
//           TransformSizeToSquareTransformIndex(kTransformSizeSquareMax[i]) +
//           1)
constexpr uint8_t kTransformSizeContext[kNumTransformSizes] = {
    0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 3, 3, 2, 3, 3, 4, 3, 4, 4};

// NOTE(review): by their names these are the default loop restoration
// parameters (self-guided projection multipliers and Wiener filter
// coefficients); where they are applied is not visible in this part of the
// file.
constexpr int8_t kSgrProjDefaultMultiplier[2] = {-32, 31};

constexpr int8_t kWienerDefaultFilter[kNumWienerCoefficients] = {3, -7, 15};
252
// Maps compound prediction modes into single modes. For e.g.
// kPredictionModeNearestNewMv will map to kPredictionModeNearestMv for index 0
// and kPredictionModeNewMv for index 1. It is used to simplify the logic in
// AssignMv (and avoid duplicate code). This is section 5.11.30. in the spec.
//
// GetSinglePredictionMode() selects the row with
// |y_mode| - kPredictionModeNearestNearestMv and the column with its |index|
// argument.
constexpr PredictionMode
    kCompoundToSinglePredictionMode[kNumCompoundInterPredictionModes][2] = {
        {kPredictionModeNearestMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNearMv},
        {kPredictionModeNearestMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearMv},
        {kPredictionModeGlobalMv, kPredictionModeGlobalMv},
        {kPredictionModeNewMv, kPredictionModeNewMv},
};
GetSinglePredictionMode(int index,PredictionMode y_mode)268 PredictionMode GetSinglePredictionMode(int index, PredictionMode y_mode) {
269 if (y_mode < kPredictionModeNearestNearestMv) {
270 return y_mode;
271 }
272 const int lookup_index = y_mode - kPredictionModeNearestNearestMv;
273 assert(lookup_index >= 0);
274 return kCompoundToSinglePredictionMode[lookup_index][index];
275 }
276
// log2(dqDenom) in section 7.12.3 of the spec. We use the log2 value because
// dqDenom is always a power of two and hence right shift can be used instead of
// division. There is one entry per transform size.
constexpr uint8_t kQuantizationShift[kNumTransformSizes] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 2, 1, 2, 2};
282
// Clamps an element count so that a run of |length| elements beginning at
// |start| does not go past |max|: the result is the smaller of |length| and
// |max| - |start|. Used when indexing arrays whose bound is |max|.
int GetNumElements(int length, int start, int max) {
  const int available = max - start;
  return (available < length) ? available : length;
}
288
289 template <typename T>
SetBlockValues(int rows,int columns,T value,T * dst,ptrdiff_t stride)290 void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
291 // Specialize all columns cases (values in kTransformWidth4x4[]) for better
292 // performance.
293 switch (columns) {
294 case 1:
295 MemSetBlock<T>(rows, 1, value, dst, stride);
296 break;
297 case 2:
298 MemSetBlock<T>(rows, 2, value, dst, stride);
299 break;
300 case 4:
301 MemSetBlock<T>(rows, 4, value, dst, stride);
302 break;
303 case 8:
304 MemSetBlock<T>(rows, 8, value, dst, stride);
305 break;
306 default:
307 assert(columns == 16);
308 MemSetBlock<T>(rows, 16, value, dst, stride);
309 break;
310 }
311 }
312
SetTransformType(const Tile::Block & block,int x4,int y4,int w4,int h4,TransformType tx_type,TransformType transform_types[32][32])313 void SetTransformType(const Tile::Block& block, int x4, int y4, int w4, int h4,
314 TransformType tx_type,
315 TransformType transform_types[32][32]) {
316 const int y_offset = y4 - block.row4x4;
317 const int x_offset = x4 - block.column4x4;
318 TransformType* const dst = &transform_types[y_offset][x_offset];
319 SetBlockValues<TransformType>(h4, w4, tx_type, dst, 32);
320 }
321
StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,const MotionVector & mv_to_store,ptrdiff_t stride,int rows,int columns,ReferenceFrameType * reference_frame_row_start,MotionVector * mv)322 void StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,
323 const MotionVector& mv_to_store, ptrdiff_t stride,
324 int rows, int columns,
325 ReferenceFrameType* reference_frame_row_start,
326 MotionVector* mv) {
327 static_assert(sizeof(*reference_frame_row_start) == sizeof(int8_t), "");
328 do {
329 // Don't switch the following two memory setting functions.
330 // Some ARM CPUs are quite sensitive to the order.
331 memset(reference_frame_row_start, reference_frame_to_store, columns);
332 std::fill(mv, mv + columns, mv_to_store);
333 reference_frame_row_start += stride;
334 mv += stride;
335 } while (--rows != 0);
336 }
337
// The inverse transform process expects the quantized coefficients to be laid
// out as a virtual 2d array of |tx_width| x |tx_height|. That does not hold
// when the transform width is 64, because such transforms are populated with
// the scan order of the corresponding transform of width 32 (e.g. 64x16 uses
// the scan order of 32x16). This function therefore moves every 32-value row
// to its position in the 64-wide layout and zeroes the locations the rows used
// to occupy. It does nothing unless |tx_width| is 64.
template <typename ResidualType>
void MoveCoefficientsForTxWidth64(int clamped_tx_height, int tx_width,
                                  ResidualType* residual) {
  if (tx_width != 64) return;
  constexpr int kPackedWidth = 32;
  constexpr int kFullWidth = 64;
  const size_t packed_row_bytes = kPackedWidth * sizeof(ResidualType);
  // Start with the last pair of rows and walk towards the top, so that no row
  // is overwritten before it has been moved.
  const int last_pair_row = clamped_tx_height - 2;
  ResidualType* packed = residual + kPackedWidth * last_pair_row;
  ResidualType* expanded = residual + kFullWidth * last_pair_row;
  int pairs_left = last_pair_row >> 1;
  do {
    // Within a pair the two rows can be handled in forward order.
    memcpy(expanded, packed, packed_row_bytes);
    memcpy(expanded + kFullWidth, packed + kPackedWidth, packed_row_bytes);
    memset(packed + kPackedWidth, 0, packed_row_bytes);
    packed -= 2 * kPackedWidth;
    expanded -= 2 * kFullWidth;
    --pairs_left;
  } while (pairs_left != 0);
  // Only the second row of the first pair has to move; the first row is
  // already where it belongs.
  memcpy(expanded + kFullWidth, packed + kPackedWidth, packed_row_bytes);
  memset(packed + kPackedWidth, 0, packed_row_bytes);
}
366
GetClampParameters(const Tile::Block & block,int min[2],int max[2])367 void GetClampParameters(const Tile::Block& block, int min[2], int max[2]) {
368 // 7.10.2.14 (part 1). (also contains implementations of 5.11.53
369 // and 5.11.54).
370 constexpr int kMvBorder4x4 = 4;
371 const int row_border = kMvBorder4x4 + block.height4x4;
372 const int column_border = kMvBorder4x4 + block.width4x4;
373 const int macroblocks_to_top_edge = -block.row4x4;
374 const int macroblocks_to_bottom_edge =
375 block.tile.frame_header().rows4x4 - block.height4x4 - block.row4x4;
376 const int macroblocks_to_left_edge = -block.column4x4;
377 const int macroblocks_to_right_edge =
378 block.tile.frame_header().columns4x4 - block.width4x4 - block.column4x4;
379 min[0] = MultiplyBy32(macroblocks_to_top_edge - row_border);
380 min[1] = MultiplyBy32(macroblocks_to_left_edge - column_border);
381 max[0] = MultiplyBy32(macroblocks_to_bottom_edge + row_border);
382 max[1] = MultiplyBy32(macroblocks_to_right_edge + column_border);
383 }
384
385 // Section 8.3.2 in the spec, under coeff_base_eob.
GetCoeffBaseContextEob(TransformSize tx_size,int index)386 int GetCoeffBaseContextEob(TransformSize tx_size, int index) {
387 if (index == 0) return 0;
388 const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
389 const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
390 const int tx_height = kTransformHeight[adjusted_tx_size];
391 if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
392 if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
393 return 3;
394 }
395
396 // Section 8.3.2 in the spec, under coeff_br. Optimized for end of block based
397 // on the fact that {0, 1}, {1, 0}, {1, 1}, {0, 2} and {2, 0} will all be 0 in
398 // the end of block case.
GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2,int pos,TransformClass tx_class)399 int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
400 TransformClass tx_class) {
401 if (pos == 0) return 0;
402 const int tx_width = 1 << adjusted_tx_width_log2;
403 const int row = pos >> adjusted_tx_width_log2;
404 const int column = pos & (tx_width - 1);
405 // This return statement is equivalent to:
406 // return ((tx_class == kTransformClass2D && (row | column) < 2) ||
407 // (tx_class == kTransformClassHorizontal && column == 0) ||
408 // (tx_class == kTransformClassVertical && row == 0))
409 // ? 7
410 // : 14;
411 return 14 >> ((static_cast<int>(tx_class == kTransformClass2D) &
412 static_cast<int>((row | column) < 2)) |
413 (tx_class & static_cast<int>(column == 0)) |
414 ((tx_class >> 1) & static_cast<int>(row == 0)));
415 }
416
417 } // namespace
418
// Sets up the state needed to process tile |tile_number| of the current frame.
//
// The initializer list derives the tile's row and column from |tile_number|
// and |frame_header.tile_info.tile_columns|, binds the tile to the headers, to
// members of |state| and to buffers held by |frame_scratch_buffer|, and
// selects:
//   * |residual_size_|: sizeof(int16_t) when the bitdepth is 8, sizeof(int32_t)
//     otherwise.
//   * |intra_block_copy_lag_|: 1 when intra block copy is not allowed;
//     otherwise 3 with 128x128 superblocks and 5 without.
//
// |current_frame|, |frame_scratch_buffer|, |post_filter| and |dsp| are
// dereferenced here and must not be null. |thread_pool| may be null; it is
// compared against nullptr when |split_parse_and_decode_| is computed.
Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
           const ObuSequenceHeader& sequence_header,
           const ObuFrameHeader& frame_header,
           RefCountedBuffer* const current_frame, const DecoderState& state,
           FrameScratchBuffer* const frame_scratch_buffer,
           const WedgeMaskArray& wedge_masks,
           SymbolDecoderContext* const saved_symbol_decoder_context,
           const SegmentationMap* prev_segment_ids,
           PostFilter* const post_filter, const dsp::Dsp* const dsp,
           ThreadPool* const thread_pool,
           BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
           bool use_intra_prediction_buffer)
    : number_(tile_number),
      row_(number_ / frame_header.tile_info.tile_columns),
      column_(number_ % frame_header.tile_info.tile_columns),
      data_(data),
      size_(size),
      read_deltas_(false),
      // The first entry of each subsampling array is always 0; the other two
      // entries share the value from the color config.
      subsampling_x_{0, sequence_header.color_config.subsampling_x,
                     sequence_header.color_config.subsampling_x},
      subsampling_y_{0, sequence_header.color_config.subsampling_y,
                     sequence_header.color_config.subsampling_y},
      current_quantizer_index_(frame_header.quantizer.base_index),
      sequence_header_(sequence_header),
      frame_header_(frame_header),
      reference_frame_sign_bias_(state.reference_frame_sign_bias),
      reference_frames_(state.reference_frame),
      motion_field_(frame_scratch_buffer->motion_field),
      reference_order_hint_(state.reference_order_hint),
      wedge_masks_(wedge_masks),
      reader_(data_, size_, frame_header_.enable_cdf_update),
      symbol_decoder_context_(frame_scratch_buffer->symbol_decoder_context),
      saved_symbol_decoder_context_(saved_symbol_decoder_context),
      prev_segment_ids_(prev_segment_ids),
      dsp_(*dsp),
      post_filter_(*post_filter),
      block_parameters_holder_(frame_scratch_buffer->block_parameters_holder),
      quantizer_(sequence_header_.color_config.bitdepth,
                 &frame_header_.quantizer),
      residual_size_((sequence_header_.color_config.bitdepth == 8)
                         ? sizeof(int16_t)
                         : sizeof(int32_t)),
      intra_block_copy_lag_(
          frame_header_.allow_intrabc
              ? (sequence_header_.use_128x128_superblock ? 3 : 5)
              : 1),
      current_frame_(*current_frame),
      cdef_index_(frame_scratch_buffer->cdef_index),
      inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
      thread_pool_(thread_pool),
      residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
      tile_scratch_buffer_pool_(
          &frame_scratch_buffer->tile_scratch_buffer_pool),
      pending_tiles_(pending_tiles),
      frame_parallel_(frame_parallel),
      use_intra_prediction_buffer_(use_intra_prediction_buffer),
      // The intra prediction buffer is selected by tile row.
      intra_prediction_buffer_(
          use_intra_prediction_buffer_
              ? &frame_scratch_buffer->intra_prediction_buffers.get()[row_]
              : nullptr) {
  // Tile bounds in 4x4 units; each end is the start of the next tile row or
  // column.
  row4x4_start_ = frame_header.tile_info.tile_row_start[row_];
  row4x4_end_ = frame_header.tile_info.tile_row_start[row_ + 1];
  column4x4_start_ = frame_header.tile_info.tile_column_start[column_];
  column4x4_end_ = frame_header.tile_info.tile_column_start[column_ + 1];
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  const int block_width4x4_log2 = k4x4HeightLog2[SuperBlockSize()];
  // Number of superblocks needed to cover the tile in each direction, rounded
  // up.
  superblock_rows_ =
      (row4x4_end_ - row4x4_start_ + block_width4x4 - 1) >> block_width4x4_log2;
  superblock_columns_ =
      (column4x4_end_ - column4x4_start_ + block_width4x4 - 1) >>
      block_width4x4_log2;
  // If |split_parse_and_decode_| is true, we do the necessary setup for
  // splitting the parsing and the decoding steps. This is done in the following
  // two cases:
  //  1) If there is multi-threading within a tile (this is done if
  //     |thread_pool_| is not nullptr and if there are more superblock columns
  //     than |intra_block_copy_lag_|).
  //  2) If |frame_parallel| is true.
  split_parse_and_decode_ = (thread_pool_ != nullptr &&
                             superblock_columns_ > intra_block_copy_lag_) ||
                            frame_parallel;
  if (frame_parallel_) {
    reference_frame_progress_cache_.fill(INT_MIN);
  }
  memset(delta_lf_, 0, sizeof(delta_lf_));
  delta_lf_all_zero_ = true;
  const YuvBuffer& buffer = post_filter_.frame_buffer();
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    // Verify that the borders are big enough for Reconstruct(). max_tx_length
    // is the maximum value of tx_width and tx_height for the plane.
    const int max_tx_length = (plane == kPlaneY) ? 64 : 32;
    // Reconstruct() may overwrite on the right. Since the right border of a
    // row is followed in memory by the left border of the next row, the
    // number of extra pixels to the right of a row is at least the sum of the
    // left and right borders.
    //
    // Note: This assertion actually checks the sum of the left and right
    // borders of post_filter_.GetUnfilteredBuffer(), which is a horizontally
    // and vertically shifted version of |buffer|. Since the sum of the left and
    // right borders is not changed by the shift, we can just check the sum of
    // the left and right borders of |buffer|.
    assert(buffer.left_border(plane) + buffer.right_border(plane) >=
           max_tx_length - 1);
    // Reconstruct() may overwrite on the bottom. We need an extra border row
    // on the bottom because we need the left border of that row.
    //
    // Note: This assertion checks the bottom border of
    // post_filter_.GetUnfilteredBuffer(). So we need to calculate the vertical
    // shift that the PostFilter constructor applied to |buffer| and reduce the
    // bottom border by that amount.
#ifndef NDEBUG
    const int vertical_shift = static_cast<int>(
        (post_filter_.GetUnfilteredBuffer(plane) - buffer.data(plane)) /
        buffer.stride(plane));
    const int bottom_border = buffer.bottom_border(plane) - vertical_shift;
    assert(bottom_border >= max_tx_length);
#endif
    // In AV1, a transform block of height H starts at a y coordinate that is
    // a multiple of H. If a transform block at the bottom of the frame has
    // height H, then Reconstruct() will write up to the row with index
    // Align(buffer.height(plane), H) - 1. Therefore the maximum number of
    // rows Reconstruct() may write to is
    // Align(buffer.height(plane), max_tx_length).
    buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
                         buffer.stride(plane),
                         post_filter_.GetUnfilteredBuffer(plane));
    // Deblocking limits for the plane: the plane dimension rounded up to whole
    // 4x4 units, scaled back by the subsampling shift and capped by the frame
    // size in 4x4 units.
    const int plane_height =
        SubsampledValue(frame_header_.height, subsampling_y_[plane]);
    deblock_row_limit_[plane] =
        std::min(frame_header_.rows4x4, DivideBy4(plane_height + 3)
                                            << subsampling_y_[plane]);
    const int plane_width =
        SubsampledValue(frame_header_.width, subsampling_x_[plane]);
    deblock_column_limit_[plane] =
        std::min(frame_header_.columns4x4, DivideBy4(plane_width + 3)
                                               << subsampling_x_[plane]);
  }
}
557
Init()558 bool Tile::Init() {
559 assert(coefficient_levels_.size() == dc_categories_.size());
560 for (size_t i = 0; i < coefficient_levels_.size(); ++i) {
561 const int contexts_per_plane = (i == kEntropyContextLeft)
562 ? frame_header_.rows4x4
563 : frame_header_.columns4x4;
564 if (!coefficient_levels_[i].Reset(PlaneCount(), contexts_per_plane)) {
565 LIBGAV1_DLOG(ERROR, "coefficient_levels_[%zu].Reset() failed.", i);
566 return false;
567 }
568 if (!dc_categories_[i].Reset(PlaneCount(), contexts_per_plane)) {
569 LIBGAV1_DLOG(ERROR, "dc_categories_[%zu].Reset() failed.", i);
570 return false;
571 }
572 }
573 if (split_parse_and_decode_) {
574 assert(residual_buffer_pool_ != nullptr);
575 if (!residual_buffer_threaded_.Reset(superblock_rows_, superblock_columns_,
576 /*zero_initialize=*/false)) {
577 LIBGAV1_DLOG(ERROR, "residual_buffer_threaded_.Reset() failed.");
578 return false;
579 }
580 } else {
581 // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary
582 // checks when parsing quantized coefficients.
583 residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(
584 32, (4096 + 32 * kResidualPaddingVertical) * residual_size_);
585 if (residual_buffer_ == nullptr) {
586 LIBGAV1_DLOG(ERROR, "Allocation of residual_buffer_ failed.");
587 return false;
588 }
589 prediction_parameters_.reset(new (std::nothrow) PredictionParameters());
590 if (prediction_parameters_ == nullptr) {
591 LIBGAV1_DLOG(ERROR, "Allocation of prediction_parameters_ failed.");
592 return false;
593 }
594 }
595 if (frame_header_.use_ref_frame_mvs) {
596 assert(sequence_header_.enable_order_hint);
597 SetupMotionField(frame_header_, current_frame_, reference_frames_,
598 row4x4_start_, row4x4_end_, column4x4_start_,
599 column4x4_end_, &motion_field_);
600 }
601 ResetLoopRestorationParams();
602 return true;
603 }
604
605 template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
ProcessSuperBlockRow(int row4x4,TileScratchBuffer * const scratch_buffer)606 bool Tile::ProcessSuperBlockRow(int row4x4,
607 TileScratchBuffer* const scratch_buffer) {
608 if (row4x4 < row4x4_start_ || row4x4 >= row4x4_end_) return true;
609 assert(scratch_buffer != nullptr);
610 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
611 for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
612 column4x4 += block_width4x4) {
613 if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4, scratch_buffer,
614 processing_mode)) {
615 LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
616 row4x4, column4x4);
617 return false;
618 }
619 }
620 if (save_symbol_decoder_context && row4x4 + block_width4x4 >= row4x4_end_) {
621 SaveSymbolDecoderContext();
622 }
623 if (processing_mode == kProcessingModeDecodeOnly ||
624 processing_mode == kProcessingModeParseAndDecode) {
625 PopulateIntraPredictionBuffer(row4x4);
626 }
627 return true;
628 }
629
// Explicit instantiations of ProcessSuperBlockRow(). The
// <kProcessingModeParseOnly, false> variant used by Tile::Parse() is not
// listed here.
// NOTE(review): presumably these two are instantiated explicitly for callers
// outside of this file; confirm.
//
// Used in frame parallel mode. The symbol decoder context need not be saved in
// this case since it was done when parsing was complete.
template bool Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
    int row4x4, TileScratchBuffer* scratch_buffer);
// Used in non frame parallel mode.
template bool Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
    int row4x4, TileScratchBuffer* scratch_buffer);
637
SaveSymbolDecoderContext()638 void Tile::SaveSymbolDecoderContext() {
639 if (frame_header_.enable_frame_end_update_cdf &&
640 number_ == frame_header_.tile_info.context_update_id) {
641 *saved_symbol_decoder_context_ = symbol_decoder_context_;
642 }
643 }
644
ParseAndDecode()645 bool Tile::ParseAndDecode() {
646 // If this is the main thread, we build the loop filter bit masks when parsing
647 // so that it happens in the current thread. This ensures that the main thread
648 // does as much work as possible.
649 if (split_parse_and_decode_) {
650 if (!ThreadedParseAndDecode()) return false;
651 SaveSymbolDecoderContext();
652 return true;
653 }
654 std::unique_ptr<TileScratchBuffer> scratch_buffer =
655 tile_scratch_buffer_pool_->Get();
656 if (scratch_buffer == nullptr) {
657 pending_tiles_->Decrement(false);
658 LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
659 return false;
660 }
661 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
662 for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
663 row4x4 += block_width4x4) {
664 if (!ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
665 row4x4, scratch_buffer.get())) {
666 pending_tiles_->Decrement(false);
667 return false;
668 }
669 }
670 tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
671 pending_tiles_->Decrement(true);
672 return true;
673 }
674
Parse()675 bool Tile::Parse() {
676 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
677 std::unique_ptr<TileScratchBuffer> scratch_buffer =
678 tile_scratch_buffer_pool_->Get();
679 if (scratch_buffer == nullptr) {
680 LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
681 return false;
682 }
683 for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
684 row4x4 += block_width4x4) {
685 if (!ProcessSuperBlockRow<kProcessingModeParseOnly, false>(
686 row4x4, scratch_buffer.get())) {
687 return false;
688 }
689 }
690 tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
691 SaveSymbolDecoderContext();
692 return true;
693 }
694
Decode(std::mutex * const mutex,int * const superblock_row_progress,std::condition_variable * const superblock_row_progress_condvar)695 bool Tile::Decode(
696 std::mutex* const mutex, int* const superblock_row_progress,
697 std::condition_variable* const superblock_row_progress_condvar) {
698 const int block_width4x4 = sequence_header_.use_128x128_superblock ? 32 : 16;
699 const int block_width4x4_log2 =
700 sequence_header_.use_128x128_superblock ? 5 : 4;
701 std::unique_ptr<TileScratchBuffer> scratch_buffer =
702 tile_scratch_buffer_pool_->Get();
703 if (scratch_buffer == nullptr) {
704 LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
705 return false;
706 }
707 for (int row4x4 = row4x4_start_, index = row4x4_start_ >> block_width4x4_log2;
708 row4x4 < row4x4_end_; row4x4 += block_width4x4, ++index) {
709 if (!ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
710 row4x4, scratch_buffer.get())) {
711 return false;
712 }
713 if (post_filter_.DoDeblock()) {
714 // Apply vertical deblock filtering for all the columns in this tile
715 // except for the first 64 columns.
716 post_filter_.ApplyDeblockFilter(
717 kLoopFilterTypeVertical, row4x4,
718 column4x4_start_ + kNum4x4InLoopFilterUnit, column4x4_end_,
719 block_width4x4);
720 // If this is the first superblock row of the tile, then we cannot apply
721 // horizontal deblocking here since we don't know if the top row is
722 // available. So it will be done by the calling thread in that case.
723 if (row4x4 != row4x4_start_) {
724 // Apply horizontal deblock filtering for all the columns in this tile
725 // except for the first and the last 64 columns.
726 // Note about the last tile of each row: For the last tile,
727 // column4x4_end may not be a multiple of 16. In that case it is still
728 // okay to simply subtract 16 since ApplyDeblockFilter() will only do
729 // the filters in increments of 64 columns (or 32 columns for chroma
730 // with subsampling).
731 post_filter_.ApplyDeblockFilter(
732 kLoopFilterTypeHorizontal, row4x4,
733 column4x4_start_ + kNum4x4InLoopFilterUnit,
734 column4x4_end_ - kNum4x4InLoopFilterUnit, block_width4x4);
735 }
736 }
737 bool notify;
738 {
739 std::unique_lock<std::mutex> lock(*mutex);
740 notify = ++superblock_row_progress[index] ==
741 frame_header_.tile_info.tile_columns;
742 }
743 if (notify) {
744 // We are done decoding this superblock row. Notify the post filtering
745 // thread.
746 superblock_row_progress_condvar[index].notify_one();
747 }
748 }
749 tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
750 return true;
751 }
752
ThreadedParseAndDecode()753 bool Tile::ThreadedParseAndDecode() {
754 {
755 std::lock_guard<std::mutex> lock(threading_.mutex);
756 if (!threading_.sb_state.Reset(superblock_rows_, superblock_columns_)) {
757 pending_tiles_->Decrement(false);
758 LIBGAV1_DLOG(ERROR, "threading.sb_state.Reset() failed.");
759 return false;
760 }
761 // Account for the parsing job.
762 ++threading_.pending_jobs;
763 }
764
765 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
766
767 // Begin parsing.
768 std::unique_ptr<TileScratchBuffer> scratch_buffer =
769 tile_scratch_buffer_pool_->Get();
770 if (scratch_buffer == nullptr) {
771 pending_tiles_->Decrement(false);
772 LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
773 return false;
774 }
775 for (int row4x4 = row4x4_start_, row_index = 0; row4x4 < row4x4_end_;
776 row4x4 += block_width4x4, ++row_index) {
777 for (int column4x4 = column4x4_start_, column_index = 0;
778 column4x4 < column4x4_end_;
779 column4x4 += block_width4x4, ++column_index) {
780 if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4,
781 scratch_buffer.get(), kProcessingModeParseOnly)) {
782 std::lock_guard<std::mutex> lock(threading_.mutex);
783 threading_.abort = true;
784 break;
785 }
786 std::unique_lock<std::mutex> lock(threading_.mutex);
787 if (threading_.abort) break;
788 threading_.sb_state[row_index][column_index] = kSuperBlockStateParsed;
789 // Schedule the decoding of this superblock if it is allowed.
790 if (CanDecode(row_index, column_index)) {
791 ++threading_.pending_jobs;
792 threading_.sb_state[row_index][column_index] =
793 kSuperBlockStateScheduled;
794 lock.unlock();
795 thread_pool_->Schedule(
796 [this, row_index, column_index, block_width4x4]() {
797 DecodeSuperBlock(row_index, column_index, block_width4x4);
798 });
799 }
800 }
801 std::lock_guard<std::mutex> lock(threading_.mutex);
802 if (threading_.abort) break;
803 }
804 tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
805
806 // We are done parsing. We can return here since the calling thread will make
807 // sure that it waits for all the superblocks to be decoded.
808 //
809 // Finish using |threading_| before |pending_tiles_->Decrement()| because the
810 // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
811 // is called.
812 threading_.mutex.lock();
813 const bool no_pending_jobs = (--threading_.pending_jobs == 0);
814 const bool job_succeeded = !threading_.abort;
815 threading_.mutex.unlock();
816 if (no_pending_jobs) {
817 // We are done parsing and decoding this tile.
818 pending_tiles_->Decrement(job_succeeded);
819 }
820 return job_succeeded;
821 }
822
CanDecode(int row_index,int column_index) const823 bool Tile::CanDecode(int row_index, int column_index) const {
824 assert(row_index >= 0);
825 assert(column_index >= 0);
826 // If |threading_.sb_state[row_index][column_index]| is not equal to
827 // kSuperBlockStateParsed, then return false. This is ok because if
828 // |threading_.sb_state[row_index][column_index]| is equal to:
829 // kSuperBlockStateNone - then the superblock is not yet parsed.
830 // kSuperBlockStateScheduled - then the superblock is already scheduled for
831 // decode.
832 // kSuperBlockStateDecoded - then the superblock has already been decoded.
833 if (row_index >= superblock_rows_ || column_index >= superblock_columns_ ||
834 threading_.sb_state[row_index][column_index] != kSuperBlockStateParsed) {
835 return false;
836 }
837 // First superblock has no dependencies.
838 if (row_index == 0 && column_index == 0) {
839 return true;
840 }
841 // Superblocks in the first row only depend on the superblock to the left of
842 // it.
843 if (row_index == 0) {
844 return threading_.sb_state[0][column_index - 1] == kSuperBlockStateDecoded;
845 }
846 // All other superblocks depend on superblock to the left of it (if one
847 // exists) and superblock to the top right with a lag of
848 // |intra_block_copy_lag_| (if one exists).
849 const int top_right_column_index =
850 std::min(column_index + intra_block_copy_lag_, superblock_columns_ - 1);
851 return threading_.sb_state[row_index - 1][top_right_column_index] ==
852 kSuperBlockStateDecoded &&
853 (column_index == 0 ||
854 threading_.sb_state[row_index][column_index - 1] ==
855 kSuperBlockStateDecoded);
856 }
857
DecodeSuperBlock(int row_index,int column_index,int block_width4x4)858 void Tile::DecodeSuperBlock(int row_index, int column_index,
859 int block_width4x4) {
860 const int row4x4 = row4x4_start_ + (row_index * block_width4x4);
861 const int column4x4 = column4x4_start_ + (column_index * block_width4x4);
862 std::unique_ptr<TileScratchBuffer> scratch_buffer =
863 tile_scratch_buffer_pool_->Get();
864 bool ok = scratch_buffer != nullptr;
865 if (ok) {
866 ok = ProcessSuperBlock(row4x4, column4x4, block_width4x4,
867 scratch_buffer.get(), kProcessingModeDecodeOnly);
868 tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
869 }
870 std::unique_lock<std::mutex> lock(threading_.mutex);
871 if (ok) {
872 threading_.sb_state[row_index][column_index] = kSuperBlockStateDecoded;
873 // Candidate rows and columns that we could potentially begin the decoding
874 // (if it is allowed to do so). The candidates are:
875 // 1) The superblock to the bottom-left of the current superblock with a
876 // lag of |intra_block_copy_lag_| (or the beginning of the next superblock
877 // row in case there are less than |intra_block_copy_lag_| superblock
878 // columns in the Tile).
879 // 2) The superblock to the right of the current superblock.
880 const int candidate_row_indices[] = {row_index + 1, row_index};
881 const int candidate_column_indices[] = {
882 std::max(0, column_index - intra_block_copy_lag_), column_index + 1};
883 for (size_t i = 0; i < std::extent<decltype(candidate_row_indices)>::value;
884 ++i) {
885 const int candidate_row_index = candidate_row_indices[i];
886 const int candidate_column_index = candidate_column_indices[i];
887 if (!CanDecode(candidate_row_index, candidate_column_index)) {
888 continue;
889 }
890 ++threading_.pending_jobs;
891 threading_.sb_state[candidate_row_index][candidate_column_index] =
892 kSuperBlockStateScheduled;
893 lock.unlock();
894 thread_pool_->Schedule([this, candidate_row_index, candidate_column_index,
895 block_width4x4]() {
896 DecodeSuperBlock(candidate_row_index, candidate_column_index,
897 block_width4x4);
898 });
899 lock.lock();
900 }
901 } else {
902 threading_.abort = true;
903 }
904 // Finish using |threading_| before |pending_tiles_->Decrement()| because the
905 // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
906 // is called.
907 const bool no_pending_jobs = (--threading_.pending_jobs == 0);
908 const bool job_succeeded = !threading_.abort;
909 lock.unlock();
910 if (no_pending_jobs) {
911 // We are done parsing and decoding this tile.
912 pending_tiles_->Decrement(job_succeeded);
913 }
914 }
915
PopulateIntraPredictionBuffer(int row4x4)916 void Tile::PopulateIntraPredictionBuffer(int row4x4) {
917 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
918 if (!use_intra_prediction_buffer_ || row4x4 + block_width4x4 >= row4x4_end_) {
919 return;
920 }
921 const size_t pixel_size =
922 (sequence_header_.color_config.bitdepth == 8 ? sizeof(uint8_t)
923 : sizeof(uint16_t));
924 for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
925 const int row_to_copy =
926 (MultiplyBy4(row4x4 + block_width4x4) >> subsampling_y_[plane]) - 1;
927 const size_t pixels_to_copy =
928 (MultiplyBy4(column4x4_end_ - column4x4_start_) >>
929 subsampling_x_[plane]) *
930 pixel_size;
931 const size_t column_start =
932 MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
933 void* start;
934 #if LIBGAV1_MAX_BITDEPTH >= 10
935 if (sequence_header_.color_config.bitdepth > 8) {
936 Array2DView<uint16_t> buffer(
937 buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
938 reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
939 start = &buffer[row_to_copy][column_start];
940 } else // NOLINT
941 #endif
942 {
943 start = &buffer_[plane][row_to_copy][column_start];
944 }
945 memcpy((*intra_prediction_buffer_)[plane].get() + column_start * pixel_size,
946 start, pixels_to_copy);
947 }
948 }
949
GetTransformAllZeroContext(const Block & block,Plane plane,TransformSize tx_size,int x4,int y4,int w4,int h4)950 int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
951 TransformSize tx_size, int x4, int y4,
952 int w4, int h4) {
953 const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
954 const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
955
956 const int tx_width = kTransformWidth[tx_size];
957 const int tx_height = kTransformHeight[tx_size];
958 const BlockSize plane_size = block.residual_size[plane];
959 const int block_width = kBlockWidthPixels[plane_size];
960 const int block_height = kBlockHeightPixels[plane_size];
961
962 int top = 0;
963 int left = 0;
964 const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
965 const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
966 if (plane == kPlaneY) {
967 if (block_width == tx_width && block_height == tx_height) return 0;
968 const uint8_t* coefficient_levels =
969 &coefficient_levels_[kEntropyContextTop][plane][x4];
970 for (int i = 0; i < num_top_elements; ++i) {
971 top = std::max(top, static_cast<int>(coefficient_levels[i]));
972 }
973 coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
974 for (int i = 0; i < num_left_elements; ++i) {
975 left = std::max(left, static_cast<int>(coefficient_levels[i]));
976 }
977 assert(top <= 4);
978 assert(left <= 4);
979 // kAllZeroContextsByTopLeft is pre-computed based on the logic in the spec
980 // for top and left.
981 return kAllZeroContextsByTopLeft[top][left];
982 }
983 const uint8_t* coefficient_levels =
984 &coefficient_levels_[kEntropyContextTop][plane][x4];
985 const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
986 for (int i = 0; i < num_top_elements; ++i) {
987 top |= coefficient_levels[i];
988 top |= dc_categories[i];
989 }
990 coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
991 dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
992 for (int i = 0; i < num_left_elements; ++i) {
993 left |= coefficient_levels[i];
994 left |= dc_categories[i];
995 }
996 return static_cast<int>(top != 0) + static_cast<int>(left != 0) + 7 +
997 3 * static_cast<int>(block_width * block_height >
998 tx_width * tx_height);
999 }
1000
GetTransformSet(TransformSize tx_size,bool is_inter) const1001 TransformSet Tile::GetTransformSet(TransformSize tx_size, bool is_inter) const {
1002 const TransformSize tx_size_square_min = kTransformSizeSquareMin[tx_size];
1003 const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
1004 if (tx_size_square_max == kTransformSize64x64) return kTransformSetDctOnly;
1005 if (is_inter) {
1006 if (frame_header_.reduced_tx_set ||
1007 tx_size_square_max == kTransformSize32x32) {
1008 return kTransformSetInter3;
1009 }
1010 if (tx_size_square_min == kTransformSize16x16) return kTransformSetInter2;
1011 return kTransformSetInter1;
1012 }
1013 if (tx_size_square_max == kTransformSize32x32) return kTransformSetDctOnly;
1014 if (frame_header_.reduced_tx_set ||
1015 tx_size_square_min == kTransformSize16x16) {
1016 return kTransformSetIntra2;
1017 }
1018 return kTransformSetIntra1;
1019 }
1020
ComputeTransformType(const Block & block,Plane plane,TransformSize tx_size,int block_x,int block_y)1021 TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
1022 TransformSize tx_size, int block_x,
1023 int block_y) {
1024 const BlockParameters& bp = *block.bp;
1025 const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
1026 if (frame_header_.segmentation.lossless[bp.segment_id] ||
1027 tx_size_square_max == kTransformSize64x64) {
1028 return kTransformTypeDctDct;
1029 }
1030 if (plane == kPlaneY) {
1031 return transform_types_[block_y - block.row4x4][block_x - block.column4x4];
1032 }
1033 const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
1034 TransformType tx_type;
1035 if (bp.is_inter) {
1036 const int x4 =
1037 std::max(block.column4x4, block_x << subsampling_x_[kPlaneU]);
1038 const int y4 = std::max(block.row4x4, block_y << subsampling_y_[kPlaneU]);
1039 tx_type = transform_types_[y4 - block.row4x4][x4 - block.column4x4];
1040 } else {
1041 tx_type = kModeToTransformType[bp.uv_mode];
1042 }
1043 return kTransformTypeInSetMask[tx_set].Contains(tx_type)
1044 ? tx_type
1045 : kTransformTypeDctDct;
1046 }
1047
ReadTransformType(const Block & block,int x4,int y4,TransformSize tx_size)1048 void Tile::ReadTransformType(const Block& block, int x4, int y4,
1049 TransformSize tx_size) {
1050 BlockParameters& bp = *block.bp;
1051 const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
1052
1053 TransformType tx_type = kTransformTypeDctDct;
1054 if (tx_set != kTransformSetDctOnly &&
1055 frame_header_.segmentation.qindex[bp.segment_id] > 0) {
1056 const int cdf_index = SymbolDecoderContext::TxTypeIndex(tx_set);
1057 const int cdf_tx_size_index =
1058 TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[tx_size]);
1059 uint16_t* cdf;
1060 if (bp.is_inter) {
1061 cdf = symbol_decoder_context_
1062 .inter_tx_type_cdf[cdf_index][cdf_tx_size_index];
1063 } else {
1064 const PredictionMode intra_direction =
1065 block.bp->prediction_parameters->use_filter_intra
1066 ? kFilterIntraModeToIntraPredictor[block.bp->prediction_parameters
1067 ->filter_intra_mode]
1068 : bp.y_mode;
1069 cdf =
1070 symbol_decoder_context_
1071 .intra_tx_type_cdf[cdf_index][cdf_tx_size_index][intra_direction];
1072 }
1073 tx_type = static_cast<TransformType>(
1074 reader_.ReadSymbol(cdf, kNumTransformTypesInSet[tx_set]));
1075 // This array does not contain an entry for kTransformSetDctOnly, so the
1076 // first dimension needs to be offset by 1.
1077 tx_type = kInverseTransformTypeBySet[tx_set - 1][tx_type];
1078 }
1079 SetTransformType(block, x4, y4, kTransformWidth4x4[tx_size],
1080 kTransformHeight4x4[tx_size], tx_type, transform_types_);
1081 }
1082
1083 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1084 // Bottom boundary checks are avoided by the padded rows.
1085 // For a coefficient near the right boundary, the two right neighbors and the
1086 // one bottom-right neighbor may be out of boundary. We don't check the right
1087 // boundary for them, because the out of boundary neighbors project to positions
1088 // above the diagonal line which goes through the current coefficient and these
1089 // positions are still all 0s according to the diagonal scan order.
1090 template <typename ResidualType>
ReadCoeffBase2D(const uint16_t * scan,PlaneType plane_type,TransformSize tx_size,int clamped_tx_size_context,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],ResidualType * const quantized_buffer)1091 void Tile::ReadCoeffBase2D(
1092 const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
1093 int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1094 uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1095 ResidualType* const quantized_buffer) {
1096 const int tx_width = 1 << adjusted_tx_width_log2;
1097 int i = eob - 2;
1098 do {
1099 constexpr auto threshold = static_cast<ResidualType>(3);
1100 const uint16_t pos = scan[i];
1101 const int row = pos >> adjusted_tx_width_log2;
1102 const int column = pos & (tx_width - 1);
1103 auto* const quantized = &quantized_buffer[pos];
1104 int context;
1105 if (pos == 0) {
1106 context = 0;
1107 } else {
1108 context = std::min(
1109 4, DivideBy2(
1110 1 + (std::min(quantized[1], threshold) + // {0, 1}
1111 std::min(quantized[tx_width], threshold) + // {1, 0}
1112 std::min(quantized[tx_width + 1], threshold) + // {1, 1}
1113 std::min(quantized[2], threshold) + // {0, 2}
1114 std::min(quantized[MultiplyBy2(tx_width)],
1115 threshold)))); // {2, 0}
1116 context += kCoeffBaseContextOffset[tx_size][std::min(row, 4)]
1117 [std::min(column, 4)];
1118 }
1119 int level =
1120 reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1121 if (level > kNumQuantizerBaseLevels) {
1122 // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1123 // + 1, because we clip the overall output to 6 and the unclipped
1124 // quantized values will always result in an output of greater than 6.
1125 context = std::min(6, DivideBy2(1 + quantized[1] + // {0, 1}
1126 quantized[tx_width] + // {1, 0}
1127 quantized[tx_width + 1])); // {1, 1}
1128 if (pos != 0) {
1129 context += 14 >> static_cast<int>((row | column) < 2);
1130 }
1131 level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
1132 }
1133 quantized[0] = level;
1134 } while (--i >= 0);
1135 }
1136
1137 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1138 // Bottom boundary checks are avoided by the padded rows.
1139 // For a coefficient near the right boundary, the four right neighbors may be
1140 // out of boundary. We don't do the boundary check for the first three right
1141 // neighbors, because even for the transform blocks with smallest width 4, the
1142 // first three out of boundary neighbors project to positions left of the
1143 // current coefficient and these positions are still all 0s according to the
1144 // column scan order. However, when transform block width is 4 and the current
1145 // coefficient is on the right boundary, its fourth right neighbor projects to
1146 // the under position on the same column, which could be nonzero. Therefore, we
1147 // must skip the fourth right neighbor. To make it simple, for any coefficient,
1148 // we always do the boundary check for its fourth right neighbor.
1149 template <typename ResidualType>
ReadCoeffBaseHorizontal(const uint16_t * scan,PlaneType plane_type,TransformSize,int clamped_tx_size_context,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],ResidualType * const quantized_buffer)1150 void Tile::ReadCoeffBaseHorizontal(
1151 const uint16_t* scan, PlaneType plane_type, TransformSize /*tx_size*/,
1152 int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1153 uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1154 ResidualType* const quantized_buffer) {
1155 const int tx_width = 1 << adjusted_tx_width_log2;
1156 int i = eob - 2;
1157 do {
1158 constexpr auto threshold = static_cast<ResidualType>(3);
1159 const uint16_t pos = scan[i];
1160 const int column = pos & (tx_width - 1);
1161 auto* const quantized = &quantized_buffer[pos];
1162 int context = std::min(
1163 4,
1164 DivideBy2(1 +
1165 (std::min(quantized[1], threshold) + // {0, 1}
1166 std::min(quantized[tx_width], threshold) + // {1, 0}
1167 std::min(quantized[2], threshold) + // {0, 2}
1168 std::min(quantized[3], threshold) + // {0, 3}
1169 std::min(quantized[4],
1170 static_cast<ResidualType>(
1171 (column + 4 < tx_width) ? 3 : 0))))); // {0, 4}
1172 context += kCoeffBasePositionContextOffset[column];
1173 int level =
1174 reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1175 if (level > kNumQuantizerBaseLevels) {
1176 // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1177 // + 1, because we clip the overall output to 6 and the unclipped
1178 // quantized values will always result in an output of greater than 6.
1179 context = std::min(6, DivideBy2(1 + quantized[1] + // {0, 1}
1180 quantized[tx_width] + // {1, 0}
1181 quantized[2])); // {0, 2}
1182 if (pos != 0) {
1183 context += 14 >> static_cast<int>(column == 0);
1184 }
1185 level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
1186 }
1187 quantized[0] = level;
1188 } while (--i >= 0);
1189 }
1190
1191 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1192 // Bottom boundary checks are avoided by the padded rows.
1193 // Right boundary check is performed explicitly.
1194 template <typename ResidualType>
ReadCoeffBaseVertical(const uint16_t * scan,PlaneType plane_type,TransformSize,int clamped_tx_size_context,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],ResidualType * const quantized_buffer)1195 void Tile::ReadCoeffBaseVertical(
1196 const uint16_t* scan, PlaneType plane_type, TransformSize /*tx_size*/,
1197 int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1198 uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1199 ResidualType* const quantized_buffer) {
1200 const int tx_width = 1 << adjusted_tx_width_log2;
1201 int i = eob - 2;
1202 do {
1203 constexpr auto threshold = static_cast<ResidualType>(3);
1204 const uint16_t pos = scan[i];
1205 const int row = pos >> adjusted_tx_width_log2;
1206 const int column = pos & (tx_width - 1);
1207 auto* const quantized = &quantized_buffer[pos];
1208 const int quantized_column1 = (column + 1 < tx_width) ? quantized[1] : 0;
1209 int context =
1210 std::min(4, DivideBy2(1 + (std::min(quantized_column1, 3) + // {0, 1}
1211 std::min(quantized[tx_width],
1212 threshold) + // {1, 0}
1213 std::min(quantized[MultiplyBy2(tx_width)],
1214 threshold) + // {2, 0}
1215 std::min(quantized[tx_width * 3],
1216 threshold) + // {3, 0}
1217 std::min(quantized[MultiplyBy4(tx_width)],
1218 threshold)))); // {4, 0}
1219 context += kCoeffBasePositionContextOffset[row];
1220 int level =
1221 reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1222 if (level > kNumQuantizerBaseLevels) {
1223 // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1224 // + 1, because we clip the overall output to 6 and the unclipped
1225 // quantized values will always result in an output of greater than 6.
1226 int context =
1227 std::min(6, DivideBy2(1 + quantized_column1 + // {0, 1}
1228 quantized[tx_width] + // {1, 0}
1229 quantized[MultiplyBy2(tx_width)])); // {2, 0}
1230 if (pos != 0) {
1231 context += 14 >> static_cast<int>(row == 0);
1232 }
1233 level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
1234 }
1235 quantized[0] = level;
1236 } while (--i >= 0);
1237 }
1238
GetDcSignContext(int x4,int y4,int w4,int h4,Plane plane)1239 int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
1240 const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1241 const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
1242 // Set dc_sign to 8-bit long so that std::accumulate() saves sign extension.
1243 int8_t dc_sign = std::accumulate(
1244 dc_categories, dc_categories + GetNumElements(w4, x4, max_x4x4), 0);
1245 const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1246 dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
1247 dc_sign = std::accumulate(
1248 dc_categories, dc_categories + GetNumElements(h4, y4, max_y4x4), dc_sign);
1249 // This return statement is equivalent to:
1250 // if (dc_sign < 0) return 1;
1251 // if (dc_sign > 0) return 2;
1252 // return 0;
1253 // And it is better than:
1254 // return static_cast<int>(dc_sign != 0) + static_cast<int>(dc_sign > 0);
1255 return static_cast<int>(dc_sign < 0) +
1256 MultiplyBy2(static_cast<int>(dc_sign > 0));
1257 }
1258
SetEntropyContexts(int x4,int y4,int w4,int h4,Plane plane,uint8_t coefficient_level,int8_t dc_category)1259 void Tile::SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
1260 uint8_t coefficient_level, int8_t dc_category) {
1261 const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1262 const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
1263 memset(&coefficient_levels_[kEntropyContextTop][plane][x4], coefficient_level,
1264 num_top_elements);
1265 memset(&dc_categories_[kEntropyContextTop][plane][x4], dc_category,
1266 num_top_elements);
1267 const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1268 const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
1269 memset(&coefficient_levels_[kEntropyContextLeft][plane][y4],
1270 coefficient_level, num_left_elements);
1271 memset(&dc_categories_[kEntropyContextLeft][plane][y4], dc_category,
1272 num_left_elements);
1273 }
1274
ScaleMotionVector(const MotionVector & mv,const Plane plane,const int reference_frame_index,const int x,const int y,int * const start_x,int * const start_y,int * const step_x,int * const step_y)1275 void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane,
1276 const int reference_frame_index, const int x,
1277 const int y, int* const start_x,
1278 int* const start_y, int* const step_x,
1279 int* const step_y) {
1280 const int reference_upscaled_width =
1281 (reference_frame_index == -1)
1282 ? frame_header_.upscaled_width
1283 : reference_frames_[reference_frame_index]->upscaled_width();
1284 const int reference_height =
1285 (reference_frame_index == -1)
1286 ? frame_header_.height
1287 : reference_frames_[reference_frame_index]->frame_height();
1288 assert(2 * frame_header_.width >= reference_upscaled_width &&
1289 2 * frame_header_.height >= reference_height &&
1290 frame_header_.width <= 16 * reference_upscaled_width &&
1291 frame_header_.height <= 16 * reference_height);
1292 const bool is_scaled_x = reference_upscaled_width != frame_header_.width;
1293 const bool is_scaled_y = reference_height != frame_header_.height;
1294 const int half_sample = 1 << (kSubPixelBits - 1);
1295 int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]);
1296 int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]);
1297 const int rounding_offset =
1298 DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits));
1299 if (is_scaled_x) {
1300 const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) +
1301 DivideBy2(frame_header_.width)) /
1302 frame_header_.width;
1303 *step_x = RightShiftWithRoundingSigned(
1304 scale_x, kReferenceScaleShift - kScaleSubPixelBits);
1305 orig_x += half_sample;
1306 // When frame size is 4k and above, orig_x can be above 16 bits, scale_x can
1307 // be up to 15 bits. So we use int64_t to hold base_x.
1308 const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x -
1309 (half_sample << kReferenceScaleShift);
1310 *start_x =
1311 RightShiftWithRoundingSigned(
1312 base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
1313 rounding_offset;
1314 } else {
1315 *step_x = 1 << kScaleSubPixelBits;
1316 *start_x = LeftShift(orig_x, 6) + rounding_offset;
1317 }
1318 if (is_scaled_y) {
1319 const int scale_y = ((reference_height << kReferenceScaleShift) +
1320 DivideBy2(frame_header_.height)) /
1321 frame_header_.height;
1322 *step_y = RightShiftWithRoundingSigned(
1323 scale_y, kReferenceScaleShift - kScaleSubPixelBits);
1324 orig_y += half_sample;
1325 const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y -
1326 (half_sample << kReferenceScaleShift);
1327 *start_y =
1328 RightShiftWithRoundingSigned(
1329 base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
1330 rounding_offset;
1331 } else {
1332 *step_y = 1 << kScaleSubPixelBits;
1333 *start_y = LeftShift(orig_y, 6) + rounding_offset;
1334 }
1335 }
1336
1337 template <typename ResidualType, bool is_dc_coefficient>
ReadSignAndApplyDequantization(const uint16_t * const scan,int i,int q_value,const uint8_t * const quantizer_matrix,int shift,int max_value,uint16_t * const dc_sign_cdf,int8_t * const dc_category,int * const coefficient_level,ResidualType * residual_buffer)1338 bool Tile::ReadSignAndApplyDequantization(
1339 const uint16_t* const scan, int i, int q_value,
1340 const uint8_t* const quantizer_matrix, int shift, int max_value,
1341 uint16_t* const dc_sign_cdf, int8_t* const dc_category,
1342 int* const coefficient_level, ResidualType* residual_buffer) {
1343 const int pos = is_dc_coefficient ? 0 : scan[i];
1344 // If residual_buffer[pos] is zero, then the rest of the function has no
1345 // effect.
1346 int level = residual_buffer[pos];
1347 if (level == 0) return true;
1348 const int sign = is_dc_coefficient
1349 ? static_cast<int>(reader_.ReadSymbol(dc_sign_cdf))
1350 : reader_.ReadBit();
1351 if (level > kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
1352 int length = 0;
1353 bool golomb_length_bit = false;
1354 do {
1355 golomb_length_bit = static_cast<bool>(reader_.ReadBit());
1356 ++length;
1357 if (length > 20) {
1358 LIBGAV1_DLOG(ERROR, "Invalid golomb_length %d", length);
1359 return false;
1360 }
1361 } while (!golomb_length_bit);
1362 int x = 1;
1363 for (int i = length - 2; i >= 0; --i) {
1364 x = (x << 1) | reader_.ReadBit();
1365 }
1366 level += x - 1;
1367 }
1368 if (is_dc_coefficient) {
1369 *dc_category = (sign != 0) ? -1 : 1;
1370 }
1371 level &= 0xfffff;
1372 *coefficient_level += level;
1373 // Apply dequantization. Step 1 of section 7.12.3 in the spec.
1374 int q = q_value;
1375 if (quantizer_matrix != nullptr) {
1376 q = RightShiftWithRounding(q * quantizer_matrix[pos], 5);
1377 }
1378 // The intermediate multiplication can exceed 32 bits, so it has to be
1379 // performed by promoting one of the values to int64_t.
1380 int32_t dequantized_value = (static_cast<int64_t>(q) * level) & 0xffffff;
1381 dequantized_value >>= shift;
1382 // At this point:
1383 // * |dequantized_value| is always non-negative.
1384 // * |sign| can be either 0 or 1.
1385 // * min_value = -(max_value + 1).
1386 // We need to apply the following:
1387 // dequantized_value = sign ? -dequantized_value : dequantized_value;
1388 // dequantized_value = Clip3(dequantized_value, min_value, max_value);
1389 //
1390 // Note that -x == ~(x - 1).
1391 //
1392 // Now, The above two lines can be done with a std::min and xor as follows:
1393 dequantized_value = std::min(dequantized_value - sign, max_value) ^ -sign;
1394 residual_buffer[pos] = dequantized_value;
1395 return true;
1396 }
1397
ReadCoeffBaseRange(int clamped_tx_size_context,int cdf_context,int plane_type)1398 int Tile::ReadCoeffBaseRange(int clamped_tx_size_context, int cdf_context,
1399 int plane_type) {
1400 int level = 0;
1401 for (int j = 0; j < kCoeffBaseRangeMaxIterations; ++j) {
1402 const int coeff_base_range = reader_.ReadSymbol<kCoeffBaseRangeSymbolCount>(
1403 symbol_decoder_context_.coeff_base_range_cdf[clamped_tx_size_context]
1404 [plane_type][cdf_context]);
1405 level += coeff_base_range;
1406 if (coeff_base_range < (kCoeffBaseRangeSymbolCount - 1)) break;
1407 }
1408 return level;
1409 }
1410
1411 template <typename ResidualType>
ReadTransformCoefficients(const Block & block,Plane plane,int start_x,int start_y,TransformSize tx_size,TransformType * const tx_type)1412 int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
1413 int start_x, int start_y,
1414 TransformSize tx_size,
1415 TransformType* const tx_type) {
1416 const int x4 = DivideBy4(start_x);
1417 const int y4 = DivideBy4(start_y);
1418 const int w4 = kTransformWidth4x4[tx_size];
1419 const int h4 = kTransformHeight4x4[tx_size];
1420 const int tx_size_context = kTransformSizeContext[tx_size];
1421 int context =
1422 GetTransformAllZeroContext(block, plane, tx_size, x4, y4, w4, h4);
1423 const bool all_zero = reader_.ReadSymbol(
1424 symbol_decoder_context_.all_zero_cdf[tx_size_context][context]);
1425 if (all_zero) {
1426 if (plane == kPlaneY) {
1427 SetTransformType(block, x4, y4, w4, h4, kTransformTypeDctDct,
1428 transform_types_);
1429 }
1430 SetEntropyContexts(x4, y4, w4, h4, plane, 0, 0);
1431 // This is not used in this case, so it can be set to any value.
1432 *tx_type = kNumTransformTypes;
1433 return 0;
1434 }
1435 const int tx_width = kTransformWidth[tx_size];
1436 const int tx_height = kTransformHeight[tx_size];
1437 const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
1438 const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
1439 const int tx_padding =
1440 (1 << adjusted_tx_width_log2) * kResidualPaddingVertical;
1441 auto* residual = reinterpret_cast<ResidualType*>(*block.residual);
1442 // Clear padding to avoid bottom boundary checks when parsing quantized
1443 // coefficients.
1444 memset(residual, 0, (tx_width * tx_height + tx_padding) * residual_size_);
1445 const int clamped_tx_height = std::min(tx_height, 32);
1446 if (plane == kPlaneY) {
1447 ReadTransformType(block, x4, y4, tx_size);
1448 }
1449 BlockParameters& bp = *block.bp;
1450 *tx_type = ComputeTransformType(block, plane, tx_size, x4, y4);
1451 const int eob_multi_size = kEobMultiSizeLookup[tx_size];
1452 const PlaneType plane_type = GetPlaneType(plane);
1453 const TransformClass tx_class = GetTransformClass(*tx_type);
1454 context = static_cast<int>(tx_class != kTransformClass2D);
1455 int eob_pt = 1;
1456 switch (eob_multi_size) {
1457 case 0:
1458 eob_pt += reader_.ReadSymbol<kEobPt16SymbolCount>(
1459 symbol_decoder_context_.eob_pt_16_cdf[plane_type][context]);
1460 break;
1461 case 1:
1462 eob_pt += reader_.ReadSymbol<kEobPt32SymbolCount>(
1463 symbol_decoder_context_.eob_pt_32_cdf[plane_type][context]);
1464 break;
1465 case 2:
1466 eob_pt += reader_.ReadSymbol<kEobPt64SymbolCount>(
1467 symbol_decoder_context_.eob_pt_64_cdf[plane_type][context]);
1468 break;
1469 case 3:
1470 eob_pt += reader_.ReadSymbol<kEobPt128SymbolCount>(
1471 symbol_decoder_context_.eob_pt_128_cdf[plane_type][context]);
1472 break;
1473 case 4:
1474 eob_pt += reader_.ReadSymbol<kEobPt256SymbolCount>(
1475 symbol_decoder_context_.eob_pt_256_cdf[plane_type][context]);
1476 break;
1477 case 5:
1478 eob_pt += reader_.ReadSymbol<kEobPt512SymbolCount>(
1479 symbol_decoder_context_.eob_pt_512_cdf[plane_type]);
1480 break;
1481 case 6:
1482 default:
1483 eob_pt += reader_.ReadSymbol<kEobPt1024SymbolCount>(
1484 symbol_decoder_context_.eob_pt_1024_cdf[plane_type]);
1485 break;
1486 }
1487 int eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
1488 if (eob_pt >= 3) {
1489 context = eob_pt - 3;
1490 const bool eob_extra = reader_.ReadSymbol(
1491 symbol_decoder_context_
1492 .eob_extra_cdf[tx_size_context][plane_type][context]);
1493 if (eob_extra) eob += 1 << (eob_pt - 3);
1494 for (int i = 1; i < eob_pt - 2; ++i) {
1495 assert(eob_pt - i >= 3);
1496 assert(eob_pt <= kEobPt1024SymbolCount);
1497 if (static_cast<bool>(reader_.ReadBit())) {
1498 eob += 1 << (eob_pt - i - 3);
1499 }
1500 }
1501 }
1502 const uint16_t* scan = kScan[tx_class][tx_size];
1503 const int clamped_tx_size_context = std::min(tx_size_context, 3);
1504 // Read the last coefficient.
1505 {
1506 context = GetCoeffBaseContextEob(tx_size, eob - 1);
1507 const uint16_t pos = scan[eob - 1];
1508 int level =
1509 1 + reader_.ReadSymbol<kCoeffBaseEobSymbolCount>(
1510 symbol_decoder_context_
1511 .coeff_base_eob_cdf[tx_size_context][plane_type][context]);
1512 if (level > kNumQuantizerBaseLevels) {
1513 level += ReadCoeffBaseRange(
1514 clamped_tx_size_context,
1515 GetCoeffBaseRangeContextEob(adjusted_tx_width_log2, pos, tx_class),
1516 plane_type);
1517 }
1518 residual[pos] = level;
1519 }
1520 if (eob > 1) {
1521 // Read all the other coefficients.
1522 // Lookup used to call the right variant of ReadCoeffBase*() based on the
1523 // transform class.
1524 static constexpr void (Tile::*kGetCoeffBaseFunc[])(
1525 const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
1526 int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1527 uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1528 ResidualType* quantized_buffer) = {
1529 &Tile::ReadCoeffBase2D<ResidualType>,
1530 &Tile::ReadCoeffBaseHorizontal<ResidualType>,
1531 &Tile::ReadCoeffBaseVertical<ResidualType>};
1532 (this->*kGetCoeffBaseFunc[tx_class])(
1533 scan, plane_type, tx_size, clamped_tx_size_context,
1534 adjusted_tx_width_log2, eob,
1535 symbol_decoder_context_.coeff_base_cdf[tx_size_context][plane_type],
1536 residual);
1537 }
1538 const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
1539 const int current_quantizer_index = GetQIndex(
1540 frame_header_.segmentation, bp.segment_id, current_quantizer_index_);
1541 const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
1542 const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
1543 const int shift = kQuantizationShift[tx_size];
1544 const uint8_t* const quantizer_matrix =
1545 (frame_header_.quantizer.use_matrix &&
1546 *tx_type < kTransformTypeIdentityIdentity &&
1547 !frame_header_.segmentation.lossless[bp.segment_id] &&
1548 frame_header_.quantizer.matrix_level[plane] < 15)
1549 ? &kQuantizerMatrix[frame_header_.quantizer.matrix_level[plane]]
1550 [plane_type][kQuantizerMatrixOffset[tx_size]]
1551 : nullptr;
1552 int coefficient_level = 0;
1553 int8_t dc_category = 0;
1554 uint16_t* const dc_sign_cdf =
1555 (residual[0] != 0)
1556 ? symbol_decoder_context_.dc_sign_cdf[plane_type][GetDcSignContext(
1557 x4, y4, w4, h4, plane)]
1558 : nullptr;
1559 assert(scan[0] == 0);
1560 if (!ReadSignAndApplyDequantization<ResidualType, /*is_dc_coefficient=*/true>(
1561 scan, 0, dc_q_value, quantizer_matrix, shift, max_value, dc_sign_cdf,
1562 &dc_category, &coefficient_level, residual)) {
1563 return -1;
1564 }
1565 if (eob > 1) {
1566 int i = 1;
1567 do {
1568 if (!ReadSignAndApplyDequantization<ResidualType,
1569 /*is_dc_coefficient=*/false>(
1570 scan, i, ac_q_value, quantizer_matrix, shift, max_value, nullptr,
1571 nullptr, &coefficient_level, residual)) {
1572 return -1;
1573 }
1574 } while (++i < eob);
1575 MoveCoefficientsForTxWidth64(clamped_tx_height, tx_width, residual);
1576 }
1577 SetEntropyContexts(x4, y4, w4, h4, plane, std::min(4, coefficient_level),
1578 dc_category);
1579 if (split_parse_and_decode_) {
1580 *block.residual += tx_width * tx_height * residual_size_;
1581 }
1582 return eob;
1583 }
1584
// CALL_BITDEPTH_FUNCTION invokes the template |function| with the variadic
// arguments, instantiated with the pixel type that corresponds to
// |sequence_header_.color_config.bitdepth|: uint8_t for bitdepth 8 and
// uint16_t for anything above. When the library is built without high
// bitdepth support only the uint8_t instantiation is referenced.
#if LIBGAV1_MAX_BITDEPTH >= 10
#define CALL_BITDEPTH_FUNCTION(function, ...)             \
  do {                                                    \
    if (sequence_header_.color_config.bitdepth <= 8) {    \
      function<uint8_t>(__VA_ARGS__);                     \
    } else {                                              \
      function<uint16_t>(__VA_ARGS__);                    \
    }                                                     \
  } while (false)
#else
#define CALL_BITDEPTH_FUNCTION(function, ...) \
  do {                                        \
    function<uint8_t>(__VA_ARGS__);           \
  } while (false)
#endif
1603
TransformBlock(const Block & block,Plane plane,int base_x,int base_y,TransformSize tx_size,int x,int y,ProcessingMode mode)1604 bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
1605 int base_y, TransformSize tx_size, int x, int y,
1606 ProcessingMode mode) {
1607 BlockParameters& bp = *block.bp;
1608 const int subsampling_x = subsampling_x_[plane];
1609 const int subsampling_y = subsampling_y_[plane];
1610 const int start_x = base_x + MultiplyBy4(x);
1611 const int start_y = base_y + MultiplyBy4(y);
1612 const int max_x = MultiplyBy4(frame_header_.columns4x4) >> subsampling_x;
1613 const int max_y = MultiplyBy4(frame_header_.rows4x4) >> subsampling_y;
1614 if (start_x >= max_x || start_y >= max_y) return true;
1615 const int row = DivideBy4(start_y << subsampling_y);
1616 const int column = DivideBy4(start_x << subsampling_x);
1617 const int mask = sequence_header_.use_128x128_superblock ? 31 : 15;
1618 const int sub_block_row4x4 = row & mask;
1619 const int sub_block_column4x4 = column & mask;
1620 const int step_x = kTransformWidth4x4[tx_size];
1621 const int step_y = kTransformHeight4x4[tx_size];
1622 const bool do_decode = mode == kProcessingModeDecodeOnly ||
1623 mode == kProcessingModeParseAndDecode;
1624 if (do_decode && !bp.is_inter) {
1625 if (bp.palette_mode_info.size[GetPlaneType(plane)] > 0) {
1626 CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
1627 x, y, tx_size);
1628 } else {
1629 const PredictionMode mode =
1630 (plane == kPlaneY)
1631 ? bp.y_mode
1632 : (bp.uv_mode == kPredictionModeChromaFromLuma ? kPredictionModeDc
1633 : bp.uv_mode);
1634 const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
1635 const int tr_column4x4 =
1636 (sub_block_column4x4 >> subsampling_x) + step_x + 1;
1637 const int bl_row4x4 = (sub_block_row4x4 >> subsampling_y) + step_y + 1;
1638 const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x);
1639 const bool has_left = x > 0 || block.left_available[plane];
1640 const bool has_top = y > 0 || block.top_available[plane];
1641
1642 CALL_BITDEPTH_FUNCTION(
1643 IntraPrediction, block, plane, start_x, start_y, has_left, has_top,
1644 block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
1645 block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
1646 mode, tx_size);
1647 if (plane != kPlaneY && bp.uv_mode == kPredictionModeChromaFromLuma) {
1648 CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
1649 start_y, tx_size);
1650 }
1651 }
1652 if (plane == kPlaneY) {
1653 block.bp->prediction_parameters->max_luma_width =
1654 start_x + MultiplyBy4(step_x);
1655 block.bp->prediction_parameters->max_luma_height =
1656 start_y + MultiplyBy4(step_y);
1657 block.scratch_buffer->cfl_luma_buffer_valid = false;
1658 }
1659 }
1660 if (!bp.skip) {
1661 const int sb_row_index = SuperBlockRowIndex(block.row4x4);
1662 const int sb_column_index = SuperBlockColumnIndex(block.column4x4);
1663 if (mode == kProcessingModeDecodeOnly) {
1664 TransformParameterQueue& tx_params =
1665 *residual_buffer_threaded_[sb_row_index][sb_column_index]
1666 ->transform_parameters();
1667 ReconstructBlock(block, plane, start_x, start_y, tx_size,
1668 tx_params.Type(), tx_params.NonZeroCoeffCount());
1669 tx_params.Pop();
1670 } else {
1671 TransformType tx_type;
1672 int non_zero_coeff_count;
1673 #if LIBGAV1_MAX_BITDEPTH >= 10
1674 if (sequence_header_.color_config.bitdepth > 8) {
1675 non_zero_coeff_count = ReadTransformCoefficients<int32_t>(
1676 block, plane, start_x, start_y, tx_size, &tx_type);
1677 } else // NOLINT
1678 #endif
1679 {
1680 non_zero_coeff_count = ReadTransformCoefficients<int16_t>(
1681 block, plane, start_x, start_y, tx_size, &tx_type);
1682 }
1683 if (non_zero_coeff_count < 0) return false;
1684 if (mode == kProcessingModeParseAndDecode) {
1685 ReconstructBlock(block, plane, start_x, start_y, tx_size, tx_type,
1686 non_zero_coeff_count);
1687 } else {
1688 assert(mode == kProcessingModeParseOnly);
1689 residual_buffer_threaded_[sb_row_index][sb_column_index]
1690 ->transform_parameters()
1691 ->Push(non_zero_coeff_count, tx_type);
1692 }
1693 }
1694 }
1695 if (do_decode) {
1696 bool* block_decoded =
1697 &block.scratch_buffer
1698 ->block_decoded[plane][(sub_block_row4x4 >> subsampling_y) + 1]
1699 [(sub_block_column4x4 >> subsampling_x) + 1];
1700 SetBlockValues<bool>(step_y, step_x, true, block_decoded,
1701 TileScratchBuffer::kBlockDecodedStride);
1702 }
1703 return true;
1704 }
1705
TransformTree(const Block & block,int start_x,int start_y,BlockSize plane_size,ProcessingMode mode)1706 bool Tile::TransformTree(const Block& block, int start_x, int start_y,
1707 BlockSize plane_size, ProcessingMode mode) {
1708 assert(plane_size <= kBlock64x64);
1709 // Branching factor is 4; Maximum Depth is 4; So the maximum stack size
1710 // required is (4 - 1) * 4 + 1 = 13.
1711 Stack<TransformTreeNode, 13> stack;
1712 // It is okay to cast BlockSize to TransformSize here since the enum are
1713 // equivalent for all BlockSize values <= kBlock64x64.
1714 stack.Push(TransformTreeNode(start_x, start_y,
1715 static_cast<TransformSize>(plane_size)));
1716
1717 do {
1718 TransformTreeNode node = stack.Pop();
1719 const int row = DivideBy4(node.y);
1720 const int column = DivideBy4(node.x);
1721 if (row >= frame_header_.rows4x4 || column >= frame_header_.columns4x4) {
1722 continue;
1723 }
1724 const TransformSize inter_tx_size = inter_transform_sizes_[row][column];
1725 const int width = kTransformWidth[node.tx_size];
1726 const int height = kTransformHeight[node.tx_size];
1727 if (width <= kTransformWidth[inter_tx_size] &&
1728 height <= kTransformHeight[inter_tx_size]) {
1729 if (!TransformBlock(block, kPlaneY, node.x, node.y, node.tx_size, 0, 0,
1730 mode)) {
1731 return false;
1732 }
1733 continue;
1734 }
1735 // The split transform size look up gives the right transform size that we
1736 // should push in the stack.
1737 // if (width > height) => transform size whose width is half.
1738 // if (width < height) => transform size whose height is half.
1739 // if (width == height) => transform size whose width and height are half.
1740 const TransformSize split_tx_size = kSplitTransformSize[node.tx_size];
1741 const int half_width = DivideBy2(width);
1742 if (width > height) {
1743 stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
1744 stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1745 continue;
1746 }
1747 const int half_height = DivideBy2(height);
1748 if (width < height) {
1749 stack.Push(
1750 TransformTreeNode(node.x, node.y + half_height, split_tx_size));
1751 stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1752 continue;
1753 }
1754 stack.Push(TransformTreeNode(node.x + half_width, node.y + half_height,
1755 split_tx_size));
1756 stack.Push(TransformTreeNode(node.x, node.y + half_height, split_tx_size));
1757 stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
1758 stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1759 } while (!stack.Empty());
1760 return true;
1761 }
1762
ReconstructBlock(const Block & block,Plane plane,int start_x,int start_y,TransformSize tx_size,TransformType tx_type,int non_zero_coeff_count)1763 void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
1764 int start_y, TransformSize tx_size,
1765 TransformType tx_type, int non_zero_coeff_count) {
1766 // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
1767 assert(non_zero_coeff_count >= 0);
1768 if (non_zero_coeff_count == 0) return;
1769 #if LIBGAV1_MAX_BITDEPTH >= 10
1770 if (sequence_header_.color_config.bitdepth > 8) {
1771 Array2DView<uint16_t> buffer(
1772 buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
1773 reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
1774 Reconstruct(dsp_, tx_type, tx_size,
1775 frame_header_.segmentation.lossless[block.bp->segment_id],
1776 reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
1777 &buffer, non_zero_coeff_count);
1778 } else // NOLINT
1779 #endif
1780 {
1781 Reconstruct(dsp_, tx_type, tx_size,
1782 frame_header_.segmentation.lossless[block.bp->segment_id],
1783 reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
1784 &buffer_[plane], non_zero_coeff_count);
1785 }
1786 if (split_parse_and_decode_) {
1787 *block.residual +=
1788 kTransformWidth[tx_size] * kTransformHeight[tx_size] * residual_size_;
1789 }
1790 }
1791
Residual(const Block & block,ProcessingMode mode)1792 bool Tile::Residual(const Block& block, ProcessingMode mode) {
1793 const int width_chunks = std::max(1, block.width >> 6);
1794 const int height_chunks = std::max(1, block.height >> 6);
1795 const BlockSize size_chunk4x4 =
1796 (width_chunks > 1 || height_chunks > 1) ? kBlock64x64 : block.size;
1797 const BlockParameters& bp = *block.bp;
1798 for (int chunk_y = 0; chunk_y < height_chunks; ++chunk_y) {
1799 for (int chunk_x = 0; chunk_x < width_chunks; ++chunk_x) {
1800 const int num_planes = block.HasChroma() ? PlaneCount() : 1;
1801 int plane = kPlaneY;
1802 do {
1803 const int subsampling_x = subsampling_x_[plane];
1804 const int subsampling_y = subsampling_y_[plane];
1805 // For Y Plane, when lossless is true |bp.transform_size| is always
1806 // kTransformSize4x4. So we can simply use |bp.transform_size| here as
1807 // the Y plane's transform size (part of Section 5.11.37 in the spec).
1808 const TransformSize tx_size =
1809 (plane == kPlaneY) ? bp.transform_size : bp.uv_transform_size;
1810 const BlockSize plane_size =
1811 kPlaneResidualSize[size_chunk4x4][subsampling_x][subsampling_y];
1812 assert(plane_size != kBlockInvalid);
1813 if (bp.is_inter &&
1814 !frame_header_.segmentation.lossless[bp.segment_id] &&
1815 plane == kPlaneY) {
1816 const int row_chunk4x4 = block.row4x4 + MultiplyBy16(chunk_y);
1817 const int column_chunk4x4 = block.column4x4 + MultiplyBy16(chunk_x);
1818 const int base_x = MultiplyBy4(column_chunk4x4 >> subsampling_x);
1819 const int base_y = MultiplyBy4(row_chunk4x4 >> subsampling_y);
1820 if (!TransformTree(block, base_x, base_y, plane_size, mode)) {
1821 return false;
1822 }
1823 } else {
1824 const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
1825 const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
1826 const int step_x = kTransformWidth4x4[tx_size];
1827 const int step_y = kTransformHeight4x4[tx_size];
1828 const int num4x4_wide = kNum4x4BlocksWide[plane_size];
1829 const int num4x4_high = kNum4x4BlocksHigh[plane_size];
1830 for (int y = 0; y < num4x4_high; y += step_y) {
1831 for (int x = 0; x < num4x4_wide; x += step_x) {
1832 if (!TransformBlock(
1833 block, static_cast<Plane>(plane), base_x, base_y, tx_size,
1834 x + (MultiplyBy16(chunk_x) >> subsampling_x),
1835 y + (MultiplyBy16(chunk_y) >> subsampling_y), mode)) {
1836 return false;
1837 }
1838 }
1839 }
1840 }
1841 } while (++plane < num_planes);
1842 }
1843 }
1844 return true;
1845 }
1846
1847 // The purpose of this function is to limit the maximum size of motion vectors
1848 // and also, if use_intra_block_copy is true, to additionally constrain the
1849 // motion vector so that the data is fetched from parts of the tile that have
1850 // already been decoded and are not too close to the current block (in order to
1851 // make a pipelined decoder implementation feasible).
IsMvValid(const Block & block,bool is_compound) const1852 bool Tile::IsMvValid(const Block& block, bool is_compound) const {
1853 const BlockParameters& bp = *block.bp;
1854 for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) {
1855 for (int mv_component : bp.mv.mv[i].mv) {
1856 if (std::abs(mv_component) >= (1 << 14)) {
1857 return false;
1858 }
1859 }
1860 }
1861 if (!block.bp->prediction_parameters->use_intra_block_copy) {
1862 return true;
1863 }
1864 if ((bp.mv.mv[0].mv32 & 0x00070007) != 0) {
1865 return false;
1866 }
1867 const int delta_row = bp.mv.mv[0].mv[0] >> 3;
1868 const int delta_column = bp.mv.mv[0].mv[1] >> 3;
1869 int src_top_edge = MultiplyBy4(block.row4x4) + delta_row;
1870 int src_left_edge = MultiplyBy4(block.column4x4) + delta_column;
1871 const int src_bottom_edge = src_top_edge + block.height;
1872 const int src_right_edge = src_left_edge + block.width;
1873 if (block.HasChroma()) {
1874 if (block.width < 8 && subsampling_x_[kPlaneU] != 0) {
1875 src_left_edge -= 4;
1876 }
1877 if (block.height < 8 && subsampling_y_[kPlaneU] != 0) {
1878 src_top_edge -= 4;
1879 }
1880 }
1881 if (src_top_edge < MultiplyBy4(row4x4_start_) ||
1882 src_left_edge < MultiplyBy4(column4x4_start_) ||
1883 src_bottom_edge > MultiplyBy4(row4x4_end_) ||
1884 src_right_edge > MultiplyBy4(column4x4_end_)) {
1885 return false;
1886 }
1887 // sb_height_log2 = use_128x128_superblock ? log2(128) : log2(64)
1888 const int sb_height_log2 =
1889 6 + static_cast<int>(sequence_header_.use_128x128_superblock);
1890 const int active_sb_row = MultiplyBy4(block.row4x4) >> sb_height_log2;
1891 const int active_64x64_block_column = MultiplyBy4(block.column4x4) >> 6;
1892 const int src_sb_row = (src_bottom_edge - 1) >> sb_height_log2;
1893 const int src_64x64_block_column = (src_right_edge - 1) >> 6;
1894 const int total_64x64_blocks_per_row =
1895 ((column4x4_end_ - column4x4_start_ - 1) >> 4) + 1;
1896 const int active_64x64_block =
1897 active_sb_row * total_64x64_blocks_per_row + active_64x64_block_column;
1898 const int src_64x64_block =
1899 src_sb_row * total_64x64_blocks_per_row + src_64x64_block_column;
1900 if (src_64x64_block >= active_64x64_block - kIntraBlockCopyDelay64x64Blocks) {
1901 return false;
1902 }
1903
1904 // Wavefront constraint: use only top left area of frame for reference.
1905 if (src_sb_row > active_sb_row) return false;
1906 const int gradient =
1907 1 + kIntraBlockCopyDelay64x64Blocks +
1908 static_cast<int>(sequence_header_.use_128x128_superblock);
1909 const int wavefront_offset = gradient * (active_sb_row - src_sb_row);
1910 return src_64x64_block_column < active_64x64_block_column -
1911 kIntraBlockCopyDelay64x64Blocks +
1912 wavefront_offset;
1913 }
1914
AssignInterMv(const Block & block,bool is_compound)1915 bool Tile::AssignInterMv(const Block& block, bool is_compound) {
1916 int min[2];
1917 int max[2];
1918 GetClampParameters(block, min, max);
1919 BlockParameters& bp = *block.bp;
1920 const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
1921 if (is_compound) {
1922 for (int i = 0; i < 2; ++i) {
1923 const PredictionMode mode = GetSinglePredictionMode(i, bp.y_mode);
1924 MotionVector predicted_mv;
1925 if (mode == kPredictionModeGlobalMv) {
1926 predicted_mv = prediction_parameters.global_mv[i];
1927 } else {
1928 const int ref_mv_index = (mode == kPredictionModeNearestMv ||
1929 (mode == kPredictionModeNewMv &&
1930 prediction_parameters.ref_mv_count <= 1))
1931 ? 0
1932 : prediction_parameters.ref_mv_index;
1933 predicted_mv = prediction_parameters.reference_mv(ref_mv_index, i);
1934 if (ref_mv_index < prediction_parameters.ref_mv_count) {
1935 predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
1936 predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
1937 }
1938 }
1939 if (mode == kPredictionModeNewMv) {
1940 ReadMotionVector(block, i);
1941 bp.mv.mv[i].mv[0] += predicted_mv.mv[0];
1942 bp.mv.mv[i].mv[1] += predicted_mv.mv[1];
1943 } else {
1944 bp.mv.mv[i] = predicted_mv;
1945 }
1946 }
1947 } else {
1948 const PredictionMode mode = GetSinglePredictionMode(0, bp.y_mode);
1949 MotionVector predicted_mv;
1950 if (mode == kPredictionModeGlobalMv) {
1951 predicted_mv = prediction_parameters.global_mv[0];
1952 } else {
1953 const int ref_mv_index = (mode == kPredictionModeNearestMv ||
1954 (mode == kPredictionModeNewMv &&
1955 prediction_parameters.ref_mv_count <= 1))
1956 ? 0
1957 : prediction_parameters.ref_mv_index;
1958 predicted_mv = prediction_parameters.reference_mv(ref_mv_index);
1959 if (ref_mv_index < prediction_parameters.ref_mv_count) {
1960 predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
1961 predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
1962 }
1963 }
1964 if (mode == kPredictionModeNewMv) {
1965 ReadMotionVector(block, 0);
1966 bp.mv.mv[0].mv[0] += predicted_mv.mv[0];
1967 bp.mv.mv[0].mv[1] += predicted_mv.mv[1];
1968 } else {
1969 bp.mv.mv[0] = predicted_mv;
1970 }
1971 }
1972 return IsMvValid(block, is_compound);
1973 }
1974
AssignIntraMv(const Block & block)1975 bool Tile::AssignIntraMv(const Block& block) {
1976 // TODO(linfengz): Check if the clamping process is necessary.
1977 int min[2];
1978 int max[2];
1979 GetClampParameters(block, min, max);
1980 BlockParameters& bp = *block.bp;
1981 const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
1982 const MotionVector& ref_mv_0 = prediction_parameters.reference_mv(0);
1983 ReadMotionVector(block, 0);
1984 if (ref_mv_0.mv32 == 0) {
1985 const MotionVector& ref_mv_1 = prediction_parameters.reference_mv(1);
1986 if (ref_mv_1.mv32 == 0) {
1987 const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()];
1988 if (block.row4x4 - super_block_size4x4 < row4x4_start_) {
1989 bp.mv.mv[0].mv[1] -= MultiplyBy32(super_block_size4x4);
1990 bp.mv.mv[0].mv[1] -= MultiplyBy8(kIntraBlockCopyDelayPixels);
1991 } else {
1992 bp.mv.mv[0].mv[0] -= MultiplyBy32(super_block_size4x4);
1993 }
1994 } else {
1995 bp.mv.mv[0].mv[0] += Clip3(ref_mv_1.mv[0], min[0], max[0]);
1996 bp.mv.mv[0].mv[1] += Clip3(ref_mv_1.mv[1], min[0], max[0]);
1997 }
1998 } else {
1999 bp.mv.mv[0].mv[0] += Clip3(ref_mv_0.mv[0], min[0], max[0]);
2000 bp.mv.mv[0].mv[1] += Clip3(ref_mv_0.mv[1], min[1], max[1]);
2001 }
2002 return IsMvValid(block, /*is_compound=*/false);
2003 }
2004
ResetEntropyContext(const Block & block)2005 void Tile::ResetEntropyContext(const Block& block) {
2006 const int num_planes = block.HasChroma() ? PlaneCount() : 1;
2007 int plane = kPlaneY;
2008 do {
2009 const int subsampling_x = subsampling_x_[plane];
2010 const int start_x = block.column4x4 >> subsampling_x;
2011 const int end_x =
2012 std::min((block.column4x4 + block.width4x4) >> subsampling_x,
2013 frame_header_.columns4x4);
2014 memset(&coefficient_levels_[kEntropyContextTop][plane][start_x], 0,
2015 end_x - start_x);
2016 memset(&dc_categories_[kEntropyContextTop][plane][start_x], 0,
2017 end_x - start_x);
2018 const int subsampling_y = subsampling_y_[plane];
2019 const int start_y = block.row4x4 >> subsampling_y;
2020 const int end_y =
2021 std::min((block.row4x4 + block.height4x4) >> subsampling_y,
2022 frame_header_.rows4x4);
2023 memset(&coefficient_levels_[kEntropyContextLeft][plane][start_y], 0,
2024 end_y - start_y);
2025 memset(&dc_categories_[kEntropyContextLeft][plane][start_y], 0,
2026 end_y - start_y);
2027 } while (++plane < num_planes);
2028 }
2029
ComputePrediction(const Block & block)2030 bool Tile::ComputePrediction(const Block& block) {
2031 const BlockParameters& bp = *block.bp;
2032 if (!bp.is_inter) return true;
2033 const int mask =
2034 (1 << (4 + static_cast<int>(sequence_header_.use_128x128_superblock))) -
2035 1;
2036 const int sub_block_row4x4 = block.row4x4 & mask;
2037 const int sub_block_column4x4 = block.column4x4 & mask;
2038 const int plane_count = block.HasChroma() ? PlaneCount() : 1;
2039 // Returns true if this block applies local warping. The state is determined
2040 // in the Y plane and carried for use in the U/V planes.
2041 // But the U/V planes will not apply warping when the block size is smaller
2042 // than 8x8, even if this variable is true.
2043 bool is_local_valid = false;
2044 // Local warping parameters, similar usage as is_local_valid.
2045 GlobalMotion local_warp_params;
2046 int plane = kPlaneY;
2047 do {
2048 const int8_t subsampling_x = subsampling_x_[plane];
2049 const int8_t subsampling_y = subsampling_y_[plane];
2050 const BlockSize plane_size = block.residual_size[plane];
2051 const int block_width4x4 = kNum4x4BlocksWide[plane_size];
2052 const int block_height4x4 = kNum4x4BlocksHigh[plane_size];
2053 const int block_width = MultiplyBy4(block_width4x4);
2054 const int block_height = MultiplyBy4(block_height4x4);
2055 const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
2056 const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
2057 if (bp.reference_frame[1] == kReferenceFrameIntra) {
2058 const int tr_row4x4 = sub_block_row4x4 >> subsampling_y;
2059 const int tr_column4x4 =
2060 (sub_block_column4x4 >> subsampling_x) + block_width4x4 + 1;
2061 const int bl_row4x4 =
2062 (sub_block_row4x4 >> subsampling_y) + block_height4x4;
2063 const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x) + 1;
2064 const TransformSize tx_size =
2065 k4x4SizeToTransformSize[k4x4WidthLog2[plane_size]]
2066 [k4x4HeightLog2[plane_size]];
2067 const bool has_left = block.left_available[plane];
2068 const bool has_top = block.top_available[plane];
2069 CALL_BITDEPTH_FUNCTION(
2070 IntraPrediction, block, static_cast<Plane>(plane), base_x, base_y,
2071 has_left, has_top,
2072 block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
2073 block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
2074 kInterIntraToIntraMode[block.bp->prediction_parameters
2075 ->inter_intra_mode],
2076 tx_size);
2077 }
2078 int candidate_row = block.row4x4;
2079 int candidate_column = block.column4x4;
2080 bool some_use_intra = bp.reference_frame[0] == kReferenceFrameIntra;
2081 if (!some_use_intra && plane != 0) {
2082 candidate_row = (candidate_row >> subsampling_y) << subsampling_y;
2083 candidate_column = (candidate_column >> subsampling_x) << subsampling_x;
2084 if (candidate_row != block.row4x4) {
2085 // Top block.
2086 const BlockParameters& bp_top =
2087 *block_parameters_holder_.Find(candidate_row, block.column4x4);
2088 some_use_intra = bp_top.reference_frame[0] == kReferenceFrameIntra;
2089 if (!some_use_intra && candidate_column != block.column4x4) {
2090 // Top-left block.
2091 const BlockParameters& bp_top_left =
2092 *block_parameters_holder_.Find(candidate_row, candidate_column);
2093 some_use_intra =
2094 bp_top_left.reference_frame[0] == kReferenceFrameIntra;
2095 }
2096 }
2097 if (!some_use_intra && candidate_column != block.column4x4) {
2098 // Left block.
2099 const BlockParameters& bp_left =
2100 *block_parameters_holder_.Find(block.row4x4, candidate_column);
2101 some_use_intra = bp_left.reference_frame[0] == kReferenceFrameIntra;
2102 }
2103 }
2104 int prediction_width;
2105 int prediction_height;
2106 if (some_use_intra) {
2107 candidate_row = block.row4x4;
2108 candidate_column = block.column4x4;
2109 prediction_width = block_width;
2110 prediction_height = block_height;
2111 } else {
2112 prediction_width = block.width >> subsampling_x;
2113 prediction_height = block.height >> subsampling_y;
2114 }
2115 int r = 0;
2116 int y = 0;
2117 do {
2118 int c = 0;
2119 int x = 0;
2120 do {
2121 if (!InterPrediction(block, static_cast<Plane>(plane), base_x + x,
2122 base_y + y, prediction_width, prediction_height,
2123 candidate_row + r, candidate_column + c,
2124 &is_local_valid, &local_warp_params)) {
2125 return false;
2126 }
2127 ++c;
2128 x += prediction_width;
2129 } while (x < block_width);
2130 ++r;
2131 y += prediction_height;
2132 } while (y < block_height);
2133 } while (++plane < plane_count);
2134 return true;
2135 }
2136
2137 #undef CALL_BITDEPTH_FUNCTION
2138
PopulateDeblockFilterLevel(const Block & block)2139 void Tile::PopulateDeblockFilterLevel(const Block& block) {
2140 if (!post_filter_.DoDeblock()) return;
2141 BlockParameters& bp = *block.bp;
2142 const int mode_id =
2143 static_cast<int>(kPredictionModeDeltasMask.Contains(bp.y_mode));
2144 for (int i = 0; i < kFrameLfCount; ++i) {
2145 if (delta_lf_all_zero_) {
2146 bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel(
2147 bp.segment_id, i, bp.reference_frame[0], mode_id);
2148 } else {
2149 bp.deblock_filter_level[i] =
2150 deblock_filter_levels_[bp.segment_id][i][bp.reference_frame[0]]
2151 [mode_id];
2152 }
2153 }
2154 }
2155
ProcessBlock(int row4x4,int column4x4,BlockSize block_size,ParameterTree * const tree,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2156 bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
2157 ParameterTree* const tree,
2158 TileScratchBuffer* const scratch_buffer,
2159 ResidualPtr* residual) {
2160 // Do not process the block if the starting point is beyond the visible frame.
2161 // This is equivalent to the has_row/has_column check in the
2162 // decode_partition() section of the spec when partition equals
2163 // kPartitionHorizontal or kPartitionVertical.
2164 if (row4x4 >= frame_header_.rows4x4 ||
2165 column4x4 >= frame_header_.columns4x4) {
2166 return true;
2167 }
2168 BlockParameters& bp = *tree->parameters();
2169 block_parameters_holder_.FillCache(row4x4, column4x4, block_size, &bp);
2170 Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
2171 bp.size = block_size;
2172 bp.prediction_parameters =
2173 split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>(
2174 new (std::nothrow) PredictionParameters())
2175 : std::move(prediction_parameters_);
2176 if (bp.prediction_parameters == nullptr) return false;
2177 if (!DecodeModeInfo(block)) return false;
2178 bp.is_global_mv_block = (bp.y_mode == kPredictionModeGlobalMv ||
2179 bp.y_mode == kPredictionModeGlobalGlobalMv) &&
2180 !IsBlockDimension4(bp.size);
2181 PopulateDeblockFilterLevel(block);
2182 if (!ReadPaletteTokens(block)) return false;
2183 DecodeTransformSize(block);
2184 // Part of Section 5.11.37 in the spec (implemented as a simple lookup).
2185 bp.uv_transform_size = frame_header_.segmentation.lossless[bp.segment_id]
2186 ? kTransformSize4x4
2187 : kUVTransformSize[block.residual_size[kPlaneU]];
2188 if (bp.skip) ResetEntropyContext(block);
2189 if (split_parse_and_decode_) {
2190 if (!Residual(block, kProcessingModeParseOnly)) return false;
2191 } else {
2192 if (!ComputePrediction(block) ||
2193 !Residual(block, kProcessingModeParseAndDecode)) {
2194 return false;
2195 }
2196 }
2197 // If frame_header_.segmentation.enabled is false, bp.segment_id is 0 for all
2198 // blocks. We don't need to call save bp.segment_id in the current frame
2199 // because the current frame's segmentation map will be cleared to all 0s.
2200 //
2201 // If frame_header_.segmentation.enabled is true and
2202 // frame_header_.segmentation.update_map is false, we will copy the previous
2203 // frame's segmentation map to the current frame. So we don't need to call
2204 // save bp.segment_id in the current frame.
2205 if (frame_header_.segmentation.enabled &&
2206 frame_header_.segmentation.update_map) {
2207 const int x_limit = std::min(frame_header_.columns4x4 - column4x4,
2208 static_cast<int>(block.width4x4));
2209 const int y_limit = std::min(frame_header_.rows4x4 - row4x4,
2210 static_cast<int>(block.height4x4));
2211 current_frame_.segmentation_map()->FillBlock(row4x4, column4x4, x_limit,
2212 y_limit, bp.segment_id);
2213 }
2214 StoreMotionFieldMvsIntoCurrentFrame(block);
2215 if (!split_parse_and_decode_) {
2216 prediction_parameters_ = std::move(bp.prediction_parameters);
2217 }
2218 return true;
2219 }
2220
DecodeBlock(ParameterTree * const tree,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2221 bool Tile::DecodeBlock(ParameterTree* const tree,
2222 TileScratchBuffer* const scratch_buffer,
2223 ResidualPtr* residual) {
2224 const int row4x4 = tree->row4x4();
2225 const int column4x4 = tree->column4x4();
2226 if (row4x4 >= frame_header_.rows4x4 ||
2227 column4x4 >= frame_header_.columns4x4) {
2228 return true;
2229 }
2230 const BlockSize block_size = tree->block_size();
2231 Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
2232 if (!ComputePrediction(block) ||
2233 !Residual(block, kProcessingModeDecodeOnly)) {
2234 return false;
2235 }
2236 block.bp->prediction_parameters.reset(nullptr);
2237 return true;
2238 }
2239
ProcessPartition(int row4x4_start,int column4x4_start,ParameterTree * const root,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2240 bool Tile::ProcessPartition(int row4x4_start, int column4x4_start,
2241 ParameterTree* const root,
2242 TileScratchBuffer* const scratch_buffer,
2243 ResidualPtr* residual) {
2244 Stack<ParameterTree*, kDfsStackSize> stack;
2245
2246 // Set up the first iteration.
2247 ParameterTree* node = root;
2248 int row4x4 = row4x4_start;
2249 int column4x4 = column4x4_start;
2250 BlockSize block_size = SuperBlockSize();
2251
2252 // DFS loop. If it sees a terminal node (leaf node), ProcessBlock is invoked.
2253 // Otherwise, the children are pushed into the stack for future processing.
2254 do {
2255 if (!stack.Empty()) {
2256 // Set up subsequent iterations.
2257 node = stack.Pop();
2258 row4x4 = node->row4x4();
2259 column4x4 = node->column4x4();
2260 block_size = node->block_size();
2261 }
2262 if (row4x4 >= frame_header_.rows4x4 ||
2263 column4x4 >= frame_header_.columns4x4) {
2264 continue;
2265 }
2266 const int block_width4x4 = kNum4x4BlocksWide[block_size];
2267 assert(block_width4x4 == kNum4x4BlocksHigh[block_size]);
2268 const int half_block4x4 = block_width4x4 >> 1;
2269 const bool has_rows = (row4x4 + half_block4x4) < frame_header_.rows4x4;
2270 const bool has_columns =
2271 (column4x4 + half_block4x4) < frame_header_.columns4x4;
2272 Partition partition;
2273 if (!ReadPartition(row4x4, column4x4, block_size, has_rows, has_columns,
2274 &partition)) {
2275 LIBGAV1_DLOG(ERROR, "Failed to read partition for row: %d column: %d",
2276 row4x4, column4x4);
2277 return false;
2278 }
2279 const BlockSize sub_size = kSubSize[partition][block_size];
2280 // Section 6.10.4: It is a requirement of bitstream conformance that
2281 // get_plane_residual_size( subSize, 1 ) is not equal to BLOCK_INVALID
2282 // every time subSize is computed.
2283 if (sub_size == kBlockInvalid ||
2284 kPlaneResidualSize[sub_size]
2285 [sequence_header_.color_config.subsampling_x]
2286 [sequence_header_.color_config.subsampling_y] ==
2287 kBlockInvalid) {
2288 LIBGAV1_DLOG(
2289 ERROR,
2290 "Invalid sub-block/plane size for row: %d column: %d partition: "
2291 "%d block_size: %d sub_size: %d subsampling_x/y: %d, %d",
2292 row4x4, column4x4, partition, block_size, sub_size,
2293 sequence_header_.color_config.subsampling_x,
2294 sequence_header_.color_config.subsampling_y);
2295 return false;
2296 }
2297 if (!node->SetPartitionType(partition)) {
2298 LIBGAV1_DLOG(ERROR, "node->SetPartitionType() failed.");
2299 return false;
2300 }
2301 switch (partition) {
2302 case kPartitionNone:
2303 if (!ProcessBlock(row4x4, column4x4, sub_size, node, scratch_buffer,
2304 residual)) {
2305 return false;
2306 }
2307 break;
2308 case kPartitionSplit:
2309 // The children must be added in reverse order since a stack is being
2310 // used.
2311 for (int i = 3; i >= 0; --i) {
2312 ParameterTree* const child = node->children(i);
2313 assert(child != nullptr);
2314 stack.Push(child);
2315 }
2316 break;
2317 case kPartitionHorizontal:
2318 case kPartitionVertical:
2319 case kPartitionHorizontalWithTopSplit:
2320 case kPartitionHorizontalWithBottomSplit:
2321 case kPartitionVerticalWithLeftSplit:
2322 case kPartitionVerticalWithRightSplit:
2323 case kPartitionHorizontal4:
2324 case kPartitionVertical4:
2325 for (int i = 0; i < 4; ++i) {
2326 ParameterTree* const child = node->children(i);
2327 // Once a null child is seen, all the subsequent children will also be
2328 // null.
2329 if (child == nullptr) break;
2330 if (!ProcessBlock(child->row4x4(), child->column4x4(),
2331 child->block_size(), child, scratch_buffer,
2332 residual)) {
2333 return false;
2334 }
2335 }
2336 break;
2337 }
2338 } while (!stack.Empty());
2339 return true;
2340 }
2341
ResetLoopRestorationParams()2342 void Tile::ResetLoopRestorationParams() {
2343 for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
2344 for (int i = WienerInfo::kVertical; i <= WienerInfo::kHorizontal; ++i) {
2345 reference_unit_info_[plane].sgr_proj_info.multiplier[i] =
2346 kSgrProjDefaultMultiplier[i];
2347 for (int j = 0; j < kNumWienerCoefficients; ++j) {
2348 reference_unit_info_[plane].wiener_info.filter[i][j] =
2349 kWienerDefaultFilter[j];
2350 }
2351 }
2352 }
2353 }
2354
ResetCdef(const int row4x4,const int column4x4)2355 void Tile::ResetCdef(const int row4x4, const int column4x4) {
2356 if (!sequence_header_.enable_cdef) return;
2357 const int row = DivideBy16(row4x4);
2358 const int column = DivideBy16(column4x4);
2359 cdef_index_[row][column] = -1;
2360 if (sequence_header_.use_128x128_superblock) {
2361 const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64];
2362 const int border_row = DivideBy16(row4x4 + cdef_size4x4);
2363 const int border_column = DivideBy16(column4x4 + cdef_size4x4);
2364 cdef_index_[row][border_column] = -1;
2365 cdef_index_[border_row][column] = -1;
2366 cdef_index_[border_row][border_column] = -1;
2367 }
2368 }
2369
ClearBlockDecoded(TileScratchBuffer * const scratch_buffer,int row4x4,int column4x4)2370 void Tile::ClearBlockDecoded(TileScratchBuffer* const scratch_buffer,
2371 int row4x4, int column4x4) {
2372 // Set everything to false.
2373 memset(scratch_buffer->block_decoded, 0,
2374 sizeof(scratch_buffer->block_decoded));
2375 // Set specific edge cases to true.
2376 const int sb_size4 = sequence_header_.use_128x128_superblock ? 32 : 16;
2377 for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
2378 const int subsampling_x = subsampling_x_[plane];
2379 const int subsampling_y = subsampling_y_[plane];
2380 const int sb_width4 = (column4x4_end_ - column4x4) >> subsampling_x;
2381 const int sb_height4 = (row4x4_end_ - row4x4) >> subsampling_y;
2382 // The memset is equivalent to the following lines in the spec:
2383 // for ( x = -1; x <= ( sbSize4 >> subX ); x++ ) {
2384 // if ( y < 0 && x < sbWidth4 ) {
2385 // BlockDecoded[plane][y][x] = 1
2386 // }
2387 // }
2388 const int num_elements =
2389 std::min((sb_size4 >> subsampling_x_[plane]) + 1, sb_width4) + 1;
2390 memset(&scratch_buffer->block_decoded[plane][0][0], 1, num_elements);
2391 // The for loop is equivalent to the following lines in the spec:
2392 // for ( y = -1; y <= ( sbSize4 >> subY ); y++ )
2393 // if ( x < 0 && y < sbHeight4 )
2394 // BlockDecoded[plane][y][x] = 1
2395 // }
2396 // }
2397 // BlockDecoded[plane][sbSize4 >> subY][-1] = 0
2398 for (int y = -1; y < std::min((sb_size4 >> subsampling_y), sb_height4);
2399 ++y) {
2400 scratch_buffer->block_decoded[plane][y + 1][0] = true;
2401 }
2402 }
2403 }
2404
ProcessSuperBlock(int row4x4,int column4x4,int block_width4x4,TileScratchBuffer * const scratch_buffer,ProcessingMode mode)2405 bool Tile::ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4,
2406 TileScratchBuffer* const scratch_buffer,
2407 ProcessingMode mode) {
2408 const bool parsing =
2409 mode == kProcessingModeParseOnly || mode == kProcessingModeParseAndDecode;
2410 const bool decoding = mode == kProcessingModeDecodeOnly ||
2411 mode == kProcessingModeParseAndDecode;
2412 if (parsing) {
2413 read_deltas_ = frame_header_.delta_q.present;
2414 ResetCdef(row4x4, column4x4);
2415 }
2416 if (decoding) {
2417 ClearBlockDecoded(scratch_buffer, row4x4, column4x4);
2418 }
2419 const BlockSize block_size = SuperBlockSize();
2420 if (parsing) {
2421 ReadLoopRestorationCoefficients(row4x4, column4x4, block_size);
2422 }
2423 const int row = row4x4 / block_width4x4;
2424 const int column = column4x4 / block_width4x4;
2425 if (parsing && decoding) {
2426 uint8_t* residual_buffer = residual_buffer_.get();
2427 if (!ProcessPartition(row4x4, column4x4,
2428 block_parameters_holder_.Tree(row, column),
2429 scratch_buffer, &residual_buffer)) {
2430 LIBGAV1_DLOG(ERROR, "Error decoding partition row: %d column: %d", row4x4,
2431 column4x4);
2432 return false;
2433 }
2434 return true;
2435 }
2436 const int sb_row_index = SuperBlockRowIndex(row4x4);
2437 const int sb_column_index = SuperBlockColumnIndex(column4x4);
2438 if (parsing) {
2439 residual_buffer_threaded_[sb_row_index][sb_column_index] =
2440 residual_buffer_pool_->Get();
2441 if (residual_buffer_threaded_[sb_row_index][sb_column_index] == nullptr) {
2442 LIBGAV1_DLOG(ERROR, "Failed to get residual buffer.");
2443 return false;
2444 }
2445 uint8_t* residual_buffer =
2446 residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2447 if (!ProcessPartition(row4x4, column4x4,
2448 block_parameters_holder_.Tree(row, column),
2449 scratch_buffer, &residual_buffer)) {
2450 LIBGAV1_DLOG(ERROR, "Error parsing partition row: %d column: %d", row4x4,
2451 column4x4);
2452 return false;
2453 }
2454 } else {
2455 uint8_t* residual_buffer =
2456 residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2457 if (!DecodeSuperBlock(block_parameters_holder_.Tree(row, column),
2458 scratch_buffer, &residual_buffer)) {
2459 LIBGAV1_DLOG(ERROR, "Error decoding superblock row: %d column: %d",
2460 row4x4, column4x4);
2461 return false;
2462 }
2463 residual_buffer_pool_->Release(
2464 std::move(residual_buffer_threaded_[sb_row_index][sb_column_index]));
2465 }
2466 return true;
2467 }
2468
DecodeSuperBlock(ParameterTree * const tree,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2469 bool Tile::DecodeSuperBlock(ParameterTree* const tree,
2470 TileScratchBuffer* const scratch_buffer,
2471 ResidualPtr* residual) {
2472 Stack<ParameterTree*, kDfsStackSize> stack;
2473 stack.Push(tree);
2474 do {
2475 ParameterTree* const node = stack.Pop();
2476 if (node->partition() != kPartitionNone) {
2477 for (int i = 3; i >= 0; --i) {
2478 if (node->children(i) == nullptr) continue;
2479 stack.Push(node->children(i));
2480 }
2481 continue;
2482 }
2483 if (!DecodeBlock(node, scratch_buffer, residual)) {
2484 LIBGAV1_DLOG(ERROR, "Error decoding block row: %d column: %d",
2485 node->row4x4(), node->column4x4());
2486 return false;
2487 }
2488 } while (!stack.Empty());
2489 return true;
2490 }
2491
ReadLoopRestorationCoefficients(int row4x4,int column4x4,BlockSize block_size)2492 void Tile::ReadLoopRestorationCoefficients(int row4x4, int column4x4,
2493 BlockSize block_size) {
2494 if (frame_header_.allow_intrabc) return;
2495 LoopRestorationInfo* const restoration_info = post_filter_.restoration_info();
2496 const bool is_superres_scaled =
2497 frame_header_.width != frame_header_.upscaled_width;
2498 for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
2499 LoopRestorationUnitInfo unit_info;
2500 if (restoration_info->PopulateUnitInfoForSuperBlock(
2501 static_cast<Plane>(plane), block_size, is_superres_scaled,
2502 frame_header_.superres_scale_denominator, row4x4, column4x4,
2503 &unit_info)) {
2504 for (int unit_row = unit_info.row_start; unit_row < unit_info.row_end;
2505 ++unit_row) {
2506 for (int unit_column = unit_info.column_start;
2507 unit_column < unit_info.column_end; ++unit_column) {
2508 const int unit_id = unit_row * restoration_info->num_horizontal_units(
2509 static_cast<Plane>(plane)) +
2510 unit_column;
2511 restoration_info->ReadUnitCoefficients(
2512 &reader_, &symbol_decoder_context_, static_cast<Plane>(plane),
2513 unit_id, &reference_unit_info_);
2514 }
2515 }
2516 }
2517 }
2518 }
2519
StoreMotionFieldMvsIntoCurrentFrame(const Block & block)2520 void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
2521 if (frame_header_.refresh_frame_flags == 0 ||
2522 IsIntraFrame(frame_header_.frame_type)) {
2523 return;
2524 }
2525 // Iterate over odd rows/columns beginning at the first odd row/column for the
2526 // block. It is done this way because motion field mvs are only needed at a
2527 // 8x8 granularity.
2528 const int row_start4x4 = block.row4x4 | 1;
2529 const int row_limit4x4 =
2530 std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
2531 if (row_start4x4 >= row_limit4x4) return;
2532 const int column_start4x4 = block.column4x4 | 1;
2533 const int column_limit4x4 =
2534 std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
2535 if (column_start4x4 >= column_limit4x4) return;
2536
2537 // The largest reference MV component that can be saved.
2538 constexpr int kRefMvsLimit = (1 << 12) - 1;
2539 const BlockParameters& bp = *block.bp;
2540 ReferenceInfo* reference_info = current_frame_.reference_info();
2541 for (int i = 1; i >= 0; --i) {
2542 const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i];
2543 // Must make a local copy so that StoreMotionFieldMvs() knows there is no
2544 // overlap between load and store.
2545 const MotionVector mv_to_store = bp.mv.mv[i];
2546 const int mv_row = std::abs(mv_to_store.mv[MotionVector::kRow]);
2547 const int mv_column = std::abs(mv_to_store.mv[MotionVector::kColumn]);
2548 if (reference_frame_to_store > kReferenceFrameIntra &&
2549 // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two
2550 // absolute values and then compare with kRefMvsLimit to save a branch.
2551 // The next line is equivalent to:
2552 // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit
2553 (mv_row | mv_column) <= kRefMvsLimit &&
2554 reference_info->relative_distance_from[reference_frame_to_store] < 0) {
2555 const int row_start8x8 = DivideBy2(row_start4x4);
2556 const int row_limit8x8 = DivideBy2(row_limit4x4);
2557 const int column_start8x8 = DivideBy2(column_start4x4);
2558 const int column_limit8x8 = DivideBy2(column_limit4x4);
2559 const int rows = row_limit8x8 - row_start8x8;
2560 const int columns = column_limit8x8 - column_start8x8;
2561 const ptrdiff_t stride = DivideBy2(current_frame_.columns4x4());
2562 ReferenceFrameType* const reference_frame_row_start =
2563 &reference_info
2564 ->motion_field_reference_frame[row_start8x8][column_start8x8];
2565 MotionVector* const mv =
2566 &reference_info->motion_field_mv[row_start8x8][column_start8x8];
2567
2568 // Specialize columns cases 1, 2, 4, 8 and 16. This makes memset() inlined
2569 // and simplifies std::fill() for these cases.
2570 if (columns <= 1) {
2571 // Don't change the above condition to (columns == 1).
2572 // Condition (columns <= 1) may help the compiler simplify the inlining
2573 // of the general case of StoreMotionFieldMvs() by eliminating the
2574 // (columns == 0) case.
2575 assert(columns == 1);
2576 StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2577 1, reference_frame_row_start, mv);
2578 } else if (columns == 2) {
2579 StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2580 2, reference_frame_row_start, mv);
2581 } else if (columns == 4) {
2582 StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2583 4, reference_frame_row_start, mv);
2584 } else if (columns == 8) {
2585 StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2586 8, reference_frame_row_start, mv);
2587 } else if (columns == 16) {
2588 StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2589 16, reference_frame_row_start, mv);
2590 } else if (columns < 16) {
2591 // This always true condition (columns < 16) may help the compiler
2592 // simplify the inlining of the following function.
2593 // This general case is rare and usually only happens to the blocks
2594 // which contain the right boundary of the frame.
2595 StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2596 columns, reference_frame_row_start, mv);
2597 } else {
2598 assert(false);
2599 }
2600 return;
2601 }
2602 }
2603 }
2604
2605 } // namespace libgav1
2606