/*
 * Copyright © 2018-2021, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27 
28 #ifndef DAV1D_SRC_INTERNAL_H
29 #define DAV1D_SRC_INTERNAL_H
30 
31 #include <stdatomic.h>
32 
33 #include "dav1d/data.h"
34 
/*
 * Forward declarations of the context types defined further down in this
 * header (Dav1dTask is only used through pointers here). They are placed
 * before the src/ includes that follow.
 */
typedef struct Dav1dFrameContext Dav1dFrameContext;
typedef struct Dav1dTileState Dav1dTileState;
typedef struct Dav1dTileContext Dav1dTileContext;
typedef struct Dav1dPostFilterContext Dav1dPostFilterContext;
typedef struct Dav1dTask Dav1dTask;
40 
41 #include "common/attributes.h"
42 
43 #include "src/cdef.h"
44 #include "src/cdf.h"
45 #include "src/data.h"
46 #include "src/env.h"
47 #include "src/film_grain.h"
48 #include "src/intra_edge.h"
49 #include "src/ipred.h"
50 #include "src/itx.h"
51 #include "src/levels.h"
52 #include "src/lf_mask.h"
53 #include "src/loopfilter.h"
54 #include "src/looprestoration.h"
55 #include "src/mc.h"
56 #include "src/msac.h"
57 #include "src/picture.h"
58 #include "src/recon.h"
59 #include "src/refmvs.h"
60 #include "src/thread.h"
61 
62 typedef struct Dav1dDSPContext {
63     Dav1dFilmGrainDSPContext fg;
64     Dav1dIntraPredDSPContext ipred;
65     Dav1dMCDSPContext mc;
66     Dav1dInvTxfmDSPContext itx;
67     Dav1dLoopFilterDSPContext lf;
68     Dav1dCdefDSPContext cdef;
69     Dav1dLoopRestorationDSPContext lr;
70 } Dav1dDSPContext;
71 
72 struct Dav1dTileGroup {
73     Dav1dData data;
74     int start, end;
75 };
76 
77 struct Dav1dContext {
78     Dav1dFrameContext *fc;
79     unsigned n_fc;
80 
81     Dav1dPostFilterContext *pfc;
82     unsigned n_pfc;
83 
84     // cache of OBUs that make up a single frame before we submit them
85     // to a frame worker to be decoded
86     struct Dav1dTileGroup *tile;
87     int n_tile_data_alloc;
88     int n_tile_data;
89     int n_tiles;
90     Dav1dMemPool *seq_hdr_pool;
91     Dav1dRef *seq_hdr_ref;
92     Dav1dSequenceHeader *seq_hdr;
93     Dav1dMemPool *frame_hdr_pool;
94     Dav1dRef *frame_hdr_ref;
95     Dav1dFrameHeader *frame_hdr;
96 
97     Dav1dRef *content_light_ref;
98     Dav1dContentLightLevel *content_light;
99     Dav1dRef *mastering_display_ref;
100     Dav1dMasteringDisplay *mastering_display;
101     Dav1dRef *itut_t35_ref;
102     Dav1dITUTT35 *itut_t35;
103 
104     // decoded output picture queue
105     Dav1dData in;
106     Dav1dPicture out;
107     // dummy is a pointer to prevent compiler errors about atomic_load()
108     // not taking const arguments
109     atomic_int flush_mem, *flush;
110     struct {
111         Dav1dThreadPicture *out_delayed;
112         unsigned next;
113     } frame_thread;
114 
115     // postfilter threading (refer to pfc[] for per_thread thingies)
116     struct PostFilterThreadData {
117         pthread_mutex_t lock;
118         pthread_cond_t cond;
119         struct Dav1dTask *tasks;
120         int frame_cnt;
121         int inited;
122     } postfilter_thread;
123 
124     // reference/entropy state
125     Dav1dMemPool *segmap_pool;
126     Dav1dMemPool *refmvs_pool;
127     struct {
128         Dav1dThreadPicture p;
129         Dav1dRef *segmap;
130         Dav1dRef *refmvs;
131         unsigned refpoc[7];
132     } refs[8];
133     Dav1dMemPool *cdf_pool;
134     CdfThreadContext cdf[8];
135 
136     Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */];
137 
138     // tree to keep track of which edges are available
139     struct {
140         EdgeNode *root[2 /* BL_128X128 vs. BL_64X64 */];
141         EdgeBranch branch_sb128[1 + 4 + 16 + 64];
142         EdgeBranch branch_sb64[1 + 4 + 16];
143         EdgeTip tip_sb128[256];
144         EdgeTip tip_sb64[64];
145     } intra_edge;
146 
147     Dav1dPicAllocator allocator;
148     int apply_grain;
149     int operating_point;
150     unsigned operating_point_idc;
151     int all_layers;
152     unsigned frame_size_limit;
153     int drain;
154     enum PictureFlags frame_flags;
155     enum Dav1dEventFlags event_flags;
156 
157     Dav1dLogger logger;
158 
159     Dav1dMemPool *picture_pool;
160 };
161 
162 struct Dav1dFrameContext {
163     Dav1dRef *seq_hdr_ref;
164     Dav1dSequenceHeader *seq_hdr;
165     Dav1dRef *frame_hdr_ref;
166     Dav1dFrameHeader *frame_hdr;
167     Dav1dThreadPicture refp[7];
168     Dav1dPicture cur; // during block coding / reconstruction
169     Dav1dThreadPicture sr_cur; // after super-resolution upscaling
170     Dav1dRef *mvs_ref;
171     refmvs_temporal_block *mvs, *ref_mvs[7];
172     Dav1dRef *ref_mvs_ref[7];
173     Dav1dRef *cur_segmap_ref, *prev_segmap_ref;
174     uint8_t *cur_segmap;
175     const uint8_t *prev_segmap;
176     unsigned refpoc[7], refrefpoc[7][7];
177     uint8_t gmv_warp_allowed[7];
178     CdfThreadContext in_cdf, out_cdf;
179     struct Dav1dTileGroup *tile;
180     int n_tile_data_alloc;
181     int n_tile_data;
182 
183     // for scalable references
184     struct ScalableMotionParams {
185         int scale; // if no scaling, this is 0
186         int step;
187     } svc[7][2 /* x, y */];
188     int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */];
189 
190     const Dav1dContext *c;
191     Dav1dTileContext *tc;
192     int n_tc;
193     Dav1dTileState *ts;
194     int n_ts;
195     const Dav1dDSPContext *dsp;
196     struct {
197         recon_b_intra_fn recon_b_intra;
198         recon_b_inter_fn recon_b_inter;
199         filter_sbrow_fn filter_sbrow;
200         filter_sbrow_fn filter_sbrow_deblock;
201         filter_sbrow_fn filter_sbrow_cdef;
202         filter_sbrow_fn filter_sbrow_resize;
203         filter_sbrow_fn filter_sbrow_lr;
204         backup_ipred_edge_fn backup_ipred_edge;
205         read_coef_blocks_fn read_coef_blocks;
206     } bd_fn;
207 
208     int ipred_edge_sz;
209     pixel *ipred_edge[3];
210     ptrdiff_t b4_stride;
211     int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w;
212     uint16_t dq[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
213     const uint8_t *qm[N_RECT_TX_SIZES][3 /* plane */];
214     BlockContext *a;
215     int a_sz /* w*tile_rows */;
216     refmvs_frame rf;
217     uint8_t jnt_weights[7][7];
218     int bitdepth_max;
219 
220     struct {
221         struct thread_data td;
222         int pass, die;
223         // indexed using t->by * f->b4_stride + t->bx
224         Av1Block *b;
225         struct CodedBlockInfo {
226             int16_t eob[3 /* plane */];
227             uint8_t txtp[3 /* plane */];
228         } *cbi;
229         // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1)
230         uint16_t (*pal)[3 /* plane */][8 /* idx */];
231         // iterated over inside tile state
232         uint8_t *pal_idx;
233         coef *cf;
234         int pal_sz, pal_idx_sz, cf_sz;
235         // start offsets per tile
236         int *tile_start_off;
237     } frame_thread;
238 
239     // loopfilter
240     struct {
241         uint8_t (*level)[4];
242         Av1Filter *mask;
243         Av1Restoration *lr_mask;
244         int top_pre_cdef_toggle;
245         int mask_sz /* w*h */, lr_mask_sz, cdef_line_sz[2] /* stride */;
246         int lr_line_sz, re_sz /* h */;
247         ALIGN(Av1FilterLUT lim_lut, 16);
248         int last_sharpness;
249         uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
250         uint8_t *tx_lpf_right_edge[2];
251         uint8_t *cdef_line_buf;
252         pixel *cdef_line[2 /* pre, post */][3 /* plane */];
253         pixel *lr_lpf_line[3 /* plane */];
254 
255         // in-loop filter per-frame state keeping
256         int tile_row; // for carry-over at tile row edges
257         pixel *p[3], *sr_p[3];
258         Av1Filter *mask_ptr, *prev_mask_ptr;
259         int restore_planes; // enum LrRestorePlanes
260 
261         struct {
262             pthread_cond_t cond;
263             struct PostFilterThreadData *pftd;
264             struct Dav1dTask *tasks;
265             int num_tasks;
266             int npf;
267             int done;
268             int inited;
269         } thread;
270     } lf;
271 
272     // threading (refer to tc[] for per-thread things)
273     struct FrameTileThreadData {
274         uint64_t available;
275         pthread_mutex_t lock;
276         pthread_cond_t cond, icond;
277         int tasks_left, num_tasks;
278         int (*task_idx_to_sby_and_tile_idx)[2];
279         int titsati_sz, titsati_init[2];
280         uint16_t titsati_index_rows[1 + DAV1D_MAX_TILE_ROWS];
281         int inited;
282     } tile_thread;
283 };
284 
285 struct Dav1dTileState {
286     CdfContext cdf;
287     MsacContext msac;
288 
289     struct {
290         int col_start, col_end, row_start, row_end; // in 4px units
291         int col, row; // in tile units
292     } tiling;
293 
294     atomic_int progress; // in sby units, TILE_ERROR after a decoding error
295     struct {
296         pthread_mutex_t lock;
297         pthread_cond_t cond;
298     } tile_thread;
299     struct {
300         uint8_t *pal_idx;
301         coef *cf;
302     } frame_thread;
303 
304     uint16_t dqmem[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
305     const uint16_t (*dq)[3][2];
306     int last_qidx;
307 
308     int8_t last_delta_lf[4];
309     uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
310     const uint8_t (*lflvl)[4][8][2];
311 
312     Av1RestorationUnit *lr_ref[3];
313 };
314 
315 struct Dav1dTileContext {
316     const Dav1dFrameContext *f;
317     Dav1dTileState *ts;
318     int bx, by;
319     BlockContext l, *a;
320     ALIGN(union, 32) {
321         int16_t cf_8bpc [32 * 32];
322         int32_t cf_16bpc[32 * 32];
323     };
324     // FIXME types can be changed to pixel (and dynamically allocated)
325     // which would make copy/assign operations slightly faster?
326     uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
327     uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */];
328     uint8_t txtp_map[32 * 32]; // inter-only
329     refmvs_tile rt;
330     ALIGN(union, 64) {
331         struct {
332             union {
333                 uint8_t  lap_8bpc [128 * 32];
334                 uint16_t lap_16bpc[128 * 32];
335                 struct {
336                     int16_t compinter[2][128 * 128];
337                     uint8_t seg_mask[128 * 128];
338                 };
339             };
340             union {
341                 // stride=192 for non-SVC, or 320 for SVC
342                 uint8_t  emu_edge_8bpc [320 * (256 + 7)];
343                 uint16_t emu_edge_16bpc[320 * (256 + 7)];
344             };
345         };
346         struct {
347             union {
348                 uint8_t levels[32 * 34];
349                 struct {
350                     uint8_t pal_order[64][8];
351                     uint8_t pal_ctx[64];
352                 };
353             };
354             int16_t ac[32 * 32];
355             uint8_t pal_idx[2 * 64 * 64];
356             uint16_t pal[3 /* plane */][8 /* palette_idx */];
357             ALIGN(union, 32) {
358                 struct {
359                     uint8_t interintra_8bpc[64 * 64];
360                     uint8_t edge_8bpc[257];
361                 };
362                 struct {
363                     uint16_t interintra_16bpc[64 * 64];
364                     uint16_t edge_16bpc[257];
365                 };
366             };
367         };
368     } scratch;
369 
370     Dav1dWarpedMotionParams warpmv;
371     Av1Filter *lf_mask;
372     int8_t *cur_sb_cdef_idx_ptr;
373     // for chroma sub8x8, we need to know the filter for all 4 subblocks in
374     // a 4x4 area, but the top/left one can go out of cache already, so this
375     // keeps it accessible
376     enum Filter2d tl_4x4_filter;
377 
378     struct {
379         struct thread_data td;
380         struct FrameTileThreadData *fttd;
381         int die;
382     } tile_thread;
383 };
384 
385 struct Dav1dPostFilterContext {
386     Dav1dContext *c;
387     struct thread_data td;
388     int flushed;
389     int die;
390 };
391 
392 #endif /* DAV1D_SRC_INTERNAL_H */
393