1 /* 2 * Copyright © 2018-2021, VideoLAN and dav1d authors 3 * Copyright © 2018, Two Orioles, LLC 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #ifndef DAV1D_SRC_INTERNAL_H 29 #define DAV1D_SRC_INTERNAL_H 30 31 #include <stdatomic.h> 32 33 #include "dav1d/data.h" 34 35 typedef struct Dav1dFrameContext Dav1dFrameContext; 36 typedef struct Dav1dTileState Dav1dTileState; 37 typedef struct Dav1dTaskContext Dav1dTaskContext; 38 typedef struct Dav1dTask Dav1dTask; 39 40 #include "common/attributes.h" 41 42 #include "src/cdef.h" 43 #include "src/cdf.h" 44 #include "src/data.h" 45 #include "src/env.h" 46 #include "src/filmgrain.h" 47 #include "src/intra_edge.h" 48 #include "src/ipred.h" 49 #include "src/itx.h" 50 #include "src/levels.h" 51 #include "src/lf_mask.h" 52 #include "src/loopfilter.h" 53 #include "src/looprestoration.h" 54 #include "src/mc.h" 55 #include "src/msac.h" 56 #include "src/picture.h" 57 #include "src/recon.h" 58 #include "src/refmvs.h" 59 #include "src/thread.h" 60 61 typedef struct Dav1dDSPContext { 62 Dav1dFilmGrainDSPContext fg; 63 Dav1dIntraPredDSPContext ipred; 64 Dav1dMCDSPContext mc; 65 Dav1dInvTxfmDSPContext itx; 66 Dav1dLoopFilterDSPContext lf; 67 Dav1dCdefDSPContext cdef; 68 Dav1dLoopRestorationDSPContext lr; 69 } Dav1dDSPContext; 70 71 struct Dav1dTileGroup { 72 Dav1dData data; 73 int start, end; 74 }; 75 76 enum TaskType { 77 DAV1D_TASK_TYPE_INIT, 78 DAV1D_TASK_TYPE_INIT_CDF, 79 DAV1D_TASK_TYPE_TILE_ENTROPY, 80 DAV1D_TASK_TYPE_ENTROPY_PROGRESS, 81 DAV1D_TASK_TYPE_TILE_RECONSTRUCTION, 82 DAV1D_TASK_TYPE_DEBLOCK_COLS, 83 DAV1D_TASK_TYPE_DEBLOCK_ROWS, 84 DAV1D_TASK_TYPE_CDEF, 85 DAV1D_TASK_TYPE_SUPER_RESOLUTION, 86 DAV1D_TASK_TYPE_LOOP_RESTORATION, 87 DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS, 88 DAV1D_TASK_TYPE_FG_PREP, 89 DAV1D_TASK_TYPE_FG_APPLY, 90 }; 91 92 struct Dav1dContext { 93 Dav1dFrameContext *fc; 94 unsigned n_fc; 95 96 Dav1dTaskContext *tc; 97 unsigned n_tc; 98 99 // cache of OBUs that make up a single frame before we submit them 100 // to a frame worker to be decoded 101 struct Dav1dTileGroup *tile; 102 int n_tile_data_alloc; 103 int n_tile_data; 104 int n_tiles; 105 Dav1dMemPool *seq_hdr_pool; 106 Dav1dRef *seq_hdr_ref; 107 Dav1dSequenceHeader *seq_hdr; 108 Dav1dMemPool *frame_hdr_pool; 109 Dav1dRef *frame_hdr_ref; 110 Dav1dFrameHeader *frame_hdr; 111 112 Dav1dRef *content_light_ref; 113 Dav1dContentLightLevel *content_light; 114 Dav1dRef *mastering_display_ref; 115 Dav1dMasteringDisplay *mastering_display; 116 Dav1dRef *itut_t35_ref; 117 Dav1dITUTT35 *itut_t35; 118 119 // decoded output picture queue 120 Dav1dData in; 121 Dav1dThreadPicture out, cache; 122 // dummy is a pointer to prevent compiler errors about atomic_load() 123 // not taking const arguments 124 atomic_int flush_mem, *flush; 125 struct { 126 Dav1dThreadPicture *out_delayed; 127 unsigned next; 128 } frame_thread; 129 130 // task threading (refer to tc[] for per_thread thingies) 131 struct TaskThreadData { 132 pthread_mutex_t lock; 133 pthread_cond_t cond; 134 atomic_uint first; 135 unsigned cur; 136 // This is used for delayed reset of the task cur pointer when 137 // such operation is needed but the thread doesn't enter a critical 138 // section (typically when executing the next sbrow task locklessly). 139 // See src/thread_task.c:reset_task_cur(). 140 atomic_uint reset_task_cur; 141 atomic_int cond_signaled; 142 struct { 143 int exec; 144 pthread_cond_t cond; 145 const Dav1dPicture *in; 146 Dav1dPicture *out; 147 enum TaskType type; 148 atomic_int progress[2]; /* [0]=started, [1]=completed */ 149 union { 150 struct { 151 ALIGN(int8_t grain_lut_8bpc[3][GRAIN_HEIGHT + 1][GRAIN_WIDTH], 16); 152 ALIGN(uint8_t scaling_8bpc[3][256], 64); 153 }; 154 struct { 155 ALIGN(int16_t grain_lut_16bpc[3][GRAIN_HEIGHT + 1][GRAIN_WIDTH], 16); 156 ALIGN(uint8_t scaling_16bpc[3][4096], 64); 157 }; 158 }; 159 } delayed_fg; 160 int inited; 161 } task_thread; 162 163 // reference/entropy state 164 Dav1dMemPool *segmap_pool; 165 Dav1dMemPool *refmvs_pool; 166 struct { 167 Dav1dThreadPicture p; 168 Dav1dRef *segmap; 169 Dav1dRef *refmvs; 170 unsigned refpoc[7]; 171 } refs[8]; 172 Dav1dMemPool *cdf_pool; 173 CdfThreadContext cdf[8]; 174 175 Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */]; 176 Dav1dRefmvsDSPContext refmvs_dsp; 177 178 // tree to keep track of which edges are available 179 struct { 180 EdgeNode *root[2 /* BL_128X128 vs. BL_64X64 */]; 181 EdgeBranch branch_sb128[1 + 4 + 16 + 64]; 182 EdgeBranch branch_sb64[1 + 4 + 16]; 183 EdgeTip tip_sb128[256]; 184 EdgeTip tip_sb64[64]; 185 } intra_edge; 186 187 Dav1dPicAllocator allocator; 188 int apply_grain; 189 int operating_point; 190 unsigned operating_point_idc; 191 int all_layers; 192 int max_spatial_id; 193 unsigned frame_size_limit; 194 int strict_std_compliance; 195 int output_invisible_frames; 196 enum Dav1dInloopFilterType inloop_filters; 197 int drain; 198 enum PictureFlags frame_flags; 199 enum Dav1dEventFlags event_flags; 200 Dav1dDataProps cached_error_props; 201 int cached_error; 202 203 Dav1dLogger logger; 204 205 Dav1dMemPool *picture_pool; 206 }; 207 208 struct Dav1dTask { 209 unsigned frame_idx; // frame thread id 210 enum TaskType type; // task work 211 int sby; // sbrow 212 213 // task dependencies 214 int recon_progress, deblock_progress; 215 int deps_skip; 216 struct Dav1dTask *next; // only used in task queue 217 }; 218 219 struct Dav1dFrameContext { 220 Dav1dRef *seq_hdr_ref; 221 Dav1dSequenceHeader *seq_hdr; 222 Dav1dRef *frame_hdr_ref; 223 Dav1dFrameHeader *frame_hdr; 224 Dav1dThreadPicture refp[7]; 225 Dav1dPicture cur; // during block coding / reconstruction 226 Dav1dThreadPicture sr_cur; // after super-resolution upscaling 227 Dav1dRef *mvs_ref; 228 refmvs_temporal_block *mvs, *ref_mvs[7]; 229 Dav1dRef *ref_mvs_ref[7]; 230 Dav1dRef *cur_segmap_ref, *prev_segmap_ref; 231 uint8_t *cur_segmap; 232 const uint8_t *prev_segmap; 233 unsigned refpoc[7], refrefpoc[7][7]; 234 uint8_t gmv_warp_allowed[7]; 235 CdfThreadContext in_cdf, out_cdf; 236 struct Dav1dTileGroup *tile; 237 int n_tile_data_alloc; 238 int n_tile_data; 239 240 // for scalable references 241 struct ScalableMotionParams { 242 int scale; // if no scaling, this is 0 243 int step; 244 } svc[7][2 /* x, y */]; 245 int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */]; 246 247 const Dav1dContext *c; 248 Dav1dTileState *ts; 249 int n_ts; 250 const Dav1dDSPContext *dsp; 251 struct { 252 recon_b_intra_fn recon_b_intra; 253 recon_b_inter_fn recon_b_inter; 254 filter_sbrow_fn filter_sbrow; 255 filter_sbrow_fn filter_sbrow_deblock_cols; 256 filter_sbrow_fn filter_sbrow_deblock_rows; 257 void (*filter_sbrow_cdef)(Dav1dTaskContext *tc, int sby); 258 filter_sbrow_fn filter_sbrow_resize; 259 filter_sbrow_fn filter_sbrow_lr; 260 backup_ipred_edge_fn backup_ipred_edge; 261 read_coef_blocks_fn read_coef_blocks; 262 } bd_fn; 263 264 int ipred_edge_sz; 265 pixel *ipred_edge[3]; 266 ptrdiff_t b4_stride; 267 int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w; 268 uint16_t dq[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */]; 269 const uint8_t *qm[N_RECT_TX_SIZES][3 /* plane */]; 270 BlockContext *a; 271 int a_sz /* w*tile_rows */; 272 refmvs_frame rf; 273 uint8_t jnt_weights[7][7]; 274 int bitdepth_max; 275 276 struct { 277 int next_tile_row[2 /* 0: reconstruction, 1: entropy */]; 278 int entropy_progress; 279 atomic_int deblock_progress; // in sby units 280 atomic_uint *frame_progress, *copy_lpf_progress; 281 // indexed using t->by * f->b4_stride + t->bx 282 Av1Block *b; 283 struct CodedBlockInfo { 284 int16_t eob[3 /* plane */]; 285 uint8_t txtp[3 /* plane */]; 286 } *cbi; 287 // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1) 288 uint16_t (*pal)[3 /* plane */][8 /* idx */]; 289 // iterated over inside tile state 290 uint8_t *pal_idx; 291 coef *cf; 292 int prog_sz; 293 int pal_sz, pal_idx_sz, cf_sz; 294 // start offsets per tile 295 int *tile_start_off; 296 } frame_thread; 297 298 // loopfilter 299 struct { 300 uint8_t (*level)[4]; 301 Av1Filter *mask; 302 Av1Restoration *lr_mask; 303 int mask_sz /* w*h */, lr_mask_sz; 304 int cdef_buf_plane_sz[2]; /* stride*sbh*4 */ 305 int cdef_buf_sbh; 306 int lr_buf_plane_sz[2]; /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */ 307 int re_sz /* h */; 308 ALIGN(Av1FilterLUT lim_lut, 16); 309 int last_sharpness; 310 uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */]; 311 uint8_t *tx_lpf_right_edge[2]; 312 uint8_t *cdef_line_buf, *lr_line_buf; 313 pixel *cdef_line[2 /* pre, post */][3 /* plane */]; 314 pixel *cdef_lpf_line[3 /* plane */]; 315 pixel *lr_lpf_line[3 /* plane */]; 316 317 // in-loop filter per-frame state keeping 318 uint8_t *start_of_tile_row; 319 int start_of_tile_row_sz; 320 int need_cdef_lpf_copy; 321 pixel *p[3], *sr_p[3]; 322 Av1Filter *mask_ptr, *prev_mask_ptr; 323 int restore_planes; // enum LrRestorePlanes 324 } lf; 325 326 struct { 327 pthread_cond_t cond; 328 struct TaskThreadData *ttd; 329 struct Dav1dTask *tasks, *tile_tasks[2], init_task; 330 int num_tasks, num_tile_tasks; 331 int init_done; 332 int done[2]; 333 int retval; 334 int update_set; // whether we need to update CDF reference 335 atomic_int error; 336 int task_counter; 337 struct Dav1dTask *task_head, *task_tail; 338 // Points to the task directly before the cur pointer in the queue. 339 // This cur pointer is theoretical here, we actually keep track of the 340 // "prev_t" variable. This is needed to not loose the tasks in 341 // [head;cur-1] when picking one for execution. 342 struct Dav1dTask *task_cur_prev; 343 } task_thread; 344 345 // threading (refer to tc[] for per-thread things) 346 struct FrameTileThreadData { 347 int (*lowest_pixel_mem)[7][2]; 348 int lowest_pixel_mem_sz; 349 } tile_thread; 350 }; 351 352 struct Dav1dTileState { 353 CdfContext cdf; 354 MsacContext msac; 355 356 struct { 357 int col_start, col_end, row_start, row_end; // in 4px units 358 int col, row; // in tile units 359 } tiling; 360 361 // in sby units, TILE_ERROR after a decoding error 362 atomic_int progress[2 /* 0: reconstruction, 1: entropy */]; 363 struct { 364 uint8_t *pal_idx; 365 coef *cf; 366 } frame_thread[2 /* 0: reconstruction, 1: entropy */]; 367 368 // in fullpel units, [0] = Y, [1] = UV, used for progress requirements 369 // each entry is one tile-sbrow; middle index is refidx 370 int (*lowest_pixel)[7][2]; 371 372 uint16_t dqmem[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */]; 373 const uint16_t (*dq)[3][2]; 374 int last_qidx; 375 376 int8_t last_delta_lf[4]; 377 uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */]; 378 const uint8_t (*lflvl)[4][8][2]; 379 380 Av1RestorationUnit *lr_ref[3]; 381 }; 382 383 struct Dav1dTaskContext { 384 const Dav1dContext *c; 385 const Dav1dFrameContext *f; 386 Dav1dTileState *ts; 387 int bx, by; 388 BlockContext l, *a; 389 refmvs_tile rt; 390 ALIGN(union, 64) { 391 int16_t cf_8bpc [32 * 32]; 392 int32_t cf_16bpc[32 * 32]; 393 }; 394 // FIXME types can be changed to pixel (and dynamically allocated) 395 // which would make copy/assign operations slightly faster? 396 uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */]; 397 uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */]; 398 uint8_t txtp_map[32 * 32]; // inter-only 399 ALIGN(union, 64) { 400 struct { 401 union { 402 uint8_t lap_8bpc [128 * 32]; 403 uint16_t lap_16bpc[128 * 32]; 404 struct { 405 int16_t compinter[2][128 * 128]; 406 uint8_t seg_mask[128 * 128]; 407 }; 408 }; 409 union { 410 // stride=192 for non-SVC, or 320 for SVC 411 uint8_t emu_edge_8bpc [320 * (256 + 7)]; 412 uint16_t emu_edge_16bpc[320 * (256 + 7)]; 413 }; 414 }; 415 struct { 416 union { 417 uint8_t levels[32 * 34]; 418 struct { 419 uint8_t pal_order[64][8]; 420 uint8_t pal_ctx[64]; 421 }; 422 }; 423 int16_t ac[32 * 32]; 424 uint8_t pal_idx[2 * 64 * 64]; 425 uint16_t pal[3 /* plane */][8 /* palette_idx */]; 426 ALIGN(union, 64) { 427 struct { 428 uint8_t interintra_8bpc[64 * 64]; 429 uint8_t edge_8bpc[257]; 430 }; 431 struct { 432 uint16_t interintra_16bpc[64 * 64]; 433 uint16_t edge_16bpc[257]; 434 }; 435 }; 436 }; 437 } scratch; 438 439 Dav1dWarpedMotionParams warpmv; 440 Av1Filter *lf_mask; 441 int top_pre_cdef_toggle; 442 int8_t *cur_sb_cdef_idx_ptr; 443 // for chroma sub8x8, we need to know the filter for all 4 subblocks in 444 // a 4x4 area, but the top/left one can go out of cache already, so this 445 // keeps it accessible 446 enum Filter2d tl_4x4_filter; 447 448 struct { 449 int pass; 450 } frame_thread; 451 struct { 452 struct thread_data td; 453 struct TaskThreadData *ttd; 454 struct FrameTileThreadData *fttd; 455 int flushed; 456 int die; 457 } task_thread; 458 }; 459 460 #endif /* DAV1D_SRC_INTERNAL_H */ 461