1 /*
2  * Copyright © 2018-2021, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "config.h"
29 
30 #include <errno.h>
31 #include <limits.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <inttypes.h>
35 
36 #include "dav1d/data.h"
37 
38 #include "common/frame.h"
39 #include "common/intops.h"
40 
41 #include "src/ctx.h"
42 #include "src/decode.h"
43 #include "src/dequant_tables.h"
44 #include "src/env.h"
45 #include "src/film_grain.h"
46 #include "src/log.h"
47 #include "src/qm.h"
48 #include "src/recon.h"
49 #include "src/ref.h"
50 #include "src/tables.h"
51 #include "src/thread_task.h"
52 #include "src/warpmv.h"
53 
init_quant_tables(const Dav1dSequenceHeader * const seq_hdr,const Dav1dFrameHeader * const frame_hdr,const int qidx,uint16_t (* dq)[3][2])54 static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
55                               const Dav1dFrameHeader *const frame_hdr,
56                               const int qidx, uint16_t (*dq)[3][2])
57 {
58     for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
59         const int yac = frame_hdr->segmentation.enabled ?
60             iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
61         const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
62         const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
63         const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
64         const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
65         const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);
66 
67         dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
68         dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
69         dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
70         dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
71         dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
72         dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
73     }
74 }
75 
// Decode one signed motion vector component difference from the tile's
// msac state, using the CDFs in mv_comp.
//
// Symbols are read in this order: sign, class, the integer bits ("up"),
// then - only if have_fp is set - the fp symbol and, if the frame header
// enables hp, the hp bit. Fields that are not coded default to fp = 3 and
// hp = 1. The magnitude is ((up << 3) | (fp << 1) | hp) + 1; the return
// value is that magnitude, negated when the sign bit is set.
static int read_mv_component_diff(Dav1dTileContext *const t,
                                  CdfMvComponent *const mv_comp,
                                  const int have_fp)
{
    Dav1dTileState *const ts = t->ts;
    const Dav1dFrameContext *const f = t->f;
    const int have_hp = f->frame_hdr->hp;
    const int sign = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->sign);
    const int cl = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                                    mv_comp->classes, 10);
    // values used when the respective field is not present in the bitstream
    int up, fp = 3, hp = 1;

    if (cl == 0) {
        // class 0: a single integer bit, which also selects the fp CDF
        up = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->class0);
        if (have_fp) {
            fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                                                 mv_comp->class0_fp[up], 3);
            if (have_hp)
                hp = dav1d_msac_decode_bool_adapt(&ts->msac,
                                                  mv_comp->class0_hp);
        }
    } else {
        // class N: implicit leading one followed by cl explicitly coded bits
        up = 1 << cl;
        for (int n = 0; n < cl; n++) {
            const int bit = dav1d_msac_decode_bool_adapt(&ts->msac,
                                                         mv_comp->classN[n]);
            up |= bit << n;
        }
        if (have_fp) {
            fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                                                 mv_comp->classN_fp, 3);
            if (have_hp)
                hp = dav1d_msac_decode_bool_adapt(&ts->msac,
                                                  mv_comp->classN_hp);
        }
    }

    const int magnitude = ((up << 3) | (fp << 1) | hp) + 1;

    if (sign)
        return -magnitude;
    return magnitude;
}
119 
// Decode the MV joint type and add the coded component differences to
// *ref_mv in place.
//
// MV_JOINT_V updates only y (from mv_cdf->comp[0]), MV_JOINT_H only x (from
// mv_cdf->comp[1]), MV_JOINT_HV both, with y decoded before x. Any other
// joint value leaves ref_mv untouched.
static void read_mv_residual(Dav1dTileContext *const t, mv *const ref_mv,
                             CdfMvContext *const mv_cdf, const int have_fp)
{
    const int joint = dav1d_msac_decode_symbol_adapt4(&t->ts->msac,
                                                      t->ts->cdf.mv.joint,
                                                      N_MV_JOINTS - 1);
    const int has_y = joint == MV_JOINT_HV || joint == MV_JOINT_V;
    const int has_x = joint == MV_JOINT_HV || joint == MV_JOINT_H;

    // bitstream order: vertical component first, then horizontal
    if (has_y)
        ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
    if (has_x)
        ref_mv->x += read_mv_component_diff(t, &mv_cdf->comp[1], have_fp);
}
140 
// Recursively read the transform split tree for the transform block of size
// `from` located at the current t->bx/t->by.
//
// A split flag is only coded when depth < 2 and `from` is larger than
// TX_4X4; when set, bit (y_off * 4 + x_off) is OR'ed into masks[depth].
// If the block is split and its max dimension exceeds TX_8X8, the function
// recurses into the sub-blocks (temporarily advancing t->bx/t->by, which are
// restored before returning). Otherwise the above/left tx context is
// written through the set_ctx/case_set_upto16 macros.
static void read_tx_tree(Dav1dTileContext *const t,
                         const enum RectTxfmSize from,
                         const int depth, uint16_t *const masks,
                         const int x_off, const int y_off)
{
    const Dav1dFrameContext *const f = t->f;
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
    const int txw = t_dim->lw, txh = t_dim->lh;
    int is_split = 0;

    if (depth < 2 && from > (int) TX_4X4) {
        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
        // context: number of neighbours (above, left) with a smaller tx value
        const int ctx = (t->a->tx[bx4] < txw) + (t->l.tx[by4] < txh);

        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
                       t->ts->cdf.m.txpart[cat][ctx]);
        if (is_split)
            masks[depth] |= 1 << (y_off * 4 + x_off);
    }

    if (!is_split || t_dim->max <= TX_8X8) {
        // leaf: record the final transform size in the left/above contexts
        // (TX_4X4 if a split was signalled, else derived from txh/txw)
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, t->dir tx, off, is_split ? TX_4X4 : mul * txh)
        case_set_upto16(t_dim->h, l., 1, by4);
#undef set_ctx
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, t->dir tx, off, is_split ? TX_4X4 : mul * txw)
        case_set_upto16(t_dim->w, a->, 0, bx4);
#undef set_ctx
        return;
    }

    const enum RectTxfmSize sub = t_dim->sub;
    const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
    const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
    const int has_right_half = txw >= txh;
    const int has_bottom_half = txh >= txw;
    const int next_depth = depth + 1;
    const int sub_x = x_off * 2, sub_y = y_off * 2;

    // top-left sub-block
    read_tx_tree(t, sub, next_depth, masks, sub_x, sub_y);

    // top-right sub-block, skipped when outside the frame
    t->bx += txsw;
    if (has_right_half && t->bx < f->bw)
        read_tx_tree(t, sub, next_depth, masks, sub_x + 1, sub_y);
    t->bx -= txsw;

    // bottom row of sub-blocks, skipped when outside the frame
    t->by += txsh;
    if (has_bottom_half && t->by < f->bh) {
        read_tx_tree(t, sub, next_depth, masks, sub_x, sub_y + 1);
        t->bx += txsw;
        if (has_right_half && t->bx < f->bw)
            read_tx_tree(t, sub, next_depth, masks, sub_x + 1, sub_y + 1);
        t->bx -= txsw;
    }
    t->by -= txsh;
}
196 
// Map a coded value `diff` back to a value in [0, max) relative to the
// prediction `ref`.
//
// - ref == 0: diff is returned unchanged.
// - ref >= max - 1: the result is counted down from max - 1.
// - otherwise, values of diff up to twice the distance from ref to the
//   nearer range end alternate around ref (odd: above, even: below);
//   larger values cover the rest of the range on the farther side.
static int neg_deinterleave(int diff, int ref, int max) {
    if (ref == 0)
        return diff;
    if (ref >= max - 1)
        return max - diff - 1;

    const int ref_in_lower_half = 2 * ref < max;
    // largest diff that is still interleaved around ref
    const int interleaved_max = ref_in_lower_half ? 2 * ref
                                                  : 2 * (max - ref - 1);

    if (diff > interleaved_max)
        return ref_in_lower_half ? diff : max - (diff + 1);

    return (diff & 1) ? ref + ((diff + 1) >> 1) : ref - (diff >> 1);
}
218 
find_matching_ref(const Dav1dTileContext * const t,const enum EdgeFlags intra_edge_flags,const int bw4,const int bh4,const int w4,const int h4,const int have_left,const int have_top,const int ref,uint64_t masks[2])219 static void find_matching_ref(const Dav1dTileContext *const t,
220                               const enum EdgeFlags intra_edge_flags,
221                               const int bw4, const int bh4,
222                               const int w4, const int h4,
223                               const int have_left, const int have_top,
224                               const int ref, uint64_t masks[2])
225 {
226     /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
227     int count = 0;
228     int have_topleft = have_top && have_left;
229     int have_topright = imax(bw4, bh4) < 32 &&
230                         have_top && t->bx + bw4 < t->ts->tiling.col_end &&
231                         (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);
232 
233 #define bs(rp) dav1d_block_dimensions[(rp)->bs]
234 #define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)
235 
236     if (have_top) {
237         const refmvs_block *r2 = &r[-1][t->bx];
238         if (matches(r2)) {
239             masks[0] |= 1;
240             count = 1;
241         }
242         int aw4 = bs(r2)[0];
243         if (aw4 >= bw4) {
244             const int off = t->bx & (aw4 - 1);
245             if (off) have_topleft = 0;
246             if (aw4 - off > bw4) have_topright = 0;
247         } else {
248             unsigned mask = 1 << aw4;
249             for (int x = aw4; x < w4; x += aw4) {
250                 r2 += aw4;
251                 if (matches(r2)) {
252                     masks[0] |= mask;
253                     if (++count >= 8) return;
254                 }
255                 aw4 = bs(r2)[0];
256                 mask <<= aw4;
257             }
258         }
259     }
260     if (have_left) {
261         /*const*/ refmvs_block *const *r2 = r;
262         if (matches(&r2[0][t->bx - 1])) {
263             masks[1] |= 1;
264             if (++count >= 8) return;
265         }
266         int lh4 = bs(&r2[0][t->bx - 1])[1];
267         if (lh4 >= bh4) {
268             if (t->by & (lh4 - 1)) have_topleft = 0;
269         } else {
270             unsigned mask = 1 << lh4;
271             for (int y = lh4; y < h4; y += lh4) {
272                 r2 += lh4;
273                 if (matches(&r2[0][t->bx - 1])) {
274                     masks[1] |= mask;
275                     if (++count >= 8) return;
276                 }
277                 lh4 = bs(&r2[0][t->bx - 1])[1];
278                 mask <<= lh4;
279             }
280         }
281     }
282     if (have_topleft && matches(&r[-1][t->bx - 1])) {
283         masks[1] |= 1ULL << 32;
284         if (++count >= 8) return;
285     }
286     if (have_topright && matches(&r[-1][t->bx + bw4])) {
287         masks[0] |= 1ULL << 32;
288     }
289 #undef matches
290 }
291 
// Derive local warped-motion parameters for the current block into *wmp.
//
// Up to 8 sample points are gathered from the neighbouring refmvs blocks
// flagged in masks[] (masks[0] low 32 bits: top edge, masks[1] low 32 bits:
// left edge, bit 32 of masks[1]: top-left, bit 32 of masks[0]: top-right).
// Samples whose motion differs from `mv` by more than a block-size dependent
// threshold are dropped (at least one sample is always kept). The remaining
// samples are passed to dav1d_find_affine_int(); wmp->type becomes
// DAV1D_WM_TYPE_AFFINE only if that and dav1d_get_shear_params() both
// return 0, and DAV1D_WM_TYPE_IDENTITY otherwise.
static void derive_warpmv(const Dav1dTileContext *const t,
                          const int bw4, const int bh4,
                          const uint64_t masks[2], const union mv mv,
                          Dav1dWarpedMotionParams *const wmp)
{
    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
    const int use_topleft = (masks[1] >> 32) != 0;
    const int use_topright = (masks[0] >> 32) != 0;

#define add_sample(dx, dy, sx, sy, rp) do { \
    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
    np++; \
} while (0)

    // top edge
    if ((unsigned) masks[0] == 1 && !use_topleft) {
        // only the first top block matches: it may start left of our block
        const refmvs_block *const above = &r[-1][t->bx];
        const int off = t->bx & (bs(above)[0] - 1);
        add_sample(-off, 0, 1, -1, above);
    } else {
        unsigned off = 0, xmask = (uint32_t) masks[0];
        while (np < 8 && xmask) {
            const int tz = ctz(xmask);
            off += tz;
            xmask >>= tz;
            add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
            xmask &= ~1;
        }
    }

    // left edge
    if (np < 8 && masks[1] == 1) {
        // only the first left block matches: it may start above our block
        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
    } else {
        unsigned off = 0, ymask = (uint32_t) masks[1];
        while (np < 8 && ymask) {
            const int tz = ctz(ymask);
            off += tz;
            ymask >>= tz;
            add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
            ymask &= ~1;
        }
    }

    // corners
    if (np < 8 && use_topleft)
        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
    if (np < 8 && use_topright)
        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
    assert(np > 0 && np <= 8);
#undef bs

    // mark samples whose motion is too far from mv (mvd[i] = -1) and count
    // the ones that are kept
    int mvd[8], ret = 0;
    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
    for (int i = 0; i < np; i++) {
        const int dx = pts[i][1][0] - pts[i][0][0] - mv.x;
        const int dy = pts[i][1][1] - pts[i][0][1] - mv.y;
        mvd[i] = abs(dx) + abs(dy);
        if (mvd[i] > thresh)
            mvd[i] = -1;
        else
            ret++;
    }

    if (!ret) {
        // nothing passed the threshold: fall back to the first sample
        ret = 1;
    } else {
        // compact the array: fill discarded slots at the front with kept
        // samples taken from the back
        for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
            while (mvd[i] != -1) i++;
            while (mvd[j] == -1) j--;
            assert(i != j);
            if (i > j) break;
            mvd[i] = mvd[j];
            memcpy(pts[i], pts[j], sizeof(*pts));
        }
    }

    const int failed =
        dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) ||
        dav1d_get_shear_params(wmp);
    wmp->type = failed ? DAV1D_WM_TYPE_IDENTITY : DAV1D_WM_TYPE_AFFINE;
}
366 
// Return 1 if any of the `len` bytes buf[0], buf[2], buf[4], ... is zero,
// and 0 otherwise (including when len <= 0). Only every second byte is
// examined.
static inline int findoddzero(const uint8_t *buf, int len) {
    for (int remaining = len, idx = 0; remaining > 0; remaining--, idx += 2) {
        if (buf[idx] == 0)
            return 1;
    }
    return 0;
}
372 
// Decode the palette for plane `pl` of block `b`.
//
// Steps:
//  1. read the palette size (2..8) and store it in b->pal_sz[pl];
//  2. build a sorted, duplicate-free cache by merging the left and above
//     neighbour palettes from t->al_pal (the above one is ignored when
//     (by4 & 15) == 0);
//  3. read one bit per cache entry to select the entries that are reused;
//  4. read the remaining entries as a first value plus increasing deltas;
//  5. merge reused and new entries into the output palette.
// The output goes to f->frame_thread.pal when frame_thread.pass is nonzero,
// and to t->scratch.pal otherwise.
static void read_pal_plane(Dav1dTileContext *const t, Av1Block *const b,
                           const int pl, const int sz_ctx,
                           const int bx4, const int by4)
{
    Dav1dTileState *const ts = t->ts;
    const Dav1dFrameContext *const f = t->f;

    b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                        ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
    const int pal_sz = b->pal_sz[pl];

    uint16_t cache[16], used_cache[8];
    int n_cache = 0;
    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
    // don't reuse above palette outside SB64 boundaries
    int a_cache = 0;
    if (by4 & 15)
        a_cache = pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4];
    const uint16_t *l = t->al_pal[1][by4][pl];
    const uint16_t *a = t->al_pal[0][bx4][pl];

    // append a value to the cache unless it equals the last cached value
#define cache_append(val) do { \
        const uint16_t entry_ = (val); \
        if (!n_cache || cache[n_cache - 1] != entry_) \
            cache[n_cache++] = entry_; \
    } while (0)

    // merge both neighbour palettes while each still has entries
    while (l_cache && a_cache) {
        if (*l < *a) {
            cache_append(*l);
            l++;
            l_cache--;
        } else {
            if (*a == *l) {
                // same value in both: consume the left copy as well
                l++;
                l_cache--;
            }
            cache_append(*a);
            a++;
            a_cache--;
        }
    }
    // append whatever is left of the palette that was not exhausted
    if (l_cache) {
        do {
            cache_append(*l);
            l++;
        } while (--l_cache > 0);
    } else if (a_cache) {
        do {
            cache_append(*a);
            a++;
        } while (--a_cache > 0);
    }
#undef cache_append

    // find reused cache entries
    int n_used_cache = 0;
    for (int n = 0; n < n_cache && n_used_cache < pal_sz; n++)
        if (dav1d_msac_decode_bool_equi(&ts->msac))
            used_cache[n_used_cache++] = cache[n];

    uint16_t *const pal = f->frame_thread.pass ?
        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
                            ((t->bx >> 1) + (t->by & 1))][pl] : t->scratch.pal[pl];

    if (n_used_cache >= pal_sz) {
        // the whole palette comes from the cache
        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
    } else {
        // parse new entries; they are stored after the slots that the
        // reused entries will occupy, then merged below
        const int bpc = f->cur.p.bpc;
        const int min_step = !pl;
        int i = n_used_cache;
        int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);

        if (i < pal_sz) {
            int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
            const int max = (1 << bpc) - 1;

            while (i < pal_sz) {
                const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
                prev = pal[i++] = imin(prev + delta + min_step, max);
                if (prev + min_step >= max) {
                    // no room left for larger values: pad with max
                    for (; i < pal_sz; i++)
                        pal[i] = max;
                    break;
                }
                bits = imin(bits, 1 + ulog2(max - prev - min_step));
            }
        }

        // merge cache+new entries
        int n = 0, m = n_used_cache;
        for (i = 0; i < pal_sz; i++) {
            const int take_cached = n < n_used_cache &&
                (m >= pal_sz || used_cache[n] <= pal[m]);
            if (take_cached) {
                pal[i] = used_cache[n++];
            } else {
                assert(m < pal_sz);
                pal[i] = pal[m++];
            }
        }
    }

    if (DEBUG_BLOCK_INFO) {
        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
               pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
        for (int n = 0; n < n_cache; n++)
            printf("%c%02x", n ? ' ' : '[', cache[n]);
        printf("%s, pal=", n_cache ? "]" : "[]");
        for (int n = 0; n < pal_sz; n++)
            printf("%c%02x", n ? ' ' : '[', pal[n]);
        printf("]\n");
    }
}
475 
// Decode the chroma palettes of block `b`.
//
// Plane 1 is read through read_pal_plane(). Plane 2 has b->pal_sz[1]
// entries and is coded either as a first value followed by signed deltas
// (wrapped with `& max`), or as raw bpc-bit values; a single bit selects
// between the two. The plane 2 output goes to f->frame_thread.pal when
// frame_thread.pass is nonzero, and to t->scratch.pal otherwise.
static void read_pal_uv(Dav1dTileContext *const t, Av1Block *const b,
                        const int sz_ctx, const int bx4, const int by4)
{
    read_pal_plane(t, b, 1, sz_ctx, bx4, by4);

    // V pal coding
    Dav1dTileState *const ts = t->ts;
    const Dav1dFrameContext *const f = t->f;
    const int bpc = f->cur.p.bpc;
    const int n_colors = b->pal_sz[1];
    uint16_t *const pal = f->frame_thread.pass ?
        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
                            ((t->bx >> 1) + (t->by & 1))][2] : t->scratch.pal[2];

    const int delta_coded = dav1d_msac_decode_bool_equi(&ts->msac);
    if (!delta_coded) {
        // raw values
        for (int i = 0; i < n_colors; i++)
            pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
    } else {
        const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
        int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
        const int max = (1 << bpc) - 1;

        for (int i = 1; i < n_colors; i++) {
            const int magnitude = dav1d_msac_decode_bools(&ts->msac, bits);
            // a sign bit is only coded for non-zero deltas
            const int negative = magnitude &&
                                 dav1d_msac_decode_bool_equi(&ts->msac);
            const int delta = negative ? -magnitude : magnitude;
            prev = pal[i] = (prev + delta) & max;
        }
    }

    if (DEBUG_BLOCK_INFO) {
        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
        for (int n = 0; n < n_colors; n++)
            printf("%c%02x", n ? ' ' : '[', pal[n]);
        printf("]\n");
    }
}
508 
// meant to be SIMD'able, so that theoretical complexity of this function
// times block size goes from w4*h4 to w4+h4-1
// processes one diagonal i of the palette index map: the entries at columns
// first down to last, where the entry in column j lies in row i - j. each
// entry is classified using its already-decoded left, top and top/left
// neighbours in pal_idx (only the top neighbour is used in column 0, and
// only the left neighbour in row 0).
// output is order[] and ctx for each member of this diagonal.
// pal_idx/stride: palette index map and its row pitch.
// i, first, last: diagonal number and the column range to process; the
//                 n-th processed entry is at column first - n, row
//                 i - first + n.
// order[n]: receives a permutation of 0..7 for the n-th entry: first the
//           neighbour-derived candidates, then all remaining values in
//           ascending order.
// ctx[n]: receives a context 0..4 that reflects how many neighbours are
//         available and which of them are equal.
static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
                          const int i, const int first, const int last,
                          uint8_t (*const order)[8], uint8_t *const ctx)
{
    assert(pal_idx);

    // only the first entry of a diagonal can be in the top row
    int have_top = i > first;
    const uint8_t *cur = pal_idx + (first + (i - first) * stride);

    for (int col = first, n = 0; col >= last; col--, n++) {
        const int have_left = col > 0;
        int lead[3], n_lead = 0;

        assert(have_left || have_top);

        if (have_left && have_top) {
            const int left = cur[-1];
            const int top = cur[-stride];
            const int topleft = cur[-(stride + 1)];
            const int same_t_l = top == left;
            const int same_t_tl = top == topleft;
            const int same_l_tl = left == topleft;

            if (same_t_l & same_t_tl & same_l_tl) {
                ctx[n] = 4;
                lead[n_lead++] = top;
            } else if (same_t_l) {
                ctx[n] = 3;
                lead[n_lead++] = top;
                lead[n_lead++] = topleft;
            } else if (same_t_tl | same_l_tl) {
                // top/left first, then whichever neighbour differs from it
                ctx[n] = 2;
                lead[n_lead++] = topleft;
                lead[n_lead++] = same_t_tl ? left : top;
            } else {
                // all three differ
                ctx[n] = 1;
                lead[n_lead++] = imin(top, left);
                lead[n_lead++] = imax(top, left);
                lead[n_lead++] = topleft;
            }
        } else {
            // a single neighbour: top in the first column, left otherwise
            ctx[n] = 0;
            lead[n_lead++] = have_left ? cur[-1] : cur[-stride];
        }

        // neighbour-derived candidates go first ...
        unsigned seen = 0;
        int o_idx = 0;
        for (int k = 0; k < n_lead; k++) {
            const int v = lead[k];
            assert((unsigned)v < 8U);
            order[n][o_idx++] = v;
            seen |= 1 << v;
        }
        // ... followed by every value not used yet, in ascending order
        for (unsigned v = 0; v < 8; v++)
            if (!(seen & (1U << v)))
                order[n][o_idx++] = v;
        assert(o_idx == 8);

        // move one column left and one row down
        have_top = 1;
        cur += stride - 1;
    }
}
577 
// Decode the palette index map of plane `pl` for block `b` into pal_idx,
// which has a row pitch of bw4 * 4 bytes.
//
// The first index is read directly; all others are decoded diagonal by
// diagonal, using order_palette() to derive the context and candidate
// ordering of each entry from its already-decoded neighbours. Only the
// w4*4 x h4*4 area is decoded; the remainder up to bw4*4 x bh4*4 is filled
// by replicating the last decoded column and row.
static void read_pal_indices(Dav1dTileContext *const t,
                             uint8_t *const pal_idx,
                             const Av1Block *const b, const int pl,
                             const int w4, const int h4,
                             const int bw4, const int bh4)
{
    Dav1dTileState *const ts = t->ts;
    const ptrdiff_t stride = bw4 * 4;
    const int vis_w = w4 * 4, vis_h = h4 * 4;

    assert(pal_idx);
    pal_idx[0] = dav1d_msac_decode_uniform(&ts->msac, b->pal_sz[pl]);
    uint16_t (*const color_map_cdf)[8] =
        ts->cdf.m.color_map[pl][b->pal_sz[pl] - 2];
    uint8_t (*const order)[8] = t->scratch.pal_order;
    uint8_t *const ctx = t->scratch.pal_ctx;

    // top/left-to-bottom/right diagonals ("wave-front")
    const int n_diagonals = vis_w + vis_h - 1;
    for (int i = 1; i < n_diagonals; i++) {
        const int first = imin(i, vis_w - 1);
        const int last = imax(0, i - vis_h + 1);

        order_palette(pal_idx, stride, i, first, last, order, ctx);
        for (int m = 0; first - m >= last; m++) {
            const int x = first - m, y = i - x;
            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                      color_map_cdf[ctx[m]], b->pal_sz[pl] - 1);
            pal_idx[y * stride + x] = order[m][color_idx];
        }
    }

    // fill invisible edges
    if (bw4 > w4) {
        // extend every decoded row to the right with its last index
        const int pad = 4 * (bw4 - w4);
        for (int y = 0; y < vis_h; y++) {
            uint8_t *const row = &pal_idx[y * stride];
            memset(row + vis_w, row[vis_w - 1], pad);
        }
    }
    if (h4 < bh4) {
        // repeat the last decoded row downwards
        const uint8_t *const src = &pal_idx[stride * (vis_h - 1)];
        for (int y = vis_h; y < bh4 * 4; y++)
            memcpy(&pal_idx[y * stride], src, bw4 * 4);
    }
}
614 
// Determine the luma/chroma transform sizes of block `b` and, where coded,
// read its variable transform split tree.
//
// Three cases:
//  - non-skip block that is lossless or whose max transform is TX_4X4:
//    both max_ytx and uvtx are forced to TX_4X4;
//  - skip block, or txfm_mode is not DAV1D_TX_SWITCHABLE: no tree is read
//    and max_ytx keeps the maximum for this block size;
//  - otherwise: read_tx_tree() is called for every max-size transform unit
//    in the block and the split flags are collected in tx_split[].
// In the first two cases the above/left tx contexts are written here (only
// when txfm_mode is DAV1D_TX_SWITCHABLE); in the third, read_tx_tree() does
// that. The split masks end up in b->tx_split0/b->tx_split1.
static void read_vartx_tree(Dav1dTileContext *const t,
                            Av1Block *const b, const enum BlockSize bs,
                            const int bx4, const int by4)
{
    const Dav1dFrameContext *const f = t->f;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bw4 = b_dim[0], bh4 = b_dim[1];
    const int switchable = f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE;

    // var-tx tree coding
    uint16_t tx_split[2] = { 0 };
    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];

    const int force_4x4 = !b->skip &&
        (f->frame_hdr->segmentation.lossless[b->seg_id] ||
         b->max_ytx == TX_4X4);

    if (force_4x4) {
        b->max_ytx = b->uvtx = TX_4X4;
        if (switchable) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir tx, off, TX_4X4)
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);
#undef set_ctx
        }
    } else if (!switchable || b->skip) {
        if (switchable) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir tx, off, mul * b_dim[2 + diridx])
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);
#undef set_ctx
        }
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    } else {
        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
        int x = 0, y = 0;

        // walk the block in max-transform-size steps; t->bx/t->by are moved
        // along because read_tx_tree() works on the current position
        for (int y_off = 0; y < bh4; y += ytx->h, y_off++) {
            int x_off = 0;
            for (x = 0; x < bw4; x += ytx->w, x_off++) {
                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
                // contexts are updated inside read_tx_tree()
                t->bx += ytx->w;
            }
            t->bx -= x;
            t->by += ytx->h;
        }
        t->by -= y;
        if (DEBUG_BLOCK_INFO)
            printf("Post-vartxtree[%x/%x]: r=%d\n",
                   tx_split[0], tx_split[1], t->ts->msac.rng);
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    }

    assert(!(tx_split[0] & ~0x33));
    b->tx_split0 = (uint8_t)tx_split[0];
    b->tx_split1 = tx_split[1];
}
669 
get_prev_frame_segid(const Dav1dFrameContext * const f,const int by,const int bx,const int w4,int h4,const uint8_t * ref_seg_map,const ptrdiff_t stride)670 static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
671                                             const int by, const int bx,
672                                             const int w4, int h4,
673                                             const uint8_t *ref_seg_map,
674                                             const ptrdiff_t stride)
675 {
676     assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);
677     if (dav1d_thread_picture_wait(&f->refp[f->frame_hdr->primary_ref_frame],
678                                   (by + h4) * 4, PLANE_TYPE_BLOCK))
679     {
680         return 8;
681     }
682 
683     unsigned seg_id = 8;
684     ref_seg_map += by * stride + bx;
685     do {
686         for (int x = 0; x < w4; x++)
687             seg_id = imin(seg_id, ref_seg_map[x]);
688         ref_seg_map += stride;
689     } while (--h4 > 0 && seg_id);
690     assert(seg_id < 8);
691 
692     return seg_id;
693 }
694 
decode_b(Dav1dTileContext * const t,const enum BlockLevel bl,const enum BlockSize bs,const enum BlockPartition bp,const enum EdgeFlags intra_edge_flags)695 static int decode_b(Dav1dTileContext *const t,
696                     const enum BlockLevel bl,
697                     const enum BlockSize bs,
698                     const enum BlockPartition bp,
699                     const enum EdgeFlags intra_edge_flags)
700 {
701     Dav1dTileState *const ts = t->ts;
702     const Dav1dFrameContext *const f = t->f;
703     Av1Block b_mem, *const b = f->frame_thread.pass ?
704         &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
705     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
706     const int bx4 = t->bx & 31, by4 = t->by & 31;
707     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
708     const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
709     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
710     const int bw4 = b_dim[0], bh4 = b_dim[1];
711     const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
712     const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
713     const int have_left = t->bx > ts->tiling.col_start;
714     const int have_top = t->by > ts->tiling.row_start;
715     const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
716                            (bw4 > ss_hor || t->bx & 1) &&
717                            (bh4 > ss_ver || t->by & 1);
718 
719     if (f->frame_thread.pass == 2) {
720         if (b->intra) {
721             f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
722 
723             const enum IntraPredMode y_mode_nofilt =
724                 b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
725 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
726             rep_macro(type, t->dir mode, off, mul * y_mode_nofilt); \
727             rep_macro(type, t->dir intra, off, mul)
728             case_set(bh4, l., 1, by4);
729             case_set(bw4, a->, 0, bx4);
730 #undef set_ctx
731             if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
732                 refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
733                 for (int x = 0; x < bw4; x++) {
734                     r[x].ref.ref[0] = 0;
735                     r[x].bs = bs;
736                 }
737                 refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
738                 for (int y = 0; y < bh4 - 1; y++) {
739                     rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
740                     rr[y][t->bx + bw4 - 1].bs = bs;
741                 }
742             }
743 
744             if (has_chroma) {
745 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
746                 rep_macro(type, t->dir uvmode, off, mul * b->uv_mode)
747                 case_set(cbh4, l., 1, cby4);
748                 case_set(cbw4, a->, 0, cbx4);
749 #undef set_ctx
750             }
751         } else {
752             if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
753                 b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
754             {
755                 if (b->matrix[0] == SHRT_MIN) {
756                     t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
757                 } else {
758                     t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
759                     t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
760                     t->warpmv.matrix[3] = b->matrix[1];
761                     t->warpmv.matrix[4] = b->matrix[2];
762                     t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
763                     dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
764                                           t->bx, t->by);
765                     dav1d_get_shear_params(&t->warpmv);
766 #define signabs(v) v < 0 ? '-' : ' ', abs(v)
767                     if (DEBUG_BLOCK_INFO)
768                         printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
769                                "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
770                                signabs(t->warpmv.matrix[0]),
771                                signabs(t->warpmv.matrix[1]),
772                                signabs(t->warpmv.matrix[2]),
773                                signabs(t->warpmv.matrix[3]),
774                                signabs(t->warpmv.matrix[4]),
775                                signabs(t->warpmv.matrix[5]),
776                                signabs(t->warpmv.u.p.alpha),
777                                signabs(t->warpmv.u.p.beta),
778                                signabs(t->warpmv.u.p.gamma),
779                                signabs(t->warpmv.u.p.delta),
780                                b->mv2d.y, b->mv2d.x);
781 #undef signabs
782                 }
783             }
784             if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
785 
786             const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
787 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
788             rep_macro(type, t->dir filter[0], off, mul * filter[0]); \
789             rep_macro(type, t->dir filter[1], off, mul * filter[1]); \
790             rep_macro(type, t->dir intra, off, 0)
791             case_set(bh4, l., 1, by4);
792             case_set(bw4, a->, 0, bx4);
793 #undef set_ctx
794 
795             if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
796                 refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
797                 for (int x = 0; x < bw4; x++) {
798                     r[x].ref.ref[0] = b->ref[0] + 1;
799                     r[x].mv.mv[0] = b->mv[0];
800                     r[x].bs = bs;
801                 }
802                 refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
803                 for (int y = 0; y < bh4 - 1; y++) {
804                     rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
805                     rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
806                     rr[y][t->bx + bw4 - 1].bs = bs;
807                 }
808             }
809 
810             if (has_chroma) {
811 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
812                 rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
813                 case_set(cbh4, l., 1, cby4);
814                 case_set(cbw4, a->, 0, cbx4);
815 #undef set_ctx
816             }
817         }
818         return 0;
819     }
820 
821     const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
822 
823     b->bl = bl;
824     b->bp = bp;
825     b->bs = bs;
826 
827     const Dav1dSegmentationData *seg = NULL;
828 
829     // segment_id (if seg_feature for skip/ref/gmv is enabled)
830     int seg_pred = 0;
831     if (f->frame_hdr->segmentation.enabled) {
832         if (!f->frame_hdr->segmentation.update_map) {
833             if (f->prev_segmap) {
834                 unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
835                                                        f->prev_segmap,
836                                                        f->b4_stride);
837                 if (seg_id >= 8) return -1;
838                 b->seg_id = seg_id;
839             } else {
840                 b->seg_id = 0;
841             }
842             seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
843         } else if (f->frame_hdr->segmentation.seg_data.preskip) {
844             if (f->frame_hdr->segmentation.temporal &&
845                 (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
846                                 ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
847                                 t->l.seg_pred[by4]])))
848             {
849                 // temporal predicted seg_id
850                 if (f->prev_segmap) {
851                     unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
852                                                            w4, h4,
853                                                            f->prev_segmap,
854                                                            f->b4_stride);
855                     if (seg_id >= 8) return -1;
856                     b->seg_id = seg_id;
857                 } else {
858                     b->seg_id = 0;
859                 }
860             } else {
861                 int seg_ctx;
862                 const unsigned pred_seg_id =
863                     get_cur_frame_segid(t->by, t->bx, have_top, have_left,
864                                         &seg_ctx, f->cur_segmap, f->b4_stride);
865                 const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
866                                           ts->cdf.m.seg_id[seg_ctx],
867                                           DAV1D_MAX_SEGMENTS - 1);
868                 const unsigned last_active_seg_id =
869                     f->frame_hdr->segmentation.seg_data.last_active_segid;
870                 b->seg_id = neg_deinterleave(diff, pred_seg_id,
871                                              last_active_seg_id + 1);
872                 if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
873                 if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
874             }
875 
876             if (DEBUG_BLOCK_INFO)
877                 printf("Post-segid[preskip;%d]: r=%d\n",
878                        b->seg_id, ts->msac.rng);
879 
880             seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
881         }
882     } else {
883         b->seg_id = 0;
884     }
885 
886     // skip_mode
887     if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
888         f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
889     {
890         const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
891         b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
892                            ts->cdf.m.skip_mode[smctx]);
893         if (DEBUG_BLOCK_INFO)
894             printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
895     } else {
896         b->skip_mode = 0;
897     }
898 
899     // skip
900     if (b->skip_mode || (seg && seg->skip)) {
901         b->skip = 1;
902     } else {
903         const int sctx = t->a->skip[bx4] + t->l.skip[by4];
904         b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
905         if (DEBUG_BLOCK_INFO)
906             printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
907     }
908 
909     // segment_id
910     if (f->frame_hdr->segmentation.enabled &&
911         f->frame_hdr->segmentation.update_map &&
912         !f->frame_hdr->segmentation.seg_data.preskip)
913     {
914         if (!b->skip && f->frame_hdr->segmentation.temporal &&
915             (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
916                             ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
917                             t->l.seg_pred[by4]])))
918         {
919             // temporal predicted seg_id
920             if (f->prev_segmap) {
921                 unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
922                                                        f->prev_segmap,
923                                                        f->b4_stride);
924                 if (seg_id >= 8) return -1;
925                 b->seg_id = seg_id;
926             } else {
927                 b->seg_id = 0;
928             }
929         } else {
930             int seg_ctx;
931             const unsigned pred_seg_id =
932                 get_cur_frame_segid(t->by, t->bx, have_top, have_left,
933                                     &seg_ctx, f->cur_segmap, f->b4_stride);
934             if (b->skip) {
935                 b->seg_id = pred_seg_id;
936             } else {
937                 const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
938                                           ts->cdf.m.seg_id[seg_ctx],
939                                           DAV1D_MAX_SEGMENTS - 1);
940                 const unsigned last_active_seg_id =
941                     f->frame_hdr->segmentation.seg_data.last_active_segid;
942                 b->seg_id = neg_deinterleave(diff, pred_seg_id,
943                                              last_active_seg_id + 1);
944                 if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
945             }
946             if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
947         }
948 
949         seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
950 
951         if (DEBUG_BLOCK_INFO)
952             printf("Post-segid[postskip;%d]: r=%d\n",
953                    b->seg_id, ts->msac.rng);
954     }
955 
956     // cdef index
957     if (!b->skip) {
958         const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
959                                            ((t->by & 16) >> 3) : 0;
960         if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
961             const int v = dav1d_msac_decode_bools(&ts->msac,
962                               f->frame_hdr->cdef.n_bits);
963             t->cur_sb_cdef_idx_ptr[idx] = v;
964             if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
965             if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
966             if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
967 
968             if (DEBUG_BLOCK_INFO)
969                 printf("Post-cdef_idx[%d]: r=%d\n",
970                         *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
971         }
972     }
973 
974     // delta-q/lf
975     if (!(t->bx & (31 >> !f->seq_hdr->sb128)) &&
976         !(t->by & (31 >> !f->seq_hdr->sb128)))
977     {
978         const int prev_qidx = ts->last_qidx;
979         const int have_delta_q = f->frame_hdr->delta.q.present &&
980             (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);
981 
982         int8_t prev_delta_lf[4];
983         memcpy(prev_delta_lf, ts->last_delta_lf, 4);
984 
985         if (have_delta_q) {
986             int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
987                                                           ts->cdf.m.delta_q, 3);
988             if (delta_q == 3) {
989                 const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
990                 delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
991                           1 + (1 << n_bits);
992             }
993             if (delta_q) {
994                 if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
995                 delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
996             }
997             ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
998             if (have_delta_q && DEBUG_BLOCK_INFO)
999                 printf("Post-delta_q[%d->%d]: r=%d\n",
1000                        delta_q, ts->last_qidx, ts->msac.rng);
1001 
1002             if (f->frame_hdr->delta.lf.present) {
1003                 const int n_lfs = f->frame_hdr->delta.lf.multi ?
1004                     f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
1005 
1006                 for (int i = 0; i < n_lfs; i++) {
1007                     int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1008                         ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
1009                     if (delta_lf == 3) {
1010                         const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
1011                         delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
1012                                    1 + (1 << n_bits);
1013                     }
1014                     if (delta_lf) {
1015                         if (dav1d_msac_decode_bool_equi(&ts->msac))
1016                             delta_lf = -delta_lf;
1017                         delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
1018                     }
1019                     ts->last_delta_lf[i] =
1020                         iclip(ts->last_delta_lf[i] + delta_lf, -63, 63);
1021                     if (have_delta_q && DEBUG_BLOCK_INFO)
1022                         printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
1023                                ts->msac.rng);
1024                 }
1025             }
1026         }
1027         if (ts->last_qidx == f->frame_hdr->quant.yac) {
1028             // assign frame-wide q values to this sb
1029             ts->dq = f->dq;
1030         } else if (ts->last_qidx != prev_qidx) {
1031             // find sb-specific quant parameters
1032             init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
1033             ts->dq = ts->dqmem;
1034         }
1035         if (!memcmp(ts->last_delta_lf, (int8_t[4]) { 0, 0, 0, 0 }, 4)) {
1036             // assign frame-wide lf values to this sb
1037             ts->lflvl = f->lf.lvl;
1038         } else if (memcmp(ts->last_delta_lf, prev_delta_lf, 4)) {
1039             // find sb-specific lf lvl parameters
1040             dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf);
1041             ts->lflvl = ts->lflvlmem;
1042         }
1043     }
1044 
1045     if (b->skip_mode) {
1046         b->intra = 0;
1047     } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
1048         if (seg && (seg->ref >= 0 || seg->globalmv)) {
1049             b->intra = !seg->ref;
1050         } else {
1051             const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
1052                                            have_top, have_left);
1053             b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
1054                             ts->cdf.m.intra[ictx]);
1055             if (DEBUG_BLOCK_INFO)
1056                 printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
1057         }
1058     } else if (f->frame_hdr->allow_intrabc) {
1059         b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
1060         if (DEBUG_BLOCK_INFO)
1061             printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
1062     } else {
1063         b->intra = 1;
1064     }
1065 
1066     // intra/inter-specific stuff
1067     if (b->intra) {
1068         uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
1069             ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
1070             ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
1071                         [dav1d_intra_mode_context[t->l.mode[by4]]];
1072         b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
1073                                                      N_INTRA_PRED_MODES - 1);
1074         if (DEBUG_BLOCK_INFO)
1075             printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);
1076 
1077         // angle delta
1078         if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
1079             b->y_mode <= VERT_LEFT_PRED)
1080         {
1081             uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
1082             const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
1083             b->y_angle = angle - 3;
1084         } else {
1085             b->y_angle = 0;
1086         }
1087 
1088         if (has_chroma) {
1089             const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
1090                 cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
1091             uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
1092             b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
1093                              N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
1094             if (DEBUG_BLOCK_INFO)
1095                 printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);
1096 
1097             if (b->uv_mode == CFL_PRED) {
1098 #define SIGN(a) (!!(a) + ((a) > 0))
1099                 const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
1100                                      ts->cdf.m.cfl_sign, 7) + 1;
1101                 const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
1102                 assert(sign_u == sign / 3);
1103                 if (sign_u) {
1104                     const int ctx = (sign_u == 2) * 3 + sign_v;
1105                     b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1106                                           ts->cdf.m.cfl_alpha[ctx], 15) + 1;
1107                     if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
1108                 } else {
1109                     b->cfl_alpha[0] = 0;
1110                 }
1111                 if (sign_v) {
1112                     const int ctx = (sign_v == 2) * 3 + sign_u;
1113                     b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1114                                           ts->cdf.m.cfl_alpha[ctx], 15) + 1;
1115                     if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
1116                 } else {
1117                     b->cfl_alpha[1] = 0;
1118                 }
1119 #undef SIGN
1120                 if (DEBUG_BLOCK_INFO)
1121                     printf("Post-uvalphas[%d/%d]: r=%d\n",
1122                            b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
1123             } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
1124                        b->uv_mode <= VERT_LEFT_PRED)
1125             {
1126                 uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
1127                 const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
1128                 b->uv_angle = angle - 3;
1129             } else {
1130                 b->uv_angle = 0;
1131             }
1132         }
1133 
1134         b->pal_sz[0] = b->pal_sz[1] = 0;
1135         if (f->frame_hdr->allow_screen_content_tools &&
1136             imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
1137         {
1138             const int sz_ctx = b_dim[2] + b_dim[3] - 2;
1139             if (b->y_mode == DC_PRED) {
1140                 const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
1141                 const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
1142                                           ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
1143                 if (DEBUG_BLOCK_INFO)
1144                     printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
1145                 if (use_y_pal)
1146                     read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
1147             }
1148 
1149             if (has_chroma && b->uv_mode == DC_PRED) {
1150                 const int pal_ctx = b->pal_sz[0] > 0;
1151                 const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
1152                                            ts->cdf.m.pal_uv[pal_ctx]);
1153                 if (DEBUG_BLOCK_INFO)
1154                     printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
1155                 if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
1156                     read_pal_uv(t, b, sz_ctx, bx4, by4);
1157             }
1158         }
1159 
1160         if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
1161             imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
1162         {
1163             const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
1164                                       ts->cdf.m.use_filter_intra[bs]);
1165             if (is_filter) {
1166                 b->y_mode = FILTER_PRED;
1167                 b->y_angle = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1168                                  ts->cdf.m.filter_intra, 4);
1169             }
1170             if (DEBUG_BLOCK_INFO)
1171                 printf("Post-filterintramode[%d/%d]: r=%d\n",
1172                        b->y_mode, b->y_angle, ts->msac.rng);
1173         }
1174 
1175         if (b->pal_sz[0]) {
1176             uint8_t *pal_idx;
1177             if (f->frame_thread.pass) {
1178                 assert(ts->frame_thread.pal_idx);
1179                 pal_idx = ts->frame_thread.pal_idx;
1180                 ts->frame_thread.pal_idx += bw4 * bh4 * 16;
1181             } else
1182                 pal_idx = t->scratch.pal_idx;
1183             read_pal_indices(t, pal_idx, b, 0, w4, h4, bw4, bh4);
1184             if (DEBUG_BLOCK_INFO)
1185                 printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
1186         }
1187 
1188         if (has_chroma && b->pal_sz[1]) {
1189             uint8_t *pal_idx;
1190             if (f->frame_thread.pass) {
1191                 assert(ts->frame_thread.pal_idx);
1192                 pal_idx = ts->frame_thread.pal_idx;
1193                 ts->frame_thread.pal_idx += cbw4 * cbh4 * 16;
1194             } else
1195                 pal_idx = &t->scratch.pal_idx[bw4 * bh4 * 16];
1196             read_pal_indices(t, pal_idx, b, 1, cw4, ch4, cbw4, cbh4);
1197             if (DEBUG_BLOCK_INFO)
1198                 printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
1199         }
1200 
1201         const TxfmInfo *t_dim;
1202         if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
1203             b->tx = b->uvtx = (int) TX_4X4;
1204             t_dim = &dav1d_txfm_dimensions[TX_4X4];
1205         } else {
1206             b->tx = dav1d_max_txfm_size_for_bs[bs][0];
1207             b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
1208             t_dim = &dav1d_txfm_dimensions[b->tx];
1209             if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
1210                 const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
1211                 uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
1212                 int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
1213                                 imin(t_dim->max, 2));
1214 
1215                 while (depth--) {
1216                     b->tx = t_dim->sub;
1217                     t_dim = &dav1d_txfm_dimensions[b->tx];
1218                 }
1219             }
1220             if (DEBUG_BLOCK_INFO)
1221                 printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
1222         }
1223 
1224         // reconstruction
1225         if (f->frame_thread.pass == 1) {
1226             f->bd_fn.read_coef_blocks(t, bs, b);
1227         } else {
1228             f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
1229         }
1230 
1231         if (f->frame_hdr->loopfilter.level_y[0] ||
1232             f->frame_hdr->loopfilter.level_y[1])
1233         {
1234             dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
1235                                        (const uint8_t (*)[8][2])
1236                                        &ts->lflvl[b->seg_id][0][0][0],
1237                                        t->bx, t->by, f->w4, f->h4, bs,
1238                                        b->tx, b->uvtx, f->cur.p.layout,
1239                                        &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
1240                                        has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
1241                                        has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
1242         }
1243 
1244         // update contexts
1245 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1246         rep_macro(type, t->dir tx_intra, off, mul * (((uint8_t *) &t_dim->lw)[diridx])); \
1247         rep_macro(type, t->dir tx, off, mul * (((uint8_t *) &t_dim->lw)[diridx])); \
1248         rep_macro(type, t->dir mode, off, mul * y_mode_nofilt); \
1249         rep_macro(type, t->dir pal_sz, off, mul * b->pal_sz[0]); \
1250         rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
1251         rep_macro(type, t->dir skip_mode, off, 0); \
1252         rep_macro(type, t->dir intra, off, mul); \
1253         rep_macro(type, t->dir skip, off, mul * b->skip); \
1254         /* see aomedia bug 2183 for why we use luma coordinates here */ \
1255         rep_macro(type, t->pal_sz_uv[diridx], off, mul * (has_chroma ? b->pal_sz[1] : 0)); \
1256         if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
1257             rep_macro(type, t->dir comp_type, off, mul * COMP_INTER_NONE); \
1258             rep_macro(type, t->dir ref[0], off, mul * ((uint8_t) -1)); \
1259             rep_macro(type, t->dir ref[1], off, mul * ((uint8_t) -1)); \
1260             rep_macro(type, t->dir filter[0], off, mul * DAV1D_N_SWITCHABLE_FILTERS); \
1261             rep_macro(type, t->dir filter[1], off, mul * DAV1D_N_SWITCHABLE_FILTERS); \
1262         }
1263         const enum IntraPredMode y_mode_nofilt =
1264             b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
1265         case_set(bh4, l., 1, by4);
1266         case_set(bw4, a->, 0, bx4);
1267 #undef set_ctx
1268         if (b->pal_sz[0]) {
1269             uint16_t *const pal = f->frame_thread.pass ?
1270                 f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
1271                                     ((t->bx >> 1) + (t->by & 1))][0] : t->scratch.pal[0];
1272             for (int x = 0; x < bw4; x++)
1273                 memcpy(t->al_pal[0][bx4 + x][0], pal, 16);
1274             for (int y = 0; y < bh4; y++)
1275                 memcpy(t->al_pal[1][by4 + y][0], pal, 16);
1276         }
1277         if (has_chroma) {
1278 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1279                 rep_macro(type, t->dir uvmode, off, mul * b->uv_mode)
1280                 case_set(cbh4, l., 1, cby4);
1281                 case_set(cbw4, a->, 0, cbx4);
1282 #undef set_ctx
1283             if (b->pal_sz[1]) {
1284                 const uint16_t (*const pal)[8] = f->frame_thread.pass ?
1285                     f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) *
1286                     (f->b4_stride >> 1) + ((t->bx >> 1) + (t->by & 1))] :
1287                     t->scratch.pal;
1288                 // see aomedia bug 2183 for why we use luma coordinates here
1289                 for (int pl = 1; pl <= 2; pl++) {
1290                     for (int x = 0; x < bw4; x++)
1291                         memcpy(t->al_pal[0][bx4 + x][pl], pal[pl], 16);
1292                     for (int y = 0; y < bh4; y++)
1293                         memcpy(t->al_pal[1][by4 + y][pl], pal[pl], 16);
1294                 }
1295             }
1296         }
1297         if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
1298             splat_intraref(&t->rt, t->by, t->bx, bs);
1299         }
1300     } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
1301         // intra block copy
1302         refmvs_candidate mvstack[8];
1303         int n_mvs, ctx;
1304         dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1305                           (union refmvs_refpair) { .ref = { 0, -1 }},
1306                           bs, intra_edge_flags, t->by, t->bx);
1307 
1308         if (mvstack[0].mv.mv[0].n)
1309             b->mv[0] = mvstack[0].mv.mv[0];
1310         else if (mvstack[1].mv.mv[0].n)
1311             b->mv[0] = mvstack[1].mv.mv[0];
1312         else {
1313             if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
1314                 b->mv[0].y = 0;
1315                 b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
1316             } else {
1317                 b->mv[0].y = -(512 << f->seq_hdr->sb128);
1318                 b->mv[0].x = 0;
1319             }
1320         }
1321 
1322         const union mv ref = b->mv[0];
1323         read_mv_residual(t, &b->mv[0], &ts->cdf.dmv, 0);
1324 
1325         // clip intrabc motion vector to decoded parts of current tile
1326         int border_left = ts->tiling.col_start * 4;
1327         int border_top  = ts->tiling.row_start * 4;
1328         if (has_chroma) {
1329             if (bw4 < 2 &&  ss_hor)
1330                 border_left += 4;
1331             if (bh4 < 2 &&  ss_ver)
1332                 border_top  += 4;
1333         }
1334         int src_left   = t->bx * 4 + (b->mv[0].x >> 3);
1335         int src_top    = t->by * 4 + (b->mv[0].y >> 3);
1336         int src_right  = src_left + bw4 * 4;
1337         int src_bottom = src_top  + bh4 * 4;
1338         const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;
1339 
1340         // check against left or right tile boundary and adjust if necessary
1341         if (src_left < border_left) {
1342             src_right += border_left - src_left;
1343             src_left  += border_left - src_left;
1344         } else if (src_right > border_right) {
1345             src_left  -= src_right - border_right;
1346             src_right -= src_right - border_right;
1347         }
1348         // check against top tile boundary and adjust if necessary
1349         if (src_top < border_top) {
1350             src_bottom += border_top - src_top;
1351             src_top    += border_top - src_top;
1352         }
1353 
1354         const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
1355         const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
1356         const int sb_size = 1 << (6 + f->seq_hdr->sb128);
1357         // check for overlap with current superblock
1358         if (src_bottom > sby && src_right > sbx) {
1359             if (src_top - border_top >= src_bottom - sby) {
1360                 // if possible move src up into the previous superblock row
1361                 src_top    -= src_bottom - sby;
1362                 src_bottom -= src_bottom - sby;
1363             } else if (src_left - border_left >= src_right - sbx) {
1364                 // if possible move src left into the previous superblock
1365                 src_left  -= src_right - sbx;
1366                 src_right -= src_right - sbx;
1367             }
1368         }
1369         // move src up if it is below current superblock row
1370         if (src_bottom > sby + sb_size) {
1371             src_top    -= src_bottom - (sby + sb_size);
1372             src_bottom -= src_bottom - (sby + sb_size);
1373         }
1374         // error out if mv still overlaps with the current superblock
1375         if (src_bottom > sby && src_right > sbx)
1376             return -1;
1377 
1378         b->mv[0].x = (src_left - t->bx * 4) * 8;
1379         b->mv[0].y = (src_top  - t->by * 4) * 8;
1380 
1381         if (DEBUG_BLOCK_INFO)
1382             printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
1383                    b->mv[0].y, b->mv[0].x, ref.y, ref.x,
1384                    mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
1385         read_vartx_tree(t, b, bs, bx4, by4);
1386 
1387         // reconstruction
1388         if (f->frame_thread.pass == 1) {
1389             f->bd_fn.read_coef_blocks(t, bs, b);
1390             b->filter2d = FILTER_2D_BILINEAR;
1391         } else {
1392             if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
1393         }
1394 
1395         splat_intrabc_mv(&t->rt, t->by, t->bx, bs, b->mv[0]);
1396 
1397 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1398         rep_macro(type, t->dir tx_intra, off, mul * b_dim[2 + diridx]); \
1399         rep_macro(type, t->dir mode, off, mul * DC_PRED); \
1400         rep_macro(type, t->dir pal_sz, off, 0); \
1401         /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
1402         rep_macro(type, t->pal_sz_uv[diridx], off, 0); \
1403         rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
1404         rep_macro(type, t->dir skip_mode, off, 0); \
1405         rep_macro(type, t->dir intra, off, 0); \
1406         rep_macro(type, t->dir skip, off, mul * b->skip)
1407         case_set(bh4, l., 1, by4);
1408         case_set(bw4, a->, 0, bx4);
1409 #undef set_ctx
1410         if (has_chroma) {
1411 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1412             rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
1413             case_set(cbh4, l., 1, cby4);
1414             case_set(cbw4, a->, 0, cbx4);
1415 #undef set_ctx
1416         }
1417     } else {
1418         // inter-specific mode/mv coding
1419         int is_comp, has_subpel_filter;
1420 
1421         if (b->skip_mode) {
1422             is_comp = 1;
1423         } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
1424                    f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
1425         {
1426             const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
1427                                          have_top, have_left);
1428             is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
1429                           ts->cdf.m.comp[ctx]);
1430             if (DEBUG_BLOCK_INFO)
1431                 printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
1432         } else {
1433             is_comp = 0;
1434         }
1435 
1436         if (b->skip_mode) {
1437             b->ref[0] = f->frame_hdr->skip_mode_refs[0];
1438             b->ref[1] = f->frame_hdr->skip_mode_refs[1];
1439             b->comp_type = COMP_INTER_AVG;
1440             b->inter_mode = NEARESTMV_NEARESTMV;
1441             b->drl_idx = NEAREST_DRL;
1442             has_subpel_filter = 0;
1443 
1444             refmvs_candidate mvstack[8];
1445             int n_mvs, ctx;
1446             dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1447                               (union refmvs_refpair) { .ref = {
1448                                     b->ref[0] + 1, b->ref[1] + 1 }},
1449                               bs, intra_edge_flags, t->by, t->bx);
1450 
1451             b->mv[0] = mvstack[0].mv.mv[0];
1452             b->mv[1] = mvstack[0].mv.mv[1];
1453             fix_mv_precision(f->frame_hdr, &b->mv[0]);
1454             fix_mv_precision(f->frame_hdr, &b->mv[1]);
1455             if (DEBUG_BLOCK_INFO)
1456                 printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
1457                        b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
1458                        b->ref[0], b->ref[1]);
1459         } else if (is_comp) {
1460             const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
1461                                                  have_top, have_left);
1462             if (dav1d_msac_decode_bool_adapt(&ts->msac,
1463                     ts->cdf.m.comp_dir[dir_ctx]))
1464             {
1465                 // bidir - first reference (fw)
1466                 const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
1467                                                      have_top, have_left);
1468                 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1469                         ts->cdf.m.comp_fwd_ref[0][ctx1]))
1470                 {
1471                     const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
1472                                                            have_top, have_left);
1473                     b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
1474                                         ts->cdf.m.comp_fwd_ref[2][ctx2]);
1475                 } else {
1476                     const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
1477                                                            have_top, have_left);
1478                     b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
1479                                     ts->cdf.m.comp_fwd_ref[1][ctx2]);
1480                 }
1481 
1482                 // second reference (bw)
1483                 const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
1484                                                      have_top, have_left);
1485                 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1486                         ts->cdf.m.comp_bwd_ref[0][ctx3]))
1487                 {
1488                     b->ref[1] = 6;
1489                 } else {
1490                     const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
1491                                                            have_top, have_left);
1492                     b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
1493                                         ts->cdf.m.comp_bwd_ref[1][ctx4]);
1494                 }
1495             } else {
1496                 // unidir
1497                 const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
1498                                                      have_top, have_left);
1499                 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1500                         ts->cdf.m.comp_uni_ref[0][uctx_p]))
1501                 {
1502                     b->ref[0] = 4;
1503                     b->ref[1] = 6;
1504                 } else {
1505                     const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
1506                                                            have_top, have_left);
1507                     b->ref[0] = 0;
1508                     b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
1509                                         ts->cdf.m.comp_uni_ref[1][uctx_p1]);
1510                     if (b->ref[1] == 2) {
1511                         const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
1512                                                                have_top, have_left);
1513                         b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
1514                                          ts->cdf.m.comp_uni_ref[2][uctx_p2]);
1515                     }
1516                 }
1517             }
1518             if (DEBUG_BLOCK_INFO)
1519                 printf("Post-refs[%d/%d]: r=%d\n",
1520                        b->ref[0], b->ref[1], ts->msac.rng);
1521 
1522             refmvs_candidate mvstack[8];
1523             int n_mvs, ctx;
1524             dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1525                               (union refmvs_refpair) { .ref = {
1526                                     b->ref[0] + 1, b->ref[1] + 1 }},
1527                               bs, intra_edge_flags, t->by, t->bx);
1528 
1529             b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
1530                                 ts->cdf.m.comp_inter_mode[ctx],
1531                                 N_COMP_INTER_PRED_MODES - 1);
1532             if (DEBUG_BLOCK_INFO)
1533                 printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
1534                        b->inter_mode, ctx, n_mvs, ts->msac.rng);
1535 
1536             const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
1537             b->drl_idx = NEAREST_DRL;
1538             if (b->inter_mode == NEWMV_NEWMV) {
1539                 if (n_mvs > 1) { // NEARER, NEAR or NEARISH
1540                     const int drl_ctx_v1 = get_drl_context(mvstack, 0);
1541                     b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1542                                       ts->cdf.m.drl_bit[drl_ctx_v1]);
1543                     if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
1544                         const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1545                         b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1546                                           ts->cdf.m.drl_bit[drl_ctx_v2]);
1547                     }
1548                     if (DEBUG_BLOCK_INFO)
1549                         printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
1550                                b->drl_idx, n_mvs, ts->msac.rng);
1551                 }
1552             } else if (im[0] == NEARMV || im[1] == NEARMV) {
1553                 b->drl_idx = NEARER_DRL;
1554                 if (n_mvs > 2) { // NEAR or NEARISH
1555                     const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1556                     b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1557                                       ts->cdf.m.drl_bit[drl_ctx_v2]);
1558                     if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
1559                         const int drl_ctx_v3 = get_drl_context(mvstack, 2);
1560                         b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1561                                           ts->cdf.m.drl_bit[drl_ctx_v3]);
1562                     }
1563                     if (DEBUG_BLOCK_INFO)
1564                         printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
1565                                b->drl_idx, n_mvs, ts->msac.rng);
1566                 }
1567             }
1568             assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1569 
1570 #define assign_comp_mv(idx) \
1571             switch (im[idx]) { \
1572             case NEARMV: \
1573             case NEARESTMV: \
1574                 b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
1575                 fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
1576                 break; \
1577             case GLOBALMV: \
1578                 has_subpel_filter |= \
1579                     f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
1580                 b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
1581                                         t->bx, t->by, bw4, bh4, f->frame_hdr); \
1582                 break; \
1583             case NEWMV: \
1584                 b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
1585                 read_mv_residual(t, &b->mv[idx], &ts->cdf.mv, \
1586                                  !f->frame_hdr->force_integer_mv); \
1587                 break; \
1588             }
1589             has_subpel_filter = imin(bw4, bh4) == 1 ||
1590                                 b->inter_mode != GLOBALMV_GLOBALMV;
1591             assign_comp_mv(0);
1592             assign_comp_mv(1);
1593 #undef assign_comp_mv
1594             if (DEBUG_BLOCK_INFO)
1595                 printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
1596                        b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
1597                        ts->msac.rng);
1598 
1599             // jnt_comp vs. seg vs. wedge
1600             int is_segwedge = 0;
1601             if (f->seq_hdr->masked_compound) {
1602                 const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);
1603 
1604                 is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac,
1605                                   ts->cdf.m.mask_comp[mask_ctx]);
1606                 if (DEBUG_BLOCK_INFO)
1607                     printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
1608                            is_segwedge, mask_ctx, ts->msac.rng);
1609             }
1610 
1611             if (!is_segwedge) {
1612                 if (f->seq_hdr->jnt_comp) {
1613                     const int jnt_ctx =
1614                         get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits,
1615                                          f->cur.frame_hdr->frame_offset,
1616                                          f->refp[b->ref[0]].p.frame_hdr->frame_offset,
1617                                          f->refp[b->ref[1]].p.frame_hdr->frame_offset,
1618                                          t->a, &t->l, by4, bx4);
1619                     b->comp_type = COMP_INTER_WEIGHTED_AVG +
1620                                    dav1d_msac_decode_bool_adapt(&ts->msac,
1621                                        ts->cdf.m.jnt_comp[jnt_ctx]);
1622                     if (DEBUG_BLOCK_INFO)
1623                         printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
1624                                b->comp_type == COMP_INTER_AVG,
1625                                jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
1626                                t->l.comp_type[by4], t->l.ref[0][by4],
1627                                ts->msac.rng);
1628                 } else {
1629                     b->comp_type = COMP_INTER_AVG;
1630                 }
1631             } else {
1632                 if (wedge_allowed_mask & (1 << bs)) {
1633                     const int ctx = dav1d_wedge_ctx_lut[bs];
1634                     b->comp_type = COMP_INTER_WEDGE -
1635                                    dav1d_msac_decode_bool_adapt(&ts->msac,
1636                                        ts->cdf.m.wedge_comp[ctx]);
1637                     if (b->comp_type == COMP_INTER_WEDGE)
1638                         b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1639                                            ts->cdf.m.wedge_idx[ctx], 15);
1640                 } else {
1641                     b->comp_type = COMP_INTER_SEG;
1642                 }
1643                 b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac);
1644                 if (DEBUG_BLOCK_INFO)
1645                     printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
1646                            b->comp_type == COMP_INTER_WEDGE,
1647                            b->wedge_idx, b->mask_sign, ts->msac.rng);
1648             }
1649         } else {
1650             b->comp_type = COMP_INTER_NONE;
1651 
1652             // ref
1653             if (seg && seg->ref > 0) {
1654                 b->ref[0] = seg->ref - 1;
1655             } else if (seg && (seg->globalmv || seg->skip)) {
1656                 b->ref[0] = 0;
1657             } else {
1658                 const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4,
1659                                                  have_top, have_left);
1660                 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1661                                                  ts->cdf.m.ref[0][ctx1]))
1662                 {
1663                     const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4,
1664                                                        have_top, have_left);
1665                     if (dav1d_msac_decode_bool_adapt(&ts->msac,
1666                                                      ts->cdf.m.ref[1][ctx2]))
1667                     {
1668                         b->ref[0] = 6;
1669                     } else {
1670                         const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4,
1671                                                            have_top, have_left);
1672                         b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
1673                                             ts->cdf.m.ref[5][ctx3]);
1674                     }
1675                 } else {
1676                     const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4,
1677                                                        have_top, have_left);
1678                     if (dav1d_msac_decode_bool_adapt(&ts->msac,
1679                                                      ts->cdf.m.ref[2][ctx2]))
1680                     {
1681                         const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4,
1682                                                            have_top, have_left);
1683                         b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
1684                                             ts->cdf.m.ref[4][ctx3]);
1685                     } else {
1686                         const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4,
1687                                                            have_top, have_left);
1688                         b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
1689                                         ts->cdf.m.ref[3][ctx3]);
1690                     }
1691                 }
1692                 if (DEBUG_BLOCK_INFO)
1693                     printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng);
1694             }
1695             b->ref[1] = -1;
1696 
1697             refmvs_candidate mvstack[8];
1698             int n_mvs, ctx;
1699             dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1700                               (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }},
1701                               bs, intra_edge_flags, t->by, t->bx);
1702 
1703             // mode parsing and mv derivation from ref_mvs
1704             if ((seg && (seg->skip || seg->globalmv)) ||
1705                 dav1d_msac_decode_bool_adapt(&ts->msac,
1706                                              ts->cdf.m.newmv_mode[ctx & 7]))
1707             {
1708                 if ((seg && (seg->skip || seg->globalmv)) ||
1709                     !dav1d_msac_decode_bool_adapt(&ts->msac,
1710                          ts->cdf.m.globalmv_mode[(ctx >> 3) & 1]))
1711                 {
1712                     b->inter_mode = GLOBALMV;
1713                     b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]],
1714                                           t->bx, t->by, bw4, bh4, f->frame_hdr);
1715                     has_subpel_filter = imin(bw4, bh4) == 1 ||
1716                         f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION;
1717                 } else {
1718                     has_subpel_filter = 1;
1719                     if (dav1d_msac_decode_bool_adapt(&ts->msac,
1720                             ts->cdf.m.refmv_mode[(ctx >> 4) & 15]))
1721                     { // NEAREST, NEARER, NEAR or NEARISH
1722                         b->inter_mode = NEARMV;
1723                         b->drl_idx = NEARER_DRL;
1724                         if (n_mvs > 2) { // NEARER, NEAR or NEARISH
1725                             const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1726                             b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1727                                               ts->cdf.m.drl_bit[drl_ctx_v2]);
1728                             if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH
1729                                 const int drl_ctx_v3 =
1730                                     get_drl_context(mvstack, 2);
1731                                 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1732                                                   ts->cdf.m.drl_bit[drl_ctx_v3]);
1733                             }
1734                         }
1735                     } else {
1736                         b->inter_mode = NEARESTMV;
1737                         b->drl_idx = NEAREST_DRL;
1738                     }
1739                     assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1740                     b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
1741                     if (b->drl_idx < NEAR_DRL)
1742                         fix_mv_precision(f->frame_hdr, &b->mv[0]);
1743                 }
1744 
1745                 if (DEBUG_BLOCK_INFO)
1746                     printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
1747                            b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs,
1748                            ts->msac.rng);
1749             } else {
1750                 has_subpel_filter = 1;
1751                 b->inter_mode = NEWMV;
1752                 b->drl_idx = NEAREST_DRL;
1753                 if (n_mvs > 1) { // NEARER, NEAR or NEARISH
1754                     const int drl_ctx_v1 = get_drl_context(mvstack, 0);
1755                     b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1756                                       ts->cdf.m.drl_bit[drl_ctx_v1]);
1757                     if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH
1758                         const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1759                         b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1760                                           ts->cdf.m.drl_bit[drl_ctx_v2]);
1761                     }
1762                 }
1763                 assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1764                 if (n_mvs > 1) {
1765                     b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
1766                 } else {
1767                     assert(!b->drl_idx);
1768                     b->mv[0] = mvstack[0].mv.mv[0];
1769                     fix_mv_precision(f->frame_hdr, &b->mv[0]);
1770                 }
1771                 if (DEBUG_BLOCK_INFO)
1772                     printf("Post-intermode[%d,drl=%d]: r=%d\n",
1773                            b->inter_mode, b->drl_idx, ts->msac.rng);
1774                 read_mv_residual(t, &b->mv[0], &ts->cdf.mv,
1775                                  !f->frame_hdr->force_integer_mv);
1776                 if (DEBUG_BLOCK_INFO)
1777                     printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
1778                            b->mv[0].y, b->mv[0].x, ts->msac.rng);
1779             }
1780 
1781             // interintra flags
1782             const int ii_sz_grp = dav1d_ymode_size_context[bs];
1783             if (f->seq_hdr->inter_intra &&
1784                 interintra_allowed_mask & (1 << bs) &&
1785                 dav1d_msac_decode_bool_adapt(&ts->msac,
1786                                              ts->cdf.m.interintra[ii_sz_grp]))
1787             {
1788                 b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1789                                          ts->cdf.m.interintra_mode[ii_sz_grp],
1790                                          N_INTER_INTRA_PRED_MODES - 1);
1791                 const int wedge_ctx = dav1d_wedge_ctx_lut[bs];
1792                 b->interintra_type = INTER_INTRA_BLEND +
1793                                      dav1d_msac_decode_bool_adapt(&ts->msac,
1794                                          ts->cdf.m.interintra_wedge[wedge_ctx]);
1795                 if (b->interintra_type == INTER_INTRA_WEDGE)
1796                     b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1797                                        ts->cdf.m.wedge_idx[wedge_ctx], 15);
1798             } else {
1799                 b->interintra_type = INTER_INTRA_NONE;
1800             }
1801             if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra &&
1802                 interintra_allowed_mask & (1 << bs))
1803             {
1804                 printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
1805                        b->interintra_type, b->interintra_mode,
1806                        b->wedge_idx, ts->msac.rng);
1807             }
1808 
1809             // motion variation
1810             if (f->frame_hdr->switchable_motion_mode &&
1811                 b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 &&
1812                 // is not warped global motion
1813                 !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV &&
1814                   f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) &&
1815                 // has overlappable neighbours
1816                 ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) ||
1817                  (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1))))
1818             {
1819                 // reaching here means the block allows obmc - check warp by
1820                 // finding matching-ref blocks in top/left edges
1821                 uint64_t mask[2] = { 0, 0 };
1822                 find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
1823                                   have_left, have_top, b->ref[0], mask);
1824                 const int allow_warp = !f->svc[b->ref[0]][0].scale &&
1825                     !f->frame_hdr->force_integer_mv &&
1826                     f->frame_hdr->warp_motion && (mask[0] | mask[1]);
1827 
1828                 b->motion_mode = allow_warp ?
1829                     dav1d_msac_decode_symbol_adapt4(&ts->msac,
1830                         ts->cdf.m.motion_mode[bs], 2) :
1831                     dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
1832                 if (b->motion_mode == MM_WARP) {
1833                     has_subpel_filter = 0;
1834                     derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
1835 #define signabs(v) v < 0 ? '-' : ' ', abs(v)
1836                     if (DEBUG_BLOCK_INFO)
1837                         printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
1838                                "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, "
1839                                "mv=y:%d,x:%d\n",
1840                                signabs(t->warpmv.matrix[0]),
1841                                signabs(t->warpmv.matrix[1]),
1842                                signabs(t->warpmv.matrix[2]),
1843                                signabs(t->warpmv.matrix[3]),
1844                                signabs(t->warpmv.matrix[4]),
1845                                signabs(t->warpmv.matrix[5]),
1846                                signabs(t->warpmv.u.p.alpha),
1847                                signabs(t->warpmv.u.p.beta),
1848                                signabs(t->warpmv.u.p.gamma),
1849                                signabs(t->warpmv.u.p.delta),
1850                                b->mv[0].y, b->mv[0].x);
1851 #undef signabs
1852                     if (f->frame_thread.pass) {
1853                         if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) {
1854                             b->matrix[0] = t->warpmv.matrix[2] - 0x10000;
1855                             b->matrix[1] = t->warpmv.matrix[3];
1856                             b->matrix[2] = t->warpmv.matrix[4];
1857                             b->matrix[3] = t->warpmv.matrix[5] - 0x10000;
1858                         } else {
1859                             b->matrix[0] = SHRT_MIN;
1860                         }
1861                     }
1862                 }
1863 
1864                 if (DEBUG_BLOCK_INFO)
1865                     printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIu64 "x/0x%"
1866                            PRIu64 "x]\n", b->motion_mode, ts->msac.rng, mask[0],
1867                             mask[1]);
1868             } else {
1869                 b->motion_mode = MM_TRANSLATION;
1870             }
1871         }
1872 
1873         // subpel filter
1874         enum Dav1dFilterMode filter[2];
1875         if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) {
1876             if (has_subpel_filter) {
1877                 const int comp = b->comp_type != COMP_INTER_NONE;
1878                 const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
1879                                                 by4, bx4);
1880                 filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1881                                ts->cdf.m.filter[0][ctx1],
1882                                DAV1D_N_SWITCHABLE_FILTERS - 1);
1883                 if (f->seq_hdr->dual_filter) {
1884                     const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
1885                                                     b->ref[0], by4, bx4);
1886                     if (DEBUG_BLOCK_INFO)
1887                         printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
1888                                filter[0], ctx1, ts->msac.rng);
1889                     filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1890                                     ts->cdf.m.filter[1][ctx2],
1891                                     DAV1D_N_SWITCHABLE_FILTERS - 1);
1892                     if (DEBUG_BLOCK_INFO)
1893                         printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
1894                                filter[1], ctx2, ts->msac.rng);
1895                 } else {
1896                     filter[1] = filter[0];
1897                     if (DEBUG_BLOCK_INFO)
1898                         printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
1899                                filter[0], ctx1, ts->msac.rng);
1900                 }
1901             } else {
1902                 filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR;
1903             }
1904         } else {
1905             filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode;
1906         }
1907         b->filter2d = dav1d_filter_2d[filter[1]][filter[0]];
1908 
1909         read_vartx_tree(t, b, bs, bx4, by4);
1910 
1911         // reconstruction
1912         if (f->frame_thread.pass == 1) {
1913             f->bd_fn.read_coef_blocks(t, bs, b);
1914         } else {
1915             if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
1916         }
1917 
1918         if (f->frame_hdr->loopfilter.level_y[0] ||
1919             f->frame_hdr->loopfilter.level_y[1])
1920         {
1921             const int is_globalmv =
1922                 b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV);
1923             const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2])
1924                 &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv];
1925             const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
1926             dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls,
1927                                        t->bx, t->by, f->w4, f->h4, b->skip, bs,
1928                                        f->frame_hdr->segmentation.lossless[b->seg_id] ?
1929                                            (enum RectTxfmSize) TX_4X4 : b->max_ytx,
1930                                        tx_split, b->uvtx, f->cur.p.layout,
1931                                        &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
1932                                        has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
1933                                        has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
1934         }
1935 
1936         // context updates
1937         if (is_comp) {
1938             splat_tworef_mv(&t->rt, t->by, t->bx, bs, b->inter_mode,
1939                             (refmvs_refpair) { .ref = { b->ref[0], b->ref[1] }},
1940                             (refmvs_mvpair) { .mv = { [0] = b->mv[0], [1] = b->mv[1] }});
1941         } else {
1942             splat_oneref_mv(&t->rt, t->by, t->bx, bs, b->inter_mode,
1943                             b->ref[0], b->mv[0], b->interintra_type);
1944         }
1945 
1946 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1947         rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
1948         rep_macro(type, t->dir skip_mode, off, mul * b->skip_mode); \
1949         rep_macro(type, t->dir intra, off, 0); \
1950         rep_macro(type, t->dir skip, off, mul * b->skip); \
1951         rep_macro(type, t->dir pal_sz, off, 0); \
1952         /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
1953         rep_macro(type, t->pal_sz_uv[diridx], off, 0); \
1954         rep_macro(type, t->dir tx_intra, off, mul * b_dim[2 + diridx]); \
1955         rep_macro(type, t->dir comp_type, off, mul * b->comp_type); \
1956         rep_macro(type, t->dir filter[0], off, mul * filter[0]); \
1957         rep_macro(type, t->dir filter[1], off, mul * filter[1]); \
1958         rep_macro(type, t->dir mode, off, mul * b->inter_mode); \
1959         rep_macro(type, t->dir ref[0], off, mul * b->ref[0]); \
1960         rep_macro(type, t->dir ref[1], off, mul * ((uint8_t) b->ref[1]))
1961         case_set(bh4, l., 1, by4);
1962         case_set(bw4, a->, 0, bx4);
1963 #undef set_ctx
1964 
1965         if (has_chroma) {
1966 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1967             rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
1968             case_set(cbh4, l., 1, cby4);
1969             case_set(cbw4, a->, 0, cbx4);
1970 #undef set_ctx
1971         }
1972     }
1973 
1974     // update contexts
1975     if (f->frame_hdr->segmentation.enabled &&
1976         f->frame_hdr->segmentation.update_map)
1977     {
1978         uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx];
1979 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1980         for (int y = 0; y < bh4; y++) { \
1981             rep_macro(type, seg_ptr, 0, mul * b->seg_id); \
1982             seg_ptr += f->b4_stride; \
1983         }
1984         case_set(bw4, NULL, 0, 0);
1985 #undef set_ctx
1986     }
1987     if (!b->skip) {
1988         uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1];
1989         const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
1990         const int bx_idx = (bx4 & 16) >> 4;
1991         for (int y = 0; y < bh4; y += 2, noskip_mask++) {
1992             (*noskip_mask)[bx_idx] |= mask;
1993             if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
1994                 (*noskip_mask)[1] |= mask;
1995         }
1996     }
1997 
1998     return 0;
1999 }
2000 
2001 #if __has_feature(memory_sanitizer)
2002 
2003 #include <sanitizer/msan_interface.h>
2004 
/*
 * MemorySanitizer-only wrapper around decode_b().
 *
 * Forwards all arguments to decode_b() and, if that succeeded and the frame
 * thread pass number is even, walks every row of the block's pixels in the
 * current picture (luma, plus both chroma planes when the block carries
 * chroma) and asks MSan whether any byte is still uninitialised. The first
 * poisoned row is reported on stderr before MSan itself is asked to check the
 * row via __msan_check_mem_is_initialized().
 *
 * Returns whatever decode_b() returned.
 */
static int checked_decode_b(Dav1dTileContext *const t,
                            const enum BlockLevel bl,
                            const enum BlockSize bs,
                            const enum BlockPartition bp,
                            const enum EdgeFlags intra_edge_flags)
{
    const Dav1dFrameContext *const f = t->f;
    const int err = decode_b(t, bl, bs, bp, intra_edge_flags);

    // Nothing to inspect if decoding failed or if the pass number is odd
    // (in pass 1 decode_b() only reads coefficients, it does not reconstruct).
    if (err != 0 || (f->frame_thread.pass & 1))
        return err;

    // 1 for high bit-depth content; used as an extra left-shift to turn
    // pixel counts into byte counts
    const int hbd_shift = !!f->seq_hdr->hbd;

    // chroma subsampling of the current picture
    const int chroma_ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int chroma_ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

    // block dimensions in 4px units, clipped against the frame size
    const uint8_t *const dims = dav1d_block_dimensions[bs];
    const int bw4 = dims[0];
    const int bh4 = dims[1];
    const int w4 = imin(bw4, f->bw - t->bx);
    const int h4 = imin(bh4, f->bh - t->by);

    const int has_chroma = f->seq_hdr->layout != DAV1D_PIXEL_LAYOUT_I400 &&
                           (bw4 > chroma_ss_hor || t->bx & 1) &&
                           (bh4 > chroma_ss_ver || t->by & 1);
    const int n_planes = 1 + 2 * has_chroma;

    for (int pl = 0; pl < n_planes; pl++) {
        // luma is never subsampled
        const int ss_ver = pl && chroma_ss_ver;
        const int ss_hor = pl && chroma_ss_hor;
        const ptrdiff_t stride = f->cur.stride[!!pl];
        const int bx = t->bx & ~ss_hor;
        const int by = t->by & ~ss_ver;
        const int width  = w4 << (2 - ss_hor + (bw4 == ss_hor));
        const int height = h4 << (2 - ss_ver + (bh4 == ss_ver));
        const size_t line_sz = width << hbd_shift;

        const uint8_t *row = f->cur.data[pl] + (by << (2 - ss_ver)) * stride +
                             (bx << (2 - ss_hor + hbd_shift));

        for (int y = 0; y < height; y++) {
            if (__msan_test_shadow(row, line_sz) != -1) {
                fprintf(stderr, "B[%d](%d, %d) w4:%d, h4:%d, row:%d\n",
                        pl, bx, by, w4, h4, y);
                __msan_check_mem_is_initialized(row, line_sz);
            }
            row += stride;
        }
    }

    return err;
}
2049 
2050 #define decode_b checked_decode_b
2051 
#endif /* __has_feature(memory_sanitizer) */
2053 
/*
 * Recursively decode the partition tree rooted at the square block of level
 * `bl` whose top-left corner is at (t->bx, t->by), in 4px units.
 *
 * For every leaf of the tree decode_b() is called with the matching block
 * size, partition type and intra edge flags taken from `node`; for
 * PARTITION_SPLIT above 8x8 the function recurses with bl + 1.
 *
 * When f->frame_thread.pass == 2 no symbols are read: the partition is taken
 * from the block array f->frame_thread.b instead, and the partition context
 * is not updated.
 *
 * Returns 0 on success and a non-zero value (1 or -1) on failure.
 * t->bx / t->by are restored to their entry values on the success path only;
 * the early error returns leave them wherever decoding stopped.
 */
static int decode_sb(Dav1dTileContext *const t, const enum BlockLevel bl,
                     const EdgeNode *const node)
{
    const Dav1dFrameContext *const f = t->f;
    // half of this block's edge length in 4px units (1 at BL_8X8, see the
    // assert in the PARTITION_SPLIT case)
    const int hsz = 16 >> bl;
    // does the frame extend beyond the block's horizontal / vertical midpoint?
    const int have_h_split = f->bw > t->bx + hsz;
    const int have_v_split = f->bh > t->by + hsz;

    // Frame ends before both midpoints: no symbol is read, descend directly
    // into the first (split[0]) child.
    if (!have_h_split && !have_v_split) {
        assert(bl < BL_8X8);
        return decode_sb(t, bl + 1, ((const EdgeBranch *) node)->split[0]);
    }

    // pc/ctx/bx8/by8 are only initialised (and only used) when pass != 2
    uint16_t *pc;
    enum BlockPartition bp;
    int ctx, bx8, by8;
    if (f->frame_thread.pass != 2) {
        if (0 && bl == BL_64X64)
            printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
                   f->frame_hdr->frame_offset, t->by, t->bx, bl, t->ts->msac.rng);
        // position inside the above/left context arrays, in 8px units
        bx8 = (t->bx & 31) >> 1;
        by8 = (t->by & 31) >> 1;
        ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8);
        pc = t->ts->cdf.m.partition[bl][ctx];
    }

    if (have_h_split && have_v_split) {
        // Frame extends past both midpoints: the full partition symbol is
        // available.
        if (f->frame_thread.pass == 2) {
            // reuse the stored decision; a block recorded at a different
            // level means this level was split
            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
            bp = b->bl == bl ? b->bp : PARTITION_SPLIT;
        } else {
            bp = dav1d_msac_decode_symbol_adapt16(&t->ts->msac, pc,
                                                  dav1d_partition_type_count[bl]);
            // These partition types are treated as an error for 4:2:2
            // content (presumably not permitted by the bitstream spec --
            // TODO confirm).
            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
                (bp == PARTITION_V || bp == PARTITION_V4 ||
                 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
            {
                return 1;
            }
            if (DEBUG_BLOCK_INFO)
                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp,
                       t->ts->msac.rng);
        }
        // block sizes used by this partition type at this level; b[0] and
        // b[1] are passed to decode_b() as its block size argument
        const uint8_t *const b = dav1d_block_sizes[bl][bp];

        // In each case below t->bx / t->by are moved to the sub-block being
        // decoded and moved back before the break.
        switch (bp) {
        case PARTITION_NONE:
            if (decode_b(t, bl, b[0], PARTITION_NONE, node->o))
                return -1;
            break;
        case PARTITION_H:
            // top half, then bottom half
            if (decode_b(t, bl, b[0], PARTITION_H, node->h[0]))
                return -1;
            t->by += hsz;
            if (decode_b(t, bl, b[0], PARTITION_H, node->h[1]))
                return -1;
            t->by -= hsz;
            break;
        case PARTITION_V:
            // left half, then right half
            if (decode_b(t, bl, b[0], PARTITION_V, node->v[0]))
                return -1;
            t->bx += hsz;
            if (decode_b(t, bl, b[0], PARTITION_V, node->v[1]))
                return -1;
            t->bx -= hsz;
            break;
        case PARTITION_SPLIT:
            if (bl == BL_8X8) {
                // smallest level: four 4x4 blocks, decoded directly in
                // top-left, top-right, bottom-left, bottom-right order
                const EdgeTip *const tip = (const EdgeTip *) node;
                assert(hsz == 1);
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
                    return -1;
                // remember t->tl_4x4_filter as it is after the first 4x4
                // block; it is put back right before the fourth block
                const enum Filter2d tl_filter = t->tl_4x4_filter;
                t->bx++;
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
                    return -1;
                t->bx--;
                t->by++;
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
                    return -1;
                t->bx++;
                t->tl_4x4_filter = tl_filter;
                if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[3]))
                    return -1;
                t->bx--;
                t->by--;
            } else {
                // recurse into the four quadrants, same visiting order
                const EdgeBranch *const branch = (const EdgeBranch *) node;
                if (decode_sb(t, bl + 1, branch->split[0]))
                    return 1;
                t->bx += hsz;
                if (decode_sb(t, bl + 1, branch->split[1]))
                    return 1;
                t->bx -= hsz;
                t->by += hsz;
                if (decode_sb(t, bl + 1, branch->split[2]))
                    return 1;
                t->bx += hsz;
                if (decode_sb(t, bl + 1, branch->split[3]))
                    return 1;
                t->bx -= hsz;
                t->by -= hsz;
            }
            break;
        case PARTITION_T_TOP_SPLIT: {
            // two b[0] blocks side by side on top, one b[1] block below
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[0]))
                return -1;
            t->bx += hsz;
            if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[1]))
                return -1;
            t->bx -= hsz;
            t->by += hsz;
            if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, branch->tts[2]))
                return -1;
            t->by -= hsz;
            break;
        }
        case PARTITION_T_BOTTOM_SPLIT: {
            // one b[0] block on top, two b[1] blocks side by side below
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, branch->tbs[0]))
                return -1;
            t->by += hsz;
            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[1]))
                return -1;
            t->bx += hsz;
            if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[2]))
                return -1;
            t->bx -= hsz;
            t->by -= hsz;
            break;
        }
        case PARTITION_T_LEFT_SPLIT: {
            // two stacked b[0] blocks on the left, one b[1] block on the right
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[0]))
                return -1;
            t->by += hsz;
            if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[1]))
                return -1;
            t->by -= hsz;
            t->bx += hsz;
            if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, branch->tls[2]))
                return -1;
            t->bx -= hsz;
            break;
        }
        case PARTITION_T_RIGHT_SPLIT: {
            // one b[0] block on the left, two stacked b[1] blocks on the right
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, branch->trs[0]))
                return -1;
            t->bx += hsz;
            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[1]))
                return -1;
            t->by += hsz;
            if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[2]))
                return -1;
            t->by -= hsz;
            t->bx -= hsz;
            break;
        }
        case PARTITION_H4: {
            // four blocks stacked vertically, each hsz/2 tall
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[0]))
                return -1;
            t->by += hsz >> 1;
            if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[1]))
                return -1;
            t->by += hsz >> 1;
            if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[2]))
                return -1;
            t->by += hsz >> 1;
            // the last quarter is skipped when it starts below the frame
            if (t->by < f->bh)
                if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[3]))
                    return -1;
            t->by -= hsz * 3 >> 1;
            break;
        }
        case PARTITION_V4: {
            // four blocks side by side, each hsz/2 wide
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[0]))
                return -1;
            t->bx += hsz >> 1;
            if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[1]))
                return -1;
            t->bx += hsz >> 1;
            if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[2]))
                return -1;
            t->bx += hsz >> 1;
            // the last quarter is skipped when it starts right of the frame
            if (t->bx < f->bw)
                if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[3]))
                    return -1;
            t->bx -= hsz * 3 >> 1;
            break;
        }
        default: assert(0);
        }
    } else if (have_h_split) {
        // The frame ends before the vertical midpoint (have_v_split is 0):
        // only a single bit is coded, choosing between PARTITION_SPLIT and
        // PARTITION_H.
        unsigned is_split;
        if (f->frame_thread.pass == 2) {
            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
            is_split = b->bl != bl;
        } else {
            is_split = dav1d_msac_decode_bool(&t->ts->msac,
                           gather_top_partition_prob(pc, bl));
            if (DEBUG_BLOCK_INFO)
                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
                       is_split ? PARTITION_SPLIT : PARTITION_H, t->ts->msac.rng);
        }

        assert(bl < BL_8X8);
        if (is_split) {
            // only the two top quadrants are decoded
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            bp = PARTITION_SPLIT;
            if (decode_sb(t, bl + 1, branch->split[0])) return 1;
            t->bx += hsz;
            if (decode_sb(t, bl + 1, branch->split[1])) return 1;
            t->bx -= hsz;
        } else {
            // only the top half of the horizontal partition is decoded
            bp = PARTITION_H;
            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0],
                         PARTITION_H, node->h[0]))
                return -1;
        }
    } else {
        // The frame ends before the horizontal midpoint (have_h_split is 0):
        // a single bit chooses between PARTITION_SPLIT and PARTITION_V.
        assert(have_v_split);
        unsigned is_split;
        if (f->frame_thread.pass == 2) {
            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
            is_split = b->bl != bl;
        } else {
            is_split = dav1d_msac_decode_bool(&t->ts->msac,
                           gather_left_partition_prob(pc, bl));
            // PARTITION_V is treated as an error for 4:2:2 content, as in
            // the full-symbol case above
            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
                return 1;
            if (DEBUG_BLOCK_INFO)
                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
                       f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
                       is_split ? PARTITION_SPLIT : PARTITION_V, t->ts->msac.rng);
        }

        assert(bl < BL_8X8);
        if (is_split) {
            // only the two left quadrants are decoded
            const EdgeBranch *const branch = (const EdgeBranch *) node;
            bp = PARTITION_SPLIT;
            if (decode_sb(t, bl + 1, branch->split[0])) return 1;
            t->by += hsz;
            if (decode_sb(t, bl + 1, branch->split[2])) return 1;
            t->by -= hsz;
        } else {
            // only the left half of the vertical partition is decoded
            bp = PARTITION_V;
            if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0],
                         PARTITION_V, node->v[0]))
                return -1;
        }
    }

    // Record the partition context in the above (t->a) and left (t->l)
    // context arrays. Skipped in pass 2, and for splits above 8x8, where the
    // recursive decode_sb() calls reach this update at their own level.
    if (f->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, t->a->partition, bx8, mul * dav1d_al_part_ctx[0][bl][bp]); \
        rep_macro(type, t->l.partition, by8, mul * dav1d_al_part_ctx[1][bl][bp])
        case_set_upto16(hsz,,,);
#undef set_ctx
    }

    return 0;
}
2322 
/*
 * Reset a block context to its initial state.
 *
 * ctx:      context to reset
 * keyframe: non-zero for key/intra frames; selects intra defaults for
 *           `intra` and `mode`, and skips the inter-only fields
 * pass:     frame thread pass; for pass 2 only `intra`, `uvmode` and (on
 *           key/intra frames) `mode` are reset, everything else is left as is
 */
static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
// fill one whole member array of ctx with a single byte value
#define fill_ctx(member, value) \
    memset(ctx->member, value, sizeof(ctx->member))

    // fields that are reset in every pass
    fill_ctx(intra, keyframe);
    fill_ctx(uvmode, DC_PRED);
    if (keyframe) {
        fill_ctx(mode, DC_PRED);
    }

    if (pass != 2) {
        fill_ctx(partition, 0);
        fill_ctx(skip, 0);
        fill_ctx(skip_mode, 0);
        fill_ctx(tx_lpf_y, 2);
        fill_ctx(tx_lpf_uv, 1);
        fill_ctx(tx_intra, -1);
        fill_ctx(tx, TX_64X64);
        if (!keyframe) {
            // inter-only state
            fill_ctx(ref, -1);
            fill_ctx(comp_type, 0);
            fill_ctx(mode, NEARESTMV);
        }
        fill_ctx(lcoef, 0x40);
        fill_ctx(ccoef, 0x40);
        fill_ctx(filter, DAV1D_N_SWITCHABLE_FILTERS);
        fill_ctx(seg_pred, 0);
        fill_ctx(pal_sz, 0);
    }

#undef fill_ctx
}
2349 
// Per-pixel-layout size multipliers, indexed by enum Dav1dPixelLayout.
// Each entry is { Y+U+V, Y+U } * 4, i.e. the combined number of samples in
// the listed planes per luma sample, scaled by 4 to stay integral.
// Example for I420, where each chroma plane has 1/4 of the luma samples:
//   [0] = 4 * (1 + 1/4 + 1/4) = 6,  [1] = 4 * (1 + 1/4) = 5.
// setup_tile() uses [0] to offset the coefficient buffer and [1] (divided
// by 4) to offset the palette index buffer.
static const uint8_t ss_size_mul[4][2] = {
    [DAV1D_PIXEL_LAYOUT_I400] = {  4, 4 },
    [DAV1D_PIXEL_LAYOUT_I420] = {  6, 5 },
    [DAV1D_PIXEL_LAYOUT_I422] = {  8, 6 },
    [DAV1D_PIXEL_LAYOUT_I444] = { 12, 8 },
};
2357 
/*
 * Initialise the tile state `ts` for tile (tile_row, tile_col) of frame `f`.
 *
 * ts:             tile state to fill in
 * f:              frame context the tile belongs to
 * data, sz:       the tile's payload, handed to the symbol decoder
 * tile_row/col:   tile coordinates inside the frame's tiling
 * tile_start_off: offset of this tile used to locate its slice of the
 *                 frame-thread palette-index and coefficient buffers
 *
 * Besides the buffer pointers this copies the input CDFs, resets the
 * quantizer/loopfilter delta state, computes the tile's bounds in 4px units
 * and seeds the reference restoration unit of every plane that has loop
 * restoration enabled.
 */
static void setup_tile(Dav1dTileState *const ts,
                       const Dav1dFrameContext *const f,
                       const uint8_t *const data, const size_t sz,
                       const int tile_row, const int tile_col,
                       const int tile_start_off)
{
    const int sb_shift = f->sb_shift;

    // tile bounds in superblock units
    const int first_col_sb = f->frame_hdr->tiling.col_start_sb[tile_col];
    const int last_col_sb  = f->frame_hdr->tiling.col_start_sb[tile_col + 1];
    const int first_row_sb = f->frame_hdr->tiling.row_start_sb[tile_row];
    const int last_row_sb  = f->frame_hdr->tiling.row_start_sb[tile_row + 1];
    // first column expressed in 128x128 units
    const int first_col_sb128 = first_col_sb >> !f->seq_hdr->sb128;

    // per-tile slices of the frame-thread buffers (if those are allocated)
    const uint8_t *const mul = ss_size_mul[f->cur.p.layout];
    if (f->frame_thread.pal_idx) {
        ts->frame_thread.pal_idx =
            &f->frame_thread.pal_idx[(size_t)tile_start_off * mul[1] / 4];
    } else {
        ts->frame_thread.pal_idx = NULL;
    }
    if (f->frame_thread.cf) {
        ts->frame_thread.cf = (uint8_t*)f->frame_thread.cf +
            (((size_t)tile_start_off * mul[0]) >> !f->seq_hdr->hbd);
    } else {
        ts->frame_thread.cf = NULL;
    }

    // entropy/quantizer state
    dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
    ts->last_qidx = f->frame_hdr->quant.yac;
    memset(ts->last_delta_lf, 0, sizeof(ts->last_delta_lf));

    dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);

    // tile position and bounds (4px units, clipped to the frame)
    ts->tiling.row = tile_row;
    ts->tiling.col = tile_col;
    ts->tiling.col_start = first_col_sb << sb_shift;
    ts->tiling.col_end = imin(last_col_sb << sb_shift, f->bw);
    ts->tiling.row_start = first_row_sb << sb_shift;
    ts->tiling.row_end = imin(last_row_sb << sb_shift, f->bh);

    // Reference Restoration Unit (used for exp coding)
    const int superres = f->frame_hdr->width[0] != f->frame_hdr->width[1];
    const int sb128_row = ts->tiling.row_start >> 5;
    const int unit_row = (ts->tiling.row_start & 16) >> 3;
    int sb_idx, unit_idx;
    if (superres) {
        // vertical components only; the horizontal part is per-plane below
        sb_idx = sb128_row * f->sr_sb128w;
        unit_idx = unit_row;
    } else {
        sb_idx = sb128_row * f->sb128w + first_col_sb128;
        unit_idx = unit_row + ((ts->tiling.col_start & 16) >> 4);
    }

    for (int p = 0; p < 3; p++) {
        // plane has no loop restoration
        if (!((f->lf.restore_planes >> p) & 1U))
            continue;

        Av1RestorationUnit *ref;
        if (superres) {
            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
            const int denom = f->frame_hdr->super_res.width_scale_denominator;
            const int log2_sz = f->frame_hdr->restoration.unit_size[!!p];
            const int round = (8 << log2_sz) - 1;
            const int shift = log2_sz + 3;
            // index of the first restoration unit at/after the tile's start
            const int unit_x =
                (((4 * ts->tiling.col_start * denom) >> ss_hor) + round) >> shift;
            const int px_x = unit_x << (log2_sz + ss_hor);
            const int sb128x = px_x >> 7;
            // that unit lies outside the picture: leave lr_ref[p] untouched
            if (sb128x >= f->sr_sb128w) continue;
            ref = &f->lf.lr_mask[sb_idx + sb128x].lr[p][unit_idx + ((px_x & 64) >> 6)];
        } else {
            ref = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
        }
        ts->lr_ref[p] = ref;

        // initial reference coefficients for the subexp-coded parameters
        ref->filter_v[0] = 3;
        ref->filter_v[1] = -7;
        ref->filter_v[2] = 15;
        ref->filter_h[0] = 3;
        ref->filter_h[1] = -7;
        ref->filter_h[2] = 15;
        ref->sgr_weights[0] = -32;
        ref->sgr_weights[1] = 31;
    }

    if (f->n_tc > 1)
        atomic_init(&ts->progress, first_row_sb);
}
2437 
/*
 * Read the loop restoration parameters of one restoration unit.
 *
 * t:          tile context; symbols are read from t->ts->msac
 * lr:         restoration unit to fill in
 * p:          plane index; for p != 0 the first Wiener tap is not coded and
 *             is set to 0
 * frame_type: frame-level restoration type of this plane. With
 *             DAV1D_RESTORATION_SWITCHABLE the per-unit type is coded as a
 *             symbol, otherwise a single bit selects between `frame_type`
 *             and DAV1D_RESTORATION_NONE.
 *
 * Wiener taps and self-guided weights are coded relative to the tile's
 * reference unit ts->lr_ref[p]; the parameters of the filter type that was
 * not selected are inherited from that reference. After a Wiener or
 * self-guided unit has been read, `lr` becomes the new reference.
 *
 * `lr` may be the very unit ts->lr_ref[p] already points at (setup_tile()
 * seeds the reference with the first unit of the tile), so the inherited
 * parameters are copied with memmove(): memcpy() is undefined when source
 * and destination overlap.
 */
static void read_restoration_info(Dav1dTileContext *const t,
                                  Av1RestorationUnit *const lr, const int p,
                                  const enum Dav1dRestorationType frame_type)
{
    const Dav1dFrameContext *const f = t->f;
    Dav1dTileState *const ts = t->ts;

    // per-unit restoration type
    if (frame_type == DAV1D_RESTORATION_SWITCHABLE) {
        const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                               ts->cdf.m.restore_switchable, 2);
        lr->type = filter ? filter == 2 ? DAV1D_RESTORATION_SGRPROJ :
                                          DAV1D_RESTORATION_WIENER :
                                          DAV1D_RESTORATION_NONE;
    } else {
        const unsigned type =
            dav1d_msac_decode_bool_adapt(&ts->msac,
                frame_type == DAV1D_RESTORATION_WIENER ?
                ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj);
        lr->type = type ? frame_type : DAV1D_RESTORATION_NONE;
    }

    if (lr->type == DAV1D_RESTORATION_WIENER) {
        // each tap is read as (reference + offset) and shifted back
        lr->filter_v[0] = p ? 0 :
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5;
        lr->filter_v[1] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23;
        lr->filter_v[2] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17;

        lr->filter_h[0] = p ? 0 :
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5;
        lr->filter_h[1] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23;
        lr->filter_h[2] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17;
        // inherit the self-guided weights; lr may alias the reference unit
        memmove(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
        ts->lr_ref[p] = lr;
        if (DEBUG_BLOCK_INFO)
            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
                   p, lr->filter_v[0], lr->filter_v[1],
                   lr->filter_v[2], lr->filter_h[0],
                   lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
    } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) {
        const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4);
        const uint16_t *const sgr_params = dav1d_sgr_params[idx];
        lr->sgr_idx = idx;
        // a weight is only coded when the matching sgr parameter is
        // non-zero; otherwise a fixed value (0 resp. 95) is used
        lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac,
            ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0;
        lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac,
            ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95;
        // inherit the Wiener taps; lr may alias the reference unit
        memmove(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
        memmove(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
        ts->lr_ref[p] = lr;
        if (DEBUG_BLOCK_INFO)
            printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
                   p, lr->sgr_idx, lr->sgr_weights[0],
                   lr->sgr_weights[1], ts->msac.rng);
    }
}
2503 
dav1d_decode_tile_sbrow(Dav1dTileContext * const t)2504 int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) {
2505     const Dav1dFrameContext *const f = t->f;
2506     const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64;
2507     Dav1dTileState *const ts = t->ts;
2508     const Dav1dContext *const c = f->c;
2509     const int sb_step = f->sb_step;
2510     const int tile_row = ts->tiling.row, tile_col = ts->tiling.col;
2511     const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
2512     const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
2513 
2514     if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
2515         dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start,
2516                                      ts->tiling.col_end, ts->tiling.row_start,
2517                                      ts->tiling.row_end, t->by >> f->sb_shift,
2518                                      ts->tiling.row);
2519     }
2520 
2521     reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), f->frame_thread.pass);
2522     if (f->frame_thread.pass == 2) {
2523         for (t->bx = ts->tiling.col_start,
2524              t->a = f->a + col_sb128_start + tile_row * f->sb128w;
2525              t->bx < ts->tiling.col_end; t->bx += sb_step)
2526         {
2527             if (atomic_load_explicit(c->flush, memory_order_acquire))
2528                 return 1;
2529             if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
2530                 return 1;
2531             if (t->bx & 16 || f->seq_hdr->sb128)
2532                 t->a++;
2533         }
2534         f->bd_fn.backup_ipred_edge(t);
2535         return 0;
2536     }
2537 
2538     // error out on symbol decoder overread
2539     if (ts->msac.cnt < -15) return 1;
2540 
2541     if (f->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) {
2542         if (c->n_fc > 1) for (int n = 0; n < 7; n++)
2543             if (dav1d_thread_picture_wait(&f->refp[n], 4 * (t->by + sb_step),
2544                                           PLANE_TYPE_BLOCK))
2545             {
2546                 return 1;
2547             }
2548         dav1d_refmvs_load_tmvs(&f->rf, ts->tiling.row,
2549                                ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
2550                                t->by >> 1, (t->by + sb_step) >> 1);
2551     }
2552     memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv));
2553     const int sb128y = t->by >> 5;
2554     for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w,
2555          t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start;
2556          t->bx < ts->tiling.col_end; t->bx += sb_step)
2557     {
2558         if (atomic_load_explicit(c->flush, memory_order_acquire))
2559             return 1;
2560         if (root_bl == BL_128X128) {
2561             t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx;
2562             t->cur_sb_cdef_idx_ptr[0] = -1;
2563             t->cur_sb_cdef_idx_ptr[1] = -1;
2564             t->cur_sb_cdef_idx_ptr[2] = -1;
2565             t->cur_sb_cdef_idx_ptr[3] = -1;
2566         } else {
2567             t->cur_sb_cdef_idx_ptr =
2568                 &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) +
2569                                       ((t->by & 16) >> 3)];
2570             t->cur_sb_cdef_idx_ptr[0] = -1;
2571         }
2572         // Restoration filter
2573         for (int p = 0; p < 3; p++) {
2574             if (!((f->lf.restore_planes >> p) & 1U))
2575                 continue;
2576 
2577             const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2578             const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2579             const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
2580             const int y = t->by * 4 >> ss_ver;
2581             const int h = (f->cur.p.h + ss_ver) >> ss_ver;
2582 
2583             const int unit_size = 1 << unit_size_log2;
2584             const unsigned mask = unit_size - 1;
2585             if (y & mask) continue;
2586             const int half_unit = unit_size >> 1;
2587             // Round half up at frame boundaries, if there's more than one
2588             // restoration unit
2589             if (y && y + half_unit > h) continue;
2590 
2591             const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p];
2592 
2593             if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
2594                 const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
2595                 const int n_units = imax(1, (w + half_unit) >> unit_size_log2);
2596 
2597                 const int d = f->frame_hdr->super_res.width_scale_denominator;
2598                 const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
2599                 const int x0 = ((4 *  t->bx            * d >> ss_hor) + rnd) >> shift;
2600                 const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;
2601 
2602                 for (int x = x0; x < imin(x1, n_units); x++) {
2603                     const int px_x = x << (unit_size_log2 + ss_hor);
2604                     const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
2605                     const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
2606                     Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
2607 
2608                     read_restoration_info(t, lr, p, frame_type);
2609                 }
2610             } else {
2611                 const int x = 4 * t->bx >> ss_hor;
2612                 if (x & mask) continue;
2613                 const int w = (f->cur.p.w + ss_hor) >> ss_hor;
2614                 // Round half up at frame boundaries, if there's more than one
2615                 // restoration unit
2616                 if (x && x + half_unit > w) continue;
2617                 const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
2618                 const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
2619                 Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
2620 
2621                 read_restoration_info(t, lr, p, frame_type);
2622             }
2623         }
2624         if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
2625             return 1;
2626         if (t->bx & 16 || f->seq_hdr->sb128) {
2627             t->a++;
2628             t->lf_mask++;
2629         }
2630     }
2631 
2632     if (f->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) {
2633         dav1d_refmvs_save_tmvs(&t->rt,
2634                                ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
2635                                t->by >> 1, (t->by + sb_step) >> 1);
2636     }
2637 
2638     // backup pre-loopfilter pixels for intra prediction of the next sbrow
2639     if (f->frame_thread.pass != 1)
2640         f->bd_fn.backup_ipred_edge(t);
2641 
2642     // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix"
2643     // up the initial value in neighbour tiles when running the loopfilter
2644     int align_h = (f->bh + 31) & ~31;
2645     memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
2646            &t->l.tx_lpf_y[t->by & 16], sb_step);
2647     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2648     align_h >>= ss_ver;
2649     memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
2650            &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
2651 
2652     return 0;
2653 }
2654 
/*
 * Decode the frame described by the frame context `f`.
 *
 * The function runs in three phases:
 *  1. Setup: (re)size the scratch buffers cached in `f` (tile states,
 *     above-contexts, frame-thread buffers, CDEF/loop-restoration line
 *     buffers, loopfilter masks, ipred edges, ...). Each buffer is only
 *     reallocated when the size it was last allocated for differs from the
 *     size needed now; on allocation failure the cached size is reset to 0.
 *  2. Per-frame init: dequant tables, quantizer matrices, jnt_comp weights,
 *     loopfilter pointers, input CDF, and per-tile state from the tile data.
 *  3. Decoding: one pass (pass 0) or two passes (pass 1 then 2), either
 *     inline on f->tc or by handing tasks to the tile worker threads.
 *
 * Returns 0 on success. A failure before the "setup dequant tables" step
 * returns DAV1D_ERR(ENOMEM); a failure after it returns DAV1D_ERR(EINVAL).
 *
 * On every path, success or failure, the code after the `error` label
 * releases the references held by `f` (reference pictures, current picture,
 * CDFs, segmentation maps, mvs, sequence/frame headers and tile data).
 */
dav1d_decode_frame(Dav1dFrameContext * const f)2655 int dav1d_decode_frame(Dav1dFrameContext *const f) {
2656     const Dav1dContext *const c = f->c;
         // every `goto error` in the setup phase below reports ENOMEM
2657     int retval = DAV1D_ERR(ENOMEM);
2658 
         // tile threading: maintain the task index -> (sby, tile index) table
2659     if (f->n_tc > 1) {
             // one entry per (superblock row, tile column) pair
2660         const int titsati_sz = f->frame_hdr->tiling.cols * f->sbh;
2661         if (titsati_sz != f->tile_thread.titsati_sz) {
2662             freep(&f->tile_thread.task_idx_to_sby_and_tile_idx);
2663             f->tile_thread.task_idx_to_sby_and_tile_idx =
2664                 malloc(sizeof(*f->tile_thread.task_idx_to_sby_and_tile_idx) *
2665                        titsati_sz);
2666             if (!f->tile_thread.task_idx_to_sby_and_tile_idx) {
2667                 f->tile_thread.titsati_sz = 0;
2668                 goto error;
2669             }
2670             f->tile_thread.titsati_sz = titsati_sz;
2671         }
             // refill the table only if the tiling layout (column count, row
             // count or row start positions) differs from the one recorded
             // the last time the table was filled
2672         if (f->tile_thread.titsati_init[0] != f->frame_hdr->tiling.cols ||
2673             f->tile_thread.titsati_init[1] != f->frame_hdr->tiling.rows ||
2674             memcmp(f->frame_hdr->tiling.row_start_sb, f->tile_thread.titsati_index_rows,
2675                    sizeof(*f->tile_thread.titsati_index_rows) *
2676                        (f->frame_hdr->tiling.rows + 1)))
2677         {
                 // tasks are numbered by tile row, then superblock row within
                 // that tile row, then tile column
2678             for (int tile_row = 0, task_idx = 0;
2679                  tile_row < f->frame_hdr->tiling.rows; tile_row++)
2680             {
2681                 for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
2682                      sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1]; sby++)
2683                 {
2684                     for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols;
2685                          tile_col++, task_idx++)
2686                     {
2687                         f->tile_thread.task_idx_to_sby_and_tile_idx[task_idx][0] = sby;
2688                         f->tile_thread.task_idx_to_sby_and_tile_idx[task_idx][1] =
2689                             tile_row * f->frame_hdr->tiling.cols + tile_col;
2690                     }
2691                 }
2692             }
                 // remember the layout the table was built for
2693             f->tile_thread.titsati_init[0] = f->frame_hdr->tiling.cols;
2694             f->tile_thread.titsati_init[1] = f->frame_hdr->tiling.rows;
2695             memcpy(f->tile_thread.titsati_index_rows, f->frame_hdr->tiling.row_start_sb,
2696                    sizeof(*f->tile_thread.titsati_index_rows) *
2697                        (f->frame_hdr->tiling.rows + 1));
2698         }
2699     }
2700 
         // (re)allocate the per-tile state array when the tile count changed
2701     const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
2702     if (n_ts != f->n_ts) {
             // frame threading additionally needs one start offset per tile
2703         if (c->n_fc > 1) {
2704             freep(&f->frame_thread.tile_start_off);
2705             f->frame_thread.tile_start_off =
2706                 malloc(sizeof(*f->frame_thread.tile_start_off) * n_ts);
2707             if (!f->frame_thread.tile_start_off) {
                     // tear down the lock/cond of every existing tile state
                     // and record that no tile state is initialized anymore
2708                 for (int n = 0; n < f->n_ts; n++) {
2709                     Dav1dTileState *const ts = &f->ts[n];
2710                     pthread_cond_destroy(&ts->tile_thread.cond);
2711                     pthread_mutex_destroy(&ts->tile_thread.lock);
2712                 }
2713                 f->n_ts = 0;
2714                 goto error;
2715             }
2716         }
2717         Dav1dTileState *ts_new = dav1d_alloc_aligned(sizeof(*f->ts) * n_ts, 32);
2718         if (!ts_new) goto error;
2719         if (n_ts > f->n_ts) {
                 // growing: carry over the existing states bytewise, then
                 // create lock/cond for the additional ones
2720             if (f->ts) {
2721                 memcpy(ts_new, f->ts, sizeof(*f->ts) * f->n_ts);
2722                 dav1d_free_aligned(f->ts);
2723             }
2724             f->ts = ts_new;
                 // f->n_ts is bumped only after an entry was fully
                 // initialized, so on failure it still counts exactly the
                 // tile states that own a valid lock/cond pair
2725             for (int n = f->n_ts; n < n_ts; f->n_ts = ++n) {
2726                 Dav1dTileState *const ts = &f->ts[n];
2727                 if (pthread_mutex_init(&ts->tile_thread.lock, NULL)) goto error;
2728                 if (pthread_cond_init(&ts->tile_thread.cond, NULL)) {
2729                     pthread_mutex_destroy(&ts->tile_thread.lock);
2730                     goto error;
2731                 }
2732             }
2733         } else {
                 // shrinking: destroy lock/cond of the states being dropped
                 // and keep the first n_ts ones
2734             for (int n = n_ts; n < f->n_ts; n++) {
2735                 Dav1dTileState *const ts = &f->ts[n];
2736                 pthread_cond_destroy(&ts->tile_thread.cond);
2737                 pthread_mutex_destroy(&ts->tile_thread.lock);
2738             }
2739             memcpy(ts_new, f->ts, sizeof(*f->ts) * n_ts);
2740             dav1d_free_aligned(f->ts);
2741             f->n_ts = n_ts;
2742             f->ts = ts_new;
2743         }
2744     }
2745 
         // f->a holds sb128w entries for each tile row; the entries are reset
         // at the start of every decoding pass further down
2746     const int a_sz = f->sb128w * f->frame_hdr->tiling.rows;
2747     if (a_sz != f->a_sz) {
2748         freep(&f->a);
2749         f->a = malloc(sizeof(*f->a) * a_sz);
2750         if (!f->a) {
2751             f->a_sz = 0;
2752             goto error;
2753         }
2754         f->a_sz = a_sz;
2755     }
2756 
         // values shared by several of the size computations below
2757     const int num_sb128 = f->sb128w * f->sb128h;
2758     const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
         // 0 or 1; used as a shift to double byte sizes when seq_hdr->hbd is set
2759     const int hbd = !!f->seq_hdr->hbd;
         // buffers that are only needed with frame threading
2760     if (c->n_fc > 1) {
             // per-tile start offset; after pass 1 it is used to derive each
             // tile's position in the pal_idx and cf buffers
2761         int tile_idx = 0;
2762         for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
2763             int row_off = f->frame_hdr->tiling.row_start_sb[tile_row] *
2764                           f->sb_step * 4 * f->sb128w * 128;
2765             int b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
2766                           f->frame_hdr->tiling.row_start_sb[tile_row]) * f->sb_step * 4;
2767             for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
2768                 f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff *
2769                     f->frame_hdr->tiling.col_start_sb[tile_col] * f->sb_step * 4;
2770             }
2771         }
2772 
             // coefficient buffer; it is zeroed whenever it is (re)allocated
2773         const int cf_sz = (num_sb128 * size_mul[0]) << hbd;
2774         if (cf_sz != f->frame_thread.cf_sz) {
2775             dav1d_freep_aligned(&f->frame_thread.cf);
2776             f->frame_thread.cf =
2777                 dav1d_alloc_aligned((size_t)cf_sz * 128 * 128 / 2, 32);
2778             if (!f->frame_thread.cf) {
2779                 f->frame_thread.cf_sz = 0;
2780                 goto error;
2781             }
2782             memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2);
2783             f->frame_thread.cf_sz = cf_sz;
2784         }
2785 
             // palette buffers are kept only while the frame header allows
             // screen content tools; otherwise any existing ones are freed
2786         if (f->frame_hdr->allow_screen_content_tools) {
2787             if (num_sb128 != f->frame_thread.pal_sz) {
2788                 dav1d_freep_aligned(&f->frame_thread.pal);
2789                 f->frame_thread.pal =
2790                     dav1d_alloc_aligned(sizeof(*f->frame_thread.pal) *
2791                                         num_sb128 * 16 * 16, 32);
2792                 if (!f->frame_thread.pal) {
2793                     f->frame_thread.pal_sz = 0;
2794                     goto error;
2795                 }
2796                 f->frame_thread.pal_sz = num_sb128;
2797             }
2798 
2799             const int pal_idx_sz = num_sb128 * size_mul[1];
2800             if (pal_idx_sz != f->frame_thread.pal_idx_sz) {
2801                 dav1d_freep_aligned(&f->frame_thread.pal_idx);
2802                 f->frame_thread.pal_idx =
2803                     dav1d_alloc_aligned(sizeof(*f->frame_thread.pal_idx) *
2804                                         pal_idx_sz * 128 * 128 / 4, 32);
2805                 if (!f->frame_thread.pal_idx) {
2806                     f->frame_thread.pal_idx_sz = 0;
2807                     goto error;
2808                 }
2809                 f->frame_thread.pal_idx_sz = pal_idx_sz;
2810             }
2811         } else if (f->frame_thread.pal) {
2812             dav1d_freep_aligned(&f->frame_thread.pal);
2813             dav1d_freep_aligned(&f->frame_thread.pal_idx);
2814             f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0;
2815         }
2816     }
2817 
2818     // (re)allocate the CDEF line buffer when the luma or chroma stride changed
2819     const ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1];
2820     if (y_stride != f->lf.cdef_line_sz[0] || uv_stride != f->lf.cdef_line_sz[1]) {
2821         dav1d_free_aligned(f->lf.cdef_line_buf);
             // 64 spare bytes + 4 luma strides + 8 chroma strides
2822         size_t alloc_sz = 64;
2823         alloc_sz += (y_stride  < 0 ? -y_stride  : y_stride ) * 4;
2824         alloc_sz += (uv_stride < 0 ? -uv_stride : uv_stride) * 8;
2825         uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(alloc_sz, 32);
2826         if (!ptr) {
2827             f->lf.cdef_line_sz[0] = f->lf.cdef_line_sz[1] = 0;
2828             goto error;
2829         }
2830 
             // The first 32 bytes of the buffer stay unused. Each
             // cdef_line[][] entry then owns a slot of two strides; with a
             // negative stride the pointer is placed one |stride| into its
             // slot instead of at the slot start.
2831         ptr += 32;
2832         if (y_stride < 0) {
2833             f->lf.cdef_line[0][0] = ptr - y_stride * 1;
2834             f->lf.cdef_line[1][0] = ptr - y_stride * 3;
2835             ptr -= y_stride * 4;
2836         } else {
2837             f->lf.cdef_line[0][0] = ptr + y_stride * 0;
2838             f->lf.cdef_line[1][0] = ptr + y_stride * 2;
2839             ptr += y_stride * 4;
2840         }
             // ptr now points past the four luma strides
2841         if (uv_stride < 0) {
2842             f->lf.cdef_line[0][1] = ptr - uv_stride * 1;
2843             f->lf.cdef_line[0][2] = ptr - uv_stride * 3;
2844             f->lf.cdef_line[1][1] = ptr - uv_stride * 5;
2845             f->lf.cdef_line[1][2] = ptr - uv_stride * 7;
2846         } else {
2847             f->lf.cdef_line[0][1] = ptr + uv_stride * 0;
2848             f->lf.cdef_line[0][2] = ptr + uv_stride * 2;
2849             f->lf.cdef_line[1][1] = ptr + uv_stride * 4;
2850             f->lf.cdef_line[1][2] = ptr + uv_stride * 6;
2851         }
2852 
2853         f->lf.cdef_line_sz[0] = (int) y_stride;
2854         f->lf.cdef_line_sz[1] = (int) uv_stride;
2855     }
2856 
         // loop-restoration line buffer: line size is the width of f->sr_cur
         // rounded up to a multiple of 32, doubled when hbd is set
2857     const int lr_line_sz = ((f->sr_cur.p.p.w + 31) & ~31) << hbd;
2858     if (lr_line_sz != f->lf.lr_line_sz) {
             // all three plane pointers share the allocation owned by entry 0
2859         dav1d_freep_aligned(&f->lf.lr_lpf_line[0]);
             // 12 lines per plane without post-filter threads; otherwise
             // sbh * (4 << sb128) lines per plane
2860         const int num_lines = c->n_pfc > 1 ? f->sbh * (4 << f->seq_hdr->sb128) : 12;
2861         // lr simd may overread the input, so slightly over-allocate the lpf buffer
2862         uint8_t *lr_ptr = dav1d_alloc_aligned(lr_line_sz * num_lines * 3 + 64, 32);
2863         if (!lr_ptr) {
2864             f->lf.lr_line_sz = 0;
2865             goto error;
2866         }
2867 
             // carve the allocation into three consecutive per-plane regions
2868         for (int pl = 0; pl <= 2; pl++) {
2869             f->lf.lr_lpf_line[pl] = lr_ptr;
2870             lr_ptr += lr_line_sz * num_lines;
2871         }
2872 
2873         f->lf.lr_line_sz = lr_line_sz;
2874     }
2875 
2876     // update allocation for loopfilter masks
2877     if (num_sb128 != f->lf.mask_sz) {
2878         freep(&f->lf.mask);
2879         freep(&f->lf.level);
2880         f->lf.mask = malloc(sizeof(*f->lf.mask) * num_sb128);
2881         // over-allocate by 3 bytes since some of the SIMD implementations
2882         // index this from the level type and can thus over-read by up to 3
2883         f->lf.level = malloc(sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3);
2884         if (!f->lf.mask || !f->lf.level) {
2885             f->lf.mask_sz = 0;
2886             goto error;
2887         }
             // the frame-threading block buffers are sized from num_sb128 as
             // well, so they are resized under the same mask_sz check
2888         if (c->n_fc > 1) {
2889             freep(&f->frame_thread.b);
2890             freep(&f->frame_thread.cbi);
2891             f->frame_thread.b = malloc(sizeof(*f->frame_thread.b) *
2892                                        num_sb128 * 32 * 32);
2893             f->frame_thread.cbi = malloc(sizeof(*f->frame_thread.cbi) *
2894                                          num_sb128 * 32 * 32);
2895             if (!f->frame_thread.b || !f->frame_thread.cbi) {
2896                 f->lf.mask_sz = 0;
2897                 goto error;
2898             }
2899         }
2900         f->lf.mask_sz = num_sb128;
2901     }
2902 
         // loop-restoration masks are sized from the width of f->sr_cur
         // (in 128-pixel units, rounded up) rather than from f->sb128w
2903     f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;
2904     const int lr_mask_sz = f->sr_sb128w * f->sb128h;
2905     if (lr_mask_sz != f->lf.lr_mask_sz) {
2906         freep(&f->lf.lr_mask);
2907         f->lf.lr_mask = malloc(sizeof(*f->lf.lr_mask) * lr_mask_sz);
2908         if (!f->lf.lr_mask) {
2909             f->lf.lr_mask_sz = 0;
2910             goto error;
2911         }
2912         f->lf.lr_mask_sz = lr_mask_sz;
2913     }
         // bitmask: bit p is set when plane p has a restoration type other
         // than DAV1D_RESTORATION_NONE
2914     f->lf.restore_planes =
2915         ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
2916         ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
2917         ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
         // lim_lut is recomputed only when the sharpness value changed
2918     if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) {
2919         dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness);
2920         f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness;
2921     }
2922     dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 });
         // the loopfilter masks are cleared for every frame
2923     memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128);
2924 
         // intra-prediction edge buffer: one allocation split into three
         // equally sized regions, one per ipred_edge[] entry
2925     const int ipred_edge_sz = f->sbh * f->sb128w << hbd;
2926     if (ipred_edge_sz != f->ipred_edge_sz) {
2927         dav1d_freep_aligned(&f->ipred_edge[0]);
2928         uint8_t *ptr = f->ipred_edge[0] =
2929             dav1d_alloc_aligned(ipred_edge_sz * 128 * 3, 32);
2930         if (!ptr) {
2931             f->ipred_edge_sz = 0;
2932             goto error;
2933         }
2934         f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1;
2935         f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2;
2936         f->ipred_edge_sz = ipred_edge_sz;
2937     }
2938 
         // tx_lpf_right_edge: one allocation split into two halves of
         // re_sz * 32 bytes each (entry 0 and entry 1)
2939     const int re_sz = f->sb128h * f->frame_hdr->tiling.cols;
2940     if (re_sz != f->lf.re_sz) {
2941         freep(&f->lf.tx_lpf_right_edge[0]);
2942         f->lf.tx_lpf_right_edge[0] = malloc(re_sz * 32 * 2);
2943         if (!f->lf.tx_lpf_right_edge[0]) {
2944             f->lf.re_sz = 0;
2945             goto error;
2946         }
2947         f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32;
2948         f->lf.re_sz = re_sz;
2949     }
2950 
2951     // init ref mvs
2952     if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
2953         const int ret =
2954             dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr,
2955                                     f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs, f->n_tc);
2956         if (ret < 0) goto error;
2957     }
2958 
2959     // create post-filtering tasks
2960     if (c->n_pfc > 1)
2961         if (dav1d_task_create_filter_sbrow(f))
2962             goto error;
2963 
         // setup is complete: every failure from here on reports EINVAL
2964     retval = DAV1D_ERR(EINVAL);
2965 
2966     // setup dequant tables
2967     init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq);
         // quantizer matrices: luma reads dav1d_qm_tbl[..][0], both chroma
         // planes read dav1d_qm_tbl[..][1]; all entries are cleared when
         // quant.qm is not set
2968     if (f->frame_hdr->quant.qm)
2969         for (int i = 0; i < N_RECT_TX_SIZES; i++) {
2970             f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i];
2971             f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i];
2972             f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i];
2973         }
2974     else
2975         memset(f->qm, 0, sizeof(f->qm));
2976 
2977     // setup jnt_comp weights
         // one weight per reference pair (i, j) with i < j
2978     if (f->frame_hdr->switchable_comp_refs) {
2979         for (int i = 0; i < 7; i++) {
2980             const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset;
2981 
2982             for (int j = i + 1; j < 7; j++) {
2983                 const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset;
2984 
                     // distances to the current frame, clamped to 31; note
                     // that d1 belongs to reference i and d0 to reference j
2985                 const unsigned d1 =
2986                     imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc,
2987                                           f->cur.frame_hdr->frame_offset)), 31);
2988                 const unsigned d0 =
2989                     imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc,
2990                                           f->cur.frame_hdr->frame_offset)), 31);
2991                 const int order = d0 <= d1;
2992 
2993                 static const uint8_t quant_dist_weight[3][2] = {
2994                     { 2, 3 }, { 2, 5 }, { 2, 7 }
2995                 };
2996                 static const uint8_t quant_dist_lookup_table[4][2] = {
2997                     { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
2998                 };
2999 
                     // pick the first weight row that satisfies the break
                     // condition; if none does, k ends at 3 and selects the
                     // last row of the 4-row lookup table
3000                 int k;
3001                 for (k = 0; k < 3; k++) {
3002                     const int c0 = quant_dist_weight[k][order];
3003                     const int c1 = quant_dist_weight[k][!order];
3004                     const int d0_c0 = d0 * c0;
3005                     const int d1_c1 = d1 * c1;
3006                     if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
3007                 }
3008 
3009                 f->jnt_weights[i][j] = quant_dist_lookup_table[k][order];
3010             }
3011         }
3012     }
3013 
3014     /* Init loopfilter pointers. Increasing NULL pointers is technically UB,
3015      * so just point the chroma pointers in 4:0:0 to the luma plane here to
3016      * avoid having additional in-loop branches in various places. We never
3017      * dereference those pointers so it doesn't really matter what they
3018      * point at, as long as the pointers are valid. */
3019     const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
3020     f->lf.mask_ptr = f->lf.mask;
3021     f->lf.p[0] = f->cur.data[0];
3022     f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0];
3023     f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0];
3024     f->lf.sr_p[0] = f->sr_cur.p.data[0];
3025     f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0];
3026     f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0];
3027     f->lf.tile_row = 1;
3028 
         // wait for the input CDF; when this frame refreshes the context,
         // the output CDF starts out as a copy of the input CDF
3029     dav1d_cdf_thread_wait(&f->in_cdf);
3030     if (f->frame_hdr->refresh_context)
3031         dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
3032 
3033     // parse individual tiles per tile group
         // tile_row/tile_col track the position of the next tile across all
         // tile groups; update_set records whether the tile selected by
         // tiling.update was seen while refresh_context is set
3034     int update_set = 0, tile_row = 0, tile_col = 0;
3035     for (int i = 0; i < f->n_tile_data; i++) {
3036         const uint8_t *data = f->tile[i].data.data;
3037         size_t size = f->tile[i].data.sz;
3038 
3039         for (int j = f->tile[i].start; j <= f->tile[i].end; j++) {
3040             size_t tile_sz;
3041             if (j == f->tile[i].end) {
                     // the last tile of a group takes all remaining bytes
3042                 tile_sz = size;
3043             } else {
                     // every other tile is preceded by an n_bytes-long
                     // little-endian field holding (tile size - 1); both the
                     // field and the resulting size must fit the remaining data
3044                 if (f->frame_hdr->tiling.n_bytes > size) goto error;
3045                 tile_sz = 0;
3046                 for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++)
3047                     tile_sz |= (unsigned)*data++ << (k * 8);
3048                 tile_sz++;
3049                 size -= f->frame_hdr->tiling.n_bytes;
3050                 if (tile_sz > size) goto error;
3051             }
3052 
3053             setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++,
3054                        c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0);
3055 
                 // wrap around to the first column of the next tile row
3056             if (tile_col == f->frame_hdr->tiling.cols) {
3057                 tile_col = 0;
3058                 tile_row++;
3059             }
3060             if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context)
3061                 update_set = 1;
3062             data += tile_sz;
3063             size -= tile_sz;
3064         }
3065     }
3066 
3067     // 2-pass decoding:
3068     // - enabled for frame-threading, so that one frame can do symbol parsing
3069     //   as another (or multiple) are doing reconstruction. One advantage here
3070     //   is that although reconstruction is limited by reference availability,
3071     //   symbol parsing is not. Therefore, symbol parsing can effectively use
3072     //   row and col tile threading, but reconstruction only col tile threading;
3073     // - pass 0 means no 2-pass;
3074     // - pass 1 means symbol parsing only;
3075     // - pass 2 means reconstruction and loop filtering.
3076 
         // the loop below runs pass 0 only when uses_2pass is 0, and passes
         // 1 and 2 when it is 1
3077     const int uses_2pass = c->n_fc > 1 && f->frame_hdr->refresh_context;
3078     for (f->frame_thread.pass = uses_2pass;
3079          f->frame_thread.pass <= 2 * uses_2pass; f->frame_thread.pass++)
3080     {
             // plane type under which progress of this pass is signalled
3081         const enum PlaneType progress_plane_type =
3082             f->frame_thread.pass == 0 ? PLANE_TYPE_ALL :
3083             f->frame_thread.pass == 1 ? PLANE_TYPE_BLOCK : PLANE_TYPE_Y;
3084 
             // reset all above-contexts at the start of each pass
3085         for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++)
3086             reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), f->frame_thread.pass);
3087 
             // inline decoding: a single tile context, or post-filter
             // threading combined with a single-tile frame
3088         if (f->n_tc == 1 || (c->n_pfc > 1 && f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows == 1)) {
3089             Dav1dTileContext *const t = f->tc;
3090 
3091             // no tile threading - we explicitly interleave tile/sbrow decoding
3092             // and post-filtering, so that the full process runs in-line, so
3093             // that frame threading is still possible
                 // (the loop-local tile_row/tile_col below shadow the
                 // function-scope variables used during tile parsing)
3094             for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
3095                 const int sbh_end =
3096                     imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh);
3097                 for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
3098                      sby < sbh_end; sby++)
3099                 {
                         // block row at which this superblock row starts
3100                     t->by = sby << (4 + f->seq_hdr->sb128);
3101                     const int by_end = (t->by + f->sb_step) >> 1;
3102                     if (f->frame_thread.pass <= 1 && f->frame_hdr->use_ref_frame_mvs) {
                             // with frame threading, wait on each of the 7
                             // references (PLANE_TYPE_BLOCK progress) before
                             // loading the temporal MVs for this row
3103                         if (c->n_fc > 1) for (int n = 0; n < 7; n++)
3104                             if (dav1d_thread_picture_wait(&f->refp[n],
3105                                                           4 * (t->by + f->sb_step),
3106                                                           PLANE_TYPE_BLOCK))
3107                             {
3108                                 goto error;
3109                             }
3110                         dav1d_refmvs_load_tmvs(&f->rf, tile_row,
3111                                                0, f->bw >> 1, t->by >> 1, by_end);
3112                     }
                         // decode this superblock row of every tile column
3113                     for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
3114                         t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col];
3115                         if (dav1d_decode_tile_sbrow(t)) goto error;
3116                     }
3117                     if (f->frame_thread.pass <= 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) {
3118                         dav1d_refmvs_save_tmvs(&t->rt, 0, f->bw >> 1, t->by >> 1, by_end);
3119                     }
3120 
3121                     // loopfilter + cdef + restoration
3122                     if (f->frame_thread.pass != 1) {
3123                         if (c->n_pfc == 1)
3124                             f->bd_fn.filter_sbrow(f, sby);
3125                         else {
                                 // hand this row to the post-filter threads:
                                 // mark its first task as started and schedule
                                 // it if it is already in the READY state
3126                             pthread_mutex_lock(&f->lf.thread.pftd->lock);
3127                             if (f->lf.thread.npf != 0 && !f->lf.thread.done) {
                                     // NOTE: this Dav1dTask `t` shadows the
                                     // tile context `t` of the enclosing scope
3128                                 Dav1dTask *const t = &f->lf.thread.tasks[sby * f->lf.thread.npf];
3129                                 t->start = 1;
3130                                 if (t->status == DAV1D_TASK_READY)
3131                                     dav1d_task_schedule(f->lf.thread.pftd, t);
3132                             }
3133                             pthread_mutex_unlock(&f->lf.thread.pftd->lock);
3134                         }
3135                     }
                         // progress is signalled from here unless the row was
                         // handed to post-filter tasks above
3136                     if (c->n_pfc == 1 || f->frame_thread.pass == 1 || f->lf.thread.npf == 0)
3137                         dav1d_thread_picture_signal(&f->sr_cur,
3138                                                     (sby + 1) * f->sb_step * 4,
3139                                                     progress_plane_type);
3140                 }
3141             }
3142         } else {
3143             // signal available tasks to worker threads
3144             int num_tasks;
3145 
3146             pthread_mutex_lock(&f->tile_thread.lock);
3147             assert(!f->tile_thread.tasks_left);
3148             if (f->frame_thread.pass == 1 || f->n_tc >= f->frame_hdr->tiling.cols) {
3149                 // we can (or in fact, if >, we need to) do full tile decoding.
3150                 // loopfilter happens below
3151                 num_tasks = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
3152             } else {
3153                 // we need to interleave sbrow decoding for all tile cols in a
3154                 // tile row, since otherwise subsequent threads will be blocked
3155                 // waiting for the post-filter to complete
3156                 num_tasks = f->sbh * f->frame_hdr->tiling.cols;
3157             }
3158             f->tile_thread.num_tasks = f->tile_thread.tasks_left = num_tasks;
3159             pthread_cond_broadcast(&f->tile_thread.cond);
3160             pthread_mutex_unlock(&f->tile_thread.lock);
3161 
                 // walk the superblock rows in order; each row is filtered
                 // once every tile column has progressed past it
3162             for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
3163                 for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
3164                      sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1]; sby++)
3165                 {
3166                     for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols;
3167                          tile_col++)
3168                     {
3169                         int progress;
3170                         Dav1dTileState *const ts =
3171                             &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col];
3172 
                             // lock-free check first; only take the lock and
                             // wait on the tile's condition variable if the
                             // tile has not yet progressed past this row
3173                         if ((progress = atomic_load(&ts->progress)) <= sby) {
3174                             pthread_mutex_lock(&ts->tile_thread.lock);
3175                             while ((progress = atomic_load(&ts->progress)) <= sby)
3176                                 pthread_cond_wait(&ts->tile_thread.cond,
3177                                                   &ts->tile_thread.lock);
3178                             pthread_mutex_unlock(&ts->tile_thread.lock);
3179                         }
3180                         if (progress == TILE_ERROR) {
                                 // report the failure on the output picture,
                                 // then wait until `available` has the low
                                 // n_tc bits all set before bailing out
3181                             dav1d_thread_picture_signal(&f->sr_cur, FRAME_ERROR,
3182                                                         PLANE_TYPE_ALL);
3183                             const uint64_t all_mask = ~0ULL >> (64 - f->n_tc);
3184                             pthread_mutex_lock(&f->tile_thread.lock);
3185                             while (f->tile_thread.available != all_mask)
3186                                 pthread_cond_wait(&f->tile_thread.icond, &f->tile_thread.lock);
3187                             pthread_mutex_unlock(&f->tile_thread.lock);
3188                             goto error;
3189                         }
3190                     }
3191 
3192                     // loopfilter + cdef + restoration
3193                     if (f->frame_thread.pass != 1) {
3194                         if (c->n_pfc == 1)
3195                             f->bd_fn.filter_sbrow(f, sby);
3196                         else {
                                 // same hand-off to the post-filter threads
                                 // as in the inline path above
3197                             pthread_mutex_lock(&f->lf.thread.pftd->lock);
3198                             if (f->lf.thread.npf != 0 && !f->lf.thread.done) {
3199                                 Dav1dTask *const t = &f->lf.thread.tasks[sby * f->lf.thread.npf];
3200                                 t->start = 1;
3201                                 if (t->status == DAV1D_TASK_READY)
3202                                     dav1d_task_schedule(f->lf.thread.pftd, t);
3203                             }
3204                             pthread_mutex_unlock(&f->lf.thread.pftd->lock);
3205                         }
3206                     }
3207                     if (c->n_pfc == 1 || f->frame_thread.pass == 1 || f->lf.thread.npf == 0)
3208                         dav1d_thread_picture_signal(&f->sr_cur,
3209                                                     (sby + 1) * f->sb_step * 4,
3210                                                     progress_plane_type);
3211                 }
3212             }
3213 
                 // wait until `available` has the low n_tc bits all set
                 // before moving on to the next pass
3214             const uint64_t all_mask = ~0ULL >> (64 - f->n_tc);
3215             pthread_mutex_lock(&f->tile_thread.lock);
3216             while (f->tile_thread.available != all_mask)
3217                 pthread_cond_wait(&f->tile_thread.icond, &f->tile_thread.lock);
3218             pthread_mutex_unlock(&f->tile_thread.lock);
3219         }
3220 
             // after pass 0 or pass 1 (never after pass 2): finalize and
             // publish the output CDF
3221         if (f->frame_thread.pass <= 1 && f->frame_hdr->refresh_context) {
3222             // cdf update
3223             if (update_set)
3224                 dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
3225                                         &f->ts[f->frame_hdr->tiling.update].cdf);
3226             dav1d_cdf_thread_signal(&f->out_cdf);
3227         }
             // between pass 1 and pass 2: point every tile state at its part
             // of the frame-thread buffers and reset its progress counter
3228         if (f->frame_thread.pass == 1) {
3229             assert(c->n_fc > 1);
3230             for (int tile_idx = 0;
3231                  tile_idx < f->frame_hdr->tiling.rows * f->frame_hdr->tiling.cols;
3232                  tile_idx++)
3233             {
3234                 Dav1dTileState *const ts = &f->ts[tile_idx];
3235                 const size_t tile_start_off =
3236                     (size_t) f->frame_thread.tile_start_off[tile_idx];
3237                 ts->frame_thread.pal_idx = f->frame_thread.pal_idx ?
3238                     &f->frame_thread.pal_idx[tile_start_off * size_mul[1] / 4] :
3239                     NULL;
                     // byte offset into cf: shifted right by one when
                     // seq_hdr->hbd is 0, not shifted otherwise
3240                 ts->frame_thread.cf = f->frame_thread.cf ?
3241                     (uint8_t*)f->frame_thread.cf +
3242                         ((tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) :
3243                     NULL;
                     // restart progress at the first superblock row of the
                     // tile's row
3244                 if (f->n_tc > 0) {
3245                     const unsigned row_sb_start =
3246                         f->frame_hdr->tiling.row_start_sb[ts->tiling.row];
3247                     atomic_init(&ts->progress, row_sb_start);
3248                 }
3249             }
3250         }
3251     }
3252 
3253     retval = 0;
     // common exit path, reached on success and on every failure above
3254 error:
         // post-filter threading: unless lf.thread.done is already set, wait
         // on lf.thread.cond; on failure, first set done to -1 and signal
         // the post-filter condition variable
3255     if (c->n_pfc > 1) {
3256         pthread_mutex_lock(&f->lf.thread.pftd->lock);
3257         if (!f->lf.thread.done) {
3258             if (retval != 0) {
3259                 f->lf.thread.done = -1;
3260                 pthread_cond_signal(&f->lf.thread.pftd->cond);
3261             }
3262             pthread_cond_wait(&f->lf.thread.cond, &f->lf.thread.pftd->lock);
3263         }
3264         pthread_mutex_unlock(&f->lf.thread.pftd->lock);
3265     }
         // final progress value: UINT_MAX on success, FRAME_ERROR on failure
3266     dav1d_thread_picture_signal(&f->sr_cur, retval == 0 ? UINT_MAX : FRAME_ERROR,
3267                                 PLANE_TYPE_ALL);
         // drop all references held by this frame context
3268     for (int i = 0; i < 7; i++) {
3269         if (f->refp[i].p.data[0])
3270             dav1d_thread_picture_unref(&f->refp[i]);
3271         dav1d_ref_dec(&f->ref_mvs_ref[i]);
3272     }
3273 
3274     dav1d_picture_unref_internal(&f->cur);
3275     dav1d_thread_picture_unref(&f->sr_cur);
3276     dav1d_cdf_thread_unref(&f->in_cdf);
         // the output CDF is signalled here as well, so the signal is also
         // sent when an error skipped the signal inside the pass loop
3277     if (f->frame_hdr->refresh_context) {
3278         dav1d_cdf_thread_signal(&f->out_cdf);
3279         dav1d_cdf_thread_unref(&f->out_cdf);
3280     }
3281     dav1d_ref_dec(&f->cur_segmap_ref);
3282     dav1d_ref_dec(&f->prev_segmap_ref);
3283     dav1d_ref_dec(&f->mvs_ref);
3284     dav1d_ref_dec(&f->seq_hdr_ref);
3285     dav1d_ref_dec(&f->frame_hdr_ref);
3286 
3287     for (int i = 0; i < f->n_tile_data; i++)
3288         dav1d_data_unref_internal(&f->tile[i].data);
3289 
3290     return retval;
3291 }
3292 
/*
 * Compute the initial horizontal sub-sample position used when upscaling a
 * row of in_w samples to out_w samples.
 *
 * in_w:  width of the source row.
 * out_w: width of the upscaled row; used as a divisor, so it must be non-zero.
 * step:  per-output-sample source increment in 14-bit fixed point (it is
 *        compared against in_w << 14 below).
 *
 * Returns the start offset reduced to its low 14 bits.
 */
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    // Rounding error that `step` accumulates over the whole output row.
    const int err = out_w * step - (in_w << 14);
    // Half of that error is removed from the start position. Use a division
    // rather than `err >> 1`: the division truncates toward zero, as in the
    // AV1 upscaling process, whereas right-shifting a negative value is
    // implementation-defined and, with an arithmetic shift, rounds toward
    // negative infinity, giving a result that is off by one for negative odd
    // err.
    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - err / 2;
    return x0 & 0x3fff;
}
3298 
/*
 * Prepare a frame context from the sequence header, frame header and tile
 * data currently pending in `c`, publish the new picture to the output and
 * reference state, and start decoding it.
 *
 * With a single frame context (c->n_fc == 1) the frame is decoded
 * synchronously through dav1d_decode_frame() before returning. With several
 * frame contexts the next one is picked round-robin, this function waits
 * until it holds no tile data, and on success it signals that context's
 * condition variable instead of decoding here (presumably waking the thread
 * that owns the context - confirm against the threading code).
 *
 * Ownership: c->frame_hdr and c->frame_hdr_ref are moved into the frame
 * context (the fields in `c` are set to NULL), c->seq_hdr_ref gains one
 * reference, and the entries of c->tile are moved into f->tile once the
 * tile array has been sized.
 *
 * Returns 0 on success. On failure returns the error of the failing step:
 * DAV1D_ERR(ENOPROTOOPT) when the bit depth was compiled out,
 * DAV1D_ERR(EINVAL) for a missing or incompatible reference frame,
 * DAV1D_ERR(ENOMEM) for a failed allocation, or the negative result of a
 * failing helper. Everything acquired up to that point is released first.
 */
dav1d_submit_frame(Dav1dContext * const c)3299 int dav1d_submit_frame(Dav1dContext *const c) {
3300     Dav1dFrameContext *f;
3301     int res = -1;
3302 
3303     // wait for c->out_delayed[next] and move into c->out if visible
         // Note: out_delayed is only assigned (and only used) when c->n_fc > 1.
3304     Dav1dThreadPicture *out_delayed;
3305     if (c->n_fc > 1) {
             // Pick the next frame context in round-robin order.
3306         const unsigned next = c->frame_thread.next++;
3307         if (c->frame_thread.next == c->n_fc)
3308             c->frame_thread.next = 0;
3309 
3310         f = &c->fc[next];
             // This lock stays held for the rest of the function; it is
             // released right before returning, on both the success and the
             // error path.
3311         pthread_mutex_lock(&f->frame_thread.td.lock);
             // Block until the context no longer holds tile data.
3312         while (f->n_tile_data > 0)
3313             pthread_cond_wait(&f->frame_thread.td.cond,
3314                               &f->frame_thread.td.lock);
3315         out_delayed = &c->frame_thread.out_delayed[next];
3316         if (out_delayed->p.data[0]) {
3317             const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
3318                                                            memory_order_relaxed);
                 // Only hand the picture to the caller if it is meant to be
                 // shown and was not flagged with FRAME_ERROR.
3319             if (out_delayed->visible && progress != FRAME_ERROR) {
3320                 dav1d_picture_ref(&c->out, &out_delayed->p);
3321                 c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
3322             }
                 // The slot is emptied either way so it can be reused below.
3323             dav1d_thread_picture_unref(out_delayed);
3324         }
3325     } else {
3326         f = c->fc;
3327     }
3328 
         // The sequence header is shared (extra reference); the frame header is
         // moved out of `c`.
3329     f->seq_hdr = c->seq_hdr;
3330     f->seq_hdr_ref = c->seq_hdr_ref;
3331     dav1d_ref_inc(f->seq_hdr_ref);
3332     f->frame_hdr = c->frame_hdr;
3333     f->frame_hdr_ref = c->frame_hdr_ref;
3334     c->frame_hdr = NULL;
3335     c->frame_hdr_ref = NULL;
3336     f->dsp = &c->dsp[f->seq_hdr->hbd];
3337 
3338     const int bpc = 8 + 2 * f->seq_hdr->hbd;
3339 
         // The DSP tables are filled in on first use: a NULL DC_PRED entry is
         // treated as "this bit depth's table has not been initialised yet".
3340     if (!f->dsp->ipred.intra_pred[DC_PRED]) {
3341         Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd];
3342 
3343         switch (bpc) {
             // The macro body ends in `break`, so each enabled case leaves the
             // switch; a bit depth whose case is compiled out reaches `default`.
3344 #define assign_bitdepth_case(bd) \
3345             dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
3346             dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
3347             dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
3348             dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
3349             dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
3350             dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
3351             dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
3352             break
3353 #if CONFIG_8BPC
3354         case 8:
3355             assign_bitdepth_case(8);
3356 #endif
3357 #if CONFIG_16BPC
3358         case 10:
3359         case 12:
3360             assign_bitdepth_case(16);
3361 #endif
3362 #undef assign_bitdepth_case
3363         default:
3364             dav1d_log(c, "Compiled without support for %d-bit decoding\n",
3365                     8 + 2 * f->seq_hdr->hbd);
3366             res = DAV1D_ERR(ENOPROTOOPT);
3367             goto error;
3368         }
3369     }
3370 
         // Select the per-bit-depth reconstruction and filter entry points for
         // this frame.
3371 #define assign_bitdepth_case(bd) \
3372         f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
3373         f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
3374         f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
3375         f->bd_fn.filter_sbrow_deblock = dav1d_filter_sbrow_deblock_##bd##bpc; \
3376         f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
3377         f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
3378         f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
3379         f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
3380         f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc
3381     if (!f->seq_hdr->hbd) {
3382 #if CONFIG_8BPC
3383         assign_bitdepth_case(8);
3384 #endif
3385     } else {
3386 #if CONFIG_16BPC
3387         assign_bitdepth_case(16);
3388 #endif
3389     }
3390 #undef assign_bitdepth_case
3391 
         // ref_coded_width[] is only filled in for inter/switch frames (inside
         // the block below); later readers rely on that.
3392     int ref_coded_width[7];
3393     if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
3394         if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) {
3395             const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
3396             if (!c->refs[pri_ref].p.p.data[0]) {
3397                 res = DAV1D_ERR(EINVAL);
3398                 goto error;
3399             }
3400         }
3401         for (int i = 0; i < 7; i++) {
3402             const int refidx = f->frame_hdr->refidx[i];
                 // Reject the frame if the reference is missing, is more than
                 // twice as large as this frame in either dimension, is more
                 // than 16 times smaller, or differs in pixel layout or bit
                 // depth.
3403             if (!c->refs[refidx].p.p.data[0] ||
3404                 f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w ||
3405                 f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h ||
3406                 f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 ||
3407                 f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 ||
3408                 f->seq_hdr->layout != c->refs[refidx].p.p.p.layout ||
3409                 bpc != c->refs[refidx].p.p.p.bpc)
3410             {
                     // Drop the references taken by earlier iterations.
3411                 for (int j = 0; j < i; j++)
3412                     dav1d_thread_picture_unref(&f->refp[j]);
3413                 res = DAV1D_ERR(EINVAL);
3414                 goto error;
3415             }
3416             dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
3417             ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0];
3418             if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w ||
3419                 f->frame_hdr->height != c->refs[refidx].p.p.p.h)
3420             {
                 // Ratio ref_sz / this_sz in 14-bit fixed point, rounded to
                 // nearest. The macro stays defined until the #undef in the
                 // resize setup further down, which reuses it.
3421 #define scale_fac(ref_sz, this_sz) \
3422     ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
3423                 f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
3424                                                f->frame_hdr->width[0]);
3425                 f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
3426                                                f->frame_hdr->height);
                     // step is the scale reduced by 4 bits, rounded to nearest.
3427                 f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
3428                 f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
3429             } else {
                     // A horizontal scale of 0 marks the reference as
                     // unscaled; only this field is reset here.
3430                 f->svc[i][0].scale = 0;
3431             }
                 // Global-motion warping is allowed only for non-translational
                 // models, without forced integer MVs, when the shear
                 // parameter helper returns 0, and for unscaled references.
3432             f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION &&
3433                                      !f->frame_hdr->force_integer_mv &&
3434                                      !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) &&
3435                                      !f->svc[i][0].scale;
3436         }
3437     }
3438 
3439     // setup entropy
         // Without a primary reference the input CDFs are the static defaults
         // selected by quant.yac; otherwise they are shared with the CDFs
         // stored for the primary reference slot.
3440     if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
3441         dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
3442     } else {
3443         const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
3444         dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
3445     }
         // An output CDF buffer is only needed when the frame refreshes the
         // context; the thread data is passed only under frame threading.
3446     if (f->frame_hdr->refresh_context) {
3447         res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1 ? &f->frame_thread.td : NULL);
3448         if (res < 0) goto error;
3449     }
3450 
3451     // FIXME qsort so tiles are in order (for frame threading)
         // Grow the per-frame tile array if needed. The old contents are
         // discarded rather than preserved; the memcpy below overwrites them.
3452     if (f->n_tile_data_alloc < c->n_tile_data) {
3453         freep(&f->tile);
             // Guards the size multiplication below (only effective when
             // assertions are compiled in).
3454         assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile));
3455         f->tile = malloc(c->n_tile_data * sizeof(*f->tile));
3456         if (!f->tile) {
3457             f->n_tile_data_alloc = f->n_tile_data = 0;
3458             res = DAV1D_ERR(ENOMEM);
3459             goto error;
3460         }
3461         f->n_tile_data_alloc = c->n_tile_data;
3462     }
         // Move the tile entries from `c` to `f`: copy them, then clear the
         // originals so `c` no longer refers to the same data.
3463     memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
3464     memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile));
3465     f->n_tile_data = c->n_tile_data;
3466     c->n_tile_data = 0;
3467 
3468     // allocate frame
3469     res = dav1d_thread_picture_alloc(c, f, bpc);
3470     if (res < 0) goto error;
3471 
         // When width[0] and width[1] differ, f->cur becomes a separate picture
         // allocated at width[0]; otherwise f->cur simply shares f->sr_cur.
         // NOTE(review): presumably width[0] is the coded width and width[1]
         // the super-resolution output width - confirm against the header
         // definitions.
3472     if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
3473         res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p);
3474         if (res < 0) goto error;
3475     } else {
3476         dav1d_picture_ref(&f->cur, &f->sr_cur.p);
3477     }
3478 
         // Horizontal resize parameters from f->cur to f->sr_cur: index 0 uses
         // the full widths, index 1 uses the widths halved (rounded up) unless
         // the layout is I444.
3479     if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
3480         f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
3481         const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
3482         const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
3483         const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
3484         f->resize_step[1] = scale_fac(in_cw, out_cw);
3485 #undef scale_fac
3486         f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
3487         f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
3488     }
3489 
3490     // move f->cur into output queue
         // Single context: expose the picture directly if it is to be shown.
         // Several contexts: park it in this context's delayed-output slot.
3491     if (c->n_fc == 1) {
3492         if (f->frame_hdr->show_frame) {
3493             dav1d_picture_ref(&c->out, &f->sr_cur.p);
3494             c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur);
3495         }
3496     } else {
3497         dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
3498     }
3499 
         // Derived frame dimensions:
         //   w4/h4        size divided by 4, rounded up
         //   bw/bh        size rounded up to a multiple of 8, then divided by 4
         //   sb128w/h     bw/bh divided by 32, rounded up
         //   sb_shift     4 or 5, sb_step 16 or 32, depending on seq_hdr->sb128
         //   sbh          bh divided by sb_step, rounded up
         //   b4_stride    bw rounded up to a multiple of 32
3500     f->w4 = (f->frame_hdr->width[0] + 3) >> 2;
3501     f->h4 = (f->frame_hdr->height + 3) >> 2;
3502     f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1;
3503     f->bh = ((f->frame_hdr->height + 7) >> 3) << 1;
3504     f->sb128w = (f->bw + 31) >> 5;
3505     f->sb128h = (f->bh + 31) >> 5;
3506     f->sb_shift = 4 + f->seq_hdr->sb128;
3507     f->sb_step = 16 << f->seq_hdr->sb128;
3508     f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
3509     f->b4_stride = (f->bw + 31) & ~31;
3510     f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
3511 
3512     // ref_mvs
         // A motion-vector buffer is allocated for inter/switch frames and for
         // frames that allow intra block copy.
3513     if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
3514         f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool,
3515             sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
3516         if (!f->mvs_ref) {
3517             res = DAV1D_ERR(ENOMEM);
3518             goto error;
3519         }
3520         f->mvs = f->mvs_ref->data;
             // Reference order values come from the reference pictures' frame
             // headers; with intra block copy they are all zero.
3521         if (!f->frame_hdr->allow_intrabc) {
3522             for (int i = 0; i < 7; i++)
3523                 f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset;
3524         } else {
3525             memset(f->refpoc, 0, sizeof(f->refpoc));
3526         }
3527         if (f->frame_hdr->use_ref_frame_mvs) {
3528             for (int i = 0; i < 7; i++) {
3529                 const int refidx = f->frame_hdr->refidx[i];
                     // A reference's stored motion vectors are only reused when
                     // it has them and its coded width and its height match
                     // the current picture.
3530                 if (c->refs[refidx].refmvs != NULL &&
3531                     ref_coded_width[i] == f->cur.p.w &&
3532                     f->refp[i].p.p.h == f->cur.p.h)
3533                 {
3534                     f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
3535                     dav1d_ref_inc(f->ref_mvs_ref[i]);
3536                     f->ref_mvs[i] = c->refs[refidx].refmvs->data;
3537                 } else {
3538                     f->ref_mvs[i] = NULL;
3539                     f->ref_mvs_ref[i] = NULL;
3540                 }
3541                 memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
3542                        sizeof(*f->refrefpoc));
3543             }
3544         } else {
3545             memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
3546         }
3547     } else {
3548         f->mvs_ref = NULL;
3549         memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
3550     }
3551 
3552     // segmap
3553     if (f->frame_hdr->segmentation.enabled) {
3554         // By default, the previous segmentation map is not initialised.
3555         f->prev_segmap_ref = NULL;
3556         f->prev_segmap = NULL;
3557 
3558         // We might need a previous frame's segmentation map. This
3559         // happens if there is either no update or a temporal update.
3560         if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) {
3561             const int pri_ref = f->frame_hdr->primary_ref_frame;
3562             assert(pri_ref != DAV1D_PRIMARY_REF_NONE);
                 // The previous map is only usable when the reference's block
                 // dimensions (computed like f->bw / f->bh) match this frame.
3563             const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1;
3564             const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
3565             if (ref_w == f->bw && ref_h == f->bh) {
3566                 f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap;
3567                 if (f->prev_segmap_ref) {
3568                     dav1d_ref_inc(f->prev_segmap_ref);
3569                     f->prev_segmap = f->prev_segmap_ref->data;
3570                 }
3571             }
3572         }
3573 
3574         if (f->frame_hdr->segmentation.update_map) {
3575             // We're updating an existing map, but need somewhere to
3576             // put the new values. Allocate them here (the data
3577             // actually gets set elsewhere)
3578             f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool,
3579                 sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
3580             if (!f->cur_segmap_ref) {
                     // The shared error path does not release
                     // prev_segmap_ref, so it is dropped here.
3581                 dav1d_ref_dec(&f->prev_segmap_ref);
3582                 res = DAV1D_ERR(ENOMEM);
3583                 goto error;
3584             }
3585             f->cur_segmap = f->cur_segmap_ref->data;
3586         } else if (f->prev_segmap_ref) {
3587             // We're not updating an existing map, and we have a valid
3588             // reference. Use that.
3589             f->cur_segmap_ref = f->prev_segmap_ref;
3590             dav1d_ref_inc(f->cur_segmap_ref);
3591             f->cur_segmap = f->prev_segmap_ref->data;
3592         } else {
3593             // We need to make a new map. Allocate one here and zero it out.
3594             const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
3595             f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size);
3596             if (!f->cur_segmap_ref) {
3597                 res = DAV1D_ERR(ENOMEM);
3598                 goto error;
3599             }
3600             f->cur_segmap = f->cur_segmap_ref->data;
3601             memset(f->cur_segmap, 0, segmap_size);
3602         }
3603     } else {
3604         f->cur_segmap = NULL;
3605         f->cur_segmap_ref = NULL;
3606         f->prev_segmap_ref = NULL;
3607     }
3608 
3609     // update references etc.
         // For every slot selected by refresh_frame_flags, replace the stored
         // picture, CDFs, segmentation map, motion vectors and order values
         // with those of this frame. No `goto error` follows this point.
3610     const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags;
3611     for (int i = 0; i < 8; i++) {
3612         if (refresh_frame_flags & (1 << i)) {
3613             if (c->refs[i].p.p.data[0])
3614                 dav1d_thread_picture_unref(&c->refs[i].p);
3615             dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);
3616 
                 // Store the CDFs this frame will produce if it refreshes the
                 // context, otherwise the ones it started from.
3617             dav1d_cdf_thread_unref(&c->cdf[i]);
3618             if (f->frame_hdr->refresh_context) {
3619                 dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
3620             } else {
3621                 dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf);
3622             }
3623 
3624             dav1d_ref_dec(&c->refs[i].segmap);
3625             c->refs[i].segmap = f->cur_segmap_ref;
3626             if (f->cur_segmap_ref)
3627                 dav1d_ref_inc(f->cur_segmap_ref);
3628             dav1d_ref_dec(&c->refs[i].refmvs);
                 // With intra block copy the slot's refmvs is released above
                 // and not reassigned.
3629             if (!f->frame_hdr->allow_intrabc) {
3630                 c->refs[i].refmvs = f->mvs_ref;
3631                 if (f->mvs_ref)
3632                     dav1d_ref_inc(f->mvs_ref);
3633             }
3634             memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
3635         }
3636     }
3637 
3638     if (c->n_fc == 1) {
             // Synchronous decode. On failure, withdraw the output picture and
             // empty every reference slot that was just refreshed, then return
             // the error directly (the `error` label is not used here).
3639         if ((res = dav1d_decode_frame(f)) < 0) {
3640             dav1d_picture_unref_internal(&c->out);
3641             for (int i = 0; i < 8; i++) {
3642                 if (refresh_frame_flags & (1 << i)) {
3643                     if (c->refs[i].p.p.data[0])
3644                         dav1d_thread_picture_unref(&c->refs[i].p);
3645                     dav1d_cdf_thread_unref(&c->cdf[i]);
3646                     dav1d_ref_dec(&c->refs[i].segmap);
3647                     dav1d_ref_dec(&c->refs[i].refmvs);
3648                 }
3649             }
3650             return res;
3651         }
3652     } else {
             // Frame threading: signal the context's condition variable and
             // release the lock taken at the top of the function.
3653         pthread_cond_signal(&f->frame_thread.td.cond);
3654         pthread_mutex_unlock(&f->frame_thread.td.lock);
3655     }
3656 
3657     return 0;
3658 error:
         // Shared cleanup for failures during setup: release everything the
         // frame context acquired so far and mark it as holding no tile data.
3659     dav1d_cdf_thread_unref(&f->in_cdf);
3660     if (f->frame_hdr->refresh_context)
3661         dav1d_cdf_thread_unref(&f->out_cdf);
3662     for (int i = 0; i < 7; i++) {
3663         if (f->refp[i].p.data[0])
3664             dav1d_thread_picture_unref(&f->refp[i]);
3665         dav1d_ref_dec(&f->ref_mvs_ref[i]);
3666     }
3667     if (c->n_fc == 1)
3668         dav1d_picture_unref_internal(&c->out);
3669     else
3670         dav1d_thread_picture_unref(out_delayed);
3671     dav1d_picture_unref_internal(&f->cur);
3672     dav1d_thread_picture_unref(&f->sr_cur);
3673     dav1d_ref_dec(&f->mvs_ref);
3674     dav1d_ref_dec(&f->seq_hdr_ref);
3675     dav1d_ref_dec(&f->frame_hdr_ref);
3676 
3677     for (int i = 0; i < f->n_tile_data; i++)
3678         dav1d_data_unref_internal(&f->tile[i].data);
3679     f->n_tile_data = 0;
3680 
         // Release the lock taken at the top when frame threading is active.
3681     if (c->n_fc > 1) {
3682         pthread_cond_signal(&f->frame_thread.td.cond);
3683         pthread_mutex_unlock(&f->frame_thread.td.lock);
3684     }
3685 
3686     return res;
3687 }
3688