1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "vpx_mem/vpx_mem.h"
12 #include "vpx_ports/mem.h"
13 
14 #include "vp9/common/vp9_blockd.h"
15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_entropy.h"
17 #if CONFIG_COEFFICIENT_RANGE_CHECKING
18 #include "vp9/common/vp9_idct.h"
19 #endif
20 
21 #include "vp9/decoder/vp9_detokenize.h"
22 
// Indices into a per-(band, context) coefficient probability array, as used
// by decode_coefs() to pick the probability for the "more coefficients",
// "non-zero" and "greater than one" decisions respectively.
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2

// Bumps the counter for |token| in the current band/context, but only when
// count collection is enabled. This macro is not self-contained: it expects
// `counts`, `coef_counts`, `band` and `ctx` to be in scope where it expands.
#define INCREMENT_COUNT(token)                   \
  do {                                           \
    if (counts) ++coef_counts[band][ctx][token]; \
  } while (0)
31 
read_bool(vpx_reader * r,int prob,BD_VALUE * value,int * count,unsigned int * range)32 static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value,
33                             int *count, unsigned int *range) {
34   const unsigned int split = (*range * prob + (256 - prob)) >> CHAR_BIT;
35   const BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT);
36 #if CONFIG_BITSTREAM_DEBUG
37   const int queue_r = bitstream_queue_get_read();
38   const int frame_idx = bitstream_queue_get_frame_read();
39   int ref_result, ref_prob;
40   bitstream_queue_pop(&ref_result, &ref_prob);
41   if (prob != ref_prob) {
42     fprintf(stderr,
43             "\n *** [bit] prob error, frame_idx_r %d prob %d ref_prob %d "
44             "queue_r %d\n",
45             frame_idx, prob, ref_prob, queue_r);
46 
47     assert(0);
48   }
49 #endif
50 
51   if (*count < 0) {
52     r->value = *value;
53     r->count = *count;
54     vpx_reader_fill(r);
55     *value = r->value;
56     *count = r->count;
57   }
58 
59   if (*value >= bigsplit) {
60     *range = *range - split;
61     *value = *value - bigsplit;
62     {
63       const int shift = vpx_norm[*range];
64       *range <<= shift;
65       *value <<= shift;
66       *count -= shift;
67     }
68 #if CONFIG_BITSTREAM_DEBUG
69     {
70       const int bit = 1;
71       if (bit != ref_result) {
72         fprintf(
73             stderr,
74             "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d "
75             "queue_r %d\n",
76             frame_idx, bit, ref_result, queue_r);
77 
78         assert(0);
79       }
80     }
81 #endif
82     return 1;
83   }
84   *range = split;
85   {
86     const int shift = vpx_norm[*range];
87     *range <<= shift;
88     *value <<= shift;
89     *count -= shift;
90   }
91 #if CONFIG_BITSTREAM_DEBUG
92   {
93     const int bit = 0;
94     if (bit != ref_result) {
95       fprintf(stderr,
96               "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d "
97               "queue_r %d\n",
98               frame_idx, bit, ref_result, queue_r);
99 
100       assert(0);
101     }
102   }
103 #endif
104   return 0;
105 }
106 
read_coeff(vpx_reader * r,const vpx_prob * probs,int n,BD_VALUE * value,int * count,unsigned int * range)107 static INLINE int read_coeff(vpx_reader *r, const vpx_prob *probs, int n,
108                              BD_VALUE *value, int *count, unsigned int *range) {
109   int i, val = 0;
110   for (i = 0; i < n; ++i)
111     val = (val << 1) | read_bool(r, probs[i], value, count, range);
112   return val;
113 }
114 
// Decodes and dequantizes the coefficient tokens of one transform block.
//
//   xd       block descriptor; supplies the frame context, the optional
//            counts and (in high bit depth builds) the bit depth.
//   type     plane type used to index the probability/count tables.
//   dqcoeff  output; written at dqcoeff[scan[i]] for every non-zero token.
//   tx_size  transform size; at most 16 << (2 * tx_size) tokens are read.
//   dq       dequantizer pair: dq[0] applies until the first token has been
//            consumed, dq[1] to everything after it.
//   ctx      initial coefficient context.
//   scan/nb  scan order and its neighbor table (for get_coef_context()).
//   r        bit reader; its value/range/count are cached in locals while
//            decoding and written back before returning.
//
// Returns the number of scan positions consumed (the end-of-block index).
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
                        int ctx, const int16_t *scan, const int16_t *nb,
                        vpx_reader *r) {
  // NOTE: `counts`, `coef_counts`, `band` and `ctx` are referenced by the
  // INCREMENT_COUNT() macro and must keep these names.
  FRAME_COUNTS *counts = xd->counts;
  const FRAME_CONTEXT *const fc = xd->fc;
  const int ref = is_inter_block(xd->mi[0]);
  const int max_eob = 16 << (tx_size << 1);
  const int dq_shift = (tx_size == TX_32X32);
  const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
      fc->coef_probs[tx_size][type][ref];
  const uint8_t *band_translate = get_band_translate(tx_size);
  unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
  unsigned int(*eob_branch_count)[COEFF_CONTEXTS];
  uint8_t token_cache[32 * 32];
  const vpx_prob *node_probs;
  int band;
  int idx = 0;  // current position in scan order
  int pos;      // raster position of the current non-zero coefficient
  int mag;      // dequantized magnitude of the current coefficient
  int negative;
  int signed_coef;
  uint8_t token_class;
  int16_t dqv = dq[0];
#if CONFIG_VP9_HIGHBITDEPTH
  // The 12-bit table is shared: 10-bit streams skip its first two entries,
  // 8-bit streams use the plain table.
  const uint8_t *const cat6_prob =
      (xd->bd == VPX_BITS_12)
          ? vp9_cat6_prob_high12
          : (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : vp9_cat6_prob;
  const int cat6_bits =
      (xd->bd == VPX_BITS_12) ? 18 : (xd->bd == VPX_BITS_10) ? 16 : 14;
#else
  const uint8_t *const cat6_prob = vp9_cat6_prob;
  const int cat6_bits = 14;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // Work on local copies of the reader state; the compiler produces better
  // code for locals than for repeated accesses through r.
  BD_VALUE value = r->value;
  unsigned int range = r->range;
  int count = r->count;

  if (counts) {
    coef_counts = counts->coef[tx_size][type][ref];
    eob_branch_count = counts->eob_branch[tx_size][type][ref];
  }

  while (idx < max_eob) {
    band = *band_translate++;
    node_probs = coef_probs[band][ctx];
    if (counts) ++eob_branch_count[band][ctx];

    // End-of-block check; only performed after a non-zero token or at the
    // very first position.
    if (!read_bool(r, node_probs[EOB_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(EOB_MODEL_TOKEN);
      break;
    }

    // Consume a run of zero tokens.
    while (
        !read_bool(r, node_probs[ZERO_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(ZERO_TOKEN);
      dqv = dq[1];
      token_cache[scan[idx]] = 0;
      ++idx;
      // Zero tokens may run to the end of the block with no eob token.
      if (idx >= max_eob) goto done;
      ctx = get_coef_context(nb, token_cache, idx);
      band = *band_translate++;
      node_probs = coef_probs[band][ctx];
    }

    pos = scan[idx];

    if (!read_bool(r, node_probs[ONE_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(ONE_TOKEN);
      token_class = 1;
      mag = dqv >> dq_shift;
    } else {
      const vpx_prob *const pareto =
          vp9_pareto8_full[node_probs[PIVOT_NODE] - 1];
      INCREMENT_COUNT(TWO_TOKEN);
      if (!read_bool(r, pareto[0], &value, &count, &range)) {
        // Small tokens: 2, or 3/4 selected by one extra decision.
        if (!read_bool(r, pareto[1], &value, &count, &range)) {
          token_class = 2;
          mag = (2 * dqv) >> dq_shift;
        } else {
          token_class = 3;
          mag = ((3 + read_bool(r, pareto[2], &value, &count, &range)) * dqv) >>
                dq_shift;
        }
      } else {
        // Category tokens: a base value plus literal extra bits.
        int token_val;
        if (!read_bool(r, pareto[3], &value, &count, &range)) {
          token_class = 4;
          if (read_bool(r, pareto[4], &value, &count, &range)) {
            token_val = CAT2_MIN_VAL + read_coeff(r, vp9_cat2_prob, 2, &value,
                                                  &count, &range);
          } else {
            token_val = CAT1_MIN_VAL + read_coeff(r, vp9_cat1_prob, 1, &value,
                                                  &count, &range);
          }
        } else {
          token_class = 5;
          if (!read_bool(r, pareto[5], &value, &count, &range)) {
            if (read_bool(r, pareto[6], &value, &count, &range)) {
              token_val = CAT4_MIN_VAL + read_coeff(r, vp9_cat4_prob, 4, &value,
                                                    &count, &range);
            } else {
              token_val = CAT3_MIN_VAL + read_coeff(r, vp9_cat3_prob, 3, &value,
                                                    &count, &range);
            }
          } else {
            if (read_bool(r, pareto[7], &value, &count, &range)) {
              token_val = CAT6_MIN_VAL + read_coeff(r, cat6_prob, cat6_bits,
                                                    &value, &count, &range);
            } else {
              token_val = CAT5_MIN_VAL + read_coeff(r, vp9_cat5_prob, 5, &value,
                                                    &count, &range);
            }
          }
        }
#if CONFIG_VP9_HIGHBITDEPTH
        // token_val may use 18 bits, so widen before multiplying.
        mag = (int)(((int64_t)token_val * dqv) >> dq_shift);
#else
        mag = (token_val * dqv) >> dq_shift;
#endif
      }
    }
    token_cache[pos] = token_class;

    // The sign is coded as an equiprobable bit after the magnitude.
    negative = read_bool(r, 128, &value, &count, &range);
    signed_coef = negative ? -mag : mag;
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VP9_HIGHBITDEPTH
    dqcoeff[pos] = highbd_check_range(signed_coef, xd->bd);
#else
    dqcoeff[pos] = check_range(signed_coef);
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    dqcoeff[pos] = (tran_low_t)signed_coef;
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING

    ++idx;
    ctx = get_coef_context(nb, token_cache, idx);
    dqv = dq[1];
  }

done:
  // Publish the cached reader state back to the caller's reader.
  r->value = value;
  r->range = range;
  r->count = count;
  return idx;
}
261 
// Computes by how many bits a packed run of entropy contexts must be shifted
// so that entries lying past the block limits are dropped.
//
// For each direction, when xd->max_blocks_wide / xd->max_blocks_high is
// non-zero and a transform of |tx_size_in_blocks| starting at |x| / |y|
// extends past it, the matching output is set to 8 times the number of
// overhanging entries. Otherwise the output is left untouched, so callers
// must pre-initialise both outputs.
static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l,
                          int x, int y, unsigned int tx_size_in_blocks) {
  // Above (horizontal) direction.
  if (xd->max_blocks_wide && tx_size_in_blocks + x > xd->max_blocks_wide) {
    *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8;
  }

  // Left (vertical) direction.
  if (xd->max_blocks_high && tx_size_in_blocks + y > xd->max_blocks_high) {
    *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8;
  }
}
273 
vp9_decode_block_tokens(TileWorkerData * twd,int plane,const scan_order * sc,int x,int y,TX_SIZE tx_size,int seg_id)274 int vp9_decode_block_tokens(TileWorkerData *twd, int plane,
275                             const scan_order *sc, int x, int y, TX_SIZE tx_size,
276                             int seg_id) {
277   vpx_reader *r = &twd->bit_reader;
278   MACROBLOCKD *xd = &twd->xd;
279   struct macroblockd_plane *const pd = &xd->plane[plane];
280   const int16_t *const dequant = pd->seg_dequant[seg_id];
281   int eob;
282   ENTROPY_CONTEXT *a = pd->above_context + x;
283   ENTROPY_CONTEXT *l = pd->left_context + y;
284   int ctx;
285   int ctx_shift_a = 0;
286   int ctx_shift_l = 0;
287 
288   switch (tx_size) {
289     case TX_4X4:
290       ctx = a[0] != 0;
291       ctx += l[0] != 0;
292       eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
293                          dequant, ctx, sc->scan, sc->neighbors, r);
294       a[0] = l[0] = (eob > 0);
295       break;
296     case TX_8X8:
297       get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8);
298       ctx = !!*(const uint16_t *)a;
299       ctx += !!*(const uint16_t *)l;
300       eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
301                          dequant, ctx, sc->scan, sc->neighbors, r);
302       *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a;
303       *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l;
304       break;
305     case TX_16X16:
306       get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16);
307       ctx = !!*(const uint32_t *)a;
308       ctx += !!*(const uint32_t *)l;
309       eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
310                          dequant, ctx, sc->scan, sc->neighbors, r);
311       *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a;
312       *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l;
313       break;
314     case TX_32X32:
315       get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32);
316       // NOTE: casting to uint64_t here is safe because the default memory
317       // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte
318       // boundaries.
319       ctx = !!*(const uint64_t *)a;
320       ctx += !!*(const uint64_t *)l;
321       eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
322                          dequant, ctx, sc->scan, sc->neighbors, r);
323       *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a;
324       *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l;
325       break;
326     default:
327       assert(0 && "Invalid transform size.");
328       eob = 0;
329       break;
330   }
331 
332   return eob;
333 }
334