1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27 
28 #define CABAC(h) 0
29 #define UNCHECKED_BITSTREAM_READER 1
30 
31 #include "internal.h"
32 #include "avcodec.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37 #include "mpegutils.h"
38 #include "libavutil/avassert.h"
39 
40 
/*
 * Two permutations of the values 0..15.
 * NOTE(review): the lookups that use these tables are outside this excerpt;
 * judging by the names they translate a Golomb code number into a coded
 * block pattern for inter / intra4x4 macroblocks of streams without
 * chroma -- confirm against the macroblock decoder.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,  8,  3,  5, 10,
    12, 15,  7, 11, 13, 14,  6,  9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11, 13, 14,  3,  5,
    10, 12,  1,  2,  4,  8,  6,  9,
};
48 
/*
 * Code lengths and code words used to build chroma_dc_coeff_token_vlc.
 * Four entries per row; decode_residual() splits the decoded value into
 * (value >> 2) = total_coeff and (value & 3) = trailing_ones, so each row
 * below holds the codes of one total_coeff value.
 * NOTE(review): a length of 0 presumably marks a combination without a
 * code -- confirm against init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_len[4 * 5] = {
    2, 0, 0, 0,   /* total_coeff 0 */
    6, 1, 0, 0,   /* total_coeff 1 */
    6, 6, 3, 0,   /* total_coeff 2 */
    6, 7, 7, 6,   /* total_coeff 3 */
    6, 8, 8, 7,   /* total_coeff 4 */
};

static const uint8_t chroma_dc_coeff_token_bits[4 * 5] = {
    1, 0, 0, 0,   /* total_coeff 0 */
    7, 1, 0, 0,   /* total_coeff 1 */
    4, 6, 1, 0,   /* total_coeff 2 */
    3, 3, 2, 5,   /* total_coeff 3 */
    2, 3, 2, 0,   /* total_coeff 4 */
};
64 
/*
 * Code lengths and code words used to build chroma422_dc_coeff_token_vlc.
 * Same layout as the 4-coefficient chroma DC tables: four entries per row,
 * one row per (decoded value >> 2), nine rows in total.
 */
static const uint8_t chroma422_dc_coeff_token_len[4 * 9] = {
     1,  0,  0,  0,   /* total_coeff 0 */
     7,  2,  0,  0,   /* total_coeff 1 */
     7,  7,  3,  0,   /* total_coeff 2 */
     9,  7,  7,  5,   /* total_coeff 3 */
     9,  9,  7,  6,   /* total_coeff 4 */
    10, 10,  9,  7,   /* total_coeff 5 */
    11, 11, 10,  7,   /* total_coeff 6 */
    12, 12, 11, 10,   /* total_coeff 7 */
    13, 12, 12, 11,   /* total_coeff 8 */
};

static const uint8_t chroma422_dc_coeff_token_bits[4 * 9] = {
     1,  0,  0,  0,   /* total_coeff 0 */
    15,  1,  0,  0,   /* total_coeff 1 */
    14, 13,  1,  0,   /* total_coeff 2 */
     7, 12, 11,  1,   /* total_coeff 3 */
     6,  5, 10,  1,   /* total_coeff 4 */
     7,  6,  4,  9,   /* total_coeff 5 */
     7,  6,  5,  8,   /* total_coeff 6 */
     7,  6,  5,  4,   /* total_coeff 7 */
     7,  5,  4,  4,   /* total_coeff 8 */
};
88 
/*
 * Code lengths of the four coeff_token VLCs. decode_residual() selects the
 * outer index through coeff_token_table_index[] from the predicted
 * non-zero count (0-1, 2-3, 4-7, 8 and above). Inside a table the entries
 * come in groups of four, one group per (decoded value >> 2), i.e. per
 * total_coeff 0..16.
 */
static const uint8_t coeff_token_len[4][4 * 17] = {
    { /* predicted count 0..1 */
         1,  0,  0,  0,                                                     /* total_coeff 0      */
         6,  2,  0,  0,    8,  6,  3,  0,    9,  8,  7,  5,   10,  9,  8,  6, /* total_coeff 1..4   */
        11, 10,  9,  7,   13, 11, 10,  8,   13, 13, 11,  9,   13, 13, 13, 10, /* total_coeff 5..8   */
        14, 14, 13, 11,   14, 14, 14, 13,   15, 15, 14, 14,   15, 15, 15, 14, /* total_coeff 9..12  */
        16, 15, 15, 15,   16, 16, 16, 15,   16, 16, 16, 16,   16, 16, 16, 16, /* total_coeff 13..16 */
    },
    { /* predicted count 2..3 */
         2,  0,  0,  0,
         6,  2,  0,  0,    6,  5,  3,  0,    7,  6,  6,  4,    8,  6,  6,  4,
         8,  7,  7,  5,    9,  8,  8,  6,   11,  9,  9,  6,   11, 11, 11,  7,
        12, 11, 11,  9,   12, 12, 12, 11,   12, 12, 12, 11,   13, 13, 13, 12,
        13, 13, 13, 13,   13, 14, 13, 13,   14, 14, 14, 13,   14, 14, 14, 14,
    },
    { /* predicted count 4..7 */
         4,  0,  0,  0,
         6,  4,  0,  0,    6,  5,  4,  0,    6,  5,  5,  4,    7,  5,  5,  4,
         7,  5,  5,  4,    7,  6,  6,  4,    7,  6,  6,  4,    8,  7,  7,  5,
         8,  8,  7,  6,    9,  8,  8,  7,    9,  9,  8,  8,    9,  9,  9,  8,
        10,  9,  9,  9,   10, 10, 10, 10,   10, 10, 10, 10,   10, 10, 10, 10,
    },
    { /* predicted count 8 and above: every used code is 6 bits long */
         6,  0,  0,  0,
         6,  6,  0,  0,    6,  6,  6,  0,    6,  6,  6,  6,    6,  6,  6,  6,
         6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,
         6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,
         6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,    6,  6,  6,  6,
    },
};
119 
/*
 * Code words of the four coeff_token VLCs; the layout matches
 * coeff_token_len[][] entry for entry (outer index = table chosen from the
 * predicted non-zero count, then groups of four entries per total_coeff).
 */
static const uint8_t coeff_token_bits[4][4 * 17] = {
    { /* table 0 */
         1,  0,  0,  0,                                                     /* total_coeff 0      */
         5,  1,  0,  0,    7,  4,  1,  0,    7,  6,  5,  3,    7,  6,  5,  3, /* total_coeff 1..4   */
         7,  6,  5,  4,   15,  6,  5,  4,   11, 14,  5,  4,    8, 10, 13,  4, /* total_coeff 5..8   */
        15, 14,  9,  4,   11, 10, 13, 12,   15, 14,  9, 12,   11, 10, 13,  8, /* total_coeff 9..12  */
        15,  1,  9, 12,   11, 14, 13,  8,    7, 10,  9, 12,    4,  6,  5,  8, /* total_coeff 13..16 */
    },
    { /* table 1 */
         3,  0,  0,  0,
        11,  2,  0,  0,    7,  7,  3,  0,    7, 10,  9,  5,    7,  6,  5,  4,
         4,  6,  5,  6,    7,  6,  5,  8,   15,  6,  5,  4,   11, 14, 13,  4,
        15, 10,  9,  4,   11, 14, 13, 12,    8, 10,  9,  8,   15, 14, 13, 12,
        11, 10,  9, 12,    7, 11,  6,  8,    9,  8, 10,  1,    7,  6,  5,  4,
    },
    { /* table 2 */
        15,  0,  0,  0,
        15, 14,  0,  0,   11, 15, 13,  0,    8, 12, 14, 12,   15, 10, 11, 11,
        11,  8,  9, 10,    9, 14, 13,  9,    8, 10,  9,  8,   15, 14, 13, 13,
        11, 14, 10, 12,   15, 10, 13, 12,   11, 14,  9, 12,    8, 10, 13,  8,
        13,  7,  9, 12,    9, 12, 11, 10,    5,  8,  7,  6,    1,  4,  3,  2,
    },
    { /* table 3 */
         3,  0,  0,  0,
         0,  1,  0,  0,    4,  5,  6,  0,    8,  9, 10, 11,   12, 13, 14, 15,
        16, 17, 18, 19,   20, 21, 22, 23,   24, 25, 26, 27,   28, 29, 30, 31,
        32, 33, 34, 35,   36, 37, 38, 39,   40, 41, 42, 43,   44, 45, 46, 47,
        48, 49, 50, 51,   52, 53, 54, 55,   56, 57, 58, 59,   60, 61, 62, 63,
    },
};
150 
/*
 * Code lengths and code words used to build total_zeros_vlc[0..14].
 * Row r feeds total_zeros_vlc[r], which decode_residual() uses when
 * total_coeff == r + 1. Only the listed entries are given; the rest of each
 * 16-entry row, and the whole 16th row, are implicitly zero.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },   /* total_coeff  1 */
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },      /* total_coeff  2 */
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },         /* total_coeff  3 */
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },            /* total_coeff  4 */
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },               /* total_coeff  5 */
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },                  /* total_coeff  6 */
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },                     /* total_coeff  7 */
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },                        /* total_coeff  8 */
    { 6, 6, 4, 2, 2, 3, 2, 5 },                           /* total_coeff  9 */
    { 5, 5, 3, 2, 2, 2, 4 },                              /* total_coeff 10 */
    { 4, 4, 3, 3, 1, 3 },                                 /* total_coeff 11 */
    { 4, 4, 2, 1, 3 },                                    /* total_coeff 12 */
    { 3, 3, 1, 2 },                                       /* total_coeff 13 */
    { 2, 2, 1 },                                          /* total_coeff 14 */
    { 1, 1 },                                             /* total_coeff 15 */
};

static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },   /* total_coeff  1 */
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },      /* total_coeff  2 */
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },         /* total_coeff  3 */
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },            /* total_coeff  4 */
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },               /* total_coeff  5 */
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },                  /* total_coeff  6 */
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },                     /* total_coeff  7 */
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },                        /* total_coeff  8 */
    { 1, 0, 1, 3, 2, 1, 1, 1 },                           /* total_coeff  9 */
    { 1, 0, 1, 3, 2, 1, 1 },                              /* total_coeff 10 */
    { 0, 1, 1, 2, 1, 3 },                                 /* total_coeff 11 */
    { 0, 1, 1, 1, 1 },                                    /* total_coeff 12 */
    { 0, 1, 1, 1 },                                       /* total_coeff 13 */
    { 0, 1, 1 },                                          /* total_coeff 14 */
    { 0, 1 },                                             /* total_coeff 15 */
};
186 
/*
 * Code lengths and code words used to build chroma_dc_total_zeros_vlc[0..2].
 * Row r is the table decode_residual() uses for 4-coefficient blocks when
 * total_coeff == r + 1.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 },   /* total_coeff 1 */
    { 1, 2, 2, 0 },   /* total_coeff 2 */
    { 1, 1, 0, 0 },   /* total_coeff 3 */
};

static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 },   /* total_coeff 1 */
    { 1, 1, 0, 0 },   /* total_coeff 2 */
    { 1, 0, 0, 0 },   /* total_coeff 3 */
};
198 
/*
 * Code lengths and code words used to build
 * chroma422_dc_total_zeros_vlc[0..6]. Row r is the table decode_residual()
 * uses for 8-coefficient blocks when total_coeff == r + 1; entries not
 * listed in a row are implicitly zero.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8] = {
    { 1, 3, 3, 4, 4, 4, 5, 5 },   /* total_coeff 1 */
    { 3, 2, 3, 3, 3, 3, 3 },      /* total_coeff 2 */
    { 3, 3, 2, 2, 3, 3 },         /* total_coeff 3 */
    { 3, 2, 2, 2, 3 },            /* total_coeff 4 */
    { 2, 2, 2, 2 },               /* total_coeff 5 */
    { 2, 2, 1 },                  /* total_coeff 6 */
    { 1, 1 },                     /* total_coeff 7 */
};

static const uint8_t chroma422_dc_total_zeros_bits[7][8] = {
    { 1, 2, 3, 2, 3, 1, 1, 0 },   /* total_coeff 1 */
    { 0, 1, 1, 4, 5, 6, 7 },      /* total_coeff 2 */
    { 0, 1, 1, 2, 6, 7 },         /* total_coeff 3 */
    { 6, 0, 1, 2, 7 },            /* total_coeff 4 */
    { 0, 1, 2, 3 },               /* total_coeff 5 */
    { 0, 1, 1 },                  /* total_coeff 6 */
    { 0, 1 },                     /* total_coeff 7 */
};
218 
/*
 * Code lengths and code words of the run_before VLCs.
 * Rows 0..5 build run_vlc[0..5], which decode_residual() uses while
 * zeros_left is 1..6; row 6 builds run7_vlc, used for zeros_left >= 7.
 * Entries not listed in a row are implicitly zero.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },                                               /* zeros_left 1  */
    { 1, 2, 2 },                                            /* zeros_left 2  */
    { 2, 2, 2, 2 },                                         /* zeros_left 3  */
    { 2, 2, 2, 3, 3 },                                      /* zeros_left 4  */
    { 2, 2, 3, 3, 3, 3 },                                   /* zeros_left 5  */
    { 2, 3, 3, 3, 3, 3, 3 },                                /* zeros_left 6  */
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },      /* zeros_left 7+ */
};

static const uint8_t run_bits[7][16] = {
    { 1, 0 },                                               /* zeros_left 1  */
    { 1, 1, 0 },                                            /* zeros_left 2  */
    { 3, 2, 1, 0 },                                         /* zeros_left 3  */
    { 3, 2, 1, 1, 0 },                                      /* zeros_left 4  */
    { 3, 2, 3, 2, 1, 0 },                                   /* zeros_left 5  */
    { 3, 0, 1, 3, 2, 5, 4 },                                /* zeros_left 6  */
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },        /* zeros_left 7+ */
};
238 
239 static VLC coeff_token_vlc[4];
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242 
243 static VLC chroma_dc_coeff_token_vlc;
244 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
245 static const int chroma_dc_coeff_token_vlc_table_size = 256;
246 
247 static VLC chroma422_dc_coeff_token_vlc;
248 static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
249 static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
250 
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254 
255 static VLC chroma_dc_total_zeros_vlc[3];
256 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
257 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
258 
259 static VLC chroma422_dc_total_zeros_vlc[7];
260 static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
261 static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
262 
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266 
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270 
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273 
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS           8
277 #define TOTAL_ZEROS_VLC_BITS           9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS                   3
281 #define RUN7_VLC_BITS                  6
282 
283 /**
284  * Get the predicted number of non-zero coefficients.
285  * @param n block index
286  */
pred_non_zero_count(H264Context * h,int n)287 static inline int pred_non_zero_count(H264Context *h, int n){
288     const int index8= scan8[n];
289     const int left= h->non_zero_count_cache[index8 - 1];
290     const int top = h->non_zero_count_cache[index8 - 8];
291     int i= left + top;
292 
293     if(i<64) i= (i+1)>>1;
294 
295     tprintf(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296 
297     return i&31;
298 }
299 
init_cavlc_level_tab(void)300 static av_cold void init_cavlc_level_tab(void){
301     int suffix_length;
302     unsigned int i;
303 
304     for(suffix_length=0; suffix_length<7; suffix_length++){
305         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307 
308             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309                 int level_code = (prefix << suffix_length) +
310                     (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311                 int mask = -(level_code&1);
312                 level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313                 cavlc_level_tab[suffix_length][i][0]= level_code;
314                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315             }else if(prefix + 1 <= LEVEL_TAB_BITS){
316                 cavlc_level_tab[suffix_length][i][0]= prefix+100;
317                 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318             }else{
319                 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320                 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321             }
322         }
323     }
324 }
325 
ff_h264_decode_init_vlc(void)326 av_cold void ff_h264_decode_init_vlc(void){
327     static int done = 0;
328 
329     if (!done) {
330         int i;
331         int offset;
332         done = 1;
333 
334         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337                  &chroma_dc_coeff_token_len [0], 1, 1,
338                  &chroma_dc_coeff_token_bits[0], 1, 1,
339                  INIT_VLC_USE_NEW_STATIC);
340 
341         chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342         chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343         init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344                  &chroma422_dc_coeff_token_len [0], 1, 1,
345                  &chroma422_dc_coeff_token_bits[0], 1, 1,
346                  INIT_VLC_USE_NEW_STATIC);
347 
348         offset = 0;
349         for(i=0; i<4; i++){
350             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353                      &coeff_token_len [i][0], 1, 1,
354                      &coeff_token_bits[i][0], 1, 1,
355                      INIT_VLC_USE_NEW_STATIC);
356             offset += coeff_token_vlc_tables_size[i];
357         }
358         /*
359          * This is a one time safety check to make sure that
360          * the packed static coeff_token_vlc table sizes
361          * were initialized correctly.
362          */
363         av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364 
365         for(i=0; i<3; i++){
366             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368             init_vlc(&chroma_dc_total_zeros_vlc[i],
369                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
370                      &chroma_dc_total_zeros_len [i][0], 1, 1,
371                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
372                      INIT_VLC_USE_NEW_STATIC);
373         }
374 
375         for(i=0; i<7; i++){
376             chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377             chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378             init_vlc(&chroma422_dc_total_zeros_vlc[i],
379                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
380                      &chroma422_dc_total_zeros_len [i][0], 1, 1,
381                      &chroma422_dc_total_zeros_bits[i][0], 1, 1,
382                      INIT_VLC_USE_NEW_STATIC);
383         }
384 
385         for(i=0; i<15; i++){
386             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388             init_vlc(&total_zeros_vlc[i],
389                      TOTAL_ZEROS_VLC_BITS, 16,
390                      &total_zeros_len [i][0], 1, 1,
391                      &total_zeros_bits[i][0], 1, 1,
392                      INIT_VLC_USE_NEW_STATIC);
393         }
394 
395         for(i=0; i<6; i++){
396             run_vlc[i].table = run_vlc_tables[i];
397             run_vlc[i].table_allocated = run_vlc_tables_size;
398             init_vlc(&run_vlc[i],
399                      RUN_VLC_BITS, 7,
400                      &run_len [i][0], 1, 1,
401                      &run_bits[i][0], 1, 1,
402                      INIT_VLC_USE_NEW_STATIC);
403         }
404         run7_vlc.table = run7_vlc_table,
405         run7_vlc.table_allocated = run7_vlc_table_size;
406         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407                  &run_len [6][0], 1, 1,
408                  &run_bits[6][0], 1, 1,
409                  INIT_VLC_USE_NEW_STATIC);
410 
411         init_cavlc_level_tab();
412     }
413 }
414 
415 /**
416  *
417  */
get_level_prefix(GetBitContext * gb)418 static inline int get_level_prefix(GetBitContext *gb){
419     unsigned int buf;
420     int log;
421 
422     OPEN_READER(re, gb);
423     UPDATE_CACHE(re, gb);
424     buf=GET_CACHE(re, gb);
425 
426     log= 32 - av_log2(buf);
427 #ifdef TRACE
428     print_bin(buf>>(32-log), log);
429     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431 
432     LAST_SKIP_BITS(re, gb, log);
433     CLOSE_READER(re, gb);
434 
435     return log-1;
436 }
437 
438 /**
439  * Decode a residual block.
440  * @param n block index
441  * @param scantable scantable
442  * @param max_coeff number of coefficients in the block
443  * @return <0 if an error occurred
444  */
decode_residual(H264Context * h,GetBitContext * gb,int16_t * block,int n,const uint8_t * scantable,const uint32_t * qmul,int max_coeff)445 static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
447     int level[16];
448     int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
449 
450     //FIXME put trailing_onex into the context
451 
452     if(max_coeff <= 8){
453         if (max_coeff == 4)
454             coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
455         else
456             coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
457         total_coeff= coeff_token>>2;
458     }else{
459         if(n >= LUMA_DC_BLOCK_INDEX){
460             total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
461             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
462             total_coeff= coeff_token>>2;
463         }else{
464             total_coeff= pred_non_zero_count(h, n);
465             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
466             total_coeff= coeff_token>>2;
467         }
468     }
469     h->non_zero_count_cache[ scan8[n] ]= total_coeff;
470 
471     //FIXME set last_non_zero?
472 
473     if(total_coeff==0)
474         return 0;
475     if(total_coeff > (unsigned)max_coeff) {
476         av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
477         return -1;
478     }
479 
480     trailing_ones= coeff_token&3;
481     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
482     av_assert2(total_coeff<=16);
483 
484     i = show_bits(gb, 3);
485     skip_bits(gb, trailing_ones);
486     level[0] = 1-((i&4)>>1);
487     level[1] = 1-((i&2)   );
488     level[2] = 1-((i&1)<<1);
489 
490     if(trailing_ones<total_coeff) {
491         int mask, prefix;
492         int suffix_length = total_coeff > 10 & trailing_ones < 3;
493         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
494         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
495 
496         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
497         if(level_code >= 100){
498             prefix= level_code - 100;
499             if(prefix == LEVEL_TAB_BITS)
500                 prefix += get_level_prefix(gb);
501 
502             //first coefficient has suffix_length equal to 0 or 1
503             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
504                 if(suffix_length)
505                     level_code= (prefix<<1) + get_bits1(gb); //part
506                 else
507                     level_code= prefix; //part
508             }else if(prefix==14){
509                 if(suffix_length)
510                     level_code= (prefix<<1) + get_bits1(gb); //part
511                 else
512                     level_code= prefix + get_bits(gb, 4); //part
513             }else{
514                 level_code= 30;
515                 if(prefix>=16){
516                     if(prefix > 25+3){
517                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
518                         return -1;
519                     }
520                     level_code += (1<<(prefix-3))-4096;
521                 }
522                 level_code += get_bits(gb, prefix-3); //part
523             }
524 
525             if(trailing_ones < 3) level_code += 2;
526 
527             suffix_length = 2;
528             mask= -(level_code&1);
529             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530         }else{
531             level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532 
533             suffix_length = 1 + (level_code + 3U > 6U);
534             level[trailing_ones]= level_code;
535         }
536 
537         //remaining coefficients have suffix_length > 0
538         for(i=trailing_ones+1;i<total_coeff;i++) {
539             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541             level_code= cavlc_level_tab[suffix_length][bitsi][0];
542 
543             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544             if(level_code >= 100){
545                 prefix= level_code - 100;
546                 if(prefix == LEVEL_TAB_BITS){
547                     prefix += get_level_prefix(gb);
548                 }
549                 if(prefix<15){
550                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551                 }else{
552                     level_code = 15<<suffix_length;
553                     if (prefix>=16) {
554                         if(prefix > 25+3){
555                             av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
556                             return AVERROR_INVALIDDATA;
557                         }
558                         level_code += (1<<(prefix-3))-4096;
559                     }
560                     level_code += get_bits(gb, prefix-3);
561                 }
562                 mask= -(level_code&1);
563                 level_code= (((2+level_code)>>1) ^ mask) - mask;
564             }
565             level[i]= level_code;
566             suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
567         }
568     }
569 
570     if(total_coeff == max_coeff)
571         zeros_left=0;
572     else{
573         if (max_coeff <= 8) {
574             if (max_coeff == 4)
575                 zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
576                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
577             else
578                 zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
579                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
580         } else {
581             zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
582         }
583     }
584 
585 #define STORE_BLOCK(type) \
586     scantable += zeros_left + total_coeff - 1; \
587     if(n >= LUMA_DC_BLOCK_INDEX){ \
588         ((type*)block)[*scantable] = level[0]; \
589         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
590             if(zeros_left < 7) \
591                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
592             else \
593                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
594             zeros_left -= run_before; \
595             scantable -= 1 + run_before; \
596             ((type*)block)[*scantable]= level[i]; \
597         } \
598         for(;i<total_coeff;i++) { \
599             scantable--; \
600             if (i < 16) { \
601                 ((type*)block)[*scantable]= level[i]; \
602             } \
603         } \
604     }else{ \
605         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
606         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
607             if(zeros_left < 7) \
608                 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
609             else \
610                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
611             zeros_left -= run_before; \
612             scantable -= 1 + run_before; \
613             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
614         } \
615         for(;i<total_coeff;i++) { \
616             scantable--; \
617             ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
618         } \
619     }
620 
621     if (h->pixel_shift) {
622         STORE_BLOCK(int32_t)
623     } else {
624         STORE_BLOCK(int16_t)
625     }
626 
627     if(zeros_left<0){
628         av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
629         return -1;
630     }
631 
632     return 0;
633 }
634 
decode_luma_residual(H264Context * h,GetBitContext * gb,const uint8_t * scan,const uint8_t * scan8x8,int pixel_shift,int mb_type,int cbp,int p)635 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
636     int i4x4, i8x8;
637     int qscale = p == 0 ? h->qscale : h->chroma_qp[p-1];
638     if(IS_INTRA16x16(mb_type)){
639         AV_ZERO128(h->mb_luma_dc[p]+0);
640         AV_ZERO128(h->mb_luma_dc[p]+8);
641         AV_ZERO128(h->mb_luma_dc[p]+16);
642         AV_ZERO128(h->mb_luma_dc[p]+24);
643         if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
644             return -1; //FIXME continue if partitioned and other return -1 too
645         }
646 
647         av_assert2((cbp&15) == 0 || (cbp&15) == 15);
648 
649         if(cbp&15){
650             for(i8x8=0; i8x8<4; i8x8++){
651                 for(i4x4=0; i4x4<4; i4x4++){
652                     const int index= i4x4 + 4*i8x8 + p*16;
653                     if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
654                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
655                         return -1;
656                     }
657                 }
658             }
659             return 0xf;
660         }else{
661             fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
662             return 0;
663         }
664     }else{
665         int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
666         /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
667         int new_cbp = 0;
668         for(i8x8=0; i8x8<4; i8x8++){
669             if(cbp & (1<<i8x8)){
670                 if(IS_8x8DCT(mb_type)){
671                     int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
672                     uint8_t *nnz;
673                     for(i4x4=0; i4x4<4; i4x4++){
674                         const int index= i4x4 + 4*i8x8 + p*16;
675                         if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
676                                             h->dequant8_coeff[cqm][qscale], 16) < 0 )
677                             return -1;
678                     }
679                     nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
680                     nnz[0] += nnz[1] + nnz[8] + nnz[9];
681                     new_cbp |= !!nnz[0] << i8x8;
682                 }else{
683                     for(i4x4=0; i4x4<4; i4x4++){
684                         const int index= i4x4 + 4*i8x8 + p*16;
685                         if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
686                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
687                             return -1;
688                         }
689                         new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
690                     }
691                 }
692             }else{
693                 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
694                 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
695             }
696         }
697         return new_cbp;
698     }
699 }
700 
ff_h264_decode_mb_cavlc(H264Context * h)701 int ff_h264_decode_mb_cavlc(H264Context *h){
702     int mb_xy;
703     int partition_count;
704     unsigned int mb_type, cbp;
705     int dct8x8_allowed= h->pps.transform_8x8_mode;
706     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
707     const int pixel_shift = h->pixel_shift;
708     unsigned local_ref_count[2];
709 
710     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
711 
712     tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
713     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
714                 down the code */
715     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
716         if(h->mb_skip_run==-1)
717             h->mb_skip_run= get_ue_golomb_long(&h->gb);
718 
719         if (h->mb_skip_run--) {
720             if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
721                 if(h->mb_skip_run==0)
722                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
723             }
724             decode_mb_skip(h);
725             return 0;
726         }
727     }
728     if (FRAME_MBAFF(h)) {
729         if( (h->mb_y&1) == 0 )
730             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
731     }
732 
733     h->prev_mb_skipped= 0;
734 
735     mb_type= get_ue_golomb(&h->gb);
736     if(h->slice_type_nos == AV_PICTURE_TYPE_B){
737         if(mb_type < 23){
738             partition_count= b_mb_type_info[mb_type].partition_count;
739             mb_type=         b_mb_type_info[mb_type].type;
740         }else{
741             mb_type -= 23;
742             goto decode_intra_mb;
743         }
744     }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
745         if(mb_type < 5){
746             partition_count= p_mb_type_info[mb_type].partition_count;
747             mb_type=         p_mb_type_info[mb_type].type;
748         }else{
749             mb_type -= 5;
750             goto decode_intra_mb;
751         }
752     }else{
753        av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
754         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
755             mb_type--;
756 decode_intra_mb:
757         if(mb_type > 25){
758             av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), h->mb_x, h->mb_y);
759             return -1;
760         }
761         partition_count=0;
762         cbp= i_mb_type_info[mb_type].cbp;
763         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
764         mb_type= i_mb_type_info[mb_type].type;
765     }
766 
767     if(MB_FIELD(h))
768         mb_type |= MB_TYPE_INTERLACED;
769 
770     h->slice_table[ mb_xy ]= h->slice_num;
771 
772     if(IS_INTRA_PCM(mb_type)){
773         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
774                             h->sps.bit_depth_luma;
775 
776         // We assume these blocks are very rare so we do not optimize it.
777         h->intra_pcm_ptr = align_get_bits(&h->gb);
778         if (get_bits_left(&h->gb) < mb_size) {
779             av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
780             return AVERROR_INVALIDDATA;
781         }
782         skip_bits_long(&h->gb, mb_size);
783 
784         // In deblocking, the quantizer is 0
785         h->cur_pic.qscale_table[mb_xy] = 0;
786         // All coeffs are present
787         memset(h->non_zero_count[mb_xy], 16, 48);
788 
789         h->cur_pic.mb_type[mb_xy] = mb_type;
790         return 0;
791     }
792 
793     local_ref_count[0] = h->ref_count[0] << MB_MBAFF(h);
794     local_ref_count[1] = h->ref_count[1] << MB_MBAFF(h);
795 
796     fill_decode_neighbors(h, mb_type);
797     fill_decode_caches(h, mb_type);
798 
799     //mb_pred
800     if(IS_INTRA(mb_type)){
801         int pred_mode;
802 //            init_top_left_availability(h);
803         if(IS_INTRA4x4(mb_type)){
804             int i;
805             int di = 1;
806             if(dct8x8_allowed && get_bits1(&h->gb)){
807                 mb_type |= MB_TYPE_8x8DCT;
808                 di = 4;
809             }
810 
811 //                fill_intra4x4_pred_table(h);
812             for(i=0; i<16; i+=di){
813                 int mode= pred_intra_mode(h, i);
814 
815                 if(!get_bits1(&h->gb)){
816                     const int rem_mode= get_bits(&h->gb, 3);
817                     mode = rem_mode + (rem_mode >= mode);
818                 }
819 
820                 if(di==4)
821                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
822                 else
823                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
824             }
825             write_back_intra_pred_mode(h);
826             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
827                 return -1;
828         }else{
829             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode, 0);
830             if(h->intra16x16_pred_mode < 0)
831                 return -1;
832         }
833         if(decode_chroma){
834             pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&h->gb), 1);
835             if(pred_mode < 0)
836                 return -1;
837             h->chroma_pred_mode= pred_mode;
838         } else {
839             h->chroma_pred_mode = DC_128_PRED8x8;
840         }
841     }else if(partition_count==4){
842         int i, j, sub_partition_count[4], list, ref[2][4];
843 
844         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
845             for(i=0; i<4; i++){
846                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
847                 if(h->sub_mb_type[i] >=13){
848                     av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
849                     return -1;
850                 }
851                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
852                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
853             }
854             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
855                 ff_h264_pred_direct_motion(h, &mb_type);
856                 h->ref_cache[0][scan8[4]] =
857                 h->ref_cache[1][scan8[4]] =
858                 h->ref_cache[0][scan8[12]] =
859                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
860             }
861         }else{
862             av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
863             for(i=0; i<4; i++){
864                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
865                 if(h->sub_mb_type[i] >=4){
866                     av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
867                     return -1;
868                 }
869                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
870                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
871             }
872         }
873 
874         for(list=0; list<h->list_count; list++){
875             int ref_count = IS_REF0(mb_type) ? 1 : local_ref_count[list];
876             for(i=0; i<4; i++){
877                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
878                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
879                     unsigned int tmp;
880                     if(ref_count == 1){
881                         tmp= 0;
882                     }else if(ref_count == 2){
883                         tmp= get_bits1(&h->gb)^1;
884                     }else{
885                         tmp= get_ue_golomb_31(&h->gb);
886                         if(tmp>=ref_count){
887                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
888                             return -1;
889                         }
890                     }
891                     ref[list][i]= tmp;
892                 }else{
893                  //FIXME
894                     ref[list][i] = -1;
895                 }
896             }
897         }
898 
899         if(dct8x8_allowed)
900             dct8x8_allowed = get_dct8x8_allowed(h);
901 
902         for(list=0; list<h->list_count; list++){
903             for(i=0; i<4; i++){
904                 if(IS_DIRECT(h->sub_mb_type[i])) {
905                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
906                     continue;
907                 }
908                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
909                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
910 
911                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
912                     const int sub_mb_type= h->sub_mb_type[i];
913                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
914                     for(j=0; j<sub_partition_count[i]; j++){
915                         int mx, my;
916                         const int index= 4*i + block_width*j;
917                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
918                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
919                         mx += get_se_golomb(&h->gb);
920                         my += get_se_golomb(&h->gb);
921                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
922 
923                         if(IS_SUB_8X8(sub_mb_type)){
924                             mv_cache[ 1 ][0]=
925                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
926                             mv_cache[ 1 ][1]=
927                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
928                         }else if(IS_SUB_8X4(sub_mb_type)){
929                             mv_cache[ 1 ][0]= mx;
930                             mv_cache[ 1 ][1]= my;
931                         }else if(IS_SUB_4X8(sub_mb_type)){
932                             mv_cache[ 8 ][0]= mx;
933                             mv_cache[ 8 ][1]= my;
934                         }
935                         mv_cache[ 0 ][0]= mx;
936                         mv_cache[ 0 ][1]= my;
937                     }
938                 }else{
939                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
940                     p[0] = p[1]=
941                     p[8] = p[9]= 0;
942                 }
943             }
944         }
945     }else if(IS_DIRECT(mb_type)){
946         ff_h264_pred_direct_motion(h, &mb_type);
947         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
948     }else{
949         int list, mx, my, i;
950          //FIXME we should set ref_idx_l? to 0 if we use that later ...
951         if(IS_16X16(mb_type)){
952             for(list=0; list<h->list_count; list++){
953                     unsigned int val;
954                     if(IS_DIR(mb_type, 0, list)){
955                         if(local_ref_count[list]==1){
956                             val= 0;
957                         } else if(local_ref_count[list]==2){
958                             val= get_bits1(&h->gb)^1;
959                         }else{
960                             val= get_ue_golomb_31(&h->gb);
961                             if (val >= local_ref_count[list]){
962                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
963                                 return -1;
964                             }
965                         }
966                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
967                     }
968             }
969             for(list=0; list<h->list_count; list++){
970                 if(IS_DIR(mb_type, 0, list)){
971                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
972                     mx += get_se_golomb(&h->gb);
973                     my += get_se_golomb(&h->gb);
974                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
975 
976                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
977                 }
978             }
979         }
980         else if(IS_16X8(mb_type)){
981             for(list=0; list<h->list_count; list++){
982                     for(i=0; i<2; i++){
983                         unsigned int val;
984                         if(IS_DIR(mb_type, i, list)){
985                             if(local_ref_count[list] == 1) {
986                                 val= 0;
987                             } else if(local_ref_count[list] == 2) {
988                                 val= get_bits1(&h->gb)^1;
989                             }else{
990                                 val= get_ue_golomb_31(&h->gb);
991                                 if (val >= local_ref_count[list]){
992                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
993                                     return -1;
994                                 }
995                             }
996                         }else
997                             val= LIST_NOT_USED&0xFF;
998                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
999                     }
1000             }
1001             for(list=0; list<h->list_count; list++){
1002                 for(i=0; i<2; i++){
1003                     unsigned int val;
1004                     if(IS_DIR(mb_type, i, list)){
1005                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1006                         mx += get_se_golomb(&h->gb);
1007                         my += get_se_golomb(&h->gb);
1008                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1009 
1010                         val= pack16to32(mx,my);
1011                     }else
1012                         val=0;
1013                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1014                 }
1015             }
1016         }else{
1017             av_assert2(IS_8X16(mb_type));
1018             for(list=0; list<h->list_count; list++){
1019                     for(i=0; i<2; i++){
1020                         unsigned int val;
1021                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1022                             if(local_ref_count[list]==1){
1023                                 val= 0;
1024                             } else if(local_ref_count[list]==2){
1025                                 val= get_bits1(&h->gb)^1;
1026                             }else{
1027                                 val= get_ue_golomb_31(&h->gb);
1028                                 if (val >= local_ref_count[list]){
1029                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1030                                     return -1;
1031                                 }
1032                             }
1033                         }else
1034                             val= LIST_NOT_USED&0xFF;
1035                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1036                     }
1037             }
1038             for(list=0; list<h->list_count; list++){
1039                 for(i=0; i<2; i++){
1040                     unsigned int val;
1041                     if(IS_DIR(mb_type, i, list)){
1042                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1043                         mx += get_se_golomb(&h->gb);
1044                         my += get_se_golomb(&h->gb);
1045                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
1046 
1047                         val= pack16to32(mx,my);
1048                     }else
1049                         val=0;
1050                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1051                 }
1052             }
1053         }
1054     }
1055 
1056     if(IS_INTER(mb_type))
1057         write_back_motion(h, mb_type);
1058 
1059     if(!IS_INTRA16x16(mb_type)){
1060         cbp= get_ue_golomb(&h->gb);
1061 
1062         if(decode_chroma){
1063             if(cbp > 47){
1064                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1065                 return -1;
1066             }
1067             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1068             else                     cbp= golomb_to_inter_cbp   [cbp];
1069         }else{
1070             if(cbp > 15){
1071                 av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
1072                 return -1;
1073             }
1074             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1075             else                     cbp= golomb_to_inter_cbp_gray[cbp];
1076         }
1077     } else {
1078         if (!decode_chroma && cbp>15) {
1079             av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1080             return AVERROR_INVALIDDATA;
1081         }
1082     }
1083 
1084     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1085         mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
1086     }
1087     h->cbp=
1088     h->cbp_table[mb_xy]= cbp;
1089     h->cur_pic.mb_type[mb_xy] = mb_type;
1090 
1091     if(cbp || IS_INTRA16x16(mb_type)){
1092         int i4x4, i8x8, chroma_idx;
1093         int dquant;
1094         int ret;
1095         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1096         const uint8_t *scan, *scan8x8;
1097         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1098 
1099         if(IS_INTERLACED(mb_type)){
1100             scan8x8= h->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1101             scan= h->qscale ? h->field_scan : h->field_scan_q0;
1102         }else{
1103             scan8x8= h->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1104             scan= h->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1105         }
1106 
1107         dquant= get_se_golomb(&h->gb);
1108 
1109         h->qscale += dquant;
1110 
1111         if(((unsigned)h->qscale) > max_qp){
1112             if(h->qscale<0) h->qscale+= max_qp+1;
1113             else            h->qscale-= max_qp+1;
1114             if(((unsigned)h->qscale) > max_qp){
1115                 av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
1116                 return -1;
1117             }
1118         }
1119 
1120         h->chroma_qp[0]= get_chroma_qp(h, 0, h->qscale);
1121         h->chroma_qp[1]= get_chroma_qp(h, 1, h->qscale);
1122 
1123         if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1124             return -1;
1125         }
1126         h->cbp_table[mb_xy] |= ret << 12;
1127         if (CHROMA444(h)) {
1128             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1129                 return -1;
1130             }
1131             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1132                 return -1;
1133             }
1134         } else {
1135             const int num_c8x8 = h->sps.chroma_format_idc;
1136 
1137             if(cbp&0x30){
1138                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1139                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1140                                         CHROMA_DC_BLOCK_INDEX+chroma_idx,
1141                                         CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
1142                                         NULL, 4*num_c8x8) < 0) {
1143                         return -1;
1144                     }
1145             }
1146 
1147             if(cbp&0x20){
1148                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1149                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1150                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1151                     for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1152                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
1153                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1154                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1155                                 return -1;
1156                             mb += 16 << pixel_shift;
1157                         }
1158                     }
1159                 }
1160             }else{
1161                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1162                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1163             }
1164         }
1165     }else{
1166         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1167         fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1168         fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1169     }
1170     h->cur_pic.qscale_table[mb_xy] = h->qscale;
1171     write_back_non_zero_count(h);
1172 
1173     return 0;
1174 }
1175