1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp9_rtcd.h"
12 #include "./vpx_config.h"
13 #include "./vpx_dsp_rtcd.h"
14 
15 #include "vpx_dsp/quantize.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vpx_ports/mem.h"
18 
19 #if CONFIG_MISMATCH_DEBUG
20 #include "vpx_util/vpx_debug_util.h"
21 #endif
22 
23 #include "vp9/common/vp9_idct.h"
24 #include "vp9/common/vp9_reconinter.h"
25 #include "vp9/common/vp9_reconintra.h"
26 #include "vp9/common/vp9_scan.h"
27 
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_rd.h"
30 #include "vp9/encoder/vp9_tokenize.h"
31 
32 struct optimize_ctx {
33   ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
34   ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
35 };
36 
// Runs the block-subtract routine over one whole plane of the block.
//
// x:     macroblock whose plane[plane].src_diff receives the result
// bsize: block size, converted to the plane's own block size before use
// plane: plane index
//
// The source buffer (p->src) and the destination buffer (pd->dst) are the two
// inputs; src_diff is written with a stride equal to the plane block width.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
  // The lookups count 4x4 units; scale to pixels.
  const int width = num_4x4_blocks_wide_lookup[plane_bsize] * 4;
  const int height = num_4x4_blocks_high_lookup[plane_bsize] * 4;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(height, width, enc_plane->src_diff, width,
                              enc_plane->src.buf, enc_plane->src.stride,
                              dec_plane->dst.buf, dec_plane->dst.stride,
                              xd->bd);
  } else {
    vpx_subtract_block(height, width, enc_plane->src_diff, width,
                       enc_plane->src.buf, enc_plane->src.stride,
                       dec_plane->dst.buf, dec_plane->dst.stride);
  }
#else
  vpx_subtract_block(height, width, enc_plane->src_diff, width,
                     enc_plane->src.buf, enc_plane->src.stride,
                     dec_plane->dst.buf, dec_plane->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
55 
56 static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
57   { 10, 6 },
58   { 8, 5 },
59 };
60 
// Right shift that works on the magnitude: a negative 'num' is negated,
// shifted, and negated back, so a negative value is never shifted directly
// and the result rounds toward zero. 'shift' must be non-negative.
// Note that 'num' is evaluated twice.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) < 0) ? -((-(num)) >> (shift)) : ((num) >> (shift)))
64 
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//
// The coefficients are visited in scan order. A zero coefficient only adds
// its token rate. For every nonzero coefficient two candidates are compared:
// keep the quantized value, or lower its magnitude by one. The candidate with
// the lower RD cost (token rate, rate effect on the next token, distortion)
// is written back. In parallel, the position that gives the lowest
// accumulated block RD cost when the block is ended right after it is
// remembered as the best end-of-block (eob).
//
// After the scan, coefficients from the best eob onward are cleared. The
// exception is when sharpness is nonzero and a selected coefficient with
// magnitude above 1 was seen after the best eob: then nothing is cleared and
// the eob is placed just past the last nonzero coefficient.
//
// mb:      macroblock state; the block's qcoeff/dqcoeff are modified in place
// plane:   plane index
// block:   block index (used with BLOCK_OFFSET() and p->eobs[])
// tx_size: transform size of the block
// ctx:     entropy context used for the first token
//
// Returns the new eob, which is also stored in p->eobs[block].
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int is_inter = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const int max_eob = 16 << (tx_size << 1);
  // 32x32 coefficients carry one extra bit of scaling.
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const order = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = order->scan;
  const int16_t *const neighbors = order->neighbors;
  const MODE_INFO *const mi = xd->mi[0];
  const int sharpness = mb->sharpness;
  const int64_t rd_base =
      (int64_t)mb->rdmult * plane_rd_mult[is_inter][plane_type];
  const int64_t rdmult =
      (sharpness == 0) ? (rd_base >> 1)
                       : ((rd_base * (8 - sharpness + mi->segment_id)) >> 4);
  const int64_t rddiv = mb->rddiv;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *const cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *const cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  // Token costs for this tx size / plane type / ref type, indexed by band.
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][is_inter];
  int64_t total_rate = 0;
  // Starts at the worst possible error for the largest transform size so the
  // running value never goes negative.
  int64_t total_error = ((int64_t)1) << 50;
  int64_t best_block_rd;
  int64_t eob_only_rate;
  int prev_q = 1;
  int best_eob = 0;
  int high_values_after_eob = 0;
  tran_low_t best_eob_qc = 0;
  tran_low_t best_eob_dqc = 0;
  int16_t tok_keep, tok_dec;
  int i;

  assert(!plane_type == !plane);
  assert(eob <= max_eob);

  // Seed the energy-class cache from the current quantized values.
  for (i = 0; i < eob; ++i) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }

  // Baseline: the block is coded as an immediate end-of-block.
  eob_only_rate = token_costs[band_translate[0]][0][ctx][EOB_TOKEN];
  best_block_rd = RDCOST(rdmult, rddiv, eob_only_rate, total_error);

  for (i = 0; i < eob; ++i) {
    const int rc = scan[i];
    const int q_keep = qcoeff[rc];
    const int band = band_translate[i];
    const int ctx_cur =
        (i != 0) ? get_coef_context(neighbors, token_cache, i) : ctx;
    const int tree_sel = (prev_q == 0);

    if (q_keep == 0) {
      // Nothing to search; the accumulated error is unchanged.
      const int64_t zero_rate =
          token_costs[band][tree_sel][ctx_cur][vp9_get_token(q_keep)];
      total_rate += zero_rate;
      prev_q = 0;
    } else {
#if CONFIG_VP9_HIGHBITDEPTH
      const int highbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
      const int bd_shift = xd->bd - 8;
#else
      const int highbd = 0;
      const int bd_shift = 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
      const int dqv = dequant[rc != 0];

      // Distortion if this coefficient were quantized to zero.
      const int err_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int err_zero =
          highbd ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(err_zero_raw, bd_shift)
                 : err_zero_raw;
      const int64_t dist_zero = (int64_t)err_zero * err_zero;

      // Distortion of the "keep" candidate.
      const int err_keep_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int err_keep =
          highbd ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(err_keep_raw, bd_shift)
                 : err_keep_raw;
      const int64_t dist_keep = (int64_t)err_keep * err_keep;

      // "Decrement" candidate: same sign, magnitude reduced by one.
      const int sign = -(q_keep < 0);  // -1 for negative values, else 0.
      const int q_dec = q_keep - 2 * sign - 1;
      int64_t dist_dec;

      if (q_dec == 0) {
        dist_dec = dist_zero;
      } else {
        const int dq_step = highbd ? (dqv >> bd_shift) : dqv;
        // Gives dq_step the sign of the coefficient.
        const int signed_step = (dq_step + sign) ^ sign;
        const int err_dec = err_keep - signed_step;
        assert(dqv > 0);  // The shift above never sees a negative number.
        dist_dec = (int64_t)err_dec * err_dec;
      }

      {
        // Rate of the current token for both candidates.
        const int64_t bits_keep =
            vp9_get_token_cost(q_keep, &tok_keep, cat6_high_cost);
        const int64_t bits_dec =
            vp9_get_token_cost(q_dec, &tok_dec, cat6_high_cost);
        const int64_t rate_keep =
            bits_keep + token_costs[band][tree_sel][ctx_cur][tok_keep];
        const int64_t rate_dec =
            bits_dec + token_costs[band][tree_sel][ctx_cur][tok_dec];
        // Rate effect on the following token (or on an EOB placed there).
        int64_t next_rate_keep = 0;
        int64_t next_rate_dec = 0;
        int64_t next_eob_rate_keep = 0;
        int64_t next_eob_rate_dec = 0;
        int64_t rd_keep, rd_dec;
        int64_t eob_rd_keep, eob_rd_dec, eob_rd_here;
        int dec_wins, dec_wins_at_eob, eob_uses_dec;
        int dqc_keep, dqc_dec;

        if (i < max_eob - 1) {
          const int band_next = band_translate[i + 1];
          const int token_next = (i + 1 == eob)
                                     ? EOB_TOKEN
                                     : vp9_get_token(qcoeff[scan[i + 1]]);
          int ctx_next, sel_next;

          // The cache entry is set temporarily so the next context reflects
          // each candidate in turn.
          token_cache[rc] = vp9_pt_energy_class[tok_keep];
          ctx_next = get_coef_context(neighbors, token_cache, i + 1);
          sel_next = (q_keep == 0);
          next_rate_keep =
              token_costs[band_next][sel_next][ctx_next][token_next];
          next_eob_rate_keep =
              token_costs[band_next][sel_next][ctx_next][EOB_TOKEN];

          token_cache[rc] = vp9_pt_energy_class[tok_dec];
          ctx_next = get_coef_context(neighbors, token_cache, i + 1);
          sel_next = (q_dec == 0);
          next_rate_dec =
              token_costs[band_next][sel_next][ctx_next][token_next];
          if (q_dec != 0) {
            next_eob_rate_dec =
                token_costs[band_next][sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Per-coefficient comparison of the two candidates.
        rd_keep =
            RDCOST(rdmult, rddiv, (rate_keep + next_rate_keep), dist_keep);
        rd_dec = RDCOST(rdmult, rddiv, (rate_dec + next_rate_dec), dist_dec);
        dec_wins = (rd_dec < rd_keep);

        // Whole-block cost if the block ended right after this coefficient.
        eob_rd_keep =
            RDCOST(rdmult, rddiv, (total_rate + rate_keep + next_eob_rate_keep),
                   (total_error + dist_keep - dist_zero));
        eob_rd_dec = eob_rd_keep;
        dec_wins_at_eob = 0;
        if (q_dec != 0) {
          eob_rd_dec =
              RDCOST(rdmult, rddiv, (total_rate + rate_dec + next_eob_rate_dec),
                     (total_error + dist_dec - dist_zero));
          dec_wins_at_eob = (eob_rd_dec < eob_rd_keep);
        }

        // De-quantized values for both candidates; the decremented one is
        // only computed when it can actually be used.
        dqc_keep = dqcoeff[rc];
        dqc_dec = 0;
        if ((dec_wins || dec_wins_at_eob) && q_dec != 0) {
          dqc_dec = RIGHT_SHIFT_POSSIBLY_NEGATIVE(q_dec * dqv, shift);
        }

        // Commit the cheaper candidate.
        if (dec_wins) {
          qcoeff[rc] = q_dec;
          dqcoeff[rc] = dqc_dec;
          total_rate += rate_dec;
          total_error += dist_dec - dist_zero;
          assert(dist_dec <= dist_zero);
          token_cache[rc] = vp9_pt_energy_class[tok_dec];
        } else {
          total_rate += rate_keep;
          total_error += dist_keep - dist_zero;
          assert(dist_keep <= dist_zero);
          token_cache[rc] = vp9_pt_energy_class[tok_keep];
        }
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) ++high_values_after_eob;
        assert(total_error >= 0);
        prev_q = qcoeff[rc];  // Follows the value that was selected.

        eob_uses_dec = (q_dec != 0) && dec_wins_at_eob;
        eob_rd_here = eob_uses_dec ? eob_rd_dec : eob_rd_keep;

        // Move the best eob to i + 1 when ending here is cheaper overall.
        if (eob_rd_here < best_block_rd) {
          best_block_rd = eob_rd_here;
          best_eob = i + 1;
          high_values_after_eob = 0;
          best_eob_qc = eob_uses_dec ? q_dec : q_keep;
          best_eob_dqc = eob_uses_dec ? dqc_dec : dqc_keep;
        }
      }
    }
  }

  if (high_values_after_eob > 0) {
    // Keep every coefficient; the eob goes just past the last nonzero one.
    best_eob = eob;
    while (best_eob > 0 && qcoeff[scan[best_eob - 1]] == 0) --best_eob;
  } else {
    assert(best_eob <= eob);
    if (best_eob > 0) {
      // Restore the value that was in effect when the best eob was recorded.
      const int rc = scan[best_eob - 1];
      assert(best_eob_qc != 0);
      qcoeff[rc] = best_eob_qc;
      dqcoeff[rc] = best_eob_dqc;
    }
    for (i = best_eob; i < eob; ++i) {
      const int rc = scan[i];
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }
  p->eobs[block] = best_eob;
  return best_eob;
}
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
329 
fdct32x32(int rd_transform,const int16_t * src,tran_low_t * dst,int src_stride)330 static INLINE void fdct32x32(int rd_transform, const int16_t *src,
331                              tran_low_t *dst, int src_stride) {
332   if (rd_transform)
333     vpx_fdct32x32_rd(src, dst, src_stride);
334   else
335     vpx_fdct32x32(src, dst, src_stride);
336 }
337 
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth forward 32x32 DCT dispatcher: a nonzero rd_transform selects
// vpx_highbd_fdct32x32_rd(), zero selects vpx_highbd_fdct32x32().
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (!rd_transform) {
    vpx_highbd_fdct32x32(src, dst, src_stride);
    return;
  }
  vpx_highbd_fdct32x32_rd(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
347 
// Forward-transforms one block of the residual and quantizes it with the
// vp9_quantize_fp family (p->round_fp / p->quant_fp).
//
// x:           macroblock state; must not have skip_block set
// plane:       plane index
// block:       block index used with BLOCK_OFFSET() and p->eobs[]
// row, col:    block position inside the plane, in units of 4 pixels
// plane_bsize: block size of the plane, gives the residual stride
// tx_size:     transform size
//
// Writes coeff, qcoeff, dqcoeff and p->eobs[block]. Always uses the default
// scan order for tx_size.
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff =
      &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;

  // The caller is expected to have dealt with the skip-block case already.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // 32x32 has its own quantizer; every other size shares one.
    if (tx_size == TX_32X32) {
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                                   p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                                   eob, so->scan, so->iscan);
      return;
    }
    switch (tx_size) {
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        n_coeffs = 256;
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        n_coeffs = 64;
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        n_coeffs = 16;
        break;
    }
    vp9_highbd_quantize_fp(coeff, n_coeffs, x->skip_block, p->round_fp,
                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                           so->scan, so->iscan);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                          p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                          so->scan, so->iscan);
    return;
  }
  switch (tx_size) {
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      n_coeffs = 256;
      break;
    case TX_8X8:
      vpx_fdct8x8(src_diff, coeff, diff_stride);
      n_coeffs = 64;
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      n_coeffs = 16;
      break;
  }
  vp9_quantize_fp(coeff, n_coeffs, x->skip_block, p->round_fp, p->quant_fp,
                  qcoeff, dqcoeff, pd->dequant, eob, so->scan, so->iscan);
}
426 
// Transforms one block of the residual with the "_1" forward-transform
// variants and quantizes it with the DC quantizers, using p->round,
// p->quant_fp[0] and pd->dequant[0].
//
// x:           macroblock state; must not have skip_block set
// plane:       plane index
// block:       block index used with BLOCK_OFFSET() and p->eobs[]
// row, col:    block position inside the plane, in units of 4 pixels
// plane_bsize: block size of the plane, gives the residual stride
// tx_size:     transform size
//
// Writes coeff, qcoeff, dqcoeff and p->eobs[block]. The 4x4 case uses
// x->fwd_txfm4x4, as the other transform+quantize routines do.
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff =
      &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;

  // The caller is expected to have dealt with the skip-block case already.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // The 32x32 DC quantizer takes no coefficient count.
    if (tx_size == TX_32X32) {
      vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                   p->quant_fp[0], qcoeff, dqcoeff,
                                   pd->dequant[0], eob);
      return;
    }
    switch (tx_size) {
      case TX_16X16:
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
        n_coeffs = 256;
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
        n_coeffs = 64;
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        n_coeffs = 16;
        break;
    }
    vpx_highbd_quantize_dc(coeff, n_coeffs, x->skip_block, p->round,
                           p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                           eob);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    vpx_fdct32x32_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
                          qcoeff, dqcoeff, pd->dequant[0], eob);
    return;
  }
  switch (tx_size) {
    case TX_16X16:
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
      n_coeffs = 256;
      break;
    case TX_8X8:
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
      n_coeffs = 64;
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      n_coeffs = 16;
      break;
  }
  vpx_quantize_dc(coeff, n_coeffs, x->skip_block, p->round, p->quant_fp[0],
                  qcoeff, dqcoeff, pd->dequant[0], eob);
}
499 
// Forward-transforms one block of the residual and quantizes it with the
// vpx_quantize_b family (p->zbin, p->round, p->quant, p->quant_shift).
//
// x:           macroblock state; must not have skip_block set
// plane:       plane index
// block:       block index used with BLOCK_OFFSET() and p->eobs[]
// row, col:    block position inside the plane, in units of 4 pixels
// plane_bsize: block size of the plane, gives the residual stride
// tx_size:     transform size
//
// Writes coeff, qcoeff, dqcoeff and p->eobs[block]. Always uses the default
// scan order for tx_size.
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff =
      &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;

  // The caller is expected to have dealt with the skip-block case already.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // 32x32 has its own quantizer; every other size shares one.
    if (tx_size == TX_32X32) {
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round, p->quant, p->quant_shift, qcoeff,
                                  dqcoeff, pd->dequant, eob, so->scan,
                                  so->iscan);
      return;
    }
    switch (tx_size) {
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        n_coeffs = 256;
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        n_coeffs = 64;
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        n_coeffs = 16;
        break;
    }
    vpx_highbd_quantize_b(coeff, n_coeffs, x->skip_block, p->zbin, p->round,
                          p->quant, p->quant_shift, qcoeff, dqcoeff,
                          pd->dequant, eob, so->scan, so->iscan);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                         p->quant, p->quant_shift, qcoeff, dqcoeff,
                         pd->dequant, eob, so->scan, so->iscan);
    return;
  }
  switch (tx_size) {
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      n_coeffs = 256;
      break;
    case TX_8X8:
      vpx_fdct8x8(src_diff, coeff, diff_stride);
      n_coeffs = 64;
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      n_coeffs = 16;
      break;
  }
  vpx_quantize_b(coeff, n_coeffs, x->skip_block, p->zbin, p->round, p->quant,
                 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, so->scan,
                 so->iscan);
}
582 
// Does the actual work of encode_block(): forward transform + quantization
// (unless recoding is skipped), optional coefficient optimization, entropy
// context update, and inverse transform added into the destination buffer.
// Returns early whenever the block ends up with no coefficients to add.
static void encode_block_core(int plane, int block, int row, int col,
                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                              struct encode_b_args *const args) {
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *const dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
  // Context entries for this block: ta is indexed by column, tl by row.
  ENTROPY_CONTEXT *const a = &args->ta[col];
  ENTROPY_CONTEXT *const l = &args->tl[row];
  int eob;

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    p->eobs[block] = 0;
    *a = *l = 0;
    return;
  }

  if (!x->skip_recode) {
    int skip_fwd_txfm = 0;

    if (x->quant_fp) {
      // Encoding process for rtc mode.
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
        skip_fwd_txfm = 1;
      } else {
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
      }
    } else if (max_txsize_lookup[plane_bsize] != tx_size) {
      vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
    } else {
      // The transform covers the largest size for this block, so the
      // per-block skip decision in x->skip_txfm applies.
      const int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
      if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
        // Full forward transform and quantization.
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
      } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
        // Fast path forward transform and quantization.
        vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
      } else {
        skip_fwd_txfm = 1;
      }
    }

    if (skip_fwd_txfm) {
      // No forward transform: the block has no coefficients.
      p->eobs[block] = 0;
      *a = *l = 0;
      return;
    }
  }

  if (x->optimize && !(x->skip_recode && x->skip_optimize)) {
    const int ctx = combine_entropy_contexts(*a, *l);
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
    *a = *l = p->eobs[block] > 0;
  }

  eob = p->eobs[block];
  if (eob) *(args->skip) = 0;

  if (x->skip_encode || eob == 0) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    if (tx_size == TX_32X32) {
      vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, eob, xd->bd);
    } else if (tx_size == TX_16X16) {
      vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, eob, xd->bd);
    } else if (tx_size == TX_8X8) {
      vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, eob, xd->bd);
    } else {
      assert(tx_size == TX_4X4);
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, eob, xd->bd);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, eob);
  } else if (tx_size == TX_16X16) {
    vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, eob);
  } else if (tx_size == TX_8X8) {
    vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, eob);
  } else {
    assert(tx_size == TX_4X4);
    // this is like vp9_short_idct4x4 but has a special case around eob<=1
    // which is significant (not just an optimization) for the lossless
    // case.
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, eob);
  }
}

// Per-transform-block encode callback. 'arg' is a struct encode_b_args.
//
// plane:       plane index
// block:       block index used with BLOCK_OFFSET() and p->eobs[]
// row, col:    block position inside the plane, in units of 4 pixels
// plane_bsize: block size of the plane
// tx_size:     transform size
//
// Effects: updates p->eobs[block] and the entropy contexts args->ta[col] /
// args->tl[row], clears *args->skip when the block has coefficients, and
// adds the inverse transform into pd->dst. With CONFIG_MISMATCH_DEBUG the
// block is also recorded through mismatch_record_block_tx() on every exit
// path when args->output_enabled is set.
static void encode_block(int plane, int block, int row, int col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;

  encode_block_core(plane, block, row, col, plane_bsize, tx_size, args);

#if CONFIG_MISMATCH_DEBUG
  if (args->output_enabled) {
    MACROBLOCKD *const xd = &args->x->e_mbd;
    struct macroblockd_plane *const pd = &xd->plane[plane];
    uint8_t *const dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
    const int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
    const int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
    int pixel_c, pixel_r;

    mi_to_pixel_loc(&pixel_c, &pixel_r, args->mi_col, args->mi_row, col, row,
                    pd->subsampling_x, pd->subsampling_y);
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
                             blk_w, blk_h,
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
  }
#endif  // CONFIG_MISMATCH_DEBUG
}
735 
// First-pass per-transform-block callback. 'arg' is the MACROBLOCK itself.
// Runs the full transform + quantization and, when the block has any
// coefficients, adds the inverse transform into the destination buffer via
// the macroblock's inverse-transform function pointers.
static void encode_block_pass1(int plane, int block, int row, int col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  // row and col are in units of 4 pixels.
  uint8_t *const dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);

  // Nothing to reconstruct when every coefficient quantized to zero.
  if (p->eobs[block] == 0) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
                           p->eobs[block], xd->bd);
  } else {
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  }
#else
  x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
760 
// Pass-1 encode of plane 0 only: subtracts the prediction for the plane and
// then runs encode_block_pass1() on each of its transform blocks.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int plane = 0;

  vp9_subtract_plane(x, bsize, plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_pass1,
                                         x);
}
766 
// Encodes every plane of the current block with encode_block().
//
// mi->skip is set to 1 before anything else, and its address is handed to
// encode_block() through the args struct. When x->skip is set the function
// returns right after that. Otherwise, for each plane:
//   - the prediction is subtracted unless x->skip_recode is set;
//   - if x->optimize is set and skip_recode/skip_optimize are not both set,
//     the plane's entropy contexts are loaded and coefficient optimization is
//     enabled for that plane;
//   - encode_block() is run on every transform block of the plane.
//
// mi_row, mi_col and output_enabled are only consumed when
// CONFIG_MISMATCH_DEBUG is enabled.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct optimize_ctx ctx;
  int plane;
#if CONFIG_MISMATCH_DEBUG
  struct encode_b_args arg = { x,         1,      NULL,   NULL,
                               &mi->skip, mi_row, mi_col, output_enabled };
#else
  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  mi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    int coeff_opt;

    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    // Evaluates to exactly 0 or 1, so it doubles as the args flag below.
    coeff_opt = x->optimize && !(x->skip_recode && x->skip_optimize);
    if (coeff_opt) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size =
          (plane == 0) ? mi->tx_size : get_uv_tx_size(mi, pd);
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
                               ctx.tl[plane]);
    }

    arg.enable_coeff_opt = coeff_opt;
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
806 
// Intra-codes a single transform block. arg must point to a
// struct encode_b_args.
//
// Steps, in order:
//   1. Pick the prediction mode, transform type and scan order for the block.
//   2. Write the intra prediction into the destination buffer.
//   3. Unless x->skip_recode is set: subtract the prediction from the source,
//      forward transform and quantize the residual, and (when
//      args->enable_coeff_opt is set) run vp9_optimize_b(), storing whether
//      its result is positive into the above/left entropy contexts.
//   4. Unless x->skip_encode is set or the eob is zero: add the inverse
//      transform of the dequantized coefficients onto the destination.
//   5. Clear *args->skip when the block has a non-zero eob.
//
// row and col are expressed in 4-sample units.
void vp9_encode_block_intra(int plane, int block, int row, int col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                            void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int diff_stride = 4 * (1 << bwl);
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  uint8_t *const src = &p->src.buf[4 * (row * src_stride + col)];
  uint8_t *const dst = &pd->dst.buf[4 * (row * dst_stride + col)];
  int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // When either flag is set the predictor reads its reference samples from
  // the source buffer rather than from the destination buffer.
  const int predict_from_src = x->skip_encode || x->fp_src_pred;
  const scan_order *so;
  TX_TYPE tx_type = DCT_DCT;
  PREDICTION_MODE mode;
  ENTROPY_CONTEXT *above_ctx = NULL;
  ENTROPY_CONTEXT *left_ctx = NULL;
  int entropy_ctx = 0;

  // The context pointers stay NULL (and are never dereferenced) when
  // coefficient optimization is disabled.
  if (args->enable_coeff_opt) {
    above_ctx = &args->ta[col];
    left_ctx = &args->tl[row];
    entropy_ctx = combine_entropy_contexts(*above_ctx, *left_ctx);
  }

  // Planes other than 0 always use mi->uv_mode; plane 0 uses a per-sub-block
  // mode for 4x4 transforms and mi->mode otherwise.
  if (plane != 0) {
    mode = mi->uv_mode;
  } else if (tx_size == TX_4X4) {
    mode = get_y_mode(mi, block);
  } else {
    mode = mi->mode;
  }

  // 32x32 keeps tx_type at DCT_DCT and uses the default scan order.
  if (tx_size == TX_32X32) {
    so = &vp9_default_scan_orders[TX_32X32];
  } else if (tx_size == TX_4X4) {
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
    so = &vp9_scan_orders[TX_4X4][tx_type];
  } else {
    tx_type = get_tx_type(get_plane_type(plane), xd);
    so = &vp9_scan_orders[tx_size][tx_type];
  }

  vp9_predict_intra_block(xd, bwl, tx_size, mode,
                          predict_from_src ? src : dst,
                          predict_from_src ? src_stride : dst_stride, dst,
                          dst_stride, col, row, plane);

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);

    // The switches below treat anything that is not 32x32/16x16/8x8 as 4x4.
    assert(tx_size == TX_32X32 || tx_size == TX_16X16 || tx_size == TX_8X8 ||
           tx_size == TX_4X4);

    // Forward path: residual -> transform -> quantize (-> optimize).
    if (!x->skip_recode) {
      switch (tx_size) {
        case TX_32X32:
          vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                      p->round, p->quant, p->quant_shift,
                                      qcoeff, dqcoeff, pd->dequant, eob,
                                      so->scan, so->iscan);
          break;
        case TX_16X16:
          vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT) {
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
          } else {
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
          }
          vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, so->scan, so->iscan);
          break;
        case TX_8X8:
          vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT) {
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
          } else {
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
          }
          vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, so->scan, so->iscan);
          break;
        default:  // TX_4X4
          vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT) {
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
          } else {
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
          }
          vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, so->scan, so->iscan);
          break;
      }

      if (args->enable_coeff_opt) {
        *above_ctx = *left_ctx =
            vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
    }

    // Inverse path: add the reconstructed residual onto the prediction.
    if (!x->skip_encode && *eob) {
      switch (tx_size) {
        case TX_32X32:
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
          break;
        case TX_16X16:
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                  xd->bd);
          break;
        case TX_8X8:
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                xd->bd);
          break;
        default:  // TX_4X4
          if (tx_type == DCT_DCT) {
            // Like vp9_short_idct4x4, except for a special case around
            // eob <= 1 that is significant (not just an optimization) for
            // the lossless case.
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
          } else {
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
                                     xd->bd);
          }
          break;
      }
    }

    if (*eob) *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // The switches below treat anything that is not 32x32/16x16/8x8 as 4x4.
  assert(tx_size == TX_32X32 || tx_size == TX_16X16 || tx_size == TX_8X8 ||
         tx_size == TX_4X4);

  // Forward path: residual -> transform -> quantize (-> optimize).
  if (!x->skip_recode) {
    switch (tx_size) {
      case TX_32X32:
        vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, eob, so->scan, so->iscan);
        break;
      case TX_16X16:
        vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       so->scan, so->iscan);
        break;
      case TX_8X8:
        vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       so->scan, so->iscan);
        break;
      default:  // TX_4X4
        vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        if (tx_type == DCT_DCT) {
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        } else {
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
        }
        vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       so->scan, so->iscan);
        break;
    }

    if (args->enable_coeff_opt) {
      *above_ctx = *left_ctx =
          vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    }
  }

  // Inverse path: add the reconstructed residual onto the prediction.
  if (!x->skip_encode && *eob) {
    switch (tx_size) {
      case TX_32X32:
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
        break;
      case TX_16X16:
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
        break;
      case TX_8X8:
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
        break;
      default:  // TX_4X4
        if (tx_type == DCT_DCT) {
          // Like vp9_short_idct4x4, except for a special case around
          // eob <= 1 that is significant (not just an optimization) for the
          // lossless case.
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
        } else {
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
        }
        break;
    }
  }

  if (*eob) *(args->skip) = 0;
}
1037 
// Runs vp9_encode_block_intra() on every transform block of one plane.
//
// enable_optimize_b requests coefficient optimization, but it only takes
// effect when x->optimize is also set and x->skip_recode / x->skip_optimize
// are not both set. In that case the plane's entropy contexts are loaded
// before the blocks are visited; otherwise the flag passed down to
// vp9_encode_block_intra() is 0.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int coeff_opt = enable_optimize_b && x->optimize &&
                        !(x->skip_recode && x->skip_optimize);
  struct optimize_ctx ctx;
#if CONFIG_MISMATCH_DEBUG
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = { x,
                               coeff_opt ? enable_optimize_b : 0,
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &xd->mi[0]->skip,
                               0,
                               0,
                               0 };
#else
  struct encode_b_args arg = { x, coeff_opt ? enable_optimize_b : 0,
                               ctx.ta[plane], ctx.tl[plane],
                               &xd->mi[0]->skip };
#endif

  if (coeff_opt) {
    const MODE_INFO *const mi = xd->mi[0];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        (plane == 0) ? mi->tx_size : get_uv_tx_size(mi, pd);
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}
1066