1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "./aom_dsp_rtcd.h"
15 #include "./av1_rtcd.h"
16 #include "aom_dsp/inv_txfm.h"
17 #include "aom_ports/mem.h"
18 #include "av1/common/av1_inv_txfm1d_cfg.h"
19 #include "av1/common/blockd.h"
20 #include "av1/common/enums.h"
21 #include "av1/common/idct.h"
22 #if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
23 CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
24 #include "av1/common/daala_tx.h"
25 #endif
26
av1_get_tx_scale(const TX_SIZE tx_size)27 int av1_get_tx_scale(const TX_SIZE tx_size) {
28 const int pels = tx_size_2d[tx_size];
29 return (pels > 256) + (pels > 1024) + (pels > 4096);
30 }
31
32 // NOTE: The implementation of all inverses need to be aware of the fact
33 // that input and output could be the same buffer.
34
35 #if CONFIG_EXT_TX
// 4-point inverse identity transform with a gain of sqrt(2)
// (rounded fixed point).
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 4; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
}
42
// 8-point inverse identity transform with a gain of 2.
static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 8; ++k) output[k] = input[k] * 2;
}
49
// 16-point inverse identity transform with a gain of 2 * sqrt(2)
// (rounded fixed point).
static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 16; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
}
56
// 32-point inverse identity transform with a gain of 4.
static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 32; ++k) output[k] = input[k] * 4;
}
63
64 #if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
// 64-point inverse identity transform with a gain of 4 * sqrt(2)
// (rounded fixed point).
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  for (int k = 0; k < 64; ++k)
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
}
#endif  // CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
72 #endif // CONFIG_EXT_TX
73
74 // For use in lieu of ADST
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t top_half[16];
  int k;
  // Scale the first 16 inputs by sqrt(2) into a scratch buffer before they
  // can be clobbered: input and output are allowed to alias.
  for (k = 0; k < 16; ++k) {
    top_half[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }
  // The last 16 inputs pass straight through with a gain of 4.
  for (k = 0; k < 16; ++k) {
    output[k] = input[16 + k] * 4;
  }
  // 16-point IDCT of the (scaled) first half into the tail of the output.
  aom_idct16_c(top_half, output + 16);
  // Note overall scaling factor is 4 times orthogonal
}
88
89 #if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
// 64-point column IDCT: widen to 32 bits, run the shared 64-point kernel
// with the column-pass bit-depth configuration, then narrow back.
static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
  int32_t buf_in[64], buf_out[64];
  int k;
  for (k = 0; k < 64; ++k) buf_in[k] = (int32_t)input[k];
  av1_idct64_new(buf_in, buf_out, inv_cos_bit_col_dct_64,
                 inv_stage_range_col_dct_64);
  for (k = 0; k < 64; ++k) output[k] = (tran_low_t)buf_out[k];
}
97
// 64-point row IDCT: widen to 32 bits, run the shared 64-point kernel
// with the row-pass bit-depth configuration, then narrow back.
static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
  int32_t buf_in[64], buf_out[64];
  int k;
  for (k = 0; k < 64; ++k) buf_in[k] = (int32_t)input[k];
  av1_idct64_new(buf_in, buf_out, inv_cos_bit_row_dct_64,
                 inv_stage_range_row_dct_64);
  for (k = 0; k < 64; ++k) output[k] = (tran_low_t)buf_out[k];
}
105
106 // For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
  tran_low_t top_half[32];
  int k;
  // Scale the first 32 inputs by sqrt(2) into a scratch buffer before they
  // can be clobbered: input and output are allowed to alias.
  for (k = 0; k < 32; ++k) {
    top_half[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
  }
  // The last 32 inputs pass straight through with a gain of 4 * sqrt(2).
  for (k = 0; k < 32; ++k) {
    output[k] = (tran_low_t)dct_const_round_shift(input[32 + k] * 4 * Sqrt2);
  }
  // 32-point IDCT of the (scaled) first half into the tail of the output.
  aom_idct32_c(top_half, output + 32);
  // Note overall scaling factor is 4 * sqrt(2) times orthogonal
}
#endif  // CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
121
122 // Inverse identity transform and add.
123 #if CONFIG_EXT_TX
// Inverse identity transform: shift the coefficients down and add them to
// the bsx-by-bsy destination block. Only the IDTX type is handled here.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           int bsx, int bsy, TX_TYPE tx_type) {
  if (tx_type != IDTX) return;
  const int pels = bsx * bsy;
  // Net shift: 3 for blocks up to 256 pels, 2 up to 1024, 1 beyond.
  const int shift = 3 - ((pels > 256) + (pels > 1024));
  for (int row = 0; row < bsy; ++row) {
    for (int col = 0; col < bsx; ++col)
      dest[col] = clip_pixel_add(dest[col], input[col] >> shift);
    dest += stride;
    input += bsx;
  }
}
138 #endif // CONFIG_EXT_TX
139
// Repositions (dest) onto its last row/element and negates (stride) so
// that subsequent forward iteration walks the buffer upside-down
// (a UD flip in that buffer's coordinates).
#define FLIPUD_PTR(dest, stride, size)       \
  do {                                       \
    (dest) = (dest) + ((size)-1) * (stride); \
    (stride) = -(stride);                    \
  } while (0)
145
146 #if CONFIG_EXT_TX
maybe_flip_strides(uint8_t ** dst,int * dstride,tran_low_t ** src,int * sstride,TX_TYPE tx_type,int sizey,int sizex)147 static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
148 int *sstride, TX_TYPE tx_type, int sizey,
149 int sizex) {
150 // Note that the transpose of src will be added to dst. In order to LR
151 // flip the addends (in dst coordinates), we UD flip the src. To UD flip
152 // the addends, we UD flip the dst.
153 switch (tx_type) {
154 case DCT_DCT:
155 case ADST_DCT:
156 case DCT_ADST:
157 case ADST_ADST:
158 case IDTX:
159 case V_DCT:
160 case H_DCT:
161 case V_ADST:
162 case H_ADST: break;
163 case FLIPADST_DCT:
164 case FLIPADST_ADST:
165 case V_FLIPADST:
166 // flip UD
167 FLIPUD_PTR(*dst, *dstride, sizey);
168 break;
169 case DCT_FLIPADST:
170 case ADST_FLIPADST:
171 case H_FLIPADST:
172 // flip LR
173 FLIPUD_PTR(*src, *sstride, sizex);
174 break;
175 case FLIPADST_FLIPADST:
176 // flip UD
177 FLIPUD_PTR(*dst, *dstride, sizey);
178 // flip LR
179 FLIPUD_PTR(*src, *sstride, sizex);
180 break;
181 default: assert(0); break;
182 }
183 }
184 #endif // CONFIG_EXT_TX
185
186 #if CONFIG_HIGHBITDEPTH
187 #if CONFIG_EXT_TX && CONFIG_TX64X64
// High-bit-depth inverse identity transform: shift the coefficients down
// and add them to the bsx-by-bsy destination block (dest8 is a packed
// pointer to uint16_t pixels). Only the IDTX type is handled here.
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
                                  int stride, int bsx, int bsy, TX_TYPE tx_type,
                                  int bd) {
  if (tx_type != IDTX) return;
  const int pels = bsx * bsy;
  // Net shift: 3 for blocks up to 256 pels, 2 up to 1024, 1 beyond.
  const int shift = 3 - ((pels > 256) + (pels > 1024));
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  for (int row = 0; row < bsy; ++row) {
    for (int col = 0; col < bsx; ++col)
      dest[col] = highbd_clip_pixel_add(dest[col], input[col] >> shift, bd);
    dest += stride;
    input += bsx;
  }
}
205 #endif // CONFIG_EXT_TX && CONFIG_TX64X64
206 #endif // CONFIG_HIGHBITDEPTH
207
208 #if CONFIG_LGT || CONFIG_LGT_FROM_PRED
// Inverse 4-point line graph transform: output = M^T * input, where M is
// the row-major 4x4 basis matrix pointed to by lgtmtx.
void ilgt4(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (!lgtmtx) assert(0);
#if CONFIG_LGT_FROM_PRED
  // For DCT/ADST, use butterfly implementations
  if (lgtmtx[0] == DCT4) {
    aom_idct4_c(input, output);
    return;
  } else if (lgtmtx[0] == ADST4) {
    aom_iadst4_c(input, output);
    return;
  }
#endif  // CONFIG_LGT_FROM_PRED

  // evaluate s[j] = sum over i = 0,...,3 of lgtmtx[i * 4 + j] * input[i]
  tran_high_t s[4] = { 0 };
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j) s[j] += lgtmtx[i * 4 + j] * input[i];

  for (int i = 0; i < 4; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}
230
// Inverse 8-point line graph transform: output = M^T * input, where M is
// the row-major 8x8 basis matrix pointed to by lgtmtx.
void ilgt8(const tran_low_t *input, tran_low_t *output,
           const tran_high_t *lgtmtx) {
  if (!lgtmtx) assert(0);
#if CONFIG_LGT_FROM_PRED
  // For DCT/ADST, use butterfly implementations
  if (lgtmtx[0] == DCT8) {
    aom_idct8_c(input, output);
    return;
  } else if (lgtmtx[0] == ADST8) {
    aom_iadst8_c(input, output);
    return;
  }
#endif  // CONFIG_LGT_FROM_PRED

  // evaluate s[j] = sum over i = 0,...,7 of lgtmtx[i * 8 + j] * input[i]
  tran_high_t s[8] = { 0 };
  for (int i = 0; i < 8; ++i)
    for (int j = 0; j < 8; ++j) s[j] += lgtmtx[i * 8 + j] * input[i];

  for (int i = 0; i < 8; ++i) output[i] = WRAPLOW(dct_const_round_shift(s[i]));
}
252 #endif // CONFIG_LGT || CONFIG_LGT_FROM_PRED
253
254 #if CONFIG_LGT
255 // get_lgt4 and get_lgt8 return 1 and pick a lgt matrix if LGT is chosen to
256 // apply. Otherwise they return 0
get_lgt4(const TxfmParam * txfm_param,int is_col,const tran_high_t ** lgtmtx)257 int get_lgt4(const TxfmParam *txfm_param, int is_col,
258 const tran_high_t **lgtmtx) {
259 if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
260 vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
261 lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
262 return 1;
263 } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
264 htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
265 lgtmtx[0] = txfm_param->is_inter ? &lgt4_170[0][0] : &lgt4_140[0][0];
266 return 1;
267 }
268 lgtmtx[0] = NULL;
269 return 0;
270 }
271
get_lgt8(const TxfmParam * txfm_param,int is_col,const tran_high_t ** lgtmtx)272 int get_lgt8(const TxfmParam *txfm_param, int is_col,
273 const tran_high_t **lgtmtx) {
274 if (is_col && (vtx_tab[txfm_param->tx_type] == ADST_1D ||
275 vtx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
276 lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
277 return 1;
278 } else if (!is_col && (htx_tab[txfm_param->tx_type] == ADST_1D ||
279 htx_tab[txfm_param->tx_type] == FLIPADST_1D)) {
280 lgtmtx[0] = txfm_param->is_inter ? &lgt8_170[0][0] : &lgt8_150[0][0];
281 return 1;
282 }
283 lgtmtx[0] = NULL;
284 return 0;
285 }
286 #endif // CONFIG_LGT
287
288 #if CONFIG_LGT_FROM_PRED
// Inverse "LGT" for lengths 16 and 32. True LGTs are not implemented at
// these sizes, so this dispatches to the butterfly DCT/ADST implementation
// selected by the matrix tag in lgtmtx[0].
void ilgt16up(const tran_low_t *input, tran_low_t *output,
              const tran_high_t *lgtmtx) {
  const tran_high_t tag = lgtmtx[0];
  if (tag == DCT16) {
    aom_idct16_c(input, output);
    return;
  }
  if (tag == ADST16) {
    aom_iadst16_c(input, output);
    return;
  }
  if (tag == DCT32) {
    aom_idct32_c(input, output);
    return;
  }
  if (tag == ADST32) {
    ihalfright32_c(input, output);
    return;
  }
  assert(0);  // unknown matrix tag
}
307
// Scans a 1D boundary array and reports the position of the largest jump
// between neighboring pixels.
//
// arr:          boundary pixels (n entries)
// n:            number of entries in arr
// idx_max_diff: receives the smallest index i (1 <= i < n) maximizing
//               |arr[i] - arr[i-1]|, or -1 when every neighboring
//               difference is zero (or n < 2)
//
// Fix: the original also tracked min_diff, which was computed but never
// read anywhere — dead code removed.
void get_discontinuity_1d(uint8_t *arr, int n, int *idx_max_diff) {
  *idx_max_diff = -1;

  int max_diff = 0;
  for (int i = 1; i < n; ++i) {
    const int diff = abs(arr[i] - arr[i - 1]);
    // Strict '>' keeps the first occurrence of the maximum.
    if (diff > max_diff) {
      max_diff = diff;
      *idx_max_diff = i;
    }
  }
}
321
// Scans a 2D region and reports the line index with the largest squared
// difference from the previous line.
//
// dst:          top-left of the pixel region, row stride `stride`
// stride:       row stride of dst in pixels
// n:            number of lines scanned in the search direction
// is_col:       nonzero -> compare rows (vertical differences);
//               zero    -> compare columns (horizontal differences)
// idx_max_diff: receives the smallest line index i (1 <= i < n) maximizing
//               the sum of squared differences, or -1 if all lines match
// ntx:          number of perpendicular positions summed per line
//
// Fix: the original also tracked min_diff, which was computed but never
// read anywhere — dead code removed.
void get_discontinuity_2d(uint8_t *dst, int stride, int n, int is_col,
                          int *idx_max_diff, int ntx) {
  *idx_max_diff = -1;

  int max_diff = 0;
  for (int i = 1; i < n; ++i) {
    // Sum of squared differences between line i and line i-1.
    int ssd = 0;
    for (int j = 0; j < ntx; ++j) {
      int diff;
      if (is_col)  // vertical diff
        diff = dst[i * stride + j] - dst[(i - 1) * stride + j];
      else  // horizontal diff
        diff = dst[j * stride + i] - dst[j * stride + i - 1];
      ssd += diff * diff;
    }
    // Strict '>' keeps the first occurrence of the maximum.
    if (ssd > max_diff) {
      max_diff = ssd;
      *idx_max_diff = i;
    }
  }
}
344
// Maps a prediction mode to the self-loop weight index used on the first
// node of the line graph for the given direction (is_col != 0 selects the
// column/vertical transform, otherwise the row/horizontal transform).
// 0: no self-loop
// 1: small self-loop
// 2: medium self-loop
// 3: large self-loop
int idx_selfloop_wrt_mode(PREDICTION_MODE mode, int is_col) {
  switch (mode) {
    case DC_PRED:
    case SMOOTH_PRED:
      // prediction is good for both directions: large SLs for row and col
      return 3;
    case TM_PRED: return 0;
#if CONFIG_SMOOTH_HV
    case SMOOTH_H_PRED:
#endif
    case H_PRED:
      // prediction is good for H direction: large SL for row only
      return is_col ? 0 : 3;
#if CONFIG_SMOOTH_HV
    case SMOOTH_V_PRED:
#endif
    case V_PRED:
      // prediction is good for V direction: large SL for col only
      return is_col ? 3 : 0;
#if LGT_SL_INTRA
    // directional mode: choose SL based on the direction
    case D45_PRED: return is_col ? 2 : 0;
    case D63_PRED: return is_col ? 3 : 0;
    case D117_PRED: return is_col ? 3 : 1;
    case D135_PRED: return 2;
    case D153_PRED: return is_col ? 1 : 3;
    case D207_PRED: return is_col ? 0 : 3;
#else
    // without LGT_SL_INTRA, directional modes collapse to the two
    // vertical-ish / horizontal-ish groups
    case D45_PRED:
    case D63_PRED:
    case D117_PRED: return is_col ? 3 : 0;
    case D135_PRED:
    case D153_PRED:
    case D207_PRED: return is_col ? 0 : 3;
#endif
    // inter: no SL
    default: return 0;
  }
}
388
// Selects a 4-point LGT matrix for one direction of the transform from the
// prediction mode and the reconstructed boundary pixels around
// txfm_param->dst. is_col selects the column (1) or row (0) transform; ntx
// is the length of the perpendicular dimension, used when searching for a
// discontinuity over the whole 2D block.
void get_lgt4_from_pred(const TxfmParam *txfm_param, int is_col,
                        const tran_high_t **lgtmtx, int ntx) {
  PREDICTION_MODE mode = txfm_param->mode;
  int stride = txfm_param->stride;
  uint8_t *dst = txfm_param->dst;
  int bp = -1;  // break-point index; -1 means no break point found
  uint8_t arr[4];

  // Each lgt4mtx_arr[k][i] corresponds to a line graph with a self-loop on
  // the first node, and possibly a weak edge within the line graph. i is
  // the index of the weak edge (between the i-th and (i+1)-th pixels, i=0
  // means no weak edge). k corresponds to the first self-loop's weight
  const tran_high_t *lgt4mtx_arr[4][4] = {
    { &lgt4_000[0][0], &lgt4_000w1[0][0], &lgt4_000w2[0][0],
      &lgt4_000w3[0][0] },
    { &lgt4_060[0][0], &lgt4_060_000w1[0][0], &lgt4_060_000w2[0][0],
      &lgt4_060_000w3[0][0] },
    { &lgt4_100[0][0], &lgt4_100_000w1[0][0], &lgt4_100_000w2[0][0],
      &lgt4_100_000w3[0][0] },
    { &lgt4_150[0][0], &lgt4_150_000w1[0][0], &lgt4_150_000w2[0][0],
      &lgt4_150_000w3[0][0] },
  };

  // initialize to DCT or some LGTs, and then change later if necessary
  int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
  lgtmtx[0] = lgt4mtx_arr[idx_sl][0];

  // find the break point and replace the line graph by the one with a
  // break point
  if (mode == DC_PRED || mode == SMOOTH_PRED) {
    // Do not use break point, since 1) is_left_available and is_top_available
    // in DC_PRED are not known by txfm_param for now, so accessing
    // both boundaries anyway may cause a mismatch 2) DC prediction
    // typically yields very smooth residues so having the break point
    // does not usually improve the RD result.
    return;
  } else if (mode == TM_PRED) {
    // TM_PRED: use both 1D top boundary and 1D left boundary
    if (is_col)
      for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
    else
      for (int i = 0; i < 4; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 4, &bp);
  } else if (mode == V_PRED) {
    // V_PRED: use 1D top boundary only
    if (is_col) return;
    for (int i = 0; i < 4; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 4, &bp);
  } else if (mode == H_PRED) {
    // H_PRED: use 1D left boundary only
    if (!is_col) return;
    for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride];
    get_discontinuity_1d(&arr[0], 4, &bp);
#if CONFIG_SMOOTH_HV
  } else if (mode == SMOOTH_V_PRED) {
    // SMOOTH_V_PRED: search the row just above the block
    if (is_col) return;
    for (int i = 0; i < 4; ++i) arr[i] = dst[-stride + i];
    get_discontinuity_1d(&arr[0], 4, &bp);
  } else if (mode == SMOOTH_H_PRED) {
    // SMOOTH_H_PRED: search the column just left of the block
    if (!is_col) return;
    for (int i = 0; i < 4; ++i) arr[i] = dst[i * stride - 1];
    get_discontinuity_1d(&arr[0], 4, &bp);
#endif
  } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
    // directional modes closer to vertical (maybe include D135 later)
    if (!is_col) get_discontinuity_2d(dst, stride, 4, 0, &bp, ntx);
  } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
    // directional modes closer to horizontal
    if (is_col) get_discontinuity_2d(dst, stride, 4, 1, &bp, ntx);
  } else if (mode > TM_PRED) {
    // inter
    get_discontinuity_2d(dst, stride, 4, is_col, &bp, ntx);
  }

#if LGT_SL_INTRA
  if (bp != -1) lgtmtx[0] = lgt4mtx_arr[idx_sl][bp];
#else
  // Without LGT_SL_INTRA only the no-self-loop row of the table is used.
  if (bp != -1) lgtmtx[0] = lgt4mtx_arr[0][bp];
#endif
}
469
// 8-point analogue of get_lgt4_from_pred: selects an 8-point LGT matrix
// for one direction of the transform from the prediction mode and the
// reconstructed boundary pixels around txfm_param->dst.
void get_lgt8_from_pred(const TxfmParam *txfm_param, int is_col,
                        const tran_high_t **lgtmtx, int ntx) {
  PREDICTION_MODE mode = txfm_param->mode;
  int stride = txfm_param->stride;
  uint8_t *dst = txfm_param->dst;
  int bp = -1;  // break-point index; -1 means no break point found
  uint8_t arr[8];

  // lgt8mtx_arr[k][i]: line graph with self-loop weight index k on the
  // first node and a weak edge after the i-th pixel (i=0: no weak edge).
  const tran_high_t *lgt8mtx_arr[4][8] = {
    { &lgt8_000[0][0], &lgt8_000w1[0][0], &lgt8_000w2[0][0], &lgt8_000w3[0][0],
      &lgt8_000w4[0][0], &lgt8_000w5[0][0], &lgt8_000w6[0][0],
      &lgt8_000w7[0][0] },
    { &lgt8_060[0][0], &lgt8_060_000w1[0][0], &lgt8_060_000w2[0][0],
      &lgt8_060_000w3[0][0], &lgt8_060_000w4[0][0], &lgt8_060_000w5[0][0],
      &lgt8_060_000w6[0][0], &lgt8_060_000w7[0][0] },
    { &lgt8_100[0][0], &lgt8_100_000w1[0][0], &lgt8_100_000w2[0][0],
      &lgt8_100_000w3[0][0], &lgt8_100_000w4[0][0], &lgt8_100_000w5[0][0],
      &lgt8_100_000w6[0][0], &lgt8_100_000w7[0][0] },
    { &lgt8_150[0][0], &lgt8_150_000w1[0][0], &lgt8_150_000w2[0][0],
      &lgt8_150_000w3[0][0], &lgt8_150_000w4[0][0], &lgt8_150_000w5[0][0],
      &lgt8_150_000w6[0][0], &lgt8_150_000w7[0][0] },
  };

  // Default choice from the prediction mode; refined below if a break
  // point is found on the boundary.
  int idx_sl = idx_selfloop_wrt_mode(mode, is_col);
  lgtmtx[0] = lgt8mtx_arr[idx_sl][0];

  if (mode == DC_PRED || mode == SMOOTH_PRED) {
    // No break-point search (see the comment in get_lgt4_from_pred).
    return;
  } else if (mode == TM_PRED) {
    // TM_PRED: use both 1D top boundary and 1D left boundary
    if (is_col)
      for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
    else
      for (int i = 0; i < 8; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 8, &bp);
  } else if (mode == V_PRED) {
    // V_PRED: use 1D top boundary only
    if (is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[i];
    get_discontinuity_1d(&arr[0], 8, &bp);
  } else if (mode == H_PRED) {
    // H_PRED: use 1D left boundary only
    if (!is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride];
    get_discontinuity_1d(&arr[0], 8, &bp);
#if CONFIG_SMOOTH_HV
  } else if (mode == SMOOTH_V_PRED) {
    // SMOOTH_V_PRED: search the row just above the block
    if (is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[-stride + i];
    get_discontinuity_1d(&arr[0], 8, &bp);
  } else if (mode == SMOOTH_H_PRED) {
    // SMOOTH_H_PRED: search the column just left of the block
    if (!is_col) return;
    for (int i = 0; i < 8; ++i) arr[i] = dst[i * stride - 1];
    get_discontinuity_1d(&arr[0], 8, &bp);
#endif
  } else if (mode == D45_PRED || mode == D63_PRED || mode == D117_PRED) {
    // directional modes closer to vertical
    if (!is_col) get_discontinuity_2d(dst, stride, 8, 0, &bp, ntx);
  } else if (mode == D135_PRED || mode == D153_PRED || mode == D207_PRED) {
    // directional modes closer to horizontal
    if (is_col) get_discontinuity_2d(dst, stride, 8, 1, &bp, ntx);
  } else if (mode > TM_PRED) {
    // inter
    get_discontinuity_2d(dst, stride, 8, is_col, &bp, ntx);
  }

#if LGT_SL_INTRA
  if (bp != -1) lgtmtx[0] = lgt8mtx_arr[idx_sl][bp];
#else
  // Without LGT_SL_INTRA only the no-self-loop row of the table is used.
  if (bp != -1) lgtmtx[0] = lgt8mtx_arr[0][bp];
#endif
}
536
537 // Since LGTs with length >8 are not implemented now, the following function
538 // will just call DCT or ADST
// For transform lengths 16 and 32 no LGT is implemented, so pick between
// the DCT-like (lgt*_000) and ADST-like (lgt*_200) matrix tags based on
// the prediction mode; ilgt16up later dispatches on the tag.
void get_lgt16up_from_pred(const TxfmParam *txfm_param, int is_col,
                           const tran_high_t **lgtmtx, int ntx) {
  int tx_length = is_col ? tx_size_high[txfm_param->tx_size]
                         : tx_size_wide[txfm_param->tx_size];
  assert(tx_length == 16 || tx_length == 32);
  PREDICTION_MODE mode = txfm_param->mode;

  (void)ntx;  // no boundary/discontinuity search at these lengths
  const tran_high_t *dctmtx =
      tx_length == 16 ? &lgt16_000[0][0] : &lgt32_000[0][0];
  const tran_high_t *adstmtx =
      tx_length == 16 ? &lgt16_200[0][0] : &lgt32_200[0][0];

  switch (mode) {
    case DC_PRED:
    case TM_PRED:
    case SMOOTH_PRED:
      // prediction from both top and left -> ADST
      lgtmtx[0] = adstmtx;
      break;
    case V_PRED:
    case D45_PRED:
    case D63_PRED:
    case D117_PRED:
#if CONFIG_SMOOTH_HV
    case SMOOTH_V_PRED:
#endif
      // prediction from the top more than from the left -> ADST
      lgtmtx[0] = is_col ? adstmtx : dctmtx;
      break;
    case H_PRED:
    case D135_PRED:
    case D153_PRED:
    case D207_PRED:
#if CONFIG_SMOOTH_HV
    case SMOOTH_H_PRED:
#endif
      // prediction from the left more than from the top -> DCT
      lgtmtx[0] = is_col ? dctmtx : adstmtx;
      break;
    default: lgtmtx[0] = dctmtx; break;
  }
}
582
// 1D inverse LGT kernel: transforms one length-N vector using the matrix
// (or matrix tag) in lgtmtx.
typedef void (*IlgtFunc)(const tran_low_t *input, tran_low_t *output,
                         const tran_high_t *lgtmtx);

// Kernels indexed by log2(length) - 2 (lengths 4, 8, 16, 32); lengths of
// 16 and above share the ilgt16up dispatcher.
static IlgtFunc ilgt_func[4] = { ilgt4, ilgt8, ilgt16up, ilgt16up };

// Selects an LGT matrix for one direction (row or column) of a transform
// block from the prediction information in txfm_param.
typedef void (*GetLgtFunc)(const TxfmParam *txfm_param, int is_col,
                           const tran_high_t **lgtmtx, int ntx);

// Matrix selectors indexed by log2(length) - 2, mirroring ilgt_func.
static GetLgtFunc get_lgt_func[4] = { get_lgt4_from_pred, get_lgt8_from_pred,
                                      get_lgt16up_from_pred,
                                      get_lgt16up_from_pred };
594
595 // this inline function corresponds to the up scaling before the transpose
596 // operation in the av1_iht* functions
// Applies the transform-size-dependent up-scale used between the row and
// column passes (matching the scaling in the av1_iht* functions).
static INLINE tran_low_t inv_upscale_wrt_txsize(const tran_high_t val,
                                                const TX_SIZE tx_size) {
  switch (tx_size) {
    // Square and 4:1 / 1:4 sizes: no extra scaling.
    case TX_4X4:
    case TX_8X8:
    case TX_4X16:
    case TX_16X4:
    case TX_8X32:
    case TX_32X8: return (tran_low_t)val;
    // 2:1 / 1:2 sizes: scale up by sqrt(2) (rounded fixed point).
    case TX_4X8:
    case TX_8X4:
    case TX_8X16:
    case TX_16X8: return (tran_low_t)dct_const_round_shift(val * Sqrt2);
    default: assert(0); break;
  }
  return 0;  // unreachable when tx_size is one of the supported sizes
}
614
615 // This inline function corresponds to the bit shift before summing with the
616 // destination in the av1_iht* functions
// Final rounded right-shift applied before summing the inverse transform
// output into the destination; the shift grows with block size (matching
// the ROUND_POWER_OF_TWO shifts in the av1_iht* functions).
static INLINE tran_low_t inv_downscale_wrt_txsize(const tran_low_t val,
                                                  const TX_SIZE tx_size) {
  switch (tx_size) {
    case TX_4X4: return ROUND_POWER_OF_TWO(val, 4);
    case TX_4X8:
    case TX_8X4:
    case TX_8X8:
    case TX_4X16:
    case TX_16X4: return ROUND_POWER_OF_TWO(val, 5);
    case TX_8X16:
    case TX_16X8:
    case TX_8X32:
    case TX_32X8: return ROUND_POWER_OF_TWO(val, 6);
    default: assert(0); break;
  }
  return 0;  // unreachable when tx_size is one of the supported sizes
}
634
// 2D inverse LGT with matrices derived from the prediction mode
// (CONFIG_LGT_FROM_PRED): inverse-transforms the w-by-h coefficient block
// in input and adds the result to dest.
void ilgt2d_from_pred_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
  const TX_SIZE tx_size = txfm_param->tx_size;
  const int w = tx_size_wide[tx_size];
  const int h = tx_size_high[tx_size];
  const int wlog2 = tx_size_wide_log2[tx_size];
  const int hlog2 = tx_size_high_log2[tx_size];
  assert(w <= 8 || h <= 8);

  int i, j;
  // largest 1D size allowed for LGT: 32
  // largest 2D size allowed for LGT: 8x32=256
  tran_low_t tmp[256], out[256], temp1d[32];
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  // Pick the column matrix from the height and the row matrix from the
  // width (tables are indexed by log2(length) - 2).
  get_lgt_func[hlog2 - 2](txfm_param, 1, lgtmtx_col, w);
  get_lgt_func[wlog2 - 2](txfm_param, 0, lgtmtx_row, h);

  // for inverse transform, to be consistent with av1_iht functions, we always
  // apply row transforms first and column transforms second, but both
  // row-first and column-first versions are implemented here for future
  // tests (use different lgtmtx_col[i], and choose row or column tx first
  // depending on transforms).
#if 1
  // inverse column transforms
  for (i = 0; i < w; ++i) {
    // transpose
    for (j = 0; j < h; ++j) tmp[i * h + j] = input[j * w + i];
    ilgt_func[hlog2 - 2](&tmp[i * h], temp1d, lgtmtx_col[0]);
    // upscale, and store in place
    for (j = 0; j < h; ++j)
      tmp[i * h + j] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
  }
  // inverse row transforms
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) temp1d[j] = tmp[j * h + i];
    ilgt_func[wlog2 - 2](temp1d, &out[i * w], lgtmtx_row[0]);
  }
  // downscale + sum with the destination
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      int d = i * stride + j;
      int s = i * w + j;
      dest[d] =
          clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
    }
  }
#else
  // inverse row transforms
  for (i = 0; i < h; ++i) {
    ilgt_func[wlog2 - 2](input, temp1d, lgtmtx_row[0]);
    // upscale and transpose (tmp[j*h+i] <--> tmp[j][i])
    for (j = 0; j < w; ++j)
      tmp[j * h + i] = inv_upscale_wrt_txsize(temp1d[j], tx_size);
    input += w;
  }
  // inverse column transforms
  for (i = 0; i < w; ++i)
    ilgt_func[hlog2 - 2](&tmp[i * h], &out[i * h], lgtmtx_col[0]);
  // here, out[] is the transpose of 2D block of transform coefficients

  // downscale + transform + sum with dest
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      int d = i * stride + j;
      int s = j * h + i;
      dest[d] =
          clip_pixel_add(dest[d], inv_downscale_wrt_txsize(out[s], tx_size));
    }
  }
#endif
}
707 #endif // CONFIG_LGT_FROM_PRED
708
// 4x4 inverse hybrid transform: inverse-transforms the 16 coefficients in
// input according to txfm_param->tx_type (rows first, then columns) and
// adds the result to the 4x4 destination block at dest.
//
// Fix: the final rounding step had an #if CONFIG_DAALA_DCT4 conditional
// whose two branches were byte-identical; the dead conditional is removed.
void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if !CONFIG_DAALA_DCT4
  // Plain DCT_DCT has a dedicated fast path.
  if (tx_type == DCT_DCT) {
    aom_idct4x4_16_add(input, dest, stride);
    return;
  }
#endif
  // Per-tx_type pair of 1D kernels: { column transform, row transform }.
  static const transform_2d IHT_4[] = {
#if CONFIG_DAALA_DCT4
    { daala_idct4, daala_idct4 },  // DCT_DCT = 0
    { daala_idst4, daala_idct4 },  // ADST_DCT = 1
    { daala_idct4, daala_idst4 },  // DCT_ADST = 2
    { daala_idst4, daala_idst4 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst4, daala_idct4 },  // FLIPADST_DCT
    { daala_idct4, daala_idst4 },  // DCT_FLIPADST
    { daala_idst4, daala_idst4 },  // FLIPADST_FLIPADST
    { daala_idst4, daala_idst4 },  // ADST_FLIPADST
    { daala_idst4, daala_idst4 },  // FLIPADST_ADST
    { daala_idtx4, daala_idtx4 },  // IDTX
    { daala_idct4, daala_idtx4 },  // V_DCT
    { daala_idtx4, daala_idct4 },  // H_DCT
    { daala_idst4, daala_idtx4 },  // V_ADST
    { daala_idtx4, daala_idst4 },  // H_ADST
    { daala_idst4, daala_idtx4 },  // V_FLIPADST
    { daala_idtx4, daala_idst4 },  // H_FLIPADST
#endif
#else
    { aom_idct4_c, aom_idct4_c },    // DCT_DCT = 0
    { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
    { aom_idct4_c, aom_iadst4_c },   // DCT_ADST = 2
    { aom_iadst4_c, aom_iadst4_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx4_c },          // IDTX
    { aom_idct4_c, iidtx4_c },       // V_DCT
    { iidtx4_c, aom_idct4_c },       // H_DCT
    { aom_iadst4_c, iidtx4_c },      // V_ADST
    { iidtx4_c, aom_iadst4_c },      // H_ADST
    { aom_iadst4_c, iidtx4_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst4_c },      // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[4][4];
  tran_low_t out[4][4];
  tran_low_t *outp = &out[0][0];
  int outstride = 4;

#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif

#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
#if CONFIG_DAALA_DCT4
    // Daala kernels expect inputs pre-scaled by 2.
    tran_low_t temp_in[4];
    for (j = 0; j < 4; j++) temp_in[j] = input[j] * 2;
    IHT_4[tx_type].rows(temp_in, out[i]);
#else
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, out[i], lgtmtx_row[0]);
    else
#endif
      IHT_4[tx_type].rows(input, out[i]);
#endif
    input += 4;
  }

  // transpose
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
#endif

  // Sum with the destination (the Daala and default paths both round-shift
  // by 4 here, so no CONFIG_DAALA_DCT4 split is needed).
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
    }
  }
}
831
// 4x8 inverse hybrid transform: inverse-transforms the 32 coefficients in
// input (length-4 rows first, then length-8 columns, with a sqrt(2)
// up-scale between the passes) and adds the result to the 4x8 destination
// block at dest.
void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // Per-tx_type pair of 1D kernels: { column transform (8), row transform (4) }.
  static const transform_2d IHT_4x8[] = {
    { aom_idct8_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct4_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx4_c },          // IDTX
    { aom_idct8_c, iidtx4_c },       // V_DCT
    { iidtx8_c, aom_idct4_c },       // H_DCT
    { aom_iadst8_c, iidtx4_c },      // V_ADST
    { iidtx8_c, aom_iadst4_c },      // H_ADST
    { aom_iadst8_c, iidtx4_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;   // block width / row-transform length
  const int n2 = 8;  // block height / column-transform length
  int i, j;
  tran_low_t out[4][8], tmp[4][8], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[0]);
    else
#endif
      IHT_4x8[tx_type].rows(input, outtmp);
    // Rectangular 2:1 block: scale by sqrt(2) between the passes.
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_4x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
912
// Inverse 2-D hybrid transform + reconstruction for an 8-wide x 4-high block.
// Runs the length-8 row transform on each of the 4 rows, transposes with a
// sqrt(2) rescale (2:1 rectangular blocks need it to preserve overall gain),
// runs the length-4 column transform, then adds the rounded residual into the
// predictor `dest` (row pitch `stride`).
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D transform pairs indexed by TX_TYPE; members are { cols, rows }:
  // the first entry is the length-4 column transform, the second the
  // length-8 row transform (deduced from how .rows/.cols are used below).
  static const transform_2d IHT_8x4[] = {
    { aom_idct4_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct8_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx8_c },          // IDTX
    { aom_idct4_c, iidtx8_c },       // V_DCT
    { iidtx4_c, aom_idct8_c },       // H_DCT
    { aom_iadst4_c, iidtx8_c },      // V_ADST
    { iidtx4_c, aom_iadst8_c },      // H_ADST
    { aom_iadst4_c, iidtx8_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst8_c },      // H_FLIPADST
#endif
  };

  const int n = 4;   // block height (column transform length)
  const int n2 = 8;  // block width (row transform length)

  int i, j;
  tran_low_t out[8][4], tmp[8][4], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

#if CONFIG_LGT
  // Optional line-graph transforms; the matrix length matches the ilgtN call
  // that consumes it (4-point for columns, 8-point for rows).
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[0]);
    else
#endif
      IHT_8x4[tx_type].rows(input, outtmp);
    // Transpose while rescaling by sqrt(2) (rectangular-gain correction).
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_8x4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Final rounding shift of 5 for 32-pel (8x4) blocks.
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
994
// Inverse 2-D hybrid transform + reconstruction for a 4-wide x 16-high block.
// Length-4 row transforms, transpose (no sqrt(2) rescale — unlike the 2:1
// blocks, 4:1 blocks pass row outputs through unscaled), length-16 column
// transforms, then rounded add into `dest`.
void av1_iht4x16_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 16), rows (length 4) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_4x16[] = {
    { aom_idct16_c, aom_idct4_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct4_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct4_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst4_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst4_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx4_c },          // IDTX
    { aom_idct16_c, iidtx4_c },       // V_DCT
    { iidtx16_c, aom_idct4_c },       // H_DCT
    { aom_iadst16_c, iidtx4_c },      // V_ADST
    { iidtx16_c, aom_iadst4_c },      // H_ADST
    { aom_iadst16_c, iidtx4_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst4_c },      // H_FLIPADST
#endif
  };

  const int n = 4;    // block width (row transform length)
  const int n4 = 16;  // block height (column transform length)
  int i, j;
  tran_low_t out[4][16], tmp[4][16], outtmp[4];
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

#if CONFIG_LGT
  // Only the length-4 row transform has an LGT variant here; 16-point
  // columns always use the table transform.
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_row = get_lgt4(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt4(input, outtmp, lgtmtx_row[0]);
    else
#endif
      IHT_4x16[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_4x16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Final rounding shift of 5 for 64-pel (4x16) blocks.
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
1067
// Inverse 2-D hybrid transform + reconstruction for a 16-wide x 4-high block.
// Length-16 row transforms, transpose (no rescale for 4:1 aspect ratios),
// length-4 column transforms, then rounded add into `dest`.
void av1_iht16x4_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 4), rows (length 16) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_16x4[] = {
    { aom_idct4_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst4_c, aom_idct16_c },   // ADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst4_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct4_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst4_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx4_c, iidtx16_c },          // IDTX
    { aom_idct4_c, iidtx16_c },       // V_DCT
    { iidtx4_c, aom_idct16_c },       // H_DCT
    { aom_iadst4_c, iidtx16_c },      // V_ADST
    { iidtx4_c, aom_iadst16_c },      // H_ADST
    { aom_iadst4_c, iidtx16_c },      // V_FLIPADST
    { iidtx4_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };

  const int n = 4;    // block height (column transform length)
  const int n4 = 16;  // block width (row transform length)

  int i, j;
  tran_low_t out[16][4], tmp[16][4], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

#if CONFIG_LGT
  // Only the length-4 column transform has an LGT variant here; 16-point
  // rows always use the table transform.
  const tran_high_t *lgtmtx_col[1];
  int use_lgt_col = get_lgt4(txfm_param, 1, lgtmtx_col);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x4[tx_type].rows(input, outtmp);
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    input += n4;
  }

  // inverse transform column vectors
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt4(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_16x4[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Final rounding shift of 5 for 64-pel (16x4) blocks.
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
1141
// Inverse 2-D hybrid transform + reconstruction for an 8-wide x 16-high
// block. Length-8 row transforms, transpose with sqrt(2) rescale (2:1 aspect
// gain correction), length-16 column transforms, then rounded add into
// `dest`.
void av1_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 16), rows (length 8) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_8x16[] = {
    { aom_idct16_c, aom_idct8_c },    // DCT_DCT
    { aom_iadst16_c, aom_idct8_c },   // ADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_ADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx8_c },          // IDTX
    { aom_idct16_c, iidtx8_c },       // V_DCT
    { iidtx16_c, aom_idct8_c },       // H_DCT
    { aom_iadst16_c, iidtx8_c },      // V_ADST
    { iidtx16_c, aom_iadst8_c },      // H_ADST
    { aom_iadst16_c, iidtx8_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst8_c },      // H_FLIPADST
#endif
  };

  const int n = 8;    // block width (row transform length)
  const int n2 = 16;  // block height (column transform length)
  int i, j;
  tran_low_t out[8][16], tmp[8][16], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

#if CONFIG_LGT
  // Only the length-8 row transform has an LGT variant here.
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[0]);
    else
#endif
      IHT_8x16[tx_type].rows(input, outtmp);
    // Transpose while rescaling by sqrt(2) (rectangular-gain correction).
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x16[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Final rounding shift of 6 for 128-pel (8x16) blocks.
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
1215
// Inverse 2-D hybrid transform + reconstruction for a 16-wide x 8-high
// block. Length-16 row transforms, transpose with sqrt(2) rescale (2:1
// aspect gain correction), length-8 column transforms, then rounded add
// into `dest`.
void av1_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 8), rows (length 16) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_16x8[] = {
    { aom_idct8_c, aom_idct16_c },    // DCT_DCT
    { aom_iadst8_c, aom_idct16_c },   // ADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_ADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx16_c },          // IDTX
    { aom_idct8_c, iidtx16_c },       // V_DCT
    { iidtx8_c, aom_idct16_c },       // H_DCT
    { aom_iadst8_c, iidtx16_c },      // V_ADST
    { iidtx8_c, aom_iadst16_c },      // H_ADST
    { aom_iadst8_c, iidtx16_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst16_c },      // H_FLIPADST
#endif
  };

  const int n = 8;    // block height (column transform length)
  const int n2 = 16;  // block width (row transform length)

  int i, j;
  tran_low_t out[16][8], tmp[16][8], outtmp[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

#if CONFIG_LGT
  // Only the length-8 column transform has an LGT variant here.
  const tran_high_t *lgtmtx_col[1];
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_16x8[tx_type].rows(input, outtmp);
    // Transpose while rescaling by sqrt(2) (rectangular-gain correction).
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_16x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Final rounding shift of 6 for 128-pel (16x8) blocks.
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
1290
// Inverse 2-D hybrid transform + reconstruction for an 8-wide x 32-high
// block. Length-8 row transforms, transpose (no rescale for 4:1 aspect
// ratios), length-32 column transforms (ihalfright32 stands in for a
// 32-point ADST), then rounded add into `dest`.
void av1_iht8x32_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 32), rows (length 8) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_8x32[] = {
    { aom_idct32_c, aom_idct8_c },     // DCT_DCT
    { ihalfright32_c, aom_idct8_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst8_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx8_c },           // IDTX
    { aom_idct32_c, iidtx8_c },        // V_DCT
    { iidtx32_c, aom_idct8_c },        // H_DCT
    { ihalfright32_c, iidtx8_c },      // V_ADST
    { iidtx32_c, aom_iadst8_c },       // H_ADST
    { ihalfright32_c, iidtx8_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst8_c },       // H_FLIPADST
#endif
  };

  const int n = 8;    // block width (row transform length)
  const int n4 = 32;  // block height (column transform length)
  int i, j;
  tran_low_t out[8][32], tmp[8][32], outtmp[8];
  tran_low_t *outp = &out[0][0];
  int outstride = n4;

#if CONFIG_LGT
  // Only the length-8 row transform has an LGT variant here.
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, outtmp, lgtmtx_row[0]);
    else
#endif
      IHT_8x32[tx_type].rows(input, outtmp);
    for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j];
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) {
    IHT_8x32[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n4, n);
#endif

  // Sum with the destination
  for (i = 0; i < n4; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Final rounding shift of 6 for 256-pel (8x32) blocks.
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
1363
// Inverse 2-D hybrid transform + reconstruction for a 32-wide x 8-high
// block. Length-32 row transforms, transpose (no rescale for 4:1 aspect
// ratios), length-8 column transforms, then rounded add into `dest`.
void av1_iht32x8_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                           const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 8), rows (length 32) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_32x8[] = {
    { aom_idct8_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst8_c, aom_idct32_c },    // ADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct8_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst8_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx32_c },           // IDTX
    { aom_idct8_c, iidtx32_c },        // V_DCT
    { iidtx8_c, aom_idct32_c },        // H_DCT
    { aom_iadst8_c, iidtx32_c },       // V_ADST
    { iidtx8_c, ihalfright32_c },      // H_ADST
    { aom_iadst8_c, iidtx32_c },       // V_FLIPADST
    { iidtx8_c, ihalfright32_c },      // H_FLIPADST
#endif
  };

  const int n = 8;    // block height (column transform length)
  const int n4 = 32;  // block width (row transform length)

  int i, j;
  tran_low_t out[32][8], tmp[32][8], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[1];
  // BUGFIX: the column vectors are 8 samples tall and are transformed with
  // ilgt8() below, so the 8-point LGT matrix must be fetched. The previous
  // get_lgt4() call supplied a 4-point matrix, causing ilgt8() to read past
  // the end of the matrix. Every sibling function pairs get_lgtN with ilgtN.
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
#endif

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_32x8[tx_type].rows(input, outtmp);
    for (j = 0; j < n4; ++j) tmp[j][i] = outtmp[j];
    input += n4;
  }

  // inverse transform column vectors
  for (i = 0; i < n4; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_32x8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n4);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n4; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Final rounding shift of 6 for 256-pel (32x8) blocks.
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
1437
// Inverse 2-D hybrid transform + reconstruction for a 16-wide x 32-high
// block: length-16 row pass, transpose with sqrt(2) gain correction,
// length-32 column pass, then rounded add into the predictor `dest`.
void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 32), rows (length 16) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_16x32[] = {
    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx16_c },           // IDTX
    { aom_idct32_c, iidtx16_c },        // V_DCT
    { iidtx32_c, aom_idct16_c },        // H_DCT
    { ihalfright32_c, iidtx16_c },      // V_ADST
    { iidtx32_c, aom_iadst16_c },       // H_ADST
    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
#endif
  };

  const int n = 16;
  const int n2 = 32;
  int row, col;
  tran_low_t out[16][32], tmp[16][32], rowbuf[16];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // Row pass: transform each of the 32 length-16 coefficient rows and store
  // the result transposed, rescaled by sqrt(2) (2:1 rectangular gain fix).
  for (row = 0; row < n2; ++row) {
    IHT_16x32[tx_type].rows(input, rowbuf);
    for (col = 0; col < n; ++col) {
      tmp[col][row] = (tran_low_t)dct_const_round_shift(rowbuf[col] * Sqrt2);
    }
    input += n;
  }

  // Column pass over the 16 transposed vectors (32 samples each).
  for (col = 0; col < n; ++col) IHT_16x32[tx_type].cols(tmp[col], out[col]);

#if CONFIG_EXT_TX
  // FLIPADST variants reverse the walk over dest and/or the output buffer.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Reconstruction: add the rounded residual into the prediction.
  for (row = 0; row < n2; ++row) {
    for (col = 0; col < n; ++col) {
      const int d = row * stride + col;
      const int s = col * outstride + row;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
1499
// Inverse 2-D hybrid transform + reconstruction for a 32-wide x 16-high
// block: length-32 row pass, transpose with sqrt(2) gain correction,
// length-16 column pass, then rounded add into the predictor `dest`.
void av1_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols (length 16), rows (length 32) } pairs indexed by TX_TYPE.
  static const transform_2d IHT_32x16[] = {
    { aom_idct16_c, aom_idct32_c },     // DCT_DCT
    { aom_iadst16_c, aom_idct32_c },    // ADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_ADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct16_c, ihalfright32_c },   // DCT_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // ADST_FLIPADST
    { aom_iadst16_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx32_c },           // IDTX
    { aom_idct16_c, iidtx32_c },        // V_DCT
    { iidtx16_c, aom_idct32_c },        // H_DCT
    { aom_iadst16_c, iidtx32_c },       // V_ADST
    { iidtx16_c, ihalfright32_c },      // H_ADST
    { aom_iadst16_c, iidtx32_c },       // V_FLIPADST
    { iidtx16_c, ihalfright32_c },      // H_FLIPADST
#endif
  };
  const int n = 16;
  const int n2 = 32;

  int row, col;
  tran_low_t out[32][16], tmp[32][16], rowbuf[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // Row pass: transform each of the 16 length-32 coefficient rows and store
  // the result transposed, rescaled by sqrt(2) (2:1 rectangular gain fix).
  for (row = 0; row < n; ++row) {
    IHT_32x16[tx_type].rows(input, rowbuf);
    for (col = 0; col < n2; ++col) {
      tmp[col][row] = (tran_low_t)dct_const_round_shift(rowbuf[col] * Sqrt2);
    }
    input += n2;
  }

  // Column pass over the 32 transposed vectors (16 samples each).
  for (col = 0; col < n2; ++col) IHT_32x16[tx_type].cols(tmp[col], out[col]);

#if CONFIG_EXT_TX
  // FLIPADST variants reverse the walk over dest and/or the output buffer.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Reconstruction: add the rounded residual into the prediction.
  for (row = 0; row < n; ++row) {
    for (col = 0; col < n2; ++col) {
      const int d = row * stride + col;
      const int s = col * outstride + row;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
    }
  }
}
1561
// Inverse 2-D hybrid transform + reconstruction for an 8x8 block.
// Row pass, explicit transpose, column pass, then rounded add into `dest`.
// With CONFIG_DAALA_DCT8 the Daala transforms are used (inputs pre-scaled
// by 2, final shift 4); otherwise the aom transforms (final shift 5), with
// optional LGT row/column substitution under CONFIG_LGT.
void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols, rows } 1-D transform pairs indexed by TX_TYPE.
  static const transform_2d IHT_8[] = {
#if CONFIG_DAALA_DCT8
    { daala_idct8, daala_idct8 },  // DCT_DCT  = 0
    { daala_idst8, daala_idct8 },  // ADST_DCT = 1
    { daala_idct8, daala_idst8 },  // DCT_ADST = 2
    { daala_idst8, daala_idst8 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst8, daala_idct8 },  // FLIPADST_DCT
    { daala_idct8, daala_idst8 },  // DCT_FLIPADST
    { daala_idst8, daala_idst8 },  // FLIPADST_FLIPADST
    { daala_idst8, daala_idst8 },  // ADST_FLIPADST
    { daala_idst8, daala_idst8 },  // FLIPADST_ADST
    { daala_idtx8, daala_idtx8 },  // IDTX
    { daala_idct8, daala_idtx8 },  // V_DCT
    { daala_idtx8, daala_idct8 },  // H_DCT
    { daala_idst8, daala_idtx8 },  // V_ADST
    { daala_idtx8, daala_idst8 },  // H_ADST
    { daala_idst8, daala_idtx8 },  // V_FLIPADST
    { daala_idtx8, daala_idst8 },  // H_FLIPADST
#endif
#else
    { aom_idct8_c, aom_idct8_c },    // DCT_DCT  = 0
    { aom_iadst8_c, aom_idct8_c },   // ADST_DCT = 1
    { aom_idct8_c, aom_iadst8_c },   // DCT_ADST = 2
    { aom_iadst8_c, aom_iadst8_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst8_c, aom_idct8_c },   // FLIPADST_DCT
    { aom_idct8_c, aom_iadst8_c },   // DCT_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // ADST_FLIPADST
    { aom_iadst8_c, aom_iadst8_c },  // FLIPADST_ADST
    { iidtx8_c, iidtx8_c },          // IDTX
    { aom_idct8_c, iidtx8_c },       // V_DCT
    { iidtx8_c, aom_idct8_c },       // H_DCT
    { aom_iadst8_c, iidtx8_c },      // V_ADST
    { iidtx8_c, aom_iadst8_c },      // H_ADST
    { aom_iadst8_c, iidtx8_c },      // V_FLIPADST
    { iidtx8_c, aom_iadst8_c },      // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[8][8];
  tran_low_t out[8][8];
  tran_low_t *outp = &out[0][0];
  int outstride = 8;

#if CONFIG_LGT
  const tran_high_t *lgtmtx_col[1];
  const tran_high_t *lgtmtx_row[1];
  int use_lgt_col = get_lgt8(txfm_param, 1, lgtmtx_col);
  int use_lgt_row = get_lgt8(txfm_param, 0, lgtmtx_row);
#endif

  // inverse transform row vectors
  for (i = 0; i < 8; ++i) {
#if CONFIG_DAALA_DCT8
    // Daala transforms expect inputs pre-scaled up by 2.
    tran_low_t temp_in[8];
    for (j = 0; j < 8; j++) temp_in[j] = input[j] * 2;
    IHT_8[tx_type].rows(temp_in, out[i]);
#else
#if CONFIG_LGT
    if (use_lgt_row)
      ilgt8(input, out[i], lgtmtx_row[0]);
    else
#endif
      IHT_8[tx_type].rows(input, out[i]);
#endif
    input += 8;
  }

  // transpose
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 8; ++i) {
#if CONFIG_LGT
    if (use_lgt_col)
      ilgt8(tmp[i], out[i], lgtmtx_col[0]);
    else
#endif
      IHT_8[tx_type].cols(tmp[i], out[i]);
  }

#if CONFIG_EXT_TX
  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
#endif

  // Sum with the destination
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Daala path uses a different fixed-point scale, hence shift 4 vs 5.
#if CONFIG_DAALA_DCT8
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
    }
  }
}
1677
// Inverse 2-D hybrid transform + reconstruction for a 16x16 block:
// row pass, transpose, column pass, rounded add into `dest`. With
// CONFIG_DAALA_DCT16 the Daala transforms are used (inputs pre-scaled by 2,
// final shift 4); otherwise the aom transforms (final shift 6).
void av1_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols, rows } 1-D transform pairs indexed by TX_TYPE.
  static const transform_2d IHT_16[] = {
#if CONFIG_DAALA_DCT16
    { daala_idct16, daala_idct16 },  // DCT_DCT  = 0
    { daala_idst16, daala_idct16 },  // ADST_DCT = 1
    { daala_idct16, daala_idst16 },  // DCT_ADST = 2
    { daala_idst16, daala_idst16 },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { daala_idst16, daala_idct16 },  // FLIPADST_DCT
    { daala_idct16, daala_idst16 },  // DCT_FLIPADST
    { daala_idst16, daala_idst16 },  // FLIPADST_FLIPADST
    { daala_idst16, daala_idst16 },  // ADST_FLIPADST
    { daala_idst16, daala_idst16 },  // FLIPADST_ADST
    { daala_idtx16, daala_idtx16 },  // IDTX
    { daala_idct16, daala_idtx16 },  // V_DCT
    { daala_idtx16, daala_idct16 },  // H_DCT
    { daala_idst16, daala_idtx16 },  // V_ADST
    { daala_idtx16, daala_idst16 },  // H_ADST
    { daala_idst16, daala_idtx16 },  // V_FLIPADST
    { daala_idtx16, daala_idst16 },  // H_FLIPADST
#endif
#else
    { aom_idct16_c, aom_idct16_c },    // DCT_DCT  = 0
    { aom_iadst16_c, aom_idct16_c },   // ADST_DCT = 1
    { aom_idct16_c, aom_iadst16_c },   // DCT_ADST = 2
    { aom_iadst16_c, aom_iadst16_c },  // ADST_ADST = 3
#if CONFIG_EXT_TX
    { aom_iadst16_c, aom_idct16_c },   // FLIPADST_DCT
    { aom_idct16_c, aom_iadst16_c },   // DCT_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // ADST_FLIPADST
    { aom_iadst16_c, aom_iadst16_c },  // FLIPADST_ADST
    { iidtx16_c, iidtx16_c },          // IDTX
    { aom_idct16_c, iidtx16_c },       // V_DCT
    { iidtx16_c, aom_idct16_c },       // H_DCT
    { aom_iadst16_c, iidtx16_c },      // V_ADST
    { iidtx16_c, aom_iadst16_c },      // H_ADST
    { aom_iadst16_c, iidtx16_c },      // V_FLIPADST
    { iidtx16_c, aom_iadst16_c },      // H_FLIPADST
#endif
#endif
  };

  int r, c;
  tran_low_t tmp[16][16];
  tran_low_t out[16][16];
  tran_low_t *outp = &out[0][0];
  int outstride = 16;

  // Row pass: one length-16 transform per coefficient row.
  for (r = 0; r < 16; ++r) {
#if CONFIG_DAALA_DCT16
    // Daala transforms expect inputs pre-scaled up by 2.
    tran_low_t temp_in[16];
    for (c = 0; c < 16; c++) temp_in[c] = input[c] * 2;
    IHT_16[tx_type].rows(temp_in, out[r]);
#else
    IHT_16[tx_type].rows(input, out[r]);
#endif
    input += 16;
  }

  // Transpose so the column pass operates on contiguous vectors.
  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++) tmp[c][r] = out[r][c];
  }

  // Column pass.
  for (r = 0; r < 16; ++r) IHT_16[tx_type].cols(tmp[r], out[r]);

#if CONFIG_EXT_TX
  // FLIPADST variants reverse the walk over dest and/or the output buffer.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
#endif

  // Reconstruction: add the rounded residual into the prediction.
  for (r = 0; r < 16; ++r) {
    for (c = 0; c < 16; ++c) {
      const int d = r * stride + c;
      const int s = c * outstride + r;
#if CONFIG_DAALA_DCT16
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
    }
  }
}
1774
1775 #if CONFIG_EXT_TX || CONFIG_DAALA_DCT32
// Inverse 2-D hybrid transform + reconstruction for a 32x32 block.
// Row pass, transpose, column pass, rounded add into `dest`. Without
// CONFIG_EXT_TX only DCT_DCT exists for this size (hence the table has one
// unconditional entry). With CONFIG_DAALA_DCT32 the Daala transforms are
// used, with extra input/inter-pass scaling and a smaller final shift.
void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { cols, rows } 1-D transform pairs indexed by TX_TYPE.
  static const transform_2d IHT_32[] = {
#if CONFIG_DAALA_DCT32
    { daala_idct32, daala_idct32 },  // DCT_DCT
#if CONFIG_EXT_TX
    { daala_idst32, daala_idct32 },  // ADST_DCT
    { daala_idct32, daala_idst32 },  // DCT_ADST
    { daala_idst32, daala_idst32 },  // ADST_ADST
    { daala_idst32, daala_idct32 },  // FLIPADST_DCT
    { daala_idct32, daala_idst32 },  // DCT_FLIPADST
    { daala_idst32, daala_idst32 },  // FLIPADST_FLIPADST
    { daala_idst32, daala_idst32 },  // ADST_FLIPADST
    { daala_idst32, daala_idst32 },  // FLIPADST_ADST
    { daala_idtx32, daala_idtx32 },  // IDTX
    { daala_idct32, daala_idtx32 },  // V_DCT
    { daala_idtx32, daala_idct32 },  // H_DCT
    { daala_idst32, daala_idtx32 },  // V_ADST
    { daala_idtx32, daala_idst32 },  // H_ADST
    { daala_idst32, daala_idtx32 },  // V_FLIPADST
    { daala_idtx32, daala_idst32 },  // H_FLIPADST
#endif
#else
    { aom_idct32_c, aom_idct32_c },  // DCT_DCT
#if CONFIG_EXT_TX
    { ihalfright32_c, aom_idct32_c },    // ADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_ADST
    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
    { ihalfright32_c, aom_idct32_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx32_c },            // IDTX
    { aom_idct32_c, iidtx32_c },         // V_DCT
    { iidtx32_c, aom_idct32_c },         // H_DCT
    { ihalfright32_c, iidtx32_c },       // V_ADST
    { iidtx32_c, ihalfright32_c },       // H_ADST
    { ihalfright32_c, iidtx32_c },       // V_FLIPADST
    { iidtx32_c, ihalfright32_c },       // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[32][32];
  tran_low_t out[32][32];
  tran_low_t *outp = &out[0][0];
  int outstride = 32;

  // inverse transform row vectors
  for (i = 0; i < 32; ++i) {
#if CONFIG_DAALA_DCT32
    // Daala transforms expect inputs pre-scaled up by 2.
    tran_low_t temp_in[32];
    for (j = 0; j < 32; j++) temp_in[j] = input[j] * 2;
    IHT_32[tx_type].rows(temp_in, out[i]);
#else
    IHT_32[tx_type].rows(input, out[i]);
#endif
    input += 32;
  }

  // transpose
  for (i = 0; i < 32; i++) {
    for (j = 0; j < 32; j++) {
      // Daala path applies an extra x4 between the row and column passes.
#if CONFIG_DAALA_DCT32
      tmp[j][i] = out[i][j] * 4;
#else
      tmp[j][i] = out[i][j];
#endif
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 32; ++i) IHT_32[tx_type].cols(tmp[i], out[i]);

  // For FLIPADST variants, walk dest and/or the output buffer backwards.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);

  // Sum with the destination
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      // Daala path uses a different fixed-point scale, hence shift 5 vs 6.
#if CONFIG_DAALA_DCT32
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
#endif
    }
  }
}
1871 #endif // CONFIG_EXT_TX || CONFIG_DAALA_DCT32
1872
1873 #if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  // 64x64 inverse hybrid transform and add: runs the tx_type-selected 1-D
  // inverse transform over rows, transposes, runs it over columns, then
  // rounds and accumulates the residual into dest (stride = dest row pitch).
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D kernel pairs indexed by tx_type; the first entry feeds .cols, the
  // second .rows (matching the idct64_col_c / idct64_row_c placement).
  static const transform_2d IHT_64[] = {
#if CONFIG_DAALA_DCT64
    { daala_idct64, daala_idct64 },  // DCT_DCT
    { daala_idst64, daala_idct64 },  // ADST_DCT
    { daala_idct64, daala_idst64 },  // DCT_ADST
    { daala_idst64, daala_idst64 },  // ADST_ADST
#if CONFIG_EXT_TX
    { daala_idst64, daala_idct64 },  // FLIPADST_DCT
    { daala_idct64, daala_idst64 },  // DCT_FLIPADST
    { daala_idst64, daala_idst64 },  // FLIPADST_FLIPADST
    { daala_idst64, daala_idst64 },  // ADST_FLIPADST
    { daala_idst64, daala_idst64 },  // FLIPADST_ADST
    { daala_idtx64, daala_idtx64 },  // IDTX
    { daala_idct64, daala_idtx64 },  // V_DCT
    { daala_idtx64, daala_idct64 },  // H_DCT
    { daala_idst64, daala_idtx64 },  // V_ADST
    { daala_idtx64, daala_idst64 },  // H_ADST
    { daala_idst64, daala_idtx64 },  // V_FLIPADST
    { daala_idtx64, daala_idst64 },  // H_FLIPADST
#endif
#else
    { idct64_col_c, idct64_row_c },      // DCT_DCT
    { ihalfright64_c, idct64_row_c },    // ADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_ADST
    { ihalfright64_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, idct64_row_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx64_c },            // IDTX
    { idct64_col_c, iidtx64_c },         // V_DCT
    { iidtx64_c, idct64_row_c },         // H_DCT
    { ihalfright64_c, iidtx64_c },       // V_ADST
    { iidtx64_c, ihalfright64_c },       // H_ADST
    { ihalfright64_c, iidtx64_c },       // V_FLIPADST
    { iidtx64_c, ihalfright64_c },       // H_FLIPADST
#endif
#endif
  };

  int i, j;
  tran_low_t tmp[64][64];
  tran_low_t out[64][64];
  tran_low_t *outp = &out[0][0];
  int outstride = 64;

  // inverse transform row vectors
  for (i = 0; i < 64; ++i) {
#if CONFIG_DAALA_DCT64
    // Daala kernels take their input pre-scaled by 2.
    tran_low_t temp_in[64];
    for (j = 0; j < 64; j++) temp_in[j] = input[j] * 2;
    IHT_64[tx_type].rows(temp_in, out[i]);
    // Do not rescale intermediate for Daala
#else
    IHT_64[tx_type].rows(input, out[i]);
    // Halve the row output to keep the intermediate in range.
    for (j = 0; j < 64; ++j) out[i][j] = ROUND_POWER_OF_TWO(out[i][j], 1);
#endif
    input += 64;
  }

  // transpose
  for (i = 0; i < 64; i++) {
    for (j = 0; j < 64; j++) {
      tmp[j][i] = out[i][j];
    }
  }

  // inverse transform column vectors
  for (i = 0; i < 64; ++i) IHT_64[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  // FLIPADST variants walk dest/out in reversed order.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif  // CONFIG_EXT_TX

  // Sum with the destination
  for (i = 0; i < 64; ++i) {
    for (j = 0; j < 64; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
#if CONFIG_DAALA_DCT64
      // Final rounding shift differs: the Daala path uses a different
      // fixed-point scale (see the *2 input scaling above).
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 2));
#else
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
#endif
    }
  }
}
1972
void av1_iht64x32_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  // 64x32 (wide) inverse hybrid transform and add. Rows are length-64,
  // columns length-32; row outputs are scaled by 1/sqrt(2) to compensate
  // for the 2:1 rectangular aspect.
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D kernel pairs indexed by tx_type: { column transform, row transform }.
  static const transform_2d IHT_64x32[] = {
    { aom_idct32_c, idct64_row_c },      // DCT_DCT
    { ihalfright32_c, idct64_row_c },    // ADST_DCT
    { aom_idct32_c, ihalfright64_c },    // DCT_ADST
    { ihalfright32_c, ihalfright64_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright32_c, idct64_row_c },    // FLIPADST_DCT
    { aom_idct32_c, ihalfright64_c },    // DCT_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // FLIPADST_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // ADST_FLIPADST
    { ihalfright32_c, ihalfright64_c },  // FLIPADST_ADST
    { iidtx32_c, iidtx64_c },            // IDTX
    { aom_idct32_c, iidtx64_c },         // V_DCT
    { iidtx32_c, idct64_row_c },         // H_DCT
    { ihalfright32_c, iidtx64_c },       // V_ADST
    { iidtx32_c, ihalfright64_c },       // H_ADST
    { ihalfright32_c, iidtx64_c },       // V_FLIPADST
    { iidtx32_c, ihalfright64_c },       // H_FLIPADST
#endif
  };
  const int n = 32;   // transform height (column length)
  const int n2 = 64;  // transform width (row length)

  int i, j;
  tran_low_t out[64][32], tmp[64][32], outtmp[64];
  tran_low_t *outp = &out[0][0];
  int outstride = n;

  // inverse transform row vectors and transpose
  for (i = 0; i < n; ++i) {
    IHT_64x32[tx_type].rows(input, outtmp);
    // Rectangular scaling: multiply by 1/sqrt(2) with rounding.
    for (j = 0; j < n2; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
    input += n2;
  }

  // inverse transform column vectors
  for (i = 0; i < n2; ++i) IHT_64x32[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  // FLIPADST variants walk dest/out in reversed order.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n, n2);
#endif

  // Sum with the destination
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
2034
void av1_iht32x64_2048_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  // 32x64 (tall) inverse hybrid transform and add. Rows are length-32,
  // columns length-64; row outputs are scaled by 1/sqrt(2) to compensate
  // for the 1:2 rectangular aspect.
  const TX_TYPE tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // 1-D kernel pairs indexed by tx_type: { column transform, row transform }.
  static const transform_2d IHT_32x64[] = {
    { idct64_col_c, aom_idct32_c },      // DCT_DCT
    { ihalfright64_c, aom_idct32_c },    // ADST_DCT
    { idct64_col_c, ihalfright32_c },    // DCT_ADST
    { ihalfright64_c, ihalfright32_c },  // ADST_ADST
#if CONFIG_EXT_TX
    { ihalfright64_c, aom_idct32_c },    // FLIPADST_DCT
    { idct64_col_c, ihalfright32_c },    // DCT_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // FLIPADST_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // ADST_FLIPADST
    { ihalfright64_c, ihalfright32_c },  // FLIPADST_ADST
    { iidtx64_c, iidtx32_c },            // IDTX
    { idct64_col_c, iidtx32_c },         // V_DCT
    { iidtx64_c, aom_idct32_c },         // H_DCT
    { ihalfright64_c, iidtx32_c },       // V_ADST
    { iidtx64_c, ihalfright32_c },       // H_ADST
    { ihalfright64_c, iidtx32_c },       // V_FLIPADST
    { iidtx64_c, ihalfright32_c },       // H_FLIPADST
#endif
  };

  const int n = 32;   // transform width (row length)
  const int n2 = 64;  // transform height (column length)
  int i, j;
  tran_low_t out[32][64], tmp[32][64], outtmp[32];
  tran_low_t *outp = &out[0][0];
  int outstride = n2;

  // inverse transform row vectors and transpose
  for (i = 0; i < n2; ++i) {
    IHT_32x64[tx_type].rows(input, outtmp);
    // Rectangular scaling: multiply by 1/sqrt(2) with rounding.
    for (j = 0; j < n; ++j)
      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
    input += n;
  }

  // inverse transform column vectors
  for (i = 0; i < n; ++i) IHT_32x64[tx_type].cols(tmp[i], out[i]);

#if CONFIG_EXT_TX
  // FLIPADST variants walk dest/out in reversed order.
  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
#endif

  // Sum with the destination
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j) {
      int d = i * stride + j;
      int s = j * outstride + i;
      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
    }
  }
}
2096
2097 #endif // CONFIG_TX64X64
2098
2099 // idct
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     const TxfmParam *txfm_param) {
  // A DC-only block (eob <= 1) takes the single-coefficient fast path;
  // anything else goes through the full 16-coefficient hybrid transform.
  if (txfm_param->eob <= 1) {
    aom_idct4x4_1_add(input, dest, stride);
  } else {
    av1_iht4x4_16_add(input, dest, stride, txfm_param);
  }
}
2108
void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     const TxfmParam *txfm_param) {
  // 4x4 inverse Walsh-Hadamard transform and add (lossless path):
  // DC-only blocks (eob <= 1) use the single-coefficient fast path.
  if (txfm_param->eob <= 1) {
    aom_iwht4x4_1_add(input, dest, stride);
  } else {
    aom_iwht4x4_16_add(input, dest, stride);
  }
}
2117
2118 #if !CONFIG_DAALA_DCT8
static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                        const TxfmParam *txfm_param) {
  // Pick the cheapest 8x8 inverse DCT variant the coefficient population
  // allows. When dc is 1, input[0] is already the reconstructed value (no
  // dequantization needed) and dc is counted in eob, so eob >= 1.
  // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
  // Combine that with code here.
#if CONFIG_ADAPT_SCAN
  const int16_t half = txfm_param->eob_threshold[0];
#else
  const int16_t half = 12;
#endif

  const int eob = txfm_param->eob;
  if (eob == 1) {
    // Only the DC coefficient is present.
    aom_idct8x8_1_add(input, dest, stride);
    return;
  }
  if (eob <= half) {
    aom_idct8x8_12_add(input, dest, stride);
    return;
  }
  aom_idct8x8_64_add(input, dest, stride);
}
2143 #endif
2144
2145 #if !CONFIG_DAALA_DCT16
static void idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
  // Pick the cheapest 16x16 inverse DCT variant based on how many
  // coefficients are populated (eob thresholds separate the cases).
#if CONFIG_ADAPT_SCAN
  const int16_t half = txfm_param->eob_threshold[0];
  const int16_t quarter = txfm_param->eob_threshold[1];
#else
  const int16_t half = 38;
  const int16_t quarter = 10;
#endif

  const int eob = txfm_param->eob;
  if (eob == 1) {
    // Only the DC coefficient is present.
    aom_idct16x16_1_add(input, dest, stride);
    return;
  }
  if (eob <= quarter) {
    aom_idct16x16_10_add(input, dest, stride);
    return;
  }
  if (eob <= half) {
    aom_idct16x16_38_add(input, dest, stride);
    return;
  }
  aom_idct16x16_256_add(input, dest, stride);
}
2168 #endif
2169
2170 #if CONFIG_MRC_TX
static void imrc32x32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            const TxfmParam *txfm_param) {
  // 32x32 MRC inverse transform and add. Obtains the per-pixel mask either
  // from txfm_param->mask (when the mask is signalled in the bitstream) or
  // by re-deriving it from the prediction, then dispatches on eob.
#if CONFIG_ADAPT_SCAN
  const int16_t half = txfm_param->eob_threshold[0];
  const int16_t quarter = txfm_param->eob_threshold[1];
#else
  const int16_t half = 135;
  const int16_t quarter = 34;
#endif

  const int eob = txfm_param->eob;
  int n_masked_vals = 0;
  uint8_t *mask;
  uint8_t mask_tmp[32 * 32];
  if (eob == 1) {
    // DC only: no mask is needed.
    aom_idct32x32_1_add_c(input, dest, stride);
  } else {
    if ((txfm_param->is_inter && SIGNAL_MRC_MASK_INTER) ||
        (!txfm_param->is_inter && SIGNAL_MRC_MASK_INTRA)) {
      // Mask was signalled explicitly; use it as-is.
      mask = txfm_param->mask;
    } else {
      // Mask is implicit: derive it from the prediction in txfm_param->dst.
      n_masked_vals =
          get_mrc_pred_mask(txfm_param->dst, txfm_param->stride, mask_tmp, 32,
                            32, 32, txfm_param->is_inter);
      // NOTE(review): invalid masks only trip this assert in debug builds;
      // release builds continue with the derived mask.
      if (!is_valid_mrc_mask(n_masked_vals, 32, 32))
        assert(0 && "Invalid MRC mask");
      mask = mask_tmp;
    }
    if (eob <= quarter)
      // non-zero coeff only in upper-left 8x8
      aom_imrc32x32_34_add_c(input, dest, stride, mask);
    else if (eob <= half)
      // non-zero coeff only in upper-left 16x16
      aom_imrc32x32_135_add_c(input, dest, stride, mask);
    else
      aom_imrc32x32_1024_add_c(input, dest, stride, mask);
  }
}
2209 #endif // CONFIG_MRC_TX
2210
2211 #if !CONFIG_DAALA_DCT32
static void idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
  // Pick the cheapest 32x32 inverse DCT variant based on how many
  // coefficients are populated (eob thresholds separate the cases).
#if CONFIG_ADAPT_SCAN
  const int16_t half = txfm_param->eob_threshold[0];
  const int16_t quarter = txfm_param->eob_threshold[1];
#else
  const int16_t half = 135;
  const int16_t quarter = 34;
#endif

  const int eob = txfm_param->eob;
  if (eob == 1) {
    // Only the DC coefficient is present.
    aom_idct32x32_1_add(input, dest, stride);
    return;
  }
  if (eob <= quarter) {
    // non-zero coeff only in upper-left 8x8
    aom_idct32x32_34_add(input, dest, stride);
    return;
  }
  if (eob <= half) {
    // non-zero coeff only in upper-left 16x16
    aom_idct32x32_135_add(input, dest, stride);
    return;
  }
  aom_idct32x32_1024_add(input, dest, stride);
}
2234 #endif
2235
2236 #if CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
static void idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
  // 64x64 inverse DCT: forward to the hybrid-transform entry point, which
  // reads tx_type (and the rest of the transform parameters) from
  // txfm_param. The previous `(void)txfm_param;` suppression was dead code:
  // the parameter is in fact used on the very next line.
  av1_iht64x64_4096_add(input, dest, stride, txfm_param);
}
2242 #endif // CONFIG_TX64X64 && !CONFIG_DAALA_DCT64
2243
2244 #if CONFIG_CHROMA_2X2
static void inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  // 2x2 inverse transform and add: a two-pass butterfly over the four
  // coefficients with a final scaling by 1/4, clipped into dest.
  (void)txfm_param;

  const tran_high_t i0 = input[0] >> UNIT_QUANT_SHIFT;
  const tran_high_t i1 = input[1] >> UNIT_QUANT_SHIFT;
  const tran_high_t i2 = input[2] >> UNIT_QUANT_SHIFT;
  const tran_high_t i3 = input[3] >> UNIT_QUANT_SHIFT;

  // First (vertical) butterfly pass.
  const tran_high_t v0 = i0 + i2;
  const tran_high_t v1 = i1 + i3;
  const tran_high_t v2 = i0 - i2;
  const tran_high_t v3 = i1 - i3;

  // Second (horizontal) pass folded into the final >> 2 scaling.
  const tran_high_t r00 = (v0 + v1) >> 2;
  const tran_high_t r01 = (v0 - v1) >> 2;
  const tran_high_t r10 = (v2 + v3) >> 2;
  const tran_high_t r11 = (v2 - v3) >> 2;

  dest[0] = clip_pixel_add(dest[0], WRAPLOW(r00));
  dest[1] = clip_pixel_add(dest[1], WRAPLOW(r01));
  dest[stride] = clip_pixel_add(dest[stride], WRAPLOW(r10));
  dest[stride + 1] = clip_pixel_add(dest[stride + 1], WRAPLOW(r11));
}
2269 #endif
2270
static void inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  // 4x4 inverse transform and add dispatcher. Lossless blocks always use
  // the Walsh-Hadamard transform; otherwise dispatch on tx_type, using the
  // C implementation where only a C kernel exists (LGT / Daala / DST).
  const TX_TYPE tx_type = txfm_param->tx_type;
  if (txfm_param->lossless) {
    assert(tx_type == DCT_DCT);
    av1_iwht4x4_add(input, dest, stride, txfm_param);
    return;
  }

  switch (tx_type) {
#if !CONFIG_DAALA_DCT4
    case DCT_DCT: av1_idct4x4_add(input, dest, stride, txfm_param); break;
#else
    // With Daala transforms, DCT_DCT falls through to the generic iht path.
    case DCT_DCT:
#endif
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_DCT4
      // LGT only exists in C version
      av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht4x4_16_add(input, dest, stride, txfm_param);
      break;
#endif
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_DCT4
      av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht4x4_16_add(input, dest, stride, txfm_param);
      break;
#endif
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 4, 4, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2324
// 4x8 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht4x8_32_add_c(input, dest, stride, txfm_param);
#else
  av1_iht4x8_32_add(input, dest, stride, txfm_param);
#endif
}
2333
// 8x4 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht8x4_32_add_c(input, dest, stride, txfm_param);
#else
  av1_iht8x4_32_add(input, dest, stride, txfm_param);
#endif
}
2342
2343 // These will be used by the masked-tx experiment in the future.
2344 #if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
// 4x16 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht4x16_64_add_c(input, dest, stride, txfm_param);
#else
  av1_iht4x16_64_add(input, dest, stride, txfm_param);
#endif
}
2353
// 16x4 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_16x4(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht16x4_64_add_c(input, dest, stride, txfm_param);
#else
  av1_iht16x4_64_add(input, dest, stride, txfm_param);
#endif
}
2362
// 8x32 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_8x32(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht8x32_256_add_c(input, dest, stride, txfm_param);
#else
  av1_iht8x32_256_add(input, dest, stride, txfm_param);
#endif
}
2371
// 32x8 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_32x8(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht32x8_256_add_c(input, dest, stride, txfm_param);
#else
  av1_iht32x8_256_add(input, dest, stride, txfm_param);
#endif
}
2380 #endif
2381
// 8x16 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht8x16_128_add_c(input, dest, stride, txfm_param);
#else
  av1_iht8x16_128_add(input, dest, stride, txfm_param);
#endif
}
2390
// 16x8 inverse transform and add; LGT builds must use the C kernel.
static void inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                              int stride, const TxfmParam *txfm_param) {
#if CONFIG_LGT
  av1_iht16x8_128_add_c(input, dest, stride, txfm_param);
#else
  av1_iht16x8_128_add(input, dest, stride, txfm_param);
#endif
}
2399
// 16x32 inverse transform and add: thin dispatch to the hybrid transform.
static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht16x32_512_add(input, dest, stride, txfm_param);
}
2404
// 32x16 inverse transform and add: thin dispatch to the hybrid transform.
static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht32x16_512_add(input, dest, stride, txfm_param);
}
2409
2410 #if CONFIG_TX64X64
// 32x64 inverse transform and add: thin dispatch to the hybrid transform.
static void inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht32x64_2048_add(input, dest, stride, txfm_param);
}
2415
// 64x32 inverse transform and add: thin dispatch to the hybrid transform.
static void inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  av1_iht64x32_2048_add(input, dest, stride, txfm_param);
}
2420 #endif // CONFIG_TX64X64
2421
static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
                             const TxfmParam *txfm_param) {
  // 8x8 inverse transform and add dispatcher: route by tx_type, using the
  // C implementation where only a C kernel exists (LGT / Daala / DST).
  const TX_TYPE tx_type = txfm_param->tx_type;
  switch (tx_type) {
#if !CONFIG_DAALA_DCT8
    case DCT_DCT: idct8x8_add(input, dest, stride, txfm_param); break;
#else
    // With Daala transforms, DCT_DCT falls through to the generic iht path.
    case DCT_DCT:
#endif
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_DCT8
      // LGT only exists in C version
      av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht8x8_64_add(input, dest, stride, txfm_param);
      break;
#endif
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#if CONFIG_LGT || CONFIG_DAALA_DCT8
      av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
      break;
#else
      av1_iht8x8_64_add(input, dest, stride, txfm_param);
      break;
#endif
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // Use C version since DST only exists in C code
      av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 8, 8, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2468
static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  // 16x16 inverse transform and add dispatcher: route by tx_type, using the
  // C implementation when Daala transforms are enabled.
  const TX_TYPE tx_type = txfm_param->tx_type;
  switch (tx_type) {
#if !CONFIG_DAALA_DCT16
    case DCT_DCT: idct16x16_add(input, dest, stride, txfm_param); break;
#else
    // With Daala transforms, DCT_DCT falls through to the generic iht path.
    case DCT_DCT:
#endif
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_DAALA_DCT16
      av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
#else
      av1_iht16x16_256_add(input, dest, stride, txfm_param);
#endif  // CONFIG_DAALA_DCT16
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
#if CONFIG_DAALA_DCT16
      av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
#else
      av1_iht16x16_256_add(input, dest, stride, txfm_param);
#endif  // CONFIG_DAALA_DCT16
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 16, 16, tx_type); break;
#endif  // CONFIG_EXT_TX
#if CONFIG_MRC_TX
    // No break: intentionally falls into default, which also asserts.
    case MRC_DCT: assert(0 && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
    default: assert(0); break;
  }
}
2513
static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  // 32x32 inverse transform and add dispatcher: route by tx_type. MRC_DCT
  // has its own masked path; extended tx types only have C kernels here.
  const TX_TYPE tx_type = txfm_param->tx_type;
  switch (tx_type) {
#if !CONFIG_DAALA_DCT32
    case DCT_DCT: idct32x32_add(input, dest, stride, txfm_param); break;
#else
    case DCT_DCT:
      av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
      break;
#endif
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 32, 32, tx_type); break;
#endif  // CONFIG_EXT_TX
#if CONFIG_MRC_TX
    case MRC_DCT: imrc32x32_add_c(input, dest, stride, txfm_param); break;
#endif  // CONFIG_MRC_TX
    default: assert(0); break;
  }
}
2550
2551 #if CONFIG_TX64X64
static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                               int stride, const TxfmParam *txfm_param) {
  // 64x64 inverse transform and add dispatcher. Only DCT_DCT is expected
  // (asserted below); the remaining cases are kept for release builds where
  // the assert compiles out.
  const TX_TYPE tx_type = txfm_param->tx_type;
  assert(tx_type == DCT_DCT);
  switch (tx_type) {
#if !CONFIG_DAALA_DCT64
    case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
#else
    // With Daala transforms, DCT_DCT falls through to the generic iht path.
    case DCT_DCT:
#endif
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_iht64x64_4096_add_c(input, dest, stride, txfm_param);
      break;
    case IDTX: inv_idtx_add_c(input, dest, stride, 64, 64, tx_type); break;
#endif  // CONFIG_EXT_TX
#if CONFIG_MRC_TX
    // No break: intentionally falls into default, which also asserts.
    case MRC_DCT: assert(0 && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
    default: assert(0); break;
  }
}
2587 #endif // CONFIG_TX64X64
2588
void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  // High-bitdepth 4x4 inverse Walsh-Hadamard transform and add:
  // DC-only blocks (eob <= 1) use the single-coefficient fast path.
  if (eob <= 1) {
    aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
  } else {
    aom_highbd_iwht4x4_16_add(input, dest, stride, bd);
  }
}
2596
2597 #if CONFIG_CHROMA_2X2
static void highbd_inv_txfm_add_2x2(const tran_low_t *input, uint8_t *dest,
                                    int stride, const TxfmParam *txfm_param) {
  // High-bitdepth 2x2 inverse transform and add: a two-pass butterfly over
  // the four coefficients with a final scaling by 1/4, clipped to bd bits.
  // Only bd is consumed from txfm_param; the previous code also read
  // eob/lossless/tx_type and immediately discarded them (dead locals).
  const int bd = txfm_param->bd;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dest);

  const tran_high_t a1 = input[0] >> UNIT_QUANT_SHIFT;
  const tran_high_t b1 = input[1] >> UNIT_QUANT_SHIFT;
  const tran_high_t c1 = input[2] >> UNIT_QUANT_SHIFT;
  const tran_high_t d1 = input[3] >> UNIT_QUANT_SHIFT;

  // First (vertical) butterfly pass.
  const tran_high_t a2 = a1 + c1;
  const tran_high_t b2 = b1 + d1;
  const tran_high_t c2 = a1 - c1;
  const tran_high_t d2 = b1 - d1;

  // Second (horizontal) pass folded into the final >> 2 scaling.
  dst[0] = highbd_clip_pixel_add(dst[0], (a2 + b2) >> 2, bd);
  dst[1] = highbd_clip_pixel_add(dst[1], (a2 - b2) >> 2, bd);
  dst[stride] = highbd_clip_pixel_add(dst[stride], (c2 + d2) >> 2, bd);
  dst[stride + 1] = highbd_clip_pixel_add(dst[stride + 1], (c2 - d2) >> 2, bd);
}
2630 #endif
2631
// Reinterpret a tran_low_t buffer as int32_t for the 2-D inverse transform
// API; valid only while tran_low_t is 32 bits wide (checked by the assert).
static const int32_t *cast_to_int32(const tran_low_t *input) {
  assert(sizeof(int32_t) == sizeof(tran_low_t));
  return (const int32_t *)input;
}
2636
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, const TxfmParam *txfm_param) {
  // High-bitdepth 4x4 inverse transform and add. Lossless blocks use the
  // Walsh-Hadamard path; otherwise dispatch to the 2-D transform, falling
  // back to the C version for identity-containing tx types.
  int eob = txfm_param->eob;
  int bd = txfm_param->bd;
  int lossless = txfm_param->lossless;
  const int32_t *src = cast_to_int32(input);
  const TX_TYPE tx_type = txfm_param->tx_type;
  if (lossless) {
    assert(tx_type == DCT_DCT);
    av1_highbd_iwht4x4_add(input, dest, stride, eob, bd);
    return;
  }
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
      av1_inv_txfm2d_add_4x4(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
      av1_inv_txfm2d_add_4x4(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
    // use the c version for anything including identity for now
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2681
// High-bitdepth 4x8 inverse transform and add via the C 2-D implementation.
void av1_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
                                 int stride, const TxfmParam *txfm_param) {
  const int32_t *src = cast_to_int32(input);
  av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                           txfm_param->tx_type, txfm_param->bd);
}
2688
// High-bitdepth 8x4 inverse transform and add via the C 2-D implementation.
void av1_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
                                 int stride, const TxfmParam *txfm_param) {
  const int32_t *src = cast_to_int32(input);
  av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                           txfm_param->tx_type, txfm_param->bd);
}
2695
// High-bitdepth 8x16 inverse transform and add via the C 2-D implementation.
static void highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                                     int stride, const TxfmParam *txfm_param) {
  const int32_t *src = cast_to_int32(input);
  av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                            txfm_param->tx_type, txfm_param->bd);
}
2702
// High-bitdepth 16x8 inverse transform and add via the C 2-D implementation.
static void highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
                                     int stride, const TxfmParam *txfm_param) {
  const int32_t *src = cast_to_int32(input);
  av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                            txfm_param->tx_type, txfm_param->bd);
}
2709
// 16x32 high bit-depth inverse transform + add (C implementation only).
static void highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  av1_inv_txfm2d_add_16x32_c(cast_to_int32(input), CONVERT_TO_SHORTPTR(dest),
                             stride, tx_type, bd);
}
2716
// 32x16 high bit-depth inverse transform + add (C implementation only).
static void highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  av1_inv_txfm2d_add_32x16_c(cast_to_int32(input), CONVERT_TO_SHORTPTR(dest),
                             stride, tx_type, bd);
}
2723
2724 #if CONFIG_TX64X64
// 32x64 high bit-depth inverse transform + add (C implementation only).
static void highbd_inv_txfm_add_32x64(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  av1_inv_txfm2d_add_32x64_c(cast_to_int32(input), CONVERT_TO_SHORTPTR(dest),
                             stride, tx_type, bd);
}
2731
// 64x32 high bit-depth inverse transform + add (C implementation only).
static void highbd_inv_txfm_add_64x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  av1_inv_txfm2d_add_64x32_c(cast_to_int32(input), CONVERT_TO_SHORTPTR(dest),
                             stride, tx_type, bd);
}
2738 #endif // CONFIG_TX64X64
2739
// 8x8 high bit-depth inverse transform + add. The DCT/ADST family (including
// the flip-ADST combinations) goes through av1_inv_txfm2d_add_8x8; the
// identity-containing types use the C version for now (matching the original
// per-case dispatch, with identical call targets).
static void highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
                                    int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  const int32_t *src = cast_to_int32(input);
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#endif  // CONFIG_EXT_TX
      av1_inv_txfm2d_add_8x8(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
      break;
#if CONFIG_EXT_TX
    // use the c version for anything including identity for now
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}
2777
// 16x16 high bit-depth inverse transform + add. The DCT/ADST family
// (including flip-ADST combinations) goes through av1_inv_txfm2d_add_16x16;
// identity-containing types use the C version for now (same call targets as
// the original per-case dispatch).
static void highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  const int32_t *src = cast_to_int32(input);
  switch (tx_type) {
    case DCT_DCT:
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
#endif  // CONFIG_EXT_TX
      av1_inv_txfm2d_add_16x16(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                               bd);
      break;
#if CONFIG_EXT_TX
    // use the c version for anything including identity for now
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
    case IDTX:
      av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                                 tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0);
  }
}
2815
// 32x32 high bit-depth inverse transform + add. Only DCT_DCT takes the
// non-C av1_inv_txfm2d_add_32x32 path; every other valid transform type is
// forced through the C implementation (as in the original dispatch).
static void highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  const int32_t *src = cast_to_int32(input);
  if (tx_type == DCT_DCT) {
    av1_inv_txfm2d_add_32x32(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
                             bd);
    return;
  }
  switch (tx_type) {
    // The optimised version only supports DCT_DCT, so force use of
    // the C version for all other transform types.
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
#if CONFIG_EXT_TX
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case IDTX:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
#endif  // CONFIG_EXT_TX
      av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                                 tx_type, bd);
      break;
    default: assert(0);
  }
}
2853
2854 #if CONFIG_TX64X64
// 64x64 high bit-depth inverse transform + add. All DCT/ADST-family types
// are deliberately forced through the DCT_DCT kernel (see the TODO below);
// IDTX uses a dedicated identity add. Only DCT_DCT is expected at this size,
// as dictated by get_ext_tx_set_type in blockd.h.
static void highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                                      int stride, const TxfmParam *txfm_param) {
  int bd = txfm_param->bd;
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int32_t *src = cast_to_int32(input);
  switch (tx_type) {
    case DCT_DCT:
      av1_inv_txfm2d_add_64x64(src, CONVERT_TO_SHORTPTR(dest), stride, DCT_DCT,
                               bd);
      break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // TODO(sarahparker)
      // I've deleted the 64x64 implementations that existed in lieu
      // of adst, flipadst and identity for simplicity but will bring back
      // in a later change. This shouldn't impact performance since
      // DCT_DCT is the only extended type currently allowed for 64x64,
      // as dictated by get_ext_tx_set_type in blockd.h.
      av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride,
                                 DCT_DCT, bd);
      break;
    case IDTX:
      // Identity transform: note this takes the original tran_low_t input,
      // not the cast int32 pointer.
      highbd_inv_idtx_add_c(input, dest, stride, 64, 64, tx_type, bd);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
2896 #endif // CONFIG_TX64X64
2897
// Low bit-depth inverse transform + add: dispatches on transform size to the
// matching inv_txfm_add_NxM() helper, which reconstructs the residual from
// |input| and adds it into the |dest| pixel buffer (row stride |stride|).
void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                      TxfmParam *txfm_param) {
  const TX_SIZE tx_size = txfm_param->tx_size;
#if CONFIG_LGT_FROM_PRED
  // Prediction-derived LGT has its own 2D inverse path; bypass the
  // size-based dispatch entirely.
  if (txfm_param->use_lgt) {
    assert(is_lgt_allowed(txfm_param->mode, tx_size));
    ilgt2d_from_pred_add(input, dest, stride, txfm_param);
    return;
  }
#endif  // CONFIG_LGT_FROM_PRED
  switch (tx_size) {
#if CONFIG_TX64X64
    case TX_64X64: inv_txfm_add_64x64(input, dest, stride, txfm_param); break;
#endif  // CONFIG_TX64X64
    case TX_32X32: inv_txfm_add_32x32(input, dest, stride, txfm_param); break;
    case TX_16X16: inv_txfm_add_16x16(input, dest, stride, txfm_param); break;
    case TX_8X8: inv_txfm_add_8x8(input, dest, stride, txfm_param); break;
    case TX_4X8: inv_txfm_add_4x8(input, dest, stride, txfm_param); break;
    case TX_8X4: inv_txfm_add_8x4(input, dest, stride, txfm_param); break;
    case TX_8X16: inv_txfm_add_8x16(input, dest, stride, txfm_param); break;
    case TX_16X8: inv_txfm_add_16x8(input, dest, stride, txfm_param); break;
    case TX_16X32: inv_txfm_add_16x32(input, dest, stride, txfm_param); break;
    case TX_32X16: inv_txfm_add_32x16(input, dest, stride, txfm_param); break;
#if CONFIG_TX64X64
    case TX_64X32: inv_txfm_add_64x32(input, dest, stride, txfm_param); break;
    case TX_32X64: inv_txfm_add_32x64(input, dest, stride, txfm_param); break;
#endif  // CONFIG_TX64X64
    case TX_4X4:
      // this is like av1_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      inv_txfm_add_4x4(input, dest, stride, txfm_param);
      break;
#if CONFIG_CHROMA_2X2
    case TX_2X2: inv_txfm_add_2x2(input, dest, stride, txfm_param); break;
#endif
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
    case TX_32X8: inv_txfm_add_32x8(input, dest, stride, txfm_param); break;
    case TX_8X32: inv_txfm_add_8x32(input, dest, stride, txfm_param); break;
    case TX_16X4: inv_txfm_add_16x4(input, dest, stride, txfm_param); break;
    case TX_4X16: inv_txfm_add_4x16(input, dest, stride, txfm_param); break;
#endif
    default: assert(0 && "Invalid transform size"); break;
  }
}
2943
// Populates |txfm_param| from the macroblockd context for a given transform
// size/type and end-of-block position. Config-gated fields are only filled
// when the corresponding experiment is enabled.
static void init_txfm_param(const MACROBLOCKD *xd, TX_SIZE tx_size,
                            TX_TYPE tx_type, int eob, TxfmParam *txfm_param) {
  txfm_param->tx_type = tx_type;
  txfm_param->tx_size = tx_size;
  txfm_param->eob = eob;
  // Lossless coding is a per-segment property.
  txfm_param->lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
  txfm_param->bd = xd->bd;
#if CONFIG_LGT
  txfm_param->is_inter = is_inter_block(&xd->mi[0]->mbmi);
#endif
#if CONFIG_LGT_FROM_PRED
  txfm_param->use_lgt = xd->mi[0]->mbmi.use_lgt;
#endif
#if CONFIG_ADAPT_SCAN
  txfm_param->eob_threshold =
      (const int16_t *)&xd->eob_threshold_md[tx_size][tx_type][0];
#endif
}
2962
#if !CONFIG_TXMG
typedef void (*InvTxfmFunc)(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
                            TxfmParam *txfm_param);

// Indexed by get_bitdepth_data_path_index(): [0] = low bit-depth path,
// [1] = high bit-depth path.
static InvTxfmFunc inv_txfm_func[2] = { av1_inv_txfm_add,
                                        av1_highbd_inv_txfm_add };
#endif
2970
// Applies the inverse transform for one block and adds the result into the
// destination buffer. A zero |eob| means no non-zero coefficients, so the
// block is left untouched. Under CONFIG_TXMG the low bit-depth path is
// emulated through the high bit-depth code via a temporary uint16 buffer.
void av1_inverse_transform_block(const MACROBLOCKD *xd,
                                 const tran_low_t *dqcoeff,
#if CONFIG_LGT_FROM_PRED
                                 PREDICTION_MODE mode,
#endif
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                                 uint8_t *mrc_mask,
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                                 TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
                                 int stride, int eob) {
  if (!eob) return;
#if CONFIG_PVQ
  // PVQ reconstructs the full block rather than adding a residual, so clear
  // the destination region first (shortptr variant for high bit-depth).
  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
  const int txb_width = block_size_wide[tx_bsize];
  const int txb_height = block_size_high[tx_bsize];
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (int r = 0; r < txb_height; r++)
      for (int c = 0; c < txb_width; c++)
        CONVERT_TO_SHORTPTR(dst)[r * stride + c] = 0;
  } else {
    for (int r = 0; r < txb_height; r++)
      for (int c = 0; c < txb_width; c++) dst[r * stride + c] = 0;
  }
#endif  // CONFIG_PVQ
  TxfmParam txfm_param;
  init_txfm_param(xd, tx_size, tx_type, eob, &txfm_param);
#if CONFIG_LGT || CONFIG_MRC_TX
  txfm_param.is_inter = is_inter_block(&xd->mi[0]->mbmi);
#endif  // CONFIG_LGT || CONFIG_MRC_TX
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  txfm_param.mask = mrc_mask;
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
#if CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX
  txfm_param.dst = dst;
  txfm_param.stride = stride;
#if CONFIG_LGT_FROM_PRED
  txfm_param.mode = mode;
#endif  // CONFIG_LGT_FROM_PRED
#endif  // CONFIG_LGT_FROM_PRED || CONFIG_MRC_TX

  const int is_hbd = get_bitdepth_data_path_index(xd);
#if CONFIG_TXMG
  if (is_hbd) {
    av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
  } else {
    // Low bit-depth input: widen dst into a uint16 scratch buffer, run the
    // high bit-depth transform, then narrow the result back to 8 bits.
    DECLARE_ALIGNED(16, uint16_t, tmp[MAX_TX_SQUARE]);
    int tmp_stride = MAX_TX_SIZE;
    int w = tx_size_wide[tx_size];
    int h = tx_size_high[tx_size];
    for (int r = 0; r < h; ++r) {
      for (int c = 0; c < w; ++c) {
        tmp[r * tmp_stride + c] = dst[r * stride + c];
      }
    }

    av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
                            &txfm_param);

    for (int r = 0; r < h; ++r) {
      for (int c = 0; c < w; ++c) {
        dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c];
      }
    }
  }
#else   // CONFIG_TXMG
  inv_txfm_func[is_hbd](dqcoeff, dst, stride, &txfm_param);
#endif  // CONFIG_TXMG
}
3039
// Convenience wrapper: derives the transform size/type, dequantized
// coefficients, and destination pointer for (plane, block, blk_row, blk_col)
// from the macroblockd plane state, then calls av1_inverse_transform_block().
void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
                                        int blk_row, int blk_col, int eob) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
  const TX_TYPE tx_type =
      av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
  const int dst_stride = pd->dst.stride;
  // blk_row/blk_col are in 4x4 units; scale by the base transform width to
  // get a pixel offset into the plane's destination buffer.
  uint8_t *dst =
      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
  av1_inverse_transform_block(xd, dqcoeff,
#if CONFIG_LGT_FROM_PRED
                              xd->mi[0]->mbmi.mode,
#endif  // CONFIG_LGT_FROM_PRED
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                              mrc_mask,
#endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                              tx_type, tx_size, dst, dst_stride, eob);
}
3063
// High bit-depth inverse transform + add: dispatches on transform size to
// the matching highbd helper. |dest| is a CONVERT_TO_BYTEPTR-style pointer to
// 16-bit pixels; the helpers unwrap it via CONVERT_TO_SHORTPTR.
void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
                             TxfmParam *txfm_param) {
  const TX_SIZE tx_size = txfm_param->tx_size;
  switch (tx_size) {
#if CONFIG_TX64X64
    case TX_64X64:
      highbd_inv_txfm_add_64x64(input, dest, stride, txfm_param);
      break;
#endif  // CONFIG_TX64X64
    case TX_32X32:
      highbd_inv_txfm_add_32x32(input, dest, stride, txfm_param);
      break;
    case TX_16X16:
      highbd_inv_txfm_add_16x16(input, dest, stride, txfm_param);
      break;
    case TX_8X8:
      highbd_inv_txfm_add_8x8(input, dest, stride, txfm_param);
      break;
    case TX_4X8:
      av1_highbd_inv_txfm_add_4x8(input, dest, stride, txfm_param);
      break;
    case TX_8X4:
      av1_highbd_inv_txfm_add_8x4(input, dest, stride, txfm_param);
      break;
    case TX_8X16:
      highbd_inv_txfm_add_8x16(input, dest, stride, txfm_param);
      break;
    case TX_16X8:
      highbd_inv_txfm_add_16x8(input, dest, stride, txfm_param);
      break;
    case TX_16X32:
      highbd_inv_txfm_add_16x32(input, dest, stride, txfm_param);
      break;
    case TX_32X16:
      highbd_inv_txfm_add_32x16(input, dest, stride, txfm_param);
      break;
#if CONFIG_TX64X64
    case TX_64X32:
      highbd_inv_txfm_add_64x32(input, dest, stride, txfm_param);
      break;
    case TX_32X64:
      highbd_inv_txfm_add_32x64(input, dest, stride, txfm_param);
      break;
#endif  // CONFIG_TX64X64
    case TX_4X4:
      // this is like av1_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      av1_highbd_inv_txfm_add_4x4(input, dest, stride, txfm_param);
      break;
#if CONFIG_CHROMA_2X2
    case TX_2X2:
      highbd_inv_txfm_add_2x2(input, dest, stride, txfm_param);
      break;
#endif
    default: assert(0 && "Invalid transform size"); break;
  }
}
3122