1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13
14 #include "./av1_rtcd.h"
15 #include "aom_dsp/txfm_common.h"
16 #include "av1/common/enums.h"
17 #include "av1/common/av1_fwd_txfm1d.h"
18 #include "av1/common/av1_fwd_txfm1d_cfg.h"
19 #include "av1/common/av1_txfm.h"
20
fwd_txfm_type_to_func(TXFM_TYPE txfm_type)21 static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
22 switch (txfm_type) {
23 case TXFM_TYPE_DCT4: return av1_fdct4_new;
24 case TXFM_TYPE_DCT8: return av1_fdct8_new;
25 case TXFM_TYPE_DCT16: return av1_fdct16_new;
26 case TXFM_TYPE_DCT32: return av1_fdct32_new;
27 #if CONFIG_TX64X64
28 case TXFM_TYPE_DCT64: return av1_fdct64_new;
29 #endif // CONFIG_TX64X64
30 case TXFM_TYPE_ADST4: return av1_fadst4_new;
31 case TXFM_TYPE_ADST8: return av1_fadst8_new;
32 case TXFM_TYPE_ADST16: return av1_fadst16_new;
33 case TXFM_TYPE_ADST32: return av1_fadst32_new;
34 #if CONFIG_EXT_TX
35 case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
36 case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
37 case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
38 case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
39 #if CONFIG_TX64X64
40 case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
41 #endif // CONFIG_TX64X64
42 #endif // CONFIG_EXT_TX
43 default: assert(0); return NULL;
44 }
45 }
46
av1_gen_fwd_stage_range(int8_t * stage_range_col,int8_t * stage_range_row,const TXFM_2D_FLIP_CFG * cfg,int bd)47 void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
48 const TXFM_2D_FLIP_CFG *cfg, int bd) {
49 // Note when assigning txfm_size_col, we use the txfm_size from the
50 // row configuration and vice versa. This is intentionally done to
51 // accurately perform rectangular transforms. When the transform is
52 // rectangular, the number of columns will be the same as the
53 // txfm_size stored in the row cfg struct. It will make no difference
54 // for square transforms.
55 const int txfm_size_col = cfg->row_cfg->txfm_size;
56 const int txfm_size_row = cfg->col_cfg->txfm_size;
57 // Take the shift from the larger dimension in the rectangular case.
58 const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
59 : cfg->col_cfg->shift;
60 // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
61 for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
62 stage_range_col[i] = cfg->col_cfg->stage_range[i] + shift[0] + bd + 1;
63 }
64
65 // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
66 for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
67 stage_range_row[i] =
68 cfg->row_cfg->stage_range[i] + shift[0] + shift[1] + bd + 1;
69 }
70 }
71
// Generic forward 2-D transform: column pass first, then row pass.
//   input  - pixel-domain residual, int16, laid out with the given stride.
//   output - receives txfm_size_col * txfm_size_row coefficients; ALSO used
//            as scratch for the column pass (see temp_in/temp_out below), so
//            it must be at least that large before the row pass runs.
//   buf    - caller-provided intermediate buffer holding the full
//            column-transformed block (txfm_size_col * txfm_size_row ints).
//   bd     - bit depth, used to derive per-stage ranges.
static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
                                const int stride, const TXFM_2D_FLIP_CFG *cfg,
                                int32_t *buf, int bd) {
  int c, r;
  // Note when assigning txfm_size_col, we use the txfm_size from the
  // row configuration and vice versa. This is intentionally done to
  // accurately perform rectangular transforms. When the transform is
  // rectangular, the number of columns will be the same as the
  // txfm_size stored in the row cfg struct. It will make no difference
  // for square transforms.
  const int txfm_size_col = cfg->row_cfg->txfm_size;
  const int txfm_size_row = cfg->col_cfg->txfm_size;
  // Take the shift from the larger dimension in the rectangular case.
  const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
                                                        : cfg->col_cfg->shift;
  int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
  int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
  assert(cfg->col_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
  assert(cfg->row_cfg->stage_num <= MAX_TXFM_STAGE_NUM);
  av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bd);

  const int8_t *cos_bit_col = cfg->col_cfg->cos_bit;
  const int8_t *cos_bit_row = cfg->row_cfg->cos_bit;
  const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->col_cfg->txfm_type);
  const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->row_cfg->txfm_type);

  // use output buffer as temp buffer
  // temp_in/temp_out alias disjoint halves of |output|: one column of input
  // (txfm_size_row entries) and its transformed result. This is safe because
  // the column pass writes its final result into |buf|, and |output| is only
  // filled for real during the row pass below.
  int32_t *temp_in = output;
  int32_t *temp_out = output + txfm_size_row;

  // Columns
  for (c = 0; c < txfm_size_col; ++c) {
    if (cfg->ud_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * stride + c];
    } else {
      for (r = 0; r < txfm_size_row; ++r)
        // flip upside down
        temp_in[r] = input[(txfm_size_row - r - 1) * stride + c];
    }
    round_shift_array(temp_in, txfm_size_row, -shift[0]);
    // Multiply everything by Sqrt2 on the larger dimension if the
    // transform is rectangular
    if (txfm_size_col > txfm_size_row) {
      for (r = 0; r < txfm_size_row; ++r)
        temp_in[r] = (int32_t)fdct_round_shift(temp_in[r] * Sqrt2);
    }
    txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
    round_shift_array(temp_out, txfm_size_row, -shift[1]);
    if (cfg->lr_flip == 0) {
      for (r = 0; r < txfm_size_row; ++r)
        buf[r * txfm_size_col + c] = temp_out[r];
    } else {
      for (r = 0; r < txfm_size_row; ++r)
        // flip from left to right
        buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r];
    }
  }

  // Rows
  for (r = 0; r < txfm_size_row; ++r) {
    // Multiply everything by Sqrt2 on the larger dimension if the
    // transform is rectangular
    if (txfm_size_row > txfm_size_col) {
      for (c = 0; c < txfm_size_col; ++c)
        buf[r * txfm_size_col + c] =
            (int32_t)fdct_round_shift(buf[r * txfm_size_col + c] * Sqrt2);
    }
    txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col,
                  cos_bit_row, stage_range_row);
    round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift[2]);
  }
}
144
// 4x8 forward 2-D transform.
void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
                          TX_TYPE tx_type, int bd) {
#if CONFIG_TXMG
  // Run the transform through its rotated counterpart: transpose the input,
  // apply the rotated transform, then transpose the coefficients back.
  int32_t txfm_buf[4 * 8];
  int16_t rinput[4 * 8];
  const TX_SIZE tx_size = TX_4X8;
  const TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  const TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  const int w = tx_size_wide[tx_size];
  const int h = tx_size_high[tx_size];
  const int rw = h;
  const int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
  // |output| doubles as the intermediate buffer for the rotated transform.
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X8);
  int32_t txfm_buf[4 * 8];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif
}
167
// 8x4 forward 2-D transform: fetch the config and run the generic path.
void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
                          TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X4);
  int32_t txfm_buf[8 * 4];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
174
// 8x16 forward 2-D transform.
void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
                           TX_TYPE tx_type, int bd) {
#if CONFIG_TXMG
  // Run the transform through its rotated counterpart: transpose the input,
  // apply the rotated transform, then transpose the coefficients back.
  int32_t txfm_buf[8 * 16];
  int16_t rinput[8 * 16];
  const TX_SIZE tx_size = TX_8X16;
  const TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  const TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  const int w = tx_size_wide[tx_size];
  const int h = tx_size_high[tx_size];
  const int rw = h;
  const int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
  // |output| doubles as the intermediate buffer for the rotated transform.
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X16);
  int32_t txfm_buf[8 * 16];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif
}
197
// 16x8 forward 2-D transform: fetch the config and run the generic path.
void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
                           TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X8);
  int32_t txfm_buf[16 * 8];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
204
// 16x32 forward 2-D transform.
void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
#if CONFIG_TXMG
  // Run the transform through its rotated counterpart: transpose the input,
  // apply the rotated transform, then transpose the coefficients back.
  int32_t txfm_buf[16 * 32];
  int16_t rinput[16 * 32];
  const TX_SIZE tx_size = TX_16X32;
  const TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
  const TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
  const int w = tx_size_wide[tx_size];
  const int h = tx_size_high[tx_size];
  const int rw = h;
  const int rh = w;
  transpose_int16(rinput, rw, input, stride, w, h);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rtx_type, rtx_size);
  // |output| doubles as the intermediate buffer for the rotated transform.
  fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
  transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X32);
  int32_t txfm_buf[16 * 32];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif
}
227
// 32x16 forward 2-D transform: fetch the config and run the generic path.
void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X16);
  int32_t txfm_buf[32 * 16];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
234
// 4x4 forward 2-D transform: fetch the config and run the generic path.
void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
                          TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X4);
  int32_t txfm_buf[4 * 4];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
241
// 8x8 forward 2-D transform: fetch the config and run the generic path.
void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
                          TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X8);
  int32_t txfm_buf[8 * 8];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
248
// 16x16 forward 2-D transform: fetch the config and run the generic path.
void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X16);
  int32_t txfm_buf[16 * 16];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
255
// 32x32 forward 2-D transform: fetch the config and run the generic path.
void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_32X32);
  int32_t txfm_buf[32 * 32];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
262
263 #if CONFIG_TX64X64
// 64x64 forward 2-D transform (dedicated 64-point config lookup).
void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x64_cfg(tx_type);
  int32_t txfm_buf[64 * 64];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
270
// 32x64 forward 2-D transform (dedicated 64-point config lookup).
void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_32x64_cfg(tx_type);
  int32_t txfm_buf[32 * 64];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
277
// 64x32 forward 2-D transform (dedicated 64-point config lookup).
void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
                            TX_TYPE tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x32_cfg(tx_type);
  int32_t txfm_buf[64 * 32];
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
284 #endif // CONFIG_TX64X64
285
// Column-pass 1-D configurations, indexed [TX_TYPE_1D][TX_SIZE].
// FLIPADST reuses the ADST configs — the flip itself is carried separately
// in TXFM_2D_FLIP_CFG. With CONFIG_CHROMA_2X2 the first TX_SIZE slot has no
// 1-D transform, hence NULL.
static const TXFM_1D_CFG *fwd_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
  // DCT
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_col_cfg_dct_4, &fwd_txfm_1d_col_cfg_dct_8,
      &fwd_txfm_1d_col_cfg_dct_16, &fwd_txfm_1d_col_cfg_dct_32 },
  // ADST
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
      &fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
#if CONFIG_EXT_TX
  // FLIPADST
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_col_cfg_adst_4, &fwd_txfm_1d_col_cfg_adst_8,
      &fwd_txfm_1d_col_cfg_adst_16, &fwd_txfm_1d_col_cfg_adst_32 },
  // IDENTITY
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_cfg_identity_4, &fwd_txfm_1d_cfg_identity_8,
      &fwd_txfm_1d_cfg_identity_16, &fwd_txfm_1d_cfg_identity_32 },
#endif  // CONFIG_EXT_TX
};
318
// Row-pass 1-D configurations, indexed [TX_TYPE_1D][TX_SIZE].
// Mirrors fwd_txfm_col_cfg_ls: FLIPADST reuses the ADST configs (the flip is
// carried in TXFM_2D_FLIP_CFG) and the CONFIG_CHROMA_2X2 slot is NULL.
static const TXFM_1D_CFG *fwd_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
  // DCT
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_row_cfg_dct_4, &fwd_txfm_1d_row_cfg_dct_8,
      &fwd_txfm_1d_row_cfg_dct_16, &fwd_txfm_1d_row_cfg_dct_32 },
  // ADST
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
      &fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
#if CONFIG_EXT_TX
  // FLIPADST
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_row_cfg_adst_4, &fwd_txfm_1d_row_cfg_adst_8,
      &fwd_txfm_1d_row_cfg_adst_16, &fwd_txfm_1d_row_cfg_adst_32 },
  // IDENTITY
  {
#if CONFIG_CHROMA_2X2
      NULL,
#endif
      &fwd_txfm_1d_cfg_identity_4, &fwd_txfm_1d_cfg_identity_8,
      &fwd_txfm_1d_cfg_identity_16, &fwd_txfm_1d_cfg_identity_32 },
#endif  // CONFIG_EXT_TX
};
351
av1_get_fwd_txfm_cfg(TX_TYPE tx_type,TX_SIZE tx_size)352 TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size) {
353 TXFM_2D_FLIP_CFG cfg;
354 set_flip_cfg(tx_type, &cfg);
355 const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
356 const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
357 const TX_SIZE tx_size_col = txsize_vert_map[tx_size];
358 const TX_SIZE tx_size_row = txsize_horz_map[tx_size];
359 cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
360 cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
361 return cfg;
362 }
363
364 #if CONFIG_TX64X64
av1_get_fwd_txfm_32x64_cfg(TX_TYPE tx_type)365 TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(TX_TYPE tx_type) {
366 TXFM_2D_FLIP_CFG cfg;
367 const TX_TYPE_1D tx_type_row = htx_tab[tx_type];
368 const TX_SIZE tx_size_row = txsize_horz_map[TX_32X64];
369 switch (tx_type) {
370 case DCT_DCT:
371 cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
372 cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
373 cfg.ud_flip = 0;
374 cfg.lr_flip = 0;
375 break;
376 default: assert(0);
377 }
378 return cfg;
379 }
380
av1_get_fwd_txfm_64x32_cfg(TX_TYPE tx_type)381 TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(TX_TYPE tx_type) {
382 TXFM_2D_FLIP_CFG cfg;
383 const TX_TYPE_1D tx_type_col = vtx_tab[tx_type];
384 const TX_SIZE tx_size_col = txsize_vert_map[TX_64X32];
385 switch (tx_type) {
386 case DCT_DCT:
387 cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
388 cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
389 cfg.ud_flip = 0;
390 cfg.lr_flip = 0;
391 break;
392 default: assert(0);
393 }
394 return cfg;
395 }
396
av1_get_fwd_txfm_64x64_cfg(TX_TYPE tx_type)397 TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(TX_TYPE tx_type) {
398 TXFM_2D_FLIP_CFG cfg;
399 switch (tx_type) {
400 case DCT_DCT:
401 cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
402 cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
403 cfg.ud_flip = 0;
404 cfg.lr_flip = 0;
405 break;
406 default:
407 cfg.ud_flip = 0;
408 cfg.lr_flip = 0;
409 assert(0);
410 }
411 return cfg;
412 }
413 #endif // CONFIG_TX64X64
414