1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 
13 #include "./vp9_rtcd.h"
14 #include "./vpx_dsp_rtcd.h"
15 #include "vp9/common/vp9_blockd.h"
16 #include "vp9/common/vp9_idct.h"
17 #include "vpx_dsp/inv_txfm.h"
18 #include "vpx_ports/mem.h"
19 
vp9_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
21                          int tx_type) {
22   const transform_2d IHT_4[] = {
23     { idct4_c, idct4_c },   // DCT_DCT  = 0
24     { iadst4_c, idct4_c },  // ADST_DCT = 1
25     { idct4_c, iadst4_c },  // DCT_ADST = 2
26     { iadst4_c, iadst4_c }  // ADST_ADST = 3
27   };
28 
29   int i, j;
30   tran_low_t out[4 * 4];
31   tran_low_t *outptr = out;
32   tran_low_t temp_in[4], temp_out[4];
33 
34   // inverse transform row vectors
35   for (i = 0; i < 4; ++i) {
36     IHT_4[tx_type].rows(input, outptr);
37     input += 4;
38     outptr += 4;
39   }
40 
41   // inverse transform column vectors
42   for (i = 0; i < 4; ++i) {
43     for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
44     IHT_4[tx_type].cols(temp_in, temp_out);
45     for (j = 0; j < 4; ++j) {
46       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
47                                             ROUND_POWER_OF_TWO(temp_out[j], 4));
48     }
49   }
50 }
51 
52 static const transform_2d IHT_8[] = {
53   { idct8_c, idct8_c },   // DCT_DCT  = 0
54   { iadst8_c, idct8_c },  // ADST_DCT = 1
55   { idct8_c, iadst8_c },  // DCT_ADST = 2
56   { iadst8_c, iadst8_c }  // ADST_ADST = 3
57 };
58 
vp9_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)59 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
60                          int tx_type) {
61   int i, j;
62   tran_low_t out[8 * 8];
63   tran_low_t *outptr = out;
64   tran_low_t temp_in[8], temp_out[8];
65   const transform_2d ht = IHT_8[tx_type];
66 
67   // inverse transform row vectors
68   for (i = 0; i < 8; ++i) {
69     ht.rows(input, outptr);
70     input += 8;
71     outptr += 8;
72   }
73 
74   // inverse transform column vectors
75   for (i = 0; i < 8; ++i) {
76     for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
77     ht.cols(temp_in, temp_out);
78     for (j = 0; j < 8; ++j) {
79       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
80                                             ROUND_POWER_OF_TWO(temp_out[j], 5));
81     }
82   }
83 }
84 
85 static const transform_2d IHT_16[] = {
86   { idct16_c, idct16_c },   // DCT_DCT  = 0
87   { iadst16_c, idct16_c },  // ADST_DCT = 1
88   { idct16_c, iadst16_c },  // DCT_ADST = 2
89   { iadst16_c, iadst16_c }  // ADST_ADST = 3
90 };
91 
vp9_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)92 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
93                             int tx_type) {
94   int i, j;
95   tran_low_t out[16 * 16];
96   tran_low_t *outptr = out;
97   tran_low_t temp_in[16], temp_out[16];
98   const transform_2d ht = IHT_16[tx_type];
99 
100   // Rows
101   for (i = 0; i < 16; ++i) {
102     ht.rows(input, outptr);
103     input += 16;
104     outptr += 16;
105   }
106 
107   // Columns
108   for (i = 0; i < 16; ++i) {
109     for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
110     ht.cols(temp_in, temp_out);
111     for (j = 0; j < 16; ++j) {
112       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
113                                             ROUND_POWER_OF_TWO(temp_out[j], 6));
114     }
115   }
116 }
117 
118 // idct
// Picks a 4x4 inverse DCT kernel from eob: the "_1" variant when eob is at
// most 1, otherwise the full 16-coefficient variant.
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_idct4x4_1_add(input, dest, stride);
    return;
  }
  vpx_idct4x4_16_add(input, dest, stride);
}
126 
// Picks a 4x4 inverse WHT kernel from eob: the "_1" variant when eob is at
// most 1, otherwise the full 16-coefficient variant.
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_iwht4x4_1_add(input, dest, stride);
    return;
  }
  vpx_iwht4x4_16_add(input, dest, stride);
}
134 
// Picks an 8x8 inverse DCT kernel from eob. Fewer non-zero coefficients allow
// a cheaper kernel:
//   eob == 1           -> DC-only kernel
//   other eob <= 12    -> 12-coefficient kernel
//   eob > 12           -> full 64-coefficient kernel
//
// Note carried over from the original: if dc is 1, input[0] is the
// reconstructed value and needs no dequantization; dc is also counted in eob
// in that case, so eob >= 1.
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob > 12) {
    vpx_idct8x8_64_add(input, dest, stride);
  } else if (eob != 1) {
    vpx_idct8x8_12_add(input, dest, stride);
  } else {
    vpx_idct8x8_1_add(input, dest, stride);
  }
}
150 
// Picks a 16x16 inverse DCT kernel from eob. Fewer non-zero coefficients allow
// a cheaper kernel:
//   eob == 1           -> DC-only kernel
//   other eob <= 10    -> 10-coefficient kernel
//   10 < eob <= 38     -> 38-coefficient kernel
//   eob > 38           -> full 256-coefficient kernel
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob > 38) {
    vpx_idct16x16_256_add(input, dest, stride);
  } else if (eob > 10) {
    vpx_idct16x16_38_add(input, dest, stride);
  } else if (eob != 1) {
    vpx_idct16x16_10_add(input, dest, stride);
  } else {
    vpx_idct16x16_1_add(input, dest, stride);
  }
}
164 
// Picks a 32x32 inverse DCT kernel from eob:
//   eob == 1           -> "_1" kernel
//   other eob <= 34    -> non-zero coeff only in upper-left 8x8
//   34 < eob <= 135    -> non-zero coeff only in upper-left 16x16
//   eob > 135          -> full 1024-coefficient kernel
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob > 135) {
    vpx_idct32x32_1024_add(input, dest, stride);
  } else if (eob > 34) {
    vpx_idct32x32_135_add(input, dest, stride);
  } else if (eob != 1) {
    vpx_idct32x32_34_add(input, dest, stride);
  } else {
    vpx_idct32x32_1_add(input, dest, stride);
  }
}
178 
179 // iht
// 4x4 inverse transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht4x4_16_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct4x4_add(input, dest, stride, eob);
}
187 
// 8x8 inverse transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht8x8_64_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct8x8_add(input, dest, stride, eob);
}
196 
// 16x16 inverse transform entry point: DCT_DCT goes through the eob-aware DCT
// dispatcher, every other tx_type through the hybrid transform.
void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                      int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht16x16_256_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct16x16_add(input, dest, stride, eob);
}
205 
206 #if CONFIG_VP9_HIGHBITDEPTH
207 
vp9_highbd_iht4x4_16_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)208 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest,
209                                 int stride, int tx_type, int bd) {
210   const highbd_transform_2d IHT_4[] = {
211     { vpx_highbd_idct4_c, vpx_highbd_idct4_c },   // DCT_DCT  = 0
212     { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },  // ADST_DCT = 1
213     { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },  // DCT_ADST = 2
214     { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }  // ADST_ADST = 3
215   };
216 
217   int i, j;
218   tran_low_t out[4 * 4];
219   tran_low_t *outptr = out;
220   tran_low_t temp_in[4], temp_out[4];
221 
222   // Inverse transform row vectors.
223   for (i = 0; i < 4; ++i) {
224     IHT_4[tx_type].rows(input, outptr, bd);
225     input += 4;
226     outptr += 4;
227   }
228 
229   // Inverse transform column vectors.
230   for (i = 0; i < 4; ++i) {
231     for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
232     IHT_4[tx_type].cols(temp_in, temp_out, bd);
233     for (j = 0; j < 4; ++j) {
234       dest[j * stride + i] = highbd_clip_pixel_add(
235           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
236     }
237   }
238 }
239 
240 static const highbd_transform_2d HIGH_IHT_8[] = {
241   { vpx_highbd_idct8_c, vpx_highbd_idct8_c },   // DCT_DCT  = 0
242   { vpx_highbd_iadst8_c, vpx_highbd_idct8_c },  // ADST_DCT = 1
243   { vpx_highbd_idct8_c, vpx_highbd_iadst8_c },  // DCT_ADST = 2
244   { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }  // ADST_ADST = 3
245 };
246 
vp9_highbd_iht8x8_64_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)247 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest,
248                                 int stride, int tx_type, int bd) {
249   int i, j;
250   tran_low_t out[8 * 8];
251   tran_low_t *outptr = out;
252   tran_low_t temp_in[8], temp_out[8];
253   const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
254 
255   // Inverse transform row vectors.
256   for (i = 0; i < 8; ++i) {
257     ht.rows(input, outptr, bd);
258     input += 8;
259     outptr += 8;
260   }
261 
262   // Inverse transform column vectors.
263   for (i = 0; i < 8; ++i) {
264     for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
265     ht.cols(temp_in, temp_out, bd);
266     for (j = 0; j < 8; ++j) {
267       dest[j * stride + i] = highbd_clip_pixel_add(
268           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
269     }
270   }
271 }
272 
273 static const highbd_transform_2d HIGH_IHT_16[] = {
274   { vpx_highbd_idct16_c, vpx_highbd_idct16_c },   // DCT_DCT  = 0
275   { vpx_highbd_iadst16_c, vpx_highbd_idct16_c },  // ADST_DCT = 1
276   { vpx_highbd_idct16_c, vpx_highbd_iadst16_c },  // DCT_ADST = 2
277   { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }  // ADST_ADST = 3
278 };
279 
vp9_highbd_iht16x16_256_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)280 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest,
281                                    int stride, int tx_type, int bd) {
282   int i, j;
283   tran_low_t out[16 * 16];
284   tran_low_t *outptr = out;
285   tran_low_t temp_in[16], temp_out[16];
286   const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
287 
288   // Rows
289   for (i = 0; i < 16; ++i) {
290     ht.rows(input, outptr, bd);
291     input += 16;
292     outptr += 16;
293   }
294 
295   // Columns
296   for (i = 0; i < 16; ++i) {
297     for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
298     ht.cols(temp_in, temp_out, bd);
299     for (j = 0; j < 16; ++j) {
300       dest[j * stride + i] = highbd_clip_pixel_add(
301           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
302     }
303   }
304 }
305 
306 // idct
// Picks a high-bitdepth 4x4 inverse DCT kernel from eob: the "_1" variant
// when eob is at most 1, otherwise the full 16-coefficient variant. bd is
// passed through to the chosen kernel.
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
}
314 
// Picks a high-bitdepth 4x4 inverse WHT kernel from eob: the "_1" variant
// when eob is at most 1, otherwise the full 16-coefficient variant. bd is
// passed through to the chosen kernel.
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
}
322 
// Picks a high-bitdepth 8x8 inverse DCT kernel from eob. Fewer non-zero
// coefficients allow a cheaper kernel:
//   eob == 1           -> DC-only kernel
//   other eob <= 12    -> 12-coefficient kernel
//   eob > 12           -> full 64-coefficient kernel
//
// Note carried over from the original: if dc is 1, input[0] is the
// reconstructed value and needs no dequantization; dc is also counted in eob
// in that case, so eob >= 1.
void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob > 12) {
    vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
    return;
  }
  if (eob != 1) {
    vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
}
339 
// Picks a high-bitdepth 16x16 inverse DCT kernel from eob. Fewer non-zero
// coefficients allow a cheaper kernel:
//   eob == 1           -> DC-only kernel
//   other eob <= 10    -> 10-coefficient kernel
//   10 < eob <= 38     -> 38-coefficient kernel
//   eob > 38           -> full 256-coefficient kernel
void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob > 38) {
    vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
    return;
  }
  if (eob > 10) {
    vpx_highbd_idct16x16_38_add(input, dest, stride, bd);
    return;
  }
  if (eob != 1) {
    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
}
355 
// Picks a high-bitdepth 32x32 inverse DCT kernel from eob:
//   eob == 1           -> "_1" kernel
//   other eob <= 34    -> non-zero coeff only in upper-left 8x8
//   34 < eob <= 135    -> 135-coefficient kernel
//   eob > 135          -> full 1024-coefficient kernel
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob > 135) {
    vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
    return;
  }
  if (eob > 34) {
    vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
    return;
  }
  if (eob != 1) {
    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
}
369 
370 // iht
// High-bitdepth 4x4 inverse transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
}
378 
// High-bitdepth 8x8 inverse transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
}
387 
// High-bitdepth 16x16 inverse transform entry point: DCT_DCT goes through the
// eob-aware DCT dispatcher, every other tx_type through the hybrid transform.
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
                             uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
}
396 #endif  // CONFIG_VP9_HIGHBITDEPTH
397