1 /*****************************************************************************
2  * This file is part of Kvazaar HEVC encoder.
3  *
4  * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without modification,
8  * are permitted provided that the following conditions are met:
9  *
10  * * Redistributions of source code must retain the above copyright notice, this
11  *   list of conditions and the following disclaimer.
12  *
13  * * Redistributions in binary form must reproduce the above copyright notice, this
14  *   list of conditions and the following disclaimer in the documentation and/or
15  *   other materials provided with the distribution.
16  *
17  * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26  * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
28  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  ****************************************************************************/
32 
33 #include "strategies/generic/dct-generic.h"
34 
35 #include "strategyselector.h"
36 #include "tables.h"
37 
// 4-point forward DST basis matrix: row r holds the coefficients of basis
// vector r. Used for the HEVC 4x4 intra luma transform.
ALIGNED(32) const int16_t kvz_g_dst_4[4][4] =
{
  { 29, 55, 74, 84 },
  { 74, 74, 0, -74 },
  { 84, -29, -74, 55 },
  { 55, -84, 74, -29 }
};
45 
// 4-point DCT basis matrix: row r holds the coefficients of basis vector r.
ALIGNED(32) const int16_t kvz_g_dct_4[4][4] =
{
  { 64, 64, 64, 64 },
  { 83, 36, -36, -83 },
  { 64, -64, -64, 64 },
  { 36, -83, 83, -36 }
};
53 
// 8-point DCT basis matrix: row r holds the coefficients of basis vector r.
ALIGNED(64) const int16_t kvz_g_dct_8[8][8] =
{
  { 64, 64, 64, 64, 64, 64, 64, 64 },
  { 89, 75, 50, 18, -18, -50, -75, -89 },
  { 83, 36, -36, -83, -83, -36, 36, 83 },
  { 75, -18, -89, -50, 50, 89, 18, -75 },
  { 64, -64, -64, 64, 64, -64, -64, 64 },
  { 50, -89, 18, 75, -75, -18, 89, -50 },
  { 36, -83, 83, -36, -36, 83, -83, 36 },
  { 18, -50, 75, -89, 89, -75, 50, -18 }
};
65 
// 16-point DCT basis matrix: row r holds the coefficients of basis vector r.
ALIGNED(64) const int16_t kvz_g_dct_16[16][16] =
{
  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
  { 90, 87, 80, 70, 57, 43, 25, 9, -9, -25, -43, -57, -70, -80, -87, -90 },
  { 89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89 },
  { 87, 57, 9, -43, -80, -90, -70, -25, 25, 70, 90, 80, 43, -9, -57, -87 },
  { 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83 },
  { 80, 9, -70, -87, -25, 57, 90, 43, -43, -90, -57, 25, 87, 70, -9, -80 },
  { 75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75 },
  { 70, -43, -87, 9, 90, 25, -80, -57, 57, 80, -25, -90, -9, 87, 43, -70 },
  { 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64 },
  { 57, -80, -25, 90, -9, -87, 43, 70, -70, -43, 87, 9, -90, 25, 80, -57 },
  { 50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50 },
  { 43, -90, 57, 25, -87, 70, 9, -80, 80, -9, -70, 87, -25, -57, 90, -43 },
  { 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36 },
  { 25, -70, 90, -80, 43, 9, -57, 87, -87, 57, -9, -43, 80, -90, 70, -25 },
  { 18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18 },
  { 9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9 }
};
85 
// 32-point DCT basis matrix: row r holds the coefficients of basis vector r.
ALIGNED(64) const int16_t kvz_g_dct_32[32][32] =
{
  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
  { 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, -4, -13, -22, -31, -38, -46, -54, -61, -67, -73, -78, -82, -85, -88, -90, -90 },
  { 90, 87, 80, 70, 57, 43, 25, 9, -9, -25, -43, -57, -70, -80, -87, -90, -90, -87, -80, -70, -57, -43, -25, -9, 9, 25, 43, 57, 70, 80, 87, 90 },
  { 90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13, 13, 38, 61, 78, 88, 90, 85, 73, 54, 31, 4, -22, -46, -67, -82, -90 },
  { 89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89, 89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89 },
  { 88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22, -22, -61, -85, -90, -73, -38, 4, 46, 78, 90, 82, 54, 13, -31, -67, -88 },
  { 87, 57, 9, -43, -80, -90, -70, -25, 25, 70, 90, 80, 43, -9, -57, -87, -87, -57, -9, 43, 80, 90, 70, 25, -25, -70, -90, -80, -43, 9, 57, 87 },
  { 85, 46, -13, -67, -90, -73, -22, 38, 82, 88, 54, -4, -61, -90, -78, -31, 31, 78, 90, 61, 4, -54, -88, -82, -38, 22, 73, 90, 67, 13, -46, -85 },
  { 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83 },
  { 82, 22, -54, -90, -61, 13, 78, 85, 31, -46, -90, -67, 4, 73, 88, 38, -38, -88, -73, -4, 67, 90, 46, -31, -85, -78, -13, 61, 90, 54, -22, -82 },
  { 80, 9, -70, -87, -25, 57, 90, 43, -43, -90, -57, 25, 87, 70, -9, -80, -80, -9, 70, 87, 25, -57, -90, -43, 43, 90, 57, -25, -87, -70, 9, 80 },
  { 78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46, 46, 90, 38, -54, -90, -31, 61, 88, 22, -67, -85, -13, 73, 82, 4, -78 },
  { 75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75, 75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75 },
  { 73, -31, -90, -22, 78, 67, -38, -90, -13, 82, 61, -46, -88, -4, 85, 54, -54, -85, 4, 88, 46, -61, -82, 13, 90, 38, -67, -78, 22, 90, 31, -73 },
  { 70, -43, -87, 9, 90, 25, -80, -57, 57, 80, -25, -90, -9, 87, 43, -70, -70, 43, 87, -9, -90, -25, 80, 57, -57, -80, 25, 90, 9, -87, -43, 70 },
  { 67, -54, -78, 38, 85, -22, -90, 4, 90, 13, -88, -31, 82, 46, -73, -61, 61, 73, -46, -82, 31, 88, -13, -90, -4, 90, 22, -85, -38, 78, 54, -67 },
  { 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64 },
  { 61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67, -67, -54, 78, 38, -85, -22, 90, 4, -90, 13, 88, -31, -82, 46, 73, -61 },
  { 57, -80, -25, 90, -9, -87, 43, 70, -70, -43, 87, 9, -90, 25, 80, -57, -57, 80, 25, -90, 9, 87, -43, -70, 70, 43, -87, -9, 90, -25, -80, 57 },
  { 54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73, 73, 31, -90, 22, 78, -67, -38, 90, -13, -82, 61, 46, -88, 4, 85, -54 },
  { 50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50, 50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50 },
  { 46, -90, 38, 54, -90, 31, 61, -88, 22, 67, -85, 13, 73, -82, 4, 78, -78, -4, 82, -73, -13, 85, -67, -22, 88, -61, -31, 90, -54, -38, 90, -46 },
  { 43, -90, 57, 25, -87, 70, 9, -80, 80, -9, -70, 87, -25, -57, 90, -43, -43, 90, -57, -25, 87, -70, -9, 80, -80, 9, 70, -87, 25, 57, -90, 43 },
  { 38, -88, 73, -4, -67, 90, -46, -31, 85, -78, 13, 61, -90, 54, 22, -82, 82, -22, -54, 90, -61, -13, 78, -85, 31, 46, -90, 67, 4, -73, 88, -38 },
  { 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36 },
  { 31, -78, 90, -61, 4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85, -85, 46, 13, -67, 90, -73, 22, 38, -82, 88, -54, -4, 61, -90, 78, -31 },
  { 25, -70, 90, -80, 43, 9, -57, 87, -87, 57, -9, -43, 80, -90, 70, -25, -25, 70, -90, 80, -43, -9, 57, -87, 87, -57, 9, 43, -80, 90, -70, 25 },
  { 22, -61, 85, -90, 73, -38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88, 88, -67, 31, 13, -54, 82, -90, 78, -46, 4, 38, -73, 90, -85, 61, -22 },
  { 18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18, 18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18 },
  { 13, -38, 61, -78, 88, -90, 85, -73, 54, -31, 4, 22, -46, 67, -82, 90, -90, 82, -67, 46, -22, -4, 31, -54, 73, -85, 90, -88, 78, -61, 38, -13 },
  { 9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9, -9, 25, -43, 57, -70, 80, -87, 90, -90, 87, -80, 70, -57, 43, -25, 9 },
  { 4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90, 90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 }
};
121 
// Transpose of kvz_g_dst_4 (column r of kvz_g_dst_4 becomes row r here).
ALIGNED(32) const int16_t kvz_g_dst_4_t[4][4] =
{
  { 29, 74, 84, 55 },
  { 55, 74, -29, -84 },
  { 74, 0, -74, 74 },
  { 84, -74, 55, -29 }
};
129 
// Transpose of kvz_g_dct_4.
ALIGNED(32) const int16_t kvz_g_dct_4_t[4][4] =
{
  { 64, 83, 64, 36, },
  { 64, 36, -64, -83, },
  { 64, -36, -64, 83, },
  { 64, -83, 64, -36 }
};
137 
// Transpose of kvz_g_dct_8.
ALIGNED(64) const int16_t kvz_g_dct_8_t[8][8] =
{
  { 64, 89, 83, 75, 64, 50, 36, 18, },
  { 64, 75, 36, -18, -64, -89, -83, -50, },
  { 64, 50, -36, -89, -64, 18, 83, 75, },
  { 64, 18, -83, -50, 64, 75, -36, -89, },
  { 64, -18, -83, 50, 64, -75, -36, 89, },
  { 64, -50, -36, 89, -64, -18, 83, -75, },
  { 64, -75, 36, 18, -64, 89, -83, 50, },
  { 64, -89, 83, -75, 64, -50, 36, -18 }
};
149 
// Transpose of kvz_g_dct_16.
ALIGNED(64) const int16_t kvz_g_dct_16_t[16][16] =
{
  { 64, 90, 89, 87, 83, 80, 75, 70, 64, 57, 50, 43, 36, 25, 18, 9, },
  { 64, 87, 75, 57, 36, 9, -18, -43, -64, -80, -89, -90, -83, -70, -50, -25, },
  { 64, 80, 50, 9, -36, -70, -89, -87, -64, -25, 18, 57, 83, 90, 75, 43, },
  { 64, 70, 18, -43, -83, -87, -50, 9, 64, 90, 75, 25, -36, -80, -89, -57, },
  { 64, 57, -18, -80, -83, -25, 50, 90, 64, -9, -75, -87, -36, 43, 89, 70, },
  { 64, 43, -50, -90, -36, 57, 89, 25, -64, -87, -18, 70, 83, 9, -75, -80, },
  { 64, 25, -75, -70, 36, 90, 18, -80, -64, 43, 89, 9, -83, -57, 50, 87, },
  { 64, 9, -89, -25, 83, 43, -75, -57, 64, 70, -50, -80, 36, 87, -18, -90, },
  { 64, -9, -89, 25, 83, -43, -75, 57, 64, -70, -50, 80, 36, -87, -18, 90, },
  { 64, -25, -75, 70, 36, -90, 18, 80, -64, -43, 89, -9, -83, 57, 50, -87, },
  { 64, -43, -50, 90, -36, -57, 89, -25, -64, 87, -18, -70, 83, -9, -75, 80, },
  { 64, -57, -18, 80, -83, 25, 50, -90, 64, 9, -75, 87, -36, -43, 89, -70, },
  { 64, -70, 18, 43, -83, 87, -50, -9, 64, -90, 75, -25, -36, 80, -89, 57, },
  { 64, -80, 50, -9, -36, 70, -89, 87, -64, 25, 18, -57, 83, -90, 75, -43, },
  { 64, -87, 75, -57, 36, -9, -18, 43, -64, 80, -89, 90, -83, 70, -50, 25, },
  { 64, -90, 89, -87, 83, -80, 75, -70, 64, -57, 50, -43, 36, -25, 18, -9 }
};
169 
// Transpose of kvz_g_dct_32.
ALIGNED(64) const int16_t kvz_g_dct_32_t[32][32] =
{
  { 64, 90, 90, 90, 89, 88, 87, 85, 83, 82, 80, 78, 75, 73, 70, 67, 64, 61, 57, 54, 50, 46, 43, 38, 36, 31, 25, 22, 18, 13, 9, 4, },
  { 64, 90, 87, 82, 75, 67, 57, 46, 36, 22, 9, -4, -18, -31, -43, -54, -64, -73, -80, -85, -89, -90, -90, -88, -83, -78, -70, -61, -50, -38, -25, -13, },
  { 64, 88, 80, 67, 50, 31, 9, -13, -36, -54, -70, -82, -89, -90, -87, -78, -64, -46, -25, -4, 18, 38, 57, 73, 83, 90, 90, 85, 75, 61, 43, 22, },
  { 64, 85, 70, 46, 18, -13, -43, -67, -83, -90, -87, -73, -50, -22, 9, 38, 64, 82, 90, 88, 75, 54, 25, -4, -36, -61, -80, -90, -89, -78, -57, -31, },
  { 64, 82, 57, 22, -18, -54, -80, -90, -83, -61, -25, 13, 50, 78, 90, 85, 64, 31, -9, -46, -75, -90, -87, -67, -36, 4, 43, 73, 89, 88, 70, 38, },
  { 64, 78, 43, -4, -50, -82, -90, -73, -36, 13, 57, 85, 89, 67, 25, -22, -64, -88, -87, -61, -18, 31, 70, 90, 83, 54, 9, -38, -75, -90, -80, -46, },
  { 64, 73, 25, -31, -75, -90, -70, -22, 36, 78, 90, 67, 18, -38, -80, -90, -64, -13, 43, 82, 89, 61, 9, -46, -83, -88, -57, -4, 50, 85, 87, 54, },
  { 64, 67, 9, -54, -89, -78, -25, 38, 83, 85, 43, -22, -75, -90, -57, 4, 64, 90, 70, 13, -50, -88, -80, -31, 36, 82, 87, 46, -18, -73, -90, -61, },
  { 64, 61, -9, -73, -89, -46, 25, 82, 83, 31, -43, -88, -75, -13, 57, 90, 64, -4, -70, -90, -50, 22, 80, 85, 36, -38, -87, -78, -18, 54, 90, 67, },
  { 64, 54, -25, -85, -75, -4, 70, 88, 36, -46, -90, -61, 18, 82, 80, 13, -64, -90, -43, 38, 89, 67, -9, -78, -83, -22, 57, 90, 50, -31, -87, -73, },
  { 64, 46, -43, -90, -50, 38, 90, 54, -36, -90, -57, 31, 89, 61, -25, -88, -64, 22, 87, 67, -18, -85, -70, 13, 83, 73, -9, -82, -75, 4, 80, 78, },
  { 64, 38, -57, -88, -18, 73, 80, -4, -83, -67, 25, 90, 50, -46, -90, -31, 64, 85, 9, -78, -75, 13, 87, 61, -36, -90, -43, 54, 89, 22, -70, -82, },
  { 64, 31, -70, -78, 18, 90, 43, -61, -83, 4, 87, 54, -50, -88, -9, 82, 64, -38, -90, -22, 75, 73, -25, -90, -36, 67, 80, -13, -89, -46, 57, 85, },
  { 64, 22, -80, -61, 50, 85, -9, -90, -36, 73, 70, -38, -89, -4, 87, 46, -64, -78, 25, 90, 18, -82, -57, 54, 83, -13, -90, -31, 75, 67, -43, -88, },
  { 64, 13, -87, -38, 75, 61, -57, -78, 36, 88, -9, -90, -18, 85, 43, -73, -64, 54, 80, -31, -89, 4, 90, 22, -83, -46, 70, 67, -50, -82, 25, 90, },
  { 64, 4, -90, -13, 89, 22, -87, -31, 83, 38, -80, -46, 75, 54, -70, -61, 64, 67, -57, -73, 50, 78, -43, -82, 36, 85, -25, -88, 18, 90, -9, -90, },
  { 64, -4, -90, 13, 89, -22, -87, 31, 83, -38, -80, 46, 75, -54, -70, 61, 64, -67, -57, 73, 50, -78, -43, 82, 36, -85, -25, 88, 18, -90, -9, 90, },
  { 64, -13, -87, 38, 75, -61, -57, 78, 36, -88, -9, 90, -18, -85, 43, 73, -64, -54, 80, 31, -89, -4, 90, -22, -83, 46, 70, -67, -50, 82, 25, -90, },
  { 64, -22, -80, 61, 50, -85, -9, 90, -36, -73, 70, 38, -89, 4, 87, -46, -64, 78, 25, -90, 18, 82, -57, -54, 83, 13, -90, 31, 75, -67, -43, 88, },
  { 64, -31, -70, 78, 18, -90, 43, 61, -83, -4, 87, -54, -50, 88, -9, -82, 64, 38, -90, 22, 75, -73, -25, 90, -36, -67, 80, 13, -89, 46, 57, -85, },
  { 64, -38, -57, 88, -18, -73, 80, 4, -83, 67, 25, -90, 50, 46, -90, 31, 64, -85, 9, 78, -75, -13, 87, -61, -36, 90, -43, -54, 89, -22, -70, 82, },
  { 64, -46, -43, 90, -50, -38, 90, -54, -36, 90, -57, -31, 89, -61, -25, 88, -64, -22, 87, -67, -18, 85, -70, -13, 83, -73, -9, 82, -75, -4, 80, -78, },
  { 64, -54, -25, 85, -75, 4, 70, -88, 36, 46, -90, 61, 18, -82, 80, -13, -64, 90, -43, -38, 89, -67, -9, 78, -83, 22, 57, -90, 50, 31, -87, 73, },
  { 64, -61, -9, 73, -89, 46, 25, -82, 83, -31, -43, 88, -75, 13, 57, -90, 64, 4, -70, 90, -50, -22, 80, -85, 36, 38, -87, 78, -18, -54, 90, -67, },
  { 64, -67, 9, 54, -89, 78, -25, -38, 83, -85, 43, 22, -75, 90, -57, -4, 64, -90, 70, -13, -50, 88, -80, 31, 36, -82, 87, -46, -18, 73, -90, 61, },
  { 64, -73, 25, 31, -75, 90, -70, 22, 36, -78, 90, -67, 18, 38, -80, 90, -64, 13, 43, -82, 89, -61, 9, 46, -83, 88, -57, 4, 50, -85, 87, -54, },
  { 64, -78, 43, 4, -50, 82, -90, 73, -36, -13, 57, -85, 89, -67, 25, 22, -64, 88, -87, 61, -18, -31, 70, -90, 83, -54, 9, 38, -75, 90, -80, 46, },
  { 64, -82, 57, -22, -18, 54, -80, 90, -83, 61, -25, -13, 50, -78, 90, -85, 64, -31, -9, 46, -75, 90, -87, 67, -36, -4, 43, -73, 89, -88, 70, -38, },
  { 64, -85, 70, -46, 18, 13, -43, 67, -83, 90, -87, 73, -50, 22, 9, -38, 64, -82, 90, -88, 75, -54, 25, 4, -36, 61, -80, 90, -89, 78, -57, 31, },
  { 64, -88, 80, -67, 50, -31, 9, 13, -36, 54, -70, 82, -89, 90, -87, 78, -64, 46, -25, 4, 18, -38, 57, -73, 83, -90, 90, -85, 75, -61, 43, -22, },
  { 64, -90, 87, -82, 75, -67, 57, -46, 36, -22, 9, 4, -18, 31, -43, 54, -64, 73, -80, 85, -89, 90, -90, 88, -83, 78, -70, 61, -50, 38, -25, 13, },
  { 64, -90, 90, -90, 89, -88, 87, -85, 83, -82, 80, -78, 75, -73, 70, -67, 64, -61, 57, -54, 50, -46, 43, -38, 36, -31, 25, -22, 18, -13, 9, -4 }
};
205 
/**
 * \brief Generic partial butterfly transform functions.
 *
 * Scalar (non-SIMD) forward and inverse partial-butterfly passes for the
 * 4-point DST and the 4/8/16/32-point DCT. Each forward function reads a
 * block of input samples and writes transform coefficients; each inverse
 * function reverses the process. The shift parameter is the rounding
 * right-shift applied to every output value.
 */
215 
216 // Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
217 // gives identical results
// Fast 4-point forward DST. Produces results identical to a full matrix
// multiplication with the DST basis.
// block: input 4x4 samples (row-major), coeff: output coefficients,
// shift: rounding right-shift applied to each output value.
static void fast_forward_dst_4_generic(const short *block, short *coeff, int32_t shift)
{
  const int32_t round = 1 << (shift - 1);

  for (int32_t col = 0; col < 4; col++) {
    const short *row = &block[4 * col];

    // Shared intermediate terms.
    const int32_t s03 = row[0] + row[3];
    const int32_t s13 = row[1] + row[3];
    const int32_t d01 = row[0] - row[1];
    const int32_t m2  = 74 * row[2];

    coeff[col]      = (short)((29 * s03 + 55 * s13 + m2 + round) >> shift);
    coeff[col + 4]  = (short)((74 * (row[0] + row[1] - row[3]) + round) >> shift);
    coeff[col + 8]  = (short)((29 * d01 + 55 * s03 - m2 + round) >> shift);
    coeff[col + 12] = (short)((55 * d01 - 29 * s13 + m2 + round) >> shift);
  }
}
235 
// Fast 4-point inverse DST.
// tmp: input coefficients, block: output 4x4 samples (row-major),
// shift: rounding right-shift; results are clamped to int16 range.
static void fast_inverse_dst_4_generic(const short *tmp, short *block, int shift)
{
  const int round = 1 << (shift - 1);

  for (int col = 0; col < 4; col++) {
    // Shared intermediate terms.
    const int s02 = tmp[col] + tmp[8 + col];
    const int s23 = tmp[8 + col] + tmp[12 + col];
    const int d03 = tmp[col] - tmp[12 + col];
    const int m1  = 74 * tmp[4 + col];

    short *out = &block[4 * col];
    out[0] = (short)CLIP(-32768, 32767, (29 * s02 + 55 * s23 + m1 + round) >> shift);
    out[1] = (short)CLIP(-32768, 32767, (55 * d03 - 29 * s23 + m1 + round) >> shift);
    out[2] = (short)CLIP(-32768, 32767, (74 * (tmp[col] - tmp[8 + col] + tmp[12 + col]) + round) >> shift);
    out[3] = (short)CLIP(-32768, 32767, (55 * s02 + 29 * d03 - m1 + round) >> shift);
  }
}
253 
254 
partial_butterfly_4_generic(const short * src,short * dst,int32_t shift)255 static void partial_butterfly_4_generic(const short *src, short *dst,
256   int32_t shift)
257 {
258   int32_t j;
259   int32_t e[2], o[2];
260   int32_t add = 1 << (shift - 1);
261   const int32_t line = 4;
262 
263   for (j = 0; j < line; j++) {
264     // E and O
265     e[0] = src[0] + src[3];
266     o[0] = src[0] - src[3];
267     e[1] = src[1] + src[2];
268     o[1] = src[1] - src[2];
269 
270     dst[0] = (short)((kvz_g_dct_4[0][0] * e[0] + kvz_g_dct_4[0][1] * e[1] + add) >> shift);
271     dst[2 * line] = (short)((kvz_g_dct_4[2][0] * e[0] + kvz_g_dct_4[2][1] * e[1] + add) >> shift);
272     dst[line] = (short)((kvz_g_dct_4[1][0] * o[0] + kvz_g_dct_4[1][1] * o[1] + add) >> shift);
273     dst[3 * line] = (short)((kvz_g_dct_4[3][0] * o[0] + kvz_g_dct_4[3][1] * o[1] + add) >> shift);
274 
275     src += 4;
276     dst++;
277   }
278 }
279 
280 
partial_butterfly_inverse_4_generic(const short * src,short * dst,int shift)281 static void partial_butterfly_inverse_4_generic(const short *src, short *dst,
282   int shift)
283 {
284   int j;
285   int e[2], o[2];
286   int add = 1 << (shift - 1);
287   const int32_t line = 4;
288 
289   for (j = 0; j < line; j++) {
290     // Utilizing symmetry properties to the maximum to minimize the number of multiplications
291     o[0] = kvz_g_dct_4[1][0] * src[line] + kvz_g_dct_4[3][0] * src[3 * line];
292     o[1] = kvz_g_dct_4[1][1] * src[line] + kvz_g_dct_4[3][1] * src[3 * line];
293     e[0] = kvz_g_dct_4[0][0] * src[0] + kvz_g_dct_4[2][0] * src[2 * line];
294     e[1] = kvz_g_dct_4[0][1] * src[0] + kvz_g_dct_4[2][1] * src[2 * line];
295 
296     // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
297     dst[0] = (short)CLIP(-32768, 32767, (e[0] + o[0] + add) >> shift);
298     dst[1] = (short)CLIP(-32768, 32767, (e[1] + o[1] + add) >> shift);
299     dst[2] = (short)CLIP(-32768, 32767, (e[1] - o[1] + add) >> shift);
300     dst[3] = (short)CLIP(-32768, 32767, (e[0] - o[0] + add) >> shift);
301 
302     src++;
303     dst += 4;
304   }
305 }
306 
307 
partial_butterfly_8_generic(const short * src,short * dst,int32_t shift)308 static void partial_butterfly_8_generic(const short *src, short *dst,
309   int32_t shift)
310 {
311   int32_t j, k;
312   int32_t e[4], o[4];
313   int32_t ee[2], eo[2];
314   int32_t add = 1 << (shift - 1);
315   const int32_t line = 8;
316 
317   for (j = 0; j < line; j++) {
318     // E and O
319     for (k = 0; k < 4; k++) {
320       e[k] = src[k] + src[7 - k];
321       o[k] = src[k] - src[7 - k];
322     }
323     // EE and EO
324     ee[0] = e[0] + e[3];
325     eo[0] = e[0] - e[3];
326     ee[1] = e[1] + e[2];
327     eo[1] = e[1] - e[2];
328 
329     dst[0] = (short)((kvz_g_dct_8[0][0] * ee[0] + kvz_g_dct_8[0][1] * ee[1] + add) >> shift);
330     dst[4 * line] = (short)((kvz_g_dct_8[4][0] * ee[0] + kvz_g_dct_8[4][1] * ee[1] + add) >> shift);
331     dst[2 * line] = (short)((kvz_g_dct_8[2][0] * eo[0] + kvz_g_dct_8[2][1] * eo[1] + add) >> shift);
332     dst[6 * line] = (short)((kvz_g_dct_8[6][0] * eo[0] + kvz_g_dct_8[6][1] * eo[1] + add) >> shift);
333 
334     dst[line] = (short)((kvz_g_dct_8[1][0] * o[0] + kvz_g_dct_8[1][1] * o[1] + kvz_g_dct_8[1][2] * o[2] + kvz_g_dct_8[1][3] * o[3] + add) >> shift);
335     dst[3 * line] = (short)((kvz_g_dct_8[3][0] * o[0] + kvz_g_dct_8[3][1] * o[1] + kvz_g_dct_8[3][2] * o[2] + kvz_g_dct_8[3][3] * o[3] + add) >> shift);
336     dst[5 * line] = (short)((kvz_g_dct_8[5][0] * o[0] + kvz_g_dct_8[5][1] * o[1] + kvz_g_dct_8[5][2] * o[2] + kvz_g_dct_8[5][3] * o[3] + add) >> shift);
337     dst[7 * line] = (short)((kvz_g_dct_8[7][0] * o[0] + kvz_g_dct_8[7][1] * o[1] + kvz_g_dct_8[7][2] * o[2] + kvz_g_dct_8[7][3] * o[3] + add) >> shift);
338 
339     src += 8;
340     dst++;
341   }
342 }
343 
344 
partial_butterfly_inverse_8_generic(const int16_t * src,int16_t * dst,int32_t shift)345 static void partial_butterfly_inverse_8_generic(const int16_t *src, int16_t *dst,
346   int32_t shift)
347 {
348   int32_t j, k;
349   int32_t e[4], o[4];
350   int32_t ee[2], eo[2];
351   int32_t add = 1 << (shift - 1);
352   const int32_t line = 8;
353 
354   for (j = 0; j < line; j++) {
355     // Utilizing symmetry properties to the maximum to minimize the number of multiplications
356     for (k = 0; k < 4; k++) {
357       o[k] = kvz_g_dct_8[1][k] * src[line] + kvz_g_dct_8[3][k] * src[3 * line] + kvz_g_dct_8[5][k] * src[5 * line] + kvz_g_dct_8[7][k] * src[7 * line];
358     }
359 
360     eo[0] = kvz_g_dct_8[2][0] * src[2 * line] + kvz_g_dct_8[6][0] * src[6 * line];
361     eo[1] = kvz_g_dct_8[2][1] * src[2 * line] + kvz_g_dct_8[6][1] * src[6 * line];
362     ee[0] = kvz_g_dct_8[0][0] * src[0] + kvz_g_dct_8[4][0] * src[4 * line];
363     ee[1] = kvz_g_dct_8[0][1] * src[0] + kvz_g_dct_8[4][1] * src[4 * line];
364 
365     // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
366     e[0] = ee[0] + eo[0];
367     e[3] = ee[0] - eo[0];
368     e[1] = ee[1] + eo[1];
369     e[2] = ee[1] - eo[1];
370     for (k = 0; k < 4; k++) {
371       dst[k] = (int16_t)MAX(-32768, MIN(32767, (e[k] + o[k] + add) >> shift));
372       dst[k + 4] = (int16_t)MAX(-32768, MIN(32767, (e[3 - k] - o[3 - k] + add) >> shift));
373     }
374     src++;
375     dst += 8;
376   }
377 }
378 
379 
partial_butterfly_16_generic(const short * src,short * dst,int32_t shift)380 static void partial_butterfly_16_generic(const short *src, short *dst,
381   int32_t shift)
382 {
383   int32_t j, k;
384   int32_t e[8], o[8];
385   int32_t ee[4], eo[4];
386   int32_t eee[2], eeo[2];
387   int32_t add = 1 << (shift - 1);
388   const int32_t line = 16;
389 
390   for (j = 0; j < line; j++) {
391     // E and O
392     for (k = 0; k < 8; k++) {
393       e[k] = src[k] + src[15 - k];
394       o[k] = src[k] - src[15 - k];
395     }
396     // EE and EO
397     for (k = 0; k < 4; k++) {
398       ee[k] = e[k] + e[7 - k];
399       eo[k] = e[k] - e[7 - k];
400     }
401     // EEE and EEO
402     eee[0] = ee[0] + ee[3];
403     eeo[0] = ee[0] - ee[3];
404     eee[1] = ee[1] + ee[2];
405     eeo[1] = ee[1] - ee[2];
406 
407     dst[0] = (short)((kvz_g_dct_16[0][0] * eee[0] + kvz_g_dct_16[0][1] * eee[1] + add) >> shift);
408     dst[8 * line] = (short)((kvz_g_dct_16[8][0] * eee[0] + kvz_g_dct_16[8][1] * eee[1] + add) >> shift);
409     dst[4 * line] = (short)((kvz_g_dct_16[4][0] * eeo[0] + kvz_g_dct_16[4][1] * eeo[1] + add) >> shift);
410     dst[12 * line] = (short)((kvz_g_dct_16[12][0] * eeo[0] + kvz_g_dct_16[12][1] * eeo[1] + add) >> shift);
411 
412     for (k = 2; k < 16; k += 4) {
413       dst[k*line] = (short)((kvz_g_dct_16[k][0] * eo[0] + kvz_g_dct_16[k][1] * eo[1] + kvz_g_dct_16[k][2] * eo[2] + kvz_g_dct_16[k][3] * eo[3] + add) >> shift);
414     }
415 
416     for (k = 1; k < 16; k += 2) {
417       dst[k*line] = (short)((kvz_g_dct_16[k][0] * o[0] + kvz_g_dct_16[k][1] * o[1] + kvz_g_dct_16[k][2] * o[2] + kvz_g_dct_16[k][3] * o[3] +
418         kvz_g_dct_16[k][4] * o[4] + kvz_g_dct_16[k][5] * o[5] + kvz_g_dct_16[k][6] * o[6] + kvz_g_dct_16[k][7] * o[7] + add) >> shift);
419     }
420 
421     src += 16;
422     dst++;
423   }
424 }
425 
426 
partial_butterfly_inverse_16_generic(const int16_t * src,int16_t * dst,int32_t shift)427 static void partial_butterfly_inverse_16_generic(const int16_t *src, int16_t *dst,
428   int32_t shift)
429 {
430   int32_t j, k;
431   int32_t e[8], o[8];
432   int32_t ee[4], eo[4];
433   int32_t eee[2], eeo[2];
434   int32_t add = 1 << (shift - 1);
435   const int32_t line = 16;
436 
437   for (j = 0; j < line; j++) {
438     // Utilizing symmetry properties to the maximum to minimize the number of multiplications
439     for (k = 0; k < 8; k++)  {
440       o[k] = kvz_g_dct_16[1][k] * src[line] + kvz_g_dct_16[3][k] * src[3 * line] + kvz_g_dct_16[5][k] * src[5 * line] + kvz_g_dct_16[7][k] * src[7 * line] +
441         kvz_g_dct_16[9][k] * src[9 * line] + kvz_g_dct_16[11][k] * src[11 * line] + kvz_g_dct_16[13][k] * src[13 * line] + kvz_g_dct_16[15][k] * src[15 * line];
442     }
443     for (k = 0; k < 4; k++) {
444       eo[k] = kvz_g_dct_16[2][k] * src[2 * line] + kvz_g_dct_16[6][k] * src[6 * line] + kvz_g_dct_16[10][k] * src[10 * line] + kvz_g_dct_16[14][k] * src[14 * line];
445     }
446     eeo[0] = kvz_g_dct_16[4][0] * src[4 * line] + kvz_g_dct_16[12][0] * src[12 * line];
447     eee[0] = kvz_g_dct_16[0][0] * src[0] + kvz_g_dct_16[8][0] * src[8 * line];
448     eeo[1] = kvz_g_dct_16[4][1] * src[4 * line] + kvz_g_dct_16[12][1] * src[12 * line];
449     eee[1] = kvz_g_dct_16[0][1] * src[0] + kvz_g_dct_16[8][1] * src[8 * line];
450 
451     // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
452     for (k = 0; k < 2; k++) {
453       ee[k] = eee[k] + eeo[k];
454       ee[k + 2] = eee[1 - k] - eeo[1 - k];
455     }
456     for (k = 0; k < 4; k++) {
457       e[k] = ee[k] + eo[k];
458       e[k + 4] = ee[3 - k] - eo[3 - k];
459     }
460     for (k = 0; k < 8; k++) {
461       dst[k] = (short)MAX(-32768, MIN(32767, (e[k] + o[k] + add) >> shift));
462       dst[k + 8] = (short)MAX(-32768, MIN(32767, (e[7 - k] - o[7 - k] + add) >> shift));
463     }
464     src++;
465     dst += 16;
466   }
467 }
468 
469 
partial_butterfly_32_generic(const short * src,short * dst,int32_t shift)470 static void partial_butterfly_32_generic(const short *src, short *dst,
471   int32_t shift)
472 {
473   int32_t j, k;
474   int32_t e[16], o[16];
475   int32_t ee[8], eo[8];
476   int32_t eee[4], eeo[4];
477   int32_t eeee[2], eeeo[2];
478   int32_t add = 1 << (shift - 1);
479   const int32_t line = 32;
480 
481   for (j = 0; j < line; j++) {
482     // E and O
483     for (k = 0; k < 16; k++) {
484       e[k] = src[k] + src[31 - k];
485       o[k] = src[k] - src[31 - k];
486     }
487     // EE and EO
488     for (k = 0; k < 8; k++) {
489       ee[k] = e[k] + e[15 - k];
490       eo[k] = e[k] - e[15 - k];
491     }
492     // EEE and EEO
493     for (k = 0; k < 4; k++) {
494       eee[k] = ee[k] + ee[7 - k];
495       eeo[k] = ee[k] - ee[7 - k];
496     }
497     // EEEE and EEEO
498     eeee[0] = eee[0] + eee[3];
499     eeeo[0] = eee[0] - eee[3];
500     eeee[1] = eee[1] + eee[2];
501     eeeo[1] = eee[1] - eee[2];
502 
503     dst[0] = (short)((kvz_g_dct_32[0][0] * eeee[0] + kvz_g_dct_32[0][1] * eeee[1] + add) >> shift);
504     dst[16 * line] = (short)((kvz_g_dct_32[16][0] * eeee[0] + kvz_g_dct_32[16][1] * eeee[1] + add) >> shift);
505     dst[8 * line] = (short)((kvz_g_dct_32[8][0] * eeeo[0] + kvz_g_dct_32[8][1] * eeeo[1] + add) >> shift);
506     dst[24 * line] = (short)((kvz_g_dct_32[24][0] * eeeo[0] + kvz_g_dct_32[24][1] * eeeo[1] + add) >> shift);
507     for (k = 4; k < 32; k += 8) {
508       dst[k*line] = (short)((kvz_g_dct_32[k][0] * eeo[0] + kvz_g_dct_32[k][1] * eeo[1] + kvz_g_dct_32[k][2] * eeo[2] + kvz_g_dct_32[k][3] * eeo[3] + add) >> shift);
509     }
510     for (k = 2; k < 32; k += 4) {
511       dst[k*line] = (short)((kvz_g_dct_32[k][0] * eo[0] + kvz_g_dct_32[k][1] * eo[1] + kvz_g_dct_32[k][2] * eo[2] + kvz_g_dct_32[k][3] * eo[3] +
512         kvz_g_dct_32[k][4] * eo[4] + kvz_g_dct_32[k][5] * eo[5] + kvz_g_dct_32[k][6] * eo[6] + kvz_g_dct_32[k][7] * eo[7] + add) >> shift);
513     }
514     for (k = 1; k < 32; k += 2) {
515       dst[k*line] = (short)((kvz_g_dct_32[k][0] * o[0] + kvz_g_dct_32[k][1] * o[1] + kvz_g_dct_32[k][2] * o[2] + kvz_g_dct_32[k][3] * o[3] +
516         kvz_g_dct_32[k][4] * o[4] + kvz_g_dct_32[k][5] * o[5] + kvz_g_dct_32[k][6] * o[6] + kvz_g_dct_32[k][7] * o[7] +
517         kvz_g_dct_32[k][8] * o[8] + kvz_g_dct_32[k][9] * o[9] + kvz_g_dct_32[k][10] * o[10] + kvz_g_dct_32[k][11] * o[11] +
518         kvz_g_dct_32[k][12] * o[12] + kvz_g_dct_32[k][13] * o[13] + kvz_g_dct_32[k][14] * o[14] + kvz_g_dct_32[k][15] * o[15] + add) >> shift);
519     }
520     src += 32;
521     dst++;
522   }
523 }
524 
525 
/**
 * \brief One pass of the inverse 32-point DCT (partial-butterfly form).
 *
 * Processes 32 columns of coefficients: for each of the 32 output rows it
 * reconstructs the spatial values by recombining even/odd coefficient sums
 * through four symmetry levels (eeee/eeeo -> eee -> ee -> e), which cuts the
 * multiplication count versus a direct 32x32 matrix product.
 *
 * \param src    input coefficients, column-major stride of 32 (one column per call step)
 * \param dst    output samples, written 32 per iteration
 * \param shift  right-shift applied with rounding; results are clipped to int16 range
 */
static void partial_butterfly_inverse_32_generic(const int16_t *src, int16_t *dst,
  int32_t shift)
{
  int32_t j, k;
  int32_t e[16], o[16];
  int32_t ee[8], eo[8];
  int32_t eee[4], eeo[4];
  int32_t eeee[2], eeeo[2];
  int32_t add = 1 << (shift - 1);  // rounding offset for the final shift
  const int32_t line = 32;

  for (j = 0; j<line; j++) {
    // Utilizing symmetry properties to the maximum to minimize the number of multiplications
    // o[]: contribution of the odd-indexed coefficients (rows 1,3,...,31).
    for (k = 0; k < 16; k++) {
      o[k] = kvz_g_dct_32[1][k] * src[line] + kvz_g_dct_32[3][k] * src[3 * line] + kvz_g_dct_32[5][k] * src[5 * line] + kvz_g_dct_32[7][k] * src[7 * line] +
        kvz_g_dct_32[9][k] * src[9 * line] + kvz_g_dct_32[11][k] * src[11 * line] + kvz_g_dct_32[13][k] * src[13 * line] + kvz_g_dct_32[15][k] * src[15 * line] +
        kvz_g_dct_32[17][k] * src[17 * line] + kvz_g_dct_32[19][k] * src[19 * line] + kvz_g_dct_32[21][k] * src[21 * line] + kvz_g_dct_32[23][k] * src[23 * line] +
        kvz_g_dct_32[25][k] * src[25 * line] + kvz_g_dct_32[27][k] * src[27 * line] + kvz_g_dct_32[29][k] * src[29 * line] + kvz_g_dct_32[31][k] * src[31 * line];
    }
    // eo[]: rows congruent to 2 mod 4 (2,6,...,30).
    for (k = 0; k < 8; k++) {
      eo[k] = kvz_g_dct_32[2][k] * src[2 * line] + kvz_g_dct_32[6][k] * src[6 * line] + kvz_g_dct_32[10][k] * src[10 * line] + kvz_g_dct_32[14][k] * src[14 * line] +
        kvz_g_dct_32[18][k] * src[18 * line] + kvz_g_dct_32[22][k] * src[22 * line] + kvz_g_dct_32[26][k] * src[26 * line] + kvz_g_dct_32[30][k] * src[30 * line];
    }
    // eeo[]: rows congruent to 4 mod 8 (4,12,20,28).
    for (k = 0; k < 4; k++) {
      eeo[k] = kvz_g_dct_32[4][k] * src[4 * line] + kvz_g_dct_32[12][k] * src[12 * line] + kvz_g_dct_32[20][k] * src[20 * line] + kvz_g_dct_32[28][k] * src[28 * line];
    }
    // Deepest level: rows 8/24 (eeeo) and rows 0/16 (eeee).
    eeeo[0] = kvz_g_dct_32[8][0] * src[8 * line] + kvz_g_dct_32[24][0] * src[24 * line];
    eeeo[1] = kvz_g_dct_32[8][1] * src[8 * line] + kvz_g_dct_32[24][1] * src[24 * line];
    eeee[0] = kvz_g_dct_32[0][0] * src[0] + kvz_g_dct_32[16][0] * src[16 * line];
    eeee[1] = kvz_g_dct_32[0][1] * src[0] + kvz_g_dct_32[16][1] * src[16 * line];

    // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
    eee[0] = eeee[0] + eeeo[0];
    eee[3] = eeee[0] - eeeo[0];
    eee[1] = eeee[1] + eeeo[1];
    eee[2] = eeee[1] - eeeo[1];
    for (k = 0; k < 4; k++) {
      ee[k] = eee[k] + eeo[k];
      ee[k + 4] = eee[3 - k] - eeo[3 - k];
    }
    for (k = 0; k < 8; k++) {
      e[k] = ee[k] + eo[k];
      e[k + 8] = ee[7 - k] - eo[7 - k];
    }
    // Final butterfly: round, shift and clamp each output to the int16 range.
    for (k = 0; k<16; k++) {
      dst[k] = (short)MAX(-32768, MIN(32767, (e[k] + o[k] + add) >> shift));
      dst[k + 16] = (short)MAX(-32768, MIN(32767, (e[15 - k] - o[15 - k] + add) >> shift));
    }
    src++;        // next input column
    dst += 32;    // next output row
  }
}
578 
/**
 * \brief Generate a forward n x n DCT function (dct_NxN_generic).
 *
 * The 2-D transform is computed as two 1-D partial-butterfly passes through
 * a temporary buffer. The first-pass shift depends on the bit depth; the
 * second-pass shift is fixed per transform size.
 */
#define DCT_NXN_GENERIC(n) \
static void dct_ ## n ## x ## n ## _generic(int8_t bitdepth, const int16_t *input, int16_t *output) { \
\
  int16_t tmp[ n * n ]; \
  int32_t shift_1st = kvz_g_convert_to_bit[ n ] + 1 + (bitdepth - 8); \
  int32_t shift_2nd = kvz_g_convert_to_bit[ n ] + 8; \
\
  partial_butterfly_ ## n ## _generic(input, tmp, shift_1st); \
  partial_butterfly_ ## n ## _generic(tmp, output, shift_2nd); \
}
589 
/**
 * \brief Generate an inverse n x n DCT function (idct_NxN_generic).
 *
 * Mirrors DCT_NXN_GENERIC: two 1-D inverse partial-butterfly passes through
 * a temporary buffer. Here the first shift is fixed and the second depends
 * on the bit depth (the reverse of the forward transform).
 */
#define IDCT_NXN_GENERIC(n) \
static void idct_ ## n ## x ## n ## _generic(int8_t bitdepth, const int16_t *input, int16_t *output) { \
\
  int16_t tmp[ n * n ]; \
  int32_t shift_1st = 7; \
  int32_t shift_2nd = 12 - (bitdepth - 8); \
\
  partial_butterfly_inverse_ ## n ## _generic(input, tmp, shift_1st); \
  partial_butterfly_inverse_ ## n ## _generic(tmp, output, shift_2nd); \
}
600 
// Instantiate forward DCT wrappers for every transform size used by HEVC.
DCT_NXN_GENERIC(4);
DCT_NXN_GENERIC(8);
DCT_NXN_GENERIC(16);
DCT_NXN_GENERIC(32);

// Instantiate the matching inverse DCT wrappers.
IDCT_NXN_GENERIC(4);
IDCT_NXN_GENERIC(8);
IDCT_NXN_GENERIC(16);
IDCT_NXN_GENERIC(32);
610 
fast_forward_dst_4x4_generic(int8_t bitdepth,const int16_t * input,int16_t * output)611 static void fast_forward_dst_4x4_generic(int8_t bitdepth, const int16_t *input, int16_t *output)
612 {
613   int16_t tmp[4*4];
614   int32_t shift_1st = kvz_g_convert_to_bit[4] + 1 + (bitdepth - 8);
615   int32_t shift_2nd = kvz_g_convert_to_bit[4] + 8;
616 
617   fast_forward_dst_4_generic(input, tmp, shift_1st);
618   fast_forward_dst_4_generic(tmp, output, shift_2nd);
619 }
620 
/**
 * \brief Inverse 4x4 DST, computed as two 1-D inverse DST passes through a
 *        temporary buffer.
 *
 * \param bitdepth  sample bit depth; narrows the second-pass shift above 8 bits
 * \param input     4x4 block of transform coefficients
 * \param output    4x4 block of reconstructed residual samples
 */
static void fast_inverse_dst_4x4_generic(int8_t bitdepth, const int16_t *input, int16_t *output)
{
  int16_t intermediate[4 * 4];
  const int32_t first_shift = 7;
  const int32_t second_shift = 12 - (bitdepth - 8);

  fast_inverse_dst_4_generic(input, intermediate, first_shift);
  fast_inverse_dst_4_generic(intermediate, output, second_shift);
}
630 
kvz_strategy_register_dct_generic(void * opaque,uint8_t bitdepth)631 int kvz_strategy_register_dct_generic(void* opaque, uint8_t bitdepth)
632 {
633   bool success = true;
634 
635   success &= kvz_strategyselector_register(opaque, "fast_forward_dst_4x4", "generic", 0, &fast_forward_dst_4x4_generic);
636 
637   success &= kvz_strategyselector_register(opaque, "dct_4x4", "generic", 0, &dct_4x4_generic);
638   success &= kvz_strategyselector_register(opaque, "dct_8x8", "generic", 0, &dct_8x8_generic);
639   success &= kvz_strategyselector_register(opaque, "dct_16x16", "generic", 0, &dct_16x16_generic);
640   success &= kvz_strategyselector_register(opaque, "dct_32x32", "generic", 0, &dct_32x32_generic);
641 
642   success &= kvz_strategyselector_register(opaque, "fast_inverse_dst_4x4", "generic", 0, &fast_inverse_dst_4x4_generic);
643 
644   success &= kvz_strategyselector_register(opaque, "idct_4x4", "generic", 0, &idct_4x4_generic);
645   success &= kvz_strategyselector_register(opaque, "idct_8x8", "generic", 0, &idct_8x8_generic);
646   success &= kvz_strategyselector_register(opaque, "idct_16x16", "generic", 0, &idct_16x16_generic);
647   success &= kvz_strategyselector_register(opaque, "idct_32x32", "generic", 0, &idct_32x32_generic);
648   return success;
649 }
650