Lines Matching refs:buf0

16   __m128i buf0[32];  in av1_fdct32_new_sse4_1()  local
56 buf0[0] = _mm_add_epi32(buf1[0], buf1[15]); in av1_fdct32_new_sse4_1()
57 buf0[15] = _mm_sub_epi32(buf1[0], buf1[15]); in av1_fdct32_new_sse4_1()
58 buf0[1] = _mm_add_epi32(buf1[1], buf1[14]); in av1_fdct32_new_sse4_1()
59 buf0[14] = _mm_sub_epi32(buf1[1], buf1[14]); in av1_fdct32_new_sse4_1()
60 buf0[2] = _mm_add_epi32(buf1[2], buf1[13]); in av1_fdct32_new_sse4_1()
61 buf0[13] = _mm_sub_epi32(buf1[2], buf1[13]); in av1_fdct32_new_sse4_1()
62 buf0[3] = _mm_add_epi32(buf1[3], buf1[12]); in av1_fdct32_new_sse4_1()
63 buf0[12] = _mm_sub_epi32(buf1[3], buf1[12]); in av1_fdct32_new_sse4_1()
64 buf0[4] = _mm_add_epi32(buf1[4], buf1[11]); in av1_fdct32_new_sse4_1()
65 buf0[11] = _mm_sub_epi32(buf1[4], buf1[11]); in av1_fdct32_new_sse4_1()
66 buf0[5] = _mm_add_epi32(buf1[5], buf1[10]); in av1_fdct32_new_sse4_1()
67 buf0[10] = _mm_sub_epi32(buf1[5], buf1[10]); in av1_fdct32_new_sse4_1()
68 buf0[6] = _mm_add_epi32(buf1[6], buf1[9]); in av1_fdct32_new_sse4_1()
69 buf0[9] = _mm_sub_epi32(buf1[6], buf1[9]); in av1_fdct32_new_sse4_1()
70 buf0[7] = _mm_add_epi32(buf1[7], buf1[8]); in av1_fdct32_new_sse4_1()
71 buf0[8] = _mm_sub_epi32(buf1[7], buf1[8]); in av1_fdct32_new_sse4_1()
72 buf0[16] = buf1[16]; in av1_fdct32_new_sse4_1()
73 buf0[17] = buf1[17]; in av1_fdct32_new_sse4_1()
74 buf0[18] = buf1[18]; in av1_fdct32_new_sse4_1()
75 buf0[19] = buf1[19]; in av1_fdct32_new_sse4_1()
76 btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[20], buf1[27], buf0[20], in av1_fdct32_new_sse4_1()
77 buf0[27], cos_bit); in av1_fdct32_new_sse4_1()
78 btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[21], buf1[26], buf0[21], in av1_fdct32_new_sse4_1()
79 buf0[26], cos_bit); in av1_fdct32_new_sse4_1()
80 btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[22], buf1[25], buf0[22], in av1_fdct32_new_sse4_1()
81 buf0[25], cos_bit); in av1_fdct32_new_sse4_1()
82 btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[23], buf1[24], buf0[23], in av1_fdct32_new_sse4_1()
83 buf0[24], cos_bit); in av1_fdct32_new_sse4_1()
84 buf0[28] = buf1[28]; in av1_fdct32_new_sse4_1()
85 buf0[29] = buf1[29]; in av1_fdct32_new_sse4_1()
86 buf0[30] = buf1[30]; in av1_fdct32_new_sse4_1()
87 buf0[31] = buf1[31]; in av1_fdct32_new_sse4_1()
91 buf1[0] = _mm_add_epi32(buf0[0], buf0[7]); in av1_fdct32_new_sse4_1()
92 buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]); in av1_fdct32_new_sse4_1()
93 buf1[1] = _mm_add_epi32(buf0[1], buf0[6]); in av1_fdct32_new_sse4_1()
94 buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]); in av1_fdct32_new_sse4_1()
95 buf1[2] = _mm_add_epi32(buf0[2], buf0[5]); in av1_fdct32_new_sse4_1()
96 buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]); in av1_fdct32_new_sse4_1()
97 buf1[3] = _mm_add_epi32(buf0[3], buf0[4]); in av1_fdct32_new_sse4_1()
98 buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]); in av1_fdct32_new_sse4_1()
99 buf1[8] = buf0[8]; in av1_fdct32_new_sse4_1()
100 buf1[9] = buf0[9]; in av1_fdct32_new_sse4_1()
101 btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[10], buf0[13], buf1[10], in av1_fdct32_new_sse4_1()
103 btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[11], buf0[12], buf1[11], in av1_fdct32_new_sse4_1()
105 buf1[14] = buf0[14]; in av1_fdct32_new_sse4_1()
106 buf1[15] = buf0[15]; in av1_fdct32_new_sse4_1()
107 buf1[16] = _mm_add_epi32(buf0[16], buf0[23]); in av1_fdct32_new_sse4_1()
108 buf1[23] = _mm_sub_epi32(buf0[16], buf0[23]); in av1_fdct32_new_sse4_1()
109 buf1[17] = _mm_add_epi32(buf0[17], buf0[22]); in av1_fdct32_new_sse4_1()
110 buf1[22] = _mm_sub_epi32(buf0[17], buf0[22]); in av1_fdct32_new_sse4_1()
111 buf1[18] = _mm_add_epi32(buf0[18], buf0[21]); in av1_fdct32_new_sse4_1()
112 buf1[21] = _mm_sub_epi32(buf0[18], buf0[21]); in av1_fdct32_new_sse4_1()
113 buf1[19] = _mm_add_epi32(buf0[19], buf0[20]); in av1_fdct32_new_sse4_1()
114 buf1[20] = _mm_sub_epi32(buf0[19], buf0[20]); in av1_fdct32_new_sse4_1()
115 buf1[24] = _mm_sub_epi32(buf0[31], buf0[24]); in av1_fdct32_new_sse4_1()
116 buf1[31] = _mm_add_epi32(buf0[31], buf0[24]); in av1_fdct32_new_sse4_1()
117 buf1[25] = _mm_sub_epi32(buf0[30], buf0[25]); in av1_fdct32_new_sse4_1()
118 buf1[30] = _mm_add_epi32(buf0[30], buf0[25]); in av1_fdct32_new_sse4_1()
119 buf1[26] = _mm_sub_epi32(buf0[29], buf0[26]); in av1_fdct32_new_sse4_1()
120 buf1[29] = _mm_add_epi32(buf0[29], buf0[26]); in av1_fdct32_new_sse4_1()
121 buf1[27] = _mm_sub_epi32(buf0[28], buf0[27]); in av1_fdct32_new_sse4_1()
122 buf1[28] = _mm_add_epi32(buf0[28], buf0[27]); in av1_fdct32_new_sse4_1()
126 buf0[0] = _mm_add_epi32(buf1[0], buf1[3]); in av1_fdct32_new_sse4_1()
127 buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]); in av1_fdct32_new_sse4_1()
128 buf0[1] = _mm_add_epi32(buf1[1], buf1[2]); in av1_fdct32_new_sse4_1()
129 buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]); in av1_fdct32_new_sse4_1()
130 buf0[4] = buf1[4]; in av1_fdct32_new_sse4_1()
131 btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5], buf0[6], in av1_fdct32_new_sse4_1()
133 buf0[7] = buf1[7]; in av1_fdct32_new_sse4_1()
134 buf0[8] = _mm_add_epi32(buf1[8], buf1[11]); in av1_fdct32_new_sse4_1()
135 buf0[11] = _mm_sub_epi32(buf1[8], buf1[11]); in av1_fdct32_new_sse4_1()
136 buf0[9] = _mm_add_epi32(buf1[9], buf1[10]); in av1_fdct32_new_sse4_1()
137 buf0[10] = _mm_sub_epi32(buf1[9], buf1[10]); in av1_fdct32_new_sse4_1()
138 buf0[12] = _mm_sub_epi32(buf1[15], buf1[12]); in av1_fdct32_new_sse4_1()
139 buf0[15] = _mm_add_epi32(buf1[15], buf1[12]); in av1_fdct32_new_sse4_1()
140 buf0[13] = _mm_sub_epi32(buf1[14], buf1[13]); in av1_fdct32_new_sse4_1()
141 buf0[14] = _mm_add_epi32(buf1[14], buf1[13]); in av1_fdct32_new_sse4_1()
142 buf0[16] = buf1[16]; in av1_fdct32_new_sse4_1()
143 buf0[17] = buf1[17]; in av1_fdct32_new_sse4_1()
144 btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[18], buf1[29], buf0[18], in av1_fdct32_new_sse4_1()
145 buf0[29], cos_bit); in av1_fdct32_new_sse4_1()
146 btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[19], buf1[28], buf0[19], in av1_fdct32_new_sse4_1()
147 buf0[28], cos_bit); in av1_fdct32_new_sse4_1()
148 btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[20], buf1[27], buf0[20], in av1_fdct32_new_sse4_1()
149 buf0[27], cos_bit); in av1_fdct32_new_sse4_1()
150 btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[21], buf1[26], buf0[21], in av1_fdct32_new_sse4_1()
151 buf0[26], cos_bit); in av1_fdct32_new_sse4_1()
152 buf0[22] = buf1[22]; in av1_fdct32_new_sse4_1()
153 buf0[23] = buf1[23]; in av1_fdct32_new_sse4_1()
154 buf0[24] = buf1[24]; in av1_fdct32_new_sse4_1()
155 buf0[25] = buf1[25]; in av1_fdct32_new_sse4_1()
156 buf0[30] = buf1[30]; in av1_fdct32_new_sse4_1()
157 buf0[31] = buf1[31]; in av1_fdct32_new_sse4_1()
161 btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0], buf1[1], in av1_fdct32_new_sse4_1()
163 btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2], buf1[3], in av1_fdct32_new_sse4_1()
165 buf1[4] = _mm_add_epi32(buf0[4], buf0[5]); in av1_fdct32_new_sse4_1()
166 buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]); in av1_fdct32_new_sse4_1()
167 buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]); in av1_fdct32_new_sse4_1()
168 buf1[7] = _mm_add_epi32(buf0[7], buf0[6]); in av1_fdct32_new_sse4_1()
169 buf1[8] = buf0[8]; in av1_fdct32_new_sse4_1()
170 btf_32_sse4_1_type0(-cospi[16], cospi[48], buf0[9], buf0[14], buf1[9], in av1_fdct32_new_sse4_1()
172 btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf0[10], buf0[13], buf1[10], in av1_fdct32_new_sse4_1()
174 buf1[11] = buf0[11]; in av1_fdct32_new_sse4_1()
175 buf1[12] = buf0[12]; in av1_fdct32_new_sse4_1()
176 buf1[15] = buf0[15]; in av1_fdct32_new_sse4_1()
177 buf1[16] = _mm_add_epi32(buf0[16], buf0[19]); in av1_fdct32_new_sse4_1()
178 buf1[19] = _mm_sub_epi32(buf0[16], buf0[19]); in av1_fdct32_new_sse4_1()
179 buf1[17] = _mm_add_epi32(buf0[17], buf0[18]); in av1_fdct32_new_sse4_1()
180 buf1[18] = _mm_sub_epi32(buf0[17], buf0[18]); in av1_fdct32_new_sse4_1()
181 buf1[20] = _mm_sub_epi32(buf0[23], buf0[20]); in av1_fdct32_new_sse4_1()
182 buf1[23] = _mm_add_epi32(buf0[23], buf0[20]); in av1_fdct32_new_sse4_1()
183 buf1[21] = _mm_sub_epi32(buf0[22], buf0[21]); in av1_fdct32_new_sse4_1()
184 buf1[22] = _mm_add_epi32(buf0[22], buf0[21]); in av1_fdct32_new_sse4_1()
185 buf1[24] = _mm_add_epi32(buf0[24], buf0[27]); in av1_fdct32_new_sse4_1()
186 buf1[27] = _mm_sub_epi32(buf0[24], buf0[27]); in av1_fdct32_new_sse4_1()
187 buf1[25] = _mm_add_epi32(buf0[25], buf0[26]); in av1_fdct32_new_sse4_1()
188 buf1[26] = _mm_sub_epi32(buf0[25], buf0[26]); in av1_fdct32_new_sse4_1()
189 buf1[28] = _mm_sub_epi32(buf0[31], buf0[28]); in av1_fdct32_new_sse4_1()
190 buf1[31] = _mm_add_epi32(buf0[31], buf0[28]); in av1_fdct32_new_sse4_1()
191 buf1[29] = _mm_sub_epi32(buf0[30], buf0[29]); in av1_fdct32_new_sse4_1()
192 buf1[30] = _mm_add_epi32(buf0[30], buf0[29]); in av1_fdct32_new_sse4_1()
196 buf0[0] = buf1[0]; in av1_fdct32_new_sse4_1()
197 buf0[1] = buf1[1]; in av1_fdct32_new_sse4_1()
198 buf0[2] = buf1[2]; in av1_fdct32_new_sse4_1()
199 buf0[3] = buf1[3]; in av1_fdct32_new_sse4_1()
200 btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7], in av1_fdct32_new_sse4_1()
202 btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5], buf0[6], in av1_fdct32_new_sse4_1()
204 buf0[8] = _mm_add_epi32(buf1[8], buf1[9]); in av1_fdct32_new_sse4_1()
205 buf0[9] = _mm_sub_epi32(buf1[8], buf1[9]); in av1_fdct32_new_sse4_1()
206 buf0[10] = _mm_sub_epi32(buf1[11], buf1[10]); in av1_fdct32_new_sse4_1()
207 buf0[11] = _mm_add_epi32(buf1[11], buf1[10]); in av1_fdct32_new_sse4_1()
208 buf0[12] = _mm_add_epi32(buf1[12], buf1[13]); in av1_fdct32_new_sse4_1()
209 buf0[13] = _mm_sub_epi32(buf1[12], buf1[13]); in av1_fdct32_new_sse4_1()
210 buf0[14] = _mm_sub_epi32(buf1[15], buf1[14]); in av1_fdct32_new_sse4_1()
211 buf0[15] = _mm_add_epi32(buf1[15], buf1[14]); in av1_fdct32_new_sse4_1()
212 buf0[16] = buf1[16]; in av1_fdct32_new_sse4_1()
213 btf_32_sse4_1_type0(-cospi[8], cospi[56], buf1[17], buf1[30], buf0[17], in av1_fdct32_new_sse4_1()
214 buf0[30], cos_bit); in av1_fdct32_new_sse4_1()
215 btf_32_sse4_1_type0(-cospi[56], -cospi[8], buf1[18], buf1[29], buf0[18], in av1_fdct32_new_sse4_1()
216 buf0[29], cos_bit); in av1_fdct32_new_sse4_1()
217 buf0[19] = buf1[19]; in av1_fdct32_new_sse4_1()
218 buf0[20] = buf1[20]; in av1_fdct32_new_sse4_1()
219 btf_32_sse4_1_type0(-cospi[40], cospi[24], buf1[21], buf1[26], buf0[21], in av1_fdct32_new_sse4_1()
220 buf0[26], cos_bit); in av1_fdct32_new_sse4_1()
221 btf_32_sse4_1_type0(-cospi[24], -cospi[40], buf1[22], buf1[25], buf0[22], in av1_fdct32_new_sse4_1()
222 buf0[25], cos_bit); in av1_fdct32_new_sse4_1()
223 buf0[23] = buf1[23]; in av1_fdct32_new_sse4_1()
224 buf0[24] = buf1[24]; in av1_fdct32_new_sse4_1()
225 buf0[27] = buf1[27]; in av1_fdct32_new_sse4_1()
226 buf0[28] = buf1[28]; in av1_fdct32_new_sse4_1()
227 buf0[31] = buf1[31]; in av1_fdct32_new_sse4_1()
231 buf1[0] = buf0[0]; in av1_fdct32_new_sse4_1()
232 buf1[1] = buf0[1]; in av1_fdct32_new_sse4_1()
233 buf1[2] = buf0[2]; in av1_fdct32_new_sse4_1()
234 buf1[3] = buf0[3]; in av1_fdct32_new_sse4_1()
235 buf1[4] = buf0[4]; in av1_fdct32_new_sse4_1()
236 buf1[5] = buf0[5]; in av1_fdct32_new_sse4_1()
237 buf1[6] = buf0[6]; in av1_fdct32_new_sse4_1()
238 buf1[7] = buf0[7]; in av1_fdct32_new_sse4_1()
239 btf_32_sse4_1_type1(cospi[60], cospi[4], buf0[8], buf0[15], buf1[8], buf1[15], in av1_fdct32_new_sse4_1()
241 btf_32_sse4_1_type1(cospi[28], cospi[36], buf0[9], buf0[14], buf1[9], in av1_fdct32_new_sse4_1()
243 btf_32_sse4_1_type1(cospi[44], cospi[20], buf0[10], buf0[13], buf1[10], in av1_fdct32_new_sse4_1()
245 btf_32_sse4_1_type1(cospi[12], cospi[52], buf0[11], buf0[12], buf1[11], in av1_fdct32_new_sse4_1()
247 buf1[16] = _mm_add_epi32(buf0[16], buf0[17]); in av1_fdct32_new_sse4_1()
248 buf1[17] = _mm_sub_epi32(buf0[16], buf0[17]); in av1_fdct32_new_sse4_1()
249 buf1[18] = _mm_sub_epi32(buf0[19], buf0[18]); in av1_fdct32_new_sse4_1()
250 buf1[19] = _mm_add_epi32(buf0[19], buf0[18]); in av1_fdct32_new_sse4_1()
251 buf1[20] = _mm_add_epi32(buf0[20], buf0[21]); in av1_fdct32_new_sse4_1()
252 buf1[21] = _mm_sub_epi32(buf0[20], buf0[21]); in av1_fdct32_new_sse4_1()
253 buf1[22] = _mm_sub_epi32(buf0[23], buf0[22]); in av1_fdct32_new_sse4_1()
254 buf1[23] = _mm_add_epi32(buf0[23], buf0[22]); in av1_fdct32_new_sse4_1()
255 buf1[24] = _mm_add_epi32(buf0[24], buf0[25]); in av1_fdct32_new_sse4_1()
256 buf1[25] = _mm_sub_epi32(buf0[24], buf0[25]); in av1_fdct32_new_sse4_1()
257 buf1[26] = _mm_sub_epi32(buf0[27], buf0[26]); in av1_fdct32_new_sse4_1()
258 buf1[27] = _mm_add_epi32(buf0[27], buf0[26]); in av1_fdct32_new_sse4_1()
259 buf1[28] = _mm_add_epi32(buf0[28], buf0[29]); in av1_fdct32_new_sse4_1()
260 buf1[29] = _mm_sub_epi32(buf0[28], buf0[29]); in av1_fdct32_new_sse4_1()
261 buf1[30] = _mm_sub_epi32(buf0[31], buf0[30]); in av1_fdct32_new_sse4_1()
262 buf1[31] = _mm_add_epi32(buf0[31], buf0[30]); in av1_fdct32_new_sse4_1()
266 buf0[0] = buf1[0]; in av1_fdct32_new_sse4_1()
267 buf0[1] = buf1[1]; in av1_fdct32_new_sse4_1()
268 buf0[2] = buf1[2]; in av1_fdct32_new_sse4_1()
269 buf0[3] = buf1[3]; in av1_fdct32_new_sse4_1()
270 buf0[4] = buf1[4]; in av1_fdct32_new_sse4_1()
271 buf0[5] = buf1[5]; in av1_fdct32_new_sse4_1()
272 buf0[6] = buf1[6]; in av1_fdct32_new_sse4_1()
273 buf0[7] = buf1[7]; in av1_fdct32_new_sse4_1()
274 buf0[8] = buf1[8]; in av1_fdct32_new_sse4_1()
275 buf0[9] = buf1[9]; in av1_fdct32_new_sse4_1()
276 buf0[10] = buf1[10]; in av1_fdct32_new_sse4_1()
277 buf0[11] = buf1[11]; in av1_fdct32_new_sse4_1()
278 buf0[12] = buf1[12]; in av1_fdct32_new_sse4_1()
279 buf0[13] = buf1[13]; in av1_fdct32_new_sse4_1()
280 buf0[14] = buf1[14]; in av1_fdct32_new_sse4_1()
281 buf0[15] = buf1[15]; in av1_fdct32_new_sse4_1()
282 btf_32_sse4_1_type1(cospi[62], cospi[2], buf1[16], buf1[31], buf0[16], in av1_fdct32_new_sse4_1()
283 buf0[31], cos_bit); in av1_fdct32_new_sse4_1()
284 btf_32_sse4_1_type1(cospi[30], cospi[34], buf1[17], buf1[30], buf0[17], in av1_fdct32_new_sse4_1()
285 buf0[30], cos_bit); in av1_fdct32_new_sse4_1()
286 btf_32_sse4_1_type1(cospi[46], cospi[18], buf1[18], buf1[29], buf0[18], in av1_fdct32_new_sse4_1()
287 buf0[29], cos_bit); in av1_fdct32_new_sse4_1()
288 btf_32_sse4_1_type1(cospi[14], cospi[50], buf1[19], buf1[28], buf0[19], in av1_fdct32_new_sse4_1()
289 buf0[28], cos_bit); in av1_fdct32_new_sse4_1()
290 btf_32_sse4_1_type1(cospi[54], cospi[10], buf1[20], buf1[27], buf0[20], in av1_fdct32_new_sse4_1()
291 buf0[27], cos_bit); in av1_fdct32_new_sse4_1()
292 btf_32_sse4_1_type1(cospi[22], cospi[42], buf1[21], buf1[26], buf0[21], in av1_fdct32_new_sse4_1()
293 buf0[26], cos_bit); in av1_fdct32_new_sse4_1()
294 btf_32_sse4_1_type1(cospi[38], cospi[26], buf1[22], buf1[25], buf0[22], in av1_fdct32_new_sse4_1()
295 buf0[25], cos_bit); in av1_fdct32_new_sse4_1()
296 btf_32_sse4_1_type1(cospi[6], cospi[58], buf1[23], buf1[24], buf0[23], in av1_fdct32_new_sse4_1()
297 buf0[24], cos_bit); in av1_fdct32_new_sse4_1()
300 output[0] = buf0[0]; in av1_fdct32_new_sse4_1()
301 output[1] = buf0[16]; in av1_fdct32_new_sse4_1()
302 output[2] = buf0[8]; in av1_fdct32_new_sse4_1()
303 output[3] = buf0[24]; in av1_fdct32_new_sse4_1()
304 output[4] = buf0[4]; in av1_fdct32_new_sse4_1()
305 output[5] = buf0[20]; in av1_fdct32_new_sse4_1()
306 output[6] = buf0[12]; in av1_fdct32_new_sse4_1()
307 output[7] = buf0[28]; in av1_fdct32_new_sse4_1()
308 output[8] = buf0[2]; in av1_fdct32_new_sse4_1()
309 output[9] = buf0[18]; in av1_fdct32_new_sse4_1()
310 output[10] = buf0[10]; in av1_fdct32_new_sse4_1()
311 output[11] = buf0[26]; in av1_fdct32_new_sse4_1()
312 output[12] = buf0[6]; in av1_fdct32_new_sse4_1()
313 output[13] = buf0[22]; in av1_fdct32_new_sse4_1()
314 output[14] = buf0[14]; in av1_fdct32_new_sse4_1()
315 output[15] = buf0[30]; in av1_fdct32_new_sse4_1()
316 output[16] = buf0[1]; in av1_fdct32_new_sse4_1()
317 output[17] = buf0[17]; in av1_fdct32_new_sse4_1()
318 output[18] = buf0[9]; in av1_fdct32_new_sse4_1()
319 output[19] = buf0[25]; in av1_fdct32_new_sse4_1()
320 output[20] = buf0[5]; in av1_fdct32_new_sse4_1()
321 output[21] = buf0[21]; in av1_fdct32_new_sse4_1()
322 output[22] = buf0[13]; in av1_fdct32_new_sse4_1()
323 output[23] = buf0[29]; in av1_fdct32_new_sse4_1()
324 output[24] = buf0[3]; in av1_fdct32_new_sse4_1()
325 output[25] = buf0[19]; in av1_fdct32_new_sse4_1()
326 output[26] = buf0[11]; in av1_fdct32_new_sse4_1()
327 output[27] = buf0[27]; in av1_fdct32_new_sse4_1()
328 output[28] = buf0[7]; in av1_fdct32_new_sse4_1()
329 output[29] = buf0[23]; in av1_fdct32_new_sse4_1()
330 output[30] = buf0[15]; in av1_fdct32_new_sse4_1()
331 output[31] = buf0[31]; in av1_fdct32_new_sse4_1()
339 __m128i buf0[4]; in av1_fadst4_new_sse4_1() local
349 buf0[j] = input[j * col_num + col]; in av1_fadst4_new_sse4_1()
354 buf1[0] = buf0[3]; in av1_fadst4_new_sse4_1()
355 buf1[1] = buf0[0]; in av1_fadst4_new_sse4_1()
356 buf1[2] = buf0[1]; in av1_fadst4_new_sse4_1()
357 buf1[3] = buf0[2]; in av1_fadst4_new_sse4_1()
363 btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[0], buf1[1], buf0[0], buf0[1], in av1_fadst4_new_sse4_1()
365 btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[2], buf1[3], buf0[2], in av1_fadst4_new_sse4_1()
366 buf0[3], cos_bit); in av1_fadst4_new_sse4_1()
370 buf1[0] = _mm_add_epi32(buf0[0], buf0[2]); in av1_fadst4_new_sse4_1()
371 buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]); in av1_fadst4_new_sse4_1()
372 buf1[1] = _mm_add_epi32(buf0[1], buf0[3]); in av1_fadst4_new_sse4_1()
373 buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]); in av1_fadst4_new_sse4_1()
379 buf0[0] = buf1[0]; in av1_fadst4_new_sse4_1()
380 buf0[1] = buf1[1]; in av1_fadst4_new_sse4_1()
381 btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2], in av1_fadst4_new_sse4_1()
382 buf0[3], cos_bit); in av1_fadst4_new_sse4_1()
386 buf1[0] = buf0[0]; in av1_fadst4_new_sse4_1()
387 buf1[1] = _mm_sub_epi32(_mm_setzero_si128(), buf0[2]); in av1_fadst4_new_sse4_1()
388 buf1[2] = buf0[3]; in av1_fadst4_new_sse4_1()
389 buf1[3] = _mm_sub_epi32(_mm_setzero_si128(), buf0[1]); in av1_fadst4_new_sse4_1()