1 /* 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef VPX_DSP_MIPS_FWD_TXFM_MSA_H_ 12 #define VPX_DSP_MIPS_FWD_TXFM_MSA_H_ 13 14 #include "vpx_dsp/mips/txfm_macros_msa.h" 15 #include "vpx_dsp/txfm_common.h" 16 17 #define LD_HADD(psrc, stride) \ 18 ({ \ 19 v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m; \ 20 v4i32 vec_w_m; \ 21 \ 22 LD_SH4((psrc), stride, in0_m, in1_m, in2_m, in3_m); \ 23 ADD2(in0_m, in1_m, in2_m, in3_m, in0_m, in2_m); \ 24 LD_SH4(((psrc) + 4 * stride), stride, in4_m, in5_m, in6_m, in7_m); \ 25 ADD4(in4_m, in5_m, in6_m, in7_m, in0_m, in2_m, in4_m, in6_m, in4_m, in6_m, \ 26 in0_m, in4_m); \ 27 in0_m += in4_m; \ 28 \ 29 vec_w_m = __msa_hadd_s_w(in0_m, in0_m); \ 30 HADD_SW_S32(vec_w_m); \ 31 }) 32 33 #define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) \ 34 { \ 35 v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \ 36 v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \ 37 v4i32 vec4_m, vec5_m, vec6_m, vec7_m; \ 38 v8i16 coeff_m = { \ 39 cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, -cospi_8_64, 0, 0, 0 \ 40 }; \ 41 \ 42 BUTTERFLY_4(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m); \ 43 ILVR_H2_SH(vec1_m, vec0_m, vec3_m, vec2_m, vec0_m, vec2_m); \ 44 SPLATI_H2_SH(coeff_m, 0, 1, cnst0_m, cnst1_m); \ 45 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 46 vec5_m = __msa_dotp_s_w(vec0_m, cnst1_m); \ 47 \ 48 SPLATI_H2_SH(coeff_m, 4, 3, cnst2_m, cnst3_m); \ 49 cnst2_m = __msa_ilvev_h(cnst3_m, cnst2_m); \ 50 vec7_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ 51 \ 52 vec4_m = __msa_dotp_s_w(vec0_m, cnst0_m); \ 53 cnst2_m = __msa_splati_h(coeff_m, 2); \ 54 cnst2_m = __msa_ilvev_h(cnst2_m, cnst3_m); \ 55 vec6_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ 56 \ 57 SRARI_W4_SW(vec4_m, vec5_m, vec6_m, vec7_m, DCT_CONST_BITS); \ 58 PCKEV_H4_SH(vec4_m, vec4_m, vec5_m, vec5_m, vec6_m, vec6_m, vec7_m, \ 59 vec7_m, out0, out2, out1, out3); \ 60 } 61 62 #define SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7) \ 63 { \ 64 v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ 65 \ 66 SRLI_H4_SH(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m, 15); \ 67 SRLI_H4_SH(in4, in5, in6, in7, vec4_m, vec5_m, vec6_m, vec7_m, 15); \ 68 AVE_SH4_SH(vec0_m, in0, vec1_m, in1, vec2_m, in2, vec3_m, in3, in0, in1, \ 69 in2, in3); \ 70 AVE_SH4_SH(vec4_m, in4, vec5_m, in5, vec6_m, in6, vec7_m, in7, in4, in5, \ 71 in6, in7); \ 72 } 73 74 #define VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ 75 out3, out4, out5, out6, out7) \ 76 { \ 77 v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m; \ 78 v8i16 s7_m, x0_m, x1_m, x2_m, x3_m; \ 79 v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ 80 cospi_4_64, cospi_28_64, cospi_12_64, cospi_20_64 }; \ 81 \ 82 /* FDCT stage1 */ \ 83 BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, s0_m, s1_m, s2_m, \ 84 s3_m, s4_m, s5_m, s6_m, s7_m); \ 85 BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ 86 ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ 87 ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ 88 SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ 89 x1_m = __msa_ilvev_h(x1_m, x0_m); \ 90 out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 91 \ 92 SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ 93 x2_m = -x2_m; \ 94 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 95 out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 96 \ 97 out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 98 x2_m = __msa_splati_h(coeff_m, 2); \ 99 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 100 out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 101 \ 102 /* stage2 */ \ 103 ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ 104 \ 105 s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 106 s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 107 \ 108 /* stage3 */ \ 109 BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ 110 \ 111 /* stage4 */ \ 112 ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ 113 ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ 114 \ 115 SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ 116 x1_m = __msa_ilvev_h(x0_m, x1_m); \ 117 out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ 118 \ 119 SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ 120 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 121 out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 122 \ 123 x1_m = __msa_splati_h(coeff_m, 5); \ 124 x0_m = -x0_m; \ 125 x0_m = __msa_ilvev_h(x1_m, x0_m); \ 126 out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ 127 \ 128 x2_m = __msa_splati_h(coeff_m, 6); \ 129 x3_m = -x3_m; \ 130 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 131 out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 132 } 133 134 #define FDCT8x16_EVEN(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ 135 out2, out3, out4, out5, out6, out7) \ 136 { \ 137 v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ 138 v8i16 x0_m, x1_m, x2_m, x3_m; \ 139 v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ 140 cospi_4_64, cospi_28_64, cospi_12_64, cospi_20_64 }; \ 141 \ 142 /* FDCT stage1 */ \ 143 BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, s0_m, s1_m, s2_m, \ 144 s3_m, s4_m, s5_m, s6_m, s7_m); \ 145 BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ 146 ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ 147 ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ 148 SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ 149 x1_m = __msa_ilvev_h(x1_m, x0_m); \ 150 out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 151 \ 152 SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ 153 x2_m = -x2_m; \ 154 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 155 out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 156 \ 157 out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 158 x2_m = __msa_splati_h(coeff_m, 2); \ 159 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 160 out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 161 \ 162 /* stage2 */ \ 163 ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ 164 \ 165 s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 166 s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 167 \ 168 /* stage3 */ \ 169 BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ 170 \ 171 /* stage4 */ \ 172 ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ 173 ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ 174 \ 175 SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ 176 x1_m = __msa_ilvev_h(x0_m, x1_m); \ 177 out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ 178 \ 179 SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ 180 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 181 out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 182 \ 183 x1_m = __msa_splati_h(coeff_m, 5); \ 184 x0_m = -x0_m; \ 185 x0_m = __msa_ilvev_h(x1_m, x0_m); \ 186 out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ 187 \ 188 x2_m = __msa_splati_h(coeff_m, 6); \ 189 x3_m = -x3_m; \ 190 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 191 out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 192 } 193 194 #define FDCT8x16_ODD(input0, input1, input2, input3, input4, input5, input6, \ 195 input7, out1, out3, out5, out7, out9, out11, out13, \ 196 out15) \ 197 { \ 198 v8i16 stp21_m, stp22_m, stp23_m, stp24_m, stp25_m, stp26_m; \ 199 v8i16 stp30_m, stp31_m, stp32_m, stp33_m, stp34_m, stp35_m; \ 200 v8i16 stp36_m, stp37_m, vec0_m, vec1_m; \ 201 v8i16 vec2_m, vec3_m, vec4_m, vec5_m, vec6_m; \ 202 v8i16 cnst0_m, cnst1_m, cnst4_m, cnst5_m; \ 203 v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ 204 -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 }; \ 205 v8i16 coeff1_m = { cospi_2_64, cospi_30_64, cospi_14_64, cospi_18_64, \ 206 cospi_10_64, cospi_22_64, cospi_6_64, cospi_26_64 }; \ 207 v8i16 coeff2_m = { \ 208 -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64, 0, 0, 0, 0 \ 209 }; \ 210 \ 211 /* stp 1 */ \ 212 ILVL_H2_SH(input2, input5, input3, input4, vec2_m, vec4_m); \ 213 ILVR_H2_SH(input2, input5, input3, input4, vec3_m, vec5_m); \ 214 \ 215 cnst4_m = __msa_splati_h(coeff_m, 0); \ 216 stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst4_m); \ 217 \ 218 cnst5_m = __msa_splati_h(coeff_m, 1); \ 219 cnst5_m = __msa_ilvev_h(cnst5_m, cnst4_m); \ 220 stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst5_m); \ 221 stp24_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst4_m); \ 222 stp23_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst5_m); \ 223 \ 224 /* stp2 */ \ 225 BUTTERFLY_4(input0, input1, stp22_m, stp23_m, stp30_m, stp31_m, stp32_m, \ 226 stp33_m); \ 227 BUTTERFLY_4(input7, input6, stp25_m, stp24_m, stp37_m, stp36_m, stp35_m, \ 228 stp34_m); \ 229 \ 230 ILVL_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec2_m, vec4_m); \ 231 ILVR_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec3_m, vec5_m); \ 232 \ 233 SPLATI_H2_SH(coeff_m, 2, 3, cnst0_m, cnst1_m); \ 234 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 235 stp26_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ 236 \ 237 cnst0_m = __msa_splati_h(coeff_m, 4); \ 238 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 239 stp21_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ 240 \ 241 SPLATI_H2_SH(coeff_m, 5, 2, cnst0_m, cnst1_m); \ 242 cnst1_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 243 stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ 244 \ 245 cnst0_m = __msa_splati_h(coeff_m, 3); \ 246 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 247 stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ 248 \ 249 /* stp4 */ \ 250 BUTTERFLY_4(stp30_m, stp37_m, stp26_m, stp21_m, vec6_m, vec2_m, vec4_m, \ 251 vec5_m); \ 252 BUTTERFLY_4(stp33_m, stp34_m, stp25_m, stp22_m, stp21_m, stp23_m, stp24_m, \ 253 stp31_m); \ 254 \ 255 ILVRL_H2_SH(vec2_m, vec6_m, vec1_m, vec0_m); \ 256 SPLATI_H2_SH(coeff1_m, 0, 1, cnst0_m, cnst1_m); \ 257 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 258 \ 259 out1 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 260 \ 261 cnst0_m = __msa_splati_h(coeff2_m, 0); \ 262 cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 263 out15 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 264 \ 265 ILVRL_H2_SH(vec4_m, vec5_m, vec1_m, vec0_m); \ 266 SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ 267 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 268 \ 269 out9 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ 270 \ 271 cnst1_m = __msa_splati_h(coeff2_m, 2); \ 272 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 273 out7 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 274 \ 275 ILVRL_H2_SH(stp23_m, stp21_m, vec1_m, vec0_m); \ 276 SPLATI_H2_SH(coeff1_m, 4, 5, cnst0_m, cnst1_m); \ 277 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 278 out5 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 279 \ 280 cnst0_m = __msa_splati_h(coeff2_m, 1); \ 281 cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 282 out11 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 283 \ 284 ILVRL_H2_SH(stp24_m, stp31_m, vec1_m, vec0_m); \ 285 SPLATI_H2_SH(coeff1_m, 6, 7, cnst0_m, cnst1_m); \ 286 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 287 \ 288 out13 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ 289 \ 290 cnst1_m = __msa_splati_h(coeff2_m, 3); \ 291 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 292 out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 293 } 294 295 #define FDCT_POSTPROC_2V_NEG_H(vec0, vec1) \ 296 { \ 297 v8i16 tp0_m, tp1_m; \ 298 v8i16 one_m = __msa_ldi_h(1); \ 299 \ 300 tp0_m = __msa_clti_s_h(vec0, 0); \ 301 tp1_m = __msa_clti_s_h(vec1, 0); \ 302 vec0 += 1; \ 303 vec1 += 1; \ 304 tp0_m = one_m & tp0_m; \ 305 tp1_m = one_m & tp1_m; \ 306 vec0 += tp0_m; \ 307 vec1 += tp1_m; \ 308 vec0 >>= 2; \ 309 vec1 >>= 2; \ 310 } 311 312 #define FDCT32_POSTPROC_NEG_W(vec) \ 313 { \ 314 v4i32 temp_m; \ 315 v4i32 one_m = __msa_ldi_w(1); \ 316 \ 317 temp_m = __msa_clti_s_w(vec, 0); \ 318 vec += 1; \ 319 temp_m = one_m & temp_m; \ 320 vec += temp_m; \ 321 vec >>= 2; \ 322 } 323 324 #define FDCT32_POSTPROC_2V_POS_H(vec0, vec1) \ 325 { \ 326 v8i16 tp0_m, tp1_m; \ 327 v8i16 one = __msa_ldi_h(1); \ 328 \ 329 tp0_m = __msa_clei_s_h(vec0, 0); \ 330 tp1_m = __msa_clei_s_h(vec1, 0); \ 331 tp0_m = (v8i16)__msa_xori_b((v16u8)tp0_m, 255); \ 332 tp1_m = (v8i16)__msa_xori_b((v16u8)tp1_m, 255); \ 333 vec0 += 1; \ 334 vec1 += 1; \ 335 tp0_m = one & tp0_m; \ 336 tp1_m = one & tp1_m; \ 337 vec0 += tp0_m; \ 338 vec1 += tp1_m; \ 339 vec0 >>= 2; \ 340 vec1 >>= 2; \ 341 } 342 343 #define DOTP_CONST_PAIR_W(reg0_left, reg1_left, reg0_right, reg1_right, \ 344 const0, const1, out0, out1, out2, out3) \ 345 { \ 346 v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ 347 v2i64 tp0_m, tp1_m, tp2_m, tp3_m; \ 348 v4i32 k0_m = __msa_fill_w((int32_t)const0); \ 349 \ 350 s0_m = __msa_fill_w((int32_t)const1); \ 351 k0_m = __msa_ilvev_w(s0_m, k0_m); \ 352 \ 353 ILVRL_W2_SW(-reg1_left, reg0_left, s1_m, s0_m); \ 354 ILVRL_W2_SW(reg0_left, reg1_left, s3_m, s2_m); \ 355 ILVRL_W2_SW(-reg1_right, reg0_right, s5_m, s4_m); \ 356 ILVRL_W2_SW(reg0_right, reg1_right, s7_m, s6_m); \ 357 \ 358 DOTP_SW2_SD(s0_m, s1_m, k0_m, k0_m, tp0_m, tp1_m); \ 359 DOTP_SW2_SD(s4_m, s5_m, k0_m, k0_m, tp2_m, tp3_m); \ 360 tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ 361 tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ 362 tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ 363 tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ 364 out0 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ 365 out1 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ 366 \ 367 DOTP_SW2_SD(s2_m, s3_m, k0_m, k0_m, tp0_m, tp1_m); \ 368 DOTP_SW2_SD(s6_m, s7_m, k0_m, k0_m, tp2_m, tp3_m); \ 369 tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ 370 tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ 371 tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ 372 tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ 373 out2 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ 374 out3 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ 375 } 376 377 void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr, 378 int32_t src_stride); 379 void fdct16x8_1d_row(int16_t *input, int16_t *output); 380 #endif // VPX_DSP_MIPS_FWD_TXFM_MSA_H_ 381