1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_
12 #define VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_
13
14 #include <emmintrin.h>
15 //#include "vpx/vpx_integer.h"
16
// Builds a vector whose eight 16-bit lanes alternate a, b, a, b, ...
// Each 32-bit lane holds the low 16 bits of `a` in its lower half and the low
// 16 bits of `b` in its upper half.
#define pair_set_epi16(a, b) \
  _mm_set1_epi32((int)(((uint32_t)(b) << 16) | (uint32_t)(uint16_t)(a)))
19
// Builds a vector whose four 32-bit lanes, from lowest to highest, are
// a, b, a, b. Both arguments are converted to int.
#define pair_set_epi32(a, b) \
  _mm_setr_epi32((int)(a), (int)(b), (int)(a), (int)(b))
22
// Builds a vector whose four lowest 16-bit lanes all hold `a` and whose four
// highest 16-bit lanes all hold `b`. Both arguments are converted to int16_t.
#define dual_set_epi16(a, b)                                             \
  _mm_setr_epi16((int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a), \
                 (int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b))
26
// Builds a vector from eight 16-bit values, with `a` in the lowest lane and
// `h` in the highest. Every argument is converted to int16_t.
#define octa_set_epi16(a, b, c, d, e, f, g, h)                          \
  _mm_set_epi16((int16_t)(h), (int16_t)(g), (int16_t)(f), (int16_t)(e), \
                (int16_t)(d), (int16_t)(c), (int16_t)(b), (int16_t)(a))
30
dct_const_round_shift_sse2(const __m128i in)31 static INLINE __m128i dct_const_round_shift_sse2(const __m128i in) {
32 const __m128i t = _mm_add_epi32(in, _mm_set1_epi32(DCT_CONST_ROUNDING));
33 return _mm_srai_epi32(t, DCT_CONST_BITS);
34 }
35
idct_madd_round_shift_sse2(const __m128i in,const __m128i cospi)36 static INLINE __m128i idct_madd_round_shift_sse2(const __m128i in,
37 const __m128i cospi) {
38 const __m128i t = _mm_madd_epi16(in, cospi);
39 return dct_const_round_shift_sse2(t);
40 }
41
42 // Calculate the dot product between in0/1 and x and wrap to short.
idct_calc_wraplow_sse2(const __m128i in0,const __m128i in1,const __m128i x)43 static INLINE __m128i idct_calc_wraplow_sse2(const __m128i in0,
44 const __m128i in1,
45 const __m128i x) {
46 const __m128i t0 = idct_madd_round_shift_sse2(in0, x);
47 const __m128i t1 = idct_madd_round_shift_sse2(in1, x);
48 return _mm_packs_epi32(t0, t1);
49 }
50
51 // Multiply elements by constants and add them together.
butterfly(const __m128i in0,const __m128i in1,const int c0,const int c1,__m128i * const out0,__m128i * const out1)52 static INLINE void butterfly(const __m128i in0, const __m128i in1, const int c0,
53 const int c1, __m128i *const out0,
54 __m128i *const out1) {
55 const __m128i cst0 = pair_set_epi16(c0, -c1);
56 const __m128i cst1 = pair_set_epi16(c1, c0);
57 const __m128i lo = _mm_unpacklo_epi16(in0, in1);
58 const __m128i hi = _mm_unpackhi_epi16(in0, in1);
59 *out0 = idct_calc_wraplow_sse2(lo, hi, cst0);
60 *out1 = idct_calc_wraplow_sse2(lo, hi, cst1);
61 }
62
recon_and_store_16(const __m128i in0,const __m128i in1,uint8_t * const dest)63 static INLINE void recon_and_store_16(const __m128i in0, const __m128i in1,
64 uint8_t *const dest) {
65 const __m128i zero = _mm_setzero_si128();
66 const __m128i d = _mm_loadu_si128((__m128i *)dest);
67 const __m128i d0 = _mm_unpacklo_epi8(d, zero);
68 const __m128i d1 = _mm_unpackhi_epi8(d, zero);
69 const __m128i d2 = _mm_add_epi16(in0, d0);
70 const __m128i d3 = _mm_add_epi16(in1, d1);
71 const __m128i dd = _mm_packus_epi16(d2, d3);
72 _mm_storeu_si128((__m128i *)dest, dd);
73 }
74
75 #endif // VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_
76