1 /*
2 * Copyright(c) 2019 Intel Corporation
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10 */
11
12 #ifndef EbHighbdIntraPrediction_SSE2_h
13 #define EbHighbdIntraPrediction_SSE2_h
14
15 #include <emmintrin.h>
16 #include "EbDefinitions.h"
17 #include "common_dsp_rtcd.h"
18
// Horizontally adds the four 32-bit lanes of `src`.
//
// The grand total ends up in the lowest 32-bit lane of the returned vector;
// the remaining lanes only hold partial sums left over from the folding steps
// and should not be interpreted as totals.
static INLINE __m128i dc_sum_4x32bit(const __m128i src) {
    // Fold the upper 64 bits onto the lower 64 bits: lane0 += lane2, lane1 += lane3.
    const __m128i folded_pairs = _mm_add_epi32(src, _mm_srli_si128(src, 8));
    // Fold once more by a single 32-bit lane so lane 0 accumulates everything.
    return _mm_add_epi32(folded_pairs, _mm_srli_si128(folded_pairs, 4));
}
26
// Horizontally adds the four lowest 16-bit lanes of `src`.
//
// The total is returned in the lowest 16-bit lane. All additions are plain
// 16-bit adds, so the result wraps modulo 2^16; the other lanes of the return
// value contain intermediate partial sums only.
static INLINE __m128i dc_sum_4x16bit(const __m128i src) {
    // Shift down by two 16-bit lanes and add: lane0 = s0 + s2, lane1 = s1 + s3.
    const __m128i folded_pairs = _mm_add_epi16(src, _mm_srli_si128(src, 4));
    // Shift down by one 16-bit lane and add: lane0 = s0 + s1 + s2 + s3.
    return _mm_add_epi16(folded_pairs, _mm_srli_si128(folded_pairs, 2));
}
36
dc_sum_4x16bit_large(const __m128i src)37 static INLINE __m128i dc_sum_4x16bit_large(const __m128i src) {
38 // Unpack to avoid 12-bit overflow.
39 const __m128i src_32 = _mm_unpacklo_epi16(src, _mm_setzero_si128());
40 return dc_sum_4x32bit(src_32);
41 }
42
dc_sum_8x16bit(const __m128i src)43 static INLINE __m128i dc_sum_8x16bit(const __m128i src) {
44 const __m128i src_hi = _mm_srli_si128(src, 8);
45 const __m128i sum = _mm_add_epi16(src, src_hi);
46 return dc_sum_4x16bit(sum);
47 }
48
dc_sum_8x16bit_large(const __m128i src)49 static INLINE __m128i dc_sum_8x16bit_large(const __m128i src) {
50 const __m128i src_hi = _mm_srli_si128(src, 8);
51 const __m128i sum = _mm_add_epi16(src, src_hi);
52 return dc_sum_4x16bit_large(sum);
53 }
54
dc_sum_4(const uint16_t * const src)55 static INLINE __m128i dc_sum_4(const uint16_t *const src) {
56 const __m128i s = _mm_loadl_epi64((const __m128i *)src);
57 return dc_sum_4x16bit(s);
58 }
59
dc_sum_8(const uint16_t * const src)60 static INLINE __m128i dc_sum_8(const uint16_t *const src) {
61 const __m128i s = _mm_loadu_si128((const __m128i *)src);
62 return dc_sum_8x16bit(s);
63 }
64
65 #endif // EbHighbdIntraPrediction_SSE2_h
66