1 /* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ADD_SAT_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ADD_SAT_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/expr.h>
17 #include <simdpp/detail/null/math.h>
18 #include <simdpp/detail/vector_array_macros.h>
19
20 namespace simdpp {
21 namespace SIMDPP_ARCH_NAMESPACE {
22 namespace detail {
23 namespace insn {
24
25 static SIMDPP_INL
i_add_sat(const int8<16> & a,const int8<16> & b)26 int8<16> i_add_sat(const int8<16>& a, const int8<16>& b)
27 {
28 #if SIMDPP_USE_NULL
29 return detail::null::add_sat(a, b);
30 #elif SIMDPP_USE_SSE2
31 return _mm_adds_epi8(a.native(), b.native());
32 #elif SIMDPP_USE_NEON
33 return vqaddq_s8(a.native(), b.native());
34 #elif SIMDPP_USE_ALTIVEC
35 return vec_adds(a.native(), b.native());
36 #elif SIMDPP_USE_MSA
37 return __msa_adds_s_b(a.native(), b.native());
38 #endif
39 }
40
#if SIMDPP_USE_AVX2
// AVX2-only overload: saturating addition of two int8<32> vectors, performed
// by _mm256_adds_epi8 on the native registers of a and b.
static SIMDPP_INL int8<32> i_add_sat(const int8<32>& a, const int8<32>& b)
{
    int8<32> sum = _mm256_adds_epi8(a.native(), b.native());
    return sum;
}
#endif
48
#if SIMDPP_USE_AVX512BW
// AVX512BW-only overload: saturating addition of two int8<64> vectors,
// performed by _mm512_adds_epi8 on the native registers of a and b.
static SIMDPP_INL int8<64> i_add_sat(const int8<64>& a, const int8<64>& b)
{
    int8<64> sum = _mm512_adds_epi8(a.native(), b.native());
    return sum;
}
#endif
56
57 // -----------------------------------------------------------------------------
58
59 static SIMDPP_INL
i_add_sat(const int16<8> & a,const int16<8> & b)60 int16<8> i_add_sat(const int16<8>& a, const int16<8>& b)
61 {
62 #if SIMDPP_USE_NULL
63 return detail::null::add_sat(a, b);
64 #elif SIMDPP_USE_SSE2
65 return _mm_adds_epi16(a.native(), b.native());
66 #elif SIMDPP_USE_NEON
67 return vqaddq_s16(a.native(), b.native());
68 #elif SIMDPP_USE_ALTIVEC
69 return vec_adds(a.native(), b.native());
70 #elif SIMDPP_USE_MSA
71 return __msa_adds_s_h(a.native(), b.native());
72 #endif
73 }
74
#if SIMDPP_USE_AVX2
// AVX2-only overload: saturating addition of two int16<16> vectors, performed
// by _mm256_adds_epi16 on the native registers of a and b.
static SIMDPP_INL int16<16> i_add_sat(const int16<16>& a, const int16<16>& b)
{
    int16<16> sum = _mm256_adds_epi16(a.native(), b.native());
    return sum;
}
#endif
82
#if SIMDPP_USE_AVX512BW
// AVX512BW-only overload: saturating addition of two int16<32> vectors,
// performed by _mm512_adds_epi16 on the native registers of a and b.
static SIMDPP_INL int16<32> i_add_sat(const int16<32>& a, const int16<32>& b)
{
    int16<32> sum = _mm512_adds_epi16(a.native(), b.native());
    return sum;
}
#endif
90
91 // -----------------------------------------------------------------------------
92
93 static SIMDPP_INL
i_add_sat(const uint8<16> & a,const uint8<16> & b)94 uint8<16> i_add_sat(const uint8<16>& a, const uint8<16>& b)
95 {
96 #if SIMDPP_USE_NULL
97 return detail::null::add_sat(a, b);
98 #elif SIMDPP_USE_SSE2
99 return _mm_adds_epu8(a.native(), b.native());
100 #elif SIMDPP_USE_NEON
101 return vqaddq_u8(a.native(), b.native());
102 #elif SIMDPP_USE_ALTIVEC
103 return vec_adds(a.native(), b.native());
104 #elif SIMDPP_USE_MSA
105 return __msa_adds_u_b(a.native(), b.native());
106 #endif
107 }
108
#if SIMDPP_USE_AVX2
// AVX2-only overload: saturating addition of two uint8<32> vectors, performed
// by _mm256_adds_epu8 on the native registers of a and b.
static SIMDPP_INL uint8<32> i_add_sat(const uint8<32>& a, const uint8<32>& b)
{
    uint8<32> sum = _mm256_adds_epu8(a.native(), b.native());
    return sum;
}
#endif
116
#if SIMDPP_USE_AVX512BW
// AVX512BW-only overload: saturating addition of two uint8<64> vectors,
// performed by _mm512_adds_epu8 on the native registers of a and b.
static SIMDPP_INL uint8<64> i_add_sat(const uint8<64>& a, const uint8<64>& b)
{
    uint8<64> sum = _mm512_adds_epu8(a.native(), b.native());
    return sum;
}
#endif
124
125 // -----------------------------------------------------------------------------
126
127 static SIMDPP_INL
i_add_sat(const uint16<8> & a,const uint16<8> & b)128 uint16<8> i_add_sat(const uint16<8>& a, const uint16<8>& b)
129 {
130 #if SIMDPP_USE_NULL
131 return detail::null::add_sat(a, b);
132 #elif SIMDPP_USE_SSE2
133 return _mm_adds_epu16(a.native(), b.native());
134 #elif SIMDPP_USE_NEON
135 return vqaddq_u16(a.native(), b.native());
136 #elif SIMDPP_USE_ALTIVEC
137 return vec_adds(a.native(), b.native());
138 #elif SIMDPP_USE_MSA
139 return __msa_adds_u_h(a.native(), b.native());
140 #endif
141 }
142
#if SIMDPP_USE_AVX2
// AVX2-only overload: saturating addition of two uint16<16> vectors, performed
// by _mm256_adds_epu16 on the native registers of a and b.
static SIMDPP_INL uint16<16> i_add_sat(const uint16<16>& a, const uint16<16>& b)
{
    uint16<16> sum = _mm256_adds_epu16(a.native(), b.native());
    return sum;
}
#endif
150
#if SIMDPP_USE_AVX512BW
// AVX512BW-only overload: saturating addition of two uint16<32> vectors,
// performed by _mm512_adds_epu16 on the native registers of a and b.
static SIMDPP_INL uint16<32> i_add_sat(const uint16<32>& a, const uint16<32>& b)
{
    uint16<32> sum = _mm512_adds_epu16(a.native(), b.native());
    return sum;
}
#endif
158
159 template<class R, unsigned N, class E1, class E2> SIMDPP_INL
i_add_sat(const uint16<N> & a,const uint16<N> & b)160 uint16<N> i_add_sat(const uint16<N>& a, const uint16<N>& b)
161 {
162 SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, add_sat, a, b);
163 }
164
165 // -----------------------------------------------------------------------------
166
167 template<class V> SIMDPP_INL
i_add_sat(const V & a,const V & b)168 V i_add_sat(const V& a, const V& b)
169 {
170 SIMDPP_VEC_ARRAY_IMPL2(V, i_add_sat, a, b)
171 }
172
173 } // namespace insn
174 } // namespace detail
175 } // namespace SIMDPP_ARCH_NAMESPACE
176 } // namespace simdpp
177
178 #endif
179
180