/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ADD_SAT_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_ADD_SAT_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/expr.h>
17 #include <simdpp/detail/null/math.h>
18 #include <simdpp/detail/vector_array_macros.h>
19 
20 namespace simdpp {
21 namespace SIMDPP_ARCH_NAMESPACE {
22 namespace detail {
23 namespace insn {
24 
25 static SIMDPP_INL
i_add_sat(const int8<16> & a,const int8<16> & b)26 int8<16> i_add_sat(const int8<16>& a, const int8<16>& b)
27 {
28 #if SIMDPP_USE_NULL
29     return detail::null::add_sat(a, b);
30 #elif SIMDPP_USE_SSE2
31     return _mm_adds_epi8(a.native(), b.native());
32 #elif SIMDPP_USE_NEON
33     return vqaddq_s8(a.native(), b.native());
34 #elif SIMDPP_USE_ALTIVEC
35     return vec_adds(a.native(), b.native());
36 #elif SIMDPP_USE_MSA
37     return __msa_adds_s_b(a.native(), b.native());
38 #endif
39 }
40 
#if SIMDPP_USE_AVX2
// Saturating addition of two int8<32> vectors; only available when AVX2 is
// enabled. Forwards the native registers to _mm256_adds_epi8.
static SIMDPP_INL int8<32> i_add_sat(const int8<32>& a, const int8<32>& b)
{
    int8<32> sum = _mm256_adds_epi8(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX2
48 
#if SIMDPP_USE_AVX512BW
// Saturating addition of two int8<64> vectors; only available when AVX512BW
// is enabled. Forwards the native registers to _mm512_adds_epi8.
static SIMDPP_INL int8<64> i_add_sat(const int8<64>& a, const int8<64>& b)
{
    int8<64> sum = _mm512_adds_epi8(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX512BW
56 
57 // -----------------------------------------------------------------------------
58 
59 static SIMDPP_INL
i_add_sat(const int16<8> & a,const int16<8> & b)60 int16<8> i_add_sat(const int16<8>& a, const int16<8>& b)
61 {
62 #if SIMDPP_USE_NULL
63     return detail::null::add_sat(a, b);
64 #elif SIMDPP_USE_SSE2
65     return _mm_adds_epi16(a.native(), b.native());
66 #elif SIMDPP_USE_NEON
67     return vqaddq_s16(a.native(), b.native());
68 #elif SIMDPP_USE_ALTIVEC
69     return vec_adds(a.native(), b.native());
70 #elif SIMDPP_USE_MSA
71     return __msa_adds_s_h(a.native(), b.native());
72 #endif
73 }
74 
#if SIMDPP_USE_AVX2
// Saturating addition of two int16<16> vectors; only available when AVX2 is
// enabled. Forwards the native registers to _mm256_adds_epi16.
static SIMDPP_INL int16<16> i_add_sat(const int16<16>& a, const int16<16>& b)
{
    int16<16> sum = _mm256_adds_epi16(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX2
82 
#if SIMDPP_USE_AVX512BW
// Saturating addition of two int16<32> vectors; only available when AVX512BW
// is enabled. Forwards the native registers to _mm512_adds_epi16.
static SIMDPP_INL int16<32> i_add_sat(const int16<32>& a, const int16<32>& b)
{
    int16<32> sum = _mm512_adds_epi16(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX512BW
90 
91 // -----------------------------------------------------------------------------
92 
93 static SIMDPP_INL
i_add_sat(const uint8<16> & a,const uint8<16> & b)94 uint8<16> i_add_sat(const uint8<16>& a, const uint8<16>& b)
95 {
96 #if SIMDPP_USE_NULL
97     return detail::null::add_sat(a, b);
98 #elif SIMDPP_USE_SSE2
99     return _mm_adds_epu8(a.native(), b.native());
100 #elif SIMDPP_USE_NEON
101     return vqaddq_u8(a.native(), b.native());
102 #elif SIMDPP_USE_ALTIVEC
103     return vec_adds(a.native(), b.native());
104 #elif SIMDPP_USE_MSA
105     return __msa_adds_u_b(a.native(), b.native());
106 #endif
107 }
108 
#if SIMDPP_USE_AVX2
// Saturating addition of two uint8<32> vectors; only available when AVX2 is
// enabled. Forwards the native registers to _mm256_adds_epu8.
static SIMDPP_INL uint8<32> i_add_sat(const uint8<32>& a, const uint8<32>& b)
{
    uint8<32> sum = _mm256_adds_epu8(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX2
116 
#if SIMDPP_USE_AVX512BW
// Saturating addition of two uint8<64> vectors; only available when AVX512BW
// is enabled. Forwards the native registers to _mm512_adds_epu8.
static SIMDPP_INL uint8<64> i_add_sat(const uint8<64>& a, const uint8<64>& b)
{
    uint8<64> sum = _mm512_adds_epu8(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX512BW
124 
125 // -----------------------------------------------------------------------------
126 
127 static SIMDPP_INL
i_add_sat(const uint16<8> & a,const uint16<8> & b)128 uint16<8> i_add_sat(const uint16<8>& a, const uint16<8>& b)
129 {
130 #if SIMDPP_USE_NULL
131     return detail::null::add_sat(a, b);
132 #elif SIMDPP_USE_SSE2
133     return _mm_adds_epu16(a.native(), b.native());
134 #elif SIMDPP_USE_NEON
135     return vqaddq_u16(a.native(), b.native());
136 #elif SIMDPP_USE_ALTIVEC
137     return vec_adds(a.native(), b.native());
138 #elif SIMDPP_USE_MSA
139     return __msa_adds_u_h(a.native(), b.native());
140 #endif
141 }
142 
#if SIMDPP_USE_AVX2
// Saturating addition of two uint16<16> vectors; only available when AVX2 is
// enabled. Forwards the native registers to _mm256_adds_epu16.
static SIMDPP_INL uint16<16> i_add_sat(const uint16<16>& a, const uint16<16>& b)
{
    uint16<16> sum = _mm256_adds_epu16(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX2
150 
#if SIMDPP_USE_AVX512BW
// Saturating addition of two uint16<32> vectors; only available when
// AVX512BW is enabled. Forwards the native registers to _mm512_adds_epu16.
static SIMDPP_INL uint16<32> i_add_sat(const uint16<32>& a, const uint16<32>& b)
{
    uint16<32> sum = _mm512_adds_epu16(a.native(), b.native());
    return sum;
}
#endif // SIMDPP_USE_AVX512BW
158 
159 template<class R, unsigned N, class E1, class E2> SIMDPP_INL
i_add_sat(const uint16<N> & a,const uint16<N> & b)160 uint16<N> i_add_sat(const uint16<N>& a, const uint16<N>& b)
161 {
162     SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, add_sat, a, b);
163 }
164 
165 // -----------------------------------------------------------------------------
166 
167 template<class V> SIMDPP_INL
i_add_sat(const V & a,const V & b)168 V i_add_sat(const V& a, const V& b)
169 {
170     SIMDPP_VEC_ARRAY_IMPL2(V, i_add_sat, a, b)
171 }
172 
173 } // namespace insn
174 } // namespace detail
175 } // namespace SIMDPP_ARCH_NAMESPACE
176 } // namespace simdpp
177 
178 #endif
179 
180