1 /*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SUB_SAT_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_SUB_SAT_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/detail/null/math.h>
17 #include <simdpp/detail/vector_array_macros.h>
18 
19 namespace simdpp {
20 namespace SIMDPP_ARCH_NAMESPACE {
21 namespace detail {
22 namespace insn {
23 
24 static SIMDPP_INL
i_sub_sat(const int8<16> & a,const int8<16> & b)25 int8<16> i_sub_sat(const int8<16>& a, const int8<16>& b)
26 {
27 #if SIMDPP_USE_NULL
28     return detail::null::sub_sat(a, b);
29 #elif SIMDPP_USE_SSE2
30     return _mm_subs_epi8(a.native(), b.native());
31 #elif SIMDPP_USE_NEON
32     return vqsubq_s8(a.native(), b.native());
33 #elif SIMDPP_USE_ALTIVEC
34     return vec_subs(a.native(), b.native());
35 #elif SIMDPP_USE_MSA
36     return __msa_subs_s_b(a.native(), b.native());
37 #endif
38 }
39 
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) of thirty-two signed 8-bit
// lanes. This overload only exists when the AVX2 backend is enabled.
static SIMDPP_INL
int8<32> i_sub_sat(const int8<32>& a, const int8<32>& b)
{
    return _mm256_subs_epi8(a.native(), b.native());
}
#endif
47 
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) of sixty-four signed 8-bit
// lanes. This overload only exists when the AVX512BW backend is enabled.
static SIMDPP_INL
int8<64> i_sub_sat(const int8<64>& a, const int8<64>& b)
{
    return _mm512_subs_epi8(a.native(), b.native());
}
#endif
55 
56 // -----------------------------------------------------------------------------
57 
58 static SIMDPP_INL
i_sub_sat(const int16<8> & a,const int16<8> & b)59 int16<8> i_sub_sat(const int16<8>& a, const int16<8>& b)
60 {
61 #if SIMDPP_USE_NULL
62     return detail::null::sub_sat(a, b);
63 #elif SIMDPP_USE_SSE2
64     return _mm_subs_epi16(a.native(), b.native());
65 #elif SIMDPP_USE_NEON
66     return vqsubq_s16(a.native(), b.native());
67 #elif SIMDPP_USE_ALTIVEC
68     return vec_subs(a.native(), b.native());
69 #elif SIMDPP_USE_MSA
70     return __msa_subs_s_h(a.native(), b.native());
71 #endif
72 }
73 
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) of sixteen signed 16-bit
// lanes. This overload only exists when the AVX2 backend is enabled.
static SIMDPP_INL
int16<16> i_sub_sat(const int16<16>& a, const int16<16>& b)
{
    return _mm256_subs_epi16(a.native(), b.native());
}
#endif
81 
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) of thirty-two signed 16-bit
// lanes. This overload only exists when the AVX512BW backend is enabled.
static SIMDPP_INL
int16<32> i_sub_sat(const int16<32>& a, const int16<32>& b)
{
    return _mm512_subs_epi16(a.native(), b.native());
}
#endif
89 
90 // -----------------------------------------------------------------------------
91 
92 static SIMDPP_INL
i_sub_sat(const uint8<16> & a,const uint8<16> & b)93 uint8<16> i_sub_sat(const uint8<16>& a, const uint8<16>& b)
94 {
95 #if SIMDPP_USE_NULL
96     return detail::null::sub_sat(a, b);
97 #elif SIMDPP_USE_SSE2
98     return _mm_subs_epu8(a.native(), b.native());
99 #elif SIMDPP_USE_NEON
100     return vqsubq_u8(a.native(), b.native());
101 #elif SIMDPP_USE_ALTIVEC
102     return vec_subs(a.native(), b.native());
103 #elif SIMDPP_USE_MSA
104     return __msa_subs_u_b(a.native(), b.native());
105 #endif
106 }
107 
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) of thirty-two unsigned 8-bit
// lanes. This overload only exists when the AVX2 backend is enabled.
static SIMDPP_INL
uint8<32> i_sub_sat(const uint8<32>& a, const uint8<32>& b)
{
    return _mm256_subs_epu8(a.native(), b.native());
}
#endif
115 
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) of sixty-four unsigned 8-bit
// lanes. This overload only exists when the AVX512BW backend is enabled.
static SIMDPP_INL
uint8<64> i_sub_sat(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_subs_epu8(a.native(), b.native());
}
#endif
123 
124 // -----------------------------------------------------------------------------
125 
126 static SIMDPP_INL
i_sub_sat(const uint16<8> & a,const uint16<8> & b)127 uint16<8> i_sub_sat(const uint16<8>& a, const uint16<8>& b)
128 {
129 #if SIMDPP_USE_NULL
130     return detail::null::sub_sat(a, b);
131 #elif SIMDPP_USE_SSE2
132     return _mm_subs_epu16(a.native(), b.native());
133 #elif SIMDPP_USE_NEON
134     return vqsubq_u16(a.native(), b.native());
135 #elif SIMDPP_USE_ALTIVEC
136     return vec_subs(a.native(), b.native());
137 #elif SIMDPP_USE_MSA
138     return __msa_subs_u_h(a.native(), b.native());
139 #endif
140 }
141 
#if SIMDPP_USE_AVX2
// Element-wise saturating subtraction (a - b) of sixteen unsigned 16-bit
// lanes. This overload only exists when the AVX2 backend is enabled.
static SIMDPP_INL
uint16<16> i_sub_sat(const uint16<16>& a, const uint16<16>& b)
{
    return _mm256_subs_epu16(a.native(), b.native());
}
#endif
149 
#if SIMDPP_USE_AVX512BW
// Element-wise saturating subtraction (a - b) of thirty-two unsigned 16-bit
// lanes. This overload only exists when the AVX512BW backend is enabled.
static SIMDPP_INL
uint16<32> i_sub_sat(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_subs_epu16(a.native(), b.native());
}
#endif
157 
158 // -----------------------------------------------------------------------------
159 
160 template<class V> SIMDPP_INL
i_sub_sat(const V & a,const V & b)161 V i_sub_sat(const V& a, const V& b)
162 {
163     SIMDPP_VEC_ARRAY_IMPL2(V, i_sub_sat, a, b)
164 }
165 
166 } // namespace insn
167 } // namespace detail
168 } // namespace SIMDPP_ARCH_NAMESPACE
169 } // namespace simdpp
170 
171 #endif
172 
173