1 /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_CORE_I_AVG_H
9 #define LIBSIMDPP_SIMDPP_CORE_I_AVG_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/detail/insn/i_avg.h>
17 #include <simdpp/core/detail/scalar_arg_impl.h>
18
19 namespace simdpp {
20 namespace SIMDPP_ARCH_NAMESPACE {
21
22
23 /** Computes rounded average of the unsigned 8-bit values.
24
25 @code
26 r0 = (a0 + b0 + 1) / 2
27 ...
28 rN = (aN + bN + 1) / 2
29 @endcode
30
31 @par 256-bit version:
32 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
33 */
34 template<unsigned N, class E1, class E2> SIMDPP_INL
avg(const uint8<N,E1> & a,const uint8<N,E2> & b)35 uint8<N,expr_empty> avg(const uint8<N,E1>& a, const uint8<N,E2>& b)
36 {
37 return detail::insn::i_avg(a.eval(), b.eval());
38 }
39
SIMDPP_SCALAR_ARG_IMPL_VEC(avg,uint8,uint8)40 SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint8, uint8)
41
42 /** Computes rounded average of signed 8-bit values.
43
44 @code
45 r0 = (a0 + b0 + 1) / 2
46 ...
47 rN = (aN + bN + 1) / 2
48 @endcode
49
50 @par 128-bit version:
51 @icost{SSE2-AVX2, 4-5}
52
53 @par 256-bit version:
54 @icost{SSE2-AVX, 8-9}
55 @icost{AVX2, 4-5}
56 @icost{NEON, ALTIVEC, 2}
57 */
58 template<unsigned N, class E1, class E2> SIMDPP_INL
59 int8<N,expr_empty> avg(const int8<N,E1>& a, const int8<N,E2>& b)
60 {
61 return detail::insn::i_avg(a.eval(), b.eval());
62 }
63
SIMDPP_SCALAR_ARG_IMPL_VEC(avg,int8,int8)64 SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int8, int8)
65
66 /** Computes rounded average of unsigned 16-bit values.
67
68 @code
69 r0 = (a0 + b0 + 1) / 2
70 ...
71 rN = (aN + bN + 1) / 2
72 @endcode
73
74 @par 256-bit version:
75 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
76 */
77 template<unsigned N, class E1, class E2> SIMDPP_INL
78 uint16<N,expr_empty> avg(const uint16<N,E1>& a, const uint16<N,E2>& b)
79 {
80 return detail::insn::i_avg(a.eval(), b.eval());
81 }
82
SIMDPP_SCALAR_ARG_IMPL_VEC(avg,uint16,uint16)83 SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint16, uint16)
84
85 /** Computes rounded average of signed 16-bit values.
86
87 @code
88 r0 = (a0 + b0 + 1) / 2
89 ...
90 rN = (aN + bN + 1) / 2
91 @endcode
92
93 @par 128-bit version:
94 @icost{SSE2-AVX2, 4-5}
95
96 @par 256-bit version:
97 @icost{SSE2-AVX, 8-9}
98 @icost{AVX2, 4-5}
99 @icost{NEON, ALTIVEC, 2}
100 */
101 template<unsigned N, class E1, class E2> SIMDPP_INL
102 int16<N,expr_empty> avg(const int16<N,E1>& a, const int16<N,E2>& b)
103 {
104 return detail::insn::i_avg(a.eval(), b.eval());
105 }
106
SIMDPP_SCALAR_ARG_IMPL_VEC(avg,int16,int16)107 SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int16, int16)
108
109 /** Computes rounded average of unsigned 32-bit values.
110
111 @code
112 r0 = (a0 + b0 + 1) / 2
113 ...
114 rN = (aN + bN + 1) / 2
115 @endcode
116
117 @par 128-bit version:
118 @icost{SSE2-AVX2, 6-7}
119
120 @par 256-bit version:
121 @icost{SSE2-AVX, 12-13}
122 @icost{AVX2, 6-7}
123 @icost{NEON, ALTIVEC, 2}
124 */
125 template<unsigned N, class E1, class E2> SIMDPP_INL
126 uint32<N,expr_empty> avg(const uint32<N,E1>& a, const uint32<N,E2>& b)
127 {
128 return detail::insn::i_avg(a.eval(), b.eval());
129 }
130
SIMDPP_SCALAR_ARG_IMPL_VEC(avg,uint32,uint32)131 SIMDPP_SCALAR_ARG_IMPL_VEC(avg, uint32, uint32)
132
133 /** Computes rounded average of signed 32-bit values.
134
135 @code
136 r0 = (a0 + b0 + 1) / 2
137 ...
138 rN = (aN + bN + 1) / 2
139 @endcode
140
141 @par 128-bit version:
142 @icost{SSE2-AVX2, 9-10}
143 @icost{NEON, 1}
144
145 @par 256-bit version:
146 @icost{SSE2-AVX, 18-19}
147 @icost{AVX2, 9-10}
148 @icost{NEON, ALTIVEC, 2}
149 */
150 template<unsigned N, class E1, class E2> SIMDPP_INL
151 int32<N,expr_empty> avg(const int32<N,E1>& a, const int32<N,E2>& b)
152 {
153 return detail::insn::i_avg(a.eval(), b.eval());
154 }
155
156 SIMDPP_SCALAR_ARG_IMPL_VEC(avg, int32, int32)
157
158 } // namespace SIMDPP_ARCH_NAMESPACE
159 } // namespace simdpp
160
161 #endif
162
163