1 /*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_CORE_I_MUL_H
9 #define LIBSIMDPP_SIMDPP_CORE_I_MUL_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/detail/expr/i_mul.h>
17 #include <simdpp/core/detail/get_expr_uint.h>
18 #include <simdpp/core/detail/scalar_arg_impl.h>
19 #include <simdpp/core/detail/get_expr_uint.h>
20 
21 namespace simdpp {
22 namespace SIMDPP_ARCH_NAMESPACE {
23 
24 // no 8 bit multiplications in SSE
25 /** Multiplies 16-bit values and returns the lower part of the multiplication
26 
27     @code
28     r0 = low(a0 * b0)
29     ...
30     rN = low(aN * bN)
31     @endcode
32 
33     @par 256-bit version:
34     @icost{SSE2-AVX, NEON, ALTIVEC, 2}
35 */
36 template<unsigned N, class V1, class V2> SIMDPP_INL
37 typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type
mul_lo(const any_int16<N,V1> & a,const any_int16<N,V2> & b)38         mul_lo(const any_int16<N,V1>& a,
39                const any_int16<N,V2>& b)
40 {
41     return { { a.wrapped(), b.wrapped() } };
42 }
43 
SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo,expr_mul_lo,any_int16,int16)44 SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int16, int16)
45 
46 /** Multiplies signed 16-bit values and returns the higher half of the result.
47 
48     @code
49     r0 = high(a0 * b0)
50     ...
51     rN = high(aN * bN)
52     @endcode
53 
54     @par 128-bit version:
55     @icost{NEON, ALTIVEC, 3}
56 
57     @par 256-bit version:
58     @icost{SSE2-AVX, 2}
59     @icost{NEON, ALTIVEC, 6}
60 */
61 template<unsigned N, class E1, class E2> SIMDPP_INL
62 int16<N, expr_mul_hi<int16<N,E1>,
63                      int16<N,E2>>> mul_hi(const int16<N,E1>& a,
64                                           const int16<N,E2>& b)
65 {
66     return { { a, b } };
67 }
68 
SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi,expr_mul_hi,int16,int16)69 SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, int16, int16)
70 
71 /** Multiplies unsigned 16-bit values and returns the higher half of the result.
72 
73     @code
74     r0 = high(a0 * b0)
75     ...
76     rN = high(aN * bN)
77     @endcode
78 
79     @par 128-bit version:
80     @icost{NEON, ALTIVEC, 3}
81 
82     @par 256-bit version:
83     @icost{SSE2-AVX, 2}
84     @icost{NEON, ALTIVEC, 6}
85 */
86 template<unsigned N, class E1, class E2> SIMDPP_INL
87 uint16<N, expr_mul_hi<uint16<N,E1>,
88                       uint16<N,E2>>> mul_hi(const uint16<N,E1>& a,
89                                             const uint16<N,E2>& b)
90 {
91     return { { a, b } };
92 }
93 
SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi,expr_mul_hi,uint16,uint16)94 SIMDPP_SCALAR_ARG_IMPL_EXPR(mul_hi, expr_mul_hi, uint16, uint16)
95 
96 
97 /** Multiplies 32-bit values and returns the lower half of the result.
98 
99     @code
100     r0 = low(a0 * b0)
101     ...
102     rN = low(aN * bN)
103     @endcode
104 
105     @par 128-bit version:
106     @icost{SSE2-SSSE3, 6}
107     @icost{ALTIVEC, 8}
108 
109     @par 256-bit version:
110     @icost{SSE2-SSSE3, 12}
111     @icost{SSE4.1, AVX, NEON, 2}
112     @icost{ALTIVEC, 16}
113 */
114 template<unsigned N, class V1, class V2> SIMDPP_INL
115 typename detail::get_expr_uint<expr_mul_lo, V1, V2>::type
116         mul_lo(const any_int32<N,V1>& a,
117                const any_int32<N,V2>& b)
118 {
119     return { { a.wrapped(), b.wrapped() } };
120 }
121 
122 SIMDPP_SCALAR_ARG_IMPL_INT_UNSIGNED(mul_lo, expr_mul_lo, any_int32, int32)
123 
124 
125 } // namespace SIMDPP_ARCH_NAMESPACE
126 } // namespace simdpp
127 
128 #endif
129 
130