1/*
2 * Copyright (C) 2014 the FFLAS-FFPACK group
3 *
4 * Written by   Bastien Vialla<bastien.vialla@lirmm.fr>
5 * Brice Boyer (briceboyer) <boyer.brice@gmail.com>
6 *
7 *
8 * ========LICENCE========
9 * This file is part of the library FFLAS-FFPACK.
10 *
11 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
12 * it under the terms of the  GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
24 * ========LICENCE========
25 *.
26 */
27
28#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd128_INL
29#define __FFLASFFPACK_fflas_ffpack_utils_simd128_INL
30
/*
 * Base struct for the 128-bit floating-point Simd structs.
 * It only provides the display name; it declares no register alias and no
 * operations. (Presumably inherited by the float/double specialisations,
 * which are not visible in this file -- to be confirmed.)
 */
struct Simd128fp_base {

    /*
     * Name of the Simd struct.
     * Return : the string "Simd128"
     * The result is a plain (non-const) value: a const-qualified by-value
     * return would prevent callers from moving out of the temporary.
     */
    static inline std::string type_string () { return "Simd128"; }

};
38
39struct Simd128i_base {
40
41    /*
42     * alias to 128 bit simd register
43     */
44    using vect_t = __m128i;
45
46    /* Name of the Simd struct */
47    static inline const std::string type_string () { return "Simd128"; }
48
49    /*
50     *  Return vector of type vect_t with all elements set to zero
51     *  Return [0, ...,0]
52     */
53    static INLINE CONST vect_t zero() { return _mm_setzero_si128(); }
54
55    /*
56     * Shift packed 128-bit integers in a left by s bits while shifting in zeros, and store the results in vect_t.
57     * Args   : [a0] int128_t
58     * Return : [a0 << (s*8)] int128_t
59     */
60    template<uint8_t s>
61    static INLINE CONST vect_t sll128(const vect_t a) { return _mm_slli_si128(a, s); }
62
63    /*
64     * Shift packed 128-bit integers in a right by s while shifting in zeros, and store the results in vect_t.
65     * Args   : [a0] int128_t
66     * Return : [a0 >> (s*8)] int128_t
67     */
68    template<uint8_t s>
69    static INLINE CONST vect_t srl128(const vect_t a) { return _mm_srli_si128(a, s); }
70
71    /*
72     * Compute the bitwise AND and store the results in vect_t.
73     * Args   : [a0, ..., a127]
74     *		   [b0, ..., b127]
75     * Return : [a0 AND b0, ..., a127 AND b127]
76     */
77    static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm_and_si128(b, a); }
78
79    /*
80     * Compute the bitwise OR and store the results in vect_t.
81     * Args   : [a0, ..., a127]
82     *		   [b0, ..., b127]
83     * Return : [a0 OR b0, ..., a127 OR b127]
84     */
85    static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm_or_si128(b, a); }
86
87    /*
88     * Compute the bitwise XOR and store the results in vect_t.
89     * Args   : [a0, ..., a127]
90     *		   [b0, ..., b127]
91     * Return : [a0 XOR b0, ..., a127 XOR b127]
92     */
93    static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm_xor_si128(b, a); }
94
95    /*
96     * Compute the bitwise NOT AND and store the results in vect_t.
97     * Args   : [a0, ..., a127]
98     *		   [b0, ..., b127]
99     * Return : [NOT(a0) AND b0, ..., NOT(a127) AND b127]
100     */
101    static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm_andnot_si128(a, b); }
102
103};
104
/*
 * Primary template of the 128-bit Simd implementation (declaration only).
 * Template parameters, as supplied by the Simd128 alias below:
 *   ArithType : std::is_arithmetic of the element type
 *   Int       : std::is_integral of the element type
 *   Signed    : std::is_signed of the element type
 *   Size      : sizeof the element type, in bytes
 */
template <bool ArithType, bool Int, bool Signed, int Size>
struct Simd128_impl;

/*
 * Simd128<T> is the Simd128_impl instantiation selected by the type traits
 * and the size of the element type T.
 */
template <typename T>
using Simd128 = Simd128_impl<std::is_arithmetic<T>::value,
                             std::is_integral<T>::value,
                             std::is_signed<T>::value,
                             sizeof(T)>;
110
111#include "simd128_float.inl"
112#include "simd128_double.inl"
113
114#ifdef SIMD_INT
// Too many SSE instructions are missing for int8_t
116
117#include "simd128_int16.inl"
118#include "simd128_int32.inl"
119#ifdef __x86_64__
120#include "simd128_int64.inl"
121#endif
122#endif //#ifdef SIMD_INT
123
124#endif // __FFLASFFPACK_fflas_ffpack_utils_simd128_INL
125/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
126// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
127