1 /* This file is part of the gf2x library.
2
3 Copyright 2010, 2013, 2015
4 Richard Brent, Pierrick Gaudry, Emmanuel Thome', Paul Zimmermann
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms of either:
8 - If the archive contains a file named toom-gpl.c (not a trivial
9 placeholder), the GNU General Public License as published by the Free
10 Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 - If the archive contains a file named toom-gpl.c which is a trivial
13 placeholder, the GNU Lesser General Public License as published by
14 the Free Software Foundation; either version 2.1 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 FITNESS FOR A PARTICULAR PURPOSE. See the license text for more details.
20
21 You should have received a copy of the GNU General Public License as
22 well as the GNU Lesser General Public License along with this program;
23 see the files COPYING and COPYING.LIB. If not, write to the Free
24 Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 02110-1301, USA.
26 */
27
28 /* Implements 128x128 -> 256 bit product using pclmulqdq instruction. */
29
30 #ifndef GF2X_MUL2_H_
31 #define GF2X_MUL2_H_
32
33 #include "gf2x.h"
34 /* All gf2x source files for lowlevel functions must include gf2x-small.h
35 * This is mandatory for the tuning mechanism. */
36 #include "gf2x/gf2x-small.h"
37
38 #if GF2X_WORDSIZE != 64
39 #error "This code is for 64-bit only"
40 #endif
41
42 #ifndef GF2X_HAVE_PCLMUL_SUPPORT
43 #error "This code needs pclmul support"
44 #endif
45
46 GF2X_STORAGE_CLASS_mul2
47 #if defined(CARRY) && defined(BORROW)
48 #error "internal error, mul2cl.c can't be included with both BORROW and CARRY!"
49 #endif
50
51 #if defined(CARRY)
52 /* {t, 4} <- {s1, 2} * {s2, 2}, and {c, 2} <- {s1+1, 1} * {s2+1, 1} */
53 void
gf2x_mul2c(unsigned long * t,unsigned long const * s1,unsigned long const * s2,unsigned long * c)54 gf2x_mul2c (unsigned long *t, unsigned long const *s1, unsigned long const *s2,
55 unsigned long *c)
56 #elif defined(BORROW)
57 void
58 /* {t, 4} <- {s1, 2} * {s2, 2}, knowing {c, 2} = {s1+1, 1} * {s2+1, 1} */
59 gf2x_mul2b (unsigned long *t, unsigned long const *s1, unsigned long const *s2,
60 unsigned long const *c)
61 #else
62 void gf2x_mul2(unsigned long * t, unsigned long const * s1,
63 unsigned long const * s2)
64 #endif
65 {
66 #define PXOR(lop, rop) _mm_xor_si128((lop), (rop))
67 #define PZERO _mm_setzero_si128()
68 __m128i ss1 = _mm_loadu_si128((__m128i *)s1);
69 __m128i ss2 = _mm_loadu_si128((__m128i *)s2);
70
71
72 __m128i t00 = _mm_clmulepi64_si128(ss1, ss2, 0);
73 #ifndef BORROW
74 __m128i t11 = _mm_clmulepi64_si128(ss1, ss2, 0x11);
75 #endif
76
77 ss1 = PXOR(ss1, _mm_shuffle_epi32(ss1, _MM_SHUFFLE(1,0,3,2)));
78 ss2 = PXOR(ss2, _mm_shuffle_epi32(ss2, _MM_SHUFFLE(1,0,3,2)));
79
80 __m128i tk = _mm_clmulepi64_si128(ss1, ss2, 0);
81
82 #ifndef BORROW
83 tk = PXOR(tk, PXOR(t00, t11));
84 #endif
85
86 /* store result */
87 #if defined(BORROW)
88 tk = PXOR(tk, PXOR(t00, _mm_loadu_si128((__m128i*)c)));
89 _mm_storeu_si128((__m128i *)(t), PXOR(t00, _mm_unpacklo_epi64(PZERO, tk)));
90 _mm_storeu_si128((__m128i *)(t+2),PXOR(c, _mm_unpackhi_epi64(tk, PZERO)));
91 #elif defined(CARRY)
92 _mm_storeu_si128((__m128i *)c, t11);
93 _mm_storeu_si128((__m128i *)(t), PXOR(t00, _mm_unpacklo_epi64(PZERO, tk)));
94 _mm_storeu_si128((__m128i *)(t+2),PXOR(t11, _mm_unpackhi_epi64(tk, PZERO)));
95 #else
96 _mm_storeu_si128((__m128i *)(t), PXOR(t00, _mm_unpacklo_epi64(PZERO, tk)));
97 _mm_storeu_si128((__m128i *)(t+2),PXOR(t11, _mm_unpackhi_epi64(tk, PZERO)));
98 #endif
99
100 #undef PZERO
101 #undef PXOR
102 }
103 #endif /* GF2X_MUL2_H_ */
104