1 /*
2  * Copyright (c) 2015-2020, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /** \file
30  * \brief Uniformly-named primitives named by target type.
31  *
32  * The following are a set of primitives named by target type, so that we can
33  * macro the hell out of all our NFA implementations. Hurrah!
34  */
35 
36 #ifndef UNIFORM_OPS_H
37 #define UNIFORM_OPS_H
38 
39 #include "ue2common.h"
40 #include "simd_utils.h"
41 #include "unaligned.h"
42 
// Aligned loads: the pointer must meet the natural alignment of the target
// type. Scalar forms are plain dereferences; vector forms defer to the
// loadNNN() helpers from simd_utils.h.
#define load_u8(a)          (*(const u8 *)(a))
#define load_u16(a)         (*(const u16 *)(a))
#define load_u32(a)         (*(const u32 *)(a))
#define load_u64a(a)        (*(const u64a *)(a))
#define load_m128(a)        load128(a)
#define load_m256(a)        load256(a)
#define load_m384(a)        load384(a)
#define load_m512(a)        load512(a)

// Unaligned loads: safe for any address. Multi-byte scalar forms go through
// the unaligned.h helpers to avoid misaligned-access problems; a single byte
// has no alignment requirement, so it is a plain dereference.
#define loadu_u8(a)          (*(const u8 *)(a))
#define loadu_u16(a)         unaligned_load_u16((const u8 *)(a))
#define loadu_u32(a)         unaligned_load_u32((const u8 *)(a))
#define loadu_u64a(a)        unaligned_load_u64a((const u8 *)(a))
#define loadu_m128(a)        loadu128(a)
#define loadu_m256(a)        loadu256(a)
#define loadu_m384(a)        loadu384(a)
#define loadu_m512(a)        loadu512(a)
62 
// Aligned stores: ptr must meet the natural alignment of the stored type.
// Scalar forms use do/while(0) so they behave as single statements when used
// in unbraced if/else bodies.
#define store_u8(ptr, a)    do { *(u8 *)(ptr) = (a); } while(0)
#define store_u16(ptr, a)   do { *(u16 *)(ptr) = (a); } while(0)
#define store_u32(ptr, a)   do { *(u32 *)(ptr) = (a); } while(0)
#define store_u64a(ptr, a)  do { *(u64a *)(ptr) = (a); } while(0)
#define store_m128(ptr, a)  store128(ptr, a)
#define store_m256(ptr, a)  store256(ptr, a)
#define store_m384(ptr, a)  store384(ptr, a)
#define store_m512(ptr, a)  store512(ptr, a)

// Unaligned stores: safe for any address. Note only the 128-bit vector
// variant is provided; wider unaligned vector stores are not needed by the
// callers of this header.
#define storeu_u8(ptr, a)    do { *(u8 *)(ptr) = (a); } while(0)
#define storeu_u16(ptr, a)   unaligned_store_u16(ptr, a)
#define storeu_u32(ptr, a)   unaligned_store_u32(ptr, a)
#define storeu_u64a(ptr, a)  unaligned_store_u64a(ptr, a)
#define storeu_m128(ptr, a)  storeu128(ptr, a)
79 
// All-zero-bits constant for each state type.
#define zero_u8             0
#define zero_u32            0
#define zero_u64a           0
#define zero_m128           zeroes128()
#define zero_m256           zeroes256()
#define zero_m384           zeroes384()
#define zero_m512           zeroes512()

// All-one-bits constant for each state type.
#define ones_u8             0xff
#define ones_u32            0xfffffffful
#define ones_u64a           0xffffffffffffffffull
#define ones_m128           ones128()
#define ones_m256           ones256()
#define ones_m384           ones384()
#define ones_m512           ones512()
95 
// Bitwise OR.
#define or_u8(a, b)         ((a) | (b))
#define or_u32(a, b)        ((a) | (b))
#define or_u64a(a, b)       ((a) | (b))
#define or_m128(a, b)       (or128(a, b))
#define or_m256(a, b)       (or256(a, b))
#define or_m384(a, b)       (or384(a, b))
#define or_m512(a, b)       (or512(a, b))

#if defined(HAVE_AVX512VBMI)
// VBMI-only helpers for engines that widen all state to 512 bits.
// expand_mNNN broadens a narrower vector into an m512 (identity for m512).
#define expand_m128(a)      (expand128(a))
#define expand_m256(a)      (expand256(a))
#define expand_m384(a)      (expand384(a))
#define expand_m512(a)      (a)

// Byte-wise table lookup: presumably 'a' supplies the byte indices and 'b'
// the table (the x86 shuffle intrinsics take data first, control second —
// hence the (b, a) order below; this is intentional, not a transposition).
// The m128 form uses in-lane pshufb; the wider forms use cross-lane vpermb.
#define shuffle_byte_m128(a, b)       (pshufb_m512(b, a))
#define shuffle_byte_m256(a, b)       (vpermb512(a, b))
#define shuffle_byte_m384(a, b)       (vpermb512(a, b))
#define shuffle_byte_m512(a, b)       (vpermb512(a, b))
#endif
115 
// Bitwise AND.
#define and_u8(a, b)        ((a) & (b))
#define and_u32(a, b)       ((a) & (b))
#define and_u64a(a, b)      ((a) & (b))
#define and_m128(a, b)      (and128(a, b))
#define and_m256(a, b)      (and256(a, b))
#define and_m384(a, b)      (and384(a, b))
#define and_m512(a, b)      (and512(a, b))

// Bitwise NOT (complement).
#define not_u8(a)           (~(a))
#define not_u32(a)          (~(a))
#define not_u64a(a)         (~(a))
#define not_m128(a)         (not128(a))
#define not_m256(a)         (not256(a))
#define not_m384(a)         (not384(a))
#define not_m512(a)         (not512(a))

// And-not: complements the FIRST operand, i.e. (~a) & b — same convention as
// the x86 ANDN/PANDN instructions. Take care with argument order.
#define andnot_u8(a, b)     ((~(a)) & (b))
#define andnot_u32(a, b)    ((~(a)) & (b))
#define andnot_u64a(a, b)   ((~(a)) & (b))
#define andnot_m128(a, b)   (andnot128(a, b))
#define andnot_m256(a, b)   (andnot256(a, b))
#define andnot_m384(a, b)   (andnot384(a, b))
#define andnot_m512(a, b)   (andnot512(a, b))

// Left shift. Vector forms shift each 64-bit lane independently (lshift64_*),
// so bits do not carry across lane boundaries.
#define lshift_u32(a, b)    ((a) << (b))
#define lshift_u64a(a, b)   ((a) << (b))
#define lshift_m128(a, b)   (lshift64_m128(a, b))
#define lshift_m256(a, b)   (lshift64_m256(a, b))
#define lshift_m384(a, b)   (lshift64_m384(a, b))
#define lshift_m512(a, b)   (lshift64_m512(a, b))
146 
// True iff the value is all-zero.
#define isZero_u8(a)        ((a) == 0)
#define isZero_u32(a)       ((a) == 0)
#define isZero_u64a(a)      ((a) == 0)
#define isZero_m128(a)      (!isnonzero128(a))
#define isZero_m256(a)      (!isnonzero256(a))
#define isZero_m384(a)      (!isnonzero384(a))
#define isZero_m512(a)      (!isnonzero512(a))

// True iff the value has any bit set.
#define isNonZero_u8(a)     ((a) != 0)
#define isNonZero_u32(a)    ((a) != 0)
#define isNonZero_u64a(a)   ((a) != 0)
#define isNonZero_m128(a)   (isnonzero128(a))
#define isNonZero_m256(a)   (isnonzero256(a))
#define isNonZero_m384(a)   (isnonzero384(a))
#define isNonZero_m512(a)   (isnonzero512(a))

// "Rich" difference: reports which 32-bit chunks of a and b differ as a
// bitmask rather than a simple boolean. The u64a form conservatively flags
// both of its 32-bit halves (mask 3) whenever anything differs.
#define diffrich_u32(a, b)  ((a) != (b))
#define diffrich_u64a(a, b) ((a) != (b) ? 3 : 0) //TODO: impl 32bit granularity
#define diffrich_m128(a, b) (diffrich128(a, b))
#define diffrich_m256(a, b) (diffrich256(a, b))
#define diffrich_m384(a, b) (diffrich384(a, b))
#define diffrich_m512(a, b) (diffrich512(a, b))

// As diffrich, but at 64-bit chunk granularity.
#define diffrich64_u32(a, b)  ((a) != (b))
#define diffrich64_u64a(a, b) ((a) != (b) ? 1 : 0)
#define diffrich64_m128(a, b) (diffrich64_128(a, b))
#define diffrich64_m256(a, b) (diffrich64_256(a, b))
#define diffrich64_m384(a, b) (diffrich64_384(a, b))
#define diffrich64_m512(a, b) (diffrich64_512(a, b))

// Plain inequality test.
#define noteq_u8(a, b)      ((a) != (b))
#define noteq_u32(a, b)     ((a) != (b))
#define noteq_u64a(a, b)    ((a) != (b))
#define noteq_m128(a, b)    (diff128(a, b))
#define noteq_m256(a, b)    (diff256(a, b))
#define noteq_m384(a, b)    (diff384(a, b))
#define noteq_m512(a, b)    (diff512(a, b))
184 
// Partial store: write only the first sz bytes of vector v to ptr.
#define partial_store_m128(ptr, v, sz) storebytes128(ptr, v, sz)
#define partial_store_m256(ptr, v, sz) storebytes256(ptr, v, sz)
#define partial_store_m384(ptr, v, sz) storebytes384(ptr, v, sz)
#define partial_store_m512(ptr, v, sz) storebytes512(ptr, v, sz)

// Partial load: read only the first sz bytes from ptr into a vector.
#define partial_load_m128(ptr, sz) loadbytes128(ptr, sz)
#define partial_load_m256(ptr, sz) loadbytes256(ptr, sz)
#define partial_load_m384(ptr, sz) loadbytes384(ptr, sz)
#define partial_load_m512(ptr, sz) loadbytes512(ptr, sz)

// Compressed store: pack the bits of x selected by mask m into len bytes at
// ptr (see the storecompressed* helpers for the exact packing scheme).
#define store_compressed_u32(ptr, x, m, len)  storecompressed32(ptr, x, m, len)
#define store_compressed_u64a(ptr, x, m, len) storecompressed64(ptr, x, m, len)
#define store_compressed_m128(ptr, x, m, len) storecompressed128(ptr, x, m, len)
#define store_compressed_m256(ptr, x, m, len) storecompressed256(ptr, x, m, len)
#define store_compressed_m384(ptr, x, m, len) storecompressed384(ptr, x, m, len)
#define store_compressed_m512(ptr, x, m, len) storecompressed512(ptr, x, m, len)

// Compressed load: the inverse of store_compressed — unpack len bytes from
// ptr into x under mask m. Note x comes first here (it is the destination).
#define load_compressed_u32(x, ptr, m, len)   loadcompressed32(x, ptr, m, len)
#define load_compressed_u64a(x, ptr, m, len)  loadcompressed64(x, ptr, m, len)
#define load_compressed_m128(x, ptr, m, len)  loadcompressed128(x, ptr, m, len)
#define load_compressed_m256(x, ptr, m, len)  loadcompressed256(x, ptr, m, len)
#define load_compressed_m384(x, ptr, m, len)  loadcompressed384(x, ptr, m, len)
#define load_compressed_m512(x, ptr, m, len)  loadcompressed512(x, ptr, m, len)
208 
209 static really_inline
clearbit_u32(u32 * p,u32 n)210 void clearbit_u32(u32 *p, u32 n) {
211     assert(n < sizeof(*p) * 8);
212     *p &= ~(1U << n);
213 }
214 
215 static really_inline
clearbit_u64a(u64a * p,u32 n)216 void clearbit_u64a(u64a *p, u32 n) {
217     assert(n < sizeof(*p) * 8);
218     *p &= ~(1ULL << n);
219 }
220 
// Vector variants of clearbit: clear bit n of the vector state at ptr.
#define clearbit_m128(ptr, n)   (clearbit128(ptr, n))
#define clearbit_m256(ptr, n)   (clearbit256(ptr, n))
#define clearbit_m384(ptr, n)   (clearbit384(ptr, n))
#define clearbit_m512(ptr, n)   (clearbit512(ptr, n))
225 
226 static really_inline
testbit_u32(u32 val,u32 n)227 char testbit_u32(u32 val, u32 n) {
228     assert(n < sizeof(val) * 8);
229     return !!(val & (1U << n));
230 }
231 
232 static really_inline
testbit_u64a(u64a val,u32 n)233 char testbit_u64a(u64a val, u32 n) {
234     assert(n < sizeof(val) * 8);
235     return !!(val & (1ULL << n));
236 }
237 
// Vector variants of testbit: test bit n of the vector state val.
#define testbit_m128(val, n)    (testbit128(val, n))
#define testbit_m256(val, n)    (testbit256(val, n))
#define testbit_m384(val, n)    (testbit384(val, n))
#define testbit_m512(val, n)    (testbit512(val, n))
242 
243 #endif
244