1 /**
2  * Author......: See docs/credits.txt
3  * License.....: MIT
4  */
5 
6 #ifndef _INC_TYPES_H
7 #define _INC_TYPES_H
8 
9 #if ATTACK_MODE == 9
10 #define SALT_POS       (pws_pos + gid)
11 #define DIGESTS_CNT    1
12 #define DIGESTS_OFFSET (pws_pos + gid)
13 #else
14 #define SALT_POS       salt_pos_host
15 #define DIGESTS_CNT    digests_cnt_host
16 #define DIGESTS_OFFSET digests_offset_host
17 #endif
18 
19 #ifdef IS_CUDA
20 // https://docs.nvidia.com/cuda/nvrtc/index.html#integer-size
21 typedef unsigned char       uchar;
22 typedef unsigned short      ushort;
23 typedef unsigned int        uint;
24 typedef unsigned long       ulong;
25 typedef unsigned long long  ullong;
26 #endif
27 
28 #ifdef IS_OPENCL
29 typedef ulong   ullong;
30 typedef ulong2  ullong2;
31 typedef ulong4  ullong4;
32 typedef ulong8  ullong8;
33 typedef ulong16 ullong16;
34 #endif
35 
36 #ifdef KERNEL_STATIC
37 typedef uchar  u8;
38 typedef ushort u16;
39 typedef uint   u32;
40 typedef ullong u64;
41 #else
42 typedef uint8_t  u8;
43 typedef uint16_t u16;
44 typedef uint32_t u32;
45 typedef uint64_t u64;
46 #endif
47 
48 //testwise disabled
49 //typedef u8  u8a  __attribute__ ((aligned (8)));
50 //typedef u16 u16a __attribute__ ((aligned (8)));
51 //typedef u32 u32a __attribute__ ((aligned (8)));
52 //typedef u64 u64a __attribute__ ((aligned (8)));
53 
54 typedef u8  u8a;
55 typedef u16 u16a;
56 typedef u32 u32a;
57 typedef u64 u64a;
58 
59 #ifndef NEW_SIMD_CODE
60 #undef  VECT_SIZE
61 #define VECT_SIZE 1
62 #endif
63 
64 #define CONCAT(a, b)       a##b
65 #define VTYPE(type, width) CONCAT(type, width)
66 
67 // emulated is always VECT_SIZE = 1
68 #if VECT_SIZE == 1
69 typedef u8   u8x;
70 typedef u16  u16x;
71 typedef u32  u32x;
72 typedef u64  u64x;
73 
74 #define make_u8x  (u8)
75 #define make_u16x (u16)
76 #define make_u32x (u32)
77 #define make_u64x (u64)
78 
79 #else
80 #if defined IS_CUDA || defined IS_HIP
81 
82 #if VECT_SIZE == 2
83 
84 struct __device_builtin__ __builtin_align__(2) u8x
85 {
86   u8 s0;
87   u8 s1;
88 
u8xu8x89   inline __device__  u8x (const u8 a, const u8 b) : s0(a), s1(b) { }
u8xu8x90   inline __device__  u8x (const u8 a)             : s0(a), s1(a) { }
91 
u8xu8x92   inline __device__  u8x (void) : s0(0), s1(0) { }
~u8xu8x93   inline __device__ ~u8x (void) { }
94 };
95 
96 struct __device_builtin__ __builtin_align__(4) u16x
97 {
98   u16 s0;
99   u16 s1;
100 
u16xu16x101   inline __device__  u16x (const u16 a, const u16 b) : s0(a), s1(b) { }
u16xu16x102   inline __device__  u16x (const u16 a)              : s0(a), s1(a) { }
103 
u16xu16x104   inline __device__  u16x (void) : s0(0), s1(0) { }
~u16xu16x105   inline __device__ ~u16x (void) { }
106 };
107 
108 struct __device_builtin__ __builtin_align__(8) u32x
109 {
110   u32 s0;
111   u32 s1;
112 
u32xu32x113   inline __device__  u32x (const u32 a, const u32 b) : s0(a), s1(b) { }
u32xu32x114   inline __device__  u32x (const u32 a)              : s0(a), s1(a) { }
115 
u32xu32x116   inline __device__  u32x (void) : s0(0), s1(0) { }
~u32xu32x117   inline __device__ ~u32x (void) { }
118 };
119 
120 struct __device_builtin__ __builtin_align__(16) u64x
121 {
122   u64 s0;
123   u64 s1;
124 
u64xu64x125   inline __device__  u64x (const u64 a, const u64 b) : s0(a), s1(b) { }
u64xu64x126   inline __device__  u64x (const u64 a)              : s0(a), s1(a) { }
127 
u64xu64x128   inline __device__  u64x (void) : s0(0), s1(0) { }
~u64xu64x129   inline __device__ ~u64x (void) { }
130 };
131 
132 inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b));    }
133 inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
134 
135 inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;     }
136 inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1;  }
137 
138 inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;     }
139 inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1;  }
140 
141 inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;     }
142 inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1;  }
143 
144 inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;     }
145 inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1;  }
146 
147 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;     }
148 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
149 
150 inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;     }
151 inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1;  }
152 
153 inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;     }
154 inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1;  }
155 
156 inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;     }
157 inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1;  }
158 
159 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   );  }
160 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1));  }
161 
162 inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   );  }
163 inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1));  }
164 
165 inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   );  }
166 inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1));  }
167 
168 inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   );  }
169 inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1));  }
170 
171 inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   );  }
172 inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1));  }
173 
174 inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   );  }
175 inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1));  }
176 
177 inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   );  }
178 inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1));  }
179 
180 inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   );  }
181 inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1));  }
182 
183 inline __device__ u32x operator %  (const u32x a, const u32  b) { return u32x ((a.s0 %  b),    (a.s1 %  b)   );  }
184 inline __device__ u32x operator %  (const u32x a, const u32x b) { return u32x ((a.s0 %  b.s0), (a.s1 %  b.s1));  }
185 
186 inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1); }
187 
188 inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b));    }
189 inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
190 
191 inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;     }
192 inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1;  }
193 
194 inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;     }
195 inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1;  }
196 
197 inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;     }
198 inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1;  }
199 
200 inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;     }
201 inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1;  }
202 
203 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;     }
204 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1;  }
205 
206 inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;     }
207 inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1;  }
208 
209 inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;     }
210 inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1;  }
211 
212 inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;     }
213 inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1;  }
214 
215 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   );  }
216 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1));  }
217 
218 inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   );  }
219 inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1));  }
220 
221 inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   );  }
222 inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1));  }
223 
224 inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   );  }
225 inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1));  }
226 
227 inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   );  }
228 inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1));  }
229 
230 inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   );  }
231 inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1));  }
232 
233 inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   );  }
234 inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1));  }
235 
236 inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   );  }
237 inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1));  }
238 
239 inline __device__ u64x operator %  (const u64x a, const u64  b) { return u64x ((a.s0 %  b),    (a.s1 %  b)   );  }
240 inline __device__ u64x operator %  (const u64x a, const u64x b) { return u64x ((a.s0 %  b.s0), (a.s1 %  b.s1));  }
241 
242 inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1); }
243 
244 #endif
245 
246 #if VECT_SIZE == 4
247 
248 struct __device_builtin__ __builtin_align__(4) u8x
249 {
250   u8 s0;
251   u8 s1;
252   u8 s2;
253   u8 s3;
254 
u8xu8x255   inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d) : s0(a), s1(b), s2(c), s3(d) { }
u8xu8x256   inline __device__  u8x (const u8 a)                                     : s0(a), s1(a), s2(a), s3(a) { }
257 
u8xu8x258   inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0) { }
~u8xu8x259   inline __device__ ~u8x (void) { }
260 };
261 
262 struct __device_builtin__ __builtin_align__(8) u16x
263 {
264   u16 s0;
265   u16 s1;
266   u16 s2;
267   u16 s3;
268 
u16xu16x269   inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d) : s0(a), s1(b), s2(c), s3(d) { }
u16xu16x270   inline __device__  u16x (const u16 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
271 
u16xu16x272   inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0) { }
~u16xu16x273   inline __device__ ~u16x (void) { }
274 };
275 
276 struct __device_builtin__ __builtin_align__(16) u32x
277 {
278   u32 s0;
279   u32 s1;
280   u32 s2;
281   u32 s3;
282 
u32xu32x283   inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d) : s0(a), s1(b), s2(c), s3(d) { }
u32xu32x284   inline __device__  u32x (const u32 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
285 
u32xu32x286   inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0) { }
~u32xu32x287   inline __device__ ~u32x (void) { }
288 };
289 
290 struct __device_builtin__ __builtin_align__(32) u64x
291 {
292   u64 s0;
293   u64 s1;
294   u64 s2;
295   u64 s3;
296 
u64xu64x297   inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d) : s0(a), s1(b), s2(c), s3(d) { }
u64xu64x298   inline __device__  u64x (const u64 a)                                        : s0(a), s1(a), s2(a), s3(a) { }
299 
u64xu64x300   inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0) { }
~u64xu64x301   inline __device__ ~u64x (void) { }
302 };
303 
304 inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)   ); }
305 inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
306 
307 inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;     }
308 inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3;  }
309 
310 inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;     }
311 inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3;  }
312 
313 inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;     }
314 inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3;  }
315 
316 inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;     }
317 inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3;  }
318 
319 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
320 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
321 
322 inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;     }
323 inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3;  }
324 
325 inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;     }
326 inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3;  }
327 
328 inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;     }
329 inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3;  }
330 
331 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
332 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
333 
334 inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   );  }
335 inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3));  }
336 
337 inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   );  }
338 inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3));  }
339 
340 inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   );  }
341 inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3));  }
342 
343 inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   );  }
344 inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3));  }
345 
346 inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   );  }
347 inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3));  }
348 
349 inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   );  }
350 inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3));  }
351 
352 inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   );  }
353 inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3));  }
354 
355 inline __device__ u32x operator %  (const u32x a, const u32  b) { return u32x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   );  }
356 inline __device__ u32x operator %  (const u32x a, const u32x b) { return u32x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3));  }
357 
358 inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
359 
360 inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)   ); }
361 inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
362 
363 inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;     }
364 inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3;  }
365 
366 inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;     }
367 inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3;  }
368 
369 inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;     }
370 inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3;  }
371 
372 inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;     }
373 inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3;  }
374 
375 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;     }
376 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3;  }
377 
378 inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;     }
379 inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3;  }
380 
381 inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;     }
382 inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3;  }
383 
384 inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;     }
385 inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3;  }
386 
387 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   );  }
388 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3));  }
389 
390 inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   );  }
391 inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3));  }
392 
393 inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   );  }
394 inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3));  }
395 
396 inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   );  }
397 inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3));  }
398 
399 inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   );  }
400 inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3));  }
401 
402 inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   );  }
403 inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3));  }
404 
405 inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   );  }
406 inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3));  }
407 
408 inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   );  }
409 inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3));  }
410 
411 inline __device__ u64x operator %  (const u64x a, const u32  b) { return u64x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   );  }
412 inline __device__ u64x operator %  (const u64x a, const u64x b) { return u64x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3));  }
413 
414 inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
415 
416 #endif
417 
418 #if VECT_SIZE == 8
419 
420 struct __device_builtin__ __builtin_align__(8) u8x
421 {
422   u8 s0;
423   u8 s1;
424   u8 s2;
425   u8 s3;
426   u8 s4;
427   u8 s5;
428   u8 s6;
429   u8 s7;
430 
u8xu8x431   inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
u8xu8x432   inline __device__  u8x (const u8 a)                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
433 
u8xu8x434   inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
~u8xu8x435   inline __device__ ~u8x (void) { }
436 };
437 
438 struct __device_builtin__ __builtin_align__(16) u16x
439 {
440   u16 s0;
441   u16 s1;
442   u16 s2;
443   u16 s3;
444   u16 s4;
445   u16 s5;
446   u16 s6;
447   u16 s7;
448 
u16xu16x449   inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
u16xu16x450   inline __device__  u16x (const u16 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
451 
u16xu16x452   inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
~u16xu16x453   inline __device__ ~u16x (void) { }
454 };
455 
456 struct __device_builtin__ __builtin_align__(32) u32x
457 {
458   u32 s0;
459   u32 s1;
460   u32 s2;
461   u32 s3;
462   u32 s4;
463   u32 s5;
464   u32 s6;
465   u32 s7;
466 
u32xu32x467   inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
u32xu32x468   inline __device__  u32x (const u32 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
469 
u32xu32x470   inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
~u32xu32x471   inline __device__ ~u32x (void) { }
472 };
473 
474 struct __device_builtin__ __builtin_align__(64) u64x
475 {
476   u64 s0;
477   u64 s1;
478   u64 s2;
479   u64 s3;
480   u64 s4;
481   u64 s5;
482   u64 s6;
483   u64 s7;
484 
u64xu64x485   inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
u64xu64x486   inline __device__  u64x (const u64 a)                                                                                            : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
487 
u64xu64x488   inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
~u64xu64x489   inline __device__ ~u64x (void) { }
490 };
491 
492 inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)   ); }
493 inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }
494 
495 inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;     }
496 inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7;  }
497 
498 inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;     }
499 inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7;  }
500 
501 inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;     }
502 inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7;  }
503 
504 inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;     }
505 inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7;  }
506 
507 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;     }
508 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7;  }
509 
510 inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;     }
511 inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7;  }
512 
513 inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;     }
514 inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7;  }
515 
516 inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;     }
517 inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7;  }
518 
519 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b)   );  }
520 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7));  }
521 
522 inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b)   );  }
523 inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7));  }
524 
525 inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b)   );  }
526 inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7));  }
527 
528 inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b)   );  }
529 inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7));  }
530 
531 inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b)   );  }
532 inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7));  }
533 
534 inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b)   );  }
535 inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7));  }
536 
537 inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b)   );  }
538 inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7));  }
539 
540 inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b)   );  }
541 inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7));  }
542 
543 inline __device__ u32x operator %  (const u32x a, const u32  b) { return u32x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   , (a.s4 %  b),    (a.s5 %  b)   , (a.s6 %  b),    (a.s7 %  b)   );  }
544 inline __device__ u32x operator %  (const u32x a, const u32x b) { return u32x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3), (a.s4 %  b.s4), (a.s5 %  b.s5), (a.s6 %  b.s6), (a.s7 %  b.s7));  }
545 
546 inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
547 
548 inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)   ); }
549 inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }
550 
551 inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;     }
552 inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7;  }
553 
554 inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;     }
555 inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7;  }
556 
557 inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;     }
558 inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7;  }
559 
560 inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;     }
561 inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7;  }
562 
563 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;     }
564 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7;  }
565 
566 inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;     }
567 inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7;  }
568 
569 inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;     }
570 inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7;  }
571 
572 inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;     }
573 inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7;  }
574 
575 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b)   );  }
576 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7));  }
577 
578 inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b)   );  }
579 inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7));  }
580 
581 inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b)   );  }
582 inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7));  }
583 
584 inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b)   );  }
585 inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7));  }
586 
587 inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b)   );  }
588 inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7));  }
589 
590 inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b)   );  }
591 inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7));  }
592 
593 inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b)   );  }
594 inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7));  }
595 
596 inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b)   );  }
597 inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7));  }
598 
599 inline __device__ u64x operator %  (const u64x a, const u64  b) { return u64x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   , (a.s4 %  b),    (a.s5 %  b)   , (a.s6 %  b),    (a.s7 %  b)   );  }
600 inline __device__ u64x operator %  (const u64x a, const u64x b) { return u64x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3), (a.s4 %  b.s4), (a.s5 %  b.s5), (a.s6 %  b.s6), (a.s7 %  b.s7));  }
601 
602 inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
603 
604 #endif
605 
606 #if VECT_SIZE == 16
607 
608 struct __device_builtin__ __builtin_align__(16) u8x
609 {
610   u8 s0;
611   u8 s1;
612   u8 s2;
613   u8 s3;
614   u8 s4;
615   u8 s5;
616   u8 s6;
617   u8 s7;
618   u8 s8;
619   u8 s9;
620   u8 sa;
621   u8 sb;
622   u8 sc;
623   u8 sd;
624   u8 se;
625   u8 sf;
626 
u8xu8x627   inline __device__  u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h, const u8 i, const u8 j, const u8 k, const u8 l, const u8 m, const u8 n, const u8 o, const u8 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
u8xu8x628   inline __device__  u8x (const u8 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
629 
u8xu8x630   inline __device__  u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
~u8xu8x631   inline __device__ ~u8x (void) { }
632 };
633 
634 struct __device_builtin__ __builtin_align__(32) u16x
635 {
636   u16 s0;
637   u16 s1;
638   u16 s2;
639   u16 s3;
640   u16 s4;
641   u16 s5;
642   u16 s6;
643   u16 s7;
644   u16 s8;
645   u16 s9;
646   u16 sa;
647   u16 sb;
648   u16 sc;
649   u16 sd;
650   u16 se;
651   u16 sf;
652 
u16xu16x653   inline __device__  u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h, const u16 i, const u16 j, const u16 k, const u16 l, const u16 m, const u16 n, const u16 o, const u16 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
u16xu16x654   inline __device__  u16x (const u16 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
655 
u16xu16x656   inline __device__  u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ }
~u16xu16x657   inline __device__ ~u16x (void) { }
658 };
659 
660 struct __device_builtin__ __builtin_align__(64) u32x
661 {
662   u32 s0;
663   u32 s1;
664   u32 s2;
665   u32 s3;
666   u32 s4;
667   u32 s5;
668   u32 s6;
669   u32 s7;
670   u32 s8;
671   u32 s9;
672   u32 sa;
673   u32 sb;
674   u32 sc;
675   u32 sd;
676   u32 se;
677   u32 sf;
678 
u32xu32x679   inline __device__  u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h, const u32 i, const u32 j, const u32 k, const u32 l, const u32 m, const u32 n, const u32 o, const u32 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
u32xu32x680   inline __device__  u32x (const u32 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
681 
u32xu32x682   inline __device__  u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ }
~u32xu32x683   inline __device__ ~u32x (void) { }
684 };
685 
686 struct __device_builtin__ __builtin_align__(128) u64x
687 {
688   u64 s0;
689   u64 s1;
690   u64 s2;
691   u64 s3;
692   u64 s4;
693   u64 s5;
694   u64 s6;
695   u64 s7;
696   u64 s8;
697   u64 s9;
698   u64 sa;
699   u64 sb;
700   u64 sc;
701   u64 sd;
702   u64 se;
703   u64 sf;
704 
u64xu64x705   inline __device__  u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h, const u64 i, const u64 j, const u64 k, const u64 l, const u64 m, const u64 n, const u64 o, const u64 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
u64xu64x706   inline __device__  u64x (const u64 a)                                                                                                                                                                                     : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
707 
u64xu64x708   inline __device__  u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
~u64xu64x709   inline __device__ ~u64x (void) { }
710 };
711 
712 inline __device__ bool operator != (const u32x a, const u32  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)    && (a.s8 != b)    && (a.s9 != b)    && (a.sa != b)    && (a.sb != b)    && (a.sc != b)    && (a.sd != b)    && (a.se != b)    && (a.sf != b)   ); }
713 inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }
714 
715 inline __device__ void operator ^= (u32x &a, const u32  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;    a.s8 ^= b;    a.s9 ^= b;    a.sa ^= b;    a.sb ^= b;    a.sc ^= b;    a.sd ^= b;    a.se ^= b;    a.sf ^= b;    }
716 inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }
717 
718 inline __device__ void operator |= (u32x &a, const u32  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;    a.s8 |= b;    a.s9 |= b;    a.sa |= b;    a.sb |= b;    a.sc |= b;    a.sd |= b;    a.se |= b;    a.sf |= b;    }
719 inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }
720 
721 inline __device__ void operator &= (u32x &a, const u32  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;    a.s8 &= b;    a.s9 &= b;    a.sa &= b;    a.sb &= b;    a.sc &= b;    a.sd &= b;    a.se &= b;    a.sf &= b;    }
722 inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }
723 
724 inline __device__ void operator += (u32x &a, const u32  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;    a.s8 += b;    a.s9 += b;    a.sa += b;    a.sb += b;    a.sc += b;    a.sd += b;    a.se += b;    a.sf += b;    }
725 inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }
726 
727 inline __device__ void operator -= (u32x &a, const u32  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;    a.s8 -= b;    a.s9 -= b;    a.sa -= b;    a.sb -= b;    a.sc -= b;    a.sd -= b;    a.se -= b;    a.sf -= b;    }
728 inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
729 
730 inline __device__ void operator *= (u32x &a, const u32  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;    a.s8 *= b;    a.s9 *= b;    a.sa *= b;    a.sb *= b;    a.sc *= b;    a.sd *= b;    a.se *= b;    a.sf *= b;    }
731 inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }
732 
733 inline __device__ void operator >>= (u32x &a, const u32  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;    a.s8 >>= b;    a.s9 >>= b;    a.sa >>= b;    a.sb >>= b;    a.sc >>= b;    a.sd >>= b;    a.se >>= b;    a.sf >>= b;    }
734 inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
735 
736 inline __device__ void operator <<= (u32x &a, const u32  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;    a.s8 <<= b;    a.s9 <<= b;    a.sa <<= b;    a.sb <<= b;    a.sc <<= b;    a.sd <<= b;    a.se <<= b;    a.sf <<= b;    }
737 inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
738 
739 inline __device__ u32x operator << (const u32x a, const u32  b) { return u32x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b),    (a.s8 << b),    (a.s9 << b)   , (a.sa << b),    (a.sb << b)   , (a.sc << b),    (a.sd << b)   , (a.se << b),    (a.sf << b)   );  }
740 inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf));  }
741 
742 inline __device__ u32x operator >> (const u32x a, const u32  b) { return u32x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b),    (a.s8 >> b),    (a.s9 >> b)   , (a.sa >> b),    (a.sb >> b)   , (a.sc >> b),    (a.sd >> b)   , (a.se >> b),    (a.sf >> b)   );  }
743 inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf));  }
744 
745 inline __device__ u32x operator ^  (const u32x a, const u32  b) { return u32x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b),    (a.s8 ^  b),    (a.s9 ^  b)   , (a.sa ^  b),    (a.sb ^  b)   , (a.sc ^  b),    (a.sd ^  b)   , (a.se ^  b),    (a.sf ^  b)   );  }
746 inline __device__ u32x operator ^  (const u32x a, const u32x b) { return u32x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7), (a.s8 ^  b.s8), (a.s9 ^  b.s9), (a.sa ^  b.sa), (a.sb ^  b.sb), (a.sc ^  b.sc), (a.sd ^  b.sd), (a.se ^  b.se), (a.sf ^  b.sf));  }
747 
748 inline __device__ u32x operator |  (const u32x a, const u32  b) { return u32x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b),    (a.s8 |  b),    (a.s9 |  b)   , (a.sa |  b),    (a.sb |  b)   , (a.sc |  b),    (a.sd |  b)   , (a.se |  b),    (a.sf |  b)   );  }
749 inline __device__ u32x operator |  (const u32x a, const u32x b) { return u32x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7), (a.s8 |  b.s8), (a.s9 |  b.s9), (a.sa |  b.sa), (a.sb |  b.sb), (a.sc |  b.sc), (a.sd |  b.sd), (a.se |  b.se), (a.sf |  b.sf));  }
750 
751 inline __device__ u32x operator &  (const u32x a, const u32  b) { return u32x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b),    (a.s8 &  b),    (a.s9 &  b)   , (a.sa &  b),    (a.sb &  b)   , (a.sc &  b),    (a.sd &  b)   , (a.se &  b),    (a.sf &  b)   );  }
752 inline __device__ u32x operator &  (const u32x a, const u32x b) { return u32x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7), (a.s8 &  b.s8), (a.s9 &  b.s9), (a.sa &  b.sa), (a.sb &  b.sb), (a.sc &  b.sc), (a.sd &  b.sd), (a.se &  b.se), (a.sf &  b.sf));  }
753 
754 inline __device__ u32x operator +  (const u32x a, const u32  b) { return u32x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b),    (a.s8 +  b),    (a.s9 +  b)   , (a.sa +  b),    (a.sb +  b)   , (a.sc +  b),    (a.sd +  b)   , (a.se +  b),    (a.sf +  b)   );  }
755 inline __device__ u32x operator +  (const u32x a, const u32x b) { return u32x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7), (a.s8 +  b.s8), (a.s9 +  b.s9), (a.sa +  b.sa), (a.sb +  b.sb), (a.sc +  b.sc), (a.sd +  b.sd), (a.se +  b.se), (a.sf +  b.sf));  }
756 
757 inline __device__ u32x operator -  (const u32x a, const u32  b) { return u32x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b),    (a.s8 -  b),    (a.s9 -  b)   , (a.sa -  b),    (a.sb -  b)   , (a.sc -  b),    (a.sd -  b)   , (a.se -  b),    (a.sf -  b)   );  }
758 inline __device__ u32x operator -  (const u32x a, const u32x b) { return u32x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7), (a.s8 -  b.s8), (a.s9 -  b.s9), (a.sa -  b.sa), (a.sb -  b.sb), (a.sc -  b.sc), (a.sd -  b.sd), (a.se -  b.se), (a.sf -  b.sf));  }
759 
760 inline __device__ u32x operator *  (const u32x a, const u32  b) { return u32x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b),    (a.s8 *  b),    (a.s9 *  b)   , (a.sa *  b),    (a.sb *  b)   , (a.sc *  b),    (a.sd *  b)   , (a.se *  b),    (a.sf *  b)   );  }
761 inline __device__ u32x operator *  (const u32x a, const u32x b) { return u32x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7), (a.s8 *  b.s8), (a.s9 *  b.s9), (a.sa *  b.sa), (a.sb *  b.sb), (a.sc *  b.sc), (a.sd *  b.sd), (a.se *  b.se), (a.sf *  b.sf));  }
762 
763 inline __device__ u32x operator %  (const u32x a, const u32  b) { return u32x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   , (a.s4 %  b),    (a.s5 %  b)   , (a.s6 %  b),    (a.s7 %  b),    (a.s8 %  b),    (a.s9 %  b)   , (a.sa %  b),    (a.sb %  b)   , (a.sc %  b),    (a.sd %  b)   , (a.se %  b),    (a.sf %  b)   );  }
764 inline __device__ u32x operator %  (const u32x a, const u32x b) { return u32x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3), (a.s4 %  b.s4), (a.s5 %  b.s5), (a.s6 %  b.s6), (a.s7 %  b.s7), (a.s8 %  b.s8), (a.s9 %  b.s9), (a.sa %  b.sa), (a.sb %  b.sb), (a.sc %  b.sc), (a.sd %  b.sd), (a.se %  b.se), (a.sf %  b.sf));  }
765 
766 inline __device__ u32x operator ~  (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
767 
768 inline __device__ bool operator != (const u64x a, const u64  b) { return ((a.s0 != b)    && (a.s1 != b)    && (a.s2 != b)    && (a.s3 != b)    && (a.s4 != b)    && (a.s5 != b)    && (a.s6 != b)    && (a.s7 != b)    && (a.s8 != b)    && (a.s9 != b)    && (a.sa != b)    && (a.sb != b)    && (a.sc != b)    && (a.sd != b)    && (a.se != b)    && (a.sf != b)   ); }
769 inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }
770 
771 inline __device__ void operator ^= (u64x &a, const u64  b) { a.s0 ^= b;    a.s1 ^= b;    a.s2 ^= b;    a.s3 ^= b;    a.s4 ^= b;    a.s5 ^= b;    a.s6 ^= b;    a.s7 ^= b;    a.s8 ^= b;    a.s9 ^= b;    a.sa ^= b;    a.sb ^= b;    a.sc ^= b;    a.sd ^= b;    a.se ^= b;    a.sf ^= b;    }
772 inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }
773 
774 inline __device__ void operator |= (u64x &a, const u64  b) { a.s0 |= b;    a.s1 |= b;    a.s2 |= b;    a.s3 |= b;    a.s4 |= b;    a.s5 |= b;    a.s6 |= b;    a.s7 |= b;    a.s8 |= b;    a.s9 |= b;    a.sa |= b;    a.sb |= b;    a.sc |= b;    a.sd |= b;    a.se |= b;    a.sf |= b;    }
775 inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }
776 
777 inline __device__ void operator &= (u64x &a, const u64  b) { a.s0 &= b;    a.s1 &= b;    a.s2 &= b;    a.s3 &= b;    a.s4 &= b;    a.s5 &= b;    a.s6 &= b;    a.s7 &= b;    a.s8 &= b;    a.s9 &= b;    a.sa &= b;    a.sb &= b;    a.sc &= b;    a.sd &= b;    a.se &= b;    a.sf &= b;    }
778 inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }
779 
780 inline __device__ void operator += (u64x &a, const u64  b) { a.s0 += b;    a.s1 += b;    a.s2 += b;    a.s3 += b;    a.s4 += b;    a.s5 += b;    a.s6 += b;    a.s7 += b;    a.s8 += b;    a.s9 += b;    a.sa += b;    a.sb += b;    a.sc += b;    a.sd += b;    a.se += b;    a.sf += b;    }
781 inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }
782 
783 inline __device__ void operator -= (u64x &a, const u64  b) { a.s0 -= b;    a.s1 -= b;    a.s2 -= b;    a.s3 -= b;    a.s4 -= b;    a.s5 -= b;    a.s6 -= b;    a.s7 -= b;    a.s8 -= b;    a.s9 -= b;    a.sa -= b;    a.sb -= b;    a.sc -= b;    a.sd -= b;    a.se -= b;    a.sf -= b;    }
784 inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }
785 
786 inline __device__ void operator *= (u64x &a, const u64  b) { a.s0 *= b;    a.s1 *= b;    a.s2 *= b;    a.s3 *= b;    a.s4 *= b;    a.s5 *= b;    a.s6 *= b;    a.s7 *= b;    a.s8 *= b;    a.s9 *= b;    a.sa *= b;    a.sb *= b;    a.sc *= b;    a.sd *= b;    a.se *= b;    a.sf *= b;    }
787 inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }
788 
789 inline __device__ void operator >>= (u64x &a, const u64  b) { a.s0 >>= b;    a.s1 >>= b;    a.s2 >>= b;    a.s3 >>= b;    a.s4 >>= b;    a.s5 >>= b;    a.s6 >>= b;    a.s7 >>= b;    a.s8 >>= b;    a.s9 >>= b;    a.sa >>= b;    a.sb >>= b;    a.sc >>= b;    a.sd >>= b;    a.se >>= b;    a.sf >>= b;    }
790 inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
791 
792 inline __device__ void operator <<= (u64x &a, const u64  b) { a.s0 <<= b;    a.s1 <<= b;    a.s2 <<= b;    a.s3 <<= b;    a.s4 <<= b;    a.s5 <<= b;    a.s6 <<= b;    a.s7 <<= b;    a.s8 <<= b;    a.s9 <<= b;    a.sa <<= b;    a.sb <<= b;    a.sc <<= b;    a.sd <<= b;    a.se <<= b;    a.sf <<= b;    }
793 inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
794 
795 inline __device__ u64x operator << (const u64x a, const u64  b) { return u64x ((a.s0 << b),    (a.s1 << b)   , (a.s2 << b),    (a.s3 << b)   , (a.s4 << b),    (a.s5 << b)   , (a.s6 << b),    (a.s7 << b),    (a.s8 << b),    (a.s9 << b)   , (a.sa << b),    (a.sb << b)   , (a.sc << b),    (a.sd << b)   , (a.se << b),    (a.sf << b)   );  }
796 inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf));  }
797 
798 inline __device__ u64x operator >> (const u64x a, const u64  b) { return u64x ((a.s0 >> b),    (a.s1 >> b)   , (a.s2 >> b),    (a.s3 >> b)   , (a.s4 >> b),    (a.s5 >> b)   , (a.s6 >> b),    (a.s7 >> b),    (a.s8 >> b),    (a.s9 >> b)   , (a.sa >> b),    (a.sb >> b)   , (a.sc >> b),    (a.sd >> b)   , (a.se >> b),    (a.sf >> b)   );  }
799 inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf));  }
800 
801 inline __device__ u64x operator ^  (const u64x a, const u64  b) { return u64x ((a.s0 ^  b),    (a.s1 ^  b)   , (a.s2 ^  b),    (a.s3 ^  b)   , (a.s4 ^  b),    (a.s5 ^  b)   , (a.s6 ^  b),    (a.s7 ^  b),    (a.s8 ^  b),    (a.s9 ^  b)   , (a.sa ^  b),    (a.sb ^  b)   , (a.sc ^  b),    (a.sd ^  b)   , (a.se ^  b),    (a.sf ^  b)   );  }
802 inline __device__ u64x operator ^  (const u64x a, const u64x b) { return u64x ((a.s0 ^  b.s0), (a.s1 ^  b.s1), (a.s2 ^  b.s2), (a.s3 ^  b.s3), (a.s4 ^  b.s4), (a.s5 ^  b.s5), (a.s6 ^  b.s6), (a.s7 ^  b.s7), (a.s8 ^  b.s8), (a.s9 ^  b.s9), (a.sa ^  b.sa), (a.sb ^  b.sb), (a.sc ^  b.sc), (a.sd ^  b.sd), (a.se ^  b.se), (a.sf ^  b.sf));  }
803 
804 inline __device__ u64x operator |  (const u64x a, const u64  b) { return u64x ((a.s0 |  b),    (a.s1 |  b)   , (a.s2 |  b),    (a.s3 |  b)   , (a.s4 |  b),    (a.s5 |  b)   , (a.s6 |  b),    (a.s7 |  b),    (a.s8 |  b),    (a.s9 |  b)   , (a.sa |  b),    (a.sb |  b)   , (a.sc |  b),    (a.sd |  b)   , (a.se |  b),    (a.sf |  b)   );  }
805 inline __device__ u64x operator |  (const u64x a, const u64x b) { return u64x ((a.s0 |  b.s0), (a.s1 |  b.s1), (a.s2 |  b.s2), (a.s3 |  b.s3), (a.s4 |  b.s4), (a.s5 |  b.s5), (a.s6 |  b.s6), (a.s7 |  b.s7), (a.s8 |  b.s8), (a.s9 |  b.s9), (a.sa |  b.sa), (a.sb |  b.sb), (a.sc |  b.sc), (a.sd |  b.sd), (a.se |  b.se), (a.sf |  b.sf));  }
806 
807 inline __device__ u64x operator &  (const u64x a, const u64  b) { return u64x ((a.s0 &  b),    (a.s1 &  b)   , (a.s2 &  b),    (a.s3 &  b)   , (a.s4 &  b),    (a.s5 &  b)   , (a.s6 &  b),    (a.s7 &  b),    (a.s8 &  b),    (a.s9 &  b)   , (a.sa &  b),    (a.sb &  b)   , (a.sc &  b),    (a.sd &  b)   , (a.se &  b),    (a.sf &  b)   );  }
808 inline __device__ u64x operator &  (const u64x a, const u64x b) { return u64x ((a.s0 &  b.s0), (a.s1 &  b.s1), (a.s2 &  b.s2), (a.s3 &  b.s3), (a.s4 &  b.s4), (a.s5 &  b.s5), (a.s6 &  b.s6), (a.s7 &  b.s7), (a.s8 &  b.s8), (a.s9 &  b.s9), (a.sa &  b.sa), (a.sb &  b.sb), (a.sc &  b.sc), (a.sd &  b.sd), (a.se &  b.se), (a.sf &  b.sf));  }
809 
810 inline __device__ u64x operator +  (const u64x a, const u64  b) { return u64x ((a.s0 +  b),    (a.s1 +  b)   , (a.s2 +  b),    (a.s3 +  b)   , (a.s4 +  b),    (a.s5 +  b)   , (a.s6 +  b),    (a.s7 +  b),    (a.s8 +  b),    (a.s9 +  b)   , (a.sa +  b),    (a.sb +  b)   , (a.sc +  b),    (a.sd +  b)   , (a.se +  b),    (a.sf +  b)   );  }
811 inline __device__ u64x operator +  (const u64x a, const u64x b) { return u64x ((a.s0 +  b.s0), (a.s1 +  b.s1), (a.s2 +  b.s2), (a.s3 +  b.s3), (a.s4 +  b.s4), (a.s5 +  b.s5), (a.s6 +  b.s6), (a.s7 +  b.s7), (a.s8 +  b.s8), (a.s9 +  b.s9), (a.sa +  b.sa), (a.sb +  b.sb), (a.sc +  b.sc), (a.sd +  b.sd), (a.se +  b.se), (a.sf +  b.sf));  }
812 
813 inline __device__ u64x operator -  (const u64x a, const u64  b) { return u64x ((a.s0 -  b),    (a.s1 -  b)   , (a.s2 -  b),    (a.s3 -  b)   , (a.s4 -  b),    (a.s5 -  b)   , (a.s6 -  b),    (a.s7 -  b),    (a.s8 -  b),    (a.s9 -  b)   , (a.sa -  b),    (a.sb -  b)   , (a.sc -  b),    (a.sd -  b)   , (a.se -  b),    (a.sf -  b)   );  }
814 inline __device__ u64x operator -  (const u64x a, const u64x b) { return u64x ((a.s0 -  b.s0), (a.s1 -  b.s1), (a.s2 -  b.s2), (a.s3 -  b.s3), (a.s4 -  b.s4), (a.s5 -  b.s5), (a.s6 -  b.s6), (a.s7 -  b.s7), (a.s8 -  b.s8), (a.s9 -  b.s9), (a.sa -  b.sa), (a.sb -  b.sb), (a.sc -  b.sc), (a.sd -  b.sd), (a.se -  b.se), (a.sf -  b.sf));  }
815 
816 inline __device__ u64x operator *  (const u64x a, const u64  b) { return u64x ((a.s0 *  b),    (a.s1 *  b)   , (a.s2 *  b),    (a.s3 *  b)   , (a.s4 *  b),    (a.s5 *  b)   , (a.s6 *  b),    (a.s7 *  b),    (a.s8 *  b),    (a.s9 *  b)   , (a.sa *  b),    (a.sb *  b)   , (a.sc *  b),    (a.sd *  b)   , (a.se *  b),    (a.sf *  b)   );  }
817 inline __device__ u64x operator *  (const u64x a, const u64x b) { return u64x ((a.s0 *  b.s0), (a.s1 *  b.s1), (a.s2 *  b.s2), (a.s3 *  b.s3), (a.s4 *  b.s4), (a.s5 *  b.s5), (a.s6 *  b.s6), (a.s7 *  b.s7), (a.s8 *  b.s8), (a.s9 *  b.s9), (a.sa *  b.sa), (a.sb *  b.sb), (a.sc *  b.sc), (a.sd *  b.sd), (a.se *  b.se), (a.sf *  b.sf));  }
818 
819 inline __device__ u64x operator %  (const u64x a, const u64  b) { return u64x ((a.s0 %  b),    (a.s1 %  b)   , (a.s2 %  b),    (a.s3 %  b)   , (a.s4 %  b),    (a.s5 %  b)   , (a.s6 %  b),    (a.s7 %  b),    (a.s8 %  b),    (a.s9 %  b)   , (a.sa %  b),    (a.sb %  b)   , (a.sc %  b),    (a.sd %  b)   , (a.se %  b),    (a.sf %  b)   );  }
820 inline __device__ u64x operator %  (const u64x a, const u64x b) { return u64x ((a.s0 %  b.s0), (a.s1 %  b.s1), (a.s2 %  b.s2), (a.s3 %  b.s3), (a.s4 %  b.s4), (a.s5 %  b.s5), (a.s6 %  b.s6), (a.s7 %  b.s7), (a.s8 %  b.s8), (a.s9 %  b.s9), (a.sa %  b.sa), (a.sb %  b.sb), (a.sc %  b.sc), (a.sd %  b.sd), (a.se %  b.se), (a.sf %  b.sf));  }
821 
822 inline __device__ u64x operator ~  (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
823 
824 #endif
825 
826 typedef __device_builtin__ struct u8x  u8x;
827 typedef __device_builtin__ struct u16x u16x;
828 typedef __device_builtin__ struct u32x u32x;
829 typedef __device_builtin__ struct u64x u64x;
830 
831 #define make_u8x  u8x
832 #define make_u16x u16x
833 #define make_u32x u32x
834 #define make_u64x u64x
835 
836 #else
837 typedef VTYPE(uchar,  VECT_SIZE) u8x;
838 typedef VTYPE(ushort, VECT_SIZE) u16x;
839 typedef VTYPE(uint,   VECT_SIZE) u32x;
840 typedef VTYPE(ullong, VECT_SIZE) u64x;
841 
842 #define make_u8x  (u8x)
843 #define make_u16x (u16x)
844 #define make_u32x (u32x)
845 #define make_u64x (u64x)
846 
847 #endif
848 #endif
849 
850 // unions
851 
852 typedef union vconv32
853 {
854   u64 v32;
855 
856   struct
857   {
858     u16 a;
859     u16 b;
860 
861   } v16;
862 
863   struct
864   {
865     u8 a;
866     u8 b;
867     u8 c;
868     u8 d;
869 
870   } v8;
871 
872 } vconv32_t;
873 
874 typedef union vconv64
875 {
876   u64 v64;
877 
878   struct
879   {
880     u32 a;
881     u32 b;
882 
883   } v32;
884 
885   struct
886   {
887     u16 a;
888     u16 b;
889     u16 c;
890     u16 d;
891 
892   } v16;
893 
894   struct
895   {
896     u8 a;
897     u8 b;
898     u8 c;
899     u8 d;
900     u8 e;
901     u8 f;
902     u8 g;
903     u8 h;
904 
905   } v8;
906 
907 } vconv64_t;
908 
909 /**
910  * Author......: See docs/credits.txt
911  * License.....: MIT
912  */
913 
914 typedef enum siphash_constants
915 {
916   SIPHASHM_0=0x736f6d6570736575UL,
917   SIPHASHM_1=0x646f72616e646f6dUL,
918   SIPHASHM_2=0x6c7967656e657261UL,
919   SIPHASHM_3=0x7465646279746573UL
920 
921 } siphash_constants_t;
922 
923 typedef enum bcrypt_constants
924 {
925   BCRYPTM_0=0x4f727068U,
926   BCRYPTM_1=0x65616e42U,
927   BCRYPTM_2=0x65686f6cU,
928   BCRYPTM_3=0x64657253U,
929   BCRYPTM_4=0x63727944U,
930   BCRYPTM_5=0x6f756274U
931 
932 } bcrypt_constants_t;
933 
934 typedef enum md4_constants
935 {
936   MD4M_A=0x67452301U,
937   MD4M_B=0xefcdab89U,
938   MD4M_C=0x98badcfeU,
939   MD4M_D=0x10325476U,
940 
941   MD4S00=3,
942   MD4S01=7,
943   MD4S02=11,
944   MD4S03=19,
945   MD4S10=3,
946   MD4S11=5,
947   MD4S12=9,
948   MD4S13=13,
949   MD4S20=3,
950   MD4S21=9,
951   MD4S22=11,
952   MD4S23=15,
953 
954   MD4C00=0x00000000U,
955   MD4C01=0x5a827999U,
956   MD4C02=0x6ed9eba1U
957 
958 } md4_constants_t;
959 
960 typedef enum md5_constants
961 {
962   MD5M_A=0x67452301U,
963   MD5M_B=0xefcdab89U,
964   MD5M_C=0x98badcfeU,
965   MD5M_D=0x10325476U,
966 
967   MD5S00=7,
968   MD5S01=12,
969   MD5S02=17,
970   MD5S03=22,
971   MD5S10=5,
972   MD5S11=9,
973   MD5S12=14,
974   MD5S13=20,
975   MD5S20=4,
976   MD5S21=11,
977   MD5S22=16,
978   MD5S23=23,
979   MD5S30=6,
980   MD5S31=10,
981   MD5S32=15,
982   MD5S33=21,
983 
984   MD5C00=0xd76aa478U,
985   MD5C01=0xe8c7b756U,
986   MD5C02=0x242070dbU,
987   MD5C03=0xc1bdceeeU,
988   MD5C04=0xf57c0fafU,
989   MD5C05=0x4787c62aU,
990   MD5C06=0xa8304613U,
991   MD5C07=0xfd469501U,
992   MD5C08=0x698098d8U,
993   MD5C09=0x8b44f7afU,
994   MD5C0a=0xffff5bb1U,
995   MD5C0b=0x895cd7beU,
996   MD5C0c=0x6b901122U,
997   MD5C0d=0xfd987193U,
998   MD5C0e=0xa679438eU,
999   MD5C0f=0x49b40821U,
1000   MD5C10=0xf61e2562U,
1001   MD5C11=0xc040b340U,
1002   MD5C12=0x265e5a51U,
1003   MD5C13=0xe9b6c7aaU,
1004   MD5C14=0xd62f105dU,
1005   MD5C15=0x02441453U,
1006   MD5C16=0xd8a1e681U,
1007   MD5C17=0xe7d3fbc8U,
1008   MD5C18=0x21e1cde6U,
1009   MD5C19=0xc33707d6U,
1010   MD5C1a=0xf4d50d87U,
1011   MD5C1b=0x455a14edU,
1012   MD5C1c=0xa9e3e905U,
1013   MD5C1d=0xfcefa3f8U,
1014   MD5C1e=0x676f02d9U,
1015   MD5C1f=0x8d2a4c8aU,
1016   MD5C20=0xfffa3942U,
1017   MD5C21=0x8771f681U,
1018   MD5C22=0x6d9d6122U,
1019   MD5C23=0xfde5380cU,
1020   MD5C24=0xa4beea44U,
1021   MD5C25=0x4bdecfa9U,
1022   MD5C26=0xf6bb4b60U,
1023   MD5C27=0xbebfbc70U,
1024   MD5C28=0x289b7ec6U,
1025   MD5C29=0xeaa127faU,
1026   MD5C2a=0xd4ef3085U,
1027   MD5C2b=0x04881d05U,
1028   MD5C2c=0xd9d4d039U,
1029   MD5C2d=0xe6db99e5U,
1030   MD5C2e=0x1fa27cf8U,
1031   MD5C2f=0xc4ac5665U,
1032   MD5C30=0xf4292244U,
1033   MD5C31=0x432aff97U,
1034   MD5C32=0xab9423a7U,
1035   MD5C33=0xfc93a039U,
1036   MD5C34=0x655b59c3U,
1037   MD5C35=0x8f0ccc92U,
1038   MD5C36=0xffeff47dU,
1039   MD5C37=0x85845dd1U,
1040   MD5C38=0x6fa87e4fU,
1041   MD5C39=0xfe2ce6e0U,
1042   MD5C3a=0xa3014314U,
1043   MD5C3b=0x4e0811a1U,
1044   MD5C3c=0xf7537e82U,
1045   MD5C3d=0xbd3af235U,
1046   MD5C3e=0x2ad7d2bbU,
1047   MD5C3f=0xeb86d391U
1048 
1049 } md5_constants_t;
1050 
1051 typedef enum sha1_constants
1052 {
1053   SHA1M_A=0x67452301U,
1054   SHA1M_B=0xefcdab89U,
1055   SHA1M_C=0x98badcfeU,
1056   SHA1M_D=0x10325476U,
1057   SHA1M_E=0xc3d2e1f0U,
1058 
1059   SHA1C00=0x5a827999U,
1060   SHA1C01=0x6ed9eba1U,
1061   SHA1C02=0x8f1bbcdcU,
1062   SHA1C03=0xca62c1d6U
1063 
1064 } sha1_constants_t;
1065 
1066 typedef enum sha2_32_constants
1067 {
1068   // SHA-224 Initial Hash Values
1069   SHA224M_A=0xc1059ed8U,
1070   SHA224M_B=0x367cd507U,
1071   SHA224M_C=0x3070dd17U,
1072   SHA224M_D=0xf70e5939U,
1073   SHA224M_E=0xffc00b31U,
1074   SHA224M_F=0x68581511U,
1075   SHA224M_G=0x64f98fa7U,
1076   SHA224M_H=0xbefa4fa4U,
1077 
1078   // SHA-224 Constants
1079   SHA224C00=0x428a2f98U,
1080   SHA224C01=0x71374491U,
1081   SHA224C02=0xb5c0fbcfU,
1082   SHA224C03=0xe9b5dba5U,
1083   SHA224C04=0x3956c25bU,
1084   SHA224C05=0x59f111f1U,
1085   SHA224C06=0x923f82a4U,
1086   SHA224C07=0xab1c5ed5U,
1087   SHA224C08=0xd807aa98U,
1088   SHA224C09=0x12835b01U,
1089   SHA224C0a=0x243185beU,
1090   SHA224C0b=0x550c7dc3U,
1091   SHA224C0c=0x72be5d74U,
1092   SHA224C0d=0x80deb1feU,
1093   SHA224C0e=0x9bdc06a7U,
1094   SHA224C0f=0xc19bf174U,
1095   SHA224C10=0xe49b69c1U,
1096   SHA224C11=0xefbe4786U,
1097   SHA224C12=0x0fc19dc6U,
1098   SHA224C13=0x240ca1ccU,
1099   SHA224C14=0x2de92c6fU,
1100   SHA224C15=0x4a7484aaU,
1101   SHA224C16=0x5cb0a9dcU,
1102   SHA224C17=0x76f988daU,
1103   SHA224C18=0x983e5152U,
1104   SHA224C19=0xa831c66dU,
1105   SHA224C1a=0xb00327c8U,
1106   SHA224C1b=0xbf597fc7U,
1107   SHA224C1c=0xc6e00bf3U,
1108   SHA224C1d=0xd5a79147U,
1109   SHA224C1e=0x06ca6351U,
1110   SHA224C1f=0x14292967U,
1111   SHA224C20=0x27b70a85U,
1112   SHA224C21=0x2e1b2138U,
1113   SHA224C22=0x4d2c6dfcU,
1114   SHA224C23=0x53380d13U,
1115   SHA224C24=0x650a7354U,
1116   SHA224C25=0x766a0abbU,
1117   SHA224C26=0x81c2c92eU,
1118   SHA224C27=0x92722c85U,
1119   SHA224C28=0xa2bfe8a1U,
1120   SHA224C29=0xa81a664bU,
1121   SHA224C2a=0xc24b8b70U,
1122   SHA224C2b=0xc76c51a3U,
1123   SHA224C2c=0xd192e819U,
1124   SHA224C2d=0xd6990624U,
1125   SHA224C2e=0xf40e3585U,
1126   SHA224C2f=0x106aa070U,
1127   SHA224C30=0x19a4c116U,
1128   SHA224C31=0x1e376c08U,
1129   SHA224C32=0x2748774cU,
1130   SHA224C33=0x34b0bcb5U,
1131   SHA224C34=0x391c0cb3U,
1132   SHA224C35=0x4ed8aa4aU,
1133   SHA224C36=0x5b9cca4fU,
1134   SHA224C37=0x682e6ff3U,
1135   SHA224C38=0x748f82eeU,
1136   SHA224C39=0x78a5636fU,
1137   SHA224C3a=0x84c87814U,
1138   SHA224C3b=0x8cc70208U,
1139   SHA224C3c=0x90befffaU,
1140   SHA224C3d=0xa4506cebU,
1141   SHA224C3e=0xbef9a3f7U,
1142   SHA224C3f=0xc67178f2U,
1143 
1144   // SHA-256 Initial Hash Values
1145   SHA256M_A=0x6a09e667U,
1146   SHA256M_B=0xbb67ae85U,
1147   SHA256M_C=0x3c6ef372U,
1148   SHA256M_D=0xa54ff53aU,
1149   SHA256M_E=0x510e527fU,
1150   SHA256M_F=0x9b05688cU,
1151   SHA256M_G=0x1f83d9abU,
1152   SHA256M_H=0x5be0cd19U,
1153 
1154   // SHA-256 Constants
1155   SHA256C00=0x428a2f98U,
1156   SHA256C01=0x71374491U,
1157   SHA256C02=0xb5c0fbcfU,
1158   SHA256C03=0xe9b5dba5U,
1159   SHA256C04=0x3956c25bU,
1160   SHA256C05=0x59f111f1U,
1161   SHA256C06=0x923f82a4U,
1162   SHA256C07=0xab1c5ed5U,
1163   SHA256C08=0xd807aa98U,
1164   SHA256C09=0x12835b01U,
1165   SHA256C0a=0x243185beU,
1166   SHA256C0b=0x550c7dc3U,
1167   SHA256C0c=0x72be5d74U,
1168   SHA256C0d=0x80deb1feU,
1169   SHA256C0e=0x9bdc06a7U,
1170   SHA256C0f=0xc19bf174U,
1171   SHA256C10=0xe49b69c1U,
1172   SHA256C11=0xefbe4786U,
1173   SHA256C12=0x0fc19dc6U,
1174   SHA256C13=0x240ca1ccU,
1175   SHA256C14=0x2de92c6fU,
1176   SHA256C15=0x4a7484aaU,
1177   SHA256C16=0x5cb0a9dcU,
1178   SHA256C17=0x76f988daU,
1179   SHA256C18=0x983e5152U,
1180   SHA256C19=0xa831c66dU,
1181   SHA256C1a=0xb00327c8U,
1182   SHA256C1b=0xbf597fc7U,
1183   SHA256C1c=0xc6e00bf3U,
1184   SHA256C1d=0xd5a79147U,
1185   SHA256C1e=0x06ca6351U,
1186   SHA256C1f=0x14292967U,
1187   SHA256C20=0x27b70a85U,
1188   SHA256C21=0x2e1b2138U,
1189   SHA256C22=0x4d2c6dfcU,
1190   SHA256C23=0x53380d13U,
1191   SHA256C24=0x650a7354U,
1192   SHA256C25=0x766a0abbU,
1193   SHA256C26=0x81c2c92eU,
1194   SHA256C27=0x92722c85U,
1195   SHA256C28=0xa2bfe8a1U,
1196   SHA256C29=0xa81a664bU,
1197   SHA256C2a=0xc24b8b70U,
1198   SHA256C2b=0xc76c51a3U,
1199   SHA256C2c=0xd192e819U,
1200   SHA256C2d=0xd6990624U,
1201   SHA256C2e=0xf40e3585U,
1202   SHA256C2f=0x106aa070U,
1203   SHA256C30=0x19a4c116U,
1204   SHA256C31=0x1e376c08U,
1205   SHA256C32=0x2748774cU,
1206   SHA256C33=0x34b0bcb5U,
1207   SHA256C34=0x391c0cb3U,
1208   SHA256C35=0x4ed8aa4aU,
1209   SHA256C36=0x5b9cca4fU,
1210   SHA256C37=0x682e6ff3U,
1211   SHA256C38=0x748f82eeU,
1212   SHA256C39=0x78a5636fU,
1213   SHA256C3a=0x84c87814U,
1214   SHA256C3b=0x8cc70208U,
1215   SHA256C3c=0x90befffaU,
1216   SHA256C3d=0xa4506cebU,
1217   SHA256C3e=0xbef9a3f7U,
1218   SHA256C3f=0xc67178f2U,
1219 
1220 } sha2_32_constants_t;
1221 
1222 typedef enum sha2_64_constants
1223 {
1224   // SHA-384 Initial Hash Values
1225   SHA384M_A=0xcbbb9d5dc1059ed8UL,
1226   SHA384M_B=0x629a292a367cd507UL,
1227   SHA384M_C=0x9159015a3070dd17UL,
1228   SHA384M_D=0x152fecd8f70e5939UL,
1229   SHA384M_E=0x67332667ffc00b31UL,
1230   SHA384M_F=0x8eb44a8768581511UL,
1231   SHA384M_G=0xdb0c2e0d64f98fa7UL,
1232   SHA384M_H=0x47b5481dbefa4fa4UL,
1233 
1234   // SHA-512 Initial Hash Values
1235   SHA512M_A=0x6a09e667f3bcc908UL,
1236   SHA512M_B=0xbb67ae8584caa73bUL,
1237   SHA512M_C=0x3c6ef372fe94f82bUL,
1238   SHA512M_D=0xa54ff53a5f1d36f1UL,
1239   SHA512M_E=0x510e527fade682d1UL,
1240   SHA512M_F=0x9b05688c2b3e6c1fUL,
1241   SHA512M_G=0x1f83d9abfb41bd6bUL,
1242   SHA512M_H=0x5be0cd19137e2179UL,
1243 
1244   // SHA-384/512 Constants
1245   SHA512C00=0x428a2f98d728ae22UL,
1246   SHA512C01=0x7137449123ef65cdUL,
1247   SHA512C02=0xb5c0fbcfec4d3b2fUL,
1248   SHA512C03=0xe9b5dba58189dbbcUL,
1249   SHA512C04=0x3956c25bf348b538UL,
1250   SHA512C05=0x59f111f1b605d019UL,
1251   SHA512C06=0x923f82a4af194f9bUL,
1252   SHA512C07=0xab1c5ed5da6d8118UL,
1253   SHA512C08=0xd807aa98a3030242UL,
1254   SHA512C09=0x12835b0145706fbeUL,
1255   SHA512C0a=0x243185be4ee4b28cUL,
1256   SHA512C0b=0x550c7dc3d5ffb4e2UL,
1257   SHA512C0c=0x72be5d74f27b896fUL,
1258   SHA512C0d=0x80deb1fe3b1696b1UL,
1259   SHA512C0e=0x9bdc06a725c71235UL,
1260   SHA512C0f=0xc19bf174cf692694UL,
1261   SHA512C10=0xe49b69c19ef14ad2UL,
1262   SHA512C11=0xefbe4786384f25e3UL,
1263   SHA512C12=0x0fc19dc68b8cd5b5UL,
1264   SHA512C13=0x240ca1cc77ac9c65UL,
1265   SHA512C14=0x2de92c6f592b0275UL,
1266   SHA512C15=0x4a7484aa6ea6e483UL,
1267   SHA512C16=0x5cb0a9dcbd41fbd4UL,
1268   SHA512C17=0x76f988da831153b5UL,
1269   SHA512C18=0x983e5152ee66dfabUL,
1270   SHA512C19=0xa831c66d2db43210UL,
1271   SHA512C1a=0xb00327c898fb213fUL,
1272   SHA512C1b=0xbf597fc7beef0ee4UL,
1273   SHA512C1c=0xc6e00bf33da88fc2UL,
1274   SHA512C1d=0xd5a79147930aa725UL,
1275   SHA512C1e=0x06ca6351e003826fUL,
1276   SHA512C1f=0x142929670a0e6e70UL,
1277   SHA512C20=0x27b70a8546d22ffcUL,
1278   SHA512C21=0x2e1b21385c26c926UL,
1279   SHA512C22=0x4d2c6dfc5ac42aedUL,
1280   SHA512C23=0x53380d139d95b3dfUL,
1281   SHA512C24=0x650a73548baf63deUL,
1282   SHA512C25=0x766a0abb3c77b2a8UL,
1283   SHA512C26=0x81c2c92e47edaee6UL,
1284   SHA512C27=0x92722c851482353bUL,
1285   SHA512C28=0xa2bfe8a14cf10364UL,
1286   SHA512C29=0xa81a664bbc423001UL,
1287   SHA512C2a=0xc24b8b70d0f89791UL,
1288   SHA512C2b=0xc76c51a30654be30UL,
1289   SHA512C2c=0xd192e819d6ef5218UL,
1290   SHA512C2d=0xd69906245565a910UL,
1291   SHA512C2e=0xf40e35855771202aUL,
1292   SHA512C2f=0x106aa07032bbd1b8UL,
1293   SHA512C30=0x19a4c116b8d2d0c8UL,
1294   SHA512C31=0x1e376c085141ab53UL,
1295   SHA512C32=0x2748774cdf8eeb99UL,
1296   SHA512C33=0x34b0bcb5e19b48a8UL,
1297   SHA512C34=0x391c0cb3c5c95a63UL,
1298   SHA512C35=0x4ed8aa4ae3418acbUL,
1299   SHA512C36=0x5b9cca4f7763e373UL,
1300   SHA512C37=0x682e6ff3d6b2b8a3UL,
1301   SHA512C38=0x748f82ee5defb2fcUL,
1302   SHA512C39=0x78a5636f43172f60UL,
1303   SHA512C3a=0x84c87814a1f0ab72UL,
1304   SHA512C3b=0x8cc702081a6439ecUL,
1305   SHA512C3c=0x90befffa23631e28UL,
1306   SHA512C3d=0xa4506cebde82bde9UL,
1307   SHA512C3e=0xbef9a3f7b2c67915UL,
1308   SHA512C3f=0xc67178f2e372532bUL,
1309   SHA512C40=0xca273eceea26619cUL,
1310   SHA512C41=0xd186b8c721c0c207UL,
1311   SHA512C42=0xeada7dd6cde0eb1eUL,
1312   SHA512C43=0xf57d4f7fee6ed178UL,
1313   SHA512C44=0x06f067aa72176fbaUL,
1314   SHA512C45=0x0a637dc5a2c898a6UL,
1315   SHA512C46=0x113f9804bef90daeUL,
1316   SHA512C47=0x1b710b35131c471bUL,
1317   SHA512C48=0x28db77f523047d84UL,
1318   SHA512C49=0x32caab7b40c72493UL,
1319   SHA512C4a=0x3c9ebe0a15c9bebcUL,
1320   SHA512C4b=0x431d67c49c100d4cUL,
1321   SHA512C4c=0x4cc5d4becb3e42b6UL,
1322   SHA512C4d=0x597f299cfc657e2aUL,
1323   SHA512C4e=0x5fcb6fab3ad6faecUL,
1324   SHA512C4f=0x6c44198c4a475817UL
1325 
1326 } sha2_64_constants_t;
1327 
1328 typedef enum ripemd160_constants
1329 {
1330   RIPEMD160M_A=0x67452301U,
1331   RIPEMD160M_B=0xefcdab89U,
1332   RIPEMD160M_C=0x98badcfeU,
1333   RIPEMD160M_D=0x10325476U,
1334   RIPEMD160M_E=0xc3d2e1f0U,
1335 
1336   RIPEMD160C00=0x00000000U,
1337   RIPEMD160C10=0x5a827999U,
1338   RIPEMD160C20=0x6ed9eba1U,
1339   RIPEMD160C30=0x8f1bbcdcU,
1340   RIPEMD160C40=0xa953fd4eU,
1341   RIPEMD160C50=0x50a28be6U,
1342   RIPEMD160C60=0x5c4dd124U,
1343   RIPEMD160C70=0x6d703ef3U,
1344   RIPEMD160C80=0x7a6d76e9U,
1345   RIPEMD160C90=0x00000000U,
1346 
1347   RIPEMD160S00=11,
1348   RIPEMD160S01=14,
1349   RIPEMD160S02=15,
1350   RIPEMD160S03=12,
1351   RIPEMD160S04=5,
1352   RIPEMD160S05=8,
1353   RIPEMD160S06=7,
1354   RIPEMD160S07=9,
1355   RIPEMD160S08=11,
1356   RIPEMD160S09=13,
1357   RIPEMD160S0A=14,
1358   RIPEMD160S0B=15,
1359   RIPEMD160S0C=6,
1360   RIPEMD160S0D=7,
1361   RIPEMD160S0E=9,
1362   RIPEMD160S0F=8,
1363 
1364   RIPEMD160S10=7,
1365   RIPEMD160S11=6,
1366   RIPEMD160S12=8,
1367   RIPEMD160S13=13,
1368   RIPEMD160S14=11,
1369   RIPEMD160S15=9,
1370   RIPEMD160S16=7,
1371   RIPEMD160S17=15,
1372   RIPEMD160S18=7,
1373   RIPEMD160S19=12,
1374   RIPEMD160S1A=15,
1375   RIPEMD160S1B=9,
1376   RIPEMD160S1C=11,
1377   RIPEMD160S1D=7,
1378   RIPEMD160S1E=13,
1379   RIPEMD160S1F=12,
1380 
1381   RIPEMD160S20=11,
1382   RIPEMD160S21=13,
1383   RIPEMD160S22=6,
1384   RIPEMD160S23=7,
1385   RIPEMD160S24=14,
1386   RIPEMD160S25=9,
1387   RIPEMD160S26=13,
1388   RIPEMD160S27=15,
1389   RIPEMD160S28=14,
1390   RIPEMD160S29=8,
1391   RIPEMD160S2A=13,
1392   RIPEMD160S2B=6,
1393   RIPEMD160S2C=5,
1394   RIPEMD160S2D=12,
1395   RIPEMD160S2E=7,
1396   RIPEMD160S2F=5,
1397 
1398   RIPEMD160S30=11,
1399   RIPEMD160S31=12,
1400   RIPEMD160S32=14,
1401   RIPEMD160S33=15,
1402   RIPEMD160S34=14,
1403   RIPEMD160S35=15,
1404   RIPEMD160S36=9,
1405   RIPEMD160S37=8,
1406   RIPEMD160S38=9,
1407   RIPEMD160S39=14,
1408   RIPEMD160S3A=5,
1409   RIPEMD160S3B=6,
1410   RIPEMD160S3C=8,
1411   RIPEMD160S3D=6,
1412   RIPEMD160S3E=5,
1413   RIPEMD160S3F=12,
1414 
1415   RIPEMD160S40=9,
1416   RIPEMD160S41=15,
1417   RIPEMD160S42=5,
1418   RIPEMD160S43=11,
1419   RIPEMD160S44=6,
1420   RIPEMD160S45=8,
1421   RIPEMD160S46=13,
1422   RIPEMD160S47=12,
1423   RIPEMD160S48=5,
1424   RIPEMD160S49=12,
1425   RIPEMD160S4A=13,
1426   RIPEMD160S4B=14,
1427   RIPEMD160S4C=11,
1428   RIPEMD160S4D=8,
1429   RIPEMD160S4E=5,
1430   RIPEMD160S4F=6,
1431 
1432   RIPEMD160S50=8,
1433   RIPEMD160S51=9,
1434   RIPEMD160S52=9,
1435   RIPEMD160S53=11,
1436   RIPEMD160S54=13,
1437   RIPEMD160S55=15,
1438   RIPEMD160S56=15,
1439   RIPEMD160S57=5,
1440   RIPEMD160S58=7,
1441   RIPEMD160S59=7,
1442   RIPEMD160S5A=8,
1443   RIPEMD160S5B=11,
1444   RIPEMD160S5C=14,
1445   RIPEMD160S5D=14,
1446   RIPEMD160S5E=12,
1447   RIPEMD160S5F=6,
1448 
1449   RIPEMD160S60=9,
1450   RIPEMD160S61=13,
1451   RIPEMD160S62=15,
1452   RIPEMD160S63=7,
1453   RIPEMD160S64=12,
1454   RIPEMD160S65=8,
1455   RIPEMD160S66=9,
1456   RIPEMD160S67=11,
1457   RIPEMD160S68=7,
1458   RIPEMD160S69=7,
1459   RIPEMD160S6A=12,
1460   RIPEMD160S6B=7,
1461   RIPEMD160S6C=6,
1462   RIPEMD160S6D=15,
1463   RIPEMD160S6E=13,
1464   RIPEMD160S6F=11,
1465 
1466   RIPEMD160S70=9,
1467   RIPEMD160S71=7,
1468   RIPEMD160S72=15,
1469   RIPEMD160S73=11,
1470   RIPEMD160S74=8,
1471   RIPEMD160S75=6,
1472   RIPEMD160S76=6,
1473   RIPEMD160S77=14,
1474   RIPEMD160S78=12,
1475   RIPEMD160S79=13,
1476   RIPEMD160S7A=5,
1477   RIPEMD160S7B=14,
1478   RIPEMD160S7C=13,
1479   RIPEMD160S7D=13,
1480   RIPEMD160S7E=7,
1481   RIPEMD160S7F=5,
1482 
1483   RIPEMD160S80=15,
1484   RIPEMD160S81=5,
1485   RIPEMD160S82=8,
1486   RIPEMD160S83=11,
1487   RIPEMD160S84=14,
1488   RIPEMD160S85=14,
1489   RIPEMD160S86=6,
1490   RIPEMD160S87=14,
1491   RIPEMD160S88=6,
1492   RIPEMD160S89=9,
1493   RIPEMD160S8A=12,
1494   RIPEMD160S8B=9,
1495   RIPEMD160S8C=12,
1496   RIPEMD160S8D=5,
1497   RIPEMD160S8E=15,
1498   RIPEMD160S8F=8,
1499 
1500   RIPEMD160S90=8,
1501   RIPEMD160S91=5,
1502   RIPEMD160S92=12,
1503   RIPEMD160S93=9,
1504   RIPEMD160S94=12,
1505   RIPEMD160S95=5,
1506   RIPEMD160S96=14,
1507   RIPEMD160S97=6,
1508   RIPEMD160S98=8,
1509   RIPEMD160S99=13,
1510   RIPEMD160S9A=6,
1511   RIPEMD160S9B=5,
1512   RIPEMD160S9C=15,
1513   RIPEMD160S9D=13,
1514   RIPEMD160S9E=11,
1515   RIPEMD160S9F=11
1516 
1517 } ripemd160_constants_t;
1518 
1519 typedef enum keccak_constants
1520 {
1521   KECCAK_RNDC_00=0x0000000000000001UL,
1522   KECCAK_RNDC_01=0x0000000000008082UL,
1523   KECCAK_RNDC_02=0x800000000000808aUL,
1524   KECCAK_RNDC_03=0x8000000080008000UL,
1525   KECCAK_RNDC_04=0x000000000000808bUL,
1526   KECCAK_RNDC_05=0x0000000080000001UL,
1527   KECCAK_RNDC_06=0x8000000080008081UL,
1528   KECCAK_RNDC_07=0x8000000000008009UL,
1529   KECCAK_RNDC_08=0x000000000000008aUL,
1530   KECCAK_RNDC_09=0x0000000000000088UL,
1531   KECCAK_RNDC_10=0x0000000080008009UL,
1532   KECCAK_RNDC_11=0x000000008000000aUL,
1533   KECCAK_RNDC_12=0x000000008000808bUL,
1534   KECCAK_RNDC_13=0x800000000000008bUL,
1535   KECCAK_RNDC_14=0x8000000000008089UL,
1536   KECCAK_RNDC_15=0x8000000000008003UL,
1537   KECCAK_RNDC_16=0x8000000000008002UL,
1538   KECCAK_RNDC_17=0x8000000000000080UL,
1539   KECCAK_RNDC_18=0x000000000000800aUL,
1540   KECCAK_RNDC_19=0x800000008000000aUL,
1541   KECCAK_RNDC_20=0x8000000080008081UL,
1542   KECCAK_RNDC_21=0x8000000000008080UL,
1543   KECCAK_RNDC_22=0x0000000080000001UL,
1544   KECCAK_RNDC_23=0x8000000080008008UL,
1545 
1546   KECCAK_PILN_00=10,
1547   KECCAK_PILN_01=7,
1548   KECCAK_PILN_02=11,
1549   KECCAK_PILN_03=17,
1550   KECCAK_PILN_04=18,
1551   KECCAK_PILN_05=3,
1552   KECCAK_PILN_06=5,
1553   KECCAK_PILN_07=16,
1554   KECCAK_PILN_08=8,
1555   KECCAK_PILN_09=21,
1556   KECCAK_PILN_10=24,
1557   KECCAK_PILN_11=4,
1558   KECCAK_PILN_12=15,
1559   KECCAK_PILN_13=23,
1560   KECCAK_PILN_14=19,
1561   KECCAK_PILN_15=13,
1562   KECCAK_PILN_16=12,
1563   KECCAK_PILN_17=2,
1564   KECCAK_PILN_18=20,
1565   KECCAK_PILN_19=14,
1566   KECCAK_PILN_20=22,
1567   KECCAK_PILN_21=9,
1568   KECCAK_PILN_22=6,
1569   KECCAK_PILN_23=1,
1570 
1571   KECCAK_ROTC_00=1,
1572   KECCAK_ROTC_01=3,
1573   KECCAK_ROTC_02=6,
1574   KECCAK_ROTC_03=10,
1575   KECCAK_ROTC_04=15,
1576   KECCAK_ROTC_05=21,
1577   KECCAK_ROTC_06=28,
1578   KECCAK_ROTC_07=36,
1579   KECCAK_ROTC_08=45,
1580   KECCAK_ROTC_09=55,
1581   KECCAK_ROTC_10=2,
1582   KECCAK_ROTC_11=14,
1583   KECCAK_ROTC_12=27,
1584   KECCAK_ROTC_13=41,
1585   KECCAK_ROTC_14=56,
1586   KECCAK_ROTC_15=8,
1587   KECCAK_ROTC_16=25,
1588   KECCAK_ROTC_17=43,
1589   KECCAK_ROTC_18=62,
1590   KECCAK_ROTC_19=18,
1591   KECCAK_ROTC_20=39,
1592   KECCAK_ROTC_21=61,
1593   KECCAK_ROTC_22=20,
1594   KECCAK_ROTC_23=44,
1595 
1596 } keccak_constants_t;
1597 
1598 typedef enum mysql323_constants
1599 {
1600   MYSQL323_A=0x50305735U,
1601   MYSQL323_B=0x12345671U
1602 
1603 } mysql323_constants_t;
1604 
1605 typedef enum fortigate_constants
1606 {
1607   FORTIGATE_A=0x2eba88a3U,
1608   FORTIGATE_B=0x4ab04c42U,
1609   FORTIGATE_C=0xc1307953U,
1610   FORTIGATE_D=0x3fcc0731U,
1611   FORTIGATE_E=0x299032a1U,
1612   FORTIGATE_F=0x705b81a9U
1613 
1614 } fortigate_constants_t;
1615 
1616 typedef enum blake2b_constants
1617 {
1618   BLAKE2B_IV_00=0x6a09e667f3bcc908UL,
1619   BLAKE2B_IV_01=0xbb67ae8584caa73bUL,
1620   BLAKE2B_IV_02=0x3c6ef372fe94f82bUL,
1621   BLAKE2B_IV_03=0xa54ff53a5f1d36f1UL,
1622   BLAKE2B_IV_04=0x510e527fade682d1UL,
1623   BLAKE2B_IV_05=0x9b05688c2b3e6c1fUL,
1624   BLAKE2B_IV_06=0x1f83d9abfb41bd6bUL,
1625   BLAKE2B_IV_07=0x5be0cd19137e2179UL
1626 
1627 } blake2b_constants_t;
1628 
1629 typedef enum combinator_mode
1630 {
1631   COMBINATOR_MODE_BASE_LEFT  = 10001,
1632   COMBINATOR_MODE_BASE_RIGHT = 10002
1633 
1634 } combinator_mode_t;
1635 
1636 #ifdef KERNEL_STATIC
1637 typedef struct digest
1638 {
1639   u32 digest_buf[DGST_ELEM];
1640 
1641 } digest_t;
1642 #endif
1643 
1644 typedef struct salt
1645 {
1646   u32 salt_buf[64];
1647   u32 salt_buf_pc[64];
1648 
1649   u32 salt_len;
1650   u32 salt_len_pc;
1651   u32 salt_iter;
1652   u32 salt_iter2;
1653   u32 salt_sign[2];
1654   u32 salt_repeats;
1655 
1656   u32 orig_pos;
1657 
1658   u32 digests_cnt;
1659   u32 digests_done;
1660 
1661   u32 digests_offset;
1662 
1663   u32 scrypt_N;
1664   u32 scrypt_r;
1665   u32 scrypt_p;
1666 
1667 } salt_t;
1668 
1669 typedef struct
1670 {
1671   u32 key;
1672   u64 val;
1673 
1674 } hcstat_table_t;
1675 
1676 typedef struct
1677 {
1678   u32 cs_buf[0x100];
1679   u32 cs_len;
1680 
1681 } cs_t;
1682 
1683 typedef struct
1684 {
1685   u32 cmds[32];
1686 
1687 } kernel_rule_t;
1688 
1689 typedef struct pw
1690 {
1691   u32 i[64];
1692 
1693   u32 pw_len;
1694 
1695 } pw_t;
1696 
1697 typedef struct pw_idx
1698 {
1699   u32 off;
1700   u32 cnt;
1701   u32 len;
1702 
1703 } pw_idx_t;
1704 
1705 typedef struct bf
1706 {
1707   u32  i;
1708 
1709 } bf_t;
1710 
1711 typedef struct bs_word
1712 {
1713   u32  b[32];
1714 
1715 } bs_word_t;
1716 
1717 typedef struct plain
1718 {
1719   u64  gidvid;
1720   u32  il_pos;
1721   u32  salt_pos;
1722   u32  digest_pos;
1723   u32  hash_pos;
1724   u32  extra1;
1725   u32  extra2;
1726 
1727 } plain_t;
1728 
1729 typedef struct keyboard_layout_mapping
1730 {
1731   u32 src_char;
1732   int src_len;
1733   u32 dst_char;
1734   int dst_len;
1735 
1736 } keyboard_layout_mapping_t;
1737 
1738 typedef struct hc_enc
1739 {
1740   int  pos;   // source offset
1741 
1742   u32  cbuf;  // carry buffer
1743   int  clen;  // carry length
1744 
1745 } hc_enc_t;
1746 
1747 #endif
1748