/*
 * This software is Copyright (c) 2015 Sayantan Datta <std2048 at gmail dot com>
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 * Based on Solar Designer implementation of DES_bs_b.c in jtr-v1.7.9
 */
#include "opencl_lm_kernel_params.h"

/*
 * Address-space qualifier and output stride for the finalized key vectors.
 *
 * MAYBE_LOCAL qualifies the destination key pointer: __local when
 * WORK_GROUP_SIZE is non-zero, __global otherwise.
 *
 * NEXT_BIT is the amount added to the output pointer after each key vector
 * is stored: 1 when WORK_GROUP_SIZE is non-zero, otherwise the value of a
 * variable named `gws`, which must be in scope wherever NEXT_BIT is expanded.
 */
#if WORK_GROUP_SIZE
#define MAYBE_LOCAL __local
#define NEXT_BIT 1
#else
#define MAYBE_LOCAL __global
#define NEXT_BIT gws
#endif

/*
 * k-prefixed aliases for the vector type and the vector primitives.
 *
 * vtype, vand, vor, vshl1, vshl and vshr are not defined in this file;
 * presumably they come from the included kernel parameters header
 * (TODO confirm).
 */
#define kvtype vtype
#define kvand vand
#define kvor vor
#define kvshl1 vshl1
#define kvshl vshl
#define kvshr vshr

/*
 * Per-byte single-bit masks for a 32-bit word: maskXX has bit pattern XX
 * replicated in each of the four bytes, so mask01 selects bit 0 of every
 * byte, mask02 bit 1, ... and mask80 bit 7.
 */
#define mask01 0x01010101
#define mask02 0x02020202
#define mask04 0x04040404
#define mask08 0x08080808
#define mask10 0x10101010
#define mask20 0x20202020
#define mask40 0x40404040
#define mask80 0x80808080

/*
 * Mask / shift / combine helpers.
 *
 * Every helper writes to a scratch variable named `tmp`, which the caller
 * must declare in the enclosing scope.  The one-line summaries below assume
 * kvand/kvor/kvshl/kvshr/kvshl1 are plain three-operand (two-operand for
 * kvshl1) bitwise and/or/shift primitives of the form op(dst, ...) --
 * they are defined outside this file, TODO confirm.
 *
 * Each helper is wrapped in do { } while (0) so that it expands to exactly
 * one statement and stays correct under an unbraced if/else.  Invoke it
 * with a trailing semicolon.
 */

/* dst |= (src & mask) << 1 */
#define kvand_shl1_or(dst, src, mask) \
	do { \
		kvand(tmp, src, mask); \
		kvshl1(tmp, tmp); \
		kvor(dst, dst, tmp); \
	} while (0)

/* dst |= (src & mask) << shift */
#define kvand_shl_or(dst, src, mask, shift) \
	do { \
		kvand(tmp, src, mask); \
		kvshl(tmp, tmp, shift); \
		kvor(dst, dst, tmp); \
	} while (0)

/* dst = (src & mask) << 1 */
#define kvand_shl1(dst, src, mask) \
	do { \
		kvand(tmp, src, mask); \
		kvshl1(dst, tmp); \
	} while (0)

/* dst |= (src & mask) */
#define kvand_or(dst, src, mask) \
	do { \
		kvand(tmp, src, mask); \
		kvor(dst, dst, tmp); \
	} while (0)

/* dst |= (src & mask) >> shift */
#define kvand_shr_or(dst, src, mask, shift) \
	do { \
		kvand(tmp, src, mask); \
		kvshr(tmp, tmp, shift); \
		kvor(dst, dst, tmp); \
	} while (0)

/* dst = (src & mask) >> shift */
#define kvand_shr(dst, src, mask, shift) \
	do { \
		kvand(tmp, src, mask); \
		kvshr(dst, tmp, shift); \
	} while (0)

/*
 * Declare the locals v0..v7 (type kvtype) and initialise them from
 * vp[0]..vp[7], reading each element through a __global kvtype pointer.
 *
 * Requires a pointer named `vp` in scope.  The expansion already ends with
 * a semicolon, so the macro is invoked without one.
 */
#define LOAD_V \
	kvtype v0 = *(__global kvtype *)&vp[0]; \
	kvtype v1 = *(__global kvtype *)&vp[1]; \
	kvtype v2 = *(__global kvtype *)&vp[2]; \
	kvtype v3 = *(__global kvtype *)&vp[3]; \
	kvtype v4 = *(__global kvtype *)&vp[4]; \
	kvtype v5 = *(__global kvtype *)&vp[5]; \
	kvtype v6 = *(__global kvtype *)&vp[6]; \
	kvtype v7 = *(__global kvtype *)&vp[7];

/*
 * Build one output vector from bit 0 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 0 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_0g { \
	kvtype m = mask01, va, vb, tmp; \
	kvand(va, v0, m); \
	kvand_shl1(vb, v1, m); \
	kvand_shl_or(va, v2, m, 2); \
	kvand_shl_or(vb, v3, m, 3); \
	kvand_shl_or(va, v4, m, 4); \
	kvand_shl_or(vb, v5, m, 5); \
	kvand_shl_or(va, v6, m, 6); \
	kvand_shl_or(vb, v7, m, 7); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Build one output vector from bit 1 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 1 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_1g { \
	kvtype m = mask02, va, vb, tmp; \
	kvand_shr(va, v0, m, 1); \
	kvand(vb, v1, m); \
	kvand_shl1_or(va, v2, m); \
	kvand_shl_or(vb, v3, m, 2); \
	kvand_shl_or(va, v4, m, 3); \
	kvand_shl_or(vb, v5, m, 4); \
	kvand_shl_or(va, v6, m, 5); \
	kvand_shl_or(vb, v7, m, 6); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Build one output vector from bit 2 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 2 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_2g { \
	kvtype m = mask04, va, vb, tmp; \
	kvand_shr(va, v0, m, 2); \
	kvand_shr(vb, v1, m, 1); \
	kvand_or(va, v2, m); \
	kvand_shl1_or(vb, v3, m); \
	kvand_shl_or(va, v4, m, 2); \
	kvand_shl_or(vb, v5, m, 3); \
	kvand_shl_or(va, v6, m, 4); \
	kvand_shl_or(vb, v7, m, 5); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Build one output vector from bit 3 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 3 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_3g { \
	kvtype m = mask08, va, vb, tmp; \
	kvand_shr(va, v0, m, 3); \
	kvand_shr(vb, v1, m, 2); \
	kvand_shr_or(va, v2, m, 1); \
	kvand_or(vb, v3, m); \
	kvand_shl1_or(va, v4, m); \
	kvand_shl_or(vb, v5, m, 2); \
	kvand_shl_or(va, v6, m, 3); \
	kvand_shl_or(vb, v7, m, 4); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Build one output vector from bit 4 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 4 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_4g { \
	kvtype m = mask10, va, vb, tmp; \
	kvand_shr(va, v0, m, 4); \
	kvand_shr(vb, v1, m, 3); \
	kvand_shr_or(va, v2, m, 2); \
	kvand_shr_or(vb, v3, m, 1); \
	kvand_or(va, v4, m); \
	kvand_shl1_or(vb, v5, m); \
	kvand_shl_or(va, v6, m, 2); \
	kvand_shl_or(vb, v7, m, 3); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Build one output vector from bit 5 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 5 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_5g { \
	kvtype m = mask20, va, vb, tmp; \
	kvand_shr(va, v0, m, 5); \
	kvand_shr(vb, v1, m, 4); \
	kvand_shr_or(va, v2, m, 3); \
	kvand_shr_or(vb, v3, m, 2); \
	kvand_shr_or(va, v4, m, 1); \
	kvand_or(vb, v5, m); \
	kvand_shl1_or(va, v6, m); \
	kvand_shl_or(vb, v7, m, 2); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Build one output vector from bit 6 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 6 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_6g { \
	kvtype m = mask40, va, vb, tmp; \
	kvand_shr(va, v0, m, 6); \
	kvand_shr(vb, v1, m, 5); \
	kvand_shr_or(va, v2, m, 4); \
	kvand_shr_or(vb, v3, m, 3); \
	kvand_shr_or(va, v4, m, 2); \
	kvand_shr_or(vb, v5, m, 1); \
	kvand_or(va, v6, m); \
	kvand_shl1_or(vb, v7, m); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Build one output vector from bit 7 of every byte of v0..v7 and store it.
 *
 * Assuming the kv* primitives are plain bitwise and/or/shift operations,
 * bit i (i = 0..7) of each byte of the result is bit 7 of the same byte of
 * v<i>.  The result is written to kp[0] and kp is advanced by NEXT_BIT.
 *
 * Requires v0..v7 and kp in scope.  Expands to a braced block and is
 * invoked without a trailing semicolon.
 */
#define FINALIZE_NEXT_KEY_BIT_7g { \
	kvtype m = mask80, va, vb, tmp; \
	kvand_shr(va, v0, m, 7); \
	kvand_shr(vb, v1, m, 6); \
	kvand_shr_or(va, v2, m, 5); \
	kvand_shr_or(vb, v3, m, 4); \
	kvand_shr_or(va, v4, m, 3); \
	kvand_shr_or(vb, v5, m, 2); \
	kvand_shr_or(va, v6, m, 1); \
	kvand_or(vb, v7, m); \
	kvor(kp[0], va, vb); \
	kp += NEXT_BIT; \
}

/*
 * Rearrange the raw keys of one section into the finalized key vectors.
 *
 * lm_raw_keys   raw key buffer; only entry `section` is read, through
 *               its xkeys.v[ic][0..7] elements for ic = 0..6.
 * section       index into lm_raw_keys; when WORK_GROUP_SIZE is zero it is
 *               also the starting index into lm_keys.
 * lm_keys       destination buffer (address space given by MAYBE_LOCAL).
 * s_key_offset  (WORK_GROUP_SIZE non-zero) starting index into lm_keys.
 * gws           (WORK_GROUP_SIZE zero) stride between consecutive output
 *               vectors; consumed through the NEXT_BIT macro.
 *
 * Each of the 7 loop iterations loads eight vectors and stores eight output
 * vectors, so 56 vectors are written in total, NEXT_BIT elements apart,
 * starting at lm_keys[LM_KEYS_OFFSET].
 *
 * LM_KEYS_OFFSET is defined inside the function and intentionally left
 * defined afterwards, in case code outside this view relies on it.
 */
inline void lm_bs_finalize_keys(__global opencl_lm_transfer *lm_raw_keys,
		int section,
		MAYBE_LOCAL lm_vector *lm_keys,
#if WORK_GROUP_SIZE
		unsigned int s_key_offset
#else
		unsigned int gws
#endif
		) {

#if WORK_GROUP_SIZE
#define LM_KEYS_OFFSET s_key_offset
#else
#define LM_KEYS_OFFSET section
#endif
	/* Output cursor; every FINALIZE_NEXT_KEY_BIT_* macro advances it. */
	MAYBE_LOCAL lm_vector *kp = (MAYBE_LOCAL lm_vector *)&lm_keys[LM_KEYS_OFFSET];

	int ic;
	for (ic = 0; ic < 7; ic++) {
		__global lm_vector *vp =
		    (__global lm_vector *)&lm_raw_keys[section].xkeys.v[ic][0];
		/* Declares v0..v7 from vp[0..7]; the macros below consume them. */
		LOAD_V
		FINALIZE_NEXT_KEY_BIT_0g
		FINALIZE_NEXT_KEY_BIT_1g
		FINALIZE_NEXT_KEY_BIT_2g
		FINALIZE_NEXT_KEY_BIT_3g
		FINALIZE_NEXT_KEY_BIT_4g
		FINALIZE_NEXT_KEY_BIT_5g
		FINALIZE_NEXT_KEY_BIT_6g
		FINALIZE_NEXT_KEY_BIT_7g
	}
}
