/*
 * This software is Copyright (c) 2012-2015 Sayantan Datta <std2048 at gmail dot com>
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 * Based on Solar Designer implementation of DES_bs_b.c in jtr-v1.7.9
 */
8
9#include "opencl_DES_kernel_params.h"
10
/*
 * z(p, q): combine block element B[p] with key element q of the current
 * work-item using vxorf().
 *
 * With WORK_GROUP_SIZE > 0 the key element is read from the local-memory
 * copy at s_des_bs_key[q + s_key_offset]; otherwise it is read straight from
 * des_bs_key[section + q * gws].
 *
 * B, s_des_bs_key, s_key_offset, des_bs_key, section and gws are not macro
 * parameters; they must be in scope wherever z() is expanded.
 * vxorf is defined outside this file (presumably a vector XOR - confirm).
 *
 * Both parameters are parenthesized so that an expression argument such as
 * z(i, j + 1) indexes the intended key element.
 */
#if WORK_GROUP_SIZE > 0
#define z(p, q) vxorf(B[(p)], s_des_bs_key[(q) + s_key_offset])
#else
#define z(p, q) vxorf(B[(p)], des_bs_key[section + (q) * gws])
#endif
16
/*
 * H1_k0: calls s1..s8 once each. Every call gets six inputs built with z()
 * (a B element combined with a key element, key indices 0..55), the array B
 * and four output positions. The literal input positions lie in B[0..31] and
 * the 32 output positions cover B[32..63]. index0..index11 and
 * index24..index35 are not defined in this file.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement and
 * stay together under an unbraced if/for; invoke as `H1_k0();`.
 * NOTE(review): the "_k0" suffix presumably names this round's offset in a
 * generic key-schedule table - confirm.
 */
#define H1_k0() do { \
	s1(z(index0, 12), z(index1, 46), z(index2, 33), z(index3, 52), z(index4, 48), z(index5, 20), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 34), z(index7, 55), z(index8, 5), z(index9, 13), z(index10, 18), z(index11, 40), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 4), z(8, 32), z(9, 26), z(10, 27), z(11, 38), z(12, 54), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 53), z(12, 6), z(13, 31), z(14, 25), z(15, 19), z(16, 41), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 15), z(index25, 24), z(index26, 28), z(index27, 43), z(index28, 30), z(index29, 3), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 35), z(index31, 22), z(index32, 2), z(index33, 44), z(index34, 14), z(index35, 23), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 51), z(24, 16), z(25, 29), z(26, 49), z(27, 7), z(28, 17), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 37), z(28, 8), z(29, 9), z(30, 50), z(31, 42), z(0, 21), \
	   B, 36, 58, 46, 52); \
} while (0)
38
/*
 * H2_k0: calls s1..s8 once each. Every call gets six inputs built with z()
 * (a B element combined with a key element, key indices 0..55), the array B
 * and four output positions. The literal input positions lie in B[32..63]
 * and the 32 output positions cover B[0..31]. index48..index59 and
 * index72..index83 are not defined in this file.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement and
 * stay together under an unbraced if/for; invoke as `H2_k0();`.
 * NOTE(review): the "_k0" suffix presumably names a key-schedule offset
 * shared with H1_k0 - confirm.
 */
#define H2_k0() do { \
	s1(z(index48, 5), z(index49, 39), z(index50, 26), z(index51, 45), z(index52, 41), z(index53, 13), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 27), z(index55, 48), z(index56, 53), z(index57, 6), z(index58, 11), z(index59, 33), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 52), z(40, 25), z(41, 19), z(42, 20), z(43, 31), z(44, 47), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 46), z(44, 54), z(45, 55), z(46, 18), z(47, 12), z(48, 34), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 8), z(index73, 17), z(index74, 21), z(index75, 36), z(index76, 23), z(index77, 49), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 28), z(index79, 15), z(index80, 24), z(index81, 37), z(index82, 7), z(index83, 16), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 44), z(56, 9), z(57, 22), z(58, 42), z(59, 0), z(60, 10), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 30), z(60, 1), z(61, 2), z(62, 43), z(63, 35), z(32, 14), \
	   B, 4, 26, 14, 20); \
} while (0)
60
/*
 * H2_k48: same S-box calls, B positions and output positions as H2_k0, but
 * the inputs are built with y48() and carry the key indices that H1_k0 uses.
 * y48 is not defined in the visible part of this file and H() does not
 * invoke this macro.
 * NOTE(review): looks like a leftover from a table-driven variant where
 * y48() adds a schedule offset - confirm before removing or using it.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k48();`.
 */
#define H2_k48() do { \
	s1(y48(index48, 12), y48(index49, 46), y48(index50, 33), y48(index51, 52), y48(index52, 48), y48(index53, 20), \
	   B, 8, 16, 22, 30); \
	s2(y48(index54, 34), y48(index55, 55), y48(index56, 5), y48(index57, 13), y48(index58, 18), y48(index59, 40), \
	   B, 12, 27, 1, 17); \
	s3(y48(39, 4), y48(40, 32), y48(41, 26), y48(42, 27), y48(43, 38), y48(44, 54), \
	   B, 23, 15, 29, 5); \
	s4(y48(43, 53), y48(44, 6), y48(45, 31), y48(46, 25), y48(47, 19), y48(48, 41), \
	   B, 25, 19, 9, 0); \
	s5(y48(index72, 15), y48(index73, 24), y48(index74, 28), y48(index75, 43), y48(index76, 30), y48(index77, 3), \
	   B, 7, 13, 24, 2); \
	s6(y48(index78, 35), y48(index79, 22), y48(index80, 2), y48(index81, 44), y48(index82, 14), y48(index83, 23), \
	   B, 3, 28, 10, 18); \
	s7(y48(55, 51), y48(56, 16), y48(57, 29), y48(58, 49), y48(59, 7), y48(60, 17), \
	   B, 31, 11, 21, 6); \
	s8(y48(59, 37), y48(60, 8), y48(61, 9), y48(62, 50), y48(63, 42), y48(32, 21), \
	   B, 4, 26, 14, 20); \
} while (0)
82
/*
 * H1_k96: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[0..31]; the 32 output positions cover B[32..63].
 * index0..index11 and index24..index35 are not defined in this file.
 * Only the key indices differ from the other H1_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H1_k96();`.
 * NOTE(review): "_k96" presumably names a key-schedule offset - confirm.
 */
#define H1_k96() do { \
	s1(z(index0, 46), z(index1, 25), z(index2, 12), z(index3, 31), z(index4, 27), z(index5, 54), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 13), z(index7, 34), z(index8, 39), z(index9, 47), z(index10, 52), z(index11, 19), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 38), z(8, 11), z(9, 5), z(10, 6), z(11, 48), z(12, 33), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 32), z(12, 40), z(13, 41), z(14, 4), z(15, 53), z(16, 20), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 51), z(index25, 3), z(index26, 7), z(index27, 22), z(index28, 9), z(index29, 35), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 14), z(index31, 1), z(index32, 10), z(index33, 23), z(index34, 50), z(index35, 2), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 30), z(24, 24), z(25, 8), z(26, 28), z(27, 43), z(28, 49), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 16), z(28, 44), z(29, 17), z(30, 29), z(31, 21), z(0, 0), \
	   B, 36, 58, 46, 52); \
} while (0)
104
/*
 * H2_k96: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[32..63]; the 32 output positions cover B[0..31].
 * index48..index59 and index72..index83 are not defined in this file.
 * Only the key indices differ from the other H2_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k96();`.
 * NOTE(review): "_k96" presumably names a key-schedule offset - confirm.
 */
#define H2_k96() do { \
	s1(z(index48, 32), z(index49, 11), z(index50, 53), z(index51, 48), z(index52, 13), z(index53, 40), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 54), z(index55, 20), z(index56, 25), z(index57, 33), z(index58, 38), z(index59, 5), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 55), z(40, 52), z(41, 46), z(42, 47), z(43, 34), z(44, 19), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 18), z(44, 26), z(45, 27), z(46, 45), z(47, 39), z(48, 6), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 37), z(index73, 42), z(index74, 50), z(index75, 8), z(index76, 24), z(index77, 21), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 0), z(index79, 44), z(index80, 49), z(index81, 9), z(index82, 36), z(index83, 17), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 16), z(56, 10), z(57, 51), z(58, 14), z(59, 29), z(60, 35), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 2), z(60, 30), z(61, 3), z(62, 15), z(63, 7), z(32, 43), \
	   B, 4, 26, 14, 20); \
} while (0)
126
/*
 * H1_k192: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[0..31]; the 32 output positions cover B[32..63].
 * index0..index11 and index24..index35 are not defined in this file.
 * Only the key indices differ from the other H1_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H1_k192();`.
 * NOTE(review): "_k192" presumably names a key-schedule offset - confirm.
 */
#define H1_k192() do { \
	s1(z(index0, 18), z(index1, 52), z(index2, 39), z(index3, 34), z(index4, 54), z(index5, 26), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 40), z(index7, 6), z(index8, 11), z(index9, 19), z(index10, 55), z(index11, 46), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 41), z(8, 38), z(9, 32), z(10, 33), z(11, 20), z(12, 5), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 4), z(12, 12), z(13, 13), z(14, 31), z(15, 25), z(16, 47), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 23), z(index25, 28), z(index26, 36), z(index27, 51), z(index28, 10), z(index29, 7), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 43), z(index31, 30), z(index32, 35), z(index33, 24), z(index34, 22), z(index35, 3), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 2), z(24, 49), z(25, 37), z(26, 0), z(27, 15), z(28, 21), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 17), z(28, 16), z(29, 42), z(30, 1), z(31, 50), z(0, 29), \
	   B, 36, 58, 46, 52); \
} while (0)
148
/*
 * H2_k192: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[32..63]; the 32 output positions cover B[0..31].
 * index48..index59 and index72..index83 are not defined in this file.
 * Only the key indices differ from the other H2_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k192();`.
 * NOTE(review): "_k192" presumably names a key-schedule offset - confirm.
 */
#define H2_k192() do { \
	s1(z(index48, 4), z(index49, 38), z(index50, 25), z(index51, 20), z(index52, 40), z(index53, 12), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 26), z(index55, 47), z(index56, 52), z(index57, 5), z(index58, 41), z(index59, 32), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 27), z(40, 55), z(41, 18), z(42, 19), z(43, 6), z(44, 46), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 45), z(44, 53), z(45, 54), z(46, 48), z(47, 11), z(48, 33), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 9), z(index73, 14), z(index74, 22), z(index75, 37), z(index76, 49), z(index77, 50), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 29), z(index79, 16), z(index80, 21), z(index81, 10), z(index82, 8), z(index83, 42), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 17), z(56, 35), z(57, 23), z(58, 43), z(59, 1), z(60, 7), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 3), z(60, 2), z(61, 28), z(62, 44), z(63, 36), z(32, 15), \
	   B, 4, 26, 14, 20); \
} while (0)
170
/*
 * H1_k288: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[0..31]; the 32 output positions cover B[32..63].
 * index0..index11 and index24..index35 are not defined in this file.
 * Only the key indices differ from the other H1_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H1_k288();`.
 * NOTE(review): "_k288" presumably names a key-schedule offset - confirm.
 */
#define H1_k288() do { \
	s1(z(index0, 45), z(index1, 55), z(index2, 11), z(index3, 6), z(index4, 26), z(index5, 53), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 12), z(index7, 33), z(index8, 38), z(index9, 46), z(index10, 27), z(index11, 18), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 13), z(8, 41), z(9, 4), z(10, 5), z(11, 47), z(12, 32), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 31), z(12, 39), z(13, 40), z(14, 34), z(15, 52), z(16, 19), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 24), z(index25, 0), z(index26, 8), z(index27, 23), z(index28, 35), z(index29, 36), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 15), z(index31, 2), z(index32, 7), z(index33, 49), z(index34, 51), z(index35, 28), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 3), z(24, 21), z(25, 9), z(26, 29), z(27, 44), z(28, 50), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 42), z(28, 17), z(29, 14), z(30, 30), z(31, 22), z(0, 1), \
	   B, 36, 58, 46, 52); \
} while (0)
192
/*
 * H2_k288: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[32..63]; the 32 output positions cover B[0..31].
 * index48..index59 and index72..index83 are not defined in this file.
 * Only the key indices differ from the other H2_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k288();`.
 * NOTE(review): "_k288" presumably names a key-schedule offset - confirm.
 */
#define H2_k288() do { \
	s1(z(index48, 31), z(index49, 41), z(index50, 52), z(index51, 47), z(index52, 12), z(index53, 39), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 53), z(index55, 19), z(index56, 55), z(index57, 32), z(index58, 13), z(index59, 4), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 54), z(40, 27), z(41, 45), z(42, 46), z(43, 33), z(44, 18), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 48), z(44, 25), z(45, 26), z(46, 20), z(47, 38), z(48, 5), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 10), z(index73, 43), z(index74, 51), z(index75, 9), z(index76, 21), z(index77, 22), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 1), z(index79, 17), z(index80, 50), z(index81, 35), z(index82, 37), z(index83, 14), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 42), z(56, 7), z(57, 24), z(58, 15), z(59, 30), z(60, 36), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 28), z(60, 3), z(61, 0), z(62, 16), z(63, 8), z(32, 44), \
	   B, 4, 26, 14, 20); \
} while (0)
214
/*
 * H1_k384: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[0..31]; the 32 output positions cover B[32..63].
 * index0..index11 and index24..index35 are not defined in this file.
 * Only the key indices differ from the other H1_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H1_k384();`.
 * NOTE(review): "_k384" presumably names a key-schedule offset - confirm.
 */
#define H1_k384() do { \
	s1(z(index0, 55), z(index1, 34), z(index2, 45), z(index3, 40), z(index4, 5), z(index5, 32), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 46), z(index7, 12), z(index8, 48), z(index9, 25), z(index10, 6), z(index11, 52), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 47), z(8, 20), z(9, 38), z(10, 39), z(11, 26), z(12, 11), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 41), z(12, 18), z(13, 19), z(14, 13), z(15, 31), z(16, 53), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 3), z(index25, 36), z(index26, 44), z(index27, 2), z(index28, 14), z(index29, 15), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 51), z(index31, 10), z(index32, 43), z(index33, 28), z(index34, 30), z(index35, 7), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 35), z(24, 0), z(25, 17), z(26, 8), z(27, 23), z(28, 29), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 21), z(28, 49), z(29, 50), z(30, 9), z(31, 1), z(0, 37), \
	   B, 36, 58, 46, 52); \
} while (0)
236
/*
 * H2_k384: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[32..63]; the 32 output positions cover B[0..31].
 * index48..index59 and index72..index83 are not defined in this file.
 * Only the key indices differ from the other H2_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k384();`.
 * NOTE(review): "_k384" presumably names a key-schedule offset - confirm.
 */
#define H2_k384() do { \
	s1(z(index48, 41), z(index49, 20), z(index50, 31), z(index51, 26), z(index52, 46), z(index53, 18), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 32), z(index55, 53), z(index56, 34), z(index57, 11), z(index58, 47), z(index59, 38), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 33), z(40, 6), z(41, 55), z(42, 25), z(43, 12), z(44, 52), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 27), z(44, 4), z(45, 5), z(46, 54), z(47, 48), z(48, 39), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 42), z(index73, 22), z(index74, 30), z(index75, 17), z(index76, 0), z(index77, 1), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 37), z(index79, 49), z(index80, 29), z(index81, 14), z(index82, 16), z(index83, 50), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 21), z(56, 43), z(57, 3), z(58, 51), z(59, 9), z(60, 15), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 7), z(60, 35), z(61, 36), z(62, 24), z(63, 44), z(32, 23), \
	   B, 4, 26, 14, 20); \
} while (0)
258
/*
 * H1_k480: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[0..31]; the 32 output positions cover B[32..63].
 * index0..index11 and index24..index35 are not defined in this file.
 * Only the key indices differ from the other H1_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H1_k480();`.
 * NOTE(review): "_k480" presumably names a key-schedule offset - confirm.
 */
#define H1_k480() do { \
	s1(z(index0, 27), z(index1, 6), z(index2, 48), z(index3, 12), z(index4, 32), z(index5, 4), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 18), z(index7, 39), z(index8, 20), z(index9, 52), z(index10, 33), z(index11, 55), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 19), z(8, 47), z(9, 41), z(10, 11), z(11, 53), z(12, 38), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 13), z(12, 45), z(13, 46), z(14, 40), z(15, 34), z(16, 25), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 28), z(index25, 8), z(index26, 16), z(index27, 3), z(index28, 43), z(index29, 44), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 23), z(index31, 35), z(index32, 15), z(index33, 0), z(index34, 2), z(index35, 36), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 7), z(24, 29), z(25, 42), z(26, 37), z(27, 24), z(28, 1), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 50), z(28, 21), z(29, 22), z(30, 10), z(31, 30), z(0, 9), \
	   B, 36, 58, 46, 52); \
} while (0)
280
/*
 * H2_k480: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[32..63]; the 32 output positions cover B[0..31].
 * index48..index59 and index72..index83 are not defined in this file.
 * Only the key indices differ from the other H2_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k480();`.
 * NOTE(review): "_k480" presumably names a key-schedule offset - confirm.
 */
#define H2_k480() do { \
	s1(z(index48, 13), z(index49, 47), z(index50, 34), z(index51, 53), z(index52, 18), z(index53, 45), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 4), z(index55, 25), z(index56, 6), z(index57, 38), z(index58, 19), z(index59, 41), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 5), z(40, 33), z(41, 27), z(42, 52), z(43, 39), z(44, 55), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 54), z(44, 31), z(45, 32), z(46, 26), z(47, 20), z(48, 11), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 14), z(index73, 51), z(index74, 2), z(index75, 42), z(index76, 29), z(index77, 30), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 9), z(index79, 21), z(index80, 1), z(index81, 43), z(index82, 17), z(index83, 22), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 50), z(56, 15), z(57, 28), z(58, 23), z(59, 10), z(60, 44), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 36), z(60, 7), z(61, 8), z(62, 49), z(63, 16), z(32, 24), \
	   B, 4, 26, 14, 20); \
} while (0)
302
/*
 * H1_k576: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[0..31]; the 32 output positions cover B[32..63].
 * index0..index11 and index24..index35 are not defined in this file.
 * Only the key indices differ from the other H1_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H1_k576();`.
 * NOTE(review): "_k576" presumably names a key-schedule offset - confirm.
 */
#define H1_k576() do { \
	s1(z(index0, 54), z(index1, 33), z(index2, 20), z(index3, 39), z(index4, 4), z(index5, 31), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 45), z(index7, 11), z(index8, 47), z(index9, 55), z(index10, 5), z(index11, 27), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 46), z(8, 19), z(9, 13), z(10, 38), z(11, 25), z(12, 41), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 40), z(12, 48), z(13, 18), z(14, 12), z(15, 6), z(16, 52), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 0), z(index25, 37), z(index26, 17), z(index27, 28), z(index28, 15), z(index29, 16), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 24), z(index31, 7), z(index32, 44), z(index33, 29), z(index34, 3), z(index35, 8), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 36), z(24, 1), z(25, 14), z(26, 9), z(27, 49), z(28, 30), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 22), z(28, 50), z(29, 51), z(30, 35), z(31, 2), z(0, 10), \
	   B, 36, 58, 46, 52); \
} while (0)
324
/*
 * H2_k576: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[32..63]; the 32 output positions cover B[0..31].
 * index48..index59 and index72..index83 are not defined in this file.
 * Only the key indices differ from the other H2_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k576();`.
 * NOTE(review): "_k576" presumably names a key-schedule offset - confirm.
 */
#define H2_k576() do { \
	s1(z(index48, 40), z(index49, 19), z(index50, 6), z(index51, 25), z(index52, 45), z(index53, 48), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 31), z(index55, 52), z(index56, 33), z(index57, 41), z(index58, 46), z(index59, 13), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 32), z(40, 5), z(41, 54), z(42, 55), z(43, 11), z(44, 27), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 26), z(44, 34), z(45, 4), z(46, 53), z(47, 47), z(48, 38), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 43), z(index73, 23), z(index74, 3), z(index75, 14), z(index76, 1), z(index77, 2), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 10), z(index79, 50), z(index80, 30), z(index81, 15), z(index82, 42), z(index83, 51), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 22), z(56, 44), z(57, 0), z(58, 24), z(59, 35), z(60, 16), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 8), z(60, 36), z(61, 37), z(62, 21), z(63, 17), z(32, 49), \
	   B, 4, 26, 14, 20); \
} while (0)
346
/*
 * H1_k672: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[0..31]; the 32 output positions cover B[32..63].
 * index0..index11 and index24..index35 are not defined in this file.
 * Only the key indices differ from the other H1_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H1_k672();`.
 * NOTE(review): "_k672" presumably names a key-schedule offset - confirm.
 */
#define H1_k672() do { \
	s1(z(index0, 26), z(index1, 5), z(index2, 47), z(index3, 11), z(index4, 31), z(index5, 34), \
	   B, 40, 48, 54, 62); \
	s2(z(index6, 48), z(index7, 38), z(index8, 19), z(index9, 27), z(index10, 32), z(index11, 54), \
	   B, 44, 59, 33, 49); \
	s3(z(7, 18), z(8, 46), z(9, 40), z(10, 41), z(11, 52), z(12, 13), \
	   B, 55, 47, 61, 37); \
	s4(z(11, 12), z(12, 20), z(13, 45), z(14, 39), z(15, 33), z(16, 55), \
	   B, 57, 51, 41, 32); \
	s5(z(index24, 29), z(index25, 9), z(index26, 42), z(index27, 0), z(index28, 44), z(index29, 17), \
	   B, 39, 45, 56, 34); \
	s6(z(index30, 49), z(index31, 36), z(index32, 16), z(index33, 1), z(index34, 28), z(index35, 37), \
	   B, 35, 60, 42, 50); \
	s7(z(23, 8), z(24, 30), z(25, 43), z(26, 10), z(27, 21), z(28, 2), \
	   B, 63, 43, 53, 38); \
	s8(z(27, 51), z(28, 22), z(29, 23), z(30, 7), z(31, 3), z(0, 35), \
	   B, 36, 58, 46, 52); \
} while (0)
368
/*
 * H2_k672: calls s1..s8 once each with inputs built by z(). Literal input
 * positions lie in B[32..63]; the 32 output positions cover B[0..31].
 * index48..index59 and index72..index83 are not defined in this file.
 * Only the key indices differ from the other H2_* macros.
 * Wrapped in do { } while (0) so the eight calls expand as ONE statement;
 * invoke as `H2_k672();`.
 * NOTE(review): "_k672" presumably names a key-schedule offset - confirm.
 */
#define H2_k672() do { \
	s1(z(index48, 19), z(index49, 53), z(index50, 40), z(index51, 4), z(index52, 55), z(index53, 27), \
	   B, 8, 16, 22, 30); \
	s2(z(index54, 41), z(index55, 31), z(index56, 12), z(index57, 20), z(index58, 25), z(index59, 47), \
	   B, 12, 27, 1, 17); \
	s3(z(39, 11), z(40, 39), z(41, 33), z(42, 34), z(43, 45), z(44, 6), \
	   B, 23, 15, 29, 5); \
	s4(z(43, 5), z(44, 13), z(45, 38), z(46, 32), z(47, 26), z(48, 48), \
	   B, 25, 19, 9, 0); \
	s5(z(index72, 22), z(index73, 2), z(index74, 35), z(index75, 50), z(index76, 37), z(index77, 10), \
	   B, 7, 13, 24, 2); \
	s6(z(index78, 42), z(index79, 29), z(index80, 9), z(index81, 51), z(index82, 21), z(index83, 30), \
	   B, 3, 28, 10, 18); \
	s7(z(55, 1), z(56, 23), z(57, 36), z(58, 3), z(59, 14), z(60, 24), \
	   B, 31, 11, 21, 6); \
	s8(z(59, 44), z(60, 15), z(61, 16), z(62, 0), z(63, 49), z(32, 28), \
	   B, 4, 26, 14, 20); \
} while (0)
390
/*
 * SWAP(a, b): exchange B[a] and B[b].
 *
 * Uses the caller's array B and scratch variable tmp, both of which must be
 * in scope at the expansion site. Wrapped in do { } while (0) so the three
 * assignments form ONE statement (safe under an unbraced if/for); invoke
 * with a trailing semicolon. Parameters are parenthesized so expression
 * arguments index the intended elements.
 */
#define SWAP(a, b) do { \
	tmp = B[(a)]; \
	B[(a)] = B[(b)]; \
	B[(b)] = tmp; \
} while (0)
395
/*
 * BIG_SWAP(): exchange B[k] with B[k + 32] for every k in 0..31, i.e. swap
 * the two 32-entry halves of B. Applying it twice restores B.
 *
 * The 32 swaps are written out with constant indices rather than as a loop.
 * Relies on SWAP(), and therefore on B and tmp being in scope.
 * Wrapped in do { } while (0) instead of a bare { } block so that
 * `if (c) BIG_SWAP(); else ...` parses as intended.
 */
#define BIG_SWAP() do { \
	SWAP(0, 32); \
	SWAP(1, 33); \
	SWAP(2, 34); \
	SWAP(3, 35); \
	SWAP(4, 36); \
	SWAP(5, 37); \
	SWAP(6, 38); \
	SWAP(7, 39); \
	SWAP(8, 40); \
	SWAP(9, 41); \
	SWAP(10, 42); \
	SWAP(11, 43); \
	SWAP(12, 44); \
	SWAP(13, 45); \
	SWAP(14, 46); \
	SWAP(15, 47); \
	SWAP(16, 48); \
	SWAP(17, 49); \
	SWAP(18, 50); \
	SWAP(19, 51); \
	SWAP(20, 52); \
	SWAP(21, 53); \
	SWAP(22, 54); \
	SWAP(23, 55); \
	SWAP(24, 56); \
	SWAP(25, 57); \
	SWAP(26, 58); \
	SWAP(27, 59); \
	SWAP(28, 60); \
	SWAP(29, 61); \
	SWAP(30, 62); \
	SWAP(31, 63); \
} while (0)
/*
 * H(): one full pass = the sixteen round macros in fixed order, alternating
 * H1_* (writes B[32..63]) and H2_* (writes B[0..31]) for the suffixes
 * k0, k96, k192, k288, k384, k480, k576, k672. H2_k48 is not part of the
 * sequence.
 *
 * Wrapped in do { } while (0) so the sixteen invocations form ONE statement;
 * without it `for (...) H();` would repeat only the first round.
 */
#define H() do { \
	H1_k0();   H2_k0(); \
	H1_k96();  H2_k96(); \
	H1_k192(); H2_k192(); \
	H1_k288(); H2_k288(); \
	H1_k384(); H2_k384(); \
	H1_k480(); H2_k480(); \
	H1_k576(); H2_k576(); \
	H1_k672(); H2_k672(); \
} while (0)
447
/*
 * DES_bs_25: run 25 passes of the 16-round sequence H() over a 64-entry
 * block B and store the resulting 64 entries.
 *
 * des_bs_key        56 key vectors per work-item; entry i of work-item
 *                   `section` is read from des_bs_key[section + i * gws].
 * unchecked_hashes  output; entry i of work-item `section` is written to
 *                   unchecked_hashes[section + i * gws].
 *
 * B, tmp, section and gws (plus s_des_bs_key and s_key_offset when
 * WORK_GROUP_SIZE > 0) are referenced by name from the z(), SWAP() and round
 * macros, so they must keep exactly these names.
 */
__kernel void DES_bs_25(__global DES_bs_vector *des_bs_key,
			__global vtype *unchecked_hashes)
{
		vtype B[64], tmp;
		int i, iterations;
		int gws = get_global_size(0);
		int section = get_global_id(0);

#if WORK_GROUP_SIZE > 0
		/* Per-work-item copy of the 56 key vectors in local memory;
		 * each work-item owns the slice starting at s_key_offset. */
		__local DES_bs_vector s_des_bs_key[56 * WORK_GROUP_SIZE];
		int lid = get_local_id(0);
		int s_key_offset = 56 * lid;

		for (i = 0; i < 56; i++) {
			s_des_bs_key[s_key_offset + i] = des_bs_key[section + i * gws];
		}
		barrier(CLK_LOCAL_MEM_FENCE);
#endif

		/* DES_bs_clear_block is defined outside this file; presumably it
		 * initializes B from `zero` - confirm in the params header. */
		{
			vtype zero = 0;
			DES_bs_clear_block
		}

		/* 25 passes; `iterations` is only a counter. Each pass ends by
		 * swapping the two halves of B. */
#pragma unroll 1
		for (iterations = 0; iterations < 25; iterations++) {
			H();
			BIG_SWAP();
		}

		/* A second swap right after the last pass's swap puts B back in
		 * the arrangement the final H() produced. */
		BIG_SWAP();

		for (i = 0; i < 64; i++) {
			unchecked_hashes[section + i * gws] = B[i];
		}
}
483