1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6#define NEW_SIMD_CODE
7
8#ifdef KERNEL_STATIC
9#include "inc_vendor.h"
10#include "inc_types.h"
11#include "inc_platform.cl"
12#include "inc_common.cl"
13#include "inc_rp_optimized.h"
14#include "inc_rp_optimized.cl"
15#include "inc_simd.cl"
16#endif
17
18CONSTANT_VK u32a c_tables[4][256] =
19{
20  {
21    0x00072000, 0x00075000, 0x00074800, 0x00071000,
22    0x00076800, 0x00074000, 0x00070000, 0x00077000,
23    0x00073000, 0x00075800, 0x00070800, 0x00076000,
24    0x00073800, 0x00077800, 0x00072800, 0x00071800,
25    0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
26    0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
27    0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
28    0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
29    0x00022000, 0x00025000, 0x00024800, 0x00021000,
30    0x00026800, 0x00024000, 0x00020000, 0x00027000,
31    0x00023000, 0x00025800, 0x00020800, 0x00026000,
32    0x00023800, 0x00027800, 0x00022800, 0x00021800,
33    0x00062000, 0x00065000, 0x00064800, 0x00061000,
34    0x00066800, 0x00064000, 0x00060000, 0x00067000,
35    0x00063000, 0x00065800, 0x00060800, 0x00066000,
36    0x00063800, 0x00067800, 0x00062800, 0x00061800,
37    0x00032000, 0x00035000, 0x00034800, 0x00031000,
38    0x00036800, 0x00034000, 0x00030000, 0x00037000,
39    0x00033000, 0x00035800, 0x00030800, 0x00036000,
40    0x00033800, 0x00037800, 0x00032800, 0x00031800,
41    0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
42    0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
43    0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
44    0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
45    0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
46    0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
47    0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
48    0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
49    0x00052000, 0x00055000, 0x00054800, 0x00051000,
50    0x00056800, 0x00054000, 0x00050000, 0x00057000,
51    0x00053000, 0x00055800, 0x00050800, 0x00056000,
52    0x00053800, 0x00057800, 0x00052800, 0x00051800,
53    0x00012000, 0x00015000, 0x00014800, 0x00011000,
54    0x00016800, 0x00014000, 0x00010000, 0x00017000,
55    0x00013000, 0x00015800, 0x00010800, 0x00016000,
56    0x00013800, 0x00017800, 0x00012800, 0x00011800,
57    0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
58    0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
59    0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
60    0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
61    0x00042000, 0x00045000, 0x00044800, 0x00041000,
62    0x00046800, 0x00044000, 0x00040000, 0x00047000,
63    0x00043000, 0x00045800, 0x00040800, 0x00046000,
64    0x00043800, 0x00047800, 0x00042800, 0x00041800,
65    0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
66    0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
67    0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
68    0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
69    0x00002000, 0x00005000, 0x00004800, 0x00001000,
70    0x00006800, 0x00004000, 0x00000000, 0x00007000,
71    0x00003000, 0x00005800, 0x00000800, 0x00006000,
72    0x00003800, 0x00007800, 0x00002800, 0x00001800,
73    0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
74    0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
75    0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
76    0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
77    0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
78    0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
79    0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
80    0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
81    0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
82    0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
83    0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
84    0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
85  },
86  {
87    0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
88    0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
89    0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
90    0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
91    0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
92    0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
93    0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
94    0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
95    0x05280000, 0x05400000, 0x05080000, 0x05680000,
96    0x05500000, 0x05180000, 0x05200000, 0x05100000,
97    0x05700000, 0x05780000, 0x05600000, 0x05380000,
98    0x05300000, 0x05000000, 0x05480000, 0x05580000,
99    0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
100    0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
101    0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
102    0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
103    0x00280000, 0x00400000, 0x00080000, 0x00680000,
104    0x00500000, 0x00180000, 0x00200000, 0x00100000,
105    0x00700000, 0x00780000, 0x00600000, 0x00380000,
106    0x00300000, 0x00000000, 0x00480000, 0x00580000,
107    0x04280000, 0x04400000, 0x04080000, 0x04680000,
108    0x04500000, 0x04180000, 0x04200000, 0x04100000,
109    0x04700000, 0x04780000, 0x04600000, 0x04380000,
110    0x04300000, 0x04000000, 0x04480000, 0x04580000,
111    0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
112    0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
113    0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
114    0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
115    0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
116    0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
117    0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
118    0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
119    0x07280000, 0x07400000, 0x07080000, 0x07680000,
120    0x07500000, 0x07180000, 0x07200000, 0x07100000,
121    0x07700000, 0x07780000, 0x07600000, 0x07380000,
122    0x07300000, 0x07000000, 0x07480000, 0x07580000,
123    0x02280000, 0x02400000, 0x02080000, 0x02680000,
124    0x02500000, 0x02180000, 0x02200000, 0x02100000,
125    0x02700000, 0x02780000, 0x02600000, 0x02380000,
126    0x02300000, 0x02000000, 0x02480000, 0x02580000,
127    0x03280000, 0x03400000, 0x03080000, 0x03680000,
128    0x03500000, 0x03180000, 0x03200000, 0x03100000,
129    0x03700000, 0x03780000, 0x03600000, 0x03380000,
130    0x03300000, 0x03000000, 0x03480000, 0x03580000,
131    0x06280000, 0x06400000, 0x06080000, 0x06680000,
132    0x06500000, 0x06180000, 0x06200000, 0x06100000,
133    0x06700000, 0x06780000, 0x06600000, 0x06380000,
134    0x06300000, 0x06000000, 0x06480000, 0x06580000,
135    0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
136    0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
137    0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
138    0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
139    0x01280000, 0x01400000, 0x01080000, 0x01680000,
140    0x01500000, 0x01180000, 0x01200000, 0x01100000,
141    0x01700000, 0x01780000, 0x01600000, 0x01380000,
142    0x01300000, 0x01000000, 0x01480000, 0x01580000,
143    0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
144    0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
145    0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
146    0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
147    0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
148    0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
149    0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
150    0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
151  },
152  {
153    0x30000002, 0x60000002, 0x38000002, 0x08000002,
154    0x28000002, 0x78000002, 0x68000002, 0x40000002,
155    0x20000002, 0x50000002, 0x48000002, 0x70000002,
156    0x00000002, 0x18000002, 0x58000002, 0x10000002,
157    0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
158    0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
159    0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
160    0x80000005, 0x98000005, 0xd8000005, 0x90000005,
161    0x30000005, 0x60000005, 0x38000005, 0x08000005,
162    0x28000005, 0x78000005, 0x68000005, 0x40000005,
163    0x20000005, 0x50000005, 0x48000005, 0x70000005,
164    0x00000005, 0x18000005, 0x58000005, 0x10000005,
165    0x30000000, 0x60000000, 0x38000000, 0x08000000,
166    0x28000000, 0x78000000, 0x68000000, 0x40000000,
167    0x20000000, 0x50000000, 0x48000000, 0x70000000,
168    0x00000000, 0x18000000, 0x58000000, 0x10000000,
169    0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
170    0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
171    0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
172    0x80000003, 0x98000003, 0xd8000003, 0x90000003,
173    0x30000001, 0x60000001, 0x38000001, 0x08000001,
174    0x28000001, 0x78000001, 0x68000001, 0x40000001,
175    0x20000001, 0x50000001, 0x48000001, 0x70000001,
176    0x00000001, 0x18000001, 0x58000001, 0x10000001,
177    0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
178    0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
179    0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
180    0x80000000, 0x98000000, 0xd8000000, 0x90000000,
181    0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
182    0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
183    0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
184    0x80000006, 0x98000006, 0xd8000006, 0x90000006,
185    0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
186    0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
187    0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
188    0x80000001, 0x98000001, 0xd8000001, 0x90000001,
189    0x30000003, 0x60000003, 0x38000003, 0x08000003,
190    0x28000003, 0x78000003, 0x68000003, 0x40000003,
191    0x20000003, 0x50000003, 0x48000003, 0x70000003,
192    0x00000003, 0x18000003, 0x58000003, 0x10000003,
193    0x30000004, 0x60000004, 0x38000004, 0x08000004,
194    0x28000004, 0x78000004, 0x68000004, 0x40000004,
195    0x20000004, 0x50000004, 0x48000004, 0x70000004,
196    0x00000004, 0x18000004, 0x58000004, 0x10000004,
197    0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
198    0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
199    0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
200    0x80000002, 0x98000002, 0xd8000002, 0x90000002,
201    0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
202    0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
203    0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
204    0x80000004, 0x98000004, 0xd8000004, 0x90000004,
205    0x30000006, 0x60000006, 0x38000006, 0x08000006,
206    0x28000006, 0x78000006, 0x68000006, 0x40000006,
207    0x20000006, 0x50000006, 0x48000006, 0x70000006,
208    0x00000006, 0x18000006, 0x58000006, 0x10000006,
209    0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
210    0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
211    0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
212    0x80000007, 0x98000007, 0xd8000007, 0x90000007,
213    0x30000007, 0x60000007, 0x38000007, 0x08000007,
214    0x28000007, 0x78000007, 0x68000007, 0x40000007,
215    0x20000007, 0x50000007, 0x48000007, 0x70000007,
216    0x00000007, 0x18000007, 0x58000007, 0x10000007,
217  },
218  {
219    0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
220    0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
221    0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
222    0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
223    0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
224    0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
225    0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
226    0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
227    0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
228    0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
229    0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
230    0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
231    0x00000068, 0x00000058, 0x00000020, 0x00000008,
232    0x00000018, 0x00000078, 0x00000028, 0x00000048,
233    0x00000000, 0x00000050, 0x00000070, 0x00000038,
234    0x00000030, 0x00000040, 0x00000010, 0x00000060,
235    0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
236    0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
237    0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
238    0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
239    0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
240    0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
241    0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
242    0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
243    0x00000568, 0x00000558, 0x00000520, 0x00000508,
244    0x00000518, 0x00000578, 0x00000528, 0x00000548,
245    0x00000500, 0x00000550, 0x00000570, 0x00000538,
246    0x00000530, 0x00000540, 0x00000510, 0x00000560,
247    0x00000268, 0x00000258, 0x00000220, 0x00000208,
248    0x00000218, 0x00000278, 0x00000228, 0x00000248,
249    0x00000200, 0x00000250, 0x00000270, 0x00000238,
250    0x00000230, 0x00000240, 0x00000210, 0x00000260,
251    0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
252    0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
253    0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
254    0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
255    0x00000168, 0x00000158, 0x00000120, 0x00000108,
256    0x00000118, 0x00000178, 0x00000128, 0x00000148,
257    0x00000100, 0x00000150, 0x00000170, 0x00000138,
258    0x00000130, 0x00000140, 0x00000110, 0x00000160,
259    0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
260    0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
261    0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
262    0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
263    0x00000768, 0x00000758, 0x00000720, 0x00000708,
264    0x00000718, 0x00000778, 0x00000728, 0x00000748,
265    0x00000700, 0x00000750, 0x00000770, 0x00000738,
266    0x00000730, 0x00000740, 0x00000710, 0x00000760,
267    0x00000368, 0x00000358, 0x00000320, 0x00000308,
268    0x00000318, 0x00000378, 0x00000328, 0x00000348,
269    0x00000300, 0x00000350, 0x00000370, 0x00000338,
270    0x00000330, 0x00000340, 0x00000310, 0x00000360,
271    0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
272    0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
273    0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
274    0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
275    0x00000468, 0x00000458, 0x00000420, 0x00000408,
276    0x00000418, 0x00000478, 0x00000428, 0x00000448,
277    0x00000400, 0x00000450, 0x00000470, 0x00000438,
278    0x00000430, 0x00000440, 0x00000410, 0x00000460,
279    0x00000668, 0x00000658, 0x00000620, 0x00000608,
280    0x00000618, 0x00000678, 0x00000628, 0x00000648,
281    0x00000600, 0x00000650, 0x00000670, 0x00000638,
282    0x00000630, 0x00000640, 0x00000610, 0x00000660,
283  }
284};
285
/*
 * BOX (idx, row, tab): table lookup tab[row][idx], applied per lane.
 *
 * With VECT_SIZE == 1 the index is used directly. For wider vectors every
 * component of idx (.s0, .s1, ...) is looked up on its own and the results
 * are packed back together with make_u32x (), in lane order.
 * No definition is provided for any other VECT_SIZE.
 */
#if   VECT_SIZE == 1
#define BOX(idx,row,tab) (tab)[(row)][(idx)]
#elif VECT_SIZE == 2
#define BOX(idx,row,tab)      \
  make_u32x (                 \
    (tab)[(row)][(idx).s0],   \
    (tab)[(row)][(idx).s1])
#elif VECT_SIZE == 4
#define BOX(idx,row,tab)      \
  make_u32x (                 \
    (tab)[(row)][(idx).s0],   \
    (tab)[(row)][(idx).s1],   \
    (tab)[(row)][(idx).s2],   \
    (tab)[(row)][(idx).s3])
#elif VECT_SIZE == 8
#define BOX(idx,row,tab)      \
  make_u32x (                 \
    (tab)[(row)][(idx).s0],   \
    (tab)[(row)][(idx).s1],   \
    (tab)[(row)][(idx).s2],   \
    (tab)[(row)][(idx).s3],   \
    (tab)[(row)][(idx).s4],   \
    (tab)[(row)][(idx).s5],   \
    (tab)[(row)][(idx).s6],   \
    (tab)[(row)][(idx).s7])
#elif VECT_SIZE == 16
#define BOX(idx,row,tab)      \
  make_u32x (                 \
    (tab)[(row)][(idx).s0],   \
    (tab)[(row)][(idx).s1],   \
    (tab)[(row)][(idx).s2],   \
    (tab)[(row)][(idx).s3],   \
    (tab)[(row)][(idx).s4],   \
    (tab)[(row)][(idx).s5],   \
    (tab)[(row)][(idx).s6],   \
    (tab)[(row)][(idx).s7],   \
    (tab)[(row)][(idx).s8],   \
    (tab)[(row)][(idx).s9],   \
    (tab)[(row)][(idx).sa],   \
    (tab)[(row)][(idx).sb],   \
    (tab)[(row)][(idx).sc],   \
    (tab)[(row)][(idx).sd],   \
    (tab)[(row)][(idx).se],   \
    (tab)[(row)][(idx).sf])
#endif
297
/*
 * _round (k1, k2, tbl): two dependent update steps on the variables `l` and
 * `r`, which must already exist in the scope where the macro is expanded
 * (R () declares them):
 *
 *   l ^= F (k1 + r);
 *   r ^= F (k2 + l);      (uses the l that was just updated)
 *
 * F (x) is the XOR of four lookups, tbl[n][byte n of x] for n = 0..3,
 * fetched through BOX ().
 *
 * The scratch variable carries a macro-private name: with the former name
 * `t`, passing a table or key expression that itself mentions `t` would
 * silently bind to the scratch variable instead of the caller's object.
 */
#define _round(k1,k2,tbl)                          \
{                                                  \
  u32x round_sum_;                                 \
  round_sum_ = (k1) + r;                           \
  l ^= BOX (((round_sum_ >>  0) & 0xff), 0, tbl) ^ \
       BOX (((round_sum_ >>  8) & 0xff), 1, tbl) ^ \
       BOX (((round_sum_ >> 16) & 0xff), 2, tbl) ^ \
       BOX (((round_sum_ >> 24) & 0xff), 3, tbl);  \
  round_sum_ = (k2) + l;                           \
  r ^= BOX (((round_sum_ >>  0) & 0xff), 0, tbl) ^ \
       BOX (((round_sum_ >>  8) & 0xff), 1, tbl) ^ \
       BOX (((round_sum_ >> 16) & 0xff), 2, tbl) ^ \
       BOX (((round_sum_ >> 24) & 0xff), 3, tbl);  \
}
312
/*
 * R (k, h, s, i, t): run 16 _round () invocations over one pair of words.
 *
 *   k  eight key words, read as k[0] .. k[7]
 *   h  input words;  h[i] seeds r and h[i + 1] seeds l
 *   s  output words; s[i] receives l and s[i + 1] receives r (i.e. swapped
 *      relative to how they were loaded)
 *   i  index of the pair inside h and s
 *   t  lookup tables, forwarded to _round ()
 *
 * Key order: the pairs (0,1) (2,3) (4,5) (6,7) three times in a row, then
 * once reversed as (7,6) (5,4) (3,2) (1,0).
 *
 * All parameters are parenthesized so that expression arguments such as
 * `base + 8` index correctly. The locals must stay named `r` and `l`
 * because _round () refers to them by name; do not pass arguments that are
 * themselves called `r` or `l`.
 */
#define R(k,h,s,i,t)                \
{                                   \
  u32x r;                           \
  u32x l;                           \
  r = (h)[(i) + 0];                 \
  l = (h)[(i) + 1];                 \
  _round ((k)[0], (k)[1], (t));     \
  _round ((k)[2], (k)[3], (t));     \
  _round ((k)[4], (k)[5], (t));     \
  _round ((k)[6], (k)[7], (t));     \
  _round ((k)[0], (k)[1], (t));     \
  _round ((k)[2], (k)[3], (t));     \
  _round ((k)[4], (k)[5], (t));     \
  _round ((k)[6], (k)[7], (t));     \
  _round ((k)[0], (k)[1], (t));     \
  _round ((k)[2], (k)[3], (t));     \
  _round ((k)[4], (k)[5], (t));     \
  _round ((k)[6], (k)[7], (t));     \
  _round ((k)[7], (k)[6], (t));     \
  _round ((k)[5], (k)[4], (t));     \
  _round ((k)[3], (k)[2], (t));     \
  _round ((k)[1], (k)[0], (t));     \
  (s)[(i) + 0] = l;                 \
  (s)[(i) + 1] = r;                 \
}
338
/*
 * X (w, u, v): word-wise XOR of two 8-word arrays, w[j] = u[j] ^ v[j].
 *
 * The eight assignments are enclosed in a block, like the other block
 * macros in this file, so that a guard such as `if (c) X (w, u, v);`
 * covers all of them instead of only the first. (A call directly followed
 * by `else` still needs braces at the call site.) Parameters are
 * parenthesized so pointer expressions can be passed.
 */
#define X(w,u,v)              \
{                             \
  (w)[0] = (u)[0] ^ (v)[0];   \
  (w)[1] = (u)[1] ^ (v)[1];   \
  (w)[2] = (u)[2] ^ (v)[2];   \
  (w)[3] = (u)[3] ^ (v)[3];   \
  (w)[4] = (u)[4] ^ (v)[4];   \
  (w)[5] = (u)[5] ^ (v)[5];   \
  (w)[6] = (u)[6] ^ (v)[6];   \
  (w)[7] = (u)[7] ^ (v)[7];   \
}
348
/*
 * P (k, w): byte transposition of eight words.
 *
 * k[b] (b = 0..3) collects byte b of w[0], w[2], w[4], w[6], placed in
 * output bytes 0, 1, 2, 3 respectively. k[4 + b] does the same for the odd
 * words w[1], w[3], w[5], w[7].
 *
 * The statements are enclosed in a block so the macro acts as a single
 * statement under an unbraced `if`/loop, and the parameters are
 * parenthesized so pointer expressions can be passed.
 */
#define P(k,w)                            \
{                                         \
  (k)[0] = (((w)[0] & 0x000000ff) <<  0)  \
         | (((w)[2] & 0x000000ff) <<  8)  \
         | (((w)[4] & 0x000000ff) << 16)  \
         | (((w)[6] & 0x000000ff) << 24); \
  (k)[1] = (((w)[0] & 0x0000ff00) >>  8)  \
         | (((w)[2] & 0x0000ff00) >>  0)  \
         | (((w)[4] & 0x0000ff00) <<  8)  \
         | (((w)[6] & 0x0000ff00) << 16); \
  (k)[2] = (((w)[0] & 0x00ff0000) >> 16)  \
         | (((w)[2] & 0x00ff0000) >>  8)  \
         | (((w)[4] & 0x00ff0000) <<  0)  \
         | (((w)[6] & 0x00ff0000) <<  8); \
  (k)[3] = (((w)[0] & 0xff000000) >> 24)  \
         | (((w)[2] & 0xff000000) >> 16)  \
         | (((w)[4] & 0xff000000) >>  8)  \
         | (((w)[6] & 0xff000000) >>  0); \
  (k)[4] = (((w)[1] & 0x000000ff) <<  0)  \
         | (((w)[3] & 0x000000ff) <<  8)  \
         | (((w)[5] & 0x000000ff) << 16)  \
         | (((w)[7] & 0x000000ff) << 24); \
  (k)[5] = (((w)[1] & 0x0000ff00) >>  8)  \
         | (((w)[3] & 0x0000ff00) >>  0)  \
         | (((w)[5] & 0x0000ff00) <<  8)  \
         | (((w)[7] & 0x0000ff00) << 16); \
  (k)[6] = (((w)[1] & 0x00ff0000) >> 16)  \
         | (((w)[3] & 0x00ff0000) >>  8)  \
         | (((w)[5] & 0x00ff0000) <<  0)  \
         | (((w)[7] & 0x00ff0000) <<  8); \
  (k)[7] = (((w)[1] & 0xff000000) >> 24)  \
         | (((w)[3] & 0xff000000) >> 16)  \
         | (((w)[5] & 0xff000000) >>  8)  \
         | (((w)[7] & 0xff000000) >>  0); \
}
382
/*
 * A (x): in-place update of an 8-word array.
 *
 * Words move down by two positions (x[0..5] = old x[2..7]); the freed top
 * pair becomes x[6] = old x[0] ^ old x[2] and x[7] = old x[1] ^ old x[3].
 *
 * The parameter is parenthesized so pointer expressions can be passed, and
 * the temporaries have macro-private names so an argument called `l` or
 * `r` is no longer captured by the macro's own locals.
 */
#define A(x)                                \
{                                           \
  const u32x a_fold0_ = (x)[0] ^ (x)[2];    \
  const u32x a_fold1_ = (x)[1] ^ (x)[3];    \
  (x)[0] = (x)[2];                          \
  (x)[1] = (x)[3];                          \
  (x)[2] = (x)[4];                          \
  (x)[3] = (x)[5];                          \
  (x)[4] = (x)[6];                          \
  (x)[5] = (x)[7];                          \
  (x)[6] = a_fold0_;                        \
  (x)[7] = a_fold1_;                        \
}
398
/*
 * AA (x): in-place update of an 8-word array, handled as an even-index
 * chain (x[0], x[2], x[4], x[6]) and an identical odd-index chain.
 *
 * Expressed in terms of the values on entry:
 *   x[0] = x[4]          x[1] = x[5]
 *   x[2] = x[6]          x[3] = x[7]
 *   x[4] = x[0] ^ x[2]   x[5] = x[1] ^ x[3]
 *   x[6] = x[4] ^ x[2]   x[7] = x[5] ^ x[3]
 *
 * The parameter is parenthesized so pointer expressions can be passed, and
 * the temporaries have macro-private names so an argument called `l` or
 * `r` is no longer captured by the macro's own locals.
 */
#define AA(x)                         \
{                                     \
  u32x aa_keep_;                      \
  u32x aa_mix_;                       \
  aa_keep_ = (x)[0];                  \
  aa_mix_  = (x)[2];                  \
  (x)[0] = (x)[4];                    \
  (x)[2] = (x)[6];                    \
  (x)[4] = aa_keep_ ^ aa_mix_;        \
  (x)[6] = (x)[0] ^ aa_mix_;          \
  aa_keep_ = (x)[1];                  \
  aa_mix_  = (x)[3];                  \
  (x)[1] = (x)[5];                    \
  (x)[3] = (x)[7];                    \
  (x)[5] = aa_keep_ ^ aa_mix_;        \
  (x)[7] = (x)[1] ^ aa_mix_;          \
}
416
/*
 * C (x): XOR a fixed 8-word constant into x[0..7] in place.
 *
 * The statements are enclosed in a block so the macro acts as a single
 * statement under an unbraced `if`/loop (previously only x[0] would have
 * been guarded), and the parameter is parenthesized so pointer expressions
 * can be passed.
 */
#define C(x)              \
{                         \
  (x)[0] ^= 0xff00ff00;   \
  (x)[1] ^= 0xff00ff00;   \
  (x)[2] ^= 0x00ff00ff;   \
  (x)[3] ^= 0x00ff00ff;   \
  (x)[4] ^= 0x00ffff00;   \
  (x)[5] ^= 0xff0000ff;   \
  (x)[6] ^= 0x000000ff;   \
  (x)[7] ^= 0xff00ffff;   \
}
426
/*
 * SHIFT12 (u, m, s): u[j] = m[j] ^ L_j (s) for j = 0..7, where every L_j is
 * a fixed XOR of 16-bit shifted and half-word masked words of s (no
 * additions, no table lookups). u must not alias s, because s is re-read
 * after u has been partly written.
 *
 * The statements are enclosed in a block so the macro acts as a single
 * statement under an unbraced `if`/loop, and the parameters are
 * parenthesized so pointer expressions can be passed.
 */
#define SHIFT12(u,m,s)                    \
{                                         \
  (u)[0] = (m)[0] ^ (s)[6];               \
  (u)[1] = (m)[1] ^ (s)[7];               \
  (u)[2] = (m)[2] ^ ((s)[0] << 16)        \
                  ^ ((s)[0] >> 16)        \
                  ^ ((s)[0] & 0x0000ffff) \
                  ^ ((s)[1] & 0x0000ffff) \
                  ^ ((s)[1] >> 16)        \
                  ^ ((s)[2] << 16)        \
                  ^ (s)[6]                \
                  ^ ((s)[6] << 16)        \
                  ^ ((s)[7] & 0xffff0000) \
                  ^ ((s)[7] >> 16);       \
  (u)[3] = (m)[3] ^ ((s)[0] & 0x0000ffff) \
                  ^ ((s)[0] << 16)        \
                  ^ ((s)[1] & 0x0000ffff) \
                  ^ ((s)[1] << 16)        \
                  ^ ((s)[1] >> 16)        \
                  ^ ((s)[2] << 16)        \
                  ^ ((s)[2] >> 16)        \
                  ^ ((s)[3] << 16)        \
                  ^ (s)[6]                \
                  ^ ((s)[6] << 16)        \
                  ^ ((s)[6] >> 16)        \
                  ^ ((s)[7] & 0x0000ffff) \
                  ^ ((s)[7] << 16)        \
                  ^ ((s)[7] >> 16);       \
  (u)[4] = (m)[4] ^ ((s)[0] & 0xffff0000) \
                  ^ ((s)[0] << 16)        \
                  ^ ((s)[0] >> 16)        \
                  ^ ((s)[1] & 0xffff0000) \
                  ^ ((s)[1] >> 16)        \
                  ^ ((s)[2] << 16)        \
                  ^ ((s)[2] >> 16)        \
                  ^ ((s)[3] << 16)        \
                  ^ ((s)[3] >> 16)        \
                  ^ ((s)[4] << 16)        \
                  ^ ((s)[6] << 16)        \
                  ^ ((s)[6] >> 16)        \
                  ^ ((s)[7] & 0x0000ffff) \
                  ^ ((s)[7] << 16)        \
                  ^ ((s)[7] >> 16);       \
  (u)[5] = (m)[5] ^ ((s)[0] << 16)        \
                  ^ ((s)[0] >> 16)        \
                  ^ ((s)[0] & 0xffff0000) \
                  ^ ((s)[1] & 0x0000ffff) \
                  ^ (s)[2]                \
                  ^ ((s)[2] >> 16)        \
                  ^ ((s)[3] << 16)        \
                  ^ ((s)[3] >> 16)        \
                  ^ ((s)[4] << 16)        \
                  ^ ((s)[4] >> 16)        \
                  ^ ((s)[5] << 16)        \
                  ^ ((s)[6] << 16)        \
                  ^ ((s)[6] >> 16)        \
                  ^ ((s)[7] & 0xffff0000) \
                  ^ ((s)[7] << 16)        \
                  ^ ((s)[7] >> 16);       \
  (u)[6] = (m)[6] ^ (s)[0]                \
                  ^ ((s)[1] >> 16)        \
                  ^ ((s)[2] << 16)        \
                  ^ (s)[3]                \
                  ^ ((s)[3] >> 16)        \
                  ^ ((s)[4] << 16)        \
                  ^ ((s)[4] >> 16)        \
                  ^ ((s)[5] << 16)        \
                  ^ ((s)[5] >> 16)        \
                  ^ (s)[6]                \
                  ^ ((s)[6] << 16)        \
                  ^ ((s)[6] >> 16)        \
                  ^ ((s)[7] << 16);       \
  (u)[7] = (m)[7] ^ ((s)[0] & 0xffff0000) \
                  ^ ((s)[0] << 16)        \
                  ^ ((s)[1] & 0x0000ffff) \
                  ^ ((s)[1] << 16)        \
                  ^ ((s)[2] >> 16)        \
                  ^ ((s)[3] << 16)        \
                  ^ (s)[4]                \
                  ^ ((s)[4] >> 16)        \
                  ^ ((s)[5] << 16)        \
                  ^ ((s)[5] >> 16)        \
                  ^ ((s)[6] >> 16)        \
                  ^ ((s)[7] & 0x0000ffff) \
                  ^ ((s)[7] << 16)        \
                  ^ ((s)[7] >> 16);       \
}
512
/*
 * SHIFT16 (h, v, u): v[j] = h[j] ^ L_j (u) for j = 0..7.
 *
 * For j = 0..6 the term is (u[j + 1] << 16) ^ (u[j] >> 16), i.e. the
 * 16 bits straddling two neighbouring words; v[7] adds a longer XOR of
 * shifted/masked parts of u[0], u[1], u[6] and u[7]. Only h[0..7] is read.
 * v must not alias u, because u is re-read after v has been partly written.
 *
 * The statements are enclosed in a block so the macro acts as a single
 * statement under an unbraced `if`/loop, and the parameters are
 * parenthesized so pointer expressions can be passed.
 */
#define SHIFT16(h,v,u)                    \
{                                         \
  (v)[0] = (h)[0] ^ ((u)[1] << 16)        \
                  ^ ((u)[0] >> 16);       \
  (v)[1] = (h)[1] ^ ((u)[2] << 16)        \
                  ^ ((u)[1] >> 16);       \
  (v)[2] = (h)[2] ^ ((u)[3] << 16)        \
                  ^ ((u)[2] >> 16);       \
  (v)[3] = (h)[3] ^ ((u)[4] << 16)        \
                  ^ ((u)[3] >> 16);       \
  (v)[4] = (h)[4] ^ ((u)[5] << 16)        \
                  ^ ((u)[4] >> 16);       \
  (v)[5] = (h)[5] ^ ((u)[6] << 16)        \
                  ^ ((u)[5] >> 16);       \
  (v)[6] = (h)[6] ^ ((u)[7] << 16)        \
                  ^ ((u)[6] >> 16);       \
  (v)[7] = (h)[7] ^ ((u)[0] & 0xffff0000) \
                  ^ ((u)[0] << 16)        \
                  ^ ((u)[7] >> 16)        \
                  ^ ((u)[1] & 0xffff0000) \
                  ^ ((u)[1] << 16)        \
                  ^ ((u)[6] << 16)        \
                  ^ ((u)[7] & 0xffff0000);\
}
535
/*
 * SHIFT61 (h, v): overwrite h[0..7] with fixed XOR combinations of 16-bit
 * shifted and half-word masked words of v (no additions, no table
 * lookups). The previous contents of h[0..7] are not read. h must not
 * alias v, because v is re-read after h has been partly written.
 *
 * The statements are enclosed in a block so the macro acts as a single
 * statement under an unbraced `if`/loop, and the parameters are
 * parenthesized so pointer expressions can be passed.
 */
#define SHIFT61(h,v)              \
{                                 \
  (h)[0] = ((v)[0] & 0xffff0000)  \
         ^ ((v)[0] << 16)         \
         ^ ((v)[0] >> 16)         \
         ^ ((v)[1] >> 16)         \
         ^ ((v)[1] & 0xffff0000)  \
         ^ ((v)[2] << 16)         \
         ^ ((v)[3] >> 16)         \
         ^ ((v)[4] << 16)         \
         ^ ((v)[5] >> 16)         \
         ^ (v)[5]                 \
         ^ ((v)[6] >> 16)         \
         ^ ((v)[7] << 16)         \
         ^ ((v)[7] >> 16)         \
         ^ ((v)[7] & 0x0000ffff); \
  (h)[1] = ((v)[0] << 16)         \
         ^ ((v)[0] >> 16)         \
         ^ ((v)[0] & 0xffff0000)  \
         ^ ((v)[1] & 0x0000ffff)  \
         ^ (v)[2]                 \
         ^ ((v)[2] >> 16)         \
         ^ ((v)[3] << 16)         \
         ^ ((v)[4] >> 16)         \
         ^ ((v)[5] << 16)         \
         ^ ((v)[6] << 16)         \
         ^ (v)[6]                 \
         ^ ((v)[7] & 0xffff0000)  \
         ^ ((v)[7] >> 16);        \
  (h)[2] = ((v)[0] & 0x0000ffff)  \
         ^ ((v)[0] << 16)         \
         ^ ((v)[1] << 16)         \
         ^ ((v)[1] >> 16)         \
         ^ ((v)[1] & 0xffff0000)  \
         ^ ((v)[2] << 16)         \
         ^ ((v)[3] >> 16)         \
         ^ (v)[3]                 \
         ^ ((v)[4] << 16)         \
         ^ ((v)[5] >> 16)         \
         ^ (v)[6]                 \
         ^ ((v)[6] >> 16)         \
         ^ ((v)[7] & 0x0000ffff)  \
         ^ ((v)[7] << 16)         \
         ^ ((v)[7] >> 16);        \
  (h)[3] = ((v)[0] << 16)         \
         ^ ((v)[0] >> 16)         \
         ^ ((v)[0] & 0xffff0000)  \
         ^ ((v)[1] & 0xffff0000)  \
         ^ ((v)[1] >> 16)         \
         ^ ((v)[2] << 16)         \
         ^ ((v)[2] >> 16)         \
         ^ (v)[2]                 \
         ^ ((v)[3] << 16)         \
         ^ ((v)[4] >> 16)         \
         ^ (v)[4]                 \
         ^ ((v)[5] << 16)         \
         ^ ((v)[6] << 16)         \
         ^ ((v)[7] & 0x0000ffff)  \
         ^ ((v)[7] >> 16);        \
  (h)[4] = ((v)[0] >> 16)         \
         ^ ((v)[1] << 16)         \
         ^ (v)[1]                 \
         ^ ((v)[2] >> 16)         \
         ^ (v)[2]                 \
         ^ ((v)[3] << 16)         \
         ^ ((v)[3] >> 16)         \
         ^ (v)[3]                 \
         ^ ((v)[4] << 16)         \
         ^ ((v)[5] >> 16)         \
         ^ (v)[5]                 \
         ^ ((v)[6] << 16)         \
         ^ ((v)[6] >> 16)         \
         ^ ((v)[7] << 16);        \
  (h)[5] = ((v)[0] << 16)         \
         ^ ((v)[0] & 0xffff0000)  \
         ^ ((v)[1] << 16)         \
         ^ ((v)[1] >> 16)         \
         ^ ((v)[1] & 0xffff0000)  \
         ^ ((v)[2] << 16)         \
         ^ (v)[2]                 \
         ^ ((v)[3] >> 16)         \
         ^ (v)[3]                 \
         ^ ((v)[4] << 16)         \
         ^ ((v)[4] >> 16)         \
         ^ (v)[4]                 \
         ^ ((v)[5] << 16)         \
         ^ ((v)[6] << 16)         \
         ^ ((v)[6] >> 16)         \
         ^ (v)[6]                 \
         ^ ((v)[7] << 16)         \
         ^ ((v)[7] >> 16)         \
         ^ ((v)[7] & 0xffff0000); \
  (h)[6] = (v)[0]                 \
         ^ (v)[2]                 \
         ^ ((v)[2] >> 16)         \
         ^ (v)[3]                 \
         ^ ((v)[3] << 16)         \
         ^ (v)[4]                 \
         ^ ((v)[4] >> 16)         \
         ^ ((v)[5] << 16)         \
         ^ ((v)[5] >> 16)         \
         ^ (v)[5]                 \
         ^ ((v)[6] << 16)         \
         ^ ((v)[6] >> 16)         \
         ^ (v)[6]                 \
         ^ ((v)[7] << 16)         \
         ^ (v)[7];                \
  (h)[7] = (v)[0]                 \
         ^ ((v)[0] >> 16)         \
         ^ ((v)[1] << 16)         \
         ^ ((v)[1] >> 16)         \
         ^ ((v)[2] << 16)         \
         ^ ((v)[3] >> 16)         \
         ^ (v)[3]                 \
         ^ ((v)[4] << 16)         \
         ^ (v)[4]                 \
         ^ ((v)[5] >> 16)         \
         ^ (v)[5]                 \
         ^ ((v)[6] << 16)         \
         ^ ((v)[6] >> 16)         \
         ^ ((v)[7] << 16)         \
         ^ (v)[7];                \
}
657
/*
 * PASS0 (h, s, u, v, t): pass for word pair 0.
 *
 *   1. X: scratch = u ^ v
 *   2. P: key     = byte-transposed scratch
 *   3. R: process h[0], h[1] under key with tables t, storing into s[0], s[1]
 *   4. A (u), then AA (v): advance both arrays in place for the next pass
 */
#define PASS0(h,s,u,v,t)        \
{                               \
  u32x pass_mix_[8];            \
  u32x pass_key_[8];            \
                                \
  X (pass_mix_, u, v);          \
  P (pass_key_, pass_mix_);     \
  R (pass_key_, h, s, 0, t);    \
                                \
  A (u);                        \
  AA (v);                       \
}
668
/*
 * PASS2 (h, s, u, v, t): pass for word pair 2.
 *
 *   1. X: scratch = u ^ v
 *   2. P: key     = byte-transposed scratch
 *   3. R: process h[2], h[3] under key with tables t, storing into s[2], s[3]
 *   4. A (u), C (u), then AA (v): advance both arrays in place; this is the
 *      only pass that also applies the constant XOR C to u
 */
#define PASS2(h,s,u,v,t)        \
{                               \
  u32x pass_mix_[8];            \
  u32x pass_key_[8];            \
                                \
  X (pass_mix_, u, v);          \
  P (pass_key_, pass_mix_);     \
  R (pass_key_, h, s, 2, t);    \
                                \
  A (u);                        \
  C (u);                        \
  AA (v);                       \
}
680
/*
 * PASS4 (h, s, u, v, t): pass for word pair 4.
 *
 *   1. X: scratch = u ^ v
 *   2. P: key     = byte-transposed scratch
 *   3. R: process h[4], h[5] under key with tables t, storing into s[4], s[5]
 *   4. A (u), then AA (v): advance both arrays in place for the next pass
 */
#define PASS4(h,s,u,v,t)        \
{                               \
  u32x pass_mix_[8];            \
  u32x pass_key_[8];            \
                                \
  X (pass_mix_, u, v);          \
  P (pass_key_, pass_mix_);     \
  R (pass_key_, h, s, 4, t);    \
                                \
  A (u);                        \
  AA (v);                       \
}
691
/*
 * PASS6 (h, s, u, v, t): pass for word pair 6 (the last pair).
 *
 *   1. X: scratch = u ^ v
 *   2. P: key     = byte-transposed scratch
 *   3. R: process h[6], h[7] under key with tables t, storing into s[6], s[7]
 *
 * Unlike the earlier passes, u and v are left untouched afterwards.
 */
#define PASS6(h,s,u,v,t)        \
{                               \
  u32x pass_mix_[8];            \
  u32x pass_key_[8];            \
                                \
  X (pass_mix_, u, v);          \
  P (pass_key_, pass_mix_);     \
  R (pass_key_, h, s, 6, t);    \
}
700
701KERNEL_FQ void m06900_m04 (KERN_ATTR_RULES ())
702{
703  /**
704   * base
705   */
706
707  const u64 gid = get_global_id (0);
708  const u64 lid = get_local_id (0);
709  const u64 lsz = get_local_size (0);
710
711  /**
712   * sbox
713   */
714
715  LOCAL_VK u32 s_tables[4][256];
716
717  for (u32 i = lid; i < 256; i += lsz)
718  {
719    s_tables[0][i] = c_tables[0][i];
720    s_tables[1][i] = c_tables[1][i];
721    s_tables[2][i] = c_tables[2][i];
722    s_tables[3][i] = c_tables[3][i];
723  }
724
725  SYNC_THREADS ();
726
727  if (gid >= gid_max) return;
728
729  /**
730   * base
731   */
732
733  u32 pw_buf0[4];
734  u32 pw_buf1[4];
735
736  pw_buf0[0] = pws[gid].i[0];
737  pw_buf0[1] = pws[gid].i[1];
738  pw_buf0[2] = pws[gid].i[2];
739  pw_buf0[3] = pws[gid].i[3];
740  pw_buf1[0] = pws[gid].i[4];
741  pw_buf1[1] = pws[gid].i[5];
742  pw_buf1[2] = pws[gid].i[6];
743  pw_buf1[3] = pws[gid].i[7];
744
745  const u32 pw_len = pws[gid].pw_len & 63;
746
747  /**
748   * loop
749   */
750
751  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
752  {
753    u32x w0[4] = { 0 };
754    u32x w1[4] = { 0 };
755    u32x w2[4] = { 0 };
756    u32x w3[4] = { 0 };
757
758    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
759
760    u32x data[8];
761
762    data[0] = w0[0];
763    data[1] = w0[1];
764    data[2] = w0[2];
765    data[3] = w0[3];
766    data[4] = w1[0];
767    data[5] = w1[1];
768    data[6] = w1[2];
769    data[7] = w1[3];
770
771    u32x state[16];
772
773    state[ 0] = 0;
774    state[ 1] = 0;
775    state[ 2] = 0;
776    state[ 3] = 0;
777    state[ 4] = 0;
778    state[ 5] = 0;
779    state[ 6] = 0;
780    state[ 7] = 0;
781    state[ 8] = data[0];
782    state[ 9] = data[1];
783    state[10] = data[2];
784    state[11] = data[3];
785    state[12] = data[4];
786    state[13] = data[5];
787    state[14] = data[6];
788    state[15] = data[7];
789
790    u32x state_m[8];
791    u32x data_m[8];
792
793    /* gost1 */
794
795    state_m[0] = state[0];
796    state_m[1] = state[1];
797    state_m[2] = state[2];
798    state_m[3] = state[3];
799    state_m[4] = state[4];
800    state_m[5] = state[5];
801    state_m[6] = state[6];
802    state_m[7] = state[7];
803
804    data_m[0] = data[0];
805    data_m[1] = data[1];
806    data_m[2] = data[2];
807    data_m[3] = data[3];
808    data_m[4] = data[4];
809    data_m[5] = data[5];
810    data_m[6] = data[6];
811    data_m[7] = data[7];
812
813    u32x tmp[8];
814
815    //if (pw_len > 0) // not really SIMD compatible
816    {
817      PASS0 (state, tmp, state_m, data_m, s_tables);
818      PASS2 (state, tmp, state_m, data_m, s_tables);
819      PASS4 (state, tmp, state_m, data_m, s_tables);
820      PASS6 (state, tmp, state_m, data_m, s_tables);
821
822      SHIFT12 (state_m, data, tmp);
823      SHIFT16 (state, data_m, state_m);
824      SHIFT61 (state, data_m);
825    }
826
827    data[0] = out_len * 8;
828    data[1] = 0;
829    data[2] = 0;
830    data[3] = 0;
831    data[4] = 0;
832    data[5] = 0;
833    data[6] = 0;
834    data[7] = 0;
835
836    /* gost2 */
837
838    state_m[0] = state[0];
839    state_m[1] = state[1];
840    state_m[2] = state[2];
841    state_m[3] = state[3];
842    state_m[4] = state[4];
843    state_m[5] = state[5];
844    state_m[6] = state[6];
845    state_m[7] = state[7];
846
847    data_m[0] = data[0];
848    data_m[1] = data[1];
849    data_m[2] = data[2];
850    data_m[3] = data[3];
851    data_m[4] = data[4];
852    data_m[5] = data[5];
853    data_m[6] = data[6];
854    data_m[7] = data[7];
855
856    PASS0 (state, tmp, state_m, data_m, s_tables);
857    PASS2 (state, tmp, state_m, data_m, s_tables);
858    PASS4 (state, tmp, state_m, data_m, s_tables);
859    PASS6 (state, tmp, state_m, data_m, s_tables);
860
861    SHIFT12 (state_m, data, tmp);
862    SHIFT16 (state, data_m, state_m);
863    SHIFT61 (state, data_m);
864
865    /* gost3 */
866
867    data[0] = state[ 8];
868    data[1] = state[ 9];
869    data[2] = state[10];
870    data[3] = state[11];
871    data[4] = state[12];
872    data[5] = state[13];
873    data[6] = state[14];
874    data[7] = state[15];
875
876    state_m[0] = state[0];
877    state_m[1] = state[1];
878    state_m[2] = state[2];
879    state_m[3] = state[3];
880    state_m[4] = state[4];
881    state_m[5] = state[5];
882    state_m[6] = state[6];
883    state_m[7] = state[7];
884
885    data_m[0] = data[0];
886    data_m[1] = data[1];
887    data_m[2] = data[2];
888    data_m[3] = data[3];
889    data_m[4] = data[4];
890    data_m[5] = data[5];
891    data_m[6] = data[6];
892    data_m[7] = data[7];
893
894    PASS0 (state, tmp, state_m, data_m, s_tables);
895    PASS2 (state, tmp, state_m, data_m, s_tables);
896    PASS4 (state, tmp, state_m, data_m, s_tables);
897    PASS6 (state, tmp, state_m, data_m, s_tables);
898
899    SHIFT12 (state_m, data, tmp);
900    SHIFT16 (state, data_m, state_m);
901    SHIFT61 (state, data_m);
902
903    /* store */
904
905    COMPARE_M_SIMD (state[0], state[1], state[2], state[3]);
906  }
907}
908
909KERNEL_FQ void m06900_m08 (KERN_ATTR_RULES ())
910{
911}
912
913KERNEL_FQ void m06900_m16 (KERN_ATTR_RULES ())
914{
915}
916
917KERNEL_FQ void m06900_s04 (KERN_ATTR_RULES ())
918{
919  /**
920   * base
921   */
922
923  const u64 gid = get_global_id (0);
924  const u64 lid = get_local_id (0);
925  const u64 lsz = get_local_size (0);
926
927  /**
928   * sbox
929   */
930
931  LOCAL_VK u32 s_tables[4][256];
932
933  for (u32 i = lid; i < 256; i += lsz)
934  {
935    s_tables[0][i] = c_tables[0][i];
936    s_tables[1][i] = c_tables[1][i];
937    s_tables[2][i] = c_tables[2][i];
938    s_tables[3][i] = c_tables[3][i];
939  }
940
941  SYNC_THREADS ();
942
943  if (gid >= gid_max) return;
944
945  /**
946   * base
947   */
948
949  u32 pw_buf0[4];
950  u32 pw_buf1[4];
951
952  pw_buf0[0] = pws[gid].i[0];
953  pw_buf0[1] = pws[gid].i[1];
954  pw_buf0[2] = pws[gid].i[2];
955  pw_buf0[3] = pws[gid].i[3];
956  pw_buf1[0] = pws[gid].i[4];
957  pw_buf1[1] = pws[gid].i[5];
958  pw_buf1[2] = pws[gid].i[6];
959  pw_buf1[3] = pws[gid].i[7];
960
961  const u32 pw_len = pws[gid].pw_len & 63;
962
963  /**
964   * digest
965   */
966
967  const u32 search[4] =
968  {
969    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
970    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
971    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
972    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
973  };
974
975  /**
976   * loop
977   */
978
979  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
980  {
981    u32x w0[4] = { 0 };
982    u32x w1[4] = { 0 };
983    u32x w2[4] = { 0 };
984    u32x w3[4] = { 0 };
985
986    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
987
988    /**
989     * GOST
990     */
991
992    u32x data[8];
993
994    data[0] = w0[0];
995    data[1] = w0[1];
996    data[2] = w0[2];
997    data[3] = w0[3];
998    data[4] = w1[0];
999    data[5] = w1[1];
1000    data[6] = w1[2];
1001    data[7] = w1[3];
1002
1003    u32x state[16];
1004
1005    state[ 0] = 0;
1006    state[ 1] = 0;
1007    state[ 2] = 0;
1008    state[ 3] = 0;
1009    state[ 4] = 0;
1010    state[ 5] = 0;
1011    state[ 6] = 0;
1012    state[ 7] = 0;
1013    state[ 8] = data[0];
1014    state[ 9] = data[1];
1015    state[10] = data[2];
1016    state[11] = data[3];
1017    state[12] = data[4];
1018    state[13] = data[5];
1019    state[14] = data[6];
1020    state[15] = data[7];
1021
1022    u32x state_m[8];
1023    u32x data_m[8];
1024
1025    /* gost1 */
1026
1027    state_m[0] = state[0];
1028    state_m[1] = state[1];
1029    state_m[2] = state[2];
1030    state_m[3] = state[3];
1031    state_m[4] = state[4];
1032    state_m[5] = state[5];
1033    state_m[6] = state[6];
1034    state_m[7] = state[7];
1035
1036    data_m[0] = data[0];
1037    data_m[1] = data[1];
1038    data_m[2] = data[2];
1039    data_m[3] = data[3];
1040    data_m[4] = data[4];
1041    data_m[5] = data[5];
1042    data_m[6] = data[6];
1043    data_m[7] = data[7];
1044
1045    u32x tmp[8];
1046
1047    //if (pw_len > 0) // not really SIMD compatible
1048    {
1049      PASS0 (state, tmp, state_m, data_m, s_tables);
1050      PASS2 (state, tmp, state_m, data_m, s_tables);
1051      PASS4 (state, tmp, state_m, data_m, s_tables);
1052      PASS6 (state, tmp, state_m, data_m, s_tables);
1053
1054      SHIFT12 (state_m, data, tmp);
1055      SHIFT16 (state, data_m, state_m);
1056      SHIFT61 (state, data_m);
1057    }
1058
1059    data[0] = out_len * 8;
1060    data[1] = 0;
1061    data[2] = 0;
1062    data[3] = 0;
1063    data[4] = 0;
1064    data[5] = 0;
1065    data[6] = 0;
1066    data[7] = 0;
1067
1068    /* gost2 */
1069
1070    state_m[0] = state[0];
1071    state_m[1] = state[1];
1072    state_m[2] = state[2];
1073    state_m[3] = state[3];
1074    state_m[4] = state[4];
1075    state_m[5] = state[5];
1076    state_m[6] = state[6];
1077    state_m[7] = state[7];
1078
1079    data_m[0] = data[0];
1080    data_m[1] = data[1];
1081    data_m[2] = data[2];
1082    data_m[3] = data[3];
1083    data_m[4] = data[4];
1084    data_m[5] = data[5];
1085    data_m[6] = data[6];
1086    data_m[7] = data[7];
1087
1088    PASS0 (state, tmp, state_m, data_m, s_tables);
1089    PASS2 (state, tmp, state_m, data_m, s_tables);
1090    PASS4 (state, tmp, state_m, data_m, s_tables);
1091    PASS6 (state, tmp, state_m, data_m, s_tables);
1092
1093    SHIFT12 (state_m, data, tmp);
1094    SHIFT16 (state, data_m, state_m);
1095    SHIFT61 (state, data_m);
1096
1097    /* gost3 */
1098
1099    data[0] = state[ 8];
1100    data[1] = state[ 9];
1101    data[2] = state[10];
1102    data[3] = state[11];
1103    data[4] = state[12];
1104    data[5] = state[13];
1105    data[6] = state[14];
1106    data[7] = state[15];
1107
1108    state_m[0] = state[0];
1109    state_m[1] = state[1];
1110    state_m[2] = state[2];
1111    state_m[3] = state[3];
1112    state_m[4] = state[4];
1113    state_m[5] = state[5];
1114    state_m[6] = state[6];
1115    state_m[7] = state[7];
1116
1117    data_m[0] = data[0];
1118    data_m[1] = data[1];
1119    data_m[2] = data[2];
1120    data_m[3] = data[3];
1121    data_m[4] = data[4];
1122    data_m[5] = data[5];
1123    data_m[6] = data[6];
1124    data_m[7] = data[7];
1125
1126    PASS0 (state, tmp, state_m, data_m, s_tables);
1127    PASS2 (state, tmp, state_m, data_m, s_tables);
1128    PASS4 (state, tmp, state_m, data_m, s_tables);
1129    PASS6 (state, tmp, state_m, data_m, s_tables);
1130
1131    SHIFT12 (state_m, data, tmp);
1132    SHIFT16 (state, data_m, state_m);
1133    SHIFT61 (state, data_m);
1134
1135    /* store */
1136
1137    COMPARE_S_SIMD (state[0], state[1], state[2], state[3]);
1138  }
1139}
1140
1141KERNEL_FQ void m06900_s08 (KERN_ATTR_RULES ())
1142{
1143}
1144
1145KERNEL_FQ void m06900_s16 (KERN_ATTR_RULES ())
1146{
1147}
1148