/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#define NEW_SIMD_CODE

#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_rp_optimized.h"
#include "inc_rp_optimized.cl"
#include "inc_simd.cl"
#endif

/**
 * Byte-indexed substitution tables used by _round (): byte n of the 32 bit
 * round input selects one word from c_tables[n] and the four words are XORed
 * together.
 *
 * Every entry of a given table only populates one 8 bit window of the word:
 * table 0 uses bits 11..18, table 1 bits 19..26, table 2 bits 27..31 plus
 * 0..2 and table 3 bits 3..10, i.e. an 8 bit value rotated left by 11 + 8n.
 *
 * NOTE(review): presumably these are the GOST 28147-89 S-box pairs with the
 * cipher's 11 bit left rotation folded in - confirm against the reference
 * S-box set before touching any value.
 */

CONSTANT_VK u32a c_tables[4][256] =
{
  {
    0x00072000, 0x00075000, 0x00074800, 0x00071000,
    0x00076800, 0x00074000, 0x00070000, 0x00077000,
    0x00073000, 0x00075800, 0x00070800, 0x00076000,
    0x00073800, 0x00077800, 0x00072800, 0x00071800,
    0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
    0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
    0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
    0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
    0x00022000, 0x00025000, 0x00024800, 0x00021000,
    0x00026800, 0x00024000, 0x00020000, 0x00027000,
    0x00023000, 0x00025800, 0x00020800, 0x00026000,
    0x00023800, 0x00027800, 0x00022800, 0x00021800,
    0x00062000, 0x00065000, 0x00064800, 0x00061000,
    0x00066800, 0x00064000, 0x00060000, 0x00067000,
    0x00063000, 0x00065800, 0x00060800, 0x00066000,
    0x00063800, 0x00067800, 0x00062800, 0x00061800,
    0x00032000, 0x00035000, 0x00034800, 0x00031000,
    0x00036800, 0x00034000, 0x00030000, 0x00037000,
    0x00033000, 0x00035800, 0x00030800, 0x00036000,
    0x00033800, 0x00037800, 0x00032800, 0x00031800,
    0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
    0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
    0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
    0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
    0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
    0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
    0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
    0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
    0x00052000, 0x00055000, 0x00054800, 0x00051000,
    0x00056800, 0x00054000, 0x00050000, 0x00057000,
    0x00053000, 0x00055800, 0x00050800, 0x00056000,
    0x00053800, 0x00057800, 0x00052800, 0x00051800,
    0x00012000, 0x00015000, 0x00014800, 0x00011000,
    0x00016800, 0x00014000, 0x00010000, 0x00017000,
    0x00013000, 0x00015800, 0x00010800, 0x00016000,
    0x00013800, 0x00017800, 0x00012800, 0x00011800,
    0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
    0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
    0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
    0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
    0x00042000, 0x00045000, 0x00044800, 0x00041000,
    0x00046800, 0x00044000, 0x00040000, 0x00047000,
    0x00043000, 0x00045800, 0x00040800, 0x00046000,
    0x00043800, 0x00047800, 0x00042800, 0x00041800,
    0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
    0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
    0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
    0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
    0x00002000, 0x00005000, 0x00004800, 0x00001000,
    0x00006800, 0x00004000, 0x00000000, 0x00007000,
    0x00003000, 0x00005800, 0x00000800, 0x00006000,
    0x00003800, 0x00007800, 0x00002800, 0x00001800,
    0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
    0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
    0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
    0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
    0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
    0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
    0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
    0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
    0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
    0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
    0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
    0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
  },
  {
    0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
    0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
    0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
    0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
    0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
    0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
    0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
    0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
    0x05280000, 0x05400000, 0x05080000, 0x05680000,
    0x05500000, 0x05180000, 0x05200000, 0x05100000,
    0x05700000, 0x05780000, 0x05600000, 0x05380000,
    0x05300000, 0x05000000, 0x05480000, 0x05580000,
    0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
    0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
    0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
    0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
    0x00280000, 0x00400000, 0x00080000, 0x00680000,
    0x00500000, 0x00180000, 0x00200000, 0x00100000,
    0x00700000, 0x00780000, 0x00600000, 0x00380000,
    0x00300000, 0x00000000, 0x00480000, 0x00580000,
    0x04280000, 0x04400000, 0x04080000, 0x04680000,
    0x04500000, 0x04180000, 0x04200000, 0x04100000,
    0x04700000, 0x04780000, 0x04600000, 0x04380000,
    0x04300000, 0x04000000, 0x04480000, 0x04580000,
    0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
    0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
    0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
    0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
    0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
    0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
    0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
    0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
    0x07280000, 0x07400000, 0x07080000, 0x07680000,
    0x07500000, 0x07180000, 0x07200000, 0x07100000,
    0x07700000, 0x07780000, 0x07600000, 0x07380000,
    0x07300000, 0x07000000, 0x07480000, 0x07580000,
    0x02280000, 0x02400000, 0x02080000, 0x02680000,
    0x02500000, 0x02180000, 0x02200000, 0x02100000,
    0x02700000, 0x02780000, 0x02600000, 0x02380000,
    0x02300000, 0x02000000, 0x02480000, 0x02580000,
    0x03280000, 0x03400000, 0x03080000, 0x03680000,
    0x03500000, 0x03180000, 0x03200000, 0x03100000,
    0x03700000, 0x03780000, 0x03600000, 0x03380000,
    0x03300000, 0x03000000, 0x03480000, 0x03580000,
    0x06280000, 0x06400000, 0x06080000, 0x06680000,
    0x06500000, 0x06180000, 0x06200000, 0x06100000,
    0x06700000, 0x06780000, 0x06600000, 0x06380000,
    0x06300000, 0x06000000, 0x06480000, 0x06580000,
    0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
    0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
    0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
    0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
    0x01280000, 0x01400000, 0x01080000, 0x01680000,
    0x01500000, 0x01180000, 0x01200000, 0x01100000,
    0x01700000, 0x01780000, 0x01600000, 0x01380000,
    0x01300000, 0x01000000, 0x01480000, 0x01580000,
    0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
    0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
    0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
    0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
    0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
    0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
    0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
    0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
  },
  {
    0x30000002, 0x60000002, 0x38000002, 0x08000002,
    0x28000002, 0x78000002, 0x68000002, 0x40000002,
    0x20000002, 0x50000002, 0x48000002, 0x70000002,
    0x00000002, 0x18000002, 0x58000002, 0x10000002,
    0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
    0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
    0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
    0x80000005, 0x98000005, 0xd8000005, 0x90000005,
    0x30000005, 0x60000005, 0x38000005, 0x08000005,
    0x28000005, 0x78000005, 0x68000005, 0x40000005,
    0x20000005, 0x50000005, 0x48000005, 0x70000005,
    0x00000005, 0x18000005, 0x58000005, 0x10000005,
    0x30000000, 0x60000000, 0x38000000, 0x08000000,
    0x28000000, 0x78000000, 0x68000000, 0x40000000,
    0x20000000, 0x50000000, 0x48000000, 0x70000000,
    0x00000000, 0x18000000, 0x58000000, 0x10000000,
    0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
    0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
    0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
    0x80000003, 0x98000003, 0xd8000003, 0x90000003,
    0x30000001, 0x60000001, 0x38000001, 0x08000001,
    0x28000001, 0x78000001, 0x68000001, 0x40000001,
    0x20000001, 0x50000001, 0x48000001, 0x70000001,
    0x00000001, 0x18000001, 0x58000001, 0x10000001,
    0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
    0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
    0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
    0x80000000, 0x98000000, 0xd8000000, 0x90000000,
    0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
    0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
    0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
    0x80000006, 0x98000006, 0xd8000006, 0x90000006,
    0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
    0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
    0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
    0x80000001, 0x98000001, 0xd8000001, 0x90000001,
    0x30000003, 0x60000003, 0x38000003, 0x08000003,
    0x28000003, 0x78000003, 0x68000003, 0x40000003,
    0x20000003, 0x50000003, 0x48000003, 0x70000003,
    0x00000003, 0x18000003, 0x58000003, 0x10000003,
    0x30000004, 0x60000004, 0x38000004, 0x08000004,
    0x28000004, 0x78000004, 0x68000004, 0x40000004,
    0x20000004, 0x50000004, 0x48000004, 0x70000004,
    0x00000004, 0x18000004, 0x58000004, 0x10000004,
    0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
    0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
    0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
    0x80000002, 0x98000002, 0xd8000002, 0x90000002,
    0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
    0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
    0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
    0x80000004, 0x98000004, 0xd8000004, 0x90000004,
    0x30000006, 0x60000006, 0x38000006, 0x08000006,
    0x28000006, 0x78000006, 0x68000006, 0x40000006,
    0x20000006, 0x50000006, 0x48000006, 0x70000006,
    0x00000006, 0x18000006, 0x58000006, 0x10000006,
    0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
    0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
    0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
    0x80000007, 0x98000007, 0xd8000007, 0x90000007,
    0x30000007, 0x60000007, 0x38000007, 0x08000007,
    0x28000007, 0x78000007, 0x68000007, 0x40000007,
    0x20000007, 0x50000007, 0x48000007, 0x70000007,
    0x00000007, 0x18000007, 0x58000007, 0x10000007,
  },
  {
    0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
    0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
    0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
    0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
    0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
    0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
    0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
    0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
    0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
    0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
    0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
    0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
    0x00000068, 0x00000058, 0x00000020, 0x00000008,
    0x00000018, 0x00000078, 0x00000028, 0x00000048,
    0x00000000, 0x00000050, 0x00000070, 0x00000038,
    0x00000030, 0x00000040, 0x00000010, 0x00000060,
    0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
    0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
    0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
    0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
    0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
    0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
    0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
    0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
    0x00000568, 0x00000558, 0x00000520, 0x00000508,
    0x00000518, 0x00000578, 0x00000528, 0x00000548,
    0x00000500, 0x00000550, 0x00000570, 0x00000538,
    0x00000530, 0x00000540, 0x00000510, 0x00000560,
    0x00000268, 0x00000258, 0x00000220, 0x00000208,
    0x00000218, 0x00000278, 0x00000228, 0x00000248,
    0x00000200, 0x00000250, 0x00000270, 0x00000238,
    0x00000230, 0x00000240, 0x00000210, 0x00000260,
    0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
    0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
    0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
    0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
    0x00000168, 0x00000158, 0x00000120, 0x00000108,
    0x00000118, 0x00000178, 0x00000128, 0x00000148,
    0x00000100, 0x00000150, 0x00000170, 0x00000138,
    0x00000130, 0x00000140, 0x00000110, 0x00000160,
    0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
    0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
    0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
    0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
    0x00000768, 0x00000758, 0x00000720, 0x00000708,
    0x00000718, 0x00000778, 0x00000728, 0x00000748,
    0x00000700, 0x00000750, 0x00000770, 0x00000738,
    0x00000730, 0x00000740, 0x00000710, 0x00000760,
    0x00000368, 0x00000358, 0x00000320, 0x00000308,
    0x00000318, 0x00000378, 0x00000328, 0x00000348,
    0x00000300, 0x00000350, 0x00000370, 0x00000338,
    0x00000330, 0x00000340, 0x00000310, 0x00000360,
    0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
    0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
    0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
    0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
    0x00000468, 0x00000458, 0x00000420, 0x00000408,
    0x00000418, 0x00000478, 0x00000428, 0x00000448,
    0x00000400, 0x00000450, 0x00000470, 0x00000438,
    0x00000430, 0x00000440, 0x00000410, 0x00000460,
    0x00000668, 0x00000658, 0x00000620, 0x00000608,
    0x00000618, 0x00000678, 0x00000628, 0x00000648,
    0x00000600, 0x00000650, 0x00000670, 0x00000638,
    0x00000630, 0x00000640, 0x00000610, 0x00000660,
  }
};

/**
 * BOX (i, n, S): look up S[n][i].
 *
 * For VECT_SIZE > 1 the index i is a vector, so each lane is looked up
 * separately and the results are packed back with make_u32x ().
 * Only the vector widths listed below are handled; anything else is rejected
 * at preprocessing time instead of failing later on an undefined BOX.
 */

#if   VECT_SIZE == 1
#define BOX(i,n,S) (S)[(n)][(i)]
#elif VECT_SIZE == 2
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
#elif VECT_SIZE == 4
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
#elif VECT_SIZE == 8
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
#elif VECT_SIZE == 16
#define BOX(i,n,S) make_u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#else
#error "m06900: unsupported VECT_SIZE (expected 1, 2, 4, 8 or 16)"
#endif

/**
 * _round (k1, k2, tbl): two consecutive half rounds.
 *
 * Operates on the variables `l` and `r` of the enclosing scope (see R ()):
 *   t = k1 + r;  l ^= tbl[0][t byte 0] ^ tbl[1][t byte 1] ^ tbl[2][t byte 2] ^ tbl[3][t byte 3]
 *   t = k2 + l;  r ^= (same lookup on the new t)
 */

#define _round(k1,k2,tbl)                 \
{                                         \
  u32x t;                                 \
  t = (k1) + r;                           \
  l ^= BOX (((t >>  0) & 0xff), 0, tbl) ^ \
       BOX (((t >>  8) & 0xff), 1, tbl) ^ \
       BOX (((t >> 16) & 0xff), 2, tbl) ^ \
       BOX (((t >> 24) & 0xff), 3, tbl);  \
  t = (k2) + l;                           \
  r ^= BOX (((t >>  0) & 0xff), 0, tbl) ^ \
       BOX (((t >>  8) & 0xff), 1, tbl) ^ \
       BOX (((t >> 16) & 0xff), 2, tbl) ^ \
       BOX (((t >> 24) & 0xff), 3, tbl);  \
}

/**
 * R (k, h, s, i, t): run the 64 bit word pair h[i], h[i + 1] through sixteen
 * _round () invocations keyed by k[0..7] and store the result in s[i], s[i + 1].
 *
 * Key order: k[0]..k[7] three times in ascending order, then once descending.
 * The halves are stored swapped: s[i] receives l, s[i + 1] receives r.
 */

#define R(k,h,s,i,t)      \
{                         \
  u32x r;                 \
  u32x l;                 \
  r = h[i + 0];           \
  l = h[i + 1];           \
  _round (k[0], k[1], t); \
  _round (k[2], k[3], t); \
  _round (k[4], k[5], t); \
  _round (k[6], k[7], t); \
  _round (k[0], k[1], t); \
  _round (k[2], k[3], t); \
  _round (k[4], k[5], t); \
  _round (k[6], k[7], t); \
  _round (k[0], k[1], t); \
  _round (k[2], k[3], t); \
  _round (k[4], k[5], t); \
  _round (k[6], k[7], t); \
  _round (k[7], k[6], t); \
  _round (k[5], k[4], t); \
  _round (k[3], k[2], t); \
  _round (k[1], k[0], t); \
  s[i + 0] = l;           \
  s[i + 1] = r;           \
}

/**
 * X (w, u, v): w[0..7] = u[0..7] ^ v[0..7].
 * Expands to bare statements (no enclosing braces).
 */

#define X(w,u,v)      \
  w[0] = u[0] ^ v[0]; \
  w[1] = u[1] ^ v[1]; \
  w[2] = u[2] ^ v[2]; \
  w[3] = u[3] ^ v[3]; \
  w[4] = u[4] ^ v[4]; \
  w[5] = u[5] ^ v[5]; \
  w[6] = u[6] ^ v[6]; \
  w[7] = u[7] ^ v[7];

/**
 * P (k, w): byte transposition of w into the round key k.
 *
 * k[j]     (j = 0..3) collects byte j of w[0], w[2], w[4], w[6] (in that order,
 *          lowest byte first); k[4 + j] does the same for w[1], w[3], w[5], w[7].
 * Expands to bare statements (no enclosing braces).
 */

#define P(k,w)                        \
  k[0] = ((w[0] & 0x000000ff) <<  0)  \
       | ((w[2] & 0x000000ff) <<  8)  \
       | ((w[4] & 0x000000ff) << 16)  \
       | ((w[6] & 0x000000ff) << 24); \
  k[1] = ((w[0] & 0x0000ff00) >>  8)  \
       | ((w[2] & 0x0000ff00) >>  0)  \
       | ((w[4] & 0x0000ff00) <<  8)  \
       | ((w[6] & 0x0000ff00) << 16); \
  k[2] = ((w[0] & 0x00ff0000) >> 16)  \
       | ((w[2] & 0x00ff0000) >>  8)  \
       | ((w[4] & 0x00ff0000) <<  0)  \
       | ((w[6] & 0x00ff0000) <<  8); \
  k[3] = ((w[0] & 0xff000000) >> 24)  \
       | ((w[2] & 0xff000000) >> 16)  \
       | ((w[4] & 0xff000000) >>  8)  \
       | ((w[6] & 0xff000000) >>  0); \
  k[4] = ((w[1] & 0x000000ff) <<  0)  \
       | ((w[3] & 0x000000ff) <<  8)  \
       | ((w[5] & 0x000000ff) << 16)  \
       | ((w[7] & 0x000000ff) << 24); \
  k[5] = ((w[1] & 0x0000ff00) >>  8)  \
       | ((w[3] & 0x0000ff00) >>  0)  \
       | ((w[5] & 0x0000ff00) <<  8)  \
       | ((w[7] & 0x0000ff00) << 16); \
  k[6] = ((w[1] & 0x00ff0000) >> 16)  \
       | ((w[3] & 0x00ff0000) >>  8)  \
       | ((w[5] & 0x00ff0000) <<  0)  \
       | ((w[7] & 0x00ff0000) <<  8); \
  k[7] = ((w[1] & 0xff000000) >> 24)  \
       | ((w[3] & 0xff000000) >> 16)  \
       | ((w[5] & 0xff000000) >>  8)  \
       | ((w[7] & 0xff000000) >>  0);

/**
 * A (x): in-place update of the 8 word block x.
 *
 * Words 2..7 move down to 0..5; the new x[6], x[7] are the old
 * x[0] ^ x[2] and x[1] ^ x[3].
 */

#define A(x)        \
{                   \
  u32x l;           \
  u32x r;           \
  l = x[0] ^ x[2];  \
  r = x[1] ^ x[3];  \
  x[0] = x[2];      \
  x[1] = x[3];      \
  x[2] = x[4];      \
  x[3] = x[5];      \
  x[4] = x[6];      \
  x[5] = x[7];      \
  x[6] = l;         \
  x[7] = r;         \
}

/**
 * AA (x): in-place update of the 8 word block x, even and odd words handled
 * as two independent lanes.
 *
 * Statement order matters: x[6] (and x[7]) is computed after x[0] (x[1]) has
 * already been overwritten, so with the old values
 *   x[0] = x[4], x[2] = x[6], x[4] = x[0] ^ x[2], x[6] = x[4] ^ x[2]
 * and the same for the odd indices.
 */

#define AA(x)       \
{                   \
  u32x l;           \
  u32x r;           \
  l    = x[0];      \
  r    = x[2];      \
  x[0] = x[4];      \
  x[2] = x[6];      \
  x[4] = l ^ r;     \
  x[6] = x[0] ^ r;  \
  l    = x[1];      \
  r    = x[3];      \
  x[1] = x[5];      \
  x[3] = x[7];      \
  x[5] = l ^ r;     \
  x[7] = x[1] ^ r;  \
}

/**
 * C (x): XOR a fixed 8 word constant into x.
 * Only used by PASS2 (). Expands to bare statements (no enclosing braces).
 */

#define C(x)          \
  x[0] ^= 0xff00ff00; \
  x[1] ^= 0xff00ff00; \
  x[2] ^= 0x00ff00ff; \
  x[3] ^= 0x00ff00ff; \
  x[4] ^= 0x00ffff00; \
  x[5] ^= 0xff0000ff; \
  x[6] ^= 0x000000ff; \
  x[7] ^= 0xff00ffff;

/**
 * SHIFT12 (u, m, s): u[0..7] = m[0..7] ^ (fixed linear mix of s[0..7]).
 *
 * The mix is built only from XORs of s words, their 16 bit shifts and their
 * 16 bit halves. Expands to bare statements (no enclosing braces).
 *
 * NOTE(review): presumably the 12-fold application of the GOST R 34.11-94
 * psi shift, pre-expanded - confirm against the specification.
 */

#define SHIFT12(u,m,s)              \
  u[0] = m[0] ^ s[6];               \
  u[1] = m[1] ^ s[7];               \
  u[2] = m[2] ^ (s[0] << 16)        \
              ^ (s[0] >> 16)        \
              ^ (s[0] & 0x0000ffff) \
              ^ (s[1] & 0x0000ffff) \
              ^ (s[1] >> 16)        \
              ^ (s[2] << 16)        \
              ^  s[6]               \
              ^ (s[6] << 16)        \
              ^ (s[7] & 0xffff0000) \
              ^ (s[7] >> 16);       \
  u[3] = m[3] ^ (s[0] & 0x0000ffff) \
              ^ (s[0] << 16)        \
              ^ (s[1] & 0x0000ffff) \
              ^ (s[1] << 16)        \
              ^ (s[1] >> 16)        \
              ^ (s[2] << 16)        \
              ^ (s[2] >> 16)        \
              ^ (s[3] << 16)        \
              ^  s[6]               \
              ^ (s[6] << 16)        \
              ^ (s[6] >> 16)        \
              ^ (s[7] & 0x0000ffff) \
              ^ (s[7] << 16)        \
              ^ (s[7] >> 16);       \
  u[4] = m[4] ^ (s[0] & 0xffff0000) \
              ^ (s[0] << 16)        \
              ^ (s[0] >> 16)        \
              ^ (s[1] & 0xffff0000) \
              ^ (s[1] >> 16)        \
              ^ (s[2] << 16)        \
              ^ (s[2] >> 16)        \
              ^ (s[3] << 16)        \
              ^ (s[3] >> 16)        \
              ^ (s[4] << 16)        \
              ^ (s[6] << 16)        \
              ^ (s[6] >> 16)        \
              ^ (s[7] & 0x0000ffff) \
              ^ (s[7] << 16)        \
              ^ (s[7] >> 16);       \
  u[5] = m[5] ^ (s[0] << 16)        \
              ^ (s[0] >> 16)        \
              ^ (s[0] & 0xffff0000) \
              ^ (s[1] & 0x0000ffff) \
              ^  s[2]               \
              ^ (s[2] >> 16)        \
              ^ (s[3] << 16)        \
              ^ (s[3] >> 16)        \
              ^ (s[4] << 16)        \
              ^ (s[4] >> 16)        \
              ^ (s[5] << 16)        \
              ^ (s[6] << 16)        \
              ^ (s[6] >> 16)        \
              ^ (s[7] & 0xffff0000) \
              ^ (s[7] << 16)        \
              ^ (s[7] >> 16);       \
  u[6] = m[6] ^  s[0]               \
              ^ (s[1] >> 16)        \
              ^ (s[2] << 16)        \
              ^  s[3]               \
              ^ (s[3] >> 16)        \
              ^ (s[4] << 16)        \
              ^ (s[4] >> 16)        \
              ^ (s[5] << 16)        \
              ^ (s[5] >> 16)        \
              ^  s[6]               \
              ^ (s[6] << 16)        \
              ^ (s[6] >> 16)        \
              ^ (s[7] << 16);       \
  u[7] = m[7] ^ (s[0] & 0xffff0000) \
              ^ (s[0] << 16)        \
              ^ (s[1] & 0x0000ffff) \
              ^ (s[1] << 16)        \
              ^ (s[2] >> 16)        \
              ^ (s[3] << 16)        \
              ^  s[4]               \
              ^ (s[4] >> 16)        \
              ^ (s[5] << 16)        \
              ^ (s[5] >> 16)        \
              ^ (s[6] >> 16)        \
              ^ (s[7] & 0x0000ffff) \
              ^ (s[7] << 16)        \
              ^ (s[7] >> 16);

/**
 * SHIFT16 (h, v, u): v[0..7] = h[0..7] ^ (fixed linear mix of u[0..7]).
 *
 * For j = 0..6 the mix is (u[j + 1] << 16) ^ (u[j] >> 16), i.e. u moved down
 * by 16 bits; v[7] gets the wrap-around term.
 * Expands to bare statements (no enclosing braces).
 */

#define SHIFT16(h,v,u)              \
  v[0] = h[0] ^ (u[1] << 16)        \
              ^ (u[0] >> 16);       \
  v[1] = h[1] ^ (u[2] << 16)        \
              ^ (u[1] >> 16);       \
  v[2] = h[2] ^ (u[3] << 16)        \
              ^ (u[2] >> 16);       \
  v[3] = h[3] ^ (u[4] << 16)        \
              ^ (u[3] >> 16);       \
  v[4] = h[4] ^ (u[5] << 16)        \
              ^ (u[4] >> 16);       \
  v[5] = h[5] ^ (u[6] << 16)        \
              ^ (u[5] >> 16);       \
  v[6] = h[6] ^ (u[7] << 16)        \
              ^ (u[6] >> 16);       \
  v[7] = h[7] ^ (u[0] & 0xffff0000) \
              ^ (u[0] << 16)        \
              ^ (u[7] >> 16)        \
              ^ (u[1] & 0xffff0000) \
              ^ (u[1] << 16)        \
              ^ (u[6] << 16)        \
              ^ (u[7] & 0xffff0000);

/**
 * SHIFT61 (h, v): h[0..7] = fixed linear mix of v[0..7].
 *
 * h is fully overwritten and only v is read, so h and v must be distinct
 * arrays. Expands to bare statements (no enclosing braces).
 *
 * NOTE(review): presumably the 61-fold application of the GOST R 34.11-94
 * psi shift, pre-expanded - confirm against the specification.
 */

#define SHIFT61(h,v)          \
  h[0] = (v[0] & 0xffff0000)  \
       ^ (v[0] << 16)         \
       ^ (v[0] >> 16)         \
       ^ (v[1] >> 16)         \
       ^ (v[1] & 0xffff0000)  \
       ^ (v[2] << 16)         \
       ^ (v[3] >> 16)         \
       ^ (v[4] << 16)         \
       ^ (v[5] >> 16)         \
       ^  v[5]                \
       ^ (v[6] >> 16)         \
       ^ (v[7] << 16)         \
       ^ (v[7] >> 16)         \
       ^ (v[7] & 0x0000ffff); \
  h[1] = (v[0] << 16)         \
       ^ (v[0] >> 16)         \
       ^ (v[0] & 0xffff0000)  \
       ^ (v[1] & 0x0000ffff)  \
       ^  v[2]                \
       ^ (v[2] >> 16)         \
       ^ (v[3] << 16)         \
       ^ (v[4] >> 16)         \
       ^ (v[5] << 16)         \
       ^ (v[6] << 16)         \
       ^  v[6]                \
       ^ (v[7] & 0xffff0000)  \
       ^ (v[7] >> 16);        \
  h[2] = (v[0] & 0x0000ffff)  \
       ^ (v[0] << 16)         \
       ^ (v[1] << 16)         \
       ^ (v[1] >> 16)         \
       ^ (v[1] & 0xffff0000)  \
       ^ (v[2] << 16)         \
       ^ (v[3] >> 16)         \
       ^  v[3]                \
       ^ (v[4] << 16)         \
       ^ (v[5] >> 16)         \
       ^  v[6]                \
       ^ (v[6] >> 16)         \
       ^ (v[7] & 0x0000ffff)  \
       ^ (v[7] << 16)         \
       ^ (v[7] >> 16);        \
  h[3] = (v[0] << 16)         \
       ^ (v[0] >> 16)         \
       ^ (v[0] & 0xffff0000)  \
       ^ (v[1] & 0xffff0000)  \
       ^ (v[1] >> 16)         \
       ^ (v[2] << 16)         \
       ^ (v[2] >> 16)         \
       ^  v[2]                \
       ^ (v[3] << 16)         \
       ^ (v[4] >> 16)         \
       ^  v[4]                \
       ^ (v[5] << 16)         \
       ^ (v[6] << 16)         \
       ^ (v[7] & 0x0000ffff)  \
       ^ (v[7] >> 16);        \
  h[4] = (v[0] >> 16)         \
       ^ (v[1] << 16)         \
       ^  v[1]                \
       ^ (v[2] >> 16)         \
       ^  v[2]                \
       ^ (v[3] << 16)         \
       ^ (v[3] >> 16)         \
       ^  v[3]                \
       ^ (v[4] << 16)         \
       ^ (v[5] >> 16)         \
       ^  v[5]                \
       ^ (v[6] << 16)         \
       ^ (v[6] >> 16)         \
       ^ (v[7] << 16);        \
  h[5] = (v[0] << 16)         \
       ^ (v[0] & 0xffff0000)  \
       ^ (v[1] << 16)         \
       ^ (v[1] >> 16)         \
       ^ (v[1] & 0xffff0000)  \
       ^ (v[2] << 16)         \
       ^  v[2]                \
       ^ (v[3] >> 16)         \
       ^  v[3]                \
       ^ (v[4] << 16)         \
       ^ (v[4] >> 16)         \
       ^  v[4]                \
       ^ (v[5] << 16)         \
       ^ (v[6] << 16)         \
       ^ (v[6] >> 16)         \
       ^  v[6]                \
       ^ (v[7] << 16)         \
       ^ (v[7] >> 16)         \
       ^ (v[7] & 0xffff0000); \
  h[6] =  v[0]                \
       ^  v[2]                \
       ^ (v[2] >> 16)         \
       ^  v[3]                \
       ^ (v[3] << 16)         \
       ^  v[4]                \
       ^ (v[4] >> 16)         \
       ^ (v[5] << 16)         \
       ^ (v[5] >> 16)         \
       ^  v[5]                \
       ^ (v[6] << 16)         \
       ^ (v[6] >> 16)         \
       ^  v[6]                \
       ^ (v[7] << 16)         \
       ^  v[7];               \
  h[7] =  v[0]                \
       ^ (v[0] >> 16)         \
       ^ (v[1] << 16)         \
       ^ (v[1] >> 16)         \
       ^ (v[2] << 16)         \
       ^ (v[3] >> 16)         \
       ^  v[3]                \
       ^ (v[4] << 16)         \
       ^  v[4]                \
       ^ (v[5] >> 16)         \
       ^  v[5]                \
       ^ (v[6] << 16)         \
       ^ (v[6] >> 16)         \
       ^ (v[7] << 16)         \
       ^  v[7];

/**
 * PASS0 / PASS2 / PASS4 / PASS6 (h, s, u, v, t)
 *
 * Each pass derives a round key from u ^ v (X + P), encrypts one word pair of
 * h into the same word pair of s (R with offset 0, 2, 4 or 6) and then
 * advances u and v for the next pass:
 *   PASS0, PASS4: A (u), AA (v)
 *   PASS2:        A (u), C (u), AA (v)
 *   PASS6:        last pass, u and v are left untouched
 * u and v are modified in place; t is the lookup table handed down to BOX ().
 */

#define PASS0(h,s,u,v,t)  \
{                         \
  u32x k[8];              \
  u32x w[8];              \
  X (w, u, v);            \
  P (k, w);               \
  R (k, h, s, 0, t);      \
  A (u);                  \
  AA (v);                 \
}

#define PASS2(h,s,u,v,t)  \
{                         \
  u32x k[8];              \
  u32x w[8];              \
  X (w, u, v);            \
  P (k, w);               \
  R (k, h, s, 2, t);      \
  A (u);                  \
  C (u);                  \
  AA (v);                 \
}

#define PASS4(h,s,u,v,t)  \
{                         \
  u32x k[8];              \
  u32x w[8];              \
  X (w, u, v);            \
  P (k, w);               \
  R (k, h, s, 4, t);      \
  A (u);                  \
  AA (v);                 \
}

#define PASS6(h,s,u,v,t)  \
{                         \
  u32x k[8];              \
  u32x w[8];              \
  X (w, u, v);            \
  P (k, w);               \
  R (k, h, s, 6, t);      \
}

/**
 * GOST_COMPRESS (h, m, tbl): one complete compression step.
 *
 * Shared by the _m04 and _s04 kernels, which run it three times per
 * candidate. The statement order is exactly the one the kernels used when
 * this sequence was written out inline:
 *   1. snapshot h[0..7] and m[0..7] into scratch copies (the PASS macros
 *      modify their u/v arguments in place),
 *   2. PASS0..PASS6 fill the 8 word scratch block,
 *   3. SHIFT12 / SHIFT16 / SHIFT61 fold scratch block, m and h into the new h.
 *
 * h[0..7] is updated in place, m[0..7] is only read, h[8..] is not touched.
 * The scratch arrays are fully written before they are read, so declaring
 * them per invocation is equivalent to sharing them across invocations.
 */

#define GOST_COMPRESS(h,m,tbl)      \
{                                   \
  u32x cu[8];                       \
  u32x cv[8];                       \
  u32x cs[8];                       \
  cu[0] = h[0];                     \
  cu[1] = h[1];                     \
  cu[2] = h[2];                     \
  cu[3] = h[3];                     \
  cu[4] = h[4];                     \
  cu[5] = h[5];                     \
  cu[6] = h[6];                     \
  cu[7] = h[7];                     \
  cv[0] = m[0];                     \
  cv[1] = m[1];                     \
  cv[2] = m[2];                     \
  cv[3] = m[3];                     \
  cv[4] = m[4];                     \
  cv[5] = m[5];                     \
  cv[6] = m[6];                     \
  cv[7] = m[7];                     \
  PASS0 (h, cs, cu, cv, tbl);       \
  PASS2 (h, cs, cu, cv, tbl);       \
  PASS4 (h, cs, cu, cv, tbl);       \
  PASS6 (h, cs, cu, cv, tbl);       \
  SHIFT12 (cu, m, cs);              \
  SHIFT16 (h, cv, cu);              \
  SHIFT61 (h, cv);                  \
}

/**
 * Multi-hash kernel for candidates that fit into w0/w1 (32 bytes).
 *
 * Every work-item loads one base word, applies the rules of the current
 * il_pos batch to it and hashes the result with three compression steps
 * (message block, bit length block, checksum block). The first four state
 * words are handed to COMPARE_M_SIMD.
 */

KERNEL_FQ void m06900_m04 (KERN_ATTR_RULES ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * sbox
   */

  LOCAL_VK u32 s_tables[4][256];

  // cooperative copy: all work-items of the group take part, which is why the
  // gid_max check has to come after the SYNC_THREADS () below

  for (u32 i = lid; i < 256; i += lsz)
  {
    s_tables[0][i] = c_tables[0][i];
    s_tables[1][i] = c_tables[1][i];
    s_tables[2][i] = c_tables[2][i];
    s_tables[3][i] = c_tables[3][i];
  }

  SYNC_THREADS ();

  if (gid >= gid_max) return;

  /**
   * base
   */

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };

    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    /**
     * GOST
     */

    u32x data[8];

    data[0] = w0[0];
    data[1] = w0[1];
    data[2] = w0[2];
    data[3] = w0[3];
    data[4] = w1[0];
    data[5] = w1[1];
    data[6] = w1[2];
    data[7] = w1[3];

    // state[0..7] is the chaining value, state[8..15] keeps a copy of the
    // message block that is fed back in as the third block (gost3)

    u32x state[16];

    state[ 0] = 0;
    state[ 1] = 0;
    state[ 2] = 0;
    state[ 3] = 0;
    state[ 4] = 0;
    state[ 5] = 0;
    state[ 6] = 0;
    state[ 7] = 0;
    state[ 8] = data[0];
    state[ 9] = data[1];
    state[10] = data[2];
    state[11] = data[3];
    state[12] = data[4];
    state[13] = data[5];
    state[14] = data[6];
    state[15] = data[7];

    /* gost1 */

    // runs unconditionally, also for zero-length candidates: a
    // `if (pw_len > 0)` guard is "not really SIMD compatible"

    GOST_COMPRESS (state, data, s_tables);

    data[0] = out_len * 8;
    data[1] = 0;
    data[2] = 0;
    data[3] = 0;
    data[4] = 0;
    data[5] = 0;
    data[6] = 0;
    data[7] = 0;

    /* gost2 */

    GOST_COMPRESS (state, data, s_tables);

    /* gost3 */

    data[0] = state[ 8];
    data[1] = state[ 9];
    data[2] = state[10];
    data[3] = state[11];
    data[4] = state[12];
    data[5] = state[13];
    data[6] = state[14];
    data[7] = state[15];

    GOST_COMPRESS (state, data, s_tables);

    /* store */

    COMPARE_M_SIMD (state[0], state[1], state[2], state[3]);
  }
}

KERNEL_FQ void m06900_m08 (KERN_ATTR_RULES ())
{
}

KERNEL_FQ void m06900_m16 (KERN_ATTR_RULES ())
{
}

/**
 * Single-hash kernel for candidates that fit into w0/w1 (32 bytes).
 *
 * Same computation as m06900_m04; the only differences are the `search`
 * digest loaded up front and the final COMPARE_S_SIMD.
 */

KERNEL_FQ void m06900_s04 (KERN_ATTR_RULES ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * sbox
   */

  LOCAL_VK u32 s_tables[4][256];

  // cooperative copy: all work-items of the group take part, which is why the
  // gid_max check has to come after the SYNC_THREADS () below

  for (u32 i = lid; i < 256; i += lsz)
  {
    s_tables[0][i] = c_tables[0][i];
    s_tables[1][i] = c_tables[1][i];
    s_tables[2][i] = c_tables[2][i];
    s_tables[3][i] = c_tables[3][i];
  }

  SYNC_THREADS ();

  if (gid >= gid_max) return;

  /**
   * base
   */

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * digest
   */

  // NOTE(review): not referenced by name below - presumably consumed inside
  // COMPARE_S_SIMD; confirm before renaming or removing

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };

    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    /**
     * GOST
     */

    u32x data[8];

    data[0] = w0[0];
    data[1] = w0[1];
    data[2] = w0[2];
    data[3] = w0[3];
    data[4] = w1[0];
    data[5] = w1[1];
    data[6] = w1[2];
    data[7] = w1[3];

    // state[0..7] is the chaining value, state[8..15] keeps a copy of the
    // message block that is fed back in as the third block (gost3)

    u32x state[16];

    state[ 0] = 0;
    state[ 1] = 0;
    state[ 2] = 0;
    state[ 3] = 0;
    state[ 4] = 0;
    state[ 5] = 0;
    state[ 6] = 0;
    state[ 7] = 0;
    state[ 8] = data[0];
    state[ 9] = data[1];
    state[10] = data[2];
    state[11] = data[3];
    state[12] = data[4];
    state[13] = data[5];
    state[14] = data[6];
    state[15] = data[7];

    /* gost1 */

    // runs unconditionally, also for zero-length candidates: a
    // `if (pw_len > 0)` guard is "not really SIMD compatible"

    GOST_COMPRESS (state, data, s_tables);

    data[0] = out_len * 8;
    data[1] = 0;
    data[2] = 0;
    data[3] = 0;
    data[4] = 0;
    data[5] = 0;
    data[6] = 0;
    data[7] = 0;

    /* gost2 */

    GOST_COMPRESS (state, data, s_tables);

    /* gost3 */

    data[0] = state[ 8];
    data[1] = state[ 9];
    data[2] = state[10];
    data[3] = state[11];
    data[4] = state[12];
    data[5] = state[13];
    data[6] = state[14];
    data[7] = state[15];

    GOST_COMPRESS (state, data, s_tables);

    /* store */

    COMPARE_S_SIMD (state[0], state[1], state[2], state[3]);
  }
}

KERNEL_FQ void m06900_s08 (KERN_ATTR_RULES ())
{
}

KERNEL_FQ void m06900_s16 (KERN_ATTR_RULES ())
{
}