/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
 *             : sboxes for others were taken from JtR, license below
 */

// Project headers are only pulled in when KERNEL_STATIC is defined.
#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#endif

// File names of the single-hash / multi-hash comparison snippets.
#define COMPARE_S "inc_comp_single_bs.cl"
#define COMPARE_M "inc_comp_multi_bs.cl"

// KXX_DECL is defined empty for each of the vendor branches below.
#ifdef IS_NV
#define KXX_DECL
#endif

#if (defined IS_AMD || defined IS_HIP)
#define KXX_DECL
#endif

#ifdef IS_GENERIC
#define KXX_DECL
#endif

#ifdef IS_NV

#if CUDA_ARCH >= 500

//
// Bitslice DES S-boxes with LOP3.LUT instructions
// For NVIDIA Maxwell architecture and CUDA 7.5 RC
// by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
//
// Gate counts: 25 24 25 18 25 24 24 23
// Average: 23.5
// Depth: 8 7 7 6 8 10 10 8
// Average: 8
//
// Note that same S-box function with a lower gate count isn't necessarily faster.
//
// These Boolean expressions corresponding to DES S-boxes were
// discovered by <deeplearningjohndoe at gmail.com>
//
// This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
// Redistribution and use in source and binary forms, with or without
// modification, are permitted.
//
// The underlying mathematical formulas are NOT copyrighted.
//

// LUT(a,b,c,d,e): declares a new u32 named `a` and sets it with one
// lop3.b32 instruction taking b, c, d as register operands and e as the
// immediate. Because it declares a variable, each result name can be used
// only once per scope.
#define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));

/*
 * s1 (LOP3 variant): first of the eight bitslice DES S-box functions.
 *
 * a1..a6     : the six input words.
 * out1..out4 : the four results x1..x4 are XORed into *out1..*out4; the
 *              previous contents are combined with, not overwritten.
 */
DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(xAA55AA5500550055, a1, a4, a6, 0xC1)
  LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
  LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
  LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
  LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
  LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
  LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
  LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
  LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
  LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
  LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
  LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
  LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
  LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
  LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
  LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
  LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
  LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
  LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
  LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
  LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
  LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
  LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
  LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
  LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

/*
 * s2 (LOP3 variant): same calling convention as s1 - six input words,
 * four results XORed into *out1..*out4.
 */
DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97)
  LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
  LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76)
  LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
  LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
  LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65)
  LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
  LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
  LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
  LUT(x3333CCCC00000000, a2, a5, a6, 0x14)
  LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
  LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
  LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
  LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
  LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
  LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
  LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
  LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
  LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
  LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
  LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
  LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
  LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
  LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

/*
 * s3 (LOP3 variant): same calling convention as s1 - six input words,
 * four results XORed into *out1..*out4.
 */
DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
  LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
  LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
  LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
  LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
  LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
  LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
  LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49)
  LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
  LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
  LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
  LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
  LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
  LUT(x500F500F500F500F, a1, a3, a4, 0x98)
  LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
  LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
  LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
  LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
  LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
  LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
  LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
  LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
  LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
  LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
  LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

/*
 * s4 (LOP3 variant): same calling convention as s1. Note that x1/x2 and
 * x3/x4 are each derived from the same pair of intermediates, differing
 * only in the lop3 immediate.
 */
DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(x55F055F055F055F0, a1, a3, a4, 0x72)
  LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
  LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59)
  LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
  LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
  LUT(x9999666699996666, a1, a2, a5, 0x69)
  LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
  LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
  LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
  LUT(x4848484848484848, a1, a2, a3, 0x12)
  LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
  LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
  LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
  LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
  LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
  LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
  LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
  LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

/*
 * s5 (LOP3 variant): same calling convention as s1 - six input words,
 * four results XORed into *out1..*out4.
 */
DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
  LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9)
  LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
  LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
  LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
  LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
  LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
  LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
  LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
  LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
  LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
  LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
  LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
  LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
  LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
  LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
  LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
  LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
  LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
  LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
  LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
  LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
  LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
  LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
  LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

/*
 * s6 (LOP3 variant): same calling convention as s1. The result x4 is
 * also reused as an operand for a later intermediate, so the statement
 * order matters here.
 */
DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2)
  LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
  LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
  LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
  LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
  LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
  LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
  LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
  LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
  LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
  LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
  LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
  LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
  LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
  LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
  LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
  LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
  LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
  LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
  LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
  LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
  LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
  LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
  LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

/*
 * s7 (LOP3 variant): same calling convention as s1 - six input words,
 * four results XORed into *out1..*out4.
 */
DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
  LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
  LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
  LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
  LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
  LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
  LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
  LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
  LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
  LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
  LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
  LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
  LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
  LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
  LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
  LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23)
  LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
  LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
  LUT(xA050A050A050A050, a1, a3, a4, 0x21)
  LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
  LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
  LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
  LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
  LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

/*
 * s8 (LOP3 variant): same calling convention as s1 - six input words,
 * four results XORed into *out1..*out4.
 */
DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4)
{
  LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
  LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
  LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
  LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
  LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
  LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
  LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
  LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
  LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
  LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
  LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
  LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
  LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
  LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
  LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
  LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
  LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
  LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
  LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
  LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
  LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
  LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
  LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)

  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

#else

/*
 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates.
 *
 * Gate counts: 49 44 46 33 48 46 46 41
 * Average: 44.125
 *
 * Several same-gate-count expressions for each S-box are included (for use on
 * different CPUs/GPUs).
 *
 * These Boolean expressions corresponding to DES S-boxes have been generated
 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
 * John the Ripper password cracker: http://www.openwall.com/john/
 * Being mathematical formulas, they are not copyrighted and are free for reuse
 * by anyone.
 *
 * This file (a specific representation of the S-box expressions, surrounding
 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted. (This is a heavily cut-down "BSD license".)
340 * 341 * The effort has been sponsored by Rapid7: http://www.rapid7.com 342 */ 343 344DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 345{ 346 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, 347 x25202160; 348 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; 349 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; 350 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; 351 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; 352 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; 353 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; 354 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; 355 u32 x00, x01, x10, x11, x20, x21, x30, x31; 356 357 x55005500 = a1 & ~a5; 358 x5A0F5A0F = a4 ^ x55005500; 359 x3333FFFF = a3 | a6; 360 x66666666 = a1 ^ a3; 361 x22226666 = x3333FFFF & x66666666; 362 x2D2D6969 = a4 ^ x22226666; 363 x25202160 = x2D2D6969 & ~x5A0F5A0F; 364 365 x00FFFF00 = a5 ^ a6; 366 x33CCCC33 = a3 ^ x00FFFF00; 367 x4803120C = x5A0F5A0F & ~x33CCCC33; 368 x2222FFFF = a6 | x22226666; 369 x6A21EDF3 = x4803120C ^ x2222FFFF; 370 x4A01CC93 = x6A21EDF3 & ~x25202160; 371 372 x5555FFFF = a1 | a6; 373 x7F75FFFF = x6A21EDF3 | x5555FFFF; 374 x00D20096 = a5 & ~x2D2D6969; 375 x7FA7FF69 = x7F75FFFF ^ x00D20096; 376 377 x0A0A0000 = a4 & ~x5555FFFF; 378 x0AD80096 = x00D20096 ^ x0A0A0000; 379 x00999900 = x00FFFF00 & ~x66666666; 380 x0AD99996 = x0AD80096 | x00999900; 381 382 x22332233 = a3 & ~x55005500; 383 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; 384 x054885C0 = x257AA5F0 & ~x22332233; 385 xFAB77A3F = ~x054885C0; 386 x2221EDF3 = x3333FFFF & x6A21EDF3; 387 xD89697CC = xFAB77A3F ^ x2221EDF3; 388 x20 = x7FA7FF69 & ~a2; 389 x21 = x20 ^ xD89697CC; 390 *out3 ^= x21; 391 392 x05B77AC0 = x00FFFF00 ^ x054885C0; 393 x05F77AD6 = x00D20096 | x05B77AC0; 394 x36C48529 = x3333FFFF ^ x05F77AD6; 395 x6391D07C = a1 ^ x36C48529; 396 xBB0747B0 = 
xD89697CC ^ x6391D07C; 397 x00 = x25202160 | a2; 398 x01 = x00 ^ xBB0747B0; 399 *out1 ^= x01; 400 401 x4C460000 = x3333FFFF ^ x7F75FFFF; 402 x4EDF9996 = x0AD99996 | x4C460000; 403 x2D4E49EA = x6391D07C ^ x4EDF9996; 404 xBBFFFFB0 = x00FFFF00 | xBB0747B0; 405 x96B1B65A = x2D4E49EA ^ xBBFFFFB0; 406 x10 = x4A01CC93 | a2; 407 x11 = x10 ^ x96B1B65A; 408 *out2 ^= x11; 409 410 x5AFF5AFF = a5 | x5A0F5A0F; 411 x52B11215 = x5AFF5AFF & ~x2D4E49EA; 412 x4201C010 = x4A01CC93 & x6391D07C; 413 x10B0D205 = x52B11215 ^ x4201C010; 414 x30 = x10B0D205 | a2; 415 x31 = x30 ^ x0AD99996; 416 *out4 ^= x31; 417} 418 419DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 420{ 421 u32 x33CC33CC; 422 u32 x55550000, x00AA00FF, x33BB33FF; 423 u32 x33CC0000, x11441144, x11BB11BB, x003311BB; 424 u32 x00000F0F, x336600FF, x332200FF, x332200F0; 425 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; 426 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; 427 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; 428 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; 429 u32 x0A451047, xBBDFDD7B, xB19ACD3C; 430 u32 x00, x01, x10, x11, x20, x21, x30, x31; 431 432 x33CC33CC = a2 ^ a5; 433 434 x55550000 = a1 & ~a6; 435 x00AA00FF = a5 & ~x55550000; 436 x33BB33FF = a2 | x00AA00FF; 437 438 x33CC0000 = x33CC33CC & ~a6; 439 x11441144 = a1 & x33CC33CC; 440 x11BB11BB = a5 ^ x11441144; 441 x003311BB = x11BB11BB & ~x33CC0000; 442 443 x00000F0F = a3 & a6; 444 x336600FF = x00AA00FF ^ x33CC0000; 445 x332200FF = x33BB33FF & x336600FF; 446 x332200F0 = x332200FF & ~x00000F0F; 447 448 x0302000F = a3 & x332200FF; 449 xAAAAAAAA = ~a1; 450 xA9A8AAA5 = x0302000F ^ xAAAAAAAA; 451 x33CCCC33 = a6 ^ x33CC33CC; 452 x33CCC030 = x33CCCC33 & ~x00000F0F; 453 x9A646A95 = xA9A8AAA5 ^ x33CCC030; 454 x10 = a4 & ~x332200F0; 455 x11 = x10 ^ x9A646A95; 456 *out2 ^= x11; 457 458 x00333303 = a2 & ~x33CCC030; 459 
x118822B8 = x11BB11BB ^ x00333303; 460 xA8208805 = xA9A8AAA5 & ~x118822B8; 461 x3CC3C33C = a3 ^ x33CCCC33; 462 x94E34B39 = xA8208805 ^ x3CC3C33C; 463 x00 = x33BB33FF & ~a4; 464 x01 = x00 ^ x94E34B39; 465 *out1 ^= x01; 466 467 x0331330C = x0302000F ^ x00333303; 468 x3FF3F33C = x3CC3C33C | x0331330C; 469 xA9DF596A = x33BB33FF ^ x9A646A95; 470 xA9DF5F6F = x00000F0F | xA9DF596A; 471 x962CAC53 = x3FF3F33C ^ xA9DF5F6F; 472 473 xA9466A6A = x332200FF ^ x9A646A95; 474 x3DA52153 = x94E34B39 ^ xA9466A6A; 475 x29850143 = xA9DF5F6F & x3DA52153; 476 x33C0330C = x33CC33CC & x3FF3F33C; 477 x1A45324F = x29850143 ^ x33C0330C; 478 x20 = x1A45324F | a4; 479 x21 = x20 ^ x962CAC53; 480 *out3 ^= x21; 481 482 x0A451047 = x1A45324F & ~x118822B8; 483 xBBDFDD7B = x33CCCC33 | xA9DF596A; 484 xB19ACD3C = x0A451047 ^ xBBDFDD7B; 485 x30 = x003311BB | a4; 486 x31 = x30 ^ xB19ACD3C; 487 *out4 ^= x31; 488} 489 490DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 491{ 492 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; 493 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; 494 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; 495 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; 496 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; 497 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; 498 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; 499 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; 500 u32 x00, x01, x10, x11, x20, x21, x30, x31; 501 502 x44444444 = a1 & ~a2; 503 x0F0FF0F0 = a3 ^ a6; 504 x4F4FF4F4 = x44444444 | x0F0FF0F0; 505 x00FFFF00 = a4 ^ a6; 506 x00AAAA00 = x00FFFF00 & ~a1; 507 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; 508 509 x3C3CC3C3 = a2 ^ x0F0FF0F0; 510 x3C3C0000 = x3C3CC3C3 & ~a6; 511 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; 512 x0C840A00 = x4FE55EF4 & ~x7373F4F4; 513 514 x00005EF4 = a6 & x4FE55EF4; 515 x00FF5EFF = a4 | x00005EF4; 516 x00555455 = a1 
& x00FF5EFF; 517 x3C699796 = x3C3CC3C3 ^ x00555455; 518 x30 = x4FE55EF4 & ~a5; 519 x31 = x30 ^ x3C699796; 520 *out4 ^= x31; 521 522 x000FF000 = x0F0FF0F0 & x00FFFF00; 523 x55AA55AA = a1 ^ a4; 524 x26D9A15E = x7373F4F4 ^ x55AA55AA; 525 x2FDFAF5F = a3 | x26D9A15E; 526 x2FD00F5F = x2FDFAF5F & ~x000FF000; 527 528 x55AAFFAA = x00AAAA00 | x55AA55AA; 529 x28410014 = x3C699796 & ~x55AAFFAA; 530 x000000FF = a4 & a6; 531 x000000CC = x000000FF & ~a2; 532 x284100D8 = x28410014 ^ x000000CC; 533 534 x204100D0 = x7373F4F4 & x284100D8; 535 x3C3CC3FF = x3C3CC3C3 | x000000FF; 536 x1C3CC32F = x3C3CC3FF & ~x204100D0; 537 x4969967A = a1 ^ x1C3CC32F; 538 x10 = x2FD00F5F & a5; 539 x11 = x10 ^ x4969967A; 540 *out2 ^= x11; 541 542 x4CC44CC4 = x4FE55EF4 & ~a2; 543 x40C040C0 = x4CC44CC4 & ~a3; 544 xC3C33C3C = ~x3C3CC3C3; 545 x9669C396 = x55AAFFAA ^ xC3C33C3C; 546 xD6A98356 = x40C040C0 ^ x9669C396; 547 x00 = a5 & ~x0C840A00; 548 x01 = x00 ^ xD6A98356; 549 *out1 ^= x01; 550 551 xD6E9C3D6 = x40C040C0 | x9669C396; 552 x4CEEEEC4 = x00AAAA00 | x4CC44CC4; 553 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; 554 x001A000B = a4 & ~x4FE55EF4; 555 x9A1F2D1B = x9A072D12 | x001A000B; 556 x20 = a5 & ~x284100D8; 557 x21 = x20 ^ x9A1F2D1B; 558 *out3 ^= x21; 559} 560 561DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 562{ 563 u32 x5A5A5A5A, x0F0FF0F0; 564 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, 565 x52FBCA0F, x61C8F93C; 566 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; 567 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; 568 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; 569 u32 x00, x01, x10, x11, x20, x21, x30, x31; 570 571 x5A5A5A5A = a1 ^ a3; 572 x0F0FF0F0 = a3 ^ a5; 573 x33FF33FF = a2 | a4; 574 x33FFCC00 = a5 ^ x33FF33FF; 575 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; 576 x0C0CC0C0 = x0F0FF0F0 & ~a2; 577 x0CF3C03F = a4 ^ x0C0CC0C0; 578 x5EFBDA7F = x5A5A5A5A | 
x0CF3C03F; 579 x52FBCA0F = x5EFBDA7F & ~x0C0030F0; 580 x61C8F93C = a2 ^ x52FBCA0F; 581 582 x00C0C03C = x0CF3C03F & x61C8F93C; 583 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; 584 x3B92A366 = x5A5A5A5A ^ x61C8F93C; 585 x30908326 = x3B92A366 & ~x0F0F30C0; 586 x3C90B3D6 = x0C0030F0 ^ x30908326; 587 588 x33CC33CC = a2 ^ a4; 589 x0C0CFFFF = a5 | x0C0CC0C0; 590 x379E5C99 = x3B92A366 ^ x0C0CFFFF; 591 x04124C11 = x379E5C99 & ~x33CC33CC; 592 x56E9861E = x52FBCA0F ^ x04124C11; 593 x00 = a6 & ~x3C90B3D6; 594 x01 = x00 ^ x56E9861E; 595 *out1 ^= x01; 596 597 xA91679E1 = ~x56E9861E; 598 x10 = x3C90B3D6 & ~a6; 599 x11 = x10 ^ xA91679E1; 600 *out2 ^= x11; 601 602 x9586CA37 = x3C90B3D6 ^ xA91679E1; 603 x8402C833 = x9586CA37 & ~x33CC33CC; 604 x84C2C83F = x00C0C03C | x8402C833; 605 xB35C94A6 = x379E5C99 ^ x84C2C83F; 606 x20 = x61C8F93C | a6; 607 x21 = x20 ^ xB35C94A6; 608 *out3 ^= x21; 609 610 x30 = a6 & x61C8F93C; 611 x31 = x30 ^ xB35C94A6; 612 *out4 ^= x31; 613} 614 615DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 616{ 617 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; 618 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; 619 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; 620 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; 621 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; 622 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; 623 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; 624 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; 625 u32 x00, x01, x10, x11, x20, x21, x30, x31; 626 627 x77777777 = a1 | a3; 628 x77770000 = x77777777 & ~a6; 629 x22225555 = a1 ^ x77770000; 630 x11116666 = a3 ^ x22225555; 631 x1F1F6F6F = a4 | x11116666; 632 633 x70700000 = x77770000 & ~a4; 634 x43433333 = a3 ^ x70700000; 635 x00430033 = a5 & x43433333; 636 x55557777 = a1 | x11116666; 637 x55167744 = x00430033 ^ x55557777; 638 
x5A19784B = a4 ^ x55167744; 639 640 x5A1987B4 = a6 ^ x5A19784B; 641 x7A3BD7F5 = x22225555 | x5A1987B4; 642 x003B00F5 = a5 & x7A3BD7F5; 643 x221955A0 = x22225555 ^ x003B00F5; 644 x05050707 = a4 & x55557777; 645 x271C52A7 = x221955A0 ^ x05050707; 646 647 x2A2A82A0 = x7A3BD7F5 & ~a1; 648 x6969B193 = x43433333 ^ x2A2A82A0; 649 x1FE06F90 = a5 ^ x1F1F6F6F; 650 x16804E00 = x1FE06F90 & ~x6969B193; 651 xE97FB1FF = ~x16804E00; 652 x20 = xE97FB1FF & ~a2; 653 x21 = x20 ^ x5A19784B; 654 *out3 ^= x21; 655 656 x43403302 = x43433333 & ~x003B00F5; 657 x35CAED30 = x2A2A82A0 ^ x1FE06F90; 658 x37DEFFB7 = x271C52A7 | x35CAED30; 659 x349ECCB5 = x37DEFFB7 & ~x43403302; 660 x0B01234A = x1F1F6F6F & ~x349ECCB5; 661 662 x101884B4 = x5A1987B4 & x349ECCB5; 663 x0FF8EB24 = x1FE06F90 ^ x101884B4; 664 x41413333 = x43433333 & x55557777; 665 x4FF9FB37 = x0FF8EB24 | x41413333; 666 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; 667 x30 = x4FC2FBC2 & a2; 668 x31 = x30 ^ x271C52A7; 669 *out4 ^= x31; 670 671 x22222222 = a1 ^ x77777777; 672 x16BCEE97 = x349ECCB5 ^ x22222222; 673 x0F080B04 = a4 & x0FF8EB24; 674 x19B4E593 = x16BCEE97 ^ x0F080B04; 675 x00 = x0B01234A | a2; 676 x01 = x00 ^ x19B4E593; 677 *out1 ^= x01; 678 679 x5C5C5C5C = x1F1F6F6F ^ x43433333; 680 x4448184C = x5C5C5C5C & ~x19B4E593; 681 x2DDABE71 = x22225555 ^ x0FF8EB24; 682 x6992A63D = x4448184C ^ x2DDABE71; 683 x10 = x1F1F6F6F & a2; 684 x11 = x10 ^ x6992A63D; 685 *out2 ^= x11; 686} 687 688DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 689{ 690 u32 x33CC33CC; 691 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; 692 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; 693 u32 x09030C06, x09030000, x336622FF, x3A6522FF; 694 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; 695 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; 696 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; 697 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, 
xCC82CDE5; 698 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; 699 u32 x00, x01, x10, x11, x20, x21, x30, x31; 700 701 x33CC33CC = a2 ^ a5; 702 703 x3333FFFF = a2 | a6; 704 x11115555 = a1 & x3333FFFF; 705 x22DD6699 = x33CC33CC ^ x11115555; 706 x22DD9966 = a6 ^ x22DD6699; 707 x00220099 = a5 & ~x22DD9966; 708 709 x00551144 = a1 & x22DD9966; 710 x33662277 = a2 ^ x00551144; 711 x5A5A5A5A = a1 ^ a3; 712 x7B7E7A7F = x33662277 | x5A5A5A5A; 713 x59A31CE6 = x22DD6699 ^ x7B7E7A7F; 714 715 x09030C06 = a3 & x59A31CE6; 716 x09030000 = x09030C06 & ~a6; 717 x336622FF = x00220099 | x33662277; 718 x3A6522FF = x09030000 ^ x336622FF; 719 x30 = x3A6522FF & a4; 720 x31 = x30 ^ x59A31CE6; 721 *out4 ^= x31; 722 723 x484D494C = a2 ^ x7B7E7A7F; 724 x0000B6B3 = a6 & ~x484D494C; 725 x0F0FB9BC = a3 ^ x0000B6B3; 726 x00FC00F9 = a5 & ~x09030C06; 727 x0FFFB9FD = x0F0FB9BC | x00FC00F9; 728 729 x5DF75DF7 = a1 | x59A31CE6; 730 x116600F7 = x336622FF & x5DF75DF7; 731 x1E69B94B = x0F0FB9BC ^ x116600F7; 732 x1668B94B = x1E69B94B & ~x09030000; 733 x20 = x00220099 | a4; 734 x21 = x20 ^ x1668B94B; 735 *out3 ^= x21; 736 737 x7B7B7B7B = a2 | x5A5A5A5A; 738 x411E5984 = x3A6522FF ^ x7B7B7B7B; 739 x1FFFFDFD = x11115555 | x0FFFB9FD; 740 x5EE1A479 = x411E5984 ^ x1FFFFDFD; 741 742 x3CB4DFD2 = x22DD6699 ^ x1E69B94B; 743 x004B002D = a5 & ~x3CB4DFD2; 744 xB7B2B6B3 = ~x484D494C; 745 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; 746 xCC82CDE5 = x004B002D ^ xCCC9CDC8; 747 x10 = xCC82CDE5 & ~a4; 748 x11 = x10 ^ x5EE1A479; 749 *out2 ^= x11; 750 751 x0055EEBB = a6 ^ x00551144; 752 x5A5AECE9 = a1 ^ x0F0FB9BC; 753 x0050ECA9 = x0055EEBB & x5A5AECE9; 754 xC5CAC1CE = x09030C06 ^ xCCC9CDC8; 755 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; 756 x00 = x0FFFB9FD & ~a4; 757 x01 = x00 ^ xC59A2D67; 758 *out1 ^= x01; 759} 760 761DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 762{ 763 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, 
x00001841; 764 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; 765 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; 766 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; 767 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; 768 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; 769 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; 770 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; 771 u32 x00, x01, x10, x11, x20, x21, x30, x31; 772 773 x0FF00FF0 = a4 ^ a5; 774 x3CC33CC3 = a3 ^ x0FF00FF0; 775 x00003CC3 = a6 & x3CC33CC3; 776 x0F000F00 = a4 & x0FF00FF0; 777 x5A555A55 = a2 ^ x0F000F00; 778 x00001841 = x00003CC3 & x5A555A55; 779 780 x00000F00 = a6 & x0F000F00; 781 x33333C33 = a3 ^ x00000F00; 782 x7B777E77 = x5A555A55 | x33333C33; 783 x0FF0F00F = a6 ^ x0FF00FF0; 784 x74878E78 = x7B777E77 ^ x0FF0F00F; 785 x30 = a1 & ~x00001841; 786 x31 = x30 ^ x74878E78; 787 *out4 ^= x31; 788 789 x003C003C = a5 & ~x3CC33CC3; 790 x5A7D5A7D = x5A555A55 | x003C003C; 791 x333300F0 = x00003CC3 ^ x33333C33; 792 x694E5A8D = x5A7D5A7D ^ x333300F0; 793 794 x0FF0CCCC = x00003CC3 ^ x0FF0F00F; 795 x000F0303 = a4 & ~x0FF0CCCC; 796 x5A505854 = x5A555A55 & ~x000F0303; 797 x33CC000F = a5 ^ x333300F0; 798 x699C585B = x5A505854 ^ x33CC000F; 799 800 x7F878F78 = x0F000F00 | x74878E78; 801 x21101013 = a3 & x699C585B; 802 x7F979F7B = x7F878F78 | x21101013; 803 x30030CC0 = x3CC33CC3 & ~x0FF0F00F; 804 x4F9493BB = x7F979F7B ^ x30030CC0; 805 x00 = x4F9493BB & ~a1; 806 x01 = x00 ^ x694E5A8D; 807 *out1 ^= x01; 808 809 x6F9CDBFB = x699C585B | x4F9493BB; 810 x0000DBFB = a6 & x6F9CDBFB; 811 x00005151 = a2 & x0000DBFB; 812 x26DAC936 = x694E5A8D ^ x4F9493BB; 813 x26DA9867 = x00005151 ^ x26DAC936; 814 815 x27DA9877 = x21101013 | x26DA9867; 816 x27DA438C = x0000DBFB ^ x27DA9877; 817 x2625C9C9 = a5 ^ x26DAC936; 818 x27FFCBCD = x27DA438C | x2625C9C9; 819 x20 = x27FFCBCD & a1; 820 x21 = x20 ^ x699C585B; 821 *out3 ^= x21; 822 823 x27FF1036 = x0000DBFB ^ x27FFCBCD; 824 x27FF103E = x003C003C | 
x27FF1036; 825 xB06B6C44 = ~x4F9493BB; 826 x97947C7A = x27FF103E ^ xB06B6C44; 827 x10 = x97947C7A & ~a1; 828 x11 = x10 ^ x26DA9867; 829 *out2 ^= x11; 830} 831 832DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 833{ 834 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; 835 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; 836 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; 837 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; 838 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; 839 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; 840 u32 xF700A600, x61008000, x03B7856B, x62B7056B; 841 u32 x00, x01, x10, x11, x20, x21, x30, x31; 842 843 x0C0C0C0C = a3 & ~a2; 844 x0000F0F0 = a5 & ~a3; 845 x00FFF00F = a4 ^ x0000F0F0; 846 x00555005 = a1 & x00FFF00F; 847 x00515001 = x00555005 & ~x0C0C0C0C; 848 849 x33000330 = a2 & ~x00FFF00F; 850 x77555775 = a1 | x33000330; 851 x30303030 = a2 & ~a3; 852 x3030CFCF = a5 ^ x30303030; 853 x30104745 = x77555775 & x3030CFCF; 854 x30555745 = x00555005 | x30104745; 855 856 xFF000FF0 = ~x00FFF00F; 857 xCF1048B5 = x30104745 ^ xFF000FF0; 858 x080A080A = a3 & ~x77555775; 859 xC71A40BF = xCF1048B5 ^ x080A080A; 860 xCB164CB3 = x0C0C0C0C ^ xC71A40BF; 861 x10 = x00515001 | a6; 862 x11 = x10 ^ xCB164CB3; 863 *out2 ^= x11; 864 865 x9E4319E6 = a1 ^ xCB164CB3; 866 x000019E6 = a5 & x9E4319E6; 867 xF429738C = a2 ^ xC71A40BF; 868 xF4296A6A = x000019E6 ^ xF429738C; 869 xC729695A = x33000330 ^ xF4296A6A; 870 871 xC47C3D2F = x30555745 ^ xF4296A6A; 872 xF77F3F3F = a2 | xC47C3D2F; 873 x9E43E619 = a5 ^ x9E4319E6; 874 x693CD926 = xF77F3F3F ^ x9E43E619; 875 x20 = x30555745 & a6; 876 x21 = x20 ^ x693CD926; 877 *out3 ^= x21; 878 879 xF719A695 = x3030CFCF ^ xC729695A; 880 xF4FF73FF = a4 | xF429738C; 881 x03E6D56A = xF719A695 ^ xF4FF73FF; 882 x56B3803F = a1 ^ x03E6D56A; 883 x30 = x56B3803F & a6; 884 x31 = x30 ^ xC729695A; 885 
*out4 ^= x31; 886 887 xF700A600 = xF719A695 & ~a4; 888 x61008000 = x693CD926 & xF700A600; 889 x03B7856B = x00515001 ^ x03E6D56A; 890 x62B7056B = x61008000 ^ x03B7856B; 891 x00 = x62B7056B | a6; 892 x01 = x00 ^ xC729695A; 893 *out1 ^= x01; 894} 895 896#endif 897#endif 898 899#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC 900 901/* 902 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC 903 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. 904 * 905 * Gate counts: 49 44 46 33 48 46 46 41 906 * Average: 44.125 907 * 908 * Several same-gate-count expressions for each S-box are included (for use on 909 * different CPUs/GPUs). 910 * 911 * These Boolean expressions corresponding to DES S-boxes have been generated 912 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's 913 * John the Ripper password cracker: http://www.openwall.com/john/ 914 * Being mathematical formulas, they are not copyrighted and are free for reuse 915 * by anyone. 916 * 917 * This file (a specific representation of the S-box expressions, surrounding 918 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>. 919 * Redistribution and use in source and binary forms, with or without 920 * modification, are permitted. (This is a heavily cut-down "BSD license".) 
921 * 922 * The effort has been sponsored by Rapid7: http://www.rapid7.com 923 */ 924 925DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 926{ 927 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, 928 x25202160; 929 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; 930 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; 931 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; 932 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; 933 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; 934 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; 935 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; 936 u32 x00, x01, x10, x11, x20, x21, x30, x31; 937 938 x55005500 = a1 & ~a5; 939 x5A0F5A0F = a4 ^ x55005500; 940 x3333FFFF = a3 | a6; 941 x66666666 = a1 ^ a3; 942 x22226666 = x3333FFFF & x66666666; 943 x2D2D6969 = a4 ^ x22226666; 944 x25202160 = x2D2D6969 & ~x5A0F5A0F; 945 946 x00FFFF00 = a5 ^ a6; 947 x33CCCC33 = a3 ^ x00FFFF00; 948 x4803120C = x5A0F5A0F & ~x33CCCC33; 949 x2222FFFF = a6 | x22226666; 950 x6A21EDF3 = x4803120C ^ x2222FFFF; 951 x4A01CC93 = x6A21EDF3 & ~x25202160; 952 953 x5555FFFF = a1 | a6; 954 x7F75FFFF = x6A21EDF3 | x5555FFFF; 955 x00D20096 = a5 & ~x2D2D6969; 956 x7FA7FF69 = x7F75FFFF ^ x00D20096; 957 958 x0A0A0000 = a4 & ~x5555FFFF; 959 x0AD80096 = x00D20096 ^ x0A0A0000; 960 x00999900 = x00FFFF00 & ~x66666666; 961 x0AD99996 = x0AD80096 | x00999900; 962 963 x22332233 = a3 & ~x55005500; 964 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; 965 x054885C0 = x257AA5F0 & ~x22332233; 966 xFAB77A3F = ~x054885C0; 967 x2221EDF3 = x3333FFFF & x6A21EDF3; 968 xD89697CC = xFAB77A3F ^ x2221EDF3; 969 x20 = x7FA7FF69 & ~a2; 970 x21 = x20 ^ xD89697CC; 971 *out3 ^= x21; 972 973 x05B77AC0 = x00FFFF00 ^ x054885C0; 974 x05F77AD6 = x00D20096 | x05B77AC0; 975 x36C48529 = x3333FFFF ^ x05F77AD6; 976 x6391D07C = a1 ^ x36C48529; 977 xBB0747B0 = 
xD89697CC ^ x6391D07C; 978 x00 = x25202160 | a2; 979 x01 = x00 ^ xBB0747B0; 980 *out1 ^= x01; 981 982 x4C460000 = x3333FFFF ^ x7F75FFFF; 983 x4EDF9996 = x0AD99996 | x4C460000; 984 x2D4E49EA = x6391D07C ^ x4EDF9996; 985 xBBFFFFB0 = x00FFFF00 | xBB0747B0; 986 x96B1B65A = x2D4E49EA ^ xBBFFFFB0; 987 x10 = x4A01CC93 | a2; 988 x11 = x10 ^ x96B1B65A; 989 *out2 ^= x11; 990 991 x5AFF5AFF = a5 | x5A0F5A0F; 992 x52B11215 = x5AFF5AFF & ~x2D4E49EA; 993 x4201C010 = x4A01CC93 & x6391D07C; 994 x10B0D205 = x52B11215 ^ x4201C010; 995 x30 = x10B0D205 | a2; 996 x31 = x30 ^ x0AD99996; 997 *out4 ^= x31; 998} 999 1000DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1001{ 1002 u32 x33CC33CC; 1003 u32 x55550000, x00AA00FF, x33BB33FF; 1004 u32 x33CC0000, x11441144, x11BB11BB, x003311BB; 1005 u32 x00000F0F, x336600FF, x332200FF, x332200F0; 1006 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; 1007 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; 1008 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; 1009 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; 1010 u32 x0A451047, xBBDFDD7B, xB19ACD3C; 1011 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1012 1013 x33CC33CC = a2 ^ a5; 1014 1015 x55550000 = a1 & ~a6; 1016 x00AA00FF = a5 & ~x55550000; 1017 x33BB33FF = a2 | x00AA00FF; 1018 1019 x33CC0000 = x33CC33CC & ~a6; 1020 x11441144 = a1 & x33CC33CC; 1021 x11BB11BB = a5 ^ x11441144; 1022 x003311BB = x11BB11BB & ~x33CC0000; 1023 1024 x00000F0F = a3 & a6; 1025 x336600FF = x00AA00FF ^ x33CC0000; 1026 x332200FF = x33BB33FF & x336600FF; 1027 x332200F0 = x332200FF & ~x00000F0F; 1028 1029 x0302000F = a3 & x332200FF; 1030 xAAAAAAAA = ~a1; 1031 xA9A8AAA5 = x0302000F ^ xAAAAAAAA; 1032 x33CCCC33 = a6 ^ x33CC33CC; 1033 x33CCC030 = x33CCCC33 & ~x00000F0F; 1034 x9A646A95 = xA9A8AAA5 ^ x33CCC030; 1035 x10 = a4 & ~x332200F0; 1036 x11 = x10 ^ x9A646A95; 1037 *out2 ^= x11; 
1038 1039 x00333303 = a2 & ~x33CCC030; 1040 x118822B8 = x11BB11BB ^ x00333303; 1041 xA8208805 = xA9A8AAA5 & ~x118822B8; 1042 x3CC3C33C = a3 ^ x33CCCC33; 1043 x94E34B39 = xA8208805 ^ x3CC3C33C; 1044 x00 = x33BB33FF & ~a4; 1045 x01 = x00 ^ x94E34B39; 1046 *out1 ^= x01; 1047 1048 x0331330C = x0302000F ^ x00333303; 1049 x3FF3F33C = x3CC3C33C | x0331330C; 1050 xA9DF596A = x33BB33FF ^ x9A646A95; 1051 xA9DF5F6F = x00000F0F | xA9DF596A; 1052 x962CAC53 = x3FF3F33C ^ xA9DF5F6F; 1053 1054 xA9466A6A = x332200FF ^ x9A646A95; 1055 x3DA52153 = x94E34B39 ^ xA9466A6A; 1056 x29850143 = xA9DF5F6F & x3DA52153; 1057 x33C0330C = x33CC33CC & x3FF3F33C; 1058 x1A45324F = x29850143 ^ x33C0330C; 1059 x20 = x1A45324F | a4; 1060 x21 = x20 ^ x962CAC53; 1061 *out3 ^= x21; 1062 1063 x0A451047 = x1A45324F & ~x118822B8; 1064 xBBDFDD7B = x33CCCC33 | xA9DF596A; 1065 xB19ACD3C = x0A451047 ^ xBBDFDD7B; 1066 x30 = x003311BB | a4; 1067 x31 = x30 ^ xB19ACD3C; 1068 *out4 ^= x31; 1069} 1070 1071DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1072{ 1073 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; 1074 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; 1075 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; 1076 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; 1077 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; 1078 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; 1079 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; 1080 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; 1081 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1082 1083 x44444444 = a1 & ~a2; 1084 x0F0FF0F0 = a3 ^ a6; 1085 x4F4FF4F4 = x44444444 | x0F0FF0F0; 1086 x00FFFF00 = a4 ^ a6; 1087 x00AAAA00 = x00FFFF00 & ~a1; 1088 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; 1089 1090 x3C3CC3C3 = a2 ^ x0F0FF0F0; 1091 x3C3C0000 = x3C3CC3C3 & ~a6; 1092 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; 1093 x0C840A00 = x4FE55EF4 & 
~x7373F4F4; 1094 1095 x00005EF4 = a6 & x4FE55EF4; 1096 x00FF5EFF = a4 | x00005EF4; 1097 x00555455 = a1 & x00FF5EFF; 1098 x3C699796 = x3C3CC3C3 ^ x00555455; 1099 x30 = x4FE55EF4 & ~a5; 1100 x31 = x30 ^ x3C699796; 1101 *out4 ^= x31; 1102 1103 x000FF000 = x0F0FF0F0 & x00FFFF00; 1104 x55AA55AA = a1 ^ a4; 1105 x26D9A15E = x7373F4F4 ^ x55AA55AA; 1106 x2FDFAF5F = a3 | x26D9A15E; 1107 x2FD00F5F = x2FDFAF5F & ~x000FF000; 1108 1109 x55AAFFAA = x00AAAA00 | x55AA55AA; 1110 x28410014 = x3C699796 & ~x55AAFFAA; 1111 x000000FF = a4 & a6; 1112 x000000CC = x000000FF & ~a2; 1113 x284100D8 = x28410014 ^ x000000CC; 1114 1115 x204100D0 = x7373F4F4 & x284100D8; 1116 x3C3CC3FF = x3C3CC3C3 | x000000FF; 1117 x1C3CC32F = x3C3CC3FF & ~x204100D0; 1118 x4969967A = a1 ^ x1C3CC32F; 1119 x10 = x2FD00F5F & a5; 1120 x11 = x10 ^ x4969967A; 1121 *out2 ^= x11; 1122 1123 x4CC44CC4 = x4FE55EF4 & ~a2; 1124 x40C040C0 = x4CC44CC4 & ~a3; 1125 xC3C33C3C = ~x3C3CC3C3; 1126 x9669C396 = x55AAFFAA ^ xC3C33C3C; 1127 xD6A98356 = x40C040C0 ^ x9669C396; 1128 x00 = a5 & ~x0C840A00; 1129 x01 = x00 ^ xD6A98356; 1130 *out1 ^= x01; 1131 1132 xD6E9C3D6 = x40C040C0 | x9669C396; 1133 x4CEEEEC4 = x00AAAA00 | x4CC44CC4; 1134 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; 1135 x001A000B = a4 & ~x4FE55EF4; 1136 x9A1F2D1B = x9A072D12 | x001A000B; 1137 x20 = a5 & ~x284100D8; 1138 x21 = x20 ^ x9A1F2D1B; 1139 *out3 ^= x21; 1140} 1141 1142DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1143{ 1144 u32 x5A5A5A5A, x0F0FF0F0; 1145 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, 1146 x52FBCA0F, x61C8F93C; 1147 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; 1148 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; 1149 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; 1150 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1151 1152 x5A5A5A5A = a1 ^ a3; 1153 x0F0FF0F0 = a3 ^ a5; 1154 x33FF33FF = a2 | a4; 1155 
x33FFCC00 = a5 ^ x33FF33FF; 1156 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; 1157 x0C0CC0C0 = x0F0FF0F0 & ~a2; 1158 x0CF3C03F = a4 ^ x0C0CC0C0; 1159 x5EFBDA7F = x5A5A5A5A | x0CF3C03F; 1160 x52FBCA0F = x5EFBDA7F & ~x0C0030F0; 1161 x61C8F93C = a2 ^ x52FBCA0F; 1162 1163 x00C0C03C = x0CF3C03F & x61C8F93C; 1164 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; 1165 x3B92A366 = x5A5A5A5A ^ x61C8F93C; 1166 x30908326 = x3B92A366 & ~x0F0F30C0; 1167 x3C90B3D6 = x0C0030F0 ^ x30908326; 1168 1169 x33CC33CC = a2 ^ a4; 1170 x0C0CFFFF = a5 | x0C0CC0C0; 1171 x379E5C99 = x3B92A366 ^ x0C0CFFFF; 1172 x04124C11 = x379E5C99 & ~x33CC33CC; 1173 x56E9861E = x52FBCA0F ^ x04124C11; 1174 x00 = a6 & ~x3C90B3D6; 1175 x01 = x00 ^ x56E9861E; 1176 *out1 ^= x01; 1177 1178 xA91679E1 = ~x56E9861E; 1179 x10 = x3C90B3D6 & ~a6; 1180 x11 = x10 ^ xA91679E1; 1181 *out2 ^= x11; 1182 1183 x9586CA37 = x3C90B3D6 ^ xA91679E1; 1184 x8402C833 = x9586CA37 & ~x33CC33CC; 1185 x84C2C83F = x00C0C03C | x8402C833; 1186 xB35C94A6 = x379E5C99 ^ x84C2C83F; 1187 x20 = x61C8F93C | a6; 1188 x21 = x20 ^ xB35C94A6; 1189 *out3 ^= x21; 1190 1191 x30 = a6 & x61C8F93C; 1192 x31 = x30 ^ xB35C94A6; 1193 *out4 ^= x31; 1194} 1195 1196DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1197{ 1198 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; 1199 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; 1200 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; 1201 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; 1202 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; 1203 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; 1204 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; 1205 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; 1206 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1207 1208 x77777777 = a1 | a3; 1209 x77770000 = x77777777 & ~a6; 1210 x22225555 = a1 ^ x77770000; 1211 x11116666 = a3 ^ 
x22225555; 1212 x1F1F6F6F = a4 | x11116666; 1213 1214 x70700000 = x77770000 & ~a4; 1215 x43433333 = a3 ^ x70700000; 1216 x00430033 = a5 & x43433333; 1217 x55557777 = a1 | x11116666; 1218 x55167744 = x00430033 ^ x55557777; 1219 x5A19784B = a4 ^ x55167744; 1220 1221 x5A1987B4 = a6 ^ x5A19784B; 1222 x7A3BD7F5 = x22225555 | x5A1987B4; 1223 x003B00F5 = a5 & x7A3BD7F5; 1224 x221955A0 = x22225555 ^ x003B00F5; 1225 x05050707 = a4 & x55557777; 1226 x271C52A7 = x221955A0 ^ x05050707; 1227 1228 x2A2A82A0 = x7A3BD7F5 & ~a1; 1229 x6969B193 = x43433333 ^ x2A2A82A0; 1230 x1FE06F90 = a5 ^ x1F1F6F6F; 1231 x16804E00 = x1FE06F90 & ~x6969B193; 1232 xE97FB1FF = ~x16804E00; 1233 x20 = xE97FB1FF & ~a2; 1234 x21 = x20 ^ x5A19784B; 1235 *out3 ^= x21; 1236 1237 x43403302 = x43433333 & ~x003B00F5; 1238 x35CAED30 = x2A2A82A0 ^ x1FE06F90; 1239 x37DEFFB7 = x271C52A7 | x35CAED30; 1240 x349ECCB5 = x37DEFFB7 & ~x43403302; 1241 x0B01234A = x1F1F6F6F & ~x349ECCB5; 1242 1243 x101884B4 = x5A1987B4 & x349ECCB5; 1244 x0FF8EB24 = x1FE06F90 ^ x101884B4; 1245 x41413333 = x43433333 & x55557777; 1246 x4FF9FB37 = x0FF8EB24 | x41413333; 1247 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; 1248 x30 = x4FC2FBC2 & a2; 1249 x31 = x30 ^ x271C52A7; 1250 *out4 ^= x31; 1251 1252 x22222222 = a1 ^ x77777777; 1253 x16BCEE97 = x349ECCB5 ^ x22222222; 1254 x0F080B04 = a4 & x0FF8EB24; 1255 x19B4E593 = x16BCEE97 ^ x0F080B04; 1256 x00 = x0B01234A | a2; 1257 x01 = x00 ^ x19B4E593; 1258 *out1 ^= x01; 1259 1260 x5C5C5C5C = x1F1F6F6F ^ x43433333; 1261 x4448184C = x5C5C5C5C & ~x19B4E593; 1262 x2DDABE71 = x22225555 ^ x0FF8EB24; 1263 x6992A63D = x4448184C ^ x2DDABE71; 1264 x10 = x1F1F6F6F & a2; 1265 x11 = x10 ^ x6992A63D; 1266 *out2 ^= x11; 1267} 1268 1269DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1270{ 1271 u32 x33CC33CC; 1272 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; 1273 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, 
x59A31CE6; 1274 u32 x09030C06, x09030000, x336622FF, x3A6522FF; 1275 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; 1276 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; 1277 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; 1278 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; 1279 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; 1280 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1281 1282 x33CC33CC = a2 ^ a5; 1283 1284 x3333FFFF = a2 | a6; 1285 x11115555 = a1 & x3333FFFF; 1286 x22DD6699 = x33CC33CC ^ x11115555; 1287 x22DD9966 = a6 ^ x22DD6699; 1288 x00220099 = a5 & ~x22DD9966; 1289 1290 x00551144 = a1 & x22DD9966; 1291 x33662277 = a2 ^ x00551144; 1292 x5A5A5A5A = a1 ^ a3; 1293 x7B7E7A7F = x33662277 | x5A5A5A5A; 1294 x59A31CE6 = x22DD6699 ^ x7B7E7A7F; 1295 1296 x09030C06 = a3 & x59A31CE6; 1297 x09030000 = x09030C06 & ~a6; 1298 x336622FF = x00220099 | x33662277; 1299 x3A6522FF = x09030000 ^ x336622FF; 1300 x30 = x3A6522FF & a4; 1301 x31 = x30 ^ x59A31CE6; 1302 *out4 ^= x31; 1303 1304 x484D494C = a2 ^ x7B7E7A7F; 1305 x0000B6B3 = a6 & ~x484D494C; 1306 x0F0FB9BC = a3 ^ x0000B6B3; 1307 x00FC00F9 = a5 & ~x09030C06; 1308 x0FFFB9FD = x0F0FB9BC | x00FC00F9; 1309 1310 x5DF75DF7 = a1 | x59A31CE6; 1311 x116600F7 = x336622FF & x5DF75DF7; 1312 x1E69B94B = x0F0FB9BC ^ x116600F7; 1313 x1668B94B = x1E69B94B & ~x09030000; 1314 x20 = x00220099 | a4; 1315 x21 = x20 ^ x1668B94B; 1316 *out3 ^= x21; 1317 1318 x7B7B7B7B = a2 | x5A5A5A5A; 1319 x411E5984 = x3A6522FF ^ x7B7B7B7B; 1320 x1FFFFDFD = x11115555 | x0FFFB9FD; 1321 x5EE1A479 = x411E5984 ^ x1FFFFDFD; 1322 1323 x3CB4DFD2 = x22DD6699 ^ x1E69B94B; 1324 x004B002D = a5 & ~x3CB4DFD2; 1325 xB7B2B6B3 = ~x484D494C; 1326 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; 1327 xCC82CDE5 = x004B002D ^ xCCC9CDC8; 1328 x10 = xCC82CDE5 & ~a4; 1329 x11 = x10 ^ x5EE1A479; 1330 *out2 ^= x11; 1331 1332 x0055EEBB = a6 ^ x00551144; 1333 x5A5AECE9 = a1 ^ x0F0FB9BC; 1334 x0050ECA9 = x0055EEBB & x5A5AECE9; 1335 xC5CAC1CE = x09030C06 ^ xCCC9CDC8; 
1336 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; 1337 x00 = x0FFFB9FD & ~a4; 1338 x01 = x00 ^ xC59A2D67; 1339 *out1 ^= x01; 1340} 1341 1342DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1343{ 1344 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; 1345 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; 1346 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; 1347 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; 1348 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; 1349 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; 1350 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; 1351 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; 1352 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1353 1354 x0FF00FF0 = a4 ^ a5; 1355 x3CC33CC3 = a3 ^ x0FF00FF0; 1356 x00003CC3 = a6 & x3CC33CC3; 1357 x0F000F00 = a4 & x0FF00FF0; 1358 x5A555A55 = a2 ^ x0F000F00; 1359 x00001841 = x00003CC3 & x5A555A55; 1360 1361 x00000F00 = a6 & x0F000F00; 1362 x33333C33 = a3 ^ x00000F00; 1363 x7B777E77 = x5A555A55 | x33333C33; 1364 x0FF0F00F = a6 ^ x0FF00FF0; 1365 x74878E78 = x7B777E77 ^ x0FF0F00F; 1366 x30 = a1 & ~x00001841; 1367 x31 = x30 ^ x74878E78; 1368 *out4 ^= x31; 1369 1370 x003C003C = a5 & ~x3CC33CC3; 1371 x5A7D5A7D = x5A555A55 | x003C003C; 1372 x333300F0 = x00003CC3 ^ x33333C33; 1373 x694E5A8D = x5A7D5A7D ^ x333300F0; 1374 1375 x0FF0CCCC = x00003CC3 ^ x0FF0F00F; 1376 x000F0303 = a4 & ~x0FF0CCCC; 1377 x5A505854 = x5A555A55 & ~x000F0303; 1378 x33CC000F = a5 ^ x333300F0; 1379 x699C585B = x5A505854 ^ x33CC000F; 1380 1381 x7F878F78 = x0F000F00 | x74878E78; 1382 x21101013 = a3 & x699C585B; 1383 x7F979F7B = x7F878F78 | x21101013; 1384 x30030CC0 = x3CC33CC3 & ~x0FF0F00F; 1385 x4F9493BB = x7F979F7B ^ x30030CC0; 1386 x00 = x4F9493BB & ~a1; 1387 x01 = x00 ^ x694E5A8D; 1388 *out1 ^= x01; 1389 1390 x6F9CDBFB = x699C585B | x4F9493BB; 1391 x0000DBFB = a6 & x6F9CDBFB; 1392 
x00005151 = a2 & x0000DBFB; 1393 x26DAC936 = x694E5A8D ^ x4F9493BB; 1394 x26DA9867 = x00005151 ^ x26DAC936; 1395 1396 x27DA9877 = x21101013 | x26DA9867; 1397 x27DA438C = x0000DBFB ^ x27DA9877; 1398 x2625C9C9 = a5 ^ x26DAC936; 1399 x27FFCBCD = x27DA438C | x2625C9C9; 1400 x20 = x27FFCBCD & a1; 1401 x21 = x20 ^ x699C585B; 1402 *out3 ^= x21; 1403 1404 x27FF1036 = x0000DBFB ^ x27FFCBCD; 1405 x27FF103E = x003C003C | x27FF1036; 1406 xB06B6C44 = ~x4F9493BB; 1407 x97947C7A = x27FF103E ^ xB06B6C44; 1408 x10 = x97947C7A & ~a1; 1409 x11 = x10 ^ x26DA9867; 1410 *out2 ^= x11; 1411} 1412 1413DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1414{ 1415 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; 1416 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; 1417 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; 1418 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; 1419 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; 1420 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; 1421 u32 xF700A600, x61008000, x03B7856B, x62B7056B; 1422 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1423 1424 x0C0C0C0C = a3 & ~a2; 1425 x0000F0F0 = a5 & ~a3; 1426 x00FFF00F = a4 ^ x0000F0F0; 1427 x00555005 = a1 & x00FFF00F; 1428 x00515001 = x00555005 & ~x0C0C0C0C; 1429 1430 x33000330 = a2 & ~x00FFF00F; 1431 x77555775 = a1 | x33000330; 1432 x30303030 = a2 & ~a3; 1433 x3030CFCF = a5 ^ x30303030; 1434 x30104745 = x77555775 & x3030CFCF; 1435 x30555745 = x00555005 | x30104745; 1436 1437 xFF000FF0 = ~x00FFF00F; 1438 xCF1048B5 = x30104745 ^ xFF000FF0; 1439 x080A080A = a3 & ~x77555775; 1440 xC71A40BF = xCF1048B5 ^ x080A080A; 1441 xCB164CB3 = x0C0C0C0C ^ xC71A40BF; 1442 x10 = x00515001 | a6; 1443 x11 = x10 ^ xCB164CB3; 1444 *out2 ^= x11; 1445 1446 x9E4319E6 = a1 ^ xCB164CB3; 1447 x000019E6 = a5 & x9E4319E6; 1448 xF429738C = a2 ^ xC71A40BF; 1449 xF4296A6A = x000019E6 ^ xF429738C; 1450 
/*
 * SWAP (a, b): exchange the two u32 values that the pointers a and b
 * refer to.
 *
 * - Wrapped in do { } while (0) so the expansion is a single statement
 *   and `if (c) SWAP (p, q); else ...` compiles.
 * - Both arguments are parenthesized so pointer expressions such as
 *   `p + 1` dereference as intended.
 * - Each argument is still evaluated twice, so do not pass expressions
 *   with side effects.
 */
#define SWAP(a, b) do { const u32 swap_tmp = *(a); *(a) = *(b); *(b) = swap_tmp; } while (0)

/*
 * DATASWAP: exchange *Dnn with *D(nn+32) for nn = 00..31.
 *
 * Expects the 64 u32 pointers D00..D63 to be in scope at the point of
 * use. The expansion already ends in a semicolon, so it works both as
 * `DATASWAP` and as `DATASWAP;`.
 */
#define DATASWAP  \
  SWAP (D00, D32); \
  SWAP (D01, D33); \
  SWAP (D02, D34); \
  SWAP (D03, D35); \
  SWAP (D04, D36); \
  SWAP (D05, D37); \
  SWAP (D06, D38); \
  SWAP (D07, D39); \
  SWAP (D08, D40); \
  SWAP (D09, D41); \
  SWAP (D10, D42); \
  SWAP (D11, D43); \
  SWAP (D12, D44); \
  SWAP (D13, D45); \
  SWAP (D14, D46); \
  SWAP (D15, D47); \
  SWAP (D16, D48); \
  SWAP (D17, D49); \
  SWAP (D18, D50); \
  SWAP (D19, D51); \
  SWAP (D20, D52); \
  SWAP (D21, D53); \
  SWAP (D22, D54); \
  SWAP (D23, D55); \
  SWAP (D24, D56); \
  SWAP (D25, D57); \
  SWAP (D26, D58); \
  SWAP (D27, D59); \
  SWAP (D28, D60); \
  SWAP (D29, D61); \
  SWAP (D30, D62); \
  SWAP (D31, D63);
K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; } 1516#define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; } 1517#define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; } 1518#define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; } 1519#define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; 
k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; } 1520#define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; } 1521#define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; } 1522#define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; 
k46 = K18; k47 = K24; } 1523#define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; } 1524#define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; } 1525#define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; } 1526#define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = 
K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; } 1527#define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; } 1528#define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; } 1529#define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; } 1530#define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; 
k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; } 1531 1532DECLSPEC void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63) 1533{ 1534 KXX_DECL u32 k00, k01, k02, 
k03, k04, k05; 1535 KXX_DECL u32 k06, k07, k08, k09, k10, k11; 1536 KXX_DECL u32 k12, k13, k14, k15, k16, k17; 1537 KXX_DECL u32 k18, k19, k20, k21, k22, k23; 1538 KXX_DECL u32 k24, k25, k26, k27, k28, k29; 1539 KXX_DECL u32 k30, k31, k32, k33, k34, k35; 1540 KXX_DECL u32 k36, k37, k38, k39, k40, k41; 1541 KXX_DECL u32 k42, k43, k44, k45, k46, k47; 1542 1543 #ifdef _unroll 1544 #pragma unroll 1545 #endif 1546 for (u32 i = 0; i < 2; i++) 1547 { 1548 if (i) KEYSET10 else KEYSET00 1549 1550 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); 1551 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1552 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1553 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); 1554 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1555 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1556 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1557 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1558 1559 if (i) KEYSET11 else KEYSET01 1560 1561 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1562 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1563 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1564 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1565 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1566 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1567 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ 
k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1568 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1569 1570 if (i) KEYSET12 else KEYSET02 1571 1572 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); 1573 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1574 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1575 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); 1576 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1577 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1578 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1579 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1580 1581 if (i) KEYSET13 else KEYSET03 1582 1583 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1584 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1585 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1586 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1587 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1588 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1589 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1590 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1591 1592 if (i) KEYSET14 else KEYSET04 1593 1594 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, 
D30); 1595 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1596 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1597 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); 1598 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1599 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1600 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1601 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1602 1603 if (i) KEYSET15 else KEYSET05 1604 1605 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1606 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1607 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1608 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1609 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1610 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1611 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1612 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1613 1614 if (i) KEYSET16 else KEYSET06 1615 1616 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); 1617 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1618 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1619 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, 
D19, D09, D00); 1620 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1621 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1622 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1623 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1624 1625 if (i) KEYSET17 else KEYSET07 1626 1627 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1628 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1629 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1630 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1631 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1632 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1633 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1634 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1635 } 1636} 1637 1638DECLSPEC void transpose32c (u32 *data) 1639{ 1640 #define swap(x,y,j,m) \ 1641 t = ((x) ^ ((y) >> (j))) & (m); \ 1642 (x) = (x) ^ t; \ 1643 (y) = (y) ^ (t << (j)); 1644 1645 u32 t; 1646 1647 swap (data[ 0], data[16], 16, 0x0000ffff); 1648 swap (data[ 1], data[17], 16, 0x0000ffff); 1649 swap (data[ 2], data[18], 16, 0x0000ffff); 1650 swap (data[ 3], data[19], 16, 0x0000ffff); 1651 swap (data[ 4], data[20], 16, 0x0000ffff); 1652 swap (data[ 5], data[21], 16, 0x0000ffff); 1653 swap (data[ 6], data[22], 16, 0x0000ffff); 1654 swap (data[ 7], data[23], 16, 0x0000ffff); 1655 swap (data[ 8], data[24], 16, 0x0000ffff); 1656 swap (data[ 9], data[25], 16, 0x0000ffff); 1657 swap (data[10], data[26], 16, 
0x0000ffff); 1658 swap (data[11], data[27], 16, 0x0000ffff); 1659 swap (data[12], data[28], 16, 0x0000ffff); 1660 swap (data[13], data[29], 16, 0x0000ffff); 1661 swap (data[14], data[30], 16, 0x0000ffff); 1662 swap (data[15], data[31], 16, 0x0000ffff); 1663 swap (data[ 0], data[ 8], 8, 0x00ff00ff); 1664 swap (data[ 1], data[ 9], 8, 0x00ff00ff); 1665 swap (data[ 2], data[10], 8, 0x00ff00ff); 1666 swap (data[ 3], data[11], 8, 0x00ff00ff); 1667 swap (data[ 4], data[12], 8, 0x00ff00ff); 1668 swap (data[ 5], data[13], 8, 0x00ff00ff); 1669 swap (data[ 6], data[14], 8, 0x00ff00ff); 1670 swap (data[ 7], data[15], 8, 0x00ff00ff); 1671 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f); 1672 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f); 1673 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f); 1674 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f); 1675 swap (data[ 0], data[ 2], 2, 0x33333333); 1676 swap (data[ 1], data[ 3], 2, 0x33333333); 1677 swap (data[ 0], data[ 1], 1, 0x55555555); 1678 swap (data[ 2], data[ 3], 1, 0x55555555); 1679 swap (data[ 4], data[ 6], 2, 0x33333333); 1680 swap (data[ 5], data[ 7], 2, 0x33333333); 1681 swap (data[ 4], data[ 5], 1, 0x55555555); 1682 swap (data[ 6], data[ 7], 1, 0x55555555); 1683 swap (data[ 8], data[12], 4, 0x0f0f0f0f); 1684 swap (data[ 9], data[13], 4, 0x0f0f0f0f); 1685 swap (data[10], data[14], 4, 0x0f0f0f0f); 1686 swap (data[11], data[15], 4, 0x0f0f0f0f); 1687 swap (data[ 8], data[10], 2, 0x33333333); 1688 swap (data[ 9], data[11], 2, 0x33333333); 1689 swap (data[ 8], data[ 9], 1, 0x55555555); 1690 swap (data[10], data[11], 1, 0x55555555); 1691 swap (data[12], data[14], 2, 0x33333333); 1692 swap (data[13], data[15], 2, 0x33333333); 1693 swap (data[12], data[13], 1, 0x55555555); 1694 swap (data[14], data[15], 1, 0x55555555); 1695 swap (data[16], data[24], 8, 0x00ff00ff); 1696 swap (data[17], data[25], 8, 0x00ff00ff); 1697 swap (data[18], data[26], 8, 0x00ff00ff); 1698 swap (data[19], data[27], 8, 0x00ff00ff); 1699 swap (data[20], data[28], 8, 0x00ff00ff); 1700 
swap (data[21], data[29], 8, 0x00ff00ff); 1701 swap (data[22], data[30], 8, 0x00ff00ff); 1702 swap (data[23], data[31], 8, 0x00ff00ff); 1703 swap (data[16], data[20], 4, 0x0f0f0f0f); 1704 swap (data[17], data[21], 4, 0x0f0f0f0f); 1705 swap (data[18], data[22], 4, 0x0f0f0f0f); 1706 swap (data[19], data[23], 4, 0x0f0f0f0f); 1707 swap (data[16], data[18], 2, 0x33333333); 1708 swap (data[17], data[19], 2, 0x33333333); 1709 swap (data[16], data[17], 1, 0x55555555); 1710 swap (data[18], data[19], 1, 0x55555555); 1711 swap (data[20], data[22], 2, 0x33333333); 1712 swap (data[21], data[23], 2, 0x33333333); 1713 swap (data[20], data[21], 1, 0x55555555); 1714 swap (data[22], data[23], 1, 0x55555555); 1715 swap (data[24], data[28], 4, 0x0f0f0f0f); 1716 swap (data[25], data[29], 4, 0x0f0f0f0f); 1717 swap (data[26], data[30], 4, 0x0f0f0f0f); 1718 swap (data[27], data[31], 4, 0x0f0f0f0f); 1719 swap (data[24], data[26], 2, 0x33333333); 1720 swap (data[25], data[27], 2, 0x33333333); 1721 swap (data[24], data[25], 1, 0x55555555); 1722 swap (data[26], data[27], 1, 0x55555555); 1723 swap (data[28], data[30], 2, 0x33333333); 1724 swap (data[29], data[31], 2, 0x33333333); 1725 swap (data[28], data[29], 1, 0x55555555); 1726 swap (data[30], data[31], 1, 0x55555555); 1727} 1728 1729// 1730// transpose bitslice mod : attention race conditions, need different buffers for *in and *out 1731// 1732 1733KERNEL_FQ void m14000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) 1734{ 1735 const u64 gid = get_global_id (0); 1736 1737 const u32 block = gid / 32; 1738 const u32 slice = gid % 32; 1739 1740 const u32 w0 = mod[gid]; 1741 1742 #ifdef _unroll 1743 #pragma unroll 1744 #endif 1745 for (int i = 0, j = 0; i < 32; i += 8, j += 7) 1746 { 1747 hc_atomic_or (&words_buf_b[block].b[j + 0], (((w0 >> (i + 7)) & 1) << slice)); 1748 hc_atomic_or (&words_buf_b[block].b[j + 1], (((w0 >> (i + 6)) & 1) << slice)); 1749 hc_atomic_or (&words_buf_b[block].b[j + 2], (((w0 >> (i + 5)) & 1) << slice)); 
1750 hc_atomic_or (&words_buf_b[block].b[j + 3], (((w0 >> (i + 4)) & 1) << slice)); 1751 hc_atomic_or (&words_buf_b[block].b[j + 4], (((w0 >> (i + 3)) & 1) << slice)); 1752 hc_atomic_or (&words_buf_b[block].b[j + 5], (((w0 >> (i + 2)) & 1) << slice)); 1753 hc_atomic_or (&words_buf_b[block].b[j + 6], (((w0 >> (i + 1)) & 1) << slice)); 1754 } 1755} 1756 1757KERNEL_FQ void m14000_mxx (KERN_ATTR_BITSLICE ()) 1758{ 1759 /** 1760 * base 1761 */ 1762 1763 const u64 gid = get_global_id (0); 1764 const u64 lid = get_local_id (0); 1765 1766 /** 1767 * salt 1768 */ 1769 1770 const u32 salt0 = salt_bufs[SALT_POS].salt_buf_pc[0]; 1771 const u32 salt1 = salt_bufs[SALT_POS].salt_buf_pc[1]; 1772 1773 // salt1 first, because this is a 64 bit value actually 1774 1775 1776 const u32 d00 = (((salt1 >> 0) & 1) ? -1 : 0); 1777 const u32 d01 = (((salt1 >> 1) & 1) ? -1 : 0); 1778 const u32 d02 = (((salt1 >> 2) & 1) ? -1 : 0); 1779 const u32 d03 = (((salt1 >> 3) & 1) ? -1 : 0); 1780 const u32 d04 = (((salt1 >> 4) & 1) ? -1 : 0); 1781 const u32 d05 = (((salt1 >> 5) & 1) ? -1 : 0); 1782 const u32 d06 = (((salt1 >> 6) & 1) ? -1 : 0); 1783 const u32 d07 = (((salt1 >> 7) & 1) ? -1 : 0); 1784 const u32 d08 = (((salt1 >> 8) & 1) ? -1 : 0); 1785 const u32 d09 = (((salt1 >> 9) & 1) ? -1 : 0); 1786 const u32 d10 = (((salt1 >> 10) & 1) ? -1 : 0); 1787 const u32 d11 = (((salt1 >> 11) & 1) ? -1 : 0); 1788 const u32 d12 = (((salt1 >> 12) & 1) ? -1 : 0); 1789 const u32 d13 = (((salt1 >> 13) & 1) ? -1 : 0); 1790 const u32 d14 = (((salt1 >> 14) & 1) ? -1 : 0); 1791 const u32 d15 = (((salt1 >> 15) & 1) ? -1 : 0); 1792 const u32 d16 = (((salt1 >> 16) & 1) ? -1 : 0); 1793 const u32 d17 = (((salt1 >> 17) & 1) ? -1 : 0); 1794 const u32 d18 = (((salt1 >> 18) & 1) ? -1 : 0); 1795 const u32 d19 = (((salt1 >> 19) & 1) ? -1 : 0); 1796 const u32 d20 = (((salt1 >> 20) & 1) ? -1 : 0); 1797 const u32 d21 = (((salt1 >> 21) & 1) ? -1 : 0); 1798 const u32 d22 = (((salt1 >> 22) & 1) ? 
-1 : 0); 1799 const u32 d23 = (((salt1 >> 23) & 1) ? -1 : 0); 1800 const u32 d24 = (((salt1 >> 24) & 1) ? -1 : 0); 1801 const u32 d25 = (((salt1 >> 25) & 1) ? -1 : 0); 1802 const u32 d26 = (((salt1 >> 26) & 1) ? -1 : 0); 1803 const u32 d27 = (((salt1 >> 27) & 1) ? -1 : 0); 1804 const u32 d28 = (((salt1 >> 28) & 1) ? -1 : 0); 1805 const u32 d29 = (((salt1 >> 29) & 1) ? -1 : 0); 1806 const u32 d30 = (((salt1 >> 30) & 1) ? -1 : 0); 1807 const u32 d31 = (((salt1 >> 31) & 1) ? -1 : 0); 1808 const u32 d32 = (((salt0 >> 0) & 1) ? -1 : 0); 1809 const u32 d33 = (((salt0 >> 1) & 1) ? -1 : 0); 1810 const u32 d34 = (((salt0 >> 2) & 1) ? -1 : 0); 1811 const u32 d35 = (((salt0 >> 3) & 1) ? -1 : 0); 1812 const u32 d36 = (((salt0 >> 4) & 1) ? -1 : 0); 1813 const u32 d37 = (((salt0 >> 5) & 1) ? -1 : 0); 1814 const u32 d38 = (((salt0 >> 6) & 1) ? -1 : 0); 1815 const u32 d39 = (((salt0 >> 7) & 1) ? -1 : 0); 1816 const u32 d40 = (((salt0 >> 8) & 1) ? -1 : 0); 1817 const u32 d41 = (((salt0 >> 9) & 1) ? -1 : 0); 1818 const u32 d42 = (((salt0 >> 10) & 1) ? -1 : 0); 1819 const u32 d43 = (((salt0 >> 11) & 1) ? -1 : 0); 1820 const u32 d44 = (((salt0 >> 12) & 1) ? -1 : 0); 1821 const u32 d45 = (((salt0 >> 13) & 1) ? -1 : 0); 1822 const u32 d46 = (((salt0 >> 14) & 1) ? -1 : 0); 1823 const u32 d47 = (((salt0 >> 15) & 1) ? -1 : 0); 1824 const u32 d48 = (((salt0 >> 16) & 1) ? -1 : 0); 1825 const u32 d49 = (((salt0 >> 17) & 1) ? -1 : 0); 1826 const u32 d50 = (((salt0 >> 18) & 1) ? -1 : 0); 1827 const u32 d51 = (((salt0 >> 19) & 1) ? -1 : 0); 1828 const u32 d52 = (((salt0 >> 20) & 1) ? -1 : 0); 1829 const u32 d53 = (((salt0 >> 21) & 1) ? -1 : 0); 1830 const u32 d54 = (((salt0 >> 22) & 1) ? -1 : 0); 1831 const u32 d55 = (((salt0 >> 23) & 1) ? -1 : 0); 1832 const u32 d56 = (((salt0 >> 24) & 1) ? -1 : 0); 1833 const u32 d57 = (((salt0 >> 25) & 1) ? -1 : 0); 1834 const u32 d58 = (((salt0 >> 26) & 1) ? -1 : 0); 1835 const u32 d59 = (((salt0 >> 27) & 1) ? 
-1 : 0); 1836 const u32 d60 = (((salt0 >> 28) & 1) ? -1 : 0); 1837 const u32 d61 = (((salt0 >> 29) & 1) ? -1 : 0); 1838 const u32 d62 = (((salt0 >> 30) & 1) ? -1 : 0); 1839 const u32 d63 = (((salt0 >> 31) & 1) ? -1 : 0); 1840 1841 /** 1842 * base 1843 */ 1844 1845 const u32 w0 = pws[gid].i[0]; 1846 const u32 w1 = pws[gid].i[1]; 1847 1848 #define K00 (((w0 >> ( 0 + 7)) & 1) ? -1 : 0) 1849 #define K01 (((w0 >> ( 0 + 6)) & 1) ? -1 : 0) 1850 #define K02 (((w0 >> ( 0 + 5)) & 1) ? -1 : 0) 1851 #define K03 (((w0 >> ( 0 + 4)) & 1) ? -1 : 0) 1852 #define K04 (((w0 >> ( 0 + 3)) & 1) ? -1 : 0) 1853 #define K05 (((w0 >> ( 0 + 2)) & 1) ? -1 : 0) 1854 #define K06 (((w0 >> ( 0 + 1)) & 1) ? -1 : 0) 1855 #define K07 (((w0 >> ( 8 + 7)) & 1) ? -1 : 0) 1856 #define K08 (((w0 >> ( 8 + 6)) & 1) ? -1 : 0) 1857 #define K09 (((w0 >> ( 8 + 5)) & 1) ? -1 : 0) 1858 #define K10 (((w0 >> ( 8 + 4)) & 1) ? -1 : 0) 1859 #define K11 (((w0 >> ( 8 + 3)) & 1) ? -1 : 0) 1860 #define K12 (((w0 >> ( 8 + 2)) & 1) ? -1 : 0) 1861 #define K13 (((w0 >> ( 8 + 1)) & 1) ? -1 : 0) 1862 #define K14 (((w0 >> (16 + 7)) & 1) ? -1 : 0) 1863 #define K15 (((w0 >> (16 + 6)) & 1) ? -1 : 0) 1864 #define K16 (((w0 >> (16 + 5)) & 1) ? -1 : 0) 1865 #define K17 (((w0 >> (16 + 4)) & 1) ? -1 : 0) 1866 #define K18 (((w0 >> (16 + 3)) & 1) ? -1 : 0) 1867 #define K19 (((w0 >> (16 + 2)) & 1) ? -1 : 0) 1868 #define K20 (((w0 >> (16 + 1)) & 1) ? -1 : 0) 1869 #define K21 (((w0 >> (24 + 7)) & 1) ? -1 : 0) 1870 #define K22 (((w0 >> (24 + 6)) & 1) ? -1 : 0) 1871 #define K23 (((w0 >> (24 + 5)) & 1) ? -1 : 0) 1872 #define K24 (((w0 >> (24 + 4)) & 1) ? -1 : 0) 1873 #define K25 (((w0 >> (24 + 3)) & 1) ? -1 : 0) 1874 #define K26 (((w0 >> (24 + 2)) & 1) ? -1 : 0) 1875 #define K27 (((w0 >> (24 + 1)) & 1) ? -1 : 0) 1876 #define K28 (((w1 >> ( 0 + 7)) & 1) ? -1 : 0) 1877 #define K29 (((w1 >> ( 0 + 6)) & 1) ? -1 : 0) 1878 #define K30 (((w1 >> ( 0 + 5)) & 1) ? -1 : 0) 1879 #define K31 (((w1 >> ( 0 + 4)) & 1) ? 
-1 : 0) 1880 #define K32 (((w1 >> ( 0 + 3)) & 1) ? -1 : 0) 1881 #define K33 (((w1 >> ( 0 + 2)) & 1) ? -1 : 0) 1882 #define K34 (((w1 >> ( 0 + 1)) & 1) ? -1 : 0) 1883 #define K35 (((w1 >> ( 8 + 7)) & 1) ? -1 : 0) 1884 #define K36 (((w1 >> ( 8 + 6)) & 1) ? -1 : 0) 1885 #define K37 (((w1 >> ( 8 + 5)) & 1) ? -1 : 0) 1886 #define K38 (((w1 >> ( 8 + 4)) & 1) ? -1 : 0) 1887 #define K39 (((w1 >> ( 8 + 3)) & 1) ? -1 : 0) 1888 #define K40 (((w1 >> ( 8 + 2)) & 1) ? -1 : 0) 1889 #define K41 (((w1 >> ( 8 + 1)) & 1) ? -1 : 0) 1890 #define K42 (((w1 >> (16 + 7)) & 1) ? -1 : 0) 1891 #define K43 (((w1 >> (16 + 6)) & 1) ? -1 : 0) 1892 #define K44 (((w1 >> (16 + 5)) & 1) ? -1 : 0) 1893 #define K45 (((w1 >> (16 + 4)) & 1) ? -1 : 0) 1894 #define K46 (((w1 >> (16 + 3)) & 1) ? -1 : 0) 1895 #define K47 (((w1 >> (16 + 2)) & 1) ? -1 : 0) 1896 #define K48 (((w1 >> (16 + 1)) & 1) ? -1 : 0) 1897 #define K49 (((w1 >> (24 + 7)) & 1) ? -1 : 0) 1898 #define K50 (((w1 >> (24 + 6)) & 1) ? -1 : 0) 1899 #define K51 (((w1 >> (24 + 5)) & 1) ? -1 : 0) 1900 #define K52 (((w1 >> (24 + 4)) & 1) ? -1 : 0) 1901 #define K53 (((w1 >> (24 + 3)) & 1) ? -1 : 0) 1902 #define K54 (((w1 >> (24 + 2)) & 1) ? -1 : 0) 1903 #define K55 (((w1 >> (24 + 1)) & 1) ? 
-1 : 0) 1904 1905 /** 1906 * inner loop 1907 */ 1908 1909 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) 1910 { 1911 u32 k00 = K00; 1912 u32 k01 = K01; 1913 u32 k02 = K02; 1914 u32 k03 = K03; 1915 u32 k04 = K04; 1916 u32 k05 = K05; 1917 u32 k06 = K06; 1918 u32 k07 = K07; 1919 u32 k08 = K08; 1920 u32 k09 = K09; 1921 u32 k10 = K10; 1922 u32 k11 = K11; 1923 u32 k12 = K12; 1924 u32 k13 = K13; 1925 u32 k14 = K14; 1926 u32 k15 = K15; 1927 u32 k16 = K16; 1928 u32 k17 = K17; 1929 u32 k18 = K18; 1930 u32 k19 = K19; 1931 u32 k20 = K20; 1932 u32 k21 = K21; 1933 u32 k22 = K22; 1934 u32 k23 = K23; 1935 u32 k24 = K24; 1936 u32 k25 = K25; 1937 u32 k26 = K26; 1938 u32 k27 = K27; 1939 1940 const u32 pc_pos = il_pos / 32; 1941 1942 k00 |= words_buf_s[pc_pos].b[ 0]; 1943 k01 |= words_buf_s[pc_pos].b[ 1]; 1944 k02 |= words_buf_s[pc_pos].b[ 2]; 1945 k03 |= words_buf_s[pc_pos].b[ 3]; 1946 k04 |= words_buf_s[pc_pos].b[ 4]; 1947 k05 |= words_buf_s[pc_pos].b[ 5]; 1948 k06 |= words_buf_s[pc_pos].b[ 6]; 1949 k07 |= words_buf_s[pc_pos].b[ 7]; 1950 k08 |= words_buf_s[pc_pos].b[ 8]; 1951 k09 |= words_buf_s[pc_pos].b[ 9]; 1952 k10 |= words_buf_s[pc_pos].b[10]; 1953 k11 |= words_buf_s[pc_pos].b[11]; 1954 k12 |= words_buf_s[pc_pos].b[12]; 1955 k13 |= words_buf_s[pc_pos].b[13]; 1956 k14 |= words_buf_s[pc_pos].b[14]; 1957 k15 |= words_buf_s[pc_pos].b[15]; 1958 k16 |= words_buf_s[pc_pos].b[16]; 1959 k17 |= words_buf_s[pc_pos].b[17]; 1960 k18 |= words_buf_s[pc_pos].b[18]; 1961 k19 |= words_buf_s[pc_pos].b[19]; 1962 k20 |= words_buf_s[pc_pos].b[20]; 1963 k21 |= words_buf_s[pc_pos].b[21]; 1964 k22 |= words_buf_s[pc_pos].b[22]; 1965 k23 |= words_buf_s[pc_pos].b[23]; 1966 k24 |= words_buf_s[pc_pos].b[24]; 1967 k25 |= words_buf_s[pc_pos].b[25]; 1968 k26 |= words_buf_s[pc_pos].b[26]; 1969 k27 |= words_buf_s[pc_pos].b[27]; 1970 1971 u32 D00 = d00; 1972 u32 D01 = d01; 1973 u32 D02 = d02; 1974 u32 D03 = d03; 1975 u32 D04 = d04; 1976 u32 D05 = d05; 1977 u32 D06 = d06; 1978 u32 D07 = d07; 1979 u32 D08 = 
d08; 1980 u32 D09 = d09; 1981 u32 D10 = d10; 1982 u32 D11 = d11; 1983 u32 D12 = d12; 1984 u32 D13 = d13; 1985 u32 D14 = d14; 1986 u32 D15 = d15; 1987 u32 D16 = d16; 1988 u32 D17 = d17; 1989 u32 D18 = d18; 1990 u32 D19 = d19; 1991 u32 D20 = d20; 1992 u32 D21 = d21; 1993 u32 D22 = d22; 1994 u32 D23 = d23; 1995 u32 D24 = d24; 1996 u32 D25 = d25; 1997 u32 D26 = d26; 1998 u32 D27 = d27; 1999 u32 D28 = d28; 2000 u32 D29 = d29; 2001 u32 D30 = d30; 2002 u32 D31 = d31; 2003 u32 D32 = d32; 2004 u32 D33 = d33; 2005 u32 D34 = d34; 2006 u32 D35 = d35; 2007 u32 D36 = d36; 2008 u32 D37 = d37; 2009 u32 D38 = d38; 2010 u32 D39 = d39; 2011 u32 D40 = d40; 2012 u32 D41 = d41; 2013 u32 D42 = d42; 2014 u32 D43 = d43; 2015 u32 D44 = d44; 2016 u32 D45 = d45; 2017 u32 D46 = d46; 2018 u32 D47 = d47; 2019 u32 D48 = d48; 2020 u32 D49 = d49; 2021 u32 D50 = d50; 2022 u32 D51 = d51; 2023 u32 D52 = d52; 2024 u32 D53 = d53; 2025 u32 D54 = d54; 2026 u32 D55 = d55; 2027 u32 D56 = d56; 2028 u32 D57 = d57; 2029 u32 D58 = d58; 2030 u32 D59 = d59; 2031 u32 D60 = d60; 2032 u32 D61 = d61; 2033 u32 D62 = d62; 2034 u32 D63 = d63; 2035 2036 DES 2037 ( 2038 k00, k01, k02, k03, k04, k05, k06, 2039 k07, k08, k09, k10, k11, k12, k13, 2040 k14, k15, k16, k17, k18, k19, k20, 2041 k21, k22, k23, k24, k25, k26, k27, 2042 K28, K29, K30, K31, K32, K33, K34, 2043 K35, K36, K37, K38, K39, K40, K41, 2044 K42, K43, K44, K45, K46, K47, K48, 2045 K49, K50, K51, K52, K53, K54, K55, 2046 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, 2047 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, 2048 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, 2049 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, 2050 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, 2051 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, 2052 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, 2053 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 2054 ); 2055 2056 u32 out[64]; 2057 2058 out[ 0] = D00; 2059 out[ 1] = D01; 2060 out[ 2] = D02; 2061 out[ 3] = D03; 2062 out[ 4] = 
D04; 2063 out[ 5] = D05; 2064 out[ 6] = D06; 2065 out[ 7] = D07; 2066 out[ 8] = D08; 2067 out[ 9] = D09; 2068 out[10] = D10; 2069 out[11] = D11; 2070 out[12] = D12; 2071 out[13] = D13; 2072 out[14] = D14; 2073 out[15] = D15; 2074 out[16] = D16; 2075 out[17] = D17; 2076 out[18] = D18; 2077 out[19] = D19; 2078 out[20] = D20; 2079 out[21] = D21; 2080 out[22] = D22; 2081 out[23] = D23; 2082 out[24] = D24; 2083 out[25] = D25; 2084 out[26] = D26; 2085 out[27] = D27; 2086 out[28] = D28; 2087 out[29] = D29; 2088 out[30] = D30; 2089 out[31] = D31; 2090 out[32] = D32; 2091 out[33] = D33; 2092 out[34] = D34; 2093 out[35] = D35; 2094 out[36] = D36; 2095 out[37] = D37; 2096 out[38] = D38; 2097 out[39] = D39; 2098 out[40] = D40; 2099 out[41] = D41; 2100 out[42] = D42; 2101 out[43] = D43; 2102 out[44] = D44; 2103 out[45] = D45; 2104 out[46] = D46; 2105 out[47] = D47; 2106 out[48] = D48; 2107 out[49] = D49; 2108 out[50] = D50; 2109 out[51] = D51; 2110 out[52] = D52; 2111 out[53] = D53; 2112 out[54] = D54; 2113 out[55] = D55; 2114 out[56] = D56; 2115 out[57] = D57; 2116 out[58] = D58; 2117 out[59] = D59; 2118 out[60] = D60; 2119 out[61] = D61; 2120 out[62] = D62; 2121 out[63] = D63; 2122 2123 if (digests_cnt < 16) 2124 { 2125 for (u32 d = 0; d < digests_cnt; d++) 2126 { 2127 const u32 final_hash_pos = DIGESTS_OFFSET + d; 2128 2129 if (hashes_shown[final_hash_pos]) continue; 2130 2131 u32 search[2]; 2132 2133 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; 2134 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; 2135 2136 u32 tmpResult = 0; 2137 2138 #ifdef _unroll 2139 #pragma unroll 2140 #endif 2141 for (int i = 0; i < 32; i++) 2142 { 2143 const u32 b0 = -((search[0] >> i) & 1); 2144 const u32 b1 = -((search[1] >> i) & 1); 2145 2146 tmpResult |= out[ 0 + i] ^ b0; 2147 tmpResult |= out[32 + i] ^ b1; 2148 } 2149 2150 if (tmpResult == 0xffffffff) continue; 2151 2152 const u32 slice = ffz (tmpResult); 2153 2154 const u32 r0 = search[0]; 2155 const u32 r1 = 
search[1]; 2156 const u32 r2 = 0; 2157 const u32 r3 = 0; 2158 2159 #ifdef KERNEL_STATIC 2160 #include COMPARE_M 2161 #endif 2162 } 2163 } 2164 else 2165 { 2166 u32 out0[32]; 2167 u32 out1[32]; 2168 2169 #ifdef _unroll 2170 #pragma unroll 2171 #endif 2172 for (int i = 0; i < 32; i++) 2173 { 2174 out0[i] = out[ 0 + i]; 2175 out1[i] = out[32 + i]; 2176 } 2177 2178 transpose32c (out0); 2179 transpose32c (out1); 2180 2181 #ifdef _unroll 2182 #pragma unroll 2183 #endif 2184 for (int slice = 0; slice < 32; slice++) 2185 { 2186 const u32 r0 = out0[slice]; 2187 const u32 r1 = out1[slice]; 2188 const u32 r2 = 0; 2189 const u32 r3 = 0; 2190 2191 #ifdef KERNEL_STATIC 2192 #include COMPARE_M 2193 #endif 2194 } 2195 } 2196 } 2197} 2198 2199KERNEL_FQ void m14000_sxx (KERN_ATTR_BITSLICE ()) 2200{ 2201 /** 2202 * base 2203 */ 2204 2205 const u64 gid = get_global_id (0); 2206 const u64 lid = get_local_id (0); 2207 2208 /** 2209 * salt 2210 */ 2211 2212 const u32 salt0 = salt_bufs[SALT_POS].salt_buf_pc[0]; 2213 const u32 salt1 = salt_bufs[SALT_POS].salt_buf_pc[1]; 2214 2215 // salt1 first, because this is a 64 bit value actually 2216 2217 const u32 d00 = (((salt1 >> 0) & 1) ? -1 : 0); 2218 const u32 d01 = (((salt1 >> 1) & 1) ? -1 : 0); 2219 const u32 d02 = (((salt1 >> 2) & 1) ? -1 : 0); 2220 const u32 d03 = (((salt1 >> 3) & 1) ? -1 : 0); 2221 const u32 d04 = (((salt1 >> 4) & 1) ? -1 : 0); 2222 const u32 d05 = (((salt1 >> 5) & 1) ? -1 : 0); 2223 const u32 d06 = (((salt1 >> 6) & 1) ? -1 : 0); 2224 const u32 d07 = (((salt1 >> 7) & 1) ? -1 : 0); 2225 const u32 d08 = (((salt1 >> 8) & 1) ? -1 : 0); 2226 const u32 d09 = (((salt1 >> 9) & 1) ? -1 : 0); 2227 const u32 d10 = (((salt1 >> 10) & 1) ? -1 : 0); 2228 const u32 d11 = (((salt1 >> 11) & 1) ? -1 : 0); 2229 const u32 d12 = (((salt1 >> 12) & 1) ? -1 : 0); 2230 const u32 d13 = (((salt1 >> 13) & 1) ? -1 : 0); 2231 const u32 d14 = (((salt1 >> 14) & 1) ? -1 : 0); 2232 const u32 d15 = (((salt1 >> 15) & 1) ? 
-1 : 0); 2233 const u32 d16 = (((salt1 >> 16) & 1) ? -1 : 0); 2234 const u32 d17 = (((salt1 >> 17) & 1) ? -1 : 0); 2235 const u32 d18 = (((salt1 >> 18) & 1) ? -1 : 0); 2236 const u32 d19 = (((salt1 >> 19) & 1) ? -1 : 0); 2237 const u32 d20 = (((salt1 >> 20) & 1) ? -1 : 0); 2238 const u32 d21 = (((salt1 >> 21) & 1) ? -1 : 0); 2239 const u32 d22 = (((salt1 >> 22) & 1) ? -1 : 0); 2240 const u32 d23 = (((salt1 >> 23) & 1) ? -1 : 0); 2241 const u32 d24 = (((salt1 >> 24) & 1) ? -1 : 0); 2242 const u32 d25 = (((salt1 >> 25) & 1) ? -1 : 0); 2243 const u32 d26 = (((salt1 >> 26) & 1) ? -1 : 0); 2244 const u32 d27 = (((salt1 >> 27) & 1) ? -1 : 0); 2245 const u32 d28 = (((salt1 >> 28) & 1) ? -1 : 0); 2246 const u32 d29 = (((salt1 >> 29) & 1) ? -1 : 0); 2247 const u32 d30 = (((salt1 >> 30) & 1) ? -1 : 0); 2248 const u32 d31 = (((salt1 >> 31) & 1) ? -1 : 0); 2249 const u32 d32 = (((salt0 >> 0) & 1) ? -1 : 0); 2250 const u32 d33 = (((salt0 >> 1) & 1) ? -1 : 0); 2251 const u32 d34 = (((salt0 >> 2) & 1) ? -1 : 0); 2252 const u32 d35 = (((salt0 >> 3) & 1) ? -1 : 0); 2253 const u32 d36 = (((salt0 >> 4) & 1) ? -1 : 0); 2254 const u32 d37 = (((salt0 >> 5) & 1) ? -1 : 0); 2255 const u32 d38 = (((salt0 >> 6) & 1) ? -1 : 0); 2256 const u32 d39 = (((salt0 >> 7) & 1) ? -1 : 0); 2257 const u32 d40 = (((salt0 >> 8) & 1) ? -1 : 0); 2258 const u32 d41 = (((salt0 >> 9) & 1) ? -1 : 0); 2259 const u32 d42 = (((salt0 >> 10) & 1) ? -1 : 0); 2260 const u32 d43 = (((salt0 >> 11) & 1) ? -1 : 0); 2261 const u32 d44 = (((salt0 >> 12) & 1) ? -1 : 0); 2262 const u32 d45 = (((salt0 >> 13) & 1) ? -1 : 0); 2263 const u32 d46 = (((salt0 >> 14) & 1) ? -1 : 0); 2264 const u32 d47 = (((salt0 >> 15) & 1) ? -1 : 0); 2265 const u32 d48 = (((salt0 >> 16) & 1) ? -1 : 0); 2266 const u32 d49 = (((salt0 >> 17) & 1) ? -1 : 0); 2267 const u32 d50 = (((salt0 >> 18) & 1) ? -1 : 0); 2268 const u32 d51 = (((salt0 >> 19) & 1) ? -1 : 0); 2269 const u32 d52 = (((salt0 >> 20) & 1) ? 
-1 : 0); 2270 const u32 d53 = (((salt0 >> 21) & 1) ? -1 : 0); 2271 const u32 d54 = (((salt0 >> 22) & 1) ? -1 : 0); 2272 const u32 d55 = (((salt0 >> 23) & 1) ? -1 : 0); 2273 const u32 d56 = (((salt0 >> 24) & 1) ? -1 : 0); 2274 const u32 d57 = (((salt0 >> 25) & 1) ? -1 : 0); 2275 const u32 d58 = (((salt0 >> 26) & 1) ? -1 : 0); 2276 const u32 d59 = (((salt0 >> 27) & 1) ? -1 : 0); 2277 const u32 d60 = (((salt0 >> 28) & 1) ? -1 : 0); 2278 const u32 d61 = (((salt0 >> 29) & 1) ? -1 : 0); 2279 const u32 d62 = (((salt0 >> 30) & 1) ? -1 : 0); 2280 const u32 d63 = (((salt0 >> 31) & 1) ? -1 : 0); 2281 2282 /** 2283 * digest 2284 */ 2285 2286 const u32 s0 = digests_buf[0].digest_buf[0]; 2287 const u32 s1 = digests_buf[0].digest_buf[1]; 2288 2289 const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0); 2290 const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0); 2291 const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0); 2292 const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0); 2293 const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0); 2294 const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0); 2295 const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0); 2296 const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0); 2297 const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0); 2298 const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0); 2299 const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0); 2300 const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0); 2301 const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0); 2302 const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0); 2303 const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0); 2304 const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0); 2305 const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0); 2306 const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0); 2307 const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0); 2308 const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0); 2309 const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0); 2310 const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0); 2311 const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0); 2312 const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0); 2313 const u32 S24 = (((s0 >> 24) & 1) ? 
-1 : 0); 2314 const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0); 2315 const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0); 2316 const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0); 2317 const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0); 2318 const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0); 2319 const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0); 2320 const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0); 2321 const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0); 2322 const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0); 2323 const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0); 2324 const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0); 2325 const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0); 2326 const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0); 2327 const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0); 2328 const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0); 2329 const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0); 2330 const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0); 2331 const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0); 2332 const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0); 2333 const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0); 2334 const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0); 2335 const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0); 2336 const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0); 2337 const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0); 2338 const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0); 2339 const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0); 2340 const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0); 2341 const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0); 2342 const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0); 2343 const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0); 2344 const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0); 2345 const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0); 2346 const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0); 2347 const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0); 2348 const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0); 2349 const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0); 2350 const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0); 2351 const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0); 2352 const u32 S63 = (((s1 >> 31) & 1) ? 
-1 : 0); 2353 2354 /** 2355 * base 2356 */ 2357 2358 const u32 w0 = pws[gid].i[0]; 2359 const u32 w1 = pws[gid].i[1]; 2360 2361 #define K00 (((w0 >> ( 0 + 7)) & 1) ? -1 : 0) 2362 #define K01 (((w0 >> ( 0 + 6)) & 1) ? -1 : 0) 2363 #define K02 (((w0 >> ( 0 + 5)) & 1) ? -1 : 0) 2364 #define K03 (((w0 >> ( 0 + 4)) & 1) ? -1 : 0) 2365 #define K04 (((w0 >> ( 0 + 3)) & 1) ? -1 : 0) 2366 #define K05 (((w0 >> ( 0 + 2)) & 1) ? -1 : 0) 2367 #define K06 (((w0 >> ( 0 + 1)) & 1) ? -1 : 0) 2368 #define K07 (((w0 >> ( 8 + 7)) & 1) ? -1 : 0) 2369 #define K08 (((w0 >> ( 8 + 6)) & 1) ? -1 : 0) 2370 #define K09 (((w0 >> ( 8 + 5)) & 1) ? -1 : 0) 2371 #define K10 (((w0 >> ( 8 + 4)) & 1) ? -1 : 0) 2372 #define K11 (((w0 >> ( 8 + 3)) & 1) ? -1 : 0) 2373 #define K12 (((w0 >> ( 8 + 2)) & 1) ? -1 : 0) 2374 #define K13 (((w0 >> ( 8 + 1)) & 1) ? -1 : 0) 2375 #define K14 (((w0 >> (16 + 7)) & 1) ? -1 : 0) 2376 #define K15 (((w0 >> (16 + 6)) & 1) ? -1 : 0) 2377 #define K16 (((w0 >> (16 + 5)) & 1) ? -1 : 0) 2378 #define K17 (((w0 >> (16 + 4)) & 1) ? -1 : 0) 2379 #define K18 (((w0 >> (16 + 3)) & 1) ? -1 : 0) 2380 #define K19 (((w0 >> (16 + 2)) & 1) ? -1 : 0) 2381 #define K20 (((w0 >> (16 + 1)) & 1) ? -1 : 0) 2382 #define K21 (((w0 >> (24 + 7)) & 1) ? -1 : 0) 2383 #define K22 (((w0 >> (24 + 6)) & 1) ? -1 : 0) 2384 #define K23 (((w0 >> (24 + 5)) & 1) ? -1 : 0) 2385 #define K24 (((w0 >> (24 + 4)) & 1) ? -1 : 0) 2386 #define K25 (((w0 >> (24 + 3)) & 1) ? -1 : 0) 2387 #define K26 (((w0 >> (24 + 2)) & 1) ? -1 : 0) 2388 #define K27 (((w0 >> (24 + 1)) & 1) ? -1 : 0) 2389 #define K28 (((w1 >> ( 0 + 7)) & 1) ? -1 : 0) 2390 #define K29 (((w1 >> ( 0 + 6)) & 1) ? -1 : 0) 2391 #define K30 (((w1 >> ( 0 + 5)) & 1) ? -1 : 0) 2392 #define K31 (((w1 >> ( 0 + 4)) & 1) ? -1 : 0) 2393 #define K32 (((w1 >> ( 0 + 3)) & 1) ? -1 : 0) 2394 #define K33 (((w1 >> ( 0 + 2)) & 1) ? -1 : 0) 2395 #define K34 (((w1 >> ( 0 + 1)) & 1) ? -1 : 0) 2396 #define K35 (((w1 >> ( 8 + 7)) & 1) ? 
-1 : 0) 2397 #define K36 (((w1 >> ( 8 + 6)) & 1) ? -1 : 0) 2398 #define K37 (((w1 >> ( 8 + 5)) & 1) ? -1 : 0) 2399 #define K38 (((w1 >> ( 8 + 4)) & 1) ? -1 : 0) 2400 #define K39 (((w1 >> ( 8 + 3)) & 1) ? -1 : 0) 2401 #define K40 (((w1 >> ( 8 + 2)) & 1) ? -1 : 0) 2402 #define K41 (((w1 >> ( 8 + 1)) & 1) ? -1 : 0) 2403 #define K42 (((w1 >> (16 + 7)) & 1) ? -1 : 0) 2404 #define K43 (((w1 >> (16 + 6)) & 1) ? -1 : 0) 2405 #define K44 (((w1 >> (16 + 5)) & 1) ? -1 : 0) 2406 #define K45 (((w1 >> (16 + 4)) & 1) ? -1 : 0) 2407 #define K46 (((w1 >> (16 + 3)) & 1) ? -1 : 0) 2408 #define K47 (((w1 >> (16 + 2)) & 1) ? -1 : 0) 2409 #define K48 (((w1 >> (16 + 1)) & 1) ? -1 : 0) 2410 #define K49 (((w1 >> (24 + 7)) & 1) ? -1 : 0) 2411 #define K50 (((w1 >> (24 + 6)) & 1) ? -1 : 0) 2412 #define K51 (((w1 >> (24 + 5)) & 1) ? -1 : 0) 2413 #define K52 (((w1 >> (24 + 4)) & 1) ? -1 : 0) 2414 #define K53 (((w1 >> (24 + 3)) & 1) ? -1 : 0) 2415 #define K54 (((w1 >> (24 + 2)) & 1) ? -1 : 0) 2416 #define K55 (((w1 >> (24 + 1)) & 1) ? 
-1 : 0) 2417 2418 /** 2419 * inner loop 2420 */ 2421 2422 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) 2423 { 2424 u32 k00 = K00; 2425 u32 k01 = K01; 2426 u32 k02 = K02; 2427 u32 k03 = K03; 2428 u32 k04 = K04; 2429 u32 k05 = K05; 2430 u32 k06 = K06; 2431 u32 k07 = K07; 2432 u32 k08 = K08; 2433 u32 k09 = K09; 2434 u32 k10 = K10; 2435 u32 k11 = K11; 2436 u32 k12 = K12; 2437 u32 k13 = K13; 2438 u32 k14 = K14; 2439 u32 k15 = K15; 2440 u32 k16 = K16; 2441 u32 k17 = K17; 2442 u32 k18 = K18; 2443 u32 k19 = K19; 2444 u32 k20 = K20; 2445 u32 k21 = K21; 2446 u32 k22 = K22; 2447 u32 k23 = K23; 2448 u32 k24 = K24; 2449 u32 k25 = K25; 2450 u32 k26 = K26; 2451 u32 k27 = K27; 2452 2453 const u32 pc_pos = il_pos / 32; 2454 2455 k00 |= words_buf_s[pc_pos].b[ 0]; 2456 k01 |= words_buf_s[pc_pos].b[ 1]; 2457 k02 |= words_buf_s[pc_pos].b[ 2]; 2458 k03 |= words_buf_s[pc_pos].b[ 3]; 2459 k04 |= words_buf_s[pc_pos].b[ 4]; 2460 k05 |= words_buf_s[pc_pos].b[ 5]; 2461 k06 |= words_buf_s[pc_pos].b[ 6]; 2462 k07 |= words_buf_s[pc_pos].b[ 7]; 2463 k08 |= words_buf_s[pc_pos].b[ 8]; 2464 k09 |= words_buf_s[pc_pos].b[ 9]; 2465 k10 |= words_buf_s[pc_pos].b[10]; 2466 k11 |= words_buf_s[pc_pos].b[11]; 2467 k12 |= words_buf_s[pc_pos].b[12]; 2468 k13 |= words_buf_s[pc_pos].b[13]; 2469 k14 |= words_buf_s[pc_pos].b[14]; 2470 k15 |= words_buf_s[pc_pos].b[15]; 2471 k16 |= words_buf_s[pc_pos].b[16]; 2472 k17 |= words_buf_s[pc_pos].b[17]; 2473 k18 |= words_buf_s[pc_pos].b[18]; 2474 k19 |= words_buf_s[pc_pos].b[19]; 2475 k20 |= words_buf_s[pc_pos].b[20]; 2476 k21 |= words_buf_s[pc_pos].b[21]; 2477 k22 |= words_buf_s[pc_pos].b[22]; 2478 k23 |= words_buf_s[pc_pos].b[23]; 2479 k24 |= words_buf_s[pc_pos].b[24]; 2480 k25 |= words_buf_s[pc_pos].b[25]; 2481 k26 |= words_buf_s[pc_pos].b[26]; 2482 k27 |= words_buf_s[pc_pos].b[27]; 2483 2484 u32 D00 = d00; 2485 u32 D01 = d01; 2486 u32 D02 = d02; 2487 u32 D03 = d03; 2488 u32 D04 = d04; 2489 u32 D05 = d05; 2490 u32 D06 = d06; 2491 u32 D07 = d07; 2492 u32 D08 = 
d08; 2493 u32 D09 = d09; 2494 u32 D10 = d10; 2495 u32 D11 = d11; 2496 u32 D12 = d12; 2497 u32 D13 = d13; 2498 u32 D14 = d14; 2499 u32 D15 = d15; 2500 u32 D16 = d16; 2501 u32 D17 = d17; 2502 u32 D18 = d18; 2503 u32 D19 = d19; 2504 u32 D20 = d20; 2505 u32 D21 = d21; 2506 u32 D22 = d22; 2507 u32 D23 = d23; 2508 u32 D24 = d24; 2509 u32 D25 = d25; 2510 u32 D26 = d26; 2511 u32 D27 = d27; 2512 u32 D28 = d28; 2513 u32 D29 = d29; 2514 u32 D30 = d30; 2515 u32 D31 = d31; 2516 u32 D32 = d32; 2517 u32 D33 = d33; 2518 u32 D34 = d34; 2519 u32 D35 = d35; 2520 u32 D36 = d36; 2521 u32 D37 = d37; 2522 u32 D38 = d38; 2523 u32 D39 = d39; 2524 u32 D40 = d40; 2525 u32 D41 = d41; 2526 u32 D42 = d42; 2527 u32 D43 = d43; 2528 u32 D44 = d44; 2529 u32 D45 = d45; 2530 u32 D46 = d46; 2531 u32 D47 = d47; 2532 u32 D48 = d48; 2533 u32 D49 = d49; 2534 u32 D50 = d50; 2535 u32 D51 = d51; 2536 u32 D52 = d52; 2537 u32 D53 = d53; 2538 u32 D54 = d54; 2539 u32 D55 = d55; 2540 u32 D56 = d56; 2541 u32 D57 = d57; 2542 u32 D58 = d58; 2543 u32 D59 = d59; 2544 u32 D60 = d60; 2545 u32 D61 = d61; 2546 u32 D62 = d62; 2547 u32 D63 = d63; 2548 2549 DES 2550 ( 2551 k00, k01, k02, k03, k04, k05, k06, 2552 k07, k08, k09, k10, k11, k12, k13, 2553 k14, k15, k16, k17, k18, k19, k20, 2554 k21, k22, k23, k24, k25, k26, k27, 2555 K28, K29, K30, K31, K32, K33, K34, 2556 K35, K36, K37, K38, K39, K40, K41, 2557 K42, K43, K44, K45, K46, K47, K48, 2558 K49, K50, K51, K52, K53, K54, K55, 2559 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, 2560 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, 2561 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, 2562 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, 2563 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, 2564 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, 2565 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, 2566 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 2567 ); 2568 2569 u32 tmpResult = 0; 2570 2571 tmpResult |= D00 ^ S00; 2572 tmpResult |= D01 ^ S01; 2573 tmpResult |= D02 ^ S02; 
2574 tmpResult |= D03 ^ S03; 2575 tmpResult |= D04 ^ S04; 2576 tmpResult |= D05 ^ S05; 2577 tmpResult |= D06 ^ S06; 2578 tmpResult |= D07 ^ S07; 2579 tmpResult |= D08 ^ S08; 2580 tmpResult |= D09 ^ S09; 2581 tmpResult |= D10 ^ S10; 2582 tmpResult |= D11 ^ S11; 2583 tmpResult |= D12 ^ S12; 2584 tmpResult |= D13 ^ S13; 2585 tmpResult |= D14 ^ S14; 2586 tmpResult |= D15 ^ S15; 2587 2588 if (tmpResult == 0xffffffff) continue; 2589 2590 tmpResult |= D16 ^ S16; 2591 tmpResult |= D17 ^ S17; 2592 tmpResult |= D18 ^ S18; 2593 tmpResult |= D19 ^ S19; 2594 tmpResult |= D20 ^ S20; 2595 tmpResult |= D21 ^ S21; 2596 tmpResult |= D22 ^ S22; 2597 tmpResult |= D23 ^ S23; 2598 tmpResult |= D24 ^ S24; 2599 tmpResult |= D25 ^ S25; 2600 tmpResult |= D26 ^ S26; 2601 tmpResult |= D27 ^ S27; 2602 tmpResult |= D28 ^ S28; 2603 tmpResult |= D29 ^ S29; 2604 tmpResult |= D30 ^ S30; 2605 tmpResult |= D31 ^ S31; 2606 2607 if (tmpResult == 0xffffffff) continue; 2608 2609 tmpResult |= D32 ^ S32; 2610 tmpResult |= D33 ^ S33; 2611 tmpResult |= D34 ^ S34; 2612 tmpResult |= D35 ^ S35; 2613 tmpResult |= D36 ^ S36; 2614 tmpResult |= D37 ^ S37; 2615 tmpResult |= D38 ^ S38; 2616 tmpResult |= D39 ^ S39; 2617 tmpResult |= D40 ^ S40; 2618 tmpResult |= D41 ^ S41; 2619 tmpResult |= D42 ^ S42; 2620 tmpResult |= D43 ^ S43; 2621 tmpResult |= D44 ^ S44; 2622 tmpResult |= D45 ^ S45; 2623 tmpResult |= D46 ^ S46; 2624 tmpResult |= D47 ^ S47; 2625 2626 if (tmpResult == 0xffffffff) continue; 2627 2628 tmpResult |= D48 ^ S48; 2629 tmpResult |= D49 ^ S49; 2630 tmpResult |= D50 ^ S50; 2631 tmpResult |= D51 ^ S51; 2632 tmpResult |= D52 ^ S52; 2633 tmpResult |= D53 ^ S53; 2634 tmpResult |= D54 ^ S54; 2635 tmpResult |= D55 ^ S55; 2636 tmpResult |= D56 ^ S56; 2637 tmpResult |= D57 ^ S57; 2638 tmpResult |= D58 ^ S58; 2639 tmpResult |= D59 ^ S59; 2640 tmpResult |= D60 ^ S60; 2641 tmpResult |= D61 ^ S61; 2642 tmpResult |= D62 ^ S62; 2643 tmpResult |= D63 ^ S63; 2644 2645 if (tmpResult == 0xffffffff) continue; 2646 2647 const u32 
slice = ffz (tmpResult); 2648 2649 #ifdef KERNEL_STATIC 2650 #include COMPARE_S 2651 #endif 2652 } 2653} 2654