1/** 2 * Author......: See docs/credits.txt 3 * License.....: MIT 4 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below 5 * : sboxes for others were takes fron JtR, license below 6 */ 7 8#ifdef KERNEL_STATIC 9#include "inc_vendor.h" 10#include "inc_types.h" 11#include "inc_platform.cl" 12#include "inc_common.cl" 13#endif 14 15#define COMPARE_S "inc_comp_single_bs.cl" 16#define COMPARE_M "inc_comp_multi_bs.cl" 17 18#ifdef IS_NV 19#define KXX_DECL 20#endif 21 22#if (defined IS_AMD || defined IS_HIP) 23#define KXX_DECL 24#endif 25 26#ifdef IS_GENERIC 27#define KXX_DECL 28#endif 29 30#ifdef IS_NV 31 32#if CUDA_ARCH >= 500 33 34// 35// Bitslice DES S-boxes with LOP3.LUT instructions 36// For NVIDIA Maxwell architecture and CUDA 7.5 RC 37// by DeepLearningJohnDoe, version 0.1.6, 2015/07/19 38// 39// Gate counts: 25 24 25 18 25 24 24 23 40// Average: 23.5 41// Depth: 8 7 7 6 8 10 10 8 42// Average: 8 43// 44// Note that same S-box function with a lower gate count isn't necessarily faster. 45// 46// These Boolean expressions corresponding to DES S-boxes were 47// discovered by <deeplearningjohndoe at gmail.com> 48// 49// This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com> 50// Redistribution and use in source and binary forms, with or without 51// modification, are permitted. 52// 53// The underlying mathematical formulas are NOT copyrighted. 54// 55 56#define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d)); 57 58DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 59{ 60 LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) 61 LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) 62 LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) 63 LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) 64 LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) 65 LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) 66 LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) 67 LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) 68 LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) 69 LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) 70 LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) 71 LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) 72 LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) 73 LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) 74 LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) 75 LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) 76 LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) 77 LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) 78 LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) 79 LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) 80 LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) 81 LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) 82 LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) 83 LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) 84 LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) 85 86 *out1 ^= x1; 87 *out2 ^= x2; 88 *out3 ^= x3; 89 *out4 ^= x4; 90} 91 92DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 93{ 94 LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) 95 LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) 96 LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) 97 LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) 98 LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) 99 LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) 100 LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) 101 LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) 102 LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) 103 LUT(x3333CCCC00000000, a2, a5, a6, 0x14) 104 LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) 105 LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) 106 LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) 107 LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) 108 LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) 109 LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) 110 LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) 111 LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) 112 LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) 113 LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) 114 LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) 115 LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) 116 LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) 117 LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) 118 119 *out1 ^= x1; 120 *out2 ^= x2; 121 *out3 ^= x3; 122 *out4 ^= x4; 123} 124 125DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 126{ 127 LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) 128 LUT(xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B) 129 LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) 130 LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) 131 LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) 132 LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) 133 LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) 134 LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) 135 LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) 136 LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) 137 LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) 138 LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) 139 LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) 140 LUT(x500F500F500F500F, a1, a3, a4, 0x98) 141 LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) 142 LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) 143 LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) 144 LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) 145 LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) 146 LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) 147 LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) 148 LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) 149 LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) 150 LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) 151 LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) 152 153 *out1 ^= x1; 154 *out2 ^= x2; 155 *out3 ^= x3; 156 *out4 ^= x4; 157} 158 159DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 160{ 161 LUT(x55F055F055F055F0, a1, a3, a4, 0x72) 162 LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) 163 LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) 164 LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) 165 LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6) 166 LUT(x9999666699996666, a1, a2, a5, 0x69) 167 LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) 168 LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) 169 LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) 170 LUT(x4848484848484848, a1, a2, a3, 0x12) 171 LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) 172 LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) 173 LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) 174 LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) 175 LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) 176 LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) 177 LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) 178 LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) 179 180 *out1 ^= x1; 181 *out2 ^= x2; 182 *out3 ^= x3; 183 *out4 ^= x4; 184} 185 186DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 187{ 188 LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) 189 LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) 190 LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) 191 LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) 192 LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) 193 LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) 194 LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) 195 LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) 196 LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) 197 LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) 198 LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) 199 LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) 200 LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) 201 LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) 202 LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97) 203 LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) 204 LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) 205 LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) 206 LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) 207 LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) 208 LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) 209 LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) 210 LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) 211 LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) 212 LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) 213 214 *out1 ^= x1; 215 *out2 ^= x2; 216 *out3 ^= x3; 217 *out4 ^= x4; 218} 219 220DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 221{ 222 LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) 223 LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) 224 LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) 225 LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) 226 LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) 227 LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) 228 LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) 229 LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) 230 LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) 231 LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) 232 LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) 233 LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) 234 LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) 235 LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) 236 LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) 237 LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) 238 LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) 239 LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) 240 LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) 241 LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) 242 LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) 243 LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) 244 LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) 245 LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) 246 247 *out1 ^= x1; 248 *out2 ^= x2; 249 *out3 ^= x3; 250 *out4 ^= x4; 251} 252 253DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 254{ 255 LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) 256 LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) 257 LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) 258 LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) 259 LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) 260 LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) 261 LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) 262 LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) 263 LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) 264 LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) 265 LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) 266 LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) 267 LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) 268 LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) 269 LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) 270 LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) 271 LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) 272 LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) 273 LUT(xA050A050A050A050, a1, a3, a4, 0x21) 274 LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) 275 LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) 276 LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) 277 LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) 278 LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) 279 280 *out1 ^= x1; 281 *out2 ^= x2; 282 *out3 ^= x3; 283 *out4 ^= x4; 284} 285 286DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 287{ 288 LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) 289 LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) 290 LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) 291 LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) 292 LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) 293 LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) 294 LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) 295 LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) 296 LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) 297 LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) 298 LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) 299 LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) 300 LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) 301 LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) 302 LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) 303 LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) 304 LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) 305 LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) 306 LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) 307 LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) 308 LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) 309 LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C) 310 LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) 311 312 *out1 ^= x1; 313 *out2 ^= x2; 314 *out3 ^= x3; 315 *out4 ^= x4; 316} 317 318#else 319 320/* 321 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC 322 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. 323 * 324 * Gate counts: 49 44 46 33 48 46 46 41 325 * Average: 44.125 326 * 327 * Several same-gate-count expressions for each S-box are included (for use on 328 * different CPUs/GPUs). 329 * 330 * These Boolean expressions corresponding to DES S-boxes have been generated 331 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's 332 * John the Ripper password cracker: http://www.openwall.com/john/ 333 * Being mathematical formulas, they are not copyrighted and are free for reuse 334 * by anyone. 335 * 336 * This file (a specific representation of the S-box expressions, surrounding 337 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>. 338 * Redistribution and use in source and binary forms, with or without 339 * modification, are permitted. (This is a heavily cut-down "BSD license".) 340 * 341 * The effort has been sponsored by Rapid7: http://www.rapid7.com 342 */ 343 344DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 345{ 346 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, 347 x25202160; 348 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; 349 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; 350 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; 351 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; 352 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; 353 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; 354 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; 355 u32 x00, x01, x10, x11, x20, x21, x30, x31; 356 357 x55005500 = a1 & ~a5; 358 x5A0F5A0F = a4 ^ x55005500; 359 x3333FFFF = a3 | a6; 360 x66666666 = a1 ^ a3; 361 x22226666 = x3333FFFF & x66666666; 362 x2D2D6969 = a4 ^ x22226666; 363 x25202160 = x2D2D6969 & ~x5A0F5A0F; 364 365 x00FFFF00 = a5 ^ a6; 366 x33CCCC33 = a3 ^ x00FFFF00; 367 x4803120C = x5A0F5A0F & ~x33CCCC33; 368 x2222FFFF = a6 | x22226666; 369 x6A21EDF3 = x4803120C ^ x2222FFFF; 370 x4A01CC93 = x6A21EDF3 & ~x25202160; 371 372 x5555FFFF = a1 | a6; 373 x7F75FFFF = x6A21EDF3 | x5555FFFF; 374 x00D20096 = a5 & ~x2D2D6969; 375 x7FA7FF69 = x7F75FFFF ^ x00D20096; 376 377 x0A0A0000 = a4 & ~x5555FFFF; 378 x0AD80096 = x00D20096 ^ x0A0A0000; 379 x00999900 = x00FFFF00 & ~x66666666; 380 x0AD99996 = x0AD80096 | x00999900; 381 382 x22332233 = a3 & ~x55005500; 383 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; 384 x054885C0 = x257AA5F0 & ~x22332233; 385 xFAB77A3F = ~x054885C0; 386 x2221EDF3 = x3333FFFF & x6A21EDF3; 387 xD89697CC = xFAB77A3F ^ x2221EDF3; 388 x20 = x7FA7FF69 & ~a2; 389 x21 = x20 ^ xD89697CC; 390 *out3 ^= x21; 391 392 x05B77AC0 = x00FFFF00 ^ x054885C0; 393 x05F77AD6 = x00D20096 | x05B77AC0; 394 x36C48529 = x3333FFFF ^ x05F77AD6; 395 x6391D07C = a1 ^ x36C48529; 396 xBB0747B0 = xD89697CC ^ x6391D07C; 397 x00 = x25202160 | a2; 398 x01 = x00 ^ xBB0747B0; 399 *out1 ^= x01; 400 401 x4C460000 = x3333FFFF ^ x7F75FFFF; 402 x4EDF9996 = x0AD99996 | x4C460000; 403 x2D4E49EA = x6391D07C ^ x4EDF9996; 404 xBBFFFFB0 = x00FFFF00 | xBB0747B0; 405 x96B1B65A = x2D4E49EA ^ xBBFFFFB0; 406 x10 = x4A01CC93 | a2; 407 x11 = x10 ^ x96B1B65A; 408 *out2 ^= x11; 409 410 x5AFF5AFF = a5 | x5A0F5A0F; 411 x52B11215 = x5AFF5AFF & ~x2D4E49EA; 412 x4201C010 = x4A01CC93 & x6391D07C; 413 x10B0D205 = x52B11215 ^ x4201C010; 414 x30 = x10B0D205 | a2; 415 x31 = x30 ^ x0AD99996; 416 *out4 ^= x31; 417} 418 419DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 420{ 421 u32 x33CC33CC; 422 u32 x55550000, x00AA00FF, x33BB33FF; 423 u32 x33CC0000, x11441144, x11BB11BB, x003311BB; 424 u32 x00000F0F, x336600FF, x332200FF, x332200F0; 425 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; 426 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; 427 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; 428 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; 429 u32 x0A451047, xBBDFDD7B, xB19ACD3C; 430 u32 x00, x01, x10, x11, x20, x21, x30, x31; 431 432 x33CC33CC = a2 ^ a5; 433 434 x55550000 = a1 & ~a6; 435 x00AA00FF = a5 & ~x55550000; 436 x33BB33FF = a2 | x00AA00FF; 437 438 x33CC0000 = x33CC33CC & ~a6; 439 x11441144 = a1 & x33CC33CC; 440 x11BB11BB = a5 ^ x11441144; 441 x003311BB = x11BB11BB & ~x33CC0000; 442 443 x00000F0F = a3 & a6; 444 x336600FF = x00AA00FF ^ x33CC0000; 445 x332200FF = x33BB33FF & x336600FF; 446 x332200F0 = x332200FF & ~x00000F0F; 447 448 x0302000F = a3 & x332200FF; 449 xAAAAAAAA = ~a1; 450 xA9A8AAA5 = x0302000F ^ xAAAAAAAA; 451 x33CCCC33 = a6 ^ x33CC33CC; 452 x33CCC030 = x33CCCC33 & ~x00000F0F; 453 x9A646A95 = xA9A8AAA5 ^ x33CCC030; 454 x10 = a4 & ~x332200F0; 455 x11 = x10 ^ x9A646A95; 456 *out2 ^= x11; 457 458 x00333303 = a2 & ~x33CCC030; 459 x118822B8 = x11BB11BB ^ x00333303; 460 xA8208805 = xA9A8AAA5 & ~x118822B8; 461 x3CC3C33C = a3 ^ x33CCCC33; 462 x94E34B39 = xA8208805 ^ x3CC3C33C; 463 x00 = x33BB33FF & ~a4; 464 x01 = x00 ^ x94E34B39; 465 *out1 ^= x01; 466 467 x0331330C = x0302000F ^ x00333303; 468 x3FF3F33C = x3CC3C33C | x0331330C; 469 xA9DF596A = x33BB33FF ^ x9A646A95; 470 xA9DF5F6F = x00000F0F | xA9DF596A; 471 x962CAC53 = x3FF3F33C ^ xA9DF5F6F; 472 473 xA9466A6A = x332200FF ^ x9A646A95; 474 x3DA52153 = x94E34B39 ^ xA9466A6A; 475 x29850143 = xA9DF5F6F & x3DA52153; 476 x33C0330C = x33CC33CC & x3FF3F33C; 477 x1A45324F = x29850143 ^ x33C0330C; 478 x20 = x1A45324F | a4; 479 x21 = x20 ^ x962CAC53; 480 *out3 ^= x21; 481 482 x0A451047 = x1A45324F & ~x118822B8; 483 xBBDFDD7B = x33CCCC33 | xA9DF596A; 484 xB19ACD3C = x0A451047 ^ xBBDFDD7B; 485 x30 = x003311BB | a4; 486 x31 = x30 ^ xB19ACD3C; 487 *out4 ^= x31; 488} 489 490DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 491{ 492 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; 493 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; 494 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; 495 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; 496 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; 497 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; 498 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; 499 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; 500 u32 x00, x01, x10, x11, x20, x21, x30, x31; 501 502 x44444444 = a1 & ~a2; 503 x0F0FF0F0 = a3 ^ a6; 504 x4F4FF4F4 = x44444444 | x0F0FF0F0; 505 x00FFFF00 = a4 ^ a6; 506 x00AAAA00 = x00FFFF00 & ~a1; 507 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; 508 509 x3C3CC3C3 = a2 ^ x0F0FF0F0; 510 x3C3C0000 = x3C3CC3C3 & ~a6; 511 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; 512 x0C840A00 = x4FE55EF4 & ~x7373F4F4; 513 514 x00005EF4 = a6 & x4FE55EF4; 515 x00FF5EFF = a4 | x00005EF4; 516 x00555455 = a1 & x00FF5EFF; 517 x3C699796 = x3C3CC3C3 ^ x00555455; 518 x30 = x4FE55EF4 & ~a5; 519 x31 = x30 ^ x3C699796; 520 *out4 ^= x31; 521 522 x000FF000 = x0F0FF0F0 & x00FFFF00; 523 x55AA55AA = a1 ^ a4; 524 x26D9A15E = x7373F4F4 ^ x55AA55AA; 525 x2FDFAF5F = a3 | x26D9A15E; 526 x2FD00F5F = x2FDFAF5F & ~x000FF000; 527 528 x55AAFFAA = x00AAAA00 | x55AA55AA; 529 x28410014 = x3C699796 & ~x55AAFFAA; 530 x000000FF = a4 & a6; 531 x000000CC = x000000FF & ~a2; 532 x284100D8 = x28410014 ^ x000000CC; 533 534 x204100D0 = x7373F4F4 & x284100D8; 535 x3C3CC3FF = x3C3CC3C3 | x000000FF; 536 x1C3CC32F = x3C3CC3FF & ~x204100D0; 537 x4969967A = a1 ^ x1C3CC32F; 538 x10 = x2FD00F5F & a5; 539 x11 = x10 ^ x4969967A; 540 *out2 ^= x11; 541 542 x4CC44CC4 = x4FE55EF4 & ~a2; 543 x40C040C0 = x4CC44CC4 & ~a3; 544 xC3C33C3C = ~x3C3CC3C3; 545 x9669C396 = x55AAFFAA ^ xC3C33C3C; 546 xD6A98356 = x40C040C0 ^ x9669C396; 547 x00 = a5 & ~x0C840A00; 548 x01 = x00 ^ xD6A98356; 549 *out1 ^= x01; 550 551 xD6E9C3D6 = x40C040C0 | x9669C396; 552 x4CEEEEC4 = x00AAAA00 | x4CC44CC4; 553 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; 554 x001A000B = a4 & ~x4FE55EF4; 555 x9A1F2D1B = x9A072D12 | x001A000B; 556 x20 = a5 & ~x284100D8; 557 x21 = x20 ^ x9A1F2D1B; 558 *out3 ^= x21; 559} 560 561DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 562{ 563 u32 x5A5A5A5A, x0F0FF0F0; 564 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, 565 x52FBCA0F, x61C8F93C; 566 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; 567 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; 568 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; 569 u32 x00, x01, x10, x11, x20, x21, x30, x31; 570 571 x5A5A5A5A = a1 ^ a3; 572 x0F0FF0F0 = a3 ^ a5; 573 x33FF33FF = a2 | a4; 574 x33FFCC00 = a5 ^ x33FF33FF; 575 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; 576 x0C0CC0C0 = x0F0FF0F0 & ~a2; 577 x0CF3C03F = a4 ^ x0C0CC0C0; 578 x5EFBDA7F = x5A5A5A5A | x0CF3C03F; 579 x52FBCA0F = x5EFBDA7F & ~x0C0030F0; 580 x61C8F93C = a2 ^ x52FBCA0F; 581 582 x00C0C03C = x0CF3C03F & x61C8F93C; 583 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; 584 x3B92A366 = x5A5A5A5A ^ x61C8F93C; 585 x30908326 = x3B92A366 & ~x0F0F30C0; 586 x3C90B3D6 = x0C0030F0 ^ x30908326; 587 588 x33CC33CC = a2 ^ a4; 589 x0C0CFFFF = a5 | x0C0CC0C0; 590 x379E5C99 = x3B92A366 ^ x0C0CFFFF; 591 x04124C11 = x379E5C99 & ~x33CC33CC; 592 x56E9861E = x52FBCA0F ^ x04124C11; 593 x00 = a6 & ~x3C90B3D6; 594 x01 = x00 ^ x56E9861E; 595 *out1 ^= x01; 596 597 xA91679E1 = ~x56E9861E; 598 x10 = x3C90B3D6 & ~a6; 599 x11 = x10 ^ xA91679E1; 600 *out2 ^= x11; 601 602 x9586CA37 = x3C90B3D6 ^ xA91679E1; 603 x8402C833 = x9586CA37 & ~x33CC33CC; 604 x84C2C83F = x00C0C03C | x8402C833; 605 xB35C94A6 = x379E5C99 ^ x84C2C83F; 606 x20 = x61C8F93C | a6; 607 x21 = x20 ^ xB35C94A6; 608 *out3 ^= x21; 609 610 x30 = a6 & x61C8F93C; 611 x31 = x30 ^ xB35C94A6; 612 *out4 ^= x31; 613} 614 615DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 616{ 617 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; 618 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; 619 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; 620 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; 621 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; 622 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; 623 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; 624 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; 625 u32 x00, x01, x10, x11, x20, x21, x30, x31; 626 627 x77777777 = a1 | a3; 628 x77770000 = x77777777 & ~a6; 629 x22225555 = a1 ^ x77770000; 630 x11116666 = a3 ^ x22225555; 631 x1F1F6F6F = a4 | x11116666; 632 633 x70700000 = x77770000 & ~a4; 634 x43433333 = a3 ^ x70700000; 635 x00430033 = a5 & x43433333; 636 x55557777 = a1 | x11116666; 637 x55167744 = x00430033 ^ x55557777; 638 x5A19784B = a4 ^ x55167744; 639 640 x5A1987B4 = a6 ^ x5A19784B; 641 x7A3BD7F5 = x22225555 | x5A1987B4; 642 x003B00F5 = a5 & x7A3BD7F5; 643 x221955A0 = x22225555 ^ x003B00F5; 644 x05050707 = a4 & x55557777; 645 x271C52A7 = x221955A0 ^ x05050707; 646 647 x2A2A82A0 = x7A3BD7F5 & ~a1; 648 x6969B193 = x43433333 ^ x2A2A82A0; 649 x1FE06F90 = a5 ^ x1F1F6F6F; 650 x16804E00 = x1FE06F90 & ~x6969B193; 651 xE97FB1FF = ~x16804E00; 652 x20 = xE97FB1FF & ~a2; 653 x21 = x20 ^ x5A19784B; 654 *out3 ^= x21; 655 656 x43403302 = x43433333 & ~x003B00F5; 657 x35CAED30 = x2A2A82A0 ^ x1FE06F90; 658 x37DEFFB7 = x271C52A7 | x35CAED30; 659 x349ECCB5 = x37DEFFB7 & ~x43403302; 660 x0B01234A = x1F1F6F6F & ~x349ECCB5; 661 662 x101884B4 = x5A1987B4 & x349ECCB5; 663 x0FF8EB24 = x1FE06F90 ^ x101884B4; 664 x41413333 = x43433333 & x55557777; 665 x4FF9FB37 = x0FF8EB24 | x41413333; 666 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; 667 x30 = x4FC2FBC2 & a2; 668 x31 = x30 ^ x271C52A7; 669 *out4 ^= x31; 670 671 x22222222 = a1 ^ x77777777; 672 x16BCEE97 = x349ECCB5 ^ x22222222; 673 x0F080B04 = a4 & x0FF8EB24; 674 x19B4E593 = x16BCEE97 ^ x0F080B04; 675 x00 = x0B01234A | a2; 676 x01 = x00 ^ x19B4E593; 677 *out1 ^= x01; 678 679 x5C5C5C5C = x1F1F6F6F ^ x43433333; 680 x4448184C = x5C5C5C5C & ~x19B4E593; 681 x2DDABE71 = x22225555 ^ x0FF8EB24; 682 x6992A63D = x4448184C ^ x2DDABE71; 683 x10 = x1F1F6F6F & a2; 684 x11 = x10 ^ x6992A63D; 685 *out2 ^= x11; 686} 687 688DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 689{ 690 u32 x33CC33CC; 691 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; 692 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; 693 u32 x09030C06, x09030000, x336622FF, x3A6522FF; 694 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; 695 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; 696 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; 697 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; 698 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; 699 u32 x00, x01, x10, x11, x20, x21, x30, x31; 700 701 x33CC33CC = a2 ^ a5; 702 703 x3333FFFF = a2 | a6; 704 x11115555 = a1 & x3333FFFF; 705 x22DD6699 = x33CC33CC ^ x11115555; 706 x22DD9966 = a6 ^ x22DD6699; 707 x00220099 = a5 & ~x22DD9966; 708 709 x00551144 = a1 & x22DD9966; 710 x33662277 = a2 ^ x00551144; 711 x5A5A5A5A = a1 ^ a3; 712 x7B7E7A7F = x33662277 | x5A5A5A5A; 713 x59A31CE6 = x22DD6699 ^ x7B7E7A7F; 714 715 x09030C06 = a3 & x59A31CE6; 716 x09030000 = x09030C06 & ~a6; 717 x336622FF = x00220099 | x33662277; 718 x3A6522FF = x09030000 ^ x336622FF; 719 x30 = x3A6522FF & a4; 720 x31 = x30 ^ x59A31CE6; 721 *out4 ^= x31; 722 723 x484D494C = a2 ^ x7B7E7A7F; 724 x0000B6B3 = a6 & ~x484D494C; 725 x0F0FB9BC = a3 ^ x0000B6B3; 726 x00FC00F9 = a5 & ~x09030C06; 727 x0FFFB9FD = x0F0FB9BC | x00FC00F9; 728 729 x5DF75DF7 = a1 | x59A31CE6; 730 x116600F7 = x336622FF & x5DF75DF7; 731 x1E69B94B = x0F0FB9BC ^ x116600F7; 732 x1668B94B = x1E69B94B & ~x09030000; 733 x20 = x00220099 | a4; 734 x21 = x20 ^ x1668B94B; 735 *out3 ^= x21; 736 737 x7B7B7B7B = a2 | x5A5A5A5A; 738 x411E5984 = x3A6522FF ^ x7B7B7B7B; 739 x1FFFFDFD = x11115555 | x0FFFB9FD; 740 x5EE1A479 = x411E5984 ^ x1FFFFDFD; 741 742 x3CB4DFD2 = x22DD6699 ^ x1E69B94B; 743 x004B002D = a5 & ~x3CB4DFD2; 744 xB7B2B6B3 = ~x484D494C; 745 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; 746 xCC82CDE5 = x004B002D ^ xCCC9CDC8; 747 x10 = xCC82CDE5 & ~a4; 748 x11 = x10 ^ x5EE1A479; 749 *out2 ^= x11; 750 751 x0055EEBB = a6 ^ x00551144; 752 x5A5AECE9 = a1 ^ x0F0FB9BC; 753 x0050ECA9 = x0055EEBB & x5A5AECE9; 754 xC5CAC1CE = x09030C06 ^ xCCC9CDC8; 755 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; 756 x00 = x0FFFB9FD & ~a4; 757 x01 = x00 ^ xC59A2D67; 758 *out1 ^= x01; 759} 760 761DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 762{ 763 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; 764 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; 765 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; 766 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; 767 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; 768 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; 769 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; 770 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; 771 u32 x00, x01, x10, x11, x20, x21, x30, x31; 772 773 x0FF00FF0 = a4 ^ a5; 774 x3CC33CC3 = a3 ^ x0FF00FF0; 775 x00003CC3 = a6 & x3CC33CC3; 776 x0F000F00 = a4 & x0FF00FF0; 777 x5A555A55 = a2 ^ x0F000F00; 778 x00001841 = x00003CC3 & x5A555A55; 779 780 x00000F00 = a6 & x0F000F00; 781 x33333C33 = a3 ^ x00000F00; 782 x7B777E77 = x5A555A55 | x33333C33; 783 x0FF0F00F = a6 ^ x0FF00FF0; 784 x74878E78 = x7B777E77 ^ x0FF0F00F; 785 x30 = a1 & ~x00001841; 786 x31 = x30 ^ x74878E78; 787 *out4 ^= x31; 788 789 x003C003C = a5 & ~x3CC33CC3; 790 x5A7D5A7D = x5A555A55 | x003C003C; 791 x333300F0 = x00003CC3 ^ x33333C33; 792 x694E5A8D = x5A7D5A7D ^ x333300F0; 793 794 x0FF0CCCC = x00003CC3 ^ x0FF0F00F; 795 x000F0303 = a4 & ~x0FF0CCCC; 796 x5A505854 = x5A555A55 & ~x000F0303; 797 x33CC000F = a5 ^ x333300F0; 798 x699C585B = x5A505854 ^ x33CC000F; 799 800 x7F878F78 = x0F000F00 | x74878E78; 801 x21101013 = a3 & x699C585B; 802 x7F979F7B = x7F878F78 | x21101013; 803 x30030CC0 = x3CC33CC3 & ~x0FF0F00F; 804 x4F9493BB = x7F979F7B ^ x30030CC0; 805 x00 = x4F9493BB & ~a1; 806 x01 = x00 ^ x694E5A8D; 807 *out1 ^= x01; 808 809 x6F9CDBFB = x699C585B | x4F9493BB; 810 x0000DBFB = a6 & x6F9CDBFB; 811 x00005151 = a2 & x0000DBFB; 812 x26DAC936 = x694E5A8D ^ x4F9493BB; 813 x26DA9867 = x00005151 ^ x26DAC936; 814 815 x27DA9877 = x21101013 | x26DA9867; 816 x27DA438C = x0000DBFB ^ x27DA9877; 817 x2625C9C9 = a5 ^ x26DAC936; 818 x27FFCBCD = x27DA438C | x2625C9C9; 819 x20 = x27FFCBCD & a1; 820 x21 = x20 ^ x699C585B; 821 *out3 ^= x21; 822 823 x27FF1036 = x0000DBFB ^ x27FFCBCD; 824 x27FF103E = x003C003C | x27FF1036; 825 xB06B6C44 = ~x4F9493BB; 826 x97947C7A = x27FF103E ^ xB06B6C44; 827 x10 = x97947C7A & ~a1; 828 x11 = x10 ^ x26DA9867; 829 *out2 ^= x11; 830} 831 832DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 833{ 834 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; 835 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; 836 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; 837 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; 838 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; 839 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; 840 u32 xF700A600, x61008000, x03B7856B, x62B7056B; 841 u32 x00, x01, x10, x11, x20, x21, x30, x31; 842 843 x0C0C0C0C = a3 & ~a2; 844 x0000F0F0 = a5 & ~a3; 845 x00FFF00F = a4 ^ x0000F0F0; 846 x00555005 = a1 & x00FFF00F; 847 x00515001 = x00555005 & ~x0C0C0C0C; 848 849 x33000330 = a2 & ~x00FFF00F; 850 x77555775 = a1 | x33000330; 851 x30303030 = a2 & ~a3; 852 x3030CFCF = a5 ^ x30303030; 853 x30104745 = x77555775 & x3030CFCF; 854 x30555745 = x00555005 | x30104745; 855 856 xFF000FF0 = ~x00FFF00F; 857 xCF1048B5 = x30104745 ^ xFF000FF0; 858 x080A080A = a3 & ~x77555775; 859 xC71A40BF = xCF1048B5 ^ x080A080A; 860 xCB164CB3 = x0C0C0C0C ^ xC71A40BF; 861 x10 = x00515001 | a6; 862 x11 = x10 ^ xCB164CB3; 863 *out2 ^= x11; 864 865 x9E4319E6 = a1 ^ xCB164CB3; 866 x000019E6 = a5 & x9E4319E6; 867 xF429738C = a2 ^ xC71A40BF; 868 xF4296A6A = x000019E6 ^ xF429738C; 869 xC729695A = x33000330 ^ xF4296A6A; 870 871 xC47C3D2F = x30555745 ^ xF4296A6A; 872 xF77F3F3F = a2 | xC47C3D2F; 873 x9E43E619 = a5 ^ x9E4319E6; 874 x693CD926 = xF77F3F3F ^ x9E43E619; 875 x20 = x30555745 & a6; 876 x21 = x20 ^ x693CD926; 877 *out3 ^= x21; 878 879 xF719A695 = x3030CFCF ^ xC729695A; 880 xF4FF73FF = a4 | xF429738C; 881 x03E6D56A = xF719A695 ^ xF4FF73FF; 882 x56B3803F = a1 ^ x03E6D56A; 883 x30 = x56B3803F & a6; 884 x31 = x30 ^ xC729695A; 885 *out4 ^= x31; 886 887 xF700A600 = xF719A695 & ~a4; 888 x61008000 = x693CD926 & xF700A600; 889 x03B7856B = x00515001 ^ x03E6D56A; 890 x62B7056B = x61008000 ^ x03B7856B; 891 x00 = x62B7056B | a6; 892 x01 = x00 ^ xC729695A; 893 *out1 ^= x01; 894} 895 896#endif 897#endif 898 899#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC 900 901/* 902 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC 903 * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. 904 * 905 * Gate counts: 49 44 46 33 48 46 46 41 906 * Average: 44.125 907 * 908 * Several same-gate-count expressions for each S-box are included (for use on 909 * different CPUs/GPUs). 910 * 911 * These Boolean expressions corresponding to DES S-boxes have been generated 912 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's 913 * John the Ripper password cracker: http://www.openwall.com/john/ 914 * Being mathematical formulas, they are not copyrighted and are free for reuse 915 * by anyone. 916 * 917 * This file (a specific representation of the S-box expressions, surrounding 918 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>. 919 * Redistribution and use in source and binary forms, with or without 920 * modification, are permitted. (This is a heavily cut-down "BSD license".) 921 * 922 * The effort has been sponsored by Rapid7: http://www.rapid7.com 923 */ 924 925DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 926{ 927 u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, 928 x25202160; 929 u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; 930 u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; 931 u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; 932 u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; 933 u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; 934 u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; 935 u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; 936 u32 x00, x01, x10, x11, x20, x21, x30, x31; 937 938 x55005500 = a1 & ~a5; 939 x5A0F5A0F = a4 ^ x55005500; 940 x3333FFFF = a3 | a6; 941 x66666666 = a1 ^ a3; 942 x22226666 = x3333FFFF & x66666666; 943 x2D2D6969 = a4 ^ x22226666; 944 x25202160 = x2D2D6969 & ~x5A0F5A0F; 945 946 x00FFFF00 = a5 ^ a6; 947 x33CCCC33 = a3 ^ x00FFFF00; 948 x4803120C = x5A0F5A0F & ~x33CCCC33; 949 x2222FFFF = a6 | x22226666; 950 x6A21EDF3 = x4803120C ^ x2222FFFF; 951 x4A01CC93 = x6A21EDF3 & ~x25202160; 952 953 x5555FFFF = a1 | a6; 954 x7F75FFFF = x6A21EDF3 | x5555FFFF; 955 x00D20096 = a5 & ~x2D2D6969; 956 x7FA7FF69 = x7F75FFFF ^ x00D20096; 957 958 x0A0A0000 = a4 & ~x5555FFFF; 959 x0AD80096 = x00D20096 ^ x0A0A0000; 960 x00999900 = x00FFFF00 & ~x66666666; 961 x0AD99996 = x0AD80096 | x00999900; 962 963 x22332233 = a3 & ~x55005500; 964 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; 965 x054885C0 = x257AA5F0 & ~x22332233; 966 xFAB77A3F = ~x054885C0; 967 x2221EDF3 = x3333FFFF & x6A21EDF3; 968 xD89697CC = xFAB77A3F ^ x2221EDF3; 969 x20 = x7FA7FF69 & ~a2; 970 x21 = x20 ^ xD89697CC; 971 *out3 ^= x21; 972 973 x05B77AC0 = x00FFFF00 ^ x054885C0; 974 x05F77AD6 = x00D20096 | x05B77AC0; 975 x36C48529 = x3333FFFF ^ x05F77AD6; 976 x6391D07C = a1 ^ x36C48529; 977 xBB0747B0 = xD89697CC ^ x6391D07C; 978 x00 = x25202160 | a2; 979 x01 = x00 ^ xBB0747B0; 980 *out1 ^= x01; 981 982 x4C460000 = x3333FFFF ^ x7F75FFFF; 983 x4EDF9996 = x0AD99996 | x4C460000; 984 x2D4E49EA = x6391D07C ^ x4EDF9996; 985 xBBFFFFB0 = x00FFFF00 | xBB0747B0; 986 x96B1B65A = x2D4E49EA ^ xBBFFFFB0; 987 x10 = x4A01CC93 | a2; 988 x11 = x10 ^ x96B1B65A; 989 *out2 ^= x11; 990 991 x5AFF5AFF = a5 | x5A0F5A0F; 992 x52B11215 = x5AFF5AFF & ~x2D4E49EA; 993 x4201C010 = x4A01CC93 & x6391D07C; 994 x10B0D205 = x52B11215 ^ x4201C010; 995 x30 = x10B0D205 | a2; 996 x31 = x30 ^ x0AD99996; 997 *out4 ^= x31; 998} 999 1000DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1001{ 1002 u32 x33CC33CC; 1003 u32 x55550000, x00AA00FF, x33BB33FF; 1004 u32 x33CC0000, x11441144, x11BB11BB, x003311BB; 1005 u32 x00000F0F, x336600FF, x332200FF, x332200F0; 1006 u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; 1007 u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; 1008 u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; 1009 u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; 1010 u32 x0A451047, xBBDFDD7B, xB19ACD3C; 1011 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1012 1013 x33CC33CC = a2 ^ a5; 1014 1015 x55550000 = a1 & ~a6; 1016 x00AA00FF = a5 & ~x55550000; 1017 x33BB33FF = a2 | x00AA00FF; 1018 1019 x33CC0000 = x33CC33CC & ~a6; 1020 x11441144 = a1 & x33CC33CC; 1021 x11BB11BB = a5 ^ x11441144; 1022 x003311BB = x11BB11BB & ~x33CC0000; 1023 1024 x00000F0F = a3 & a6; 1025 x336600FF = x00AA00FF ^ x33CC0000; 1026 x332200FF = x33BB33FF & x336600FF; 1027 x332200F0 = x332200FF & ~x00000F0F; 1028 1029 x0302000F = a3 & x332200FF; 1030 xAAAAAAAA = ~a1; 1031 xA9A8AAA5 = x0302000F ^ xAAAAAAAA; 1032 x33CCCC33 = a6 ^ x33CC33CC; 1033 x33CCC030 = x33CCCC33 & ~x00000F0F; 1034 x9A646A95 = xA9A8AAA5 ^ x33CCC030; 1035 x10 = a4 & ~x332200F0; 1036 x11 = x10 ^ x9A646A95; 1037 *out2 ^= x11; 1038 1039 x00333303 = a2 & ~x33CCC030; 1040 x118822B8 = x11BB11BB ^ x00333303; 1041 xA8208805 = xA9A8AAA5 & ~x118822B8; 1042 x3CC3C33C = a3 ^ x33CCCC33; 1043 x94E34B39 = xA8208805 ^ x3CC3C33C; 1044 x00 = x33BB33FF & ~a4; 1045 x01 = x00 ^ x94E34B39; 1046 *out1 ^= x01; 1047 1048 x0331330C = x0302000F ^ x00333303; 1049 x3FF3F33C = x3CC3C33C | x0331330C; 1050 xA9DF596A = x33BB33FF ^ x9A646A95; 1051 xA9DF5F6F = x00000F0F | xA9DF596A; 1052 x962CAC53 = x3FF3F33C ^ xA9DF5F6F; 1053 1054 xA9466A6A = x332200FF ^ x9A646A95; 1055 x3DA52153 = x94E34B39 ^ xA9466A6A; 1056 x29850143 = xA9DF5F6F & x3DA52153; 1057 x33C0330C = x33CC33CC & x3FF3F33C; 1058 x1A45324F = x29850143 ^ x33C0330C; 1059 x20 = x1A45324F | a4; 1060 x21 = x20 ^ x962CAC53; 1061 *out3 ^= x21; 1062 1063 x0A451047 = x1A45324F & ~x118822B8; 1064 xBBDFDD7B = x33CCCC33 | xA9DF596A; 1065 xB19ACD3C = x0A451047 ^ xBBDFDD7B; 1066 x30 = x003311BB | a4; 1067 x31 = x30 ^ xB19ACD3C; 1068 *out4 ^= x31; 1069} 1070 1071DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1072{ 1073 u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; 1074 u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; 1075 u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; 1076 u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; 1077 u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; 1078 u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; 1079 u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; 1080 u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; 1081 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1082 1083 x44444444 = a1 & ~a2; 1084 x0F0FF0F0 = a3 ^ a6; 1085 x4F4FF4F4 = x44444444 | x0F0FF0F0; 1086 x00FFFF00 = a4 ^ a6; 1087 x00AAAA00 = x00FFFF00 & ~a1; 1088 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; 1089 1090 x3C3CC3C3 = a2 ^ x0F0FF0F0; 1091 x3C3C0000 = x3C3CC3C3 & ~a6; 1092 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; 1093 x0C840A00 = x4FE55EF4 & ~x7373F4F4; 1094 1095 x00005EF4 = a6 & x4FE55EF4; 1096 x00FF5EFF = a4 | x00005EF4; 1097 x00555455 = a1 & x00FF5EFF; 1098 x3C699796 = x3C3CC3C3 ^ x00555455; 1099 x30 = x4FE55EF4 & ~a5; 1100 x31 = x30 ^ x3C699796; 1101 *out4 ^= x31; 1102 1103 x000FF000 = x0F0FF0F0 & x00FFFF00; 1104 x55AA55AA = a1 ^ a4; 1105 x26D9A15E = x7373F4F4 ^ x55AA55AA; 1106 x2FDFAF5F = a3 | x26D9A15E; 1107 x2FD00F5F = x2FDFAF5F & ~x000FF000; 1108 1109 x55AAFFAA = x00AAAA00 | x55AA55AA; 1110 x28410014 = x3C699796 & ~x55AAFFAA; 1111 x000000FF = a4 & a6; 1112 x000000CC = x000000FF & ~a2; 1113 x284100D8 = x28410014 ^ x000000CC; 1114 1115 x204100D0 = x7373F4F4 & x284100D8; 1116 x3C3CC3FF = x3C3CC3C3 | x000000FF; 1117 x1C3CC32F = x3C3CC3FF & ~x204100D0; 1118 x4969967A = a1 ^ x1C3CC32F; 1119 x10 = x2FD00F5F & a5; 1120 x11 = x10 ^ x4969967A; 1121 *out2 ^= x11; 1122 1123 x4CC44CC4 = x4FE55EF4 & ~a2; 1124 x40C040C0 = x4CC44CC4 & ~a3; 1125 xC3C33C3C = ~x3C3CC3C3; 1126 x9669C396 = x55AAFFAA ^ xC3C33C3C; 1127 xD6A98356 = x40C040C0 ^ x9669C396; 1128 x00 = a5 & ~x0C840A00; 1129 x01 = x00 ^ xD6A98356; 1130 *out1 ^= x01; 1131 1132 xD6E9C3D6 = x40C040C0 | x9669C396; 1133 x4CEEEEC4 = x00AAAA00 | x4CC44CC4; 1134 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; 1135 x001A000B = a4 & ~x4FE55EF4; 1136 x9A1F2D1B = x9A072D12 | x001A000B; 1137 x20 = a5 & ~x284100D8; 1138 x21 = x20 ^ x9A1F2D1B; 1139 *out3 ^= x21; 1140} 1141 1142DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1143{ 1144 u32 x5A5A5A5A, x0F0FF0F0; 1145 u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, 1146 x52FBCA0F, x61C8F93C; 1147 u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; 1148 u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; 1149 u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; 1150 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1151 1152 x5A5A5A5A = a1 ^ a3; 1153 x0F0FF0F0 = a3 ^ a5; 1154 x33FF33FF = a2 | a4; 1155 x33FFCC00 = a5 ^ x33FF33FF; 1156 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; 1157 x0C0CC0C0 = x0F0FF0F0 & ~a2; 1158 x0CF3C03F = a4 ^ x0C0CC0C0; 1159 x5EFBDA7F = x5A5A5A5A | x0CF3C03F; 1160 x52FBCA0F = x5EFBDA7F & ~x0C0030F0; 1161 x61C8F93C = a2 ^ x52FBCA0F; 1162 1163 x00C0C03C = x0CF3C03F & x61C8F93C; 1164 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; 1165 x3B92A366 = x5A5A5A5A ^ x61C8F93C; 1166 x30908326 = x3B92A366 & ~x0F0F30C0; 1167 x3C90B3D6 = x0C0030F0 ^ x30908326; 1168 1169 x33CC33CC = a2 ^ a4; 1170 x0C0CFFFF = a5 | x0C0CC0C0; 1171 x379E5C99 = x3B92A366 ^ x0C0CFFFF; 1172 x04124C11 = x379E5C99 & ~x33CC33CC; 1173 x56E9861E = x52FBCA0F ^ x04124C11; 1174 x00 = a6 & ~x3C90B3D6; 1175 x01 = x00 ^ x56E9861E; 1176 *out1 ^= x01; 1177 1178 xA91679E1 = ~x56E9861E; 1179 x10 = x3C90B3D6 & ~a6; 1180 x11 = x10 ^ xA91679E1; 1181 *out2 ^= x11; 1182 1183 x9586CA37 = x3C90B3D6 ^ xA91679E1; 1184 x8402C833 = x9586CA37 & ~x33CC33CC; 1185 x84C2C83F = x00C0C03C | x8402C833; 1186 xB35C94A6 = x379E5C99 ^ x84C2C83F; 1187 x20 = x61C8F93C | a6; 1188 x21 = x20 ^ xB35C94A6; 1189 *out3 ^= x21; 1190 1191 x30 = a6 & x61C8F93C; 1192 x31 = x30 ^ xB35C94A6; 1193 *out4 ^= x31; 1194} 1195 1196DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1197{ 1198 u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; 1199 u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; 1200 u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; 1201 u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; 1202 u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; 1203 u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; 1204 u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; 1205 u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; 1206 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1207 1208 x77777777 = a1 | a3; 1209 x77770000 = x77777777 & ~a6; 1210 x22225555 = a1 ^ x77770000; 1211 x11116666 = a3 ^ x22225555; 1212 x1F1F6F6F = a4 | x11116666; 1213 1214 x70700000 = x77770000 & ~a4; 1215 x43433333 = a3 ^ x70700000; 1216 x00430033 = a5 & x43433333; 1217 x55557777 = a1 | x11116666; 1218 x55167744 = x00430033 ^ x55557777; 1219 x5A19784B = a4 ^ x55167744; 1220 1221 x5A1987B4 = a6 ^ x5A19784B; 1222 x7A3BD7F5 = x22225555 | x5A1987B4; 1223 x003B00F5 = a5 & x7A3BD7F5; 1224 x221955A0 = x22225555 ^ x003B00F5; 1225 x05050707 = a4 & x55557777; 1226 x271C52A7 = x221955A0 ^ x05050707; 1227 1228 x2A2A82A0 = x7A3BD7F5 & ~a1; 1229 x6969B193 = x43433333 ^ x2A2A82A0; 1230 x1FE06F90 = a5 ^ x1F1F6F6F; 1231 x16804E00 = x1FE06F90 & ~x6969B193; 1232 xE97FB1FF = ~x16804E00; 1233 x20 = xE97FB1FF & ~a2; 1234 x21 = x20 ^ x5A19784B; 1235 *out3 ^= x21; 1236 1237 x43403302 = x43433333 & ~x003B00F5; 1238 x35CAED30 = x2A2A82A0 ^ x1FE06F90; 1239 x37DEFFB7 = x271C52A7 | x35CAED30; 1240 x349ECCB5 = x37DEFFB7 & ~x43403302; 1241 x0B01234A = x1F1F6F6F & ~x349ECCB5; 1242 1243 x101884B4 = x5A1987B4 & x349ECCB5; 1244 x0FF8EB24 = x1FE06F90 ^ x101884B4; 1245 x41413333 = x43433333 & x55557777; 1246 x4FF9FB37 = x0FF8EB24 | x41413333; 1247 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; 1248 x30 = x4FC2FBC2 & a2; 1249 x31 = x30 ^ x271C52A7; 1250 *out4 ^= x31; 1251 1252 x22222222 = a1 ^ x77777777; 1253 x16BCEE97 = x349ECCB5 ^ x22222222; 1254 x0F080B04 = a4 & x0FF8EB24; 1255 x19B4E593 = x16BCEE97 ^ x0F080B04; 1256 x00 = x0B01234A | a2; 1257 x01 = x00 ^ x19B4E593; 1258 *out1 ^= x01; 1259 1260 x5C5C5C5C = x1F1F6F6F ^ x43433333; 1261 x4448184C = x5C5C5C5C & ~x19B4E593; 1262 x2DDABE71 = x22225555 ^ x0FF8EB24; 1263 x6992A63D = x4448184C ^ x2DDABE71; 1264 x10 = x1F1F6F6F & a2; 1265 x11 = x10 ^ x6992A63D; 1266 *out2 ^= x11; 1267} 1268 1269DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1270{ 1271 u32 x33CC33CC; 1272 u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; 1273 u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; 1274 u32 x09030C06, x09030000, x336622FF, x3A6522FF; 1275 u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; 1276 u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; 1277 u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; 1278 u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; 1279 u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; 1280 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1281 1282 x33CC33CC = a2 ^ a5; 1283 1284 x3333FFFF = a2 | a6; 1285 x11115555 = a1 & x3333FFFF; 1286 x22DD6699 = x33CC33CC ^ x11115555; 1287 x22DD9966 = a6 ^ x22DD6699; 1288 x00220099 = a5 & ~x22DD9966; 1289 1290 x00551144 = a1 & x22DD9966; 1291 x33662277 = a2 ^ x00551144; 1292 x5A5A5A5A = a1 ^ a3; 1293 x7B7E7A7F = x33662277 | x5A5A5A5A; 1294 x59A31CE6 = x22DD6699 ^ x7B7E7A7F; 1295 1296 x09030C06 = a3 & x59A31CE6; 1297 x09030000 = x09030C06 & ~a6; 1298 x336622FF = x00220099 | x33662277; 1299 x3A6522FF = x09030000 ^ x336622FF; 1300 x30 = x3A6522FF & a4; 1301 x31 = x30 ^ x59A31CE6; 1302 *out4 ^= x31; 1303 1304 x484D494C = a2 ^ x7B7E7A7F; 1305 x0000B6B3 = a6 & ~x484D494C; 1306 x0F0FB9BC = a3 ^ x0000B6B3; 1307 x00FC00F9 = a5 & ~x09030C06; 1308 x0FFFB9FD = x0F0FB9BC | x00FC00F9; 1309 1310 x5DF75DF7 = a1 | x59A31CE6; 1311 x116600F7 = x336622FF & x5DF75DF7; 1312 x1E69B94B = x0F0FB9BC ^ x116600F7; 1313 x1668B94B = x1E69B94B & ~x09030000; 1314 x20 = x00220099 | a4; 1315 x21 = x20 ^ x1668B94B; 1316 *out3 ^= x21; 1317 1318 x7B7B7B7B = a2 | x5A5A5A5A; 1319 x411E5984 = x3A6522FF ^ x7B7B7B7B; 1320 x1FFFFDFD = x11115555 | x0FFFB9FD; 1321 x5EE1A479 = x411E5984 ^ x1FFFFDFD; 1322 1323 x3CB4DFD2 = x22DD6699 ^ x1E69B94B; 1324 x004B002D = a5 & ~x3CB4DFD2; 1325 xB7B2B6B3 = ~x484D494C; 1326 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; 1327 xCC82CDE5 = x004B002D ^ xCCC9CDC8; 1328 x10 = xCC82CDE5 & ~a4; 1329 x11 = x10 ^ x5EE1A479; 1330 *out2 ^= x11; 1331 1332 x0055EEBB = a6 ^ x00551144; 1333 x5A5AECE9 = a1 ^ x0F0FB9BC; 1334 x0050ECA9 = x0055EEBB & x5A5AECE9; 1335 xC5CAC1CE = x09030C06 ^ xCCC9CDC8; 1336 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; 1337 x00 = x0FFFB9FD & ~a4; 1338 x01 = x00 ^ xC59A2D67; 1339 *out1 ^= x01; 1340} 1341 1342DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1343{ 1344 u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; 1345 u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; 1346 u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; 1347 u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; 1348 u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; 1349 u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; 1350 u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; 1351 u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; 1352 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1353 1354 x0FF00FF0 = a4 ^ a5; 1355 x3CC33CC3 = a3 ^ x0FF00FF0; 1356 x00003CC3 = a6 & x3CC33CC3; 1357 x0F000F00 = a4 & x0FF00FF0; 1358 x5A555A55 = a2 ^ x0F000F00; 1359 x00001841 = x00003CC3 & x5A555A55; 1360 1361 x00000F00 = a6 & x0F000F00; 1362 x33333C33 = a3 ^ x00000F00; 1363 x7B777E77 = x5A555A55 | x33333C33; 1364 x0FF0F00F = a6 ^ x0FF00FF0; 1365 x74878E78 = x7B777E77 ^ x0FF0F00F; 1366 x30 = a1 & ~x00001841; 1367 x31 = x30 ^ x74878E78; 1368 *out4 ^= x31; 1369 1370 x003C003C = a5 & ~x3CC33CC3; 1371 x5A7D5A7D = x5A555A55 | x003C003C; 1372 x333300F0 = x00003CC3 ^ x33333C33; 1373 x694E5A8D = x5A7D5A7D ^ x333300F0; 1374 1375 x0FF0CCCC = x00003CC3 ^ x0FF0F00F; 1376 x000F0303 = a4 & ~x0FF0CCCC; 1377 x5A505854 = x5A555A55 & ~x000F0303; 1378 x33CC000F = a5 ^ x333300F0; 1379 x699C585B = x5A505854 ^ x33CC000F; 1380 1381 x7F878F78 = x0F000F00 | x74878E78; 1382 x21101013 = a3 & x699C585B; 1383 x7F979F7B = x7F878F78 | x21101013; 1384 x30030CC0 = x3CC33CC3 & ~x0FF0F00F; 1385 x4F9493BB = x7F979F7B ^ x30030CC0; 1386 x00 = x4F9493BB & ~a1; 1387 x01 = x00 ^ x694E5A8D; 1388 *out1 ^= x01; 1389 1390 x6F9CDBFB = x699C585B | x4F9493BB; 1391 x0000DBFB = a6 & x6F9CDBFB; 1392 x00005151 = a2 & x0000DBFB; 1393 x26DAC936 = x694E5A8D ^ x4F9493BB; 1394 x26DA9867 = x00005151 ^ x26DAC936; 1395 1396 x27DA9877 = x21101013 | x26DA9867; 1397 x27DA438C = x0000DBFB ^ x27DA9877; 1398 x2625C9C9 = a5 ^ x26DAC936; 1399 x27FFCBCD = x27DA438C | x2625C9C9; 1400 x20 = x27FFCBCD & a1; 1401 x21 = x20 ^ x699C585B; 1402 *out3 ^= x21; 1403 1404 x27FF1036 = x0000DBFB ^ x27FFCBCD; 1405 x27FF103E = x003C003C | x27FF1036; 1406 xB06B6C44 = ~x4F9493BB; 1407 x97947C7A = x27FF103E ^ xB06B6C44; 1408 x10 = x97947C7A & ~a1; 1409 x11 = x10 ^ x26DA9867; 1410 *out2 ^= x11; 1411} 1412 1413DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) 1414{ 1415 u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; 1416 u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; 1417 u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; 1418 u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; 1419 u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; 1420 u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; 1421 u32 xF700A600, x61008000, x03B7856B, x62B7056B; 1422 u32 x00, x01, x10, x11, x20, x21, x30, x31; 1423 1424 x0C0C0C0C = a3 & ~a2; 1425 x0000F0F0 = a5 & ~a3; 1426 x00FFF00F = a4 ^ x0000F0F0; 1427 x00555005 = a1 & x00FFF00F; 1428 x00515001 = x00555005 & ~x0C0C0C0C; 1429 1430 x33000330 = a2 & ~x00FFF00F; 1431 x77555775 = a1 | x33000330; 1432 x30303030 = a2 & ~a3; 1433 x3030CFCF = a5 ^ x30303030; 1434 x30104745 = x77555775 & x3030CFCF; 1435 x30555745 = x00555005 | x30104745; 1436 1437 xFF000FF0 = ~x00FFF00F; 1438 xCF1048B5 = x30104745 ^ xFF000FF0; 1439 x080A080A = a3 & ~x77555775; 1440 xC71A40BF = xCF1048B5 ^ x080A080A; 1441 xCB164CB3 = x0C0C0C0C ^ xC71A40BF; 1442 x10 = x00515001 | a6; 1443 x11 = x10 ^ xCB164CB3; 1444 *out2 ^= x11; 1445 1446 x9E4319E6 = a1 ^ xCB164CB3; 1447 x000019E6 = a5 & x9E4319E6; 1448 xF429738C = a2 ^ xC71A40BF; 1449 xF4296A6A = x000019E6 ^ xF429738C; 1450 xC729695A = x33000330 ^ xF4296A6A; 1451 1452 xC47C3D2F = x30555745 ^ xF4296A6A; 1453 xF77F3F3F = a2 | xC47C3D2F; 1454 x9E43E619 = a5 ^ x9E4319E6; 1455 x693CD926 = xF77F3F3F ^ x9E43E619; 1456 x20 = x30555745 & a6; 1457 x21 = x20 ^ x693CD926; 1458 *out3 ^= x21; 1459 1460 xF719A695 = x3030CFCF ^ xC729695A; 1461 xF4FF73FF = a4 | xF429738C; 1462 x03E6D56A = xF719A695 ^ xF4FF73FF; 1463 x56B3803F = a1 ^ x03E6D56A; 1464 x30 = x56B3803F & a6; 1465 x31 = x30 ^ xC729695A; 1466 *out4 ^= x31; 1467 1468 xF700A600 = xF719A695 & ~a4; 1469 x61008000 = x693CD926 & xF700A600; 1470 x03B7856B = x00515001 ^ x03E6D56A; 1471 x62B7056B = x61008000 ^ x03B7856B; 1472 x00 = x62B7056B | a6; 1473 x01 = x00 ^ xC729695A; 1474 *out1 ^= x01; 1475} 1476 1477#endif 1478 1479#define SWAP(a, b) { u32 tmp=*a;*a=*b;*b=tmp; } 1480 1481#define DATASWAP \ 1482 SWAP (D00, D32); \ 1483 SWAP (D01, D33); \ 1484 SWAP (D02, D34); \ 1485 SWAP (D03, D35); \ 1486 SWAP (D04, D36); \ 1487 SWAP (D05, D37); \ 1488 SWAP (D06, D38); \ 1489 SWAP (D07, D39); \ 1490 SWAP (D08, D40); \ 1491 SWAP (D09, D41); \ 1492 SWAP (D10, D42); \ 1493 SWAP (D11, D43); \ 1494 SWAP (D12, D44); \ 1495 SWAP (D13, D45); \ 1496 SWAP (D14, D46); \ 1497 SWAP (D15, D47); \ 1498 SWAP (D16, D48); \ 1499 SWAP (D17, D49); \ 1500 SWAP (D18, D50); \ 1501 SWAP (D19, D51); \ 1502 SWAP (D20, D52); \ 1503 SWAP (D21, D53); \ 1504 SWAP (D22, D54); \ 1505 SWAP (D23, D55); \ 1506 SWAP (D24, D56); \ 1507 SWAP (D25, D57); \ 1508 SWAP (D26, D58); \ 1509 SWAP (D27, D59); \ 1510 SWAP (D28, D60); \ 1511 SWAP (D29, D61); \ 1512 SWAP (D30, D62); \ 1513 SWAP (D31, D63); 1514 1515#define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; } 1516#define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; } 1517#define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; } 1518#define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; } 1519#define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; } 1520#define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; } 1521#define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; } 1522#define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; } 1523#define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; } 1524#define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; } 1525#define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; } 1526#define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; } 1527#define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; } 1528#define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; } 1529#define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; } 1530#define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; } 1531 1532DECLSPEC void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63) 1533{ 1534 KXX_DECL u32 k00, k01, k02, k03, k04, k05; 1535 KXX_DECL u32 k06, k07, k08, k09, k10, k11; 1536 KXX_DECL u32 k12, k13, k14, k15, k16, k17; 1537 KXX_DECL u32 k18, k19, k20, k21, k22, k23; 1538 KXX_DECL u32 k24, k25, k26, k27, k28, k29; 1539 KXX_DECL u32 k30, k31, k32, k33, k34, k35; 1540 KXX_DECL u32 k36, k37, k38, k39, k40, k41; 1541 KXX_DECL u32 k42, k43, k44, k45, k46, k47; 1542 1543 #ifdef _unroll 1544 #pragma unroll 1545 #endif 1546 for (u32 i = 0; i < 2; i++) 1547 { 1548 if (i) KEYSET10 else KEYSET00 1549 1550 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); 1551 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1552 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1553 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); 1554 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1555 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1556 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1557 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1558 1559 if (i) KEYSET11 else KEYSET01 1560 1561 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1562 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1563 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1564 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1565 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1566 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1567 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1568 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1569 1570 if (i) KEYSET12 else KEYSET02 1571 1572 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); 1573 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1574 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1575 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); 1576 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1577 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1578 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1579 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1580 1581 if (i) KEYSET13 else KEYSET03 1582 1583 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1584 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1585 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1586 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1587 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1588 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1589 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1590 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1591 1592 if (i) KEYSET14 else KEYSET04 1593 1594 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); 1595 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1596 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1597 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); 1598 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1599 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1600 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1601 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1602 1603 if (i) KEYSET15 else KEYSET05 1604 1605 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1606 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1607 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1608 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1609 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1610 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1611 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1612 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1613 1614 if (i) KEYSET16 else KEYSET06 1615 1616 s1(*D63 ^ k00, *D32 ^ k01, *D33 ^ k02, *D34 ^ k03, *D35 ^ k04, *D36 ^ k05, D08, D16, D22, D30); 1617 s2(*D35 ^ k06, *D36 ^ k07, *D37 ^ k08, *D38 ^ k09, *D39 ^ k10, *D40 ^ k11, D12, D27, D01, D17); 1618 s3(*D39 ^ k12, *D40 ^ k13, *D41 ^ k14, *D42 ^ k15, *D43 ^ k16, *D44 ^ k17, D23, D15, D29, D05); 1619 s4(*D43 ^ k18, *D44 ^ k19, *D45 ^ k20, *D46 ^ k21, *D47 ^ k22, *D48 ^ k23, D25, D19, D09, D00); 1620 s5(*D47 ^ k24, *D48 ^ k25, *D49 ^ k26, *D50 ^ k27, *D51 ^ k28, *D52 ^ k29, D07, D13, D24, D02); 1621 s6(*D51 ^ k30, *D52 ^ k31, *D53 ^ k32, *D54 ^ k33, *D55 ^ k34, *D56 ^ k35, D03, D28, D10, D18); 1622 s7(*D55 ^ k36, *D56 ^ k37, *D57 ^ k38, *D58 ^ k39, *D59 ^ k40, *D60 ^ k41, D31, D11, D21, D06); 1623 s8(*D59 ^ k42, *D60 ^ k43, *D61 ^ k44, *D62 ^ k45, *D63 ^ k46, *D32 ^ k47, D04, D26, D14, D20); 1624 1625 if (i) KEYSET17 else KEYSET07 1626 1627 s1(*D31 ^ k00, *D00 ^ k01, *D01 ^ k02, *D02 ^ k03, *D03 ^ k04, *D04 ^ k05, D40, D48, D54, D62); 1628 s2(*D03 ^ k06, *D04 ^ k07, *D05 ^ k08, *D06 ^ k09, *D07 ^ k10, *D08 ^ k11, D44, D59, D33, D49); 1629 s3(*D07 ^ k12, *D08 ^ k13, *D09 ^ k14, *D10 ^ k15, *D11 ^ k16, *D12 ^ k17, D55, D47, D61, D37); 1630 s4(*D11 ^ k18, *D12 ^ k19, *D13 ^ k20, *D14 ^ k21, *D15 ^ k22, *D16 ^ k23, D57, D51, D41, D32); 1631 s5(*D15 ^ k24, *D16 ^ k25, *D17 ^ k26, *D18 ^ k27, *D19 ^ k28, *D20 ^ k29, D39, D45, D56, D34); 1632 s6(*D19 ^ k30, *D20 ^ k31, *D21 ^ k32, *D22 ^ k33, *D23 ^ k34, *D24 ^ k35, D35, D60, D42, D50); 1633 s7(*D23 ^ k36, *D24 ^ k37, *D25 ^ k38, *D26 ^ k39, *D27 ^ k40, *D28 ^ k41, D63, D43, D53, D38); 1634 s8(*D27 ^ k42, *D28 ^ k43, *D29 ^ k44, *D30 ^ k45, *D31 ^ k46, *D00 ^ k47, D36, D58, D46, D52); 1635 } 1636} 1637 1638DECLSPEC void transpose32c (u32 *data) 1639{ 1640 #define swap(x,y,j,m) \ 1641 t = ((x) ^ ((y) >> (j))) & (m); \ 1642 (x) = (x) ^ t; \ 1643 (y) = (y) ^ (t << (j)); 1644 1645 u32 t; 1646 1647 swap (data[ 0], data[16], 16, 0x0000ffff); 1648 swap (data[ 1], data[17], 16, 0x0000ffff); 1649 swap (data[ 2], data[18], 16, 0x0000ffff); 1650 swap (data[ 3], data[19], 16, 0x0000ffff); 1651 swap (data[ 4], data[20], 16, 0x0000ffff); 1652 swap (data[ 5], data[21], 16, 0x0000ffff); 1653 swap (data[ 6], data[22], 16, 0x0000ffff); 1654 swap (data[ 7], data[23], 16, 0x0000ffff); 1655 swap (data[ 8], data[24], 16, 0x0000ffff); 1656 swap (data[ 9], data[25], 16, 0x0000ffff); 1657 swap (data[10], data[26], 16, 0x0000ffff); 1658 swap (data[11], data[27], 16, 0x0000ffff); 1659 swap (data[12], data[28], 16, 0x0000ffff); 1660 swap (data[13], data[29], 16, 0x0000ffff); 1661 swap (data[14], data[30], 16, 0x0000ffff); 1662 swap (data[15], data[31], 16, 0x0000ffff); 1663 swap (data[ 0], data[ 8], 8, 0x00ff00ff); 1664 swap (data[ 1], data[ 9], 8, 0x00ff00ff); 1665 swap (data[ 2], data[10], 8, 0x00ff00ff); 1666 swap (data[ 3], data[11], 8, 0x00ff00ff); 1667 swap (data[ 4], data[12], 8, 0x00ff00ff); 1668 swap (data[ 5], data[13], 8, 0x00ff00ff); 1669 swap (data[ 6], data[14], 8, 0x00ff00ff); 1670 swap (data[ 7], data[15], 8, 0x00ff00ff); 1671 swap (data[ 0], data[ 4], 4, 0x0f0f0f0f); 1672 swap (data[ 1], data[ 5], 4, 0x0f0f0f0f); 1673 swap (data[ 2], data[ 6], 4, 0x0f0f0f0f); 1674 swap (data[ 3], data[ 7], 4, 0x0f0f0f0f); 1675 swap (data[ 0], data[ 2], 2, 0x33333333); 1676 swap (data[ 1], data[ 3], 2, 0x33333333); 1677 swap (data[ 0], data[ 1], 1, 0x55555555); 1678 swap (data[ 2], data[ 3], 1, 0x55555555); 1679 swap (data[ 4], data[ 6], 2, 0x33333333); 1680 swap (data[ 5], data[ 7], 2, 0x33333333); 1681 swap (data[ 4], data[ 5], 1, 0x55555555); 1682 swap (data[ 6], data[ 7], 1, 0x55555555); 1683 swap (data[ 8], data[12], 4, 0x0f0f0f0f); 1684 swap (data[ 9], data[13], 4, 0x0f0f0f0f); 1685 swap (data[10], data[14], 4, 0x0f0f0f0f); 1686 swap (data[11], data[15], 4, 0x0f0f0f0f); 1687 swap (data[ 8], data[10], 2, 0x33333333); 1688 swap (data[ 9], data[11], 2, 0x33333333); 1689 swap (data[ 8], data[ 9], 1, 0x55555555); 1690 swap (data[10], data[11], 1, 0x55555555); 1691 swap (data[12], data[14], 2, 0x33333333); 1692 swap (data[13], data[15], 2, 0x33333333); 1693 swap (data[12], data[13], 1, 0x55555555); 1694 swap (data[14], data[15], 1, 0x55555555); 1695 swap (data[16], data[24], 8, 0x00ff00ff); 1696 swap (data[17], data[25], 8, 0x00ff00ff); 1697 swap (data[18], data[26], 8, 0x00ff00ff); 1698 swap (data[19], data[27], 8, 0x00ff00ff); 1699 swap (data[20], data[28], 8, 0x00ff00ff); 1700 swap (data[21], data[29], 8, 0x00ff00ff); 1701 swap (data[22], data[30], 8, 0x00ff00ff); 1702 swap (data[23], data[31], 8, 0x00ff00ff); 1703 swap (data[16], data[20], 4, 0x0f0f0f0f); 1704 swap (data[17], data[21], 4, 0x0f0f0f0f); 1705 swap (data[18], data[22], 4, 0x0f0f0f0f); 1706 swap (data[19], data[23], 4, 0x0f0f0f0f); 1707 swap (data[16], data[18], 2, 0x33333333); 1708 swap (data[17], data[19], 2, 0x33333333); 1709 swap (data[16], data[17], 1, 0x55555555); 1710 swap (data[18], data[19], 1, 0x55555555); 1711 swap (data[20], data[22], 2, 0x33333333); 1712 swap (data[21], data[23], 2, 0x33333333); 1713 swap (data[20], data[21], 1, 0x55555555); 1714 swap (data[22], data[23], 1, 0x55555555); 1715 swap (data[24], data[28], 4, 0x0f0f0f0f); 1716 swap (data[25], data[29], 4, 0x0f0f0f0f); 1717 swap (data[26], data[30], 4, 0x0f0f0f0f); 1718 swap (data[27], data[31], 4, 0x0f0f0f0f); 1719 swap (data[24], data[26], 2, 0x33333333); 1720 swap (data[25], data[27], 2, 0x33333333); 1721 swap (data[24], data[25], 1, 0x55555555); 1722 swap (data[26], data[27], 1, 0x55555555); 1723 swap (data[28], data[30], 2, 0x33333333); 1724 swap (data[29], data[31], 2, 0x33333333); 1725 swap (data[28], data[29], 1, 0x55555555); 1726 swap (data[30], data[31], 1, 0x55555555); 1727} 1728 1729// 1730// transpose bitslice mod : attention race conditions, need different buffers for *in and *out 1731// 1732 1733KERNEL_FQ void m03000_tm (GLOBAL_AS u32 *mod, GLOBAL_AS bs_word_t *words_buf_b) 1734{ 1735 const u64 gid = get_global_id (0); 1736 1737 const u32 block = gid / 32; 1738 const u32 slice = gid % 32; 1739 1740 const u32 w0 = mod[gid]; 1741 1742 for (int i = 0; i < 32; i += 8) 1743 { 1744 hc_atomic_or (&words_buf_b[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice)); 1745 hc_atomic_or (&words_buf_b[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice)); 1746 hc_atomic_or (&words_buf_b[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice)); 1747 hc_atomic_or (&words_buf_b[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice)); 1748 hc_atomic_or (&words_buf_b[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice)); 1749 hc_atomic_or (&words_buf_b[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice)); 1750 hc_atomic_or (&words_buf_b[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice)); 1751 hc_atomic_or (&words_buf_b[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice)); 1752 } 1753} 1754 1755KERNEL_FQ void m03000_mxx (KERN_ATTR_BITSLICE ()) 1756{ 1757 /** 1758 * base 1759 */ 1760 1761 const u64 gid = get_global_id (0); 1762 const u64 lid = get_local_id (0); 1763 1764 if (gid >= gid_max) return; 1765 1766 /** 1767 * base 1768 */ 1769 1770 const u32 w0s = pws[gid].i[0]; 1771 const u32 w1s = pws[gid].i[1]; 1772 1773 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0) 1774 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0) 1775 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0) 1776 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0) 1777 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0) 1778 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0) 1779 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0) 1780 #define K07 (((w0s >> ( 0 + 0)) & 1) ? -1 : 0) 1781 #define K08 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0) 1782 #define K09 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0) 1783 #define K10 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0) 1784 #define K11 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0) 1785 #define K12 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0) 1786 #define K13 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0) 1787 #define K14 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0) 1788 #define K15 (((w0s >> ( 8 + 0)) & 1) ? -1 : 0) 1789 #define K16 (((w0s >> (16 + 7)) & 1) ? -1 : 0) 1790 #define K17 (((w0s >> (16 + 6)) & 1) ? -1 : 0) 1791 #define K18 (((w0s >> (16 + 5)) & 1) ? -1 : 0) 1792 #define K19 (((w0s >> (16 + 4)) & 1) ? -1 : 0) 1793 #define K20 (((w0s >> (16 + 3)) & 1) ? -1 : 0) 1794 #define K21 (((w0s >> (16 + 2)) & 1) ? -1 : 0) 1795 #define K22 (((w0s >> (16 + 1)) & 1) ? -1 : 0) 1796 #define K23 (((w0s >> (16 + 0)) & 1) ? -1 : 0) 1797 #define K24 (((w0s >> (24 + 7)) & 1) ? -1 : 0) 1798 #define K25 (((w0s >> (24 + 6)) & 1) ? -1 : 0) 1799 #define K26 (((w0s >> (24 + 5)) & 1) ? -1 : 0) 1800 #define K27 (((w0s >> (24 + 4)) & 1) ? -1 : 0) 1801 #define K28 (((w0s >> (24 + 3)) & 1) ? -1 : 0) 1802 #define K29 (((w0s >> (24 + 2)) & 1) ? -1 : 0) 1803 #define K30 (((w0s >> (24 + 1)) & 1) ? -1 : 0) 1804 #define K31 (((w0s >> (24 + 0)) & 1) ? -1 : 0) 1805 #define K32 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0) 1806 #define K33 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0) 1807 #define K34 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0) 1808 #define K35 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0) 1809 #define K36 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0) 1810 #define K37 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0) 1811 #define K38 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0) 1812 #define K39 (((w1s >> ( 0 + 0)) & 1) ? -1 : 0) 1813 #define K40 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0) 1814 #define K41 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0) 1815 #define K42 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0) 1816 #define K43 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0) 1817 #define K44 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0) 1818 #define K45 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0) 1819 #define K46 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0) 1820 #define K47 (((w1s >> ( 8 + 0)) & 1) ? -1 : 0) 1821 #define K48 (((w1s >> (16 + 7)) & 1) ? -1 : 0) 1822 #define K49 (((w1s >> (16 + 6)) & 1) ? -1 : 0) 1823 #define K50 (((w1s >> (16 + 5)) & 1) ? -1 : 0) 1824 #define K51 (((w1s >> (16 + 4)) & 1) ? -1 : 0) 1825 #define K52 (((w1s >> (16 + 3)) & 1) ? -1 : 0) 1826 #define K53 (((w1s >> (16 + 2)) & 1) ? -1 : 0) 1827 #define K54 (((w1s >> (16 + 1)) & 1) ? -1 : 0) 1828 #define K55 (((w1s >> (16 + 0)) & 1) ? -1 : 0) 1829 1830 /** 1831 * inner loop 1832 */ 1833 1834 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) 1835 { 1836 u32 k00 = K00; 1837 u32 k01 = K01; 1838 u32 k02 = K02; 1839 u32 k03 = K03; 1840 u32 k04 = K04; 1841 u32 k05 = K05; 1842 u32 k06 = K06; 1843 u32 k07 = K07; 1844 u32 k08 = K08; 1845 u32 k09 = K09; 1846 u32 k10 = K10; 1847 u32 k11 = K11; 1848 u32 k12 = K12; 1849 u32 k13 = K13; 1850 u32 k14 = K14; 1851 u32 k15 = K15; 1852 u32 k16 = K16; 1853 u32 k17 = K17; 1854 u32 k18 = K18; 1855 u32 k19 = K19; 1856 u32 k20 = K20; 1857 u32 k21 = K21; 1858 u32 k22 = K22; 1859 u32 k23 = K23; 1860 u32 k24 = K24; 1861 u32 k25 = K25; 1862 u32 k26 = K26; 1863 u32 k27 = K27; 1864 u32 k28 = K28; 1865 u32 k29 = K29; 1866 u32 k30 = K30; 1867 u32 k31 = K31; 1868 1869 const u32 pc_pos = il_pos / 32; 1870 1871 k00 |= words_buf_s[pc_pos].b[ 0]; 1872 k01 |= words_buf_s[pc_pos].b[ 1]; 1873 k02 |= words_buf_s[pc_pos].b[ 2]; 1874 k03 |= words_buf_s[pc_pos].b[ 3]; 1875 k04 |= words_buf_s[pc_pos].b[ 4]; 1876 k05 |= words_buf_s[pc_pos].b[ 5]; 1877 k06 |= words_buf_s[pc_pos].b[ 6]; 1878 k07 |= words_buf_s[pc_pos].b[ 7]; 1879 k08 |= words_buf_s[pc_pos].b[ 8]; 1880 k09 |= words_buf_s[pc_pos].b[ 9]; 1881 k10 |= words_buf_s[pc_pos].b[10]; 1882 k11 |= words_buf_s[pc_pos].b[11]; 1883 k12 |= words_buf_s[pc_pos].b[12]; 1884 k13 |= words_buf_s[pc_pos].b[13]; 1885 k14 |= words_buf_s[pc_pos].b[14]; 1886 k15 |= words_buf_s[pc_pos].b[15]; 1887 k16 |= words_buf_s[pc_pos].b[16]; 1888 k17 |= words_buf_s[pc_pos].b[17]; 1889 k18 |= words_buf_s[pc_pos].b[18]; 1890 k19 |= words_buf_s[pc_pos].b[19]; 1891 k20 |= words_buf_s[pc_pos].b[20]; 1892 k21 |= words_buf_s[pc_pos].b[21]; 1893 k22 |= words_buf_s[pc_pos].b[22]; 1894 k23 |= words_buf_s[pc_pos].b[23]; 1895 k24 |= words_buf_s[pc_pos].b[24]; 1896 k25 |= words_buf_s[pc_pos].b[25]; 1897 k26 |= words_buf_s[pc_pos].b[26]; 1898 k27 |= words_buf_s[pc_pos].b[27]; 1899 k28 |= words_buf_s[pc_pos].b[28]; 1900 k29 |= words_buf_s[pc_pos].b[29]; 1901 k30 |= words_buf_s[pc_pos].b[30]; 1902 k31 |= words_buf_s[pc_pos].b[31]; 1903 1904 // KGS!@#$% including IP 1905 1906 u32 D00 = 0; 1907 u32 D01 = 0; 1908 u32 D02 = 0; 1909 u32 D03 = 0xffffffff; 1910 u32 D04 = 0; 1911 u32 D05 = 0xffffffff; 1912 u32 D06 = 0xffffffff; 1913 u32 D07 = 0xffffffff; 1914 u32 D08 = 0; 1915 u32 D09 = 0; 1916 u32 D10 = 0; 1917 u32 D11 = 0; 1918 u32 D12 = 0; 1919 u32 D13 = 0xffffffff; 1920 u32 D14 = 0; 1921 u32 D15 = 0; 1922 u32 D16 = 0xffffffff; 1923 u32 D17 = 0xffffffff; 1924 u32 D18 = 0; 1925 u32 D19 = 0; 1926 u32 D20 = 0; 1927 u32 D21 = 0; 1928 u32 D22 = 0xffffffff; 1929 u32 D23 = 0; 1930 u32 D24 = 0xffffffff; 1931 u32 D25 = 0; 1932 u32 D26 = 0xffffffff; 1933 u32 D27 = 0; 1934 u32 D28 = 0xffffffff; 1935 u32 D29 = 0xffffffff; 1936 u32 D30 = 0xffffffff; 1937 u32 D31 = 0xffffffff; 1938 u32 D32 = 0; 1939 u32 D33 = 0; 1940 u32 D34 = 0; 1941 u32 D35 = 0; 1942 u32 D36 = 0; 1943 u32 D37 = 0; 1944 u32 D38 = 0; 1945 u32 D39 = 0; 1946 u32 D40 = 0xffffffff; 1947 u32 D41 = 0xffffffff; 1948 u32 D42 = 0xffffffff; 1949 u32 D43 = 0; 1950 u32 D44 = 0xffffffff; 1951 u32 D45 = 0; 1952 u32 D46 = 0; 1953 u32 D47 = 0; 1954 u32 D48 = 0; 1955 u32 D49 = 0; 1956 u32 D50 = 0; 1957 u32 D51 = 0; 1958 u32 D52 = 0; 1959 u32 D53 = 0; 1960 u32 D54 = 0; 1961 u32 D55 = 0xffffffff; 1962 u32 D56 = 0; 1963 u32 D57 = 0; 1964 u32 D58 = 0xffffffff; 1965 u32 D59 = 0; 1966 u32 D60 = 0; 1967 u32 D61 = 0xffffffff; 1968 u32 D62 = 0xffffffff; 1969 u32 D63 = 0xffffffff; 1970 1971 DES 1972 ( 1973 k00, k01, k02, k03, k04, k05, k06, 1974 k07, k08, k09, k10, k11, k12, k13, 1975 k14, k15, k16, k17, k18, k19, k20, 1976 k21, k22, k23, k24, k25, k26, k27, 1977 k28, k29, k30, k31, K32, K33, K34, 1978 K35, K36, K37, K38, K39, K40, K41, 1979 K42, K43, K44, K45, K46, K47, K48, 1980 K49, K50, K51, K52, K53, K54, K55, 1981 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, 1982 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, 1983 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, 1984 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, 1985 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, 1986 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, 1987 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, 1988 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 1989 ); 1990 1991 u32 out[64]; 1992 1993 out[ 0] = D00; 1994 out[ 1] = D01; 1995 out[ 2] = D02; 1996 out[ 3] = D03; 1997 out[ 4] = D04; 1998 out[ 5] = D05; 1999 out[ 6] = D06; 2000 out[ 7] = D07; 2001 out[ 8] = D08; 2002 out[ 9] = D09; 2003 out[10] = D10; 2004 out[11] = D11; 2005 out[12] = D12; 2006 out[13] = D13; 2007 out[14] = D14; 2008 out[15] = D15; 2009 out[16] = D16; 2010 out[17] = D17; 2011 out[18] = D18; 2012 out[19] = D19; 2013 out[20] = D20; 2014 out[21] = D21; 2015 out[22] = D22; 2016 out[23] = D23; 2017 out[24] = D24; 2018 out[25] = D25; 2019 out[26] = D26; 2020 out[27] = D27; 2021 out[28] = D28; 2022 out[29] = D29; 2023 out[30] = D30; 2024 out[31] = D31; 2025 out[32] = D32; 2026 out[33] = D33; 2027 out[34] = D34; 2028 out[35] = D35; 2029 out[36] = D36; 2030 out[37] = D37; 2031 out[38] = D38; 2032 out[39] = D39; 2033 out[40] = D40; 2034 out[41] = D41; 2035 out[42] = D42; 2036 out[43] = D43; 2037 out[44] = D44; 2038 out[45] = D45; 2039 out[46] = D46; 2040 out[47] = D47; 2041 out[48] = D48; 2042 out[49] = D49; 2043 out[50] = D50; 2044 out[51] = D51; 2045 out[52] = D52; 2046 out[53] = D53; 2047 out[54] = D54; 2048 out[55] = D55; 2049 out[56] = D56; 2050 out[57] = D57; 2051 out[58] = D58; 2052 out[59] = D59; 2053 out[60] = D60; 2054 out[61] = D61; 2055 out[62] = D62; 2056 out[63] = D63; 2057 2058 if (digests_cnt < 16) 2059 { 2060 for (u32 d = 0; d < digests_cnt; d++) 2061 { 2062 const u32 final_hash_pos = DIGESTS_OFFSET + d; 2063 2064 if (hashes_shown[final_hash_pos]) continue; 2065 2066 u32 search[2]; 2067 2068 search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; 2069 search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; 2070 2071 u32 tmpResult = 0; 2072 2073 #pragma unroll 2074 for (int i = 0; i < 32; i++) 2075 { 2076 const u32 b0 = -((search[0] >> i) & 1); 2077 const u32 b1 = -((search[1] >> i) & 1); 2078 2079 tmpResult |= out[ 0 + i] ^ b0; 2080 tmpResult |= out[32 + i] ^ b1; 2081 } 2082 2083 if (tmpResult == 0xffffffff) continue; 2084 2085 const u32 slice = ffz (tmpResult); 2086 2087 const u32 r0 = search[0]; 2088 const u32 r1 = search[1]; 2089 #ifdef KERNEL_STATIC 2090 const u32 r2 = 0; 2091 const u32 r3 = 0; 2092 #endif 2093 2094 #include COMPARE_M 2095 } 2096 } 2097 else 2098 { 2099 u32 out0[32]; 2100 u32 out1[32]; 2101 2102 #pragma unroll 2103 for (int i = 0; i < 32; i++) 2104 { 2105 out0[i] = out[ 0 + 31 - i]; 2106 out1[i] = out[32 + 31 - i]; 2107 } 2108 2109 transpose32c (out0); 2110 transpose32c (out1); 2111 2112 #pragma unroll 2113 for (int slice = 0; slice < 32; slice++) 2114 { 2115 const u32 r0 = out0[31 - slice]; 2116 const u32 r1 = out1[31 - slice]; 2117 #ifdef KERNEL_STATIC 2118 const u32 r2 = 0; 2119 const u32 r3 = 0; 2120 #endif 2121 2122 #include COMPARE_M 2123 } 2124 } 2125 } 2126} 2127 2128KERNEL_FQ void m03000_sxx (KERN_ATTR_BITSLICE ()) 2129{ 2130 /** 2131 * base 2132 */ 2133 2134 const u64 gid = get_global_id (0); 2135 const u64 lid = get_local_id (0); 2136 2137 if (gid >= gid_max) return; 2138 2139 /** 2140 * digest 2141 */ 2142 2143 const u32 s0 = digests_buf[0].digest_buf[0]; 2144 const u32 s1 = digests_buf[0].digest_buf[1]; 2145 2146 const u32 S00 = (((s0 >> 0) & 1) ? -1 : 0); 2147 const u32 S01 = (((s0 >> 1) & 1) ? -1 : 0); 2148 const u32 S02 = (((s0 >> 2) & 1) ? -1 : 0); 2149 const u32 S03 = (((s0 >> 3) & 1) ? -1 : 0); 2150 const u32 S04 = (((s0 >> 4) & 1) ? -1 : 0); 2151 const u32 S05 = (((s0 >> 5) & 1) ? -1 : 0); 2152 const u32 S06 = (((s0 >> 6) & 1) ? -1 : 0); 2153 const u32 S07 = (((s0 >> 7) & 1) ? -1 : 0); 2154 const u32 S08 = (((s0 >> 8) & 1) ? -1 : 0); 2155 const u32 S09 = (((s0 >> 9) & 1) ? -1 : 0); 2156 const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0); 2157 const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0); 2158 const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0); 2159 const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0); 2160 const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0); 2161 const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0); 2162 const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0); 2163 const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0); 2164 const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0); 2165 const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0); 2166 const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0); 2167 const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0); 2168 const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0); 2169 const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0); 2170 const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0); 2171 const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0); 2172 const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0); 2173 const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0); 2174 const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0); 2175 const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0); 2176 const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0); 2177 const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0); 2178 const u32 S32 = (((s1 >> 0) & 1) ? -1 : 0); 2179 const u32 S33 = (((s1 >> 1) & 1) ? -1 : 0); 2180 const u32 S34 = (((s1 >> 2) & 1) ? -1 : 0); 2181 const u32 S35 = (((s1 >> 3) & 1) ? -1 : 0); 2182 const u32 S36 = (((s1 >> 4) & 1) ? -1 : 0); 2183 const u32 S37 = (((s1 >> 5) & 1) ? -1 : 0); 2184 const u32 S38 = (((s1 >> 6) & 1) ? -1 : 0); 2185 const u32 S39 = (((s1 >> 7) & 1) ? -1 : 0); 2186 const u32 S40 = (((s1 >> 8) & 1) ? -1 : 0); 2187 const u32 S41 = (((s1 >> 9) & 1) ? -1 : 0); 2188 const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0); 2189 const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0); 2190 const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0); 2191 const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0); 2192 const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0); 2193 const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0); 2194 const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0); 2195 const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0); 2196 const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0); 2197 const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0); 2198 const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0); 2199 const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0); 2200 const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0); 2201 const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0); 2202 const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0); 2203 const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0); 2204 const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0); 2205 const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0); 2206 const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0); 2207 const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0); 2208 const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0); 2209 const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0); 2210 2211 /** 2212 * base 2213 */ 2214 2215 const u32 w0s = pws[gid].i[0]; 2216 const u32 w1s = pws[gid].i[1]; 2217 2218 #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0) 2219 #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0) 2220 #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0) 2221 #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0) 2222 #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0) 2223 #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0) 2224 #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0) 2225 #define K07 (((w0s >> ( 0 + 0)) & 1) ? -1 : 0) 2226 #define K08 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0) 2227 #define K09 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0) 2228 #define K10 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0) 2229 #define K11 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0) 2230 #define K12 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0) 2231 #define K13 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0) 2232 #define K14 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0) 2233 #define K15 (((w0s >> ( 8 + 0)) & 1) ? -1 : 0) 2234 #define K16 (((w0s >> (16 + 7)) & 1) ? -1 : 0) 2235 #define K17 (((w0s >> (16 + 6)) & 1) ? -1 : 0) 2236 #define K18 (((w0s >> (16 + 5)) & 1) ? -1 : 0) 2237 #define K19 (((w0s >> (16 + 4)) & 1) ? -1 : 0) 2238 #define K20 (((w0s >> (16 + 3)) & 1) ? -1 : 0) 2239 #define K21 (((w0s >> (16 + 2)) & 1) ? -1 : 0) 2240 #define K22 (((w0s >> (16 + 1)) & 1) ? -1 : 0) 2241 #define K23 (((w0s >> (16 + 0)) & 1) ? -1 : 0) 2242 #define K24 (((w0s >> (24 + 7)) & 1) ? -1 : 0) 2243 #define K25 (((w0s >> (24 + 6)) & 1) ? -1 : 0) 2244 #define K26 (((w0s >> (24 + 5)) & 1) ? -1 : 0) 2245 #define K27 (((w0s >> (24 + 4)) & 1) ? -1 : 0) 2246 #define K28 (((w0s >> (24 + 3)) & 1) ? -1 : 0) 2247 #define K29 (((w0s >> (24 + 2)) & 1) ? -1 : 0) 2248 #define K30 (((w0s >> (24 + 1)) & 1) ? -1 : 0) 2249 #define K31 (((w0s >> (24 + 0)) & 1) ? -1 : 0) 2250 #define K32 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0) 2251 #define K33 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0) 2252 #define K34 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0) 2253 #define K35 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0) 2254 #define K36 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0) 2255 #define K37 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0) 2256 #define K38 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0) 2257 #define K39 (((w1s >> ( 0 + 0)) & 1) ? -1 : 0) 2258 #define K40 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0) 2259 #define K41 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0) 2260 #define K42 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0) 2261 #define K43 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0) 2262 #define K44 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0) 2263 #define K45 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0) 2264 #define K46 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0) 2265 #define K47 (((w1s >> ( 8 + 0)) & 1) ? -1 : 0) 2266 #define K48 (((w1s >> (16 + 7)) & 1) ? -1 : 0) 2267 #define K49 (((w1s >> (16 + 6)) & 1) ? -1 : 0) 2268 #define K50 (((w1s >> (16 + 5)) & 1) ? -1 : 0) 2269 #define K51 (((w1s >> (16 + 4)) & 1) ? -1 : 0) 2270 #define K52 (((w1s >> (16 + 3)) & 1) ? -1 : 0) 2271 #define K53 (((w1s >> (16 + 2)) & 1) ? -1 : 0) 2272 #define K54 (((w1s >> (16 + 1)) & 1) ? -1 : 0) 2273 #define K55 (((w1s >> (16 + 0)) & 1) ? -1 : 0) 2274 2275 /** 2276 * inner loop 2277 */ 2278 2279 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += 32) 2280 { 2281 u32 k00 = K00; 2282 u32 k01 = K01; 2283 u32 k02 = K02; 2284 u32 k03 = K03; 2285 u32 k04 = K04; 2286 u32 k05 = K05; 2287 u32 k06 = K06; 2288 u32 k07 = K07; 2289 u32 k08 = K08; 2290 u32 k09 = K09; 2291 u32 k10 = K10; 2292 u32 k11 = K11; 2293 u32 k12 = K12; 2294 u32 k13 = K13; 2295 u32 k14 = K14; 2296 u32 k15 = K15; 2297 u32 k16 = K16; 2298 u32 k17 = K17; 2299 u32 k18 = K18; 2300 u32 k19 = K19; 2301 u32 k20 = K20; 2302 u32 k21 = K21; 2303 u32 k22 = K22; 2304 u32 k23 = K23; 2305 u32 k24 = K24; 2306 u32 k25 = K25; 2307 u32 k26 = K26; 2308 u32 k27 = K27; 2309 u32 k28 = K28; 2310 u32 k29 = K29; 2311 u32 k30 = K30; 2312 u32 k31 = K31; 2313 2314 const u32 pc_pos = il_pos / 32; 2315 2316 k00 |= words_buf_s[pc_pos].b[ 0]; 2317 k01 |= words_buf_s[pc_pos].b[ 1]; 2318 k02 |= words_buf_s[pc_pos].b[ 2]; 2319 k03 |= words_buf_s[pc_pos].b[ 3]; 2320 k04 |= words_buf_s[pc_pos].b[ 4]; 2321 k05 |= words_buf_s[pc_pos].b[ 5]; 2322 k06 |= words_buf_s[pc_pos].b[ 6]; 2323 k07 |= words_buf_s[pc_pos].b[ 7]; 2324 k08 |= words_buf_s[pc_pos].b[ 8]; 2325 k09 |= words_buf_s[pc_pos].b[ 9]; 2326 k10 |= words_buf_s[pc_pos].b[10]; 2327 k11 |= words_buf_s[pc_pos].b[11]; 2328 k12 |= words_buf_s[pc_pos].b[12]; 2329 k13 |= words_buf_s[pc_pos].b[13]; 2330 k14 |= words_buf_s[pc_pos].b[14]; 2331 k15 |= words_buf_s[pc_pos].b[15]; 2332 k16 |= words_buf_s[pc_pos].b[16]; 2333 k17 |= words_buf_s[pc_pos].b[17]; 2334 k18 |= words_buf_s[pc_pos].b[18]; 2335 k19 |= words_buf_s[pc_pos].b[19]; 2336 k20 |= words_buf_s[pc_pos].b[20]; 2337 k21 |= words_buf_s[pc_pos].b[21]; 2338 k22 |= words_buf_s[pc_pos].b[22]; 2339 k23 |= words_buf_s[pc_pos].b[23]; 2340 k24 |= words_buf_s[pc_pos].b[24]; 2341 k25 |= words_buf_s[pc_pos].b[25]; 2342 k26 |= words_buf_s[pc_pos].b[26]; 2343 k27 |= words_buf_s[pc_pos].b[27]; 2344 k28 |= words_buf_s[pc_pos].b[28]; 2345 k29 |= words_buf_s[pc_pos].b[29]; 2346 k30 |= words_buf_s[pc_pos].b[30]; 2347 k31 |= words_buf_s[pc_pos].b[31]; 2348 2349 // KGS!@#$% including IP 2350 2351 u32 D00 = 0; 2352 u32 D01 = 0; 2353 u32 D02 = 0; 2354 u32 D03 = 0xffffffff; 2355 u32 D04 = 0; 2356 u32 D05 = 0xffffffff; 2357 u32 D06 = 0xffffffff; 2358 u32 D07 = 0xffffffff; 2359 u32 D08 = 0; 2360 u32 D09 = 0; 2361 u32 D10 = 0; 2362 u32 D11 = 0; 2363 u32 D12 = 0; 2364 u32 D13 = 0xffffffff; 2365 u32 D14 = 0; 2366 u32 D15 = 0; 2367 u32 D16 = 0xffffffff; 2368 u32 D17 = 0xffffffff; 2369 u32 D18 = 0; 2370 u32 D19 = 0; 2371 u32 D20 = 0; 2372 u32 D21 = 0; 2373 u32 D22 = 0xffffffff; 2374 u32 D23 = 0; 2375 u32 D24 = 0xffffffff; 2376 u32 D25 = 0; 2377 u32 D26 = 0xffffffff; 2378 u32 D27 = 0; 2379 u32 D28 = 0xffffffff; 2380 u32 D29 = 0xffffffff; 2381 u32 D30 = 0xffffffff; 2382 u32 D31 = 0xffffffff; 2383 u32 D32 = 0; 2384 u32 D33 = 0; 2385 u32 D34 = 0; 2386 u32 D35 = 0; 2387 u32 D36 = 0; 2388 u32 D37 = 0; 2389 u32 D38 = 0; 2390 u32 D39 = 0; 2391 u32 D40 = 0xffffffff; 2392 u32 D41 = 0xffffffff; 2393 u32 D42 = 0xffffffff; 2394 u32 D43 = 0; 2395 u32 D44 = 0xffffffff; 2396 u32 D45 = 0; 2397 u32 D46 = 0; 2398 u32 D47 = 0; 2399 u32 D48 = 0; 2400 u32 D49 = 0; 2401 u32 D50 = 0; 2402 u32 D51 = 0; 2403 u32 D52 = 0; 2404 u32 D53 = 0; 2405 u32 D54 = 0; 2406 u32 D55 = 0xffffffff; 2407 u32 D56 = 0; 2408 u32 D57 = 0; 2409 u32 D58 = 0xffffffff; 2410 u32 D59 = 0; 2411 u32 D60 = 0; 2412 u32 D61 = 0xffffffff; 2413 u32 D62 = 0xffffffff; 2414 u32 D63 = 0xffffffff; 2415 2416 DES 2417 ( 2418 k00, k01, k02, k03, k04, k05, k06, 2419 k07, k08, k09, k10, k11, k12, k13, 2420 k14, k15, k16, k17, k18, k19, k20, 2421 k21, k22, k23, k24, k25, k26, k27, 2422 k28, k29, k30, k31, K32, K33, K34, 2423 K35, K36, K37, K38, K39, K40, K41, 2424 K42, K43, K44, K45, K46, K47, K48, 2425 K49, K50, K51, K52, K53, K54, K55, 2426 &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07, 2427 &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15, 2428 &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23, 2429 &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31, 2430 &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39, 2431 &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47, 2432 &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55, 2433 &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63 2434 ); 2435 2436 u32 tmpResult = 0; 2437 2438 tmpResult |= D00 ^ S00; 2439 tmpResult |= D01 ^ S01; 2440 tmpResult |= D02 ^ S02; 2441 tmpResult |= D03 ^ S03; 2442 tmpResult |= D04 ^ S04; 2443 tmpResult |= D05 ^ S05; 2444 tmpResult |= D06 ^ S06; 2445 tmpResult |= D07 ^ S07; 2446 tmpResult |= D08 ^ S08; 2447 tmpResult |= D09 ^ S09; 2448 tmpResult |= D10 ^ S10; 2449 tmpResult |= D11 ^ S11; 2450 tmpResult |= D12 ^ S12; 2451 tmpResult |= D13 ^ S13; 2452 tmpResult |= D14 ^ S14; 2453 tmpResult |= D15 ^ S15; 2454 2455 if (tmpResult == 0xffffffff) continue; 2456 2457 tmpResult |= D16 ^ S16; 2458 tmpResult |= D17 ^ S17; 2459 tmpResult |= D18 ^ S18; 2460 tmpResult |= D19 ^ S19; 2461 tmpResult |= D20 ^ S20; 2462 tmpResult |= D21 ^ S21; 2463 tmpResult |= D22 ^ S22; 2464 tmpResult |= D23 ^ S23; 2465 tmpResult |= D24 ^ S24; 2466 tmpResult |= D25 ^ S25; 2467 tmpResult |= D26 ^ S26; 2468 tmpResult |= D27 ^ S27; 2469 tmpResult |= D28 ^ S28; 2470 tmpResult |= D29 ^ S29; 2471 tmpResult |= D30 ^ S30; 2472 tmpResult |= D31 ^ S31; 2473 2474 if (tmpResult == 0xffffffff) continue; 2475 2476 tmpResult |= D32 ^ S32; 2477 tmpResult |= D33 ^ S33; 2478 tmpResult |= D34 ^ S34; 2479 tmpResult |= D35 ^ S35; 2480 tmpResult |= D36 ^ S36; 2481 tmpResult |= D37 ^ S37; 2482 tmpResult |= D38 ^ S38; 2483 tmpResult |= D39 ^ S39; 2484 tmpResult |= D40 ^ S40; 2485 tmpResult |= D41 ^ S41; 2486 tmpResult |= D42 ^ S42; 2487 tmpResult |= D43 ^ S43; 2488 tmpResult |= D44 ^ S44; 2489 tmpResult |= D45 ^ S45; 2490 tmpResult |= D46 ^ S46; 2491 tmpResult |= D47 ^ S47; 2492 2493 if (tmpResult == 0xffffffff) continue; 2494 2495 tmpResult |= D48 ^ S48; 2496 tmpResult |= D49 ^ S49; 2497 tmpResult |= D50 ^ S50; 2498 tmpResult |= D51 ^ S51; 2499 tmpResult |= D52 ^ S52; 2500 tmpResult |= D53 ^ S53; 2501 tmpResult |= D54 ^ S54; 2502 tmpResult |= D55 ^ S55; 2503 tmpResult |= D56 ^ S56; 2504 tmpResult |= D57 ^ S57; 2505 tmpResult |= D58 ^ S58; 2506 tmpResult |= D59 ^ S59; 2507 tmpResult |= D60 ^ S60; 2508 tmpResult |= D61 ^ S61; 2509 tmpResult |= D62 ^ S62; 2510 tmpResult |= D63 ^ S63; 2511 2512 if (tmpResult == 0xffffffff) continue; 2513 2514 const u32 slice = ffz (tmpResult); 2515 2516 #ifdef KERNEL_STATIC 2517 #include COMPARE_S 2518 #endif 2519 } 2520} 2521