1 /*
2 * This file is part of John the Ripper password cracker,
3 * based on rawSHA256_fmt.c code and Drepper's spec at
4 * http://www.akkadia.org/drepper/SHA-crypt.txt
5 *
6 * This software is Copyright (c) 2012 magnum, and it is hereby released to the
7 * general public under the following terms: Redistribution and use in source
8 * and binary forms, with or without modification, are permitted.
9 *
10 * See code/comments in cryptsha256 for how and why this is being done. NOTE,
11 * we could limit ourselves to 15 byte password, and then only need 1 limb
12 * SHA512 SIMD logic. If we allow 2 limb logic then 79 byte passwords are max.
13 * this is better than cryptsha256, where if we only allowed 1 limb, then only
 * 3 byte passwords would have been max, and even at 2 limbs, 35 byte passwords
15 * are the longest we can do.
16 *
17 * Porting to SSE2, May 2015, JimF. A little harder than some, since we have to
18 * group and rearrange passwords based upon length. We must only run passwords
19 * of a specific block group size in 1 SSE_COEF_SHA512 bundle. If we later do
20 * PARA_SHA512, then each bundle of SSE_COEF_SHA512*PARA_SHA512 will have to be
21 * made up of passwords of same block group size.
22 *
23 * Here are the block sizes per password length. To be equal group size, all
24 * numbers for 2 passwords must be equal all the way across. So, password
25 * lengths of 0, 1, ... 15 are 1 group. 16..23 are another group. 24..31 are
26 * yet another, etc. There are 5 'groups' of lengths.
27 *
28 * Here is the raw block length data. Only first and last length for the group has been kept.
29 Len: cp pspc cspp ppc cpp psc csp pc
30 0 : 1 1 1 1 1 1 1 1
31 15 : 1 1 1 1 1 1 1 1
32 16 : 1 2 2 1 1 1 1 1
33 23 : 1 2 2 1 1 1 1 1
34 24 : 1 2 2 2 2 1 1 1
35 31 : 1 2 2 2 2 1 1 1
36 32 : 1 2 2 2 2 2 2 1
37 47 : 1 2 2 2 2 2 2 1
38 48 : 2 2 2 2 2 2 2 2
39 79 : 2 2 2 2 2 2 2 2
40 Source to make above table (made up to 90,but over 79 is 3 limbs)
41 #include <stdio.h>
42 int c=64, s=16;
43 int S(int sz) {
44 if (sz<=111) return 1;
45 else if (sz <= 111+128) return 2;
46 else return 3;
47 }
48 void proc(int p) {
49 int cp=p+c;
50 printf("%-2d : %d %d %d %d %d %d %d %d\n",
51 p,S(cp),S(cp+s+p),S(cp+s+p),S(cp+p),S(cp+p),S(cp+s),S(cp+s),S(cp));
52 }
53 void main(int argc, char **argv) {
54 int i;
55 if (argc==2) s=atoi(argv[1]);
56 printf("Len: cp pspc cspp ppc cpp psc csp pc (saltlen=%d)\n",s);
57 for (i = 0; i < 90; ++i)
58 proc(i);
59 }
60 */
61
62 #if FMT_EXTERNS_H
63 extern struct fmt_main fmt_cryptsha512;
64 #elif FMT_REGISTERS_H
65 john_register_one(&fmt_cryptsha512);
66 #else
67
68 #define _GNU_SOURCE 1
69 #include <string.h>
70
71 #ifdef _OPENMP
72 #include <omp.h>
73 #endif
74
75 #include "arch.h"
76 #include "sha2.h"
77 #include "params.h"
78 #include "common.h"
79 #include "formats.h"
80 #include "johnswap.h"
81 #include "simd-intrinsics.h"
82
83 #ifndef OMP_SCALE
84 #define OMP_SCALE 1 // This and MKPC tuned for core i7
85 #endif
86
87 // NOTE, in SSE mode, even if NOT in OMP, we may need to scale, quite a bit, due to needing
88 // to 'group' passwords differently, so that we have lengths which 'share' the same number
89 // of crypt block counts for each 'type'. We may want to scale as much as 128 or so, just
90 // to try to have better saturation. If we only had 8 passwords given to us, and they were
91 // one each of these lengths: 3 7 8 12 13 14 15 21, in theory, we could do this
// with only 2 SSE calls (SIMD_COEF_32==4 for SHA256). However, length 3 has to run by itself,
// length 7 by itself, 8 by itself, and the rest can run together, but there are 5 of them,
// so it takes two runs. So, instead of 2 runs, we have to do 5 runs. Not very efficient.
95 // however, if we have a lot more passwords to work with, we can re-arrange them, to run
96 // them in groups that all 'fit' together, and do so until we exhaust all from a given length
97 // range, then do all in the next range. Thus, until we get to the last set within a length
98 // range, we are doing a fully packed SSE run, and having a LOT less wasted space. This will
// get even more interesting, when we start doing OMP, but it should just be the same principle,
100 // preload more passwords, and group them, then run the OMP threads over a single length, then
101 // go to the next length, until done, trying to keep each thread running, and keeping each block
102 // of SSE data full, until the last in a range. We probably can simply build all the rearrangments,
103 // then let the threads go on ALL data, without caring about the length, since each thread will only
104 // be working on passwords in a single MMX buffer that all match, at any given moment.
105 #ifdef SIMD_COEF_64
106 #define SIMD_COEF_SCALE 32
107 #else
108 #define SIMD_COEF_SCALE 1
109 #endif
110
111 #define FORMAT_LABEL "sha512crypt"
112
113 #ifdef SIMD_COEF_64
114 #define ALGORITHM_NAME SHA512_ALGORITHM_NAME
115 #else
116 #if ARCH_BITS >= 64
117 #define ALGORITHM_NAME "64/" ARCH_BITS_STR SHA2_LIB
118 #else
119 #define ALGORITHM_NAME "32/" ARCH_BITS_STR SHA2_LIB
120 #endif
121 #endif
122
123 // 79 is max length we can do in 2 SIMD limbs, so just make it 79 always.
124 #define PLAINTEXT_LENGTH 79
125
126 #define BINARY_ALIGN 4
127 #define SALT_SIZE sizeof(struct saltstruct)
128 #define SALT_ALIGN 4
129
130 #ifdef SIMD_COEF_64
131 #define MIN_KEYS_PER_CRYPT (SIMD_COEF_64*SIMD_PARA_SHA512)
132 #define MAX_KEYS_PER_CRYPT (SIMD_COEF_64*SIMD_PARA_SHA512)
133 #if ARCH_LITTLE_ENDIAN==1
134 #define GETPOS(i, index) ( (index&(SIMD_COEF_64-1))*8 + ((i)&(0xffffffff-7))*SIMD_COEF_64 + (7-((i)&7)) + (unsigned int)index/SIMD_COEF_64*SHA_BUF_SIZ*SIMD_COEF_64*8 )
135 #else
136 #define GETPOS(i, index) ( (index&(SIMD_COEF_64-1))*8 + ((i)&(0xffffffff-7))*SIMD_COEF_64 + ((i)&7) + (unsigned int)index/SIMD_COEF_64*SHA_BUF_SIZ*SIMD_COEF_64*8 )
137 #endif
138 #else
139 #define MIN_KEYS_PER_CRYPT 1
140 #define MAX_KEYS_PER_CRYPT 1
141 #endif
142
143 // these MUST be defined prior to loading cryptsha512_valid.h
144 #define BINARY_SIZE 64
145 #define SALT_LENGTH 16
146 #define CIPHERTEXT_LENGTH 86
147 #define __CRYPTSHA512_CREATE_PROPER_TESTS_ARRAY__
148 #include "sha512crypt_common.h"
149
150 #define BLKS MIN_KEYS_PER_CRYPT
151
152 /* This structure is 'pre-loaded' with the keyspace of all possible crypts which */
153 /* will be performed WITHIN the inner loop. There are 8 possible buffers that */
154 /* are used. They are cp, pspc, cspp, ppc, cpp, psc, csp, and pc, where p stands */
155 /* for the 'hash' built from the password (and it is the same length as the */
156 /* password), s stands for the hash built from the salt (same size as salt), and */
157 /* c stands for the crypt results from the prior loop. There are 8 possible */
158 /* buffer layouts listed, but they fall into a pattern that is 42 long (2*3*7) */
159 /* this structure encapsulates this. we build this buffer, after computing the */
160 /* s hash, the p hash, and the starting c values. Then, within the inner loop, */
161 /* we simply spin through this structure, calling the SHA512 code to do the work. */
162 /* NOTE, most of the time, there will be 1 block and 2 block crypts. As the */
163 /* the password length grows, the more 2 block crypts there are, thus slower */
164 /**/
165 /* for SSE only, but 'could' be done for sha2.c code (jtr sha2) */
166 /* This keyspace was changed, to be put into BE at the start, and then we never */
167 /* do any swapping, but keep it in BE format from that point on. To do this, we */
168 /* changed the pointers to be a pointer to the start of the block, AND an offset */
169 /* for SSE, we need a pointer to the start of the block[0], and the offset. The */
170 /* index needed will be known in the crypt_all. This means we need something */
/* similar to our GET_POS macros, but also for oSSL formats. */
172 /* To do this, we have to use the JtR sha2.c functions, since there is this func: */
173 /* sha512_hash_block(&CTX, data, int perform_endian_swap). So if we set the last */
174 /* param to 0, we can call this function, and it will avoid the byte swapping */
/* Pre-computed schedule for the 42-entry circular pattern of the rounds loop. */
typedef struct cryptloopstruct_t {
	unsigned char buf[8*2*128*BLKS];	// backing store: 8 distinct layouts x 2 SHA512 blocks x BLKS lanes (entries 8..41 alias the first 8)
	// The cryptloopstructs live on the stack inside crypt_all()'s loop, so we avoid
	// heap allocation, and avoid a single static variable or a static array.
	unsigned char *bufs[BLKS][42];		// points to the start of each (up to) 2-block buffer.
#ifdef SIMD_COEF_64
	int offs[BLKS][42];			// byte offset of each buffer within the interleaved SIMD layout.
#endif
	unsigned char *cptr[BLKS][42];		// points to where we copy the crypt result for the NEXT round.
						// Round 0 points to somewhere in round 1's buffer, etc.
	int datlen[42];				// bytes to crypt (128 or 256); shorter passwords take only 1 block for some entries.
	// NOTE, datlen could be changed to a count of blocks, and then we could do > 2 block
	// crypts. Would take more memory (and longer PWs certainly DO take more time), but it
	// should work fine. It may be an issue, especially when doing OMP, that the memory
	// footprint of this 'hot' inner loop simply gets too big, and things slow down. For
	// now, we limit ourselves to 79 byte passwords, which fit into 2 SHA512 buffers.
} cryptloopstruct;
191
/* Per-candidate state, allocated in init(): cached plaintext lengths, the
   plaintexts themselves, and the resulting hash for each candidate. One
   extra slot past max_keys_per_crypt holds a zero-length dummy password
   used to pad partially-filled SIMD groups (see crypt_all()). */
static int (*saved_len);
static char (*saved_key)[PLAINTEXT_LENGTH + 1];
static uint32_t (*crypt_out)[BINARY_SIZE / sizeof(uint32_t)];

/* these 2 values are used in setup of the cryptloopstruct, AND to do our SHA512_Init() calls, in the inner loop */
static const unsigned char padding[256] = { 0x80, 0 /* 0,0,0,0.... */ };
#if !defined(JTR_INC_COMMON_CRYPTO_SHA2) && !defined (SIMD_COEF_64)
/* SHA-512 initial hash values; memcpy'd over ctx.h as a cheap SHA512_Init() (see crypt_all). */
static const uint64_t ctx_init[8] =
	{0x6A09E667F3BCC908ULL,0xBB67AE8584CAA73BULL,0x3C6EF372FE94F82BULL,0xA54FF53A5F1D36F1ULL,0x510E527FADE682D1ULL,0x9B05688C2B3E6C1FULL,0x1F83D9ABFB41BD6BULL,0x5BE0CD19137E2179ULL};
#endif

/* Decoded salt: byte length, iteration count, and up to 16 salt bytes. */
static struct saltstruct {
	unsigned int len;
	unsigned int rounds;
	unsigned char salt[SALT_LENGTH];
} *cur_salt;
208
init(struct fmt_main * self)209 static void init(struct fmt_main *self)
210 {
211 omp_autotune(self, OMP_SCALE);
212
213 self->params.max_keys_per_crypt *= SIMD_COEF_SCALE;
214
215 // we allocate 1 more than needed, and use that 'extra' value as a zero
216 // length PW to fill in the tail groups in MMX mode.
217 saved_len = mem_calloc(1 + self->params.max_keys_per_crypt, sizeof(*saved_len));
218 saved_key = mem_calloc(1 + self->params.max_keys_per_crypt, sizeof(*saved_key));
219 crypt_out = mem_calloc(1 + self->params.max_keys_per_crypt, sizeof(*crypt_out));
220 }
221
done(void)222 static void done(void)
223 {
224 MEM_FREE(crypt_out);
225 MEM_FREE(saved_key);
226 MEM_FREE(saved_len);
227 }
228
229 #define COMMON_GET_HASH_VAR crypt_out
230 #include "common-get-hash.h"
231
/* Store one candidate password. strnzcpyn() bounds the copy and returns
   the resulting length, which we cache to avoid strlen() calls later. */
static void set_key(char *key, int index)
{
	saved_len[index] = strnzcpyn(saved_key[index], key, sizeof(*saved_key));
}
236
get_key(int index)237 static char *get_key(int index)
238 {
239 saved_key[index][saved_len[index]] = 0;
240 return saved_key[index];
241 }
242
243 /*
244 These are the 8 types of buffers this algorithm uses:
245 cp
246 pspc
247 cspp
248 ppc
249 cpp
250 psc
251 csp
252 pc
253 */
/*
 * Pre-build the 42-entry circular schedule of crypt buffers used by the
 * rounds loop in crypt_all().  'index' is the candidate's global slot
 * (selects saved_len[] / crypt_out[]); 'idx' is the lane within the SIMD
 * bundle (always 0 in non-SIMD builds).  p_bytes / s_bytes are the P and S
 * byte sequences from Drepper's spec, already expanded to plen / salt-len
 * bytes by the caller.
 *
 * Only 8 distinct buffer layouts exist (cp, pspc, cspp, ppc, cpp, psc, csp,
 * pc).  Entries past the first occurrence of each layout simply alias it,
 * and cptr[] records where the PREVIOUS round's digest must be copied before
 * an entry is crypted again.  Each buffer is built with the 0x80 pad byte
 * and the (16-bit, big-endian) bit-length already in place, so each round is
 * a raw block transform with no SHA512_Update() bookkeeping.
 */
static void LoadCryptStruct(cryptloopstruct *crypt_struct, int index, int idx, char *p_bytes, char *s_bytes) {
	unsigned len_pc, len_ppsc, len_ppc, len_psc;	// length of 'data'
	unsigned tot_pc, tot_ppsc, tot_ppc, tot_psc;	// length of entire block to crypt (128 or 256)
	unsigned off_pc, off_pspc, off_ppc, off_psc;	// offset to the crypt ptr for these 4 'types'.
	unsigned dlen_pc, dlen_ppsc, dlen_ppc, dlen_psc; // 1 or 2 block count (or actual len for CommonCrypto, since it uses SHA512_Final())
	unsigned plen = saved_len[index];
	unsigned char *cp = crypt_struct->buf;
	cryptloopstruct *pstr = crypt_struct;
#ifdef SIMD_COEF_64
	// in SSE mode, we FORCE every buffer to be 2 blocks, even if it COULD fit into 1.
	// Then we simply use the 2 block SSE code.
	unsigned char *next_cp;
	cp += idx*2*128;
#endif

	len_pc   = plen + BINARY_SIZE;
	len_ppsc = (plen<<1) + cur_salt->len + BINARY_SIZE;
	len_ppc  = (plen<<1) + BINARY_SIZE;
	len_psc  = plen + cur_salt->len + BINARY_SIZE;

#ifdef JTR_INC_COMMON_CRYPTO_SHA2
	if (len_pc  <=111) tot_pc  =128; else tot_pc  =256;
	if (len_ppsc<=111) tot_ppsc=128; else tot_ppsc=256;
	if (len_ppc <=111) tot_ppc =128; else tot_ppc =256;
	if (len_psc <=111) tot_psc =128; else tot_psc =256;
	dlen_pc  =len_pc;
	dlen_ppsc=len_ppsc;
	dlen_ppc =len_ppc;
	dlen_psc =len_psc;
#else
	// 111 is the most data that fits a single SHA-512 block along with the
	// mandatory 0x80 pad byte and 16-byte length field.
	if (len_pc  <=111) {tot_pc  =128; dlen_pc  =128;}else{tot_pc  =256; dlen_pc  =256; }
	if (len_ppsc<=111) {tot_ppsc=128; dlen_ppsc=128;}else{tot_ppsc=256; dlen_ppsc=256; }
	if (len_ppc <=111) {tot_ppc =128; dlen_ppc =128;}else{tot_ppc =256; dlen_ppc =256; }
	if (len_psc <=111) {tot_psc =128; dlen_psc =128;}else{tot_psc =256; dlen_psc =256; }
#endif
	off_pc   = len_pc   - BINARY_SIZE;
	off_pspc = len_ppsc - BINARY_SIZE;
	off_ppc  = len_ppc  - BINARY_SIZE;
	off_psc  = len_psc  - BINARY_SIZE;

	// Adjust cp for idx;
#ifdef SIMD_COEF_64
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[0] is a cp (First of this type)
	pstr->bufs[idx][0] = pstr->cptr[idx][41] = cp;
	// For the first element only, we DO copy in the c value.
	memcpy(cp, crypt_out[index], BINARY_SIZE); cp += BINARY_SIZE;
	memcpy(cp, p_bytes, plen); cp += plen;
	if (!idx) pstr->datlen[0] = dlen_pc;
	memcpy(cp, padding, tot_pc-2-len_pc); cp += (tot_pc-len_pc);
	pstr->bufs[idx][0][tot_pc-2] = (len_pc<<3)>>8;
	pstr->bufs[idx][0][tot_pc-1] = (len_pc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[1] is a pspc (First of this type)
	pstr->bufs[idx][1] = cp;
	pstr->cptr[idx][0] = cp + off_pspc;
	memcpy(cp, p_bytes, plen); cp += plen;
	memcpy(cp, s_bytes, cur_salt->len); cp += cur_salt->len;
	memcpy(cp, p_bytes, plen); cp += (plen+BINARY_SIZE);
	if (!idx) pstr->datlen[1] = dlen_ppsc;
	memcpy(cp, padding, tot_ppsc-2-len_ppsc); cp += (tot_ppsc-len_ppsc);
	pstr->bufs[idx][1][tot_ppsc-2] = (len_ppsc<<3)>>8;
	pstr->bufs[idx][1][tot_ppsc-1] = (len_ppsc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[2] is a cspp (First of this type)
	pstr->bufs[idx][2] = pstr->cptr[idx][1] = cp;
	cp += BINARY_SIZE;
	memcpy(cp, s_bytes, cur_salt->len); cp += cur_salt->len;
	memcpy(cp, p_bytes, plen); cp += plen;
	memcpy(cp, p_bytes, plen); cp += plen;
	if (!idx) pstr->datlen[2] = dlen_ppsc;
	memcpy(cp, padding, tot_ppsc-2-len_ppsc); cp += (tot_ppsc-len_ppsc);
	pstr->bufs[idx][2][tot_ppsc-2] = (len_ppsc<<3)>>8;
	pstr->bufs[idx][2][tot_ppsc-1] = (len_ppsc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[3] is a ppc (First of this type)
	pstr->bufs[idx][3] = cp;
	pstr->cptr[idx][2] = cp + off_ppc;
	memcpy(cp, p_bytes, plen); cp += plen;
	memcpy(cp, p_bytes, plen); cp += (plen+BINARY_SIZE);
	if (!idx) pstr->datlen[3] = dlen_ppc;
	memcpy(cp, padding, tot_ppc-2-len_ppc); cp += (tot_ppc-len_ppc);
	pstr->bufs[idx][3][tot_ppc-2] = (len_ppc<<3)>>8;
	pstr->bufs[idx][3][tot_ppc-1] = (len_ppc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[4] is a cspp (from 2)
	pstr->bufs[idx][4] = pstr->cptr[idx][3] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[4] = dlen_ppsc;

	// pstr->buf[5] is a pspc (from [1])
	pstr->bufs[idx][5] = pstr->bufs[idx][1]; pstr->cptr[idx][4] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[5] = dlen_ppsc;

	// pstr->buf[6] is a cpp (First of this type)
	pstr->bufs[idx][6] = pstr->cptr[idx][5] = cp;
	cp += BINARY_SIZE;
	memcpy(cp, p_bytes, plen); cp += plen;
	memcpy(cp, p_bytes, plen); cp += plen;
	if (!idx) pstr->datlen[6] = dlen_ppc;
	memcpy(cp, padding, tot_ppc-2-len_ppc); cp += (tot_ppc-len_ppc);
	pstr->bufs[idx][6][tot_ppc-2] = (len_ppc<<3)>>8;
	pstr->bufs[idx][6][tot_ppc-1] = (len_ppc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[07] psc (First of this type)
	pstr->bufs[idx][7] = cp;
	pstr->cptr[idx][6] = cp + off_psc;
	memcpy(cp, p_bytes, plen); cp += plen;
	memcpy(cp, s_bytes, cur_salt->len); cp += (cur_salt->len+BINARY_SIZE);
	if (!idx) pstr->datlen[7] = dlen_psc;
	memcpy(cp, padding, tot_psc-2-len_psc); cp += (tot_psc-len_psc);
	pstr->bufs[idx][7][tot_psc-2] = (len_psc<<3)>>8;
	pstr->bufs[idx][7][tot_psc-1] = (len_psc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[08] cspp (from 2)
	pstr->bufs[idx][8] = pstr->cptr[idx][7] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[8] = dlen_ppsc;

	// pstr->buf[09] ppc (from 3)
	pstr->bufs[idx][9] = pstr->bufs[idx][3]; pstr->cptr[idx][8] = pstr->cptr[idx][2];
	if (!idx) pstr->datlen[9] = dlen_ppc;

	// pstr->buf[10] cspp (from 2)
	pstr->bufs[idx][10] = pstr->cptr[idx][9] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[10] = dlen_ppsc;

	// pstr->buf[11] pspc (from 1)
	pstr->bufs[idx][11] = pstr->bufs[idx][1]; pstr->cptr[idx][10] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[11] = dlen_ppsc;

	// pstr->buf[12] cpp (from 6)
	pstr->bufs[idx][12] = pstr->cptr[idx][11] = pstr->bufs[idx][6];
	if (!idx) pstr->datlen[12] = dlen_ppc;

	// pstr->buf[13] pspc (from 1)
	pstr->bufs[idx][13] = pstr->bufs[idx][1]; pstr->cptr[idx][12] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[13] = dlen_ppsc;

	// pstr->buf[14] csp (First of this type)
	pstr->bufs[idx][14] = pstr->cptr[idx][13] = cp;
	cp += BINARY_SIZE;
	memcpy(cp, s_bytes, cur_salt->len); cp += cur_salt->len;
	memcpy(cp, p_bytes, plen); cp += plen;
	if (!idx) pstr->datlen[14] = dlen_psc;
	memcpy(cp, padding, tot_psc-2-len_psc); cp += (tot_psc-len_psc);
	pstr->bufs[idx][14][tot_psc-2] = (len_psc<<3)>>8;
	pstr->bufs[idx][14][tot_psc-1] = (len_psc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[15] ppc (from 3)
	pstr->bufs[idx][15] = pstr->bufs[idx][3]; pstr->cptr[idx][14] = pstr->cptr[idx][2];
	if (!idx) pstr->datlen[15] = dlen_ppc;

	// pstr->buf[16] cspp (from 2)
	pstr->bufs[idx][16] = pstr->cptr[idx][15] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[16] = dlen_ppsc;

	// pstr->buf[17] pspc (from 1)
	pstr->bufs[idx][17] = pstr->bufs[idx][1]; pstr->cptr[idx][16] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[17] = dlen_ppsc;

	// pstr->buf[18] cpp (from 6)
	pstr->bufs[idx][18] = pstr->cptr[idx][17] = pstr->bufs[idx][6];
	if (!idx) pstr->datlen[18] = dlen_ppc;

	// pstr->buf[19] pspc (from 1)
	pstr->bufs[idx][19] = pstr->bufs[idx][1]; pstr->cptr[idx][18] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[19] = dlen_ppsc;

	// pstr->buf[20] cspp (from 2)
	pstr->bufs[idx][20] = pstr->cptr[idx][19] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[20] = dlen_ppsc;

	// pstr->buf[21] pc (First of this type)
	pstr->bufs[idx][21] = cp;
	pstr->cptr[idx][20] = cp + off_pc;
	memcpy(cp, p_bytes, plen); cp += (plen+BINARY_SIZE);
	if (!idx) pstr->datlen[21] = dlen_pc;
	// Bug fix: pad with tot_pc-2-len_pc bytes (was tot_psc-2-len_pc, which
	// copied up to 128 extra padding bytes past this buffer's logical end
	// whenever psc needed 2 limbs but pc fit in 1).
	memcpy(cp, padding, tot_pc-2-len_pc);
	pstr->bufs[idx][21][tot_pc-2] = (len_pc<<3)>>8;
	pstr->bufs[idx][21][tot_pc-1] = (len_pc<<3)&0xFF;

#ifdef SIMD_COEF_64
	cp = next_cp;
	next_cp = cp + (2*128*BLKS);
#endif

	// pstr->buf[22] cspp (from 2)
	pstr->bufs[idx][22] = pstr->cptr[idx][21] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[22] = dlen_ppsc;

	// pstr->buf[23] pspc (from 1)
	pstr->bufs[idx][23] = pstr->bufs[idx][1]; pstr->cptr[idx][22] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[23] = dlen_ppsc;

	// pstr->buf[24] cpp (from 6)
	pstr->bufs[idx][24] = pstr->cptr[idx][23] = pstr->bufs[idx][6];
	if (!idx) pstr->datlen[24] = dlen_ppc;

	// pstr->buf[25] pspc (from 1)
	pstr->bufs[idx][25] = pstr->bufs[idx][1]; pstr->cptr[idx][24] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[25] = dlen_ppsc;

	// pstr->buf[26] cspp (from 2)
	pstr->bufs[idx][26] = pstr->cptr[idx][25] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[26] = dlen_ppsc;

	// pstr->buf[27] ppc (from 3)
	pstr->bufs[idx][27] = pstr->bufs[idx][3]; pstr->cptr[idx][26] = pstr->cptr[idx][2];
	if (!idx) pstr->datlen[27] = dlen_ppc;

	// pstr->buf[28] csp (from 14)
	pstr->bufs[idx][28] = pstr->cptr[idx][27] = pstr->bufs[idx][14];
	if (!idx) pstr->datlen[28] = dlen_psc;

	// pstr->buf[29] pspc (from 1)
	pstr->bufs[idx][29] = pstr->bufs[idx][1]; pstr->cptr[idx][28] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[29] = dlen_ppsc;

	// pstr->buf[30] cpp (from 6)
	pstr->bufs[idx][30] = pstr->cptr[idx][29] = pstr->bufs[idx][6];
	if (!idx) pstr->datlen[30] = dlen_ppc;

	// pstr->buf[31] pspc (from 1)
	pstr->bufs[idx][31] = pstr->bufs[idx][1]; pstr->cptr[idx][30] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[31] = dlen_ppsc;

	// pstr->buf[32] cspp (from 2)
	pstr->bufs[idx][32] = pstr->cptr[idx][31] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[32] = dlen_ppsc;

	// pstr->buf[33] ppc (from 3)
	pstr->bufs[idx][33] = pstr->bufs[idx][3]; pstr->cptr[idx][32] = pstr->cptr[idx][2];
	if (!idx) pstr->datlen[33] = dlen_ppc;

	// pstr->buf[34] cspp (from 2)
	pstr->bufs[idx][34] = pstr->cptr[idx][33] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[34] = dlen_ppsc;

	// pstr->buf[35] psc (from 7)
	pstr->bufs[idx][35] = pstr->bufs[idx][7]; pstr->cptr[idx][34] = pstr->cptr[idx][6];
	if (!idx) pstr->datlen[35] = dlen_psc;

	// pstr->buf[36] cpp (from 6)
	pstr->bufs[idx][36] = pstr->cptr[idx][35] = pstr->bufs[idx][6];
	if (!idx) pstr->datlen[36] = dlen_ppc;

	// pstr->buf[37] pspc (from 1)
	pstr->bufs[idx][37] = pstr->bufs[idx][1]; pstr->cptr[idx][36] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[37] = dlen_ppsc;

	// pstr->buf[38] cspp (from 2)
	pstr->bufs[idx][38] = pstr->cptr[idx][37] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[38] = dlen_ppsc;

	// pstr->buf[39] ppc (from 3)
	pstr->bufs[idx][39] = pstr->bufs[idx][3]; pstr->cptr[idx][38] = pstr->cptr[idx][2];
	if (!idx) pstr->datlen[39] = dlen_ppc;

	// pstr->buf[40] cspp (from 2)
	pstr->bufs[idx][40] = pstr->cptr[idx][39] = pstr->bufs[idx][2];
	if (!idx) pstr->datlen[40] = dlen_ppsc;

	// pstr->buf[41] pspc (from 1)
	pstr->bufs[idx][41] = pstr->bufs[idx][1]; pstr->cptr[idx][40] = pstr->cptr[idx][0];
	if (!idx) pstr->datlen[41] = dlen_ppsc;
}
556
/*
 * Compute sha512crypt for 'count' queued candidates.  Per Drepper's spec:
 * build digests A, P and S for each candidate, then spin the rounds loop
 * over the pre-built 42-entry buffer schedule (see LoadCryptStruct).  In
 * SIMD builds, candidates are first grouped by length so every lane of a
 * bundle needs the same number of SHA-512 blocks per buffer type.
 */
static int crypt_all(int *pcount, struct db_salt *salt)
{
	const int count = *pcount;
	int index = 0;
	int *MixOrder, tot_todo;

#ifdef SIMD_COEF_64
	// group based upon size splits.
	// +6*MIN_KEYS_PER_CRYPT: worst-case padding, each of the length groups
	// may be rounded up to a full SIMD bundle with dummy entries.
	MixOrder = mem_calloc((count+6*MIN_KEYS_PER_CRYPT), sizeof(int));
	{
		// Group boundaries (password lengths) for the 5 equal-block-count
		// groups, one row per salt length; see the table in the file header.
		static const int lens[17][6] = {
			{0,24,48,88,89,90}, // 0 byte salt
			{0,24,48,88,89,90}, // 1 byte salt
			{0,23,24,46,48,87}, // 2 byte salt
			{0,23,24,45,48,87}, // 3 byte salt
			{0,22,24,44,48,86}, // 4 byte salt
			{0,22,24,43,48,86}, // 5 byte salt
			{0,21,24,42,48,85}, // 6 byte salt
			{0,21,24,41,48,85}, // 7 byte salt
			{0,20,24,40,48,84}, // 8 byte salt
			{0,20,24,39,48,84}, // 9 byte salt
			{0,19,24,38,48,83}, // 10 byte salt
			{0,19,24,37,48,83}, // 11 byte salt
			{0,18,24,36,48,82}, // 12 byte salt
			{0,18,24,35,48,82}, // 13 byte salt
			{0,17,24,34,48,81}, // 14 byte salt
			{0,17,24,33,48,81}, // 15 byte salt
			{0,16,24,32,48,80} };
		int j;
		tot_todo = 0;
		saved_len[count] = 0; // point all 'tail' MMX buffer elements to this location.
		for (j = 0; j < 5; ++j) {
			// Collect all candidates whose length falls in group j...
			for (index = 0; index < count; ++index) {
				if (saved_len[index] >= lens[cur_salt->len][j] && saved_len[index] < lens[cur_salt->len][j+1])
					MixOrder[tot_todo++] = index;
			}
			// ...then pad the group to a full SIMD bundle with the
			// zero-length dummy key stored at slot 'count'.
			while (tot_todo % MIN_KEYS_PER_CRYPT)
				MixOrder[tot_todo++] = count;
		}
	}
#else
	// no need to mix. just run them one after the next, in any order.
	MixOrder = mem_calloc(count, sizeof(int));
	for (index = 0; index < count; ++index)
		MixOrder[index] = index;
	tot_todo = count;
#endif

#ifdef _OPENMP
#pragma omp parallel for
#endif
	for (index = 0; index < tot_todo; index += MIN_KEYS_PER_CRYPT)
	{
		// portably align temp_result char * pointer machine word size.
		union xx {
			unsigned char c[BINARY_SIZE];
			ARCH_WORD a[BINARY_SIZE/sizeof(ARCH_WORD)];
		} u;
		unsigned char *temp_result = u.c;
		SHA512_CTX ctx;
		SHA512_CTX alt_ctx;
		size_t cnt;
		int idx;
		char *cp;
		char p_bytes[PLAINTEXT_LENGTH+1];
		char s_bytes[PLAINTEXT_LENGTH+1];
		// Stack-allocated, manually aligned for SIMD loads.
		char tmp_cls[sizeof(cryptloopstruct)+MEM_ALIGN_SIMD];
		cryptloopstruct *crypt_struct;
#ifdef SIMD_COEF_64
		char tmp_sse_out[8*MIN_KEYS_PER_CRYPT*8+MEM_ALIGN_SIMD];
		uint64_t *sse_out;
		sse_out = (uint64_t *)mem_align(tmp_sse_out, MEM_ALIGN_SIMD);
#endif
		crypt_struct = (cryptloopstruct *)mem_align(tmp_cls,MEM_ALIGN_SIMD);

		// Per-lane setup: compute digests A (intermediate result), P and S
		// per the spec, then pre-build the rounds-loop buffer schedule.
		for (idx = 0; idx < MIN_KEYS_PER_CRYPT; ++idx)
		{
			/* Prepare for the real work. */
			SHA512_Init(&ctx);

			/* Add the key string. */
			SHA512_Update(&ctx, (unsigned char*)saved_key[MixOrder[index+idx]], saved_len[MixOrder[index+idx]]);

			/* The last part is the salt string. This must be at most 16
			   characters and it ends at the first `$' character (for
			   compatibility with existing implementations). */
			SHA512_Update(&ctx, cur_salt->salt, cur_salt->len);

			/* Compute alternate SHA512 sum with input KEY, SALT, and KEY. The
			   final result will be added to the first context. */
			SHA512_Init(&alt_ctx);

			/* Add key. */
			SHA512_Update(&alt_ctx, (unsigned char*)saved_key[MixOrder[index+idx]], saved_len[MixOrder[index+idx]]);

			/* Add salt. */
			SHA512_Update(&alt_ctx, cur_salt->salt, cur_salt->len);

			/* Add key again. */
			SHA512_Update(&alt_ctx, (unsigned char*)saved_key[MixOrder[index+idx]], saved_len[MixOrder[index+idx]]);

			/* Now get result of this (64 bytes) and add it to the other
			   context. */
			SHA512_Final((unsigned char*)crypt_out[MixOrder[index+idx]], &alt_ctx);

			/* Add for any character in the key one byte of the alternate sum. */
			for (cnt = saved_len[MixOrder[index+idx]]; cnt > BINARY_SIZE; cnt -= BINARY_SIZE)
				SHA512_Update(&ctx, (unsigned char*)crypt_out[MixOrder[index+idx]], BINARY_SIZE);
			SHA512_Update(&ctx, (unsigned char*)crypt_out[MixOrder[index+idx]], cnt);

			/* Take the binary representation of the length of the key and for every
			   1 add the alternate sum, for every 0 the key. */
			for (cnt = saved_len[MixOrder[index+idx]]; cnt > 0; cnt >>= 1)
				if ((cnt & 1) != 0)
					SHA512_Update(&ctx, (unsigned char*)crypt_out[MixOrder[index+idx]], BINARY_SIZE);
				else
					SHA512_Update(&ctx, (unsigned char*)saved_key[MixOrder[index+idx]], saved_len[MixOrder[index+idx]]);

			/* Create intermediate result. */
			SHA512_Final((unsigned char*)crypt_out[MixOrder[index+idx]], &ctx);

			/* Start computation of P byte sequence. */
			SHA512_Init(&alt_ctx);

			/* For every character in the password add the entire password. */
			for (cnt = 0; cnt < saved_len[MixOrder[index+idx]]; ++cnt)
				SHA512_Update(&alt_ctx, (unsigned char*)saved_key[MixOrder[index+idx]], saved_len[MixOrder[index+idx]]);

			/* Finish the digest. */
			SHA512_Final(temp_result, &alt_ctx);

			/* Create byte sequence P. */
			cp = p_bytes;
			for (cnt = saved_len[MixOrder[index+idx]]; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE)
				cp = (char *) memcpy (cp, temp_result, BINARY_SIZE) + BINARY_SIZE;
			memcpy (cp, temp_result, cnt);

			/* Start computation of S byte sequence. */
			SHA512_Init(&alt_ctx);

			/* repeat the following 16+A[0] times, where A[0] represents the
			   first byte in digest A interpreted as an 8-bit unsigned value */
			for (cnt = 0; cnt < 16 + ((unsigned char*)crypt_out[MixOrder[index+idx]])[0]; ++cnt)
				SHA512_Update(&alt_ctx, cur_salt->salt, cur_salt->len);

			/* Finish the digest. */
			SHA512_Final(temp_result, &alt_ctx);

			/* Create byte sequence S. */
			cp = s_bytes;
			for (cnt = cur_salt->len; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE)
				cp = (char *) memcpy (cp, temp_result, BINARY_SIZE) + BINARY_SIZE;
			memcpy (cp, temp_result, cnt);

			/* Repeatedly run the collected hash value through SHA512 to
			   burn CPU cycles. */
			LoadCryptStruct(crypt_struct, MixOrder[index+idx], idx, p_bytes, s_bytes);
		}

		// The rounds loop cycles through the 42 pre-built buffers; after
		// each crypt, the digest is copied to cptr[][idx], i.e. into the
		// NEXT round's buffer, before that buffer is crypted.
		idx = 0;
#ifdef SIMD_COEF_64
		for (cnt = 1; ; ++cnt) {
			if (crypt_struct->datlen[idx]==256) {
				// 2-limb buffer: crypt the first block, then reload and
				// crypt the second.
				unsigned char *cp = crypt_struct->bufs[0][idx];
				SIMDSHA512body(cp, sse_out, NULL, SSEi_FLAT_IN|SSEi_2BUF_INPUT_FIRST_BLK);
				SIMDSHA512body(&cp[128], sse_out, sse_out, SSEi_FLAT_IN|SSEi_2BUF_INPUT_FIRST_BLK|SSEi_RELOAD);
			} else {
				unsigned char *cp = crypt_struct->bufs[0][idx];
				SIMDSHA512body(cp, sse_out, NULL, SSEi_FLAT_IN|SSEi_2BUF_INPUT_FIRST_BLK);
			}
			if (cnt == cur_salt->rounds)
				break;
			{
				// De-interleave each lane's digest out of sse_out into the
				// next round's (flat, big-endian) buffer.
				int j, k;
				for (k = 0; k < MIN_KEYS_PER_CRYPT; ++k) {
					uint64_t *o = (uint64_t *)crypt_struct->cptr[k][idx];
#if !ARCH_ALLOWS_UNALIGNED
					if (!is_aligned(o, 8)) {
						// Byte-wise copy when the destination is not
						// 8-byte aligned.
						unsigned char *cp = (unsigned char*)o;
						for (j = 0; j < 64; ++j)
							*cp++ = ((unsigned char*)sse_out)[GETPOS(j, k)];
					} else
#endif
					for (j = 0; j < 8; ++j)
#if ARCH_LITTLE_ENDIAN==1
						*o++ = JOHNSWAP64(sse_out[j*SIMD_COEF_64+(k&(SIMD_COEF_64-1))+k/SIMD_COEF_64*8*SIMD_COEF_64]);
#else
						*o++ = sse_out[j*SIMD_COEF_64+(k&(SIMD_COEF_64-1))+k/SIMD_COEF_64*8*SIMD_COEF_64];
#endif
				}
			}
			if (++idx == 42)
				idx = 0;
		}
		{
			// Final round: de-interleave each lane's digest into crypt_out.
			int j, k;
			for (k = 0; k < MIN_KEYS_PER_CRYPT; ++k) {
				uint64_t *o = (uint64_t *)crypt_out[MixOrder[index+k]];
				for (j = 0; j < 8; ++j)
#if ARCH_LITTLE_ENDIAN==1
					*o++ = JOHNSWAP64(sse_out[j*SIMD_COEF_64+(k&(SIMD_COEF_64-1))+k/SIMD_COEF_64*8*SIMD_COEF_64]);
#else
					*o++ = sse_out[j*SIMD_COEF_64+(k&(SIMD_COEF_64-1))+k/SIMD_COEF_64*8*SIMD_COEF_64];
#endif
			}
		}
#else
		SHA512_Init(&ctx);
		for (cnt = 1; ; ++cnt) {
			// calling with 128 byte, or 256 byte always, will force the update to properly crypt the data.
			// NOTE the data is fully formed. It ends in a 0x80, is padded with nulls, AND has bit appended.
			SHA512_Update(&ctx, crypt_struct->bufs[0][idx], crypt_struct->datlen[idx]);
			if (cnt == cur_salt->rounds)
				break;
#ifdef JTR_INC_COMMON_CRYPTO_SHA2
			SHA512_Final(crypt_struct->cptr[0][idx], &ctx);
#else // !defined JTR_INC_COMMON_CRYPTO_SHA2, so it is oSSL, or generic
#if ARCH_LITTLE_ENDIAN
			{
				// Store the state big-endian into the next round's buffer.
				int j;
				uint64_t *o = (uint64_t *)crypt_struct->cptr[0][idx];
				for (j = 0; j < 8; ++j)
					*o++ = JOHNSWAP64(ctx.h[j]);
			}
#else
			memcpy(crypt_struct->cptr[0][idx], ctx.h, BINARY_SIZE);
#endif
#endif
			if (++idx == 42)
				idx = 0;

#ifdef JTR_INC_COMMON_CRYPTO_SHA2
			SHA512_Init(&ctx);
#else
			// this memcpy is 'good enough', used instead of SHA512_Init()
			memcpy(ctx.h, ctx_init, sizeof(ctx_init));
#endif
		}
		// Final round result goes to crypt_out for cmp_*().
#ifdef JTR_INC_COMMON_CRYPTO_SHA2
		SHA512_Final((unsigned char*)crypt_out[MixOrder[index]], &ctx);
#else
#if ARCH_LITTLE_ENDIAN
		{
			int j;
			uint64_t *o = (uint64_t *)crypt_out[MixOrder[index]];
			for (j = 0; j < 8; ++j)
				*o++ = JOHNSWAP64(ctx.h[j]);
		}
#else
		memcpy(crypt_out[MixOrder[index]], ctx.h, BINARY_SIZE);
#endif
#endif

#endif
	}
	MEM_FREE(MixOrder);
	return count;
}
815
/* Install the current salt (a struct saltstruct from get_salt()). */
static void set_salt(void *salt)
{
	cur_salt = salt;
}
820
get_salt(char * ciphertext)821 static void *get_salt(char *ciphertext)
822 {
823 static struct saltstruct out;
824 int len;
825
826 memset(&out, 0, sizeof(out));
827 out.rounds = ROUNDS_DEFAULT;
828 ciphertext += FORMAT_TAG_LEN;
829 if (!strncmp(ciphertext, ROUNDS_PREFIX,
830 sizeof(ROUNDS_PREFIX) - 1)) {
831 const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1;
832 char *endp;
833 unsigned long int srounds = strtoul(num, &endp, 10);
834 if (*endp == '$')
835 {
836 ciphertext = endp + 1;
837 srounds = srounds < ROUNDS_MIN ?
838 ROUNDS_MIN : srounds;
839 out.rounds = srounds > ROUNDS_MAX ?
840 ROUNDS_MAX : srounds;
841 }
842 }
843
844 for (len = 0; ciphertext[len] != '$'; len++);
845
846 if (len > SALT_LENGTH)
847 len = SALT_LENGTH;
848
849 memcpy(out.salt, ciphertext, len);
850 out.len = len;
851 return &out;
852 }
853
cmp_all(void * binary,int count)854 static int cmp_all(void *binary, int count)
855 {
856 int index;
857
858 for (index = 0; index < count; index++)
859 if (!memcmp(binary, crypt_out[index], ARCH_SIZE))
860 return 1;
861 return 0;
862 }
863
/* Full BINARY_SIZE (64-byte) comparison for one candidate. */
static int cmp_one(void *binary, int index)
{
	return !memcmp(binary, crypt_out[index], BINARY_SIZE);
}
868
/* crypt_out holds the full digest, so cmp_one() was already exact. */
static int cmp_exact(char *source, int index)
{
	return 1;
}
873
sha512crypt_iterations(void * salt)874 static unsigned int sha512crypt_iterations(void *salt)
875 {
876 struct saltstruct *sha512crypt_salt;
877
878 sha512crypt_salt = salt;
879 return (unsigned int)sha512crypt_salt->rounds;
880 }
881
882 // Public domain hash function by DJ Bernstein
883 // We are hashing the entire struct
salt_hash(void * salt)884 static int salt_hash(void *salt)
885 {
886 unsigned char *s = salt;
887 unsigned int hash = 5381;
888 unsigned int i;
889
890 for (i = 0; i < SALT_SIZE; i++)
891 hash = ((hash << 5) + hash) ^ s[i];
892
893 return hash & (SALT_HASH_SIZE - 1);
894 }
895
/* Format descriptor registered with the JtR core (see FMT_REGISTERS_H at
   the top of this file). First member block is fmt_params, second is the
   method table. */
struct fmt_main fmt_cryptsha512 = {
	{
		FORMAT_LABEL,
		FORMAT_NAME,
		"SHA512 " ALGORITHM_NAME,
		BENCHMARK_COMMENT,
		BENCHMARK_LENGTH,
		0,
		PLAINTEXT_LENGTH,
		BINARY_SIZE,
		BINARY_ALIGN,
		SALT_SIZE,
		SALT_ALIGN,
		MIN_KEYS_PER_CRYPT,
		MAX_KEYS_PER_CRYPT,
		FMT_CASE | FMT_8_BIT | FMT_OMP,
		{
			"iteration count", // reported via sha512crypt_iterations()
		},
		{ FORMAT_TAG },
		tests
	}, {
		init,
		done,
		fmt_default_reset,
		fmt_default_prepare,
		valid,
		fmt_default_split,
		get_binary,
		get_salt,
		{
			sha512crypt_iterations,
		},
		fmt_default_source,
		{
			fmt_default_binary_hash_0,
			fmt_default_binary_hash_1,
			fmt_default_binary_hash_2,
			fmt_default_binary_hash_3,
			fmt_default_binary_hash_4,
			fmt_default_binary_hash_5,
			fmt_default_binary_hash_6
		},
		salt_hash,
		NULL,
		set_salt,
		set_key,
		get_key,
		fmt_default_clear_keys,
		crypt_all,
		{
// get_hash_0 .. get_hash_6 come from the shared header, driven by
// COMMON_GET_HASH_VAR defined above.
#define COMMON_GET_HASH_LINK
#include "common-get-hash.h"
		},
		cmp_all,
		cmp_one,
		cmp_exact
	}
};
955
956 #endif /* plugin stanza */
957