/*
 * Copyright (c) 2012-2016, magnum
 * and Copyright (c) 2015, Sayantan Datta <sdatta@openwall.com>
 * This software is hereby released to the general public under
 * the following terms: Redistribution and use in source and binary
 * forms, with or without modification, are permitted.
 */
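
/*
 * SHA-1 kernel (optionally SHA-1 of SHA-1 when TWICE is defined) with
 * on-device mask mode: the placeholder bytes of each candidate key are
 * generated on the GPU, and results are screened through bitmaps
 * before an exact offset/hash table lookup.
 */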

#include "opencl_device_info.h"
#define AMD_PUTCHAR_NOCAST
#include "opencl_misc.h"
#include "opencl_sha1.h"
#include "opencl_mask.h"

#if BITMAP_SIZE_BITS_LESS_ONE < 0xffffffff
#define BITMAP_SIZE_BITS (BITMAP_SIZE_BITS_LESS_ONE + 1)
#else
/* Deliberately left undefined so any use is a compile-time error. */
#endif
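/*
 * Exact-match check for a candidate that already passed the bitmap
 * filter in cmp(). The 160-bit digest is reduced to a table index via
 * the offset table (a perfect-hash style indirection), and two digest
 * words are compared against the hash table entry.
 */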
inline void cmp_final(uint gid,
		uint iter,
		__private uint *hash,
		__global uint *offset_table,
		__global uint *hash_table,
		__global uint *return_hashes,
		volatile __global uint *output,
		volatile __global uint *bitmap_dupe) {

	uint t, offset_table_index, hash_table_index;
	unsigned long LO, MI, HI;
	unsigned long p;

	HI = (unsigned long)hash[4];
	MI = ((unsigned long)hash[3] << 32) | (unsigned long)hash[2];
	LO = ((unsigned long)hash[1] << 32) | (unsigned long)hash[0];

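	/*
	 * Table index = full 160-bit digest mod OFFSET_TABLE_SIZE.
	 * Assuming SHIFT128_OT_SZ == 2^128 % OFFSET_TABLE_SIZE and
	 * SHIFT64_OT_SZ == 2^64 % OFFSET_TABLE_SIZE (host-side constants),
	 * the sum below is congruent to HI * 2^128 + MI * 2^64 + LO.
	 */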
	p = (HI % OFFSET_TABLE_SIZE) * SHIFT128_OT_SZ;
	p += (MI % OFFSET_TABLE_SIZE) * SHIFT64_OT_SZ;
	p += LO % OFFSET_TABLE_SIZE;
	p %= OFFSET_TABLE_SIZE;
	offset_table_index = (unsigned int)p;

	/* This addition may overflow, but the chance is extremely low. */
	LO += (unsigned long)offset_table[offset_table_index];

	p = (HI % HASH_TABLE_SIZE) * SHIFT128_HT_SZ;
	p += (MI % HASH_TABLE_SIZE) * SHIFT64_HT_SZ;
	p += LO % HASH_TABLE_SIZE;
	p %= HASH_TABLE_SIZE;
	hash_table_index = (unsigned int)p;

	if (hash_table[hash_table_index] == hash[0])
	if (hash_table[HASH_TABLE_SIZE + hash_table_index] == hash[1])
	{
/*
 * Prevent duplicate keys from cracking the same hash
 */
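		/*
		 * output[0] is an atomic count of cracks; each crack records
		 * (gid, int-key index, hash table index). The two remaining
		 * digest words go to return_hashes so the host can finish
		 * verification.
		 */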
		if (!(atomic_or(&bitmap_dupe[hash_table_index/32], (1U << (hash_table_index % 32))) & (1U << (hash_table_index % 32)))) {
			t = atomic_inc(&output[0]);
			output[1 + 3 * t] = gid;
			output[2 + 3 * t] = iter;
			output[3 + 3 * t] = hash_table_index;
			return_hashes[2 * t] = hash[2];
			return_hashes[2 * t + 1] = hash[3];
		}
	}
}
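/*
 * Bitmap pre-filter: probe SELECT_CMP_STEPS bitmaps with different
 * slices of the digest words; only a candidate that hits in every
 * bitmap proceeds to the exact cmp_final() lookup.
 */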
inline void cmp(uint gid,
		uint iter,
		__private uint *hash,
#if USE_LOCAL_BITMAPS
		__local
#else
		__global
#endif
		uint *bitmaps,
		__global uint *offset_table,
		__global uint *hash_table,
		__global uint *return_hashes,
		volatile __global uint *output,
		volatile __global uint *bitmap_dupe) {
	uint bitmap_index, tmp = 1;

#if SELECT_CMP_STEPS > 4
	bitmap_index = hash[0] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[0] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[1] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 4) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[1] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 3 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[2] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 3) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[2] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 5 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[3] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 6 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[3] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 7 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#elif SELECT_CMP_STEPS > 2
	bitmap_index = hash[3] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[2] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[1] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 4) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[0] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 3 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#elif SELECT_CMP_STEPS > 1
	bitmap_index = hash[3] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[2] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#else
	bitmap_index = hash[3] & BITMAP_SIZE_BITS_LESS_ONE;
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
#endif

	if (tmp)
		cmp_final(gid, iter, hash, offset_table, hash_table, return_hashes, output, bitmap_dupe);
}

#define USE_CONST_CACHE \
	(CONST_CACHE_SIZE >= (NUM_INT_KEYS * 4))
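
/*
 * Keep int_keys in the constant address space when all NUM_INT_KEYS
 * candidate words fit in the device's constant cache; otherwise fall
 * back to global memory.
 */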

__kernel void sha1(__global uint *keys,
		  __global uint *index,
		  __global uint *int_key_loc,
#if USE_CONST_CACHE
		  constant
#else
		  __global
#endif
		  uint *int_keys
#if !defined(__OS_X__) && USE_CONST_CACHE && gpu_amd(DEVICE_INFO)
		__attribute__((max_constant_size (NUM_INT_KEYS * 4)))
#endif
		 , __global uint *bitmaps,
		  __global uint *offset_table,
		  __global uint *hash_table,
		  __global uint *return_hashes,
		  volatile __global uint *out_hash_ids,
		  volatile __global uint *bitmap_dupe)
{
	uint i;
	uint gid = get_global_id(0);
	uint base = index[gid];
	uint W[16] = { 0 };
	uint len = base & 63;
	uint hash[5];
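	/*
	 * For a non-static GPU-side mask, int_key_loc[gid] packs the byte
	 * positions of up to four mask placeholders, one per byte.
	 */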
#if NUM_INT_KEYS > 1 && !IS_STATIC_GPU_MASK
	uint ikl = int_key_loc[gid];
	uint loc0 = ikl & 0xff;
#if MASK_FMT_INT_PLHDR > 1
#if LOC_1 >= 0
	uint loc1 = (ikl & 0xff00) >> 8;
#endif
#endif
#if MASK_FMT_INT_PLHDR > 2
#if LOC_2 >= 0
	uint loc2 = (ikl & 0xff0000) >> 16;
#endif
#endif
#if MASK_FMT_INT_PLHDR > 3
#if LOC_3 >= 0
	uint loc3 = (ikl & 0xff000000) >> 24;
#endif
#endif
#endif

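/*
 * Placeholder positions are compile-time constants (LOC_n) for a
 * static mask, or decoded per work-item from int_key_loc otherwise.
 */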
#if !IS_STATIC_GPU_MASK
#define GPU_LOC_0 loc0
#define GPU_LOC_1 loc1
#define GPU_LOC_2 loc2
#define GPU_LOC_3 loc3
#else
#define GPU_LOC_0 LOC_0
#define GPU_LOC_1 LOC_1
#define GPU_LOC_2 LOC_2
#define GPU_LOC_3 LOC_3
#endif

#if USE_LOCAL_BITMAPS
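	/*
	 * Cache the bitmaps in local memory: the work-group copies them
	 * cooperatively, then synchronizes before any lookups.
	 */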
	uint lid = get_local_id(0);
	uint lws = get_local_size(0);
	__local uint s_bitmaps[(BITMAP_SIZE_BITS >> 5) * SELECT_CMP_STEPS];

	for (i = 0; i < (((BITMAP_SIZE_BITS >> 5) * SELECT_CMP_STEPS) / lws); i++)
		s_bitmaps[i*lws + lid] = bitmaps[i*lws + lid];

	barrier(CLK_LOCAL_MEM_FENCE);
#endif

	/* index[gid] packs the key offset in uints (high bits) and the
	   key length in bytes (low 6 bits). */
	keys += base >> 6;

#ifndef TWICE
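	/*
	 * The whole message block is built once up front: key bytes, the
	 * 0x80 padding byte and the bit length in W[15]. Only the mask
	 * placeholder bytes are patched inside the main loop.
	 */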
	for (i = 0; i < (len+3)/4; i++)
		W[i] = SWAP32(keys[i]);

	PUTCHAR_BE(W, len, 0x80);
	W[15] = len << 3;
#endif

	for (i = 0; i < NUM_INT_KEYS; i++) {
#ifdef TWICE
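		/*
		 * With TWICE, W must be rebuilt for every candidate, since
		 * the digest-feedback step below overwrites W after each
		 * first hash.
		 */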
		uint j;

		for (j = 0; j < (len+3)/4; j++)
			W[j] = SWAP32(keys[j]);
		for (; j < 15; j++)
			W[j] = 0;

		PUTCHAR_BE(W, len, 0x80);
		W[15] = len << 3;
#endif
#if NUM_INT_KEYS > 1
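		/* Patch this candidate's mask bytes into the key block. */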
		PUTCHAR_BE(W, GPU_LOC_0, (int_keys[i] & 0xff));

#if MASK_FMT_INT_PLHDR > 1
#if LOC_1 >= 0
		PUTCHAR_BE(W, GPU_LOC_1, ((int_keys[i] & 0xff00) >> 8));
#endif
#endif
#if MASK_FMT_INT_PLHDR > 2
#if LOC_2 >= 0
		PUTCHAR_BE(W, GPU_LOC_2, ((int_keys[i] & 0xff0000) >> 16));
#endif
#endif
#if MASK_FMT_INT_PLHDR > 3
#if LOC_3 >= 0
		PUTCHAR_BE(W, GPU_LOC_3, ((int_keys[i] & 0xff000000) >> 24));
#endif
#endif
#endif
		sha1_single(uint, W, hash);
#ifdef TWICE
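		/*
		 * Second pass: hash the 20-byte big-endian digest. W[0..4]
		 * hold the digest, W[5] the 0x80 padding word and W[15] the
		 * 160-bit length; sha1_single_160Z is evidently the variant
		 * specialized for this fixed-length, zero-padded block.
		 */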
		W[0] = hash[0];
		W[1] = hash[1];
		W[2] = hash[2];
		W[3] = hash[3];
		W[4] = hash[4];
		W[5] = 0x80000000;
		W[15] = 20 << 3;
		sha1_single_160Z(uint, W, hash);
#endif

		cmp(gid, i, hash,
#if USE_LOCAL_BITMAPS
		    s_bitmaps
#else
		    bitmaps
#endif
		    , offset_table, hash_table, return_hashes, out_hash_ids, bitmap_dupe);
	}
}