// lsh.cpp - written and placed in the public domain by Jeffrey Walton
//           Based on the specification and source code provided by
//           Korea Internet & Security Agency (KISA) website. Also
//           see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
//           and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.

// We are hitting some sort of GCC bug in the LSH AVX2 code path.
// Clang is OK on the AVX2 code path. We believe it is GCC Issue
// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
// makes using zeroupper a little tricky.

#include "pch.h"
#include "config.h"

#include "lsh.h"
#include "misc.h"

#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)

#if defined(CRYPTOPP_AVX2_AVAILABLE)
# include <emmintrin.h>
# include <immintrin.h>
#endif

// GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
#if (CRYPTOPP_GCC_VERSION >= 40500)
# include <x86intrin.h>
#endif

ANONYMOUS_NAMESPACE_BEGIN

/* LSH Constants */

const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
// const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
// const unsigned int LSH256_CV_BYTE_LEN = 64;
const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;

// const unsigned int MSG_BLK_WORD_LEN = 32;
const unsigned int CV_WORD_LEN = 16;
const unsigned int CONST_WORD_LEN = 8;
// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// const unsigned int WORD_BIT_LEN = 32;
const unsigned int NUM_STEPS = 26;

const unsigned int ROT_EVEN_ALPHA = 29;
const unsigned int ROT_EVEN_BETA = 1;
const unsigned int ROT_ODD_ALPHA = 5;
const unsigned int ROT_ODD_BETA = 17;

const unsigned int LSH_TYPE_256_256 = 0x0000020;
const unsigned int LSH_TYPE_256_224 = 0x000001C;

// const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
// const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;

/* Error Code */

const unsigned int LSH_SUCCESS = 0x0;
// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
const unsigned int LSH_ERR_INVALID_STATE = 0x2404;

/* Index into our state array */

const unsigned int AlgorithmType = 80;
const unsigned int RemainingBits = 81;

NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(LSH)

// lsh256.cpp
extern const word32 LSH256_IV224[CV_WORD_LEN];
extern const word32 LSH256_IV256[CV_WORD_LEN];
extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];

NAMESPACE_END  // LSH
NAMESPACE_END  // Crypto++

ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::byte;
using CryptoPP::word32;
using CryptoPP::rotlFixed;
using CryptoPP::rotlConstant;

using CryptoPP::GetBlock;
using CryptoPP::LittleEndian;
using CryptoPP::ConditionalByteReverse;
using CryptoPP::LITTLE_ENDIAN_ORDER;

typedef byte lsh_u8;
typedef word32 lsh_u32;
typedef word32 lsh_uint;
typedef word32 lsh_err;
typedef word32 lsh_type;

using CryptoPP::LSH::LSH256_IV224;
using CryptoPP::LSH::LSH256_IV256;
using CryptoPP::LSH::LSH256_StepConstants;

struct LSH256_AVX2_Context
{
	LSH256_AVX2_Context(word32* state, word32 algType, word32& remainingBitLength) :
		cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
		last_block(reinterpret_cast<byte*>(state+48)),
		remain_databitlen(remainingBitLength),
		alg_type(static_cast<lsh_type>(algType)) {}

	lsh_u32* cv_l;  // start of our state block
	lsh_u32* cv_r;
	lsh_u32* sub_msgs;
	lsh_u8*  last_block;
	lsh_u32& remain_databitlen;
	lsh_type alg_type;
};

struct LSH256_AVX2_Internal
{
	LSH256_AVX2_Internal(word32* state) :
		submsg_e_l(state+16), submsg_e_r(state+24),
		submsg_o_l(state+32), submsg_o_r(state+40) { }

	lsh_u32* submsg_e_l; /* even left sub-message  */
	lsh_u32* submsg_e_r; /* even right sub-message */
	lsh_u32* submsg_o_l; /* odd left sub-message   */
	lsh_u32* submsg_o_r; /* odd right sub-message  */
};

// Zero the upper 128 bits of all YMM registers on exit.
// It avoids AVX state transition penalties when saving state.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735
// makes using zeroupper a little tricky.

struct AVX_Cleanup
{
	~AVX_Cleanup() {
		_mm256_zeroupper();
	}
};
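// The lsh256_*_avx2 routines below each declare an AVX_Cleanup local so the
// _mm256_zeroupper() call runs on every return path (RAII).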

// const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };

/* LSH AlgType Macro */

inline bool LSH_IS_LSH512(lsh_uint val) {
	return (val & 0xf0000) == 0;
}

inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
	return val >> 24;
}

inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
	return val & 0xffff;
}

inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
	return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
}
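// Example: LSH_TYPE_256_256 = 0x20 encodes a 32-byte (256-bit) digest and
// LSH_TYPE_256_224 = 0x1C a 28-byte (224-bit) digest. The top byte carries
// the count of unused bits for digest lengths that are not byte aligned.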

inline lsh_u32 loadLE32(lsh_u32 v) {
	return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
}

lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
	return rotlFixed(x, r);
}

// Original code relied upon unaligned lsh_u32 buffer
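// so unaligned loads are used below; each _mm256_loadu_si256 picks up eight
// 32-bit words of the 128-byte message block.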
inline void load_msg_blk(LSH256_AVX2_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u32* submsg_e_l = i_state->submsg_e_l;
	lsh_u32* submsg_e_r = i_state->submsg_e_r;
	lsh_u32* submsg_o_l = i_state->submsg_o_l;
	lsh_u32* submsg_o_r = i_state->submsg_o_r;

	_mm256_storeu_si256(M256_CAST(submsg_e_l+0),
		_mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
	_mm256_storeu_si256(M256_CAST(submsg_e_r+0),
		_mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
	_mm256_storeu_si256(M256_CAST(submsg_o_l+0),
		_mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
	_mm256_storeu_si256(M256_CAST(submsg_o_r+0),
		_mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
}

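// Message expansion, even step: permute the previous even sub-message by
// (3,2,0,1,7,4,5,6) within each 8-word half and add the odd sub-message.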
inline void msg_exp_even(LSH256_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u32* submsg_e_l = i_state->submsg_e_l;
	lsh_u32* submsg_e_r = i_state->submsg_e_r;
	lsh_u32* submsg_o_l = i_state->submsg_o_l;
	lsh_u32* submsg_o_r = i_state->submsg_o_r;

	const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
		0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);

	_mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi32(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
		_mm256_shuffle_epi8(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)), mask)));
	_mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi32(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
		_mm256_shuffle_epi8(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)), mask)));
}

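// Message expansion, odd step: the same permutation with the even and odd
// sub-messages swapping roles.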
inline void msg_exp_odd(LSH256_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u32* submsg_e_l = i_state->submsg_e_l;
	lsh_u32* submsg_e_r = i_state->submsg_e_r;
	lsh_u32* submsg_o_l = i_state->submsg_o_l;
	lsh_u32* submsg_o_r = i_state->submsg_o_r;

	const __m256i mask = _mm256_set_epi32(0x1b1a1918, 0x17161514,
		0x13121110, 0x1f1e1d1c, 0x07060504, 0x03020100, 0x0b0a0908, 0x0f0e0d0c);

	_mm256_storeu_si256(M256_CAST(submsg_o_l+0), _mm256_add_epi32(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
		_mm256_shuffle_epi8(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)), mask)));
	_mm256_storeu_si256(M256_CAST(submsg_o_r+0), _mm256_add_epi32(
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
		_mm256_shuffle_epi8(
			_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)), mask)));
}

inline void load_sc(const lsh_u32** p_const_v, size_t i)
{
	CRYPTOPP_ASSERT(p_const_v != NULLPTR);

	*p_const_v = &LSH256_StepConstants[i];
}

inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u32* submsg_e_l = i_state->submsg_e_l;
	lsh_u32* submsg_e_r = i_state->submsg_e_r;

	_mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l+0)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0))));
	_mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0))));
}

inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_AVX2_Internal* i_state)
{
	CRYPTOPP_ASSERT(i_state != NULLPTR);

	lsh_u32* submsg_o_l = i_state->submsg_o_l;
	lsh_u32* submsg_o_r = i_state->submsg_o_r;

	_mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
	_mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_r)),
		_mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
}

inline void add_blk(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
	_mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi32(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)),
		_mm256_loadu_si256(CONST_M256_CAST(cv_r))));
}

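// Rotate each of the eight 32-bit words left by R. AVX2 has no vector
// rotate instruction, so use a shift/shift/or sequence.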
template <unsigned int R>
inline void rotate_blk(lsh_u32 cv[8])
{
	_mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
		_mm256_slli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
		_mm256_srli_epi32(_mm256_loadu_si256(CONST_M256_CAST(cv)), 32-R)));
}

inline void xor_with_const(lsh_u32 cv_l[8], const lsh_u32 const_v[8])
{
	_mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)),
		_mm256_loadu_si256(CONST_M256_CAST(const_v))));
}

inline void rotate_msg_gamma(lsh_u32 cv_r[8])
{
	// g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
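	// Each rotation amount is a multiple of 8, so a byte shuffle rotates
	// word i left by g_gamma256[i] bits.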
	_mm256_storeu_si256(M256_CAST(cv_r+0),
		_mm256_shuffle_epi8(_mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
			_mm256_set_epi8(
				/* hi lane */ 15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1,
				/* lo lane */ 12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
}

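// Word permutation: reorder the words of cv_l and cv_r and exchange
// 128-bit lanes between them.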
inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
	__m256i temp = _mm256_shuffle_epi32(
		_mm256_loadu_si256(CONST_M256_CAST(cv_l)), _MM_SHUFFLE(3,1,0,2));
	_mm256_storeu_si256(M256_CAST(cv_r),
		_mm256_shuffle_epi32(
			_mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(1,2,3,0)));
	_mm256_storeu_si256(M256_CAST(cv_l),
		_mm256_permute2x128_si256(temp,
			_mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,3,0,1)));
	_mm256_storeu_si256(M256_CAST(cv_r),
		_mm256_permute2x128_si256(temp,
			_mm256_loadu_si256(CONST_M256_CAST(cv_r)), _MM_SHUFFLE(0,2,0,0)));
}

/* -------------------------------------------------------- *
* step function
* -------------------------------------------------------- */

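// One mix step: add cv_r into cv_l, rotate by Alpha, XOR in the step
// constants, add cv_l into cv_r, rotate by Beta, add again, then apply
// the gamma rotations to cv_r.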
template <unsigned int Alpha, unsigned int Beta>
inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
{
	add_blk(cv_l, cv_r);
	rotate_blk<Alpha>(cv_l);
	xor_with_const(cv_l, const_v);
	add_blk(cv_r, cv_l);
	rotate_blk<Beta>(cv_r);
	add_blk(cv_l, cv_r);
	rotate_msg_gamma(cv_r);
}

/* -------------------------------------------------------- *
* compression function
* -------------------------------------------------------- */

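// Compress one 128-byte message block. Steps 0 and 1 use the sub-messages
// as loaded; every later step expands them first. A final message addition
// follows the last step.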
inline void compress(LSH256_AVX2_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	LSH256_AVX2_Internal  s_state(ctx->cv_l);
	LSH256_AVX2_Internal* i_state = &s_state;

	const lsh_u32* const_v = NULL;
	lsh_u32* cv_l = ctx->cv_l;
	lsh_u32* cv_r = ctx->cv_r;

	load_msg_blk(i_state, pdMsgBlk);

	msg_add_even(cv_l, cv_r, i_state);
	load_sc(&const_v, 0);
	mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	msg_add_odd(cv_l, cv_r, i_state);
	load_sc(&const_v, 8);
	mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
	word_perm(cv_l, cv_r);

	for (size_t i = 1; i < NUM_STEPS / 2; i++)
	{
		msg_exp_even(i_state);
		msg_add_even(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		msg_exp_odd(i_state);
		msg_add_odd(cv_l, cv_r, i_state);
		load_sc(&const_v, 16 * i + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	msg_exp_even(i_state);
	msg_add_even(cv_l, cv_r, i_state);
}

/* -------------------------------------------------------- */

inline void load_iv(word32 cv_l[8], word32 cv_r[8], const word32 iv[16])
{
	// The IV's are 32-byte aligned so we can use aligned loads.
	_mm256_storeu_si256(M256_CAST(cv_l+0),
		_mm256_load_si256(CONST_M256_CAST(iv+0)));
	_mm256_storeu_si256(M256_CAST(cv_r+0),
		_mm256_load_si256(CONST_M256_CAST(iv+8)));
}

inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
{
	_mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
}

inline void zero_submsgs(LSH256_AVX2_Context* ctx)
{
	lsh_u32* sub_msgs = ctx->sub_msgs;

	_mm256_storeu_si256(M256_CAST(sub_msgs+ 0), _mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(sub_msgs+ 8), _mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(sub_msgs+16), _mm256_setzero_si256());
	_mm256_storeu_si256(M256_CAST(sub_msgs+24), _mm256_setzero_si256());
}

inline void init224(LSH256_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
}

inline void init256(LSH256_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	zero_submsgs(ctx);
	load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
}

/* -------------------------------------------------------- */

inline void fin(LSH256_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);

	_mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
		_mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
		_mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
}

/* -------------------------------------------------------- */

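// Emit the digest: copy the first hash_val_byte_len bytes of cv_l and, for
// digest lengths that are not a whole number of bytes, clear the low bits
// of the last byte that lie beyond the requested length.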
inline void get_hash(LSH256_AVX2_Context* ctx, lsh_u8* pbHashVal)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);
	CRYPTOPP_ASSERT(pbHashVal != NULLPTR);

	lsh_uint alg_type = ctx->alg_type;
	lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
	lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);

	// Multiplying by looks odd...
	memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
	if (hash_val_bit_len){
		pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
	}
}

/* -------------------------------------------------------- */

lsh_err lsh256_init_avx2(LSH256_AVX2_Context* ctx)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	lsh_u32 alg_type = ctx->alg_type;
	const lsh_u32* const_v = NULL;
	ctx->remain_databitlen = 0;

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
	AVX_Cleanup cleanup;

	switch (alg_type)
	{
	case LSH_TYPE_256_256:
		init256(ctx);
		return LSH_SUCCESS;
	case LSH_TYPE_256_224:
		init224(ctx);
		return LSH_SUCCESS;
	default:
		break;
	}

	lsh_u32* cv_l = ctx->cv_l;
	lsh_u32* cv_r = ctx->cv_r;

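	// Non-standard digest length: derive the IV by seeding the state with
	// the maximum digest size and the requested bit length, then running
	// the step functions over it.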
	zero_iv(cv_l, cv_r);
	cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
	cv_l[1] = LSH_GET_HASHBIT(alg_type);

	for (size_t i = 0; i < NUM_STEPS / 2; i++)
	{
		//Mix
		load_sc(&const_v, i * 16);
		mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);

		load_sc(&const_v, i * 16 + 8);
		mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
		word_perm(cv_l, cv_r);
	}

	return LSH_SUCCESS;
}

lsh_err lsh256_update_avx2(LSH256_AVX2_Context* ctx, const lsh_u8* data, size_t databitlen)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(data != NULLPTR);
	CRYPTOPP_ASSERT(databitlen % 8 == 0);
	CRYPTOPP_ASSERT(ctx->alg_type != 0);

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
	AVX_Cleanup cleanup;

	if (databitlen == 0){
		return LSH_SUCCESS;
	}

	// We are byte oriented. Tail bits will always be 0.
	size_t databytelen = databitlen >> 3;
	// lsh_uint pos2 = databitlen & 0x7;
	const size_t pos2 = 0;

	size_t remain_msg_byte = ctx->remain_databitlen >> 3;
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}
	if (remain_msg_bit > 0){
		return LSH_ERR_INVALID_DATABITLEN;
	}

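	// Not enough data for a full 128-byte block: buffer it and return.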
	if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
	{
		memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
		ctx->remain_databitlen += (lsh_uint)databitlen;
		remain_msg_byte += (lsh_uint)databytelen;
		if (pos2){
			ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		}
		return LSH_SUCCESS;
	}

	if (remain_msg_byte > 0){
		size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
		memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
		compress(ctx, ctx->last_block);
		data += more_byte;
		databytelen -= more_byte;
		remain_msg_byte = 0;
		ctx->remain_databitlen = 0;
	}

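	// Compress as many full blocks as possible directly from the caller's buffer.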
	while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
	{
		// This call to compress caused some trouble.
		// The data pointer can become unaligned in the
		// previous block.
		compress(ctx, data);
		data += LSH256_MSG_BLK_BYTE_LEN;
		databytelen -= LSH256_MSG_BLK_BYTE_LEN;
	}

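	// Buffer any trailing partial block for the next call.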
	if (databytelen > 0){
		memcpy(ctx->last_block, data, databytelen);
		ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
	}

	if (pos2){
		ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
		ctx->remain_databitlen += pos2;
	}

	return LSH_SUCCESS;
}

lsh_err lsh256_final_avx2(LSH256_AVX2_Context* ctx, lsh_u8* hashval)
{
	CRYPTOPP_ASSERT(ctx != NULLPTR);
	CRYPTOPP_ASSERT(hashval != NULLPTR);

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
	AVX_Cleanup cleanup;

	// We are byte oriented. Tail bits will always be 0.
	size_t remain_msg_byte = ctx->remain_databitlen >> 3;
	// lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
	const size_t remain_msg_bit = 0;

	if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
		return LSH_ERR_INVALID_STATE;
	}

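	// One-zeros padding: append a single 1 bit (0x80 for byte-aligned input),
	// then zero-fill the rest of the block.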
	if (remain_msg_bit){
		ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
	}
	else{
		ctx->last_block[remain_msg_byte] = 0x80;
	}
	memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);

	compress(ctx, ctx->last_block);

	fin(ctx);
	get_hash(ctx, hashval);

	return LSH_SUCCESS;
}

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)

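// These wrappers adapt the flat word32 state array used by LSH256_Base to
// the AVX2 context: words 0..7 hold cv_l, 8..15 cv_r, 16..47 the
// sub-messages, 48..79 the 128-byte message buffer, word 80 the algorithm
// type, and word 81 the remaining bit count.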
extern
void LSH256_Base_Restart_AVX2(word32* state)
{
	state[RemainingBits] = 0;
	LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh256_init_avx2(&ctx);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_init_avx2 failed");
}

extern
void LSH256_Base_Update_AVX2(word32* state, const byte *input, size_t size)
{
	LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh256_update_avx2(&ctx, input, 8*size);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_update_avx2 failed");
}

extern
void LSH256_Base_TruncatedFinal_AVX2(word32* state, byte *hash, size_t)
{
	LSH256_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
	lsh_err err = lsh256_final_avx2(&ctx, hash);

	if (err != LSH_SUCCESS)
		throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_final_avx2 failed");
}

NAMESPACE_END

#endif  // CRYPTOPP_AVX2_AVAILABLE