1 // tiger.cpp - originally written and placed in the public domain by Wei Dai
2 
3 #include "pch.h"
4 #include "config.h"
5 
6 #include "tiger.h"
7 #include "misc.h"
8 #include "cpu.h"
9 
// Build-time opt-out for the assembly code in this file: when
// CRYPTOPP_DISABLE_TIGER_ASM is defined, the ASM availability macros are
// undefined here so the "#if CRYPTOPP_SSE2_ASM_AVAILABLE" tests below
// evaluate to false and only the portable C++ code is compiled.
#if defined(CRYPTOPP_DISABLE_TIGER_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
#endif
16 
NAMESPACE_BEGIN(CryptoPP)17 NAMESPACE_BEGIN(CryptoPP)
18 
19 std::string Tiger::AlgorithmProvider() const
20 {
21 #ifndef CRYPTOPP_DISABLE_TIGER_ASM
22 # if CRYPTOPP_SSE2_ASM_AVAILABLE
23 	if (HasSSE2())
24 		return "SSE2";
25 # endif
26 #endif
27 	return "C++";
28 }
29 
InitState(HashWordType * state)30 void Tiger::InitState(HashWordType *state)
31 {
32 	state[0] = W64LIT(0x0123456789ABCDEF);
33 	state[1] = W64LIT(0xFEDCBA9876543210);
34 	state[2] = W64LIT(0xF096A5B4C3B2E187);
35 }
36 
TruncatedFinal(byte * digest,size_t digestSize)37 void Tiger::TruncatedFinal(byte *digest, size_t digestSize)
38 {
39 	CRYPTOPP_ASSERT(digest != NULLPTR);
40 	ThrowIfInvalidTruncatedSize(digestSize);
41 
42 	PadLastBlock(56, 0x01);
43 	CorrectEndianess(m_data, m_data, 56);
44 
45 	m_data[7] = GetBitCountLo();
46 
47 	Transform(m_state, m_data);
48 	CorrectEndianess(m_state, m_state, DigestSize());
49 	memcpy(digest, m_state, digestSize);
50 
51 	Restart();		// reinit for next use
52 }
53 
// Tiger compression function: mixes one block of eight 64-bit words (data)
// into the three-word chaining state (state), updating state in place.
//
// Two implementations are provided:
//  - an inline-assembly path using the MMX registers mm0-mm7, compiled only
//    when CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 and taken at
//    runtime only when HasSSE2() is true;
//  - a portable C++ path used otherwise.
// Both run three passes of eight rounds (multipliers 5, 7 and 9) with a key
// schedule between passes, followed by a feed-forward into state.
void Tiger::Transform (word64 *state, const word64 *data)
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
	if (HasSSE2())
	{
		// Register setup: under GCC-style compilers the operands arrive via
		// the constraints at the end of the asm statement (eax=state,
		// esi=data, edx=table); otherwise they are loaded explicitly below.
#ifdef __GNUC__
		__asm__ __volatile__
		(
		INTEL_NOPREFIX
		AS_PUSH_IF86(bx)
#else
		AS2(	lea		edx, [table])
		AS2(	mov		eax, state)
		AS2(	mov		esi, data)
#endif
		// mm0/mm1/mm2 hold the working copies of state[0..2]; mm5 keeps the
		// incoming state[1] for the feed-forward subtraction at the end.
		// mm7 and mm6 are constants read from just past the four 2048-byte
		// lookup tables (presumably all-ones and 0xA5A5..., matching the
		// "~" and the W64LIT constant in the C++ key_schedule - confirm
		// against the table definition).
		AS2(	movq	mm0, [eax])
		AS2(	movq	mm1, [eax+1*8])
		AS2(	movq	mm5, mm1)
		AS2(	movq	mm2, [eax+2*8])
		AS2(	movq	mm7, [edx+4*2048+0*8])
		AS2(	movq	mm6, [edx+4*2048+1*8])
		// Save esp in ecx, align the stack down to 16 bytes and reserve
		// 8*8 bytes of scratch space for the expanded key words.
		// NOTE(review): AS_PUSH_IF86(cx) presumably pushes the saved stack
		// pointer, which would explain why the scratch area is addressed
		// as esp+4 below and why AS_POP_IF86(sp) restores esp - confirm
		// against the macro definitions.
		AS2(	mov		ecx, esp)
		AS2(	and		esp, 0xfffffff0)
		AS2(	sub		esp, 8*8)
		AS_PUSH_IF86(cx)

// One round, mirroring round() in the C++ path: c ^= [x]; the eight bytes
// of c are extracted through ecx/edi and used as table indices. mm3
// collects the lookups for bytes 0,2,4,6 (tables at 0,1,2,3*2048) and is
// subtracted from a; mm4 collects the lookups for bytes 1,3,5,7 (tables at
// 3,2,1,0*2048) and is added to b; b is then multiplied by mul.
#define SSE2_round(a,b,c,x,mul) \
		AS2(	pxor	c, [x])\
		AS2(	movd	ecx, c)\
		AS2(	movzx	edi, cl)\
		AS2(	movq	mm3, [edx+0*2048+edi*8])\
		AS2(	movzx	edi, ch)\
		AS2(	movq	mm4, [edx+3*2048+edi*8])\
		AS2(	shr		ecx, 16)\
		AS2(	movzx	edi, cl)\
		AS2(	pxor	mm3, [edx+1*2048+edi*8])\
		AS2(	movzx	edi, ch)\
		AS2(	pxor	mm4, [edx+2*2048+edi*8])\
		AS3(	pextrw	ecx, c, 2)\
		AS2(	movzx	edi, cl)\
		AS2(	pxor	mm3, [edx+2*2048+edi*8])\
		AS2(	movzx	edi, ch)\
		AS2(	pxor	mm4, [edx+1*2048+edi*8])\
		AS3(	pextrw	ecx, c, 3)\
		AS2(	movzx	edi, cl)\
		AS2(	pxor	mm3, [edx+3*2048+edi*8])\
		AS2(	psubq	a, mm3)\
		AS2(	movzx	edi, ch)\
		AS2(	pxor	mm4, [edx+0*2048+edi*8])\
		AS2(	paddq	b, mm4)\
		SSE2_mul_##mul(b)

// b *= 5, computed as (b << 2) + b.
#define SSE2_mul_5(b)	\
		AS2(	movq	mm3, b)\
		AS2(	psllq	b, 2)\
		AS2(	paddq	b, mm3)

// b *= 7, computed as (b << 3) - b.
#define SSE2_mul_7(b)	\
		AS2(	movq	mm3, b)\
		AS2(	psllq	b, 3)\
		AS2(	psubq	b, mm3)

// b *= 9, computed as (b << 3) + b.
#define SSE2_mul_9(b)	\
		AS2(	movq	mm3, b)\
		AS2(	psllq	b, 3)\
		AS2(	paddq	b, mm3)

// Label numbers for the loop exit of each pass; the loop head of a pass
// uses the multiplier itself (5, 7 or 9) as its label.
#define label2_5 1
#define label2_7 2
#define label2_9 3

// One pass of eight rounds over the words at X, mirroring pass() in the
// C++ path. ebx is the byte offset into X: it starts at 0 and advances by
// 3*8 per iteration; the loop exits after the second round of the
// iteration in which ebx == 6*8 (i.e. after words 6 and 7).
#define SSE2_pass(A,B,C,mul,X)	\
		AS2(	xor		ebx, ebx)\
		ASL(mul)\
		SSE2_round(A,B,C,X+0*8+ebx,mul)\
		SSE2_round(B,C,A,X+1*8+ebx,mul)\
		AS2(	cmp		ebx, 6*8)\
		ASJ(	je,		label2_##mul, f)\
		SSE2_round(C,A,B,X+2*8+ebx,mul)\
		AS2(	add		ebx, 3*8)\
		ASJ(	jmp,	mul, b)\
		ASL(label2_##mul)

// Key schedule: derives the eight words at Y from the eight words at X,
// following the same sequence of operations as key_schedule() in the C++
// path. X and Y may refer to the same memory (see the second invocation
// below). mm3/mm4 are scratch; mm6, mm7 and [edx+4*2048+2*8] supply the
// constants.
#define SSE2_key_schedule(Y,X) \
		AS2(	movq	mm3, [X+7*8])\
		AS2(	pxor	mm3, mm6)\
		AS2(	movq	mm4, [X+0*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+0*8], mm4)\
		AS2(	pxor	mm4, [X+1*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+1*8], mm4)\
		AS2(	paddq	mm4, [X+2*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psllq	mm3, 19)\
		AS2(	movq	[Y+2*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [X+3*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+3*8], mm4)\
		AS2(	pxor	mm4, [X+4*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+4*8], mm4)\
		AS2(	paddq	mm4, [X+5*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psrlq	mm3, 23)\
		AS2(	movq	[Y+5*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [X+6*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+6*8], mm4)\
		AS2(	pxor	mm4, [X+7*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+7*8], mm4)\
		AS2(	paddq	mm4, [Y+0*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psllq	mm3, 19)\
		AS2(	movq	[Y+0*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [Y+1*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+1*8], mm4)\
		AS2(	pxor	mm4, [Y+2*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+2*8], mm4)\
		AS2(	paddq	mm4, [Y+3*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psrlq	mm3, 23)\
		AS2(	movq	[Y+3*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [Y+4*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+4*8], mm4)\
		AS2(	pxor	mm4, [Y+5*8])\
		AS2(	movq	[Y+5*8], mm4)\
		AS2(	paddq	mm4, [Y+6*8])\
		AS2(	movq	[Y+6*8], mm4)\
		AS2(	pxor	mm4, [edx+4*2048+2*8])\
		AS2(	movq	mm3, [Y+7*8])\
		AS2(	psubq	mm3, mm4)\
		AS2(	movq	[Y+7*8], mm3)

		// Pass 1 reads the message block directly; passes 2 and 3 read the
		// expanded key held in the stack scratch area. The register roles
		// rotate between passes exactly as a/b/c do in the C++ path.
		SSE2_pass(mm0, mm1, mm2, 5, esi)
		SSE2_key_schedule(esp+4, esi)
		SSE2_pass(mm2, mm0, mm1, 7, esp+4)
		SSE2_key_schedule(esp+4, esp+4)
		SSE2_pass(mm1, mm2, mm0, 9, esp+4)

		// Feed-forward: state[0] ^= a, state[1] = b - old state[1] (kept in
		// mm5), state[2] += c.
		AS2(	pxor	mm0, [eax+0*8])
		AS2(	movq	[eax+0*8], mm0)
		AS2(	psubq	mm1, mm5)
		AS2(	movq	[eax+1*8], mm1)
		AS2(	paddq	mm2, [eax+2*8])
		AS2(	movq	[eax+2*8], mm2)

		// Restore the stack pointer and leave MMX state (emms) so later
		// x87 floating-point code is not affected.
		AS_POP_IF86(sp)
		AS1(	emms)

#ifdef __GNUC__
		AS_POP_IF86(bx)
		ATT_PREFIX
			:
			: "a" (state), "S" (data), "d" (table)
			: "%ecx", "%edi", "memory", "cc"
		);
#endif
	}
	else
#endif
	{
		// Portable path. a, b, c are the working copies of the chaining
		// state; Y receives the expanded key words between passes.
		word64 a = state[0];
		word64 b = state[1];
		word64 c = state[2];
		word64 Y[8];

// The four lookup tables are consecutive 256-entry slices of table.
#define t1 (table)
#define t2 (table+256)
#define t3 (table+256*2)
#define t4 (table+256*3)

// One round: mix message word x into c, then use the even-numbered bytes
// of c to update a and the odd-numbered bytes to update b, and finally
// multiply b by the pass multiplier.
#define round(a,b,c,x,mul) \
	c ^= x; \
	a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \
	b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
	b *= mul

// One pass: eight rounds over X[0..7], rotating the roles of a, b and c
// each round. The loop body handles three words per iteration and breaks
// after the second round when i == 6 (words 6 and 7).
#define pass(a,b,c,mul,X) {\
	int i=0;\
	while (true)\
	{\
		round(a,b,c,X[i+0],mul); \
		round(b,c,a,X[i+1],mul); \
		if (i==6)\
			break;\
		round(c,a,b,X[i+2],mul); \
		i+=3;\
	}}

// Key schedule: computes the eight words of Y from the eight words of X.
// It is also invoked with X and Y naming the same array (second call
// below), so the statement order here must not be changed.
#define key_schedule(Y,X) \
	Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
	Y[1] = X[1] ^ Y[0]; \
	Y[2] = X[2] + Y[1]; \
	Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \
	Y[4] = X[4] ^ Y[3]; \
	Y[5] = X[5] + Y[4]; \
	Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \
	Y[7] = X[7] ^ Y[6]; \
	Y[0] += Y[7]; \
	Y[1] -= Y[0] ^ ((~Y[7])<<19); \
	Y[2] ^= Y[1]; \
	Y[3] += Y[2]; \
	Y[4] -= Y[3] ^ ((~Y[2])>>23); \
	Y[5] ^= Y[4]; \
	Y[6] += Y[5]; \
	Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)

		// Three passes with multipliers 5, 7 and 9; the roles of a, b, c
		// rotate between passes and the key is re-scheduled in between.
		pass(a,b,c,5,data);
		key_schedule(Y,data);
		pass(c,a,b,7,Y);
		key_schedule(Y,Y);
		pass(b,c,a,9,Y);

		// Feed-forward: combine the result with the incoming state.
		state[0] = a ^ state[0];
		state[1] = b - state[1];
		state[2] = c + state[2];
	}
}
281 
282 NAMESPACE_END
283