1 // panama.cpp - written and placed in the public domain by Wei Dai
2 
3 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM panama.cpp" to generate MASM code
4 
5 #include "pch.h"
6 
7 #ifndef CRYPTOPP_GENERATE_X64_MASM
8 
9 #include "panama.h"
10 #include "secblock.h"
11 #include "misc.h"
12 #include "cpu.h"
13 
NAMESPACE_BEGIN(CryptoPP)14 NAMESPACE_BEGIN(CryptoPP)
15 
16 #if CRYPTOPP_MSC_VERSION
17 # pragma warning(disable: 4731)
18 #endif
19 
20 template <class B>
21 void Panama<B>::Reset()
22 {
23 	memset(m_state, 0, m_state.SizeInBytes());
24 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
25 	m_state[17] = HasSSSE3();
26 #endif
27 }
28 
29 #endif	// #ifndef CRYPTOPP_GENERATE_X64_MASM
30 
// Panama_SSE2_Pull -- assembly implementation of the Panama round loop using
// SSE2, with an optional SSSE3 (palignr) variant for the dword shifts.
//
//   count : number of rounds to run; zero exits immediately (jz to label 5).
//   state : state block. As accessed below: bytes 0..63 are four 16-byte rows
//           kept in xmm0..xmm3, the dword at offset 4*16 is kept in eax, the
//           dword at offset 4*17 is the running buffer offset (its low bit
//           selects the SSSE3 path), and the buffer stages start at 20*4.
//           The rows are accessed with movdqa.
//   z     : keystream destination, 32 bytes per round; nothing is written
//           when z is null.
//   y     : optional data XORed into the keystream before it is stored;
//           skipped when null. Advanced by 32 bytes per round when used.
//
// Depending on the build this region is either just an extern "C" prototype
// (CRYPTOPP_X64_MASM_AVAILABLE), MASM source text (CRYPTOPP_GENERATE_X64_MASM),
// or inline assembly wrapped in a C++ function. The AS*/ASJ/ASL/ASS macros and
// the AS_REG_n register aliases are defined outside this file section.
31 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
32 extern "C" {
33 void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y);
34 }
35 #elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
36 
37 #ifdef CRYPTOPP_GENERATE_X64_MASM
// MASM prologue: save rdi and reserve 2*16 bytes of stack to hold xmm6/xmm7,
// which are restored in the epilogue at the end of the routine.
38 	Panama_SSE2_Pull	PROC FRAME
39 	rex_push_reg rdi
40 	alloc_stack(2*16)
41 	save_xmm128 xmm6, 0h
42 	save_xmm128 xmm7, 10h
43 	.endprolog
44 #else
45 void CRYPTOPP_NOINLINE Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y)
46 {
47 #if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
// GNU-style inline asm: the four arguments arrive in registers through the
// input constraints listed after the instruction stream.
48 	asm __volatile__
49 	(
50 	INTEL_NOPREFIX
51 	AS_PUSH_IF86(	bx)
52 #else
// Otherwise the arguments are moved into AS_REG_1..AS_REG_4 explicitly:
// AS_REG_1 = count, AS_REG_2 = state, AS_REG_3 = z, AS_REG_4 = y.
53 	AS2(	mov		AS_REG_1, count)
54 	AS2(	mov		AS_REG_2, state)
55 	AS2(	mov		AS_REG_3, z)
56 	AS2(	mov		AS_REG_4, y)
57 #endif
58 #endif	// #ifdef CRYPTOPP_GENERATE_X64_MASM
59 
// REG_loopEnd names the location that holds the buffer offset at which the
// round loop stops; it differs per target.
60 #if CRYPTOPP_BOOL_X32
61 	#define REG_loopEnd			r8d
62 #elif CRYPTOPP_BOOL_X86
63 	#define REG_loopEnd			[esp]
64 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
65 	#define REG_loopEnd			rdi
66 #else
67 	#define REG_loopEnd			r8
68 #endif
69 
// count *= 32, because every round advances the buffer offset by 32; leave
// through label 5 when there is nothing to do. AS_REG_6 becomes the current
// buffer offset (state word 17) and AS_REG_1 the offset at which to stop.
70 	AS2(	shl		AS_REG_1, 5)
71 	ASJ(	jz,		5, f)
72 	AS2(	mov		AS_REG_6d, [AS_REG_2+4*17])
73 	AS2(	add		AS_REG_1, AS_REG_6)
74 
// x64 keeps the stop offset in a register. The other targets push bp and cx
// through AS_PUSH_IF86; on x86 REG_loopEnd is [esp], i.e. the slot pushed last.
75 	#if CRYPTOPP_BOOL_X64
76 		AS2(	mov		REG_loopEnd, AS_REG_1)
77 	#else
78 		AS_PUSH_IF86(	bp)
79 		// AS1(	push	AS_REG_1) // AS_REG_1 is defined as ecx under X86 and X32 (see cpu.h)
80 		AS_PUSH_IF86(	cx)
81 	#endif
82 
// Load the working state: four rows into xmm0..xmm3 and the dword at 4*16
// into eax.
83 	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_2+0*16])
84 	AS2(	movdqa	xmm1, XMMWORD_PTR [AS_REG_2+1*16])
85 	AS2(	movdqa	xmm2, XMMWORD_PTR [AS_REG_2+2*16])
86 	AS2(	movdqa	xmm3, XMMWORD_PTR [AS_REG_2+3*16])
87 	AS2(	mov		eax, dword ptr [AS_REG_2+4*16])
88 
// Label 4: top of the per-round loop (target of the jne at the bottom).
89 	ASL(4)
90 	// gamma and pi
// Build xmm5 and xmm6, the dword-shifted companions of the rows that pi4
// consumes below. If bit 0 of AS_REG_6 is set the palignr variant at label 6
// is used; otherwise the movss/pshufd sequence that follows.
91 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
92 	AS2(	test	AS_REG_6, 1)
93 	ASJ(	jnz,	6, f)
94 #endif
95 	AS2(	movdqa	xmm6, xmm2)
96 	AS2(	movss	xmm6, xmm3)
97 	ASS(	pshufd	xmm5, xmm6, 0, 3, 2, 1)
98 	AS2(	movd	xmm6, eax)
99 	AS2(	movdqa	xmm7, xmm3)
100 	AS2(	movss	xmm7, xmm6)
101 	ASS(	pshufd	xmm6, xmm7, 0, 3, 2, 1)
102 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
103 	ASJ(	jmp,	7, f)
104 	ASL(6)
105 	AS2(	movdqa	xmm5, xmm3)
106 	AS3(	palignr	xmm5, xmm2, 4)
107 	AS2(	movd	xmm6, eax)
108 	AS3(	palignr	xmm6, xmm3, 4)
109 	ASL(7)
110 #endif
111 
// Scalar part of the step: eax ^= (~low dword of xmm2) | (low dword of xmm3).
112 	AS2(	movd	AS_REG_1d, xmm2)
113 	AS1(	not		AS_REG_1d)
114 	AS2(	movd	AS_REG_7d, xmm3)
115 	AS2(	or		AS_REG_1d, AS_REG_7d)
116 	AS2(	xor		eax, AS_REG_1d)
117 
// SSE2_Index(i): storage slot of logical word i, (i*13+16) mod 17 -- the same
// mapping the a(i)/c(i) macros in Panama<B>::Iterate use.
118 #define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
119 
// pi(i): take the low dword of xmm7, rotate it left by (j*(j+1)/2) mod 32 with
// j = 5*i mod 17, and store it to state slot SSE2_Index(j).
120 #define pi(i)	\
121 	AS2(	movd	AS_REG_1d, xmm7)\
122 	AS2(	rol		AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
123 	AS2(	mov		[AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)
124 
// pi4(x, y, z, a, b, c, d): xmm7 = (~x | y) ^ z (pcmpeqb makes all-ones, so the
// first pxor yields ~x), then pi() is applied to each of the four dwords in
// turn; pshuflw/punpckhqdq bring the next dword into the low position.
125 #define pi4(x, y, z, a, b, c, d)	\
126 	AS2(	pcmpeqb	xmm7, xmm7)\
127 	AS2(	pxor	xmm7, x)\
128 	AS2(	por		xmm7, y)\
129 	AS2(	pxor	xmm7, z)\
130 	pi(a)\
131 	ASS(	pshuflw	xmm7, xmm7, 1, 0, 3, 2)\
132 	pi(b)\
133 	AS2(	punpckhqdq	xmm7, xmm7)\
134 	pi(c)\
135 	ASS(	pshuflw	xmm7, xmm7, 1, 0, 3, 2)\
136 	pi(d)
137 
138 	pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
139 	pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
140 	pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
141 	pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
142 
143 	// output keystream and update buffer here to hide partial memory stalls between pi and theta
144 	AS2(	movdqa	xmm4, xmm3)
145 	AS2(	punpcklqdq	xmm3, xmm2)		// 1 5 2 6
146 	AS2(	punpckhdq	xmm4, xmm2)		// 9 10 13 14
147 	AS2(	movdqa	xmm2, xmm1)
148 	AS2(	punpcklqdq	xmm1, xmm0)		// 3 7 4 8
149 	AS2(	punpckhdq	xmm2, xmm0)		// 11 12 15 16
150 
151 	// keystream
// No output pointer (z == 0): skip straight to the buffer update at label 0.
152 	AS2(	test	AS_REG_3, AS_REG_3)
153 	ASJ(	jz,		0, f)
// xmm4 and xmm6 hold the 32 keystream bytes of this round.
154 	AS2(	movdqa	xmm6, xmm4)
155 	AS2(	punpcklqdq	xmm4, xmm2)
156 	AS2(	punpckhqdq	xmm6, xmm2)
// Optional XOR with y: a pointer that is not a multiple of 16 goes to label 2
// (movdqu loads); a null pointer skips the XOR (label 1); otherwise the XOR
// reads memory directly.
157 	AS2(	test	AS_REG_4, 15)
158 	ASJ(	jnz,	2, f)
159 	AS2(	test	AS_REG_4, AS_REG_4)
160 	ASJ(	jz,		1, f)
161 	AS2(	pxor	xmm4, [AS_REG_4])
162 	AS2(	pxor	xmm6, [AS_REG_4+16])
163 	AS2(	add		AS_REG_4, 32)
164 	ASJ(	jmp,	1, f)
165 	ASL(2)
166 	AS2(	movdqu	xmm0, [AS_REG_4])
167 	AS2(	movdqu	xmm2, [AS_REG_4+16])
168 	AS2(	pxor	xmm4, xmm0)
169 	AS2(	pxor	xmm6, xmm2)
170 	AS2(	add		AS_REG_4, 32)
// Label 1: store the 32 bytes to z, with movdqa when z is a multiple of 16 and
// with movdqu (label 3) otherwise; z advances by 32 either way.
171 	ASL(1)
172 	AS2(	test	AS_REG_3, 15)
173 	ASJ(	jnz,	3, f)
174 	AS2(	movdqa	XMMWORD_PTR [AS_REG_3], xmm4)
175 	AS2(	movdqa	XMMWORD_PTR [AS_REG_3+16], xmm6)
176 	AS2(	add		AS_REG_3, 32)
177 	ASJ(	jmp,	0, f)
178 	ASL(3)
179 	AS2(	movdqu	XMMWORD_PTR [AS_REG_3], xmm4)
180 	AS2(	movdqu	XMMWORD_PTR [AS_REG_3+16], xmm6)
181 	AS2(	add		AS_REG_3, 32)
182 	ASL(0)
183 
184 	// buffer update
// AS_REG_1 = (offset + 32) & 31*32 and AS_REG_7 = (offset + (32-24)*32) & 31*32:
// byte offsets of two buffer stages, wrapped to the ring of 32 stages of 32
// bytes. They match b0 and b25 in Panama<B>::Iterate.
185 	AS2(	lea		AS_REG_1, [AS_REG_6 + 32])
186 	AS2(	and		AS_REG_1, 31*32)
187 	AS2(	lea		AS_REG_7, [AS_REG_6 + (32-24)*32])
188 	AS2(	and		AS_REG_7, 31*32)
189 
190 	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8])
191 	AS2(	pxor	xmm3, xmm0)
192 	ASS(	pshufd	xmm0, xmm0, 2, 3, 0, 1)
193 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3)
194 	AS2(	pxor	xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8])
195 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0)
196 
197 	AS2(	movdqa	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8])
198 	AS2(	pxor	xmm1, xmm4)
199 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1)
200 	AS2(	pxor	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8])
201 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4)
202 
203 	// theta
// Reload the four rows from memory: the pi() stores above wrote the new
// values there.
204 	AS2(	movdqa	xmm3, XMMWORD_PTR [AS_REG_2+3*16])
205 	AS2(	movdqa	xmm2, XMMWORD_PTR [AS_REG_2+2*16])
206 	AS2(	movdqa	xmm1, XMMWORD_PTR [AS_REG_2+1*16])
207 	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_2+0*16])
208 
// Build the dword-shifted copies xmm4..xmm7 of the rows, again choosing
// between the movss/pshufd sequence and the palignr variant (label 8) on bit 0
// of AS_REG_6.
209 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
210 	AS2(	test	AS_REG_6, 1)
211 	ASJ(	jnz,	8, f)
212 #endif
213 	AS2(	movd	xmm6, eax)
214 	AS2(	movdqa	xmm7, xmm3)
215 	AS2(	movss	xmm7, xmm6)
216 	AS2(	movdqa	xmm6, xmm2)
217 	AS2(	movss	xmm6, xmm3)
218 	AS2(	movdqa	xmm5, xmm1)
219 	AS2(	movss	xmm5, xmm2)
220 	AS2(	movdqa	xmm4, xmm0)
221 	AS2(	movss	xmm4, xmm1)
222 	ASS(	pshufd	xmm7, xmm7, 0, 3, 2, 1)
223 	ASS(	pshufd	xmm6, xmm6, 0, 3, 2, 1)
224 	ASS(	pshufd	xmm5, xmm5, 0, 3, 2, 1)
225 	ASS(	pshufd	xmm4, xmm4, 0, 3, 2, 1)
226 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
227 	ASJ(	jmp,	9, f)
228 	ASL(8)
229 	AS2(	movd	xmm7, eax)
230 	AS3(	palignr	xmm7, xmm3, 4)
231 	AS2(	movq	xmm6, xmm3)
232 	AS3(	palignr	xmm6, xmm2, 4)
233 	AS2(	movq	xmm5, xmm2)
234 	AS3(	palignr	xmm5, xmm1, 4)
235 	AS2(	movq	xmm4, xmm1)
236 	AS3(	palignr	xmm4, xmm0, 4)
237 	ASL(9)
238 #endif
239 
// Scalar word: eax = eax ^ 1 ^ (low dword of xmm0) ^ (low dword of xmm3).
240 	AS2(	xor		eax, 1)
241 	AS2(	movd	AS_REG_1d, xmm0)
242 	AS2(	xor		eax, AS_REG_1d)
243 	AS2(	movd	AS_REG_1d, xmm3)
244 	AS2(	xor		eax, AS_REG_1d)
245 
// Vector words: each row is XORed with its neighbouring row and with the
// shifted copies prepared above.
246 	AS2(	pxor	xmm3, xmm2)
247 	AS2(	pxor	xmm2, xmm1)
248 	AS2(	pxor	xmm1, xmm0)
249 	AS2(	pxor	xmm0, xmm7)
250 	AS2(	pxor	xmm3, xmm7)
251 	AS2(	pxor	xmm2, xmm6)
252 	AS2(	pxor	xmm1, xmm5)
253 	AS2(	pxor	xmm0, xmm4)
254 
255 	// sigma
// AS_REG_1 and AS_REG_7 address the buffer stages at offset+(32-4)*32 and
// offset+16*32 (wrapped); they match b4 and b16 in Panama<B>::Iterate. Their
// contents are interleaved and XORed into the rows.
256 	AS2(	lea		AS_REG_1, [AS_REG_6 + (32-4)*32])
257 	AS2(	and		AS_REG_1, 31*32)
258 	AS2(	lea		AS_REG_7, [AS_REG_6 + 16*32])
259 	AS2(	and		AS_REG_7, 31*32)
260 
261 	AS2(	movdqa	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16])
262 	AS2(	movdqa	xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16])
263 	AS2(	movdqa	xmm6, xmm4)
264 	AS2(	punpcklqdq	xmm4, xmm5)
265 	AS2(	punpckhqdq	xmm6, xmm5)
266 	AS2(	pxor	xmm3, xmm4)
267 	AS2(	pxor	xmm2, xmm6)
268 
269 	AS2(	movdqa	xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16])
270 	AS2(	movdqa	xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16])
271 	AS2(	movdqa	xmm6, xmm4)
272 	AS2(	punpcklqdq	xmm4, xmm5)
273 	AS2(	punpckhqdq	xmm6, xmm5)
274 	AS2(	pxor	xmm1, xmm4)
275 	AS2(	pxor	xmm0, xmm6)
276 
277 	// loop
// Advance the buffer offset by one stage and repeat until it reaches the stop
// offset computed before the loop.
278 	AS2(	add		AS_REG_6, 32)
279 	AS2(	cmp		AS_REG_6, REG_loopEnd)
280 	ASJ(	jne,	4, b)
281 
282 	// save state
// Write eax and the four rows back to the state block.
// NOTE(review): nothing in this routine stores AS_REG_6 back to
// [AS_REG_2+4*17]; confirm that the advanced buffer offset is not expected to
// persist across calls.
283 	AS2(	mov		[AS_REG_2+4*16], eax)
284 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+3*16], xmm3)
285 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+2*16], xmm2)
286 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+1*16], xmm1)
287 	AS2(	movdqa	XMMWORD_PTR [AS_REG_2+0*16], xmm0)
288 
// Undo the pushes made before the loop on the non-x64 targets: drop the saved
// stop-offset slot from the stack, then restore bp.
289 	#if CRYPTOPP_BOOL_X32
290 		AS2(	add		esp, 8)
291 		AS_POP_IF86(	bp)
292 	#elif CRYPTOPP_BOOL_X86
293 		AS2(	add		esp, 4)
294 		AS_POP_IF86(	bp)
295 	#endif
// Label 5: common exit, also reached directly when count is zero.
296 	ASL(5)
297 
// GNU-style inline asm tail: restore bx, then the operand lists. There are no
// outputs; the inputs are pinned to specific registers and the remaining
// entries are the clobber list.
298 #if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
299 		AS_POP_IF86(	bx)
300 		ATT_PREFIX
301 			:
302 	#if CRYPTOPP_BOOL_X64
303 			: "D" (count), "S" (state), "d" (z), "c" (y)
304 			: "%r8", "%r9", "r10", "%eax", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
305 	#else
306 			: "c" (count), "d" (state), "S" (z), "D" (y)
307 			: "%eax", "memory", "cc"
308 	#endif
309 	);
310 #endif
311 
// MASM epilogue: restore xmm6/xmm7 from the stack slots filled in the
// prologue, release the stack space, restore rdi and return.
312 #ifdef CRYPTOPP_GENERATE_X64_MASM
313 	movdqa	xmm6, [rsp + 0h]
314 	movdqa	xmm7, [rsp + 10h]
315 	add rsp, 2*16
316 	pop	rdi
317 	ret
318 	Panama_SSE2_Pull ENDP
319 #else
320 }
321 #endif
322 #endif	// #ifdef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
323 
324 #ifndef CRYPTOPP_GENERATE_X64_MASM
325 
326 template <class B>
327 void Panama<B>::Iterate(size_t count, const word32 *p, byte *output, const byte *input, KeystreamOperation operation)
328 {
329 	CRYPTOPP_ASSERT(IsAlignedOn(m_state,GetAlignmentOf<word32>()));
330 	word32 bstart = m_state[17];
331 	word32 *const aPtr = m_state;
332 	word32 cPtr[17];
333 
334 #define bPtr ((byte *)(aPtr+20))
335 
336 // reorder the state for SSE2
337 // a and c: 4 8 12 16 | 3 7 11 15 | 2 6 10 14 | 1 5 9 13 | 0
338 //			xmm0		xmm1		xmm2		xmm3		eax
339 #define a(i) aPtr[((i)*13+16) % 17]		// 13 is inverse of 4 mod 17
340 #define c(i) cPtr[((i)*13+16) % 17]
341 // b: 0 4 | 1 5 | 2 6 | 3 7
342 #define b(i, j) b##i[(j)*2%8 + (j)/4]
343 
344 // buffer update
345 #define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
346 #define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
347 // gamma and pi
348 #define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
349 // theta and sigma
350 #define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
351 #define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
352 #define TS1L(i) T(i+1, b(4,i))
353 #define TS2(i) T(i+9, b(16,i))
354 
355 	while (count--)
356 	{
357 		if (output)
358 		{
359 #define PANAMA_OUTPUT(x)	\
360 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, a(0+9));\
361 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, a(1+9));\
362 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, a(2+9));\
363 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a(3+9));\
364 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 4, a(4+9));\
365 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 5, a(5+9));\
366 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 6, a(6+9));\
367 	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 7, a(7+9));
368 
369 			typedef word32 WordType;
370 			CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(PANAMA_OUTPUT, 4*8);
371 		}
372 
373 		word32 *const b16 = (word32 *)(void *)(bPtr+((bstart+16*32) & 31*32));
374 		word32 *const b4 = (word32 *)(void *)(bPtr+((bstart+(32-4)*32) & 31*32));
375        	bstart += 32;
376 		word32 *const b0 = (word32 *)(void *)(bPtr+((bstart) & 31*32));
377 		word32 *const b25 = (word32 *)(void *)(bPtr+((bstart+(32-25)*32) & 31*32));
378 
379 		if (p)
380 		{
381 			US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
382 		}
383 		else
384 		{
385 			UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
386 		}
387 
388 		GP(0);
389 		GP(1);
390 		GP(2);
391 		GP(3);
392 		GP(4);
393 		GP(5);
394 		GP(6);
395 		GP(7);
396 		GP(8);
397 		GP(9);
398 		GP(10);
399 		GP(11);
400 		GP(12);
401 		GP(13);
402 		GP(14);
403 		GP(15);
404 		GP(16);
405 
406 		T(0,1);
407 
408 		if (p)
409 		{
410 			TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
411 			p += 8;
412 		}
413 		else
414 		{
415 			TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
416 		}
417 
418 		TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
419 	}
420 	m_state[17] = bstart;
421 }
422 
423 namespace Weak {
424 template <class B>
HashMultipleBlocks(const word32 * input,size_t length)425 size_t PanamaHash<B>::HashMultipleBlocks(const word32 *input, size_t length)
426 {
427 	this->Iterate(length / this->BLOCKSIZE, input);
428 	return length % this->BLOCKSIZE;
429 }
430 
431 template <class B>
TruncatedFinal(byte * hash,size_t size)432 void PanamaHash<B>::TruncatedFinal(byte *hash, size_t size)
433 {
434 	this->ThrowIfInvalidTruncatedSize(size);
435 
436 	this->PadLastBlock(this->BLOCKSIZE, 0x01);
437 
438 	HashEndianCorrectedBlock(this->m_data);
439 
440 	this->Iterate(32);	// pull
441 
442 	FixedSizeSecBlock<word32, 8> buf;
443 	this->Iterate(1, NULL, buf.BytePtr(), NULL);
444 
445 	memcpy(hash, buf, size);
446 
447 	this->Restart();		// reinit for next use
448 }
449 }
450 
451 template <class B>
CipherSetKey(const NameValuePairs & params,const byte * key,size_t length)452 void PanamaCipherPolicy<B>::CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
453 {
454 	CRYPTOPP_UNUSED(params); CRYPTOPP_UNUSED(length);
455 	CRYPTOPP_ASSERT(length==32);
456 	memcpy(m_key, key, 32);
457 }
458 
459 template <class B>
CipherResynchronize(byte * keystreamBuffer,const byte * iv,size_t length)460 void PanamaCipherPolicy<B>::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length)
461 {
462 	CRYPTOPP_UNUSED(keystreamBuffer); CRYPTOPP_UNUSED(iv); CRYPTOPP_UNUSED(length);
463 	CRYPTOPP_ASSERT(IsAlignedOn(iv,GetAlignmentOf<word32>()));
464 	CRYPTOPP_ASSERT(length==32);
465 
466 	this->Reset();
467 	this->Iterate(1, m_key);
468 	if (iv && IsAligned<word32>(iv))
469 		this->Iterate(1, (const word32 *)(void *)iv);
470 	else
471 	{
472 		FixedSizeSecBlock<word32, 8> buf;
473 		if (iv)
474 			memcpy(buf, iv, 32);
475 		else
476 			memset(buf, 0, 32);
477 		this->Iterate(1, buf);
478 	}
479 
480 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
481 	if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2() && !IsP4())		// SSE2 code is slower on P4 Prescott
482 		Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
483 	else
484 #endif
485 		this->Iterate(32);
486 }
487 
488 template <class B>
GetAlignment() const489 unsigned int PanamaCipherPolicy<B>::GetAlignment() const
490 {
491 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
492 	if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
493 		return 16;
494 	else
495 #endif
496 		return 1;
497 }
498 
499 template <class B>
OperateKeystream(KeystreamOperation operation,byte * output,const byte * input,size_t iterationCount)500 void PanamaCipherPolicy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
501 {
502 #if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_PANAMA_ASM)
503 	if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
504 		Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)(void *)output, (const word32 *)(void *)input);
505 	else
506 #endif
507 		this->Iterate(iterationCount, NULL, output, input, operation);
508 }
509 
// Explicit instantiations: the member templates defined in this file are
// instantiated here for both byte orders of the core, the hash and the
// cipher policy.
510 template class Panama<BigEndian>;
511 template class Panama<LittleEndian>;
512 
513 template class Weak::PanamaHash<BigEndian>;
514 template class Weak::PanamaHash<LittleEndian>;
515 
516 template class PanamaCipherPolicy<BigEndian>;
517 template class PanamaCipherPolicy<LittleEndian>;
518 
519 NAMESPACE_END
520 
521 #endif	// #ifndef CRYPTOPP_GENERATE_X64_MASM
522