1 /* $OpenBSD: wp_block.c,v 1.9 2014/07/09 16:06:13 miod Exp $ */
2 /**
3  * The Whirlpool hashing function.
4  *
5  * <P>
6  * <b>References</b>
7  *
8  * <P>
9  * The Whirlpool algorithm was developed by
10  * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
11  * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
12  *
13  * See
14  *      P.S.L.M. Barreto, V. Rijmen,
15  *      ``The Whirlpool hashing function,''
16  *      NESSIE submission, 2000 (tweaked version, 2001),
17  *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
18  *
19  * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
20  * Vincent Rijmen. Lookup "reference implementations" on
21  * <http://planeta.terra.com.br/informatica/paulobarreto/>
22  *
23  * =============================================================================
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
34  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
35  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  */
38 
39 #include "wp_locl.h"
40 #include <string.h>
41 #include <machine/endian.h>
42 
/* Local shorthand for the unsigned byte type used throughout this file. */
typedef unsigned char		u8;
/*
 * Local 64-bit unsigned type: `unsigned long' when _LP64 is defined,
 * `unsigned long long' otherwise.
 */
#if defined(_LP64)
typedef unsigned long		u64;
#else
typedef unsigned long long	u64;
#endif

/*
 * Number of rounds run per 64-byte block; also the number of 64-bit
 * round constants stored after the lookup table in Cx.
 */
#define ROUNDS	10
51 
/*
 * SMALL_REGISTER_BANK selects, in whirlpool_block(), the round
 * formulation that computes each output word L0..L7 as one complete
 * expression instead of accumulating into all eight words at once.
 * It is enabled for 32-bit x86, ARM and VAX.
 *
 * On 32-bit x86 built with WHIRLPOOL_ASM, OPENSSL_SMALL_FOOTPRINT is
 * forced on for the C code and GO_FOR_MMX is provided.  GO_FOR_MMX
 * tests bit 23 of OPENSSL_ia32cap_P[0] (presumably the CPUID MMX
 * feature flag -- confirm against the capability vector's definition).
 * When the bit is set it passes the whole request to
 * whirlpool_block_mmx() and then *returns from the enclosing function*;
 * when it is clear the `break' leaves the do/while and execution falls
 * through to the C implementation.
 */
#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define SMALL_REGISTER_BANK
#  if defined(WHIRLPOOL_ASM)
#    ifndef OPENSSL_SMALL_FOOTPRINT
#      define OPENSSL_SMALL_FOOTPRINT	/* it appears that for older non-MMX
					   CPUs this is actually faster! */
#    endif
#    define GO_FOR_MMX(ctx,inp,num)	do {			\
	extern unsigned int OPENSSL_ia32cap_P[];		\
	void whirlpool_block_mmx(void *,const void *,size_t);	\
	if (!(OPENSSL_ia32cap_P[0] & (1<<23)))	break;		\
        whirlpool_block_mmx(ctx->H.c,inp,num);	return;		\
					} while (0)
#  endif
#elif defined(__arm__)
#  define SMALL_REGISTER_BANK
#elif defined(__vax__)
#  define SMALL_REGISTER_BANK
#endif
72 
/*
 * ROTATE(a,n): rotate the 64-bit value `a' by the compile-time constant
 * `n' bits, implemented with GCC-style inline assembler where available.
 *
 *  - x86_64 uses `rolq', i.e. a rotate left by n.
 *  - ia64 uses `shrp' with `a' as both source operands; the count is
 *    64-n on little-endian builds and n on big-endian builds
 *    (presumably yielding a rotate left by n and a rotate right by n
 *    respectively -- confirm against the ia64 instruction reference).
 *
 * If none of these branches applies ROTATE is left undefined here.
 */
#undef ROTATE
#if defined(__GNUC__) && __GNUC__>=2
#  if defined(__x86_64) || defined(__x86_64__)
#      define ROTATE(a,n)	({ u64 ret; asm ("rolq %1,%0"	\
				   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#  elif defined(__ia64) || defined(__ia64__)
#    if BYTE_ORDER == LITTLE_ENDIAN
#      define ROTATE(a,n)	({ u64 ret; asm ("shrp %0=%1,%1,%2"	\
				   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
#    else
#      define ROTATE(a,n)	({ u64 ret; asm ("shrp %0=%1,%1,%2"	\
				   : "=r"(ret) : "r"(a),"M"(n)); ret; })
#    endif
#  endif
#endif
88 
/*
 * Small-footprint build: supply a portable C ROTATE() if no ROTATE has
 * been defined yet, and force __STRICT_ALIGNMENT so that the
 * single-copy (N == 1) table layout is selected.
 *
 * ROTATE(i,n) rotates the 64-bit value `i' by `n' bits; `n' must be in
 * the range 1..63 because a shift by 64 is undefined.  Both the
 * parameters and the two shift sub-expressions are fully parenthesized
 * so that the macro expands correctly for non-trivial arguments and
 * inside larger expressions.
 */
#if defined(OPENSSL_SMALL_FOOTPRINT)
#  if !defined(ROTATE)
#    if BYTE_ORDER == LITTLE_ENDIAN	/* little-endians have to rotate left */
#      define ROTATE(i,n)	(((i)<<(n)) ^ ((i)>>(64-(n))))
#    else				/* big-endians have to rotate right */
#      define ROTATE(i,n)	(((i)>>(n)) ^ ((i)<<(64-(n))))
#    endif
#  endif
#  if defined(ROTATE) && !defined(__STRICT_ALIGNMENT)
#    define __STRICT_ALIGNMENT	/* ensure smallest table size */
#  endif
#endif
101 
/*
 * Table size depends on __STRICT_ALIGNMENT and on whether or not an
 * endian-specific ROTATE macro is defined. If __STRICT_ALIGNMENT is not
 * defined, which is normally the case on x86[_64] CPUs, the table is
 * 4KB large unconditionally. Otherwise, if ROTATE is defined, the
 * table is 2KB large, and otherwise 16KB. The 2KB table requires a
 * whole bunch of additional rotations, but I'm willing to "trade,"
 * because a 16KB table certainly thrashes the L1 cache. I wish all CPUs
 * could handle unaligned loads, as the 4KB table neither thrashes the
 * cache nor requires additional rotations.
 */
/*
 * Note that every Cn macro expands to two loads: one byte load and
 * one quadword load. One can argue that this many single-byte loads
 * is excessive, as one could load a quadword and "milk" it for
 * eight 8-bit values instead. Well, yes, but in order to do so *and*
 * avoid excessive loads you have to accommodate a handful of 64-bit
 * values in the register bank and issue a bunch of shifts and masks.
 * It's a tradeoff: loads vs. shifts and masks in a big register bank[!].
 * On most CPUs eight single-byte loads are faster, and on the others
 * I rely on a smart compiler to fold the byte loads if beneficial.
 * Hand-coded assembler would be another alternative:-)
 */
/*
 * Table layout selection.
 *
 *   N        number of 64-bit entries stored per table row
 *   LL(...)  emits the N*8 initializer bytes of one row from its eight
 *            bytes c0..c7
 *   Cn(K,i)  uses byte n of 64-bit word i of K (K.c[(i)*8+n]) as the row
 *            index and yields that row rotated by n byte positions
 */
#ifdef __STRICT_ALIGNMENT
#  if defined(ROTATE)
/* One entry per row; columns 1..7 are derived with ROTATE by 8*n bits. */
#    define N	1
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7
#    define C0(K,i)	(Cx.q[K.c[(i)*8+0]])
#    define C1(K,i)	ROTATE(Cx.q[K.c[(i)*8+1]],8)
#    define C2(K,i)	ROTATE(Cx.q[K.c[(i)*8+2]],16)
#    define C3(K,i)	ROTATE(Cx.q[K.c[(i)*8+3]],24)
#    define C4(K,i)	ROTATE(Cx.q[K.c[(i)*8+4]],32)
#    define C5(K,i)	ROTATE(Cx.q[K.c[(i)*8+5]],40)
#    define C6(K,i)	ROTATE(Cx.q[K.c[(i)*8+6]],48)
#    define C7(K,i)	ROTATE(Cx.q[K.c[(i)*8+7]],56)
#  else
/*
 * Eight entries per row, each rotated one byte further than the one
 * before, so column n is a plain load of entry n of the row.
 */
#    define N	8
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c7,c0,c1,c2,c3,c4,c5,c6, \
					c6,c7,c0,c1,c2,c3,c4,c5, \
					c5,c6,c7,c0,c1,c2,c3,c4, \
					c4,c5,c6,c7,c0,c1,c2,c3, \
					c3,c4,c5,c6,c7,c0,c1,c2, \
					c2,c3,c4,c5,c6,c7,c0,c1, \
					c1,c2,c3,c4,c5,c6,c7,c0
#    define C0(K,i)	(Cx.q[0+8*K.c[(i)*8+0]])
#    define C1(K,i)	(Cx.q[1+8*K.c[(i)*8+1]])
#    define C2(K,i)	(Cx.q[2+8*K.c[(i)*8+2]])
#    define C3(K,i)	(Cx.q[3+8*K.c[(i)*8+3]])
#    define C4(K,i)	(Cx.q[4+8*K.c[(i)*8+4]])
#    define C5(K,i)	(Cx.q[5+8*K.c[(i)*8+5]])
#    define C6(K,i)	(Cx.q[6+8*K.c[(i)*8+6]])
#    define C7(K,i)	(Cx.q[7+8*K.c[(i)*8+7]])
#  endif
#else
/*
 * Two back-to-back copies per row (16 bytes).  Column n is read as one
 * 8-byte load that starts (8-n)%8 bytes into the row, so for n != 0 the
 * load address is not 8-byte aligned and the load spans both copies.
 */
#  define N	2
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i)	(((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
#  define C1(K,i)	(((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
#  define C2(K,i)	(((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
#  define C3(K,i)	(((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
#  define C4(K,i)	(((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
#  define C5(K,i)	(((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
#  define C6(K,i)	(((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
#  define C7(K,i)	(((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
#endif
169 
/*
 * Cx: combined lookup table and round constants.
 *
 * Layout: 256 rows (indexed by a byte value through the Cn() macros) of
 * N 64-bit entries each, followed by ROUNDS 64-bit round constants that
 * are reached through RC.  The union allows the same storage to be
 * addressed as bytes (c) or as 64-bit words (q).  The initializer fills
 * the byte view, so the in-memory image is identical regardless of the
 * host byte order.
 */
static const
union	{
	u8	c[(256*N+ROUNDS)*sizeof(u64)];
	u64	q[(256*N+ROUNDS)];
	} Cx = { {
	/* Note endian-neutral representation:-) */
	LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
	LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
	LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
	LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
	LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
	LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
	LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
	LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
	LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
	LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
	LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
	LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
	LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
	LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
	LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
	LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
	LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
	LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
	LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
	LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
	LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
	LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
	LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
	LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
	LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
	LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
	LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
	LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
	LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
	LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
	LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
	LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
	LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
	LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
	LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
	LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
	LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
	LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
	LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
	LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
	LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
	LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
	LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
	LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
	LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
	LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
	LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
	LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
	LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
	LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
	LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
	LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
	LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
	LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
	LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
	LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
	LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
	LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
	LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
	LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
	LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
	LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
	LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
	LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
	LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
	LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
	LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
	LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
	LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
	LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
	LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
	LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
	LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
	LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
	LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
	LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
	LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
	LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
	LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
	LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
	LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
	LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
	LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
	LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
	LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
	LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
	LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
	LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
	LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
	LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
	LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
	LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
	LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
	LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
	LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
	LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
	LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
	LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
	LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
	LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
	LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
	LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
	LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
	LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
	LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
	LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
	LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
	LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
	LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
	LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
	LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
	LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
	LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
	LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
	LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
	LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
	LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
	LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
	LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
	LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
	LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
	LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
	LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
	LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
	LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
	LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
	LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
	LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
	LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
	LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
	LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
	LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
	LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
	LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
	LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
	LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
	LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
	LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
	LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
	LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
	LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
	LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
	LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
	LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
	LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
	LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
	LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
	LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
	LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
	LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
	LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
	LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
	LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
	LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
	LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
	LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
	LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
	LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
	LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
	LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
	LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
	LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
	LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
	LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
	LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
	LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
	LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
	LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
	LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
	LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
	LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
	LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
	LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
	LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
	LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
	LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
	LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
	LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
	LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
	LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
	LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
	LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
	LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
	LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
	LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
	LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
	LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
	LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
	LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
	LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
	LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
	LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
	LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
	LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
	LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
	LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
	LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
	LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
	LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
	LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
	LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
	LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
	LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
	LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
	LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
	LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
	LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
	LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
	LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
	LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
	LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
	LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
	LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
	LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
	LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
	LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
	LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
	LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
	LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
	LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
	LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
	LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
	LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
	LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
	LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
	LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
	LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
	LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
	LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
	LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
	LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
	LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
	LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
	LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
	LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
	LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
	LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
	LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
	LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
	LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
	LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
	LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
	LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
	LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
	LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
	LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
	LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
	LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
	LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
	LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
	LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
	LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
	LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
	LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
	LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
	LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
	/*
	 * RC addresses the first round constant, i.e. the 64-bit word that
	 * immediately follows the 256*N table entries.  The bytes of each
	 * constant repeat the leading bytes of eight consecutive table rows.
	 */
#define RC	(&(Cx.q[256*N]))
	0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f,	/* rc[ROUNDS] */
	0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
	0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
	0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
	0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
	0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
	0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
	0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
	0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
	0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
	}
};
445 
446 void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
447 	{
448 	int	r;
449 	const u8 *p=inp;
450 	union	{ u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;
451 
452 #ifdef GO_FOR_MMX
453 	GO_FOR_MMX(ctx,inp,n);
454 #endif
455 							do {
456 #ifdef OPENSSL_SMALL_FOOTPRINT
457 	u64	L[8];
458 	int	i;
459 
460 	for (i=0;i<64;i++)	S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
461 	for (r=0;r<ROUNDS;r++)
462 		{
463 		for (i=0;i<8;i++)
464 			{
465 			L[i]  = i ? 0 : RC[r];
466 			L[i] ^=	C0(K,i)       ^ C1(K,(i-1)&7) ^
467 				C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
468 				C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
469 				C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
470 			}
471 		memcpy (K.q,L,64);
472 		for (i=0;i<8;i++)
473 			{
474 			L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
475 				C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
476 				C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
477 				C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
478 			}
479 		memcpy (S.q,L,64);
480 		}
481 	for (i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
482 #else
483 	u64	L0,L1,L2,L3,L4,L5,L6,L7;
484 
485 #ifdef __STRICT_ALIGNMENT
486 	if ((size_t)p & 7)
487 		{
488 		memcpy (S.c,p,64);
489 		S.q[0] ^= (K.q[0] = H->q[0]);
490 		S.q[1] ^= (K.q[1] = H->q[1]);
491 		S.q[2] ^= (K.q[2] = H->q[2]);
492 		S.q[3] ^= (K.q[3] = H->q[3]);
493 		S.q[4] ^= (K.q[4] = H->q[4]);
494 		S.q[5] ^= (K.q[5] = H->q[5]);
495 		S.q[6] ^= (K.q[6] = H->q[6]);
496 		S.q[7] ^= (K.q[7] = H->q[7]);
497 		}
498 	else
499 #endif
500 		{
501 		const u64 *pa = (const u64*)p;
502 		S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
503 		S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
504 		S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
505 		S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
506 		S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
507 		S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
508 		S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
509 		S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
510 		}
511 
512 	for(r=0;r<ROUNDS;r++)
513 		{
514 #ifdef SMALL_REGISTER_BANK
515 		L0 =	C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
516 			C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
517 		L1 =	C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
518 			C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
519 		L2 =	C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
520 			C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
521 		L3 =	C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
522 			C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
523 		L4 =	C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
524 			C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
525 		L5 =	C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
526 			C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
527 		L6 =	C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
528 			C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
529 		L7 =	C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
530 			C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);
531 
532 		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
533 		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
534 
535 		L0 ^=	C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
536 			C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
537 		L1 ^=	C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
538 			C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
539 		L2 ^=	C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
540 			C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
541 		L3 ^=	C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
542 			C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
543 		L4 ^=	C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
544 			C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
545 		L5 ^=	C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
546 			C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
547 		L6 ^=	C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
548 			C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
549 		L7 ^=	C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
550 			C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);
551 
552 		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
553 		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
554 #else
555 		L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
556 		L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
557 		L0 ^= RC[r];
558 
559 		L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
560 		L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);
561 
562 		L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
563 		L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);
564 
565 		L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
566 		L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);
567 
568 		L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
569 		L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);
570 
571 		L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
572 		L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);
573 
574 		L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
575 		L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);
576 
577 		L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
578 		L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);
579 
580 		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
581 		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
582 
583 		L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
584 		L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);
585 
586 		L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
587 		L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);
588 
589 		L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
590 		L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);
591 
592 		L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
593 		L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);
594 
595 		L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
596 		L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);
597 
598 		L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
599 		L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);
600 
601 		L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
602 		L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);
603 
604 		L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
605 		L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);
606 
607 		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
608 		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
609 #endif
610 		}
611 
612 #ifdef __STRICT_ALIGNMENT
613 	if ((size_t)p & 7)
614 		{
615 		int i;
616 		for(i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
617 		}
618 	else
619 #endif
620 		{
621 		const u64 *pa=(const u64 *)p;
622 		H->q[0] ^= S.q[0] ^ pa[0];
623 		H->q[1] ^= S.q[1] ^ pa[1];
624 		H->q[2] ^= S.q[2] ^ pa[2];
625 		H->q[3] ^= S.q[3] ^ pa[3];
626 		H->q[4] ^= S.q[4] ^ pa[4];
627 		H->q[5] ^= S.q[5] ^ pa[5];
628 		H->q[6] ^= S.q[6] ^ pa[6];
629 		H->q[7] ^= S.q[7] ^ pa[7];
630 		}
631 #endif
632 							p += 64;
633 							} while(--n);
634 	}
635