172c33676SMaxim Ag /* $OpenBSD: gcm128.c,v 1.22 2018/01/24 23:03:37 kettenis Exp $ */
2f5b1c8a1SJohn Marino /* ====================================================================
3f5b1c8a1SJohn Marino  * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
4f5b1c8a1SJohn Marino  *
5f5b1c8a1SJohn Marino  * Redistribution and use in source and binary forms, with or without
6f5b1c8a1SJohn Marino  * modification, are permitted provided that the following conditions
7f5b1c8a1SJohn Marino  * are met:
8f5b1c8a1SJohn Marino  *
9f5b1c8a1SJohn Marino  * 1. Redistributions of source code must retain the above copyright
10f5b1c8a1SJohn Marino  *    notice, this list of conditions and the following disclaimer.
11f5b1c8a1SJohn Marino  *
12f5b1c8a1SJohn Marino  * 2. Redistributions in binary form must reproduce the above copyright
13f5b1c8a1SJohn Marino  *    notice, this list of conditions and the following disclaimer in
14f5b1c8a1SJohn Marino  *    the documentation and/or other materials provided with the
15f5b1c8a1SJohn Marino  *    distribution.
16f5b1c8a1SJohn Marino  *
17f5b1c8a1SJohn Marino  * 3. All advertising materials mentioning features or use of this
18f5b1c8a1SJohn Marino  *    software must display the following acknowledgment:
19f5b1c8a1SJohn Marino  *    "This product includes software developed by the OpenSSL Project
20f5b1c8a1SJohn Marino  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21f5b1c8a1SJohn Marino  *
22f5b1c8a1SJohn Marino  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23f5b1c8a1SJohn Marino  *    endorse or promote products derived from this software without
24f5b1c8a1SJohn Marino  *    prior written permission. For written permission, please contact
25f5b1c8a1SJohn Marino  *    openssl-core@openssl.org.
26f5b1c8a1SJohn Marino  *
27f5b1c8a1SJohn Marino  * 5. Products derived from this software may not be called "OpenSSL"
28f5b1c8a1SJohn Marino  *    nor may "OpenSSL" appear in their names without prior written
29f5b1c8a1SJohn Marino  *    permission of the OpenSSL Project.
30f5b1c8a1SJohn Marino  *
31f5b1c8a1SJohn Marino  * 6. Redistributions of any form whatsoever must retain the following
32f5b1c8a1SJohn Marino  *    acknowledgment:
33f5b1c8a1SJohn Marino  *    "This product includes software developed by the OpenSSL Project
34f5b1c8a1SJohn Marino  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35f5b1c8a1SJohn Marino  *
36f5b1c8a1SJohn Marino  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37f5b1c8a1SJohn Marino  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38f5b1c8a1SJohn Marino  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39f5b1c8a1SJohn Marino  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
40f5b1c8a1SJohn Marino  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41f5b1c8a1SJohn Marino  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42f5b1c8a1SJohn Marino  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43f5b1c8a1SJohn Marino  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44f5b1c8a1SJohn Marino  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45f5b1c8a1SJohn Marino  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46f5b1c8a1SJohn Marino  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47f5b1c8a1SJohn Marino  * OF THE POSSIBILITY OF SUCH DAMAGE.
48f5b1c8a1SJohn Marino  * ====================================================================
49f5b1c8a1SJohn Marino  */
50f5b1c8a1SJohn Marino 
51f5b1c8a1SJohn Marino #define OPENSSL_FIPSAPI
52f5b1c8a1SJohn Marino 
53f5b1c8a1SJohn Marino #include <openssl/crypto.h>
54f5b1c8a1SJohn Marino #include "modes_lcl.h"
55f5b1c8a1SJohn Marino #include <string.h>
56f5b1c8a1SJohn Marino 
57f5b1c8a1SJohn Marino #ifndef MODES_DEBUG
58f5b1c8a1SJohn Marino # ifndef NDEBUG
59f5b1c8a1SJohn Marino #  define NDEBUG
60f5b1c8a1SJohn Marino # endif
61f5b1c8a1SJohn Marino #endif
62f5b1c8a1SJohn Marino 
#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 as single 32-bit accesses combined with BSWAP4,
 * because alignment is ensured for the buffers used in this file.
 *
 * NOTE(review): both macros access the buffer through a u32 pointer cast;
 * confirm every buffer passed here is really a u32-compatible object.
 *
 * The PUTU32 expansion is wrapped in parentheses so it behaves as a single
 * expression wherever it is used.
 */
#undef	GETU32
#define	GETU32(p)	BSWAP4(*(const u32 *)(p))
#undef	PUTU32
#define	PUTU32(p,v)	(*(u32 *)(p) = BSWAP4(v))
#endif
70f5b1c8a1SJohn Marino 
/*
 * PACK() moves a 16-bit constant into the top 16 bits of a size_t, so the
 * remainder tables in this file hold values pre-shifted for the host word
 * size.
 */
#define	PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))

/*
 * REDUCE1BIT() shifts the 128-bit value V (members .hi and .lo) right by
 * one bit.  If the bit shifted out of V.lo was set, the constant 0xe1 is
 * XORed into the top byte of V.hi.  The two branches compute the same
 * result; they differ only in whether the mask is built as a 64-bit or a
 * 32-bit quantity, chosen by the width of size_t.
 *
 * V is evaluated several times, so it must be an lvalue without side
 * effects.  It is parenthesized at every use so expressions such as *ptr
 * are accepted.
 */
#define REDUCE1BIT(V)	\
	do { \
		if (sizeof(size_t)==8) { \
			u64 r1b_mask_ = U64(0xe100000000000000) & (0-((V).lo&1)); \
			(V).lo  = ((V).hi<<63)|((V).lo>>1); \
			(V).hi  = ((V).hi>>1 )^r1b_mask_; \
		} else { \
			u32 r1b_mask_ = 0xe1000000U & (0-(u32)((V).lo&1)); \
			(V).lo  = ((V).hi<<63)|((V).lo>>1); \
			(V).hi  = ((V).hi>>1 )^((u64)r1b_mask_<<32); \
		} \
	} while(0)
84f5b1c8a1SJohn Marino 
85f5b1c8a1SJohn Marino /*
86f5b1c8a1SJohn Marino  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87f5b1c8a1SJohn Marino  * never be set to 8. 8 is effectively reserved for testing purposes.
88f5b1c8a1SJohn Marino  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89f5b1c8a1SJohn Marino  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90f5b1c8a1SJohn Marino  * whole spectrum of possible table driven implementations. Why? In
91f5b1c8a1SJohn Marino  * non-"Shoup's" case memory access pattern is segmented in such manner,
92f5b1c8a1SJohn Marino  * that it's trivial to see that cache timing information can reveal
93f5b1c8a1SJohn Marino  * fair portion of intermediate hash value. Given that ciphertext is
94f5b1c8a1SJohn Marino  * always available to attacker, it's possible for him to attempt to
95f5b1c8a1SJohn Marino  * deduce secret parameter H and if successful, tamper with messages
96f5b1c8a1SJohn Marino  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97f5b1c8a1SJohn Marino  * not as trivial, but there is no reason to believe that it's resistant
98f5b1c8a1SJohn Marino  * to cache-timing attack. And the thing about "8-bit" implementation is
99f5b1c8a1SJohn Marino  * that it consumes 16 (sixteen) times more memory, 4KB per individual
100f5b1c8a1SJohn Marino  * key + 1KB shared. Well, on pros side it should be twice as fast as
101f5b1c8a1SJohn Marino  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102f5b1c8a1SJohn Marino  * was observed to run ~75% faster, closer to 100% for commercial
103f5b1c8a1SJohn Marino  * compilers... Yet "4-bit" procedure is preferred, because it's
104f5b1c8a1SJohn Marino  * believed to provide better security-performance balance and adequate
105f5b1c8a1SJohn Marino  * all-round performance. "All-round" refers to things like:
106f5b1c8a1SJohn Marino  *
107f5b1c8a1SJohn Marino  * - shorter setup time effectively improves overall timing for
108f5b1c8a1SJohn Marino  *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example, on Windows a large enough free()
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc() would immediately incur working-set expansion);
113f5b1c8a1SJohn Marino  * - larger table has larger cache footprint, which can affect
114f5b1c8a1SJohn Marino  *   performance of other code paths (not necessarily even from same
115f5b1c8a1SJohn Marino  *   thread in Hyper-Threading world);
116f5b1c8a1SJohn Marino  *
117f5b1c8a1SJohn Marino  * Value of 1 is not appropriate for performance reasons.
118f5b1c8a1SJohn Marino  */
119f5b1c8a1SJohn Marino #if	TABLE_BITS==8
120f5b1c8a1SJohn Marino 
/*
 * Fill the 256-entry table used by the 8-bit GHASH variant.
 *
 * Htable[0] is zero and Htable[128] is H itself (H[0] as the high half,
 * H[1] as the low half).  Each lower power-of-two slot (64, 32, ... 1) is
 * the previous one passed through REDUCE1BIT().  Every remaining slot is
 * the XOR of the power-of-two slot for its highest set bit with the slot
 * for its remaining bits.
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
	u128 V;
	int bit, base, off;

	Htable[0].hi = 0;
	Htable[0].lo = 0;
	V.hi = H[0];
	V.lo = H[1];

	/* Power-of-two slots, from 128 downwards. */
	Htable[128] = V;
	for (bit = 64; bit > 0; bit >>= 1) {
		REDUCE1BIT(V);
		Htable[bit] = V;
	}

	/* Composite slots: base + off, for every off below base. */
	for (base = 2; base < 256; base <<= 1) {
		u128 *row = &Htable[base];
		u128 top = *row;

		for (off = 1; off < base; ++off) {
			row[off].hi = top.hi ^ Htable[off].hi;
			row[off].lo = top.lo ^ Htable[off].lo;
		}
	}
}
144f5b1c8a1SJohn Marino 
/*
 * Update Xi in place using the 256-entry table Htable (8-bit variant).
 *
 * The 16 bytes of Xi are consumed from the last byte to the first.  The
 * accumulator Z starts as the table entry for the last byte.  For each
 * further byte Z is shifted right by 8 bits, the rem_8bit constant for the
 * byte shifted out is XORed into the top of Z.hi, and the table entry for
 * the new byte is XORed in.  On little-endian hosts the result is written
 * back byte-swapped; on big-endian hosts it is stored directly.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
	/*
	 * One constant per possible byte shifted out of Z.lo; PACK() places
	 * each in the top 16 bits of a size_t.
	 */
	static const size_t rem_8bit[256] = {
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
	const u8 *bytes = (const u8 *)Xi;
	u128 Z;
	size_t rem;
	int pos;

	/* The last byte seeds the accumulator; nothing is shifted before it. */
	Z.hi = Htable[bytes[15]].hi;
	Z.lo = Htable[bytes[15]].lo;

	for (pos = 14; pos >= 0; --pos) {
		/* Shift Z right by one byte, remembering what falls off. */
		rem  = (size_t)Z.lo & 0xff;
		Z.lo = (Z.hi << 56) | (Z.lo >> 8);
		Z.hi = Z.hi >> 8;
		/* Fold the constant for the dropped byte into the top of Z.hi. */
#if SIZE_MAX == 0xffffffffffffffff
		Z.hi ^= rem_8bit[rem];
#else
		Z.hi ^= (u64)rem_8bit[rem] << 32;
#endif
		Z.hi ^= Htable[bytes[pos]].hi;
		Z.lo ^= Htable[bytes[pos]].lo;
	}

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
	Xi[0] = BSWAP8(Z.hi);
	Xi[1] = BSWAP8(Z.lo);
#else
	{
		/* No 64-bit swap available: store four 32-bit words via PUTU32. */
		u8 *out = (u8 *)Xi;
		u32 word;

		word = (u32)(Z.hi >> 32);
		PUTU32(out, word);
		word = (u32)(Z.hi);
		PUTU32(out + 4, word);
		word = (u32)(Z.lo >> 32);
		PUTU32(out + 8, word);
		word = (u32)(Z.lo);
		PUTU32(out + 12, word);
	}
#endif
#else /* BIG_ENDIAN */
	Xi[0] = Z.hi;
	Xi[1] = Z.lo;
#endif
}
/*
 * GCM_MUL calls gcm_gmult_8bit() on the context's Xi and Htable.  The
 * second macro argument is substituted into the member name, so call sites
 * must spell it Xi.  ctx is parenthesized so any pointer expression works.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit((ctx)->Xi.u,(ctx)->Htable)
252f5b1c8a1SJohn Marino 
253f5b1c8a1SJohn Marino #elif	TABLE_BITS==4
254f5b1c8a1SJohn Marino 
/*
 * Fill the 16-entry table used by the 4-bit GHASH variant.
 *
 * Htable[0] is zero and Htable[8] is H itself (H[0] as the high half, H[1]
 * as the low half).  Htable[4], Htable[2] and Htable[1] are obtained by
 * applying REDUCE1BIT() one, two and three times.  Every other slot is the
 * XOR of the power-of-two slot for its highest set bit with the slot for
 * its remaining bits.  With OPENSSL_SMALL_FOOTPRINT the table is built by
 * loops, otherwise by straight-line code producing the same entries.
 *
 * When GHASH_ASM is defined on ARM, the two 64-bit halves of every entry
 * are then rearranged into the dword order the ARM assembler expects.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
	u128 V;

	Htable[0].hi = 0;
	Htable[0].lo = 0;
	V.hi = H[0];
	V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
	{
		int bit, base, off;

		/* Power-of-two slots, from 8 downwards. */
		Htable[8] = V;
		for (bit = 4; bit > 0; bit >>= 1) {
			REDUCE1BIT(V);
			Htable[bit] = V;
		}

		/* Composite slots: base + off, for every off below base. */
		for (base = 2; base < 16; base <<= 1) {
			u128 *row = Htable + base;

			V = *row;
			for (off = 1; off < base; ++off) {
				row[off].hi = V.hi ^ Htable[off].hi;
				row[off].lo = V.lo ^ Htable[off].lo;
			}
		}
	}
#else
	/* Power-of-two slots. */
	Htable[8] = V;
	REDUCE1BIT(V);
	Htable[4] = V;
	REDUCE1BIT(V);
	Htable[2] = V;
	REDUCE1BIT(V);
	Htable[1] = V;

	/* V still equals Htable[1] here. */
	Htable[3].hi = V.hi ^ Htable[2].hi;
	Htable[3].lo = V.lo ^ Htable[2].lo;

	V = Htable[4];
	Htable[5].hi = V.hi ^ Htable[1].hi;
	Htable[5].lo = V.lo ^ Htable[1].lo;
	Htable[6].hi = V.hi ^ Htable[2].hi;
	Htable[6].lo = V.lo ^ Htable[2].lo;
	Htable[7].hi = V.hi ^ Htable[3].hi;
	Htable[7].lo = V.lo ^ Htable[3].lo;

	V = Htable[8];
	Htable[9].hi = V.hi ^ Htable[1].hi;
	Htable[9].lo = V.lo ^ Htable[1].lo;
	Htable[10].hi = V.hi ^ Htable[2].hi;
	Htable[10].lo = V.lo ^ Htable[2].lo;
	Htable[11].hi = V.hi ^ Htable[3].hi;
	Htable[11].lo = V.lo ^ Htable[3].lo;
	Htable[12].hi = V.hi ^ Htable[4].hi;
	Htable[12].lo = V.lo ^ Htable[4].lo;
	Htable[13].hi = V.hi ^ Htable[5].hi;
	Htable[13].lo = V.lo ^ Htable[5].lo;
	Htable[14].hi = V.hi ^ Htable[6].hi;
	Htable[14].lo = V.lo ^ Htable[6].lo;
	Htable[15].hi = V.hi ^ Htable[7].hi;
	Htable[15].lo = V.lo ^ Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
	/*
	 * ARM assembler expects specific dword order in Htable.
	 */
	{
		int k;

		for (k = 0; k < 16; ++k) {
			V = Htable[k];
#if BYTE_ORDER == LITTLE_ENDIAN
			/* Swap the two 64-bit halves. */
			Htable[k].hi = V.lo;
			Htable[k].lo = V.hi;
#else /* BIG_ENDIAN */
			/* Swap the halves and the 32-bit words inside each. */
			Htable[k].hi = V.lo<<32|V.lo>>32;
			Htable[k].lo = V.hi<<32|V.hi>>32;
#endif
		}
	}
#endif
}
325f5b1c8a1SJohn Marino 
326f5b1c8a1SJohn Marino #ifndef GHASH_ASM
327f5b1c8a1SJohn Marino static const size_t rem_4bit[16] = {
328f5b1c8a1SJohn Marino 	PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
329f5b1c8a1SJohn Marino 	PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
330f5b1c8a1SJohn Marino 	PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
331f5b1c8a1SJohn Marino 	PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
332f5b1c8a1SJohn Marino 
gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16])333f5b1c8a1SJohn Marino static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
334f5b1c8a1SJohn Marino {
335f5b1c8a1SJohn Marino 	u128 Z;
336f5b1c8a1SJohn Marino 	int cnt = 15;
337f5b1c8a1SJohn Marino 	size_t rem, nlo, nhi;
338f5b1c8a1SJohn Marino 
339f5b1c8a1SJohn Marino 	nlo  = ((const u8 *)Xi)[15];
340f5b1c8a1SJohn Marino 	nhi  = nlo>>4;
341f5b1c8a1SJohn Marino 	nlo &= 0xf;
342f5b1c8a1SJohn Marino 
343f5b1c8a1SJohn Marino 	Z.hi = Htable[nlo].hi;
344f5b1c8a1SJohn Marino 	Z.lo = Htable[nlo].lo;
345f5b1c8a1SJohn Marino 
346f5b1c8a1SJohn Marino 	while (1) {
347f5b1c8a1SJohn Marino 		rem  = (size_t)Z.lo&0xf;
348f5b1c8a1SJohn Marino 		Z.lo = (Z.hi<<60)|(Z.lo>>4);
349f5b1c8a1SJohn Marino 		Z.hi = (Z.hi>>4);
35072c33676SMaxim Ag #if SIZE_MAX == 0xffffffffffffffff
351f5b1c8a1SJohn Marino 		Z.hi ^= rem_4bit[rem];
35272c33676SMaxim Ag #else
353f5b1c8a1SJohn Marino 		Z.hi ^= (u64)rem_4bit[rem]<<32;
35472c33676SMaxim Ag #endif
355f5b1c8a1SJohn Marino 		Z.hi ^= Htable[nhi].hi;
356f5b1c8a1SJohn Marino 		Z.lo ^= Htable[nhi].lo;
357f5b1c8a1SJohn Marino 
358f5b1c8a1SJohn Marino 		if (--cnt<0)		break;
359f5b1c8a1SJohn Marino 
360f5b1c8a1SJohn Marino 		nlo  = ((const u8 *)Xi)[cnt];
361f5b1c8a1SJohn Marino 		nhi  = nlo>>4;
362f5b1c8a1SJohn Marino 		nlo &= 0xf;
363f5b1c8a1SJohn Marino 
364f5b1c8a1SJohn Marino 		rem  = (size_t)Z.lo&0xf;
365f5b1c8a1SJohn Marino 		Z.lo = (Z.hi<<60)|(Z.lo>>4);
366f5b1c8a1SJohn Marino 		Z.hi = (Z.hi>>4);
36772c33676SMaxim Ag #if SIZE_MAX == 0xffffffffffffffff
368f5b1c8a1SJohn Marino 		Z.hi ^= rem_4bit[rem];
36972c33676SMaxim Ag #else
370f5b1c8a1SJohn Marino 		Z.hi ^= (u64)rem_4bit[rem]<<32;
37172c33676SMaxim Ag #endif
372f5b1c8a1SJohn Marino 		Z.hi ^= Htable[nlo].hi;
373f5b1c8a1SJohn Marino 		Z.lo ^= Htable[nlo].lo;
374f5b1c8a1SJohn Marino 	}
375f5b1c8a1SJohn Marino 
37672c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
377f5b1c8a1SJohn Marino #ifdef BSWAP8
378f5b1c8a1SJohn Marino 	Xi[0] = BSWAP8(Z.hi);
379f5b1c8a1SJohn Marino 	Xi[1] = BSWAP8(Z.lo);
380f5b1c8a1SJohn Marino #else
381f5b1c8a1SJohn Marino 	u8 *p = (u8 *)Xi;
382f5b1c8a1SJohn Marino 	u32 v;
383f5b1c8a1SJohn Marino 	v = (u32)(Z.hi>>32);	PUTU32(p,v);
384f5b1c8a1SJohn Marino 	v = (u32)(Z.hi);	PUTU32(p+4,v);
385f5b1c8a1SJohn Marino 	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
386f5b1c8a1SJohn Marino 	v = (u32)(Z.lo);	PUTU32(p+12,v);
387f5b1c8a1SJohn Marino #endif
38872c33676SMaxim Ag #else /* BIG_ENDIAN */
389f5b1c8a1SJohn Marino 	Xi[0] = Z.hi;
390f5b1c8a1SJohn Marino 	Xi[1] = Z.lo;
39172c33676SMaxim Ag #endif
392f5b1c8a1SJohn Marino }
393f5b1c8a1SJohn Marino 
394f5b1c8a1SJohn Marino #if !defined(OPENSSL_SMALL_FOOTPRINT)
395f5b1c8a1SJohn Marino /*
396f5b1c8a1SJohn Marino  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
397f5b1c8a1SJohn Marino  * details... Compiler-generated code doesn't seem to give any
398f5b1c8a1SJohn Marino  * performance improvement, at least not on x86[_64]. It's here
399f5b1c8a1SJohn Marino  * mostly as reference and a placeholder for possible future
400f5b1c8a1SJohn Marino  * non-trivial optimization[s]...
401f5b1c8a1SJohn Marino  */
gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 * inp,size_t len)402f5b1c8a1SJohn Marino static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
403f5b1c8a1SJohn Marino 				const u8 *inp,size_t len)
404f5b1c8a1SJohn Marino {
405f5b1c8a1SJohn Marino     u128 Z;
406f5b1c8a1SJohn Marino     int cnt;
407f5b1c8a1SJohn Marino     size_t rem, nlo, nhi;
408f5b1c8a1SJohn Marino 
409f5b1c8a1SJohn Marino #if 1
410f5b1c8a1SJohn Marino     do {
411f5b1c8a1SJohn Marino 	cnt  = 15;
412f5b1c8a1SJohn Marino 	nlo  = ((const u8 *)Xi)[15];
413f5b1c8a1SJohn Marino 	nlo ^= inp[15];
414f5b1c8a1SJohn Marino 	nhi  = nlo>>4;
415f5b1c8a1SJohn Marino 	nlo &= 0xf;
416f5b1c8a1SJohn Marino 
417f5b1c8a1SJohn Marino 	Z.hi = Htable[nlo].hi;
418f5b1c8a1SJohn Marino 	Z.lo = Htable[nlo].lo;
419f5b1c8a1SJohn Marino 
420f5b1c8a1SJohn Marino 	while (1) {
421f5b1c8a1SJohn Marino 		rem  = (size_t)Z.lo&0xf;
422f5b1c8a1SJohn Marino 		Z.lo = (Z.hi<<60)|(Z.lo>>4);
423f5b1c8a1SJohn Marino 		Z.hi = (Z.hi>>4);
42472c33676SMaxim Ag #if SIZE_MAX == 0xffffffffffffffff
425f5b1c8a1SJohn Marino 		Z.hi ^= rem_4bit[rem];
42672c33676SMaxim Ag #else
427f5b1c8a1SJohn Marino 		Z.hi ^= (u64)rem_4bit[rem]<<32;
42872c33676SMaxim Ag #endif
429f5b1c8a1SJohn Marino 		Z.hi ^= Htable[nhi].hi;
430f5b1c8a1SJohn Marino 		Z.lo ^= Htable[nhi].lo;
431f5b1c8a1SJohn Marino 
432f5b1c8a1SJohn Marino 		if (--cnt<0)		break;
433f5b1c8a1SJohn Marino 
434f5b1c8a1SJohn Marino 		nlo  = ((const u8 *)Xi)[cnt];
435f5b1c8a1SJohn Marino 		nlo ^= inp[cnt];
436f5b1c8a1SJohn Marino 		nhi  = nlo>>4;
437f5b1c8a1SJohn Marino 		nlo &= 0xf;
438f5b1c8a1SJohn Marino 
439f5b1c8a1SJohn Marino 		rem  = (size_t)Z.lo&0xf;
440f5b1c8a1SJohn Marino 		Z.lo = (Z.hi<<60)|(Z.lo>>4);
441f5b1c8a1SJohn Marino 		Z.hi = (Z.hi>>4);
44272c33676SMaxim Ag #if SIZE_MAX == 0xffffffffffffffff
443f5b1c8a1SJohn Marino 		Z.hi ^= rem_4bit[rem];
44472c33676SMaxim Ag #else
445f5b1c8a1SJohn Marino 		Z.hi ^= (u64)rem_4bit[rem]<<32;
44672c33676SMaxim Ag #endif
447f5b1c8a1SJohn Marino 		Z.hi ^= Htable[nlo].hi;
448f5b1c8a1SJohn Marino 		Z.lo ^= Htable[nlo].lo;
449f5b1c8a1SJohn Marino 	}
450f5b1c8a1SJohn Marino #else
451f5b1c8a1SJohn Marino     /*
452f5b1c8a1SJohn Marino      * Extra 256+16 bytes per-key plus 512 bytes shared tables
453f5b1c8a1SJohn Marino      * [should] give ~50% improvement... One could have PACK()-ed
454f5b1c8a1SJohn Marino      * the rem_8bit even here, but the priority is to minimize
455f5b1c8a1SJohn Marino      * cache footprint...
456f5b1c8a1SJohn Marino      */
457f5b1c8a1SJohn Marino     u128 Hshr4[16];	/* Htable shifted right by 4 bits */
458f5b1c8a1SJohn Marino     u8   Hshl4[16];	/* Htable shifted left  by 4 bits */
459f5b1c8a1SJohn Marino     static const unsigned short rem_8bit[256] = {
460f5b1c8a1SJohn Marino 	0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
461f5b1c8a1SJohn Marino 	0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
462f5b1c8a1SJohn Marino 	0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
463f5b1c8a1SJohn Marino 	0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
464f5b1c8a1SJohn Marino 	0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
465f5b1c8a1SJohn Marino 	0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
466f5b1c8a1SJohn Marino 	0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
467f5b1c8a1SJohn Marino 	0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
468f5b1c8a1SJohn Marino 	0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
469f5b1c8a1SJohn Marino 	0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
470f5b1c8a1SJohn Marino 	0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
471f5b1c8a1SJohn Marino 	0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
472f5b1c8a1SJohn Marino 	0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
473f5b1c8a1SJohn Marino 	0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
474f5b1c8a1SJohn Marino 	0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
475f5b1c8a1SJohn Marino 	0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
476f5b1c8a1SJohn Marino 	0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
477f5b1c8a1SJohn Marino 	0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
478f5b1c8a1SJohn Marino 	0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
479f5b1c8a1SJohn Marino 	0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
480f5b1c8a1SJohn Marino 	0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
481f5b1c8a1SJohn Marino 	0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
482f5b1c8a1SJohn Marino 	0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
483f5b1c8a1SJohn Marino 	0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
484f5b1c8a1SJohn Marino 	0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
485f5b1c8a1SJohn Marino 	0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
486f5b1c8a1SJohn Marino 	0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
487f5b1c8a1SJohn Marino 	0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
488f5b1c8a1SJohn Marino 	0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
489f5b1c8a1SJohn Marino 	0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
490f5b1c8a1SJohn Marino 	0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
491f5b1c8a1SJohn Marino 	0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
492f5b1c8a1SJohn Marino     /*
493f5b1c8a1SJohn Marino      * This pre-processing phase slows down procedure by approximately
494f5b1c8a1SJohn Marino      * same time as it makes each loop spin faster. In other words
495f5b1c8a1SJohn Marino      * single block performance is approximately same as straightforward
496f5b1c8a1SJohn Marino      * "4-bit" implementation, and then it goes only faster...
497f5b1c8a1SJohn Marino      */
498f5b1c8a1SJohn Marino     for (cnt=0; cnt<16; ++cnt) {
499f5b1c8a1SJohn Marino 	Z.hi = Htable[cnt].hi;
500f5b1c8a1SJohn Marino 	Z.lo = Htable[cnt].lo;
501f5b1c8a1SJohn Marino 	Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
502f5b1c8a1SJohn Marino 	Hshr4[cnt].hi = (Z.hi>>4);
503f5b1c8a1SJohn Marino 	Hshl4[cnt]    = (u8)(Z.lo<<4);
504f5b1c8a1SJohn Marino     }
505f5b1c8a1SJohn Marino 
506f5b1c8a1SJohn Marino     do {
507f5b1c8a1SJohn Marino 	for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
508f5b1c8a1SJohn Marino 		nlo  = ((const u8 *)Xi)[cnt];
509f5b1c8a1SJohn Marino 		nlo ^= inp[cnt];
510f5b1c8a1SJohn Marino 		nhi  = nlo>>4;
511f5b1c8a1SJohn Marino 		nlo &= 0xf;
512f5b1c8a1SJohn Marino 
513f5b1c8a1SJohn Marino 		Z.hi ^= Htable[nlo].hi;
514f5b1c8a1SJohn Marino 		Z.lo ^= Htable[nlo].lo;
515f5b1c8a1SJohn Marino 
516f5b1c8a1SJohn Marino 		rem = (size_t)Z.lo&0xff;
517f5b1c8a1SJohn Marino 
518f5b1c8a1SJohn Marino 		Z.lo = (Z.hi<<56)|(Z.lo>>8);
519f5b1c8a1SJohn Marino 		Z.hi = (Z.hi>>8);
520f5b1c8a1SJohn Marino 
521f5b1c8a1SJohn Marino 		Z.hi ^= Hshr4[nhi].hi;
522f5b1c8a1SJohn Marino 		Z.lo ^= Hshr4[nhi].lo;
523f5b1c8a1SJohn Marino 		Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
524f5b1c8a1SJohn Marino 	}
525f5b1c8a1SJohn Marino 
526f5b1c8a1SJohn Marino 	nlo  = ((const u8 *)Xi)[0];
527f5b1c8a1SJohn Marino 	nlo ^= inp[0];
528f5b1c8a1SJohn Marino 	nhi  = nlo>>4;
529f5b1c8a1SJohn Marino 	nlo &= 0xf;
530f5b1c8a1SJohn Marino 
531f5b1c8a1SJohn Marino 	Z.hi ^= Htable[nlo].hi;
532f5b1c8a1SJohn Marino 	Z.lo ^= Htable[nlo].lo;
533f5b1c8a1SJohn Marino 
534f5b1c8a1SJohn Marino 	rem = (size_t)Z.lo&0xf;
535f5b1c8a1SJohn Marino 
536f5b1c8a1SJohn Marino 	Z.lo = (Z.hi<<60)|(Z.lo>>4);
537f5b1c8a1SJohn Marino 	Z.hi = (Z.hi>>4);
538f5b1c8a1SJohn Marino 
539f5b1c8a1SJohn Marino 	Z.hi ^= Htable[nhi].hi;
540f5b1c8a1SJohn Marino 	Z.lo ^= Htable[nhi].lo;
541f5b1c8a1SJohn Marino 	Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
542f5b1c8a1SJohn Marino #endif
543f5b1c8a1SJohn Marino 
54472c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
545f5b1c8a1SJohn Marino #ifdef BSWAP8
546f5b1c8a1SJohn Marino 	Xi[0] = BSWAP8(Z.hi);
547f5b1c8a1SJohn Marino 	Xi[1] = BSWAP8(Z.lo);
548f5b1c8a1SJohn Marino #else
549f5b1c8a1SJohn Marino 	u8 *p = (u8 *)Xi;
550f5b1c8a1SJohn Marino 	u32 v;
551f5b1c8a1SJohn Marino 	v = (u32)(Z.hi>>32);	PUTU32(p,v);
552f5b1c8a1SJohn Marino 	v = (u32)(Z.hi);	PUTU32(p+4,v);
553f5b1c8a1SJohn Marino 	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
554f5b1c8a1SJohn Marino 	v = (u32)(Z.lo);	PUTU32(p+12,v);
555f5b1c8a1SJohn Marino #endif
55672c33676SMaxim Ag #else /* BIG_ENDIAN */
557f5b1c8a1SJohn Marino 	Xi[0] = Z.hi;
558f5b1c8a1SJohn Marino 	Xi[1] = Z.lo;
55972c33676SMaxim Ag #endif
560f5b1c8a1SJohn Marino     } while (inp+=16, len-=16);
561f5b1c8a1SJohn Marino }
562f5b1c8a1SJohn Marino #endif
563f5b1c8a1SJohn Marino #else
564f5b1c8a1SJohn Marino void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
565f5b1c8a1SJohn Marino void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
566f5b1c8a1SJohn Marino #endif
567f5b1c8a1SJohn Marino 
/* GCM_MUL multiplies one 128-bit field of the context by H through the
 * 4-bit table routine.  The second macro argument replaces the "Xi" token
 * in the expansion, so GCM_MUL(ctx,Yi) operates on ctx->Yi.u. */
568f5b1c8a1SJohn Marino #define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
569f5b1c8a1SJohn Marino #if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH hands in/len straight to gcm_ghash_4bit, which updates ctx->Xi. */
570f5b1c8a1SJohn Marino #define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
571f5b1c8a1SJohn Marino /* GHASH_CHUNK is a "stride parameter" intended to mitigate the cache
572f5b1c8a1SJohn Marino  * thrashing effect. In other words, the idea is to hash data while it's
573f5b1c8a1SJohn Marino  * still in L1 cache after the encryption pass... */
574f5b1c8a1SJohn Marino #define GHASH_CHUNK       (3*1024)
575f5b1c8a1SJohn Marino #endif
576f5b1c8a1SJohn Marino 
577f5b1c8a1SJohn Marino #else	/* TABLE_BITS */
578f5b1c8a1SJohn Marino 
/*
 * gcm_gmult_1bit - table-free multiply of Xi by H, one bit at a time.
 *
 * Xi: 16-byte value, read as stored in memory and overwritten with the
 *     product (byte-swapped back on little-endian hosts).
 * H:  multiplier, already in host byte order (see the comment on V.hi).
 *
 * For every bit of Xi, most significant first, V is conditionally XORed
 * into the accumulator Z and then advanced with REDUCE1BIT.
 * NOTE(review): REDUCE1BIT is defined outside this part of the file; it is
 * presumably the "multiply V by x and reduce" step - confirm there.
 */
579f5b1c8a1SJohn Marino static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
580f5b1c8a1SJohn Marino {
581f5b1c8a1SJohn Marino 	u128 V,Z = { 0,0 };
582f5b1c8a1SJohn Marino 	long X;
583f5b1c8a1SJohn Marino 	int  i,j;
584f5b1c8a1SJohn Marino 	const long *xi = (const long *)Xi;
585f5b1c8a1SJohn Marino 
586f5b1c8a1SJohn Marino 	V.hi = H[0];	/* H is in host byte order, no byte swapping */
587f5b1c8a1SJohn Marino 	V.lo = H[1];
588f5b1c8a1SJohn Marino 
	/* Consume Xi one long-sized word at a time (16/sizeof(long) words). */
589f5b1c8a1SJohn Marino 	for (j=0; j<16/sizeof(long); ++j) {
	/* Load word j so that its first byte in memory ends up in the top
	 * bits of X.
	 * NOTE(review): the load width is chosen by SIZE_MAX while the word
	 * type is long; this assumes sizeof(long) == sizeof(size_t) - confirm
	 * for every supported ABI. */
59072c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
59172c33676SMaxim Ag #if SIZE_MAX == 0xffffffffffffffff
592f5b1c8a1SJohn Marino #ifdef BSWAP8
593f5b1c8a1SJohn Marino 			X = (long)(BSWAP8(xi[j]));
594f5b1c8a1SJohn Marino #else
595f5b1c8a1SJohn Marino 			const u8 *p = (const u8 *)(xi+j);
596f5b1c8a1SJohn Marino 			X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
597f5b1c8a1SJohn Marino #endif
59872c33676SMaxim Ag #else
599f5b1c8a1SJohn Marino 			const u8 *p = (const u8 *)(xi+j);
600f5b1c8a1SJohn Marino 			X = (long)GETU32(p);
60172c33676SMaxim Ag #endif
60272c33676SMaxim Ag #else /* BIG_ENDIAN */
603f5b1c8a1SJohn Marino 		X = xi[j];
60472c33676SMaxim Ag #endif
605f5b1c8a1SJohn Marino 
		/* One iteration per bit of X; X is shifted left after each. */
606f5b1c8a1SJohn Marino 		for (i=0; i<8*sizeof(long); ++i, X<<=1) {
			/* M is used as a select mask for V: the shift by
			 * (width-1) of the signed X must yield all-ones when the
			 * top bit is set.
			 * NOTE(review): that relies on >> of a negative long
			 * sign-extending (implementation-defined), and X<<=1 on a
			 * negative long is formally undefined in ISO C. */
607f5b1c8a1SJohn Marino 			u64 M = (u64)(X>>(8*sizeof(long)-1));
608f5b1c8a1SJohn Marino 			Z.hi ^= V.hi&M;
609f5b1c8a1SJohn Marino 			Z.lo ^= V.lo&M;
610f5b1c8a1SJohn Marino 
611f5b1c8a1SJohn Marino 			REDUCE1BIT(V);
612f5b1c8a1SJohn Marino 		}
613f5b1c8a1SJohn Marino 	}
614f5b1c8a1SJohn Marino 
	/* Write Z back into Xi with Z.hi's most significant byte first. */
61572c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
616f5b1c8a1SJohn Marino #ifdef BSWAP8
617f5b1c8a1SJohn Marino 	Xi[0] = BSWAP8(Z.hi);
618f5b1c8a1SJohn Marino 	Xi[1] = BSWAP8(Z.lo);
619f5b1c8a1SJohn Marino #else
620f5b1c8a1SJohn Marino 	u8 *p = (u8 *)Xi;
621f5b1c8a1SJohn Marino 	u32 v;
622f5b1c8a1SJohn Marino 	v = (u32)(Z.hi>>32);	PUTU32(p,v);
623f5b1c8a1SJohn Marino 	v = (u32)(Z.hi);	PUTU32(p+4,v);
624f5b1c8a1SJohn Marino 	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
625f5b1c8a1SJohn Marino 	v = (u32)(Z.lo);	PUTU32(p+12,v);
626f5b1c8a1SJohn Marino #endif
62772c33676SMaxim Ag #else /* BIG_ENDIAN */
628f5b1c8a1SJohn Marino 	Xi[0] = Z.hi;
629f5b1c8a1SJohn Marino 	Xi[1] = Z.lo;
63072c33676SMaxim Ag #endif
631f5b1c8a1SJohn Marino }
632f5b1c8a1SJohn Marino #define GCM_MUL(ctx,Xi)	  gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
633f5b1c8a1SJohn Marino 
634f5b1c8a1SJohn Marino #endif
635f5b1c8a1SJohn Marino 
/* x86_arch.h is pulled in only for assembler builds on x86/x86-64; the
 * CPUCAP_MASK_* tests in CRYPTO_gcm128_init are compiled under the same
 * conditions. */
63672c33676SMaxim Ag #if	defined(GHASH_ASM) && \
637f5b1c8a1SJohn Marino 	(defined(__i386)	|| defined(__i386__)	|| \
638f5b1c8a1SJohn Marino 	 defined(__x86_64)	|| defined(__x86_64__)	|| \
639f5b1c8a1SJohn Marino 	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
64072c33676SMaxim Ag #include "x86_arch.h"
64172c33676SMaxim Ag #endif
64272c33676SMaxim Ag 
/* With 4-bit tables and assembler support, declare the platform-specific
 * multiply/hash routines and define GCM_FUNCREF_4BIT so that the
 * implementation is picked at run time through function pointers. */
64372c33676SMaxim Ag #if	TABLE_BITS==4 && defined(GHASH_ASM)
64472c33676SMaxim Ag # if	(defined(__i386)	|| defined(__i386__)	|| \
64572c33676SMaxim Ag 	 defined(__x86_64)	|| defined(__x86_64__)	|| \
64672c33676SMaxim Ag 	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
647f5b1c8a1SJohn Marino #  define GHASH_ASM_X86_OR_64
648f5b1c8a1SJohn Marino #  define GCM_FUNCREF_4BIT
649f5b1c8a1SJohn Marino 
/* Carry-less-multiply based routines (selected when the PCLMUL capability
 * bit is reported, see CRYPTO_gcm128_init). */
650f5b1c8a1SJohn Marino void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
651f5b1c8a1SJohn Marino void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
652f5b1c8a1SJohn Marino void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
653f5b1c8a1SJohn Marino 
/* 32-bit x86 only: MMX and plain integer variants of the 4-bit routines. */
654f5b1c8a1SJohn Marino #  if	defined(__i386) || defined(__i386__) || defined(_M_IX86)
655f5b1c8a1SJohn Marino #   define GHASH_ASM_X86
656f5b1c8a1SJohn Marino void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
657f5b1c8a1SJohn Marino void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
658f5b1c8a1SJohn Marino 
659f5b1c8a1SJohn Marino void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
660f5b1c8a1SJohn Marino void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
661f5b1c8a1SJohn Marino #  endif
662f5b1c8a1SJohn Marino # elif defined(__arm__) || defined(__arm)
663f5b1c8a1SJohn Marino #  include "arm_arch.h"
/* NEON routines are only declared for ARMv7+ builds without
 * __STRICT_ALIGNMENT. */
66472c33676SMaxim Ag #  if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
665f5b1c8a1SJohn Marino #   define GHASH_ASM_ARM
666f5b1c8a1SJohn Marino #   define GCM_FUNCREF_4BIT
667f5b1c8a1SJohn Marino void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
668f5b1c8a1SJohn Marino void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
669f5b1c8a1SJohn Marino #  endif
670f5b1c8a1SJohn Marino # endif
671f5b1c8a1SJohn Marino #endif
672f5b1c8a1SJohn Marino 
/* When GCM_FUNCREF_4BIT is set, GCM_MUL and GHASH call through the names
 * gcm_gmult_p / gcm_ghash_p.  These are not globals: every function that
 * uses the macros declares them as locals initialised from ctx->gmult and
 * ctx->ghash. */
673f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
674f5b1c8a1SJohn Marino # undef  GCM_MUL
675f5b1c8a1SJohn Marino # define GCM_MUL(ctx,Xi)	(*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
676f5b1c8a1SJohn Marino # ifdef GHASH
677f5b1c8a1SJohn Marino #  undef  GHASH
678f5b1c8a1SJohn Marino #  define GHASH(ctx,in,len)	(*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
679f5b1c8a1SJohn Marino # endif
680f5b1c8a1SJohn Marino #endif
681f5b1c8a1SJohn Marino 
/*
 * CRYPTO_gcm128_init - bind a block cipher and key to a GCM context.
 *
 * ctx:   context to initialise; it is zeroed first.
 * key:   opaque key schedule; only the pointer is stored, nothing is copied.
 * block: single-block encryption function used for all cipher calls.
 *
 * Computes H by encrypting the all-zero block in place, converts it to host
 * byte order, then builds the multiplication table and/or selects the
 * gmult/ghash implementation according to TABLE_BITS and, for assembler
 * builds, the CPU capabilities reported at run time.
 */
682f5b1c8a1SJohn Marino void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
683f5b1c8a1SJohn Marino {
684f5b1c8a1SJohn Marino 	memset(ctx,0,sizeof(*ctx));
685f5b1c8a1SJohn Marino 	ctx->block = block;
686f5b1c8a1SJohn Marino 	ctx->key   = key;
687f5b1c8a1SJohn Marino 
	/* ctx->H is all zero after the memset, so this yields block(0). */
688f5b1c8a1SJohn Marino 	(*block)(ctx->H.c,ctx->H.c,key);
689f5b1c8a1SJohn Marino 
69072c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
691f5b1c8a1SJohn Marino 	/* H is stored in host byte order */
692f5b1c8a1SJohn Marino #ifdef BSWAP8
693f5b1c8a1SJohn Marino 	ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
694f5b1c8a1SJohn Marino 	ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
695f5b1c8a1SJohn Marino #else
696f5b1c8a1SJohn Marino 	u8 *p = ctx->H.c;
697f5b1c8a1SJohn Marino 	u64 hi,lo;
698f5b1c8a1SJohn Marino 	hi = (u64)GETU32(p)  <<32|GETU32(p+4);
699f5b1c8a1SJohn Marino 	lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
700f5b1c8a1SJohn Marino 	ctx->H.u[0] = hi;
701f5b1c8a1SJohn Marino 	ctx->H.u[1] = lo;
702f5b1c8a1SJohn Marino #endif
70372c33676SMaxim Ag #endif
704f5b1c8a1SJohn Marino 
705f5b1c8a1SJohn Marino #if	TABLE_BITS==8
706f5b1c8a1SJohn Marino 	gcm_init_8bit(ctx->Htable,ctx->H.u);
707f5b1c8a1SJohn Marino #elif	TABLE_BITS==4
708f5b1c8a1SJohn Marino # if	defined(GHASH_ASM_X86_OR_64)
709f5b1c8a1SJohn Marino #  if	!defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
71072c33676SMaxim Ag 	/* check FXSR and PCLMULQDQ bits */
71172c33676SMaxim Ag 	if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
71272c33676SMaxim Ag 	    (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
		/* The clmul path uses its own table setup and returns
		 * early; gcm_init_4bit is not called. */
713f5b1c8a1SJohn Marino 		gcm_init_clmul(ctx->Htable,ctx->H.u);
714f5b1c8a1SJohn Marino 		ctx->gmult = gcm_gmult_clmul;
715f5b1c8a1SJohn Marino 		ctx->ghash = gcm_ghash_clmul;
716f5b1c8a1SJohn Marino 		return;
717f5b1c8a1SJohn Marino 	}
718f5b1c8a1SJohn Marino #  endif
719f5b1c8a1SJohn Marino 	gcm_init_4bit(ctx->Htable,ctx->H.u);
720f5b1c8a1SJohn Marino #  if	defined(GHASH_ASM_X86)			/* x86 only */
	/* The preprocessor picks which capability bit opens the if below;
	 * both variants share the same body and else branch. */
721f5b1c8a1SJohn Marino #   if	defined(OPENSSL_IA32_SSE2)
72272c33676SMaxim Ag 	if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) {	/* check SSE bit */
723f5b1c8a1SJohn Marino #   else
72472c33676SMaxim Ag 	if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) {	/* check MMX bit */
725f5b1c8a1SJohn Marino #   endif
726f5b1c8a1SJohn Marino 		ctx->gmult = gcm_gmult_4bit_mmx;
727f5b1c8a1SJohn Marino 		ctx->ghash = gcm_ghash_4bit_mmx;
728f5b1c8a1SJohn Marino 	} else {
729f5b1c8a1SJohn Marino 		ctx->gmult = gcm_gmult_4bit_x86;
730f5b1c8a1SJohn Marino 		ctx->ghash = gcm_ghash_4bit_x86;
731f5b1c8a1SJohn Marino 	}
732f5b1c8a1SJohn Marino #  else
733f5b1c8a1SJohn Marino 	ctx->gmult = gcm_gmult_4bit;
734f5b1c8a1SJohn Marino 	ctx->ghash = gcm_ghash_4bit;
735f5b1c8a1SJohn Marino #  endif
736f5b1c8a1SJohn Marino # elif	defined(GHASH_ASM_ARM)
	/* NOTE(review): on the NEON branch no table init routine is called
	 * here, so Htable stays as the memset left it; presumably the NEON
	 * code does not read it - confirm against the assembler source. */
737f5b1c8a1SJohn Marino 	if (OPENSSL_armcap_P & ARMV7_NEON) {
738f5b1c8a1SJohn Marino 		ctx->gmult = gcm_gmult_neon;
739f5b1c8a1SJohn Marino 		ctx->ghash = gcm_ghash_neon;
740f5b1c8a1SJohn Marino 	} else {
741f5b1c8a1SJohn Marino 		gcm_init_4bit(ctx->Htable,ctx->H.u);
742f5b1c8a1SJohn Marino 		ctx->gmult = gcm_gmult_4bit;
743f5b1c8a1SJohn Marino 		ctx->ghash = gcm_ghash_4bit;
744f5b1c8a1SJohn Marino 	}
745f5b1c8a1SJohn Marino # else
	/* No run-time dispatch: only the table is built, the gmult/ghash
	 * pointers stay NULL from the memset. */
746f5b1c8a1SJohn Marino 	gcm_init_4bit(ctx->Htable,ctx->H.u);
747f5b1c8a1SJohn Marino # endif
748f5b1c8a1SJohn Marino #endif
749f5b1c8a1SJohn Marino }
750f5b1c8a1SJohn Marino 
/*
 * CRYPTO_gcm128_setiv - start a new message under the key already in ctx.
 *
 * Clears Yi, Xi, both length counters and the partial-block counters, then
 * derives the initial counter block Yi from iv:
 *  - len == 12: Yi is iv followed by the 32-bit big-endian value 1;
 *  - otherwise: iv is absorbed into Yi 16 bytes at a time with GCM_MUL (a
 *    short final block is XORed into the leading bytes only), the IV length
 *    in bits is XORed big-endian into the last 8 bytes, and Yi is
 *    multiplied once more.
 * Finally EK0 = block(Yi) is stored, and the 32-bit counter held big-endian
 * in the last four bytes of Yi is incremented for the first data block.
 */
751f5b1c8a1SJohn Marino void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
752f5b1c8a1SJohn Marino {
753f5b1c8a1SJohn Marino 	unsigned int ctr;
754f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
	/* Local name required by the GCM_MUL macro in this configuration. */
755f5b1c8a1SJohn Marino 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
756f5b1c8a1SJohn Marino #endif
757f5b1c8a1SJohn Marino 
758f5b1c8a1SJohn Marino 	ctx->Yi.u[0]  = 0;
759f5b1c8a1SJohn Marino 	ctx->Yi.u[1]  = 0;
760f5b1c8a1SJohn Marino 	ctx->Xi.u[0]  = 0;
761f5b1c8a1SJohn Marino 	ctx->Xi.u[1]  = 0;
762f5b1c8a1SJohn Marino 	ctx->len.u[0] = 0;	/* AAD length */
763f5b1c8a1SJohn Marino 	ctx->len.u[1] = 0;	/* message length */
764f5b1c8a1SJohn Marino 	ctx->ares = 0;
765f5b1c8a1SJohn Marino 	ctx->mres = 0;
766f5b1c8a1SJohn Marino 
767f5b1c8a1SJohn Marino 	if (len==12) {
		/* Bytes 12..14 are still zero from the reset above. */
768f5b1c8a1SJohn Marino 		memcpy(ctx->Yi.c,iv,12);
769f5b1c8a1SJohn Marino 		ctx->Yi.c[15]=1;
770f5b1c8a1SJohn Marino 		ctr=1;
771f5b1c8a1SJohn Marino 	}
772f5b1c8a1SJohn Marino 	else {
773f5b1c8a1SJohn Marino 		size_t i;
		/* len is consumed by the loops below; keep the original. */
774f5b1c8a1SJohn Marino 		u64 len0 = len;
775f5b1c8a1SJohn Marino 
776f5b1c8a1SJohn Marino 		while (len>=16) {
777f5b1c8a1SJohn Marino 			for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
			/* GCM_MUL(ctx,Yi) multiplies ctx->Yi, not ctx->Xi. */
778f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Yi);
779f5b1c8a1SJohn Marino 			iv += 16;
780f5b1c8a1SJohn Marino 			len -= 16;
781f5b1c8a1SJohn Marino 		}
782f5b1c8a1SJohn Marino 		if (len) {
783f5b1c8a1SJohn Marino 			for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
784f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Yi);
785f5b1c8a1SJohn Marino 		}
		/* Convert the byte count to a bit count. */
786f5b1c8a1SJohn Marino 		len0 <<= 3;
78772c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
788f5b1c8a1SJohn Marino #ifdef BSWAP8
789f5b1c8a1SJohn Marino 		ctx->Yi.u[1]  ^= BSWAP8(len0);
790f5b1c8a1SJohn Marino #else
791f5b1c8a1SJohn Marino 		ctx->Yi.c[8]  ^= (u8)(len0>>56);
792f5b1c8a1SJohn Marino 		ctx->Yi.c[9]  ^= (u8)(len0>>48);
793f5b1c8a1SJohn Marino 		ctx->Yi.c[10] ^= (u8)(len0>>40);
794f5b1c8a1SJohn Marino 		ctx->Yi.c[11] ^= (u8)(len0>>32);
795f5b1c8a1SJohn Marino 		ctx->Yi.c[12] ^= (u8)(len0>>24);
796f5b1c8a1SJohn Marino 		ctx->Yi.c[13] ^= (u8)(len0>>16);
797f5b1c8a1SJohn Marino 		ctx->Yi.c[14] ^= (u8)(len0>>8);
798f5b1c8a1SJohn Marino 		ctx->Yi.c[15] ^= (u8)(len0);
799f5b1c8a1SJohn Marino #endif
80072c33676SMaxim Ag #else /* BIG_ENDIAN */
801f5b1c8a1SJohn Marino 		ctx->Yi.u[1]  ^= len0;
80272c33676SMaxim Ag #endif
803f5b1c8a1SJohn Marino 
804f5b1c8a1SJohn Marino 		GCM_MUL(ctx,Yi);
805f5b1c8a1SJohn Marino 
		/* Pick up the counter from the last four bytes of Yi. */
80672c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
807f5b1c8a1SJohn Marino #ifdef BSWAP4
808f5b1c8a1SJohn Marino 		ctr = BSWAP4(ctx->Yi.d[3]);
809f5b1c8a1SJohn Marino #else
810f5b1c8a1SJohn Marino 		ctr = GETU32(ctx->Yi.c+12);
811f5b1c8a1SJohn Marino #endif
81272c33676SMaxim Ag #else /* BIG_ENDIAN */
813f5b1c8a1SJohn Marino 		ctr = ctx->Yi.d[3];
81472c33676SMaxim Ag #endif
815f5b1c8a1SJohn Marino 	}
816f5b1c8a1SJohn Marino 
	/* EK0 is the encryption of the initial counter block; the counter
	 * is advanced only after it has been computed. */
817f5b1c8a1SJohn Marino 	(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
818f5b1c8a1SJohn Marino 	++ctr;
81972c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
820f5b1c8a1SJohn Marino #ifdef BSWAP4
821f5b1c8a1SJohn Marino 	ctx->Yi.d[3] = BSWAP4(ctr);
822f5b1c8a1SJohn Marino #else
823f5b1c8a1SJohn Marino 	PUTU32(ctx->Yi.c+12,ctr);
824f5b1c8a1SJohn Marino #endif
82572c33676SMaxim Ag #else /* BIG_ENDIAN */
826f5b1c8a1SJohn Marino 	ctx->Yi.d[3] = ctr;
82772c33676SMaxim Ag #endif
828f5b1c8a1SJohn Marino }
829f5b1c8a1SJohn Marino 
/*
 * CRYPTO_gcm128_aad - absorb additional authenticated data into ctx->Xi.
 *
 * May be called repeatedly; ctx->ares carries the number of bytes of an
 * unfinished 16-byte block from one call to the next.
 *
 * Returns 0 on success, -2 if message data has already been processed
 * (ctx->len.u[1] != 0), and -1 if the accumulated AAD length would exceed
 * 2^61 bytes or the 64-bit sum wraps.  On error the context is unchanged.
 */
830f5b1c8a1SJohn Marino int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
831f5b1c8a1SJohn Marino {
832f5b1c8a1SJohn Marino 	size_t i;
833f5b1c8a1SJohn Marino 	unsigned int n;
834f5b1c8a1SJohn Marino 	u64 alen = ctx->len.u[0];
835f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
	/* Local names required by the GCM_MUL / GHASH macros. */
836f5b1c8a1SJohn Marino 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
837f5b1c8a1SJohn Marino # ifdef GHASH
838f5b1c8a1SJohn Marino 	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
839f5b1c8a1SJohn Marino 				const u8 *inp,size_t len)	= ctx->ghash;
840f5b1c8a1SJohn Marino # endif
841f5b1c8a1SJohn Marino #endif
842f5b1c8a1SJohn Marino 
	/* AAD is rejected once any message bytes have been counted. */
843f5b1c8a1SJohn Marino 	if (ctx->len.u[1]) return -2;
844f5b1c8a1SJohn Marino 
845f5b1c8a1SJohn Marino 	alen += len;
	/* The wrap test only matters when size_t is itself 64 bits wide. */
846f5b1c8a1SJohn Marino 	if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
847f5b1c8a1SJohn Marino 		return -1;
848f5b1c8a1SJohn Marino 	ctx->len.u[0] = alen;
849f5b1c8a1SJohn Marino 
	/* First top up a block left unfinished by a previous call. */
850f5b1c8a1SJohn Marino 	n = ctx->ares;
851f5b1c8a1SJohn Marino 	if (n) {
852f5b1c8a1SJohn Marino 		while (n && len) {
853f5b1c8a1SJohn Marino 			ctx->Xi.c[n] ^= *(aad++);
854f5b1c8a1SJohn Marino 			--len;
855f5b1c8a1SJohn Marino 			n = (n+1)%16;
856f5b1c8a1SJohn Marino 		}
		/* n wrapped to 0: the block is complete, multiply it in.
		 * Otherwise the input ran out; remember the new fill level. */
857f5b1c8a1SJohn Marino 		if (n==0) GCM_MUL(ctx,Xi);
858f5b1c8a1SJohn Marino 		else {
859f5b1c8a1SJohn Marino 			ctx->ares = n;
860f5b1c8a1SJohn Marino 			return 0;
861f5b1c8a1SJohn Marino 		}
862f5b1c8a1SJohn Marino 	}
863f5b1c8a1SJohn Marino 
	/* Whole 16-byte blocks: one streaming GHASH call when available,
	 * otherwise XOR-and-multiply block by block. */
864f5b1c8a1SJohn Marino #ifdef GHASH
865f5b1c8a1SJohn Marino 	if ((i = (len&(size_t)-16))) {
866f5b1c8a1SJohn Marino 		GHASH(ctx,aad,i);
867f5b1c8a1SJohn Marino 		aad += i;
868f5b1c8a1SJohn Marino 		len -= i;
869f5b1c8a1SJohn Marino 	}
870f5b1c8a1SJohn Marino #else
871f5b1c8a1SJohn Marino 	while (len>=16) {
872f5b1c8a1SJohn Marino 		for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
873f5b1c8a1SJohn Marino 		GCM_MUL(ctx,Xi);
874f5b1c8a1SJohn Marino 		aad += 16;
875f5b1c8a1SJohn Marino 		len -= 16;
876f5b1c8a1SJohn Marino 	}
877f5b1c8a1SJohn Marino #endif
	/* Trailing bytes are XORed in but not multiplied yet; n is 0 at this
	 * point unless it is set to the tail length here. */
878f5b1c8a1SJohn Marino 	if (len) {
879f5b1c8a1SJohn Marino 		n = (unsigned int)len;
880f5b1c8a1SJohn Marino 		for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
881f5b1c8a1SJohn Marino 	}
882f5b1c8a1SJohn Marino 
883f5b1c8a1SJohn Marino 	ctx->ares = n;
884f5b1c8a1SJohn Marino 	return 0;
885f5b1c8a1SJohn Marino }
886f5b1c8a1SJohn Marino 
/*
 * CRYPTO_gcm128_encrypt - encrypt len bytes from in to out in counter mode
 * and fold the produced ciphertext into the running hash ctx->Xi.
 *
 * May be called repeatedly; ctx->mres records how many bytes of the current
 * keystream block ctx->EKi have already been used.
 *
 * Returns 0 on success, -1 if the accumulated message length would exceed
 * 2^36-32 bytes or the 64-bit sum wraps (nothing is processed in that case).
 */
887f5b1c8a1SJohn Marino int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
888f5b1c8a1SJohn Marino 		const unsigned char *in, unsigned char *out,
889f5b1c8a1SJohn Marino 		size_t len)
890f5b1c8a1SJohn Marino {
891f5b1c8a1SJohn Marino 	unsigned int n, ctr;
892f5b1c8a1SJohn Marino 	size_t i;
893f5b1c8a1SJohn Marino 	u64        mlen  = ctx->len.u[1];
894f5b1c8a1SJohn Marino 	block128_f block = ctx->block;
895f5b1c8a1SJohn Marino 	void      *key   = ctx->key;
896f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
	/* Local names required by the GCM_MUL / GHASH macros. */
897f5b1c8a1SJohn Marino 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
898f5b1c8a1SJohn Marino # ifdef GHASH
899f5b1c8a1SJohn Marino 	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
900f5b1c8a1SJohn Marino 				const u8 *inp,size_t len)	= ctx->ghash;
901f5b1c8a1SJohn Marino # endif
902f5b1c8a1SJohn Marino #endif
903f5b1c8a1SJohn Marino 
904f5b1c8a1SJohn Marino 	mlen += len;
905f5b1c8a1SJohn Marino 	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
906f5b1c8a1SJohn Marino 		return -1;
907f5b1c8a1SJohn Marino 	ctx->len.u[1] = mlen;
908f5b1c8a1SJohn Marino 
909f5b1c8a1SJohn Marino 	if (ctx->ares) {
910f5b1c8a1SJohn Marino 		/* First call to encrypt finalizes GHASH(AAD) */
911f5b1c8a1SJohn Marino 		GCM_MUL(ctx,Xi);
912f5b1c8a1SJohn Marino 		ctx->ares = 0;
913f5b1c8a1SJohn Marino 	}
914f5b1c8a1SJohn Marino 
	/* Load the 32-bit counter kept big-endian in Yi's last four bytes. */
91572c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
916f5b1c8a1SJohn Marino #ifdef BSWAP4
917f5b1c8a1SJohn Marino 	ctr = BSWAP4(ctx->Yi.d[3]);
918f5b1c8a1SJohn Marino #else
919f5b1c8a1SJohn Marino 	ctr = GETU32(ctx->Yi.c+12);
920f5b1c8a1SJohn Marino #endif
92172c33676SMaxim Ag #else /* BIG_ENDIAN */
922f5b1c8a1SJohn Marino 	ctr = ctx->Yi.d[3];
92372c33676SMaxim Ag #endif
924f5b1c8a1SJohn Marino 
925f5b1c8a1SJohn Marino 	n = ctx->mres;
926f5b1c8a1SJohn Marino #if !defined(OPENSSL_SMALL_FOOTPRINT)
	/* Word-at-a-time fast path.  The do { } while(0) wrapper exists so
	 * that "break" can drop to the byte-wise loop at the bottom. */
927f5b1c8a1SJohn Marino 	if (16%sizeof(size_t) == 0) do {	/* always true actually */
928f5b1c8a1SJohn Marino 		if (n) {
			/* Use up keystream left over from the previous call. */
929f5b1c8a1SJohn Marino 			while (n && len) {
930f5b1c8a1SJohn Marino 				ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
931f5b1c8a1SJohn Marino 				--len;
932f5b1c8a1SJohn Marino 				n = (n+1)%16;
933f5b1c8a1SJohn Marino 			}
934f5b1c8a1SJohn Marino 			if (n==0) GCM_MUL(ctx,Xi);
935f5b1c8a1SJohn Marino 			else {
936f5b1c8a1SJohn Marino 				ctx->mres = n;
937f5b1c8a1SJohn Marino 				return 0;
938f5b1c8a1SJohn Marino 			}
939f5b1c8a1SJohn Marino 		}
940f5b1c8a1SJohn Marino #ifdef __STRICT_ALIGNMENT
		/* Word access needs aligned buffers; otherwise go byte-wise. */
941f5b1c8a1SJohn Marino 		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
942f5b1c8a1SJohn Marino 			break;
943f5b1c8a1SJohn Marino #endif
944f5b1c8a1SJohn Marino #if defined(GHASH) && defined(GHASH_CHUNK)
		/* Encrypt GHASH_CHUNK bytes, then hash that ciphertext in one
		 * GHASH call. */
945f5b1c8a1SJohn Marino 		while (len>=GHASH_CHUNK) {
946f5b1c8a1SJohn Marino 		    size_t j=GHASH_CHUNK;
947f5b1c8a1SJohn Marino 
948f5b1c8a1SJohn Marino 		    while (j) {
949f5b1c8a1SJohn Marino 		    	size_t *out_t=(size_t *)out;
950f5b1c8a1SJohn Marino 		    	const size_t *in_t=(const size_t *)in;
951f5b1c8a1SJohn Marino 
			/* Keystream block for the current counter, then bump
			 * the counter and store it back into Yi. */
952f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
953f5b1c8a1SJohn Marino 			++ctr;
95472c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
955f5b1c8a1SJohn Marino #ifdef BSWAP4
956f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
957f5b1c8a1SJohn Marino #else
958f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
959f5b1c8a1SJohn Marino #endif
96072c33676SMaxim Ag #else /* BIG_ENDIAN */
961f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
96272c33676SMaxim Ag #endif
963f5b1c8a1SJohn Marino 			for (i=0; i<16/sizeof(size_t); ++i)
964f5b1c8a1SJohn Marino 				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
965f5b1c8a1SJohn Marino 			out += 16;
966f5b1c8a1SJohn Marino 			in  += 16;
967f5b1c8a1SJohn Marino 			j   -= 16;
968f5b1c8a1SJohn Marino 		    }
969f5b1c8a1SJohn Marino 		    GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
970f5b1c8a1SJohn Marino 		    len -= GHASH_CHUNK;
971f5b1c8a1SJohn Marino 		}
		/* Remaining whole blocks (fewer than GHASH_CHUNK bytes). */
972f5b1c8a1SJohn Marino 		if ((i = (len&(size_t)-16))) {
		    /* Keep the byte count in j: i is reused as the word index
		     * inside the loop. */
973f5b1c8a1SJohn Marino 		    size_t j=i;
974f5b1c8a1SJohn Marino 
975f5b1c8a1SJohn Marino 		    while (len>=16) {
976f5b1c8a1SJohn Marino 		    	size_t *out_t=(size_t *)out;
977f5b1c8a1SJohn Marino 		    	const size_t *in_t=(const size_t *)in;
978f5b1c8a1SJohn Marino 
979f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
980f5b1c8a1SJohn Marino 			++ctr;
98172c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
982f5b1c8a1SJohn Marino #ifdef BSWAP4
983f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
984f5b1c8a1SJohn Marino #else
985f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
986f5b1c8a1SJohn Marino #endif
98772c33676SMaxim Ag #else /* BIG_ENDIAN */
988f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
98972c33676SMaxim Ag #endif
990f5b1c8a1SJohn Marino 			for (i=0; i<16/sizeof(size_t); ++i)
991f5b1c8a1SJohn Marino 				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
992f5b1c8a1SJohn Marino 			out += 16;
993f5b1c8a1SJohn Marino 			in  += 16;
994f5b1c8a1SJohn Marino 			len -= 16;
995f5b1c8a1SJohn Marino 		    }
996f5b1c8a1SJohn Marino 		    GHASH(ctx,out-j,j);
997f5b1c8a1SJohn Marino 		}
998f5b1c8a1SJohn Marino #else
		/* No streaming GHASH: XOR each ciphertext block into Xi and
		 * multiply immediately. */
999f5b1c8a1SJohn Marino 		while (len>=16) {
1000f5b1c8a1SJohn Marino 		    	size_t *out_t=(size_t *)out;
1001f5b1c8a1SJohn Marino 		    	const size_t *in_t=(const size_t *)in;
1002f5b1c8a1SJohn Marino 
1003f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1004f5b1c8a1SJohn Marino 			++ctr;
100572c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1006f5b1c8a1SJohn Marino #ifdef BSWAP4
1007f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
1008f5b1c8a1SJohn Marino #else
1009f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
1010f5b1c8a1SJohn Marino #endif
101172c33676SMaxim Ag #else /* BIG_ENDIAN */
1012f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
101372c33676SMaxim Ag #endif
1014f5b1c8a1SJohn Marino 			for (i=0; i<16/sizeof(size_t); ++i)
1015f5b1c8a1SJohn Marino 				ctx->Xi.t[i] ^=
1016f5b1c8a1SJohn Marino 				out_t[i] = in_t[i]^ctx->EKi.t[i];
1017f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Xi);
1018f5b1c8a1SJohn Marino 			out += 16;
1019f5b1c8a1SJohn Marino 			in  += 16;
1020f5b1c8a1SJohn Marino 			len -= 16;
1021f5b1c8a1SJohn Marino 		}
1022f5b1c8a1SJohn Marino #endif
		/* Final partial block: n is 0 here, so it counts the bytes
		 * used from the fresh keystream block.  The multiply is left
		 * for a later call once the block is complete. */
1023f5b1c8a1SJohn Marino 		if (len) {
1024f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1025f5b1c8a1SJohn Marino 			++ctr;
102672c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1027f5b1c8a1SJohn Marino #ifdef BSWAP4
1028f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
1029f5b1c8a1SJohn Marino #else
1030f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
1031f5b1c8a1SJohn Marino #endif
103272c33676SMaxim Ag #else /* BIG_ENDIAN */
1033f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
103472c33676SMaxim Ag #endif
1035f5b1c8a1SJohn Marino 			while (len--) {
1036f5b1c8a1SJohn Marino 				ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1037f5b1c8a1SJohn Marino 				++n;
1038f5b1c8a1SJohn Marino 			}
1039f5b1c8a1SJohn Marino 		}
1040f5b1c8a1SJohn Marino 
1041f5b1c8a1SJohn Marino 		ctx->mres = n;
1042f5b1c8a1SJohn Marino 		return 0;
1043f5b1c8a1SJohn Marino 	} while(0);
1044f5b1c8a1SJohn Marino #endif
	/* Byte-wise path: used for OPENSSL_SMALL_FOOTPRINT builds and when
	 * the fast path bailed out with break. */
1045f5b1c8a1SJohn Marino 	for (i=0;i<len;++i) {
1046f5b1c8a1SJohn Marino 		if (n==0) {
			/* Start of a block: produce fresh keystream. */
1047f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1048f5b1c8a1SJohn Marino 			++ctr;
104972c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1050f5b1c8a1SJohn Marino #ifdef BSWAP4
1051f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
1052f5b1c8a1SJohn Marino #else
1053f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
1054f5b1c8a1SJohn Marino #endif
105572c33676SMaxim Ag #else /* BIG_ENDIAN */
1056f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
105772c33676SMaxim Ag #endif
1058f5b1c8a1SJohn Marino 		}
1059f5b1c8a1SJohn Marino 		ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1060f5b1c8a1SJohn Marino 		n = (n+1)%16;
1061f5b1c8a1SJohn Marino 		if (n==0)
1062f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Xi);
1063f5b1c8a1SJohn Marino 	}
1064f5b1c8a1SJohn Marino 
1065f5b1c8a1SJohn Marino 	ctx->mres = n;
1066f5b1c8a1SJohn Marino 	return 0;
1067f5b1c8a1SJohn Marino }
1068f5b1c8a1SJohn Marino 
1069f5b1c8a1SJohn Marino int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1070f5b1c8a1SJohn Marino 		const unsigned char *in, unsigned char *out,
1071f5b1c8a1SJohn Marino 		size_t len)
1072f5b1c8a1SJohn Marino {
1073f5b1c8a1SJohn Marino 	unsigned int n, ctr;
1074f5b1c8a1SJohn Marino 	size_t i;
1075f5b1c8a1SJohn Marino 	u64        mlen  = ctx->len.u[1];
1076f5b1c8a1SJohn Marino 	block128_f block = ctx->block;
1077f5b1c8a1SJohn Marino 	void      *key   = ctx->key;
1078f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
1079f5b1c8a1SJohn Marino 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1080f5b1c8a1SJohn Marino # ifdef GHASH
1081f5b1c8a1SJohn Marino 	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1082f5b1c8a1SJohn Marino 				const u8 *inp,size_t len)	= ctx->ghash;
1083f5b1c8a1SJohn Marino # endif
1084f5b1c8a1SJohn Marino #endif
1085f5b1c8a1SJohn Marino 
1086f5b1c8a1SJohn Marino 	mlen += len;
1087f5b1c8a1SJohn Marino 	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1088f5b1c8a1SJohn Marino 		return -1;
1089f5b1c8a1SJohn Marino 	ctx->len.u[1] = mlen;
1090f5b1c8a1SJohn Marino 
1091f5b1c8a1SJohn Marino 	if (ctx->ares) {
1092f5b1c8a1SJohn Marino 		/* First call to decrypt finalizes GHASH(AAD) */
1093f5b1c8a1SJohn Marino 		GCM_MUL(ctx,Xi);
1094f5b1c8a1SJohn Marino 		ctx->ares = 0;
1095f5b1c8a1SJohn Marino 	}
1096f5b1c8a1SJohn Marino 
109772c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1098f5b1c8a1SJohn Marino #ifdef BSWAP4
1099f5b1c8a1SJohn Marino 	ctr = BSWAP4(ctx->Yi.d[3]);
1100f5b1c8a1SJohn Marino #else
1101f5b1c8a1SJohn Marino 	ctr = GETU32(ctx->Yi.c+12);
1102f5b1c8a1SJohn Marino #endif
110372c33676SMaxim Ag #else /* BIG_ENDIAN */
1104f5b1c8a1SJohn Marino 	ctr = ctx->Yi.d[3];
110572c33676SMaxim Ag #endif
1106f5b1c8a1SJohn Marino 
1107f5b1c8a1SJohn Marino 	n = ctx->mres;
1108f5b1c8a1SJohn Marino #if !defined(OPENSSL_SMALL_FOOTPRINT)
1109f5b1c8a1SJohn Marino 	if (16%sizeof(size_t) == 0) do {	/* always true actually */
1110f5b1c8a1SJohn Marino 		if (n) {
1111f5b1c8a1SJohn Marino 			while (n && len) {
1112f5b1c8a1SJohn Marino 				u8 c = *(in++);
1113f5b1c8a1SJohn Marino 				*(out++) = c^ctx->EKi.c[n];
1114f5b1c8a1SJohn Marino 				ctx->Xi.c[n] ^= c;
1115f5b1c8a1SJohn Marino 				--len;
1116f5b1c8a1SJohn Marino 				n = (n+1)%16;
1117f5b1c8a1SJohn Marino 			}
1118f5b1c8a1SJohn Marino 			if (n==0) GCM_MUL (ctx,Xi);
1119f5b1c8a1SJohn Marino 			else {
1120f5b1c8a1SJohn Marino 				ctx->mres = n;
1121f5b1c8a1SJohn Marino 				return 0;
1122f5b1c8a1SJohn Marino 			}
1123f5b1c8a1SJohn Marino 		}
1124f5b1c8a1SJohn Marino #ifdef __STRICT_ALIGNMENT
1125f5b1c8a1SJohn Marino 		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1126f5b1c8a1SJohn Marino 			break;
1127f5b1c8a1SJohn Marino #endif
1128f5b1c8a1SJohn Marino #if defined(GHASH) && defined(GHASH_CHUNK)
1129f5b1c8a1SJohn Marino 		while (len>=GHASH_CHUNK) {
1130f5b1c8a1SJohn Marino 		    size_t j=GHASH_CHUNK;
1131f5b1c8a1SJohn Marino 
1132f5b1c8a1SJohn Marino 		    GHASH(ctx,in,GHASH_CHUNK);
1133f5b1c8a1SJohn Marino 		    while (j) {
1134f5b1c8a1SJohn Marino 		    	size_t *out_t=(size_t *)out;
1135f5b1c8a1SJohn Marino 		    	const size_t *in_t=(const size_t *)in;
1136f5b1c8a1SJohn Marino 
1137f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1138f5b1c8a1SJohn Marino 			++ctr;
113972c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1140f5b1c8a1SJohn Marino #ifdef BSWAP4
1141f5b1c8a1SJohn Marino 				ctx->Yi.d[3] = BSWAP4(ctr);
1142f5b1c8a1SJohn Marino #else
1143f5b1c8a1SJohn Marino 				PUTU32(ctx->Yi.c+12,ctr);
1144f5b1c8a1SJohn Marino #endif
114572c33676SMaxim Ag #else /* BIG_ENDIAN */
1146f5b1c8a1SJohn Marino 				ctx->Yi.d[3] = ctr;
114772c33676SMaxim Ag #endif
1148f5b1c8a1SJohn Marino 			for (i=0; i<16/sizeof(size_t); ++i)
1149f5b1c8a1SJohn Marino 				out_t[i] = in_t[i]^ctx->EKi.t[i];
1150f5b1c8a1SJohn Marino 			out += 16;
1151f5b1c8a1SJohn Marino 			in  += 16;
1152f5b1c8a1SJohn Marino 			j   -= 16;
1153f5b1c8a1SJohn Marino 		    }
1154f5b1c8a1SJohn Marino 		    len -= GHASH_CHUNK;
1155f5b1c8a1SJohn Marino 		}
1156f5b1c8a1SJohn Marino 		if ((i = (len&(size_t)-16))) {
1157f5b1c8a1SJohn Marino 		    GHASH(ctx,in,i);
1158f5b1c8a1SJohn Marino 		    while (len>=16) {
1159f5b1c8a1SJohn Marino 		    	size_t *out_t=(size_t *)out;
1160f5b1c8a1SJohn Marino 		    	const size_t *in_t=(const size_t *)in;
1161f5b1c8a1SJohn Marino 
1162f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1163f5b1c8a1SJohn Marino 			++ctr;
116472c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1165f5b1c8a1SJohn Marino #ifdef BSWAP4
1166f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
1167f5b1c8a1SJohn Marino #else
1168f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
1169f5b1c8a1SJohn Marino #endif
117072c33676SMaxim Ag #else /* BIG_ENDIAN */
1171f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
117272c33676SMaxim Ag #endif
1173f5b1c8a1SJohn Marino 			for (i=0; i<16/sizeof(size_t); ++i)
1174f5b1c8a1SJohn Marino 				out_t[i] = in_t[i]^ctx->EKi.t[i];
1175f5b1c8a1SJohn Marino 			out += 16;
1176f5b1c8a1SJohn Marino 			in  += 16;
1177f5b1c8a1SJohn Marino 			len -= 16;
1178f5b1c8a1SJohn Marino 		    }
1179f5b1c8a1SJohn Marino 		}
1180f5b1c8a1SJohn Marino #else
1181f5b1c8a1SJohn Marino 		while (len>=16) {
1182f5b1c8a1SJohn Marino 		    	size_t *out_t=(size_t *)out;
1183f5b1c8a1SJohn Marino 		    	const size_t *in_t=(const size_t *)in;
1184f5b1c8a1SJohn Marino 
1185f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1186f5b1c8a1SJohn Marino 			++ctr;
118772c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1188f5b1c8a1SJohn Marino #ifdef BSWAP4
1189f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
1190f5b1c8a1SJohn Marino #else
1191f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
1192f5b1c8a1SJohn Marino #endif
119372c33676SMaxim Ag #else /* BIG_ENDIAN */
1194f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
119572c33676SMaxim Ag #endif
1196f5b1c8a1SJohn Marino 			for (i=0; i<16/sizeof(size_t); ++i) {
1197f5b1c8a1SJohn Marino 				size_t c = in[i];
1198f5b1c8a1SJohn Marino 				out[i] = c^ctx->EKi.t[i];
1199f5b1c8a1SJohn Marino 				ctx->Xi.t[i] ^= c;
1200f5b1c8a1SJohn Marino 			}
1201f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Xi);
1202f5b1c8a1SJohn Marino 			out += 16;
1203f5b1c8a1SJohn Marino 			in  += 16;
1204f5b1c8a1SJohn Marino 			len -= 16;
1205f5b1c8a1SJohn Marino 		}
1206f5b1c8a1SJohn Marino #endif
1207f5b1c8a1SJohn Marino 		if (len) {
1208f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1209f5b1c8a1SJohn Marino 			++ctr;
121072c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1211f5b1c8a1SJohn Marino #ifdef BSWAP4
1212f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
1213f5b1c8a1SJohn Marino #else
1214f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
1215f5b1c8a1SJohn Marino #endif
121672c33676SMaxim Ag #else /* BIG_ENDIAN */
1217f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
121872c33676SMaxim Ag #endif
1219f5b1c8a1SJohn Marino 			while (len--) {
1220f5b1c8a1SJohn Marino 				u8 c = in[n];
1221f5b1c8a1SJohn Marino 				ctx->Xi.c[n] ^= c;
1222f5b1c8a1SJohn Marino 				out[n] = c^ctx->EKi.c[n];
1223f5b1c8a1SJohn Marino 				++n;
1224f5b1c8a1SJohn Marino 			}
1225f5b1c8a1SJohn Marino 		}
1226f5b1c8a1SJohn Marino 
1227f5b1c8a1SJohn Marino 		ctx->mres = n;
1228f5b1c8a1SJohn Marino 		return 0;
1229f5b1c8a1SJohn Marino 	} while(0);
1230f5b1c8a1SJohn Marino #endif
1231f5b1c8a1SJohn Marino 	for (i=0;i<len;++i) {
1232f5b1c8a1SJohn Marino 		u8 c;
1233f5b1c8a1SJohn Marino 		if (n==0) {
1234f5b1c8a1SJohn Marino 			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1235f5b1c8a1SJohn Marino 			++ctr;
123672c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1237f5b1c8a1SJohn Marino #ifdef BSWAP4
1238f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = BSWAP4(ctr);
1239f5b1c8a1SJohn Marino #else
1240f5b1c8a1SJohn Marino 			PUTU32(ctx->Yi.c+12,ctr);
1241f5b1c8a1SJohn Marino #endif
124272c33676SMaxim Ag #else /* BIG_ENDIAN */
1243f5b1c8a1SJohn Marino 			ctx->Yi.d[3] = ctr;
124472c33676SMaxim Ag #endif
1245f5b1c8a1SJohn Marino 		}
1246f5b1c8a1SJohn Marino 		c = in[i];
1247f5b1c8a1SJohn Marino 		out[i] = c^ctx->EKi.c[n];
1248f5b1c8a1SJohn Marino 		ctx->Xi.c[n] ^= c;
1249f5b1c8a1SJohn Marino 		n = (n+1)%16;
1250f5b1c8a1SJohn Marino 		if (n==0)
1251f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Xi);
1252f5b1c8a1SJohn Marino 	}
1253f5b1c8a1SJohn Marino 
1254f5b1c8a1SJohn Marino 	ctx->mres = n;
1255f5b1c8a1SJohn Marino 	return 0;
1256f5b1c8a1SJohn Marino }
1257f5b1c8a1SJohn Marino 
1258f5b1c8a1SJohn Marino int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1259f5b1c8a1SJohn Marino 		const unsigned char *in, unsigned char *out,
1260f5b1c8a1SJohn Marino 		size_t len, ctr128_f stream)
1261f5b1c8a1SJohn Marino {
1262f5b1c8a1SJohn Marino 	unsigned int n, ctr;
1263f5b1c8a1SJohn Marino 	size_t i;
1264f5b1c8a1SJohn Marino 	u64   mlen = ctx->len.u[1];
1265f5b1c8a1SJohn Marino 	void *key  = ctx->key;
1266f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
1267f5b1c8a1SJohn Marino 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1268f5b1c8a1SJohn Marino # ifdef GHASH
1269f5b1c8a1SJohn Marino 	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1270f5b1c8a1SJohn Marino 				const u8 *inp,size_t len)	= ctx->ghash;
1271f5b1c8a1SJohn Marino # endif
1272f5b1c8a1SJohn Marino #endif
1273f5b1c8a1SJohn Marino 
1274f5b1c8a1SJohn Marino 	mlen += len;
1275f5b1c8a1SJohn Marino 	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1276f5b1c8a1SJohn Marino 		return -1;
1277f5b1c8a1SJohn Marino 	ctx->len.u[1] = mlen;
1278f5b1c8a1SJohn Marino 
1279f5b1c8a1SJohn Marino 	if (ctx->ares) {
1280f5b1c8a1SJohn Marino 		/* First call to encrypt finalizes GHASH(AAD) */
1281f5b1c8a1SJohn Marino 		GCM_MUL(ctx,Xi);
1282f5b1c8a1SJohn Marino 		ctx->ares = 0;
1283f5b1c8a1SJohn Marino 	}
1284f5b1c8a1SJohn Marino 
128572c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1286f5b1c8a1SJohn Marino #ifdef BSWAP4
1287f5b1c8a1SJohn Marino 	ctr = BSWAP4(ctx->Yi.d[3]);
1288f5b1c8a1SJohn Marino #else
1289f5b1c8a1SJohn Marino 	ctr = GETU32(ctx->Yi.c+12);
1290f5b1c8a1SJohn Marino #endif
129172c33676SMaxim Ag #else /* BIG_ENDIAN */
1292f5b1c8a1SJohn Marino 	ctr = ctx->Yi.d[3];
129372c33676SMaxim Ag #endif
1294f5b1c8a1SJohn Marino 
1295f5b1c8a1SJohn Marino 	n = ctx->mres;
1296f5b1c8a1SJohn Marino 	if (n) {
1297f5b1c8a1SJohn Marino 		while (n && len) {
1298f5b1c8a1SJohn Marino 			ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1299f5b1c8a1SJohn Marino 			--len;
1300f5b1c8a1SJohn Marino 			n = (n+1)%16;
1301f5b1c8a1SJohn Marino 		}
1302f5b1c8a1SJohn Marino 		if (n==0) GCM_MUL(ctx,Xi);
1303f5b1c8a1SJohn Marino 		else {
1304f5b1c8a1SJohn Marino 			ctx->mres = n;
1305f5b1c8a1SJohn Marino 			return 0;
1306f5b1c8a1SJohn Marino 		}
1307f5b1c8a1SJohn Marino 	}
1308f5b1c8a1SJohn Marino #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1309f5b1c8a1SJohn Marino 	while (len>=GHASH_CHUNK) {
1310f5b1c8a1SJohn Marino 		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1311f5b1c8a1SJohn Marino 		ctr += GHASH_CHUNK/16;
131272c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1313f5b1c8a1SJohn Marino #ifdef BSWAP4
1314f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = BSWAP4(ctr);
1315f5b1c8a1SJohn Marino #else
1316f5b1c8a1SJohn Marino 		PUTU32(ctx->Yi.c+12,ctr);
1317f5b1c8a1SJohn Marino #endif
131872c33676SMaxim Ag #else /* BIG_ENDIAN */
1319f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = ctr;
132072c33676SMaxim Ag #endif
1321f5b1c8a1SJohn Marino 		GHASH(ctx,out,GHASH_CHUNK);
1322f5b1c8a1SJohn Marino 		out += GHASH_CHUNK;
1323f5b1c8a1SJohn Marino 		in  += GHASH_CHUNK;
1324f5b1c8a1SJohn Marino 		len -= GHASH_CHUNK;
1325f5b1c8a1SJohn Marino 	}
1326f5b1c8a1SJohn Marino #endif
1327f5b1c8a1SJohn Marino 	if ((i = (len&(size_t)-16))) {
1328f5b1c8a1SJohn Marino 		size_t j=i/16;
1329f5b1c8a1SJohn Marino 
1330f5b1c8a1SJohn Marino 		(*stream)(in,out,j,key,ctx->Yi.c);
1331f5b1c8a1SJohn Marino 		ctr += (unsigned int)j;
133272c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1333f5b1c8a1SJohn Marino #ifdef BSWAP4
1334f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = BSWAP4(ctr);
1335f5b1c8a1SJohn Marino #else
1336f5b1c8a1SJohn Marino 		PUTU32(ctx->Yi.c+12,ctr);
1337f5b1c8a1SJohn Marino #endif
133872c33676SMaxim Ag #else /* BIG_ENDIAN */
1339f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = ctr;
134072c33676SMaxim Ag #endif
1341f5b1c8a1SJohn Marino 		in  += i;
1342f5b1c8a1SJohn Marino 		len -= i;
1343f5b1c8a1SJohn Marino #if defined(GHASH)
1344f5b1c8a1SJohn Marino 		GHASH(ctx,out,i);
1345f5b1c8a1SJohn Marino 		out += i;
1346f5b1c8a1SJohn Marino #else
1347f5b1c8a1SJohn Marino 		while (j--) {
1348f5b1c8a1SJohn Marino 			for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1349f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Xi);
1350f5b1c8a1SJohn Marino 			out += 16;
1351f5b1c8a1SJohn Marino 		}
1352f5b1c8a1SJohn Marino #endif
1353f5b1c8a1SJohn Marino 	}
1354f5b1c8a1SJohn Marino 	if (len) {
1355f5b1c8a1SJohn Marino 		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1356f5b1c8a1SJohn Marino 		++ctr;
135772c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1358f5b1c8a1SJohn Marino #ifdef BSWAP4
1359f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = BSWAP4(ctr);
1360f5b1c8a1SJohn Marino #else
1361f5b1c8a1SJohn Marino 		PUTU32(ctx->Yi.c+12,ctr);
1362f5b1c8a1SJohn Marino #endif
136372c33676SMaxim Ag #else /* BIG_ENDIAN */
1364f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = ctr;
136572c33676SMaxim Ag #endif
1366f5b1c8a1SJohn Marino 		while (len--) {
1367f5b1c8a1SJohn Marino 			ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1368f5b1c8a1SJohn Marino 			++n;
1369f5b1c8a1SJohn Marino 		}
1370f5b1c8a1SJohn Marino 	}
1371f5b1c8a1SJohn Marino 
1372f5b1c8a1SJohn Marino 	ctx->mres = n;
1373f5b1c8a1SJohn Marino 	return 0;
1374f5b1c8a1SJohn Marino }
1375f5b1c8a1SJohn Marino 
1376f5b1c8a1SJohn Marino int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1377f5b1c8a1SJohn Marino 		const unsigned char *in, unsigned char *out,
1378f5b1c8a1SJohn Marino 		size_t len,ctr128_f stream)
1379f5b1c8a1SJohn Marino {
1380f5b1c8a1SJohn Marino 	unsigned int n, ctr;
1381f5b1c8a1SJohn Marino 	size_t i;
1382f5b1c8a1SJohn Marino 	u64   mlen = ctx->len.u[1];
1383f5b1c8a1SJohn Marino 	void *key  = ctx->key;
1384f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
1385f5b1c8a1SJohn Marino 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1386f5b1c8a1SJohn Marino # ifdef GHASH
1387f5b1c8a1SJohn Marino 	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1388f5b1c8a1SJohn Marino 				const u8 *inp,size_t len)	= ctx->ghash;
1389f5b1c8a1SJohn Marino # endif
1390f5b1c8a1SJohn Marino #endif
1391f5b1c8a1SJohn Marino 
1392f5b1c8a1SJohn Marino 	mlen += len;
1393f5b1c8a1SJohn Marino 	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1394f5b1c8a1SJohn Marino 		return -1;
1395f5b1c8a1SJohn Marino 	ctx->len.u[1] = mlen;
1396f5b1c8a1SJohn Marino 
1397f5b1c8a1SJohn Marino 	if (ctx->ares) {
1398f5b1c8a1SJohn Marino 		/* First call to decrypt finalizes GHASH(AAD) */
1399f5b1c8a1SJohn Marino 		GCM_MUL(ctx,Xi);
1400f5b1c8a1SJohn Marino 		ctx->ares = 0;
1401f5b1c8a1SJohn Marino 	}
1402f5b1c8a1SJohn Marino 
140372c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1404f5b1c8a1SJohn Marino #ifdef BSWAP4
1405f5b1c8a1SJohn Marino 	ctr = BSWAP4(ctx->Yi.d[3]);
1406f5b1c8a1SJohn Marino #else
1407f5b1c8a1SJohn Marino 	ctr = GETU32(ctx->Yi.c+12);
1408f5b1c8a1SJohn Marino #endif
140972c33676SMaxim Ag #else /* BIG_ENDIAN */
1410f5b1c8a1SJohn Marino 	ctr = ctx->Yi.d[3];
141172c33676SMaxim Ag #endif
1412f5b1c8a1SJohn Marino 
1413f5b1c8a1SJohn Marino 	n = ctx->mres;
1414f5b1c8a1SJohn Marino 	if (n) {
1415f5b1c8a1SJohn Marino 		while (n && len) {
1416f5b1c8a1SJohn Marino 			u8 c = *(in++);
1417f5b1c8a1SJohn Marino 			*(out++) = c^ctx->EKi.c[n];
1418f5b1c8a1SJohn Marino 			ctx->Xi.c[n] ^= c;
1419f5b1c8a1SJohn Marino 			--len;
1420f5b1c8a1SJohn Marino 			n = (n+1)%16;
1421f5b1c8a1SJohn Marino 		}
1422f5b1c8a1SJohn Marino 		if (n==0) GCM_MUL (ctx,Xi);
1423f5b1c8a1SJohn Marino 		else {
1424f5b1c8a1SJohn Marino 			ctx->mres = n;
1425f5b1c8a1SJohn Marino 			return 0;
1426f5b1c8a1SJohn Marino 		}
1427f5b1c8a1SJohn Marino 	}
1428f5b1c8a1SJohn Marino #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1429f5b1c8a1SJohn Marino 	while (len>=GHASH_CHUNK) {
1430f5b1c8a1SJohn Marino 		GHASH(ctx,in,GHASH_CHUNK);
1431f5b1c8a1SJohn Marino 		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1432f5b1c8a1SJohn Marino 		ctr += GHASH_CHUNK/16;
143372c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1434f5b1c8a1SJohn Marino #ifdef BSWAP4
1435f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = BSWAP4(ctr);
1436f5b1c8a1SJohn Marino #else
1437f5b1c8a1SJohn Marino 		PUTU32(ctx->Yi.c+12,ctr);
1438f5b1c8a1SJohn Marino #endif
143972c33676SMaxim Ag #else /* BIG_ENDIAN */
1440f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = ctr;
144172c33676SMaxim Ag #endif
1442f5b1c8a1SJohn Marino 		out += GHASH_CHUNK;
1443f5b1c8a1SJohn Marino 		in  += GHASH_CHUNK;
1444f5b1c8a1SJohn Marino 		len -= GHASH_CHUNK;
1445f5b1c8a1SJohn Marino 	}
1446f5b1c8a1SJohn Marino #endif
1447f5b1c8a1SJohn Marino 	if ((i = (len&(size_t)-16))) {
1448f5b1c8a1SJohn Marino 		size_t j=i/16;
1449f5b1c8a1SJohn Marino 
1450f5b1c8a1SJohn Marino #if defined(GHASH)
1451f5b1c8a1SJohn Marino 		GHASH(ctx,in,i);
1452f5b1c8a1SJohn Marino #else
1453f5b1c8a1SJohn Marino 		while (j--) {
1454f5b1c8a1SJohn Marino 			size_t k;
1455f5b1c8a1SJohn Marino 			for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1456f5b1c8a1SJohn Marino 			GCM_MUL(ctx,Xi);
1457f5b1c8a1SJohn Marino 			in += 16;
1458f5b1c8a1SJohn Marino 		}
1459f5b1c8a1SJohn Marino 		j   = i/16;
1460f5b1c8a1SJohn Marino 		in -= i;
1461f5b1c8a1SJohn Marino #endif
1462f5b1c8a1SJohn Marino 		(*stream)(in,out,j,key,ctx->Yi.c);
1463f5b1c8a1SJohn Marino 		ctr += (unsigned int)j;
146472c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1465f5b1c8a1SJohn Marino #ifdef BSWAP4
1466f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = BSWAP4(ctr);
1467f5b1c8a1SJohn Marino #else
1468f5b1c8a1SJohn Marino 		PUTU32(ctx->Yi.c+12,ctr);
1469f5b1c8a1SJohn Marino #endif
147072c33676SMaxim Ag #else /* BIG_ENDIAN */
1471f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = ctr;
147272c33676SMaxim Ag #endif
1473f5b1c8a1SJohn Marino 		out += i;
1474f5b1c8a1SJohn Marino 		in  += i;
1475f5b1c8a1SJohn Marino 		len -= i;
1476f5b1c8a1SJohn Marino 	}
1477f5b1c8a1SJohn Marino 	if (len) {
1478f5b1c8a1SJohn Marino 		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1479f5b1c8a1SJohn Marino 		++ctr;
148072c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1481f5b1c8a1SJohn Marino #ifdef BSWAP4
1482f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = BSWAP4(ctr);
1483f5b1c8a1SJohn Marino #else
1484f5b1c8a1SJohn Marino 		PUTU32(ctx->Yi.c+12,ctr);
1485f5b1c8a1SJohn Marino #endif
148672c33676SMaxim Ag #else /* BIG_ENDIAN */
1487f5b1c8a1SJohn Marino 		ctx->Yi.d[3] = ctr;
148872c33676SMaxim Ag #endif
1489f5b1c8a1SJohn Marino 		while (len--) {
1490f5b1c8a1SJohn Marino 			u8 c = in[n];
1491f5b1c8a1SJohn Marino 			ctx->Xi.c[n] ^= c;
1492f5b1c8a1SJohn Marino 			out[n] = c^ctx->EKi.c[n];
1493f5b1c8a1SJohn Marino 			++n;
1494f5b1c8a1SJohn Marino 		}
1495f5b1c8a1SJohn Marino 	}
1496f5b1c8a1SJohn Marino 
1497f5b1c8a1SJohn Marino 	ctx->mres = n;
1498f5b1c8a1SJohn Marino 	return 0;
1499f5b1c8a1SJohn Marino }
1500f5b1c8a1SJohn Marino 
1501f5b1c8a1SJohn Marino int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1502f5b1c8a1SJohn Marino 			size_t len)
1503f5b1c8a1SJohn Marino {
1504f5b1c8a1SJohn Marino 	u64 alen = ctx->len.u[0]<<3;
1505f5b1c8a1SJohn Marino 	u64 clen = ctx->len.u[1]<<3;
1506f5b1c8a1SJohn Marino #ifdef GCM_FUNCREF_4BIT
1507f5b1c8a1SJohn Marino 	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1508f5b1c8a1SJohn Marino #endif
1509f5b1c8a1SJohn Marino 
1510f5b1c8a1SJohn Marino 	if (ctx->mres || ctx->ares)
1511f5b1c8a1SJohn Marino 		GCM_MUL(ctx,Xi);
1512f5b1c8a1SJohn Marino 
151372c33676SMaxim Ag #if BYTE_ORDER == LITTLE_ENDIAN
1514f5b1c8a1SJohn Marino #ifdef BSWAP8
1515f5b1c8a1SJohn Marino 	alen = BSWAP8(alen);
1516f5b1c8a1SJohn Marino 	clen = BSWAP8(clen);
1517f5b1c8a1SJohn Marino #else
151872c33676SMaxim Ag 	{
1519f5b1c8a1SJohn Marino 		u8 *p = ctx->len.c;
1520f5b1c8a1SJohn Marino 
1521f5b1c8a1SJohn Marino 		ctx->len.u[0] = alen;
1522f5b1c8a1SJohn Marino 		ctx->len.u[1] = clen;
1523f5b1c8a1SJohn Marino 
1524f5b1c8a1SJohn Marino 		alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1525f5b1c8a1SJohn Marino 		clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1526f5b1c8a1SJohn Marino 	}
152772c33676SMaxim Ag #endif
152872c33676SMaxim Ag #endif
1529f5b1c8a1SJohn Marino 
1530f5b1c8a1SJohn Marino 	ctx->Xi.u[0] ^= alen;
1531f5b1c8a1SJohn Marino 	ctx->Xi.u[1] ^= clen;
1532f5b1c8a1SJohn Marino 	GCM_MUL(ctx,Xi);
1533f5b1c8a1SJohn Marino 
1534f5b1c8a1SJohn Marino 	ctx->Xi.u[0] ^= ctx->EK0.u[0];
1535f5b1c8a1SJohn Marino 	ctx->Xi.u[1] ^= ctx->EK0.u[1];
1536f5b1c8a1SJohn Marino 
1537f5b1c8a1SJohn Marino 	if (tag && len<=sizeof(ctx->Xi))
1538f5b1c8a1SJohn Marino 		return memcmp(ctx->Xi.c,tag,len);
1539f5b1c8a1SJohn Marino 	else
1540f5b1c8a1SJohn Marino 		return -1;
1541f5b1c8a1SJohn Marino }
1542f5b1c8a1SJohn Marino 
1543f5b1c8a1SJohn Marino void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1544f5b1c8a1SJohn Marino {
1545f5b1c8a1SJohn Marino 	CRYPTO_gcm128_finish(ctx, NULL, 0);
1546f5b1c8a1SJohn Marino 	memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1547f5b1c8a1SJohn Marino }
1548f5b1c8a1SJohn Marino 
1549f5b1c8a1SJohn Marino GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1550f5b1c8a1SJohn Marino {
1551f5b1c8a1SJohn Marino 	GCM128_CONTEXT *ret;
1552f5b1c8a1SJohn Marino 
1553f5b1c8a1SJohn Marino 	if ((ret = malloc(sizeof(GCM128_CONTEXT))))
1554f5b1c8a1SJohn Marino 		CRYPTO_gcm128_init(ret,key,block);
1555f5b1c8a1SJohn Marino 
1556f5b1c8a1SJohn Marino 	return ret;
1557f5b1c8a1SJohn Marino }
1558f5b1c8a1SJohn Marino 
1559f5b1c8a1SJohn Marino void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1560f5b1c8a1SJohn Marino {
156172c33676SMaxim Ag 	freezero(ctx, sizeof(*ctx));
1562f5b1c8a1SJohn Marino }
1563