/*
 * Copyright (c) 2012-2013 Vincent Hanquez <vincent@snarc.org>
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of his contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

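/* Encrypt a single 16-byte block.  PRELOAD_ENC and DO_ENC_BLOCK are
 * assumed to be macros, defined where this file is instantiated, that
 * cache the expanded round keys and run the AESENC/AESENCLAST round
 * sequence for the key size selected by SIZED.  Loads and stores are
 * unaligned (_mm_loadu_si128/_mm_storeu_si128), so callers need not
 * 16-byte-align the blocks. */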
TARGET_AESNI
void SIZED(cryptonite_aesni_encrypt_block)(aes_block *out, aes_key *key, aes_block *in)
{
	__m128i *k = (__m128i *) key->data;
	PRELOAD_ENC(k);
	__m128i m = _mm_loadu_si128((__m128i *) in);
	DO_ENC_BLOCK(m);
	_mm_storeu_si128((__m128i *) out, m);
}

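/* Decrypt a single 16-byte block: the mirror of the encrypt path,
 * using the decryption round keys via PRELOAD_DEC/DO_DEC_BLOCK. */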
TARGET_AESNI
void SIZED(cryptonite_aesni_decrypt_block)(aes_block *out, aes_key *key, aes_block *in)
{
	__m128i *k = (__m128i *) key->data;
	PRELOAD_DEC(k);
	__m128i m = _mm_loadu_si128((__m128i *) in);
	DO_DEC_BLOCK(m);
	_mm_storeu_si128((__m128i *) out, m);
}

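/* ECB encryption: every block is encrypted independently, so the round
 * keys are preloaded once and reused across the whole loop. */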
TARGET_AESNI
void SIZED(cryptonite_aesni_encrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;

	PRELOAD_ENC(k);
	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		DO_ENC_BLOCK(m);
		_mm_storeu_si128((__m128i *) out, m);
	}
}

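/* ECB decryption: the inverse of the loop above, one independent AES
 * decryption per block. */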
TARGET_AESNI
void SIZED(cryptonite_aesni_decrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;

	PRELOAD_DEC(k);

	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		DO_DEC_BLOCK(m);
		_mm_storeu_si128((__m128i *) out, m);
	}
}

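/* CBC encryption: each plaintext block is xored with the previous
 * ciphertext block (the IV for the first block) before encryption.
 * The chaining dependency makes this loop inherently sequential. */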
TARGET_AESNI
void SIZED(cryptonite_aesni_encrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);

	PRELOAD_ENC(k);

	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		m = _mm_xor_si128(m, iv);
		DO_ENC_BLOCK(m);
		iv = m;
		_mm_storeu_si128((__m128i *) out, m);
	}
}

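/* CBC decryption: decrypt each block, then xor with the previous
 * ciphertext block.  The incoming ciphertext is saved in a register
 * (ivnext) before decryption, so the loop also works in place
 * (out == in). */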
TARGET_AESNI
void SIZED(cryptonite_aesni_decrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks)
{
	__m128i *k = (__m128i *) key->data;
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);

	PRELOAD_DEC(k);

	for (; blocks-- > 0; in += 1, out += 1) {
		__m128i m = _mm_loadu_si128((__m128i *) in);
		__m128i ivnext = m;

		DO_DEC_BLOCK(m);
		m = _mm_xor_si128(m, iv);

		_mm_storeu_si128((__m128i *) out, m);
		iv = ivnext;
	}
}

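/* CTR mode.  bswap_mask reverses the bytes within each 64-bit lane,
 * which puts the low 64 bits of the big-endian counter into the high
 * lane in native order; `one` carries a 1 in that lane, so
 * _mm_add_epi64 increments the counter (carries beyond 64 bits are
 * not propagated).  The counter is swapped back to big endian before
 * each encryption, and a trailing partial block is handled through a
 * zero-padded stack buffer. */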
TARGET_AESNI
void SIZED(cryptonite_aesni_encrypt_ctr)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len)
{
	__m128i *k = (__m128i *) key->data;
	__m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
	__m128i one        = _mm_set_epi32(0,1,0,0);
	uint32_t nb_blocks = len / 16;
	uint32_t part_block_len = len % 16;

	/* get the IV in little endian format */
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);
	iv = _mm_shuffle_epi8(iv, bswap_mask);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* put the iv back in big-endian order,
		 * encrypt it, and xor it with the input block
		 */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		_mm_storeu_si128((__m128i *) output, m);
		/* iv += 1 */
		iv = _mm_add_epi64(iv, one);
	}

	if (part_block_len != 0) {
		aes_block block;
		memset(&block.b, 0, 16);
		memcpy(&block.b, input, part_block_len);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);

		DO_ENC_BLOCK(tmp);
		m = _mm_xor_si128(m, tmp);
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}
}

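/* Counter-mode variant with a 32-bit little-endian counter: the first
 * 32-bit lane of the IV is incremented directly (no byte swapping),
 * wrapping at 2^32 without carrying into the rest of the IV. */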
TARGET_AESNI
void SIZED(cryptonite_aesni_encrypt_c32_)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len)
{
	__m128i *k = (__m128i *) key->data;
	__m128i one        = _mm_set_epi32(0,0,0,1);
	uint32_t nb_blocks = len / 16;
	uint32_t part_block_len = len % 16;

	/* get the IV */
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* encrypt the iv and xor it with the input block */
		__m128i tmp = iv;
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		_mm_storeu_si128((__m128i *) output, m);
		/* iv += 1 */
		iv = _mm_add_epi32(iv, one);
	}

	if (part_block_len != 0) {
		aes_block block;
		memset(&block.b, 0, 16);
		memcpy(&block.b, input, part_block_len);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		__m128i tmp = iv;

		DO_ENC_BLOCK(tmp);
		m = _mm_xor_si128(m, tmp);
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}
}

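/* XTS encryption.  The tweak is first encrypted with key2, then
 * advanced to the starting block number `spoint` by repeated
 * multiplication by x in GF(2^128) (gfmulx, assumed defined
 * elsewhere).  Each data block is xored with the tweak both before
 * and after encryption with key1, and the tweak is multiplied by x
 * between blocks.  The do/while(0) blocks scope the two sets of
 * preloaded round keys separately. */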
TARGET_AESNI
void SIZED(cryptonite_aesni_encrypt_xts)(aes_block *out, aes_key *key1, aes_key *key2,
                               aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks)
{
	__m128i tweak = _mm_loadu_si128((__m128i *) _tweak);

	do {
		__m128i *k2 = (__m128i *) key2->data;
		PRELOAD_ENC(k2);
		DO_ENC_BLOCK(tweak);

		while (spoint-- > 0)
			tweak = gfmulx(tweak);
	} while (0);

	do {
		__m128i *k1 = (__m128i *) key1->data;
		PRELOAD_ENC(k1);

		for ( ; blocks-- > 0; in += 1, out += 1, tweak = gfmulx(tweak)) {
			__m128i m = _mm_loadu_si128((__m128i *) in);

			m = _mm_xor_si128(m, tweak);
			DO_ENC_BLOCK(m);
			m = _mm_xor_si128(m, tweak);

			_mm_storeu_si128((__m128i *) out, m);
		}
	} while (0);
}

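/* GCM encryption: CTR with GCM's 32-bit counter increment (only the
 * low 32 bits of the counter word wrap, hence _mm_add_epi32 here
 * rather than a 64-bit add), fused with a GHASH update per ciphertext
 * block.  ghash_add is assumed to fold a block into the running tag
 * using the precomputed table in gcm->htable.  For a trailing partial
 * block, the pshufb mask keeps the first part_block_len bytes and
 * zeroes the rest (an index byte with its high bit set makes
 * _mm_shuffle_epi8 emit zero), so padding bytes never enter the
 * GHASH. */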
TARGET_AESNI
void SIZED(cryptonite_aesni_gcm_encrypt)(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length)
{
	__m128i *k = (__m128i *) key->data;
	__m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
	__m128i one        = _mm_set_epi32(0,1,0,0);
	uint32_t nb_blocks = length / 16;
	uint32_t part_block_len = length % 16;

	gcm->length_input += length;

	__m128i tag = _mm_loadu_si128((__m128i *) &gcm->tag);
	__m128i iv = _mm_loadu_si128((__m128i *) &gcm->civ);
	iv = _mm_shuffle_epi8(iv, bswap_mask);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* iv += 1 */
		iv = _mm_add_epi32(iv, one);

		/* put the iv back in big endian, encrypt it,
		 * and xor it with the input */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		tag = ghash_add(tag, gcm->htable, m);

		/* store it out */
		_mm_storeu_si128((__m128i *) output, m);
	}
	if (part_block_len > 0) {
		__m128i mask;
		aes_block block;
		/* FIXME could do something a bit more clever (slli & sub & and maybe) ... */
		switch (part_block_len) {
		case 1: mask = _mm_setr_epi8(0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 2: mask = _mm_setr_epi8(0,1,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 3: mask = _mm_setr_epi8(0,1,2,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 4: mask = _mm_setr_epi8(0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 5: mask = _mm_setr_epi8(0,1,2,3,4,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 6: mask = _mm_setr_epi8(0,1,2,3,4,5,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 7: mask = _mm_setr_epi8(0,1,2,3,4,5,6,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 8: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 9: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 10: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 11: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,0x80,0x80,0x80,0x80,0x80); break;
		case 12: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,0x80,0x80,0x80,0x80); break;
		case 13: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,0x80,0x80,0x80); break;
		case 14: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,0x80,0x80); break;
		case 15: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0x80); break;
		default: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); break;
		}
		block128_zero(&block);
		block128_copy_bytes(&block, input, part_block_len);

		/* iv += 1 */
		iv = _mm_add_epi32(iv, one);

		/* put the iv back in big endian, encrypt it, and xor it with the input */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		m = _mm_xor_si128(m, tmp);
		m = _mm_shuffle_epi8(m, mask);

		tag = ghash_add(tag, gcm->htable, m);

		/* make output */
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}
	/* store back IV & tag */
	__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
	_mm_storeu_si128((__m128i *) &gcm->civ, tmp);
	_mm_storeu_si128((__m128i *) &gcm->tag, tag);
}