1 /*
2 * Copyright (c) 2012-2013 Vincent Hanquez <vincent@snarc.org>
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the author nor the names of his contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 TARGET_AESNI
SIZED(cryptonite_aesni_encrypt_block)32 void SIZED(cryptonite_aesni_encrypt_block)(aes_block *out, aes_key *key, aes_block *in)
33 {
34 __m128i *k = (__m128i *) key->data;
35 PRELOAD_ENC(k);
36 __m128i m = _mm_loadu_si128((__m128i *) in);
37 DO_ENC_BLOCK(m);
38 _mm_storeu_si128((__m128i *) out, m);
39 }
40
41 TARGET_AESNI
SIZED(cryptonite_aesni_decrypt_block)42 void SIZED(cryptonite_aesni_decrypt_block)(aes_block *out, aes_key *key, aes_block *in)
43 {
44 __m128i *k = (__m128i *) key->data;
45 PRELOAD_DEC(k);
46 __m128i m = _mm_loadu_si128((__m128i *) in);
47 DO_DEC_BLOCK(m);
48 _mm_storeu_si128((__m128i *) out, m);
49 }
50
51 TARGET_AESNI
SIZED(cryptonite_aesni_encrypt_ecb)52 void SIZED(cryptonite_aesni_encrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks)
53 {
54 __m128i *k = (__m128i *) key->data;
55
56 PRELOAD_ENC(k);
57 for (; blocks-- > 0; in += 1, out += 1) {
58 __m128i m = _mm_loadu_si128((__m128i *) in);
59 DO_ENC_BLOCK(m);
60 _mm_storeu_si128((__m128i *) out, m);
61 }
62 }
63
64 TARGET_AESNI
SIZED(cryptonite_aesni_decrypt_ecb)65 void SIZED(cryptonite_aesni_decrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks)
66 {
67 __m128i *k = (__m128i *) key->data;
68
69 PRELOAD_DEC(k);
70
71 for (; blocks-- > 0; in += 1, out += 1) {
72 __m128i m = _mm_loadu_si128((__m128i *) in);
73 DO_DEC_BLOCK(m);
74 _mm_storeu_si128((__m128i *) out, m);
75 }
76 }
77
78 TARGET_AESNI
SIZED(cryptonite_aesni_encrypt_cbc)79 void SIZED(cryptonite_aesni_encrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks)
80 {
81 __m128i *k = (__m128i *) key->data;
82 __m128i iv = _mm_loadu_si128((__m128i *) _iv);
83
84 PRELOAD_ENC(k);
85
86 for (; blocks-- > 0; in += 1, out += 1) {
87 __m128i m = _mm_loadu_si128((__m128i *) in);
88 m = _mm_xor_si128(m, iv);
89 DO_ENC_BLOCK(m);
90 iv = m;
91 _mm_storeu_si128((__m128i *) out, m);
92 }
93 }
94
95 TARGET_AESNI
SIZED(cryptonite_aesni_decrypt_cbc)96 void SIZED(cryptonite_aesni_decrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks)
97 {
98 __m128i *k = (__m128i *) key->data;
99 __m128i iv = _mm_loadu_si128((__m128i *) _iv);
100
101 PRELOAD_DEC(k);
102
103 for (; blocks-- > 0; in += 1, out += 1) {
104 __m128i m = _mm_loadu_si128((__m128i *) in);
105 __m128i ivnext = m;
106
107 DO_DEC_BLOCK(m);
108 m = _mm_xor_si128(m, iv);
109
110 _mm_storeu_si128((__m128i *) out, m);
111 iv = ivnext;
112 }
113 }
114
TARGET_AESNI
/* CTR mode: generate a keystream by encrypting a big-endian 128-bit counter
 * (starting at *_iv) and xor it with the input.  `len` is in bytes; a
 * trailing partial block is handled via a stack buffer.  Encryption and
 * decryption are the same operation in CTR mode. */
void SIZED(cryptonite_aesni_encrypt_ctr)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len)
{
	__m128i *k = (__m128i *) key->data;
	/* shuffle mask that byte-swaps each 64-bit half of the register
	 * independently (the two halves are not exchanged) */
	__m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
	/* after the per-qword byte swap, adding this with _mm_add_epi64
	 * increments the low 64 bits of the big-endian counter; note that
	 * _mm_add_epi64 does not carry into the other 64-bit lane */
	__m128i one = _mm_set_epi32(0,1,0,0);
	uint32_t nb_blocks = len / 16;
	uint32_t part_block_len = len % 16;

	/* get the IV in little endian format */
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);
	iv = _mm_shuffle_epi8(iv, bswap_mask);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* put back the iv in big endian mode,
		 * encrypt it and and xor it the input block
		 */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		_mm_storeu_si128((__m128i *) output, m);
		/* iv += 1 */
		iv = _mm_add_epi64(iv, one);
	}

	if (part_block_len != 0) {
		/* pad the final partial input block with zeros, xor the last
		 * keystream block in, and copy only part_block_len bytes out */
		aes_block block;
		memset(&block.b, 0, 16);
		memcpy(&block.b, input, part_block_len);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);

		DO_ENC_BLOCK(tmp);
		m = _mm_xor_si128(m, tmp);
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}

	return ;
}
160
TARGET_AESNI
/* CTR variant with a 32-bit little-endian counter: the IV is used as-is
 * (no byte swap) and only its low 32-bit lane is incremented per block
 * (_mm_add_epi32 does not carry into the other lanes).  `len` is in bytes;
 * a trailing partial block is handled via a stack buffer. */
void SIZED(cryptonite_aesni_encrypt_c32_)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len)
{
	__m128i *k = (__m128i *) key->data;
	/* 1 in the lowest 32-bit lane only */
	__m128i one = _mm_set_epi32(0,0,0,1);
	uint32_t nb_blocks = len / 16;
	uint32_t part_block_len = len % 16;

	/* get the IV */
	__m128i iv = _mm_loadu_si128((__m128i *) _iv);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* encrypt the iv and and xor it the input block */
		__m128i tmp = iv;
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		_mm_storeu_si128((__m128i *) output, m);
		/* iv += 1 */
		iv = _mm_add_epi32(iv, one);
	}

	if (part_block_len != 0) {
		/* pad the final partial input block with zeros, xor the last
		 * keystream block in, and copy only part_block_len bytes out */
		aes_block block;
		memset(&block.b, 0, 16);
		memcpy(&block.b, input, part_block_len);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		__m128i tmp = iv;

		DO_ENC_BLOCK(tmp);
		m = _mm_xor_si128(m, tmp);
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}

	return ;
}
202
TARGET_AESNI
/* XTS encryption: the tweak (*_tweak) is first encrypted with key2, then
 * advanced `spoint` steps by GF(2^128) doubling (gfmulx) to reach the
 * starting position; each data block is then whitened with the tweak,
 * encrypted with key1, whitened again, and the tweak is doubled for the
 * next block.  The do/while(0) scopes keep the two PRELOAD_ENC key-schedule
 * expansions from clashing. */
void SIZED(cryptonite_aesni_encrypt_xts)(aes_block *out, aes_key *key1, aes_key *key2,
                                         aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks)
{
	__m128i tweak = _mm_loadu_si128((__m128i *) _tweak);

	do {
		/* encrypt the tweak with the secondary key, then step it
		 * forward spoint times */
		__m128i *k2 = (__m128i *) key2->data;
		PRELOAD_ENC(k2);
		DO_ENC_BLOCK(tweak);

		while (spoint-- > 0)
			tweak = gfmulx(tweak);
	} while (0) ;

	do {
		/* encrypt the data blocks with the primary key, xor-ing the
		 * tweak in before and after each block encryption */
		__m128i *k1 = (__m128i *) key1->data;
		PRELOAD_ENC(k1);

		for ( ; blocks-- > 0; in += 1, out += 1, tweak = gfmulx(tweak)) {
			__m128i m = _mm_loadu_si128((__m128i *) in);

			m = _mm_xor_si128(m, tweak);
			DO_ENC_BLOCK(m);
			m = _mm_xor_si128(m, tweak);

			_mm_storeu_si128((__m128i *) out, m);
		}
	} while (0);
}
233
TARGET_AESNI
/* GCM encryption: CTR-encrypt `length` bytes of input while folding each
 * ciphertext block into the running GHASH tag.  The counter in gcm->civ is
 * pre-incremented for every block, and the updated counter and tag are
 * stored back into the gcm context on exit so the function can be called
 * incrementally.  NOTE(review): partial final blocks appear to be supported
 * only as the last call of a stream — a subsequent call would not resume
 * mid-block; confirm against the callers. */
void SIZED(cryptonite_aesni_gcm_encrypt)(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length)
{
	__m128i *k = (__m128i *) key->data;
	/* shuffle mask that byte-swaps each 64-bit half of the register */
	__m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
	/* after the per-qword swap this adds 1 to the 32-bit lane holding the
	 * counter word (GCM inc32: no carry beyond 32 bits) */
	__m128i one = _mm_set_epi32(0,1,0,0);
	uint32_t nb_blocks = length / 16;
	uint32_t part_block_len = length % 16;

	/* total ciphertext length, needed for GHASH finalization elsewhere */
	gcm->length_input += length;

	__m128i tag = _mm_loadu_si128((__m128i *) &gcm->tag);
	__m128i iv = _mm_loadu_si128((__m128i *) &gcm->civ);
	iv = _mm_shuffle_epi8(iv, bswap_mask);

	PRELOAD_ENC(k);

	for (; nb_blocks-- > 0; output += 16, input += 16) {
		/* iv += 1 */
		iv = _mm_add_epi32(iv, one);

		/* put back iv in big endian, encrypt it,
		 * and xor it to input */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);
		__m128i m = _mm_loadu_si128((__m128i *) input);
		m = _mm_xor_si128(m, tmp);

		/* GHASH absorbs the ciphertext block */
		tag = ghash_add(tag, gcm->htable, m);

		/* store it out */
		_mm_storeu_si128((__m128i *) output, m);
	}
	if (part_block_len > 0) {
		/* pshufb mask keeping the first part_block_len bytes and
		 * zeroing the rest (0x80 selects a zero byte), so the padded
		 * tail hashes correctly */
		__m128i mask;
		aes_block block;
		/* FIXME could do something a bit more clever (slli & sub & and maybe) ... */
		switch (part_block_len) {
		case 1: mask = _mm_setr_epi8(0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 2: mask = _mm_setr_epi8(0,1,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 3: mask = _mm_setr_epi8(0,1,2,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 4: mask = _mm_setr_epi8(0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 5: mask = _mm_setr_epi8(0,1,2,3,4,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 6: mask = _mm_setr_epi8(0,1,2,3,4,5,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 7: mask = _mm_setr_epi8(0,1,2,3,4,5,6,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 8: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 9: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 10: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,0x80,0x80,0x80,0x80,0x80,0x80); break;
		case 11: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,0x80,0x80,0x80,0x80,0x80); break;
		case 12: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,0x80,0x80,0x80,0x80); break;
		case 13: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,0x80,0x80,0x80); break;
		case 14: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,0x80,0x80); break;
		case 15: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0x80); break;
		default: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); break;
		}
		block128_zero(&block);
		block128_copy_bytes(&block, input, part_block_len);

		/* iv += 1 */
		iv = _mm_add_epi32(iv, one);

		/* put back iv in big endian mode, encrypt it and xor it with input */
		__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
		DO_ENC_BLOCK(tmp);

		__m128i m = _mm_loadu_si128((__m128i *) &block);
		m = _mm_xor_si128(m, tmp);
		/* drop the keystream bytes beyond the partial length before
		 * hashing and output */
		m = _mm_shuffle_epi8(m, mask);

		tag = ghash_add(tag, gcm->htable, m);

		/* make output */
		_mm_storeu_si128((__m128i *) &block.b, m);
		memcpy(output, &block.b, part_block_len);
	}
	/* store back IV & tag */
	__m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
	_mm_storeu_si128((__m128i *) &gcm->civ, tmp);
	_mm_storeu_si128((__m128i *) &gcm->tag, tag);
}
314