1 /*
2  * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #define BR_POWER_ASM_MACROS   1
26 #include "inner.h"
27 
28 #if BR_POWER8
29 
30 /* see bearssl_block.h */
31 void
32 br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
33 	const void *key, size_t len)
34 {
35 	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
36 	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
37 }
38 
39 static void
40 cbcenc_128(const unsigned char *sk,
41 	const unsigned char *iv, unsigned char *buf, size_t len)
42 {
43 	long cc;
44 
45 #if BR_POWER8_LE
46 	static const uint32_t idx2be[] = {
47 		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
48 	};
49 #endif
50 
51 	cc = 0;
52 	asm volatile (
53 
54 		/*
55 		 * Load subkeys into v0..v10
56 		 */
57 		lxvw4x(32, %[cc], %[sk])
58 		addi(%[cc], %[cc], 16)
59 		lxvw4x(33, %[cc], %[sk])
60 		addi(%[cc], %[cc], 16)
61 		lxvw4x(34, %[cc], %[sk])
62 		addi(%[cc], %[cc], 16)
63 		lxvw4x(35, %[cc], %[sk])
64 		addi(%[cc], %[cc], 16)
65 		lxvw4x(36, %[cc], %[sk])
66 		addi(%[cc], %[cc], 16)
67 		lxvw4x(37, %[cc], %[sk])
68 		addi(%[cc], %[cc], 16)
69 		lxvw4x(38, %[cc], %[sk])
70 		addi(%[cc], %[cc], 16)
71 		lxvw4x(39, %[cc], %[sk])
72 		addi(%[cc], %[cc], 16)
73 		lxvw4x(40, %[cc], %[sk])
74 		addi(%[cc], %[cc], 16)
75 		lxvw4x(41, %[cc], %[sk])
76 		addi(%[cc], %[cc], 16)
77 		lxvw4x(42, %[cc], %[sk])
78 
79 #if BR_POWER8_LE
80 		/*
81 		 * v15 = constant for byteswapping words
82 		 */
83 		lxvw4x(47, 0, %[idx2be])
84 #endif
85 		/*
86 		 * Load IV into v16.
87 		 */
88 		lxvw4x(48, 0, %[iv])
89 #if BR_POWER8_LE
90 		vperm(16, 16, 16, 15)
91 #endif
92 
93 		mtctr(%[num_blocks])
94 	label(loop)
95 		/*
96 		 * Load next plaintext word and XOR with current IV.
97 		 */
98 		lxvw4x(49, 0, %[buf])
99 #if BR_POWER8_LE
100 		vperm(17, 17, 17, 15)
101 #endif
102 		vxor(16, 16, 17)
103 
104 		/*
105 		 * Encrypt the block.
106 		 */
107 		vxor(16, 16, 0)
108 		vcipher(16, 16, 1)
109 		vcipher(16, 16, 2)
110 		vcipher(16, 16, 3)
111 		vcipher(16, 16, 4)
112 		vcipher(16, 16, 5)
113 		vcipher(16, 16, 6)
114 		vcipher(16, 16, 7)
115 		vcipher(16, 16, 8)
116 		vcipher(16, 16, 9)
117 		vcipherlast(16, 16, 10)
118 
119 		/*
120 		 * Store back result (with byteswap)
121 		 */
122 #if BR_POWER8_LE
123 		vperm(17, 16, 16, 15)
124 		stxvw4x(49, 0, %[buf])
125 #else
126 		stxvw4x(48, 0, %[buf])
127 #endif
128 		addi(%[buf], %[buf], 16)
129 
130 		bdnz(loop)
131 
132 : [cc] "+b" (cc), [buf] "+b" (buf)
133 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
134 #if BR_POWER8_LE
135 	, [idx2be] "b" (idx2be)
136 #endif
137 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
138   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
139   "ctr", "memory"
140 	);
141 }
142 
143 static void
144 cbcenc_192(const unsigned char *sk,
145 	const unsigned char *iv, unsigned char *buf, size_t len)
146 {
147 	long cc;
148 
149 #if BR_POWER8_LE
150 	static const uint32_t idx2be[] = {
151 		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
152 	};
153 #endif
154 
155 	cc = 0;
156 	asm volatile (
157 
158 		/*
159 		 * Load subkeys into v0..v12
160 		 */
161 		lxvw4x(32, %[cc], %[sk])
162 		addi(%[cc], %[cc], 16)
163 		lxvw4x(33, %[cc], %[sk])
164 		addi(%[cc], %[cc], 16)
165 		lxvw4x(34, %[cc], %[sk])
166 		addi(%[cc], %[cc], 16)
167 		lxvw4x(35, %[cc], %[sk])
168 		addi(%[cc], %[cc], 16)
169 		lxvw4x(36, %[cc], %[sk])
170 		addi(%[cc], %[cc], 16)
171 		lxvw4x(37, %[cc], %[sk])
172 		addi(%[cc], %[cc], 16)
173 		lxvw4x(38, %[cc], %[sk])
174 		addi(%[cc], %[cc], 16)
175 		lxvw4x(39, %[cc], %[sk])
176 		addi(%[cc], %[cc], 16)
177 		lxvw4x(40, %[cc], %[sk])
178 		addi(%[cc], %[cc], 16)
179 		lxvw4x(41, %[cc], %[sk])
180 		addi(%[cc], %[cc], 16)
181 		lxvw4x(42, %[cc], %[sk])
182 		addi(%[cc], %[cc], 16)
183 		lxvw4x(43, %[cc], %[sk])
184 		addi(%[cc], %[cc], 16)
185 		lxvw4x(44, %[cc], %[sk])
186 
187 #if BR_POWER8_LE
188 		/*
189 		 * v15 = constant for byteswapping words
190 		 */
191 		lxvw4x(47, 0, %[idx2be])
192 #endif
193 		/*
194 		 * Load IV into v16.
195 		 */
196 		lxvw4x(48, 0, %[iv])
197 #if BR_POWER8_LE
198 		vperm(16, 16, 16, 15)
199 #endif
200 
201 		mtctr(%[num_blocks])
202 	label(loop)
203 		/*
204 		 * Load next plaintext word and XOR with current IV.
205 		 */
206 		lxvw4x(49, 0, %[buf])
207 #if BR_POWER8_LE
208 		vperm(17, 17, 17, 15)
209 #endif
210 		vxor(16, 16, 17)
211 
212 		/*
213 		 * Encrypt the block.
214 		 */
215 		vxor(16, 16, 0)
216 		vcipher(16, 16, 1)
217 		vcipher(16, 16, 2)
218 		vcipher(16, 16, 3)
219 		vcipher(16, 16, 4)
220 		vcipher(16, 16, 5)
221 		vcipher(16, 16, 6)
222 		vcipher(16, 16, 7)
223 		vcipher(16, 16, 8)
224 		vcipher(16, 16, 9)
225 		vcipher(16, 16, 10)
226 		vcipher(16, 16, 11)
227 		vcipherlast(16, 16, 12)
228 
229 		/*
230 		 * Store back result (with byteswap)
231 		 */
232 #if BR_POWER8_LE
233 		vperm(17, 16, 16, 15)
234 		stxvw4x(49, 0, %[buf])
235 #else
236 		stxvw4x(48, 0, %[buf])
237 #endif
238 		addi(%[buf], %[buf], 16)
239 
240 		bdnz(loop)
241 
242 : [cc] "+b" (cc), [buf] "+b" (buf)
243 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
244 #if BR_POWER8_LE
245 	, [idx2be] "b" (idx2be)
246 #endif
247 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
248   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
249   "ctr", "memory"
250 	);
251 }
252 
253 static void
254 cbcenc_256(const unsigned char *sk,
255 	const unsigned char *iv, unsigned char *buf, size_t len)
256 {
257 	long cc;
258 
259 #if BR_POWER8_LE
260 	static const uint32_t idx2be[] = {
261 		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
262 	};
263 #endif
264 
265 	cc = 0;
266 	asm volatile (
267 
268 		/*
269 		 * Load subkeys into v0..v14
270 		 */
271 		lxvw4x(32, %[cc], %[sk])
272 		addi(%[cc], %[cc], 16)
273 		lxvw4x(33, %[cc], %[sk])
274 		addi(%[cc], %[cc], 16)
275 		lxvw4x(34, %[cc], %[sk])
276 		addi(%[cc], %[cc], 16)
277 		lxvw4x(35, %[cc], %[sk])
278 		addi(%[cc], %[cc], 16)
279 		lxvw4x(36, %[cc], %[sk])
280 		addi(%[cc], %[cc], 16)
281 		lxvw4x(37, %[cc], %[sk])
282 		addi(%[cc], %[cc], 16)
283 		lxvw4x(38, %[cc], %[sk])
284 		addi(%[cc], %[cc], 16)
285 		lxvw4x(39, %[cc], %[sk])
286 		addi(%[cc], %[cc], 16)
287 		lxvw4x(40, %[cc], %[sk])
288 		addi(%[cc], %[cc], 16)
289 		lxvw4x(41, %[cc], %[sk])
290 		addi(%[cc], %[cc], 16)
291 		lxvw4x(42, %[cc], %[sk])
292 		addi(%[cc], %[cc], 16)
293 		lxvw4x(43, %[cc], %[sk])
294 		addi(%[cc], %[cc], 16)
295 		lxvw4x(44, %[cc], %[sk])
296 		addi(%[cc], %[cc], 16)
297 		lxvw4x(45, %[cc], %[sk])
298 		addi(%[cc], %[cc], 16)
299 		lxvw4x(46, %[cc], %[sk])
300 
301 #if BR_POWER8_LE
302 		/*
303 		 * v15 = constant for byteswapping words
304 		 */
305 		lxvw4x(47, 0, %[idx2be])
306 #endif
307 		/*
308 		 * Load IV into v16.
309 		 */
310 		lxvw4x(48, 0, %[iv])
311 #if BR_POWER8_LE
312 		vperm(16, 16, 16, 15)
313 #endif
314 
315 		mtctr(%[num_blocks])
316 	label(loop)
317 		/*
318 		 * Load next plaintext word and XOR with current IV.
319 		 */
320 		lxvw4x(49, 0, %[buf])
321 #if BR_POWER8_LE
322 		vperm(17, 17, 17, 15)
323 #endif
324 		vxor(16, 16, 17)
325 
326 		/*
327 		 * Encrypt the block.
328 		 */
329 		vxor(16, 16, 0)
330 		vcipher(16, 16, 1)
331 		vcipher(16, 16, 2)
332 		vcipher(16, 16, 3)
333 		vcipher(16, 16, 4)
334 		vcipher(16, 16, 5)
335 		vcipher(16, 16, 6)
336 		vcipher(16, 16, 7)
337 		vcipher(16, 16, 8)
338 		vcipher(16, 16, 9)
339 		vcipher(16, 16, 10)
340 		vcipher(16, 16, 11)
341 		vcipher(16, 16, 12)
342 		vcipher(16, 16, 13)
343 		vcipherlast(16, 16, 14)
344 
345 		/*
346 		 * Store back result (with byteswap)
347 		 */
348 #if BR_POWER8_LE
349 		vperm(17, 16, 16, 15)
350 		stxvw4x(49, 0, %[buf])
351 #else
352 		stxvw4x(48, 0, %[buf])
353 #endif
354 		addi(%[buf], %[buf], 16)
355 
356 		bdnz(loop)
357 
358 : [cc] "+b" (cc), [buf] "+b" (buf)
359 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
360 #if BR_POWER8_LE
361 	, [idx2be] "b" (idx2be)
362 #endif
363 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
364   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
365   "ctr", "memory"
366 	);
367 }
368 
369 /* see bearssl_block.h */
370 void
371 br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
372 	void *iv, void *data, size_t len)
373 {
374 	if (len > 0) {
375 		switch (ctx->num_rounds) {
376 		case 10:
377 			cbcenc_128(ctx->skey.skni, iv, data, len);
378 			break;
379 		case 12:
380 			cbcenc_192(ctx->skey.skni, iv, data, len);
381 			break;
382 		default:
383 			cbcenc_256(ctx->skey.skni, iv, data, len);
384 			break;
385 		}
386 		memcpy(iv, (unsigned char *)data + (len - 16), 16);
387 	}
388 }
389 
390 /* see bearssl_block.h */
391 const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
392 	sizeof(br_aes_pwr8_cbcenc_keys),
393 	16,
394 	4,
395 	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
396 		&br_aes_pwr8_cbcenc_init,
397 	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
398 		&br_aes_pwr8_cbcenc_run
399 };
400 
401 /* see bearssl_block.h */
402 const br_block_cbcenc_class *
403 br_aes_pwr8_cbcenc_get_vtable(void)
404 {
405 	return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL;
406 }
407 
408 #else
409 
410 /* see bearssl_block.h */
411 const br_block_cbcenc_class *
412 br_aes_pwr8_cbcenc_get_vtable(void)
413 {
414 	return NULL;
415 }
416 
417 #endif
418