C arm/aes-encrypt-internal.asm

ifelse(<
   Copyright (C) 2013 Niels Möller

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
>)

include_src(<arm/aes.m4>)

C	Benchmarked at 725, 815, 990 cycles/block on cortex A9,
C	for 128, 192 and 256 bit key sizes.

C	Possible improvements: More efficient load and store with
C	aligned accesses. Better scheduling.

C Register arguments on entry to the function below.
define(<PARAM_ROUNDS>, <r0>)
define(<PARAM_KEYS>, <r1>)
define(<TABLE>, <r2>)
define(<PARAM_LENGTH>, <r3>)
C On stack: DST, SRC

C First set of four state words, a scratch register, the round
C counter and the subkey pointer.
define(<W0>, <r4>)
define(<W1>, <r5>)
define(<W2>, <r6>)
define(<W3>, <r7>)
define(<T0>, <r8>)
define(<COUNT>, <r10>)
define(<KEY>, <r11>)

C MASK and the second set of state words reuse r0, r1 and r3. That is
C why the function pushes the incoming rounds, keys and length values
C and reads them back from the FRAME_* slots. Both W0-W3 and X0-X3 are
C in ascending register order, which the ldm in AES_ENCRYPT_ROUND
C depends on.
define(<MASK>, <r0>)	C Overlaps inputs, except TABLE
define(<X0>, <r1>)
define(<X1>, <r3>)
define(<X2>, <r12>)
define(<X3>, <r14>)	C lr

C Stack frame as seen after the push in the function: the saved r0, r1
C and r3 occupy the first 12 bytes, followed by the 8 other saved
C registers (32 bytes), so the stack arguments start at offset 44.
C FRAME_ROUNDS and FRAME_KEYS name the two slots that the block loop
C reads together with a single "ldm sp".
define(<FRAME_ROUNDS>,  <[sp]>)
define(<FRAME_KEYS>,  <[sp, #+4]>)
define(<FRAME_LENGTH>,  <[sp, #+8]>)
C 8 saved registers
define(<FRAME_DST>,  <[sp, #+44]>)
define(<FRAME_SRC>,  <[sp, #+48]>)


C AES_ENCRYPT_ROUND(x0,x1,x2,x3,w0,w1,w2,w3,key)
C MASK should hold the constant 0x3fc.
C
C Computes one table-driven encryption round: reads the state words
C x0-x3 and leaves the new state in w0-w3.
C
C With MASK = 0x3fc, "and T0, MASK, x, lsl #2" yields byte 0 of x
C times 4, and "and T0, MASK, x, ror #6", "ror #14" and "ror #22"
C yield bytes 1, 2 and 3 of x times 4. Each result is a byte offset
C into a table of 256 32-bit words. TABLE is advanced by 1024 bytes
C before the second, third and fourth group of lookups, so four
C consecutive 1024-byte tables (called TAB0-TAB3 here) are used, and
C it is restored by the closing "sub TABLE, TABLE, #3072".
C
C	w0 = TAB0[byte0 x0] ^ TAB1[byte1 x1] ^ TAB2[byte2 x2] ^ TAB3[byte3 x3] ^ k0
C	w1 = TAB0[byte0 x1] ^ TAB1[byte1 x2] ^ TAB2[byte2 x3] ^ TAB3[byte3 x0] ^ k1
C	w2 = TAB0[byte0 x2] ^ TAB1[byte1 x3] ^ TAB2[byte2 x0] ^ TAB3[byte3 x1] ^ k2
C	w3 = TAB0[byte0 x3] ^ TAB1[byte1 x0] ^ TAB2[byte2 x1] ^ TAB3[byte3 x2] ^ k3
C
C The subkey words k0-k3 are fetched by "ldm key!, {x0,x1,x2,x3}",
C which overwrites the input registers and advances key by 16 bytes.
C ldm pairs memory words with registers by register number, not by
C position in the list, so x0-x3 must be passed in ascending register
C order for k0-k3 to line up with w0-w3 as shown above.
C
C Clobbers T0 and x0-x3. No instruction in the macro sets the
C condition flags, so flags set before it are still valid after it;
C the round loop relies on this.
define(<AES_ENCRYPT_ROUND>, <

	and	T0, MASK, $1, lsl #2
	ldr	$5, [TABLE, T0]
	and	T0, MASK, $2, lsl #2
	ldr	$6, [TABLE, T0]
	and	T0, MASK, $3, lsl #2
	ldr	$7, [TABLE, T0]
	and	T0, MASK, $4, lsl #2
	ldr	$8, [TABLE, T0]

	and	T0, MASK, $2, ror #6
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $3, ror #6
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $4, ror #6
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $1, ror #6
	ldr	T0, [TABLE, T0]
	eor	$8, $8, T0

	and	T0, MASK, $3, ror #14
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $4, ror #14
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $1, ror #14
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $2, ror #14
	ldr	T0, [TABLE, T0]
	eor	$8, $8, T0

	and	T0, MASK, $4, ror #22
	add	TABLE, TABLE, #1024
	ldr	T0, [TABLE, T0]
	eor	$5, $5, T0
	and	T0, MASK, $1, ror #22
	ldr	T0, [TABLE, T0]
	eor	$6, $6, T0
	and	T0, MASK, $2, ror #22
	ldr	T0, [TABLE, T0]
	eor	$7, $7, T0
	and	T0, MASK, $3, ror #22
	ldr	T0, [TABLE, T0]

	ldm	$9!, {$1,$2,$3,$4}
	eor	$8, $8, T0
	sub	TABLE, TABLE, #3072
	eor	$5, $5, $1
	eor	$6, $6, $2
	eor	$7, $7, $3
	eor	$8, $8, $4
>)

132	.file "aes-encrypt-internal.asm"
133
134	C _aes_encrypt(unsigned rounds, const uint32_t *keys,
135	C	       const struct aes_table *T,
136	C	       size_t length, uint8_t *dst,
137	C	       uint8_t *src)
138	.text
139	ALIGN(4)
140PROLOGUE(_nettle_aes_encrypt)
141	teq	PARAM_LENGTH, #0
142	beq	.Lend
143
144	push	{r0,r1,r3, r4,r5,r6,r7,r8,r10,r11,lr}
145	mov	MASK, #0x3fc
146	ALIGN(16)
147.Lblock_loop:
148	ldr	X0, FRAME_SRC		C Use X0 as SRC pointer
149	ldm	sp, {COUNT, KEY}
150
151	AES_LOAD(X0,KEY,W0)
152	AES_LOAD(X0,KEY,W1)
153	AES_LOAD(X0,KEY,W2)
154	AES_LOAD(X0,KEY,W3)
155
156	str	X0, FRAME_SRC
157
158	add	TABLE, TABLE, #AES_TABLE0
159
160	b	.Lentry
161	ALIGN(16)
162.Lround_loop:
163	C	Transform X -> W
164	AES_ENCRYPT_ROUND(X0, X1, X2, X3, W0, W1, W2, W3, KEY)
165
166.Lentry:
167	subs	COUNT, COUNT,#2
168	C	Transform W -> X
169	AES_ENCRYPT_ROUND(W0, W1, W2, W3, X0, X1, X2, X3, KEY)
170
171	bne	.Lround_loop
172
173	lsr	COUNT, MASK, #2	C Put the needed mask in the unused COUNT register
174	sub	TABLE, TABLE, #AES_TABLE0
175	C	Final round
176	AES_FINAL_ROUND_V5(X0, X1, X2, X3, KEY, W0, COUNT)
177	AES_FINAL_ROUND_V5(X1, X2, X3, X0, KEY, W1, COUNT)
178	AES_FINAL_ROUND_V5(X2, X3, X0, X1, KEY, W2, COUNT)
179	AES_FINAL_ROUND_V5(X3, X0, X1, X2, KEY, W3, COUNT)
180
181	ldr	X0, FRAME_DST
182	ldr	X1, FRAME_LENGTH
183
184	AES_STORE(X0,W0)
185	AES_STORE(X0,W1)
186	AES_STORE(X0,W2)
187	AES_STORE(X0,W3)
188
189	subs	X1, X1, #16
190	str	X0, FRAME_DST
191	str	X1, FRAME_LENGTH
192
193	bhi	.Lblock_loop
194
195	add	sp, sp, #12	C Drop saved r0, r1, r3
196	pop	{r4,r5,r6,r7,r8,r10,r11,pc}
197
198.Lend:
199	bx	lr
200EPILOGUE(_nettle_aes_encrypt)
201