1C arm/ecc-384-modp.asm
2
3ifelse(<
4   Copyright (C) 2013 Niels Möller
5
6   This file is part of GNU Nettle.
7
8   GNU Nettle is free software: you can redistribute it and/or
9   modify it under the terms of either:
10
11     * the GNU Lesser General Public License as published by the Free
12       Software Foundation; either version 3 of the License, or (at your
13       option) any later version.
14
15   or
16
17     * the GNU General Public License as published by the Free
18       Software Foundation; either version 2 of the License, or (at your
19       option) any later version.
20
21   or both in parallel, as here.
22
23   GNU Nettle is distributed in the hope that it will be useful,
24   but WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received copies of the GNU General Public License and
29   the GNU Lesser General Public License along with this program.  If
30   not, see http://www.gnu.org/licenses/.
31>)
32
33	.file "ecc-384-modp.asm"
34	.arm
35
	C Register aliases used by nettle_ecc_384_modp below.
	C
	C RP: r1, the second argument (rp in the prototype comment below);
	C     used as a moving pointer into the limb array.
36define(<RP>, <r1>)
	C T0-T3: four-limb window that is loaded from and stored back to the
	C     area RP points at. T0 aliases r0; the function loads into r0
	C     before ever reading it, so the first argument is not used.
37define(<T0>, <r0>)
38define(<T1>, <r2>)
39define(<T2>, <r3>)
40define(<T3>, <r4>)
	C F0-F4: the value being folded in (four limbs plus a top word F4).
	C     F0-F3 are reloaded from memory inside the loop.
41define(<F0>, <r5>)
42define(<F1>, <r6>)
43define(<F2>, <r7>)
44define(<F3>, <r8>)
45define(<F4>, <r10>)
	C N: loop counter (r12).
46define(<N>, <r12>)
	C H: signed carry word carried between four-limb groups. It lives in
	C     lr, which the function pushes on entry and pops into pc on exit.
47define(<H>, <lr>)
48
49	C ecc_384_modp (const struct ecc_modulo *m, mp_limb_t *rp)
50	.text
51	.align 2
52
53PROLOGUE(nettle_ecc_384_modp)
	C In:  r1 (RP) = rp. r0 is overwritten without being read.
	C Reads the 24 32-bit limbs rp[0..23] and stores a 12-limb result to
	C rp[0..11]. Limbs 12 and up are folded into the low limbs using the
	C terms named in the step comments below: with B = 2^32, a limb at
	C B^12 is added at B^4 and B^3, subtracted at B, and added at 1.
	C Modifies r0-r3, r12 and the flags; r4-r8, r10 and lr are pushed
	C here and restored by the final pop, which also returns via pc.
54	push	{r4,r5,r6,r7,r8,r10,lr}
55
	C Byte offset 80 = limb 20 (4 bytes per limb).
56	add	RP, RP, #80
57	ldm	RP, {T0, T1, T2, T3}	C 20-23
58
59	C First get top 4 limbs, which need folding twice, as
60	C
61	C     T3 T2 T1 T0
62	C        T3 T2 T1
63	C             -T3
64	C ----------------
65	C  F4 F3 F2 F1 F0
66	C
67	C Start with
68	C
69	C   T3 T1 T0
70	C         T1
71	C        -T3
72	C -----------
73	C   F2 F1 F0   Always fits
74
	C Add T1 at the bottom with the carry rippling up, then subtract T3
	C at the bottom with the borrow rippling up.
75	adds	F0, T0, T1
76	adcs	F1, T1, #0
77	adcs	F2, T3, #0
78	subs	F0, F0, T3
79	sbcs	F1, F1, #0
80	sbcs	F2, F2, #0
81
82	C      T3 T2 T2  0
83	C         F2 F1 F0
84	C  ----------------
85	C   F4 F3 F2 F1 F0
86
	C F4 starts at zero and only collects the carry out of F3.
87	mov	F4, #0
88	adds	F1, F1, T2
89	adcs	F2, F2, T2
90	adcs	F3, T3, #0
91	adcs	F4, F4, #0
92
93	C Add in to high part
	C Step RP back from limb 20 to limb 12 (32 bytes = 8 limbs).
94	sub	RP, RP, #32
95	ldm	RP, {T0, T1, T2, T3}	C 12-15
	C NOTE(review): H is not read before it is cleared again ahead of
	C the low-part addition below, so this store appears redundant.
96	mov	H, #0
	C F0-F3 now hold limbs 12-15 plus the folded top limbs; the carry
	C out of F3 is absorbed into F4. Nothing is stored back here.
97	adds	F0, T0, F0
98	adcs	F1, T1, F1
99	adcs	F2, T2, F2
100	adcs	F3, T3, F3
101	adcs	F4, F4, #0			C Do F4 later
102
103	C Add to low part, keeping carry (positive or negative) in H
	C Step RP back from limb 12 to limb 0 (48 bytes = 12 limbs).
104	sub	RP, RP, #48
105	ldm	RP, {T0, T1, T2, T3}	C 0-3
106	mov	H, #0
	C Three passes over limbs 0-3, each leaving its carry or borrow
	C out of limb 3 in H:
	C   add F0..F3 at limbs 0..3,
	C   subtract F0..F2 at limbs 1..3,
	C   add F0 at limb 3.
107	adds	T0, T0, F0
108	adcs	T1, T1, F1
109	adcs	T2, T2, F2
110	adcs	T3, T3, F3
111	adc	H, H, #0
112	subs	T1, T1, F0
113	sbcs	T2, T2, F1
114	sbcs	T3, T3, F2
115	sbc	H, H, #0
116	adds	T3, T3, F0
117	adc	H, H, #0
118
	C The writeback form advances RP to limb 4 for the loop.
119	stm	RP!, {T0,T1,T2,T3}	C 0-3
	C Two iterations: limbs 4-7, then limbs 8-11.
120	mov	N, #2
121.Loop:
122	ldm	RP, {T0,T1,T2,T3}	C 4-7
123
124	C First, propagate carry
	C asr has no S suffix, so the carry produced by the adds just above
	C is still intact for the adcs chain that follows.
125	adds	T0, T0, H
126	asr	H, #31		C Sign extend
127	adcs	T1, T1, H
128	adcs	T2, T2, H
129	adcs	T3, T3, H
130	adc	H, H, #0
131
132	C +B^4 term
133	adds	T0, T0, F0
134	adcs	T1, T1, F1
135	adcs	T2, T2, F2
136	adcs	T3, T3, F3
137	adc	H, H, #0
138
139	C +B^3 terms
	C F0-F3 are reloaded one at a time from RP+48..RP+60 as each old
	C value is used for the last time. The limb numbers in the comments
	C on the loads are those of the first pass (RP at limb 4); on the
	C second pass (RP at limb 8) the same offsets address limbs 20-23.
140	ldr	F0, [RP, #+48]		C 16
141	adds	T0, T0, F1
142	adcs	T1, T1, F2
143	adcs	T2, T2, F3
144	adcs	T3, T3, F0
145	adc	H, H, #0
146
147	C -B
148	ldr	F1, [RP, #+52]		C 17-18
149	ldr	F2, [RP, #+56]
150	subs	T0, T0, F3
151	sbcs	T1, T1, F0
152	sbcs	T2, T2, F1
153	sbcs	T3, T3, F2
	C NOTE(review): the flags set by this sbcs are overwritten by the
	C adds below before anything reads them.
154	sbcs	H, H, #0
155
156	C +1
157	ldr	F3, [RP, #+60]		C 19
158	adds	T0, T0, F0
159	adcs	T1, T1, F1
160	adcs	T2, T2, F2
161	adcs	T3, T3, F3
162	adc	H, H, #0
	C stm leaves the flags alone, so bne tests the result of the subs.
163	subs	N, N, #1
164	stm	RP!, {T0,T1,T2,T3}
165	bne	.Loop
166
167	C Fold high limbs, we need to add in
168	C
169	C F4 F4 0 -F4 F4 H H 0 -H H
170	C
171	C We always have F4 >= 0, but we can have H < 0.
172	C Sign extension gets tricky when F4 = 0 and H < 0.
	C After the loop RP is at limb 12; step back to limb 0.
173	sub	RP, RP, #48
174
175	ldm	RP, {T0,T1,T2,T3}	C 0-3
176
177	C     H  H  0 -H  H
178	C  ----------------
179	C  S  H F3 F2 F1 F0
180	C
181	C Define S = H >> 31 (asr), we then have
182	C
183	C  F0 = H
184	C  F1 = S - H
185	C  F2 = - [H > 0]
186	C  F3 = H - [H > 0]
187	C   H = H + S
188	C
189	C And we get underflow in S - H iff H > 0
190
	C sbc computes Rn - Op2 - (1 - C). With both operands equal the
	C result is 0 when C = 1 and -1 when C = 0, whatever the register
	C held before.
191	C				H = 0	H > 0	H = -1
192	mov	F0, H		C	0	H	-1
193	asr	H, #31
194	subs	F1, H, F0	C	0,C=1	-H,C=0	0,C=1
195	sbc	F2, F2, F2	C	0	-1	0
196	sbc	F3, F0, #0	C	0	H-1	-1
197
198	adds	T0, T0, F0
199	adcs	T1, T1, F1
200	adcs	T2, T2, F2
201	adcs	T3, T3, F3
202	adc	H, H, F0	C	0+cy	H+cy	-2+cy
203
204	stm	RP!, {T0,T1,T2,T3}	C 0-3
205	ldm	RP, {T0,T1,T2,T3}	C 4-7
206
207	C   F4  0 -F4
208	C ---------
209	C   F3 F2  F1
210
	C rsbs computes 0 - F4, which borrows (C = 0) exactly when F4 is
	C non-zero; the two sbc instructions turn that borrow into F2 and
	C into the adjustment of F3.
211	rsbs	F1, F4, #0
212	sbc	F2, F2, F2
213	sbc	F3, F4, #0
214
215	C Sign extend H
	C F0 = F4 + H, then the sign word of H is added to F1-F4 along with
	C the carry. H ends up as its sign word plus the carry out of F4.
216	adds	F0, F4, H
217	asr	H, H, #31
218	adcs	F1, F1, H
219	adcs	F2, F2, H
220	adcs	F3, F3, H
221	adcs	F4, F4, H
222	adc	H, H, #0
223
224	adds	T0, T0, F0
225	adcs	T1, T1, F1
226	adcs	T2, T2, F2
227	adcs	T3, T3, F3
228
	C stm and ldm do not change the flags, so the carry out of limb 7
	C feeds the adcs on limb 8 below.
229	stm	RP!, {T0,T1,T2,T3}	C 4-7
230	ldm	RP, {T0,T1,T2,T3}	C 8-11
231
232	adcs	T0, T0, F4
233	adcs	T1, T1, H
234	adcs	T2, T2, H
235	adcs	T3, T3, H
236	adc	H, H, #0
237
	C No writeback on this store: RP stays at limb 8.
238	stm	RP, {T0,T1,T2,T3}	C 8-11
239
240	C Final (unlikely) carry
	C Step RP back from limb 8 to limb 0.
241	sub	RP, RP, #32
242	ldm	RP, {T0,T1,T2,T3}	C 0-3
243	C Fold H into F0-F4
	C Same F0-F3 construction as in the earlier fold of H. F4 = H + S
	C is computed with a plain add, which leaves the flags untouched.
244	mov	F0, H
245	asr	H, #31
246	subs	F1, H, F0
247	sbc	F2, F2, F2
248	sbc	F3, F0, #0
249	add	F4, F0, H
250
251	adds	T0, T0, F0
252	adcs	T1, T1, F1
253	adcs	T2, T2, F2
254	adcs	T3, T3, F3
255
	C One carry chain runs from limb 0 to limb 11 across the stores and
	C loads below. H now holds the sign word S. The carry out of limb
	C 11 is not used.
256	stm	RP!, {T0,T1,T2,T3}	C 0-3
257	ldm	RP, {T0,T1,T2,T3}	C 4-7
258	adcs	T0, T0, F4
259	adcs	T1, T1, H
260	adcs	T2, T2, H
261	adcs	T3, T3, H
262	stm	RP!, {T0,T1,T2,T3}	C 4-7
263	ldm	RP, {T0,T1,T2,T3}	C 8-11
264	adcs	T0, T0, H
265	adcs	T1, T1, H
266	adcs	T2, T2, H
267	adcs	T3, T3, H
268	stm	RP!, {T0,T1,T2,T3}	C 8-11
	C Restore the saved registers; popping into pc returns to the caller.
269	pop	{r4,r5,r6,r7,r8,r10,pc}
270EPILOGUE(nettle_ecc_384_modp)
271