1 #if defined(ED25519_GCC_64BIT_32BIT_CHOOSE)
2
3 #define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS
4
5 DONNA_NOINLINE static void
ge25519_scalarmult_base_choose_niels(ge25519_niels * t,const uint8_t table[256][96],uint32_t pos,signed char b)6 ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
7 int64_t breg = (int64_t)b;
8 uint64_t sign = (uint64_t)breg >> 63;
9 uint64_t mask = ~(sign - 1);
10 uint64_t u = (breg + mask) ^ mask;
11
12 __asm__ __volatile__ (
13 /* ysubx+xaddy+t2d */
14 "movq %0, %%rax ;\n"
15 "movd %%rax, %%xmm14 ;\n"
16 "pshufd $0x00, %%xmm14, %%xmm14 ;\n"
17 "pxor %%xmm0, %%xmm0 ;\n"
18 "pxor %%xmm1, %%xmm1 ;\n"
19 "pxor %%xmm2, %%xmm2 ;\n"
20 "pxor %%xmm3, %%xmm3 ;\n"
21 "pxor %%xmm4, %%xmm4 ;\n"
22 "pxor %%xmm5, %%xmm5 ;\n"
23
24 /* 0 */
25 "movq $0, %%rax ;\n"
26 "movd %%rax, %%xmm15 ;\n"
27 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
28 "pcmpeqd %%xmm14, %%xmm15 ;\n"
29 "movq $1, %%rax ;\n"
30 "movd %%rax, %%xmm6 ;\n"
31 "pxor %%xmm7, %%xmm7 ;\n"
32 "pand %%xmm15, %%xmm6 ;\n"
33 "pand %%xmm15, %%xmm7 ;\n"
34 "por %%xmm6, %%xmm0 ;\n"
35 "por %%xmm7, %%xmm1 ;\n"
36 "por %%xmm6, %%xmm2 ;\n"
37 "por %%xmm7, %%xmm3 ;\n"
38
39 /* 1 */
40 "movq $1, %%rax ;\n"
41 "movd %%rax, %%xmm15 ;\n"
42 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
43 "pcmpeqd %%xmm14, %%xmm15 ;\n"
44 "movdqa 0(%1), %%xmm6 ;\n"
45 "movdqa 16(%1), %%xmm7 ;\n"
46 "movdqa 32(%1), %%xmm8 ;\n"
47 "movdqa 48(%1), %%xmm9 ;\n"
48 "movdqa 64(%1), %%xmm10 ;\n"
49 "movdqa 80(%1), %%xmm11 ;\n"
50 "pand %%xmm15, %%xmm6 ;\n"
51 "pand %%xmm15, %%xmm7 ;\n"
52 "pand %%xmm15, %%xmm8 ;\n"
53 "pand %%xmm15, %%xmm9 ;\n"
54 "pand %%xmm15, %%xmm10 ;\n"
55 "pand %%xmm15, %%xmm11 ;\n"
56 "por %%xmm6, %%xmm0 ;\n"
57 "por %%xmm7, %%xmm1 ;\n"
58 "por %%xmm8, %%xmm2 ;\n"
59 "por %%xmm9, %%xmm3 ;\n"
60 "por %%xmm10, %%xmm4 ;\n"
61 "por %%xmm11, %%xmm5 ;\n"
62
63 /* 2 */
64 "movq $2, %%rax ;\n"
65 "movd %%rax, %%xmm15 ;\n"
66 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
67 "pcmpeqd %%xmm14, %%xmm15 ;\n"
68 "movdqa 96(%1), %%xmm6 ;\n"
69 "movdqa 112(%1), %%xmm7 ;\n"
70 "movdqa 128(%1), %%xmm8 ;\n"
71 "movdqa 144(%1), %%xmm9 ;\n"
72 "movdqa 160(%1), %%xmm10 ;\n"
73 "movdqa 176(%1), %%xmm11 ;\n"
74 "pand %%xmm15, %%xmm6 ;\n"
75 "pand %%xmm15, %%xmm7 ;\n"
76 "pand %%xmm15, %%xmm8 ;\n"
77 "pand %%xmm15, %%xmm9 ;\n"
78 "pand %%xmm15, %%xmm10 ;\n"
79 "pand %%xmm15, %%xmm11 ;\n"
80 "por %%xmm6, %%xmm0 ;\n"
81 "por %%xmm7, %%xmm1 ;\n"
82 "por %%xmm8, %%xmm2 ;\n"
83 "por %%xmm9, %%xmm3 ;\n"
84 "por %%xmm10, %%xmm4 ;\n"
85 "por %%xmm11, %%xmm5 ;\n"
86
87 /* 3 */
88 "movq $3, %%rax ;\n"
89 "movd %%rax, %%xmm15 ;\n"
90 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
91 "pcmpeqd %%xmm14, %%xmm15 ;\n"
92 "movdqa 192(%1), %%xmm6 ;\n"
93 "movdqa 208(%1), %%xmm7 ;\n"
94 "movdqa 224(%1), %%xmm8 ;\n"
95 "movdqa 240(%1), %%xmm9 ;\n"
96 "movdqa 256(%1), %%xmm10 ;\n"
97 "movdqa 272(%1), %%xmm11 ;\n"
98 "pand %%xmm15, %%xmm6 ;\n"
99 "pand %%xmm15, %%xmm7 ;\n"
100 "pand %%xmm15, %%xmm8 ;\n"
101 "pand %%xmm15, %%xmm9 ;\n"
102 "pand %%xmm15, %%xmm10 ;\n"
103 "pand %%xmm15, %%xmm11 ;\n"
104 "por %%xmm6, %%xmm0 ;\n"
105 "por %%xmm7, %%xmm1 ;\n"
106 "por %%xmm8, %%xmm2 ;\n"
107 "por %%xmm9, %%xmm3 ;\n"
108 "por %%xmm10, %%xmm4 ;\n"
109 "por %%xmm11, %%xmm5 ;\n"
110
111 /* 4 */
112 "movq $4, %%rax ;\n"
113 "movd %%rax, %%xmm15 ;\n"
114 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
115 "pcmpeqd %%xmm14, %%xmm15 ;\n"
116 "movdqa 288(%1), %%xmm6 ;\n"
117 "movdqa 304(%1), %%xmm7 ;\n"
118 "movdqa 320(%1), %%xmm8 ;\n"
119 "movdqa 336(%1), %%xmm9 ;\n"
120 "movdqa 352(%1), %%xmm10 ;\n"
121 "movdqa 368(%1), %%xmm11 ;\n"
122 "pand %%xmm15, %%xmm6 ;\n"
123 "pand %%xmm15, %%xmm7 ;\n"
124 "pand %%xmm15, %%xmm8 ;\n"
125 "pand %%xmm15, %%xmm9 ;\n"
126 "pand %%xmm15, %%xmm10 ;\n"
127 "pand %%xmm15, %%xmm11 ;\n"
128 "por %%xmm6, %%xmm0 ;\n"
129 "por %%xmm7, %%xmm1 ;\n"
130 "por %%xmm8, %%xmm2 ;\n"
131 "por %%xmm9, %%xmm3 ;\n"
132 "por %%xmm10, %%xmm4 ;\n"
133 "por %%xmm11, %%xmm5 ;\n"
134
135 /* 5 */
136 "movq $5, %%rax ;\n"
137 "movd %%rax, %%xmm15 ;\n"
138 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
139 "pcmpeqd %%xmm14, %%xmm15 ;\n"
140 "movdqa 384(%1), %%xmm6 ;\n"
141 "movdqa 400(%1), %%xmm7 ;\n"
142 "movdqa 416(%1), %%xmm8 ;\n"
143 "movdqa 432(%1), %%xmm9 ;\n"
144 "movdqa 448(%1), %%xmm10 ;\n"
145 "movdqa 464(%1), %%xmm11 ;\n"
146 "pand %%xmm15, %%xmm6 ;\n"
147 "pand %%xmm15, %%xmm7 ;\n"
148 "pand %%xmm15, %%xmm8 ;\n"
149 "pand %%xmm15, %%xmm9 ;\n"
150 "pand %%xmm15, %%xmm10 ;\n"
151 "pand %%xmm15, %%xmm11 ;\n"
152 "por %%xmm6, %%xmm0 ;\n"
153 "por %%xmm7, %%xmm1 ;\n"
154 "por %%xmm8, %%xmm2 ;\n"
155 "por %%xmm9, %%xmm3 ;\n"
156 "por %%xmm10, %%xmm4 ;\n"
157 "por %%xmm11, %%xmm5 ;\n"
158
159 /* 6 */
160 "movq $6, %%rax ;\n"
161 "movd %%rax, %%xmm15 ;\n"
162 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
163 "pcmpeqd %%xmm14, %%xmm15 ;\n"
164 "movdqa 480(%1), %%xmm6 ;\n"
165 "movdqa 496(%1), %%xmm7 ;\n"
166 "movdqa 512(%1), %%xmm8 ;\n"
167 "movdqa 528(%1), %%xmm9 ;\n"
168 "movdqa 544(%1), %%xmm10 ;\n"
169 "movdqa 560(%1), %%xmm11 ;\n"
170 "pand %%xmm15, %%xmm6 ;\n"
171 "pand %%xmm15, %%xmm7 ;\n"
172 "pand %%xmm15, %%xmm8 ;\n"
173 "pand %%xmm15, %%xmm9 ;\n"
174 "pand %%xmm15, %%xmm10 ;\n"
175 "pand %%xmm15, %%xmm11 ;\n"
176 "por %%xmm6, %%xmm0 ;\n"
177 "por %%xmm7, %%xmm1 ;\n"
178 "por %%xmm8, %%xmm2 ;\n"
179 "por %%xmm9, %%xmm3 ;\n"
180 "por %%xmm10, %%xmm4 ;\n"
181 "por %%xmm11, %%xmm5 ;\n"
182
183 /* 7 */
184 "movq $7, %%rax ;\n"
185 "movd %%rax, %%xmm15 ;\n"
186 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
187 "pcmpeqd %%xmm14, %%xmm15 ;\n"
188 "movdqa 576(%1), %%xmm6 ;\n"
189 "movdqa 592(%1), %%xmm7 ;\n"
190 "movdqa 608(%1), %%xmm8 ;\n"
191 "movdqa 624(%1), %%xmm9 ;\n"
192 "movdqa 640(%1), %%xmm10 ;\n"
193 "movdqa 656(%1), %%xmm11 ;\n"
194 "pand %%xmm15, %%xmm6 ;\n"
195 "pand %%xmm15, %%xmm7 ;\n"
196 "pand %%xmm15, %%xmm8 ;\n"
197 "pand %%xmm15, %%xmm9 ;\n"
198 "pand %%xmm15, %%xmm10 ;\n"
199 "pand %%xmm15, %%xmm11 ;\n"
200 "por %%xmm6, %%xmm0 ;\n"
201 "por %%xmm7, %%xmm1 ;\n"
202 "por %%xmm8, %%xmm2 ;\n"
203 "por %%xmm9, %%xmm3 ;\n"
204 "por %%xmm10, %%xmm4 ;\n"
205 "por %%xmm11, %%xmm5 ;\n"
206
207 /* 8 */
208 "movq $8, %%rax ;\n"
209 "movd %%rax, %%xmm15 ;\n"
210 "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
211 "pcmpeqd %%xmm14, %%xmm15 ;\n"
212 "movdqa 672(%1), %%xmm6 ;\n"
213 "movdqa 688(%1), %%xmm7 ;\n"
214 "movdqa 704(%1), %%xmm8 ;\n"
215 "movdqa 720(%1), %%xmm9 ;\n"
216 "movdqa 736(%1), %%xmm10 ;\n"
217 "movdqa 752(%1), %%xmm11 ;\n"
218 "pand %%xmm15, %%xmm6 ;\n"
219 "pand %%xmm15, %%xmm7 ;\n"
220 "pand %%xmm15, %%xmm8 ;\n"
221 "pand %%xmm15, %%xmm9 ;\n"
222 "pand %%xmm15, %%xmm10 ;\n"
223 "pand %%xmm15, %%xmm11 ;\n"
224 "por %%xmm6, %%xmm0 ;\n"
225 "por %%xmm7, %%xmm1 ;\n"
226 "por %%xmm8, %%xmm2 ;\n"
227 "por %%xmm9, %%xmm3 ;\n"
228 "por %%xmm10, %%xmm4 ;\n"
229 "por %%xmm11, %%xmm5 ;\n"
230
231 /* conditionally swap ysubx and xaddy */
232 "movq %3, %%rax ;\n"
233 "xorq $1, %%rax ;\n"
234 "movd %%rax, %%xmm14 ;\n"
235 "pxor %%xmm15, %%xmm15 ;\n"
236 "pshufd $0x00, %%xmm14, %%xmm14 ;\n"
237 "pxor %%xmm0, %%xmm2 ;\n"
238 "pxor %%xmm1, %%xmm3 ;\n"
239 "pcmpeqd %%xmm14, %%xmm15 ;\n"
240 "movdqa %%xmm2, %%xmm6 ;\n"
241 "movdqa %%xmm3, %%xmm7 ;\n"
242 "pand %%xmm15, %%xmm6 ;\n"
243 "pand %%xmm15, %%xmm7 ;\n"
244 "pxor %%xmm6, %%xmm0 ;\n"
245 "pxor %%xmm7, %%xmm1 ;\n"
246 "pxor %%xmm0, %%xmm2 ;\n"
247 "pxor %%xmm1, %%xmm3 ;\n"
248
249 /* store ysubx */
250 "xorq %%rax, %%rax ;\n"
251 "movd %%xmm0, %%rcx ;\n"
252 "movd %%xmm0, %%r8 ;\n"
253 "movd %%xmm1, %%rsi ;\n"
254 "pshufd $0xee, %%xmm0, %%xmm0 ;\n"
255 "pshufd $0xee, %%xmm1, %%xmm1 ;\n"
256 "movd %%xmm0, %%rdx ;\n"
257 "movd %%xmm1, %%rdi ;\n"
258 "shrdq $51, %%rdx, %%r8 ;\n"
259 "shrdq $38, %%rsi, %%rdx ;\n"
260 "shrdq $25, %%rdi, %%rsi ;\n"
261 "shrq $12, %%rdi ;\n"
262 "movq %%rcx, %%r9 ;\n"
263 "movq %%r8, %%r10 ;\n"
264 "movq %%rdx, %%r11 ;\n"
265 "movq %%rsi, %%r12 ;\n"
266 "movq %%rdi, %%r13 ;\n"
267 "shrq $26, %%r9 ;\n"
268 "shrq $26, %%r10 ;\n"
269 "shrq $26, %%r11 ;\n"
270 "shrq $26, %%r12 ;\n"
271 "shrq $26, %%r13 ;\n"
272 "andl $0x3ffffff, %%ecx ;\n"
273 "andl $0x1ffffff, %%r9d ;\n"
274 "andl $0x3ffffff, %%r8d ;\n"
275 "andl $0x1ffffff, %%r10d ;\n"
276 "andl $0x3ffffff, %%edx ;\n"
277 "andl $0x1ffffff, %%r11d ;\n"
278 "andl $0x3ffffff, %%esi ;\n"
279 "andl $0x1ffffff, %%r12d ;\n"
280 "andl $0x3ffffff, %%edi ;\n"
281 "andl $0x1ffffff, %%r13d ;\n"
282 "movl %%ecx, 0(%2) ;\n"
283 "movl %%r9d, 4(%2) ;\n"
284 "movl %%r8d, 8(%2) ;\n"
285 "movl %%r10d, 12(%2) ;\n"
286 "movl %%edx, 16(%2) ;\n"
287 "movl %%r11d, 20(%2) ;\n"
288 "movl %%esi, 24(%2) ;\n"
289 "movl %%r12d, 28(%2) ;\n"
290 "movl %%edi, 32(%2) ;\n"
291 "movl %%r13d, 36(%2) ;\n"
292
293 /* store xaddy */
294 "movd %%xmm2, %%rcx ;\n"
295 "movd %%xmm2, %%r8 ;\n"
296 "movd %%xmm3, %%rsi ;\n"
297 "pshufd $0xee, %%xmm2, %%xmm2 ;\n"
298 "pshufd $0xee, %%xmm3, %%xmm3 ;\n"
299 "movd %%xmm2, %%rdx ;\n"
300 "movd %%xmm3, %%rdi ;\n"
301 "shrdq $51, %%rdx, %%r8 ;\n"
302 "shrdq $38, %%rsi, %%rdx ;\n"
303 "shrdq $25, %%rdi, %%rsi ;\n"
304 "shrq $12, %%rdi ;\n"
305 "movq %%rcx, %%r9 ;\n"
306 "movq %%r8, %%r10 ;\n"
307 "movq %%rdx, %%r11 ;\n"
308 "movq %%rsi, %%r12 ;\n"
309 "movq %%rdi, %%r13 ;\n"
310 "shrq $26, %%r9 ;\n"
311 "shrq $26, %%r10 ;\n"
312 "shrq $26, %%r11 ;\n"
313 "shrq $26, %%r12 ;\n"
314 "shrq $26, %%r13 ;\n"
315 "andl $0x3ffffff, %%ecx ;\n"
316 "andl $0x1ffffff, %%r9d ;\n"
317 "andl $0x3ffffff, %%r8d ;\n"
318 "andl $0x1ffffff, %%r10d ;\n"
319 "andl $0x3ffffff, %%edx ;\n"
320 "andl $0x1ffffff, %%r11d ;\n"
321 "andl $0x3ffffff, %%esi ;\n"
322 "andl $0x1ffffff, %%r12d ;\n"
323 "andl $0x3ffffff, %%edi ;\n"
324 "andl $0x1ffffff, %%r13d ;\n"
325 "movl %%ecx, 40(%2) ;\n"
326 "movl %%r9d, 44(%2) ;\n"
327 "movl %%r8d, 48(%2) ;\n"
328 "movl %%r10d, 52(%2) ;\n"
329 "movl %%edx, 56(%2) ;\n"
330 "movl %%r11d, 60(%2) ;\n"
331 "movl %%esi, 64(%2) ;\n"
332 "movl %%r12d, 68(%2) ;\n"
333 "movl %%edi, 72(%2) ;\n"
334 "movl %%r13d, 76(%2) ;\n"
335
336 /* extract t2d */
337 "xorq %%rax, %%rax ;\n"
338 "movd %%xmm4, %%rcx ;\n"
339 "movd %%xmm4, %%r8 ;\n"
340 "movd %%xmm5, %%rsi ;\n"
341 "pshufd $0xee, %%xmm4, %%xmm4 ;\n"
342 "pshufd $0xee, %%xmm5, %%xmm5 ;\n"
343 "movd %%xmm4, %%rdx ;\n"
344 "movd %%xmm5, %%rdi ;\n"
345 "shrdq $51, %%rdx, %%r8 ;\n"
346 "shrdq $38, %%rsi, %%rdx ;\n"
347 "shrdq $25, %%rdi, %%rsi ;\n"
348 "shrq $12, %%rdi ;\n"
349 "movq %%rcx, %%r9 ;\n"
350 "movq %%r8, %%r10 ;\n"
351 "movq %%rdx, %%r11 ;\n"
352 "movq %%rsi, %%r12 ;\n"
353 "movq %%rdi, %%r13 ;\n"
354 "shrq $26, %%r9 ;\n"
355 "shrq $26, %%r10 ;\n"
356 "shrq $26, %%r11 ;\n"
357 "shrq $26, %%r12 ;\n"
358 "shrq $26, %%r13 ;\n"
359 "andl $0x3ffffff, %%ecx ;\n"
360 "andl $0x1ffffff, %%r9d ;\n"
361 "andl $0x3ffffff, %%r8d ;\n"
362 "andl $0x1ffffff, %%r10d ;\n"
363 "andl $0x3ffffff, %%edx ;\n"
364 "andl $0x1ffffff, %%r11d ;\n"
365 "andl $0x3ffffff, %%esi ;\n"
366 "andl $0x1ffffff, %%r12d ;\n"
367 "andl $0x3ffffff, %%edi ;\n"
368 "andl $0x1ffffff, %%r13d ;\n"
369 "movd %%ecx, %%xmm0 ;\n"
370 "movd %%r9d, %%xmm4 ;\n"
371 "movd %%r8d, %%xmm8 ;\n"
372 "movd %%r10d, %%xmm3 ;\n"
373 "movd %%edx, %%xmm1 ;\n"
374 "movd %%r11d, %%xmm5 ;\n"
375 "movd %%esi, %%xmm6 ;\n"
376 "movd %%r12d, %%xmm7 ;\n"
377 "movd %%edi, %%xmm2 ;\n"
378 "movd %%r13d, %%xmm9 ;\n"
379 "punpckldq %%xmm4, %%xmm0 ;\n"
380 "punpckldq %%xmm3, %%xmm8 ;\n"
381 "punpckldq %%xmm5, %%xmm1 ;\n"
382 "punpckldq %%xmm7, %%xmm6 ;\n"
383 "punpckldq %%xmm9, %%xmm2 ;\n"
384 "punpcklqdq %%xmm8, %%xmm0 ;\n"
385 "punpcklqdq %%xmm6, %%xmm1 ;\n"
386
387 /* set up 2p in to 3/4 */
388 "movl $0x7ffffda, %%ecx ;\n"
389 "movl $0x3fffffe, %%edx ;\n"
390 "movl $0x7fffffe, %%eax ;\n"
391 "movd %%ecx, %%xmm3 ;\n"
392 "movd %%edx, %%xmm5 ;\n"
393 "movd %%eax, %%xmm4 ;\n"
394 "punpckldq %%xmm5, %%xmm3 ;\n"
395 "punpckldq %%xmm5, %%xmm4 ;\n"
396 "punpcklqdq %%xmm4, %%xmm3 ;\n"
397 "movdqa %%xmm4, %%xmm5 ;\n"
398 "punpcklqdq %%xmm4, %%xmm4 ;\n"
399
400 /* subtract and conditionally move */
401 "movl %3, %%ecx ;\n"
402 "sub $1, %%ecx ;\n"
403 "movd %%ecx, %%xmm6 ;\n"
404 "pshufd $0x00, %%xmm6, %%xmm6 ;\n"
405 "movdqa %%xmm6, %%xmm7 ;\n"
406 "psubd %%xmm0, %%xmm3 ;\n"
407 "psubd %%xmm1, %%xmm4 ;\n"
408 "psubd %%xmm2, %%xmm5 ;\n"
409 "pand %%xmm6, %%xmm0 ;\n"
410 "pand %%xmm6, %%xmm1 ;\n"
411 "pand %%xmm6, %%xmm2 ;\n"
412 "pandn %%xmm3, %%xmm6 ;\n"
413 "movdqa %%xmm7, %%xmm3 ;\n"
414 "pandn %%xmm4, %%xmm7 ;\n"
415 "pandn %%xmm5, %%xmm3 ;\n"
416 "por %%xmm6, %%xmm0 ;\n"
417 "por %%xmm7, %%xmm1 ;\n"
418 "por %%xmm3, %%xmm2 ;\n"
419
420 /* store t2d */
421 "movdqa %%xmm0, 80(%2) ;\n"
422 "movdqa %%xmm1, 96(%2) ;\n"
423 "movd %%xmm2, %%rax ;\n"
424 "movq %%rax, 112(%2) ;\n"
425 :
426 : "m"(u), "r"(&table[pos * 8]), "r"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
427 :
428 "%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13",
429 "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm14", "%xmm14",
430 "cc", "memory"
431 );
432 }
433
434 #endif /* defined(ED25519_GCC_64BIT_32BIT_CHOOSE) */
435
436