1 /*
2 * Lowest Level MPI Algorithms
3 * (C) 1999-2010 Jack Lloyd
4 * 2006 Luca Piccarreta
5 *
6 * Botan is released under the Simplified BSD License (see license.txt)
7 */
8
9 #ifndef BOTAN_MP_ASM_INTERNAL_H_
10 #define BOTAN_MP_ASM_INTERNAL_H_
11
12 #include <botan/internal/mp_madd.h>
13
14 namespace Botan {
15
#if defined(BOTAN_MP_USE_X86_32_ASM)

/*
* 32-bit x86 fragments. Every macro below expands to a sequence of
* ASM(...) string pieces meant to be pasted into an extended asm
* template whose operands are named x, y, z and carry. Words are
* 4 bytes apart, hence the "4*SLOT" displacements.
*/

/*
* Wrap an add/sub chain: rotate bit 0 of %[carry] into CF, run BODY,
* then convert the resulting CF back into 0 or 1 in %[carry]
* (sbb reg,reg gives 0 or -1, neg turns that into 0 or 1).
*/
#define ADD_OR_SUBTRACT(BODY)                          \
   ASM("rorl %[carry]")                                \
   BODY                                                \
   ASM("sbbl %[carry],%[carry]")                       \
   ASM("negl %[carry]")

/* x[SLOT] = x[SLOT] INSN y[SLOT]; %[carry] is reused as scratch */
#define ADDSUB2_OP(INSN, SLOT)                         \
   ASM("movl 4*" #SLOT "(%[y]), %[carry]")             \
   ASM(INSN " %[carry], 4*" #SLOT "(%[x])")

/* z[SLOT] = x[SLOT] INSN y[SLOT]; %[carry] is reused as scratch */
#define ADDSUB3_OP(INSN, SLOT)                         \
   ASM("movl 4*" #SLOT "(%[x]), %[carry]")             \
   ASM(INSN " 4*" #SLOT "(%[y]), %[carry]")            \
   ASM("movl %[carry], 4*" #SLOT "(%[z])")

/*
* edx:eax = x[SLOT] * y + carry; the low half is stored to
* DEST[SLOT] and the high half becomes the new carry.
*/
#define LINMUL_OP(DEST, SLOT)                          \
   ASM("movl 4*" #SLOT "(%[x]),%%eax")                 \
   ASM("mull %[y]")                                    \
   ASM("addl %[carry],%%eax")                          \
   ASM("adcl $0,%%edx")                                \
   ASM("movl %%edx,%[carry]")                          \
   ASM("movl %%eax, 4*" #SLOT "(%[" DEST "])")

/*
* edx:eax = x[SLOT] * y + carry + z[SLOT]; the low half is stored
* back to z[SLOT] and the high half becomes the new carry. The
* first macro argument is not used.
*/
#define MULADD_OP(UNUSED_ARG, SLOT)                    \
   ASM("movl 4*" #SLOT "(%[x]),%%eax")                 \
   ASM("mull %[y]")                                    \
   ASM("addl %[carry],%%eax")                          \
   ASM("adcl $0,%%edx")                                \
   ASM("addl 4*" #SLOT "(%[z]),%%eax")                 \
   ASM("adcl $0,%%edx")                                \
   ASM("movl %%edx,%[carry]")                          \
   ASM("movl %%eax, 4*" #SLOT " (%[z])")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

/*
* 64-bit x86 fragments: same structure as the 32-bit set, using the
* q-suffixed instructions, rax/rdx, and an 8 byte word stride.
*/

/*
* Wrap an add/sub chain: rotate bit 0 of %[carry] into CF, run BODY,
* then convert the resulting CF back into 0 or 1 in %[carry].
*/
#define ADD_OR_SUBTRACT(BODY)                          \
   ASM("rorq %[carry]")                                \
   BODY                                                \
   ASM("sbbq %[carry],%[carry]")                       \
   ASM("negq %[carry]")

/* x[SLOT] = x[SLOT] INSN y[SLOT]; %[carry] is reused as scratch */
#define ADDSUB2_OP(INSN, SLOT)                         \
   ASM("movq 8*" #SLOT "(%[y]), %[carry]")             \
   ASM(INSN " %[carry], 8*" #SLOT "(%[x])")

/* z[SLOT] = x[SLOT] INSN y[SLOT]; %[carry] is reused as scratch */
#define ADDSUB3_OP(INSN, SLOT)                         \
   ASM("movq 8*" #SLOT "(%[x]), %[carry]")             \
   ASM(INSN " 8*" #SLOT "(%[y]), %[carry]")            \
   ASM("movq %[carry], 8*" #SLOT "(%[z])")

/*
* rdx:rax = x[SLOT] * y + carry; the low half is stored to
* DEST[SLOT] and the high half becomes the new carry.
*/
#define LINMUL_OP(DEST, SLOT)                          \
   ASM("movq 8*" #SLOT "(%[x]),%%rax")                 \
   ASM("mulq %[y]")                                    \
   ASM("addq %[carry],%%rax")                          \
   ASM("adcq $0,%%rdx")                                \
   ASM("movq %%rdx,%[carry]")                          \
   ASM("movq %%rax, 8*" #SLOT "(%[" DEST "])")

/*
* rdx:rax = x[SLOT] * y + carry + z[SLOT]; the low half is stored
* back to z[SLOT] and the high half becomes the new carry. The
* first macro argument is not used.
*/
#define MULADD_OP(UNUSED_ARG, SLOT)                    \
   ASM("movq 8*" #SLOT "(%[x]),%%rax")                 \
   ASM("mulq %[y]")                                    \
   ASM("addq %[carry],%%rax")                          \
   ASM("adcq $0,%%rdx")                                \
   ASM("addq 8*" #SLOT "(%[z]),%%rax")                 \
   ASM("adcq $0,%%rdx")                                \
   ASM("movq %%rdx,%[carry]")                          \
   ASM("movq %%rax, 8*" #SLOT " (%[z])")

#endif
87
#if defined(ADD_OR_SUBTRACT)

/* One line of an asm template: the instruction text plus "\n\t" */
#define ASM(insn) insn "\n\t"

/* Expand STEP(ARG, i) for i = 0, 1, ..., 7 in that order */
#define DO_8_TIMES(STEP, ARG)    \
   STEP(ARG, 0)                  \
   STEP(ARG, 1)                  \
   STEP(ARG, 2)                  \
   STEP(ARG, 3)                  \
   STEP(ARG, 4)                  \
   STEP(ARG, 5)                  \
   STEP(ARG, 6)                  \
   STEP(ARG, 7)

#endif
103
104 /*
105 * Word Addition
106 */
word_add(word x,word y,word * carry)107 inline word word_add(word x, word y, word* carry)
108 {
109 #if defined(BOTAN_MP_USE_X86_32_ASM)
110 asm(
111 ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
112 : [x]"=r"(x), [carry]"=r"(*carry)
113 : "0"(x), [y]"rm"(y), "1"(*carry)
114 : "cc");
115 return x;
116
117 #elif defined(BOTAN_MP_USE_X86_64_ASM)
118
119 asm(
120 ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
121 : [x]"=r"(x), [carry]"=r"(*carry)
122 : "0"(x), [y]"rm"(y), "1"(*carry)
123 : "cc");
124 return x;
125
126 #else
127 word z = x + y;
128 word c1 = (z < x);
129 z += *carry;
130 *carry = c1 | (z < *carry);
131 return z;
132 #endif
133 }
134
135 /*
136 * Eight Word Block Addition, Two Argument
137 */
word8_add2(word x[8],const word y[8],word carry)138 inline word word8_add2(word x[8], const word y[8], word carry)
139 {
140 #if defined(BOTAN_MP_USE_X86_32_ASM)
141 asm(
142 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
143 : [carry]"=r"(carry)
144 : [x]"r"(x), [y]"r"(y), "0"(carry)
145 : "cc", "memory");
146 return carry;
147
148 #elif defined(BOTAN_MP_USE_X86_64_ASM)
149
150 asm(
151 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
152 : [carry]"=r"(carry)
153 : [x]"r"(x), [y]"r"(y), "0"(carry)
154 : "cc", "memory");
155 return carry;
156
157 #else
158 x[0] = word_add(x[0], y[0], &carry);
159 x[1] = word_add(x[1], y[1], &carry);
160 x[2] = word_add(x[2], y[2], &carry);
161 x[3] = word_add(x[3], y[3], &carry);
162 x[4] = word_add(x[4], y[4], &carry);
163 x[5] = word_add(x[5], y[5], &carry);
164 x[6] = word_add(x[6], y[6], &carry);
165 x[7] = word_add(x[7], y[7], &carry);
166 return carry;
167 #endif
168 }
169
170 /*
171 * Eight Word Block Addition, Three Argument
172 */
word8_add3(word z[8],const word x[8],const word y[8],word carry)173 inline word word8_add3(word z[8], const word x[8],
174 const word y[8], word carry)
175 {
176 #if defined(BOTAN_MP_USE_X86_32_ASM)
177 asm(
178 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
179 : [carry]"=r"(carry)
180 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
181 : "cc", "memory");
182 return carry;
183
184 #elif defined(BOTAN_MP_USE_X86_64_ASM)
185
186 asm(
187 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
188 : [carry]"=r"(carry)
189 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
190 : "cc", "memory");
191 return carry;
192
193 #else
194 z[0] = word_add(x[0], y[0], &carry);
195 z[1] = word_add(x[1], y[1], &carry);
196 z[2] = word_add(x[2], y[2], &carry);
197 z[3] = word_add(x[3], y[3], &carry);
198 z[4] = word_add(x[4], y[4], &carry);
199 z[5] = word_add(x[5], y[5], &carry);
200 z[6] = word_add(x[6], y[6], &carry);
201 z[7] = word_add(x[7], y[7], &carry);
202 return carry;
203 #endif
204 }
205
206 /*
207 * Word Subtraction
208 */
word_sub(word x,word y,word * carry)209 inline word word_sub(word x, word y, word* carry)
210 {
211 #if defined(BOTAN_MP_USE_X86_32_ASM)
212 asm(
213 ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
214 : [x]"=r"(x), [carry]"=r"(*carry)
215 : "0"(x), [y]"rm"(y), "1"(*carry)
216 : "cc");
217 return x;
218
219 #elif defined(BOTAN_MP_USE_X86_64_ASM)
220
221 asm(
222 ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
223 : [x]"=r"(x), [carry]"=r"(*carry)
224 : "0"(x), [y]"rm"(y), "1"(*carry)
225 : "cc");
226 return x;
227
228 #else
229 word t0 = x - y;
230 word c1 = (t0 > x);
231 word z = t0 - *carry;
232 *carry = c1 | (z > t0);
233 return z;
234 #endif
235 }
236
237 /*
238 * Eight Word Block Subtraction, Two Argument
239 */
word8_sub2(word x[8],const word y[8],word carry)240 inline word word8_sub2(word x[8], const word y[8], word carry)
241 {
242 #if defined(BOTAN_MP_USE_X86_32_ASM)
243 asm(
244 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
245 : [carry]"=r"(carry)
246 : [x]"r"(x), [y]"r"(y), "0"(carry)
247 : "cc", "memory");
248 return carry;
249
250 #elif defined(BOTAN_MP_USE_X86_64_ASM)
251
252 asm(
253 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
254 : [carry]"=r"(carry)
255 : [x]"r"(x), [y]"r"(y), "0"(carry)
256 : "cc", "memory");
257 return carry;
258
259 #else
260 x[0] = word_sub(x[0], y[0], &carry);
261 x[1] = word_sub(x[1], y[1], &carry);
262 x[2] = word_sub(x[2], y[2], &carry);
263 x[3] = word_sub(x[3], y[3], &carry);
264 x[4] = word_sub(x[4], y[4], &carry);
265 x[5] = word_sub(x[5], y[5], &carry);
266 x[6] = word_sub(x[6], y[6], &carry);
267 x[7] = word_sub(x[7], y[7], &carry);
268 return carry;
269 #endif
270 }
271
272 /*
273 * Eight Word Block Subtraction, Two Argument
274 */
word8_sub2_rev(word x[8],const word y[8],word carry)275 inline word word8_sub2_rev(word x[8], const word y[8], word carry)
276 {
277 #if defined(BOTAN_MP_USE_X86_32_ASM)
278 asm(
279 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
280 : [carry]"=r"(carry)
281 : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
282 : "cc", "memory");
283 return carry;
284
285 #elif defined(BOTAN_MP_USE_X86_64_ASM)
286
287 asm(
288 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
289 : [carry]"=r"(carry)
290 : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
291 : "cc", "memory");
292 return carry;
293
294 #else
295 x[0] = word_sub(y[0], x[0], &carry);
296 x[1] = word_sub(y[1], x[1], &carry);
297 x[2] = word_sub(y[2], x[2], &carry);
298 x[3] = word_sub(y[3], x[3], &carry);
299 x[4] = word_sub(y[4], x[4], &carry);
300 x[5] = word_sub(y[5], x[5], &carry);
301 x[6] = word_sub(y[6], x[6], &carry);
302 x[7] = word_sub(y[7], x[7], &carry);
303 return carry;
304 #endif
305 }
306
307 /*
308 * Eight Word Block Subtraction, Three Argument
309 */
word8_sub3(word z[8],const word x[8],const word y[8],word carry)310 inline word word8_sub3(word z[8], const word x[8],
311 const word y[8], word carry)
312 {
313 #if defined(BOTAN_MP_USE_X86_32_ASM)
314 asm(
315 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
316 : [carry]"=r"(carry)
317 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
318 : "cc", "memory");
319 return carry;
320
321 #elif defined(BOTAN_MP_USE_X86_64_ASM)
322
323 asm(
324 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
325 : [carry]"=r"(carry)
326 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
327 : "cc", "memory");
328 return carry;
329
330 #else
331 z[0] = word_sub(x[0], y[0], &carry);
332 z[1] = word_sub(x[1], y[1], &carry);
333 z[2] = word_sub(x[2], y[2], &carry);
334 z[3] = word_sub(x[3], y[3], &carry);
335 z[4] = word_sub(x[4], y[4], &carry);
336 z[5] = word_sub(x[5], y[5], &carry);
337 z[6] = word_sub(x[6], y[6], &carry);
338 z[7] = word_sub(x[7], y[7], &carry);
339 return carry;
340 #endif
341 }
342
343 /*
344 * Eight Word Block Linear Multiplication
345 */
word8_linmul2(word x[8],word y,word carry)346 inline word word8_linmul2(word x[8], word y, word carry)
347 {
348 #if defined(BOTAN_MP_USE_X86_32_ASM)
349 asm(
350 DO_8_TIMES(LINMUL_OP, "x")
351 : [carry]"=r"(carry)
352 : [x]"r"(x), [y]"rm"(y), "0"(carry)
353 : "cc", "%eax", "%edx");
354 return carry;
355
356 #elif defined(BOTAN_MP_USE_X86_64_ASM)
357
358 asm(
359 DO_8_TIMES(LINMUL_OP, "x")
360 : [carry]"=r"(carry)
361 : [x]"r"(x), [y]"rm"(y), "0"(carry)
362 : "cc", "%rax", "%rdx");
363 return carry;
364
365 #else
366 x[0] = word_madd2(x[0], y, &carry);
367 x[1] = word_madd2(x[1], y, &carry);
368 x[2] = word_madd2(x[2], y, &carry);
369 x[3] = word_madd2(x[3], y, &carry);
370 x[4] = word_madd2(x[4], y, &carry);
371 x[5] = word_madd2(x[5], y, &carry);
372 x[6] = word_madd2(x[6], y, &carry);
373 x[7] = word_madd2(x[7], y, &carry);
374 return carry;
375 #endif
376 }
377
378 /*
379 * Eight Word Block Linear Multiplication
380 */
word8_linmul3(word z[8],const word x[8],word y,word carry)381 inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
382 {
383 #if defined(BOTAN_MP_USE_X86_32_ASM)
384 asm(
385 DO_8_TIMES(LINMUL_OP, "z")
386 : [carry]"=r"(carry)
387 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
388 : "cc", "%eax", "%edx");
389 return carry;
390
391 #elif defined(BOTAN_MP_USE_X86_64_ASM)
392 asm(
393 DO_8_TIMES(LINMUL_OP, "z")
394 : [carry]"=r"(carry)
395 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
396 : "cc", "%rax", "%rdx");
397 return carry;
398
399 #else
400 z[0] = word_madd2(x[0], y, &carry);
401 z[1] = word_madd2(x[1], y, &carry);
402 z[2] = word_madd2(x[2], y, &carry);
403 z[3] = word_madd2(x[3], y, &carry);
404 z[4] = word_madd2(x[4], y, &carry);
405 z[5] = word_madd2(x[5], y, &carry);
406 z[6] = word_madd2(x[6], y, &carry);
407 z[7] = word_madd2(x[7], y, &carry);
408 return carry;
409 #endif
410 }
411
412 /*
413 * Eight Word Block Multiply/Add
414 */
word8_madd3(word z[8],const word x[8],word y,word carry)415 inline word word8_madd3(word z[8], const word x[8], word y, word carry)
416 {
417 #if defined(BOTAN_MP_USE_X86_32_ASM)
418 asm(
419 DO_8_TIMES(MULADD_OP, "")
420 : [carry]"=r"(carry)
421 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
422 : "cc", "%eax", "%edx");
423 return carry;
424
425 #elif defined(BOTAN_MP_USE_X86_64_ASM)
426
427 asm(
428 DO_8_TIMES(MULADD_OP, "")
429 : [carry]"=r"(carry)
430 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
431 : "cc", "%rax", "%rdx");
432 return carry;
433
434 #else
435 z[0] = word_madd3(x[0], y, z[0], &carry);
436 z[1] = word_madd3(x[1], y, z[1], &carry);
437 z[2] = word_madd3(x[2], y, z[2], &carry);
438 z[3] = word_madd3(x[3], y, z[3], &carry);
439 z[4] = word_madd3(x[4], y, z[4], &carry);
440 z[5] = word_madd3(x[5], y, z[5], &carry);
441 z[6] = word_madd3(x[6], y, z[6], &carry);
442 z[7] = word_madd3(x[7], y, z[7], &carry);
443 return carry;
444 #endif
445 }
446
447 /*
448 * Multiply-Add Accumulator
449 * (w2,w1,w0) += x * y
450 */
word3_muladd(word * w2,word * w1,word * w0,word x,word y)451 inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
452 {
453 #if defined(BOTAN_MP_USE_X86_32_ASM)
454 word z0 = 0, z1 = 0;
455
456 asm("mull %[y]"
457 : "=a"(z0),"=d"(z1)
458 : "a"(x), [y]"rm"(y)
459 : "cc");
460
461 asm(R"(
462 addl %[z0],%[w0]
463 adcl %[z1],%[w1]
464 adcl $0,%[w2]
465 )"
466 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
467 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
468 : "cc");
469
470 #elif defined(BOTAN_MP_USE_X86_64_ASM)
471
472 word z0 = 0, z1 = 0;
473
474 asm("mulq %[y]"
475 : "=a"(z0),"=d"(z1)
476 : "a"(x), [y]"rm"(y)
477 : "cc");
478
479 asm(R"(
480 addq %[z0],%[w0]
481 adcq %[z1],%[w1]
482 adcq $0,%[w2]
483 )"
484 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
485 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
486 : "cc");
487
488 #else
489 word carry = *w0;
490 *w0 = word_madd2(x, y, &carry);
491 *w1 += carry;
492 *w2 += (*w1 < carry);
493 #endif
494 }
495
496 /*
497 * 3-word addition
498 * (w2,w1,w0) += x
499 */
word3_add(word * w2,word * w1,word * w0,word x)500 inline void word3_add(word* w2, word* w1, word* w0, word x)
501 {
502 #if defined(BOTAN_MP_USE_X86_32_ASM)
503 asm(R"(
504 addl %[x],%[w0]
505 adcl $0,%[w1]
506 adcl $0,%[w2]
507 )"
508 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
509 : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
510 : "cc");
511
512 #elif defined(BOTAN_MP_USE_X86_64_ASM)
513
514 asm(R"(
515 addq %[x],%[w0]
516 adcq $0,%[w1]
517 adcq $0,%[w2]
518 )"
519 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
520 : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
521 : "cc");
522
523 #else
524 *w0 += x;
525 word c1 = (*w0 < x);
526 *w1 += c1;
527 word c2 = (*w1 < c1);
528 *w2 += c2;
529 #endif
530 }
531
532 /*
533 * Multiply-Add Accumulator
534 * (w2,w1,w0) += 2 * x * y
535 */
word3_muladd_2(word * w2,word * w1,word * w0,word x,word y)536 inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
537 {
538 #if defined(BOTAN_MP_USE_X86_32_ASM)
539
540 word z0 = 0, z1 = 0;
541
542 asm("mull %[y]"
543 : "=a"(z0),"=d"(z1)
544 : "a"(x), [y]"rm"(y)
545 : "cc");
546
547 asm(R"(
548 addl %[z0],%[w0]
549 adcl %[z1],%[w1]
550 adcl $0,%[w2]
551
552 addl %[z0],%[w0]
553 adcl %[z1],%[w1]
554 adcl $0,%[w2]
555 )"
556 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
557 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
558 : "cc");
559
560 #elif defined(BOTAN_MP_USE_X86_64_ASM)
561
562 word z0 = 0, z1 = 0;
563
564 asm("mulq %[y]"
565 : "=a"(z0),"=d"(z1)
566 : "a"(x), [y]"rm"(y)
567 : "cc");
568
569 asm(R"(
570 addq %[z0],%[w0]
571 adcq %[z1],%[w1]
572 adcq $0,%[w2]
573
574 addq %[z0],%[w0]
575 adcq %[z1],%[w1]
576 adcq $0,%[w2]
577 )"
578 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
579 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
580 : "cc");
581
582 #else
583 word carry = 0;
584 x = word_madd2(x, y, &carry);
585 y = carry;
586
587 word top = (y >> (BOTAN_MP_WORD_BITS-1));
588 y <<= 1;
589 y |= (x >> (BOTAN_MP_WORD_BITS-1));
590 x <<= 1;
591
592 carry = 0;
593 *w0 = word_add(*w0, x, &carry);
594 *w1 = word_add(*w1, y, &carry);
595 *w2 = word_add(*w2, top, &carry);
596 #endif
597 }
598
/*
* The asm helper macros are only meant for use inside this header;
* remove them again so they are not visible to code that includes it.
*/
#if defined(ASM)
   #undef ADDSUB2_OP
   #undef ADDSUB3_OP
   #undef LINMUL_OP
   #undef MULADD_OP
   #undef ADD_OR_SUBTRACT
   #undef DO_8_TIMES
   #undef ASM
#endif
608
609 }
610
611 #endif
612