1 /*
2 * Lowest Level MPI Algorithms
3 * (C) 1999-2010 Jack Lloyd
4 *     2006 Luca Piccarreta
5 *
6 * Botan is released under the Simplified BSD License (see license.txt)
7 */
8 
9 #ifndef BOTAN_MP_ASM_INTERNAL_H_
10 #define BOTAN_MP_ASM_INTERNAL_H_
11 
12 #include <botan/internal/mp_madd.h>
13 
14 namespace Botan {
15 
/*
* Instruction sequences shared by the inline assembly routines below.
*
* Every ASM(...) argument is a single instruction; the ASM macro itself
* has to be defined before any of these macros is expanded. INDEX picks
* the word at byte offset (INDEX * word size) inside the arrays named by
* the %[x], %[y] and %[z] operands.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)

/*
* Rotate bit 0 of carry into CF, run CORE_CODE (an adc/sbb chain), then
* convert the final CF back into a carry value of 0 or 1.
*/
#define ADD_OR_SUBTRACT(CORE_CODE)                          \
   ASM("rorl %[carry]")                                     \
   CORE_CODE                                                \
   ASM("sbbl %[carry],%[carry]")                            \
   ASM("negl %[carry]")

/*
* x[INDEX] = x[INDEX] OPERATION y[INDEX]. The carry register is reused
* as scratch; the running carry lives in CF (movl leaves flags alone).
*/
#define ADDSUB2_OP(OPERATION, INDEX)                        \
   ASM("movl 4*" #INDEX "(%[y]), %[carry]")                 \
   ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")

/*
* z[INDEX] = x[INDEX] OPERATION y[INDEX], again using the carry register
* as scratch while CF holds the running carry.
*/
#define ADDSUB3_OP(OPERATION, INDEX)                        \
   ASM("movl 4*" #INDEX "(%[x]), %[carry]")                 \
   ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")           \
   ASM("movl %[carry], 4*" #INDEX "(%[z])")

/*
* edx:eax = x[INDEX] * y + carry; the low word is stored to
* WRITE_TO[INDEX] and the high word becomes the new carry.
*/
#define LINMUL_OP(WRITE_TO, INDEX)                          \
   ASM("movl 4*" #INDEX "(%[x]),%%eax")                     \
   ASM("mull %[y]")                                         \
   ASM("addl %[carry],%%eax")                               \
   ASM("adcl $0,%%edx")                                     \
   ASM("movl %%edx,%[carry]")                               \
   ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")

/*
* edx:eax = x[INDEX] * y + carry + z[INDEX]; the low word is stored back
* to z[INDEX] and the high word becomes the new carry. The first macro
* argument is unused.
*/
#define MULADD_OP(IGNORED, INDEX)                           \
   ASM("movl 4*" #INDEX "(%[x]),%%eax")                     \
   ASM("mull %[y]")                                         \
   ASM("addl %[carry],%%eax")                               \
   ASM("adcl $0,%%edx")                                     \
   ASM("addl 4*" #INDEX "(%[z]),%%eax")                     \
   ASM("adcl $0,%%edx")                                     \
   ASM("movl %%edx,%[carry]")                               \
   ASM("movl %%eax, 4*" #INDEX " (%[z])")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

/*
* Rotate bit 0 of carry into CF, run CORE_CODE (an adc/sbb chain), then
* convert the final CF back into a carry value of 0 or 1.
*/
#define ADD_OR_SUBTRACT(CORE_CODE)                          \
   ASM("rorq %[carry]")                                     \
   CORE_CODE                                                \
   ASM("sbbq %[carry],%[carry]")                            \
   ASM("negq %[carry]")

/*
* x[INDEX] = x[INDEX] OPERATION y[INDEX]. The carry register is reused
* as scratch; the running carry lives in CF (movq leaves flags alone).
*/
#define ADDSUB2_OP(OPERATION, INDEX)                        \
   ASM("movq 8*" #INDEX "(%[y]), %[carry]")                 \
   ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")

/*
* z[INDEX] = x[INDEX] OPERATION y[INDEX], again using the carry register
* as scratch while CF holds the running carry.
*/
#define ADDSUB3_OP(OPERATION, INDEX)                        \
   ASM("movq 8*" #INDEX "(%[x]), %[carry]")                 \
   ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")           \
   ASM("movq %[carry], 8*" #INDEX "(%[z])")

/*
* rdx:rax = x[INDEX] * y + carry; the low word is stored to
* WRITE_TO[INDEX] and the high word becomes the new carry.
*/
#define LINMUL_OP(WRITE_TO, INDEX)                          \
   ASM("movq 8*" #INDEX "(%[x]),%%rax")                     \
   ASM("mulq %[y]")                                         \
   ASM("addq %[carry],%%rax")                               \
   ASM("adcq $0,%%rdx")                                     \
   ASM("movq %%rdx,%[carry]")                               \
   ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")

/*
* rdx:rax = x[INDEX] * y + carry + z[INDEX]; the low word is stored back
* to z[INDEX] and the high word becomes the new carry. The first macro
* argument is unused.
*/
#define MULADD_OP(IGNORED, INDEX)                           \
   ASM("movq 8*" #INDEX "(%[x]),%%rax")                     \
   ASM("mulq %[y]")                                         \
   ASM("addq %[carry],%%rax")                               \
   ASM("adcq $0,%%rdx")                                     \
   ASM("addq 8*" #INDEX "(%[z]),%%rax")                     \
   ASM("adcq $0,%%rdx")                                     \
   ASM("movq %%rdx,%[carry]")                               \
   ASM("movq %%rax, 8*" #INDEX " (%[z])")

#endif
87 
/*
* Helpers that are only needed when one of the assembly back ends above
* has been selected (detected via ADD_OR_SUBTRACT being defined).
*/
#if defined(ADD_OR_SUBTRACT)

/* Terminates one instruction of an asm template with newline + tab */
#define ASM(x) x "\n\t"

/* Expands MACRO(ARG, i) for i = 0 .. 7, in ascending order */
#define DO_8_TIMES(MACRO, ARG)       \
   MACRO(ARG, 0) MACRO(ARG, 1)       \
   MACRO(ARG, 2) MACRO(ARG, 3)       \
   MACRO(ARG, 4) MACRO(ARG, 5)       \
   MACRO(ARG, 6) MACRO(ARG, 7)

#endif
103 
104 /*
105 * Word Addition
106 */
word_add(word x,word y,word * carry)107 inline word word_add(word x, word y, word* carry)
108    {
109 #if defined(BOTAN_MP_USE_X86_32_ASM)
110    asm(
111       ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
112       : [x]"=r"(x), [carry]"=r"(*carry)
113       : "0"(x), [y]"rm"(y), "1"(*carry)
114       : "cc");
115    return x;
116 
117 #elif defined(BOTAN_MP_USE_X86_64_ASM)
118 
119    asm(
120       ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
121       : [x]"=r"(x), [carry]"=r"(*carry)
122       : "0"(x), [y]"rm"(y), "1"(*carry)
123       : "cc");
124    return x;
125 
126 #else
127    word z = x + y;
128    word c1 = (z < x);
129    z += *carry;
130    *carry = c1 | (z < *carry);
131    return z;
132 #endif
133    }
134 
135 /*
136 * Eight Word Block Addition, Two Argument
137 */
word8_add2(word x[8],const word y[8],word carry)138 inline word word8_add2(word x[8], const word y[8], word carry)
139    {
140 #if defined(BOTAN_MP_USE_X86_32_ASM)
141    asm(
142       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
143       : [carry]"=r"(carry)
144       : [x]"r"(x), [y]"r"(y), "0"(carry)
145       : "cc", "memory");
146    return carry;
147 
148 #elif defined(BOTAN_MP_USE_X86_64_ASM)
149 
150    asm(
151       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
152       : [carry]"=r"(carry)
153       : [x]"r"(x), [y]"r"(y), "0"(carry)
154       : "cc", "memory");
155    return carry;
156 
157 #else
158    x[0] = word_add(x[0], y[0], &carry);
159    x[1] = word_add(x[1], y[1], &carry);
160    x[2] = word_add(x[2], y[2], &carry);
161    x[3] = word_add(x[3], y[3], &carry);
162    x[4] = word_add(x[4], y[4], &carry);
163    x[5] = word_add(x[5], y[5], &carry);
164    x[6] = word_add(x[6], y[6], &carry);
165    x[7] = word_add(x[7], y[7], &carry);
166    return carry;
167 #endif
168    }
169 
170 /*
171 * Eight Word Block Addition, Three Argument
172 */
word8_add3(word z[8],const word x[8],const word y[8],word carry)173 inline word word8_add3(word z[8], const word x[8],
174                        const word y[8], word carry)
175    {
176 #if defined(BOTAN_MP_USE_X86_32_ASM)
177    asm(
178       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
179       : [carry]"=r"(carry)
180       : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
181       : "cc", "memory");
182    return carry;
183 
184 #elif defined(BOTAN_MP_USE_X86_64_ASM)
185 
186    asm(
187       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
188       : [carry]"=r"(carry)
189       : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
190       : "cc", "memory");
191    return carry;
192 
193 #else
194    z[0] = word_add(x[0], y[0], &carry);
195    z[1] = word_add(x[1], y[1], &carry);
196    z[2] = word_add(x[2], y[2], &carry);
197    z[3] = word_add(x[3], y[3], &carry);
198    z[4] = word_add(x[4], y[4], &carry);
199    z[5] = word_add(x[5], y[5], &carry);
200    z[6] = word_add(x[6], y[6], &carry);
201    z[7] = word_add(x[7], y[7], &carry);
202    return carry;
203 #endif
204    }
205 
206 /*
207 * Word Subtraction
208 */
word_sub(word x,word y,word * carry)209 inline word word_sub(word x, word y, word* carry)
210    {
211 #if defined(BOTAN_MP_USE_X86_32_ASM)
212    asm(
213       ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
214       : [x]"=r"(x), [carry]"=r"(*carry)
215       : "0"(x), [y]"rm"(y), "1"(*carry)
216       : "cc");
217    return x;
218 
219 #elif defined(BOTAN_MP_USE_X86_64_ASM)
220 
221    asm(
222       ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
223       : [x]"=r"(x), [carry]"=r"(*carry)
224       : "0"(x), [y]"rm"(y), "1"(*carry)
225       : "cc");
226    return x;
227 
228 #else
229    word t0 = x - y;
230    word c1 = (t0 > x);
231    word z = t0 - *carry;
232    *carry = c1 | (z > t0);
233    return z;
234 #endif
235    }
236 
237 /*
238 * Eight Word Block Subtraction, Two Argument
239 */
word8_sub2(word x[8],const word y[8],word carry)240 inline word word8_sub2(word x[8], const word y[8], word carry)
241    {
242 #if defined(BOTAN_MP_USE_X86_32_ASM)
243    asm(
244       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
245       : [carry]"=r"(carry)
246       : [x]"r"(x), [y]"r"(y), "0"(carry)
247       : "cc", "memory");
248    return carry;
249 
250 #elif defined(BOTAN_MP_USE_X86_64_ASM)
251 
252    asm(
253       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
254       : [carry]"=r"(carry)
255       : [x]"r"(x), [y]"r"(y), "0"(carry)
256       : "cc", "memory");
257    return carry;
258 
259 #else
260    x[0] = word_sub(x[0], y[0], &carry);
261    x[1] = word_sub(x[1], y[1], &carry);
262    x[2] = word_sub(x[2], y[2], &carry);
263    x[3] = word_sub(x[3], y[3], &carry);
264    x[4] = word_sub(x[4], y[4], &carry);
265    x[5] = word_sub(x[5], y[5], &carry);
266    x[6] = word_sub(x[6], y[6], &carry);
267    x[7] = word_sub(x[7], y[7], &carry);
268    return carry;
269 #endif
270    }
271 
272 /*
273 * Eight Word Block Subtraction, Two Argument
274 */
word8_sub2_rev(word x[8],const word y[8],word carry)275 inline word word8_sub2_rev(word x[8], const word y[8], word carry)
276    {
277 #if defined(BOTAN_MP_USE_X86_32_ASM)
278    asm(
279       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
280       : [carry]"=r"(carry)
281       : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
282       : "cc", "memory");
283    return carry;
284 
285 #elif defined(BOTAN_MP_USE_X86_64_ASM)
286 
287    asm(
288       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
289       : [carry]"=r"(carry)
290       : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
291       : "cc", "memory");
292    return carry;
293 
294 #else
295    x[0] = word_sub(y[0], x[0], &carry);
296    x[1] = word_sub(y[1], x[1], &carry);
297    x[2] = word_sub(y[2], x[2], &carry);
298    x[3] = word_sub(y[3], x[3], &carry);
299    x[4] = word_sub(y[4], x[4], &carry);
300    x[5] = word_sub(y[5], x[5], &carry);
301    x[6] = word_sub(y[6], x[6], &carry);
302    x[7] = word_sub(y[7], x[7], &carry);
303    return carry;
304 #endif
305    }
306 
307 /*
308 * Eight Word Block Subtraction, Three Argument
309 */
word8_sub3(word z[8],const word x[8],const word y[8],word carry)310 inline word word8_sub3(word z[8], const word x[8],
311                        const word y[8], word carry)
312    {
313 #if defined(BOTAN_MP_USE_X86_32_ASM)
314    asm(
315       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
316       : [carry]"=r"(carry)
317       : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
318       : "cc", "memory");
319    return carry;
320 
321 #elif defined(BOTAN_MP_USE_X86_64_ASM)
322 
323    asm(
324       ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
325       : [carry]"=r"(carry)
326       : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
327       : "cc", "memory");
328    return carry;
329 
330 #else
331    z[0] = word_sub(x[0], y[0], &carry);
332    z[1] = word_sub(x[1], y[1], &carry);
333    z[2] = word_sub(x[2], y[2], &carry);
334    z[3] = word_sub(x[3], y[3], &carry);
335    z[4] = word_sub(x[4], y[4], &carry);
336    z[5] = word_sub(x[5], y[5], &carry);
337    z[6] = word_sub(x[6], y[6], &carry);
338    z[7] = word_sub(x[7], y[7], &carry);
339    return carry;
340 #endif
341    }
342 
343 /*
344 * Eight Word Block Linear Multiplication
345 */
word8_linmul2(word x[8],word y,word carry)346 inline word word8_linmul2(word x[8], word y, word carry)
347    {
348 #if defined(BOTAN_MP_USE_X86_32_ASM)
349    asm(
350       DO_8_TIMES(LINMUL_OP, "x")
351       : [carry]"=r"(carry)
352       : [x]"r"(x), [y]"rm"(y), "0"(carry)
353       : "cc", "%eax", "%edx");
354    return carry;
355 
356 #elif defined(BOTAN_MP_USE_X86_64_ASM)
357 
358    asm(
359       DO_8_TIMES(LINMUL_OP, "x")
360       : [carry]"=r"(carry)
361       : [x]"r"(x), [y]"rm"(y), "0"(carry)
362       : "cc", "%rax", "%rdx");
363    return carry;
364 
365 #else
366    x[0] = word_madd2(x[0], y, &carry);
367    x[1] = word_madd2(x[1], y, &carry);
368    x[2] = word_madd2(x[2], y, &carry);
369    x[3] = word_madd2(x[3], y, &carry);
370    x[4] = word_madd2(x[4], y, &carry);
371    x[5] = word_madd2(x[5], y, &carry);
372    x[6] = word_madd2(x[6], y, &carry);
373    x[7] = word_madd2(x[7], y, &carry);
374    return carry;
375 #endif
376    }
377 
378 /*
379 * Eight Word Block Linear Multiplication
380 */
word8_linmul3(word z[8],const word x[8],word y,word carry)381 inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
382    {
383 #if defined(BOTAN_MP_USE_X86_32_ASM)
384    asm(
385       DO_8_TIMES(LINMUL_OP, "z")
386       : [carry]"=r"(carry)
387       : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
388       : "cc", "%eax", "%edx");
389    return carry;
390 
391 #elif defined(BOTAN_MP_USE_X86_64_ASM)
392    asm(
393       DO_8_TIMES(LINMUL_OP, "z")
394       : [carry]"=r"(carry)
395       : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
396       : "cc", "%rax", "%rdx");
397    return carry;
398 
399 #else
400    z[0] = word_madd2(x[0], y, &carry);
401    z[1] = word_madd2(x[1], y, &carry);
402    z[2] = word_madd2(x[2], y, &carry);
403    z[3] = word_madd2(x[3], y, &carry);
404    z[4] = word_madd2(x[4], y, &carry);
405    z[5] = word_madd2(x[5], y, &carry);
406    z[6] = word_madd2(x[6], y, &carry);
407    z[7] = word_madd2(x[7], y, &carry);
408    return carry;
409 #endif
410    }
411 
412 /*
413 * Eight Word Block Multiply/Add
414 */
word8_madd3(word z[8],const word x[8],word y,word carry)415 inline word word8_madd3(word z[8], const word x[8], word y, word carry)
416    {
417 #if defined(BOTAN_MP_USE_X86_32_ASM)
418    asm(
419       DO_8_TIMES(MULADD_OP, "")
420       : [carry]"=r"(carry)
421       : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
422       : "cc", "%eax", "%edx");
423    return carry;
424 
425 #elif defined(BOTAN_MP_USE_X86_64_ASM)
426 
427    asm(
428       DO_8_TIMES(MULADD_OP, "")
429       : [carry]"=r"(carry)
430       : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
431       : "cc", "%rax", "%rdx");
432    return carry;
433 
434 #else
435    z[0] = word_madd3(x[0], y, z[0], &carry);
436    z[1] = word_madd3(x[1], y, z[1], &carry);
437    z[2] = word_madd3(x[2], y, z[2], &carry);
438    z[3] = word_madd3(x[3], y, z[3], &carry);
439    z[4] = word_madd3(x[4], y, z[4], &carry);
440    z[5] = word_madd3(x[5], y, z[5], &carry);
441    z[6] = word_madd3(x[6], y, z[6], &carry);
442    z[7] = word_madd3(x[7], y, z[7], &carry);
443    return carry;
444 #endif
445    }
446 
447 /*
448 * Multiply-Add Accumulator
449 * (w2,w1,w0) += x * y
450 */
word3_muladd(word * w2,word * w1,word * w0,word x,word y)451 inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
452    {
453 #if defined(BOTAN_MP_USE_X86_32_ASM)
454    word z0 = 0, z1 = 0;
455 
456    asm("mull %[y]"
457         : "=a"(z0),"=d"(z1)
458         : "a"(x), [y]"rm"(y)
459         : "cc");
460 
461    asm(R"(
462        addl %[z0],%[w0]
463        adcl %[z1],%[w1]
464        adcl $0,%[w2]
465        )"
466        : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
467        : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
468        : "cc");
469 
470 #elif defined(BOTAN_MP_USE_X86_64_ASM)
471 
472    word z0 = 0, z1 = 0;
473 
474    asm("mulq %[y]"
475         : "=a"(z0),"=d"(z1)
476         : "a"(x), [y]"rm"(y)
477         : "cc");
478 
479    asm(R"(
480        addq %[z0],%[w0]
481        adcq %[z1],%[w1]
482        adcq $0,%[w2]
483        )"
484        : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
485        : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
486        : "cc");
487 
488 #else
489    word carry = *w0;
490    *w0 = word_madd2(x, y, &carry);
491    *w1 += carry;
492    *w2 += (*w1 < carry);
493 #endif
494    }
495 
496 /*
497 * 3-word addition
498 * (w2,w1,w0) += x
499 */
word3_add(word * w2,word * w1,word * w0,word x)500 inline void word3_add(word* w2, word* w1, word* w0, word x)
501    {
502 #if defined(BOTAN_MP_USE_X86_32_ASM)
503    asm(R"(
504       addl %[x],%[w0]
505       adcl $0,%[w1]
506       adcl $0,%[w2]
507       )"
508       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
509       : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
510       : "cc");
511 
512 #elif defined(BOTAN_MP_USE_X86_64_ASM)
513 
514    asm(R"(
515       addq %[x],%[w0]
516       adcq $0,%[w1]
517       adcq $0,%[w2]
518       )"
519       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
520       : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
521       : "cc");
522 
523 #else
524    *w0 += x;
525    word c1 = (*w0 < x);
526    *w1 += c1;
527    word c2 = (*w1 < c1);
528    *w2 += c2;
529 #endif
530    }
531 
532 /*
533 * Multiply-Add Accumulator
534 * (w2,w1,w0) += 2 * x * y
535 */
word3_muladd_2(word * w2,word * w1,word * w0,word x,word y)536 inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
537    {
538 #if defined(BOTAN_MP_USE_X86_32_ASM)
539 
540    word z0 = 0, z1 = 0;
541 
542    asm("mull %[y]"
543         : "=a"(z0),"=d"(z1)
544         : "a"(x), [y]"rm"(y)
545         : "cc");
546 
547    asm(R"(
548       addl %[z0],%[w0]
549       adcl %[z1],%[w1]
550       adcl $0,%[w2]
551 
552       addl %[z0],%[w0]
553       adcl %[z1],%[w1]
554       adcl $0,%[w2]
555       )"
556       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
557       : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
558       : "cc");
559 
560 #elif defined(BOTAN_MP_USE_X86_64_ASM)
561 
562    word z0 = 0, z1 = 0;
563 
564    asm("mulq %[y]"
565         : "=a"(z0),"=d"(z1)
566         : "a"(x), [y]"rm"(y)
567         : "cc");
568 
569    asm(R"(
570       addq %[z0],%[w0]
571       adcq %[z1],%[w1]
572       adcq $0,%[w2]
573 
574       addq %[z0],%[w0]
575       adcq %[z1],%[w1]
576       adcq $0,%[w2]
577       )"
578       : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
579       : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
580       : "cc");
581 
582 #else
583    word carry = 0;
584    x = word_madd2(x, y, &carry);
585    y = carry;
586 
587    word top = (y >> (BOTAN_MP_WORD_BITS-1));
588    y <<= 1;
589    y |= (x >> (BOTAN_MP_WORD_BITS-1));
590    x <<= 1;
591 
592    carry = 0;
593    *w0 = word_add(*w0, x, &carry);
594    *w1 = word_add(*w1, y, &carry);
595    *w2 = word_add(*w2, top, &carry);
596 #endif
597    }
598 
/*
* The assembly helper macros are private to this header; remove them so
* they do not leak into files that include it.
*/
#if defined(ASM)
  #undef ADDSUB2_OP
  #undef ADDSUB3_OP
  #undef LINMUL_OP
  #undef MULADD_OP
  #undef ADD_OR_SUBTRACT
  #undef DO_8_TIMES
  #undef ASM
#endif
608 
609 }
610 
611 #endif
612