1 /**************************************************************************/
2 /*                                                                        */
3 /*                                 OCaml                                  */
4 /*                                                                        */
5 /*             Xavier Leroy, projet Cristal, INRIA Rocquencourt           */
6 /*                                                                        */
7 /*   Copyright 2003 Institut National de Recherche en Informatique et     */
8 /*     en Automatique.                                                    */
9 /*                                                                        */
10 /*   All rights reserved.  This file is distributed under the terms of    */
11 /*   the GNU Lesser General Public License version 2.1, with the          */
12 /*   special exception on linking described in the file LICENSE.          */
13 /*                                                                        */
14 /**************************************************************************/
15 
16 /* Code specific to the AMD x86_64 architecture. */
17 
/* BngAdd2(res, carryout, arg1, arg2):
     res      := arg1 + arg2, as a wrapping 64-bit addition (addq);
     carryout := 1 if that addition carried out of bit 63, else 0.
   res and carryout must be lvalues.  carryout is zeroed *before* the
   addition (hence the early-clobber "=&q"), then its low byte receives CF.
   The %k1 modifier forces the 32-bit register name, so the xorl assembles
   whatever the integer width of carryout is.
   __asm__ is used instead of asm so the macro also compiles in strict ISO
   modes (-std=c99/-std=c11); "cc" records that the flags are modified. */
#define BngAdd2(res,carryout,arg1,arg2)                                     \
  __asm__("xorl %k1, %k1 \n\t"                                              \
          "addq %3, %0 \n\t"                                                \
          "setc %b1"                                                        \
          : "=r" (res), "=&q" (carryout)                                    \
          : "0" (arg1), "rm" (arg2)                                         \
          : "cc")
24 
/* BngSub2(res, carryout, arg1, arg2):
     res      := arg1 - arg2, as a wrapping 64-bit subtraction (subq);
     carryout := 1 if the subtraction borrowed (arg1 < arg2 as unsigned
                 64-bit values), else 0.
   res and carryout must be lvalues.  carryout is zeroed *before* the
   subtraction (hence the early-clobber "=&q"), then its low byte receives
   CF.  The %k1 modifier forces the 32-bit register name, so the xorl
   assembles whatever the integer width of carryout is.
   __asm__ is used instead of asm so the macro also compiles in strict ISO
   modes (-std=c99/-std=c11); "cc" records that the flags are modified. */
#define BngSub2(res,carryout,arg1,arg2)                                     \
  __asm__("xorl %k1, %k1 \n\t"                                              \
          "subq %3, %0 \n\t"                                                \
          "setc %b1"                                                        \
          : "=r" (res), "=&q" (carryout)                                    \
          : "0" (arg1), "rm" (arg2)                                         \
          : "cc")
31 
/* BngMult(resh, resl, arg1, arg2):
     resh:resl := arg1 * arg2, the full 128-bit unsigned product of two
     64-bit operands (mulq leaves the low half in rax, the high half in rdx).
   resh and resl must be lvalues.
   __asm__ is used instead of asm so the macro also compiles in strict ISO
   modes; "cc" records that mulq modifies the flags. */
#define BngMult(resh,resl,arg1,arg2)                                        \
  __asm__("mulq %3"                                                         \
          : "=a" (resl), "=d" (resh)                                        \
          : "a" (arg1), "r" (arg2)                                          \
          : "cc")
36 
/* BngDiv(quo, rem, nh, nl, d):
     divides the 128-bit unsigned value nh:nl by the 64-bit divisor d;
     quo := quotient, rem := remainder (divq takes rdx:rax as dividend and
     leaves the quotient in rax, the remainder in rdx).
   quo and rem must be lvalues.
   Precondition: nh < d (which implies d != 0).  Otherwise the quotient does
   not fit in 64 bits and divq raises a divide-error exception.
   __asm__ is used instead of asm so the macro also compiles in strict ISO
   modes; "cc" records that divq leaves the flags undefined. */
#define BngDiv(quo,rem,nh,nl,d)                                             \
  __asm__("divq %4"                                                         \
          : "=a" (quo), "=d" (rem)                                          \
          : "a" (nl), "d" (nh), "r" (d)                                     \
          : "cc")
41 
42 /* Reimplementation in asm of some of the bng operations. */
43 
/* In-place multi-digit addition: adds the blen digits of b, plus the
   incoming carry, to the low blen digits of a, then propagates the
   resulting carry through the remaining alen - blen digits of a.
   Returns the carry out of the top digit of a (0 or 1).

   carry is expected to be 0 or 1 on entry: the assembly only tests and
   rewrites its low byte.
   NOTE(review): alen >= blen is assumed (alen -= blen is not checked);
   presumably guaranteed by callers. */
static bngcarry bng_amd64_add
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngcarry carry)
{
  bngdigit tmp;
  alen -= blen;
  if (blen > 0) {
    /* The loop stores through a and loads through b, which are not
       expressed as asm operands.  It is therefore declared volatile (so it
       cannot be deleted or duplicated) with a "memory" clobber (so the
       compiler neither caches nor reorders memory accesses across it).
       "cc" records that the flags are modified. */
    __asm__ __volatile__(
        "negb %b3 \n\t"         /* CF := (carry != 0) */
        "1: \n\t"
        "movq (%0), %4 \n\t"    /* tmp = *a */
        "adcq (%1), %4 \n\t"    /* tmp += *b + CF */
        "movq %4, (%0) \n\t"    /* *a = tmp */
        "leaq 8(%0), %0 \n\t"   /* a++  (lea does not touch CF) */
        "leaq 8(%1), %1 \n\t"   /* b++ */
        "decq %2 \n\t"          /* blen--  (dec does not touch CF) */
        "jnz 1b \n\t"
        "setc %b3"              /* low byte of carry := CF */
        : "=r" (a), "=r" (b), "=r" (blen), "=q" (carry), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "3" (carry)
        : "cc", "memory");
  }
  /* Nothing left to propagate, or no digits left to propagate into. */
  if (carry == 0 || alen == 0) return carry;
  /* Propagate the carry: increment digits until one does not wrap to 0. */
  do {
    if (++(*a) != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
72 
/* In-place multi-digit subtraction: subtracts the blen digits of b, plus
   the incoming borrow (carry), from the low blen digits of a, then
   propagates the resulting borrow through the remaining alen - blen digits
   of a.  Returns the borrow out of the top digit of a (0 or 1).

   carry is expected to be 0 or 1 on entry: the assembly only tests and
   rewrites its low byte.
   NOTE(review): alen >= blen is assumed (alen -= blen is not checked);
   presumably guaranteed by callers. */
static bngcarry bng_amd64_sub
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngcarry carry)
{
  bngdigit tmp;
  alen -= blen;
  if (blen > 0) {
    /* The loop stores through a and loads through b, which are not
       expressed as asm operands.  It is therefore declared volatile (so it
       cannot be deleted or duplicated) with a "memory" clobber (so the
       compiler neither caches nor reorders memory accesses across it).
       "cc" records that the flags are modified. */
    __asm__ __volatile__(
        "negb %b3 \n\t"         /* CF := (carry != 0) */
        "1: \n\t"
        "movq (%0), %4 \n\t"    /* tmp = *a */
        "sbbq (%1), %4 \n\t"    /* tmp -= *b + CF */
        "movq %4, (%0) \n\t"    /* *a = tmp */
        "leaq 8(%0), %0 \n\t"   /* a++  (lea does not touch CF) */
        "leaq 8(%1), %1 \n\t"   /* b++ */
        "decq %2 \n\t"          /* blen--  (dec does not touch CF) */
        "jnz 1b \n\t"
        "setc %b3"              /* low byte of carry := CF (borrow) */
        : "=r" (a), "=r" (b), "=r" (blen), "=q" (carry), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "3" (carry)
        : "cc", "memory");
  }
  /* Nothing left to propagate, or no digits left to propagate into. */
  if (carry == 0 || alen == 0) return carry;
  /* Propagate the borrow: decrement digits until one was nonzero
     beforehand (a zero digit wraps around and keeps borrowing). */
  do {
    if ((*a)-- != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
101 
/* In-place multiply-accumulate by a single digit:
     a[0..alen) += d * b[0..blen)
   Returns the carry out.  When alen == blen this is the high digit left
   over from the last multiplication step; otherwise that digit is added
   into a[blen], the carry is propagated through the rest of a, and the
   final carry (0 or 1) is returned.

   NOTE(review): alen >= blen is assumed (alen -= blen is not checked);
   presumably guaranteed by callers. */
static bngdigit bng_amd64_mult_add_digit
     (bng a/*[alen]*/, bngsize alen,
      bng b/*[blen]*/, bngsize blen,
      bngdigit d)
{
  bngdigit out;
  bngcarry carry;

  alen -= blen;
  out = 0;
  if (blen > 0) {
    /* The loop stores through a and loads through b, which are not
       expressed as asm operands.  It is therefore declared volatile with a
       "memory" clobber so the compiler neither deletes it nor caches or
       reorders memory accesses across it.  "cc" records that the flags are
       modified; rax/rdx are the implicit operands of mulq. */
    __asm__ __volatile__(
        "1: \n\t"
        "movq (%1), %%rax \n\t"
        "mulq %7\n\t"           /* rdx:rax = d * next digit of b */
        "addq (%0), %%rax \n\t" /* add next digit of a to rax */
        "adcq $0, %%rdx \n\t"   /* accumulate carry in rdx */
        "addq %3, %%rax \n\t"   /* add out to rax */
        "adcq $0, %%rdx \n\t"   /* accumulate carry in rdx */
        "movq %%rax, (%0) \n\t" /* rax is next digit of result */
        "movq %%rdx, %3 \n\t"   /* rdx is next out */
        "leaq 8(%0), %0 \n\t"
        "leaq 8(%1), %1 \n\t"
        "decq %2 \n\t"
        "jnz 1b"
        : "=&r" (a), "=&r" (b), "=&r" (blen), "=&r" (out)
        : "0" (a), "1" (b), "2" (blen), "rm" (d), "3" (out)
        : "rax", "rdx", "cc", "memory");
  }
  if (alen == 0) return out;
  /* current digit of a += out */
  BngAdd2(*a, carry, *a, out);
  a++;
  alen--;
  /* Propagate carry */
  if (carry == 0 || alen == 0) return carry;
  do {
    if (++(*a) != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
143 
/* In-place multiply-subtract by a single digit:
     a[0..alen) -= d * b[0..blen)
   Returns the borrow out.  When alen == blen this is the digit left over
   from the last step (high half of the product plus accumulated borrows);
   otherwise that digit is subtracted from a[blen], the borrow is
   propagated through the rest of a, and the final borrow (0 or 1) is
   returned.

   NOTE(review): alen >= blen is assumed (alen -= blen is not checked);
   presumably guaranteed by callers. */
static bngdigit bng_amd64_mult_sub_digit
     (bng a/*[alen]*/, bngsize alen,
      bng b/*[blen]*/, bngsize blen,
      bngdigit d)
{
  bngdigit out, tmp;
  bngcarry carry;

  alen -= blen;
  out = 0;
  if (blen > 0) {
    /* The loop stores through a and loads through b, which are not
       expressed as asm operands.  It is therefore declared volatile with a
       "memory" clobber so the compiler neither deletes it nor caches or
       reorders memory accesses across it.  "cc" records that the flags are
       modified; rax/rdx are the implicit operands of mulq.  blen uses the
       same "=&r" constraint as in bng_amd64_mult_add_digit. */
    __asm__ __volatile__(
        "1: \n\t"
        "movq (%1), %%rax \n\t"
        "movq (%0), %4 \n\t"    /* tmp = next digit of a */
        "mulq %8\n\t"           /* rdx:rax = d * next digit of b */
        "subq %%rax, %4 \n\t"   /* subtract rax from next digit of a */
        "adcq $0, %%rdx \n\t"   /* accumulate carry in rdx */
        "subq %3, %4 \n\t"      /* subtract out */
        "adcq $0, %%rdx \n\t"   /* accumulate carry in rdx */
        "movq %4, (%0) \n\t"    /* store next digit of result */
        "movq %%rdx, %3 \n\t"   /* rdx is next out */
        "leaq 8(%0), %0 \n\t"
        "leaq 8(%1), %1 \n\t"
        "decq %2 \n\t"
        "jnz 1b"
        : "=&r" (a), "=&r" (b), "=&r" (blen), "=&r" (out), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "rm" (d), "3" (out)
        : "rax", "rdx", "cc", "memory");
  }
  if (alen == 0) return out;
  /* current digit of a -= out */
  BngSub2(*a, carry, *a, out);
  a++;
  alen--;
  /* Propagate carry */
  if (carry == 0 || alen == 0) return carry;
  /* A zero digit wraps around and keeps borrowing; a nonzero one absorbs
     the borrow. */
  do {
    if ((*a)-- != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
186 
bng_amd64_setup_ops(void)187 static void bng_amd64_setup_ops(void)
188 {
189   bng_ops.add = bng_amd64_add;
190   bng_ops.sub = bng_amd64_sub;
191   bng_ops.mult_add_digit = bng_amd64_mult_add_digit;
192   bng_ops.mult_sub_digit = bng_amd64_mult_sub_digit;
193 }
194 
/* Expands to a call of bng_amd64_setup_ops(), which installs the amd64
   routines into bng_ops.  Where it is expanded is not visible from this
   file. */
#define BNG_SETUP_OPS (bng_amd64_setup_ops())
196