1 /**************************************************************************/
2 /* */
3 /* OCaml */
4 /* */
5 /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
6 /* */
7 /* Copyright 2003 Institut National de Recherche en Informatique et */
8 /* en Automatique. */
9 /* */
10 /* All rights reserved. This file is distributed under the terms of */
11 /* the GNU Lesser General Public License version 2.1, with the */
12 /* special exception on linking described in the file LICENSE. */
13 /* */
14 /**************************************************************************/
15
16 /* Code specific to the AMD x86_64 architecture. */
17
/* res = arg1 + arg2 on one 64-bit digit (wrapping); carryout = carry out of
   that addition (0 or 1).  res and carryout must be lvalues.

   carryout is zeroed first (xor also clears CF, so it has to come before
   the add), then its low byte is set from the carry flag.  It is marked
   early-clobber because it is written before arg2 is read and therefore
   must not share a register with it.

   __asm__ is used instead of asm so the macro still compiles in strict ISO
   modes (-std=c99 / -std=c11).  %k1 prints the 32-bit name of the carry
   register, so the xorl assembles whatever integer width carryout has. */
#define BngAdd2(res,carryout,arg1,arg2)                                     \
  __asm__("xorl %k1, %k1 \n\t"                                              \
          "addq %3, %0 \n\t"                                                \
          "setc %b1"                                                        \
          : "=r" (res), "=&q" (carryout)                                    \
          : "0" (arg1), "rm" (arg2)                                         \
          : "cc")
24
/* res = arg1 - arg2 on one 64-bit digit (wrapping); carryout = borrow out of
   that subtraction (0 or 1).  res and carryout must be lvalues.

   carryout is zeroed first (xor also clears CF, so it has to come before
   the subtract), then its low byte is set from the carry flag, which subq
   uses to report a borrow.  It is marked early-clobber because it is
   written before arg2 is read and therefore must not share a register
   with it.

   __asm__ is used instead of asm so the macro still compiles in strict ISO
   modes (-std=c99 / -std=c11).  %k1 prints the 32-bit name of the carry
   register, so the xorl assembles whatever integer width carryout has. */
#define BngSub2(res,carryout,arg1,arg2)                                     \
  __asm__("xorl %k1, %k1 \n\t"                                              \
          "subq %3, %0 \n\t"                                                \
          "setc %b1"                                                        \
          : "=r" (res), "=&q" (carryout)                                    \
          : "0" (arg1), "rm" (arg2)                                         \
          : "cc")
31
/* resh:resl = arg1 * arg2, the full 128-bit unsigned product of two 64-bit
   digits.  resh and resl must be lvalues.

   mulq multiplies rax by its operand and leaves the product in rdx:rax,
   which is why arg1 and resl are pinned to rax ("a") and resh to rdx ("d").

   __asm__ is used instead of asm so the macro still compiles in strict ISO
   modes (-std=c99 / -std=c11). */
#define BngMult(resh,resl,arg1,arg2)                                        \
  __asm__("mulq %3"                                                         \
          : "=a" (resl), "=d" (resh)                                        \
          : "a" (arg1), "r" (arg2)                                          \
          : "cc")
36
/* Divide the 128-bit unsigned value nh:nl by the 64-bit digit d:
   quo = quotient, rem = remainder.  quo and rem must be lvalues.

   divq takes its dividend in rdx:rax and leaves the quotient in rax and the
   remainder in rdx, hence the "a"/"d" register pinning.

   Precondition: d != 0 and nh < d.  Otherwise the quotient does not fit in
   64 bits and divq raises a divide-error exception instead of returning.

   __asm__ is used instead of asm so the macro still compiles in strict ISO
   modes (-std=c99 / -std=c11). */
#define BngDiv(quo,rem,nh,nl,d)                                             \
  __asm__("divq %4"                                                         \
          : "=a" (quo), "=d" (rem)                                          \
          : "a" (nl), "d" (nh), "r" (d)                                     \
          : "cc")
41
42 /* Reimplementation in asm of some of the bng operations. */
43
/* In-place multi-digit addition: a[0..alen-1] += b[0..blen-1] + carry.
   Returns the carry out of the most significant digit of a (0 or 1).

   The digits of b are added to the low blen digits of a by the asm loop;
   any resulting carry is then propagated through the remaining
   alen - blen digits of a in C.

   NOTE(review): assumes alen >= blen (alen - blen is used as a digit count)
   and that the incoming carry is 0 or 1 -- confirm against the callers. */
static bngcarry bng_amd64_add
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngcarry carry)
{
  bngdigit tmp;
  alen -= blen;               /* digits of a left once b has been consumed */
  if (blen > 0) {
    /* %0 = a, %1 = b, %2 = blen (loop counter), %3 = carry, %4 = scratch.
       The carry travels between iterations in CF: leaq never touches the
       flags and decq leaves CF alone, so only adcq updates it.
       The asm loads from b[] and stores into a[] through pointers the
       compiler cannot see as operands, so it is declared volatile with a
       "memory" clobber; otherwise surrounding loads/stores could be
       reordered or cached across it.  The scratch register is
       early-clobber because it is written while the inputs are still
       live. */
    __asm__ volatile(
        "negb %b3 \n\t"             /* CF = (carry != 0) */
        "1: \n\t"
        "movq (%0), %4 \n\t"        /* tmp = *a */
        "adcq (%1), %4 \n\t"        /* tmp += *b + CF */
        "movq %4, (%0) \n\t"        /* *a = tmp */
        "leaq 8(%0), %0 \n\t"       /* a++ (flags preserved) */
        "leaq 8(%1), %1 \n\t"       /* b++ (flags preserved) */
        "decq %2 \n\t"              /* blen--, sets ZF, keeps CF */
        "jnz 1b \n\t"
        "setc %b3"                  /* carry = final CF */
        : "=r" (a), "=r" (b), "=r" (blen), "=q" (carry), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "3" (carry)
        : "cc", "memory");
  }
  /* Nothing to propagate, or no digits left to propagate into. */
  if (carry == 0 || alen == 0) return carry;
  /* Ripple the carry: stop at the first digit that does not wrap to 0. */
  do {
    if (++(*a) != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
72
/* In-place multi-digit subtraction: a[0..alen-1] -= b[0..blen-1] + carry,
   where carry is the incoming borrow.
   Returns the borrow out of the most significant digit of a (0 or 1).

   The digits of b are subtracted from the low blen digits of a by the asm
   loop; any resulting borrow is then propagated through the remaining
   alen - blen digits of a in C.

   NOTE(review): assumes alen >= blen (alen - blen is used as a digit count)
   and that the incoming borrow is 0 or 1 -- confirm against the callers. */
static bngcarry bng_amd64_sub
       (bng a/*[alen]*/, bngsize alen,
        bng b/*[blen]*/, bngsize blen,
        bngcarry carry)
{
  bngdigit tmp;
  alen -= blen;               /* digits of a left once b has been consumed */
  if (blen > 0) {
    /* %0 = a, %1 = b, %2 = blen (loop counter), %3 = carry, %4 = scratch.
       The borrow travels between iterations in CF: leaq never touches the
       flags and decq leaves CF alone, so only sbbq updates it.
       The asm loads from b[] and stores into a[] through pointers the
       compiler cannot see as operands, so it is declared volatile with a
       "memory" clobber; otherwise surrounding loads/stores could be
       reordered or cached across it.  The scratch register is
       early-clobber because it is written while the inputs are still
       live. */
    __asm__ volatile(
        "negb %b3 \n\t"             /* CF = (borrow != 0) */
        "1: \n\t"
        "movq (%0), %4 \n\t"        /* tmp = *a */
        "sbbq (%1), %4 \n\t"        /* tmp -= *b + CF */
        "movq %4, (%0) \n\t"        /* *a = tmp */
        "leaq 8(%0), %0 \n\t"       /* a++ (flags preserved) */
        "leaq 8(%1), %1 \n\t"       /* b++ (flags preserved) */
        "decq %2 \n\t"              /* blen--, sets ZF, keeps CF */
        "jnz 1b \n\t"
        "setc %b3"                  /* borrow = final CF */
        : "=r" (a), "=r" (b), "=r" (blen), "=q" (carry), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "3" (carry)
        : "cc", "memory");
  }
  /* Nothing to propagate, or no digits left to propagate into. */
  if (carry == 0 || alen == 0) return carry;
  /* Ripple the borrow: stop at the first digit that was non-zero before
     being decremented. */
  do {
    if ((*a)-- != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
101
/* In-place multiply-accumulate by a single digit:
   a[0..alen-1] += d * b[0..blen-1].

   Returns the carry out.  When alen == blen this is the full high digit
   left over from the last multiplication; otherwise that digit is added
   into a[blen], the carry is rippled through the rest of a, and the
   function returns 0 or 1.

   NOTE(review): assumes alen >= blen (alen - blen is used as a digit
   count) -- confirm against the callers. */
static bngdigit bng_amd64_mult_add_digit
     (bng a/*[alen]*/, bngsize alen,
      bng b/*[blen]*/, bngsize blen,
      bngdigit d)
{
  bngdigit out;
  bngcarry carry;

  alen -= blen;               /* digits of a left once b has been consumed */
  out = 0;
  if (blen > 0) {
    /* %0 = a, %1 = b, %2 = blen (loop counter), %3 = out, %7 = d.
       rax and rdx are used by mulq and listed as clobbers.
       The asm loads from b[] and a[] and stores into a[] through pointers
       the compiler cannot see as operands, so it is declared volatile with
       a "memory" clobber; otherwise surrounding loads/stores could be
       reordered or cached across it. */
    __asm__ volatile(
        "1: \n\t"
        "movq (%1), %%rax \n\t"
        "mulq %7\n\t"             /* rdx:rax = d * next digit of b */
        "addq (%0), %%rax \n\t"   /* add next digit of a to rax */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "addq %3, %%rax \n\t"     /* add out to rax */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "movq %%rax, (%0) \n\t"   /* rax is next digit of result */
        "movq %%rdx, %3 \n\t"     /* rdx is next out */
        "leaq 8(%0), %0 \n\t"
        "leaq 8(%1), %1 \n\t"
        "decq %2 \n\t"
        "jnz 1b"
        : "=&r" (a), "=&r" (b), "=&r" (blen), "=&r" (out)
        : "0" (a), "1" (b), "2" (blen), "rm" (d), "3" (out)
        : "rax", "rdx", "cc", "memory");
  }
  if (alen == 0) return out;
  /* current digit of a += out */
  BngAdd2(*a, carry, *a, out);
  a++;
  alen--;
  /* Propagate carry */
  if (carry == 0 || alen == 0) return carry;
  /* Stop at the first digit that does not wrap around to 0. */
  do {
    if (++(*a) != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
143
/* In-place multiply-subtract by a single digit:
   a[0..alen-1] -= d * b[0..blen-1].

   Returns the borrow out.  When alen == blen this is the full digit left
   over from the last step; otherwise that digit is subtracted from
   a[blen], the borrow is rippled through the rest of a, and the function
   returns 0 or 1.

   NOTE(review): assumes alen >= blen (alen - blen is used as a digit
   count) -- confirm against the callers. */
static bngdigit bng_amd64_mult_sub_digit
     (bng a/*[alen]*/, bngsize alen,
      bng b/*[blen]*/, bngsize blen,
      bngdigit d)
{
  bngdigit out, tmp;
  bngcarry carry;

  alen -= blen;               /* digits of a left once b has been consumed */
  out = 0;
  if (blen > 0) {
    /* %0 = a, %1 = b, %2 = blen (loop counter), %3 = out, %4 = scratch,
       %8 = d.  rax and rdx are used by mulq and listed as clobbers.
       The asm loads from b[] and a[] and stores into a[] through pointers
       the compiler cannot see as operands, so it is declared volatile with
       a "memory" clobber; otherwise surrounding loads/stores could be
       reordered or cached across it. */
    __asm__ volatile(
        "1: \n\t"
        "movq (%1), %%rax \n\t"
        "movq (%0), %4 \n\t"
        "mulq %8\n\t"             /* rdx:rax = d * next digit of b */
        "subq %%rax, %4 \n\t"     /* subtract rax from next digit of a */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "subq %3, %4 \n\t"        /* subtract out */
        "adcq $0, %%rdx \n\t"     /* accumulate carry in rdx */
        "movq %4, (%0) \n\t"      /* store next digit of result */
        "movq %%rdx, %3 \n\t"     /* rdx is next out */
        "leaq 8(%0), %0 \n\t"
        "leaq 8(%1), %1 \n\t"
        "decq %2 \n\t"
        "jnz 1b"
        : "=&r" (a), "=&r" (b), "=&rm" (blen), "=&r" (out), "=&r" (tmp)
        : "0" (a), "1" (b), "2" (blen), "rm" (d), "3" (out)
        : "rax", "rdx", "cc", "memory");
  }
  if (alen == 0) return out;
  /* current digit of a -= out */
  BngSub2(*a, carry, *a, out);
  a++;
  alen--;
  /* Propagate carry */
  if (carry == 0 || alen == 0) return carry;
  /* Stop at the first digit that was non-zero before being decremented. */
  do {
    if ((*a)-- != 0) return 0;
    a++;
  } while (--alen);
  return 1;
}
186
bng_amd64_setup_ops(void)187 static void bng_amd64_setup_ops(void)
188 {
189 bng_ops.add = bng_amd64_add;
190 bng_ops.sub = bng_amd64_sub;
191 bng_ops.mult_add_digit = bng_amd64_mult_add_digit;
192 bng_ops.mult_sub_digit = bng_amd64_mult_sub_digit;
193 }
194
/* Expands to a call of bng_amd64_setup_ops(), which stores the amd64
   routines into bng_ops.
   NOTE(review): presumably the generic bng code invokes BNG_SETUP_OPS once
   during initialisation -- confirm at the use site. */
#define BNG_SETUP_OPS bng_amd64_setup_ops()
196