/*
    Copyright (C) 2014 Fredrik Johansson

    This file is part of Arb.

    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
*/
11
12 #include "arf.h"
13
/* Fallback path for (e + f*i) = (a + b*i) * (c + d*i), built from ordinary
   arf operations.  Used by arf_complex_mul when some operand is a special
   value or fails the ARF_IS_LAGOM size check.
   Returns inex1 | (inex2 << 1): bit 0 set iff e is inexact, bit 1 set iff
   f is inexact, at precision prec with rounding mode rnd. */
int arf_complex_mul_fallback(arf_t e, arf_t f,
        const arf_t a, const arf_t b,
        const arf_t c, const arf_t d,
        slong prec, arf_rnd_t rnd)
{
    int inex1, inex2;

    /* here the operations are ordered to allow aliasing
       (e, f may alias the inputs; do not reorder these calls) */
    if (arf_is_zero(d))
    {
        /* (a + bi) * c */
        /* f = bc is written before e = ac */
        inex2 = arf_mul(f, b, c, prec, rnd);
        inex1 = arf_mul(e, a, c, prec, rnd);
    }
    else if (arf_is_zero(b))
    {
        /* a * (c + di) */
        inex2 = arf_mul(f, a, d, prec, rnd);
        inex1 = arf_mul(e, a, c, prec, rnd);
    }
    else if (arf_is_zero(c))
    {
        /* (a + bi) * di = -bd + adi */
        /* e temporarily holds ad, f holds -bd; the swap puts the results
           in place, so inex1 tracks the final e and inex2 the final f */
        inex2 = arf_mul(e, a, d, prec, rnd);
        inex1 = arf_neg_mul(f, b, d, prec, rnd);
        arf_swap(e, f);
    }
    else if (arf_is_zero(a))
    {
        /* bi * (c + di) = -bd + bci */
        /* same swap trick as above: e <- -bd, f <- bc */
        inex2 = arf_mul(e, b, c, prec, rnd);
        inex1 = arf_neg_mul(f, b, d, prec, rnd);
        arf_swap(e, f);
    }
    else
    {
        /* General case: compute all four products exactly in temporaries
           (immune to aliasing), then round only in the final add/sub. */
        arf_t t, u, v, w;

        arf_init(t);
        arf_init(u);
        arf_init(v);
        arf_init(w);

        arf_mul(t, a, c, ARF_PREC_EXACT, ARF_RND_DOWN);
        arf_mul(u, b, d, ARF_PREC_EXACT, ARF_RND_DOWN);

        arf_mul(v, b, c, ARF_PREC_EXACT, ARF_RND_DOWN);
        arf_mul(w, a, d, ARF_PREC_EXACT, ARF_RND_DOWN);

        /* e = ac - bd, f = bc + ad */
        inex1 = arf_sub(e, t, u, prec, rnd);
        inex2 = arf_add(f, v, w, prec, rnd);

        arf_clear(t);
        arf_clear(u);
        arf_clear(v);
        arf_clear(w);
    }

    return inex1 | (inex2 << 1);
}
74
75
/* Computes (e + f*i) = (a + b*i) * (c + d*i), correctly rounded to prec
   bits in rounding mode rnd.
   Returns inex1 | (inex2 << 1): bit 0 set iff e is inexact, bit 1 set iff
   f is inexact. */
int arf_complex_mul(arf_t e, arf_t f, const arf_t a, const arf_t b,
        const arf_t c, const arf_t d,
        slong prec, arf_rnd_t rnd)
{
    mp_srcptr ap, bp, cp, dp;
    int asgn, bsgn, csgn, dsgn, inex1, inex2;
    mp_ptr tmp, acp, bdp, adp, bcp;
    mp_size_t an, bn, cn, dn, acn, bdn, adn, bcn, alloc;
    slong shift;
    slong aexp, bexp, cexp, dexp;
    /* Exponents passed by reference to the arf internals, which take
       fmpz exponents.  The values stored here are sums of lagom-sized
       slong exponents, so they remain small (inline) fmpz values. */
    fmpz texp, uexp;

    /* The fast mpn path below assumes all four operands are nonzero,
       finite, and of "lagom" (moderate) size; anything else goes through
       the generic fallback. */
    if (!ARF_IS_LAGOM(a) || !ARF_IS_LAGOM(b) ||
        !ARF_IS_LAGOM(c) || !ARF_IS_LAGOM(d) ||
        ARF_IS_SPECIAL(d) || ARF_IS_SPECIAL(a) ||
        ARF_IS_SPECIAL(b) || ARF_IS_SPECIAL(c))
    {
        return arf_complex_mul_fallback(e, f, a, b, c, d, prec, rnd);
    }

    /* Read out raw mantissa limbs, sign bits and exponents. */
    ARF_GET_MPN_READONLY(ap, an, a);
    asgn = ARF_SGNBIT(a);
    aexp = ARF_EXP(a);

    ARF_GET_MPN_READONLY(bp, bn, b);
    bsgn = ARF_SGNBIT(b);
    bexp = ARF_EXP(b);

    ARF_GET_MPN_READONLY(cp, cn, c);
    csgn = ARF_SGNBIT(c);
    cexp = ARF_EXP(c);

    ARF_GET_MPN_READONLY(dp, dn, d);
    dsgn = ARF_SGNBIT(d);
    dexp = ARF_EXP(d);

    /* Gauss multiplication
        e = ac - bd
        f = (a+b)(c+d) - ac - bd
       Three big multiplications instead of four; only worthwhile when the
       operands are large and well balanced in size and magnitude (the
       bounds 20 limbs, 2 limbs, 64 bits are heuristic cutoffs). */
    if (an >= 20 &&
        cn >= 20 &&
        FLINT_ABS(an - bn) <= 2 &&
        FLINT_ABS(cn - dn) <= 2 &&
        FLINT_ABS(aexp - bexp) <= 64 &&
        FLINT_ABS(cexp - dexp) <= 64)
    {
        fmpz_t za, zb, zc, zd, t, u, v;
        slong abot, bbot, cbot, dbot;

        /* Exponent of the least significant bit of each mantissa. */
        abot = aexp - an * FLINT_BITS;
        bbot = bexp - bn * FLINT_BITS;
        cbot = cexp - cn * FLINT_BITS;
        dbot = dexp - dn * FLINT_BITS;

        /* Common scaling: a,b as integers * 2^texp; c,d as integers * 2^uexp. */
        texp = FLINT_MIN(abot, bbot);
        uexp = FLINT_MIN(cbot, dbot);

        fmpz_init(za);
        fmpz_init(zb);
        fmpz_init(zc);
        fmpz_init(zd);
        fmpz_init(t);
        fmpz_init(u);
        fmpz_init(v);

        /* todo: could avoid two copies */
        fmpz_lshift_mpn(za, ap, an, asgn, abot - texp);
        fmpz_lshift_mpn(zb, bp, bn, bsgn, bbot - texp);
        fmpz_lshift_mpn(zc, cp, cn, csgn, cbot - uexp);
        fmpz_lshift_mpn(zd, dp, dn, dsgn, dbot - uexp);

        /* u = (a+b)(c+d), t = ac, v = bd (all as scaled integers) */
        fmpz_add(t, za, zb);
        fmpz_add(v, zc, zd);
        fmpz_mul(u, t, v);
        fmpz_mul(t, za, zc);
        fmpz_mul(v, zb, zd);
        /* u -> ad + bc, t -> ac - bd */
        fmpz_sub(u, u, t);
        fmpz_sub(u, u, v);
        fmpz_sub(t, t, v);

        /* Both results carry the combined scale 2^(texp + uexp). */
        texp += uexp;
        inex1 = arf_set_round_fmpz_2exp(e, t, &texp, prec, rnd);
        inex2 = arf_set_round_fmpz_2exp(f, u, &texp, prec, rnd);

        fmpz_clear(za);
        fmpz_clear(zb);
        fmpz_clear(zc);
        fmpz_clear(zd);
        fmpz_clear(t);
        fmpz_clear(u);
        fmpz_clear(v);
    }
    else
    {
        /* Schoolbook path: compute the four limb products ac, bd, ad, bc
           into one scratch buffer, then form e = ac - bd and f = ad + bc
           with a single rounding each via _arf_add_mpn. */
        ARF_MUL_TMP_DECL

        acn = an + cn;
        bdn = bn + dn;
        adn = an + dn;
        bcn = bn + cn;

        alloc = acn + bdn + adn + bcn;

        ARF_MUL_TMP_ALLOC(tmp, alloc)
        acp = tmp;
        bdp = acp + acn;
        adp = bdp + bdn;
        bcp = adp + adn;

        /* After each product, skip the bottom limb if it is zero so the
           operand passed to _arf_add_mpn has a nonzero low limb. */
        ARF_MPN_MUL(acp, ap, an, cp, cn)
        acn -= (acp[0] == 0);
        acp += (acp[0] == 0);

        ARF_MPN_MUL(bdp, bp, bn, dp, dn)
        bdn -= (bdp[0] == 0);
        bdp += (bdp[0] == 0);

        ARF_MPN_MUL(adp, ap, an, dp, dn)
        adn -= (adp[0] == 0);
        adp += (adp[0] == 0);

        ARF_MPN_MUL(bcp, bp, bn, cp, cn)
        bcn -= (bcp[0] == 0);
        bcp += (bcp[0] == 0);

        /* e = ac - bd: the larger-exponent term goes first, and the bd
           term's sign is flipped (^ 1) to effect the subtraction. */
        texp = aexp + cexp;
        uexp = bexp + dexp;
        shift = texp - uexp;

        if (shift >= 0)
            inex1 = _arf_add_mpn(e, acp, acn, asgn ^ csgn, &texp,
                bdp, bdn, bsgn ^ dsgn ^ 1, shift, prec, rnd);
        else
            inex1 = _arf_add_mpn(e, bdp, bdn, bsgn ^ dsgn ^ 1, &uexp,
                acp, acn, asgn ^ csgn, -shift, prec, rnd);

        /* f = ad + bc */
        texp = aexp + dexp;
        uexp = bexp + cexp;
        shift = texp - uexp;

        if (shift >= 0)
            inex2 = _arf_add_mpn(f, adp, adn, asgn ^ dsgn, &texp,
                bcp, bcn, bsgn ^ csgn, shift, prec, rnd);
        else
            inex2 = _arf_add_mpn(f, bcp, bcn, bsgn ^ csgn, &uexp,
                adp, adn, asgn ^ dsgn, -shift, prec, rnd);

        ARF_MUL_TMP_FREE(tmp, alloc)
    }

    return inex1 | (inex2 << 1);
}
228
/* Computes (e + f*i) = (a + b*i)^2 = (a^2 - b^2) + (2ab)*i, correctly
   rounded to prec bits in rounding mode rnd.
   Returns inex1 | (inex2 << 1): bit 0 set iff e is inexact, bit 1 set iff
   f is inexact. */
int arf_complex_sqr(arf_t e, arf_t f,
        const arf_t a, const arf_t b, slong prec, arf_rnd_t rnd)
{
    /* Special or oversized operands go through the generic routine with
       (c, d) = (a, b). */
    if (!ARF_IS_LAGOM(a) || !ARF_IS_LAGOM(b) ||
        ARF_IS_SPECIAL(a) || ARF_IS_SPECIAL(b))
    {
        return arf_complex_mul_fallback(e, f, a, b, a, b, prec, rnd);
    }
    else
    {
        mp_srcptr ap, bp;
        int inex1, inex2;
        mp_ptr tmp, aap, bbp;
        mp_size_t an, bn, aan, bbn, alloc;
        slong shift;
        slong aexp, bexp;
        /* Small inline fmpz exponents, as in arf_complex_mul. */
        fmpz texp, uexp;
        TMP_INIT;

        ARF_GET_MPN_READONLY(ap, an, a);
        aexp = ARF_EXP(a);

        ARF_GET_MPN_READONLY(bp, bn, b);
        bexp = ARF_EXP(b);

        /* Scratch space for the two squarings a^2 and b^2. */
        aan = 2 * an;
        bbn = 2 * bn;

        alloc = aan + bbn;

        TMP_START;

        tmp = TMP_ALLOC(alloc * sizeof(mp_limb_t));
        aap = tmp;
        bbp = tmp + aan;

        /* As in arf_complex_mul: skip a zero bottom limb so _arf_add_mpn
           receives operands with a nonzero low limb. */
        ARF_MPN_MUL(aap, ap, an, ap, an)
        aan -= (aap[0] == 0);
        aap += (aap[0] == 0);

        ARF_MPN_MUL(bbp, bp, bn, bp, bn)
        bbn -= (bbp[0] == 0);
        bbp += (bbp[0] == 0);

        texp = aexp + aexp;
        uexp = bexp + bexp;
        shift = texp - uexp;

        /* f = 2ab: compute ab, then double by bumping the exponent.
           Done only after a and b have been copied into the squares
           above, so f may alias a or b. */
        inex2 = arf_mul(f, a, b, prec, rnd);
        ARF_EXP(f) += 1;

        /* e = a^2 - b^2: a^2 is always positive (sign bit 0), the b^2
           term carries sign bit 1 to effect the subtraction. */
        if (shift >= 0)
            inex1 = _arf_add_mpn(e, aap, aan, 0, &texp,
                bbp, bbn, 1, shift, prec, rnd);
        else
            inex1 = _arf_add_mpn(e, bbp, bbn, 1, &uexp,
                aap, aan, 0, -shift, prec, rnd);

        TMP_END;

        return inex1 | (inex2 << 1);
    }
}
292
293