/*
    Copyright (C) 2014 Fredrik Johansson

    This file is part of Arb.

    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
*/

#include "arf.h"

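/* Generic version: computes e + f*i = (a + b*i)*(c + d*i), rounding both
   parts to prec bits in direction rnd. Bit 0 of the return value signals
   an inexact e, bit 1 an inexact f. */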
int arf_complex_mul_fallback(arf_t e, arf_t f,
        const arf_t a, const arf_t b,
        const arf_t c, const arf_t d,
        slong prec, arf_rnd_t rnd)
{
    int inex1, inex2;

    /* here the operations are ordered to allow aliasing */
    if (arf_is_zero(d))
    {
        /* (a + bi) * c */
        inex2 = arf_mul(f, b, c, prec, rnd);
        inex1 = arf_mul(e, a, c, prec, rnd);
    }
    else if (arf_is_zero(b))
    {
        /* a * (c + di) */
        inex2 = arf_mul(f, a, d, prec, rnd);
        inex1 = arf_mul(e, a, c, prec, rnd);
    }
    else if (arf_is_zero(c))
    {
        /* (a + bi) * di = -bd + adi */
        inex2 = arf_mul(e, a, d, prec, rnd);
        inex1 = arf_neg_mul(f, b, d, prec, rnd);
        arf_swap(e, f);
    }
    else if (arf_is_zero(a))
    {
        /* bi * (c + di) = -bd + bci */
        inex2 = arf_mul(e, b, c, prec, rnd);
        inex1 = arf_neg_mul(f, b, d, prec, rnd);
        arf_swap(e, f);
    }
    else
    {
        arf_t t, u, v, w;

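        /* general case: compute the four products exactly, so that
           rounding happens only in the final addition and subtraction */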
        arf_init(t);
        arf_init(u);
        arf_init(v);
        arf_init(w);

        arf_mul(t, a, c, ARF_PREC_EXACT, ARF_RND_DOWN);
        arf_mul(u, b, d, ARF_PREC_EXACT, ARF_RND_DOWN);

        arf_mul(v, b, c, ARF_PREC_EXACT, ARF_RND_DOWN);
        arf_mul(w, a, d, ARF_PREC_EXACT, ARF_RND_DOWN);

        inex1 = arf_sub(e, t, u, prec, rnd);
        inex2 = arf_add(f, v, w, prec, rnd);

        arf_clear(t);
        arf_clear(u);
        arf_clear(v);
        arf_clear(w);
    }

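    /* bit 0: rounding of e (the real part) was inexact; bit 1: same for f */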
    return inex1 | (inex2 << 1);
}


int arf_complex_mul(arf_t e, arf_t f, const arf_t a, const arf_t b,
                                      const arf_t c, const arf_t d,
                                      slong prec, arf_rnd_t rnd)
{
    mp_srcptr ap, bp, cp, dp;
    int asgn, bsgn, csgn, dsgn, inex1, inex2;
    mp_ptr tmp, acp, bdp, adp, bcp;
    mp_size_t an, bn, cn, dn, acn, bdn, adn, bcn, alloc;
    slong shift;
    slong aexp, bexp, cexp, dexp;
    fmpz texp, uexp;

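    /* delegate special values (including zero) and inputs whose exponents
       are outside the small ("lagom") range to the generic version */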
    if (!ARF_IS_LAGOM(a) || !ARF_IS_LAGOM(b) ||
        !ARF_IS_LAGOM(c) || !ARF_IS_LAGOM(d) ||
        ARF_IS_SPECIAL(d) || ARF_IS_SPECIAL(a) ||
        ARF_IS_SPECIAL(b) || ARF_IS_SPECIAL(c))
    {
        return arf_complex_mul_fallback(e, f, a, b, c, d, prec, rnd);
    }

    ARF_GET_MPN_READONLY(ap, an, a);
    asgn = ARF_SGNBIT(a);
    aexp = ARF_EXP(a);

    ARF_GET_MPN_READONLY(bp, bn, b);
    bsgn = ARF_SGNBIT(b);
    bexp = ARF_EXP(b);

    ARF_GET_MPN_READONLY(cp, cn, c);
    csgn = ARF_SGNBIT(c);
    cexp = ARF_EXP(c);

    ARF_GET_MPN_READONLY(dp, dn, d);
    dsgn = ARF_SGNBIT(d);
    dexp = ARF_EXP(d);

    /* Gauss multiplication
        e = ac - bd
        f = (a+b)(c+d) - ac - bd */
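    /* This trades four full products for three, which only pays off when
       the operands are large and of comparable size and magnitude, hence
       the restrictions below. */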
    if (an >= 20 &&
        cn >= 20 &&
        FLINT_ABS(an - bn) <= 2 &&
        FLINT_ABS(cn - dn) <= 2 &&
        FLINT_ABS(aexp - bexp) <= 64 &&
        FLINT_ABS(cexp - dexp) <= 64)
    {
        fmpz_t za, zb, zc, zd, t, u, v;
        slong abot, bbot, cbot, dbot;

        abot = aexp - an * FLINT_BITS;
        bbot = bexp - bn * FLINT_BITS;
        cbot = cexp - cn * FLINT_BITS;
        dbot = dexp - dn * FLINT_BITS;

        texp = FLINT_MIN(abot, bbot);
        uexp = FLINT_MIN(cbot, dbot);
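        /* texp, uexp are common bottom exponents, so that a = za*2^texp,
           b = zb*2^texp, c = zc*2^uexp, d = zd*2^uexp below hold exactly
           with integer za, zb, zc, zd */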

        fmpz_init(za);
        fmpz_init(zb);
        fmpz_init(zc);
        fmpz_init(zd);
        fmpz_init(t);
        fmpz_init(u);
        fmpz_init(v);

        /* todo: could avoid two copies */
        fmpz_lshift_mpn(za, ap, an, asgn, abot - texp);
        fmpz_lshift_mpn(zb, bp, bn, bsgn, bbot - texp);
        fmpz_lshift_mpn(zc, cp, cn, csgn, cbot - uexp);
        fmpz_lshift_mpn(zd, dp, dn, dsgn, dbot - uexp);

        fmpz_add(t, za, zb);
        fmpz_add(v, zc, zd);
        fmpz_mul(u, t, v);
        fmpz_mul(t, za, zc);
        fmpz_mul(v, zb, zd);
        fmpz_sub(u, u, t);
        fmpz_sub(u, u, v);
        fmpz_sub(t, t, v);

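        /* now t*2^(texp+uexp) = ac - bd and u*2^(texp+uexp) = ad + bc */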
        texp += uexp;
        inex1 = arf_set_round_fmpz_2exp(e, t, &texp, prec, rnd);
        inex2 = arf_set_round_fmpz_2exp(f, u, &texp, prec, rnd);

        fmpz_clear(za);
        fmpz_clear(zb);
        fmpz_clear(zc);
        fmpz_clear(zd);
        fmpz_clear(t);
        fmpz_clear(u);
        fmpz_clear(v);
    }
    else
    {
        ARF_MUL_TMP_DECL

        acn = an + cn;
        bdn = bn + dn;
        adn = an + dn;
        bcn = bn + cn;

        alloc = acn + bdn + adn + bcn;

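        /* single scratch buffer, carved into areas for the four products
           ac, bd, ad, bc */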
        ARF_MUL_TMP_ALLOC(tmp, alloc)
        acp = tmp;
        bdp = acp + acn;
        adp = bdp + bdn;
        bcp = adp + adn;

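        /* compute the four products, skipping a zero low limb if present */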
        ARF_MPN_MUL(acp, ap, an, cp, cn)
        acn -= (acp[0] == 0);
        acp += (acp[0] == 0);

        ARF_MPN_MUL(bdp, bp, bn, dp, dn)
        bdn -= (bdp[0] == 0);
        bdp += (bdp[0] == 0);

        ARF_MPN_MUL(adp, ap, an, dp, dn)
        adn -= (adp[0] == 0);
        adp += (adp[0] == 0);

        ARF_MPN_MUL(bcp, bp, bn, cp, cn)
        bcn -= (bcp[0] == 0);
        bcp += (bcp[0] == 0);

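        /* e = ac - bd (the extra ^ 1 on the sign bit of the bd term turns
           the addition into a subtraction) */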
        texp = aexp + cexp;
        uexp = bexp + dexp;
        shift = texp - uexp;

        if (shift >= 0)
            inex1 = _arf_add_mpn(e, acp, acn, asgn ^ csgn, &texp,
                                    bdp, bdn, bsgn ^ dsgn ^ 1, shift, prec, rnd);
        else
            inex1 = _arf_add_mpn(e, bdp, bdn, bsgn ^ dsgn ^ 1, &uexp,
                                    acp, acn, asgn ^ csgn, -shift, prec, rnd);

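        /* f = ad + bc */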
        texp = aexp + dexp;
        uexp = bexp + cexp;
        shift = texp - uexp;

        if (shift >= 0)
            inex2 = _arf_add_mpn(f, adp, adn, asgn ^ dsgn, &texp,
                                    bcp, bcn, bsgn ^ csgn, shift, prec, rnd);
        else
            inex2 = _arf_add_mpn(f, bcp, bcn, bsgn ^ csgn, &uexp,
                                    adp, adn, asgn ^ dsgn, -shift, prec, rnd);

        ARF_MUL_TMP_FREE(tmp, alloc)
    }

    return inex1 | (inex2 << 1);
}

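/* Special case of arf_complex_mul with (c, d) = (a, b):
   e + f*i = (a + b*i)^2, i.e. e = a^2 - b^2 and f = 2ab. */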
int arf_complex_sqr(arf_t e, arf_t f,
    const arf_t a, const arf_t b, slong prec, arf_rnd_t rnd)
{
    if (!ARF_IS_LAGOM(a) || !ARF_IS_LAGOM(b) ||
        ARF_IS_SPECIAL(a) || ARF_IS_SPECIAL(b))
    {
        return arf_complex_mul_fallback(e, f, a, b, a, b, prec, rnd);
    }
    else
    {
        mp_srcptr ap, bp;
        int inex1, inex2;
        mp_ptr tmp, aap, bbp;
        mp_size_t an, bn, aan, bbn, alloc;
        slong shift;
        slong aexp, bexp;
        fmpz texp, uexp;
        TMP_INIT;

        ARF_GET_MPN_READONLY(ap, an, a);
        aexp = ARF_EXP(a);

        ARF_GET_MPN_READONLY(bp, bn, b);
        bexp = ARF_EXP(b);

        aan = 2 * an;
        bbn = 2 * bn;

        alloc = aan + bbn;

        TMP_START;

        tmp = TMP_ALLOC(alloc * sizeof(mp_limb_t));
        aap = tmp;
        bbp = tmp + aan;

        ARF_MPN_MUL(aap, ap, an, ap, an)
        aan -= (aap[0] == 0);
        aap += (aap[0] == 0);

        ARF_MPN_MUL(bbp, bp, bn, bp, bn)
        bbn -= (bbp[0] == 0);
        bbp += (bbp[0] == 0);

        texp = aexp + aexp;
        uexp = bexp + bexp;
        shift = texp - uexp;

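        /* f = 2ab: multiply, then double by bumping the exponent */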
        inex2 = arf_mul(f, a, b, prec, rnd);
        ARF_EXP(f) += 1;

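        /* e = a^2 - b^2 */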
        if (shift >= 0)
            inex1 = _arf_add_mpn(e, aap, aan, 0, &texp,
                                    bbp, bbn, 1, shift, prec, rnd);
        else
            inex1 = _arf_add_mpn(e, bbp, bbn, 1, &uexp,
                                    aap, aan, 0, -shift, prec, rnd);

        TMP_END;

        return inex1 | (inex2 << 1);
    }
}
