/* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.

   THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
   ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
   COMPLETELY IN FUTURE GNU MP RELEASES.

Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"


#if HAVE_NATIVE_mpn_mul_1c
#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  do {                                                  \
    (cout) = mpn_mul_1c (dst, src, size, n, cin);       \
  } while (0)
#else
#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  do {                                                  \
    mp_limb_t __cy;                                     \
    __cy = mpn_mul_1 (dst, src, size, n);               \
    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \
  } while (0)
#endif
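/* In the fallback above the two carries are simply summed: the high limb
   returned by mpn_mul_1 is at most n-1 and the carry returned by mpn_add_1
   is 0 or 1, so their sum cannot overflow a limb and cout is the exact
   carry out of dst.  */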


/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.

   All that's needed to account for negative w or x is to flip "sub".

   The final w will retain its sign, unless an underflow occurs in a submul
   of absolute values, in which case it's flipped.

   If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
   used.  The alternative would be mpn_mul_1 into temporary space followed
   by mpn_sub_n.  Avoiding temporary space seems good, and submul+com stands
   a chance of being faster since it involves only one set of carry
   propagations, not two.  Note that doing an addmul_1 with a
   twos-complement negative y doesn't work, because it effectively adds an
   extra x * 2^GMP_LIMB_BITS.  */
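
/* A small worked example of the sign folding below (illustrative only):
   for w = -7, x = 5, y = 3 and sub = 0 (an addmul), the XOR against
   SIZ(x) > 0 leaves sub >= 0 and the XOR against SIZ(w) < 0 makes it
   negative, so a submul of absolute values 7 - 5*3 is done.  That
   underflows, the twos complement fixup gives |w| = 8 and the sign of w
   is flipped, giving the correct -7 + 5*3 = 8.  */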

REGPARM_ATTR(1) void
mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
{
  mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
  mp_srcptr  xp;
  mp_ptr     wp;
  mp_limb_t  cy;

  /* w unaffected if x==0 or y==0 */
  xsize = SIZ (x);
  if (xsize == 0 || y == 0)
    return;

  sub ^= xsize;
  xsize = ABS (xsize);

  wsize_signed = SIZ (w);
  if (wsize_signed == 0)
    {
      /* nothing to add to, just set x*y, "sub" gives the sign */
      MPZ_REALLOC (w, xsize+1);
      wp = PTR (w);
      cy = mpn_mul_1 (wp, PTR(x), xsize, y);
      wp[xsize] = cy;
      xsize += (cy != 0);
      SIZ (w) = (sub >= 0 ? xsize : -xsize);
      return;
    }

  sub ^= wsize_signed;
  wsize = ABS (wsize_signed);

  new_wsize = MAX (wsize, xsize);
  MPZ_REALLOC (w, new_wsize+1);
  wp = PTR (w);
  xp = PTR (x);
  min_size = MIN (wsize, xsize);

  if (sub >= 0)
    {
      /* addmul of absolute values */

      cy = mpn_addmul_1 (wp, xp, min_size, y);
      wp += min_size;
      xp += min_size;

      dsize = xsize - wsize;
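      /* dsize > 0 means x has limbs beyond w: multiply them by y with the
         carry accumulated so far as carry-in.  dsize < 0 means w has limbs
         beyond x: just propagate that carry through them.  */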
#if HAVE_NATIVE_mpn_mul_1c
      if (dsize > 0)
        cy = mpn_mul_1c (wp, xp, dsize, y, cy);
      else if (dsize < 0)
        {
          dsize = -dsize;
          cy = mpn_add_1 (wp, wp, dsize, cy);
        }
#else
      if (dsize != 0)
        {
          mp_limb_t  cy2;
          if (dsize > 0)
            cy2 = mpn_mul_1 (wp, xp, dsize, y);
          else
            {
              dsize = -dsize;
              cy2 = 0;
            }
          cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
        }
#endif

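      /* Here dsize = |xsize - wsize| and wp has been advanced by min_size,
         so wp[dsize] is position new_wsize of w, the extra limb allocated
         above; store the final carry there.  */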
      wp[dsize] = cy;
      new_wsize += (cy != 0);
    }
  else
    {
      /* submul of absolute values */

      cy = mpn_submul_1 (wp, xp, min_size, y);
      if (wsize >= xsize)
        {
          /* if w bigger than x, then propagate borrow through it */
          if (wsize != xsize)
            cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);

          if (cy != 0)
            {
              /* Borrow out of w, take twos complement negative to get
                 absolute value, flip sign of w.  */
              wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
              mpn_com (wp, wp, new_wsize);
              new_wsize++;
              MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
              wsize_signed = -wsize_signed;
            }
        }
      else /* wsize < xsize */
        {
          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
             take twos complement and use an mpn_mul_1 for the rest.  */

          mp_limb_t  cy2;

          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
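          /* mpn_com below supplies ~(w-x*y), mpn_add_1 the +1 (its carry
             folds into cy), and cy -= 1 leaves the (cy-1)*b^n term as the
             carry into the multiply of the high part of x (the cy2 test
             handles the case where cy-1 would be -1).  */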
          mpn_com (wp, wp, wsize);
          cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
          cy -= 1;

          /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
             returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
          cy2 = (cy == MP_LIMB_T_MAX);
          cy += cy2;
          MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
          wp[new_wsize] = cy;
          new_wsize += (cy != 0);

          /* Apply any -1 from above.  The value at wp+wsize is non-zero
             because y!=0 and the high limb of x will be non-zero.  */
          if (cy2)
            MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));

          wsize_signed = -wsize_signed;
        }

      /* submul can produce high zero limbs due to cancellation, both when w
         has more limbs and when x has more.  */
      MPN_NORMALIZE (wp, new_wsize);
    }

  SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);

  ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
}


void
mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
{
#if BITS_PER_ULONG > GMP_NUMB_BITS
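  /* When y does not fit in one limb, split it into its high and low
     GMP_NUMB_BITS parts and apply each in turn: the high part against a
     copy of x shifted up one limb (tp[0] = 0, data at tp + 1), then, after
     advancing PTR(t) past that zero limb, the low part against x itself. */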
  if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))
    {
      mpz_t t;
      mp_ptr tp;
      mp_size_t xn;
      TMP_DECL;
      TMP_MARK;
      xn = SIZ (x);
      MPZ_TMP_INIT (t, ABS (xn) + 1);
      tp = PTR (t);
      tp[0] = 0;
      MPN_COPY (tp + 1, PTR(x), ABS (xn));
      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
      PTR(t) = tp + 1;
      SIZ(t) = xn;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
      TMP_FREE;
      return;
    }
#endif
  mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
}

void
mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
{
#if BITS_PER_ULONG > GMP_NUMB_BITS
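  /* Same splitting of y as in mpz_addmul_ui above, but with sub = -1 so
     both halves are subtracted.  */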
  if (y > GMP_NUMB_MAX && SIZ(x) != 0)
    {
      mpz_t t;
      mp_ptr tp;
      mp_size_t xn;
      TMP_DECL;
      TMP_MARK;
      xn = SIZ (x);
      MPZ_TMP_INIT (t, ABS (xn) + 1);
      tp = PTR (t);
      tp[0] = 0;
      MPN_COPY (tp + 1, PTR(x), ABS (xn));
      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
      PTR(t) = tp + 1;
      SIZ(t) = xn;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
      TMP_FREE;
      return;
    }
#endif
  mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
}