1*86d7f5d3SJohn Marino /* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.
2*86d7f5d3SJohn Marino
3*86d7f5d3SJohn Marino THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
4*86d7f5d3SJohn Marino ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5*86d7f5d3SJohn Marino COMPLETELY IN FUTURE GNU MP RELEASES.
6*86d7f5d3SJohn Marino
7*86d7f5d3SJohn Marino Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
8*86d7f5d3SJohn Marino
9*86d7f5d3SJohn Marino This file is part of the GNU MP Library.
10*86d7f5d3SJohn Marino
11*86d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
12*86d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
13*86d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
14*86d7f5d3SJohn Marino option) any later version.
15*86d7f5d3SJohn Marino
16*86d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
17*86d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18*86d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19*86d7f5d3SJohn Marino License for more details.
20*86d7f5d3SJohn Marino
21*86d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
22*86d7f5d3SJohn Marino along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23*86d7f5d3SJohn Marino
24*86d7f5d3SJohn Marino #include "gmp.h"
25*86d7f5d3SJohn Marino #include "gmp-impl.h"
26*86d7f5d3SJohn Marino
27*86d7f5d3SJohn Marino
28*86d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_1c
29*86d7f5d3SJohn Marino #define MPN_MUL_1C(cout, dst, src, size, n, cin) \
30*86d7f5d3SJohn Marino do { \
31*86d7f5d3SJohn Marino (cout) = mpn_mul_1c (dst, src, size, n, cin); \
32*86d7f5d3SJohn Marino } while (0)
33*86d7f5d3SJohn Marino #else
34*86d7f5d3SJohn Marino #define MPN_MUL_1C(cout, dst, src, size, n, cin) \
35*86d7f5d3SJohn Marino do { \
36*86d7f5d3SJohn Marino mp_limb_t __cy; \
37*86d7f5d3SJohn Marino __cy = mpn_mul_1 (dst, src, size, n); \
38*86d7f5d3SJohn Marino (cout) = __cy + mpn_add_1 (dst, dst, size, cin); \
39*86d7f5d3SJohn Marino } while (0)
40*86d7f5d3SJohn Marino #endif
41*86d7f5d3SJohn Marino
42*86d7f5d3SJohn Marino
43*86d7f5d3SJohn Marino /* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
44*86d7f5d3SJohn Marino
45*86d7f5d3SJohn Marino All that's needed to account for negative w or x is to flip "sub".
46*86d7f5d3SJohn Marino
47*86d7f5d3SJohn Marino The final w will retain its sign, unless an underflow occurs in a submul
48*86d7f5d3SJohn Marino of absolute values, in which case it's flipped.
49*86d7f5d3SJohn Marino
50*86d7f5d3SJohn Marino If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
51*86d7f5d3SJohn Marino used. The alternative would be mpn_mul_1 into temporary space followed
52*86d7f5d3SJohn Marino by mpn_sub_n. Avoiding temporary space seem good, and submul+com stands
53*86d7f5d3SJohn Marino a chance of being faster since it involves only one set of carry
54*86d7f5d3SJohn Marino propagations, not two. Note that doing an addmul_1 with a
55*86d7f5d3SJohn Marino twos-complement negative y doesn't work, because it effectively adds an
56*86d7f5d3SJohn Marino extra x * 2^GMP_LIMB_BITS. */
57*86d7f5d3SJohn Marino
58*86d7f5d3SJohn Marino REGPARM_ATTR(1) void
mpz_aorsmul_1(mpz_ptr w,mpz_srcptr x,mp_limb_t y,mp_size_t sub)59*86d7f5d3SJohn Marino mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
60*86d7f5d3SJohn Marino {
61*86d7f5d3SJohn Marino mp_size_t xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
62*86d7f5d3SJohn Marino mp_srcptr xp;
63*86d7f5d3SJohn Marino mp_ptr wp;
64*86d7f5d3SJohn Marino mp_limb_t cy;
65*86d7f5d3SJohn Marino
66*86d7f5d3SJohn Marino /* w unaffected if x==0 or y==0 */
67*86d7f5d3SJohn Marino xsize = SIZ (x);
68*86d7f5d3SJohn Marino if (xsize == 0 || y == 0)
69*86d7f5d3SJohn Marino return;
70*86d7f5d3SJohn Marino
71*86d7f5d3SJohn Marino sub ^= xsize;
72*86d7f5d3SJohn Marino xsize = ABS (xsize);
73*86d7f5d3SJohn Marino
74*86d7f5d3SJohn Marino wsize_signed = SIZ (w);
75*86d7f5d3SJohn Marino if (wsize_signed == 0)
76*86d7f5d3SJohn Marino {
77*86d7f5d3SJohn Marino /* nothing to add to, just set x*y, "sub" gives the sign */
78*86d7f5d3SJohn Marino MPZ_REALLOC (w, xsize+1);
79*86d7f5d3SJohn Marino wp = PTR (w);
80*86d7f5d3SJohn Marino cy = mpn_mul_1 (wp, PTR(x), xsize, y);
81*86d7f5d3SJohn Marino wp[xsize] = cy;
82*86d7f5d3SJohn Marino xsize += (cy != 0);
83*86d7f5d3SJohn Marino SIZ (w) = (sub >= 0 ? xsize : -xsize);
84*86d7f5d3SJohn Marino return;
85*86d7f5d3SJohn Marino }
86*86d7f5d3SJohn Marino
87*86d7f5d3SJohn Marino sub ^= wsize_signed;
88*86d7f5d3SJohn Marino wsize = ABS (wsize_signed);
89*86d7f5d3SJohn Marino
90*86d7f5d3SJohn Marino new_wsize = MAX (wsize, xsize);
91*86d7f5d3SJohn Marino MPZ_REALLOC (w, new_wsize+1);
92*86d7f5d3SJohn Marino wp = PTR (w);
93*86d7f5d3SJohn Marino xp = PTR (x);
94*86d7f5d3SJohn Marino min_size = MIN (wsize, xsize);
95*86d7f5d3SJohn Marino
96*86d7f5d3SJohn Marino if (sub >= 0)
97*86d7f5d3SJohn Marino {
98*86d7f5d3SJohn Marino /* addmul of absolute values */
99*86d7f5d3SJohn Marino
100*86d7f5d3SJohn Marino cy = mpn_addmul_1 (wp, xp, min_size, y);
101*86d7f5d3SJohn Marino wp += min_size;
102*86d7f5d3SJohn Marino xp += min_size;
103*86d7f5d3SJohn Marino
104*86d7f5d3SJohn Marino dsize = xsize - wsize;
105*86d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_1c
106*86d7f5d3SJohn Marino if (dsize > 0)
107*86d7f5d3SJohn Marino cy = mpn_mul_1c (wp, xp, dsize, y, cy);
108*86d7f5d3SJohn Marino else if (dsize < 0)
109*86d7f5d3SJohn Marino {
110*86d7f5d3SJohn Marino dsize = -dsize;
111*86d7f5d3SJohn Marino cy = mpn_add_1 (wp, wp, dsize, cy);
112*86d7f5d3SJohn Marino }
113*86d7f5d3SJohn Marino #else
114*86d7f5d3SJohn Marino if (dsize != 0)
115*86d7f5d3SJohn Marino {
116*86d7f5d3SJohn Marino mp_limb_t cy2;
117*86d7f5d3SJohn Marino if (dsize > 0)
118*86d7f5d3SJohn Marino cy2 = mpn_mul_1 (wp, xp, dsize, y);
119*86d7f5d3SJohn Marino else
120*86d7f5d3SJohn Marino {
121*86d7f5d3SJohn Marino dsize = -dsize;
122*86d7f5d3SJohn Marino cy2 = 0;
123*86d7f5d3SJohn Marino }
124*86d7f5d3SJohn Marino cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
125*86d7f5d3SJohn Marino }
126*86d7f5d3SJohn Marino #endif
127*86d7f5d3SJohn Marino
128*86d7f5d3SJohn Marino wp[dsize] = cy;
129*86d7f5d3SJohn Marino new_wsize += (cy != 0);
130*86d7f5d3SJohn Marino }
131*86d7f5d3SJohn Marino else
132*86d7f5d3SJohn Marino {
133*86d7f5d3SJohn Marino /* submul of absolute values */
134*86d7f5d3SJohn Marino
135*86d7f5d3SJohn Marino cy = mpn_submul_1 (wp, xp, min_size, y);
136*86d7f5d3SJohn Marino if (wsize >= xsize)
137*86d7f5d3SJohn Marino {
138*86d7f5d3SJohn Marino /* if w bigger than x, then propagate borrow through it */
139*86d7f5d3SJohn Marino if (wsize != xsize)
140*86d7f5d3SJohn Marino cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
141*86d7f5d3SJohn Marino
142*86d7f5d3SJohn Marino if (cy != 0)
143*86d7f5d3SJohn Marino {
144*86d7f5d3SJohn Marino /* Borrow out of w, take twos complement negative to get
145*86d7f5d3SJohn Marino absolute value, flip sign of w. */
146*86d7f5d3SJohn Marino wp[new_wsize] = ~-cy; /* extra limb is 0-cy */
147*86d7f5d3SJohn Marino mpn_com (wp, wp, new_wsize);
148*86d7f5d3SJohn Marino new_wsize++;
149*86d7f5d3SJohn Marino MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
150*86d7f5d3SJohn Marino wsize_signed = -wsize_signed;
151*86d7f5d3SJohn Marino }
152*86d7f5d3SJohn Marino }
153*86d7f5d3SJohn Marino else /* wsize < xsize */
154*86d7f5d3SJohn Marino {
155*86d7f5d3SJohn Marino /* x bigger than w, so want x*y-w. Submul has given w-x*y, so
156*86d7f5d3SJohn Marino take twos complement and use an mpn_mul_1 for the rest. */
157*86d7f5d3SJohn Marino
158*86d7f5d3SJohn Marino mp_limb_t cy2;
159*86d7f5d3SJohn Marino
160*86d7f5d3SJohn Marino /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
161*86d7f5d3SJohn Marino mpn_com (wp, wp, wsize);
162*86d7f5d3SJohn Marino cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
163*86d7f5d3SJohn Marino cy -= 1;
164*86d7f5d3SJohn Marino
165*86d7f5d3SJohn Marino /* If cy-1 == -1 then hold that -1 for latter. mpn_submul_1 never
166*86d7f5d3SJohn Marino returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
167*86d7f5d3SJohn Marino cy2 = (cy == MP_LIMB_T_MAX);
168*86d7f5d3SJohn Marino cy += cy2;
169*86d7f5d3SJohn Marino MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
170*86d7f5d3SJohn Marino wp[new_wsize] = cy;
171*86d7f5d3SJohn Marino new_wsize += (cy != 0);
172*86d7f5d3SJohn Marino
173*86d7f5d3SJohn Marino /* Apply any -1 from above. The value at wp+wsize is non-zero
174*86d7f5d3SJohn Marino because y!=0 and the high limb of x will be non-zero. */
175*86d7f5d3SJohn Marino if (cy2)
176*86d7f5d3SJohn Marino MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
177*86d7f5d3SJohn Marino
178*86d7f5d3SJohn Marino wsize_signed = -wsize_signed;
179*86d7f5d3SJohn Marino }
180*86d7f5d3SJohn Marino
181*86d7f5d3SJohn Marino /* submul can produce high zero limbs due to cancellation, both when w
182*86d7f5d3SJohn Marino has more limbs or x has more */
183*86d7f5d3SJohn Marino MPN_NORMALIZE (wp, new_wsize);
184*86d7f5d3SJohn Marino }
185*86d7f5d3SJohn Marino
186*86d7f5d3SJohn Marino SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);
187*86d7f5d3SJohn Marino
188*86d7f5d3SJohn Marino ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
189*86d7f5d3SJohn Marino }
190*86d7f5d3SJohn Marino
191*86d7f5d3SJohn Marino
192*86d7f5d3SJohn Marino void
mpz_addmul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)193*86d7f5d3SJohn Marino mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
194*86d7f5d3SJohn Marino {
195*86d7f5d3SJohn Marino #if BITS_PER_ULONG > GMP_NUMB_BITS
196*86d7f5d3SJohn Marino if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))
197*86d7f5d3SJohn Marino {
198*86d7f5d3SJohn Marino mpz_t t;
199*86d7f5d3SJohn Marino mp_ptr tp;
200*86d7f5d3SJohn Marino mp_size_t xn;
201*86d7f5d3SJohn Marino TMP_DECL;
202*86d7f5d3SJohn Marino TMP_MARK;
203*86d7f5d3SJohn Marino xn = SIZ (x);
204*86d7f5d3SJohn Marino MPZ_TMP_INIT (t, ABS (xn) + 1);
205*86d7f5d3SJohn Marino tp = PTR (t);
206*86d7f5d3SJohn Marino tp[0] = 0;
207*86d7f5d3SJohn Marino MPN_COPY (tp + 1, PTR(x), ABS (xn));
208*86d7f5d3SJohn Marino SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
209*86d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
210*86d7f5d3SJohn Marino PTR(t) = tp + 1;
211*86d7f5d3SJohn Marino SIZ(t) = xn;
212*86d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
213*86d7f5d3SJohn Marino TMP_FREE;
214*86d7f5d3SJohn Marino return;
215*86d7f5d3SJohn Marino }
216*86d7f5d3SJohn Marino #endif
217*86d7f5d3SJohn Marino mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
218*86d7f5d3SJohn Marino }
219*86d7f5d3SJohn Marino
220*86d7f5d3SJohn Marino void
mpz_submul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)221*86d7f5d3SJohn Marino mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
222*86d7f5d3SJohn Marino {
223*86d7f5d3SJohn Marino #if BITS_PER_ULONG > GMP_NUMB_BITS
224*86d7f5d3SJohn Marino if (y > GMP_NUMB_MAX && SIZ(x) != 0)
225*86d7f5d3SJohn Marino {
226*86d7f5d3SJohn Marino mpz_t t;
227*86d7f5d3SJohn Marino mp_ptr tp;
228*86d7f5d3SJohn Marino mp_size_t xn;
229*86d7f5d3SJohn Marino TMP_DECL;
230*86d7f5d3SJohn Marino TMP_MARK;
231*86d7f5d3SJohn Marino xn = SIZ (x);
232*86d7f5d3SJohn Marino MPZ_TMP_INIT (t, ABS (xn) + 1);
233*86d7f5d3SJohn Marino tp = PTR (t);
234*86d7f5d3SJohn Marino tp[0] = 0;
235*86d7f5d3SJohn Marino MPN_COPY (tp + 1, PTR(x), ABS (xn));
236*86d7f5d3SJohn Marino SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
237*86d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
238*86d7f5d3SJohn Marino PTR(t) = tp + 1;
239*86d7f5d3SJohn Marino SIZ(t) = xn;
240*86d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
241*86d7f5d3SJohn Marino TMP_FREE;
242*86d7f5d3SJohn Marino return;
243*86d7f5d3SJohn Marino }
244*86d7f5d3SJohn Marino #endif
245*86d7f5d3SJohn Marino mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
246*86d7f5d3SJohn Marino }
247