xref: /dragonfly/contrib/gmp/mpn/generic/mul.c (revision 9ddb8543)
1 /* mpn_mul -- Multiply two natural numbers.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5 Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005,
6 2006, 2007 Free Software Foundation, Inc.
7 
8 This file is part of the GNU MP Library.
9 
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
14 
15 The GNU MP Library is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18 License for more details.
19 
20 You should have received a copy of the GNU Lesser General Public License
21 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
22 
23 #include "gmp.h"
24 #include "gmp-impl.h"
25 
26 
27 #ifndef MUL_BASECASE_MAX_UN
28 #define MUL_BASECASE_MAX_UN 500
29 #endif
30 
31 /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
32    (pointed to by VP, with VN limbs), and store the result at PRODP.  The
33    result is UN + VN limbs.  Return the most significant limb of the result.
34 
35    NOTE: The space pointed to by PRODP is overwritten before finished with U
36    and V, so overlap is an error.
37 
38    Argument constraints:
39    1. UN >= VN.
40    2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
41       the multiplier and the multiplicand.  */
42 
43 mp_limb_t
44 mpn_mul (mp_ptr prodp,
45 	 mp_srcptr up, mp_size_t un,
46 	 mp_srcptr vp, mp_size_t vn)
47 {
48   ASSERT (un >= vn);
49   ASSERT (vn >= 1);
50   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
51   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
52 
53   if (un == vn)
54     {
55       if (up == vp)
56 	mpn_sqr_n (prodp, up, un);
57       else
58 	mpn_mul_n (prodp, up, vp, un);
59       return prodp[2 * un - 1];
60     }
61 
62   if (vn < MUL_KARATSUBA_THRESHOLD)
63     { /* plain schoolbook multiplication */
64 
65       /* Unless un is very large, or else if have an applicable mpn_mul_N,
66 	 perform basecase multiply directly.  */
67       if (un <= MUL_BASECASE_MAX_UN
68 #if HAVE_NATIVE_mpn_mul_2
69 	  || vn <= 2
70 #else
71 	  || vn == 1
72 #endif
73 	  )
74 	mpn_mul_basecase (prodp, up, un, vp, vn);
75       else
76 	{
77 	  /* We have un >> MUL_BASECASE_MAX_UN > vn.  For better memory
78 	     locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
79 	     these pieces with the vp[] operand.  After each such partial
80 	     multiplication (but the last) we copy the most significant vn
81 	     limbs into a temporary buffer since that part would otherwise be
82 	     overwritten by the next multiplication.  After the next
83 	     multiplication, we add it back.  This illustrates the situation:
84 
85                                                     -->vn<--
86                                                       |  |<------- un ------->|
87                                                          _____________________|
88                                                         X                    /|
89                                                       /XX__________________/  |
90                                     _____________________                     |
91                                    X                    /                     |
92                                  /XX__________________/                       |
93                _____________________                                          |
94               /                    /                                          |
95             /____________________/                                            |
96 	    ==================================================================
97 
98 	    The parts marked with X are the parts whose sums are copied into
99 	    the temporary buffer.  */
100 
101 	  mp_limb_t tp[MUL_KARATSUBA_THRESHOLD_LIMIT];
102 	  mp_limb_t cy;
103           ASSERT (MUL_KARATSUBA_THRESHOLD <= MUL_KARATSUBA_THRESHOLD_LIMIT);
104 
105 	  mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
106 	  prodp += MUL_BASECASE_MAX_UN;
107 	  MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
108 	  up += MUL_BASECASE_MAX_UN;
109 	  un -= MUL_BASECASE_MAX_UN;
110 	  while (un > MUL_BASECASE_MAX_UN)
111 	    {
112 	      mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
113 	      cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
114 	      mpn_incr_u (prodp + vn, cy);		/* safe? */
115 	      prodp += MUL_BASECASE_MAX_UN;
116 	      MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
117 	      up += MUL_BASECASE_MAX_UN;
118 	      un -= MUL_BASECASE_MAX_UN;
119 	    }
120 	  if (un > vn)
121 	    {
122 	      mpn_mul_basecase (prodp, up, un, vp, vn);
123 	    }
124 	  else
125 	    {
126 	      ASSERT_ALWAYS (un > 0);
127 	      mpn_mul_basecase (prodp, vp, vn, up, un);
128 	    }
129 	  cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
130 	  mpn_incr_u (prodp + vn, cy);		/* safe? */
131 	}
132       return prodp[un + vn - 1];
133     }
134 
135   if (ABOVE_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) &&
136       ABOVE_THRESHOLD (vn, MUL_FFT_THRESHOLD / 3)) /* FIXME */
137     {
138       mpn_mul_fft_full (prodp, up, un, vp, vn);
139       return prodp[un + vn - 1];
140     }
141 
142   {
143     mp_ptr ws;
144     mp_ptr scratch;
145 #if WANT_ASSERT
146     mp_ptr ssssp;
147 #endif
148     TMP_DECL;
149     TMP_MARK;
150 
151 #define WSALL (4 * vn)
152     ws = TMP_SALLOC_LIMBS (WSALL + 1);
153 
154 #define ITCH ((un + vn) * 4 + 100)
155     scratch = TMP_ALLOC_LIMBS (ITCH + 1);
156 #if WANT_ASSERT
157     ssssp = scratch + ITCH;
158     ws[WSALL] = 0xbabecafe;
159     ssssp[0] = 0xbeef;
160 #endif
161 
162     if (un >= 3 * vn)
163       {
164 	mp_limb_t cy;
165 
166 	mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
167 	un -= 2 * vn;
168 	up += 2 * vn;
169 	prodp += 2 * vn;
170 
171 	while (un >= 3 * vn)
172 	  {
173 	    mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
174 	    un -= 2 * vn;
175 	    up += 2 * vn;
176 	    cy = mpn_add_n (prodp, prodp, ws, vn);
177 	    MPN_COPY (prodp + vn, ws + vn, 2 * vn);
178 	    mpn_incr_u (prodp + vn, cy);
179 	    prodp += 2 * vn;
180 	  }
181 
182 	if (5 * un > 9 * vn)
183 	  {
184 	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
185 	    cy = mpn_add_n (prodp, prodp, ws, vn);
186 	    MPN_COPY (prodp + vn, ws + vn, un);
187 	    mpn_incr_u (prodp + vn, cy);
188 	  }
189 	else if (9 * un > 10 * vn)
190 	  {
191 	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
192 	    cy = mpn_add_n (prodp, prodp, ws, vn);
193 	    MPN_COPY (prodp + vn, ws + vn, un);
194 	    mpn_incr_u (prodp + vn, cy);
195 	  }
196 	else
197 	  {
198 	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
199 	    cy = mpn_add_n (prodp, prodp, ws, vn);
200 	    MPN_COPY (prodp + vn, ws + vn, un);
201 	    mpn_incr_u (prodp + vn, cy);
202 	  }
203 
204 	ASSERT (ws[WSALL] == 0xbabecafe);
205 	ASSERT (ssssp[0] == 0xbeef);
206 	TMP_FREE;
207 	return prodp[un + vn - 1];
208       }
209 
210     if (un * 5 > vn * 9)
211       mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
212     else if (9 * un > 10 * vn)
213       mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
214     else
215       mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
216 
217     ASSERT (ws[WSALL] == 0xbabecafe);
218     ASSERT (ssssp[0] == 0xbeef);
219     TMP_FREE;
220     return prodp[un + vn - 1];
221   }
222 }
223