1 /* mpn_mul -- Multiply two natural numbers. 2 3 Contributed to the GNU project by Torbjorn Granlund. 4 5 Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005, 6 2006, 2007 Free Software Foundation, Inc. 7 8 This file is part of the GNU MP Library. 9 10 The GNU MP Library is free software; you can redistribute it and/or modify 11 it under the terms of the GNU Lesser General Public License as published by 12 the Free Software Foundation; either version 3 of the License, or (at your 13 option) any later version. 14 15 The GNU MP Library is distributed in the hope that it will be useful, but 16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 18 License for more details. 19 20 You should have received a copy of the GNU Lesser General Public License 21 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 22 23 #include "gmp.h" 24 #include "gmp-impl.h" 25 26 27 #ifndef MUL_BASECASE_MAX_UN 28 #define MUL_BASECASE_MAX_UN 500 29 #endif 30 31 /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v 32 (pointed to by VP, with VN limbs), and store the result at PRODP. The 33 result is UN + VN limbs. Return the most significant limb of the result. 34 35 NOTE: The space pointed to by PRODP is overwritten before finished with U 36 and V, so overlap is an error. 37 38 Argument constraints: 39 1. UN >= VN. 40 2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from 41 the multiplier and the multiplicand. */ 42 43 mp_limb_t 44 mpn_mul (mp_ptr prodp, 45 mp_srcptr up, mp_size_t un, 46 mp_srcptr vp, mp_size_t vn) 47 { 48 ASSERT (un >= vn); 49 ASSERT (vn >= 1); 50 ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un)); 51 ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn)); 52 53 if (un == vn) 54 { 55 if (up == vp) 56 mpn_sqr_n (prodp, up, un); 57 else 58 mpn_mul_n (prodp, up, vp, un); 59 return prodp[2 * un - 1]; 60 } 61 62 if (vn < MUL_KARATSUBA_THRESHOLD) 63 { /* plain schoolbook multiplication */ 64 65 /* Unless un is very large, or else if have an applicable mpn_mul_N, 66 perform basecase multiply directly. */ 67 if (un <= MUL_BASECASE_MAX_UN 68 #if HAVE_NATIVE_mpn_mul_2 69 || vn <= 2 70 #else 71 || vn == 1 72 #endif 73 ) 74 mpn_mul_basecase (prodp, up, un, vp, vn); 75 else 76 { 77 /* We have un >> MUL_BASECASE_MAX_UN > vn. For better memory 78 locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply 79 these pieces with the vp[] operand. After each such partial 80 multiplication (but the last) we copy the most significant vn 81 limbs into a temporary buffer since that part would otherwise be 82 overwritten by the next multiplication. After the next 83 multiplication, we add it back. This illustrates the situation: 84 85 -->vn<-- 86 | |<------- un ------->| 87 _____________________| 88 X /| 89 /XX__________________/ | 90 _____________________ | 91 X / | 92 /XX__________________/ | 93 _____________________ | 94 / / | 95 /____________________/ | 96 ================================================================== 97 98 The parts marked with X are the parts whose sums are copied into 99 the temporary buffer. */ 100 101 mp_limb_t tp[MUL_KARATSUBA_THRESHOLD_LIMIT]; 102 mp_limb_t cy; 103 ASSERT (MUL_KARATSUBA_THRESHOLD <= MUL_KARATSUBA_THRESHOLD_LIMIT); 104 105 mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn); 106 prodp += MUL_BASECASE_MAX_UN; 107 MPN_COPY (tp, prodp, vn); /* preserve high triangle */ 108 up += MUL_BASECASE_MAX_UN; 109 un -= MUL_BASECASE_MAX_UN; 110 while (un > MUL_BASECASE_MAX_UN) 111 { 112 mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn); 113 cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */ 114 mpn_incr_u (prodp + vn, cy); /* safe? */ 115 prodp += MUL_BASECASE_MAX_UN; 116 MPN_COPY (tp, prodp, vn); /* preserve high triangle */ 117 up += MUL_BASECASE_MAX_UN; 118 un -= MUL_BASECASE_MAX_UN; 119 } 120 if (un > vn) 121 { 122 mpn_mul_basecase (prodp, up, un, vp, vn); 123 } 124 else 125 { 126 ASSERT_ALWAYS (un > 0); 127 mpn_mul_basecase (prodp, vp, vn, up, un); 128 } 129 cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */ 130 mpn_incr_u (prodp + vn, cy); /* safe? */ 131 } 132 return prodp[un + vn - 1]; 133 } 134 135 if (ABOVE_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) && 136 ABOVE_THRESHOLD (vn, MUL_FFT_THRESHOLD / 3)) /* FIXME */ 137 { 138 mpn_mul_fft_full (prodp, up, un, vp, vn); 139 return prodp[un + vn - 1]; 140 } 141 142 { 143 mp_ptr ws; 144 mp_ptr scratch; 145 #if WANT_ASSERT 146 mp_ptr ssssp; 147 #endif 148 TMP_DECL; 149 TMP_MARK; 150 151 #define WSALL (4 * vn) 152 ws = TMP_SALLOC_LIMBS (WSALL + 1); 153 154 #define ITCH ((un + vn) * 4 + 100) 155 scratch = TMP_ALLOC_LIMBS (ITCH + 1); 156 #if WANT_ASSERT 157 ssssp = scratch + ITCH; 158 ws[WSALL] = 0xbabecafe; 159 ssssp[0] = 0xbeef; 160 #endif 161 162 if (un >= 3 * vn) 163 { 164 mp_limb_t cy; 165 166 mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch); 167 un -= 2 * vn; 168 up += 2 * vn; 169 prodp += 2 * vn; 170 171 while (un >= 3 * vn) 172 { 173 mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch); 174 un -= 2 * vn; 175 up += 2 * vn; 176 cy = mpn_add_n (prodp, prodp, ws, vn); 177 MPN_COPY (prodp + vn, ws + vn, 2 * vn); 178 mpn_incr_u (prodp + vn, cy); 179 prodp += 2 * vn; 180 } 181 182 if (5 * un > 9 * vn) 183 { 184 mpn_toom42_mul (ws, up, un, vp, vn, scratch); 185 cy = mpn_add_n (prodp, prodp, ws, vn); 186 MPN_COPY (prodp + vn, ws + vn, un); 187 mpn_incr_u (prodp + vn, cy); 188 } 189 else if (9 * un > 10 * vn) 190 { 191 mpn_toom32_mul (ws, up, un, vp, vn, scratch); 192 cy = mpn_add_n (prodp, prodp, ws, vn); 193 MPN_COPY (prodp + vn, ws + vn, un); 194 mpn_incr_u (prodp + vn, cy); 195 } 196 else 197 { 198 mpn_toom22_mul (ws, up, un, vp, vn, scratch); 199 cy = mpn_add_n (prodp, prodp, ws, vn); 200 MPN_COPY (prodp + vn, ws + vn, un); 201 mpn_incr_u (prodp + vn, cy); 202 } 203 204 ASSERT (ws[WSALL] == 0xbabecafe); 205 ASSERT (ssssp[0] == 0xbeef); 206 TMP_FREE; 207 return prodp[un + vn - 1]; 208 } 209 210 if (un * 5 > vn * 9) 211 mpn_toom42_mul (prodp, up, un, vp, vn, scratch); 212 else if (9 * un > 10 * vn) 213 mpn_toom32_mul (prodp, up, un, vp, vn, scratch); 214 else 215 mpn_toom22_mul (prodp, up, un, vp, vn, scratch); 216 217 ASSERT (ws[WSALL] == 0xbabecafe); 218 ASSERT (ssssp[0] == 0xbeef); 219 TMP_FREE; 220 return prodp[un + vn - 1]; 221 } 222 } 223