/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
   store the truncated integer part at rootp and the remainder at remp.

   Contributed by Paul Zimmermann (algorithm) and
   Paul Zimmermann and Torbjorn Granlund (implementation).

   THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

/* FIXME:
     This implementation is not optimal when remp == NULL, since the complexity
     is M(n), whereas it should be M(n/k) on average.
*/

#include <stdio.h>		/* for NULL */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
				       mp_limb_t, int);

/* Like mpn_rshift (cy receives the bits shifted out), but also accepts
   cnt == 0, which mpn_rshift itself does not allow; in that case this
   degenerates to a low-to-high copy with cy = 0.  */
#define MPN_RSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      cy = mpn_rshift (rp, up, un, cnt);				\
    else								\
      {									\
	MPN_COPY_INCR (rp, up, un);					\
	cy = 0;								\
      }									\
  } while (0)

/* Like mpn_lshift (cy receives the bits shifted out), but also accepts
   cnt == 0, degenerating to a high-to-low copy with cy = 0 (the decrementing
   copy keeps the rp >= up overlap used below safe, matching mpn_lshift's
   overlap rule).  */
#define MPN_LSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      cy = mpn_lshift (rp, up, un, cnt);				\
    else								\
      {									\
	MPN_COPY_DECR (rp, up, un);					\
	cy = 0;								\
      }									\
  } while (0)


/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
   If remp <> NULL, put in {remp, un} the remainder.
   Return the size (in limbs) of the remainder if remp <> NULL,
	  or a non-zero value iff the remainder is non-zero when remp = NULL.
   Assumes:
   (a) up[un-1] is not zero
   (b) rootp has at least space for ceil(un/k) limbs
   (c) remp has at least space for un limbs (in case remp <> NULL)
   (d) the operands do not overlap.

   The auxiliary memory usage is 3*un+2 if remp = NULL,
   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
*/
mp_size_t
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
	     mp_srcptr up, mp_size_t un, mp_limb_t k)
{
  ASSERT (un > 0);
  ASSERT (up[un - 1] != 0);
  ASSERT (k > 1);

  if ((remp == NULL) && (un / k > 2))
    /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,
       which will produce an approximate root with one more limb,
       so that in most cases we can conclude.  */
    {
      mp_ptr sp, wp;
      mp_size_t rn, sn, wn;
      TMP_DECL;
      TMP_MARK;
      wn = un + k;
      wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
      sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
      MPN_COPY (wp + k, up, un);
      MPN_ZERO (wp, k);
      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
      /* the approximate root S = {sp,sn} is either the correct root of
	 {sp,sn}, or one too large.  Thus unless the least significant limb
	 of S is 0 or 1, we can deduce the root of {up,un} is S truncated by
	 one limb.  (In case sp[0]=1, we can deduce the root, but not decide
	 whether it is exact or not.) */
      MPN_COPY (rootp, sp + 1, sn - 1);
      TMP_FREE;
      return rn;
    }
  else /* remp <> NULL, or the input is too small (un / k <= 2) for the
	  padding trick above to pay off */
    {
      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
    }
}

/* Kernel of mpn_rootrem: same operand contract as mpn_rootrem above, with
   one extra flag.  If approx is non-zero, the final remainder is not
   computed: the last S^k evaluation is skipped whenever the low limb of the
   root is > 1, and the value returned is then merely non-zero iff the
   remainder is (see the truncation logic in mpn_rootrem).  */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy;
  unsigned long int unb; /* number of significant bits of {up,un} */
  unsigned long int xnb; /* number of significant bits of the result */
  unsigned int cnt;
  unsigned long b, kk;
  unsigned long sizes[GMP_NUMB_BITS + 1];
  int ni, i;
  int c;
  int logk;
  TMP_DECL;

  TMP_MARK;

  /* qp and wp need enough space to store S'^k where S' is an approximate
     root.  Since S' can be as large as S+2, the worst case is when S=2 and
     S'=4.  But then since we know the number of bits of S in advance, S'
     can only be 3 at most.  Similarly for S=4, then S' can be 6 at most.
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k.  Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS).  */
/* 0.585 slightly overestimates log2(3/2) ~= 0.58496, so the bound above is
   respected.  NOTE(review): the expansion is deliberately unparenthesized;
   it is only safe in the additive contexts "un + EXTRA" used below.  */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
					of R/(k*S^(k-1)), and S^k */
  if (remp == NULL)
    {
      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
      scratch = rp;			 /* used by mpn_div_q */
    }
  else
    {
      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
      rp = remp;
    }
  sp = rootp;
  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
					and temporary for mpn_pow_1 */
  count_leading_zeros (cnt, up[un - 1]);
  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
  /* unb is the number of bits of the input U */

  xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
  /* xnb is the number of bits of the root R */

  if (xnb == 1) /* root is 1 */
    {
      if (remp == NULL)
	remp = rp;
      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
      MPN_NORMALIZE (remp, un);	/* There should be at most one zero limb,
				   if we demand u to be normalized  */
      rootp[0] = 1;
      TMP_FREE;
      return un;
    }

  /* We initialize the algorithm with a 1-bit approximation to zero: since we
     know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
     r0^k = 2^(k*(xnb-1)), that we subtract to the input.  */
  kk = k * (xnb - 1);		/* number of truncated bits in the input */
  rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
  MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
  mpn_sub_1 (rp, rp, rn, 1);	/* subtract the initial approximation: since
				   the non-truncated part is less than 2^k, it
				   is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
  sp[0] = 1;			/* initial approximation */
  sn = 1;			/* it has one limb */

  for (logk = 1; ((k - 1) >> logk) != 0; logk++)
    ;
  /* logk = ceil(log(k)/log(2)) */

  b = xnb - 1; /* number of remaining bits to determine in the kth root */
  ni = 0;
  while (b != 0)
    {
      /* invariant: here we want b+1 total bits for the kth root */
      sizes[ni] = b;
      /* if c is the new value of b, this means that we'll go from a root
	 of c+1 bits (say s') to a root of b+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" from Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((b + log2(k))/2). */
      b = (b + logk + 1) / 2;
      if (b >= sizes[ni])
	b = sizes[ni] - 1;	/* add just one bit at a time */
      ni++;
    }
  sizes[ni] = 0;
  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
     sizes[i] <= 2 * sizes[i+1].
     Newton iteration will first compute sizes[ni-1] extra bits,
     then sizes[ni-2], ..., then sizes[0] = b. */

  wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
  wn = 1;
  for (i = ni; i != 0; i--)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[ni] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */
      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
				      iteration */

      /* Reinsert a low zero limb if we normalized away the entire remainder */
      if (rn == 0)
	{
	  rp[0] = 0;
	  rn = 1;
	}

      /* first multiply the remainder by 2^b */
      MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
      rn = rn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  rp[rn] = cy;
	  rn++;
	}

      kk = kk - b;

      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */
      save = rp[bn];
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
		 - floor(kk / GMP_NUMB_BITS)
	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
      /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
      if (nl - 1 > bn)
	save2 = rp[bn + 1];
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero high bits of rp[bn] */
      rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
      /* restore corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
			       they start by bit 0 in rp[0], so they use
			       at most ceil(b/GMP_NUMB_BITS) limbs */

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (rn < wn)
	{
	  qn = 0;
	}
      else
	{
	  mp_ptr tp;
	  qn = rn - wn; /* expected quotient size */
	  /* tp must have space for wn limbs.
	     The quotient needs rn-wn+1 limbs, thus quotient+remainder
	     need altogether rn+1 limbs. */
	  tp = qp + qn + 1;	/* put remainder in Q buffer */
	  /* NOTE(review): tp is never passed to mpn_div_q below; it only
	     documents where a remainder would live inside the Q buffer.
	     Verify whether it can be removed, or whether mpn_div_q was
	     meant to take it.  */
	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
	  qn += qp[qn] != 0;
	}

      /* 5: current buffers: {sp,sn}, {qp,qn}.
	 Note: {rp,rn} is not needed any more since we'll compute it from
	 scratch at the end of the loop. */

      /* Number of limbs used by b bits, when least significant bit is
	 aligned to least limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* the quotient should be smaller than 2^b, since the previous
	 approximation was correctly rounded toward zero */
      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
		      qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
	{
	  /* Clamp the quotient to 2^b - 1, the largest value that can be
	     absorbed into the b new low bits of the root.  */
	  qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
	  MPN_ZERO (qp, qn);
	  qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
	  MPN_DECR_U (qp, qn, 1);
	  qn -= qp[qn - 1] == 0;
	}

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      /* 7: current buffers: {sp,sn}, {qp,qn} */

      ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
				   above, q is set to 2^b-1, which has
				   exactly bn limbs */

      /* Combine sB and q to form sB + q.  */
      save = sp[b / GMP_NUMB_BITS];
      MPN_COPY (sp, qp, qn);
      MPN_ZERO (sp + qn, bn - qn);
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b; /* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
      rn = un - kk / GMP_NUMB_BITS;
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */

      /* Correction loop: per the Brent-Zimmermann bound cited above, the
	 candidate root is at most one too large, hence c <= 1 is asserted
	 after the loop.  */
      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  if (i == 1)
	    {
	      /* Last iteration: we don't need W anymore. */
	      /* mpn_pow_1 requires that both qp and wp have enough space to
		 store the result {sp,sn}^k + 1 limb */
	      approx = approx && (sp[0] > 1);
	      qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
	    }
	  else
	    {
	      /* W <- S^(k-1) for the next iteration,
		 and S^k = W * S. */
	      wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	      mpn_mul (qp, wp, wn, sp, sn);
	      qn = wn + sn;
	      qn -= qp[qn - 1] == 0;
	    }

	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      ASSERT_ALWAYS (c <= 1);
      ASSERT_ALWAYS (rn >= qn);

      /* R = R - Q = floor(U/2^kk) - S^k */
      if ((i > 1) || (approx == 0))
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE (rp, rn);
	}
      /* otherwise we have rn > 0, thus the return value is ok */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
    }

  TMP_FREE;
  return rn;
}