1 /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and 2 store the truncated integer part at rootp and the remainder at remp. 3 4 Contributed by Paul Zimmermann (algorithm) and 5 Paul Zimmermann and Torbjorn Granlund (implementation). 6 7 THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES. IT'S 8 ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST 9 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 10 11 Copyright 2002, 2005, 2009 Free Software Foundation, Inc. 12 13 This file is part of the GNU MP Library. 14 15 The GNU MP Library is free software; you can redistribute it and/or modify 16 it under the terms of the GNU Lesser General Public License as published by 17 the Free Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 The GNU MP Library is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 23 License for more details. 24 25 You should have received a copy of the GNU Lesser General Public License 26 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 27 28 /* FIXME: 29 (a) Once there is a native mpn_tdiv_q function in GMP (division without 30 remainder), replace the quick-and-dirty implementation below by it. 31 (b) The implementation below is not optimal when remp == NULL, since the 32 complexity is M(n) where n is the input size, whereas it should be 33 only M(n/k) on average. 34 */ 35 36 #include <stdio.h> /* for NULL */ 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 #include "longlong.h" 41 42 static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, 43 mp_limb_t, int); 44 static void mpn_tdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, 45 mp_srcptr, mp_size_t); 46 47 #define MPN_RSHIFT(cy,rp,up,un,cnt) \ 48 do { \ 49 if ((cnt) != 0) \ 50 cy = mpn_rshift (rp, up, un, cnt); \ 51 else \ 52 { \ 53 MPN_COPY_INCR (rp, up, un); \ 54 cy = 0; \ 55 } \ 56 } while (0) 57 58 #define MPN_LSHIFT(cy,rp,up,un,cnt) \ 59 do { \ 60 if ((cnt) != 0) \ 61 cy = mpn_lshift (rp, up, un, cnt); \ 62 else \ 63 { \ 64 MPN_COPY_DECR (rp, up, un); \ 65 cy = 0; \ 66 } \ 67 } while (0) 68 69 70 /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero. 71 If remp <> NULL, put in {remp, un} the remainder. 72 Return the size (in limbs) of the remainder if remp <> NULL, 73 or a non-zero value iff the remainder is non-zero when remp = NULL. 74 Assumes: 75 (a) up[un-1] is not zero 76 (b) rootp has at least space for ceil(un/k) limbs 77 (c) remp has at least space for un limbs (in case remp <> NULL) 78 (d) the operands do not overlap. 79 80 The auxiliary memory usage is 3*un+2 if remp = NULL, 81 and 2*un+2 if remp <> NULL. FIXME: This is an incorrect comment. 82 */ 83 mp_size_t 84 mpn_rootrem (mp_ptr rootp, mp_ptr remp, 85 mp_srcptr up, mp_size_t un, mp_limb_t k) 86 { 87 ASSERT (un > 0); 88 ASSERT (up[un - 1] != 0); 89 ASSERT (k > 1); 90 91 if ((remp == NULL) && (un / k > 2)) 92 /* call mpn_rootrem recursively, padding {up,un} with k zero limbs, 93 which will produce an approximate root with one more limb, 94 so that in most cases we can conclude. */ 95 { 96 mp_ptr sp, wp; 97 mp_size_t rn, sn, wn; 98 TMP_DECL; 99 TMP_MARK; 100 wn = un + k; 101 wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */ 102 sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */ 103 sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */ 104 MPN_COPY (wp + k, up, un); 105 MPN_ZERO (wp, k); 106 rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1); 107 /* the approximate root S = {sp,sn} is either the correct root of 108 {sp,sn}, or one too large. Thus unless the least significant limb 109 of S is 0 or 1, we can deduce the root of {up,un} is S truncated by 110 one limb. (In case sp[0]=1, we can deduce the root, but not decide 111 whether it is exact or not.) */ 112 MPN_COPY (rootp, sp + 1, sn - 1); 113 TMP_FREE; 114 return rn; 115 } 116 else /* remp <> NULL */ 117 { 118 return mpn_rootrem_internal (rootp, remp, up, un, k, 0); 119 } 120 } 121 122 /* if approx is non-zero, does not compute the final remainder */ 123 static mp_size_t 124 mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un, 125 mp_limb_t k, int approx) 126 { 127 mp_ptr qp, rp, sp, wp; 128 mp_size_t qn, rn, sn, wn, nl, bn; 129 mp_limb_t save, save2, cy; 130 unsigned long int unb; /* number of significant bits of {up,un} */ 131 unsigned long int xnb; /* number of significant bits of the result */ 132 unsigned int cnt; 133 unsigned long b, kk; 134 unsigned long sizes[GMP_NUMB_BITS + 1]; 135 int ni, i; 136 int c; 137 int logk; 138 TMP_DECL; 139 140 TMP_MARK; 141 142 /* qp and wp need enough space to store S'^k where S' is an approximate 143 root. Since S' can be as large as S+2, the worst case is when S=2 and 144 S'=4. But then since we know the number of bits of S in advance, S' 145 can only be 3 at most. Similarly for S=4, then S' can be 6 at most. 146 So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k 147 fits in un limbs, the number of extra limbs needed is bounded by 148 ceil(k*log2(3/2)/GMP_NUMB_BITS). */ 149 #define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS) 150 qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder 151 of R/(k*S^(k-1)), and S^k */ 152 if (remp == NULL) 153 rp = TMP_ALLOC_LIMBS (un); /* will contain the remainder */ 154 else 155 rp = remp; 156 sp = rootp; 157 wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1), 158 and temporary for mpn_pow_1 */ 159 count_leading_zeros (cnt, up[un - 1]); 160 unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS; 161 /* unb is the number of bits of the input U */ 162 163 xnb = (unb - 1) / k + 1; /* ceil (unb / k) */ 164 /* xnb is the number of bits of the root R */ 165 166 if (xnb == 1) /* root is 1 */ 167 { 168 if (remp == NULL) 169 remp = rp; 170 mpn_sub_1 (remp, up, un, (mp_limb_t) 1); 171 MPN_NORMALIZE (remp, un); /* There should be at most one zero limb, 172 if we demand u to be normalized */ 173 rootp[0] = 1; 174 TMP_FREE; 175 return un; 176 } 177 178 /* We initialize the algorithm with a 1-bit approximation to zero: since we 179 know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that 180 r0^k = 2^(k*(xnb-1)), that we subtract to the input. */ 181 kk = k * (xnb - 1); /* number of truncated bits in the input */ 182 rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */ 183 MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS); 184 mpn_sub_1 (rp, rp, rn, 1); /* subtract the initial approximation: since 185 the non-truncated part is less than 2^k, it 186 is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */ 187 sp[0] = 1; /* initial approximation */ 188 sn = 1; /* it has one limb */ 189 190 for (logk = 1; ((k - 1) >> logk) != 0; logk++) 191 ; 192 /* logk = ceil(log(k)/log(2)) */ 193 194 b = xnb - 1; /* number of remaining bits to determine in the kth root */ 195 ni = 0; 196 while (b != 0) 197 { 198 /* invariant: here we want b+1 total bits for the kth root */ 199 sizes[ni] = b; 200 /* if c is the new value of b, this means that we'll go from a root 201 of c+1 bits (say s') to a root of b+1 bits. 202 It is proved in the book "Modern Computer Arithmetic" from Brent 203 and Zimmermann, Chapter 1, that 204 if s' >= k*beta, then at most one correction is necessary. 205 Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that 206 c >= ceil((b + log2(k))/2). */ 207 b = (b + logk + 1) / 2; 208 if (b >= sizes[ni]) 209 b = sizes[ni] - 1; /* add just one bit at a time */ 210 ni++; 211 } 212 sizes[ni] = 0; 213 ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1); 214 /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with 215 sizes[i] <= 2 * sizes[i+1]. 216 Newton iteration will first compute sizes[ni-1] extra bits, 217 then sizes[ni-2], ..., then sizes[0] = b. */ 218 219 wp[0] = 1; /* {sp,sn}^(k-1) = 1 */ 220 wn = 1; 221 for (i = ni; i != 0; i--) 222 { 223 /* 1: loop invariant: 224 {sp, sn} is the current approximation of the root, which has 225 exactly 1 + sizes[ni] bits. 226 {rp, rn} is the current remainder 227 {wp, wn} = {sp, sn}^(k-1) 228 kk = number of truncated bits of the input 229 */ 230 b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that 231 iteration */ 232 233 /* Reinsert a low zero limb if we normalized away the entire remainder */ 234 if (rn == 0) 235 { 236 rp[0] = 0; 237 rn = 1; 238 } 239 240 /* first multiply the remainder by 2^b */ 241 MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS); 242 rn = rn + b / GMP_NUMB_BITS; 243 if (cy != 0) 244 { 245 rp[rn] = cy; 246 rn++; 247 } 248 249 kk = kk - b; 250 251 /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ 252 253 /* Now insert bits [kk,kk+b-1] from the input U */ 254 bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */ 255 save = rp[bn]; 256 /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */ 257 nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS); 258 /* nl = 1 + floor((kk + b - 1) / GMP_NUMB_BITS) 259 - floor(kk / GMP_NUMB_BITS) 260 <= 1 + (kk + b - 1) / GMP_NUMB_BITS 261 - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS 262 = 2 + (b - 2) / GMP_NUMB_BITS 263 thus since nl is an integer: 264 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */ 265 /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */ 266 if (nl - 1 > bn) 267 save2 = rp[bn + 1]; 268 MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS); 269 /* set to zero high bits of rp[bn] */ 270 rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1; 271 /* restore corresponding bits */ 272 rp[bn] |= save; 273 if (nl - 1 > bn) 274 rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since 275 they start by bit 0 in rp[0], so they use 276 at most ceil(b/GMP_NUMB_BITS) limbs */ 277 278 /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ 279 280 /* compute {wp, wn} = k * {sp, sn}^(k-1) */ 281 cy = mpn_mul_1 (wp, wp, wn, k); 282 wp[wn] = cy; 283 wn += cy != 0; 284 285 /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ 286 287 /* now divide {rp, rn} by {wp, wn} to get the low part of the root */ 288 if (rn < wn) 289 { 290 qn = 0; 291 } 292 else 293 { 294 mp_ptr tp; 295 qn = rn - wn; /* expected quotient size */ 296 /* tp must have space for wn limbs. 297 The quotient needs rn-wn+1 limbs, thus quotient+remainder 298 need altogether rn+1 limbs. */ 299 tp = qp + qn + 1; /* put remainder in Q buffer */ 300 mpn_tdiv_q (qp, tp, 0, rp, rn, wp, wn); 301 qn += qp[qn] != 0; 302 } 303 304 /* 5: current buffers: {sp,sn}, {qp,qn}. 305 Note: {rp,rn} is not needed any more since we'll compute it from 306 scratch at the end of the loop. 307 */ 308 309 /* Number of limbs used by b bits, when least significant bit is 310 aligned to least limb */ 311 bn = (b - 1) / GMP_NUMB_BITS + 1; 312 313 /* the quotient should be smaller than 2^b, since the previous 314 approximation was correctly rounded toward zero */ 315 if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) && 316 qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)))) 317 { 318 qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */ 319 MPN_ZERO (qp, qn); 320 qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS); 321 MPN_DECR_U (qp, qn, 1); 322 qn -= qp[qn - 1] == 0; 323 } 324 325 /* 6: current buffers: {sp,sn}, {qp,qn} */ 326 327 /* multiply the root approximation by 2^b */ 328 MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS); 329 sn = sn + b / GMP_NUMB_BITS; 330 if (cy != 0) 331 { 332 sp[sn] = cy; 333 sn++; 334 } 335 336 /* 7: current buffers: {sp,sn}, {qp,qn} */ 337 338 ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn 339 above, q is set to 2^b-1, which has 340 exactly bn limbs */ 341 342 /* Combine sB and q to form sB + q. */ 343 save = sp[b / GMP_NUMB_BITS]; 344 MPN_COPY (sp, qp, qn); 345 MPN_ZERO (sp + qn, bn - qn); 346 sp[b / GMP_NUMB_BITS] |= save; 347 348 /* 8: current buffer: {sp,sn} */ 349 350 /* Since each iteration treats b bits from the root and thus k*b bits 351 from the input, and we already considered b bits from the input, 352 we now have to take another (k-1)*b bits from the input. */ 353 kk -= (k - 1) * b; /* remaining input bits */ 354 /* {rp, rn} = floor({up, un} / 2^kk) */ 355 MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS); 356 rn = un - kk / GMP_NUMB_BITS; 357 rn -= rp[rn - 1] == 0; 358 359 /* 9: current buffers: {sp,sn}, {rp,rn} */ 360 361 for (c = 0;; c++) 362 { 363 /* Compute S^k in {qp,qn}. */ 364 if (i == 1) 365 { 366 /* Last iteration: we don't need W anymore. */ 367 /* mpn_pow_1 requires that both qp and wp have enough space to 368 store the result {sp,sn}^k + 1 limb */ 369 approx = approx && (sp[0] > 1); 370 qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0; 371 } 372 else 373 { 374 /* W <- S^(k-1) for the next iteration, 375 and S^k = W * S. */ 376 wn = mpn_pow_1 (wp, sp, sn, k - 1, qp); 377 mpn_mul (qp, wp, wn, sp, sn); 378 qn = wn + sn; 379 qn -= qp[qn - 1] == 0; 380 } 381 382 /* if S^k > floor(U/2^kk), the root approximation was too large */ 383 if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0)) 384 MPN_DECR_U (sp, sn, 1); 385 else 386 break; 387 } 388 389 /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */ 390 391 ASSERT_ALWAYS (c <= 1); 392 ASSERT_ALWAYS (rn >= qn); 393 394 /* R = R - Q = floor(U/2^kk) - S^k */ 395 if ((i > 1) || (approx == 0)) 396 { 397 mpn_sub (rp, rp, rn, qp, qn); 398 MPN_NORMALIZE (rp, rn); 399 } 400 /* otherwise we have rn > 0, thus the return value is ok */ 401 402 /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */ 403 } 404 405 TMP_FREE; 406 return rn; 407 } 408 409 /* return the quotient Q = {np, nn} divided by {dp, dn} only */ 410 static void 411 mpn_tdiv_q (mp_ptr qp, mp_ptr rp, mp_size_t qxn, mp_srcptr np, mp_size_t nn, 412 mp_srcptr dp, mp_size_t dn) 413 { 414 mp_size_t qn = nn - dn; /* expected quotient size is qn+1 */ 415 mp_size_t cut; 416 417 ASSERT_ALWAYS (qxn == 0); 418 if (dn <= qn + 3) 419 { 420 mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn); 421 } 422 else 423 { 424 mp_ptr tp; 425 TMP_DECL; 426 TMP_MARK; 427 tp = TMP_ALLOC_LIMBS (qn + 2); 428 cut = dn - (qn + 3); 429 /* perform a first division with divisor cut to dn-cut=qn+3 limbs 430 and dividend to nn-(cut-1) limbs, i.e. the quotient will be one 431 limb more than the final quotient. 432 The quotient will have qn+2 < dn-cut limbs, 433 and the remainder dn-cut = qn+3 limbs. */ 434 mpn_tdiv_qr (tp, rp, 0, np + cut - 1, nn - cut + 1, dp + cut, dn - cut); 435 /* let Q' be the quotient of B * {np, nn} by {dp, dn} [qn+2 limbs] 436 and T be the approximation of Q' computed above, where 437 B = 2^GMP_NUMB_BITS. 438 We have Q' <= T <= Q'+1, and since floor(Q'/B) = Q, we have 439 Q = floor(T/B), unless the last limb of T only consists of zeroes. */ 440 if (tp[0] != 0) 441 { 442 /* simply truncate one limb of T */ 443 MPN_COPY (qp, tp + 1, qn + 1); 444 } 445 else /* too bad: perform the expensive division */ 446 { 447 mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn); 448 } 449 TMP_FREE; 450 } 451 } 452