xref: /dragonfly/contrib/gmp/mpn/generic/get_str.c (revision 0db87cb7)
1 /* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE
6    INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN
7    FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
8    GNU MP RELEASE.
9 
10 Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
11 2008 Free Software Foundation, Inc.
12 
13 This file is part of the GNU MP Library.
14 
15 The GNU MP Library is free software; you can redistribute it and/or modify
16 it under the terms of the GNU Lesser General Public License as published by
17 the Free Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19 
20 The GNU MP Library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
23 License for more details.
24 
25 You should have received a copy of the GNU Lesser General Public License
26 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
27 
28 #include "gmp.h"
29 #include "gmp-impl.h"
30 #include "longlong.h"
31 
32 /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
33    base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
34 
35   A) Divide U repeatedly by B, generating a quotient and remainder, until the
36      quotient becomes zero.  The remainders hold the converted digits.  Digits
37      come out from right to left.  (Used in mpn_sb_get_str.)
38 
39   B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
40      Then develop digits by multiplying the fraction repeatedly by b.  Digits
41      come out from left to right.  (Currently not used herein, except for in
42      code for converting single limbs to individual digits.)
43 
44   C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
45      sqrt(U).  Then divide U by B^s, generating quotient and remainder.
46      Recursively convert the quotient, then the remainder, using the
47      precomputed powers.  Digits come out from left to right.  (Used in
48      mpn_dc_get_str.)
49 
50   When using algorithm C, algorithm B might be suitable for basecase code,
51   since the required b^g power will be readily accessible.
52 
53   Optimization ideas:
54   1. The recursive function of (C) could use less temporary memory.  The powtab
55      allocation could be trimmed with some computation, and the tmp area could
56      be reduced, or perhaps eliminated if up is reused for both quotient and
57      remainder (it is currently used just for remainder).
58   2. Store the powers of (C) in normalized form, with the normalization count.
59      Quotients will usually need to be left-shifted before each divide, and
60      remainders will either need to be left-shifted of right-shifted.
61   3. In the code for developing digits from a single limb, we could avoid using
62      a full umul_ppmm except for the first (or first few) digits, provided base
63      is even.  Subsequent digits can be developed using plain multiplication.
64      (This saves on register-starved machines (read x86) and on all machines
65      that generate the upper product half using a separate instruction (alpha,
66      powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
67   4. Separate mpn_dc_get_str basecase code from code for small conversions. The
68      former code will have the exact right power readily available in the
69      powtab parameter for dividing the current number into a fraction.  Convert
70      that using algorithm B.
71   5. Completely avoid division.  Compute the inverses of the powers now in
72      powtab instead of the actual powers.
73   6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
74      about 30% (1-log(5)/log(10)).
75 
76   Basic structure of (C):
77     mpn_get_str:
78       if POW2_P (n)
79 	...
80       else
81 	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
82 	  mpn_sb_get_str (str, base, up, un);
83 	else
84 	  precompute_power_tables
85 	  mpn_dc_get_str
86 
87     mpn_dc_get_str:
88 	mpn_tdiv_qr
89 	if (qn < GET_STR_DC_THRESHOLD)
90 	  mpn_sb_get_str
91 	else
92 	  mpn_dc_get_str
93 	if (rn < GET_STR_DC_THRESHOLD)
94 	  mpn_sb_get_str
95 	else
96 	  mpn_dc_get_str
97 
98 
99   The reason for the two threshold values is the cost of
100   precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
101   larger than GET_STR_PRECOMPUTE_THRESHOLD.  */
102 
103 
104 /* The x86s and m68020 have a quotient and remainder "div" instruction and
105    gcc recognises an adjacent "/" and "%" can be combined using that.
106    Elsewhere "/" and "%" are either separate instructions, or separate
107    libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
108    A multiply and subtract should be faster than a "%" in those cases.  */
109 #if HAVE_HOST_CPU_FAMILY_x86            \
110   || HAVE_HOST_CPU_m68020               \
111   || HAVE_HOST_CPU_m68030               \
112   || HAVE_HOST_CPU_m68040               \
113   || HAVE_HOST_CPU_m68060               \
114   || HAVE_HOST_CPU_m68360 /* CPU32 */
115 #define udiv_qrnd_unnorm(q,r,n,d)       \
116   do {                                  \
117     mp_limb_t  __q = (n) / (d);         \
118     mp_limb_t  __r = (n) % (d);         \
119     (q) = __q;                          \
120     (r) = __r;                          \
121   } while (0)
122 #else
123 #define udiv_qrnd_unnorm(q,r,n,d)       \
124   do {                                  \
125     mp_limb_t  __q = (n) / (d);         \
126     mp_limb_t  __r = (n) - __q*(d);     \
127     (q) = __q;                          \
128     (r) = __r;                          \
129   } while (0)
130 #endif
131 
132 
133 /* Convert {up,un} to a string in base base, and put the result in str.
134    Generate len characters, possibly padding with zeros to the left.  If len is
135    zero, generate as many characters as required.  Return a pointer immediately
136    after the last digit of the result string.  Complexity is O(un^2); intended
137    for small conversions.  */
138 static unsigned char *
139 mpn_sb_get_str (unsigned char *str, size_t len,
140 		mp_ptr up, mp_size_t un, int base)
141 {
142   mp_limb_t rl, ul;
143   unsigned char *s;
144   size_t l;
145   /* Allocate memory for largest possible string, given that we only get here
146      for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
147      base is 3.  7/11 is an approximation to 1/log2(3).  */
148 #if TUNE_PROGRAM_BUILD
149 #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
150 #else
151 #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
152 #endif
153   unsigned char buf[BUF_ALLOC];
154 #if TUNE_PROGRAM_BUILD
155   mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
156 #else
157   mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
158 #endif
159 
160   if (base == 10)
161     {
162       /* Special case code for base==10 so that the compiler has a chance to
163 	 optimize things.  */
164 
165       MPN_COPY (rp + 1, up, un);
166 
167       s = buf + BUF_ALLOC;
168       while (un > 1)
169 	{
170 	  int i;
171 	  mp_limb_t frac, digit;
172 	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
173 					 MP_BASES_BIG_BASE_10,
174 					 MP_BASES_BIG_BASE_INVERTED_10,
175 					 MP_BASES_NORMALIZATION_STEPS_10);
176 	  un -= rp[un] == 0;
177 	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
178 	  s -= MP_BASES_CHARS_PER_LIMB_10;
179 #if HAVE_HOST_CPU_FAMILY_x86
180 	  /* The code below turns out to be a bit slower for x86 using gcc.
181 	     Use plain code.  */
182 	  i = MP_BASES_CHARS_PER_LIMB_10;
183 	  do
184 	    {
185 	      umul_ppmm (digit, frac, frac, 10);
186 	      *s++ = digit;
187 	    }
188 	  while (--i);
189 #else
190 	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
191 	     After a few umul_ppmm, we will have accumulated enough low zeros
192 	     to use a plain multiply.  */
193 	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
194 	    {
195 	      umul_ppmm (digit, frac, frac, 10);
196 	      *s++ = digit;
197 	    }
198 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
199 	    {
200 	      umul_ppmm (digit, frac, frac, 10);
201 	      *s++ = digit;
202 	    }
203 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
204 	    {
205 	      umul_ppmm (digit, frac, frac, 10);
206 	      *s++ = digit;
207 	    }
208 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
209 	    {
210 	      umul_ppmm (digit, frac, frac, 10);
211 	      *s++ = digit;
212 	    }
213 	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
214 					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
215 					     : 0));
216 	  frac = (frac + 0xf) >> 4;
217 	  do
218 	    {
219 	      frac *= 10;
220 	      digit = frac >> (GMP_LIMB_BITS - 4);
221 	      *s++ = digit;
222 	      frac &= (~(mp_limb_t) 0) >> 4;
223 	    }
224 	  while (--i);
225 #endif
226 	  s -= MP_BASES_CHARS_PER_LIMB_10;
227 	}
228 
229       ul = rp[1];
230       while (ul != 0)
231 	{
232 	  udiv_qrnd_unnorm (ul, rl, ul, 10);
233 	  *--s = rl;
234 	}
235     }
236   else /* not base 10 */
237     {
238       unsigned chars_per_limb;
239       mp_limb_t big_base, big_base_inverted;
240       unsigned normalization_steps;
241 
242       chars_per_limb = mp_bases[base].chars_per_limb;
243       big_base = mp_bases[base].big_base;
244       big_base_inverted = mp_bases[base].big_base_inverted;
245       count_leading_zeros (normalization_steps, big_base);
246 
247       MPN_COPY (rp + 1, up, un);
248 
249       s = buf + BUF_ALLOC;
250       while (un > 1)
251 	{
252 	  int i;
253 	  mp_limb_t frac;
254 	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
255 					 big_base, big_base_inverted,
256 					 normalization_steps);
257 	  un -= rp[un] == 0;
258 	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
259 	  s -= chars_per_limb;
260 	  i = chars_per_limb;
261 	  do
262 	    {
263 	      mp_limb_t digit;
264 	      umul_ppmm (digit, frac, frac, base);
265 	      *s++ = digit;
266 	    }
267 	  while (--i);
268 	  s -= chars_per_limb;
269 	}
270 
271       ul = rp[1];
272       while (ul != 0)
273 	{
274 	  udiv_qrnd_unnorm (ul, rl, ul, base);
275 	  *--s = rl;
276 	}
277     }
278 
279   l = buf + BUF_ALLOC - s;
280   while (l < len)
281     {
282       *str++ = 0;
283       len--;
284     }
285   while (l != 0)
286     {
287       *str++ = *s++;
288       l--;
289     }
290   return str;
291 }
292 
293 
294 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
295    the string in STR.  Generate LEN characters, possibly padding with zeros to
296    the left.  If LEN is zero, generate as many characters as required.
297    Return a pointer immediately after the last digit of the result string.
298    This uses divide-and-conquer and is intended for large conversions.  */
299 static unsigned char *
300 mpn_dc_get_str (unsigned char *str, size_t len,
301 		mp_ptr up, mp_size_t un,
302 		const powers_t *powtab, mp_ptr tmp)
303 {
304   if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
305     {
306       if (un != 0)
307 	str = mpn_sb_get_str (str, len, up, un, powtab->base);
308       else
309 	{
310 	  while (len != 0)
311 	    {
312 	      *str++ = 0;
313 	      len--;
314 	    }
315 	}
316     }
317   else
318     {
319       mp_ptr pwp, qp, rp;
320       mp_size_t pwn, qn;
321       mp_size_t sn;
322 
323       pwp = powtab->p;
324       pwn = powtab->n;
325       sn = powtab->shift;
326 
327       if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
328 	{
329 	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
330 	}
331       else
332 	{
333 	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
334 	  rp = up;		/* pwn limbs for rp; overwrite up area */
335 
336 	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
337 	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
338 
339 	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
340 
341 	  if (len != 0)
342 	    len = len - powtab->digits_in_base;
343 
344 	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
345 	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
346 	}
347     }
348   return str;
349 }
350 
351 
352 /* There are no leading zeros on the digits generated at str, but that's not
353    currently a documented feature.  */
354 
355 size_t
356 mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
357 {
358   mp_ptr powtab_mem, powtab_mem_ptr;
359   mp_limb_t big_base;
360   size_t digits_in_base;
361   powers_t powtab[GMP_LIMB_BITS];
362   int pi;
363   mp_size_t n;
364   mp_ptr p, t;
365   size_t out_len;
366   mp_ptr tmp;
367   TMP_DECL;
368 
369   /* Special case zero, as the code below doesn't handle it.  */
370   if (un == 0)
371     {
372       str[0] = 0;
373       return 1;
374     }
375 
376   if (POW2_P (base))
377     {
378       /* The base is a power of 2.  Convert from most significant end.  */
379       mp_limb_t n1, n0;
380       int bits_per_digit = mp_bases[base].big_base;
381       int cnt;
382       int bit_pos;
383       mp_size_t i;
384       unsigned char *s = str;
385       mp_bitcnt_t bits;
386 
387       n1 = up[un - 1];
388       count_leading_zeros (cnt, n1);
389 
390       /* BIT_POS should be R when input ends in least significant nibble,
391 	 R + bits_per_digit * n when input ends in nth least significant
392 	 nibble. */
393 
394       bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
395       cnt = bits % bits_per_digit;
396       if (cnt != 0)
397 	bits += bits_per_digit - cnt;
398       bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
399 
400       /* Fast loop for bit output.  */
401       i = un - 1;
402       for (;;)
403 	{
404 	  bit_pos -= bits_per_digit;
405 	  while (bit_pos >= 0)
406 	    {
407 	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
408 	      bit_pos -= bits_per_digit;
409 	    }
410 	  i--;
411 	  if (i < 0)
412 	    break;
413 	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
414 	  n1 = up[i];
415 	  bit_pos += GMP_NUMB_BITS;
416 	  *s++ = n0 | (n1 >> bit_pos);
417 	}
418 
419       return s - str;
420     }
421 
422   /* General case.  The base is not a power of 2.  */
423 
424   if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
425     return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;
426 
427   TMP_MARK;
428 
429   /* Allocate one large block for the powers of big_base.  */
430   powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un));
431   powtab_mem_ptr = powtab_mem;
432 
433   /* Compute a table of powers, were the largest power is >= sqrt(U).  */
434 
435   big_base = mp_bases[base].big_base;
436   digits_in_base = mp_bases[base].chars_per_limb;
437 
438   {
439     mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
440     mp_limb_t cy;
441     mp_size_t shift;
442 
443     n_pows = 0;
444     xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
445     for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
446       {
447 	exptab[n_pows] = pn;
448 	n_pows++;
449       }
450     exptab[n_pows] = 1;
451 
452     powtab[0].p = &big_base;
453     powtab[0].n = 1;
454     powtab[0].digits_in_base = digits_in_base;
455     powtab[0].base = base;
456     powtab[0].shift = 0;
457 
458     powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;
459     powtab[1].p[0] = big_base;
460     powtab[1].n = 1;
461     powtab[1].digits_in_base = digits_in_base;
462     powtab[1].base = base;
463     powtab[1].shift = 0;
464 
465     n = 1;
466     p = &big_base;
467     bexp = 1;
468     shift = 0;
469     for (pi = 2; pi < n_pows; pi++)
470       {
471 	t = powtab_mem_ptr;
472 	powtab_mem_ptr += 2 * n + 2;
473 
474 	ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
475 
476 	mpn_sqr (t, p, n);
477 
478 	digits_in_base *= 2;
479 	n *= 2;  n -= t[n - 1] == 0;
480 	bexp *= 2;
481 
482 	if (bexp + 1 < exptab[n_pows - pi])
483 	  {
484 	    digits_in_base += mp_bases[base].chars_per_limb;
485 	    cy = mpn_mul_1 (t, t, n, big_base);
486 	    t[n] = cy;
487 	    n += cy != 0;
488 	    bexp += 1;
489 	  }
490 	shift *= 2;
491 	/* Strip low zero limbs.  */
492 	while (t[0] == 0)
493 	  {
494 	    t++;
495 	    n--;
496 	    shift++;
497 	  }
498 	p = t;
499 	powtab[pi].p = p;
500 	powtab[pi].n = n;
501 	powtab[pi].digits_in_base = digits_in_base;
502 	powtab[pi].base = base;
503 	powtab[pi].shift = shift;
504       }
505 
506     for (pi = 1; pi < n_pows; pi++)
507       {
508 	t = powtab[pi].p;
509 	n = powtab[pi].n;
510 	cy = mpn_mul_1 (t, t, n, big_base);
511 	t[n] = cy;
512 	n += cy != 0;
513 	if (t[0] == 0)
514 	  {
515 	    powtab[pi].p = t + 1;
516 	    n--;
517 	    powtab[pi].shift++;
518 	  }
519 	powtab[pi].n = n;
520 	powtab[pi].digits_in_base += mp_bases[base].chars_per_limb;
521       }
522 
523 #if 0
524     { int i;
525       printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn);
526       for (i = 0; i < n_pows; i++)
527 	printf ("%2d: %10ld %10ld %11ld %ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift);
528     }
529 #endif
530   }
531 
532   /* Using our precomputed powers, now in powtab[], convert our number.  */
533   tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
534   out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;
535   TMP_FREE;
536 
537   return out_len;
538 }
539