1 /* factor -- print prime factors of n.
2    Copyright (C) 1986-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18    Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19    Arbitrary-precision code adapted by James Youngman from Torbjörn
20    Granlund's factorize.c, from GNU MP version 4.2.2.
21    In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22    Contains code from GNU MP.  */
23 
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25    or, with GMP, numbers of any size.
26 
27   Code organisation:
28 
29     There are several variants of many functions, for handling one word, two
30     words, and GMP's mpz_t type.  If the one-word variant is called foo, the
31     two-word variant will be foo2, and the one for mpz_t will be mp_foo.  In
32     some cases, the plain function variants will handle both one-word and
33     two-word numbers, evidenced by function arguments.
34 
35     The factoring code for two words will fall into the code for one word when
36     progress allows that.
37 
38     Using GMP is optional.  Define HAVE_GMP to make this code include GMP
39     factoring code.  The GMP factoring code is based on GMP's demos/factorize.c
40     (last synced 2012-09-07).  The GMP-based factoring code will stay in GMP
41     factoring code even if numbers get small enough for using the two-word
42     code.
43 
44   Algorithm:
45 
46     (1) Perform trial division using a small primes table, but without hardware
47         division since the primes table store inverses modulo the word base.
48         (The GMP variant of this code doesn't make use of the precomputed
49         inverses, but instead relies on GMP for fast divisibility testing.)
50     (2) Check the nature of any non-factored part using Miller-Rabin for
51         detecting composites, and Lucas for detecting primes.
52     (3) Factor any remaining composite part using the Pollard-Brent rho
53         algorithm or if USE_SQUFOF is defined to 1, try that first.
54         Status of found factors are checked again using Miller-Rabin and Lucas.
55 
56     We prefer using Hensel norm in the divisions, not the more familiar
57     Euclidian norm, since the former leads to much faster code.  In the
58     Pollard-Brent rho code and the prime testing code, we use Montgomery's
59     trick of multiplying all n-residues by the word base, allowing cheap Hensel
60     reductions mod n.
61 
62   Improvements:
63 
64     * Use modular inverses also for exact division in the Lucas code, and
65       elsewhere.  A problem is to locate the inverses not from an index, but
66       from a prime.  We might instead compute the inverse on-the-fly.
67 
68     * Tune trial division table size (not forgetting that this is a standalone
69       program where the table will be read from disk for each invocation).
70 
71     * Implement less naive powm, using k-ary exponentiation for k = 3 or
72       perhaps k = 4.
73 
74     * Try to speed trial division code for single uintmax_t numbers, i.e., the
75       code using DIVBLOCK.  It currently runs at 2 cycles per prime (Intel SBR,
76       IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77       using gcc 4.6 and 4.7.  Some software pipelining should help; 1, 2, and 4
78       respectively cycles ought to be possible.
79 
80     * The redcify function could be vastly improved by using (plain Euclidian)
81       pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82       GMP's gmp-impl.h).  The redcify2 function could be vastly improved using
83       similar methoods.  These functions currently dominate run time when using
84       the -w option.
85 */
86 
87 /* Whether to recursively factor to prove primality,
88    or run faster probabilistic tests.  */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
92 
93 /* Faster for certain ranges but less general.  */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
97 
98 /* Output SQUFOF statistics.  */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
102 
103 
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #if HAVE_GMP
108 # include <gmp.h>
109 # if !HAVE_DECL_MPZ_INITS
110 #  include <stdarg.h>
111 # endif
112 #endif
113 
114 #include <assert.h>
115 
116 #include "system.h"
117 #include "die.h"
118 #include "error.h"
119 #include "full-write.h"
120 #include "quote.h"
121 #include "readtokens.h"
122 #include "xstrtol.h"
123 
124 /* The official name of this program (e.g., no 'g' prefix).  */
125 #define PROGRAM_NAME "factor"
126 
127 #define AUTHORS \
128   proper_name ("Paul Rubin"),                                           \
129   proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"),   \
130   proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
131 
132 /* Token delimiters when reading from a file.  */
133 #define DELIM "\n\t "
134 
135 #ifndef USE_LONGLONG_H
136 /* With the way we use longlong.h, it's only safe to use
137    when UWtype = UHWtype, as there were various cases
138    (as can be seen in the history for longlong.h) where
139    for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
140    to avoid compile time or run time issues.  */
141 # if LONG_MAX == INTMAX_MAX
142 #  define USE_LONGLONG_H 1
143 # endif
144 #endif
145 
146 #if USE_LONGLONG_H
147 
148 /* Make definitions for longlong.h to make it do what it can do for us */
149 
150 /* bitcount for uintmax_t */
151 # if UINTMAX_MAX == UINT32_MAX
152 #  define W_TYPE_SIZE 32
153 # elif UINTMAX_MAX == UINT64_MAX
154 #  define W_TYPE_SIZE 64
155 # elif UINTMAX_MAX == UINT128_MAX
156 #  define W_TYPE_SIZE 128
157 # endif
158 
159 # define UWtype  uintmax_t
160 # define UHWtype unsigned long int
161 # undef UDWtype
162 # if HAVE_ATTRIBUTE_MODE
163 typedef unsigned int UQItype    __attribute__ ((mode (QI)));
164 typedef          int SItype     __attribute__ ((mode (SI)));
165 typedef unsigned int USItype    __attribute__ ((mode (SI)));
166 typedef          int DItype     __attribute__ ((mode (DI)));
167 typedef unsigned int UDItype    __attribute__ ((mode (DI)));
168 # else
169 typedef unsigned char UQItype;
170 typedef          long SItype;
171 typedef unsigned long int USItype;
172 #  if HAVE_LONG_LONG_INT
173 typedef long long int DItype;
174 typedef unsigned long long int UDItype;
175 #  else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
176 typedef long int DItype;
177 typedef unsigned long int UDItype;
178 #  endif
179 # endif
180 # define LONGLONG_STANDALONE     /* Don't require GMP's longlong.h mdep files */
181 # define ASSERT(x)               /* FIXME make longlong.h really standalone */
182 # define __GMP_DECLSPEC          /* FIXME make longlong.h really standalone */
183 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
184 # ifndef __GMP_GNUC_PREREQ
185 #  define __GMP_GNUC_PREREQ(a,b) 1
186 # endif
187 
188 /* These stub macros are only used in longlong.h in certain system compiler
189    combinations, so ensure usage to avoid -Wunused-macros warnings.  */
190 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
191 ASSERT (1)
192 __GMP_DECLSPEC
193 # endif
194 
195 # if _ARCH_PPC
196 #  define HAVE_HOST_CPU_FAMILY_powerpc 1
197 # endif
198 # include "longlong.h"
199 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
200 const unsigned char factor_clz_tab[129] =
201 {
202   1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
203   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
204   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
205   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
206   9
207 };
208 # endif
209 
210 #else /* not USE_LONGLONG_H */
211 
212 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
213 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
214 # define __ll_lowpart(t)  ((uintmax_t) (t) & (__ll_B - 1))
215 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
216 
217 #endif
218 
219 #if !defined __clz_tab && !defined UHWtype
220 /* Without this seemingly useless conditional, gcc -Wunused-macros
221    warns that each of the two tested macros is unused on Fedora 18.
222    FIXME: this is just an ugly band-aid.  Fix it properly.  */
223 #endif
224 
225 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
226 #define MAX_NFACTS 26
227 
228 enum
229 {
230   DEV_DEBUG_OPTION = CHAR_MAX + 1
231 };
232 
233 static struct option const long_options[] =
234 {
235   {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
236   {GETOPT_HELP_OPTION_DECL},
237   {GETOPT_VERSION_OPTION_DECL},
238   {NULL, 0, NULL, 0}
239 };
240 
241 struct factors
242 {
243   uintmax_t     plarge[2]; /* Can have a single large factor */
244   uintmax_t     p[MAX_NFACTS];
245   unsigned char e[MAX_NFACTS];
246   unsigned char nfactors;
247 };
248 
249 #if HAVE_GMP
250 struct mp_factors
251 {
252   mpz_t             *p;
253   unsigned long int *e;
254   unsigned long int nfactors;
255 };
256 #endif
257 
258 static void factor (uintmax_t, uintmax_t, struct factors *);
259 
260 #ifndef umul_ppmm
261 # define umul_ppmm(w1, w0, u, v)                                        \
262   do {                                                                  \
263     uintmax_t __x0, __x1, __x2, __x3;                                   \
264     unsigned long int __ul, __vl, __uh, __vh;                           \
265     uintmax_t __u = (u), __v = (v);                                     \
266                                                                         \
267     __ul = __ll_lowpart (__u);                                          \
268     __uh = __ll_highpart (__u);                                         \
269     __vl = __ll_lowpart (__v);                                          \
270     __vh = __ll_highpart (__v);                                         \
271                                                                         \
272     __x0 = (uintmax_t) __ul * __vl;                                     \
273     __x1 = (uintmax_t) __ul * __vh;                                     \
274     __x2 = (uintmax_t) __uh * __vl;                                     \
275     __x3 = (uintmax_t) __uh * __vh;                                     \
276                                                                         \
277     __x1 += __ll_highpart (__x0);/* this can't give carry */            \
278     __x1 += __x2;               /* but this indeed can */               \
279     if (__x1 < __x2)            /* did we get it? */                    \
280       __x3 += __ll_B;           /* yes, add it in the proper pos. */    \
281                                                                         \
282     (w1) = __x3 + __ll_highpart (__x1);                                 \
283     (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0);             \
284   } while (0)
285 #endif
286 
287 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
288 /* Define our own, not needing normalization. This function is
289    currently not performance critical, so keep it simple. Similar to
290    the mod macro below. */
291 # undef udiv_qrnnd
292 # define udiv_qrnnd(q, r, n1, n0, d)                                    \
293   do {                                                                  \
294     uintmax_t __d1, __d0, __q, __r1, __r0;                              \
295                                                                         \
296     assert ((n1) < (d));                                                \
297     __d1 = (d); __d0 = 0;                                               \
298     __r1 = (n1); __r0 = (n0);                                           \
299     __q = 0;                                                            \
300     for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--)                \
301       {                                                                 \
302         rsh2 (__d1, __d0, __d1, __d0, 1);                               \
303         __q <<= 1;                                                      \
304         if (ge2 (__r1, __r0, __d1, __d0))                               \
305           {                                                             \
306             __q++;                                                      \
307             sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0);            \
308           }                                                             \
309       }                                                                 \
310     (r) = __r0;                                                         \
311     (q) = __q;                                                          \
312   } while (0)
313 #endif
314 
315 #if !defined add_ssaaaa
316 # define add_ssaaaa(sh, sl, ah, al, bh, bl)                             \
317   do {                                                                  \
318     uintmax_t _add_x;                                                   \
319     _add_x = (al) + (bl);                                               \
320     (sh) = (ah) + (bh) + (_add_x < (al));                               \
321     (sl) = _add_x;                                                      \
322   } while (0)
323 #endif
324 
325 #define rsh2(rh, rl, ah, al, cnt)                                       \
326   do {                                                                  \
327     (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt));           \
328     (rh) = (ah) >> (cnt);                                               \
329   } while (0)
330 
331 #define lsh2(rh, rl, ah, al, cnt)                                       \
332   do {                                                                  \
333     (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt)));             \
334     (rl) = (al) << (cnt);                                               \
335   } while (0)
336 
337 #define ge2(ah, al, bh, bl)                                             \
338   ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
339 
340 #define gt2(ah, al, bh, bl)                                             \
341   ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
342 
343 #ifndef sub_ddmmss
344 # define sub_ddmmss(rh, rl, ah, al, bh, bl)                             \
345   do {                                                                  \
346     uintmax_t _cy;                                                      \
347     _cy = (al) < (bl);                                                  \
348     (rl) = (al) - (bl);                                                 \
349     (rh) = (ah) - (bh) - _cy;                                           \
350   } while (0)
351 #endif
352 
353 #ifndef count_leading_zeros
354 # define count_leading_zeros(count, x) do {                             \
355     uintmax_t __clz_x = (x);                                            \
356     unsigned int __clz_c;                                               \
357     for (__clz_c = 0;                                                   \
358          (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0;      \
359          __clz_c += 8)                                                  \
360       __clz_x <<= 8;                                                    \
361     for (; (intmax_t)__clz_x >= 0; __clz_c++)                           \
362       __clz_x <<= 1;                                                    \
363     (count) = __clz_c;                                                  \
364   } while (0)
365 #endif
366 
367 #ifndef count_trailing_zeros
368 # define count_trailing_zeros(count, x) do {                            \
369     uintmax_t __ctz_x = (x);                                            \
370     unsigned int __ctz_c = 0;                                           \
371     while ((__ctz_x & 1) == 0)                                          \
372       {                                                                 \
373         __ctz_x >>= 1;                                                  \
374         __ctz_c++;                                                      \
375       }                                                                 \
376     (count) = __ctz_c;                                                  \
377   } while (0)
378 #endif
379 
380 /* Requires that a < n and b <= n */
381 #define submod(r,a,b,n)                                                 \
382   do {                                                                  \
383     uintmax_t _t = - (uintmax_t) (a < b);                               \
384     (r) = ((n) & _t) + (a) - (b);                                       \
385   } while (0)
386 
387 #define addmod(r,a,b,n)                                                 \
388   submod ((r), (a), ((n) - (b)), (n))
389 
390 /* Modular two-word addition and subtraction.  For performance reasons, the
391    most significant bit of n1 must be clear.  The destination variables must be
392    distinct from the mod operand.  */
393 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0)                         \
394   do {                                                                  \
395     add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0));                    \
396     if (ge2 ((r1), (r0), (n1), (n0)))                                   \
397       sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0));                  \
398   } while (0)
399 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0)                         \
400   do {                                                                  \
401     sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0));                    \
402     if ((intmax_t) (r1) < 0)                                            \
403       add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0));                  \
404   } while (0)
405 
406 #define HIGHBIT_TO_MASK(x)                                              \
407   (((intmax_t)-1 >> 1) < 0                                              \
408    ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1))                    \
409    : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1))                        \
410       ? UINTMAX_MAX : (uintmax_t) 0))
411 
412 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
413    Requires that d1 != 0.  */
414 static uintmax_t
mod2(uintmax_t * r1,uintmax_t a1,uintmax_t a0,uintmax_t d1,uintmax_t d0)415 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
416 {
417   int cntd, cnta;
418 
419   assert (d1 != 0);
420 
421   if (a1 == 0)
422     {
423       *r1 = 0;
424       return a0;
425     }
426 
427   count_leading_zeros (cntd, d1);
428   count_leading_zeros (cnta, a1);
429   int cnt = cntd - cnta;
430   lsh2 (d1, d0, d1, d0, cnt);
431   for (int i = 0; i < cnt; i++)
432     {
433       if (ge2 (a1, a0, d1, d0))
434         sub_ddmmss (a1, a0, a1, a0, d1, d0);
435       rsh2 (d1, d0, d1, d0, 1);
436     }
437 
438   *r1 = a1;
439   return a0;
440 }
441 
442 static uintmax_t _GL_ATTRIBUTE_CONST
gcd_odd(uintmax_t a,uintmax_t b)443 gcd_odd (uintmax_t a, uintmax_t b)
444 {
445   if ( (b & 1) == 0)
446     {
447       uintmax_t t = b;
448       b = a;
449       a = t;
450     }
451   if (a == 0)
452     return b;
453 
454   /* Take out least significant one bit, to make room for sign */
455   b >>= 1;
456 
457   for (;;)
458     {
459       uintmax_t t;
460       uintmax_t bgta;
461 
462       while ((a & 1) == 0)
463         a >>= 1;
464       a >>= 1;
465 
466       t = a - b;
467       if (t == 0)
468         return (a << 1) + 1;
469 
470       bgta = HIGHBIT_TO_MASK (t);
471 
472       /* b <-- min (a, b) */
473       b += (bgta & t);
474 
475       /* a <-- |a - b| */
476       a = (t ^ bgta) - bgta;
477     }
478 }
479 
480 static uintmax_t
gcd2_odd(uintmax_t * r1,uintmax_t a1,uintmax_t a0,uintmax_t b1,uintmax_t b0)481 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
482 {
483   assert (b0 & 1);
484 
485   if ( (a0 | a1) == 0)
486     {
487       *r1 = b1;
488       return b0;
489     }
490 
491   while ((a0 & 1) == 0)
492     rsh2 (a1, a0, a1, a0, 1);
493 
494   for (;;)
495     {
496       if ((b1 | a1) == 0)
497         {
498           *r1 = 0;
499           return gcd_odd (b0, a0);
500         }
501 
502       if (gt2 (a1, a0, b1, b0))
503         {
504           sub_ddmmss (a1, a0, a1, a0, b1, b0);
505           do
506             rsh2 (a1, a0, a1, a0, 1);
507           while ((a0 & 1) == 0);
508         }
509       else if (gt2 (b1, b0, a1, a0))
510         {
511           sub_ddmmss (b1, b0, b1, b0, a1, a0);
512           do
513             rsh2 (b1, b0, b1, b0, 1);
514           while ((b0 & 1) == 0);
515         }
516       else
517         break;
518     }
519 
520   *r1 = a1;
521   return a0;
522 }
523 
524 static void
factor_insert_multiplicity(struct factors * factors,uintmax_t prime,unsigned int m)525 factor_insert_multiplicity (struct factors *factors,
526                             uintmax_t prime, unsigned int m)
527 {
528   unsigned int nfactors = factors->nfactors;
529   uintmax_t *p = factors->p;
530   unsigned char *e = factors->e;
531 
532   /* Locate position for insert new or increment e.  */
533   int i;
534   for (i = nfactors - 1; i >= 0; i--)
535     {
536       if (p[i] <= prime)
537         break;
538     }
539 
540   if (i < 0 || p[i] != prime)
541     {
542       for (int j = nfactors - 1; j > i; j--)
543         {
544           p[j + 1] = p[j];
545           e[j + 1] = e[j];
546         }
547       p[i + 1] = prime;
548       e[i + 1] = m;
549       factors->nfactors = nfactors + 1;
550     }
551   else
552     {
553       e[i] += m;
554     }
555 }
556 
557 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
558 
559 static void
factor_insert_large(struct factors * factors,uintmax_t p1,uintmax_t p0)560 factor_insert_large (struct factors *factors,
561                      uintmax_t p1, uintmax_t p0)
562 {
563   if (p1 > 0)
564     {
565       assert (factors->plarge[1] == 0);
566       factors->plarge[0] = p0;
567       factors->plarge[1] = p1;
568     }
569   else
570     factor_insert (factors, p0);
571 }
572 
573 #if HAVE_GMP
574 
575 # if !HAVE_DECL_MPZ_INITS
576 
577 #  define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
578 #  define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
579 
580 static void
mpz_va_init(void (* mpz_single_init)(mpz_t),...)581 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
582 {
583   va_list ap;
584 
585   va_start (ap, mpz_single_init);
586 
587   mpz_t *mpz;
588   while ((mpz = va_arg (ap, mpz_t *)))
589     mpz_single_init (*mpz);
590 
591   va_end (ap);
592 }
593 # endif
594 
595 static void mp_factor (mpz_t, struct mp_factors *);
596 
597 static void
mp_factor_init(struct mp_factors * factors)598 mp_factor_init (struct mp_factors *factors)
599 {
600   factors->p = NULL;
601   factors->e = NULL;
602   factors->nfactors = 0;
603 }
604 
605 static void
mp_factor_clear(struct mp_factors * factors)606 mp_factor_clear (struct mp_factors *factors)
607 {
608   for (unsigned int i = 0; i < factors->nfactors; i++)
609     mpz_clear (factors->p[i]);
610 
611   free (factors->p);
612   free (factors->e);
613 }
614 
615 static void
mp_factor_insert(struct mp_factors * factors,mpz_t prime)616 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
617 {
618   unsigned long int nfactors = factors->nfactors;
619   mpz_t         *p  = factors->p;
620   unsigned long int *e  = factors->e;
621   long i;
622 
623   /* Locate position for insert new or increment e.  */
624   for (i = nfactors - 1; i >= 0; i--)
625     {
626       if (mpz_cmp (p[i], prime) <= 0)
627         break;
628     }
629 
630   if (i < 0 || mpz_cmp (p[i], prime) != 0)
631     {
632       p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
633       e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
634 
635       mpz_init (p[nfactors]);
636       for (long j = nfactors - 1; j > i; j--)
637         {
638           mpz_set (p[j + 1], p[j]);
639           e[j + 1] = e[j];
640         }
641       mpz_set (p[i + 1], prime);
642       e[i + 1] = 1;
643 
644       factors->p = p;
645       factors->e = e;
646       factors->nfactors = nfactors + 1;
647     }
648   else
649     {
650       e[i] += 1;
651     }
652 }
653 
654 static void
mp_factor_insert_ui(struct mp_factors * factors,unsigned long int prime)655 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
656 {
657   mpz_t pz;
658 
659   mpz_init_set_ui (pz, prime);
660   mp_factor_insert (factors, pz);
661   mpz_clear (pz);
662 }
663 #endif /* HAVE_GMP */
664 
665 
666 /* Number of bits in an uintmax_t.  */
667 enum { W = sizeof (uintmax_t) * CHAR_BIT };
668 
669 /* Verify that uintmax_t does not have holes in its representation.  */
670 verify (UINTMAX_MAX >> (W - 1) == 1);
671 
672 #define P(a,b,c,d) a,
673 static const unsigned char primes_diff[] = {
674 #include "primes.h"
675 0,0,0,0,0,0,0                           /* 7 sentinels for 8-way loop */
676 };
677 #undef P
678 
679 #define PRIMES_PTAB_ENTRIES \
680   (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
681 
682 #define P(a,b,c,d) b,
683 static const unsigned char primes_diff8[] = {
684 #include "primes.h"
685 0,0,0,0,0,0,0                           /* 7 sentinels for 8-way loop */
686 };
687 #undef P
688 
689 struct primes_dtab
690 {
691   uintmax_t binv, lim;
692 };
693 
694 #define P(a,b,c,d) {c,d},
695 static const struct primes_dtab primes_dtab[] = {
696 #include "primes.h"
697 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
698 };
699 #undef P
700 
701 /* Verify that uintmax_t is not wider than
702    the integers used to generate primes.h.  */
703 verify (W <= WIDE_UINT_BITS);
704 
705 /* debugging for developers.  Enables devmsg().
706    This flag is used only in the GMP code.  */
707 static bool dev_debug = false;
708 
709 /* Prove primality or run probabilistic tests.  */
710 static bool flag_prove_primality = PROVE_PRIMALITY;
711 
712 /* Number of Miller-Rabin tests to run when not proving primality. */
713 #define MR_REPS 25
714 
715 static void
factor_insert_refind(struct factors * factors,uintmax_t p,unsigned int i,unsigned int off)716 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
717                       unsigned int off)
718 {
719   for (unsigned int j = 0; j < off; j++)
720     p += primes_diff[i + j];
721   factor_insert (factors, p);
722 }
723 
724 /* Trial division with odd primes uses the following trick.
725 
726    Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
727    consider the case t < B (this is the second loop below).
728 
729    From our tables we get
730 
731      binv = p^{-1} (mod B)
732      lim = floor ( (B-1) / p ).
733 
734    First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
735    (and all quotients in this range occur for some t).
736 
737    Then t = q * p is true also (mod B), and p is invertible we get
738 
739      q = t * binv (mod B).
740 
741    Next, assume that t is *not* divisible by p. Since multiplication
742    by binv (mod B) is a one-to-one mapping,
743 
744      t * binv (mod B) > lim,
745 
746    because all the smaller values are already taken.
747 
748    This can be summed up by saying that the function
749 
750      q(t) = binv * t (mod B)
751 
752    is a permutation of the range 0 <= t < B, with the curious property
753    that it maps the multiples of p onto the range 0 <= q <= lim, in
754    order, and the non-multiples of p onto the range lim < q < B.
755  */
756 
757 static uintmax_t
factor_using_division(uintmax_t * t1p,uintmax_t t1,uintmax_t t0,struct factors * factors)758 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
759                        struct factors *factors)
760 {
761   if (t0 % 2 == 0)
762     {
763       unsigned int cnt;
764 
765       if (t0 == 0)
766         {
767           count_trailing_zeros (cnt, t1);
768           t0 = t1 >> cnt;
769           t1 = 0;
770           cnt += W_TYPE_SIZE;
771         }
772       else
773         {
774           count_trailing_zeros (cnt, t0);
775           rsh2 (t1, t0, t1, t0, cnt);
776         }
777 
778       factor_insert_multiplicity (factors, 2, cnt);
779     }
780 
781   uintmax_t p = 3;
782   unsigned int i;
783   for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
784     {
785       for (;;)
786         {
787           uintmax_t q1, q0, hi, lo _GL_UNUSED;
788 
789           q0 = t0 * primes_dtab[i].binv;
790           umul_ppmm (hi, lo, q0, p);
791           if (hi > t1)
792             break;
793           hi = t1 - hi;
794           q1 = hi * primes_dtab[i].binv;
795           if (LIKELY (q1 > primes_dtab[i].lim))
796             break;
797           t1 = q1; t0 = q0;
798           factor_insert (factors, p);
799         }
800       p += primes_diff[i + 1];
801     }
802   if (t1p)
803     *t1p = t1;
804 
805 #define DIVBLOCK(I)                                                     \
806   do {                                                                  \
807     for (;;)                                                            \
808       {                                                                 \
809         q = t0 * pd[I].binv;                                            \
810         if (LIKELY (q > pd[I].lim))                                     \
811           break;                                                        \
812         t0 = q;                                                         \
813         factor_insert_refind (factors, p, i + 1, I);                    \
814       }                                                                 \
815   } while (0)
816 
817   for (; i < PRIMES_PTAB_ENTRIES; i += 8)
818     {
819       uintmax_t q;
820       const struct primes_dtab *pd = &primes_dtab[i];
821       DIVBLOCK (0);
822       DIVBLOCK (1);
823       DIVBLOCK (2);
824       DIVBLOCK (3);
825       DIVBLOCK (4);
826       DIVBLOCK (5);
827       DIVBLOCK (6);
828       DIVBLOCK (7);
829 
830       p += primes_diff8[i];
831       if (p * p > t0)
832         break;
833     }
834 
835   return t0;
836 }
837 
838 #if HAVE_GMP
839 static void
mp_factor_using_division(mpz_t t,struct mp_factors * factors)840 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
841 {
842   mpz_t q;
843   unsigned long int p;
844 
845   devmsg ("[trial division] ");
846 
847   mpz_init (q);
848 
849   p = mpz_scan1 (t, 0);
850   mpz_div_2exp (t, t, p);
851   while (p)
852     {
853       mp_factor_insert_ui (factors, 2);
854       --p;
855     }
856 
857   p = 3;
858   for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
859     {
860       if (! mpz_divisible_ui_p (t, p))
861         {
862           p += primes_diff[i++];
863           if (mpz_cmp_ui (t, p * p) < 0)
864             break;
865         }
866       else
867         {
868           mpz_tdiv_q_ui (t, t, p);
869           mp_factor_insert_ui (factors, p);
870         }
871     }
872 
873   mpz_clear (q);
874 }
875 #endif
876 
877 /* Entry i contains (2i+1)^(-1) mod 2^8.  */
878 static const unsigned char  binvert_table[128] =
879 {
880   0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
881   0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
882   0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
883   0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
884   0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
885   0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
886   0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
887   0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
888   0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
889   0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
890   0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
891   0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
892   0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
893   0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
894   0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
895   0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
896 };
897 
898 /* Compute n^(-1) mod B, using a Newton iteration.  */
899 #define binv(inv,n)                                                     \
900   do {                                                                  \
901     uintmax_t  __n = (n);                                               \
902     uintmax_t  __inv;                                                   \
903                                                                         \
904     __inv = binvert_table[(__n / 2) & 0x7F]; /*  8 */                   \
905     if (W_TYPE_SIZE > 8)   __inv = 2 * __inv - __inv * __inv * __n;     \
906     if (W_TYPE_SIZE > 16)  __inv = 2 * __inv - __inv * __inv * __n;     \
907     if (W_TYPE_SIZE > 32)  __inv = 2 * __inv - __inv * __inv * __n;     \
908                                                                         \
909     if (W_TYPE_SIZE > 64)                                               \
910       {                                                                 \
911         int  __invbits = 64;                                            \
912         do {                                                            \
913           __inv = 2 * __inv - __inv * __inv * __n;                      \
914           __invbits *= 2;                                               \
915         } while (__invbits < W_TYPE_SIZE);                              \
916       }                                                                 \
917                                                                         \
918     (inv) = __inv;                                                      \
919   } while (0)
920 
921 /* q = u / d, assuming d|u.  */
922 #define divexact_21(q1, q0, u1, u0, d)                                  \
923   do {                                                                  \
924     uintmax_t _di, _q0;                                                 \
925     binv (_di, (d));                                                    \
926     _q0 = (u0) * _di;                                                   \
927     if ((u1) >= (d))                                                    \
928       {                                                                 \
929         uintmax_t _p1, _p0 _GL_UNUSED;                            \
930         umul_ppmm (_p1, _p0, _q0, d);                                   \
931         (q1) = ((u1) - _p1) * _di;                                      \
932         (q0) = _q0;                                                     \
933       }                                                                 \
934     else                                                                \
935       {                                                                 \
936         (q0) = _q0;                                                     \
937         (q1) = 0;                                                       \
938       }                                                                 \
939   } while (0)
940 
941 /* x B (mod n). */
942 #define redcify(r_prim, r, n)                                           \
943   do {                                                                  \
944     uintmax_t _redcify_q _GL_UNUSED;                              \
945     udiv_qrnnd (_redcify_q, r_prim, r, 0, n);                           \
946   } while (0)
947 
948 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
949 #define redcify2(r1, r0, x, n1, n0)                                     \
950   do {                                                                  \
951     uintmax_t _r1, _r0, _i;                                             \
952     if ((x) < (n1))                                                     \
953       {                                                                 \
954         _r1 = (x); _r0 = 0;                                             \
955         _i = W_TYPE_SIZE;                                               \
956       }                                                                 \
957     else                                                                \
958       {                                                                 \
959         _r1 = 0; _r0 = (x);                                             \
960         _i = 2*W_TYPE_SIZE;                                             \
961       }                                                                 \
962     while (_i-- > 0)                                                    \
963       {                                                                 \
964         lsh2 (_r1, _r0, _r1, _r0, 1);                                   \
965         if (ge2 (_r1, _r0, (n1), (n0)))                                 \
966           sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0));                  \
967       }                                                                 \
968     (r1) = _r1;                                                         \
969     (r0) = _r0;                                                         \
970   } while (0)
971 
972 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
973    Both a and b must be in redc form, the result will be in redc form too. */
974 static inline uintmax_t
mulredc(uintmax_t a,uintmax_t b,uintmax_t m,uintmax_t mi)975 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
976 {
977   uintmax_t rh, rl, q, th, tl _GL_UNUSED, xh;
978 
979   umul_ppmm (rh, rl, a, b);
980   q = rl * mi;
981   umul_ppmm (th, tl, q, m);
982   xh = rh - th;
983   if (rh < th)
984     xh += m;
985 
986   return xh;
987 }
988 
989 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
990    Both a and b must be in redc form, the result will be in redc form too.
991    For performance reasons, the most significant bit of m must be clear. */
992 static uintmax_t
mulredc2(uintmax_t * r1p,uintmax_t a1,uintmax_t a0,uintmax_t b1,uintmax_t b0,uintmax_t m1,uintmax_t m0,uintmax_t mi)993 mulredc2 (uintmax_t *r1p,
994           uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
995           uintmax_t m1, uintmax_t m0, uintmax_t mi)
996 {
997   uintmax_t r1, r0, q, p1, p0 _GL_UNUSED, t1, t0, s1, s0;
998   mi = -mi;
999   assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
1000   assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
1001   assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
1002 
1003   /* First compute a0 * <b1, b0> B^{-1}
1004         +-----+
1005         |a0 b0|
1006      +--+--+--+
1007      |a0 b1|
1008      +--+--+--+
1009         |q0 m0|
1010      +--+--+--+
1011      |q0 m1|
1012     -+--+--+--+
1013      |r1|r0| 0|
1014      +--+--+--+
1015   */
1016   umul_ppmm (t1, t0, a0, b0);
1017   umul_ppmm (r1, r0, a0, b1);
1018   q = mi * t0;
1019   umul_ppmm (p1, p0, q, m0);
1020   umul_ppmm (s1, s0, q, m1);
1021   r0 += (t0 != 0); /* Carry */
1022   add_ssaaaa (r1, r0, r1, r0, 0, p1);
1023   add_ssaaaa (r1, r0, r1, r0, 0, t1);
1024   add_ssaaaa (r1, r0, r1, r0, s1, s0);
1025 
1026   /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1027         +-----+
1028         |a1 b0|
1029         +--+--+
1030         |r1|r0|
1031      +--+--+--+
1032      |a1 b1|
1033      +--+--+--+
1034         |q1 m0|
1035      +--+--+--+
1036      |q1 m1|
1037     -+--+--+--+
1038      |r1|r0| 0|
1039      +--+--+--+
1040   */
1041   umul_ppmm (t1, t0, a1, b0);
1042   umul_ppmm (s1, s0, a1, b1);
1043   add_ssaaaa (t1, t0, t1, t0, 0, r0);
1044   q = mi * t0;
1045   add_ssaaaa (r1, r0, s1, s0, 0, r1);
1046   umul_ppmm (p1, p0, q, m0);
1047   umul_ppmm (s1, s0, q, m1);
1048   r0 += (t0 != 0); /* Carry */
1049   add_ssaaaa (r1, r0, r1, r0, 0, p1);
1050   add_ssaaaa (r1, r0, r1, r0, 0, t1);
1051   add_ssaaaa (r1, r0, r1, r0, s1, s0);
1052 
1053   if (ge2 (r1, r0, m1, m0))
1054     sub_ddmmss (r1, r0, r1, r0, m1, m0);
1055 
1056   *r1p = r1;
1057   return r0;
1058 }
1059 
1060 static uintmax_t _GL_ATTRIBUTE_CONST
powm(uintmax_t b,uintmax_t e,uintmax_t n,uintmax_t ni,uintmax_t one)1061 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1062 {
1063   uintmax_t y = one;
1064 
1065   if (e & 1)
1066     y = b;
1067 
1068   while (e != 0)
1069     {
1070       b = mulredc (b, b, n, ni);
1071       e >>= 1;
1072 
1073       if (e & 1)
1074         y = mulredc (y, b, n, ni);
1075     }
1076 
1077   return y;
1078 }
1079 
1080 static uintmax_t
powm2(uintmax_t * r1m,const uintmax_t * bp,const uintmax_t * ep,const uintmax_t * np,uintmax_t ni,const uintmax_t * one)1081 powm2 (uintmax_t *r1m,
1082        const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1083        uintmax_t ni, const uintmax_t *one)
1084 {
1085   uintmax_t r1, r0, b1, b0, n1, n0;
1086   unsigned int i;
1087   uintmax_t e;
1088 
1089   b0 = bp[0];
1090   b1 = bp[1];
1091   n0 = np[0];
1092   n1 = np[1];
1093 
1094   r0 = one[0];
1095   r1 = one[1];
1096 
1097   for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1098     {
1099       if (e & 1)
1100         {
1101           r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1102           r1 = *r1m;
1103         }
1104       b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1105       b1 = *r1m;
1106     }
1107   for (e = ep[1]; e > 0; e >>= 1)
1108     {
1109       if (e & 1)
1110         {
1111           r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1112           r1 = *r1m;
1113         }
1114       b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1115       b1 = *r1m;
1116     }
1117   *r1m = r1;
1118   return r0;
1119 }
1120 
1121 static bool _GL_ATTRIBUTE_CONST
millerrabin(uintmax_t n,uintmax_t ni,uintmax_t b,uintmax_t q,unsigned int k,uintmax_t one)1122 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1123              unsigned int k, uintmax_t one)
1124 {
1125   uintmax_t y = powm (b, q, n, ni, one);
1126 
1127   uintmax_t nm1 = n - one;      /* -1, but in redc representation. */
1128 
1129   if (y == one || y == nm1)
1130     return true;
1131 
1132   for (unsigned int i = 1; i < k; i++)
1133     {
1134       y = mulredc (y, y, n, ni);
1135 
1136       if (y == nm1)
1137         return true;
1138       if (y == one)
1139         return false;
1140     }
1141   return false;
1142 }
1143 
1144 static bool
millerrabin2(const uintmax_t * np,uintmax_t ni,const uintmax_t * bp,const uintmax_t * qp,unsigned int k,const uintmax_t * one)1145 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1146               const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1147 {
1148   uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1149 
1150   y0 = powm2 (&r1m, bp, qp, np, ni, one);
1151   y1 = r1m;
1152 
1153   if (y0 == one[0] && y1 == one[1])
1154     return true;
1155 
1156   sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1157 
1158   if (y0 == nm1_0 && y1 == nm1_1)
1159     return true;
1160 
1161   for (unsigned int i = 1; i < k; i++)
1162     {
1163       y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1164       y1 = r1m;
1165 
1166       if (y0 == nm1_0 && y1 == nm1_1)
1167         return true;
1168       if (y0 == one[0] && y1 == one[1])
1169         return false;
1170     }
1171   return false;
1172 }
1173 
1174 #if HAVE_GMP
1175 static bool
mp_millerrabin(mpz_srcptr n,mpz_srcptr nm1,mpz_ptr x,mpz_ptr y,mpz_srcptr q,unsigned long int k)1176 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1177                 mpz_srcptr q, unsigned long int k)
1178 {
1179   mpz_powm (y, x, q, n);
1180 
1181   if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1182     return true;
1183 
1184   for (unsigned long int i = 1; i < k; i++)
1185     {
1186       mpz_powm_ui (y, y, 2, n);
1187       if (mpz_cmp (y, nm1) == 0)
1188         return true;
1189       if (mpz_cmp_ui (y, 1) == 0)
1190         return false;
1191     }
1192   return false;
1193 }
1194 #endif
1195 
1196 /* Lucas' prime test.  The number of iterations vary greatly, up to a few dozen
1197    have been observed.  The average seem to be about 2.  */
1198 static bool
prime_p(uintmax_t n)1199 prime_p (uintmax_t n)
1200 {
1201   int k;
1202   bool is_prime;
1203   uintmax_t a_prim, one, ni;
1204   struct factors factors;
1205 
1206   if (n <= 1)
1207     return false;
1208 
1209   /* We have already casted out small primes. */
1210   if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1211     return true;
1212 
1213   /* Precomputation for Miller-Rabin.  */
1214   uintmax_t q = n - 1;
1215   for (k = 0; (q & 1) == 0; k++)
1216     q >>= 1;
1217 
1218   uintmax_t a = 2;
1219   binv (ni, n);                 /* ni <- 1/n mod B */
1220   redcify (one, 1, n);
1221   addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1222 
1223   /* Perform a Miller-Rabin test, finds most composites quickly.  */
1224   if (!millerrabin (n, ni, a_prim, q, k, one))
1225     return false;
1226 
1227   if (flag_prove_primality)
1228     {
1229       /* Factor n-1 for Lucas.  */
1230       factor (0, n - 1, &factors);
1231     }
1232 
1233   /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1234      number composite.  */
1235   for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1236     {
1237       if (flag_prove_primality)
1238         {
1239           is_prime = true;
1240           for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1241             {
1242               is_prime
1243                 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1244             }
1245         }
1246       else
1247         {
1248           /* After enough Miller-Rabin runs, be content. */
1249           is_prime = (r == MR_REPS - 1);
1250         }
1251 
1252       if (is_prime)
1253         return true;
1254 
1255       a += primes_diff[r];      /* Establish new base.  */
1256 
1257       /* The following is equivalent to redcify (a_prim, a, n).  It runs faster
1258          on most processors, since it avoids udiv_qrnnd.  If we go down the
1259          udiv_qrnnd_preinv path, this code should be replaced.  */
1260       {
1261         uintmax_t s1, s0;
1262         umul_ppmm (s1, s0, one, a);
1263         if (LIKELY (s1 == 0))
1264           a_prim = s0 % n;
1265         else
1266           {
1267             uintmax_t dummy _GL_UNUSED;
1268             udiv_qrnnd (dummy, a_prim, s1, s0, n);
1269           }
1270       }
1271 
1272       if (!millerrabin (n, ni, a_prim, q, k, one))
1273         return false;
1274     }
1275 
1276   error (0, 0, _("Lucas prime test failure.  This should not happen"));
1277   abort ();
1278 }
1279 
1280 static bool
prime2_p(uintmax_t n1,uintmax_t n0)1281 prime2_p (uintmax_t n1, uintmax_t n0)
1282 {
1283   uintmax_t q[2], nm1[2];
1284   uintmax_t a_prim[2];
1285   uintmax_t one[2];
1286   uintmax_t na[2];
1287   uintmax_t ni;
1288   unsigned int k;
1289   struct factors factors;
1290 
1291   if (n1 == 0)
1292     return prime_p (n0);
1293 
1294   nm1[1] = n1 - (n0 == 0);
1295   nm1[0] = n0 - 1;
1296   if (nm1[0] == 0)
1297     {
1298       count_trailing_zeros (k, nm1[1]);
1299 
1300       q[0] = nm1[1] >> k;
1301       q[1] = 0;
1302       k += W_TYPE_SIZE;
1303     }
1304   else
1305     {
1306       count_trailing_zeros (k, nm1[0]);
1307       rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1308     }
1309 
1310   uintmax_t a = 2;
1311   binv (ni, n0);
1312   redcify2 (one[1], one[0], 1, n1, n0);
1313   addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1314 
1315   /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1316   na[0] = n0;
1317   na[1] = n1;
1318 
1319   if (!millerrabin2 (na, ni, a_prim, q, k, one))
1320     return false;
1321 
1322   if (flag_prove_primality)
1323     {
1324       /* Factor n-1 for Lucas.  */
1325       factor (nm1[1], nm1[0], &factors);
1326     }
1327 
1328   /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1329      number composite.  */
1330   for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1331     {
1332       bool is_prime;
1333       uintmax_t e[2], y[2];
1334 
1335       if (flag_prove_primality)
1336         {
1337           is_prime = true;
1338           if (factors.plarge[1])
1339             {
1340               uintmax_t pi;
1341               binv (pi, factors.plarge[0]);
1342               e[0] = pi * nm1[0];
1343               e[1] = 0;
1344               y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1345               is_prime = (y[0] != one[0] || y[1] != one[1]);
1346             }
1347           for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1348             {
1349               /* FIXME: We always have the factor 2. Do we really need to
1350                  handle it here? We have done the same powering as part
1351                  of millerrabin. */
1352               if (factors.p[i] == 2)
1353                 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1354               else
1355                 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1356               y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1357               is_prime = (y[0] != one[0] || y[1] != one[1]);
1358             }
1359         }
1360       else
1361         {
1362           /* After enough Miller-Rabin runs, be content. */
1363           is_prime = (r == MR_REPS - 1);
1364         }
1365 
1366       if (is_prime)
1367         return true;
1368 
1369       a += primes_diff[r];      /* Establish new base.  */
1370       redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1371 
1372       if (!millerrabin2 (na, ni, a_prim, q, k, one))
1373         return false;
1374     }
1375 
1376   error (0, 0, _("Lucas prime test failure.  This should not happen"));
1377   abort ();
1378 }
1379 
1380 #if HAVE_GMP
1381 static bool
mp_prime_p(mpz_t n)1382 mp_prime_p (mpz_t n)
1383 {
1384   bool is_prime;
1385   mpz_t q, a, nm1, tmp;
1386   struct mp_factors factors;
1387 
1388   if (mpz_cmp_ui (n, 1) <= 0)
1389     return false;
1390 
1391   /* We have already casted out small primes. */
1392   if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1393     return true;
1394 
1395   mpz_inits (q, a, nm1, tmp, NULL);
1396 
1397   /* Precomputation for Miller-Rabin.  */
1398   mpz_sub_ui (nm1, n, 1);
1399 
1400   /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
1401   unsigned long int k = mpz_scan1 (nm1, 0);
1402   mpz_tdiv_q_2exp (q, nm1, k);
1403 
1404   mpz_set_ui (a, 2);
1405 
1406   /* Perform a Miller-Rabin test, finds most composites quickly.  */
1407   if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1408     {
1409       is_prime = false;
1410       goto ret2;
1411     }
1412 
1413   if (flag_prove_primality)
1414     {
1415       /* Factor n-1 for Lucas.  */
1416       mpz_set (tmp, nm1);
1417       mp_factor (tmp, &factors);
1418     }
1419 
1420   /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1421      number composite.  */
1422   for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1423     {
1424       if (flag_prove_primality)
1425         {
1426           is_prime = true;
1427           for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1428             {
1429               mpz_divexact (tmp, nm1, factors.p[i]);
1430               mpz_powm (tmp, a, tmp, n);
1431               is_prime = mpz_cmp_ui (tmp, 1) != 0;
1432             }
1433         }
1434       else
1435         {
1436           /* After enough Miller-Rabin runs, be content. */
1437           is_prime = (r == MR_REPS - 1);
1438         }
1439 
1440       if (is_prime)
1441         goto ret1;
1442 
1443       mpz_add_ui (a, a, primes_diff[r]);        /* Establish new base.  */
1444 
1445       if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1446         {
1447           is_prime = false;
1448           goto ret1;
1449         }
1450     }
1451 
1452   error (0, 0, _("Lucas prime test failure.  This should not happen"));
1453   abort ();
1454 
1455  ret1:
1456   if (flag_prove_primality)
1457     mp_factor_clear (&factors);
1458  ret2:
1459   mpz_clears (q, a, nm1, tmp, NULL);
1460 
1461   return is_prime;
1462 }
1463 #endif
1464 
1465 static void
factor_using_pollard_rho(uintmax_t n,unsigned long int a,struct factors * factors)1466 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1467                           struct factors *factors)
1468 {
1469   uintmax_t x, z, y, P, t, ni, g;
1470 
1471   unsigned long int k = 1;
1472   unsigned long int l = 1;
1473 
1474   redcify (P, 1, n);
1475   addmod (x, P, P, n);          /* i.e., redcify(2) */
1476   y = z = x;
1477 
1478   while (n != 1)
1479     {
1480       assert (a < n);
1481 
1482       binv (ni, n);             /* FIXME: when could we use old 'ni' value? */
1483 
1484       for (;;)
1485         {
1486           do
1487             {
1488               x = mulredc (x, x, n, ni);
1489               addmod (x, x, a, n);
1490 
1491               submod (t, z, x, n);
1492               P = mulredc (P, t, n, ni);
1493 
1494               if (k % 32 == 1)
1495                 {
1496                   if (gcd_odd (P, n) != 1)
1497                     goto factor_found;
1498                   y = x;
1499                 }
1500             }
1501           while (--k != 0);
1502 
1503           z = x;
1504           k = l;
1505           l = 2 * l;
1506           for (unsigned long int i = 0; i < k; i++)
1507             {
1508               x = mulredc (x, x, n, ni);
1509               addmod (x, x, a, n);
1510             }
1511           y = x;
1512         }
1513 
1514     factor_found:
1515       do
1516         {
1517           y = mulredc (y, y, n, ni);
1518           addmod (y, y, a, n);
1519 
1520           submod (t, z, y, n);
1521           g = gcd_odd (t, n);
1522         }
1523       while (g == 1);
1524 
1525       if (n == g)
1526         {
1527           /* Found n itself as factor.  Restart with different params.  */
1528           factor_using_pollard_rho (n, a + 1, factors);
1529           return;
1530         }
1531 
1532       n = n / g;
1533 
1534       if (!prime_p (g))
1535         factor_using_pollard_rho (g, a + 1, factors);
1536       else
1537         factor_insert (factors, g);
1538 
1539       if (prime_p (n))
1540         {
1541           factor_insert (factors, n);
1542           break;
1543         }
1544 
1545       x = x % n;
1546       z = z % n;
1547       y = y % n;
1548     }
1549 }
1550 
1551 static void
factor_using_pollard_rho2(uintmax_t n1,uintmax_t n0,unsigned long int a,struct factors * factors)1552 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1553                            struct factors *factors)
1554 {
1555   uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1556 
1557   unsigned long int k = 1;
1558   unsigned long int l = 1;
1559 
1560   redcify2 (P1, P0, 1, n1, n0);
1561   addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1562   y1 = z1 = x1;
1563   y0 = z0 = x0;
1564 
1565   while (n1 != 0 || n0 != 1)
1566     {
1567       binv (ni, n0);
1568 
1569       for (;;)
1570         {
1571           do
1572             {
1573               x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1574               x1 = r1m;
1575               addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1576 
1577               submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1578               P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1579               P1 = r1m;
1580 
1581               if (k % 32 == 1)
1582                 {
1583                   g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1584                   if (g1 != 0 || g0 != 1)
1585                     goto factor_found;
1586                   y1 = x1; y0 = x0;
1587                 }
1588             }
1589           while (--k != 0);
1590 
1591           z1 = x1; z0 = x0;
1592           k = l;
1593           l = 2 * l;
1594           for (unsigned long int i = 0; i < k; i++)
1595             {
1596               x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1597               x1 = r1m;
1598               addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1599             }
1600           y1 = x1; y0 = x0;
1601         }
1602 
1603     factor_found:
1604       do
1605         {
1606           y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1607           y1 = r1m;
1608           addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1609 
1610           submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1611           g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1612         }
1613       while (g1 == 0 && g0 == 1);
1614 
1615       if (g1 == 0)
1616         {
1617           /* The found factor is one word, and > 1. */
1618           divexact_21 (n1, n0, n1, n0, g0);     /* n = n / g */
1619 
1620           if (!prime_p (g0))
1621             factor_using_pollard_rho (g0, a + 1, factors);
1622           else
1623             factor_insert (factors, g0);
1624         }
1625       else
1626         {
1627           /* The found factor is two words.  This is highly unlikely, thus hard
1628              to trigger.  Please be careful before you change this code!  */
1629           uintmax_t ginv;
1630 
1631           if (n1 == g1 && n0 == g0)
1632             {
1633               /* Found n itself as factor.  Restart with different params.  */
1634               factor_using_pollard_rho2 (n1, n0, a + 1, factors);
1635               return;
1636             }
1637 
1638           binv (ginv, g0);      /* Compute n = n / g.  Since the result will */
1639           n0 = ginv * n0;       /* fit one word, we can compute the quotient */
1640           n1 = 0;               /* modulo B, ignoring the high divisor word. */
1641 
1642           if (!prime2_p (g1, g0))
1643             factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1644           else
1645             factor_insert_large (factors, g1, g0);
1646         }
1647 
1648       if (n1 == 0)
1649         {
1650           if (prime_p (n0))
1651             {
1652               factor_insert (factors, n0);
1653               break;
1654             }
1655 
1656           factor_using_pollard_rho (n0, a, factors);
1657           return;
1658         }
1659 
1660       if (prime2_p (n1, n0))
1661         {
1662           factor_insert_large (factors, n1, n0);
1663           break;
1664         }
1665 
1666       x0 = mod2 (&x1, x1, x0, n1, n0);
1667       z0 = mod2 (&z1, z1, z0, n1, n0);
1668       y0 = mod2 (&y1, y1, y0, n1, n0);
1669     }
1670 }
1671 
1672 #if HAVE_GMP
1673 static void
mp_factor_using_pollard_rho(mpz_t n,unsigned long int a,struct mp_factors * factors)1674 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1675                              struct mp_factors *factors)
1676 {
1677   mpz_t x, z, y, P;
1678   mpz_t t, t2;
1679 
1680   devmsg ("[pollard-rho (%lu)] ", a);
1681 
1682   mpz_inits (t, t2, NULL);
1683   mpz_init_set_si (y, 2);
1684   mpz_init_set_si (x, 2);
1685   mpz_init_set_si (z, 2);
1686   mpz_init_set_ui (P, 1);
1687 
1688   unsigned long long int k = 1;
1689   unsigned long long int l = 1;
1690 
1691   while (mpz_cmp_ui (n, 1) != 0)
1692     {
1693       for (;;)
1694         {
1695           do
1696             {
1697               mpz_mul (t, x, x);
1698               mpz_mod (x, t, n);
1699               mpz_add_ui (x, x, a);
1700 
1701               mpz_sub (t, z, x);
1702               mpz_mul (t2, P, t);
1703               mpz_mod (P, t2, n);
1704 
1705               if (k % 32 == 1)
1706                 {
1707                   mpz_gcd (t, P, n);
1708                   if (mpz_cmp_ui (t, 1) != 0)
1709                     goto factor_found;
1710                   mpz_set (y, x);
1711                 }
1712             }
1713           while (--k != 0);
1714 
1715           mpz_set (z, x);
1716           k = l;
1717           l = 2 * l;
1718           for (unsigned long long int i = 0; i < k; i++)
1719             {
1720               mpz_mul (t, x, x);
1721               mpz_mod (x, t, n);
1722               mpz_add_ui (x, x, a);
1723             }
1724           mpz_set (y, x);
1725         }
1726 
1727     factor_found:
1728       do
1729         {
1730           mpz_mul (t, y, y);
1731           mpz_mod (y, t, n);
1732           mpz_add_ui (y, y, a);
1733 
1734           mpz_sub (t, z, y);
1735           mpz_gcd (t, t, n);
1736         }
1737       while (mpz_cmp_ui (t, 1) == 0);
1738 
1739       mpz_divexact (n, n, t);   /* divide by t, before t is overwritten */
1740 
1741       if (!mp_prime_p (t))
1742         {
1743           devmsg ("[composite factor--restarting pollard-rho] ");
1744           mp_factor_using_pollard_rho (t, a + 1, factors);
1745         }
1746       else
1747         {
1748           mp_factor_insert (factors, t);
1749         }
1750 
1751       if (mp_prime_p (n))
1752         {
1753           mp_factor_insert (factors, n);
1754           break;
1755         }
1756 
1757       mpz_mod (x, x, n);
1758       mpz_mod (z, z, n);
1759       mpz_mod (y, y, n);
1760     }
1761 
1762   mpz_clears (P, t2, t, z, x, y, NULL);
1763 }
1764 #endif
1765 
1766 #if USE_SQUFOF
1767 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)?  If
1768    algorithm is replaced, consider also returning the remainder. */
1769 static uintmax_t _GL_ATTRIBUTE_CONST
isqrt(uintmax_t n)1770 isqrt (uintmax_t n)
1771 {
1772   uintmax_t x;
1773   unsigned c;
1774   if (n == 0)
1775     return 0;
1776 
1777   count_leading_zeros (c, n);
1778 
1779   /* Make x > sqrt(n). This will be invariant through the loop. */
1780   x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1781 
1782   for (;;)
1783     {
1784       uintmax_t y = (x + n/x) / 2;
1785       if (y >= x)
1786         return x;
1787 
1788       x = y;
1789     }
1790 }
1791 
1792 static uintmax_t _GL_ATTRIBUTE_CONST
isqrt2(uintmax_t nh,uintmax_t nl)1793 isqrt2 (uintmax_t nh, uintmax_t nl)
1794 {
1795   unsigned int shift;
1796   uintmax_t x;
1797 
1798   /* Ensures the remainder fits in an uintmax_t. */
1799   assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1800 
1801   if (nh == 0)
1802     return isqrt (nl);
1803 
1804   count_leading_zeros (shift, nh);
1805   shift &= ~1;
1806 
1807   /* Make x > sqrt(n) */
1808   x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1809   x <<= (W_TYPE_SIZE - shift) / 2;
1810 
1811   /* Do we need more than one iteration? */
1812   for (;;)
1813     {
1814       uintmax_t r _GL_UNUSED;
1815       uintmax_t q, y;
1816       udiv_qrnnd (q, r, nh, nl, x);
1817       y = (x + q) / 2;
1818 
1819       if (y >= x)
1820         {
1821           uintmax_t hi, lo;
1822           umul_ppmm (hi, lo, x + 1, x + 1);
1823           assert (gt2 (hi, lo, nh, nl));
1824 
1825           umul_ppmm (hi, lo, x, x);
1826           assert (ge2 (nh, nl, hi, lo));
1827           sub_ddmmss (hi, lo, nh, nl, hi, lo);
1828           assert (hi == 0);
1829 
1830           return x;
1831         }
1832 
1833       x = y;
1834     }
1835 }
1836 
1837 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1838 # define MAGIC64 0x0202021202030213ULL
1839 # define MAGIC63 0x0402483012450293ULL
1840 # define MAGIC65 0x218a019866014613ULL
1841 # define MAGIC11 0x23b
1842 
1843 /* Return the square root if the input is a square, otherwise 0. */
1844 static uintmax_t _GL_ATTRIBUTE_CONST
is_square(uintmax_t x)1845 is_square (uintmax_t x)
1846 {
1847   /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1848      computing the square root. */
1849   if (((MAGIC64 >> (x & 63)) & 1)
1850       && ((MAGIC63 >> (x % 63)) & 1)
1851       /* Both 0 and 64 are squares mod (65) */
1852       && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1853       && ((MAGIC11 >> (x % 11) & 1)))
1854     {
1855       uintmax_t r = isqrt (x);
1856       if (r*r == x)
1857         return r;
1858     }
1859   return 0;
1860 }
1861 
1862 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1863 static const unsigned short invtab[0x81] =
1864   {
1865     0x200,
1866     0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1867     0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1868     0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1869     0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1870     0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1871     0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1872     0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1873     0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1874     0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1875     0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1876     0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1877     0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1878     0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1879     0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1880     0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1881     0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1882   };
1883 
1884 /* Compute q = [u/d], r = u mod d.  Avoids slow hardware division for the case
1885    that q < 0x40; here it instead uses a table of (Euclidian) inverses.  */
1886 # define div_smallq(q, r, u, d)                                          \
1887   do {                                                                  \
1888     if ((u) / 0x40 < (d))                                               \
1889       {                                                                 \
1890         int _cnt;                                                       \
1891         uintmax_t _dinv, _mask, _q, _r;                                 \
1892         count_leading_zeros (_cnt, (d));                                \
1893         _r = (u);                                                       \
1894         if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8)))                        \
1895           {                                                             \
1896             _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80];   \
1897             _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt);                \
1898           }                                                             \
1899         else                                                            \
1900           {                                                             \
1901             _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f];   \
1902             _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11;        \
1903           }                                                             \
1904         _r -= _q*(d);                                                   \
1905                                                                         \
1906         _mask = -(uintmax_t) (_r >= (d));                               \
1907         (r) = _r - (_mask & (d));                                       \
1908         (q) = _q - _mask;                                               \
1909         assert ( (q) * (d) + (r) == u);                                 \
1910       }                                                                 \
1911     else                                                                \
1912       {                                                                 \
1913         uintmax_t _q = (u) / (d);                                       \
1914         (r) = (u) - _q * (d);                                           \
1915         (q) = _q;                                                       \
1916       }                                                                 \
1917   } while (0)
1918 
1919 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1920    = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1921    with 3025 = 55^2.
1922 
1923    Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1924    representing G_0 = (-55, 2*4652, 8653).
1925 
1926    In the notation of the paper:
1927 
1928    S_{-1} = 55, S_0 = 8653, R_0 = 4652
1929 
1930    Put
1931 
1932      t_0 = floor([q_0 + R_0] / S0) = 1
1933      R_1 = t_0 * S_0 - R_0 = 4001
1934      S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1935 */
1936 
1937 /* Multipliers, in order of efficiency:
1938    0.7268  3*5*7*11 = 1155 = 3 (mod 4)
1939    0.7317  3*5*7    =  105 = 1
1940    0.7820  3*5*11   =  165 = 1
1941    0.7872  3*5      =   15 = 3
1942    0.8101  3*7*11   =  231 = 3
1943    0.8155  3*7      =   21 = 1
1944    0.8284  5*7*11   =  385 = 1
1945    0.8339  5*7      =   35 = 3
1946    0.8716  3*11     =   33 = 1
1947    0.8774  3        =    3 = 3
1948    0.8913  5*11     =   55 = 3
1949    0.8972  5        =    5 = 1
1950    0.9233  7*11     =   77 = 1
1951    0.9295  7        =    7 = 3
1952    0.9934  11       =   11 = 3
1953 */
1954 # define QUEUE_SIZE 50
1955 #endif
1956 
1957 #if STAT_SQUFOF
1958 # define Q_FREQ_SIZE 50
1959 /* Element 0 keeps the total */
1960 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1961 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1962 #endif
1963 
1964 #if USE_SQUFOF
1965 /* Return true on success.  Expected to fail only for numbers
1966    >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1967 static bool
factor_using_squfof(uintmax_t n1,uintmax_t n0,struct factors * factors)1968 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1969 {
1970   /* Uses algorithm and notation from
1971 
1972      SQUARE FORM FACTORIZATION
1973      JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1974 
1975      https://homes.cerias.purdue.edu/~ssw/squfof.pdf
1976    */
1977 
1978   static const unsigned int multipliers_1[] =
1979     { /* = 1 (mod 4) */
1980       105, 165, 21, 385, 33, 5, 77, 1, 0
1981     };
1982   static const unsigned int multipliers_3[] =
1983     { /* = 3 (mod 4) */
1984       1155, 15, 231, 35, 3, 55, 7, 11, 0
1985     };
1986 
1987   const unsigned int *m;
1988 
1989   struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1990 
1991   if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1992     return false;
1993 
1994   uintmax_t sqrt_n = isqrt2 (n1, n0);
1995 
1996   if (n0 == sqrt_n * sqrt_n)
1997     {
1998       uintmax_t p1, p0;
1999 
2000       umul_ppmm (p1, p0, sqrt_n, sqrt_n);
2001       assert (p0 == n0);
2002 
2003       if (n1 == p1)
2004         {
2005           if (prime_p (sqrt_n))
2006             factor_insert_multiplicity (factors, sqrt_n, 2);
2007           else
2008             {
2009               struct factors f;
2010 
2011               f.nfactors = 0;
2012               if (!factor_using_squfof (0, sqrt_n, &f))
2013                 {
2014                   /* Try pollard rho instead */
2015                   factor_using_pollard_rho (sqrt_n, 1, &f);
2016                 }
2017               /* Duplicate the new factors */
2018               for (unsigned int i = 0; i < f.nfactors; i++)
2019                 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
2020             }
2021           return true;
2022         }
2023     }
2024 
2025   /* Select multipliers so we always get n * mu = 3 (mod 4) */
2026   for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2027        *m; m++)
2028     {
2029       uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2030       unsigned int i;
2031       unsigned int mu = *m;
2032       unsigned int qpos = 0;
2033 
2034       assert (mu * n0 % 4 == 3);
2035 
2036       /* In the notation of the paper, with mu * n == 3 (mod 4), we
2037          get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2038          I understand it, the necessary bound is 4 \mu^3 < n, or 32
2039          mu^3 < n.
2040 
2041          However, this seems insufficient: With n = 37243139 and mu =
2042          105, we get a trivial factor, from the square 38809 = 197^2,
2043          without any corresponding Q earlier in the iteration.
2044 
2045          Requiring 64 mu^3 < n seems sufficient. */
2046       if (n1 == 0)
2047         {
2048           if ((uintmax_t) mu*mu*mu >= n0 / 64)
2049             continue;
2050         }
2051       else
2052         {
2053           if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2054             continue;
2055         }
2056       umul_ppmm (Dh, Dl, n0, mu);
2057       Dh += n1 * mu;
2058 
2059       assert (Dl % 4 != 1);
2060       assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2061 
2062       S = isqrt2 (Dh, Dl);
2063 
2064       Q1 = 1;
2065       P = S;
2066 
2067       /* Square root remainder fits in one word, so ignore high part. */
2068       Q = Dl - P*P;
2069       /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2070       L = isqrt (2*S);
2071       B = 2*L;
2072       L1 = mu * 2 * L;
2073 
2074       /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2075          4 D. */
2076 
2077       for (i = 0; i <= B; i++)
2078         {
2079           uintmax_t q, P1, t, rem;
2080 
2081           div_smallq (q, rem, S+P, Q);
2082           P1 = S - rem; /* P1 = q*Q - P */
2083 
2084           IF_LINT (assert (q > 0 && Q > 0));
2085 
2086 # if STAT_SQUFOF
2087           q_freq[0]++;
2088           q_freq[MIN (q, Q_FREQ_SIZE)]++;
2089 # endif
2090 
2091           if (Q <= L1)
2092             {
2093               uintmax_t g = Q;
2094 
2095               if ( (Q & 1) == 0)
2096                 g /= 2;
2097 
2098               g /= gcd_odd (g, mu);
2099 
2100               if (g <= L)
2101                 {
2102                   if (qpos >= QUEUE_SIZE)
2103                     die (EXIT_FAILURE, 0, _("squfof queue overflow"));
2104                   queue[qpos].Q = g;
2105                   queue[qpos].P = P % g;
2106                   qpos++;
2107                 }
2108             }
2109 
2110           /* I think the difference can be either sign, but mod
2111              2^W_TYPE_SIZE arithmetic should be fine. */
2112           t = Q1 + q * (P - P1);
2113           Q1 = Q;
2114           Q = t;
2115           P = P1;
2116 
2117           if ( (i & 1) == 0)
2118             {
2119               uintmax_t r = is_square (Q);
2120               if (r)
2121                 {
2122                   for (unsigned int j = 0; j < qpos; j++)
2123                     {
2124                       if (queue[j].Q == r)
2125                         {
2126                           if (r == 1)
2127                             /* Traversed entire cycle. */
2128                             goto next_multiplier;
2129 
2130                           /* Need the absolute value for divisibility test. */
2131                           if (P >= queue[j].P)
2132                             t = P - queue[j].P;
2133                           else
2134                             t = queue[j].P - P;
2135                           if (t % r == 0)
2136                             {
2137                               /* Delete entries up to and including entry
2138                                  j, which matched. */
2139                               memmove (queue, queue + j + 1,
2140                                        (qpos - j - 1) * sizeof (queue[0]));
2141                               qpos -= (j + 1);
2142                             }
2143                           goto next_i;
2144                         }
2145                     }
2146 
2147                   /* We have found a square form, which should give a
2148                      factor. */
2149                   Q1 = r;
2150                   assert (S >= P); /* What signs are possible? */
2151                   P += r * ((S - P) / r);
2152 
2153                   /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2154                      for the case D = 2N. */
2155                   /* Compute Q = (D - P*P) / Q1, but we need double
2156                      precision. */
2157                   uintmax_t hi, lo;
2158                   umul_ppmm (hi, lo, P, P);
2159                   sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2160                   udiv_qrnnd (Q, rem, hi, lo, Q1);
2161                   assert (rem == 0);
2162 
2163                   for (;;)
2164                     {
2165                       /* Note: There appears to by a typo in the paper,
2166                          Step 4a in the algorithm description says q <--
2167                          floor([S+P]/\hat Q), but looking at the equations
2168                          in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2169                          (In this code, \hat Q is Q1). */
2170                       div_smallq (q, rem, S+P, Q);
2171                       P1 = S - rem;     /* P1 = q*Q - P */
2172 
2173 # if STAT_SQUFOF
2174                       q_freq[0]++;
2175                       q_freq[MIN (q, Q_FREQ_SIZE)]++;
2176 # endif
2177                       if (P == P1)
2178                         break;
2179                       t = Q1 + q * (P - P1);
2180                       Q1 = Q;
2181                       Q = t;
2182                       P = P1;
2183                     }
2184 
2185                   if ( (Q & 1) == 0)
2186                     Q /= 2;
2187                   Q /= gcd_odd (Q, mu);
2188 
2189                   assert (Q > 1 && (n1 || Q < n0));
2190 
2191                   if (prime_p (Q))
2192                     factor_insert (factors, Q);
2193                   else if (!factor_using_squfof (0, Q, factors))
2194                     factor_using_pollard_rho (Q, 2, factors);
2195 
2196                   divexact_21 (n1, n0, n1, n0, Q);
2197 
2198                   if (prime2_p (n1, n0))
2199                     factor_insert_large (factors, n1, n0);
2200                   else
2201                     {
2202                       if (!factor_using_squfof (n1, n0, factors))
2203                         {
2204                           if (n1 == 0)
2205                             factor_using_pollard_rho (n0, 1, factors);
2206                           else
2207                             factor_using_pollard_rho2 (n1, n0, 1, factors);
2208                         }
2209                     }
2210 
2211                   return true;
2212                 }
2213             }
2214         next_i:;
2215         }
2216     next_multiplier:;
2217     }
2218   return false;
2219 }
2220 #endif
2221 
2222 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2223    results in FACTORS.  */
2224 static void
factor(uintmax_t t1,uintmax_t t0,struct factors * factors)2225 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2226 {
2227   factors->nfactors = 0;
2228   factors->plarge[1] = 0;
2229 
2230   if (t1 == 0 && t0 < 2)
2231     return;
2232 
2233   t0 = factor_using_division (&t1, t1, t0, factors);
2234 
2235   if (t1 == 0 && t0 < 2)
2236     return;
2237 
2238   if (prime2_p (t1, t0))
2239     factor_insert_large (factors, t1, t0);
2240   else
2241     {
2242 #if USE_SQUFOF
2243       if (factor_using_squfof (t1, t0, factors))
2244         return;
2245 #endif
2246 
2247       if (t1 == 0)
2248         factor_using_pollard_rho (t0, 1, factors);
2249       else
2250         factor_using_pollard_rho2 (t1, t0, 1, factors);
2251     }
2252 }
2253 
2254 #if HAVE_GMP
2255 /* Use Pollard-rho to compute the prime factors of
2256    arbitrary-precision T, and put the results in FACTORS.  */
2257 static void
mp_factor(mpz_t t,struct mp_factors * factors)2258 mp_factor (mpz_t t, struct mp_factors *factors)
2259 {
2260   mp_factor_init (factors);
2261 
2262   if (mpz_sgn (t) != 0)
2263     {
2264       mp_factor_using_division (t, factors);
2265 
2266       if (mpz_cmp_ui (t, 1) != 0)
2267         {
2268           devmsg ("[is number prime?] ");
2269           if (mp_prime_p (t))
2270             mp_factor_insert (factors, t);
2271           else
2272             mp_factor_using_pollard_rho (t, 1, factors);
2273         }
2274     }
2275 }
2276 #endif
2277 
2278 static strtol_error
strto2uintmax(uintmax_t * hip,uintmax_t * lop,const char * s)2279 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2280 {
2281   unsigned int lo_carry;
2282   uintmax_t hi = 0, lo = 0;
2283 
2284   strtol_error err = LONGINT_INVALID;
2285 
2286   /* Skip initial spaces and '+'.  */
2287   for (;;)
2288     {
2289       char c = *s;
2290       if (c == ' ')
2291         s++;
2292       else if (c == '+')
2293         {
2294           s++;
2295           break;
2296         }
2297       else
2298         break;
2299     }
2300 
2301   /* Initial scan for invalid digits.  */
2302   const char *p = s;
2303   for (;;)
2304     {
2305       unsigned int c = *p++;
2306       if (c == 0)
2307         break;
2308 
2309       if (UNLIKELY (!ISDIGIT (c)))
2310         {
2311           err = LONGINT_INVALID;
2312           break;
2313         }
2314 
2315       err = LONGINT_OK;           /* we've seen at least one valid digit */
2316     }
2317 
2318   for (;err == LONGINT_OK;)
2319     {
2320       unsigned int c = *s++;
2321       if (c == 0)
2322         break;
2323 
2324       c -= '0';
2325 
2326       if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2327         {
2328           err = LONGINT_OVERFLOW;
2329           break;
2330         }
2331       hi = 10 * hi;
2332 
2333       lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2334       lo_carry += 10 * lo < 2 * lo;
2335 
2336       lo = 10 * lo;
2337       lo += c;
2338 
2339       lo_carry += lo < c;
2340       hi += lo_carry;
2341       if (UNLIKELY (hi < lo_carry))
2342         {
2343           err = LONGINT_OVERFLOW;
2344           break;
2345         }
2346     }
2347 
2348   *hip = hi;
2349   *lop = lo;
2350 
2351   return err;
2352 }
2353 
2354 /* Structure and routines for buffering and outputting full lines,
2355    to support parallel operation efficiently.  */
2356 static struct lbuf_
2357 {
2358   char *buf;
2359   char *end;
2360 } lbuf;
2361 
2362 /* 512 is chosen to give good performance,
2363    and also is the max guaranteed size that
2364    consumers can read atomically through pipes.
2365    Also it's big enough to cater for max line length
2366    even with 128 bit uintmax_t.  */
2367 #define FACTOR_PIPE_BUF 512
2368 
2369 static void
lbuf_alloc(void)2370 lbuf_alloc (void)
2371 {
2372   if (lbuf.buf)
2373     return;
2374 
2375   /* Double to ensure enough space for
2376      previous numbers + next number.  */
2377   lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2378   lbuf.end = lbuf.buf;
2379 }
2380 
2381 /* Write complete LBUF to standard output.  */
2382 static void
lbuf_flush(void)2383 lbuf_flush (void)
2384 {
2385   size_t size = lbuf.end - lbuf.buf;
2386   if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2387     die (EXIT_FAILURE, errno, "%s", _("write error"));
2388   lbuf.end = lbuf.buf;
2389 }
2390 
2391 /* Add a character C to LBUF and if it's a newline
2392    and enough bytes are already buffered,
2393    then write atomically to standard output.  */
2394 static void
lbuf_putc(char c)2395 lbuf_putc (char c)
2396 {
2397   *lbuf.end++ = c;
2398 
2399   if (c == '\n')
2400     {
2401       size_t buffered = lbuf.end - lbuf.buf;
2402 
2403       /* Provide immediate output for interactive use.  */
2404       static int line_buffered = -1;
2405       if (line_buffered == -1)
2406         line_buffered = isatty (STDIN_FILENO) || isatty (STDOUT_FILENO);
2407       if (line_buffered)
2408         lbuf_flush ();
2409       else if (buffered >= FACTOR_PIPE_BUF)
2410         {
2411           /* Write output in <= PIPE_BUF chunks
2412              so consumers can read atomically.  */
2413           char const *tend = lbuf.end;
2414 
2415           /* Since a umaxint_t's factors must fit in 512
2416              we're guaranteed to find a newline here.  */
2417           char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2418           while (*--tlend != '\n');
2419           tlend++;
2420 
2421           lbuf.end = tlend;
2422           lbuf_flush ();
2423 
2424           /* Buffer the remainder.  */
2425           memcpy (lbuf.buf, tlend, tend - tlend);
2426           lbuf.end = lbuf.buf + (tend - tlend);
2427         }
2428     }
2429 }
2430 
2431 /* Buffer an int to the internal LBUF.  */
2432 static void
lbuf_putint(uintmax_t i,size_t min_width)2433 lbuf_putint (uintmax_t i, size_t min_width)
2434 {
2435   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2436   char const *umaxstr = umaxtostr (i, buf);
2437   size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2438   size_t z = width;
2439 
2440   for (; z < min_width; z++)
2441     *lbuf.end++ = '0';
2442 
2443   memcpy (lbuf.end, umaxstr, width);
2444   lbuf.end += width;
2445 }
2446 
2447 static void
print_uintmaxes(uintmax_t t1,uintmax_t t0)2448 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2449 {
2450   uintmax_t q, r;
2451 
2452   if (t1 == 0)
2453     lbuf_putint (t0, 0);
2454   else
2455     {
2456       /* Use very plain code here since it seems hard to write fast code
2457          without assuming a specific word size.  */
2458       q = t1 / 1000000000;
2459       r = t1 % 1000000000;
2460       udiv_qrnnd (t0, r, r, t0, 1000000000);
2461       print_uintmaxes (q, t0);
2462       lbuf_putint (r, 9);
2463     }
2464 }
2465 
2466 /* Single-precision factoring */
2467 static void
print_factors_single(uintmax_t t1,uintmax_t t0)2468 print_factors_single (uintmax_t t1, uintmax_t t0)
2469 {
2470   struct factors factors;
2471 
2472   print_uintmaxes (t1, t0);
2473   lbuf_putc (':');
2474 
2475   factor (t1, t0, &factors);
2476 
2477   for (unsigned int j = 0; j < factors.nfactors; j++)
2478     for (unsigned int k = 0; k < factors.e[j]; k++)
2479       {
2480         lbuf_putc (' ');
2481         print_uintmaxes (0, factors.p[j]);
2482       }
2483 
2484   if (factors.plarge[1])
2485     {
2486       lbuf_putc (' ');
2487       print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2488     }
2489 
2490   lbuf_putc ('\n');
2491 }
2492 
2493 /* Emit the factors of the indicated number.  If we have the option of using
2494    either algorithm, we select on the basis of the length of the number.
2495    For longer numbers, we prefer the MP algorithm even if the native algorithm
2496    has enough digits, because the algorithm is better.  The turnover point
2497    depends on the value.  */
2498 static bool
print_factors(const char * input)2499 print_factors (const char *input)
2500 {
2501   uintmax_t t1, t0;
2502 
2503   /* Try converting the number to one or two words.  If it fails, use GMP or
2504      print an error message.  The 2nd condition checks that the most
2505      significant bit of the two-word number is clear, in a typesize neutral
2506      way.  */
2507   strtol_error err = strto2uintmax (&t1, &t0, input);
2508 
2509   switch (err)
2510     {
2511     case LONGINT_OK:
2512       if (((t1 << 1) >> 1) == t1)
2513         {
2514           devmsg ("[using single-precision arithmetic] ");
2515           print_factors_single (t1, t0);
2516           return true;
2517         }
2518       break;
2519 
2520     case LONGINT_OVERFLOW:
2521       /* Try GMP.  */
2522       break;
2523 
2524     default:
2525       error (0, 0, _("%s is not a valid positive integer"), quote (input));
2526       return false;
2527     }
2528 
2529 #if HAVE_GMP
2530   devmsg ("[using arbitrary-precision arithmetic] ");
2531   mpz_t t;
2532   struct mp_factors factors;
2533 
2534   mpz_init_set_str (t, input, 10);
2535 
2536   gmp_printf ("%Zd:", t);
2537   mp_factor (t, &factors);
2538 
2539   for (unsigned int j = 0; j < factors.nfactors; j++)
2540     for (unsigned int k = 0; k < factors.e[j]; k++)
2541       gmp_printf (" %Zd", factors.p[j]);
2542 
2543   mp_factor_clear (&factors);
2544   mpz_clear (t);
2545   putchar ('\n');
2546   fflush (stdout);
2547   return true;
2548 #else
2549   error (0, 0, _("%s is too large"), quote (input));
2550   return false;
2551 #endif
2552 }
2553 
2554 void
usage(int status)2555 usage (int status)
2556 {
2557   if (status != EXIT_SUCCESS)
2558     emit_try_help ();
2559   else
2560     {
2561       printf (_("\
2562 Usage: %s [NUMBER]...\n\
2563   or:  %s OPTION\n\
2564 "),
2565               program_name, program_name);
2566       fputs (_("\
2567 Print the prime factors of each specified integer NUMBER.  If none\n\
2568 are specified on the command line, read them from standard input.\n\
2569 \n\
2570 "), stdout);
2571       fputs (HELP_OPTION_DESCRIPTION, stdout);
2572       fputs (VERSION_OPTION_DESCRIPTION, stdout);
2573       emit_ancillary_info (PROGRAM_NAME);
2574     }
2575   exit (status);
2576 }
2577 
2578 static bool
do_stdin(void)2579 do_stdin (void)
2580 {
2581   bool ok = true;
2582   token_buffer tokenbuffer;
2583 
2584   init_tokenbuffer (&tokenbuffer);
2585 
2586   while (true)
2587     {
2588       size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2589                                        &tokenbuffer);
2590       if (token_length == (size_t) -1)
2591         break;
2592       ok &= print_factors (tokenbuffer.buffer);
2593     }
2594   free (tokenbuffer.buffer);
2595 
2596   return ok;
2597 }
2598 
2599 int
main(int argc,char ** argv)2600 main (int argc, char **argv)
2601 {
2602   initialize_main (&argc, &argv);
2603   set_program_name (argv[0]);
2604   setlocale (LC_ALL, "");
2605   bindtextdomain (PACKAGE, LOCALEDIR);
2606   textdomain (PACKAGE);
2607 
2608   lbuf_alloc ();
2609   atexit (close_stdout);
2610   atexit (lbuf_flush);
2611 
2612   int c;
2613   while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2614     {
2615       switch (c)
2616         {
2617         case DEV_DEBUG_OPTION:
2618           dev_debug = true;
2619           break;
2620 
2621         case_GETOPT_HELP_CHAR;
2622 
2623         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2624 
2625         default:
2626           usage (EXIT_FAILURE);
2627         }
2628     }
2629 
2630 #if STAT_SQUFOF
2631   memset (q_freq, 0, sizeof (q_freq));
2632 #endif
2633 
2634   bool ok;
2635   if (argc <= optind)
2636     ok = do_stdin ();
2637   else
2638     {
2639       ok = true;
2640       for (int i = optind; i < argc; i++)
2641         if (! print_factors (argv[i]))
2642           ok = false;
2643     }
2644 
2645 #if STAT_SQUFOF
2646   if (q_freq[0] > 0)
2647     {
2648       double acc_f;
2649       printf ("q  freq.  cum. freq.(total: %d)\n", q_freq[0]);
2650       for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2651         {
2652           double f = (double) q_freq[i] / q_freq[0];
2653           acc_f += f;
2654           printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2655                   100.0 * f, 100.0 * acc_f);
2656         }
2657     }
2658 #endif
2659 
2660   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
2661 }
2662