xref: /dragonfly/contrib/gmp/mpn/generic/fib2_ui.c (revision 73610d44)
1 /* mpn_fib2_ui -- calculate Fibonacci numbers.
2 
3    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
4    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5    FUTURE GNU MP RELEASES.
6 
7 Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15 
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19 License for more details.
20 
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23 
24 #include <stdio.h>
25 #include "gmp.h"
26 #include "gmp-impl.h"
27 
28 /* Diagnostic hook.  As defined here TRACE(x) expands to nothing, so the
   printf and mpn_trace calls wrapped in TRACE below are compiled out.
   Change this to "#define TRACE(x) x" for diagnostics.  */
29 #define TRACE(x)
30 
31 
32 /* Store F[n] at fp and F[n-1] at f1p.  fp and f1p should have room for
33    MPN_FIB2_SIZE(n) limbs.
34 
   Parameters:
     fp   destination for F[n].
     f1p  destination for F[n-1]; must not overlap fp (ASSERTed on entry).
     n    index of the Fibonacci number wanted.

35    The return value is the actual number of limbs stored, this will be at
36    least 1.  fp[size-1] will be non-zero, except when n==0, in which case
37    fp[0] is 0 and f1p[0] is 1.  f1p[size-1] can be zero, since F[n-1]<F[n]
38    (for n>0).
39 
   Method:

   n is halved until it is no bigger than FIB_TABLE_LIMIT and the pair
   F[k],F[k-1] for that k is read from FIB_TABLE.  Each discarded low bit
   of n is then consumed by one doubling step, which squares the pair and
   combines the squares into F[2k],F[2k-1] or F[2k+1],F[2k].  The doubling
   steps use a temporary area of MPN_FIB2_SIZE(n) limbs obtained with
   TMP_ALLOC_LIMBS and released with TMP_FREE before returning.

40    Notes:
41 
42    In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
43    low limb.
44 
45    In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -
46    F[k-1]^2.  This F[2k+1] is an F[4m+3] and such numbers are congruent to
47    1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since
48    that would leave 6 or 7 mod 8).
49 
50    This property of F[4m+3] can be verified by induction on F[4m+3] =
51    7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
52    identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
53 */
54 
55 mp_size_t
56 mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
57 {
58   mp_size_t      size;
59   unsigned long  nfirst, mask;
60 
61   TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
62 
63   ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
64 
65   /* Take a starting pair from the table.  n is halved until it is no
     bigger than FIB_TABLE_LIMIT; mask is shifted left once per halving, so
     on exit it marks the lowest bit of n already contained in nfirst.  */
66   mask = 1;
67   for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
68     mask <<= 1;
69   TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
70 
  /* The (int) cast is applied before the subtraction, so for n==0 the
     table is asked for index -1 instead of a wrapped unsigned long.  Per
     the header comment that lookup is expected to give f1p[0]==1.  */
71   f1p[0] = FIB_TABLE ((int) nfirst - 1);
72   fp[0]  = FIB_TABLE (nfirst);
73   size = 1;
74 
75   /* Skip to the end if the table lookup gives the final answer. */
76   if (mask != 1)
77     {
78       mp_size_t  alloc;
79       mp_ptr        xp;
80       TMP_DECL;
81 
      /* xp is scratch for F[k]^2.  alloc is kept only so the ASSERTs
	 below can check the growing size against it.  */
82       TMP_MARK;
83       alloc = MPN_FIB2_SIZE (n);
84       xp = TMP_ALLOC_LIMBS (alloc);
85 
86       do
87 	{
88 	  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
89 	     n&mask upwards.
90 
91 	     The next bit of n is n&(mask>>1) and we'll double to the pair
92 	     fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
93 	     that bit is 0 or 1 respectively.  */
94 
95 	  TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
96 			 n >> refmpn_count_trailing_zeros(mask),
97 			 mask, size, alloc);
98 		 mpn_trace ("fp ", fp, size);
99 		 mpn_trace ("f1p", f1p, size));
100 
101 	  /* fp normalized, f1p at most one high zero.  The || short-circuits,
	     so f1p[size-2] is only read when f1p[size-1] is zero.  */
102 	  ASSERT (fp[size-1] != 0);
103 	  ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
104 
105 	  /* f1p[size-1] might be zero, but this occurs rarely, so it's not
106 	     worth bothering checking for it.

	     Order matters here: F[k] in fp is squared into xp first, and
	     only then is fp overwritten with the square of f1p.  After these
	     two calls xp==F[k]^2 and fp==F[k-1]^2, and size is doubled to
	     cover the products.  */
107 	  ASSERT (alloc >= 2*size);
108 	  mpn_sqr (xp, fp,  size);
109 	  mpn_sqr (fp, f1p, size);
110 	  size *= 2;
111 
112 	  /* Shrink if possible.  Since fp was normalized there'll be at
113 	     most one high zero on xp (and if there is then there's one on
114 	     fp too, which now holds F[k-1]^2).  */
115 	  ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
116 	  size -= (xp[size-1] == 0);
117 	  ASSERT (xp[size-1] != 0);  /* only one xp high zero */
118 
119 	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.  The carry out of the
	     addition is stored as the extra limb f1p[size].  */
120 	  f1p[size] = mpn_add_n (f1p, xp, fp, size);
121 
122 	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
123 	     n&mask is the low bit of our implied k.

	     Both variants below leave the result in fp with an extra limb
	     at fp[size].  The first uses a combined shift-and-subtract
	     primitive when one is available natively and applies the +2 or
	     -2 afterwards.  The second shifts xp in place, ORs in the +2
	     before subtracting, and applies the -2 afterwards.  */
124 #if HAVE_NATIVE_mpn_rsblsh2_n || HAVE_NATIVE_mpn_rsblsh_n
125 #if HAVE_NATIVE_mpn_rsblsh2_n
126 	  fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
127 #else /* HAVE_NATIVE_mpn_rsblsh_n */
128 	  fp[size] = mpn_rsblsh_n (fp, fp, xp, size, 2);
129 #endif
	  /* k even: add 2 across all size+1 limbs, so a carry can reach
	     the new top limb.  k odd: subtract 2 from the low limb only;
	     the header notes explain why this cannot borrow.  */
130 	  if ((n & mask) == 0)
131 	    MPN_INCR_U(fp, size + 1, 2);	/* possible +2 */
132 	  else
133 	  {
134 	    ASSERT (fp[0] >= 2);
135 	    fp[0] -= 2;				/* possible -2 */
136 	  }
137 #else
138 	  {
139 	    mp_limb_t  c;
140 
	    /* c accumulates the top limb: the value returned by the shift
	       of xp, less the borrow returned by the subtraction.  The
	       left shift by 2 clears the low two bits of xp[0], so ORing
	       in 2 is the same as adding 2.  */
141 	    c = mpn_lshift (xp, xp, size, 2);
142 	    xp[0] |= (n & mask ? 0 : 2);	/* possible +2 */
143 	    c -= mpn_sub_n (fp, xp, fp, size);
144 	    ASSERT (n & mask ? fp[0] != 0 && fp[0] != 1 : 1);
145 	    fp[0] -= (n & mask ? 2 : 0);	/* possible -2 */
146 	    fp[size] = c;
147 	  }
148 #endif
	  /* Count the extra limb of fp only when it is non-zero.  f1p
	     already had its own limb at this index stored by the addition
	     above.  */
149 	  ASSERT (alloc >= size+1);
150 	  size += (fp[size] != 0);
151 
152 	  /* now n&mask is the new bit of n being considered */
153 	  mask >>= 1;
154 
155 	  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
156 	     F[2k+1] and F[2k-1].

	     Bit set:   keep fp==F[2k+1] and overwrite f1p with F[2k].
	     Bit clear: keep f1p==F[2k-1] and overwrite fp with F[2k].

	     NOTE(review): each mpn_sub_n is itself the argument of
	     ASSERT_NOCARRY, which is defined outside this file.  Presumably
	     the macro still evaluates its argument when assertions are
	     disabled -- confirm in gmp-impl.h.  */
157 	  if (n & mask)
158 	    ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
159 	  else {
160 	    ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
161 
162 	    /* Can have a high zero after replacing F[2k+1] with F[2k].
163 	       f1p will have a high zero if fp does. */
164 	    ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
165 	    size -= (fp[size-1] == 0);
166 	  }
167 	}
168       while (mask != 1);
169 
170       TMP_FREE;
171     }
172 
173   TRACE (printf ("done size=%ld\n", size);
174 	 mpn_trace ("fp ", fp, size);
175 	 mpn_trace ("f1p", f1p, size));
176 
177   return size;
178 }
179