1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
2 
3    Contributed to the GNU project by Robert Harley.
4    Improvements by Paul Zimmermann and Marco Bodrato.
5 
6    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
7    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
8    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 
10 Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007 Free Software Foundation,
11 Inc.
12 
13 This file is part of the GNU MP Library.
14 
15 The GNU MP Library is free software; you can redistribute it and/or modify it
16 under the terms of the GNU Lesser General Public License as published by the
17 Free Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19 
20 The GNU MP Library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
22 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
23 for more details.
24 
25 You should have received a copy of the GNU Lesser General Public License
26 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
27 
28 #include "gmp.h"
29 #include "gmp-impl.h"
30 
31 void
32 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
33 			   mp_size_t k, mp_size_t twor, int sa,
34 			   mp_limb_t vinf0, mp_ptr ws)
35 {
36   mp_limb_t cy, saved;
37   mp_size_t twok = k + k;
38   mp_size_t kk1 = twok + 1;
39   mp_ptr c1, v1, c3, vinf, c5;
40   mp_limb_t cout; /* final carry, should be zero at the end */
41 
42   c1 = c  + k;
43   v1 = c1 + k;
44   c3 = v1 + k;
45   vinf = c3 + k;
46   c5 = vinf + k;
47 
48 #define v0 (c)
49   /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
50      thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
51   */
52   if (sa <= 0)
53     mpn_add_n (v2, v2, vm1, kk1);
54   else
55     mpn_sub_n (v2, v2, vm1, kk1);
56 
57   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
58        v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
59 
60   ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
61 						      /* (5 3 1 1 0)*/
62 
63   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
64        v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
65 
66   /* (2) vm1 <- tm1 := (v1 - sa*vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
67      tm1 >= 0                                            (0  1 0  1 0)
68      No carry comes out from {v1, kk1} +/- {vm1, kk1},
69      and the division by two is exact */
70   if (sa <= 0)
71     {
72 #ifdef HAVE_NATIVE_mpn_rsh1add_n
73       mpn_rsh1add_n (vm1, v1, vm1, kk1);
74 #else
75       mpn_add_n (vm1, v1, vm1, kk1);
76       mpn_rshift (vm1, vm1, kk1, 1);
77 #endif
78     }
79   else
80     {
81 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
82       mpn_rsh1sub_n (vm1, v1, vm1, kk1);
83 #else
84       mpn_sub_n (vm1, v1, vm1, kk1);
85       mpn_rshift (vm1, vm1, kk1, 1);
86 #endif
87     }
88 
89   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
90        v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
91 
92   /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
93      t1 >= 0
94   */
95   vinf[0] -= mpn_sub_n (v1, v1, c, twok);
96 
97   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
98        v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
99 
100   /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
101      t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
102   */
103 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
104   mpn_rsh1sub_n (v2, v2, v1, kk1);
105 #else
106   mpn_sub_n (v2, v2, v1, kk1);
107   mpn_rshift (v2, v2, kk1, 1);
108 #endif
109 
110   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
111        v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
112 
113   /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
114      result is v1 >= 0
115   */
116   mpn_sub_n (v1, v1, vm1, kk1);
117 
118   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
119        v0   v1-v0-tm1      hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
120 
121   /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
122      result is v2 >= 0 */
123   saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
124   vinf[0] = vinf0;       /* Set the right value for vinf0                     */
125 #ifdef HAVE_NATIVE_mpn_sublsh1_n
126   cy = mpn_sublsh1_n (v2, v2, vinf, twor);
127 #else
128   cy = mpn_lshift (ws, vinf, twor, 1);
129   cy += mpn_sub_n (v2, v2, ws, twor);
130 #endif
131   MPN_DECR_U (v2 + twor, kk1 - twor, cy);
132 
133   /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
134      result is >= 0 */
135   cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
136   vinf[0] = saved;
137   MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
138   __GMPN_ADD_1 (cout, vinf, vinf, twor, vinf0); /* Add vinf0, propagate carry. */
139 
140   /* (8) vm1 <- vm1-t2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
141      vm1 >= 0
142   */
143   mpn_sub_n (vm1, vm1, v2, kk1);            /* No overlapping here.        */
144 
145   /********************* Beginning the final phase **********************/
146 
147   /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
148        v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */
149 
150   /* (9) add t2 in {c+3k, ...} */
151   cy = mpn_add_n (c3, c3, v2, kk1);
152   __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
153 
154   /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
155        v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */
156   /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
157      v0       t1         vinf      tm1  t2
158 		    +t2 */
159 
160   /* add vm1 in {c+k, ...} */
161   cy = mpn_add_n (c1, c1, vm1, kk1);
162   __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
163 
164   /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
165      v0       t1         vinf      tm1  t2
166 	  +tm1      +t2    */
167 
168 #undef v0
169 #undef t2
170 }
171