xref: /dragonfly/contrib/gmp/mpn/generic/toom2_sqr.c (revision 956939d5)
1 /* mpn_toom2_sqr -- Square {ap,an}.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 /* Evaluate in: -1, 0, +inf
31 
32   <-s--><--n-->
33    ____ ______
34   |_a1_|___a0_|
35    |b1_|___b0_|
36    <-t-><--n-->
37 
38   v0  =  a0     * b0       #   A(0)*B(0)
39   vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
40   vinf=      a1 *     b1   # A(inf)*B(inf)
41 */
42 
43 #if TUNE_PROGRAM_BUILD
44 #define MAYBE_sqr_toom2   1
45 #else
46 #define MAYBE_sqr_toom2							\
47   (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
48 #endif
49 
50 #define TOOM2_SQR_N_REC(p, a, n, ws)					\
51   do {									\
52     if (! MAYBE_sqr_toom2						\
53 	|| BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))		\
54       mpn_sqr_basecase (p, a, n);					\
55     else								\
56       mpn_toom2_sqr (p, a, n, ws);					\
57   } while (0)
58 
59 void
60 mpn_toom2_sqr (mp_ptr pp,
61 	       mp_srcptr ap, mp_size_t an,
62 	       mp_ptr scratch)
63 {
64   mp_size_t n, s;
65   mp_limb_t cy, cy2;
66   mp_ptr asm1;
67 
68 #define a0  ap
69 #define a1  (ap + n)
70 
71   s = an >> 1;
72   n = an - s;
73 
74   ASSERT (0 < s && s <= n);
75 
76   asm1 = pp;
77 
78   /* Compute asm1.  */
79   if (s == n)
80     {
81       if (mpn_cmp (a0, a1, n) < 0)
82 	{
83 	  mpn_sub_n (asm1, a1, a0, n);
84 	}
85       else
86 	{
87 	  mpn_sub_n (asm1, a0, a1, n);
88 	}
89     }
90   else
91     {
92       if (mpn_zero_p (a0 + s, n - s) && mpn_cmp (a0, a1, s) < 0)
93 	{
94 	  mpn_sub_n (asm1, a1, a0, s);
95 	  MPN_ZERO (asm1 + s, n - s);
96 	}
97       else
98 	{
99 	  mpn_sub (asm1, a0, n, a1, s);
100 	}
101     }
102 
103 #define v0	pp				/* 2n */
104 #define vinf	(pp + 2 * n)			/* s+s */
105 #define vm1	scratch				/* 2n */
106 
107   /* vm1, 2n limbs */
108   TOOM2_SQR_N_REC (vm1, asm1, n, scratch);
109 
110   /* vinf, s+s limbs */
111   TOOM2_SQR_N_REC (vinf, a1, s, scratch);
112 
113   /* v0, 2n limbs */
114   TOOM2_SQR_N_REC (v0, ap, n, scratch);
115 
116   /* H(v0) + L(vinf) */
117   cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
118 
119   /* L(v0) + H(v0) */
120   cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
121 
122   /* L(vinf) + H(vinf) */
123   cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + s - n);
124 
125   cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
126 
127   ASSERT (cy + 1  <= 3);
128   ASSERT (cy2 <= 2);
129 
130   mpn_incr_u (pp + 2 * n, cy2);
131   if (LIKELY (cy <= 2))
132     mpn_incr_u (pp + 3 * n, cy);
133   else
134     mpn_decr_u (pp + 3 * n, 1);
135 }
136