1 /* mpn_sqrlo_basecase -- Internal routine to square a natural number
2 of length n.
3
4 THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6
7
8 Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015,
9 2016 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 #include "gmp-impl.h"
38 #include "longlong.h"
39
40 #ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
41 #if HAVE_NATIVE_mpn_addmul_1
42 #define SQRLO_SHORTCUT_MULTIPLICATIONS 0
43 #else
44 #define SQRLO_SHORTCUT_MULTIPLICATIONS 1
45 #endif
46 #endif
47
48 #if HAVE_NATIVE_mpn_sqr_diagonal
49 #define MPN_SQR_DIAGONAL(rp, up, n) \
50 mpn_sqr_diagonal (rp, up, n)
51 #else
52 #define MPN_SQR_DIAGONAL(rp, up, n) \
53 do { \
54 mp_size_t _i; \
55 for (_i = 0; _i < (n); _i++) \
56 { \
57 mp_limb_t ul, lpl; \
58 ul = (up)[_i]; \
59 umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
60 (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
61 } \
62 } while (0)
63 #endif
64
65 #define MPN_SQRLO_DIAGONAL(rp, up, n) \
66 do { \
67 mp_size_t nhalf; \
68 nhalf = (n) >> 1; \
69 MPN_SQR_DIAGONAL ((rp), (up), nhalf); \
70 if (((n) & 1) != 0) \
71 { \
72 mp_limb_t op; \
73 op = (up)[nhalf]; \
74 (rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK; \
75 } \
76 } while (0)
77
78 #if HAVE_NATIVE_mpn_addlsh1_n_ip1
79 #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \
80 do { \
81 MPN_SQRLO_DIAGONAL((rp), (up), (n)); \
82 mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1); \
83 } while (0)
84 #else
85 #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \
86 do { \
87 MPN_SQRLO_DIAGONAL((rp), (up), (n)); \
88 mpn_lshift ((tp), (tp), (n) - 1, 1); \
89 mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1); \
90 } while (0)
91 #endif
92
93 /* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
94 #define SQRLO_BASECASE_ALLOC \
95 (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
96
97 /* Default mpn_sqrlo_basecase using mpn_addmul_1. */
98 #ifndef SQRLO_SPECIAL_CASES
99 #define SQRLO_SPECIAL_CASES 2
100 #endif
101
102 #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
103 #define MAYBE_special_cases 1
104 #else
105 #define MAYBE_special_cases \
106 ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0))
107 #endif
108
109 void
mpn_sqrlo_basecase(mp_ptr rp,mp_srcptr up,mp_size_t n)110 mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
111 {
112 mp_limb_t ul;
113
114 ASSERT (n >= 1);
115 ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
116
117 ul = up[0];
118
119 if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES)
120 {
121 #if SQRLO_SPECIAL_CASES == 1
122 rp[0] = (ul * ul) & GMP_NUMB_MASK;
123 #else
124 if (n == 1)
125 rp[0] = (ul * ul) & GMP_NUMB_MASK;
126 else
127 {
128 mp_limb_t hi, lo, ul1;
129 umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
130 rp[0] = lo >> GMP_NAIL_BITS;
131 ul1 = up[1];
132 #if SQRLO_SPECIAL_CASES == 2
133 rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
134 #else
135 if (n == 2)
136 rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
137 else
138 {
139 mp_limb_t hi1;
140 #if GMP_NAIL_BITS != 0
141 ul <<= 1;
142 #endif
143 umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
144 hi1 += ul * up[2];
145 #if GMP_NAIL_BITS == 0
146 hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
147 add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
148 #else
149 hi += lo >> GMP_NAIL_BITS;
150 rp[1] = hi & GMP_NUMB_MASK;
151 rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
152 #endif
153 }
154 #endif
155 }
156 #endif
157 }
158 else
159 {
160 mp_limb_t tp[SQRLO_BASECASE_ALLOC];
161 mp_size_t i;
162
163 /* must fit n-1 limbs in tp */
164 ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
165
166 --n;
167 #if SQRLO_SHORTCUT_MULTIPLICATIONS
168 {
169 mp_limb_t cy;
170
171 cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
172 for (i = 1; 2 * i + 1 < n; ++i)
173 {
174 ul = up[i];
175 cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
176 }
177 tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
178 }
179 #else
180 mpn_mul_1 (tp, up + 1, n, ul);
181 for (i = 1; 2 * i < n; ++i)
182 mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
183 #endif
184
185 MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
186 }
187 }
188 #undef SQRLO_SPECIAL_CASES
189 #undef MAYBE_special_cases
190 #undef SQRLO_BASECASE_ALLOC
191 #undef SQRLO_SHORTCUT_MULTIPLICATIONS
192 #undef MPN_SQR_DIAGONAL
193 #undef MPN_SQRLO_DIAGONAL
194 #undef MPN_SQRLO_DIAG_ADDLSH1
195