1 /* mpn_mulmid -- middle product
2
3 Contributed by David Harvey.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2011 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37
38 #include "gmp.h"
39 #include "gmp-impl.h"
40
41
42 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
43
44
45 void
mpn_mulmid(mp_ptr rp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn)46 mpn_mulmid (mp_ptr rp,
47 mp_srcptr ap, mp_size_t an,
48 mp_srcptr bp, mp_size_t bn)
49 {
50 mp_size_t rn, k;
51 mp_ptr scratch, temp;
52
53 ASSERT (an >= bn);
54 ASSERT (bn >= 1);
55 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
56 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
57
58 if (bn < MULMID_TOOM42_THRESHOLD)
59 {
60 /* region not tall enough to make toom42 worthwhile for any portion */
61
62 if (an < CHUNK)
63 {
64 /* region not too wide either, just call basecase directly */
65 mpn_mulmid_basecase (rp, ap, an, bp, bn);
66 return;
67 }
68
69 /* Region quite wide. For better locality, use basecase on chunks:
70
71 AAABBBCC..
72 .AAABBBCC.
73 ..AAABBBCC
74 */
75
76 k = CHUNK - bn + 1; /* number of diagonals per chunk */
77
78 /* first chunk (marked A in the above diagram) */
79 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
80
81 /* remaining chunks (B, C, etc) */
82 an -= k;
83
84 while (an >= CHUNK)
85 {
86 mp_limb_t t0, t1, cy;
87 ap += k, rp += k;
88 t0 = rp[0], t1 = rp[1];
89 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
90 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
91 MPN_INCR_U (rp + 1, k + 1, t1 + cy);
92 an -= k;
93 }
94
95 if (an >= bn)
96 {
97 /* last remaining chunk */
98 mp_limb_t t0, t1, cy;
99 ap += k, rp += k;
100 t0 = rp[0], t1 = rp[1];
101 mpn_mulmid_basecase (rp, ap, an, bp, bn);
102 ADDC_LIMB (cy, rp[0], rp[0], t0);
103 MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
104 }
105
106 return;
107 }
108
109 /* region is tall enough for toom42 */
110
111 rn = an - bn + 1;
112
113 if (rn < MULMID_TOOM42_THRESHOLD)
114 {
115 /* region not wide enough to make toom42 worthwhile for any portion */
116
117 TMP_DECL;
118
119 if (bn < CHUNK)
120 {
121 /* region not too tall either, just call basecase directly */
122 mpn_mulmid_basecase (rp, ap, an, bp, bn);
123 return;
124 }
125
126 /* Region quite tall. For better locality, use basecase on chunks:
127
128 AAAAA....
129 .AAAAA...
130 ..BBBBB..
131 ...BBBBB.
132 ....CCCCC
133 */
134
135 TMP_MARK;
136
137 temp = TMP_ALLOC_LIMBS (rn + 2);
138
139 /* first chunk (marked A in the above diagram) */
140 bp += bn - CHUNK, an -= bn - CHUNK;
141 mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
142
143 /* remaining chunks (B, C, etc) */
144 bn -= CHUNK;
145
146 while (bn >= CHUNK)
147 {
148 ap += CHUNK, bp -= CHUNK;
149 mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
150 mpn_add_n (rp, rp, temp, rn + 2);
151 bn -= CHUNK;
152 }
153
154 if (bn)
155 {
156 /* last remaining chunk */
157 ap += CHUNK, bp -= bn;
158 mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
159 mpn_add_n (rp, rp, temp, rn + 2);
160 }
161
162 TMP_FREE;
163 return;
164 }
165
166 /* we're definitely going to use toom42 somewhere */
167
168 if (bn > rn)
169 {
170 /* slice region into chunks, use toom42 on all chunks except possibly
171 the last:
172
173 AA....
174 .AA...
175 ..BB..
176 ...BB.
177 ....CC
178 */
179
180 TMP_DECL;
181 TMP_MARK;
182
183 temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
184 scratch = temp + rn + 2;
185
186 /* first chunk (marked A in the above diagram) */
187 bp += bn - rn;
188 mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
189
190 /* remaining chunks (B, C, etc) */
191 bn -= rn;
192
193 while (bn >= rn)
194 {
195 ap += rn, bp -= rn;
196 mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
197 mpn_add_n (rp, rp, temp, rn + 2);
198 bn -= rn;
199 }
200
201 if (bn)
202 {
203 /* last remaining chunk */
204 ap += rn, bp -= bn;
205 mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
206 mpn_add_n (rp, rp, temp, rn + 2);
207 }
208
209 TMP_FREE;
210 }
211 else
212 {
213 /* slice region into chunks, use toom42 on all chunks except possibly
214 the last:
215
216 AAABBBCC..
217 .AAABBBCC.
218 ..AAABBBCC
219 */
220
221 TMP_DECL;
222 TMP_MARK;
223
224 scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
225
226 /* first chunk (marked A in the above diagram) */
227 mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
228
229 /* remaining chunks (B, C, etc) */
230 rn -= bn;
231
232 while (rn >= bn)
233 {
234 mp_limb_t t0, t1, cy;
235 ap += bn, rp += bn;
236 t0 = rp[0], t1 = rp[1];
237 mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
238 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
239 MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
240 rn -= bn;
241 }
242
243 TMP_FREE;
244
245 if (rn)
246 {
247 /* last remaining chunk */
248 mp_limb_t t0, t1, cy;
249 ap += bn, rp += bn;
250 t0 = rp[0], t1 = rp[1];
251 mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
252 ADDC_LIMB (cy, rp[0], rp[0], t0);
253 MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
254 }
255 }
256 }
257