1 /* mpn_mulmid -- middle product
2 
3    Contributed by David Harvey.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2011 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 #include "gmp.h"
39 #include "gmp-impl.h"
40 
41 
42 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
43 
44 
45 void
mpn_mulmid(mp_ptr rp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn)46 mpn_mulmid (mp_ptr rp,
47             mp_srcptr ap, mp_size_t an,
48             mp_srcptr bp, mp_size_t bn)
49 {
50   mp_size_t rn, k;
51   mp_ptr scratch, temp;
52 
53   ASSERT (an >= bn);
54   ASSERT (bn >= 1);
55   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
56   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
57 
58   if (bn < MULMID_TOOM42_THRESHOLD)
59     {
60       /* region not tall enough to make toom42 worthwhile for any portion */
61 
62       if (an < CHUNK)
63 	{
64 	  /* region not too wide either, just call basecase directly */
65 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
66 	  return;
67 	}
68 
69       /* Region quite wide. For better locality, use basecase on chunks:
70 
71 	 AAABBBCC..
72 	 .AAABBBCC.
73 	 ..AAABBBCC
74       */
75 
76       k = CHUNK - bn + 1;    /* number of diagonals per chunk */
77 
78       /* first chunk (marked A in the above diagram) */
79       mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
80 
81       /* remaining chunks (B, C, etc) */
82       an -= k;
83 
84       while (an >= CHUNK)
85 	{
86 	  mp_limb_t t0, t1, cy;
87 	  ap += k, rp += k;
88 	  t0 = rp[0], t1 = rp[1];
89 	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
90 	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
91 	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
92 	  an -= k;
93 	}
94 
95       if (an >= bn)
96 	{
97 	  /* last remaining chunk */
98 	  mp_limb_t t0, t1, cy;
99 	  ap += k, rp += k;
100 	  t0 = rp[0], t1 = rp[1];
101 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
102 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
103 	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
104 	}
105 
106       return;
107     }
108 
109   /* region is tall enough for toom42 */
110 
111   rn = an - bn + 1;
112 
113   if (rn < MULMID_TOOM42_THRESHOLD)
114     {
115       /* region not wide enough to make toom42 worthwhile for any portion */
116 
117       TMP_DECL;
118 
119       if (bn < CHUNK)
120 	{
121 	  /* region not too tall either, just call basecase directly */
122 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
123 	  return;
124 	}
125 
126       /* Region quite tall. For better locality, use basecase on chunks:
127 
128 	 AAAAA....
129 	 .AAAAA...
130 	 ..BBBBB..
131 	 ...BBBBB.
132 	 ....CCCCC
133       */
134 
135       TMP_MARK;
136 
137       temp = TMP_ALLOC_LIMBS (rn + 2);
138 
139       /* first chunk (marked A in the above diagram) */
140       bp += bn - CHUNK, an -= bn - CHUNK;
141       mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
142 
143       /* remaining chunks (B, C, etc) */
144       bn -= CHUNK;
145 
146       while (bn >= CHUNK)
147 	{
148 	  ap += CHUNK, bp -= CHUNK;
149 	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
150 	  mpn_add_n (rp, rp, temp, rn + 2);
151 	  bn -= CHUNK;
152 	}
153 
154       if (bn)
155 	{
156 	  /* last remaining chunk */
157 	  ap += CHUNK, bp -= bn;
158 	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
159 	  mpn_add_n (rp, rp, temp, rn + 2);
160 	}
161 
162       TMP_FREE;
163       return;
164     }
165 
166   /* we're definitely going to use toom42 somewhere */
167 
168   if (bn > rn)
169     {
170       /* slice region into chunks, use toom42 on all chunks except possibly
171 	 the last:
172 
173          AA....
174          .AA...
175          ..BB..
176          ...BB.
177          ....CC
178       */
179 
180       TMP_DECL;
181       TMP_MARK;
182 
183       temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
184       scratch = temp + rn + 2;
185 
186       /* first chunk (marked A in the above diagram) */
187       bp += bn - rn;
188       mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
189 
190       /* remaining chunks (B, C, etc) */
191       bn -= rn;
192 
193       while (bn >= rn)
194         {
195           ap += rn, bp -= rn;
196 	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
197           mpn_add_n (rp, rp, temp, rn + 2);
198           bn -= rn;
199         }
200 
201       if (bn)
202         {
203           /* last remaining chunk */
204           ap += rn, bp -= bn;
205 	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
206           mpn_add_n (rp, rp, temp, rn + 2);
207         }
208 
209       TMP_FREE;
210     }
211   else
212     {
213       /* slice region into chunks, use toom42 on all chunks except possibly
214 	 the last:
215 
216          AAABBBCC..
217          .AAABBBCC.
218          ..AAABBBCC
219       */
220 
221       TMP_DECL;
222       TMP_MARK;
223 
224       scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
225 
226       /* first chunk (marked A in the above diagram) */
227       mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
228 
229       /* remaining chunks (B, C, etc) */
230       rn -= bn;
231 
232       while (rn >= bn)
233         {
234 	  mp_limb_t t0, t1, cy;
235           ap += bn, rp += bn;
236           t0 = rp[0], t1 = rp[1];
237           mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
238 	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
239 	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
240 	  rn -= bn;
241         }
242 
243       TMP_FREE;
244 
245       if (rn)
246         {
247           /* last remaining chunk */
248 	  mp_limb_t t0, t1, cy;
249           ap += bn, rp += bn;
250           t0 = rp[0], t1 = rp[1];
251           mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
252 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
253 	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
254         }
255     }
256 }
257