1 /* matrix22_mul.c.
2 
3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6 
7 Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15 
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19 License for more details.
20 
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23 
24 #include "gmp.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27 
28 #define MUL(rp, ap, an, bp, bn) do {		\
29   if (an >= bn)					\
30     mpn_mul (rp, ap, an, bp, bn);		\
31   else						\
32     mpn_mul (rp, bp, bn, ap, an);		\
33 } while (0)
34 
35 /* Inputs are unsigned. */
36 static int
37 abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
38 {
39   int c;
40   MPN_CMP (c, ap, bp, n);
41   if (c >= 0)
42     {
43       mpn_sub_n (rp, ap, bp, n);
44       return 0;
45     }
46   else
47     {
48       mpn_sub_n (rp, bp, ap, n);
49       return 1;
50     }
51 }
52 
53 static int
54 add_signed_n (mp_ptr rp,
55 	      mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
56 {
57   if (as != bs)
58     return as ^ abs_sub_n (rp, ap, bp, n);
59   else
60     {
61       ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
62       return as;
63     }
64 }
65 
66 mp_size_t
67 mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
68 {
69   if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
70       || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
71     return 3*rn + 2*mn;
72   else
73     return 4*(rn + mn) + 5;
74 }
75 
76 /* Algorithm:
77 
78     / s0 \   /  1  0  0  0 \ / r0 \
79     | s1 |   |  0  1  0  0 | | r1 |
80     | s2 |   |  0  0  1  1 | | r2 |
81     | s3 | = | -1  0  1  1 | \ r3 /
82     | s4 |   |  1  0 -1  0 |
83     | s5 |   |  1  1 -1 -1 |
84     \ s6 /   \  0  0  0  1 /
85 
86     / t0 \   /  1  0  0  0 \ / m0 \
87     | t1 |   |  0  0  1  0 | | m1 |
88     | t2 |   | -1  1  0  0 | | m2 |
89     | t3 | = |  1 -1  0  1 | \ m3 /
90     | t4 |   |  0 -1  0  1 |
91     | t5 |   |  0  0  0  1 |
92     \ t6 /   \ -1  1  1 -1 /
93 
94     / r0 \   / 1 1 0 0 0 0 0 \ / s0 * t0 \
95     | r1 | = | 1 0 1 1 0 1 0 | | s1 * t1 |
96     | r2 |   | 1 0 0 1 1 0 1 | | s2 * t2 |
97     \ r3 /   \ 1 0 1 1 1 0 0 / | s3 * t3 |
98 			       | s4 * t4 |
99 			       | s5 * t5 |
100 			       \ s6 * t6 /
101 */
102 
103 /* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
104  *
105  * Resulting elements are of size up to rn + mn + 1.
106  *
107  * Temporary storage: 4 rn + 4 mn + 5. */
108 void
109 mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
110 			   mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
111 			   mp_ptr tp)
112 {
113   mp_ptr s2, s3, t2, t3, u0, u1;
114   int r2s, r3s, s3s, t2s, t3s, u0s, u1s;
115   s2 = tp; tp += rn;
116   s3 = tp; tp += rn + 1;
117   t2 = tp; tp += mn;
118   t3 = tp; tp += mn + 1;
119   u0 = tp; tp += rn + mn + 1;
120   u1 = tp; /* rn + mn + 2 */
121 
122   MUL (u0, r0, rn, m0, mn); /* 0 */
123   MUL (u1, r1, rn, m2, mn); /* 1 */
124 
125   MPN_COPY (s2, r3, rn);
126 
127   r3[rn] = mpn_add_n (r3, r3, r2, rn);
128   r0[rn] = 0;
129   s3s = abs_sub_n (s3, r3, r0, rn + 1);
130   t2s = abs_sub_n (t2, m1, m0, mn);
131   if (t2s)
132     {
133       t3[mn] = mpn_add_n (t3, m3, t2, mn);
134       t3s = 0;
135     }
136   else
137     {
138       t3s = abs_sub_n (t3, m3, t2, mn);
139       t3[mn] = 0;
140     }
141 
142   r2s = abs_sub_n (r2, r0, r2, rn);
143   r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
144 
145   MUL(u1, s3, rn+1, t3, mn+1); /* 3 */
146   u1s = s3s ^ t3s;
147   ASSERT (u1[rn+mn+1] == 0);
148   ASSERT (u1[rn+mn] < 4);
149 
150   if (u1s)
151     {
152       u0[rn+mn] = 0;
153       u0s = abs_sub_n (u0, u0, u1, rn + mn + 1);
154     }
155   else
156     {
157       u0[rn+mn] = u1[rn+mn] + mpn_add_n (u0, u0, u1, rn + mn);
158       u0s = 0;
159     }
160   MUL(u1, r3, rn + 1, t2, mn); /* 2 */
161   u1s = t2s;
162   ASSERT (u1[rn+mn] < 2);
163 
164   u1s = add_signed_n (u1, u0, u0s, u1, u1s, rn + mn + 1);
165 
166   t2s = abs_sub_n (t2, m3, m1, mn);
167   if (s3s)
168     {
169       s3[rn] += mpn_add_n (s3, s3, r1, rn);
170       s3s = 0;
171     }
172   else if (s3[rn] > 0)
173     {
174       s3[rn] -= mpn_sub_n (s3, s3, r1, rn);
175       s3s = 1;
176     }
177   else
178     {
179       s3s = abs_sub_n (s3, r1, s3, rn);
180     }
181   MUL (r1, s3, rn+1, m3, mn); /* 5 */
182   ASSERT_NOCARRY(add_signed_n (r1, r1, s3s, u1, u1s, rn + mn + 1));
183   ASSERT (r1[rn + mn] < 2);
184 
185   MUL (r3, r2, rn, t2, mn); /* 4 */
186   r3s = r2s ^ t2s;
187   r3[rn + mn] = 0;
188   u0s = add_signed_n (u0, u0, u0s, r3, r3s, rn + mn + 1);
189   ASSERT_NOCARRY (add_signed_n (r3, r3, r3s, u1, u1s, rn + mn + 1));
190   ASSERT (r3[rn + mn] < 2);
191 
192   if (t3s)
193     {
194       t3[mn] += mpn_add_n (t3, m2, t3, mn);
195       t3s = 0;
196     }
197   else if (t3[mn] > 0)
198     {
199       t3[mn] -= mpn_sub_n (t3, t3, m2, mn);
200       t3s = 1;
201     }
202   else
203     {
204       t3s = abs_sub_n (t3, m2, t3, mn);
205     }
206   MUL (r2, s2, rn, t3, mn + 1); /* 6 */
207 
208   ASSERT_NOCARRY (add_signed_n (r2, r2, t3s, u0, u0s, rn + mn + 1));
209   ASSERT (r2[rn + mn] < 2);
210 }
211 
212 void
213 mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
214 		  mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
215 		  mp_ptr tp)
216 {
217   if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
218       || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
219     {
220       mp_ptr p0, p1;
221       unsigned i;
222 
223       /* Temporary storage: 3 rn + 2 mn */
224       p0 = tp + rn;
225       p1 = p0 + rn + mn;
226 
227       for (i = 0; i < 2; i++)
228 	{
229 	  MPN_COPY (tp, r0, rn);
230 
231 	  if (rn >= mn)
232 	    {
233 	      mpn_mul (p0, r0, rn, m0, mn);
234 	      mpn_mul (p1, r1, rn, m3, mn);
235 	      mpn_mul (r0, r1, rn, m2, mn);
236 	      mpn_mul (r1, tp, rn, m1, mn);
237 	    }
238 	  else
239 	    {
240 	      mpn_mul (p0, m0, mn, r0, rn);
241 	      mpn_mul (p1, m3, mn, r1, rn);
242 	      mpn_mul (r0, m2, mn, r1, rn);
243 	      mpn_mul (r1, m1, mn, tp, rn);
244 	    }
245 	  r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
246 	  r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);
247 
248 	  r0 = r2; r1 = r3;
249 	}
250     }
251   else
252     mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
253 			       m0, m1, m2, m3, mn, tp);
254 }
255