1 /* ---------------------------------------------------------------------
2  *
3  * -- Integer Matrix Library (IML)
4  *    (C) Copyright 2004, 2006 All Rights Reserved
5  *
6  * -- IML routines -- Version 1.0.1 -- November, 2006
7  *
8  * Author         : Zhuliang Chen
9  * Contributor(s) : Arne Storjohann, Cory Fletcher
10  * University of Waterloo -- School of Computer Science
11  * Waterloo, Ontario, N2L3G1 Canada
12  *
13  * ---------------------------------------------------------------------
14  *
15  * -- Copyright notice and Licensing terms:
16  *
17  *  Redistribution  and  use in  source and binary forms, with or without
18  *  modification, are  permitted provided  that the following  conditions
19  *  are met:
20  *
21  * 1. Redistributions  of  source  code  must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce  the above copyright
24  *    notice,  this list of conditions, and the  following disclaimer in
25  *    the documentation and/or other materials provided with the distri-
26  *    bution.
27  * 3. The name of the University,  the IML group,  or the names of its
28  *    contributors  may not be used to endorse or promote products deri-
29  *    ved from this software without specific written permission.
30  *
31  * -- Disclaimer:
32  *
33  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
34  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
35  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
36  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
37  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,  INDIRECT, INCIDENTAL, SPE-
38  * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
39  * TO,  PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
40  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO-
41  * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  (IN-
42  * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
43  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44  *
45  */
46 
47 
48 #include "certsolve.h"
49 
50 #define NULLSPACE_COLUMN 10
51 
52 
53 /*
54  *
55  * Calling Sequence:
56  *   1/2/3 <-- certSolveLong(certflag, n, m, A, mp_b, mp_N, mp_D,
57  *                           mp_NZ, mp_DZ)
58  *
59  * Summary:
60  *   Certified solve a system of linear equations without reducing the
61  *   solution size, where the left hand side input matrix is represented
62  *   by signed long integers
63  *
64  * Description:
65  *   Let the system of linear equations be Av = b, where A is a n x m matrix,
66  *   and b is a n x 1 vector. There are three possibilities:
67  *
68  *   1. The system has more than one rational solution
69  *   2. The system has a unique rational solution
70  *   3. The system has no solution
71  *
72  *   In the first case, there exist a solution vector v with minimal
73  *   denominator and a rational certificate vector z to certify that the
74  *   denominator of solution v is really the minimal denominator.
75  *
76  *   The 1 x n certificate vector z satisfies that z.A is an integer vector
77  *   and z.b has the same denominator as the solution vector v.
78  *   In this case, the function will output the solution with minimal
79  *   denominator and optional certificate vector z (if certflag = 1).
80  *
81  *   Note: if choose not to compute the certificate vector z, the solution
82  *     will not garantee, but with high probability, to be the minimal
83  *     denominator solution, and the function will run faster.
84  *
85  *   In the second case, the function will only compute the unique solution
86  *   and the contents in the space for certificate vector make no sense.
87  *
88  *   In the third case, there exists a certificate vector q to certify that
89  *   the system has no solution. The 1 x n vector q satisfies q.A = 0 but
90  *   q.b <> 0. In this case, the function will output this certificate vector
91  *   q and store it into the same space for certificate z. The value of
92  *   certflag also controls whether to output q or not.
93  *
94  *   Note: if the function returns 3, then the system determinately does not
95  *     exist solution, no matter whether to output certificate q or not.
96  *
97  * Input:
98  *   certflag: 1/0, flag to indicate whether or not to compute the certificate
99  *             vector z or q.
100  *           - If certflag = 1, compute the certificate.
101  *           - If certflag = 0, not compute the certificate.
102  *          n: long, row dimension of the system
103  *          m: long, column dimension of the system
104  *          A: 1-dim signed long array length n*m, representation of n x m
105  *             matrix A
106  *       mp_b: 1-dim mpz_t array length n, representation of n x 1 vector b
107  *
108  * Return:
109  *   1: the first case, system has more than one solution
110  *   2: the second case, system has a unique solution
111  *   3: the third case, system has no solution
112  *
113  * Output:
114  *   mp_N: 1-dim mpz_t array length m,
115  *       - numerator vector of the solution with minimal denominator
116  *         in the first case
117  *       - numerator vector of the unique solution in the second case
118  *       - make no sense in the third case
119  *   mp_D: mpz_t,
120  *       - minimal denominator of the solutions in the first case
121  *       - denominator of the unique solution in the second case
122  *       - make no sense in the third case
123  *
124  * The following will only be computed when certflag = 1
125  *   mp_NZ: 1-dim mpz_t array length n,
126  *        - numerator vector of the certificate z in the first case
127  *        - make no sense in the second case
128  *        - numerator vector of the certificate q in the third case
129  *   mp_DZ: mpz_t,
130  *        - denominator of the certificate z if in the first case
131  *        - make no sense in the second case
132  *        - denominator of the certificate q in the third case
133  *
134  * Note:
135  *   - The space of (mp_N, mp_D) is needed to be preallocated, and entries in
136  *     mp_N and integer mp_D are needed to be initiated as any integer values.
137  *   - If certflag is specified to be 1, then also needs to preallocate space
138  *     for (mp_NZ, mp_DZ), and initiate integer mp_DZ and entries in mp_NZ to
139  *     be any integer values.
140  *     Otherwise, set mp_NZ = NULL, and mp_DZ = any integer
141  *
142  */
143 
144 long
certSolveLong(const long certflag,const long n,const long m,const long * A,mpz_t * mp_b,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)145 certSolveLong (const long certflag, const long n, const long m, \
146 	       const long *A, mpz_t *mp_b, mpz_t *mp_N, mpz_t mp_D, \
147 	       mpz_t *mp_NZ, mpz_t mp_DZ)
148 {
149   long i, j, l, r, r1, t, idx, cmp, ver, alpha, checkflag, temp;
150   FiniteField p, d=1;
151   long *C, *P, *rp, *Pt, *rpt, *A11;
152   Double *DA, *Db, *Dc;
153   mpz_t mp_bd, mp_beta, mp_Du, mp_temp, mp_temp1;
154   mpz_t *mp_b1, *mp_A21, *mp_Nu;
155   double tt;
156 
157 #if HAVE_TIME_H
158   clock_t ti;
159 #endif
160 
161   alpha = maxMagnLong(A, n, m, m);
162   if (!alpha)
163     {
164       /* in case A is a zero matrix, check vector b */
165       mpz_init(mp_beta);
166       maxMagnMP(mp_b, n, 1, 1, mp_beta);
167       if (!mpz_sgn(mp_beta))
168 	{
169 	  /* if b is also a zero matrix, set N = 0, D = 1, NZ = 0, DZ = 1 */
170 	  for (i = 0; i < m; i++) { mpz_set_ui(mp_N[i], 0); }
171 	  mpz_set_ui(mp_D, 1);
172 	  if (certflag == 1)
173 	    {
174 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
175 	      mpz_set_ui(mp_DZ, 1);
176 	    }
177 	  mpz_clear(mp_beta);
178 	  return 1;
179 	}
180       else
181 	{
182 	  /* compute certificate q, q.A = 0, q.b <> 0 in this case */
183 	  if (certflag == 1)
184 	    {
185 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
186 	      mpz_set_ui(mp_DZ, 1);
187 	      for (i = 0; i < n; i++)
188 		if (mpz_sgn(mp_b[i]) != 0)
189 		  { mpz_set_ui(mp_NZ[i], 1);  break; }
190 	    }
191 	  /* if b has non-zero entries, no solution */
192 	  mpz_clear(mp_beta);
193 	  return 3;
194 	}
195     }
196   while (1)
197     {
198       p = RandPrime(15, 19); /* choose prime p randomly, 2^15 < p < 2^19 */
199 
200 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
201       ti = clock();
202       printf("prime: %ld\n", p);
203 #endif
204 
205       /* call RowEchelonTransform to reduce DA = A mod p, and obtain rank r */
206       DA = XMALLOC(Double, n*m);
207       for (i = 0; i < n*m; i++)
208 	{ DA[i] = (Double)((temp = (A[i] % ((long) p))) >= 0 ? temp : ((long) p)+temp); }
209       P = XMALLOC(long, n+1);
210       for (i = 0; i < n+1; i++) { P[i] = i; }
211       rp = XCALLOC(long, n+1);
212       RowEchelonTransform(p, DA, n, m, 1, 1, 0, 0, P, rp, &d);
213       r = rp[0];
214 
215       /* if rank is 0, then p is a bad prime, restart */
216       if (r == 0)
217 	{
218 
219 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
220 	  printf("rank deficient in decomposition, repeat!\n");
221 #endif
222 
223 	  { XFREE(DA); XFREE(P); XFREE(rp); }
224 	  continue;
225 	}
226 
227       /* compute Pt, rpt satisfying that row Pt[i], column rpt[j] of A will be
228 	 changed to row i, column j respectively after the decomposition */
229       Pt = revseq(r, n, P);
230       rpt = revseq(r, m, rp);
231 
232       /* decomposite <A|b> mod p */
233       Db = XMALLOC(Double, n);
234       for (i = 0; i < n; i++)
235 	Db[i] = (Double)mpz_fdiv_ui(mp_b[Pt[i]], p);
236       Dc = XMALLOC(Double, n);
237       for (i = r; i < n; i++) { Dc[i] = Db[i]; }
238 
239       /* compute first r rows of Dc by DA[1..r, 1..r].Db[1..r] */
240       cblas_dgemv(CblasRowMajor, CblasNoTrans, r, r, 1.0, DA, m, Db, 1, 0.0, \
241 		  Dc, 1);
242 
243       /* compute last n-r rows of Dc by DA[r+1..n, 1..r].Db[1..r]+Db[r+1..n] */
244       if (r < n)
245 	cblas_dgemv(CblasRowMajor, CblasNoTrans, n-r, r, 1.0, DA+r*m, m, \
246 		    Db, 1, 1.0, Dc+r, 1);
247       Dmod(p, Dc, n, 1, 1);
248       idx = r-1;
249       while ((++idx < n) && (Dc[idx] == 0)) ;
250       { XFREE(DA); XFREE(Db); XFREE(Dc); }
251 
252 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
253       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
254       printf("Decomposition Time: %f\n", tt);
255       fflush(stdout);
256 #endif
257 
258       /* rank of A mod p == rank of <A|b> mod p */
259       if (idx == n)
260 	{
261 
262 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
263 	  ti = clock();
264 #endif
265 
266 	  /* compute mp_b1 = P.b */
267 	  mp_b1 = XMALLOC(mpz_t, n);
268 	  for (i = 0; i < n; i++) { mpz_init_set(mp_b1[i], mp_b[Pt[i]]); }
269 
270 	  /* C = [A_11, A_12] */
271 	  C = XMALLOC(long, r*m);
272 	  for (i = 0; i < r; i++)
273 	    for (j = 0; j < m; j++)
274 	      C[i*m+j] = A[Pt[i]*m+rpt[j]];
275 
276 	  /* full column rank, the solution is unique */
277 	  if (r == m)
278 	    nonsingSolvMM(RightSolu, r, 1, C, mp_b1, mp_N, mp_D);
279 	  else
280 	    {
281 	      /* not in full column rank, compute minimal denominator
282 		 solution */
283 	      mpz_init(mp_bd);
284 	      compressBoundLong(r, m, Pt, A, mp_bd);
285 	      ver = specialminSolveLong(certflag, 0, NULLSPACE_COLUMN, r, \
286 					m, mp_bd, C, mp_b1, mp_N, mp_D, \
287 					mp_NZ, mp_DZ);
288 	      mpz_clear(mp_bd);
289 	      if (ver == 0)
290 		{
291 		  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); }
292 		  { XFREE(C); XFREE(mp_b1); }
293 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
294 
295 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
296 		  printf("certificate checking fails, repeat!\n");
297 #endif
298 
299 		  continue;
300 		}
301 	    }
302 	  XFREE(C);
303 
304 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
305 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
306 	  printf("Minimial Denominator Solution Solving Time: %f\n", tt);
307 	  fflush(stdout);
308 	  ti = clock();
309 #endif
310 
311 	  /* not in full row rank, need to check whether <A_31|A_32>x = b_3 */
312 	  if (r < n)
313 	    {
314 	      { mpz_init(mp_temp); mpz_init(mp_temp1); }
315 	      checkflag = 0;
316 	      for (i = 0; i < n-r; i++)
317 		{
318 		  mpz_mul_si(mp_temp, mp_N[0], A[Pt[r+i]*m+rpt[0]]);
319 		  for (j = 1; j < m; j++)
320 		    {
321 		      mpz_set_si(mp_temp1, A[Pt[r+i]*m+rpt[j]]);
322 		      mpz_addmul(mp_temp, mp_N[j], mp_temp1);
323 		    }
324 		  mpz_mul(mp_temp1, mp_D, mp_b1[r+i]);
325 		  if (mpz_cmp(mp_temp, mp_temp1) != 0)
326 		    {
327 		      /* if compare fails, restart */
328 		      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
329 		      for (l = 0; l < n; l++) { mpz_clear(mp_b1[i]); }
330 		      { XFREE(P); XFREE(rp); XFREE(Pt); }
331 		      { XFREE(rpt); XFREE(mp_b1);}
332 
333 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
334 		      printf("checking lower n-r rows fails, repeat!\n");
335 #endif
336 		      checkflag = 1;
337 		      break;
338 		    }
339 		}
340 	      if (checkflag == 1) { continue; }
341 	      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
342 	    }
343 	  { XFREE(Pt); XFREE(rpt); }
344 	  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); } { XFREE(mp_b1); }
345 
346 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
347 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
348 	  printf("Solution Checking Time: %f\n", tt);
349 	  fflush(stdout);
350 #endif
351 
352 	  /* recover the solution vector and the certificate vector */
353 	  if (r < m)
354 	    {
355 	      /* compute Qy */
356 	      for (i = r; i > 0; i--)
357 		if (rp[i] != i) { mpz_swap(mp_N[i-1], mp_N[rp[i]-1]); }
358 
359 	      /* set last n-r entries in z be 0 and compute zP */
360 	      if (certflag == 1)
361 		{
362 		  for (i = r; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
363 		  for (i = r; i > 0; i--)
364 		    if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
365 		}
366 	      { XFREE(P); XFREE(rp); }
367 
368 	      /* system has more than one solution, the first case */
369 	      return 1;
370 	    }
371 	  else
372 	    {
373 	      /* system has a unique solution, the second case */
374 	      { XFREE(P); XFREE(rp); }
375 	      return 2;
376 	    }
377 	}
378       else
379 	{
380 	  if ((r == m) && (certflag == 0))
381 	    {
382 	      { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
383 	      return 3;
384 	    }
385 	  P[r+1] = idx+1;     /* update P for permutation of row r+1 */
386 
387 	  /* compute u = A_21.A_11^(-1) */
388 	  mpz_init(mp_Du);
389 	  mp_Nu = XMALLOC(mpz_t, r);
390 	  for (i = 0; i < r; i++) { mpz_init(mp_Nu[i]); }
391 	  A11 = XMALLOC(long, r*r);
392 	  for (i = 0; i < r; i++)
393 	    for (j = 0; j < r; j++)
394 	      A11[i*r+j] = A[Pt[i]*m+rpt[j]];
395 	  mp_A21 = XMALLOC(mpz_t, r);
396 	  for (i = 0; i < r; i++)
397 	    mpz_init_set_si(mp_A21[i], A[Pt[idx]*m+rpt[i]]);
398 	  nonsingSolvMM(LeftSolu, r, 1, A11, mp_A21, mp_Nu, mp_Du);
399 	  XFREE(A11);
400 	  for (i = 0; i < r; i++) { mpz_clear(mp_A21[i]); } { XFREE(mp_A21); }
401 	  if (r < m)
402 	    {
403 	      { mpz_init(mp_temp); mpz_init(mp_temp1); }
404 	      checkflag = 0;
405 
406 	      /* compare u.A_12 with A_22 */
407 	      for (i = 0; i < m-r; i++)
408 		{
409 		  mpz_mul_si(mp_temp, mp_Nu[0], A[Pt[0]*m+rpt[r+i]]);
410 		  for (j = 1; j < r; j++)
411 		    {
412 		      mpz_set_si(mp_temp1, A[Pt[j]*m+rpt[r+i]]);
413 		      mpz_addmul(mp_temp, mp_Nu[j], mp_temp1);
414 		    }
415 		  mpz_mul_si(mp_temp1, mp_Du, A[Pt[idx]*m+rpt[r+i]]);
416 		  if (mpz_cmp(mp_temp, mp_temp1) != 0)
417 		    {
418 		      /* comparison fails, restart */
419 		      mpz_clear(mp_Du);
420 		      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
421 		      for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); }
422 		      { XFREE(P); XFREE(rp); XFREE(Pt); }
423 		      { XFREE(rpt); XFREE(mp_Nu); }
424 
425 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
426 		      printf("checking no solution case fails, repeat!\n");
427 #endif
428 		      checkflag = 1;
429 		      break;
430 		    }
431 		}
432 	      if (checkflag == 1) { continue; }
433 	      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
434 	    }
435 	  if (certflag == 1)
436 	    {
437 	      /* set q = (u, -1, 0, ..., 0), which is store in mp_NZ */
438 	      mpz_set(mp_DZ, mp_Du);
439 	      for (i = 0; i < r; i++) { mpz_set(mp_NZ[i], mp_Nu[i]); }
440 	      mpz_set(mp_NZ[r], mp_Du);
441 	      mpz_neg(mp_NZ[r], mp_NZ[r]);
442 	      for (i = r+1; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
443 
444 	      /* compute qP */
445 	      for (i = r+1; i > 0; i--)
446 		if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
447 	    }
448 
449 	  mpz_clear(mp_Du);
450 	  for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); } { XFREE(mp_Nu); }
451 	  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
452 
453 	  /* system has no solution, the third case */
454 	  return 3;
455 	}
456     }
457 }
458 
459 
460 
461 
462 /*
463  *
464  * Calling Sequence:
465  *   1/2/3 <-- certSolveRedLong(certflag, nullcol, n, m, A, mp_b, mp_N, mp_D,
466  *                              mp_NZ, mp_DZ)
467  *
468  * Summary:
469  *   Certified solve a system of linear equations and reduce the solution
470  *   size, where the left hand side input matrix is represented by signed
471  *   long integers
472  *
473  * Description:
474  *   Let the system of linear equations be Av = b, where A is a n x m matrix,
475  *   and b is a n x 1 vector. There are three possibilities:
476  *
477  *   1. The system has more than one rational solution
478  *   2. The system has a unique rational solution
479  *   3. The system has no solution
480  *
481  *   In the first case, there exist a solution vector v with minimal
482  *   denominator and a rational certificate vector z to certify that the
483  *   denominator of solution v is really the minimal denominator.
484  *
485  *   The 1 x n certificate vector z satisfies that z.A is an integer vector
486  *   and z.b has the same denominator as the solution vector v.
487  *   In this case, the function will output the solution with minimal
488  *   denominator and optional certificate vector z (if certflag = 1).
489  *
490  *   Note: if choose not to compute the certificate vector z, the solution
491  *     will not garantee, but with high probability, to be the minimal
492  *     denominator solution, and the function will run faster.
493  *
494  *   Lattice reduction will be used to reduce the solution size. Parameter
495  *   nullcol designates the dimension of kernal basis we use to reduce the
496  *   solution size as well as the dimension of nullspace we use to compute
497  *   the minimal denominator. The heuristic results show that the solution
498  *   size will be reduced by factor 1/nullcol.
499  *
500  *   To find the minimum denominator as fast as possible, nullcol cannot be
501  *   too small. We use NULLSPACE_COLUMN as the minimal value of nullcol. That
502  *   is, if the input nullcol is less than NULLSPACE_COLUMN, NULLSPACE_COLUMN
503  *   will be used instead. However, if the input nullcol becomes larger, the
504  *   function will be slower. Meanwhile, it does not make sense to make
505  *   nullcol greater than the dimension of nullspace of the input system.
506  *
507  *   As a result, the parameter nullcol will not take effect unless
508  *   NULLSPACE_COLUMN < nullcol < dimnullspace is satisfied, where
509  *   dimnullspace is the dimension of nullspace of the input system.  If the
510  *   above condition is not satisfied, the boundary value NULLSPACE_COLUMN or
511  *   dimnullspace will be used.
512  *
513  *   In the second case, the function will only compute the unique solution
514  *   and the contents in the space for certificate vector make no sense.
515  *
516  *   In the third case, there exists a certificate vector q to certify that
517  *   the system has no solution. The 1 x n vector q satisfies q.A = 0 but
518  *   q.b <> 0. In this case, the function will output this certificate vector
519  *   q and store it into the same space for certificate z. The value of
520  *   certflag also controls whether to output q or not.
521  *
522  *   Note: if the function returns 3, then the system determinately does not
523  *     exist solution, no matter whether to output certificate q or not.
524  *
525  * Input:
526  *   certflag: 1/0, flag to indicate whether or not to compute the certificate
527  *             vector z or q.
528  *           - If certflag = 1, compute the certificate.
529  *           - If certflag = 0, not compute the certificate.
530  *    nullcol: long, dimension of nullspace and kernel basis of conditioned
531  *             system,
532  *             if nullcol < NULLSPACE_COLUMN, use NULLSPACE_COLUMN instead
533  *          n: long, row dimension of the system
534  *          m: long, column dimension of the system
535  *          A: 1-dim signed long array length n*m, representation of n x m
536  *             matrix A
537  *       mp_b: 1-dim mpz_t array length n, representation of n x 1 vector b
538  *
539  * Return:
540  *   1: the first case, system has more than one solution
541  *   2: the second case, system has a unique solution
542  *   3: the third case, system has no solution
543  *
544  * Output:
545  *   mp_N: 1-dim mpz_t array length m,
546  *       - numerator vector of the solution with minimal denominator
547  *         in the first case
548  *       - numerator vector of the unique solution in the second case
549  *       - make no sense in the third case
550  *   mp_D: mpz_t,
551  *       - minimal denominator of the solutions in the first case
552  *       - denominator of the unique solution in the second case
553  *       - make no sense in the third case
554  *
555  * The following will only be computed when certflag = 1
556  *   mp_NZ: 1-dim mpz_t array length n,
557  *        - numerator vector of the certificate z in the first case
558  *        - make no sense in the second case
559  *        - numerator vector of the certificate q in the third case
560  *   mp_DZ: mpz_t,
561  *        - denominator of the certificate z if in the first case
562  *        - make no sense in the second case
563  *        - denominator of the certificate q in the third case
564  *
565  * Note:
566  *   - The space of (mp_N, mp_D) is needed to be preallocated, and entries in
567  *     mp_N and integer mp_D are needed to be initiated as any integer values.
568  *   - If certflag is specified to be 1, then also needs to preallocate space
569  *     for (mp_NZ, mp_DZ), and initiate integer mp_DZ and entries in mp_NZ to
570  *     be any integer values.
571  *     Otherwise, set mp_NZ = NULL, and mp_DZ = any integer
572  *
573  */
574 
575 long
certSolveRedLong(const long certflag,const long nullcol,const long n,const long m,const long * A,mpz_t * mp_b,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)576 certSolveRedLong (const long certflag, const long nullcol, const long n,
577 		  const long m, const long *A, mpz_t *mp_b, mpz_t *mp_N,
578 		  mpz_t mp_D, mpz_t *mp_NZ, mpz_t mp_DZ)
579 {
580   long i, j, l, r, r1, t, idx, cmp, ver, alpha, checkflag, temp;
581   FiniteField p, d=1;
582   long *C, *P, *rp, *Pt, *rpt, *A11;
583   Double *DA, *Db, *Dc;
584   mpz_t mp_bd, mp_beta, mp_Du, mp_temp, mp_temp1;
585   mpz_t *mp_b1, *mp_A21, *mp_Nu;
586   double tt;
587 
588 #if HAVE_TIME_H
589   clock_t ti;
590 #endif
591 
592   alpha = maxMagnLong(A, n, m, m);
593   if (!alpha)
594     {
595       /* in case A is a zero matrix, check vector b */
596       mpz_init(mp_beta);
597       maxMagnMP(mp_b, n, 1, 1, mp_beta);
598       if (!mpz_sgn(mp_beta))
599 	{
600 	  /* if b is also a zero matrix, set N = 0, D = 1, NZ = 0, DZ = 1 */
601 	  for (i = 0; i < m; i++) { mpz_set_ui(mp_N[i], 0); }
602 	  mpz_set_ui(mp_D, 1);
603 	  if (certflag == 1)
604 	    {
605 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
606 	      mpz_set_ui(mp_DZ, 1);
607 	    }
608 	  mpz_clear(mp_beta);
609 	  return 1;
610 	}
611       else
612 	{
613 	  /* compute certificate q, q.A = 0, q.b <> 0 in this case */
614 	  if (certflag == 1)
615 	    {
616 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
617 	      mpz_set_ui(mp_DZ, 1);
618 	      for (i = 0; i < n; i++)
619 		if (mpz_sgn(mp_b[i]) != 0)
620 		  { mpz_set_ui(mp_NZ[i], 1);  break; }
621 	    }
622 	  /* if b has non-zero entries, no solution */
623 	  mpz_clear(mp_beta);
624 	  return 3;
625 	}
626     }
627   while (1)
628     {
629 
630       p = RandPrime(15, 19); /* choose prime p randomly, 2^15 < p < 2^19 */
631 
632 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
633       ti = clock();
634       printf("prime: %ld\n", p);
635 #endif
636 
637       /* call RowEchelonTransform to reduce DA = A mod p, and obtain rank r */
638       DA = XMALLOC(Double, n*m);
639       for (i = 0; i < n*m; i++)
640 	{ DA[i] = (Double)((temp = (A[i] % ((long) p))) >= 0 ? temp : ((long) p)+temp); }
641       P = XMALLOC(long, n+1);
642       for (i = 0; i < n+1; i++) { P[i] = i; }
643       rp = XCALLOC(long, n+1);
644       RowEchelonTransform(p, DA, n, m, 1, 1, 0, 0, P, rp, &d);
645       r = rp[0];
646 
647       /* if rank is 0, then p is a bad prime, restart */
648       if (r == 0)
649 	{
650 
651 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
652 	  printf("rank deficient in decomposition, repeat!\n");
653 #endif
654 
655 	  { XFREE(DA); XFREE(P); XFREE(rp); }
656 	  continue;
657 	}
658 
659       /* compute Pt, rpt satisfying that row Pt[i], column rpt[j] of A will be
660 	 changed to row i, column j respectively after the decomposition */
661       Pt = revseq(r, n, P);
662       rpt = revseq(r, m, rp);
663 
664       /* decomposite <A|b> mod p */
665       Db = XMALLOC(Double, n);
666       for (i = 0; i < n; i++)
667 	Db[i] = (Double)mpz_fdiv_ui(mp_b[Pt[i]], p);
668       Dc = XMALLOC(Double, n);
669       for (i = r; i < n; i++) { Dc[i] = Db[i]; }
670 
671       /* compute first r rows of Dc by DA[1..r, 1..r].Db[1..r] */
672       cblas_dgemv(CblasRowMajor, CblasNoTrans, r, r, 1.0, DA, m, Db, 1, 0.0, \
673 		  Dc, 1);
674 
675       /* compute last n-r rows of Dc by DA[r+1..n, 1..r].Db[1..r]+Db[r+1..n] */
676       if (r < n)
677 	cblas_dgemv(CblasRowMajor, CblasNoTrans, n-r, r, 1.0, DA+r*m, m, \
678 		    Db, 1, 1.0, Dc+r, 1);
679       Dmod(p, Dc, n, 1, 1);
680       idx = r-1;
681       while ((++idx < n) && (Dc[idx] == 0)) ;
682       { XFREE(DA); XFREE(Db); XFREE(Dc); }
683 
684 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
685       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
686       printf("Decomposition Time: %f\n", tt);
687       fflush(stdout);
688 #endif
689 
690       /* rank of A mod p == rank of <A|b> mod p */
691       if (idx == n)
692 	{
693 
694 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
695 	  ti = clock();
696 #endif
697 
698 	  /* compute mp_b1 = P.b */
699 	  mp_b1 = XMALLOC(mpz_t, n);
700 	  for (i = 0; i < n; i++) { mpz_init_set(mp_b1[i], mp_b[Pt[i]]); }
701 
702 	  /* C = [A_11, A_12] */
703 	  C = XMALLOC(long, r*m);
704 	  for (i = 0; i < r; i++)
705 	    for (j = 0; j < m; j++)
706 	      C[i*m+j] = A[Pt[i]*m+rpt[j]];
707 
708 	  /* full column rank, the solution is unique */
709 	  if (r == m)
710 	    nonsingSolvMM(RightSolu, r, 1, C, mp_b1, mp_N, mp_D);
711 	  else
712 	    {
713 	      /* not in full column rank, compute minimal denominator
714 		 solution */
715 	      mpz_init(mp_bd);
716 	      compressBoundLong(r, m, Pt, A, mp_bd);
717 	      ver = specialminSolveLong(certflag, 1, nullcol, r, m, mp_bd, \
718 					C, mp_b1, mp_N, mp_D, mp_NZ, mp_DZ);
719 	      mpz_clear(mp_bd);
720 	      if (ver == 0)
721 		{
722 		  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); }
723 		  { XFREE(C); XFREE(mp_b1); }
724 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
725 
726 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
727 		  printf("certificate checking fails, repeat!\n");
728 #endif
729 
730 		  continue;
731 		}
732 	    }
733 	  XFREE(C);
734 
735 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
736 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
737 	  printf("Minimial Denominator Solution Solving Time: %f\n", tt);
738 	  fflush(stdout);
739 	  ti = clock();
740 #endif
741 
742 	  /* not in full row rank, need to check whether <A_31|A_32>x = b_3 */
743 	  if (r < n)
744 	    {
745 	      { mpz_init(mp_temp); mpz_init(mp_temp1); }
746 	      checkflag = 0;
747 	      for (i = 0; i < n-r; i++)
748 		{
749 		  mpz_mul_si(mp_temp, mp_N[0], A[Pt[r+i]*m+rpt[0]]);
750 		  for (j = 1; j < m; j++)
751 		    {
752 		      mpz_set_si(mp_temp1, A[Pt[r+i]*m+rpt[j]]);
753 		      mpz_addmul(mp_temp, mp_N[j], mp_temp1);
754 		    }
755 		  mpz_mul(mp_temp1, mp_D, mp_b1[r+i]);
756 		  if (mpz_cmp(mp_temp, mp_temp1) != 0)
757 		    {
758 		      /* if compare fails, restart */
759 		      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
760 		      for (l = 0; l < n; l++) { mpz_clear(mp_b1[i]); }
761 		      { XFREE(P); XFREE(rp); XFREE(Pt); }
762 		      { XFREE(rpt); XFREE(mp_b1);}
763 
764 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
765 		      printf("checking lower n-r rows fails, repeat!\n");
766 #endif
767 
768 		      checkflag = 1;
769 		      break;
770 		    }
771 		}
772 	      if (checkflag == 1) { continue; }
773 	      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
774 	    }
775 	  { XFREE(Pt); XFREE(rpt); }
776 	  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); } { XFREE(mp_b1); }
777 
778 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
779 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
780 	  printf("Solution Checking Time: %f\n", tt);
781 	  fflush(stdout);
782 #endif
783 
784 	  /* recover the solution vector and the certificate vector */
785 	  if (r < m)
786 	    {
787 	      /* compute Qy */
788 	      for (i = r; i > 0; i--)
789 		if (rp[i] != i) { mpz_swap(mp_N[i-1], mp_N[rp[i]-1]); }
790 
791 	      /* set last n-r entries in z be 0 and compute zP */
792 	      if (certflag == 1)
793 		{
794 		  for (i = r; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
795 		  for (i = r; i > 0; i--)
796 		    if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
797 		}
798 	      { XFREE(P); XFREE(rp); }
799 
800 	      /* system has more than one solution, the first case */
801 	      return 1;
802 	    }
803 	  else
804 	    {
805 	      /* system has a unique solution, the second case */
806 	      { XFREE(P); XFREE(rp); }
807 	      return 2;
808 	    }
809 	}
810       else
811 	{
812 	  if ((r == m) && (certflag == 0))
813 	    {
814 	      { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
815 	      return 3;
816 	    }
817 	  P[r+1] = idx+1;     /* update P for permutation of row r+1 */
818 
819 	  /* compute u = A_21.A_11^(-1) */
820 	  mpz_init(mp_Du);
821 	  mp_Nu = XMALLOC(mpz_t, r);
822 	  for (i = 0; i < r; i++) { mpz_init(mp_Nu[i]); }
823 	  A11 = XMALLOC(long, r*r);
824 	  for (i = 0; i < r; i++)
825 	    for (j = 0; j < r; j++)
826 	      A11[i*r+j] = A[Pt[i]*m+rpt[j]];
827 	  mp_A21 = XMALLOC(mpz_t, r);
828 	  for (i = 0; i < r; i++)
829 	    mpz_init_set_si(mp_A21[i], A[Pt[idx]*m+rpt[i]]);
830 	  nonsingSolvMM(LeftSolu, r, 1, A11, mp_A21, mp_Nu, mp_Du);
831 	  XFREE(A11);
832 	  for (i = 0; i < r; i++) { mpz_clear(mp_A21[i]); } { XFREE(mp_A21); }
833 	  if (r < m)
834 	    {
835 	      { mpz_init(mp_temp); mpz_init(mp_temp1); }
836 	      checkflag = 0;
837 
838 	      /* compare u.A_12 with A_22 */
839 	      for (i = 0; i < m-r; i++)
840 		{
841 		  mpz_mul_si(mp_temp, mp_Nu[0], A[Pt[0]*m+rpt[r+i]]);
842 		  for (j = 1; j < r; j++)
843 		    {
844 		      mpz_set_si(mp_temp1, A[Pt[j]*m+rpt[r+i]]);
845 		      mpz_addmul(mp_temp, mp_Nu[j], mp_temp1);
846 		    }
847 		  mpz_mul_si(mp_temp1, mp_Du, A[Pt[idx]*m+rpt[r+i]]);
848 		  if (mpz_cmp(mp_temp, mp_temp1) != 0)
849 		    {
850 		      /* comparison fails, restart */
851 		      mpz_clear(mp_Du);
852 		      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
853 		      for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); }
854 		      { XFREE(P); XFREE(rp); XFREE(Pt); }
855 		      { XFREE(rpt); XFREE(mp_Nu); }
856 
857 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
858 		      printf("checking no solution case fails, repeat!\n");
859 #endif
860 
861 		      checkflag = 1;
862 		      break;
863 		    }
864 		}
865 	      if (checkflag == 1) { continue; }
866 	      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
867 	    }
868 	  if (certflag == 1)
869 	    {
870 	      /* set q = (u, -1, 0, ..., 0), which is store in mp_NZ */
871 	      mpz_set(mp_DZ, mp_Du);
872 	      for (i = 0; i < r; i++) { mpz_set(mp_NZ[i], mp_Nu[i]); }
873 	      mpz_set(mp_NZ[r], mp_Du);
874 	      mpz_neg(mp_NZ[r], mp_NZ[r]);
875 	      for (i = r+1; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
876 
877 	      /* compute qP */
878 	      for (i = r+1; i > 0; i--)
879 		if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
880 	    }
881 
882 	  mpz_clear(mp_Du);
883 	  for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); } { XFREE(mp_Nu); }
884 	  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
885 
886 	  /* system has no solution, the third case */
887 	  return 3;
888 	}
889     }
890 }
891 
892 
893 
894 
895 
896 /*
897  *
898  * Calling Sequence:
899  *   1/2/3 <-- certSolveMP(certflag, n, m, mp_A, mp_b, mp_N, mp_D,
900  *                         mp_NZ, mp_DZ)
901  *
902  * Summary:
903  *   Certified solve a system of linear equations without reducing the
904  *   solution size, where the left hand side input matrix is represented
905  *   by mpz_t integers
906  *
907  * Description:
908  *   Let the system of linear equations be Av = b, where A is a n x m matrix,
909  *   and b is a n x 1 vector. There are three possibilities:
910  *
911  *   1. The system has more than one rational solution
912  *   2. The system has a unique rational solution
913  *   3. The system has no solution
914  *
915  *   In the first case, there exist a solution vector v with minimal
916  *   denominator and a rational certificate vector z to certify that the
917  *   denominator of solution v is really the minimal denominator.
918  *
919  *   The 1 x n certificate vector z satisfies that z.A is an integer vector
920  *   and z.b has the same denominator as the solution vector v.
921  *   In this case, the function will output the solution with minimal
922  *   denominator and optional certificate vector z (if certflag = 1).
923  *
924  *   Note: if choose not to compute the certificate vector z, the solution
925  *     will not garantee, but with high probability, to be the minimal
926  *     denominator solution, and the function will run faster.
927  *
928  *   In the second case, the function will only compute the unique solution
929  *   and the contents in the space for certificate vector make no sense.
930  *
931  *   In the third case, there exists a certificate vector q to certify that
932  *   the system has no solution. The 1 x n vector q satisfies q.A = 0 but
933  *   q.b <> 0. In this case, the function will output this certificate vector
934  *   q and store it into the same space for certificate z. The value of
935  *   certflag also controls whether to output q or not.
936  *
937  *   Note: if the function returns 3, then the system determinately does not
938  *     exist solution, no matter whether to output certificate q or not.
939  *   In the first case, there exist a solution vector v with minimal
940  *   denominator and a rational certificate vector z to certify that the
941  *   denominator of solution v is the minimal denominator.
942  *
943  * Input:
944  *   certflag: 1/0, flag to indicate whether or not to compute the certificate
945  *             vector z or q.
946  *           - If certflag = 1, compute the certificate.
947  *           - If certflag = 0, not compute the certificate.
948  *          n: long, row dimension of the system
949  *          m: long, column dimension of the system
950  *       mp_A: 1-dim mpz_t array length n*m, representation of n x m matrix A
951  *       mp_b: 1-dim mpz_t array length n, representation of n x 1 vector b
952  *
953  * Return:
954  *   1: the first case, system has more than one solution
955  *   2: the second case, system has a unique solution
956  *   3: the third case, system has no solution
957  *
958  * Output:
959  *   mp_N: 1-dim mpz_t array length m,
960  *       - numerator vector of the solution with minimal denominator
961  *         in the first case
962  *       - numerator vector of the unique solution in the second case
963  *       - make no sense in the third case
964  *   mp_D: mpz_t,
965  *       - minimal denominator of the solutions in the first case
966  *       - denominator of the unique solution in the second case
967  *       - make no sense in the third case
968  *
969  * The following will only be computed when certflag = 1
970  *   mp_NZ: 1-dim mpz_t array length n,
971  *        - numerator vector of the certificate z in the first case
972  *        - make no sense in the second case
973  *        - numerator vector of the certificate q in the third case
974  *   mp_DZ: mpz_t,
975  *        - denominator of the certificate z if in the first case
976  *        - make no sense in the second case
977  *        - denominator of the certificate q in the third case
978  *
979  * Note:
980  *   - The space of (mp_N, mp_D) is needed to be preallocated, and entries in
981  *     mp_N and integer mp_D are needed to be initiated as any integer values.
982  *   - If certflag is specified to be 1, then also needs to preallocate space
983  *     for (mp_NZ, mp_DZ), and initiate integer mp_DZ and entries in mp_NZ to
984  *     be any integer values.
985  *     Otherwise, set mp_NZ = NULL, and mp_DZ = any integer
986  *
987  */
988 
989 long
certSolveMP(const long certflag,const long n,const long m,mpz_t * mp_A,mpz_t * mp_b,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)990 certSolveMP (const long certflag, const long n, const long m, \
991 		  mpz_t *mp_A, mpz_t *mp_b, mpz_t *mp_N, mpz_t mp_D, \
992 		  mpz_t *mp_NZ, mpz_t mp_DZ)
993 {
994   long i, j, l, r, r1, t, idx, cmp, ver, basislen=1, checkflag;
995   FiniteField qh, p, d=1;
996   long *P, *rp, *Pt, *rpt;
997   FiniteField *basis, **basiscmb;
998   Double *DA, *Db, *Dc, **CRNS, **C1RNS, **A11RNS, **A12RNS;
999   mpz_t mp_maxInter, mp_bd, mp_alpha, mp_beta, mp_Du;
1000   mpz_t *mp_b1, *mp_A21, *mp_A22, *mp_Nu;
1001   double tt;
1002 
1003 #if HAVE_TIME_H
1004   clock_t ti;
1005 #endif
1006 
1007   mpz_init(mp_alpha);
1008   maxMagnMP(mp_A, n, m, m, mp_alpha);
1009   if (!mpz_sgn(mp_alpha))
1010     {
1011       /* in case A is a zero matrix, check vector b */
1012       mpz_init(mp_beta);
1013       maxMagnMP(mp_b, n, 1, 1, mp_beta);
1014       if (!mpz_sgn(mp_beta))
1015 	{
1016 	  /* if b is also a zero matrix, set N = 0, D = 1, NZ = 0, DZ = 1 */
1017 	  for (i = 0; i < m; i++) { mpz_set_ui(mp_N[i], 0); }
1018 	  mpz_set_ui(mp_D, 1);
1019 	  if (certflag == 1)
1020 	    {
1021 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1022 	      mpz_set_ui(mp_DZ, 1);
1023 	    }
1024 	  { mpz_clear(mp_alpha); mpz_clear(mp_beta); }
1025 	  return 1;
1026 	}
1027       else
1028 	{
1029 	  /* compute certificate q, q.A = 0, q.b <> 0 in this case */
1030 	  if (certflag == 1)
1031 	    {
1032 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1033 	      mpz_set_ui(mp_DZ, 1);
1034 	      for (i = 0; i < n; i++)
1035 		if (mpz_sgn(mp_b[i]) != 0)
1036 		  { mpz_set_ui(mp_NZ[i], 1);  break; }
1037 	    }
1038 	  /* if b has non-zero entries, no solution */
1039 	  mpz_clear(mp_alpha); mpz_clear(mp_beta);
1040 	  return 3;
1041 	}
1042     }
1043   /* generate RNS basis */
1044   mpz_init_set_ui(mp_maxInter, 1);
1045   mpz_addmul_ui(mp_maxInter, mp_alpha, 2);
1046   qh = RNSbound(m);
1047   basiscmb = findRNS(qh, mp_maxInter, &basislen);
1048   basis = basiscmb[0];
1049   { mpz_clear(mp_maxInter); mpz_clear(mp_alpha); }
1050 
1051   while (1)
1052     {
1053       p = RandPrime(15, 19); /* choose p randomly, 2^15 < p < 2^19 */
1054 
1055 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1056       ti = clock();
1057       printf("prime: %ld\n", p);
1058 #endif
1059 
1060       /* call RowEchelonTransform to reduce DA = A mod p, and obtain rank r */
1061       DA = XMALLOC(Double, n*m);
1062       for (i = 0; i < n*m; i++) { DA[i] = (Double)mpz_fdiv_ui(mp_A[i], p); }
1063       P = XMALLOC(long, n+1);
1064       for (i = 0; i < n+1; i++) { P[i] = i; }
1065       rp = XCALLOC(long, n+1);
1066       RowEchelonTransform(p, DA, n, m, 1, 1, 0, 0, P, rp, &d);
1067       r = rp[0];
1068 
1069       /* if rank is 0, then p is a bad prime, restart */
1070       if (r == 0)
1071 	{
1072 
1073 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1074 	  printf("rank deficient in decomposition, repeat!\n");
1075 #endif
1076 
1077 	  { XFREE(DA); XFREE(P); XFREE(rp); }
1078 	  continue;
1079 	}
1080 
1081       /* compute Pt, rpt satisfying that row Pt[i], column rpt[j] of A will be
1082 	 changed to row i, column j respectively after the decomposition */
1083       Pt = revseq(r, n, P);
1084       rpt = revseq(r, m, rp);
1085 
1086       /* decomposite <A|b> mod p */
1087       Db = XMALLOC(Double, n);
1088       for (i = 0; i < n; i++)
1089 	Db[i] = (Double)mpz_fdiv_ui(mp_b[Pt[i]], p);
1090       Dc = XMALLOC(Double, n);
1091       for (i = r; i < n; i++) { Dc[i] = Db[i]; }
1092 
1093       /* compute first r rows of Dc by DA[1..r, 1..r].Db[1..r] */
1094       cblas_dgemv(CblasRowMajor, CblasNoTrans, r, r, 1.0, DA, m, Db, 1, 0.0, \
1095 		  Dc, 1);
1096 
1097       /* compute last n-r rows of Dc by DA[r+1..n, 1..r].Db[1..r]+Db[r+1..n] */
1098       if (r < n)
1099 	cblas_dgemv(CblasRowMajor, CblasNoTrans, n-r, r, 1.0, DA+r*m, m, \
1100 		    Db, 1, 1.0, Dc+r, 1);
1101       Dmod(p, Dc, n, 1, 1);
1102       idx = r-1;
1103       while ((++idx < n) && (Dc[idx] == 0)) ;
1104       { XFREE(DA); XFREE(Db); XFREE(Dc); }
1105 
1106 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1107       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
1108       printf("Decomposition Time: %f\n", tt);
1109 #endif
1110 
1111       /* rank of A mod p == rank of <A|b> mod p */
1112       if (idx == n)
1113 	{
1114 
1115 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1116 	  ti = clock();
1117 #endif
1118 
1119 	  /* compute mp_b1 = P.b */
1120 	  mp_b1 = XMALLOC(mpz_t, n);
1121 	  for (i = 0; i < n; i++) { mpz_init_set(mp_b1[i], mp_b[Pt[i]]); }
1122 
1123 	  /* CRNS[i] = [A_11, A_12] mod basis[i] */
1124 	  CRNS = XMALLOC(Double *, basislen);
1125 	  for (i = 0; i < basislen; i++)
1126 	    {
1127 	      CRNS[i] = XMALLOC(Double, r*m);
1128 	      for (j = 0; j < r; j++)
1129 		for (l = 0; l < m; l++)
1130 		  CRNS[i][j*m+l] = (Double)mpz_fdiv_ui(mp_A[Pt[j]*m+rpt[l]], \
1131 						       basis[i]);
1132 	    }
1133 	  /* full column rank, the solution is unique */
1134 	  if (r == m)
1135 	    nonsingSolvRNSMM(RightSolu, r, 1, basislen, basis, CRNS, mp_b1, \
1136 			     mp_N, mp_D);
1137 	  else
1138 	    {
1139 	      /* not in full column rank, compute minimal denominator solution
1140 		 compute the bound after compression in specialminSolveRNS */
1141 	      mpz_init(mp_bd);
1142 	      compressBoundMP(r, m, Pt, mp_A, mp_bd);
1143 	      ver = specialminSolveRNS(certflag, 0, NULLSPACE_COLUMN, r, m,\
1144 				       basislen, mp_bd, basis, CRNS, mp_b1, \
1145 				       mp_N, mp_D, mp_NZ, mp_DZ);
1146 	      mpz_clear(mp_bd);
1147 	      if (ver == 0)
1148 		{
1149 		  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); }
1150 		  for (i = 0; i < basislen; i++) { XFREE(CRNS[i]); }
1151 		  { XFREE(CRNS); XFREE(mp_b1); }
1152 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
1153 
1154 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1155 		  printf("certificate checking fails, repeat!\n");
1156 #endif
1157 
1158 		  continue;
1159 		}
1160 	    }
1161 	  for (i = 0; i < basislen; i++) { XFREE(CRNS[i]); } { XFREE(CRNS); }
1162 
1163 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1164 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
1165 	  printf("Minimial Denominator Solution Solving Time: %f\n", tt);
1166 	  ti = clock();
1167 #endif
1168 
1169 	  /* not in full row rank, need to check whether <A_31|A_32>x = b_3 */
1170 	  if (r < n)
1171 	    {
1172 	      C1RNS = XMALLOC(Double *, basislen);
1173 	      for (i = 0; i < basislen; i++)
1174 		{
1175 		  C1RNS[i] = XMALLOC(Double, (n-r)*m);
1176 		  for (j = 0; j < n-r; j++)
1177 		    for (l = 0; l < m; l++)
1178 		      C1RNS[i][j*m+l] = \
1179 			(Double)mpz_fdiv_ui(mp_A[Pt[r+j]*m+rpt[l]], basis[i]);
1180 		}
1181 
1182 	      cmp = LVecSMatMulCmp(RightMul, basislen, n-r, m, basis, C1RNS, \
1183 				   mp_D, mp_N, mp_b1+r);
1184 	      for (i = 0; i < basislen; i++) { XFREE(C1RNS[i]); }
1185 	      XFREE(C1RNS);
1186 
1187 	      /* if compare fails, restart */
1188 	      if (cmp == 0)
1189 		{
1190 		  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); }
1191 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); XFREE(mp_b1); }
1192 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1193 		  printf("checking lower n-r rows fails, repeat!\n");
1194 #endif
1195 
1196 		  continue;
1197 		}
1198 	    }
1199 	  { XFREE(Pt); XFREE(rpt); }
1200 	  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); } { XFREE(mp_b1); }
1201 	  { XFREE(basiscmb[0]); XFREE(basiscmb[1]); XFREE(basiscmb); }
1202 
1203 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1204 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
1205 	  printf("Solution Checking Time: %f\n", tt);
1206 #endif
1207 
1208 	  /* recover the solution vector and the certificate vector */
1209 	  if (r < m)
1210 	    {
1211 	      /* system has more than one solution, the first case */
1212 	      /* compute Qy */
1213 	      for (i = r; i > 0; i--)
1214 		if (rp[i] != i) { mpz_swap(mp_N[i-1], mp_N[rp[i]-1]); }
1215 
1216 	      /* set last n-r entries in z be 0 and compute zP */
1217 	      if (certflag == 1)
1218 		{
1219 		  for (i = r; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1220 		  for (i = r; i > 0; i--)
1221 		    if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
1222 		}
1223 	      { XFREE(P); XFREE(rp); }
1224 	      return 1;
1225 	    }
1226 	  else
1227 	    {
1228 	      /* system has a unique solution, the second case */
1229 	      { XFREE(P); XFREE(rp); }
1230 	      return 2;
1231 	    }
1232 	}
1233       else
1234 	{
1235 	  if ((r == m) && (certflag == 0))
1236 	    {
1237 	      { XFREE(basiscmb[0]); XFREE(basiscmb[1]); XFREE(basiscmb); }
1238 	      { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
1239 	      return 3;
1240 	    }
1241 	  P[r+1] = idx+1;      /* update P for permutation of row r+1 */
1242 
1243 	  /* compute u = A_21.A_11^(-1) */
1244 	  mpz_init(mp_Du);
1245 	  mp_Nu = XMALLOC(mpz_t, r);
1246 	  for (i = 0; i < r; i++) { mpz_init(mp_Nu[i]); }
1247 	  A11RNS = XMALLOC(Double *, basislen);
1248 	  for (i = 0; i < basislen; i++)
1249 	    {
1250 	      A11RNS[i] = XMALLOC(Double, r*r);
1251 	      for (j = 0; j < r; j++)
1252 		for (l = 0; l < r; l++)
1253 		  A11RNS[i][j*r+l] = \
1254 		    (Double)mpz_fdiv_ui(mp_A[Pt[j]*m+rpt[l]], basis[i]);
1255 	    }
1256 	  mp_A21 = XMALLOC(mpz_t, r);
1257 	  for (i = 0; i < r; i++)
1258 	    mpz_init_set(mp_A21[i], mp_A[Pt[idx]*m+rpt[i]]);
1259 	  nonsingSolvRNSMM(LeftSolu, r, 1, basislen, basis, A11RNS, mp_A21, \
1260 			   mp_Nu, mp_Du);
1261 	  for (i = 0; i < basislen; i++)  { XFREE(A11RNS[i]); }
1262 	  for (i = 0; i < r; i++) { mpz_clear(mp_A21[i]); }
1263 	  { XFREE(mp_A21); XFREE(A11RNS); }
1264 	  if (r < m)
1265 	    {
1266 	      /* compare u.A_12 with A_22 */
1267 	      A12RNS = XMALLOC(Double *, basislen);
1268 	      for (i = 0; i < basislen; i++)
1269 		{
1270 		  A12RNS[i] = XMALLOC(Double, r*(m-r));
1271 		  for (j = 0; j < r; j++)
1272 		    for (l = 0; l < m-r; l++)
1273 		      A12RNS[i][j*(m-r)+l] = \
1274 			(Double)mpz_fdiv_ui(mp_A[Pt[j]*m+rpt[r+l]], basis[i]);
1275 		}
1276 	      mp_A22 = XMALLOC(mpz_t, m-r);
1277 	      for (i = 0; i < m-r; i++)
1278 		mpz_init_set(mp_A22[i], mp_A[Pt[idx]*m+rpt[r+i]]);
1279 	      cmp = LVecSMatMulCmp(LeftMul, basislen, r, m-r, basis, A12RNS, \
1280 				   mp_Du, mp_Nu, mp_A22);
1281 	      for (i = 0; i < basislen; i++) { XFREE(A12RNS[i]); }
1282 	      for (i = 0; i < m-r; i++) { mpz_clear(mp_A22[i]); }
1283 	      { XFREE(A12RNS); XFREE(mp_A22); }
1284 
1285 	      /* comparison fails, restart */
1286 	      if (cmp == 0)
1287 		{
1288 		  mpz_clear(mp_Du);
1289 		  for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); }
1290 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); XFREE(mp_Nu); }
1291 
1292 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1293 		  printf("checking no solution case fails, repeat!\n");
1294 #endif
1295 
1296 		  continue;
1297 		}
1298 	    }
1299 	  if (certflag == 1)
1300 	    {
1301 	      /* set q = (u, -1, 0, ..., 0), which is store in mp_NZ */
1302 	      mpz_set(mp_DZ, mp_Du);
1303 	      for (i = 0; i < r; i++) { mpz_set(mp_NZ[i], mp_Nu[i]); }
1304 	      mpz_set(mp_NZ[r], mp_Du);
1305 	      mpz_neg(mp_NZ[r], mp_NZ[r]);
1306 	      for (i = r+1; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1307 
1308 	      /* compute qP */
1309 	      for (i = r+1; i > 0; i--)
1310 		if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
1311 	    }
1312 
1313 	  mpz_clear(mp_Du);
1314 	  { XFREE(basiscmb[0]); XFREE(basiscmb[1]); XFREE(basiscmb); }
1315 	  for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); } { XFREE(mp_Nu); }
1316 	  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
1317 
1318 	  /* system has no solution, the third case */
1319 	  return 3;
1320 	}
1321     }
1322 }
1323 
1324 
1325 
1326 /*
1327  *
1328  * Calling Sequence:
1329  *   1/2/3 <-- certSolveRedMP(certflag, nullcol, n, m, mp_A, mp_b, mp_N, mp_D,
1330  *                            mp_NZ, mp_DZ)
1331  *
1332  * Summary:
1333  *   Certified solve a system of linear equations and reduce the solution
1334  *   size, where the left hand side input matrix is represented by signed
1335  *   mpz_t integers
1336  *
1337  * Description:
1338  *   Let the system of linear equations be Av = b, where A is a n x m matrix,
1339  *   and b is a n x 1 vector. There are three possibilities:
1340  *
1341  *   1. The system has more than one rational solution
1342  *   2. The system has a unique rational solution
1343  *   3. The system has no solution
1344  *
1345  *   In the first case, there exist a solution vector v with minimal
1346  *   denominator and a rational certificate vector z to certify that the
1347  *   denominator of solution v is really the minimal denominator.
1348  *
1349  *   The 1 x n certificate vector z satisfies that z.A is an integer vector
1350  *   and z.b has the same denominator as the solution vector v.
1351  *   In this case, the function will output the solution with minimal
1352  *   denominator and optional certificate vector z (if certflag = 1).
1353  *
1354  *   Note: if choose not to compute the certificate vector z, the solution
1355  *     will not garantee, but with high probability, to be the minimal
1356  *     denominator solution, and the function will run faster.
1357  *
1358  *   Lattice reduction will be used to reduce the solution size. Parameter
1359  *   nullcol designates the dimension of kernal basis we use to reduce the
1360  *   solution size as well as the dimension of nullspace we use to compute
1361  *   the minimal denominator. The heuristic results show that the solution
1362  *   size will be reduced by factor 1/nullcol.
1363  *
1364  *   To find the minimum denominator as fast as possible, nullcol cannot be
1365  *   too small. We use NULLSPACE_COLUMN as the minimal value of nullcol. That
1366  *   is, if the input nullcol is less than NULLSPACE_COLUMN, NULLSPACE_COLUMN
1367  *   will be used instead. However, if the input nullcol becomes larger, the
1368  *   function will be slower. Meanwhile, it does not make sense to make
1369  *   nullcol greater than the dimension of nullspace of the input system.
1370  *
1371  *   As a result, the parameter nullcol will not take effect unless
1372  *   NULLSPACE_COLUMN < nullcol < dimnullspace is satisfied, where
1373  *   dimnullspace is the dimension of nullspace of the input system.  If the
1374  *   above condition is not satisfied, the boundary value NULLSPACE_COLUMN or
1375  *   dimnullspace will be used.
1376  *
1377  *   In the second case, the function will only compute the unique solution
1378  *   and the contents in the space for certificate vector make no sense.
1379  *
1380  *   In the third case, there exists a certificate vector q to certify that
1381  *   the system has no solution. The 1 x n vector q satisfies q.A = 0 but
1382  *   q.b <> 0. In this case, the function will output this certificate vector
1383  *   q and store it into the same space for certificate z. The value of
1384  *   certflag also controls whether to output q or not.
1385  *
1386  *   Note: if the function returns 3, then the system determinately does not
1387  *     exist solution, no matter whether to output certificate q or not.
1388  *   In the first case, there exist a solution vector v with minimal
1389  *   denominator and a rational certificate vector z to certify that the
1390  *   denominator of solution v is the minimal denominator.
1391  *
1392  * Input:
1393  *   certflag: 1/0, flag to indicate whether or not to compute the certificate
1394  *             vector z or q.
1395  *           - If certflag = 1, compute the certificate.
1396  *           - If certflag = 0, not compute the certificate.
1397  *    nullcol: long, dimension of nullspace and kernel basis of conditioned
1398  *             system,
1399  *             if nullcol < NULLSPACE_COLUMN, use NULLSPACE_COLUMN instead
1400  *          n: long, row dimension of the system
1401  *          m: long, column dimension of the system
1402  *       mp_A: 1-dim mpz_t array length n*m, representation of n x m matrix A
1403  *       mp_b: 1-dim mpz_t array length n, representation of n x 1 vector b
1404  *
1405  * Return:
1406  *   1: the first case, system has more than one solution
1407  *   2: the second case, system has a unique solution
1408  *   3: the third case, system has no solution
1409  *
1410  * Output:
1411  *   mp_N: 1-dim mpz_t array length m,
1412  *       - numerator vector of the solution with minimal denominator
1413  *         in the first case
1414  *       - numerator vector of the unique solution in the second case
1415  *       - make no sense in the third case
1416  *   mp_D: mpz_t,
1417  *       - minimal denominator of the solutions in the first case
1418  *       - denominator of the unique solution in the second case
1419  *       - make no sense in the third case
1420  *
1421  * The following will only be computed when certflag = 1
1422  *   mp_NZ: 1-dim mpz_t array length n,
1423  *        - numerator vector of the certificate z in the first case
1424  *        - make no sense in the second case
1425  *        - numerator vector of the certificate q in the third case
1426  *   mp_DZ: mpz_t,
1427  *        - denominator of the certificate z if in the first case
1428  *        - make no sense in the second case
1429  *        - denominator of the certificate q in the third case
1430  *
1431  * Note:
1432  *   - The space of (mp_N, mp_D) is needed to be preallocated, and entries in
1433  *     mp_N and integer mp_D are needed to be initiated as any integer values.
1434  *   - If certflag is specified to be 1, then also needs to preallocate space
1435  *     for (mp_NZ, mp_DZ), and initiate integer mp_DZ and entries in mp_NZ to
1436  *     be any integer values.
1437  *     Otherwise, set mp_NZ = NULL, and mp_DZ = any integer
1438  *
1439  */
1440 
1441 long
certSolveRedMP(const long certflag,const long nullcol,const long n,const long m,mpz_t * mp_A,mpz_t * mp_b,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)1442 certSolveRedMP (const long certflag, const long nullcol, const long n, \
1443 		const long m, mpz_t *mp_A, mpz_t *mp_b, \
1444 		mpz_t *mp_N, mpz_t mp_D, mpz_t *mp_NZ, mpz_t mp_DZ)
1445 {
1446   long i, j, l, r, r1, t, idx, cmp, ver, basislen=1;
1447   FiniteField qh, p, d=1;
1448   long *P, *rp, *Pt, *rpt;
1449   FiniteField *basis, **basiscmb;
1450   Double *DA, *Db, *Dc, **CRNS, **C1RNS, **A11RNS, **A12RNS;
1451   mpz_t mp_maxInter, mp_bd, mp_alpha, mp_beta, mp_Du;
1452   mpz_t *mp_b1, *mp_A21, *mp_A22, *mp_Nu;
1453   double tt;
1454 
1455 #if HAVE_TIME_H
1456   clock_t ti;
1457 #endif
1458 
1459   mpz_init(mp_alpha);
1460   maxMagnMP(mp_A, n, m, m, mp_alpha);
1461   if (!mpz_sgn(mp_alpha))
1462     {
1463       /* in case A is a zero matrix, check vector b */
1464       mpz_init(mp_beta);
1465       maxMagnMP(mp_b, n, 1, 1, mp_beta);
1466       if (!mpz_sgn(mp_beta))
1467 	{
1468 	  /* if b is also a zero matrix, set N = 0, D = 1, NZ = 0, DZ = 1 */
1469 	  for (i = 0; i < m; i++) { mpz_set_ui(mp_N[i], 0); }
1470 	  mpz_set_ui(mp_D, 1);
1471 	  if (certflag == 1)
1472 	    {
1473 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1474 	      mpz_set_ui(mp_DZ, 1);
1475 	    }
1476 	  { mpz_clear(mp_alpha); mpz_clear(mp_beta); }
1477 	  return 1;
1478 	}
1479       else
1480 	{
1481 	  /* compute certificate q, q.A = 0, q.b <> 0 in this case */
1482 	  if (certflag == 1)
1483 	    {
1484 	      for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1485 	      mpz_set_ui(mp_DZ, 1);
1486 	      for (i = 0; i < n; i++)
1487 		if (mpz_sgn(mp_b[i]) != 0)
1488 		  { mpz_set_ui(mp_NZ[i], 1);  break; }
1489 	    }
1490 	  /* if b has non-zero entries, no solution */
1491 	  mpz_clear(mp_alpha); mpz_clear(mp_beta);
1492 	  return 3;
1493 	}
1494     }
1495   /* generate RNS basis */
1496   mpz_init_set_ui(mp_maxInter, 1);
1497   mpz_addmul_ui(mp_maxInter, mp_alpha, 2);
1498   qh = RNSbound(m);
1499   basiscmb = findRNS(qh, mp_maxInter, &basislen);
1500   basis = basiscmb[0];
1501   { mpz_clear(mp_maxInter); mpz_clear(mp_alpha); }
1502   while (1)
1503     {
1504       p = RandPrime(15, 19); /* choose p randomly, 2^15 < p < 2^19 */
1505 
1506 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1507       ti = clock();
1508       printf("prime: %ld\n", p);
1509 #endif
1510 
1511       /* call RowEchelonTransform to reduce DA = A mod p, and obtain rank r */
1512       DA = XMALLOC(Double, n*m);
1513       for (i = 0; i < n*m; i++) { DA[i] = (Double)mpz_fdiv_ui(mp_A[i], p); }
1514       P = XMALLOC(long, n+1);
1515       for (i = 0; i < n+1; i++) { P[i] = i; }
1516       rp = XCALLOC(long, n+1);
1517       RowEchelonTransform(p, DA, n, m, 1, 1, 0, 0, P, rp, &d);
1518       r = rp[0];
1519 
1520       /* if rank is 0, then p is a bad prime, restart */
1521       if (r == 0)
1522 	{
1523 
1524 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1525 	  printf("rank deficient in decomposition, repeat!\n");
1526 #endif
1527 
1528 	  { XFREE(DA); XFREE(P); XFREE(rp); }
1529 	  continue;
1530 	}
1531 
1532       /* compute Pt, rpt satisfying that row Pt[i], column rpt[j] of A will be
1533 	 changed to row i, column j respectively after the decomposition */
1534       Pt = revseq(r, n, P);
1535       rpt = revseq(r, m, rp);
1536 
1537       /* decomposite <A|b> mod p */
1538       Db = XMALLOC(Double, n);
1539       for (i = 0; i < n; i++)
1540 	Db[i] = (Double)mpz_fdiv_ui(mp_b[Pt[i]], p);
1541       Dc = XMALLOC(Double, n);
1542       for (i = r; i < n; i++) { Dc[i] = Db[i]; }
1543 
1544       /* compute first r rows of Dc by DA[1..r, 1..r].Db[1..r] */
1545       cblas_dgemv(CblasRowMajor, CblasNoTrans, r, r, 1.0, DA, m, Db, 1, 0.0, \
1546 		  Dc, 1);
1547 
1548       /* compute last n-r rows of Dc by DA[r+1..n, 1..r].Db[1..r]+Db[r+1..n] */
1549       if (r < n)
1550 	cblas_dgemv(CblasRowMajor, CblasNoTrans, n-r, r, 1.0, DA+r*m, m, \
1551 		    Db, 1, 1.0, Dc+r, 1);
1552       Dmod(p, Dc, n, 1, 1);
1553       idx = r-1;
1554       while ((++idx < n) && (Dc[idx] == 0)) ;
1555       { XFREE(DA); XFREE(Db); XFREE(Dc); }
1556 
1557 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1558       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
1559       printf("Decomposition Time: %f\n", tt);
1560 #endif
1561 
1562       /* rank of A mod p == rank of <A|b> mod p */
1563       if (idx == n)
1564 	{
1565 
1566 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1567 	  ti = clock();
1568 #endif
1569 
1570 	  /* compute mp_b1 = P.b */
1571 	  mp_b1 = XMALLOC(mpz_t, n);
1572 	  for (i = 0; i < n; i++) { mpz_init_set(mp_b1[i], mp_b[Pt[i]]); }
1573 
1574 	  /* CRNS[i] = [A_11, A_12] mod basis[i] */
1575 	  CRNS = XMALLOC(Double *, basislen);
1576 	  for (i = 0; i < basislen; i++)
1577 	    {
1578 	      CRNS[i] = XMALLOC(Double, r*m);
1579 	      for (j = 0; j < r; j++)
1580 		for (l = 0; l < m; l++)
1581 		  CRNS[i][j*m+l] = (Double)mpz_fdiv_ui(mp_A[Pt[j]*m+rpt[l]], \
1582 						       basis[i]);
1583 	    }
1584 	  /* full column rank, the solution is unique */
1585 	  if (r == m)
1586 	    nonsingSolvRNSMM(RightSolu, r, 1, basislen, basis, CRNS, mp_b1, \
1587 			     mp_N, mp_D);
1588 	  else
1589 	    {
1590 	      /* not in full column rank, compute minimal denominator solution
1591 		 compute the bound after compression in specialminSolveRNS */
1592 	      mpz_init(mp_bd);
1593 	      compressBoundMP(r, m, Pt, mp_A, mp_bd);
1594 	      ver = specialminSolveRNS(certflag, 1, nullcol, r, m,\
1595 				       basislen, mp_bd, basis, CRNS, mp_b1, \
1596 				       mp_N, mp_D, mp_NZ, mp_DZ);
1597 	      mpz_clear(mp_bd);
1598 	      if (ver == 0)
1599 		{
1600 		  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); }
1601 		  for (i = 0; i < basislen; i++) { XFREE(CRNS[i]); }
1602 		  { XFREE(CRNS); XFREE(mp_b1); }
1603 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
1604 
1605 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1606 		  printf("certificate checking fails, repeat!\n");
1607 #endif
1608 
1609 		  continue;
1610 		}
1611 	    }
1612 	  for (i = 0; i < basislen; i++) { XFREE(CRNS[i]); } { XFREE(CRNS); }
1613 
1614 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1615 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
1616 	  printf("Minimial Denominator Solution Solving Time: %f\n", tt);
1617 	  ti = clock();
1618 #endif
1619 
1620 	  /* not in full row rank, need to check whether <A_31|A_32>x = b_3 */
1621 	  if (r < n)
1622 	    {
1623 	      C1RNS = XMALLOC(Double *, basislen);
1624 	      for (i = 0; i < basislen; i++)
1625 		{
1626 		  C1RNS[i] = XMALLOC(Double, (n-r)*m);
1627 		  for (j = 0; j < n-r; j++)
1628 		    for (l = 0; l < m; l++)
1629 		      C1RNS[i][j*m+l] = \
1630 			(Double)mpz_fdiv_ui(mp_A[Pt[r+j]*m+rpt[l]], basis[i]);
1631 		}
1632 
1633 	      cmp = LVecSMatMulCmp(RightMul, basislen, n-r, m, basis, C1RNS, \
1634 				   mp_D, mp_N, mp_b1+r);
1635 	      for (i = 0; i < basislen; i++) { XFREE(C1RNS[i]); }
1636 	      XFREE(C1RNS);
1637 
1638 	      /* if compare fails, restart */
1639 	      if (cmp == 0)
1640 		{
1641 		  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); }
1642 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); XFREE(mp_b1); }
1643 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1644 		  printf("checking lower n-r rows fails, repeat!\n");
1645 #endif
1646 
1647 		  continue;
1648 		}
1649 	    }
1650 	  { XFREE(Pt); XFREE(rpt); }
1651 	  for (i = 0; i < n; i++) { mpz_clear(mp_b1[i]); } { XFREE(mp_b1); }
1652 	  { XFREE(basiscmb[0]); XFREE(basiscmb[1]); XFREE(basiscmb); }
1653 
1654 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1655 	  tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
1656 	  printf("Solution Checking Time: %f\n", tt);
1657 #endif
1658 
1659 	  /* recover the solution vector and the certificate vector */
1660 	  if (r < m)
1661 	    {
1662 	      /* system has more than one solution, the first case */
1663 	      /* compute Qy */
1664 	      for (i = r; i > 0; i--)
1665 		if (rp[i] != i) { mpz_swap(mp_N[i-1], mp_N[rp[i]-1]); }
1666 
1667 	      /* set last n-r entries in z be 0 and compute zP */
1668 	      if (certflag == 1)
1669 		{
1670 		  for (i = r; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1671 		  for (i = r; i > 0; i--)
1672 		    if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
1673 		}
1674 	      { XFREE(P); XFREE(rp); }
1675 	      return 1;
1676 	    }
1677 	  else
1678 	    {
1679 	      /* system has a unique solution, the second case */
1680 	      { XFREE(P); XFREE(rp); }
1681 	      return 2;
1682 	    }
1683 	}
1684       else
1685 	{
1686 	  if ((r == m) && (certflag == 0))
1687 	    {
1688 	      { XFREE(basiscmb[0]); XFREE(basiscmb[1]); XFREE(basiscmb); }
1689 	      { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
1690 	      return 3;
1691 	    }
1692 	  P[r+1] = idx+1;      /* update P for permutation of row r+1 */
1693 
1694 	  /* compute u = A_21.A_11^(-1) */
1695 	  mpz_init(mp_Du);
1696 	  mp_Nu = XMALLOC(mpz_t, r);
1697 	  for (i = 0; i < r; i++) { mpz_init(mp_Nu[i]); }
1698 	  A11RNS = XMALLOC(Double *, basislen);
1699 	  for (i = 0; i < basislen; i++)
1700 	    {
1701 	      A11RNS[i] = XMALLOC(Double, r*r);
1702 	      for (j = 0; j < r; j++)
1703 		for (l = 0; l < r; l++)
1704 		  A11RNS[i][j*r+l] = \
1705 		    (Double)mpz_fdiv_ui(mp_A[Pt[j]*m+rpt[l]], basis[i]);
1706 	    }
1707 	  mp_A21 = XMALLOC(mpz_t, r);
1708 	  for (i = 0; i < r; i++)
1709 	    mpz_init_set(mp_A21[i], mp_A[Pt[idx]*m+rpt[i]]);
1710 	  nonsingSolvRNSMM(LeftSolu, r, 1, basislen, basis, A11RNS, mp_A21, \
1711 			   mp_Nu, mp_Du);
1712 	  for (i = 0; i < basislen; i++)  { XFREE(A11RNS[i]); }
1713 	  for (i = 0; i < r; i++) { mpz_clear(mp_A21[i]); }
1714 	  { XFREE(mp_A21); XFREE(A11RNS); }
1715 	  if (r < m)
1716 	    {
1717 	      /* compare u.A_12 with A_22 */
1718 	      A12RNS = XMALLOC(Double *, basislen);
1719 	      for (i = 0; i < basislen; i++)
1720 		{
1721 		  A12RNS[i] = XMALLOC(Double, r*(m-r));
1722 		  for (j = 0; j < r; j++)
1723 		    for (l = 0; l < m-r; l++)
1724 		      A12RNS[i][j*(m-r)+l] = \
1725 			(Double)mpz_fdiv_ui(mp_A[Pt[j]*m+rpt[r+l]], basis[i]);
1726 		}
1727 	      mp_A22 = XMALLOC(mpz_t, m-r);
1728 	      for (i = 0; i < m-r; i++)
1729 		mpz_init_set(mp_A22[i], mp_A[Pt[idx]*m+rpt[r+i]]);
1730 	      cmp = LVecSMatMulCmp(LeftMul, basislen, r, m-r, basis, A12RNS, \
1731 				   mp_Du, mp_Nu, mp_A22);
1732 	      for (i = 0; i < basislen; i++) { XFREE(A12RNS[i]); }
1733 	      for (i = 0; i < m-r; i++) { mpz_clear(mp_A22[i]); }
1734 	      { XFREE(A12RNS); XFREE(mp_A22); }
1735 
1736 	      /* comparison fails, restart */
1737 	      if (cmp == 0)
1738 		{
1739 		  mpz_clear(mp_Du);
1740 		  for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); }
1741 		  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); XFREE(mp_Nu); }
1742 
1743 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
1744 		  printf("checking no solution case fails, repeat!\n");
1745 #endif
1746 
1747 		  continue;
1748 		}
1749 	    }
1750 	  if (certflag == 1)
1751 	    {
1752 	      /* set q = (u, -1, 0, ..., 0), which is store in mp_NZ */
1753 	      mpz_set(mp_DZ, mp_Du);
1754 	      for (i = 0; i < r; i++) { mpz_set(mp_NZ[i], mp_Nu[i]); }
1755 	      mpz_set(mp_NZ[r], mp_Du);
1756 	      mpz_neg(mp_NZ[r], mp_NZ[r]);
1757 	      for (i = r+1; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
1758 
1759 	      /* compute qP */
1760 	      for (i = r+1; i > 0; i--)
1761 		if (P[i] != i) { mpz_swap(mp_NZ[i-1], mp_NZ[P[i]-1]); }
1762 	    }
1763 
1764 	  mpz_clear(mp_Du);
1765 	  { XFREE(basiscmb[0]); XFREE(basiscmb[1]); XFREE(basiscmb); }
1766 	  for (i = 0; i < r; i++) { mpz_clear(mp_Nu[i]); } { XFREE(mp_Nu); }
1767 	  { XFREE(P); XFREE(rp); XFREE(Pt); XFREE(rpt); }
1768 
1769 	  /* system has no solution, the third case */
1770 	  return 3;
1771 	}
1772     }
1773 }
1774 
1775 
1776 
1777 
1778 
1779 /*
1780  *
1781  * Calling Sequence:
1782  *   At <-- revseq(r, m, A)
1783  *
1784  * Summary:
1785  *   Perform operations on the permutation vector
1786  *
1787  * Description:
1788  *   Let A be a vector length m+1 to record the permutations over a matrix M.
1789  *   A[i] represents the switch of row/column i-1 of M with row/column A[i]-1
1790  *   of M, i = 1..r. The permutation order is A[r].A[r-1]. ... .A[1].M.
1791  *
1792  *   The function at first generates a vector At length m, At[i] = i,
1793  *   i = 0..m-1. Then apply A[r]. ... .A[1] in order on At. The outputed At
1794  *   means that row/column At[i] of matrix M will be changed to row/column i
1795  *   of M after applying all the permutations to M in order.
1796  *
1797  * Input:
1798  *   r: long, permutation happens in the first r row/column of M
1799  *   m: long, length of A
1800  *   A: 1-dim long array length m+1, record of permutations on matrix M
1801  *
1802  * Return value:
1803  *   At: 1-dim long array length m, explained above
1804  *
1805  */
1806 
1807 long *
revseq(const long r,const long m,const long * A)1808 revseq(const long r, const long m, const long *A)
1809 {
1810   long i, t, *At;
1811 
1812   At = XMALLOC(long, m);
1813   for (i = 0; i < m; i++) { At[i] = i; }
1814   for (i = 1; i < r+1; i++)
1815     if (A[i] != i)
1816       { t = At[i-1];  At[i-1] = At[A[i]-1];  At[A[i]-1] = t; }
1817 
1818   return At;
1819 }
1820 
1821 
1822 
1823 
1824 /*
1825  * Calling Sequence:
1826  *   compressBoundLong(n, m, Pt, A, mp_bd)
1827  *
1828  * Summary:
1829  *   Compute the maximum magnitude of a compressed signed long submatrix
1830  *
1831  * Description:
1832  *   Let A be a n1 x m matrix, B be a n x m submatrix of A. Pt[i] represents
1833  *   that row i of B comes from row Pt[i] of A. The function computes the
1834  *   maximum magnitude of entries in the compressed matrix B.P, where P is an
1835  *   m x n+k 0-1 matrix.
1836  *
1837  * Input:
1838  *      n: long, row dimension of B
1839  *      m: long, column dimension of B
1840  *     Pt: 1-dim long array length n1+1, storing row relations between B and A
1841  *   mp_A: 1-dim long array length n1*m, n1 x m matrix A
1842  *
1843  * Output:
1844  *   mp_bd: mpz_t, maximum magnitude of entries in B.P
1845  *
1846  */
1847 
1848 void
compressBoundLong(const long n,const long m,const long * Pt,const long * A,mpz_t mp_bd)1849 compressBoundLong (const long n, const long m, const long *Pt, const long *A,\
1850 		   mpz_t mp_bd)
1851 {
1852   long i, j, temp;
1853   mpz_t mp_temp;
1854 
1855   mpz_init(mp_temp);
1856   mpz_set_ui(mp_bd, 0);
1857   for (i = 0; i < n; i++)
1858     {
1859       mpz_set_ui(mp_temp, 0);
1860       for (j = 0; j < m; j++)
1861 	{
1862 	  temp = labs(A[Pt[i]*m+j]);
1863 	  mpz_add_ui(mp_temp, mp_temp, temp);
1864 	}
1865       if (mpz_cmp(mp_bd, mp_temp) < 0) { mpz_set(mp_bd, mp_temp); }
1866     }
1867 
1868   mpz_clear(mp_temp);
1869 }
1870 
1871 
1872 
1873 /*
1874  * Calling Sequence:
1875  *   compressBoundMP(n, m, Pt, mp_A, mp_bd)
1876  *
1877  * Summary:
1878  *   Compute the maximum magnitude of a compressed mpz_t submatrix
1879  *
1880  * Description:
1881  *   Let A be a n1 x m matrix, B be a n x m submatrix of A. Pt[i] represents
1882  *   that row i of B comes from row Pt[i] of A. The function computes the
1883  *   maximum magnitude of entries in the compressed matrix B.P, where P is an
1884  *   m x n+k 0-1 matrix.
1885  *
1886  * Input:
1887  *      n: long, row dimension of B
1888  *      m: long, column dimension of B
1889  *     Pt: 1-dim long array length n1+1, storing row relations between B and A
1890  *   mp_A: 1-dim mpz_t array length n1*m, n1 x m matrix A
1891  *
1892  * Output:
1893  *   mp_bd: mpz_t, maximum magnitude of entries in B.P
1894  *
1895  */
1896 
1897 void
compressBoundMP(const long n,const long m,const long * Pt,mpz_t * mp_A,mpz_t mp_bd)1898 compressBoundMP (const long n, const long m, const long *Pt, mpz_t *mp_A, \
1899 		 mpz_t mp_bd)
1900 {
1901   long i, j;
1902   mpz_t mp_temp, mp_temp1;
1903 
1904   { mpz_init(mp_temp); mpz_init(mp_temp1); }
1905   mpz_set_ui(mp_bd, 0);
1906   for (i = 0; i < n; i++)
1907     {
1908       mpz_set_ui(mp_temp, 0);
1909       for (j = 0; j < m; j++)
1910 	{
1911 	  mpz_abs(mp_temp1, mp_A[Pt[i]*m+j]);
1912 	  mpz_add(mp_temp, mp_temp, mp_temp1);
1913 	}
1914       if (mpz_cmp(mp_bd, mp_temp) < 0) { mpz_set(mp_bd, mp_temp); }
1915     }
1916 
1917   { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
1918 }
1919 
1920 
1921 
1922 
1923 
1924 
1925 /*
1926  * Calling Sequence:
1927  *   1/0 <-- LVecSMatMulCmp(mulpos, basislen, n, m, basis, ARNS, mp_s,
1928  *                          mp_V, mp_b)
1929  *
1930  * Summary:
1931  *   Verify the equality of a matrix-vector product and a scalar-vector
1932  *   product
1933  *
1934  * Description:
1935  *   The function verifies whether a matrix-vector product A.V or V.A is equal
1936  *   to a scalar-vector product s.b, where A is a n x m matrix, b and V is a
1937  *   vector, and s is a scalar. The flag mulpos is used to specify whether the
1938  *   matrix-vector product is A.V or V.A. The comparison result is output
1939  *   sensitive, i.e., the comparsion will stop as long as one failure occurs.
1940  *   The input matrix A is represented in a RNS.
1941  *
1942  * Input:
1943  *     mulpos: LeftMul/RightMul, flag to indicate whether A.V or V.A is
1944  *             performed.
1945  *             If mulpos = LeftMul ==> V.A. If mulpos = RightMul ==> A.V
1946  *   basislen: long, dimension of the RNS basis
1947  *          n: long, row dimension of A
1948  *          m: long, column dimension of A
1949  *      basis: 1-dim FiniteField array length basislen, RNS basis
1950  *       ARNS: 2-dim Double array, dimension basislen x n*m, representation of
1951  *             A in the RNS. ARNS[i] = A mod basis[i], i = 0..basislen-1
1952  *       mp_s: mpz_t, scalar s
1953  *       mp_V: 1-dim mpz_t array length n or m depending on mulpos, vector V
1954  *       mp_b: 1-dim mpz_t array length n or m depending on mulpos, vector b
1955  *
1956  * Return:
1957  *   1: comparison succeeds
1958  *   0: comparison fails
1959  *
1960  */
1961 
1962 long
LVecSMatMulCmp(const enum MULT_POS mulpos,const long basislen,const long n,const long m,const FiniteField * basis,Double ** ARNS,mpz_t mp_s,mpz_t * mp_V,mpz_t * mp_b)1963 LVecSMatMulCmp (const enum MULT_POS mulpos, const long basislen, \
1964 		const long n, const long m, const FiniteField *basis, \
1965 		Double **ARNS, mpz_t mp_s, mpz_t *mp_V, mpz_t *mp_b)
1966 {
1967   long i, j, l, sharednum, unsharednum;
1968   FiniteField *bdcoeff, *cmbasis;
1969   double temp;
1970   mpz_t mp_AV, mp_temp, mp_AV1;
1971   Double **U;
1972 
1973   if (mulpos == LeftMul) { sharednum = n;  unsharednum = m; }
1974   else if (mulpos == RightMul) { sharednum = m;  unsharednum = n; }
1975 
1976   /* allocate matrix U to store mix radix coefficients of one row/column
1977      of matrix A */
1978   U = XMALLOC(Double *, basislen);
1979   for (i = 0; i < basislen; i++)
1980     U[i] = XMALLOC(Double, sharednum);
1981   cmbasis = combBasis(basislen, basis);
1982   bdcoeff = repBound(basislen, basis, cmbasis);
1983   mpz_init(mp_AV);
1984   mpz_init(mp_AV1);
1985   mpz_init(mp_temp);
1986   for (i = 0; i < unsharednum; i++)
1987     {
1988       /* apply Garner's algorithm to compute U in positive representation */
1989       if (mulpos == LeftMul)
1990 	cblas_dcopy(sharednum, ARNS[0]+i, unsharednum, U[0], 1);
1991       else if (mulpos == RightMul)
1992 	cblas_dcopy(sharednum, ARNS[0]+i*sharednum, 1, U[0], 1);
1993       for (j = 1; j < basislen; j++)
1994 	{
1995 	  cblas_dcopy(sharednum, U[j-1], 1, U[j], 1);
1996 	  for (l = j-2; l >= 0; l--)
1997 	    {
1998 	      cblas_dscal(sharednum, (double)(basis[l] % basis[j]), U[j], 1);
1999 	      cblas_daxpy(sharednum, 1.0, U[l], 1, U[j], 1);
2000 	      Dmod((double)basis[j], U[j], 1, sharednum, sharednum);
2001 	    }
2002 	  temp = (double)cmbasis[j]*(double)(basis[j]-1);
2003 	  temp = fmod(temp, (double)basis[j]);
2004 	  if (mulpos == LeftMul) {
2005 	    cblas_dscal(sharednum, temp, U[j], 1);
2006 	    cblas_daxpy(sharednum, (double)cmbasis[j], ARNS[j]+i, unsharednum, U[j], 1);
2007           } else if (mulpos == RightMul) {
2008 	    cblas_dscal(sharednum, temp, U[j], 1);
2009 	    cblas_daxpy(sharednum, (double)cmbasis[j], ARNS[j]+i*sharednum,  1, U[j], 1);
2010           }
2011 	  Dmod((double)basis[j], U[j], 1, sharednum, sharednum);
2012 	}
2013       /* change U to symmetric representation */
2014       for (j = 0; j < sharednum; j++)
2015 	for (l = basislen-1; l >= 0; l--)
2016 	  {
2017 	    if (U[l][j] > bdcoeff[l])
2018 	      {
2019 		U[basislen-1][j] = U[basislen-1][j]-basis[basislen-1];
2020 		break;
2021 	      }
2022 	    else if (U[l][j] < bdcoeff[l]) { break; }
2023 	  }
2024       /* do vector-vector product and sum up the product */
2025       mpz_set_ui(mp_AV, 0);
2026       for (j = 0; j < sharednum; j++)
2027 	{
2028 	  mpz_mul_si(mp_temp, mp_V[j], U[basislen-1][j]);
2029 	  mpz_add(mp_AV, mp_AV, mp_temp);
2030 	}
2031       for (j = basislen-2; j >= 0; j--)
2032 	{
2033 
2034 	  mpz_mul_ui(mp_AV, mp_AV, basis[j]);
2035 	  mpz_set_ui(mp_AV1, 0);
2036 	  for (l = 0; l < sharednum; l++)
2037 	    {
2038 	      mpz_mul_ui(mp_temp, mp_V[l], U[j][l]);
2039 	      mpz_add(mp_AV1, mp_AV1, mp_temp);
2040 	    }
2041 	  mpz_add(mp_AV, mp_AV, mp_AV1);
2042 	}
2043       /* compare with mp_s*mp_b */
2044       mpz_mul(mp_temp, mp_s, mp_b[i]);
2045       if (mpz_cmp(mp_temp, mp_AV) != 0)
2046 	{
2047 	  { mpz_clear(mp_AV); mpz_clear(mp_AV1); mpz_clear(mp_temp); }
2048 	  for (j = 0; j < basislen; j++) { XFREE(U[j]); } { XFREE(U); }
2049 	  { XFREE(bdcoeff); XFREE(cmbasis); }
2050 	  return 0;
2051 	}
2052     }
2053 
2054   { mpz_clear(mp_AV); mpz_clear(mp_AV1); mpz_clear(mp_temp); }
2055   for (j = 0; j < basislen; j++) { XFREE(U[j]); } { XFREE(U); }
2056   { XFREE(bdcoeff); XFREE(cmbasis); }
2057   return 1;
2058 }
2059 
2060 
2061 
2062 /*
2063  *
2064  * Calling Sequence:
2065  *   1/0 <-- specialminSolveLong(certflag, redflag, nullcol, n, m, mp_bdC,
2066  *                               C, mp_b, mp_N, mp_D, mp_NZ, mp_DZ)
2067  *
2068  * Summary:
2069  *   Compute the minimal denominator solution of a full row rank system,
2070  *   represented by signed long integers, and corresponding certificate
2071  *   vector(optional). The solution size could be reduced.
2072  *
2073  * Description:
2074  *   Let the full row rank system be Cx = b, where b is a vector, and C is
2075  *
2076  *               m
2077  *       [     |         ]
2078  *   C = [  A  |    B    ] n,   m > n
2079  *       [     |         ]
2080  *
2081  *   The certificate vector z satisfies that z.C is an integer vector and
2082  *   z.b has the same denominator as the solution vector.
2083  *
2084  *   The function takes the signed long array C, as input. Based on
2085  *   function minSolnoncompressRNS, the function can deal with the case
2086  *   when m >> n. In that case, the function compresses the system, and then
2087  *   computes the solution and the certificate of the compressed system.
2088  *   Then the function verifies the certificate of the compressed system over
2089  *   the uncompressed system as well as converts the solution of the
2090  *   compressed system to that of uncompressed system. If the verification of
2091  *   certificate succeeds, the function returns 1. Otherwise, reture 0.
2092  *
2093  *   On the other hand, if the nullspace of post-conditioned is designated
2094  *   larger than m-n, then the system is uncompressed and m-n is used.
2095  *
2096  *   If redflag is specified as 1, then reduce the solution by the kernel
2097  *   basis with dimension min(max(nullcol, NULLSPACE_COLUMN), dimnullspace),
2098  *   where dimnullspace is the dimension of nullspace of C.
2099  *
2100  * Input:
2101  *   certflag: 1/0, flag to indicate whether to compute the certificate vector
2102  *             or not
2103  *           - certflag = 1, compute the certificate vector
2104  *           - certflag = 0, not compute the certificate
2105  *    redflag: 1/0, flag to indicate whether to reduce the solution size or not
2106  *           - redflag = 1, reduce the solution size
2107  *           - redflag = 0, not reduce the solution size
2108  *    nullcol: long, dimension of nullspace and kernel basis of conditioned
2109  *             system,
2110  *             if nullcol < NULLSPACE_COLUMN, use NULLSPACE_COLUMN instead
2111  *          n: long, row dimension of the input system
2112  *          m: long, column dimension of input matrix C
2113  *     mp_bdC: mpz_t, the maximum sum of absolute values of entries of
2114  *             each row in matrix C
2115  *      basis: 1-dim FiniteField array length basislen, RNS basis
2116  *          C: 1-dim long array length n*m, input matrix C
2117  *       mp_b: 1-dim mpz_t array length n, right hand side vector b
2118  *
2119  * Return:
2120  *   1: verification of the certificate vector succeeds
2121  *   0: verification of the certificate vector fails, needs to restart
2122  *
2123  * Output (only correct when returned value is 1):
2124  *    mp_N: 1-dim mpz_t array length m, numerator vector of the solution
2125  *          with minimal denominator
2126  *    mp_D: mpz_t, minimal denominator of the solution
2127  *   mp_NZ: 1-dim mpz_t array, the first n entries store the numerator
2128  *          vector of the certificate z
2129  *   mp_DZ: mpz_t, the denominator of the certificate z
2130  *
2131  * Note:
2132  *   - The space of the solution (mp_N, mp_D) is needed to be preallocated and
2133  *     the integer mp_D and the entries in mp_N are needed to be initiated as
2134  *     any integer values.
2135  *   - If certflag is specified to be 1, then also needs to preallocate space
2136  *     for (mp_NZ, mp_DZ). Otherwise, set mp_NZ = NULL, and mp_DZ = any int
2137  *
2138  */
2139 
2140 long
specialminSolveLong(const long certflag,const long redflag,const long nullcol,const long n,const long m,const mpz_t mp_bdC,const long * C,mpz_t * mp_b,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)2141 specialminSolveLong (const long certflag, const long redflag, \
2142 		     const long nullcol, const long n, \
2143 		     const long m, const mpz_t mp_bdC, const long *C, \
2144 		     mpz_t *mp_b,  mpz_t *mp_N, mpz_t mp_D, \
2145 		     mpz_t *mp_NZ, mpz_t mp_DZ)
2146 {
2147   long i, j, l, q, temp, k, len=1;
2148   FiniteField p, qh, d;
2149   mpz_t mp_maxInter, mp_basisprod, mp_temp, mp_temp1;
2150   long *A, *P, *rp;
2151   double *cumprod;
2152   FiniteField *bdcoeff, *basis, **basiscmb;
2153   Double temp1, *DA, *DB, *DC, *P1, *P2, *dtemp, *CRNS, *AP, **ARNS, **BRNS;
2154   mpz_t *mp_Bb, *mp_Nt;
2155   double tt;
2156 
2157 #if HAVE_TIME_H
2158   clock_t ti;
2159 #endif
2160 
2161   /* only when we need to reduce the solution, we may reset number
2162      of columns */
2163   k = NULLSPACE_COLUMN;
2164   if ((redflag == 1) && (nullcol > NULLSPACE_COLUMN)) { k = nullcol; }
2165 
2166   p = RandPrime(15, 19);
2167 
2168   /* in case m-n is small, do not need to compress the system */
2169   if (m-n <= k)
2170     {
2171       mp_Bb = XMALLOC(mpz_t, (m-n+1)*n);
2172       A = XMALLOC(long, n*n);
2173       for (i = 0; i < n; i++)
2174 	{
2175 	  for (j = 0; j < n; j++)
2176 	    A[i*n+j] = C[i*m+j];
2177 	  for (j = 0; j < m-n; j++)
2178 	    mpz_init_set_si(mp_Bb[i*(m-n+1)+j], C[i*m+(j+n)]);
2179 	  mpz_init_set(mp_Bb[i*(m-n+1)+m-n], mp_b[i]);
2180 	}
2181 
2182       minSolnoncompressLong(certflag, redflag, n, m-n, mp_Bb, A, mp_N, mp_D, \
2183 			    mp_NZ, mp_DZ);
2184       for (i = 0; i < (m-n+1)*n; i++){ mpz_clear(mp_Bb[i]); } { XFREE(mp_Bb); }
2185       XFREE(A);
2186       return 1;
2187     }
2188   mp_Nt = XMALLOC(mpz_t, n+k);
2189   for (i = 0; i < n+k; i++) { mpz_init(mp_Nt[i]); }
2190   AP = XMALLOC(Double, n*n);
2191   P = XMALLOC(long, n+1);
2192   rp = XMALLOC(long, n+1);
2193   mp_Bb = XMALLOC(mpz_t, n*(k+1));
2194   for (i = 0; i < n*(k+1); i++) { mpz_init(mp_Bb[i]); }
2195 
2196   /* if fit into long and mantissa of double, then not compute in RNS */
2197   if (mpz_cmp_ui(mp_bdC, LongRNSbound()) <= 0)
2198     {
2199       DA = XMALLOC(Double, n*n);
2200       DB = XMALLOC(Double, n*k);
2201       DC = XMALLOC(Double, n*m);
2202       for (i = 0; i < m*n; i++) { DC[i] = (Double)C[i]; }
2203 
2204 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2205       printf("    Compressed Matrix Represented in Long\n");
2206       fflush(stdout);
2207       ti = clock();
2208 #endif
2209 
2210       while (1)
2211 	{
2212 	  P1 = randomDb(m, n, 1);
2213 	  P2 = randomDb(m, k, 1);
2214 	  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, m, \
2215 		      1.0, DC, m, P1, n, 0.0, DA, n);
2216 	  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, k, m, \
2217 		      1.0, DC, m, P2, k, 0.0, DB, k);
2218 
2219 	  /* check whether the rank does not decrease after compression */
2220 	  for (i = 0; i < n+1; i++) { P[i] = i;  rp[i] = 0; }
2221 	  for (i = 0; i < n*n; i++)
2222 	    AP[i] = (temp1 =fmod(DA[i], p)) >= 0 ? temp1 : p+temp1;
2223 	  d=1;
2224 	  RowEchelonTransform(p, AP, n, n, 0, 0, 0, 0, P, rp, &d);
2225 	  if (rp[0] == n) { break; }
2226 	  else { XFREE(P1); XFREE(P2); }
2227 	}
2228       A = XMALLOC(long, n*n);
2229       for (i = 0; i < n*n; i++) { A[i] = (long)DA[i]; }
2230       for (i = 0; i < n; i++)
2231 	{
2232 	  for (j = 0; j < k; j++)
2233 	    mpz_set_d(mp_Bb[i*(k+1)+j], DB[i*k+j]);
2234 	  mpz_set(mp_Bb[i*(k+1)+k], mp_b[i]);
2235 	}
2236       { XFREE(AP); XFREE(P); XFREE(rp); XFREE(DA); XFREE(DB); XFREE(DC); }
2237 
2238 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2239       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2240       printf("    Compression Time: %f\n", tt);
2241       fflush(stdout);
2242       ti = clock();
2243 #endif
2244 
2245       minSolnoncompressLong(certflag, redflag, n, k, mp_Bb, A, mp_Nt, mp_D, \
2246 			    mp_NZ, mp_DZ);
2247 
2248 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2249       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2250       printf("    Solution of Compressed System Finding Time: %f\n", tt);
2251       fflush(stdout);
2252 #endif
2253       XFREE(A);
2254       for (i = 0; i < n*(k+1); i++) { mpz_clear(mp_Bb[i]); } { XFREE(mp_Bb); }
2255     }
2256   else
2257     {
2258       /* compute maximum mangnitude bound of elements of extended RNS basis */
2259       qh = RNSbound(m);
2260 
2261       /* compute maximum intermidiate result during the compression */
2262       mpz_init_set_ui(mp_maxInter, 1);
2263       mpz_addmul_ui(mp_maxInter, mp_bdC, 2);
2264 
2265       /* compute RNS basis */
2266       basiscmb = findRNS(qh, mp_maxInter, &len);
2267       basis = basiscmb[0];
2268       mpz_clear(mp_maxInter);
2269       ARNS = XMALLOC(Double *, len);
2270       BRNS = XMALLOC(Double *, len);
2271       for (i = 0; i < len; i++)
2272 	{
2273 	  ARNS[i] = XMALLOC(Double, n*n);
2274 	  BRNS[i] = XMALLOC(Double, n*k);
2275 	}
2276       CRNS = XMALLOC(Double, n*m);
2277       cumprod = cumProd(len, basis, 1, &p);
2278       bdcoeff = repBound(len, basiscmb[0], basiscmb[1]);
2279 
2280 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2281       printf("    Compressed Matrix Represented in RNS\n");
2282       fflush(stdout);
2283       ti = clock();
2284 #endif
2285 
2286       /* compress the large matrix to a small one
2287 	 generate random 0, 1 matrices P1, P2 for compression
2288 	 note: matrix might be <I_(n+k), *> */
2289       while (1)
2290 	{
2291 	  P1 = randomDb(m, n, 1);
2292 	  P2 = randomDb(m, k, 1);
2293 	  for (i = 0; i < len; i++)
2294 	    {
2295 	      /* extend CRNS to the extended RNS basis */
2296 	      q = (long)basis[i];
2297 	      for (j = 0; j < n*m; j++)
2298 		CRNS[j] = (Double)((temp = (C[j] % q)) >= 0 ? temp : q+temp);
2299 	      cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, m, \
2300 			  1.0, CRNS, m, P1, n, 0.0, ARNS[i], n);
2301 	      Dmod(basis[i], ARNS[i], n, n, n);
2302 	      cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, k, m, \
2303 			  1.0, CRNS, m, P2, k, 0.0, BRNS[i], k);
2304 	      Dmod(basis[i], BRNS[i], n, k, k);
2305 	    }
2306 	  /* check whether the rank does not decrease after compression */
2307 	  for (i = 0; i < n+1; i++) { P[i] = i;  rp[i] = 0; }
2308 	  basisExt(len, n*n, p, basiscmb[0], basiscmb[1], *cumprod, \
2309 		   bdcoeff, ARNS, AP);
2310 	  RowEchelonTransform(p, AP, n, n, 0, 0, 0, 0, P, rp, &d);
2311 	  if (rp[0] == n) { break; }
2312 	  else { XFREE(P1); XFREE(P2); }
2313 	}
2314       { XFREE(cumprod); XFREE(CRNS); XFREE(AP); XFREE(P); XFREE(rp); }
2315 
2316       /* use CRT to reconstruct B */
2317       mpz_init(mp_basisprod);
2318       basisProd(len, basis, mp_basisprod);
2319       dtemp = XMALLOC(Double, len);
2320       for (i = 0; i < n; i++)
2321 	{
2322 	  for (j = 0; j < k; j++)
2323 	    {
2324 	      for (l = 0; l < len; l++) { dtemp[l] = BRNS[l][i*k+j]; }
2325 	      ChineseRemainder(len, mp_basisprod, basiscmb[0], basiscmb[1], \
2326 			       bdcoeff, dtemp, mp_Bb[i*(k+1)+j]);
2327 	    }
2328 	  mpz_set(mp_Bb[i*(k+1)+k], mp_b[i]);
2329 	}
2330       mpz_clear(mp_basisprod);
2331       { XFREE(dtemp); XFREE(bdcoeff); }
2332       for (i = 0; i < len; i++) { XFREE(BRNS[i]); } { XFREE(BRNS); }
2333 
2334 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2335       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2336       printf("    Compression Time: %f\n", tt);
2337       fflush(stdout);
2338       ti = clock();
2339 #endif
2340 
2341       /* compute minimal denominator solution of system APv=b */
2342       minSolnoncompressRNS(certflag, redflag, n, k, len, basis, mp_Bb, ARNS,\
2343 			   mp_Nt, mp_D, mp_NZ, mp_DZ);
2344 
2345 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2346       tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2347       printf("    Solution of Compressed System Finding Time: %f\n", tt);
2348       fflush(stdout);
2349 #endif
2350 
2351       for (i = 0; i < n*(k+1); i++) { mpz_clear(mp_Bb[i]); } { XFREE(mp_Bb); }
2352       { XFREE(basiscmb[0]); XFREE(basiscmb[1]); XFREE(basiscmb); }
2353       for (i = 0; i < len; i++) { XFREE(ARNS[i]); } { XFREE(ARNS); }
2354     }
2355 
2356 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2357   ti = clock();
2358 #endif
2359 
2360   /* verify certificate vector of compressed system in uncompressed system */
2361   if ((certflag == 1) && (mpz_cmp_ui(mp_D, 1) != 0))
2362     {
2363       { mpz_init(mp_temp); mpz_init(mp_temp1); }
2364       for (i = 0; i < m; i++)
2365 	{
2366 	  mpz_mul_si(mp_temp, mp_NZ[0], C[i]);
2367 	  for (j = 1; j < n; j++)
2368 	    {
2369 	      mpz_set_si(mp_temp1, C[j*m+i]);
2370 	      mpz_addmul(mp_temp, mp_NZ[j], mp_temp1);
2371 	    }
2372 	  if (!mpz_divisible_p(mp_temp, mp_DZ))
2373 	    {
2374 	      /* check failed */
2375 	      for (l = 0; l < n+k; l++) { mpz_clear(mp_Nt[l]); }
2376 	      { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
2377 	      { XFREE(P1); XFREE(P2); XFREE(mp_Nt); }
2378 	      return 0;
2379 	    }
2380 	}
2381       { mpz_clear(mp_temp); mpz_clear(mp_temp1); }
2382     }
2383 
2384 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2385   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2386   printf("    Certificate Verification Time: %f\n", tt);
2387   fflush(stdout);
2388   ti = clock();
2389 #endif
2390 
2391   /* compute mp_N by Pmp_Nt */
2392   for (i = 0; i < m; i++)
2393     {
2394       mpz_set_ui(mp_N[i], 0);
2395       for (j = 0; j < n; j++)
2396 	if (P1[i*n+j] == 1) { mpz_add(mp_N[i], mp_N[i], mp_Nt[j]); }
2397       for (j = 0; j < k; j++)
2398 	if (P2[i*k+j] == 1) { mpz_add(mp_N[i], mp_N[i], mp_Nt[n+j]); }
2399     }
2400 
2401 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2402   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2403   printf("    Solution of Uncompressed System Recovering Time: %f\n", tt);
2404   fflush(stdout);
2405 #endif
2406 
2407   { XFREE(P1); XFREE(P2); }
2408   for (i = 0; i < n+k; i++) { mpz_clear(mp_Nt[i]); } { XFREE(mp_Nt); }
2409   return 1;
2410 }
2411 
2412 
2413 /* return max( upper bound of signed long, 2^52-1 ) */
2414 long
LongRNSbound(void)2415 LongRNSbound(void)
2416 {
2417   long a;
2418   mpz_t mp_a, mp_b;
2419 
2420   mpz_init(mp_a);
2421   mpz_ui_pow_ui(mp_a, 2, sizeof(long)*8-1);
2422   a = mpz_get_ui(mp_a);
2423   a = a-1;                    /* a := maximal number in Signed Long */
2424   mpz_clear(mp_a);
2425   mpz_init(mp_b);
2426   mpz_ui_pow_ui(mp_b, 2, 52);
2427   mpz_sub_ui(mp_b, mp_b, 1);  /* mp_b := 2^52-1 */
2428   if (mpz_cmp_ui(mp_b, a) >= 0) { mpz_clear(mp_b); return a; }
2429   else
2430     {
2431       unsigned long tmp = mpz_get_ui(mp_b);
2432       mpz_clear(mp_b);
2433       return tmp;
2434     }
2435 }
2436 
2437 
2438 
2439 
2440 /*
2441  * Calling Sequence:
2442  *   1/0 <-- specialminSolveRNS(certflag, redflag, nullcol, n, m,
2443  *                              basislen, mp_bdC, basis, CRNS, mp_b,
2444  *                              mp_N, mp_D, mp_NZ, mp_DZ)
2445  *
2446  * Summary:
2447  *   Compute the minimal denominator solution of a full row rank system,
2448  *   represented by mpz_t integers, and corresponding certificate
2449  *   vector(optional). The solution size could be reduced.
2450  *
2451  * Description:
2452  *   Let the full row rank system be Cx = b, where b is a vector, and C is
2453  *
2454  *               m
2455  *       [     |         ]
2456  *   C = [  A  |    B    ] n,   m > n
2457  *       [     |         ]
2458  *
2459  *   Then certificate vector z satisfies that z.C is an integer vector and
2460  *   z.b has the same denominator as the solution vector.
2461  *
2462  *   The function takes CRNS, representation of C in a RNS, as input. Based on
2463  *   function minSolnoncompressRNS, the function can deal with the case
2464  *   when m >> n. In that case, the function compresses the system, and then
2465  *   computes the solution and the certificate of the compressed system.
2466  *   Then the function verifies the certificate of the compressed system over
2467  *   the uncompressed system as well as converts the solution of the
2468  *   compressed system to that of uncompressed system. If the verification of
2469  *   certificate succeeds, the function returns 1. Otherwise, reture 0.
2470  *
2471  *   On the other hand, if the nullspace of post-conditioned is designated
2472  *   larger than m-n, then the system is uncompressed and m-n is used.
2473  *
2474  *   If redflag is specified as 1, then reduce the solution by the kernel
2475  *   basis with dimension min(max(nullcol, NULLSPACE_COLUMN), dimnullspace),
2476  *   where dimnullspace is the dimension of nullspace of C.
2477  *
2478  *
2479  * Input:
2480  *   certflag: 1/0, flag to indicate whether to compute the certificate vector
2481  *             or not. If certflag = 1, then compute the certificate vector.
2482  *             Otherwise, not compute the certificate.
2483  *    redflag: 1/0, flag to indicate whether to reduce the solution size or not
2484  *           - redflag = 1, reduce the solution size
2485  *           - redflag = 0, not reduce the solution size
2486  *    nullcol: long, dimension of nullspace and kernel basis of conditioned
2487  *             system,
2488  *             if nullcol < NULLSPACE_COLUMN, use NULLSPACE_COLUMN instead
2489  *          n: long, row dimension of the input system
2490  *          m: long, column dimension of input matrix C
2491  *   basislen: FiniteField, dimension of the RNS basis
2492  *     mp_bdC: mpz_t, the maximum sum of absolute values of entries of
2493  *             each row in matrix C
2494  *      basis: 1-dim FiniteField array length basislen, RNS basis
2495  *       CRNS: 2-dim Double array, dimension basislen x n*m, representation of
2496  *             C in the RNS. CRNS[i] = C mod basis[i], i = 0..basislen-1
2497  *       mp_b: 1-dim mpz_t array length n, right hand side vector b
2498  *
2499  * Return:
2500  *   1: verification of the certificate vector succeeds
2501  *   0: verification of the certificate vector fails, needs to restart
2502  *
2503  * Output (only correct when returned value is 1):
2504  *    mp_N: 1-dim mpz_t array length m, numerator vector of the solution
2505  *          with minimal denominator
2506  *    mp_D: mpz_t, minimal denominator of the solution
2507  *   mp_NZ: 1-dim mpz_t array, the first n entries store the numerator
2508  *          vector of the certificate z
2509  *   mp_DZ: mpz_t, the denominator of the certificate z
2510  *
2511  * Note:
2512  *   - The space of the solution (mp_N, mp_D) is needed to preallocated.
2513  *   - If certflag is specified to be 1, then also needs to preallocate space
2514  *     for (mp_NZ, mp_DZ). Otherwise, set mp_NZ = NULL, and mp_DZ = any int
2515  *
2516  * Precondition:
2517  *   any element p in RNS basis must satisfy 2*(p-1)^2 <= 2^53-1.
2518  *
2519  */
2520 
2521 long
specialminSolveRNS(const long certflag,const long redflag,const long nullcol,const long n,const long m,const long basislen,const mpz_t mp_bdC,const FiniteField * basis,Double ** CRNS,mpz_t * mp_b,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)2522 specialminSolveRNS (const long certflag, const long redflag, \
2523 		    const long nullcol, const long n, const long m, \
2524 		    const long basislen, const mpz_t mp_bdC, \
2525 		    const FiniteField *basis, Double **CRNS, mpz_t *mp_b, \
2526 		    mpz_t *mp_N, mpz_t mp_D, mpz_t *mp_NZ, mpz_t mp_DZ)
2527 {
2528   long i, j, l, ver, k, len=1;
2529   FiniteField p, qh, d;
2530   mpz_t mp_maxInter, mp_basisprod;
2531   long *P, *rp;
2532   double *cumprod, *cumprod1;
2533   FiniteField *bdcoeff, *cmbasis, *extbdcoeff, **extbasiscmb;
2534   Double *P1, *P2, *dtemp, *CExtRNS, *AP, **ARNS, **BRNS;
2535   mpz_t *mp_Bb, *mp_Nt;
2536   double tt;
2537 
2538 #if HAVE_TIME_H
2539   clock_t ti;
2540 #endif
2541 
2542   /* only when we need to reduce the solution, we may reset number
2543      of columns */
2544   k = NULLSPACE_COLUMN;
2545   if ((redflag == 1) && (nullcol > NULLSPACE_COLUMN)) { k = nullcol; }
2546 
2547   p = RandPrime(15, 19);
2548   cmbasis = combBasis(basislen, basis);
2549   bdcoeff = repBound(basislen, basis, cmbasis);
2550   mpz_init(mp_basisprod);
2551 
2552   /* in case m-n is small, do not need to compress the system */
2553   if (m-n <= k)
2554     {
2555       dtemp = XMALLOC(Double, basislen);
2556       mp_Bb = XMALLOC(mpz_t, (m-n+1)*n);
2557       for (i = 0; i < (m-n+1)*n; i++) { mpz_init(mp_Bb[i]); }
2558 
2559       /* use CRT to reconstruct B */
2560       basisProd(basislen, basis, mp_basisprod);
2561       for (i = 0; i < n; i++)
2562 	for (j = 0; j < m-n; j++)
2563 	  {
2564 	    for (l = 0; l < basislen; l++) { dtemp[l] = CRNS[l][i*m+(j+n)]; }
2565 	    ChineseRemainder(basislen, mp_basisprod, basis, cmbasis, bdcoeff, \
2566 			     dtemp, mp_Bb[i*(m-n+1)+j]);
2567 	  }
2568       for (i = 0; i < n; i++) { mpz_set(mp_Bb[i*(m-n+1)+m-n], mp_b[i]); }
2569       mpz_clear(mp_basisprod);
2570       { XFREE(dtemp); XFREE(cmbasis); XFREE(bdcoeff); }
2571 
2572       /* copy n x n submatrices ARNS[i] from CRNS[i] */
2573       ARNS = XMALLOC(Double *, basislen);
2574       for (i = 0; i < basislen; i++)
2575 	{
2576 	  ARNS[i] = XMALLOC(Double, n*n);
2577 	  for (j = 0; j < n; j++)
2578 	    for (l = 0; l < n; l++)
2579 	      ARNS[i][j*n+l] = CRNS[i][j*m+l];
2580 	}
2581       minSolnoncompressRNS(certflag, redflag, n, m-n, basislen, basis, mp_Bb, \
2582 			   ARNS, mp_N, mp_D, mp_NZ, mp_DZ);
2583       for (i = 0; i < (m-n+1)*n; i++) { mpz_clear(mp_Bb[i]); }
2584       XFREE(mp_Bb);
2585       for (i = 0; i < basislen; i++) { XFREE(ARNS[i]); } { XFREE(ARNS); }
2586       return 1;
2587     }
2588 
2589   /* compute maximum mangnitude bound of elements of extended RNS basis */
2590   qh = RNSbound(m);
2591 
2592   /* compute maximum intermidiate result during the compression */
2593   mpz_init_set_ui(mp_maxInter, 1);
2594   mpz_addmul_ui(mp_maxInter, mp_bdC, 2);
2595 
2596   /* compute extended RNS basis */
2597   extbasiscmb = findRNS(qh, mp_maxInter, &len);
2598   { mpz_clear(mp_maxInter); }
2599   cumprod = cumProd(basislen, basis, len, extbasiscmb[0]);
2600   ARNS = XMALLOC(Double *, len);
2601   BRNS = XMALLOC(Double *, len);
2602   CExtRNS = XMALLOC(Double, n*m);
2603   cumprod1 = cumProd(len, extbasiscmb[0], 1, &p);
2604   extbdcoeff = repBound(len, extbasiscmb[0], extbasiscmb[1]);
2605   AP = XMALLOC(Double, n*n);
2606   P = XMALLOC(long, n+1);
2607   rp = XMALLOC(long, n+1);
2608 
2609 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2610   ti = clock();
2611 #endif
2612 
2613   /* compress the large matrix to a small one
2614      generate random 0, 1 matrices P1, P2 for compression
2615      note: matrix might be <I_(n+k), *> */
2616   while (1)
2617     {
2618       P1 = randomDb(m, n, 1);
2619       P2 = randomDb(m, k, 1);
2620       for (i = 0; i < len; i++)
2621 	{
2622 	  ARNS[i] = XMALLOC(Double, n*n);
2623 	  BRNS[i] = XMALLOC(Double, n*k);
2624 
2625 	  /* extend CRNS to the extended RNS basis */
2626 	  basisExt(basislen, n*m, extbasiscmb[0][i], basis, cmbasis, \
2627 		   cumprod[i], bdcoeff, CRNS, CExtRNS);
2628 	  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, m, \
2629 		      1.0, CExtRNS, m, P1, n, 0.0, ARNS[i], n);
2630 	  Dmod(extbasiscmb[0][i], ARNS[i], n, n, n);
2631 	  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, k, m, \
2632 		      1.0, CExtRNS, m, P2, k, 0.0, BRNS[i], k);
2633 	  Dmod(extbasiscmb[0][i], BRNS[i], n, k, k);
2634 	}
2635       /* check whether the rank does not decrease after compression */
2636       for (i = 0; i < n+1; i++) { P[i] = i;  rp[i] = 0; }
2637       basisExt(len, n*n, p, extbasiscmb[0], extbasiscmb[1], *cumprod1, \
2638 	       extbdcoeff, ARNS, AP);
2639       d=1;
2640       RowEchelonTransform(p, AP, n, n, 0, 0, 0, 0, P, rp, &d);
2641       if (rp[0] == n) { break; }
2642       else
2643 	{
2644 	  for (i = 0; i < len; i++) { XFREE(ARNS[i]); XFREE(BRNS[i]); }
2645 	  { XFREE(P1); XFREE(P2); }
2646 	}
2647     }
2648   { XFREE(bdcoeff); XFREE(cumprod); XFREE(cmbasis); XFREE(CExtRNS); }
2649   { XFREE(extbdcoeff); XFREE(cumprod1); XFREE(AP); XFREE(P); XFREE(rp); }
2650 
2651   /* use CRT to reconstruct mpz_t matrix B */
2652   mp_Bb = XMALLOC(mpz_t, n*(k+1));
2653   for (i = 0; i < n*(k+1); i++) { mpz_init(mp_Bb[i]); }
2654   basisProd(len, extbasiscmb[0], mp_basisprod);
2655   bdcoeff = repBound(len, extbasiscmb[0], extbasiscmb[1]);
2656   dtemp = XMALLOC(Double, len);
2657   for (i = 0; i < n; i++)
2658     for (j = 0; j < k; j++)
2659       {
2660 	for (l = 0; l < len; l++) { dtemp[l] = BRNS[l][i*k+j]; }
2661 	ChineseRemainder(len, mp_basisprod, extbasiscmb[0], extbasiscmb[1], \
2662 			 bdcoeff, dtemp, mp_Bb[i*(k+1)+j]);
2663       }
2664   for (i = 0; i < n; i++) { mpz_set(mp_Bb[i*(k+1)+k], mp_b[i]); }
2665   mpz_clear(mp_basisprod);
2666   { XFREE(dtemp); XFREE(bdcoeff); }
2667   for (i = 0; i < len; i++) { XFREE(BRNS[i]); } { XFREE(BRNS); }
2668   mp_Nt = XMALLOC(mpz_t, n+k);
2669   for (i = 0; i < n+k; i++) { mpz_init(mp_Nt[i]); }
2670 
2671 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2672   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2673   printf("    Compression Time: %f\n", tt);
2674   ti = clock();
2675 #endif
2676 
2677   /* compute minimal denominator solution of system APv=b */
2678   minSolnoncompressRNS(certflag, redflag, n, k, len, extbasiscmb[0], mp_Bb, \
2679 		       ARNS, mp_Nt, mp_D, mp_NZ, mp_DZ);
2680 
2681 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2682   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2683   printf("    Solution of Compressed System Finding Time: %f\n", tt);
2684 #endif
2685 
2686   for (i = 0; i < n*(k+1); i++) { mpz_clear(mp_Bb[i]); } { XFREE(mp_Bb); }
2687   { XFREE(extbasiscmb[0]); XFREE(extbasiscmb[1]); XFREE(extbasiscmb); }
2688   for (i = 0; i < len; i++) { XFREE(ARNS[i]); } { XFREE(ARNS); }
2689 
2690 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2691   ti = clock();
2692 #endif
2693 
2694   /* verify certificate vector of compressed system in uncompressed system */
2695   if ((certflag == 1) && (mpz_cmp_ui(mp_D, 1) != 0))
2696     {
2697       ver = certVerify(basislen, n, m, basis, CRNS, mp_DZ, mp_NZ);
2698 
2699       /* check failed */
2700       if (ver == 0)
2701 	{
2702 	  XFREE(P1); XFREE(P2);
2703 	  for (i = 0; i < n+k; i++) { mpz_clear(mp_Nt[i]); } { XFREE(mp_Nt); }
2704 	  return 0;
2705 	}
2706     }
2707 
2708 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2709   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2710   printf("    Certificate Verification Time: %f\n", tt);
2711   ti = clock();
2712 #endif
2713 
2714   /* compute mp_N by Pmp_Nt */
2715   for (i = 0; i < m; i++)
2716     {
2717       mpz_set_ui(mp_N[i], 0);
2718       for (j = 0; j < n; j++)
2719 	if (P1[i*n+j] == 1) { mpz_add(mp_N[i], mp_N[i], mp_Nt[j]); }
2720       for (j = 0; j < k; j++)
2721 	if (P2[i*k+j] == 1) { mpz_add(mp_N[i], mp_N[i], mp_Nt[n+j]); }
2722     }
2723 
2724 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2725   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2726   printf("    Solution of Uncompressed System Recovering Time: %f\n", tt);
2727 #endif
2728 
2729   { XFREE(P1); XFREE(P2); }
2730   for (i = 0; i < n+k; i++) { mpz_clear(mp_Nt[i]); } { XFREE(mp_Nt); }
2731   return 1;
2732 }
2733 
2734 
2735 
2736 /*
2737  * Calling Sequences:
2738  *   1/0 <-- certVerify(basislen, n, m, basis, ARNS, mp_DZ, mp_NZ)
2739  *
2740  * Summary:
2741  *   Verify the correctness of a certificate vector for a system of linear
2742  *   equations
2743  *
2744  * Description:
2745  *   Let the system of linear equations be Ax = b. The function verifies the
2746  *   rational vector z by checking whether z.A is an integer vector. The
2747  *   n x m matrix A is represented in RNS. z is represented by an integer
2748  *   numerator vector and an integer denominator.  The function is output
2749  *   sensitive, i.e., the verification will stop as long as one failure
2750  *   occurs.
2751  *
2752  * Input:
2753  *   basislen: long, dimension of the RNS basis
2754  *          n: long, row dimension of A
2755  *          m: long, column dimension of A
2756  *      basis: 1-dim FiniteField array length basislen, RNS basis
2757  *       ARNS: 2-dim Double array, dimension basislen x n*m, representation of
2758  *             A in the RNS. ARNS[i] = A mod basis[i], i = 0..basislen-1
2759  *      mp_DZ: mpz_t, denominator of vector z
2760  *      mp_NZ: 1-dim mpz_t array length n, numerator array of vector z
2761  *
2762  * Return:
2763  *   1: z is the certificate of A
2764  *   0: z is not the certificate of A
2765  *
2766  */
2767 
2768 long
certVerify(const long basislen,const long n,const long m,const FiniteField * basis,Double ** ARNS,mpz_t mp_DZ,mpz_t * mp_NZ)2769 certVerify (const long basislen, const long n, const long m, \
2770 	    const FiniteField *basis, Double **ARNS, mpz_t mp_DZ, \
2771 	    mpz_t *mp_NZ)
2772 {
2773   long i, j, l;
2774   FiniteField *bdcoeff, *cmbasis;
2775   double temp;
2776   mpz_t mp_AZ, mp_temp, mp_AZ1;
2777   Double **U;
2778 
2779   /* allocate matrix U to store mix radix coefficients of one row/column
2780      of matrix A */
2781   U = XMALLOC(Double *, basislen);
2782   for (i = 0; i < basislen; i++)
2783     U[i] = XMALLOC(Double, n);
2784   cmbasis = combBasis(basislen, basis);
2785   bdcoeff = repBound(basislen, basis, cmbasis);
2786   mpz_init(mp_AZ);
2787   mpz_init(mp_AZ1);
2788   mpz_init(mp_temp);
2789   for (i = 0; i < m; i++)
2790     {
2791       /* apply Garner's algorithm to compute U in positive representation */
2792       cblas_dcopy(n, ARNS[0]+i, m, U[0], 1);
2793       for (j = 1; j < basislen; j++)
2794 	{
2795 	  cblas_dcopy(n, U[j-1], 1, U[j], 1);
2796 	  for (l = j-2; l >= 0; l--)
2797 	    {
2798 	      cblas_dscal(n,  (double)(basis[l] % basis[j]), U[j], 1);
2799 	      cblas_daxpy(n, 1.0, U[l], 1, U[j], 1);
2800 	      Dmod((double)basis[j], U[j], 1, n, n);
2801 	    }
2802 	  temp = (double)cmbasis[j]*(double)(basis[j]-1);
2803 	  temp = fmod(temp, (double)basis[j]);
2804 	  cblas_dscal(n, temp, U[j], 1);
2805 	  cblas_daxpy(n, (double)cmbasis[j], ARNS[j]+i, m, U[j], 1);
2806 	  Dmod((double)basis[j], U[j], 1, n, n);
2807 	}
2808       /* change U to symmetric representation */
2809       for (j = 0; j < n; j++)
2810 	for (l = basislen-1; l >= 0; l--)
2811 	  {
2812 	    if (U[l][j] > bdcoeff[l])
2813 	      {
2814 		U[basislen-1][j] = U[basislen-1][j]-basis[basislen-1];
2815 		break;
2816 	      }
2817 	    else if (U[l][j] < bdcoeff[l]) { break; }
2818 	  }
2819       /* do vector-vector product and sum up the product */
2820       mpz_set_ui(mp_AZ, 0);
2821       for (j = 0; j < n; j++)
2822 	{
2823 	  mpz_mul_si(mp_temp, mp_NZ[j], U[basislen-1][j]);
2824 	  mpz_add(mp_AZ, mp_AZ, mp_temp);
2825 	}
2826       for (j = basislen-2; j >= 0; j--)
2827 	{
2828 
2829 	  mpz_mul_ui(mp_AZ, mp_AZ, basis[j]);
2830 	  mpz_set_ui(mp_AZ1, 0);
2831 	  for (l = 0; l < n; l++)
2832 	    {
2833 	      mpz_mul_ui(mp_temp, mp_NZ[l], U[j][l]);
2834 	      mpz_add(mp_AZ1, mp_AZ1, mp_temp);
2835 	    }
2836 	  mpz_add(mp_AZ, mp_AZ, mp_AZ1);
2837 	}
2838       /* check whether mp_DZ|mp_AZ */
2839       if (!mpz_divisible_p(mp_AZ, mp_DZ))
2840 	{
2841 	  { mpz_clear(mp_AZ); mpz_clear(mp_AZ1); mpz_clear(mp_temp); }
2842 	  for (j = 0; j < basislen; j++) { XFREE(U[j]); } { XFREE(U); }
2843 	  { XFREE(bdcoeff); XFREE(cmbasis); }
2844 	  return 0;
2845 	}
2846     }
2847   { mpz_clear(mp_AZ); mpz_clear(mp_AZ1); mpz_clear(mp_temp); }
2848   for (j = 0; j < basislen; j++) { XFREE(U[j]); } { XFREE(U); }
2849   { XFREE(bdcoeff); XFREE(cmbasis); }
2850   return 1;
2851 }
2852 
2853 
2854 
2855 /*
2856  * Calling Seqeuence:
2857  *   minSolnoncompressLong(certflag, redflag, n, k, mp_Bb, A, mp_N, mp_D,
2858  *                         mp_NZ, mp_DZ)
2859  *
2860  * Summary:
2861  *   Compute the minimal denominator solution of a full row rank system
2862  *   without compression, represented by signed long integers, and the
2863  *   corresponding certificate vector(optionalf). The solution size could be
2864  *   reduced.
2865  *
2866  * Description:
2867  *   Let the full row rank system be Cx = b, where b is a vector, and C is like
2868  *          n    k
2869  *       [     |   ]
2870  *   C = [  A  | B ] n
2871  *       [     |   ]
2872  *
2873  *   The function deals with the case when C is a signed long matrix. It
2874  *   takes A and Bb, represented by signed long and mpz_t respectively as
2875  *   input, where Bb is the matrix composed of B and b, Bb[1..-1, 1..k] = B
2876  *   and Bb[1..-1, k+1] = b.
2877  *
2878  *   The certificate vector z satisfy z.C is an integer vector and z.b has the
2879  *   same denominator as the solution vector.
2880  *
2881  *   If redflag is specified as 1, then reduce the solution by the kernel
2882  *   basis with dimension k.
2883  *
2884  * Input:
2885  *   certflag: 1/0, flag to indicate whether to compute the certificate vector
2886  *             or not. If certflag = 1, then compute the certificate vector.
2887  *             Otherwise, not compute the certificate.
2888  *    redflag: 1/0, flag to indicate whether to reduce the solution size or not
2889  *           - redflag = 1, reduce the solution size
2890  *           - redflag = 0, not reduce the solution size
2891  *          n: long, row dimension of the input system
2892  *          k: long, column dimension of B
2893  *      mp_Bb: 1-dim mpz_t array length (k+1)*n, matrix Bb consisting of
2894  *             submatrix B and vector b
2895  *          A: 1-dim long array length n*n, submatrix A of C
2896  *
2897  * Output:
2898  *    mp_N: 1-dim mpz_t array length n+k, numerator vector of the solution
2899  *          with minimal denominator
2900  *    mp_D: mpz_t, denominator of the solution
2901  *   mp_NZ: 1-dim mpz_t array, the first n entries store the numerator vector
2902  *          of the certificate z
2903  *   mp_DZ: mpz_t, the denominator of the certificate z
2904  *
2905  * Note:
2906  *   - The space of the solution (mp_N, mp_D) is needed to preallocated.
2907  *   - If certflag is specified to be 1, then also needs to preallocate space
2908  *     for (mp_NZ, mp_DZ). Otherwise, set mp_NZ = NULL, and mp_DZ = any int.
2909  *
2910  */
2911 
2912 void
minSolnoncompressLong(const long certflag,const long redflag,const long n,const long k,mpz_t * mp_Bb,const long * A,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)2913 minSolnoncompressLong (const long certflag, const long redflag, const long n, \
2914 		       const long k, mpz_t *mp_Bb, const long* A, \
2915 		       mpz_t *mp_N, mpz_t mp_D, mpz_t *mp_NZ, mpz_t mp_DZ)
2916 {
2917   long i, j, s, t, h;
2918   mpz_t mp_DABb, mp_s, mp_ns, mp_temp, mp_h;
2919   mpz_t *mp_NABb, *mp_M, *mp_Cert=NULL, *mp_T, *mp_Lat;
2920   double tt;
2921 
2922 #if HAVE_TIME_H
2923   clock_t ti, ti1;
2924 #endif
2925 
2926   mp_NABb = XMALLOC(mpz_t, (k+1)*n);
2927   for (i = 0; i < (k+1)*n; i++) { mpz_init(mp_NABb[i]); }
2928   mpz_init(mp_DABb);
2929 
2930 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2931   ti = clock();
2932 #endif
2933 
2934   /* compute A^(-1)B and A^(-1)b at the same time */
2935   nonsingSolvMM(RightSolu, n, k+1, A, mp_Bb, mp_NABb, mp_DABb);
2936 
2937   /* compute s */
2938   mpz_init_set(mp_s, mp_DABb);
2939   mpz_init(mp_ns);
2940   mpz_neg(mp_ns, mp_s);
2941 
2942   /* construct matrix M = <s*<A^(-1)B, I_k>|s*<A^(-1)b, 0>, <0|s>> */
2943   mp_M = XMALLOC(mpz_t, (n+k+1)*(k+1));
2944   for (i = 0; i < (n+k+1)*(k+1); i++) { mpz_init(mp_M[i]); }
2945   mpz_init_set_ui(mp_temp, 1);
2946   scalCpMP(n, k+1, k+1, k+1, mp_temp, mp_NABb, mp_M);
2947   for (i = 0; i < k; i++) { mpz_set(mp_M[n*(k+1)+i*(k+1)+i], mp_ns); }
2948   mpz_set(mp_M[(n+k)*(k+1)+k], mp_s);
2949   { mpz_clear(mp_DABb); mpz_clear(mp_ns); }
2950   for (i = 0; i < (k+1)*n; i++) { mpz_clear(mp_NABb[i]); } { XFREE(mp_NABb); }
2951 
2952 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2953   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2954   printf("          Matix M Construction Time(A^(-1)B, A^(-1)b): %f\n", tt);
2955 #endif
2956 
2957   /* construct matrix mp_T = s*<A^(-1)B, -I_k> for kernel basis N */
2958   mp_T = XMALLOC(mpz_t, (n+k)*k);
2959   for (i = 0; i < (n+k)*k; i++) { mpz_init(mp_T[i]); }
2960   scalCpMP(n+k, k, k+1, k, mp_temp, mp_M, mp_T);
2961 
2962   /* store s*<A^(-1)b, 0> into solution array mp_N */
2963   scalCpMP(n, 1, k+1, 1, mp_temp, mp_M+k, mp_N);
2964   for (i = n; i < n+k; i++) { mpz_set_ui(mp_N[i], 0); }
2965   mpz_clear(mp_temp);
2966 
2967 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2968   ti = clock();
2969 #endif
2970 
2971   /* compute special hermite form of M and certificate(if certflag == true) */
2972   if (certflag == 1)
2973     {
2974       mp_Cert = XMALLOC(mpz_t, n+k+1);
2975       for (i = 0; i < n+k+1; i++) { mpz_init(mp_Cert[i]); }
2976     }
2977   specialHermite(certflag, n, k, 1, mp_M, mp_Cert);
2978 
2979 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2980   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
2981   printf("          Hermite Form Computing Time: %f\n", tt);
2982 #endif
2983 
2984   /* compute minimal denominator */
2985   mpz_init_set(mp_h, mp_M[k*(k+1)+k]);
2986   mpz_divexact(mp_D, mp_s, mp_h);
2987 
2988 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
2989   ti = clock();
2990 #endif
2991 
2992   /* compute kernel basis N = mp_TH^(-1), which is inplaced into mp_T */
2993   kernelBasis(n, k, 1, mp_M, mp_T);
2994 
2995   /* initialize the lattice */
2996   if (redflag == 1)
2997     {
2998       mp_Lat = XMALLOC(mpz_t, (n+k)*(k+1));
2999       for (i = 0; i < k*(n+k); i++)
3000 	{
3001 	  s = (long)(i/(n+k));  t = i-s*(n+k);  h = t*k+s;
3002 	  mpz_init_set(mp_Lat[i], mp_T[h]);
3003 	}
3004     }
3005 
3006   /* compute mp_T.M[1..k, k+1] and inplace the result into first column
3007      of mp_T */
3008   for (i = 0; i < n+k; i++)
3009     {
3010       mpz_mul(mp_T[i*k], mp_T[i*k], mp_M[k]);
3011       for (j = 1; j < k; j++)
3012 	mpz_addmul(mp_T[i*k], mp_T[i*k+j], mp_M[j*(k+1)+k]);
3013 
3014       /* compute numerator vector of the solution at the same time */
3015       mpz_sub(mp_N[i], mp_N[i], mp_T[i*k]);
3016       mpz_divexact(mp_N[i], mp_N[i], mp_h);
3017     }
3018   { mpz_clear(mp_s); mpz_clear(mp_h); }
3019   for (i = 0; i < (n+k)*k; i++) { mpz_clear(mp_T[i]); } { XFREE(mp_T); }
3020   for (i = 0; i < (n+k+1)*(k+1); i++) { mpz_clear(mp_M[i]); } { XFREE(mp_M); }
3021 
3022   /* reduct solution by lattice reduction */
3023   if (redflag == 1)
3024     {
3025       for (i = 0; i < n+k; i++) { mpz_init_set(mp_Lat[k*(n+k)+i], mp_N[i]); }
3026 
3027 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3028       ti1 = clock();
3029 #endif
3030 
3031       ired(mp_Lat, k+1, n+k, k);
3032 
3033 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3034       tt = (double)(clock()-ti1)/CLOCKS_PER_SEC;
3035       printf("           lattice reduce solution time: %f\n", tt);
3036 #endif
3037 
3038       for (i = 0; i < n+k; i++) { mpz_set(mp_N[i], mp_Lat[k*(n+k)+i]); }
3039       for (i = 0; i < (n+k)*(k+1); i++) { mpz_clear(mp_Lat[i]); }
3040       XFREE(mp_Lat);
3041     }
3042 
3043   /* compute certificate */
3044   if (certflag == 1)
3045     {
3046       if (mpz_cmp_ui(mp_D, 1) != 0)
3047 	nonsingSolvMM(LeftSolu, n, 1, A, mp_Cert, mp_NZ, mp_DZ);
3048       else
3049 	{
3050 	  for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
3051 	  mpz_set_ui(mp_DZ, 1);
3052 	}
3053       for (i = 0; i< n+k+1; i++) { mpz_clear(mp_Cert[i]); } { XFREE(mp_Cert); }
3054     }
3055 
3056 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3057   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
3058   printf("          Kernel Basis, Certificate, Solution Finding Time: %f\n", \
3059 	 tt);
3060 #endif
3061 
3062   return;
3063 }
3064 
3065 
3066 
3067 /*
3068  * Calling Sequence:
3069  *   minSolnoncompressRNS(certflag, redflag, n, k, basislen, basis, mp_Bb,
3070  *                        ARNS, mp_N, mp_D, mp_NZ, mp_DZ)
3071  *
3072  * Summary:
3073  *   Compute the minimal denominator solution of a full row rank system
3074  *   without compression, represented in RNS, and the corresponding
3075  *   certificate vector(optional). The solution size could be reduced.
3076  *
3077  * Description:
3078  *   Let the full row rank system be Cx = b, where b is a vector, and C is like
3079  *          n    k
3080  *       [     |   ]
3081  *   C = [  A  | B ] n
3082  *       [     |   ]
3083  *
3084  *   The function doesn't take C as input, but takes ARNS, the representation
3085  *   of A in a RNS, and mpz_t matrix Bb, the matrix composed of B and b as
3086  *   input, where Bb[1..-1, 1..k] = B and Bb[1..-1, k+1] = b.
3087  *
3088  *   The certificate vector z satisfy z.C is an integer vector and z.b has the
3089  *   same denominator as the solution vector.
3090  *
3091  *   If redflag is specified as 1, then reduce the solution by the kernel
3092  *   basis with dimension k.
3093  *
3094  *
3095  * Input:
3096  *   certflag: 1/0, flag to indicate whether to compute the certificate vector
3097  *             or not. If certflag = 1, then compute the certificate vector.
3098  *             Otherwise, not compute the certificate.
3099  *    redflag: 1/0, flag to indicate whether to reduce the solution size or not
3100  *           - redflag = 1, reduce the solution size
3101  *           - redflag = 0, not reduce the solution size
3102  *          n: long, row dimension of the input system
3103  *          k: long, column dimension of B
3104  *   basislen: long, dimension of the RNS basis
3105  *      basis: 1-dim FiniteField array length basislen, RNS basis
3106  *      mp_Bb: 1-dim mpz_t array length (k+1)*n, matrix Bb consisting of
3107  *             submatrix B and vector b
3108  *       ARNS: 2-dim Double array, dimension basislen x n*n, representation of
3109  *             A in the RNS. ARNS[i] = A mod basis[i], i = 0..basislen-1
3110  *
3111  * Output:
3112  *    mp_N: 1-dim mpz_t array length n+k, numerator vector of the solution
3113  *          with minimal denominator
3114  *    mp_D: mpz_t, denominator of the solution
3115  *   mp_NZ: 1-dim mpz_t array, the first n entries store the numerator
3116  *          vector of the certificate z
3117  *   mp_DZ: mpz_t, the denominator of the certificate z
3118  *
3119  * Note:
3120  *   - The space of the solution (mp_N, mp_D) is needed to preallocated.
3121  *   - If certflag is specified to be 1, then also needs to preallocate space
3122  *       for (mp_NZ, mp_DZ). Otherwise, set mp_NZ = NULL, and mp_DZ = any int.
3123  *
3124  * Precondition:
3125  *   any element p in RNS basis must satisfy 2*(p-1)^2 <= 2^53-1.
3126  *
3127  */
3128 
3129 void
minSolnoncompressRNS(const long certflag,const long redflag,const long n,const long k,const long basislen,const FiniteField * basis,mpz_t * mp_Bb,Double ** ARNS,mpz_t * mp_N,mpz_t mp_D,mpz_t * mp_NZ,mpz_t mp_DZ)3130 minSolnoncompressRNS (const long certflag, const long redflag, const long n, \
3131 		      const long k, const long basislen, \
3132 		      const FiniteField *basis, mpz_t *mp_Bb, Double **ARNS, \
3133 		      mpz_t *mp_N, mpz_t mp_D, mpz_t *mp_NZ, mpz_t mp_DZ)
3134 {
3135   long i, j, s, t, h;
3136   mpz_t mp_DABb, mp_s, mp_ns, mp_temp, mp_h;
3137   mpz_t *mp_NABb, *mp_M, *mp_Cert=NULL, *mp_T, *mp_Lat;
3138   double tt;
3139 
3140 #if HAVE_TIME_H
3141   clock_t ti, ti1;
3142 #endif
3143 
3144   mp_NABb = XMALLOC(mpz_t, (k+1)*n);
3145   for (i = 0; i < (k+1)*n; i++) { mpz_init(mp_NABb[i]); }
3146   mpz_init(mp_DABb);
3147 
3148 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3149   ti = clock();
3150 #endif
3151 
3152   /* compute A^(-1)B and A^(-1)b at the same time */
3153   nonsingSolvRNSMM(RightSolu, n, k+1, basislen, basis, ARNS, mp_Bb, \
3154 		   mp_NABb, mp_DABb);
3155 
3156 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3157   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
3158   printf("A^(-1)B, A^(-1)b solving time: %f\n", tt);
3159   fflush(stdout);
3160   ti = clock();
3161 #endif
3162 
3163   /* compute s */
3164   mpz_init_set(mp_s, mp_DABb);
3165   mpz_init(mp_ns);
3166   mpz_neg(mp_ns, mp_s);
3167 
3168   /* construct matrix M = <s*<A^(-1)B, I_k>|s*<A^(-1)b, 0>, <0|s>> */
3169   mp_M = XMALLOC(mpz_t, (n+k+1)*(k+1));
3170   for (i = 0; i < (n+k+1)*(k+1); i++) { mpz_init(mp_M[i]); }
3171   mpz_init_set_ui(mp_temp, 1);
3172   scalCpMP(n, k+1, k+1, k+1, mp_temp, mp_NABb, mp_M);
3173   for (i = 0; i < k; i++) { mpz_set(mp_M[n*(k+1)+i*(k+1)+i], mp_ns); }
3174   mpz_set(mp_M[(n+k)*(k+1)+k], mp_s);
3175   { mpz_clear(mp_DABb); mpz_clear(mp_ns); }
3176   for (i = 0; i < (k+1)*n; i++) { mpz_clear(mp_NABb[i]); } { XFREE(mp_NABb); }
3177 
3178   /* construct matrix mp_T = s*<A^(-1)B, -I_k> for kernel basis N */
3179   mp_T = XMALLOC(mpz_t, (n+k)*k);
3180   for (i = 0; i < (n+k)*k; i++) { mpz_init(mp_T[i]); }
3181   scalCpMP(n+k, k, k+1, k, mp_temp, mp_M, mp_T);
3182 
3183 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3184   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
3185   printf("          Matix M, N Construction Time: %f\n", tt);
3186 #endif
3187 
3188   /* store s*<A^(-1)b, 0> into solution array mp_N */
3189   scalCpMP(n, 1, k+1, 1, mp_temp, mp_M+k, mp_N);
3190   for (i = n; i < n+k; i++) { mpz_set_ui(mp_N[i], 0); }
3191   mpz_clear(mp_temp);
3192 
3193 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3194   ti = clock();
3195 #endif
3196 
3197   /* compute special hermite form of M and certificate(if certflag == true) */
3198   if (certflag == 1)
3199     {
3200       mp_Cert = XMALLOC(mpz_t, n+k+1);
3201       for (i = 0; i < n+k+1; i++) { mpz_init(mp_Cert[i]); }
3202     }
3203   specialHermite(certflag, n, k, 1, mp_M, mp_Cert);
3204 
3205 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3206   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
3207   printf("          Hermite Form Computing Time: %f\n", tt);
3208   fflush(stdout);
3209 #endif
3210 
3211   /* compute minimal denominator */
3212   mpz_init_set(mp_h, mp_M[k*(k+1)+k]);
3213   mpz_divexact(mp_D, mp_s, mp_h);
3214 
3215 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3216   ti = clock();
3217 #endif
3218 
3219   /* compute kernel basis N = mp_TH^(-1), which is inplaced into mp_T */
3220   kernelBasis(n, k, 1, mp_M, mp_T);
3221 
3222   /* initialize the lattice */
3223   if (redflag == 1)
3224     {
3225       mp_Lat = XMALLOC(mpz_t, (n+k)*(k+1));
3226       for (i = 0; i < k*(n+k); i++)
3227 	{
3228 	  s = (long)(i/(n+k));  t = i-s*(n+k);  h = t*k+s;
3229 	  mpz_init_set(mp_Lat[i], mp_T[h]);
3230 	}
3231     }
3232 
3233   /* compute mp_T.M[1..k, k+1] and inplace the result into first column
3234      of mp_T */
3235   for (i = 0; i < n+k; i++)
3236     {
3237       mpz_mul(mp_T[i*k], mp_T[i*k], mp_M[k]);
3238       for (j = 1; j < k; j++)
3239 	mpz_addmul(mp_T[i*k], mp_T[i*k+j], mp_M[j*(k+1)+k]);
3240 
3241       /* compute numerator vector of the solution at the same time */
3242       mpz_sub(mp_N[i], mp_N[i], mp_T[i*k]);
3243       mpz_divexact(mp_N[i], mp_N[i], mp_h);
3244     }
3245   { mpz_clear(mp_s); mpz_clear(mp_h); }
3246   for (i = 0; i < (n+k)*k; i++) { mpz_clear(mp_T[i]); } { XFREE(mp_T); }
3247   for (i = 0; i < (n+k+1)*(k+1); i++) { mpz_clear(mp_M[i]); } { XFREE(mp_M); }
3248 
3249   /* reduct solution by lattice reduction */
3250   if (redflag == 1)
3251     {
3252       for (i = 0; i < n+k; i++) { mpz_init_set(mp_Lat[k*(n+k)+i], mp_N[i]); }
3253 
3254 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3255       ti1 = clock();
3256 #endif
3257 
3258       ired(mp_Lat, k+1, n+k, k);
3259 
3260 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3261       tt = (double)(clock()-ti1)/CLOCKS_PER_SEC;
3262       printf("           lattice reduce solution time: %f\n", tt);
3263 #endif
3264 
3265       for (i = 0; i < n+k; i++) { mpz_set(mp_N[i], mp_Lat[k*(n+k)+i]); }
3266       for (i = 0; i < (n+k)*(k+1); i++) { mpz_clear(mp_Lat[i]); }
3267       XFREE(mp_Lat);
3268     }
3269 
3270   /* compute certificate */
3271   if (certflag == 1)
3272     {
3273       if (mpz_cmp_ui(mp_D, 1) != 0)
3274 	nonsingSolvRNSMM(LeftSolu, n, 1, basislen, basis, ARNS, mp_Cert, \
3275 			 mp_NZ, mp_DZ);
3276       else
3277 	{
3278 	  for (i = 0; i < n; i++) { mpz_set_ui(mp_NZ[i], 0); }
3279 	  mpz_set_ui(mp_DZ, 1);
3280 	}
3281       for (i = 0; i< n+k+1; i++) { mpz_clear(mp_Cert[i]); } { XFREE(mp_Cert); }
3282     }
3283 
3284 #if HAVE_VERBOSE_MODE && HAVE_TIME_H
3285   tt = (double)(clock()-ti)/CLOCKS_PER_SEC;
3286   printf("          Kernel Basis, Certificate, Solution Finding Time: %f\n", \
3287 	 tt);
3288   fflush(stdout);
3289 #endif
3290 
3291   return;
3292 }
3293 
3294 
3295 
3296 /*
3297  * Calling Sequence:
3298  *   specialHermite(certflag, n, k, t, M, Cert)
3299  *
3300  * Summary:
3301  *   Perform unimodular transformation inplace in matrix M and return an
3302  *   certificate matrix Cert(optional) at the same time
3303  *
3304  * Description:
3305  *   Matrix M consists of:
3306  *
3307  *               k        t
3308  *         [          |        ]
3309  *         [          |        ]
3310  *         [  A^(-1)B | A^(-1)b] n
3311  *   M = s*[          |        ]
3312  *         [          |        ]
3313  *         [-------------------]
3314  *         [                   ]
3315  *         [                   ] k+t
3316  *         [     I_(k+t)       ]
3317  *         [                   ]
3318  *
3319  *   where A a n x n matrix, B a n x k matrix, b a n x t matrix, and I_(k+t) a
3320  *   k+t x k+t identity matrix.
3321  *
3322  *   The inplaced M has the properties:
3323  *    - upper triangular part of M[1..k, 1..k] is in hermite form
3324  *
3325  *          [g_1  *         * ]
3326  *          [    g_2   . .  * ]
3327  *      H = [          .    * ]
3328  *          [            .  * ]
3329  *          [              g_k]
3330  *
3331  *    - M[k+1, k+i] = s/d_i, where d_i is the minimum denominator of the
3332  *      system of linear equations [A | B]x = b[1..-1, i], i = 1..t
3333  *    - Vectors M[1..k, i] is reduced by doing modular M[k+1, i], i = k+1..k+t
3334  *
3335  *   The t x n+k+t certificate matrix Cert has the property:
3336  *    - For each row of Cert, fraction number computed by
3337  *      Cert[i, 1..-1].(A^(-1).b[1..-1, i]) has denominator d_i, i = 1..t.
3338  *      And Cert[i, 1..-1].(A^(-1).B) is an integer vector
3339  *
3340  * Input:
3341  *   certflag: 1/0, flag to indicate whether to compute certificate Cert
3342  *             or not. If certflag = 1, then compute the certificate.
3343  *             Otherwise, not compute the certificate.
3344  *          n: long, row dimension of A
3345  *          k: long, column dimension of B
3346  *          t: long, column dimension of b
3347  *          M: 1-dim mpz_t array length (n+k+t)*(k+t), n+k+t x k+t matrix M
3348  *
3349  * Output:
3350  *   inplaced mp_M
3351  *   Cert: 1-dim mpz_t array length t*(n+k+t), t x n+k+t certificate matrix
3352  *         Cert as above
3353  *
3354  * Note:
3355  *   the space of mp_Cert needs to be preallocated if certflag = 1.
3356  *   Otherwise, set mp_Cert = NULL
3357  *
3358  */
3359 
3360 void
specialHermite(const long certflag,const long n,const long k,const long t,mpz_t * M,mpz_t * Cert)3361 specialHermite (const long certflag, const long n, const long k, const long t,\
3362 		mpz_t *M, mpz_t *Cert)
3363 {
3364   long i, j, l;
3365   unsigned *C, *c;
3366   mpz_t s, g, p, q, p1, q1, tmpa, tmpb;
3367   mpz_t *P, *P1, *Q, *Q1, *tmpz;
3368 
3369   C = XMALLOC(unsigned, k*(n-1));
3370   c = XMALLOC(unsigned, n+t);
3371   mpz_init_set(s, _M(n+1,1));
3372   { mpz_init(g); mpz_init(p); mpz_init(q); mpz_init(p1); mpz_init(q1); }
3373   { mpz_init(tmpa); mpz_init(tmpb); }
3374   P = mpz_init_array(k);
3375   P1 = mpz_init_array(k);
3376   Q = mpz_init_array(k);
3377   Q1 = mpz_init_array(k);
3378   tmpz = mpz_init_array(n+t);
3379 
3380   for (i = 1; i <= k; i++)
3381     {
3382       /* compute one row of matrix C */
3383       for (j = i; j <= n+i-1; j++)
3384 	mpz_set(_tmpz(j-i+1), _M(j,i));
3385       migcdex(s, _tmpz(1), &_tmpz(2), n-1, &_C(i,1));
3386       for (j = 1; j <= n-1; j++)
3387 	{
3388 	  for (l = i; l <= k+t; l++)
3389 	    {
3390 	      mpz_addmul_ui(_M(i,l), _M(i+j,l), _C(i,j));
3391 	      mpz_mod(_M(i,l), _M(i,l), s);
3392 	    }
3393 	}
3394       /* compute gcdex of M(i,i) and M(n+i,i), apply the unimodular
3395 	 transformation to M and store the transformation to P, Q, P1, Q1 */
3396       mpz_neg(tmpa, s);
3397       mpz_gcdext(g, p, q, _M(i,i), tmpa);
3398       mpz_divexact(p1, s, g);	mpz_divexact(q1, _M(i,i), g);
3399       mpz_set(_P(i), p);   mpz_set(_Q(i), q);
3400       mpz_set(_P1(i), p1); mpz_set(_Q1(i), q1);
3401       mpz_set(_M(i,i), g); mpz_set_ui(_M(n+i,i), 0);
3402       for (j = i+1; j <= k+t; j++)
3403 	{
3404 	  mpz_mul(tmpa, p, _M(i,j));
3405 	  mpz_addmul(tmpa, q, _M(n+i,j));
3406 	  mpz_mul(_M(n+i,j), p1, _M(i,j));
3407 	  mpz_mod(_M(n+i,j), _M(n+i,j), s);
3408 	  mpz_mod(_M(i,j), tmpa, s);
3409 	}
3410       /* zero out the entries of column i of M below M(i,i) and store the
3411 	 quotients q into M */
3412       for (j = i+1; j <= n+i-1; j++)
3413 	{
3414 	  mpz_divexact(q, _M(j,i), _M(i,i));
3415 	  for (l = i+1; l <= k+t; l++)
3416 	    {
3417 	      mpz_submul(_M(j,l), q, _M(i,l));
3418 	      mpz_mod(_M(j,l), _M(j,l), s);
3419 	    }
3420 	  mpz_neg(_M(j,i), q);
3421 	}
3422     }
3423   for (i = 1; i <= t; i++)
3424     {
3425       for (j = k+1; j <= n+k+i-1; j++)
3426 	mpz_set(_tmpz(j-k), _M(j,k+i));
3427       migcdex(s, _tmpz(1), &_tmpz(2), n+i-2, &_c(1));
3428       for (j = k+2; j <= n+k+i-1; j++)
3429 	mpz_addmul_ui(_M(k+1,k+i), _M(j,k+i), _c(j-(k+1)));
3430       mpz_gcd(_M(k+1,k+i), _M(k+1,k+i), _M(n+k+i,k+i));
3431 
3432       if (certflag == 1)
3433 	{
3434 	  /* when gcd(M(k+1..n+k+i-1, k+i), s) = s then do not need to compute
3435 	     Cert(i, 1..-1).  carry on to compute next row of Cert */
3436 	  if (!mpz_cmp(_M(k+1,k+i), s)) { continue; }
3437 
3438 	  /* initializtion of Cert */
3439 	  mpz_set_ui(_Cert(i,k+1), 1);
3440 	  for (j = k+2; j <= n+k+i-1; j++)
3441 	    mpz_set_ui(_Cert(i,j), _c(j-(k+1)));
3442 
3443 	  /* apply the stored transforms to Cert in inversed sequence */
3444 	  for (j = k; j >= 1; j--)
3445 	    {
3446 	      for (l = j+1; l <= n+j-1; l++)
3447 		{
3448 		  mpz_addmul(_Cert(i,j), _Cert(i,l), _M(l,j));
3449 		  mpz_mod(_Cert(i,j), _Cert(i,j), s);
3450 		}
3451 	      mpz_mul(tmpa, _P(j), _Cert(i,j));
3452 	      mpz_addmul(tmpa, _P1(j), _Cert(i,n+j));
3453 	      mpz_mul(tmpb, _Q(j), _Cert(i,j));
3454 	      mpz_addmul(tmpb, _Q1(j), _Cert(i,n+j));
3455 	      mpz_mod(_Cert(i,n+j), tmpb, s);
3456 	      mpz_mod(_Cert(i,j), tmpa, s);
3457 	      for (l = j+1; l <= n+j-1; l++)
3458 		{
3459 		  mpz_addmul_ui(_Cert(i,l), _Cert(i,j), _C(j,l-j));
3460 		  mpz_mod(_Cert(i,l), _Cert(i,l), s);
3461 		}
3462 	    }
3463 	}
3464     }
3465   /* carry on to reduce the upper triangular entries in the first k columns
3466      of M using diagonal entries */
3467   for (i = 2; i <= k; i++)
3468     {
3469       for (j = 1; j <= i-1; j++)
3470 	{
3471 	  mpz_tdiv_qr(q, _M(j,i), _M(j,i), _M(i,i));
3472 	  for (l = i+1; l <= k+t; l++)
3473 	    {
3474 	      mpz_submul(_M(j,l), q, _M(i,l));
3475 	      mpz_mod(_M(j,l), _M(j,l), s);
3476 	    }
3477 	}
3478     }
3479   /* reduce the last t columns of M using M(k+1,j), j=k+1..k+t */
3480   for (i = 1; i <= t; i++)
3481     {
3482       for (j = 1; j < k+1; j++)
3483 	mpz_mod(_M(j,k+i), _M(j,k+i), _M(k+1,k+i));
3484     }
3485 
3486   { XFREE(C); XFREE(c); }
3487   { mpz_clear(s); mpz_clear(g); mpz_clear(tmpa); mpz_clear(tmpb); }
3488   { mpz_clear(p); mpz_clear(q); mpz_clear(p1); mpz_clear(q1); }
3489   for (i = 0; i < k; i++)
3490     { mpz_clear(P[i]); mpz_clear(P1[i]); mpz_clear(Q[i]); mpz_clear(Q1[i]); }
3491   { XFREE(P); XFREE(P1); XFREE(Q); XFREE(Q1); }
3492   for (i = 0; i < n+t; i++) { mpz_clear(tmpz[i]); } { XFREE(tmpz); }
3493   return;
3494 }
3495 
3496 
3497 /*
3498  * Calling Sequence:
3499  *   mpz_init_array(n)
3500  *
3501  * Summary:
3502  *   Allocate a mpz_t array length n dynamically and set all the entries in
3503  *   the array to be zero.
3504  *
3505  */
3506 
3507 mpz_t *
mpz_init_array(const long n)3508 mpz_init_array (const long n)
3509 {
3510   long i;
3511   mpz_t *t = XMALLOC(mpz_t, n);
3512   for (i = 0; i < n; i++) { mpz_init(t[i]); }
3513   return t;
3514 }
3515 
3516 
3517 
3518 /*
3519  * Calling Sequence:
3520  *   migcdex(N, a, b, n, c)
3521  *
3522  * Summary:
3523  *   Compute solutions to the modulo N extended GCD problem
3524  *
3525  * Description:
3526  *   Given two mpz_t integers a and N, mpz_t array b, this function computes
3527  *   non-negative integers c[0], ... , c[n] such that
3528  *   gcd(a + c[0]b[0] + ... + c[n]b[n], N) = gcd(a, b[0], ..., b[n], N).
3529  *
3530  * Input:
3531  *   N: mpz_t, explained above
3532  *   a: mpz_t, explained above
3533  *   b: 1-dim mpz_t array length n, explained above
3534  *   n: long, dimension of array b and c
3535  *
3536  * Output:
3537  *   c: 1-dim unsigned array length n, explained above
3538  *
3539  */
3540 
3541 void
migcdex(const mpz_t N,const mpz_t a,mpz_t * b,const long n,unsigned * c)3542 migcdex (const mpz_t N, const mpz_t a, mpz_t *b, const long n, unsigned *c)
3543 {
3544   long i, j;
3545   mpz_t gAN, gAbN, A;
3546 
3547   mpz_init(gAN);
3548   mpz_init(gAbN);
3549   mpz_init_set(A, a);
3550   mpz_gcd(gAbN, a, N);
3551   for (i = 0; i < n; i++)
3552     {
3553       mpz_gcd(gAbN, gAbN, b[i]);
3554       for (j = 0; ; j++)
3555 	{
3556 	  mpz_gcd(gAN, A, N);
3557 	  if (!mpz_cmp(gAbN, gAN)) { break; }
3558 	  mpz_add(A, A, b[i]);
3559 	}
3560       c[i] = j;
3561     }
3562 
3563   { mpz_clear(gAN); mpz_clear(gAbN); mpz_clear(A); }
3564   return;
3565 }
3566 
3567 
3568 
3569 /*
3570  * Calling Sequence:
3571  *   kernelBasis(n, k, t, mp_M, mp_N)
3572  *
3573  * Summary:
3574  *   Compute a kernel basis of a full row rank matrix
3575  *
3576  * Description:
3577  *   The function computes inplace a right integer kernel basis N of full
3578  *   row rank matrix [A | B] taking M, the matrix computed by function
3579  *   specialHermite, and N as input.
3580  *
3581  *   The input matrix N looks like
3582  *
3583  *              k
3584  *         [ A^(-1)B ] n
3585  *   N = s*[---------]
3586  *         [  -I_k   ] k
3587  *
3588  *   where A a n x n matrix, B a n x k matrix, I_k a k x k identity matrix.
3589  *
3590  *   N is inplaced by
3591  *
3592  *         [ A^(-1)B ]
3593  *   N = s*[---------].H^(-1)
3594  *         [  -I_k   ]
3595  *
3596  *   where k x k matrix H is the hermite form of M computed by specialHermite.
3597  *
3598  * Input:
3599  *      n: long, row dimension of A
3600  *      k: long, column dimension of B
3601  *      t: long, same as parameter passed to function specialHermite (usually 1)
3602  *   mp_M: 1-dim mpz_t array length (n+k+t)*(k+t), computed by function
3603  *         specialHermite
3604  *
3605  * Output:
3606  *   mp_N: 1-dim mpz_t array length (n+k)*k, representing a n+k x k kernel
3607  *         basis M, as shown above
3608  *
3609  * Note:
3610  *   the space of mp_N needs to be preallocated
3611  *
3612  */
3613 
3614 void
kernelBasis(const long n,const long k,const long t,mpz_t * mp_M,mpz_t * mp_N)3615 kernelBasis (const long n, const long k, const long t,  mpz_t *mp_M, mpz_t *mp_N)
3616 {
3617   long i, j, l;
3618   mpz_t mp_temp;
3619 
3620   mpz_init(mp_temp);
3621   for (i = 0; i < k; i++)
3622     {
3623       for (j = 0; j < i; j++)
3624 	/* column operation N[1..-1, i] = N[1..-1, i]-N[1..-1, j]*M[j, i] */
3625 	for (l = 0; l < n+k; l++)
3626 	  {
3627 	    mpz_mul(mp_temp, mp_N[l*k+j], mp_M[j*(k+t)+i]);
3628 	    mpz_sub(mp_N[l*k+i], mp_N[l*k+i], mp_temp);
3629 	  }
3630 
3631       /* column operation N[1..-1, i] = N[1..-1, i]/M[i, i] */
3632       for (l = 0; l < n+k; l++)
3633 	{
3634 	  mpz_divexact(mp_N[l*k+i], mp_N[l*k+i], mp_M[i*(k+t)+i]);
3635 	}
3636     }
3637 
3638   mpz_clear(mp_temp);
3639 }
3640 
3641