1 /*******************************************************************************
2  * Copyright (c) 2018, College of William & Mary
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the College of William & Mary nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COLLEGE OF WILLIAM & MARY BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * PRIMME: https://github.com/primme/primme
28  * Contact: Andreas Stathopoulos, a n d r e a s _at_ c s . w m . e d u
29  *******************************************************************************
30  * File: auxiliary.c
31  *
32  * Purpose - Miscellanea functions to copy and permuting matrices.
33  *
34  ******************************************************************************/
35 
36 #ifndef THIS_FILE
37 #define THIS_FILE "../linalg/auxiliary.c"
38 #endif
39 
40 #include <string.h>   /* memmove */
41 #include <assert.h>
42 #include <math.h>
43 #include "numerical.h"
44 
45 #ifdef SUPPORTED_TYPE
46 
47 
48 /******************************************************************************
49  * Function Num_matrix_astype - Create a matrix, y (if asked) and copy the
50  *    matrix x into y (if asked).
51  *
52  *    do_alloc do_copy xt==yt action
53  *    --------------------------------------------
54  *        0       0      *    do nothing
55  *        0       1      *    copy x into y
56  *        1       0      0    create y of type yt
57  *        1       1      0    create y of type yt and copy x into y
58  *        1       *      1    assign *y = x
59  *       -1       *      1    do nothing
60  *       -1       0      0    destroy x
61  *       -1       1      0    copy x into y and destroy x
62  *
63  * PARAMETERS
64  * ---------------------------
65  * x           The source matrix
66  * m           The number of rows of x
67  * n           The number of columns of x
68  * ldx         The leading dimension of x
69  * xt          The datatype of x
70  * y           On output y = x
71  * ldy         The leading dimension of y
72  * yt          The datatype of y
73  *
74  * NOTE: x and y *cannot* partially overlap
75  *
76  ******************************************************************************/
77 
78 TEMPLATE_PLEASE
Num_matrix_astype_Sprimme(void * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_op_datatype xt,void ** y,PRIMME_INT * ldy,primme_op_datatype yt,int do_alloc,int do_copy,primme_context ctx)79 int Num_matrix_astype_Sprimme(void *x, PRIMME_INT m, PRIMME_INT n,
80       PRIMME_INT ldx, primme_op_datatype xt, void **y, PRIMME_INT *ldy,
81       primme_op_datatype yt, int do_alloc, int do_copy, primme_context ctx) {
82 
83    /* Replace primme_op_default */
84 
85    if (xt == primme_op_default) xt = PRIMME_OP_SCALAR;
86    if (yt == primme_op_default) yt = PRIMME_OP_SCALAR;
87 
88    /* Call the function that y has the type of the SCALAR */
89 
90    if (yt != PRIMME_OP_SCALAR) {
91       switch(yt) {
92 #ifdef SUPPORTED_HALF_TYPE
93       case primme_op_half:   return Num_matrix_astype_Shprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
94 #endif
95 #ifndef PRIMME_WITHOUT_FLOAT
96       case primme_op_float:  return Num_matrix_astype_Ssprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
97 #endif
98       case primme_op_double: return Num_matrix_astype_Sdprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
99 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
100       case primme_op_quad:   return Num_matrix_astype_Sqprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
101 #endif
102 #ifdef USE_HOST
103       case primme_op_int:    return Num_matrix_astype_iprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
104 #endif
105       default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
106       }
107    }
108 
109    /* Quick exit */
110 
111    if (xt == PRIMME_OP_SCALAR && do_alloc) {
112       *y = x;
113       if (ldy) *ldy = ldx;
114       return 0;
115    }
116 
117    /* Create workspace for y and copy x on y */
118 
119    SCALAR *y0 = NULL;
120    PRIMME_INT ldy0 = 0;
121    if (do_alloc > 0) {
122       Mem_keep_frame(
123             ctx); /* The next allocation will not be freed in this function */
124       CHKERR(Num_malloc_Sprimme(m * n, &y0, ctx));
125       *y = (void*)y0;
126       ldy0 = m;
127       if (ldy) *ldy = m;
128    } else {
129       y0 = (SCALAR*)*y;
130       ldy0 = (ldy ? *ldy : 1);
131    }
132 
133    if (do_copy && x != NULL) {
134       CHKERR(Num_copy_Tmatrix_Sprimme(x, xt, m, n, ldx, y0, ldy0, ctx));
135    }
136 
137    /* Destroy x if asked */
138 
139    if (do_alloc < 0 && x != y0) CHKERR(Num_free_Sprimme((SCALAR *)x, ctx));
140 
141    return 0;
142 }
143 
144 #ifdef USE_HOST
145 #ifdef USE_DOUBLE
146 
147 TEMPLATE_PLEASE
Num_matrix_astype_iprimme(void * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_op_datatype xt,void ** y,PRIMME_INT * ldy,primme_op_datatype yt,int do_alloc,int do_copy,primme_context ctx)148 int Num_matrix_astype_iprimme(void *x, PRIMME_INT m, PRIMME_INT n,
149       PRIMME_INT ldx, primme_op_datatype xt, void **y, PRIMME_INT *ldy,
150       primme_op_datatype yt, int do_alloc, int do_copy, primme_context ctx) {
151 
152    /* Replace primme_op_default */
153 
154    if (xt == primme_op_default) xt = primme_op_int;
155    if (yt == primme_op_default) yt = primme_op_int;
156 
157    /* Call the function that y has the type of the SCALAR */
158 
159    if (yt != primme_op_int) {
160       switch(yt) {
161 #ifdef SUPPORTED_HALF_TYPE
162       case primme_op_half:   return Num_matrix_astype_Shprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
163 #endif
164 #ifndef PRIMME_WITHOUT_FLOAT
165       case primme_op_float:  return Num_matrix_astype_Ssprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
166 #endif
167       case primme_op_double: return Num_matrix_astype_Sdprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
168 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
169       case primme_op_quad:   return Num_matrix_astype_Sqprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
170 #endif
171       default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
172       }
173    }
174 
175    /* Quick exit */
176 
177    if (xt == primme_op_int && do_alloc) {
178       *y = x;
179       if (ldy) *ldy = ldx;
180       return 0;
181    }
182 
183    /* Create workspace for y and copy x on y */
184 
185    int *y0 = NULL;
186    PRIMME_INT ldy0 = 0;
187    if (do_alloc > 0) {
188       Mem_keep_frame(
189             ctx); /* The next allocation will not be freed in this function */
190       CHKERR(Num_malloc_iprimme(m * n, &y0, ctx));
191       *y = (void*)y0;
192       ldy0 = m;
193       if (ldy) *ldy = m;
194    } else {
195       y0 = (int*)*y;
196       ldy0 = (ldy ? *ldy : 1);
197    }
198 
199    if (do_copy && x != NULL) {
200       CHKERR(Num_copy_Tmatrix_iprimme(x, xt, m, n, ldx, y0, ldy0, ctx));
201    }
202 
203    /* Destroy x if asked */
204 
205    if (do_alloc < 0 && x != y0) CHKERR(Num_free_iprimme((int *)x, ctx));
206 
207    return 0;
208 }
209 
210 #endif /* USE_DOUBLE */
211 #endif /* USE_HOST */
212 
213 /******************************************************************************
214  * Function Num_copy_matrix_astype - copy the matrix x into y.
215  *
216  * PARAMETERS
217  * ---------------------------
218  * x           The source matrix
219  * xm0         The starting row
220  * xn0         The starting column
221  * m           The number of rows of x
222  * n           The number of columns of x
223  * ldx         The leading dimension of x
224  * xt          The datatype of x
225  * y           On output y = x
226  * xm0         The starting row
227  * xn0         The starting column
228  * ldy         The leading dimension of y
229  * yt          The datatype of y
230  *
231  * NOTE: x and y *cannot* partially overlap
232  *
233  ******************************************************************************/
234 
235 TEMPLATE_PLEASE
Num_copy_matrix_astype_Sprimme(void * x,PRIMME_INT xm0,PRIMME_INT xn0,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_op_datatype xt,void * y,PRIMME_INT ym0,PRIMME_INT yn0,PRIMME_INT ldy,primme_op_datatype yt,primme_context ctx)236 int Num_copy_matrix_astype_Sprimme(void *x, PRIMME_INT xm0, PRIMME_INT xn0,
237       PRIMME_INT m, PRIMME_INT n, PRIMME_INT ldx, primme_op_datatype xt,
238       void *y, PRIMME_INT ym0, PRIMME_INT yn0, PRIMME_INT ldy,
239       primme_op_datatype yt, primme_context ctx) {
240 
241    /* Replace primme_op_default */
242 
243    if (xt == primme_op_default) xt = PRIMME_OP_SCALAR;
244    if (yt == primme_op_default) yt = PRIMME_OP_SCALAR;
245 
246    /* Call the function that y has the type of the SCALAR */
247 
248    if (yt != PRIMME_OP_SCALAR) {
249       switch(yt) {
250 #ifdef SUPPORTED_HALF_TYPE
251       case primme_op_half:   return Num_copy_matrix_astype_Shprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
252 #endif
253 #ifndef PRIMME_WITHOUT_FLOAT
254       case primme_op_float:  return Num_copy_matrix_astype_Ssprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
255 #endif
256       case primme_op_double: return Num_copy_matrix_astype_Sdprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
257 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
258       case primme_op_quad:   return Num_copy_matrix_astype_Sqprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
259 #endif
260       default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
261       }
262    }
263 
264    size_t xt_size;
265    CHKERR(Num_sizeof_Sprimme(xt, &xt_size));
266    return Num_copy_Tmatrix_Sprimme(&((char *)x)[xt_size * (xm0 + ldx * xn0)],
267          xt, m, n, ldx, &((SCALAR *)y)[ym0 + ldy * yn0], ldy, ctx);
268 }
269 
270 
271 /******************************************************************************
272  * Function Num_sizeof_Sprimme - Return the size of an element with the given
273  *    type.
274  *
275  * INPUT/OUTPUT PARAMETERS
276  * ---------------------------
277  * t    Type
278  * s    returned size of the type in bytes
279  *
280  * RETURN
281  * ------
282  * int  error code
283  *
284  ******************************************************************************/
285 
286 TEMPLATE_PLEASE
Num_sizeof_Sprimme(primme_op_datatype t,size_t * s)287 int Num_sizeof_Sprimme(primme_op_datatype t, size_t *s) {
288 
289    if (t == primme_op_default) t = PRIMME_OP_SCALAR;
290 
291    *s = 0;
292 
293    switch(t) {
294 #  ifdef SUPPORTED_HALF_TYPE
295    case primme_op_half:    *s = sizeof(PRIMME_HALF); break;
296 #  endif
297    case primme_op_float:   *s = sizeof(float); break;
298    case primme_op_double:  *s = sizeof(double); break;
299 #  ifdef PRIMME_WITH_NATIVE_QUAD
300    case primme_op_quad:    *s = sizeof(PRIMME_QUAD); break;
301 #  endif
302    case primme_op_int:     *s = sizeof(int); break;
303    default:                return PRIMME_FUNCTION_UNAVAILABLE;
304    }
305 
306 #ifdef USE_COMPLEX
307    *s *= 2;
308 #endif
309 
310    return 0;
311 }
312 
313 /******************************************************************************
314  * Function Num_machine_epsilon_Sprimme - Return the machine epsilon of the
315  *    type.
316  *
317  * INPUT/OUTPUT PARAMETERS
318  * ---------------------------
319  * t    Type
320  * eps  The returned machine epsilon
321  *
322  * RETURN
323  * ------
324  * int  error code
325  *
326  ******************************************************************************/
327 
328 TEMPLATE_PLEASE
Num_machine_epsilon_Sprimme(primme_op_datatype t,double * eps)329 int Num_machine_epsilon_Sprimme(primme_op_datatype t, double *eps) {
330 
331    /* Replace primme_op_default */
332 
333    if (t == primme_op_default) t = PRIMME_OP_SCALAR;
334 
335    /* Call the function that t has the type of the SCALAR */
336 
337    if (t != PRIMME_OP_SCALAR) {
338       switch(t) {
339 #ifdef SUPPORTED_HALF_TYPE
340       case primme_op_half:   return Num_machine_epsilon_Shprimme(t, eps);
341 #endif
342 #ifndef PRIMME_WITHOUT_FLOAT
343       case primme_op_float:  return Num_machine_epsilon_Ssprimme(t, eps);
344 #endif
345       case primme_op_double: return Num_machine_epsilon_Sdprimme(t, eps);
346 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
347       case primme_op_quad:   return Num_machine_epsilon_Sqprimme(t, eps);
348 #endif
349       default: return PRIMME_FUNCTION_UNAVAILABLE;
350       }
351    }
352 
353    if (eps) *eps = MACHINE_EPSILON;
354 
355    return 0;
356 }
357 
358 #ifdef USE_HOST
359 
360 /******************************************************************************
361  * Function Num_copy_matrix_conj - Copy the matrix x' into y
362  *
363  * PARAMETERS
364  * ---------------------------
365  * x           The source matrix
366  * m           The number of rows of x
367  * n           The number of columns of x
368  * ldx         The leading dimension of x
369  * y           On output y = x
370  * ldy         The leading dimension of y
371  *
372  * NOTE: x and y *can't* overlap
373  *
374  ******************************************************************************/
375 
376 #if !defined(USE_HALFCOMPLEX) || defined(PRIMME_WITH_NATIVE_COMPLEX_HALF)
377 TEMPLATE_PLEASE
Num_copy_matrix_conj_Sprimme(SCALAR * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,SCALAR * y,PRIMME_INT ldy,primme_context ctx)378 int Num_copy_matrix_conj_Sprimme(SCALAR *x, PRIMME_INT m, PRIMME_INT n,
379       PRIMME_INT ldx, SCALAR *y, PRIMME_INT ldy, primme_context ctx) {
380    (void)ctx;
381 
382    PRIMME_INT i, j;
383 
384    assert(m == 0 || n == 0 || (ldx >= m && ldy >= n));
385 
386    /* TODO: assert x and y don't overlap */
387    for (i = 0; i < n; i++)
388       for (j = 0; j < m; j++)
389          y[j * ldy + i] = CONJ(x[i * ldx + j]);
390 
391    return 0;
392 }
393 #endif
394 
395 #ifdef USE_DOUBLE
396 TEMPLATE_PLEASE
Num_zero_matrix_Tprimme(void * x,primme_op_datatype xt,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_context ctx)397 int Num_zero_matrix_Tprimme(void *x, primme_op_datatype xt, PRIMME_INT m,
398       PRIMME_INT n, PRIMME_INT ldx, primme_context ctx) {
399 
400    switch (xt) {
401 #  ifdef SUPPORTED_HALF_TYPE
402       case primme_op_half:   return Num_zero_matrix_hprimme((PRIMME_HALF*)x, m, n, ldx, ctx);
403 #  endif
404 #  ifndef PRIMME_WITHOUT_FLOAT
405       case primme_op_float:  return Num_zero_matrix_sprimme((float*)      x, m, n, ldx, ctx);
406 #  endif
407       case primme_op_double: return Num_zero_matrix_dprimme((double*)     x, m, n, ldx, ctx);
408 #  ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
409       case primme_op_quad:   return Num_zero_matrix_qprimme((PRIME_QUAD*) x, m, n, ldx, ctx);
410 #  endif
411       case primme_op_int: {
412          PRIMME_INT i, j;
413          int *xi = (int *)x;
414          for (i = 0; i < n; i++)
415             for (j = 0; j < m; j++) xi[i * ldx + j] = 0;
416          break;
417       }
418       default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
419    }
420 
421    return 0;
422 }
423 #endif /* USE_DOUBLE */
424 
425 /******************************************************************************
426  * Function Num_copy_trimatrix - Copy the upper/lower triangular part of the
427  *    matrix x into y
428  *
429  * PARAMETERS
430  * ---------------------------
431  * x           The source matrix
432  * m           The number of rows of x
433  * n           The number of columns of x
434  * ldx         The leading dimension of x
435  * ul          if 0, copy the upper part; otherwise copy the lower part
436  * i0          The row index that diagonal starts from
437  * y           On output y = x
438  * ldy         The leading dimension of y
439  * zero        If nonzero, zero the triangular part not copied
440  *
441  * NOTE: the distance between x and y can be less than ldx, or
442  *       x and y *cannot* overlap at all
443  *
444  ******************************************************************************/
445 
446 TEMPLATE_PLEASE
Num_copy_trimatrix_Sprimme(SCALAR * x,int m,int n,int ldx,int ul,int i0,SCALAR * y,int ldy,int zero)447 int Num_copy_trimatrix_Sprimme(SCALAR *x, int m, int n, int ldx, int ul,
448       int i0, SCALAR *y, int ldy, int zero) {
449 
450    int i, j, jm;
451 
452    assert(m == 0 || n == 0 || (ldx >= m && ldy >= m));
453    if (x == y) return 0;
454    if (ul == 0) {
455       /* Copy upper part */
456 
457       if (ldx == ldy && (y > x ? y-x : x-y) < ldx) {
458          /* x and y overlap */
459          for (i=0; i<n; i++) {
460             memmove(&y[i*ldy], &x[i*ldx], sizeof(SCALAR)*min(i0+i+1, m));
461             /* zero lower part*/
462             if (zero) for (j=min(i0+i+1, m); j<m; j++) SET_ZERO(y[i*ldy+j]);
463          }
464       }
465       else {
466          /* x and y don't overlap */
467          for (i=0; i<n; i++) {
468             for (j=0, jm=min(i0+i+1, m); j<jm; j++)
469                y[i*ldy+j] = x[i*ldx+j];
470             /* zero lower part*/
471             if (zero) for (j=min(i0+i+1, m); j<m; j++) SET_ZERO(y[i*ldy+j]);
472          }
473       }
474    }
475    else {
476       /* Copy lower part */
477 
478       if (ldx == ldy && (y > x ? y-x : x-y) < ldx) {
479          /* x and y overlap */
480          for (i=0; i<n; i++) {
481             memmove(&y[i*ldy+i0+i], &x[i*ldx+i0+i], sizeof(SCALAR)*(m-min(i0+i, m)));
482             /* zero upper part*/
483             if (zero) for (j=0, jm=min(i0+i, m); j<jm; j++) SET_ZERO(y[i*ldy+j]);
484          }
485       }
486       else {
487          /* x and y don't overlap */
488          for (i=0; i<n; i++) {
489             for (j=i+i0; j<m; j++)
490                y[i*ldy+j] = x[i*ldx+j];
491             /* zero upper part*/
492             if (zero) for (j=0, jm=min(i0+i, m); j<jm; j++) SET_ZERO(y[i*ldy+j]);
493          }
494       }
495    }
496 
497    return 0;
498 }
499 
500 
501 /******************************************************************************
502  * Function Num_copy_trimatrix_compact - Copy the upper triangular part of the matrix x
503  *    into y contiguously, i.e., y has all columns of x row-stacked
504  *
505  * PARAMETERS
506  * ---------------------------
507  * x           The source upper triangular matrix
508  * m           The number of rows of x
509  * n           The number of columns of x
510  * ldx         The leading dimension of x
511  * i0          The row index that diagonal starts from
512  * y           On output y = x and nonzero elements of y are contiguous
513  * ly          Output the final length of y
514  *
515  * NOTE: x and y *cannot* overlap
516  *
517  ******************************************************************************/
518 
519 TEMPLATE_PLEASE
Num_copy_trimatrix_compact_Sprimme(SCALAR * x,PRIMME_INT m,int n,PRIMME_INT ldx,int i0,SCALAR * y,int * ly)520 int Num_copy_trimatrix_compact_Sprimme(SCALAR *x, PRIMME_INT m, int n,
521       PRIMME_INT ldx, int i0, SCALAR *y, int *ly) {
522 
523    int i, j, k;
524 
525    assert(m == 0 || n == 0 || ldx >= m);
526 
527    for (i=0, k=0; i<n; i++)
528       for (j=0; j<=i+i0; j++)
529          y[k++] = x[i*ldx+j];
530    if (ly) *ly = k;
531 
532    return 0;
533 }
534 
535 /******************************************************************************
536  * Function Num_copy_compact_trimatrix - Copy y into the upper triangular part of the
537  *    matrix x
538  *
539  * PARAMETERS
540  * ---------------------------
541  * x           The source vector
542  * m           The number of rows of y
543  * n           The number of columns of y
544  * i0          The row index that diagonal starts from
545  * y           On output the upper triangular part of y has x
546  * ldy         The leading dimension of y
547  *
548  * NOTE: x and y *cannot* overlap
549  *
550  ******************************************************************************/
551 
552 TEMPLATE_PLEASE
Num_copy_compact_trimatrix_Sprimme(SCALAR * x,PRIMME_INT m,int n,int i0,SCALAR * y,int ldy)553 int Num_copy_compact_trimatrix_Sprimme(SCALAR *x, PRIMME_INT m, int n, int i0,
554       SCALAR *y, int ldy) {
555 
556    int i, j, k;
557 
558    assert(m == 0 || n == 0 || (ldy >= m && m >= n));
559 
560    for (i=n-1, k=(n+1)*n/2+i0*n-1; i>=0; i--)
561       for (j=i+i0; j>=0; j--)
562          y[i*ldy+j] = x[k--];
563 
564    return 0;
565 }
566 
567 /*******************************************************************************
568  * Subroutine compute_submatrix - This subroutine computes the nX x nX submatrix
569  *    R = X'*H*X, where H stores the upper triangular part of a symmetric matrix,
570  *    or H is a full non-Hermitian matrix.
571  *
572  * Input parameters
573  * ----------------
574  * X        The coefficient vectors retained from the previous iteration
575  *
576  * nX       Number of columns of X
577  *
578  * H        Matrix
579  *
580  * nH       Dimension of H
581  *
582  * ldH      Leading dimension of H
583  *
584  * isherm   whether H is Hermitian
585  *
586  * ldR      Leading dimension of R
587  *
588  * Output parameters
589  * -----------------
590  * R - nX x nX matrix computed
591  *
592  ******************************************************************************/
593 
594 #if !defined(USE_HALF) && !defined(USE_HALFCOMPLEX)
595 TEMPLATE_PLEASE
compute_submatrix_Sprimme(SCALAR * X,int nX,int ldX,SCALAR * H,int nH,int ldH,int isherm,SCALAR * R,int ldR,primme_context ctx)596 int compute_submatrix_Sprimme(SCALAR *X, int nX, int ldX, SCALAR *H, int nH,
597       int ldH, int isherm, SCALAR *R, int ldR, primme_context ctx) {
598 
599    if (nH == 0 || nX == 0) return 0;
600 
601    SCALAR *rwork;
602    CHKERR(Num_malloc_Sprimme((size_t)nH * (size_t)nX, &rwork, ctx));
603 
604    /* rwork = H * X */
605 
606    Num_zero_matrix_Sprimme(rwork, nH, nX, nH, ctx);
607    if (isherm) {
608       CHKERR(Num_hemm_Sprimme(
609             "L", "U", nH, nX, 1.0, H, ldH, X, ldX, 0.0, rwork, nH, ctx));
610    } else {
611       CHKERR(Num_gemm_Sprimme(
612             "N", "N", nH, nX, nH, 1.0, H, ldH, X, ldX, 0.0, rwork, nH, ctx));
613    }
614 
615    /* R = X' * rwork */
616 
617    Num_zero_matrix_Sprimme(R, nX, nX, ldR, ctx);
618    CHKERR(Num_gemm_Sprimme(
619          "C", "N", nX, nX, nH, 1.0, X, ldX, rwork, nH, 0.0, R, ldR, ctx));
620    CHKERR(Num_free_Sprimme(rwork, ctx));
621 
622   return 0;
623 }
624 #endif /* !defined(USE_HALF) && !defined(USE_HALFCOMPLEX) */
625 
626 #endif /* USE_HOST */
627 
628 /******************************************************************************
629  * Function Num_copy_matrix_columns - Copy the matrix x(:,xin) into y(:,yin)
630  *
631  * PARAMETERS
632  * ---------------------------
633  * x           The source matrix
634  * m           The number of rows of x
635  * xin         The column indices to copy
636  * n           The number of columns of x
637  * ldx         The leading dimension of x
638  * y           On output y(:,yin) = x(:,xin)
639  * yin         The column indices of y to be modified
640  * ldy         The leading dimension of y
641  *
642  * NOTE: x(:,xin) and y(:,yin) *cannot* overlap
643  *
644  ******************************************************************************/
645 
646 TEMPLATE_PLEASE
Num_copy_matrix_columns_Sprimme(SCALAR * x,PRIMME_INT m,int * xin,int n,PRIMME_INT ldx,SCALAR * y,int * yin,PRIMME_INT ldy,primme_context ctx)647 int Num_copy_matrix_columns_Sprimme(SCALAR *x, PRIMME_INT m, int *xin, int n,
648                                      PRIMME_INT ldx, SCALAR *y, int *yin,
649                                      PRIMME_INT ldy, primme_context ctx) {
650 
651    int i;
652 
653    /* TODO: assert x and y don't overlap */
654    for (i = 0; i < n; i++) {
655       Num_copy_Sprimme(m, &x[(xin ? xin[i] : i) * ldx], 1,
656             &y[(yin ? yin[i] : i) * ldy], 1, ctx);
657    }
658 
659    return 0;
660 }
661 
662 /******************************************************************************
663  * Function Num_copy_matrix_rows - Copy the matrix x(xin,:) into y(yin,:)
664  *
665  * PARAMETERS
666  * ---------------------------
667  * x           The source matrix
668  * xim         The row indices to copy
669  * m           The number of rows of x
670  * n           The number of columns of x
671  * ldx         The leading dimension of x
672  * y           On output y(yin,:) = x(xin,:)
673  * yim         The row indices of y to be modified
674  * ldy         The leading dimension of y
675  *
676  * NOTE: x(xin,:) and y(yin,:) *cannot* overlap
677  *
678  ******************************************************************************/
679 
680 TEMPLATE_PLEASE
Num_copy_matrix_rows_Sprimme(SCALAR * x,int * xim,int m,int n,PRIMME_INT ldx,SCALAR * y,int * yim,PRIMME_INT ldy,primme_context ctx)681 int Num_copy_matrix_rows_Sprimme(SCALAR *x, int *xim, int m, int n,
682                                      PRIMME_INT ldx, SCALAR *y, int *yim,
683                                      PRIMME_INT ldy, primme_context ctx) {
684 
685    int i;
686 
687    /* TODO: assert x and y don't overlap */
688    for (i = 0; i < m; i++) {
689       Num_copy_Sprimme(
690             n, &x[xim ? xim[i] : i], ldx, &y[yim ? yim[i] : i], ldy, ctx);
691    }
692 
693    return 0;
694 }
695 
696 /******************************************************************************
697  * Subroutine permute_vecs - This routine permutes a set of vectors according
698  *            to a permutation array perm.
699  *
700  * INPUT ARRAYS AND PARAMETERS
701  * ---------------------------
702  * m, n, ld    The number of rows and columns and the leading dimension of
703  *vecs perm        The permutation of the columns rwork       Temporary space
704  *of size the number of rows iwork       Temporary space of size the number
705  *of columns
706  *
707  * INPUT/OUTPUT ARRAYS
708  * -------------------
709  * vecs        The matrix whose columns will be reordered
710  *
711  ******************************************************************************/
712 
713 TEMPLATE_PLEASE
permute_vecs_Sprimme(SCALAR * vecs,PRIMME_INT m,int n,PRIMME_INT ld,int * perm_,primme_context ctx)714 int permute_vecs_Sprimme(SCALAR *vecs, PRIMME_INT m, int n, PRIMME_INT ld,
715                          int *perm_, primme_context ctx) {
716 
717    int currentIndex;     /* Index of vector in sorted order                   */
718    int sourceIndex;      /* Position of out-of-order vector in original order */
719    int destinationIndex; /* Position of out-of-order vector in sorted order */
720    int tempIndex;        /* Used to swap                                      */
721    int *perm;            /* A copy of perm_                                   */
722    SCALAR *rwork;        /* vector buffer */
723 
724    CHKERR(Num_malloc_iprimme(n, &perm, ctx));
725    CHKERR(Num_malloc_Sprimme(m, &rwork, ctx));
726 
727    /* Check perm_ is a permutation */
728 
729 #ifndef NDEBUG
730    for (tempIndex = 0; tempIndex < n; tempIndex++)
731       perm[tempIndex] = 0;
732    for (tempIndex = 0; tempIndex < n; tempIndex++) {
733       assert(0 <= perm_[tempIndex] && perm_[tempIndex] < n);
734       perm[perm_[tempIndex]] = 1;
735    }
736    for (tempIndex = 0; tempIndex < n; tempIndex++)
737       assert(perm[tempIndex] == 1);
738 #endif
739 
740    /* Copy of perm_ into perm, to avoid to modify the input permutation */
741 
742    for (tempIndex = 0; tempIndex < n; tempIndex++)
743       perm[tempIndex] = perm_[tempIndex];
744 
745    /* Continue until all vectors are in the sorted order */
746 
747    currentIndex = 0;
748    while (1) {
749 
750       /* Find a vector that does not belong in its original position */
751       while ((currentIndex < n) && (perm[currentIndex] == currentIndex)) {
752          currentIndex++;
753       }
754 
755       /* Return if they are in the sorted order */
756       if (currentIndex >= n) {
757          break;
758       }
759 
760       /* Copy the vector to a buffer for swapping */
761       Num_copy_Sprimme(m, &vecs[currentIndex * ld], 1, rwork, 1, ctx);
762 
763       destinationIndex = currentIndex;
764       /* Copy vector perm[destinationIndex] into position destinationIndex */
765 
766       while (perm[destinationIndex] != currentIndex) {
767 
768          sourceIndex = perm[destinationIndex];
769          Num_copy_Sprimme(m, &vecs[sourceIndex * ld], 1,
770                           &vecs[destinationIndex * ld], 1, ctx);
771          tempIndex = perm[destinationIndex];
772          perm[destinationIndex] = destinationIndex;
773          destinationIndex = tempIndex;
774       }
775 
776       /* Copy the vector from the buffer to where it belongs */
777       Num_copy_Sprimme(m, rwork, 1, &vecs[destinationIndex * ld], 1, ctx);
778       perm[destinationIndex] = destinationIndex;
779 
780       currentIndex++;
781    }
782 
783    /* Check permutation */
784    for (currentIndex = 0; currentIndex < n; currentIndex++)
785       assert(perm[currentIndex] == currentIndex);
786 
787    CHKERR(Num_free_iprimme(perm, ctx));
788    CHKERR(Num_free_Sprimme(rwork, ctx));
789 
790    return 0;
791 }
792 
793 #ifdef USE_DOUBLE
794 TEMPLATE_PLEASE
permute_vecs_iprimme(int * vecs,int n,int * perm_,primme_context ctx)795 int permute_vecs_iprimme(int *vecs, int n, int *perm_, primme_context ctx) {
796 
797    int currentIndex;     /* Index of vector in sorted order                   */
798    int sourceIndex;      /* Position of out-of-order vector in original order */
799    int destinationIndex; /* Position of out-of-order vector in sorted order   */
800    int tempIndex;        /* Used to swap                                      */
801    int *perm;            /* A copy of perm_                                   */
802    int aux;
803 
804    /* Check that perm_ and iwork do not overlap */
805 
806    CHKERR(Num_malloc_iprimme(n, &perm, ctx));
807 
808    /* Check perm_ is a permutation */
809 
810 #ifndef NDEBUG
811    for (tempIndex=0; tempIndex<n; tempIndex++) perm[tempIndex] = 0;
812    for (tempIndex=0; tempIndex<n; tempIndex++) {
813       assert(0 <= perm_[tempIndex] && perm_[tempIndex] < n);
814       perm[perm_[tempIndex]] = 1;
815    }
816    for (tempIndex=0; tempIndex<n; tempIndex++) assert(perm[tempIndex] == 1);
817 #endif
818 
819    /* Copy of perm_ into perm, to avoid to modify the input permutation */
820 
821    for (tempIndex=0; tempIndex<n; tempIndex++)
822       perm[tempIndex] = perm_[tempIndex];
823 
824    /* Continue until all vectors are in the sorted order */
825 
826    currentIndex = 0;
827    while (1) {
828 
829       /* Find a vector that does not belong in its original position */
830       while ((currentIndex < n) && (perm[currentIndex] == currentIndex)) {
831          currentIndex++;
832       }
833 
834       /* Return if they are in the sorted order */
835       if (currentIndex >= n) {
836          break;
837       }
838 
839       /* Copy the vector to a buffer for swapping */
840       aux = vecs[currentIndex];
841 
842       destinationIndex = currentIndex;
843       /* Copy vector perm[destinationIndex] into position destinationIndex */
844 
845       while (perm[destinationIndex] != currentIndex) {
846 
847          sourceIndex = perm[destinationIndex];
848          vecs[destinationIndex] = vecs[sourceIndex];
849          tempIndex = perm[destinationIndex];
850          perm[destinationIndex] = destinationIndex;
851          destinationIndex = tempIndex;
852       }
853 
854       /* Copy the vector from the buffer to where it belongs */
855       vecs[destinationIndex] = aux;
856       perm[destinationIndex] = destinationIndex;
857 
858       currentIndex++;
859    }
860 
861    /* Check permutation */
862    for (currentIndex=0; currentIndex < n; currentIndex++)
863       assert(perm[currentIndex] == currentIndex);
864 
865    CHKERR(Num_free_iprimme(perm, ctx));
866 
867    return 0;
868 }
869 #endif
870 
871 
872 /******************************************************************************
873  * Subroutine Num_compact_vecs - copy certain columns of matrix into another
874  *       matrix, i.e., work = vecs(perm). If avoidCopy and perm indices are
875  *       consecutive the routine returns a reference in vecs and doesn't copy.
876  *
877  *
878  * PARAMETERS
879  * ---------------------------
880  *
881  * vecs        The matrix
882  * m           The number of rows of vecs
883  * n           The number of columns of to copy
884  * ld          The leading dimension of vecs
885  * perm        The indices of columns to copy
886  * work        The columns are copied to this matrix
887  * ldwork      The leading dimension of work
888  * avoidCopy   If nonzero, the copy is avoid
889  *
890  * return      Reference of a matrix with the columns ordered as perm
891  *
892  ******************************************************************************/
893 
894 TEMPLATE_PLEASE
Num_compact_vecs_Sprimme(SCALAR * vecs,PRIMME_INT m,int n,PRIMME_INT ld,int * perm,SCALAR * work,PRIMME_INT ldwork,int avoidCopy,primme_context ctx)895 SCALAR* Num_compact_vecs_Sprimme(SCALAR *vecs, PRIMME_INT m, int n,
896       PRIMME_INT ld, int *perm, SCALAR *work, PRIMME_INT ldwork,
897       int avoidCopy, primme_context ctx) {
898 
899    int i;
900 
901    if (avoidCopy) {
902       for (i=0; i<n-1 && perm[i]+1 == perm[i+1]; i++);
903       if (i >= n-1) return &vecs[ld*perm[0]];
904    }
905 
906    for (i=0; i < n; i++) {
907      Num_copy_matrix_Sprimme(&vecs[perm[i] * ld], m, 1, ld, &work[i * ldwork],
908                              ld, ctx);
909    }
910    return work;
911 }
912 
913 /******************************************************************************
914  * Function Num_scale_matrix - Scale the column of the matrix x into y
915  *
916  * PARAMETERS
917  * ---------------------------
918  * x           The source matrix
919  * m           The number of rows of x
920  * n           The number of columns of x
921  * ldx         The leading dimension of x
922  * s           The scales
923  * y           On output y = x
924  * ldy         The leading dimension of y
925  *
926  * NOTE: x and y *can* overlap
927  *
928  ******************************************************************************/
929 
930 TEMPLATE_PLEASE
Num_scale_matrix_Sprimme(SCALAR * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,HREAL * s,SCALAR * y,PRIMME_INT ldy,primme_context ctx)931 int Num_scale_matrix_Sprimme(SCALAR *x, PRIMME_INT m, PRIMME_INT n,
932       PRIMME_INT ldx, HREAL *s, SCALAR *y, PRIMME_INT ldy, primme_context ctx) {
933 
934    PRIMME_INT i;
935 
936    assert(m == 0 || n == 0 || (ldx >= m && ldy >= m));
937 
938    /* Copy the matrix some columns backward, and other cases */
939    /* TODO: assert x and y don't overlap */
940    for (i=0; i<n; i++) {
941       Num_copy_Sprimme(m, &x[ldx*i], 1, &y[ldy*i], 1, ctx);
942       Num_scal_Sprimme(m, s[i], &y[ldy*i], 1, ctx);
943    }
944 
945    return 0;
946 }
947 
948 #endif /* SUPPORTED_TYPE */
949