1 /*******************************************************************************
2 * Copyright (c) 2018, College of William & Mary
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the College of William & Mary nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COLLEGE OF WILLIAM & MARY BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 * PRIMME: https://github.com/primme/primme
28 * Contact: Andreas Stathopoulos, a n d r e a s _at_ c s . w m . e d u
29 *******************************************************************************
30 * File: auxiliary.c
31 *
32 * Purpose - Miscellanea functions to copy and permuting matrices.
33 *
34 ******************************************************************************/
35
36 #ifndef THIS_FILE
37 #define THIS_FILE "../linalg/auxiliary.c"
38 #endif
39
40 #include <string.h> /* memmove */
41 #include <assert.h>
42 #include <math.h>
43 #include "numerical.h"
44
45 #ifdef SUPPORTED_TYPE
46
47
48 /******************************************************************************
49 * Function Num_matrix_astype - Create a matrix, y (if asked) and copy the
50 * matrix x into y (if asked).
51 *
52 * do_alloc do_copy xt==yt action
53 * --------------------------------------------
54 * 0 0 * do nothing
55 * 0 1 * copy x into y
56 * 1 0 0 create y of type yt
57 * 1 1 0 create y of type yt and copy x into y
58 * 1 * 1 assign *y = x
59 * -1 * 1 do nothing
60 * -1 0 0 destroy x
61 * -1 1 0 copy x into y and destroy x
62 *
63 * PARAMETERS
64 * ---------------------------
65 * x The source matrix
66 * m The number of rows of x
67 * n The number of columns of x
68 * ldx The leading dimension of x
69 * xt The datatype of x
70 * y On output y = x
71 * ldy The leading dimension of y
72 * yt The datatype of y
73 *
74 * NOTE: x and y *cannot* partially overlap
75 *
76 ******************************************************************************/
77
78 TEMPLATE_PLEASE
Num_matrix_astype_Sprimme(void * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_op_datatype xt,void ** y,PRIMME_INT * ldy,primme_op_datatype yt,int do_alloc,int do_copy,primme_context ctx)79 int Num_matrix_astype_Sprimme(void *x, PRIMME_INT m, PRIMME_INT n,
80 PRIMME_INT ldx, primme_op_datatype xt, void **y, PRIMME_INT *ldy,
81 primme_op_datatype yt, int do_alloc, int do_copy, primme_context ctx) {
82
83 /* Replace primme_op_default */
84
85 if (xt == primme_op_default) xt = PRIMME_OP_SCALAR;
86 if (yt == primme_op_default) yt = PRIMME_OP_SCALAR;
87
88 /* Call the function that y has the type of the SCALAR */
89
90 if (yt != PRIMME_OP_SCALAR) {
91 switch(yt) {
92 #ifdef SUPPORTED_HALF_TYPE
93 case primme_op_half: return Num_matrix_astype_Shprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
94 #endif
95 #ifndef PRIMME_WITHOUT_FLOAT
96 case primme_op_float: return Num_matrix_astype_Ssprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
97 #endif
98 case primme_op_double: return Num_matrix_astype_Sdprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
99 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
100 case primme_op_quad: return Num_matrix_astype_Sqprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
101 #endif
102 #ifdef USE_HOST
103 case primme_op_int: return Num_matrix_astype_iprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
104 #endif
105 default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
106 }
107 }
108
109 /* Quick exit */
110
111 if (xt == PRIMME_OP_SCALAR && do_alloc) {
112 *y = x;
113 if (ldy) *ldy = ldx;
114 return 0;
115 }
116
117 /* Create workspace for y and copy x on y */
118
119 SCALAR *y0 = NULL;
120 PRIMME_INT ldy0 = 0;
121 if (do_alloc > 0) {
122 Mem_keep_frame(
123 ctx); /* The next allocation will not be freed in this function */
124 CHKERR(Num_malloc_Sprimme(m * n, &y0, ctx));
125 *y = (void*)y0;
126 ldy0 = m;
127 if (ldy) *ldy = m;
128 } else {
129 y0 = (SCALAR*)*y;
130 ldy0 = (ldy ? *ldy : 1);
131 }
132
133 if (do_copy && x != NULL) {
134 CHKERR(Num_copy_Tmatrix_Sprimme(x, xt, m, n, ldx, y0, ldy0, ctx));
135 }
136
137 /* Destroy x if asked */
138
139 if (do_alloc < 0 && x != y0) CHKERR(Num_free_Sprimme((SCALAR *)x, ctx));
140
141 return 0;
142 }
143
144 #ifdef USE_HOST
145 #ifdef USE_DOUBLE
146
147 TEMPLATE_PLEASE
Num_matrix_astype_iprimme(void * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_op_datatype xt,void ** y,PRIMME_INT * ldy,primme_op_datatype yt,int do_alloc,int do_copy,primme_context ctx)148 int Num_matrix_astype_iprimme(void *x, PRIMME_INT m, PRIMME_INT n,
149 PRIMME_INT ldx, primme_op_datatype xt, void **y, PRIMME_INT *ldy,
150 primme_op_datatype yt, int do_alloc, int do_copy, primme_context ctx) {
151
152 /* Replace primme_op_default */
153
154 if (xt == primme_op_default) xt = primme_op_int;
155 if (yt == primme_op_default) yt = primme_op_int;
156
157 /* Call the function that y has the type of the SCALAR */
158
159 if (yt != primme_op_int) {
160 switch(yt) {
161 #ifdef SUPPORTED_HALF_TYPE
162 case primme_op_half: return Num_matrix_astype_Shprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
163 #endif
164 #ifndef PRIMME_WITHOUT_FLOAT
165 case primme_op_float: return Num_matrix_astype_Ssprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
166 #endif
167 case primme_op_double: return Num_matrix_astype_Sdprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
168 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
169 case primme_op_quad: return Num_matrix_astype_Sqprimme(x, m, n, ldx, xt, y, ldy, yt, do_alloc, do_copy, ctx);
170 #endif
171 default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
172 }
173 }
174
175 /* Quick exit */
176
177 if (xt == primme_op_int && do_alloc) {
178 *y = x;
179 if (ldy) *ldy = ldx;
180 return 0;
181 }
182
183 /* Create workspace for y and copy x on y */
184
185 int *y0 = NULL;
186 PRIMME_INT ldy0 = 0;
187 if (do_alloc > 0) {
188 Mem_keep_frame(
189 ctx); /* The next allocation will not be freed in this function */
190 CHKERR(Num_malloc_iprimme(m * n, &y0, ctx));
191 *y = (void*)y0;
192 ldy0 = m;
193 if (ldy) *ldy = m;
194 } else {
195 y0 = (int*)*y;
196 ldy0 = (ldy ? *ldy : 1);
197 }
198
199 if (do_copy && x != NULL) {
200 CHKERR(Num_copy_Tmatrix_iprimme(x, xt, m, n, ldx, y0, ldy0, ctx));
201 }
202
203 /* Destroy x if asked */
204
205 if (do_alloc < 0 && x != y0) CHKERR(Num_free_iprimme((int *)x, ctx));
206
207 return 0;
208 }
209
210 #endif /* USE_DOUBLE */
211 #endif /* USE_HOST */
212
213 /******************************************************************************
214 * Function Num_copy_matrix_astype - copy the matrix x into y.
215 *
216 * PARAMETERS
217 * ---------------------------
218 * x The source matrix
219 * xm0 The starting row
220 * xn0 The starting column
221 * m The number of rows of x
222 * n The number of columns of x
223 * ldx The leading dimension of x
224 * xt The datatype of x
225 * y On output y = x
226 * xm0 The starting row
227 * xn0 The starting column
228 * ldy The leading dimension of y
229 * yt The datatype of y
230 *
231 * NOTE: x and y *cannot* partially overlap
232 *
233 ******************************************************************************/
234
235 TEMPLATE_PLEASE
Num_copy_matrix_astype_Sprimme(void * x,PRIMME_INT xm0,PRIMME_INT xn0,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_op_datatype xt,void * y,PRIMME_INT ym0,PRIMME_INT yn0,PRIMME_INT ldy,primme_op_datatype yt,primme_context ctx)236 int Num_copy_matrix_astype_Sprimme(void *x, PRIMME_INT xm0, PRIMME_INT xn0,
237 PRIMME_INT m, PRIMME_INT n, PRIMME_INT ldx, primme_op_datatype xt,
238 void *y, PRIMME_INT ym0, PRIMME_INT yn0, PRIMME_INT ldy,
239 primme_op_datatype yt, primme_context ctx) {
240
241 /* Replace primme_op_default */
242
243 if (xt == primme_op_default) xt = PRIMME_OP_SCALAR;
244 if (yt == primme_op_default) yt = PRIMME_OP_SCALAR;
245
246 /* Call the function that y has the type of the SCALAR */
247
248 if (yt != PRIMME_OP_SCALAR) {
249 switch(yt) {
250 #ifdef SUPPORTED_HALF_TYPE
251 case primme_op_half: return Num_copy_matrix_astype_Shprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
252 #endif
253 #ifndef PRIMME_WITHOUT_FLOAT
254 case primme_op_float: return Num_copy_matrix_astype_Ssprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
255 #endif
256 case primme_op_double: return Num_copy_matrix_astype_Sdprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
257 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
258 case primme_op_quad: return Num_copy_matrix_astype_Sqprimme(x, xm0, xn0, m, n, ldx, xt, y, ym0, yn0, ldy, yt, ctx);
259 #endif
260 default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
261 }
262 }
263
264 size_t xt_size;
265 CHKERR(Num_sizeof_Sprimme(xt, &xt_size));
266 return Num_copy_Tmatrix_Sprimme(&((char *)x)[xt_size * (xm0 + ldx * xn0)],
267 xt, m, n, ldx, &((SCALAR *)y)[ym0 + ldy * yn0], ldy, ctx);
268 }
269
270
271 /******************************************************************************
272 * Function Num_sizeof_Sprimme - Return the size of an element with the given
273 * type.
274 *
275 * INPUT/OUTPUT PARAMETERS
276 * ---------------------------
277 * t Type
278 * s returned size of the type in bytes
279 *
280 * RETURN
281 * ------
282 * int error code
283 *
284 ******************************************************************************/
285
286 TEMPLATE_PLEASE
Num_sizeof_Sprimme(primme_op_datatype t,size_t * s)287 int Num_sizeof_Sprimme(primme_op_datatype t, size_t *s) {
288
289 if (t == primme_op_default) t = PRIMME_OP_SCALAR;
290
291 *s = 0;
292
293 switch(t) {
294 # ifdef SUPPORTED_HALF_TYPE
295 case primme_op_half: *s = sizeof(PRIMME_HALF); break;
296 # endif
297 case primme_op_float: *s = sizeof(float); break;
298 case primme_op_double: *s = sizeof(double); break;
299 # ifdef PRIMME_WITH_NATIVE_QUAD
300 case primme_op_quad: *s = sizeof(PRIMME_QUAD); break;
301 # endif
302 case primme_op_int: *s = sizeof(int); break;
303 default: return PRIMME_FUNCTION_UNAVAILABLE;
304 }
305
306 #ifdef USE_COMPLEX
307 *s *= 2;
308 #endif
309
310 return 0;
311 }
312
313 /******************************************************************************
314 * Function Num_machine_epsilon_Sprimme - Return the machine epsilon of the
315 * type.
316 *
317 * INPUT/OUTPUT PARAMETERS
318 * ---------------------------
319 * t Type
320 * eps The returned machine epsilon
321 *
322 * RETURN
323 * ------
324 * int error code
325 *
326 ******************************************************************************/
327
328 TEMPLATE_PLEASE
Num_machine_epsilon_Sprimme(primme_op_datatype t,double * eps)329 int Num_machine_epsilon_Sprimme(primme_op_datatype t, double *eps) {
330
331 /* Replace primme_op_default */
332
333 if (t == primme_op_default) t = PRIMME_OP_SCALAR;
334
335 /* Call the function that t has the type of the SCALAR */
336
337 if (t != PRIMME_OP_SCALAR) {
338 switch(t) {
339 #ifdef SUPPORTED_HALF_TYPE
340 case primme_op_half: return Num_machine_epsilon_Shprimme(t, eps);
341 #endif
342 #ifndef PRIMME_WITHOUT_FLOAT
343 case primme_op_float: return Num_machine_epsilon_Ssprimme(t, eps);
344 #endif
345 case primme_op_double: return Num_machine_epsilon_Sdprimme(t, eps);
346 #ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
347 case primme_op_quad: return Num_machine_epsilon_Sqprimme(t, eps);
348 #endif
349 default: return PRIMME_FUNCTION_UNAVAILABLE;
350 }
351 }
352
353 if (eps) *eps = MACHINE_EPSILON;
354
355 return 0;
356 }
357
358 #ifdef USE_HOST
359
360 /******************************************************************************
361 * Function Num_copy_matrix_conj - Copy the matrix x' into y
362 *
363 * PARAMETERS
364 * ---------------------------
365 * x The source matrix
366 * m The number of rows of x
367 * n The number of columns of x
368 * ldx The leading dimension of x
369 * y On output y = x
370 * ldy The leading dimension of y
371 *
372 * NOTE: x and y *can't* overlap
373 *
374 ******************************************************************************/
375
376 #if !defined(USE_HALFCOMPLEX) || defined(PRIMME_WITH_NATIVE_COMPLEX_HALF)
377 TEMPLATE_PLEASE
Num_copy_matrix_conj_Sprimme(SCALAR * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,SCALAR * y,PRIMME_INT ldy,primme_context ctx)378 int Num_copy_matrix_conj_Sprimme(SCALAR *x, PRIMME_INT m, PRIMME_INT n,
379 PRIMME_INT ldx, SCALAR *y, PRIMME_INT ldy, primme_context ctx) {
380 (void)ctx;
381
382 PRIMME_INT i, j;
383
384 assert(m == 0 || n == 0 || (ldx >= m && ldy >= n));
385
386 /* TODO: assert x and y don't overlap */
387 for (i = 0; i < n; i++)
388 for (j = 0; j < m; j++)
389 y[j * ldy + i] = CONJ(x[i * ldx + j]);
390
391 return 0;
392 }
393 #endif
394
395 #ifdef USE_DOUBLE
396 TEMPLATE_PLEASE
Num_zero_matrix_Tprimme(void * x,primme_op_datatype xt,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,primme_context ctx)397 int Num_zero_matrix_Tprimme(void *x, primme_op_datatype xt, PRIMME_INT m,
398 PRIMME_INT n, PRIMME_INT ldx, primme_context ctx) {
399
400 switch (xt) {
401 # ifdef SUPPORTED_HALF_TYPE
402 case primme_op_half: return Num_zero_matrix_hprimme((PRIMME_HALF*)x, m, n, ldx, ctx);
403 # endif
404 # ifndef PRIMME_WITHOUT_FLOAT
405 case primme_op_float: return Num_zero_matrix_sprimme((float*) x, m, n, ldx, ctx);
406 # endif
407 case primme_op_double: return Num_zero_matrix_dprimme((double*) x, m, n, ldx, ctx);
408 # ifdef PRIMME_WITH_NATIVE_COMPLEX_QUAD
409 case primme_op_quad: return Num_zero_matrix_qprimme((PRIME_QUAD*) x, m, n, ldx, ctx);
410 # endif
411 case primme_op_int: {
412 PRIMME_INT i, j;
413 int *xi = (int *)x;
414 for (i = 0; i < n; i++)
415 for (j = 0; j < m; j++) xi[i * ldx + j] = 0;
416 break;
417 }
418 default: CHKERR(PRIMME_FUNCTION_UNAVAILABLE);
419 }
420
421 return 0;
422 }
423 #endif /* USE_DOUBLE */
424
425 /******************************************************************************
426 * Function Num_copy_trimatrix - Copy the upper/lower triangular part of the
427 * matrix x into y
428 *
429 * PARAMETERS
430 * ---------------------------
431 * x The source matrix
432 * m The number of rows of x
433 * n The number of columns of x
434 * ldx The leading dimension of x
435 * ul if 0, copy the upper part; otherwise copy the lower part
436 * i0 The row index that diagonal starts from
437 * y On output y = x
438 * ldy The leading dimension of y
439 * zero If nonzero, zero the triangular part not copied
440 *
441 * NOTE: the distance between x and y can be less than ldx, or
442 * x and y *cannot* overlap at all
443 *
444 ******************************************************************************/
445
446 TEMPLATE_PLEASE
Num_copy_trimatrix_Sprimme(SCALAR * x,int m,int n,int ldx,int ul,int i0,SCALAR * y,int ldy,int zero)447 int Num_copy_trimatrix_Sprimme(SCALAR *x, int m, int n, int ldx, int ul,
448 int i0, SCALAR *y, int ldy, int zero) {
449
450 int i, j, jm;
451
452 assert(m == 0 || n == 0 || (ldx >= m && ldy >= m));
453 if (x == y) return 0;
454 if (ul == 0) {
455 /* Copy upper part */
456
457 if (ldx == ldy && (y > x ? y-x : x-y) < ldx) {
458 /* x and y overlap */
459 for (i=0; i<n; i++) {
460 memmove(&y[i*ldy], &x[i*ldx], sizeof(SCALAR)*min(i0+i+1, m));
461 /* zero lower part*/
462 if (zero) for (j=min(i0+i+1, m); j<m; j++) SET_ZERO(y[i*ldy+j]);
463 }
464 }
465 else {
466 /* x and y don't overlap */
467 for (i=0; i<n; i++) {
468 for (j=0, jm=min(i0+i+1, m); j<jm; j++)
469 y[i*ldy+j] = x[i*ldx+j];
470 /* zero lower part*/
471 if (zero) for (j=min(i0+i+1, m); j<m; j++) SET_ZERO(y[i*ldy+j]);
472 }
473 }
474 }
475 else {
476 /* Copy lower part */
477
478 if (ldx == ldy && (y > x ? y-x : x-y) < ldx) {
479 /* x and y overlap */
480 for (i=0; i<n; i++) {
481 memmove(&y[i*ldy+i0+i], &x[i*ldx+i0+i], sizeof(SCALAR)*(m-min(i0+i, m)));
482 /* zero upper part*/
483 if (zero) for (j=0, jm=min(i0+i, m); j<jm; j++) SET_ZERO(y[i*ldy+j]);
484 }
485 }
486 else {
487 /* x and y don't overlap */
488 for (i=0; i<n; i++) {
489 for (j=i+i0; j<m; j++)
490 y[i*ldy+j] = x[i*ldx+j];
491 /* zero upper part*/
492 if (zero) for (j=0, jm=min(i0+i, m); j<jm; j++) SET_ZERO(y[i*ldy+j]);
493 }
494 }
495 }
496
497 return 0;
498 }
499
500
501 /******************************************************************************
502 * Function Num_copy_trimatrix_compact - Copy the upper triangular part of the matrix x
503 * into y contiguously, i.e., y has all columns of x row-stacked
504 *
505 * PARAMETERS
506 * ---------------------------
507 * x The source upper triangular matrix
508 * m The number of rows of x
509 * n The number of columns of x
510 * ldx The leading dimension of x
511 * i0 The row index that diagonal starts from
512 * y On output y = x and nonzero elements of y are contiguous
513 * ly Output the final length of y
514 *
515 * NOTE: x and y *cannot* overlap
516 *
517 ******************************************************************************/
518
519 TEMPLATE_PLEASE
Num_copy_trimatrix_compact_Sprimme(SCALAR * x,PRIMME_INT m,int n,PRIMME_INT ldx,int i0,SCALAR * y,int * ly)520 int Num_copy_trimatrix_compact_Sprimme(SCALAR *x, PRIMME_INT m, int n,
521 PRIMME_INT ldx, int i0, SCALAR *y, int *ly) {
522
523 int i, j, k;
524
525 assert(m == 0 || n == 0 || ldx >= m);
526
527 for (i=0, k=0; i<n; i++)
528 for (j=0; j<=i+i0; j++)
529 y[k++] = x[i*ldx+j];
530 if (ly) *ly = k;
531
532 return 0;
533 }
534
535 /******************************************************************************
536 * Function Num_copy_compact_trimatrix - Copy y into the upper triangular part of the
537 * matrix x
538 *
539 * PARAMETERS
540 * ---------------------------
541 * x The source vector
542 * m The number of rows of y
543 * n The number of columns of y
544 * i0 The row index that diagonal starts from
545 * y On output the upper triangular part of y has x
546 * ldy The leading dimension of y
547 *
548 * NOTE: x and y *cannot* overlap
549 *
550 ******************************************************************************/
551
552 TEMPLATE_PLEASE
Num_copy_compact_trimatrix_Sprimme(SCALAR * x,PRIMME_INT m,int n,int i0,SCALAR * y,int ldy)553 int Num_copy_compact_trimatrix_Sprimme(SCALAR *x, PRIMME_INT m, int n, int i0,
554 SCALAR *y, int ldy) {
555
556 int i, j, k;
557
558 assert(m == 0 || n == 0 || (ldy >= m && m >= n));
559
560 for (i=n-1, k=(n+1)*n/2+i0*n-1; i>=0; i--)
561 for (j=i+i0; j>=0; j--)
562 y[i*ldy+j] = x[k--];
563
564 return 0;
565 }
566
567 /*******************************************************************************
568 * Subroutine compute_submatrix - This subroutine computes the nX x nX submatrix
569 * R = X'*H*X, where H stores the upper triangular part of a symmetric matrix,
570 * or H is a full non-Hermitian matrix.
571 *
572 * Input parameters
573 * ----------------
574 * X The coefficient vectors retained from the previous iteration
575 *
576 * nX Number of columns of X
577 *
578 * H Matrix
579 *
580 * nH Dimension of H
581 *
582 * ldH Leading dimension of H
583 *
584 * isherm whether H is Hermitian
585 *
586 * ldR Leading dimension of R
587 *
588 * Output parameters
589 * -----------------
590 * R - nX x nX matrix computed
591 *
592 ******************************************************************************/
593
594 #if !defined(USE_HALF) && !defined(USE_HALFCOMPLEX)
595 TEMPLATE_PLEASE
compute_submatrix_Sprimme(SCALAR * X,int nX,int ldX,SCALAR * H,int nH,int ldH,int isherm,SCALAR * R,int ldR,primme_context ctx)596 int compute_submatrix_Sprimme(SCALAR *X, int nX, int ldX, SCALAR *H, int nH,
597 int ldH, int isherm, SCALAR *R, int ldR, primme_context ctx) {
598
599 if (nH == 0 || nX == 0) return 0;
600
601 SCALAR *rwork;
602 CHKERR(Num_malloc_Sprimme((size_t)nH * (size_t)nX, &rwork, ctx));
603
604 /* rwork = H * X */
605
606 Num_zero_matrix_Sprimme(rwork, nH, nX, nH, ctx);
607 if (isherm) {
608 CHKERR(Num_hemm_Sprimme(
609 "L", "U", nH, nX, 1.0, H, ldH, X, ldX, 0.0, rwork, nH, ctx));
610 } else {
611 CHKERR(Num_gemm_Sprimme(
612 "N", "N", nH, nX, nH, 1.0, H, ldH, X, ldX, 0.0, rwork, nH, ctx));
613 }
614
615 /* R = X' * rwork */
616
617 Num_zero_matrix_Sprimme(R, nX, nX, ldR, ctx);
618 CHKERR(Num_gemm_Sprimme(
619 "C", "N", nX, nX, nH, 1.0, X, ldX, rwork, nH, 0.0, R, ldR, ctx));
620 CHKERR(Num_free_Sprimme(rwork, ctx));
621
622 return 0;
623 }
624 #endif /* !defined(USE_HALF) && !defined(USE_HALFCOMPLEX) */
625
626 #endif /* USE_HOST */
627
628 /******************************************************************************
629 * Function Num_copy_matrix_columns - Copy the matrix x(:,xin) into y(:,yin)
630 *
631 * PARAMETERS
632 * ---------------------------
633 * x The source matrix
634 * m The number of rows of x
635 * xin The column indices to copy
636 * n The number of columns of x
637 * ldx The leading dimension of x
638 * y On output y(:,yin) = x(:,xin)
639 * yin The column indices of y to be modified
640 * ldy The leading dimension of y
641 *
642 * NOTE: x(:,xin) and y(:,yin) *cannot* overlap
643 *
644 ******************************************************************************/
645
646 TEMPLATE_PLEASE
Num_copy_matrix_columns_Sprimme(SCALAR * x,PRIMME_INT m,int * xin,int n,PRIMME_INT ldx,SCALAR * y,int * yin,PRIMME_INT ldy,primme_context ctx)647 int Num_copy_matrix_columns_Sprimme(SCALAR *x, PRIMME_INT m, int *xin, int n,
648 PRIMME_INT ldx, SCALAR *y, int *yin,
649 PRIMME_INT ldy, primme_context ctx) {
650
651 int i;
652
653 /* TODO: assert x and y don't overlap */
654 for (i = 0; i < n; i++) {
655 Num_copy_Sprimme(m, &x[(xin ? xin[i] : i) * ldx], 1,
656 &y[(yin ? yin[i] : i) * ldy], 1, ctx);
657 }
658
659 return 0;
660 }
661
662 /******************************************************************************
663 * Function Num_copy_matrix_rows - Copy the matrix x(xin,:) into y(yin,:)
664 *
665 * PARAMETERS
666 * ---------------------------
667 * x The source matrix
668 * xim The row indices to copy
669 * m The number of rows of x
670 * n The number of columns of x
671 * ldx The leading dimension of x
672 * y On output y(yin,:) = x(xin,:)
673 * yim The row indices of y to be modified
674 * ldy The leading dimension of y
675 *
676 * NOTE: x(xin,:) and y(yin,:) *cannot* overlap
677 *
678 ******************************************************************************/
679
680 TEMPLATE_PLEASE
Num_copy_matrix_rows_Sprimme(SCALAR * x,int * xim,int m,int n,PRIMME_INT ldx,SCALAR * y,int * yim,PRIMME_INT ldy,primme_context ctx)681 int Num_copy_matrix_rows_Sprimme(SCALAR *x, int *xim, int m, int n,
682 PRIMME_INT ldx, SCALAR *y, int *yim,
683 PRIMME_INT ldy, primme_context ctx) {
684
685 int i;
686
687 /* TODO: assert x and y don't overlap */
688 for (i = 0; i < m; i++) {
689 Num_copy_Sprimme(
690 n, &x[xim ? xim[i] : i], ldx, &y[yim ? yim[i] : i], ldy, ctx);
691 }
692
693 return 0;
694 }
695
696 /******************************************************************************
697 * Subroutine permute_vecs - This routine permutes a set of vectors according
698 * to a permutation array perm.
699 *
700 * INPUT ARRAYS AND PARAMETERS
701 * ---------------------------
702 * m, n, ld The number of rows and columns and the leading dimension of
703 *vecs perm The permutation of the columns rwork Temporary space
704 *of size the number of rows iwork Temporary space of size the number
705 *of columns
706 *
707 * INPUT/OUTPUT ARRAYS
708 * -------------------
709 * vecs The matrix whose columns will be reordered
710 *
711 ******************************************************************************/
712
713 TEMPLATE_PLEASE
permute_vecs_Sprimme(SCALAR * vecs,PRIMME_INT m,int n,PRIMME_INT ld,int * perm_,primme_context ctx)714 int permute_vecs_Sprimme(SCALAR *vecs, PRIMME_INT m, int n, PRIMME_INT ld,
715 int *perm_, primme_context ctx) {
716
717 int currentIndex; /* Index of vector in sorted order */
718 int sourceIndex; /* Position of out-of-order vector in original order */
719 int destinationIndex; /* Position of out-of-order vector in sorted order */
720 int tempIndex; /* Used to swap */
721 int *perm; /* A copy of perm_ */
722 SCALAR *rwork; /* vector buffer */
723
724 CHKERR(Num_malloc_iprimme(n, &perm, ctx));
725 CHKERR(Num_malloc_Sprimme(m, &rwork, ctx));
726
727 /* Check perm_ is a permutation */
728
729 #ifndef NDEBUG
730 for (tempIndex = 0; tempIndex < n; tempIndex++)
731 perm[tempIndex] = 0;
732 for (tempIndex = 0; tempIndex < n; tempIndex++) {
733 assert(0 <= perm_[tempIndex] && perm_[tempIndex] < n);
734 perm[perm_[tempIndex]] = 1;
735 }
736 for (tempIndex = 0; tempIndex < n; tempIndex++)
737 assert(perm[tempIndex] == 1);
738 #endif
739
740 /* Copy of perm_ into perm, to avoid to modify the input permutation */
741
742 for (tempIndex = 0; tempIndex < n; tempIndex++)
743 perm[tempIndex] = perm_[tempIndex];
744
745 /* Continue until all vectors are in the sorted order */
746
747 currentIndex = 0;
748 while (1) {
749
750 /* Find a vector that does not belong in its original position */
751 while ((currentIndex < n) && (perm[currentIndex] == currentIndex)) {
752 currentIndex++;
753 }
754
755 /* Return if they are in the sorted order */
756 if (currentIndex >= n) {
757 break;
758 }
759
760 /* Copy the vector to a buffer for swapping */
761 Num_copy_Sprimme(m, &vecs[currentIndex * ld], 1, rwork, 1, ctx);
762
763 destinationIndex = currentIndex;
764 /* Copy vector perm[destinationIndex] into position destinationIndex */
765
766 while (perm[destinationIndex] != currentIndex) {
767
768 sourceIndex = perm[destinationIndex];
769 Num_copy_Sprimme(m, &vecs[sourceIndex * ld], 1,
770 &vecs[destinationIndex * ld], 1, ctx);
771 tempIndex = perm[destinationIndex];
772 perm[destinationIndex] = destinationIndex;
773 destinationIndex = tempIndex;
774 }
775
776 /* Copy the vector from the buffer to where it belongs */
777 Num_copy_Sprimme(m, rwork, 1, &vecs[destinationIndex * ld], 1, ctx);
778 perm[destinationIndex] = destinationIndex;
779
780 currentIndex++;
781 }
782
783 /* Check permutation */
784 for (currentIndex = 0; currentIndex < n; currentIndex++)
785 assert(perm[currentIndex] == currentIndex);
786
787 CHKERR(Num_free_iprimme(perm, ctx));
788 CHKERR(Num_free_Sprimme(rwork, ctx));
789
790 return 0;
791 }
792
793 #ifdef USE_DOUBLE
794 TEMPLATE_PLEASE
permute_vecs_iprimme(int * vecs,int n,int * perm_,primme_context ctx)795 int permute_vecs_iprimme(int *vecs, int n, int *perm_, primme_context ctx) {
796
797 int currentIndex; /* Index of vector in sorted order */
798 int sourceIndex; /* Position of out-of-order vector in original order */
799 int destinationIndex; /* Position of out-of-order vector in sorted order */
800 int tempIndex; /* Used to swap */
801 int *perm; /* A copy of perm_ */
802 int aux;
803
804 /* Check that perm_ and iwork do not overlap */
805
806 CHKERR(Num_malloc_iprimme(n, &perm, ctx));
807
808 /* Check perm_ is a permutation */
809
810 #ifndef NDEBUG
811 for (tempIndex=0; tempIndex<n; tempIndex++) perm[tempIndex] = 0;
812 for (tempIndex=0; tempIndex<n; tempIndex++) {
813 assert(0 <= perm_[tempIndex] && perm_[tempIndex] < n);
814 perm[perm_[tempIndex]] = 1;
815 }
816 for (tempIndex=0; tempIndex<n; tempIndex++) assert(perm[tempIndex] == 1);
817 #endif
818
819 /* Copy of perm_ into perm, to avoid to modify the input permutation */
820
821 for (tempIndex=0; tempIndex<n; tempIndex++)
822 perm[tempIndex] = perm_[tempIndex];
823
824 /* Continue until all vectors are in the sorted order */
825
826 currentIndex = 0;
827 while (1) {
828
829 /* Find a vector that does not belong in its original position */
830 while ((currentIndex < n) && (perm[currentIndex] == currentIndex)) {
831 currentIndex++;
832 }
833
834 /* Return if they are in the sorted order */
835 if (currentIndex >= n) {
836 break;
837 }
838
839 /* Copy the vector to a buffer for swapping */
840 aux = vecs[currentIndex];
841
842 destinationIndex = currentIndex;
843 /* Copy vector perm[destinationIndex] into position destinationIndex */
844
845 while (perm[destinationIndex] != currentIndex) {
846
847 sourceIndex = perm[destinationIndex];
848 vecs[destinationIndex] = vecs[sourceIndex];
849 tempIndex = perm[destinationIndex];
850 perm[destinationIndex] = destinationIndex;
851 destinationIndex = tempIndex;
852 }
853
854 /* Copy the vector from the buffer to where it belongs */
855 vecs[destinationIndex] = aux;
856 perm[destinationIndex] = destinationIndex;
857
858 currentIndex++;
859 }
860
861 /* Check permutation */
862 for (currentIndex=0; currentIndex < n; currentIndex++)
863 assert(perm[currentIndex] == currentIndex);
864
865 CHKERR(Num_free_iprimme(perm, ctx));
866
867 return 0;
868 }
869 #endif
870
871
872 /******************************************************************************
873 * Subroutine Num_compact_vecs - copy certain columns of matrix into another
874 * matrix, i.e., work = vecs(perm). If avoidCopy and perm indices are
875 * consecutive the routine returns a reference in vecs and doesn't copy.
876 *
877 *
878 * PARAMETERS
879 * ---------------------------
880 *
881 * vecs The matrix
882 * m The number of rows of vecs
883 * n The number of columns of to copy
884 * ld The leading dimension of vecs
885 * perm The indices of columns to copy
886 * work The columns are copied to this matrix
887 * ldwork The leading dimension of work
888 * avoidCopy If nonzero, the copy is avoid
889 *
890 * return Reference of a matrix with the columns ordered as perm
891 *
892 ******************************************************************************/
893
894 TEMPLATE_PLEASE
Num_compact_vecs_Sprimme(SCALAR * vecs,PRIMME_INT m,int n,PRIMME_INT ld,int * perm,SCALAR * work,PRIMME_INT ldwork,int avoidCopy,primme_context ctx)895 SCALAR* Num_compact_vecs_Sprimme(SCALAR *vecs, PRIMME_INT m, int n,
896 PRIMME_INT ld, int *perm, SCALAR *work, PRIMME_INT ldwork,
897 int avoidCopy, primme_context ctx) {
898
899 int i;
900
901 if (avoidCopy) {
902 for (i=0; i<n-1 && perm[i]+1 == perm[i+1]; i++);
903 if (i >= n-1) return &vecs[ld*perm[0]];
904 }
905
906 for (i=0; i < n; i++) {
907 Num_copy_matrix_Sprimme(&vecs[perm[i] * ld], m, 1, ld, &work[i * ldwork],
908 ld, ctx);
909 }
910 return work;
911 }
912
913 /******************************************************************************
914 * Function Num_scale_matrix - Scale the column of the matrix x into y
915 *
916 * PARAMETERS
917 * ---------------------------
918 * x The source matrix
919 * m The number of rows of x
920 * n The number of columns of x
921 * ldx The leading dimension of x
922 * s The scales
923 * y On output y = x
924 * ldy The leading dimension of y
925 *
926 * NOTE: x and y *can* overlap
927 *
928 ******************************************************************************/
929
930 TEMPLATE_PLEASE
Num_scale_matrix_Sprimme(SCALAR * x,PRIMME_INT m,PRIMME_INT n,PRIMME_INT ldx,HREAL * s,SCALAR * y,PRIMME_INT ldy,primme_context ctx)931 int Num_scale_matrix_Sprimme(SCALAR *x, PRIMME_INT m, PRIMME_INT n,
932 PRIMME_INT ldx, HREAL *s, SCALAR *y, PRIMME_INT ldy, primme_context ctx) {
933
934 PRIMME_INT i;
935
936 assert(m == 0 || n == 0 || (ldx >= m && ldy >= m));
937
938 /* Copy the matrix some columns backward, and other cases */
939 /* TODO: assert x and y don't overlap */
940 for (i=0; i<n; i++) {
941 Num_copy_Sprimme(m, &x[ldx*i], 1, &y[ldy*i], 1, ctx);
942 Num_scal_Sprimme(m, s[i], &y[ldy*i], 1, ctx);
943 }
944
945 return 0;
946 }
947
948 #endif /* SUPPORTED_TYPE */
949