1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "blas_extended.h"
4 #include "blas_extended_test.h"
5
BLAS_ssymm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,float * alpha,int alpha_flag,float * beta,int beta_flag,float * a,int lda,float * b,int ldb,float * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)6 void BLAS_ssymm_testgen(int norm, enum blas_order_type order,
7 enum blas_uplo_type uplo, enum blas_side_type side,
8 int m, int n, int randomize, float *alpha,
9 int alpha_flag, float *beta, int beta_flag, float *a,
10 int lda, float *b, int ldb, float *c, int ldc,
11 int *seed, double *head_r_true, double *tail_r_true)
12
13 /*
14 * Purpose
15 * =======
16 *
17 * Generates the test inputs to BLAS_ssymm{_x}
18 *
19 * Arguments
20 * =========
21 *
22 * norm (input) int
23 * = -1: the vectors are scaled with norms near underflow.
24 * = 0: the vectors have norms of order 1.
25 * = 1: the vectors are scaled with norms near overflow.
26 *
27 * order (input) enum blas_side_type
28 * storage format of the matrices
29 *
30 * uplo (input) enum blas_uplo_type
31 * which half of the symmetric matrix a is to be stored.
32 *
33 * side (input) enum blas_side_type
34 * which side of matrix b matrix a is to be multiplied.
35 *
36 * m n (input) int
37 * sizes of matrices a, b, c:
38 * matrix a is m-by-m for left multiplication
39 * n-by-n otherwise,
40 * matrices b, c are m-by-n.
41 *
42 * randomize (input) int
43 * if 0, entries in matrices A, B will be chosen for
44 * maximum cancellation, but with less randomness.
45 * if 1, every entry in the matrix A, B will be
46 * random.
47 *
48 * alpha (input/output) float*
49 * if alpha_flag = 1, alpha is input.
50 * if alpha_flag = 0, alpha is output.
51 *
52 * alpha_flag (input) int
53 * = 0: alpha is free, and is output.
54 * = 1: alpha is fixed on input.
55 *
56 * beta (input/output) float*
57 * if beta_flag = 1, beta is input.
58 * if beta_flag = 0, beta is output.
59 *
60 * beta_flag (input) int
61 * = 0: beta is free, and is output.
62 * = 1: beta is fixed on input.
63 *
64 * a (input/output) float*
65 *
66 * lda (input) lda
67 * leading dimension of matrix A.
68 *
69 * b (input/output) float*
70 *
71 * ldb (input) int
72 * leading dimension of matrix B.
73 *
74 * c (input/output) float*
75 * generated matrix C that will be used as an input to SYMM.
76 *
77 * ldc (input) int
78 * leading dimension of matrix C.
79 *
80 * seed (input/output) int *
81 * seed for the random number generator.
82 *
83 * double (output) *head_r_true
84 * the leading part of the truth in double-double.
85 *
86 * double (output) *tail_r_true
87 * the trailing part of the truth in double-double
88 *
89 */
90 {
91
92 int i, j;
93 int cij, ci;
94 int bij, bi;
95 int aij, ai;
96 int inccij, incci;
97 int incbij, incbi;
98 int incaij, incai;
99 int inca, incb;
100 int m_i, n_i;
101
102 float c_elem;
103 float a_elem;
104 float b_elem;
105 double head_r_true_elem, tail_r_true_elem;
106
107 float *a_vec;
108 float *b_vec;
109
110 float *c_i = c;
111 float *alpha_i = alpha;
112 float *beta_i = beta;
113 float *a_i = a;
114 float *b_i = b;
115
116 if (side == blas_left_side) {
117 m_i = m;
118 n_i = n;
119 } else {
120 m_i = n;
121 n_i = m;
122 }
123
124 inca = incb = 1;
125
126
127 a_vec = (float *) blas_malloc(m_i * sizeof(float));
128 if (m_i > 0 && a_vec == NULL) {
129 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
130 }
131 for (i = 0; i < m_i * inca; i += inca) {
132 a_vec[i] = 0.0;
133 }
134 b_vec = (float *) blas_malloc(m_i * sizeof(float));
135 if (m_i > 0 && b_vec == NULL) {
136 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
137 }
138 for (i = 0; i < m_i * incb; i += incb) {
139 b_vec[i] = 0.0;
140 }
141
142 if ((order == blas_colmajor && side == blas_left_side) ||
143 (order == blas_rowmajor && side == blas_right_side)) {
144 incci = 1;
145 inccij = ldc;
146 } else {
147 incci = ldc;
148 inccij = 1;
149 }
150
151
152
153
154
155 if (randomize == 0) {
156 /* First fill in the first row of A and the first column/row of B */
157
158 BLAS_sdot_testgen(m_i, 0, 0, norm, blas_no_conj,
159 alpha, alpha_flag, beta, beta_flag,
160 b_vec, a_vec, seed, &c_elem,
161 &head_r_true_elem, &tail_r_true_elem);
162
163 cij = 0;
164 c_i[cij] = c_elem;
165 head_r_true[cij] = head_r_true_elem;
166 tail_r_true[cij] = tail_r_true_elem;
167
168 /* Copy a_vec to first row of A */
169 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
170
171 /* set every column of B to be b_vec */
172 for (j = 0; j < n_i; j++) {
173 if (side == blas_left_side)
174 sge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
175 else
176 sge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
177 }
178
179 /* Fill in rest of matrix A */
180 cij = incci;
181 for (i = 1; i < m_i; i++, cij += incci) {
182 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
183 BLAS_sdot_testgen(m_i, i, m_i - i, norm,
184 blas_no_conj, alpha, 1,
185 beta, 1, b_vec, a_vec, seed,
186 &c_elem, &head_r_true_elem, &tail_r_true_elem);
187
188 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
189
190 c_i[cij] = c_elem;
191 head_r_true[cij] = head_r_true_elem;
192 tail_r_true[cij] = tail_r_true_elem;
193 }
194
195 /* Now fill in c and r_true */
196 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
197 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
198 c_elem = c_i[ci];
199 c_i[cij] = c_elem;
200 head_r_true[cij] = head_r_true[ci];
201 tail_r_true[cij] = tail_r_true[ci];
202 }
203 }
204 } else {
205
206
207
208
209
210
211
212 if (alpha_flag == 0) {
213 c_elem = xrand(seed);
214 alpha_i[0] = c_elem;
215 }
216 if (beta_flag == 0) {
217 c_elem = xrand(seed);
218 beta_i[0] = c_elem;
219 }
220
221 if ((order == blas_colmajor && side == blas_left_side) ||
222 (order == blas_rowmajor && side == blas_right_side)) {
223 incai = incbi = 1;
224 incbij = ldb;
225 incaij = lda;
226 } else {
227 incai = lda;
228 incbi = ldb;
229 incaij = incbij = 1;
230 }
231
232
233
234
235
236
237 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
238 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
239 a_elem = xrand(seed);
240 a_i[aij] = a_elem;
241 }
242 }
243
244 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
245 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
246 b_elem = xrand(seed);
247 b_i[bij] = b_elem;
248 }
249 }
250
251 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
252 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
253
254
255 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
256
257 if (side == blas_left_side)
258 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
259 else
260 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
261
262
263
264 BLAS_sdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
265 beta, 1, b_vec, a_vec, seed,
266 &c_elem, &head_r_true_elem, &tail_r_true_elem);
267
268 c_i[cij] = c_elem;
269 head_r_true[cij] = head_r_true_elem;
270 tail_r_true[cij] = tail_r_true_elem;
271 }
272 }
273
274
275
276 }
277
278 blas_free(a_vec);
279 blas_free(b_vec);
280 }
BLAS_dsymm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,double * alpha,int alpha_flag,double * beta,int beta_flag,double * a,int lda,double * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)281 void BLAS_dsymm_testgen(int norm, enum blas_order_type order,
282 enum blas_uplo_type uplo, enum blas_side_type side,
283 int m, int n, int randomize, double *alpha,
284 int alpha_flag, double *beta, int beta_flag,
285 double *a, int lda, double *b, int ldb, double *c,
286 int ldc, int *seed, double *head_r_true,
287 double *tail_r_true)
288
289 /*
290 * Purpose
291 * =======
292 *
293 * Generates the test inputs to BLAS_dsymm{_x}
294 *
295 * Arguments
296 * =========
297 *
298 * norm (input) int
299 * = -1: the vectors are scaled with norms near underflow.
300 * = 0: the vectors have norms of order 1.
301 * = 1: the vectors are scaled with norms near overflow.
302 *
303 * order (input) enum blas_side_type
304 * storage format of the matrices
305 *
306 * uplo (input) enum blas_uplo_type
307 * which half of the symmetric matrix a is to be stored.
308 *
309 * side (input) enum blas_side_type
310 * which side of matrix b matrix a is to be multiplied.
311 *
312 * m n (input) int
313 * sizes of matrices a, b, c:
314 * matrix a is m-by-m for left multiplication
315 * n-by-n otherwise,
316 * matrices b, c are m-by-n.
317 *
318 * randomize (input) int
319 * if 0, entries in matrices A, B will be chosen for
320 * maximum cancellation, but with less randomness.
321 * if 1, every entry in the matrix A, B will be
322 * random.
323 *
324 * alpha (input/output) double*
325 * if alpha_flag = 1, alpha is input.
326 * if alpha_flag = 0, alpha is output.
327 *
328 * alpha_flag (input) int
329 * = 0: alpha is free, and is output.
330 * = 1: alpha is fixed on input.
331 *
332 * beta (input/output) double*
333 * if beta_flag = 1, beta is input.
334 * if beta_flag = 0, beta is output.
335 *
336 * beta_flag (input) int
337 * = 0: beta is free, and is output.
338 * = 1: beta is fixed on input.
339 *
340 * a (input/output) double*
341 *
342 * lda (input) lda
343 * leading dimension of matrix A.
344 *
345 * b (input/output) double*
346 *
347 * ldb (input) int
348 * leading dimension of matrix B.
349 *
350 * c (input/output) double*
351 * generated matrix C that will be used as an input to SYMM.
352 *
353 * ldc (input) int
354 * leading dimension of matrix C.
355 *
356 * seed (input/output) int *
357 * seed for the random number generator.
358 *
359 * double (output) *head_r_true
360 * the leading part of the truth in double-double.
361 *
362 * double (output) *tail_r_true
363 * the trailing part of the truth in double-double
364 *
365 */
366 {
367
368 int i, j;
369 int cij, ci;
370 int bij, bi;
371 int aij, ai;
372 int inccij, incci;
373 int incbij, incbi;
374 int incaij, incai;
375 int inca, incb;
376 int m_i, n_i;
377
378 double c_elem;
379 double a_elem;
380 double b_elem;
381 double head_r_true_elem, tail_r_true_elem;
382
383 double *a_vec;
384 double *b_vec;
385
386 double *c_i = c;
387 double *alpha_i = alpha;
388 double *beta_i = beta;
389 double *a_i = a;
390 double *b_i = b;
391
392 if (side == blas_left_side) {
393 m_i = m;
394 n_i = n;
395 } else {
396 m_i = n;
397 n_i = m;
398 }
399
400 inca = incb = 1;
401
402
403 a_vec = (double *) blas_malloc(m_i * sizeof(double));
404 if (m_i > 0 && a_vec == NULL) {
405 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
406 }
407 for (i = 0; i < m_i * inca; i += inca) {
408 a_vec[i] = 0.0;
409 }
410 b_vec = (double *) blas_malloc(m_i * sizeof(double));
411 if (m_i > 0 && b_vec == NULL) {
412 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
413 }
414 for (i = 0; i < m_i * incb; i += incb) {
415 b_vec[i] = 0.0;
416 }
417
418 if ((order == blas_colmajor && side == blas_left_side) ||
419 (order == blas_rowmajor && side == blas_right_side)) {
420 incci = 1;
421 inccij = ldc;
422 } else {
423 incci = ldc;
424 inccij = 1;
425 }
426
427
428
429
430
431 if (randomize == 0) {
432 /* First fill in the first row of A and the first column/row of B */
433
434 BLAS_ddot_testgen(m_i, 0, 0, norm, blas_no_conj,
435 alpha, alpha_flag, beta, beta_flag,
436 b_vec, a_vec, seed, &c_elem,
437 &head_r_true_elem, &tail_r_true_elem);
438
439 cij = 0;
440 c_i[cij] = c_elem;
441 head_r_true[cij] = head_r_true_elem;
442 tail_r_true[cij] = tail_r_true_elem;
443
444 /* Copy a_vec to first row of A */
445 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
446
447 /* set every column of B to be b_vec */
448 for (j = 0; j < n_i; j++) {
449 if (side == blas_left_side)
450 dge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
451 else
452 dge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
453 }
454
455 /* Fill in rest of matrix A */
456 cij = incci;
457 for (i = 1; i < m_i; i++, cij += incci) {
458 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
459 BLAS_ddot_testgen(m_i, i, m_i - i, norm,
460 blas_no_conj, alpha, 1,
461 beta, 1, b_vec, a_vec, seed,
462 &c_elem, &head_r_true_elem, &tail_r_true_elem);
463
464 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
465
466 c_i[cij] = c_elem;
467 head_r_true[cij] = head_r_true_elem;
468 tail_r_true[cij] = tail_r_true_elem;
469 }
470
471 /* Now fill in c and r_true */
472 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
473 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
474 c_elem = c_i[ci];
475 c_i[cij] = c_elem;
476 head_r_true[cij] = head_r_true[ci];
477 tail_r_true[cij] = tail_r_true[ci];
478 }
479 }
480 } else {
481
482
483
484
485
486
487
488 if (alpha_flag == 0) {
489 c_elem = xrand(seed);
490 alpha_i[0] = c_elem;
491 }
492 if (beta_flag == 0) {
493 c_elem = xrand(seed);
494 beta_i[0] = c_elem;
495 }
496
497 if ((order == blas_colmajor && side == blas_left_side) ||
498 (order == blas_rowmajor && side == blas_right_side)) {
499 incai = incbi = 1;
500 incbij = ldb;
501 incaij = lda;
502 } else {
503 incai = lda;
504 incbi = ldb;
505 incaij = incbij = 1;
506 }
507
508
509
510
511
512
513 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
514 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
515 a_elem = xrand(seed);
516 a_i[aij] = a_elem;
517 }
518 }
519
520 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
521 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
522 b_elem = xrand(seed);
523 b_i[bij] = b_elem;
524 }
525 }
526
527 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
528 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
529
530
531 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
532
533 if (side == blas_left_side)
534 dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
535 else
536 dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
537
538
539
540 BLAS_ddot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
541 beta, 1, b_vec, a_vec, seed,
542 &c_elem, &head_r_true_elem, &tail_r_true_elem);
543
544 c_i[cij] = c_elem;
545 head_r_true[cij] = head_r_true_elem;
546 tail_r_true[cij] = tail_r_true_elem;
547 }
548 }
549
550
551
552 }
553
554 blas_free(a_vec);
555 blas_free(b_vec);
556 }
BLAS_csymm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)557 void BLAS_csymm_testgen(int norm, enum blas_order_type order,
558 enum blas_uplo_type uplo, enum blas_side_type side,
559 int m, int n, int randomize, void *alpha,
560 int alpha_flag, void *beta, int beta_flag, void *a,
561 int lda, void *b, int ldb, void *c, int ldc,
562 int *seed, double *head_r_true, double *tail_r_true)
563
564 /*
565 * Purpose
566 * =======
567 *
568 * Generates the test inputs to BLAS_csymm{_x}
569 *
570 * Arguments
571 * =========
572 *
573 * norm (input) int
574 * = -1: the vectors are scaled with norms near underflow.
575 * = 0: the vectors have norms of order 1.
576 * = 1: the vectors are scaled with norms near overflow.
577 *
578 * order (input) enum blas_side_type
579 * storage format of the matrices
580 *
581 * uplo (input) enum blas_uplo_type
582 * which half of the symmetric matrix a is to be stored.
583 *
584 * side (input) enum blas_side_type
585 * which side of matrix b matrix a is to be multiplied.
586 *
587 * m n (input) int
588 * sizes of matrices a, b, c:
589 * matrix a is m-by-m for left multiplication
590 * n-by-n otherwise,
591 * matrices b, c are m-by-n.
592 *
593 * randomize (input) int
594 * if 0, entries in matrices A, B will be chosen for
595 * maximum cancellation, but with less randomness.
596 * if 1, every entry in the matrix A, B will be
597 * random.
598 *
599 * alpha (input/output) void*
600 * if alpha_flag = 1, alpha is input.
601 * if alpha_flag = 0, alpha is output.
602 *
603 * alpha_flag (input) int
604 * = 0: alpha is free, and is output.
605 * = 1: alpha is fixed on input.
606 *
607 * beta (input/output) void*
608 * if beta_flag = 1, beta is input.
609 * if beta_flag = 0, beta is output.
610 *
611 * beta_flag (input) int
612 * = 0: beta is free, and is output.
613 * = 1: beta is fixed on input.
614 *
615 * a (input/output) void*
616 *
617 * lda (input) lda
618 * leading dimension of matrix A.
619 *
620 * b (input/output) void*
621 *
622 * ldb (input) int
623 * leading dimension of matrix B.
624 *
625 * c (input/output) void*
626 * generated matrix C that will be used as an input to SYMM.
627 *
628 * ldc (input) int
629 * leading dimension of matrix C.
630 *
631 * seed (input/output) int *
632 * seed for the random number generator.
633 *
634 * double (output) *head_r_true
635 * the leading part of the truth in double-double.
636 *
637 * double (output) *tail_r_true
638 * the trailing part of the truth in double-double
639 *
640 */
641 {
642
643 int i, j;
644 int cij, ci;
645 int bij, bi;
646 int aij, ai;
647 int inccij, incci;
648 int incbij, incbi;
649 int incaij, incai;
650 int inca, incb;
651 int m_i, n_i;
652
653 float c_elem[2];
654 float a_elem[2];
655 float b_elem[2];
656 double head_r_true_elem[2], tail_r_true_elem[2];
657
658 float *a_vec;
659 float *b_vec;
660
661 float *c_i = (float *) c;
662 float *alpha_i = (float *) alpha;
663 float *beta_i = (float *) beta;
664 float *a_i = (float *) a;
665 float *b_i = (float *) b;
666
667 if (side == blas_left_side) {
668 m_i = m;
669 n_i = n;
670 } else {
671 m_i = n;
672 n_i = m;
673 }
674
675 inca = incb = 1;
676 inca *= 2;
677 incb *= 2;
678 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
679 if (m_i > 0 && a_vec == NULL) {
680 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
681 }
682 for (i = 0; i < m_i * inca; i += inca) {
683 a_vec[i] = 0.0;
684 a_vec[i + 1] = 0.0;
685 }
686 b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
687 if (m_i > 0 && b_vec == NULL) {
688 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
689 }
690 for (i = 0; i < m_i * incb; i += incb) {
691 b_vec[i] = 0.0;
692 b_vec[i + 1] = 0.0;
693 }
694
695 if ((order == blas_colmajor && side == blas_left_side) ||
696 (order == blas_rowmajor && side == blas_right_side)) {
697 incci = 1;
698 inccij = ldc;
699 } else {
700 incci = ldc;
701 inccij = 1;
702 }
703
704 incci *= 2;
705 inccij *= 2;
706
707
708 if (randomize == 0) {
709 /* First fill in the first row of A and the first column/row of B */
710
711 BLAS_cdot_testgen(m_i, 0, 0, norm, blas_no_conj,
712 alpha, alpha_flag, beta, beta_flag,
713 b_vec, a_vec, seed, c_elem,
714 head_r_true_elem, tail_r_true_elem);
715
716 cij = 0;
717 c_i[cij] = c_elem[0];
718 c_i[cij + 1] = c_elem[1];
719 head_r_true[cij] = head_r_true_elem[0];
720 head_r_true[cij + 1] = head_r_true_elem[1];
721 tail_r_true[cij] = tail_r_true_elem[0];
722 tail_r_true[cij + 1] = tail_r_true_elem[1];
723
724 /* Copy a_vec to first row of A */
725 csy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
726
727 /* set every column of B to be b_vec */
728 for (j = 0; j < n_i; j++) {
729 if (side == blas_left_side)
730 cge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
731 else
732 cge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
733 }
734
735 /* Fill in rest of matrix A */
736 cij = incci;
737 for (i = 1; i < m_i; i++, cij += incci) {
738 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
739 BLAS_cdot_testgen(m_i, i, m_i - i, norm,
740 blas_no_conj, alpha, 1,
741 beta, 1, b_vec, a_vec, seed,
742 c_elem, head_r_true_elem, tail_r_true_elem);
743
744 csy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
745
746 c_i[cij] = c_elem[0];
747 c_i[cij + 1] = c_elem[1];
748 head_r_true[cij] = head_r_true_elem[0];
749 head_r_true[cij + 1] = head_r_true_elem[1];
750 tail_r_true[cij] = tail_r_true_elem[0];
751 tail_r_true[cij + 1] = tail_r_true_elem[1];
752 }
753
754 /* Now fill in c and r_true */
755 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
756 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
757 c_elem[0] = c_i[ci];
758 c_elem[1] = c_i[ci + 1];
759 c_i[cij] = c_elem[0];
760 c_i[cij + 1] = c_elem[1];
761 head_r_true[cij] = head_r_true[ci];
762 tail_r_true[cij] = tail_r_true[ci];
763 head_r_true[cij + 1] = head_r_true[ci + 1];
764 tail_r_true[cij + 1] = tail_r_true[ci + 1];
765 }
766 }
767 } else {
768
769
770
771
772
773
774
775 if (alpha_flag == 0) {
776 c_elem[0] = xrand(seed);
777 c_elem[1] = xrand(seed);
778 alpha_i[0] = c_elem[0];
779 alpha_i[0 + 1] = c_elem[1];
780 }
781 if (beta_flag == 0) {
782 c_elem[0] = xrand(seed);
783 c_elem[1] = xrand(seed);
784 beta_i[0] = c_elem[0];
785 beta_i[0 + 1] = c_elem[1];
786 }
787
788 if ((order == blas_colmajor && side == blas_left_side) ||
789 (order == blas_rowmajor && side == blas_right_side)) {
790 incai = incbi = 1;
791 incbij = ldb;
792 incaij = lda;
793 } else {
794 incai = lda;
795 incbi = ldb;
796 incaij = incbij = 1;
797 }
798
799 incbi *= 2;
800 incbij *= 2;
801 incai *= 2;
802 incaij *= 2;
803
804 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
805 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
806 a_elem[0] = xrand(seed);
807 a_elem[1] = xrand(seed);
808 a_i[aij] = a_elem[0];
809 a_i[aij + 1] = a_elem[1];
810 }
811 }
812
813 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
814 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
815 b_elem[0] = xrand(seed);
816 b_elem[1] = xrand(seed);
817 b_i[bij] = b_elem[0];
818 b_i[bij + 1] = b_elem[1];
819 }
820 }
821
822 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
823 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
824
825
826 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
827
828 if (side == blas_left_side)
829 cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
830 else
831 cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
832
833
834
835 BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
836 beta, 1, b_vec, a_vec, seed,
837 c_elem, head_r_true_elem, tail_r_true_elem);
838
839 c_i[cij] = c_elem[0];
840 c_i[cij + 1] = c_elem[1];
841 head_r_true[cij] = head_r_true_elem[0];
842 head_r_true[cij + 1] = head_r_true_elem[1];
843 tail_r_true[cij] = tail_r_true_elem[0];
844 tail_r_true[cij + 1] = tail_r_true_elem[1];
845 }
846 }
847
848
849
850 }
851
852 blas_free(a_vec);
853 blas_free(b_vec);
854 }
BLAS_zsymm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)855 void BLAS_zsymm_testgen(int norm, enum blas_order_type order,
856 enum blas_uplo_type uplo, enum blas_side_type side,
857 int m, int n, int randomize, void *alpha,
858 int alpha_flag, void *beta, int beta_flag, void *a,
859 int lda, void *b, int ldb, void *c, int ldc,
860 int *seed, double *head_r_true, double *tail_r_true)
861
862 /*
863 * Purpose
864 * =======
865 *
866 * Generates the test inputs to BLAS_zsymm{_x}
867 *
868 * Arguments
869 * =========
870 *
871 * norm (input) int
872 * = -1: the vectors are scaled with norms near underflow.
873 * = 0: the vectors have norms of order 1.
874 * = 1: the vectors are scaled with norms near overflow.
875 *
876 * order (input) enum blas_side_type
877 * storage format of the matrices
878 *
879 * uplo (input) enum blas_uplo_type
880 * which half of the symmetric matrix a is to be stored.
881 *
882 * side (input) enum blas_side_type
883 * which side of matrix b matrix a is to be multiplied.
884 *
885 * m n (input) int
886 * sizes of matrices a, b, c:
887 * matrix a is m-by-m for left multiplication
888 * n-by-n otherwise,
889 * matrices b, c are m-by-n.
890 *
891 * randomize (input) int
892 * if 0, entries in matrices A, B will be chosen for
893 * maximum cancellation, but with less randomness.
894 * if 1, every entry in the matrix A, B will be
895 * random.
896 *
897 * alpha (input/output) void*
898 * if alpha_flag = 1, alpha is input.
899 * if alpha_flag = 0, alpha is output.
900 *
901 * alpha_flag (input) int
902 * = 0: alpha is free, and is output.
903 * = 1: alpha is fixed on input.
904 *
905 * beta (input/output) void*
906 * if beta_flag = 1, beta is input.
907 * if beta_flag = 0, beta is output.
908 *
909 * beta_flag (input) int
910 * = 0: beta is free, and is output.
911 * = 1: beta is fixed on input.
912 *
913 * a (input/output) void*
914 *
915 * lda (input) lda
916 * leading dimension of matrix A.
917 *
918 * b (input/output) void*
919 *
920 * ldb (input) int
921 * leading dimension of matrix B.
922 *
923 * c (input/output) void*
924 * generated matrix C that will be used as an input to SYMM.
925 *
926 * ldc (input) int
927 * leading dimension of matrix C.
928 *
929 * seed (input/output) int *
930 * seed for the random number generator.
931 *
932 * double (output) *head_r_true
933 * the leading part of the truth in double-double.
934 *
935 * double (output) *tail_r_true
936 * the trailing part of the truth in double-double
937 *
938 */
939 {
940
941 int i, j;
942 int cij, ci;
943 int bij, bi;
944 int aij, ai;
945 int inccij, incci;
946 int incbij, incbi;
947 int incaij, incai;
948 int inca, incb;
949 int m_i, n_i;
950
951 double c_elem[2];
952 double a_elem[2];
953 double b_elem[2];
954 double head_r_true_elem[2], tail_r_true_elem[2];
955
956 double *a_vec;
957 double *b_vec;
958
959 double *c_i = (double *) c;
960 double *alpha_i = (double *) alpha;
961 double *beta_i = (double *) beta;
962 double *a_i = (double *) a;
963 double *b_i = (double *) b;
964
965 if (side == blas_left_side) {
966 m_i = m;
967 n_i = n;
968 } else {
969 m_i = n;
970 n_i = m;
971 }
972
973 inca = incb = 1;
974 inca *= 2;
975 incb *= 2;
976 a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
977 if (m_i > 0 && a_vec == NULL) {
978 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
979 }
980 for (i = 0; i < m_i * inca; i += inca) {
981 a_vec[i] = 0.0;
982 a_vec[i + 1] = 0.0;
983 }
984 b_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
985 if (m_i > 0 && b_vec == NULL) {
986 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
987 }
988 for (i = 0; i < m_i * incb; i += incb) {
989 b_vec[i] = 0.0;
990 b_vec[i + 1] = 0.0;
991 }
992
993 if ((order == blas_colmajor && side == blas_left_side) ||
994 (order == blas_rowmajor && side == blas_right_side)) {
995 incci = 1;
996 inccij = ldc;
997 } else {
998 incci = ldc;
999 inccij = 1;
1000 }
1001
1002 incci *= 2;
1003 inccij *= 2;
1004
1005
1006 if (randomize == 0) {
1007 /* First fill in the first row of A and the first column/row of B */
1008
1009 BLAS_zdot_testgen(m_i, 0, 0, norm, blas_no_conj,
1010 alpha, alpha_flag, beta, beta_flag,
1011 b_vec, a_vec, seed, c_elem,
1012 head_r_true_elem, tail_r_true_elem);
1013
1014 cij = 0;
1015 c_i[cij] = c_elem[0];
1016 c_i[cij + 1] = c_elem[1];
1017 head_r_true[cij] = head_r_true_elem[0];
1018 head_r_true[cij + 1] = head_r_true_elem[1];
1019 tail_r_true[cij] = tail_r_true_elem[0];
1020 tail_r_true[cij + 1] = tail_r_true_elem[1];
1021
1022 /* Copy a_vec to first row of A */
1023 zsy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
1024
1025 /* set every column of B to be b_vec */
1026 for (j = 0; j < n_i; j++) {
1027 if (side == blas_left_side)
1028 zge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1029 else
1030 zge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1031 }
1032
1033 /* Fill in rest of matrix A */
1034 cij = incci;
1035 for (i = 1; i < m_i; i++, cij += incci) {
1036 zsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
1037 BLAS_zdot_testgen(m_i, i, m_i - i, norm,
1038 blas_no_conj, alpha, 1,
1039 beta, 1, b_vec, a_vec, seed,
1040 c_elem, head_r_true_elem, tail_r_true_elem);
1041
1042 zsy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
1043
1044 c_i[cij] = c_elem[0];
1045 c_i[cij + 1] = c_elem[1];
1046 head_r_true[cij] = head_r_true_elem[0];
1047 head_r_true[cij + 1] = head_r_true_elem[1];
1048 tail_r_true[cij] = tail_r_true_elem[0];
1049 tail_r_true[cij + 1] = tail_r_true_elem[1];
1050 }
1051
1052 /* Now fill in c and r_true */
1053 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1054 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
1055 c_elem[0] = c_i[ci];
1056 c_elem[1] = c_i[ci + 1];
1057 c_i[cij] = c_elem[0];
1058 c_i[cij + 1] = c_elem[1];
1059 head_r_true[cij] = head_r_true[ci];
1060 tail_r_true[cij] = tail_r_true[ci];
1061 head_r_true[cij + 1] = head_r_true[ci + 1];
1062 tail_r_true[cij + 1] = tail_r_true[ci + 1];
1063 }
1064 }
1065 } else {
1066
1067
1068
1069
1070
1071
1072
1073 if (alpha_flag == 0) {
1074 c_elem[0] = xrand(seed);
1075 c_elem[1] = xrand(seed);
1076 alpha_i[0] = c_elem[0];
1077 alpha_i[0 + 1] = c_elem[1];
1078 }
1079 if (beta_flag == 0) {
1080 c_elem[0] = xrand(seed);
1081 c_elem[1] = xrand(seed);
1082 beta_i[0] = c_elem[0];
1083 beta_i[0 + 1] = c_elem[1];
1084 }
1085
1086 if ((order == blas_colmajor && side == blas_left_side) ||
1087 (order == blas_rowmajor && side == blas_right_side)) {
1088 incai = incbi = 1;
1089 incbij = ldb;
1090 incaij = lda;
1091 } else {
1092 incai = lda;
1093 incbi = ldb;
1094 incaij = incbij = 1;
1095 }
1096
1097 incbi *= 2;
1098 incbij *= 2;
1099 incai *= 2;
1100 incaij *= 2;
1101
1102 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
1103 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
1104 a_elem[0] = xrand(seed);
1105 a_elem[1] = xrand(seed);
1106 a_i[aij] = a_elem[0];
1107 a_i[aij + 1] = a_elem[1];
1108 }
1109 }
1110
1111 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
1112 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
1113 b_elem[0] = xrand(seed);
1114 b_elem[1] = xrand(seed);
1115 b_i[bij] = b_elem[0];
1116 b_i[bij + 1] = b_elem[1];
1117 }
1118 }
1119
1120 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1121 zsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
1122
1123
1124 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
1125
1126 if (side == blas_left_side)
1127 zge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1128 else
1129 zge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1130
1131
1132
1133 BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
1134 beta, 1, b_vec, a_vec, seed,
1135 c_elem, head_r_true_elem, tail_r_true_elem);
1136
1137 c_i[cij] = c_elem[0];
1138 c_i[cij + 1] = c_elem[1];
1139 head_r_true[cij] = head_r_true_elem[0];
1140 head_r_true[cij + 1] = head_r_true_elem[1];
1141 tail_r_true[cij] = tail_r_true_elem[0];
1142 tail_r_true[cij + 1] = tail_r_true_elem[1];
1143 }
1144 }
1145
1146
1147
1148 }
1149
1150 blas_free(a_vec);
1151 blas_free(b_vec);
1152 }
BLAS_csymm_s_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,float * a,int lda,float * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)1153 void BLAS_csymm_s_s_testgen(int norm, enum blas_order_type order,
1154 enum blas_uplo_type uplo,
1155 enum blas_side_type side, int m, int n,
1156 int randomize, void *alpha, int alpha_flag,
1157 void *beta, int beta_flag, float *a, int lda,
1158 float *b, int ldb, void *c, int ldc, int *seed,
1159 double *head_r_true, double *tail_r_true)
1160
1161 /*
1162 * Purpose
1163 * =======
1164 *
1165 * Generates the test inputs to BLAS_csymm_s_s{_x}
1166 *
1167 * Arguments
1168 * =========
1169 *
1170 * norm (input) int
1171 * = -1: the vectors are scaled with norms near underflow.
1172 * = 0: the vectors have norms of order 1.
1173 * = 1: the vectors are scaled with norms near overflow.
1174 *
1175 * order (input) enum blas_side_type
1176 * storage format of the matrices
1177 *
1178 * uplo (input) enum blas_uplo_type
1179 * which half of the symmetric matrix a is to be stored.
1180 *
1181 * side (input) enum blas_side_type
1182 * which side of matrix b matrix a is to be multiplied.
1183 *
1184 * m n (input) int
1185 * sizes of matrices a, b, c:
1186 * matrix a is m-by-m for left multiplication
1187 * n-by-n otherwise,
1188 * matrices b, c are m-by-n.
1189 *
1190 * randomize (input) int
1191 * if 0, entries in matrices A, B will be chosen for
1192 * maximum cancellation, but with less randomness.
1193 * if 1, every entry in the matrix A, B will be
1194 * random.
1195 *
1196 * alpha (input/output) void*
1197 * if alpha_flag = 1, alpha is input.
1198 * if alpha_flag = 0, alpha is output.
1199 *
1200 * alpha_flag (input) int
1201 * = 0: alpha is free, and is output.
1202 * = 1: alpha is fixed on input.
1203 *
1204 * beta (input/output) void*
1205 * if beta_flag = 1, beta is input.
1206 * if beta_flag = 0, beta is output.
1207 *
1208 * beta_flag (input) int
1209 * = 0: beta is free, and is output.
1210 * = 1: beta is fixed on input.
1211 *
1212 * a (input/output) float*
1213 *
1214 * lda (input) lda
1215 * leading dimension of matrix A.
1216 *
1217 * b (input/output) float*
1218 *
1219 * ldb (input) int
1220 * leading dimension of matrix B.
1221 *
1222 * c (input/output) void*
1223 * generated matrix C that will be used as an input to SYMM.
1224 *
1225 * ldc (input) int
1226 * leading dimension of matrix C.
1227 *
1228 * seed (input/output) int *
1229 * seed for the random number generator.
1230 *
1231 * double (output) *head_r_true
1232 * the leading part of the truth in double-double.
1233 *
1234 * double (output) *tail_r_true
1235 * the trailing part of the truth in double-double
1236 *
1237 */
1238 {
1239
1240 int i, j;
1241 int cij, ci;
1242 int bij, bi;
1243 int aij, ai;
1244 int inccij, incci;
1245 int incbij, incbi;
1246 int incaij, incai;
1247 int inca, incb;
1248 int m_i, n_i;
1249
1250 float c_elem[2];
1251 float a_elem;
1252 float b_elem;
1253 double head_r_true_elem[2], tail_r_true_elem[2];
1254
1255 float *a_vec;
1256 float *b_vec;
1257
1258 float *c_i = (float *) c;
1259 float *alpha_i = (float *) alpha;
1260 float *beta_i = (float *) beta;
1261 float *a_i = a;
1262 float *b_i = b;
1263
1264 if (side == blas_left_side) {
1265 m_i = m;
1266 n_i = n;
1267 } else {
1268 m_i = n;
1269 n_i = m;
1270 }
1271
1272 inca = incb = 1;
1273
1274
1275 a_vec = (float *) blas_malloc(m_i * sizeof(float));
1276 if (m_i > 0 && a_vec == NULL) {
1277 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1278 }
1279 for (i = 0; i < m_i * inca; i += inca) {
1280 a_vec[i] = 0.0;
1281 }
1282 b_vec = (float *) blas_malloc(m_i * sizeof(float));
1283 if (m_i > 0 && b_vec == NULL) {
1284 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1285 }
1286 for (i = 0; i < m_i * incb; i += incb) {
1287 b_vec[i] = 0.0;
1288 }
1289
1290 if ((order == blas_colmajor && side == blas_left_side) ||
1291 (order == blas_rowmajor && side == blas_right_side)) {
1292 incci = 1;
1293 inccij = ldc;
1294 } else {
1295 incci = ldc;
1296 inccij = 1;
1297 }
1298
1299 incci *= 2;
1300 inccij *= 2;
1301
1302
1303 if (randomize == 0) {
1304 /* First fill in the first row of A and the first column/row of B */
1305
1306 BLAS_cdot_s_s_testgen(m_i, 0, 0, norm, blas_no_conj,
1307 alpha, alpha_flag, beta, beta_flag,
1308 b_vec, a_vec, seed, c_elem,
1309 head_r_true_elem, tail_r_true_elem);
1310
1311 cij = 0;
1312 c_i[cij] = c_elem[0];
1313 c_i[cij + 1] = c_elem[1];
1314 head_r_true[cij] = head_r_true_elem[0];
1315 head_r_true[cij + 1] = head_r_true_elem[1];
1316 tail_r_true[cij] = tail_r_true_elem[0];
1317 tail_r_true[cij + 1] = tail_r_true_elem[1];
1318
1319 /* Copy a_vec to first row of A */
1320 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
1321
1322 /* set every column of B to be b_vec */
1323 for (j = 0; j < n_i; j++) {
1324 if (side == blas_left_side)
1325 sge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1326 else
1327 sge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1328 }
1329
1330 /* Fill in rest of matrix A */
1331 cij = incci;
1332 for (i = 1; i < m_i; i++, cij += incci) {
1333 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
1334 BLAS_cdot_s_s_testgen(m_i, i, m_i - i, norm,
1335 blas_no_conj, alpha, 1,
1336 beta, 1, b_vec, a_vec, seed,
1337 c_elem, head_r_true_elem, tail_r_true_elem);
1338
1339 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
1340
1341 c_i[cij] = c_elem[0];
1342 c_i[cij + 1] = c_elem[1];
1343 head_r_true[cij] = head_r_true_elem[0];
1344 head_r_true[cij + 1] = head_r_true_elem[1];
1345 tail_r_true[cij] = tail_r_true_elem[0];
1346 tail_r_true[cij + 1] = tail_r_true_elem[1];
1347 }
1348
1349 /* Now fill in c and r_true */
1350 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1351 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
1352 c_elem[0] = c_i[ci];
1353 c_elem[1] = c_i[ci + 1];
1354 c_i[cij] = c_elem[0];
1355 c_i[cij + 1] = c_elem[1];
1356 head_r_true[cij] = head_r_true[ci];
1357 tail_r_true[cij] = tail_r_true[ci];
1358 head_r_true[cij + 1] = head_r_true[ci + 1];
1359 tail_r_true[cij + 1] = tail_r_true[ci + 1];
1360 }
1361 }
1362 } else {
1363
1364 float *aa_vec;
1365 float *bb_vec;
1366
1367 aa_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1368 if (m_i > 0 && aa_vec == NULL) {
1369 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1370 }
1371 bb_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1372 if (m_i > 0 && bb_vec == NULL) {
1373 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1374 }
1375
1376 if (alpha_flag == 0) {
1377 c_elem[0] = (float) xrand(seed);
1378 c_elem[1] = (float) xrand(seed);
1379 alpha_i[0] = c_elem[0];
1380 alpha_i[0 + 1] = c_elem[1];
1381 }
1382 if (beta_flag == 0) {
1383 c_elem[0] = (float) xrand(seed);
1384 c_elem[1] = (float) xrand(seed);
1385 beta_i[0] = c_elem[0];
1386 beta_i[0 + 1] = c_elem[1];
1387 }
1388
1389 if ((order == blas_colmajor && side == blas_left_side) ||
1390 (order == blas_rowmajor && side == blas_right_side)) {
1391 incai = incbi = 1;
1392 incbij = ldb;
1393 incaij = lda;
1394 } else {
1395 incai = lda;
1396 incbi = ldb;
1397 incaij = incbij = 1;
1398 }
1399
1400
1401
1402
1403
1404
1405 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
1406 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
1407 a_elem = (float) xrand(seed);
1408 a_i[aij] = a_elem;
1409 }
1410 }
1411
1412 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
1413 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
1414 b_elem = (float) xrand(seed);
1415 b_i[bij] = b_elem;
1416 }
1417 }
1418
1419 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1420 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
1421 {
1422 int r;
1423 for (r = 0; r < m_i; r++) {
1424 aa_vec[2 * r] = a_vec[r];
1425 aa_vec[2 * r + 1] = 0.0;
1426 }
1427 }
1428
1429 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
1430
1431 if (side == blas_left_side)
1432 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1433 else
1434 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1435 {
1436 int r;
1437 for (r = 0; r < m_i; r++) {
1438 bb_vec[2 * r] = b_vec[r];
1439 bb_vec[2 * r + 1] = 0.0;
1440 }
1441 }
1442
1443
1444 BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
1445 beta, 1,
1446 bb_vec,
1447 aa_vec,
1448 seed, c_elem, head_r_true_elem, tail_r_true_elem);
1449
1450 c_i[cij] = c_elem[0];
1451 c_i[cij + 1] = c_elem[1];
1452 head_r_true[cij] = head_r_true_elem[0];
1453 head_r_true[cij + 1] = head_r_true_elem[1];
1454 tail_r_true[cij] = tail_r_true_elem[0];
1455 tail_r_true[cij + 1] = tail_r_true_elem[1];
1456 }
1457 }
1458
1459 blas_free(aa_vec);
1460 blas_free(bb_vec);
1461 }
1462
1463 blas_free(a_vec);
1464 blas_free(b_vec);
1465 }
BLAS_csymm_s_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,float * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)1466 void BLAS_csymm_s_c_testgen(int norm, enum blas_order_type order,
1467 enum blas_uplo_type uplo,
1468 enum blas_side_type side, int m, int n,
1469 int randomize, void *alpha, int alpha_flag,
1470 void *beta, int beta_flag, float *a, int lda,
1471 void *b, int ldb, void *c, int ldc, int *seed,
1472 double *head_r_true, double *tail_r_true)
1473
1474 /*
1475 * Purpose
1476 * =======
1477 *
1478 * Generates the test inputs to BLAS_csymm_s_c{_x}
1479 *
1480 * Arguments
1481 * =========
1482 *
1483 * norm (input) int
1484 * = -1: the vectors are scaled with norms near underflow.
1485 * = 0: the vectors have norms of order 1.
1486 * = 1: the vectors are scaled with norms near overflow.
1487 *
1488 * order (input) enum blas_side_type
1489 * storage format of the matrices
1490 *
1491 * uplo (input) enum blas_uplo_type
1492 * which half of the symmetric matrix a is to be stored.
1493 *
1494 * side (input) enum blas_side_type
1495 * which side of matrix b matrix a is to be multiplied.
1496 *
1497 * m n (input) int
1498 * sizes of matrices a, b, c:
1499 * matrix a is m-by-m for left multiplication
1500 * n-by-n otherwise,
1501 * matrices b, c are m-by-n.
1502 *
1503 * randomize (input) int
1504 * if 0, entries in matrices A, B will be chosen for
1505 * maximum cancellation, but with less randomness.
1506 * if 1, every entry in the matrix A, B will be
1507 * random.
1508 *
1509 * alpha (input/output) void*
1510 * if alpha_flag = 1, alpha is input.
1511 * if alpha_flag = 0, alpha is output.
1512 *
1513 * alpha_flag (input) int
1514 * = 0: alpha is free, and is output.
1515 * = 1: alpha is fixed on input.
1516 *
1517 * beta (input/output) void*
1518 * if beta_flag = 1, beta is input.
1519 * if beta_flag = 0, beta is output.
1520 *
1521 * beta_flag (input) int
1522 * = 0: beta is free, and is output.
1523 * = 1: beta is fixed on input.
1524 *
1525 * a (input/output) float*
1526 *
1527 * lda (input) lda
1528 * leading dimension of matrix A.
1529 *
1530 * b (input/output) void*
1531 *
1532 * ldb (input) int
1533 * leading dimension of matrix B.
1534 *
1535 * c (input/output) void*
1536 * generated matrix C that will be used as an input to SYMM.
1537 *
1538 * ldc (input) int
1539 * leading dimension of matrix C.
1540 *
1541 * seed (input/output) int *
1542 * seed for the random number generator.
1543 *
1544 * double (output) *head_r_true
1545 * the leading part of the truth in double-double.
1546 *
1547 * double (output) *tail_r_true
1548 * the trailing part of the truth in double-double
1549 *
1550 */
1551 {
1552
1553 int i, j;
1554 int cij, ci;
1555 int bij, bi;
1556 int aij, ai;
1557 int inccij, incci;
1558 int incbij, incbi;
1559 int incaij, incai;
1560 int inca, incb;
1561 int m_i, n_i;
1562
1563 float c_elem[2];
1564 float a_elem;
1565 float b_elem[2];
1566 double head_r_true_elem[2], tail_r_true_elem[2];
1567
1568 float *a_vec;
1569 float *b_vec;
1570
1571 float *c_i = (float *) c;
1572 float *alpha_i = (float *) alpha;
1573 float *beta_i = (float *) beta;
1574 float *a_i = a;
1575 float *b_i = (float *) b;
1576
1577 if (side == blas_left_side) {
1578 m_i = m;
1579 n_i = n;
1580 } else {
1581 m_i = n;
1582 n_i = m;
1583 }
1584
1585 inca = incb = 1;
1586
1587 incb *= 2;
1588 a_vec = (float *) blas_malloc(m_i * sizeof(float));
1589 if (m_i > 0 && a_vec == NULL) {
1590 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1591 }
1592 for (i = 0; i < m_i * inca; i += inca) {
1593 a_vec[i] = 0.0;
1594 }
1595 b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1596 if (m_i > 0 && b_vec == NULL) {
1597 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1598 }
1599 for (i = 0; i < m_i * incb; i += incb) {
1600 b_vec[i] = 0.0;
1601 b_vec[i + 1] = 0.0;
1602 }
1603
1604 if ((order == blas_colmajor && side == blas_left_side) ||
1605 (order == blas_rowmajor && side == blas_right_side)) {
1606 incci = 1;
1607 inccij = ldc;
1608 } else {
1609 incci = ldc;
1610 inccij = 1;
1611 }
1612
1613 incci *= 2;
1614 inccij *= 2;
1615
1616
1617 if (randomize == 0) {
1618 /* First fill in the first row of A and the first column/row of B */
1619
1620 BLAS_cdot_c_s_testgen(m_i, 0, 0, norm, blas_no_conj,
1621 alpha, alpha_flag, beta, beta_flag,
1622 b_vec, a_vec, seed, c_elem,
1623 head_r_true_elem, tail_r_true_elem);
1624
1625 cij = 0;
1626 c_i[cij] = c_elem[0];
1627 c_i[cij + 1] = c_elem[1];
1628 head_r_true[cij] = head_r_true_elem[0];
1629 head_r_true[cij + 1] = head_r_true_elem[1];
1630 tail_r_true[cij] = tail_r_true_elem[0];
1631 tail_r_true[cij + 1] = tail_r_true_elem[1];
1632
1633 /* Copy a_vec to first row of A */
1634 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
1635
1636 /* set every column of B to be b_vec */
1637 for (j = 0; j < n_i; j++) {
1638 if (side == blas_left_side)
1639 cge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1640 else
1641 cge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1642 }
1643
1644 /* Fill in rest of matrix A */
1645 cij = incci;
1646 for (i = 1; i < m_i; i++, cij += incci) {
1647 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
1648 BLAS_cdot_c_s_testgen(m_i, i, m_i - i, norm,
1649 blas_no_conj, alpha, 1,
1650 beta, 1, b_vec, a_vec, seed,
1651 c_elem, head_r_true_elem, tail_r_true_elem);
1652
1653 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
1654
1655 c_i[cij] = c_elem[0];
1656 c_i[cij + 1] = c_elem[1];
1657 head_r_true[cij] = head_r_true_elem[0];
1658 head_r_true[cij + 1] = head_r_true_elem[1];
1659 tail_r_true[cij] = tail_r_true_elem[0];
1660 tail_r_true[cij + 1] = tail_r_true_elem[1];
1661 }
1662
1663 /* Now fill in c and r_true */
1664 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1665 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
1666 c_elem[0] = c_i[ci];
1667 c_elem[1] = c_i[ci + 1];
1668 c_i[cij] = c_elem[0];
1669 c_i[cij + 1] = c_elem[1];
1670 head_r_true[cij] = head_r_true[ci];
1671 tail_r_true[cij] = tail_r_true[ci];
1672 head_r_true[cij + 1] = head_r_true[ci + 1];
1673 tail_r_true[cij + 1] = tail_r_true[ci + 1];
1674 }
1675 }
1676 } else {
1677
1678 float *aa_vec;
1679
1680
1681 aa_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1682 if (m_i > 0 && aa_vec == NULL) {
1683 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1684 }
1685
1686
1687 if (alpha_flag == 0) {
1688 c_elem[0] = (float) xrand(seed);
1689 c_elem[1] = (float) xrand(seed);
1690 alpha_i[0] = c_elem[0];
1691 alpha_i[0 + 1] = c_elem[1];
1692 }
1693 if (beta_flag == 0) {
1694 c_elem[0] = (float) xrand(seed);
1695 c_elem[1] = (float) xrand(seed);
1696 beta_i[0] = c_elem[0];
1697 beta_i[0 + 1] = c_elem[1];
1698 }
1699
1700 if ((order == blas_colmajor && side == blas_left_side) ||
1701 (order == blas_rowmajor && side == blas_right_side)) {
1702 incai = incbi = 1;
1703 incbij = ldb;
1704 incaij = lda;
1705 } else {
1706 incai = lda;
1707 incbi = ldb;
1708 incaij = incbij = 1;
1709 }
1710
1711 incbi *= 2;
1712 incbij *= 2;
1713
1714
1715
1716 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
1717 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
1718 a_elem = (float) xrand(seed);
1719 a_i[aij] = a_elem;
1720 }
1721 }
1722
1723 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
1724 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
1725 b_elem[0] = (float) xrand(seed);
1726 b_elem[1] = (float) xrand(seed);
1727 b_i[bij] = b_elem[0];
1728 b_i[bij + 1] = b_elem[1];
1729 }
1730 }
1731
1732 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1733 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
1734 {
1735 int r;
1736 for (r = 0; r < m_i; r++) {
1737 aa_vec[2 * r] = a_vec[r];
1738 aa_vec[2 * r + 1] = 0.0;
1739 }
1740 }
1741
1742 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
1743
1744 if (side == blas_left_side)
1745 cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1746 else
1747 cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1748
1749
1750
1751 BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
1752 beta, 1,
1753 b_vec,
1754 aa_vec,
1755 seed, c_elem, head_r_true_elem, tail_r_true_elem);
1756
1757 c_i[cij] = c_elem[0];
1758 c_i[cij + 1] = c_elem[1];
1759 head_r_true[cij] = head_r_true_elem[0];
1760 head_r_true[cij + 1] = head_r_true_elem[1];
1761 tail_r_true[cij] = tail_r_true_elem[0];
1762 tail_r_true[cij + 1] = tail_r_true_elem[1];
1763 }
1764 }
1765
1766 blas_free(aa_vec);
1767
1768 }
1769
1770 blas_free(a_vec);
1771 blas_free(b_vec);
1772 }
BLAS_csymm_c_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,float * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)1773 void BLAS_csymm_c_s_testgen(int norm, enum blas_order_type order,
1774 enum blas_uplo_type uplo,
1775 enum blas_side_type side, int m, int n,
1776 int randomize, void *alpha, int alpha_flag,
1777 void *beta, int beta_flag, void *a, int lda,
1778 float *b, int ldb, void *c, int ldc, int *seed,
1779 double *head_r_true, double *tail_r_true)
1780
1781 /*
1782 * Purpose
1783 * =======
1784 *
1785 * Generates the test inputs to BLAS_csymm_c_s{_x}
1786 *
1787 * Arguments
1788 * =========
1789 *
1790 * norm (input) int
1791 * = -1: the vectors are scaled with norms near underflow.
1792 * = 0: the vectors have norms of order 1.
1793 * = 1: the vectors are scaled with norms near overflow.
1794 *
1795 * order (input) enum blas_side_type
1796 * storage format of the matrices
1797 *
1798 * uplo (input) enum blas_uplo_type
1799 * which half of the symmetric matrix a is to be stored.
1800 *
1801 * side (input) enum blas_side_type
1802 * which side of matrix b matrix a is to be multiplied.
1803 *
1804 * m n (input) int
1805 * sizes of matrices a, b, c:
1806 * matrix a is m-by-m for left multiplication
1807 * n-by-n otherwise,
1808 * matrices b, c are m-by-n.
1809 *
1810 * randomize (input) int
1811 * if 0, entries in matrices A, B will be chosen for
1812 * maximum cancellation, but with less randomness.
1813 * if 1, every entry in the matrix A, B will be
1814 * random.
1815 *
1816 * alpha (input/output) void*
1817 * if alpha_flag = 1, alpha is input.
1818 * if alpha_flag = 0, alpha is output.
1819 *
1820 * alpha_flag (input) int
1821 * = 0: alpha is free, and is output.
1822 * = 1: alpha is fixed on input.
1823 *
1824 * beta (input/output) void*
1825 * if beta_flag = 1, beta is input.
1826 * if beta_flag = 0, beta is output.
1827 *
1828 * beta_flag (input) int
1829 * = 0: beta is free, and is output.
1830 * = 1: beta is fixed on input.
1831 *
1832 * a (input/output) void*
1833 *
1834 * lda (input) lda
1835 * leading dimension of matrix A.
1836 *
1837 * b (input/output) float*
1838 *
1839 * ldb (input) int
1840 * leading dimension of matrix B.
1841 *
1842 * c (input/output) void*
1843 * generated matrix C that will be used as an input to SYMM.
1844 *
1845 * ldc (input) int
1846 * leading dimension of matrix C.
1847 *
1848 * seed (input/output) int *
1849 * seed for the random number generator.
1850 *
1851 * double (output) *head_r_true
1852 * the leading part of the truth in double-double.
1853 *
1854 * double (output) *tail_r_true
1855 * the trailing part of the truth in double-double
1856 *
1857 */
1858 {
1859
1860 int i, j;
1861 int cij, ci;
1862 int bij, bi;
1863 int aij, ai;
1864 int inccij, incci;
1865 int incbij, incbi;
1866 int incaij, incai;
1867 int inca, incb;
1868 int m_i, n_i;
1869
1870 float c_elem[2];
1871 float a_elem[2];
1872 float b_elem;
1873 double head_r_true_elem[2], tail_r_true_elem[2];
1874
1875 float *a_vec;
1876 float *b_vec;
1877
1878 float *c_i = (float *) c;
1879 float *alpha_i = (float *) alpha;
1880 float *beta_i = (float *) beta;
1881 float *a_i = (float *) a;
1882 float *b_i = b;
1883
1884 if (side == blas_left_side) {
1885 m_i = m;
1886 n_i = n;
1887 } else {
1888 m_i = n;
1889 n_i = m;
1890 }
1891
1892 inca = incb = 1;
1893 inca *= 2;
1894
1895 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1896 if (m_i > 0 && a_vec == NULL) {
1897 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1898 }
1899 for (i = 0; i < m_i * inca; i += inca) {
1900 a_vec[i] = 0.0;
1901 a_vec[i + 1] = 0.0;
1902 }
1903 b_vec = (float *) blas_malloc(m_i * sizeof(float));
1904 if (m_i > 0 && b_vec == NULL) {
1905 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1906 }
1907 for (i = 0; i < m_i * incb; i += incb) {
1908 b_vec[i] = 0.0;
1909 }
1910
1911 if ((order == blas_colmajor && side == blas_left_side) ||
1912 (order == blas_rowmajor && side == blas_right_side)) {
1913 incci = 1;
1914 inccij = ldc;
1915 } else {
1916 incci = ldc;
1917 inccij = 1;
1918 }
1919
1920 incci *= 2;
1921 inccij *= 2;
1922
1923
1924 if (randomize == 0) {
1925 /* First fill in the first row of A and the first column/row of B */
1926
1927 BLAS_cdot_s_c_testgen(m_i, 0, 0, norm, blas_no_conj,
1928 alpha, alpha_flag, beta, beta_flag,
1929 b_vec, a_vec, seed, c_elem,
1930 head_r_true_elem, tail_r_true_elem);
1931
1932 cij = 0;
1933 c_i[cij] = c_elem[0];
1934 c_i[cij + 1] = c_elem[1];
1935 head_r_true[cij] = head_r_true_elem[0];
1936 head_r_true[cij + 1] = head_r_true_elem[1];
1937 tail_r_true[cij] = tail_r_true_elem[0];
1938 tail_r_true[cij + 1] = tail_r_true_elem[1];
1939
1940 /* Copy a_vec to first row of A */
1941 csy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
1942
1943 /* set every column of B to be b_vec */
1944 for (j = 0; j < n_i; j++) {
1945 if (side == blas_left_side)
1946 sge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1947 else
1948 sge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1949 }
1950
1951 /* Fill in rest of matrix A */
1952 cij = incci;
1953 for (i = 1; i < m_i; i++, cij += incci) {
1954 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
1955 BLAS_cdot_s_c_testgen(m_i, i, m_i - i, norm,
1956 blas_no_conj, alpha, 1,
1957 beta, 1, b_vec, a_vec, seed,
1958 c_elem, head_r_true_elem, tail_r_true_elem);
1959
1960 csy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
1961
1962 c_i[cij] = c_elem[0];
1963 c_i[cij + 1] = c_elem[1];
1964 head_r_true[cij] = head_r_true_elem[0];
1965 head_r_true[cij + 1] = head_r_true_elem[1];
1966 tail_r_true[cij] = tail_r_true_elem[0];
1967 tail_r_true[cij + 1] = tail_r_true_elem[1];
1968 }
1969
1970 /* Now fill in c and r_true */
1971 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1972 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
1973 c_elem[0] = c_i[ci];
1974 c_elem[1] = c_i[ci + 1];
1975 c_i[cij] = c_elem[0];
1976 c_i[cij + 1] = c_elem[1];
1977 head_r_true[cij] = head_r_true[ci];
1978 tail_r_true[cij] = tail_r_true[ci];
1979 head_r_true[cij + 1] = head_r_true[ci + 1];
1980 tail_r_true[cij + 1] = tail_r_true[ci + 1];
1981 }
1982 }
1983 } else {
1984
1985
1986 float *bb_vec;
1987
1988
1989 bb_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1990 if (m_i > 0 && bb_vec == NULL) {
1991 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1992 }
1993
1994 if (alpha_flag == 0) {
1995 c_elem[0] = xrand(seed);
1996 c_elem[1] = xrand(seed);
1997 alpha_i[0] = c_elem[0];
1998 alpha_i[0 + 1] = c_elem[1];
1999 }
2000 if (beta_flag == 0) {
2001 c_elem[0] = xrand(seed);
2002 c_elem[1] = xrand(seed);
2003 beta_i[0] = c_elem[0];
2004 beta_i[0 + 1] = c_elem[1];
2005 }
2006
2007 if ((order == blas_colmajor && side == blas_left_side) ||
2008 (order == blas_rowmajor && side == blas_right_side)) {
2009 incai = incbi = 1;
2010 incbij = ldb;
2011 incaij = lda;
2012 } else {
2013 incai = lda;
2014 incbi = ldb;
2015 incaij = incbij = 1;
2016 }
2017
2018
2019
2020 incai *= 2;
2021 incaij *= 2;
2022
2023 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2024 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2025 a_elem[0] = xrand(seed);
2026 a_elem[1] = xrand(seed);
2027 a_i[aij] = a_elem[0];
2028 a_i[aij + 1] = a_elem[1];
2029 }
2030 }
2031
2032 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2033 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2034 b_elem = xrand(seed);
2035 b_i[bij] = b_elem;
2036 }
2037 }
2038
2039 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2040 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
2041
2042
2043 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2044
2045 if (side == blas_left_side)
2046 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2047 else
2048 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2049 {
2050 int r;
2051 for (r = 0; r < m_i; r++) {
2052 bb_vec[2 * r] = b_vec[r];
2053 bb_vec[2 * r + 1] = 0.0;
2054 }
2055 }
2056
2057
2058 BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
2059 beta, 1,
2060 bb_vec,
2061 a_vec,
2062 seed, c_elem, head_r_true_elem, tail_r_true_elem);
2063
2064 c_i[cij] = c_elem[0];
2065 c_i[cij + 1] = c_elem[1];
2066 head_r_true[cij] = head_r_true_elem[0];
2067 head_r_true[cij + 1] = head_r_true_elem[1];
2068 tail_r_true[cij] = tail_r_true_elem[0];
2069 tail_r_true[cij + 1] = tail_r_true_elem[1];
2070 }
2071 }
2072
2073
2074 blas_free(bb_vec);
2075 }
2076
2077 blas_free(a_vec);
2078 blas_free(b_vec);
2079 }
BLAS_zsymm_d_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,double * a,int lda,double * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)2080 void BLAS_zsymm_d_d_testgen(int norm, enum blas_order_type order,
2081 enum blas_uplo_type uplo,
2082 enum blas_side_type side, int m, int n,
2083 int randomize, void *alpha, int alpha_flag,
2084 void *beta, int beta_flag, double *a, int lda,
2085 double *b, int ldb, void *c, int ldc, int *seed,
2086 double *head_r_true, double *tail_r_true)
2087
2088 /*
2089 * Purpose
2090 * =======
2091 *
2092 * Generates the test inputs to BLAS_zsymm_d_d{_x}
2093 *
2094 * Arguments
2095 * =========
2096 *
2097 * norm (input) int
2098 * = -1: the vectors are scaled with norms near underflow.
2099 * = 0: the vectors have norms of order 1.
2100 * = 1: the vectors are scaled with norms near overflow.
2101 *
2102 * order (input) enum blas_side_type
2103 * storage format of the matrices
2104 *
2105 * uplo (input) enum blas_uplo_type
2106 * which half of the symmetric matrix a is to be stored.
2107 *
2108 * side (input) enum blas_side_type
2109 * which side of matrix b matrix a is to be multiplied.
2110 *
2111 * m n (input) int
2112 * sizes of matrices a, b, c:
2113 * matrix a is m-by-m for left multiplication
2114 * n-by-n otherwise,
2115 * matrices b, c are m-by-n.
2116 *
2117 * randomize (input) int
2118 * if 0, entries in matrices A, B will be chosen for
2119 * maximum cancellation, but with less randomness.
2120 * if 1, every entry in the matrix A, B will be
2121 * random.
2122 *
2123 * alpha (input/output) void*
2124 * if alpha_flag = 1, alpha is input.
2125 * if alpha_flag = 0, alpha is output.
2126 *
2127 * alpha_flag (input) int
2128 * = 0: alpha is free, and is output.
2129 * = 1: alpha is fixed on input.
2130 *
2131 * beta (input/output) void*
2132 * if beta_flag = 1, beta is input.
2133 * if beta_flag = 0, beta is output.
2134 *
2135 * beta_flag (input) int
2136 * = 0: beta is free, and is output.
2137 * = 1: beta is fixed on input.
2138 *
2139 * a (input/output) double*
2140 *
2141 * lda (input) lda
2142 * leading dimension of matrix A.
2143 *
2144 * b (input/output) double*
2145 *
2146 * ldb (input) int
2147 * leading dimension of matrix B.
2148 *
2149 * c (input/output) void*
2150 * generated matrix C that will be used as an input to SYMM.
2151 *
2152 * ldc (input) int
2153 * leading dimension of matrix C.
2154 *
2155 * seed (input/output) int *
2156 * seed for the random number generator.
2157 *
2158 * double (output) *head_r_true
2159 * the leading part of the truth in double-double.
2160 *
2161 * double (output) *tail_r_true
2162 * the trailing part of the truth in double-double
2163 *
2164 */
2165 {
2166
2167 int i, j;
2168 int cij, ci;
2169 int bij, bi;
2170 int aij, ai;
2171 int inccij, incci;
2172 int incbij, incbi;
2173 int incaij, incai;
2174 int inca, incb;
2175 int m_i, n_i;
2176
2177 double c_elem[2];
2178 double a_elem;
2179 double b_elem;
2180 double head_r_true_elem[2], tail_r_true_elem[2];
2181
2182 double *a_vec;
2183 double *b_vec;
2184
2185 double *c_i = (double *) c;
2186 double *alpha_i = (double *) alpha;
2187 double *beta_i = (double *) beta;
2188 double *a_i = a;
2189 double *b_i = b;
2190
2191 if (side == blas_left_side) {
2192 m_i = m;
2193 n_i = n;
2194 } else {
2195 m_i = n;
2196 n_i = m;
2197 }
2198
2199 inca = incb = 1;
2200
2201
2202 a_vec = (double *) blas_malloc(m_i * sizeof(double));
2203 if (m_i > 0 && a_vec == NULL) {
2204 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2205 }
2206 for (i = 0; i < m_i * inca; i += inca) {
2207 a_vec[i] = 0.0;
2208 }
2209 b_vec = (double *) blas_malloc(m_i * sizeof(double));
2210 if (m_i > 0 && b_vec == NULL) {
2211 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2212 }
2213 for (i = 0; i < m_i * incb; i += incb) {
2214 b_vec[i] = 0.0;
2215 }
2216
2217 if ((order == blas_colmajor && side == blas_left_side) ||
2218 (order == blas_rowmajor && side == blas_right_side)) {
2219 incci = 1;
2220 inccij = ldc;
2221 } else {
2222 incci = ldc;
2223 inccij = 1;
2224 }
2225
2226 incci *= 2;
2227 inccij *= 2;
2228
2229
2230 if (randomize == 0) {
2231 /* First fill in the first row of A and the first column/row of B */
2232
2233 BLAS_zdot_d_d_testgen(m_i, 0, 0, norm, blas_no_conj,
2234 alpha, alpha_flag, beta, beta_flag,
2235 b_vec, a_vec, seed, c_elem,
2236 head_r_true_elem, tail_r_true_elem);
2237
2238 cij = 0;
2239 c_i[cij] = c_elem[0];
2240 c_i[cij + 1] = c_elem[1];
2241 head_r_true[cij] = head_r_true_elem[0];
2242 head_r_true[cij + 1] = head_r_true_elem[1];
2243 tail_r_true[cij] = tail_r_true_elem[0];
2244 tail_r_true[cij + 1] = tail_r_true_elem[1];
2245
2246 /* Copy a_vec to first row of A */
2247 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
2248
2249 /* set every column of B to be b_vec */
2250 for (j = 0; j < n_i; j++) {
2251 if (side == blas_left_side)
2252 dge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2253 else
2254 dge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2255 }
2256
2257 /* Fill in rest of matrix A */
2258 cij = incci;
2259 for (i = 1; i < m_i; i++, cij += incci) {
2260 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
2261 BLAS_zdot_d_d_testgen(m_i, i, m_i - i, norm,
2262 blas_no_conj, alpha, 1,
2263 beta, 1, b_vec, a_vec, seed,
2264 c_elem, head_r_true_elem, tail_r_true_elem);
2265
2266 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
2267
2268 c_i[cij] = c_elem[0];
2269 c_i[cij + 1] = c_elem[1];
2270 head_r_true[cij] = head_r_true_elem[0];
2271 head_r_true[cij + 1] = head_r_true_elem[1];
2272 tail_r_true[cij] = tail_r_true_elem[0];
2273 tail_r_true[cij + 1] = tail_r_true_elem[1];
2274 }
2275
2276 /* Now fill in c and r_true */
2277 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2278 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
2279 c_elem[0] = c_i[ci];
2280 c_elem[1] = c_i[ci + 1];
2281 c_i[cij] = c_elem[0];
2282 c_i[cij + 1] = c_elem[1];
2283 head_r_true[cij] = head_r_true[ci];
2284 tail_r_true[cij] = tail_r_true[ci];
2285 head_r_true[cij + 1] = head_r_true[ci + 1];
2286 tail_r_true[cij + 1] = tail_r_true[ci + 1];
2287 }
2288 }
2289 } else {
2290
2291 double *aa_vec;
2292 double *bb_vec;
2293
2294 aa_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2295 if (m_i > 0 && aa_vec == NULL) {
2296 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2297 }
2298 bb_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2299 if (m_i > 0 && bb_vec == NULL) {
2300 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2301 }
2302
2303 if (alpha_flag == 0) {
2304 c_elem[0] = (float) xrand(seed);
2305 c_elem[1] = (float) xrand(seed);
2306 alpha_i[0] = c_elem[0];
2307 alpha_i[0 + 1] = c_elem[1];
2308 }
2309 if (beta_flag == 0) {
2310 c_elem[0] = (float) xrand(seed);
2311 c_elem[1] = (float) xrand(seed);
2312 beta_i[0] = c_elem[0];
2313 beta_i[0 + 1] = c_elem[1];
2314 }
2315
2316 if ((order == blas_colmajor && side == blas_left_side) ||
2317 (order == blas_rowmajor && side == blas_right_side)) {
2318 incai = incbi = 1;
2319 incbij = ldb;
2320 incaij = lda;
2321 } else {
2322 incai = lda;
2323 incbi = ldb;
2324 incaij = incbij = 1;
2325 }
2326
2327
2328
2329
2330
2331
2332 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2333 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2334 a_elem = (float) xrand(seed);
2335 a_i[aij] = a_elem;
2336 }
2337 }
2338
2339 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2340 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2341 b_elem = (float) xrand(seed);
2342 b_i[bij] = b_elem;
2343 }
2344 }
2345
2346 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2347 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
2348 {
2349 int r;
2350 for (r = 0; r < m_i; r++) {
2351 aa_vec[2 * r] = a_vec[r];
2352 aa_vec[2 * r + 1] = 0.0;
2353 }
2354 }
2355
2356 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2357
2358 if (side == blas_left_side)
2359 dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2360 else
2361 dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2362 {
2363 int r;
2364 for (r = 0; r < m_i; r++) {
2365 bb_vec[2 * r] = b_vec[r];
2366 bb_vec[2 * r + 1] = 0.0;
2367 }
2368 }
2369
2370
2371 BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
2372 beta, 1,
2373 bb_vec,
2374 aa_vec,
2375 seed, c_elem, head_r_true_elem, tail_r_true_elem);
2376
2377 c_i[cij] = c_elem[0];
2378 c_i[cij + 1] = c_elem[1];
2379 head_r_true[cij] = head_r_true_elem[0];
2380 head_r_true[cij + 1] = head_r_true_elem[1];
2381 tail_r_true[cij] = tail_r_true_elem[0];
2382 tail_r_true[cij + 1] = tail_r_true_elem[1];
2383 }
2384 }
2385
2386 blas_free(aa_vec);
2387 blas_free(bb_vec);
2388 }
2389
2390 blas_free(a_vec);
2391 blas_free(b_vec);
2392 }
BLAS_zsymm_d_z_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,double * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)2393 void BLAS_zsymm_d_z_testgen(int norm, enum blas_order_type order,
2394 enum blas_uplo_type uplo,
2395 enum blas_side_type side, int m, int n,
2396 int randomize, void *alpha, int alpha_flag,
2397 void *beta, int beta_flag, double *a, int lda,
2398 void *b, int ldb, void *c, int ldc, int *seed,
2399 double *head_r_true, double *tail_r_true)
2400
2401 /*
2402 * Purpose
2403 * =======
2404 *
2405 * Generates the test inputs to BLAS_zsymm_d_z{_x}
2406 *
2407 * Arguments
2408 * =========
2409 *
2410 * norm (input) int
2411 * = -1: the vectors are scaled with norms near underflow.
2412 * = 0: the vectors have norms of order 1.
2413 * = 1: the vectors are scaled with norms near overflow.
2414 *
2415 * order (input) enum blas_side_type
2416 * storage format of the matrices
2417 *
2418 * uplo (input) enum blas_uplo_type
2419 * which half of the symmetric matrix a is to be stored.
2420 *
2421 * side (input) enum blas_side_type
2422 * which side of matrix b matrix a is to be multiplied.
2423 *
2424 * m n (input) int
2425 * sizes of matrices a, b, c:
2426 * matrix a is m-by-m for left multiplication
2427 * n-by-n otherwise,
2428 * matrices b, c are m-by-n.
2429 *
2430 * randomize (input) int
2431 * if 0, entries in matrices A, B will be chosen for
2432 * maximum cancellation, but with less randomness.
2433 * if 1, every entry in the matrix A, B will be
2434 * random.
2435 *
2436 * alpha (input/output) void*
2437 * if alpha_flag = 1, alpha is input.
2438 * if alpha_flag = 0, alpha is output.
2439 *
2440 * alpha_flag (input) int
2441 * = 0: alpha is free, and is output.
2442 * = 1: alpha is fixed on input.
2443 *
2444 * beta (input/output) void*
2445 * if beta_flag = 1, beta is input.
2446 * if beta_flag = 0, beta is output.
2447 *
2448 * beta_flag (input) int
2449 * = 0: beta is free, and is output.
2450 * = 1: beta is fixed on input.
2451 *
2452 * a (input/output) double*
2453 *
2454 * lda (input) lda
2455 * leading dimension of matrix A.
2456 *
2457 * b (input/output) void*
2458 *
2459 * ldb (input) int
2460 * leading dimension of matrix B.
2461 *
2462 * c (input/output) void*
2463 * generated matrix C that will be used as an input to SYMM.
2464 *
2465 * ldc (input) int
2466 * leading dimension of matrix C.
2467 *
2468 * seed (input/output) int *
2469 * seed for the random number generator.
2470 *
2471 * double (output) *head_r_true
2472 * the leading part of the truth in double-double.
2473 *
2474 * double (output) *tail_r_true
2475 * the trailing part of the truth in double-double
2476 *
2477 */
2478 {
2479
2480 int i, j;
2481 int cij, ci;
2482 int bij, bi;
2483 int aij, ai;
2484 int inccij, incci;
2485 int incbij, incbi;
2486 int incaij, incai;
2487 int inca, incb;
2488 int m_i, n_i;
2489
2490 double c_elem[2];
2491 double a_elem;
2492 double b_elem[2];
2493 double head_r_true_elem[2], tail_r_true_elem[2];
2494
2495 double *a_vec;
2496 double *b_vec;
2497
2498 double *c_i = (double *) c;
2499 double *alpha_i = (double *) alpha;
2500 double *beta_i = (double *) beta;
2501 double *a_i = a;
2502 double *b_i = (double *) b;
2503
2504 if (side == blas_left_side) {
2505 m_i = m;
2506 n_i = n;
2507 } else {
2508 m_i = n;
2509 n_i = m;
2510 }
2511
2512 inca = incb = 1;
2513
2514 incb *= 2;
2515 a_vec = (double *) blas_malloc(m_i * sizeof(double));
2516 if (m_i > 0 && a_vec == NULL) {
2517 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2518 }
2519 for (i = 0; i < m_i * inca; i += inca) {
2520 a_vec[i] = 0.0;
2521 }
2522 b_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2523 if (m_i > 0 && b_vec == NULL) {
2524 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2525 }
2526 for (i = 0; i < m_i * incb; i += incb) {
2527 b_vec[i] = 0.0;
2528 b_vec[i + 1] = 0.0;
2529 }
2530
2531 if ((order == blas_colmajor && side == blas_left_side) ||
2532 (order == blas_rowmajor && side == blas_right_side)) {
2533 incci = 1;
2534 inccij = ldc;
2535 } else {
2536 incci = ldc;
2537 inccij = 1;
2538 }
2539
2540 incci *= 2;
2541 inccij *= 2;
2542
2543
2544 if (randomize == 0) {
2545 /* First fill in the first row of A and the first column/row of B */
2546
2547 BLAS_zdot_z_d_testgen(m_i, 0, 0, norm, blas_no_conj,
2548 alpha, alpha_flag, beta, beta_flag,
2549 b_vec, a_vec, seed, c_elem,
2550 head_r_true_elem, tail_r_true_elem);
2551
2552 cij = 0;
2553 c_i[cij] = c_elem[0];
2554 c_i[cij + 1] = c_elem[1];
2555 head_r_true[cij] = head_r_true_elem[0];
2556 head_r_true[cij + 1] = head_r_true_elem[1];
2557 tail_r_true[cij] = tail_r_true_elem[0];
2558 tail_r_true[cij + 1] = tail_r_true_elem[1];
2559
2560 /* Copy a_vec to first row of A */
2561 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
2562
2563 /* set every column of B to be b_vec */
2564 for (j = 0; j < n_i; j++) {
2565 if (side == blas_left_side)
2566 zge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2567 else
2568 zge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2569 }
2570
2571 /* Fill in rest of matrix A */
2572 cij = incci;
2573 for (i = 1; i < m_i; i++, cij += incci) {
2574 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
2575 BLAS_zdot_z_d_testgen(m_i, i, m_i - i, norm,
2576 blas_no_conj, alpha, 1,
2577 beta, 1, b_vec, a_vec, seed,
2578 c_elem, head_r_true_elem, tail_r_true_elem);
2579
2580 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
2581
2582 c_i[cij] = c_elem[0];
2583 c_i[cij + 1] = c_elem[1];
2584 head_r_true[cij] = head_r_true_elem[0];
2585 head_r_true[cij + 1] = head_r_true_elem[1];
2586 tail_r_true[cij] = tail_r_true_elem[0];
2587 tail_r_true[cij + 1] = tail_r_true_elem[1];
2588 }
2589
2590 /* Now fill in c and r_true */
2591 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2592 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
2593 c_elem[0] = c_i[ci];
2594 c_elem[1] = c_i[ci + 1];
2595 c_i[cij] = c_elem[0];
2596 c_i[cij + 1] = c_elem[1];
2597 head_r_true[cij] = head_r_true[ci];
2598 tail_r_true[cij] = tail_r_true[ci];
2599 head_r_true[cij + 1] = head_r_true[ci + 1];
2600 tail_r_true[cij + 1] = tail_r_true[ci + 1];
2601 }
2602 }
2603 } else {
2604
2605 double *aa_vec;
2606
2607
2608 aa_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2609 if (m_i > 0 && aa_vec == NULL) {
2610 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2611 }
2612
2613
2614 if (alpha_flag == 0) {
2615 c_elem[0] = (float) xrand(seed);
2616 c_elem[1] = (float) xrand(seed);
2617 alpha_i[0] = c_elem[0];
2618 alpha_i[0 + 1] = c_elem[1];
2619 }
2620 if (beta_flag == 0) {
2621 c_elem[0] = (float) xrand(seed);
2622 c_elem[1] = (float) xrand(seed);
2623 beta_i[0] = c_elem[0];
2624 beta_i[0 + 1] = c_elem[1];
2625 }
2626
2627 if ((order == blas_colmajor && side == blas_left_side) ||
2628 (order == blas_rowmajor && side == blas_right_side)) {
2629 incai = incbi = 1;
2630 incbij = ldb;
2631 incaij = lda;
2632 } else {
2633 incai = lda;
2634 incbi = ldb;
2635 incaij = incbij = 1;
2636 }
2637
2638 incbi *= 2;
2639 incbij *= 2;
2640
2641
2642
2643 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2644 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2645 a_elem = (float) xrand(seed);
2646 a_i[aij] = a_elem;
2647 }
2648 }
2649
2650 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2651 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2652 b_elem[0] = (float) xrand(seed);
2653 b_elem[1] = (float) xrand(seed);
2654 b_i[bij] = b_elem[0];
2655 b_i[bij + 1] = b_elem[1];
2656 }
2657 }
2658
2659 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2660 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
2661 {
2662 int r;
2663 for (r = 0; r < m_i; r++) {
2664 aa_vec[2 * r] = a_vec[r];
2665 aa_vec[2 * r + 1] = 0.0;
2666 }
2667 }
2668
2669 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2670
2671 if (side == blas_left_side)
2672 zge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2673 else
2674 zge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2675
2676
2677
2678 BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
2679 beta, 1,
2680 b_vec,
2681 aa_vec,
2682 seed, c_elem, head_r_true_elem, tail_r_true_elem);
2683
2684 c_i[cij] = c_elem[0];
2685 c_i[cij + 1] = c_elem[1];
2686 head_r_true[cij] = head_r_true_elem[0];
2687 head_r_true[cij + 1] = head_r_true_elem[1];
2688 tail_r_true[cij] = tail_r_true_elem[0];
2689 tail_r_true[cij + 1] = tail_r_true_elem[1];
2690 }
2691 }
2692
2693 blas_free(aa_vec);
2694
2695 }
2696
2697 blas_free(a_vec);
2698 blas_free(b_vec);
2699 }
BLAS_zsymm_z_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,double * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)2700 void BLAS_zsymm_z_d_testgen(int norm, enum blas_order_type order,
2701 enum blas_uplo_type uplo,
2702 enum blas_side_type side, int m, int n,
2703 int randomize, void *alpha, int alpha_flag,
2704 void *beta, int beta_flag, void *a, int lda,
2705 double *b, int ldb, void *c, int ldc, int *seed,
2706 double *head_r_true, double *tail_r_true)
2707
2708 /*
2709 * Purpose
2710 * =======
2711 *
2712 * Generates the test inputs to BLAS_zsymm_z_d{_x}
2713 *
2714 * Arguments
2715 * =========
2716 *
2717 * norm (input) int
2718 * = -1: the vectors are scaled with norms near underflow.
2719 * = 0: the vectors have norms of order 1.
2720 * = 1: the vectors are scaled with norms near overflow.
2721 *
2722 * order (input) enum blas_side_type
2723 * storage format of the matrices
2724 *
2725 * uplo (input) enum blas_uplo_type
2726 * which half of the symmetric matrix a is to be stored.
2727 *
2728 * side (input) enum blas_side_type
2729 * which side of matrix b matrix a is to be multiplied.
2730 *
2731 * m n (input) int
2732 * sizes of matrices a, b, c:
2733 * matrix a is m-by-m for left multiplication
2734 * n-by-n otherwise,
2735 * matrices b, c are m-by-n.
2736 *
2737 * randomize (input) int
2738 * if 0, entries in matrices A, B will be chosen for
2739 * maximum cancellation, but with less randomness.
2740 * if 1, every entry in the matrix A, B will be
2741 * random.
2742 *
2743 * alpha (input/output) void*
2744 * if alpha_flag = 1, alpha is input.
2745 * if alpha_flag = 0, alpha is output.
2746 *
2747 * alpha_flag (input) int
2748 * = 0: alpha is free, and is output.
2749 * = 1: alpha is fixed on input.
2750 *
2751 * beta (input/output) void*
2752 * if beta_flag = 1, beta is input.
2753 * if beta_flag = 0, beta is output.
2754 *
2755 * beta_flag (input) int
2756 * = 0: beta is free, and is output.
2757 * = 1: beta is fixed on input.
2758 *
2759 * a (input/output) void*
2760 *
2761 * lda (input) lda
2762 * leading dimension of matrix A.
2763 *
2764 * b (input/output) double*
2765 *
2766 * ldb (input) int
2767 * leading dimension of matrix B.
2768 *
2769 * c (input/output) void*
2770 * generated matrix C that will be used as an input to SYMM.
2771 *
2772 * ldc (input) int
2773 * leading dimension of matrix C.
2774 *
2775 * seed (input/output) int *
2776 * seed for the random number generator.
2777 *
2778 * double (output) *head_r_true
2779 * the leading part of the truth in double-double.
2780 *
2781 * double (output) *tail_r_true
2782 * the trailing part of the truth in double-double
2783 *
2784 */
2785 {
2786
2787 int i, j;
2788 int cij, ci;
2789 int bij, bi;
2790 int aij, ai;
2791 int inccij, incci;
2792 int incbij, incbi;
2793 int incaij, incai;
2794 int inca, incb;
2795 int m_i, n_i;
2796
2797 double c_elem[2];
2798 double a_elem[2];
2799 double b_elem;
2800 double head_r_true_elem[2], tail_r_true_elem[2];
2801
2802 double *a_vec;
2803 double *b_vec;
2804
2805 double *c_i = (double *) c;
2806 double *alpha_i = (double *) alpha;
2807 double *beta_i = (double *) beta;
2808 double *a_i = (double *) a;
2809 double *b_i = b;
2810
2811 if (side == blas_left_side) {
2812 m_i = m;
2813 n_i = n;
2814 } else {
2815 m_i = n;
2816 n_i = m;
2817 }
2818
2819 inca = incb = 1;
2820 inca *= 2;
2821
2822 a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2823 if (m_i > 0 && a_vec == NULL) {
2824 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2825 }
2826 for (i = 0; i < m_i * inca; i += inca) {
2827 a_vec[i] = 0.0;
2828 a_vec[i + 1] = 0.0;
2829 }
2830 b_vec = (double *) blas_malloc(m_i * sizeof(double));
2831 if (m_i > 0 && b_vec == NULL) {
2832 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2833 }
2834 for (i = 0; i < m_i * incb; i += incb) {
2835 b_vec[i] = 0.0;
2836 }
2837
2838 if ((order == blas_colmajor && side == blas_left_side) ||
2839 (order == blas_rowmajor && side == blas_right_side)) {
2840 incci = 1;
2841 inccij = ldc;
2842 } else {
2843 incci = ldc;
2844 inccij = 1;
2845 }
2846
2847 incci *= 2;
2848 inccij *= 2;
2849
2850
2851 if (randomize == 0) {
2852 /* First fill in the first row of A and the first column/row of B */
2853
2854 BLAS_zdot_d_z_testgen(m_i, 0, 0, norm, blas_no_conj,
2855 alpha, alpha_flag, beta, beta_flag,
2856 b_vec, a_vec, seed, c_elem,
2857 head_r_true_elem, tail_r_true_elem);
2858
2859 cij = 0;
2860 c_i[cij] = c_elem[0];
2861 c_i[cij + 1] = c_elem[1];
2862 head_r_true[cij] = head_r_true_elem[0];
2863 head_r_true[cij + 1] = head_r_true_elem[1];
2864 tail_r_true[cij] = tail_r_true_elem[0];
2865 tail_r_true[cij + 1] = tail_r_true_elem[1];
2866
2867 /* Copy a_vec to first row of A */
2868 zsy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
2869
2870 /* set every column of B to be b_vec */
2871 for (j = 0; j < n_i; j++) {
2872 if (side == blas_left_side)
2873 dge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2874 else
2875 dge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2876 }
2877
2878 /* Fill in rest of matrix A */
2879 cij = incci;
2880 for (i = 1; i < m_i; i++, cij += incci) {
2881 zsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
2882 BLAS_zdot_d_z_testgen(m_i, i, m_i - i, norm,
2883 blas_no_conj, alpha, 1,
2884 beta, 1, b_vec, a_vec, seed,
2885 c_elem, head_r_true_elem, tail_r_true_elem);
2886
2887 zsy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
2888
2889 c_i[cij] = c_elem[0];
2890 c_i[cij + 1] = c_elem[1];
2891 head_r_true[cij] = head_r_true_elem[0];
2892 head_r_true[cij + 1] = head_r_true_elem[1];
2893 tail_r_true[cij] = tail_r_true_elem[0];
2894 tail_r_true[cij + 1] = tail_r_true_elem[1];
2895 }
2896
2897 /* Now fill in c and r_true */
2898 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2899 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
2900 c_elem[0] = c_i[ci];
2901 c_elem[1] = c_i[ci + 1];
2902 c_i[cij] = c_elem[0];
2903 c_i[cij + 1] = c_elem[1];
2904 head_r_true[cij] = head_r_true[ci];
2905 tail_r_true[cij] = tail_r_true[ci];
2906 head_r_true[cij + 1] = head_r_true[ci + 1];
2907 tail_r_true[cij + 1] = tail_r_true[ci + 1];
2908 }
2909 }
2910 } else {
2911
2912
2913 double *bb_vec;
2914
2915
2916 bb_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2917 if (m_i > 0 && bb_vec == NULL) {
2918 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2919 }
2920
2921 if (alpha_flag == 0) {
2922 c_elem[0] = xrand(seed);
2923 c_elem[1] = xrand(seed);
2924 alpha_i[0] = c_elem[0];
2925 alpha_i[0 + 1] = c_elem[1];
2926 }
2927 if (beta_flag == 0) {
2928 c_elem[0] = xrand(seed);
2929 c_elem[1] = xrand(seed);
2930 beta_i[0] = c_elem[0];
2931 beta_i[0 + 1] = c_elem[1];
2932 }
2933
2934 if ((order == blas_colmajor && side == blas_left_side) ||
2935 (order == blas_rowmajor && side == blas_right_side)) {
2936 incai = incbi = 1;
2937 incbij = ldb;
2938 incaij = lda;
2939 } else {
2940 incai = lda;
2941 incbi = ldb;
2942 incaij = incbij = 1;
2943 }
2944
2945
2946
2947 incai *= 2;
2948 incaij *= 2;
2949
2950 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2951 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2952 a_elem[0] = xrand(seed);
2953 a_elem[1] = xrand(seed);
2954 a_i[aij] = a_elem[0];
2955 a_i[aij + 1] = a_elem[1];
2956 }
2957 }
2958
2959 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2960 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2961 b_elem = xrand(seed);
2962 b_i[bij] = b_elem;
2963 }
2964 }
2965
2966 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2967 zsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
2968
2969
2970 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2971
2972 if (side == blas_left_side)
2973 dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2974 else
2975 dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2976 {
2977 int r;
2978 for (r = 0; r < m_i; r++) {
2979 bb_vec[2 * r] = b_vec[r];
2980 bb_vec[2 * r + 1] = 0.0;
2981 }
2982 }
2983
2984
2985 BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
2986 beta, 1,
2987 bb_vec,
2988 a_vec,
2989 seed, c_elem, head_r_true_elem, tail_r_true_elem);
2990
2991 c_i[cij] = c_elem[0];
2992 c_i[cij + 1] = c_elem[1];
2993 head_r_true[cij] = head_r_true_elem[0];
2994 head_r_true[cij + 1] = head_r_true_elem[1];
2995 tail_r_true[cij] = tail_r_true_elem[0];
2996 tail_r_true[cij + 1] = tail_r_true_elem[1];
2997 }
2998 }
2999
3000
3001 blas_free(bb_vec);
3002 }
3003
3004 blas_free(a_vec);
3005 blas_free(b_vec);
3006 }
BLAS_dsymm_s_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,double * alpha,int alpha_flag,double * beta,int beta_flag,float * a,int lda,float * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3007 void BLAS_dsymm_s_s_testgen(int norm, enum blas_order_type order,
3008 enum blas_uplo_type uplo,
3009 enum blas_side_type side, int m, int n,
3010 int randomize, double *alpha, int alpha_flag,
3011 double *beta, int beta_flag, float *a, int lda,
3012 float *b, int ldb, double *c, int ldc, int *seed,
3013 double *head_r_true, double *tail_r_true)
3014
3015 /*
3016 * Purpose
3017 * =======
3018 *
3019 * Generates the test inputs to BLAS_dsymm_s_s{_x}
3020 *
3021 * Arguments
3022 * =========
3023 *
3024 * norm (input) int
3025 * = -1: the vectors are scaled with norms near underflow.
3026 * = 0: the vectors have norms of order 1.
3027 * = 1: the vectors are scaled with norms near overflow.
3028 *
3029 * order (input) enum blas_side_type
3030 * storage format of the matrices
3031 *
3032 * uplo (input) enum blas_uplo_type
3033 * which half of the symmetric matrix a is to be stored.
3034 *
3035 * side (input) enum blas_side_type
3036 * which side of matrix b matrix a is to be multiplied.
3037 *
3038 * m n (input) int
3039 * sizes of matrices a, b, c:
3040 * matrix a is m-by-m for left multiplication
3041 * n-by-n otherwise,
3042 * matrices b, c are m-by-n.
3043 *
3044 * randomize (input) int
3045 * if 0, entries in matrices A, B will be chosen for
3046 * maximum cancellation, but with less randomness.
3047 * if 1, every entry in the matrix A, B will be
3048 * random.
3049 *
3050 * alpha (input/output) double*
3051 * if alpha_flag = 1, alpha is input.
3052 * if alpha_flag = 0, alpha is output.
3053 *
3054 * alpha_flag (input) int
3055 * = 0: alpha is free, and is output.
3056 * = 1: alpha is fixed on input.
3057 *
3058 * beta (input/output) double*
3059 * if beta_flag = 1, beta is input.
3060 * if beta_flag = 0, beta is output.
3061 *
3062 * beta_flag (input) int
3063 * = 0: beta is free, and is output.
3064 * = 1: beta is fixed on input.
3065 *
3066 * a (input/output) float*
3067 *
3068 * lda (input) lda
3069 * leading dimension of matrix A.
3070 *
3071 * b (input/output) float*
3072 *
3073 * ldb (input) int
3074 * leading dimension of matrix B.
3075 *
3076 * c (input/output) double*
3077 * generated matrix C that will be used as an input to SYMM.
3078 *
3079 * ldc (input) int
3080 * leading dimension of matrix C.
3081 *
3082 * seed (input/output) int *
3083 * seed for the random number generator.
3084 *
3085 * double (output) *head_r_true
3086 * the leading part of the truth in double-double.
3087 *
3088 * double (output) *tail_r_true
3089 * the trailing part of the truth in double-double
3090 *
3091 */
3092 {
3093
3094 int i, j;
3095 int cij, ci;
3096 int bij, bi;
3097 int aij, ai;
3098 int inccij, incci;
3099 int incbij, incbi;
3100 int incaij, incai;
3101 int inca, incb;
3102 int m_i, n_i;
3103
3104 double c_elem;
3105 float a_elem;
3106 float b_elem;
3107 double head_r_true_elem, tail_r_true_elem;
3108
3109 float *a_vec;
3110 float *b_vec;
3111
3112 double *c_i = c;
3113 double *alpha_i = alpha;
3114 double *beta_i = beta;
3115 float *a_i = a;
3116 float *b_i = b;
3117
3118 if (side == blas_left_side) {
3119 m_i = m;
3120 n_i = n;
3121 } else {
3122 m_i = n;
3123 n_i = m;
3124 }
3125
3126 inca = incb = 1;
3127
3128
3129 a_vec = (float *) blas_malloc(m_i * sizeof(float));
3130 if (m_i > 0 && a_vec == NULL) {
3131 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3132 }
3133 for (i = 0; i < m_i * inca; i += inca) {
3134 a_vec[i] = 0.0;
3135 }
3136 b_vec = (float *) blas_malloc(m_i * sizeof(float));
3137 if (m_i > 0 && b_vec == NULL) {
3138 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3139 }
3140 for (i = 0; i < m_i * incb; i += incb) {
3141 b_vec[i] = 0.0;
3142 }
3143
3144 if ((order == blas_colmajor && side == blas_left_side) ||
3145 (order == blas_rowmajor && side == blas_right_side)) {
3146 incci = 1;
3147 inccij = ldc;
3148 } else {
3149 incci = ldc;
3150 inccij = 1;
3151 }
3152
3153
3154
3155
3156
3157 if (randomize == 0) {
3158 /* First fill in the first row of A and the first column/row of B */
3159
3160 BLAS_ddot_s_s_testgen(m_i, 0, 0, norm, blas_no_conj,
3161 alpha, alpha_flag, beta, beta_flag,
3162 b_vec, a_vec, seed, &c_elem,
3163 &head_r_true_elem, &tail_r_true_elem);
3164
3165 cij = 0;
3166 c_i[cij] = c_elem;
3167 head_r_true[cij] = head_r_true_elem;
3168 tail_r_true[cij] = tail_r_true_elem;
3169
3170 /* Copy a_vec to first row of A */
3171 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
3172
3173 /* set every column of B to be b_vec */
3174 for (j = 0; j < n_i; j++) {
3175 if (side == blas_left_side)
3176 sge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3177 else
3178 sge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3179 }
3180
3181 /* Fill in rest of matrix A */
3182 cij = incci;
3183 for (i = 1; i < m_i; i++, cij += incci) {
3184 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
3185 BLAS_ddot_s_s_testgen(m_i, i, m_i - i, norm,
3186 blas_no_conj, alpha, 1,
3187 beta, 1, b_vec, a_vec, seed,
3188 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3189
3190 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
3191
3192 c_i[cij] = c_elem;
3193 head_r_true[cij] = head_r_true_elem;
3194 tail_r_true[cij] = tail_r_true_elem;
3195 }
3196
3197 /* Now fill in c and r_true */
3198 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3199 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3200 c_elem = c_i[ci];
3201 c_i[cij] = c_elem;
3202 head_r_true[cij] = head_r_true[ci];
3203 tail_r_true[cij] = tail_r_true[ci];
3204 }
3205 }
3206 } else {
3207
3208
3209
3210
3211
3212
3213
3214 if (alpha_flag == 0) {
3215 c_elem = (float) xrand(seed);
3216 alpha_i[0] = c_elem;
3217 }
3218 if (beta_flag == 0) {
3219 c_elem = (float) xrand(seed);
3220 beta_i[0] = c_elem;
3221 }
3222
3223 if ((order == blas_colmajor && side == blas_left_side) ||
3224 (order == blas_rowmajor && side == blas_right_side)) {
3225 incai = incbi = 1;
3226 incbij = ldb;
3227 incaij = lda;
3228 } else {
3229 incai = lda;
3230 incbi = ldb;
3231 incaij = incbij = 1;
3232 }
3233
3234
3235
3236
3237
3238
3239 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
3240 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
3241 a_elem = (float) xrand(seed);
3242 a_i[aij] = a_elem;
3243 }
3244 }
3245
3246 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
3247 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
3248 b_elem = (float) xrand(seed);
3249 b_i[bij] = b_elem;
3250 }
3251 }
3252
3253 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3254 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
3255
3256
3257 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
3258
3259 if (side == blas_left_side)
3260 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3261 else
3262 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3263
3264
3265
3266 BLAS_ddot_s_s_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
3267 beta, 1, b_vec, a_vec, seed,
3268 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3269
3270 c_i[cij] = c_elem;
3271 head_r_true[cij] = head_r_true_elem;
3272 tail_r_true[cij] = tail_r_true_elem;
3273 }
3274 }
3275
3276
3277
3278 }
3279
3280 blas_free(a_vec);
3281 blas_free(b_vec);
3282 }
BLAS_dsymm_s_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,double * alpha,int alpha_flag,double * beta,int beta_flag,float * a,int lda,double * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3283 void BLAS_dsymm_s_d_testgen(int norm, enum blas_order_type order,
3284 enum blas_uplo_type uplo,
3285 enum blas_side_type side, int m, int n,
3286 int randomize, double *alpha, int alpha_flag,
3287 double *beta, int beta_flag, float *a, int lda,
3288 double *b, int ldb, double *c, int ldc, int *seed,
3289 double *head_r_true, double *tail_r_true)
3290
3291 /*
3292 * Purpose
3293 * =======
3294 *
3295 * Generates the test inputs to BLAS_dsymm_s_d{_x}
3296 *
3297 * Arguments
3298 * =========
3299 *
3300 * norm (input) int
3301 * = -1: the vectors are scaled with norms near underflow.
3302 * = 0: the vectors have norms of order 1.
3303 * = 1: the vectors are scaled with norms near overflow.
3304 *
3305 * order (input) enum blas_side_type
3306 * storage format of the matrices
3307 *
3308 * uplo (input) enum blas_uplo_type
3309 * which half of the symmetric matrix a is to be stored.
3310 *
3311 * side (input) enum blas_side_type
3312 * which side of matrix b matrix a is to be multiplied.
3313 *
3314 * m n (input) int
3315 * sizes of matrices a, b, c:
3316 * matrix a is m-by-m for left multiplication
3317 * n-by-n otherwise,
3318 * matrices b, c are m-by-n.
3319 *
3320 * randomize (input) int
3321 * if 0, entries in matrices A, B will be chosen for
3322 * maximum cancellation, but with less randomness.
3323 * if 1, every entry in the matrix A, B will be
3324 * random.
3325 *
3326 * alpha (input/output) double*
3327 * if alpha_flag = 1, alpha is input.
3328 * if alpha_flag = 0, alpha is output.
3329 *
3330 * alpha_flag (input) int
3331 * = 0: alpha is free, and is output.
3332 * = 1: alpha is fixed on input.
3333 *
3334 * beta (input/output) double*
3335 * if beta_flag = 1, beta is input.
3336 * if beta_flag = 0, beta is output.
3337 *
3338 * beta_flag (input) int
3339 * = 0: beta is free, and is output.
3340 * = 1: beta is fixed on input.
3341 *
3342 * a (input/output) float*
3343 *
3344 * lda (input) lda
3345 * leading dimension of matrix A.
3346 *
3347 * b (input/output) double*
3348 *
3349 * ldb (input) int
3350 * leading dimension of matrix B.
3351 *
3352 * c (input/output) double*
3353 * generated matrix C that will be used as an input to SYMM.
3354 *
3355 * ldc (input) int
3356 * leading dimension of matrix C.
3357 *
3358 * seed (input/output) int *
3359 * seed for the random number generator.
3360 *
3361 * double (output) *head_r_true
3362 * the leading part of the truth in double-double.
3363 *
3364 * double (output) *tail_r_true
3365 * the trailing part of the truth in double-double
3366 *
3367 */
3368 {
3369
3370 int i, j;
3371 int cij, ci;
3372 int bij, bi;
3373 int aij, ai;
3374 int inccij, incci;
3375 int incbij, incbi;
3376 int incaij, incai;
3377 int inca, incb;
3378 int m_i, n_i;
3379
3380 double c_elem;
3381 float a_elem;
3382 double b_elem;
3383 double head_r_true_elem, tail_r_true_elem;
3384
3385 float *a_vec;
3386 double *b_vec;
3387
3388 double *c_i = c;
3389 double *alpha_i = alpha;
3390 double *beta_i = beta;
3391 float *a_i = a;
3392 double *b_i = b;
3393
3394 if (side == blas_left_side) {
3395 m_i = m;
3396 n_i = n;
3397 } else {
3398 m_i = n;
3399 n_i = m;
3400 }
3401
3402 inca = incb = 1;
3403
3404
3405 a_vec = (float *) blas_malloc(m_i * sizeof(float));
3406 if (m_i > 0 && a_vec == NULL) {
3407 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3408 }
3409 for (i = 0; i < m_i * inca; i += inca) {
3410 a_vec[i] = 0.0;
3411 }
3412 b_vec = (double *) blas_malloc(m_i * sizeof(double));
3413 if (m_i > 0 && b_vec == NULL) {
3414 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3415 }
3416 for (i = 0; i < m_i * incb; i += incb) {
3417 b_vec[i] = 0.0;
3418 }
3419
3420 if ((order == blas_colmajor && side == blas_left_side) ||
3421 (order == blas_rowmajor && side == blas_right_side)) {
3422 incci = 1;
3423 inccij = ldc;
3424 } else {
3425 incci = ldc;
3426 inccij = 1;
3427 }
3428
3429
3430
3431
3432
3433 if (randomize == 0) {
3434 /* First fill in the first row of A and the first column/row of B */
3435
3436 BLAS_ddot_d_s_testgen(m_i, 0, 0, norm, blas_no_conj,
3437 alpha, alpha_flag, beta, beta_flag,
3438 b_vec, a_vec, seed, &c_elem,
3439 &head_r_true_elem, &tail_r_true_elem);
3440
3441 cij = 0;
3442 c_i[cij] = c_elem;
3443 head_r_true[cij] = head_r_true_elem;
3444 tail_r_true[cij] = tail_r_true_elem;
3445
3446 /* Copy a_vec to first row of A */
3447 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
3448
3449 /* set every column of B to be b_vec */
3450 for (j = 0; j < n_i; j++) {
3451 if (side == blas_left_side)
3452 dge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3453 else
3454 dge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3455 }
3456
3457 /* Fill in rest of matrix A */
3458 cij = incci;
3459 for (i = 1; i < m_i; i++, cij += incci) {
3460 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
3461 BLAS_ddot_d_s_testgen(m_i, i, m_i - i, norm,
3462 blas_no_conj, alpha, 1,
3463 beta, 1, b_vec, a_vec, seed,
3464 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3465
3466 ssy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
3467
3468 c_i[cij] = c_elem;
3469 head_r_true[cij] = head_r_true_elem;
3470 tail_r_true[cij] = tail_r_true_elem;
3471 }
3472
3473 /* Now fill in c and r_true */
3474 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3475 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3476 c_elem = c_i[ci];
3477 c_i[cij] = c_elem;
3478 head_r_true[cij] = head_r_true[ci];
3479 tail_r_true[cij] = tail_r_true[ci];
3480 }
3481 }
3482 } else {
3483
3484
3485
3486
3487
3488
3489
3490 if (alpha_flag == 0) {
3491 c_elem = (float) xrand(seed);
3492 alpha_i[0] = c_elem;
3493 }
3494 if (beta_flag == 0) {
3495 c_elem = (float) xrand(seed);
3496 beta_i[0] = c_elem;
3497 }
3498
3499 if ((order == blas_colmajor && side == blas_left_side) ||
3500 (order == blas_rowmajor && side == blas_right_side)) {
3501 incai = incbi = 1;
3502 incbij = ldb;
3503 incaij = lda;
3504 } else {
3505 incai = lda;
3506 incbi = ldb;
3507 incaij = incbij = 1;
3508 }
3509
3510
3511
3512
3513
3514
3515 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
3516 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
3517 a_elem = (float) xrand(seed);
3518 a_i[aij] = a_elem;
3519 }
3520 }
3521
3522 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
3523 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
3524 b_elem = (float) xrand(seed);
3525 b_i[bij] = b_elem;
3526 }
3527 }
3528
3529 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3530 ssy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
3531
3532
3533 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
3534
3535 if (side == blas_left_side)
3536 dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3537 else
3538 dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3539
3540
3541
3542 BLAS_ddot_d_s_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
3543 beta, 1, b_vec, a_vec, seed,
3544 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3545
3546 c_i[cij] = c_elem;
3547 head_r_true[cij] = head_r_true_elem;
3548 tail_r_true[cij] = tail_r_true_elem;
3549 }
3550 }
3551
3552
3553
3554 }
3555
3556 blas_free(a_vec);
3557 blas_free(b_vec);
3558 }
BLAS_dsymm_d_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,double * alpha,int alpha_flag,double * beta,int beta_flag,double * a,int lda,float * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3559 void BLAS_dsymm_d_s_testgen(int norm, enum blas_order_type order,
3560 enum blas_uplo_type uplo,
3561 enum blas_side_type side, int m, int n,
3562 int randomize, double *alpha, int alpha_flag,
3563 double *beta, int beta_flag, double *a, int lda,
3564 float *b, int ldb, double *c, int ldc, int *seed,
3565 double *head_r_true, double *tail_r_true)
3566
3567 /*
3568 * Purpose
3569 * =======
3570 *
3571 * Generates the test inputs to BLAS_dsymm_d_s{_x}
3572 *
3573 * Arguments
3574 * =========
3575 *
3576 * norm (input) int
3577 * = -1: the vectors are scaled with norms near underflow.
3578 * = 0: the vectors have norms of order 1.
3579 * = 1: the vectors are scaled with norms near overflow.
3580 *
3581 * order (input) enum blas_side_type
3582 * storage format of the matrices
3583 *
3584 * uplo (input) enum blas_uplo_type
3585 * which half of the symmetric matrix a is to be stored.
3586 *
3587 * side (input) enum blas_side_type
3588 * which side of matrix b matrix a is to be multiplied.
3589 *
3590 * m n (input) int
3591 * sizes of matrices a, b, c:
3592 * matrix a is m-by-m for left multiplication
3593 * n-by-n otherwise,
3594 * matrices b, c are m-by-n.
3595 *
3596 * randomize (input) int
3597 * if 0, entries in matrices A, B will be chosen for
3598 * maximum cancellation, but with less randomness.
3599 * if 1, every entry in the matrix A, B will be
3600 * random.
3601 *
3602 * alpha (input/output) double*
3603 * if alpha_flag = 1, alpha is input.
3604 * if alpha_flag = 0, alpha is output.
3605 *
3606 * alpha_flag (input) int
3607 * = 0: alpha is free, and is output.
3608 * = 1: alpha is fixed on input.
3609 *
3610 * beta (input/output) double*
3611 * if beta_flag = 1, beta is input.
3612 * if beta_flag = 0, beta is output.
3613 *
3614 * beta_flag (input) int
3615 * = 0: beta is free, and is output.
3616 * = 1: beta is fixed on input.
3617 *
3618 * a (input/output) double*
3619 *
3620 * lda (input) lda
3621 * leading dimension of matrix A.
3622 *
3623 * b (input/output) float*
3624 *
3625 * ldb (input) int
3626 * leading dimension of matrix B.
3627 *
3628 * c (input/output) double*
3629 * generated matrix C that will be used as an input to SYMM.
3630 *
3631 * ldc (input) int
3632 * leading dimension of matrix C.
3633 *
3634 * seed (input/output) int *
3635 * seed for the random number generator.
3636 *
3637 * double (output) *head_r_true
3638 * the leading part of the truth in double-double.
3639 *
3640 * double (output) *tail_r_true
3641 * the trailing part of the truth in double-double
3642 *
3643 */
3644 {
3645
3646 int i, j;
3647 int cij, ci;
3648 int bij, bi;
3649 int aij, ai;
3650 int inccij, incci;
3651 int incbij, incbi;
3652 int incaij, incai;
3653 int inca, incb;
3654 int m_i, n_i;
3655
3656 double c_elem;
3657 double a_elem;
3658 float b_elem;
3659 double head_r_true_elem, tail_r_true_elem;
3660
3661 double *a_vec;
3662 float *b_vec;
3663
3664 double *c_i = c;
3665 double *alpha_i = alpha;
3666 double *beta_i = beta;
3667 double *a_i = a;
3668 float *b_i = b;
3669
3670 if (side == blas_left_side) {
3671 m_i = m;
3672 n_i = n;
3673 } else {
3674 m_i = n;
3675 n_i = m;
3676 }
3677
3678 inca = incb = 1;
3679
3680
3681 a_vec = (double *) blas_malloc(m_i * sizeof(double));
3682 if (m_i > 0 && a_vec == NULL) {
3683 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3684 }
3685 for (i = 0; i < m_i * inca; i += inca) {
3686 a_vec[i] = 0.0;
3687 }
3688 b_vec = (float *) blas_malloc(m_i * sizeof(float));
3689 if (m_i > 0 && b_vec == NULL) {
3690 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3691 }
3692 for (i = 0; i < m_i * incb; i += incb) {
3693 b_vec[i] = 0.0;
3694 }
3695
3696 if ((order == blas_colmajor && side == blas_left_side) ||
3697 (order == blas_rowmajor && side == blas_right_side)) {
3698 incci = 1;
3699 inccij = ldc;
3700 } else {
3701 incci = ldc;
3702 inccij = 1;
3703 }
3704
3705
3706
3707
3708
3709 if (randomize == 0) {
3710 /* First fill in the first row of A and the first column/row of B */
3711
3712 BLAS_ddot_s_d_testgen(m_i, 0, 0, norm, blas_no_conj,
3713 alpha, alpha_flag, beta, beta_flag,
3714 b_vec, a_vec, seed, &c_elem,
3715 &head_r_true_elem, &tail_r_true_elem);
3716
3717 cij = 0;
3718 c_i[cij] = c_elem;
3719 head_r_true[cij] = head_r_true_elem;
3720 tail_r_true[cij] = tail_r_true_elem;
3721
3722 /* Copy a_vec to first row of A */
3723 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
3724
3725 /* set every column of B to be b_vec */
3726 for (j = 0; j < n_i; j++) {
3727 if (side == blas_left_side)
3728 sge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3729 else
3730 sge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3731 }
3732
3733 /* Fill in rest of matrix A */
3734 cij = incci;
3735 for (i = 1; i < m_i; i++, cij += incci) {
3736 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
3737 BLAS_ddot_s_d_testgen(m_i, i, m_i - i, norm,
3738 blas_no_conj, alpha, 1,
3739 beta, 1, b_vec, a_vec, seed,
3740 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3741
3742 dsy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
3743
3744 c_i[cij] = c_elem;
3745 head_r_true[cij] = head_r_true_elem;
3746 tail_r_true[cij] = tail_r_true_elem;
3747 }
3748
3749 /* Now fill in c and r_true */
3750 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3751 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3752 c_elem = c_i[ci];
3753 c_i[cij] = c_elem;
3754 head_r_true[cij] = head_r_true[ci];
3755 tail_r_true[cij] = tail_r_true[ci];
3756 }
3757 }
3758 } else {
3759
3760
3761
3762
3763
3764
3765
3766 if (alpha_flag == 0) {
3767 c_elem = (float) xrand(seed);
3768 alpha_i[0] = c_elem;
3769 }
3770 if (beta_flag == 0) {
3771 c_elem = (float) xrand(seed);
3772 beta_i[0] = c_elem;
3773 }
3774
3775 if ((order == blas_colmajor && side == blas_left_side) ||
3776 (order == blas_rowmajor && side == blas_right_side)) {
3777 incai = incbi = 1;
3778 incbij = ldb;
3779 incaij = lda;
3780 } else {
3781 incai = lda;
3782 incbi = ldb;
3783 incaij = incbij = 1;
3784 }
3785
3786
3787
3788
3789
3790
3791 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
3792 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
3793 a_elem = (float) xrand(seed);
3794 a_i[aij] = a_elem;
3795 }
3796 }
3797
3798 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
3799 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
3800 b_elem = (float) xrand(seed);
3801 b_i[bij] = b_elem;
3802 }
3803 }
3804
3805 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3806 dsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
3807
3808
3809 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
3810
3811 if (side == blas_left_side)
3812 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3813 else
3814 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3815
3816
3817
3818 BLAS_ddot_s_d_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
3819 beta, 1, b_vec, a_vec, seed,
3820 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3821
3822 c_i[cij] = c_elem;
3823 head_r_true[cij] = head_r_true_elem;
3824 tail_r_true[cij] = tail_r_true_elem;
3825 }
3826 }
3827
3828
3829
3830 }
3831
3832 blas_free(a_vec);
3833 blas_free(b_vec);
3834 }
BLAS_zsymm_c_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3835 void BLAS_zsymm_c_c_testgen(int norm, enum blas_order_type order,
3836 enum blas_uplo_type uplo,
3837 enum blas_side_type side, int m, int n,
3838 int randomize, void *alpha, int alpha_flag,
3839 void *beta, int beta_flag, void *a, int lda,
3840 void *b, int ldb, void *c, int ldc, int *seed,
3841 double *head_r_true, double *tail_r_true)
3842
3843 /*
3844 * Purpose
3845 * =======
3846 *
3847 * Generates the test inputs to BLAS_zsymm_c_c{_x}
3848 *
3849 * Arguments
3850 * =========
3851 *
3852 * norm (input) int
3853 * = -1: the vectors are scaled with norms near underflow.
3854 * = 0: the vectors have norms of order 1.
3855 * = 1: the vectors are scaled with norms near overflow.
3856 *
3857 * order (input) enum blas_side_type
3858 * storage format of the matrices
3859 *
3860 * uplo (input) enum blas_uplo_type
3861 * which half of the symmetric matrix a is to be stored.
3862 *
3863 * side (input) enum blas_side_type
3864 * which side of matrix b matrix a is to be multiplied.
3865 *
3866 * m n (input) int
3867 * sizes of matrices a, b, c:
3868 * matrix a is m-by-m for left multiplication
3869 * n-by-n otherwise,
3870 * matrices b, c are m-by-n.
3871 *
3872 * randomize (input) int
3873 * if 0, entries in matrices A, B will be chosen for
3874 * maximum cancellation, but with less randomness.
3875 * if 1, every entry in the matrix A, B will be
3876 * random.
3877 *
3878 * alpha (input/output) void*
3879 * if alpha_flag = 1, alpha is input.
3880 * if alpha_flag = 0, alpha is output.
3881 *
3882 * alpha_flag (input) int
3883 * = 0: alpha is free, and is output.
3884 * = 1: alpha is fixed on input.
3885 *
3886 * beta (input/output) void*
3887 * if beta_flag = 1, beta is input.
3888 * if beta_flag = 0, beta is output.
3889 *
3890 * beta_flag (input) int
3891 * = 0: beta is free, and is output.
3892 * = 1: beta is fixed on input.
3893 *
3894 * a (input/output) void*
3895 *
3896 * lda (input) lda
3897 * leading dimension of matrix A.
3898 *
3899 * b (input/output) void*
3900 *
3901 * ldb (input) int
3902 * leading dimension of matrix B.
3903 *
3904 * c (input/output) void*
3905 * generated matrix C that will be used as an input to SYMM.
3906 *
3907 * ldc (input) int
3908 * leading dimension of matrix C.
3909 *
3910 * seed (input/output) int *
3911 * seed for the random number generator.
3912 *
3913 * double (output) *head_r_true
3914 * the leading part of the truth in double-double.
3915 *
3916 * double (output) *tail_r_true
3917 * the trailing part of the truth in double-double
3918 *
3919 */
3920 {
3921
3922 int i, j;
3923 int cij, ci;
3924 int bij, bi;
3925 int aij, ai;
3926 int inccij, incci;
3927 int incbij, incbi;
3928 int incaij, incai;
3929 int inca, incb;
3930 int m_i, n_i;
3931
3932 double c_elem[2];
3933 float a_elem[2];
3934 float b_elem[2];
3935 double head_r_true_elem[2], tail_r_true_elem[2];
3936
3937 float *a_vec;
3938 float *b_vec;
3939
3940 double *c_i = (double *) c;
3941 double *alpha_i = (double *) alpha;
3942 double *beta_i = (double *) beta;
3943 float *a_i = (float *) a;
3944 float *b_i = (float *) b;
3945
3946 if (side == blas_left_side) {
3947 m_i = m;
3948 n_i = n;
3949 } else {
3950 m_i = n;
3951 n_i = m;
3952 }
3953
3954 inca = incb = 1;
3955 inca *= 2;
3956 incb *= 2;
3957 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
3958 if (m_i > 0 && a_vec == NULL) {
3959 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3960 }
3961 for (i = 0; i < m_i * inca; i += inca) {
3962 a_vec[i] = 0.0;
3963 a_vec[i + 1] = 0.0;
3964 }
3965 b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
3966 if (m_i > 0 && b_vec == NULL) {
3967 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3968 }
3969 for (i = 0; i < m_i * incb; i += incb) {
3970 b_vec[i] = 0.0;
3971 b_vec[i + 1] = 0.0;
3972 }
3973
3974 if ((order == blas_colmajor && side == blas_left_side) ||
3975 (order == blas_rowmajor && side == blas_right_side)) {
3976 incci = 1;
3977 inccij = ldc;
3978 } else {
3979 incci = ldc;
3980 inccij = 1;
3981 }
3982
3983 incci *= 2;
3984 inccij *= 2;
3985
3986
3987 if (randomize == 0) {
3988 /* First fill in the first row of A and the first column/row of B */
3989
3990 BLAS_zdot_c_c_testgen(m_i, 0, 0, norm, blas_no_conj,
3991 alpha, alpha_flag, beta, beta_flag,
3992 b_vec, a_vec, seed, c_elem,
3993 head_r_true_elem, tail_r_true_elem);
3994
3995 cij = 0;
3996 c_i[cij] = c_elem[0];
3997 c_i[cij + 1] = c_elem[1];
3998 head_r_true[cij] = head_r_true_elem[0];
3999 head_r_true[cij + 1] = head_r_true_elem[1];
4000 tail_r_true[cij] = tail_r_true_elem[0];
4001 tail_r_true[cij + 1] = tail_r_true_elem[1];
4002
4003 /* Copy a_vec to first row of A */
4004 csy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
4005
4006 /* set every column of B to be b_vec */
4007 for (j = 0; j < n_i; j++) {
4008 if (side == blas_left_side)
4009 cge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4010 else
4011 cge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4012 }
4013
4014 /* Fill in rest of matrix A */
4015 cij = incci;
4016 for (i = 1; i < m_i; i++, cij += incci) {
4017 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
4018 BLAS_zdot_c_c_testgen(m_i, i, m_i - i, norm,
4019 blas_no_conj, alpha, 1,
4020 beta, 1, b_vec, a_vec, seed,
4021 c_elem, head_r_true_elem, tail_r_true_elem);
4022
4023 csy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
4024
4025 c_i[cij] = c_elem[0];
4026 c_i[cij + 1] = c_elem[1];
4027 head_r_true[cij] = head_r_true_elem[0];
4028 head_r_true[cij + 1] = head_r_true_elem[1];
4029 tail_r_true[cij] = tail_r_true_elem[0];
4030 tail_r_true[cij + 1] = tail_r_true_elem[1];
4031 }
4032
4033 /* Now fill in c and r_true */
4034 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4035 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
4036 c_elem[0] = c_i[ci];
4037 c_elem[1] = c_i[ci + 1];
4038 c_i[cij] = c_elem[0];
4039 c_i[cij + 1] = c_elem[1];
4040 head_r_true[cij] = head_r_true[ci];
4041 tail_r_true[cij] = tail_r_true[ci];
4042 head_r_true[cij + 1] = head_r_true[ci + 1];
4043 tail_r_true[cij + 1] = tail_r_true[ci + 1];
4044 }
4045 }
4046 } else {
4047
4048
4049
4050
4051
4052
4053
4054 if (alpha_flag == 0) {
4055 c_elem[0] = (float) xrand(seed);
4056 c_elem[1] = (float) xrand(seed);
4057 alpha_i[0] = c_elem[0];
4058 alpha_i[0 + 1] = c_elem[1];
4059 }
4060 if (beta_flag == 0) {
4061 c_elem[0] = (float) xrand(seed);
4062 c_elem[1] = (float) xrand(seed);
4063 beta_i[0] = c_elem[0];
4064 beta_i[0 + 1] = c_elem[1];
4065 }
4066
4067 if ((order == blas_colmajor && side == blas_left_side) ||
4068 (order == blas_rowmajor && side == blas_right_side)) {
4069 incai = incbi = 1;
4070 incbij = ldb;
4071 incaij = lda;
4072 } else {
4073 incai = lda;
4074 incbi = ldb;
4075 incaij = incbij = 1;
4076 }
4077
4078 incbi *= 2;
4079 incbij *= 2;
4080 incai *= 2;
4081 incaij *= 2;
4082
4083 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
4084 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
4085 a_elem[0] = (float) xrand(seed);
4086 a_elem[1] = (float) xrand(seed);
4087 a_i[aij] = a_elem[0];
4088 a_i[aij + 1] = a_elem[1];
4089 }
4090 }
4091
4092 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
4093 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
4094 b_elem[0] = (float) xrand(seed);
4095 b_elem[1] = (float) xrand(seed);
4096 b_i[bij] = b_elem[0];
4097 b_i[bij + 1] = b_elem[1];
4098 }
4099 }
4100
4101 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4102 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
4103
4104
4105 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
4106
4107 if (side == blas_left_side)
4108 cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4109 else
4110 cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4111
4112
4113
4114 BLAS_zdot_c_c_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
4115 beta, 1, b_vec, a_vec, seed,
4116 c_elem, head_r_true_elem, tail_r_true_elem);
4117
4118 c_i[cij] = c_elem[0];
4119 c_i[cij + 1] = c_elem[1];
4120 head_r_true[cij] = head_r_true_elem[0];
4121 head_r_true[cij + 1] = head_r_true_elem[1];
4122 tail_r_true[cij] = tail_r_true_elem[0];
4123 tail_r_true[cij + 1] = tail_r_true_elem[1];
4124 }
4125 }
4126
4127
4128
4129 }
4130
4131 blas_free(a_vec);
4132 blas_free(b_vec);
4133 }
BLAS_zsymm_c_z_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)4134 void BLAS_zsymm_c_z_testgen(int norm, enum blas_order_type order,
4135 enum blas_uplo_type uplo,
4136 enum blas_side_type side, int m, int n,
4137 int randomize, void *alpha, int alpha_flag,
4138 void *beta, int beta_flag, void *a, int lda,
4139 void *b, int ldb, void *c, int ldc, int *seed,
4140 double *head_r_true, double *tail_r_true)
4141
4142 /*
4143 * Purpose
4144 * =======
4145 *
4146 * Generates the test inputs to BLAS_zsymm_c_z{_x}
4147 *
4148 * Arguments
4149 * =========
4150 *
4151 * norm (input) int
4152 * = -1: the vectors are scaled with norms near underflow.
4153 * = 0: the vectors have norms of order 1.
4154 * = 1: the vectors are scaled with norms near overflow.
4155 *
4156 * order (input) enum blas_side_type
4157 * storage format of the matrices
4158 *
4159 * uplo (input) enum blas_uplo_type
4160 * which half of the symmetric matrix a is to be stored.
4161 *
4162 * side (input) enum blas_side_type
4163 * which side of matrix b matrix a is to be multiplied.
4164 *
4165 * m n (input) int
4166 * sizes of matrices a, b, c:
4167 * matrix a is m-by-m for left multiplication
4168 * n-by-n otherwise,
4169 * matrices b, c are m-by-n.
4170 *
4171 * randomize (input) int
4172 * if 0, entries in matrices A, B will be chosen for
4173 * maximum cancellation, but with less randomness.
4174 * if 1, every entry in the matrix A, B will be
4175 * random.
4176 *
4177 * alpha (input/output) void*
4178 * if alpha_flag = 1, alpha is input.
4179 * if alpha_flag = 0, alpha is output.
4180 *
4181 * alpha_flag (input) int
4182 * = 0: alpha is free, and is output.
4183 * = 1: alpha is fixed on input.
4184 *
4185 * beta (input/output) void*
4186 * if beta_flag = 1, beta is input.
4187 * if beta_flag = 0, beta is output.
4188 *
4189 * beta_flag (input) int
4190 * = 0: beta is free, and is output.
4191 * = 1: beta is fixed on input.
4192 *
4193 * a (input/output) void*
4194 *
4195 * lda (input) lda
4196 * leading dimension of matrix A.
4197 *
4198 * b (input/output) void*
4199 *
4200 * ldb (input) int
4201 * leading dimension of matrix B.
4202 *
4203 * c (input/output) void*
4204 * generated matrix C that will be used as an input to SYMM.
4205 *
4206 * ldc (input) int
4207 * leading dimension of matrix C.
4208 *
4209 * seed (input/output) int *
4210 * seed for the random number generator.
4211 *
4212 * double (output) *head_r_true
4213 * the leading part of the truth in double-double.
4214 *
4215 * double (output) *tail_r_true
4216 * the trailing part of the truth in double-double
4217 *
4218 */
4219 {
4220
4221 int i, j;
4222 int cij, ci;
4223 int bij, bi;
4224 int aij, ai;
4225 int inccij, incci;
4226 int incbij, incbi;
4227 int incaij, incai;
4228 int inca, incb;
4229 int m_i, n_i;
4230
4231 double c_elem[2];
4232 float a_elem[2];
4233 double b_elem[2];
4234 double head_r_true_elem[2], tail_r_true_elem[2];
4235
4236 float *a_vec;
4237 double *b_vec;
4238
4239 double *c_i = (double *) c;
4240 double *alpha_i = (double *) alpha;
4241 double *beta_i = (double *) beta;
4242 float *a_i = (float *) a;
4243 double *b_i = (double *) b;
4244
4245 if (side == blas_left_side) {
4246 m_i = m;
4247 n_i = n;
4248 } else {
4249 m_i = n;
4250 n_i = m;
4251 }
4252
4253 inca = incb = 1;
4254 inca *= 2;
4255 incb *= 2;
4256 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
4257 if (m_i > 0 && a_vec == NULL) {
4258 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4259 }
4260 for (i = 0; i < m_i * inca; i += inca) {
4261 a_vec[i] = 0.0;
4262 a_vec[i + 1] = 0.0;
4263 }
4264 b_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
4265 if (m_i > 0 && b_vec == NULL) {
4266 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4267 }
4268 for (i = 0; i < m_i * incb; i += incb) {
4269 b_vec[i] = 0.0;
4270 b_vec[i + 1] = 0.0;
4271 }
4272
4273 if ((order == blas_colmajor && side == blas_left_side) ||
4274 (order == blas_rowmajor && side == blas_right_side)) {
4275 incci = 1;
4276 inccij = ldc;
4277 } else {
4278 incci = ldc;
4279 inccij = 1;
4280 }
4281
4282 incci *= 2;
4283 inccij *= 2;
4284
4285
4286 if (randomize == 0) {
4287 /* First fill in the first row of A and the first column/row of B */
4288
4289 BLAS_zdot_z_c_testgen(m_i, 0, 0, norm, blas_no_conj,
4290 alpha, alpha_flag, beta, beta_flag,
4291 b_vec, a_vec, seed, c_elem,
4292 head_r_true_elem, tail_r_true_elem);
4293
4294 cij = 0;
4295 c_i[cij] = c_elem[0];
4296 c_i[cij + 1] = c_elem[1];
4297 head_r_true[cij] = head_r_true_elem[0];
4298 head_r_true[cij + 1] = head_r_true_elem[1];
4299 tail_r_true[cij] = tail_r_true_elem[0];
4300 tail_r_true[cij + 1] = tail_r_true_elem[1];
4301
4302 /* Copy a_vec to first row of A */
4303 csy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
4304
4305 /* set every column of B to be b_vec */
4306 for (j = 0; j < n_i; j++) {
4307 if (side == blas_left_side)
4308 zge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4309 else
4310 zge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4311 }
4312
4313 /* Fill in rest of matrix A */
4314 cij = incci;
4315 for (i = 1; i < m_i; i++, cij += incci) {
4316 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
4317 BLAS_zdot_z_c_testgen(m_i, i, m_i - i, norm,
4318 blas_no_conj, alpha, 1,
4319 beta, 1, b_vec, a_vec, seed,
4320 c_elem, head_r_true_elem, tail_r_true_elem);
4321
4322 csy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
4323
4324 c_i[cij] = c_elem[0];
4325 c_i[cij + 1] = c_elem[1];
4326 head_r_true[cij] = head_r_true_elem[0];
4327 head_r_true[cij + 1] = head_r_true_elem[1];
4328 tail_r_true[cij] = tail_r_true_elem[0];
4329 tail_r_true[cij + 1] = tail_r_true_elem[1];
4330 }
4331
4332 /* Now fill in c and r_true */
4333 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4334 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
4335 c_elem[0] = c_i[ci];
4336 c_elem[1] = c_i[ci + 1];
4337 c_i[cij] = c_elem[0];
4338 c_i[cij + 1] = c_elem[1];
4339 head_r_true[cij] = head_r_true[ci];
4340 tail_r_true[cij] = tail_r_true[ci];
4341 head_r_true[cij + 1] = head_r_true[ci + 1];
4342 tail_r_true[cij + 1] = tail_r_true[ci + 1];
4343 }
4344 }
4345 } else {
4346
4347
4348
4349
4350
4351
4352
4353 if (alpha_flag == 0) {
4354 c_elem[0] = (float) xrand(seed);
4355 c_elem[1] = (float) xrand(seed);
4356 alpha_i[0] = c_elem[0];
4357 alpha_i[0 + 1] = c_elem[1];
4358 }
4359 if (beta_flag == 0) {
4360 c_elem[0] = (float) xrand(seed);
4361 c_elem[1] = (float) xrand(seed);
4362 beta_i[0] = c_elem[0];
4363 beta_i[0 + 1] = c_elem[1];
4364 }
4365
4366 if ((order == blas_colmajor && side == blas_left_side) ||
4367 (order == blas_rowmajor && side == blas_right_side)) {
4368 incai = incbi = 1;
4369 incbij = ldb;
4370 incaij = lda;
4371 } else {
4372 incai = lda;
4373 incbi = ldb;
4374 incaij = incbij = 1;
4375 }
4376
4377 incbi *= 2;
4378 incbij *= 2;
4379 incai *= 2;
4380 incaij *= 2;
4381
4382 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
4383 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
4384 a_elem[0] = (float) xrand(seed);
4385 a_elem[1] = (float) xrand(seed);
4386 a_i[aij] = a_elem[0];
4387 a_i[aij + 1] = a_elem[1];
4388 }
4389 }
4390
4391 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
4392 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
4393 b_elem[0] = (float) xrand(seed);
4394 b_elem[1] = (float) xrand(seed);
4395 b_i[bij] = b_elem[0];
4396 b_i[bij + 1] = b_elem[1];
4397 }
4398 }
4399
4400 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4401 csy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
4402
4403
4404 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
4405
4406 if (side == blas_left_side)
4407 zge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4408 else
4409 zge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4410
4411
4412
4413 BLAS_zdot_z_c_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
4414 beta, 1, b_vec, a_vec, seed,
4415 c_elem, head_r_true_elem, tail_r_true_elem);
4416
4417 c_i[cij] = c_elem[0];
4418 c_i[cij + 1] = c_elem[1];
4419 head_r_true[cij] = head_r_true_elem[0];
4420 head_r_true[cij + 1] = head_r_true_elem[1];
4421 tail_r_true[cij] = tail_r_true_elem[0];
4422 tail_r_true[cij + 1] = tail_r_true_elem[1];
4423 }
4424 }
4425
4426
4427
4428 }
4429
4430 blas_free(a_vec);
4431 blas_free(b_vec);
4432 }
BLAS_zsymm_z_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)4433 void BLAS_zsymm_z_c_testgen(int norm, enum blas_order_type order,
4434 enum blas_uplo_type uplo,
4435 enum blas_side_type side, int m, int n,
4436 int randomize, void *alpha, int alpha_flag,
4437 void *beta, int beta_flag, void *a, int lda,
4438 void *b, int ldb, void *c, int ldc, int *seed,
4439 double *head_r_true, double *tail_r_true)
4440
4441 /*
4442 * Purpose
4443 * =======
4444 *
4445 * Generates the test inputs to BLAS_zsymm_z_c{_x}
4446 *
4447 * Arguments
4448 * =========
4449 *
4450 * norm (input) int
4451 * = -1: the vectors are scaled with norms near underflow.
4452 * = 0: the vectors have norms of order 1.
4453 * = 1: the vectors are scaled with norms near overflow.
4454 *
4455 * order (input) enum blas_side_type
4456 * storage format of the matrices
4457 *
4458 * uplo (input) enum blas_uplo_type
4459 * which half of the symmetric matrix a is to be stored.
4460 *
4461 * side (input) enum blas_side_type
4462 * which side of matrix b matrix a is to be multiplied.
4463 *
4464 * m n (input) int
4465 * sizes of matrices a, b, c:
4466 * matrix a is m-by-m for left multiplication
4467 * n-by-n otherwise,
4468 * matrices b, c are m-by-n.
4469 *
4470 * randomize (input) int
4471 * if 0, entries in matrices A, B will be chosen for
4472 * maximum cancellation, but with less randomness.
4473 * if 1, every entry in the matrix A, B will be
4474 * random.
4475 *
4476 * alpha (input/output) void*
4477 * if alpha_flag = 1, alpha is input.
4478 * if alpha_flag = 0, alpha is output.
4479 *
4480 * alpha_flag (input) int
4481 * = 0: alpha is free, and is output.
4482 * = 1: alpha is fixed on input.
4483 *
4484 * beta (input/output) void*
4485 * if beta_flag = 1, beta is input.
4486 * if beta_flag = 0, beta is output.
4487 *
4488 * beta_flag (input) int
4489 * = 0: beta is free, and is output.
4490 * = 1: beta is fixed on input.
4491 *
4492 * a (input/output) void*
4493 *
4494 * lda (input) lda
4495 * leading dimension of matrix A.
4496 *
4497 * b (input/output) void*
4498 *
4499 * ldb (input) int
4500 * leading dimension of matrix B.
4501 *
4502 * c (input/output) void*
4503 * generated matrix C that will be used as an input to SYMM.
4504 *
4505 * ldc (input) int
4506 * leading dimension of matrix C.
4507 *
4508 * seed (input/output) int *
4509 * seed for the random number generator.
4510 *
4511 * double (output) *head_r_true
4512 * the leading part of the truth in double-double.
4513 *
4514 * double (output) *tail_r_true
4515 * the trailing part of the truth in double-double
4516 *
4517 */
4518 {
4519
4520 int i, j;
4521 int cij, ci;
4522 int bij, bi;
4523 int aij, ai;
4524 int inccij, incci;
4525 int incbij, incbi;
4526 int incaij, incai;
4527 int inca, incb;
4528 int m_i, n_i;
4529
4530 double c_elem[2];
4531 double a_elem[2];
4532 float b_elem[2];
4533 double head_r_true_elem[2], tail_r_true_elem[2];
4534
4535 double *a_vec;
4536 float *b_vec;
4537
4538 double *c_i = (double *) c;
4539 double *alpha_i = (double *) alpha;
4540 double *beta_i = (double *) beta;
4541 double *a_i = (double *) a;
4542 float *b_i = (float *) b;
4543
4544 if (side == blas_left_side) {
4545 m_i = m;
4546 n_i = n;
4547 } else {
4548 m_i = n;
4549 n_i = m;
4550 }
4551
4552 inca = incb = 1;
4553 inca *= 2;
4554 incb *= 2;
4555 a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
4556 if (m_i > 0 && a_vec == NULL) {
4557 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4558 }
4559 for (i = 0; i < m_i * inca; i += inca) {
4560 a_vec[i] = 0.0;
4561 a_vec[i + 1] = 0.0;
4562 }
4563 b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
4564 if (m_i > 0 && b_vec == NULL) {
4565 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4566 }
4567 for (i = 0; i < m_i * incb; i += incb) {
4568 b_vec[i] = 0.0;
4569 b_vec[i + 1] = 0.0;
4570 }
4571
4572 if ((order == blas_colmajor && side == blas_left_side) ||
4573 (order == blas_rowmajor && side == blas_right_side)) {
4574 incci = 1;
4575 inccij = ldc;
4576 } else {
4577 incci = ldc;
4578 inccij = 1;
4579 }
4580
4581 incci *= 2;
4582 inccij *= 2;
4583
4584
4585 if (randomize == 0) {
4586 /* First fill in the first row of A and the first column/row of B */
4587
4588 BLAS_zdot_c_z_testgen(m_i, 0, 0, norm, blas_no_conj,
4589 alpha, alpha_flag, beta, beta_flag,
4590 b_vec, a_vec, seed, c_elem,
4591 head_r_true_elem, tail_r_true_elem);
4592
4593 cij = 0;
4594 c_i[cij] = c_elem[0];
4595 c_i[cij + 1] = c_elem[1];
4596 head_r_true[cij] = head_r_true_elem[0];
4597 head_r_true[cij + 1] = head_r_true_elem[1];
4598 tail_r_true[cij] = tail_r_true_elem[0];
4599 tail_r_true[cij + 1] = tail_r_true_elem[1];
4600
4601 /* Copy a_vec to first row of A */
4602 zsy_commit_row(order, uplo, m_i, a, lda, a_vec, 0);
4603
4604 /* set every column of B to be b_vec */
4605 for (j = 0; j < n_i; j++) {
4606 if (side == blas_left_side)
4607 cge_commit_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4608 else
4609 cge_commit_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4610 }
4611
4612 /* Fill in rest of matrix A */
4613 cij = incci;
4614 for (i = 1; i < m_i; i++, cij += incci) {
4615 zsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
4616 BLAS_zdot_c_z_testgen(m_i, i, m_i - i, norm,
4617 blas_no_conj, alpha, 1,
4618 beta, 1, b_vec, a_vec, seed,
4619 c_elem, head_r_true_elem, tail_r_true_elem);
4620
4621 zsy_commit_row(order, uplo, m_i, a, lda, a_vec, i);
4622
4623 c_i[cij] = c_elem[0];
4624 c_i[cij + 1] = c_elem[1];
4625 head_r_true[cij] = head_r_true_elem[0];
4626 head_r_true[cij + 1] = head_r_true_elem[1];
4627 tail_r_true[cij] = tail_r_true_elem[0];
4628 tail_r_true[cij + 1] = tail_r_true_elem[1];
4629 }
4630
4631 /* Now fill in c and r_true */
4632 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4633 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
4634 c_elem[0] = c_i[ci];
4635 c_elem[1] = c_i[ci + 1];
4636 c_i[cij] = c_elem[0];
4637 c_i[cij + 1] = c_elem[1];
4638 head_r_true[cij] = head_r_true[ci];
4639 tail_r_true[cij] = tail_r_true[ci];
4640 head_r_true[cij + 1] = head_r_true[ci + 1];
4641 tail_r_true[cij + 1] = tail_r_true[ci + 1];
4642 }
4643 }
4644 } else {
4645
4646
4647
4648
4649
4650
4651
4652 if (alpha_flag == 0) {
4653 c_elem[0] = (float) xrand(seed);
4654 c_elem[1] = (float) xrand(seed);
4655 alpha_i[0] = c_elem[0];
4656 alpha_i[0 + 1] = c_elem[1];
4657 }
4658 if (beta_flag == 0) {
4659 c_elem[0] = (float) xrand(seed);
4660 c_elem[1] = (float) xrand(seed);
4661 beta_i[0] = c_elem[0];
4662 beta_i[0 + 1] = c_elem[1];
4663 }
4664
4665 if ((order == blas_colmajor && side == blas_left_side) ||
4666 (order == blas_rowmajor && side == blas_right_side)) {
4667 incai = incbi = 1;
4668 incbij = ldb;
4669 incaij = lda;
4670 } else {
4671 incai = lda;
4672 incbi = ldb;
4673 incaij = incbij = 1;
4674 }
4675
4676 incbi *= 2;
4677 incbij *= 2;
4678 incai *= 2;
4679 incaij *= 2;
4680
4681 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
4682 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
4683 a_elem[0] = (float) xrand(seed);
4684 a_elem[1] = (float) xrand(seed);
4685 a_i[aij] = a_elem[0];
4686 a_i[aij + 1] = a_elem[1];
4687 }
4688 }
4689
4690 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
4691 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
4692 b_elem[0] = (float) xrand(seed);
4693 b_elem[1] = (float) xrand(seed);
4694 b_i[bij] = b_elem[0];
4695 b_i[bij + 1] = b_elem[1];
4696 }
4697 }
4698
4699 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4700 zsy_copy_row(order, uplo, m_i, a, lda, a_vec, i);
4701
4702
4703 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
4704
4705 if (side == blas_left_side)
4706 cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4707 else
4708 cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
4709
4710
4711
4712 BLAS_zdot_c_z_testgen(m_i, m_i, 0, norm, blas_no_conj, alpha, 1,
4713 beta, 1, b_vec, a_vec, seed,
4714 c_elem, head_r_true_elem, tail_r_true_elem);
4715
4716 c_i[cij] = c_elem[0];
4717 c_i[cij + 1] = c_elem[1];
4718 head_r_true[cij] = head_r_true_elem[0];
4719 head_r_true[cij + 1] = head_r_true_elem[1];
4720 tail_r_true[cij] = tail_r_true_elem[0];
4721 tail_r_true[cij + 1] = tail_r_true_elem[1];
4722 }
4723 }
4724
4725
4726
4727 }
4728
4729 blas_free(a_vec);
4730 blas_free(b_vec);
4731 }
4732