1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "blas_extended.h"
4 #include "blas_extended_private.h"
5 #include "blas_extended_test.h"
6
BLAS_chemm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)7 void BLAS_chemm_testgen(int norm, enum blas_order_type order,
8 enum blas_uplo_type uplo, enum blas_side_type side,
9 int m, int n, int randomize, void *alpha,
10 int alpha_flag, void *beta, int beta_flag, void *a,
11 int lda, void *b, int ldb, void *c, int ldc,
12 int *seed, double *head_r_true, double *tail_r_true)
13
14 /*
15 * Purpose
16 * =======
17 *
18 * Generates the test inputs to BLAS_chemm{_x}
19 *
20 * Arguments
21 * =========
22 *
23 * norm (input) int
24 * = -1: the vectors are scaled with norms near underflow.
25 * = 0: the vectors have norms of order 1.
26 * = 1: the vectors are scaled with norms near overflow.
27 *
28 * order (input) enum blas_side_type
29 * storage format of the matrices
30 *
31 * uplo (input) enum blas_uplo_type
32 * which half of the hermitian matrix a is to be stored.
33 *
34 * side (input) enum blas_side_type
35 * which side of matrix b matrix a is to be multiplied.
36 *
37 * m n (input) int
38 * sizes of matrices a, b, c:
39 * matrix a is m-by-m for left multiplication
40 * n-by-n otherwise,
41 * matrices b, c are m-by-n.
42 *
43 * randomize (input) int
44 * = 0: test case made for maximum cancellation.
45 * = 1: test case made for maximum radomization.
46 *
47 * alpha (input/output) void*
48 * if alpha_flag = 1, alpha is input.
49 * if alpha_flag = 0, alpha is output.
50 *
51 * alpha_flag (input) int
52 * = 0: alpha is free, and is output.
53 * = 1: alpha is fixed on input.
54 *
55 * beta (input/output) void*
56 * if beta_flag = 1, beta is input.
57 * if beta_flag = 0, beta is output.
58 *
59 * beta_flag (input) int
60 * = 0: beta is free, and is output.
61 * = 1: beta is fixed on input.
62 *
63 * a (input/output) void*
64 *
65 * lda (input) lda
66 * leading dimension of matrix A.
67 *
68 * b (input/output) void*
69 *
70 * ldb (input) int
71 * leading dimension of matrix B.
72 *
73 * c (input/output) void*
74 * generated matrix C that will be used as an input to HEMM.
75 *
76 * ldc (input) int
77 * leading dimension of matrix C.
78 *
79 * seed (input/output) int *
80 * seed for the random number generator.
81 *
82 * head_r_true (output) double *
83 * the leading part of the truth in double-double.
84 *
85 * tail_r_true (output) double *
86 * the trailing part of the truth in double-double
87 *
88 */
89 {
90
91 /* Strategy:
92 R1 = alpha * A1 * B + beta * C1
93 R2 = alpha * A2 * B + beta * C2
94 where all the matrices are real. Then let R = R1 + i R2,
95 A = A1 + i A2, C = C1 + i C2. To make A hermitian, A1 is
96 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
97 */
98
99 int i, j;
100 int cij, ci;
101 int aij, ai;
102 int a1ij, a1i;
103 int bij, bi;
104 int mij, mi;
105 int inccij, incci;
106 int incaij, incai;
107 int inca1ij, inca1i;
108 int incbij, incbi;
109 int inci, incij;
110 int inca, incb;
111 int m_i, n_i;
112 int ld;
113 int ab;
114
115 float *a1;
116 float *a2;
117 float *c1;
118 float *c2;
119 float *b0;
120
121 double *head_r1_true, *tail_r1_true;
122 double *head_r2_true, *tail_r2_true;
123
124 double head_r_elem1, tail_r_elem1;
125 double head_r_elem2, tail_r_elem2;
126 double head_r_elem, tail_r_elem;
127
128 float *a_vec;
129 float *b_vec;
130
131 float *c_i = (float *) c;
132 float *alpha_i = (float *) alpha;
133 float *beta_i = (float *) beta;
134 float *a_i = a;
135 float *b_i = b;
136
137 if (side == blas_left_side) {
138 m_i = m;
139 n_i = n;
140 } else {
141 m_i = n;
142 n_i = m;
143 }
144
145 if (order == blas_colmajor)
146 ld = m;
147 else
148 ld = n;
149
150
151 if ((order == blas_colmajor && side == blas_left_side) ||
152 (order == blas_rowmajor && side == blas_right_side)) {
153 inci = inca1i = incai = incbi = incci = 1;
154 inccij = ldc;
155 incaij = lda;
156 incbij = ldb;
157 inca1ij = m_i;
158 incij = ld;
159 } else {
160 incci = ldc;
161 incai = lda;
162 incbi = ldb;
163 inca1i = m_i;
164 inci = ld;
165 incij = inca1ij = incaij = incbij = inccij = 1;
166 }
167
168 incci *= 2;
169 inccij *= 2;
170 incai *= 2;
171 incaij *= 2;
172 incbi *= 2;
173 incbij *= 2;
174
175 inca = incb = 1;
176 inca *= 2;
177 incb *= 2;
178 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
179 if (m_i > 0 && a_vec == NULL) {
180 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
181 }
182 for (i = 0; i < m_i * inca; i += inca) {
183 a_vec[i] = 0.0;
184 a_vec[i + 1] = 0.0;
185 }
186 b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
187 if (m_i > 0 && b_vec == NULL) {
188 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
189 }
190 for (i = 0; i < m_i * incb; i += incb) {
191 b_vec[i] = 0.0;
192 b_vec[i + 1] = 0.0;
193 }
194
195 if (randomize == 0) {
196 a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
197 if (m_i * m_i > 0 && a1 == NULL) {
198 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
199 }
200 a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
201 if (m_i * m_i > 0 && a2 == NULL) {
202 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
203 }
204 for (i = 0; i < m_i * m_i; ++i) {
205 a1[i] = a2[i] = 0.0;
206 }
207 c1 = (float *) blas_malloc(m_i * n_i * sizeof(float));
208 if (m_i * n_i > 0 && c1 == NULL) {
209 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
210 }
211 c2 = (float *) blas_malloc(m_i * n_i * sizeof(float));
212 if (m_i * n_i > 0 && c2 == NULL) {
213 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
214 }
215 b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
216 if (m_i * n_i > 0 && b0 == NULL) {
217 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
218 }
219 for (i = 0; i < m_i * n_i; ++i) {
220 c1[i] = c2[i] = b0[i] = 0.0;
221 }
222 head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
223 tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
224 if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
225 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
226 };
227 head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
228 tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
229 if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
230 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
231 };
232
233 /* First generate the real portion of matrix A, and matrix B.
234 Note that Re(A) is a symmetric matrix. */
235 BLAS_ssymm_testgen
236 (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
237 beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
238
239 BLAS_sskew_testgen
240 (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
241 c2, ld, seed, head_r2_true, tail_r2_true);
242
243
244 /* The case where B is a complex matrix. Since B is generated
245 as a real matrix, we need to perform some scaling.
246
247 There are four cases to consider, depending on the values
248 of alpha and beta.
249
250 values scaling
251 alpha beta alpha A B beta C R (truth)
252 0 1 1 i i i
253 1 1 ? 1+i 1+i 1+i
254 2 ? 1 1+i 1+i 2i 2i
255 3 ? ? 1+i 1+i 2i 2i
256
257 Note that we can afford to scale R by 1+i, since they are
258 computed in double-double precision.
259 */
260
261 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
262 ab = 0;
263 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
264 } else if (alpha_i[0] == 1.0) {
265 ab = 1;
266 /* set alpha to 1, multiply beta by 1+i. */
267 alpha_i[1] = 0.0;
268 beta_i[1] = beta_i[0];
269 } else if (beta_i[0] == 1.0) {
270 ab = 2;
271 /* set beta to 1, multiply alpha by 1+i. */
272 beta_i[1] = 0.0;
273 alpha_i[1] = alpha_i[0];
274 } else {
275 ab = 3;
276 /* multiply alpha by 1+i, beta by 2i. */
277 alpha_i[1] = alpha_i[0];
278 beta_i[1] = 2.0 * beta_i[0];
279 beta_i[0] = 0.0;
280 }
281
282
283 /* Now fill in a */
284 for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
285 for (j = 0, aij = ai, a1ij = a1i; j < m_i;
286 j++, aij += incaij, a1ij += inca1ij) {
287 a_i[aij] = a1[a1ij];
288 a_i[aij + 1] = a2[a1ij];
289 }
290 }
291
292 /* Fill in b */
293 for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
294 for (j = 0, bij = bi, mij = mi; j < n_i;
295 j++, bij += incbij, mij += incij) {
296 if (ab == 0) {
297 b_i[bij] = 0.0;
298 b_i[bij + 1] = b0[mij];
299 } else {
300 b_i[bij] = b0[mij];
301 b_i[bij + 1] = b0[mij];
302 }
303 }
304 }
305
306 /* Fill in c */
307 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
308 for (j = 0, cij = ci, mij = mi; j < n_i;
309 j++, cij += inccij, mij += incij) {
310 if (ab == 0) {
311 c_i[cij] = -c2[mij];
312 c_i[cij + 1] = c1[mij];
313 } else if (ab == 2) {
314 c_i[cij] = -2.0 * c2[mij];
315 c_i[cij + 1] = 2.0 * c1[mij];
316 } else {
317 c_i[cij] = c1[mij];
318 c_i[cij + 1] = c2[mij];
319 }
320 }
321 }
322
323 /* Fill in truth */
324 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
325 for (j = 0, cij = ci, mij = mi; j < n_i;
326 j++, cij += inccij, mij += incij) {
327
328 head_r_elem1 = head_r1_true[mij];
329 tail_r_elem1 = tail_r1_true[mij];
330
331 head_r_elem2 = head_r2_true[mij];
332 tail_r_elem2 = tail_r2_true[mij];
333
334 if (ab == 0) {
335 head_r_true[cij] = -head_r_elem2;
336 tail_r_true[cij] = -tail_r_elem2;
337 head_r_true[cij + 1] = head_r_elem1;
338 tail_r_true[cij + 1] = tail_r_elem1;
339 } else if (ab == 1) {
340 {
341 /* Compute double-double = double-double + double-double. */
342 double bv;
343 double s1, s2, t1, t2;
344
345 /* Add two hi words. */
346 s1 = head_r_elem1 + head_r_elem2;
347 bv = s1 - head_r_elem1;
348 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
349
350 /* Add two lo words. */
351 t1 = tail_r_elem1 + tail_r_elem2;
352 bv = t1 - tail_r_elem1;
353 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
354
355 s2 += t1;
356
357 /* Renormalize (s1, s2) to (t1, s2) */
358 t1 = s1 + s2;
359 s2 = s2 - (t1 - s1);
360
361 t2 += s2;
362
363 /* Renormalize (t1, t2) */
364 head_r_elem = t1 + t2;
365 tail_r_elem = t2 - (head_r_elem - t1);
366 }
367
368 /* Set the imaginary part to R1 + R2 */
369 tail_r_true[cij + 1] = tail_r_elem;
370 head_r_true[cij + 1] = head_r_elem;
371
372 /* Set the real part to R1 - R2. */
373 {
374 double head_bt, tail_bt;
375 head_bt = -head_r_elem2;
376 tail_bt = -tail_r_elem2;
377 {
378 /* Compute double-double = double-double + double-double. */
379 double bv;
380 double s1, s2, t1, t2;
381
382 /* Add two hi words. */
383 s1 = head_r_elem1 + head_bt;
384 bv = s1 - head_r_elem1;
385 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
386
387 /* Add two lo words. */
388 t1 = tail_r_elem1 + tail_bt;
389 bv = t1 - tail_r_elem1;
390 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
391
392 s2 += t1;
393
394 /* Renormalize (s1, s2) to (t1, s2) */
395 t1 = s1 + s2;
396 s2 = s2 - (t1 - s1);
397
398 t2 += s2;
399
400 /* Renormalize (t1, t2) */
401 head_r_elem = t1 + t2;
402 tail_r_elem = t2 - (head_r_elem - t1);
403 }
404 }
405 tail_r_true[cij] = tail_r_elem;
406 head_r_true[cij] = head_r_elem;
407 } else {
408
409 /* Real part */
410 {
411 /* Compute double-double = double-double * double. */
412 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
413
414 con = head_r_elem2 * split;
415 a11 = con - head_r_elem2;
416 a11 = con - a11;
417 a21 = head_r_elem2 - a11;
418 con = -2.0 * split;
419 b1 = con - -2.0;
420 b1 = con - b1;
421 b2 = -2.0 - b1;
422
423 c11 = head_r_elem2 * -2.0;
424 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
425
426 c2 = tail_r_elem2 * -2.0;
427 t1 = c11 + c2;
428 t2 = (c2 - (t1 - c11)) + c21;
429
430 head_r_elem = t1 + t2;
431 tail_r_elem = t2 - (head_r_elem - t1);
432 }
433 head_r_true[cij] = head_r_elem;
434 tail_r_true[cij] = tail_r_elem;
435
436 /* Imaginary Part */
437 {
438 /* Compute double-double = double-double * double. */
439 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
440
441 con = head_r_elem1 * split;
442 a11 = con - head_r_elem1;
443 a11 = con - a11;
444 a21 = head_r_elem1 - a11;
445 con = 2.0 * split;
446 b1 = con - 2.0;
447 b1 = con - b1;
448 b2 = 2.0 - b1;
449
450 c11 = head_r_elem1 * 2.0;
451 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
452
453 c2 = tail_r_elem1 * 2.0;
454 t1 = c11 + c2;
455 t2 = (c2 - (t1 - c11)) + c21;
456
457 head_r_elem = t1 + t2;
458 tail_r_elem = t2 - (head_r_elem - t1);
459 }
460 head_r_true[cij + 1] = head_r_elem;
461 tail_r_true[cij + 1] = tail_r_elem;
462 }
463 }
464 }
465
466 blas_free(a1);
467 blas_free(a2);
468 blas_free(c1);
469 blas_free(c2);
470 blas_free(b0);
471 blas_free(head_r1_true);
472 blas_free(tail_r1_true);
473 blas_free(head_r2_true);
474 blas_free(tail_r2_true);
475
476 } else {
477 /* random matrix */
478 float a_elem[2];
479 float b_elem[2];
480 float c_elem[2];
481 double head_r_true_elem[2], tail_r_true_elem[2];
482
483 /* Since mixed real/complex test generator for dot
484 scales the vectors, we need to used the non-mixed
485 version if B is real (since A is always complex). */
486
487
488 if (alpha_flag == 0) {
489 alpha_i[0] = xrand(seed);
490 alpha_i[1] = xrand(seed);
491 }
492 if (beta_flag == 0) {
493 beta_i[0] = xrand(seed);
494 beta_i[1] = xrand(seed);
495 }
496
497 /* Fill in matrix A -- Hermitian. */
498 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
499 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
500 a_elem[0] = xrand(seed);
501 a_elem[1] = xrand(seed);
502 a_i[aij] = a_elem[0];
503 a_i[aij + 1] = a_elem[1];
504 if (i == j)
505 a_i[aij + 1] = 0.0;
506 }
507 }
508
509 /* Fill in matrix B */
510 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
511 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
512 b_elem[0] = xrand(seed);
513 b_elem[1] = xrand(seed);
514 b_i[bij] = b_elem[0];
515 b_i[bij + 1] = b_elem[1];
516 }
517 }
518
519 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
520 che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
521 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
522
523 if (side == blas_left_side)
524 cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
525 else
526 cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
527
528 /* copy the real b_vec into complex bb_vec, so that
529 pure complex test case generator can be called. */
530
531 BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
532 alpha, 1, beta, 1, a_vec, b_vec, seed,
533 c_elem, head_r_true_elem, tail_r_true_elem);
534
535 c_i[cij] = c_elem[0];
536 c_i[cij + 1] = c_elem[1];
537 head_r_true[cij] = head_r_true_elem[0];
538 head_r_true[cij + 1] = head_r_true_elem[1];
539 tail_r_true[cij] = tail_r_true_elem[0];
540 tail_r_true[cij + 1] = tail_r_true_elem[1];
541 }
542 }
543
544
545
546 }
547
548 blas_free(a_vec);
549 blas_free(b_vec);
550 }
BLAS_zhemm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)551 void BLAS_zhemm_testgen(int norm, enum blas_order_type order,
552 enum blas_uplo_type uplo, enum blas_side_type side,
553 int m, int n, int randomize, void *alpha,
554 int alpha_flag, void *beta, int beta_flag, void *a,
555 int lda, void *b, int ldb, void *c, int ldc,
556 int *seed, double *head_r_true, double *tail_r_true)
557
558 /*
559 * Purpose
560 * =======
561 *
562 * Generates the test inputs to BLAS_zhemm{_x}
563 *
564 * Arguments
565 * =========
566 *
567 * norm (input) int
568 * = -1: the vectors are scaled with norms near underflow.
569 * = 0: the vectors have norms of order 1.
570 * = 1: the vectors are scaled with norms near overflow.
571 *
572 * order (input) enum blas_side_type
573 * storage format of the matrices
574 *
575 * uplo (input) enum blas_uplo_type
576 * which half of the hermitian matrix a is to be stored.
577 *
578 * side (input) enum blas_side_type
579 * which side of matrix b matrix a is to be multiplied.
580 *
581 * m n (input) int
582 * sizes of matrices a, b, c:
583 * matrix a is m-by-m for left multiplication
584 * n-by-n otherwise,
585 * matrices b, c are m-by-n.
586 *
587 * randomize (input) int
588 * = 0: test case made for maximum cancellation.
589 * = 1: test case made for maximum radomization.
590 *
591 * alpha (input/output) void*
592 * if alpha_flag = 1, alpha is input.
593 * if alpha_flag = 0, alpha is output.
594 *
595 * alpha_flag (input) int
596 * = 0: alpha is free, and is output.
597 * = 1: alpha is fixed on input.
598 *
599 * beta (input/output) void*
600 * if beta_flag = 1, beta is input.
601 * if beta_flag = 0, beta is output.
602 *
603 * beta_flag (input) int
604 * = 0: beta is free, and is output.
605 * = 1: beta is fixed on input.
606 *
607 * a (input/output) void*
608 *
609 * lda (input) lda
610 * leading dimension of matrix A.
611 *
612 * b (input/output) void*
613 *
614 * ldb (input) int
615 * leading dimension of matrix B.
616 *
617 * c (input/output) void*
618 * generated matrix C that will be used as an input to HEMM.
619 *
620 * ldc (input) int
621 * leading dimension of matrix C.
622 *
623 * seed (input/output) int *
624 * seed for the random number generator.
625 *
626 * head_r_true (output) double *
627 * the leading part of the truth in double-double.
628 *
629 * tail_r_true (output) double *
630 * the trailing part of the truth in double-double
631 *
632 */
633 {
634
635 /* Strategy:
636 R1 = alpha * A1 * B + beta * C1
637 R2 = alpha * A2 * B + beta * C2
638 where all the matrices are real. Then let R = R1 + i R2,
639 A = A1 + i A2, C = C1 + i C2. To make A hermitian, A1 is
640 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
641 */
642
643 int i, j;
644 int cij, ci;
645 int aij, ai;
646 int a1ij, a1i;
647 int bij, bi;
648 int mij, mi;
649 int inccij, incci;
650 int incaij, incai;
651 int inca1ij, inca1i;
652 int incbij, incbi;
653 int inci, incij;
654 int inca, incb;
655 int m_i, n_i;
656 int ld;
657 int ab;
658
659 double *a1;
660 double *a2;
661 double *c1;
662 double *c2;
663 double *b0;
664
665 double *head_r1_true, *tail_r1_true;
666 double *head_r2_true, *tail_r2_true;
667
668 double head_r_elem1, tail_r_elem1;
669 double head_r_elem2, tail_r_elem2;
670 double head_r_elem, tail_r_elem;
671
672 double *a_vec;
673 double *b_vec;
674
675 double *c_i = (double *) c;
676 double *alpha_i = (double *) alpha;
677 double *beta_i = (double *) beta;
678 double *a_i = a;
679 double *b_i = b;
680
681 if (side == blas_left_side) {
682 m_i = m;
683 n_i = n;
684 } else {
685 m_i = n;
686 n_i = m;
687 }
688
689 if (order == blas_colmajor)
690 ld = m;
691 else
692 ld = n;
693
694
695 if ((order == blas_colmajor && side == blas_left_side) ||
696 (order == blas_rowmajor && side == blas_right_side)) {
697 inci = inca1i = incai = incbi = incci = 1;
698 inccij = ldc;
699 incaij = lda;
700 incbij = ldb;
701 inca1ij = m_i;
702 incij = ld;
703 } else {
704 incci = ldc;
705 incai = lda;
706 incbi = ldb;
707 inca1i = m_i;
708 inci = ld;
709 incij = inca1ij = incaij = incbij = inccij = 1;
710 }
711
712 incci *= 2;
713 inccij *= 2;
714 incai *= 2;
715 incaij *= 2;
716 incbi *= 2;
717 incbij *= 2;
718
719 inca = incb = 1;
720 inca *= 2;
721 incb *= 2;
722 a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
723 if (m_i > 0 && a_vec == NULL) {
724 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
725 }
726 for (i = 0; i < m_i * inca; i += inca) {
727 a_vec[i] = 0.0;
728 a_vec[i + 1] = 0.0;
729 }
730 b_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
731 if (m_i > 0 && b_vec == NULL) {
732 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
733 }
734 for (i = 0; i < m_i * incb; i += incb) {
735 b_vec[i] = 0.0;
736 b_vec[i + 1] = 0.0;
737 }
738
739 if (randomize == 0) {
740 a1 = (double *) blas_malloc(m_i * m_i * sizeof(double));
741 if (m_i * m_i > 0 && a1 == NULL) {
742 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
743 }
744 a2 = (double *) blas_malloc(m_i * m_i * sizeof(double));
745 if (m_i * m_i > 0 && a2 == NULL) {
746 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
747 }
748 for (i = 0; i < m_i * m_i; ++i) {
749 a1[i] = a2[i] = 0.0;
750 }
751 c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
752 if (m_i * n_i > 0 && c1 == NULL) {
753 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
754 }
755 c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
756 if (m_i * n_i > 0 && c2 == NULL) {
757 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
758 }
759 b0 = (double *) blas_malloc(m_i * n_i * sizeof(double));
760 if (m_i * n_i > 0 && b0 == NULL) {
761 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
762 }
763 for (i = 0; i < m_i * n_i; ++i) {
764 c1[i] = c2[i] = b0[i] = 0.0;
765 }
766 head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
767 tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
768 if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
769 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
770 };
771 head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
772 tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
773 if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
774 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
775 };
776
777 /* First generate the real portion of matrix A, and matrix B.
778 Note that Re(A) is a symmetric matrix. */
779 BLAS_dsymm_testgen
780 (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
781 beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
782
783 BLAS_dskew_testgen
784 (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
785 c2, ld, seed, head_r2_true, tail_r2_true);
786
787
788 /* The case where B is a complex matrix. Since B is generated
789 as a real matrix, we need to perform some scaling.
790
791 There are four cases to consider, depending on the values
792 of alpha and beta.
793
794 values scaling
795 alpha beta alpha A B beta C R (truth)
796 0 1 1 i i i
797 1 1 ? 1+i 1+i 1+i
798 2 ? 1 1+i 1+i 2i 2i
799 3 ? ? 1+i 1+i 2i 2i
800
801 Note that we can afford to scale R by 1+i, since they are
802 computed in double-double precision.
803 */
804
805 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
806 ab = 0;
807 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
808 } else if (alpha_i[0] == 1.0) {
809 ab = 1;
810 /* set alpha to 1, multiply beta by 1+i. */
811 alpha_i[1] = 0.0;
812 beta_i[1] = beta_i[0];
813 } else if (beta_i[0] == 1.0) {
814 ab = 2;
815 /* set beta to 1, multiply alpha by 1+i. */
816 beta_i[1] = 0.0;
817 alpha_i[1] = alpha_i[0];
818 } else {
819 ab = 3;
820 /* multiply alpha by 1+i, beta by 2i. */
821 alpha_i[1] = alpha_i[0];
822 beta_i[1] = 2.0 * beta_i[0];
823 beta_i[0] = 0.0;
824 }
825
826
827 /* Now fill in a */
828 for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
829 for (j = 0, aij = ai, a1ij = a1i; j < m_i;
830 j++, aij += incaij, a1ij += inca1ij) {
831 a_i[aij] = a1[a1ij];
832 a_i[aij + 1] = a2[a1ij];
833 }
834 }
835
836 /* Fill in b */
837 for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
838 for (j = 0, bij = bi, mij = mi; j < n_i;
839 j++, bij += incbij, mij += incij) {
840 if (ab == 0) {
841 b_i[bij] = 0.0;
842 b_i[bij + 1] = b0[mij];
843 } else {
844 b_i[bij] = b0[mij];
845 b_i[bij + 1] = b0[mij];
846 }
847 }
848 }
849
850 /* Fill in c */
851 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
852 for (j = 0, cij = ci, mij = mi; j < n_i;
853 j++, cij += inccij, mij += incij) {
854 if (ab == 0) {
855 c_i[cij] = -c2[mij];
856 c_i[cij + 1] = c1[mij];
857 } else if (ab == 2) {
858 c_i[cij] = -2.0 * c2[mij];
859 c_i[cij + 1] = 2.0 * c1[mij];
860 } else {
861 c_i[cij] = c1[mij];
862 c_i[cij + 1] = c2[mij];
863 }
864 }
865 }
866
867 /* Fill in truth */
868 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
869 for (j = 0, cij = ci, mij = mi; j < n_i;
870 j++, cij += inccij, mij += incij) {
871
872 head_r_elem1 = head_r1_true[mij];
873 tail_r_elem1 = tail_r1_true[mij];
874
875 head_r_elem2 = head_r2_true[mij];
876 tail_r_elem2 = tail_r2_true[mij];
877
878 if (ab == 0) {
879 head_r_true[cij] = -head_r_elem2;
880 tail_r_true[cij] = -tail_r_elem2;
881 head_r_true[cij + 1] = head_r_elem1;
882 tail_r_true[cij + 1] = tail_r_elem1;
883 } else if (ab == 1) {
884 {
885 /* Compute double-double = double-double + double-double. */
886 double bv;
887 double s1, s2, t1, t2;
888
889 /* Add two hi words. */
890 s1 = head_r_elem1 + head_r_elem2;
891 bv = s1 - head_r_elem1;
892 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
893
894 /* Add two lo words. */
895 t1 = tail_r_elem1 + tail_r_elem2;
896 bv = t1 - tail_r_elem1;
897 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
898
899 s2 += t1;
900
901 /* Renormalize (s1, s2) to (t1, s2) */
902 t1 = s1 + s2;
903 s2 = s2 - (t1 - s1);
904
905 t2 += s2;
906
907 /* Renormalize (t1, t2) */
908 head_r_elem = t1 + t2;
909 tail_r_elem = t2 - (head_r_elem - t1);
910 }
911
912 /* Set the imaginary part to R1 + R2 */
913 tail_r_true[cij + 1] = tail_r_elem;
914 head_r_true[cij + 1] = head_r_elem;
915
916 /* Set the real part to R1 - R2. */
917 {
918 double head_bt, tail_bt;
919 head_bt = -head_r_elem2;
920 tail_bt = -tail_r_elem2;
921 {
922 /* Compute double-double = double-double + double-double. */
923 double bv;
924 double s1, s2, t1, t2;
925
926 /* Add two hi words. */
927 s1 = head_r_elem1 + head_bt;
928 bv = s1 - head_r_elem1;
929 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
930
931 /* Add two lo words. */
932 t1 = tail_r_elem1 + tail_bt;
933 bv = t1 - tail_r_elem1;
934 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
935
936 s2 += t1;
937
938 /* Renormalize (s1, s2) to (t1, s2) */
939 t1 = s1 + s2;
940 s2 = s2 - (t1 - s1);
941
942 t2 += s2;
943
944 /* Renormalize (t1, t2) */
945 head_r_elem = t1 + t2;
946 tail_r_elem = t2 - (head_r_elem - t1);
947 }
948 }
949 tail_r_true[cij] = tail_r_elem;
950 head_r_true[cij] = head_r_elem;
951 } else {
952
953 /* Real part */
954 {
955 /* Compute double-double = double-double * double. */
956 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
957
958 con = head_r_elem2 * split;
959 a11 = con - head_r_elem2;
960 a11 = con - a11;
961 a21 = head_r_elem2 - a11;
962 con = -2.0 * split;
963 b1 = con - -2.0;
964 b1 = con - b1;
965 b2 = -2.0 - b1;
966
967 c11 = head_r_elem2 * -2.0;
968 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
969
970 c2 = tail_r_elem2 * -2.0;
971 t1 = c11 + c2;
972 t2 = (c2 - (t1 - c11)) + c21;
973
974 head_r_elem = t1 + t2;
975 tail_r_elem = t2 - (head_r_elem - t1);
976 }
977 head_r_true[cij] = head_r_elem;
978 tail_r_true[cij] = tail_r_elem;
979
980 /* Imaginary Part */
981 {
982 /* Compute double-double = double-double * double. */
983 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
984
985 con = head_r_elem1 * split;
986 a11 = con - head_r_elem1;
987 a11 = con - a11;
988 a21 = head_r_elem1 - a11;
989 con = 2.0 * split;
990 b1 = con - 2.0;
991 b1 = con - b1;
992 b2 = 2.0 - b1;
993
994 c11 = head_r_elem1 * 2.0;
995 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
996
997 c2 = tail_r_elem1 * 2.0;
998 t1 = c11 + c2;
999 t2 = (c2 - (t1 - c11)) + c21;
1000
1001 head_r_elem = t1 + t2;
1002 tail_r_elem = t2 - (head_r_elem - t1);
1003 }
1004 head_r_true[cij + 1] = head_r_elem;
1005 tail_r_true[cij + 1] = tail_r_elem;
1006 }
1007 }
1008 }
1009
1010 blas_free(a1);
1011 blas_free(a2);
1012 blas_free(c1);
1013 blas_free(c2);
1014 blas_free(b0);
1015 blas_free(head_r1_true);
1016 blas_free(tail_r1_true);
1017 blas_free(head_r2_true);
1018 blas_free(tail_r2_true);
1019
1020 } else {
1021 /* random matrix */
1022 double a_elem[2];
1023 double b_elem[2];
1024 double c_elem[2];
1025 double head_r_true_elem[2], tail_r_true_elem[2];
1026
1027 /* Since mixed real/complex test generator for dot
1028 scales the vectors, we need to used the non-mixed
1029 version if B is real (since A is always complex). */
1030
1031
1032 if (alpha_flag == 0) {
1033 alpha_i[0] = xrand(seed);
1034 alpha_i[1] = xrand(seed);
1035 }
1036 if (beta_flag == 0) {
1037 beta_i[0] = xrand(seed);
1038 beta_i[1] = xrand(seed);
1039 }
1040
1041 /* Fill in matrix A -- Hermitian. */
1042 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
1043 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
1044 a_elem[0] = xrand(seed);
1045 a_elem[1] = xrand(seed);
1046 a_i[aij] = a_elem[0];
1047 a_i[aij + 1] = a_elem[1];
1048 if (i == j)
1049 a_i[aij + 1] = 0.0;
1050 }
1051 }
1052
1053 /* Fill in matrix B */
1054 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
1055 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
1056 b_elem[0] = xrand(seed);
1057 b_elem[1] = xrand(seed);
1058 b_i[bij] = b_elem[0];
1059 b_i[bij + 1] = b_elem[1];
1060 }
1061 }
1062
1063 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1064 zhe_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
1065 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
1066
1067 if (side == blas_left_side)
1068 zge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1069 else
1070 zge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1071
1072 /* copy the real b_vec into complex bb_vec, so that
1073 pure complex test case generator can be called. */
1074
1075 BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
1076 alpha, 1, beta, 1, a_vec, b_vec, seed,
1077 c_elem, head_r_true_elem, tail_r_true_elem);
1078
1079 c_i[cij] = c_elem[0];
1080 c_i[cij + 1] = c_elem[1];
1081 head_r_true[cij] = head_r_true_elem[0];
1082 head_r_true[cij + 1] = head_r_true_elem[1];
1083 tail_r_true[cij] = tail_r_true_elem[0];
1084 tail_r_true[cij + 1] = tail_r_true_elem[1];
1085 }
1086 }
1087
1088
1089
1090 }
1091
1092 blas_free(a_vec);
1093 blas_free(b_vec);
1094 }
BLAS_zhemm_c_z_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)1095 void BLAS_zhemm_c_z_testgen(int norm, enum blas_order_type order,
1096 enum blas_uplo_type uplo,
1097 enum blas_side_type side, int m, int n,
1098 int randomize, void *alpha, int alpha_flag,
1099 void *beta, int beta_flag, void *a, int lda,
1100 void *b, int ldb, void *c, int ldc, int *seed,
1101 double *head_r_true, double *tail_r_true)
1102
1103 /*
1104 * Purpose
1105 * =======
1106 *
1107 * Generates the test inputs to BLAS_zhemm_c_z{_x}
1108 *
1109 * Arguments
1110 * =========
1111 *
1112 * norm (input) int
1113 * = -1: the vectors are scaled with norms near underflow.
1114 * = 0: the vectors have norms of order 1.
1115 * = 1: the vectors are scaled with norms near overflow.
1116 *
1117 * order (input) enum blas_side_type
1118 * storage format of the matrices
1119 *
1120 * uplo (input) enum blas_uplo_type
1121 * which half of the hermitian matrix a is to be stored.
1122 *
1123 * side (input) enum blas_side_type
1124 * which side of matrix b matrix a is to be multiplied.
1125 *
1126 * m n (input) int
1127 * sizes of matrices a, b, c:
1128 * matrix a is m-by-m for left multiplication
1129 * n-by-n otherwise,
1130 * matrices b, c are m-by-n.
1131 *
1132 * randomize (input) int
1133 * = 0: test case made for maximum cancellation.
1134 * = 1: test case made for maximum radomization.
1135 *
1136 * alpha (input/output) void*
1137 * if alpha_flag = 1, alpha is input.
1138 * if alpha_flag = 0, alpha is output.
1139 *
1140 * alpha_flag (input) int
1141 * = 0: alpha is free, and is output.
1142 * = 1: alpha is fixed on input.
1143 *
1144 * beta (input/output) void*
1145 * if beta_flag = 1, beta is input.
1146 * if beta_flag = 0, beta is output.
1147 *
1148 * beta_flag (input) int
1149 * = 0: beta is free, and is output.
1150 * = 1: beta is fixed on input.
1151 *
1152 * a (input/output) void*
1153 *
1154 * lda (input) lda
1155 * leading dimension of matrix A.
1156 *
1157 * b (input/output) void*
1158 *
1159 * ldb (input) int
1160 * leading dimension of matrix B.
1161 *
1162 * c (input/output) void*
1163 * generated matrix C that will be used as an input to HEMM.
1164 *
1165 * ldc (input) int
1166 * leading dimension of matrix C.
1167 *
1168 * seed (input/output) int *
1169 * seed for the random number generator.
1170 *
1171 * head_r_true (output) double *
1172 * the leading part of the truth in double-double.
1173 *
1174 * tail_r_true (output) double *
1175 * the trailing part of the truth in double-double
1176 *
1177 */
1178 {
1179
1180 /* Strategy:
1181 R1 = alpha * A1 * B + beta * C1
1182 R2 = alpha * A2 * B + beta * C2
1183 where all the matrices are real. Then let R = R1 + i R2,
1184 A = A1 + i A2, C = C1 + i C2. To make A hermitian, A1 is
1185 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
1186 */
1187
1188 int i, j;
1189 int cij, ci;
1190 int aij, ai;
1191 int a1ij, a1i;
1192 int bij, bi;
1193 int mij, mi;
1194 int inccij, incci;
1195 int incaij, incai;
1196 int inca1ij, inca1i;
1197 int incbij, incbi;
1198 int inci, incij;
1199 int inca, incb;
1200 int m_i, n_i;
1201 int ld;
1202 int ab;
1203
1204 float *a1;
1205 float *a2;
1206 double *c1;
1207 double *c2;
1208 double *b0;
1209
1210 double *head_r1_true, *tail_r1_true;
1211 double *head_r2_true, *tail_r2_true;
1212
1213 double head_r_elem1, tail_r_elem1;
1214 double head_r_elem2, tail_r_elem2;
1215 double head_r_elem, tail_r_elem;
1216
1217 float *a_vec;
1218 double *b_vec;
1219
1220 double *c_i = (double *) c;
1221 double *alpha_i = (double *) alpha;
1222 double *beta_i = (double *) beta;
1223 float *a_i = a;
1224 double *b_i = b;
1225
1226 if (side == blas_left_side) {
1227 m_i = m;
1228 n_i = n;
1229 } else {
1230 m_i = n;
1231 n_i = m;
1232 }
1233
1234 if (order == blas_colmajor)
1235 ld = m;
1236 else
1237 ld = n;
1238
1239
1240 if ((order == blas_colmajor && side == blas_left_side) ||
1241 (order == blas_rowmajor && side == blas_right_side)) {
1242 inci = inca1i = incai = incbi = incci = 1;
1243 inccij = ldc;
1244 incaij = lda;
1245 incbij = ldb;
1246 inca1ij = m_i;
1247 incij = ld;
1248 } else {
1249 incci = ldc;
1250 incai = lda;
1251 incbi = ldb;
1252 inca1i = m_i;
1253 inci = ld;
1254 incij = inca1ij = incaij = incbij = inccij = 1;
1255 }
1256
1257 incci *= 2;
1258 inccij *= 2;
1259 incai *= 2;
1260 incaij *= 2;
1261 incbi *= 2;
1262 incbij *= 2;
1263
1264 inca = incb = 1;
1265 inca *= 2;
1266 incb *= 2;
1267 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1268 if (m_i > 0 && a_vec == NULL) {
1269 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1270 }
1271 for (i = 0; i < m_i * inca; i += inca) {
1272 a_vec[i] = 0.0;
1273 a_vec[i + 1] = 0.0;
1274 }
1275 b_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
1276 if (m_i > 0 && b_vec == NULL) {
1277 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1278 }
1279 for (i = 0; i < m_i * incb; i += incb) {
1280 b_vec[i] = 0.0;
1281 b_vec[i + 1] = 0.0;
1282 }
1283
1284 if (randomize == 0) {
1285 a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
1286 if (m_i * m_i > 0 && a1 == NULL) {
1287 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1288 }
1289 a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
1290 if (m_i * m_i > 0 && a2 == NULL) {
1291 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1292 }
1293 for (i = 0; i < m_i * m_i; ++i) {
1294 a1[i] = a2[i] = 0.0;
1295 }
1296 c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1297 if (m_i * n_i > 0 && c1 == NULL) {
1298 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1299 }
1300 c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1301 if (m_i * n_i > 0 && c2 == NULL) {
1302 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1303 }
1304 b0 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1305 if (m_i * n_i > 0 && b0 == NULL) {
1306 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1307 }
1308 for (i = 0; i < m_i * n_i; ++i) {
1309 c1[i] = c2[i] = b0[i] = 0.0;
1310 }
1311 head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1312 tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1313 if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1314 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1315 };
1316 head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1317 tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1318 if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1319 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1320 };
1321
1322 /* First generate the real portion of matrix A, and matrix B.
1323 Note that Re(A) is a symmetric matrix. */
1324 BLAS_dsymm_s_d_testgen
1325 (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
1326 beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
1327
1328 BLAS_dskew_s_d_testgen
1329 (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
1330 c2, ld, seed, head_r2_true, tail_r2_true);
1331
1332
1333 /* The case where B is a complex matrix. Since B is generated
1334 as a real matrix, we need to perform some scaling.
1335
1336 There are four cases to consider, depending on the values
1337 of alpha and beta.
1338
1339 values scaling
1340 alpha beta alpha A B beta C R (truth)
1341 0 1 1 i i i
1342 1 1 ? 1+i 1+i 1+i
1343 2 ? 1 1+i 1+i 2i 2i
1344 3 ? ? 1+i 1+i 2i 2i
1345
1346 Note that we can afford to scale R by 1+i, since they are
1347 computed in double-double precision.
1348 */
1349
1350 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1351 ab = 0;
1352 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
1353 } else if (alpha_i[0] == 1.0) {
1354 ab = 1;
1355 /* set alpha to 1, multiply beta by 1+i. */
1356 alpha_i[1] = 0.0;
1357 beta_i[1] = beta_i[0];
1358 } else if (beta_i[0] == 1.0) {
1359 ab = 2;
1360 /* set beta to 1, multiply alpha by 1+i. */
1361 beta_i[1] = 0.0;
1362 alpha_i[1] = alpha_i[0];
1363 } else {
1364 ab = 3;
1365 /* multiply alpha by 1+i, beta by 2i. */
1366 alpha_i[1] = alpha_i[0];
1367 beta_i[1] = 2.0 * beta_i[0];
1368 beta_i[0] = 0.0;
1369 }
1370
1371
1372 /* Now fill in a */
1373 for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
1374 for (j = 0, aij = ai, a1ij = a1i; j < m_i;
1375 j++, aij += incaij, a1ij += inca1ij) {
1376 a_i[aij] = a1[a1ij];
1377 a_i[aij + 1] = a2[a1ij];
1378 }
1379 }
1380
1381 /* Fill in b */
1382 for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
1383 for (j = 0, bij = bi, mij = mi; j < n_i;
1384 j++, bij += incbij, mij += incij) {
1385 if (ab == 0) {
1386 b_i[bij] = 0.0;
1387 b_i[bij + 1] = b0[mij];
1388 } else {
1389 b_i[bij] = b0[mij];
1390 b_i[bij + 1] = b0[mij];
1391 }
1392 }
1393 }
1394
1395 /* Fill in c */
1396 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1397 for (j = 0, cij = ci, mij = mi; j < n_i;
1398 j++, cij += inccij, mij += incij) {
1399 if (ab == 0) {
1400 c_i[cij] = -c2[mij];
1401 c_i[cij + 1] = c1[mij];
1402 } else if (ab == 2) {
1403 c_i[cij] = -2.0 * c2[mij];
1404 c_i[cij + 1] = 2.0 * c1[mij];
1405 } else {
1406 c_i[cij] = c1[mij];
1407 c_i[cij + 1] = c2[mij];
1408 }
1409 }
1410 }
1411
1412 /* Fill in truth */
1413 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1414 for (j = 0, cij = ci, mij = mi; j < n_i;
1415 j++, cij += inccij, mij += incij) {
1416
1417 head_r_elem1 = head_r1_true[mij];
1418 tail_r_elem1 = tail_r1_true[mij];
1419
1420 head_r_elem2 = head_r2_true[mij];
1421 tail_r_elem2 = tail_r2_true[mij];
1422
1423 if (ab == 0) {
1424 head_r_true[cij] = -head_r_elem2;
1425 tail_r_true[cij] = -tail_r_elem2;
1426 head_r_true[cij + 1] = head_r_elem1;
1427 tail_r_true[cij + 1] = tail_r_elem1;
1428 } else if (ab == 1) {
1429 {
1430 /* Compute double-double = double-double + double-double. */
1431 double bv;
1432 double s1, s2, t1, t2;
1433
1434 /* Add two hi words. */
1435 s1 = head_r_elem1 + head_r_elem2;
1436 bv = s1 - head_r_elem1;
1437 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1438
1439 /* Add two lo words. */
1440 t1 = tail_r_elem1 + tail_r_elem2;
1441 bv = t1 - tail_r_elem1;
1442 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1443
1444 s2 += t1;
1445
1446 /* Renormalize (s1, s2) to (t1, s2) */
1447 t1 = s1 + s2;
1448 s2 = s2 - (t1 - s1);
1449
1450 t2 += s2;
1451
1452 /* Renormalize (t1, t2) */
1453 head_r_elem = t1 + t2;
1454 tail_r_elem = t2 - (head_r_elem - t1);
1455 }
1456
1457 /* Set the imaginary part to R1 + R2 */
1458 tail_r_true[cij + 1] = tail_r_elem;
1459 head_r_true[cij + 1] = head_r_elem;
1460
1461 /* Set the real part to R1 - R2. */
1462 {
1463 double head_bt, tail_bt;
1464 head_bt = -head_r_elem2;
1465 tail_bt = -tail_r_elem2;
1466 {
1467 /* Compute double-double = double-double + double-double. */
1468 double bv;
1469 double s1, s2, t1, t2;
1470
1471 /* Add two hi words. */
1472 s1 = head_r_elem1 + head_bt;
1473 bv = s1 - head_r_elem1;
1474 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1475
1476 /* Add two lo words. */
1477 t1 = tail_r_elem1 + tail_bt;
1478 bv = t1 - tail_r_elem1;
1479 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1480
1481 s2 += t1;
1482
1483 /* Renormalize (s1, s2) to (t1, s2) */
1484 t1 = s1 + s2;
1485 s2 = s2 - (t1 - s1);
1486
1487 t2 += s2;
1488
1489 /* Renormalize (t1, t2) */
1490 head_r_elem = t1 + t2;
1491 tail_r_elem = t2 - (head_r_elem - t1);
1492 }
1493 }
1494 tail_r_true[cij] = tail_r_elem;
1495 head_r_true[cij] = head_r_elem;
1496 } else {
1497
1498 /* Real part */
1499 {
1500 /* Compute double-double = double-double * double. */
1501 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1502
1503 con = head_r_elem2 * split;
1504 a11 = con - head_r_elem2;
1505 a11 = con - a11;
1506 a21 = head_r_elem2 - a11;
1507 con = -2.0 * split;
1508 b1 = con - -2.0;
1509 b1 = con - b1;
1510 b2 = -2.0 - b1;
1511
1512 c11 = head_r_elem2 * -2.0;
1513 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1514
1515 c2 = tail_r_elem2 * -2.0;
1516 t1 = c11 + c2;
1517 t2 = (c2 - (t1 - c11)) + c21;
1518
1519 head_r_elem = t1 + t2;
1520 tail_r_elem = t2 - (head_r_elem - t1);
1521 }
1522 head_r_true[cij] = head_r_elem;
1523 tail_r_true[cij] = tail_r_elem;
1524
1525 /* Imaginary Part */
1526 {
1527 /* Compute double-double = double-double * double. */
1528 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1529
1530 con = head_r_elem1 * split;
1531 a11 = con - head_r_elem1;
1532 a11 = con - a11;
1533 a21 = head_r_elem1 - a11;
1534 con = 2.0 * split;
1535 b1 = con - 2.0;
1536 b1 = con - b1;
1537 b2 = 2.0 - b1;
1538
1539 c11 = head_r_elem1 * 2.0;
1540 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1541
1542 c2 = tail_r_elem1 * 2.0;
1543 t1 = c11 + c2;
1544 t2 = (c2 - (t1 - c11)) + c21;
1545
1546 head_r_elem = t1 + t2;
1547 tail_r_elem = t2 - (head_r_elem - t1);
1548 }
1549 head_r_true[cij + 1] = head_r_elem;
1550 tail_r_true[cij + 1] = tail_r_elem;
1551 }
1552 }
1553 }
1554
1555 blas_free(a1);
1556 blas_free(a2);
1557 blas_free(c1);
1558 blas_free(c2);
1559 blas_free(b0);
1560 blas_free(head_r1_true);
1561 blas_free(tail_r1_true);
1562 blas_free(head_r2_true);
1563 blas_free(tail_r2_true);
1564
1565 } else {
1566 /* random matrix */
1567 float a_elem[2];
1568 double b_elem[2];
1569 double c_elem[2];
1570 double head_r_true_elem[2], tail_r_true_elem[2];
1571
1572 /* Since mixed real/complex test generator for dot
1573 scales the vectors, we need to used the non-mixed
1574 version if B is real (since A is always complex). */
1575
1576
1577 if (alpha_flag == 0) {
1578 alpha_i[0] = (float) xrand(seed);
1579 alpha_i[1] = (float) xrand(seed);
1580 }
1581 if (beta_flag == 0) {
1582 beta_i[0] = (float) xrand(seed);
1583 beta_i[1] = (float) xrand(seed);
1584 }
1585
1586 /* Fill in matrix A -- Hermitian. */
1587 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
1588 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
1589 a_elem[0] = (float) xrand(seed);
1590 a_elem[1] = (float) xrand(seed);
1591 a_i[aij] = a_elem[0];
1592 a_i[aij + 1] = a_elem[1];
1593 if (i == j)
1594 a_i[aij + 1] = 0.0;
1595 }
1596 }
1597
1598 /* Fill in matrix B */
1599 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
1600 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
1601 b_elem[0] = (float) xrand(seed);
1602 b_elem[1] = (float) xrand(seed);
1603 b_i[bij] = b_elem[0];
1604 b_i[bij + 1] = b_elem[1];
1605 }
1606 }
1607
1608 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1609 che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
1610 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
1611
1612 if (side == blas_left_side)
1613 zge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1614 else
1615 zge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1616
1617 /* copy the real b_vec into complex bb_vec, so that
1618 pure complex test case generator can be called. */
1619
1620 BLAS_zdot_c_z_testgen(m_i, m_i, 0, norm, blas_no_conj,
1621 alpha, 1, beta, 1, a_vec, b_vec, seed,
1622 c_elem, head_r_true_elem, tail_r_true_elem);
1623
1624 c_i[cij] = c_elem[0];
1625 c_i[cij + 1] = c_elem[1];
1626 head_r_true[cij] = head_r_true_elem[0];
1627 head_r_true[cij + 1] = head_r_true_elem[1];
1628 tail_r_true[cij] = tail_r_true_elem[0];
1629 tail_r_true[cij + 1] = tail_r_true_elem[1];
1630 }
1631 }
1632
1633
1634
1635 }
1636
1637 blas_free(a_vec);
1638 blas_free(b_vec);
1639 }
BLAS_zhemm_z_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)1640 void BLAS_zhemm_z_c_testgen(int norm, enum blas_order_type order,
1641 enum blas_uplo_type uplo,
1642 enum blas_side_type side, int m, int n,
1643 int randomize, void *alpha, int alpha_flag,
1644 void *beta, int beta_flag, void *a, int lda,
1645 void *b, int ldb, void *c, int ldc, int *seed,
1646 double *head_r_true, double *tail_r_true)
1647
1648 /*
1649 * Purpose
1650 * =======
1651 *
1652 * Generates the test inputs to BLAS_zhemm_z_c{_x}
1653 *
1654 * Arguments
1655 * =========
1656 *
1657 * norm (input) int
1658 * = -1: the vectors are scaled with norms near underflow.
1659 * = 0: the vectors have norms of order 1.
1660 * = 1: the vectors are scaled with norms near overflow.
1661 *
1662 * order (input) enum blas_side_type
1663 * storage format of the matrices
1664 *
1665 * uplo (input) enum blas_uplo_type
1666 * which half of the hermitian matrix a is to be stored.
1667 *
1668 * side (input) enum blas_side_type
1669 * which side of matrix b matrix a is to be multiplied.
1670 *
1671 * m n (input) int
1672 * sizes of matrices a, b, c:
1673 * matrix a is m-by-m for left multiplication
1674 * n-by-n otherwise,
1675 * matrices b, c are m-by-n.
1676 *
1677 * randomize (input) int
1678 * = 0: test case made for maximum cancellation.
1679 * = 1: test case made for maximum radomization.
1680 *
1681 * alpha (input/output) void*
1682 * if alpha_flag = 1, alpha is input.
1683 * if alpha_flag = 0, alpha is output.
1684 *
1685 * alpha_flag (input) int
1686 * = 0: alpha is free, and is output.
1687 * = 1: alpha is fixed on input.
1688 *
1689 * beta (input/output) void*
1690 * if beta_flag = 1, beta is input.
1691 * if beta_flag = 0, beta is output.
1692 *
1693 * beta_flag (input) int
1694 * = 0: beta is free, and is output.
1695 * = 1: beta is fixed on input.
1696 *
1697 * a (input/output) void*
1698 *
1699 * lda (input) lda
1700 * leading dimension of matrix A.
1701 *
1702 * b (input/output) void*
1703 *
1704 * ldb (input) int
1705 * leading dimension of matrix B.
1706 *
1707 * c (input/output) void*
1708 * generated matrix C that will be used as an input to HEMM.
1709 *
1710 * ldc (input) int
1711 * leading dimension of matrix C.
1712 *
1713 * seed (input/output) int *
1714 * seed for the random number generator.
1715 *
1716 * head_r_true (output) double *
1717 * the leading part of the truth in double-double.
1718 *
1719 * tail_r_true (output) double *
1720 * the trailing part of the truth in double-double
1721 *
1722 */
1723 {
1724
1725 /* Strategy:
1726 R1 = alpha * A1 * B + beta * C1
1727 R2 = alpha * A2 * B + beta * C2
1728 where all the matrices are real. Then let R = R1 + i R2,
1729 A = A1 + i A2, C = C1 + i C2. To make A hermitian, A1 is
1730 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
1731 */
1732
1733 int i, j;
1734 int cij, ci;
1735 int aij, ai;
1736 int a1ij, a1i;
1737 int bij, bi;
1738 int mij, mi;
1739 int inccij, incci;
1740 int incaij, incai;
1741 int inca1ij, inca1i;
1742 int incbij, incbi;
1743 int inci, incij;
1744 int inca, incb;
1745 int m_i, n_i;
1746 int ld;
1747 int ab;
1748
1749 double *a1;
1750 double *a2;
1751 double *c1;
1752 double *c2;
1753 float *b0;
1754
1755 double *head_r1_true, *tail_r1_true;
1756 double *head_r2_true, *tail_r2_true;
1757
1758 double head_r_elem1, tail_r_elem1;
1759 double head_r_elem2, tail_r_elem2;
1760 double head_r_elem, tail_r_elem;
1761
1762 double *a_vec;
1763 float *b_vec;
1764
1765 double *c_i = (double *) c;
1766 double *alpha_i = (double *) alpha;
1767 double *beta_i = (double *) beta;
1768 double *a_i = a;
1769 float *b_i = b;
1770
1771 if (side == blas_left_side) {
1772 m_i = m;
1773 n_i = n;
1774 } else {
1775 m_i = n;
1776 n_i = m;
1777 }
1778
1779 if (order == blas_colmajor)
1780 ld = m;
1781 else
1782 ld = n;
1783
1784
1785 if ((order == blas_colmajor && side == blas_left_side) ||
1786 (order == blas_rowmajor && side == blas_right_side)) {
1787 inci = inca1i = incai = incbi = incci = 1;
1788 inccij = ldc;
1789 incaij = lda;
1790 incbij = ldb;
1791 inca1ij = m_i;
1792 incij = ld;
1793 } else {
1794 incci = ldc;
1795 incai = lda;
1796 incbi = ldb;
1797 inca1i = m_i;
1798 inci = ld;
1799 incij = inca1ij = incaij = incbij = inccij = 1;
1800 }
1801
1802 incci *= 2;
1803 inccij *= 2;
1804 incai *= 2;
1805 incaij *= 2;
1806 incbi *= 2;
1807 incbij *= 2;
1808
1809 inca = incb = 1;
1810 inca *= 2;
1811 incb *= 2;
1812 a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
1813 if (m_i > 0 && a_vec == NULL) {
1814 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1815 }
1816 for (i = 0; i < m_i * inca; i += inca) {
1817 a_vec[i] = 0.0;
1818 a_vec[i + 1] = 0.0;
1819 }
1820 b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1821 if (m_i > 0 && b_vec == NULL) {
1822 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1823 }
1824 for (i = 0; i < m_i * incb; i += incb) {
1825 b_vec[i] = 0.0;
1826 b_vec[i + 1] = 0.0;
1827 }
1828
1829 if (randomize == 0) {
1830 a1 = (double *) blas_malloc(m_i * m_i * sizeof(double));
1831 if (m_i * m_i > 0 && a1 == NULL) {
1832 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1833 }
1834 a2 = (double *) blas_malloc(m_i * m_i * sizeof(double));
1835 if (m_i * m_i > 0 && a2 == NULL) {
1836 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1837 }
1838 for (i = 0; i < m_i * m_i; ++i) {
1839 a1[i] = a2[i] = 0.0;
1840 }
1841 c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1842 if (m_i * n_i > 0 && c1 == NULL) {
1843 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1844 }
1845 c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1846 if (m_i * n_i > 0 && c2 == NULL) {
1847 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1848 }
1849 b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
1850 if (m_i * n_i > 0 && b0 == NULL) {
1851 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1852 }
1853 for (i = 0; i < m_i * n_i; ++i) {
1854 c1[i] = c2[i] = b0[i] = 0.0;
1855 }
1856 head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1857 tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1858 if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1859 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1860 };
1861 head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1862 tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1863 if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1864 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1865 };
1866
1867 /* First generate the real portion of matrix A, and matrix B.
1868 Note that Re(A) is a symmetric matrix. */
1869 BLAS_dsymm_d_s_testgen
1870 (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
1871 beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
1872
1873 BLAS_dskew_d_s_testgen
1874 (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
1875 c2, ld, seed, head_r2_true, tail_r2_true);
1876
1877
1878 /* The case where B is a complex matrix. Since B is generated
1879 as a real matrix, we need to perform some scaling.
1880
1881 There are four cases to consider, depending on the values
1882 of alpha and beta.
1883
1884 values scaling
1885 alpha beta alpha A B beta C R (truth)
1886 0 1 1 i i i
1887 1 1 ? 1+i 1+i 1+i
1888 2 ? 1 1+i 1+i 2i 2i
1889 3 ? ? 1+i 1+i 2i 2i
1890
1891 Note that we can afford to scale R by 1+i, since they are
1892 computed in double-double precision.
1893 */
1894
1895 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1896 ab = 0;
1897 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
1898 } else if (alpha_i[0] == 1.0) {
1899 ab = 1;
1900 /* set alpha to 1, multiply beta by 1+i. */
1901 alpha_i[1] = 0.0;
1902 beta_i[1] = beta_i[0];
1903 } else if (beta_i[0] == 1.0) {
1904 ab = 2;
1905 /* set beta to 1, multiply alpha by 1+i. */
1906 beta_i[1] = 0.0;
1907 alpha_i[1] = alpha_i[0];
1908 } else {
1909 ab = 3;
1910 /* multiply alpha by 1+i, beta by 2i. */
1911 alpha_i[1] = alpha_i[0];
1912 beta_i[1] = 2.0 * beta_i[0];
1913 beta_i[0] = 0.0;
1914 }
1915
1916
1917 /* Now fill in a */
1918 for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
1919 for (j = 0, aij = ai, a1ij = a1i; j < m_i;
1920 j++, aij += incaij, a1ij += inca1ij) {
1921 a_i[aij] = a1[a1ij];
1922 a_i[aij + 1] = a2[a1ij];
1923 }
1924 }
1925
1926 /* Fill in b */
1927 for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
1928 for (j = 0, bij = bi, mij = mi; j < n_i;
1929 j++, bij += incbij, mij += incij) {
1930 if (ab == 0) {
1931 b_i[bij] = 0.0;
1932 b_i[bij + 1] = b0[mij];
1933 } else {
1934 b_i[bij] = b0[mij];
1935 b_i[bij + 1] = b0[mij];
1936 }
1937 }
1938 }
1939
1940 /* Fill in c */
1941 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1942 for (j = 0, cij = ci, mij = mi; j < n_i;
1943 j++, cij += inccij, mij += incij) {
1944 if (ab == 0) {
1945 c_i[cij] = -c2[mij];
1946 c_i[cij + 1] = c1[mij];
1947 } else if (ab == 2) {
1948 c_i[cij] = -2.0 * c2[mij];
1949 c_i[cij + 1] = 2.0 * c1[mij];
1950 } else {
1951 c_i[cij] = c1[mij];
1952 c_i[cij + 1] = c2[mij];
1953 }
1954 }
1955 }
1956
1957 /* Fill in truth */
1958 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1959 for (j = 0, cij = ci, mij = mi; j < n_i;
1960 j++, cij += inccij, mij += incij) {
1961
1962 head_r_elem1 = head_r1_true[mij];
1963 tail_r_elem1 = tail_r1_true[mij];
1964
1965 head_r_elem2 = head_r2_true[mij];
1966 tail_r_elem2 = tail_r2_true[mij];
1967
1968 if (ab == 0) {
1969 head_r_true[cij] = -head_r_elem2;
1970 tail_r_true[cij] = -tail_r_elem2;
1971 head_r_true[cij + 1] = head_r_elem1;
1972 tail_r_true[cij + 1] = tail_r_elem1;
1973 } else if (ab == 1) {
1974 {
1975 /* Compute double-double = double-double + double-double. */
1976 double bv;
1977 double s1, s2, t1, t2;
1978
1979 /* Add two hi words. */
1980 s1 = head_r_elem1 + head_r_elem2;
1981 bv = s1 - head_r_elem1;
1982 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1983
1984 /* Add two lo words. */
1985 t1 = tail_r_elem1 + tail_r_elem2;
1986 bv = t1 - tail_r_elem1;
1987 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1988
1989 s2 += t1;
1990
1991 /* Renormalize (s1, s2) to (t1, s2) */
1992 t1 = s1 + s2;
1993 s2 = s2 - (t1 - s1);
1994
1995 t2 += s2;
1996
1997 /* Renormalize (t1, t2) */
1998 head_r_elem = t1 + t2;
1999 tail_r_elem = t2 - (head_r_elem - t1);
2000 }
2001
2002 /* Set the imaginary part to R1 + R2 */
2003 tail_r_true[cij + 1] = tail_r_elem;
2004 head_r_true[cij + 1] = head_r_elem;
2005
2006 /* Set the real part to R1 - R2. */
2007 {
2008 double head_bt, tail_bt;
2009 head_bt = -head_r_elem2;
2010 tail_bt = -tail_r_elem2;
2011 {
2012 /* Compute double-double = double-double + double-double. */
2013 double bv;
2014 double s1, s2, t1, t2;
2015
2016 /* Add two hi words. */
2017 s1 = head_r_elem1 + head_bt;
2018 bv = s1 - head_r_elem1;
2019 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2020
2021 /* Add two lo words. */
2022 t1 = tail_r_elem1 + tail_bt;
2023 bv = t1 - tail_r_elem1;
2024 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2025
2026 s2 += t1;
2027
2028 /* Renormalize (s1, s2) to (t1, s2) */
2029 t1 = s1 + s2;
2030 s2 = s2 - (t1 - s1);
2031
2032 t2 += s2;
2033
2034 /* Renormalize (t1, t2) */
2035 head_r_elem = t1 + t2;
2036 tail_r_elem = t2 - (head_r_elem - t1);
2037 }
2038 }
2039 tail_r_true[cij] = tail_r_elem;
2040 head_r_true[cij] = head_r_elem;
2041 } else {
2042
2043 /* Real part */
2044 {
2045 /* Compute double-double = double-double * double. */
2046 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2047
2048 con = head_r_elem2 * split;
2049 a11 = con - head_r_elem2;
2050 a11 = con - a11;
2051 a21 = head_r_elem2 - a11;
2052 con = -2.0 * split;
2053 b1 = con - -2.0;
2054 b1 = con - b1;
2055 b2 = -2.0 - b1;
2056
2057 c11 = head_r_elem2 * -2.0;
2058 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2059
2060 c2 = tail_r_elem2 * -2.0;
2061 t1 = c11 + c2;
2062 t2 = (c2 - (t1 - c11)) + c21;
2063
2064 head_r_elem = t1 + t2;
2065 tail_r_elem = t2 - (head_r_elem - t1);
2066 }
2067 head_r_true[cij] = head_r_elem;
2068 tail_r_true[cij] = tail_r_elem;
2069
2070 /* Imaginary Part */
2071 {
2072 /* Compute double-double = double-double * double. */
2073 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2074
2075 con = head_r_elem1 * split;
2076 a11 = con - head_r_elem1;
2077 a11 = con - a11;
2078 a21 = head_r_elem1 - a11;
2079 con = 2.0 * split;
2080 b1 = con - 2.0;
2081 b1 = con - b1;
2082 b2 = 2.0 - b1;
2083
2084 c11 = head_r_elem1 * 2.0;
2085 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2086
2087 c2 = tail_r_elem1 * 2.0;
2088 t1 = c11 + c2;
2089 t2 = (c2 - (t1 - c11)) + c21;
2090
2091 head_r_elem = t1 + t2;
2092 tail_r_elem = t2 - (head_r_elem - t1);
2093 }
2094 head_r_true[cij + 1] = head_r_elem;
2095 tail_r_true[cij + 1] = tail_r_elem;
2096 }
2097 }
2098 }
2099
2100 blas_free(a1);
2101 blas_free(a2);
2102 blas_free(c1);
2103 blas_free(c2);
2104 blas_free(b0);
2105 blas_free(head_r1_true);
2106 blas_free(tail_r1_true);
2107 blas_free(head_r2_true);
2108 blas_free(tail_r2_true);
2109
2110 } else {
2111 /* random matrix */
2112 double a_elem[2];
2113 float b_elem[2];
2114 double c_elem[2];
2115 double head_r_true_elem[2], tail_r_true_elem[2];
2116
2117 /* Since mixed real/complex test generator for dot
2118 scales the vectors, we need to used the non-mixed
2119 version if B is real (since A is always complex). */
2120
2121
2122 if (alpha_flag == 0) {
2123 alpha_i[0] = (float) xrand(seed);
2124 alpha_i[1] = (float) xrand(seed);
2125 }
2126 if (beta_flag == 0) {
2127 beta_i[0] = (float) xrand(seed);
2128 beta_i[1] = (float) xrand(seed);
2129 }
2130
2131 /* Fill in matrix A -- Hermitian. */
2132 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2133 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2134 a_elem[0] = (float) xrand(seed);
2135 a_elem[1] = (float) xrand(seed);
2136 a_i[aij] = a_elem[0];
2137 a_i[aij + 1] = a_elem[1];
2138 if (i == j)
2139 a_i[aij + 1] = 0.0;
2140 }
2141 }
2142
2143 /* Fill in matrix B */
2144 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2145 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2146 b_elem[0] = (float) xrand(seed);
2147 b_elem[1] = (float) xrand(seed);
2148 b_i[bij] = b_elem[0];
2149 b_i[bij + 1] = b_elem[1];
2150 }
2151 }
2152
2153 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2154 zhe_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
2155 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2156
2157 if (side == blas_left_side)
2158 cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2159 else
2160 cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2161
2162 /* copy the real b_vec into complex bb_vec, so that
2163 pure complex test case generator can be called. */
2164
2165 BLAS_zdot_z_c_testgen(m_i, m_i, 0, norm, blas_no_conj,
2166 alpha, 1, beta, 1, a_vec, b_vec, seed,
2167 c_elem, head_r_true_elem, tail_r_true_elem);
2168
2169 c_i[cij] = c_elem[0];
2170 c_i[cij + 1] = c_elem[1];
2171 head_r_true[cij] = head_r_true_elem[0];
2172 head_r_true[cij + 1] = head_r_true_elem[1];
2173 tail_r_true[cij] = tail_r_true_elem[0];
2174 tail_r_true[cij + 1] = tail_r_true_elem[1];
2175 }
2176 }
2177
2178
2179
2180 }
2181
2182 blas_free(a_vec);
2183 blas_free(b_vec);
2184 }
BLAS_zhemm_c_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)2185 void BLAS_zhemm_c_c_testgen(int norm, enum blas_order_type order,
2186 enum blas_uplo_type uplo,
2187 enum blas_side_type side, int m, int n,
2188 int randomize, void *alpha, int alpha_flag,
2189 void *beta, int beta_flag, void *a, int lda,
2190 void *b, int ldb, void *c, int ldc, int *seed,
2191 double *head_r_true, double *tail_r_true)
2192
2193 /*
2194 * Purpose
2195 * =======
2196 *
2197 * Generates the test inputs to BLAS_zhemm_c_c{_x}
2198 *
2199 * Arguments
2200 * =========
2201 *
2202 * norm (input) int
2203 * = -1: the vectors are scaled with norms near underflow.
2204 * = 0: the vectors have norms of order 1.
2205 * = 1: the vectors are scaled with norms near overflow.
2206 *
2207 * order (input) enum blas_side_type
2208 * storage format of the matrices
2209 *
2210 * uplo (input) enum blas_uplo_type
2211 * which half of the hermitian matrix a is to be stored.
2212 *
2213 * side (input) enum blas_side_type
2214 * which side of matrix b matrix a is to be multiplied.
2215 *
2216 * m n (input) int
2217 * sizes of matrices a, b, c:
2218 * matrix a is m-by-m for left multiplication
2219 * n-by-n otherwise,
2220 * matrices b, c are m-by-n.
2221 *
2222 * randomize (input) int
2223 * = 0: test case made for maximum cancellation.
2224 * = 1: test case made for maximum radomization.
2225 *
2226 * alpha (input/output) void*
2227 * if alpha_flag = 1, alpha is input.
2228 * if alpha_flag = 0, alpha is output.
2229 *
2230 * alpha_flag (input) int
2231 * = 0: alpha is free, and is output.
2232 * = 1: alpha is fixed on input.
2233 *
2234 * beta (input/output) void*
2235 * if beta_flag = 1, beta is input.
2236 * if beta_flag = 0, beta is output.
2237 *
2238 * beta_flag (input) int
2239 * = 0: beta is free, and is output.
2240 * = 1: beta is fixed on input.
2241 *
2242 * a (input/output) void*
2243 *
2244 * lda (input) lda
2245 * leading dimension of matrix A.
2246 *
2247 * b (input/output) void*
2248 *
2249 * ldb (input) int
2250 * leading dimension of matrix B.
2251 *
2252 * c (input/output) void*
2253 * generated matrix C that will be used as an input to HEMM.
2254 *
2255 * ldc (input) int
2256 * leading dimension of matrix C.
2257 *
2258 * seed (input/output) int *
2259 * seed for the random number generator.
2260 *
2261 * head_r_true (output) double *
2262 * the leading part of the truth in double-double.
2263 *
2264 * tail_r_true (output) double *
2265 * the trailing part of the truth in double-double
2266 *
2267 */
2268 {
2269
2270 /* Strategy:
2271 R1 = alpha * A1 * B + beta * C1
2272 R2 = alpha * A2 * B + beta * C2
2273 where all the matrices are real. Then let R = R1 + i R2,
2274 A = A1 + i A2, C = C1 + i C2. To make A hermitian, A1 is
2275 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
2276 */
2277
2278 int i, j;
2279 int cij, ci;
2280 int aij, ai;
2281 int a1ij, a1i;
2282 int bij, bi;
2283 int mij, mi;
2284 int inccij, incci;
2285 int incaij, incai;
2286 int inca1ij, inca1i;
2287 int incbij, incbi;
2288 int inci, incij;
2289 int inca, incb;
2290 int m_i, n_i;
2291 int ld;
2292 int ab;
2293
2294 float *a1;
2295 float *a2;
2296 double *c1;
2297 double *c2;
2298 float *b0;
2299
2300 double *head_r1_true, *tail_r1_true;
2301 double *head_r2_true, *tail_r2_true;
2302
2303 double head_r_elem1, tail_r_elem1;
2304 double head_r_elem2, tail_r_elem2;
2305 double head_r_elem, tail_r_elem;
2306
2307 float *a_vec;
2308 float *b_vec;
2309
2310 double *c_i = (double *) c;
2311 double *alpha_i = (double *) alpha;
2312 double *beta_i = (double *) beta;
2313 float *a_i = a;
2314 float *b_i = b;
2315
2316 if (side == blas_left_side) {
2317 m_i = m;
2318 n_i = n;
2319 } else {
2320 m_i = n;
2321 n_i = m;
2322 }
2323
2324 if (order == blas_colmajor)
2325 ld = m;
2326 else
2327 ld = n;
2328
2329
2330 if ((order == blas_colmajor && side == blas_left_side) ||
2331 (order == blas_rowmajor && side == blas_right_side)) {
2332 inci = inca1i = incai = incbi = incci = 1;
2333 inccij = ldc;
2334 incaij = lda;
2335 incbij = ldb;
2336 inca1ij = m_i;
2337 incij = ld;
2338 } else {
2339 incci = ldc;
2340 incai = lda;
2341 incbi = ldb;
2342 inca1i = m_i;
2343 inci = ld;
2344 incij = inca1ij = incaij = incbij = inccij = 1;
2345 }
2346
2347 incci *= 2;
2348 inccij *= 2;
2349 incai *= 2;
2350 incaij *= 2;
2351 incbi *= 2;
2352 incbij *= 2;
2353
2354 inca = incb = 1;
2355 inca *= 2;
2356 incb *= 2;
2357 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
2358 if (m_i > 0 && a_vec == NULL) {
2359 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2360 }
2361 for (i = 0; i < m_i * inca; i += inca) {
2362 a_vec[i] = 0.0;
2363 a_vec[i + 1] = 0.0;
2364 }
2365 b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
2366 if (m_i > 0 && b_vec == NULL) {
2367 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2368 }
2369 for (i = 0; i < m_i * incb; i += incb) {
2370 b_vec[i] = 0.0;
2371 b_vec[i + 1] = 0.0;
2372 }
2373
2374 if (randomize == 0) {
2375 a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
2376 if (m_i * m_i > 0 && a1 == NULL) {
2377 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2378 }
2379 a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
2380 if (m_i * m_i > 0 && a2 == NULL) {
2381 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2382 }
2383 for (i = 0; i < m_i * m_i; ++i) {
2384 a1[i] = a2[i] = 0.0;
2385 }
2386 c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2387 if (m_i * n_i > 0 && c1 == NULL) {
2388 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2389 }
2390 c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2391 if (m_i * n_i > 0 && c2 == NULL) {
2392 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2393 }
2394 b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
2395 if (m_i * n_i > 0 && b0 == NULL) {
2396 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2397 }
2398 for (i = 0; i < m_i * n_i; ++i) {
2399 c1[i] = c2[i] = b0[i] = 0.0;
2400 }
2401 head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2402 tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2403 if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2404 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2405 };
2406 head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2407 tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2408 if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2409 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2410 };
2411
2412 /* First generate the real portion of matrix A, and matrix B.
2413 Note that Re(A) is a symmetric matrix. */
2414 BLAS_dsymm_s_s_testgen
2415 (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
2416 beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
2417
2418 BLAS_dskew_s_s_testgen
2419 (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
2420 c2, ld, seed, head_r2_true, tail_r2_true);
2421
2422
2423 /* The case where B is a complex matrix. Since B is generated
2424 as a real matrix, we need to perform some scaling.
2425
2426 There are four cases to consider, depending on the values
2427 of alpha and beta.
2428
2429 values scaling
2430 alpha beta alpha A B beta C R (truth)
2431 0 1 1 i i i
2432 1 1 ? 1+i 1+i 1+i
2433 2 ? 1 1+i 1+i 2i 2i
2434 3 ? ? 1+i 1+i 2i 2i
2435
2436 Note that we can afford to scale R by 1+i, since they are
2437 computed in double-double precision.
2438 */
2439
2440 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2441 ab = 0;
2442 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
2443 } else if (alpha_i[0] == 1.0) {
2444 ab = 1;
2445 /* set alpha to 1, multiply beta by 1+i. */
2446 alpha_i[1] = 0.0;
2447 beta_i[1] = beta_i[0];
2448 } else if (beta_i[0] == 1.0) {
2449 ab = 2;
2450 /* set beta to 1, multiply alpha by 1+i. */
2451 beta_i[1] = 0.0;
2452 alpha_i[1] = alpha_i[0];
2453 } else {
2454 ab = 3;
2455 /* multiply alpha by 1+i, beta by 2i. */
2456 alpha_i[1] = alpha_i[0];
2457 beta_i[1] = 2.0 * beta_i[0];
2458 beta_i[0] = 0.0;
2459 }
2460
2461
2462 /* Now fill in a */
2463 for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
2464 for (j = 0, aij = ai, a1ij = a1i; j < m_i;
2465 j++, aij += incaij, a1ij += inca1ij) {
2466 a_i[aij] = a1[a1ij];
2467 a_i[aij + 1] = a2[a1ij];
2468 }
2469 }
2470
2471 /* Fill in b */
2472 for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
2473 for (j = 0, bij = bi, mij = mi; j < n_i;
2474 j++, bij += incbij, mij += incij) {
2475 if (ab == 0) {
2476 b_i[bij] = 0.0;
2477 b_i[bij + 1] = b0[mij];
2478 } else {
2479 b_i[bij] = b0[mij];
2480 b_i[bij + 1] = b0[mij];
2481 }
2482 }
2483 }
2484
2485 /* Fill in c */
2486 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
2487 for (j = 0, cij = ci, mij = mi; j < n_i;
2488 j++, cij += inccij, mij += incij) {
2489 if (ab == 0) {
2490 c_i[cij] = -c2[mij];
2491 c_i[cij + 1] = c1[mij];
2492 } else if (ab == 2) {
2493 c_i[cij] = -2.0 * c2[mij];
2494 c_i[cij + 1] = 2.0 * c1[mij];
2495 } else {
2496 c_i[cij] = c1[mij];
2497 c_i[cij + 1] = c2[mij];
2498 }
2499 }
2500 }
2501
2502 /* Fill in truth */
2503 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
2504 for (j = 0, cij = ci, mij = mi; j < n_i;
2505 j++, cij += inccij, mij += incij) {
2506
2507 head_r_elem1 = head_r1_true[mij];
2508 tail_r_elem1 = tail_r1_true[mij];
2509
2510 head_r_elem2 = head_r2_true[mij];
2511 tail_r_elem2 = tail_r2_true[mij];
2512
2513 if (ab == 0) {
2514 head_r_true[cij] = -head_r_elem2;
2515 tail_r_true[cij] = -tail_r_elem2;
2516 head_r_true[cij + 1] = head_r_elem1;
2517 tail_r_true[cij + 1] = tail_r_elem1;
2518 } else if (ab == 1) {
2519 {
2520 /* Compute double-double = double-double + double-double. */
2521 double bv;
2522 double s1, s2, t1, t2;
2523
2524 /* Add two hi words. */
2525 s1 = head_r_elem1 + head_r_elem2;
2526 bv = s1 - head_r_elem1;
2527 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2528
2529 /* Add two lo words. */
2530 t1 = tail_r_elem1 + tail_r_elem2;
2531 bv = t1 - tail_r_elem1;
2532 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2533
2534 s2 += t1;
2535
2536 /* Renormalize (s1, s2) to (t1, s2) */
2537 t1 = s1 + s2;
2538 s2 = s2 - (t1 - s1);
2539
2540 t2 += s2;
2541
2542 /* Renormalize (t1, t2) */
2543 head_r_elem = t1 + t2;
2544 tail_r_elem = t2 - (head_r_elem - t1);
2545 }
2546
2547 /* Set the imaginary part to R1 + R2 */
2548 tail_r_true[cij + 1] = tail_r_elem;
2549 head_r_true[cij + 1] = head_r_elem;
2550
2551 /* Set the real part to R1 - R2. */
2552 {
2553 double head_bt, tail_bt;
2554 head_bt = -head_r_elem2;
2555 tail_bt = -tail_r_elem2;
2556 {
2557 /* Compute double-double = double-double + double-double. */
2558 double bv;
2559 double s1, s2, t1, t2;
2560
2561 /* Add two hi words. */
2562 s1 = head_r_elem1 + head_bt;
2563 bv = s1 - head_r_elem1;
2564 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2565
2566 /* Add two lo words. */
2567 t1 = tail_r_elem1 + tail_bt;
2568 bv = t1 - tail_r_elem1;
2569 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2570
2571 s2 += t1;
2572
2573 /* Renormalize (s1, s2) to (t1, s2) */
2574 t1 = s1 + s2;
2575 s2 = s2 - (t1 - s1);
2576
2577 t2 += s2;
2578
2579 /* Renormalize (t1, t2) */
2580 head_r_elem = t1 + t2;
2581 tail_r_elem = t2 - (head_r_elem - t1);
2582 }
2583 }
2584 tail_r_true[cij] = tail_r_elem;
2585 head_r_true[cij] = head_r_elem;
2586 } else {
2587
2588 /* Real part */
2589 {
2590 /* Compute double-double = double-double * double. */
2591 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2592
2593 con = head_r_elem2 * split;
2594 a11 = con - head_r_elem2;
2595 a11 = con - a11;
2596 a21 = head_r_elem2 - a11;
2597 con = -2.0 * split;
2598 b1 = con - -2.0;
2599 b1 = con - b1;
2600 b2 = -2.0 - b1;
2601
2602 c11 = head_r_elem2 * -2.0;
2603 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2604
2605 c2 = tail_r_elem2 * -2.0;
2606 t1 = c11 + c2;
2607 t2 = (c2 - (t1 - c11)) + c21;
2608
2609 head_r_elem = t1 + t2;
2610 tail_r_elem = t2 - (head_r_elem - t1);
2611 }
2612 head_r_true[cij] = head_r_elem;
2613 tail_r_true[cij] = tail_r_elem;
2614
2615 /* Imaginary Part */
2616 {
2617 /* Compute double-double = double-double * double. */
2618 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2619
2620 con = head_r_elem1 * split;
2621 a11 = con - head_r_elem1;
2622 a11 = con - a11;
2623 a21 = head_r_elem1 - a11;
2624 con = 2.0 * split;
2625 b1 = con - 2.0;
2626 b1 = con - b1;
2627 b2 = 2.0 - b1;
2628
2629 c11 = head_r_elem1 * 2.0;
2630 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2631
2632 c2 = tail_r_elem1 * 2.0;
2633 t1 = c11 + c2;
2634 t2 = (c2 - (t1 - c11)) + c21;
2635
2636 head_r_elem = t1 + t2;
2637 tail_r_elem = t2 - (head_r_elem - t1);
2638 }
2639 head_r_true[cij + 1] = head_r_elem;
2640 tail_r_true[cij + 1] = tail_r_elem;
2641 }
2642 }
2643 }
2644
2645 blas_free(a1);
2646 blas_free(a2);
2647 blas_free(c1);
2648 blas_free(c2);
2649 blas_free(b0);
2650 blas_free(head_r1_true);
2651 blas_free(tail_r1_true);
2652 blas_free(head_r2_true);
2653 blas_free(tail_r2_true);
2654
2655 } else {
2656 /* random matrix */
2657 float a_elem[2];
2658 float b_elem[2];
2659 double c_elem[2];
2660 double head_r_true_elem[2], tail_r_true_elem[2];
2661
2662 /* Since mixed real/complex test generator for dot
2663 scales the vectors, we need to used the non-mixed
2664 version if B is real (since A is always complex). */
2665
2666
2667 if (alpha_flag == 0) {
2668 alpha_i[0] = (float) xrand(seed);
2669 alpha_i[1] = (float) xrand(seed);
2670 }
2671 if (beta_flag == 0) {
2672 beta_i[0] = (float) xrand(seed);
2673 beta_i[1] = (float) xrand(seed);
2674 }
2675
2676 /* Fill in matrix A -- Hermitian. */
2677 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2678 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2679 a_elem[0] = (float) xrand(seed);
2680 a_elem[1] = (float) xrand(seed);
2681 a_i[aij] = a_elem[0];
2682 a_i[aij + 1] = a_elem[1];
2683 if (i == j)
2684 a_i[aij + 1] = 0.0;
2685 }
2686 }
2687
2688 /* Fill in matrix B */
2689 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2690 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2691 b_elem[0] = (float) xrand(seed);
2692 b_elem[1] = (float) xrand(seed);
2693 b_i[bij] = b_elem[0];
2694 b_i[bij + 1] = b_elem[1];
2695 }
2696 }
2697
2698 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2699 che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
2700 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2701
2702 if (side == blas_left_side)
2703 cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2704 else
2705 cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2706
2707 /* copy the real b_vec into complex bb_vec, so that
2708 pure complex test case generator can be called. */
2709
2710 BLAS_zdot_c_c_testgen(m_i, m_i, 0, norm, blas_no_conj,
2711 alpha, 1, beta, 1, a_vec, b_vec, seed,
2712 c_elem, head_r_true_elem, tail_r_true_elem);
2713
2714 c_i[cij] = c_elem[0];
2715 c_i[cij + 1] = c_elem[1];
2716 head_r_true[cij] = head_r_true_elem[0];
2717 head_r_true[cij + 1] = head_r_true_elem[1];
2718 tail_r_true[cij] = tail_r_true_elem[0];
2719 tail_r_true[cij + 1] = tail_r_true_elem[1];
2720 }
2721 }
2722
2723
2724
2725 }
2726
2727 blas_free(a_vec);
2728 blas_free(b_vec);
2729 }
2730
BLAS_zhemm_z_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,double * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)2731 void BLAS_zhemm_z_d_testgen(int norm, enum blas_order_type order,
2732 enum blas_uplo_type uplo,
2733 enum blas_side_type side, int m, int n,
2734 int randomize, void *alpha, int alpha_flag,
2735 void *beta, int beta_flag, void *a, int lda,
2736 double *b, int ldb, void *c, int ldc, int *seed,
2737 double *head_r_true, double *tail_r_true)
2738
2739 /*
2740 * Purpose
2741 * =======
2742 *
2743 * Generates the test inputs to BLAS_zhemm_z_d{_x}
2744 *
2745 * Arguments
2746 * =========
2747 *
2748 * norm (input) int
2749 * = -1: the vectors are scaled with norms near underflow.
2750 * = 0: the vectors have norms of order 1.
2751 * = 1: the vectors are scaled with norms near overflow.
2752 *
2753 * order (input) enum blas_side_type
2754 * storage format of the matrices
2755 *
2756 * uplo (input) enum blas_uplo_type
2757 * which half of the hermitian matrix a is to be stored.
2758 *
2759 * side (input) enum blas_side_type
2760 * which side of matrix b matrix a is to be multiplied.
2761 *
2762 * m n (input) int
2763 * sizes of matrices a, b, c:
2764 * matrix a is m-by-m for left multiplication
2765 * n-by-n otherwise,
2766 * matrices b, c are m-by-n.
2767 *
2768 * randomize (input) int
2769 * = 0: test case made for maximum cancellation.
2770 * = 1: test case made for maximum radomization.
2771 *
2772 * alpha (input/output) void*
2773 * if alpha_flag = 1, alpha is input.
2774 * if alpha_flag = 0, alpha is output.
2775 *
2776 * alpha_flag (input) int
2777 * = 0: alpha is free, and is output.
2778 * = 1: alpha is fixed on input.
2779 *
2780 * beta (input/output) void*
2781 * if beta_flag = 1, beta is input.
2782 * if beta_flag = 0, beta is output.
2783 *
2784 * beta_flag (input) int
2785 * = 0: beta is free, and is output.
2786 * = 1: beta is fixed on input.
2787 *
2788 * a (input/output) void*
2789 *
2790 * lda (input) lda
2791 * leading dimension of matrix A.
2792 *
2793 * b (input/output) double*
2794 *
2795 * ldb (input) int
2796 * leading dimension of matrix B.
2797 *
2798 * c (input/output) void*
2799 * generated matrix C that will be used as an input to HEMM.
2800 *
2801 * ldc (input) int
2802 * leading dimension of matrix C.
2803 *
2804 * seed (input/output) int *
2805 * seed for the random number generator.
2806 *
2807 * head_r_true (output) double *
2808 * the leading part of the truth in double-double.
2809 *
2810 * tail_r_true (output) double *
2811 * the trailing part of the truth in double-double
2812 *
2813 */
2814 {
2815
2816 /* Strategy:
2817 R1 = alpha * A1 * B + beta * C1
2818 R2 = alpha * A2 * B + beta * C2
2819 where all the matrices are real. Then let R = R1 + i R2,
2820 A = A1 + i A2, C = C1 + i C2. To make A hermitian, A1 is
2821 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
2822 */
2823
2824 int i, j;
2825 int cij, ci;
2826 int aij, ai;
2827 int a1ij, a1i;
2828 int bij, bi;
2829 int mij, mi;
2830 int inccij, incci;
2831 int incaij, incai;
2832 int inca1ij, inca1i;
2833 int incbij, incbi;
2834 int inci, incij;
2835 int inca, incb;
2836 int m_i, n_i;
2837 int ld;
2838 int ab;
2839
2840 double *a1;
2841 double *a2;
2842 double *c1;
2843 double *c2;
2844 double *b0;
2845
2846 double *head_r1_true, *tail_r1_true;
2847 double *head_r2_true, *tail_r2_true;
2848
2849 double head_r_elem1, tail_r_elem1;
2850 double head_r_elem2, tail_r_elem2;
2851 double head_r_elem, tail_r_elem;
2852
2853 double *a_vec;
2854 double *b_vec;
2855
2856 double *c_i = (double *) c;
2857 double *alpha_i = (double *) alpha;
2858 double *beta_i = (double *) beta;
2859 double *a_i = a;
2860 double *b_i = b;
2861
2862 if (side == blas_left_side) {
2863 m_i = m;
2864 n_i = n;
2865 } else {
2866 m_i = n;
2867 n_i = m;
2868 }
2869
2870 if (order == blas_colmajor)
2871 ld = m;
2872 else
2873 ld = n;
2874
2875
2876 if ((order == blas_colmajor && side == blas_left_side) ||
2877 (order == blas_rowmajor && side == blas_right_side)) {
2878 inci = inca1i = incai = incbi = incci = 1;
2879 inccij = ldc;
2880 incaij = lda;
2881 incbij = ldb;
2882 inca1ij = m_i;
2883 incij = ld;
2884 } else {
2885 incci = ldc;
2886 incai = lda;
2887 incbi = ldb;
2888 inca1i = m_i;
2889 inci = ld;
2890 incij = inca1ij = incaij = incbij = inccij = 1;
2891 }
2892
2893 incci *= 2;
2894 inccij *= 2;
2895 incai *= 2;
2896 incaij *= 2;
2897
2898
2899
2900 inca = incb = 1;
2901 inca *= 2;
2902
2903 a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2904 if (m_i > 0 && a_vec == NULL) {
2905 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2906 }
2907 for (i = 0; i < m_i * inca; i += inca) {
2908 a_vec[i] = 0.0;
2909 a_vec[i + 1] = 0.0;
2910 }
2911 b_vec = (double *) blas_malloc(m_i * sizeof(double));
2912 if (m_i > 0 && b_vec == NULL) {
2913 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2914 }
2915 for (i = 0; i < m_i * incb; i += incb) {
2916 b_vec[i] = 0.0;
2917 }
2918
2919 if (randomize == 0) {
2920 a1 = (double *) blas_malloc(m_i * m_i * sizeof(double));
2921 if (m_i * m_i > 0 && a1 == NULL) {
2922 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2923 }
2924 a2 = (double *) blas_malloc(m_i * m_i * sizeof(double));
2925 if (m_i * m_i > 0 && a2 == NULL) {
2926 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2927 }
2928 for (i = 0; i < m_i * m_i; ++i) {
2929 a1[i] = a2[i] = 0.0;
2930 }
2931 c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2932 if (m_i * n_i > 0 && c1 == NULL) {
2933 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2934 }
2935 c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2936 if (m_i * n_i > 0 && c2 == NULL) {
2937 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2938 }
2939 b0 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2940 if (m_i * n_i > 0 && b0 == NULL) {
2941 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2942 }
2943 for (i = 0; i < m_i * n_i; ++i) {
2944 c1[i] = c2[i] = b0[i] = 0.0;
2945 }
2946 head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2947 tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2948 if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2949 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2950 };
2951 head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2952 tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2953 if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2954 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2955 };
2956
2957 /* First generate the real portion of matrix A, and matrix B.
2958 Note that Re(A) is a symmetric matrix. */
2959 BLAS_dsymm_testgen
2960 (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
2961 beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
2962
2963 BLAS_dskew_testgen
2964 (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
2965 c2, ld, seed, head_r2_true, tail_r2_true);
2966
2967
2968 /* The case where B is a real matrix.
2969
2970 There are four cases to consider, depending on the
2971 values of alpha and beta.
2972
2973 values scaling
2974 alpha beta alpha A B beta C R (truth)
2975 0 1 1
2976 1 1 ? -i i
2977 2 ? 1 i i i
2978 3 ? ? 1+i 1+i 1+i
2979
2980 Note that we can afford to scale truth by (1+i) since they
2981 are computed in double-double.
2982 */
2983
2984 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2985 ab = 0;
2986 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
2987 } else if (alpha_i[0] == 1.0) {
2988 ab = 1;
2989 /* set alpha to 1, multiply beta by -i. */
2990 alpha_i[1] = 0.0;
2991 beta_i[1] = -beta_i[0];
2992 beta_i[0] = 0.0;
2993 } else if (beta_i[0] == 1.0) {
2994 ab = 2;
2995 /* set beta to 1, multiply alpha by i. */
2996 beta_i[1] = 0.0;
2997 alpha_i[1] = alpha_i[0];
2998 alpha_i[0] = 0.0;
2999 } else {
3000 ab = 3;
3001 /* multiply alpha, beta by (1 + i). */
3002 alpha_i[1] = alpha_i[0];
3003 beta_i[1] = beta_i[0];
3004 }
3005
3006 /* Now fill in a */
3007 for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
3008 for (j = 0, aij = ai, a1ij = a1i; j < m_i;
3009 j++, aij += incaij, a1ij += inca1ij) {
3010 a_i[aij] = a1[a1ij];
3011 a_i[aij + 1] = a2[a1ij];
3012 }
3013 }
3014
3015 /* Fill in b */
3016 for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
3017 for (j = 0, bij = bi, mij = mi; j < n_i;
3018 j++, bij += incbij, mij += incij) {
3019 b_i[bij] = b0[mij];
3020 }
3021 }
3022
3023 /* Fill in c */
3024 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3025 for (j = 0, cij = ci, mij = mi; j < n_i;
3026 j++, cij += inccij, mij += incij) {
3027 if (ab == 1 || ab == 2) {
3028 c_i[cij] = -c2[mij];
3029 c_i[cij + 1] = c1[mij];
3030 } else {
3031 c_i[cij] = c1[mij];
3032 c_i[cij + 1] = c2[mij];
3033 }
3034 }
3035 }
3036
3037 /* Fill in truth */
3038 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3039 for (j = 0, cij = ci, mij = mi; j < n_i;
3040 j++, cij += inccij, mij += incij) {
3041
3042 if (ab == 0 || ab == 1) {
3043 head_r_true[cij] = head_r1_true[mij];
3044 tail_r_true[cij] = tail_r1_true[mij];
3045 head_r_true[cij + 1] = head_r2_true[mij];
3046 tail_r_true[cij + 1] = tail_r2_true[mij];
3047 } else if (ab == 2) {
3048 head_r_true[cij] = -head_r2_true[mij];
3049 tail_r_true[cij] = -tail_r2_true[mij];
3050 head_r_true[cij + 1] = head_r1_true[mij];
3051 tail_r_true[cij + 1] = tail_r1_true[mij];
3052 } else {
3053 head_r_elem1 = head_r1_true[mij];
3054 tail_r_elem1 = tail_r1_true[mij];
3055
3056 head_r_elem2 = head_r2_true[mij];
3057 tail_r_elem2 = tail_r2_true[mij];
3058
3059 {
3060 /* Compute double-double = double-double + double-double. */
3061 double bv;
3062 double s1, s2, t1, t2;
3063
3064 /* Add two hi words. */
3065 s1 = head_r_elem1 + head_r_elem2;
3066 bv = s1 - head_r_elem1;
3067 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3068
3069 /* Add two lo words. */
3070 t1 = tail_r_elem1 + tail_r_elem2;
3071 bv = t1 - tail_r_elem1;
3072 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3073
3074 s2 += t1;
3075
3076 /* Renormalize (s1, s2) to (t1, s2) */
3077 t1 = s1 + s2;
3078 s2 = s2 - (t1 - s1);
3079
3080 t2 += s2;
3081
3082 /* Renormalize (t1, t2) */
3083 head_r_elem = t1 + t2;
3084 tail_r_elem = t2 - (head_r_elem - t1);
3085 }
3086
3087 /* Set the imaginary part to R1 + R2 */
3088 tail_r_true[cij + 1] = tail_r_elem;
3089 head_r_true[cij + 1] = head_r_elem;
3090
3091 /* Set the real part to R1 - R2. */
3092 {
3093 double head_bt, tail_bt;
3094 head_bt = -head_r_elem2;
3095 tail_bt = -tail_r_elem2;
3096 {
3097 /* Compute double-double = double-double + double-double. */
3098 double bv;
3099 double s1, s2, t1, t2;
3100
3101 /* Add two hi words. */
3102 s1 = head_r_elem1 + head_bt;
3103 bv = s1 - head_r_elem1;
3104 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3105
3106 /* Add two lo words. */
3107 t1 = tail_r_elem1 + tail_bt;
3108 bv = t1 - tail_r_elem1;
3109 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3110
3111 s2 += t1;
3112
3113 /* Renormalize (s1, s2) to (t1, s2) */
3114 t1 = s1 + s2;
3115 s2 = s2 - (t1 - s1);
3116
3117 t2 += s2;
3118
3119 /* Renormalize (t1, t2) */
3120 head_r_elem = t1 + t2;
3121 tail_r_elem = t2 - (head_r_elem - t1);
3122 }
3123 }
3124 tail_r_true[cij] = tail_r_elem;
3125 head_r_true[cij] = head_r_elem;
3126 }
3127
3128 }
3129 }
3130
3131 blas_free(a1);
3132 blas_free(a2);
3133 blas_free(c1);
3134 blas_free(c2);
3135 blas_free(b0);
3136 blas_free(head_r1_true);
3137 blas_free(tail_r1_true);
3138 blas_free(head_r2_true);
3139 blas_free(tail_r2_true);
3140
3141 } else {
3142 /* random matrix */
3143 double a_elem[2];
3144 double b_elem;
3145 double c_elem[2];
3146 double head_r_true_elem[2], tail_r_true_elem[2];
3147
3148 /* Since mixed real/complex test generator for dot
3149 scales the vectors, we need to used the non-mixed
3150 version if B is real (since A is always complex). */
3151 double *bb_vec;
3152 bb_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
3153 if (m_i > 0 && bb_vec == NULL) {
3154 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3155 }
3156
3157 if (alpha_flag == 0) {
3158 alpha_i[0] = xrand(seed);
3159 alpha_i[1] = xrand(seed);
3160 }
3161 if (beta_flag == 0) {
3162 beta_i[0] = xrand(seed);
3163 beta_i[1] = xrand(seed);
3164 }
3165
3166 /* Fill in matrix A -- Hermitian. */
3167 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
3168 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
3169 a_elem[0] = xrand(seed);
3170 a_elem[1] = xrand(seed);
3171 a_i[aij] = a_elem[0];
3172 a_i[aij + 1] = a_elem[1];
3173 if (i == j)
3174 a_i[aij + 1] = 0.0;
3175 }
3176 }
3177
3178 /* Fill in matrix B */
3179 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
3180 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
3181 b_elem = xrand(seed);
3182 b_i[bij] = b_elem;
3183 }
3184 }
3185
3186 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3187 zhe_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3188 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
3189
3190 if (side == blas_left_side)
3191 dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3192 else
3193 dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3194
3195 /* copy the real b_vec into complex bb_vec, so that
3196 pure complex test case generator can be called. */
3197
3198 {
3199 int k;
3200 for (k = 0; k < m_i; k++) {
3201 bb_vec[2 * k] = b_vec[k];
3202 bb_vec[2 * k + 1] = 0.0;
3203 }
3204 }
3205 BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
3206 alpha, 1, beta, 1, a_vec, bb_vec, seed,
3207 c_elem, head_r_true_elem, tail_r_true_elem);
3208
3209 c_i[cij] = c_elem[0];
3210 c_i[cij + 1] = c_elem[1];
3211 head_r_true[cij] = head_r_true_elem[0];
3212 head_r_true[cij + 1] = head_r_true_elem[1];
3213 tail_r_true[cij] = tail_r_true_elem[0];
3214 tail_r_true[cij + 1] = tail_r_true_elem[1];
3215 }
3216 }
3217
3218 blas_free(bb_vec);
3219
3220 }
3221
3222 blas_free(a_vec);
3223 blas_free(b_vec);
3224 }
BLAS_chemm_c_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,float * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3225 void BLAS_chemm_c_s_testgen(int norm, enum blas_order_type order,
3226 enum blas_uplo_type uplo,
3227 enum blas_side_type side, int m, int n,
3228 int randomize, void *alpha, int alpha_flag,
3229 void *beta, int beta_flag, void *a, int lda,
3230 float *b, int ldb, void *c, int ldc, int *seed,
3231 double *head_r_true, double *tail_r_true)
3232
3233 /*
3234 * Purpose
3235 * =======
3236 *
3237 * Generates the test inputs to BLAS_chemm_c_s{_x}
3238 *
3239 * Arguments
3240 * =========
3241 *
3242 * norm (input) int
3243 * = -1: the vectors are scaled with norms near underflow.
3244 * = 0: the vectors have norms of order 1.
3245 * = 1: the vectors are scaled with norms near overflow.
3246 *
3247 * order (input) enum blas_side_type
3248 * storage format of the matrices
3249 *
3250 * uplo (input) enum blas_uplo_type
3251 * which half of the hermitian matrix a is to be stored.
3252 *
3253 * side (input) enum blas_side_type
3254 * which side of matrix b matrix a is to be multiplied.
3255 *
3256 * m n (input) int
3257 * sizes of matrices a, b, c:
3258 * matrix a is m-by-m for left multiplication
3259 * n-by-n otherwise,
3260 * matrices b, c are m-by-n.
3261 *
3262 * randomize (input) int
3263 * = 0: test case made for maximum cancellation.
3264 * = 1: test case made for maximum radomization.
3265 *
3266 * alpha (input/output) void*
3267 * if alpha_flag = 1, alpha is input.
3268 * if alpha_flag = 0, alpha is output.
3269 *
3270 * alpha_flag (input) int
3271 * = 0: alpha is free, and is output.
3272 * = 1: alpha is fixed on input.
3273 *
3274 * beta (input/output) void*
3275 * if beta_flag = 1, beta is input.
3276 * if beta_flag = 0, beta is output.
3277 *
3278 * beta_flag (input) int
3279 * = 0: beta is free, and is output.
3280 * = 1: beta is fixed on input.
3281 *
3282 * a (input/output) void*
3283 *
3284 * lda (input) lda
3285 * leading dimension of matrix A.
3286 *
3287 * b (input/output) float*
3288 *
3289 * ldb (input) int
3290 * leading dimension of matrix B.
3291 *
3292 * c (input/output) void*
3293 * generated matrix C that will be used as an input to HEMM.
3294 *
3295 * ldc (input) int
3296 * leading dimension of matrix C.
3297 *
3298 * seed (input/output) int *
3299 * seed for the random number generator.
3300 *
3301 * head_r_true (output) double *
3302 * the leading part of the truth in double-double.
3303 *
3304 * tail_r_true (output) double *
3305 * the trailing part of the truth in double-double
3306 *
3307 */
3308 {
3309
3310 /* Strategy:
3311 R1 = alpha * A1 * B + beta * C1
3312 R2 = alpha * A2 * B + beta * C2
3313 where all the matrices are real. Then let R = R1 + i R2,
3314 A = A1 + i A2, C = C1 + i C2. To make A hermitian, A1 is
3315 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
3316 */
3317
3318 int i, j;
3319 int cij, ci;
3320 int aij, ai;
3321 int a1ij, a1i;
3322 int bij, bi;
3323 int mij, mi;
3324 int inccij, incci;
3325 int incaij, incai;
3326 int inca1ij, inca1i;
3327 int incbij, incbi;
3328 int inci, incij;
3329 int inca, incb;
3330 int m_i, n_i;
3331 int ld;
3332 int ab;
3333
3334 float *a1;
3335 float *a2;
3336 float *c1;
3337 float *c2;
3338 float *b0;
3339
3340 double *head_r1_true, *tail_r1_true;
3341 double *head_r2_true, *tail_r2_true;
3342
3343 double head_r_elem1, tail_r_elem1;
3344 double head_r_elem2, tail_r_elem2;
3345 double head_r_elem, tail_r_elem;
3346
3347 float *a_vec;
3348 float *b_vec;
3349
3350 float *c_i = (float *) c;
3351 float *alpha_i = (float *) alpha;
3352 float *beta_i = (float *) beta;
3353 float *a_i = a;
3354 float *b_i = b;
3355
3356 if (side == blas_left_side) {
3357 m_i = m;
3358 n_i = n;
3359 } else {
3360 m_i = n;
3361 n_i = m;
3362 }
3363
3364 if (order == blas_colmajor)
3365 ld = m;
3366 else
3367 ld = n;
3368
3369
3370 if ((order == blas_colmajor && side == blas_left_side) ||
3371 (order == blas_rowmajor && side == blas_right_side)) {
3372 inci = inca1i = incai = incbi = incci = 1;
3373 inccij = ldc;
3374 incaij = lda;
3375 incbij = ldb;
3376 inca1ij = m_i;
3377 incij = ld;
3378 } else {
3379 incci = ldc;
3380 incai = lda;
3381 incbi = ldb;
3382 inca1i = m_i;
3383 inci = ld;
3384 incij = inca1ij = incaij = incbij = inccij = 1;
3385 }
3386
3387 incci *= 2;
3388 inccij *= 2;
3389 incai *= 2;
3390 incaij *= 2;
3391
3392
3393
3394 inca = incb = 1;
3395 inca *= 2;
3396
3397 a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
3398 if (m_i > 0 && a_vec == NULL) {
3399 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3400 }
3401 for (i = 0; i < m_i * inca; i += inca) {
3402 a_vec[i] = 0.0;
3403 a_vec[i + 1] = 0.0;
3404 }
3405 b_vec = (float *) blas_malloc(m_i * sizeof(float));
3406 if (m_i > 0 && b_vec == NULL) {
3407 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3408 }
3409 for (i = 0; i < m_i * incb; i += incb) {
3410 b_vec[i] = 0.0;
3411 }
3412
3413 if (randomize == 0) {
3414 a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
3415 if (m_i * m_i > 0 && a1 == NULL) {
3416 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3417 }
3418 a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
3419 if (m_i * m_i > 0 && a2 == NULL) {
3420 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3421 }
3422 for (i = 0; i < m_i * m_i; ++i) {
3423 a1[i] = a2[i] = 0.0;
3424 }
3425 c1 = (float *) blas_malloc(m_i * n_i * sizeof(float));
3426 if (m_i * n_i > 0 && c1 == NULL) {
3427 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3428 }
3429 c2 = (float *) blas_malloc(m_i * n_i * sizeof(float));
3430 if (m_i * n_i > 0 && c2 == NULL) {
3431 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3432 }
3433 b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
3434 if (m_i * n_i > 0 && b0 == NULL) {
3435 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3436 }
3437 for (i = 0; i < m_i * n_i; ++i) {
3438 c1[i] = c2[i] = b0[i] = 0.0;
3439 }
3440 head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3441 tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3442 if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3443 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3444 };
3445 head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3446 tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3447 if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3448 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3449 };
3450
3451 /* First generate the real portion of matrix A, and matrix B.
3452 Note that Re(A) is a symmetric matrix. */
3453 BLAS_ssymm_testgen
3454 (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
3455 beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
3456
3457 BLAS_sskew_testgen
3458 (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
3459 c2, ld, seed, head_r2_true, tail_r2_true);
3460
3461
3462 /* The case where B is a real matrix.
3463
3464 There are four cases to consider, depending on the
3465 values of alpha and beta.
3466
3467 values scaling
3468 alpha beta alpha A B beta C R (truth)
3469 0 1 1
3470 1 1 ? -i i
3471 2 ? 1 i i i
3472 3 ? ? 1+i 1+i 1+i
3473
3474 Note that we can afford to scale truth by (1+i) since they
3475 are computed in double-double.
3476 */
3477
3478 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3479 ab = 0;
3480 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
3481 } else if (alpha_i[0] == 1.0) {
3482 ab = 1;
3483 /* set alpha to 1, multiply beta by -i. */
3484 alpha_i[1] = 0.0;
3485 beta_i[1] = -beta_i[0];
3486 beta_i[0] = 0.0;
3487 } else if (beta_i[0] == 1.0) {
3488 ab = 2;
3489 /* set beta to 1, multiply alpha by i. */
3490 beta_i[1] = 0.0;
3491 alpha_i[1] = alpha_i[0];
3492 alpha_i[0] = 0.0;
3493 } else {
3494 ab = 3;
3495 /* multiply alpha, beta by (1 + i). */
3496 alpha_i[1] = alpha_i[0];
3497 beta_i[1] = beta_i[0];
3498 }
3499
3500 /* Now fill in a */
3501 for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
3502 for (j = 0, aij = ai, a1ij = a1i; j < m_i;
3503 j++, aij += incaij, a1ij += inca1ij) {
3504 a_i[aij] = a1[a1ij];
3505 a_i[aij + 1] = a2[a1ij];
3506 }
3507 }
3508
3509 /* Fill in b */
3510 for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
3511 for (j = 0, bij = bi, mij = mi; j < n_i;
3512 j++, bij += incbij, mij += incij) {
3513 b_i[bij] = b0[mij];
3514 }
3515 }
3516
3517 /* Fill in c */
3518 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3519 for (j = 0, cij = ci, mij = mi; j < n_i;
3520 j++, cij += inccij, mij += incij) {
3521 if (ab == 1 || ab == 2) {
3522 c_i[cij] = -c2[mij];
3523 c_i[cij + 1] = c1[mij];
3524 } else {
3525 c_i[cij] = c1[mij];
3526 c_i[cij + 1] = c2[mij];
3527 }
3528 }
3529 }
3530
3531 /* Fill in truth */
3532 for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3533 for (j = 0, cij = ci, mij = mi; j < n_i;
3534 j++, cij += inccij, mij += incij) {
3535
3536 if (ab == 0 || ab == 1) {
3537 head_r_true[cij] = head_r1_true[mij];
3538 tail_r_true[cij] = tail_r1_true[mij];
3539 head_r_true[cij + 1] = head_r2_true[mij];
3540 tail_r_true[cij + 1] = tail_r2_true[mij];
3541 } else if (ab == 2) {
3542 head_r_true[cij] = -head_r2_true[mij];
3543 tail_r_true[cij] = -tail_r2_true[mij];
3544 head_r_true[cij + 1] = head_r1_true[mij];
3545 tail_r_true[cij + 1] = tail_r1_true[mij];
3546 } else {
3547 head_r_elem1 = head_r1_true[mij];
3548 tail_r_elem1 = tail_r1_true[mij];
3549
3550 head_r_elem2 = head_r2_true[mij];
3551 tail_r_elem2 = tail_r2_true[mij];
3552
3553 {
3554 /* Compute double-double = double-double + double-double. */
3555 double bv;
3556 double s1, s2, t1, t2;
3557
3558 /* Add two hi words. */
3559 s1 = head_r_elem1 + head_r_elem2;
3560 bv = s1 - head_r_elem1;
3561 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3562
3563 /* Add two lo words. */
3564 t1 = tail_r_elem1 + tail_r_elem2;
3565 bv = t1 - tail_r_elem1;
3566 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3567
3568 s2 += t1;
3569
3570 /* Renormalize (s1, s2) to (t1, s2) */
3571 t1 = s1 + s2;
3572 s2 = s2 - (t1 - s1);
3573
3574 t2 += s2;
3575
3576 /* Renormalize (t1, t2) */
3577 head_r_elem = t1 + t2;
3578 tail_r_elem = t2 - (head_r_elem - t1);
3579 }
3580
3581 /* Set the imaginary part to R1 + R2 */
3582 tail_r_true[cij + 1] = tail_r_elem;
3583 head_r_true[cij + 1] = head_r_elem;
3584
3585 /* Set the real part to R1 - R2. */
3586 {
3587 double head_bt, tail_bt;
3588 head_bt = -head_r_elem2;
3589 tail_bt = -tail_r_elem2;
3590 {
3591 /* Compute double-double = double-double + double-double. */
3592 double bv;
3593 double s1, s2, t1, t2;
3594
3595 /* Add two hi words. */
3596 s1 = head_r_elem1 + head_bt;
3597 bv = s1 - head_r_elem1;
3598 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3599
3600 /* Add two lo words. */
3601 t1 = tail_r_elem1 + tail_bt;
3602 bv = t1 - tail_r_elem1;
3603 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3604
3605 s2 += t1;
3606
3607 /* Renormalize (s1, s2) to (t1, s2) */
3608 t1 = s1 + s2;
3609 s2 = s2 - (t1 - s1);
3610
3611 t2 += s2;
3612
3613 /* Renormalize (t1, t2) */
3614 head_r_elem = t1 + t2;
3615 tail_r_elem = t2 - (head_r_elem - t1);
3616 }
3617 }
3618 tail_r_true[cij] = tail_r_elem;
3619 head_r_true[cij] = head_r_elem;
3620 }
3621
3622 }
3623 }
3624
3625 blas_free(a1);
3626 blas_free(a2);
3627 blas_free(c1);
3628 blas_free(c2);
3629 blas_free(b0);
3630 blas_free(head_r1_true);
3631 blas_free(tail_r1_true);
3632 blas_free(head_r2_true);
3633 blas_free(tail_r2_true);
3634
3635 } else {
3636 /* random matrix */
3637 float a_elem[2];
3638 float b_elem;
3639 float c_elem[2];
3640 double head_r_true_elem[2], tail_r_true_elem[2];
3641
3642 /* Since mixed real/complex test generator for dot
3643 scales the vectors, we need to used the non-mixed
3644 version if B is real (since A is always complex). */
3645 float *bb_vec;
3646 bb_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
3647 if (m_i > 0 && bb_vec == NULL) {
3648 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3649 }
3650
3651 if (alpha_flag == 0) {
3652 alpha_i[0] = xrand(seed);
3653 alpha_i[1] = xrand(seed);
3654 }
3655 if (beta_flag == 0) {
3656 beta_i[0] = xrand(seed);
3657 beta_i[1] = xrand(seed);
3658 }
3659
3660 /* Fill in matrix A -- Hermitian. */
3661 for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
3662 for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
3663 a_elem[0] = xrand(seed);
3664 a_elem[1] = xrand(seed);
3665 a_i[aij] = a_elem[0];
3666 a_i[aij + 1] = a_elem[1];
3667 if (i == j)
3668 a_i[aij + 1] = 0.0;
3669 }
3670 }
3671
3672 /* Fill in matrix B */
3673 for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
3674 for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
3675 b_elem = xrand(seed);
3676 b_i[bij] = b_elem;
3677 }
3678 }
3679
3680 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3681 che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3682 for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
3683
3684 if (side == blas_left_side)
3685 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3686 else
3687 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3688
3689 /* copy the real b_vec into complex bb_vec, so that
3690 pure complex test case generator can be called. */
3691
3692 {
3693 int k;
3694 for (k = 0; k < m_i; k++) {
3695 bb_vec[2 * k] = b_vec[k];
3696 bb_vec[2 * k + 1] = 0.0;
3697 }
3698 }
3699 BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
3700 alpha, 1, beta, 1, a_vec, bb_vec, seed,
3701 c_elem, head_r_true_elem, tail_r_true_elem);
3702
3703 c_i[cij] = c_elem[0];
3704 c_i[cij + 1] = c_elem[1];
3705 head_r_true[cij] = head_r_true_elem[0];
3706 head_r_true[cij + 1] = head_r_true_elem[1];
3707 tail_r_true[cij] = tail_r_true_elem[0];
3708 tail_r_true[cij + 1] = tail_r_true_elem[1];
3709 }
3710 }
3711
3712 blas_free(bb_vec);
3713
3714 }
3715
3716 blas_free(a_vec);
3717 blas_free(b_vec);
3718 }
3719
BLAS_sskew_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,float * alpha,int alpha_flag,float * beta,int beta_flag,float * a,int lda,float * b,int ldb,float * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3720 void BLAS_sskew_testgen(int norm, enum blas_order_type order,
3721 enum blas_uplo_type uplo, enum blas_side_type side,
3722 int m, int n, float *alpha, int alpha_flag,
3723 float *beta, int beta_flag, float *a, int lda,
3724 float *b, int ldb, float *c, int ldc, int *seed,
3725 double *head_r_true, double *tail_r_true)
3726 {
3727
3728 int i, j;
3729 int cij, ci;
3730 int inccij, incci;
3731 int inca, incb;
3732 int m_i, n_i;
3733
3734 float c_elem;
3735 double head_r_true_elem, tail_r_true_elem;
3736
3737 float *a_vec;
3738 float *b_vec;
3739
3740 float *c_i = c;
3741
3742 if (side == blas_left_side) {
3743 m_i = m;
3744 n_i = n;
3745 } else {
3746 m_i = n;
3747 n_i = m;
3748 }
3749
3750 inca = incb = 1;
3751
3752
3753 a_vec = (float *) blas_malloc(m_i * sizeof(float));
3754 if (m_i > 0 && a_vec == NULL) {
3755 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3756 }
3757 for (i = 0; i < m_i * inca; i += inca) {
3758 a_vec[i] = 0.0;
3759 }
3760 b_vec = (float *) blas_malloc(m_i * sizeof(float));
3761 if (m_i > 0 && b_vec == NULL) {
3762 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3763 }
3764 for (i = 0; i < m_i * incb; i += incb) {
3765 b_vec[i] = 0.0;
3766 }
3767
3768 if ((order == blas_colmajor && side == blas_left_side) ||
3769 (order == blas_rowmajor && side == blas_right_side)) {
3770 incci = 1;
3771 inccij = ldc;
3772 } else {
3773 incci = ldc;
3774 inccij = 1;
3775 }
3776
3777
3778
3779
3780 if (side == blas_left_side)
3781 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3782 else
3783 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3784
3785 /* Fill in matrix A */
3786 cij = 0;
3787 for (i = 0; i < m_i; i++, cij += incci) {
3788 sskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3789 BLAS_sdot_testgen(m_i, i, m_i - i, norm,
3790 blas_no_conj, alpha, 1,
3791 beta, 1, b_vec, a_vec, seed,
3792 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3793
3794 sskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
3795
3796 c_i[cij] = c_elem;
3797 head_r_true[cij] = head_r_true_elem;
3798 tail_r_true[cij] = tail_r_true_elem;
3799 }
3800
3801 /* Now fill in c and r_true */
3802
3803 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3804 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3805 c_i[cij] = c_i[ci];
3806 head_r_true[cij] = head_r_true[ci];
3807 tail_r_true[cij] = tail_r_true[ci];
3808 }
3809 }
3810
3811 blas_free(a_vec);
3812 blas_free(b_vec);
3813 }
BLAS_dskew_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,double * a,int lda,double * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3814 void BLAS_dskew_testgen(int norm, enum blas_order_type order,
3815 enum blas_uplo_type uplo, enum blas_side_type side,
3816 int m, int n, double *alpha, int alpha_flag,
3817 double *beta, int beta_flag, double *a, int lda,
3818 double *b, int ldb, double *c, int ldc, int *seed,
3819 double *head_r_true, double *tail_r_true)
3820 {
3821
3822 int i, j;
3823 int cij, ci;
3824 int inccij, incci;
3825 int inca, incb;
3826 int m_i, n_i;
3827
3828 double c_elem;
3829 double head_r_true_elem, tail_r_true_elem;
3830
3831 double *a_vec;
3832 double *b_vec;
3833
3834 double *c_i = c;
3835
3836 if (side == blas_left_side) {
3837 m_i = m;
3838 n_i = n;
3839 } else {
3840 m_i = n;
3841 n_i = m;
3842 }
3843
3844 inca = incb = 1;
3845
3846
3847 a_vec = (double *) blas_malloc(m_i * sizeof(double));
3848 if (m_i > 0 && a_vec == NULL) {
3849 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3850 }
3851 for (i = 0; i < m_i * inca; i += inca) {
3852 a_vec[i] = 0.0;
3853 }
3854 b_vec = (double *) blas_malloc(m_i * sizeof(double));
3855 if (m_i > 0 && b_vec == NULL) {
3856 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3857 }
3858 for (i = 0; i < m_i * incb; i += incb) {
3859 b_vec[i] = 0.0;
3860 }
3861
3862 if ((order == blas_colmajor && side == blas_left_side) ||
3863 (order == blas_rowmajor && side == blas_right_side)) {
3864 incci = 1;
3865 inccij = ldc;
3866 } else {
3867 incci = ldc;
3868 inccij = 1;
3869 }
3870
3871
3872
3873
3874 if (side == blas_left_side)
3875 dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3876 else
3877 dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3878
3879 /* Fill in matrix A */
3880 cij = 0;
3881 for (i = 0; i < m_i; i++, cij += incci) {
3882 dskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3883 BLAS_ddot_testgen(m_i, i, m_i - i, norm,
3884 blas_no_conj, alpha, 1,
3885 beta, 1, b_vec, a_vec, seed,
3886 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3887
3888 dskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
3889
3890 c_i[cij] = c_elem;
3891 head_r_true[cij] = head_r_true_elem;
3892 tail_r_true[cij] = tail_r_true_elem;
3893 }
3894
3895 /* Now fill in c and r_true */
3896
3897 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3898 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3899 c_i[cij] = c_i[ci];
3900 head_r_true[cij] = head_r_true[ci];
3901 tail_r_true[cij] = tail_r_true[ci];
3902 }
3903 }
3904
3905 blas_free(a_vec);
3906 blas_free(b_vec);
3907 }
BLAS_dskew_d_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,double * a,int lda,float * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3908 void BLAS_dskew_d_s_testgen(int norm, enum blas_order_type order,
3909 enum blas_uplo_type uplo,
3910 enum blas_side_type side, int m, int n,
3911 double *alpha, int alpha_flag, double *beta,
3912 int beta_flag, double *a, int lda, float *b,
3913 int ldb, double *c, int ldc, int *seed,
3914 double *head_r_true, double *tail_r_true)
3915 {
3916
3917 int i, j;
3918 int cij, ci;
3919 int inccij, incci;
3920 int inca, incb;
3921 int m_i, n_i;
3922
3923 double c_elem;
3924 double head_r_true_elem, tail_r_true_elem;
3925
3926 double *a_vec;
3927 float *b_vec;
3928
3929 double *c_i = c;
3930
3931 if (side == blas_left_side) {
3932 m_i = m;
3933 n_i = n;
3934 } else {
3935 m_i = n;
3936 n_i = m;
3937 }
3938
3939 inca = incb = 1;
3940
3941
3942 a_vec = (double *) blas_malloc(m_i * sizeof(double));
3943 if (m_i > 0 && a_vec == NULL) {
3944 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3945 }
3946 for (i = 0; i < m_i * inca; i += inca) {
3947 a_vec[i] = 0.0;
3948 }
3949 b_vec = (float *) blas_malloc(m_i * sizeof(float));
3950 if (m_i > 0 && b_vec == NULL) {
3951 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3952 }
3953 for (i = 0; i < m_i * incb; i += incb) {
3954 b_vec[i] = 0.0;
3955 }
3956
3957 if ((order == blas_colmajor && side == blas_left_side) ||
3958 (order == blas_rowmajor && side == blas_right_side)) {
3959 incci = 1;
3960 inccij = ldc;
3961 } else {
3962 incci = ldc;
3963 inccij = 1;
3964 }
3965
3966
3967
3968
3969 if (side == blas_left_side)
3970 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3971 else
3972 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3973
3974 /* Fill in matrix A */
3975 cij = 0;
3976 for (i = 0; i < m_i; i++, cij += incci) {
3977 dskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3978 BLAS_ddot_s_d_testgen(m_i, i, m_i - i, norm,
3979 blas_no_conj, alpha, 1,
3980 beta, 1, b_vec, a_vec, seed,
3981 &c_elem, &head_r_true_elem, &tail_r_true_elem);
3982
3983 dskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
3984
3985 c_i[cij] = c_elem;
3986 head_r_true[cij] = head_r_true_elem;
3987 tail_r_true[cij] = tail_r_true_elem;
3988 }
3989
3990 /* Now fill in c and r_true */
3991
3992 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3993 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3994 c_i[cij] = c_i[ci];
3995 head_r_true[cij] = head_r_true[ci];
3996 tail_r_true[cij] = tail_r_true[ci];
3997 }
3998 }
3999
4000 blas_free(a_vec);
4001 blas_free(b_vec);
4002 }
BLAS_dskew_s_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,float * a,int lda,double * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)4003 void BLAS_dskew_s_d_testgen(int norm, enum blas_order_type order,
4004 enum blas_uplo_type uplo,
4005 enum blas_side_type side, int m, int n,
4006 double *alpha, int alpha_flag, double *beta,
4007 int beta_flag, float *a, int lda, double *b,
4008 int ldb, double *c, int ldc, int *seed,
4009 double *head_r_true, double *tail_r_true)
4010 {
4011
4012 int i, j;
4013 int cij, ci;
4014 int inccij, incci;
4015 int inca, incb;
4016 int m_i, n_i;
4017
4018 double c_elem;
4019 double head_r_true_elem, tail_r_true_elem;
4020
4021 float *a_vec;
4022 double *b_vec;
4023
4024 double *c_i = c;
4025
4026 if (side == blas_left_side) {
4027 m_i = m;
4028 n_i = n;
4029 } else {
4030 m_i = n;
4031 n_i = m;
4032 }
4033
4034 inca = incb = 1;
4035
4036
4037 a_vec = (float *) blas_malloc(m_i * sizeof(float));
4038 if (m_i > 0 && a_vec == NULL) {
4039 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4040 }
4041 for (i = 0; i < m_i * inca; i += inca) {
4042 a_vec[i] = 0.0;
4043 }
4044 b_vec = (double *) blas_malloc(m_i * sizeof(double));
4045 if (m_i > 0 && b_vec == NULL) {
4046 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4047 }
4048 for (i = 0; i < m_i * incb; i += incb) {
4049 b_vec[i] = 0.0;
4050 }
4051
4052 if ((order == blas_colmajor && side == blas_left_side) ||
4053 (order == blas_rowmajor && side == blas_right_side)) {
4054 incci = 1;
4055 inccij = ldc;
4056 } else {
4057 incci = ldc;
4058 inccij = 1;
4059 }
4060
4061
4062
4063
4064 if (side == blas_left_side)
4065 dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4066 else
4067 dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4068
4069 /* Fill in matrix A */
4070 cij = 0;
4071 for (i = 0; i < m_i; i++, cij += incci) {
4072 sskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
4073 BLAS_ddot_d_s_testgen(m_i, i, m_i - i, norm,
4074 blas_no_conj, alpha, 1,
4075 beta, 1, b_vec, a_vec, seed,
4076 &c_elem, &head_r_true_elem, &tail_r_true_elem);
4077
4078 sskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
4079
4080 c_i[cij] = c_elem;
4081 head_r_true[cij] = head_r_true_elem;
4082 tail_r_true[cij] = tail_r_true_elem;
4083 }
4084
4085 /* Now fill in c and r_true */
4086
4087 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4088 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
4089 c_i[cij] = c_i[ci];
4090 head_r_true[cij] = head_r_true[ci];
4091 tail_r_true[cij] = tail_r_true[ci];
4092 }
4093 }
4094
4095 blas_free(a_vec);
4096 blas_free(b_vec);
4097 }
BLAS_dskew_s_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,float * a,int lda,float * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)4098 void BLAS_dskew_s_s_testgen(int norm, enum blas_order_type order,
4099 enum blas_uplo_type uplo,
4100 enum blas_side_type side, int m, int n,
4101 double *alpha, int alpha_flag, double *beta,
4102 int beta_flag, float *a, int lda, float *b,
4103 int ldb, double *c, int ldc, int *seed,
4104 double *head_r_true, double *tail_r_true)
4105 {
4106
4107 int i, j;
4108 int cij, ci;
4109 int inccij, incci;
4110 int inca, incb;
4111 int m_i, n_i;
4112
4113 double c_elem;
4114 double head_r_true_elem, tail_r_true_elem;
4115
4116 float *a_vec;
4117 float *b_vec;
4118
4119 double *c_i = c;
4120
4121 if (side == blas_left_side) {
4122 m_i = m;
4123 n_i = n;
4124 } else {
4125 m_i = n;
4126 n_i = m;
4127 }
4128
4129 inca = incb = 1;
4130
4131
4132 a_vec = (float *) blas_malloc(m_i * sizeof(float));
4133 if (m_i > 0 && a_vec == NULL) {
4134 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4135 }
4136 for (i = 0; i < m_i * inca; i += inca) {
4137 a_vec[i] = 0.0;
4138 }
4139 b_vec = (float *) blas_malloc(m_i * sizeof(float));
4140 if (m_i > 0 && b_vec == NULL) {
4141 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4142 }
4143 for (i = 0; i < m_i * incb; i += incb) {
4144 b_vec[i] = 0.0;
4145 }
4146
4147 if ((order == blas_colmajor && side == blas_left_side) ||
4148 (order == blas_rowmajor && side == blas_right_side)) {
4149 incci = 1;
4150 inccij = ldc;
4151 } else {
4152 incci = ldc;
4153 inccij = 1;
4154 }
4155
4156
4157
4158
4159 if (side == blas_left_side)
4160 sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4161 else
4162 sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4163
4164 /* Fill in matrix A */
4165 cij = 0;
4166 for (i = 0; i < m_i; i++, cij += incci) {
4167 sskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
4168 BLAS_ddot_s_s_testgen(m_i, i, m_i - i, norm,
4169 blas_no_conj, alpha, 1,
4170 beta, 1, b_vec, a_vec, seed,
4171 &c_elem, &head_r_true_elem, &tail_r_true_elem);
4172
4173 sskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
4174
4175 c_i[cij] = c_elem;
4176 head_r_true[cij] = head_r_true_elem;
4177 tail_r_true[cij] = tail_r_true_elem;
4178 }
4179
4180 /* Now fill in c and r_true */
4181
4182 for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4183 for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
4184 c_i[cij] = c_i[ci];
4185 head_r_true[cij] = head_r_true[ci];
4186 tail_r_true[cij] = tail_r_true[ci];
4187 }
4188 }
4189
4190 blas_free(a_vec);
4191 blas_free(b_vec);
4192 }
4193