1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "blas_extended.h"
4 #include "blas_extended_private.h"
5 #include "blas_extended_test.h"
6 
BLAS_chemm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)7 void BLAS_chemm_testgen(int norm, enum blas_order_type order,
8 			enum blas_uplo_type uplo, enum blas_side_type side,
9 			int m, int n, int randomize, void *alpha,
10 			int alpha_flag, void *beta, int beta_flag, void *a,
11 			int lda, void *b, int ldb, void *c, int ldc,
12 			int *seed, double *head_r_true, double *tail_r_true)
13 
14 /*
15  * Purpose
16  * =======
17  *
18  *   Generates the test inputs to BLAS_chemm{_x}
19  *
20  * Arguments
21  * =========
22  *
23  * norm    (input) int
24  *           = -1: the vectors are scaled with norms near underflow.
25  *           =  0: the vectors have norms of order 1.
26  *           =  1: the vectors are scaled with norms near overflow.
27  *
28  * order   (input) enum blas_side_type
29  *           storage format of the matrices
30  *
31  * uplo    (input) enum blas_uplo_type
32  *           which half of the hermitian matrix a is to be stored.
33  *
34  * side    (input) enum blas_side_type
35  *           which side of matrix b matrix a is to be multiplied.
36  *
37  * m n     (input) int
38  *           sizes of matrices a, b, c:
39  *              matrix a is m-by-m for left multiplication
40  *                          n-by-n otherwise,
41  *              matrices b, c are m-by-n.
42  *
43  * randomize (input) int
44  *           = 0: test case made for maximum cancellation.
45  *           = 1: test case made for maximum radomization.
46  *
47  * alpha   (input/output) void*
48  *           if alpha_flag = 1, alpha is input.
49  *           if alpha_flag = 0, alpha is output.
50  *
51  * alpha_flag (input) int
52  *           = 0: alpha is free, and is output.
53  *           = 1: alpha is fixed on input.
54  *
55  * beta    (input/output) void*
56  *           if beta_flag = 1, beta is input.
57  *           if beta_flag = 0, beta is output.
58  *
59  * beta_flag (input) int
60  *           = 0: beta is free, and is output.
61  *           = 1: beta is fixed on input.
62  *
63  * a       (input/output) void*
64  *
65  * lda     (input) lda
66  *         leading dimension of matrix A.
67  *
68  * b       (input/output) void*
69  *
70  * ldb     (input) int
71  *         leading dimension of matrix B.
72  *
73  * c       (input/output) void*
74  *         generated matrix C that will be used as an input to HEMM.
75  *
76  * ldc     (input) int
77  *         leading dimension of matrix C.
78  *
79  * seed    (input/output) int *
80  *         seed for the random number generator.
81  *
82  * head_r_true  (output) double *
83  *         the leading part of the truth in double-double.
84  *
85  * tail_r_true  (output) double *
86  *         the trailing part of the truth in double-double
87  *
88  */
89 {
90 
91   /* Strategy:
92      R1 = alpha * A1 * B + beta * C1
93      R2 = alpha * A2 * B + beta * C2
94      where all the matrices are real.  Then let R = R1 + i R2,
95      A = A1 + i A2, C = C1 + i C2.  To make A hermitian, A1 is
96      symmetric, and A2 is a skew matrix (trans(A2) = -A2).
97    */
98 
99   int i, j;
100   int cij, ci;
101   int aij, ai;
102   int a1ij, a1i;
103   int bij, bi;
104   int mij, mi;
105   int inccij, incci;
106   int incaij, incai;
107   int inca1ij, inca1i;
108   int incbij, incbi;
109   int inci, incij;
110   int inca, incb;
111   int m_i, n_i;
112   int ld;
113   int ab;
114 
115   float *a1;
116   float *a2;
117   float *c1;
118   float *c2;
119   float *b0;
120 
121   double *head_r1_true, *tail_r1_true;
122   double *head_r2_true, *tail_r2_true;
123 
124   double head_r_elem1, tail_r_elem1;
125   double head_r_elem2, tail_r_elem2;
126   double head_r_elem, tail_r_elem;
127 
128   float *a_vec;
129   float *b_vec;
130 
131   float *c_i = (float *) c;
132   float *alpha_i = (float *) alpha;
133   float *beta_i = (float *) beta;
134   float *a_i = a;
135   float *b_i = b;
136 
137   if (side == blas_left_side) {
138     m_i = m;
139     n_i = n;
140   } else {
141     m_i = n;
142     n_i = m;
143   }
144 
145   if (order == blas_colmajor)
146     ld = m;
147   else
148     ld = n;
149 
150 
151   if ((order == blas_colmajor && side == blas_left_side) ||
152       (order == blas_rowmajor && side == blas_right_side)) {
153     inci = inca1i = incai = incbi = incci = 1;
154     inccij = ldc;
155     incaij = lda;
156     incbij = ldb;
157     inca1ij = m_i;
158     incij = ld;
159   } else {
160     incci = ldc;
161     incai = lda;
162     incbi = ldb;
163     inca1i = m_i;
164     inci = ld;
165     incij = inca1ij = incaij = incbij = inccij = 1;
166   }
167 
168   incci *= 2;
169   inccij *= 2;
170   incai *= 2;
171   incaij *= 2;
172   incbi *= 2;
173   incbij *= 2;
174 
175   inca = incb = 1;
176   inca *= 2;
177   incb *= 2;
178   a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
179   if (m_i > 0 && a_vec == NULL) {
180     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
181   }
182   for (i = 0; i < m_i * inca; i += inca) {
183     a_vec[i] = 0.0;
184     a_vec[i + 1] = 0.0;
185   }
186   b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
187   if (m_i > 0 && b_vec == NULL) {
188     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
189   }
190   for (i = 0; i < m_i * incb; i += incb) {
191     b_vec[i] = 0.0;
192     b_vec[i + 1] = 0.0;
193   }
194 
195   if (randomize == 0) {
196     a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
197     if (m_i * m_i > 0 && a1 == NULL) {
198       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
199     }
200     a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
201     if (m_i * m_i > 0 && a2 == NULL) {
202       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
203     }
204     for (i = 0; i < m_i * m_i; ++i) {
205       a1[i] = a2[i] = 0.0;
206     }
207     c1 = (float *) blas_malloc(m_i * n_i * sizeof(float));
208     if (m_i * n_i > 0 && c1 == NULL) {
209       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
210     }
211     c2 = (float *) blas_malloc(m_i * n_i * sizeof(float));
212     if (m_i * n_i > 0 && c2 == NULL) {
213       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
214     }
215     b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
216     if (m_i * n_i > 0 && b0 == NULL) {
217       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
218     }
219     for (i = 0; i < m_i * n_i; ++i) {
220       c1[i] = c2[i] = b0[i] = 0.0;
221     }
222     head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
223     tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
224     if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
225       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
226     };
227     head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
228     tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
229     if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
230       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
231     };
232 
233     /* First generate the real portion of matrix A, and matrix B.
234        Note that Re(A) is a symmetric matrix. */
235     BLAS_ssymm_testgen
236       (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
237        beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
238 
239     BLAS_sskew_testgen
240       (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
241        c2, ld, seed, head_r2_true, tail_r2_true);
242 
243 
244     /* The case where B is a complex matrix.  Since B is generated
245        as a real matrix, we need to perform some scaling.
246 
247        There are four cases to consider, depending on the values
248        of alpha and beta.
249 
250        values                         scaling
251        alpha   beta      alpha  A    B       beta    C    R (truth)
252        0    1      1                    i               i    i
253        1    1      ?                   1+i      1+i         1+i
254        2    ?      1         1+i       1+i             2i    2i
255        3    ?      ?         1+i       1+i      2i           2i
256 
257        Note that we can afford to scale R by 1+i, since they are
258        computed in double-double precision.
259      */
260 
261     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
262       ab = 0;
263       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
264     } else if (alpha_i[0] == 1.0) {
265       ab = 1;
266       /* set alpha to 1, multiply beta by 1+i. */
267       alpha_i[1] = 0.0;
268       beta_i[1] = beta_i[0];
269     } else if (beta_i[0] == 1.0) {
270       ab = 2;
271       /* set beta to 1, multiply alpha by 1+i. */
272       beta_i[1] = 0.0;
273       alpha_i[1] = alpha_i[0];
274     } else {
275       ab = 3;
276       /* multiply alpha by 1+i, beta by 2i. */
277       alpha_i[1] = alpha_i[0];
278       beta_i[1] = 2.0 * beta_i[0];
279       beta_i[0] = 0.0;
280     }
281 
282 
283     /* Now fill in a */
284     for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
285       for (j = 0, aij = ai, a1ij = a1i; j < m_i;
286 	   j++, aij += incaij, a1ij += inca1ij) {
287 	a_i[aij] = a1[a1ij];
288 	a_i[aij + 1] = a2[a1ij];
289       }
290     }
291 
292     /* Fill in b */
293     for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
294       for (j = 0, bij = bi, mij = mi; j < n_i;
295 	   j++, bij += incbij, mij += incij) {
296 	if (ab == 0) {
297 	  b_i[bij] = 0.0;
298 	  b_i[bij + 1] = b0[mij];
299 	} else {
300 	  b_i[bij] = b0[mij];
301 	  b_i[bij + 1] = b0[mij];
302 	}
303       }
304     }
305 
306     /* Fill in c */
307     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
308       for (j = 0, cij = ci, mij = mi; j < n_i;
309 	   j++, cij += inccij, mij += incij) {
310 	if (ab == 0) {
311 	  c_i[cij] = -c2[mij];
312 	  c_i[cij + 1] = c1[mij];
313 	} else if (ab == 2) {
314 	  c_i[cij] = -2.0 * c2[mij];
315 	  c_i[cij + 1] = 2.0 * c1[mij];
316 	} else {
317 	  c_i[cij] = c1[mij];
318 	  c_i[cij + 1] = c2[mij];
319 	}
320       }
321     }
322 
323     /* Fill in truth */
324     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
325       for (j = 0, cij = ci, mij = mi; j < n_i;
326 	   j++, cij += inccij, mij += incij) {
327 
328 	head_r_elem1 = head_r1_true[mij];
329 	tail_r_elem1 = tail_r1_true[mij];
330 
331 	head_r_elem2 = head_r2_true[mij];
332 	tail_r_elem2 = tail_r2_true[mij];
333 
334 	if (ab == 0) {
335 	  head_r_true[cij] = -head_r_elem2;
336 	  tail_r_true[cij] = -tail_r_elem2;
337 	  head_r_true[cij + 1] = head_r_elem1;
338 	  tail_r_true[cij + 1] = tail_r_elem1;
339 	} else if (ab == 1) {
340 	  {
341 	    /* Compute double-double = double-double + double-double. */
342 	    double bv;
343 	    double s1, s2, t1, t2;
344 
345 	    /* Add two hi words. */
346 	    s1 = head_r_elem1 + head_r_elem2;
347 	    bv = s1 - head_r_elem1;
348 	    s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
349 
350 	    /* Add two lo words. */
351 	    t1 = tail_r_elem1 + tail_r_elem2;
352 	    bv = t1 - tail_r_elem1;
353 	    t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
354 
355 	    s2 += t1;
356 
357 	    /* Renormalize (s1, s2)  to  (t1, s2) */
358 	    t1 = s1 + s2;
359 	    s2 = s2 - (t1 - s1);
360 
361 	    t2 += s2;
362 
363 	    /* Renormalize (t1, t2)  */
364 	    head_r_elem = t1 + t2;
365 	    tail_r_elem = t2 - (head_r_elem - t1);
366 	  }
367 
368 	  /* Set the imaginary part to  R1 + R2 */
369 	  tail_r_true[cij + 1] = tail_r_elem;
370 	  head_r_true[cij + 1] = head_r_elem;
371 
372 	  /* Set the real part to R1 - R2. */
373 	  {
374 	    double head_bt, tail_bt;
375 	    head_bt = -head_r_elem2;
376 	    tail_bt = -tail_r_elem2;
377 	    {
378 	      /* Compute double-double = double-double + double-double. */
379 	      double bv;
380 	      double s1, s2, t1, t2;
381 
382 	      /* Add two hi words. */
383 	      s1 = head_r_elem1 + head_bt;
384 	      bv = s1 - head_r_elem1;
385 	      s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
386 
387 	      /* Add two lo words. */
388 	      t1 = tail_r_elem1 + tail_bt;
389 	      bv = t1 - tail_r_elem1;
390 	      t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
391 
392 	      s2 += t1;
393 
394 	      /* Renormalize (s1, s2)  to  (t1, s2) */
395 	      t1 = s1 + s2;
396 	      s2 = s2 - (t1 - s1);
397 
398 	      t2 += s2;
399 
400 	      /* Renormalize (t1, t2)  */
401 	      head_r_elem = t1 + t2;
402 	      tail_r_elem = t2 - (head_r_elem - t1);
403 	    }
404 	  }
405 	  tail_r_true[cij] = tail_r_elem;
406 	  head_r_true[cij] = head_r_elem;
407 	} else {
408 
409 	  /* Real part */
410 	  {
411 	    /* Compute double-double = double-double * double. */
412 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
413 
414 	    con = head_r_elem2 * split;
415 	    a11 = con - head_r_elem2;
416 	    a11 = con - a11;
417 	    a21 = head_r_elem2 - a11;
418 	    con = -2.0 * split;
419 	    b1 = con - -2.0;
420 	    b1 = con - b1;
421 	    b2 = -2.0 - b1;
422 
423 	    c11 = head_r_elem2 * -2.0;
424 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
425 
426 	    c2 = tail_r_elem2 * -2.0;
427 	    t1 = c11 + c2;
428 	    t2 = (c2 - (t1 - c11)) + c21;
429 
430 	    head_r_elem = t1 + t2;
431 	    tail_r_elem = t2 - (head_r_elem - t1);
432 	  }
433 	  head_r_true[cij] = head_r_elem;
434 	  tail_r_true[cij] = tail_r_elem;
435 
436 	  /* Imaginary Part */
437 	  {
438 	    /* Compute double-double = double-double * double. */
439 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
440 
441 	    con = head_r_elem1 * split;
442 	    a11 = con - head_r_elem1;
443 	    a11 = con - a11;
444 	    a21 = head_r_elem1 - a11;
445 	    con = 2.0 * split;
446 	    b1 = con - 2.0;
447 	    b1 = con - b1;
448 	    b2 = 2.0 - b1;
449 
450 	    c11 = head_r_elem1 * 2.0;
451 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
452 
453 	    c2 = tail_r_elem1 * 2.0;
454 	    t1 = c11 + c2;
455 	    t2 = (c2 - (t1 - c11)) + c21;
456 
457 	    head_r_elem = t1 + t2;
458 	    tail_r_elem = t2 - (head_r_elem - t1);
459 	  }
460 	  head_r_true[cij + 1] = head_r_elem;
461 	  tail_r_true[cij + 1] = tail_r_elem;
462 	}
463       }
464     }
465 
466     blas_free(a1);
467     blas_free(a2);
468     blas_free(c1);
469     blas_free(c2);
470     blas_free(b0);
471     blas_free(head_r1_true);
472     blas_free(tail_r1_true);
473     blas_free(head_r2_true);
474     blas_free(tail_r2_true);
475 
476   } else {
477     /* random matrix */
478     float a_elem[2];
479     float b_elem[2];
480     float c_elem[2];
481     double head_r_true_elem[2], tail_r_true_elem[2];
482 
483     /* Since mixed real/complex test generator for dot
484        scales the vectors, we need to used the non-mixed
485        version if B is real (since A is always complex). */
486 
487 
488     if (alpha_flag == 0) {
489       alpha_i[0] = xrand(seed);
490       alpha_i[1] = xrand(seed);
491     }
492     if (beta_flag == 0) {
493       beta_i[0] = xrand(seed);
494       beta_i[1] = xrand(seed);
495     }
496 
497     /* Fill in matrix A -- Hermitian. */
498     for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
499       for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
500 	a_elem[0] = xrand(seed);
501 	a_elem[1] = xrand(seed);
502 	a_i[aij] = a_elem[0];
503 	a_i[aij + 1] = a_elem[1];
504 	if (i == j)
505 	  a_i[aij + 1] = 0.0;
506       }
507     }
508 
509     /* Fill in matrix B */
510     for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
511       for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
512 	b_elem[0] = xrand(seed);
513 	b_elem[1] = xrand(seed);
514 	b_i[bij] = b_elem[0];
515 	b_i[bij + 1] = b_elem[1];
516       }
517     }
518 
519     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
520       che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
521       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
522 
523 	if (side == blas_left_side)
524 	  cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
525 	else
526 	  cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
527 
528 	/* copy the real b_vec into complex bb_vec, so that
529 	   pure complex test case generator can be called. */
530 
531 	BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
532 			  alpha, 1, beta, 1, a_vec, b_vec, seed,
533 			  c_elem, head_r_true_elem, tail_r_true_elem);
534 
535 	c_i[cij] = c_elem[0];
536 	c_i[cij + 1] = c_elem[1];
537 	head_r_true[cij] = head_r_true_elem[0];
538 	head_r_true[cij + 1] = head_r_true_elem[1];
539 	tail_r_true[cij] = tail_r_true_elem[0];
540 	tail_r_true[cij + 1] = tail_r_true_elem[1];
541       }
542     }
543 
544 
545 
546   }
547 
548   blas_free(a_vec);
549   blas_free(b_vec);
550 }
BLAS_zhemm_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)551 void BLAS_zhemm_testgen(int norm, enum blas_order_type order,
552 			enum blas_uplo_type uplo, enum blas_side_type side,
553 			int m, int n, int randomize, void *alpha,
554 			int alpha_flag, void *beta, int beta_flag, void *a,
555 			int lda, void *b, int ldb, void *c, int ldc,
556 			int *seed, double *head_r_true, double *tail_r_true)
557 
558 /*
559  * Purpose
560  * =======
561  *
562  *   Generates the test inputs to BLAS_zhemm{_x}
563  *
564  * Arguments
565  * =========
566  *
567  * norm    (input) int
568  *           = -1: the vectors are scaled with norms near underflow.
569  *           =  0: the vectors have norms of order 1.
570  *           =  1: the vectors are scaled with norms near overflow.
571  *
572  * order   (input) enum blas_side_type
573  *           storage format of the matrices
574  *
575  * uplo    (input) enum blas_uplo_type
576  *           which half of the hermitian matrix a is to be stored.
577  *
578  * side    (input) enum blas_side_type
579  *           which side of matrix b matrix a is to be multiplied.
580  *
581  * m n     (input) int
582  *           sizes of matrices a, b, c:
583  *              matrix a is m-by-m for left multiplication
584  *                          n-by-n otherwise,
585  *              matrices b, c are m-by-n.
586  *
587  * randomize (input) int
588  *           = 0: test case made for maximum cancellation.
589  *           = 1: test case made for maximum radomization.
590  *
591  * alpha   (input/output) void*
592  *           if alpha_flag = 1, alpha is input.
593  *           if alpha_flag = 0, alpha is output.
594  *
595  * alpha_flag (input) int
596  *           = 0: alpha is free, and is output.
597  *           = 1: alpha is fixed on input.
598  *
599  * beta    (input/output) void*
600  *           if beta_flag = 1, beta is input.
601  *           if beta_flag = 0, beta is output.
602  *
603  * beta_flag (input) int
604  *           = 0: beta is free, and is output.
605  *           = 1: beta is fixed on input.
606  *
607  * a       (input/output) void*
608  *
609  * lda     (input) lda
610  *         leading dimension of matrix A.
611  *
612  * b       (input/output) void*
613  *
614  * ldb     (input) int
615  *         leading dimension of matrix B.
616  *
617  * c       (input/output) void*
618  *         generated matrix C that will be used as an input to HEMM.
619  *
620  * ldc     (input) int
621  *         leading dimension of matrix C.
622  *
623  * seed    (input/output) int *
624  *         seed for the random number generator.
625  *
626  * head_r_true  (output) double *
627  *         the leading part of the truth in double-double.
628  *
629  * tail_r_true  (output) double *
630  *         the trailing part of the truth in double-double
631  *
632  */
633 {
634 
635   /* Strategy:
636      R1 = alpha * A1 * B + beta * C1
637      R2 = alpha * A2 * B + beta * C2
638      where all the matrices are real.  Then let R = R1 + i R2,
639      A = A1 + i A2, C = C1 + i C2.  To make A hermitian, A1 is
640      symmetric, and A2 is a skew matrix (trans(A2) = -A2).
641    */
642 
643   int i, j;
644   int cij, ci;
645   int aij, ai;
646   int a1ij, a1i;
647   int bij, bi;
648   int mij, mi;
649   int inccij, incci;
650   int incaij, incai;
651   int inca1ij, inca1i;
652   int incbij, incbi;
653   int inci, incij;
654   int inca, incb;
655   int m_i, n_i;
656   int ld;
657   int ab;
658 
659   double *a1;
660   double *a2;
661   double *c1;
662   double *c2;
663   double *b0;
664 
665   double *head_r1_true, *tail_r1_true;
666   double *head_r2_true, *tail_r2_true;
667 
668   double head_r_elem1, tail_r_elem1;
669   double head_r_elem2, tail_r_elem2;
670   double head_r_elem, tail_r_elem;
671 
672   double *a_vec;
673   double *b_vec;
674 
675   double *c_i = (double *) c;
676   double *alpha_i = (double *) alpha;
677   double *beta_i = (double *) beta;
678   double *a_i = a;
679   double *b_i = b;
680 
681   if (side == blas_left_side) {
682     m_i = m;
683     n_i = n;
684   } else {
685     m_i = n;
686     n_i = m;
687   }
688 
689   if (order == blas_colmajor)
690     ld = m;
691   else
692     ld = n;
693 
694 
695   if ((order == blas_colmajor && side == blas_left_side) ||
696       (order == blas_rowmajor && side == blas_right_side)) {
697     inci = inca1i = incai = incbi = incci = 1;
698     inccij = ldc;
699     incaij = lda;
700     incbij = ldb;
701     inca1ij = m_i;
702     incij = ld;
703   } else {
704     incci = ldc;
705     incai = lda;
706     incbi = ldb;
707     inca1i = m_i;
708     inci = ld;
709     incij = inca1ij = incaij = incbij = inccij = 1;
710   }
711 
712   incci *= 2;
713   inccij *= 2;
714   incai *= 2;
715   incaij *= 2;
716   incbi *= 2;
717   incbij *= 2;
718 
719   inca = incb = 1;
720   inca *= 2;
721   incb *= 2;
722   a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
723   if (m_i > 0 && a_vec == NULL) {
724     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
725   }
726   for (i = 0; i < m_i * inca; i += inca) {
727     a_vec[i] = 0.0;
728     a_vec[i + 1] = 0.0;
729   }
730   b_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
731   if (m_i > 0 && b_vec == NULL) {
732     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
733   }
734   for (i = 0; i < m_i * incb; i += incb) {
735     b_vec[i] = 0.0;
736     b_vec[i + 1] = 0.0;
737   }
738 
739   if (randomize == 0) {
740     a1 = (double *) blas_malloc(m_i * m_i * sizeof(double));
741     if (m_i * m_i > 0 && a1 == NULL) {
742       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
743     }
744     a2 = (double *) blas_malloc(m_i * m_i * sizeof(double));
745     if (m_i * m_i > 0 && a2 == NULL) {
746       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
747     }
748     for (i = 0; i < m_i * m_i; ++i) {
749       a1[i] = a2[i] = 0.0;
750     }
751     c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
752     if (m_i * n_i > 0 && c1 == NULL) {
753       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
754     }
755     c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
756     if (m_i * n_i > 0 && c2 == NULL) {
757       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
758     }
759     b0 = (double *) blas_malloc(m_i * n_i * sizeof(double));
760     if (m_i * n_i > 0 && b0 == NULL) {
761       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
762     }
763     for (i = 0; i < m_i * n_i; ++i) {
764       c1[i] = c2[i] = b0[i] = 0.0;
765     }
766     head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
767     tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
768     if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
769       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
770     };
771     head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
772     tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
773     if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
774       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
775     };
776 
777     /* First generate the real portion of matrix A, and matrix B.
778        Note that Re(A) is a symmetric matrix. */
779     BLAS_dsymm_testgen
780       (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
781        beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
782 
783     BLAS_dskew_testgen
784       (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
785        c2, ld, seed, head_r2_true, tail_r2_true);
786 
787 
788     /* The case where B is a complex matrix.  Since B is generated
789        as a real matrix, we need to perform some scaling.
790 
791        There are four cases to consider, depending on the values
792        of alpha and beta.
793 
794        values                         scaling
795        alpha   beta      alpha  A    B       beta    C    R (truth)
796        0    1      1                    i               i    i
797        1    1      ?                   1+i      1+i         1+i
798        2    ?      1         1+i       1+i             2i    2i
799        3    ?      ?         1+i       1+i      2i           2i
800 
801        Note that we can afford to scale R by 1+i, since they are
802        computed in double-double precision.
803      */
804 
805     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
806       ab = 0;
807       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
808     } else if (alpha_i[0] == 1.0) {
809       ab = 1;
810       /* set alpha to 1, multiply beta by 1+i. */
811       alpha_i[1] = 0.0;
812       beta_i[1] = beta_i[0];
813     } else if (beta_i[0] == 1.0) {
814       ab = 2;
815       /* set beta to 1, multiply alpha by 1+i. */
816       beta_i[1] = 0.0;
817       alpha_i[1] = alpha_i[0];
818     } else {
819       ab = 3;
820       /* multiply alpha by 1+i, beta by 2i. */
821       alpha_i[1] = alpha_i[0];
822       beta_i[1] = 2.0 * beta_i[0];
823       beta_i[0] = 0.0;
824     }
825 
826 
827     /* Now fill in a */
828     for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
829       for (j = 0, aij = ai, a1ij = a1i; j < m_i;
830 	   j++, aij += incaij, a1ij += inca1ij) {
831 	a_i[aij] = a1[a1ij];
832 	a_i[aij + 1] = a2[a1ij];
833       }
834     }
835 
836     /* Fill in b */
837     for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
838       for (j = 0, bij = bi, mij = mi; j < n_i;
839 	   j++, bij += incbij, mij += incij) {
840 	if (ab == 0) {
841 	  b_i[bij] = 0.0;
842 	  b_i[bij + 1] = b0[mij];
843 	} else {
844 	  b_i[bij] = b0[mij];
845 	  b_i[bij + 1] = b0[mij];
846 	}
847       }
848     }
849 
850     /* Fill in c */
851     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
852       for (j = 0, cij = ci, mij = mi; j < n_i;
853 	   j++, cij += inccij, mij += incij) {
854 	if (ab == 0) {
855 	  c_i[cij] = -c2[mij];
856 	  c_i[cij + 1] = c1[mij];
857 	} else if (ab == 2) {
858 	  c_i[cij] = -2.0 * c2[mij];
859 	  c_i[cij + 1] = 2.0 * c1[mij];
860 	} else {
861 	  c_i[cij] = c1[mij];
862 	  c_i[cij + 1] = c2[mij];
863 	}
864       }
865     }
866 
867     /* Fill in truth */
868     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
869       for (j = 0, cij = ci, mij = mi; j < n_i;
870 	   j++, cij += inccij, mij += incij) {
871 
872 	head_r_elem1 = head_r1_true[mij];
873 	tail_r_elem1 = tail_r1_true[mij];
874 
875 	head_r_elem2 = head_r2_true[mij];
876 	tail_r_elem2 = tail_r2_true[mij];
877 
878 	if (ab == 0) {
879 	  head_r_true[cij] = -head_r_elem2;
880 	  tail_r_true[cij] = -tail_r_elem2;
881 	  head_r_true[cij + 1] = head_r_elem1;
882 	  tail_r_true[cij + 1] = tail_r_elem1;
883 	} else if (ab == 1) {
884 	  {
885 	    /* Compute double-double = double-double + double-double. */
886 	    double bv;
887 	    double s1, s2, t1, t2;
888 
889 	    /* Add two hi words. */
890 	    s1 = head_r_elem1 + head_r_elem2;
891 	    bv = s1 - head_r_elem1;
892 	    s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
893 
894 	    /* Add two lo words. */
895 	    t1 = tail_r_elem1 + tail_r_elem2;
896 	    bv = t1 - tail_r_elem1;
897 	    t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
898 
899 	    s2 += t1;
900 
901 	    /* Renormalize (s1, s2)  to  (t1, s2) */
902 	    t1 = s1 + s2;
903 	    s2 = s2 - (t1 - s1);
904 
905 	    t2 += s2;
906 
907 	    /* Renormalize (t1, t2)  */
908 	    head_r_elem = t1 + t2;
909 	    tail_r_elem = t2 - (head_r_elem - t1);
910 	  }
911 
912 	  /* Set the imaginary part to  R1 + R2 */
913 	  tail_r_true[cij + 1] = tail_r_elem;
914 	  head_r_true[cij + 1] = head_r_elem;
915 
916 	  /* Set the real part to R1 - R2. */
917 	  {
918 	    double head_bt, tail_bt;
919 	    head_bt = -head_r_elem2;
920 	    tail_bt = -tail_r_elem2;
921 	    {
922 	      /* Compute double-double = double-double + double-double. */
923 	      double bv;
924 	      double s1, s2, t1, t2;
925 
926 	      /* Add two hi words. */
927 	      s1 = head_r_elem1 + head_bt;
928 	      bv = s1 - head_r_elem1;
929 	      s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
930 
931 	      /* Add two lo words. */
932 	      t1 = tail_r_elem1 + tail_bt;
933 	      bv = t1 - tail_r_elem1;
934 	      t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
935 
936 	      s2 += t1;
937 
938 	      /* Renormalize (s1, s2)  to  (t1, s2) */
939 	      t1 = s1 + s2;
940 	      s2 = s2 - (t1 - s1);
941 
942 	      t2 += s2;
943 
944 	      /* Renormalize (t1, t2)  */
945 	      head_r_elem = t1 + t2;
946 	      tail_r_elem = t2 - (head_r_elem - t1);
947 	    }
948 	  }
949 	  tail_r_true[cij] = tail_r_elem;
950 	  head_r_true[cij] = head_r_elem;
951 	} else {
952 
953 	  /* Real part */
954 	  {
955 	    /* Compute double-double = double-double * double. */
956 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
957 
958 	    con = head_r_elem2 * split;
959 	    a11 = con - head_r_elem2;
960 	    a11 = con - a11;
961 	    a21 = head_r_elem2 - a11;
962 	    con = -2.0 * split;
963 	    b1 = con - -2.0;
964 	    b1 = con - b1;
965 	    b2 = -2.0 - b1;
966 
967 	    c11 = head_r_elem2 * -2.0;
968 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
969 
970 	    c2 = tail_r_elem2 * -2.0;
971 	    t1 = c11 + c2;
972 	    t2 = (c2 - (t1 - c11)) + c21;
973 
974 	    head_r_elem = t1 + t2;
975 	    tail_r_elem = t2 - (head_r_elem - t1);
976 	  }
977 	  head_r_true[cij] = head_r_elem;
978 	  tail_r_true[cij] = tail_r_elem;
979 
980 	  /* Imaginary Part */
981 	  {
982 	    /* Compute double-double = double-double * double. */
983 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
984 
985 	    con = head_r_elem1 * split;
986 	    a11 = con - head_r_elem1;
987 	    a11 = con - a11;
988 	    a21 = head_r_elem1 - a11;
989 	    con = 2.0 * split;
990 	    b1 = con - 2.0;
991 	    b1 = con - b1;
992 	    b2 = 2.0 - b1;
993 
994 	    c11 = head_r_elem1 * 2.0;
995 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
996 
997 	    c2 = tail_r_elem1 * 2.0;
998 	    t1 = c11 + c2;
999 	    t2 = (c2 - (t1 - c11)) + c21;
1000 
1001 	    head_r_elem = t1 + t2;
1002 	    tail_r_elem = t2 - (head_r_elem - t1);
1003 	  }
1004 	  head_r_true[cij + 1] = head_r_elem;
1005 	  tail_r_true[cij + 1] = tail_r_elem;
1006 	}
1007       }
1008     }
1009 
1010     blas_free(a1);
1011     blas_free(a2);
1012     blas_free(c1);
1013     blas_free(c2);
1014     blas_free(b0);
1015     blas_free(head_r1_true);
1016     blas_free(tail_r1_true);
1017     blas_free(head_r2_true);
1018     blas_free(tail_r2_true);
1019 
1020   } else {
1021     /* random matrix */
1022     double a_elem[2];
1023     double b_elem[2];
1024     double c_elem[2];
1025     double head_r_true_elem[2], tail_r_true_elem[2];
1026 
1027     /* Since mixed real/complex test generator for dot
1028        scales the vectors, we need to used the non-mixed
1029        version if B is real (since A is always complex). */
1030 
1031 
1032     if (alpha_flag == 0) {
1033       alpha_i[0] = xrand(seed);
1034       alpha_i[1] = xrand(seed);
1035     }
1036     if (beta_flag == 0) {
1037       beta_i[0] = xrand(seed);
1038       beta_i[1] = xrand(seed);
1039     }
1040 
1041     /* Fill in matrix A -- Hermitian. */
1042     for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
1043       for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
1044 	a_elem[0] = xrand(seed);
1045 	a_elem[1] = xrand(seed);
1046 	a_i[aij] = a_elem[0];
1047 	a_i[aij + 1] = a_elem[1];
1048 	if (i == j)
1049 	  a_i[aij + 1] = 0.0;
1050       }
1051     }
1052 
1053     /* Fill in matrix B */
1054     for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
1055       for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
1056 	b_elem[0] = xrand(seed);
1057 	b_elem[1] = xrand(seed);
1058 	b_i[bij] = b_elem[0];
1059 	b_i[bij + 1] = b_elem[1];
1060       }
1061     }
1062 
1063     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1064       zhe_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
1065       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
1066 
1067 	if (side == blas_left_side)
1068 	  zge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1069 	else
1070 	  zge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1071 
1072 	/* copy the real b_vec into complex bb_vec, so that
1073 	   pure complex test case generator can be called. */
1074 
1075 	BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
1076 			  alpha, 1, beta, 1, a_vec, b_vec, seed,
1077 			  c_elem, head_r_true_elem, tail_r_true_elem);
1078 
1079 	c_i[cij] = c_elem[0];
1080 	c_i[cij + 1] = c_elem[1];
1081 	head_r_true[cij] = head_r_true_elem[0];
1082 	head_r_true[cij + 1] = head_r_true_elem[1];
1083 	tail_r_true[cij] = tail_r_true_elem[0];
1084 	tail_r_true[cij + 1] = tail_r_true_elem[1];
1085       }
1086     }
1087 
1088 
1089 
1090   }
1091 
1092   blas_free(a_vec);
1093   blas_free(b_vec);
1094 }
BLAS_zhemm_c_z_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)1095 void BLAS_zhemm_c_z_testgen(int norm, enum blas_order_type order,
1096 			    enum blas_uplo_type uplo,
1097 			    enum blas_side_type side, int m, int n,
1098 			    int randomize, void *alpha, int alpha_flag,
1099 			    void *beta, int beta_flag, void *a, int lda,
1100 			    void *b, int ldb, void *c, int ldc, int *seed,
1101 			    double *head_r_true, double *tail_r_true)
1102 
1103 /*
1104  * Purpose
1105  * =======
1106  *
1107  *   Generates the test inputs to BLAS_zhemm_c_z{_x}
1108  *
1109  * Arguments
1110  * =========
1111  *
1112  * norm    (input) int
1113  *           = -1: the vectors are scaled with norms near underflow.
1114  *           =  0: the vectors have norms of order 1.
1115  *           =  1: the vectors are scaled with norms near overflow.
1116  *
1117  * order   (input) enum blas_side_type
1118  *           storage format of the matrices
1119  *
1120  * uplo    (input) enum blas_uplo_type
1121  *           which half of the hermitian matrix a is to be stored.
1122  *
1123  * side    (input) enum blas_side_type
1124  *           which side of matrix b matrix a is to be multiplied.
1125  *
1126  * m n     (input) int
1127  *           sizes of matrices a, b, c:
1128  *              matrix a is m-by-m for left multiplication
1129  *                          n-by-n otherwise,
1130  *              matrices b, c are m-by-n.
1131  *
1132  * randomize (input) int
1133  *           = 0: test case made for maximum cancellation.
1134  *           = 1: test case made for maximum radomization.
1135  *
1136  * alpha   (input/output) void*
1137  *           if alpha_flag = 1, alpha is input.
1138  *           if alpha_flag = 0, alpha is output.
1139  *
1140  * alpha_flag (input) int
1141  *           = 0: alpha is free, and is output.
1142  *           = 1: alpha is fixed on input.
1143  *
1144  * beta    (input/output) void*
1145  *           if beta_flag = 1, beta is input.
1146  *           if beta_flag = 0, beta is output.
1147  *
1148  * beta_flag (input) int
1149  *           = 0: beta is free, and is output.
1150  *           = 1: beta is fixed on input.
1151  *
1152  * a       (input/output) void*
1153  *
1154  * lda     (input) lda
1155  *         leading dimension of matrix A.
1156  *
1157  * b       (input/output) void*
1158  *
1159  * ldb     (input) int
1160  *         leading dimension of matrix B.
1161  *
1162  * c       (input/output) void*
1163  *         generated matrix C that will be used as an input to HEMM.
1164  *
1165  * ldc     (input) int
1166  *         leading dimension of matrix C.
1167  *
1168  * seed    (input/output) int *
1169  *         seed for the random number generator.
1170  *
1171  * head_r_true  (output) double *
1172  *         the leading part of the truth in double-double.
1173  *
1174  * tail_r_true  (output) double *
1175  *         the trailing part of the truth in double-double
1176  *
1177  */
1178 {
1179 
1180   /* Strategy:
1181      R1 = alpha * A1 * B + beta * C1
1182      R2 = alpha * A2 * B + beta * C2
1183      where all the matrices are real.  Then let R = R1 + i R2,
1184      A = A1 + i A2, C = C1 + i C2.  To make A hermitian, A1 is
1185      symmetric, and A2 is a skew matrix (trans(A2) = -A2).
1186    */
1187 
1188   int i, j;
1189   int cij, ci;
1190   int aij, ai;
1191   int a1ij, a1i;
1192   int bij, bi;
1193   int mij, mi;
1194   int inccij, incci;
1195   int incaij, incai;
1196   int inca1ij, inca1i;
1197   int incbij, incbi;
1198   int inci, incij;
1199   int inca, incb;
1200   int m_i, n_i;
1201   int ld;
1202   int ab;
1203 
1204   float *a1;
1205   float *a2;
1206   double *c1;
1207   double *c2;
1208   double *b0;
1209 
1210   double *head_r1_true, *tail_r1_true;
1211   double *head_r2_true, *tail_r2_true;
1212 
1213   double head_r_elem1, tail_r_elem1;
1214   double head_r_elem2, tail_r_elem2;
1215   double head_r_elem, tail_r_elem;
1216 
1217   float *a_vec;
1218   double *b_vec;
1219 
1220   double *c_i = (double *) c;
1221   double *alpha_i = (double *) alpha;
1222   double *beta_i = (double *) beta;
1223   float *a_i = a;
1224   double *b_i = b;
1225 
1226   if (side == blas_left_side) {
1227     m_i = m;
1228     n_i = n;
1229   } else {
1230     m_i = n;
1231     n_i = m;
1232   }
1233 
1234   if (order == blas_colmajor)
1235     ld = m;
1236   else
1237     ld = n;
1238 
1239 
1240   if ((order == blas_colmajor && side == blas_left_side) ||
1241       (order == blas_rowmajor && side == blas_right_side)) {
1242     inci = inca1i = incai = incbi = incci = 1;
1243     inccij = ldc;
1244     incaij = lda;
1245     incbij = ldb;
1246     inca1ij = m_i;
1247     incij = ld;
1248   } else {
1249     incci = ldc;
1250     incai = lda;
1251     incbi = ldb;
1252     inca1i = m_i;
1253     inci = ld;
1254     incij = inca1ij = incaij = incbij = inccij = 1;
1255   }
1256 
1257   incci *= 2;
1258   inccij *= 2;
1259   incai *= 2;
1260   incaij *= 2;
1261   incbi *= 2;
1262   incbij *= 2;
1263 
1264   inca = incb = 1;
1265   inca *= 2;
1266   incb *= 2;
1267   a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1268   if (m_i > 0 && a_vec == NULL) {
1269     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1270   }
1271   for (i = 0; i < m_i * inca; i += inca) {
1272     a_vec[i] = 0.0;
1273     a_vec[i + 1] = 0.0;
1274   }
1275   b_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
1276   if (m_i > 0 && b_vec == NULL) {
1277     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1278   }
1279   for (i = 0; i < m_i * incb; i += incb) {
1280     b_vec[i] = 0.0;
1281     b_vec[i + 1] = 0.0;
1282   }
1283 
1284   if (randomize == 0) {
1285     a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
1286     if (m_i * m_i > 0 && a1 == NULL) {
1287       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1288     }
1289     a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
1290     if (m_i * m_i > 0 && a2 == NULL) {
1291       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1292     }
1293     for (i = 0; i < m_i * m_i; ++i) {
1294       a1[i] = a2[i] = 0.0;
1295     }
1296     c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1297     if (m_i * n_i > 0 && c1 == NULL) {
1298       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1299     }
1300     c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1301     if (m_i * n_i > 0 && c2 == NULL) {
1302       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1303     }
1304     b0 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1305     if (m_i * n_i > 0 && b0 == NULL) {
1306       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1307     }
1308     for (i = 0; i < m_i * n_i; ++i) {
1309       c1[i] = c2[i] = b0[i] = 0.0;
1310     }
1311     head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1312     tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1313     if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1314       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1315     };
1316     head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1317     tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1318     if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1319       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1320     };
1321 
1322     /* First generate the real portion of matrix A, and matrix B.
1323        Note that Re(A) is a symmetric matrix. */
1324     BLAS_dsymm_s_d_testgen
1325       (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
1326        beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
1327 
1328     BLAS_dskew_s_d_testgen
1329       (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
1330        c2, ld, seed, head_r2_true, tail_r2_true);
1331 
1332 
1333     /* The case where B is a complex matrix.  Since B is generated
1334        as a real matrix, we need to perform some scaling.
1335 
1336        There are four cases to consider, depending on the values
1337        of alpha and beta.
1338 
1339        values                         scaling
1340        alpha   beta      alpha  A    B       beta    C    R (truth)
1341        0    1      1                    i               i    i
1342        1    1      ?                   1+i      1+i         1+i
1343        2    ?      1         1+i       1+i             2i    2i
1344        3    ?      ?         1+i       1+i      2i           2i
1345 
1346        Note that we can afford to scale R by 1+i, since they are
1347        computed in double-double precision.
1348      */
1349 
1350     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1351       ab = 0;
1352       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
1353     } else if (alpha_i[0] == 1.0) {
1354       ab = 1;
1355       /* set alpha to 1, multiply beta by 1+i. */
1356       alpha_i[1] = 0.0;
1357       beta_i[1] = beta_i[0];
1358     } else if (beta_i[0] == 1.0) {
1359       ab = 2;
1360       /* set beta to 1, multiply alpha by 1+i. */
1361       beta_i[1] = 0.0;
1362       alpha_i[1] = alpha_i[0];
1363     } else {
1364       ab = 3;
1365       /* multiply alpha by 1+i, beta by 2i. */
1366       alpha_i[1] = alpha_i[0];
1367       beta_i[1] = 2.0 * beta_i[0];
1368       beta_i[0] = 0.0;
1369     }
1370 
1371 
1372     /* Now fill in a */
1373     for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
1374       for (j = 0, aij = ai, a1ij = a1i; j < m_i;
1375 	   j++, aij += incaij, a1ij += inca1ij) {
1376 	a_i[aij] = a1[a1ij];
1377 	a_i[aij + 1] = a2[a1ij];
1378       }
1379     }
1380 
1381     /* Fill in b */
1382     for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
1383       for (j = 0, bij = bi, mij = mi; j < n_i;
1384 	   j++, bij += incbij, mij += incij) {
1385 	if (ab == 0) {
1386 	  b_i[bij] = 0.0;
1387 	  b_i[bij + 1] = b0[mij];
1388 	} else {
1389 	  b_i[bij] = b0[mij];
1390 	  b_i[bij + 1] = b0[mij];
1391 	}
1392       }
1393     }
1394 
1395     /* Fill in c */
1396     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1397       for (j = 0, cij = ci, mij = mi; j < n_i;
1398 	   j++, cij += inccij, mij += incij) {
1399 	if (ab == 0) {
1400 	  c_i[cij] = -c2[mij];
1401 	  c_i[cij + 1] = c1[mij];
1402 	} else if (ab == 2) {
1403 	  c_i[cij] = -2.0 * c2[mij];
1404 	  c_i[cij + 1] = 2.0 * c1[mij];
1405 	} else {
1406 	  c_i[cij] = c1[mij];
1407 	  c_i[cij + 1] = c2[mij];
1408 	}
1409       }
1410     }
1411 
1412     /* Fill in truth */
1413     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1414       for (j = 0, cij = ci, mij = mi; j < n_i;
1415 	   j++, cij += inccij, mij += incij) {
1416 
1417 	head_r_elem1 = head_r1_true[mij];
1418 	tail_r_elem1 = tail_r1_true[mij];
1419 
1420 	head_r_elem2 = head_r2_true[mij];
1421 	tail_r_elem2 = tail_r2_true[mij];
1422 
1423 	if (ab == 0) {
1424 	  head_r_true[cij] = -head_r_elem2;
1425 	  tail_r_true[cij] = -tail_r_elem2;
1426 	  head_r_true[cij + 1] = head_r_elem1;
1427 	  tail_r_true[cij + 1] = tail_r_elem1;
1428 	} else if (ab == 1) {
1429 	  {
1430 	    /* Compute double-double = double-double + double-double. */
1431 	    double bv;
1432 	    double s1, s2, t1, t2;
1433 
1434 	    /* Add two hi words. */
1435 	    s1 = head_r_elem1 + head_r_elem2;
1436 	    bv = s1 - head_r_elem1;
1437 	    s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1438 
1439 	    /* Add two lo words. */
1440 	    t1 = tail_r_elem1 + tail_r_elem2;
1441 	    bv = t1 - tail_r_elem1;
1442 	    t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1443 
1444 	    s2 += t1;
1445 
1446 	    /* Renormalize (s1, s2)  to  (t1, s2) */
1447 	    t1 = s1 + s2;
1448 	    s2 = s2 - (t1 - s1);
1449 
1450 	    t2 += s2;
1451 
1452 	    /* Renormalize (t1, t2)  */
1453 	    head_r_elem = t1 + t2;
1454 	    tail_r_elem = t2 - (head_r_elem - t1);
1455 	  }
1456 
1457 	  /* Set the imaginary part to  R1 + R2 */
1458 	  tail_r_true[cij + 1] = tail_r_elem;
1459 	  head_r_true[cij + 1] = head_r_elem;
1460 
1461 	  /* Set the real part to R1 - R2. */
1462 	  {
1463 	    double head_bt, tail_bt;
1464 	    head_bt = -head_r_elem2;
1465 	    tail_bt = -tail_r_elem2;
1466 	    {
1467 	      /* Compute double-double = double-double + double-double. */
1468 	      double bv;
1469 	      double s1, s2, t1, t2;
1470 
1471 	      /* Add two hi words. */
1472 	      s1 = head_r_elem1 + head_bt;
1473 	      bv = s1 - head_r_elem1;
1474 	      s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1475 
1476 	      /* Add two lo words. */
1477 	      t1 = tail_r_elem1 + tail_bt;
1478 	      bv = t1 - tail_r_elem1;
1479 	      t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1480 
1481 	      s2 += t1;
1482 
1483 	      /* Renormalize (s1, s2)  to  (t1, s2) */
1484 	      t1 = s1 + s2;
1485 	      s2 = s2 - (t1 - s1);
1486 
1487 	      t2 += s2;
1488 
1489 	      /* Renormalize (t1, t2)  */
1490 	      head_r_elem = t1 + t2;
1491 	      tail_r_elem = t2 - (head_r_elem - t1);
1492 	    }
1493 	  }
1494 	  tail_r_true[cij] = tail_r_elem;
1495 	  head_r_true[cij] = head_r_elem;
1496 	} else {
1497 
1498 	  /* Real part */
1499 	  {
1500 	    /* Compute double-double = double-double * double. */
1501 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1502 
1503 	    con = head_r_elem2 * split;
1504 	    a11 = con - head_r_elem2;
1505 	    a11 = con - a11;
1506 	    a21 = head_r_elem2 - a11;
1507 	    con = -2.0 * split;
1508 	    b1 = con - -2.0;
1509 	    b1 = con - b1;
1510 	    b2 = -2.0 - b1;
1511 
1512 	    c11 = head_r_elem2 * -2.0;
1513 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1514 
1515 	    c2 = tail_r_elem2 * -2.0;
1516 	    t1 = c11 + c2;
1517 	    t2 = (c2 - (t1 - c11)) + c21;
1518 
1519 	    head_r_elem = t1 + t2;
1520 	    tail_r_elem = t2 - (head_r_elem - t1);
1521 	  }
1522 	  head_r_true[cij] = head_r_elem;
1523 	  tail_r_true[cij] = tail_r_elem;
1524 
1525 	  /* Imaginary Part */
1526 	  {
1527 	    /* Compute double-double = double-double * double. */
1528 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1529 
1530 	    con = head_r_elem1 * split;
1531 	    a11 = con - head_r_elem1;
1532 	    a11 = con - a11;
1533 	    a21 = head_r_elem1 - a11;
1534 	    con = 2.0 * split;
1535 	    b1 = con - 2.0;
1536 	    b1 = con - b1;
1537 	    b2 = 2.0 - b1;
1538 
1539 	    c11 = head_r_elem1 * 2.0;
1540 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1541 
1542 	    c2 = tail_r_elem1 * 2.0;
1543 	    t1 = c11 + c2;
1544 	    t2 = (c2 - (t1 - c11)) + c21;
1545 
1546 	    head_r_elem = t1 + t2;
1547 	    tail_r_elem = t2 - (head_r_elem - t1);
1548 	  }
1549 	  head_r_true[cij + 1] = head_r_elem;
1550 	  tail_r_true[cij + 1] = tail_r_elem;
1551 	}
1552       }
1553     }
1554 
1555     blas_free(a1);
1556     blas_free(a2);
1557     blas_free(c1);
1558     blas_free(c2);
1559     blas_free(b0);
1560     blas_free(head_r1_true);
1561     blas_free(tail_r1_true);
1562     blas_free(head_r2_true);
1563     blas_free(tail_r2_true);
1564 
1565   } else {
1566     /* random matrix */
1567     float a_elem[2];
1568     double b_elem[2];
1569     double c_elem[2];
1570     double head_r_true_elem[2], tail_r_true_elem[2];
1571 
1572     /* Since mixed real/complex test generator for dot
1573        scales the vectors, we need to used the non-mixed
1574        version if B is real (since A is always complex). */
1575 
1576 
1577     if (alpha_flag == 0) {
1578       alpha_i[0] = (float) xrand(seed);
1579       alpha_i[1] = (float) xrand(seed);
1580     }
1581     if (beta_flag == 0) {
1582       beta_i[0] = (float) xrand(seed);
1583       beta_i[1] = (float) xrand(seed);
1584     }
1585 
1586     /* Fill in matrix A -- Hermitian. */
1587     for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
1588       for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
1589 	a_elem[0] = (float) xrand(seed);
1590 	a_elem[1] = (float) xrand(seed);
1591 	a_i[aij] = a_elem[0];
1592 	a_i[aij + 1] = a_elem[1];
1593 	if (i == j)
1594 	  a_i[aij + 1] = 0.0;
1595       }
1596     }
1597 
1598     /* Fill in matrix B */
1599     for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
1600       for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
1601 	b_elem[0] = (float) xrand(seed);
1602 	b_elem[1] = (float) xrand(seed);
1603 	b_i[bij] = b_elem[0];
1604 	b_i[bij + 1] = b_elem[1];
1605       }
1606     }
1607 
1608     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
1609       che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
1610       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
1611 
1612 	if (side == blas_left_side)
1613 	  zge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1614 	else
1615 	  zge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
1616 
1617 	/* copy the real b_vec into complex bb_vec, so that
1618 	   pure complex test case generator can be called. */
1619 
1620 	BLAS_zdot_c_z_testgen(m_i, m_i, 0, norm, blas_no_conj,
1621 			      alpha, 1, beta, 1, a_vec, b_vec, seed,
1622 			      c_elem, head_r_true_elem, tail_r_true_elem);
1623 
1624 	c_i[cij] = c_elem[0];
1625 	c_i[cij + 1] = c_elem[1];
1626 	head_r_true[cij] = head_r_true_elem[0];
1627 	head_r_true[cij + 1] = head_r_true_elem[1];
1628 	tail_r_true[cij] = tail_r_true_elem[0];
1629 	tail_r_true[cij + 1] = tail_r_true_elem[1];
1630       }
1631     }
1632 
1633 
1634 
1635   }
1636 
1637   blas_free(a_vec);
1638   blas_free(b_vec);
1639 }
BLAS_zhemm_z_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)1640 void BLAS_zhemm_z_c_testgen(int norm, enum blas_order_type order,
1641 			    enum blas_uplo_type uplo,
1642 			    enum blas_side_type side, int m, int n,
1643 			    int randomize, void *alpha, int alpha_flag,
1644 			    void *beta, int beta_flag, void *a, int lda,
1645 			    void *b, int ldb, void *c, int ldc, int *seed,
1646 			    double *head_r_true, double *tail_r_true)
1647 
1648 /*
1649  * Purpose
1650  * =======
1651  *
1652  *   Generates the test inputs to BLAS_zhemm_z_c{_x}
1653  *
1654  * Arguments
1655  * =========
1656  *
1657  * norm    (input) int
1658  *           = -1: the vectors are scaled with norms near underflow.
1659  *           =  0: the vectors have norms of order 1.
1660  *           =  1: the vectors are scaled with norms near overflow.
1661  *
1662  * order   (input) enum blas_side_type
1663  *           storage format of the matrices
1664  *
1665  * uplo    (input) enum blas_uplo_type
1666  *           which half of the hermitian matrix a is to be stored.
1667  *
1668  * side    (input) enum blas_side_type
1669  *           which side of matrix b matrix a is to be multiplied.
1670  *
1671  * m n     (input) int
1672  *           sizes of matrices a, b, c:
1673  *              matrix a is m-by-m for left multiplication
1674  *                          n-by-n otherwise,
1675  *              matrices b, c are m-by-n.
1676  *
1677  * randomize (input) int
1678  *           = 0: test case made for maximum cancellation.
1679  *           = 1: test case made for maximum radomization.
1680  *
1681  * alpha   (input/output) void*
1682  *           if alpha_flag = 1, alpha is input.
1683  *           if alpha_flag = 0, alpha is output.
1684  *
1685  * alpha_flag (input) int
1686  *           = 0: alpha is free, and is output.
1687  *           = 1: alpha is fixed on input.
1688  *
1689  * beta    (input/output) void*
1690  *           if beta_flag = 1, beta is input.
1691  *           if beta_flag = 0, beta is output.
1692  *
1693  * beta_flag (input) int
1694  *           = 0: beta is free, and is output.
1695  *           = 1: beta is fixed on input.
1696  *
1697  * a       (input/output) void*
1698  *
1699  * lda     (input) lda
1700  *         leading dimension of matrix A.
1701  *
1702  * b       (input/output) void*
1703  *
1704  * ldb     (input) int
1705  *         leading dimension of matrix B.
1706  *
1707  * c       (input/output) void*
1708  *         generated matrix C that will be used as an input to HEMM.
1709  *
1710  * ldc     (input) int
1711  *         leading dimension of matrix C.
1712  *
1713  * seed    (input/output) int *
1714  *         seed for the random number generator.
1715  *
1716  * head_r_true  (output) double *
1717  *         the leading part of the truth in double-double.
1718  *
1719  * tail_r_true  (output) double *
1720  *         the trailing part of the truth in double-double
1721  *
1722  */
1723 {
1724 
1725   /* Strategy:
1726      R1 = alpha * A1 * B + beta * C1
1727      R2 = alpha * A2 * B + beta * C2
1728      where all the matrices are real.  Then let R = R1 + i R2,
1729      A = A1 + i A2, C = C1 + i C2.  To make A hermitian, A1 is
1730      symmetric, and A2 is a skew matrix (trans(A2) = -A2).
1731    */
1732 
1733   int i, j;
1734   int cij, ci;
1735   int aij, ai;
1736   int a1ij, a1i;
1737   int bij, bi;
1738   int mij, mi;
1739   int inccij, incci;
1740   int incaij, incai;
1741   int inca1ij, inca1i;
1742   int incbij, incbi;
1743   int inci, incij;
1744   int inca, incb;
1745   int m_i, n_i;
1746   int ld;
1747   int ab;
1748 
1749   double *a1;
1750   double *a2;
1751   double *c1;
1752   double *c2;
1753   float *b0;
1754 
1755   double *head_r1_true, *tail_r1_true;
1756   double *head_r2_true, *tail_r2_true;
1757 
1758   double head_r_elem1, tail_r_elem1;
1759   double head_r_elem2, tail_r_elem2;
1760   double head_r_elem, tail_r_elem;
1761 
1762   double *a_vec;
1763   float *b_vec;
1764 
1765   double *c_i = (double *) c;
1766   double *alpha_i = (double *) alpha;
1767   double *beta_i = (double *) beta;
1768   double *a_i = a;
1769   float *b_i = b;
1770 
1771   if (side == blas_left_side) {
1772     m_i = m;
1773     n_i = n;
1774   } else {
1775     m_i = n;
1776     n_i = m;
1777   }
1778 
1779   if (order == blas_colmajor)
1780     ld = m;
1781   else
1782     ld = n;
1783 
1784 
1785   if ((order == blas_colmajor && side == blas_left_side) ||
1786       (order == blas_rowmajor && side == blas_right_side)) {
1787     inci = inca1i = incai = incbi = incci = 1;
1788     inccij = ldc;
1789     incaij = lda;
1790     incbij = ldb;
1791     inca1ij = m_i;
1792     incij = ld;
1793   } else {
1794     incci = ldc;
1795     incai = lda;
1796     incbi = ldb;
1797     inca1i = m_i;
1798     inci = ld;
1799     incij = inca1ij = incaij = incbij = inccij = 1;
1800   }
1801 
1802   incci *= 2;
1803   inccij *= 2;
1804   incai *= 2;
1805   incaij *= 2;
1806   incbi *= 2;
1807   incbij *= 2;
1808 
1809   inca = incb = 1;
1810   inca *= 2;
1811   incb *= 2;
1812   a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
1813   if (m_i > 0 && a_vec == NULL) {
1814     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1815   }
1816   for (i = 0; i < m_i * inca; i += inca) {
1817     a_vec[i] = 0.0;
1818     a_vec[i + 1] = 0.0;
1819   }
1820   b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
1821   if (m_i > 0 && b_vec == NULL) {
1822     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1823   }
1824   for (i = 0; i < m_i * incb; i += incb) {
1825     b_vec[i] = 0.0;
1826     b_vec[i + 1] = 0.0;
1827   }
1828 
1829   if (randomize == 0) {
1830     a1 = (double *) blas_malloc(m_i * m_i * sizeof(double));
1831     if (m_i * m_i > 0 && a1 == NULL) {
1832       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1833     }
1834     a2 = (double *) blas_malloc(m_i * m_i * sizeof(double));
1835     if (m_i * m_i > 0 && a2 == NULL) {
1836       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1837     }
1838     for (i = 0; i < m_i * m_i; ++i) {
1839       a1[i] = a2[i] = 0.0;
1840     }
1841     c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1842     if (m_i * n_i > 0 && c1 == NULL) {
1843       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1844     }
1845     c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
1846     if (m_i * n_i > 0 && c2 == NULL) {
1847       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1848     }
1849     b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
1850     if (m_i * n_i > 0 && b0 == NULL) {
1851       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1852     }
1853     for (i = 0; i < m_i * n_i; ++i) {
1854       c1[i] = c2[i] = b0[i] = 0.0;
1855     }
1856     head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1857     tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1858     if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1859       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1860     };
1861     head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1862     tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
1863     if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1864       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1865     };
1866 
1867     /* First generate the real portion of matrix A, and matrix B.
1868        Note that Re(A) is a symmetric matrix. */
1869     BLAS_dsymm_d_s_testgen
1870       (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
1871        beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
1872 
1873     BLAS_dskew_d_s_testgen
1874       (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
1875        c2, ld, seed, head_r2_true, tail_r2_true);
1876 
1877 
1878     /* The case where B is a complex matrix.  Since B is generated
1879        as a real matrix, we need to perform some scaling.
1880 
1881        There are four cases to consider, depending on the values
1882        of alpha and beta.
1883 
1884        values                         scaling
1885        alpha   beta      alpha  A    B       beta    C    R (truth)
1886        0    1      1                    i               i    i
1887        1    1      ?                   1+i      1+i         1+i
1888        2    ?      1         1+i       1+i             2i    2i
1889        3    ?      ?         1+i       1+i      2i           2i
1890 
1891        Note that we can afford to scale R by 1+i, since they are
1892        computed in double-double precision.
1893      */
1894 
1895     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1896       ab = 0;
1897       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
1898     } else if (alpha_i[0] == 1.0) {
1899       ab = 1;
1900       /* set alpha to 1, multiply beta by 1+i. */
1901       alpha_i[1] = 0.0;
1902       beta_i[1] = beta_i[0];
1903     } else if (beta_i[0] == 1.0) {
1904       ab = 2;
1905       /* set beta to 1, multiply alpha by 1+i. */
1906       beta_i[1] = 0.0;
1907       alpha_i[1] = alpha_i[0];
1908     } else {
1909       ab = 3;
1910       /* multiply alpha by 1+i, beta by 2i. */
1911       alpha_i[1] = alpha_i[0];
1912       beta_i[1] = 2.0 * beta_i[0];
1913       beta_i[0] = 0.0;
1914     }
1915 
1916 
1917     /* Now fill in a */
1918     for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
1919       for (j = 0, aij = ai, a1ij = a1i; j < m_i;
1920 	   j++, aij += incaij, a1ij += inca1ij) {
1921 	a_i[aij] = a1[a1ij];
1922 	a_i[aij + 1] = a2[a1ij];
1923       }
1924     }
1925 
1926     /* Fill in b */
1927     for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
1928       for (j = 0, bij = bi, mij = mi; j < n_i;
1929 	   j++, bij += incbij, mij += incij) {
1930 	if (ab == 0) {
1931 	  b_i[bij] = 0.0;
1932 	  b_i[bij + 1] = b0[mij];
1933 	} else {
1934 	  b_i[bij] = b0[mij];
1935 	  b_i[bij + 1] = b0[mij];
1936 	}
1937       }
1938     }
1939 
1940     /* Fill in c */
1941     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1942       for (j = 0, cij = ci, mij = mi; j < n_i;
1943 	   j++, cij += inccij, mij += incij) {
1944 	if (ab == 0) {
1945 	  c_i[cij] = -c2[mij];
1946 	  c_i[cij + 1] = c1[mij];
1947 	} else if (ab == 2) {
1948 	  c_i[cij] = -2.0 * c2[mij];
1949 	  c_i[cij + 1] = 2.0 * c1[mij];
1950 	} else {
1951 	  c_i[cij] = c1[mij];
1952 	  c_i[cij + 1] = c2[mij];
1953 	}
1954       }
1955     }
1956 
1957     /* Fill in truth */
1958     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
1959       for (j = 0, cij = ci, mij = mi; j < n_i;
1960 	   j++, cij += inccij, mij += incij) {
1961 
1962 	head_r_elem1 = head_r1_true[mij];
1963 	tail_r_elem1 = tail_r1_true[mij];
1964 
1965 	head_r_elem2 = head_r2_true[mij];
1966 	tail_r_elem2 = tail_r2_true[mij];
1967 
1968 	if (ab == 0) {
1969 	  head_r_true[cij] = -head_r_elem2;
1970 	  tail_r_true[cij] = -tail_r_elem2;
1971 	  head_r_true[cij + 1] = head_r_elem1;
1972 	  tail_r_true[cij + 1] = tail_r_elem1;
1973 	} else if (ab == 1) {
1974 	  {
1975 	    /* Compute double-double = double-double + double-double. */
1976 	    double bv;
1977 	    double s1, s2, t1, t2;
1978 
1979 	    /* Add two hi words. */
1980 	    s1 = head_r_elem1 + head_r_elem2;
1981 	    bv = s1 - head_r_elem1;
1982 	    s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1983 
1984 	    /* Add two lo words. */
1985 	    t1 = tail_r_elem1 + tail_r_elem2;
1986 	    bv = t1 - tail_r_elem1;
1987 	    t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1988 
1989 	    s2 += t1;
1990 
1991 	    /* Renormalize (s1, s2)  to  (t1, s2) */
1992 	    t1 = s1 + s2;
1993 	    s2 = s2 - (t1 - s1);
1994 
1995 	    t2 += s2;
1996 
1997 	    /* Renormalize (t1, t2)  */
1998 	    head_r_elem = t1 + t2;
1999 	    tail_r_elem = t2 - (head_r_elem - t1);
2000 	  }
2001 
2002 	  /* Set the imaginary part to  R1 + R2 */
2003 	  tail_r_true[cij + 1] = tail_r_elem;
2004 	  head_r_true[cij + 1] = head_r_elem;
2005 
2006 	  /* Set the real part to R1 - R2. */
2007 	  {
2008 	    double head_bt, tail_bt;
2009 	    head_bt = -head_r_elem2;
2010 	    tail_bt = -tail_r_elem2;
2011 	    {
2012 	      /* Compute double-double = double-double + double-double. */
2013 	      double bv;
2014 	      double s1, s2, t1, t2;
2015 
2016 	      /* Add two hi words. */
2017 	      s1 = head_r_elem1 + head_bt;
2018 	      bv = s1 - head_r_elem1;
2019 	      s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2020 
2021 	      /* Add two lo words. */
2022 	      t1 = tail_r_elem1 + tail_bt;
2023 	      bv = t1 - tail_r_elem1;
2024 	      t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2025 
2026 	      s2 += t1;
2027 
2028 	      /* Renormalize (s1, s2)  to  (t1, s2) */
2029 	      t1 = s1 + s2;
2030 	      s2 = s2 - (t1 - s1);
2031 
2032 	      t2 += s2;
2033 
2034 	      /* Renormalize (t1, t2)  */
2035 	      head_r_elem = t1 + t2;
2036 	      tail_r_elem = t2 - (head_r_elem - t1);
2037 	    }
2038 	  }
2039 	  tail_r_true[cij] = tail_r_elem;
2040 	  head_r_true[cij] = head_r_elem;
2041 	} else {
2042 
2043 	  /* Real part */
2044 	  {
2045 	    /* Compute double-double = double-double * double. */
2046 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2047 
2048 	    con = head_r_elem2 * split;
2049 	    a11 = con - head_r_elem2;
2050 	    a11 = con - a11;
2051 	    a21 = head_r_elem2 - a11;
2052 	    con = -2.0 * split;
2053 	    b1 = con - -2.0;
2054 	    b1 = con - b1;
2055 	    b2 = -2.0 - b1;
2056 
2057 	    c11 = head_r_elem2 * -2.0;
2058 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2059 
2060 	    c2 = tail_r_elem2 * -2.0;
2061 	    t1 = c11 + c2;
2062 	    t2 = (c2 - (t1 - c11)) + c21;
2063 
2064 	    head_r_elem = t1 + t2;
2065 	    tail_r_elem = t2 - (head_r_elem - t1);
2066 	  }
2067 	  head_r_true[cij] = head_r_elem;
2068 	  tail_r_true[cij] = tail_r_elem;
2069 
2070 	  /* Imaginary Part */
2071 	  {
2072 	    /* Compute double-double = double-double * double. */
2073 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2074 
2075 	    con = head_r_elem1 * split;
2076 	    a11 = con - head_r_elem1;
2077 	    a11 = con - a11;
2078 	    a21 = head_r_elem1 - a11;
2079 	    con = 2.0 * split;
2080 	    b1 = con - 2.0;
2081 	    b1 = con - b1;
2082 	    b2 = 2.0 - b1;
2083 
2084 	    c11 = head_r_elem1 * 2.0;
2085 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2086 
2087 	    c2 = tail_r_elem1 * 2.0;
2088 	    t1 = c11 + c2;
2089 	    t2 = (c2 - (t1 - c11)) + c21;
2090 
2091 	    head_r_elem = t1 + t2;
2092 	    tail_r_elem = t2 - (head_r_elem - t1);
2093 	  }
2094 	  head_r_true[cij + 1] = head_r_elem;
2095 	  tail_r_true[cij + 1] = tail_r_elem;
2096 	}
2097       }
2098     }
2099 
2100     blas_free(a1);
2101     blas_free(a2);
2102     blas_free(c1);
2103     blas_free(c2);
2104     blas_free(b0);
2105     blas_free(head_r1_true);
2106     blas_free(tail_r1_true);
2107     blas_free(head_r2_true);
2108     blas_free(tail_r2_true);
2109 
2110   } else {
2111     /* random matrix */
2112     double a_elem[2];
2113     float b_elem[2];
2114     double c_elem[2];
2115     double head_r_true_elem[2], tail_r_true_elem[2];
2116 
2117     /* Since mixed real/complex test generator for dot
2118        scales the vectors, we need to used the non-mixed
2119        version if B is real (since A is always complex). */
2120 
2121 
2122     if (alpha_flag == 0) {
2123       alpha_i[0] = (float) xrand(seed);
2124       alpha_i[1] = (float) xrand(seed);
2125     }
2126     if (beta_flag == 0) {
2127       beta_i[0] = (float) xrand(seed);
2128       beta_i[1] = (float) xrand(seed);
2129     }
2130 
2131     /* Fill in matrix A -- Hermitian. */
2132     for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2133       for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2134 	a_elem[0] = (float) xrand(seed);
2135 	a_elem[1] = (float) xrand(seed);
2136 	a_i[aij] = a_elem[0];
2137 	a_i[aij + 1] = a_elem[1];
2138 	if (i == j)
2139 	  a_i[aij + 1] = 0.0;
2140       }
2141     }
2142 
2143     /* Fill in matrix B */
2144     for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2145       for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2146 	b_elem[0] = (float) xrand(seed);
2147 	b_elem[1] = (float) xrand(seed);
2148 	b_i[bij] = b_elem[0];
2149 	b_i[bij + 1] = b_elem[1];
2150       }
2151     }
2152 
2153     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2154       zhe_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
2155       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2156 
2157 	if (side == blas_left_side)
2158 	  cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2159 	else
2160 	  cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2161 
2162 	/* copy the real b_vec into complex bb_vec, so that
2163 	   pure complex test case generator can be called. */
2164 
2165 	BLAS_zdot_z_c_testgen(m_i, m_i, 0, norm, blas_no_conj,
2166 			      alpha, 1, beta, 1, a_vec, b_vec, seed,
2167 			      c_elem, head_r_true_elem, tail_r_true_elem);
2168 
2169 	c_i[cij] = c_elem[0];
2170 	c_i[cij + 1] = c_elem[1];
2171 	head_r_true[cij] = head_r_true_elem[0];
2172 	head_r_true[cij + 1] = head_r_true_elem[1];
2173 	tail_r_true[cij] = tail_r_true_elem[0];
2174 	tail_r_true[cij + 1] = tail_r_true_elem[1];
2175       }
2176     }
2177 
2178 
2179 
2180   }
2181 
2182   blas_free(a_vec);
2183   blas_free(b_vec);
2184 }
BLAS_zhemm_c_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)2185 void BLAS_zhemm_c_c_testgen(int norm, enum blas_order_type order,
2186 			    enum blas_uplo_type uplo,
2187 			    enum blas_side_type side, int m, int n,
2188 			    int randomize, void *alpha, int alpha_flag,
2189 			    void *beta, int beta_flag, void *a, int lda,
2190 			    void *b, int ldb, void *c, int ldc, int *seed,
2191 			    double *head_r_true, double *tail_r_true)
2192 
2193 /*
2194  * Purpose
2195  * =======
2196  *
2197  *   Generates the test inputs to BLAS_zhemm_c_c{_x}
2198  *
2199  * Arguments
2200  * =========
2201  *
2202  * norm    (input) int
2203  *           = -1: the vectors are scaled with norms near underflow.
2204  *           =  0: the vectors have norms of order 1.
2205  *           =  1: the vectors are scaled with norms near overflow.
2206  *
2207  * order   (input) enum blas_side_type
2208  *           storage format of the matrices
2209  *
2210  * uplo    (input) enum blas_uplo_type
2211  *           which half of the hermitian matrix a is to be stored.
2212  *
2213  * side    (input) enum blas_side_type
2214  *           which side of matrix b matrix a is to be multiplied.
2215  *
2216  * m n     (input) int
2217  *           sizes of matrices a, b, c:
2218  *              matrix a is m-by-m for left multiplication
2219  *                          n-by-n otherwise,
2220  *              matrices b, c are m-by-n.
2221  *
2222  * randomize (input) int
2223  *           = 0: test case made for maximum cancellation.
2224  *           = 1: test case made for maximum radomization.
2225  *
2226  * alpha   (input/output) void*
2227  *           if alpha_flag = 1, alpha is input.
2228  *           if alpha_flag = 0, alpha is output.
2229  *
2230  * alpha_flag (input) int
2231  *           = 0: alpha is free, and is output.
2232  *           = 1: alpha is fixed on input.
2233  *
2234  * beta    (input/output) void*
2235  *           if beta_flag = 1, beta is input.
2236  *           if beta_flag = 0, beta is output.
2237  *
2238  * beta_flag (input) int
2239  *           = 0: beta is free, and is output.
2240  *           = 1: beta is fixed on input.
2241  *
2242  * a       (input/output) void*
2243  *
2244  * lda     (input) lda
2245  *         leading dimension of matrix A.
2246  *
2247  * b       (input/output) void*
2248  *
2249  * ldb     (input) int
2250  *         leading dimension of matrix B.
2251  *
2252  * c       (input/output) void*
2253  *         generated matrix C that will be used as an input to HEMM.
2254  *
2255  * ldc     (input) int
2256  *         leading dimension of matrix C.
2257  *
2258  * seed    (input/output) int *
2259  *         seed for the random number generator.
2260  *
2261  * head_r_true  (output) double *
2262  *         the leading part of the truth in double-double.
2263  *
2264  * tail_r_true  (output) double *
2265  *         the trailing part of the truth in double-double
2266  *
2267  */
2268 {
2269 
2270   /* Strategy:
2271      R1 = alpha * A1 * B + beta * C1
2272      R2 = alpha * A2 * B + beta * C2
2273      where all the matrices are real.  Then let R = R1 + i R2,
2274      A = A1 + i A2, C = C1 + i C2.  To make A hermitian, A1 is
2275      symmetric, and A2 is a skew matrix (trans(A2) = -A2).
2276    */
2277 
2278   int i, j;
2279   int cij, ci;
2280   int aij, ai;
2281   int a1ij, a1i;
2282   int bij, bi;
2283   int mij, mi;
2284   int inccij, incci;
2285   int incaij, incai;
2286   int inca1ij, inca1i;
2287   int incbij, incbi;
2288   int inci, incij;
2289   int inca, incb;
2290   int m_i, n_i;
2291   int ld;
2292   int ab;
2293 
2294   float *a1;
2295   float *a2;
2296   double *c1;
2297   double *c2;
2298   float *b0;
2299 
2300   double *head_r1_true, *tail_r1_true;
2301   double *head_r2_true, *tail_r2_true;
2302 
2303   double head_r_elem1, tail_r_elem1;
2304   double head_r_elem2, tail_r_elem2;
2305   double head_r_elem, tail_r_elem;
2306 
2307   float *a_vec;
2308   float *b_vec;
2309 
2310   double *c_i = (double *) c;
2311   double *alpha_i = (double *) alpha;
2312   double *beta_i = (double *) beta;
2313   float *a_i = a;
2314   float *b_i = b;
2315 
2316   if (side == blas_left_side) {
2317     m_i = m;
2318     n_i = n;
2319   } else {
2320     m_i = n;
2321     n_i = m;
2322   }
2323 
2324   if (order == blas_colmajor)
2325     ld = m;
2326   else
2327     ld = n;
2328 
2329 
2330   if ((order == blas_colmajor && side == blas_left_side) ||
2331       (order == blas_rowmajor && side == blas_right_side)) {
2332     inci = inca1i = incai = incbi = incci = 1;
2333     inccij = ldc;
2334     incaij = lda;
2335     incbij = ldb;
2336     inca1ij = m_i;
2337     incij = ld;
2338   } else {
2339     incci = ldc;
2340     incai = lda;
2341     incbi = ldb;
2342     inca1i = m_i;
2343     inci = ld;
2344     incij = inca1ij = incaij = incbij = inccij = 1;
2345   }
2346 
2347   incci *= 2;
2348   inccij *= 2;
2349   incai *= 2;
2350   incaij *= 2;
2351   incbi *= 2;
2352   incbij *= 2;
2353 
2354   inca = incb = 1;
2355   inca *= 2;
2356   incb *= 2;
2357   a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
2358   if (m_i > 0 && a_vec == NULL) {
2359     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2360   }
2361   for (i = 0; i < m_i * inca; i += inca) {
2362     a_vec[i] = 0.0;
2363     a_vec[i + 1] = 0.0;
2364   }
2365   b_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
2366   if (m_i > 0 && b_vec == NULL) {
2367     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2368   }
2369   for (i = 0; i < m_i * incb; i += incb) {
2370     b_vec[i] = 0.0;
2371     b_vec[i + 1] = 0.0;
2372   }
2373 
2374   if (randomize == 0) {
2375     a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
2376     if (m_i * m_i > 0 && a1 == NULL) {
2377       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2378     }
2379     a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
2380     if (m_i * m_i > 0 && a2 == NULL) {
2381       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2382     }
2383     for (i = 0; i < m_i * m_i; ++i) {
2384       a1[i] = a2[i] = 0.0;
2385     }
2386     c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2387     if (m_i * n_i > 0 && c1 == NULL) {
2388       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2389     }
2390     c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2391     if (m_i * n_i > 0 && c2 == NULL) {
2392       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2393     }
2394     b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
2395     if (m_i * n_i > 0 && b0 == NULL) {
2396       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2397     }
2398     for (i = 0; i < m_i * n_i; ++i) {
2399       c1[i] = c2[i] = b0[i] = 0.0;
2400     }
2401     head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2402     tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2403     if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2404       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2405     };
2406     head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2407     tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2408     if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2409       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2410     };
2411 
2412     /* First generate the real portion of matrix A, and matrix B.
2413        Note that Re(A) is a symmetric matrix. */
2414     BLAS_dsymm_s_s_testgen
2415       (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
2416        beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
2417 
2418     BLAS_dskew_s_s_testgen
2419       (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
2420        c2, ld, seed, head_r2_true, tail_r2_true);
2421 
2422 
2423     /* The case where B is a complex matrix.  Since B is generated
2424        as a real matrix, we need to perform some scaling.
2425 
2426        There are four cases to consider, depending on the values
2427        of alpha and beta.
2428 
2429        values                         scaling
2430        alpha   beta      alpha  A    B       beta    C    R (truth)
2431        0    1      1                    i               i    i
2432        1    1      ?                   1+i      1+i         1+i
2433        2    ?      1         1+i       1+i             2i    2i
2434        3    ?      ?         1+i       1+i      2i           2i
2435 
2436        Note that we can afford to scale R by 1+i, since they are
2437        computed in double-double precision.
2438      */
2439 
2440     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2441       ab = 0;
2442       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
2443     } else if (alpha_i[0] == 1.0) {
2444       ab = 1;
2445       /* set alpha to 1, multiply beta by 1+i. */
2446       alpha_i[1] = 0.0;
2447       beta_i[1] = beta_i[0];
2448     } else if (beta_i[0] == 1.0) {
2449       ab = 2;
2450       /* set beta to 1, multiply alpha by 1+i. */
2451       beta_i[1] = 0.0;
2452       alpha_i[1] = alpha_i[0];
2453     } else {
2454       ab = 3;
2455       /* multiply alpha by 1+i, beta by 2i. */
2456       alpha_i[1] = alpha_i[0];
2457       beta_i[1] = 2.0 * beta_i[0];
2458       beta_i[0] = 0.0;
2459     }
2460 
2461 
2462     /* Now fill in a */
2463     for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
2464       for (j = 0, aij = ai, a1ij = a1i; j < m_i;
2465 	   j++, aij += incaij, a1ij += inca1ij) {
2466 	a_i[aij] = a1[a1ij];
2467 	a_i[aij + 1] = a2[a1ij];
2468       }
2469     }
2470 
2471     /* Fill in b */
2472     for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
2473       for (j = 0, bij = bi, mij = mi; j < n_i;
2474 	   j++, bij += incbij, mij += incij) {
2475 	if (ab == 0) {
2476 	  b_i[bij] = 0.0;
2477 	  b_i[bij + 1] = b0[mij];
2478 	} else {
2479 	  b_i[bij] = b0[mij];
2480 	  b_i[bij + 1] = b0[mij];
2481 	}
2482       }
2483     }
2484 
2485     /* Fill in c */
2486     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
2487       for (j = 0, cij = ci, mij = mi; j < n_i;
2488 	   j++, cij += inccij, mij += incij) {
2489 	if (ab == 0) {
2490 	  c_i[cij] = -c2[mij];
2491 	  c_i[cij + 1] = c1[mij];
2492 	} else if (ab == 2) {
2493 	  c_i[cij] = -2.0 * c2[mij];
2494 	  c_i[cij + 1] = 2.0 * c1[mij];
2495 	} else {
2496 	  c_i[cij] = c1[mij];
2497 	  c_i[cij + 1] = c2[mij];
2498 	}
2499       }
2500     }
2501 
2502     /* Fill in truth */
2503     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
2504       for (j = 0, cij = ci, mij = mi; j < n_i;
2505 	   j++, cij += inccij, mij += incij) {
2506 
2507 	head_r_elem1 = head_r1_true[mij];
2508 	tail_r_elem1 = tail_r1_true[mij];
2509 
2510 	head_r_elem2 = head_r2_true[mij];
2511 	tail_r_elem2 = tail_r2_true[mij];
2512 
2513 	if (ab == 0) {
2514 	  head_r_true[cij] = -head_r_elem2;
2515 	  tail_r_true[cij] = -tail_r_elem2;
2516 	  head_r_true[cij + 1] = head_r_elem1;
2517 	  tail_r_true[cij + 1] = tail_r_elem1;
2518 	} else if (ab == 1) {
2519 	  {
2520 	    /* Compute double-double = double-double + double-double. */
2521 	    double bv;
2522 	    double s1, s2, t1, t2;
2523 
2524 	    /* Add two hi words. */
2525 	    s1 = head_r_elem1 + head_r_elem2;
2526 	    bv = s1 - head_r_elem1;
2527 	    s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2528 
2529 	    /* Add two lo words. */
2530 	    t1 = tail_r_elem1 + tail_r_elem2;
2531 	    bv = t1 - tail_r_elem1;
2532 	    t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2533 
2534 	    s2 += t1;
2535 
2536 	    /* Renormalize (s1, s2)  to  (t1, s2) */
2537 	    t1 = s1 + s2;
2538 	    s2 = s2 - (t1 - s1);
2539 
2540 	    t2 += s2;
2541 
2542 	    /* Renormalize (t1, t2)  */
2543 	    head_r_elem = t1 + t2;
2544 	    tail_r_elem = t2 - (head_r_elem - t1);
2545 	  }
2546 
2547 	  /* Set the imaginary part to  R1 + R2 */
2548 	  tail_r_true[cij + 1] = tail_r_elem;
2549 	  head_r_true[cij + 1] = head_r_elem;
2550 
2551 	  /* Set the real part to R1 - R2. */
2552 	  {
2553 	    double head_bt, tail_bt;
2554 	    head_bt = -head_r_elem2;
2555 	    tail_bt = -tail_r_elem2;
2556 	    {
2557 	      /* Compute double-double = double-double + double-double. */
2558 	      double bv;
2559 	      double s1, s2, t1, t2;
2560 
2561 	      /* Add two hi words. */
2562 	      s1 = head_r_elem1 + head_bt;
2563 	      bv = s1 - head_r_elem1;
2564 	      s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2565 
2566 	      /* Add two lo words. */
2567 	      t1 = tail_r_elem1 + tail_bt;
2568 	      bv = t1 - tail_r_elem1;
2569 	      t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2570 
2571 	      s2 += t1;
2572 
2573 	      /* Renormalize (s1, s2)  to  (t1, s2) */
2574 	      t1 = s1 + s2;
2575 	      s2 = s2 - (t1 - s1);
2576 
2577 	      t2 += s2;
2578 
2579 	      /* Renormalize (t1, t2)  */
2580 	      head_r_elem = t1 + t2;
2581 	      tail_r_elem = t2 - (head_r_elem - t1);
2582 	    }
2583 	  }
2584 	  tail_r_true[cij] = tail_r_elem;
2585 	  head_r_true[cij] = head_r_elem;
2586 	} else {
2587 
2588 	  /* Real part */
2589 	  {
2590 	    /* Compute double-double = double-double * double. */
2591 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2592 
2593 	    con = head_r_elem2 * split;
2594 	    a11 = con - head_r_elem2;
2595 	    a11 = con - a11;
2596 	    a21 = head_r_elem2 - a11;
2597 	    con = -2.0 * split;
2598 	    b1 = con - -2.0;
2599 	    b1 = con - b1;
2600 	    b2 = -2.0 - b1;
2601 
2602 	    c11 = head_r_elem2 * -2.0;
2603 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2604 
2605 	    c2 = tail_r_elem2 * -2.0;
2606 	    t1 = c11 + c2;
2607 	    t2 = (c2 - (t1 - c11)) + c21;
2608 
2609 	    head_r_elem = t1 + t2;
2610 	    tail_r_elem = t2 - (head_r_elem - t1);
2611 	  }
2612 	  head_r_true[cij] = head_r_elem;
2613 	  tail_r_true[cij] = tail_r_elem;
2614 
2615 	  /* Imaginary Part */
2616 	  {
2617 	    /* Compute double-double = double-double * double. */
2618 	    double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2619 
2620 	    con = head_r_elem1 * split;
2621 	    a11 = con - head_r_elem1;
2622 	    a11 = con - a11;
2623 	    a21 = head_r_elem1 - a11;
2624 	    con = 2.0 * split;
2625 	    b1 = con - 2.0;
2626 	    b1 = con - b1;
2627 	    b2 = 2.0 - b1;
2628 
2629 	    c11 = head_r_elem1 * 2.0;
2630 	    c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2631 
2632 	    c2 = tail_r_elem1 * 2.0;
2633 	    t1 = c11 + c2;
2634 	    t2 = (c2 - (t1 - c11)) + c21;
2635 
2636 	    head_r_elem = t1 + t2;
2637 	    tail_r_elem = t2 - (head_r_elem - t1);
2638 	  }
2639 	  head_r_true[cij + 1] = head_r_elem;
2640 	  tail_r_true[cij + 1] = tail_r_elem;
2641 	}
2642       }
2643     }
2644 
2645     blas_free(a1);
2646     blas_free(a2);
2647     blas_free(c1);
2648     blas_free(c2);
2649     blas_free(b0);
2650     blas_free(head_r1_true);
2651     blas_free(tail_r1_true);
2652     blas_free(head_r2_true);
2653     blas_free(tail_r2_true);
2654 
2655   } else {
2656     /* random matrix */
2657     float a_elem[2];
2658     float b_elem[2];
2659     double c_elem[2];
2660     double head_r_true_elem[2], tail_r_true_elem[2];
2661 
2662     /* Since mixed real/complex test generator for dot
2663        scales the vectors, we need to used the non-mixed
2664        version if B is real (since A is always complex). */
2665 
2666 
2667     if (alpha_flag == 0) {
2668       alpha_i[0] = (float) xrand(seed);
2669       alpha_i[1] = (float) xrand(seed);
2670     }
2671     if (beta_flag == 0) {
2672       beta_i[0] = (float) xrand(seed);
2673       beta_i[1] = (float) xrand(seed);
2674     }
2675 
2676     /* Fill in matrix A -- Hermitian. */
2677     for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
2678       for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
2679 	a_elem[0] = (float) xrand(seed);
2680 	a_elem[1] = (float) xrand(seed);
2681 	a_i[aij] = a_elem[0];
2682 	a_i[aij + 1] = a_elem[1];
2683 	if (i == j)
2684 	  a_i[aij + 1] = 0.0;
2685       }
2686     }
2687 
2688     /* Fill in matrix B */
2689     for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
2690       for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
2691 	b_elem[0] = (float) xrand(seed);
2692 	b_elem[1] = (float) xrand(seed);
2693 	b_i[bij] = b_elem[0];
2694 	b_i[bij + 1] = b_elem[1];
2695       }
2696     }
2697 
2698     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
2699       che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
2700       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
2701 
2702 	if (side == blas_left_side)
2703 	  cge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2704 	else
2705 	  cge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
2706 
2707 	/* copy the real b_vec into complex bb_vec, so that
2708 	   pure complex test case generator can be called. */
2709 
2710 	BLAS_zdot_c_c_testgen(m_i, m_i, 0, norm, blas_no_conj,
2711 			      alpha, 1, beta, 1, a_vec, b_vec, seed,
2712 			      c_elem, head_r_true_elem, tail_r_true_elem);
2713 
2714 	c_i[cij] = c_elem[0];
2715 	c_i[cij + 1] = c_elem[1];
2716 	head_r_true[cij] = head_r_true_elem[0];
2717 	head_r_true[cij + 1] = head_r_true_elem[1];
2718 	tail_r_true[cij] = tail_r_true_elem[0];
2719 	tail_r_true[cij + 1] = tail_r_true_elem[1];
2720       }
2721     }
2722 
2723 
2724 
2725   }
2726 
2727   blas_free(a_vec);
2728   blas_free(b_vec);
2729 }
2730 
BLAS_zhemm_z_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,double * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)2731 void BLAS_zhemm_z_d_testgen(int norm, enum blas_order_type order,
2732 			    enum blas_uplo_type uplo,
2733 			    enum blas_side_type side, int m, int n,
2734 			    int randomize, void *alpha, int alpha_flag,
2735 			    void *beta, int beta_flag, void *a, int lda,
2736 			    double *b, int ldb, void *c, int ldc, int *seed,
2737 			    double *head_r_true, double *tail_r_true)
2738 
2739 /*
2740  * Purpose
2741  * =======
2742  *
2743  *   Generates the test inputs to BLAS_zhemm_z_d{_x}
2744  *
2745  * Arguments
2746  * =========
2747  *
2748  * norm    (input) int
2749  *           = -1: the vectors are scaled with norms near underflow.
2750  *           =  0: the vectors have norms of order 1.
2751  *           =  1: the vectors are scaled with norms near overflow.
2752  *
2753  * order   (input) enum blas_side_type
2754  *           storage format of the matrices
2755  *
2756  * uplo    (input) enum blas_uplo_type
2757  *           which half of the hermitian matrix a is to be stored.
2758  *
2759  * side    (input) enum blas_side_type
2760  *           which side of matrix b matrix a is to be multiplied.
2761  *
2762  * m n     (input) int
2763  *           sizes of matrices a, b, c:
2764  *              matrix a is m-by-m for left multiplication
2765  *                          n-by-n otherwise,
2766  *              matrices b, c are m-by-n.
2767  *
2768  * randomize (input) int
2769  *           = 0: test case made for maximum cancellation.
2770  *           = 1: test case made for maximum radomization.
2771  *
2772  * alpha   (input/output) void*
2773  *           if alpha_flag = 1, alpha is input.
2774  *           if alpha_flag = 0, alpha is output.
2775  *
2776  * alpha_flag (input) int
2777  *           = 0: alpha is free, and is output.
2778  *           = 1: alpha is fixed on input.
2779  *
2780  * beta    (input/output) void*
2781  *           if beta_flag = 1, beta is input.
2782  *           if beta_flag = 0, beta is output.
2783  *
2784  * beta_flag (input) int
2785  *           = 0: beta is free, and is output.
2786  *           = 1: beta is fixed on input.
2787  *
2788  * a       (input/output) void*
2789  *
2790  * lda     (input) lda
2791  *         leading dimension of matrix A.
2792  *
2793  * b       (input/output) double*
2794  *
2795  * ldb     (input) int
2796  *         leading dimension of matrix B.
2797  *
2798  * c       (input/output) void*
2799  *         generated matrix C that will be used as an input to HEMM.
2800  *
2801  * ldc     (input) int
2802  *         leading dimension of matrix C.
2803  *
2804  * seed    (input/output) int *
2805  *         seed for the random number generator.
2806  *
2807  * head_r_true  (output) double *
2808  *         the leading part of the truth in double-double.
2809  *
2810  * tail_r_true  (output) double *
2811  *         the trailing part of the truth in double-double
2812  *
2813  */
2814 {
2815 
2816   /* Strategy:
2817      R1 = alpha * A1 * B + beta * C1
2818      R2 = alpha * A2 * B + beta * C2
2819      where all the matrices are real.  Then let R = R1 + i R2,
2820      A = A1 + i A2, C = C1 + i C2.  To make A hermitian, A1 is
2821      symmetric, and A2 is a skew matrix (trans(A2) = -A2).
2822    */
2823 
2824   int i, j;
2825   int cij, ci;
2826   int aij, ai;
2827   int a1ij, a1i;
2828   int bij, bi;
2829   int mij, mi;
2830   int inccij, incci;
2831   int incaij, incai;
2832   int inca1ij, inca1i;
2833   int incbij, incbi;
2834   int inci, incij;
2835   int inca, incb;
2836   int m_i, n_i;
2837   int ld;
2838   int ab;
2839 
2840   double *a1;
2841   double *a2;
2842   double *c1;
2843   double *c2;
2844   double *b0;
2845 
2846   double *head_r1_true, *tail_r1_true;
2847   double *head_r2_true, *tail_r2_true;
2848 
2849   double head_r_elem1, tail_r_elem1;
2850   double head_r_elem2, tail_r_elem2;
2851   double head_r_elem, tail_r_elem;
2852 
2853   double *a_vec;
2854   double *b_vec;
2855 
2856   double *c_i = (double *) c;
2857   double *alpha_i = (double *) alpha;
2858   double *beta_i = (double *) beta;
2859   double *a_i = a;
2860   double *b_i = b;
2861 
2862   if (side == blas_left_side) {
2863     m_i = m;
2864     n_i = n;
2865   } else {
2866     m_i = n;
2867     n_i = m;
2868   }
2869 
2870   if (order == blas_colmajor)
2871     ld = m;
2872   else
2873     ld = n;
2874 
2875 
2876   if ((order == blas_colmajor && side == blas_left_side) ||
2877       (order == blas_rowmajor && side == blas_right_side)) {
2878     inci = inca1i = incai = incbi = incci = 1;
2879     inccij = ldc;
2880     incaij = lda;
2881     incbij = ldb;
2882     inca1ij = m_i;
2883     incij = ld;
2884   } else {
2885     incci = ldc;
2886     incai = lda;
2887     incbi = ldb;
2888     inca1i = m_i;
2889     inci = ld;
2890     incij = inca1ij = incaij = incbij = inccij = 1;
2891   }
2892 
2893   incci *= 2;
2894   inccij *= 2;
2895   incai *= 2;
2896   incaij *= 2;
2897 
2898 
2899 
2900   inca = incb = 1;
2901   inca *= 2;
2902 
2903   a_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
2904   if (m_i > 0 && a_vec == NULL) {
2905     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2906   }
2907   for (i = 0; i < m_i * inca; i += inca) {
2908     a_vec[i] = 0.0;
2909     a_vec[i + 1] = 0.0;
2910   }
2911   b_vec = (double *) blas_malloc(m_i * sizeof(double));
2912   if (m_i > 0 && b_vec == NULL) {
2913     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2914   }
2915   for (i = 0; i < m_i * incb; i += incb) {
2916     b_vec[i] = 0.0;
2917   }
2918 
2919   if (randomize == 0) {
2920     a1 = (double *) blas_malloc(m_i * m_i * sizeof(double));
2921     if (m_i * m_i > 0 && a1 == NULL) {
2922       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2923     }
2924     a2 = (double *) blas_malloc(m_i * m_i * sizeof(double));
2925     if (m_i * m_i > 0 && a2 == NULL) {
2926       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2927     }
2928     for (i = 0; i < m_i * m_i; ++i) {
2929       a1[i] = a2[i] = 0.0;
2930     }
2931     c1 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2932     if (m_i * n_i > 0 && c1 == NULL) {
2933       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2934     }
2935     c2 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2936     if (m_i * n_i > 0 && c2 == NULL) {
2937       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2938     }
2939     b0 = (double *) blas_malloc(m_i * n_i * sizeof(double));
2940     if (m_i * n_i > 0 && b0 == NULL) {
2941       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2942     }
2943     for (i = 0; i < m_i * n_i; ++i) {
2944       c1[i] = c2[i] = b0[i] = 0.0;
2945     }
2946     head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2947     tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2948     if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2949       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2950     };
2951     head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2952     tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
2953     if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2954       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2955     };
2956 
2957     /* First generate the real portion of matrix A, and matrix B.
2958        Note that Re(A) is a symmetric matrix. */
2959     BLAS_dsymm_testgen
2960       (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
2961        beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
2962 
2963     BLAS_dskew_testgen
2964       (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
2965        c2, ld, seed, head_r2_true, tail_r2_true);
2966 
2967 
2968     /* The case where B is a real matrix.
2969 
2970        There are four cases to consider, depending on the
2971        values of alpha and beta.
2972 
2973        values                             scaling
2974        alpha  beta         alpha    A    B    beta    C     R (truth)
2975        0    1      1
2976        1    1      ?                              -i     i
2977        2    ?      1            i                        i     i
2978        3    ?      ?           1+i                1+i         1+i
2979 
2980        Note that we can afford to scale truth by (1+i) since they
2981        are computed in double-double.
2982      */
2983 
2984     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2985       ab = 0;
2986       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
2987     } else if (alpha_i[0] == 1.0) {
2988       ab = 1;
2989       /* set alpha to 1, multiply beta by -i. */
2990       alpha_i[1] = 0.0;
2991       beta_i[1] = -beta_i[0];
2992       beta_i[0] = 0.0;
2993     } else if (beta_i[0] == 1.0) {
2994       ab = 2;
2995       /* set beta to 1, multiply alpha by i. */
2996       beta_i[1] = 0.0;
2997       alpha_i[1] = alpha_i[0];
2998       alpha_i[0] = 0.0;
2999     } else {
3000       ab = 3;
3001       /* multiply alpha, beta by (1 + i). */
3002       alpha_i[1] = alpha_i[0];
3003       beta_i[1] = beta_i[0];
3004     }
3005 
3006     /* Now fill in a */
3007     for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
3008       for (j = 0, aij = ai, a1ij = a1i; j < m_i;
3009 	   j++, aij += incaij, a1ij += inca1ij) {
3010 	a_i[aij] = a1[a1ij];
3011 	a_i[aij + 1] = a2[a1ij];
3012       }
3013     }
3014 
3015     /* Fill in b */
3016     for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
3017       for (j = 0, bij = bi, mij = mi; j < n_i;
3018 	   j++, bij += incbij, mij += incij) {
3019 	b_i[bij] = b0[mij];
3020       }
3021     }
3022 
3023     /* Fill in c */
3024     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3025       for (j = 0, cij = ci, mij = mi; j < n_i;
3026 	   j++, cij += inccij, mij += incij) {
3027 	if (ab == 1 || ab == 2) {
3028 	  c_i[cij] = -c2[mij];
3029 	  c_i[cij + 1] = c1[mij];
3030 	} else {
3031 	  c_i[cij] = c1[mij];
3032 	  c_i[cij + 1] = c2[mij];
3033 	}
3034       }
3035     }
3036 
3037     /* Fill in truth */
3038     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3039       for (j = 0, cij = ci, mij = mi; j < n_i;
3040 	   j++, cij += inccij, mij += incij) {
3041 
3042 	if (ab == 0 || ab == 1) {
3043 	  head_r_true[cij] = head_r1_true[mij];
3044 	  tail_r_true[cij] = tail_r1_true[mij];
3045 	  head_r_true[cij + 1] = head_r2_true[mij];
3046 	  tail_r_true[cij + 1] = tail_r2_true[mij];
3047 	} else if (ab == 2) {
3048 	  head_r_true[cij] = -head_r2_true[mij];
3049 	  tail_r_true[cij] = -tail_r2_true[mij];
3050 	  head_r_true[cij + 1] = head_r1_true[mij];
3051 	  tail_r_true[cij + 1] = tail_r1_true[mij];
3052 	} else {
3053 	  head_r_elem1 = head_r1_true[mij];
3054 	  tail_r_elem1 = tail_r1_true[mij];
3055 
3056 	  head_r_elem2 = head_r2_true[mij];
3057 	  tail_r_elem2 = tail_r2_true[mij];
3058 
3059 	  {
3060 	    /* Compute double-double = double-double + double-double. */
3061 	    double bv;
3062 	    double s1, s2, t1, t2;
3063 
3064 	    /* Add two hi words. */
3065 	    s1 = head_r_elem1 + head_r_elem2;
3066 	    bv = s1 - head_r_elem1;
3067 	    s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3068 
3069 	    /* Add two lo words. */
3070 	    t1 = tail_r_elem1 + tail_r_elem2;
3071 	    bv = t1 - tail_r_elem1;
3072 	    t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3073 
3074 	    s2 += t1;
3075 
3076 	    /* Renormalize (s1, s2)  to  (t1, s2) */
3077 	    t1 = s1 + s2;
3078 	    s2 = s2 - (t1 - s1);
3079 
3080 	    t2 += s2;
3081 
3082 	    /* Renormalize (t1, t2)  */
3083 	    head_r_elem = t1 + t2;
3084 	    tail_r_elem = t2 - (head_r_elem - t1);
3085 	  }
3086 
3087 	  /* Set the imaginary part to  R1 + R2 */
3088 	  tail_r_true[cij + 1] = tail_r_elem;
3089 	  head_r_true[cij + 1] = head_r_elem;
3090 
3091 	  /* Set the real part to R1 - R2. */
3092 	  {
3093 	    double head_bt, tail_bt;
3094 	    head_bt = -head_r_elem2;
3095 	    tail_bt = -tail_r_elem2;
3096 	    {
3097 	      /* Compute double-double = double-double + double-double. */
3098 	      double bv;
3099 	      double s1, s2, t1, t2;
3100 
3101 	      /* Add two hi words. */
3102 	      s1 = head_r_elem1 + head_bt;
3103 	      bv = s1 - head_r_elem1;
3104 	      s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3105 
3106 	      /* Add two lo words. */
3107 	      t1 = tail_r_elem1 + tail_bt;
3108 	      bv = t1 - tail_r_elem1;
3109 	      t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3110 
3111 	      s2 += t1;
3112 
3113 	      /* Renormalize (s1, s2)  to  (t1, s2) */
3114 	      t1 = s1 + s2;
3115 	      s2 = s2 - (t1 - s1);
3116 
3117 	      t2 += s2;
3118 
3119 	      /* Renormalize (t1, t2)  */
3120 	      head_r_elem = t1 + t2;
3121 	      tail_r_elem = t2 - (head_r_elem - t1);
3122 	    }
3123 	  }
3124 	  tail_r_true[cij] = tail_r_elem;
3125 	  head_r_true[cij] = head_r_elem;
3126 	}
3127 
3128       }
3129     }
3130 
3131     blas_free(a1);
3132     blas_free(a2);
3133     blas_free(c1);
3134     blas_free(c2);
3135     blas_free(b0);
3136     blas_free(head_r1_true);
3137     blas_free(tail_r1_true);
3138     blas_free(head_r2_true);
3139     blas_free(tail_r2_true);
3140 
3141   } else {
3142     /* random matrix */
3143     double a_elem[2];
3144     double b_elem;
3145     double c_elem[2];
3146     double head_r_true_elem[2], tail_r_true_elem[2];
3147 
3148     /* Since mixed real/complex test generator for dot
3149        scales the vectors, we need to used the non-mixed
3150        version if B is real (since A is always complex). */
3151     double *bb_vec;
3152     bb_vec = (double *) blas_malloc(m_i * sizeof(double) * 2);
3153     if (m_i > 0 && bb_vec == NULL) {
3154       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3155     }
3156 
3157     if (alpha_flag == 0) {
3158       alpha_i[0] = xrand(seed);
3159       alpha_i[1] = xrand(seed);
3160     }
3161     if (beta_flag == 0) {
3162       beta_i[0] = xrand(seed);
3163       beta_i[1] = xrand(seed);
3164     }
3165 
3166     /* Fill in matrix A -- Hermitian. */
3167     for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
3168       for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
3169 	a_elem[0] = xrand(seed);
3170 	a_elem[1] = xrand(seed);
3171 	a_i[aij] = a_elem[0];
3172 	a_i[aij + 1] = a_elem[1];
3173 	if (i == j)
3174 	  a_i[aij + 1] = 0.0;
3175       }
3176     }
3177 
3178     /* Fill in matrix B */
3179     for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
3180       for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
3181 	b_elem = xrand(seed);
3182 	b_i[bij] = b_elem;
3183       }
3184     }
3185 
3186     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3187       zhe_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3188       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
3189 
3190 	if (side == blas_left_side)
3191 	  dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3192 	else
3193 	  dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3194 
3195 	/* copy the real b_vec into complex bb_vec, so that
3196 	   pure complex test case generator can be called. */
3197 
3198 	{
3199 	  int k;
3200 	  for (k = 0; k < m_i; k++) {
3201 	    bb_vec[2 * k] = b_vec[k];
3202 	    bb_vec[2 * k + 1] = 0.0;
3203 	  }
3204 	}
3205 	BLAS_zdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
3206 			  alpha, 1, beta, 1, a_vec, bb_vec, seed,
3207 			  c_elem, head_r_true_elem, tail_r_true_elem);
3208 
3209 	c_i[cij] = c_elem[0];
3210 	c_i[cij + 1] = c_elem[1];
3211 	head_r_true[cij] = head_r_true_elem[0];
3212 	head_r_true[cij + 1] = head_r_true_elem[1];
3213 	tail_r_true[cij] = tail_r_true_elem[0];
3214 	tail_r_true[cij + 1] = tail_r_true_elem[1];
3215       }
3216     }
3217 
3218     blas_free(bb_vec);
3219 
3220   }
3221 
3222   blas_free(a_vec);
3223   blas_free(b_vec);
3224 }
BLAS_chemm_c_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,float * b,int ldb,void * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3225 void BLAS_chemm_c_s_testgen(int norm, enum blas_order_type order,
3226 			    enum blas_uplo_type uplo,
3227 			    enum blas_side_type side, int m, int n,
3228 			    int randomize, void *alpha, int alpha_flag,
3229 			    void *beta, int beta_flag, void *a, int lda,
3230 			    float *b, int ldb, void *c, int ldc, int *seed,
3231 			    double *head_r_true, double *tail_r_true)
3232 
3233 /*
3234  * Purpose
3235  * =======
3236  *
3237  *   Generates the test inputs to BLAS_chemm_c_s{_x}
3238  *
3239  * Arguments
3240  * =========
3241  *
3242  * norm    (input) int
3243  *           = -1: the vectors are scaled with norms near underflow.
3244  *           =  0: the vectors have norms of order 1.
3245  *           =  1: the vectors are scaled with norms near overflow.
3246  *
3247  * order   (input) enum blas_side_type
3248  *           storage format of the matrices
3249  *
3250  * uplo    (input) enum blas_uplo_type
3251  *           which half of the hermitian matrix a is to be stored.
3252  *
3253  * side    (input) enum blas_side_type
3254  *           which side of matrix b matrix a is to be multiplied.
3255  *
3256  * m n     (input) int
3257  *           sizes of matrices a, b, c:
3258  *              matrix a is m-by-m for left multiplication
3259  *                          n-by-n otherwise,
3260  *              matrices b, c are m-by-n.
3261  *
3262  * randomize (input) int
3263  *           = 0: test case made for maximum cancellation.
3264  *           = 1: test case made for maximum radomization.
3265  *
3266  * alpha   (input/output) void*
3267  *           if alpha_flag = 1, alpha is input.
3268  *           if alpha_flag = 0, alpha is output.
3269  *
3270  * alpha_flag (input) int
3271  *           = 0: alpha is free, and is output.
3272  *           = 1: alpha is fixed on input.
3273  *
3274  * beta    (input/output) void*
3275  *           if beta_flag = 1, beta is input.
3276  *           if beta_flag = 0, beta is output.
3277  *
3278  * beta_flag (input) int
3279  *           = 0: beta is free, and is output.
3280  *           = 1: beta is fixed on input.
3281  *
3282  * a       (input/output) void*
3283  *
3284  * lda     (input) lda
3285  *         leading dimension of matrix A.
3286  *
3287  * b       (input/output) float*
3288  *
3289  * ldb     (input) int
3290  *         leading dimension of matrix B.
3291  *
3292  * c       (input/output) void*
3293  *         generated matrix C that will be used as an input to HEMM.
3294  *
3295  * ldc     (input) int
3296  *         leading dimension of matrix C.
3297  *
3298  * seed    (input/output) int *
3299  *         seed for the random number generator.
3300  *
3301  * head_r_true  (output) double *
3302  *         the leading part of the truth in double-double.
3303  *
3304  * tail_r_true  (output) double *
3305  *         the trailing part of the truth in double-double
3306  *
3307  */
3308 {
3309 
3310   /* Strategy:
3311      R1 = alpha * A1 * B + beta * C1
3312      R2 = alpha * A2 * B + beta * C2
3313      where all the matrices are real.  Then let R = R1 + i R2,
3314      A = A1 + i A2, C = C1 + i C2.  To make A hermitian, A1 is
3315      symmetric, and A2 is a skew matrix (trans(A2) = -A2).
3316    */
3317 
3318   int i, j;
3319   int cij, ci;
3320   int aij, ai;
3321   int a1ij, a1i;
3322   int bij, bi;
3323   int mij, mi;
3324   int inccij, incci;
3325   int incaij, incai;
3326   int inca1ij, inca1i;
3327   int incbij, incbi;
3328   int inci, incij;
3329   int inca, incb;
3330   int m_i, n_i;
3331   int ld;
3332   int ab;
3333 
3334   float *a1;
3335   float *a2;
3336   float *c1;
3337   float *c2;
3338   float *b0;
3339 
3340   double *head_r1_true, *tail_r1_true;
3341   double *head_r2_true, *tail_r2_true;
3342 
3343   double head_r_elem1, tail_r_elem1;
3344   double head_r_elem2, tail_r_elem2;
3345   double head_r_elem, tail_r_elem;
3346 
3347   float *a_vec;
3348   float *b_vec;
3349 
3350   float *c_i = (float *) c;
3351   float *alpha_i = (float *) alpha;
3352   float *beta_i = (float *) beta;
3353   float *a_i = a;
3354   float *b_i = b;
3355 
3356   if (side == blas_left_side) {
3357     m_i = m;
3358     n_i = n;
3359   } else {
3360     m_i = n;
3361     n_i = m;
3362   }
3363 
3364   if (order == blas_colmajor)
3365     ld = m;
3366   else
3367     ld = n;
3368 
3369 
3370   if ((order == blas_colmajor && side == blas_left_side) ||
3371       (order == blas_rowmajor && side == blas_right_side)) {
3372     inci = inca1i = incai = incbi = incci = 1;
3373     inccij = ldc;
3374     incaij = lda;
3375     incbij = ldb;
3376     inca1ij = m_i;
3377     incij = ld;
3378   } else {
3379     incci = ldc;
3380     incai = lda;
3381     incbi = ldb;
3382     inca1i = m_i;
3383     inci = ld;
3384     incij = inca1ij = incaij = incbij = inccij = 1;
3385   }
3386 
3387   incci *= 2;
3388   inccij *= 2;
3389   incai *= 2;
3390   incaij *= 2;
3391 
3392 
3393 
3394   inca = incb = 1;
3395   inca *= 2;
3396 
3397   a_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
3398   if (m_i > 0 && a_vec == NULL) {
3399     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3400   }
3401   for (i = 0; i < m_i * inca; i += inca) {
3402     a_vec[i] = 0.0;
3403     a_vec[i + 1] = 0.0;
3404   }
3405   b_vec = (float *) blas_malloc(m_i * sizeof(float));
3406   if (m_i > 0 && b_vec == NULL) {
3407     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3408   }
3409   for (i = 0; i < m_i * incb; i += incb) {
3410     b_vec[i] = 0.0;
3411   }
3412 
3413   if (randomize == 0) {
3414     a1 = (float *) blas_malloc(m_i * m_i * sizeof(float));
3415     if (m_i * m_i > 0 && a1 == NULL) {
3416       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3417     }
3418     a2 = (float *) blas_malloc(m_i * m_i * sizeof(float));
3419     if (m_i * m_i > 0 && a2 == NULL) {
3420       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3421     }
3422     for (i = 0; i < m_i * m_i; ++i) {
3423       a1[i] = a2[i] = 0.0;
3424     }
3425     c1 = (float *) blas_malloc(m_i * n_i * sizeof(float));
3426     if (m_i * n_i > 0 && c1 == NULL) {
3427       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3428     }
3429     c2 = (float *) blas_malloc(m_i * n_i * sizeof(float));
3430     if (m_i * n_i > 0 && c2 == NULL) {
3431       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3432     }
3433     b0 = (float *) blas_malloc(m_i * n_i * sizeof(float));
3434     if (m_i * n_i > 0 && b0 == NULL) {
3435       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3436     }
3437     for (i = 0; i < m_i * n_i; ++i) {
3438       c1[i] = c2[i] = b0[i] = 0.0;
3439     }
3440     head_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3441     tail_r1_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3442     if (m_i * n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3443       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3444     };
3445     head_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3446     tail_r2_true = (double *) blas_malloc(m_i * n_i * sizeof(double));
3447     if (m_i * n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3448       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3449     };
3450 
3451     /* First generate the real portion of matrix A, and matrix B.
3452        Note that Re(A) is a symmetric matrix. */
3453     BLAS_ssymm_testgen
3454       (norm, order, uplo, side, m, n, 0, alpha_i, alpha_flag, beta_i,
3455        beta_flag, a1, m_i, b0, ld, c1, ld, seed, head_r1_true, tail_r1_true);
3456 
3457     BLAS_sskew_testgen
3458       (norm, order, uplo, side, m, n, alpha_i, 1, beta_i, 1, a2, m_i, b0, ld,
3459        c2, ld, seed, head_r2_true, tail_r2_true);
3460 
3461 
3462     /* The case where B is a real matrix.
3463 
3464        There are four cases to consider, depending on the
3465        values of alpha and beta.
3466 
3467        values                             scaling
3468        alpha  beta         alpha    A    B    beta    C     R (truth)
3469        0    1      1
3470        1    1      ?                              -i     i
3471        2    ?      1            i                        i     i
3472        3    ?      ?           1+i                1+i         1+i
3473 
3474        Note that we can afford to scale truth by (1+i) since they
3475        are computed in double-double.
3476      */
3477 
3478     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3479       ab = 0;
3480       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
3481     } else if (alpha_i[0] == 1.0) {
3482       ab = 1;
3483       /* set alpha to 1, multiply beta by -i. */
3484       alpha_i[1] = 0.0;
3485       beta_i[1] = -beta_i[0];
3486       beta_i[0] = 0.0;
3487     } else if (beta_i[0] == 1.0) {
3488       ab = 2;
3489       /* set beta to 1, multiply alpha by i. */
3490       beta_i[1] = 0.0;
3491       alpha_i[1] = alpha_i[0];
3492       alpha_i[0] = 0.0;
3493     } else {
3494       ab = 3;
3495       /* multiply alpha, beta by (1 + i). */
3496       alpha_i[1] = alpha_i[0];
3497       beta_i[1] = beta_i[0];
3498     }
3499 
3500     /* Now fill in a */
3501     for (i = 0, ai = 0, a1i = 0; i < m_i; i++, ai += incai, a1i += inca1i) {
3502       for (j = 0, aij = ai, a1ij = a1i; j < m_i;
3503 	   j++, aij += incaij, a1ij += inca1ij) {
3504 	a_i[aij] = a1[a1ij];
3505 	a_i[aij + 1] = a2[a1ij];
3506       }
3507     }
3508 
3509     /* Fill in b */
3510     for (i = 0, bi = 0, mi = 0; i < m_i; i++, bi += incbi, mi += inci) {
3511       for (j = 0, bij = bi, mij = mi; j < n_i;
3512 	   j++, bij += incbij, mij += incij) {
3513 	b_i[bij] = b0[mij];
3514       }
3515     }
3516 
3517     /* Fill in c */
3518     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3519       for (j = 0, cij = ci, mij = mi; j < n_i;
3520 	   j++, cij += inccij, mij += incij) {
3521 	if (ab == 1 || ab == 2) {
3522 	  c_i[cij] = -c2[mij];
3523 	  c_i[cij + 1] = c1[mij];
3524 	} else {
3525 	  c_i[cij] = c1[mij];
3526 	  c_i[cij + 1] = c2[mij];
3527 	}
3528       }
3529     }
3530 
3531     /* Fill in truth */
3532     for (i = 0, ci = 0, mi = 0; i < m_i; i++, ci += incci, mi += inci) {
3533       for (j = 0, cij = ci, mij = mi; j < n_i;
3534 	   j++, cij += inccij, mij += incij) {
3535 
3536 	if (ab == 0 || ab == 1) {
3537 	  head_r_true[cij] = head_r1_true[mij];
3538 	  tail_r_true[cij] = tail_r1_true[mij];
3539 	  head_r_true[cij + 1] = head_r2_true[mij];
3540 	  tail_r_true[cij + 1] = tail_r2_true[mij];
3541 	} else if (ab == 2) {
3542 	  head_r_true[cij] = -head_r2_true[mij];
3543 	  tail_r_true[cij] = -tail_r2_true[mij];
3544 	  head_r_true[cij + 1] = head_r1_true[mij];
3545 	  tail_r_true[cij + 1] = tail_r1_true[mij];
3546 	} else {
3547 	  head_r_elem1 = head_r1_true[mij];
3548 	  tail_r_elem1 = tail_r1_true[mij];
3549 
3550 	  head_r_elem2 = head_r2_true[mij];
3551 	  tail_r_elem2 = tail_r2_true[mij];
3552 
3553 	  {
3554 	    /* Compute double-double = double-double + double-double. */
3555 	    double bv;
3556 	    double s1, s2, t1, t2;
3557 
3558 	    /* Add two hi words. */
3559 	    s1 = head_r_elem1 + head_r_elem2;
3560 	    bv = s1 - head_r_elem1;
3561 	    s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3562 
3563 	    /* Add two lo words. */
3564 	    t1 = tail_r_elem1 + tail_r_elem2;
3565 	    bv = t1 - tail_r_elem1;
3566 	    t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3567 
3568 	    s2 += t1;
3569 
3570 	    /* Renormalize (s1, s2)  to  (t1, s2) */
3571 	    t1 = s1 + s2;
3572 	    s2 = s2 - (t1 - s1);
3573 
3574 	    t2 += s2;
3575 
3576 	    /* Renormalize (t1, t2)  */
3577 	    head_r_elem = t1 + t2;
3578 	    tail_r_elem = t2 - (head_r_elem - t1);
3579 	  }
3580 
3581 	  /* Set the imaginary part to  R1 + R2 */
3582 	  tail_r_true[cij + 1] = tail_r_elem;
3583 	  head_r_true[cij + 1] = head_r_elem;
3584 
3585 	  /* Set the real part to R1 - R2. */
3586 	  {
3587 	    double head_bt, tail_bt;
3588 	    head_bt = -head_r_elem2;
3589 	    tail_bt = -tail_r_elem2;
3590 	    {
3591 	      /* Compute double-double = double-double + double-double. */
3592 	      double bv;
3593 	      double s1, s2, t1, t2;
3594 
3595 	      /* Add two hi words. */
3596 	      s1 = head_r_elem1 + head_bt;
3597 	      bv = s1 - head_r_elem1;
3598 	      s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3599 
3600 	      /* Add two lo words. */
3601 	      t1 = tail_r_elem1 + tail_bt;
3602 	      bv = t1 - tail_r_elem1;
3603 	      t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3604 
3605 	      s2 += t1;
3606 
3607 	      /* Renormalize (s1, s2)  to  (t1, s2) */
3608 	      t1 = s1 + s2;
3609 	      s2 = s2 - (t1 - s1);
3610 
3611 	      t2 += s2;
3612 
3613 	      /* Renormalize (t1, t2)  */
3614 	      head_r_elem = t1 + t2;
3615 	      tail_r_elem = t2 - (head_r_elem - t1);
3616 	    }
3617 	  }
3618 	  tail_r_true[cij] = tail_r_elem;
3619 	  head_r_true[cij] = head_r_elem;
3620 	}
3621 
3622       }
3623     }
3624 
3625     blas_free(a1);
3626     blas_free(a2);
3627     blas_free(c1);
3628     blas_free(c2);
3629     blas_free(b0);
3630     blas_free(head_r1_true);
3631     blas_free(tail_r1_true);
3632     blas_free(head_r2_true);
3633     blas_free(tail_r2_true);
3634 
3635   } else {
3636     /* random matrix */
3637     float a_elem[2];
3638     float b_elem;
3639     float c_elem[2];
3640     double head_r_true_elem[2], tail_r_true_elem[2];
3641 
3642     /* Since mixed real/complex test generator for dot
3643        scales the vectors, we need to used the non-mixed
3644        version if B is real (since A is always complex). */
3645     float *bb_vec;
3646     bb_vec = (float *) blas_malloc(m_i * sizeof(float) * 2);
3647     if (m_i > 0 && bb_vec == NULL) {
3648       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3649     }
3650 
3651     if (alpha_flag == 0) {
3652       alpha_i[0] = xrand(seed);
3653       alpha_i[1] = xrand(seed);
3654     }
3655     if (beta_flag == 0) {
3656       beta_i[0] = xrand(seed);
3657       beta_i[1] = xrand(seed);
3658     }
3659 
3660     /* Fill in matrix A -- Hermitian. */
3661     for (i = 0, ai = 0; i < m_i; i++, ai += incai) {
3662       for (j = 0, aij = ai; j < m_i; j++, aij += incaij) {
3663 	a_elem[0] = xrand(seed);
3664 	a_elem[1] = xrand(seed);
3665 	a_i[aij] = a_elem[0];
3666 	a_i[aij + 1] = a_elem[1];
3667 	if (i == j)
3668 	  a_i[aij + 1] = 0.0;
3669       }
3670     }
3671 
3672     /* Fill in matrix B */
3673     for (i = 0, bi = 0; i < m_i; i++, bi += incbi) {
3674       for (j = 0, bij = bi; j < n_i; j++, bij += incbij) {
3675 	b_elem = xrand(seed);
3676 	b_i[bij] = b_elem;
3677       }
3678     }
3679 
3680     for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3681       che_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3682       for (j = 0, cij = ci; j < n_i; j++, cij += inccij) {
3683 
3684 	if (side == blas_left_side)
3685 	  sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3686 	else
3687 	  sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, j);
3688 
3689 	/* copy the real b_vec into complex bb_vec, so that
3690 	   pure complex test case generator can be called. */
3691 
3692 	{
3693 	  int k;
3694 	  for (k = 0; k < m_i; k++) {
3695 	    bb_vec[2 * k] = b_vec[k];
3696 	    bb_vec[2 * k + 1] = 0.0;
3697 	  }
3698 	}
3699 	BLAS_cdot_testgen(m_i, m_i, 0, norm, blas_no_conj,
3700 			  alpha, 1, beta, 1, a_vec, bb_vec, seed,
3701 			  c_elem, head_r_true_elem, tail_r_true_elem);
3702 
3703 	c_i[cij] = c_elem[0];
3704 	c_i[cij + 1] = c_elem[1];
3705 	head_r_true[cij] = head_r_true_elem[0];
3706 	head_r_true[cij + 1] = head_r_true_elem[1];
3707 	tail_r_true[cij] = tail_r_true_elem[0];
3708 	tail_r_true[cij + 1] = tail_r_true_elem[1];
3709       }
3710     }
3711 
3712     blas_free(bb_vec);
3713 
3714   }
3715 
3716   blas_free(a_vec);
3717   blas_free(b_vec);
3718 }
3719 
BLAS_sskew_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,float * alpha,int alpha_flag,float * beta,int beta_flag,float * a,int lda,float * b,int ldb,float * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3720 void BLAS_sskew_testgen(int norm, enum blas_order_type order,
3721 			enum blas_uplo_type uplo, enum blas_side_type side,
3722 			int m, int n, float *alpha, int alpha_flag,
3723 			float *beta, int beta_flag, float *a, int lda,
3724 			float *b, int ldb, float *c, int ldc, int *seed,
3725 			double *head_r_true, double *tail_r_true)
3726 {
3727 
3728   int i, j;
3729   int cij, ci;
3730   int inccij, incci;
3731   int inca, incb;
3732   int m_i, n_i;
3733 
3734   float c_elem;
3735   double head_r_true_elem, tail_r_true_elem;
3736 
3737   float *a_vec;
3738   float *b_vec;
3739 
3740   float *c_i = c;
3741 
3742   if (side == blas_left_side) {
3743     m_i = m;
3744     n_i = n;
3745   } else {
3746     m_i = n;
3747     n_i = m;
3748   }
3749 
3750   inca = incb = 1;
3751 
3752 
3753   a_vec = (float *) blas_malloc(m_i * sizeof(float));
3754   if (m_i > 0 && a_vec == NULL) {
3755     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3756   }
3757   for (i = 0; i < m_i * inca; i += inca) {
3758     a_vec[i] = 0.0;
3759   }
3760   b_vec = (float *) blas_malloc(m_i * sizeof(float));
3761   if (m_i > 0 && b_vec == NULL) {
3762     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3763   }
3764   for (i = 0; i < m_i * incb; i += incb) {
3765     b_vec[i] = 0.0;
3766   }
3767 
3768   if ((order == blas_colmajor && side == blas_left_side) ||
3769       (order == blas_rowmajor && side == blas_right_side)) {
3770     incci = 1;
3771     inccij = ldc;
3772   } else {
3773     incci = ldc;
3774     inccij = 1;
3775   }
3776 
3777 
3778 
3779 
3780   if (side == blas_left_side)
3781     sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3782   else
3783     sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3784 
3785   /* Fill in matrix A */
3786   cij = 0;
3787   for (i = 0; i < m_i; i++, cij += incci) {
3788     sskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3789     BLAS_sdot_testgen(m_i, i, m_i - i, norm,
3790 		      blas_no_conj, alpha, 1,
3791 		      beta, 1, b_vec, a_vec, seed,
3792 		      &c_elem, &head_r_true_elem, &tail_r_true_elem);
3793 
3794     sskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
3795 
3796     c_i[cij] = c_elem;
3797     head_r_true[cij] = head_r_true_elem;
3798     tail_r_true[cij] = tail_r_true_elem;
3799   }
3800 
3801   /* Now fill in c and r_true */
3802 
3803   for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3804     for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3805       c_i[cij] = c_i[ci];
3806       head_r_true[cij] = head_r_true[ci];
3807       tail_r_true[cij] = tail_r_true[ci];
3808     }
3809   }
3810 
3811   blas_free(a_vec);
3812   blas_free(b_vec);
3813 }
BLAS_dskew_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,double * a,int lda,double * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3814 void BLAS_dskew_testgen(int norm, enum blas_order_type order,
3815 			enum blas_uplo_type uplo, enum blas_side_type side,
3816 			int m, int n, double *alpha, int alpha_flag,
3817 			double *beta, int beta_flag, double *a, int lda,
3818 			double *b, int ldb, double *c, int ldc, int *seed,
3819 			double *head_r_true, double *tail_r_true)
3820 {
3821 
3822   int i, j;
3823   int cij, ci;
3824   int inccij, incci;
3825   int inca, incb;
3826   int m_i, n_i;
3827 
3828   double c_elem;
3829   double head_r_true_elem, tail_r_true_elem;
3830 
3831   double *a_vec;
3832   double *b_vec;
3833 
3834   double *c_i = c;
3835 
3836   if (side == blas_left_side) {
3837     m_i = m;
3838     n_i = n;
3839   } else {
3840     m_i = n;
3841     n_i = m;
3842   }
3843 
3844   inca = incb = 1;
3845 
3846 
3847   a_vec = (double *) blas_malloc(m_i * sizeof(double));
3848   if (m_i > 0 && a_vec == NULL) {
3849     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3850   }
3851   for (i = 0; i < m_i * inca; i += inca) {
3852     a_vec[i] = 0.0;
3853   }
3854   b_vec = (double *) blas_malloc(m_i * sizeof(double));
3855   if (m_i > 0 && b_vec == NULL) {
3856     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3857   }
3858   for (i = 0; i < m_i * incb; i += incb) {
3859     b_vec[i] = 0.0;
3860   }
3861 
3862   if ((order == blas_colmajor && side == blas_left_side) ||
3863       (order == blas_rowmajor && side == blas_right_side)) {
3864     incci = 1;
3865     inccij = ldc;
3866   } else {
3867     incci = ldc;
3868     inccij = 1;
3869   }
3870 
3871 
3872 
3873 
3874   if (side == blas_left_side)
3875     dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3876   else
3877     dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3878 
3879   /* Fill in matrix A */
3880   cij = 0;
3881   for (i = 0; i < m_i; i++, cij += incci) {
3882     dskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3883     BLAS_ddot_testgen(m_i, i, m_i - i, norm,
3884 		      blas_no_conj, alpha, 1,
3885 		      beta, 1, b_vec, a_vec, seed,
3886 		      &c_elem, &head_r_true_elem, &tail_r_true_elem);
3887 
3888     dskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
3889 
3890     c_i[cij] = c_elem;
3891     head_r_true[cij] = head_r_true_elem;
3892     tail_r_true[cij] = tail_r_true_elem;
3893   }
3894 
3895   /* Now fill in c and r_true */
3896 
3897   for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3898     for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3899       c_i[cij] = c_i[ci];
3900       head_r_true[cij] = head_r_true[ci];
3901       tail_r_true[cij] = tail_r_true[ci];
3902     }
3903   }
3904 
3905   blas_free(a_vec);
3906   blas_free(b_vec);
3907 }
BLAS_dskew_d_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,double * a,int lda,float * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)3908 void BLAS_dskew_d_s_testgen(int norm, enum blas_order_type order,
3909 			    enum blas_uplo_type uplo,
3910 			    enum blas_side_type side, int m, int n,
3911 			    double *alpha, int alpha_flag, double *beta,
3912 			    int beta_flag, double *a, int lda, float *b,
3913 			    int ldb, double *c, int ldc, int *seed,
3914 			    double *head_r_true, double *tail_r_true)
3915 {
3916 
3917   int i, j;
3918   int cij, ci;
3919   int inccij, incci;
3920   int inca, incb;
3921   int m_i, n_i;
3922 
3923   double c_elem;
3924   double head_r_true_elem, tail_r_true_elem;
3925 
3926   double *a_vec;
3927   float *b_vec;
3928 
3929   double *c_i = c;
3930 
3931   if (side == blas_left_side) {
3932     m_i = m;
3933     n_i = n;
3934   } else {
3935     m_i = n;
3936     n_i = m;
3937   }
3938 
3939   inca = incb = 1;
3940 
3941 
3942   a_vec = (double *) blas_malloc(m_i * sizeof(double));
3943   if (m_i > 0 && a_vec == NULL) {
3944     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3945   }
3946   for (i = 0; i < m_i * inca; i += inca) {
3947     a_vec[i] = 0.0;
3948   }
3949   b_vec = (float *) blas_malloc(m_i * sizeof(float));
3950   if (m_i > 0 && b_vec == NULL) {
3951     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3952   }
3953   for (i = 0; i < m_i * incb; i += incb) {
3954     b_vec[i] = 0.0;
3955   }
3956 
3957   if ((order == blas_colmajor && side == blas_left_side) ||
3958       (order == blas_rowmajor && side == blas_right_side)) {
3959     incci = 1;
3960     inccij = ldc;
3961   } else {
3962     incci = ldc;
3963     inccij = 1;
3964   }
3965 
3966 
3967 
3968 
3969   if (side == blas_left_side)
3970     sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3971   else
3972     sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
3973 
3974   /* Fill in matrix A */
3975   cij = 0;
3976   for (i = 0; i < m_i; i++, cij += incci) {
3977     dskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
3978     BLAS_ddot_s_d_testgen(m_i, i, m_i - i, norm,
3979 			  blas_no_conj, alpha, 1,
3980 			  beta, 1, b_vec, a_vec, seed,
3981 			  &c_elem, &head_r_true_elem, &tail_r_true_elem);
3982 
3983     dskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
3984 
3985     c_i[cij] = c_elem;
3986     head_r_true[cij] = head_r_true_elem;
3987     tail_r_true[cij] = tail_r_true_elem;
3988   }
3989 
3990   /* Now fill in c and r_true */
3991 
3992   for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
3993     for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
3994       c_i[cij] = c_i[ci];
3995       head_r_true[cij] = head_r_true[ci];
3996       tail_r_true[cij] = tail_r_true[ci];
3997     }
3998   }
3999 
4000   blas_free(a_vec);
4001   blas_free(b_vec);
4002 }
BLAS_dskew_s_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,float * a,int lda,double * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)4003 void BLAS_dskew_s_d_testgen(int norm, enum blas_order_type order,
4004 			    enum blas_uplo_type uplo,
4005 			    enum blas_side_type side, int m, int n,
4006 			    double *alpha, int alpha_flag, double *beta,
4007 			    int beta_flag, float *a, int lda, double *b,
4008 			    int ldb, double *c, int ldc, int *seed,
4009 			    double *head_r_true, double *tail_r_true)
4010 {
4011 
4012   int i, j;
4013   int cij, ci;
4014   int inccij, incci;
4015   int inca, incb;
4016   int m_i, n_i;
4017 
4018   double c_elem;
4019   double head_r_true_elem, tail_r_true_elem;
4020 
4021   float *a_vec;
4022   double *b_vec;
4023 
4024   double *c_i = c;
4025 
4026   if (side == blas_left_side) {
4027     m_i = m;
4028     n_i = n;
4029   } else {
4030     m_i = n;
4031     n_i = m;
4032   }
4033 
4034   inca = incb = 1;
4035 
4036 
4037   a_vec = (float *) blas_malloc(m_i * sizeof(float));
4038   if (m_i > 0 && a_vec == NULL) {
4039     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4040   }
4041   for (i = 0; i < m_i * inca; i += inca) {
4042     a_vec[i] = 0.0;
4043   }
4044   b_vec = (double *) blas_malloc(m_i * sizeof(double));
4045   if (m_i > 0 && b_vec == NULL) {
4046     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4047   }
4048   for (i = 0; i < m_i * incb; i += incb) {
4049     b_vec[i] = 0.0;
4050   }
4051 
4052   if ((order == blas_colmajor && side == blas_left_side) ||
4053       (order == blas_rowmajor && side == blas_right_side)) {
4054     incci = 1;
4055     inccij = ldc;
4056   } else {
4057     incci = ldc;
4058     inccij = 1;
4059   }
4060 
4061 
4062 
4063 
4064   if (side == blas_left_side)
4065     dge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4066   else
4067     dge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4068 
4069   /* Fill in matrix A */
4070   cij = 0;
4071   for (i = 0; i < m_i; i++, cij += incci) {
4072     sskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
4073     BLAS_ddot_d_s_testgen(m_i, i, m_i - i, norm,
4074 			  blas_no_conj, alpha, 1,
4075 			  beta, 1, b_vec, a_vec, seed,
4076 			  &c_elem, &head_r_true_elem, &tail_r_true_elem);
4077 
4078     sskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
4079 
4080     c_i[cij] = c_elem;
4081     head_r_true[cij] = head_r_true_elem;
4082     tail_r_true[cij] = tail_r_true_elem;
4083   }
4084 
4085   /* Now fill in c and r_true */
4086 
4087   for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4088     for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
4089       c_i[cij] = c_i[ci];
4090       head_r_true[cij] = head_r_true[ci];
4091       tail_r_true[cij] = tail_r_true[ci];
4092     }
4093   }
4094 
4095   blas_free(a_vec);
4096   blas_free(b_vec);
4097 }
BLAS_dskew_s_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_side_type side,int m,int n,double * alpha,int alpha_flag,double * beta,int beta_flag,float * a,int lda,float * b,int ldb,double * c,int ldc,int * seed,double * head_r_true,double * tail_r_true)4098 void BLAS_dskew_s_s_testgen(int norm, enum blas_order_type order,
4099 			    enum blas_uplo_type uplo,
4100 			    enum blas_side_type side, int m, int n,
4101 			    double *alpha, int alpha_flag, double *beta,
4102 			    int beta_flag, float *a, int lda, float *b,
4103 			    int ldb, double *c, int ldc, int *seed,
4104 			    double *head_r_true, double *tail_r_true)
4105 {
4106 
4107   int i, j;
4108   int cij, ci;
4109   int inccij, incci;
4110   int inca, incb;
4111   int m_i, n_i;
4112 
4113   double c_elem;
4114   double head_r_true_elem, tail_r_true_elem;
4115 
4116   float *a_vec;
4117   float *b_vec;
4118 
4119   double *c_i = c;
4120 
4121   if (side == blas_left_side) {
4122     m_i = m;
4123     n_i = n;
4124   } else {
4125     m_i = n;
4126     n_i = m;
4127   }
4128 
4129   inca = incb = 1;
4130 
4131 
4132   a_vec = (float *) blas_malloc(m_i * sizeof(float));
4133   if (m_i > 0 && a_vec == NULL) {
4134     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4135   }
4136   for (i = 0; i < m_i * inca; i += inca) {
4137     a_vec[i] = 0.0;
4138   }
4139   b_vec = (float *) blas_malloc(m_i * sizeof(float));
4140   if (m_i > 0 && b_vec == NULL) {
4141     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4142   }
4143   for (i = 0; i < m_i * incb; i += incb) {
4144     b_vec[i] = 0.0;
4145   }
4146 
4147   if ((order == blas_colmajor && side == blas_left_side) ||
4148       (order == blas_rowmajor && side == blas_right_side)) {
4149     incci = 1;
4150     inccij = ldc;
4151   } else {
4152     incci = ldc;
4153     inccij = 1;
4154   }
4155 
4156 
4157 
4158 
4159   if (side == blas_left_side)
4160     sge_copy_col(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4161   else
4162     sge_copy_row(order, blas_no_trans, m, n, b, ldb, b_vec, 0);
4163 
4164   /* Fill in matrix A */
4165   cij = 0;
4166   for (i = 0; i < m_i; i++, cij += incci) {
4167     sskew_copy_row(order, uplo, side, m_i, a, lda, a_vec, i);
4168     BLAS_ddot_s_s_testgen(m_i, i, m_i - i, norm,
4169 			  blas_no_conj, alpha, 1,
4170 			  beta, 1, b_vec, a_vec, seed,
4171 			  &c_elem, &head_r_true_elem, &tail_r_true_elem);
4172 
4173     sskew_commit_row(order, uplo, side, m_i, a, lda, a_vec, i);
4174 
4175     c_i[cij] = c_elem;
4176     head_r_true[cij] = head_r_true_elem;
4177     tail_r_true[cij] = tail_r_true_elem;
4178   }
4179 
4180   /* Now fill in c and r_true */
4181 
4182   for (i = 0, ci = 0; i < m_i; i++, ci += incci) {
4183     for (j = 1, cij = ci + inccij; j < n_i; j++, cij += inccij) {
4184       c_i[cij] = c_i[ci];
4185       head_r_true[cij] = head_r_true[ci];
4186       tail_r_true[cij] = tail_r_true[ci];
4187     }
4188   }
4189 
4190   blas_free(a_vec);
4191   blas_free(b_vec);
4192 }
4193