1 
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7 
BLAS_sskmv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,float * alpha,float * beta,float * a,int lda,float * x_head,float * x_tail,float * y,int * seed,double * head_r_true,double * tail_r_true)8 void BLAS_sskmv2_testgen(int norm, enum blas_order_type order,
9 			 enum blas_uplo_type uplo, int n, float *alpha,
10 			 float *beta, float *a, int lda, float *x_head,
11 			 float *x_tail, float *y, int *seed,
12 			 double *head_r_true, double *tail_r_true)
13 
14 /*
15  * Purpose
16  * =======
17  *
18  *   Generates a skew Matrix for use with hemv2 testing
19  *
20  * Arguments
21  * =========
22  *
23  * norm    (input) int
24  *           = -1: the vectors are scaled with norms near underflow.
25  *           =  0: the vectors have norms of order 1.
26  *           =  1: the vectors are scaled with norms near overflow.
27  *
28  * order   (input) enum blas_order_type
29  *           storage format of the matrices
30  *
31  * uplo    (input) enum blas_uplo_type
32  *           which half of the skew symmetric matrix a is to be stored.
33  *
34  * n       (input) int
35  *           sizes of symmetrical matrix a, size of vectors x, y:
36  *              matrix a is n-by-n.
37  *
38  * alpha   (input/output) float*
39  *
40  * beta    (input) float*
41  *
42  * a       (input/output) float*
43  *
44  * lda     (input) lda
45  *         leading dimension of matrix A.
46  *
47  * x_head  (input) float*
48  * x_tail  (input) float*
49  *         note : x is input only. x should be determined before calling
50  *         this function.
51  *
52  * y       (input/output) float*
53  *         generated vector y.
54  *
55  * seed    (input/output) int *
56  *         seed for the random number generator.
57  *
58  * head_r_true  (output) double *
59  *         the leading part of the truth in double-double.
60  *
61  * tail_r_true  (output) double *
62  *         the trailing part of the truth in double-double
63  *
64  */
65 {
66 
67   int i, yi, ri;
68   int incx = 1, incy = 1;
69   int incyi, incri;
70   int incx_veci, yi0;
71   int inca_vec;
72 
73   float y_elem;
74   double head_r_true_elem, tail_r_true_elem;
75 
76   float *a_vec;
77   float *x_head_vec;
78   float *x_tail_vec;
79 
80   float *y_i = y;
81   float *alpha_i = alpha;
82   float *beta_i = beta;
83   float *x_head_i = x_head;
84   float *x_tail_i = x_tail;
85 
86   /*a_vec must have stride of 1 */
87   inca_vec = 1;
88 
89 
90   a_vec = (float *) blas_malloc(n * sizeof(float));
91   if (n > 0 && a_vec == NULL) {
92     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
93   }
94   for (i = 0; i < n; i += inca_vec) {
95     a_vec[i] = 0.0;
96   }
97   x_head_vec = (float *) blas_malloc(n * sizeof(float));
98   if (n > 0 && x_head_vec == NULL) {
99     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
100   }
101   x_tail_vec = (float *) blas_malloc(n * sizeof(float));
102   if (n > 0 && x_tail_vec == NULL) {
103     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
104   }
105   scopy_vector(x_head_i, n, incx, x_head_vec, 1);
106   scopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
107 
108   incyi = incy;
109 
110   yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
111 
112   incri = 1;
113 
114 
115   incx_veci = 1;
116 
117 
118   /* Fill in skew matrix A */
119   for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
120     /* x_i has already been copied to x_vec */
121     sskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
122 
123     /* skew matricies have zeroed diagonals */
124     a_vec[i] = 0.0;
125     BLAS_sdot2_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i, 1,
126 		       beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
127 		       &y_elem, &head_r_true_elem, &tail_r_true_elem);
128 
129     sskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
130 
131     /*commits an element to the generated y */
132     y_i[yi] = y_elem;
133     head_r_true[ri] = head_r_true_elem;
134     tail_r_true[ri] = tail_r_true_elem;
135   }
136 
137   blas_free(a_vec);
138   blas_free(x_head_vec);
139   blas_free(x_tail_vec);
140 }
BLAS_dskmv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,double * a,int lda,double * x_head,double * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)141 void BLAS_dskmv2_testgen(int norm, enum blas_order_type order,
142 			 enum blas_uplo_type uplo, int n, double *alpha,
143 			 double *beta, double *a, int lda, double *x_head,
144 			 double *x_tail, double *y, int *seed,
145 			 double *head_r_true, double *tail_r_true)
146 
147 /*
148  * Purpose
149  * =======
150  *
151  *   Generates a skew Matrix for use with hemv2 testing
152  *
153  * Arguments
154  * =========
155  *
156  * norm    (input) int
157  *           = -1: the vectors are scaled with norms near underflow.
158  *           =  0: the vectors have norms of order 1.
159  *           =  1: the vectors are scaled with norms near overflow.
160  *
161  * order   (input) enum blas_order_type
162  *           storage format of the matrices
163  *
164  * uplo    (input) enum blas_uplo_type
165  *           which half of the skew symmetric matrix a is to be stored.
166  *
167  * n       (input) int
168  *           sizes of symmetrical matrix a, size of vectors x, y:
169  *              matrix a is n-by-n.
170  *
171  * alpha   (input/output) double*
172  *
173  * beta    (input) double*
174  *
175  * a       (input/output) double*
176  *
177  * lda     (input) lda
178  *         leading dimension of matrix A.
179  *
180  * x_head  (input) double*
181  * x_tail  (input) double*
182  *         note : x is input only. x should be determined before calling
183  *         this function.
184  *
185  * y       (input/output) double*
186  *         generated vector y.
187  *
188  * seed    (input/output) int *
189  *         seed for the random number generator.
190  *
191  * head_r_true  (output) double *
192  *         the leading part of the truth in double-double.
193  *
194  * tail_r_true  (output) double *
195  *         the trailing part of the truth in double-double
196  *
197  */
198 {
199 
200   int i, yi, ri;
201   int incx = 1, incy = 1;
202   int incyi, incri;
203   int incx_veci, yi0;
204   int inca_vec;
205 
206   double y_elem;
207   double head_r_true_elem, tail_r_true_elem;
208 
209   double *a_vec;
210   double *x_head_vec;
211   double *x_tail_vec;
212 
213   double *y_i = y;
214   double *alpha_i = alpha;
215   double *beta_i = beta;
216   double *x_head_i = x_head;
217   double *x_tail_i = x_tail;
218 
219   /*a_vec must have stride of 1 */
220   inca_vec = 1;
221 
222 
223   a_vec = (double *) blas_malloc(n * sizeof(double));
224   if (n > 0 && a_vec == NULL) {
225     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
226   }
227   for (i = 0; i < n; i += inca_vec) {
228     a_vec[i] = 0.0;
229   }
230   x_head_vec = (double *) blas_malloc(n * sizeof(double));
231   if (n > 0 && x_head_vec == NULL) {
232     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
233   }
234   x_tail_vec = (double *) blas_malloc(n * sizeof(double));
235   if (n > 0 && x_tail_vec == NULL) {
236     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
237   }
238   dcopy_vector(x_head_i, n, incx, x_head_vec, 1);
239   dcopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
240 
241   incyi = incy;
242 
243   yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
244 
245   incri = 1;
246 
247 
248   incx_veci = 1;
249 
250 
251   /* Fill in skew matrix A */
252   for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
253     /* x_i has already been copied to x_vec */
254     dskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
255 
256     /* skew matricies have zeroed diagonals */
257     a_vec[i] = 0.0;
258     BLAS_ddot2_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i, 1,
259 		       beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
260 		       &y_elem, &head_r_true_elem, &tail_r_true_elem);
261 
262     dskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
263 
264     /*commits an element to the generated y */
265     y_i[yi] = y_elem;
266     head_r_true[ri] = head_r_true_elem;
267     tail_r_true[ri] = tail_r_true_elem;
268   }
269 
270   blas_free(a_vec);
271   blas_free(x_head_vec);
272   blas_free(x_tail_vec);
273 }
BLAS_dskmv2_testgen_d_s(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,double * a,int lda,float * x_head,float * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)274 void BLAS_dskmv2_testgen_d_s(int norm, enum blas_order_type order,
275 			     enum blas_uplo_type uplo, int n, double *alpha,
276 			     double *beta, double *a, int lda, float *x_head,
277 			     float *x_tail, double *y, int *seed,
278 			     double *head_r_true, double *tail_r_true)
279 
280 /*
281  * Purpose
282  * =======
283  *
284  *   Generates a skew Matrix for use with hemv2 testing
285  *
286  * Arguments
287  * =========
288  *
289  * norm    (input) int
290  *           = -1: the vectors are scaled with norms near underflow.
291  *           =  0: the vectors have norms of order 1.
292  *           =  1: the vectors are scaled with norms near overflow.
293  *
294  * order   (input) enum blas_order_type
295  *           storage format of the matrices
296  *
297  * uplo    (input) enum blas_uplo_type
298  *           which half of the skew symmetric matrix a is to be stored.
299  *
300  * n       (input) int
301  *           sizes of symmetrical matrix a, size of vectors x, y:
302  *              matrix a is n-by-n.
303  *
304  * alpha   (input/output) double*
305  *
306  * beta    (input) double*
307  *
308  * a       (input/output) double*
309  *
310  * lda     (input) lda
311  *         leading dimension of matrix A.
312  *
313  * x_head  (input) float*
314  * x_tail  (input) float*
315  *         note : x is input only. x should be determined before calling
316  *         this function.
317  *
318  * y       (input/output) double*
319  *         generated vector y.
320  *
321  * seed    (input/output) int *
322  *         seed for the random number generator.
323  *
324  * head_r_true  (output) double *
325  *         the leading part of the truth in double-double.
326  *
327  * tail_r_true  (output) double *
328  *         the trailing part of the truth in double-double
329  *
330  */
331 {
332 
333   int i, yi, ri;
334   int incx = 1, incy = 1;
335   int incyi, incri;
336   int incx_veci, yi0;
337   int inca_vec;
338 
339   double y_elem;
340   double head_r_true_elem, tail_r_true_elem;
341 
342   double *a_vec;
343   float *x_head_vec;
344   float *x_tail_vec;
345 
346   double *y_i = y;
347   double *alpha_i = alpha;
348   double *beta_i = beta;
349   float *x_head_i = x_head;
350   float *x_tail_i = x_tail;
351 
352   /*a_vec must have stride of 1 */
353   inca_vec = 1;
354 
355 
356   a_vec = (double *) blas_malloc(n * sizeof(double));
357   if (n > 0 && a_vec == NULL) {
358     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
359   }
360   for (i = 0; i < n; i += inca_vec) {
361     a_vec[i] = 0.0;
362   }
363   x_head_vec = (float *) blas_malloc(n * sizeof(float));
364   if (n > 0 && x_head_vec == NULL) {
365     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
366   }
367   x_tail_vec = (float *) blas_malloc(n * sizeof(float));
368   if (n > 0 && x_tail_vec == NULL) {
369     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
370   }
371   scopy_vector(x_head_i, n, incx, x_head_vec, 1);
372   scopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
373 
374   incyi = incy;
375 
376   yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
377 
378   incri = 1;
379 
380 
381   incx_veci = 1;
382 
383 
384   /* Fill in skew matrix A */
385   for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
386     /* x_i has already been copied to x_vec */
387     dskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
388 
389     /* skew matricies have zeroed diagonals */
390     a_vec[i] = 0.0;
391     BLAS_ddot2_s_d_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i,
392 			   1, beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
393 			   &y_elem, &head_r_true_elem, &tail_r_true_elem);
394 
395     dskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
396 
397     /*commits an element to the generated y */
398     y_i[yi] = y_elem;
399     head_r_true[ri] = head_r_true_elem;
400     tail_r_true[ri] = tail_r_true_elem;
401   }
402 
403   blas_free(a_vec);
404   blas_free(x_head_vec);
405   blas_free(x_tail_vec);
406 }
BLAS_dskmv2_testgen_s_d(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,float * a,int lda,double * x_head,double * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)407 void BLAS_dskmv2_testgen_s_d(int norm, enum blas_order_type order,
408 			     enum blas_uplo_type uplo, int n, double *alpha,
409 			     double *beta, float *a, int lda, double *x_head,
410 			     double *x_tail, double *y, int *seed,
411 			     double *head_r_true, double *tail_r_true)
412 
413 /*
414  * Purpose
415  * =======
416  *
417  *   Generates a skew Matrix for use with hemv2 testing
418  *
419  * Arguments
420  * =========
421  *
422  * norm    (input) int
423  *           = -1: the vectors are scaled with norms near underflow.
424  *           =  0: the vectors have norms of order 1.
425  *           =  1: the vectors are scaled with norms near overflow.
426  *
427  * order   (input) enum blas_order_type
428  *           storage format of the matrices
429  *
430  * uplo    (input) enum blas_uplo_type
431  *           which half of the skew symmetric matrix a is to be stored.
432  *
433  * n       (input) int
434  *           sizes of symmetrical matrix a, size of vectors x, y:
435  *              matrix a is n-by-n.
436  *
437  * alpha   (input/output) double*
438  *
439  * beta    (input) double*
440  *
441  * a       (input/output) float*
442  *
443  * lda     (input) lda
444  *         leading dimension of matrix A.
445  *
446  * x_head  (input) double*
447  * x_tail  (input) double*
448  *         note : x is input only. x should be determined before calling
449  *         this function.
450  *
451  * y       (input/output) double*
452  *         generated vector y.
453  *
454  * seed    (input/output) int *
455  *         seed for the random number generator.
456  *
457  * head_r_true  (output) double *
458  *         the leading part of the truth in double-double.
459  *
460  * tail_r_true  (output) double *
461  *         the trailing part of the truth in double-double
462  *
463  */
464 {
465 
466   int i, yi, ri;
467   int incx = 1, incy = 1;
468   int incyi, incri;
469   int incx_veci, yi0;
470   int inca_vec;
471 
472   double y_elem;
473   double head_r_true_elem, tail_r_true_elem;
474 
475   float *a_vec;
476   double *x_head_vec;
477   double *x_tail_vec;
478 
479   double *y_i = y;
480   double *alpha_i = alpha;
481   double *beta_i = beta;
482   double *x_head_i = x_head;
483   double *x_tail_i = x_tail;
484 
485   /*a_vec must have stride of 1 */
486   inca_vec = 1;
487 
488 
489   a_vec = (float *) blas_malloc(n * sizeof(float));
490   if (n > 0 && a_vec == NULL) {
491     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
492   }
493   for (i = 0; i < n; i += inca_vec) {
494     a_vec[i] = 0.0;
495   }
496   x_head_vec = (double *) blas_malloc(n * sizeof(double));
497   if (n > 0 && x_head_vec == NULL) {
498     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
499   }
500   x_tail_vec = (double *) blas_malloc(n * sizeof(double));
501   if (n > 0 && x_tail_vec == NULL) {
502     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
503   }
504   dcopy_vector(x_head_i, n, incx, x_head_vec, 1);
505   dcopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
506 
507   incyi = incy;
508 
509   yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
510 
511   incri = 1;
512 
513 
514   incx_veci = 1;
515 
516 
517   /* Fill in skew matrix A */
518   for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
519     /* x_i has already been copied to x_vec */
520     sskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
521 
522     /* skew matricies have zeroed diagonals */
523     a_vec[i] = 0.0;
524     BLAS_ddot2_d_s_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i,
525 			   1, beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
526 			   &y_elem, &head_r_true_elem, &tail_r_true_elem);
527 
528     sskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
529 
530     /*commits an element to the generated y */
531     y_i[yi] = y_elem;
532     head_r_true[ri] = head_r_true_elem;
533     tail_r_true[ri] = tail_r_true_elem;
534   }
535 
536   blas_free(a_vec);
537   blas_free(x_head_vec);
538   blas_free(x_tail_vec);
539 }
BLAS_dskmv2_testgen_s_s(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,float * a,int lda,float * x_head,float * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)540 void BLAS_dskmv2_testgen_s_s(int norm, enum blas_order_type order,
541 			     enum blas_uplo_type uplo, int n, double *alpha,
542 			     double *beta, float *a, int lda, float *x_head,
543 			     float *x_tail, double *y, int *seed,
544 			     double *head_r_true, double *tail_r_true)
545 
546 /*
547  * Purpose
548  * =======
549  *
550  *   Generates a skew Matrix for use with hemv2 testing
551  *
552  * Arguments
553  * =========
554  *
555  * norm    (input) int
556  *           = -1: the vectors are scaled with norms near underflow.
557  *           =  0: the vectors have norms of order 1.
558  *           =  1: the vectors are scaled with norms near overflow.
559  *
560  * order   (input) enum blas_order_type
561  *           storage format of the matrices
562  *
563  * uplo    (input) enum blas_uplo_type
564  *           which half of the skew symmetric matrix a is to be stored.
565  *
566  * n       (input) int
567  *           sizes of symmetrical matrix a, size of vectors x, y:
568  *              matrix a is n-by-n.
569  *
570  * alpha   (input/output) double*
571  *
572  * beta    (input) double*
573  *
574  * a       (input/output) float*
575  *
576  * lda     (input) lda
577  *         leading dimension of matrix A.
578  *
579  * x_head  (input) float*
580  * x_tail  (input) float*
581  *         note : x is input only. x should be determined before calling
582  *         this function.
583  *
584  * y       (input/output) double*
585  *         generated vector y.
586  *
587  * seed    (input/output) int *
588  *         seed for the random number generator.
589  *
590  * head_r_true  (output) double *
591  *         the leading part of the truth in double-double.
592  *
593  * tail_r_true  (output) double *
594  *         the trailing part of the truth in double-double
595  *
596  */
597 {
598 
599   int i, yi, ri;
600   int incx = 1, incy = 1;
601   int incyi, incri;
602   int incx_veci, yi0;
603   int inca_vec;
604 
605   double y_elem;
606   double head_r_true_elem, tail_r_true_elem;
607 
608   float *a_vec;
609   float *x_head_vec;
610   float *x_tail_vec;
611 
612   double *y_i = y;
613   double *alpha_i = alpha;
614   double *beta_i = beta;
615   float *x_head_i = x_head;
616   float *x_tail_i = x_tail;
617 
618   /*a_vec must have stride of 1 */
619   inca_vec = 1;
620 
621 
622   a_vec = (float *) blas_malloc(n * sizeof(float));
623   if (n > 0 && a_vec == NULL) {
624     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
625   }
626   for (i = 0; i < n; i += inca_vec) {
627     a_vec[i] = 0.0;
628   }
629   x_head_vec = (float *) blas_malloc(n * sizeof(float));
630   if (n > 0 && x_head_vec == NULL) {
631     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
632   }
633   x_tail_vec = (float *) blas_malloc(n * sizeof(float));
634   if (n > 0 && x_tail_vec == NULL) {
635     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
636   }
637   scopy_vector(x_head_i, n, incx, x_head_vec, 1);
638   scopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
639 
640   incyi = incy;
641 
642   yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
643 
644   incri = 1;
645 
646 
647   incx_veci = 1;
648 
649 
650   /* Fill in skew matrix A */
651   for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
652     /* x_i has already been copied to x_vec */
653     sskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
654 
655     /* skew matricies have zeroed diagonals */
656     a_vec[i] = 0.0;
657     BLAS_ddot2_s_s_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i,
658 			   1, beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
659 			   &y_elem, &head_r_true_elem, &tail_r_true_elem);
660 
661     sskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
662 
663     /*commits an element to the generated y */
664     y_i[yi] = y_elem;
665     head_r_true[ri] = head_r_true_elem;
666     tail_r_true[ri] = tail_r_true_elem;
667   }
668 
669   blas_free(a_vec);
670   blas_free(x_head_vec);
671   blas_free(x_tail_vec);
672 }
673 
674 
BLAS_chemv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)675 void BLAS_chemv2_testgen(int norm, enum blas_order_type order,
676 			 enum blas_uplo_type uplo, int n, void *alpha,
677 			 int alpha_flag, void *beta, int beta_flag, void *a,
678 			 int lda, void *x_head, void *x_tail, void *y,
679 			 int *seed, double *head_r_true, double *tail_r_true)
680 
681 /*
682  * Purpose
683  * =======
684  *
685  *   Generates the test inputs to BLAS_chemv2{_x}
686  *
687  * Arguments
688  * =========
689  *
690  * norm    (input) int
691  *           = -1: the vectors are scaled with norms near underflow.
692  *           =  0: the vectors have norms of order 1.
693  *           =  1: the vectors are scaled with norms near overflow.
694  *
695  * order   (input) enum blas_order_type
696  *           storage format of the matrices
697  *
698  * uplo    (input) enum blas_uplo_type
699  *           which half of the hemv2etric matrix a is to be stored.
700  *
701  * n       (input) int
702  *           sizes of symmetrical matrix a, size of vectors x, y:
703  *              matrix a is n-by-n.
704  *
705  * alpha   (input/output) void*
706  *           if alpha_flag = 1, alpha is input.
707  *           if alpha_flag = 0, alpha is output.
708  *
709  * alpha_flag (input) int
710  *           = 0: alpha is free, and is output.
711  *           = 1: alpha is fixed on input.
712  *
713  * beta    (input/output) void*
714  *           if beta_flag = 1, beta is input.
715  *           if beta_flag = 0, beta is output.
716  *
717  * beta_flag (input) int
718  *           = 0: beta is free, and is output.
719  *           = 1: beta is fixed on input.
720  *
721  * a       (input/output) void*
722  *
723  * lda     (input) lda
724  *         leading dimension of matrix A.
725  *
726  * x_head  (input/output) void*
727  * x_tail  (input/output) void*
728  *         vector x = (x_head + x_tail)
729  *
730  * y       (input/output) void*
731  *         generated vector y that will be used as an input to HEMV2.
732  *
733  * seed    (input/output) int *
734  *         seed for the random number generator.
735  *
736  * head_r_true  (output) double *
737  *         the leading part of the truth in double-double.
738  *
739  * tail_r_true  (output) double *
740  *         the trailing part of the truth in double-double
741  *
742  */
743 {
744   {
745 
746     /* Strategy:
747        r1 = alpha * A1 * x + beta * y1
748        r2 = alpha * A2 * x + beta * y2
749        where all the matrices and vectors are real.  Then let R = R1 + i R2,
750        A = A1 + i A2, y = y1 + i y2.  To make A Hermitian, A1 is symmetric,
751        and A2 is skew-symmetric.
752      */
753 
754     int i, j;
755     int yi;
756     int aij, ai;
757     int a1ij, a1i;
758     int xi;
759     int mi;
760     int incx = 1, incy = 1;
761     int incyi, xi0, yi0;
762     int incaij, incai;
763     int inca1ij, inca1i;
764     int incxi;
765     int inca_vec, incx_vec;
766     int ld;
767     int ab;
768     int ri, incri;
769 
770     float *a1;
771     float *a2;
772     float *y1;
773     float *y2;
774     float *x_head_0;
775     float *x_tail_0;
776 
777     double *head_r1_true, *tail_r1_true;
778     double *head_r2_true, *tail_r2_true;
779 
780     double head_r_elem1, tail_r_elem1;
781     double head_r_elem2, tail_r_elem2;
782     double head_r_elem, tail_r_elem;
783 
784     float *a_vec;
785     float *x_head_vec;
786     float *x_tail_vec;
787 
788     float *y_i = (float *) y;
789     float *alpha_i = (float *) alpha;
790     float *beta_i = (float *) beta;
791 
792     float *a_i = (float *) a;
793     float *x_head_i = (float *) x_head;
794     float *x_tail_i = (float *) x_tail;
795 
796     ld = n;
797     if (order == blas_colmajor) {
798       inca1i = incai = 1;
799       incyi = incy;
800       incaij = lda;
801       incxi = incx;
802       inca1ij = n;
803     } else {
804       incyi = incy;
805       incai = lda;
806       incxi = incx;
807       inca1i = n;
808       inca1ij = incaij = 1;
809     }
810     xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
811     yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
812     incri = 1;
813     incri *= 2;
814     xi0 *= 2;
815     yi0 *= 2;
816     incyi *= 2;
817     incai *= 2;
818     incaij *= 2;
819     incxi *= 2;
820 
821     inca_vec = incx_vec = 1;
822     inca_vec *= 2;
823     incx_vec *= 2;
824     a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
825     if (n > 0 && a_vec == NULL) {
826       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
827     }
828     x_head_vec = (float *) blas_malloc(n * sizeof(float) * 2);
829     if (n > 0 && x_head_vec == NULL) {
830       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
831     }
832     x_tail_vec = (float *) blas_malloc(n * sizeof(float) * 2);
833     if (n > 0 && x_tail_vec == NULL) {
834       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
835     }
836 
837     int incx0, incy1, incy2, incmi = 1;
838     a1 = (float *) blas_malloc(n * n * sizeof(float));
839     if (n * n > 0 && a1 == NULL) {
840       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
841     }
842     a2 = (float *) blas_malloc(n * n * sizeof(float));
843     if (n * n > 0 && a2 == NULL) {
844       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
845     }
846     y1 = (float *) blas_malloc(n * sizeof(float));
847     if (n > 0 && y1 == NULL) {
848       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
849     }
850     incy1 = 1;
851 
852     y2 = (float *) blas_malloc(n * sizeof(float));
853     if (n > 0 && y2 == NULL) {
854       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
855     }
856     incy2 = 1;
857 
858     x_head_0 = (float *) blas_malloc(n * sizeof(float));
859     if (n > 0 && x_head_0 == NULL) {
860       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
861     }
862     x_tail_0 = (float *) blas_malloc(n * sizeof(float));
863     if (n > 0 && x_tail_0 == NULL) {
864       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
865     }
866     incx0 = 1;
867 
868     head_r1_true = (double *) blas_malloc(n * sizeof(double));
869     tail_r1_true = (double *) blas_malloc(n * sizeof(double));
870     if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
871       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
872     };
873     head_r2_true = (double *) blas_malloc(n * sizeof(double));
874     tail_r2_true = (double *) blas_malloc(n * sizeof(double));
875     if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
876       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
877     };
878 
879     /* First generate the real portion of A and x. */
880     BLAS_ssymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
881 			x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
882 			head_r1_true, tail_r1_true);
883 
884     /* x0 is now fixed and is input to this call */
885     BLAS_sskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
886 			x_head_0, x_tail_0, y2, seed, head_r2_true,
887 			tail_r2_true);
888 
889 
890     /* The case where x is a complex vector.  Since x is generated
891        as a real vector, we need to perform some scaling.
892 
893        There are four cases to consider, depending on the values
894        of alpha and beta.
895 
896        values                         scaling
897        alpha   beta      alpha  A    x       beta    y    r (truth)
898        0    1      1                    i               i    i
899        1    1      ?                   1+i      1+i         1+i
900        2    ?      1         1+i       1+i             2i    2i
901        3    ?      ?         1+i       1+i      2i           2i
902 
903        Note that we can afford to scale r by 1+i, since they are
904        computed in double-double precision.
905      */
906 
907     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
908       ab = 0;
909       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
910     } else if (alpha_i[0] == 1.0) {
911       ab = 1;
912       /* set alpha to 1, multiply beta by 1+i. */
913       alpha_i[1] = 0.0;
914       beta_i[1] = beta_i[0];
915     } else if (beta_i[0] == 1.0) {
916       ab = 2;
917       /* set beta to 1, multiply alpha by 1+i. */
918       beta_i[1] = 0.0;
919       alpha_i[1] = alpha_i[0];
920     } else {
921       ab = 3;
922       /* multiply alpha by 1+i, beta by 2i. */
923       alpha_i[1] = alpha_i[0];
924       beta_i[1] = 2.0 * beta_i[0];
925       beta_i[0] = 0.0;
926     }
927 
928     /* Now fill in a */
929     for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
930       for (j = 0, aij = ai, a1ij = a1i; j < n;
931 	   j++, aij += incaij, a1ij += inca1ij) {
932 	a_i[aij] = a1[a1ij];
933 	a_i[aij + 1] = a2[a1ij];
934       }
935     }
936 
937     /* Fill in x */
938     for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
939       if (ab == 0) {
940 	x_head_i[xi] = 0.0;
941 	x_head_i[xi + 1] = x_head_0[mi];
942 	x_tail_i[xi] = 0.0;
943 	x_tail_i[xi + 1] = x_tail_0[mi];
944       } else {
945 	x_head_i[xi] = x_head_0[mi];
946 	x_head_i[xi + 1] = x_head_0[mi];
947 	x_tail_i[xi] = x_tail_0[mi];
948 	x_tail_i[xi + 1] = x_tail_0[mi];
949       }
950     }
951 
952     /* Fill in y */
953     for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
954       if (ab == 0) {
955 	y_i[yi] = -y2[mi];
956 	y_i[yi + 1] = y1[mi];
957       } else if (ab == 2) {
958 	y_i[yi] = -2.0 * y2[mi];
959 	y_i[yi + 1] = 2.0 * y1[mi];
960       } else {
961 	y_i[yi] = y1[mi];
962 	y_i[yi + 1] = y2[mi];
963       }
964     }
965 
966     /* Fill in the truth */
967     for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
968 
969       head_r_elem1 = head_r1_true[mi];
970       tail_r_elem1 = tail_r1_true[mi];
971       head_r_elem2 = head_r2_true[mi];
972       tail_r_elem2 = tail_r2_true[mi];
973 
974       if (ab == 0) {
975 	head_r_true[ri] = -head_r_elem2;
976 	tail_r_true[ri] = -tail_r_elem2;
977 	head_r_true[ri + 1] = head_r_elem1;
978 	tail_r_true[ri + 1] = tail_r_elem1;
979       } else if (ab == 1) {
980 	{
981 	  /* Compute double-double = double-double + double-double. */
982 	  double bv;
983 	  double s1, s2, t1, t2;
984 
985 	  /* Add two hi words. */
986 	  s1 = head_r_elem1 + head_r_elem2;
987 	  bv = s1 - head_r_elem1;
988 	  s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
989 
990 	  /* Add two lo words. */
991 	  t1 = tail_r_elem1 + tail_r_elem2;
992 	  bv = t1 - tail_r_elem1;
993 	  t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
994 
995 	  s2 += t1;
996 
997 	  /* Renormalize (s1, s2)  to  (t1, s2) */
998 	  t1 = s1 + s2;
999 	  s2 = s2 - (t1 - s1);
1000 
1001 	  t2 += s2;
1002 
1003 	  /* Renormalize (t1, t2)  */
1004 	  head_r_elem = t1 + t2;
1005 	  tail_r_elem = t2 - (head_r_elem - t1);
1006 	}
1007 
1008 	/* Set the imaginary part to  R1 + R2 */
1009 	tail_r_true[ri + 1] = tail_r_elem;
1010 	head_r_true[ri + 1] = head_r_elem;
1011 
1012 	/* Set the real part to R1 - R2. */
1013 	{
1014 	  double head_bt, tail_bt;
1015 	  head_bt = -head_r_elem2;
1016 	  tail_bt = -tail_r_elem2;
1017 	  {
1018 	    /* Compute double-double = double-double + double-double. */
1019 	    double bv;
1020 	    double s1, s2, t1, t2;
1021 
1022 	    /* Add two hi words. */
1023 	    s1 = head_r_elem1 + head_bt;
1024 	    bv = s1 - head_r_elem1;
1025 	    s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1026 
1027 	    /* Add two lo words. */
1028 	    t1 = tail_r_elem1 + tail_bt;
1029 	    bv = t1 - tail_r_elem1;
1030 	    t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1031 
1032 	    s2 += t1;
1033 
1034 	    /* Renormalize (s1, s2)  to  (t1, s2) */
1035 	    t1 = s1 + s2;
1036 	    s2 = s2 - (t1 - s1);
1037 
1038 	    t2 += s2;
1039 
1040 	    /* Renormalize (t1, t2)  */
1041 	    head_r_elem = t1 + t2;
1042 	    tail_r_elem = t2 - (head_r_elem - t1);
1043 	  }
1044 	}
1045 	tail_r_true[ri] = tail_r_elem;
1046 	head_r_true[ri] = head_r_elem;
1047       } else {
1048 	/* Real part */
1049 	{
1050 	  /* Compute double-double = double-double * double. */
1051 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1052 
1053 	  con = head_r_elem2 * split;
1054 	  a11 = con - head_r_elem2;
1055 	  a11 = con - a11;
1056 	  a21 = head_r_elem2 - a11;
1057 	  con = -2.0 * split;
1058 	  b1 = con - -2.0;
1059 	  b1 = con - b1;
1060 	  b2 = -2.0 - b1;
1061 
1062 	  c11 = head_r_elem2 * -2.0;
1063 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1064 
1065 	  c2 = tail_r_elem2 * -2.0;
1066 	  t1 = c11 + c2;
1067 	  t2 = (c2 - (t1 - c11)) + c21;
1068 
1069 	  head_r_elem = t1 + t2;
1070 	  tail_r_elem = t2 - (head_r_elem - t1);
1071 	}
1072 	head_r_true[ri] = head_r_elem;
1073 	tail_r_true[ri] = tail_r_elem;
1074 
1075 	/* Imaginary Part */
1076 	{
1077 	  /* Compute double-double = double-double * double. */
1078 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1079 
1080 	  con = head_r_elem1 * split;
1081 	  a11 = con - head_r_elem1;
1082 	  a11 = con - a11;
1083 	  a21 = head_r_elem1 - a11;
1084 	  con = 2.0 * split;
1085 	  b1 = con - 2.0;
1086 	  b1 = con - b1;
1087 	  b2 = 2.0 - b1;
1088 
1089 	  c11 = head_r_elem1 * 2.0;
1090 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1091 
1092 	  c2 = tail_r_elem1 * 2.0;
1093 	  t1 = c11 + c2;
1094 	  t2 = (c2 - (t1 - c11)) + c21;
1095 
1096 	  head_r_elem = t1 + t2;
1097 	  tail_r_elem = t2 - (head_r_elem - t1);
1098 	}
1099 	head_r_true[ri + 1] = head_r_elem;
1100 	tail_r_true[ri + 1] = tail_r_elem;
1101       }
1102     }
1103 
1104 
1105     blas_free(a1);
1106     blas_free(a2);
1107     blas_free(y1);
1108     blas_free(y2);
1109     blas_free(x_head_0);
1110     blas_free(x_tail_0);
1111     blas_free(head_r1_true);
1112     blas_free(tail_r1_true);
1113     blas_free(head_r2_true);
1114     blas_free(tail_r2_true);
1115     blas_free(a_vec);
1116     blas_free(x_head_vec);
1117     blas_free(x_tail_vec);
1118   }
1119 }
1120 
BLAS_zhemv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)1121 void BLAS_zhemv2_testgen(int norm, enum blas_order_type order,
1122 			 enum blas_uplo_type uplo, int n, void *alpha,
1123 			 int alpha_flag, void *beta, int beta_flag, void *a,
1124 			 int lda, void *x_head, void *x_tail, void *y,
1125 			 int *seed, double *head_r_true, double *tail_r_true)
1126 
1127 /*
1128  * Purpose
1129  * =======
1130  *
1131  *   Generates the test inputs to BLAS_zhemv2{_x}
1132  *
1133  * Arguments
1134  * =========
1135  *
1136  * norm    (input) int
1137  *           = -1: the vectors are scaled with norms near underflow.
1138  *           =  0: the vectors have norms of order 1.
1139  *           =  1: the vectors are scaled with norms near overflow.
1140  *
1141  * order   (input) enum blas_order_type
1142  *           storage format of the matrices
1143  *
1144  * uplo    (input) enum blas_uplo_type
1145  *           which half of the hemv2etric matrix a is to be stored.
1146  *
1147  * n       (input) int
1148  *           sizes of symmetrical matrix a, size of vectors x, y:
1149  *              matrix a is n-by-n.
1150  *
1151  * alpha   (input/output) void*
1152  *           if alpha_flag = 1, alpha is input.
1153  *           if alpha_flag = 0, alpha is output.
1154  *
1155  * alpha_flag (input) int
1156  *           = 0: alpha is free, and is output.
1157  *           = 1: alpha is fixed on input.
1158  *
1159  * beta    (input/output) void*
1160  *           if beta_flag = 1, beta is input.
1161  *           if beta_flag = 0, beta is output.
1162  *
1163  * beta_flag (input) int
1164  *           = 0: beta is free, and is output.
1165  *           = 1: beta is fixed on input.
1166  *
1167  * a       (input/output) void*
1168  *
1169  * lda     (input) lda
1170  *         leading dimension of matrix A.
1171  *
1172  * x_head  (input/output) void*
1173  * x_tail  (input/output) void*
1174  *         vector x = (x_head + x_tail)
1175  *
1176  * y       (input/output) void*
1177  *         generated vector y that will be used as an input to HEMV2.
1178  *
1179  * seed    (input/output) int *
1180  *         seed for the random number generator.
1181  *
1182  * head_r_true  (output) double *
1183  *         the leading part of the truth in double-double.
1184  *
1185  * tail_r_true  (output) double *
1186  *         the trailing part of the truth in double-double
1187  *
1188  */
1189 {
1190   {
1191 
1192     /* Strategy:
1193        r1 = alpha * A1 * x + beta * y1
1194        r2 = alpha * A2 * x + beta * y2
1195        where all the matrices and vectors are real.  Then let R = R1 + i R2,
1196        A = A1 + i A2, y = y1 + i y2.  To make A Hermitian, A1 is symmetric,
1197        and A2 is skew-symmetric.
1198      */
1199 
1200     int i, j;
1201     int yi;
1202     int aij, ai;
1203     int a1ij, a1i;
1204     int xi;
1205     int mi;
1206     int incx = 1, incy = 1;
1207     int incyi, xi0, yi0;
1208     int incaij, incai;
1209     int inca1ij, inca1i;
1210     int incxi;
1211     int inca_vec, incx_vec;
1212     int ld;
1213     int ab;
1214     int ri, incri;
1215 
1216     double *a1;
1217     double *a2;
1218     double *y1;
1219     double *y2;
1220     double *x_head_0;
1221     double *x_tail_0;
1222 
1223     double *head_r1_true, *tail_r1_true;
1224     double *head_r2_true, *tail_r2_true;
1225 
1226     double head_r_elem1, tail_r_elem1;
1227     double head_r_elem2, tail_r_elem2;
1228     double head_r_elem, tail_r_elem;
1229 
1230     double *a_vec;
1231     double *x_head_vec;
1232     double *x_tail_vec;
1233 
1234     double *y_i = (double *) y;
1235     double *alpha_i = (double *) alpha;
1236     double *beta_i = (double *) beta;
1237 
1238     double *a_i = (double *) a;
1239     double *x_head_i = (double *) x_head;
1240     double *x_tail_i = (double *) x_tail;
1241 
1242     ld = n;
1243     if (order == blas_colmajor) {
1244       inca1i = incai = 1;
1245       incyi = incy;
1246       incaij = lda;
1247       incxi = incx;
1248       inca1ij = n;
1249     } else {
1250       incyi = incy;
1251       incai = lda;
1252       incxi = incx;
1253       inca1i = n;
1254       inca1ij = incaij = 1;
1255     }
1256     xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
1257     yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
1258     incri = 1;
1259     incri *= 2;
1260     xi0 *= 2;
1261     yi0 *= 2;
1262     incyi *= 2;
1263     incai *= 2;
1264     incaij *= 2;
1265     incxi *= 2;
1266 
1267     inca_vec = incx_vec = 1;
1268     inca_vec *= 2;
1269     incx_vec *= 2;
1270     a_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1271     if (n > 0 && a_vec == NULL) {
1272       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1273     }
1274     x_head_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1275     if (n > 0 && x_head_vec == NULL) {
1276       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1277     }
1278     x_tail_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1279     if (n > 0 && x_tail_vec == NULL) {
1280       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1281     }
1282 
1283     int incx0, incy1, incy2, incmi = 1;
1284     a1 = (double *) blas_malloc(n * n * sizeof(double));
1285     if (n * n > 0 && a1 == NULL) {
1286       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1287     }
1288     a2 = (double *) blas_malloc(n * n * sizeof(double));
1289     if (n * n > 0 && a2 == NULL) {
1290       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1291     }
1292     y1 = (double *) blas_malloc(n * sizeof(double));
1293     if (n > 0 && y1 == NULL) {
1294       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1295     }
1296     incy1 = 1;
1297 
1298     y2 = (double *) blas_malloc(n * sizeof(double));
1299     if (n > 0 && y2 == NULL) {
1300       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1301     }
1302     incy2 = 1;
1303 
1304     x_head_0 = (double *) blas_malloc(n * sizeof(double));
1305     if (n > 0 && x_head_0 == NULL) {
1306       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1307     }
1308     x_tail_0 = (double *) blas_malloc(n * sizeof(double));
1309     if (n > 0 && x_tail_0 == NULL) {
1310       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1311     }
1312     incx0 = 1;
1313 
1314     head_r1_true = (double *) blas_malloc(n * sizeof(double));
1315     tail_r1_true = (double *) blas_malloc(n * sizeof(double));
1316     if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1317       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1318     };
1319     head_r2_true = (double *) blas_malloc(n * sizeof(double));
1320     tail_r2_true = (double *) blas_malloc(n * sizeof(double));
1321     if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1322       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1323     };
1324 
1325     /* First generate the real portion of A and x. */
1326     BLAS_dsymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
1327 			x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
1328 			head_r1_true, tail_r1_true);
1329 
1330     /* x0 is now fixed and is input to this call */
1331     BLAS_dskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
1332 			x_head_0, x_tail_0, y2, seed, head_r2_true,
1333 			tail_r2_true);
1334 
1335 
1336     /* The case where x is a complex vector.  Since x is generated
1337        as a real vector, we need to perform some scaling.
1338 
1339        There are four cases to consider, depending on the values
1340        of alpha and beta.
1341 
1342        values                         scaling
1343        alpha   beta      alpha  A    x       beta    y    r (truth)
1344        0    1      1                    i               i    i
1345        1    1      ?                   1+i      1+i         1+i
1346        2    ?      1         1+i       1+i             2i    2i
1347        3    ?      ?         1+i       1+i      2i           2i
1348 
1349        Note that we can afford to scale r by 1+i, since they are
1350        computed in double-double precision.
1351      */
1352 
1353     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1354       ab = 0;
1355       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
1356     } else if (alpha_i[0] == 1.0) {
1357       ab = 1;
1358       /* set alpha to 1, multiply beta by 1+i. */
1359       alpha_i[1] = 0.0;
1360       beta_i[1] = beta_i[0];
1361     } else if (beta_i[0] == 1.0) {
1362       ab = 2;
1363       /* set beta to 1, multiply alpha by 1+i. */
1364       beta_i[1] = 0.0;
1365       alpha_i[1] = alpha_i[0];
1366     } else {
1367       ab = 3;
1368       /* multiply alpha by 1+i, beta by 2i. */
1369       alpha_i[1] = alpha_i[0];
1370       beta_i[1] = 2.0 * beta_i[0];
1371       beta_i[0] = 0.0;
1372     }
1373 
1374     /* Now fill in a */
1375     for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
1376       for (j = 0, aij = ai, a1ij = a1i; j < n;
1377 	   j++, aij += incaij, a1ij += inca1ij) {
1378 	a_i[aij] = a1[a1ij];
1379 	a_i[aij + 1] = a2[a1ij];
1380       }
1381     }
1382 
1383     /* Fill in x */
1384     for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
1385       if (ab == 0) {
1386 	x_head_i[xi] = 0.0;
1387 	x_head_i[xi + 1] = x_head_0[mi];
1388 	x_tail_i[xi] = 0.0;
1389 	x_tail_i[xi + 1] = x_tail_0[mi];
1390       } else {
1391 	x_head_i[xi] = x_head_0[mi];
1392 	x_head_i[xi + 1] = x_head_0[mi];
1393 	x_tail_i[xi] = x_tail_0[mi];
1394 	x_tail_i[xi + 1] = x_tail_0[mi];
1395       }
1396     }
1397 
1398     /* Fill in y */
1399     for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
1400       if (ab == 0) {
1401 	y_i[yi] = -y2[mi];
1402 	y_i[yi + 1] = y1[mi];
1403       } else if (ab == 2) {
1404 	y_i[yi] = -2.0 * y2[mi];
1405 	y_i[yi + 1] = 2.0 * y1[mi];
1406       } else {
1407 	y_i[yi] = y1[mi];
1408 	y_i[yi + 1] = y2[mi];
1409       }
1410     }
1411 
1412     /* Fill in the truth */
1413     for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
1414 
1415       head_r_elem1 = head_r1_true[mi];
1416       tail_r_elem1 = tail_r1_true[mi];
1417       head_r_elem2 = head_r2_true[mi];
1418       tail_r_elem2 = tail_r2_true[mi];
1419 
1420       if (ab == 0) {
1421 	head_r_true[ri] = -head_r_elem2;
1422 	tail_r_true[ri] = -tail_r_elem2;
1423 	head_r_true[ri + 1] = head_r_elem1;
1424 	tail_r_true[ri + 1] = tail_r_elem1;
1425       } else if (ab == 1) {
1426 	{
1427 	  /* Compute double-double = double-double + double-double. */
1428 	  double bv;
1429 	  double s1, s2, t1, t2;
1430 
1431 	  /* Add two hi words. */
1432 	  s1 = head_r_elem1 + head_r_elem2;
1433 	  bv = s1 - head_r_elem1;
1434 	  s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1435 
1436 	  /* Add two lo words. */
1437 	  t1 = tail_r_elem1 + tail_r_elem2;
1438 	  bv = t1 - tail_r_elem1;
1439 	  t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1440 
1441 	  s2 += t1;
1442 
1443 	  /* Renormalize (s1, s2)  to  (t1, s2) */
1444 	  t1 = s1 + s2;
1445 	  s2 = s2 - (t1 - s1);
1446 
1447 	  t2 += s2;
1448 
1449 	  /* Renormalize (t1, t2)  */
1450 	  head_r_elem = t1 + t2;
1451 	  tail_r_elem = t2 - (head_r_elem - t1);
1452 	}
1453 
1454 	/* Set the imaginary part to  R1 + R2 */
1455 	tail_r_true[ri + 1] = tail_r_elem;
1456 	head_r_true[ri + 1] = head_r_elem;
1457 
1458 	/* Set the real part to R1 - R2. */
1459 	{
1460 	  double head_bt, tail_bt;
1461 	  head_bt = -head_r_elem2;
1462 	  tail_bt = -tail_r_elem2;
1463 	  {
1464 	    /* Compute double-double = double-double + double-double. */
1465 	    double bv;
1466 	    double s1, s2, t1, t2;
1467 
1468 	    /* Add two hi words. */
1469 	    s1 = head_r_elem1 + head_bt;
1470 	    bv = s1 - head_r_elem1;
1471 	    s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1472 
1473 	    /* Add two lo words. */
1474 	    t1 = tail_r_elem1 + tail_bt;
1475 	    bv = t1 - tail_r_elem1;
1476 	    t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1477 
1478 	    s2 += t1;
1479 
1480 	    /* Renormalize (s1, s2)  to  (t1, s2) */
1481 	    t1 = s1 + s2;
1482 	    s2 = s2 - (t1 - s1);
1483 
1484 	    t2 += s2;
1485 
1486 	    /* Renormalize (t1, t2)  */
1487 	    head_r_elem = t1 + t2;
1488 	    tail_r_elem = t2 - (head_r_elem - t1);
1489 	  }
1490 	}
1491 	tail_r_true[ri] = tail_r_elem;
1492 	head_r_true[ri] = head_r_elem;
1493       } else {
1494 	/* Real part */
1495 	{
1496 	  /* Compute double-double = double-double * double. */
1497 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1498 
1499 	  con = head_r_elem2 * split;
1500 	  a11 = con - head_r_elem2;
1501 	  a11 = con - a11;
1502 	  a21 = head_r_elem2 - a11;
1503 	  con = -2.0 * split;
1504 	  b1 = con - -2.0;
1505 	  b1 = con - b1;
1506 	  b2 = -2.0 - b1;
1507 
1508 	  c11 = head_r_elem2 * -2.0;
1509 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1510 
1511 	  c2 = tail_r_elem2 * -2.0;
1512 	  t1 = c11 + c2;
1513 	  t2 = (c2 - (t1 - c11)) + c21;
1514 
1515 	  head_r_elem = t1 + t2;
1516 	  tail_r_elem = t2 - (head_r_elem - t1);
1517 	}
1518 	head_r_true[ri] = head_r_elem;
1519 	tail_r_true[ri] = tail_r_elem;
1520 
1521 	/* Imaginary Part */
1522 	{
1523 	  /* Compute double-double = double-double * double. */
1524 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1525 
1526 	  con = head_r_elem1 * split;
1527 	  a11 = con - head_r_elem1;
1528 	  a11 = con - a11;
1529 	  a21 = head_r_elem1 - a11;
1530 	  con = 2.0 * split;
1531 	  b1 = con - 2.0;
1532 	  b1 = con - b1;
1533 	  b2 = 2.0 - b1;
1534 
1535 	  c11 = head_r_elem1 * 2.0;
1536 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1537 
1538 	  c2 = tail_r_elem1 * 2.0;
1539 	  t1 = c11 + c2;
1540 	  t2 = (c2 - (t1 - c11)) + c21;
1541 
1542 	  head_r_elem = t1 + t2;
1543 	  tail_r_elem = t2 - (head_r_elem - t1);
1544 	}
1545 	head_r_true[ri + 1] = head_r_elem;
1546 	tail_r_true[ri + 1] = tail_r_elem;
1547       }
1548     }
1549 
1550 
1551     blas_free(a1);
1552     blas_free(a2);
1553     blas_free(y1);
1554     blas_free(y2);
1555     blas_free(x_head_0);
1556     blas_free(x_tail_0);
1557     blas_free(head_r1_true);
1558     blas_free(tail_r1_true);
1559     blas_free(head_r2_true);
1560     blas_free(tail_r2_true);
1561     blas_free(a_vec);
1562     blas_free(x_head_vec);
1563     blas_free(x_tail_vec);
1564   }
1565 }
1566 
BLAS_zhemv2_c_z_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)1567 void BLAS_zhemv2_c_z_testgen(int norm, enum blas_order_type order,
1568 			     enum blas_uplo_type uplo, int n, void *alpha,
1569 			     int alpha_flag, void *beta, int beta_flag,
1570 			     void *a, int lda, void *x_head, void *x_tail,
1571 			     void *y, int *seed, double *head_r_true,
1572 			     double *tail_r_true)
1573 
1574 /*
1575  * Purpose
1576  * =======
1577  *
1578  *   Generates the test inputs to BLAS_zhemv_c_z{_x}
1579  *
1580  * Arguments
1581  * =========
1582  *
1583  * norm    (input) int
1584  *           = -1: the vectors are scaled with norms near underflow.
1585  *           =  0: the vectors have norms of order 1.
1586  *           =  1: the vectors are scaled with norms near overflow.
1587  *
1588  * order   (input) enum blas_order_type
1589  *           storage format of the matrices
1590  *
1591  * uplo    (input) enum blas_uplo_type
1592  *           which half of the hemv2etric matrix a is to be stored.
1593  *
1594  * n       (input) int
1595  *           sizes of symmetrical matrix a, size of vectors x, y:
1596  *              matrix a is n-by-n.
1597  *
1598  * alpha   (input/output) void*
1599  *           if alpha_flag = 1, alpha is input.
1600  *           if alpha_flag = 0, alpha is output.
1601  *
1602  * alpha_flag (input) int
1603  *           = 0: alpha is free, and is output.
1604  *           = 1: alpha is fixed on input.
1605  *
1606  * beta    (input/output) void*
1607  *           if beta_flag = 1, beta is input.
1608  *           if beta_flag = 0, beta is output.
1609  *
1610  * beta_flag (input) int
1611  *           = 0: beta is free, and is output.
1612  *           = 1: beta is fixed on input.
1613  *
1614  * a       (input/output) void*
1615  *
1616  * lda     (input) lda
1617  *         leading dimension of matrix A.
1618  *
1619  * x_head  (input/output) void*
1620  * x_tail  (input/output) void*
1621  *         vector x = (x_head + x_tail)
1622  *
1623  * y       (input/output) void*
1624  *         generated vector y that will be used as an input to HEMV2.
1625  *
1626  * seed    (input/output) int *
1627  *         seed for the random number generator.
1628  *
1629  * head_r_true  (output) double *
1630  *         the leading part of the truth in double-double.
1631  *
1632  * tail_r_true  (output) double *
1633  *         the trailing part of the truth in double-double
1634  *
1635  */
1636 {
1637   {
1638 
1639     /* Strategy:
1640        r1 = alpha * A1 * x + beta * y1
1641        r2 = alpha * A2 * x + beta * y2
1642        where all the matrices and vectors are real.  Then let R = R1 + i R2,
1643        A = A1 + i A2, y = y1 + i y2.  To make A Hermitian, A1 is symmetric,
1644        and A2 is skew-symmetric.
1645      */
1646 
1647     int i, j;
1648     int yi;
1649     int aij, ai;
1650     int a1ij, a1i;
1651     int xi;
1652     int mi;
1653     int incx = 1, incy = 1;
1654     int incyi, xi0, yi0;
1655     int incaij, incai;
1656     int inca1ij, inca1i;
1657     int incxi;
1658     int inca_vec, incx_vec;
1659     int ld;
1660     int ab;
1661     int ri, incri;
1662 
1663     float *a1;
1664     float *a2;
1665     double *y1;
1666     double *y2;
1667     double *x_head_0;
1668     double *x_tail_0;
1669 
1670     double *head_r1_true, *tail_r1_true;
1671     double *head_r2_true, *tail_r2_true;
1672 
1673     double head_r_elem1, tail_r_elem1;
1674     double head_r_elem2, tail_r_elem2;
1675     double head_r_elem, tail_r_elem;
1676 
1677     float *a_vec;
1678     double *x_head_vec;
1679     double *x_tail_vec;
1680 
1681     double *y_i = (double *) y;
1682     double *alpha_i = (double *) alpha;
1683     double *beta_i = (double *) beta;
1684 
1685     float *a_i = (float *) a;
1686     double *x_head_i = (double *) x_head;
1687     double *x_tail_i = (double *) x_tail;
1688 
1689     ld = n;
1690     if (order == blas_colmajor) {
1691       inca1i = incai = 1;
1692       incyi = incy;
1693       incaij = lda;
1694       incxi = incx;
1695       inca1ij = n;
1696     } else {
1697       incyi = incy;
1698       incai = lda;
1699       incxi = incx;
1700       inca1i = n;
1701       inca1ij = incaij = 1;
1702     }
1703     xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
1704     yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
1705     incri = 1;
1706     incri *= 2;
1707     xi0 *= 2;
1708     yi0 *= 2;
1709     incyi *= 2;
1710     incai *= 2;
1711     incaij *= 2;
1712     incxi *= 2;
1713 
1714     inca_vec = incx_vec = 1;
1715     inca_vec *= 2;
1716     incx_vec *= 2;
1717     a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
1718     if (n > 0 && a_vec == NULL) {
1719       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1720     }
1721     x_head_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1722     if (n > 0 && x_head_vec == NULL) {
1723       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1724     }
1725     x_tail_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1726     if (n > 0 && x_tail_vec == NULL) {
1727       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1728     }
1729 
1730     int incx0, incy1, incy2, incmi = 1;
1731     a1 = (float *) blas_malloc(n * n * sizeof(float));
1732     if (n * n > 0 && a1 == NULL) {
1733       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1734     }
1735     a2 = (float *) blas_malloc(n * n * sizeof(float));
1736     if (n * n > 0 && a2 == NULL) {
1737       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1738     }
1739     y1 = (double *) blas_malloc(n * sizeof(double));
1740     if (n > 0 && y1 == NULL) {
1741       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1742     }
1743     incy1 = 1;
1744 
1745     y2 = (double *) blas_malloc(n * sizeof(double));
1746     if (n > 0 && y2 == NULL) {
1747       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1748     }
1749     incy2 = 1;
1750 
1751     x_head_0 = (double *) blas_malloc(n * sizeof(double));
1752     if (n > 0 && x_head_0 == NULL) {
1753       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1754     }
1755     x_tail_0 = (double *) blas_malloc(n * sizeof(double));
1756     if (n > 0 && x_tail_0 == NULL) {
1757       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1758     }
1759     incx0 = 1;
1760 
1761     head_r1_true = (double *) blas_malloc(n * sizeof(double));
1762     tail_r1_true = (double *) blas_malloc(n * sizeof(double));
1763     if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1764       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1765     };
1766     head_r2_true = (double *) blas_malloc(n * sizeof(double));
1767     tail_r2_true = (double *) blas_malloc(n * sizeof(double));
1768     if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1769       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1770     };
1771 
1772     /* First generate the real portion of A and x. */
1773     BLAS_dsymv2_s_d_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
1774 			    x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
1775 			    head_r1_true, tail_r1_true);
1776 
1777     /* x0 is now fixed and is input to this call */
1778     BLAS_dskmv2_testgen_s_d(norm, order, uplo, n, alpha_i, beta_i, a2, n,
1779 			    x_head_0, x_tail_0, y2, seed, head_r2_true,
1780 			    tail_r2_true);
1781 
1782 
1783     /* The case where x is a complex vector.  Since x is generated
1784        as a real vector, we need to perform some scaling.
1785 
1786        There are four cases to consider, depending on the values
1787        of alpha and beta.
1788 
1789        values                         scaling
1790        alpha   beta      alpha  A    x       beta    y    r (truth)
1791        0    1      1                    i               i    i
1792        1    1      ?                   1+i      1+i         1+i
1793        2    ?      1         1+i       1+i             2i    2i
1794        3    ?      ?         1+i       1+i      2i           2i
1795 
1796        Note that we can afford to scale r by 1+i, since they are
1797        computed in double-double precision.
1798      */
1799 
1800     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1801       ab = 0;
1802       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
1803     } else if (alpha_i[0] == 1.0) {
1804       ab = 1;
1805       /* set alpha to 1, multiply beta by 1+i. */
1806       alpha_i[1] = 0.0;
1807       beta_i[1] = beta_i[0];
1808     } else if (beta_i[0] == 1.0) {
1809       ab = 2;
1810       /* set beta to 1, multiply alpha by 1+i. */
1811       beta_i[1] = 0.0;
1812       alpha_i[1] = alpha_i[0];
1813     } else {
1814       ab = 3;
1815       /* multiply alpha by 1+i, beta by 2i. */
1816       alpha_i[1] = alpha_i[0];
1817       beta_i[1] = 2.0 * beta_i[0];
1818       beta_i[0] = 0.0;
1819     }
1820 
1821     /* Now fill in a */
1822     for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
1823       for (j = 0, aij = ai, a1ij = a1i; j < n;
1824 	   j++, aij += incaij, a1ij += inca1ij) {
1825 	a_i[aij] = a1[a1ij];
1826 	a_i[aij + 1] = a2[a1ij];
1827       }
1828     }
1829 
1830     /* Fill in x */
1831     for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
1832       if (ab == 0) {
1833 	x_head_i[xi] = 0.0;
1834 	x_head_i[xi + 1] = x_head_0[mi];
1835 	x_tail_i[xi] = 0.0;
1836 	x_tail_i[xi + 1] = x_tail_0[mi];
1837       } else {
1838 	x_head_i[xi] = x_head_0[mi];
1839 	x_head_i[xi + 1] = x_head_0[mi];
1840 	x_tail_i[xi] = x_tail_0[mi];
1841 	x_tail_i[xi + 1] = x_tail_0[mi];
1842       }
1843     }
1844 
1845     /* Fill in y */
1846     for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
1847       if (ab == 0) {
1848 	y_i[yi] = -y2[mi];
1849 	y_i[yi + 1] = y1[mi];
1850       } else if (ab == 2) {
1851 	y_i[yi] = -2.0 * y2[mi];
1852 	y_i[yi + 1] = 2.0 * y1[mi];
1853       } else {
1854 	y_i[yi] = y1[mi];
1855 	y_i[yi + 1] = y2[mi];
1856       }
1857     }
1858 
1859     /* Fill in the truth */
1860     for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
1861 
1862       head_r_elem1 = head_r1_true[mi];
1863       tail_r_elem1 = tail_r1_true[mi];
1864       head_r_elem2 = head_r2_true[mi];
1865       tail_r_elem2 = tail_r2_true[mi];
1866 
1867       if (ab == 0) {
1868 	head_r_true[ri] = -head_r_elem2;
1869 	tail_r_true[ri] = -tail_r_elem2;
1870 	head_r_true[ri + 1] = head_r_elem1;
1871 	tail_r_true[ri + 1] = tail_r_elem1;
1872       } else if (ab == 1) {
1873 	{
1874 	  /* Compute double-double = double-double + double-double. */
1875 	  double bv;
1876 	  double s1, s2, t1, t2;
1877 
1878 	  /* Add two hi words. */
1879 	  s1 = head_r_elem1 + head_r_elem2;
1880 	  bv = s1 - head_r_elem1;
1881 	  s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1882 
1883 	  /* Add two lo words. */
1884 	  t1 = tail_r_elem1 + tail_r_elem2;
1885 	  bv = t1 - tail_r_elem1;
1886 	  t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1887 
1888 	  s2 += t1;
1889 
1890 	  /* Renormalize (s1, s2)  to  (t1, s2) */
1891 	  t1 = s1 + s2;
1892 	  s2 = s2 - (t1 - s1);
1893 
1894 	  t2 += s2;
1895 
1896 	  /* Renormalize (t1, t2)  */
1897 	  head_r_elem = t1 + t2;
1898 	  tail_r_elem = t2 - (head_r_elem - t1);
1899 	}
1900 
1901 	/* Set the imaginary part to  R1 + R2 */
1902 	tail_r_true[ri + 1] = tail_r_elem;
1903 	head_r_true[ri + 1] = head_r_elem;
1904 
1905 	/* Set the real part to R1 - R2. */
1906 	{
1907 	  double head_bt, tail_bt;
1908 	  head_bt = -head_r_elem2;
1909 	  tail_bt = -tail_r_elem2;
1910 	  {
1911 	    /* Compute double-double = double-double + double-double. */
1912 	    double bv;
1913 	    double s1, s2, t1, t2;
1914 
1915 	    /* Add two hi words. */
1916 	    s1 = head_r_elem1 + head_bt;
1917 	    bv = s1 - head_r_elem1;
1918 	    s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1919 
1920 	    /* Add two lo words. */
1921 	    t1 = tail_r_elem1 + tail_bt;
1922 	    bv = t1 - tail_r_elem1;
1923 	    t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1924 
1925 	    s2 += t1;
1926 
1927 	    /* Renormalize (s1, s2)  to  (t1, s2) */
1928 	    t1 = s1 + s2;
1929 	    s2 = s2 - (t1 - s1);
1930 
1931 	    t2 += s2;
1932 
1933 	    /* Renormalize (t1, t2)  */
1934 	    head_r_elem = t1 + t2;
1935 	    tail_r_elem = t2 - (head_r_elem - t1);
1936 	  }
1937 	}
1938 	tail_r_true[ri] = tail_r_elem;
1939 	head_r_true[ri] = head_r_elem;
1940       } else {
1941 	/* Real part */
1942 	{
1943 	  /* Compute double-double = double-double * double. */
1944 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1945 
1946 	  con = head_r_elem2 * split;
1947 	  a11 = con - head_r_elem2;
1948 	  a11 = con - a11;
1949 	  a21 = head_r_elem2 - a11;
1950 	  con = -2.0 * split;
1951 	  b1 = con - -2.0;
1952 	  b1 = con - b1;
1953 	  b2 = -2.0 - b1;
1954 
1955 	  c11 = head_r_elem2 * -2.0;
1956 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1957 
1958 	  c2 = tail_r_elem2 * -2.0;
1959 	  t1 = c11 + c2;
1960 	  t2 = (c2 - (t1 - c11)) + c21;
1961 
1962 	  head_r_elem = t1 + t2;
1963 	  tail_r_elem = t2 - (head_r_elem - t1);
1964 	}
1965 	head_r_true[ri] = head_r_elem;
1966 	tail_r_true[ri] = tail_r_elem;
1967 
1968 	/* Imaginary Part */
1969 	{
1970 	  /* Compute double-double = double-double * double. */
1971 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1972 
1973 	  con = head_r_elem1 * split;
1974 	  a11 = con - head_r_elem1;
1975 	  a11 = con - a11;
1976 	  a21 = head_r_elem1 - a11;
1977 	  con = 2.0 * split;
1978 	  b1 = con - 2.0;
1979 	  b1 = con - b1;
1980 	  b2 = 2.0 - b1;
1981 
1982 	  c11 = head_r_elem1 * 2.0;
1983 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1984 
1985 	  c2 = tail_r_elem1 * 2.0;
1986 	  t1 = c11 + c2;
1987 	  t2 = (c2 - (t1 - c11)) + c21;
1988 
1989 	  head_r_elem = t1 + t2;
1990 	  tail_r_elem = t2 - (head_r_elem - t1);
1991 	}
1992 	head_r_true[ri + 1] = head_r_elem;
1993 	tail_r_true[ri + 1] = tail_r_elem;
1994       }
1995     }
1996 
1997 
1998     blas_free(a1);
1999     blas_free(a2);
2000     blas_free(y1);
2001     blas_free(y2);
2002     blas_free(x_head_0);
2003     blas_free(x_tail_0);
2004     blas_free(head_r1_true);
2005     blas_free(tail_r1_true);
2006     blas_free(head_r2_true);
2007     blas_free(tail_r2_true);
2008     blas_free(a_vec);
2009     blas_free(x_head_vec);
2010     blas_free(x_tail_vec);
2011   }
2012 }
2013 
BLAS_zhemv2_z_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)2014 void BLAS_zhemv2_z_c_testgen(int norm, enum blas_order_type order,
2015 			     enum blas_uplo_type uplo, int n, void *alpha,
2016 			     int alpha_flag, void *beta, int beta_flag,
2017 			     void *a, int lda, void *x_head, void *x_tail,
2018 			     void *y, int *seed, double *head_r_true,
2019 			     double *tail_r_true)
2020 
2021 /*
2022  * Purpose
2023  * =======
2024  *
2025  *   Generates the test inputs to BLAS_zhemv_z_c{_x}
2026  *
2027  * Arguments
2028  * =========
2029  *
2030  * norm    (input) int
2031  *           = -1: the vectors are scaled with norms near underflow.
2032  *           =  0: the vectors have norms of order 1.
2033  *           =  1: the vectors are scaled with norms near overflow.
2034  *
2035  * order   (input) enum blas_order_type
2036  *           storage format of the matrices
2037  *
2038  * uplo    (input) enum blas_uplo_type
2039  *           which half of the hemv2etric matrix a is to be stored.
2040  *
2041  * n       (input) int
2042  *           sizes of symmetrical matrix a, size of vectors x, y:
2043  *              matrix a is n-by-n.
2044  *
2045  * alpha   (input/output) void*
2046  *           if alpha_flag = 1, alpha is input.
2047  *           if alpha_flag = 0, alpha is output.
2048  *
2049  * alpha_flag (input) int
2050  *           = 0: alpha is free, and is output.
2051  *           = 1: alpha is fixed on input.
2052  *
2053  * beta    (input/output) void*
2054  *           if beta_flag = 1, beta is input.
2055  *           if beta_flag = 0, beta is output.
2056  *
2057  * beta_flag (input) int
2058  *           = 0: beta is free, and is output.
2059  *           = 1: beta is fixed on input.
2060  *
2061  * a       (input/output) void*
2062  *
2063  * lda     (input) lda
2064  *         leading dimension of matrix A.
2065  *
2066  * x_head  (input/output) void*
2067  * x_tail  (input/output) void*
2068  *         vector x = (x_head + x_tail)
2069  *
2070  * y       (input/output) void*
2071  *         generated vector y that will be used as an input to HEMV2.
2072  *
2073  * seed    (input/output) int *
2074  *         seed for the random number generator.
2075  *
2076  * head_r_true  (output) double *
2077  *         the leading part of the truth in double-double.
2078  *
2079  * tail_r_true  (output) double *
2080  *         the trailing part of the truth in double-double
2081  *
2082  */
2083 {
2084   {
2085 
2086     /* Strategy:
2087        r1 = alpha * A1 * x + beta * y1
2088        r2 = alpha * A2 * x + beta * y2
2089        where all the matrices and vectors are real.  Then let R = R1 + i R2,
2090        A = A1 + i A2, y = y1 + i y2.  To make A Hermitian, A1 is symmetric,
2091        and A2 is skew-symmetric.
2092      */
2093 
2094     int i, j;
2095     int yi;
2096     int aij, ai;
2097     int a1ij, a1i;
2098     int xi;
2099     int mi;
2100     int incx = 1, incy = 1;
2101     int incyi, xi0, yi0;
2102     int incaij, incai;
2103     int inca1ij, inca1i;
2104     int incxi;
2105     int inca_vec, incx_vec;
2106     int ld;
2107     int ab;
2108     int ri, incri;
2109 
2110     double *a1;
2111     double *a2;
2112     double *y1;
2113     double *y2;
2114     float *x_head_0;
2115     float *x_tail_0;
2116 
2117     double *head_r1_true, *tail_r1_true;
2118     double *head_r2_true, *tail_r2_true;
2119 
2120     double head_r_elem1, tail_r_elem1;
2121     double head_r_elem2, tail_r_elem2;
2122     double head_r_elem, tail_r_elem;
2123 
2124     double *a_vec;
2125     float *x_head_vec;
2126     float *x_tail_vec;
2127 
2128     double *y_i = (double *) y;
2129     double *alpha_i = (double *) alpha;
2130     double *beta_i = (double *) beta;
2131 
2132     double *a_i = (double *) a;
2133     float *x_head_i = (float *) x_head;
2134     float *x_tail_i = (float *) x_tail;
2135 
2136     ld = n;
2137     if (order == blas_colmajor) {
2138       inca1i = incai = 1;
2139       incyi = incy;
2140       incaij = lda;
2141       incxi = incx;
2142       inca1ij = n;
2143     } else {
2144       incyi = incy;
2145       incai = lda;
2146       incxi = incx;
2147       inca1i = n;
2148       inca1ij = incaij = 1;
2149     }
2150     xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
2151     yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
2152     incri = 1;
2153     incri *= 2;
2154     xi0 *= 2;
2155     yi0 *= 2;
2156     incyi *= 2;
2157     incai *= 2;
2158     incaij *= 2;
2159     incxi *= 2;
2160 
2161     inca_vec = incx_vec = 1;
2162     inca_vec *= 2;
2163     incx_vec *= 2;
2164     a_vec = (double *) blas_malloc(n * sizeof(double) * 2);
2165     if (n > 0 && a_vec == NULL) {
2166       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2167     }
2168     x_head_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2169     if (n > 0 && x_head_vec == NULL) {
2170       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2171     }
2172     x_tail_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2173     if (n > 0 && x_tail_vec == NULL) {
2174       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2175     }
2176 
2177     int incx0, incy1, incy2, incmi = 1;
2178     a1 = (double *) blas_malloc(n * n * sizeof(double));
2179     if (n * n > 0 && a1 == NULL) {
2180       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2181     }
2182     a2 = (double *) blas_malloc(n * n * sizeof(double));
2183     if (n * n > 0 && a2 == NULL) {
2184       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2185     }
2186     y1 = (double *) blas_malloc(n * sizeof(double));
2187     if (n > 0 && y1 == NULL) {
2188       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2189     }
2190     incy1 = 1;
2191 
2192     y2 = (double *) blas_malloc(n * sizeof(double));
2193     if (n > 0 && y2 == NULL) {
2194       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2195     }
2196     incy2 = 1;
2197 
2198     x_head_0 = (float *) blas_malloc(n * sizeof(float));
2199     if (n > 0 && x_head_0 == NULL) {
2200       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2201     }
2202     x_tail_0 = (float *) blas_malloc(n * sizeof(float));
2203     if (n > 0 && x_tail_0 == NULL) {
2204       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2205     }
2206     incx0 = 1;
2207 
2208     head_r1_true = (double *) blas_malloc(n * sizeof(double));
2209     tail_r1_true = (double *) blas_malloc(n * sizeof(double));
2210     if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2211       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2212     };
2213     head_r2_true = (double *) blas_malloc(n * sizeof(double));
2214     tail_r2_true = (double *) blas_malloc(n * sizeof(double));
2215     if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2216       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2217     };
2218 
2219     /* First generate the real portion of A and x. */
2220     BLAS_dsymv2_d_s_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
2221 			    x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
2222 			    head_r1_true, tail_r1_true);
2223 
2224     /* x0 is now fixed and is input to this call */
2225     BLAS_dskmv2_testgen_d_s(norm, order, uplo, n, alpha_i, beta_i, a2, n,
2226 			    x_head_0, x_tail_0, y2, seed, head_r2_true,
2227 			    tail_r2_true);
2228 
2229 
2230     /* The case where x is a complex vector.  Since x is generated
2231        as a real vector, we need to perform some scaling.
2232 
2233        There are four cases to consider, depending on the values
2234        of alpha and beta.
2235 
2236        values                         scaling
2237        alpha   beta      alpha  A    x       beta    y    r (truth)
2238        0    1      1                    i               i    i
2239        1    1      ?                   1+i      1+i         1+i
2240        2    ?      1         1+i       1+i             2i    2i
2241        3    ?      ?         1+i       1+i      2i           2i
2242 
2243        Note that we can afford to scale r by 1+i, since they are
2244        computed in double-double precision.
2245      */
2246 
2247     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2248       ab = 0;
2249       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
2250     } else if (alpha_i[0] == 1.0) {
2251       ab = 1;
2252       /* set alpha to 1, multiply beta by 1+i. */
2253       alpha_i[1] = 0.0;
2254       beta_i[1] = beta_i[0];
2255     } else if (beta_i[0] == 1.0) {
2256       ab = 2;
2257       /* set beta to 1, multiply alpha by 1+i. */
2258       beta_i[1] = 0.0;
2259       alpha_i[1] = alpha_i[0];
2260     } else {
2261       ab = 3;
2262       /* multiply alpha by 1+i, beta by 2i. */
2263       alpha_i[1] = alpha_i[0];
2264       beta_i[1] = 2.0 * beta_i[0];
2265       beta_i[0] = 0.0;
2266     }
2267 
2268     /* Now fill in a */
2269     for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
2270       for (j = 0, aij = ai, a1ij = a1i; j < n;
2271 	   j++, aij += incaij, a1ij += inca1ij) {
2272 	a_i[aij] = a1[a1ij];
2273 	a_i[aij + 1] = a2[a1ij];
2274       }
2275     }
2276 
2277     /* Fill in x */
2278     for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
2279       if (ab == 0) {
2280 	x_head_i[xi] = 0.0;
2281 	x_head_i[xi + 1] = x_head_0[mi];
2282 	x_tail_i[xi] = 0.0;
2283 	x_tail_i[xi + 1] = x_tail_0[mi];
2284       } else {
2285 	x_head_i[xi] = x_head_0[mi];
2286 	x_head_i[xi + 1] = x_head_0[mi];
2287 	x_tail_i[xi] = x_tail_0[mi];
2288 	x_tail_i[xi + 1] = x_tail_0[mi];
2289       }
2290     }
2291 
2292     /* Fill in y */
2293     for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
2294       if (ab == 0) {
2295 	y_i[yi] = -y2[mi];
2296 	y_i[yi + 1] = y1[mi];
2297       } else if (ab == 2) {
2298 	y_i[yi] = -2.0 * y2[mi];
2299 	y_i[yi + 1] = 2.0 * y1[mi];
2300       } else {
2301 	y_i[yi] = y1[mi];
2302 	y_i[yi + 1] = y2[mi];
2303       }
2304     }
2305 
2306     /* Fill in the truth */
2307     for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
2308 
2309       head_r_elem1 = head_r1_true[mi];
2310       tail_r_elem1 = tail_r1_true[mi];
2311       head_r_elem2 = head_r2_true[mi];
2312       tail_r_elem2 = tail_r2_true[mi];
2313 
2314       if (ab == 0) {
2315 	head_r_true[ri] = -head_r_elem2;
2316 	tail_r_true[ri] = -tail_r_elem2;
2317 	head_r_true[ri + 1] = head_r_elem1;
2318 	tail_r_true[ri + 1] = tail_r_elem1;
2319       } else if (ab == 1) {
2320 	{
2321 	  /* Compute double-double = double-double + double-double. */
2322 	  double bv;
2323 	  double s1, s2, t1, t2;
2324 
2325 	  /* Add two hi words. */
2326 	  s1 = head_r_elem1 + head_r_elem2;
2327 	  bv = s1 - head_r_elem1;
2328 	  s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2329 
2330 	  /* Add two lo words. */
2331 	  t1 = tail_r_elem1 + tail_r_elem2;
2332 	  bv = t1 - tail_r_elem1;
2333 	  t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2334 
2335 	  s2 += t1;
2336 
2337 	  /* Renormalize (s1, s2)  to  (t1, s2) */
2338 	  t1 = s1 + s2;
2339 	  s2 = s2 - (t1 - s1);
2340 
2341 	  t2 += s2;
2342 
2343 	  /* Renormalize (t1, t2)  */
2344 	  head_r_elem = t1 + t2;
2345 	  tail_r_elem = t2 - (head_r_elem - t1);
2346 	}
2347 
2348 	/* Set the imaginary part to  R1 + R2 */
2349 	tail_r_true[ri + 1] = tail_r_elem;
2350 	head_r_true[ri + 1] = head_r_elem;
2351 
2352 	/* Set the real part to R1 - R2. */
2353 	{
2354 	  double head_bt, tail_bt;
2355 	  head_bt = -head_r_elem2;
2356 	  tail_bt = -tail_r_elem2;
2357 	  {
2358 	    /* Compute double-double = double-double + double-double. */
2359 	    double bv;
2360 	    double s1, s2, t1, t2;
2361 
2362 	    /* Add two hi words. */
2363 	    s1 = head_r_elem1 + head_bt;
2364 	    bv = s1 - head_r_elem1;
2365 	    s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2366 
2367 	    /* Add two lo words. */
2368 	    t1 = tail_r_elem1 + tail_bt;
2369 	    bv = t1 - tail_r_elem1;
2370 	    t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2371 
2372 	    s2 += t1;
2373 
2374 	    /* Renormalize (s1, s2)  to  (t1, s2) */
2375 	    t1 = s1 + s2;
2376 	    s2 = s2 - (t1 - s1);
2377 
2378 	    t2 += s2;
2379 
2380 	    /* Renormalize (t1, t2)  */
2381 	    head_r_elem = t1 + t2;
2382 	    tail_r_elem = t2 - (head_r_elem - t1);
2383 	  }
2384 	}
2385 	tail_r_true[ri] = tail_r_elem;
2386 	head_r_true[ri] = head_r_elem;
2387       } else {
2388 	/* Real part */
2389 	{
2390 	  /* Compute double-double = double-double * double. */
2391 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2392 
2393 	  con = head_r_elem2 * split;
2394 	  a11 = con - head_r_elem2;
2395 	  a11 = con - a11;
2396 	  a21 = head_r_elem2 - a11;
2397 	  con = -2.0 * split;
2398 	  b1 = con - -2.0;
2399 	  b1 = con - b1;
2400 	  b2 = -2.0 - b1;
2401 
2402 	  c11 = head_r_elem2 * -2.0;
2403 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2404 
2405 	  c2 = tail_r_elem2 * -2.0;
2406 	  t1 = c11 + c2;
2407 	  t2 = (c2 - (t1 - c11)) + c21;
2408 
2409 	  head_r_elem = t1 + t2;
2410 	  tail_r_elem = t2 - (head_r_elem - t1);
2411 	}
2412 	head_r_true[ri] = head_r_elem;
2413 	tail_r_true[ri] = tail_r_elem;
2414 
2415 	/* Imaginary Part */
2416 	{
2417 	  /* Compute double-double = double-double * double. */
2418 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2419 
2420 	  con = head_r_elem1 * split;
2421 	  a11 = con - head_r_elem1;
2422 	  a11 = con - a11;
2423 	  a21 = head_r_elem1 - a11;
2424 	  con = 2.0 * split;
2425 	  b1 = con - 2.0;
2426 	  b1 = con - b1;
2427 	  b2 = 2.0 - b1;
2428 
2429 	  c11 = head_r_elem1 * 2.0;
2430 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2431 
2432 	  c2 = tail_r_elem1 * 2.0;
2433 	  t1 = c11 + c2;
2434 	  t2 = (c2 - (t1 - c11)) + c21;
2435 
2436 	  head_r_elem = t1 + t2;
2437 	  tail_r_elem = t2 - (head_r_elem - t1);
2438 	}
2439 	head_r_true[ri + 1] = head_r_elem;
2440 	tail_r_true[ri + 1] = tail_r_elem;
2441       }
2442     }
2443 
2444 
2445     blas_free(a1);
2446     blas_free(a2);
2447     blas_free(y1);
2448     blas_free(y2);
2449     blas_free(x_head_0);
2450     blas_free(x_tail_0);
2451     blas_free(head_r1_true);
2452     blas_free(tail_r1_true);
2453     blas_free(head_r2_true);
2454     blas_free(tail_r2_true);
2455     blas_free(a_vec);
2456     blas_free(x_head_vec);
2457     blas_free(x_tail_vec);
2458   }
2459 }
2460 
BLAS_zhemv2_c_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)2461 void BLAS_zhemv2_c_c_testgen(int norm, enum blas_order_type order,
2462 			     enum blas_uplo_type uplo, int n, void *alpha,
2463 			     int alpha_flag, void *beta, int beta_flag,
2464 			     void *a, int lda, void *x_head, void *x_tail,
2465 			     void *y, int *seed, double *head_r_true,
2466 			     double *tail_r_true)
2467 
2468 /*
2469  * Purpose
2470  * =======
2471  *
2472  *   Generates the test inputs to BLAS_zhemv_c_c{_x}
2473  *
2474  * Arguments
2475  * =========
2476  *
2477  * norm    (input) int
2478  *           = -1: the vectors are scaled with norms near underflow.
2479  *           =  0: the vectors have norms of order 1.
2480  *           =  1: the vectors are scaled with norms near overflow.
2481  *
2482  * order   (input) enum blas_order_type
2483  *           storage format of the matrices
2484  *
2485  * uplo    (input) enum blas_uplo_type
2486  *           which half of the hemv2etric matrix a is to be stored.
2487  *
2488  * n       (input) int
2489  *           sizes of symmetrical matrix a, size of vectors x, y:
2490  *              matrix a is n-by-n.
2491  *
2492  * alpha   (input/output) void*
2493  *           if alpha_flag = 1, alpha is input.
2494  *           if alpha_flag = 0, alpha is output.
2495  *
2496  * alpha_flag (input) int
2497  *           = 0: alpha is free, and is output.
2498  *           = 1: alpha is fixed on input.
2499  *
2500  * beta    (input/output) void*
2501  *           if beta_flag = 1, beta is input.
2502  *           if beta_flag = 0, beta is output.
2503  *
2504  * beta_flag (input) int
2505  *           = 0: beta is free, and is output.
2506  *           = 1: beta is fixed on input.
2507  *
2508  * a       (input/output) void*
2509  *
2510  * lda     (input) lda
2511  *         leading dimension of matrix A.
2512  *
2513  * x_head  (input/output) void*
2514  * x_tail  (input/output) void*
2515  *         vector x = (x_head + x_tail)
2516  *
2517  * y       (input/output) void*
2518  *         generated vector y that will be used as an input to HEMV2.
2519  *
2520  * seed    (input/output) int *
2521  *         seed for the random number generator.
2522  *
2523  * head_r_true  (output) double *
2524  *         the leading part of the truth in double-double.
2525  *
2526  * tail_r_true  (output) double *
2527  *         the trailing part of the truth in double-double
2528  *
2529  */
2530 {
2531   {
2532 
2533     /* Strategy:
2534        r1 = alpha * A1 * x + beta * y1
2535        r2 = alpha * A2 * x + beta * y2
2536        where all the matrices and vectors are real.  Then let R = R1 + i R2,
2537        A = A1 + i A2, y = y1 + i y2.  To make A Hermitian, A1 is symmetric,
2538        and A2 is skew-symmetric.
2539      */
2540 
2541     int i, j;
2542     int yi;
2543     int aij, ai;
2544     int a1ij, a1i;
2545     int xi;
2546     int mi;
2547     int incx = 1, incy = 1;
2548     int incyi, xi0, yi0;
2549     int incaij, incai;
2550     int inca1ij, inca1i;
2551     int incxi;
2552     int inca_vec, incx_vec;
2553     int ld;
2554     int ab;
2555     int ri, incri;
2556 
2557     float *a1;
2558     float *a2;
2559     double *y1;
2560     double *y2;
2561     float *x_head_0;
2562     float *x_tail_0;
2563 
2564     double *head_r1_true, *tail_r1_true;
2565     double *head_r2_true, *tail_r2_true;
2566 
2567     double head_r_elem1, tail_r_elem1;
2568     double head_r_elem2, tail_r_elem2;
2569     double head_r_elem, tail_r_elem;
2570 
2571     float *a_vec;
2572     float *x_head_vec;
2573     float *x_tail_vec;
2574 
2575     double *y_i = (double *) y;
2576     double *alpha_i = (double *) alpha;
2577     double *beta_i = (double *) beta;
2578 
2579     float *a_i = (float *) a;
2580     float *x_head_i = (float *) x_head;
2581     float *x_tail_i = (float *) x_tail;
2582 
2583     ld = n;
2584     if (order == blas_colmajor) {
2585       inca1i = incai = 1;
2586       incyi = incy;
2587       incaij = lda;
2588       incxi = incx;
2589       inca1ij = n;
2590     } else {
2591       incyi = incy;
2592       incai = lda;
2593       incxi = incx;
2594       inca1i = n;
2595       inca1ij = incaij = 1;
2596     }
2597     xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
2598     yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
2599     incri = 1;
2600     incri *= 2;
2601     xi0 *= 2;
2602     yi0 *= 2;
2603     incyi *= 2;
2604     incai *= 2;
2605     incaij *= 2;
2606     incxi *= 2;
2607 
2608     inca_vec = incx_vec = 1;
2609     inca_vec *= 2;
2610     incx_vec *= 2;
2611     a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2612     if (n > 0 && a_vec == NULL) {
2613       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2614     }
2615     x_head_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2616     if (n > 0 && x_head_vec == NULL) {
2617       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2618     }
2619     x_tail_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2620     if (n > 0 && x_tail_vec == NULL) {
2621       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2622     }
2623 
2624     int incx0, incy1, incy2, incmi = 1;
2625     a1 = (float *) blas_malloc(n * n * sizeof(float));
2626     if (n * n > 0 && a1 == NULL) {
2627       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2628     }
2629     a2 = (float *) blas_malloc(n * n * sizeof(float));
2630     if (n * n > 0 && a2 == NULL) {
2631       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2632     }
2633     y1 = (double *) blas_malloc(n * sizeof(double));
2634     if (n > 0 && y1 == NULL) {
2635       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2636     }
2637     incy1 = 1;
2638 
2639     y2 = (double *) blas_malloc(n * sizeof(double));
2640     if (n > 0 && y2 == NULL) {
2641       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2642     }
2643     incy2 = 1;
2644 
2645     x_head_0 = (float *) blas_malloc(n * sizeof(float));
2646     if (n > 0 && x_head_0 == NULL) {
2647       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2648     }
2649     x_tail_0 = (float *) blas_malloc(n * sizeof(float));
2650     if (n > 0 && x_tail_0 == NULL) {
2651       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2652     }
2653     incx0 = 1;
2654 
2655     head_r1_true = (double *) blas_malloc(n * sizeof(double));
2656     tail_r1_true = (double *) blas_malloc(n * sizeof(double));
2657     if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2658       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2659     };
2660     head_r2_true = (double *) blas_malloc(n * sizeof(double));
2661     tail_r2_true = (double *) blas_malloc(n * sizeof(double));
2662     if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2663       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2664     };
2665 
2666     /* First generate the real portion of A and x. */
2667     BLAS_dsymv2_s_s_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
2668 			    x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
2669 			    head_r1_true, tail_r1_true);
2670 
2671     /* x0 is now fixed and is input to this call */
2672     BLAS_dskmv2_testgen_s_s(norm, order, uplo, n, alpha_i, beta_i, a2, n,
2673 			    x_head_0, x_tail_0, y2, seed, head_r2_true,
2674 			    tail_r2_true);
2675 
2676 
2677     /* The case where x is a complex vector.  Since x is generated
2678        as a real vector, we need to perform some scaling.
2679 
2680        There are four cases to consider, depending on the values
2681        of alpha and beta.
2682 
2683        values                         scaling
2684        alpha   beta      alpha  A    x       beta    y    r (truth)
2685        0    1      1                    i               i    i
2686        1    1      ?                   1+i      1+i         1+i
2687        2    ?      1         1+i       1+i             2i    2i
2688        3    ?      ?         1+i       1+i      2i           2i
2689 
2690        Note that we can afford to scale r by 1+i, since they are
2691        computed in double-double precision.
2692      */
2693 
2694     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2695       ab = 0;
2696       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
2697     } else if (alpha_i[0] == 1.0) {
2698       ab = 1;
2699       /* set alpha to 1, multiply beta by 1+i. */
2700       alpha_i[1] = 0.0;
2701       beta_i[1] = beta_i[0];
2702     } else if (beta_i[0] == 1.0) {
2703       ab = 2;
2704       /* set beta to 1, multiply alpha by 1+i. */
2705       beta_i[1] = 0.0;
2706       alpha_i[1] = alpha_i[0];
2707     } else {
2708       ab = 3;
2709       /* multiply alpha by 1+i, beta by 2i. */
2710       alpha_i[1] = alpha_i[0];
2711       beta_i[1] = 2.0 * beta_i[0];
2712       beta_i[0] = 0.0;
2713     }
2714 
2715     /* Now fill in a */
2716     for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
2717       for (j = 0, aij = ai, a1ij = a1i; j < n;
2718 	   j++, aij += incaij, a1ij += inca1ij) {
2719 	a_i[aij] = a1[a1ij];
2720 	a_i[aij + 1] = a2[a1ij];
2721       }
2722     }
2723 
2724     /* Fill in x */
2725     for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
2726       if (ab == 0) {
2727 	x_head_i[xi] = 0.0;
2728 	x_head_i[xi + 1] = x_head_0[mi];
2729 	x_tail_i[xi] = 0.0;
2730 	x_tail_i[xi + 1] = x_tail_0[mi];
2731       } else {
2732 	x_head_i[xi] = x_head_0[mi];
2733 	x_head_i[xi + 1] = x_head_0[mi];
2734 	x_tail_i[xi] = x_tail_0[mi];
2735 	x_tail_i[xi + 1] = x_tail_0[mi];
2736       }
2737     }
2738 
2739     /* Fill in y */
2740     for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
2741       if (ab == 0) {
2742 	y_i[yi] = -y2[mi];
2743 	y_i[yi + 1] = y1[mi];
2744       } else if (ab == 2) {
2745 	y_i[yi] = -2.0 * y2[mi];
2746 	y_i[yi + 1] = 2.0 * y1[mi];
2747       } else {
2748 	y_i[yi] = y1[mi];
2749 	y_i[yi + 1] = y2[mi];
2750       }
2751     }
2752 
2753     /* Fill in the truth */
2754     for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
2755 
2756       head_r_elem1 = head_r1_true[mi];
2757       tail_r_elem1 = tail_r1_true[mi];
2758       head_r_elem2 = head_r2_true[mi];
2759       tail_r_elem2 = tail_r2_true[mi];
2760 
2761       if (ab == 0) {
2762 	head_r_true[ri] = -head_r_elem2;
2763 	tail_r_true[ri] = -tail_r_elem2;
2764 	head_r_true[ri + 1] = head_r_elem1;
2765 	tail_r_true[ri + 1] = tail_r_elem1;
2766       } else if (ab == 1) {
2767 	{
2768 	  /* Compute double-double = double-double + double-double. */
2769 	  double bv;
2770 	  double s1, s2, t1, t2;
2771 
2772 	  /* Add two hi words. */
2773 	  s1 = head_r_elem1 + head_r_elem2;
2774 	  bv = s1 - head_r_elem1;
2775 	  s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2776 
2777 	  /* Add two lo words. */
2778 	  t1 = tail_r_elem1 + tail_r_elem2;
2779 	  bv = t1 - tail_r_elem1;
2780 	  t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2781 
2782 	  s2 += t1;
2783 
2784 	  /* Renormalize (s1, s2)  to  (t1, s2) */
2785 	  t1 = s1 + s2;
2786 	  s2 = s2 - (t1 - s1);
2787 
2788 	  t2 += s2;
2789 
2790 	  /* Renormalize (t1, t2)  */
2791 	  head_r_elem = t1 + t2;
2792 	  tail_r_elem = t2 - (head_r_elem - t1);
2793 	}
2794 
2795 	/* Set the imaginary part to  R1 + R2 */
2796 	tail_r_true[ri + 1] = tail_r_elem;
2797 	head_r_true[ri + 1] = head_r_elem;
2798 
2799 	/* Set the real part to R1 - R2. */
2800 	{
2801 	  double head_bt, tail_bt;
2802 	  head_bt = -head_r_elem2;
2803 	  tail_bt = -tail_r_elem2;
2804 	  {
2805 	    /* Compute double-double = double-double + double-double. */
2806 	    double bv;
2807 	    double s1, s2, t1, t2;
2808 
2809 	    /* Add two hi words. */
2810 	    s1 = head_r_elem1 + head_bt;
2811 	    bv = s1 - head_r_elem1;
2812 	    s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2813 
2814 	    /* Add two lo words. */
2815 	    t1 = tail_r_elem1 + tail_bt;
2816 	    bv = t1 - tail_r_elem1;
2817 	    t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2818 
2819 	    s2 += t1;
2820 
2821 	    /* Renormalize (s1, s2)  to  (t1, s2) */
2822 	    t1 = s1 + s2;
2823 	    s2 = s2 - (t1 - s1);
2824 
2825 	    t2 += s2;
2826 
2827 	    /* Renormalize (t1, t2)  */
2828 	    head_r_elem = t1 + t2;
2829 	    tail_r_elem = t2 - (head_r_elem - t1);
2830 	  }
2831 	}
2832 	tail_r_true[ri] = tail_r_elem;
2833 	head_r_true[ri] = head_r_elem;
2834       } else {
2835 	/* Real part */
2836 	{
2837 	  /* Compute double-double = double-double * double. */
2838 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2839 
2840 	  con = head_r_elem2 * split;
2841 	  a11 = con - head_r_elem2;
2842 	  a11 = con - a11;
2843 	  a21 = head_r_elem2 - a11;
2844 	  con = -2.0 * split;
2845 	  b1 = con - -2.0;
2846 	  b1 = con - b1;
2847 	  b2 = -2.0 - b1;
2848 
2849 	  c11 = head_r_elem2 * -2.0;
2850 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2851 
2852 	  c2 = tail_r_elem2 * -2.0;
2853 	  t1 = c11 + c2;
2854 	  t2 = (c2 - (t1 - c11)) + c21;
2855 
2856 	  head_r_elem = t1 + t2;
2857 	  tail_r_elem = t2 - (head_r_elem - t1);
2858 	}
2859 	head_r_true[ri] = head_r_elem;
2860 	tail_r_true[ri] = tail_r_elem;
2861 
2862 	/* Imaginary Part */
2863 	{
2864 	  /* Compute double-double = double-double * double. */
2865 	  double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2866 
2867 	  con = head_r_elem1 * split;
2868 	  a11 = con - head_r_elem1;
2869 	  a11 = con - a11;
2870 	  a21 = head_r_elem1 - a11;
2871 	  con = 2.0 * split;
2872 	  b1 = con - 2.0;
2873 	  b1 = con - b1;
2874 	  b2 = 2.0 - b1;
2875 
2876 	  c11 = head_r_elem1 * 2.0;
2877 	  c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2878 
2879 	  c2 = tail_r_elem1 * 2.0;
2880 	  t1 = c11 + c2;
2881 	  t2 = (c2 - (t1 - c11)) + c21;
2882 
2883 	  head_r_elem = t1 + t2;
2884 	  tail_r_elem = t2 - (head_r_elem - t1);
2885 	}
2886 	head_r_true[ri + 1] = head_r_elem;
2887 	tail_r_true[ri + 1] = tail_r_elem;
2888       }
2889     }
2890 
2891 
2892     blas_free(a1);
2893     blas_free(a2);
2894     blas_free(y1);
2895     blas_free(y2);
2896     blas_free(x_head_0);
2897     blas_free(x_tail_0);
2898     blas_free(head_r1_true);
2899     blas_free(tail_r1_true);
2900     blas_free(head_r2_true);
2901     blas_free(tail_r2_true);
2902     blas_free(a_vec);
2903     blas_free(x_head_vec);
2904     blas_free(x_tail_vec);
2905   }
2906 }
2907 
BLAS_zhemv2_z_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,double * x_head,double * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)2908 void BLAS_zhemv2_z_d_testgen(int norm, enum blas_order_type order,
2909 			     enum blas_uplo_type uplo, int n, void *alpha,
2910 			     int alpha_flag, void *beta, int beta_flag,
2911 			     void *a, int lda, double *x_head, double *x_tail,
2912 			     void *y, int *seed, double *head_r_true,
2913 			     double *tail_r_true)
2914 
2915 /*
2916  * Purpose
2917  * =======
2918  *
2919  *   Generates the test inputs to BLAS_zhemv_z_d{_x}
2920  *
2921  * Arguments
2922  * =========
2923  *
2924  * norm    (input) int
2925  *           = -1: the vectors are scaled with norms near underflow.
2926  *           =  0: the vectors have norms of order 1.
2927  *           =  1: the vectors are scaled with norms near overflow.
2928  *
2929  * order   (input) enum blas_order_type
2930  *           storage format of the matrices
2931  *
2932  * uplo    (input) enum blas_uplo_type
2933  *           which half of the hemv2etric matrix a is to be stored.
2934  *
2935  * n       (input) int
2936  *           sizes of symmetrical matrix a, size of vectors x, y:
2937  *              matrix a is n-by-n.
2938  *
2939  * alpha   (input/output) void*
2940  *           if alpha_flag = 1, alpha is input.
2941  *           if alpha_flag = 0, alpha is output.
2942  *
2943  * alpha_flag (input) int
2944  *           = 0: alpha is free, and is output.
2945  *           = 1: alpha is fixed on input.
2946  *
2947  * beta    (input/output) void*
2948  *           if beta_flag = 1, beta is input.
2949  *           if beta_flag = 0, beta is output.
2950  *
2951  * beta_flag (input) int
2952  *           = 0: beta is free, and is output.
2953  *           = 1: beta is fixed on input.
2954  *
2955  * a       (input/output) void*
2956  *
2957  * lda     (input) lda
2958  *         leading dimension of matrix A.
2959  *
2960  * x_head  (input/output) double*
2961  * x_tail  (input/output) double*
2962  *         vector x = (x_head + x_tail)
2963  *
2964  * y       (input/output) void*
2965  *         generated vector y that will be used as an input to HEMV2.
2966  *
2967  * seed    (input/output) int *
2968  *         seed for the random number generator.
2969  *
2970  * head_r_true  (output) double *
2971  *         the leading part of the truth in double-double.
2972  *
2973  * tail_r_true  (output) double *
2974  *         the trailing part of the truth in double-double
2975  *
2976  */
2977 {
2978   {
2979 
2980     /* Strategy:
2981        r1 = alpha * A1 * x + beta * y1
2982        r2 = alpha * A2 * x + beta * y2
2983        where all the matrices and vectors are real.  Then let R = R1 + i R2,
2984        A = A1 + i A2, y = y1 + i y2.  To make A Hermitian, A1 is symmetric,
2985        and A2 is skew-symmetric.
2986      */
2987 
2988     int i, j;
2989     int yi;
2990     int aij, ai;
2991     int a1ij, a1i;
2992     int xi;
2993     int mi;
2994     int incx = 1, incy = 1;
2995     int incyi, xi0, yi0;
2996     int incaij, incai;
2997     int inca1ij, inca1i;
2998     int incxi;
2999     int inca_vec, incx_vec;
3000     int ld;
3001     int ab;
3002     int ri, incri;
3003 
3004     double *a1;
3005     double *a2;
3006     double *y1;
3007     double *y2;
3008     double *x_head_0;
3009     double *x_tail_0;
3010 
3011     double *head_r1_true, *tail_r1_true;
3012     double *head_r2_true, *tail_r2_true;
3013 
3014     double head_r_elem1, tail_r_elem1;
3015     double head_r_elem2, tail_r_elem2;
3016     double head_r_elem, tail_r_elem;
3017 
3018     double *a_vec;
3019     double *x_head_vec;
3020     double *x_tail_vec;
3021 
3022     double *y_i = (double *) y;
3023     double *alpha_i = (double *) alpha;
3024     double *beta_i = (double *) beta;
3025 
3026     double *a_i = (double *) a;
3027     double *x_head_i = x_head;
3028     double *x_tail_i = x_tail;
3029 
3030     ld = n;
3031     if (order == blas_colmajor) {
3032       inca1i = incai = 1;
3033       incyi = incy;
3034       incaij = lda;
3035       incxi = incx;
3036       inca1ij = n;
3037     } else {
3038       incyi = incy;
3039       incai = lda;
3040       incxi = incx;
3041       inca1i = n;
3042       inca1ij = incaij = 1;
3043     }
3044     xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
3045     yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
3046     incri = 1;
3047     incri *= 2;
3048 
3049     yi0 *= 2;
3050     incyi *= 2;
3051     incai *= 2;
3052     incaij *= 2;
3053 
3054 
3055     inca_vec = incx_vec = 1;
3056     inca_vec *= 2;
3057 
3058     a_vec = (double *) blas_malloc(n * sizeof(double) * 2);
3059     if (n > 0 && a_vec == NULL) {
3060       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3061     }
3062     x_head_vec = (double *) blas_malloc(n * sizeof(double));
3063     if (n > 0 && x_head_vec == NULL) {
3064       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3065     }
3066     x_tail_vec = (double *) blas_malloc(n * sizeof(double));
3067     if (n > 0 && x_tail_vec == NULL) {
3068       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3069     }
3070 
3071     int incx0, incy1, incy2, incmi = 1;
3072     a1 = (double *) blas_malloc(n * n * sizeof(double));
3073     if (n * n > 0 && a1 == NULL) {
3074       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3075     }
3076     a2 = (double *) blas_malloc(n * n * sizeof(double));
3077     if (n * n > 0 && a2 == NULL) {
3078       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3079     }
3080     y1 = (double *) blas_malloc(n * sizeof(double));
3081     if (n > 0 && y1 == NULL) {
3082       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3083     }
3084     incy1 = 1;
3085 
3086     y2 = (double *) blas_malloc(n * sizeof(double));
3087     if (n > 0 && y2 == NULL) {
3088       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3089     }
3090     incy2 = 1;
3091 
3092     x_head_0 = (double *) blas_malloc(n * sizeof(double));
3093     if (n > 0 && x_head_0 == NULL) {
3094       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3095     }
3096     x_tail_0 = (double *) blas_malloc(n * sizeof(double));
3097     if (n > 0 && x_tail_0 == NULL) {
3098       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3099     }
3100     incx0 = 1;
3101 
3102     head_r1_true = (double *) blas_malloc(n * sizeof(double));
3103     tail_r1_true = (double *) blas_malloc(n * sizeof(double));
3104     if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3105       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3106     };
3107     head_r2_true = (double *) blas_malloc(n * sizeof(double));
3108     tail_r2_true = (double *) blas_malloc(n * sizeof(double));
3109     if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3110       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3111     };
3112 
3113     /* First generate the real portion of A and x. */
3114     BLAS_dsymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
3115 			x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
3116 			head_r1_true, tail_r1_true);
3117 
3118     /* x0 is now fixed and is input to this call */
3119     BLAS_dskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
3120 			x_head_0, x_tail_0, y2, seed, head_r2_true,
3121 			tail_r2_true);
3122 
3123 
3124     /* The case where x is a real vector.
3125 
3126        There are four cases to consider, depending on the
3127        values of alpha and beta.
3128 
3129        values                             scaling
3130        alpha  beta         alpha    A    x    beta    y     r (truth)
3131        0    1      1
3132        1    1      ?                              -i     i
3133        2    ?      1            i                        i     i
3134        3    ?      ?           1+i                1+i         1+i
3135 
3136        Note that we can afford to scale truth by (1+i) since they
3137        are computed in double-double.
3138      */
3139 
3140     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3141       ab = 0;
3142       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
3143     } else if (alpha_i[0] == 1.0) {
3144       ab = 1;
3145       /* set alpha to 1, multiply beta by -i. */
3146       alpha_i[1] = 0.0;
3147       beta_i[1] = -beta_i[0];
3148       beta_i[0] = 0.0;
3149     } else if (beta_i[0] == 1.0) {
3150       ab = 2;
3151       /* set beta to 1, multiply alpha by i. */
3152       beta_i[1] = 0.0;
3153       alpha_i[1] = alpha_i[0];
3154       alpha_i[0] = 0.0;
3155     } else {
3156       ab = 3;
3157       /* multiply alpha, beta by (1 + i). */
3158       alpha_i[1] = alpha_i[0];
3159       beta_i[1] = beta_i[0];
3160     }
3161 
3162     /* Now fill in a */
3163     for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
3164       for (j = 0, aij = ai, a1ij = a1i; j < n;
3165 	   j++, aij += incaij, a1ij += inca1ij) {
3166 	a_i[aij] = a1[a1ij];
3167 	a_i[aij + 1] = a2[a1ij];
3168       }
3169     }
3170 
3171     /* Fill in x */
3172     for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
3173       x_head_i[xi] = x_head_0[mi];
3174       x_tail_i[xi] = x_tail_0[mi];
3175     }
3176 
3177     /* Fill in y */
3178     for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
3179       if (ab == 1 || ab == 2) {
3180 	y_i[yi] = -y2[mi];
3181 	y_i[yi + 1] = y1[mi];
3182       } else {
3183 	y_i[yi] = y1[mi];
3184 	y_i[yi + 1] = y2[mi];
3185       }
3186     }
3187 
3188     /* Fill in truth */
3189     for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
3190       if (ab == 0 || ab == 1) {
3191 	head_r_true[ri] = head_r1_true[mi];
3192 	tail_r_true[ri] = tail_r1_true[mi];
3193 	head_r_true[ri + 1] = head_r2_true[mi];
3194 	tail_r_true[ri + 1] = tail_r2_true[mi];
3195       } else if (ab == 2) {
3196 	head_r_true[ri] = -head_r2_true[mi];
3197 	tail_r_true[ri] = -tail_r2_true[mi];
3198 	head_r_true[ri + 1] = head_r1_true[mi];
3199 	tail_r_true[ri + 1] = tail_r1_true[mi];
3200       } else {
3201 	head_r_elem1 = head_r1_true[mi];
3202 	tail_r_elem1 = tail_r1_true[mi];
3203 
3204 	head_r_elem2 = head_r2_true[mi];
3205 	tail_r_elem2 = tail_r2_true[mi];
3206 
3207 	{
3208 	  /* Compute double-double = double-double + double-double. */
3209 	  double bv;
3210 	  double s1, s2, t1, t2;
3211 
3212 	  /* Add two hi words. */
3213 	  s1 = head_r_elem1 + head_r_elem2;
3214 	  bv = s1 - head_r_elem1;
3215 	  s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3216 
3217 	  /* Add two lo words. */
3218 	  t1 = tail_r_elem1 + tail_r_elem2;
3219 	  bv = t1 - tail_r_elem1;
3220 	  t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3221 
3222 	  s2 += t1;
3223 
3224 	  /* Renormalize (s1, s2)  to  (t1, s2) */
3225 	  t1 = s1 + s2;
3226 	  s2 = s2 - (t1 - s1);
3227 
3228 	  t2 += s2;
3229 
3230 	  /* Renormalize (t1, t2)  */
3231 	  head_r_elem = t1 + t2;
3232 	  tail_r_elem = t2 - (head_r_elem - t1);
3233 	}
3234 
3235 	/* Set the imaginary part to  r1 + r2 */
3236 	tail_r_true[ri + 1] = tail_r_elem;
3237 	head_r_true[ri + 1] = head_r_elem;
3238 
3239 	/* Set the real part to r1 - r2. */
3240 	{
3241 	  double head_bt, tail_bt;
3242 	  head_bt = -head_r_elem2;
3243 	  tail_bt = -tail_r_elem2;
3244 	  {
3245 	    /* Compute double-double = double-double + double-double. */
3246 	    double bv;
3247 	    double s1, s2, t1, t2;
3248 
3249 	    /* Add two hi words. */
3250 	    s1 = head_r_elem1 + head_bt;
3251 	    bv = s1 - head_r_elem1;
3252 	    s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3253 
3254 	    /* Add two lo words. */
3255 	    t1 = tail_r_elem1 + tail_bt;
3256 	    bv = t1 - tail_r_elem1;
3257 	    t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3258 
3259 	    s2 += t1;
3260 
3261 	    /* Renormalize (s1, s2)  to  (t1, s2) */
3262 	    t1 = s1 + s2;
3263 	    s2 = s2 - (t1 - s1);
3264 
3265 	    t2 += s2;
3266 
3267 	    /* Renormalize (t1, t2)  */
3268 	    head_r_elem = t1 + t2;
3269 	    tail_r_elem = t2 - (head_r_elem - t1);
3270 	  }
3271 	}
3272 	tail_r_true[ri] = tail_r_elem;
3273 	head_r_true[ri] = head_r_elem;
3274       }
3275     }
3276 
3277 
3278     blas_free(a1);
3279     blas_free(a2);
3280     blas_free(y1);
3281     blas_free(y2);
3282     blas_free(x_head_0);
3283     blas_free(x_tail_0);
3284     blas_free(head_r1_true);
3285     blas_free(tail_r1_true);
3286     blas_free(head_r2_true);
3287     blas_free(tail_r2_true);
3288     blas_free(a_vec);
3289     blas_free(x_head_vec);
3290     blas_free(x_tail_vec);
3291   }
3292 }
3293 
BLAS_chemv2_c_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,float * x_head,float * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)3294 void BLAS_chemv2_c_s_testgen(int norm, enum blas_order_type order,
3295 			     enum blas_uplo_type uplo, int n, void *alpha,
3296 			     int alpha_flag, void *beta, int beta_flag,
3297 			     void *a, int lda, float *x_head, float *x_tail,
3298 			     void *y, int *seed, double *head_r_true,
3299 			     double *tail_r_true)
3300 
3301 /*
3302  * Purpose
3303  * =======
3304  *
3305  *   Generates the test inputs to BLAS_chemv_c_s{_x}
3306  *
3307  * Arguments
3308  * =========
3309  *
3310  * norm    (input) int
3311  *           = -1: the vectors are scaled with norms near underflow.
3312  *           =  0: the vectors have norms of order 1.
3313  *           =  1: the vectors are scaled with norms near overflow.
3314  *
3315  * order   (input) enum blas_order_type
3316  *           storage format of the matrices
3317  *
3318  * uplo    (input) enum blas_uplo_type
3319  *           which half of the hemv2etric matrix a is to be stored.
3320  *
3321  * n       (input) int
3322  *           sizes of symmetrical matrix a, size of vectors x, y:
3323  *              matrix a is n-by-n.
3324  *
3325  * alpha   (input/output) void*
3326  *           if alpha_flag = 1, alpha is input.
3327  *           if alpha_flag = 0, alpha is output.
3328  *
3329  * alpha_flag (input) int
3330  *           = 0: alpha is free, and is output.
3331  *           = 1: alpha is fixed on input.
3332  *
3333  * beta    (input/output) void*
3334  *           if beta_flag = 1, beta is input.
3335  *           if beta_flag = 0, beta is output.
3336  *
3337  * beta_flag (input) int
3338  *           = 0: beta is free, and is output.
3339  *           = 1: beta is fixed on input.
3340  *
3341  * a       (input/output) void*
3342  *
3343  * lda     (input) lda
3344  *         leading dimension of matrix A.
3345  *
3346  * x_head  (input/output) float*
3347  * x_tail  (input/output) float*
3348  *         vector x = (x_head + x_tail)
3349  *
3350  * y       (input/output) void*
3351  *         generated vector y that will be used as an input to HEMV2.
3352  *
3353  * seed    (input/output) int *
3354  *         seed for the random number generator.
3355  *
3356  * head_r_true  (output) double *
3357  *         the leading part of the truth in double-double.
3358  *
3359  * tail_r_true  (output) double *
3360  *         the trailing part of the truth in double-double
3361  *
3362  */
3363 {
3364   {
3365 
3366     /* Strategy:
3367        r1 = alpha * A1 * x + beta * y1
3368        r2 = alpha * A2 * x + beta * y2
3369        where all the matrices and vectors are real.  Then let R = R1 + i R2,
3370        A = A1 + i A2, y = y1 + i y2.  To make A Hermitian, A1 is symmetric,
3371        and A2 is skew-symmetric.
3372      */
3373 
3374     int i, j;
3375     int yi;
3376     int aij, ai;
3377     int a1ij, a1i;
3378     int xi;
3379     int mi;
3380     int incx = 1, incy = 1;
3381     int incyi, xi0, yi0;
3382     int incaij, incai;
3383     int inca1ij, inca1i;
3384     int incxi;
3385     int inca_vec, incx_vec;
3386     int ld;
3387     int ab;
3388     int ri, incri;
3389 
3390     float *a1;
3391     float *a2;
3392     float *y1;
3393     float *y2;
3394     float *x_head_0;
3395     float *x_tail_0;
3396 
3397     double *head_r1_true, *tail_r1_true;
3398     double *head_r2_true, *tail_r2_true;
3399 
3400     double head_r_elem1, tail_r_elem1;
3401     double head_r_elem2, tail_r_elem2;
3402     double head_r_elem, tail_r_elem;
3403 
3404     float *a_vec;
3405     float *x_head_vec;
3406     float *x_tail_vec;
3407 
3408     float *y_i = (float *) y;
3409     float *alpha_i = (float *) alpha;
3410     float *beta_i = (float *) beta;
3411 
3412     float *a_i = (float *) a;
3413     float *x_head_i = x_head;
3414     float *x_tail_i = x_tail;
3415 
3416     ld = n;
3417     if (order == blas_colmajor) {
3418       inca1i = incai = 1;
3419       incyi = incy;
3420       incaij = lda;
3421       incxi = incx;
3422       inca1ij = n;
3423     } else {
3424       incyi = incy;
3425       incai = lda;
3426       incxi = incx;
3427       inca1i = n;
3428       inca1ij = incaij = 1;
3429     }
3430     xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
3431     yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
3432     incri = 1;
3433     incri *= 2;
3434 
3435     yi0 *= 2;
3436     incyi *= 2;
3437     incai *= 2;
3438     incaij *= 2;
3439 
3440 
3441     inca_vec = incx_vec = 1;
3442     inca_vec *= 2;
3443 
3444     a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
3445     if (n > 0 && a_vec == NULL) {
3446       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3447     }
3448     x_head_vec = (float *) blas_malloc(n * sizeof(float));
3449     if (n > 0 && x_head_vec == NULL) {
3450       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3451     }
3452     x_tail_vec = (float *) blas_malloc(n * sizeof(float));
3453     if (n > 0 && x_tail_vec == NULL) {
3454       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3455     }
3456 
3457     int incx0, incy1, incy2, incmi = 1;
3458     a1 = (float *) blas_malloc(n * n * sizeof(float));
3459     if (n * n > 0 && a1 == NULL) {
3460       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3461     }
3462     a2 = (float *) blas_malloc(n * n * sizeof(float));
3463     if (n * n > 0 && a2 == NULL) {
3464       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3465     }
3466     y1 = (float *) blas_malloc(n * sizeof(float));
3467     if (n > 0 && y1 == NULL) {
3468       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3469     }
3470     incy1 = 1;
3471 
3472     y2 = (float *) blas_malloc(n * sizeof(float));
3473     if (n > 0 && y2 == NULL) {
3474       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3475     }
3476     incy2 = 1;
3477 
3478     x_head_0 = (float *) blas_malloc(n * sizeof(float));
3479     if (n > 0 && x_head_0 == NULL) {
3480       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3481     }
3482     x_tail_0 = (float *) blas_malloc(n * sizeof(float));
3483     if (n > 0 && x_tail_0 == NULL) {
3484       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3485     }
3486     incx0 = 1;
3487 
3488     head_r1_true = (double *) blas_malloc(n * sizeof(double));
3489     tail_r1_true = (double *) blas_malloc(n * sizeof(double));
3490     if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3491       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3492     };
3493     head_r2_true = (double *) blas_malloc(n * sizeof(double));
3494     tail_r2_true = (double *) blas_malloc(n * sizeof(double));
3495     if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3496       BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3497     };
3498 
3499     /* First generate the real portion of A and x. */
3500     BLAS_ssymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
3501 			x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
3502 			head_r1_true, tail_r1_true);
3503 
3504     /* x0 is now fixed and is input to this call */
3505     BLAS_sskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
3506 			x_head_0, x_tail_0, y2, seed, head_r2_true,
3507 			tail_r2_true);
3508 
3509 
3510     /* The case where x is a real vector.
3511 
3512        There are four cases to consider, depending on the
3513        values of alpha and beta.
3514 
3515        values                             scaling
3516        alpha  beta         alpha    A    x    beta    y     r (truth)
3517        0    1      1
3518        1    1      ?                              -i     i
3519        2    ?      1            i                        i     i
3520        3    ?      ?           1+i                1+i         1+i
3521 
3522        Note that we can afford to scale truth by (1+i) since they
3523        are computed in double-double.
3524      */
3525 
3526     if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3527       ab = 0;
3528       alpha_i[1] = beta_i[1] = 0.0;	/* set alpha, beta to be 1. */
3529     } else if (alpha_i[0] == 1.0) {
3530       ab = 1;
3531       /* set alpha to 1, multiply beta by -i. */
3532       alpha_i[1] = 0.0;
3533       beta_i[1] = -beta_i[0];
3534       beta_i[0] = 0.0;
3535     } else if (beta_i[0] == 1.0) {
3536       ab = 2;
3537       /* set beta to 1, multiply alpha by i. */
3538       beta_i[1] = 0.0;
3539       alpha_i[1] = alpha_i[0];
3540       alpha_i[0] = 0.0;
3541     } else {
3542       ab = 3;
3543       /* multiply alpha, beta by (1 + i). */
3544       alpha_i[1] = alpha_i[0];
3545       beta_i[1] = beta_i[0];
3546     }
3547 
3548     /* Now fill in a */
3549     for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
3550       for (j = 0, aij = ai, a1ij = a1i; j < n;
3551 	   j++, aij += incaij, a1ij += inca1ij) {
3552 	a_i[aij] = a1[a1ij];
3553 	a_i[aij + 1] = a2[a1ij];
3554       }
3555     }
3556 
3557     /* Fill in x */
3558     for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
3559       x_head_i[xi] = x_head_0[mi];
3560       x_tail_i[xi] = x_tail_0[mi];
3561     }
3562 
3563     /* Fill in y */
3564     for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
3565       if (ab == 1 || ab == 2) {
3566 	y_i[yi] = -y2[mi];
3567 	y_i[yi + 1] = y1[mi];
3568       } else {
3569 	y_i[yi] = y1[mi];
3570 	y_i[yi + 1] = y2[mi];
3571       }
3572     }
3573 
3574     /* Fill in truth */
3575     for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
3576       if (ab == 0 || ab == 1) {
3577 	head_r_true[ri] = head_r1_true[mi];
3578 	tail_r_true[ri] = tail_r1_true[mi];
3579 	head_r_true[ri + 1] = head_r2_true[mi];
3580 	tail_r_true[ri + 1] = tail_r2_true[mi];
3581       } else if (ab == 2) {
3582 	head_r_true[ri] = -head_r2_true[mi];
3583 	tail_r_true[ri] = -tail_r2_true[mi];
3584 	head_r_true[ri + 1] = head_r1_true[mi];
3585 	tail_r_true[ri + 1] = tail_r1_true[mi];
3586       } else {
3587 	head_r_elem1 = head_r1_true[mi];
3588 	tail_r_elem1 = tail_r1_true[mi];
3589 
3590 	head_r_elem2 = head_r2_true[mi];
3591 	tail_r_elem2 = tail_r2_true[mi];
3592 
3593 	{
3594 	  /* Compute double-double = double-double + double-double. */
3595 	  double bv;
3596 	  double s1, s2, t1, t2;
3597 
3598 	  /* Add two hi words. */
3599 	  s1 = head_r_elem1 + head_r_elem2;
3600 	  bv = s1 - head_r_elem1;
3601 	  s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3602 
3603 	  /* Add two lo words. */
3604 	  t1 = tail_r_elem1 + tail_r_elem2;
3605 	  bv = t1 - tail_r_elem1;
3606 	  t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3607 
3608 	  s2 += t1;
3609 
3610 	  /* Renormalize (s1, s2)  to  (t1, s2) */
3611 	  t1 = s1 + s2;
3612 	  s2 = s2 - (t1 - s1);
3613 
3614 	  t2 += s2;
3615 
3616 	  /* Renormalize (t1, t2)  */
3617 	  head_r_elem = t1 + t2;
3618 	  tail_r_elem = t2 - (head_r_elem - t1);
3619 	}
3620 
3621 	/* Set the imaginary part to  r1 + r2 */
3622 	tail_r_true[ri + 1] = tail_r_elem;
3623 	head_r_true[ri + 1] = head_r_elem;
3624 
3625 	/* Set the real part to r1 - r2. */
3626 	{
3627 	  double head_bt, tail_bt;
3628 	  head_bt = -head_r_elem2;
3629 	  tail_bt = -tail_r_elem2;
3630 	  {
3631 	    /* Compute double-double = double-double + double-double. */
3632 	    double bv;
3633 	    double s1, s2, t1, t2;
3634 
3635 	    /* Add two hi words. */
3636 	    s1 = head_r_elem1 + head_bt;
3637 	    bv = s1 - head_r_elem1;
3638 	    s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3639 
3640 	    /* Add two lo words. */
3641 	    t1 = tail_r_elem1 + tail_bt;
3642 	    bv = t1 - tail_r_elem1;
3643 	    t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3644 
3645 	    s2 += t1;
3646 
3647 	    /* Renormalize (s1, s2)  to  (t1, s2) */
3648 	    t1 = s1 + s2;
3649 	    s2 = s2 - (t1 - s1);
3650 
3651 	    t2 += s2;
3652 
3653 	    /* Renormalize (t1, t2)  */
3654 	    head_r_elem = t1 + t2;
3655 	    tail_r_elem = t2 - (head_r_elem - t1);
3656 	  }
3657 	}
3658 	tail_r_true[ri] = tail_r_elem;
3659 	head_r_true[ri] = head_r_elem;
3660       }
3661     }
3662 
3663 
3664     blas_free(a1);
3665     blas_free(a2);
3666     blas_free(y1);
3667     blas_free(y2);
3668     blas_free(x_head_0);
3669     blas_free(x_tail_0);
3670     blas_free(head_r1_true);
3671     blas_free(tail_r1_true);
3672     blas_free(head_r2_true);
3673     blas_free(tail_r2_true);
3674     blas_free(a_vec);
3675     blas_free(x_head_vec);
3676     blas_free(x_tail_vec);
3677   }
3678 }
3679