1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7
BLAS_sskew_testgen_hemv(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,float * alpha,float * beta,float * a,int lda,float * x,int incx,float * y,int incy,int * seed,double * head_r_true,double * tail_r_true)8 void BLAS_sskew_testgen_hemv(int norm, enum blas_order_type order,
9 enum blas_uplo_type uplo,
10 int n, int randomize,
11 float *alpha, float *beta,
12 float *a, int lda, float *x, int incx, float *y,
13 int incy, int *seed, double *head_r_true,
14 double *tail_r_true)
15
16 /*
17 * Purpose
18 * =======
19 *
20 * Generates a skew Matrix for use with hemv testing
21 *
22 * Arguments
23 * =========
24 *
25 * norm (input) int
26 * = -1: the vectors are scaled with norms near underflow.
27 * = 0: the vectors have norms of order 1.
28 * = 1: the vectors are scaled with norms near overflow.
29 *
30 * order (input) enum blas_order_type
31 * storage format of the matrices
32 *
33 * uplo (input) enum blas_uplo_type
34 * which half of the skew symmetric matrix a is to be stored.
35 *
36 * n (input) int
37 * sizes of symmetrical matrix a, size of vectors x, y:
38 * matrix a is n-by-n.
39 *
40 * randomize (input) int
41 * if 0, entries in matrices A, x will be chosen for
42 * maximum cancellation, but with less randomness.
43 * if 1, every entry in the matrix A, x will be
44 * random.
45 *
46 * alpha (input/output) float*
47 *
48 * beta (input) float*
49 *
50 * a (input/output) float*
51 *
52 * lda (input) lda
53 * leading dimension of matrix A.
54 *
55 * x (input) float*
56 * note : x is input only. x should be determined before calling
57 * this function.
58 *
59 * incx (input) int
60 * stride of vector x.
61 *
62 * y (input/output) float*
63 * generated vector y.
64 *
65 * incy (input) int
66 * leading dimension of vector y.
67 *
68 * seed (input/output) int *
69 * seed for the random number generator.
70 *
71 * head_r_true (output) double *
72 * the leading part of the truth in double-double.
73 *
74 * tail_r_true (output) double *
75 * the trailing part of the truth in double-double
76 *
77 */
78 {
79
80 int i, j;
81 int yi;
82 int aij, ai, ri;
83 int incyi, incri;
84 int incx_veci, y_starti;
85 int incaij, incai;
86 int inca_vec;
87 int n_i;
88
89 float y_elem;
90 float a_elem;
91 double head_r_true_elem, tail_r_true_elem;
92
93 float *a_vec;
94 float *x_vec;
95
96 float *y_i = y;
97 float *alpha_i = alpha;
98 float *beta_i = beta;
99 float *a_i = a;
100 float *x_i = x;
101
102 n_i = n;
103
104 /*a_vec must have stride of 1 */
105 inca_vec = 1;
106
107
108 a_vec = (float *) blas_malloc(n_i * sizeof(float));
109 if (n_i > 0 && a_vec == NULL) {
110 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
111 }
112 for (i = 0; i < n_i; i += inca_vec) {
113 a_vec[i] = 0.0;
114 }
115 x_vec = (float *) blas_malloc(n_i * sizeof(float));
116 if (n_i > 0 && x_vec == NULL) {
117 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
118 }
119 scopy_vector(x_i, n_i, incx, x_vec, 1);
120
121 incyi = incy;
122
123 if (incyi < 0) {
124 y_starti = (-n + 1) * incyi;
125 } else {
126 y_starti = 0;
127 }
128
129 incri = 1;
130
131
132 incx_veci = 1;
133
134
135 if (randomize == 0) {
136
137 /* Fill in skew matrix A */
138 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, ri += incri, yi += incyi) {
139 /* x_i has already been copied to x_vec */
140 sskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
141 /* skew matricies have zeroed diagonals */
142 a_vec[i] = 0.0;
143 BLAS_sdot_testgen(n_i, i + 1, n_i - i - 1, norm,
144 blas_no_conj, alpha_i, 1,
145 beta_i, 1, x_vec, a_vec, seed,
146 &y_elem, &head_r_true_elem, &tail_r_true_elem);
147
148
149 sskew_commit_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
150
151 /*commits an element to the generated y */
152 y_i[yi] = y_elem;
153 head_r_true[ri] = head_r_true_elem;
154 tail_r_true[ri] = tail_r_true_elem;
155 }
156
157 } else {
158 /*set a randomly */
159
160 if (order == blas_colmajor) {
161 incai = 1;
162 incaij = lda;
163 } else {
164 incai = lda;
165 incaij = 1;
166 }
167
168
169
170
171 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
172 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
173 a_elem = xrand(seed);
174 if (i != j) {
175 a_i[aij] = a_elem;
176 } else {
177 /* skew matricies have zeroed diagonals */
178 a_i[aij] = 0.0;
179 }
180 }
181 }
182
183 /* now compute appropriate y vector */
184
185 /* get x */
186 scopy_vector(x_i, n_i, incx, x_vec, 1);
187
188 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi, ri += incri) {
189 sskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
190
191 BLAS_sdot_testgen(n_i, n_i, 0, norm,
192 blas_no_conj, alpha_i, 1,
193 beta_i, 1, x_vec, a_vec, seed,
194 &y_elem, &head_r_true_elem, &tail_r_true_elem);
195
196 y_i[yi] = y_elem;
197 head_r_true[ri] = head_r_true_elem;
198 tail_r_true[ri] = tail_r_true_elem;
199 }
200 }
201
202 blas_free(a_vec);
203 blas_free(x_vec);
204 }
BLAS_dskew_testgen_hemv(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,double * alpha,double * beta,double * a,int lda,double * x,int incx,double * y,int incy,int * seed,double * head_r_true,double * tail_r_true)205 void BLAS_dskew_testgen_hemv(int norm, enum blas_order_type order,
206 enum blas_uplo_type uplo,
207 int n, int randomize,
208 double *alpha, double *beta,
209 double *a, int lda, double *x, int incx,
210 double *y, int incy, int *seed,
211 double *head_r_true, double *tail_r_true)
212
213 /*
214 * Purpose
215 * =======
216 *
217 * Generates a skew Matrix for use with hemv testing
218 *
219 * Arguments
220 * =========
221 *
222 * norm (input) int
223 * = -1: the vectors are scaled with norms near underflow.
224 * = 0: the vectors have norms of order 1.
225 * = 1: the vectors are scaled with norms near overflow.
226 *
227 * order (input) enum blas_order_type
228 * storage format of the matrices
229 *
230 * uplo (input) enum blas_uplo_type
231 * which half of the skew symmetric matrix a is to be stored.
232 *
233 * n (input) int
234 * sizes of symmetrical matrix a, size of vectors x, y:
235 * matrix a is n-by-n.
236 *
237 * randomize (input) int
238 * if 0, entries in matrices A, x will be chosen for
239 * maximum cancellation, but with less randomness.
240 * if 1, every entry in the matrix A, x will be
241 * random.
242 *
243 * alpha (input/output) double*
244 *
245 * beta (input) double*
246 *
247 * a (input/output) double*
248 *
249 * lda (input) lda
250 * leading dimension of matrix A.
251 *
252 * x (input) double*
253 * note : x is input only. x should be determined before calling
254 * this function.
255 *
256 * incx (input) int
257 * stride of vector x.
258 *
259 * y (input/output) double*
260 * generated vector y.
261 *
262 * incy (input) int
263 * leading dimension of vector y.
264 *
265 * seed (input/output) int *
266 * seed for the random number generator.
267 *
268 * head_r_true (output) double *
269 * the leading part of the truth in double-double.
270 *
271 * tail_r_true (output) double *
272 * the trailing part of the truth in double-double
273 *
274 */
275 {
276
277 int i, j;
278 int yi;
279 int aij, ai, ri;
280 int incyi, incri;
281 int incx_veci, y_starti;
282 int incaij, incai;
283 int inca_vec;
284 int n_i;
285
286 double y_elem;
287 double a_elem;
288 double head_r_true_elem, tail_r_true_elem;
289
290 double *a_vec;
291 double *x_vec;
292
293 double *y_i = y;
294 double *alpha_i = alpha;
295 double *beta_i = beta;
296 double *a_i = a;
297 double *x_i = x;
298
299 n_i = n;
300
301 /*a_vec must have stride of 1 */
302 inca_vec = 1;
303
304
305 a_vec = (double *) blas_malloc(n_i * sizeof(double));
306 if (n_i > 0 && a_vec == NULL) {
307 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
308 }
309 for (i = 0; i < n_i; i += inca_vec) {
310 a_vec[i] = 0.0;
311 }
312 x_vec = (double *) blas_malloc(n_i * sizeof(double));
313 if (n_i > 0 && x_vec == NULL) {
314 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
315 }
316 dcopy_vector(x_i, n_i, incx, x_vec, 1);
317
318 incyi = incy;
319
320 if (incyi < 0) {
321 y_starti = (-n + 1) * incyi;
322 } else {
323 y_starti = 0;
324 }
325
326 incri = 1;
327
328
329 incx_veci = 1;
330
331
332 if (randomize == 0) {
333
334 /* Fill in skew matrix A */
335 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, ri += incri, yi += incyi) {
336 /* x_i has already been copied to x_vec */
337 dskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
338 /* skew matricies have zeroed diagonals */
339 a_vec[i] = 0.0;
340 BLAS_ddot_testgen(n_i, i + 1, n_i - i - 1, norm,
341 blas_no_conj, alpha_i, 1,
342 beta_i, 1, x_vec, a_vec, seed,
343 &y_elem, &head_r_true_elem, &tail_r_true_elem);
344
345
346 dskew_commit_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
347
348 /*commits an element to the generated y */
349 y_i[yi] = y_elem;
350 head_r_true[ri] = head_r_true_elem;
351 tail_r_true[ri] = tail_r_true_elem;
352 }
353
354 } else {
355 /*set a randomly */
356
357 if (order == blas_colmajor) {
358 incai = 1;
359 incaij = lda;
360 } else {
361 incai = lda;
362 incaij = 1;
363 }
364
365
366
367
368 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
369 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
370 a_elem = xrand(seed);
371 if (i != j) {
372 a_i[aij] = a_elem;
373 } else {
374 /* skew matricies have zeroed diagonals */
375 a_i[aij] = 0.0;
376 }
377 }
378 }
379
380 /* now compute appropriate y vector */
381
382 /* get x */
383 dcopy_vector(x_i, n_i, incx, x_vec, 1);
384
385 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi, ri += incri) {
386 dskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
387
388 BLAS_ddot_testgen(n_i, n_i, 0, norm,
389 blas_no_conj, alpha_i, 1,
390 beta_i, 1, x_vec, a_vec, seed,
391 &y_elem, &head_r_true_elem, &tail_r_true_elem);
392
393 y_i[yi] = y_elem;
394 head_r_true[ri] = head_r_true_elem;
395 tail_r_true[ri] = tail_r_true_elem;
396 }
397 }
398
399 blas_free(a_vec);
400 blas_free(x_vec);
401 }
BLAS_dskew_testgen_hemv_d_s(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,double * alpha,double * beta,double * a,int lda,float * x,int incx,double * y,int incy,int * seed,double * head_r_true,double * tail_r_true)402 void BLAS_dskew_testgen_hemv_d_s(int norm, enum blas_order_type order,
403 enum blas_uplo_type uplo,
404 int n, int randomize,
405 double *alpha, double *beta,
406 double *a, int lda, float *x, int incx,
407 double *y, int incy, int *seed,
408 double *head_r_true, double *tail_r_true)
409
410 /*
411 * Purpose
412 * =======
413 *
414 * Generates a skew Matrix for use with hemv testing
415 *
416 * Arguments
417 * =========
418 *
419 * norm (input) int
420 * = -1: the vectors are scaled with norms near underflow.
421 * = 0: the vectors have norms of order 1.
422 * = 1: the vectors are scaled with norms near overflow.
423 *
424 * order (input) enum blas_order_type
425 * storage format of the matrices
426 *
427 * uplo (input) enum blas_uplo_type
428 * which half of the skew symmetric matrix a is to be stored.
429 *
430 * n (input) int
431 * sizes of symmetrical matrix a, size of vectors x, y:
432 * matrix a is n-by-n.
433 *
434 * randomize (input) int
435 * if 0, entries in matrices A, x will be chosen for
436 * maximum cancellation, but with less randomness.
437 * if 1, every entry in the matrix A, x will be
438 * random.
439 *
440 * alpha (input/output) double*
441 *
442 * beta (input) double*
443 *
444 * a (input/output) double*
445 *
446 * lda (input) lda
447 * leading dimension of matrix A.
448 *
449 * x (input) float*
450 * note : x is input only. x should be determined before calling
451 * this function.
452 *
453 * incx (input) int
454 * stride of vector x.
455 *
456 * y (input/output) double*
457 * generated vector y.
458 *
459 * incy (input) int
460 * leading dimension of vector y.
461 *
462 * seed (input/output) int *
463 * seed for the random number generator.
464 *
465 * head_r_true (output) double *
466 * the leading part of the truth in double-double.
467 *
468 * tail_r_true (output) double *
469 * the trailing part of the truth in double-double
470 *
471 */
472 {
473
474 int i, j;
475 int yi;
476 int aij, ai, ri;
477 int incyi, incri;
478 int incx_veci, y_starti;
479 int incaij, incai;
480 int inca_vec;
481 int n_i;
482
483 double y_elem;
484 double a_elem;
485 double head_r_true_elem, tail_r_true_elem;
486
487 double *a_vec;
488 float *x_vec;
489
490 double *y_i = y;
491 double *alpha_i = alpha;
492 double *beta_i = beta;
493 double *a_i = a;
494 float *x_i = x;
495
496 n_i = n;
497
498 /*a_vec must have stride of 1 */
499 inca_vec = 1;
500
501
502 a_vec = (double *) blas_malloc(n_i * sizeof(double));
503 if (n_i > 0 && a_vec == NULL) {
504 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
505 }
506 for (i = 0; i < n_i; i += inca_vec) {
507 a_vec[i] = 0.0;
508 }
509 x_vec = (float *) blas_malloc(n_i * sizeof(float));
510 if (n_i > 0 && x_vec == NULL) {
511 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
512 }
513 scopy_vector(x_i, n_i, incx, x_vec, 1);
514
515 incyi = incy;
516
517 if (incyi < 0) {
518 y_starti = (-n + 1) * incyi;
519 } else {
520 y_starti = 0;
521 }
522
523 incri = 1;
524
525
526 incx_veci = 1;
527
528
529 if (randomize == 0) {
530
531 /* Fill in skew matrix A */
532 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, ri += incri, yi += incyi) {
533 /* x_i has already been copied to x_vec */
534 dskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
535 /* skew matricies have zeroed diagonals */
536 a_vec[i] = 0.0;
537 BLAS_ddot_s_d_testgen(n_i, i + 1, n_i - i - 1, norm,
538 blas_no_conj, alpha_i, 1,
539 beta_i, 1, x_vec, a_vec, seed,
540 &y_elem, &head_r_true_elem, &tail_r_true_elem);
541
542
543 dskew_commit_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
544
545 /*commits an element to the generated y */
546 y_i[yi] = y_elem;
547 head_r_true[ri] = head_r_true_elem;
548 tail_r_true[ri] = tail_r_true_elem;
549 }
550
551 } else {
552 /*set a randomly */
553
554 if (order == blas_colmajor) {
555 incai = 1;
556 incaij = lda;
557 } else {
558 incai = lda;
559 incaij = 1;
560 }
561
562
563
564
565 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
566 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
567 a_elem = (float) xrand(seed);
568 if (i != j) {
569 a_i[aij] = a_elem;
570 } else {
571 /* skew matricies have zeroed diagonals */
572 a_i[aij] = 0.0;
573 }
574 }
575 }
576
577 /* now compute appropriate y vector */
578
579 /* get x */
580 scopy_vector(x_i, n_i, incx, x_vec, 1);
581
582 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi, ri += incri) {
583 dskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
584
585 BLAS_ddot_s_d_testgen(n_i, n_i, 0, norm,
586 blas_no_conj, alpha_i, 1,
587 beta_i, 1, x_vec, a_vec, seed,
588 &y_elem, &head_r_true_elem, &tail_r_true_elem);
589
590 y_i[yi] = y_elem;
591 head_r_true[ri] = head_r_true_elem;
592 tail_r_true[ri] = tail_r_true_elem;
593 }
594 }
595
596 blas_free(a_vec);
597 blas_free(x_vec);
598 }
BLAS_dskew_testgen_hemv_s_d(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,double * alpha,double * beta,float * a,int lda,double * x,int incx,double * y,int incy,int * seed,double * head_r_true,double * tail_r_true)599 void BLAS_dskew_testgen_hemv_s_d(int norm, enum blas_order_type order,
600 enum blas_uplo_type uplo,
601 int n, int randomize,
602 double *alpha, double *beta,
603 float *a, int lda, double *x, int incx,
604 double *y, int incy, int *seed,
605 double *head_r_true, double *tail_r_true)
606
607 /*
608 * Purpose
609 * =======
610 *
611 * Generates a skew Matrix for use with hemv testing
612 *
613 * Arguments
614 * =========
615 *
616 * norm (input) int
617 * = -1: the vectors are scaled with norms near underflow.
618 * = 0: the vectors have norms of order 1.
619 * = 1: the vectors are scaled with norms near overflow.
620 *
621 * order (input) enum blas_order_type
622 * storage format of the matrices
623 *
624 * uplo (input) enum blas_uplo_type
625 * which half of the skew symmetric matrix a is to be stored.
626 *
627 * n (input) int
628 * sizes of symmetrical matrix a, size of vectors x, y:
629 * matrix a is n-by-n.
630 *
631 * randomize (input) int
632 * if 0, entries in matrices A, x will be chosen for
633 * maximum cancellation, but with less randomness.
634 * if 1, every entry in the matrix A, x will be
635 * random.
636 *
637 * alpha (input/output) double*
638 *
639 * beta (input) double*
640 *
641 * a (input/output) float*
642 *
643 * lda (input) lda
644 * leading dimension of matrix A.
645 *
646 * x (input) double*
647 * note : x is input only. x should be determined before calling
648 * this function.
649 *
650 * incx (input) int
651 * stride of vector x.
652 *
653 * y (input/output) double*
654 * generated vector y.
655 *
656 * incy (input) int
657 * leading dimension of vector y.
658 *
659 * seed (input/output) int *
660 * seed for the random number generator.
661 *
662 * head_r_true (output) double *
663 * the leading part of the truth in double-double.
664 *
665 * tail_r_true (output) double *
666 * the trailing part of the truth in double-double
667 *
668 */
669 {
670
671 int i, j;
672 int yi;
673 int aij, ai, ri;
674 int incyi, incri;
675 int incx_veci, y_starti;
676 int incaij, incai;
677 int inca_vec;
678 int n_i;
679
680 double y_elem;
681 float a_elem;
682 double head_r_true_elem, tail_r_true_elem;
683
684 float *a_vec;
685 double *x_vec;
686
687 double *y_i = y;
688 double *alpha_i = alpha;
689 double *beta_i = beta;
690 float *a_i = a;
691 double *x_i = x;
692
693 n_i = n;
694
695 /*a_vec must have stride of 1 */
696 inca_vec = 1;
697
698
699 a_vec = (float *) blas_malloc(n_i * sizeof(float));
700 if (n_i > 0 && a_vec == NULL) {
701 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
702 }
703 for (i = 0; i < n_i; i += inca_vec) {
704 a_vec[i] = 0.0;
705 }
706 x_vec = (double *) blas_malloc(n_i * sizeof(double));
707 if (n_i > 0 && x_vec == NULL) {
708 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
709 }
710 dcopy_vector(x_i, n_i, incx, x_vec, 1);
711
712 incyi = incy;
713
714 if (incyi < 0) {
715 y_starti = (-n + 1) * incyi;
716 } else {
717 y_starti = 0;
718 }
719
720 incri = 1;
721
722
723 incx_veci = 1;
724
725
726 if (randomize == 0) {
727
728 /* Fill in skew matrix A */
729 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, ri += incri, yi += incyi) {
730 /* x_i has already been copied to x_vec */
731 sskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
732 /* skew matricies have zeroed diagonals */
733 a_vec[i] = 0.0;
734 BLAS_ddot_d_s_testgen(n_i, i + 1, n_i - i - 1, norm,
735 blas_no_conj, alpha_i, 1,
736 beta_i, 1, x_vec, a_vec, seed,
737 &y_elem, &head_r_true_elem, &tail_r_true_elem);
738
739
740 sskew_commit_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
741
742 /*commits an element to the generated y */
743 y_i[yi] = y_elem;
744 head_r_true[ri] = head_r_true_elem;
745 tail_r_true[ri] = tail_r_true_elem;
746 }
747
748 } else {
749 /*set a randomly */
750
751 if (order == blas_colmajor) {
752 incai = 1;
753 incaij = lda;
754 } else {
755 incai = lda;
756 incaij = 1;
757 }
758
759
760
761
762 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
763 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
764 a_elem = (float) xrand(seed);
765 if (i != j) {
766 a_i[aij] = a_elem;
767 } else {
768 /* skew matricies have zeroed diagonals */
769 a_i[aij] = 0.0;
770 }
771 }
772 }
773
774 /* now compute appropriate y vector */
775
776 /* get x */
777 dcopy_vector(x_i, n_i, incx, x_vec, 1);
778
779 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi, ri += incri) {
780 sskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
781
782 BLAS_ddot_d_s_testgen(n_i, n_i, 0, norm,
783 blas_no_conj, alpha_i, 1,
784 beta_i, 1, x_vec, a_vec, seed,
785 &y_elem, &head_r_true_elem, &tail_r_true_elem);
786
787 y_i[yi] = y_elem;
788 head_r_true[ri] = head_r_true_elem;
789 tail_r_true[ri] = tail_r_true_elem;
790 }
791 }
792
793 blas_free(a_vec);
794 blas_free(x_vec);
795 }
BLAS_dskew_testgen_hemv_s_s(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,double * alpha,double * beta,float * a,int lda,float * x,int incx,double * y,int incy,int * seed,double * head_r_true,double * tail_r_true)796 void BLAS_dskew_testgen_hemv_s_s(int norm, enum blas_order_type order,
797 enum blas_uplo_type uplo,
798 int n, int randomize,
799 double *alpha, double *beta,
800 float *a, int lda, float *x, int incx,
801 double *y, int incy, int *seed,
802 double *head_r_true, double *tail_r_true)
803
804 /*
805 * Purpose
806 * =======
807 *
808 * Generates a skew Matrix for use with hemv testing
809 *
810 * Arguments
811 * =========
812 *
813 * norm (input) int
814 * = -1: the vectors are scaled with norms near underflow.
815 * = 0: the vectors have norms of order 1.
816 * = 1: the vectors are scaled with norms near overflow.
817 *
818 * order (input) enum blas_order_type
819 * storage format of the matrices
820 *
821 * uplo (input) enum blas_uplo_type
822 * which half of the skew symmetric matrix a is to be stored.
823 *
824 * n (input) int
825 * sizes of symmetrical matrix a, size of vectors x, y:
826 * matrix a is n-by-n.
827 *
828 * randomize (input) int
829 * if 0, entries in matrices A, x will be chosen for
830 * maximum cancellation, but with less randomness.
831 * if 1, every entry in the matrix A, x will be
832 * random.
833 *
834 * alpha (input/output) double*
835 *
836 * beta (input) double*
837 *
838 * a (input/output) float*
839 *
840 * lda (input) lda
841 * leading dimension of matrix A.
842 *
843 * x (input) float*
844 * note : x is input only. x should be determined before calling
845 * this function.
846 *
847 * incx (input) int
848 * stride of vector x.
849 *
850 * y (input/output) double*
851 * generated vector y.
852 *
853 * incy (input) int
854 * leading dimension of vector y.
855 *
856 * seed (input/output) int *
857 * seed for the random number generator.
858 *
859 * head_r_true (output) double *
860 * the leading part of the truth in double-double.
861 *
862 * tail_r_true (output) double *
863 * the trailing part of the truth in double-double
864 *
865 */
866 {
867
868 int i, j;
869 int yi;
870 int aij, ai, ri;
871 int incyi, incri;
872 int incx_veci, y_starti;
873 int incaij, incai;
874 int inca_vec;
875 int n_i;
876
877 double y_elem;
878 float a_elem;
879 double head_r_true_elem, tail_r_true_elem;
880
881 float *a_vec;
882 float *x_vec;
883
884 double *y_i = y;
885 double *alpha_i = alpha;
886 double *beta_i = beta;
887 float *a_i = a;
888 float *x_i = x;
889
890 n_i = n;
891
892 /*a_vec must have stride of 1 */
893 inca_vec = 1;
894
895
896 a_vec = (float *) blas_malloc(n_i * sizeof(float));
897 if (n_i > 0 && a_vec == NULL) {
898 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
899 }
900 for (i = 0; i < n_i; i += inca_vec) {
901 a_vec[i] = 0.0;
902 }
903 x_vec = (float *) blas_malloc(n_i * sizeof(float));
904 if (n_i > 0 && x_vec == NULL) {
905 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
906 }
907 scopy_vector(x_i, n_i, incx, x_vec, 1);
908
909 incyi = incy;
910
911 if (incyi < 0) {
912 y_starti = (-n + 1) * incyi;
913 } else {
914 y_starti = 0;
915 }
916
917 incri = 1;
918
919
920 incx_veci = 1;
921
922
923 if (randomize == 0) {
924
925 /* Fill in skew matrix A */
926 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, ri += incri, yi += incyi) {
927 /* x_i has already been copied to x_vec */
928 sskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
929 /* skew matricies have zeroed diagonals */
930 a_vec[i] = 0.0;
931 BLAS_ddot_s_s_testgen(n_i, i + 1, n_i - i - 1, norm,
932 blas_no_conj, alpha_i, 1,
933 beta_i, 1, x_vec, a_vec, seed,
934 &y_elem, &head_r_true_elem, &tail_r_true_elem);
935
936
937 sskew_commit_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
938
939 /*commits an element to the generated y */
940 y_i[yi] = y_elem;
941 head_r_true[ri] = head_r_true_elem;
942 tail_r_true[ri] = tail_r_true_elem;
943 }
944
945 } else {
946 /*set a randomly */
947
948 if (order == blas_colmajor) {
949 incai = 1;
950 incaij = lda;
951 } else {
952 incai = lda;
953 incaij = 1;
954 }
955
956
957
958
959 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
960 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
961 a_elem = (float) xrand(seed);
962 if (i != j) {
963 a_i[aij] = a_elem;
964 } else {
965 /* skew matricies have zeroed diagonals */
966 a_i[aij] = 0.0;
967 }
968 }
969 }
970
971 /* now compute appropriate y vector */
972
973 /* get x */
974 scopy_vector(x_i, n_i, incx, x_vec, 1);
975
976 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi, ri += incri) {
977 sskew_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
978
979 BLAS_ddot_s_s_testgen(n_i, n_i, 0, norm,
980 blas_no_conj, alpha_i, 1,
981 beta_i, 1, x_vec, a_vec, seed,
982 &y_elem, &head_r_true_elem, &tail_r_true_elem);
983
984 y_i[yi] = y_elem;
985 head_r_true[ri] = head_r_true_elem;
986 tail_r_true[ri] = tail_r_true_elem;
987 }
988 }
989
990 blas_free(a_vec);
991 blas_free(x_vec);
992 }
993
BLAS_chemv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x,int incx,void * y,int incy,int * seed,double * head_r_true,double * tail_r_true)994 void BLAS_chemv_testgen(int norm, enum blas_order_type order,
995 enum blas_uplo_type uplo,
996 int n, int randomize,
997 void *alpha, int alpha_flag, void *beta,
998 int beta_flag, void *a, int lda, void *x, int incx,
999 void *y, int incy, int *seed, double *head_r_true,
1000 double *tail_r_true)
1001
1002 /*
1003 * Purpose
1004 * =======
1005 *
1006 * Generates the test inputs to BLAS_chemv{_x}
1007 *
1008 * Arguments
1009 * =========
1010 *
1011 * norm (input) int
1012 * = -1: the vectors are scaled with norms near underflow.
1013 * = 0: the vectors have norms of order 1.
1014 * = 1: the vectors are scaled with norms near overflow.
1015 *
1016 * order (input) enum blas_order_type
1017 * storage format of the matrices
1018 *
1019 * uplo (input) enum blas_uplo_type
1020 * which half of the hemvetric matrix a is to be stored.
1021 *
1022 * n (input) int
1023 * sizes of symmetrical matrix a, size of vectors x, y:
1024 * matrix a is n-by-n.
1025 *
1026 * randomize (input) int
1027 * if 0, entries in matrices A, x will be chosen for
1028 * maximum cancellation, but with less randomness.
1029 * if 1, every entry in the matrix A, x will be
1030 * random.
1031 *
1032 * alpha (input/output) void*
1033 * if alpha_flag = 1, alpha is input.
1034 * if alpha_flag = 0, alpha is output.
1035 *
1036 * alpha_flag (input) int
1037 * = 0: alpha is free, and is output.
1038 * = 1: alpha is fixed on input.
1039 *
1040 * beta (input/output) void*
1041 * if beta_flag = 1, beta is input.
1042 * if beta_flag = 0, beta is output.
1043 *
1044 * beta_flag (input) int
1045 * = 0: beta is free, and is output.
1046 * = 1: beta is fixed on input.
1047 *
1048 * a (input/output) void*
1049 *
1050 * lda (input) lda
1051 * leading dimension of matrix A.
1052 *
1053 * x (input/output) void*
1054 *
1055 * incx (input) int
1056 * stride of vector x.
1057 *
1058 * y (input/output) void*
1059 * generated vector y that will be used as an input to HEMV.
1060 *
1061 * incy (input) int
1062 * leading dimension of vector y.
1063 *
1064 * seed (input/output) int *
1065 * seed for the random number generator.
1066 *
1067 * head_r_true (output) double *
1068 * the leading part of the truth in double-double.
1069 *
1070 * tail_r_true (output) double *
1071 * the trailing part of the truth in double-double
1072 *
1073 */
1074 {
1075 char *routine_name = "BLAS_chemv_testgen";
1076 {
1077
1078 /* Strategy:
1079 R1 = alpha * A1 * x + beta * y1
1080 R2 = alpha * A2 * x + beta * y2
1081 where all the matrices and vectors are real. Then let R = R1 + i R2,
1082 A = A1 + i A2, y = y1 + i y2. To make A hermitian, A1 is
1083 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
1084 */
1085
1086 int i, j;
1087 int yi;
1088 int aij, ai;
1089 int a1ij, a1i;
1090 int xi;
1091 int mi;
1092 int incyi, x_starti, y_starti;
1093 int incaij, incai;
1094 int inca1ij, inca1i;
1095 int incxi;
1096 int inca_vec, incx_vec;
1097 int n_i;
1098 int ld;
1099 int ab;
1100 int ri, incri;
1101
1102 float *a1;
1103 float *a2;
1104 float *y1;
1105 float *y2;
1106 float *x0;
1107
1108 double *head_r1_true, *tail_r1_true;
1109 double *head_r2_true, *tail_r2_true;
1110
1111 double head_r_elem1, tail_r_elem1;
1112 double head_r_elem2, tail_r_elem2;
1113 double head_r_elem, tail_r_elem;
1114
1115 float *a_vec;
1116 float *x_vec;
1117
1118 float *y_i = (float *) y;
1119 float *alpha_i = (float *) alpha;
1120 float *beta_i = (float *) beta;
1121
1122 float *a_i = (float *) a;
1123 float *x_i = (float *) x;
1124
1125 n_i = n;
1126
1127 ld = n_i;
1128
1129
1130
1131 if (order == blas_colmajor) {
1132 inca1i = incai = 1;
1133 incyi = incy;
1134 incaij = lda;
1135 incxi = incx;
1136 inca1ij = n_i;
1137 } else {
1138 incyi = incy;
1139 incai = lda;
1140 incxi = incx;
1141 inca1i = n_i;
1142 inca1ij = incaij = 1;
1143 }
1144 if ((0 == incx) || (0 == incy)) {
1145 BLAS_error(routine_name, 0, 0, NULL);
1146 }
1147 if (incx > 0) {
1148 x_starti = 0;
1149 } else {
1150 x_starti = (-n_i + 1) * incx;
1151 }
1152 if (incy > 0) {
1153 y_starti = 0;
1154 } else {
1155 y_starti = (-n_i + 1) * incy;
1156 }
1157 incri = 1;
1158 incri *= 2;
1159 x_starti *= 2;
1160 y_starti *= 2;
1161 incyi *= 2;
1162 incai *= 2;
1163 incaij *= 2;
1164 incxi *= 2;
1165
1166 inca_vec = incx_vec = 1;
1167 inca_vec *= 2;
1168 incx_vec *= 2;
1169 a_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
1170 if (n_i > 0 && a_vec == NULL) {
1171 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1172 }
1173 for (i = 0; i < n_i * inca_vec; i += inca_vec) {
1174 a_vec[i] = 0.0;
1175 a_vec[i + 1] = 0.0;
1176 }
1177 x_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
1178 if (n_i > 0 && x_vec == NULL) {
1179 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1180 }
1181 for (i = 0; i < n_i * incx_vec; i += incx_vec) {
1182 x_vec[i] = 0.0;
1183 x_vec[i + 1] = 0.0;
1184 }
1185
1186 if (randomize == 0) {
1187 int incx0, incy1, incy2, incmi = 1;
1188 a1 = (float *) blas_malloc(n_i * n_i * sizeof(float));
1189 if (n_i * n_i > 0 && a1 == NULL) {
1190 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1191 }
1192 a2 = (float *) blas_malloc(n_i * n_i * sizeof(float));
1193 if (n_i * n_i > 0 && a2 == NULL) {
1194 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1195 }
1196 for (i = 0; i < n_i * n_i; ++i) {
1197 a1[i] = a2[i] = 0.0;
1198 }
1199 y1 = (float *) blas_malloc(n_i * sizeof(float));
1200 if (n_i > 0 && y1 == NULL) {
1201 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1202 }
1203 incy1 = 1;
1204
1205 y2 = (float *) blas_malloc(n_i * sizeof(float));
1206 if (n_i > 0 && y2 == NULL) {
1207 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1208 }
1209 incy2 = 1;
1210
1211 x0 = (float *) blas_malloc(n_i * sizeof(float));
1212 if (n_i > 0 && x0 == NULL) {
1213 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1214 }
1215 incx0 = 1;
1216
1217 for (i = 0; i < n_i; ++i) {
1218 y1[i] = y2[i] = x0[i] = 0.0;
1219 }
1220 head_r1_true = (double *) blas_malloc(n_i * sizeof(double));
1221 tail_r1_true = (double *) blas_malloc(n_i * sizeof(double));
1222 if (n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1223 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1224 };
1225 head_r2_true = (double *) blas_malloc(n_i * sizeof(double));
1226 tail_r2_true = (double *) blas_malloc(n_i * sizeof(double));
1227 if (n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1228 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1229 };
1230
1231 /* First generate the real portion of matrix A, and matrix B.
1232 Note that Re(A) is a symmetric matrix. */
1233 /* x0 is output from this call */
1234 BLAS_ssymv_testgen
1235 (norm, order, uplo, n, 0, alpha_i, alpha_flag, beta_i,
1236 beta_flag, a1, n_i, x0, incx0, y1, incy1, seed, head_r1_true,
1237 tail_r1_true);
1238
1239 /* x0 is now fixed and is input to this call */
1240 BLAS_sskew_testgen_hemv
1241 (norm, order, uplo, n, 0, alpha_i, beta_i, a2, n_i, x0,
1242 incx0, y2, incy2, seed, head_r2_true, tail_r2_true);
1243
1244
1245 /* The case where x is a complex vector. Since x is generated
1246 as a real vector, we need to perform some scaling.
1247
1248 There are four cases to consider, depending on the values
1249 of alpha and beta.
1250
1251 values scaling
1252 alpha beta alpha A x beta y R (truth)
1253 0 1 1 i i i
1254 1 1 ? 1+i 1+i 1+i
1255 2 ? 1 1+i 1+i 2i 2i
1256 3 ? ? 1+i 1+i 2i 2i
1257
1258 Note that we can afford to scale R by 1+i, since they are
1259 computed in double-double precision.
1260 */
1261
1262 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1263 ab = 0;
1264 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
1265 } else if (alpha_i[0] == 1.0) {
1266 ab = 1;
1267 /* set alpha to 1, multiply beta by 1+i. */
1268 alpha_i[1] = 0.0;
1269 beta_i[1] = beta_i[0];
1270 } else if (beta_i[0] == 1.0) {
1271 ab = 2;
1272 /* set beta to 1, multiply alpha by 1+i. */
1273 beta_i[1] = 0.0;
1274 alpha_i[1] = alpha_i[0];
1275 } else {
1276 ab = 3;
1277 /* multiply alpha by 1+i, beta by 2i. */
1278 alpha_i[1] = alpha_i[0];
1279 beta_i[1] = 2.0 * beta_i[0];
1280 beta_i[0] = 0.0;
1281 }
1282
1283
1284 /* Now fill in a */
1285 for (i = 0, ai = 0, a1i = 0; i < n_i; i++, ai += incai, a1i += inca1i) {
1286 for (j = 0, aij = ai, a1ij = a1i; j < n_i;
1287 j++, aij += incaij, a1ij += inca1ij) {
1288 a_i[aij] = a1[a1ij];
1289 a_i[aij + 1] = a2[a1ij];
1290 }
1291 }
1292
1293 /* Fill in x */
1294 for (i = 0, xi = x_starti, mi = 0; i < n_i; i++, xi += incxi,
1295 mi += incx0) {
1296 if (ab == 0) {
1297 x_i[xi] = 0.0;
1298 x_i[xi + 1] = x0[mi];
1299 } else {
1300 x_i[xi] = x0[mi];
1301 x_i[xi + 1] = x0[mi];
1302 }
1303 }
1304
1305 /* Fill in y */
1306 for (i = 0, yi = y_starti, mi = 0; i < n_i;
1307 i++, yi += incyi, mi += incy1) {
1308 if (ab == 0) {
1309 y_i[yi] = -y2[mi];
1310 y_i[yi + 1] = y1[mi];
1311 } else if (ab == 2) {
1312 y_i[yi] = -2.0 * y2[mi];
1313 y_i[yi + 1] = 2.0 * y1[mi];
1314 } else {
1315 y_i[yi] = y1[mi];
1316 y_i[yi + 1] = y2[mi];
1317 }
1318 }
1319
1320 /* Fill in the truth */
1321 for (i = 0, ri = 0, mi = 0; i < n_i; i++, ri += incri, mi += incmi) {
1322
1323 head_r_elem1 = head_r1_true[mi];
1324 tail_r_elem1 = tail_r1_true[mi];
1325
1326 head_r_elem2 = head_r2_true[mi];
1327 tail_r_elem2 = tail_r2_true[mi];
1328
1329 if (ab == 0) {
1330 head_r_true[ri] = -head_r_elem2;
1331 tail_r_true[ri] = -tail_r_elem2;
1332 head_r_true[ri + 1] = head_r_elem1;
1333 tail_r_true[ri + 1] = tail_r_elem1;
1334 } else if (ab == 1) {
1335 {
1336 /* Compute double-double = double-double + double-double. */
1337 double bv;
1338 double s1, s2, t1, t2;
1339
1340 /* Add two hi words. */
1341 s1 = head_r_elem1 + head_r_elem2;
1342 bv = s1 - head_r_elem1;
1343 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1344
1345 /* Add two lo words. */
1346 t1 = tail_r_elem1 + tail_r_elem2;
1347 bv = t1 - tail_r_elem1;
1348 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1349
1350 s2 += t1;
1351
1352 /* Renormalize (s1, s2) to (t1, s2) */
1353 t1 = s1 + s2;
1354 s2 = s2 - (t1 - s1);
1355
1356 t2 += s2;
1357
1358 /* Renormalize (t1, t2) */
1359 head_r_elem = t1 + t2;
1360 tail_r_elem = t2 - (head_r_elem - t1);
1361 }
1362
1363 /* Set the imaginary part to R1 + R2 */
1364 tail_r_true[ri + 1] = tail_r_elem;
1365 head_r_true[ri + 1] = head_r_elem;
1366
1367 /* Set the real part to R1 - R2. */
1368 {
1369 double head_bt, tail_bt;
1370 head_bt = -head_r_elem2;
1371 tail_bt = -tail_r_elem2;
1372 {
1373 /* Compute double-double = double-double + double-double. */
1374 double bv;
1375 double s1, s2, t1, t2;
1376
1377 /* Add two hi words. */
1378 s1 = head_r_elem1 + head_bt;
1379 bv = s1 - head_r_elem1;
1380 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1381
1382 /* Add two lo words. */
1383 t1 = tail_r_elem1 + tail_bt;
1384 bv = t1 - tail_r_elem1;
1385 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1386
1387 s2 += t1;
1388
1389 /* Renormalize (s1, s2) to (t1, s2) */
1390 t1 = s1 + s2;
1391 s2 = s2 - (t1 - s1);
1392
1393 t2 += s2;
1394
1395 /* Renormalize (t1, t2) */
1396 head_r_elem = t1 + t2;
1397 tail_r_elem = t2 - (head_r_elem - t1);
1398 }
1399 }
1400 tail_r_true[ri] = tail_r_elem;
1401 head_r_true[ri] = head_r_elem;
1402 } else {
1403
1404 /* Real part */
1405 {
1406 /* Compute double-double = double-double * double. */
1407 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1408
1409 con = head_r_elem2 * split;
1410 a11 = con - head_r_elem2;
1411 a11 = con - a11;
1412 a21 = head_r_elem2 - a11;
1413 con = -2.0 * split;
1414 b1 = con - -2.0;
1415 b1 = con - b1;
1416 b2 = -2.0 - b1;
1417
1418 c11 = head_r_elem2 * -2.0;
1419 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1420
1421 c2 = tail_r_elem2 * -2.0;
1422 t1 = c11 + c2;
1423 t2 = (c2 - (t1 - c11)) + c21;
1424
1425 head_r_elem = t1 + t2;
1426 tail_r_elem = t2 - (head_r_elem - t1);
1427 }
1428 head_r_true[ri] = head_r_elem;
1429 tail_r_true[ri] = tail_r_elem;
1430
1431 /* Imaginary Part */
1432 {
1433 /* Compute double-double = double-double * double. */
1434 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1435
1436 con = head_r_elem1 * split;
1437 a11 = con - head_r_elem1;
1438 a11 = con - a11;
1439 a21 = head_r_elem1 - a11;
1440 con = 2.0 * split;
1441 b1 = con - 2.0;
1442 b1 = con - b1;
1443 b2 = 2.0 - b1;
1444
1445 c11 = head_r_elem1 * 2.0;
1446 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1447
1448 c2 = tail_r_elem1 * 2.0;
1449 t1 = c11 + c2;
1450 t2 = (c2 - (t1 - c11)) + c21;
1451
1452 head_r_elem = t1 + t2;
1453 tail_r_elem = t2 - (head_r_elem - t1);
1454 }
1455 head_r_true[ri + 1] = head_r_elem;
1456 tail_r_true[ri + 1] = tail_r_elem;
1457 }
1458 }
1459
1460
1461 blas_free(a1);
1462 blas_free(a2);
1463 blas_free(y1);
1464 blas_free(y2);
1465 blas_free(x0);
1466 blas_free(head_r1_true);
1467 blas_free(tail_r1_true);
1468 blas_free(head_r2_true);
1469 blas_free(tail_r2_true);
1470 } else {
1471 /* get random A, x, then compute y for some cancellation. */
1472 float a_elem[2];
1473 float x_elem[2];
1474 float y_elem[2];
1475 double head_r_true_elem[2], tail_r_true_elem[2];
1476
1477 /* Since mixed real/complex test generator for dot
1478 scales the vectors, we need to used the non-mixed
1479 version if x is real (since A is always complex). */
1480
1481
1482 if (alpha_flag == 0) {
1483 alpha_i[0] = xrand(seed);
1484 alpha_i[1] = xrand(seed);
1485 }
1486 if (beta_flag == 0) {
1487 beta_i[0] = xrand(seed);
1488 beta_i[1] = xrand(seed);
1489 }
1490
1491 /* Fill in matrix A -- Hermitian. */
1492 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
1493 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
1494 a_elem[0] = xrand(seed);
1495 a_elem[1] = xrand(seed);
1496 a_i[aij] = a_elem[0];
1497 a_i[aij + 1] = a_elem[1];
1498 if (i == j)
1499 a_i[aij + 1] = 0.0;
1500 }
1501 }
1502
1503 /* Fill in vector x */
1504 for (i = 0, xi = x_starti; i < n_i; i++, xi += incxi) {
1505 x_elem[0] = xrand(seed);
1506 x_elem[1] = xrand(seed);
1507 x_i[xi] = x_elem[0];
1508 x_i[xi + 1] = x_elem[1];
1509 }
1510
1511 ccopy_vector(x_i, n_i, incx, x_vec, 1);
1512
1513
1514 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi,
1515 ri += incri) {
1516 che_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
1517
1518 BLAS_cdot_testgen(n_i, n_i, 0,
1519 norm, blas_no_conj,
1520 alpha, 1, beta, 1, a_vec,
1521 x_vec, seed,
1522 y_elem, head_r_true_elem, tail_r_true_elem);
1523
1524 y_i[yi] = y_elem[0];
1525 y_i[yi + 1] = y_elem[1];
1526 head_r_true[ri] = head_r_true_elem[0];
1527 head_r_true[ri + 1] = head_r_true_elem[1];
1528 tail_r_true[ri] = tail_r_true_elem[0];
1529 tail_r_true[ri + 1] = tail_r_true_elem[1];
1530 }
1531
1532
1533 }
1534
1535 blas_free(a_vec);
1536 blas_free(x_vec);
1537 }
1538 } /* end BLAS_chemv_testgen */
BLAS_zhemv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x,int incx,void * y,int incy,int * seed,double * head_r_true,double * tail_r_true)1539 void BLAS_zhemv_testgen(int norm, enum blas_order_type order,
1540 enum blas_uplo_type uplo,
1541 int n, int randomize,
1542 void *alpha, int alpha_flag, void *beta,
1543 int beta_flag, void *a, int lda, void *x, int incx,
1544 void *y, int incy, int *seed, double *head_r_true,
1545 double *tail_r_true)
1546
1547 /*
1548 * Purpose
1549 * =======
1550 *
1551 * Generates the test inputs to BLAS_zhemv{_x}
1552 *
1553 * Arguments
1554 * =========
1555 *
1556 * norm (input) int
1557 * = -1: the vectors are scaled with norms near underflow.
1558 * = 0: the vectors have norms of order 1.
1559 * = 1: the vectors are scaled with norms near overflow.
1560 *
1561 * order (input) enum blas_order_type
1562 * storage format of the matrices
1563 *
1564 * uplo (input) enum blas_uplo_type
1565 * which half of the hemvetric matrix a is to be stored.
1566 *
1567 * n (input) int
1568 * sizes of symmetrical matrix a, size of vectors x, y:
1569 * matrix a is n-by-n.
1570 *
1571 * randomize (input) int
1572 * if 0, entries in matrices A, x will be chosen for
1573 * maximum cancellation, but with less randomness.
1574 * if 1, every entry in the matrix A, x will be
1575 * random.
1576 *
1577 * alpha (input/output) void*
1578 * if alpha_flag = 1, alpha is input.
1579 * if alpha_flag = 0, alpha is output.
1580 *
1581 * alpha_flag (input) int
1582 * = 0: alpha is free, and is output.
1583 * = 1: alpha is fixed on input.
1584 *
1585 * beta (input/output) void*
1586 * if beta_flag = 1, beta is input.
1587 * if beta_flag = 0, beta is output.
1588 *
1589 * beta_flag (input) int
1590 * = 0: beta is free, and is output.
1591 * = 1: beta is fixed on input.
1592 *
1593 * a (input/output) void*
1594 *
1595 * lda (input) lda
1596 * leading dimension of matrix A.
1597 *
1598 * x (input/output) void*
1599 *
1600 * incx (input) int
1601 * stride of vector x.
1602 *
1603 * y (input/output) void*
1604 * generated vector y that will be used as an input to HEMV.
1605 *
1606 * incy (input) int
1607 * leading dimension of vector y.
1608 *
1609 * seed (input/output) int *
1610 * seed for the random number generator.
1611 *
1612 * head_r_true (output) double *
1613 * the leading part of the truth in double-double.
1614 *
1615 * tail_r_true (output) double *
1616 * the trailing part of the truth in double-double
1617 *
1618 */
1619 {
1620 char *routine_name = "BLAS_zhemv_testgen";
1621 {
1622
1623 /* Strategy:
1624 R1 = alpha * A1 * x + beta * y1
1625 R2 = alpha * A2 * x + beta * y2
1626 where all the matrices and vectors are real. Then let R = R1 + i R2,
1627 A = A1 + i A2, y = y1 + i y2. To make A hermitian, A1 is
1628 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
1629 */
1630
1631 int i, j;
1632 int yi;
1633 int aij, ai;
1634 int a1ij, a1i;
1635 int xi;
1636 int mi;
1637 int incyi, x_starti, y_starti;
1638 int incaij, incai;
1639 int inca1ij, inca1i;
1640 int incxi;
1641 int inca_vec, incx_vec;
1642 int n_i;
1643 int ld;
1644 int ab;
1645 int ri, incri;
1646
1647 double *a1;
1648 double *a2;
1649 double *y1;
1650 double *y2;
1651 double *x0;
1652
1653 double *head_r1_true, *tail_r1_true;
1654 double *head_r2_true, *tail_r2_true;
1655
1656 double head_r_elem1, tail_r_elem1;
1657 double head_r_elem2, tail_r_elem2;
1658 double head_r_elem, tail_r_elem;
1659
1660 double *a_vec;
1661 double *x_vec;
1662
1663 double *y_i = (double *) y;
1664 double *alpha_i = (double *) alpha;
1665 double *beta_i = (double *) beta;
1666
1667 double *a_i = (double *) a;
1668 double *x_i = (double *) x;
1669
1670 n_i = n;
1671
1672 ld = n_i;
1673
1674
1675
1676 if (order == blas_colmajor) {
1677 inca1i = incai = 1;
1678 incyi = incy;
1679 incaij = lda;
1680 incxi = incx;
1681 inca1ij = n_i;
1682 } else {
1683 incyi = incy;
1684 incai = lda;
1685 incxi = incx;
1686 inca1i = n_i;
1687 inca1ij = incaij = 1;
1688 }
1689 if ((0 == incx) || (0 == incy)) {
1690 BLAS_error(routine_name, 0, 0, NULL);
1691 }
1692 if (incx > 0) {
1693 x_starti = 0;
1694 } else {
1695 x_starti = (-n_i + 1) * incx;
1696 }
1697 if (incy > 0) {
1698 y_starti = 0;
1699 } else {
1700 y_starti = (-n_i + 1) * incy;
1701 }
1702 incri = 1;
1703 incri *= 2;
1704 x_starti *= 2;
1705 y_starti *= 2;
1706 incyi *= 2;
1707 incai *= 2;
1708 incaij *= 2;
1709 incxi *= 2;
1710
1711 inca_vec = incx_vec = 1;
1712 inca_vec *= 2;
1713 incx_vec *= 2;
1714 a_vec = (double *) blas_malloc(n_i * sizeof(double) * 2);
1715 if (n_i > 0 && a_vec == NULL) {
1716 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1717 }
1718 for (i = 0; i < n_i * inca_vec; i += inca_vec) {
1719 a_vec[i] = 0.0;
1720 a_vec[i + 1] = 0.0;
1721 }
1722 x_vec = (double *) blas_malloc(n_i * sizeof(double) * 2);
1723 if (n_i > 0 && x_vec == NULL) {
1724 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1725 }
1726 for (i = 0; i < n_i * incx_vec; i += incx_vec) {
1727 x_vec[i] = 0.0;
1728 x_vec[i + 1] = 0.0;
1729 }
1730
1731 if (randomize == 0) {
1732 int incx0, incy1, incy2, incmi = 1;
1733 a1 = (double *) blas_malloc(n_i * n_i * sizeof(double));
1734 if (n_i * n_i > 0 && a1 == NULL) {
1735 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1736 }
1737 a2 = (double *) blas_malloc(n_i * n_i * sizeof(double));
1738 if (n_i * n_i > 0 && a2 == NULL) {
1739 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1740 }
1741 for (i = 0; i < n_i * n_i; ++i) {
1742 a1[i] = a2[i] = 0.0;
1743 }
1744 y1 = (double *) blas_malloc(n_i * sizeof(double));
1745 if (n_i > 0 && y1 == NULL) {
1746 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1747 }
1748 incy1 = 1;
1749
1750 y2 = (double *) blas_malloc(n_i * sizeof(double));
1751 if (n_i > 0 && y2 == NULL) {
1752 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1753 }
1754 incy2 = 1;
1755
1756 x0 = (double *) blas_malloc(n_i * sizeof(double));
1757 if (n_i > 0 && x0 == NULL) {
1758 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1759 }
1760 incx0 = 1;
1761
1762 for (i = 0; i < n_i; ++i) {
1763 y1[i] = y2[i] = x0[i] = 0.0;
1764 }
1765 head_r1_true = (double *) blas_malloc(n_i * sizeof(double));
1766 tail_r1_true = (double *) blas_malloc(n_i * sizeof(double));
1767 if (n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1768 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1769 };
1770 head_r2_true = (double *) blas_malloc(n_i * sizeof(double));
1771 tail_r2_true = (double *) blas_malloc(n_i * sizeof(double));
1772 if (n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1773 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1774 };
1775
1776 /* First generate the real portion of matrix A, and matrix B.
1777 Note that Re(A) is a symmetric matrix. */
1778 /* x0 is output from this call */
1779 BLAS_dsymv_testgen
1780 (norm, order, uplo, n, 0, alpha_i, alpha_flag, beta_i,
1781 beta_flag, a1, n_i, x0, incx0, y1, incy1, seed, head_r1_true,
1782 tail_r1_true);
1783
1784 /* x0 is now fixed and is input to this call */
1785 BLAS_dskew_testgen_hemv
1786 (norm, order, uplo, n, 0, alpha_i, beta_i, a2, n_i, x0,
1787 incx0, y2, incy2, seed, head_r2_true, tail_r2_true);
1788
1789
1790 /* The case where x is a complex vector. Since x is generated
1791 as a real vector, we need to perform some scaling.
1792
1793 There are four cases to consider, depending on the values
1794 of alpha and beta.
1795
1796 values scaling
1797 alpha beta alpha A x beta y R (truth)
1798 0 1 1 i i i
1799 1 1 ? 1+i 1+i 1+i
1800 2 ? 1 1+i 1+i 2i 2i
1801 3 ? ? 1+i 1+i 2i 2i
1802
1803 Note that we can afford to scale R by 1+i, since they are
1804 computed in double-double precision.
1805 */
1806
1807 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1808 ab = 0;
1809 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
1810 } else if (alpha_i[0] == 1.0) {
1811 ab = 1;
1812 /* set alpha to 1, multiply beta by 1+i. */
1813 alpha_i[1] = 0.0;
1814 beta_i[1] = beta_i[0];
1815 } else if (beta_i[0] == 1.0) {
1816 ab = 2;
1817 /* set beta to 1, multiply alpha by 1+i. */
1818 beta_i[1] = 0.0;
1819 alpha_i[1] = alpha_i[0];
1820 } else {
1821 ab = 3;
1822 /* multiply alpha by 1+i, beta by 2i. */
1823 alpha_i[1] = alpha_i[0];
1824 beta_i[1] = 2.0 * beta_i[0];
1825 beta_i[0] = 0.0;
1826 }
1827
1828
1829 /* Now fill in a */
1830 for (i = 0, ai = 0, a1i = 0; i < n_i; i++, ai += incai, a1i += inca1i) {
1831 for (j = 0, aij = ai, a1ij = a1i; j < n_i;
1832 j++, aij += incaij, a1ij += inca1ij) {
1833 a_i[aij] = a1[a1ij];
1834 a_i[aij + 1] = a2[a1ij];
1835 }
1836 }
1837
1838 /* Fill in x */
1839 for (i = 0, xi = x_starti, mi = 0; i < n_i; i++, xi += incxi,
1840 mi += incx0) {
1841 if (ab == 0) {
1842 x_i[xi] = 0.0;
1843 x_i[xi + 1] = x0[mi];
1844 } else {
1845 x_i[xi] = x0[mi];
1846 x_i[xi + 1] = x0[mi];
1847 }
1848 }
1849
1850 /* Fill in y */
1851 for (i = 0, yi = y_starti, mi = 0; i < n_i;
1852 i++, yi += incyi, mi += incy1) {
1853 if (ab == 0) {
1854 y_i[yi] = -y2[mi];
1855 y_i[yi + 1] = y1[mi];
1856 } else if (ab == 2) {
1857 y_i[yi] = -2.0 * y2[mi];
1858 y_i[yi + 1] = 2.0 * y1[mi];
1859 } else {
1860 y_i[yi] = y1[mi];
1861 y_i[yi + 1] = y2[mi];
1862 }
1863 }
1864
1865 /* Fill in the truth */
1866 for (i = 0, ri = 0, mi = 0; i < n_i; i++, ri += incri, mi += incmi) {
1867
1868 head_r_elem1 = head_r1_true[mi];
1869 tail_r_elem1 = tail_r1_true[mi];
1870
1871 head_r_elem2 = head_r2_true[mi];
1872 tail_r_elem2 = tail_r2_true[mi];
1873
1874 if (ab == 0) {
1875 head_r_true[ri] = -head_r_elem2;
1876 tail_r_true[ri] = -tail_r_elem2;
1877 head_r_true[ri + 1] = head_r_elem1;
1878 tail_r_true[ri + 1] = tail_r_elem1;
1879 } else if (ab == 1) {
1880 {
1881 /* Compute double-double = double-double + double-double. */
1882 double bv;
1883 double s1, s2, t1, t2;
1884
1885 /* Add two hi words. */
1886 s1 = head_r_elem1 + head_r_elem2;
1887 bv = s1 - head_r_elem1;
1888 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1889
1890 /* Add two lo words. */
1891 t1 = tail_r_elem1 + tail_r_elem2;
1892 bv = t1 - tail_r_elem1;
1893 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1894
1895 s2 += t1;
1896
1897 /* Renormalize (s1, s2) to (t1, s2) */
1898 t1 = s1 + s2;
1899 s2 = s2 - (t1 - s1);
1900
1901 t2 += s2;
1902
1903 /* Renormalize (t1, t2) */
1904 head_r_elem = t1 + t2;
1905 tail_r_elem = t2 - (head_r_elem - t1);
1906 }
1907
1908 /* Set the imaginary part to R1 + R2 */
1909 tail_r_true[ri + 1] = tail_r_elem;
1910 head_r_true[ri + 1] = head_r_elem;
1911
1912 /* Set the real part to R1 - R2. */
1913 {
1914 double head_bt, tail_bt;
1915 head_bt = -head_r_elem2;
1916 tail_bt = -tail_r_elem2;
1917 {
1918 /* Compute double-double = double-double + double-double. */
1919 double bv;
1920 double s1, s2, t1, t2;
1921
1922 /* Add two hi words. */
1923 s1 = head_r_elem1 + head_bt;
1924 bv = s1 - head_r_elem1;
1925 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1926
1927 /* Add two lo words. */
1928 t1 = tail_r_elem1 + tail_bt;
1929 bv = t1 - tail_r_elem1;
1930 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1931
1932 s2 += t1;
1933
1934 /* Renormalize (s1, s2) to (t1, s2) */
1935 t1 = s1 + s2;
1936 s2 = s2 - (t1 - s1);
1937
1938 t2 += s2;
1939
1940 /* Renormalize (t1, t2) */
1941 head_r_elem = t1 + t2;
1942 tail_r_elem = t2 - (head_r_elem - t1);
1943 }
1944 }
1945 tail_r_true[ri] = tail_r_elem;
1946 head_r_true[ri] = head_r_elem;
1947 } else {
1948
1949 /* Real part */
1950 {
1951 /* Compute double-double = double-double * double. */
1952 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1953
1954 con = head_r_elem2 * split;
1955 a11 = con - head_r_elem2;
1956 a11 = con - a11;
1957 a21 = head_r_elem2 - a11;
1958 con = -2.0 * split;
1959 b1 = con - -2.0;
1960 b1 = con - b1;
1961 b2 = -2.0 - b1;
1962
1963 c11 = head_r_elem2 * -2.0;
1964 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1965
1966 c2 = tail_r_elem2 * -2.0;
1967 t1 = c11 + c2;
1968 t2 = (c2 - (t1 - c11)) + c21;
1969
1970 head_r_elem = t1 + t2;
1971 tail_r_elem = t2 - (head_r_elem - t1);
1972 }
1973 head_r_true[ri] = head_r_elem;
1974 tail_r_true[ri] = tail_r_elem;
1975
1976 /* Imaginary Part */
1977 {
1978 /* Compute double-double = double-double * double. */
1979 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1980
1981 con = head_r_elem1 * split;
1982 a11 = con - head_r_elem1;
1983 a11 = con - a11;
1984 a21 = head_r_elem1 - a11;
1985 con = 2.0 * split;
1986 b1 = con - 2.0;
1987 b1 = con - b1;
1988 b2 = 2.0 - b1;
1989
1990 c11 = head_r_elem1 * 2.0;
1991 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1992
1993 c2 = tail_r_elem1 * 2.0;
1994 t1 = c11 + c2;
1995 t2 = (c2 - (t1 - c11)) + c21;
1996
1997 head_r_elem = t1 + t2;
1998 tail_r_elem = t2 - (head_r_elem - t1);
1999 }
2000 head_r_true[ri + 1] = head_r_elem;
2001 tail_r_true[ri + 1] = tail_r_elem;
2002 }
2003 }
2004
2005
2006 blas_free(a1);
2007 blas_free(a2);
2008 blas_free(y1);
2009 blas_free(y2);
2010 blas_free(x0);
2011 blas_free(head_r1_true);
2012 blas_free(tail_r1_true);
2013 blas_free(head_r2_true);
2014 blas_free(tail_r2_true);
2015 } else {
2016 /* get random A, x, then compute y for some cancellation. */
2017 double a_elem[2];
2018 double x_elem[2];
2019 double y_elem[2];
2020 double head_r_true_elem[2], tail_r_true_elem[2];
2021
2022 /* Since mixed real/complex test generator for dot
2023 scales the vectors, we need to used the non-mixed
2024 version if x is real (since A is always complex). */
2025
2026
2027 if (alpha_flag == 0) {
2028 alpha_i[0] = xrand(seed);
2029 alpha_i[1] = xrand(seed);
2030 }
2031 if (beta_flag == 0) {
2032 beta_i[0] = xrand(seed);
2033 beta_i[1] = xrand(seed);
2034 }
2035
2036 /* Fill in matrix A -- Hermitian. */
2037 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
2038 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
2039 a_elem[0] = xrand(seed);
2040 a_elem[1] = xrand(seed);
2041 a_i[aij] = a_elem[0];
2042 a_i[aij + 1] = a_elem[1];
2043 if (i == j)
2044 a_i[aij + 1] = 0.0;
2045 }
2046 }
2047
2048 /* Fill in vector x */
2049 for (i = 0, xi = x_starti; i < n_i; i++, xi += incxi) {
2050 x_elem[0] = xrand(seed);
2051 x_elem[1] = xrand(seed);
2052 x_i[xi] = x_elem[0];
2053 x_i[xi + 1] = x_elem[1];
2054 }
2055
2056 zcopy_vector(x_i, n_i, incx, x_vec, 1);
2057
2058
2059 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi,
2060 ri += incri) {
2061 zhe_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
2062
2063 BLAS_zdot_testgen(n_i, n_i, 0,
2064 norm, blas_no_conj,
2065 alpha, 1, beta, 1, a_vec,
2066 x_vec, seed,
2067 y_elem, head_r_true_elem, tail_r_true_elem);
2068
2069 y_i[yi] = y_elem[0];
2070 y_i[yi + 1] = y_elem[1];
2071 head_r_true[ri] = head_r_true_elem[0];
2072 head_r_true[ri + 1] = head_r_true_elem[1];
2073 tail_r_true[ri] = tail_r_true_elem[0];
2074 tail_r_true[ri + 1] = tail_r_true_elem[1];
2075 }
2076
2077
2078 }
2079
2080 blas_free(a_vec);
2081 blas_free(x_vec);
2082 }
2083 } /* end BLAS_zhemv_testgen */
BLAS_zhemv_c_z_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x,int incx,void * y,int incy,int * seed,double * head_r_true,double * tail_r_true)2084 void BLAS_zhemv_c_z_testgen(int norm, enum blas_order_type order,
2085 enum blas_uplo_type uplo,
2086 int n, int randomize,
2087 void *alpha, int alpha_flag, void *beta,
2088 int beta_flag, void *a, int lda, void *x,
2089 int incx, void *y, int incy, int *seed,
2090 double *head_r_true, double *tail_r_true)
2091
2092 /*
2093 * Purpose
2094 * =======
2095 *
2096 * Generates the test inputs to BLAS_zhemv_c_z{_x}
2097 *
2098 * Arguments
2099 * =========
2100 *
2101 * norm (input) int
2102 * = -1: the vectors are scaled with norms near underflow.
2103 * = 0: the vectors have norms of order 1.
2104 * = 1: the vectors are scaled with norms near overflow.
2105 *
2106 * order (input) enum blas_order_type
2107 * storage format of the matrices
2108 *
2109 * uplo (input) enum blas_uplo_type
2110 * which half of the hemvetric matrix a is to be stored.
2111 *
2112 * n (input) int
2113 * sizes of symmetrical matrix a, size of vectors x, y:
2114 * matrix a is n-by-n.
2115 *
2116 * randomize (input) int
2117 * if 0, entries in matrices A, x will be chosen for
2118 * maximum cancellation, but with less randomness.
2119 * if 1, every entry in the matrix A, x will be
2120 * random.
2121 *
2122 * alpha (input/output) void*
2123 * if alpha_flag = 1, alpha is input.
2124 * if alpha_flag = 0, alpha is output.
2125 *
2126 * alpha_flag (input) int
2127 * = 0: alpha is free, and is output.
2128 * = 1: alpha is fixed on input.
2129 *
2130 * beta (input/output) void*
2131 * if beta_flag = 1, beta is input.
2132 * if beta_flag = 0, beta is output.
2133 *
2134 * beta_flag (input) int
2135 * = 0: beta is free, and is output.
2136 * = 1: beta is fixed on input.
2137 *
2138 * a (input/output) void*
2139 *
2140 * lda (input) lda
2141 * leading dimension of matrix A.
2142 *
2143 * x (input/output) void*
2144 *
2145 * incx (input) int
2146 * stride of vector x.
2147 *
2148 * y (input/output) void*
2149 * generated vector y that will be used as an input to HEMV.
2150 *
2151 * incy (input) int
2152 * leading dimension of vector y.
2153 *
2154 * seed (input/output) int *
2155 * seed for the random number generator.
2156 *
2157 * head_r_true (output) double *
2158 * the leading part of the truth in double-double.
2159 *
2160 * tail_r_true (output) double *
2161 * the trailing part of the truth in double-double
2162 *
2163 */
2164 {
2165 char *routine_name = "BLAS_zhemv_c_z_testgen";
2166 {
2167
2168 /* Strategy:
2169 R1 = alpha * A1 * x + beta * y1
2170 R2 = alpha * A2 * x + beta * y2
2171 where all the matrices and vectors are real. Then let R = R1 + i R2,
2172 A = A1 + i A2, y = y1 + i y2. To make A hermitian, A1 is
2173 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
2174 */
2175
2176 int i, j;
2177 int yi;
2178 int aij, ai;
2179 int a1ij, a1i;
2180 int xi;
2181 int mi;
2182 int incyi, x_starti, y_starti;
2183 int incaij, incai;
2184 int inca1ij, inca1i;
2185 int incxi;
2186 int inca_vec, incx_vec;
2187 int n_i;
2188 int ld;
2189 int ab;
2190 int ri, incri;
2191
2192 float *a1;
2193 float *a2;
2194 double *y1;
2195 double *y2;
2196 double *x0;
2197
2198 double *head_r1_true, *tail_r1_true;
2199 double *head_r2_true, *tail_r2_true;
2200
2201 double head_r_elem1, tail_r_elem1;
2202 double head_r_elem2, tail_r_elem2;
2203 double head_r_elem, tail_r_elem;
2204
2205 float *a_vec;
2206 double *x_vec;
2207
2208 double *y_i = (double *) y;
2209 double *alpha_i = (double *) alpha;
2210 double *beta_i = (double *) beta;
2211
2212 float *a_i = (float *) a;
2213 double *x_i = (double *) x;
2214
2215 n_i = n;
2216
2217 ld = n_i;
2218
2219
2220
2221 if (order == blas_colmajor) {
2222 inca1i = incai = 1;
2223 incyi = incy;
2224 incaij = lda;
2225 incxi = incx;
2226 inca1ij = n_i;
2227 } else {
2228 incyi = incy;
2229 incai = lda;
2230 incxi = incx;
2231 inca1i = n_i;
2232 inca1ij = incaij = 1;
2233 }
2234 if ((0 == incx) || (0 == incy)) {
2235 BLAS_error(routine_name, 0, 0, NULL);
2236 }
2237 if (incx > 0) {
2238 x_starti = 0;
2239 } else {
2240 x_starti = (-n_i + 1) * incx;
2241 }
2242 if (incy > 0) {
2243 y_starti = 0;
2244 } else {
2245 y_starti = (-n_i + 1) * incy;
2246 }
2247 incri = 1;
2248 incri *= 2;
2249 x_starti *= 2;
2250 y_starti *= 2;
2251 incyi *= 2;
2252 incai *= 2;
2253 incaij *= 2;
2254 incxi *= 2;
2255
2256 inca_vec = incx_vec = 1;
2257 inca_vec *= 2;
2258 incx_vec *= 2;
2259 a_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
2260 if (n_i > 0 && a_vec == NULL) {
2261 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2262 }
2263 for (i = 0; i < n_i * inca_vec; i += inca_vec) {
2264 a_vec[i] = 0.0;
2265 a_vec[i + 1] = 0.0;
2266 }
2267 x_vec = (double *) blas_malloc(n_i * sizeof(double) * 2);
2268 if (n_i > 0 && x_vec == NULL) {
2269 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2270 }
2271 for (i = 0; i < n_i * incx_vec; i += incx_vec) {
2272 x_vec[i] = 0.0;
2273 x_vec[i + 1] = 0.0;
2274 }
2275
2276 if (randomize == 0) {
2277 int incx0, incy1, incy2, incmi = 1;
2278 a1 = (float *) blas_malloc(n_i * n_i * sizeof(float));
2279 if (n_i * n_i > 0 && a1 == NULL) {
2280 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2281 }
2282 a2 = (float *) blas_malloc(n_i * n_i * sizeof(float));
2283 if (n_i * n_i > 0 && a2 == NULL) {
2284 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2285 }
2286 for (i = 0; i < n_i * n_i; ++i) {
2287 a1[i] = a2[i] = 0.0;
2288 }
2289 y1 = (double *) blas_malloc(n_i * sizeof(double));
2290 if (n_i > 0 && y1 == NULL) {
2291 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2292 }
2293 incy1 = 1;
2294
2295 y2 = (double *) blas_malloc(n_i * sizeof(double));
2296 if (n_i > 0 && y2 == NULL) {
2297 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2298 }
2299 incy2 = 1;
2300
2301 x0 = (double *) blas_malloc(n_i * sizeof(double));
2302 if (n_i > 0 && x0 == NULL) {
2303 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2304 }
2305 incx0 = 1;
2306
2307 for (i = 0; i < n_i; ++i) {
2308 y1[i] = y2[i] = x0[i] = 0.0;
2309 }
2310 head_r1_true = (double *) blas_malloc(n_i * sizeof(double));
2311 tail_r1_true = (double *) blas_malloc(n_i * sizeof(double));
2312 if (n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2313 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2314 };
2315 head_r2_true = (double *) blas_malloc(n_i * sizeof(double));
2316 tail_r2_true = (double *) blas_malloc(n_i * sizeof(double));
2317 if (n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2318 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2319 };
2320
2321 /* First generate the real portion of matrix A, and matrix B.
2322 Note that Re(A) is a symmetric matrix. */
2323 /* x0 is output from this call */
2324 BLAS_dsymv_s_d_testgen
2325 (norm, order, uplo, n, 0, alpha_i, alpha_flag, beta_i,
2326 beta_flag, a1, n_i, x0, incx0, y1, incy1, seed, head_r1_true,
2327 tail_r1_true);
2328
2329 /* x0 is now fixed and is input to this call */
2330 BLAS_dskew_testgen_hemv_s_d
2331 (norm, order, uplo, n, 0, alpha_i, beta_i, a2, n_i, x0,
2332 incx0, y2, incy2, seed, head_r2_true, tail_r2_true);
2333
2334
2335 /* The case where x is a complex vector. Since x is generated
2336 as a real vector, we need to perform some scaling.
2337
2338 There are four cases to consider, depending on the values
2339 of alpha and beta.
2340
2341 values scaling
2342 alpha beta alpha A x beta y R (truth)
2343 0 1 1 i i i
2344 1 1 ? 1+i 1+i 1+i
2345 2 ? 1 1+i 1+i 2i 2i
2346 3 ? ? 1+i 1+i 2i 2i
2347
2348 Note that we can afford to scale R by 1+i, since they are
2349 computed in double-double precision.
2350 */
2351
2352 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2353 ab = 0;
2354 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
2355 } else if (alpha_i[0] == 1.0) {
2356 ab = 1;
2357 /* set alpha to 1, multiply beta by 1+i. */
2358 alpha_i[1] = 0.0;
2359 beta_i[1] = beta_i[0];
2360 } else if (beta_i[0] == 1.0) {
2361 ab = 2;
2362 /* set beta to 1, multiply alpha by 1+i. */
2363 beta_i[1] = 0.0;
2364 alpha_i[1] = alpha_i[0];
2365 } else {
2366 ab = 3;
2367 /* multiply alpha by 1+i, beta by 2i. */
2368 alpha_i[1] = alpha_i[0];
2369 beta_i[1] = 2.0 * beta_i[0];
2370 beta_i[0] = 0.0;
2371 }
2372
2373
2374 /* Now fill in a */
2375 for (i = 0, ai = 0, a1i = 0; i < n_i; i++, ai += incai, a1i += inca1i) {
2376 for (j = 0, aij = ai, a1ij = a1i; j < n_i;
2377 j++, aij += incaij, a1ij += inca1ij) {
2378 a_i[aij] = a1[a1ij];
2379 a_i[aij + 1] = a2[a1ij];
2380 }
2381 }
2382
2383 /* Fill in x */
2384 for (i = 0, xi = x_starti, mi = 0; i < n_i; i++, xi += incxi,
2385 mi += incx0) {
2386 if (ab == 0) {
2387 x_i[xi] = 0.0;
2388 x_i[xi + 1] = x0[mi];
2389 } else {
2390 x_i[xi] = x0[mi];
2391 x_i[xi + 1] = x0[mi];
2392 }
2393 }
2394
2395 /* Fill in y */
2396 for (i = 0, yi = y_starti, mi = 0; i < n_i;
2397 i++, yi += incyi, mi += incy1) {
2398 if (ab == 0) {
2399 y_i[yi] = -y2[mi];
2400 y_i[yi + 1] = y1[mi];
2401 } else if (ab == 2) {
2402 y_i[yi] = -2.0 * y2[mi];
2403 y_i[yi + 1] = 2.0 * y1[mi];
2404 } else {
2405 y_i[yi] = y1[mi];
2406 y_i[yi + 1] = y2[mi];
2407 }
2408 }
2409
2410 /* Fill in the truth */
2411 for (i = 0, ri = 0, mi = 0; i < n_i; i++, ri += incri, mi += incmi) {
2412
2413 head_r_elem1 = head_r1_true[mi];
2414 tail_r_elem1 = tail_r1_true[mi];
2415
2416 head_r_elem2 = head_r2_true[mi];
2417 tail_r_elem2 = tail_r2_true[mi];
2418
2419 if (ab == 0) {
2420 head_r_true[ri] = -head_r_elem2;
2421 tail_r_true[ri] = -tail_r_elem2;
2422 head_r_true[ri + 1] = head_r_elem1;
2423 tail_r_true[ri + 1] = tail_r_elem1;
2424 } else if (ab == 1) {
2425 {
2426 /* Compute double-double = double-double + double-double. */
2427 double bv;
2428 double s1, s2, t1, t2;
2429
2430 /* Add two hi words. */
2431 s1 = head_r_elem1 + head_r_elem2;
2432 bv = s1 - head_r_elem1;
2433 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2434
2435 /* Add two lo words. */
2436 t1 = tail_r_elem1 + tail_r_elem2;
2437 bv = t1 - tail_r_elem1;
2438 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2439
2440 s2 += t1;
2441
2442 /* Renormalize (s1, s2) to (t1, s2) */
2443 t1 = s1 + s2;
2444 s2 = s2 - (t1 - s1);
2445
2446 t2 += s2;
2447
2448 /* Renormalize (t1, t2) */
2449 head_r_elem = t1 + t2;
2450 tail_r_elem = t2 - (head_r_elem - t1);
2451 }
2452
2453 /* Set the imaginary part to R1 + R2 */
2454 tail_r_true[ri + 1] = tail_r_elem;
2455 head_r_true[ri + 1] = head_r_elem;
2456
2457 /* Set the real part to R1 - R2. */
2458 {
2459 double head_bt, tail_bt;
2460 head_bt = -head_r_elem2;
2461 tail_bt = -tail_r_elem2;
2462 {
2463 /* Compute double-double = double-double + double-double. */
2464 double bv;
2465 double s1, s2, t1, t2;
2466
2467 /* Add two hi words. */
2468 s1 = head_r_elem1 + head_bt;
2469 bv = s1 - head_r_elem1;
2470 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2471
2472 /* Add two lo words. */
2473 t1 = tail_r_elem1 + tail_bt;
2474 bv = t1 - tail_r_elem1;
2475 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2476
2477 s2 += t1;
2478
2479 /* Renormalize (s1, s2) to (t1, s2) */
2480 t1 = s1 + s2;
2481 s2 = s2 - (t1 - s1);
2482
2483 t2 += s2;
2484
2485 /* Renormalize (t1, t2) */
2486 head_r_elem = t1 + t2;
2487 tail_r_elem = t2 - (head_r_elem - t1);
2488 }
2489 }
2490 tail_r_true[ri] = tail_r_elem;
2491 head_r_true[ri] = head_r_elem;
2492 } else {
2493
2494 /* Real part */
2495 {
2496 /* Compute double-double = double-double * double. */
2497 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2498
2499 con = head_r_elem2 * split;
2500 a11 = con - head_r_elem2;
2501 a11 = con - a11;
2502 a21 = head_r_elem2 - a11;
2503 con = -2.0 * split;
2504 b1 = con - -2.0;
2505 b1 = con - b1;
2506 b2 = -2.0 - b1;
2507
2508 c11 = head_r_elem2 * -2.0;
2509 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2510
2511 c2 = tail_r_elem2 * -2.0;
2512 t1 = c11 + c2;
2513 t2 = (c2 - (t1 - c11)) + c21;
2514
2515 head_r_elem = t1 + t2;
2516 tail_r_elem = t2 - (head_r_elem - t1);
2517 }
2518 head_r_true[ri] = head_r_elem;
2519 tail_r_true[ri] = tail_r_elem;
2520
2521 /* Imaginary Part */
2522 {
2523 /* Compute double-double = double-double * double. */
2524 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2525
2526 con = head_r_elem1 * split;
2527 a11 = con - head_r_elem1;
2528 a11 = con - a11;
2529 a21 = head_r_elem1 - a11;
2530 con = 2.0 * split;
2531 b1 = con - 2.0;
2532 b1 = con - b1;
2533 b2 = 2.0 - b1;
2534
2535 c11 = head_r_elem1 * 2.0;
2536 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2537
2538 c2 = tail_r_elem1 * 2.0;
2539 t1 = c11 + c2;
2540 t2 = (c2 - (t1 - c11)) + c21;
2541
2542 head_r_elem = t1 + t2;
2543 tail_r_elem = t2 - (head_r_elem - t1);
2544 }
2545 head_r_true[ri + 1] = head_r_elem;
2546 tail_r_true[ri + 1] = tail_r_elem;
2547 }
2548 }
2549
2550
2551 blas_free(a1);
2552 blas_free(a2);
2553 blas_free(y1);
2554 blas_free(y2);
2555 blas_free(x0);
2556 blas_free(head_r1_true);
2557 blas_free(tail_r1_true);
2558 blas_free(head_r2_true);
2559 blas_free(tail_r2_true);
2560 } else {
2561 /* get random A, x, then compute y for some cancellation. */
2562 float a_elem[2];
2563 double x_elem[2];
2564 double y_elem[2];
2565 double head_r_true_elem[2], tail_r_true_elem[2];
2566
2567 /* Since mixed real/complex test generator for dot
2568 scales the vectors, we need to used the non-mixed
2569 version if x is real (since A is always complex). */
2570
2571
2572 if (alpha_flag == 0) {
2573 alpha_i[0] = (float) xrand(seed);
2574 alpha_i[1] = (float) xrand(seed);
2575 }
2576 if (beta_flag == 0) {
2577 beta_i[0] = (float) xrand(seed);
2578 beta_i[1] = (float) xrand(seed);
2579 }
2580
2581 /* Fill in matrix A -- Hermitian. */
2582 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
2583 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
2584 a_elem[0] = (float) xrand(seed);
2585 a_elem[1] = (float) xrand(seed);
2586 a_i[aij] = a_elem[0];
2587 a_i[aij + 1] = a_elem[1];
2588 if (i == j)
2589 a_i[aij + 1] = 0.0;
2590 }
2591 }
2592
2593 /* Fill in vector x */
2594 for (i = 0, xi = x_starti; i < n_i; i++, xi += incxi) {
2595 x_elem[0] = (float) xrand(seed);
2596 x_elem[1] = (float) xrand(seed);
2597 x_i[xi] = x_elem[0];
2598 x_i[xi + 1] = x_elem[1];
2599 }
2600
2601 zcopy_vector(x_i, n_i, incx, x_vec, 1);
2602
2603
2604 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi,
2605 ri += incri) {
2606 che_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
2607
2608 BLAS_zdot_c_z_testgen(n_i, n_i, 0,
2609 norm, blas_no_conj,
2610 alpha, 1, beta, 1, a_vec,
2611 x_vec, seed,
2612 y_elem, head_r_true_elem, tail_r_true_elem);
2613
2614 y_i[yi] = y_elem[0];
2615 y_i[yi + 1] = y_elem[1];
2616 head_r_true[ri] = head_r_true_elem[0];
2617 head_r_true[ri + 1] = head_r_true_elem[1];
2618 tail_r_true[ri] = tail_r_true_elem[0];
2619 tail_r_true[ri + 1] = tail_r_true_elem[1];
2620 }
2621
2622
2623 }
2624
2625 blas_free(a_vec);
2626 blas_free(x_vec);
2627 }
2628 } /* end BLAS_zhemv_c_z_testgen */
BLAS_zhemv_z_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x,int incx,void * y,int incy,int * seed,double * head_r_true,double * tail_r_true)2629 void BLAS_zhemv_z_c_testgen(int norm, enum blas_order_type order,
2630 enum blas_uplo_type uplo,
2631 int n, int randomize,
2632 void *alpha, int alpha_flag, void *beta,
2633 int beta_flag, void *a, int lda, void *x,
2634 int incx, void *y, int incy, int *seed,
2635 double *head_r_true, double *tail_r_true)
2636
2637 /*
2638 * Purpose
2639 * =======
2640 *
2641 * Generates the test inputs to BLAS_zhemv_z_c{_x}
2642 *
2643 * Arguments
2644 * =========
2645 *
2646 * norm (input) int
2647 * = -1: the vectors are scaled with norms near underflow.
2648 * = 0: the vectors have norms of order 1.
2649 * = 1: the vectors are scaled with norms near overflow.
2650 *
2651 * order (input) enum blas_order_type
2652 * storage format of the matrices
2653 *
2654 * uplo (input) enum blas_uplo_type
2655 * which half of the hemvetric matrix a is to be stored.
2656 *
2657 * n (input) int
2658 * sizes of symmetrical matrix a, size of vectors x, y:
2659 * matrix a is n-by-n.
2660 *
2661 * randomize (input) int
2662 * if 0, entries in matrices A, x will be chosen for
2663 * maximum cancellation, but with less randomness.
2664 * if 1, every entry in the matrix A, x will be
2665 * random.
2666 *
2667 * alpha (input/output) void*
2668 * if alpha_flag = 1, alpha is input.
2669 * if alpha_flag = 0, alpha is output.
2670 *
2671 * alpha_flag (input) int
2672 * = 0: alpha is free, and is output.
2673 * = 1: alpha is fixed on input.
2674 *
2675 * beta (input/output) void*
2676 * if beta_flag = 1, beta is input.
2677 * if beta_flag = 0, beta is output.
2678 *
2679 * beta_flag (input) int
2680 * = 0: beta is free, and is output.
2681 * = 1: beta is fixed on input.
2682 *
2683 * a (input/output) void*
2684 *
2685 * lda (input) lda
2686 * leading dimension of matrix A.
2687 *
2688 * x (input/output) void*
2689 *
2690 * incx (input) int
2691 * stride of vector x.
2692 *
2693 * y (input/output) void*
2694 * generated vector y that will be used as an input to HEMV.
2695 *
2696 * incy (input) int
2697 * leading dimension of vector y.
2698 *
2699 * seed (input/output) int *
2700 * seed for the random number generator.
2701 *
2702 * head_r_true (output) double *
2703 * the leading part of the truth in double-double.
2704 *
2705 * tail_r_true (output) double *
2706 * the trailing part of the truth in double-double
2707 *
2708 */
2709 {
2710 char *routine_name = "BLAS_zhemv_z_c_testgen";
2711 {
2712
2713 /* Strategy:
2714 R1 = alpha * A1 * x + beta * y1
2715 R2 = alpha * A2 * x + beta * y2
2716 where all the matrices and vectors are real. Then let R = R1 + i R2,
2717 A = A1 + i A2, y = y1 + i y2. To make A hermitian, A1 is
2718 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
2719 */
2720
2721 int i, j;
2722 int yi;
2723 int aij, ai;
2724 int a1ij, a1i;
2725 int xi;
2726 int mi;
2727 int incyi, x_starti, y_starti;
2728 int incaij, incai;
2729 int inca1ij, inca1i;
2730 int incxi;
2731 int inca_vec, incx_vec;
2732 int n_i;
2733 int ld;
2734 int ab;
2735 int ri, incri;
2736
2737 double *a1;
2738 double *a2;
2739 double *y1;
2740 double *y2;
2741 float *x0;
2742
2743 double *head_r1_true, *tail_r1_true;
2744 double *head_r2_true, *tail_r2_true;
2745
2746 double head_r_elem1, tail_r_elem1;
2747 double head_r_elem2, tail_r_elem2;
2748 double head_r_elem, tail_r_elem;
2749
2750 double *a_vec;
2751 float *x_vec;
2752
2753 double *y_i = (double *) y;
2754 double *alpha_i = (double *) alpha;
2755 double *beta_i = (double *) beta;
2756
2757 double *a_i = (double *) a;
2758 float *x_i = (float *) x;
2759
2760 n_i = n;
2761
2762 ld = n_i;
2763
2764
2765
2766 if (order == blas_colmajor) {
2767 inca1i = incai = 1;
2768 incyi = incy;
2769 incaij = lda;
2770 incxi = incx;
2771 inca1ij = n_i;
2772 } else {
2773 incyi = incy;
2774 incai = lda;
2775 incxi = incx;
2776 inca1i = n_i;
2777 inca1ij = incaij = 1;
2778 }
2779 if ((0 == incx) || (0 == incy)) {
2780 BLAS_error(routine_name, 0, 0, NULL);
2781 }
2782 if (incx > 0) {
2783 x_starti = 0;
2784 } else {
2785 x_starti = (-n_i + 1) * incx;
2786 }
2787 if (incy > 0) {
2788 y_starti = 0;
2789 } else {
2790 y_starti = (-n_i + 1) * incy;
2791 }
2792 incri = 1;
2793 incri *= 2;
2794 x_starti *= 2;
2795 y_starti *= 2;
2796 incyi *= 2;
2797 incai *= 2;
2798 incaij *= 2;
2799 incxi *= 2;
2800
2801 inca_vec = incx_vec = 1;
2802 inca_vec *= 2;
2803 incx_vec *= 2;
2804 a_vec = (double *) blas_malloc(n_i * sizeof(double) * 2);
2805 if (n_i > 0 && a_vec == NULL) {
2806 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2807 }
2808 for (i = 0; i < n_i * inca_vec; i += inca_vec) {
2809 a_vec[i] = 0.0;
2810 a_vec[i + 1] = 0.0;
2811 }
2812 x_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
2813 if (n_i > 0 && x_vec == NULL) {
2814 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2815 }
2816 for (i = 0; i < n_i * incx_vec; i += incx_vec) {
2817 x_vec[i] = 0.0;
2818 x_vec[i + 1] = 0.0;
2819 }
2820
2821 if (randomize == 0) {
2822 int incx0, incy1, incy2, incmi = 1;
2823 a1 = (double *) blas_malloc(n_i * n_i * sizeof(double));
2824 if (n_i * n_i > 0 && a1 == NULL) {
2825 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2826 }
2827 a2 = (double *) blas_malloc(n_i * n_i * sizeof(double));
2828 if (n_i * n_i > 0 && a2 == NULL) {
2829 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2830 }
2831 for (i = 0; i < n_i * n_i; ++i) {
2832 a1[i] = a2[i] = 0.0;
2833 }
2834 y1 = (double *) blas_malloc(n_i * sizeof(double));
2835 if (n_i > 0 && y1 == NULL) {
2836 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2837 }
2838 incy1 = 1;
2839
2840 y2 = (double *) blas_malloc(n_i * sizeof(double));
2841 if (n_i > 0 && y2 == NULL) {
2842 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2843 }
2844 incy2 = 1;
2845
2846 x0 = (float *) blas_malloc(n_i * sizeof(float));
2847 if (n_i > 0 && x0 == NULL) {
2848 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2849 }
2850 incx0 = 1;
2851
2852 for (i = 0; i < n_i; ++i) {
2853 y1[i] = y2[i] = x0[i] = 0.0;
2854 }
2855 head_r1_true = (double *) blas_malloc(n_i * sizeof(double));
2856 tail_r1_true = (double *) blas_malloc(n_i * sizeof(double));
2857 if (n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2858 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2859 };
2860 head_r2_true = (double *) blas_malloc(n_i * sizeof(double));
2861 tail_r2_true = (double *) blas_malloc(n_i * sizeof(double));
2862 if (n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2863 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2864 };
2865
2866 /* First generate the real portion of matrix A, and matrix B.
2867 Note that Re(A) is a symmetric matrix. */
2868 /* x0 is output from this call */
2869 BLAS_dsymv_d_s_testgen
2870 (norm, order, uplo, n, 0, alpha_i, alpha_flag, beta_i,
2871 beta_flag, a1, n_i, x0, incx0, y1, incy1, seed, head_r1_true,
2872 tail_r1_true);
2873
2874 /* x0 is now fixed and is input to this call */
2875 BLAS_dskew_testgen_hemv_d_s
2876 (norm, order, uplo, n, 0, alpha_i, beta_i, a2, n_i, x0,
2877 incx0, y2, incy2, seed, head_r2_true, tail_r2_true);
2878
2879
2880 /* The case where x is a complex vector. Since x is generated
2881 as a real vector, we need to perform some scaling.
2882
2883 There are four cases to consider, depending on the values
2884 of alpha and beta.
2885
2886 values scaling
2887 alpha beta alpha A x beta y R (truth)
2888 0 1 1 i i i
2889 1 1 ? 1+i 1+i 1+i
2890 2 ? 1 1+i 1+i 2i 2i
2891 3 ? ? 1+i 1+i 2i 2i
2892
2893 Note that we can afford to scale R by 1+i, since they are
2894 computed in double-double precision.
2895 */
2896
2897 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2898 ab = 0;
2899 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
2900 } else if (alpha_i[0] == 1.0) {
2901 ab = 1;
2902 /* set alpha to 1, multiply beta by 1+i. */
2903 alpha_i[1] = 0.0;
2904 beta_i[1] = beta_i[0];
2905 } else if (beta_i[0] == 1.0) {
2906 ab = 2;
2907 /* set beta to 1, multiply alpha by 1+i. */
2908 beta_i[1] = 0.0;
2909 alpha_i[1] = alpha_i[0];
2910 } else {
2911 ab = 3;
2912 /* multiply alpha by 1+i, beta by 2i. */
2913 alpha_i[1] = alpha_i[0];
2914 beta_i[1] = 2.0 * beta_i[0];
2915 beta_i[0] = 0.0;
2916 }
2917
2918
2919 /* Now fill in a */
2920 for (i = 0, ai = 0, a1i = 0; i < n_i; i++, ai += incai, a1i += inca1i) {
2921 for (j = 0, aij = ai, a1ij = a1i; j < n_i;
2922 j++, aij += incaij, a1ij += inca1ij) {
2923 a_i[aij] = a1[a1ij];
2924 a_i[aij + 1] = a2[a1ij];
2925 }
2926 }
2927
2928 /* Fill in x */
2929 for (i = 0, xi = x_starti, mi = 0; i < n_i; i++, xi += incxi,
2930 mi += incx0) {
2931 if (ab == 0) {
2932 x_i[xi] = 0.0;
2933 x_i[xi + 1] = x0[mi];
2934 } else {
2935 x_i[xi] = x0[mi];
2936 x_i[xi + 1] = x0[mi];
2937 }
2938 }
2939
2940 /* Fill in y */
2941 for (i = 0, yi = y_starti, mi = 0; i < n_i;
2942 i++, yi += incyi, mi += incy1) {
2943 if (ab == 0) {
2944 y_i[yi] = -y2[mi];
2945 y_i[yi + 1] = y1[mi];
2946 } else if (ab == 2) {
2947 y_i[yi] = -2.0 * y2[mi];
2948 y_i[yi + 1] = 2.0 * y1[mi];
2949 } else {
2950 y_i[yi] = y1[mi];
2951 y_i[yi + 1] = y2[mi];
2952 }
2953 }
2954
2955 /* Fill in the truth */
2956 for (i = 0, ri = 0, mi = 0; i < n_i; i++, ri += incri, mi += incmi) {
2957
2958 head_r_elem1 = head_r1_true[mi];
2959 tail_r_elem1 = tail_r1_true[mi];
2960
2961 head_r_elem2 = head_r2_true[mi];
2962 tail_r_elem2 = tail_r2_true[mi];
2963
2964 if (ab == 0) {
2965 head_r_true[ri] = -head_r_elem2;
2966 tail_r_true[ri] = -tail_r_elem2;
2967 head_r_true[ri + 1] = head_r_elem1;
2968 tail_r_true[ri + 1] = tail_r_elem1;
2969 } else if (ab == 1) {
2970 {
2971 /* Compute double-double = double-double + double-double. */
2972 double bv;
2973 double s1, s2, t1, t2;
2974
2975 /* Add two hi words. */
2976 s1 = head_r_elem1 + head_r_elem2;
2977 bv = s1 - head_r_elem1;
2978 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2979
2980 /* Add two lo words. */
2981 t1 = tail_r_elem1 + tail_r_elem2;
2982 bv = t1 - tail_r_elem1;
2983 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2984
2985 s2 += t1;
2986
2987 /* Renormalize (s1, s2) to (t1, s2) */
2988 t1 = s1 + s2;
2989 s2 = s2 - (t1 - s1);
2990
2991 t2 += s2;
2992
2993 /* Renormalize (t1, t2) */
2994 head_r_elem = t1 + t2;
2995 tail_r_elem = t2 - (head_r_elem - t1);
2996 }
2997
2998 /* Set the imaginary part to R1 + R2 */
2999 tail_r_true[ri + 1] = tail_r_elem;
3000 head_r_true[ri + 1] = head_r_elem;
3001
3002 /* Set the real part to R1 - R2. */
3003 {
3004 double head_bt, tail_bt;
3005 head_bt = -head_r_elem2;
3006 tail_bt = -tail_r_elem2;
3007 {
3008 /* Compute double-double = double-double + double-double. */
3009 double bv;
3010 double s1, s2, t1, t2;
3011
3012 /* Add two hi words. */
3013 s1 = head_r_elem1 + head_bt;
3014 bv = s1 - head_r_elem1;
3015 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3016
3017 /* Add two lo words. */
3018 t1 = tail_r_elem1 + tail_bt;
3019 bv = t1 - tail_r_elem1;
3020 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3021
3022 s2 += t1;
3023
3024 /* Renormalize (s1, s2) to (t1, s2) */
3025 t1 = s1 + s2;
3026 s2 = s2 - (t1 - s1);
3027
3028 t2 += s2;
3029
3030 /* Renormalize (t1, t2) */
3031 head_r_elem = t1 + t2;
3032 tail_r_elem = t2 - (head_r_elem - t1);
3033 }
3034 }
3035 tail_r_true[ri] = tail_r_elem;
3036 head_r_true[ri] = head_r_elem;
3037 } else {
3038
3039 /* Real part */
3040 {
3041 /* Compute double-double = double-double * double. */
3042 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
3043
3044 con = head_r_elem2 * split;
3045 a11 = con - head_r_elem2;
3046 a11 = con - a11;
3047 a21 = head_r_elem2 - a11;
3048 con = -2.0 * split;
3049 b1 = con - -2.0;
3050 b1 = con - b1;
3051 b2 = -2.0 - b1;
3052
3053 c11 = head_r_elem2 * -2.0;
3054 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
3055
3056 c2 = tail_r_elem2 * -2.0;
3057 t1 = c11 + c2;
3058 t2 = (c2 - (t1 - c11)) + c21;
3059
3060 head_r_elem = t1 + t2;
3061 tail_r_elem = t2 - (head_r_elem - t1);
3062 }
3063 head_r_true[ri] = head_r_elem;
3064 tail_r_true[ri] = tail_r_elem;
3065
3066 /* Imaginary Part */
3067 {
3068 /* Compute double-double = double-double * double. */
3069 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
3070
3071 con = head_r_elem1 * split;
3072 a11 = con - head_r_elem1;
3073 a11 = con - a11;
3074 a21 = head_r_elem1 - a11;
3075 con = 2.0 * split;
3076 b1 = con - 2.0;
3077 b1 = con - b1;
3078 b2 = 2.0 - b1;
3079
3080 c11 = head_r_elem1 * 2.0;
3081 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
3082
3083 c2 = tail_r_elem1 * 2.0;
3084 t1 = c11 + c2;
3085 t2 = (c2 - (t1 - c11)) + c21;
3086
3087 head_r_elem = t1 + t2;
3088 tail_r_elem = t2 - (head_r_elem - t1);
3089 }
3090 head_r_true[ri + 1] = head_r_elem;
3091 tail_r_true[ri + 1] = tail_r_elem;
3092 }
3093 }
3094
3095
3096 blas_free(a1);
3097 blas_free(a2);
3098 blas_free(y1);
3099 blas_free(y2);
3100 blas_free(x0);
3101 blas_free(head_r1_true);
3102 blas_free(tail_r1_true);
3103 blas_free(head_r2_true);
3104 blas_free(tail_r2_true);
3105 } else {
3106 /* get random A, x, then compute y for some cancellation. */
3107 double a_elem[2];
3108 float x_elem[2];
3109 double y_elem[2];
3110 double head_r_true_elem[2], tail_r_true_elem[2];
3111
3112 /* Since mixed real/complex test generator for dot
3113 scales the vectors, we need to used the non-mixed
3114 version if x is real (since A is always complex). */
3115
3116
3117 if (alpha_flag == 0) {
3118 alpha_i[0] = (float) xrand(seed);
3119 alpha_i[1] = (float) xrand(seed);
3120 }
3121 if (beta_flag == 0) {
3122 beta_i[0] = (float) xrand(seed);
3123 beta_i[1] = (float) xrand(seed);
3124 }
3125
3126 /* Fill in matrix A -- Hermitian. */
3127 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
3128 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
3129 a_elem[0] = (float) xrand(seed);
3130 a_elem[1] = (float) xrand(seed);
3131 a_i[aij] = a_elem[0];
3132 a_i[aij + 1] = a_elem[1];
3133 if (i == j)
3134 a_i[aij + 1] = 0.0;
3135 }
3136 }
3137
3138 /* Fill in vector x */
3139 for (i = 0, xi = x_starti; i < n_i; i++, xi += incxi) {
3140 x_elem[0] = (float) xrand(seed);
3141 x_elem[1] = (float) xrand(seed);
3142 x_i[xi] = x_elem[0];
3143 x_i[xi + 1] = x_elem[1];
3144 }
3145
3146 ccopy_vector(x_i, n_i, incx, x_vec, 1);
3147
3148
3149 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi,
3150 ri += incri) {
3151 zhe_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
3152
3153 BLAS_zdot_z_c_testgen(n_i, n_i, 0,
3154 norm, blas_no_conj,
3155 alpha, 1, beta, 1, a_vec,
3156 x_vec, seed,
3157 y_elem, head_r_true_elem, tail_r_true_elem);
3158
3159 y_i[yi] = y_elem[0];
3160 y_i[yi + 1] = y_elem[1];
3161 head_r_true[ri] = head_r_true_elem[0];
3162 head_r_true[ri + 1] = head_r_true_elem[1];
3163 tail_r_true[ri] = tail_r_true_elem[0];
3164 tail_r_true[ri + 1] = tail_r_true_elem[1];
3165 }
3166
3167
3168 }
3169
3170 blas_free(a_vec);
3171 blas_free(x_vec);
3172 }
3173 } /* end BLAS_zhemv_z_c_testgen */
BLAS_zhemv_c_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x,int incx,void * y,int incy,int * seed,double * head_r_true,double * tail_r_true)3174 void BLAS_zhemv_c_c_testgen(int norm, enum blas_order_type order,
3175 enum blas_uplo_type uplo,
3176 int n, int randomize,
3177 void *alpha, int alpha_flag, void *beta,
3178 int beta_flag, void *a, int lda, void *x,
3179 int incx, void *y, int incy, int *seed,
3180 double *head_r_true, double *tail_r_true)
3181
3182 /*
3183 * Purpose
3184 * =======
3185 *
3186 * Generates the test inputs to BLAS_zhemv_c_c{_x}
3187 *
3188 * Arguments
3189 * =========
3190 *
3191 * norm (input) int
3192 * = -1: the vectors are scaled with norms near underflow.
3193 * = 0: the vectors have norms of order 1.
3194 * = 1: the vectors are scaled with norms near overflow.
3195 *
3196 * order (input) enum blas_order_type
3197 * storage format of the matrices
3198 *
3199 * uplo (input) enum blas_uplo_type
3200 * which half of the hemvetric matrix a is to be stored.
3201 *
3202 * n (input) int
3203 * sizes of symmetrical matrix a, size of vectors x, y:
3204 * matrix a is n-by-n.
3205 *
3206 * randomize (input) int
3207 * if 0, entries in matrices A, x will be chosen for
3208 * maximum cancellation, but with less randomness.
3209 * if 1, every entry in the matrix A, x will be
3210 * random.
3211 *
3212 * alpha (input/output) void*
3213 * if alpha_flag = 1, alpha is input.
3214 * if alpha_flag = 0, alpha is output.
3215 *
3216 * alpha_flag (input) int
3217 * = 0: alpha is free, and is output.
3218 * = 1: alpha is fixed on input.
3219 *
3220 * beta (input/output) void*
3221 * if beta_flag = 1, beta is input.
3222 * if beta_flag = 0, beta is output.
3223 *
3224 * beta_flag (input) int
3225 * = 0: beta is free, and is output.
3226 * = 1: beta is fixed on input.
3227 *
3228 * a (input/output) void*
3229 *
3230 * lda (input) lda
3231 * leading dimension of matrix A.
3232 *
3233 * x (input/output) void*
3234 *
3235 * incx (input) int
3236 * stride of vector x.
3237 *
3238 * y (input/output) void*
3239 * generated vector y that will be used as an input to HEMV.
3240 *
3241 * incy (input) int
3242 * leading dimension of vector y.
3243 *
3244 * seed (input/output) int *
3245 * seed for the random number generator.
3246 *
3247 * head_r_true (output) double *
3248 * the leading part of the truth in double-double.
3249 *
3250 * tail_r_true (output) double *
3251 * the trailing part of the truth in double-double
3252 *
3253 */
3254 {
3255 char *routine_name = "BLAS_zhemv_c_c_testgen";
3256 {
3257
3258 /* Strategy:
3259 R1 = alpha * A1 * x + beta * y1
3260 R2 = alpha * A2 * x + beta * y2
3261 where all the matrices and vectors are real. Then let R = R1 + i R2,
3262 A = A1 + i A2, y = y1 + i y2. To make A hermitian, A1 is
3263 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
3264 */
3265
3266 int i, j;
3267 int yi;
3268 int aij, ai;
3269 int a1ij, a1i;
3270 int xi;
3271 int mi;
3272 int incyi, x_starti, y_starti;
3273 int incaij, incai;
3274 int inca1ij, inca1i;
3275 int incxi;
3276 int inca_vec, incx_vec;
3277 int n_i;
3278 int ld;
3279 int ab;
3280 int ri, incri;
3281
3282 float *a1;
3283 float *a2;
3284 double *y1;
3285 double *y2;
3286 float *x0;
3287
3288 double *head_r1_true, *tail_r1_true;
3289 double *head_r2_true, *tail_r2_true;
3290
3291 double head_r_elem1, tail_r_elem1;
3292 double head_r_elem2, tail_r_elem2;
3293 double head_r_elem, tail_r_elem;
3294
3295 float *a_vec;
3296 float *x_vec;
3297
3298 double *y_i = (double *) y;
3299 double *alpha_i = (double *) alpha;
3300 double *beta_i = (double *) beta;
3301
3302 float *a_i = (float *) a;
3303 float *x_i = (float *) x;
3304
3305 n_i = n;
3306
3307 ld = n_i;
3308
3309
3310
3311 if (order == blas_colmajor) {
3312 inca1i = incai = 1;
3313 incyi = incy;
3314 incaij = lda;
3315 incxi = incx;
3316 inca1ij = n_i;
3317 } else {
3318 incyi = incy;
3319 incai = lda;
3320 incxi = incx;
3321 inca1i = n_i;
3322 inca1ij = incaij = 1;
3323 }
3324 if ((0 == incx) || (0 == incy)) {
3325 BLAS_error(routine_name, 0, 0, NULL);
3326 }
3327 if (incx > 0) {
3328 x_starti = 0;
3329 } else {
3330 x_starti = (-n_i + 1) * incx;
3331 }
3332 if (incy > 0) {
3333 y_starti = 0;
3334 } else {
3335 y_starti = (-n_i + 1) * incy;
3336 }
3337 incri = 1;
3338 incri *= 2;
3339 x_starti *= 2;
3340 y_starti *= 2;
3341 incyi *= 2;
3342 incai *= 2;
3343 incaij *= 2;
3344 incxi *= 2;
3345
3346 inca_vec = incx_vec = 1;
3347 inca_vec *= 2;
3348 incx_vec *= 2;
3349 a_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
3350 if (n_i > 0 && a_vec == NULL) {
3351 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3352 }
3353 for (i = 0; i < n_i * inca_vec; i += inca_vec) {
3354 a_vec[i] = 0.0;
3355 a_vec[i + 1] = 0.0;
3356 }
3357 x_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
3358 if (n_i > 0 && x_vec == NULL) {
3359 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3360 }
3361 for (i = 0; i < n_i * incx_vec; i += incx_vec) {
3362 x_vec[i] = 0.0;
3363 x_vec[i + 1] = 0.0;
3364 }
3365
3366 if (randomize == 0) {
3367 int incx0, incy1, incy2, incmi = 1;
3368 a1 = (float *) blas_malloc(n_i * n_i * sizeof(float));
3369 if (n_i * n_i > 0 && a1 == NULL) {
3370 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3371 }
3372 a2 = (float *) blas_malloc(n_i * n_i * sizeof(float));
3373 if (n_i * n_i > 0 && a2 == NULL) {
3374 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3375 }
3376 for (i = 0; i < n_i * n_i; ++i) {
3377 a1[i] = a2[i] = 0.0;
3378 }
3379 y1 = (double *) blas_malloc(n_i * sizeof(double));
3380 if (n_i > 0 && y1 == NULL) {
3381 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3382 }
3383 incy1 = 1;
3384
3385 y2 = (double *) blas_malloc(n_i * sizeof(double));
3386 if (n_i > 0 && y2 == NULL) {
3387 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3388 }
3389 incy2 = 1;
3390
3391 x0 = (float *) blas_malloc(n_i * sizeof(float));
3392 if (n_i > 0 && x0 == NULL) {
3393 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3394 }
3395 incx0 = 1;
3396
3397 for (i = 0; i < n_i; ++i) {
3398 y1[i] = y2[i] = x0[i] = 0.0;
3399 }
3400 head_r1_true = (double *) blas_malloc(n_i * sizeof(double));
3401 tail_r1_true = (double *) blas_malloc(n_i * sizeof(double));
3402 if (n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3403 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3404 };
3405 head_r2_true = (double *) blas_malloc(n_i * sizeof(double));
3406 tail_r2_true = (double *) blas_malloc(n_i * sizeof(double));
3407 if (n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3408 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3409 };
3410
3411 /* First generate the real portion of matrix A, and matrix B.
3412 Note that Re(A) is a symmetric matrix. */
3413 /* x0 is output from this call */
3414 BLAS_dsymv_s_s_testgen
3415 (norm, order, uplo, n, 0, alpha_i, alpha_flag, beta_i,
3416 beta_flag, a1, n_i, x0, incx0, y1, incy1, seed, head_r1_true,
3417 tail_r1_true);
3418
3419 /* x0 is now fixed and is input to this call */
3420 BLAS_dskew_testgen_hemv_s_s
3421 (norm, order, uplo, n, 0, alpha_i, beta_i, a2, n_i, x0,
3422 incx0, y2, incy2, seed, head_r2_true, tail_r2_true);
3423
3424
3425 /* The case where x is a complex vector. Since x is generated
3426 as a real vector, we need to perform some scaling.
3427
3428 There are four cases to consider, depending on the values
3429 of alpha and beta.
3430
3431 values scaling
3432 alpha beta alpha A x beta y R (truth)
3433 0 1 1 i i i
3434 1 1 ? 1+i 1+i 1+i
3435 2 ? 1 1+i 1+i 2i 2i
3436 3 ? ? 1+i 1+i 2i 2i
3437
3438 Note that we can afford to scale R by 1+i, since they are
3439 computed in double-double precision.
3440 */
3441
3442 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3443 ab = 0;
3444 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
3445 } else if (alpha_i[0] == 1.0) {
3446 ab = 1;
3447 /* set alpha to 1, multiply beta by 1+i. */
3448 alpha_i[1] = 0.0;
3449 beta_i[1] = beta_i[0];
3450 } else if (beta_i[0] == 1.0) {
3451 ab = 2;
3452 /* set beta to 1, multiply alpha by 1+i. */
3453 beta_i[1] = 0.0;
3454 alpha_i[1] = alpha_i[0];
3455 } else {
3456 ab = 3;
3457 /* multiply alpha by 1+i, beta by 2i. */
3458 alpha_i[1] = alpha_i[0];
3459 beta_i[1] = 2.0 * beta_i[0];
3460 beta_i[0] = 0.0;
3461 }
3462
3463
3464 /* Now fill in a */
3465 for (i = 0, ai = 0, a1i = 0; i < n_i; i++, ai += incai, a1i += inca1i) {
3466 for (j = 0, aij = ai, a1ij = a1i; j < n_i;
3467 j++, aij += incaij, a1ij += inca1ij) {
3468 a_i[aij] = a1[a1ij];
3469 a_i[aij + 1] = a2[a1ij];
3470 }
3471 }
3472
3473 /* Fill in x */
3474 for (i = 0, xi = x_starti, mi = 0; i < n_i; i++, xi += incxi,
3475 mi += incx0) {
3476 if (ab == 0) {
3477 x_i[xi] = 0.0;
3478 x_i[xi + 1] = x0[mi];
3479 } else {
3480 x_i[xi] = x0[mi];
3481 x_i[xi + 1] = x0[mi];
3482 }
3483 }
3484
3485 /* Fill in y */
3486 for (i = 0, yi = y_starti, mi = 0; i < n_i;
3487 i++, yi += incyi, mi += incy1) {
3488 if (ab == 0) {
3489 y_i[yi] = -y2[mi];
3490 y_i[yi + 1] = y1[mi];
3491 } else if (ab == 2) {
3492 y_i[yi] = -2.0 * y2[mi];
3493 y_i[yi + 1] = 2.0 * y1[mi];
3494 } else {
3495 y_i[yi] = y1[mi];
3496 y_i[yi + 1] = y2[mi];
3497 }
3498 }
3499
3500 /* Fill in the truth */
3501 for (i = 0, ri = 0, mi = 0; i < n_i; i++, ri += incri, mi += incmi) {
3502
3503 head_r_elem1 = head_r1_true[mi];
3504 tail_r_elem1 = tail_r1_true[mi];
3505
3506 head_r_elem2 = head_r2_true[mi];
3507 tail_r_elem2 = tail_r2_true[mi];
3508
3509 if (ab == 0) {
3510 head_r_true[ri] = -head_r_elem2;
3511 tail_r_true[ri] = -tail_r_elem2;
3512 head_r_true[ri + 1] = head_r_elem1;
3513 tail_r_true[ri + 1] = tail_r_elem1;
3514 } else if (ab == 1) {
3515 {
3516 /* Compute double-double = double-double + double-double. */
3517 double bv;
3518 double s1, s2, t1, t2;
3519
3520 /* Add two hi words. */
3521 s1 = head_r_elem1 + head_r_elem2;
3522 bv = s1 - head_r_elem1;
3523 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3524
3525 /* Add two lo words. */
3526 t1 = tail_r_elem1 + tail_r_elem2;
3527 bv = t1 - tail_r_elem1;
3528 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3529
3530 s2 += t1;
3531
3532 /* Renormalize (s1, s2) to (t1, s2) */
3533 t1 = s1 + s2;
3534 s2 = s2 - (t1 - s1);
3535
3536 t2 += s2;
3537
3538 /* Renormalize (t1, t2) */
3539 head_r_elem = t1 + t2;
3540 tail_r_elem = t2 - (head_r_elem - t1);
3541 }
3542
3543 /* Set the imaginary part to R1 + R2 */
3544 tail_r_true[ri + 1] = tail_r_elem;
3545 head_r_true[ri + 1] = head_r_elem;
3546
3547 /* Set the real part to R1 - R2. */
3548 {
3549 double head_bt, tail_bt;
3550 head_bt = -head_r_elem2;
3551 tail_bt = -tail_r_elem2;
3552 {
3553 /* Compute double-double = double-double + double-double. */
3554 double bv;
3555 double s1, s2, t1, t2;
3556
3557 /* Add two hi words. */
3558 s1 = head_r_elem1 + head_bt;
3559 bv = s1 - head_r_elem1;
3560 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3561
3562 /* Add two lo words. */
3563 t1 = tail_r_elem1 + tail_bt;
3564 bv = t1 - tail_r_elem1;
3565 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3566
3567 s2 += t1;
3568
3569 /* Renormalize (s1, s2) to (t1, s2) */
3570 t1 = s1 + s2;
3571 s2 = s2 - (t1 - s1);
3572
3573 t2 += s2;
3574
3575 /* Renormalize (t1, t2) */
3576 head_r_elem = t1 + t2;
3577 tail_r_elem = t2 - (head_r_elem - t1);
3578 }
3579 }
3580 tail_r_true[ri] = tail_r_elem;
3581 head_r_true[ri] = head_r_elem;
3582 } else {
3583
3584 /* Real part */
3585 {
3586 /* Compute double-double = double-double * double. */
3587 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
3588
3589 con = head_r_elem2 * split;
3590 a11 = con - head_r_elem2;
3591 a11 = con - a11;
3592 a21 = head_r_elem2 - a11;
3593 con = -2.0 * split;
3594 b1 = con - -2.0;
3595 b1 = con - b1;
3596 b2 = -2.0 - b1;
3597
3598 c11 = head_r_elem2 * -2.0;
3599 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
3600
3601 c2 = tail_r_elem2 * -2.0;
3602 t1 = c11 + c2;
3603 t2 = (c2 - (t1 - c11)) + c21;
3604
3605 head_r_elem = t1 + t2;
3606 tail_r_elem = t2 - (head_r_elem - t1);
3607 }
3608 head_r_true[ri] = head_r_elem;
3609 tail_r_true[ri] = tail_r_elem;
3610
3611 /* Imaginary Part */
3612 {
3613 /* Compute double-double = double-double * double. */
3614 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
3615
3616 con = head_r_elem1 * split;
3617 a11 = con - head_r_elem1;
3618 a11 = con - a11;
3619 a21 = head_r_elem1 - a11;
3620 con = 2.0 * split;
3621 b1 = con - 2.0;
3622 b1 = con - b1;
3623 b2 = 2.0 - b1;
3624
3625 c11 = head_r_elem1 * 2.0;
3626 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
3627
3628 c2 = tail_r_elem1 * 2.0;
3629 t1 = c11 + c2;
3630 t2 = (c2 - (t1 - c11)) + c21;
3631
3632 head_r_elem = t1 + t2;
3633 tail_r_elem = t2 - (head_r_elem - t1);
3634 }
3635 head_r_true[ri + 1] = head_r_elem;
3636 tail_r_true[ri + 1] = tail_r_elem;
3637 }
3638 }
3639
3640
3641 blas_free(a1);
3642 blas_free(a2);
3643 blas_free(y1);
3644 blas_free(y2);
3645 blas_free(x0);
3646 blas_free(head_r1_true);
3647 blas_free(tail_r1_true);
3648 blas_free(head_r2_true);
3649 blas_free(tail_r2_true);
3650 } else {
3651 /* get random A, x, then compute y for some cancellation. */
3652 float a_elem[2];
3653 float x_elem[2];
3654 double y_elem[2];
3655 double head_r_true_elem[2], tail_r_true_elem[2];
3656
3657 /* Since mixed real/complex test generator for dot
3658 scales the vectors, we need to used the non-mixed
3659 version if x is real (since A is always complex). */
3660
3661
3662 if (alpha_flag == 0) {
3663 alpha_i[0] = (float) xrand(seed);
3664 alpha_i[1] = (float) xrand(seed);
3665 }
3666 if (beta_flag == 0) {
3667 beta_i[0] = (float) xrand(seed);
3668 beta_i[1] = (float) xrand(seed);
3669 }
3670
3671 /* Fill in matrix A -- Hermitian. */
3672 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
3673 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
3674 a_elem[0] = (float) xrand(seed);
3675 a_elem[1] = (float) xrand(seed);
3676 a_i[aij] = a_elem[0];
3677 a_i[aij + 1] = a_elem[1];
3678 if (i == j)
3679 a_i[aij + 1] = 0.0;
3680 }
3681 }
3682
3683 /* Fill in vector x */
3684 for (i = 0, xi = x_starti; i < n_i; i++, xi += incxi) {
3685 x_elem[0] = (float) xrand(seed);
3686 x_elem[1] = (float) xrand(seed);
3687 x_i[xi] = x_elem[0];
3688 x_i[xi + 1] = x_elem[1];
3689 }
3690
3691 ccopy_vector(x_i, n_i, incx, x_vec, 1);
3692
3693
3694 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi,
3695 ri += incri) {
3696 che_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
3697
3698 BLAS_zdot_c_c_testgen(n_i, n_i, 0,
3699 norm, blas_no_conj,
3700 alpha, 1, beta, 1, a_vec,
3701 x_vec, seed,
3702 y_elem, head_r_true_elem, tail_r_true_elem);
3703
3704 y_i[yi] = y_elem[0];
3705 y_i[yi + 1] = y_elem[1];
3706 head_r_true[ri] = head_r_true_elem[0];
3707 head_r_true[ri + 1] = head_r_true_elem[1];
3708 tail_r_true[ri] = tail_r_true_elem[0];
3709 tail_r_true[ri + 1] = tail_r_true_elem[1];
3710 }
3711
3712
3713 }
3714
3715 blas_free(a_vec);
3716 blas_free(x_vec);
3717 }
3718 } /* end BLAS_zhemv_c_c_testgen */
BLAS_zhemv_z_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,double * x,int incx,void * y,int incy,int * seed,double * head_r_true,double * tail_r_true)3719 void BLAS_zhemv_z_d_testgen(int norm, enum blas_order_type order,
3720 enum blas_uplo_type uplo,
3721 int n, int randomize,
3722 void *alpha, int alpha_flag, void *beta,
3723 int beta_flag, void *a, int lda, double *x,
3724 int incx, void *y, int incy, int *seed,
3725 double *head_r_true, double *tail_r_true)
3726
3727 /*
3728 * Purpose
3729 * =======
3730 *
3731 * Generates the test inputs to BLAS_zhemv_z_d{_x}
3732 *
3733 * Arguments
3734 * =========
3735 *
3736 * norm (input) int
3737 * = -1: the vectors are scaled with norms near underflow.
3738 * = 0: the vectors have norms of order 1.
3739 * = 1: the vectors are scaled with norms near overflow.
3740 *
3741 * order (input) enum blas_order_type
3742 * storage format of the matrices
3743 *
3744 * uplo (input) enum blas_uplo_type
3745 * which half of the hemvetric matrix a is to be stored.
3746 *
3747 * n (input) int
3748 * sizes of symmetrical matrix a, size of vectors x, y:
3749 * matrix a is n-by-n.
3750 *
3751 * randomize (input) int
3752 * if 0, entries in matrices A, x will be chosen for
3753 * maximum cancellation, but with less randomness.
3754 * if 1, every entry in the matrix A, x will be
3755 * random.
3756 *
3757 * alpha (input/output) void*
3758 * if alpha_flag = 1, alpha is input.
3759 * if alpha_flag = 0, alpha is output.
3760 *
3761 * alpha_flag (input) int
3762 * = 0: alpha is free, and is output.
3763 * = 1: alpha is fixed on input.
3764 *
3765 * beta (input/output) void*
3766 * if beta_flag = 1, beta is input.
3767 * if beta_flag = 0, beta is output.
3768 *
3769 * beta_flag (input) int
3770 * = 0: beta is free, and is output.
3771 * = 1: beta is fixed on input.
3772 *
3773 * a (input/output) void*
3774 *
3775 * lda (input) lda
3776 * leading dimension of matrix A.
3777 *
3778 * x (input/output) double*
3779 *
3780 * incx (input) int
3781 * stride of vector x.
3782 *
3783 * y (input/output) void*
3784 * generated vector y that will be used as an input to HEMV.
3785 *
3786 * incy (input) int
3787 * leading dimension of vector y.
3788 *
3789 * seed (input/output) int *
3790 * seed for the random number generator.
3791 *
3792 * head_r_true (output) double *
3793 * the leading part of the truth in double-double.
3794 *
3795 * tail_r_true (output) double *
3796 * the trailing part of the truth in double-double
3797 *
3798 */
3799 {
3800 char *routine_name = "BLAS_zhemv_z_d_testgen";
3801 {
3802
3803 /* Strategy:
3804 R1 = alpha * A1 * x + beta * y1
3805 R2 = alpha * A2 * x + beta * y2
3806 where all the matrices and vectors are real. Then let R = R1 + i R2,
3807 A = A1 + i A2, y = y1 + i y2. To make A hermitian, A1 is
3808 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
3809 */
3810
3811 int i, j;
3812 int yi;
3813 int aij, ai;
3814 int a1ij, a1i;
3815 int xi;
3816 int mi;
3817 int incyi, x_starti, y_starti;
3818 int incaij, incai;
3819 int inca1ij, inca1i;
3820 int incxi;
3821 int inca_vec, incx_vec;
3822 int n_i;
3823 int ld;
3824 int ab;
3825 int ri, incri;
3826
3827 double *a1;
3828 double *a2;
3829 double *y1;
3830 double *y2;
3831 double *x0;
3832
3833 double *head_r1_true, *tail_r1_true;
3834 double *head_r2_true, *tail_r2_true;
3835
3836 double head_r_elem1, tail_r_elem1;
3837 double head_r_elem2, tail_r_elem2;
3838 double head_r_elem, tail_r_elem;
3839
3840 double *a_vec;
3841 double *x_vec;
3842
3843 double *y_i = (double *) y;
3844 double *alpha_i = (double *) alpha;
3845 double *beta_i = (double *) beta;
3846
3847 double *a_i = (double *) a;
3848 double *x_i = x;
3849
3850 n_i = n;
3851
3852 ld = n_i;
3853
3854
3855
3856 if (order == blas_colmajor) {
3857 inca1i = incai = 1;
3858 incyi = incy;
3859 incaij = lda;
3860 incxi = incx;
3861 inca1ij = n_i;
3862 } else {
3863 incyi = incy;
3864 incai = lda;
3865 incxi = incx;
3866 inca1i = n_i;
3867 inca1ij = incaij = 1;
3868 }
3869 if ((0 == incx) || (0 == incy)) {
3870 BLAS_error(routine_name, 0, 0, NULL);
3871 }
3872 if (incx > 0) {
3873 x_starti = 0;
3874 } else {
3875 x_starti = (-n_i + 1) * incx;
3876 }
3877 if (incy > 0) {
3878 y_starti = 0;
3879 } else {
3880 y_starti = (-n_i + 1) * incy;
3881 }
3882 incri = 1;
3883 incri *= 2;
3884
3885 y_starti *= 2;
3886 incyi *= 2;
3887 incai *= 2;
3888 incaij *= 2;
3889
3890
3891 inca_vec = incx_vec = 1;
3892 inca_vec *= 2;
3893
3894 a_vec = (double *) blas_malloc(n_i * sizeof(double) * 2);
3895 if (n_i > 0 && a_vec == NULL) {
3896 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3897 }
3898 for (i = 0; i < n_i * inca_vec; i += inca_vec) {
3899 a_vec[i] = 0.0;
3900 a_vec[i + 1] = 0.0;
3901 }
3902 x_vec = (double *) blas_malloc(n_i * sizeof(double));
3903 if (n_i > 0 && x_vec == NULL) {
3904 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3905 }
3906 for (i = 0; i < n_i * incx_vec; i += incx_vec) {
3907 x_vec[i] = 0.0;
3908 }
3909
3910 if (randomize == 0) {
3911 int incx0, incy1, incy2, incmi = 1;
3912 a1 = (double *) blas_malloc(n_i * n_i * sizeof(double));
3913 if (n_i * n_i > 0 && a1 == NULL) {
3914 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3915 }
3916 a2 = (double *) blas_malloc(n_i * n_i * sizeof(double));
3917 if (n_i * n_i > 0 && a2 == NULL) {
3918 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3919 }
3920 for (i = 0; i < n_i * n_i; ++i) {
3921 a1[i] = a2[i] = 0.0;
3922 }
3923 y1 = (double *) blas_malloc(n_i * sizeof(double));
3924 if (n_i > 0 && y1 == NULL) {
3925 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3926 }
3927 incy1 = 1;
3928
3929 y2 = (double *) blas_malloc(n_i * sizeof(double));
3930 if (n_i > 0 && y2 == NULL) {
3931 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3932 }
3933 incy2 = 1;
3934
3935 x0 = (double *) blas_malloc(n_i * sizeof(double));
3936 if (n_i > 0 && x0 == NULL) {
3937 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3938 }
3939 incx0 = 1;
3940
3941 for (i = 0; i < n_i; ++i) {
3942 y1[i] = y2[i] = x0[i] = 0.0;
3943 }
3944 head_r1_true = (double *) blas_malloc(n_i * sizeof(double));
3945 tail_r1_true = (double *) blas_malloc(n_i * sizeof(double));
3946 if (n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3947 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3948 };
3949 head_r2_true = (double *) blas_malloc(n_i * sizeof(double));
3950 tail_r2_true = (double *) blas_malloc(n_i * sizeof(double));
3951 if (n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3952 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3953 };
3954
3955 /* First generate the real portion of matrix A, and matrix B.
3956 Note that Re(A) is a symmetric matrix. */
3957 /* x0 is output from this call */
3958 BLAS_dsymv_testgen
3959 (norm, order, uplo, n, 0, alpha_i, alpha_flag, beta_i,
3960 beta_flag, a1, n_i, x0, incx0, y1, incy1, seed, head_r1_true,
3961 tail_r1_true);
3962
3963 /* x0 is now fixed and is input to this call */
3964 BLAS_dskew_testgen_hemv
3965 (norm, order, uplo, n, 0, alpha_i, beta_i, a2, n_i, x0,
3966 incx0, y2, incy2, seed, head_r2_true, tail_r2_true);
3967
3968
3969 /* The case where x is a real vector.
3970
3971 There are four cases to consider, depending on the
3972 values of alpha and beta.
3973
3974 values scaling
3975 alpha beta alpha A x beta y R (truth)
3976 0 1 1
3977 1 1 ? -i i
3978 2 ? 1 i i i
3979 3 ? ? 1+i 1+i 1+i
3980
3981 Note that we can afford to scale truth by (1+i) since they
3982 are computed in double-double.
3983 */
3984
3985 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3986 ab = 0;
3987 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
3988 } else if (alpha_i[0] == 1.0) {
3989 ab = 1;
3990 /* set alpha to 1, multiply beta by -i. */
3991 alpha_i[1] = 0.0;
3992 beta_i[1] = -beta_i[0];
3993 beta_i[0] = 0.0;
3994 } else if (beta_i[0] == 1.0) {
3995 ab = 2;
3996 /* set beta to 1, multiply alpha by i. */
3997 beta_i[1] = 0.0;
3998 alpha_i[1] = alpha_i[0];
3999 alpha_i[0] = 0.0;
4000 } else {
4001 ab = 3;
4002 /* multiply alpha, beta by (1 + i). */
4003 alpha_i[1] = alpha_i[0];
4004 beta_i[1] = beta_i[0];
4005 }
4006
4007 /* Now fill in a */
4008 for (i = 0, ai = 0, a1i = 0; i < n_i; i++, ai += incai, a1i += inca1i) {
4009 for (j = 0, aij = ai, a1ij = a1i; j < n_i;
4010 j++, aij += incaij, a1ij += inca1ij) {
4011 a_i[aij] = a1[a1ij];
4012 a_i[aij + 1] = a2[a1ij];
4013 }
4014 }
4015
4016 /* Fill in x */
4017 for (i = 0, xi = x_starti, mi = 0; i < n_i; i++, xi += incxi,
4018 mi += incx0) {
4019 x_i[xi] = x0[mi];
4020 }
4021
4022 /* Fill in y */
4023 for (i = 0, yi = y_starti, mi = 0; i < n_i; i++, yi += incyi,
4024 mi += incy1) {
4025 if (ab == 1 || ab == 2) {
4026 y_i[yi] = -y2[mi];
4027 y_i[yi + 1] = y1[mi];
4028 } else {
4029 y_i[yi] = y1[mi];
4030 y_i[yi + 1] = y2[mi];
4031 }
4032 }
4033
4034 /* Fill in truth */
4035 for (i = 0, ri = 0, mi = 0; i < n_i; i++, ri += incri, mi += incmi) {
4036 if (ab == 0 || ab == 1) {
4037 head_r_true[ri] = head_r1_true[mi];
4038 tail_r_true[ri] = tail_r1_true[mi];
4039 head_r_true[ri + 1] = head_r2_true[mi];
4040 tail_r_true[ri + 1] = tail_r2_true[mi];
4041 } else if (ab == 2) {
4042 head_r_true[ri] = -head_r2_true[mi];
4043 tail_r_true[ri] = -tail_r2_true[mi];
4044 head_r_true[ri + 1] = head_r1_true[mi];
4045 tail_r_true[ri + 1] = tail_r1_true[mi];
4046 } else {
4047 head_r_elem1 = head_r1_true[mi];
4048 tail_r_elem1 = tail_r1_true[mi];
4049
4050 head_r_elem2 = head_r2_true[mi];
4051 tail_r_elem2 = tail_r2_true[mi];
4052
4053 {
4054 /* Compute double-double = double-double + double-double. */
4055 double bv;
4056 double s1, s2, t1, t2;
4057
4058 /* Add two hi words. */
4059 s1 = head_r_elem1 + head_r_elem2;
4060 bv = s1 - head_r_elem1;
4061 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
4062
4063 /* Add two lo words. */
4064 t1 = tail_r_elem1 + tail_r_elem2;
4065 bv = t1 - tail_r_elem1;
4066 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
4067
4068 s2 += t1;
4069
4070 /* Renormalize (s1, s2) to (t1, s2) */
4071 t1 = s1 + s2;
4072 s2 = s2 - (t1 - s1);
4073
4074 t2 += s2;
4075
4076 /* Renormalize (t1, t2) */
4077 head_r_elem = t1 + t2;
4078 tail_r_elem = t2 - (head_r_elem - t1);
4079 }
4080
4081 /* Set the imaginary part to R1 + R2 */
4082 tail_r_true[ri + 1] = tail_r_elem;
4083 head_r_true[ri + 1] = head_r_elem;
4084
4085 /* Set the real part to R1 - R2. */
4086 {
4087 double head_bt, tail_bt;
4088 head_bt = -head_r_elem2;
4089 tail_bt = -tail_r_elem2;
4090 {
4091 /* Compute double-double = double-double + double-double. */
4092 double bv;
4093 double s1, s2, t1, t2;
4094
4095 /* Add two hi words. */
4096 s1 = head_r_elem1 + head_bt;
4097 bv = s1 - head_r_elem1;
4098 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
4099
4100 /* Add two lo words. */
4101 t1 = tail_r_elem1 + tail_bt;
4102 bv = t1 - tail_r_elem1;
4103 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
4104
4105 s2 += t1;
4106
4107 /* Renormalize (s1, s2) to (t1, s2) */
4108 t1 = s1 + s2;
4109 s2 = s2 - (t1 - s1);
4110
4111 t2 += s2;
4112
4113 /* Renormalize (t1, t2) */
4114 head_r_elem = t1 + t2;
4115 tail_r_elem = t2 - (head_r_elem - t1);
4116 }
4117 }
4118 tail_r_true[ri] = tail_r_elem;
4119 head_r_true[ri] = head_r_elem;
4120 }
4121 }
4122
4123
4124 blas_free(a1);
4125 blas_free(a2);
4126 blas_free(y1);
4127 blas_free(y2);
4128 blas_free(x0);
4129 blas_free(head_r1_true);
4130 blas_free(tail_r1_true);
4131 blas_free(head_r2_true);
4132 blas_free(tail_r2_true);
4133 } else {
4134 /* get random A, x, then compute y for some cancellation. */
4135 double a_elem[2];
4136 double x_elem;
4137 double y_elem[2];
4138 double head_r_true_elem[2], tail_r_true_elem[2];
4139
4140 /* Since mixed real/complex test generator for dot
4141 scales the vectors, we need to used the non-mixed
4142 version if x is real (since A is always complex). */
4143 double *xx_vec;
4144 xx_vec = (double *) blas_malloc(n_i * sizeof(double) * 2);
4145 if (n_i > 0 && xx_vec == NULL) {
4146 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4147 }
4148
4149 if (alpha_flag == 0) {
4150 alpha_i[0] = xrand(seed);
4151 alpha_i[1] = xrand(seed);
4152 }
4153 if (beta_flag == 0) {
4154 beta_i[0] = xrand(seed);
4155 beta_i[1] = xrand(seed);
4156 }
4157
4158 /* Fill in matrix A -- Hermitian. */
4159 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
4160 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
4161 a_elem[0] = xrand(seed);
4162 a_elem[1] = xrand(seed);
4163 a_i[aij] = a_elem[0];
4164 a_i[aij + 1] = a_elem[1];
4165 if (i == j)
4166 a_i[aij + 1] = 0.0;
4167 }
4168 }
4169
4170 /* Fill in vector x */
4171 for (i = 0, xi = x_starti; i < n_i; i++, xi += incxi) {
4172 x_elem = xrand(seed);
4173 x_i[xi] = x_elem;
4174 }
4175
4176 dcopy_vector(x_i, n_i, incx, x_vec, 1);
4177
4178 /* copy the real x_vec into complex xx_vec, so that
4179 pure complex test case generator can be called. */
4180 {
4181 int k;
4182 for (k = 0; k < n_i; k++) {
4183 xx_vec[2 * k] = x_vec[k];
4184 xx_vec[2 * k + 1] = 0.0;
4185 }
4186 }
4187
4188 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi,
4189 ri += incri) {
4190 zhe_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
4191
4192 BLAS_zdot_testgen(n_i, n_i, 0,
4193 norm, blas_no_conj,
4194 alpha, 1, beta, 1, a_vec,
4195 xx_vec, seed,
4196 y_elem, head_r_true_elem, tail_r_true_elem);
4197
4198 y_i[yi] = y_elem[0];
4199 y_i[yi + 1] = y_elem[1];
4200 head_r_true[ri] = head_r_true_elem[0];
4201 head_r_true[ri + 1] = head_r_true_elem[1];
4202 tail_r_true[ri] = tail_r_true_elem[0];
4203 tail_r_true[ri + 1] = tail_r_true_elem[1];
4204 }
4205
4206 blas_free(xx_vec);
4207 }
4208
4209 blas_free(a_vec);
4210 blas_free(x_vec);
4211 }
4212 } /* end BLAS_zhemv_z_d_testgen */
BLAS_chemv_c_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,int randomize,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,float * x,int incx,void * y,int incy,int * seed,double * head_r_true,double * tail_r_true)4213 void BLAS_chemv_c_s_testgen(int norm, enum blas_order_type order,
4214 enum blas_uplo_type uplo,
4215 int n, int randomize,
4216 void *alpha, int alpha_flag, void *beta,
4217 int beta_flag, void *a, int lda, float *x,
4218 int incx, void *y, int incy, int *seed,
4219 double *head_r_true, double *tail_r_true)
4220
4221 /*
4222 * Purpose
4223 * =======
4224 *
4225 * Generates the test inputs to BLAS_chemv_c_s{_x}
4226 *
4227 * Arguments
4228 * =========
4229 *
4230 * norm (input) int
4231 * = -1: the vectors are scaled with norms near underflow.
4232 * = 0: the vectors have norms of order 1.
4233 * = 1: the vectors are scaled with norms near overflow.
4234 *
4235 * order (input) enum blas_order_type
4236 * storage format of the matrices
4237 *
4238 * uplo (input) enum blas_uplo_type
4239 * which half of the hemvetric matrix a is to be stored.
4240 *
4241 * n (input) int
4242 * sizes of symmetrical matrix a, size of vectors x, y:
4243 * matrix a is n-by-n.
4244 *
4245 * randomize (input) int
4246 * if 0, entries in matrices A, x will be chosen for
4247 * maximum cancellation, but with less randomness.
4248 * if 1, every entry in the matrix A, x will be
4249 * random.
4250 *
4251 * alpha (input/output) void*
4252 * if alpha_flag = 1, alpha is input.
4253 * if alpha_flag = 0, alpha is output.
4254 *
4255 * alpha_flag (input) int
4256 * = 0: alpha is free, and is output.
4257 * = 1: alpha is fixed on input.
4258 *
4259 * beta (input/output) void*
4260 * if beta_flag = 1, beta is input.
4261 * if beta_flag = 0, beta is output.
4262 *
4263 * beta_flag (input) int
4264 * = 0: beta is free, and is output.
4265 * = 1: beta is fixed on input.
4266 *
4267 * a (input/output) void*
4268 *
4269 * lda (input) lda
4270 * leading dimension of matrix A.
4271 *
4272 * x (input/output) float*
4273 *
4274 * incx (input) int
4275 * stride of vector x.
4276 *
4277 * y (input/output) void*
4278 * generated vector y that will be used as an input to HEMV.
4279 *
4280 * incy (input) int
4281 * leading dimension of vector y.
4282 *
4283 * seed (input/output) int *
4284 * seed for the random number generator.
4285 *
4286 * head_r_true (output) double *
4287 * the leading part of the truth in double-double.
4288 *
4289 * tail_r_true (output) double *
4290 * the trailing part of the truth in double-double
4291 *
4292 */
4293 {
4294 char *routine_name = "BLAS_chemv_c_s_testgen";
4295 {
4296
4297 /* Strategy:
4298 R1 = alpha * A1 * x + beta * y1
4299 R2 = alpha * A2 * x + beta * y2
4300 where all the matrices and vectors are real. Then let R = R1 + i R2,
4301 A = A1 + i A2, y = y1 + i y2. To make A hermitian, A1 is
4302 symmetric, and A2 is a skew matrix (trans(A2) = -A2).
4303 */
4304
4305 int i, j;
4306 int yi;
4307 int aij, ai;
4308 int a1ij, a1i;
4309 int xi;
4310 int mi;
4311 int incyi, x_starti, y_starti;
4312 int incaij, incai;
4313 int inca1ij, inca1i;
4314 int incxi;
4315 int inca_vec, incx_vec;
4316 int n_i;
4317 int ld;
4318 int ab;
4319 int ri, incri;
4320
4321 float *a1;
4322 float *a2;
4323 float *y1;
4324 float *y2;
4325 float *x0;
4326
4327 double *head_r1_true, *tail_r1_true;
4328 double *head_r2_true, *tail_r2_true;
4329
4330 double head_r_elem1, tail_r_elem1;
4331 double head_r_elem2, tail_r_elem2;
4332 double head_r_elem, tail_r_elem;
4333
4334 float *a_vec;
4335 float *x_vec;
4336
4337 float *y_i = (float *) y;
4338 float *alpha_i = (float *) alpha;
4339 float *beta_i = (float *) beta;
4340
4341 float *a_i = (float *) a;
4342 float *x_i = x;
4343
4344 n_i = n;
4345
4346 ld = n_i;
4347
4348
4349
4350 if (order == blas_colmajor) {
4351 inca1i = incai = 1;
4352 incyi = incy;
4353 incaij = lda;
4354 incxi = incx;
4355 inca1ij = n_i;
4356 } else {
4357 incyi = incy;
4358 incai = lda;
4359 incxi = incx;
4360 inca1i = n_i;
4361 inca1ij = incaij = 1;
4362 }
4363 if ((0 == incx) || (0 == incy)) {
4364 BLAS_error(routine_name, 0, 0, NULL);
4365 }
4366 if (incx > 0) {
4367 x_starti = 0;
4368 } else {
4369 x_starti = (-n_i + 1) * incx;
4370 }
4371 if (incy > 0) {
4372 y_starti = 0;
4373 } else {
4374 y_starti = (-n_i + 1) * incy;
4375 }
4376 incri = 1;
4377 incri *= 2;
4378
4379 y_starti *= 2;
4380 incyi *= 2;
4381 incai *= 2;
4382 incaij *= 2;
4383
4384
4385 inca_vec = incx_vec = 1;
4386 inca_vec *= 2;
4387
4388 a_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
4389 if (n_i > 0 && a_vec == NULL) {
4390 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4391 }
4392 for (i = 0; i < n_i * inca_vec; i += inca_vec) {
4393 a_vec[i] = 0.0;
4394 a_vec[i + 1] = 0.0;
4395 }
4396 x_vec = (float *) blas_malloc(n_i * sizeof(float));
4397 if (n_i > 0 && x_vec == NULL) {
4398 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4399 }
4400 for (i = 0; i < n_i * incx_vec; i += incx_vec) {
4401 x_vec[i] = 0.0;
4402 }
4403
4404 if (randomize == 0) {
4405 int incx0, incy1, incy2, incmi = 1;
4406 a1 = (float *) blas_malloc(n_i * n_i * sizeof(float));
4407 if (n_i * n_i > 0 && a1 == NULL) {
4408 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4409 }
4410 a2 = (float *) blas_malloc(n_i * n_i * sizeof(float));
4411 if (n_i * n_i > 0 && a2 == NULL) {
4412 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4413 }
4414 for (i = 0; i < n_i * n_i; ++i) {
4415 a1[i] = a2[i] = 0.0;
4416 }
4417 y1 = (float *) blas_malloc(n_i * sizeof(float));
4418 if (n_i > 0 && y1 == NULL) {
4419 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4420 }
4421 incy1 = 1;
4422
4423 y2 = (float *) blas_malloc(n_i * sizeof(float));
4424 if (n_i > 0 && y2 == NULL) {
4425 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4426 }
4427 incy2 = 1;
4428
4429 x0 = (float *) blas_malloc(n_i * sizeof(float));
4430 if (n_i > 0 && x0 == NULL) {
4431 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4432 }
4433 incx0 = 1;
4434
4435 for (i = 0; i < n_i; ++i) {
4436 y1[i] = y2[i] = x0[i] = 0.0;
4437 }
4438 head_r1_true = (double *) blas_malloc(n_i * sizeof(double));
4439 tail_r1_true = (double *) blas_malloc(n_i * sizeof(double));
4440 if (n_i > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
4441 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4442 };
4443 head_r2_true = (double *) blas_malloc(n_i * sizeof(double));
4444 tail_r2_true = (double *) blas_malloc(n_i * sizeof(double));
4445 if (n_i > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
4446 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4447 };
4448
4449 /* First generate the real portion of matrix A, and matrix B.
4450 Note that Re(A) is a symmetric matrix. */
4451 /* x0 is output from this call */
4452 BLAS_ssymv_testgen
4453 (norm, order, uplo, n, 0, alpha_i, alpha_flag, beta_i,
4454 beta_flag, a1, n_i, x0, incx0, y1, incy1, seed, head_r1_true,
4455 tail_r1_true);
4456
4457 /* x0 is now fixed and is input to this call */
4458 BLAS_sskew_testgen_hemv
4459 (norm, order, uplo, n, 0, alpha_i, beta_i, a2, n_i, x0,
4460 incx0, y2, incy2, seed, head_r2_true, tail_r2_true);
4461
4462
4463 /* The case where x is a real vector.
4464
4465 There are four cases to consider, depending on the
4466 values of alpha and beta.
4467
4468 values scaling
4469 alpha beta alpha A x beta y R (truth)
4470 0 1 1
4471 1 1 ? -i i
4472 2 ? 1 i i i
4473 3 ? ? 1+i 1+i 1+i
4474
4475 Note that we can afford to scale truth by (1+i) since they
4476 are computed in double-double.
4477 */
4478
4479 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
4480 ab = 0;
4481 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
4482 } else if (alpha_i[0] == 1.0) {
4483 ab = 1;
4484 /* set alpha to 1, multiply beta by -i. */
4485 alpha_i[1] = 0.0;
4486 beta_i[1] = -beta_i[0];
4487 beta_i[0] = 0.0;
4488 } else if (beta_i[0] == 1.0) {
4489 ab = 2;
4490 /* set beta to 1, multiply alpha by i. */
4491 beta_i[1] = 0.0;
4492 alpha_i[1] = alpha_i[0];
4493 alpha_i[0] = 0.0;
4494 } else {
4495 ab = 3;
4496 /* multiply alpha, beta by (1 + i). */
4497 alpha_i[1] = alpha_i[0];
4498 beta_i[1] = beta_i[0];
4499 }
4500
4501 /* Now fill in a */
4502 for (i = 0, ai = 0, a1i = 0; i < n_i; i++, ai += incai, a1i += inca1i) {
4503 for (j = 0, aij = ai, a1ij = a1i; j < n_i;
4504 j++, aij += incaij, a1ij += inca1ij) {
4505 a_i[aij] = a1[a1ij];
4506 a_i[aij + 1] = a2[a1ij];
4507 }
4508 }
4509
4510 /* Fill in x */
4511 for (i = 0, xi = x_starti, mi = 0; i < n_i; i++, xi += incxi,
4512 mi += incx0) {
4513 x_i[xi] = x0[mi];
4514 }
4515
4516 /* Fill in y */
4517 for (i = 0, yi = y_starti, mi = 0; i < n_i; i++, yi += incyi,
4518 mi += incy1) {
4519 if (ab == 1 || ab == 2) {
4520 y_i[yi] = -y2[mi];
4521 y_i[yi + 1] = y1[mi];
4522 } else {
4523 y_i[yi] = y1[mi];
4524 y_i[yi + 1] = y2[mi];
4525 }
4526 }
4527
4528 /* Fill in truth */
4529 for (i = 0, ri = 0, mi = 0; i < n_i; i++, ri += incri, mi += incmi) {
4530 if (ab == 0 || ab == 1) {
4531 head_r_true[ri] = head_r1_true[mi];
4532 tail_r_true[ri] = tail_r1_true[mi];
4533 head_r_true[ri + 1] = head_r2_true[mi];
4534 tail_r_true[ri + 1] = tail_r2_true[mi];
4535 } else if (ab == 2) {
4536 head_r_true[ri] = -head_r2_true[mi];
4537 tail_r_true[ri] = -tail_r2_true[mi];
4538 head_r_true[ri + 1] = head_r1_true[mi];
4539 tail_r_true[ri + 1] = tail_r1_true[mi];
4540 } else {
4541 head_r_elem1 = head_r1_true[mi];
4542 tail_r_elem1 = tail_r1_true[mi];
4543
4544 head_r_elem2 = head_r2_true[mi];
4545 tail_r_elem2 = tail_r2_true[mi];
4546
4547 {
4548 /* Compute double-double = double-double + double-double. */
4549 double bv;
4550 double s1, s2, t1, t2;
4551
4552 /* Add two hi words. */
4553 s1 = head_r_elem1 + head_r_elem2;
4554 bv = s1 - head_r_elem1;
4555 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
4556
4557 /* Add two lo words. */
4558 t1 = tail_r_elem1 + tail_r_elem2;
4559 bv = t1 - tail_r_elem1;
4560 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
4561
4562 s2 += t1;
4563
4564 /* Renormalize (s1, s2) to (t1, s2) */
4565 t1 = s1 + s2;
4566 s2 = s2 - (t1 - s1);
4567
4568 t2 += s2;
4569
4570 /* Renormalize (t1, t2) */
4571 head_r_elem = t1 + t2;
4572 tail_r_elem = t2 - (head_r_elem - t1);
4573 }
4574
4575 /* Set the imaginary part to R1 + R2 */
4576 tail_r_true[ri + 1] = tail_r_elem;
4577 head_r_true[ri + 1] = head_r_elem;
4578
4579 /* Set the real part to R1 - R2. */
4580 {
4581 double head_bt, tail_bt;
4582 head_bt = -head_r_elem2;
4583 tail_bt = -tail_r_elem2;
4584 {
4585 /* Compute double-double = double-double + double-double. */
4586 double bv;
4587 double s1, s2, t1, t2;
4588
4589 /* Add two hi words. */
4590 s1 = head_r_elem1 + head_bt;
4591 bv = s1 - head_r_elem1;
4592 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
4593
4594 /* Add two lo words. */
4595 t1 = tail_r_elem1 + tail_bt;
4596 bv = t1 - tail_r_elem1;
4597 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
4598
4599 s2 += t1;
4600
4601 /* Renormalize (s1, s2) to (t1, s2) */
4602 t1 = s1 + s2;
4603 s2 = s2 - (t1 - s1);
4604
4605 t2 += s2;
4606
4607 /* Renormalize (t1, t2) */
4608 head_r_elem = t1 + t2;
4609 tail_r_elem = t2 - (head_r_elem - t1);
4610 }
4611 }
4612 tail_r_true[ri] = tail_r_elem;
4613 head_r_true[ri] = head_r_elem;
4614 }
4615 }
4616
4617
4618 blas_free(a1);
4619 blas_free(a2);
4620 blas_free(y1);
4621 blas_free(y2);
4622 blas_free(x0);
4623 blas_free(head_r1_true);
4624 blas_free(tail_r1_true);
4625 blas_free(head_r2_true);
4626 blas_free(tail_r2_true);
4627 } else {
4628 /* get random A, x, then compute y for some cancellation. */
4629 float a_elem[2];
4630 float x_elem;
4631 float y_elem[2];
4632 double head_r_true_elem[2], tail_r_true_elem[2];
4633
4634 /* Since mixed real/complex test generator for dot
4635 scales the vectors, we need to used the non-mixed
4636 version if x is real (since A is always complex). */
4637 float *xx_vec;
4638 xx_vec = (float *) blas_malloc(n_i * sizeof(float) * 2);
4639 if (n_i > 0 && xx_vec == NULL) {
4640 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4641 }
4642
4643 if (alpha_flag == 0) {
4644 alpha_i[0] = xrand(seed);
4645 alpha_i[1] = xrand(seed);
4646 }
4647 if (beta_flag == 0) {
4648 beta_i[0] = xrand(seed);
4649 beta_i[1] = xrand(seed);
4650 }
4651
4652 /* Fill in matrix A -- Hermitian. */
4653 for (i = 0, ai = 0; i < n_i; i++, ai += incai) {
4654 for (j = 0, aij = ai; j < n_i; j++, aij += incaij) {
4655 a_elem[0] = xrand(seed);
4656 a_elem[1] = xrand(seed);
4657 a_i[aij] = a_elem[0];
4658 a_i[aij + 1] = a_elem[1];
4659 if (i == j)
4660 a_i[aij + 1] = 0.0;
4661 }
4662 }
4663
4664 /* Fill in vector x */
4665 for (i = 0, xi = x_starti; i < n_i; i++, xi += incxi) {
4666 x_elem = xrand(seed);
4667 x_i[xi] = x_elem;
4668 }
4669
4670 scopy_vector(x_i, n_i, incx, x_vec, 1);
4671
4672 /* copy the real x_vec into complex xx_vec, so that
4673 pure complex test case generator can be called. */
4674 {
4675 int k;
4676 for (k = 0; k < n_i; k++) {
4677 xx_vec[2 * k] = x_vec[k];
4678 xx_vec[2 * k + 1] = 0.0;
4679 }
4680 }
4681
4682 for (i = 0, yi = y_starti, ri = 0; i < n_i; i++, yi += incyi,
4683 ri += incri) {
4684 che_copy_row(order, uplo, blas_left_side, n_i, a, lda, a_vec, i);
4685
4686 BLAS_cdot_testgen(n_i, n_i, 0,
4687 norm, blas_no_conj,
4688 alpha, 1, beta, 1, a_vec,
4689 xx_vec, seed,
4690 y_elem, head_r_true_elem, tail_r_true_elem);
4691
4692 y_i[yi] = y_elem[0];
4693 y_i[yi + 1] = y_elem[1];
4694 head_r_true[ri] = head_r_true_elem[0];
4695 head_r_true[ri + 1] = head_r_true_elem[1];
4696 tail_r_true[ri] = tail_r_true_elem[0];
4697 tail_r_true[ri + 1] = tail_r_true_elem[1];
4698 }
4699
4700 blas_free(xx_vec);
4701 }
4702
4703 blas_free(a_vec);
4704 blas_free(x_vec);
4705 }
4706 } /* end BLAS_chemv_c_s_testgen */
4707