1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7
BLAS_sskmv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,float * alpha,float * beta,float * a,int lda,float * x_head,float * x_tail,float * y,int * seed,double * head_r_true,double * tail_r_true)8 void BLAS_sskmv2_testgen(int norm, enum blas_order_type order,
9 enum blas_uplo_type uplo, int n, float *alpha,
10 float *beta, float *a, int lda, float *x_head,
11 float *x_tail, float *y, int *seed,
12 double *head_r_true, double *tail_r_true)
13
14 /*
15 * Purpose
16 * =======
17 *
18 * Generates a skew Matrix for use with hemv2 testing
19 *
20 * Arguments
21 * =========
22 *
23 * norm (input) int
24 * = -1: the vectors are scaled with norms near underflow.
25 * = 0: the vectors have norms of order 1.
26 * = 1: the vectors are scaled with norms near overflow.
27 *
28 * order (input) enum blas_order_type
29 * storage format of the matrices
30 *
31 * uplo (input) enum blas_uplo_type
32 * which half of the skew symmetric matrix a is to be stored.
33 *
34 * n (input) int
35 * sizes of symmetrical matrix a, size of vectors x, y:
36 * matrix a is n-by-n.
37 *
38 * alpha (input/output) float*
39 *
40 * beta (input) float*
41 *
42 * a (input/output) float*
43 *
44 * lda (input) lda
45 * leading dimension of matrix A.
46 *
47 * x_head (input) float*
48 * x_tail (input) float*
49 * note : x is input only. x should be determined before calling
50 * this function.
51 *
52 * y (input/output) float*
53 * generated vector y.
54 *
55 * seed (input/output) int *
56 * seed for the random number generator.
57 *
58 * head_r_true (output) double *
59 * the leading part of the truth in double-double.
60 *
61 * tail_r_true (output) double *
62 * the trailing part of the truth in double-double
63 *
64 */
65 {
66
67 int i, yi, ri;
68 int incx = 1, incy = 1;
69 int incyi, incri;
70 int incx_veci, yi0;
71 int inca_vec;
72
73 float y_elem;
74 double head_r_true_elem, tail_r_true_elem;
75
76 float *a_vec;
77 float *x_head_vec;
78 float *x_tail_vec;
79
80 float *y_i = y;
81 float *alpha_i = alpha;
82 float *beta_i = beta;
83 float *x_head_i = x_head;
84 float *x_tail_i = x_tail;
85
86 /*a_vec must have stride of 1 */
87 inca_vec = 1;
88
89
90 a_vec = (float *) blas_malloc(n * sizeof(float));
91 if (n > 0 && a_vec == NULL) {
92 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
93 }
94 for (i = 0; i < n; i += inca_vec) {
95 a_vec[i] = 0.0;
96 }
97 x_head_vec = (float *) blas_malloc(n * sizeof(float));
98 if (n > 0 && x_head_vec == NULL) {
99 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
100 }
101 x_tail_vec = (float *) blas_malloc(n * sizeof(float));
102 if (n > 0 && x_tail_vec == NULL) {
103 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
104 }
105 scopy_vector(x_head_i, n, incx, x_head_vec, 1);
106 scopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
107
108 incyi = incy;
109
110 yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
111
112 incri = 1;
113
114
115 incx_veci = 1;
116
117
118 /* Fill in skew matrix A */
119 for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
120 /* x_i has already been copied to x_vec */
121 sskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
122
123 /* skew matricies have zeroed diagonals */
124 a_vec[i] = 0.0;
125 BLAS_sdot2_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i, 1,
126 beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
127 &y_elem, &head_r_true_elem, &tail_r_true_elem);
128
129 sskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
130
131 /*commits an element to the generated y */
132 y_i[yi] = y_elem;
133 head_r_true[ri] = head_r_true_elem;
134 tail_r_true[ri] = tail_r_true_elem;
135 }
136
137 blas_free(a_vec);
138 blas_free(x_head_vec);
139 blas_free(x_tail_vec);
140 }
BLAS_dskmv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,double * a,int lda,double * x_head,double * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)141 void BLAS_dskmv2_testgen(int norm, enum blas_order_type order,
142 enum blas_uplo_type uplo, int n, double *alpha,
143 double *beta, double *a, int lda, double *x_head,
144 double *x_tail, double *y, int *seed,
145 double *head_r_true, double *tail_r_true)
146
147 /*
148 * Purpose
149 * =======
150 *
151 * Generates a skew Matrix for use with hemv2 testing
152 *
153 * Arguments
154 * =========
155 *
156 * norm (input) int
157 * = -1: the vectors are scaled with norms near underflow.
158 * = 0: the vectors have norms of order 1.
159 * = 1: the vectors are scaled with norms near overflow.
160 *
161 * order (input) enum blas_order_type
162 * storage format of the matrices
163 *
164 * uplo (input) enum blas_uplo_type
165 * which half of the skew symmetric matrix a is to be stored.
166 *
167 * n (input) int
168 * sizes of symmetrical matrix a, size of vectors x, y:
169 * matrix a is n-by-n.
170 *
171 * alpha (input/output) double*
172 *
173 * beta (input) double*
174 *
175 * a (input/output) double*
176 *
177 * lda (input) lda
178 * leading dimension of matrix A.
179 *
180 * x_head (input) double*
181 * x_tail (input) double*
182 * note : x is input only. x should be determined before calling
183 * this function.
184 *
185 * y (input/output) double*
186 * generated vector y.
187 *
188 * seed (input/output) int *
189 * seed for the random number generator.
190 *
191 * head_r_true (output) double *
192 * the leading part of the truth in double-double.
193 *
194 * tail_r_true (output) double *
195 * the trailing part of the truth in double-double
196 *
197 */
198 {
199
200 int i, yi, ri;
201 int incx = 1, incy = 1;
202 int incyi, incri;
203 int incx_veci, yi0;
204 int inca_vec;
205
206 double y_elem;
207 double head_r_true_elem, tail_r_true_elem;
208
209 double *a_vec;
210 double *x_head_vec;
211 double *x_tail_vec;
212
213 double *y_i = y;
214 double *alpha_i = alpha;
215 double *beta_i = beta;
216 double *x_head_i = x_head;
217 double *x_tail_i = x_tail;
218
219 /*a_vec must have stride of 1 */
220 inca_vec = 1;
221
222
223 a_vec = (double *) blas_malloc(n * sizeof(double));
224 if (n > 0 && a_vec == NULL) {
225 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
226 }
227 for (i = 0; i < n; i += inca_vec) {
228 a_vec[i] = 0.0;
229 }
230 x_head_vec = (double *) blas_malloc(n * sizeof(double));
231 if (n > 0 && x_head_vec == NULL) {
232 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
233 }
234 x_tail_vec = (double *) blas_malloc(n * sizeof(double));
235 if (n > 0 && x_tail_vec == NULL) {
236 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
237 }
238 dcopy_vector(x_head_i, n, incx, x_head_vec, 1);
239 dcopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
240
241 incyi = incy;
242
243 yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
244
245 incri = 1;
246
247
248 incx_veci = 1;
249
250
251 /* Fill in skew matrix A */
252 for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
253 /* x_i has already been copied to x_vec */
254 dskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
255
256 /* skew matricies have zeroed diagonals */
257 a_vec[i] = 0.0;
258 BLAS_ddot2_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i, 1,
259 beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
260 &y_elem, &head_r_true_elem, &tail_r_true_elem);
261
262 dskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
263
264 /*commits an element to the generated y */
265 y_i[yi] = y_elem;
266 head_r_true[ri] = head_r_true_elem;
267 tail_r_true[ri] = tail_r_true_elem;
268 }
269
270 blas_free(a_vec);
271 blas_free(x_head_vec);
272 blas_free(x_tail_vec);
273 }
BLAS_dskmv2_testgen_d_s(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,double * a,int lda,float * x_head,float * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)274 void BLAS_dskmv2_testgen_d_s(int norm, enum blas_order_type order,
275 enum blas_uplo_type uplo, int n, double *alpha,
276 double *beta, double *a, int lda, float *x_head,
277 float *x_tail, double *y, int *seed,
278 double *head_r_true, double *tail_r_true)
279
280 /*
281 * Purpose
282 * =======
283 *
284 * Generates a skew Matrix for use with hemv2 testing
285 *
286 * Arguments
287 * =========
288 *
289 * norm (input) int
290 * = -1: the vectors are scaled with norms near underflow.
291 * = 0: the vectors have norms of order 1.
292 * = 1: the vectors are scaled with norms near overflow.
293 *
294 * order (input) enum blas_order_type
295 * storage format of the matrices
296 *
297 * uplo (input) enum blas_uplo_type
298 * which half of the skew symmetric matrix a is to be stored.
299 *
300 * n (input) int
301 * sizes of symmetrical matrix a, size of vectors x, y:
302 * matrix a is n-by-n.
303 *
304 * alpha (input/output) double*
305 *
306 * beta (input) double*
307 *
308 * a (input/output) double*
309 *
310 * lda (input) lda
311 * leading dimension of matrix A.
312 *
313 * x_head (input) float*
314 * x_tail (input) float*
315 * note : x is input only. x should be determined before calling
316 * this function.
317 *
318 * y (input/output) double*
319 * generated vector y.
320 *
321 * seed (input/output) int *
322 * seed for the random number generator.
323 *
324 * head_r_true (output) double *
325 * the leading part of the truth in double-double.
326 *
327 * tail_r_true (output) double *
328 * the trailing part of the truth in double-double
329 *
330 */
331 {
332
333 int i, yi, ri;
334 int incx = 1, incy = 1;
335 int incyi, incri;
336 int incx_veci, yi0;
337 int inca_vec;
338
339 double y_elem;
340 double head_r_true_elem, tail_r_true_elem;
341
342 double *a_vec;
343 float *x_head_vec;
344 float *x_tail_vec;
345
346 double *y_i = y;
347 double *alpha_i = alpha;
348 double *beta_i = beta;
349 float *x_head_i = x_head;
350 float *x_tail_i = x_tail;
351
352 /*a_vec must have stride of 1 */
353 inca_vec = 1;
354
355
356 a_vec = (double *) blas_malloc(n * sizeof(double));
357 if (n > 0 && a_vec == NULL) {
358 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
359 }
360 for (i = 0; i < n; i += inca_vec) {
361 a_vec[i] = 0.0;
362 }
363 x_head_vec = (float *) blas_malloc(n * sizeof(float));
364 if (n > 0 && x_head_vec == NULL) {
365 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
366 }
367 x_tail_vec = (float *) blas_malloc(n * sizeof(float));
368 if (n > 0 && x_tail_vec == NULL) {
369 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
370 }
371 scopy_vector(x_head_i, n, incx, x_head_vec, 1);
372 scopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
373
374 incyi = incy;
375
376 yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
377
378 incri = 1;
379
380
381 incx_veci = 1;
382
383
384 /* Fill in skew matrix A */
385 for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
386 /* x_i has already been copied to x_vec */
387 dskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
388
389 /* skew matricies have zeroed diagonals */
390 a_vec[i] = 0.0;
391 BLAS_ddot2_s_d_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i,
392 1, beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
393 &y_elem, &head_r_true_elem, &tail_r_true_elem);
394
395 dskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
396
397 /*commits an element to the generated y */
398 y_i[yi] = y_elem;
399 head_r_true[ri] = head_r_true_elem;
400 tail_r_true[ri] = tail_r_true_elem;
401 }
402
403 blas_free(a_vec);
404 blas_free(x_head_vec);
405 blas_free(x_tail_vec);
406 }
BLAS_dskmv2_testgen_s_d(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,float * a,int lda,double * x_head,double * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)407 void BLAS_dskmv2_testgen_s_d(int norm, enum blas_order_type order,
408 enum blas_uplo_type uplo, int n, double *alpha,
409 double *beta, float *a, int lda, double *x_head,
410 double *x_tail, double *y, int *seed,
411 double *head_r_true, double *tail_r_true)
412
413 /*
414 * Purpose
415 * =======
416 *
417 * Generates a skew Matrix for use with hemv2 testing
418 *
419 * Arguments
420 * =========
421 *
422 * norm (input) int
423 * = -1: the vectors are scaled with norms near underflow.
424 * = 0: the vectors have norms of order 1.
425 * = 1: the vectors are scaled with norms near overflow.
426 *
427 * order (input) enum blas_order_type
428 * storage format of the matrices
429 *
430 * uplo (input) enum blas_uplo_type
431 * which half of the skew symmetric matrix a is to be stored.
432 *
433 * n (input) int
434 * sizes of symmetrical matrix a, size of vectors x, y:
435 * matrix a is n-by-n.
436 *
437 * alpha (input/output) double*
438 *
439 * beta (input) double*
440 *
441 * a (input/output) float*
442 *
443 * lda (input) lda
444 * leading dimension of matrix A.
445 *
446 * x_head (input) double*
447 * x_tail (input) double*
448 * note : x is input only. x should be determined before calling
449 * this function.
450 *
451 * y (input/output) double*
452 * generated vector y.
453 *
454 * seed (input/output) int *
455 * seed for the random number generator.
456 *
457 * head_r_true (output) double *
458 * the leading part of the truth in double-double.
459 *
460 * tail_r_true (output) double *
461 * the trailing part of the truth in double-double
462 *
463 */
464 {
465
466 int i, yi, ri;
467 int incx = 1, incy = 1;
468 int incyi, incri;
469 int incx_veci, yi0;
470 int inca_vec;
471
472 double y_elem;
473 double head_r_true_elem, tail_r_true_elem;
474
475 float *a_vec;
476 double *x_head_vec;
477 double *x_tail_vec;
478
479 double *y_i = y;
480 double *alpha_i = alpha;
481 double *beta_i = beta;
482 double *x_head_i = x_head;
483 double *x_tail_i = x_tail;
484
485 /*a_vec must have stride of 1 */
486 inca_vec = 1;
487
488
489 a_vec = (float *) blas_malloc(n * sizeof(float));
490 if (n > 0 && a_vec == NULL) {
491 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
492 }
493 for (i = 0; i < n; i += inca_vec) {
494 a_vec[i] = 0.0;
495 }
496 x_head_vec = (double *) blas_malloc(n * sizeof(double));
497 if (n > 0 && x_head_vec == NULL) {
498 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
499 }
500 x_tail_vec = (double *) blas_malloc(n * sizeof(double));
501 if (n > 0 && x_tail_vec == NULL) {
502 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
503 }
504 dcopy_vector(x_head_i, n, incx, x_head_vec, 1);
505 dcopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
506
507 incyi = incy;
508
509 yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
510
511 incri = 1;
512
513
514 incx_veci = 1;
515
516
517 /* Fill in skew matrix A */
518 for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
519 /* x_i has already been copied to x_vec */
520 sskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
521
522 /* skew matricies have zeroed diagonals */
523 a_vec[i] = 0.0;
524 BLAS_ddot2_d_s_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i,
525 1, beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
526 &y_elem, &head_r_true_elem, &tail_r_true_elem);
527
528 sskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
529
530 /*commits an element to the generated y */
531 y_i[yi] = y_elem;
532 head_r_true[ri] = head_r_true_elem;
533 tail_r_true[ri] = tail_r_true_elem;
534 }
535
536 blas_free(a_vec);
537 blas_free(x_head_vec);
538 blas_free(x_tail_vec);
539 }
BLAS_dskmv2_testgen_s_s(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,double * alpha,double * beta,float * a,int lda,float * x_head,float * x_tail,double * y,int * seed,double * head_r_true,double * tail_r_true)540 void BLAS_dskmv2_testgen_s_s(int norm, enum blas_order_type order,
541 enum blas_uplo_type uplo, int n, double *alpha,
542 double *beta, float *a, int lda, float *x_head,
543 float *x_tail, double *y, int *seed,
544 double *head_r_true, double *tail_r_true)
545
546 /*
547 * Purpose
548 * =======
549 *
550 * Generates a skew Matrix for use with hemv2 testing
551 *
552 * Arguments
553 * =========
554 *
555 * norm (input) int
556 * = -1: the vectors are scaled with norms near underflow.
557 * = 0: the vectors have norms of order 1.
558 * = 1: the vectors are scaled with norms near overflow.
559 *
560 * order (input) enum blas_order_type
561 * storage format of the matrices
562 *
563 * uplo (input) enum blas_uplo_type
564 * which half of the skew symmetric matrix a is to be stored.
565 *
566 * n (input) int
567 * sizes of symmetrical matrix a, size of vectors x, y:
568 * matrix a is n-by-n.
569 *
570 * alpha (input/output) double*
571 *
572 * beta (input) double*
573 *
574 * a (input/output) float*
575 *
576 * lda (input) lda
577 * leading dimension of matrix A.
578 *
579 * x_head (input) float*
580 * x_tail (input) float*
581 * note : x is input only. x should be determined before calling
582 * this function.
583 *
584 * y (input/output) double*
585 * generated vector y.
586 *
587 * seed (input/output) int *
588 * seed for the random number generator.
589 *
590 * head_r_true (output) double *
591 * the leading part of the truth in double-double.
592 *
593 * tail_r_true (output) double *
594 * the trailing part of the truth in double-double
595 *
596 */
597 {
598
599 int i, yi, ri;
600 int incx = 1, incy = 1;
601 int incyi, incri;
602 int incx_veci, yi0;
603 int inca_vec;
604
605 double y_elem;
606 double head_r_true_elem, tail_r_true_elem;
607
608 float *a_vec;
609 float *x_head_vec;
610 float *x_tail_vec;
611
612 double *y_i = y;
613 double *alpha_i = alpha;
614 double *beta_i = beta;
615 float *x_head_i = x_head;
616 float *x_tail_i = x_tail;
617
618 /*a_vec must have stride of 1 */
619 inca_vec = 1;
620
621
622 a_vec = (float *) blas_malloc(n * sizeof(float));
623 if (n > 0 && a_vec == NULL) {
624 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
625 }
626 for (i = 0; i < n; i += inca_vec) {
627 a_vec[i] = 0.0;
628 }
629 x_head_vec = (float *) blas_malloc(n * sizeof(float));
630 if (n > 0 && x_head_vec == NULL) {
631 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
632 }
633 x_tail_vec = (float *) blas_malloc(n * sizeof(float));
634 if (n > 0 && x_tail_vec == NULL) {
635 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
636 }
637 scopy_vector(x_head_i, n, incx, x_head_vec, 1);
638 scopy_vector(x_tail_i, n, incx, x_tail_vec, 1);
639
640 incyi = incy;
641
642 yi0 = (incy > 0) ? 0 : -(n - 1) * incyi;
643
644 incri = 1;
645
646
647 incx_veci = 1;
648
649
650 /* Fill in skew matrix A */
651 for (i = 0, yi = yi0, ri = 0; i < n; i++, ri += incri, yi += incyi) {
652 /* x_i has already been copied to x_vec */
653 sskew_copy_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
654
655 /* skew matricies have zeroed diagonals */
656 a_vec[i] = 0.0;
657 BLAS_ddot2_s_s_testgen(n, i + 1, n - i - 1, norm, blas_no_conj, alpha_i,
658 1, beta_i, 1, x_head_vec, x_tail_vec, a_vec, seed,
659 &y_elem, &head_r_true_elem, &tail_r_true_elem);
660
661 sskew_commit_row(order, uplo, blas_left_side, n, a, lda, a_vec, i);
662
663 /*commits an element to the generated y */
664 y_i[yi] = y_elem;
665 head_r_true[ri] = head_r_true_elem;
666 tail_r_true[ri] = tail_r_true_elem;
667 }
668
669 blas_free(a_vec);
670 blas_free(x_head_vec);
671 blas_free(x_tail_vec);
672 }
673
674
BLAS_chemv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)675 void BLAS_chemv2_testgen(int norm, enum blas_order_type order,
676 enum blas_uplo_type uplo, int n, void *alpha,
677 int alpha_flag, void *beta, int beta_flag, void *a,
678 int lda, void *x_head, void *x_tail, void *y,
679 int *seed, double *head_r_true, double *tail_r_true)
680
681 /*
682 * Purpose
683 * =======
684 *
685 * Generates the test inputs to BLAS_chemv2{_x}
686 *
687 * Arguments
688 * =========
689 *
690 * norm (input) int
691 * = -1: the vectors are scaled with norms near underflow.
692 * = 0: the vectors have norms of order 1.
693 * = 1: the vectors are scaled with norms near overflow.
694 *
695 * order (input) enum blas_order_type
696 * storage format of the matrices
697 *
698 * uplo (input) enum blas_uplo_type
699 * which half of the hemv2etric matrix a is to be stored.
700 *
701 * n (input) int
702 * sizes of symmetrical matrix a, size of vectors x, y:
703 * matrix a is n-by-n.
704 *
705 * alpha (input/output) void*
706 * if alpha_flag = 1, alpha is input.
707 * if alpha_flag = 0, alpha is output.
708 *
709 * alpha_flag (input) int
710 * = 0: alpha is free, and is output.
711 * = 1: alpha is fixed on input.
712 *
713 * beta (input/output) void*
714 * if beta_flag = 1, beta is input.
715 * if beta_flag = 0, beta is output.
716 *
717 * beta_flag (input) int
718 * = 0: beta is free, and is output.
719 * = 1: beta is fixed on input.
720 *
721 * a (input/output) void*
722 *
723 * lda (input) lda
724 * leading dimension of matrix A.
725 *
726 * x_head (input/output) void*
727 * x_tail (input/output) void*
728 * vector x = (x_head + x_tail)
729 *
730 * y (input/output) void*
731 * generated vector y that will be used as an input to HEMV2.
732 *
733 * seed (input/output) int *
734 * seed for the random number generator.
735 *
736 * head_r_true (output) double *
737 * the leading part of the truth in double-double.
738 *
739 * tail_r_true (output) double *
740 * the trailing part of the truth in double-double
741 *
742 */
743 {
744 {
745
746 /* Strategy:
747 r1 = alpha * A1 * x + beta * y1
748 r2 = alpha * A2 * x + beta * y2
749 where all the matrices and vectors are real. Then let R = R1 + i R2,
750 A = A1 + i A2, y = y1 + i y2. To make A Hermitian, A1 is symmetric,
751 and A2 is skew-symmetric.
752 */
753
754 int i, j;
755 int yi;
756 int aij, ai;
757 int a1ij, a1i;
758 int xi;
759 int mi;
760 int incx = 1, incy = 1;
761 int incyi, xi0, yi0;
762 int incaij, incai;
763 int inca1ij, inca1i;
764 int incxi;
765 int inca_vec, incx_vec;
766 int ld;
767 int ab;
768 int ri, incri;
769
770 float *a1;
771 float *a2;
772 float *y1;
773 float *y2;
774 float *x_head_0;
775 float *x_tail_0;
776
777 double *head_r1_true, *tail_r1_true;
778 double *head_r2_true, *tail_r2_true;
779
780 double head_r_elem1, tail_r_elem1;
781 double head_r_elem2, tail_r_elem2;
782 double head_r_elem, tail_r_elem;
783
784 float *a_vec;
785 float *x_head_vec;
786 float *x_tail_vec;
787
788 float *y_i = (float *) y;
789 float *alpha_i = (float *) alpha;
790 float *beta_i = (float *) beta;
791
792 float *a_i = (float *) a;
793 float *x_head_i = (float *) x_head;
794 float *x_tail_i = (float *) x_tail;
795
796 ld = n;
797 if (order == blas_colmajor) {
798 inca1i = incai = 1;
799 incyi = incy;
800 incaij = lda;
801 incxi = incx;
802 inca1ij = n;
803 } else {
804 incyi = incy;
805 incai = lda;
806 incxi = incx;
807 inca1i = n;
808 inca1ij = incaij = 1;
809 }
810 xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
811 yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
812 incri = 1;
813 incri *= 2;
814 xi0 *= 2;
815 yi0 *= 2;
816 incyi *= 2;
817 incai *= 2;
818 incaij *= 2;
819 incxi *= 2;
820
821 inca_vec = incx_vec = 1;
822 inca_vec *= 2;
823 incx_vec *= 2;
824 a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
825 if (n > 0 && a_vec == NULL) {
826 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
827 }
828 x_head_vec = (float *) blas_malloc(n * sizeof(float) * 2);
829 if (n > 0 && x_head_vec == NULL) {
830 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
831 }
832 x_tail_vec = (float *) blas_malloc(n * sizeof(float) * 2);
833 if (n > 0 && x_tail_vec == NULL) {
834 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
835 }
836
837 int incx0, incy1, incy2, incmi = 1;
838 a1 = (float *) blas_malloc(n * n * sizeof(float));
839 if (n * n > 0 && a1 == NULL) {
840 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
841 }
842 a2 = (float *) blas_malloc(n * n * sizeof(float));
843 if (n * n > 0 && a2 == NULL) {
844 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
845 }
846 y1 = (float *) blas_malloc(n * sizeof(float));
847 if (n > 0 && y1 == NULL) {
848 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
849 }
850 incy1 = 1;
851
852 y2 = (float *) blas_malloc(n * sizeof(float));
853 if (n > 0 && y2 == NULL) {
854 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
855 }
856 incy2 = 1;
857
858 x_head_0 = (float *) blas_malloc(n * sizeof(float));
859 if (n > 0 && x_head_0 == NULL) {
860 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
861 }
862 x_tail_0 = (float *) blas_malloc(n * sizeof(float));
863 if (n > 0 && x_tail_0 == NULL) {
864 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
865 }
866 incx0 = 1;
867
868 head_r1_true = (double *) blas_malloc(n * sizeof(double));
869 tail_r1_true = (double *) blas_malloc(n * sizeof(double));
870 if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
871 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
872 };
873 head_r2_true = (double *) blas_malloc(n * sizeof(double));
874 tail_r2_true = (double *) blas_malloc(n * sizeof(double));
875 if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
876 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
877 };
878
879 /* First generate the real portion of A and x. */
880 BLAS_ssymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
881 x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
882 head_r1_true, tail_r1_true);
883
884 /* x0 is now fixed and is input to this call */
885 BLAS_sskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
886 x_head_0, x_tail_0, y2, seed, head_r2_true,
887 tail_r2_true);
888
889
890 /* The case where x is a complex vector. Since x is generated
891 as a real vector, we need to perform some scaling.
892
893 There are four cases to consider, depending on the values
894 of alpha and beta.
895
896 values scaling
897 alpha beta alpha A x beta y r (truth)
898 0 1 1 i i i
899 1 1 ? 1+i 1+i 1+i
900 2 ? 1 1+i 1+i 2i 2i
901 3 ? ? 1+i 1+i 2i 2i
902
903 Note that we can afford to scale r by 1+i, since they are
904 computed in double-double precision.
905 */
906
907 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
908 ab = 0;
909 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
910 } else if (alpha_i[0] == 1.0) {
911 ab = 1;
912 /* set alpha to 1, multiply beta by 1+i. */
913 alpha_i[1] = 0.0;
914 beta_i[1] = beta_i[0];
915 } else if (beta_i[0] == 1.0) {
916 ab = 2;
917 /* set beta to 1, multiply alpha by 1+i. */
918 beta_i[1] = 0.0;
919 alpha_i[1] = alpha_i[0];
920 } else {
921 ab = 3;
922 /* multiply alpha by 1+i, beta by 2i. */
923 alpha_i[1] = alpha_i[0];
924 beta_i[1] = 2.0 * beta_i[0];
925 beta_i[0] = 0.0;
926 }
927
928 /* Now fill in a */
929 for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
930 for (j = 0, aij = ai, a1ij = a1i; j < n;
931 j++, aij += incaij, a1ij += inca1ij) {
932 a_i[aij] = a1[a1ij];
933 a_i[aij + 1] = a2[a1ij];
934 }
935 }
936
937 /* Fill in x */
938 for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
939 if (ab == 0) {
940 x_head_i[xi] = 0.0;
941 x_head_i[xi + 1] = x_head_0[mi];
942 x_tail_i[xi] = 0.0;
943 x_tail_i[xi + 1] = x_tail_0[mi];
944 } else {
945 x_head_i[xi] = x_head_0[mi];
946 x_head_i[xi + 1] = x_head_0[mi];
947 x_tail_i[xi] = x_tail_0[mi];
948 x_tail_i[xi + 1] = x_tail_0[mi];
949 }
950 }
951
952 /* Fill in y */
953 for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
954 if (ab == 0) {
955 y_i[yi] = -y2[mi];
956 y_i[yi + 1] = y1[mi];
957 } else if (ab == 2) {
958 y_i[yi] = -2.0 * y2[mi];
959 y_i[yi + 1] = 2.0 * y1[mi];
960 } else {
961 y_i[yi] = y1[mi];
962 y_i[yi + 1] = y2[mi];
963 }
964 }
965
966 /* Fill in the truth */
967 for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
968
969 head_r_elem1 = head_r1_true[mi];
970 tail_r_elem1 = tail_r1_true[mi];
971 head_r_elem2 = head_r2_true[mi];
972 tail_r_elem2 = tail_r2_true[mi];
973
974 if (ab == 0) {
975 head_r_true[ri] = -head_r_elem2;
976 tail_r_true[ri] = -tail_r_elem2;
977 head_r_true[ri + 1] = head_r_elem1;
978 tail_r_true[ri + 1] = tail_r_elem1;
979 } else if (ab == 1) {
980 {
981 /* Compute double-double = double-double + double-double. */
982 double bv;
983 double s1, s2, t1, t2;
984
985 /* Add two hi words. */
986 s1 = head_r_elem1 + head_r_elem2;
987 bv = s1 - head_r_elem1;
988 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
989
990 /* Add two lo words. */
991 t1 = tail_r_elem1 + tail_r_elem2;
992 bv = t1 - tail_r_elem1;
993 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
994
995 s2 += t1;
996
997 /* Renormalize (s1, s2) to (t1, s2) */
998 t1 = s1 + s2;
999 s2 = s2 - (t1 - s1);
1000
1001 t2 += s2;
1002
1003 /* Renormalize (t1, t2) */
1004 head_r_elem = t1 + t2;
1005 tail_r_elem = t2 - (head_r_elem - t1);
1006 }
1007
1008 /* Set the imaginary part to R1 + R2 */
1009 tail_r_true[ri + 1] = tail_r_elem;
1010 head_r_true[ri + 1] = head_r_elem;
1011
1012 /* Set the real part to R1 - R2. */
1013 {
1014 double head_bt, tail_bt;
1015 head_bt = -head_r_elem2;
1016 tail_bt = -tail_r_elem2;
1017 {
1018 /* Compute double-double = double-double + double-double. */
1019 double bv;
1020 double s1, s2, t1, t2;
1021
1022 /* Add two hi words. */
1023 s1 = head_r_elem1 + head_bt;
1024 bv = s1 - head_r_elem1;
1025 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1026
1027 /* Add two lo words. */
1028 t1 = tail_r_elem1 + tail_bt;
1029 bv = t1 - tail_r_elem1;
1030 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1031
1032 s2 += t1;
1033
1034 /* Renormalize (s1, s2) to (t1, s2) */
1035 t1 = s1 + s2;
1036 s2 = s2 - (t1 - s1);
1037
1038 t2 += s2;
1039
1040 /* Renormalize (t1, t2) */
1041 head_r_elem = t1 + t2;
1042 tail_r_elem = t2 - (head_r_elem - t1);
1043 }
1044 }
1045 tail_r_true[ri] = tail_r_elem;
1046 head_r_true[ri] = head_r_elem;
1047 } else {
1048 /* Real part */
1049 {
1050 /* Compute double-double = double-double * double. */
1051 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1052
1053 con = head_r_elem2 * split;
1054 a11 = con - head_r_elem2;
1055 a11 = con - a11;
1056 a21 = head_r_elem2 - a11;
1057 con = -2.0 * split;
1058 b1 = con - -2.0;
1059 b1 = con - b1;
1060 b2 = -2.0 - b1;
1061
1062 c11 = head_r_elem2 * -2.0;
1063 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1064
1065 c2 = tail_r_elem2 * -2.0;
1066 t1 = c11 + c2;
1067 t2 = (c2 - (t1 - c11)) + c21;
1068
1069 head_r_elem = t1 + t2;
1070 tail_r_elem = t2 - (head_r_elem - t1);
1071 }
1072 head_r_true[ri] = head_r_elem;
1073 tail_r_true[ri] = tail_r_elem;
1074
1075 /* Imaginary Part */
1076 {
1077 /* Compute double-double = double-double * double. */
1078 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1079
1080 con = head_r_elem1 * split;
1081 a11 = con - head_r_elem1;
1082 a11 = con - a11;
1083 a21 = head_r_elem1 - a11;
1084 con = 2.0 * split;
1085 b1 = con - 2.0;
1086 b1 = con - b1;
1087 b2 = 2.0 - b1;
1088
1089 c11 = head_r_elem1 * 2.0;
1090 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1091
1092 c2 = tail_r_elem1 * 2.0;
1093 t1 = c11 + c2;
1094 t2 = (c2 - (t1 - c11)) + c21;
1095
1096 head_r_elem = t1 + t2;
1097 tail_r_elem = t2 - (head_r_elem - t1);
1098 }
1099 head_r_true[ri + 1] = head_r_elem;
1100 tail_r_true[ri + 1] = tail_r_elem;
1101 }
1102 }
1103
1104
1105 blas_free(a1);
1106 blas_free(a2);
1107 blas_free(y1);
1108 blas_free(y2);
1109 blas_free(x_head_0);
1110 blas_free(x_tail_0);
1111 blas_free(head_r1_true);
1112 blas_free(tail_r1_true);
1113 blas_free(head_r2_true);
1114 blas_free(tail_r2_true);
1115 blas_free(a_vec);
1116 blas_free(x_head_vec);
1117 blas_free(x_tail_vec);
1118 }
1119 }
1120
BLAS_zhemv2_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)1121 void BLAS_zhemv2_testgen(int norm, enum blas_order_type order,
1122 enum blas_uplo_type uplo, int n, void *alpha,
1123 int alpha_flag, void *beta, int beta_flag, void *a,
1124 int lda, void *x_head, void *x_tail, void *y,
1125 int *seed, double *head_r_true, double *tail_r_true)
1126
1127 /*
1128 * Purpose
1129 * =======
1130 *
1131 * Generates the test inputs to BLAS_zhemv2{_x}
1132 *
1133 * Arguments
1134 * =========
1135 *
1136 * norm (input) int
1137 * = -1: the vectors are scaled with norms near underflow.
1138 * = 0: the vectors have norms of order 1.
1139 * = 1: the vectors are scaled with norms near overflow.
1140 *
1141 * order (input) enum blas_order_type
1142 * storage format of the matrices
1143 *
1144 * uplo (input) enum blas_uplo_type
1145 * which half of the hemv2etric matrix a is to be stored.
1146 *
1147 * n (input) int
1148 * sizes of symmetrical matrix a, size of vectors x, y:
1149 * matrix a is n-by-n.
1150 *
1151 * alpha (input/output) void*
1152 * if alpha_flag = 1, alpha is input.
1153 * if alpha_flag = 0, alpha is output.
1154 *
1155 * alpha_flag (input) int
1156 * = 0: alpha is free, and is output.
1157 * = 1: alpha is fixed on input.
1158 *
1159 * beta (input/output) void*
1160 * if beta_flag = 1, beta is input.
1161 * if beta_flag = 0, beta is output.
1162 *
1163 * beta_flag (input) int
1164 * = 0: beta is free, and is output.
1165 * = 1: beta is fixed on input.
1166 *
1167 * a (input/output) void*
1168 *
1169 * lda (input) lda
1170 * leading dimension of matrix A.
1171 *
1172 * x_head (input/output) void*
1173 * x_tail (input/output) void*
1174 * vector x = (x_head + x_tail)
1175 *
1176 * y (input/output) void*
1177 * generated vector y that will be used as an input to HEMV2.
1178 *
1179 * seed (input/output) int *
1180 * seed for the random number generator.
1181 *
1182 * head_r_true (output) double *
1183 * the leading part of the truth in double-double.
1184 *
1185 * tail_r_true (output) double *
1186 * the trailing part of the truth in double-double
1187 *
1188 */
1189 {
1190 {
1191
1192 /* Strategy:
1193 r1 = alpha * A1 * x + beta * y1
1194 r2 = alpha * A2 * x + beta * y2
1195 where all the matrices and vectors are real. Then let R = R1 + i R2,
1196 A = A1 + i A2, y = y1 + i y2. To make A Hermitian, A1 is symmetric,
1197 and A2 is skew-symmetric.
1198 */
1199
1200 int i, j;
1201 int yi;
1202 int aij, ai;
1203 int a1ij, a1i;
1204 int xi;
1205 int mi;
1206 int incx = 1, incy = 1;
1207 int incyi, xi0, yi0;
1208 int incaij, incai;
1209 int inca1ij, inca1i;
1210 int incxi;
1211 int inca_vec, incx_vec;
1212 int ld;
1213 int ab;
1214 int ri, incri;
1215
1216 double *a1;
1217 double *a2;
1218 double *y1;
1219 double *y2;
1220 double *x_head_0;
1221 double *x_tail_0;
1222
1223 double *head_r1_true, *tail_r1_true;
1224 double *head_r2_true, *tail_r2_true;
1225
1226 double head_r_elem1, tail_r_elem1;
1227 double head_r_elem2, tail_r_elem2;
1228 double head_r_elem, tail_r_elem;
1229
1230 double *a_vec;
1231 double *x_head_vec;
1232 double *x_tail_vec;
1233
1234 double *y_i = (double *) y;
1235 double *alpha_i = (double *) alpha;
1236 double *beta_i = (double *) beta;
1237
1238 double *a_i = (double *) a;
1239 double *x_head_i = (double *) x_head;
1240 double *x_tail_i = (double *) x_tail;
1241
1242 ld = n;
1243 if (order == blas_colmajor) {
1244 inca1i = incai = 1;
1245 incyi = incy;
1246 incaij = lda;
1247 incxi = incx;
1248 inca1ij = n;
1249 } else {
1250 incyi = incy;
1251 incai = lda;
1252 incxi = incx;
1253 inca1i = n;
1254 inca1ij = incaij = 1;
1255 }
1256 xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
1257 yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
1258 incri = 1;
1259 incri *= 2;
1260 xi0 *= 2;
1261 yi0 *= 2;
1262 incyi *= 2;
1263 incai *= 2;
1264 incaij *= 2;
1265 incxi *= 2;
1266
1267 inca_vec = incx_vec = 1;
1268 inca_vec *= 2;
1269 incx_vec *= 2;
1270 a_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1271 if (n > 0 && a_vec == NULL) {
1272 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1273 }
1274 x_head_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1275 if (n > 0 && x_head_vec == NULL) {
1276 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1277 }
1278 x_tail_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1279 if (n > 0 && x_tail_vec == NULL) {
1280 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1281 }
1282
1283 int incx0, incy1, incy2, incmi = 1;
1284 a1 = (double *) blas_malloc(n * n * sizeof(double));
1285 if (n * n > 0 && a1 == NULL) {
1286 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1287 }
1288 a2 = (double *) blas_malloc(n * n * sizeof(double));
1289 if (n * n > 0 && a2 == NULL) {
1290 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1291 }
1292 y1 = (double *) blas_malloc(n * sizeof(double));
1293 if (n > 0 && y1 == NULL) {
1294 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1295 }
1296 incy1 = 1;
1297
1298 y2 = (double *) blas_malloc(n * sizeof(double));
1299 if (n > 0 && y2 == NULL) {
1300 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1301 }
1302 incy2 = 1;
1303
1304 x_head_0 = (double *) blas_malloc(n * sizeof(double));
1305 if (n > 0 && x_head_0 == NULL) {
1306 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1307 }
1308 x_tail_0 = (double *) blas_malloc(n * sizeof(double));
1309 if (n > 0 && x_tail_0 == NULL) {
1310 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1311 }
1312 incx0 = 1;
1313
1314 head_r1_true = (double *) blas_malloc(n * sizeof(double));
1315 tail_r1_true = (double *) blas_malloc(n * sizeof(double));
1316 if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1317 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1318 };
1319 head_r2_true = (double *) blas_malloc(n * sizeof(double));
1320 tail_r2_true = (double *) blas_malloc(n * sizeof(double));
1321 if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1322 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1323 };
1324
1325 /* First generate the real portion of A and x. */
1326 BLAS_dsymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
1327 x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
1328 head_r1_true, tail_r1_true);
1329
1330 /* x0 is now fixed and is input to this call */
1331 BLAS_dskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
1332 x_head_0, x_tail_0, y2, seed, head_r2_true,
1333 tail_r2_true);
1334
1335
1336 /* The case where x is a complex vector. Since x is generated
1337 as a real vector, we need to perform some scaling.
1338
1339 There are four cases to consider, depending on the values
1340 of alpha and beta.
1341
1342 values scaling
1343 alpha beta alpha A x beta y r (truth)
1344 0 1 1 i i i
1345 1 1 ? 1+i 1+i 1+i
1346 2 ? 1 1+i 1+i 2i 2i
1347 3 ? ? 1+i 1+i 2i 2i
1348
1349 Note that we can afford to scale r by 1+i, since they are
1350 computed in double-double precision.
1351 */
1352
1353 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1354 ab = 0;
1355 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
1356 } else if (alpha_i[0] == 1.0) {
1357 ab = 1;
1358 /* set alpha to 1, multiply beta by 1+i. */
1359 alpha_i[1] = 0.0;
1360 beta_i[1] = beta_i[0];
1361 } else if (beta_i[0] == 1.0) {
1362 ab = 2;
1363 /* set beta to 1, multiply alpha by 1+i. */
1364 beta_i[1] = 0.0;
1365 alpha_i[1] = alpha_i[0];
1366 } else {
1367 ab = 3;
1368 /* multiply alpha by 1+i, beta by 2i. */
1369 alpha_i[1] = alpha_i[0];
1370 beta_i[1] = 2.0 * beta_i[0];
1371 beta_i[0] = 0.0;
1372 }
1373
1374 /* Now fill in a */
1375 for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
1376 for (j = 0, aij = ai, a1ij = a1i; j < n;
1377 j++, aij += incaij, a1ij += inca1ij) {
1378 a_i[aij] = a1[a1ij];
1379 a_i[aij + 1] = a2[a1ij];
1380 }
1381 }
1382
1383 /* Fill in x */
1384 for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
1385 if (ab == 0) {
1386 x_head_i[xi] = 0.0;
1387 x_head_i[xi + 1] = x_head_0[mi];
1388 x_tail_i[xi] = 0.0;
1389 x_tail_i[xi + 1] = x_tail_0[mi];
1390 } else {
1391 x_head_i[xi] = x_head_0[mi];
1392 x_head_i[xi + 1] = x_head_0[mi];
1393 x_tail_i[xi] = x_tail_0[mi];
1394 x_tail_i[xi + 1] = x_tail_0[mi];
1395 }
1396 }
1397
1398 /* Fill in y */
1399 for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
1400 if (ab == 0) {
1401 y_i[yi] = -y2[mi];
1402 y_i[yi + 1] = y1[mi];
1403 } else if (ab == 2) {
1404 y_i[yi] = -2.0 * y2[mi];
1405 y_i[yi + 1] = 2.0 * y1[mi];
1406 } else {
1407 y_i[yi] = y1[mi];
1408 y_i[yi + 1] = y2[mi];
1409 }
1410 }
1411
1412 /* Fill in the truth */
1413 for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
1414
1415 head_r_elem1 = head_r1_true[mi];
1416 tail_r_elem1 = tail_r1_true[mi];
1417 head_r_elem2 = head_r2_true[mi];
1418 tail_r_elem2 = tail_r2_true[mi];
1419
1420 if (ab == 0) {
1421 head_r_true[ri] = -head_r_elem2;
1422 tail_r_true[ri] = -tail_r_elem2;
1423 head_r_true[ri + 1] = head_r_elem1;
1424 tail_r_true[ri + 1] = tail_r_elem1;
1425 } else if (ab == 1) {
1426 {
1427 /* Compute double-double = double-double + double-double. */
1428 double bv;
1429 double s1, s2, t1, t2;
1430
1431 /* Add two hi words. */
1432 s1 = head_r_elem1 + head_r_elem2;
1433 bv = s1 - head_r_elem1;
1434 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1435
1436 /* Add two lo words. */
1437 t1 = tail_r_elem1 + tail_r_elem2;
1438 bv = t1 - tail_r_elem1;
1439 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1440
1441 s2 += t1;
1442
1443 /* Renormalize (s1, s2) to (t1, s2) */
1444 t1 = s1 + s2;
1445 s2 = s2 - (t1 - s1);
1446
1447 t2 += s2;
1448
1449 /* Renormalize (t1, t2) */
1450 head_r_elem = t1 + t2;
1451 tail_r_elem = t2 - (head_r_elem - t1);
1452 }
1453
1454 /* Set the imaginary part to R1 + R2 */
1455 tail_r_true[ri + 1] = tail_r_elem;
1456 head_r_true[ri + 1] = head_r_elem;
1457
1458 /* Set the real part to R1 - R2. */
1459 {
1460 double head_bt, tail_bt;
1461 head_bt = -head_r_elem2;
1462 tail_bt = -tail_r_elem2;
1463 {
1464 /* Compute double-double = double-double + double-double. */
1465 double bv;
1466 double s1, s2, t1, t2;
1467
1468 /* Add two hi words. */
1469 s1 = head_r_elem1 + head_bt;
1470 bv = s1 - head_r_elem1;
1471 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1472
1473 /* Add two lo words. */
1474 t1 = tail_r_elem1 + tail_bt;
1475 bv = t1 - tail_r_elem1;
1476 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1477
1478 s2 += t1;
1479
1480 /* Renormalize (s1, s2) to (t1, s2) */
1481 t1 = s1 + s2;
1482 s2 = s2 - (t1 - s1);
1483
1484 t2 += s2;
1485
1486 /* Renormalize (t1, t2) */
1487 head_r_elem = t1 + t2;
1488 tail_r_elem = t2 - (head_r_elem - t1);
1489 }
1490 }
1491 tail_r_true[ri] = tail_r_elem;
1492 head_r_true[ri] = head_r_elem;
1493 } else {
1494 /* Real part */
1495 {
1496 /* Compute double-double = double-double * double. */
1497 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1498
1499 con = head_r_elem2 * split;
1500 a11 = con - head_r_elem2;
1501 a11 = con - a11;
1502 a21 = head_r_elem2 - a11;
1503 con = -2.0 * split;
1504 b1 = con - -2.0;
1505 b1 = con - b1;
1506 b2 = -2.0 - b1;
1507
1508 c11 = head_r_elem2 * -2.0;
1509 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1510
1511 c2 = tail_r_elem2 * -2.0;
1512 t1 = c11 + c2;
1513 t2 = (c2 - (t1 - c11)) + c21;
1514
1515 head_r_elem = t1 + t2;
1516 tail_r_elem = t2 - (head_r_elem - t1);
1517 }
1518 head_r_true[ri] = head_r_elem;
1519 tail_r_true[ri] = tail_r_elem;
1520
1521 /* Imaginary Part */
1522 {
1523 /* Compute double-double = double-double * double. */
1524 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1525
1526 con = head_r_elem1 * split;
1527 a11 = con - head_r_elem1;
1528 a11 = con - a11;
1529 a21 = head_r_elem1 - a11;
1530 con = 2.0 * split;
1531 b1 = con - 2.0;
1532 b1 = con - b1;
1533 b2 = 2.0 - b1;
1534
1535 c11 = head_r_elem1 * 2.0;
1536 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1537
1538 c2 = tail_r_elem1 * 2.0;
1539 t1 = c11 + c2;
1540 t2 = (c2 - (t1 - c11)) + c21;
1541
1542 head_r_elem = t1 + t2;
1543 tail_r_elem = t2 - (head_r_elem - t1);
1544 }
1545 head_r_true[ri + 1] = head_r_elem;
1546 tail_r_true[ri + 1] = tail_r_elem;
1547 }
1548 }
1549
1550
1551 blas_free(a1);
1552 blas_free(a2);
1553 blas_free(y1);
1554 blas_free(y2);
1555 blas_free(x_head_0);
1556 blas_free(x_tail_0);
1557 blas_free(head_r1_true);
1558 blas_free(tail_r1_true);
1559 blas_free(head_r2_true);
1560 blas_free(tail_r2_true);
1561 blas_free(a_vec);
1562 blas_free(x_head_vec);
1563 blas_free(x_tail_vec);
1564 }
1565 }
1566
BLAS_zhemv2_c_z_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)1567 void BLAS_zhemv2_c_z_testgen(int norm, enum blas_order_type order,
1568 enum blas_uplo_type uplo, int n, void *alpha,
1569 int alpha_flag, void *beta, int beta_flag,
1570 void *a, int lda, void *x_head, void *x_tail,
1571 void *y, int *seed, double *head_r_true,
1572 double *tail_r_true)
1573
1574 /*
1575 * Purpose
1576 * =======
1577 *
1578 * Generates the test inputs to BLAS_zhemv_c_z{_x}
1579 *
1580 * Arguments
1581 * =========
1582 *
1583 * norm (input) int
1584 * = -1: the vectors are scaled with norms near underflow.
1585 * = 0: the vectors have norms of order 1.
1586 * = 1: the vectors are scaled with norms near overflow.
1587 *
1588 * order (input) enum blas_order_type
1589 * storage format of the matrices
1590 *
1591 * uplo (input) enum blas_uplo_type
1592 * which half of the hemv2etric matrix a is to be stored.
1593 *
1594 * n (input) int
1595 * sizes of symmetrical matrix a, size of vectors x, y:
1596 * matrix a is n-by-n.
1597 *
1598 * alpha (input/output) void*
1599 * if alpha_flag = 1, alpha is input.
1600 * if alpha_flag = 0, alpha is output.
1601 *
1602 * alpha_flag (input) int
1603 * = 0: alpha is free, and is output.
1604 * = 1: alpha is fixed on input.
1605 *
1606 * beta (input/output) void*
1607 * if beta_flag = 1, beta is input.
1608 * if beta_flag = 0, beta is output.
1609 *
1610 * beta_flag (input) int
1611 * = 0: beta is free, and is output.
1612 * = 1: beta is fixed on input.
1613 *
1614 * a (input/output) void*
1615 *
1616 * lda (input) lda
1617 * leading dimension of matrix A.
1618 *
1619 * x_head (input/output) void*
1620 * x_tail (input/output) void*
1621 * vector x = (x_head + x_tail)
1622 *
1623 * y (input/output) void*
1624 * generated vector y that will be used as an input to HEMV2.
1625 *
1626 * seed (input/output) int *
1627 * seed for the random number generator.
1628 *
1629 * head_r_true (output) double *
1630 * the leading part of the truth in double-double.
1631 *
1632 * tail_r_true (output) double *
1633 * the trailing part of the truth in double-double
1634 *
1635 */
1636 {
1637 {
1638
1639 /* Strategy:
1640 r1 = alpha * A1 * x + beta * y1
1641 r2 = alpha * A2 * x + beta * y2
1642 where all the matrices and vectors are real. Then let R = R1 + i R2,
1643 A = A1 + i A2, y = y1 + i y2. To make A Hermitian, A1 is symmetric,
1644 and A2 is skew-symmetric.
1645 */
1646
1647 int i, j;
1648 int yi;
1649 int aij, ai;
1650 int a1ij, a1i;
1651 int xi;
1652 int mi;
1653 int incx = 1, incy = 1;
1654 int incyi, xi0, yi0;
1655 int incaij, incai;
1656 int inca1ij, inca1i;
1657 int incxi;
1658 int inca_vec, incx_vec;
1659 int ld;
1660 int ab;
1661 int ri, incri;
1662
1663 float *a1;
1664 float *a2;
1665 double *y1;
1666 double *y2;
1667 double *x_head_0;
1668 double *x_tail_0;
1669
1670 double *head_r1_true, *tail_r1_true;
1671 double *head_r2_true, *tail_r2_true;
1672
1673 double head_r_elem1, tail_r_elem1;
1674 double head_r_elem2, tail_r_elem2;
1675 double head_r_elem, tail_r_elem;
1676
1677 float *a_vec;
1678 double *x_head_vec;
1679 double *x_tail_vec;
1680
1681 double *y_i = (double *) y;
1682 double *alpha_i = (double *) alpha;
1683 double *beta_i = (double *) beta;
1684
1685 float *a_i = (float *) a;
1686 double *x_head_i = (double *) x_head;
1687 double *x_tail_i = (double *) x_tail;
1688
1689 ld = n;
1690 if (order == blas_colmajor) {
1691 inca1i = incai = 1;
1692 incyi = incy;
1693 incaij = lda;
1694 incxi = incx;
1695 inca1ij = n;
1696 } else {
1697 incyi = incy;
1698 incai = lda;
1699 incxi = incx;
1700 inca1i = n;
1701 inca1ij = incaij = 1;
1702 }
1703 xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
1704 yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
1705 incri = 1;
1706 incri *= 2;
1707 xi0 *= 2;
1708 yi0 *= 2;
1709 incyi *= 2;
1710 incai *= 2;
1711 incaij *= 2;
1712 incxi *= 2;
1713
1714 inca_vec = incx_vec = 1;
1715 inca_vec *= 2;
1716 incx_vec *= 2;
1717 a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
1718 if (n > 0 && a_vec == NULL) {
1719 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1720 }
1721 x_head_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1722 if (n > 0 && x_head_vec == NULL) {
1723 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1724 }
1725 x_tail_vec = (double *) blas_malloc(n * sizeof(double) * 2);
1726 if (n > 0 && x_tail_vec == NULL) {
1727 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1728 }
1729
1730 int incx0, incy1, incy2, incmi = 1;
1731 a1 = (float *) blas_malloc(n * n * sizeof(float));
1732 if (n * n > 0 && a1 == NULL) {
1733 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1734 }
1735 a2 = (float *) blas_malloc(n * n * sizeof(float));
1736 if (n * n > 0 && a2 == NULL) {
1737 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1738 }
1739 y1 = (double *) blas_malloc(n * sizeof(double));
1740 if (n > 0 && y1 == NULL) {
1741 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1742 }
1743 incy1 = 1;
1744
1745 y2 = (double *) blas_malloc(n * sizeof(double));
1746 if (n > 0 && y2 == NULL) {
1747 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1748 }
1749 incy2 = 1;
1750
1751 x_head_0 = (double *) blas_malloc(n * sizeof(double));
1752 if (n > 0 && x_head_0 == NULL) {
1753 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1754 }
1755 x_tail_0 = (double *) blas_malloc(n * sizeof(double));
1756 if (n > 0 && x_tail_0 == NULL) {
1757 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1758 }
1759 incx0 = 1;
1760
1761 head_r1_true = (double *) blas_malloc(n * sizeof(double));
1762 tail_r1_true = (double *) blas_malloc(n * sizeof(double));
1763 if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
1764 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1765 };
1766 head_r2_true = (double *) blas_malloc(n * sizeof(double));
1767 tail_r2_true = (double *) blas_malloc(n * sizeof(double));
1768 if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
1769 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1770 };
1771
1772 /* First generate the real portion of A and x. */
1773 BLAS_dsymv2_s_d_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
1774 x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
1775 head_r1_true, tail_r1_true);
1776
1777 /* x0 is now fixed and is input to this call */
1778 BLAS_dskmv2_testgen_s_d(norm, order, uplo, n, alpha_i, beta_i, a2, n,
1779 x_head_0, x_tail_0, y2, seed, head_r2_true,
1780 tail_r2_true);
1781
1782
1783 /* The case where x is a complex vector. Since x is generated
1784 as a real vector, we need to perform some scaling.
1785
1786 There are four cases to consider, depending on the values
1787 of alpha and beta.
1788
1789 values scaling
1790 alpha beta alpha A x beta y r (truth)
1791 0 1 1 i i i
1792 1 1 ? 1+i 1+i 1+i
1793 2 ? 1 1+i 1+i 2i 2i
1794 3 ? ? 1+i 1+i 2i 2i
1795
1796 Note that we can afford to scale r by 1+i, since they are
1797 computed in double-double precision.
1798 */
1799
1800 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
1801 ab = 0;
1802 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
1803 } else if (alpha_i[0] == 1.0) {
1804 ab = 1;
1805 /* set alpha to 1, multiply beta by 1+i. */
1806 alpha_i[1] = 0.0;
1807 beta_i[1] = beta_i[0];
1808 } else if (beta_i[0] == 1.0) {
1809 ab = 2;
1810 /* set beta to 1, multiply alpha by 1+i. */
1811 beta_i[1] = 0.0;
1812 alpha_i[1] = alpha_i[0];
1813 } else {
1814 ab = 3;
1815 /* multiply alpha by 1+i, beta by 2i. */
1816 alpha_i[1] = alpha_i[0];
1817 beta_i[1] = 2.0 * beta_i[0];
1818 beta_i[0] = 0.0;
1819 }
1820
1821 /* Now fill in a */
1822 for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
1823 for (j = 0, aij = ai, a1ij = a1i; j < n;
1824 j++, aij += incaij, a1ij += inca1ij) {
1825 a_i[aij] = a1[a1ij];
1826 a_i[aij + 1] = a2[a1ij];
1827 }
1828 }
1829
1830 /* Fill in x */
1831 for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
1832 if (ab == 0) {
1833 x_head_i[xi] = 0.0;
1834 x_head_i[xi + 1] = x_head_0[mi];
1835 x_tail_i[xi] = 0.0;
1836 x_tail_i[xi + 1] = x_tail_0[mi];
1837 } else {
1838 x_head_i[xi] = x_head_0[mi];
1839 x_head_i[xi + 1] = x_head_0[mi];
1840 x_tail_i[xi] = x_tail_0[mi];
1841 x_tail_i[xi + 1] = x_tail_0[mi];
1842 }
1843 }
1844
1845 /* Fill in y */
1846 for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
1847 if (ab == 0) {
1848 y_i[yi] = -y2[mi];
1849 y_i[yi + 1] = y1[mi];
1850 } else if (ab == 2) {
1851 y_i[yi] = -2.0 * y2[mi];
1852 y_i[yi + 1] = 2.0 * y1[mi];
1853 } else {
1854 y_i[yi] = y1[mi];
1855 y_i[yi + 1] = y2[mi];
1856 }
1857 }
1858
1859 /* Fill in the truth */
1860 for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
1861
1862 head_r_elem1 = head_r1_true[mi];
1863 tail_r_elem1 = tail_r1_true[mi];
1864 head_r_elem2 = head_r2_true[mi];
1865 tail_r_elem2 = tail_r2_true[mi];
1866
1867 if (ab == 0) {
1868 head_r_true[ri] = -head_r_elem2;
1869 tail_r_true[ri] = -tail_r_elem2;
1870 head_r_true[ri + 1] = head_r_elem1;
1871 tail_r_true[ri + 1] = tail_r_elem1;
1872 } else if (ab == 1) {
1873 {
1874 /* Compute double-double = double-double + double-double. */
1875 double bv;
1876 double s1, s2, t1, t2;
1877
1878 /* Add two hi words. */
1879 s1 = head_r_elem1 + head_r_elem2;
1880 bv = s1 - head_r_elem1;
1881 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
1882
1883 /* Add two lo words. */
1884 t1 = tail_r_elem1 + tail_r_elem2;
1885 bv = t1 - tail_r_elem1;
1886 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
1887
1888 s2 += t1;
1889
1890 /* Renormalize (s1, s2) to (t1, s2) */
1891 t1 = s1 + s2;
1892 s2 = s2 - (t1 - s1);
1893
1894 t2 += s2;
1895
1896 /* Renormalize (t1, t2) */
1897 head_r_elem = t1 + t2;
1898 tail_r_elem = t2 - (head_r_elem - t1);
1899 }
1900
1901 /* Set the imaginary part to R1 + R2 */
1902 tail_r_true[ri + 1] = tail_r_elem;
1903 head_r_true[ri + 1] = head_r_elem;
1904
1905 /* Set the real part to R1 - R2. */
1906 {
1907 double head_bt, tail_bt;
1908 head_bt = -head_r_elem2;
1909 tail_bt = -tail_r_elem2;
1910 {
1911 /* Compute double-double = double-double + double-double. */
1912 double bv;
1913 double s1, s2, t1, t2;
1914
1915 /* Add two hi words. */
1916 s1 = head_r_elem1 + head_bt;
1917 bv = s1 - head_r_elem1;
1918 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
1919
1920 /* Add two lo words. */
1921 t1 = tail_r_elem1 + tail_bt;
1922 bv = t1 - tail_r_elem1;
1923 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
1924
1925 s2 += t1;
1926
1927 /* Renormalize (s1, s2) to (t1, s2) */
1928 t1 = s1 + s2;
1929 s2 = s2 - (t1 - s1);
1930
1931 t2 += s2;
1932
1933 /* Renormalize (t1, t2) */
1934 head_r_elem = t1 + t2;
1935 tail_r_elem = t2 - (head_r_elem - t1);
1936 }
1937 }
1938 tail_r_true[ri] = tail_r_elem;
1939 head_r_true[ri] = head_r_elem;
1940 } else {
1941 /* Real part */
1942 {
1943 /* Compute double-double = double-double * double. */
1944 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1945
1946 con = head_r_elem2 * split;
1947 a11 = con - head_r_elem2;
1948 a11 = con - a11;
1949 a21 = head_r_elem2 - a11;
1950 con = -2.0 * split;
1951 b1 = con - -2.0;
1952 b1 = con - b1;
1953 b2 = -2.0 - b1;
1954
1955 c11 = head_r_elem2 * -2.0;
1956 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1957
1958 c2 = tail_r_elem2 * -2.0;
1959 t1 = c11 + c2;
1960 t2 = (c2 - (t1 - c11)) + c21;
1961
1962 head_r_elem = t1 + t2;
1963 tail_r_elem = t2 - (head_r_elem - t1);
1964 }
1965 head_r_true[ri] = head_r_elem;
1966 tail_r_true[ri] = tail_r_elem;
1967
1968 /* Imaginary Part */
1969 {
1970 /* Compute double-double = double-double * double. */
1971 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
1972
1973 con = head_r_elem1 * split;
1974 a11 = con - head_r_elem1;
1975 a11 = con - a11;
1976 a21 = head_r_elem1 - a11;
1977 con = 2.0 * split;
1978 b1 = con - 2.0;
1979 b1 = con - b1;
1980 b2 = 2.0 - b1;
1981
1982 c11 = head_r_elem1 * 2.0;
1983 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
1984
1985 c2 = tail_r_elem1 * 2.0;
1986 t1 = c11 + c2;
1987 t2 = (c2 - (t1 - c11)) + c21;
1988
1989 head_r_elem = t1 + t2;
1990 tail_r_elem = t2 - (head_r_elem - t1);
1991 }
1992 head_r_true[ri + 1] = head_r_elem;
1993 tail_r_true[ri + 1] = tail_r_elem;
1994 }
1995 }
1996
1997
1998 blas_free(a1);
1999 blas_free(a2);
2000 blas_free(y1);
2001 blas_free(y2);
2002 blas_free(x_head_0);
2003 blas_free(x_tail_0);
2004 blas_free(head_r1_true);
2005 blas_free(tail_r1_true);
2006 blas_free(head_r2_true);
2007 blas_free(tail_r2_true);
2008 blas_free(a_vec);
2009 blas_free(x_head_vec);
2010 blas_free(x_tail_vec);
2011 }
2012 }
2013
BLAS_zhemv2_z_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)2014 void BLAS_zhemv2_z_c_testgen(int norm, enum blas_order_type order,
2015 enum blas_uplo_type uplo, int n, void *alpha,
2016 int alpha_flag, void *beta, int beta_flag,
2017 void *a, int lda, void *x_head, void *x_tail,
2018 void *y, int *seed, double *head_r_true,
2019 double *tail_r_true)
2020
2021 /*
2022 * Purpose
2023 * =======
2024 *
2025 * Generates the test inputs to BLAS_zhemv_z_c{_x}
2026 *
2027 * Arguments
2028 * =========
2029 *
2030 * norm (input) int
2031 * = -1: the vectors are scaled with norms near underflow.
2032 * = 0: the vectors have norms of order 1.
2033 * = 1: the vectors are scaled with norms near overflow.
2034 *
2035 * order (input) enum blas_order_type
2036 * storage format of the matrices
2037 *
2038 * uplo (input) enum blas_uplo_type
2039 * which half of the hemv2etric matrix a is to be stored.
2040 *
2041 * n (input) int
2042 * sizes of symmetrical matrix a, size of vectors x, y:
2043 * matrix a is n-by-n.
2044 *
2045 * alpha (input/output) void*
2046 * if alpha_flag = 1, alpha is input.
2047 * if alpha_flag = 0, alpha is output.
2048 *
2049 * alpha_flag (input) int
2050 * = 0: alpha is free, and is output.
2051 * = 1: alpha is fixed on input.
2052 *
2053 * beta (input/output) void*
2054 * if beta_flag = 1, beta is input.
2055 * if beta_flag = 0, beta is output.
2056 *
2057 * beta_flag (input) int
2058 * = 0: beta is free, and is output.
2059 * = 1: beta is fixed on input.
2060 *
2061 * a (input/output) void*
2062 *
2063 * lda (input) lda
2064 * leading dimension of matrix A.
2065 *
2066 * x_head (input/output) void*
2067 * x_tail (input/output) void*
2068 * vector x = (x_head + x_tail)
2069 *
2070 * y (input/output) void*
2071 * generated vector y that will be used as an input to HEMV2.
2072 *
2073 * seed (input/output) int *
2074 * seed for the random number generator.
2075 *
2076 * head_r_true (output) double *
2077 * the leading part of the truth in double-double.
2078 *
2079 * tail_r_true (output) double *
2080 * the trailing part of the truth in double-double
2081 *
2082 */
2083 {
2084 {
2085
2086 /* Strategy:
2087 r1 = alpha * A1 * x + beta * y1
2088 r2 = alpha * A2 * x + beta * y2
2089 where all the matrices and vectors are real. Then let R = R1 + i R2,
2090 A = A1 + i A2, y = y1 + i y2. To make A Hermitian, A1 is symmetric,
2091 and A2 is skew-symmetric.
2092 */
2093
2094 int i, j;
2095 int yi;
2096 int aij, ai;
2097 int a1ij, a1i;
2098 int xi;
2099 int mi;
2100 int incx = 1, incy = 1;
2101 int incyi, xi0, yi0;
2102 int incaij, incai;
2103 int inca1ij, inca1i;
2104 int incxi;
2105 int inca_vec, incx_vec;
2106 int ld;
2107 int ab;
2108 int ri, incri;
2109
2110 double *a1;
2111 double *a2;
2112 double *y1;
2113 double *y2;
2114 float *x_head_0;
2115 float *x_tail_0;
2116
2117 double *head_r1_true, *tail_r1_true;
2118 double *head_r2_true, *tail_r2_true;
2119
2120 double head_r_elem1, tail_r_elem1;
2121 double head_r_elem2, tail_r_elem2;
2122 double head_r_elem, tail_r_elem;
2123
2124 double *a_vec;
2125 float *x_head_vec;
2126 float *x_tail_vec;
2127
2128 double *y_i = (double *) y;
2129 double *alpha_i = (double *) alpha;
2130 double *beta_i = (double *) beta;
2131
2132 double *a_i = (double *) a;
2133 float *x_head_i = (float *) x_head;
2134 float *x_tail_i = (float *) x_tail;
2135
2136 ld = n;
2137 if (order == blas_colmajor) {
2138 inca1i = incai = 1;
2139 incyi = incy;
2140 incaij = lda;
2141 incxi = incx;
2142 inca1ij = n;
2143 } else {
2144 incyi = incy;
2145 incai = lda;
2146 incxi = incx;
2147 inca1i = n;
2148 inca1ij = incaij = 1;
2149 }
2150 xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
2151 yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
2152 incri = 1;
2153 incri *= 2;
2154 xi0 *= 2;
2155 yi0 *= 2;
2156 incyi *= 2;
2157 incai *= 2;
2158 incaij *= 2;
2159 incxi *= 2;
2160
2161 inca_vec = incx_vec = 1;
2162 inca_vec *= 2;
2163 incx_vec *= 2;
2164 a_vec = (double *) blas_malloc(n * sizeof(double) * 2);
2165 if (n > 0 && a_vec == NULL) {
2166 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2167 }
2168 x_head_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2169 if (n > 0 && x_head_vec == NULL) {
2170 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2171 }
2172 x_tail_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2173 if (n > 0 && x_tail_vec == NULL) {
2174 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2175 }
2176
2177 int incx0, incy1, incy2, incmi = 1;
2178 a1 = (double *) blas_malloc(n * n * sizeof(double));
2179 if (n * n > 0 && a1 == NULL) {
2180 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2181 }
2182 a2 = (double *) blas_malloc(n * n * sizeof(double));
2183 if (n * n > 0 && a2 == NULL) {
2184 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2185 }
2186 y1 = (double *) blas_malloc(n * sizeof(double));
2187 if (n > 0 && y1 == NULL) {
2188 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2189 }
2190 incy1 = 1;
2191
2192 y2 = (double *) blas_malloc(n * sizeof(double));
2193 if (n > 0 && y2 == NULL) {
2194 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2195 }
2196 incy2 = 1;
2197
2198 x_head_0 = (float *) blas_malloc(n * sizeof(float));
2199 if (n > 0 && x_head_0 == NULL) {
2200 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2201 }
2202 x_tail_0 = (float *) blas_malloc(n * sizeof(float));
2203 if (n > 0 && x_tail_0 == NULL) {
2204 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2205 }
2206 incx0 = 1;
2207
2208 head_r1_true = (double *) blas_malloc(n * sizeof(double));
2209 tail_r1_true = (double *) blas_malloc(n * sizeof(double));
2210 if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2211 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2212 };
2213 head_r2_true = (double *) blas_malloc(n * sizeof(double));
2214 tail_r2_true = (double *) blas_malloc(n * sizeof(double));
2215 if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2216 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2217 };
2218
2219 /* First generate the real portion of A and x. */
2220 BLAS_dsymv2_d_s_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
2221 x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
2222 head_r1_true, tail_r1_true);
2223
2224 /* x0 is now fixed and is input to this call */
2225 BLAS_dskmv2_testgen_d_s(norm, order, uplo, n, alpha_i, beta_i, a2, n,
2226 x_head_0, x_tail_0, y2, seed, head_r2_true,
2227 tail_r2_true);
2228
2229
2230 /* The case where x is a complex vector. Since x is generated
2231 as a real vector, we need to perform some scaling.
2232
2233 There are four cases to consider, depending on the values
2234 of alpha and beta.
2235
2236 values scaling
2237 alpha beta alpha A x beta y r (truth)
2238 0 1 1 i i i
2239 1 1 ? 1+i 1+i 1+i
2240 2 ? 1 1+i 1+i 2i 2i
2241 3 ? ? 1+i 1+i 2i 2i
2242
2243 Note that we can afford to scale r by 1+i, since they are
2244 computed in double-double precision.
2245 */
2246
2247 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2248 ab = 0;
2249 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
2250 } else if (alpha_i[0] == 1.0) {
2251 ab = 1;
2252 /* set alpha to 1, multiply beta by 1+i. */
2253 alpha_i[1] = 0.0;
2254 beta_i[1] = beta_i[0];
2255 } else if (beta_i[0] == 1.0) {
2256 ab = 2;
2257 /* set beta to 1, multiply alpha by 1+i. */
2258 beta_i[1] = 0.0;
2259 alpha_i[1] = alpha_i[0];
2260 } else {
2261 ab = 3;
2262 /* multiply alpha by 1+i, beta by 2i. */
2263 alpha_i[1] = alpha_i[0];
2264 beta_i[1] = 2.0 * beta_i[0];
2265 beta_i[0] = 0.0;
2266 }
2267
2268 /* Now fill in a */
2269 for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
2270 for (j = 0, aij = ai, a1ij = a1i; j < n;
2271 j++, aij += incaij, a1ij += inca1ij) {
2272 a_i[aij] = a1[a1ij];
2273 a_i[aij + 1] = a2[a1ij];
2274 }
2275 }
2276
2277 /* Fill in x */
2278 for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
2279 if (ab == 0) {
2280 x_head_i[xi] = 0.0;
2281 x_head_i[xi + 1] = x_head_0[mi];
2282 x_tail_i[xi] = 0.0;
2283 x_tail_i[xi + 1] = x_tail_0[mi];
2284 } else {
2285 x_head_i[xi] = x_head_0[mi];
2286 x_head_i[xi + 1] = x_head_0[mi];
2287 x_tail_i[xi] = x_tail_0[mi];
2288 x_tail_i[xi + 1] = x_tail_0[mi];
2289 }
2290 }
2291
2292 /* Fill in y */
2293 for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
2294 if (ab == 0) {
2295 y_i[yi] = -y2[mi];
2296 y_i[yi + 1] = y1[mi];
2297 } else if (ab == 2) {
2298 y_i[yi] = -2.0 * y2[mi];
2299 y_i[yi + 1] = 2.0 * y1[mi];
2300 } else {
2301 y_i[yi] = y1[mi];
2302 y_i[yi + 1] = y2[mi];
2303 }
2304 }
2305
2306 /* Fill in the truth */
2307 for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
2308
2309 head_r_elem1 = head_r1_true[mi];
2310 tail_r_elem1 = tail_r1_true[mi];
2311 head_r_elem2 = head_r2_true[mi];
2312 tail_r_elem2 = tail_r2_true[mi];
2313
2314 if (ab == 0) {
2315 head_r_true[ri] = -head_r_elem2;
2316 tail_r_true[ri] = -tail_r_elem2;
2317 head_r_true[ri + 1] = head_r_elem1;
2318 tail_r_true[ri + 1] = tail_r_elem1;
2319 } else if (ab == 1) {
2320 {
2321 /* Compute double-double = double-double + double-double. */
2322 double bv;
2323 double s1, s2, t1, t2;
2324
2325 /* Add two hi words. */
2326 s1 = head_r_elem1 + head_r_elem2;
2327 bv = s1 - head_r_elem1;
2328 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2329
2330 /* Add two lo words. */
2331 t1 = tail_r_elem1 + tail_r_elem2;
2332 bv = t1 - tail_r_elem1;
2333 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2334
2335 s2 += t1;
2336
2337 /* Renormalize (s1, s2) to (t1, s2) */
2338 t1 = s1 + s2;
2339 s2 = s2 - (t1 - s1);
2340
2341 t2 += s2;
2342
2343 /* Renormalize (t1, t2) */
2344 head_r_elem = t1 + t2;
2345 tail_r_elem = t2 - (head_r_elem - t1);
2346 }
2347
2348 /* Set the imaginary part to R1 + R2 */
2349 tail_r_true[ri + 1] = tail_r_elem;
2350 head_r_true[ri + 1] = head_r_elem;
2351
2352 /* Set the real part to R1 - R2. */
2353 {
2354 double head_bt, tail_bt;
2355 head_bt = -head_r_elem2;
2356 tail_bt = -tail_r_elem2;
2357 {
2358 /* Compute double-double = double-double + double-double. */
2359 double bv;
2360 double s1, s2, t1, t2;
2361
2362 /* Add two hi words. */
2363 s1 = head_r_elem1 + head_bt;
2364 bv = s1 - head_r_elem1;
2365 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2366
2367 /* Add two lo words. */
2368 t1 = tail_r_elem1 + tail_bt;
2369 bv = t1 - tail_r_elem1;
2370 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2371
2372 s2 += t1;
2373
2374 /* Renormalize (s1, s2) to (t1, s2) */
2375 t1 = s1 + s2;
2376 s2 = s2 - (t1 - s1);
2377
2378 t2 += s2;
2379
2380 /* Renormalize (t1, t2) */
2381 head_r_elem = t1 + t2;
2382 tail_r_elem = t2 - (head_r_elem - t1);
2383 }
2384 }
2385 tail_r_true[ri] = tail_r_elem;
2386 head_r_true[ri] = head_r_elem;
2387 } else {
2388 /* Real part */
2389 {
2390 /* Compute double-double = double-double * double. */
2391 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2392
2393 con = head_r_elem2 * split;
2394 a11 = con - head_r_elem2;
2395 a11 = con - a11;
2396 a21 = head_r_elem2 - a11;
2397 con = -2.0 * split;
2398 b1 = con - -2.0;
2399 b1 = con - b1;
2400 b2 = -2.0 - b1;
2401
2402 c11 = head_r_elem2 * -2.0;
2403 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2404
2405 c2 = tail_r_elem2 * -2.0;
2406 t1 = c11 + c2;
2407 t2 = (c2 - (t1 - c11)) + c21;
2408
2409 head_r_elem = t1 + t2;
2410 tail_r_elem = t2 - (head_r_elem - t1);
2411 }
2412 head_r_true[ri] = head_r_elem;
2413 tail_r_true[ri] = tail_r_elem;
2414
2415 /* Imaginary Part */
2416 {
2417 /* Compute double-double = double-double * double. */
2418 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2419
2420 con = head_r_elem1 * split;
2421 a11 = con - head_r_elem1;
2422 a11 = con - a11;
2423 a21 = head_r_elem1 - a11;
2424 con = 2.0 * split;
2425 b1 = con - 2.0;
2426 b1 = con - b1;
2427 b2 = 2.0 - b1;
2428
2429 c11 = head_r_elem1 * 2.0;
2430 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2431
2432 c2 = tail_r_elem1 * 2.0;
2433 t1 = c11 + c2;
2434 t2 = (c2 - (t1 - c11)) + c21;
2435
2436 head_r_elem = t1 + t2;
2437 tail_r_elem = t2 - (head_r_elem - t1);
2438 }
2439 head_r_true[ri + 1] = head_r_elem;
2440 tail_r_true[ri + 1] = tail_r_elem;
2441 }
2442 }
2443
2444
2445 blas_free(a1);
2446 blas_free(a2);
2447 blas_free(y1);
2448 blas_free(y2);
2449 blas_free(x_head_0);
2450 blas_free(x_tail_0);
2451 blas_free(head_r1_true);
2452 blas_free(tail_r1_true);
2453 blas_free(head_r2_true);
2454 blas_free(tail_r2_true);
2455 blas_free(a_vec);
2456 blas_free(x_head_vec);
2457 blas_free(x_tail_vec);
2458 }
2459 }
2460
BLAS_zhemv2_c_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,void * x_head,void * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)2461 void BLAS_zhemv2_c_c_testgen(int norm, enum blas_order_type order,
2462 enum blas_uplo_type uplo, int n, void *alpha,
2463 int alpha_flag, void *beta, int beta_flag,
2464 void *a, int lda, void *x_head, void *x_tail,
2465 void *y, int *seed, double *head_r_true,
2466 double *tail_r_true)
2467
2468 /*
2469 * Purpose
2470 * =======
2471 *
2472 * Generates the test inputs to BLAS_zhemv_c_c{_x}
2473 *
2474 * Arguments
2475 * =========
2476 *
2477 * norm (input) int
2478 * = -1: the vectors are scaled with norms near underflow.
2479 * = 0: the vectors have norms of order 1.
2480 * = 1: the vectors are scaled with norms near overflow.
2481 *
2482 * order (input) enum blas_order_type
2483 * storage format of the matrices
2484 *
2485 * uplo (input) enum blas_uplo_type
2486 * which half of the hemv2etric matrix a is to be stored.
2487 *
2488 * n (input) int
2489 * sizes of symmetrical matrix a, size of vectors x, y:
2490 * matrix a is n-by-n.
2491 *
2492 * alpha (input/output) void*
2493 * if alpha_flag = 1, alpha is input.
2494 * if alpha_flag = 0, alpha is output.
2495 *
2496 * alpha_flag (input) int
2497 * = 0: alpha is free, and is output.
2498 * = 1: alpha is fixed on input.
2499 *
2500 * beta (input/output) void*
2501 * if beta_flag = 1, beta is input.
2502 * if beta_flag = 0, beta is output.
2503 *
2504 * beta_flag (input) int
2505 * = 0: beta is free, and is output.
2506 * = 1: beta is fixed on input.
2507 *
2508 * a (input/output) void*
2509 *
2510 * lda (input) lda
2511 * leading dimension of matrix A.
2512 *
2513 * x_head (input/output) void*
2514 * x_tail (input/output) void*
2515 * vector x = (x_head + x_tail)
2516 *
2517 * y (input/output) void*
2518 * generated vector y that will be used as an input to HEMV2.
2519 *
2520 * seed (input/output) int *
2521 * seed for the random number generator.
2522 *
2523 * head_r_true (output) double *
2524 * the leading part of the truth in double-double.
2525 *
2526 * tail_r_true (output) double *
2527 * the trailing part of the truth in double-double
2528 *
2529 */
2530 {
2531 {
2532
2533 /* Strategy:
2534 r1 = alpha * A1 * x + beta * y1
2535 r2 = alpha * A2 * x + beta * y2
2536 where all the matrices and vectors are real. Then let R = R1 + i R2,
2537 A = A1 + i A2, y = y1 + i y2. To make A Hermitian, A1 is symmetric,
2538 and A2 is skew-symmetric.
2539 */
2540
2541 int i, j;
2542 int yi;
2543 int aij, ai;
2544 int a1ij, a1i;
2545 int xi;
2546 int mi;
2547 int incx = 1, incy = 1;
2548 int incyi, xi0, yi0;
2549 int incaij, incai;
2550 int inca1ij, inca1i;
2551 int incxi;
2552 int inca_vec, incx_vec;
2553 int ld;
2554 int ab;
2555 int ri, incri;
2556
2557 float *a1;
2558 float *a2;
2559 double *y1;
2560 double *y2;
2561 float *x_head_0;
2562 float *x_tail_0;
2563
2564 double *head_r1_true, *tail_r1_true;
2565 double *head_r2_true, *tail_r2_true;
2566
2567 double head_r_elem1, tail_r_elem1;
2568 double head_r_elem2, tail_r_elem2;
2569 double head_r_elem, tail_r_elem;
2570
2571 float *a_vec;
2572 float *x_head_vec;
2573 float *x_tail_vec;
2574
2575 double *y_i = (double *) y;
2576 double *alpha_i = (double *) alpha;
2577 double *beta_i = (double *) beta;
2578
2579 float *a_i = (float *) a;
2580 float *x_head_i = (float *) x_head;
2581 float *x_tail_i = (float *) x_tail;
2582
2583 ld = n;
2584 if (order == blas_colmajor) {
2585 inca1i = incai = 1;
2586 incyi = incy;
2587 incaij = lda;
2588 incxi = incx;
2589 inca1ij = n;
2590 } else {
2591 incyi = incy;
2592 incai = lda;
2593 incxi = incx;
2594 inca1i = n;
2595 inca1ij = incaij = 1;
2596 }
2597 xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
2598 yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
2599 incri = 1;
2600 incri *= 2;
2601 xi0 *= 2;
2602 yi0 *= 2;
2603 incyi *= 2;
2604 incai *= 2;
2605 incaij *= 2;
2606 incxi *= 2;
2607
2608 inca_vec = incx_vec = 1;
2609 inca_vec *= 2;
2610 incx_vec *= 2;
2611 a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2612 if (n > 0 && a_vec == NULL) {
2613 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2614 }
2615 x_head_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2616 if (n > 0 && x_head_vec == NULL) {
2617 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2618 }
2619 x_tail_vec = (float *) blas_malloc(n * sizeof(float) * 2);
2620 if (n > 0 && x_tail_vec == NULL) {
2621 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2622 }
2623
2624 int incx0, incy1, incy2, incmi = 1;
2625 a1 = (float *) blas_malloc(n * n * sizeof(float));
2626 if (n * n > 0 && a1 == NULL) {
2627 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2628 }
2629 a2 = (float *) blas_malloc(n * n * sizeof(float));
2630 if (n * n > 0 && a2 == NULL) {
2631 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2632 }
2633 y1 = (double *) blas_malloc(n * sizeof(double));
2634 if (n > 0 && y1 == NULL) {
2635 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2636 }
2637 incy1 = 1;
2638
2639 y2 = (double *) blas_malloc(n * sizeof(double));
2640 if (n > 0 && y2 == NULL) {
2641 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2642 }
2643 incy2 = 1;
2644
2645 x_head_0 = (float *) blas_malloc(n * sizeof(float));
2646 if (n > 0 && x_head_0 == NULL) {
2647 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2648 }
2649 x_tail_0 = (float *) blas_malloc(n * sizeof(float));
2650 if (n > 0 && x_tail_0 == NULL) {
2651 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2652 }
2653 incx0 = 1;
2654
2655 head_r1_true = (double *) blas_malloc(n * sizeof(double));
2656 tail_r1_true = (double *) blas_malloc(n * sizeof(double));
2657 if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
2658 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2659 };
2660 head_r2_true = (double *) blas_malloc(n * sizeof(double));
2661 tail_r2_true = (double *) blas_malloc(n * sizeof(double));
2662 if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
2663 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2664 };
2665
2666 /* First generate the real portion of A and x. */
2667 BLAS_dsymv2_s_s_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
2668 x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
2669 head_r1_true, tail_r1_true);
2670
2671 /* x0 is now fixed and is input to this call */
2672 BLAS_dskmv2_testgen_s_s(norm, order, uplo, n, alpha_i, beta_i, a2, n,
2673 x_head_0, x_tail_0, y2, seed, head_r2_true,
2674 tail_r2_true);
2675
2676
2677 /* The case where x is a complex vector. Since x is generated
2678 as a real vector, we need to perform some scaling.
2679
2680 There are four cases to consider, depending on the values
2681 of alpha and beta.
2682
2683 values scaling
2684 alpha beta alpha A x beta y r (truth)
2685 0 1 1 i i i
2686 1 1 ? 1+i 1+i 1+i
2687 2 ? 1 1+i 1+i 2i 2i
2688 3 ? ? 1+i 1+i 2i 2i
2689
2690 Note that we can afford to scale r by 1+i, since they are
2691 computed in double-double precision.
2692 */
2693
2694 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
2695 ab = 0;
2696 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
2697 } else if (alpha_i[0] == 1.0) {
2698 ab = 1;
2699 /* set alpha to 1, multiply beta by 1+i. */
2700 alpha_i[1] = 0.0;
2701 beta_i[1] = beta_i[0];
2702 } else if (beta_i[0] == 1.0) {
2703 ab = 2;
2704 /* set beta to 1, multiply alpha by 1+i. */
2705 beta_i[1] = 0.0;
2706 alpha_i[1] = alpha_i[0];
2707 } else {
2708 ab = 3;
2709 /* multiply alpha by 1+i, beta by 2i. */
2710 alpha_i[1] = alpha_i[0];
2711 beta_i[1] = 2.0 * beta_i[0];
2712 beta_i[0] = 0.0;
2713 }
2714
2715 /* Now fill in a */
2716 for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
2717 for (j = 0, aij = ai, a1ij = a1i; j < n;
2718 j++, aij += incaij, a1ij += inca1ij) {
2719 a_i[aij] = a1[a1ij];
2720 a_i[aij + 1] = a2[a1ij];
2721 }
2722 }
2723
2724 /* Fill in x */
2725 for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
2726 if (ab == 0) {
2727 x_head_i[xi] = 0.0;
2728 x_head_i[xi + 1] = x_head_0[mi];
2729 x_tail_i[xi] = 0.0;
2730 x_tail_i[xi + 1] = x_tail_0[mi];
2731 } else {
2732 x_head_i[xi] = x_head_0[mi];
2733 x_head_i[xi + 1] = x_head_0[mi];
2734 x_tail_i[xi] = x_tail_0[mi];
2735 x_tail_i[xi + 1] = x_tail_0[mi];
2736 }
2737 }
2738
2739 /* Fill in y */
2740 for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
2741 if (ab == 0) {
2742 y_i[yi] = -y2[mi];
2743 y_i[yi + 1] = y1[mi];
2744 } else if (ab == 2) {
2745 y_i[yi] = -2.0 * y2[mi];
2746 y_i[yi + 1] = 2.0 * y1[mi];
2747 } else {
2748 y_i[yi] = y1[mi];
2749 y_i[yi + 1] = y2[mi];
2750 }
2751 }
2752
2753 /* Fill in the truth */
2754 for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
2755
2756 head_r_elem1 = head_r1_true[mi];
2757 tail_r_elem1 = tail_r1_true[mi];
2758 head_r_elem2 = head_r2_true[mi];
2759 tail_r_elem2 = tail_r2_true[mi];
2760
2761 if (ab == 0) {
2762 head_r_true[ri] = -head_r_elem2;
2763 tail_r_true[ri] = -tail_r_elem2;
2764 head_r_true[ri + 1] = head_r_elem1;
2765 tail_r_true[ri + 1] = tail_r_elem1;
2766 } else if (ab == 1) {
2767 {
2768 /* Compute double-double = double-double + double-double. */
2769 double bv;
2770 double s1, s2, t1, t2;
2771
2772 /* Add two hi words. */
2773 s1 = head_r_elem1 + head_r_elem2;
2774 bv = s1 - head_r_elem1;
2775 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
2776
2777 /* Add two lo words. */
2778 t1 = tail_r_elem1 + tail_r_elem2;
2779 bv = t1 - tail_r_elem1;
2780 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
2781
2782 s2 += t1;
2783
2784 /* Renormalize (s1, s2) to (t1, s2) */
2785 t1 = s1 + s2;
2786 s2 = s2 - (t1 - s1);
2787
2788 t2 += s2;
2789
2790 /* Renormalize (t1, t2) */
2791 head_r_elem = t1 + t2;
2792 tail_r_elem = t2 - (head_r_elem - t1);
2793 }
2794
2795 /* Set the imaginary part to R1 + R2 */
2796 tail_r_true[ri + 1] = tail_r_elem;
2797 head_r_true[ri + 1] = head_r_elem;
2798
2799 /* Set the real part to R1 - R2. */
2800 {
2801 double head_bt, tail_bt;
2802 head_bt = -head_r_elem2;
2803 tail_bt = -tail_r_elem2;
2804 {
2805 /* Compute double-double = double-double + double-double. */
2806 double bv;
2807 double s1, s2, t1, t2;
2808
2809 /* Add two hi words. */
2810 s1 = head_r_elem1 + head_bt;
2811 bv = s1 - head_r_elem1;
2812 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
2813
2814 /* Add two lo words. */
2815 t1 = tail_r_elem1 + tail_bt;
2816 bv = t1 - tail_r_elem1;
2817 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
2818
2819 s2 += t1;
2820
2821 /* Renormalize (s1, s2) to (t1, s2) */
2822 t1 = s1 + s2;
2823 s2 = s2 - (t1 - s1);
2824
2825 t2 += s2;
2826
2827 /* Renormalize (t1, t2) */
2828 head_r_elem = t1 + t2;
2829 tail_r_elem = t2 - (head_r_elem - t1);
2830 }
2831 }
2832 tail_r_true[ri] = tail_r_elem;
2833 head_r_true[ri] = head_r_elem;
2834 } else {
2835 /* Real part */
2836 {
2837 /* Compute double-double = double-double * double. */
2838 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2839
2840 con = head_r_elem2 * split;
2841 a11 = con - head_r_elem2;
2842 a11 = con - a11;
2843 a21 = head_r_elem2 - a11;
2844 con = -2.0 * split;
2845 b1 = con - -2.0;
2846 b1 = con - b1;
2847 b2 = -2.0 - b1;
2848
2849 c11 = head_r_elem2 * -2.0;
2850 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2851
2852 c2 = tail_r_elem2 * -2.0;
2853 t1 = c11 + c2;
2854 t2 = (c2 - (t1 - c11)) + c21;
2855
2856 head_r_elem = t1 + t2;
2857 tail_r_elem = t2 - (head_r_elem - t1);
2858 }
2859 head_r_true[ri] = head_r_elem;
2860 tail_r_true[ri] = tail_r_elem;
2861
2862 /* Imaginary Part */
2863 {
2864 /* Compute double-double = double-double * double. */
2865 double a11, a21, b1, b2, c11, c21, c2, con, t1, t2;
2866
2867 con = head_r_elem1 * split;
2868 a11 = con - head_r_elem1;
2869 a11 = con - a11;
2870 a21 = head_r_elem1 - a11;
2871 con = 2.0 * split;
2872 b1 = con - 2.0;
2873 b1 = con - b1;
2874 b2 = 2.0 - b1;
2875
2876 c11 = head_r_elem1 * 2.0;
2877 c21 = (((a11 * b1 - c11) + a11 * b2) + a21 * b1) + a21 * b2;
2878
2879 c2 = tail_r_elem1 * 2.0;
2880 t1 = c11 + c2;
2881 t2 = (c2 - (t1 - c11)) + c21;
2882
2883 head_r_elem = t1 + t2;
2884 tail_r_elem = t2 - (head_r_elem - t1);
2885 }
2886 head_r_true[ri + 1] = head_r_elem;
2887 tail_r_true[ri + 1] = tail_r_elem;
2888 }
2889 }
2890
2891
2892 blas_free(a1);
2893 blas_free(a2);
2894 blas_free(y1);
2895 blas_free(y2);
2896 blas_free(x_head_0);
2897 blas_free(x_tail_0);
2898 blas_free(head_r1_true);
2899 blas_free(tail_r1_true);
2900 blas_free(head_r2_true);
2901 blas_free(tail_r2_true);
2902 blas_free(a_vec);
2903 blas_free(x_head_vec);
2904 blas_free(x_tail_vec);
2905 }
2906 }
2907
BLAS_zhemv2_z_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,double * x_head,double * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)2908 void BLAS_zhemv2_z_d_testgen(int norm, enum blas_order_type order,
2909 enum blas_uplo_type uplo, int n, void *alpha,
2910 int alpha_flag, void *beta, int beta_flag,
2911 void *a, int lda, double *x_head, double *x_tail,
2912 void *y, int *seed, double *head_r_true,
2913 double *tail_r_true)
2914
2915 /*
2916 * Purpose
2917 * =======
2918 *
2919 * Generates the test inputs to BLAS_zhemv_z_d{_x}
2920 *
2921 * Arguments
2922 * =========
2923 *
2924 * norm (input) int
2925 * = -1: the vectors are scaled with norms near underflow.
2926 * = 0: the vectors have norms of order 1.
2927 * = 1: the vectors are scaled with norms near overflow.
2928 *
2929 * order (input) enum blas_order_type
2930 * storage format of the matrices
2931 *
2932 * uplo (input) enum blas_uplo_type
2933 * which half of the hemv2etric matrix a is to be stored.
2934 *
2935 * n (input) int
2936 * sizes of symmetrical matrix a, size of vectors x, y:
2937 * matrix a is n-by-n.
2938 *
2939 * alpha (input/output) void*
2940 * if alpha_flag = 1, alpha is input.
2941 * if alpha_flag = 0, alpha is output.
2942 *
2943 * alpha_flag (input) int
2944 * = 0: alpha is free, and is output.
2945 * = 1: alpha is fixed on input.
2946 *
2947 * beta (input/output) void*
2948 * if beta_flag = 1, beta is input.
2949 * if beta_flag = 0, beta is output.
2950 *
2951 * beta_flag (input) int
2952 * = 0: beta is free, and is output.
2953 * = 1: beta is fixed on input.
2954 *
2955 * a (input/output) void*
2956 *
2957 * lda (input) lda
2958 * leading dimension of matrix A.
2959 *
2960 * x_head (input/output) double*
2961 * x_tail (input/output) double*
2962 * vector x = (x_head + x_tail)
2963 *
2964 * y (input/output) void*
2965 * generated vector y that will be used as an input to HEMV2.
2966 *
2967 * seed (input/output) int *
2968 * seed for the random number generator.
2969 *
2970 * head_r_true (output) double *
2971 * the leading part of the truth in double-double.
2972 *
2973 * tail_r_true (output) double *
2974 * the trailing part of the truth in double-double
2975 *
2976 */
2977 {
2978 {
2979
2980 /* Strategy:
2981 r1 = alpha * A1 * x + beta * y1
2982 r2 = alpha * A2 * x + beta * y2
2983 where all the matrices and vectors are real. Then let R = R1 + i R2,
2984 A = A1 + i A2, y = y1 + i y2. To make A Hermitian, A1 is symmetric,
2985 and A2 is skew-symmetric.
2986 */
2987
2988 int i, j;
2989 int yi;
2990 int aij, ai;
2991 int a1ij, a1i;
2992 int xi;
2993 int mi;
2994 int incx = 1, incy = 1;
2995 int incyi, xi0, yi0;
2996 int incaij, incai;
2997 int inca1ij, inca1i;
2998 int incxi;
2999 int inca_vec, incx_vec;
3000 int ld;
3001 int ab;
3002 int ri, incri;
3003
3004 double *a1;
3005 double *a2;
3006 double *y1;
3007 double *y2;
3008 double *x_head_0;
3009 double *x_tail_0;
3010
3011 double *head_r1_true, *tail_r1_true;
3012 double *head_r2_true, *tail_r2_true;
3013
3014 double head_r_elem1, tail_r_elem1;
3015 double head_r_elem2, tail_r_elem2;
3016 double head_r_elem, tail_r_elem;
3017
3018 double *a_vec;
3019 double *x_head_vec;
3020 double *x_tail_vec;
3021
3022 double *y_i = (double *) y;
3023 double *alpha_i = (double *) alpha;
3024 double *beta_i = (double *) beta;
3025
3026 double *a_i = (double *) a;
3027 double *x_head_i = x_head;
3028 double *x_tail_i = x_tail;
3029
3030 ld = n;
3031 if (order == blas_colmajor) {
3032 inca1i = incai = 1;
3033 incyi = incy;
3034 incaij = lda;
3035 incxi = incx;
3036 inca1ij = n;
3037 } else {
3038 incyi = incy;
3039 incai = lda;
3040 incxi = incx;
3041 inca1i = n;
3042 inca1ij = incaij = 1;
3043 }
3044 xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
3045 yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
3046 incri = 1;
3047 incri *= 2;
3048
3049 yi0 *= 2;
3050 incyi *= 2;
3051 incai *= 2;
3052 incaij *= 2;
3053
3054
3055 inca_vec = incx_vec = 1;
3056 inca_vec *= 2;
3057
3058 a_vec = (double *) blas_malloc(n * sizeof(double) * 2);
3059 if (n > 0 && a_vec == NULL) {
3060 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3061 }
3062 x_head_vec = (double *) blas_malloc(n * sizeof(double));
3063 if (n > 0 && x_head_vec == NULL) {
3064 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3065 }
3066 x_tail_vec = (double *) blas_malloc(n * sizeof(double));
3067 if (n > 0 && x_tail_vec == NULL) {
3068 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3069 }
3070
3071 int incx0, incy1, incy2, incmi = 1;
3072 a1 = (double *) blas_malloc(n * n * sizeof(double));
3073 if (n * n > 0 && a1 == NULL) {
3074 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3075 }
3076 a2 = (double *) blas_malloc(n * n * sizeof(double));
3077 if (n * n > 0 && a2 == NULL) {
3078 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3079 }
3080 y1 = (double *) blas_malloc(n * sizeof(double));
3081 if (n > 0 && y1 == NULL) {
3082 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3083 }
3084 incy1 = 1;
3085
3086 y2 = (double *) blas_malloc(n * sizeof(double));
3087 if (n > 0 && y2 == NULL) {
3088 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3089 }
3090 incy2 = 1;
3091
3092 x_head_0 = (double *) blas_malloc(n * sizeof(double));
3093 if (n > 0 && x_head_0 == NULL) {
3094 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3095 }
3096 x_tail_0 = (double *) blas_malloc(n * sizeof(double));
3097 if (n > 0 && x_tail_0 == NULL) {
3098 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3099 }
3100 incx0 = 1;
3101
3102 head_r1_true = (double *) blas_malloc(n * sizeof(double));
3103 tail_r1_true = (double *) blas_malloc(n * sizeof(double));
3104 if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3105 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3106 };
3107 head_r2_true = (double *) blas_malloc(n * sizeof(double));
3108 tail_r2_true = (double *) blas_malloc(n * sizeof(double));
3109 if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3110 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3111 };
3112
3113 /* First generate the real portion of A and x. */
3114 BLAS_dsymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
3115 x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
3116 head_r1_true, tail_r1_true);
3117
3118 /* x0 is now fixed and is input to this call */
3119 BLAS_dskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
3120 x_head_0, x_tail_0, y2, seed, head_r2_true,
3121 tail_r2_true);
3122
3123
3124 /* The case where x is a real vector.
3125
3126 There are four cases to consider, depending on the
3127 values of alpha and beta.
3128
3129 values scaling
3130 alpha beta alpha A x beta y r (truth)
3131 0 1 1
3132 1 1 ? -i i
3133 2 ? 1 i i i
3134 3 ? ? 1+i 1+i 1+i
3135
3136 Note that we can afford to scale truth by (1+i) since they
3137 are computed in double-double.
3138 */
3139
3140 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3141 ab = 0;
3142 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
3143 } else if (alpha_i[0] == 1.0) {
3144 ab = 1;
3145 /* set alpha to 1, multiply beta by -i. */
3146 alpha_i[1] = 0.0;
3147 beta_i[1] = -beta_i[0];
3148 beta_i[0] = 0.0;
3149 } else if (beta_i[0] == 1.0) {
3150 ab = 2;
3151 /* set beta to 1, multiply alpha by i. */
3152 beta_i[1] = 0.0;
3153 alpha_i[1] = alpha_i[0];
3154 alpha_i[0] = 0.0;
3155 } else {
3156 ab = 3;
3157 /* multiply alpha, beta by (1 + i). */
3158 alpha_i[1] = alpha_i[0];
3159 beta_i[1] = beta_i[0];
3160 }
3161
3162 /* Now fill in a */
3163 for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
3164 for (j = 0, aij = ai, a1ij = a1i; j < n;
3165 j++, aij += incaij, a1ij += inca1ij) {
3166 a_i[aij] = a1[a1ij];
3167 a_i[aij + 1] = a2[a1ij];
3168 }
3169 }
3170
3171 /* Fill in x */
3172 for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
3173 x_head_i[xi] = x_head_0[mi];
3174 x_tail_i[xi] = x_tail_0[mi];
3175 }
3176
3177 /* Fill in y */
3178 for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
3179 if (ab == 1 || ab == 2) {
3180 y_i[yi] = -y2[mi];
3181 y_i[yi + 1] = y1[mi];
3182 } else {
3183 y_i[yi] = y1[mi];
3184 y_i[yi + 1] = y2[mi];
3185 }
3186 }
3187
3188 /* Fill in truth */
3189 for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
3190 if (ab == 0 || ab == 1) {
3191 head_r_true[ri] = head_r1_true[mi];
3192 tail_r_true[ri] = tail_r1_true[mi];
3193 head_r_true[ri + 1] = head_r2_true[mi];
3194 tail_r_true[ri + 1] = tail_r2_true[mi];
3195 } else if (ab == 2) {
3196 head_r_true[ri] = -head_r2_true[mi];
3197 tail_r_true[ri] = -tail_r2_true[mi];
3198 head_r_true[ri + 1] = head_r1_true[mi];
3199 tail_r_true[ri + 1] = tail_r1_true[mi];
3200 } else {
3201 head_r_elem1 = head_r1_true[mi];
3202 tail_r_elem1 = tail_r1_true[mi];
3203
3204 head_r_elem2 = head_r2_true[mi];
3205 tail_r_elem2 = tail_r2_true[mi];
3206
3207 {
3208 /* Compute double-double = double-double + double-double. */
3209 double bv;
3210 double s1, s2, t1, t2;
3211
3212 /* Add two hi words. */
3213 s1 = head_r_elem1 + head_r_elem2;
3214 bv = s1 - head_r_elem1;
3215 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3216
3217 /* Add two lo words. */
3218 t1 = tail_r_elem1 + tail_r_elem2;
3219 bv = t1 - tail_r_elem1;
3220 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3221
3222 s2 += t1;
3223
3224 /* Renormalize (s1, s2) to (t1, s2) */
3225 t1 = s1 + s2;
3226 s2 = s2 - (t1 - s1);
3227
3228 t2 += s2;
3229
3230 /* Renormalize (t1, t2) */
3231 head_r_elem = t1 + t2;
3232 tail_r_elem = t2 - (head_r_elem - t1);
3233 }
3234
3235 /* Set the imaginary part to r1 + r2 */
3236 tail_r_true[ri + 1] = tail_r_elem;
3237 head_r_true[ri + 1] = head_r_elem;
3238
3239 /* Set the real part to r1 - r2. */
3240 {
3241 double head_bt, tail_bt;
3242 head_bt = -head_r_elem2;
3243 tail_bt = -tail_r_elem2;
3244 {
3245 /* Compute double-double = double-double + double-double. */
3246 double bv;
3247 double s1, s2, t1, t2;
3248
3249 /* Add two hi words. */
3250 s1 = head_r_elem1 + head_bt;
3251 bv = s1 - head_r_elem1;
3252 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3253
3254 /* Add two lo words. */
3255 t1 = tail_r_elem1 + tail_bt;
3256 bv = t1 - tail_r_elem1;
3257 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3258
3259 s2 += t1;
3260
3261 /* Renormalize (s1, s2) to (t1, s2) */
3262 t1 = s1 + s2;
3263 s2 = s2 - (t1 - s1);
3264
3265 t2 += s2;
3266
3267 /* Renormalize (t1, t2) */
3268 head_r_elem = t1 + t2;
3269 tail_r_elem = t2 - (head_r_elem - t1);
3270 }
3271 }
3272 tail_r_true[ri] = tail_r_elem;
3273 head_r_true[ri] = head_r_elem;
3274 }
3275 }
3276
3277
3278 blas_free(a1);
3279 blas_free(a2);
3280 blas_free(y1);
3281 blas_free(y2);
3282 blas_free(x_head_0);
3283 blas_free(x_tail_0);
3284 blas_free(head_r1_true);
3285 blas_free(tail_r1_true);
3286 blas_free(head_r2_true);
3287 blas_free(tail_r2_true);
3288 blas_free(a_vec);
3289 blas_free(x_head_vec);
3290 blas_free(x_tail_vec);
3291 }
3292 }
3293
BLAS_chemv2_c_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,int n,void * alpha,int alpha_flag,void * beta,int beta_flag,void * a,int lda,float * x_head,float * x_tail,void * y,int * seed,double * head_r_true,double * tail_r_true)3294 void BLAS_chemv2_c_s_testgen(int norm, enum blas_order_type order,
3295 enum blas_uplo_type uplo, int n, void *alpha,
3296 int alpha_flag, void *beta, int beta_flag,
3297 void *a, int lda, float *x_head, float *x_tail,
3298 void *y, int *seed, double *head_r_true,
3299 double *tail_r_true)
3300
3301 /*
3302 * Purpose
3303 * =======
3304 *
3305 * Generates the test inputs to BLAS_chemv_c_s{_x}
3306 *
3307 * Arguments
3308 * =========
3309 *
3310 * norm (input) int
3311 * = -1: the vectors are scaled with norms near underflow.
3312 * = 0: the vectors have norms of order 1.
3313 * = 1: the vectors are scaled with norms near overflow.
3314 *
3315 * order (input) enum blas_order_type
3316 * storage format of the matrices
3317 *
3318 * uplo (input) enum blas_uplo_type
3319 * which half of the hemv2etric matrix a is to be stored.
3320 *
3321 * n (input) int
3322 * sizes of symmetrical matrix a, size of vectors x, y:
3323 * matrix a is n-by-n.
3324 *
3325 * alpha (input/output) void*
3326 * if alpha_flag = 1, alpha is input.
3327 * if alpha_flag = 0, alpha is output.
3328 *
3329 * alpha_flag (input) int
3330 * = 0: alpha is free, and is output.
3331 * = 1: alpha is fixed on input.
3332 *
3333 * beta (input/output) void*
3334 * if beta_flag = 1, beta is input.
3335 * if beta_flag = 0, beta is output.
3336 *
3337 * beta_flag (input) int
3338 * = 0: beta is free, and is output.
3339 * = 1: beta is fixed on input.
3340 *
3341 * a (input/output) void*
3342 *
3343 * lda (input) lda
3344 * leading dimension of matrix A.
3345 *
3346 * x_head (input/output) float*
3347 * x_tail (input/output) float*
3348 * vector x = (x_head + x_tail)
3349 *
3350 * y (input/output) void*
3351 * generated vector y that will be used as an input to HEMV2.
3352 *
3353 * seed (input/output) int *
3354 * seed for the random number generator.
3355 *
3356 * head_r_true (output) double *
3357 * the leading part of the truth in double-double.
3358 *
3359 * tail_r_true (output) double *
3360 * the trailing part of the truth in double-double
3361 *
3362 */
3363 {
3364 {
3365
3366 /* Strategy:
3367 r1 = alpha * A1 * x + beta * y1
3368 r2 = alpha * A2 * x + beta * y2
3369 where all the matrices and vectors are real. Then let R = R1 + i R2,
3370 A = A1 + i A2, y = y1 + i y2. To make A Hermitian, A1 is symmetric,
3371 and A2 is skew-symmetric.
3372 */
3373
3374 int i, j;
3375 int yi;
3376 int aij, ai;
3377 int a1ij, a1i;
3378 int xi;
3379 int mi;
3380 int incx = 1, incy = 1;
3381 int incyi, xi0, yi0;
3382 int incaij, incai;
3383 int inca1ij, inca1i;
3384 int incxi;
3385 int inca_vec, incx_vec;
3386 int ld;
3387 int ab;
3388 int ri, incri;
3389
3390 float *a1;
3391 float *a2;
3392 float *y1;
3393 float *y2;
3394 float *x_head_0;
3395 float *x_tail_0;
3396
3397 double *head_r1_true, *tail_r1_true;
3398 double *head_r2_true, *tail_r2_true;
3399
3400 double head_r_elem1, tail_r_elem1;
3401 double head_r_elem2, tail_r_elem2;
3402 double head_r_elem, tail_r_elem;
3403
3404 float *a_vec;
3405 float *x_head_vec;
3406 float *x_tail_vec;
3407
3408 float *y_i = (float *) y;
3409 float *alpha_i = (float *) alpha;
3410 float *beta_i = (float *) beta;
3411
3412 float *a_i = (float *) a;
3413 float *x_head_i = x_head;
3414 float *x_tail_i = x_tail;
3415
3416 ld = n;
3417 if (order == blas_colmajor) {
3418 inca1i = incai = 1;
3419 incyi = incy;
3420 incaij = lda;
3421 incxi = incx;
3422 inca1ij = n;
3423 } else {
3424 incyi = incy;
3425 incai = lda;
3426 incxi = incx;
3427 inca1i = n;
3428 inca1ij = incaij = 1;
3429 }
3430 xi0 = (incx > 0) ? 0 : -(n - 1) * incx;
3431 yi0 = (incy > 0) ? 0 : -(n - 1) * incy;
3432 incri = 1;
3433 incri *= 2;
3434
3435 yi0 *= 2;
3436 incyi *= 2;
3437 incai *= 2;
3438 incaij *= 2;
3439
3440
3441 inca_vec = incx_vec = 1;
3442 inca_vec *= 2;
3443
3444 a_vec = (float *) blas_malloc(n * sizeof(float) * 2);
3445 if (n > 0 && a_vec == NULL) {
3446 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3447 }
3448 x_head_vec = (float *) blas_malloc(n * sizeof(float));
3449 if (n > 0 && x_head_vec == NULL) {
3450 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3451 }
3452 x_tail_vec = (float *) blas_malloc(n * sizeof(float));
3453 if (n > 0 && x_tail_vec == NULL) {
3454 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3455 }
3456
3457 int incx0, incy1, incy2, incmi = 1;
3458 a1 = (float *) blas_malloc(n * n * sizeof(float));
3459 if (n * n > 0 && a1 == NULL) {
3460 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3461 }
3462 a2 = (float *) blas_malloc(n * n * sizeof(float));
3463 if (n * n > 0 && a2 == NULL) {
3464 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3465 }
3466 y1 = (float *) blas_malloc(n * sizeof(float));
3467 if (n > 0 && y1 == NULL) {
3468 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3469 }
3470 incy1 = 1;
3471
3472 y2 = (float *) blas_malloc(n * sizeof(float));
3473 if (n > 0 && y2 == NULL) {
3474 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3475 }
3476 incy2 = 1;
3477
3478 x_head_0 = (float *) blas_malloc(n * sizeof(float));
3479 if (n > 0 && x_head_0 == NULL) {
3480 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3481 }
3482 x_tail_0 = (float *) blas_malloc(n * sizeof(float));
3483 if (n > 0 && x_tail_0 == NULL) {
3484 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3485 }
3486 incx0 = 1;
3487
3488 head_r1_true = (double *) blas_malloc(n * sizeof(double));
3489 tail_r1_true = (double *) blas_malloc(n * sizeof(double));
3490 if (n > 0 && (head_r1_true == NULL || tail_r1_true == NULL)) {
3491 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3492 };
3493 head_r2_true = (double *) blas_malloc(n * sizeof(double));
3494 tail_r2_true = (double *) blas_malloc(n * sizeof(double));
3495 if (n > 0 && (head_r2_true == NULL || tail_r2_true == NULL)) {
3496 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3497 };
3498
3499 /* First generate the real portion of A and x. */
3500 BLAS_ssymv2_testgen(norm, order, uplo, n, alpha_i, alpha_flag, a1, n,
3501 x_head_0, x_tail_0, beta_i, beta_flag, y1, seed,
3502 head_r1_true, tail_r1_true);
3503
3504 /* x0 is now fixed and is input to this call */
3505 BLAS_sskmv2_testgen(norm, order, uplo, n, alpha_i, beta_i, a2, n,
3506 x_head_0, x_tail_0, y2, seed, head_r2_true,
3507 tail_r2_true);
3508
3509
3510 /* The case where x is a real vector.
3511
3512 There are four cases to consider, depending on the
3513 values of alpha and beta.
3514
3515 values scaling
3516 alpha beta alpha A x beta y r (truth)
3517 0 1 1
3518 1 1 ? -i i
3519 2 ? 1 i i i
3520 3 ? ? 1+i 1+i 1+i
3521
3522 Note that we can afford to scale truth by (1+i) since they
3523 are computed in double-double.
3524 */
3525
3526 if (alpha_i[0] == 1.0 && beta_i[0] == 1.0) {
3527 ab = 0;
3528 alpha_i[1] = beta_i[1] = 0.0; /* set alpha, beta to be 1. */
3529 } else if (alpha_i[0] == 1.0) {
3530 ab = 1;
3531 /* set alpha to 1, multiply beta by -i. */
3532 alpha_i[1] = 0.0;
3533 beta_i[1] = -beta_i[0];
3534 beta_i[0] = 0.0;
3535 } else if (beta_i[0] == 1.0) {
3536 ab = 2;
3537 /* set beta to 1, multiply alpha by i. */
3538 beta_i[1] = 0.0;
3539 alpha_i[1] = alpha_i[0];
3540 alpha_i[0] = 0.0;
3541 } else {
3542 ab = 3;
3543 /* multiply alpha, beta by (1 + i). */
3544 alpha_i[1] = alpha_i[0];
3545 beta_i[1] = beta_i[0];
3546 }
3547
3548 /* Now fill in a */
3549 for (i = 0, ai = 0, a1i = 0; i < n; i++, ai += incai, a1i += inca1i) {
3550 for (j = 0, aij = ai, a1ij = a1i; j < n;
3551 j++, aij += incaij, a1ij += inca1ij) {
3552 a_i[aij] = a1[a1ij];
3553 a_i[aij + 1] = a2[a1ij];
3554 }
3555 }
3556
3557 /* Fill in x */
3558 for (i = 0, xi = xi0, mi = 0; i < n; i++, xi += incxi, mi += incx0) {
3559 x_head_i[xi] = x_head_0[mi];
3560 x_tail_i[xi] = x_tail_0[mi];
3561 }
3562
3563 /* Fill in y */
3564 for (i = 0, yi = yi0, mi = 0; i < n; i++, yi += incyi, mi += incy1) {
3565 if (ab == 1 || ab == 2) {
3566 y_i[yi] = -y2[mi];
3567 y_i[yi + 1] = y1[mi];
3568 } else {
3569 y_i[yi] = y1[mi];
3570 y_i[yi + 1] = y2[mi];
3571 }
3572 }
3573
3574 /* Fill in truth */
3575 for (i = 0, ri = 0, mi = 0; i < n; i++, ri += incri, mi += incmi) {
3576 if (ab == 0 || ab == 1) {
3577 head_r_true[ri] = head_r1_true[mi];
3578 tail_r_true[ri] = tail_r1_true[mi];
3579 head_r_true[ri + 1] = head_r2_true[mi];
3580 tail_r_true[ri + 1] = tail_r2_true[mi];
3581 } else if (ab == 2) {
3582 head_r_true[ri] = -head_r2_true[mi];
3583 tail_r_true[ri] = -tail_r2_true[mi];
3584 head_r_true[ri + 1] = head_r1_true[mi];
3585 tail_r_true[ri + 1] = tail_r1_true[mi];
3586 } else {
3587 head_r_elem1 = head_r1_true[mi];
3588 tail_r_elem1 = tail_r1_true[mi];
3589
3590 head_r_elem2 = head_r2_true[mi];
3591 tail_r_elem2 = tail_r2_true[mi];
3592
3593 {
3594 /* Compute double-double = double-double + double-double. */
3595 double bv;
3596 double s1, s2, t1, t2;
3597
3598 /* Add two hi words. */
3599 s1 = head_r_elem1 + head_r_elem2;
3600 bv = s1 - head_r_elem1;
3601 s2 = ((head_r_elem2 - bv) + (head_r_elem1 - (s1 - bv)));
3602
3603 /* Add two lo words. */
3604 t1 = tail_r_elem1 + tail_r_elem2;
3605 bv = t1 - tail_r_elem1;
3606 t2 = ((tail_r_elem2 - bv) + (tail_r_elem1 - (t1 - bv)));
3607
3608 s2 += t1;
3609
3610 /* Renormalize (s1, s2) to (t1, s2) */
3611 t1 = s1 + s2;
3612 s2 = s2 - (t1 - s1);
3613
3614 t2 += s2;
3615
3616 /* Renormalize (t1, t2) */
3617 head_r_elem = t1 + t2;
3618 tail_r_elem = t2 - (head_r_elem - t1);
3619 }
3620
3621 /* Set the imaginary part to r1 + r2 */
3622 tail_r_true[ri + 1] = tail_r_elem;
3623 head_r_true[ri + 1] = head_r_elem;
3624
3625 /* Set the real part to r1 - r2. */
3626 {
3627 double head_bt, tail_bt;
3628 head_bt = -head_r_elem2;
3629 tail_bt = -tail_r_elem2;
3630 {
3631 /* Compute double-double = double-double + double-double. */
3632 double bv;
3633 double s1, s2, t1, t2;
3634
3635 /* Add two hi words. */
3636 s1 = head_r_elem1 + head_bt;
3637 bv = s1 - head_r_elem1;
3638 s2 = ((head_bt - bv) + (head_r_elem1 - (s1 - bv)));
3639
3640 /* Add two lo words. */
3641 t1 = tail_r_elem1 + tail_bt;
3642 bv = t1 - tail_r_elem1;
3643 t2 = ((tail_bt - bv) + (tail_r_elem1 - (t1 - bv)));
3644
3645 s2 += t1;
3646
3647 /* Renormalize (s1, s2) to (t1, s2) */
3648 t1 = s1 + s2;
3649 s2 = s2 - (t1 - s1);
3650
3651 t2 += s2;
3652
3653 /* Renormalize (t1, t2) */
3654 head_r_elem = t1 + t2;
3655 tail_r_elem = t2 - (head_r_elem - t1);
3656 }
3657 }
3658 tail_r_true[ri] = tail_r_elem;
3659 head_r_true[ri] = head_r_elem;
3660 }
3661 }
3662
3663
3664 blas_free(a1);
3665 blas_free(a2);
3666 blas_free(y1);
3667 blas_free(y2);
3668 blas_free(x_head_0);
3669 blas_free(x_tail_0);
3670 blas_free(head_r1_true);
3671 blas_free(tail_r1_true);
3672 blas_free(head_r2_true);
3673 blas_free(tail_r2_true);
3674 blas_free(a_vec);
3675 blas_free(x_head_vec);
3676 blas_free(x_tail_vec);
3677 }
3678 }
3679