1
2 #include "blas_extended.h"
3 #include "blas_extended_test.h"
4
5
BLAS_stpmv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,float * alpha,int alpha_flag,float * tp,float * x,int * seed,double * head_r_true,double * tail_r_true)6 void BLAS_stpmv_testgen(int norm, enum blas_order_type order,
7 enum blas_uplo_type uplo, enum blas_trans_type trans,
8 enum blas_diag_type diag, int n, float *alpha,
9 int alpha_flag, float *tp, float *x, int *seed,
10 double *head_r_true, double *tail_r_true)
11
12 /*
13 * Purpose
14 * =======
15 *
16 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
17 *
18 * Arguments
19 * =========
20 *
21 * norm (input) blas_norm_type
22 *
23 * order (input) blas_order_type
24 * Order of tp; row or column major
25 *
26 * uplo (input) blas_uplo_type
27 * Whether tp is upper or lower
28 *
29 * trans (input) blas_trans_type
30 * No trans, trans, conj trans
31 *
32 * diag (input) blas_diag_type
33 * non unit, unit
34 *
35 * n (input) int
36 * Dimension of tp and the length of vector x
37 *
38 * alpha (input/output) float*
39 * If alpha_flag = 1, alpha is input.
40 * If alpha_flag = 0, alpha is output.
41 *
42 * alpha_flag (input) int
43 * = 0 : alpha is free, and is output.
44 * = 1 : alpha is fixed on input.
45 *
46 * tp (output) float*
47 *
48 * x (input/output) float*
49 *
50 * seed (input/output) int
51 *
52 * head_r_true (output) double*
53 * The leading part of the truth in double-double.
54 *
55 * tail_r_true (output) double*
56 * The trailing part of the truth in double-double.
57 *
58 */
59 {
60 int n_fix2 = 0;
61 int n_mix = 0;
62 int i;
63 int j;
64 int length;
65 float *temp;
66 float beta_zero_fake;
67 int beta_flag = 1;
68 float r_zero_fake;
69 float one_this;
70 float tp_swapTemp1;
71 float tp_swapTemp2;
72 float x_swapTemp1;
73 float x_swapTemp2;
74 double head_r_true_swapTemp1, tail_r_true_swapTemp1;
75 double head_r_true_swapTemp2, tail_r_true_swapTemp2;
76
77 float *x_i = x;
78 float *alpha_i = alpha;
79 int inctp = 1;
80 int inc_index = 1;
81
82 beta_zero_fake = 0.0;
83 r_zero_fake = 0.0;
84 one_this = 1.0;
85
86
87
88
89 temp = (float *) blas_malloc(n * 2 * inc_index * sizeof(float));
90 if (n * 2 * inc_index > 0 && temp == NULL) {
91 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
92 }
93 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
94 temp[i] = 0.0;
95 }
96 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
97 tp[i] = 0.0;
98 }
99
100 /* calling dot_testgen n time. in each iteration, one row of tp, and one
101 element of x are produced. */
102
103 if (alpha_flag == 0 && diag == blas_unit_diag) {
104 *alpha_i = xrand(seed);
105 alpha_flag = 1;
106 }
107
108 for (i = 0; i < n; i++) {
109
110 if (i == 0) {
111 n_fix2 = 0;
112 n_mix = 0;
113 } else {
114 n_mix = i;
115
116 /* from now on, fix alpha */
117 alpha_flag = 1;
118 }
119
120 for (j = 0; j < n * inc_index; j += inc_index) {
121 temp[j] = 0.0;
122 }
123 length = i + 1;
124
125 if (diag == blas_unit_diag) {
126 length = i;
127
128
129 BLAS_sdot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
130 alpha, alpha_flag,
131 alpha, beta_flag, x, temp, seed, &r_zero_fake,
132 &((double *) head_r_true)[i * inc_index],
133 &((double *) tail_r_true)[i * inc_index]);
134 } else {
135 BLAS_sdot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
136 alpha, alpha_flag,
137 &beta_zero_fake, beta_flag, x, temp, seed,
138 &r_zero_fake,
139 &((double *) head_r_true)[i * inc_index],
140 &((double *) tail_r_true)[i * inc_index]);
141 }
142
143 /* one type is s */
144 if (diag == blas_unit_diag) {
145 temp[length * inctp] = one_this;
146 x_i[length * inc_index] = r_zero_fake;
147 }
148
149
150 /* copy temp to tp */
151 if (((order == blas_rowmajor) && (trans == blas_no_trans)
152 && (uplo == blas_upper))
153 || ((order == blas_colmajor) && (trans != blas_no_trans)
154 && (uplo == blas_lower))
155 || ((order == blas_rowmajor) && (trans != blas_no_trans)
156 && (uplo == blas_lower))
157 || ((order == blas_colmajor) && (trans == blas_no_trans)
158 && (uplo == blas_upper))) {
159 for (j = 0; j < n / 2; j++) {
160 tp_swapTemp1 = temp[(n - 1 - j) * inctp];
161 tp_swapTemp2 = temp[j * inctp];
162 temp[(n - 1 - j) * inctp] = tp_swapTemp2;
163 temp[j * inctp] = tp_swapTemp1;
164 }
165
166 stpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
167 } else
168 if (((order == blas_rowmajor) && (trans == blas_no_trans)
169 && (uplo == blas_lower))
170 || ((order == blas_colmajor) && (trans != blas_no_trans)
171 && (uplo == blas_upper))
172 || ((order == blas_rowmajor) && (trans != blas_no_trans)
173 && (uplo == blas_upper))
174 || ((order == blas_colmajor) && (trans == blas_no_trans)
175 && (uplo == blas_lower))) {
176 stpmv_commit_row(order, uplo, trans, n, tp, temp, i);
177 }
178
179 }
180
181 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
182 if (((order == blas_rowmajor) && (trans == blas_no_trans)
183 && (uplo == blas_upper))
184 || ((order == blas_colmajor) && (trans != blas_no_trans)
185 && (uplo == blas_lower))
186 || ((order == blas_rowmajor) && (trans != blas_no_trans)
187 && (uplo == blas_lower))
188 || ((order == blas_colmajor) && (trans == blas_no_trans)
189 && (uplo == blas_upper))) {
190 for (j = 0; j < n / 2; j++) {
191 x_swapTemp1 = x_i[(n - 1 - j) * inc_index];
192 x_swapTemp2 = x_i[j * inc_index];
193 x_i[(n - 1 - j) * inc_index] = x_swapTemp2;
194 x_i[j * inc_index] = x_swapTemp1;
195
196 head_r_true_swapTemp1 = head_r_true[(n - 1 - j) * inc_index];
197 tail_r_true_swapTemp1 = tail_r_true[(n - 1 - j) * inc_index];
198 head_r_true_swapTemp2 = head_r_true[j * inc_index];
199 tail_r_true_swapTemp2 = tail_r_true[j * inc_index];
200 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2;
201 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2;
202 head_r_true[j * inc_index] = head_r_true_swapTemp1;
203 tail_r_true[j * inc_index] = tail_r_true_swapTemp1;
204 }
205 }
206
207 blas_free(temp);
208 }
209
BLAS_dtpmv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,double * alpha,int alpha_flag,double * tp,double * x,int * seed,double * head_r_true,double * tail_r_true)210 void BLAS_dtpmv_testgen(int norm, enum blas_order_type order,
211 enum blas_uplo_type uplo, enum blas_trans_type trans,
212 enum blas_diag_type diag, int n, double *alpha,
213 int alpha_flag, double *tp, double *x, int *seed,
214 double *head_r_true, double *tail_r_true)
215
216 /*
217 * Purpose
218 * =======
219 *
220 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
221 *
222 * Arguments
223 * =========
224 *
225 * norm (input) blas_norm_type
226 *
227 * order (input) blas_order_type
228 * Order of tp; row or column major
229 *
230 * uplo (input) blas_uplo_type
231 * Whether tp is upper or lower
232 *
233 * trans (input) blas_trans_type
234 * No trans, trans, conj trans
235 *
236 * diag (input) blas_diag_type
237 * non unit, unit
238 *
239 * n (input) int
240 * Dimension of tp and the length of vector x
241 *
242 * alpha (input/output) double*
243 * If alpha_flag = 1, alpha is input.
244 * If alpha_flag = 0, alpha is output.
245 *
246 * alpha_flag (input) int
247 * = 0 : alpha is free, and is output.
248 * = 1 : alpha is fixed on input.
249 *
250 * tp (output) double*
251 *
252 * x (input/output) double*
253 *
254 * seed (input/output) int
255 *
256 * head_r_true (output) double*
257 * The leading part of the truth in double-double.
258 *
259 * tail_r_true (output) double*
260 * The trailing part of the truth in double-double.
261 *
262 */
263 {
264 int n_fix2 = 0;
265 int n_mix = 0;
266 int i;
267 int j;
268 int length;
269 double *temp;
270 double beta_zero_fake;
271 int beta_flag = 1;
272 double r_zero_fake;
273 double one_this;
274 double tp_swapTemp1;
275 double tp_swapTemp2;
276 double x_swapTemp1;
277 double x_swapTemp2;
278 double head_r_true_swapTemp1, tail_r_true_swapTemp1;
279 double head_r_true_swapTemp2, tail_r_true_swapTemp2;
280
281 double *x_i = x;
282 double *alpha_i = alpha;
283 int inctp = 1;
284 int inc_index = 1;
285
286 beta_zero_fake = 0.0;
287 r_zero_fake = 0.0;
288 one_this = 1.0;
289
290
291
292
293 temp = (double *) blas_malloc(n * 2 * inc_index * sizeof(double));
294 if (n * 2 * inc_index > 0 && temp == NULL) {
295 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
296 }
297 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
298 temp[i] = 0.0;
299 }
300 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
301 tp[i] = 0.0;
302 }
303
304 /* calling dot_testgen n time. in each iteration, one row of tp, and one
305 element of x are produced. */
306
307 if (alpha_flag == 0 && diag == blas_unit_diag) {
308 *alpha_i = xrand(seed);
309 alpha_flag = 1;
310 }
311
312 for (i = 0; i < n; i++) {
313
314 if (i == 0) {
315 n_fix2 = 0;
316 n_mix = 0;
317 } else {
318 n_mix = i;
319
320 /* from now on, fix alpha */
321 alpha_flag = 1;
322 }
323
324 for (j = 0; j < n * inc_index; j += inc_index) {
325 temp[j] = 0.0;
326 }
327 length = i + 1;
328
329 if (diag == blas_unit_diag) {
330 length = i;
331
332
333 BLAS_ddot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
334 alpha, alpha_flag,
335 alpha, beta_flag, x, temp, seed, &r_zero_fake,
336 &((double *) head_r_true)[i * inc_index],
337 &((double *) tail_r_true)[i * inc_index]);
338 } else {
339 BLAS_ddot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
340 alpha, alpha_flag,
341 &beta_zero_fake, beta_flag, x, temp, seed,
342 &r_zero_fake,
343 &((double *) head_r_true)[i * inc_index],
344 &((double *) tail_r_true)[i * inc_index]);
345 }
346
347 /* one type is d */
348 if (diag == blas_unit_diag) {
349 temp[length * inctp] = one_this;
350 x_i[length * inc_index] = r_zero_fake;
351 }
352
353
354 /* copy temp to tp */
355 if (((order == blas_rowmajor) && (trans == blas_no_trans)
356 && (uplo == blas_upper))
357 || ((order == blas_colmajor) && (trans != blas_no_trans)
358 && (uplo == blas_lower))
359 || ((order == blas_rowmajor) && (trans != blas_no_trans)
360 && (uplo == blas_lower))
361 || ((order == blas_colmajor) && (trans == blas_no_trans)
362 && (uplo == blas_upper))) {
363 for (j = 0; j < n / 2; j++) {
364 tp_swapTemp1 = temp[(n - 1 - j) * inctp];
365 tp_swapTemp2 = temp[j * inctp];
366 temp[(n - 1 - j) * inctp] = tp_swapTemp2;
367 temp[j * inctp] = tp_swapTemp1;
368 }
369
370 dtpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
371 } else
372 if (((order == blas_rowmajor) && (trans == blas_no_trans)
373 && (uplo == blas_lower))
374 || ((order == blas_colmajor) && (trans != blas_no_trans)
375 && (uplo == blas_upper))
376 || ((order == blas_rowmajor) && (trans != blas_no_trans)
377 && (uplo == blas_upper))
378 || ((order == blas_colmajor) && (trans == blas_no_trans)
379 && (uplo == blas_lower))) {
380 dtpmv_commit_row(order, uplo, trans, n, tp, temp, i);
381 }
382
383 }
384
385 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
386 if (((order == blas_rowmajor) && (trans == blas_no_trans)
387 && (uplo == blas_upper))
388 || ((order == blas_colmajor) && (trans != blas_no_trans)
389 && (uplo == blas_lower))
390 || ((order == blas_rowmajor) && (trans != blas_no_trans)
391 && (uplo == blas_lower))
392 || ((order == blas_colmajor) && (trans == blas_no_trans)
393 && (uplo == blas_upper))) {
394 for (j = 0; j < n / 2; j++) {
395 x_swapTemp1 = x_i[(n - 1 - j) * inc_index];
396 x_swapTemp2 = x_i[j * inc_index];
397 x_i[(n - 1 - j) * inc_index] = x_swapTemp2;
398 x_i[j * inc_index] = x_swapTemp1;
399
400 head_r_true_swapTemp1 = head_r_true[(n - 1 - j) * inc_index];
401 tail_r_true_swapTemp1 = tail_r_true[(n - 1 - j) * inc_index];
402 head_r_true_swapTemp2 = head_r_true[j * inc_index];
403 tail_r_true_swapTemp2 = tail_r_true[j * inc_index];
404 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2;
405 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2;
406 head_r_true[j * inc_index] = head_r_true_swapTemp1;
407 tail_r_true[j * inc_index] = tail_r_true_swapTemp1;
408 }
409 }
410
411 blas_free(temp);
412 }
413
BLAS_ctpmv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,void * alpha,int alpha_flag,void * tp,void * x,int * seed,double * head_r_true,double * tail_r_true)414 void BLAS_ctpmv_testgen(int norm, enum blas_order_type order,
415 enum blas_uplo_type uplo, enum blas_trans_type trans,
416 enum blas_diag_type diag, int n, void *alpha,
417 int alpha_flag, void *tp, void *x, int *seed,
418 double *head_r_true, double *tail_r_true)
419
420 /*
421 * Purpose
422 * =======
423 *
424 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
425 *
426 * Arguments
427 * =========
428 *
429 * norm (input) blas_norm_type
430 *
431 * order (input) blas_order_type
432 * Order of tp; row or column major
433 *
434 * uplo (input) blas_uplo_type
435 * Whether tp is upper or lower
436 *
437 * trans (input) blas_trans_type
438 * No trans, trans, conj trans
439 *
440 * diag (input) blas_diag_type
441 * non unit, unit
442 *
443 * n (input) int
444 * Dimension of tp and the length of vector x
445 *
446 * alpha (input/output) void*
447 * If alpha_flag = 1, alpha is input.
448 * If alpha_flag = 0, alpha is output.
449 *
450 * alpha_flag (input) int
451 * = 0 : alpha is free, and is output.
452 * = 1 : alpha is fixed on input.
453 *
454 * tp (output) void*
455 *
456 * x (input/output) void*
457 *
458 * seed (input/output) int
459 *
460 * head_r_true (output) double*
461 * The leading part of the truth in double-double.
462 *
463 * tail_r_true (output) double*
464 * The trailing part of the truth in double-double.
465 *
466 */
467 {
468 int n_fix2 = 0;
469 int n_mix = 0;
470 int i;
471 int j;
472 int length;
473 float *temp;
474 float beta_zero_fake[2];
475 int beta_flag = 1;
476 float r_zero_fake[2];
477 float one_this[2];
478 float tp_swapTemp1[2];
479 float tp_swapTemp2[2];
480 float x_swapTemp1[2];
481 float x_swapTemp2[2];
482 double head_r_true_swapTemp1[2], tail_r_true_swapTemp1[2];
483 double head_r_true_swapTemp2[2], tail_r_true_swapTemp2[2];
484
485 float *x_i = (float *) x;
486 float *alpha_i = (float *) alpha;
487 int inctp = 1;
488 int inc_index = 1;
489
490 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
491 r_zero_fake[0] = r_zero_fake[1] = 0.0;
492 one_this[0] = 1.0;
493 one_this[1] = 0.0;
494
495 inctp *= 2;
496 inc_index *= 2;
497
498 temp = (float *) blas_malloc(n * 2 * inc_index * sizeof(float) * 2);
499 if (n * 2 * inc_index > 0 && temp == NULL) {
500 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
501 }
502 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
503 temp[i] = 0.0;
504 temp[i + 1] = 0.0;
505 }
506 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
507 ((float *) tp)[i] = 0.0;
508 }
509
510 /* calling dot_testgen n time. in each iteration, one row of tp, and one
511 element of x are produced. */
512
513 if (alpha_flag == 0 && diag == blas_unit_diag) {
514 alpha_i[0] = xrand(seed);
515 alpha_i[1] = xrand(seed);
516 alpha_flag = 1;
517 }
518
519 for (i = 0; i < n; i++) {
520
521 if (i == 0) {
522 n_fix2 = 0;
523 n_mix = 0;
524 } else {
525 n_mix = i;
526
527 /* from now on, fix alpha */
528 alpha_flag = 1;
529 }
530
531 for (j = 0; j < n * inc_index; j += inc_index) {
532 temp[j] = 0.0;
533 temp[j + 1] = 0.0;
534 }
535 length = i + 1;
536
537 if (diag == blas_unit_diag) {
538 length = i;
539
540
541 BLAS_cdot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
542 alpha, alpha_flag,
543 alpha, beta_flag, x, temp, seed, r_zero_fake,
544 &((double *) head_r_true)[i * inc_index],
545 &((double *) tail_r_true)[i * inc_index]);
546 } else {
547 BLAS_cdot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
548 alpha, alpha_flag,
549 beta_zero_fake, beta_flag, x, temp, seed, r_zero_fake,
550 &((double *) head_r_true)[i * inc_index],
551 &((double *) tail_r_true)[i * inc_index]);
552 }
553
554 /* one type is c */
555 if (diag == blas_unit_diag) {
556 temp[length * inctp] = one_this[0];
557 temp[length * inctp + 1] = one_this[1];
558 x_i[length * inc_index] = r_zero_fake[0];
559 x_i[length * inc_index + 1] = r_zero_fake[1];
560 }
561
562
563 /* copy temp to tp */
564 if (((order == blas_rowmajor) && (trans == blas_no_trans)
565 && (uplo == blas_upper))
566 || ((order == blas_colmajor) && (trans != blas_no_trans)
567 && (uplo == blas_lower))
568 || ((order == blas_rowmajor) && (trans != blas_no_trans)
569 && (uplo == blas_lower))
570 || ((order == blas_colmajor) && (trans == blas_no_trans)
571 && (uplo == blas_upper))) {
572 for (j = 0; j < n / 2; j++) {
573 tp_swapTemp1[0] = temp[(n - 1 - j) * inctp];
574 tp_swapTemp1[1] = temp[(n - 1 - j) * inctp + 1];
575 tp_swapTemp2[0] = temp[j * inctp];
576 tp_swapTemp2[1] = temp[j * inctp + 1];
577 temp[(n - 1 - j) * inctp] = tp_swapTemp2[0];
578 temp[(n - 1 - j) * inctp + 1] = tp_swapTemp2[1];
579 temp[j * inctp] = tp_swapTemp1[0];
580 temp[j * inctp + 1] = tp_swapTemp1[1];
581 }
582
583 ctpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
584 } else
585 if (((order == blas_rowmajor) && (trans == blas_no_trans)
586 && (uplo == blas_lower))
587 || ((order == blas_colmajor) && (trans != blas_no_trans)
588 && (uplo == blas_upper))
589 || ((order == blas_rowmajor) && (trans != blas_no_trans)
590 && (uplo == blas_upper))
591 || ((order == blas_colmajor) && (trans == blas_no_trans)
592 && (uplo == blas_lower))) {
593 ctpmv_commit_row(order, uplo, trans, n, tp, temp, i);
594 }
595
596 }
597
598 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
599 if (((order == blas_rowmajor) && (trans == blas_no_trans)
600 && (uplo == blas_upper))
601 || ((order == blas_colmajor) && (trans != blas_no_trans)
602 && (uplo == blas_lower))
603 || ((order == blas_rowmajor) && (trans != blas_no_trans)
604 && (uplo == blas_lower))
605 || ((order == blas_colmajor) && (trans == blas_no_trans)
606 && (uplo == blas_upper))) {
607 for (j = 0; j < n / 2; j++) {
608 x_swapTemp1[0] = x_i[(n - 1 - j) * inc_index];
609 x_swapTemp1[1] = x_i[(n - 1 - j) * inc_index + 1];
610 x_swapTemp2[0] = x_i[j * inc_index];
611 x_swapTemp2[1] = x_i[j * inc_index + 1];
612 x_i[(n - 1 - j) * inc_index] = x_swapTemp2[0];
613 x_i[(n - 1 - j) * inc_index + 1] = x_swapTemp2[1];
614 x_i[j * inc_index] = x_swapTemp1[0];
615 x_i[j * inc_index + 1] = x_swapTemp1[1];
616
617 head_r_true_swapTemp1[0] = head_r_true[(n - 1 - j) * inc_index];
618 head_r_true_swapTemp1[1] = head_r_true[1 + (n - 1 - j) * inc_index];
619 tail_r_true_swapTemp1[0] = tail_r_true[(n - 1 - j) * inc_index];
620 tail_r_true_swapTemp1[1] = tail_r_true[1 + (n - 1 - j) * inc_index];
621 head_r_true_swapTemp2[0] = head_r_true[j * inc_index];
622 head_r_true_swapTemp2[1] = head_r_true[1 + j * inc_index];
623 tail_r_true_swapTemp2[0] = tail_r_true[j * inc_index];
624 tail_r_true_swapTemp2[1] = tail_r_true[1 + j * inc_index];
625 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2[0];
626 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2[0];
627 head_r_true[1 + (n - 1 - j) * inc_index] = head_r_true_swapTemp2[1];
628 tail_r_true[1 + (n - 1 - j) * inc_index] = tail_r_true_swapTemp2[1];
629 head_r_true[j * inc_index] = head_r_true_swapTemp1[0];
630 tail_r_true[j * inc_index] = tail_r_true_swapTemp1[0];
631 head_r_true[1 + j * inc_index] = head_r_true_swapTemp1[1];
632 tail_r_true[1 + j * inc_index] = tail_r_true_swapTemp1[1];
633 }
634 }
635
636 blas_free(temp);
637 }
638
BLAS_ztpmv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,void * alpha,int alpha_flag,void * tp,void * x,int * seed,double * head_r_true,double * tail_r_true)639 void BLAS_ztpmv_testgen(int norm, enum blas_order_type order,
640 enum blas_uplo_type uplo, enum blas_trans_type trans,
641 enum blas_diag_type diag, int n, void *alpha,
642 int alpha_flag, void *tp, void *x, int *seed,
643 double *head_r_true, double *tail_r_true)
644
645 /*
646 * Purpose
647 * =======
648 *
649 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
650 *
651 * Arguments
652 * =========
653 *
654 * norm (input) blas_norm_type
655 *
656 * order (input) blas_order_type
657 * Order of tp; row or column major
658 *
659 * uplo (input) blas_uplo_type
660 * Whether tp is upper or lower
661 *
662 * trans (input) blas_trans_type
663 * No trans, trans, conj trans
664 *
665 * diag (input) blas_diag_type
666 * non unit, unit
667 *
668 * n (input) int
669 * Dimension of tp and the length of vector x
670 *
671 * alpha (input/output) void*
672 * If alpha_flag = 1, alpha is input.
673 * If alpha_flag = 0, alpha is output.
674 *
675 * alpha_flag (input) int
676 * = 0 : alpha is free, and is output.
677 * = 1 : alpha is fixed on input.
678 *
679 * tp (output) void*
680 *
681 * x (input/output) void*
682 *
683 * seed (input/output) int
684 *
685 * head_r_true (output) double*
686 * The leading part of the truth in double-double.
687 *
688 * tail_r_true (output) double*
689 * The trailing part of the truth in double-double.
690 *
691 */
692 {
693 int n_fix2 = 0;
694 int n_mix = 0;
695 int i;
696 int j;
697 int length;
698 double *temp;
699 double beta_zero_fake[2];
700 int beta_flag = 1;
701 double r_zero_fake[2];
702 double one_this[2];
703 double tp_swapTemp1[2];
704 double tp_swapTemp2[2];
705 double x_swapTemp1[2];
706 double x_swapTemp2[2];
707 double head_r_true_swapTemp1[2], tail_r_true_swapTemp1[2];
708 double head_r_true_swapTemp2[2], tail_r_true_swapTemp2[2];
709
710 double *x_i = (double *) x;
711 double *alpha_i = (double *) alpha;
712 int inctp = 1;
713 int inc_index = 1;
714
715 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
716 r_zero_fake[0] = r_zero_fake[1] = 0.0;
717 one_this[0] = 1.0;
718 one_this[1] = 0.0;
719
720 inctp *= 2;
721 inc_index *= 2;
722
723 temp = (double *) blas_malloc(n * 2 * inc_index * sizeof(double) * 2);
724 if (n * 2 * inc_index > 0 && temp == NULL) {
725 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
726 }
727 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
728 temp[i] = 0.0;
729 temp[i + 1] = 0.0;
730 }
731 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
732 ((double *) tp)[i] = 0.0;
733 }
734
735 /* calling dot_testgen n time. in each iteration, one row of tp, and one
736 element of x are produced. */
737
738 if (alpha_flag == 0 && diag == blas_unit_diag) {
739 alpha_i[0] = xrand(seed);
740 alpha_i[1] = xrand(seed);
741 alpha_flag = 1;
742 }
743
744 for (i = 0; i < n; i++) {
745
746 if (i == 0) {
747 n_fix2 = 0;
748 n_mix = 0;
749 } else {
750 n_mix = i;
751
752 /* from now on, fix alpha */
753 alpha_flag = 1;
754 }
755
756 for (j = 0; j < n * inc_index; j += inc_index) {
757 temp[j] = 0.0;
758 temp[j + 1] = 0.0;
759 }
760 length = i + 1;
761
762 if (diag == blas_unit_diag) {
763 length = i;
764
765
766 BLAS_zdot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
767 alpha, alpha_flag,
768 alpha, beta_flag, x, temp, seed, r_zero_fake,
769 &((double *) head_r_true)[i * inc_index],
770 &((double *) tail_r_true)[i * inc_index]);
771 } else {
772 BLAS_zdot_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
773 alpha, alpha_flag,
774 beta_zero_fake, beta_flag, x, temp, seed, r_zero_fake,
775 &((double *) head_r_true)[i * inc_index],
776 &((double *) tail_r_true)[i * inc_index]);
777 }
778
779 /* one type is z */
780 if (diag == blas_unit_diag) {
781 temp[length * inctp] = one_this[0];
782 temp[length * inctp + 1] = one_this[1];
783 x_i[length * inc_index] = r_zero_fake[0];
784 x_i[length * inc_index + 1] = r_zero_fake[1];
785 }
786
787
788 /* copy temp to tp */
789 if (((order == blas_rowmajor) && (trans == blas_no_trans)
790 && (uplo == blas_upper))
791 || ((order == blas_colmajor) && (trans != blas_no_trans)
792 && (uplo == blas_lower))
793 || ((order == blas_rowmajor) && (trans != blas_no_trans)
794 && (uplo == blas_lower))
795 || ((order == blas_colmajor) && (trans == blas_no_trans)
796 && (uplo == blas_upper))) {
797 for (j = 0; j < n / 2; j++) {
798 tp_swapTemp1[0] = temp[(n - 1 - j) * inctp];
799 tp_swapTemp1[1] = temp[(n - 1 - j) * inctp + 1];
800 tp_swapTemp2[0] = temp[j * inctp];
801 tp_swapTemp2[1] = temp[j * inctp + 1];
802 temp[(n - 1 - j) * inctp] = tp_swapTemp2[0];
803 temp[(n - 1 - j) * inctp + 1] = tp_swapTemp2[1];
804 temp[j * inctp] = tp_swapTemp1[0];
805 temp[j * inctp + 1] = tp_swapTemp1[1];
806 }
807
808 ztpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
809 } else
810 if (((order == blas_rowmajor) && (trans == blas_no_trans)
811 && (uplo == blas_lower))
812 || ((order == blas_colmajor) && (trans != blas_no_trans)
813 && (uplo == blas_upper))
814 || ((order == blas_rowmajor) && (trans != blas_no_trans)
815 && (uplo == blas_upper))
816 || ((order == blas_colmajor) && (trans == blas_no_trans)
817 && (uplo == blas_lower))) {
818 ztpmv_commit_row(order, uplo, trans, n, tp, temp, i);
819 }
820
821 }
822
823 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
824 if (((order == blas_rowmajor) && (trans == blas_no_trans)
825 && (uplo == blas_upper))
826 || ((order == blas_colmajor) && (trans != blas_no_trans)
827 && (uplo == blas_lower))
828 || ((order == blas_rowmajor) && (trans != blas_no_trans)
829 && (uplo == blas_lower))
830 || ((order == blas_colmajor) && (trans == blas_no_trans)
831 && (uplo == blas_upper))) {
832 for (j = 0; j < n / 2; j++) {
833 x_swapTemp1[0] = x_i[(n - 1 - j) * inc_index];
834 x_swapTemp1[1] = x_i[(n - 1 - j) * inc_index + 1];
835 x_swapTemp2[0] = x_i[j * inc_index];
836 x_swapTemp2[1] = x_i[j * inc_index + 1];
837 x_i[(n - 1 - j) * inc_index] = x_swapTemp2[0];
838 x_i[(n - 1 - j) * inc_index + 1] = x_swapTemp2[1];
839 x_i[j * inc_index] = x_swapTemp1[0];
840 x_i[j * inc_index + 1] = x_swapTemp1[1];
841
842 head_r_true_swapTemp1[0] = head_r_true[(n - 1 - j) * inc_index];
843 head_r_true_swapTemp1[1] = head_r_true[1 + (n - 1 - j) * inc_index];
844 tail_r_true_swapTemp1[0] = tail_r_true[(n - 1 - j) * inc_index];
845 tail_r_true_swapTemp1[1] = tail_r_true[1 + (n - 1 - j) * inc_index];
846 head_r_true_swapTemp2[0] = head_r_true[j * inc_index];
847 head_r_true_swapTemp2[1] = head_r_true[1 + j * inc_index];
848 tail_r_true_swapTemp2[0] = tail_r_true[j * inc_index];
849 tail_r_true_swapTemp2[1] = tail_r_true[1 + j * inc_index];
850 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2[0];
851 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2[0];
852 head_r_true[1 + (n - 1 - j) * inc_index] = head_r_true_swapTemp2[1];
853 tail_r_true[1 + (n - 1 - j) * inc_index] = tail_r_true_swapTemp2[1];
854 head_r_true[j * inc_index] = head_r_true_swapTemp1[0];
855 tail_r_true[j * inc_index] = tail_r_true_swapTemp1[0];
856 head_r_true[1 + j * inc_index] = head_r_true_swapTemp1[1];
857 tail_r_true[1 + j * inc_index] = tail_r_true_swapTemp1[1];
858 }
859 }
860
861 blas_free(temp);
862 }
863
BLAS_dtpmv_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,double * alpha,int alpha_flag,float * tp,double * x,int * seed,double * head_r_true,double * tail_r_true)864 void BLAS_dtpmv_s_testgen(int norm, enum blas_order_type order,
865 enum blas_uplo_type uplo,
866 enum blas_trans_type trans,
867 enum blas_diag_type diag, int n, double *alpha,
868 int alpha_flag, float *tp, double *x, int *seed,
869 double *head_r_true, double *tail_r_true)
870
871 /*
872 * Purpose
873 * =======
874 *
875 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
876 *
877 * Arguments
878 * =========
879 *
880 * norm (input) blas_norm_type
881 *
882 * order (input) blas_order_type
883 * Order of tp; row or column major
884 *
885 * uplo (input) blas_uplo_type
886 * Whether tp is upper or lower
887 *
888 * trans (input) blas_trans_type
889 * No trans, trans, conj trans
890 *
891 * diag (input) blas_diag_type
892 * non unit, unit
893 *
894 * n (input) int
895 * Dimension of tp and the length of vector x
896 *
897 * alpha (input/output) double*
898 * If alpha_flag = 1, alpha is input.
899 * If alpha_flag = 0, alpha is output.
900 *
901 * alpha_flag (input) int
902 * = 0 : alpha is free, and is output.
903 * = 1 : alpha is fixed on input.
904 *
905 * tp (output) float*
906 *
907 * x (input/output) double*
908 *
909 * seed (input/output) int
910 *
911 * head_r_true (output) double*
912 * The leading part of the truth in double-double.
913 *
914 * tail_r_true (output) double*
915 * The trailing part of the truth in double-double.
916 *
917 */
918 {
919 int n_fix2 = 0;
920 int n_mix = 0;
921 int i;
922 int j;
923 int length;
924 float *temp;
925 double beta_zero_fake;
926 int beta_flag = 1;
927 double r_zero_fake;
928 float one_this;
929 float tp_swapTemp1;
930 float tp_swapTemp2;
931 double x_swapTemp1;
932 double x_swapTemp2;
933 double head_r_true_swapTemp1, tail_r_true_swapTemp1;
934 double head_r_true_swapTemp2, tail_r_true_swapTemp2;
935
936 double *x_i = x;
937 double *alpha_i = alpha;
938 int inctp = 1;
939 int inc_index = 1;
940
941 beta_zero_fake = 0.0;
942 r_zero_fake = 0.0;
943 one_this = 1.0;
944
945
946
947
948 temp = (float *) blas_malloc(n * 2 * inc_index * sizeof(float));
949 if (n * 2 * inc_index > 0 && temp == NULL) {
950 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
951 }
952 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
953 temp[i] = 0.0;
954 }
955 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
956 tp[i] = 0.0;
957 }
958
959 /* calling dot_testgen n time. in each iteration, one row of tp, and one
960 element of x are produced. */
961
962 if (alpha_flag == 0 && diag == blas_unit_diag) {
963 *alpha_i = xrand(seed);
964 alpha_flag = 1;
965 }
966
967 for (i = 0; i < n; i++) {
968
969 if (i == 0) {
970 n_fix2 = 0;
971 n_mix = 0;
972 } else {
973 n_mix = i;
974
975 /* from now on, fix alpha */
976 alpha_flag = 1;
977 }
978
979 for (j = 0; j < n * inc_index; j += inc_index) {
980 temp[j] = 0.0;
981 }
982 length = i + 1;
983
984 if (diag == blas_unit_diag) {
985 length = i;
986
987
988 BLAS_ddot_d_s_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
989 alpha, alpha_flag,
990 alpha, beta_flag, x, temp, seed, &r_zero_fake,
991 &((double *) head_r_true)[i * inc_index],
992 &((double *) tail_r_true)[i * inc_index]);
993 } else {
994 BLAS_ddot_d_s_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
995 alpha, alpha_flag,
996 &beta_zero_fake, beta_flag, x, temp, seed,
997 &r_zero_fake,
998 &((double *) head_r_true)[i * inc_index],
999 &((double *) tail_r_true)[i * inc_index]);
1000 }
1001
1002 /* one type is d */
1003 if (diag == blas_unit_diag) {
1004 temp[length * inctp] = one_this;
1005 x_i[length * inc_index] = r_zero_fake;
1006 }
1007
1008
1009 /* copy temp to tp */
1010 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1011 && (uplo == blas_upper))
1012 || ((order == blas_colmajor) && (trans != blas_no_trans)
1013 && (uplo == blas_lower))
1014 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1015 && (uplo == blas_lower))
1016 || ((order == blas_colmajor) && (trans == blas_no_trans)
1017 && (uplo == blas_upper))) {
1018 for (j = 0; j < n / 2; j++) {
1019 tp_swapTemp1 = temp[(n - 1 - j) * inctp];
1020 tp_swapTemp2 = temp[j * inctp];
1021 temp[(n - 1 - j) * inctp] = tp_swapTemp2;
1022 temp[j * inctp] = tp_swapTemp1;
1023 }
1024
1025 stpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
1026 } else
1027 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1028 && (uplo == blas_lower))
1029 || ((order == blas_colmajor) && (trans != blas_no_trans)
1030 && (uplo == blas_upper))
1031 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1032 && (uplo == blas_upper))
1033 || ((order == blas_colmajor) && (trans == blas_no_trans)
1034 && (uplo == blas_lower))) {
1035 stpmv_commit_row(order, uplo, trans, n, tp, temp, i);
1036 }
1037
1038 }
1039
1040 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
1041 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1042 && (uplo == blas_upper))
1043 || ((order == blas_colmajor) && (trans != blas_no_trans)
1044 && (uplo == blas_lower))
1045 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1046 && (uplo == blas_lower))
1047 || ((order == blas_colmajor) && (trans == blas_no_trans)
1048 && (uplo == blas_upper))) {
1049 for (j = 0; j < n / 2; j++) {
1050 x_swapTemp1 = x_i[(n - 1 - j) * inc_index];
1051 x_swapTemp2 = x_i[j * inc_index];
1052 x_i[(n - 1 - j) * inc_index] = x_swapTemp2;
1053 x_i[j * inc_index] = x_swapTemp1;
1054
1055 head_r_true_swapTemp1 = head_r_true[(n - 1 - j) * inc_index];
1056 tail_r_true_swapTemp1 = tail_r_true[(n - 1 - j) * inc_index];
1057 head_r_true_swapTemp2 = head_r_true[j * inc_index];
1058 tail_r_true_swapTemp2 = tail_r_true[j * inc_index];
1059 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2;
1060 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2;
1061 head_r_true[j * inc_index] = head_r_true_swapTemp1;
1062 tail_r_true[j * inc_index] = tail_r_true_swapTemp1;
1063 }
1064 }
1065
1066 blas_free(temp);
1067 }
1068
BLAS_ztpmv_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,void * alpha,int alpha_flag,void * tp,void * x,int * seed,double * head_r_true,double * tail_r_true)1069 void BLAS_ztpmv_c_testgen(int norm, enum blas_order_type order,
1070 enum blas_uplo_type uplo,
1071 enum blas_trans_type trans,
1072 enum blas_diag_type diag, int n, void *alpha,
1073 int alpha_flag, void *tp, void *x, int *seed,
1074 double *head_r_true, double *tail_r_true)
1075
1076 /*
1077 * Purpose
1078 * =======
1079 *
1080 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
1081 *
1082 * Arguments
1083 * =========
1084 *
1085 * norm (input) blas_norm_type
1086 *
1087 * order (input) blas_order_type
1088 * Order of tp; row or column major
1089 *
1090 * uplo (input) blas_uplo_type
1091 * Whether tp is upper or lower
1092 *
1093 * trans (input) blas_trans_type
1094 * No trans, trans, conj trans
1095 *
1096 * diag (input) blas_diag_type
1097 * non unit, unit
1098 *
1099 * n (input) int
1100 * Dimension of tp and the length of vector x
1101 *
1102 * alpha (input/output) void*
1103 * If alpha_flag = 1, alpha is input.
1104 * If alpha_flag = 0, alpha is output.
1105 *
1106 * alpha_flag (input) int
1107 * = 0 : alpha is free, and is output.
1108 * = 1 : alpha is fixed on input.
1109 *
1110 * tp (output) void*
1111 *
1112 * x (input/output) void*
1113 *
1114 * seed (input/output) int
1115 *
1116 * head_r_true (output) double*
1117 * The leading part of the truth in double-double.
1118 *
1119 * tail_r_true (output) double*
1120 * The trailing part of the truth in double-double.
1121 *
1122 */
1123 {
1124 int n_fix2 = 0;
1125 int n_mix = 0;
1126 int i;
1127 int j;
1128 int length;
1129 float *temp;
1130 double beta_zero_fake[2];
1131 int beta_flag = 1;
1132 double r_zero_fake[2];
1133 float one_this[2];
1134 float tp_swapTemp1[2];
1135 float tp_swapTemp2[2];
1136 double x_swapTemp1[2];
1137 double x_swapTemp2[2];
1138 double head_r_true_swapTemp1[2], tail_r_true_swapTemp1[2];
1139 double head_r_true_swapTemp2[2], tail_r_true_swapTemp2[2];
1140
1141 double *x_i = (double *) x;
1142 double *alpha_i = (double *) alpha;
1143 int inctp = 1;
1144 int inc_index = 1;
1145
1146 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
1147 r_zero_fake[0] = r_zero_fake[1] = 0.0;
1148 one_this[0] = 1.0;
1149 one_this[1] = 0.0;
1150
1151 inctp *= 2;
1152 inc_index *= 2;
1153
1154 temp = (float *) blas_malloc(n * 2 * inc_index * sizeof(float) * 2);
1155 if (n * 2 * inc_index > 0 && temp == NULL) {
1156 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1157 }
1158 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
1159 temp[i] = 0.0;
1160 temp[i + 1] = 0.0;
1161 }
1162 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
1163 ((float *) tp)[i] = 0.0;
1164 }
1165
1166 /* calling dot_testgen n time. in each iteration, one row of tp, and one
1167 element of x are produced. */
1168
1169 if (alpha_flag == 0 && diag == blas_unit_diag) {
1170 alpha_i[0] = xrand(seed);
1171 alpha_i[1] = xrand(seed);
1172 alpha_flag = 1;
1173 }
1174
1175 for (i = 0; i < n; i++) {
1176
1177 if (i == 0) {
1178 n_fix2 = 0;
1179 n_mix = 0;
1180 } else {
1181 n_mix = i;
1182
1183 /* from now on, fix alpha */
1184 alpha_flag = 1;
1185 }
1186
1187 for (j = 0; j < n * inc_index; j += inc_index) {
1188 temp[j] = 0.0;
1189 temp[j + 1] = 0.0;
1190 }
1191 length = i + 1;
1192
1193 if (diag == blas_unit_diag) {
1194 length = i;
1195
1196
1197 BLAS_zdot_z_c_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
1198 alpha, alpha_flag,
1199 alpha, beta_flag, x, temp, seed, r_zero_fake,
1200 &((double *) head_r_true)[i * inc_index],
1201 &((double *) tail_r_true)[i * inc_index]);
1202 } else {
1203 BLAS_zdot_z_c_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
1204 alpha, alpha_flag,
1205 beta_zero_fake, beta_flag, x, temp, seed,
1206 r_zero_fake,
1207 &((double *) head_r_true)[i * inc_index],
1208 &((double *) tail_r_true)[i * inc_index]);
1209 }
1210
1211 /* one type is z */
1212 if (diag == blas_unit_diag) {
1213 temp[length * inctp] = one_this[0];
1214 temp[length * inctp + 1] = one_this[1];
1215 x_i[length * inc_index] = r_zero_fake[0];
1216 x_i[length * inc_index + 1] = r_zero_fake[1];
1217 }
1218
1219
1220 /* copy temp to tp */
1221 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1222 && (uplo == blas_upper))
1223 || ((order == blas_colmajor) && (trans != blas_no_trans)
1224 && (uplo == blas_lower))
1225 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1226 && (uplo == blas_lower))
1227 || ((order == blas_colmajor) && (trans == blas_no_trans)
1228 && (uplo == blas_upper))) {
1229 for (j = 0; j < n / 2; j++) {
1230 tp_swapTemp1[0] = temp[(n - 1 - j) * inctp];
1231 tp_swapTemp1[1] = temp[(n - 1 - j) * inctp + 1];
1232 tp_swapTemp2[0] = temp[j * inctp];
1233 tp_swapTemp2[1] = temp[j * inctp + 1];
1234 temp[(n - 1 - j) * inctp] = tp_swapTemp2[0];
1235 temp[(n - 1 - j) * inctp + 1] = tp_swapTemp2[1];
1236 temp[j * inctp] = tp_swapTemp1[0];
1237 temp[j * inctp + 1] = tp_swapTemp1[1];
1238 }
1239
1240 ctpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
1241 } else
1242 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1243 && (uplo == blas_lower))
1244 || ((order == blas_colmajor) && (trans != blas_no_trans)
1245 && (uplo == blas_upper))
1246 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1247 && (uplo == blas_upper))
1248 || ((order == blas_colmajor) && (trans == blas_no_trans)
1249 && (uplo == blas_lower))) {
1250 ctpmv_commit_row(order, uplo, trans, n, tp, temp, i);
1251 }
1252
1253 }
1254
1255 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
1256 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1257 && (uplo == blas_upper))
1258 || ((order == blas_colmajor) && (trans != blas_no_trans)
1259 && (uplo == blas_lower))
1260 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1261 && (uplo == blas_lower))
1262 || ((order == blas_colmajor) && (trans == blas_no_trans)
1263 && (uplo == blas_upper))) {
1264 for (j = 0; j < n / 2; j++) {
1265 x_swapTemp1[0] = x_i[(n - 1 - j) * inc_index];
1266 x_swapTemp1[1] = x_i[(n - 1 - j) * inc_index + 1];
1267 x_swapTemp2[0] = x_i[j * inc_index];
1268 x_swapTemp2[1] = x_i[j * inc_index + 1];
1269 x_i[(n - 1 - j) * inc_index] = x_swapTemp2[0];
1270 x_i[(n - 1 - j) * inc_index + 1] = x_swapTemp2[1];
1271 x_i[j * inc_index] = x_swapTemp1[0];
1272 x_i[j * inc_index + 1] = x_swapTemp1[1];
1273
1274 head_r_true_swapTemp1[0] = head_r_true[(n - 1 - j) * inc_index];
1275 head_r_true_swapTemp1[1] = head_r_true[1 + (n - 1 - j) * inc_index];
1276 tail_r_true_swapTemp1[0] = tail_r_true[(n - 1 - j) * inc_index];
1277 tail_r_true_swapTemp1[1] = tail_r_true[1 + (n - 1 - j) * inc_index];
1278 head_r_true_swapTemp2[0] = head_r_true[j * inc_index];
1279 head_r_true_swapTemp2[1] = head_r_true[1 + j * inc_index];
1280 tail_r_true_swapTemp2[0] = tail_r_true[j * inc_index];
1281 tail_r_true_swapTemp2[1] = tail_r_true[1 + j * inc_index];
1282 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2[0];
1283 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2[0];
1284 head_r_true[1 + (n - 1 - j) * inc_index] = head_r_true_swapTemp2[1];
1285 tail_r_true[1 + (n - 1 - j) * inc_index] = tail_r_true_swapTemp2[1];
1286 head_r_true[j * inc_index] = head_r_true_swapTemp1[0];
1287 tail_r_true[j * inc_index] = tail_r_true_swapTemp1[0];
1288 head_r_true[1 + j * inc_index] = head_r_true_swapTemp1[1];
1289 tail_r_true[1 + j * inc_index] = tail_r_true_swapTemp1[1];
1290 }
1291 }
1292
1293 blas_free(temp);
1294 }
1295
BLAS_ctpmv_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,void * alpha,int alpha_flag,float * tp,void * x,int * seed,double * head_r_true,double * tail_r_true)1296 void BLAS_ctpmv_s_testgen(int norm, enum blas_order_type order,
1297 enum blas_uplo_type uplo,
1298 enum blas_trans_type trans,
1299 enum blas_diag_type diag, int n, void *alpha,
1300 int alpha_flag, float *tp, void *x, int *seed,
1301 double *head_r_true, double *tail_r_true)
1302
1303 /*
1304 * Purpose
1305 * =======
1306 *
1307 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
1308 *
1309 * Arguments
1310 * =========
1311 *
1312 * norm (input) blas_norm_type
1313 *
1314 * order (input) blas_order_type
1315 * Order of tp; row or column major
1316 *
1317 * uplo (input) blas_uplo_type
1318 * Whether tp is upper or lower
1319 *
1320 * trans (input) blas_trans_type
1321 * No trans, trans, conj trans
1322 *
1323 * diag (input) blas_diag_type
1324 * non unit, unit
1325 *
1326 * n (input) int
1327 * Dimension of tp and the length of vector x
1328 *
1329 * alpha (input/output) void*
1330 * If alpha_flag = 1, alpha is input.
1331 * If alpha_flag = 0, alpha is output.
1332 *
1333 * alpha_flag (input) int
1334 * = 0 : alpha is free, and is output.
1335 * = 1 : alpha is fixed on input.
1336 *
1337 * tp (output) float*
1338 *
1339 * x (input/output) void*
1340 *
1341 * seed (input/output) int
1342 *
1343 * head_r_true (output) double*
1344 * The leading part of the truth in double-double.
1345 *
1346 * tail_r_true (output) double*
1347 * The trailing part of the truth in double-double.
1348 *
1349 */
1350 {
1351 int n_fix2 = 0;
1352 int n_mix = 0;
1353 int i;
1354 int j;
1355 int length;
1356 float *temp;
1357 float beta_zero_fake[2];
1358 int beta_flag = 1;
1359 float r_zero_fake[2];
1360 float one_this;
1361 float tp_swapTemp1;
1362 float tp_swapTemp2;
1363 float x_swapTemp1[2];
1364 float x_swapTemp2[2];
1365 double head_r_true_swapTemp1[2], tail_r_true_swapTemp1[2];
1366 double head_r_true_swapTemp2[2], tail_r_true_swapTemp2[2];
1367
1368 float *x_i = (float *) x;
1369 float *alpha_i = (float *) alpha;
1370 int inctp = 1;
1371 int inc_index = 1;
1372
1373 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
1374 r_zero_fake[0] = r_zero_fake[1] = 0.0;
1375 one_this = 1.0;
1376
1377
1378 inc_index *= 2;
1379
1380 temp = (float *) blas_malloc(n * 2 * inc_index * sizeof(float));
1381 if (n * 2 * inc_index > 0 && temp == NULL) {
1382 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1383 }
1384 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
1385 temp[i] = 0.0;
1386 }
1387 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
1388 tp[i] = 0.0;
1389 }
1390
1391 /* calling dot_testgen n time. in each iteration, one row of tp, and one
1392 element of x are produced. */
1393
1394 if (alpha_flag == 0 && diag == blas_unit_diag) {
1395 alpha_i[0] = xrand(seed);
1396 alpha_i[1] = xrand(seed);
1397 alpha_flag = 1;
1398 }
1399
1400 for (i = 0; i < n; i++) {
1401
1402 if (i == 0) {
1403 n_fix2 = 0;
1404 n_mix = 0;
1405 } else {
1406 n_mix = i;
1407
1408 /* from now on, fix alpha */
1409 alpha_flag = 1;
1410 }
1411
1412 for (j = 0; j < n * inc_index; j += inc_index) {
1413 temp[j] = 0.0;
1414 }
1415 length = i + 1;
1416
1417 if (diag == blas_unit_diag) {
1418 length = i;
1419
1420
1421 BLAS_cdot_c_s_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
1422 alpha, alpha_flag,
1423 alpha, beta_flag, x, temp, seed, r_zero_fake,
1424 &((double *) head_r_true)[i * inc_index],
1425 &((double *) tail_r_true)[i * inc_index]);
1426 } else {
1427 BLAS_cdot_c_s_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
1428 alpha, alpha_flag,
1429 beta_zero_fake, beta_flag, x, temp, seed,
1430 r_zero_fake,
1431 &((double *) head_r_true)[i * inc_index],
1432 &((double *) tail_r_true)[i * inc_index]);
1433 }
1434
1435 /* one type is c */
1436 if (diag == blas_unit_diag) {
1437 temp[length * inctp] = one_this;
1438 x_i[length * inc_index] = r_zero_fake[0];
1439 x_i[length * inc_index + 1] = r_zero_fake[1];
1440 }
1441
1442
1443 /* copy temp to tp */
1444 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1445 && (uplo == blas_upper))
1446 || ((order == blas_colmajor) && (trans != blas_no_trans)
1447 && (uplo == blas_lower))
1448 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1449 && (uplo == blas_lower))
1450 || ((order == blas_colmajor) && (trans == blas_no_trans)
1451 && (uplo == blas_upper))) {
1452 for (j = 0; j < n / 2; j++) {
1453 tp_swapTemp1 = temp[(n - 1 - j) * inctp];
1454 tp_swapTemp2 = temp[j * inctp];
1455 temp[(n - 1 - j) * inctp] = tp_swapTemp2;
1456 temp[j * inctp] = tp_swapTemp1;
1457 }
1458
1459 stpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
1460 } else
1461 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1462 && (uplo == blas_lower))
1463 || ((order == blas_colmajor) && (trans != blas_no_trans)
1464 && (uplo == blas_upper))
1465 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1466 && (uplo == blas_upper))
1467 || ((order == blas_colmajor) && (trans == blas_no_trans)
1468 && (uplo == blas_lower))) {
1469 stpmv_commit_row(order, uplo, trans, n, tp, temp, i);
1470 }
1471
1472 }
1473
1474 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
1475 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1476 && (uplo == blas_upper))
1477 || ((order == blas_colmajor) && (trans != blas_no_trans)
1478 && (uplo == blas_lower))
1479 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1480 && (uplo == blas_lower))
1481 || ((order == blas_colmajor) && (trans == blas_no_trans)
1482 && (uplo == blas_upper))) {
1483 for (j = 0; j < n / 2; j++) {
1484 x_swapTemp1[0] = x_i[(n - 1 - j) * inc_index];
1485 x_swapTemp1[1] = x_i[(n - 1 - j) * inc_index + 1];
1486 x_swapTemp2[0] = x_i[j * inc_index];
1487 x_swapTemp2[1] = x_i[j * inc_index + 1];
1488 x_i[(n - 1 - j) * inc_index] = x_swapTemp2[0];
1489 x_i[(n - 1 - j) * inc_index + 1] = x_swapTemp2[1];
1490 x_i[j * inc_index] = x_swapTemp1[0];
1491 x_i[j * inc_index + 1] = x_swapTemp1[1];
1492
1493 head_r_true_swapTemp1[0] = head_r_true[(n - 1 - j) * inc_index];
1494 head_r_true_swapTemp1[1] = head_r_true[1 + (n - 1 - j) * inc_index];
1495 tail_r_true_swapTemp1[0] = tail_r_true[(n - 1 - j) * inc_index];
1496 tail_r_true_swapTemp1[1] = tail_r_true[1 + (n - 1 - j) * inc_index];
1497 head_r_true_swapTemp2[0] = head_r_true[j * inc_index];
1498 head_r_true_swapTemp2[1] = head_r_true[1 + j * inc_index];
1499 tail_r_true_swapTemp2[0] = tail_r_true[j * inc_index];
1500 tail_r_true_swapTemp2[1] = tail_r_true[1 + j * inc_index];
1501 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2[0];
1502 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2[0];
1503 head_r_true[1 + (n - 1 - j) * inc_index] = head_r_true_swapTemp2[1];
1504 tail_r_true[1 + (n - 1 - j) * inc_index] = tail_r_true_swapTemp2[1];
1505 head_r_true[j * inc_index] = head_r_true_swapTemp1[0];
1506 tail_r_true[j * inc_index] = tail_r_true_swapTemp1[0];
1507 head_r_true[1 + j * inc_index] = head_r_true_swapTemp1[1];
1508 tail_r_true[1 + j * inc_index] = tail_r_true_swapTemp1[1];
1509 }
1510 }
1511
1512 blas_free(temp);
1513 }
1514
BLAS_ztpmv_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,void * alpha,int alpha_flag,double * tp,void * x,int * seed,double * head_r_true,double * tail_r_true)1515 void BLAS_ztpmv_d_testgen(int norm, enum blas_order_type order,
1516 enum blas_uplo_type uplo,
1517 enum blas_trans_type trans,
1518 enum blas_diag_type diag, int n, void *alpha,
1519 int alpha_flag, double *tp, void *x, int *seed,
1520 double *head_r_true, double *tail_r_true)
1521
1522 /*
1523 * Purpose
1524 * =======
1525 *
1526 * Generates alpha, tp and x, where tp is a triangular matrix; and computes r_true.
1527 *
1528 * Arguments
1529 * =========
1530 *
1531 * norm (input) blas_norm_type
1532 *
1533 * order (input) blas_order_type
1534 * Order of tp; row or column major
1535 *
1536 * uplo (input) blas_uplo_type
1537 * Whether tp is upper or lower
1538 *
1539 * trans (input) blas_trans_type
1540 * No trans, trans, conj trans
1541 *
1542 * diag (input) blas_diag_type
1543 * non unit, unit
1544 *
1545 * n (input) int
1546 * Dimension of tp and the length of vector x
1547 *
1548 * alpha (input/output) void*
1549 * If alpha_flag = 1, alpha is input.
1550 * If alpha_flag = 0, alpha is output.
1551 *
1552 * alpha_flag (input) int
1553 * = 0 : alpha is free, and is output.
1554 * = 1 : alpha is fixed on input.
1555 *
1556 * tp (output) double*
1557 *
1558 * x (input/output) void*
1559 *
1560 * seed (input/output) int
1561 *
1562 * head_r_true (output) double*
1563 * The leading part of the truth in double-double.
1564 *
1565 * tail_r_true (output) double*
1566 * The trailing part of the truth in double-double.
1567 *
1568 */
1569 {
1570 int n_fix2 = 0;
1571 int n_mix = 0;
1572 int i;
1573 int j;
1574 int length;
1575 double *temp;
1576 double beta_zero_fake[2];
1577 int beta_flag = 1;
1578 double r_zero_fake[2];
1579 double one_this;
1580 double tp_swapTemp1;
1581 double tp_swapTemp2;
1582 double x_swapTemp1[2];
1583 double x_swapTemp2[2];
1584 double head_r_true_swapTemp1[2], tail_r_true_swapTemp1[2];
1585 double head_r_true_swapTemp2[2], tail_r_true_swapTemp2[2];
1586
1587 double *x_i = (double *) x;
1588 double *alpha_i = (double *) alpha;
1589 int inctp = 1;
1590 int inc_index = 1;
1591
1592 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
1593 r_zero_fake[0] = r_zero_fake[1] = 0.0;
1594 one_this = 1.0;
1595
1596
1597 inc_index *= 2;
1598
1599 temp = (double *) blas_malloc(n * 2 * inc_index * sizeof(double));
1600 if (n * 2 * inc_index > 0 && temp == NULL) {
1601 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1602 }
1603 for (i = 0; i < n * 2 * inc_index; i += inc_index) {
1604 temp[i] = 0.0;
1605 }
1606 for (i = 0; i < (n - 1 + n - 1 + 1) * n * 2 * inctp; i++) {
1607 tp[i] = 0.0;
1608 }
1609
1610 /* calling dot_testgen n time. in each iteration, one row of tp, and one
1611 element of x are produced. */
1612
1613 if (alpha_flag == 0 && diag == blas_unit_diag) {
1614 alpha_i[0] = xrand(seed);
1615 alpha_i[1] = xrand(seed);
1616 alpha_flag = 1;
1617 }
1618
1619 for (i = 0; i < n; i++) {
1620
1621 if (i == 0) {
1622 n_fix2 = 0;
1623 n_mix = 0;
1624 } else {
1625 n_mix = i;
1626
1627 /* from now on, fix alpha */
1628 alpha_flag = 1;
1629 }
1630
1631 for (j = 0; j < n * inc_index; j += inc_index) {
1632 temp[j] = 0.0;
1633 }
1634 length = i + 1;
1635
1636 if (diag == blas_unit_diag) {
1637 length = i;
1638
1639
1640 BLAS_zdot_z_d_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
1641 alpha, alpha_flag,
1642 alpha, beta_flag, x, temp, seed, r_zero_fake,
1643 &((double *) head_r_true)[i * inc_index],
1644 &((double *) tail_r_true)[i * inc_index]);
1645 } else {
1646 BLAS_zdot_z_d_testgen(length, n_fix2, n_mix, norm, blas_no_conj,
1647 alpha, alpha_flag,
1648 beta_zero_fake, beta_flag, x, temp, seed,
1649 r_zero_fake,
1650 &((double *) head_r_true)[i * inc_index],
1651 &((double *) tail_r_true)[i * inc_index]);
1652 }
1653
1654 /* one type is z */
1655 if (diag == blas_unit_diag) {
1656 temp[length * inctp] = one_this;
1657 x_i[length * inc_index] = r_zero_fake[0];
1658 x_i[length * inc_index + 1] = r_zero_fake[1];
1659 }
1660
1661
1662 /* copy temp to tp */
1663 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1664 && (uplo == blas_upper))
1665 || ((order == blas_colmajor) && (trans != blas_no_trans)
1666 && (uplo == blas_lower))
1667 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1668 && (uplo == blas_lower))
1669 || ((order == blas_colmajor) && (trans == blas_no_trans)
1670 && (uplo == blas_upper))) {
1671 for (j = 0; j < n / 2; j++) {
1672 tp_swapTemp1 = temp[(n - 1 - j) * inctp];
1673 tp_swapTemp2 = temp[j * inctp];
1674 temp[(n - 1 - j) * inctp] = tp_swapTemp2;
1675 temp[j * inctp] = tp_swapTemp1;
1676 }
1677
1678 dtpmv_commit_row(order, uplo, trans, n, tp, temp, (n - 1 - i));
1679 } else
1680 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1681 && (uplo == blas_lower))
1682 || ((order == blas_colmajor) && (trans != blas_no_trans)
1683 && (uplo == blas_upper))
1684 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1685 && (uplo == blas_upper))
1686 || ((order == blas_colmajor) && (trans == blas_no_trans)
1687 && (uplo == blas_lower))) {
1688 dtpmv_commit_row(order, uplo, trans, n, tp, temp, i);
1689 }
1690
1691 }
1692
1693 /* The upper triangular matrices generate x backwards, so need to rearrange the elements */
1694 if (((order == blas_rowmajor) && (trans == blas_no_trans)
1695 && (uplo == blas_upper))
1696 || ((order == blas_colmajor) && (trans != blas_no_trans)
1697 && (uplo == blas_lower))
1698 || ((order == blas_rowmajor) && (trans != blas_no_trans)
1699 && (uplo == blas_lower))
1700 || ((order == blas_colmajor) && (trans == blas_no_trans)
1701 && (uplo == blas_upper))) {
1702 for (j = 0; j < n / 2; j++) {
1703 x_swapTemp1[0] = x_i[(n - 1 - j) * inc_index];
1704 x_swapTemp1[1] = x_i[(n - 1 - j) * inc_index + 1];
1705 x_swapTemp2[0] = x_i[j * inc_index];
1706 x_swapTemp2[1] = x_i[j * inc_index + 1];
1707 x_i[(n - 1 - j) * inc_index] = x_swapTemp2[0];
1708 x_i[(n - 1 - j) * inc_index + 1] = x_swapTemp2[1];
1709 x_i[j * inc_index] = x_swapTemp1[0];
1710 x_i[j * inc_index + 1] = x_swapTemp1[1];
1711
1712 head_r_true_swapTemp1[0] = head_r_true[(n - 1 - j) * inc_index];
1713 head_r_true_swapTemp1[1] = head_r_true[1 + (n - 1 - j) * inc_index];
1714 tail_r_true_swapTemp1[0] = tail_r_true[(n - 1 - j) * inc_index];
1715 tail_r_true_swapTemp1[1] = tail_r_true[1 + (n - 1 - j) * inc_index];
1716 head_r_true_swapTemp2[0] = head_r_true[j * inc_index];
1717 head_r_true_swapTemp2[1] = head_r_true[1 + j * inc_index];
1718 tail_r_true_swapTemp2[0] = tail_r_true[j * inc_index];
1719 tail_r_true_swapTemp2[1] = tail_r_true[1 + j * inc_index];
1720 head_r_true[(n - 1 - j) * inc_index] = head_r_true_swapTemp2[0];
1721 tail_r_true[(n - 1 - j) * inc_index] = tail_r_true_swapTemp2[0];
1722 head_r_true[1 + (n - 1 - j) * inc_index] = head_r_true_swapTemp2[1];
1723 tail_r_true[1 + (n - 1 - j) * inc_index] = tail_r_true_swapTemp2[1];
1724 head_r_true[j * inc_index] = head_r_true_swapTemp1[0];
1725 tail_r_true[j * inc_index] = tail_r_true_swapTemp1[0];
1726 head_r_true[1 + j * inc_index] = head_r_true_swapTemp1[1];
1727 tail_r_true[1 + j * inc_index] = tail_r_true_swapTemp1[1];
1728 }
1729 }
1730
1731 blas_free(temp);
1732 }
1733