1 #include <stdlib.h>
2 #include <math.h>
3 #include "blas_extended.h"
4 #include "blas_extended_private.h"
5 #include "blas_extended_test.h"
6
7
test_BLAS_sdot2(int n,enum blas_conj_type conj,float alpha,float beta,float rin,float rout,double r_true_l,double r_true_t,float * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)8 void test_BLAS_sdot2(int n, enum blas_conj_type conj, float alpha, float beta,
9 float rin, float rout, double r_true_l, double r_true_t,
10 float *x, int incx, float *head_y, float *tail_y,
11 int incy, double eps_int, double un_int,
12 double *test_ratio)
13
14 /* Purpose
15 * =======
16 *
17 * Computes ratio of the computed error from DOT2 over the expected
18 * error bound.
19 *
20 * Arguments
21 * =========
22 *
23 * n (input) int
24 * The length of the vectors X and Y.
25 *
26 * conj (input) enum blas_conj_type
27 *
28 * alpha (input) float
29 *
30 * beta (input) float
31 *
32 * rin (input) float
33 *
34 * rout (input) float
35 * This result was computed by some other routine, and will be
36 * tested by this routine by comparing it with the truth.
37 *
38 * r_true_l (input) double
39 * The leading part of the truth.
40 *
41 * r_true_t (input) double
42 * The trailing part of the truth.
43 *
44 * x (input) float*
45 *
46 * head_y
47 * tail_y (input) float*
48 *
49 * eps_int (input) double
50 * The internal machine precision.
51 *
52 * un_int (input) double
53 * The internal underflow threshold.
54 *
55 * test_ratio (output) float*
56 * The ratio of computed error for r over the error bound.
57 */
58 {
59 int i, ix, iy;
60 double eps_accurate, eps_out, tmp1, S, S1, S2, U;
61 double un_d, un_accurate, un_out;
62
63 /* Set the starting position */
64 ix = 0;
65 iy = 0;
66 if (incx < 0)
67 ix = -(n - 1) * incx;
68 if (incy < 0)
69 iy = -(n - 1) * incy;
70
71 /* computing S */
72 S = S1 = S2 = 0.;
73 for (i = 0; i < n; ++i) {
74 S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
75 S1 += fabs(x[ix]);
76 S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
77 ix += incx;
78 iy += incy;
79 }
80 S *= fabs(alpha);
81 S += fabs(beta * rin);
82
83 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
84 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
85 S = MAX(S, un_d);
86
87 eps_accurate = power(2, -BITS_E);
88 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
89 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
90 eps_out = power(2, -BITS_S);
91 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
92 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
93 tmp1 = fabs((rout - r_true_l) - r_true_t);
94
95 /* underflow */
96 U = 2 * fabs(alpha) * n + 3;
97 U = MAX(U, S1 + 2 * n + 1);
98 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
99
100 *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
101 + eps_out * fabs(r_true_l) + U);
102 } /* end of test_BLAS_sdot */
test_BLAS_ddot2(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,double * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)103 void test_BLAS_ddot2(int n, enum blas_conj_type conj, double alpha,
104 double beta, double rin, double rout, double r_true_l,
105 double r_true_t, double *x, int incx, double *head_y,
106 double *tail_y, int incy, double eps_int, double un_int,
107 double *test_ratio)
108
109 /* Purpose
110 * =======
111 *
112 * Computes ratio of the computed error from DOT2 over the expected
113 * error bound.
114 *
115 * Arguments
116 * =========
117 *
118 * n (input) int
119 * The length of the vectors X and Y.
120 *
121 * conj (input) enum blas_conj_type
122 *
123 * alpha (input) double
124 *
125 * beta (input) double
126 *
127 * rin (input) double
128 *
129 * rout (input) double
130 * This result was computed by some other routine, and will be
131 * tested by this routine by comparing it with the truth.
132 *
133 * r_true_l (input) double
134 * The leading part of the truth.
135 *
136 * r_true_t (input) double
137 * The trailing part of the truth.
138 *
139 * x (input) double*
140 *
141 * head_y
142 * tail_y (input) double*
143 *
144 * eps_int (input) double
145 * The internal machine precision.
146 *
147 * un_int (input) double
148 * The internal underflow threshold.
149 *
150 * test_ratio (output) float*
151 * The ratio of computed error for r over the error bound.
152 */
153 {
154 int i, ix, iy;
155 double eps_accurate, eps_out, tmp1, S, S1, S2, U;
156 double un_d, un_accurate, un_out;
157
158 /* Set the starting position */
159 ix = 0;
160 iy = 0;
161 if (incx < 0)
162 ix = -(n - 1) * incx;
163 if (incy < 0)
164 iy = -(n - 1) * incy;
165
166 /* computing S */
167 S = S1 = S2 = 0.;
168 for (i = 0; i < n; ++i) {
169 S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
170 S1 += fabs(x[ix]);
171 S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
172 ix += incx;
173 iy += incy;
174 }
175 S *= fabs(alpha);
176 S += fabs(beta * rin);
177
178 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
179 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
180 S = MAX(S, un_d);
181
182 eps_accurate = power(2, -BITS_E);
183 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
184 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
185 eps_out = power(2, -BITS_D);
186 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
187 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
188 tmp1 = fabs((rout - r_true_l) - r_true_t);
189
190 /* underflow */
191 U = 2 * fabs(alpha) * n + 3;
192 U = MAX(U, S1 + 2 * n + 1);
193 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
194
195 *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
196 + eps_out * fabs(r_true_l) + U);
197 } /* end of test_BLAS_ddot */
test_BLAS_cdot2(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)198 void test_BLAS_cdot2(int n, enum blas_conj_type conj, const void *alpha,
199 const void *beta, const void *rin, const void *rout,
200 double *r_true_l, double *r_true_t, void *x, int incx,
201 void *head_y, void *tail_y, int incy, double eps_int,
202 double un_int, double *test_ratio)
203
204 /* Purpose
205 * =======
206 *
207 * Computes ratio of the computed error from DOT2 over the expected
208 * error bound.
209 *
210 * Arguments
211 * =========
212 *
213 * n (input) int
214 * The length of the vectors X and Y.
215 *
216 * conj (input) enum blas_conj_type
217 *
218 * alpha (input) const void*
219 *
220 * beta (input) const void*
221 *
222 * rin (input) const void*
223 *
224 * rout (input) const void*
225 * This result was computed by some other routine, and will be
226 * tested by this routine by comparing it with the truth.
227 *
228 * r_true_l (input) double*
229 * The leading part of the truth.
230 *
231 * r_true_t (input) double*
232 * The trailing part of the truth.
233 *
234 * x (input) void*
235 *
236 * head_y
237 * tail_y (input) void*
238 *
239 * eps_int (input) double
240 * The internal machine precision.
241 *
242 * un_int (input) double
243 * The internal underflow threshold.
244 *
245 * test_ratio (output) float*
246 * The ratio of computed error for r over the error bound.
247 */
248 {
249 int i, ix, iy;
250 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
251 double un_d, un_accurate, un_out;
252 float *x_i = (float *) x;
253 float *head_y_i = (float *) head_y;
254 float *tail_y_i = (float *) tail_y;
255 float *alpha_i = (float *) alpha;
256 float *beta_i = (float *) beta;
257 float *rin_i = (float *) rin;
258 float *rout_i = (float *) rout;
259 float x_ii[2];
260 float y_ii[2];
261
262 /* Set the starting position */
263 incx *= 2;
264 incy *= 2;
265 ix = 0;
266 iy = 0;
267 if (incx < 0)
268 ix = -(n - 1) * incx;
269 if (incy < 0)
270 iy = -(n - 1) * incy;
271
272 /* computing S */
273 S = S1 = S2 = 0.;
274 for (i = 0; i < n; ++i) {
275 x_ii[0] = x_i[ix];
276 x_ii[1] = x_i[ix + 1];
277 y_ii[0] = head_y_i[iy];
278 y_ii[1] = head_y_i[iy + 1];
279 if (conj == blas_conj)
280 x_ii[1] = -x_ii[1];
281 S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
282 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
283
284 {
285 prod[0] = x_ii[0] * y_ii[0] - x_ii[1] * y_ii[1];
286 prod[1] = x_ii[0] * y_ii[1] + x_ii[1] * y_ii[0];
287 }
288
289 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
290
291 /* Now get the tail of y */
292 y_ii[0] = tail_y_i[iy];
293 y_ii[1] = tail_y_i[iy + 1];
294 {
295 prod[0] = x_ii[0] * y_ii[0] - x_ii[1] * y_ii[1];
296 prod[1] = x_ii[0] * y_ii[1] + x_ii[1] * y_ii[0];
297 }
298 /* prod = x[i]*y[i] */
299 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
300
301 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
302 ix += incx;
303 iy += incy;
304 }
305 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
306 {
307 prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
308 prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
309 }
310
311 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
312
313 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
314 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
315 S = MAX(S, un_d);
316
317 eps_accurate = power(2, -BITS_E);
318 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
319 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
320 eps_out = power(2, -BITS_S);
321 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
322 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
323 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
324 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
325 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
326
327 /* underflow */
328 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
329 U = MAX(U, S1 + 2 * n + 1);
330 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
331 U *= 2 * sqrt(2.);
332
333 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
334 +
335 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
336 r_true_l[1] * r_true_l[1]) +
337 U);
338 }
339
340 /* end of test_BLAS_cdot */
test_BLAS_zdot2(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)341 void test_BLAS_zdot2(int n, enum blas_conj_type conj, const void *alpha,
342 const void *beta, const void *rin, const void *rout,
343 double *r_true_l, double *r_true_t, void *x, int incx,
344 void *head_y, void *tail_y, int incy, double eps_int,
345 double un_int, double *test_ratio)
346
347 /* Purpose
348 * =======
349 *
350 * Computes ratio of the computed error from DOT2 over the expected
351 * error bound.
352 *
353 * Arguments
354 * =========
355 *
356 * n (input) int
357 * The length of the vectors X and Y.
358 *
359 * conj (input) enum blas_conj_type
360 *
361 * alpha (input) const void*
362 *
363 * beta (input) const void*
364 *
365 * rin (input) const void*
366 *
367 * rout (input) const void*
368 * This result was computed by some other routine, and will be
369 * tested by this routine by comparing it with the truth.
370 *
371 * r_true_l (input) double*
372 * The leading part of the truth.
373 *
374 * r_true_t (input) double*
375 * The trailing part of the truth.
376 *
377 * x (input) void*
378 *
379 * head_y
380 * tail_y (input) void*
381 *
382 * eps_int (input) double
383 * The internal machine precision.
384 *
385 * un_int (input) double
386 * The internal underflow threshold.
387 *
388 * test_ratio (output) float*
389 * The ratio of computed error for r over the error bound.
390 */
391 {
392 int i, ix, iy;
393 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
394 double un_d, un_accurate, un_out;
395 double *x_i = (double *) x;
396 double *head_y_i = (double *) head_y;
397 double *tail_y_i = (double *) tail_y;
398 double *alpha_i = (double *) alpha;
399 double *beta_i = (double *) beta;
400 double *rin_i = (double *) rin;
401 double *rout_i = (double *) rout;
402 double x_ii[2];
403 double y_ii[2];
404
405 /* Set the starting position */
406 incx *= 2;
407 incy *= 2;
408 ix = 0;
409 iy = 0;
410 if (incx < 0)
411 ix = -(n - 1) * incx;
412 if (incy < 0)
413 iy = -(n - 1) * incy;
414
415 /* computing S */
416 S = S1 = S2 = 0.;
417 for (i = 0; i < n; ++i) {
418 x_ii[0] = x_i[ix];
419 x_ii[1] = x_i[ix + 1];
420 y_ii[0] = head_y_i[iy];
421 y_ii[1] = head_y_i[iy + 1];
422 if (conj == blas_conj)
423 x_ii[1] = -x_ii[1];
424 S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
425 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
426
427 {
428 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
429 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
430 }
431 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
432
433 /* Now get the tail of y */
434 y_ii[0] = tail_y_i[iy];
435 y_ii[1] = tail_y_i[iy + 1];
436 {
437 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
438 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
439 } /* prod = x[i]*y[i] */
440 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
441
442 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
443 ix += incx;
444 iy += incy;
445 }
446 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
447 {
448 prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
449 prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
450 }
451 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
452
453 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
454 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
455 S = MAX(S, un_d);
456
457 eps_accurate = power(2, -BITS_E);
458 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
459 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
460 eps_out = power(2, -BITS_D);
461 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
462 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
463 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
464 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
465 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
466
467 /* underflow */
468 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
469 U = MAX(U, S1 + 2 * n + 1);
470 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
471 U *= 2 * sqrt(2.);
472
473 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
474 +
475 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
476 r_true_l[1] * r_true_l[1]) +
477 U);
478 }
479
480 /* end of test_BLAS_zdot */
test_BLAS_cdot2_s_s(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,float * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)481 void test_BLAS_cdot2_s_s(int n, enum blas_conj_type conj, const void *alpha,
482 const void *beta, const void *rin, const void *rout,
483 double *r_true_l, double *r_true_t, float *x,
484 int incx, float *head_y, float *tail_y, int incy,
485 double eps_int, double un_int, double *test_ratio)
486
487 /* Purpose
488 * =======
489 *
490 * Computes ratio of the computed error from DOT2 over the expected
491 * error bound.
492 *
493 * Arguments
494 * =========
495 *
496 * n (input) int
497 * The length of the vectors X and Y.
498 *
499 * conj (input) enum blas_conj_type
500 *
501 * alpha (input) const void*
502 *
503 * beta (input) const void*
504 *
505 * rin (input) const void*
506 *
507 * rout (input) const void*
508 * This result was computed by some other routine, and will be
509 * tested by this routine by comparing it with the truth.
510 *
511 * r_true_l (input) double*
512 * The leading part of the truth.
513 *
514 * r_true_t (input) double*
515 * The trailing part of the truth.
516 *
517 * x (input) float*
518 *
519 * head_y
520 * tail_y (input) float*
521 *
522 * eps_int (input) double
523 * The internal machine precision.
524 *
525 * un_int (input) double
526 * The internal underflow threshold.
527 *
528 * test_ratio (output) float*
529 * The ratio of computed error for r over the error bound.
530 */
531 {
532 int i, ix, iy;
533 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
534 double un_d, un_accurate, un_out;
535 float *x_i = x;
536 float *head_y_i = head_y;
537 float *tail_y_i = tail_y;
538 float *alpha_i = (float *) alpha;
539 float *beta_i = (float *) beta;
540 float *rin_i = (float *) rin;
541 float *rout_i = (float *) rout;
542 float x_ii;
543 float y_ii;
544
545 /* Set the starting position */
546
547
548 ix = 0;
549 iy = 0;
550 if (incx < 0)
551 ix = -(n - 1) * incx;
552 if (incy < 0)
553 iy = -(n - 1) * incy;
554
555 /* computing S */
556 S = S1 = S2 = 0.;
557 for (i = 0; i < n; ++i) {
558 x_ii = x_i[ix];
559 y_ii = head_y_i[iy];
560 S1 += fabs(x_ii);
561 S2 += fabs(y_ii);
562
563 prod[0] = x_ii * y_ii;
564 prod[1] = 0.0;
565 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
566
567 /* Now get the tail of y */
568 y_ii = tail_y_i[iy];
569 prod[0] = x_ii * y_ii;
570 prod[1] = 0.0; /* prod = x[i]*y[i] */
571 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
572
573 S2 += fabs(y_ii);
574 ix += incx;
575 iy += incy;
576 }
577 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
578 {
579 prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
580 prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
581 }
582
583 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
584
585 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
586 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
587 S = MAX(S, un_d);
588
589 eps_accurate = power(2, -BITS_E);
590 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
591 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
592 eps_out = power(2, -BITS_S);
593 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
594 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
595 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
596 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
597 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
598
599 /* underflow */
600 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
601 U = MAX(U, S1 + 2 * n + 1);
602 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
603 U *= 2 * sqrt(2.);
604
605 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
606 +
607 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
608 r_true_l[1] * r_true_l[1]) +
609 U);
610 }
611
612 /* end of test_BLAS_cdot */
test_BLAS_cdot2_s_c(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,float * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)613 void test_BLAS_cdot2_s_c(int n, enum blas_conj_type conj, const void *alpha,
614 const void *beta, const void *rin, const void *rout,
615 double *r_true_l, double *r_true_t, float *x,
616 int incx, void *head_y, void *tail_y, int incy,
617 double eps_int, double un_int, double *test_ratio)
618
619 /* Purpose
620 * =======
621 *
622 * Computes ratio of the computed error from DOT2 over the expected
623 * error bound.
624 *
625 * Arguments
626 * =========
627 *
628 * n (input) int
629 * The length of the vectors X and Y.
630 *
631 * conj (input) enum blas_conj_type
632 *
633 * alpha (input) const void*
634 *
635 * beta (input) const void*
636 *
637 * rin (input) const void*
638 *
639 * rout (input) const void*
640 * This result was computed by some other routine, and will be
641 * tested by this routine by comparing it with the truth.
642 *
643 * r_true_l (input) double*
644 * The leading part of the truth.
645 *
646 * r_true_t (input) double*
647 * The trailing part of the truth.
648 *
649 * x (input) float*
650 *
651 * head_y
652 * tail_y (input) void*
653 *
654 * eps_int (input) double
655 * The internal machine precision.
656 *
657 * un_int (input) double
658 * The internal underflow threshold.
659 *
660 * test_ratio (output) float*
661 * The ratio of computed error for r over the error bound.
662 */
663 {
664 int i, ix, iy;
665 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
666 double un_d, un_accurate, un_out;
667 float *x_i = x;
668 float *head_y_i = (float *) head_y;
669 float *tail_y_i = (float *) tail_y;
670 float *alpha_i = (float *) alpha;
671 float *beta_i = (float *) beta;
672 float *rin_i = (float *) rin;
673 float *rout_i = (float *) rout;
674 float x_ii;
675 float y_ii[2];
676
677 /* Set the starting position */
678
679 incy *= 2;
680 ix = 0;
681 iy = 0;
682 if (incx < 0)
683 ix = -(n - 1) * incx;
684 if (incy < 0)
685 iy = -(n - 1) * incy;
686
687 /* computing S */
688 S = S1 = S2 = 0.;
689 for (i = 0; i < n; ++i) {
690 x_ii = x_i[ix];
691 y_ii[0] = head_y_i[iy];
692 y_ii[1] = head_y_i[iy + 1];
693 S1 += fabs(x_ii);
694 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
695
696 {
697 prod[0] = y_ii[0] * x_ii;
698 prod[1] = y_ii[1] * x_ii;
699 }
700 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
701
702 /* Now get the tail of y */
703 y_ii[0] = tail_y_i[iy];
704 y_ii[1] = tail_y_i[iy + 1];
705 {
706 prod[0] = y_ii[0] * x_ii;
707 prod[1] = y_ii[1] * x_ii;
708 } /* prod = x[i]*y[i] */
709 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
710
711 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
712 ix += incx;
713 iy += incy;
714 }
715 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
716 {
717 prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
718 prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
719 }
720
721 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
722
723 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
724 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
725 S = MAX(S, un_d);
726
727 eps_accurate = power(2, -BITS_E);
728 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
729 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
730 eps_out = power(2, -BITS_S);
731 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
732 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
733 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
734 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
735 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
736
737 /* underflow */
738 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
739 U = MAX(U, S1 + 2 * n + 1);
740 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
741 U *= 2 * sqrt(2.);
742
743 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
744 +
745 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
746 r_true_l[1] * r_true_l[1]) +
747 U);
748 }
749
750 /* end of test_BLAS_cdot */
test_BLAS_cdot2_c_s(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)751 void test_BLAS_cdot2_c_s(int n, enum blas_conj_type conj, const void *alpha,
752 const void *beta, const void *rin, const void *rout,
753 double *r_true_l, double *r_true_t, void *x,
754 int incx, float *head_y, float *tail_y, int incy,
755 double eps_int, double un_int, double *test_ratio)
756
757 /* Purpose
758 * =======
759 *
760 * Computes ratio of the computed error from DOT2 over the expected
761 * error bound.
762 *
763 * Arguments
764 * =========
765 *
766 * n (input) int
767 * The length of the vectors X and Y.
768 *
769 * conj (input) enum blas_conj_type
770 *
771 * alpha (input) const void*
772 *
773 * beta (input) const void*
774 *
775 * rin (input) const void*
776 *
777 * rout (input) const void*
778 * This result was computed by some other routine, and will be
779 * tested by this routine by comparing it with the truth.
780 *
781 * r_true_l (input) double*
782 * The leading part of the truth.
783 *
784 * r_true_t (input) double*
785 * The trailing part of the truth.
786 *
787 * x (input) void*
788 *
789 * head_y
790 * tail_y (input) float*
791 *
792 * eps_int (input) double
793 * The internal machine precision.
794 *
795 * un_int (input) double
796 * The internal underflow threshold.
797 *
798 * test_ratio (output) float*
799 * The ratio of computed error for r over the error bound.
800 */
801 {
802 int i, ix, iy;
803 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
804 double un_d, un_accurate, un_out;
805 float *x_i = (float *) x;
806 float *head_y_i = head_y;
807 float *tail_y_i = tail_y;
808 float *alpha_i = (float *) alpha;
809 float *beta_i = (float *) beta;
810 float *rin_i = (float *) rin;
811 float *rout_i = (float *) rout;
812 float x_ii[2];
813 float y_ii;
814
815 /* Set the starting position */
816 incx *= 2;
817
818 ix = 0;
819 iy = 0;
820 if (incx < 0)
821 ix = -(n - 1) * incx;
822 if (incy < 0)
823 iy = -(n - 1) * incy;
824
825 /* computing S */
826 S = S1 = S2 = 0.;
827 for (i = 0; i < n; ++i) {
828 x_ii[0] = x_i[ix];
829 x_ii[1] = x_i[ix + 1];
830 y_ii = head_y_i[iy];
831 if (conj == blas_conj)
832 x_ii[1] = -x_ii[1];
833 S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
834 S2 += fabs(y_ii);
835
836 {
837 prod[0] = x_ii[0] * y_ii;
838 prod[1] = x_ii[1] * y_ii;
839 }
840 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
841
842 /* Now get the tail of y */
843 y_ii = tail_y_i[iy];
844 {
845 prod[0] = x_ii[0] * y_ii;
846 prod[1] = x_ii[1] * y_ii;
847 } /* prod = x[i]*y[i] */
848 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
849
850 S2 += fabs(y_ii);
851 ix += incx;
852 iy += incy;
853 }
854 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
855 {
856 prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
857 prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
858 }
859
860 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
861
862 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
863 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
864 S = MAX(S, un_d);
865
866 eps_accurate = power(2, -BITS_E);
867 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
868 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
869 eps_out = power(2, -BITS_S);
870 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
871 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
872 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
873 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
874 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
875
876 /* underflow */
877 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
878 U = MAX(U, S1 + 2 * n + 1);
879 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
880 U *= 2 * sqrt(2.);
881
882 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
883 +
884 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
885 r_true_l[1] * r_true_l[1]) +
886 U);
887 }
888
889 /* end of test_BLAS_cdot */
test_BLAS_zdot2_d_d(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,double * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)890 void test_BLAS_zdot2_d_d(int n, enum blas_conj_type conj, const void *alpha,
891 const void *beta, const void *rin, const void *rout,
892 double *r_true_l, double *r_true_t, double *x,
893 int incx, double *head_y, double *tail_y, int incy,
894 double eps_int, double un_int, double *test_ratio)
895
896 /* Purpose
897 * =======
898 *
899 * Computes ratio of the computed error from DOT2 over the expected
900 * error bound.
901 *
902 * Arguments
903 * =========
904 *
905 * n (input) int
906 * The length of the vectors X and Y.
907 *
908 * conj (input) enum blas_conj_type
909 *
910 * alpha (input) const void*
911 *
912 * beta (input) const void*
913 *
914 * rin (input) const void*
915 *
916 * rout (input) const void*
917 * This result was computed by some other routine, and will be
918 * tested by this routine by comparing it with the truth.
919 *
920 * r_true_l (input) double*
921 * The leading part of the truth.
922 *
923 * r_true_t (input) double*
924 * The trailing part of the truth.
925 *
926 * x (input) double*
927 *
928 * head_y
929 * tail_y (input) double*
930 *
931 * eps_int (input) double
932 * The internal machine precision.
933 *
934 * un_int (input) double
935 * The internal underflow threshold.
936 *
937 * test_ratio (output) float*
938 * The ratio of computed error for r over the error bound.
939 */
940 {
941 int i, ix, iy;
942 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
943 double un_d, un_accurate, un_out;
944 double *x_i = x;
945 double *head_y_i = head_y;
946 double *tail_y_i = tail_y;
947 double *alpha_i = (double *) alpha;
948 double *beta_i = (double *) beta;
949 double *rin_i = (double *) rin;
950 double *rout_i = (double *) rout;
951 double x_ii;
952 double y_ii;
953
954 /* Set the starting position */
955
956
957 ix = 0;
958 iy = 0;
959 if (incx < 0)
960 ix = -(n - 1) * incx;
961 if (incy < 0)
962 iy = -(n - 1) * incy;
963
964 /* computing S */
965 S = S1 = S2 = 0.;
966 for (i = 0; i < n; ++i) {
967 x_ii = x_i[ix];
968 y_ii = head_y_i[iy];
969 S1 += fabs(x_ii);
970 S2 += fabs(y_ii);
971
972 prod[0] = x_ii * y_ii;
973 prod[1] = 0.0;
974 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
975
976 /* Now get the tail of y */
977 y_ii = tail_y_i[iy];
978 prod[0] = x_ii * y_ii;
979 prod[1] = 0.0; /* prod = x[i]*y[i] */
980 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
981
982 S2 += fabs(y_ii);
983 ix += incx;
984 iy += incy;
985 }
986 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
987 {
988 prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
989 prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
990 }
991 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
992
993 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
994 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
995 S = MAX(S, un_d);
996
997 eps_accurate = power(2, -BITS_E);
998 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
999 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1000 eps_out = power(2, -BITS_D);
1001 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1002 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1003 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1004 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1005 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1006
1007 /* underflow */
1008 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1009 U = MAX(U, S1 + 2 * n + 1);
1010 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1011 U *= 2 * sqrt(2.);
1012
1013 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1014 +
1015 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1016 r_true_l[1] * r_true_l[1]) +
1017 U);
1018 }
1019
1020 /* end of test_BLAS_zdot */
test_BLAS_zdot2_d_z(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,double * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1021 void test_BLAS_zdot2_d_z(int n, enum blas_conj_type conj, const void *alpha,
1022 const void *beta, const void *rin, const void *rout,
1023 double *r_true_l, double *r_true_t, double *x,
1024 int incx, void *head_y, void *tail_y, int incy,
1025 double eps_int, double un_int, double *test_ratio)
1026
1027 /* Purpose
1028 * =======
1029 *
1030 * Computes ratio of the computed error from DOT2 over the expected
1031 * error bound.
1032 *
1033 * Arguments
1034 * =========
1035 *
1036 * n (input) int
1037 * The length of the vectors X and Y.
1038 *
1039 * conj (input) enum blas_conj_type
1040 *
1041 * alpha (input) const void*
1042 *
1043 * beta (input) const void*
1044 *
1045 * rin (input) const void*
1046 *
1047 * rout (input) const void*
1048 * This result was computed by some other routine, and will be
1049 * tested by this routine by comparing it with the truth.
1050 *
1051 * r_true_l (input) double*
1052 * The leading part of the truth.
1053 *
1054 * r_true_t (input) double*
1055 * The trailing part of the truth.
1056 *
1057 * x (input) double*
1058 *
1059 * head_y
1060 * tail_y (input) void*
1061 *
1062 * eps_int (input) double
1063 * The internal machine precision.
1064 *
1065 * un_int (input) double
1066 * The internal underflow threshold.
1067 *
1068 * test_ratio (output) float*
1069 * The ratio of computed error for r over the error bound.
1070 */
1071 {
1072 int i, ix, iy;
1073 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1074 double un_d, un_accurate, un_out;
1075 double *x_i = x;
1076 double *head_y_i = (double *) head_y;
1077 double *tail_y_i = (double *) tail_y;
1078 double *alpha_i = (double *) alpha;
1079 double *beta_i = (double *) beta;
1080 double *rin_i = (double *) rin;
1081 double *rout_i = (double *) rout;
1082 double x_ii;
1083 double y_ii[2];
1084
1085 /* Set the starting position */
1086
1087 incy *= 2;
1088 ix = 0;
1089 iy = 0;
1090 if (incx < 0)
1091 ix = -(n - 1) * incx;
1092 if (incy < 0)
1093 iy = -(n - 1) * incy;
1094
1095 /* computing S */
1096 S = S1 = S2 = 0.;
1097 for (i = 0; i < n; ++i) {
1098 x_ii = x_i[ix];
1099 y_ii[0] = head_y_i[iy];
1100 y_ii[1] = head_y_i[iy + 1];
1101 S1 += fabs(x_ii);
1102 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1103
1104 {
1105 prod[0] = y_ii[0] * x_ii;
1106 prod[1] = y_ii[1] * x_ii;
1107 }
1108 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1109
1110 /* Now get the tail of y */
1111 y_ii[0] = tail_y_i[iy];
1112 y_ii[1] = tail_y_i[iy + 1];
1113 {
1114 prod[0] = y_ii[0] * x_ii;
1115 prod[1] = y_ii[1] * x_ii;
1116 } /* prod = x[i]*y[i] */
1117 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1118
1119 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1120 ix += incx;
1121 iy += incy;
1122 }
1123 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1124 {
1125 prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1126 prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1127 }
1128 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1129
1130 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1131 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1132 S = MAX(S, un_d);
1133
1134 eps_accurate = power(2, -BITS_E);
1135 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1136 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1137 eps_out = power(2, -BITS_D);
1138 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1139 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1140 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1141 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1142 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1143
1144 /* underflow */
1145 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1146 U = MAX(U, S1 + 2 * n + 1);
1147 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1148 U *= 2 * sqrt(2.);
1149
1150 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1151 +
1152 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1153 r_true_l[1] * r_true_l[1]) +
1154 U);
1155 }
1156
1157 /* end of test_BLAS_zdot */
test_BLAS_zdot2_z_d(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1158 void test_BLAS_zdot2_z_d(int n, enum blas_conj_type conj, const void *alpha,
1159 const void *beta, const void *rin, const void *rout,
1160 double *r_true_l, double *r_true_t, void *x,
1161 int incx, double *head_y, double *tail_y, int incy,
1162 double eps_int, double un_int, double *test_ratio)
1163
1164 /* Purpose
1165 * =======
1166 *
1167 * Computes ratio of the computed error from DOT2 over the expected
1168 * error bound.
1169 *
1170 * Arguments
1171 * =========
1172 *
1173 * n (input) int
1174 * The length of the vectors X and Y.
1175 *
1176 * conj (input) enum blas_conj_type
1177 *
1178 * alpha (input) const void*
1179 *
1180 * beta (input) const void*
1181 *
1182 * rin (input) const void*
1183 *
1184 * rout (input) const void*
1185 * This result was computed by some other routine, and will be
1186 * tested by this routine by comparing it with the truth.
1187 *
1188 * r_true_l (input) double*
1189 * The leading part of the truth.
1190 *
1191 * r_true_t (input) double*
1192 * The trailing part of the truth.
1193 *
1194 * x (input) void*
1195 *
1196 * head_y
1197 * tail_y (input) double*
1198 *
1199 * eps_int (input) double
1200 * The internal machine precision.
1201 *
1202 * un_int (input) double
1203 * The internal underflow threshold.
1204 *
1205 * test_ratio (output) float*
1206 * The ratio of computed error for r over the error bound.
1207 */
1208 {
1209 int i, ix, iy;
1210 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1211 double un_d, un_accurate, un_out;
1212 double *x_i = (double *) x;
1213 double *head_y_i = head_y;
1214 double *tail_y_i = tail_y;
1215 double *alpha_i = (double *) alpha;
1216 double *beta_i = (double *) beta;
1217 double *rin_i = (double *) rin;
1218 double *rout_i = (double *) rout;
1219 double x_ii[2];
1220 double y_ii;
1221
1222 /* Set the starting position */
1223 incx *= 2;
1224
1225 ix = 0;
1226 iy = 0;
1227 if (incx < 0)
1228 ix = -(n - 1) * incx;
1229 if (incy < 0)
1230 iy = -(n - 1) * incy;
1231
1232 /* computing S */
1233 S = S1 = S2 = 0.;
1234 for (i = 0; i < n; ++i) {
1235 x_ii[0] = x_i[ix];
1236 x_ii[1] = x_i[ix + 1];
1237 y_ii = head_y_i[iy];
1238 if (conj == blas_conj)
1239 x_ii[1] = -x_ii[1];
1240 S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1241 S2 += fabs(y_ii);
1242
1243 {
1244 prod[0] = x_ii[0] * y_ii;
1245 prod[1] = x_ii[1] * y_ii;
1246 }
1247 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1248
1249 /* Now get the tail of y */
1250 y_ii = tail_y_i[iy];
1251 {
1252 prod[0] = x_ii[0] * y_ii;
1253 prod[1] = x_ii[1] * y_ii;
1254 } /* prod = x[i]*y[i] */
1255 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1256
1257 S2 += fabs(y_ii);
1258 ix += incx;
1259 iy += incy;
1260 }
1261 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1262 {
1263 prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1264 prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1265 }
1266 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1267
1268 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1269 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1270 S = MAX(S, un_d);
1271
1272 eps_accurate = power(2, -BITS_E);
1273 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1274 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1275 eps_out = power(2, -BITS_D);
1276 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1277 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1278 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1279 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1280 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1281
1282 /* underflow */
1283 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1284 U = MAX(U, S1 + 2 * n + 1);
1285 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1286 U *= 2 * sqrt(2.);
1287
1288 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1289 +
1290 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1291 r_true_l[1] * r_true_l[1]) +
1292 U);
1293 }
1294
1295 /* end of test_BLAS_zdot */
test_BLAS_ddot2_s_s(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,float * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1296 void test_BLAS_ddot2_s_s(int n, enum blas_conj_type conj, double alpha,
1297 double beta, double rin, double rout,
1298 double r_true_l, double r_true_t, float *x, int incx,
1299 float *head_y, float *tail_y, int incy,
1300 double eps_int, double un_int, double *test_ratio)
1301
1302 /* Purpose
1303 * =======
1304 *
1305 * Computes ratio of the computed error from DOT2 over the expected
1306 * error bound.
1307 *
1308 * Arguments
1309 * =========
1310 *
1311 * n (input) int
1312 * The length of the vectors X and Y.
1313 *
1314 * conj (input) enum blas_conj_type
1315 *
1316 * alpha (input) double
1317 *
1318 * beta (input) double
1319 *
1320 * rin (input) double
1321 *
1322 * rout (input) double
1323 * This result was computed by some other routine, and will be
1324 * tested by this routine by comparing it with the truth.
1325 *
1326 * r_true_l (input) double
1327 * The leading part of the truth.
1328 *
1329 * r_true_t (input) double
1330 * The trailing part of the truth.
1331 *
1332 * x (input) float*
1333 *
1334 * head_y
1335 * tail_y (input) float*
1336 *
1337 * eps_int (input) double
1338 * The internal machine precision.
1339 *
1340 * un_int (input) double
1341 * The internal underflow threshold.
1342 *
1343 * test_ratio (output) float*
1344 * The ratio of computed error for r over the error bound.
1345 */
1346 {
1347 int i, ix, iy;
1348 double eps_accurate, eps_out, tmp1, S, S1, S2, U;
1349 double un_d, un_accurate, un_out;
1350
1351 /* Set the starting position */
1352 ix = 0;
1353 iy = 0;
1354 if (incx < 0)
1355 ix = -(n - 1) * incx;
1356 if (incy < 0)
1357 iy = -(n - 1) * incy;
1358
1359 /* computing S */
1360 S = S1 = S2 = 0.;
1361 for (i = 0; i < n; ++i) {
1362 S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
1363 S1 += fabs(x[ix]);
1364 S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
1365 ix += incx;
1366 iy += incy;
1367 }
1368 S *= fabs(alpha);
1369 S += fabs(beta * rin);
1370
1371 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1372 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1373 S = MAX(S, un_d);
1374
1375 eps_accurate = power(2, -BITS_E);
1376 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1377 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1378 eps_out = power(2, -BITS_D);
1379 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1380 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1381 tmp1 = fabs((rout - r_true_l) - r_true_t);
1382
1383 /* underflow */
1384 U = 2 * fabs(alpha) * n + 3;
1385 U = MAX(U, S1 + 2 * n + 1);
1386 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1387
1388 *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
1389 + eps_out * fabs(r_true_l) + U);
1390 } /* end of test_BLAS_ddot */
test_BLAS_ddot2_s_d(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,float * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1391 void test_BLAS_ddot2_s_d(int n, enum blas_conj_type conj, double alpha,
1392 double beta, double rin, double rout,
1393 double r_true_l, double r_true_t, float *x, int incx,
1394 double *head_y, double *tail_y, int incy,
1395 double eps_int, double un_int, double *test_ratio)
1396
1397 /* Purpose
1398 * =======
1399 *
1400 * Computes ratio of the computed error from DOT2 over the expected
1401 * error bound.
1402 *
1403 * Arguments
1404 * =========
1405 *
1406 * n (input) int
1407 * The length of the vectors X and Y.
1408 *
1409 * conj (input) enum blas_conj_type
1410 *
1411 * alpha (input) double
1412 *
1413 * beta (input) double
1414 *
1415 * rin (input) double
1416 *
1417 * rout (input) double
1418 * This result was computed by some other routine, and will be
1419 * tested by this routine by comparing it with the truth.
1420 *
1421 * r_true_l (input) double
1422 * The leading part of the truth.
1423 *
1424 * r_true_t (input) double
1425 * The trailing part of the truth.
1426 *
1427 * x (input) float*
1428 *
1429 * head_y
1430 * tail_y (input) double*
1431 *
1432 * eps_int (input) double
1433 * The internal machine precision.
1434 *
1435 * un_int (input) double
1436 * The internal underflow threshold.
1437 *
1438 * test_ratio (output) float*
1439 * The ratio of computed error for r over the error bound.
1440 */
1441 {
1442 int i, ix, iy;
1443 double eps_accurate, eps_out, tmp1, S, S1, S2, U;
1444 double un_d, un_accurate, un_out;
1445
1446 /* Set the starting position */
1447 ix = 0;
1448 iy = 0;
1449 if (incx < 0)
1450 ix = -(n - 1) * incx;
1451 if (incy < 0)
1452 iy = -(n - 1) * incy;
1453
1454 /* computing S */
1455 S = S1 = S2 = 0.;
1456 for (i = 0; i < n; ++i) {
1457 S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
1458 S1 += fabs(x[ix]);
1459 S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
1460 ix += incx;
1461 iy += incy;
1462 }
1463 S *= fabs(alpha);
1464 S += fabs(beta * rin);
1465
1466 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1467 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1468 S = MAX(S, un_d);
1469
1470 eps_accurate = power(2, -BITS_E);
1471 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1472 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1473 eps_out = power(2, -BITS_D);
1474 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1475 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1476 tmp1 = fabs((rout - r_true_l) - r_true_t);
1477
1478 /* underflow */
1479 U = 2 * fabs(alpha) * n + 3;
1480 U = MAX(U, S1 + 2 * n + 1);
1481 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1482
1483 *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
1484 + eps_out * fabs(r_true_l) + U);
1485 } /* end of test_BLAS_ddot */
test_BLAS_ddot2_d_s(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,double * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1486 void test_BLAS_ddot2_d_s(int n, enum blas_conj_type conj, double alpha,
1487 double beta, double rin, double rout,
1488 double r_true_l, double r_true_t, double *x,
1489 int incx, float *head_y, float *tail_y, int incy,
1490 double eps_int, double un_int, double *test_ratio)
1491
1492 /* Purpose
1493 * =======
1494 *
1495 * Computes ratio of the computed error from DOT2 over the expected
1496 * error bound.
1497 *
1498 * Arguments
1499 * =========
1500 *
1501 * n (input) int
1502 * The length of the vectors X and Y.
1503 *
1504 * conj (input) enum blas_conj_type
1505 *
1506 * alpha (input) double
1507 *
1508 * beta (input) double
1509 *
1510 * rin (input) double
1511 *
1512 * rout (input) double
1513 * This result was computed by some other routine, and will be
1514 * tested by this routine by comparing it with the truth.
1515 *
1516 * r_true_l (input) double
1517 * The leading part of the truth.
1518 *
1519 * r_true_t (input) double
1520 * The trailing part of the truth.
1521 *
1522 * x (input) double*
1523 *
1524 * head_y
1525 * tail_y (input) float*
1526 *
1527 * eps_int (input) double
1528 * The internal machine precision.
1529 *
1530 * un_int (input) double
1531 * The internal underflow threshold.
1532 *
1533 * test_ratio (output) float*
1534 * The ratio of computed error for r over the error bound.
1535 */
1536 {
1537 int i, ix, iy;
1538 double eps_accurate, eps_out, tmp1, S, S1, S2, U;
1539 double un_d, un_accurate, un_out;
1540
1541 /* Set the starting position */
1542 ix = 0;
1543 iy = 0;
1544 if (incx < 0)
1545 ix = -(n - 1) * incx;
1546 if (incy < 0)
1547 iy = -(n - 1) * incy;
1548
1549 /* computing S */
1550 S = S1 = S2 = 0.;
1551 for (i = 0; i < n; ++i) {
1552 S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
1553 S1 += fabs(x[ix]);
1554 S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
1555 ix += incx;
1556 iy += incy;
1557 }
1558 S *= fabs(alpha);
1559 S += fabs(beta * rin);
1560
1561 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1562 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1563 S = MAX(S, un_d);
1564
1565 eps_accurate = power(2, -BITS_E);
1566 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1567 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1568 eps_out = power(2, -BITS_D);
1569 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1570 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1571 tmp1 = fabs((rout - r_true_l) - r_true_t);
1572
1573 /* underflow */
1574 U = 2 * fabs(alpha) * n + 3;
1575 U = MAX(U, S1 + 2 * n + 1);
1576 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1577
1578 *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
1579 + eps_out * fabs(r_true_l) + U);
1580 } /* end of test_BLAS_ddot */
test_BLAS_zdot2_c_c(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1581 void test_BLAS_zdot2_c_c(int n, enum blas_conj_type conj, const void *alpha,
1582 const void *beta, const void *rin, const void *rout,
1583 double *r_true_l, double *r_true_t, void *x,
1584 int incx, void *head_y, void *tail_y, int incy,
1585 double eps_int, double un_int, double *test_ratio)
1586
1587 /* Purpose
1588 * =======
1589 *
1590 * Computes ratio of the computed error from DOT2 over the expected
1591 * error bound.
1592 *
1593 * Arguments
1594 * =========
1595 *
1596 * n (input) int
1597 * The length of the vectors X and Y.
1598 *
1599 * conj (input) enum blas_conj_type
1600 *
1601 * alpha (input) const void*
1602 *
1603 * beta (input) const void*
1604 *
1605 * rin (input) const void*
1606 *
1607 * rout (input) const void*
1608 * This result was computed by some other routine, and will be
1609 * tested by this routine by comparing it with the truth.
1610 *
1611 * r_true_l (input) double*
1612 * The leading part of the truth.
1613 *
1614 * r_true_t (input) double*
1615 * The trailing part of the truth.
1616 *
1617 * x (input) void*
1618 *
1619 * head_y
1620 * tail_y (input) void*
1621 *
1622 * eps_int (input) double
1623 * The internal machine precision.
1624 *
1625 * un_int (input) double
1626 * The internal underflow threshold.
1627 *
1628 * test_ratio (output) float*
1629 * The ratio of computed error for r over the error bound.
1630 */
1631 {
1632 int i, ix, iy;
1633 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1634 double un_d, un_accurate, un_out;
1635 float *x_i = (float *) x;
1636 float *head_y_i = (float *) head_y;
1637 float *tail_y_i = (float *) tail_y;
1638 double *alpha_i = (double *) alpha;
1639 double *beta_i = (double *) beta;
1640 double *rin_i = (double *) rin;
1641 double *rout_i = (double *) rout;
1642 float x_ii[2];
1643 float y_ii[2];
1644
1645 /* Set the starting position */
1646 incx *= 2;
1647 incy *= 2;
1648 ix = 0;
1649 iy = 0;
1650 if (incx < 0)
1651 ix = -(n - 1) * incx;
1652 if (incy < 0)
1653 iy = -(n - 1) * incy;
1654
1655 /* computing S */
1656 S = S1 = S2 = 0.;
1657 for (i = 0; i < n; ++i) {
1658 x_ii[0] = x_i[ix];
1659 x_ii[1] = x_i[ix + 1];
1660 y_ii[0] = head_y_i[iy];
1661 y_ii[1] = head_y_i[iy + 1];
1662 if (conj == blas_conj)
1663 x_ii[1] = -x_ii[1];
1664 S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1665 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1666
1667 {
1668 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1669 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1670 }
1671 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1672
1673 /* Now get the tail of y */
1674 y_ii[0] = tail_y_i[iy];
1675 y_ii[1] = tail_y_i[iy + 1];
1676 {
1677 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1678 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1679 } /* prod = x[i]*y[i] */
1680 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1681
1682 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1683 ix += incx;
1684 iy += incy;
1685 }
1686 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1687 {
1688 prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1689 prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1690 }
1691 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1692
1693 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1694 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1695 S = MAX(S, un_d);
1696
1697 eps_accurate = power(2, -BITS_E);
1698 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1699 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1700 eps_out = power(2, -BITS_D);
1701 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1702 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1703 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1704 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1705 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1706
1707 /* underflow */
1708 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1709 U = MAX(U, S1 + 2 * n + 1);
1710 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1711 U *= 2 * sqrt(2.);
1712
1713 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1714 +
1715 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1716 r_true_l[1] * r_true_l[1]) +
1717 U);
1718 }
1719
1720 /* end of test_BLAS_zdot */
test_BLAS_zdot2_c_z(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1721 void test_BLAS_zdot2_c_z(int n, enum blas_conj_type conj, const void *alpha,
1722 const void *beta, const void *rin, const void *rout,
1723 double *r_true_l, double *r_true_t, void *x,
1724 int incx, void *head_y, void *tail_y, int incy,
1725 double eps_int, double un_int, double *test_ratio)
1726
1727 /* Purpose
1728 * =======
1729 *
1730 * Computes ratio of the computed error from DOT2 over the expected
1731 * error bound.
1732 *
1733 * Arguments
1734 * =========
1735 *
1736 * n (input) int
1737 * The length of the vectors X and Y.
1738 *
1739 * conj (input) enum blas_conj_type
1740 *
1741 * alpha (input) const void*
1742 *
1743 * beta (input) const void*
1744 *
1745 * rin (input) const void*
1746 *
1747 * rout (input) const void*
1748 * This result was computed by some other routine, and will be
1749 * tested by this routine by comparing it with the truth.
1750 *
1751 * r_true_l (input) double*
1752 * The leading part of the truth.
1753 *
1754 * r_true_t (input) double*
1755 * The trailing part of the truth.
1756 *
1757 * x (input) void*
1758 *
1759 * head_y
1760 * tail_y (input) void*
1761 *
1762 * eps_int (input) double
1763 * The internal machine precision.
1764 *
1765 * un_int (input) double
1766 * The internal underflow threshold.
1767 *
1768 * test_ratio (output) float*
1769 * The ratio of computed error for r over the error bound.
1770 */
1771 {
1772 int i, ix, iy;
1773 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1774 double un_d, un_accurate, un_out;
1775 float *x_i = (float *) x;
1776 double *head_y_i = (double *) head_y;
1777 double *tail_y_i = (double *) tail_y;
1778 double *alpha_i = (double *) alpha;
1779 double *beta_i = (double *) beta;
1780 double *rin_i = (double *) rin;
1781 double *rout_i = (double *) rout;
1782 float x_ii[2];
1783 double y_ii[2];
1784
1785 /* Set the starting position */
1786 incx *= 2;
1787 incy *= 2;
1788 ix = 0;
1789 iy = 0;
1790 if (incx < 0)
1791 ix = -(n - 1) * incx;
1792 if (incy < 0)
1793 iy = -(n - 1) * incy;
1794
1795 /* computing S */
1796 S = S1 = S2 = 0.;
1797 for (i = 0; i < n; ++i) {
1798 x_ii[0] = x_i[ix];
1799 x_ii[1] = x_i[ix + 1];
1800 y_ii[0] = head_y_i[iy];
1801 y_ii[1] = head_y_i[iy + 1];
1802 if (conj == blas_conj)
1803 x_ii[1] = -x_ii[1];
1804 S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1805 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1806
1807 {
1808 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1809 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1810 }
1811 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1812
1813 /* Now get the tail of y */
1814 y_ii[0] = tail_y_i[iy];
1815 y_ii[1] = tail_y_i[iy + 1];
1816 {
1817 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1818 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1819 } /* prod = x[i]*y[i] */
1820 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1821
1822 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1823 ix += incx;
1824 iy += incy;
1825 }
1826 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1827 {
1828 prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1829 prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1830 }
1831 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1832
1833 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1834 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1835 S = MAX(S, un_d);
1836
1837 eps_accurate = power(2, -BITS_E);
1838 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1839 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1840 eps_out = power(2, -BITS_D);
1841 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1842 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1843 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1844 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1845 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1846
1847 /* underflow */
1848 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1849 U = MAX(U, S1 + 2 * n + 1);
1850 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1851 U *= 2 * sqrt(2.);
1852
1853 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1854 +
1855 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1856 r_true_l[1] * r_true_l[1]) +
1857 U);
1858 }
1859
1860 /* end of test_BLAS_zdot */
test_BLAS_zdot2_z_c(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1861 void test_BLAS_zdot2_z_c(int n, enum blas_conj_type conj, const void *alpha,
1862 const void *beta, const void *rin, const void *rout,
1863 double *r_true_l, double *r_true_t, void *x,
1864 int incx, void *head_y, void *tail_y, int incy,
1865 double eps_int, double un_int, double *test_ratio)
1866
1867 /* Purpose
1868 * =======
1869 *
1870 * Computes ratio of the computed error from DOT2 over the expected
1871 * error bound.
1872 *
1873 * Arguments
1874 * =========
1875 *
1876 * n (input) int
1877 * The length of the vectors X and Y.
1878 *
1879 * conj (input) enum blas_conj_type
1880 *
1881 * alpha (input) const void*
1882 *
1883 * beta (input) const void*
1884 *
1885 * rin (input) const void*
1886 *
1887 * rout (input) const void*
1888 * This result was computed by some other routine, and will be
1889 * tested by this routine by comparing it with the truth.
1890 *
1891 * r_true_l (input) double*
1892 * The leading part of the truth.
1893 *
1894 * r_true_t (input) double*
1895 * The trailing part of the truth.
1896 *
1897 * x (input) void*
1898 *
1899 * head_y
1900 * tail_y (input) void*
1901 *
1902 * eps_int (input) double
1903 * The internal machine precision.
1904 *
1905 * un_int (input) double
1906 * The internal underflow threshold.
1907 *
1908 * test_ratio (output) float*
1909 * The ratio of computed error for r over the error bound.
1910 */
1911 {
1912 int i, ix, iy;
1913 double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1914 double un_d, un_accurate, un_out;
1915 double *x_i = (double *) x;
1916 float *head_y_i = (float *) head_y;
1917 float *tail_y_i = (float *) tail_y;
1918 double *alpha_i = (double *) alpha;
1919 double *beta_i = (double *) beta;
1920 double *rin_i = (double *) rin;
1921 double *rout_i = (double *) rout;
1922 double x_ii[2];
1923 float y_ii[2];
1924
1925 /* Set the starting position */
1926 incx *= 2;
1927 incy *= 2;
1928 ix = 0;
1929 iy = 0;
1930 if (incx < 0)
1931 ix = -(n - 1) * incx;
1932 if (incy < 0)
1933 iy = -(n - 1) * incy;
1934
1935 /* computing S */
1936 S = S1 = S2 = 0.;
1937 for (i = 0; i < n; ++i) {
1938 x_ii[0] = x_i[ix];
1939 x_ii[1] = x_i[ix + 1];
1940 y_ii[0] = head_y_i[iy];
1941 y_ii[1] = head_y_i[iy + 1];
1942 if (conj == blas_conj)
1943 x_ii[1] = -x_ii[1];
1944 S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1945 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1946
1947 {
1948 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1949 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1950 }
1951 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1952
1953 /* Now get the tail of y */
1954 y_ii[0] = tail_y_i[iy];
1955 y_ii[1] = tail_y_i[iy + 1];
1956 {
1957 prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1958 prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1959 } /* prod = x[i]*y[i] */
1960 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1961
1962 S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1963 ix += incx;
1964 iy += incy;
1965 }
1966 S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1967 {
1968 prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1969 prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1970 }
1971 S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1972
1973 un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1974 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1975 S = MAX(S, un_d);
1976
1977 eps_accurate = power(2, -BITS_E);
1978 un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1979 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1980 eps_out = power(2, -BITS_D);
1981 un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1982 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1983 tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1984 tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1985 tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1986
1987 /* underflow */
1988 U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1989 U = MAX(U, S1 + 2 * n + 1);
1990 U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1991 U *= 2 * sqrt(2.);
1992
1993 *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1994 +
1995 sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1996 r_true_l[1] * r_true_l[1]) +
1997 U);
1998 }
1999
2000 /* end of test_BLAS_zdot */
2001