1 #include <stdlib.h>
2 #include <math.h>
3 #include "blas_extended.h"
4 #include "blas_extended_private.h"
5 #include "blas_extended_test.h"
6 
7 
test_BLAS_sdot2(int n,enum blas_conj_type conj,float alpha,float beta,float rin,float rout,double r_true_l,double r_true_t,float * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)8 void test_BLAS_sdot2(int n, enum blas_conj_type conj, float alpha, float beta,
9 		     float rin, float rout, double r_true_l, double r_true_t,
10 		     float *x, int incx, float *head_y, float *tail_y,
11 		     int incy, double eps_int, double un_int,
12 		     double *test_ratio)
13 
14 /* Purpose
15  * =======
16  *
17  * Computes ratio of the computed error from DOT2 over the expected
18  * error bound.
19  *
20  * Arguments
21  * =========
22  *
23  * n       (input) int
24  *         The length of the vectors X and Y.
25  *
26  * conj    (input) enum blas_conj_type
27  *
28  * alpha   (input) float
29  *
30  * beta    (input) float
31  *
32  * rin     (input) float
33  *
34  * rout    (input) float
35  *         This result was computed by some other routine, and will be
36  *         tested by this routine by comparing it with the truth.
37  *
38  * r_true_l (input) double
39  *         The leading part of the truth.
40  *
41  * r_true_t (input) double
42  *         The trailing part of the truth.
43  *
44  * x       (input) float*
45  *
46  * head_y
47  * tail_y  (input) float*
48  *
49  * eps_int (input) double
50  *         The internal machine precision.
51  *
52  * un_int  (input) double
53  *         The internal underflow threshold.
54  *
55  * test_ratio (output) float*
56  *         The ratio of computed error for r over the error bound.
57  */
58 {
59   int i, ix, iy;
60   double eps_accurate, eps_out, tmp1, S, S1, S2, U;
61   double un_d, un_accurate, un_out;
62 
63   /* Set the starting position */
64   ix = 0;
65   iy = 0;
66   if (incx < 0)
67     ix = -(n - 1) * incx;
68   if (incy < 0)
69     iy = -(n - 1) * incy;
70 
71   /* computing S */
72   S = S1 = S2 = 0.;
73   for (i = 0; i < n; ++i) {
74     S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
75     S1 += fabs(x[ix]);
76     S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
77     ix += incx;
78     iy += incy;
79   }
80   S *= fabs(alpha);
81   S += fabs(beta * rin);
82 
83   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
84 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
85   S = MAX(S, un_d);
86 
87   eps_accurate = power(2, -BITS_E);
88   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
89 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
90   eps_out = power(2, -BITS_S);
91   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
92 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
93   tmp1 = fabs((rout - r_true_l) - r_true_t);
94 
95   /* underflow */
96   U = 2 * fabs(alpha) * n + 3;
97   U = MAX(U, S1 + 2 * n + 1);
98   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
99 
100   *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
101 			+ eps_out * fabs(r_true_l) + U);
102 }				/* end of test_BLAS_sdot */
test_BLAS_ddot2(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,double * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)103 void test_BLAS_ddot2(int n, enum blas_conj_type conj, double alpha,
104 		     double beta, double rin, double rout, double r_true_l,
105 		     double r_true_t, double *x, int incx, double *head_y,
106 		     double *tail_y, int incy, double eps_int, double un_int,
107 		     double *test_ratio)
108 
109 /* Purpose
110  * =======
111  *
112  * Computes ratio of the computed error from DOT2 over the expected
113  * error bound.
114  *
115  * Arguments
116  * =========
117  *
118  * n       (input) int
119  *         The length of the vectors X and Y.
120  *
121  * conj    (input) enum blas_conj_type
122  *
123  * alpha   (input) double
124  *
125  * beta    (input) double
126  *
127  * rin     (input) double
128  *
129  * rout    (input) double
130  *         This result was computed by some other routine, and will be
131  *         tested by this routine by comparing it with the truth.
132  *
133  * r_true_l (input) double
134  *         The leading part of the truth.
135  *
136  * r_true_t (input) double
137  *         The trailing part of the truth.
138  *
139  * x       (input) double*
140  *
141  * head_y
142  * tail_y  (input) double*
143  *
144  * eps_int (input) double
145  *         The internal machine precision.
146  *
147  * un_int  (input) double
148  *         The internal underflow threshold.
149  *
150  * test_ratio (output) float*
151  *         The ratio of computed error for r over the error bound.
152  */
153 {
154   int i, ix, iy;
155   double eps_accurate, eps_out, tmp1, S, S1, S2, U;
156   double un_d, un_accurate, un_out;
157 
158   /* Set the starting position */
159   ix = 0;
160   iy = 0;
161   if (incx < 0)
162     ix = -(n - 1) * incx;
163   if (incy < 0)
164     iy = -(n - 1) * incy;
165 
166   /* computing S */
167   S = S1 = S2 = 0.;
168   for (i = 0; i < n; ++i) {
169     S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
170     S1 += fabs(x[ix]);
171     S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
172     ix += incx;
173     iy += incy;
174   }
175   S *= fabs(alpha);
176   S += fabs(beta * rin);
177 
178   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
179 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
180   S = MAX(S, un_d);
181 
182   eps_accurate = power(2, -BITS_E);
183   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
184 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
185   eps_out = power(2, -BITS_D);
186   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
187 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
188   tmp1 = fabs((rout - r_true_l) - r_true_t);
189 
190   /* underflow */
191   U = 2 * fabs(alpha) * n + 3;
192   U = MAX(U, S1 + 2 * n + 1);
193   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
194 
195   *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
196 			+ eps_out * fabs(r_true_l) + U);
197 }				/* end of test_BLAS_ddot */
test_BLAS_cdot2(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)198 void test_BLAS_cdot2(int n, enum blas_conj_type conj, const void *alpha,
199 		     const void *beta, const void *rin, const void *rout,
200 		     double *r_true_l, double *r_true_t, void *x, int incx,
201 		     void *head_y, void *tail_y, int incy, double eps_int,
202 		     double un_int, double *test_ratio)
203 
204 /* Purpose
205  * =======
206  *
207  * Computes ratio of the computed error from DOT2 over the expected
208  * error bound.
209  *
210  * Arguments
211  * =========
212  *
213  * n       (input) int
214  *         The length of the vectors X and Y.
215  *
216  * conj    (input) enum blas_conj_type
217  *
218  * alpha   (input) const void*
219  *
220  * beta    (input) const void*
221  *
222  * rin     (input) const void*
223  *
224  * rout    (input) const void*
225  *         This result was computed by some other routine, and will be
226  *         tested by this routine by comparing it with the truth.
227  *
228  * r_true_l (input) double*
229  *         The leading part of the truth.
230  *
231  * r_true_t (input) double*
232  *         The trailing part of the truth.
233  *
234  * x       (input) void*
235  *
236  * head_y
237  * tail_y  (input) void*
238  *
239  * eps_int (input) double
240  *         The internal machine precision.
241  *
242  * un_int  (input) double
243  *         The internal underflow threshold.
244  *
245  * test_ratio (output) float*
246  *         The ratio of computed error for r over the error bound.
247  */
248 {
249   int i, ix, iy;
250   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
251   double un_d, un_accurate, un_out;
252   float *x_i = (float *) x;
253   float *head_y_i = (float *) head_y;
254   float *tail_y_i = (float *) tail_y;
255   float *alpha_i = (float *) alpha;
256   float *beta_i = (float *) beta;
257   float *rin_i = (float *) rin;
258   float *rout_i = (float *) rout;
259   float x_ii[2];
260   float y_ii[2];
261 
262   /* Set the starting position */
263   incx *= 2;
264   incy *= 2;
265   ix = 0;
266   iy = 0;
267   if (incx < 0)
268     ix = -(n - 1) * incx;
269   if (incy < 0)
270     iy = -(n - 1) * incy;
271 
272   /* computing S */
273   S = S1 = S2 = 0.;
274   for (i = 0; i < n; ++i) {
275     x_ii[0] = x_i[ix];
276     x_ii[1] = x_i[ix + 1];
277     y_ii[0] = head_y_i[iy];
278     y_ii[1] = head_y_i[iy + 1];
279     if (conj == blas_conj)
280       x_ii[1] = -x_ii[1];
281     S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
282     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
283 
284     {
285       prod[0] = x_ii[0] * y_ii[0] - x_ii[1] * y_ii[1];
286       prod[1] = x_ii[0] * y_ii[1] + x_ii[1] * y_ii[0];
287     }
288 
289     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
290 
291     /* Now get the tail of y */
292     y_ii[0] = tail_y_i[iy];
293     y_ii[1] = tail_y_i[iy + 1];
294     {
295       prod[0] = x_ii[0] * y_ii[0] - x_ii[1] * y_ii[1];
296       prod[1] = x_ii[0] * y_ii[1] + x_ii[1] * y_ii[0];
297     }
298     /* prod = x[i]*y[i] */
299     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
300 
301     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
302     ix += incx;
303     iy += incy;
304   }
305   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
306   {
307     prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
308     prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
309   }
310 
311   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
312 
313   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
314 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
315   S = MAX(S, un_d);
316 
317   eps_accurate = power(2, -BITS_E);
318   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
319 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
320   eps_out = power(2, -BITS_S);
321   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
322 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
323   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
324   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
325   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
326 
327   /* underflow */
328   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
329   U = MAX(U, S1 + 2 * n + 1);
330   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
331   U *= 2 * sqrt(2.);
332 
333   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
334 			+
335 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
336 						  r_true_l[1] * r_true_l[1]) +
337 			U);
338 }
339 
340   /* end of test_BLAS_cdot */
test_BLAS_zdot2(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)341 void test_BLAS_zdot2(int n, enum blas_conj_type conj, const void *alpha,
342 		     const void *beta, const void *rin, const void *rout,
343 		     double *r_true_l, double *r_true_t, void *x, int incx,
344 		     void *head_y, void *tail_y, int incy, double eps_int,
345 		     double un_int, double *test_ratio)
346 
347 /* Purpose
348  * =======
349  *
350  * Computes ratio of the computed error from DOT2 over the expected
351  * error bound.
352  *
353  * Arguments
354  * =========
355  *
356  * n       (input) int
357  *         The length of the vectors X and Y.
358  *
359  * conj    (input) enum blas_conj_type
360  *
361  * alpha   (input) const void*
362  *
363  * beta    (input) const void*
364  *
365  * rin     (input) const void*
366  *
367  * rout    (input) const void*
368  *         This result was computed by some other routine, and will be
369  *         tested by this routine by comparing it with the truth.
370  *
371  * r_true_l (input) double*
372  *         The leading part of the truth.
373  *
374  * r_true_t (input) double*
375  *         The trailing part of the truth.
376  *
377  * x       (input) void*
378  *
379  * head_y
380  * tail_y  (input) void*
381  *
382  * eps_int (input) double
383  *         The internal machine precision.
384  *
385  * un_int  (input) double
386  *         The internal underflow threshold.
387  *
388  * test_ratio (output) float*
389  *         The ratio of computed error for r over the error bound.
390  */
391 {
392   int i, ix, iy;
393   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
394   double un_d, un_accurate, un_out;
395   double *x_i = (double *) x;
396   double *head_y_i = (double *) head_y;
397   double *tail_y_i = (double *) tail_y;
398   double *alpha_i = (double *) alpha;
399   double *beta_i = (double *) beta;
400   double *rin_i = (double *) rin;
401   double *rout_i = (double *) rout;
402   double x_ii[2];
403   double y_ii[2];
404 
405   /* Set the starting position */
406   incx *= 2;
407   incy *= 2;
408   ix = 0;
409   iy = 0;
410   if (incx < 0)
411     ix = -(n - 1) * incx;
412   if (incy < 0)
413     iy = -(n - 1) * incy;
414 
415   /* computing S */
416   S = S1 = S2 = 0.;
417   for (i = 0; i < n; ++i) {
418     x_ii[0] = x_i[ix];
419     x_ii[1] = x_i[ix + 1];
420     y_ii[0] = head_y_i[iy];
421     y_ii[1] = head_y_i[iy + 1];
422     if (conj == blas_conj)
423       x_ii[1] = -x_ii[1];
424     S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
425     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
426 
427     {
428       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
429       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
430     }
431     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
432 
433     /* Now get the tail of y */
434     y_ii[0] = tail_y_i[iy];
435     y_ii[1] = tail_y_i[iy + 1];
436     {
437       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
438       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
439     }				/* prod = x[i]*y[i] */
440     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
441 
442     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
443     ix += incx;
444     iy += incy;
445   }
446   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
447   {
448     prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
449     prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
450   }
451   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
452 
453   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
454 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
455   S = MAX(S, un_d);
456 
457   eps_accurate = power(2, -BITS_E);
458   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
459 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
460   eps_out = power(2, -BITS_D);
461   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
462 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
463   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
464   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
465   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
466 
467   /* underflow */
468   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
469   U = MAX(U, S1 + 2 * n + 1);
470   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
471   U *= 2 * sqrt(2.);
472 
473   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
474 			+
475 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
476 						  r_true_l[1] * r_true_l[1]) +
477 			U);
478 }
479 
480   /* end of test_BLAS_zdot */
test_BLAS_cdot2_s_s(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,float * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)481 void test_BLAS_cdot2_s_s(int n, enum blas_conj_type conj, const void *alpha,
482 			 const void *beta, const void *rin, const void *rout,
483 			 double *r_true_l, double *r_true_t, float *x,
484 			 int incx, float *head_y, float *tail_y, int incy,
485 			 double eps_int, double un_int, double *test_ratio)
486 
487 /* Purpose
488  * =======
489  *
490  * Computes ratio of the computed error from DOT2 over the expected
491  * error bound.
492  *
493  * Arguments
494  * =========
495  *
496  * n       (input) int
497  *         The length of the vectors X and Y.
498  *
499  * conj    (input) enum blas_conj_type
500  *
501  * alpha   (input) const void*
502  *
503  * beta    (input) const void*
504  *
505  * rin     (input) const void*
506  *
507  * rout    (input) const void*
508  *         This result was computed by some other routine, and will be
509  *         tested by this routine by comparing it with the truth.
510  *
511  * r_true_l (input) double*
512  *         The leading part of the truth.
513  *
514  * r_true_t (input) double*
515  *         The trailing part of the truth.
516  *
517  * x       (input) float*
518  *
519  * head_y
520  * tail_y  (input) float*
521  *
522  * eps_int (input) double
523  *         The internal machine precision.
524  *
525  * un_int  (input) double
526  *         The internal underflow threshold.
527  *
528  * test_ratio (output) float*
529  *         The ratio of computed error for r over the error bound.
530  */
531 {
532   int i, ix, iy;
533   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
534   double un_d, un_accurate, un_out;
535   float *x_i = x;
536   float *head_y_i = head_y;
537   float *tail_y_i = tail_y;
538   float *alpha_i = (float *) alpha;
539   float *beta_i = (float *) beta;
540   float *rin_i = (float *) rin;
541   float *rout_i = (float *) rout;
542   float x_ii;
543   float y_ii;
544 
545   /* Set the starting position */
546 
547 
548   ix = 0;
549   iy = 0;
550   if (incx < 0)
551     ix = -(n - 1) * incx;
552   if (incy < 0)
553     iy = -(n - 1) * incy;
554 
555   /* computing S */
556   S = S1 = S2 = 0.;
557   for (i = 0; i < n; ++i) {
558     x_ii = x_i[ix];
559     y_ii = head_y_i[iy];
560     S1 += fabs(x_ii);
561     S2 += fabs(y_ii);
562 
563     prod[0] = x_ii * y_ii;
564     prod[1] = 0.0;
565     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
566 
567     /* Now get the tail of y */
568     y_ii = tail_y_i[iy];
569     prod[0] = x_ii * y_ii;
570     prod[1] = 0.0;		/* prod = x[i]*y[i] */
571     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
572 
573     S2 += fabs(y_ii);
574     ix += incx;
575     iy += incy;
576   }
577   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
578   {
579     prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
580     prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
581   }
582 
583   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
584 
585   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
586 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
587   S = MAX(S, un_d);
588 
589   eps_accurate = power(2, -BITS_E);
590   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
591 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
592   eps_out = power(2, -BITS_S);
593   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
594 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
595   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
596   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
597   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
598 
599   /* underflow */
600   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
601   U = MAX(U, S1 + 2 * n + 1);
602   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
603   U *= 2 * sqrt(2.);
604 
605   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
606 			+
607 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
608 						  r_true_l[1] * r_true_l[1]) +
609 			U);
610 }
611 
612   /* end of test_BLAS_cdot */
test_BLAS_cdot2_s_c(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,float * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)613 void test_BLAS_cdot2_s_c(int n, enum blas_conj_type conj, const void *alpha,
614 			 const void *beta, const void *rin, const void *rout,
615 			 double *r_true_l, double *r_true_t, float *x,
616 			 int incx, void *head_y, void *tail_y, int incy,
617 			 double eps_int, double un_int, double *test_ratio)
618 
619 /* Purpose
620  * =======
621  *
622  * Computes ratio of the computed error from DOT2 over the expected
623  * error bound.
624  *
625  * Arguments
626  * =========
627  *
628  * n       (input) int
629  *         The length of the vectors X and Y.
630  *
631  * conj    (input) enum blas_conj_type
632  *
633  * alpha   (input) const void*
634  *
635  * beta    (input) const void*
636  *
637  * rin     (input) const void*
638  *
639  * rout    (input) const void*
640  *         This result was computed by some other routine, and will be
641  *         tested by this routine by comparing it with the truth.
642  *
643  * r_true_l (input) double*
644  *         The leading part of the truth.
645  *
646  * r_true_t (input) double*
647  *         The trailing part of the truth.
648  *
649  * x       (input) float*
650  *
651  * head_y
652  * tail_y  (input) void*
653  *
654  * eps_int (input) double
655  *         The internal machine precision.
656  *
657  * un_int  (input) double
658  *         The internal underflow threshold.
659  *
660  * test_ratio (output) float*
661  *         The ratio of computed error for r over the error bound.
662  */
663 {
664   int i, ix, iy;
665   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
666   double un_d, un_accurate, un_out;
667   float *x_i = x;
668   float *head_y_i = (float *) head_y;
669   float *tail_y_i = (float *) tail_y;
670   float *alpha_i = (float *) alpha;
671   float *beta_i = (float *) beta;
672   float *rin_i = (float *) rin;
673   float *rout_i = (float *) rout;
674   float x_ii;
675   float y_ii[2];
676 
677   /* Set the starting position */
678 
679   incy *= 2;
680   ix = 0;
681   iy = 0;
682   if (incx < 0)
683     ix = -(n - 1) * incx;
684   if (incy < 0)
685     iy = -(n - 1) * incy;
686 
687   /* computing S */
688   S = S1 = S2 = 0.;
689   for (i = 0; i < n; ++i) {
690     x_ii = x_i[ix];
691     y_ii[0] = head_y_i[iy];
692     y_ii[1] = head_y_i[iy + 1];
693     S1 += fabs(x_ii);
694     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
695 
696     {
697       prod[0] = y_ii[0] * x_ii;
698       prod[1] = y_ii[1] * x_ii;
699     }
700     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
701 
702     /* Now get the tail of y */
703     y_ii[0] = tail_y_i[iy];
704     y_ii[1] = tail_y_i[iy + 1];
705     {
706       prod[0] = y_ii[0] * x_ii;
707       prod[1] = y_ii[1] * x_ii;
708     }				/* prod = x[i]*y[i] */
709     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
710 
711     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
712     ix += incx;
713     iy += incy;
714   }
715   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
716   {
717     prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
718     prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
719   }
720 
721   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
722 
723   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
724 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
725   S = MAX(S, un_d);
726 
727   eps_accurate = power(2, -BITS_E);
728   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
729 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
730   eps_out = power(2, -BITS_S);
731   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
732 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
733   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
734   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
735   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
736 
737   /* underflow */
738   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
739   U = MAX(U, S1 + 2 * n + 1);
740   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
741   U *= 2 * sqrt(2.);
742 
743   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
744 			+
745 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
746 						  r_true_l[1] * r_true_l[1]) +
747 			U);
748 }
749 
750   /* end of test_BLAS_cdot */
test_BLAS_cdot2_c_s(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)751 void test_BLAS_cdot2_c_s(int n, enum blas_conj_type conj, const void *alpha,
752 			 const void *beta, const void *rin, const void *rout,
753 			 double *r_true_l, double *r_true_t, void *x,
754 			 int incx, float *head_y, float *tail_y, int incy,
755 			 double eps_int, double un_int, double *test_ratio)
756 
757 /* Purpose
758  * =======
759  *
760  * Computes ratio of the computed error from DOT2 over the expected
761  * error bound.
762  *
763  * Arguments
764  * =========
765  *
766  * n       (input) int
767  *         The length of the vectors X and Y.
768  *
769  * conj    (input) enum blas_conj_type
770  *
771  * alpha   (input) const void*
772  *
773  * beta    (input) const void*
774  *
775  * rin     (input) const void*
776  *
777  * rout    (input) const void*
778  *         This result was computed by some other routine, and will be
779  *         tested by this routine by comparing it with the truth.
780  *
781  * r_true_l (input) double*
782  *         The leading part of the truth.
783  *
784  * r_true_t (input) double*
785  *         The trailing part of the truth.
786  *
787  * x       (input) void*
788  *
789  * head_y
790  * tail_y  (input) float*
791  *
792  * eps_int (input) double
793  *         The internal machine precision.
794  *
795  * un_int  (input) double
796  *         The internal underflow threshold.
797  *
798  * test_ratio (output) float*
799  *         The ratio of computed error for r over the error bound.
800  */
801 {
802   int i, ix, iy;
803   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
804   double un_d, un_accurate, un_out;
805   float *x_i = (float *) x;
806   float *head_y_i = head_y;
807   float *tail_y_i = tail_y;
808   float *alpha_i = (float *) alpha;
809   float *beta_i = (float *) beta;
810   float *rin_i = (float *) rin;
811   float *rout_i = (float *) rout;
812   float x_ii[2];
813   float y_ii;
814 
815   /* Set the starting position */
816   incx *= 2;
817 
818   ix = 0;
819   iy = 0;
820   if (incx < 0)
821     ix = -(n - 1) * incx;
822   if (incy < 0)
823     iy = -(n - 1) * incy;
824 
825   /* computing S */
826   S = S1 = S2 = 0.;
827   for (i = 0; i < n; ++i) {
828     x_ii[0] = x_i[ix];
829     x_ii[1] = x_i[ix + 1];
830     y_ii = head_y_i[iy];
831     if (conj == blas_conj)
832       x_ii[1] = -x_ii[1];
833     S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
834     S2 += fabs(y_ii);
835 
836     {
837       prod[0] = x_ii[0] * y_ii;
838       prod[1] = x_ii[1] * y_ii;
839     }
840     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
841 
842     /* Now get the tail of y */
843     y_ii = tail_y_i[iy];
844     {
845       prod[0] = x_ii[0] * y_ii;
846       prod[1] = x_ii[1] * y_ii;
847     }				/* prod = x[i]*y[i] */
848     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
849 
850     S2 += fabs(y_ii);
851     ix += incx;
852     iy += incy;
853   }
854   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
855   {
856     prod[0] = beta_i[0] * rin_i[0] - beta_i[1] * rin_i[1];
857     prod[1] = beta_i[0] * rin_i[1] + beta_i[1] * rin_i[0];
858   }
859 
860   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
861 
862   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
863 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
864   S = MAX(S, un_d);
865 
866   eps_accurate = power(2, -BITS_E);
867   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
868 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
869   eps_out = power(2, -BITS_S);
870   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
871 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
872   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
873   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
874   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
875 
876   /* underflow */
877   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
878   U = MAX(U, S1 + 2 * n + 1);
879   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
880   U *= 2 * sqrt(2.);
881 
882   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
883 			+
884 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
885 						  r_true_l[1] * r_true_l[1]) +
886 			U);
887 }
888 
889   /* end of test_BLAS_cdot */
test_BLAS_zdot2_d_d(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,double * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)890 void test_BLAS_zdot2_d_d(int n, enum blas_conj_type conj, const void *alpha,
891 			 const void *beta, const void *rin, const void *rout,
892 			 double *r_true_l, double *r_true_t, double *x,
893 			 int incx, double *head_y, double *tail_y, int incy,
894 			 double eps_int, double un_int, double *test_ratio)
895 
896 /* Purpose
897  * =======
898  *
899  * Computes ratio of the computed error from DOT2 over the expected
900  * error bound.
901  *
902  * Arguments
903  * =========
904  *
905  * n       (input) int
906  *         The length of the vectors X and Y.
907  *
908  * conj    (input) enum blas_conj_type
909  *
910  * alpha   (input) const void*
911  *
912  * beta    (input) const void*
913  *
914  * rin     (input) const void*
915  *
916  * rout    (input) const void*
917  *         This result was computed by some other routine, and will be
918  *         tested by this routine by comparing it with the truth.
919  *
920  * r_true_l (input) double*
921  *         The leading part of the truth.
922  *
923  * r_true_t (input) double*
924  *         The trailing part of the truth.
925  *
926  * x       (input) double*
927  *
928  * head_y
929  * tail_y  (input) double*
930  *
931  * eps_int (input) double
932  *         The internal machine precision.
933  *
934  * un_int  (input) double
935  *         The internal underflow threshold.
936  *
937  * test_ratio (output) float*
938  *         The ratio of computed error for r over the error bound.
939  */
940 {
941   int i, ix, iy;
942   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
943   double un_d, un_accurate, un_out;
944   double *x_i = x;
945   double *head_y_i = head_y;
946   double *tail_y_i = tail_y;
947   double *alpha_i = (double *) alpha;
948   double *beta_i = (double *) beta;
949   double *rin_i = (double *) rin;
950   double *rout_i = (double *) rout;
951   double x_ii;
952   double y_ii;
953 
954   /* Set the starting position */
955 
956 
957   ix = 0;
958   iy = 0;
959   if (incx < 0)
960     ix = -(n - 1) * incx;
961   if (incy < 0)
962     iy = -(n - 1) * incy;
963 
964   /* computing S */
965   S = S1 = S2 = 0.;
966   for (i = 0; i < n; ++i) {
967     x_ii = x_i[ix];
968     y_ii = head_y_i[iy];
969     S1 += fabs(x_ii);
970     S2 += fabs(y_ii);
971 
972     prod[0] = x_ii * y_ii;
973     prod[1] = 0.0;
974     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
975 
976     /* Now get the tail of y */
977     y_ii = tail_y_i[iy];
978     prod[0] = x_ii * y_ii;
979     prod[1] = 0.0;		/* prod = x[i]*y[i] */
980     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
981 
982     S2 += fabs(y_ii);
983     ix += incx;
984     iy += incy;
985   }
986   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
987   {
988     prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
989     prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
990   }
991   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
992 
993   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
994 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
995   S = MAX(S, un_d);
996 
997   eps_accurate = power(2, -BITS_E);
998   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
999 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1000   eps_out = power(2, -BITS_D);
1001   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1002 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1003   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1004   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1005   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1006 
1007   /* underflow */
1008   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1009   U = MAX(U, S1 + 2 * n + 1);
1010   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1011   U *= 2 * sqrt(2.);
1012 
1013   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1014 			+
1015 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1016 						  r_true_l[1] * r_true_l[1]) +
1017 			U);
1018 }
1019 
1020   /* end of test_BLAS_zdot */
test_BLAS_zdot2_d_z(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,double * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1021 void test_BLAS_zdot2_d_z(int n, enum blas_conj_type conj, const void *alpha,
1022 			 const void *beta, const void *rin, const void *rout,
1023 			 double *r_true_l, double *r_true_t, double *x,
1024 			 int incx, void *head_y, void *tail_y, int incy,
1025 			 double eps_int, double un_int, double *test_ratio)
1026 
1027 /* Purpose
1028  * =======
1029  *
1030  * Computes ratio of the computed error from DOT2 over the expected
1031  * error bound.
1032  *
1033  * Arguments
1034  * =========
1035  *
1036  * n       (input) int
1037  *         The length of the vectors X and Y.
1038  *
1039  * conj    (input) enum blas_conj_type
1040  *
1041  * alpha   (input) const void*
1042  *
1043  * beta    (input) const void*
1044  *
1045  * rin     (input) const void*
1046  *
1047  * rout    (input) const void*
1048  *         This result was computed by some other routine, and will be
1049  *         tested by this routine by comparing it with the truth.
1050  *
1051  * r_true_l (input) double*
1052  *         The leading part of the truth.
1053  *
1054  * r_true_t (input) double*
1055  *         The trailing part of the truth.
1056  *
1057  * x       (input) double*
1058  *
1059  * head_y
1060  * tail_y  (input) void*
1061  *
1062  * eps_int (input) double
1063  *         The internal machine precision.
1064  *
1065  * un_int  (input) double
1066  *         The internal underflow threshold.
1067  *
1068  * test_ratio (output) float*
1069  *         The ratio of computed error for r over the error bound.
1070  */
1071 {
1072   int i, ix, iy;
1073   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1074   double un_d, un_accurate, un_out;
1075   double *x_i = x;
1076   double *head_y_i = (double *) head_y;
1077   double *tail_y_i = (double *) tail_y;
1078   double *alpha_i = (double *) alpha;
1079   double *beta_i = (double *) beta;
1080   double *rin_i = (double *) rin;
1081   double *rout_i = (double *) rout;
1082   double x_ii;
1083   double y_ii[2];
1084 
1085   /* Set the starting position */
1086 
1087   incy *= 2;
1088   ix = 0;
1089   iy = 0;
1090   if (incx < 0)
1091     ix = -(n - 1) * incx;
1092   if (incy < 0)
1093     iy = -(n - 1) * incy;
1094 
1095   /* computing S */
1096   S = S1 = S2 = 0.;
1097   for (i = 0; i < n; ++i) {
1098     x_ii = x_i[ix];
1099     y_ii[0] = head_y_i[iy];
1100     y_ii[1] = head_y_i[iy + 1];
1101     S1 += fabs(x_ii);
1102     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1103 
1104     {
1105       prod[0] = y_ii[0] * x_ii;
1106       prod[1] = y_ii[1] * x_ii;
1107     }
1108     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1109 
1110     /* Now get the tail of y */
1111     y_ii[0] = tail_y_i[iy];
1112     y_ii[1] = tail_y_i[iy + 1];
1113     {
1114       prod[0] = y_ii[0] * x_ii;
1115       prod[1] = y_ii[1] * x_ii;
1116     }				/* prod = x[i]*y[i] */
1117     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1118 
1119     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1120     ix += incx;
1121     iy += incy;
1122   }
1123   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1124   {
1125     prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1126     prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1127   }
1128   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1129 
1130   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1131 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1132   S = MAX(S, un_d);
1133 
1134   eps_accurate = power(2, -BITS_E);
1135   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1136 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1137   eps_out = power(2, -BITS_D);
1138   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1139 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1140   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1141   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1142   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1143 
1144   /* underflow */
1145   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1146   U = MAX(U, S1 + 2 * n + 1);
1147   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1148   U *= 2 * sqrt(2.);
1149 
1150   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1151 			+
1152 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1153 						  r_true_l[1] * r_true_l[1]) +
1154 			U);
1155 }
1156 
1157   /* end of test_BLAS_zdot */
test_BLAS_zdot2_z_d(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1158 void test_BLAS_zdot2_z_d(int n, enum blas_conj_type conj, const void *alpha,
1159 			 const void *beta, const void *rin, const void *rout,
1160 			 double *r_true_l, double *r_true_t, void *x,
1161 			 int incx, double *head_y, double *tail_y, int incy,
1162 			 double eps_int, double un_int, double *test_ratio)
1163 
1164 /* Purpose
1165  * =======
1166  *
1167  * Computes ratio of the computed error from DOT2 over the expected
1168  * error bound.
1169  *
1170  * Arguments
1171  * =========
1172  *
1173  * n       (input) int
1174  *         The length of the vectors X and Y.
1175  *
1176  * conj    (input) enum blas_conj_type
1177  *
1178  * alpha   (input) const void*
1179  *
1180  * beta    (input) const void*
1181  *
1182  * rin     (input) const void*
1183  *
1184  * rout    (input) const void*
1185  *         This result was computed by some other routine, and will be
1186  *         tested by this routine by comparing it with the truth.
1187  *
1188  * r_true_l (input) double*
1189  *         The leading part of the truth.
1190  *
1191  * r_true_t (input) double*
1192  *         The trailing part of the truth.
1193  *
1194  * x       (input) void*
1195  *
1196  * head_y
1197  * tail_y  (input) double*
1198  *
1199  * eps_int (input) double
1200  *         The internal machine precision.
1201  *
1202  * un_int  (input) double
1203  *         The internal underflow threshold.
1204  *
1205  * test_ratio (output) float*
1206  *         The ratio of computed error for r over the error bound.
1207  */
1208 {
1209   int i, ix, iy;
1210   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1211   double un_d, un_accurate, un_out;
1212   double *x_i = (double *) x;
1213   double *head_y_i = head_y;
1214   double *tail_y_i = tail_y;
1215   double *alpha_i = (double *) alpha;
1216   double *beta_i = (double *) beta;
1217   double *rin_i = (double *) rin;
1218   double *rout_i = (double *) rout;
1219   double x_ii[2];
1220   double y_ii;
1221 
1222   /* Set the starting position */
1223   incx *= 2;
1224 
1225   ix = 0;
1226   iy = 0;
1227   if (incx < 0)
1228     ix = -(n - 1) * incx;
1229   if (incy < 0)
1230     iy = -(n - 1) * incy;
1231 
1232   /* computing S */
1233   S = S1 = S2 = 0.;
1234   for (i = 0; i < n; ++i) {
1235     x_ii[0] = x_i[ix];
1236     x_ii[1] = x_i[ix + 1];
1237     y_ii = head_y_i[iy];
1238     if (conj == blas_conj)
1239       x_ii[1] = -x_ii[1];
1240     S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1241     S2 += fabs(y_ii);
1242 
1243     {
1244       prod[0] = x_ii[0] * y_ii;
1245       prod[1] = x_ii[1] * y_ii;
1246     }
1247     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1248 
1249     /* Now get the tail of y */
1250     y_ii = tail_y_i[iy];
1251     {
1252       prod[0] = x_ii[0] * y_ii;
1253       prod[1] = x_ii[1] * y_ii;
1254     }				/* prod = x[i]*y[i] */
1255     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1256 
1257     S2 += fabs(y_ii);
1258     ix += incx;
1259     iy += incy;
1260   }
1261   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1262   {
1263     prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1264     prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1265   }
1266   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1267 
1268   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1269 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1270   S = MAX(S, un_d);
1271 
1272   eps_accurate = power(2, -BITS_E);
1273   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1274 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1275   eps_out = power(2, -BITS_D);
1276   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1277 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1278   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1279   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1280   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1281 
1282   /* underflow */
1283   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1284   U = MAX(U, S1 + 2 * n + 1);
1285   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1286   U *= 2 * sqrt(2.);
1287 
1288   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1289 			+
1290 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1291 						  r_true_l[1] * r_true_l[1]) +
1292 			U);
1293 }
1294 
1295   /* end of test_BLAS_zdot */
test_BLAS_ddot2_s_s(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,float * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1296 void test_BLAS_ddot2_s_s(int n, enum blas_conj_type conj, double alpha,
1297 			 double beta, double rin, double rout,
1298 			 double r_true_l, double r_true_t, float *x, int incx,
1299 			 float *head_y, float *tail_y, int incy,
1300 			 double eps_int, double un_int, double *test_ratio)
1301 
1302 /* Purpose
1303  * =======
1304  *
1305  * Computes ratio of the computed error from DOT2 over the expected
1306  * error bound.
1307  *
1308  * Arguments
1309  * =========
1310  *
1311  * n       (input) int
1312  *         The length of the vectors X and Y.
1313  *
1314  * conj    (input) enum blas_conj_type
1315  *
1316  * alpha   (input) double
1317  *
1318  * beta    (input) double
1319  *
1320  * rin     (input) double
1321  *
1322  * rout    (input) double
1323  *         This result was computed by some other routine, and will be
1324  *         tested by this routine by comparing it with the truth.
1325  *
1326  * r_true_l (input) double
1327  *         The leading part of the truth.
1328  *
1329  * r_true_t (input) double
1330  *         The trailing part of the truth.
1331  *
1332  * x       (input) float*
1333  *
1334  * head_y
1335  * tail_y  (input) float*
1336  *
1337  * eps_int (input) double
1338  *         The internal machine precision.
1339  *
1340  * un_int  (input) double
1341  *         The internal underflow threshold.
1342  *
1343  * test_ratio (output) float*
1344  *         The ratio of computed error for r over the error bound.
1345  */
1346 {
1347   int i, ix, iy;
1348   double eps_accurate, eps_out, tmp1, S, S1, S2, U;
1349   double un_d, un_accurate, un_out;
1350 
1351   /* Set the starting position */
1352   ix = 0;
1353   iy = 0;
1354   if (incx < 0)
1355     ix = -(n - 1) * incx;
1356   if (incy < 0)
1357     iy = -(n - 1) * incy;
1358 
1359   /* computing S */
1360   S = S1 = S2 = 0.;
1361   for (i = 0; i < n; ++i) {
1362     S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
1363     S1 += fabs(x[ix]);
1364     S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
1365     ix += incx;
1366     iy += incy;
1367   }
1368   S *= fabs(alpha);
1369   S += fabs(beta * rin);
1370 
1371   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1372 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1373   S = MAX(S, un_d);
1374 
1375   eps_accurate = power(2, -BITS_E);
1376   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1377 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1378   eps_out = power(2, -BITS_D);
1379   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1380 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1381   tmp1 = fabs((rout - r_true_l) - r_true_t);
1382 
1383   /* underflow */
1384   U = 2 * fabs(alpha) * n + 3;
1385   U = MAX(U, S1 + 2 * n + 1);
1386   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1387 
1388   *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
1389 			+ eps_out * fabs(r_true_l) + U);
1390 }				/* end of test_BLAS_ddot */
test_BLAS_ddot2_s_d(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,float * x,int incx,double * head_y,double * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1391 void test_BLAS_ddot2_s_d(int n, enum blas_conj_type conj, double alpha,
1392 			 double beta, double rin, double rout,
1393 			 double r_true_l, double r_true_t, float *x, int incx,
1394 			 double *head_y, double *tail_y, int incy,
1395 			 double eps_int, double un_int, double *test_ratio)
1396 
1397 /* Purpose
1398  * =======
1399  *
1400  * Computes ratio of the computed error from DOT2 over the expected
1401  * error bound.
1402  *
1403  * Arguments
1404  * =========
1405  *
1406  * n       (input) int
1407  *         The length of the vectors X and Y.
1408  *
1409  * conj    (input) enum blas_conj_type
1410  *
1411  * alpha   (input) double
1412  *
1413  * beta    (input) double
1414  *
1415  * rin     (input) double
1416  *
1417  * rout    (input) double
1418  *         This result was computed by some other routine, and will be
1419  *         tested by this routine by comparing it with the truth.
1420  *
1421  * r_true_l (input) double
1422  *         The leading part of the truth.
1423  *
1424  * r_true_t (input) double
1425  *         The trailing part of the truth.
1426  *
1427  * x       (input) float*
1428  *
1429  * head_y
1430  * tail_y  (input) double*
1431  *
1432  * eps_int (input) double
1433  *         The internal machine precision.
1434  *
1435  * un_int  (input) double
1436  *         The internal underflow threshold.
1437  *
1438  * test_ratio (output) float*
1439  *         The ratio of computed error for r over the error bound.
1440  */
1441 {
1442   int i, ix, iy;
1443   double eps_accurate, eps_out, tmp1, S, S1, S2, U;
1444   double un_d, un_accurate, un_out;
1445 
1446   /* Set the starting position */
1447   ix = 0;
1448   iy = 0;
1449   if (incx < 0)
1450     ix = -(n - 1) * incx;
1451   if (incy < 0)
1452     iy = -(n - 1) * incy;
1453 
1454   /* computing S */
1455   S = S1 = S2 = 0.;
1456   for (i = 0; i < n; ++i) {
1457     S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
1458     S1 += fabs(x[ix]);
1459     S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
1460     ix += incx;
1461     iy += incy;
1462   }
1463   S *= fabs(alpha);
1464   S += fabs(beta * rin);
1465 
1466   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1467 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1468   S = MAX(S, un_d);
1469 
1470   eps_accurate = power(2, -BITS_E);
1471   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1472 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1473   eps_out = power(2, -BITS_D);
1474   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1475 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1476   tmp1 = fabs((rout - r_true_l) - r_true_t);
1477 
1478   /* underflow */
1479   U = 2 * fabs(alpha) * n + 3;
1480   U = MAX(U, S1 + 2 * n + 1);
1481   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1482 
1483   *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
1484 			+ eps_out * fabs(r_true_l) + U);
1485 }				/* end of test_BLAS_ddot */
test_BLAS_ddot2_d_s(int n,enum blas_conj_type conj,double alpha,double beta,double rin,double rout,double r_true_l,double r_true_t,double * x,int incx,float * head_y,float * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1486 void test_BLAS_ddot2_d_s(int n, enum blas_conj_type conj, double alpha,
1487 			 double beta, double rin, double rout,
1488 			 double r_true_l, double r_true_t, double *x,
1489 			 int incx, float *head_y, float *tail_y, int incy,
1490 			 double eps_int, double un_int, double *test_ratio)
1491 
1492 /* Purpose
1493  * =======
1494  *
1495  * Computes ratio of the computed error from DOT2 over the expected
1496  * error bound.
1497  *
1498  * Arguments
1499  * =========
1500  *
1501  * n       (input) int
1502  *         The length of the vectors X and Y.
1503  *
1504  * conj    (input) enum blas_conj_type
1505  *
1506  * alpha   (input) double
1507  *
1508  * beta    (input) double
1509  *
1510  * rin     (input) double
1511  *
1512  * rout    (input) double
1513  *         This result was computed by some other routine, and will be
1514  *         tested by this routine by comparing it with the truth.
1515  *
1516  * r_true_l (input) double
1517  *         The leading part of the truth.
1518  *
1519  * r_true_t (input) double
1520  *         The trailing part of the truth.
1521  *
1522  * x       (input) double*
1523  *
1524  * head_y
1525  * tail_y  (input) float*
1526  *
1527  * eps_int (input) double
1528  *         The internal machine precision.
1529  *
1530  * un_int  (input) double
1531  *         The internal underflow threshold.
1532  *
1533  * test_ratio (output) float*
1534  *         The ratio of computed error for r over the error bound.
1535  */
1536 {
1537   int i, ix, iy;
1538   double eps_accurate, eps_out, tmp1, S, S1, S2, U;
1539   double un_d, un_accurate, un_out;
1540 
1541   /* Set the starting position */
1542   ix = 0;
1543   iy = 0;
1544   if (incx < 0)
1545     ix = -(n - 1) * incx;
1546   if (incy < 0)
1547     iy = -(n - 1) * incy;
1548 
1549   /* computing S */
1550   S = S1 = S2 = 0.;
1551   for (i = 0; i < n; ++i) {
1552     S += fabs(x[ix] * head_y[iy]) + fabs(x[ix] * tail_y[iy]);
1553     S1 += fabs(x[ix]);
1554     S2 += fabs(head_y[iy]) + fabs(tail_y[iy]);
1555     ix += incx;
1556     iy += incy;
1557   }
1558   S *= fabs(alpha);
1559   S += fabs(beta * rin);
1560 
1561   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1562 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1563   S = MAX(S, un_d);
1564 
1565   eps_accurate = power(2, -BITS_E);
1566   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1567 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1568   eps_out = power(2, -BITS_D);
1569   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1570 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1571   tmp1 = fabs((rout - r_true_l) - r_true_t);
1572 
1573   /* underflow */
1574   U = 2 * fabs(alpha) * n + 3;
1575   U = MAX(U, S1 + 2 * n + 1);
1576   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1577 
1578   *test_ratio = tmp1 / ((n + 2) * (eps_int + eps_accurate) * S
1579 			+ eps_out * fabs(r_true_l) + U);
1580 }				/* end of test_BLAS_ddot */
test_BLAS_zdot2_c_c(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1581 void test_BLAS_zdot2_c_c(int n, enum blas_conj_type conj, const void *alpha,
1582 			 const void *beta, const void *rin, const void *rout,
1583 			 double *r_true_l, double *r_true_t, void *x,
1584 			 int incx, void *head_y, void *tail_y, int incy,
1585 			 double eps_int, double un_int, double *test_ratio)
1586 
1587 /* Purpose
1588  * =======
1589  *
1590  * Computes ratio of the computed error from DOT2 over the expected
1591  * error bound.
1592  *
1593  * Arguments
1594  * =========
1595  *
1596  * n       (input) int
1597  *         The length of the vectors X and Y.
1598  *
1599  * conj    (input) enum blas_conj_type
1600  *
1601  * alpha   (input) const void*
1602  *
1603  * beta    (input) const void*
1604  *
1605  * rin     (input) const void*
1606  *
1607  * rout    (input) const void*
1608  *         This result was computed by some other routine, and will be
1609  *         tested by this routine by comparing it with the truth.
1610  *
1611  * r_true_l (input) double*
1612  *         The leading part of the truth.
1613  *
1614  * r_true_t (input) double*
1615  *         The trailing part of the truth.
1616  *
1617  * x       (input) void*
1618  *
1619  * head_y
1620  * tail_y  (input) void*
1621  *
1622  * eps_int (input) double
1623  *         The internal machine precision.
1624  *
1625  * un_int  (input) double
1626  *         The internal underflow threshold.
1627  *
1628  * test_ratio (output) float*
1629  *         The ratio of computed error for r over the error bound.
1630  */
1631 {
1632   int i, ix, iy;
1633   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1634   double un_d, un_accurate, un_out;
1635   float *x_i = (float *) x;
1636   float *head_y_i = (float *) head_y;
1637   float *tail_y_i = (float *) tail_y;
1638   double *alpha_i = (double *) alpha;
1639   double *beta_i = (double *) beta;
1640   double *rin_i = (double *) rin;
1641   double *rout_i = (double *) rout;
1642   float x_ii[2];
1643   float y_ii[2];
1644 
1645   /* Set the starting position */
1646   incx *= 2;
1647   incy *= 2;
1648   ix = 0;
1649   iy = 0;
1650   if (incx < 0)
1651     ix = -(n - 1) * incx;
1652   if (incy < 0)
1653     iy = -(n - 1) * incy;
1654 
1655   /* computing S */
1656   S = S1 = S2 = 0.;
1657   for (i = 0; i < n; ++i) {
1658     x_ii[0] = x_i[ix];
1659     x_ii[1] = x_i[ix + 1];
1660     y_ii[0] = head_y_i[iy];
1661     y_ii[1] = head_y_i[iy + 1];
1662     if (conj == blas_conj)
1663       x_ii[1] = -x_ii[1];
1664     S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1665     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1666 
1667     {
1668       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1669       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1670     }
1671     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1672 
1673     /* Now get the tail of y */
1674     y_ii[0] = tail_y_i[iy];
1675     y_ii[1] = tail_y_i[iy + 1];
1676     {
1677       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1678       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1679     }				/* prod = x[i]*y[i] */
1680     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1681 
1682     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1683     ix += incx;
1684     iy += incy;
1685   }
1686   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1687   {
1688     prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1689     prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1690   }
1691   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1692 
1693   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1694 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1695   S = MAX(S, un_d);
1696 
1697   eps_accurate = power(2, -BITS_E);
1698   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1699 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1700   eps_out = power(2, -BITS_D);
1701   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1702 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1703   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1704   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1705   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1706 
1707   /* underflow */
1708   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1709   U = MAX(U, S1 + 2 * n + 1);
1710   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1711   U *= 2 * sqrt(2.);
1712 
1713   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1714 			+
1715 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1716 						  r_true_l[1] * r_true_l[1]) +
1717 			U);
1718 }
1719 
1720   /* end of test_BLAS_zdot */
test_BLAS_zdot2_c_z(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1721 void test_BLAS_zdot2_c_z(int n, enum blas_conj_type conj, const void *alpha,
1722 			 const void *beta, const void *rin, const void *rout,
1723 			 double *r_true_l, double *r_true_t, void *x,
1724 			 int incx, void *head_y, void *tail_y, int incy,
1725 			 double eps_int, double un_int, double *test_ratio)
1726 
1727 /* Purpose
1728  * =======
1729  *
1730  * Computes ratio of the computed error from DOT2 over the expected
1731  * error bound.
1732  *
1733  * Arguments
1734  * =========
1735  *
1736  * n       (input) int
1737  *         The length of the vectors X and Y.
1738  *
1739  * conj    (input) enum blas_conj_type
1740  *
1741  * alpha   (input) const void*
1742  *
1743  * beta    (input) const void*
1744  *
1745  * rin     (input) const void*
1746  *
1747  * rout    (input) const void*
1748  *         This result was computed by some other routine, and will be
1749  *         tested by this routine by comparing it with the truth.
1750  *
1751  * r_true_l (input) double*
1752  *         The leading part of the truth.
1753  *
1754  * r_true_t (input) double*
1755  *         The trailing part of the truth.
1756  *
1757  * x       (input) void*
1758  *
1759  * head_y
1760  * tail_y  (input) void*
1761  *
1762  * eps_int (input) double
1763  *         The internal machine precision.
1764  *
1765  * un_int  (input) double
1766  *         The internal underflow threshold.
1767  *
1768  * test_ratio (output) float*
1769  *         The ratio of computed error for r over the error bound.
1770  */
1771 {
1772   int i, ix, iy;
1773   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1774   double un_d, un_accurate, un_out;
1775   float *x_i = (float *) x;
1776   double *head_y_i = (double *) head_y;
1777   double *tail_y_i = (double *) tail_y;
1778   double *alpha_i = (double *) alpha;
1779   double *beta_i = (double *) beta;
1780   double *rin_i = (double *) rin;
1781   double *rout_i = (double *) rout;
1782   float x_ii[2];
1783   double y_ii[2];
1784 
1785   /* Set the starting position */
1786   incx *= 2;
1787   incy *= 2;
1788   ix = 0;
1789   iy = 0;
1790   if (incx < 0)
1791     ix = -(n - 1) * incx;
1792   if (incy < 0)
1793     iy = -(n - 1) * incy;
1794 
1795   /* computing S */
1796   S = S1 = S2 = 0.;
1797   for (i = 0; i < n; ++i) {
1798     x_ii[0] = x_i[ix];
1799     x_ii[1] = x_i[ix + 1];
1800     y_ii[0] = head_y_i[iy];
1801     y_ii[1] = head_y_i[iy + 1];
1802     if (conj == blas_conj)
1803       x_ii[1] = -x_ii[1];
1804     S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1805     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1806 
1807     {
1808       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1809       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1810     }
1811     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1812 
1813     /* Now get the tail of y */
1814     y_ii[0] = tail_y_i[iy];
1815     y_ii[1] = tail_y_i[iy + 1];
1816     {
1817       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1818       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1819     }				/* prod = x[i]*y[i] */
1820     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1821 
1822     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1823     ix += incx;
1824     iy += incy;
1825   }
1826   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1827   {
1828     prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1829     prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1830   }
1831   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1832 
1833   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1834 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1835   S = MAX(S, un_d);
1836 
1837   eps_accurate = power(2, -BITS_E);
1838   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1839 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1840   eps_out = power(2, -BITS_D);
1841   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1842 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1843   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1844   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1845   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1846 
1847   /* underflow */
1848   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1849   U = MAX(U, S1 + 2 * n + 1);
1850   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1851   U *= 2 * sqrt(2.);
1852 
1853   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1854 			+
1855 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1856 						  r_true_l[1] * r_true_l[1]) +
1857 			U);
1858 }
1859 
1860   /* end of test_BLAS_zdot */
test_BLAS_zdot2_z_c(int n,enum blas_conj_type conj,const void * alpha,const void * beta,const void * rin,const void * rout,double * r_true_l,double * r_true_t,void * x,int incx,void * head_y,void * tail_y,int incy,double eps_int,double un_int,double * test_ratio)1861 void test_BLAS_zdot2_z_c(int n, enum blas_conj_type conj, const void *alpha,
1862 			 const void *beta, const void *rin, const void *rout,
1863 			 double *r_true_l, double *r_true_t, void *x,
1864 			 int incx, void *head_y, void *tail_y, int incy,
1865 			 double eps_int, double un_int, double *test_ratio)
1866 
1867 /* Purpose
1868  * =======
1869  *
1870  * Computes ratio of the computed error from DOT2 over the expected
1871  * error bound.
1872  *
1873  * Arguments
1874  * =========
1875  *
1876  * n       (input) int
1877  *         The length of the vectors X and Y.
1878  *
1879  * conj    (input) enum blas_conj_type
1880  *
1881  * alpha   (input) const void*
1882  *
1883  * beta    (input) const void*
1884  *
1885  * rin     (input) const void*
1886  *
1887  * rout    (input) const void*
1888  *         This result was computed by some other routine, and will be
1889  *         tested by this routine by comparing it with the truth.
1890  *
1891  * r_true_l (input) double*
1892  *         The leading part of the truth.
1893  *
1894  * r_true_t (input) double*
1895  *         The trailing part of the truth.
1896  *
1897  * x       (input) void*
1898  *
1899  * head_y
1900  * tail_y  (input) void*
1901  *
1902  * eps_int (input) double
1903  *         The internal machine precision.
1904  *
1905  * un_int  (input) double
1906  *         The internal underflow threshold.
1907  *
1908  * test_ratio (output) float*
1909  *         The ratio of computed error for r over the error bound.
1910  */
1911 {
1912   int i, ix, iy;
1913   double eps_accurate, eps_out, tmp1, S, S1, S2, U, prod[2], tmp[2];
1914   double un_d, un_accurate, un_out;
1915   double *x_i = (double *) x;
1916   float *head_y_i = (float *) head_y;
1917   float *tail_y_i = (float *) tail_y;
1918   double *alpha_i = (double *) alpha;
1919   double *beta_i = (double *) beta;
1920   double *rin_i = (double *) rin;
1921   double *rout_i = (double *) rout;
1922   double x_ii[2];
1923   float y_ii[2];
1924 
1925   /* Set the starting position */
1926   incx *= 2;
1927   incy *= 2;
1928   ix = 0;
1929   iy = 0;
1930   if (incx < 0)
1931     ix = -(n - 1) * incx;
1932   if (incy < 0)
1933     iy = -(n - 1) * incy;
1934 
1935   /* computing S */
1936   S = S1 = S2 = 0.;
1937   for (i = 0; i < n; ++i) {
1938     x_ii[0] = x_i[ix];
1939     x_ii[1] = x_i[ix + 1];
1940     y_ii[0] = head_y_i[iy];
1941     y_ii[1] = head_y_i[iy + 1];
1942     if (conj == blas_conj)
1943       x_ii[1] = -x_ii[1];
1944     S1 += sqrt(x_ii[0] * x_ii[0] + x_ii[1] * x_ii[1]);
1945     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1946 
1947     {
1948       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1949       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1950     }
1951     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1952 
1953     /* Now get the tail of y */
1954     y_ii[0] = tail_y_i[iy];
1955     y_ii[1] = tail_y_i[iy + 1];
1956     {
1957       prod[0] = (double) x_ii[0] * y_ii[0] - (double) x_ii[1] * y_ii[1];
1958       prod[1] = (double) x_ii[0] * y_ii[1] + (double) x_ii[1] * y_ii[0];
1959     }				/* prod = x[i]*y[i] */
1960     S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1961 
1962     S2 += sqrt(y_ii[0] * y_ii[0] + y_ii[1] * y_ii[1]);
1963     ix += incx;
1964     iy += incy;
1965   }
1966   S *= sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]);
1967   {
1968     prod[0] = (double) beta_i[0] * rin_i[0] - (double) beta_i[1] * rin_i[1];
1969     prod[1] = (double) beta_i[0] * rin_i[1] + (double) beta_i[1] * rin_i[0];
1970   }
1971   S += sqrt(prod[0] * prod[0] + prod[1] * prod[1]);
1972 
1973   un_d = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1974 	     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1975   S = MAX(S, un_d);
1976 
1977   eps_accurate = power(2, -BITS_E);
1978   un_accurate = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
1979 		    (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
1980   eps_out = power(2, -BITS_D);
1981   un_out = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1982 	       (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1983   tmp[0] = (rout_i[0] - r_true_l[0]) - r_true_t[0];
1984   tmp[1] = (rout_i[1] - r_true_l[1]) - r_true_t[1];
1985   tmp1 = sqrt(tmp[0] * tmp[0] + tmp[1] * tmp[1]);
1986 
1987   /* underflow */
1988   U = 2 * sqrt(alpha_i[0] * alpha_i[0] + alpha_i[1] * alpha_i[1]) * n + 3;
1989   U = MAX(U, S1 + 2 * n + 1);
1990   U = MAX(U, S2 + 2 * n + 1) * (un_int + un_accurate) + un_out;
1991   U *= 2 * sqrt(2.);
1992 
1993   *test_ratio = tmp1 / (2 * sqrt(2.) * (n + 2) * (eps_int + eps_accurate) * S
1994 			+
1995 			sqrt(2.) * eps_out * sqrt(r_true_l[0] * r_true_l[0] +
1996 						  r_true_l[1] * r_true_l[1]) +
1997 			U);
1998 }
1999 
2000   /* end of test_BLAS_zdot */
2001