1 /*
2  * -- High Performance Computing Linpack Benchmark (HPL)
3  *    HPL - 2.3 - December 2, 2018
4  *    Antoine P. Petitet
5  *    University of Tennessee, Knoxville
6  *    Innovative Computing Laboratory
7  *    (C) Copyright 2000-2008 All Rights Reserved
8  *
9  * -- Copyright notice and Licensing terms:
10  *
11  * Redistribution  and  use in  source and binary forms, with or without
12  * modification, are  permitted provided  that the following  conditions
13  * are met:
14  *
15  * 1. Redistributions  of  source  code  must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce  the above copyright
19  * notice, this list of conditions,  and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * 3. All  advertising  materials  mentioning  features  or  use of this
23  * software must display the following acknowledgement:
24  * This  product  includes  software  developed  at  the  University  of
25  * Tennessee, Knoxville, Innovative Computing Laboratory.
26  *
27  * 4. The name of the  University,  the name of the  Laboratory,  or the
28  * names  of  its  contributors  may  not  be used to endorse or promote
29  * products  derived   from   this  software  without  specific  written
30  * permission.
31  *
32  * -- Disclaimer:
33  *
34  * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38  * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39  * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  * ---------------------------------------------------------------------
46  */
47 /*
48  * Include files
49  */
50 #include "hpl.h"
51 /*
52  * ---------------------------------------------------------------------
53  * Static function prototypes
54  * ---------------------------------------------------------------------
55  */
56 static void     HPL_dlamc1
57 STDC_ARGS(
58 (  int *,           int *,           int *,           int * ) );
59 static void     HPL_dlamc2
60 STDC_ARGS(
61 (  int *,           int *,           int *,           double *,
62    int *,           double *,        int *,           double * ) );
63 static double   HPL_dlamc3
64 STDC_ARGS(
65 (  const double,    const double ) );
66 static void     HPL_dlamc4
67 STDC_ARGS(
68 (  int *,           const double,    const int ) );
69 static void     HPL_dlamc5
70 STDC_ARGS(
71 (  const int,       const int,       const int,       const int,
72    int *,           double * ) );
73 static double   HPL_dipow
74 STDC_ARGS(
75 (  const double,    const int ) );
76 
77 #ifdef STDC_HEADERS
HPL_dlamch(const HPL_T_MACH CMACH)78 double HPL_dlamch
79 (
80    const HPL_T_MACH                 CMACH
81 )
82 #else
83 double HPL_dlamch
84 ( CMACH )
85    const HPL_T_MACH                 CMACH;
86 #endif
87 {
88 /*
89  * Purpose
90  * =======
91  *
92  * HPL_dlamch determines  machine-specific  arithmetic constants such as
93  * the relative machine precision  (eps),  the safe minimum (sfmin) such
94  * that 1 / sfmin does not overflow, the base of the machine (base), the
95  * precision (prec), the  number of (base) digits  in the  mantissa (t),
96  * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise),  the
97  * minimum exponent before  (gradual)  underflow (emin),  the  underflow
98  * threshold (rmin) base**(emin-1), the largest exponent before overflow
99  * (emax), the overflow threshold (rmax) (base**emax)*(1-eps).
100  *
101  * Notes
102  * =====
103  *
104  * This function has been manually translated from the Fortran 77 LAPACK
105  * auxiliary function dlamch.f  (version 2.0 -- 1992), that  was  itself
106  * based on the function ENVRON  by Malcolm and incorporated suggestions
107  * by Gentleman and Marovich. See
108  *
109  * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
110  * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
111  *
112  * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
113  * properties of  floating point arithmetic units.,  Comms. of  the ACM,
114  * 17, 276-277 (1974).
115  *
116  * Arguments
117  * =========
118  *
119  * CMACH   (local input)                 const HPL_T_MACH
120  *         Specifies the value to be returned by HPL_dlamch
121  *            = HPL_MACH_EPS,   HPL_dlamch := eps (default)
122  *            = HPL_MACH_SFMIN, HPL_dlamch := sfmin
123  *            = HPL_MACH_BASE,  HPL_dlamch := base
124  *            = HPL_MACH_PREC,  HPL_dlamch := eps*base
125  *            = HPL_MACH_MLEN,  HPL_dlamch := t
126  *            = HPL_MACH_RND,   HPL_dlamch := rnd
127  *            = HPL_MACH_EMIN,  HPL_dlamch := emin
128  *            = HPL_MACH_RMIN,  HPL_dlamch := rmin
129  *            = HPL_MACH_EMAX,  HPL_dlamch := emax
130  *            = HPL_MACH_RMAX,  HPL_dlamch := rmax
131  *
132  *         where
133  *
134  *            eps   = relative machine precision,
135  *            sfmin = safe minimum,
136  *            base  = base of the machine,
137  *            prec  = eps*base,
138  *            t     = number of digits in the mantissa,
139  *            rnd   = 1.0 if rounding occurs in addition,
140  *            emin  = minimum exponent before underflow,
141  *            rmin  = underflow threshold,
142  *            emax  = largest exponent before overflow,
143  *            rmax  = overflow threshold.
144  *
145  * ---------------------------------------------------------------------
146  */
147 /*
148  * .. Local Variables ..
149  */
150    static double              eps, sfmin, base, t, rnd, emin, rmin, emax,
151                               rmax, prec;
152    double                     small;
153    static int                 first=1;
154    int                        beta=0, imax=0, imin=0, it=0, lrnd=0;
155 /* ..
156  * .. Executable Statements ..
157  */
158    if( first != 0 )
159    {
160       first = 0;
161       HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax );
162       base  = (double)(beta);  t     = (double)(it);
163       if( lrnd != 0 )
164       { rnd = HPL_rone;  eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; }
165       else
166       { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it );            }
167       prec  = eps * base;  emin  = (double)(imin); emax  = (double)(imax);
168       sfmin = rmin;        small = HPL_rone / rmax;
169 /*
170  * Use  SMALL  plus a bit,  to avoid the possibility of rounding causing
171  * overflow when computing  1/sfmin.
172  */
173       if( small >= sfmin ) sfmin = small * ( HPL_rone + eps );
174    }
175 
176    if( CMACH == HPL_MACH_EPS   ) return( eps   );
177    if( CMACH == HPL_MACH_SFMIN ) return( sfmin );
178    if( CMACH == HPL_MACH_BASE  ) return( base  );
179    if( CMACH == HPL_MACH_PREC  ) return( prec  );
180    if( CMACH == HPL_MACH_MLEN  ) return( t     );
181    if( CMACH == HPL_MACH_RND   ) return( rnd   );
182    if( CMACH == HPL_MACH_EMIN  ) return( emin  );
183    if( CMACH == HPL_MACH_RMIN  ) return( rmin  );
184    if( CMACH == HPL_MACH_EMAX  ) return( emax  );
185    if( CMACH == HPL_MACH_RMAX  ) return( rmax  );
186 
187    return( eps );
188 /*
189  * End of HPL_dlamch
190  */
191 }
192 
193 #ifdef STDC_HEADERS
HPL_dlamc1(int * BETA,int * T,int * RND,int * IEEE1)194 static void HPL_dlamc1
195 (
196    int                        * BETA,
197    int                        * T,
198    int                        * RND,
199    int                        * IEEE1
200 )
201 #else
202 static void HPL_dlamc1
203 ( BETA, T, RND, IEEE1 )
204 /*
205  * .. Scalar Arguments ..
206  */
207    int                        * BETA, * IEEE1, * RND, * T;
208 #endif
209 {
210 /*
211  * Purpose
212  * =======
213  *
214  * HPL_dlamc1  determines  the machine parameters given by BETA, T, RND,
215  * and IEEE1.
216  *
217  * Notes
218  * =====
219  *
220  * This function has been manually translated from the Fortran 77 LAPACK
221  * auxiliary function dlamc1.f  (version 2.0 -- 1992), that  was  itself
222  * based on the function ENVRON  by Malcolm and incorporated suggestions
223  * by Gentleman and Marovich. See
224  *
225  * Malcolm M. A.,  Algorithms  to  reveal  properties  of floating-point
226  * arithmetic.,  Comms. of the ACM, 15, 949-951 (1972).
227  *
228  * Gentleman W. M. and Marovich S. B.,  More  on algorithms  that reveal
229  * properties of  floating point arithmetic units.,  Comms. of  the ACM,
230  * 17, 276-277 (1974).
231  *
232  * Arguments
233  * =========
234  *
235  * BETA    (local output)              int *
236  *         The base of the machine.
237  *
238  * T       (local output)              int *
239  *         The number of ( BETA ) digits in the mantissa.
240  *
241  * RND     (local output)              int *
242  *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
243  *         occurs in addition.  This may not be a  reliable guide to the
244  *         way in which the machine performs its arithmetic.
245  *
246  * IEEE1   (local output)              int *
247  *         Specifies  whether  rounding  appears  to be done in the IEEE
248  *         `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
249  *
250  * ---------------------------------------------------------------------
251  */
252 /*
253  * .. Local Variables ..
254  */
255    double                     a, b, c, f, one, qtr, savec, t1, t2;
256    static int                 first=1, lbeta, lieee1, lrnd, lt;
257 /* ..
258  * .. Executable Statements ..
259  */
260    if( first != 0 )
261    {
262       first = 0; one = HPL_rone;
263 /*
264  * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
265  * RND. Throughout this routine we use the function HPL_dlamc3 to ensure
266  * that relevant values are stored and not held in registers, or are not
267  * affected by optimizers.
268  *
269  * Compute  a = 2.0**m  with the  smallest  positive integer m such that
270  * fl( a + 1.0 ) == a.
271  */
272       a = HPL_rone; c = HPL_rone;
273       do
274       { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); }
275       while( c == HPL_rone );
276 /*
277  * Now compute b = 2.0**m with the smallest positive integer m such that
278  * fl( a + b ) > a.
279  */
280       b = HPL_rone; c = HPL_dlamc3( a, b );
281       while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
282 /*
283  * Now compute the base.  a and c  are  neighbouring floating point num-
284  * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
285  * rence is BETA.  Adding 0.25 to c is to ensure that it is truncated to
286  * BETA and not (BETA-1).
287  */
288       qtr = one / 4.0; savec = c;
289       c   = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
290 /*
291  * Now  determine  whether  rounding or chopping occurs, by adding a bit
292  * less than BETA/2 and a bit more than BETA/2 to a.
293  */
294       b = (double)(lbeta);
295       f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a );
296       if( c == a ) { lrnd = 1; } else { lrnd = 0; }
297       f = HPL_dlamc3( b / HPL_rtwo,  b / 100.0 ); c = HPL_dlamc3( f, a );
298       if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0;
299 /*
300  * Try  and decide whether rounding is done in the  IEEE  round to nea-
301  * rest style.  b/2 is half a unit in the last place of the two numbers
302  * a  and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
303  * vec is odd.  Thus adding b/2 to a should not change a, but adding b/2
304  * to savec should change savec.
305  */
306       t1 = HPL_dlamc3( b / HPL_rtwo, a );
307       t2 = HPL_dlamc3( b / HPL_rtwo, savec );
308       if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1;
309       else                                                  lieee1 = 0;
310 /*
311  * Now find the mantissa, T. It should be the integer part of log to the
312  * base BETA of a, however it is safer to determine T by powering. So we
313  * find T as the smallest positive integer for which fl( beta**t + 1.0 )
314  * is equal to 1.0.
315  */
316       lt = 0; a = HPL_rone; c = HPL_rone;
317 
318       do
319       {
320          lt++; a *= (double)(lbeta);
321          c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c,  -a );
322       } while( c == HPL_rone );
323    }
324 
325    *BETA  = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1;
326 }
327 
328 #ifdef STDC_HEADERS
HPL_dlamc2(int * BETA,int * T,int * RND,double * EPS,int * EMIN,double * RMIN,int * EMAX,double * RMAX)329 static void HPL_dlamc2
330 (
331    int                        * BETA,
332    int                        * T,
333    int                        * RND,
334    double                     * EPS,
335    int                        * EMIN,
336    double                     * RMIN,
337    int                        * EMAX,
338    double                     * RMAX
339 )
340 #else
341 static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
342 /*
343  * .. Scalar Arguments ..
344  */
345    int                        * BETA, * EMAX, * EMIN, * RND, * T;
346    double                     * EPS, * RMAX, * RMIN;
347 #endif
348 {
349 /*
350  * Purpose
351  * =======
352  *
353  * HPL_dlamc2  determines the machine  parameters specified in its argu-
354  * ment list.
355  *
356  * Notes
357  * =====
358  *
359  * This function has been manually translated from the Fortran 77 LAPACK
360  * auxiliary function  dlamc2.f (version 2.0 -- 1992), that  was  itself
361  * based on a function PARANOIA  by  W. Kahan of the University of Cali-
362  * fornia at Berkeley for the computation of the  relative machine epsi-
363  * lon eps.
364  *
365  * Arguments
366  * =========
367  *
368  * BETA    (local output)              int *
369  *         The base of the machine.
370  *
371  * T       (local output)              int *
372  *         The number of ( BETA ) digits in the mantissa.
373  *
374  * RND     (local output)              int *
375  *         Specifies whether proper rounding (RND=1) or chopping (RND=0)
376  *         occurs in addition. This may not be a reliable  guide to  the
377  *         way in which the machine performs its arithmetic.
378  *
379  * EPS     (local output)              double *
380  *         The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
381  *         where fl denotes the computed value.
382  *
383  * EMIN    (local output)              int *
384  *         The minimum exponent before (gradual) underflow occurs.
385  *
386  * RMIN    (local output)              double *
387  *         The smallest  normalized  number  for  the  machine, given by
388  *         BASE**( EMIN - 1 ), where  BASE  is the floating  point value
389  *         of BETA.
390  *
391  * EMAX    (local output)              int *
392  *         The maximum exponent before overflow occurs.
393  *
394  * RMAX    (local output)              double *
395  *         The  largest  positive  number  for  the  machine,  given  by
396  *         BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating  point
397  *         value of BETA.
398  *
399  * ---------------------------------------------------------------------
400  */
401 /*
402  * .. Local Variables ..
403  */
404    static double              leps, lrmax, lrmin;
405    double                     a, b, c, half, one, rbase, sixth, small,
406                               third, two, zero;
407    static int                 first=1, iwarn=0, lbeta=0, lemax, lemin,
408                               lt=0;
409    int                        gnmin=0, gpmin=0, i, ieee, lieee1=0,
410                               lrnd=0, ngnmin=0, ngpmin=0;
411 /* ..
412  * .. Executable Statements ..
413  */
414    if( first != 0 )
415    {
416       first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
417 /*
418  * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of  BETA,
419  * T, RND, EPS, EMIN and RMIN.
420  *
421  * Throughout this routine we use the function HPL_dlamc3 to ensure that
422  * relevant values are stored and not held in registers,  or are not af-
423  * fected by optimizers.
424  *
425  * HPL_dlamc1 returns the parameters  lbeta, lt, lrnd and lieee1.
426  */
427       HPL_dlamc1( &lbeta, &lt, &lrnd, &lieee1 );
428 /*
429  * Start to find eps.
430  */
431       b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
432 /*
433  * Try some tricks to see whether or not this is the correct  EPS.
434  */
435       b     = two / 3.0;
436       half  = one / HPL_rtwo;
437       sixth = HPL_dlamc3( b, -half );
438       third = HPL_dlamc3( sixth, sixth );
439       b     = HPL_dlamc3( third, -half );
440       b     = HPL_dlamc3( b, sixth );
441       b     = Mabs( b ); if( b < leps ) b = leps;
442 
443       leps = HPL_rone;
444 
445       while( ( leps > b ) && ( b > zero ) )
446       {
447          leps = b;
448          c = HPL_dlamc3( half * leps,
449                          HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) );
450          c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c );
451          c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c );
452       }
453       if( a < leps ) leps = a;
454 /*
455  * Computation of EPS complete.
456  *
457  * Now find  EMIN.  Let a = + or - 1, and + or - (1 + BASE**(-3)).  Keep
458  * dividing a by BETA until (gradual) underflow occurs. This is detected
459  * when we cannot recover the previous a.
460  */
461       rbase = one / (double)(lbeta); small = one;
462       for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero );
463       a = HPL_dlamc3( one, small );
464       HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta );
465       HPL_dlamc4( &gpmin,    a, lbeta ); HPL_dlamc4( &gnmin,    -a, lbeta );
466 
467       ieee = 0;
468 
469       if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
470       {
471          if( ngpmin == gpmin )
472          {
473 /*
474  * Non twos-complement machines, no gradual underflow; e.g.,  VAX )
475  */
476             lemin = ngpmin;
477          }
478          else if( ( gpmin-ngpmin ) == 3 )
479          {
480 /*
481  * Non twos-complement machines with gradual underflow; e.g., IEEE stan-
482  * dard followers
483  */
484             lemin = ngpmin - 1 + lt; ieee = 1;
485          }
486          else
487          {
488 /*
489  * A guess; no known machine
490  */
491             lemin = Mmin( ngpmin, gpmin );
492             iwarn = 1;
493          }
494       }
495       else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) )
496       {
497          if( Mabs( ngpmin-ngnmin ) == 1 )
498          {
499 /*
500  * Twos-complement machines, no gradual underflow; e.g., CYBER 205
501  */
502             lemin = Mmax( ngpmin, ngnmin );
503          }
504          else
505          {
506 /*
507  * A guess; no known machine
508  */
509             lemin = Mmin( ngpmin, ngnmin );
510             iwarn = 1;
511          }
512       }
513       else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) )
514       {
515          if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 )
516          {
517 /*
518  * Twos-complement machines with gradual underflow; no known machine
519  */
520             lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
521          }
522          else
523          {
524 /*
525  * A guess; no known machine
526  */
527             lemin = Mmin( ngpmin, ngnmin );
528             iwarn = 1;
529          }
530       }
531       else
532       {
533 /*
534  * A guess; no known machine
535  */
536          lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin );
537          lemin = Mmin( lemin, gnmin ); iwarn = 1;
538       }
539 /*
540  * Comment out this if block if EMIN is ok
541  */
542       if( iwarn != 0 )
543       {
544          first = 1;
545          HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
546 "WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
547 "If, after inspection, the value EMIN looks acceptable, please comment ",
548 "out the  if  block  as marked within the code of routine  HPL_dlamc2, ",
549 "otherwise supply EMIN explicitly." );
550       }
551 /*
552  * Assume IEEE arithmetic if we found denormalised  numbers above, or if
553  * arithmetic seems to round in the  IEEE style,  determined  in routine
554  * HPL_dlamc1.  A true  IEEE  machine should have both things true; how-
555  * ever, faulty machines may have one or the other.
556  */
557       if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1;
558       else                                   ieee = 0;
559 /*
560  * Compute  RMIN by successive division by  BETA. We could compute  RMIN
561  * as BASE**( EMIN - 1 ), but some machines underflow during this compu-
562  * tation.
563  */
564       lrmin = HPL_rone;
565       for( i = 0; i < 1 - lemin; i++ )
566          lrmin = HPL_dlamc3( lrmin*rbase, zero );
567 /*
568  * Finally, call HPL_dlamc5 to compute emax and rmax.
569  */
570       HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
571    }
572    *BETA = lbeta; *T    = lt;    *RND  = lrnd;  *EPS  = leps;
573    *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax;
574 }
575 
576 #ifdef STDC_HEADERS
HPL_dlamc3(const double A,const double B)577 static double HPL_dlamc3( const double A, const double B )
578 #else
579 static double HPL_dlamc3( A, B )
580 /*
581  * .. Scalar Arguments ..
582  */
583    const double               A, B;
584 #endif
585 {
586 /*
587  * Purpose
588  * =======
589  *
590  * HPL_dlamc3  is intended to force a and b  to be stored prior to doing
591  * the addition of  a  and  b,  for  use  in situations where optimizers
592  * might hold one of these in a register.
593  *
594  * Notes
595  * =====
596  *
597  * This function has been manually translated from the Fortran 77 LAPACK
598  * auxiliary function dlamc3.f (version 2.0 -- 1992).
599  *
600  * Arguments
601  * =========
602  *
603  * A, B    (local input)               double
604  *         The values a and b.
605  *
606  * ---------------------------------------------------------------------
607  */
608 /* ..
609  * .. Executable Statements ..
610  */
611    return( A + B );
612 }
613 
614 #ifdef STDC_HEADERS
HPL_dlamc4(int * EMIN,const double START,const int BASE)615 static void HPL_dlamc4
616 (
617    int                        * EMIN,
618    const double               START,
619    const int                  BASE
620 )
621 #else
622 static void HPL_dlamc4( EMIN, START, BASE )
623 /*
624  * .. Scalar Arguments ..
625  */
626    int                        * EMIN;
627    const int                  BASE;
628    const double               START;
629 #endif
630 {
631 /*
632  * Purpose
633  * =======
634  *
635  * HPL_dlamc4 is a service function for HPL_dlamc2.
636  *
637  * Notes
638  * =====
639  *
640  * This function has been manually translated from the Fortran 77 LAPACK
641  * auxiliary function dlamc4.f (version 2.0 -- 1992).
642  *
643  * Arguments
644  * =========
645  *
646  * EMIN    (local output)              int *
647  *         The minimum exponent before  (gradual) underflow, computed by
648  *         setting A = START and dividing  by  BASE until the previous A
649  *         can not be recovered.
650  *
651  * START   (local input)               double
652  *         The starting point for determining EMIN.
653  *
654  * BASE    (local input)               int
655  *         The base of the machine.
656  *
657  * ---------------------------------------------------------------------
658  */
659 /*
660  * .. Local Variables ..
661  */
662    double                     a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
663    int                        i;
664 /* ..
665  * .. Executable Statements ..
666  */
667    a     = START; one = HPL_rone; rbase = one / (double)(BASE);
668    zero  = HPL_rzero;
669    *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;
670 
671    do
672    {
673       (*EMIN)--; a = b1;
674       b1 = HPL_dlamc3( a /  BASE,  zero );
675       c1 = HPL_dlamc3( b1 *  BASE, zero );
676       d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1;
677       b2 = HPL_dlamc3( a * rbase,  zero );
678       c2 = HPL_dlamc3( b2 / rbase, zero );
679       d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2;
680    } while( ( c1 == a ) && ( c2 == a ) &&  ( d1 == a ) && ( d2 == a ) );
681 }
682 
683 #ifdef STDC_HEADERS
HPL_dlamc5(const int BETA,const int P,const int EMIN,const int IEEE,int * EMAX,double * RMAX)684 static void HPL_dlamc5
685 (
686    const int                  BETA,
687    const int                  P,
688    const int                  EMIN,
689    const int                  IEEE,
690    int                        * EMAX,
691    double                     * RMAX
692 )
693 #else
694 static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX )
695 /*
696  * .. Scalar Arguments ..
697  */
698    const int                  BETA, EMIN, IEEE, P;
699    int                        * EMAX;
700    double                     * RMAX;
701 #endif
702 {
703 /*
704  * Purpose
705  * =======
706  *
707  * HPL_dlamc5  attempts  to compute RMAX, the largest machine  floating-
708  * point number, without overflow.  It assumes that EMAX + abs(EMIN) sum
709  * approximately to a power of 2.  It will fail  on machines where  this
710  * assumption does not hold, for example, the  Cyber 205 (EMIN = -28625,
711  * EMAX = 28718).  It will also fail if  the value supplied for  EMIN is
712  * too large (i.e. too close to zero), probably with overflow.
713  *
714  * Notes
715  * =====
716  *
717  * This function has been manually translated from the Fortran 77 LAPACK
718  * auxiliary function dlamc5.f (version 2.0 -- 1992).
719  *
720  * Arguments
721  * =========
722  *
723  * BETA    (local input)               int
724  *         The base of floating-point arithmetic.
725  *
726  * P       (local input)               int
727  *         The number of base BETA digits in the mantissa of a floating-
728  *         point value.
729  *
730  * EMIN    (local input)               int
731  *         The minimum exponent before (gradual) underflow.
732  *
733  * IEEE    (local input)               int
734  *         A logical flag specifying whether or not  the arithmetic sys-
735  *         tem is thought to comply with the IEEE standard.
736  *
737  * EMAX    (local output)              int *
738  *         The largest exponent before overflow.
739  *
740  * RMAX    (local output)              double *
741  *         The largest machine floating-point number.
742  *
743  * ---------------------------------------------------------------------
744  */
745 /*
746  * .. Local Variables ..
747  */
748    double                     oldy=HPL_rzero, recbas, y, z;
749    int                        exbits=1, expsum, i, lexp=1, nbits, try,
750                               uexp;
751 /* ..
752  * .. Executable Statements ..
753  */
754 /*
755  * First compute  lexp  and  uexp, two powers of 2 that bound abs(EMIN).
756  * We then assume that  EMAX + abs( EMIN ) will sum approximately to the
757  * bound that  is closest to abs( EMIN ). (EMAX  is the  exponent of the
758  * required number RMAX).
759  */
760 l_10:
761    try = (int)( (unsigned int)(lexp) << 1 );
762    if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; }
763 
764    if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; }
765 /*
766  * Now -lexp is less than or equal to EMIN, and -uexp is greater than or
767  * equal to EMIN. exbits is the number of bits needed to store the expo-
768  * nent.
769  */
770    if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
771    { expsum = (int)( (unsigned int)(lexp) << 1 ); }
772    else
773    { expsum = (int)( (unsigned int)(uexp) << 1 ); }
774 /*
775  * expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
776  */
777    *EMAX = expsum + EMIN - 1;
778 /*
779  * nbits  is  the total number of bits needed to store a  floating-point
780  * number.
781  */
782    nbits = 1 + exbits + P;
783 
784    if( ( nbits % 2 == 1 ) && ( BETA == 2 ) )
785    {
786 /*
787  * Either there are an odd number of bits used to store a floating-point
788  * number, which is unlikely, or some bits are not used in the represen-
789  * tation of numbers,  which is possible,  (e.g. Cray machines)  or  the
790  * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
791  * which is perhaps the most likely. We have to assume the last alterna-
792  * tive.  If this is true,  then we need to reduce  EMAX  by one because
793  * there must be some way of representing zero  in an  implicit-bit sys-
794  * tem. On machines like Cray we are reducing EMAX by one unnecessarily.
795  */
796       (*EMAX)--;
797    }
798 
799    if( IEEE != 0 )
800    {
801 /*
802  * Assume we are on an IEEE  machine which reserves one exponent for in-
803  * finity and NaN.
804  */
805       (*EMAX)--;
806    }
807 /*
808  * Now create RMAX, the largest machine number, which should be equal to
809  * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
810  * careful that the result is less than 1.0.
811  */
812    recbas = HPL_rone / (double)(BETA);
813    z      = (double)(BETA) - HPL_rone;
814    y      = HPL_rzero;
815 
816    for( i = 0; i < P; i++ )
817    { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
818 
819    if( y >= HPL_rone ) y = oldy;
820 /*
821  * Now multiply by BETA**EMAX to get RMAX.
822  */
823    for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero );
824 
825    *RMAX = y;
826 /*
827  * End of HPL_dlamch
828  */
829 }
830 
831 #ifdef STDC_HEADERS
HPL_dipow(const double X,const int N)832 static double HPL_dipow
833 (
834    const double               X,
835    const int                  N
836 )
837 #else
838 static double HPL_dipow( X, N )
839 /*
840  * .. Scalar Arguments ..
841  */
842    const int                  N;
843    const double               X;
844 #endif
845 {
846 /*
847  * Purpose
848  * =======
849  *
850  * HPL_dipow computes the integer n-th power of a real scalar x.
851  *
852  * Arguments
853  * =========
854  *
855  * X       (local input)               const double
856  *         The real scalar x.
857  *
858  * N       (local input)               const int
859  *         The integer power to raise x to.
860  *
861  * ---------------------------------------------------------------------
862  */
863 /*
864  * .. Local Variables ..
865  */
866    double                     r, y=HPL_rone;
867    int                        k, n;
868 /* ..
869  * .. Executable Statements ..
870  */
871    if( X == HPL_rzero ) return( HPL_rzero );
872    if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; }
873    for( k = 0; k < n; k++ ) y *= r;
874 
875    return( y );
876 }
877