1 /*
2 * -- High Performance Computing Linpack Benchmark (HPL)
3 * HPL - 2.3 - December 2, 2018
4 * Antoine P. Petitet
5 * University of Tennessee, Knoxville
6 * Innovative Computing Laboratory
7 * (C) Copyright 2000-2008 All Rights Reserved
8 *
9 * -- Copyright notice and Licensing terms:
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 *
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions, and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * 3. All advertising materials mentioning features or use of this
23 * software must display the following acknowledgement:
24 * This product includes software developed at the University of
25 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 *
27 * 4. The name of the University, the name of the Laboratory, or the
28 * names of its contributors may not be used to endorse or promote
29 * products derived from this software without specific written
30 * permission.
31 *
32 * -- Disclaimer:
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 * ---------------------------------------------------------------------
46 */
47 /*
48 * Include files
49 */
50 #include "hpl.h"
51 /*
52 * ---------------------------------------------------------------------
53 * Static function prototypes
54 * ---------------------------------------------------------------------
55 */
56 static void HPL_dlamc1
57 STDC_ARGS(
58 ( int *, int *, int *, int * ) );
59 static void HPL_dlamc2
60 STDC_ARGS(
61 ( int *, int *, int *, double *,
62 int *, double *, int *, double * ) );
63 static double HPL_dlamc3
64 STDC_ARGS(
65 ( const double, const double ) );
66 static void HPL_dlamc4
67 STDC_ARGS(
68 ( int *, const double, const int ) );
69 static void HPL_dlamc5
70 STDC_ARGS(
71 ( const int, const int, const int, const int,
72 int *, double * ) );
73 static double HPL_dipow
74 STDC_ARGS(
75 ( const double, const int ) );
76
77 #ifdef STDC_HEADERS
HPL_dlamch(const HPL_T_MACH CMACH)78 double HPL_dlamch
79 (
80 const HPL_T_MACH CMACH
81 )
82 #else
83 double HPL_dlamch
84 ( CMACH )
85 const HPL_T_MACH CMACH;
86 #endif
87 {
88 /*
89 * Purpose
90 * =======
91 *
92 * HPL_dlamch determines machine-specific arithmetic constants such as
93 * the relative machine precision (eps), the safe minimum (sfmin) such
94 * that 1 / sfmin does not overflow, the base of the machine (base), the
95 * precision (prec), the number of (base) digits in the mantissa (t),
96 * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the
97 * minimum exponent before (gradual) underflow (emin), the underflow
98 * threshold (rmin) base**(emin-1), the largest exponent before overflow
99 * (emax), the overflow threshold (rmax) (base**emax)*(1-eps).
100 *
101 * Notes
102 * =====
103 *
104 * This function has been manually translated from the Fortran 77 LAPACK
105 * auxiliary function dlamch.f (version 2.0 -- 1992), that was itself
106 * based on the function ENVRON by Malcolm and incorporated suggestions
107 * by Gentleman and Marovich. See
108 *
109 * Malcolm M. A., Algorithms to reveal properties of floating-point
110 * arithmetic., Comms. of the ACM, 15, 949-951 (1972).
111 *
112 * Gentleman W. M. and Marovich S. B., More on algorithms that reveal
113 * properties of floating point arithmetic units., Comms. of the ACM,
114 * 17, 276-277 (1974).
115 *
116 * Arguments
117 * =========
118 *
119 * CMACH (local input) const HPL_T_MACH
120 * Specifies the value to be returned by HPL_dlamch
121 * = HPL_MACH_EPS, HPL_dlamch := eps (default)
122 * = HPL_MACH_SFMIN, HPL_dlamch := sfmin
123 * = HPL_MACH_BASE, HPL_dlamch := base
124 * = HPL_MACH_PREC, HPL_dlamch := eps*base
125 * = HPL_MACH_MLEN, HPL_dlamch := t
126 * = HPL_MACH_RND, HPL_dlamch := rnd
127 * = HPL_MACH_EMIN, HPL_dlamch := emin
128 * = HPL_MACH_RMIN, HPL_dlamch := rmin
129 * = HPL_MACH_EMAX, HPL_dlamch := emax
130 * = HPL_MACH_RMAX, HPL_dlamch := rmax
131 *
132 * where
133 *
134 * eps = relative machine precision,
135 * sfmin = safe minimum,
136 * base = base of the machine,
137 * prec = eps*base,
138 * t = number of digits in the mantissa,
139 * rnd = 1.0 if rounding occurs in addition,
140 * emin = minimum exponent before underflow,
141 * rmin = underflow threshold,
142 * emax = largest exponent before overflow,
143 * rmax = overflow threshold.
144 *
145 * ---------------------------------------------------------------------
146 */
147 /*
148 * .. Local Variables ..
149 */
150 static double eps, sfmin, base, t, rnd, emin, rmin, emax,
151 rmax, prec;
152 double small;
153 static int first=1;
154 int beta=0, imax=0, imin=0, it=0, lrnd=0;
155 /* ..
156 * .. Executable Statements ..
157 */
158 if( first != 0 )
159 {
160 first = 0;
161 HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax );
162 base = (double)(beta); t = (double)(it);
163 if( lrnd != 0 )
164 { rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; }
165 else
166 { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); }
167 prec = eps * base; emin = (double)(imin); emax = (double)(imax);
168 sfmin = rmin; small = HPL_rone / rmax;
169 /*
170 * Use SMALL plus a bit, to avoid the possibility of rounding causing
171 * overflow when computing 1/sfmin.
172 */
173 if( small >= sfmin ) sfmin = small * ( HPL_rone + eps );
174 }
175
176 if( CMACH == HPL_MACH_EPS ) return( eps );
177 if( CMACH == HPL_MACH_SFMIN ) return( sfmin );
178 if( CMACH == HPL_MACH_BASE ) return( base );
179 if( CMACH == HPL_MACH_PREC ) return( prec );
180 if( CMACH == HPL_MACH_MLEN ) return( t );
181 if( CMACH == HPL_MACH_RND ) return( rnd );
182 if( CMACH == HPL_MACH_EMIN ) return( emin );
183 if( CMACH == HPL_MACH_RMIN ) return( rmin );
184 if( CMACH == HPL_MACH_EMAX ) return( emax );
185 if( CMACH == HPL_MACH_RMAX ) return( rmax );
186
187 return( eps );
188 /*
189 * End of HPL_dlamch
190 */
191 }
192
193 #ifdef STDC_HEADERS
HPL_dlamc1(int * BETA,int * T,int * RND,int * IEEE1)194 static void HPL_dlamc1
195 (
196 int * BETA,
197 int * T,
198 int * RND,
199 int * IEEE1
200 )
201 #else
202 static void HPL_dlamc1
203 ( BETA, T, RND, IEEE1 )
204 /*
205 * .. Scalar Arguments ..
206 */
207 int * BETA, * IEEE1, * RND, * T;
208 #endif
209 {
210 /*
211 * Purpose
212 * =======
213 *
214 * HPL_dlamc1 determines the machine parameters given by BETA, T, RND,
215 * and IEEE1.
216 *
217 * Notes
218 * =====
219 *
220 * This function has been manually translated from the Fortran 77 LAPACK
221 * auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself
222 * based on the function ENVRON by Malcolm and incorporated suggestions
223 * by Gentleman and Marovich. See
224 *
225 * Malcolm M. A., Algorithms to reveal properties of floating-point
226 * arithmetic., Comms. of the ACM, 15, 949-951 (1972).
227 *
228 * Gentleman W. M. and Marovich S. B., More on algorithms that reveal
229 * properties of floating point arithmetic units., Comms. of the ACM,
230 * 17, 276-277 (1974).
231 *
232 * Arguments
233 * =========
234 *
235 * BETA (local output) int *
236 * The base of the machine.
237 *
238 * T (local output) int *
239 * The number of ( BETA ) digits in the mantissa.
240 *
241 * RND (local output) int *
242 * Specifies whether proper rounding (RND=1) or chopping (RND=0)
243 * occurs in addition. This may not be a reliable guide to the
244 * way in which the machine performs its arithmetic.
245 *
246 * IEEE1 (local output) int *
247 * Specifies whether rounding appears to be done in the IEEE
248 * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
249 *
250 * ---------------------------------------------------------------------
251 */
252 /*
253 * .. Local Variables ..
254 */
255 double a, b, c, f, one, qtr, savec, t1, t2;
256 static int first=1, lbeta, lieee1, lrnd, lt;
257 /* ..
258 * .. Executable Statements ..
259 */
260 if( first != 0 )
261 {
262 first = 0; one = HPL_rone;
263 /*
264 * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
265 * RND. Throughout this routine we use the function HPL_dlamc3 to ensure
266 * that relevant values are stored and not held in registers, or are not
267 * affected by optimizers.
268 *
269 * Compute a = 2.0**m with the smallest positive integer m such that
270 * fl( a + 1.0 ) == a.
271 */
272 a = HPL_rone; c = HPL_rone;
273 do
274 { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); }
275 while( c == HPL_rone );
276 /*
277 * Now compute b = 2.0**m with the smallest positive integer m such that
278 * fl( a + b ) > a.
279 */
280 b = HPL_rone; c = HPL_dlamc3( a, b );
281 while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
282 /*
283 * Now compute the base. a and c are neighbouring floating point num-
284 * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
285 * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to
286 * BETA and not (BETA-1).
287 */
288 qtr = one / 4.0; savec = c;
289 c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
290 /*
291 * Now determine whether rounding or chopping occurs, by adding a bit
292 * less than BETA/2 and a bit more than BETA/2 to a.
293 */
294 b = (double)(lbeta);
295 f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a );
296 if( c == a ) { lrnd = 1; } else { lrnd = 0; }
297 f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a );
298 if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0;
299 /*
300 * Try and decide whether rounding is done in the IEEE round to nea-
301 * rest style. b/2 is half a unit in the last place of the two numbers
302 * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
303 * vec is odd. Thus adding b/2 to a should not change a, but adding b/2
304 * to savec should change savec.
305 */
306 t1 = HPL_dlamc3( b / HPL_rtwo, a );
307 t2 = HPL_dlamc3( b / HPL_rtwo, savec );
308 if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1;
309 else lieee1 = 0;
310 /*
311 * Now find the mantissa, T. It should be the integer part of log to the
312 * base BETA of a, however it is safer to determine T by powering. So we
313 * find T as the smallest positive integer for which fl( beta**t + 1.0 )
314 * is equal to 1.0.
315 */
316 lt = 0; a = HPL_rone; c = HPL_rone;
317
318 do
319 {
320 lt++; a *= (double)(lbeta);
321 c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a );
322 } while( c == HPL_rone );
323 }
324
325 *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1;
326 }
327
328 #ifdef STDC_HEADERS
HPL_dlamc2(int * BETA,int * T,int * RND,double * EPS,int * EMIN,double * RMIN,int * EMAX,double * RMAX)329 static void HPL_dlamc2
330 (
331 int * BETA,
332 int * T,
333 int * RND,
334 double * EPS,
335 int * EMIN,
336 double * RMIN,
337 int * EMAX,
338 double * RMAX
339 )
340 #else
341 static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
342 /*
343 * .. Scalar Arguments ..
344 */
345 int * BETA, * EMAX, * EMIN, * RND, * T;
346 double * EPS, * RMAX, * RMIN;
347 #endif
348 {
349 /*
350 * Purpose
351 * =======
352 *
353 * HPL_dlamc2 determines the machine parameters specified in its argu-
354 * ment list.
355 *
356 * Notes
357 * =====
358 *
359 * This function has been manually translated from the Fortran 77 LAPACK
360 * auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself
361 * based on a function PARANOIA by W. Kahan of the University of Cali-
362 * fornia at Berkeley for the computation of the relative machine epsi-
363 * lon eps.
364 *
365 * Arguments
366 * =========
367 *
368 * BETA (local output) int *
369 * The base of the machine.
370 *
371 * T (local output) int *
372 * The number of ( BETA ) digits in the mantissa.
373 *
374 * RND (local output) int *
375 * Specifies whether proper rounding (RND=1) or chopping (RND=0)
376 * occurs in addition. This may not be a reliable guide to the
377 * way in which the machine performs its arithmetic.
378 *
379 * EPS (local output) double *
380 * The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
381 * where fl denotes the computed value.
382 *
383 * EMIN (local output) int *
384 * The minimum exponent before (gradual) underflow occurs.
385 *
386 * RMIN (local output) double *
387 * The smallest normalized number for the machine, given by
388 * BASE**( EMIN - 1 ), where BASE is the floating point value
389 * of BETA.
390 *
391 * EMAX (local output) int *
392 * The maximum exponent before overflow occurs.
393 *
394 * RMAX (local output) double *
395 * The largest positive number for the machine, given by
396 * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point
397 * value of BETA.
398 *
399 * ---------------------------------------------------------------------
400 */
401 /*
402 * .. Local Variables ..
403 */
404 static double leps, lrmax, lrmin;
405 double a, b, c, half, one, rbase, sixth, small,
406 third, two, zero;
407 static int first=1, iwarn=0, lbeta=0, lemax, lemin,
408 lt=0;
409 int gnmin=0, gpmin=0, i, ieee, lieee1=0,
410 lrnd=0, ngnmin=0, ngpmin=0;
411 /* ..
412 * .. Executable Statements ..
413 */
414 if( first != 0 )
415 {
416 first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
417 /*
418 * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA,
419 * T, RND, EPS, EMIN and RMIN.
420 *
421 * Throughout this routine we use the function HPL_dlamc3 to ensure that
422 * relevant values are stored and not held in registers, or are not af-
423 * fected by optimizers.
424 *
425 * HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1.
426 */
427 HPL_dlamc1( &lbeta, <, &lrnd, &lieee1 );
428 /*
429 * Start to find eps.
430 */
431 b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
432 /*
433 * Try some tricks to see whether or not this is the correct EPS.
434 */
435 b = two / 3.0;
436 half = one / HPL_rtwo;
437 sixth = HPL_dlamc3( b, -half );
438 third = HPL_dlamc3( sixth, sixth );
439 b = HPL_dlamc3( third, -half );
440 b = HPL_dlamc3( b, sixth );
441 b = Mabs( b ); if( b < leps ) b = leps;
442
443 leps = HPL_rone;
444
445 while( ( leps > b ) && ( b > zero ) )
446 {
447 leps = b;
448 c = HPL_dlamc3( half * leps,
449 HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) );
450 c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c );
451 c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c );
452 }
453 if( a < leps ) leps = a;
454 /*
455 * Computation of EPS complete.
456 *
457 * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). Keep
458 * dividing a by BETA until (gradual) underflow occurs. This is detected
459 * when we cannot recover the previous a.
460 */
461 rbase = one / (double)(lbeta); small = one;
462 for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero );
463 a = HPL_dlamc3( one, small );
464 HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta );
465 HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta );
466
467 ieee = 0;
468
469 if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
470 {
471 if( ngpmin == gpmin )
472 {
473 /*
474 * Non twos-complement machines, no gradual underflow; e.g., VAX )
475 */
476 lemin = ngpmin;
477 }
478 else if( ( gpmin-ngpmin ) == 3 )
479 {
480 /*
481 * Non twos-complement machines with gradual underflow; e.g., IEEE stan-
482 * dard followers
483 */
484 lemin = ngpmin - 1 + lt; ieee = 1;
485 }
486 else
487 {
488 /*
489 * A guess; no known machine
490 */
491 lemin = Mmin( ngpmin, gpmin );
492 iwarn = 1;
493 }
494 }
495 else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) )
496 {
497 if( Mabs( ngpmin-ngnmin ) == 1 )
498 {
499 /*
500 * Twos-complement machines, no gradual underflow; e.g., CYBER 205
501 */
502 lemin = Mmax( ngpmin, ngnmin );
503 }
504 else
505 {
506 /*
507 * A guess; no known machine
508 */
509 lemin = Mmin( ngpmin, ngnmin );
510 iwarn = 1;
511 }
512 }
513 else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) )
514 {
515 if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 )
516 {
517 /*
518 * Twos-complement machines with gradual underflow; no known machine
519 */
520 lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
521 }
522 else
523 {
524 /*
525 * A guess; no known machine
526 */
527 lemin = Mmin( ngpmin, ngnmin );
528 iwarn = 1;
529 }
530 }
531 else
532 {
533 /*
534 * A guess; no known machine
535 */
536 lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin );
537 lemin = Mmin( lemin, gnmin ); iwarn = 1;
538 }
539 /*
540 * Comment out this if block if EMIN is ok
541 */
542 if( iwarn != 0 )
543 {
544 first = 1;
545 HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
546 "WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
547 "If, after inspection, the value EMIN looks acceptable, please comment ",
548 "out the if block as marked within the code of routine HPL_dlamc2, ",
549 "otherwise supply EMIN explicitly." );
550 }
551 /*
552 * Assume IEEE arithmetic if we found denormalised numbers above, or if
553 * arithmetic seems to round in the IEEE style, determined in routine
554 * HPL_dlamc1. A true IEEE machine should have both things true; how-
555 * ever, faulty machines may have one or the other.
556 */
557 if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1;
558 else ieee = 0;
559 /*
560 * Compute RMIN by successive division by BETA. We could compute RMIN
561 * as BASE**( EMIN - 1 ), but some machines underflow during this compu-
562 * tation.
563 */
564 lrmin = HPL_rone;
565 for( i = 0; i < 1 - lemin; i++ )
566 lrmin = HPL_dlamc3( lrmin*rbase, zero );
567 /*
568 * Finally, call HPL_dlamc5 to compute emax and rmax.
569 */
570 HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax );
571 }
572 *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps;
573 *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax;
574 }
575
576 #ifdef STDC_HEADERS
HPL_dlamc3(const double A,const double B)577 static double HPL_dlamc3( const double A, const double B )
578 #else
579 static double HPL_dlamc3( A, B )
580 /*
581 * .. Scalar Arguments ..
582 */
583 const double A, B;
584 #endif
585 {
586 /*
587 * Purpose
588 * =======
589 *
590 * HPL_dlamc3 is intended to force a and b to be stored prior to doing
591 * the addition of a and b, for use in situations where optimizers
592 * might hold one of these in a register.
593 *
594 * Notes
595 * =====
596 *
597 * This function has been manually translated from the Fortran 77 LAPACK
598 * auxiliary function dlamc3.f (version 2.0 -- 1992).
599 *
600 * Arguments
601 * =========
602 *
603 * A, B (local input) double
604 * The values a and b.
605 *
606 * ---------------------------------------------------------------------
607 */
608 /* ..
609 * .. Executable Statements ..
610 */
611 return( A + B );
612 }
613
614 #ifdef STDC_HEADERS
HPL_dlamc4(int * EMIN,const double START,const int BASE)615 static void HPL_dlamc4
616 (
617 int * EMIN,
618 const double START,
619 const int BASE
620 )
621 #else
622 static void HPL_dlamc4( EMIN, START, BASE )
623 /*
624 * .. Scalar Arguments ..
625 */
626 int * EMIN;
627 const int BASE;
628 const double START;
629 #endif
630 {
631 /*
632 * Purpose
633 * =======
634 *
635 * HPL_dlamc4 is a service function for HPL_dlamc2.
636 *
637 * Notes
638 * =====
639 *
640 * This function has been manually translated from the Fortran 77 LAPACK
641 * auxiliary function dlamc4.f (version 2.0 -- 1992).
642 *
643 * Arguments
644 * =========
645 *
646 * EMIN (local output) int *
647 * The minimum exponent before (gradual) underflow, computed by
648 * setting A = START and dividing by BASE until the previous A
649 * can not be recovered.
650 *
651 * START (local input) double
652 * The starting point for determining EMIN.
653 *
654 * BASE (local input) int
655 * The base of the machine.
656 *
657 * ---------------------------------------------------------------------
658 */
659 /*
660 * .. Local Variables ..
661 */
662 double a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
663 int i;
664 /* ..
665 * .. Executable Statements ..
666 */
667 a = START; one = HPL_rone; rbase = one / (double)(BASE);
668 zero = HPL_rzero;
669 *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;
670
671 do
672 {
673 (*EMIN)--; a = b1;
674 b1 = HPL_dlamc3( a / BASE, zero );
675 c1 = HPL_dlamc3( b1 * BASE, zero );
676 d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1;
677 b2 = HPL_dlamc3( a * rbase, zero );
678 c2 = HPL_dlamc3( b2 / rbase, zero );
679 d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2;
680 } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) );
681 }
682
683 #ifdef STDC_HEADERS
HPL_dlamc5(const int BETA,const int P,const int EMIN,const int IEEE,int * EMAX,double * RMAX)684 static void HPL_dlamc5
685 (
686 const int BETA,
687 const int P,
688 const int EMIN,
689 const int IEEE,
690 int * EMAX,
691 double * RMAX
692 )
693 #else
694 static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX )
695 /*
696 * .. Scalar Arguments ..
697 */
698 const int BETA, EMIN, IEEE, P;
699 int * EMAX;
700 double * RMAX;
701 #endif
702 {
703 /*
704 * Purpose
705 * =======
706 *
707 * HPL_dlamc5 attempts to compute RMAX, the largest machine floating-
708 * point number, without overflow. It assumes that EMAX + abs(EMIN) sum
709 * approximately to a power of 2. It will fail on machines where this
710 * assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
711 * EMAX = 28718). It will also fail if the value supplied for EMIN is
712 * too large (i.e. too close to zero), probably with overflow.
713 *
714 * Notes
715 * =====
716 *
717 * This function has been manually translated from the Fortran 77 LAPACK
718 * auxiliary function dlamc5.f (version 2.0 -- 1992).
719 *
720 * Arguments
721 * =========
722 *
723 * BETA (local input) int
724 * The base of floating-point arithmetic.
725 *
726 * P (local input) int
727 * The number of base BETA digits in the mantissa of a floating-
728 * point value.
729 *
730 * EMIN (local input) int
731 * The minimum exponent before (gradual) underflow.
732 *
733 * IEEE (local input) int
734 * A logical flag specifying whether or not the arithmetic sys-
735 * tem is thought to comply with the IEEE standard.
736 *
737 * EMAX (local output) int *
738 * The largest exponent before overflow.
739 *
740 * RMAX (local output) double *
741 * The largest machine floating-point number.
742 *
743 * ---------------------------------------------------------------------
744 */
745 /*
746 * .. Local Variables ..
747 */
748 double oldy=HPL_rzero, recbas, y, z;
749 int exbits=1, expsum, i, lexp=1, nbits, try,
750 uexp;
751 /* ..
752 * .. Executable Statements ..
753 */
754 /*
755 * First compute lexp and uexp, two powers of 2 that bound abs(EMIN).
756 * We then assume that EMAX + abs( EMIN ) will sum approximately to the
757 * bound that is closest to abs( EMIN ). (EMAX is the exponent of the
758 * required number RMAX).
759 */
760 l_10:
761 try = (int)( (unsigned int)(lexp) << 1 );
762 if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; }
763
764 if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; }
765 /*
766 * Now -lexp is less than or equal to EMIN, and -uexp is greater than or
767 * equal to EMIN. exbits is the number of bits needed to store the expo-
768 * nent.
769 */
770 if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
771 { expsum = (int)( (unsigned int)(lexp) << 1 ); }
772 else
773 { expsum = (int)( (unsigned int)(uexp) << 1 ); }
774 /*
775 * expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
776 */
777 *EMAX = expsum + EMIN - 1;
778 /*
779 * nbits is the total number of bits needed to store a floating-point
780 * number.
781 */
782 nbits = 1 + exbits + P;
783
784 if( ( nbits % 2 == 1 ) && ( BETA == 2 ) )
785 {
786 /*
787 * Either there are an odd number of bits used to store a floating-point
788 * number, which is unlikely, or some bits are not used in the represen-
789 * tation of numbers, which is possible, (e.g. Cray machines) or the
790 * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
791 * which is perhaps the most likely. We have to assume the last alterna-
792 * tive. If this is true, then we need to reduce EMAX by one because
793 * there must be some way of representing zero in an implicit-bit sys-
794 * tem. On machines like Cray we are reducing EMAX by one unnecessarily.
795 */
796 (*EMAX)--;
797 }
798
799 if( IEEE != 0 )
800 {
801 /*
802 * Assume we are on an IEEE machine which reserves one exponent for in-
803 * finity and NaN.
804 */
805 (*EMAX)--;
806 }
807 /*
808 * Now create RMAX, the largest machine number, which should be equal to
809 * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
810 * careful that the result is less than 1.0.
811 */
812 recbas = HPL_rone / (double)(BETA);
813 z = (double)(BETA) - HPL_rone;
814 y = HPL_rzero;
815
816 for( i = 0; i < P; i++ )
817 { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
818
819 if( y >= HPL_rone ) y = oldy;
820 /*
821 * Now multiply by BETA**EMAX to get RMAX.
822 */
823 for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero );
824
825 *RMAX = y;
826 /*
827 * End of HPL_dlamch
828 */
829 }
830
831 #ifdef STDC_HEADERS
HPL_dipow(const double X,const int N)832 static double HPL_dipow
833 (
834 const double X,
835 const int N
836 )
837 #else
838 static double HPL_dipow( X, N )
839 /*
840 * .. Scalar Arguments ..
841 */
842 const int N;
843 const double X;
844 #endif
845 {
846 /*
847 * Purpose
848 * =======
849 *
850 * HPL_dipow computes the integer n-th power of a real scalar x.
851 *
852 * Arguments
853 * =========
854 *
855 * X (local input) const double
856 * The real scalar x.
857 *
858 * N (local input) const int
859 * The integer power to raise x to.
860 *
861 * ---------------------------------------------------------------------
862 */
863 /*
864 * .. Local Variables ..
865 */
866 double r, y=HPL_rone;
867 int k, n;
868 /* ..
869 * .. Executable Statements ..
870 */
871 if( X == HPL_rzero ) return( HPL_rzero );
872 if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; }
873 for( k = 0; k < n; k++ ) y *= r;
874
875 return( y );
876 }
877