1 /* Time routines for speed measurements.
2 
3 Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 
32 /* Usage:
33 
34    The code in this file implements the lowest level of time measuring,
35    simple one-time measuring of time between two points.
36 
37    void speed_starttime (void)
38    double speed_endtime (void)
39        Call speed_starttime to start measuring, and then call speed_endtime
40        when done.
41 
42        speed_endtime returns the time taken, in seconds.  Or if the timebase
43        is in CPU cycles and the CPU frequency is unknown then speed_endtime
44        returns cycles.  Applications can identify the cycles return by
45        checking for speed_cycletime (described below) equal to 1.0.
46 
47        If some sort of temporary glitch occurs then speed_endtime returns
48        0.0.  Currently this is for various cases where a negative time has
49        occurred.  This unfortunately occurs with getrusage on some systems,
50        and with the hppa cycle counter on hpux.
51 
52    double speed_cycletime
53        The time in seconds for each CPU cycle.  For example on a 100 MHz CPU
54        this would be 1.0e-8.
55 
56        If the CPU frequency is unknown, then speed_cycletime is either 0.0
57        or 1.0.  It's 0.0 when speed_endtime is returning seconds, or it's
58        1.0 when speed_endtime is returning cycles.
59 
60        It may be noted that "speed_endtime() / speed_cycletime" gives a
61        measured time in cycles, irrespective of whether speed_endtime is
62        returning cycles or seconds.  (Assuming cycles can be had, ie. it's
63        either cycles already or the cpu frequency is known.  See also
64        speed_cycletime_need_cycles below.)
65 
66    double speed_unittime
67        The unit of time measurement accuracy for the timing method in use.
68        This is in seconds or cycles, as per speed_endtime.
69 
70    char speed_time_string[]
71        A null-terminated string describing the time method in use.
72 
73    void speed_time_init (void)
74        Initialize time measuring.  speed_starttime() does this
75        automatically, so it's only needed if an application wants to inspect
76        the above global variables before making a measurement.
77 
78    int speed_precision
79        The intended accuracy of time measurements.  speed_measure() in
80        common.c for instance runs target routines with enough repetitions so
81        it takes at least "speed_unittime * speed_precision" (this expression
82        works for both cycles or seconds from speed_endtime).
83 
       A program can provide an option so the user can set speed_precision.
85        If speed_precision is zero when speed_time_init or speed_starttime
86        first run then it gets a default based on the measuring method
87        chosen.  (More precision for higher accuracy methods.)
88 
89    void speed_cycletime_need_seconds (void)
90        Call this to demand that speed_endtime will return seconds, and not
91        cycles.  If only cycles are available then an error is printed and
92        the program exits.
93 
94    void speed_cycletime_need_cycles (void)
95        Call this to demand that speed_cycletime is non-zero, so that
96        "speed_endtime() / speed_cycletime" will give times in cycles.
97 
98 
99 
100    Notes:
101 
102    Various combinations of cycle counter, read_real_time(), getrusage(),
103    gettimeofday() and times() can arise, according to which are available
104    and their precision.
105 
106 
107    Allowing speed_endtime() to return either seconds or cycles is only a
108    slight complication and makes it possible for the speed program to do
109    some sensible things without demanding the CPU frequency.  If seconds are
110    being measured then it can always print seconds, and if cycles are being
111    measured then it can always print them without needing to know how long
112    they are.  Also the tune program doesn't care at all what the units are.
113 
114    GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
115    fail.  This will be needed if times in seconds are wanted but a cycle
116    counter is being used, or if times in cycles are wanted but getrusage or
117    another seconds based timer is in use.
118 
119    If the measuring method uses a cycle counter but supplements it with
120    getrusage or the like, then knowing the CPU frequency is mandatory since
121    the code compares values from the two.
122 
123 
124    Not done:
125 
126    Solaris gethrtime() seems no more than a slow way to access the Sparc V9
127    cycle counter.  gethrvtime() seems to be relevant only to light weight
128    processes, it doesn't for instance give nanosecond virtual time.  So
129    neither of these are used.
130 
131 
132    Bugs:
133 
134    getrusage_microseconds_p is fundamentally flawed, getrusage and
135    gettimeofday can have resolutions other than clock ticks or microseconds,
136    for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
137 
138 
139    Enhancements:
140 
141    The SGI hardware counter has 64 bits on some machines, which could be
142    used when available.  But perhaps 32 bits is enough range, and then rely
143    on the getrusage supplement.
144 
145    Maybe getrusage (or times) should be used as a supplement for any
146    wall-clock measuring method.  Currently a wall clock with a good range
147    (eg. a 64-bit cycle counter) is used without a supplement.
148 
149    On PowerPC the timebase registers could be used, but would have to do
150    something to find out the speed.  On 6xx chips it's normally 1/4 bus
151    speed, on 4xx chips it's either that or an external clock.  Measuring
152    against gettimeofday might be ok.  */
153 
154 #include "config.h"
155 
156 #include <errno.h>
157 #include <setjmp.h>
158 #include <signal.h>
159 #include <stddef.h>
160 #include <stdio.h>
161 #include <string.h>
162 #include <stdlib.h> /* for getenv() */
163 
164 #if HAVE_FCNTL_H
165 #include <fcntl.h>  /* for open() */
166 #endif
167 
168 #if HAVE_STDINT_H
169 #include <stdint.h> /* for uint64_t */
170 #endif
171 
172 #if HAVE_UNISTD_H
173 #include <unistd.h> /* for sysconf() */
174 #endif
175 
176 #include <sys/types.h>
177 
178 #if TIME_WITH_SYS_TIME
179 # include <sys/time.h>  /* for struct timeval */
180 # include <time.h>
181 #else
182 # if HAVE_SYS_TIME_H
183 #  include <sys/time.h>
184 # else
185 #  include <time.h>
186 # endif
187 #endif
188 
189 #if HAVE_SYS_MMAN_H
190 #include <sys/mman.h>      /* for mmap() */
191 #endif
192 
193 #if HAVE_SYS_RESOURCE_H
194 #include <sys/resource.h>  /* for struct rusage */
195 #endif
196 
197 #if HAVE_SYS_SYSSGI_H
198 #include <sys/syssgi.h>    /* for syssgi() */
199 #endif
200 
201 #if HAVE_SYS_SYSTEMCFG_H
202 #include <sys/systemcfg.h> /* for RTC_POWER on AIX */
203 #endif
204 
205 #if HAVE_SYS_TIMES_H
206 #include <sys/times.h>  /* for times() and struct tms */
207 #endif
208 
209 #include "gmp-impl.h"
210 
211 #include "speed.h"
212 
213 
/* strerror is only used for some stuff on newish systems, no need to have a
   proper replacement.  When HAVE_STRERROR is not set, every strerror(n)
   call in this file expands to the fixed string below; the argument is
   discarded.  */
#if ! HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif
219 
220 
/* Public timing state, described in the "Usage" comment at the top of this
   file.  */

/* null-terminated description of the time method in use */
char    speed_time_string[256];
/* intended measuring accuracy; zero means a default is chosen at init */
int     speed_precision = 0;
/* unit of measurement accuracy, in seconds or cycles as per speed_endtime */
double  speed_unittime;
/* seconds per CPU cycle, or 0.0 or 1.0 when the CPU frequency is unknown */
double  speed_cycletime = 0.0;
225 
226 
/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
   native cc.  M_2POWU is therefore built from INT_MAX in double arithmetic:
   (INT_MAX + 1) * 2, ie. one more than the largest "unsigned" value when
   unsigned has one more value bit than int.  */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

/* 2^32 and 2^64 as doubles */
#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)
233 
234 
235 /* Conditionals for the time functions available are done with normal C
236    code, which is a lot easier than wildly nested preprocessor directives.
237 
238    The choice of what to use is partly made at run-time, according to
239    whether the cycle counter works and the measured accuracy of getrusage
240    and gettimeofday.
241 
242    A routine that's not available won't be getting called, but is an abort()
243    to be sure it isn't called mistakenly.
244 
245    It can be assumed that if a function exists then its data type will, but
246    if the function doesn't then the data type might or might not exist, so
247    the type can't be used unconditionally.  The "struct_rusage" etc macros
248    provide dummies when the respective function doesn't exist. */
249 
250 
/* have_cycles takes the value of HAVE_SPEED_CYCLECOUNTER when that is set
   (non-zero), otherwise it is 0 and speed_cyclecounter() is replaced by an
   ASSERT_FAIL so that a mistaken call is caught.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif
257 
/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.

   STCK(timestamp) stores the current value into the stck_t lvalue
   "timestamp".  Where the instruction isn't available have_stck and
   use_stck are 0, stck_t is a dummy type and STCK is an ASSERT_FAIL.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=Q" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
/* seconds per stck tick */
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
276 
/* mftb
   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).

   MFTB(a) fills the two-element unsigned array "a" with a[0] = the mftb
   value and a[1] = the mftbu value.  In the inline asm form mftbu is read
   both before and after mftb, and the whole read is repeated until the two
   mftbu values agree, so the pair stored is consistent.  Without gcc asm
   the work is handed to mftb_function().  On non-powerpc hosts have_mftb is
   0 and MFTB zeros "a" and then fails an assertion.  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
		    "mftb  %1\n"        \
		    "mftbu %2"          \
		    : "=r" (__h1),      \
		      "=r" (__l),       \
		      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif
309 
/* Unicos 10.X has syssgi(), but not mmap().  Both are required for the SGI
   counter, so have_sgi is 1 only when both were found.  */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif
316 
/* read_real_time().  When it's not available have_rrt is 0, the two
   functions become ASSERT_FAILs, and dummy RTC_POWER / RTC_POWER_PC
   constants and a dummy timebasestruct_t are supplied so code referring to
   them still compiles.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif
332 
/* For each of clock_gettime, getrusage, gettimeofday and times: a have_xxx
   flag saying whether configure found the function, and a struct_xxx macro
   naming either the real data type or a dummy one.  When the function is
   missing, its name is defined to an ASSERT_FAIL so a mistaken call is
   caught.  */

/* clock_gettime and clock_getres; the replacements yield -1 since callers
   test the return value */
#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif

/* getrusage */
#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif

/* gettimeofday */
#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval   struct timeval_dummy
#endif

/* times */
#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif
369 
/* Stand-in types, selected by the struct_tms, struct_timeval, struct_rusage
   and struct_timespec macros when the corresponding function is not
   available.  They are declared unconditionally.  */

/* in place of struct tms */
struct tms_dummy {
  long  tms_utime;
};
/* in place of struct timeval */
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
/* in place of struct rusage; ru_utime is whichever timeval type is in use */
struct rusage_dummy {
  struct_timeval ru_utime;
};
/* in place of struct timespec */
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};
384 
/* One flag per measuring method.  NOTE(review): these are presumably set
   during initialization to say which of the available methods were chosen;
   the code doing so is not in this part of the file.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* One starting reading per measuring method, each in that method's own data
   type.  NOTE(review): presumably recorded by speed_starttime() and compared
   against in speed_endtime() -- confirm against those functions.  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* cycles_limit starts out effectively unlimited.  The xxx_unittime values
   are per-method resolutions; within this section mftb_unittime,
   sgi_unittime and cgt_unittime are set by the respective xxx_works_p()
   routines.  */
static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
415 
416 
/* Return a string representing a time in seconds, nicely formatted.
   Eg. "10.25ms".

   T is scaled to nanoseconds, microseconds, milliseconds or left in seconds
   according to its size, then printed with a number of decimals chosen to
   give about 4 significant figures.

   The return value points to a static buffer which is overwritten by the
   next call, so it must be used or copied before calling again.

   The formatting is bounded by the buffer size: "%f" of a very large T
   expands to hundreds of digits, and such output is truncated rather than
   being allowed to run off the end of the buffer.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *unit;
  int         prec;

  /* choose units and scale */
  if (t < 1e-6)
    t *= 1e9, unit = "ns";
  else if (t < 1e-3)
    t *= 1e6, unit = "us";
  else if (t < 1.0)
    t *= 1e3, unit = "ms";
  else
    unit = "s";

  /* want 4 significant figures */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  /* snprintf always null-terminates within sizeof (buf) */
  snprintf (buf, sizeof (buf), "%.*f%s", prec, t, unit);
  return buf;
}
450 
451 
/* setjmp context used by cycles_works_p() to get control back if trying
   speed_cyclecounter() raises SIGILL */
static jmp_buf  cycles_works_buf;

/* SIGILL handler installed by cycles_works_p().  Jumps back to the setjmp
   point recorded in cycles_works_buf, making that setjmp return 1.  The
   signal number "sig" is not used.  */
static RETSIGTYPE
cycles_works_handler (int sig)
{
  longjmp (cycles_works_buf, 1);
}
459 
460 int
cycles_works_p(void)461 cycles_works_p (void)
462 {
463   static int  result = -1;
464 
465   if (result != -1)
466     goto done;
467 
468   /* FIXME: On linux, the cycle counter is not saved and restored over
469    * context switches, making it almost useless for precise cputime
470    * measurements. When available, it's better to use clock_gettime,
471    * which seems to have reasonable accuracy (tested on x86_32,
472    * linux-2.6.26, glibc-2.7). However, there are also some linux
473    * systems where clock_gettime is broken in one way or the other,
474    * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
475    * kind-of implemented but broken (needs code to detect that), and
476    * on those systems a wall-clock cycle counter is the least bad
477    * fallback.
478    *
479    * So we need some code to disable the cycle counter on some but not
480    * all linux systems. */
481 #ifdef SIGILL
482   {
483     RETSIGTYPE (*old_handler) (int);
484     unsigned  cycles[2];
485 
486     old_handler = signal (SIGILL, cycles_works_handler);
487     if (old_handler == SIG_ERR)
488       {
489 	if (speed_option_verbose)
490 	  printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
491 	goto yes;
492       }
493     if (setjmp (cycles_works_buf))
494       {
495 	if (speed_option_verbose)
496 	  printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
497 	result = 0;
498 	goto done;
499       }
500     speed_cyclecounter (cycles);
501     signal (SIGILL, old_handler);
502     if (speed_option_verbose)
503       printf ("cycles_works_p(): speed_cyclecounter() works\n");
504   }
505 #else
506 
507   if (speed_option_verbose)
508     printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
509   goto yes;
510 #endif
511 
512  yes:
513   result = 1;
514 
515  done:
516   return result;
517 }
518 
519 
/* Clock ticks per second.  sysconf(_SC_CLK_TCK) is preferred where sysconf
   exists, with the CLK_TCK constant as the fallback.  The value found is
   remembered, so the lookup (and its verbose report) normally happens only
   once.  If neither source gives an answer the program is aborted.  */
long
clk_tck (void)
{
  static long  cached = -1L;

  if (cached == -1L)
    {
#if HAVE_SYSCONF
      cached = sysconf (_SC_CLK_TCK);
      if (cached != -1L)
	{
	  if (speed_option_verbose)
	    printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", cached);
	  return cached;
	}

      fprintf (stderr,
	       "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
#endif

#ifdef CLK_TCK
      cached = CLK_TCK;
      if (speed_option_verbose)
	printf ("CLK_TCK is %ld per second\n", cached);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return cached;
}
552 
553 
/* If two times can be observed less than half a clock tick apart, then
   assume "get" is microsecond accurate.

   Two times only 1 microsecond apart are not believed, since some kernels
   take it upon themselves to ensure gettimeofday doesn't return the same
   value twice, for the benefit of applications using it for a timestamp.
   This is obviously very stupid given the speed of CPUs these days.

   Making "reps" many calls to noop_1() is designed to waste some CPU, with
   a view to getting measurements 2 microseconds (or more) apart.  "reps" is
   increased progressively until such a period is seen.

   The outer loop "attempts" are just to allow for any random nonsense or
   system load upsetting the measurements (ie. making two successive calls
   to "get" come out as a longer interval than normal).

   Macro parameters:

   name   string naming the time function, used in the verbose messages
   type   data type of the two time readings
   get    get(t) is used as a statement and must store the current time
	  into the variable t
   sec    sec(t) gives the whole seconds part of a reading t
   usec   usec(t) gives the microseconds part of a reading t

   The expansion is a complete function body, braces included, for a
   function returning int: 1 if "get" looks microsecond accurate, 0 if not.
   The answer is kept in a static so the test is only run on the first
   call.

   Bugs:

   The assumption that any interval less than a half tick implies
   microsecond resolution is obviously fairly rash, the true resolution
   could be anything between a microsecond and that half tick.  Perhaps
   something special would have to be done on a system where this is the
   case, since there's no obvious reliable way to detect it
   automatically.  */

#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
									\
    if (result != -1)                                                   \
      return result;                                                    \
									\
    result = 0;                                                         \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
									\
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
	reps = 0;                                                       \
	for (;;)                                                        \
	  {                                                             \
	    get (st);                                                   \
	    for (i = 0; i < reps; i++)                                  \
	      for (j = 0; j < 100; j++)                                 \
		noop_1 (CNST_LIMB(0));                                  \
	    get (et);                                                   \
									\
	    dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
									\
	    if (speed_option_verbose >= 2)                              \
	      printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
		      name, attempt, reps, dt);                         \
									\
	    if (dt >= 2)                                                \
	      break;                                                    \
									\
	    reps = (reps == 0 ? 1 : 2*reps);                            \
	    if (reps == 0)                                              \
	      break;  /* uint overflow, not normal */                   \
	  }                                                             \
									\
	if (dt < half_tick)                                             \
	  {                                                             \
	    result = 1;                                                 \
	    break;                                                      \
	  }                                                             \
      }                                                                 \
									\
    if (speed_option_verbose)                                           \
      {                                                                 \
	if (result)                                                     \
	  printf ("%s is microsecond accurate\n", name);                \
	else                                                            \
	  printf ("%s is only %s clock tick accurate\n",                \
		  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }
634 
635 
/* Return 1 if gettimeofday() appears to be microsecond accurate, or 0 if it
   looks to be only clock tick accurate.  The function body is the
   MICROSECONDS_P expansion; the three helper macros adapt gettimeofday and
   its timeval type to what MICROSECONDS_P expects.  Note the helpers are
   ordinary #defines and so stay defined after this function.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
		  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}
645 
/* Return 1 if getrusage() appears to be microsecond accurate, or 0 if it
   looks to be only clock tick accurate.  The function body is the
   MICROSECONDS_P expansion, reading the ru_utime field obtained from
   getrusage (0, ...).  The helper macros are ordinary #defines and so stay
   defined after this function.  */
int
getrusage_microseconds_p (void)
{
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
		  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}
655 
656 /* Test whether getrusage goes backwards, return non-zero if it does
657    (suggesting it's flawed).
658 
659    On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
660    microsecond accurate, but has been seen remaining unchanged after many
661    microseconds have elapsed.  It also regularly goes backwards by 1000 to
662    5000 usecs, this has been seen after between 500 and 4000 attempts taking
663    perhaps 0.03 seconds.  We consider this too broken for good measuring.
664    We used to have configure pretend getrusage didn't exist on this system,
665    but a runtime test should be more reliable, since we imagine the problem
666    is not confined to just this exact system tuple.  */
667 
668 int
getrusage_backwards_p(void)669 getrusage_backwards_p (void)
670 {
671   static int result = -1;
672   struct rusage  start, prev, next;
673   long  d;
674   int   i;
675 
676   if (result != -1)
677     return result;
678 
679   getrusage (0, &start);
680   memcpy (&next, &start, sizeof (next));
681 
682   result = 0;
683   i = 0;
684   for (;;)
685     {
686       memcpy (&prev, &next, sizeof (prev));
687       getrusage (0, &next);
688 
689       if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
690 	  || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
691 	      && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
692 	{
693 	  if (speed_option_verbose)
694 	    printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
695 		    i,
696 		    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
697 		    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
698 	  result = 1;
699 	  break;
700 	}
701 
702       /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
703 	 attempts, whichever comes first */
704       d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
705 	+ (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
706       i++;
707       if (i > 50000 || (i > 1000 && d > 100000))
708 	break;
709     }
710 
711   return result;
712 }
713 
/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
   of glibc (some time post 2.2).

   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
   defined, but returning -1 for an error).

   CGT_ID is the clock id passed to clock_gettime and clock_getres:
   CLOCK_PROCESS_CPUTIME_ID if defined, otherwise CLOCK_VIRTUAL if defined.
   If neither is defined then have_cgt_id is 0 and CGT_ID becomes an
   ASSERT_FAIL expression with value -1.  */

#ifdef CLOCK_PROCESS_CPUTIME_ID
# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
#else
# ifdef CLOCK_VIRTUAL
#  define CGT_ID       CLOCK_VIRTUAL
# endif
#endif
#ifdef CGT_ID
const int  have_cgt_id = 1;
#else
const int  have_cgt_id = 0;
# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
#endif

/* iterations of the delay loop cgt_works_p() uses as a sanity check */
#define CGT_DELAY_COUNT 1000
735 
736 int
cgt_works_p(void)737 cgt_works_p (void)
738 {
739   static int  result = -1;
740   struct_timespec  unit;
741 
742   if (! have_cgt)
743     return 0;
744 
745   if (! have_cgt_id)
746     {
747       if (speed_option_verbose)
748 	printf ("clock_gettime don't know what ID to use\n");
749       result = 0;
750       return result;
751     }
752 
753   if (result != -1)
754     return result;
755 
756   /* trial run to see if it works */
757   if (clock_gettime (CGT_ID, &unit) != 0)
758     {
759       if (speed_option_verbose)
760 	printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
761       result = 0;
762       return result;
763     }
764 
765   /* get the resolution */
766   if (clock_getres (CGT_ID, &unit) != 0)
767     {
768       if (speed_option_verbose)
769 	printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
770       result = 0;
771       return result;
772     }
773 
774   cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
775   if (speed_option_verbose)
776     printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
777 
778   if (cgt_unittime < 10e-9)
779     {
780       /* Do we believe this? */
781       struct timespec start, end;
782       static volatile int counter;
783       double duration;
784       if (clock_gettime (CGT_ID, &start))
785 	{
786 	  if (speed_option_verbose)
787 	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
788 	  result = 0;
789 	  return result;
790 	}
791       /* Loop of at least 1000 memory accesses, ought to take at
792 	 least 100 ns*/
793       for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
794 	;
795       if (clock_gettime (CGT_ID, &end))
796 	{
797 	  if (speed_option_verbose)
798 	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
799 	  result = 0;
800 	  return result;
801 	}
802       duration = (end.tv_sec + end.tv_nsec * 1e-9
803 		  - start.tv_sec - start.tv_nsec * 1e-9);
804       if (speed_option_verbose)
805 	printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
806 		CGT_DELAY_COUNT, unittime_string (duration));
807       if (duration < 100e-9)
808 	{
809 	  if (speed_option_verbose)
810 	    printf ("clock_gettime id=%d not believable\n", CGT_ID);
811 	  result = 0;
812 	  return result;
813 	}
814     }
815   result = 1;
816   return result;
817 }
818 
819 
/* One measurement of the mftb period, passed to freq_measure() by
   mftb_works_p().  The body is supplied entirely by the FREQ_MEASURE_ONE
   macro, which is defined outside this part of the file; it is given
   gettimeofday and MFTB as the two things to read, plus accessors for the
   timeval seconds and microseconds.  NOTE(review): presumably it times MFTB
   against gettimeofday and returns the period in seconds -- confirm against
   the macro definition.

   The three helper macros repeat, identically, the definitions made in
   gettimeofday_microseconds_p.  */
static double
freq_measure_mftb_one (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  FREQ_MEASURE_ONE ("mftb", struct_timeval,
		    call_gettimeofday, MFTB,
		    timeval_tv_sec, timeval_tv_usec);
}
830 
831 
/* setjmp context used by mftb_works_p() to get control back if executing
   MFTB raises SIGILL */
static jmp_buf  mftb_works_buf;

/* SIGILL handler installed by mftb_works_p().  Jumps back to the setjmp
   point recorded in mftb_works_buf, making that setjmp return 1.  The
   signal number "sig" is not used.  */
static RETSIGTYPE
mftb_works_handler (int sig)
{
  longjmp (mftb_works_buf, 1);
}
839 
840 int
mftb_works_p(void)841 mftb_works_p (void)
842 {
843   unsigned   a[2];
844   RETSIGTYPE (*old_handler) (int);
845   double     cycletime;
846 
847   /* suppress a warning about a[] unused */
848   a[0] = 0;
849 
850   if (! have_mftb)
851     return 0;
852 
853 #ifdef SIGILL
854   old_handler = signal (SIGILL, mftb_works_handler);
855   if (old_handler == SIG_ERR)
856     {
857       if (speed_option_verbose)
858 	printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
859       return 1;
860     }
861   if (setjmp (mftb_works_buf))
862     {
863       if (speed_option_verbose)
864 	printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
865       return 0;
866     }
867   MFTB (a);
868   signal (SIGILL, old_handler);
869   if (speed_option_verbose)
870     printf ("mftb_works_p(): mftb works\n");
871 #else
872 
873   if (speed_option_verbose)
874     printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
875 #endif
876 
877 #if ! HAVE_GETTIMEOFDAY
878   if (speed_option_verbose)
879     printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
880   return 0;
881 #endif
882 
883   /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
884      other chips it can be driven from an external clock. */
885   cycletime = freq_measure ("mftb", freq_measure_mftb_one);
886   if (cycletime == -1.0)
887     {
888       if (speed_option_verbose)
889 	printf ("mftb_works_p(): cannot measure mftb period\n");
890       return 0;
891     }
892 
893   mftb_unittime = cycletime;
894   return 1;
895 }
896 
897 
/* Address of the least significant 32 bits of the memory-mapped SGI
   hardware cycle counter.  Only valid after sgi_works_p() has returned 1.  */
volatile unsigned  *sgi_addr;

/* Determine whether the syssgi() memory-mapped cycle counter can be used.

   On success sgi_unittime is set to the counter period in seconds,
   sgi_addr is pointed at the low 32 bits of the counter, and 1 is returned.
   On any failure 0 is returned.  The outcome is remembered in a static, so
   the probing is done at most once.

   When syssgi() or mmap() is not available at build time the return is
   always 0.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
        {
          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
          /* size is a bit count here, so the fallback is 32, not 4 */
          printf ("    will assume size==32\n");
        }
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  /* Split the physical address into a page base and an offset in the page,
     since only whole pages can be mapped.  */
  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
        printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  if (virtpage == (void *) -1)
    {
      /* report before close(), which could overwrite errno */
      if (speed_option_verbose)
        printf ("mmap /dev/mmem: %s\n", strerror (errno));
      close (fd);
      result = 0;
      return result;
    }

  /* The descriptor is not needed once the mapping exists; a mapping is not
     removed by closing the descriptor it was created from.  */
  close (fd);

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
                           + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
986 
987 
988 #define DEFAULT(var,n)  \
989   do {                  \
990     if (! (var))        \
991       (var) = (n);      \
992   } while (0)
993 
994 void
speed_time_init(void)995 speed_time_init (void)
996 {
997   double supplement_unittime = 0.0;
998 
999   static int  speed_time_initialized = 0;
1000   if (speed_time_initialized)
1001     return;
1002   speed_time_initialized = 1;
1003 
1004   speed_cycletime_init ();
1005 
1006   if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
1007     {
1008       use_cycles = 1;
1009       DEFAULT (speed_cycletime, 1.0);
1010       speed_unittime = speed_cycletime;
1011       DEFAULT (speed_precision, 10000);
1012       strcpy (speed_time_string, "CPU cycle counter");
1013 
1014       /* only used if a supplementary method is chosen below */
1015       cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
1016 	* speed_cycletime;
1017 
1018       if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1019 	{
1020 	  /* this is a good combination */
1021 	  use_grus = 1;
1022 	  supplement_unittime = grus_unittime = 1.0e-6;
1023 	  strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
1024 	}
1025       else if (have_cycles == 1)
1026 	{
1027 	  /* When speed_cyclecounter has a limited range, look for something
1028 	     to supplement it. */
1029 	  if (have_gtod && gettimeofday_microseconds_p())
1030 	    {
1031 	      use_gtod = 1;
1032 	      supplement_unittime = gtod_unittime = 1.0e-6;
1033 	      strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
1034 	    }
1035 	  else if (have_grus)
1036 	    {
1037 	      use_grus = 1;
1038 	      supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1039 	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
1040 	    }
1041 	  else if (have_times)
1042 	    {
1043 	      use_times = 1;
1044 	      supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
1045 	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
1046 	    }
1047 	  else if (have_gtod)
1048 	    {
1049 	      use_gtod = 1;
1050 	      supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1051 	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
1052 	    }
1053 	  else
1054 	    {
1055 	      fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
1056 	      fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
1057 	    }
1058 	}
1059 
1060       if (use_grus || use_times || use_gtod)
1061 	{
1062 	  /* must know cycle period to compare cycles to other measuring
1063 	     (via cycles_limit) */
1064 	  speed_cycletime_need_seconds ();
1065 
1066 	  if (speed_precision * supplement_unittime > cycles_limit)
1067 	    {
1068 	      fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
1069 	      fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
1070 	      fprintf (stderr, "    (%s)\n", speed_time_string);
1071 	    }
1072 	}
1073     }
1074   else if (have_stck)
1075     {
1076       strcpy (speed_time_string, "STCK timestamp");
1077       /* stck is in units of 2^-12 microseconds, which is very likely higher
1078 	 resolution than a cpu cycle */
1079       if (speed_cycletime == 0.0)
1080 	speed_cycletime_fail
1081 	  ("Need to know CPU frequency for effective stck unit");
1082       speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
1083       DEFAULT (speed_precision, 10000);
1084     }
1085   else if (have_mftb && mftb_works_p ())
1086     {
1087       use_mftb = 1;
1088       DEFAULT (speed_precision, 10000);
1089       speed_unittime = mftb_unittime;
1090       sprintf (speed_time_string, "mftb counter (%s)",
1091 	       unittime_string (speed_unittime));
1092     }
1093   else if (have_sgi && sgi_works_p ())
1094     {
1095       use_sgi = 1;
1096       DEFAULT (speed_precision, 10000);
1097       speed_unittime = sgi_unittime;
1098       sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
1099 	       unittime_string (speed_unittime));
1100       /* supplemented with getrusage, which we assume to have 1ms resolution */
1101       use_grus = 1;
1102       supplement_unittime = 1e-3;
1103     }
1104   else if (have_rrt)
1105     {
1106       timebasestruct_t  t;
1107       use_rrt = 1;
1108       DEFAULT (speed_precision, 10000);
1109       read_real_time (&t, sizeof(t));
1110       switch (t.flag) {
1111       case RTC_POWER:
1112 	/* FIXME: What's the actual RTC resolution? */
1113 	speed_unittime = 1e-7;
1114 	strcpy (speed_time_string, "read_real_time() power nanoseconds");
1115 	break;
1116       case RTC_POWER_PC:
1117 	t.tb_high = 1;
1118 	t.tb_low = 0;
1119 	time_base_to_time (&t, sizeof(t));
1120 	speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
1121 	sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
1122 		 unittime_string (speed_unittime));
1123 	break;
1124       default:
1125 	fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
1126 		 t.flag);
1127 	abort ();
1128       }
1129     }
1130   else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
1131     {
1132       /* use clock_gettime if microsecond or better resolution */
1133     choose_cgt:
1134       use_cgt = 1;
1135       speed_unittime = cgt_unittime;
1136       DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
1137       strcpy (speed_time_string, "microsecond accurate clock_gettime()");
1138     }
1139   else if (have_times && clk_tck() > 1000000)
1140     {
1141       /* Cray vector systems have times() which is clock cycle resolution
1142 	 (eg. 450 MHz).  */
1143       DEFAULT (speed_precision, 10000);
1144       goto choose_times;
1145     }
1146   else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1147     {
1148       use_grus = 1;
1149       speed_unittime = grus_unittime = 1.0e-6;
1150       DEFAULT (speed_precision, 1000);
1151       strcpy (speed_time_string, "microsecond accurate getrusage()");
1152     }
1153   else if (have_gtod && gettimeofday_microseconds_p())
1154     {
1155       use_gtod = 1;
1156       speed_unittime = gtod_unittime = 1.0e-6;
1157       DEFAULT (speed_precision, 1000);
1158       strcpy (speed_time_string, "microsecond accurate gettimeofday()");
1159     }
1160   else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
1161     {
1162       /* use clock_gettime if 1 tick or better resolution */
1163       goto choose_cgt;
1164     }
1165   else if (have_times)
1166     {
1167       use_tick_boundary = 1;
1168       DEFAULT (speed_precision, 200);
1169     choose_times:
1170       use_times = 1;
1171       speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
1172       sprintf (speed_time_string, "%s clock tick times()",
1173 	       unittime_string (speed_unittime));
1174     }
1175   else if (have_grus)
1176     {
1177       use_grus = 1;
1178       use_tick_boundary = 1;
1179       speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1180       DEFAULT (speed_precision, 200);
1181       sprintf (speed_time_string, "%s clock tick getrusage()\n",
1182 	       unittime_string (speed_unittime));
1183     }
1184   else if (have_gtod)
1185     {
1186       use_gtod = 1;
1187       use_tick_boundary = 1;
1188       speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1189       DEFAULT (speed_precision, 200);
1190       sprintf (speed_time_string, "%s clock tick gettimeofday()",
1191 	       unittime_string (speed_unittime));
1192     }
1193   else
1194     {
1195       fprintf (stderr, "No time measuring method available\n");
1196       fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
1197       abort ();
1198     }
1199 
1200   if (speed_option_verbose)
1201     {
1202       printf ("speed_time_init: %s\n", speed_time_string);
1203       printf ("    speed_precision     %d\n", speed_precision);
1204       printf ("    speed_unittime      %.2g\n", speed_unittime);
1205       if (supplement_unittime)
1206 	printf ("    supplement_unittime %.2g\n", supplement_unittime);
1207       printf ("    use_tick_boundary   %d\n", use_tick_boundary);
1208       if (have_cycles)
1209 	printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
1210     }
1211 }
1212 
1213 
1214 
1215 /* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
1216    corresponding "start_foo" appropriately too. */
1217 
1218 void
grus_tick_boundary(void)1219 grus_tick_boundary (void)
1220 {
1221   struct_rusage  prev;
1222   getrusage (0, &prev);
1223   do {
1224     getrusage (0, &start_grus);
1225   } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
1226 }
1227 
1228 void
gtod_tick_boundary(void)1229 gtod_tick_boundary (void)
1230 {
1231   struct_timeval  prev;
1232   gettimeofday (&prev, NULL);
1233   do {
1234     gettimeofday (&start_gtod, NULL);
1235   } while (start_gtod.tv_usec == prev.tv_usec);
1236 }
1237 
1238 void
times_tick_boundary(void)1239 times_tick_boundary (void)
1240 {
1241   struct_tms  prev;
1242   times (&prev);
1243   do
1244     times (&start_times);
1245   while (start_times.tms_utime == prev.tms_utime);
1246 }
1247 
1248 
1249 /* "have_" values are tested to let unused code go dead.  */
1250 
1251 void
speed_starttime(void)1252 speed_starttime (void)
1253 {
1254   speed_time_init ();
1255 
1256   if (have_grus && use_grus)
1257     {
1258       if (use_tick_boundary)
1259 	grus_tick_boundary ();
1260       else
1261 	getrusage (0, &start_grus);
1262     }
1263 
1264   if (have_gtod && use_gtod)
1265     {
1266       if (use_tick_boundary)
1267 	gtod_tick_boundary ();
1268       else
1269 	gettimeofday (&start_gtod, NULL);
1270     }
1271 
1272   if (have_times && use_times)
1273     {
1274       if (use_tick_boundary)
1275 	times_tick_boundary ();
1276       else
1277 	times (&start_times);
1278     }
1279 
1280   if (have_cgt && use_cgt)
1281     clock_gettime (CGT_ID, &start_cgt);
1282 
1283   if (have_rrt && use_rrt)
1284     read_real_time (&start_rrt, sizeof(start_rrt));
1285 
1286   if (have_sgi && use_sgi)
1287     start_sgi = *sgi_addr;
1288 
1289   if (have_mftb && use_mftb)
1290     MFTB (start_mftb);
1291 
1292   if (have_stck && use_stck)
1293     STCK (start_stck);
1294 
1295   /* Cycles sampled last for maximum accuracy. */
1296   if (have_cycles && use_cycles)
1297     speed_cyclecounter (start_cycles);
1298 }
1299 
1300 
1301 /* Calculate the difference between two cycle counter samples, as a "double"
1302    counter of cycles.
1303 
1304    The start and end values are allowed to cancel in integers in case the
1305    counter values are bigger than the 53 bits that normally fit in a double.
1306 
1307    This works even if speed_cyclecounter() puts a value bigger than 32-bits
1308    in the low word (the high word always gets a 2**32 multiplier though). */
1309 
1310 double
speed_cyclecounter_diff(const unsigned end[2],const unsigned start[2])1311 speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1312 {
1313   unsigned  d;
1314   double    t;
1315 
1316   if (have_cycles == 1)
1317     {
1318       t = (end[0] - start[0]);
1319     }
1320   else
1321     {
1322       d = end[0] - start[0];
1323       t = d - (d > end[0] ? M_2POWU : 0.0);
1324       t += (end[1] - start[1]) * M_2POW32;
1325     }
1326   return t;
1327 }
1328 
1329 
1330 double
speed_mftb_diff(const unsigned end[2],const unsigned start[2])1331 speed_mftb_diff (const unsigned end[2], const unsigned start[2])
1332 {
1333   unsigned  d;
1334   double    t;
1335 
1336   d = end[0] - start[0];
1337   t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
1338   t += (end[1] - start[1]) * M_2POW32;
1339   return t;
1340 }
1341 
1342 
1343 /* Calculate the difference between "start" and "end" using fields "sec" and
1344    "psec", where each "psec" is a "punit" of a second.
1345 
1346    The seconds parts are allowed to cancel before being combined with the
1347    psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
1348    double.
1349 
1350    Total time is only calculated in a "double" since an integer count of
1351    psecs might overflow.  2^32 microseconds is only a bit over an hour, or
1352    2^32 nanoseconds only about 4 seconds.
1353 
1354    The casts to "long" are for the benefit of timebasestruct_t, where the
1355    fields are only "unsigned int", but we want a signed difference.  */
1356 
1357 #define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
1358   {                                                             \
1359     long  sec_diff, psec_diff;                                  \
1360     sec_diff = (long) end->sec - (long) start->sec;             \
1361     psec_diff = (long) end->psec - (long) start->psec;          \
1362     return (double) sec_diff + punit * (double) psec_diff;      \
1363   }
1364 
1365 double
timeval_diff_secs(const struct_timeval * end,const struct_timeval * start)1366 timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1367 {
1368   DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1369 }
1370 
1371 double
rusage_diff_secs(const struct_rusage * end,const struct_rusage * start)1372 rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1373 {
1374   DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1375 }
1376 
1377 double
timespec_diff_secs(const struct_timespec * end,const struct_timespec * start)1378 timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1379 {
1380   DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1381 }
1382 
1383 /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1384 double
timebasestruct_diff_secs(const timebasestruct_t * end,const timebasestruct_t * start)1385 timebasestruct_diff_secs (const timebasestruct_t *end,
1386 			  const timebasestruct_t *start)
1387 {
1388   DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1389 }
1390 
1391 
1392 double
speed_endtime(void)1393 speed_endtime (void)
1394 {
1395 #define END_USE(name,value)                             \
1396   do {                                                  \
1397     if (speed_option_verbose >= 3)                      \
1398       printf ("speed_endtime(): used %s\n", name);      \
1399     result = value;                                     \
1400     goto done;                                          \
1401   } while (0)
1402 
1403 #define END_ENOUGH(name,value)                                          \
1404   do {                                                                  \
1405     if (speed_option_verbose >= 3)                                      \
1406       printf ("speed_endtime(): %s gives enough precision\n", name);    \
1407     result = value;                                                     \
1408     goto done;                                                          \
1409   } while (0)
1410 
1411 #define END_EXCEED(name,value)                                            \
1412   do {                                                                    \
1413     if (speed_option_verbose >= 3)                                        \
1414       printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1415 	      name);                                                      \
1416     result = value;                                                       \
1417     goto done;                                                            \
1418   } while (0)
1419 
1420   unsigned          end_cycles[2];
1421   stck_t            end_stck;
1422   unsigned          end_mftb[2];
1423   unsigned          end_sgi;
1424   timebasestruct_t  end_rrt;
1425   struct_timespec   end_cgt;
1426   struct_timeval    end_gtod;
1427   struct_rusage     end_grus;
1428   struct_tms        end_times;
1429   double            t_gtod, t_grus, t_times, t_cgt;
1430   double            t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
1431   double            result;
1432 
1433   /* Cycles sampled first for maximum accuracy.
1434      "have_" values tested to let unused code go dead.  */
1435 
1436   if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
1437   if (have_stck   && use_stck)    STCK (end_stck);
1438   if (have_mftb   && use_mftb)    MFTB (end_mftb);
1439   if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
1440   if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
1441   if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
1442   if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
1443   if (have_grus   && use_grus)    getrusage (0, &end_grus);
1444   if (have_times  && use_times)   times (&end_times);
1445 
1446   result = -1.0;
1447 
1448   if (speed_option_verbose >= 4)
1449     {
1450       printf ("speed_endtime():\n");
1451       if (use_cycles)
1452 	printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
1453 		start_cycles[1], start_cycles[0],
1454 		end_cycles[1], end_cycles[0]);
1455 
1456       if (use_stck)
1457 	printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);
1458 
1459       if (use_mftb)
1460 	printf ("   mftb  0x%X,%08X -> 0x%X,%08X\n",
1461 		start_mftb[1], start_mftb[0],
1462 		end_mftb[1], end_mftb[0]);
1463 
1464       if (use_sgi)
1465 	printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);
1466 
1467       if (use_rrt)
1468 	printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
1469 		start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1470 		end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1471 
1472       if (use_cgt)
1473 	printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
1474 		(long) start_cgt.tv_sec, (long) start_cgt.tv_nsec,
1475 		(long) end_cgt.tv_sec, (long) end_cgt.tv_nsec);
1476 
1477       if (use_gtod)
1478 	printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
1479 		(long) start_gtod.tv_sec,
1480 		(long) start_gtod.tv_usec,
1481 		(long) end_gtod.tv_sec,
1482 		(long) end_gtod.tv_usec);
1483 
1484       if (use_grus)
1485 	printf ("   getrusage  %ld.%06ld -> %ld.%06ld\n",
1486 		(long) start_grus.ru_utime.tv_sec,
1487 		(long) start_grus.ru_utime.tv_usec,
1488 		(long) end_grus.ru_utime.tv_sec,
1489 		(long) end_grus.ru_utime.tv_usec);
1490 
1491       if (use_times)
1492 	printf ("   times  %ld -> %ld\n",
1493 		start_times.tms_utime, end_times.tms_utime);
1494     }
1495 
1496   if (use_rrt)
1497     {
1498       time_base_to_time (&start_rrt, sizeof(start_rrt));
1499       time_base_to_time (&end_rrt, sizeof(end_rrt));
1500       t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1501       END_USE ("read_real_time()", t_rrt);
1502     }
1503 
1504   if (use_cgt)
1505     {
1506       t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1507       END_USE ("clock_gettime()", t_cgt);
1508     }
1509 
1510   if (use_grus)
1511     {
1512       t_grus = rusage_diff_secs (&end_grus, &start_grus);
1513 
1514       /* Use getrusage() if the cycle counter limit would be exceeded, or if
1515 	 it provides enough accuracy already. */
1516       if (use_cycles)
1517 	{
1518 	  if (t_grus >= speed_precision*grus_unittime)
1519 	    END_ENOUGH ("getrusage()", t_grus);
1520 	  if (t_grus >= cycles_limit)
1521 	    END_EXCEED ("getrusage()", t_grus);
1522 	}
1523     }
1524 
1525   if (use_times)
1526     {
1527       t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1528 
1529       /* Use times() if the cycle counter limit would be exceeded, or if
1530 	 it provides enough accuracy already. */
1531       if (use_cycles)
1532 	{
1533 	  if (t_times >= speed_precision*times_unittime)
1534 	    END_ENOUGH ("times()", t_times);
1535 	  if (t_times >= cycles_limit)
1536 	    END_EXCEED ("times()", t_times);
1537 	}
1538     }
1539 
1540   if (use_gtod)
1541     {
1542       t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1543 
1544       /* Use gettimeofday() if it measured a value bigger than the cycle
1545 	 counter can handle.  */
1546       if (use_cycles)
1547 	{
1548 	  if (t_gtod >= cycles_limit)
1549 	    END_EXCEED ("gettimeofday()", t_gtod);
1550 	}
1551     }
1552 
1553   if (use_mftb)
1554     {
1555       t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
1556       END_USE ("mftb", t_mftb);
1557     }
1558 
1559   if (use_stck)
1560     {
1561       t_stck = (end_stck - start_stck) * STCK_PERIOD;
1562       END_USE ("stck", t_stck);
1563     }
1564 
1565   if (use_sgi)
1566     {
1567       t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1568       END_USE ("SGI hardware counter", t_sgi);
1569     }
1570 
1571   if (use_cycles)
1572     {
1573       t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1574 	* speed_cycletime;
1575       END_USE ("cycle counter", t_cycles);
1576     }
1577 
1578   if (use_grus && getrusage_microseconds_p())
1579     END_USE ("getrusage()", t_grus);
1580 
1581   if (use_gtod && gettimeofday_microseconds_p())
1582     END_USE ("gettimeofday()", t_gtod);
1583 
1584   if (use_times)  END_USE ("times()",        t_times);
1585   if (use_grus)   END_USE ("getrusage()",    t_grus);
1586   if (use_gtod)   END_USE ("gettimeofday()", t_gtod);
1587 
1588   fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1589   abort ();
1590 
1591  done:
1592   if (result < 0.0)
1593     {
1594       if (speed_option_verbose >= 2)
1595 	fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
1596       result = 0.0;
1597     }
1598   return result;
1599 }
1600