1 /* Time routines for speed measurements.
2
3 Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
4
5 This file is part of the GNU MP Library.
6
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14 or
15
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
19
20 or both in parallel, as here.
21
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 for more details.
26
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library. If not,
29 see https://www.gnu.org/licenses/. */
30
31
32 /* Usage:
33
34 The code in this file implements the lowest level of time measuring,
35 simple one-time measuring of time between two points.
36
37 void speed_starttime (void)
38 double speed_endtime (void)
39 Call speed_starttime to start measuring, and then call speed_endtime
40 when done.
41
42 speed_endtime returns the time taken, in seconds. Or if the timebase
43 is in CPU cycles and the CPU frequency is unknown then speed_endtime
44 returns cycles. Applications can identify the cycles return by
45 checking for speed_cycletime (described below) equal to 1.0.
46
47 If some sort of temporary glitch occurs then speed_endtime returns
48 0.0. Currently this is for various cases where a negative time has
49 occurred. This unfortunately occurs with getrusage on some systems,
50 and with the hppa cycle counter on hpux.
51
52 double speed_cycletime
53 The time in seconds for each CPU cycle. For example on a 100 MHz CPU
54 this would be 1.0e-8.
55
56 If the CPU frequency is unknown, then speed_cycletime is either 0.0
57 or 1.0. It's 0.0 when speed_endtime is returning seconds, or it's
58 1.0 when speed_endtime is returning cycles.
59
60 It may be noted that "speed_endtime() / speed_cycletime" gives a
61 measured time in cycles, irrespective of whether speed_endtime is
62 returning cycles or seconds. (Assuming cycles can be had, ie. it's
63 either cycles already or the cpu frequency is known. See also
64 speed_cycletime_need_cycles below.)
65
66 double speed_unittime
67 The unit of time measurement accuracy for the timing method in use.
68 This is in seconds or cycles, as per speed_endtime.
69
70 char speed_time_string[]
71 A null-terminated string describing the time method in use.
72
73 void speed_time_init (void)
74 Initialize time measuring. speed_starttime() does this
75 automatically, so it's only needed if an application wants to inspect
76 the above global variables before making a measurement.
77
78 int speed_precision
79 The intended accuracy of time measurements. speed_measure() in
80 common.c for instance runs target routines with enough repetitions so
81 it takes at least "speed_unittime * speed_precision" (this expression
82 works for both cycles or seconds from speed_endtime).
83
     A program can provide an option so the user can set speed_precision.
85 If speed_precision is zero when speed_time_init or speed_starttime
86 first run then it gets a default based on the measuring method
87 chosen. (More precision for higher accuracy methods.)
88
89 void speed_cycletime_need_seconds (void)
90 Call this to demand that speed_endtime will return seconds, and not
91 cycles. If only cycles are available then an error is printed and
92 the program exits.
93
94 void speed_cycletime_need_cycles (void)
95 Call this to demand that speed_cycletime is non-zero, so that
96 "speed_endtime() / speed_cycletime" will give times in cycles.
97
98
99
100 Notes:
101
102 Various combinations of cycle counter, read_real_time(), getrusage(),
103 gettimeofday() and times() can arise, according to which are available
104 and their precision.
105
106
107 Allowing speed_endtime() to return either seconds or cycles is only a
108 slight complication and makes it possible for the speed program to do
109 some sensible things without demanding the CPU frequency. If seconds are
110 being measured then it can always print seconds, and if cycles are being
111 measured then it can always print them without needing to know how long
112 they are. Also the tune program doesn't care at all what the units are.
113
114 GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
115 fail. This will be needed if times in seconds are wanted but a cycle
116 counter is being used, or if times in cycles are wanted but getrusage or
117 another seconds based timer is in use.
118
119 If the measuring method uses a cycle counter but supplements it with
120 getrusage or the like, then knowing the CPU frequency is mandatory since
121 the code compares values from the two.
122
123
124 Not done:
125
126 Solaris gethrtime() seems no more than a slow way to access the Sparc V9
127 cycle counter. gethrvtime() seems to be relevant only to light weight
128 processes, it doesn't for instance give nanosecond virtual time. So
129 neither of these are used.
130
131
132 Bugs:
133
134 getrusage_microseconds_p is fundamentally flawed, getrusage and
135 gettimeofday can have resolutions other than clock ticks or microseconds,
136 for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
137
138
139 Enhancements:
140
141 The SGI hardware counter has 64 bits on some machines, which could be
142 used when available. But perhaps 32 bits is enough range, and then rely
143 on the getrusage supplement.
144
145 Maybe getrusage (or times) should be used as a supplement for any
146 wall-clock measuring method. Currently a wall clock with a good range
147 (eg. a 64-bit cycle counter) is used without a supplement.
148
149 On PowerPC the timebase registers could be used, but would have to do
150 something to find out the speed. On 6xx chips it's normally 1/4 bus
151 speed, on 4xx chips it's either that or an external clock. Measuring
152 against gettimeofday might be ok. */
153
154 #include "config.h"
155
156 #include <errno.h>
157 #include <setjmp.h>
158 #include <signal.h>
159 #include <stddef.h>
160 #include <stdio.h>
161 #include <string.h>
162 #include <stdlib.h> /* for getenv() */
163
164 #if HAVE_FCNTL_H
165 #include <fcntl.h> /* for open() */
166 #endif
167
168 #if HAVE_STDINT_H
169 #include <stdint.h> /* for uint64_t */
170 #endif
171
172 #if HAVE_UNISTD_H
173 #include <unistd.h> /* for sysconf() */
174 #endif
175
176 #include <sys/types.h>
177
178 #if TIME_WITH_SYS_TIME
179 # include <sys/time.h> /* for struct timeval */
180 # include <time.h>
181 #else
182 # if HAVE_SYS_TIME_H
183 # include <sys/time.h>
184 # else
185 # include <time.h>
186 # endif
187 #endif
188
189 #if HAVE_SYS_MMAN_H
190 #include <sys/mman.h> /* for mmap() */
191 #endif
192
193 #if HAVE_SYS_RESOURCE_H
194 #include <sys/resource.h> /* for struct rusage */
195 #endif
196
197 #if HAVE_SYS_SYSSGI_H
198 #include <sys/syssgi.h> /* for syssgi() */
199 #endif
200
201 #if HAVE_SYS_SYSTEMCFG_H
202 #include <sys/systemcfg.h> /* for RTC_POWER on AIX */
203 #endif
204
205 #if HAVE_SYS_TIMES_H
206 #include <sys/times.h> /* for times() and struct tms */
207 #endif
208
209 #include "gmp-impl.h"
210
211 #include "speed.h"
212
213
/* strerror is only used for some stuff on newish systems, no need to have a
   proper replacement.  When HAVE_STRERROR is false every strerror(n) in this
   file expands to a fixed placeholder string and the argument is dropped.  */
#if ! HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif
219
220
/* Public variables, see the usage notes at the top of this file.  */
char    speed_time_string[256];  /* description of the time method in use */
int     speed_precision = 0;     /* 0 means "not set", a default is applied
                                    by speed_time_init */
double  speed_unittime;          /* accuracy unit, in seconds or cycles */
double  speed_cycletime = 0.0;   /* seconds per cycle; 0.0 until known */
225
226
/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
   native cc */
/* (INT_MAX+1)*2 as a double, ie. one more than UINT_MAX when "unsigned" has
   exactly one more value bit than "int".  */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

#define M_2POW32  4294967296.0              /* 2^32 as a double */
#define M_2POW64  (M_2POW32 * M_2POW32)     /* 2^64 as a double */
233
234
235 /* Conditionals for the time functions available are done with normal C
236 code, which is a lot easier than wildly nested preprocessor directives.
237
238 The choice of what to use is partly made at run-time, according to
239 whether the cycle counter works and the measured accuracy of getrusage
240 and gettimeofday.
241
242 A routine that's not available won't be getting called, but is an abort()
243 to be sure it isn't called mistakenly.
244
245 It can be assumed that if a function exists then its data type will, but
246 if the function doesn't then the data type might or might not exist, so
247 the type can't be used unconditionally. The "struct_rusage" etc macros
248 provide dummies when the respective function doesn't exist. */
249
250
/* have_cycles is the configured HAVE_SPEED_CYCLECOUNTER value, or 0 when
   there's no cycle counter.  speed_time_init treats the value 1 as a
   counter with a 32-bit range and any other non-zero value as a 64-bit
   range when it works out cycles_limit.

   Without a counter speed_cyclecounter() is turned into an assertion
   failure so a mistaken call is caught.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int  have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int  have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif
257
/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.

   have_stck and use_stck are 1 only when gcc style inline asm can be used
   on i370/s390/mvs.  Otherwise both are 0, stck_t is just a placeholder
   type and STCK() is an assertion failure.  */
#if defined (__GNUC__) && ! defined (NO_ASM) \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
/* Store the current clock value into the stck_t lvalue "timestamp".  */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=Q" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
/* length of one stck tick, in seconds */
#define STCK_PERIOD      (1.0 / 4096e6)     /* 2^-12 microseconds */
276
/* mftb
   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).

   MFTB(a) stores a reading into the two element unsigned array "a", the low
   word in a[0] and the high word in a[1].  have_mftb says whether this is a
   powerpc build; on anything else MFTB zeros "a" and then fails an
   assertion.  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
/* The upper word is read both before and after the lower word, and the
   whole read is repeated until the two upper values agree, so the halves
   stored are consistent with each other.  */
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
                    "mftb %1\n"         \
                    "mftbu %2"          \
                    : "=r" (__h1),      \
                      "=r" (__l),       \
                      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
/* No inline asm.  NOTE(review): mftb_function isn't declared in this part
   of the file, presumably it comes from speed.h - confirm.  */
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif
309
/* Unicos 10.X has syssgi(), but not mmap().  Both are needed by
   sgi_works_p, so have_sgi is 1 only when both were found.  */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif
316
/* read_real_time() and time_base_to_time().  When they're not available
   the two functions become assertion failures, and stand-ins are supplied
   for the RTC_POWER and RTC_POWER_PC flag values and for timebasestruct_t
   (with just the three fields this file uses), so code referring to them
   still compiles.  */
#if HAVE_READ_REAL_TIME
static const int  have_rrt = 1;
#else
static const int  have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int           flag;
  unsigned int  tb_high;
  unsigned int  tb_low;
};
#endif
332
/* clock_gettime() and clock_getres().  struct_timespec is the real
   "struct timespec" when the functions exist, otherwise a dummy type.  The
   replacement macros fail an assertion and evaluate to -1, so they can
   still be used in an "if" condition.  */
#if HAVE_CLOCK_GETTIME
static const int  have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int  have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif
342
/* getrusage().  struct_rusage is the real "struct rusage" when the function
   exists, otherwise a dummy type, and getrusage() itself becomes an
   assertion failure.  */
#if HAVE_GETRUSAGE
static const int  have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int  have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif
351
/* gettimeofday().  struct_timeval is the real "struct timeval" when the
   function exists, otherwise a dummy type, and gettimeofday() itself
   becomes an assertion failure.  */
#if HAVE_GETTIMEOFDAY
static const int  have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int  have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval   struct timeval_dummy
#endif
360
/* times().  struct_tms is the real "struct tms" when the function exists,
   otherwise a dummy type, and times() itself becomes an assertion
   failure.  */
#if HAVE_TIMES
static const int  have_times = 1;
#define struct_tms   struct tms
#else
static const int  have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif
369
/* Stand-in types behind the struct_tms, struct_timeval, struct_rusage and
   struct_timespec macros when the real function isn't available.  They're
   declared unconditionally and carry only the members named here.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
struct rusage_dummy {
  struct_timeval ru_utime;   /* real or dummy timeval, as selected above */
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};
384
/* Which measuring methods have been selected.  speed_time_init sets these;
   more than one can be set when a counter is supplemented by another
   method.  NOTE(review): the code setting use_cgt and use_tick_boundary is
   not in the part of the file examined here.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* One starting reading per method.  Presumably recorded by speed_starttime
   and compared against by speed_endtime - confirm against those
   functions.  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* cycles_limit is set by speed_time_init to half the cycle counter's range
   multiplied by speed_cycletime; the 1e100 initial value is effectively
   "no limit".  The *_unittime values are the resolution of each method.  */
static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
415
416
/* Return a string representing a time in seconds, nicely formatted.
   Eg. "10.25ms".

   T is scaled to nanoseconds, microseconds or milliseconds when it's below
   1us, 1ms or 1s respectively, and otherwise left in seconds.  The number
   of decimals is chosen to show about 4 significant figures.

   The return value points to a static buffer which the next call
   overwrites, so it must be used or copied before calling again.  If T is
   so enormous that its decimal expansion doesn't fit then the text is
   truncated to the buffer size rather than written past the end.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *unit;
  int         prec;

  /* choose units and scale */
  if (t < 1e-6)
    t *= 1e9, unit = "ns";
  else if (t < 1e-3)
    t *= 1e6, unit = "us";
  else if (t < 1.0)
    t *= 1e3, unit = "ms";
  else
    unit = "s";

  /* want 4 significant figures */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  /* bounded, since "%f" of a huge double can need over 300 digits */
  snprintf (buf, sizeof (buf), "%.*f%s", prec, t, unit);
  return buf;
}
450
451
452 static jmp_buf cycles_works_buf;
453
454 static RETSIGTYPE
cycles_works_handler(int sig)455 cycles_works_handler (int sig)
456 {
457 longjmp (cycles_works_buf, 1);
458 }
459
460 int
cycles_works_p(void)461 cycles_works_p (void)
462 {
463 static int result = -1;
464
465 if (result != -1)
466 goto done;
467
468 /* FIXME: On linux, the cycle counter is not saved and restored over
469 * context switches, making it almost useless for precise cputime
470 * measurements. When available, it's better to use clock_gettime,
471 * which seems to have reasonable accuracy (tested on x86_32,
472 * linux-2.6.26, glibc-2.7). However, there are also some linux
473 * systems where clock_gettime is broken in one way or the other,
474 * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
475 * kind-of implemented but broken (needs code to detect that), and
476 * on those systems a wall-clock cycle counter is the least bad
477 * fallback.
478 *
479 * So we need some code to disable the cycle counter on some but not
480 * all linux systems. */
481 #ifdef SIGILL
482 {
483 RETSIGTYPE (*old_handler) (int);
484 unsigned cycles[2];
485
486 old_handler = signal (SIGILL, cycles_works_handler);
487 if (old_handler == SIG_ERR)
488 {
489 if (speed_option_verbose)
490 printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
491 goto yes;
492 }
493 if (setjmp (cycles_works_buf))
494 {
495 if (speed_option_verbose)
496 printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
497 result = 0;
498 goto done;
499 }
500 speed_cyclecounter (cycles);
501 signal (SIGILL, old_handler);
502 if (speed_option_verbose)
503 printf ("cycles_works_p(): speed_cyclecounter() works\n");
504 }
505 #else
506
507 if (speed_option_verbose)
508 printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
509 goto yes;
510 #endif
511
512 yes:
513 result = 1;
514
515 done:
516 return result;
517 }
518
519
/* Return the number of clock ticks per second.  sysconf(_SC_CLK_TCK) is
   preferred when sysconf exists, with the CLK_TCK constant as the fallback
   if sysconf gives -1 or isn't available.  The program is aborted if
   neither source can supply a value.

   The value is determined on the first call and cached after that.  */
long
clk_tck (void)
{
  static long  cached_ticks = -1L;

  if (cached_ticks == -1L)
    {
#if HAVE_SYSCONF
      cached_ticks = sysconf (_SC_CLK_TCK);
      if (cached_ticks == -1L)
        {
          fprintf (stderr,
                   "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
        }
      else
        {
          if (speed_option_verbose)
            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", cached_ticks);
          return cached_ticks;
        }
#endif

#ifdef CLK_TCK
      cached_ticks = CLK_TCK;
      if (speed_option_verbose)
        printf ("CLK_TCK is %ld per second\n", cached_ticks);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return cached_ticks;
}
552
553
554 /* If two times can be observed less than half a clock tick apart, then
555 assume "get" is microsecond accurate.
556
557 Two times only 1 microsecond apart are not believed, since some kernels
558 take it upon themselves to ensure gettimeofday doesn't return the same
559 value twice, for the benefit of applications using it for a timestamp.
560 This is obviously very stupid given the speed of CPUs these days.
561
562 Making "reps" many calls to noop_1() is designed to waste some CPU, with
563 a view to getting measurements 2 microseconds (or more) apart. "reps" is
564 increased progressively until such a period is seen.
565
566 The outer loop "attempts" are just to allow for any random nonsense or
567 system load upsetting the measurements (ie. making two successive calls
568 to "get" come out as a longer interval than normal).
569
570 Bugs:
571
572 The assumption that any interval less than a half tick implies
573 microsecond resolution is obviously fairly rash, the true resolution
574 could be anything between a microsecond and that half tick. Perhaps
575 something special would have to be done on a system where this is the
576 case, since there's no obvious reliable way to detect it
577 automatically. */
578
/* Body of an "int foo (void)" function testing a time source.

     name   string used in the verbose messages
     type   type of the object a reading is stored in
     get    macro or function, get(x) stores a reading in x
     sec    sec(x) extracts the whole seconds of reading x
     usec   usec(x) extracts the microseconds of reading x

   The expansion returns 1 if an interval of at least 2 microseconds but
   less than half a clock tick was observed, or 0 if not.  The answer is
   cached in a static, so the measuring is only done on the first call.  */
#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
                                                                        \
    if (result != -1)                                                   \
      return result;                                                    \
                                                                        \
    result = 0;                                                         \
    /* half a clock tick, in microseconds */                            \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
                                                                        \
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
        reps = 0;                                                       \
        for (;;)                                                        \
          {                                                             \
            get (st);                                                   \
            for (i = 0; i < reps; i++)                                  \
              for (j = 0; j < 100; j++)                                 \
                noop_1 (CNST_LIMB(0));                                  \
            get (et);                                                   \
                                                                        \
            /* elapsed microseconds between the two readings */         \
            dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
                                                                        \
            if (speed_option_verbose >= 2)                              \
              printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
                      name, attempt, reps, dt);                         \
                                                                        \
            if (dt >= 2)                                                \
              break;                                                    \
                                                                        \
            /* too short to be believed, waste more time next go */     \
            reps = (reps == 0 ? 1 : 2*reps);                            \
            if (reps == 0)                                              \
              break;  /* uint overflow, not normal */                   \
          }                                                             \
                                                                        \
        if (dt < half_tick)                                             \
          {                                                             \
            result = 1;                                                 \
            break;                                                      \
          }                                                             \
      }                                                                 \
                                                                        \
    if (speed_option_verbose)                                           \
      {                                                                 \
        if (result)                                                     \
          printf ("%s is microsecond accurate\n", name);                \
        else                                                            \
          printf ("%s is only %s clock tick accurate\n",                \
                  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }
634
635
/* Return 1 if gettimeofday() looks to be microsecond accurate, or 0 if it
   only seems good to a clock tick.  The whole body comes from
   MICROSECONDS_P, which caches the answer after the first call.

   The three helper macros aren't #undef'ed, so they stay defined for the
   rest of the file.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
                  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}
645
/* Return 1 if the ru_utime field from getrusage() looks to be microsecond
   accurate, or 0 if it only seems good to a clock tick.  The whole body
   comes from MICROSECONDS_P, which caches the answer after the first call.

   The three helper macros aren't #undef'ed, so they stay defined for the
   rest of the file.  */
int
getrusage_microseconds_p (void)
{
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
                  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}
655
/* Test whether getrusage goes backwards, return non-zero if it does
   (suggesting it's flawed).

   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
   microsecond accurate, but has been seen remaining unchanged after many
   microseconds have elapsed.  It also regularly goes backwards by 1000 to
   5000 usecs, this has been seen after between 500 and 4000 attempts taking
   perhaps 0.03 seconds.  We consider this too broken for good measuring.
   We used to have configure pretend getrusage didn't exist on this system,
   but a runtime test should be more reliable, since we imagine the problem
   is not confined to just this exact system tuple.

   The test polls ru_utime repeatedly and compares each reading with the one
   before.  The answer is cached after the first call.

   When there's no getrusage the return is simply 0.  The real code is kept
   inside the conditional because "struct rusage" can't be assumed to exist
   if the function doesn't.  */

int
getrusage_backwards_p (void)
{
#if HAVE_GETRUSAGE
  static int  result = -1;
  struct rusage  start, prev, next;
  long  d;
  int   i;

  if (result != -1)
    return result;

  getrusage (0, &start);
  next = start;

  result = 0;
  i = 0;
  for (;;)
    {
      prev = next;
      getrusage (0, &next);

      if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
          || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
              && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
        {
          if (speed_option_verbose)
            printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
                    i,
                    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
                    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
          result = 1;
          break;
        }

      /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
         attempts, whichever comes first */
      d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
        + (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
      i++;
      if (i > 50000 || (i > 1000 && d > 100000))
        break;
    }

  return result;
#else
  /* nothing to test */
  return 0;
#endif
}
713
/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
   of glibc (some time post 2.2).

   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
   defined, but returning -1 for an error).

   CGT_ID is the clock passed to clock_gettime and clock_getres, taking
   CLOCK_PROCESS_CPUTIME_ID in preference to CLOCK_VIRTUAL.  have_cgt_id
   says whether either was found; if not, CGT_ID is an expression that fails
   an assertion and has the value -1.  */

#ifdef CLOCK_PROCESS_CPUTIME_ID
# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
#else
# ifdef CLOCK_VIRTUAL
#  define CGT_ID       CLOCK_VIRTUAL
# endif
#endif
#ifdef CGT_ID
const int  have_cgt_id = 1;
#else
const int  have_cgt_id = 0;
# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
#endif

/* iterations of the delay loop cgt_works_p times as a sanity check */
#define CGT_DELAY_COUNT 1000
735
736 int
cgt_works_p(void)737 cgt_works_p (void)
738 {
739 static int result = -1;
740 struct_timespec unit;
741
742 if (! have_cgt)
743 return 0;
744
745 if (! have_cgt_id)
746 {
747 if (speed_option_verbose)
748 printf ("clock_gettime don't know what ID to use\n");
749 result = 0;
750 return result;
751 }
752
753 if (result != -1)
754 return result;
755
756 /* trial run to see if it works */
757 if (clock_gettime (CGT_ID, &unit) != 0)
758 {
759 if (speed_option_verbose)
760 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
761 result = 0;
762 return result;
763 }
764
765 /* get the resolution */
766 if (clock_getres (CGT_ID, &unit) != 0)
767 {
768 if (speed_option_verbose)
769 printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
770 result = 0;
771 return result;
772 }
773
774 cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
775 if (speed_option_verbose)
776 printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
777
778 if (cgt_unittime < 10e-9)
779 {
780 /* Do we believe this? */
781 struct timespec start, end;
782 static volatile int counter;
783 double duration;
784 if (clock_gettime (CGT_ID, &start))
785 {
786 if (speed_option_verbose)
787 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
788 result = 0;
789 return result;
790 }
791 /* Loop of at least 1000 memory accesses, ought to take at
792 least 100 ns*/
793 for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
794 ;
795 if (clock_gettime (CGT_ID, &end))
796 {
797 if (speed_option_verbose)
798 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
799 result = 0;
800 return result;
801 }
802 duration = (end.tv_sec + end.tv_nsec * 1e-9
803 - start.tv_sec - start.tv_nsec * 1e-9);
804 if (speed_option_verbose)
805 printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
806 CGT_DELAY_COUNT, unittime_string (duration));
807 if (duration < 100e-9)
808 {
809 if (speed_option_verbose)
810 printf ("clock_gettime id=%d not believable\n", CGT_ID);
811 result = 0;
812 return result;
813 }
814 }
815 result = 1;
816 return result;
817 }
818
819
/* One measurement of the mftb time base against gettimeofday, passed to
   freq_measure by mftb_works_p.  The whole body, including the return, is
   the FREQ_MEASURE_ONE macro.  NOTE(review): that macro isn't defined in
   this file, presumably it's in speed.h - see there for what exactly is
   returned.

   The helper macros repeat, token for token, the ones in
   gettimeofday_microseconds_p; they must be kept identical, since a
   differing redefinition isn't allowed.  */
static double
freq_measure_mftb_one (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  FREQ_MEASURE_ONE ("mftb", struct_timeval,
                    call_gettimeofday, MFTB,
                    timeval_tv_sec, timeval_tv_usec);
}
830
831
832 static jmp_buf mftb_works_buf;
833
834 static RETSIGTYPE
mftb_works_handler(int sig)835 mftb_works_handler (int sig)
836 {
837 longjmp (mftb_works_buf, 1);
838 }
839
840 int
mftb_works_p(void)841 mftb_works_p (void)
842 {
843 unsigned a[2];
844 RETSIGTYPE (*old_handler) (int);
845 double cycletime;
846
847 /* suppress a warning about a[] unused */
848 a[0] = 0;
849
850 if (! have_mftb)
851 return 0;
852
853 #ifdef SIGILL
854 old_handler = signal (SIGILL, mftb_works_handler);
855 if (old_handler == SIG_ERR)
856 {
857 if (speed_option_verbose)
858 printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
859 return 1;
860 }
861 if (setjmp (mftb_works_buf))
862 {
863 if (speed_option_verbose)
864 printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
865 return 0;
866 }
867 MFTB (a);
868 signal (SIGILL, old_handler);
869 if (speed_option_verbose)
870 printf ("mftb_works_p(): mftb works\n");
871 #else
872
873 if (speed_option_verbose)
874 printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
875 #endif
876
877 #if ! HAVE_GETTIMEOFDAY
878 if (speed_option_verbose)
879 printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
880 return 0;
881 #endif
882
883 /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
884 other chips it can be driven from an external clock. */
885 cycletime = freq_measure ("mftb", freq_measure_mftb_one);
886 if (cycletime == -1.0)
887 {
888 if (speed_option_verbose)
889 printf ("mftb_works_p(): cannot measure mftb period\n");
890 return 0;
891 }
892
893 mftb_unittime = cycletime;
894 return 1;
895 }
896
897
/* Address of the least significant 32 bits of the memory mapped SGI cycle
   counter, set up by sgi_works_p.  */
volatile unsigned  *sgi_addr;

/* Return 1 if the SGI hardware cycle counter has been located and mapped,
   with sgi_addr pointing at it and sgi_unittime set to its period in
   seconds.  Return 0 if syssgi or mmap aren't available, if the system
   reports no counter, or if /dev/mmem can't be opened or mapped.  The
   answer is cached after the first call.

   The /dev/mmem descriptor is needed only to establish the mapping and is
   closed again before returning.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
        {
          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
          /* size is counted in bits, so the fallback is 32 */
          printf (" will assume size==32\n");
        }
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
        printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  if (virtpage == (void *) -1)
    {
      /* report before closing, since close might disturb errno */
      if (speed_option_verbose)
        printf ("mmap /dev/mmem: %s\n", strerror (errno));
      close (fd);
      result = 0;
      return result;
    }

  /* a mapping remains in place after its descriptor is closed */
  close (fd);

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
                           + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
986
987
/* Give VAR the value N if VAR is currently zero, ie. hasn't been set to
   anything yet; otherwise leave it alone.  N is only evaluated when it's
   going to be stored.  */
#define DEFAULT(var,n)          \
  do {                          \
    if ((var) == 0)             \
      (var) = (n);              \
  } while (0)
993
994 void
speed_time_init(void)995 speed_time_init (void)
996 {
997 double supplement_unittime = 0.0;
998
999 static int speed_time_initialized = 0;
1000 if (speed_time_initialized)
1001 return;
1002 speed_time_initialized = 1;
1003
1004 speed_cycletime_init ();
1005
1006 if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
1007 {
1008 use_cycles = 1;
1009 DEFAULT (speed_cycletime, 1.0);
1010 speed_unittime = speed_cycletime;
1011 DEFAULT (speed_precision, 10000);
1012 strcpy (speed_time_string, "CPU cycle counter");
1013
1014 /* only used if a supplementary method is chosen below */
1015 cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
1016 * speed_cycletime;
1017
1018 if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1019 {
1020 /* this is a good combination */
1021 use_grus = 1;
1022 supplement_unittime = grus_unittime = 1.0e-6;
1023 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
1024 }
1025 else if (have_cycles == 1)
1026 {
1027 /* When speed_cyclecounter has a limited range, look for something
1028 to supplement it. */
1029 if (have_gtod && gettimeofday_microseconds_p())
1030 {
1031 use_gtod = 1;
1032 supplement_unittime = gtod_unittime = 1.0e-6;
1033 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
1034 }
1035 else if (have_grus)
1036 {
1037 use_grus = 1;
1038 supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1039 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
1040 }
1041 else if (have_times)
1042 {
1043 use_times = 1;
1044 supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
1045 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
1046 }
1047 else if (have_gtod)
1048 {
1049 use_gtod = 1;
1050 supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1051 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
1052 }
1053 else
1054 {
1055 fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
1056 fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n");
1057 }
1058 }
1059
1060 if (use_grus || use_times || use_gtod)
1061 {
1062 /* must know cycle period to compare cycles to other measuring
1063 (via cycles_limit) */
1064 speed_cycletime_need_seconds ();
1065
1066 if (speed_precision * supplement_unittime > cycles_limit)
1067 {
1068 fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
1069 fprintf (stderr, " cycle counter and limited precision supplemental method\n");
1070 fprintf (stderr, " (%s)\n", speed_time_string);
1071 }
1072 }
1073 }
1074 else if (have_stck)
1075 {
1076 strcpy (speed_time_string, "STCK timestamp");
1077 /* stck is in units of 2^-12 microseconds, which is very likely higher
1078 resolution than a cpu cycle */
1079 if (speed_cycletime == 0.0)
1080 speed_cycletime_fail
1081 ("Need to know CPU frequency for effective stck unit");
1082 speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
1083 DEFAULT (speed_precision, 10000);
1084 }
1085 else if (have_mftb && mftb_works_p ())
1086 {
1087 use_mftb = 1;
1088 DEFAULT (speed_precision, 10000);
1089 speed_unittime = mftb_unittime;
1090 sprintf (speed_time_string, "mftb counter (%s)",
1091 unittime_string (speed_unittime));
1092 }
1093 else if (have_sgi && sgi_works_p ())
1094 {
1095 use_sgi = 1;
1096 DEFAULT (speed_precision, 10000);
1097 speed_unittime = sgi_unittime;
1098 sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
1099 unittime_string (speed_unittime));
1100 /* supplemented with getrusage, which we assume to have 1ms resolution */
1101 use_grus = 1;
1102 supplement_unittime = 1e-3;
1103 }
1104 else if (have_rrt)
1105 {
1106 timebasestruct_t t;
1107 use_rrt = 1;
1108 DEFAULT (speed_precision, 10000);
1109 read_real_time (&t, sizeof(t));
1110 switch (t.flag) {
1111 case RTC_POWER:
1112 /* FIXME: What's the actual RTC resolution? */
1113 speed_unittime = 1e-7;
1114 strcpy (speed_time_string, "read_real_time() power nanoseconds");
1115 break;
1116 case RTC_POWER_PC:
1117 t.tb_high = 1;
1118 t.tb_low = 0;
1119 time_base_to_time (&t, sizeof(t));
1120 speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
1121 sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
1122 unittime_string (speed_unittime));
1123 break;
1124 default:
1125 fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
1126 t.flag);
1127 abort ();
1128 }
1129 }
1130 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
1131 {
1132 /* use clock_gettime if microsecond or better resolution */
1133 choose_cgt:
1134 use_cgt = 1;
1135 speed_unittime = cgt_unittime;
1136 DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
1137 strcpy (speed_time_string, "microsecond accurate clock_gettime()");
1138 }
1139 else if (have_times && clk_tck() > 1000000)
1140 {
1141 /* Cray vector systems have times() which is clock cycle resolution
1142 (eg. 450 MHz). */
1143 DEFAULT (speed_precision, 10000);
1144 goto choose_times;
1145 }
1146 else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1147 {
1148 use_grus = 1;
1149 speed_unittime = grus_unittime = 1.0e-6;
1150 DEFAULT (speed_precision, 1000);
1151 strcpy (speed_time_string, "microsecond accurate getrusage()");
1152 }
1153 else if (have_gtod && gettimeofday_microseconds_p())
1154 {
1155 use_gtod = 1;
1156 speed_unittime = gtod_unittime = 1.0e-6;
1157 DEFAULT (speed_precision, 1000);
1158 strcpy (speed_time_string, "microsecond accurate gettimeofday()");
1159 }
1160 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
1161 {
1162 /* use clock_gettime if 1 tick or better resolution */
1163 goto choose_cgt;
1164 }
1165 else if (have_times)
1166 {
1167 use_tick_boundary = 1;
1168 DEFAULT (speed_precision, 200);
1169 choose_times:
1170 use_times = 1;
1171 speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
1172 sprintf (speed_time_string, "%s clock tick times()",
1173 unittime_string (speed_unittime));
1174 }
1175 else if (have_grus)
1176 {
1177 use_grus = 1;
1178 use_tick_boundary = 1;
1179 speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1180 DEFAULT (speed_precision, 200);
1181 sprintf (speed_time_string, "%s clock tick getrusage()\n",
1182 unittime_string (speed_unittime));
1183 }
1184 else if (have_gtod)
1185 {
1186 use_gtod = 1;
1187 use_tick_boundary = 1;
1188 speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1189 DEFAULT (speed_precision, 200);
1190 sprintf (speed_time_string, "%s clock tick gettimeofday()",
1191 unittime_string (speed_unittime));
1192 }
1193 else
1194 {
1195 fprintf (stderr, "No time measuring method available\n");
1196 fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
1197 abort ();
1198 }
1199
1200 if (speed_option_verbose)
1201 {
1202 printf ("speed_time_init: %s\n", speed_time_string);
1203 printf (" speed_precision %d\n", speed_precision);
1204 printf (" speed_unittime %.2g\n", speed_unittime);
1205 if (supplement_unittime)
1206 printf (" supplement_unittime %.2g\n", supplement_unittime);
1207 printf (" use_tick_boundary %d\n", use_tick_boundary);
1208 if (have_cycles)
1209 printf (" cycles_limit %.2g seconds\n", cycles_limit);
1210 }
1211 }
1212
1213
1214
1215 /* Burn up CPU until a clock tick boundary, for greater accuracy. Set the
1216 corresponding "start_foo" appropriately too. */
1217
1218 void
grus_tick_boundary(void)1219 grus_tick_boundary (void)
1220 {
1221 struct_rusage prev;
1222 getrusage (0, &prev);
1223 do {
1224 getrusage (0, &start_grus);
1225 } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
1226 }
1227
1228 void
gtod_tick_boundary(void)1229 gtod_tick_boundary (void)
1230 {
1231 struct_timeval prev;
1232 gettimeofday (&prev, NULL);
1233 do {
1234 gettimeofday (&start_gtod, NULL);
1235 } while (start_gtod.tv_usec == prev.tv_usec);
1236 }
1237
1238 void
times_tick_boundary(void)1239 times_tick_boundary (void)
1240 {
1241 struct_tms prev;
1242 times (&prev);
1243 do
1244 times (&start_times);
1245 while (start_times.tms_utime == prev.tms_utime);
1246 }
1247
1248
1249 /* "have_" values are tested to let unused code go dead. */
1250
1251 void
speed_starttime(void)1252 speed_starttime (void)
1253 {
1254 speed_time_init ();
1255
1256 if (have_grus && use_grus)
1257 {
1258 if (use_tick_boundary)
1259 grus_tick_boundary ();
1260 else
1261 getrusage (0, &start_grus);
1262 }
1263
1264 if (have_gtod && use_gtod)
1265 {
1266 if (use_tick_boundary)
1267 gtod_tick_boundary ();
1268 else
1269 gettimeofday (&start_gtod, NULL);
1270 }
1271
1272 if (have_times && use_times)
1273 {
1274 if (use_tick_boundary)
1275 times_tick_boundary ();
1276 else
1277 times (&start_times);
1278 }
1279
1280 if (have_cgt && use_cgt)
1281 clock_gettime (CGT_ID, &start_cgt);
1282
1283 if (have_rrt && use_rrt)
1284 read_real_time (&start_rrt, sizeof(start_rrt));
1285
1286 if (have_sgi && use_sgi)
1287 start_sgi = *sgi_addr;
1288
1289 if (have_mftb && use_mftb)
1290 MFTB (start_mftb);
1291
1292 if (have_stck && use_stck)
1293 STCK (start_stck);
1294
1295 /* Cycles sampled last for maximum accuracy. */
1296 if (have_cycles && use_cycles)
1297 speed_cyclecounter (start_cycles);
1298 }
1299
1300
1301 /* Calculate the difference between two cycle counter samples, as a "double"
1302 counter of cycles.
1303
1304 The start and end values are allowed to cancel in integers in case the
1305 counter values are bigger than the 53 bits that normally fit in a double.
1306
1307 This works even if speed_cyclecounter() puts a value bigger than 32-bits
1308 in the low word (the high word always gets a 2**32 multiplier though). */
1309
1310 double
speed_cyclecounter_diff(const unsigned end[2],const unsigned start[2])1311 speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1312 {
1313 unsigned d;
1314 double t;
1315
1316 if (have_cycles == 1)
1317 {
1318 t = (end[0] - start[0]);
1319 }
1320 else
1321 {
1322 d = end[0] - start[0];
1323 t = d - (d > end[0] ? M_2POWU : 0.0);
1324 t += (end[1] - start[1]) * M_2POW32;
1325 }
1326 return t;
1327 }
1328
1329
1330 double
speed_mftb_diff(const unsigned end[2],const unsigned start[2])1331 speed_mftb_diff (const unsigned end[2], const unsigned start[2])
1332 {
1333 unsigned d;
1334 double t;
1335
1336 d = end[0] - start[0];
1337 t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
1338 t += (end[1] - start[1]) * M_2POW32;
1339 return t;
1340 }
1341
1342
/* Function body computing, in seconds, the difference between the
   structures pointed to by "end" and "start" (names the enclosing function
   must provide).  "sec" is the whole-seconds member, "psec" the
   fractional member, and each "psec" count is "punit" seconds.

   The seconds members are subtracted from each other before the fractional
   part is added, since a plain "sec+psec*punit" on each sample could
   exceed the precision of a double.

   The total is formed only as a "double", because an integer count of
   psecs might overflow: 2^32 microseconds is only a bit over an hour, and
   2^32 nanoseconds only about 4 seconds.

   The members are cast to "long" for the sake of timebasestruct_t, whose
   fields are only "unsigned int" while a signed difference is wanted.  */

#define DIFF_SECS_ROUTINE(sec, psec, punit)                             \
  {                                                                     \
    long whole_delta = (long) end->sec - (long) start->sec;             \
    long part_delta = (long) end->psec - (long) start->psec;            \
    return (double) whole_delta + punit * (double) part_delta;          \
  }
1364
1365 double
timeval_diff_secs(const struct_timeval * end,const struct_timeval * start)1366 timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1367 {
1368 DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1369 }
1370
1371 double
rusage_diff_secs(const struct_rusage * end,const struct_rusage * start)1372 rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1373 {
1374 DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1375 }
1376
1377 double
timespec_diff_secs(const struct_timespec * end,const struct_timespec * start)1378 timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1379 {
1380 DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1381 }
1382
1383 /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1384 double
timebasestruct_diff_secs(const timebasestruct_t * end,const timebasestruct_t * start)1385 timebasestruct_diff_secs (const timebasestruct_t *end,
1386 const timebasestruct_t *start)
1387 {
1388 DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1389 }
1390
1391
1392 double
speed_endtime(void)1393 speed_endtime (void)
1394 {
1395 #define END_USE(name,value) \
1396 do { \
1397 if (speed_option_verbose >= 3) \
1398 printf ("speed_endtime(): used %s\n", name); \
1399 result = value; \
1400 goto done; \
1401 } while (0)
1402
1403 #define END_ENOUGH(name,value) \
1404 do { \
1405 if (speed_option_verbose >= 3) \
1406 printf ("speed_endtime(): %s gives enough precision\n", name); \
1407 result = value; \
1408 goto done; \
1409 } while (0)
1410
1411 #define END_EXCEED(name,value) \
1412 do { \
1413 if (speed_option_verbose >= 3) \
1414 printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1415 name); \
1416 result = value; \
1417 goto done; \
1418 } while (0)
1419
1420 unsigned end_cycles[2];
1421 stck_t end_stck;
1422 unsigned end_mftb[2];
1423 unsigned end_sgi;
1424 timebasestruct_t end_rrt;
1425 struct_timespec end_cgt;
1426 struct_timeval end_gtod;
1427 struct_rusage end_grus;
1428 struct_tms end_times;
1429 double t_gtod, t_grus, t_times, t_cgt;
1430 double t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
1431 double result;
1432
1433 /* Cycles sampled first for maximum accuracy.
1434 "have_" values tested to let unused code go dead. */
1435
1436 if (have_cycles && use_cycles) speed_cyclecounter (end_cycles);
1437 if (have_stck && use_stck) STCK (end_stck);
1438 if (have_mftb && use_mftb) MFTB (end_mftb);
1439 if (have_sgi && use_sgi) end_sgi = *sgi_addr;
1440 if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt));
1441 if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt);
1442 if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL);
1443 if (have_grus && use_grus) getrusage (0, &end_grus);
1444 if (have_times && use_times) times (&end_times);
1445
1446 result = -1.0;
1447
1448 if (speed_option_verbose >= 4)
1449 {
1450 printf ("speed_endtime():\n");
1451 if (use_cycles)
1452 printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n",
1453 start_cycles[1], start_cycles[0],
1454 end_cycles[1], end_cycles[0]);
1455
1456 if (use_stck)
1457 printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck);
1458
1459 if (use_mftb)
1460 printf (" mftb 0x%X,%08X -> 0x%X,%08X\n",
1461 start_mftb[1], start_mftb[0],
1462 end_mftb[1], end_mftb[0]);
1463
1464 if (use_sgi)
1465 printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi);
1466
1467 if (use_rrt)
1468 printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n",
1469 start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1470 end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1471
1472 if (use_cgt)
1473 printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n",
1474 (long) start_cgt.tv_sec, (long) start_cgt.tv_nsec,
1475 (long) end_cgt.tv_sec, (long) end_cgt.tv_nsec);
1476
1477 if (use_gtod)
1478 printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n",
1479 (long) start_gtod.tv_sec,
1480 (long) start_gtod.tv_usec,
1481 (long) end_gtod.tv_sec,
1482 (long) end_gtod.tv_usec);
1483
1484 if (use_grus)
1485 printf (" getrusage %ld.%06ld -> %ld.%06ld\n",
1486 (long) start_grus.ru_utime.tv_sec,
1487 (long) start_grus.ru_utime.tv_usec,
1488 (long) end_grus.ru_utime.tv_sec,
1489 (long) end_grus.ru_utime.tv_usec);
1490
1491 if (use_times)
1492 printf (" times %ld -> %ld\n",
1493 start_times.tms_utime, end_times.tms_utime);
1494 }
1495
1496 if (use_rrt)
1497 {
1498 time_base_to_time (&start_rrt, sizeof(start_rrt));
1499 time_base_to_time (&end_rrt, sizeof(end_rrt));
1500 t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1501 END_USE ("read_real_time()", t_rrt);
1502 }
1503
1504 if (use_cgt)
1505 {
1506 t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1507 END_USE ("clock_gettime()", t_cgt);
1508 }
1509
1510 if (use_grus)
1511 {
1512 t_grus = rusage_diff_secs (&end_grus, &start_grus);
1513
1514 /* Use getrusage() if the cycle counter limit would be exceeded, or if
1515 it provides enough accuracy already. */
1516 if (use_cycles)
1517 {
1518 if (t_grus >= speed_precision*grus_unittime)
1519 END_ENOUGH ("getrusage()", t_grus);
1520 if (t_grus >= cycles_limit)
1521 END_EXCEED ("getrusage()", t_grus);
1522 }
1523 }
1524
1525 if (use_times)
1526 {
1527 t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1528
1529 /* Use times() if the cycle counter limit would be exceeded, or if
1530 it provides enough accuracy already. */
1531 if (use_cycles)
1532 {
1533 if (t_times >= speed_precision*times_unittime)
1534 END_ENOUGH ("times()", t_times);
1535 if (t_times >= cycles_limit)
1536 END_EXCEED ("times()", t_times);
1537 }
1538 }
1539
1540 if (use_gtod)
1541 {
1542 t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1543
1544 /* Use gettimeofday() if it measured a value bigger than the cycle
1545 counter can handle. */
1546 if (use_cycles)
1547 {
1548 if (t_gtod >= cycles_limit)
1549 END_EXCEED ("gettimeofday()", t_gtod);
1550 }
1551 }
1552
1553 if (use_mftb)
1554 {
1555 t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
1556 END_USE ("mftb", t_mftb);
1557 }
1558
1559 if (use_stck)
1560 {
1561 t_stck = (end_stck - start_stck) * STCK_PERIOD;
1562 END_USE ("stck", t_stck);
1563 }
1564
1565 if (use_sgi)
1566 {
1567 t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1568 END_USE ("SGI hardware counter", t_sgi);
1569 }
1570
1571 if (use_cycles)
1572 {
1573 t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1574 * speed_cycletime;
1575 END_USE ("cycle counter", t_cycles);
1576 }
1577
1578 if (use_grus && getrusage_microseconds_p())
1579 END_USE ("getrusage()", t_grus);
1580
1581 if (use_gtod && gettimeofday_microseconds_p())
1582 END_USE ("gettimeofday()", t_gtod);
1583
1584 if (use_times) END_USE ("times()", t_times);
1585 if (use_grus) END_USE ("getrusage()", t_grus);
1586 if (use_gtod) END_USE ("gettimeofday()", t_gtod);
1587
1588 fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1589 abort ();
1590
1591 done:
1592 if (result < 0.0)
1593 {
1594 if (speed_option_verbose >= 2)
1595 fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
1596 result = 0.0;
1597 }
1598 return result;
1599 }
1600