1 /* Copyright (c) 2008, 2021, Oracle and/or its affiliates.
2 
3   This program is free software; you can redistribute it and/or modify
4   it under the terms of the GNU General Public License, version 2.0,
5   as published by the Free Software Foundation.
6 
7   This program is also distributed with certain software (including
8   but not limited to OpenSSL) that is licensed under separate terms,
9   as designated in a particular file or component or in included license
10   documentation.  The authors of MySQL hereby grant you an additional
11   permission to link the program and your derivative works with the
12   separately licensed software that they have included with MySQL.
13 
14   Without limiting anything contained in the foregoing, this file,
15   which is part of C Driver for MySQL (Connector/C), is also subject to the
16   Universal FOSS Exception, version 1.0, a copy of which can be found at
17   http://oss.oracle.com/licenses/universal-foss-exception.
18 
19   This program is distributed in the hope that it will be useful,
20   but WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22   GNU General Public License, version 2.0, for more details.
23 
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
27 
28 /*
29   rdtsc3 -- multi-platform timer code
30   pgulutzan@mysql.com, 2005-08-29
31   modified 2008-11-02
32 
33   Functions:
34 
35   my_timer_cycles           ulonglong cycles
36   my_timer_nanoseconds      ulonglong nanoseconds
37   my_timer_microseconds     ulonglong "microseconds"
38   my_timer_milliseconds     ulonglong milliseconds
39   my_timer_ticks            ulonglong ticks
40   my_timer_init             initialization / test
41 
42   We'll call the first 5 functions (the ones that return
43   a ulonglong) "my_timer_xxx" functions.
44   Each my_timer_xxx function returns a 64-bit timing value
45   since an arbitrary 'epoch' start. Since the only purpose
46   is to determine elapsed times, wall-clock time-of-day
47   is not known and not relevant.
48 
49   The my_timer_init function is necessary for initializing.
50   It returns information (underlying routine name,
51   frequency, resolution, overhead) about all my_timer_xxx
52   functions. A program should call my_timer_init once,
53   use the information to decide what my_timer_xxx function
54   to use, and subsequently call that function by function
55   pointer.
56 
57   A typical use would be:
58   my_timer_init()        ... once, at program start
59   ...
60   time1= my_timer_xxx()  ... time before start
61   [code that's timed]
62   time2= my_timer_xxx()  ... time after end
63   elapsed_time= (time2 - time1) - overhead
64 */
65 
66 #include "my_global.h"
67 #include "my_rdtsc.h"
68 
69 #include <stdio.h>
70 #if defined(_WIN32)
71 #include "windows.h"
72 #endif
73 
74 #if defined(TIME_WITH_SYS_TIME)
75 #include <sys/time.h>
76 #include <time.h>           /* for clock_gettime */
77 #endif
78 
79 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
80 #include <sys/times.h>       /* for times */
81 #endif
82 
83 #if defined(__APPLE__) && defined(__MACH__)
84 #include <mach/mach_time.h>
85 #endif
86 
/*
  Prototypes for the external cycle-counter helpers that my_timer_cycles()
  calls in its __SUNPRO_CC / __SUNPRO_C branches. At most one prototype is
  declared, chosen by the same compiler / architecture / data-model tests
  that my_timer_cycles() uses. The __SUNPRO_CC variants are wrapped in
  extern "C" so that they get C linkage. The definitions are not in this
  file.
*/
#if defined(__SUNPRO_CC) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
extern "C" ulonglong my_timer_cycles_il_sparc64();
#elif defined(__SUNPRO_CC) && defined(_ILP32) && !defined(__SunOS_5_7)
extern "C" ulonglong my_timer_cycles_il_sparc32();
#elif defined(__SUNPRO_CC) && defined(__i386) && defined(_ILP32)
extern "C" ulonglong my_timer_cycles_il_i386();
#elif defined(__SUNPRO_CC) && defined(__x86_64) && defined(_LP64)
extern "C" ulonglong my_timer_cycles_il_x86_64();
#elif defined(__SUNPRO_C) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
ulonglong my_timer_cycles_il_sparc64();
#elif defined(__SUNPRO_C) && defined(_ILP32) && !defined(__SunOS_5_7)
ulonglong my_timer_cycles_il_sparc32();
#elif defined(__SUNPRO_C) && defined(__i386) && defined(_ILP32)
ulonglong my_timer_cycles_il_i386();
#elif defined(__SUNPRO_C) && defined(__x86_64) && defined(_LP64)
ulonglong my_timer_cycles_il_x86_64();
#endif
104 
105 /*
106   For cycles, we depend on RDTSC for x86 platforms,
  or on the time base register (which is not really a cycle count
108   but a separate counter with less than nanosecond
109   resolution) for most PowerPC platforms, or on
110   gethrtime which is okay for solaris.
111 */
112 
/*
  Return the current value of the platform's cycle counter, or of the
  closest substitute that one of the branches below knows how to read.
  Exactly one branch is compiled in, chosen by compiler and architecture
  macros. When none of them matches, the function returns 0, which
  my_timer_init() treats as "no cycle timer available".
*/
ulonglong my_timer_cycles(void)
{
#if defined(__GNUC__) && defined(__i386__)
  /* This works much better if compiled with "gcc -O3". */
  /* The "=A" constraint takes the 64-bit result from the edx:eax pair. */
  ulonglong result;
  __asm__ __volatile__ ("rdtsc" : "=A" (result));
  return result;
#elif defined(__SUNPRO_C) && defined(__i386)
  /*
    No return statement in this branch.
    NOTE(review): presumably relies on rdtsc leaving its result in the
    registers used for a 64-bit return value -- confirm for this compiler.
  */
  __asm("rdtsc");
#elif defined(__GNUC__) && defined(__x86_64__)
  /*
    rdtsc delivers the low half in rax and the high half in rdx; shift the
    high half up and OR it into rax so that rax alone holds the 64-bit
    result. rdx is listed as clobbered.
  */
  ulonglong result;
  __asm__ __volatile__ ("rdtsc\n\t" \
                        "shlq $32,%%rdx\n\t" \
                        "orq %%rdx,%%rax"
                        : "=a" (result) :: "%edx");
  return result;
#elif defined(_WIN32) && defined(_M_IX86)
  /*
    No return statement in this branch either.
    NOTE(review): presumably relies on the edx:eax return convention.
  */
  __asm {rdtsc};
#elif defined(_WIN64) && defined(_M_X64)
  /* For 64-bit Windows: unsigned __int64 __rdtsc(); */
  return __rdtsc();
#elif defined(__GNUC__) && defined(__ia64__)
  {
    /* Read the ar.itc application register. */
    ulonglong result;
    __asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
    return result;
  }
#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (defined(__64BIT__) || defined(_ARCH_PPC64))
  {
    /* 64-bit PowerPC: a single mftb yields the whole value. */
    ulonglong result;
    __asm __volatile__ ("mftb %0" : "=r" (result));
    return result;
  }
#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
  {
    /*
      mftbu means "move from time base, upper half, to result".
      The loop is saying: x1=upper, x2=lower, x3=upper,
      if x1!=x3 the lower half carried into the upper half between
      the reads, so repeat.
    */
    unsigned int x1, x2, x3;
    ulonglong result;
    for (;;)
    {
       __asm __volatile__ ( "mftbu %0" : "=r"(x1) );
       __asm __volatile__ ( "mftb %0" : "=r"(x2) );
       __asm __volatile__ ( "mftbu %0" : "=r"(x3) );
       if (x1 == x3) break;
    }
    /* Widen the upper half first so that the shift is done in 64 bits. */
    result = x1;
    return ( result << 32 ) | x2;
  }
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
  /* Sun compilers: delegate to the externally defined helpers. */
  return (my_timer_cycles_il_sparc64());
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(_ILP32) && !defined(__SunOS_5_7)
  return (my_timer_cycles_il_sparc32());
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
  /* This is probably redundant for __SUNPRO_C. */
  return (my_timer_cycles_il_i386());
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
  return (my_timer_cycles_il_x86_64());
#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
  {
    /* Read the %tick register. */
    ulonglong result;
    __asm __volatile__ ("rd %%tick,%0" : "=r" (result));
    return result;
  }
#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
  {
      /*
        Read %tick into 'low', then shift a copy right by 32 bits into
        'high'; the union reassembles the two halves as one value.
        NOTE(review): assumes ulong is 32 bits wide and that 'high' overlays
        the most significant half of wholeresult -- confirm.
      */
      union {
              ulonglong wholeresult;
              struct {
                      ulong high;
                      ulong low;
              }       splitresult;
      } result;
    __asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
    return result.wholeresult;
  }
#elif defined(__GNUC__) && defined(__aarch64__)
  {
    /* Read the cntvct_el0 system register. */
    ulonglong result;
    __asm __volatile__ ("mrs %[rt],cntvct_el0" : [rt] "=r" (result));
    return result;
  }
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
  /* gethrtime may appear as either cycle or nanosecond counter */
  return (ulonglong) gethrtime();
#else
  /* No known cycle counter for this compiler / architecture. */
  return 0;
#endif
}
205 
206 /*
207   For nanoseconds, most platforms have nothing available that
208   (a) doesn't require bringing in a 40-kb librt.so library
209   (b) really has nanosecond resolution.
210 */
211 
my_timer_nanoseconds(void)212 ulonglong my_timer_nanoseconds(void)
213 {
214 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
215   /* SunOS 5.10+, Solaris, HP-UX: hrtime_t gethrtime(void) */
216   return (ulonglong) gethrtime();
217 #elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_REALTIME)
218   {
219     struct timespec tp;
220     clock_gettime(CLOCK_REALTIME, &tp);
221     return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
222   }
223 #elif defined(__APPLE__) && defined(__MACH__)
224   {
225     ulonglong tm;
226     static mach_timebase_info_data_t timebase_info= {0,0};
227     if (timebase_info.denom == 0)
228       (void) mach_timebase_info(&timebase_info);
229     tm= mach_absolute_time();
230     return (tm * timebase_info.numer) / timebase_info.denom;
231   }
232 #else
233   return 0;
234 #endif
235 }
236 
237 /*
238   For microseconds, gettimeofday() is available on
239   almost all platforms. On Windows we use
240   QueryPerformanceCounter which will usually tick over
241   3.5 million times per second, and we don't throw
242   away the extra precision. (On Windows Server 2003
243   the frequency is same as the cycle frequency.)
244 */
245 
my_timer_microseconds(void)246 ulonglong my_timer_microseconds(void)
247 {
248 #if defined(HAVE_GETTIMEOFDAY)
249   {
250     static ulonglong last_value= 0;
251     struct timeval tv;
252     if (gettimeofday(&tv, NULL) == 0)
253       last_value= (ulonglong) tv.tv_sec * 1000000 + (ulonglong) tv.tv_usec;
254     else
255     {
256       /*
257         There are reports that gettimeofday(2) can have intermittent failures
258         on some platform, see for example Bug#36819.
259         We are not trying again or looping, just returning the best value possible
260         under the circumstances ...
261       */
262       last_value++;
263     }
264     return last_value;
265   }
266 #elif defined(_WIN32)
267   {
268     /* QueryPerformanceCounter usually works with about 1/3 microsecond. */
269     LARGE_INTEGER t_cnt;
270 
271     QueryPerformanceCounter(&t_cnt);
272     return (ulonglong) t_cnt.QuadPart;
273   }
274 #else
275   return 0;
276 #endif
277 }
278 
279 /*
280   For milliseconds, gettimeofday() is available on
281   almost all platforms. On Windows we use
282   GetSystemTimeAsFileTime.
283 */
284 
my_timer_milliseconds(void)285 ulonglong my_timer_milliseconds(void)
286 {
287 #if defined(HAVE_GETTIMEOFDAY)
288   {
289     static ulonglong last_ms_value= 0;
290     struct timeval tv;
291     if (gettimeofday(&tv, NULL) == 0)
292       last_ms_value= (ulonglong) tv.tv_sec * 1000 +
293                      (ulonglong) tv.tv_usec / 1000;
294     else
295     {
296       /*
297         There are reports that gettimeofday(2) can have intermittent failures
298         on some platform, see for example Bug#36819.
299         We are not trying again or looping, just returning the best value possible
300         under the circumstances ...
301       */
302       last_ms_value++;
303     }
304     return last_ms_value;
305   }
306 #elif defined(_WIN32)
307    FILETIME ft;
308    GetSystemTimeAsFileTime( &ft );
309    return ((ulonglong)ft.dwLowDateTime +
310                   (((ulonglong)ft.dwHighDateTime) << 32))/10000;
311 #else
312   return 0;
313 #endif
314 }
315 
316 /*
317   For ticks, which we handle with times(), the frequency
318   is usually 100/second and the overhead is surprisingly
319   bad, sometimes even worse than gettimeofday's overhead.
320 */
321 
my_timer_ticks(void)322 ulonglong my_timer_ticks(void)
323 {
324 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
325   {
326     struct tms times_buf;
327     return (ulonglong) times(&times_buf);
328   }
329 #elif defined(_WIN32)
330   return (ulonglong) GetTickCount();
331 #else
332   return 0;
333 #endif
334 }
335 
336 /*
337   The my_timer_init() function and its sub-functions
338   have several loops which call timers. If there's
339   something wrong with a timer -- which has never
340   happened in tests -- we want the loop to end after
341   an arbitrary number of iterations, and my_timer_info
342   will show a discouraging result. The arbitrary
343   number is 1,000,000.
344 */
345 #define MY_TIMER_ITERATIONS 1000000
346 
347 /*
348   Calculate overhead. Called from my_timer_init().
349   Usually best_timer_overhead = cycles.overhead or
350   nanoseconds.overhead, so returned amount is in
351   cycles or nanoseconds. We repeat the calculation
  20 times, so that we can disregard effects of
353   caching or interrupts. Result is quite consistent
354   for cycles, at least. But remember it's a minimum.
355 */
356 
/*
  Measure the cost of one call to this_timer().

  overhead             out: least cost seen over 20 samples, minus
                       best_timer_overhead, expressed in cycle_timer
                       units; never less than 0
  cycle_timer          the timer used as the measuring stick
  this_timer           the timer whose cost is being measured
  best_timer_overhead  cost of the measuring stick itself, to subtract

  Each sample is cycle_timer() / this_timer() / cycle_timer(); the smallest
  difference wins, and samples above 1000000000 are ignored.
*/
static void my_timer_init_overhead(ulonglong *overhead,
                                   ulonglong (*cycle_timer)(void),
                                   ulonglong (*this_timer)(void),
                                   ulonglong best_timer_overhead)
{
  ulonglong time1, time2;
  ulonglong least= 1000000000;
  int i;

  /* least of 20 calculations */
  for (i= 0; i < 20; ++i)
  {
    time1= cycle_timer();
    this_timer(); /* rather than 'time_tmp= timer();' */
    time2= cycle_timer() - time1;
    if (least > time2)
      least= time2;
  }
  /*
    The values are unsigned: subtracting a baseline that is larger than the
    measurement would wrap around to an enormous overhead, so clamp at 0.
  */
  if (least > best_timer_overhead)
    *overhead= least - best_timer_overhead;
  else
    *overhead= 0;
}
376 
377 /*
378   Calculate Resolution. Called from my_timer_init().
379   If a timer goes up by jumps, e.g. 1050, 1075, 1100, ...
380   then the best resolution is the minimum jump, e.g. 25.
381   If it's always divisible by 1000 then it's just a
382   result of multiplication of a lower-precision timer
383   result, e.g. nanoseconds are often microseconds * 1000.
384   If the minimum jump is less than an arbitrary passed
385   figure (a guess based on maximum overhead * 2), ignore.
386   Usually we end up with nanoseconds = 1 because it's too
387   hard to detect anything <= 100 nanoseconds.
388   Often GetTickCount() has resolution = 15.
389   We don't check with ticks because they take too long.
390 */
/*
  Estimate the resolution of this_timer by sampling pairs of back-to-back
  readings until three non-zero differences ("jumps") have been seen, or
  until MY_TIMER_ITERATIONS * 10 pairs have been tried. When
  overhead_times_2 is 0, sampling stops after the first jump.

  Returns 1000000 or 1000 when all three jumps are multiples of that
  figure; otherwise the smallest jump if it exceeds overhead_times_2;
  otherwise 1.
*/
static ulonglong my_timer_init_resolution(ulonglong (*this_timer)(void),
                                          ulonglong overhead_times_2)
{
  ulonglong smallest_jump= 1000000;
  int attempt;
  int jump_count= 0;
  int multiples_of_thousand= 0;
  int multiples_of_million= 0;

  for (attempt= 0;
       attempt < MY_TIMER_ITERATIONS * 10 && jump_count < 3;
       ++attempt)
  {
    ulonglong before= this_timer();
    ulonglong delta= this_timer() - before;

    /* The timer did not move between the two readings. */
    if (delta == 0)
      continue;

    ++jump_count;
    if (delta % 1000 == 0)
      ++multiples_of_thousand;
    if (delta % 1000000 == 0)
      ++multiples_of_million;
    if (delta < smallest_jump)
      smallest_jump= delta;

    /* For milliseconds, one jump is enough. */
    if (overhead_times_2 == 0)
      break;
  }

  /* Only a full set of three jumps says anything about divisibility. */
  if (jump_count == 3 && multiples_of_million == jump_count)
    return 1000000;
  if (jump_count == 3 && multiples_of_thousand == jump_count)
    return 1000;

  return (smallest_jump > overhead_times_2) ? smallest_jump : 1;
}
432 
433 /*
434   Calculate cycle frequency by seeing how many cycles pass
435   in a 200-microsecond period. I tried with 10-microsecond
436   periods originally, and the result was often very wrong.
437 */
438 
my_timer_init_frequency(MY_TIMER_INFO * mti)439 static ulonglong my_timer_init_frequency(MY_TIMER_INFO *mti)
440 {
441   int i;
442   ulonglong time1, time2, time3, time4;
443   time1= my_timer_cycles();
444   time2= my_timer_microseconds();
445   time3= time2; /* Avoids a Microsoft/IBM compiler warning */
446   for (i= 0; i < MY_TIMER_ITERATIONS; ++i)
447   {
448     time3= my_timer_microseconds();
449     if (time3 - time2 > 200) break;
450   }
451   time4= my_timer_cycles() - mti->cycles.overhead;
452   time4-= mti->microseconds.overhead;
453   return (mti->microseconds.frequency * (time4 - time1)) / (time3 - time2);
454 }
455 
456 /*
457   Call my_timer_init before the first call to my_timer_xxx().
458   If something must be initialized, it happens here.
459   Set: what routine is being used e.g. "asm_x86"
460   Set: function, overhead, actual frequency, resolution.
461 */
462 
my_timer_init(MY_TIMER_INFO * mti)463 void my_timer_init(MY_TIMER_INFO *mti)
464 {
465   ulonglong (*best_timer)(void);
466   ulonglong best_timer_overhead;
467   ulonglong time1, time2;
468   int i;
469 
470   /* cycles */
471   mti->cycles.frequency= 1000000000;
472 #if defined(__GNUC__) && defined(__i386__)
473   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
474 #elif defined(__SUNPRO_C) && defined(__i386)
475   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
476 #elif defined(__GNUC__) && defined(__x86_64__)
477   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_64;
478 #elif defined(_WIN32) && defined(_M_IX86)
479   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_WIN;
480 #elif defined(_WIN64) && defined(_M_X64)
481   mti->cycles.routine= MY_TIMER_ROUTINE_RDTSC;
482 #elif defined(__GNUC__) && defined(__ia64__)
483   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_IA64;
484 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (defined(__64BIT__) || defined(_ARCH_PPC64))
485   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC64;
486 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
487   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC;
488 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
489   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC64;
490 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(_ILP32) && !defined(__SunOS_5_7)
491   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC32;
492 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
493   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_I386;
494 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
495   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_X86_64;
496 #elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
497   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC64;
498 #elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
499   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC32;
500 #elif defined(__GNUC__) && defined(__aarch64__)
501   mti->cycles.routine= MY_TIMER_ROUTINE_ASM_AARCH64;
502 #elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
503   mti->cycles.routine= MY_TIMER_ROUTINE_GETHRTIME;
504 #else
505   mti->cycles.routine= 0;
506 #endif
507 
508   if (!mti->cycles.routine || !my_timer_cycles())
509   {
510     mti->cycles.routine= 0;
511     mti->cycles.resolution= 0;
512     mti->cycles.frequency= 0;
513     mti->cycles.overhead= 0;
514   }
515 
516   /* nanoseconds */
517   mti->nanoseconds.frequency=  1000000000; /* initial assumption */
518 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
519   mti->nanoseconds.routine= MY_TIMER_ROUTINE_GETHRTIME;
520 #elif defined(HAVE_CLOCK_GETTIME)
521   mti->nanoseconds.routine= MY_TIMER_ROUTINE_CLOCK_GETTIME;
522 #elif defined(__APPLE__) && defined(__MACH__)
523   mti->nanoseconds.routine= MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME;
524 #else
525   mti->nanoseconds.routine= 0;
526 #endif
527   if (!mti->nanoseconds.routine || !my_timer_nanoseconds())
528   {
529     mti->nanoseconds.routine= 0;
530     mti->nanoseconds.resolution= 0;
531     mti->nanoseconds.frequency= 0;
532     mti->nanoseconds.overhead= 0;
533   }
534 
535   /* microseconds */
536   mti->microseconds.frequency= 1000000; /* initial assumption */
537 #if defined(HAVE_GETTIMEOFDAY)
538    mti->microseconds.routine= MY_TIMER_ROUTINE_GETTIMEOFDAY;
539 #elif defined(_WIN32)
540   {
541     LARGE_INTEGER li;
542     /* Windows: typical frequency = 3579545, actually 1/3 microsecond. */
543     if (!QueryPerformanceFrequency(&li))
544       mti->microseconds.routine= 0;
545     else
546     {
547       mti->microseconds.frequency= li.QuadPart;
548       mti->microseconds.routine= MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER;
549     }
550   }
551 #else
552   mti->microseconds.routine= 0;
553 #endif
554   if (!mti->microseconds.routine || !my_timer_microseconds())
555   {
556     mti->microseconds.routine= 0;
557     mti->microseconds.resolution= 0;
558     mti->microseconds.frequency= 0;
559     mti->microseconds.overhead= 0;
560   }
561 
562   /* milliseconds */
563   mti->milliseconds.frequency= 1000; /* initial assumption */
564 #if defined(HAVE_GETTIMEOFDAY)
565   mti->milliseconds.routine= MY_TIMER_ROUTINE_GETTIMEOFDAY;
566 #elif defined(_WIN32)
567   mti->milliseconds.routine= MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME;
568 #else
569   mti->milliseconds.routine= 0;
570 #endif
571   if (!mti->milliseconds.routine || !my_timer_milliseconds())
572   {
573     mti->milliseconds.routine= 0;
574     mti->milliseconds.resolution= 0;
575     mti->milliseconds.frequency= 0;
576     mti->milliseconds.overhead= 0;
577   }
578 
579   /* ticks */
580   mti->ticks.frequency= 100; /* permanent assumption */
581 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
582   mti->ticks.routine= MY_TIMER_ROUTINE_TIMES;
583 #elif defined(_WIN32)
584   mti->ticks.routine= MY_TIMER_ROUTINE_GETTICKCOUNT;
585 #else
586   mti->ticks.routine= 0;
587 #endif
588   if (!mti->ticks.routine || !my_timer_ticks())
589   {
590     mti->ticks.routine= 0;
591     mti->ticks.resolution= 0;
592     mti->ticks.frequency= 0;
593     mti->ticks.overhead= 0;
594   }
595 
596   /*
597     Calculate overhead in terms of the timer that
598     gives the best resolution: cycles or nanoseconds.
599     I doubt it ever will be as bad as microseconds.
600   */
601   if (mti->cycles.routine)
602     best_timer= &my_timer_cycles;
603   else
604   {
605     if (mti->nanoseconds.routine)
606     {
607       best_timer= &my_timer_nanoseconds;
608     }
609     else
610       best_timer= &my_timer_microseconds;
611   }
612 
613   /* best_timer_overhead = least of 20 calculations */
614   for (i= 0, best_timer_overhead= 1000000000; i < 20; ++i)
615   {
616     time1= best_timer();
617     time2= best_timer() - time1;
618     if (best_timer_overhead > time2)
619       best_timer_overhead= time2;
620   }
621   if (mti->cycles.routine)
622     my_timer_init_overhead(&mti->cycles.overhead,
623                            best_timer,
624                            &my_timer_cycles,
625                            best_timer_overhead);
626   if (mti->nanoseconds.routine)
627     my_timer_init_overhead(&mti->nanoseconds.overhead,
628                            best_timer,
629                            &my_timer_nanoseconds,
630                            best_timer_overhead);
631   if (mti->microseconds.routine)
632     my_timer_init_overhead(&mti->microseconds.overhead,
633                            best_timer,
634                            &my_timer_microseconds,
635                            best_timer_overhead);
636   if (mti->milliseconds.routine)
637     my_timer_init_overhead(&mti->milliseconds.overhead,
638                            best_timer,
639                            &my_timer_milliseconds,
640                            best_timer_overhead);
641   if (mti->ticks.routine)
642     my_timer_init_overhead(&mti->ticks.overhead,
643                            best_timer,
644                            &my_timer_ticks,
645                            best_timer_overhead);
646 
647 /*
648   Calculate resolution for nanoseconds or microseconds
649   or milliseconds, by seeing if it's always divisible
650   by 1000, and by noticing how much jumping occurs.
651   For ticks, just assume the resolution is 1.
652 */
653   if (mti->cycles.routine)
654     mti->cycles.resolution= 1;
655   if (mti->nanoseconds.routine)
656     mti->nanoseconds.resolution=
657     my_timer_init_resolution(&my_timer_nanoseconds, 20000);
658   if (mti->microseconds.routine)
659     mti->microseconds.resolution=
660     my_timer_init_resolution(&my_timer_microseconds, 20);
661   if (mti->milliseconds.routine)
662     mti->milliseconds.resolution=
663     my_timer_init_resolution(&my_timer_milliseconds, 0);
664   if (mti->ticks.routine)
665     mti->ticks.resolution= 1;
666 
667 /*
668   Calculate cycles frequency,
669   if we have both a cycles routine and a microseconds routine.
670   In tests, this usually results in a figure within 2% of
671   what "cat /proc/cpuinfo" says.
672   If the microseconds routine is QueryPerformanceCounter
673   (i.e. it's Windows), and the microseconds frequency is >
674   500,000,000 (i.e. it's Windows Server so it uses RDTSC)
675   and the microseconds resolution is > 100 (i.e. dreadful),
676   then calculate cycles frequency = microseconds frequency.
677 */
678   if (mti->cycles.routine
679   &&  mti->microseconds.routine)
680   {
681     if (mti->microseconds.routine ==
682     MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER
683     &&  mti->microseconds.frequency > 500000000
684     &&  mti->microseconds.resolution > 100)
685       mti->cycles.frequency= mti->microseconds.frequency;
686     else
687     {
688       ulonglong time1, time2;
689       time1= my_timer_init_frequency(mti);
690       /* Repeat once in case there was an interruption. */
691       time2= my_timer_init_frequency(mti);
692       if (time1 < time2) mti->cycles.frequency= time1;
693       else mti->cycles.frequency= time2;
694     }
695   }
696 
697 /*
698   Calculate milliseconds frequency =
699   (cycles-frequency/#-of-cycles) * #-of-milliseconds,
700   if we have both a milliseconds routine and a cycles
701   routine.
702   This will be inaccurate if milliseconds resolution > 1.
703   This is probably only useful when testing new platforms.
704 */
705   if (mti->milliseconds.routine
706   &&  mti->milliseconds.resolution < 1000
707   &&  mti->microseconds.routine
708   &&  mti->cycles.routine)
709   {
710     int i;
711     ulonglong time1, time2, time3, time4;
712     time1= my_timer_cycles();
713     time2= my_timer_milliseconds();
714     time3= time2; /* Avoids a Microsoft/IBM compiler warning */
715     for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
716     {
717       time3= my_timer_milliseconds();
718       if (time3 - time2 > 10) break;
719     }
720     time4= my_timer_cycles();
721     mti->milliseconds.frequency=
722     (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
723   }
724 
725 /*
726   Calculate ticks.frequency =
727   (cycles-frequency/#-of-cycles * #-of-ticks,
728   if we have both a ticks routine and a cycles
729   routine,
730   This is probably only useful when testing new platforms.
731 */
732   if (mti->ticks.routine
733   &&  mti->microseconds.routine
734   &&  mti->cycles.routine)
735   {
736     int i;
737     ulonglong time1, time2, time3, time4;
738     time1= my_timer_cycles();
739     time2= my_timer_ticks();
740     time3= time2; /* Avoids a Microsoft/IBM compiler warning */
741     for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
742     {
743       time3= my_timer_ticks();
744       if (time3 - time2 > 10) break;
745     }
746     time4= my_timer_cycles();
747     mti->ticks.frequency=
748     (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
749   }
750 }
751 
752 /*
753    Additional Comments
754    -------------------
755 
756    This is for timing, i.e. finding out how long a piece of code
757    takes. If you want time of day matching a wall clock, the
758    my_timer_xxx functions won't help you.
759 
760    The best timer is the one with highest frequency, lowest
   overhead, and resolution=1. The my_timer_init() routine will tell
762    you at runtime which timer that is. Usually it will be
763    my_timer_cycles() but be aware that, although it's best,
764    it has possible flaws and dangers. Depending on platform:
765    - The frequency might change. We don't test for this. It
766      happens on laptops for power saving, and on blade servers
767      for avoiding overheating.
768    - The overhead that my_timer_init() returns is the minimum.
769      In fact it could be slightly greater because of caching or
770      because you call the routine by address, as recommended.
771      It could be hugely greater if there's an interrupt.
772    - The x86 cycle counter, RDTSC doesn't "serialize". That is,
773      if there is out-of-order execution, rdtsc might be processed
774      after an instruction that logically follows it.
775      (We could force serialization, but that would be slower.)
776    - It is possible to set a flag which renders RDTSC
777      inoperative. Somebody responsible for the kernel
778      of the operating system would have to make this
779      decision. For the platforms we've tested with, there's
780      no such problem.
781    - With a multi-processor arrangement, it's possible
782      to get the cycle count from one processor in
783      thread X, and the cycle count from another processor
784      in thread Y. They may not always be in synch.
785    - You can't depend on a cycle counter being available for
786      all platforms. On Alphas, the
787      cycle counter is only 32-bit, so it would overflow quickly,
788      so we don't bother with it. On platforms that we haven't
789      tested, there might be some if/endif combination that we
790      didn't expect, or some assembler routine that we didn't
791      supply.
792 
793    The recommended way to use the timer routines is:
794    1. Somewhere near the beginning of the program, call
795       my_timer_init(). This should only be necessary once,
796       although you can call it again if you think that the
797       frequency has changed.
798    2. Determine the best timer based on frequency, resolution,
799       overhead -- all things that my_timer_init() returns.
      Preserve the address of the timer and the my_timer_init
801       results in an easily-accessible place.
802    3. Instrument the code section that you're monitoring, thus:
803       time1= my_timer_xxx();
804       Instrumented code;
805       time2= my_timer_xxx();
806       elapsed_time= (time2 - time1) - overhead;
807       If the timer is always on, then overhead is always there,
808       so don't subtract it.
809    4. Save the elapsed time, or add it to a totaller.
810    5. When all timing processes are complete, transfer the
811       saved / totalled elapsed time to permanent storage.
812       Optionally you can convert cycles to microseconds at
813       this point. (Don't do so every time you calculate
814       elapsed_time! That would waste time and lose precision!)
815       For converting cycles to microseconds, use the frequency
816       that my_timer_init() returns. You'll also need to convert
817       if the my_timer_microseconds() function is the Windows
818       function QueryPerformanceCounter(), since that's sometimes
819       a counter with precision slightly better than microseconds.
820 
821    Since we recommend calls by function pointer, we supply
822    no inline functions.
823 
824    Some comments on the many candidate routines for timing ...
825 
826    clock() -- We don't use it because it would overflow frequently.
827 
828    clock_gettime() -- In tests, clock_gettime often had
829    resolution = 1000.
830 
831    gettimeofday() -- available on most platforms, though not
832    on Windows. There is a hardware timer (sometimes a Programmable
833    Interrupt Timer or "PIT") (sometimes a "HPET") used for
834    interrupt generation. When it interrupts (a "tick" or "jiffy",
835    typically 1 centisecond) it sets xtime. For gettimeofday, a
836    Linux kernel routine usually gets xtime and then gets rdtsc
837    to get elapsed nanoseconds since the last tick. On Red Hat
838    Enterprise Linux 3, there was once a bug which caused the
839    resolution to be 1000, i.e. one centisecond. We never check
840    for time-zone change.
841 
842    getnstimeofday() -- something to watch for in future Linux
843 
844    do_gettimeofday() -- exists on Linux but not for "userland"
845 
846    get_cycles() -- a multi-platform function, worth watching
847    in future Linux versions. But we found platform-specific
848    functions which were better documented in operating-system
849    manuals. And get_cycles() can fail or return a useless
850    32-bit number. It might be available on some platforms,
851    such as arm, which we didn't test.  Using
852    "include <linux/timex.h>" or "include <asm/timex.h>"
853    can lead to autoconf or compile errors, depending on system.
854 
855    rdtsc, __rdtsc, rdtscll: available for x86 with Linux, BSD,
856    Solaris, Windows. See "possible flaws and dangers" comments.
857 
858    times(): what we use for ticks. Should just read the last
859    (xtime) tick count, therefore should be fast, but usually
860    isn't.
861 
862    GetTickCount(): we use this for my_timer_ticks() on
863    Windows. Actually it really is a tick counter, so resolution
864    >= 10 milliseconds unless you have a very old Windows version.
865    With Windows 95 or 98 or ME, timeGetTime() has better resolution than
866    GetTickCount (1ms rather than 55ms). But with Windows NT or XP or 2000,
867    they're both getting from a variable in the Process Environment Block
868    (PEB), and the variable is set by the programmable interrupt timer, so
869    the resolution is the same (usually 10-15 milliseconds). Also timeGetTime
870    is slower on old machines:
871    http://www.doumo.jp/aon-java/jsp/postgretips/tips.jsp?tips=74.
872    Also timeGetTime requires linking winmm.lib.
873    Therefore we use GetTickCount.
874    It will overflow every 49 days because the return is 32-bit.
875    There is also a GetTickCount64 but it requires Vista or Windows Server 2008.
876    (As for GetSystemTimeAsFileTime, its precision is spurious, it
877    just reads the tick variable like the other functions do.
878    However, we don't expect it to overflow every 49 days, so we
879    will prefer it for my_timer_milliseconds().)
880 
881    QueryPerformanceCounter() -- we use this for my_timer_microseconds()
882    on Windows. 1-PIT-tick (often 1/3-microsecond). Usually reads
883    the PIT so it's slow. On some Windows variants, uses RDTSC.
884 
885    GetLocalTime() -- this is available on Windows but we don't use it.
886 
887    getclock(): documented for Alpha, but not found during tests.
888 
889    mach_absolute_time() and UpTime() are recommended for Apple.
890    Initially they weren't tried, because asm_ppc seems to do the job.
891    But now we use mach_absolute_time for nanoseconds.
892 
893    Any clock-based timer can be affected by NTP (ntpd program),
894    which means:
895    - full-second correction can occur for leap second
896    - tiny corrections can occur approximately every 11 minutes
897      (but I think they only affect the RTC which isn't the PIT).
898 
899    We define "precision" as "frequency" and "high precision" is
900    "frequency better than 1 microsecond". We define "resolution"
901    as a synonym for "granularity". We define "accuracy" as
902    "closeness to the truth" as established by some authoritative
903    clock, but we can't measure accuracy.
904 
905    Do not expect any of our timers to be monotonic; we
906    won't guarantee that they return constantly-increasing
907    unique numbers.
908 
909    We tested with AIX, Solaris (x86 + Sparc), Linux (x86 +
910    Itanium), Windows, 64-bit Windows, QNX, FreeBSD, HPUX,
911    Irix, Mac. We didn't test with SCO.
912 
913 */
914 
915