/* Copyright (c) 2008, 2021, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   Without limiting anything contained in the foregoing, this file,
   which is part of C Driver for MySQL (Connector/C), is also subject to the
   Universal FOSS Exception, version 1.0, a copy of which can be found at
   http://oss.oracle.com/licenses/universal-foss-exception.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

/*
  rdtsc3 -- multi-platform timer code
  pgulutzan@mysql.com, 2005-08-29
  modified 2008-11-02

  Functions:

  my_timer_cycles           ulonglong cycles
  my_timer_nanoseconds      ulonglong nanoseconds
  my_timer_microseconds     ulonglong "microseconds"
  my_timer_milliseconds     ulonglong milliseconds
  my_timer_ticks            ulonglong ticks
  my_timer_init             initialization / test

  We'll call the first 5 functions (the ones that return
  a ulonglong) "my_timer_xxx" functions.
  Each my_timer_xxx function returns a 64-bit timing value
  since an arbitrary 'epoch' start. Since the only purpose
  is to determine elapsed times, wall-clock time-of-day
  is not known and not relevant.

  The my_timer_init function is necessary for initializing.
  It returns information (underlying routine name,
  frequency, resolution, overhead) about all my_timer_xxx
  functions. A program should call my_timer_init once,
  use the information to decide what my_timer_xxx function
  to use, and subsequently call that function by function
  pointer.

  A typical use would be:
  my_timer_init()       ... once, at program start
  ...
  time1= my_timer_xxx() ... time before start
  [code that's timed]
  time2= my_timer_xxx() ... time after end
  elapsed_time= (time2 - time1) - overhead
*/

#include "my_global.h"
#include "my_rdtsc.h"

#include <stdio.h>
#if defined(_WIN32)
#include "windows.h"
#endif

#if defined(TIME_WITH_SYS_TIME)
#include <sys/time.h>
#include <time.h>                  /* for clock_gettime */
#endif

#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
#include <sys/times.h>             /* for times */
#endif

#if defined(__APPLE__) && defined(__MACH__)
#include <mach/mach_time.h>
#endif

/*
  Prototypes for the Sun Studio cycle-counter routines that
  my_timer_cycles() calls. They are only declared in this file;
  exactly one is selected, according to compiler and data model.
  The C++ compiler (__SUNPRO_CC) needs extern "C" linkage, the
  C compiler (__SUNPRO_C) does not.
*/
#if defined(__SUNPRO_CC) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
extern "C" ulonglong my_timer_cycles_il_sparc64();
#elif defined(__SUNPRO_CC) && defined(_ILP32) && !defined(__SunOS_5_7)
extern "C" ulonglong my_timer_cycles_il_sparc32();
#elif defined(__SUNPRO_CC) && defined(__i386) && defined(_ILP32)
extern "C" ulonglong my_timer_cycles_il_i386();
#elif defined(__SUNPRO_CC) && defined(__x86_64) && defined(_LP64)
extern "C" ulonglong my_timer_cycles_il_x86_64();
#elif defined(__SUNPRO_C) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
ulonglong my_timer_cycles_il_sparc64(void);
#elif defined(__SUNPRO_C) && defined(_ILP32) && !defined(__SunOS_5_7)
ulonglong my_timer_cycles_il_sparc32(void);
#elif defined(__SUNPRO_C) && defined(__i386) && defined(_ILP32)
ulonglong my_timer_cycles_il_i386(void);
#elif defined(__SUNPRO_C) && defined(__x86_64) && defined(_LP64)
ulonglong my_timer_cycles_il_x86_64(void);
#endif

105 /*
106 For cycles, we depend on RDTSC for x86 platforms,
107 or on time buffer (which is not really a cycle count
108 but a separate counter with less than nanosecond
109 resolution) for most PowerPC platforms, or on
110 gethrtime which is okay for solaris.
111 */
112
my_timer_cycles(void)113 ulonglong my_timer_cycles(void)
114 {
115 #if defined(__GNUC__) && defined(__i386__)
116 /* This works much better if compiled with "gcc -O3". */
117 ulonglong result;
118 __asm__ __volatile__ ("rdtsc" : "=A" (result));
119 return result;
120 #elif defined(__SUNPRO_C) && defined(__i386)
121 __asm("rdtsc");
122 #elif defined(__GNUC__) && defined(__x86_64__)
123 ulonglong result;
124 __asm__ __volatile__ ("rdtsc\n\t" \
125 "shlq $32,%%rdx\n\t" \
126 "orq %%rdx,%%rax"
127 : "=a" (result) :: "%edx");
128 return result;
129 #elif defined(_WIN32) && defined(_M_IX86)
130 __asm {rdtsc};
131 #elif defined(_WIN64) && defined(_M_X64)
132 /* For 64-bit Windows: unsigned __int64 __rdtsc(); */
133 return __rdtsc();
134 #elif defined(__GNUC__) && defined(__ia64__)
135 {
136 ulonglong result;
137 __asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
138 return result;
139 }
140 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (defined(__64BIT__) || defined(_ARCH_PPC64))
141 {
142 ulonglong result;
143 __asm __volatile__ ("mftb %0" : "=r" (result));
144 return result;
145 }
146 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
147 {
148 /*
149 mftbu means "move from time-buffer-upper to result".
150 The loop is saying: x1=upper, x2=lower, x3=upper,
151 if x1!=x3 there was an overflow so repeat.
152 */
153 unsigned int x1, x2, x3;
154 ulonglong result;
155 for (;;)
156 {
157 __asm __volatile__ ( "mftbu %0" : "=r"(x1) );
158 __asm __volatile__ ( "mftb %0" : "=r"(x2) );
159 __asm __volatile__ ( "mftbu %0" : "=r"(x3) );
160 if (x1 == x3) break;
161 }
162 result = x1;
163 return ( result << 32 ) | x2;
164 }
165 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
166 return (my_timer_cycles_il_sparc64());
167 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(_ILP32) && !defined(__SunOS_5_7)
168 return (my_timer_cycles_il_sparc32());
169 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
170 /* This is probably redundant for __SUNPRO_C. */
171 return (my_timer_cycles_il_i386());
172 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
173 return (my_timer_cycles_il_x86_64());
174 #elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
175 {
176 ulonglong result;
177 __asm __volatile__ ("rd %%tick,%0" : "=r" (result));
178 return result;
179 }
180 #elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
181 {
182 union {
183 ulonglong wholeresult;
184 struct {
185 ulong high;
186 ulong low;
187 } splitresult;
188 } result;
189 __asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
190 return result.wholeresult;
191 }
192 #elif defined(__GNUC__) && defined(__aarch64__)
193 {
194 ulonglong result;
195 __asm __volatile__ ("mrs %[rt],cntvct_el0" : [rt] "=r" (result));
196 return result;
197 }
198 #elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
199 /* gethrtime may appear as either cycle or nanosecond counter */
200 return (ulonglong) gethrtime();
201 #else
202 return 0;
203 #endif
204 }
205
206 /*
207 For nanoseconds, most platforms have nothing available that
208 (a) doesn't require bringing in a 40-kb librt.so library
209 (b) really has nanosecond resolution.
210 */
211
my_timer_nanoseconds(void)212 ulonglong my_timer_nanoseconds(void)
213 {
214 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
215 /* SunOS 5.10+, Solaris, HP-UX: hrtime_t gethrtime(void) */
216 return (ulonglong) gethrtime();
217 #elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_REALTIME)
218 {
219 struct timespec tp;
220 clock_gettime(CLOCK_REALTIME, &tp);
221 return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
222 }
223 #elif defined(__APPLE__) && defined(__MACH__)
224 {
225 ulonglong tm;
226 static mach_timebase_info_data_t timebase_info= {0,0};
227 if (timebase_info.denom == 0)
228 (void) mach_timebase_info(&timebase_info);
229 tm= mach_absolute_time();
230 return (tm * timebase_info.numer) / timebase_info.denom;
231 }
232 #else
233 return 0;
234 #endif
235 }
236
237 /*
238 For microseconds, gettimeofday() is available on
239 almost all platforms. On Windows we use
240 QueryPerformanceCounter which will usually tick over
241 3.5 million times per second, and we don't throw
242 away the extra precision. (On Windows Server 2003
243 the frequency is same as the cycle frequency.)
244 */
245
my_timer_microseconds(void)246 ulonglong my_timer_microseconds(void)
247 {
248 #if defined(HAVE_GETTIMEOFDAY)
249 {
250 static ulonglong last_value= 0;
251 struct timeval tv;
252 if (gettimeofday(&tv, NULL) == 0)
253 last_value= (ulonglong) tv.tv_sec * 1000000 + (ulonglong) tv.tv_usec;
254 else
255 {
256 /*
257 There are reports that gettimeofday(2) can have intermittent failures
258 on some platform, see for example Bug#36819.
259 We are not trying again or looping, just returning the best value possible
260 under the circumstances ...
261 */
262 last_value++;
263 }
264 return last_value;
265 }
266 #elif defined(_WIN32)
267 {
268 /* QueryPerformanceCounter usually works with about 1/3 microsecond. */
269 LARGE_INTEGER t_cnt;
270
271 QueryPerformanceCounter(&t_cnt);
272 return (ulonglong) t_cnt.QuadPart;
273 }
274 #else
275 return 0;
276 #endif
277 }
278
279 /*
280 For milliseconds, gettimeofday() is available on
281 almost all platforms. On Windows we use
282 GetSystemTimeAsFileTime.
283 */
284
my_timer_milliseconds(void)285 ulonglong my_timer_milliseconds(void)
286 {
287 #if defined(HAVE_GETTIMEOFDAY)
288 {
289 static ulonglong last_ms_value= 0;
290 struct timeval tv;
291 if (gettimeofday(&tv, NULL) == 0)
292 last_ms_value= (ulonglong) tv.tv_sec * 1000 +
293 (ulonglong) tv.tv_usec / 1000;
294 else
295 {
296 /*
297 There are reports that gettimeofday(2) can have intermittent failures
298 on some platform, see for example Bug#36819.
299 We are not trying again or looping, just returning the best value possible
300 under the circumstances ...
301 */
302 last_ms_value++;
303 }
304 return last_ms_value;
305 }
306 #elif defined(_WIN32)
307 FILETIME ft;
308 GetSystemTimeAsFileTime( &ft );
309 return ((ulonglong)ft.dwLowDateTime +
310 (((ulonglong)ft.dwHighDateTime) << 32))/10000;
311 #else
312 return 0;
313 #endif
314 }
315
316 /*
317 For ticks, which we handle with times(), the frequency
318 is usually 100/second and the overhead is surprisingly
319 bad, sometimes even worse than gettimeofday's overhead.
320 */
321
my_timer_ticks(void)322 ulonglong my_timer_ticks(void)
323 {
324 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
325 {
326 struct tms times_buf;
327 return (ulonglong) times(×_buf);
328 }
329 #elif defined(_WIN32)
330 return (ulonglong) GetTickCount();
331 #else
332 return 0;
333 #endif
334 }
335
/*
  The my_timer_init() function and its sub-functions
  have several loops which call timers. If there's
  something wrong with a timer -- which has never
  happened in tests -- we want the loop to end after
  an arbitrary number of iterations, and MY_TIMER_INFO
  will show a discouraging result. The arbitrary
  number is 1,000,000.

  The loops count with an int and scale this value by up
  to 1000, so it must stay small enough for that product
  to fit in an int.
*/
#define MY_TIMER_ITERATIONS 1000000

347 /*
348 Calculate overhead. Called from my_timer_init().
349 Usually best_timer_overhead = cycles.overhead or
350 nanoseconds.overhead, so returned amount is in
351 cycles or nanoseconds. We repeat the calculation
352 ten times, so that we can disregard effects of
353 caching or interrupts. Result is quite consistent
354 for cycles, at least. But remember it's a minimum.
355 */
356
my_timer_init_overhead(ulonglong * overhead,ulonglong (* cycle_timer)(void),ulonglong (* this_timer)(void),ulonglong best_timer_overhead)357 static void my_timer_init_overhead(ulonglong *overhead,
358 ulonglong (*cycle_timer)(void),
359 ulonglong (*this_timer)(void),
360 ulonglong best_timer_overhead)
361 {
362 ulonglong time1, time2;
363 int i;
364
365 /* *overhead, least of 20 calculations - cycles.overhead */
366 for (i= 0, *overhead= 1000000000; i < 20; ++i)
367 {
368 time1= cycle_timer();
369 this_timer(); /* rather than 'time_tmp= timer();' */
370 time2= cycle_timer() - time1;
371 if (*overhead > time2)
372 *overhead= time2;
373 }
374 *overhead-= best_timer_overhead;
375 }
376
377 /*
378 Calculate Resolution. Called from my_timer_init().
379 If a timer goes up by jumps, e.g. 1050, 1075, 1100, ...
380 then the best resolution is the minimum jump, e.g. 25.
381 If it's always divisible by 1000 then it's just a
382 result of multiplication of a lower-precision timer
383 result, e.g. nanoseconds are often microseconds * 1000.
384 If the minimum jump is less than an arbitrary passed
385 figure (a guess based on maximum overhead * 2), ignore.
386 Usually we end up with nanoseconds = 1 because it's too
387 hard to detect anything <= 100 nanoseconds.
388 Often GetTickCount() has resolution = 15.
389 We don't check with ticks because they take too long.
390 */
my_timer_init_resolution(ulonglong (* this_timer)(void),ulonglong overhead_times_2)391 static ulonglong my_timer_init_resolution(ulonglong (*this_timer)(void),
392 ulonglong overhead_times_2)
393 {
394 ulonglong time1, time2;
395 ulonglong best_jump;
396 int i, jumps, divisible_by_1000, divisible_by_1000000;
397
398 divisible_by_1000= divisible_by_1000000= 0;
399 best_jump= 1000000;
400 for (i= jumps= 0; jumps < 3 && i < MY_TIMER_ITERATIONS * 10; ++i)
401 {
402 time1= this_timer();
403 time2= this_timer();
404 time2-= time1;
405 if (time2)
406 {
407 ++jumps;
408 if (!(time2 % 1000))
409 {
410 ++divisible_by_1000;
411 if (!(time2 % 1000000))
412 ++divisible_by_1000000;
413 }
414 if (best_jump > time2)
415 best_jump= time2;
416 /* For milliseconds, one jump is enough. */
417 if (overhead_times_2 == 0)
418 break;
419 }
420 }
421 if (jumps == 3)
422 {
423 if (jumps == divisible_by_1000000)
424 return 1000000;
425 if (jumps == divisible_by_1000)
426 return 1000;
427 }
428 if (best_jump > overhead_times_2)
429 return best_jump;
430 return 1;
431 }
432
433 /*
434 Calculate cycle frequency by seeing how many cycles pass
435 in a 200-microsecond period. I tried with 10-microsecond
436 periods originally, and the result was often very wrong.
437 */
438
my_timer_init_frequency(MY_TIMER_INFO * mti)439 static ulonglong my_timer_init_frequency(MY_TIMER_INFO *mti)
440 {
441 int i;
442 ulonglong time1, time2, time3, time4;
443 time1= my_timer_cycles();
444 time2= my_timer_microseconds();
445 time3= time2; /* Avoids a Microsoft/IBM compiler warning */
446 for (i= 0; i < MY_TIMER_ITERATIONS; ++i)
447 {
448 time3= my_timer_microseconds();
449 if (time3 - time2 > 200) break;
450 }
451 time4= my_timer_cycles() - mti->cycles.overhead;
452 time4-= mti->microseconds.overhead;
453 return (mti->microseconds.frequency * (time4 - time1)) / (time3 - time2);
454 }
455
456 /*
457 Call my_timer_init before the first call to my_timer_xxx().
458 If something must be initialized, it happens here.
459 Set: what routine is being used e.g. "asm_x86"
460 Set: function, overhead, actual frequency, resolution.
461 */
462
my_timer_init(MY_TIMER_INFO * mti)463 void my_timer_init(MY_TIMER_INFO *mti)
464 {
465 ulonglong (*best_timer)(void);
466 ulonglong best_timer_overhead;
467 ulonglong time1, time2;
468 int i;
469
470 /* cycles */
471 mti->cycles.frequency= 1000000000;
472 #if defined(__GNUC__) && defined(__i386__)
473 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
474 #elif defined(__SUNPRO_C) && defined(__i386)
475 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86;
476 #elif defined(__GNUC__) && defined(__x86_64__)
477 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_64;
478 #elif defined(_WIN32) && defined(_M_IX86)
479 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_X86_WIN;
480 #elif defined(_WIN64) && defined(_M_X64)
481 mti->cycles.routine= MY_TIMER_ROUTINE_RDTSC;
482 #elif defined(__GNUC__) && defined(__ia64__)
483 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_IA64;
484 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (defined(__64BIT__) || defined(_ARCH_PPC64))
485 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC64;
486 #elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
487 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_PPC;
488 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
489 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC64;
490 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(_ILP32) && !defined(__SunOS_5_7)
491 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC32;
492 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
493 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_I386;
494 #elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
495 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_SUNPRO_X86_64;
496 #elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64)
497 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC64;
498 #elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64)
499 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC32;
500 #elif defined(__GNUC__) && defined(__aarch64__)
501 mti->cycles.routine= MY_TIMER_ROUTINE_ASM_AARCH64;
502 #elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
503 mti->cycles.routine= MY_TIMER_ROUTINE_GETHRTIME;
504 #else
505 mti->cycles.routine= 0;
506 #endif
507
508 if (!mti->cycles.routine || !my_timer_cycles())
509 {
510 mti->cycles.routine= 0;
511 mti->cycles.resolution= 0;
512 mti->cycles.frequency= 0;
513 mti->cycles.overhead= 0;
514 }
515
516 /* nanoseconds */
517 mti->nanoseconds.frequency= 1000000000; /* initial assumption */
518 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
519 mti->nanoseconds.routine= MY_TIMER_ROUTINE_GETHRTIME;
520 #elif defined(HAVE_CLOCK_GETTIME)
521 mti->nanoseconds.routine= MY_TIMER_ROUTINE_CLOCK_GETTIME;
522 #elif defined(__APPLE__) && defined(__MACH__)
523 mti->nanoseconds.routine= MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME;
524 #else
525 mti->nanoseconds.routine= 0;
526 #endif
527 if (!mti->nanoseconds.routine || !my_timer_nanoseconds())
528 {
529 mti->nanoseconds.routine= 0;
530 mti->nanoseconds.resolution= 0;
531 mti->nanoseconds.frequency= 0;
532 mti->nanoseconds.overhead= 0;
533 }
534
535 /* microseconds */
536 mti->microseconds.frequency= 1000000; /* initial assumption */
537 #if defined(HAVE_GETTIMEOFDAY)
538 mti->microseconds.routine= MY_TIMER_ROUTINE_GETTIMEOFDAY;
539 #elif defined(_WIN32)
540 {
541 LARGE_INTEGER li;
542 /* Windows: typical frequency = 3579545, actually 1/3 microsecond. */
543 if (!QueryPerformanceFrequency(&li))
544 mti->microseconds.routine= 0;
545 else
546 {
547 mti->microseconds.frequency= li.QuadPart;
548 mti->microseconds.routine= MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER;
549 }
550 }
551 #else
552 mti->microseconds.routine= 0;
553 #endif
554 if (!mti->microseconds.routine || !my_timer_microseconds())
555 {
556 mti->microseconds.routine= 0;
557 mti->microseconds.resolution= 0;
558 mti->microseconds.frequency= 0;
559 mti->microseconds.overhead= 0;
560 }
561
562 /* milliseconds */
563 mti->milliseconds.frequency= 1000; /* initial assumption */
564 #if defined(HAVE_GETTIMEOFDAY)
565 mti->milliseconds.routine= MY_TIMER_ROUTINE_GETTIMEOFDAY;
566 #elif defined(_WIN32)
567 mti->milliseconds.routine= MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME;
568 #else
569 mti->milliseconds.routine= 0;
570 #endif
571 if (!mti->milliseconds.routine || !my_timer_milliseconds())
572 {
573 mti->milliseconds.routine= 0;
574 mti->milliseconds.resolution= 0;
575 mti->milliseconds.frequency= 0;
576 mti->milliseconds.overhead= 0;
577 }
578
579 /* ticks */
580 mti->ticks.frequency= 100; /* permanent assumption */
581 #if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
582 mti->ticks.routine= MY_TIMER_ROUTINE_TIMES;
583 #elif defined(_WIN32)
584 mti->ticks.routine= MY_TIMER_ROUTINE_GETTICKCOUNT;
585 #else
586 mti->ticks.routine= 0;
587 #endif
588 if (!mti->ticks.routine || !my_timer_ticks())
589 {
590 mti->ticks.routine= 0;
591 mti->ticks.resolution= 0;
592 mti->ticks.frequency= 0;
593 mti->ticks.overhead= 0;
594 }
595
596 /*
597 Calculate overhead in terms of the timer that
598 gives the best resolution: cycles or nanoseconds.
599 I doubt it ever will be as bad as microseconds.
600 */
601 if (mti->cycles.routine)
602 best_timer= &my_timer_cycles;
603 else
604 {
605 if (mti->nanoseconds.routine)
606 {
607 best_timer= &my_timer_nanoseconds;
608 }
609 else
610 best_timer= &my_timer_microseconds;
611 }
612
613 /* best_timer_overhead = least of 20 calculations */
614 for (i= 0, best_timer_overhead= 1000000000; i < 20; ++i)
615 {
616 time1= best_timer();
617 time2= best_timer() - time1;
618 if (best_timer_overhead > time2)
619 best_timer_overhead= time2;
620 }
621 if (mti->cycles.routine)
622 my_timer_init_overhead(&mti->cycles.overhead,
623 best_timer,
624 &my_timer_cycles,
625 best_timer_overhead);
626 if (mti->nanoseconds.routine)
627 my_timer_init_overhead(&mti->nanoseconds.overhead,
628 best_timer,
629 &my_timer_nanoseconds,
630 best_timer_overhead);
631 if (mti->microseconds.routine)
632 my_timer_init_overhead(&mti->microseconds.overhead,
633 best_timer,
634 &my_timer_microseconds,
635 best_timer_overhead);
636 if (mti->milliseconds.routine)
637 my_timer_init_overhead(&mti->milliseconds.overhead,
638 best_timer,
639 &my_timer_milliseconds,
640 best_timer_overhead);
641 if (mti->ticks.routine)
642 my_timer_init_overhead(&mti->ticks.overhead,
643 best_timer,
644 &my_timer_ticks,
645 best_timer_overhead);
646
647 /*
648 Calculate resolution for nanoseconds or microseconds
649 or milliseconds, by seeing if it's always divisible
650 by 1000, and by noticing how much jumping occurs.
651 For ticks, just assume the resolution is 1.
652 */
653 if (mti->cycles.routine)
654 mti->cycles.resolution= 1;
655 if (mti->nanoseconds.routine)
656 mti->nanoseconds.resolution=
657 my_timer_init_resolution(&my_timer_nanoseconds, 20000);
658 if (mti->microseconds.routine)
659 mti->microseconds.resolution=
660 my_timer_init_resolution(&my_timer_microseconds, 20);
661 if (mti->milliseconds.routine)
662 mti->milliseconds.resolution=
663 my_timer_init_resolution(&my_timer_milliseconds, 0);
664 if (mti->ticks.routine)
665 mti->ticks.resolution= 1;
666
667 /*
668 Calculate cycles frequency,
669 if we have both a cycles routine and a microseconds routine.
670 In tests, this usually results in a figure within 2% of
671 what "cat /proc/cpuinfo" says.
672 If the microseconds routine is QueryPerformanceCounter
673 (i.e. it's Windows), and the microseconds frequency is >
674 500,000,000 (i.e. it's Windows Server so it uses RDTSC)
675 and the microseconds resolution is > 100 (i.e. dreadful),
676 then calculate cycles frequency = microseconds frequency.
677 */
678 if (mti->cycles.routine
679 && mti->microseconds.routine)
680 {
681 if (mti->microseconds.routine ==
682 MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER
683 && mti->microseconds.frequency > 500000000
684 && mti->microseconds.resolution > 100)
685 mti->cycles.frequency= mti->microseconds.frequency;
686 else
687 {
688 ulonglong time1, time2;
689 time1= my_timer_init_frequency(mti);
690 /* Repeat once in case there was an interruption. */
691 time2= my_timer_init_frequency(mti);
692 if (time1 < time2) mti->cycles.frequency= time1;
693 else mti->cycles.frequency= time2;
694 }
695 }
696
697 /*
698 Calculate milliseconds frequency =
699 (cycles-frequency/#-of-cycles) * #-of-milliseconds,
700 if we have both a milliseconds routine and a cycles
701 routine.
702 This will be inaccurate if milliseconds resolution > 1.
703 This is probably only useful when testing new platforms.
704 */
705 if (mti->milliseconds.routine
706 && mti->milliseconds.resolution < 1000
707 && mti->microseconds.routine
708 && mti->cycles.routine)
709 {
710 int i;
711 ulonglong time1, time2, time3, time4;
712 time1= my_timer_cycles();
713 time2= my_timer_milliseconds();
714 time3= time2; /* Avoids a Microsoft/IBM compiler warning */
715 for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
716 {
717 time3= my_timer_milliseconds();
718 if (time3 - time2 > 10) break;
719 }
720 time4= my_timer_cycles();
721 mti->milliseconds.frequency=
722 (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
723 }
724
725 /*
726 Calculate ticks.frequency =
727 (cycles-frequency/#-of-cycles * #-of-ticks,
728 if we have both a ticks routine and a cycles
729 routine,
730 This is probably only useful when testing new platforms.
731 */
732 if (mti->ticks.routine
733 && mti->microseconds.routine
734 && mti->cycles.routine)
735 {
736 int i;
737 ulonglong time1, time2, time3, time4;
738 time1= my_timer_cycles();
739 time2= my_timer_ticks();
740 time3= time2; /* Avoids a Microsoft/IBM compiler warning */
741 for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
742 {
743 time3= my_timer_ticks();
744 if (time3 - time2 > 10) break;
745 }
746 time4= my_timer_cycles();
747 mti->ticks.frequency=
748 (mti->cycles.frequency * (time3 - time2)) / (time4 - time1);
749 }
750 }
751
/*
  Additional Comments
  -------------------

  This is for timing, i.e. finding out how long a piece of code
  takes. If you want time of day matching a wall clock, the
  my_timer_xxx functions won't help you.

  The best timer is the one with highest frequency, lowest
  overhead, and resolution=1. The my_timer_init() routine will tell
  you at runtime which timer that is. Usually it will be
  my_timer_cycles() but be aware that, although it's best,
  it has possible flaws and dangers. Depending on platform:
  - The frequency might change. We don't test for this. It
    happens on laptops for power saving, and on blade servers
    for avoiding overheating.
  - The overhead that my_timer_init() returns is the minimum.
    In fact it could be slightly greater because of caching or
    because you call the routine by address, as recommended.
    It could be hugely greater if there's an interrupt.
  - The x86 cycle counter, RDTSC doesn't "serialize". That is,
    if there is out-of-order execution, rdtsc might be processed
    after an instruction that logically follows it.
    (We could force serialization, but that would be slower.)
  - It is possible to set a flag which renders RDTSC
    inoperative. Somebody responsible for the kernel
    of the operating system would have to make this
    decision. For the platforms we've tested with, there's
    no such problem.
  - With a multi-processor arrangement, it's possible
    to get the cycle count from one processor in
    thread X, and the cycle count from another processor
    in thread Y. They may not always be in synch.
  - You can't depend on a cycle counter being available for
    all platforms. On Alphas, the
    cycle counter is only 32-bit, so it would overflow quickly,
    so we don't bother with it. On platforms that we haven't
    tested, there might be some if/endif combination that we
    didn't expect, or some assembler routine that we didn't
    supply.

  The recommended way to use the timer routines is:
  1. Somewhere near the beginning of the program, call
     my_timer_init(). This should only be necessary once,
     although you can call it again if you think that the
     frequency has changed.
  2. Determine the best timer based on frequency, resolution,
     overhead -- all things that my_timer_init() returns.
     Preserve the address of the timer and the MY_TIMER_INFO
     results in an easily-accessible place.
  3. Instrument the code section that you're monitoring, thus:
     time1= my_timer_xxx();
     Instrumented code;
     time2= my_timer_xxx();
     elapsed_time= (time2 - time1) - overhead;
     If the timer is always on, then overhead is always there,
     so don't subtract it.
  4. Save the elapsed time, or add it to a totaller.
  5. When all timing processes are complete, transfer the
     saved / totalled elapsed time to permanent storage.
     Optionally you can convert cycles to microseconds at
     this point. (Don't do so every time you calculate
     elapsed_time! That would waste time and lose precision!)
     For converting cycles to microseconds, use the frequency
     that my_timer_init() returns. You'll also need to convert
     if the my_timer_microseconds() function is the Windows
     function QueryPerformanceCounter(), since that's sometimes
     a counter with precision slightly better than microseconds.

  Since we recommend calls by function pointer, we supply
  no inline functions.

  Some comments on the many candidate routines for timing ...

  clock() -- We don't use because it would overflow frequently.

  clock_gettime() -- In tests, clock_gettime often had
  resolution = 1000.

  gettimeofday() -- available on most platforms, though not
  on Windows. There is a hardware timer (sometimes a Programmable
  Interrupt Timer or "PIT") (sometimes a "HPET") used for
  interrupt generation. When it interrupts (a "tick" or "jiffy",
  typically 1 centisecond) it sets xtime. For gettimeofday, a
  Linux kernel routine usually gets xtime and then gets rdtsc
  to get elapsed nanoseconds since the last tick. On Red Hat
  Enterprise Linux 3, there was once a bug which caused the
  resolution to be 1000, i.e. one centisecond. We never check
  for time-zone change.

  getnstimeofday() -- something to watch for in future Linux

  do_gettimeofday() -- exists on Linux but not for "userland"

  get_cycles() -- a multi-platform function, worth watching
  in future Linux versions. But we found platform-specific
  functions which were better documented in operating-system
  manuals. And get_cycles() can fail or return a useless
  32-bit number. It might be available on some platforms,
  such as arm, which we didn't test. Using
  "include <linux/timex.h>" or "include <asm/timex.h>"
  can lead to autoconf or compile errors, depending on system.

  rdtsc, __rdtsc, rdtscll: available for x86 with Linux, BSD,
  Solaris, Windows. See "possible flaws and dangers" comments.

  times(): what we use for ticks. Should just read the last
  (xtime) tick count, therefore should be fast, but usually
  isn't.

  GetTickCount(): we use this for my_timer_ticks() on
  Windows. Actually it really is a tick counter, so resolution
  >= 10 milliseconds unless you have a very old Windows version.
  With Windows 95 or 98 or ME, timeGetTime() has better resolution than
  GetTickCount (1ms rather than 55ms). But with Windows NT or XP or 2000,
  they're both getting from a variable in the Process Environment Block
  (PEB), and the variable is set by the programmable interrupt timer, so
  the resolution is the same (usually 10-15 milliseconds). Also timeGetTime
  is slower on old machines:
  http://www.doumo.jp/aon-java/jsp/postgretips/tips.jsp?tips=74.
  Also timeGetTime requires linking winmm.lib.
  Therefore we use GetTickCount.
  It will overflow every 49 days because the return is 32-bit.
  There is also a GetTickCount64 but it requires Vista or Windows Server 2008.
  (As for GetSystemTimeAsFileTime, its precision is spurious, it
  just reads the tick variable like the other functions do.
  However, we don't expect it to overflow every 49 days, so we
  will prefer it for my_timer_milliseconds().)

  QueryPerformanceCounter() -- we use this for my_timer_microseconds()
  on Windows. 1-PIT-tick (often 1/3-microsecond). Usually reads
  the PIT so it's slow. On some Windows variants, uses RDTSC.

  GetLocalTime() -- this is available on Windows but we don't use it.

  getclock(): documented for Alpha, but not found during tests.

  mach_absolute_time() and UpTime() are recommended for Apple.
  Initially they weren't tried, because asm_ppc seems to do the job.
  But now we use mach_absolute_time for nanoseconds.

  Any clock-based timer can be affected by NTP (ntpd program),
  which means:
  - full-second correction can occur for leap second
  - tiny corrections can occur approximately every 11 minutes
    (but I think they only affect the RTC which isn't the PIT).

  We define "precision" as "frequency" and "high precision" is
  "frequency better than 1 microsecond". We define "resolution"
  as a synonym for "granularity". We define "accuracy" as
  "closeness to the truth" as established by some authoritative
  clock, but we can't measure accuracy.

  Do not expect any of our timers to be monotonic; we
  won't guarantee that they return constantly-increasing
  unique numbers.

  We tested with AIX, Solaris (x86 + Sparc), Linux (x86 +
  Itanium), Windows, 64-bit Windows, QNX, FreeBSD, HPUX,
  Irix, Mac. We didn't test with SCO.

*/

