/*******************************************
 *
 * Interbench - Interactivity benchmark
 *
 * Author: Con Kolivas <kernel@kolivas.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *******************************************/

#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64 /* Large file support */
#define INTERBENCH_VERSION "0.30"

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <time.h>
#include <errno.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
#include <fenv.h>
#include <signal.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include "interbench.h"

#define MAX_UNAME_LENGTH 100
#define MAX_LOG_LENGTH ((MAX_UNAME_LENGTH) + 4)
#define MIN_BLK_SIZE 1024
#define DEFAULT_RESERVE 64
#define MB (1024 * 1024) /* 2^20 bytes */
#define KB 1024
#define MAX_MEM_IN_MB (1024 * 64) /* 64 GB */

struct user_data {
	unsigned long loops_per_ms;
	unsigned long ram, swap;
	int duration;
	int do_rt;
	int bench_nice;
	int load_nice;
	unsigned long custom_run;
	unsigned long custom_interval;
	unsigned long cpu_load;
	char logfilename[MAX_LOG_LENGTH];
	int log;
	char unamer[MAX_UNAME_LENGTH];
	char datestamp[13];
	FILE *logfile;
} ud = {
	.duration = 30,
	.cpu_load = 4,
	.log = 1,
};

/* Pipes main to/from load and bench processes */
static int m2l[2], l2m[2], m2b[2], b2m[2];

/* Which member of benchmarks is used when not benchmarking */
#define NOT_BENCHING (THREADS)
#define CUSTOM (THREADS - 1)

/*
 * To add another load or a benchmark you need to increment the value of
 * THREADS, add a function prototype for your function and add an entry to
 * the threadlist. To specify whether the function is a benchmark or a load
 * set the benchmark and/or load flag as appropriate. The basic requirements
 * of a new load can be seen by using emulate_none as a template.
 */

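/*
 * A sketch of a hypothetical addition, not part of the build: a "Disk"
 * load would need THREADS incremented where it is defined, a prototype
 *
 *	void emulate_disk(struct thread *th);
 *
 * and a threadlist entry such as
 *
 *	{.label = "Disk", .name = emulate_disk, .load = 1, .rtload = 1},
 *
 * inserted before the "Custom" entry, which must remain last.
 */
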
void emulate_none(struct thread *th);
void emulate_audio(struct thread *th);
void emulate_video(struct thread *th);
void emulate_x(struct thread *th);
void emulate_game(struct thread *th);
void emulate_burn(struct thread *th);
void emulate_write(struct thread *th);
void emulate_read(struct thread *th);
void emulate_ring(struct thread *th);
void emulate_compile(struct thread *th);
void emulate_memload(struct thread *th);
void emulate_hackbench(struct thread *th);
void emulate_custom(struct thread *th);

struct thread threadlist[THREADS] = {
	{.label = "None", .name = emulate_none, .load = 1, .rtload = 1},
	{.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1},
	{.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1},
	{.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1},
	{.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1},
	{.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1},
	{.label = "Write", .name = emulate_write, .load = 1, .rtload = 1},
	{.label = "Read", .name = emulate_read, .load = 1, .rtload = 1},
	{.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0}, /* No useful data from this */
	{.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1},
	{.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1},
	{.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0}, /* This is causing signal headaches */
	{.label = "Custom", .name = emulate_custom}, /* Leave custom as last entry */
};

void init_sem(sem_t *sem);
void init_all_sems(struct sems *s);
void initialise_thread(int i);
void start_thread(struct thread *th);
void stop_thread(struct thread *th);

void terminal_error(const char *name)
{
	fprintf(stderr, "\n");
	perror(name);
	exit (1);
}

void terminal_fileopen_error(FILE *fp, char *name)
{
	if (fclose(fp) == -1)
		terminal_error("fclose");
	terminal_error(name);
}

unsigned long long get_nsecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	return ((unsigned long long)myts->tv_sec * 1000000000 + myts->tv_nsec);
}

unsigned long get_usecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	return (myts->tv_sec * 1000000 + myts->tv_nsec / 1000);
}

void set_fifo(int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
	}
}

void set_mlock(void)
{
	int mlockflags;

	mlockflags = MCL_CURRENT | MCL_FUTURE;
#if 0
	mlockall(mlockflags); /* Is not critical if this fails */
#endif
}

void set_munlock(void)
{
#if 0
	if (munlockall() == -1)
		terminal_error("munlockall");
#endif
}

void set_thread_fifo(pthread_t pthread, int prio)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	/* pthread functions return an error number, not -1 */
	if (pthread_setschedparam(pthread, SCHED_FIFO, &sp))
		terminal_error("pthread_setschedparam");
}

void set_normal(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		fprintf(stderr, "Weird, could not unset RT scheduling!\n");
	}
}

void set_nice(int prio)
{
	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
		terminal_error("setpriority");
}

int test_fifo(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 99;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
		goto out_fail;
	}
	if (sched_getscheduler(0) != SCHED_FIFO)
		goto out_fail;
	set_normal();
	return 1;
out_fail:
	set_normal();
	return 0;
}

void set_thread_normal(pthread_t pthread)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (pthread_setschedparam(pthread, SCHED_OTHER, &sp))
		terminal_error("pthread_setschedparam");
}

void sync_flush(void)
{
	if ((fflush(NULL)) == EOF)
		terminal_error("fflush");
	sync();
	sync();
	sync();
}

unsigned long compute_allocable_mem(void)
{
	unsigned long total = ud.ram + ud.swap;
	unsigned long usage = ud.ram * 110 / 100;

	/* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
	if (total - DEFAULT_RESERVE < usage)
		usage = total - DEFAULT_RESERVE;
	usage /= 1024; /* to megabytes */
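	/*
	 * The hard 2930MB cap below is not explained in the original;
	 * presumably it keeps the allocation below the ~3GB userspace
	 * limit of 32 bit machines.
	 */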
	if (usage > 2930)
		usage = 2930;
	return usage;
}

void burn_loops(unsigned long loops)
{
	unsigned long i;

	/*
	 * We need some magic here to prevent the compiler from optimising
	 * this loop away. Otherwise trying to emulate a fixed cpu load
	 * with this loop will not work.
	 */
	for (i = 0 ; i < loops ; i++)
		asm volatile("" : : : "memory");
}

/* Use this many usecs of cpu time */
void burn_usecs(unsigned long usecs)
{
	unsigned long ms_loops;

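	/*
	 * Integer division truncates: loops_per_ms is divided first to
	 * avoid overflow for large usecs, at the cost of up to 999
	 * loops per ms of burn in lost precision.
	 */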
	ms_loops = ud.loops_per_ms / 1000 * usecs;
	burn_loops(ms_loops);
}

void microsleep(unsigned long long usecs)
{
	struct timespec req, rem;

	rem.tv_sec = rem.tv_nsec = 0;

	req.tv_sec = usecs / 1000000;
	req.tv_nsec = (usecs - (req.tv_sec * 1000000)) * 1000;
continue_sleep:
	if ((nanosleep(&req, &rem)) == -1) {
		if (errno == EINTR) {
			if (rem.tv_sec || rem.tv_nsec) {
				req.tv_sec = rem.tv_sec;
				req.tv_nsec = rem.tv_nsec;
				goto continue_sleep;
			}
			goto out;
		}
		terminal_error("nanosleep");
	}
out:
	return;
}

/*
 * Yes, sem_post and sem_wait shouldn't return -1 but they can, so we
 * must handle it.
 */
inline void post_sem(sem_t *s)
{
retry:
	if ((sem_post(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_post");
	}
}

inline void wait_sem(sem_t *s)
{
retry:
	if ((sem_wait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_wait");
	}
}

inline int trywait_sem(sem_t *s)
{
	int ret;

retry:
	if ((ret = sem_trywait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		if (errno != EAGAIN)
			terminal_error("sem_trywait");
	}
	return ret;
}

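/* Wrappers around read/write that transparently restart after EINTR */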
inline ssize_t Read(int fd, void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = read(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("read");
	}
	return retval;
}

inline ssize_t Write(int fd, const void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = write(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("write");
	}
	return retval;
}

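/*
 * periodic_schedule runs one "frame" of a periodic workload: burn cpu
 * for run_usecs, then hand the remaining time to the timekeeping thread
 * to sleep off until the next interval boundary. It returns the updated
 * deadline and accumulates latency statistics in the thread's data
 * table. Frames whose deadline has already passed before any cpu is
 * consumed are accounted as both missed deadlines and missed burns.
 */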
unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs,
	unsigned long interval_usecs, unsigned long long deadline)
{
	unsigned long long latency, missed_latency;
	unsigned long long current_time;
	struct tk_thread *tk;
	struct data_table *tb;
	struct timespec myts;

	latency = 0;
	tb = th->dt;
	tk = &th->tkthread;

	current_time = get_usecs(&myts);
	if (current_time > deadline + tk->slept_interval)
		latency = current_time - deadline - tk->slept_interval;

	/* calculate the latency for missed frames */
	missed_latency = 0;

	current_time = get_usecs(&myts);
	if (interval_usecs && current_time > deadline + interval_usecs) {
		/* We missed the deadline even before we consumed cpu */
		unsigned long intervals;

		deadline += interval_usecs;
		intervals = (current_time - deadline) /
			interval_usecs + 1;

		tb->missed_deadlines += intervals;
		missed_latency = intervals * interval_usecs;
		deadline += intervals * interval_usecs;
		tb->missed_burns += intervals;
		goto bypass_burn;
	}

	burn_usecs(run_usecs);
	current_time = get_usecs(&myts);
	tb->achieved_burns++;

	/*
	 * If we meet the deadline we move the deadline forward, otherwise
	 * we consider it a missed deadline and dropped frame etc.
	 */
	deadline += interval_usecs;
	if (deadline >= current_time) {
		tb->deadlines_met++;
	} else {
		if (interval_usecs) {
			unsigned long intervals = (current_time - deadline) /
				interval_usecs + 1;

			tb->missed_deadlines += intervals;
			missed_latency = intervals * interval_usecs;
			deadline += intervals * interval_usecs;
			if (intervals > 1)
				tb->missed_burns += intervals;
		} else {
			deadline = current_time;
			goto out_nosleep;
		}
	}
bypass_burn:
	tk->sleep_interval = deadline - current_time;

	post_sem(&tk->sem.start);
	wait_sem(&tk->sem.complete);
out_nosleep:
	/*
	 * Must add missed_latency to total here as this function may not be
	 * called again and the missed latency can be lost
	 */
	latency += missed_latency;
	if (latency > tb->max_latency)
		tb->max_latency = latency;
	tb->total_latency += latency;
	tb->sum_latency_squared += latency * latency;
	tb->nr_samples++;

	return deadline;
}

void initialise_thread_data(struct data_table *tb)
{
	tb->max_latency =
	tb->total_latency =
	tb->sum_latency_squared =
	tb->deadlines_met =
	tb->missed_deadlines =
	tb->missed_burns =
	tb->nr_samples = 0;
}

void create_pthread(pthread_t *thread, pthread_attr_t *attr,
	void * (*start_routine)(void *), void *arg)
{
	if (pthread_create(thread, attr, start_routine, arg))
		terminal_error("pthread_create");
}

void join_pthread(pthread_t th, void **thread_return)
{
	if (pthread_join(th, thread_return))
		terminal_error("pthread_join");
}

void emulate_none(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	wait_sem(s);
}

#define AUDIO_INTERVAL (50000)
#define AUDIO_RUN (AUDIO_INTERVAL / 20)
/* We emulate audio by using 5% cpu and waking every 50ms */
void emulate_audio(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/* We emulate video by using 40% cpu and waking for 60fps */
#define VIDEO_INTERVAL (1000000 / 60)
#define VIDEO_RUN (VIDEO_INTERVAL * 40 / 100)
void emulate_video(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/*
 * We emulate X by running for a variable percentage of cpu from 0-100%
 * in 1ms chunks.
 */
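/*
 * For example, at i = 16 a frame burns 16ms of cpu against an 84ms
 * interval; one full 0-100 sweep therefore takes roughly 10 seconds,
 * which is why the benchmark duration is rounded up to a multiple of
 * ten seconds in main().
 */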
void emulate_x(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 100;
	deadline = get_usecs(&myts);

	while (1) {
		int i, j;
		for (i = 0 ; i <= 100 ; i++) {
			j = 100 - i;
			deadline = periodic_schedule(th, i * 1000, j * 1000,
				deadline);
			deadline += i * 1000;
			if (!trywait_sem(s))
				return;
		}
	}
}

/*
 * We emulate gaming by using 100% cpu and seeing how many frames (jobs
 * completed) we can do in that time. Deadlines are meaningless with
 * unlocked frame rates. We do not use periodic_schedule for this load
 * because it never wants to sleep.
 */
#define GAME_INTERVAL (100000)
#define GAME_RUN (GAME_INTERVAL)
void emulate_game(struct thread *th)
{
	unsigned long long deadline, current_time, latency;
	sem_t *s = &th->sem.stop;
	struct timespec myts;
	struct data_table *tb;

	tb = th->dt;
	th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10;

	while (1) {
		deadline = get_usecs(&myts) + GAME_INTERVAL;
		burn_usecs(GAME_RUN);
		current_time = get_usecs(&myts);
		/* use usecs instead of simple count for game burn statistics */
		tb->achieved_burns += GAME_RUN;
		if (current_time > deadline) {
			latency = current_time - deadline;
			tb->missed_burns += latency;
		} else
			latency = 0;
		if (latency > tb->max_latency)
			tb->max_latency = latency;
		tb->total_latency += latency;
		tb->sum_latency_squared += latency * latency;
		tb->nr_samples++;
		if (!trywait_sem(s))
			return;
	}
}

void *burn_thread(void *t)
{
	struct thread *th;
	sem_t *s;
	long i = (long)t;

	th = &threadlist[i];
	s = &th->sem.stopchild;

	while (1) {
		burn_loops(ud.loops_per_ms);
		if (!trywait_sem(s)) {
			post_sem(s);
			break;
		}
	}
	return NULL;
}

/* Have ud.cpu_load threads burn cpu continuously */
void emulate_burn(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long i;
	long t;
	pthread_t burnthreads[ud.cpu_load];

	t = th->threadno;
	for (i = 0 ; i < ud.cpu_load ; i++)
		create_pthread(&burnthreads[i], NULL, burn_thread,
			(void*)(long) t);
	wait_sem(s);
	post_sem(&th->sem.stopchild);
	for (i = 0 ; i < ud.cpu_load ; i++)
		join_pthread(burnthreads[i], NULL);
}

/* Write a file the size of ram continuously */
void emulate_write(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	FILE *fp;
	char *name = "interbench.write";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long mem;

	if (!(fp = fopen(name, "w")))
		terminal_error("fopen");
	if (stat(name, &statbuf) == -1)
		terminal_fileopen_error(fp, "stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	mem = ud.ram / (statbuf.st_blksize / 1024); /* kilobytes to blocks */
	if (!(buf = calloc(1, statbuf.st_blksize)))
		terminal_fileopen_error(fp, "calloc");
	if (fclose(fp) == -1)
		terminal_error("fclose");

	while (1) {
		unsigned int i;

		if (!(fp = fopen(name, "w")))
			terminal_error("fopen");
		if (stat(name, &statbuf) == -1)
			terminal_fileopen_error(fp, "stat");
		for (i = 0 ; i < mem; i++) {
			if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1)
				terminal_fileopen_error(fp, "fwrite");
			if (!trywait_sem(s))
				goto out;
		}
		if (fclose(fp) == -1)
			terminal_error("fclose");
	}

out:
	if (fclose(fp) == -1)
		terminal_error("fclose");
	if (remove(name) == -1)
		terminal_error("remove");
	sync_flush();
}

/* Read a file the size of ram continuously */
void emulate_read(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	char *name = "interbench.read";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long bsize;
	int tmp;

	if ((tmp = open(name, O_RDONLY)) == -1)
		terminal_error("open");
	if (stat(name, &statbuf) == -1)
		terminal_error("stat");
	bsize = statbuf.st_blksize;
	if (!(buf = malloc(bsize)))
		terminal_error("malloc");

	while (1) {
		int rd;

		/*
		 * We have to read the whole file before quitting the load
		 * to prevent the data being cached for the next read. This
		 * is also the reason the file is the size of physical ram.
		 */
		while ((rd = Read(tmp, buf, bsize)) > 0);
		if (!trywait_sem(s))
			return;
		if (lseek(tmp, (off_t)0, SEEK_SET) == -1)
			terminal_error("lseek");
	}
}

#define RINGTHREADS 4

struct thread ringthreads[RINGTHREADS];

void *ring_thread(void *t)
{
	struct thread *th;
	struct sems *s;
	int i, post_to;

	i = (long)t;
	th = &ringthreads[i];
	s = &th->sem;
	post_to = i + 1;
	if (post_to == RINGTHREADS)
		post_to = 0;
	if (i == 0)
		post_sem(&s->ready);

	while (1) {
		wait_sem(&s->start);
		post_sem(&ringthreads[post_to].sem.start);
		if (!trywait_sem(&s->stop))
			goto out;
	}
out:
	post_sem(&ringthreads[post_to].sem.start);
	post_sem(&s->complete);
	return NULL;
}

/* Create a ring of 4 threads that wake each other up in a circle */
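/* The wakeup travels 0 -> 1 -> 2 -> 3 -> 0 -> ... until stop is posted */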
void emulate_ring(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	int i;

	for (i = 0 ; i < RINGTHREADS ; i++) {
		init_all_sems(&ringthreads[i].sem);
		create_pthread(&ringthreads[i].pthread, NULL,
			ring_thread, (void*)(long) i);
	}

	wait_sem(&ringthreads[0].sem.ready);
	post_sem(&ringthreads[0].sem.start);
	wait_sem(s);
	for (i = 0 ; i < RINGTHREADS ; i++)
		post_sem(&ringthreads[i].sem.stop);
	for (i = 0 ; i < RINGTHREADS ; i++) {
		wait_sem(&ringthreads[i].sem.complete);
		join_pthread(ringthreads[i].pthread, NULL);
	}
}

/* We emulate a compile by running burn, write and read threads simultaneously */
void emulate_compile(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long i, threads[3];

	bzero(threads, 3 * sizeof(threads[0]));

	for (i = 0 ; i < THREADS ; i++) {
		if (strcmp(threadlist[i].label, "Burn") == 0)
			threads[0] = i;
		if (strcmp(threadlist[i].label, "Write") == 0)
			threads[1] = i;
		if (strcmp(threadlist[i].label, "Read") == 0)
			threads[2] = i;
	}
	for (i = 0 ; i < 3 ; i++) {
		if (!threads[i]) {
			fprintf(stderr, "Can't find all threads for compile load\n");
			exit(1);
		}
	}
	for (i = 0 ; i < 3 ; i++) {
		initialise_thread(threads[i]);
		start_thread(&threadlist[threads[i]]);
	}
	wait_sem(s);
	for (i = 0 ; i < 3 ; i++)
		stop_thread(&threadlist[threads[i]]);
}

void *grab_and_touch(char *block[], int i)
{
	block[i] = (char *) malloc(MB);
	if (!block[i])
		return NULL;
	return (memset(block[i], 1, MB));
}

/* We emulate a memory load by allocating and torturing 110% of available ram */
void emulate_memload(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long touchable_mem, i;
	char *mem_block[MAX_MEM_IN_MB];
	void *success;

	touchable_mem = compute_allocable_mem();
	/* loop until we're killed, frobbing memory in various perverted ways */
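	/*
	 * The copy pass below copies each block from the one half the
	 * working set away, touching the entire allocation with no
	 * locality so pages keep cycling through ram and swap.
	 */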
	while (1) {
		for (i = 0; i < touchable_mem; i++) {
			success = grab_and_touch(mem_block, i);
			if (!success) {
				touchable_mem = i - 1;
				break;
			}
		}
		if (!trywait_sem(s))
			goto out_freemem;
		for (i = 0; i < touchable_mem; i++) {
			memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) %
				touchable_mem], MB);
			if (!trywait_sem(s))
				goto out_freemem;
		}
		for (i = 0; i < touchable_mem; i++) {
			free(mem_block[i]);
		}
		if (!trywait_sem(s))
			goto out;
	}
out_freemem:
	for (i = 0; i < touchable_mem; i++)
		free(mem_block[i]);
out:
	return;
}

struct thread hackthread;

void emulate_hackbench(struct thread *th)
{
	sem_t *s = &th->sem.stop;

	init_all_sems(&hackthread.sem);
	create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0);

	wait_sem(s);

	post_sem(&hackthread.sem.stop);
	wait_sem(&hackthread.sem.complete);

	join_pthread(hackthread.pthread, NULL);
}

#define CUSTOM_INTERVAL (ud.custom_interval)
#define CUSTOM_RUN (ud.custom_run)
void emulate_custom(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

void *timekeeping_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s;
	struct timespec myts;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->tkthread.sem;
	/*
	 * If this timekeeping thread is that of a benchmarked thread, we
	 * run at even higher priority than the benched thread itself when
	 * running real time. Otherwise the load timekeeping thread, which
	 * does not need accurate accounting, remains SCHED_NORMAL.
	 */
	if (th->dt != &th->benchmarks[NOT_BENCHING])
		set_fifo(96);
	/* These values must be changed at the appropriate places or race */
	tk->sleep_interval = tk->slept_interval = 0;
	post_sem(&s->ready);

	while (1) {
		unsigned long start_time, now;

		if (!trywait_sem(&s->stop))
			goto out;
		wait_sem(&s->start);
		tk->slept_interval = 0;
		start_time = get_usecs(&myts);
		if (!trywait_sem(&s->stop))
			goto out;
		if (tk->sleep_interval) {
			unsigned long diff = 0;
			microsleep(tk->sleep_interval);
			now = get_usecs(&myts);
			/* now should always be > start_time but... */
			if (now > start_time) {
				diff = now - start_time;
				if (diff > tk->sleep_interval)
					tk->slept_interval = diff -
						tk->sleep_interval;
			}
		}
		tk->sleep_interval = 0;
		post_sem(&s->complete);
	}
out:
	return NULL;
}

/*
 * All the sleep functions such as nanosleep can only guarantee that they
 * sleep for _at least_ the time requested. We work around this by having
 * a high priority real time thread that accounts for the extra time slept
 * in nanosleep. This allows wakeup latency of the tested thread to be
 * accurate and reflect true scheduling delays.
 */
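/*
 * For example, if a load requests a 10ms sleep and nanosleep returns
 * after 12.3ms, slept_interval records the extra 2.3ms so that
 * periodic_schedule can subtract it and count only genuine scheduler
 * wakeup delay as latency.
 */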
void *emulation_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s, *tks;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->sem;
	tks = &tk->sem;
	init_all_sems(tks);

	/* Start the timekeeping thread */
	create_pthread(&th->tk_pthread, NULL, timekeeping_thread,
		(void*)(long) i);
	/* Wait for timekeeping thread to be ready */
	wait_sem(&tks->ready);

	/* Tell main we're ready to start */
	post_sem(&s->ready);

	/* Wait for signal from main to start thread */
	wait_sem(&s->start);

	/* Start the actual function being benched/or running as load */
	th->name(th);

	/* Stop the timekeeping thread */
	post_sem(&tks->stop);
	post_sem(&tks->start);
	join_pthread(th->tk_pthread, NULL);

	/* Tell main we've finished */
	post_sem(&s->complete);
	return NULL;
}

/*
 * In an unoptimised loop we try to benchmark how many meaningless loops
 * per second we can perform on this hardware to fairly accurately
 * reproduce certain percentage cpu usage
 */
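/*
 * Worked example: if the initial 100000 loops take 1.25ms (1250000ns),
 * the next estimate is 1000000 * 100000 / 1250000 = 80000 loops per ms,
 * and the loop repeats until a run lands within 1% of one millisecond.
 */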
void calibrate_loop(void)
{
	unsigned long long start_time, loops_per_msec, run_time = 0;
	unsigned long loops;
	struct timespec myts;

	loops_per_msec = 100000;
redo:
	/* Calibrate to within 1% accuracy */
	while (run_time > 1010000 || run_time < 990000) {
		loops = loops_per_msec;
		start_time = get_nsecs(&myts);
		burn_loops(loops);
		run_time = get_nsecs(&myts) - start_time;
		loops_per_msec = (1000000 * loops_per_msec / run_time ? :
			loops_per_msec);
	}

	/* Rechecking after a pause increases reproducibility */
	sleep(1);
	loops = loops_per_msec;
	start_time = get_nsecs(&myts);
	burn_loops(loops);
	run_time = get_nsecs(&myts) - start_time;

	/* Tolerate 5% difference on checking */
	if (run_time > 1050000 || run_time < 950000)
		goto redo;

	ud.loops_per_ms = loops_per_msec;
}

void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2)));

/* Output to console +/- logfile */
void log_output(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	if (vprintf(format, ap) < 0)
		terminal_error("vprintf");
	va_end(ap);
	if (ud.log) {
		va_start(ap, format);
		if (vfprintf(ud.logfile, format, ap) < 0)
			terminal_error("vfprintf");
		va_end(ap);
	}
	fflush(NULL);
}

/* Calculate statistics and output them */
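/*
 * Sample mean and variance are computed from the running sums kept in
 * the data table:
 *
 *	mean = sum(x) / n
 *	variance = (sum(x^2) - sum(x)^2 / n) / (n - 1)
 *
 * where x is the per-frame latency.
 */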
void show_latencies(struct thread *th)
{
	struct data_table *tbj;
	struct tk_thread *tk;
	double average_latency, deadlines_met, samples_met, sd, max_latency;
	long double variance = 0;

	tbj = th->dt;
	tk = &th->tkthread;

	if (tbj->nr_samples > 1) {
		average_latency = tbj->total_latency / tbj->nr_samples;
		variance = (tbj->sum_latency_squared -
			(long double)tbj->total_latency * tbj->total_latency /
			tbj->nr_samples) / (tbj->nr_samples - 1);
		sd = sqrt((double)variance);
	} else {
		average_latency = tbj->total_latency;
		sd = 0.0;
	}

	/*
	 * Landing on the boundary of a deadline can make loaded runs appear
	 * to do more work than unloaded due to tiny duration differences.
	 */
	if (tbj->achieved_burns > 0)
		samples_met = (double)tbj->achieved_burns /
			(double)(tbj->achieved_burns + tbj->missed_burns) * 100;
	else
		samples_met = 0.0;
	max_latency = tbj->max_latency;
	/* When benchmarking rt we represent the data in us */
	if (!ud.do_rt) {
		average_latency /= 1000;
		sd /= 1000;
		max_latency /= 1000;
	}
	if (tbj->deadlines_met == 0)
		deadlines_met = 0;
	else
		deadlines_met = (double)tbj->deadlines_met /
			(double)(tbj->missed_deadlines + tbj->deadlines_met) * 100;

	/* Messy nonsense to format the output nicely */
	if (average_latency >= 100)
		log_output("%7.0f +/- ", average_latency);
	else
		log_output("%7.3g +/- ", average_latency);
	if (sd >= 100)
		log_output("%-9.0f", sd);
	else
		log_output("%-9.3g", sd);
	if (max_latency >= 100)
		log_output("%7.0f\t", max_latency);
	else
		log_output("%7.3g\t", max_latency);
	log_output("\t%4.3g", samples_met);
	if (!th->nodeadlines)
		log_output("\t%11.3g", deadlines_met);
	log_output("\n");
	sync_flush();
}

void create_read_file(void)
{
	unsigned int i;
	FILE *fp;
	char *name = "interbench.read";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long mem, bsize;
	int tmp;

	if ((tmp = open(name, O_RDONLY)) == -1) {
		if (errno != ENOENT)
			terminal_error("open");
		goto write;
	}
	if (stat(name, &statbuf) == -1)
		terminal_error("stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (statbuf.st_size / 1024 / bsize == ud.ram / bsize)
		return;
	if (remove(name) == -1)
		terminal_error("remove");
write:
	fprintf(stderr, "Creating file for read load...\n");
	if (!(fp = fopen(name, "w")))
		terminal_error("fopen");
	if (stat(name, &statbuf) == -1)
		terminal_fileopen_error(fp, "stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (!(buf = calloc(1, bsize)))
		terminal_fileopen_error(fp, "calloc");
	mem = ud.ram / (bsize / 1024); /* kilobytes to blocks */

	for (i = 0 ; i < mem; i++) {
		if (fwrite(buf, bsize, 1, fp) != 1)
			terminal_fileopen_error(fp, "fwrite");
	}
	if (fclose(fp) == -1)
		terminal_error("fclose");
	sync_flush();
}

void get_ram(void)
{
	struct vmstats vms;
	size_t vms_size = sizeof(vms);

	if (sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0))
		terminal_error("sysctlbyname: vm.vmstats");

	ud.ram = vms.v_page_count * vms.v_page_size;
	ud.ram /= 1024; /* linux size is in kB */
	ud.swap = ud.ram; /* XXX: swap doesn't have to be the same as RAM */

	if (!ud.ram || !ud.swap) {
		unsigned long i;
		fprintf(stderr, "\nCould not get memory or swap size. ");
		fprintf(stderr, "Will not perform mem_load\n");
		for (i = 0 ; i < THREADS ; i++) {
			if (strcmp(threadlist[i].label, "Memload") == 0) {
				threadlist[i].load = 0;
				threadlist[i].rtload = 0;
			}
		}
	}
}

void get_logfilename(void)
{
	struct tm *mytm;
	struct utsname buf;
	time_t t;
	int year, month, day, hours, minutes;

	time(&t);
	if (uname(&buf) == -1)
		terminal_error("uname");
	if (!(mytm = localtime(&t)))
		terminal_error("localtime");
	year = mytm->tm_year + 1900;
	month = mytm->tm_mon + 1;
	day = mytm->tm_mday;
	hours = mytm->tm_hour;
	minutes = mytm->tm_min;
	strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH);
	ud.unamer[MAX_UNAME_LENGTH - 1] = '\0'; /* strncpy may not terminate */

	sprintf(ud.datestamp, "%2d%02d%02d%02d%02d",
		year, month, day, hours, minutes);
	snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer);
}

void start_thread(struct thread *th)
{
	post_sem(&th->sem.start);
}

void stop_thread(struct thread *th)
{
	post_sem(&th->sem.stop);
	wait_sem(&th->sem.complete);

	/* Kill the thread */
	join_pthread(th->pthread, NULL);
}

void init_sem(sem_t *sem)
{
	if (sem_init(sem, 0, 0))
		terminal_error("sem_init");
}

void init_all_sems(struct sems *s)
{
	/* Initialise the semaphores */
	init_sem(&s->ready);
	init_sem(&s->start);
	init_sem(&s->stop);
	init_sem(&s->complete);
	init_sem(&s->stopchild);
}

void initialise_thread(int i)
{
	struct thread *th = &threadlist[i];

	init_all_sems(&th->sem);
	/* Create the threads. Yes, the (long) cast is fugly but it's safe */
	create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i);

	wait_sem(&th->sem.ready);
	/*
	 * We set this pointer generically to NOT_BENCHING and set it to the
	 * benchmarked array entry only on benched threads.
	 */
	th->dt = &th->benchmarks[NOT_BENCHING];
	initialise_thread_data(th->dt);
}

/* A pseudo-semaphore for processes using a pipe */
void wait_on(int pype)
{
	int retval, buf = 0;

	retval = Read(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nread returned 0\n");
		exit (1);
	}
}

void wakeup_with(int pype)
{
	int retval, buf = 1;

	retval = Write(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nwrite returned 0\n");
		exit (1);
	}
}

void run_loadchild(int j)
{
	struct thread *thj;
	thj = &threadlist[j];

	set_nice(ud.load_nice);
	initialise_thread(j);

	/* Tell main we're ready */
	wakeup_with(l2m[1]);

	/* Main tells us we're ready */
	wait_on(m2l[0]);
	start_thread(thj);

	/* Tell main we received the start and are running */
	wakeup_with(l2m[1]);

	/* Main tells us to stop */
	wait_on(m2l[0]);
	stop_thread(thj);

	/* Tell main we've finished */
	wakeup_with(l2m[1]);
	exit (0);
}

void run_benchchild(int i, int j)
{
	struct thread *thi;

	thi = &threadlist[i];

	set_nice(ud.bench_nice);
	if (ud.do_rt)
		set_mlock();
	initialise_thread(i);
	/* Point the data table to the appropriate load being tested */
	thi->dt = &thi->benchmarks[j];
	initialise_thread_data(thi->dt);
	if (ud.do_rt)
		set_thread_fifo(thi->pthread, 95);

	/* Tell main we're ready */
	wakeup_with(b2m[1]);

	/* Main tells us we're ready */
	wait_on(m2b[0]);
	start_thread(thi);

	/* Tell main we have started */
	wakeup_with(b2m[1]);

	/* Main tells us to stop */
	wait_on(m2b[0]);
	stop_thread(thi);

	if (ud.do_rt) {
		set_thread_normal(thi->pthread);
		set_munlock();
	}
	show_latencies(thi);

	/* Tell main we've finished */
	wakeup_with(b2m[1]);
	exit(0);
}

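/*
 * Overview of the main/load/bench handshake over the pipe
 * pseudo-semaphores (each arrow is one wakeup_with/wait_on pair):
 *
 *	main: fork load    -> load initialises, reports ready
 *	main: fork bench   -> bench initialises, reports ready
 *	main: wake load    -> load starts its threads, reports running
 *	main: wake bench   -> bench starts its threads, reports running
 *	main: sleep for ud.duration seconds
 *	main: wake bench   -> bench stops threads, prints latencies
 *	main: wake load    -> load stops threads
 *	main: wait for both children to report completion
 */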
void bench(int i, int j)
{
	pid_t bench_pid, load_pid;

	if ((load_pid = fork()) == -1)
		terminal_error("fork");
	if (!load_pid)
		run_loadchild(j);

	/* Wait for load process to be ready */

	wait_on(l2m[0]);
	if ((bench_pid = fork()) == -1)
		terminal_error("fork");
	if (!bench_pid)
		run_benchchild(i, j);

	/* Wait for bench process to be ready */
	wait_on(b2m[0]);

	/*
	 * We want to be higher priority than everything to signal them to
	 * stop and we lock our memory if we can as well
	 */
	set_fifo(99);
	set_mlock();

	/* Wakeup the load process */
	wakeup_with(m2l[1]);
	/* Load tells it has received the first message and is running */
	wait_on(l2m[0]);

	/* After a small delay, wake up the benched process */
	sleep(1);
	wakeup_with(m2b[1]);

	/* Bench tells it has received the first message and is running */
	wait_on(b2m[0]);
	microsleep(ud.duration * 1000000);

	/* Tell the benched process to stop its threads and output results */
	wakeup_with(m2b[1]);

	/* Tell the load process to stop its threads */
	wakeup_with(m2l[1]);

	/* Return to SCHED_NORMAL */
	set_normal();
	set_munlock();

	/* Wait for load and bench processes to terminate */
	wait_on(l2m[0]);
	wait_on(b2m[0]);
}

void init_pipe(int *pype)
{
	if (pipe(pype) == -1)
		terminal_error("pipe");
}

void init_pipes(void)
{
	init_pipe(m2l);
	init_pipe(l2m);
	init_pipe(m2b);
	init_pipe(b2m);
}

void usage(void)
{
	/* Affinity commented out till working on all architectures */
	fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n");
	fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int>] [-B <int>] [-N <int>]\n");
	fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
	fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
	fprintf(stderr, "\t[-h]\n\n");
	fprintf(stderr, " -l\tUse <int> loops per sec (default: use saved benchmark)\n");
	fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
	fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n");
	fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
	fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n");
	//fprintf(stderr, " -u\tImitate uniprocessor\n");
	fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n");
	fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n");
	fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
	fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
	fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
	fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n");
	fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n");
	fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n");
	fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
	fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
	fprintf(stderr, " -h\tShow this help\n");
	fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n\n");
}

#ifdef DEBUG
void deadchild(int crap)
{
	pid_t retval;
	int status;

	crap = 0;

	if ((retval = waitpid(-1, &status, WNOHANG)) == -1) {
		if (errno == ECHILD)
			return;
		terminal_error("waitpid");
	}
	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return;
	fprintf(stderr, "\nChild terminated abnormally ");
	if (WIFSIGNALED(status))
		fprintf(stderr, "with signal %d", WTERMSIG(status));
	fprintf(stderr, "\n");
	exit (1);
}
#endif

int load_index(const char *loadname)
{
	int i;

	for (i = 0 ; i < THREADS ; i++)
		if (strcasecmp(loadname, threadlist[i].label) == 0)
			return i;
	return -1;
}

inline int bit_is_on(const unsigned int mask, int index)
{
	return (mask & (1 << index)) != 0;
}

inline void set_bit_on(unsigned int *mask, int index)
{
	*mask |= (1 << index);
}

int main(int argc, char **argv)
{
	unsigned long custom_cpu = 0;
	int q, i, j, affinity, benchmark = 0;
	unsigned int selected_loads = 0;
	unsigned int excluded_loads = 0;
	unsigned int selected_benches = 0;
	unsigned int excluded_benches = 0;
	FILE *fp;
	/*
	 * This file stores the loops_per_ms to be reused in a filename that
	 * can't be confused
	 */
	char *fname = "interbench.loops_per_ms";
	char *comment = NULL;
#ifdef DEBUG
	feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
	if (signal(SIGCHLD, deadchild) == SIG_ERR)
		terminal_error("signal");
#endif

	while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
		switch (q) {
		case 'h':
			usage();
			return (0);
		case 'l':
			ud.loops_per_ms = atoi(optarg);
			break;
		case 't':
			ud.duration = atoi(optarg);
			break;
		case 'L':
			ud.cpu_load = atoi(optarg);
			break;
		case 'B':
			ud.bench_nice = atoi(optarg);
			break;
		case 'N':
			ud.load_nice = atoi(optarg);
			break;
		case 'u':
			affinity = 1;
			break;
		case 'b':
			benchmark = 1;
			break;
		case 'c':
			ud.log = 0;
			break;
		case 'r':
			ud.do_rt = 1;
			break;
		case 'C':
			custom_cpu = (unsigned long)atol(optarg);
			break;
		case 'I':
			ud.custom_interval = atol(optarg);
			break;
		case 'm':
			comment = optarg;
			break;
		case 'w':
			i = load_index(optarg);
			if (i == -1) {
				fprintf(stderr, "Unknown load \"%s\"\n", optarg);
				return (-2);
			}
			set_bit_on(&selected_loads, i);
			break;
		case 'x':
			i = load_index(optarg);
			if (i == -1) {
				fprintf(stderr, "Unknown load \"%s\"\n", optarg);
				return (-2);
			}
			set_bit_on(&excluded_loads, i);
			break;
		case 'W':
			i = load_index(optarg);
			if (i == -1) {
				fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
				return (-2);
			}
			set_bit_on(&selected_benches, i);
			break;
		case 'X':
			i = load_index(optarg);
			if (i == -1) {
				fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
				return (-2);
			}
			set_bit_on(&excluded_benches, i);
			break;
		default:
			usage();
			return (1);
		}
	}
	argc -= optind;
	argv += optind;
	/* default is all loads */
	if (selected_loads == 0)
		selected_loads = (unsigned int)-1;
	selected_loads &= ~excluded_loads;
	/* default is all benches */
	if (selected_benches == 0)
		selected_benches = (unsigned int)-1;
	selected_benches &= ~excluded_benches;

	if (!test_fifo()) {
		fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n");
		fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n");
		if (ud.do_rt) {
			fprintf(stderr, "Real time tests were requested, aborting.\n");
			exit (1);
		}
		fprintf(stderr, "Results will be unreliable.\n");
	}
	if (!ud.cpu_load) {
		fprintf(stderr, "Invalid cpu load\n");
		exit (1);
	}

	if ((custom_cpu && !ud.custom_interval) ||
		(ud.custom_interval && !custom_cpu) ||
		custom_cpu > 100) {
		fprintf(stderr, "Invalid custom values, aborting.\n");
		exit (1);
	}

	if (custom_cpu && ud.custom_interval) {
		ud.custom_run = ud.custom_interval * custom_cpu / 100;
		threadlist[CUSTOM].bench = 1;
		threadlist[CUSTOM].load = 1;
		threadlist[CUSTOM].rtbench = 1;
		threadlist[CUSTOM].rtload = 1;
	}

	/* FIXME Affinity commented out till working on all architectures */
#if 0
	if (affinity) {
#ifdef CPU_SET /* Current glibc expects cpu_set_t */
		cpu_set_t cpumask;

		CPU_ZERO(&cpumask);
		CPU_SET(0, &cpumask);
#else /* Old glibc expects unsigned long */
		unsigned long cpumask = 1;
#endif
		if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) {
			if (errno != EPERM)
				terminal_error("sched_setaffinity");
			fprintf(stderr, "could not set cpu affinity\n");
		}
	}
#endif

	/* Make benchmark a multiple of 10 seconds for proper range of X loads */
	if (ud.duration % 10)
		ud.duration += 10 - ud.duration % 10;

	if (benchmark)
		ud.loops_per_ms = 0;
	/*
	 * Try to get loops_per_ms from command line first, file second, and
	 * benchmark if not available.
	 */
	if (!ud.loops_per_ms) {
		if (benchmark)
			goto bench;
		if ((fp = fopen(fname, "r"))) {
			fscanf(fp, "%lu", &ud.loops_per_ms);
			if (fclose(fp) == -1)
				terminal_error("fclose");
			if (ud.loops_per_ms) {
				fprintf(stderr,
					"%lu loops_per_ms read from file interbench.loops_per_ms\n",
					ud.loops_per_ms);
				goto loops_known;
			}
		} else
			if (errno != ENOENT)
				terminal_error("fopen");
bench:
		fprintf(stderr, "loops_per_ms unknown; benchmarking...\n");

		/*
		 * To get as accurate a loop as possible we time it running
		 * SCHED_FIFO if we can
		 */
		set_fifo(99);
		calibrate_loop();
		set_normal();
	} else
		fprintf(stderr, "loops_per_ms specified from command line\n");

	if (!(fp = fopen(fname, "w"))) {
		if (errno != EACCES) /* No write access is not terminal */
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n");
		goto loops_known;
	}
	fprintf(fp, "%lu", ud.loops_per_ms);
	fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
		ud.loops_per_ms);
	if (fclose(fp) == -1)
		terminal_error("fclose");

loops_known:
	get_ram();
	get_logfilename();
	create_read_file();
	init_pipes();

	if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) {
		if (errno != EACCES)
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to logfile\n");
		ud.log = 0;
	}
	log_output("\n");
	log_output("Using %lu loops per ms, running every load for %d seconds\n",
		ud.loops_per_ms, ud.duration);
	log_output("Benchmarking kernel %s at datestamp %s\n",
		ud.unamer, ud.datestamp);
	if (comment)
		log_output("Comment: %s\n", comment);
	log_output("\n");

	for (i = 0 ; i < THREADS ; i++)
		threadlist[i].threadno = i;

	for (i = 0 ; i < THREADS ; i++) {
		struct thread *thi = &threadlist[i];
		int *benchme;

		if (ud.do_rt)
			benchme = &threadlist[i].rtbench;
		else
			benchme = &threadlist[i].bench;

		if (!*benchme || !bit_is_on(selected_benches, i))
			continue;

		log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label);
		if (ud.do_rt)
			log_output("real time ");
		else if (ud.bench_nice)
			log_output("nice %d ", ud.bench_nice);
		log_output("in the presence of simulated ");
		if (ud.load_nice)
			log_output("nice %d ", ud.load_nice);
		log_output("---\n");

		log_output("Load");
		if (ud.do_rt)
			log_output("\tLatency +/- SD (us)");
		else
			log_output("\tLatency +/- SD (ms)");
		log_output(" Max Latency ");
		log_output(" %% Desired CPU");
		if (!thi->nodeadlines)
			log_output(" %% Deadlines Met");
		log_output("\n");

		for (j = 0 ; j < THREADS ; j++) {
			struct thread *thj = &threadlist[j];

			if (j == i || !bit_is_on(selected_loads, j) ||
				(!threadlist[j].load && !ud.do_rt) ||
				(!threadlist[j].rtload && ud.do_rt))
				continue;
			log_output("%s\t", thj->label);
			sync_flush();
			bench(i, j);
		}
		log_output("\n");
	}
	log_output("\n");
	if (ud.log)
		fclose(ud.logfile);

	return 0;
}