/*******************************************
 *
 * Interbench - Interactivity benchmark
 *
 * Author:  Con Kolivas <kernel@kolivas.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *******************************************/

#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64	/* Large file support */
#define INTERBENCH_VERSION	"0.30"

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <time.h>
#include <errno.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
#include <fenv.h>
#include <signal.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include "interbench.h"

#define MAX_UNAME_LENGTH	100
#define MAX_LOG_LENGTH		((MAX_UNAME_LENGTH) + 4)
#define MIN_BLK_SIZE		1024
#define DEFAULT_RESERVE		64
#define MB			(1024 * 1024)	/* 2^20 bytes */
#define KB			1024
#define MAX_MEM_IN_MB		(1024 * 64)	/* 64 GB */

struct user_data {
	unsigned long loops_per_ms;
	unsigned long ram, swap;
	int duration;
	int do_rt;
	int bench_nice;
	int load_nice;
	unsigned long custom_run;
	unsigned long custom_interval;
	unsigned long cpu_load;
	char logfilename[MAX_LOG_LENGTH];
	int log;
	char unamer[MAX_UNAME_LENGTH];
	char datestamp[13];
	FILE *logfile;
} ud = {
	.duration = 30,
	.cpu_load = 4,
	.log = 1,
};

/* Pipes main to/from load and bench processes */
static int m2l[2], l2m[2], m2b[2], b2m[2];

/* Which member of benchmarks is used when not benchmarking */
#define NOT_BENCHING	(THREADS)
#define CUSTOM		(THREADS - 1)

/*
 * To add another load or a benchmark you need to increment the value of
 * THREADS, add a function prototype for your function and add an entry to
 * the threadlist. To specify whether the function is a benchmark or a load
 * set the benchmark and/or load flag as appropriate. The basic requirements
 * of a new load can be seen by using emulate_none as a template; a sketch
 * of such an addition follows the threadlist below.
 */

void emulate_none(struct thread *th);
void emulate_audio(struct thread *th);
void emulate_video(struct thread *th);
void emulate_x(struct thread *th);
void emulate_game(struct thread *th);
void emulate_burn(struct thread *th);
void emulate_write(struct thread *th);
void emulate_read(struct thread *th);
void emulate_ring(struct thread *th);
void emulate_compile(struct thread *th);
void emulate_memload(struct thread *th);
void emulate_hackbench(struct thread *th);
void emulate_custom(struct thread *th);

struct thread threadlist[THREADS] = {
	{.label = "None", .name = emulate_none, .load = 1, .rtload = 1},
	{.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1},
	{.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1},
	{.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1},
	{.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1},
	{.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1},
	{.label = "Write", .name = emulate_write, .load = 1, .rtload = 1},
	{.label = "Read", .name = emulate_read, .load = 1, .rtload = 1},
	{.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0},	/* No useful data from this */
	{.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1},
	{.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1},
	{.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0},	/* This is causing signal headaches */
	{.label = "Custom", .name = emulate_custom},	/* Leave custom as last entry */
};
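
/*
 * Illustration only (not part of the original source): a hypothetical
 * "Netload" load would be added by bumping THREADS in interbench.h,
 * declaring its emulator above, and appending an entry before "Custom":
 *
 *	void emulate_netload(struct thread *th);
 *	...
 *	{.label = "Netload", .name = emulate_netload, .load = 1, .rtload = 1},
 *
 * The emulator itself only has to block until told to stop, exactly as
 * emulate_none does with wait_sem(&th->sem.stop).
 */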

void init_sem(sem_t *sem);
void init_all_sems(struct sems *s);
void initialise_thread(int i);
void start_thread(struct thread *th);
void stop_thread(struct thread *th);

void terminal_error(const char *name)
{
	fprintf(stderr, "\n");
	perror(name);
	exit (1);
}

void terminal_fileopen_error(FILE *fp, char *name)
{
	if (fclose(fp) == -1)
		terminal_error("fclose");
	terminal_error(name);
}

unsigned long long get_nsecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	/* Force 64 bit arithmetic; tv_sec may be 32 bit on some platforms */
	return (myts->tv_sec * 1000000000ULL + myts->tv_nsec);
}

unsigned long get_usecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	return (myts->tv_sec * 1000000ULL + myts->tv_nsec / 1000);
}

void set_fifo(int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
	}
}

void set_mlock(void)
{
	int mlockflags;

	mlockflags = MCL_CURRENT | MCL_FUTURE;
#if 0
	mlockall(mlockflags);	/* Is not critical if this fails */
#endif
}

void set_munlock(void)
{
#if 0
	if (munlockall() == -1)
		terminal_error("munlockall");
#endif
}

void set_thread_fifo(pthread_t pthread, int prio)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	/* pthread_setschedparam returns an error number, not -1 */
	if (pthread_setschedparam(pthread, SCHED_FIFO, &sp))
		terminal_error("pthread_setschedparam");
}

void set_normal(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		fprintf(stderr, "Weird, could not unset RT scheduling!\n");
	}
}

void set_nice(int prio)
{
	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
		terminal_error("setpriority");
}

int test_fifo(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 99;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
		goto out_fail;
	}
	if (sched_getscheduler(0) != SCHED_FIFO)
		goto out_fail;
	set_normal();
	return 1;
out_fail:
	set_normal();
	return 0;
}

void set_thread_normal(pthread_t pthread)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (pthread_setschedparam(pthread, SCHED_OTHER, &sp))
		terminal_error("pthread_setschedparam");
}

void sync_flush(void)
{
	if ((fflush(NULL)) == EOF)
		terminal_error("fflush");
	sync();
	sync();
	sync();
}

unsigned long compute_allocable_mem(void)
{
	unsigned long total = ud.ram + ud.swap;
	unsigned long usage = ud.ram * 110 / 100;

	/* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
	if (total - DEFAULT_RESERVE < usage)
		usage = total - DEFAULT_RESERVE;
	usage /= 1024;	/* to megabytes */
	/* Cap the allocation, presumably to stay well under 32 bit limits */
	if (usage > 2930)
		usage = 2930;
	return usage;
}

void burn_loops(unsigned long loops)
{
	unsigned long i;

	/*
	 * We need some magic here to prevent the compiler from optimising
	 * this loop away. Otherwise trying to emulate a fixed cpu load
	 * with this loop will not work.
	 */
	for (i = 0 ; i < loops ; i++)
		asm volatile("" : : : "memory");
}

/* Use this many usecs of cpu time */
void burn_usecs(unsigned long usecs)
{
	unsigned long ms_loops;

	ms_loops = ud.loops_per_ms / 1000 * usecs;
	burn_loops(ms_loops);
}
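
/*
 * Worked example of the conversion above (numbers illustrative only):
 * with ud.loops_per_ms == 100000, burn_usecs(2500) runs
 * 100000 / 1000 * 2500 == 250000 loops, i.e. 2.5ms of cpu. Note the
 * integer division: loops_per_ms values below 1000 truncate to zero
 * loops per microsecond.
 */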

void microsleep(unsigned long long usecs)
{
	struct timespec req, rem;

	rem.tv_sec = rem.tv_nsec = 0;

	req.tv_sec = usecs / 1000000;
	req.tv_nsec = (usecs - (req.tv_sec * 1000000)) * 1000;
continue_sleep:
	if ((nanosleep(&req, &rem)) == -1) {
		if (errno == EINTR) {
			if (rem.tv_sec || rem.tv_nsec) {
				req.tv_sec = rem.tv_sec;
				req.tv_nsec = rem.tv_nsec;
				goto continue_sleep;
			}
			goto out;
		}
		terminal_error("nanosleep");
	}
out:
	return;
}

/*
 * Yes, sem_post and sem_wait shouldn't return -1 but they do so we must
 * handle it.
 */
inline void post_sem(sem_t *s)
{
retry:
	if ((sem_post(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_post");
	}
}

inline void wait_sem(sem_t *s)
{
retry:
	if ((sem_wait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_wait");
	}
}

inline int trywait_sem(sem_t *s)
{
	int ret;

retry:
	if ((ret = sem_trywait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		if (errno != EAGAIN)
			terminal_error("sem_trywait");
	}
	return ret;
}

inline ssize_t Read(int fd, void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = read(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("read");
	}
	return retval;
}

inline ssize_t Write(int fd, const void *buf, size_t count)
{
	ssize_t retval;

retry:
	/* write the buffer, not the address of the buffer pointer */
	retval = write(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("write");
	}
	return retval;
}

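/*
 * Core of every periodic emulation: burn run_usecs of cpu, then sleep to
 * the next interval_usecs boundary via the timekeeping thread. The
 * deadline argument carries the absolute time (in us) at which the next
 * period should start and the updated deadline is returned. Any oversleep
 * by nanosleep is reported back through tk->slept_interval and subtracted,
 * so only genuine scheduling delay is recorded as latency.
 */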
unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs,
	unsigned long interval_usecs, unsigned long long deadline)
{
	unsigned long long latency, missed_latency;
	unsigned long long current_time;
	struct tk_thread *tk;
	struct data_table *tb;
	struct timespec myts;

	latency = 0;
	tb = th->dt;
	tk = &th->tkthread;

	current_time = get_usecs(&myts);
	if (current_time > deadline + tk->slept_interval)
		latency = current_time - deadline - tk->slept_interval;

	/* calculate the latency for missed frames */
	missed_latency = 0;

	current_time = get_usecs(&myts);
	if (interval_usecs && current_time > deadline + interval_usecs) {
		/* We missed the deadline even before we consumed cpu */
		unsigned long intervals;

		deadline += interval_usecs;
		intervals = (current_time - deadline) /
			interval_usecs + 1;

		tb->missed_deadlines += intervals;
		missed_latency = intervals * interval_usecs;
		deadline += intervals * interval_usecs;
		tb->missed_burns += intervals;
		goto bypass_burn;
	}

	burn_usecs(run_usecs);
	current_time = get_usecs(&myts);
	tb->achieved_burns++;

	/*
	 * If we meet the deadline we move the deadline forward, otherwise
	 * we consider it a missed deadline and dropped frame etc.
	 */
	deadline += interval_usecs;
	if (deadline >= current_time) {
		tb->deadlines_met++;
	} else {
		if (interval_usecs) {
			unsigned long intervals = (current_time - deadline) /
				interval_usecs + 1;

			tb->missed_deadlines += intervals;
			missed_latency = intervals * interval_usecs;
			deadline += intervals * interval_usecs;
			if (intervals > 1)
				tb->missed_burns += intervals;
		} else {
			deadline = current_time;
			goto out_nosleep;
		}
	}
bypass_burn:
	tk->sleep_interval = deadline - current_time;

	post_sem(&tk->sem.start);
	wait_sem(&tk->sem.complete);
out_nosleep:
	/*
	 * Must add missed_latency to total here as this function may not be
	 * called again and the missed latency can be lost
	 */
	latency += missed_latency;
	if (latency > tb->max_latency)
		tb->max_latency = latency;
	tb->total_latency += latency;
	tb->sum_latency_squared += latency * latency;
	tb->nr_samples++;

	return deadline;
}

void initialise_thread_data(struct data_table *tb)
{
	tb->max_latency =
		tb->total_latency =
		tb->sum_latency_squared =
		tb->deadlines_met =
		tb->missed_deadlines =
		tb->missed_burns =
		tb->nr_samples = 0;
}

void create_pthread(pthread_t *thread, pthread_attr_t *attr,
	void * (*start_routine)(void *), void *arg)
{
	if (pthread_create(thread, attr, start_routine, arg))
		terminal_error("pthread_create");
}

void join_pthread(pthread_t th, void **thread_return)
{
	if (pthread_join(th, thread_return))
		terminal_error("pthread_join");
}

void emulate_none(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	wait_sem(s);
}

#define AUDIO_INTERVAL	(50000)
#define AUDIO_RUN	(AUDIO_INTERVAL / 20)
/* We emulate audio by using 5% cpu and waking every 50ms */
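/* i.e. AUDIO_RUN == 50000 / 20 == 2500us of cpu per 50ms period == 5% */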
void emulate_audio(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/* We emulate video by using 40% cpu and waking at 60fps */
#define VIDEO_INTERVAL	(1000000 / 60)
#define VIDEO_RUN	(VIDEO_INTERVAL * 40 / 100)
void emulate_video(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/*
 * We emulate X by running for a variable percentage of cpu from 0-100%
 * in 1ms chunks.
 */
void emulate_x(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 100;
	deadline = get_usecs(&myts);

	while (1) {
		int i, j;
		for (i = 0 ; i <= 100 ; i++) {
			j = 100 - i;
			deadline = periodic_schedule(th, i * 1000, j * 1000,
				deadline);
			deadline += i * 1000;
			if (!trywait_sem(s))
				return;
		}
	}
}
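
/*
 * Note on emulate_x: each iteration runs i ms then sleeps j ms with
 * i + j == 100, so one full 0-100 sweep covers roughly 10.1 seconds.
 * This is why main() rounds the benchmark duration up to a multiple of
 * ten seconds.
 */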

/*
 * We emulate gaming by using 100% cpu and seeing how many frames (jobs
 * completed) we can do in that time. Deadlines are meaningless with
 * unlocked frame rates. We do not use periodic_schedule for this load
 * because it never wants to sleep.
 */
#define GAME_INTERVAL	(100000)
#define GAME_RUN	(GAME_INTERVAL)
void emulate_game(struct thread *th)
{
	unsigned long long deadline, current_time, latency;
	sem_t *s = &th->sem.stop;
	struct timespec myts;
	struct data_table *tb;

	tb = th->dt;
	th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10;

	while (1) {
		deadline = get_usecs(&myts) + GAME_INTERVAL;
		burn_usecs(GAME_RUN);
		current_time = get_usecs(&myts);
		/* use usecs instead of simple count for game burn statistics */
		tb->achieved_burns += GAME_RUN;
		if (current_time > deadline) {
			latency = current_time - deadline;
			tb->missed_burns += latency;
		} else
			latency = 0;
		if (latency > tb->max_latency)
			tb->max_latency = latency;
		tb->total_latency += latency;
		tb->sum_latency_squared += latency * latency;
		tb->nr_samples++;
		if (!trywait_sem(s))
			return;
	}
}

void *burn_thread(void *t)
{
	struct thread *th;
	sem_t *s;
	long i = (long)t;

	th = &threadlist[i];
	s = &th->sem.stopchild;

	while (1) {
		burn_loops(ud.loops_per_ms);
		if (!trywait_sem(s)) {
			post_sem(s);
			break;
		}
	}
	return NULL;
}
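
/*
 * Note that a stopping burn_thread re-posts the stopchild semaphore it
 * just consumed, so the single post from emulate_burn below cascades
 * through all ud.cpu_load burn threads.
 */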

/* Have ud.cpu_load threads burn cpu continuously */
void emulate_burn(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long i;
	long t;
	pthread_t burnthreads[ud.cpu_load];

	t = th->threadno;
	for (i = 0 ; i < ud.cpu_load ; i++)
		create_pthread(&burnthreads[i], NULL, burn_thread,
			(void*)(long) t);
	wait_sem(s);
	post_sem(&th->sem.stopchild);
	for (i = 0 ; i < ud.cpu_load ; i++)
		join_pthread(burnthreads[i], NULL);
}

/* Write a file the size of ram continuously */
void emulate_write(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	FILE *fp;
	char *name = "interbench.write";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long mem;

	if (!(fp = fopen(name, "w")))
		terminal_error("fopen");
	if (stat(name, &statbuf) == -1)
		terminal_fileopen_error(fp, "stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	mem = ud.ram / (statbuf.st_blksize / 1024);	/* kilobytes to blocks */
	if (!(buf = calloc(1, statbuf.st_blksize)))
		terminal_fileopen_error(fp, "calloc");
	if (fclose(fp) == -1)
		terminal_error("fclose");

	while (1) {
		unsigned int i;

		if (!(fp = fopen(name, "w")))
			terminal_error("fopen");
		if (stat(name, &statbuf) == -1)
			terminal_fileopen_error(fp, "stat");
		for (i = 0 ; i < mem; i++) {
			if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1)
				terminal_fileopen_error(fp, "fwrite");
			if (!trywait_sem(s))
				goto out;
		}
		if (fclose(fp) == -1)
			terminal_error("fclose");
	}

out:
	if (fclose(fp) == -1)
		terminal_error("fclose");
	if (remove(name) == -1)
		terminal_error("remove");
	sync_flush();
}

/* Read a file the size of ram continuously */
void emulate_read(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	char *name = "interbench.read";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long bsize;
	int tmp;

	if ((tmp = open(name, O_RDONLY)) == -1)
		terminal_error("open");
	if (stat(name, &statbuf) == -1)
		terminal_error("stat");
	bsize = statbuf.st_blksize;
	if (!(buf = malloc(bsize)))
		terminal_error("malloc");

	while (1) {
		int rd;

		/*
		 * We have to read the whole file before quitting the load
		 * to prevent the data being cached for the next read. This
		 * is also the reason the file is the size of physical ram.
		 */
		while ((rd = Read(tmp, buf, bsize)) > 0);
		if (!trywait_sem(s))
			return;
		if (lseek(tmp, (off_t)0, SEEK_SET) == -1)
			terminal_error("lseek");
	}
}

#define RINGTHREADS	4

struct thread ringthreads[RINGTHREADS];

void *ring_thread(void *t)
{
	struct thread *th;
	struct sems *s;
	int i, post_to;

	i = (long)t;
	th = &ringthreads[i];
	s = &th->sem;
	post_to = i + 1;
	if (post_to == RINGTHREADS)
		post_to = 0;
	if (i == 0)
		post_sem(&s->ready);

	while (1) {
		wait_sem(&s->start);
		post_sem(&ringthreads[post_to].sem.start);
		if (!trywait_sem(&s->stop))
			goto out;
	}
out:
	post_sem(&ringthreads[post_to].sem.start);
	post_sem(&s->complete);
	return NULL;
}

/* Create a ring of 4 threads that wake each other up in a circle */
void emulate_ring(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	int i;

	for (i = 0 ; i < RINGTHREADS ; i++) {
		init_all_sems(&ringthreads[i].sem);
		create_pthread(&ringthreads[i].pthread, NULL,
			ring_thread, (void*)(long) i);
	}

	wait_sem(&ringthreads[0].sem.ready);
	post_sem(&ringthreads[0].sem.start);
	wait_sem(s);
	for (i = 0 ; i < RINGTHREADS ; i++)
		post_sem(&ringthreads[i].sem.stop);
	for (i = 0 ; i < RINGTHREADS ; i++) {
		wait_sem(&ringthreads[i].sem.complete);
		join_pthread(ringthreads[i].pthread, NULL);
	}
}

/* We emulate a compile by running burn, write and read threads simultaneously */
void emulate_compile(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long i, threads[3] = {0, 0, 0};

	/* Compare labels by content, not pointer identity */
	for (i = 0 ; i < THREADS ; i++) {
		if (strcmp(threadlist[i].label, "Burn") == 0)
			threads[0] = i;
		if (strcmp(threadlist[i].label, "Write") == 0)
			threads[1] = i;
		if (strcmp(threadlist[i].label, "Read") == 0)
			threads[2] = i;
	}
	for (i = 0 ; i < 3 ; i++) {
		/* index 0 is "None", so 0 means not found */
		if (!threads[i]) {
			fprintf(stderr, "Can't find all threads for compile load\n");
			exit(1);
		}
	}
	for (i = 0 ; i < 3 ; i++) {
		initialise_thread(threads[i]);
		start_thread(&threadlist[threads[i]]);
	}
	wait_sem(s);
	for (i = 0 ; i < 3 ; i++)
		stop_thread(&threadlist[threads[i]]);
}

void *grab_and_touch(char *block[], int i)
{
	block[i] = (char *) malloc(MB);
	if (!block[i])
		return NULL;
	return (memset(block[i], 1, MB));
}

/* We emulate a memory load by allocating and torturing 110% of available ram */
void emulate_memload(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long touchable_mem, i;
	char *mem_block[MAX_MEM_IN_MB];
	void *success;

	touchable_mem = compute_allocable_mem();
	/* loop until we're killed, frobbing memory in various perverted ways */
	while (1) {
		for (i = 0; i < touchable_mem; i++) {
			success = grab_and_touch(mem_block, i);
			if (!success) {
				/* i blocks (0 .. i - 1) were allocated */
				touchable_mem = i;
				break;
			}
		}
		if (!trywait_sem(s))
			goto out_freemem;
		for (i = 0; i < touchable_mem; i++) {
			memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) %
				touchable_mem], MB);
			if (!trywait_sem(s))
				goto out_freemem;
		}
		for (i = 0; i < touchable_mem; i++) {
			free(mem_block[i]);
		}
		if (!trywait_sem(s))
			goto out;
	}
out_freemem:
	for (i = 0; i < touchable_mem; i++)
		free(mem_block[i]);
out:
	return;
}

struct thread hackthread;

void emulate_hackbench(struct thread *th)
{
	sem_t *s = &th->sem.stop;

	init_all_sems(&hackthread.sem);
	create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0);

	wait_sem(s);

	post_sem(&hackthread.sem.stop);
	wait_sem(&hackthread.sem.complete);

	join_pthread(hackthread.pthread, NULL);
}

#define CUSTOM_INTERVAL	(ud.custom_interval)
#define CUSTOM_RUN	(ud.custom_run)
void emulate_custom(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

void *timekeeping_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s;
	struct timespec myts;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->tkthread.sem;
	/*
	 * If this timekeeping thread belongs to a benchmarked thread we run
	 * at even higher priority than the benched thread when running real
	 * time. Otherwise the load's timekeeping thread, which does not need
	 * accurate accounting, remains SCHED_NORMAL.
	 */
	if (th->dt != &th->benchmarks[NOT_BENCHING])
		set_fifo(96);
	/* These values must be changed at the appropriate places or we race */
	tk->sleep_interval = tk->slept_interval = 0;
	post_sem(&s->ready);

	while (1) {
		unsigned long start_time, now;

		if (!trywait_sem(&s->stop))
			goto out;
		wait_sem(&s->start);
		tk->slept_interval = 0;
		start_time = get_usecs(&myts);
		if (!trywait_sem(&s->stop))
			goto out;
		if (tk->sleep_interval) {
			unsigned long diff = 0;
			microsleep(tk->sleep_interval);
			now = get_usecs(&myts);
			/* now should always be > start_time but... */
			if (now > start_time) {
				diff = now - start_time;
				if (diff > tk->sleep_interval)
					tk->slept_interval = diff -
						tk->sleep_interval;
			}
		}
		tk->sleep_interval = 0;
		post_sem(&s->complete);
	}
out:
	return NULL;
}

/*
 * All the sleep functions such as nanosleep can only guarantee that they
 * sleep for _at least_ the time requested. We work around this by having
 * a high priority real time thread that accounts for the extra time slept
 * in nanosleep. This allows wakeup latency of the tested thread to be
 * accurate and reflect true scheduling delays.
 */
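/*
 * For example (illustrative numbers): if a thread asks to sleep 10000us
 * and nanosleep returns after 12400us, the timekeeper records
 * slept_interval = 2400us, and periodic_schedule subtracts that from the
 * measured delay so only genuine scheduling latency is charged.
 */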
void *emulation_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s, *tks;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->sem;
	tks = &tk->sem;
	init_all_sems(tks);

	/* Start the timekeeping thread */
	create_pthread(&th->tk_pthread, NULL, timekeeping_thread,
		(void*)(long) i);
	/* Wait for timekeeping thread to be ready */
	wait_sem(&tks->ready);

	/* Tell main we're ready to start */
	post_sem(&s->ready);

	/* Wait for signal from main to start thread */
	wait_sem(&s->start);

	/* Start the actual function being benched or run as load */
	th->name(th);

	/* Stop the timekeeping thread */
	post_sem(&tks->stop);
	post_sem(&tks->start);
	join_pthread(th->tk_pthread, NULL);

	/* Tell main we've finished */
	post_sem(&s->complete);
	return NULL;
}

/*
 * In an unoptimised loop we try to benchmark how many meaningless loops
 * per second we can perform on this hardware to fairly accurately
 * reproduce certain percentage cpu usage
 */
void calibrate_loop(void)
{
	unsigned long long start_time, loops_per_msec, run_time = 0;
	unsigned long loops;
	struct timespec myts;

	loops_per_msec = 100000;
redo:
	/* Calibrate to within 1% accuracy */
	while (run_time > 1010000 || run_time < 990000) {
		loops = loops_per_msec;
		start_time = get_nsecs(&myts);
		burn_loops(loops);
		run_time = get_nsecs(&myts) - start_time;
		/*
		 * Rescale the estimate; the gcc "?:" extension keeps the
		 * old value if the new one would be zero.
		 */
		loops_per_msec = (1000000 * loops_per_msec / run_time ? :
			loops_per_msec);
	}

	/* Rechecking after a pause increases reproducibility */
	sleep(1);
	loops = loops_per_msec;
	start_time = get_nsecs(&myts);
	burn_loops(loops);
	run_time = get_nsecs(&myts) - start_time;

	/* Tolerate 5% difference on checking */
	if (run_time > 1050000 || run_time < 950000)
		goto redo;

	ud.loops_per_ms = loops_per_msec;
}
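
/*
 * The calibrated loops_per_ms is cached by main() in the file
 * "interbench.loops_per_ms" so that subsequent runs can skip calibration.
 */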

void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2)));

/* Output to console +/- logfile */
void log_output(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	if (vprintf(format, ap) < 0)
		terminal_error("vprintf");
	va_end(ap);
	if (ud.log) {
		va_start(ap, format);
		if (vfprintf(ud.logfile, format, ap) < 0)
			terminal_error("vfprintf");
		va_end(ap);
	}
	fflush(NULL);
}

/* Calculate statistics and output them */
void show_latencies(struct thread *th)
{
	struct data_table *tbj;
	struct tk_thread *tk;
	double average_latency, deadlines_met, samples_met, sd, max_latency;
	long double variance = 0;

	tbj = th->dt;
	tk = &th->tkthread;

	if (tbj->nr_samples > 1) {
		average_latency = (double)tbj->total_latency / tbj->nr_samples;
		/* Sample variance: (sum(x^2) - n * mean^2) / (n - 1) */
		variance = (tbj->sum_latency_squared - average_latency *
			average_latency * tbj->nr_samples) / (tbj->nr_samples - 1);
		sd = sqrt((double)variance);
	} else {
		average_latency = tbj->total_latency;
		sd = 0.0;
	}

	/*
	 * Landing on the boundary of a deadline can make loaded runs appear
	 * to do more work than unloaded due to tiny duration differences.
	 */
	if (tbj->achieved_burns > 0)
		samples_met = (double)tbj->achieved_burns /
		    (double)(tbj->achieved_burns + tbj->missed_burns) * 100;
	else
		samples_met = 0.0;
	max_latency = tbj->max_latency;
	/* When benchmarking rt we represent the data in us */
	if (!ud.do_rt) {
		average_latency /= 1000;
		sd /= 1000;
		max_latency /= 1000;
	}
	if (tbj->deadlines_met == 0)
		deadlines_met = 0;
	else
		deadlines_met = (double)tbj->deadlines_met /
		    (double)(tbj->missed_deadlines + tbj->deadlines_met) * 100;

	/* Messy nonsense to format the output nicely */
	if (average_latency >= 100)
		log_output("%7.0f +/- ", average_latency);
	else
		log_output("%7.3g +/- ", average_latency);
	if (sd >= 100)
		log_output("%-9.0f", sd);
	else
		log_output("%-9.3g", sd);
	if (max_latency >= 100)
		log_output("%7.0f\t", max_latency);
	else
		log_output("%7.3g\t", max_latency);
	log_output("\t%4.3g", samples_met);
	if (!th->nodeadlines)
		log_output("\t%11.3g", deadlines_met);
	log_output("\n");
	sync_flush();
}

void create_read_file(void)
{
	unsigned int i;
	FILE *fp;
	char *name = "interbench.read";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long mem, bsize;
	int tmp;

	if ((tmp = open(name, O_RDONLY)) == -1) {
		if (errno != ENOENT)
			terminal_error("open");
		goto write;
	}
	if (stat(name, &statbuf) == -1)
		terminal_error("stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (statbuf.st_size / 1024 / bsize == ud.ram / bsize)
		return;
	if (remove(name) == -1)
		terminal_error("remove");
write:
	fprintf(stderr, "Creating file for read load...\n");
	if (!(fp = fopen(name, "w")))
		terminal_error("fopen");
	if (stat(name, &statbuf) == -1)
		terminal_fileopen_error(fp, "stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (!(buf = calloc(1, bsize)))
		terminal_fileopen_error(fp, "calloc");
	mem = ud.ram / (bsize / 1024);	/* kilobytes to blocks */

	for (i = 0 ; i < mem; i++) {
		if (fwrite(buf, bsize, 1, fp) != 1)
			terminal_fileopen_error(fp, "fwrite");
	}
	if (fclose(fp) == -1)
		terminal_error("fclose");
	sync_flush();
}

void get_ram(void)
{
	struct vmstats vms;
	size_t vms_size = sizeof(vms);

	if (sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0))
		terminal_error("sysctlbyname: vm.vmstats");

	ud.ram = (unsigned long)vms.v_page_count * vms.v_page_size;
	ud.ram /= 1024;	/* linux size is in kB */
	ud.swap = ud.ram;	/* XXX: swap doesn't have to be the same as RAM */

	if (!ud.ram || !ud.swap) {
		unsigned long i;
		fprintf(stderr, "\nCould not get memory or swap size. ");
		fprintf(stderr, "Will not perform mem_load\n");
		for (i = 0 ; i < THREADS ; i++) {
			if (strcmp(threadlist[i].label, "Memload") == 0) {
				threadlist[i].load = 0;
				threadlist[i].rtload = 0;
			}
		}
	}
}

void get_logfilename(void)
{
	struct tm *mytm;
	struct utsname buf;
	time_t t;
	int year, month, day, hours, minutes;

	time(&t);
	if (uname(&buf) == -1)
		terminal_error("uname");
	if (!(mytm = localtime(&t)))
		terminal_error("localtime");
	year = mytm->tm_year + 1900;
	month = mytm->tm_mon + 1;
	day = mytm->tm_mday;
	hours = mytm->tm_hour;
	minutes = mytm->tm_min;
	strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH);
	/* strncpy does not terminate if the source fills the buffer */
	ud.unamer[MAX_UNAME_LENGTH - 1] = '\0';

	sprintf(ud.datestamp, "%04d%02d%02d%02d%02d",
		year, month, day, hours, minutes);
	snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer);
}

void start_thread(struct thread *th)
{
	post_sem(&th->sem.start);
}

void stop_thread(struct thread *th)
{
	post_sem(&th->sem.stop);
	wait_sem(&th->sem.complete);

	/* Kill the thread */
	join_pthread(th->pthread, NULL);
}

void init_sem(sem_t *sem)
{
	if (sem_init(sem, 0, 0))
		terminal_error("sem_init");
}

void init_all_sems(struct sems *s)
{
	/* Initialise the semaphores */
	init_sem(&s->ready);
	init_sem(&s->start);
	init_sem(&s->stop);
	init_sem(&s->complete);
	init_sem(&s->stopchild);
}

void initialise_thread(int i)
{
	struct thread *th = &threadlist[i];

	init_all_sems(&th->sem);
	/* Create the threads. Yes, the (long) cast is fugly but it's safe */
	create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i);

	wait_sem(&th->sem.ready);
	/*
	 * We set this pointer generically to NOT_BENCHING and set it to the
	 * benchmarked array entry only on benched threads.
	 */
	th->dt = &th->benchmarks[NOT_BENCHING];
	initialise_thread_data(th->dt);
}

/* A pseudo-semaphore for processes using a pipe */
void wait_on(int pype)
{
	int retval, buf = 0;

	retval = Read(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nread returned 0\n");
		exit (1);
	}
}

void wakeup_with(int pype)
{
	int retval, buf = 1;

	retval = Write(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nwrite returned 0\n");
		exit (1);
	}
}
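
/*
 * Handshake protocol driven by bench() below, one int per message over
 * the four pipes (m2l/l2m for the load child, m2b/b2m for the bench
 * child; x stands for l or b):
 *
 *	child:	wakeup_with(x2m[1])	"I am initialised"
 *	main:	wakeup_with(m2x[1])	"start your thread"
 *	child:	wakeup_with(x2m[1])	"thread running"
 *	main:	wakeup_with(m2x[1])	"stop your thread"
 *	child:	wakeup_with(x2m[1])	"finished"
 */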

void run_loadchild(int j)
{
	struct thread *thj;
	thj = &threadlist[j];

	set_nice(ud.load_nice);
	initialise_thread(j);

	/* Tell main we're ready */
	wakeup_with(l2m[1]);

	/* Main tells us to start */
	wait_on(m2l[0]);
	start_thread(thj);

	/* Tell main we received the start and are running */
	wakeup_with(l2m[1]);

	/* Main tells us to stop */
	wait_on(m2l[0]);
	stop_thread(thj);

	/* Tell main we've finished */
	wakeup_with(l2m[1]);
	exit (0);
}

void run_benchchild(int i, int j)
{
	struct thread *thi;

	thi = &threadlist[i];

	set_nice(ud.bench_nice);
	if (ud.do_rt)
		set_mlock();
	initialise_thread(i);
	/* Point the data table to the appropriate load being tested */
	thi->dt = &thi->benchmarks[j];
	initialise_thread_data(thi->dt);
	if (ud.do_rt)
		set_thread_fifo(thi->pthread, 95);

	/* Tell main we're ready */
	wakeup_with(b2m[1]);

	/* Main tells us to start */
	wait_on(m2b[0]);
	start_thread(thi);

	/* Tell main we have started */
	wakeup_with(b2m[1]);

	/* Main tells us to stop */
	wait_on(m2b[0]);
	stop_thread(thi);

	if (ud.do_rt) {
		set_thread_normal(thi->pthread);
		set_munlock();
	}
	show_latencies(thi);

	/* Tell main we've finished */
	wakeup_with(b2m[1]);
	exit(0);
}

void bench(int i, int j)
{
	pid_t bench_pid, load_pid;

	if ((load_pid = fork()) == -1)
		terminal_error("fork");
	if (!load_pid)
		run_loadchild(j);

	/* Wait for load process to be ready */
	wait_on(l2m[0]);
	if ((bench_pid = fork()) == -1)
		terminal_error("fork");
	if (!bench_pid)
		run_benchchild(i, j);

	/* Wait for bench process to be ready */
	wait_on(b2m[0]);

	/*
	 * We want to be higher priority than everything to signal them to
	 * stop and we lock our memory if we can as well
	 */
	set_fifo(99);
	set_mlock();

	/* Wakeup the load process */
	wakeup_with(m2l[1]);
	/* Load tells us it has received the first message and is running */
	wait_on(l2m[0]);

	/* After a small delay, wake up the benched process */
	sleep(1);
	wakeup_with(m2b[1]);

	/* Bench tells us it has received the first message and is running */
	wait_on(b2m[0]);
	microsleep((unsigned long long)ud.duration * 1000000);

	/* Tell the benched process to stop its threads and output results */
	wakeup_with(m2b[1]);

	/* Tell the load process to stop its threads */
	wakeup_with(m2l[1]);

	/* Return to SCHED_NORMAL */
	set_normal();
	set_munlock();

	/* Wait for load and bench processes to terminate */
	wait_on(l2m[0]);
	wait_on(b2m[0]);
}

void init_pipe(int *pype)
{
	if (pipe(pype) == -1)
		terminal_error("pipe");
}

void init_pipes(void)
{
	init_pipe(m2l);
	init_pipe(l2m);
	init_pipe(m2b);
	init_pipe(b2m);
}

void usage(void)
{
	/* Affinity commented out till working on all architectures */
	fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n");
	fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int>] [-B <int>] [-N <int>]\n");
	fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
	fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
	fprintf(stderr, "\t[-h]\n\n");
	fprintf(stderr, " -l\tUse <int> loops per ms (default: use saved benchmark)\n");
	fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
	fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n");
	fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
	fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n");
	//fprintf(stderr, " -u\tImitate uniprocessor\n");
	fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n");
	fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n");
	fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
	fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
	fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
	fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n");
	fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n");
	fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n");
	fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
	fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
	fprintf(stderr, " -h\tShow this help\n");
	fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n\n");
}

#ifdef DEBUG
void deadchild(int crap)
{
	pid_t retval;
	int status;

	crap = 0;	/* silence unused parameter warning */

	if ((retval = waitpid(-1, &status, WNOHANG)) == -1) {
		if (errno == ECHILD)
			return;
		terminal_error("waitpid");
	}
	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return;
	fprintf(stderr, "\nChild terminated abnormally ");
	if (WIFSIGNALED(status))
		fprintf(stderr, "with signal %d", WTERMSIG(status));
	fprintf(stderr, "\n");
	exit (1);
}
#endif

int load_index(const char* loadname)
{
	int i;

	for (i = 0 ; i < THREADS ; i++)
		if (strcasecmp(loadname, threadlist[i].label) == 0)
			return i;
	return -1;
}

inline int bit_is_on(const unsigned int mask, int index)
{
	return (mask & (1 << index)) != 0;
}

inline void set_bit_on(unsigned int *mask, int index)
{
	*mask |= (1 << index);
}
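
/*
 * Example of the selection masks in action: "-w Video -w Burn" sets bits
 * load_index("Video") and load_index("Burn") in selected_loads, and only
 * those threadlist entries are run as loads; an empty mask defaults to
 * all bits set (see main below).
 */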

int main(int argc, char **argv)
{
	unsigned long custom_cpu = 0;
	int q, i, j, affinity = 0, benchmark = 0;
	unsigned int selected_loads = 0;
	unsigned int excluded_loads = 0;
	unsigned int selected_benches = 0;
	unsigned int excluded_benches = 0;
	FILE *fp;
	/*
	 * This file stores the loops_per_ms to be reused in a filename that
	 * can't be confused
	 */
	char *fname = "interbench.loops_per_ms";
	char *comment = NULL;
#ifdef DEBUG
	feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
	if (signal(SIGCHLD, deadchild) == SIG_ERR)
		terminal_error("signal");
#endif

	while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
		switch (q) {
			case 'h':
				usage();
				return (0);
			case 'l':
				ud.loops_per_ms = atoi(optarg);
				break;
			case 't':
				ud.duration = atoi(optarg);
				break;
			case 'L':
				ud.cpu_load = atoi(optarg);
				break;
			case 'B':
				ud.bench_nice = atoi(optarg);
				break;
			case 'N':
				ud.load_nice = atoi(optarg);
				break;
			case 'u':
				affinity = 1;
				break;
			case 'b':
				benchmark = 1;
				break;
			case 'c':
				ud.log = 0;
				break;
			case 'r':
				ud.do_rt = 1;
				break;
			case 'C':
				custom_cpu = (unsigned long)atol(optarg);
				break;
			case 'I':
				ud.custom_interval = atol(optarg);
				break;
			case 'm':
				comment = optarg;
				break;
			case 'w':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown load \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&selected_loads, i);
				break;
			case 'x':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown load \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&excluded_loads, i);
				break;
			case 'W':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&selected_benches, i);
				break;
			case 'X':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&excluded_benches, i);
				break;
			default:
				usage();
				return (1);
		}
	}
	argc -= optind;
	argv += optind;
	/* default is all loads */
	if (selected_loads == 0)
		selected_loads = (unsigned int)-1;
	selected_loads &= ~excluded_loads;
	/* default is all benches */
	if (selected_benches == 0)
		selected_benches = (unsigned int)-1;
	selected_benches &= ~excluded_benches;

	if (!test_fifo()) {
		fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n");
		fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n");
		if (ud.do_rt) {
			fprintf(stderr, "Real time tests were requested, aborting.\n");
			exit (1);
		}
		fprintf(stderr, "Results will be unreliable.\n");
	}
	if (!ud.cpu_load) {
		fprintf(stderr, "Invalid cpu load\n");
		exit (1);
	}

	if ((custom_cpu && !ud.custom_interval) ||
		(ud.custom_interval && !custom_cpu) ||
		custom_cpu > 100) {
			fprintf(stderr, "Invalid custom values, aborting.\n");
			exit (1);
	}

	if (custom_cpu && ud.custom_interval) {
		ud.custom_run = ud.custom_interval * custom_cpu / 100;
		threadlist[CUSTOM].bench = 1;
		threadlist[CUSTOM].load = 1;
		threadlist[CUSTOM].rtbench = 1;
		threadlist[CUSTOM].rtload = 1;
	}
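
	/*
	 * Worked example of the custom load (numbers illustrative): with
	 * "-C 50 -I 10000", custom_run = 10000 * 50 / 100 = 5000, i.e. the
	 * custom thread burns 5ms of cpu in every 10ms interval.
	 */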

	/*FIXME Affinity commented out till working on all architectures */
#if 0
	if (affinity) {
#ifdef CPU_SET	/* Current glibc expects cpu_set_t */
		cpu_set_t cpumask;

		CPU_ZERO(&cpumask);
		CPU_SET(0, &cpumask);
#else		/* Old glibc expects unsigned long */
		unsigned long cpumask = 1;
#endif
		if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) {
			if (errno != EPERM)
				terminal_error("sched_setaffinity");
			fprintf(stderr, "could not set cpu affinity\n");
		}
	}
#endif

	/* Make the benchmark duration a multiple of 10 seconds for a proper range of X loads */
	if (ud.duration % 10)
		ud.duration += 10 - ud.duration % 10;

	if (benchmark)
		ud.loops_per_ms = 0;
	/*
	 * Try to get loops_per_ms from command line first, file second, and
	 * benchmark if not available.
	 */
	if (!ud.loops_per_ms) {
		if (benchmark)
			goto bench;
		if ((fp = fopen(fname, "r"))) {
			if (fscanf(fp, "%lu", &ud.loops_per_ms) != 1)
				ud.loops_per_ms = 0;
			if (fclose(fp) == -1)
				terminal_error("fclose");
			if (ud.loops_per_ms) {
				fprintf(stderr,
					"%lu loops_per_ms read from file interbench.loops_per_ms\n",
					ud.loops_per_ms);
				goto loops_known;
			}
		} else
			if (errno != ENOENT)
				terminal_error("fopen");
bench:
		fprintf(stderr, "loops_per_ms unknown; benchmarking...\n");

		/*
		 * To get as accurate a loop as possible we time it running
		 * SCHED_FIFO if we can
		 */
		set_fifo(99);
		calibrate_loop();
		set_normal();
	} else
		fprintf(stderr, "loops_per_ms specified from command line\n");

	if (!(fp = fopen(fname, "w"))) {
		if (errno != EACCES)	/* No write access is not terminal */
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n");
		goto loops_known;
	}
	fprintf(fp, "%lu", ud.loops_per_ms);
	fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
		ud.loops_per_ms);
	if (fclose(fp) == -1)
		terminal_error("fclose");

loops_known:
	get_ram();
	get_logfilename();
	create_read_file();
	init_pipes();

	if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) {
		if (errno != EACCES)
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to logfile\n");
		ud.log = 0;
	}
	log_output("\n");
	log_output("Using %lu loops per ms, running every load for %d seconds\n",
		ud.loops_per_ms, ud.duration);
	log_output("Benchmarking kernel %s at datestamp %s\n",
		ud.unamer, ud.datestamp);
	if (comment)
		log_output("Comment: %s\n", comment);
	log_output("\n");

	for (i = 0 ; i < THREADS ; i++)
		threadlist[i].threadno = i;

	for (i = 0 ; i < THREADS ; i++) {
		struct thread *thi = &threadlist[i];
		int *benchme;

		if (ud.do_rt)
			benchme = &threadlist[i].rtbench;
		else
			benchme = &threadlist[i].bench;

		if (!*benchme || !bit_is_on(selected_benches, i))
			continue;

		log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label);
		if (ud.do_rt)
			log_output("real time ");
		else if (ud.bench_nice)
			log_output("nice %d ", ud.bench_nice);
		log_output("in the presence of simulated ");
		if (ud.load_nice)
			log_output("nice %d ", ud.load_nice);
		log_output("---\n");

		log_output("Load");
		if (ud.do_rt)
			log_output("\tLatency +/- SD (us)");
		else
			log_output("\tLatency +/- SD (ms)");
		log_output("  Max Latency ");
		log_output("  %% Desired CPU");
		if (!thi->nodeadlines)
			log_output("  %% Deadlines Met");
		log_output("\n");

		for (j = 0 ; j < THREADS ; j++) {
			struct thread *thj = &threadlist[j];

			if (j == i || !bit_is_on(selected_loads, j) ||
				(!threadlist[j].load && !ud.do_rt) ||
				(!threadlist[j].rtload && ud.do_rt))
					continue;
			log_output("%s\t", thj->label);
			sync_flush();
			bench(i, j);
		}
		log_output("\n");
	}
	log_output("\n");
	if (ud.log)
		fclose(ud.logfile);

	return 0;
}