xref: /dragonfly/test/interbench/interbench.c (revision 0db87cb7)
/*******************************************
 *
 * Interbench - Interactivity benchmark
 *
 * Author:  Con Kolivas <kernel@kolivas.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *******************************************/

#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64	/* Large file support */
#define INTERBENCH_VERSION	"0.30"

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <time.h>
#include <errno.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
#include <fenv.h>
#include <signal.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include "interbench.h"

#define MAX_UNAME_LENGTH	100
#define MAX_LOG_LENGTH		((MAX_UNAME_LENGTH) + 4)
#define MIN_BLK_SIZE		1024
#define DEFAULT_RESERVE		64
#define MB			(1024 * 1024)	/* 2^20 bytes */
#define KB			1024
#define MAX_MEM_IN_MB		(1024 * 64)	/* 64 GB */

struct user_data {
	unsigned long loops_per_ms;
	unsigned long ram, swap;
	int duration;
	int do_rt;
	int bench_nice;
	int load_nice;
	unsigned long custom_run;
	unsigned long custom_interval;
	unsigned long cpu_load;
	char logfilename[MAX_LOG_LENGTH];
	int log;
	char unamer[MAX_UNAME_LENGTH];
	char datestamp[13];
	FILE *logfile;
} ud = {
	.duration = 30,
	.cpu_load = 4,
	.log = 1,
};

/* Pipes main to/from load and bench processes */
static int m2l[2], l2m[2], m2b[2], b2m[2];

/* Which member of benchmarks is used when not benchmarking */
#define NOT_BENCHING	(THREADS)
#define CUSTOM		(THREADS - 1)

/*
 * To add another load or a benchmark you need to increment the value of
 * THREADS, add a function prototype for your function and add an entry to
 * the threadlist. To specify whether the function is a benchmark or a load
 * set the benchmark and/or load flag as appropriate. The basic requirements
 * of a new load can be seen by using emulate_none as a template.
 */

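/*
 * Illustrative sketch only (not part of the build): a hypothetical "Disk"
 * load would need THREADS in interbench.h incremented, a prototype
 * alongside the ones below:
 *
 *	void emulate_disk(struct thread *th);
 *
 * and an entry in threadlist[] before the Custom entry:
 *
 *	{.label = "Disk", .name = emulate_disk, .load = 1, .rtload = 1},
 *
 * The function body can follow emulate_none() as a template: block on
 * th->sem.stop until told to exit.
 */
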
void emulate_none(struct thread *th);
void emulate_audio(struct thread *th);
void emulate_video(struct thread *th);
void emulate_x(struct thread *th);
void emulate_game(struct thread *th);
void emulate_burn(struct thread *th);
void emulate_write(struct thread *th);
void emulate_read(struct thread *th);
void emulate_ring(struct thread *th);
void emulate_compile(struct thread *th);
void emulate_memload(struct thread *th);
void emulate_hackbench(struct thread *th);
void emulate_custom(struct thread *th);

struct thread threadlist[THREADS] = {
	{.label = "None", .name = emulate_none, .load = 1, .rtload = 1},
	{.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1},
	{.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1},
	{.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1},
	{.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1},
	{.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1},
	{.label = "Write", .name = emulate_write, .load = 1, .rtload = 1},
	{.label = "Read", .name = emulate_read, .load = 1, .rtload = 1},
	{.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0},	/* No useful data from this */
	{.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1},
	{.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1},
	{.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0},	/* This is causing signal headaches */
	{.label = "Custom", .name = emulate_custom},	/* Leave custom as last entry */
};

void init_sem(sem_t *sem);
void init_all_sems(struct sems *s);
void initialise_thread(int i);
void start_thread(struct thread *th);
void stop_thread(struct thread *th);

void terminal_error(const char *name)
{
	fprintf(stderr, "\n");
	perror(name);
	exit (1);
}

void terminal_fileopen_error(FILE *fp, char *name)
{
	if (fclose(fp) == -1)
		terminal_error("fclose");
	terminal_error(name);
}

unsigned long long get_nsecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	/* Do the arithmetic in unsigned long long to avoid overflowing a long */
	return (myts->tv_sec * 1000000000ULL + myts->tv_nsec);
}

unsigned long get_usecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	return (myts->tv_sec * 1000000ULL + myts->tv_nsec / 1000);
}

void set_fifo(int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
	}
}

void set_mlock(void)
{
	int mlockflags;

	mlockflags = MCL_CURRENT | MCL_FUTURE;
#if 0
	mlockall(mlockflags);	/* Is not critical if this fails */
#endif
}

void set_munlock(void)
{
#if 0
	if (munlockall() == -1)
		terminal_error("munlockall");
#endif
}

void set_thread_fifo(pthread_t pthread, int prio)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (pthread_setschedparam(pthread, SCHED_FIFO, &sp) == -1)
		terminal_error("pthread_setschedparam");
}

void set_normal(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		fprintf(stderr, "Weird, could not unset RT scheduling!\n");
	}
}

void set_nice(int prio)
{
	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
		terminal_error("setpriority");
}

int test_fifo(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 99;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
		goto out_fail;
	}
	if (sched_getscheduler(0) != SCHED_FIFO)
		goto out_fail;
	set_normal();
	return 1;
out_fail:
	set_normal();
	return 0;
}

void set_thread_normal(pthread_t pthread)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (pthread_setschedparam(pthread, SCHED_OTHER, &sp) == -1)
		terminal_error("pthread_setschedparam");
}

void sync_flush(void)
{
	if ((fflush(NULL)) == EOF)
		terminal_error("fflush");
	sync();
	sync();
	sync();
}

unsigned long compute_allocable_mem(void)
{
	unsigned long total = ud.ram + ud.swap;
	unsigned long usage = ud.ram * 110 / 100;

	/* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
	if (total - DEFAULT_RESERVE < usage)
		usage = total - DEFAULT_RESERVE;
	usage /= 1024;	/* to megabytes */
	if (usage > 2930)
		usage = 2930;
	return usage;
}
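
/*
 * Worked example with illustrative numbers: given 1 GB of ram and 1 GB of
 * swap, ud.ram = ud.swap = 1048576 (kB), so usage starts at 110% of ram,
 * 1153433 kB. total - DEFAULT_RESERVE = 2097088 kB is larger, so usage
 * stands, becomes 1126 MB after the division and is under the 2930 MB cap.
 * The reserve subtraction only bites when swap is small or absent.
 */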

void burn_loops(unsigned long loops)
{
	unsigned long i;

	/*
	 * We need some magic here to prevent the compiler from optimising
	 * this loop away. Otherwise trying to emulate a fixed cpu load
	 * with this loop will not work.
	 */
	for (i = 0 ; i < loops ; i++)
	     asm volatile("" : : : "memory");
}

/* Use this many usecs of cpu time */
void burn_usecs(unsigned long usecs)
{
	unsigned long ms_loops;

	ms_loops = ud.loops_per_ms / 1000 * usecs;
	burn_loops(ms_loops);
}

void microsleep(unsigned long long usecs)
{
	struct timespec req, rem;

	rem.tv_sec = rem.tv_nsec = 0;

	req.tv_sec = usecs / 1000000;
	req.tv_nsec = (usecs - (req.tv_sec * 1000000)) * 1000;
continue_sleep:
	if ((nanosleep(&req, &rem)) == -1) {
		if (errno == EINTR) {
			if (rem.tv_sec || rem.tv_nsec) {
				req.tv_sec = rem.tv_sec;
				req.tv_nsec = rem.tv_nsec;
				goto continue_sleep;
			}
			goto out;
		}
		terminal_error("nanosleep");
	}
out:
	return;
}

/*
 * Yes, sem_post and sem_wait shouldn't return -1, but they do, so we must
 * handle it.
 */
inline void post_sem(sem_t *s)
{
retry:
	if ((sem_post(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_post");
	}
}

inline void wait_sem(sem_t *s)
{
retry:
	if ((sem_wait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_wait");
	}
}

inline int trywait_sem(sem_t *s)
{
	int ret;

retry:
	if ((ret = sem_trywait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		if (errno != EAGAIN)
			terminal_error("sem_trywait");
	}
	return ret;
}

inline ssize_t Read(int fd, void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = read(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("read");
	}
	return retval;
}

inline ssize_t Write(int fd, const void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = write(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("write");
	}
	return retval;
}

unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs,
	unsigned long interval_usecs, unsigned long long deadline)
{
	unsigned long long latency, missed_latency;
	unsigned long long current_time;
	struct tk_thread *tk;
	struct data_table *tb;
	struct timespec myts;

	latency = 0;
	tb = th->dt;
	tk = &th->tkthread;

	current_time = get_usecs(&myts);
	if (current_time > deadline + tk->slept_interval)
		latency = current_time - deadline - tk->slept_interval;

	/* calculate the latency for missed frames */
	missed_latency = 0;

	current_time = get_usecs(&myts);
	if (interval_usecs && current_time > deadline + interval_usecs) {
		/* We missed the deadline even before we consumed cpu */
		unsigned long intervals;

		deadline += interval_usecs;
		intervals = (current_time - deadline) /
			interval_usecs + 1;

		tb->missed_deadlines += intervals;
		missed_latency = intervals * interval_usecs;
		deadline += intervals * interval_usecs;
		tb->missed_burns += intervals;
		goto bypass_burn;
	}

	burn_usecs(run_usecs);
	current_time = get_usecs(&myts);
	tb->achieved_burns++;

	/*
	 * If we meet the deadline we move the deadline forward, otherwise
	 * we consider it a missed deadline and dropped frame etc.
	 */
	deadline += interval_usecs;
	if (deadline >= current_time) {
		tb->deadlines_met++;
	} else {
		if (interval_usecs) {
			unsigned long intervals = (current_time - deadline) /
				interval_usecs + 1;

			tb->missed_deadlines += intervals;
			missed_latency = intervals * interval_usecs;
			deadline += intervals * interval_usecs;
			if (intervals > 1)
				tb->missed_burns += intervals;
		} else {
			deadline = current_time;
			goto out_nosleep;
		}
	}
bypass_burn:
	tk->sleep_interval = deadline - current_time;

	post_sem(&tk->sem.start);
	wait_sem(&tk->sem.complete);
out_nosleep:
	/*
	 * Must add missed_latency to total here as this function may not be
	 * called again and the missed latency can be lost
	 */
	latency += missed_latency;
	if (latency > tb->max_latency)
		tb->max_latency = latency;
	tb->total_latency += latency;
	tb->sum_latency_squared += latency * latency;
	tb->nr_samples++;

	return deadline;
}
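
/*
 * The canonical caller pattern (used by the emulators below) is:
 *
 *	deadline = get_usecs(&myts);
 *	while (1) {
 *		deadline = periodic_schedule(th, run_usecs, interval_usecs,
 *			deadline);
 *		if (!trywait_sem(&th->sem.stop))
 *			return;
 *	}
 *
 * i.e. the returned deadline is fed straight back in, and the loop exits
 * once the stop semaphore has been posted.
 */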

void initialise_thread_data(struct data_table *tb)
{
	tb->max_latency =
		tb->total_latency =
		tb->sum_latency_squared =
		tb->deadlines_met =
		tb->missed_deadlines =
		tb->missed_burns =
		tb->nr_samples = 0;
}

void create_pthread(pthread_t *thread, pthread_attr_t *attr,
	void *(*start_routine)(void *), void *arg)
{
	if (pthread_create(thread, attr, start_routine, arg))
		terminal_error("pthread_create");
}

void join_pthread(pthread_t th, void **thread_return)
{
	if (pthread_join(th, thread_return))
		terminal_error("pthread_join");
}

void emulate_none(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	wait_sem(s);
}

#define AUDIO_INTERVAL	(50000)
#define AUDIO_RUN	(AUDIO_INTERVAL / 20)
/* We emulate audio by using 5% cpu and waking every 50ms */
void emulate_audio(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/* We emulate video by using 40% cpu and waking for 60fps */
#define VIDEO_INTERVAL	(1000000 / 60)
#define VIDEO_RUN	(VIDEO_INTERVAL * 40 / 100)
void emulate_video(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/*
 * We emulate X by running for a variable percentage of cpu from 0-100%
 * in 1ms chunks.
 */
void emulate_x(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 100;
	deadline = get_usecs(&myts);

	while (1) {
		int i, j;
		for (i = 0 ; i <= 100 ; i++) {
			j = 100 - i;
			deadline = periodic_schedule(th, i * 1000, j * 1000,
				deadline);
			deadline += i * 1000;
			if (!trywait_sem(s))
				return;
		}
	}
}

/*
 * We emulate gaming by using 100% cpu and seeing how many frames (jobs
 * completed) we can do in that time. Deadlines are meaningless with
 * unlocked frame rates. We do not use periodic_schedule for this load
 * because it never wants to sleep.
 */
#define GAME_INTERVAL	(100000)
#define GAME_RUN	(GAME_INTERVAL)
void emulate_game(struct thread *th)
{
	unsigned long long deadline, current_time, latency;
	sem_t *s = &th->sem.stop;
	struct timespec myts;
	struct data_table *tb;

	tb = th->dt;
	th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10;

	while (1) {
		deadline = get_usecs(&myts) + GAME_INTERVAL;
		burn_usecs(GAME_RUN);
		current_time = get_usecs(&myts);
		/* use usecs instead of simple count for game burn statistics */
		tb->achieved_burns += GAME_RUN;
		if (current_time > deadline) {
			latency = current_time - deadline;
			tb->missed_burns += latency;
		} else
			latency = 0;
		if (latency > tb->max_latency)
			tb->max_latency = latency;
		tb->total_latency += latency;
		tb->sum_latency_squared += latency * latency;
		tb->nr_samples++;
		if (!trywait_sem(s))
			return;
	}
}

void *burn_thread(void *t)
{
	struct thread *th;
	sem_t *s;
	long i = (long)t;

	th = &threadlist[i];
	s = &th->sem.stopchild;

	while (1) {
		burn_loops(ud.loops_per_ms);
		if (!trywait_sem(s)) {
			post_sem(s);
			break;
		}
	}
	return NULL;
}

/* Have ud.cpu_load threads burn cpu continuously */
void emulate_burn(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long i;
	long t;
	pthread_t burnthreads[ud.cpu_load];

	t = th->threadno;
	for (i = 0 ; i < ud.cpu_load ; i++)
		create_pthread(&burnthreads[i], NULL, burn_thread,
			(void*)(long) t);
	wait_sem(s);
	post_sem(&th->sem.stopchild);
	for (i = 0 ; i < ud.cpu_load ; i++)
		join_pthread(burnthreads[i], NULL);
}

/* Write a file the size of ram continuously */
void emulate_write(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	FILE *fp;
	char *name = "interbench.write";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long mem;

	if (!(fp = fopen(name, "w")))
		terminal_error("fopen");
	if (stat(name, &statbuf) == -1)
		terminal_fileopen_error(fp, "stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	mem = ud.ram / (statbuf.st_blksize / 1024);	/* kilobytes to blocks */
	if (!(buf = calloc(1, statbuf.st_blksize)))
		terminal_fileopen_error(fp, "calloc");
	if (fclose(fp) == -1)
		terminal_error("fclose");

	while (1) {
		unsigned int i;

		if (!(fp = fopen(name, "w")))
			terminal_error("fopen");
		if (stat(name, &statbuf) == -1)
			terminal_fileopen_error(fp, "stat");
		for (i = 0 ; i < mem; i++) {
			if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1)
				terminal_fileopen_error(fp, "fwrite");
			if (!trywait_sem(s))
				goto out;
		}
		if (fclose(fp) == -1)
			terminal_error("fclose");
	}

out:
	if (fclose(fp) == -1)
		terminal_error("fclose");
	if (remove(name) == -1)
		terminal_error("remove");
	sync_flush();
}

/* Read a file the size of ram continuously */
void emulate_read(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	char *name = "interbench.read";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long bsize;
	int tmp;

	if ((tmp = open(name, O_RDONLY)) == -1)
		terminal_error("open");
	if (stat(name, &statbuf) == -1)
		terminal_error("stat");
	bsize = statbuf.st_blksize;
	if (!(buf = malloc(bsize)))
		terminal_error("malloc");

	while (1) {
		int rd;

		/*
		 * We have to read the whole file before quitting the load
		 * to prevent the data being cached for the next read. This
		 * is also the reason the file is the size of physical ram.
		 */
		while ((rd = Read(tmp, buf, bsize)) > 0);
		if (!trywait_sem(s))
			return;
		if (lseek(tmp, (off_t)0, SEEK_SET) == -1)
			terminal_error("lseek");
	}
}

#define RINGTHREADS	4

struct thread ringthreads[RINGTHREADS];

void *ring_thread(void *t)
{
	struct thread *th;
	struct sems *s;
	int i, post_to;

	i = (long)t;
	th = &ringthreads[i];
	s = &th->sem;
	post_to = i + 1;
	if (post_to == RINGTHREADS)
		post_to = 0;
	if (i == 0)
		post_sem(&s->ready);

	while (1) {
		wait_sem(&s->start);
		post_sem(&ringthreads[post_to].sem.start);
		if (!trywait_sem(&s->stop))
			goto out;
	}
out:
	post_sem(&ringthreads[post_to].sem.start);
	post_sem(&s->complete);
	return NULL;
}

/* Create a ring of 4 threads that wake each other up in a circle */
void emulate_ring(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	int i;

	for (i = 0 ; i < RINGTHREADS ; i++) {
		init_all_sems(&ringthreads[i].sem);
		create_pthread(&ringthreads[i].pthread, NULL,
			ring_thread, (void*)(long) i);
	}

	wait_sem(&ringthreads[0].sem.ready);
	post_sem(&ringthreads[0].sem.start);
	wait_sem(s);
	for (i = 0 ; i < RINGTHREADS ; i++)
		post_sem(&ringthreads[i].sem.stop);
	for (i = 0 ; i < RINGTHREADS ; i++) {
		wait_sem(&ringthreads[i].sem.complete);
		join_pthread(ringthreads[i].pthread, NULL);
	}
}

/* We emulate a compile by running burn, write and read threads simultaneously */
void emulate_compile(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long i, threads[3];

	bzero(threads, 3 * sizeof(threads[0]));

	for (i = 0 ; i < THREADS ; i++) {
		if (strcmp(threadlist[i].label, "Burn") == 0)
			threads[0] = i;
		if (strcmp(threadlist[i].label, "Write") == 0)
			threads[1] = i;
		if (strcmp(threadlist[i].label, "Read") == 0)
			threads[2] = i;
	}
	for (i = 0 ; i < 3 ; i++) {
		if (!threads[i]) {
			fprintf(stderr, "Can't find all threads for compile load\n");
			exit(1);
		}
	}
	for (i = 0 ; i < 3 ; i++) {
		initialise_thread(threads[i]);
		start_thread(&threadlist[threads[i]]);
	}
	wait_sem(s);
	for (i = 0 ; i < 3 ; i++)
		stop_thread(&threadlist[threads[i]]);
}

int *grab_and_touch(char *block[], int i)
{
	block[i] = (char *) malloc(MB);
	if (!block[i])
		return NULL;
	return (memset(block[i], 1, MB));
}

/* We emulate a memory load by allocating and torturing 110% of available ram */
void emulate_memload(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long touchable_mem, i;
	char *mem_block[MAX_MEM_IN_MB];
	void *success;

	touchable_mem = compute_allocable_mem();
	/* loop until we're killed, frobbing memory in various perverted ways */
	while (1) {
		for (i = 0; i < touchable_mem; i++) {
			success = grab_and_touch(mem_block, i);
			if (!success) {
				touchable_mem = i - 1;
				break;
			}
		}
		if (!trywait_sem(s))
			goto out_freemem;
		for (i = 0; i < touchable_mem; i++) {
			memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) %
				touchable_mem], MB);
			if (!trywait_sem(s))
				goto out_freemem;
		}
		for (i = 0; i < touchable_mem; i++) {
			free(mem_block[i]);
		}
		if (!trywait_sem(s))
			goto out;
	}
out_freemem:
	for (i = 0; i < touchable_mem; i++)
		free(mem_block[i]);
out:
	return;
}

struct thread hackthread;

void emulate_hackbench(struct thread *th)
{
	sem_t *s = &th->sem.stop;

	init_all_sems(&hackthread.sem);
	create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0);

	wait_sem(s);

	post_sem(&hackthread.sem.stop);
	wait_sem(&hackthread.sem.complete);

	join_pthread(hackthread.pthread, NULL);
}

#define CUSTOM_INTERVAL	(ud.custom_interval)
#define CUSTOM_RUN	(ud.custom_run)
void emulate_custom(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

void *timekeeping_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s;
	struct timespec myts;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->tkthread.sem;
	/*
	 * If this timekeeping thread belongs to a benchmarked thread we run
	 * it at an even higher priority than the benched thread itself runs
	 * at when running real time. Otherwise it is the load's timekeeping
	 * thread, which does not need accurate accounting and remains
	 * SCHED_NORMAL.
	 */
	if (th->dt != &th->benchmarks[NOT_BENCHING])
		set_fifo(96);
	/* These values must only be changed at the appropriate places or we race */
	tk->sleep_interval = tk->slept_interval = 0;
	post_sem(&s->ready);

	while (1) {
		unsigned long start_time, now;

		if (!trywait_sem(&s->stop))
			goto out;
		wait_sem(&s->start);
		tk->slept_interval = 0;
		start_time = get_usecs(&myts);
		if (!trywait_sem(&s->stop))
			goto out;
		if (tk->sleep_interval) {
			unsigned long diff = 0;
			microsleep(tk->sleep_interval);
			now = get_usecs(&myts);
			/* now should always be > start_time but... */
			if (now > start_time) {
				diff = now - start_time;
				if (diff > tk->sleep_interval)
					tk->slept_interval = diff -
						tk->sleep_interval;
			}
		}
		tk->sleep_interval = 0;
		post_sem(&s->complete);
	}
out:
	return NULL;
}

/*
 * All the sleep functions such as nanosleep can only guarantee that they
 * sleep for _at least_ the time requested. We work around this by having
 * a high priority real time thread that accounts for the extra time slept
 * in nanosleep. This allows wakeup latency of the tested thread to be
 * accurate and reflect true scheduling delays.
 */
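/*
 * For example (illustrative numbers): if an emulator asks to sleep for
 * 10000us and nanosleep does not return until 12000us have elapsed, the
 * timekeeping thread records slept_interval = 2000us, which
 * periodic_schedule() then subtracts, so only genuine scheduler wakeup
 * delay is reported as latency.
 */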
void *emulation_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s, *tks;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->sem;
	tks = &tk->sem;
	init_all_sems(tks);

	/* Start the timekeeping thread */
	create_pthread(&th->tk_pthread, NULL, timekeeping_thread,
		(void*)(long) i);
	/* Wait for timekeeping thread to be ready */
	wait_sem(&tks->ready);

	/* Tell main we're ready to start */
	post_sem(&s->ready);

	/* Wait for signal from main to start thread */
	wait_sem(&s->start);

	/* Start the actual function being benched/or running as load */
	th->name(th);

	/* Stop the timekeeping thread */
	post_sem(&tks->stop);
	post_sem(&tks->start);
	join_pthread(th->tk_pthread, NULL);

	/* Tell main we've finished */
	post_sem(&s->complete);
	return NULL;
}

/*
 * In an unoptimised loop we try to benchmark how many meaningless loops
 * per millisecond we can perform on this hardware so that we can fairly
 * accurately reproduce a given percentage of cpu usage.
 */
void calibrate_loop(void)
{
	unsigned long long start_time, loops_per_msec, run_time = 0;
	unsigned long loops;
	struct timespec myts;

	loops_per_msec = 100000;
redo:
	/* Calibrate to within 1% accuracy */
	while (run_time > 1010000 || run_time < 990000) {
		loops = loops_per_msec;
		start_time = get_nsecs(&myts);
		burn_loops(loops);
		run_time = get_nsecs(&myts) - start_time;
		loops_per_msec = (1000000 * loops_per_msec / run_time ? :
			loops_per_msec);
	}

	/* Rechecking after a pause increases reproducibility */
	sleep(1);
	loops = loops_per_msec;
	start_time = get_nsecs(&myts);
	burn_loops(loops);
	run_time = get_nsecs(&myts) - start_time;

	/* Tolerate 5% difference on checking */
	if (run_time > 1050000 || run_time < 950000)
		goto redo;

	ud.loops_per_ms = loops_per_msec;
}

void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2)));

/* Output to console +/- logfile */
void log_output(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	if (vprintf(format, ap) == -1)
		terminal_error("vprintf");
	va_end(ap);
	if (ud.log) {
		va_start(ap, format);
		if (vfprintf(ud.logfile, format, ap) == -1)
			terminal_error("vfprintf");
		va_end(ap);
	}
	fflush(NULL);
}

/* Calculate statistics and output them */
void show_latencies(struct thread *th)
{
	struct data_table *tbj;
	struct tk_thread *tk;
	double average_latency, deadlines_met, samples_met, sd, max_latency;
	long double variance = 0;

	tbj = th->dt;
	tk = &th->tkthread;

	if (tbj->nr_samples > 1) {
		average_latency = (double)tbj->total_latency / tbj->nr_samples;
		/* Sample variance: (sum of squares - n * mean^2) / (n - 1) */
		variance = (tbj->sum_latency_squared - average_latency *
			average_latency * tbj->nr_samples) / (tbj->nr_samples - 1);
		sd = sqrt((double)variance);
	} else {
		average_latency = tbj->total_latency;
		sd = 0.0;
	}

	/*
	 * Landing on the boundary of a deadline can make loaded runs appear
	 * to do more work than unloaded due to tiny duration differences.
	 */
	if (tbj->achieved_burns > 0)
		samples_met = (double)tbj->achieved_burns /
		    (double)(tbj->achieved_burns + tbj->missed_burns) * 100;
	else
		samples_met = 0.0;
	max_latency = tbj->max_latency;
	/* When benchmarking rt we represent the data in us */
	if (!ud.do_rt) {
		average_latency /= 1000;
		sd /= 1000;
		max_latency /= 1000;
	}
	if (tbj->deadlines_met == 0)
		deadlines_met = 0;
	else
		deadlines_met = (double)tbj->deadlines_met /
		    (double)(tbj->missed_deadlines + tbj->deadlines_met) * 100;

	/* Messy nonsense to format the output nicely */
	if (average_latency >= 100)
		log_output("%7.0f +/- ", average_latency);
	else
		log_output("%7.3g +/- ", average_latency);
	if (sd >= 100)
		log_output("%-9.0f", sd);
	else
		log_output("%-9.3g", sd);
	if (max_latency >= 100)
		log_output("%7.0f\t", max_latency);
	else
		log_output("%7.3g\t", max_latency);
	log_output("\t%4.3g", samples_met);
	if (!th->nodeadlines)
		log_output("\t%11.3g", deadlines_met);
	log_output("\n");
	sync_flush();
}

void create_read_file(void)
{
	unsigned int i;
	FILE *fp;
	char *name = "interbench.read";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long mem, bsize;
	int tmp;

	if ((tmp = open(name, O_RDONLY)) == -1) {
		if (errno != ENOENT)
			terminal_error("open");
		goto write;
	}
	if (stat(name, &statbuf) == -1)
		terminal_error("stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (statbuf.st_size / 1024 / bsize == ud.ram / bsize)
		return;
	if (remove(name) == -1)
		terminal_error("remove");
write:
	fprintf(stderr, "Creating file for read load...\n");
	if (!(fp = fopen(name, "w")))
		terminal_error("fopen");
	if (stat(name, &statbuf) == -1)
		terminal_fileopen_error(fp, "stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (!(buf = calloc(1, bsize)))
		terminal_fileopen_error(fp, "calloc");
	mem = ud.ram / (bsize / 1024);	/* kilobytes to blocks */

	for (i = 0 ; i < mem; i++) {
		if (fwrite(buf, bsize, 1, fp) != 1)
			terminal_fileopen_error(fp, "fwrite");
	}
	if (fclose(fp) == -1)
		terminal_error("fclose");
	sync_flush();
}

void get_ram(void)
{
	struct vmstats vms;
	size_t vms_size = sizeof(vms);

	if (sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0))
		terminal_error("sysctlbyname: vm.vmstats");

	ud.ram = vms.v_page_count * vms.v_page_size;
	ud.ram /= 1024; /* linux size is in kB */
	ud.swap = ud.ram; /* XXX: swap doesn't have to be the same as RAM */

	if (!ud.ram || !ud.swap) {
		unsigned long i;
		fprintf(stderr, "\nCould not get memory or swap size. ");
		fprintf(stderr, "Will not perform mem_load\n");
		for (i = 0 ; i < THREADS ; i++) {
			if (strcmp(threadlist[i].label, "Memload") == 0) {
				threadlist[i].load = 0;
				threadlist[i].rtload = 0;
			}
		}
	}
}

void get_logfilename(void)
{
	struct tm *mytm;
	struct utsname buf;
	time_t t;
	int year, month, day, hours, minutes;

	time(&t);
	if (uname(&buf) == -1)
		terminal_error("uname");
	if (!(mytm = localtime(&t)))
		terminal_error("localtime");
	year = mytm->tm_year + 1900;
	month = mytm->tm_mon + 1;
	day = mytm->tm_mday;
	hours = mytm->tm_hour;
	minutes = mytm->tm_min;
	strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH);

	sprintf(ud.datestamp, "%2d%02d%02d%02d%02d",
		year, month, day, hours, minutes);
	snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer);
}

void start_thread(struct thread *th)
{
	post_sem(&th->sem.start);
}

void stop_thread(struct thread *th)
{
	post_sem(&th->sem.stop);
	wait_sem(&th->sem.complete);

	/* Kill the thread */
	join_pthread(th->pthread, NULL);
}

void init_sem(sem_t *sem)
{
	if (sem_init(sem, 0, 0))
		terminal_error("sem_init");
}

void init_all_sems(struct sems *s)
{
	/* Initialise the semaphores */
	init_sem(&s->ready);
	init_sem(&s->start);
	init_sem(&s->stop);
	init_sem(&s->complete);
	init_sem(&s->stopchild);
}

void initialise_thread(int i)
{
	struct thread *th = &threadlist[i];

	init_all_sems(&th->sem);
	/* Create the threads. Yes, the (long) cast is fugly but it's safe */
	create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i);

	wait_sem(&th->sem.ready);
	/*
	 * We set this pointer generically to NOT_BENCHING and set it to the
	 * benchmarked array entry only on benched threads.
	 */
	th->dt = &th->benchmarks[NOT_BENCHING];
	initialise_thread_data(th->dt);
}

/* A pseudo-semaphore for processes using a pipe */
void wait_on(int pype)
{
	int retval, buf = 0;

	retval = Read(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nread returned 0\n");
		exit (1);
	}
}

void wakeup_with(int pype)
{
	int retval, buf = 1;

	retval = Write(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nwrite returned 0\n");
		exit (1);
	}
}
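
/*
 * Together these give bench() a pseudo-semaphore over each pipe: one int
 * is written per wakeup and read per wait. The sequence per test is
 * ready (child to main), start (main to child), running (child to main),
 * stop (main to child) and finished (child to main), performed over
 * l2m/m2l for the load child and b2m/m2b for the bench child.
 */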

void run_loadchild(int j)
{
	struct thread *thj;
	thj = &threadlist[j];

	set_nice(ud.load_nice);
	initialise_thread(j);

	/* Tell main we're ready */
	wakeup_with(l2m[1]);

	/* Main tells us we're ready */
	wait_on(m2l[0]);
	start_thread(thj);

	/* Tell main we received the start and are running */
	wakeup_with(l2m[1]);

	/* Main tells us to stop */
	wait_on(m2l[0]);
	stop_thread(thj);

	/* Tell main we've finished */
	wakeup_with(l2m[1]);
	exit (0);
}

void run_benchchild(int i, int j)
{
	struct thread *thi;

	thi = &threadlist[i];

	set_nice(ud.bench_nice);
	if (ud.do_rt)
		set_mlock();
	initialise_thread(i);
	/* Point the data table to the appropriate load being tested */
	thi->dt = &thi->benchmarks[j];
	initialise_thread_data(thi->dt);
	if (ud.do_rt)
		set_thread_fifo(thi->pthread, 95);

	/* Tell main we're ready */
	wakeup_with(b2m[1]);

	/* Main tells us we're ready */
	wait_on(m2b[0]);
	start_thread(thi);

	/* Tell main we have started */
	wakeup_with(b2m[1]);

	/* Main tells us to stop */
	wait_on(m2b[0]);
	stop_thread(thi);

	if (ud.do_rt) {
		set_thread_normal(thi->pthread);
		set_munlock();
	}
	show_latencies(thi);

	/* Tell main we've finished */
	wakeup_with(b2m[1]);
	exit(0);
}

void bench(int i, int j)
{
	pid_t bench_pid, load_pid;

	if ((load_pid = fork()) == -1)
		terminal_error("fork");
	if (!load_pid)
		run_loadchild(j);

	/* Wait for load process to be ready */
	wait_on(l2m[0]);
	if ((bench_pid = fork()) == -1)
		terminal_error("fork");
	if (!bench_pid)
		run_benchchild(i, j);

	/* Wait for bench process to be ready */
	wait_on(b2m[0]);

	/*
	 * We want to be higher priority than everything to signal them to
	 * stop and we lock our memory if we can as well
	 */
	set_fifo(99);
	set_mlock();

	/* Wakeup the load process */
	wakeup_with(m2l[1]);
	/* Load tells us it has received the first message and is running */
	wait_on(l2m[0]);

	/* After a small delay, wake up the benched process */
	sleep(1);
	wakeup_with(m2b[1]);

	/* Bench tells us it has received the first message and is running */
	wait_on(b2m[0]);
	microsleep(ud.duration * 1000000);

	/* Tell the benched process to stop its threads and output results */
	wakeup_with(m2b[1]);

	/* Tell the load process to stop its threads */
	wakeup_with(m2l[1]);

	/* Return to SCHED_NORMAL */
	set_normal();
	set_munlock();

	/* Wait for load and bench processes to terminate */
	wait_on(l2m[0]);
	wait_on(b2m[0]);
}

void init_pipe(int *pype)
{
	if (pipe(pype) == -1)
		terminal_error("pipe");
}

void init_pipes(void)
{
	init_pipe(m2l);
	init_pipe(l2m);
	init_pipe(m2b);
	init_pipe(b2m);
}

void usage(void)
{
	/* Affinity commented out till working on all architectures */
	fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n");
	fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int>] [-B <int>] [-N <int>]\n");
	fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
	fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
	fprintf(stderr, "\t[-h]\n\n");
	fprintf(stderr, " -l\tUse <int> loops per sec (default: use saved benchmark)\n");
	fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
	fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n");
	fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
	fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n");
	//fprintf(stderr, " -u\tImitate uniprocessor\n");
	fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n");
	fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n");
	fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
	fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
	fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
	fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n");
	fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n");
	fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n");
	fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
	fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
	fprintf(stderr, " -h\tShow this help\n");
	fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n\n");
}

#ifdef DEBUG
void deadchild(int crap)
{
	pid_t retval;
	int status;

	crap = 0;

	if ((retval = waitpid(-1, &status, WNOHANG)) == -1) {
		if (errno == ECHILD)
			return;
		terminal_error("waitpid");
	}
	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return;
	fprintf(stderr, "\nChild terminated abnormally ");
	if (WIFSIGNALED(status))
		fprintf(stderr, "with signal %d", WTERMSIG(status));
	fprintf(stderr, "\n");
	exit (1);
}
#endif

int load_index(const char* loadname)
{
	int i;

	for (i = 0 ; i < THREADS ; i++)
		if (strcasecmp(loadname, threadlist[i].label) == 0)
			return i;
	return -1;
}

inline int bit_is_on(const unsigned int mask, int index)
{
	return (mask & (1 << index)) != 0;
}

inline void set_bit_on(unsigned int *mask, int index)
{
	*mask |= (1 << index);
}

int main(int argc, char **argv)
{
	unsigned long custom_cpu = 0;
	int q, i, j, affinity = 0, benchmark = 0;
	unsigned int selected_loads = 0;
	unsigned int excluded_loads = 0;
	unsigned int selected_benches = 0;
	unsigned int excluded_benches = 0;
	FILE *fp;
	/*
	 * The loops_per_ms value is stored for reuse in a file whose name
	 * can't be confused with anything else
	 */
	char *fname = "interbench.loops_per_ms";
	char *comment = NULL;
#ifdef DEBUG
	feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
	if (signal(SIGCHLD, deadchild) == SIG_ERR)
		terminal_error("signal");
#endif

	while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
		switch (q) {
			case 'h':
				usage();
				return (0);
			case 'l':
				ud.loops_per_ms = atoi(optarg);
				break;
			case 't':
				ud.duration = atoi(optarg);
				break;
			case 'L':
				ud.cpu_load = atoi(optarg);
				break;
			case 'B':
				ud.bench_nice = atoi(optarg);
				break;
			case 'N':
				ud.load_nice = atoi(optarg);
				break;
			case 'u':
				affinity = 1;
				break;
			case 'b':
				benchmark = 1;
				break;
			case 'c':
				ud.log = 0;
				break;
			case 'r':
				ud.do_rt = 1;
				break;
			case 'C':
				custom_cpu = (unsigned long)atol(optarg);
				break;
			case 'I':
				ud.custom_interval = atol(optarg);
				break;
			case 'm':
				comment = optarg;
				break;
			case 'w':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown load \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&selected_loads, i);
				break;
			case 'x':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown load \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&excluded_loads, i);
				break;
			case 'W':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&selected_benches, i);
				break;
			case 'X':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&excluded_benches, i);
				break;
			default:
				usage();
				return (1);
		}
	}
	argc -= optind;
	argv += optind;
	/* default is all loads */
	if (selected_loads == 0)
		selected_loads = (unsigned int)-1;
	selected_loads &= ~excluded_loads;
	/* default is all benches */
	if (selected_benches == 0)
		selected_benches = (unsigned int)-1;
	selected_benches &= ~excluded_benches;

	if (!test_fifo()) {
		fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n");
		fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n");
		if (ud.do_rt) {
			fprintf(stderr, "Real time tests were requested, aborting.\n");
			exit (1);
		}
		fprintf(stderr, "Results will be unreliable.\n");
	}
	if (!ud.cpu_load) {
		fprintf(stderr, "Invalid cpu load\n");
		exit (1);
	}

	if ((custom_cpu && !ud.custom_interval) ||
		(ud.custom_interval && !custom_cpu) ||
		custom_cpu > 100) {
			fprintf(stderr, "Invalid custom values, aborting.\n");
			exit (1);
	}

	if (custom_cpu && ud.custom_interval) {
		ud.custom_run = ud.custom_interval * custom_cpu / 100;
		threadlist[CUSTOM].bench = 1;
		threadlist[CUSTOM].load = 1;
		threadlist[CUSTOM].rtbench = 1;
		threadlist[CUSTOM].rtload = 1;
	}

	/* FIXME: Affinity commented out till working on all architectures */
#if 0
	if (affinity) {
#ifdef CPU_SET	/* Current glibc expects cpu_set_t */
		cpu_set_t cpumask;

		CPU_ZERO(&cpumask);
		CPU_SET(0, &cpumask);
#else		/* Old glibc expects unsigned long */
		unsigned long cpumask = 1;
#endif
		if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) {
			if (errno != EPERM)
				terminal_error("sched_setaffinity");
			fprintf(stderr, "could not set cpu affinity\n");
		}
	}
#endif

	/* Make benchmark a multiple of 10 seconds for proper range of X loads */
	if (ud.duration % 10)
		ud.duration += 10 - ud.duration % 10;

	if (benchmark)
		ud.loops_per_ms = 0;
	/*
	 * Try to get loops_per_ms from command line first, file second, and
	 * benchmark if not available.
	 */
	if (!ud.loops_per_ms) {
		if (benchmark)
			goto bench;
		if ((fp = fopen(fname, "r"))) {
			fscanf(fp, "%lu", &ud.loops_per_ms);
			if (fclose(fp) == -1)
				terminal_error("fclose");
			if (ud.loops_per_ms) {
				fprintf(stderr,
					"%lu loops_per_ms read from file interbench.loops_per_ms\n",
					ud.loops_per_ms);
				goto loops_known;
			}
		} else
			if (errno != ENOENT)
				terminal_error("fopen");
bench:
		fprintf(stderr, "loops_per_ms unknown; benchmarking...\n");

		/*
		 * To get as accurate a loop as possible we time it running
		 * SCHED_FIFO if we can
		 */
		set_fifo(99);
		calibrate_loop();
		set_normal();
	} else
		fprintf(stderr, "loops_per_ms specified from command line\n");

	if (!(fp = fopen(fname, "w"))) {
		if (errno != EACCES)	/* No write access is not terminal */
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n");
		goto loops_known;
	}
	fprintf(fp, "%lu", ud.loops_per_ms);
	fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
		ud.loops_per_ms);
	if (fclose(fp) == -1)
		terminal_error("fclose");

loops_known:
	get_ram();
	get_logfilename();
	create_read_file();
	init_pipes();

	if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) {
		if (errno != EACCES)
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to logfile\n");
		ud.log = 0;
	}
	log_output("\n");
	log_output("Using %lu loops per ms, running every load for %d seconds\n",
		ud.loops_per_ms, ud.duration);
	log_output("Benchmarking kernel %s at datestamp %s\n",
		ud.unamer, ud.datestamp);
	if (comment)
		log_output("Comment: %s\n", comment);
	log_output("\n");

	for (i = 0 ; i < THREADS ; i++)
		threadlist[i].threadno = i;

	for (i = 0 ; i < THREADS ; i++) {
		struct thread *thi = &threadlist[i];
		int *benchme;

		if (ud.do_rt)
			benchme = &threadlist[i].rtbench;
		else
			benchme = &threadlist[i].bench;

		if (!*benchme || !bit_is_on(selected_benches, i))
			continue;

		log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label);
		if (ud.do_rt)
			log_output("real time ");
		else if (ud.bench_nice)
			log_output("nice %d ", ud.bench_nice);
		log_output("in the presence of simulated ");
		if (ud.load_nice)
			log_output("nice %d ", ud.load_nice);
		log_output("---\n");

		log_output("Load");
		if (ud.do_rt)
			log_output("\tLatency +/- SD (us)");
		else
			log_output("\tLatency +/- SD (ms)");
		log_output("  Max Latency ");
		log_output("  %% Desired CPU");
		if (!thi->nodeadlines)
			log_output("  %% Deadlines Met");
		log_output("\n");

		for (j = 0 ; j < THREADS ; j++) {
			struct thread *thj = &threadlist[j];

			if (j == i || !bit_is_on(selected_loads, j) ||
				(!threadlist[j].load && !ud.do_rt) ||
				(!threadlist[j].rtload && ud.do_rt))
					continue;
			log_output("%s\t", thj->label);
			sync_flush();
			bench(i, j);
		}
		log_output("\n");
	}
	log_output("\n");
	if (ud.log)
		fclose(ud.logfile);

	return 0;
}