/*******************************************
 *
 * Interbench - Interactivity benchmark
 *
 * Author:  Con Kolivas <kernel@kolivas.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *******************************************/

#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64	/* Large file support */
#define INTERBENCH_VERSION	"0.31"

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <time.h>
#include <errno.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
#include <fenv.h>
#include <signal.h>
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#if defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/sysctl.h>
#endif
#include "interbench.h"

#define MAX_UNAME_LENGTH	100
#define MAX_LOG_LENGTH		((MAX_UNAME_LENGTH) + 4)
#define MIN_BLK_SIZE		1024
#define DEFAULT_RESERVE		64
#define MB			(1024 * 1024)	/* 2^20 bytes */
#define KB			1024
#define MAX_MEM_IN_MB		(1024 * 64)	/* 64 GB */

struct user_data {
	unsigned long loops_per_ms;
	unsigned long ram, swap;
	int duration;
	int do_rt;
	int bench_nice;
	int load_nice;
	unsigned long custom_run;
	unsigned long custom_interval;
	unsigned long cpu_load;
	char logfilename[MAX_LOG_LENGTH];
	int log;
	char unamer[MAX_UNAME_LENGTH];
	char datestamp[13];
	FILE *logfile;
} ud = {
	.duration = 30,
	.cpu_load = 4,
	.log = 1,
};

/* Pipes main to/from load and bench processes */
static int m2l[2], l2m[2], m2b[2], b2m[2];
/* Which member of benchmarks is used when not benchmarking */
#define NOT_BENCHING	(THREADS)
#define CUSTOM		(THREADS - 1)

/*
 * To add another load or a benchmark you need to increment the value of
 * THREADS, add a function prototype for your function and add an entry to
 * the threadlist. To specify whether the function is a benchmark or a load
 * set the benchmark and/or load flag as appropriate. The basic requirements
 * of a new load can be seen by using emulate_none as a template.
 */
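
/*
 * Illustrative sketch only (not wired into the build): a minimal new
 * load following the emulate_none/emulate_audio templates below. To use
 * it you would also bump THREADS in interbench.h, add the prototype to
 * the list below and add a threadlist entry such as
 *	{.label = "Example", .name = emulate_example, .load = 1},
 */
#if 0
void emulate_example(struct thread *th)
{
	sem_t *s = &th->sem.stop;

	while (1) {
		burn_usecs(1000);	/* consume ~1ms of cpu per iteration */
		if (!trywait_sem(s))	/* the stop semaphore ends the load */
			return;
	}
}
#endif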

void emulate_none(struct thread *th);
void emulate_audio(struct thread *th);
void emulate_video(struct thread *th);
void emulate_x(struct thread *th);
void emulate_game(struct thread *th);
void emulate_burn(struct thread *th);
void emulate_write(struct thread *th);
void emulate_read(struct thread *th);
void emulate_ring(struct thread *th);
void emulate_compile(struct thread *th);
void emulate_memload(struct thread *th);
void emulate_hackbench(struct thread *th);
void emulate_custom(struct thread *th);

struct thread threadlist[THREADS] = {
	{.label = "None", .name = emulate_none, .load = 1, .rtload = 1},
	{.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1},
	{.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1},
	{.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1},
	{.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1},
	{.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1},
	{.label = "Write", .name = emulate_write, .load = 1, .rtload = 1},
	{.label = "Read", .name = emulate_read, .load = 1, .rtload = 1},
	{.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0},	/* No useful data from this */
	{.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1},
	{.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1},
	{.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0},	/* This is causing signal headaches */
	{.label = "Custom", .name = emulate_custom},	/* Leave custom as last entry */
};

void init_sem(sem_t *sem);
void init_all_sems(struct sems *s);
void initialise_thread(int i);
void start_thread(struct thread *th);
void stop_thread(struct thread *th);
void terminal_error(const char *name)
{
	fprintf(stderr, "\n");
	perror(name);
	exit (1);
}

void terminal_fileopen_error(FILE *fp, char *name)
{
	if (fclose(fp) == -1)
		terminal_error("fclose");
	terminal_error(name);
}

unsigned long long get_nsecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	return (myts->tv_sec * 1000000000ULL + myts->tv_nsec);
}

unsigned long get_usecs(struct timespec *myts)
{
	if (clock_gettime(CLOCK_REALTIME, myts))
		terminal_error("clock_gettime");
	return (myts->tv_sec * 1000000ULL + myts->tv_nsec / 1000);
}

void set_fifo(int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
	}
}

void set_mlock(void)
{
	int mlockflags;

	mlockflags = MCL_CURRENT | MCL_FUTURE;
	mlockall(mlockflags);	/* Is not critical if this fails */
}

void set_munlock(void)
{
	if (munlockall() == -1)
		terminal_error("munlockall");
}

void set_thread_fifo(pthread_t pthread, int prio)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	/* pthread calls return the error number instead of setting errno */
	if ((errno = pthread_setschedparam(pthread, SCHED_FIFO, &sp)))
		terminal_error("pthread_setschedparam");
}

void set_normal(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		fprintf(stderr, "Weird, could not unset RT scheduling!\n");
	}
}

void set_nice(int prio)
{
	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
		terminal_error("setpriority");
}

int test_fifo(void)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = sched_get_priority_max(SCHED_FIFO);
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
		goto out_fail;
	}
	if (sched_getscheduler(0) != SCHED_FIFO)
		goto out_fail;
	set_normal();
	return 1;
out_fail:
	set_normal();
	return 0;
}

void set_thread_normal(pthread_t pthread)
{
	struct sched_param sp;
	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if ((errno = pthread_setschedparam(pthread, SCHED_OTHER, &sp)))
		terminal_error("pthread_setschedparam");
}

void sync_flush(void)
{
	if ((fflush(NULL)) == EOF)
		terminal_error("fflush");
	sync();
	sync();
	sync();
}

unsigned long compute_allocable_mem(void)
{
	unsigned long total = ud.ram + ud.swap;
	unsigned long usage = ud.ram * 110 / 100;

	/* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
	if (total - DEFAULT_RESERVE < usage)
		usage = total - DEFAULT_RESERVE;
	usage /= 1024;	/* to megabytes */
	if (usage > 2930)
		usage = 2930;
	return usage;
}
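
/*
 * Worked example of the sums above (ud.ram and ud.swap are stored in
 * kilobytes by get_ram()): with 1GB ram and 2GB swap, usage starts at
 * 1048576 * 110 / 100 = 1153433kB, well under total - DEFAULT_RESERVE,
 * and becomes 1126MB after the divide. The 2930MB cap only bites with
 * more than roughly 2.6GB of ram.
 */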

void burn_loops(unsigned long loops)
{
	unsigned long i;

	/*
	 * We need some magic here to prevent the compiler from optimising
	 * this loop away. Otherwise trying to emulate a fixed cpu load
	 * with this loop will not work.
	 */
	for (i = 0 ; i < loops ; i++)
		asm volatile("" : : : "memory");
}

/* Use this many usecs of cpu time */
void burn_usecs(unsigned long usecs)
{
	unsigned long ms_loops;

	ms_loops = ud.loops_per_ms / 1000 * usecs;
	burn_loops(ms_loops);
}
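
/*
 * Note the order of operations above: dividing loops_per_ms by 1000
 * before multiplying avoids overflowing unsigned long for large usecs
 * values, at the cost of dropping the sub-1000 remainder. E.g. with
 * loops_per_ms = 137000, burn_usecs(2500) burns 137 * 2500 = 342500
 * loops.
 */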

void microsleep(unsigned long long usecs)
{
	struct timespec req, rem;

	rem.tv_sec = rem.tv_nsec = 0;

	req.tv_sec = usecs / 1000000;
	req.tv_nsec = (usecs - (req.tv_sec * 1000000)) * 1000;
continue_sleep:
	if ((nanosleep(&req, &rem)) == -1) {
		if (errno == EINTR) {
			if (rem.tv_sec || rem.tv_nsec) {
				req.tv_sec = rem.tv_sec;
				req.tv_nsec = rem.tv_nsec;
				goto continue_sleep;
			}
			goto out;
		}
		terminal_error("nanosleep");
	}
out:
	return;
}
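
/*
 * e.g. microsleep(1500000) requests a 1.5 second sleep and, if a signal
 * interrupts nanosleep, transparently resumes with the remaining time,
 * so callers never have to handle EINTR themselves.
 */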

/*
 * sem_post and sem_wait should not fail, but they can return -1 (with
 * errno set, e.g. EINTR for sem_wait) so we must handle it.
 */
inline void post_sem(sem_t *s)
{
retry:
	if ((sem_post(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_post");
	}
}

inline void wait_sem(sem_t *s)
{
retry:
	if ((sem_wait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_wait");
	}
}

inline int trywait_sem(sem_t *s)
{
	int ret;

retry:
	if ((ret = sem_trywait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		if (errno != EAGAIN)
			terminal_error("sem_trywait");
	}
	return ret;
}

inline ssize_t Read(int fd, void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = read(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("read");
	}
	return retval;
}

inline ssize_t Write(int fd, const void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = write(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("write");
	}
	return retval;
}

unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs,
	unsigned long interval_usecs, unsigned long long deadline)
{
	unsigned long long latency, missed_latency;
	unsigned long long current_time;
	struct tk_thread *tk;
	struct data_table *tb;
	struct timespec myts;

	latency = 0;
	tb = th->dt;
	tk = &th->tkthread;

	current_time = get_usecs(&myts);
	if (current_time > deadline + tk->slept_interval)
		latency = current_time - deadline - tk->slept_interval;

	/* calculate the latency for missed frames */
	missed_latency = 0;

	current_time = get_usecs(&myts);
	if (interval_usecs && current_time > deadline + interval_usecs) {
		/* We missed the deadline even before we consumed cpu */
		unsigned long intervals;

		deadline += interval_usecs;
		intervals = (current_time - deadline) /
			interval_usecs + 1;

		tb->missed_deadlines += intervals;
		missed_latency = intervals * interval_usecs;
		deadline += intervals * interval_usecs;
		tb->missed_burns += intervals;
		goto bypass_burn;
	}

	burn_usecs(run_usecs);
	current_time = get_usecs(&myts);
	tb->achieved_burns++;

	/*
	 * If we meet the deadline we move the deadline forward, otherwise
	 * we consider it a missed deadline and dropped frame etc.
	 */
	deadline += interval_usecs;
	if (deadline >= current_time) {
		tb->deadlines_met++;
	} else {
		if (interval_usecs) {
			unsigned long intervals = (current_time - deadline) /
				interval_usecs + 1;

			tb->missed_deadlines += intervals;
			missed_latency = intervals * interval_usecs;
			deadline += intervals * interval_usecs;
			if (intervals > 1)
				tb->missed_burns += intervals;
		} else {
			deadline = current_time;
			goto out_nosleep;
		}
	}
bypass_burn:
	tk->sleep_interval = deadline - current_time;

	post_sem(&tk->sem.start);
	wait_sem(&tk->sem.complete);
out_nosleep:
	/*
	 * Must add missed_latency to total here as this function may not be
	 * called again and the missed latency can be lost
	 */
	latency += missed_latency;
	if (latency > tb->max_latency)
		tb->max_latency = latency;
	tb->total_latency += latency;
	tb->sum_latency_squared += latency * latency;
	tb->nr_samples++;

	return deadline;
}
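
/*
 * A concrete pass through the missed-deadline arithmetic above: with
 * interval_usecs = 50000 and an incoming deadline of t, waking at
 * t + 120000 first advances the deadline one interval to t + 50000,
 * then intervals = (120000 - 50000) / 50000 + 1 = 2, so 2 missed
 * deadlines/burns are recorded, the deadline jumps to t + 150000 and
 * the timekeeping thread is asked to sleep the remaining 30000us.
 */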

void initialise_thread_data(struct data_table *tb)
{
	tb->max_latency =
		tb->total_latency =
		tb->sum_latency_squared =
		tb->deadlines_met =
		tb->missed_deadlines =
		tb->missed_burns =
		tb->nr_samples = 0;
}

void create_pthread(pthread_t *thread, pthread_attr_t *attr,
	void * (*start_routine)(void *), void *arg)
{
	if (pthread_create(thread, attr, start_routine, arg))
		terminal_error("pthread_create");
}

void join_pthread(pthread_t th, void **thread_return)
{
	if (pthread_join(th, thread_return))
		terminal_error("pthread_join");
}

void emulate_none(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	wait_sem(s);
}

#define AUDIO_INTERVAL	(50000)
#define AUDIO_RUN	(AUDIO_INTERVAL / 20)
/* We emulate audio by using 5% cpu and waking every 50ms */
void emulate_audio(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/* We emulate video by using 40% cpu and waking for 60fps */
#define VIDEO_INTERVAL	(1000000 / 60)
#define VIDEO_RUN	(VIDEO_INTERVAL * 40 / 100)
void emulate_video(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

/*
 * We emulate X by running for a variable percentage of cpu from 0-100%
 * in 1ms chunks.
 */
void emulate_x(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 100;
	deadline = get_usecs(&myts);

	while (1) {
		int i, j;
		for (i = 0 ; i <= 100 ; i++) {
			j = 100 - i;
			deadline = periodic_schedule(th, i * 1000, j * 1000,
				deadline);
			deadline += i * 1000;
			if (!trywait_sem(s))
				return;
		}
	}
}
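
/*
 * One sweep of the emulate_x loop runs 0ms/sleeps 100ms, then 1ms/99ms
 * and so on up to 100ms/0ms, covering every duty cycle; each step spans
 * 100ms so a sweep takes about 10 seconds, matching the 100 deadlines
 * per decasecond set above. The deadline += i * 1000 after each step
 * adds back the run time, which periodic_schedule's interval excludes.
 */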

/*
 * We emulate gaming by using 100% cpu and seeing how many frames (jobs
 * completed) we can do in that time. Deadlines are meaningless with
 * unlocked frame rates. We do not use periodic_schedule for this load
 * because it never wants to sleep.
 */
#define GAME_INTERVAL	(100000)
#define GAME_RUN	(GAME_INTERVAL)
void emulate_game(struct thread *th)
{
	unsigned long long deadline, current_time, latency;
	sem_t *s = &th->sem.stop;
	struct timespec myts;
	struct data_table *tb;

	tb = th->dt;
	th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10;

	while (1) {
		deadline = get_usecs(&myts) + GAME_INTERVAL;
		burn_usecs(GAME_RUN);
		current_time = get_usecs(&myts);
		/* use usecs instead of simple count for game burn statistics */
		tb->achieved_burns += GAME_RUN;
		if (current_time > deadline) {
			latency = current_time - deadline;
			tb->missed_burns += latency;
		} else
			latency = 0;
		if (latency > tb->max_latency)
			tb->max_latency = latency;
		tb->total_latency += latency;
		tb->sum_latency_squared += latency * latency;
		tb->nr_samples++;
		if (!trywait_sem(s))
			return;
	}
}
burn_thread(void * t)593 void *burn_thread(void *t)
594 {
595 	struct thread *th;
596 	sem_t *s;
597 	long i = (long)t;
598 
599 	th = &threadlist[i];
600 	s = &th->sem.stopchild;
601 
602 	while (1) {
603 		burn_loops(ud.loops_per_ms);
604 		if (!trywait_sem(s)) {
605 			post_sem(s);
606 			break;
607 		}
608 	}
609 	return NULL;
610 }
611 
612 /* Have ud.cpu_load threads burn cpu continuously */
emulate_burn(struct thread * th)613 void emulate_burn(struct thread *th)
614 {
615 	sem_t *s = &th->sem.stop;
616 	unsigned long i;
617 	long t;
618 	pthread_t burnthreads[ud.cpu_load];
619 
620 	t = th->threadno;
621 	for (i = 0 ; i < ud.cpu_load ; i++)
622 		create_pthread(&burnthreads[i], NULL, burn_thread,
623 			(void*)(long) t);
624 	wait_sem(s);
625 	post_sem(&th->sem.stopchild);
626 	for (i = 0 ; i < ud.cpu_load ; i++)
627 		join_pthread(burnthreads[i], NULL);
628 }
629 
630 /* Write a file the size of ram continuously */
emulate_write(struct thread * th)631 void emulate_write(struct thread *th)
632 {
633 	sem_t *s = &th->sem.stop;
634 	FILE *fp;
635 	char *name = "interbench.write";
636 	void *buf = NULL;
637 	struct stat statbuf;
638 	unsigned long mem;
639 
640 	if (!(fp = fopen(name, "w")))
641 		terminal_error("fopen");
642 	if (stat(name, &statbuf) == -1)
643 		terminal_fileopen_error(fp, "stat");
644 	if (statbuf.st_blksize < MIN_BLK_SIZE)
645 		statbuf.st_blksize = MIN_BLK_SIZE;
646 	mem = ud.ram / (statbuf.st_blksize / 1024);	/* kilobytes to blocks */
647 	if (!(buf = calloc(1, statbuf.st_blksize)))
648 		terminal_fileopen_error(fp, "calloc");
649 	if (fclose(fp) == -1)
650 		terminal_error("fclose");
651 
652 	while (1) {
653 		unsigned int i;
654 
655 		if (!(fp = fopen(name, "w")))
656 			terminal_error("fopen");
657 		if (stat(name, &statbuf) == -1)
658 			terminal_fileopen_error(fp, "stat");
659 		for (i = 0 ; i < mem; i++) {
660 			if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1)
661 				terminal_fileopen_error(fp, "fwrite");
662 			if (!trywait_sem(s))
663 				goto out;
664 		}
665 		if (fclose(fp) == -1)
666 			terminal_error("fclose");
667 	}
668 
669 out:
670 	if (fclose(fp) == -1)
671 		terminal_error("fclose");
672 	if (remove(name) == -1)
673 		terminal_error("remove");
674 	sync_flush();
675 }
676 
677 /* Read a file the size of ram continuously */
emulate_read(struct thread * th)678 void emulate_read(struct thread *th)
679 {
680 	sem_t *s = &th->sem.stop;
681 	char *name = "interbench.read";
682 	void *buf = NULL;
683 	struct stat statbuf;
684 	unsigned long bsize;
685 	int tmp;
686 
687 	if ((tmp = open(name, O_RDONLY)) == -1)
688 		terminal_error("open");
689 	if (stat(name, &statbuf) == -1)
690 		terminal_error("stat");
691 	bsize = statbuf.st_blksize;
692 	if (!(buf = malloc(bsize)))
693 		terminal_error("malloc");
694 
695 	while (1) {
696 		int rd;
697 
698 		/*
699 		 * We have to read the whole file before quitting the load
700 		 * to prevent the data being cached for the next read. This
701 		 * is also the reason the file is the size of physical ram.
702 		 */
703 		while ((rd = Read(tmp , buf, bsize)) > 0);
704 		if(!trywait_sem(s))
705 			return;
706 		if (lseek(tmp, (off_t)0, SEEK_SET) == -1)
707 			terminal_error("lseek");
708 	}
709 }
710 
711 #define RINGTHREADS	4
712 
713 struct thread ringthreads[RINGTHREADS];
714 
ring_thread(void * t)715 void *ring_thread(void *t)
716 {
717 	struct thread *th;
718 	struct sems *s;
719 	int i, post_to;
720 
721 	i = (long)t;
722 	th = &ringthreads[i];
723 	s = &th->sem;
724 	post_to = i + 1;
725 	if (post_to == RINGTHREADS)
726 		post_to = 0;
727 	if (i == 0)
728 		post_sem(&s->ready);
729 
730 	while (1) {
731 		wait_sem(&s->start);
732 		post_sem(&ringthreads[post_to].sem.start);
733 		if (!trywait_sem(&s->stop))
734 			goto out;
735 	}
736 out:
737 	post_sem(&ringthreads[post_to].sem.start);
738 	post_sem(&s->complete);
739 	return NULL;
740 }
741 
/* Create a ring of 4 threads that wake each other up in a circle */
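/*
 * Token-passing pattern of the ring, with each post_sem waking the
 * next thread:
 *
 *	thread 0 -> thread 1 -> thread 2 -> thread 3
 *	    ^                                   |
 *	    +-----------------------------------+
 */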
void emulate_ring(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	int i;

	for (i = 0 ; i < RINGTHREADS ; i++) {
		init_all_sems(&ringthreads[i].sem);
		create_pthread(&ringthreads[i].pthread, NULL,
			ring_thread, (void*)(long) i);
	}

	wait_sem(&ringthreads[0].sem.ready);
	post_sem(&ringthreads[0].sem.start);
	wait_sem(s);
	for (i = 0 ; i < RINGTHREADS ; i++)
		post_sem(&ringthreads[i].sem.stop);
	for (i = 0 ; i < RINGTHREADS ; i++) {
		wait_sem(&ringthreads[i].sem.complete);
		join_pthread(ringthreads[i].pthread, NULL);
	}
}

/* We emulate a compile by running burn, write and read threads simultaneously */
void emulate_compile(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long i, threads[3] = {0, 0, 0};	/* 0 is a safe "not found" value: entry 0 is "None" */

	for (i = 0 ; i < THREADS ; i++) {
		if (!strcmp(threadlist[i].label, "Burn"))
			threads[0] = i;
		if (!strcmp(threadlist[i].label, "Write"))
			threads[1] = i;
		if (!strcmp(threadlist[i].label, "Read"))
			threads[2] = i;
	}
	for (i = 0 ; i < 3 ; i++) {
		if (!threads[i]) {
			fprintf(stderr, "Can't find all threads for compile load\n");
			exit(1);
		}
	}
	for (i = 0 ; i < 3 ; i++) {
		initialise_thread(threads[i]);
		start_thread(&threadlist[threads[i]]);
	}
	wait_sem(s);
	for (i = 0 ; i < 3 ; i++)
		stop_thread(&threadlist[threads[i]]);
}

void *grab_and_touch(char *block[], int i)
{
	block[i] = (char *) malloc(MB);
	if (!block[i])
		return NULL;
	return (memset(block[i], 1, MB));
}

/* We emulate a memory load by allocating and torturing 110% of available ram */
void emulate_memload(struct thread *th)
{
	sem_t *s = &th->sem.stop;
	unsigned long touchable_mem, i;
	char *mem_block[MAX_MEM_IN_MB];
	void *success;

	touchable_mem = compute_allocable_mem();
	/* loop until we're killed, frobbing memory in various perverted ways */
	while (1) {
		for (i = 0; i < touchable_mem; i++) {
			success = grab_and_touch(mem_block, i);
			if (!success) {
				touchable_mem = i;	/* blocks 0..i-1 were allocated */
				break;
			}
		}
		if (!trywait_sem(s))
			goto out_freemem;
		for (i = 0; i < touchable_mem; i++) {
			memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) %
				touchable_mem], MB);
			if (!trywait_sem(s))
				goto out_freemem;
		}
		for (i = 0; i < touchable_mem; i++) {
			free(mem_block[i]);
		}
		if (!trywait_sem(s))
			goto out;
	}
out_freemem:
	for (i = 0; i < touchable_mem; i++)
		free(mem_block[i]);
out:
	return;
}

struct thread hackthread;

void emulate_hackbench(struct thread *th)
{
	sem_t *s = &th->sem.stop;

	init_all_sems(&hackthread.sem);
	create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0);

	wait_sem(s);

	post_sem(&hackthread.sem.stop);
	wait_sem(&hackthread.sem.complete);

	join_pthread(hackthread.pthread, NULL);
}

#define CUSTOM_INTERVAL	(ud.custom_interval)
#define CUSTOM_RUN	(ud.custom_run)
void emulate_custom(struct thread *th)
{
	unsigned long long deadline;
	sem_t *s = &th->sem.stop;
	struct timespec myts;

	th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10;
	deadline = get_usecs(&myts);

	while (1) {
		deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL,
			deadline);
		if (!trywait_sem(s))
			return;
	}
}

void *timekeeping_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s;
	struct timespec myts;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->tkthread.sem;
	/*
	 * If this timekeeping thread belongs to a benchmarked thread we run
	 * at even higher priority than the benched thread does when running
	 * real time. Otherwise, for a load thread, the timekeeping thread
	 * does not need accurate accounting and remains SCHED_NORMAL.
	 */
	if (th->dt != &th->benchmarks[NOT_BENCHING])
		set_fifo(sched_get_priority_max(SCHED_FIFO) - 1);
	/* These values must be changed at the appropriate places or race */
	tk->sleep_interval = tk->slept_interval = 0;
	post_sem(&s->ready);

	while (1) {
		unsigned long start_time, now;

		if (!trywait_sem(&s->stop))
			goto out;
		wait_sem(&s->start);
		tk->slept_interval = 0;
		start_time = get_usecs(&myts);
		if (!trywait_sem(&s->stop))
			goto out;
		if (tk->sleep_interval) {
			unsigned long diff = 0;
			microsleep(tk->sleep_interval);
			now = get_usecs(&myts);
			/* now should always be > start_time but... */
			if (now > start_time) {
				diff = now - start_time;
				if (diff > tk->sleep_interval)
					tk->slept_interval = diff -
						tk->sleep_interval;
			}
		}
		tk->sleep_interval = 0;
		post_sem(&s->complete);
	}
out:
	return NULL;
}

/*
 * All the sleep functions such as nanosleep can only guarantee that they
 * sleep for _at least_ the time requested. We work around this by having
 * a high priority real time thread that accounts for the extra time slept
 * in nanosleep. This allows wakeup latency of the tested thread to be
 * accurate and reflect true scheduling delays.
 */
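/*
 * Rough sketch of the semaphore handshake implemented below, per
 * emulation thread:
 *
 *	main process          emulation_thread         timekeeping_thread
 *	                      init sems, spawn tk
 *	                      wait tk ready        <-  post ready
 *	wait sem.ready    <-  post sem.ready
 *	post sem.start    ->  run th->name(th)
 *	                      post tk stop, start  ->  loop exits
 *	wait sem.complete <-  post sem.complete
 */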
void *emulation_thread(void *t)
{
	struct thread *th;
	struct tk_thread *tk;
	struct sems *s, *tks;
	long i = (long)t;

	th = &threadlist[i];
	tk = &th->tkthread;
	s = &th->sem;
	tks = &tk->sem;
	init_all_sems(tks);

	/* Start the timekeeping thread */
	create_pthread(&th->tk_pthread, NULL, timekeeping_thread,
		(void*)(long) i);
	/* Wait for timekeeping thread to be ready */
	wait_sem(&tks->ready);

	/* Tell main we're ready to start */
	post_sem(&s->ready);

	/* Wait for signal from main to start thread */
	wait_sem(&s->start);

	/* Start the actual function being benched/or running as load */
	th->name(th);

	/* Stop the timekeeping thread */
	post_sem(&tks->stop);
	post_sem(&tks->start);
	join_pthread(th->tk_pthread, NULL);

	/* Tell main we've finished */
	post_sem(&s->complete);
	return NULL;
}

/*
 * In an unoptimised loop we try to benchmark how many meaningless loops
 * per millisecond we can perform on this hardware to fairly accurately
 * reproduce certain percentage cpu usage
 */
void calibrate_loop(void)
{
	unsigned long long start_time, loops_per_msec, run_time = 0;
	unsigned long loops;
	struct timespec myts;

	loops_per_msec = 100000;
redo:
	/* Calibrate to within 1% accuracy */
	while (run_time > 1010000 || run_time < 990000) {
		loops = loops_per_msec;
		start_time = get_nsecs(&myts);
		burn_loops(loops);
		run_time = get_nsecs(&myts) - start_time;
		loops_per_msec = (1000000 * loops_per_msec / run_time ? :
			loops_per_msec);
	}

	/* Rechecking after a pause increases reproducibility */
	sleep(1);
	loops = loops_per_msec;
	start_time = get_nsecs(&myts);
	burn_loops(loops);
	run_time = get_nsecs(&myts) - start_time;

	/* Tolerate 5% difference on checking */
	if (run_time > 1050000 || run_time < 950000)
		goto redo;

	ud.loops_per_ms = loops_per_msec;
}
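
/*
 * The update step above uses the gcc "a ?: b" extension: the candidate
 * 1000000 * loops_per_msec / run_time rescales the loop count by the
 * ratio of the 1ms target (1000000ns) to the measured run_time, falling
 * back to the old value if the candidate rounds to 0. E.g. 100000 loops
 * measured at 2000000ns halve the estimate to 50000 loops per ms.
 */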

void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2)));

/* Output to console +/- logfile */
void log_output(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	if (vprintf(format, ap) == -1)
		terminal_error("vprintf");
	va_end(ap);
	if (ud.log) {
		va_start(ap, format);
		if (vfprintf(ud.logfile, format, ap) == -1)
			terminal_error("vfprintf");
		va_end(ap);
	}
	fflush(NULL);
}

/* Calculate statistics and output them */
void show_latencies(struct thread *th)
{
	struct data_table *tbj;
	struct tk_thread *tk;
	double average_latency, deadlines_met, samples_met, sd, max_latency;
	long double variance = 0;

	tbj = th->dt;
	tk = &th->tkthread;

	if (tbj->nr_samples > 1) {
		average_latency = (double)tbj->total_latency / tbj->nr_samples;
		/* Sample variance: (sum of squares - (sum)^2 / n) / (n - 1) */
		variance = ((long double)tbj->sum_latency_squared -
			(long double)tbj->total_latency * tbj->total_latency /
			tbj->nr_samples) / (tbj->nr_samples - 1);
		sd = sqrtl(variance);
	} else {
		average_latency = tbj->total_latency;
		sd = 0.0;
	}
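
	/*
	 * e.g. three samples of 1000, 2000 and 3000us give an average of
	 * 2000, variance (14000000 - 36000000 / 3) / 2 = 1000000 and thus
	 * sd 1000us.
	 */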

	/*
	 * Landing on the boundary of a deadline can make loaded runs appear
	 * to do more work than unloaded due to tiny duration differences.
	 */
	if (tbj->achieved_burns > 0)
		samples_met = (double)tbj->achieved_burns /
		    (double)(tbj->achieved_burns + tbj->missed_burns) * 100;
	else
		samples_met = 0.0;
	max_latency = tbj->max_latency;
	/* When benchmarking rt we represent the data in us */
	if (!ud.do_rt) {
		average_latency /= 1000;
		sd /= 1000;
		max_latency /= 1000;
	}
	if (tbj->deadlines_met == 0)
		deadlines_met = 0;
	else
		deadlines_met = (double)tbj->deadlines_met /
		    (double)(tbj->missed_deadlines + tbj->deadlines_met) * 100;

	/*
	 * Messy nonsense to format the output nicely. Values less than 1ms
	 * are meaningless for interactivity and values less than 1us for real
	 * time tests are below noise, so round off to integers.
	 */
	log_output("%6.1f +/- ", average_latency);
	log_output("%-8.1f", sd);
	log_output("%6.1f\t", max_latency);
	log_output("\t%4.3g", samples_met);
	if (!th->nodeadlines)
		log_output("\t%11.3g", deadlines_met);
	log_output("\n");
	sync_flush();
}

void create_read_file(void)
{
	unsigned int i;
	FILE *fp;
	char *name = "interbench.read";
	void *buf = NULL;
	struct stat statbuf;
	unsigned long mem, bsize;
	int tmp;

	if ((tmp = open(name, O_RDONLY)) == -1) {
		if (errno != ENOENT)
			terminal_error("open");
		goto write;
	}
	if (stat(name, &statbuf) == -1)
		terminal_error("stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (statbuf.st_size / 1024 / bsize == ud.ram / bsize)
		return;
	if (remove(name) == -1)
		terminal_error("remove");
write:
	fprintf(stderr, "Creating file for read load...\n");
	if (!(fp = fopen(name, "w")))
		terminal_error("fopen");
	if (stat(name, &statbuf) == -1)
		terminal_fileopen_error(fp, "stat");
	if (statbuf.st_blksize < MIN_BLK_SIZE)
		statbuf.st_blksize = MIN_BLK_SIZE;
	bsize = statbuf.st_blksize;
	if (!(buf = calloc(1, bsize)))
		terminal_fileopen_error(fp, "calloc");
	mem = ud.ram / (bsize / 1024);	/* kilobytes to blocks */

	for (i = 0 ; i < mem; i++) {
		if (fwrite(buf, bsize, 1, fp) != 1)
			terminal_fileopen_error(fp, "fwrite");
	}
	if (fclose(fp) == -1)
		terminal_error("fclose");
	sync_flush();
}

void get_ram(void)
{
#if defined(__linux__)
	FILE *meminfo;
	char aux[256];

	if (!(meminfo = fopen("/proc/meminfo", "r")))
		terminal_error("fopen");

	ud.ram = ud.swap = 0;
	while (!feof(meminfo) && !fscanf(meminfo, "MemTotal: %lu kB", &ud.ram))
		fgets(aux, sizeof(aux), meminfo);
	while (!feof(meminfo) && !fscanf(meminfo, "SwapTotal: %lu kB", &ud.swap))
		fgets(aux, sizeof(aux), meminfo);
	if (fclose(meminfo) == -1)
		terminal_error("fclose");
#elif defined(__FreeBSD__)
	long pagesize, numpages;
	quad_t swap;
	size_t len = sizeof(swap);

	pagesize = sysconf(_SC_PAGESIZE);
	numpages = sysconf(_SC_PHYS_PAGES);
	if (sysctlbyname("vm.swap_total", &swap, &len, NULL, 0) == -1)
		swap = 0;

	ud.ram = pagesize / 1024 * numpages;
	ud.swap = swap / 1024;
#elif defined(__DragonFly__)
	long pagesize, numpages;
	int swap;
	size_t len = sizeof(swap);

	pagesize = sysconf(_SC_PAGESIZE);
	numpages = sysconf(_SC_PHYS_PAGES);
	if (sysctlbyname("vm.swap_size", &swap, &len, NULL, 0) == -1)
		swap = 0;

	ud.ram = pagesize / 1024 * numpages;
	ud.swap = swap / 1024 * pagesize;
#else
#error unsupported operating system
#endif

	if (!ud.ram || !ud.swap) {
		unsigned long i;
		fprintf(stderr, "\nCould not get memory or swap size. ");
		fprintf(stderr, "Will not perform mem_load\n");
		for (i = 0 ; i < THREADS ; i++) {
			if (!strcmp(threadlist[i].label, "Memload")) {
				threadlist[i].load = 0;
				threadlist[i].rtload = 0;
			}
		}
	}
}

void get_logfilename(void)
{
	struct tm *mytm;
	struct utsname buf;
	time_t t;
	int year, month, day, hours, minutes;

	time(&t);
	if (uname(&buf) == -1)
		terminal_error("uname");
	if (!(mytm = localtime(&t)))
		terminal_error("localtime");
	year = mytm->tm_year + 1900;
	month = mytm->tm_mon + 1;
	day = mytm->tm_mday;
	hours = mytm->tm_hour;
	minutes = mytm->tm_min;
	strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH);
	ud.unamer[MAX_UNAME_LENGTH - 1] = '\0';	/* strncpy need not terminate */

	sprintf(ud.datestamp, "%4d%02d%02d%02d%02d",
		year, month, day, hours, minutes);
	snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer);
}

void start_thread(struct thread *th)
{
	post_sem(&th->sem.start);
}

void stop_thread(struct thread *th)
{
	post_sem(&th->sem.stop);
	wait_sem(&th->sem.complete);

	/* Kill the thread */
	join_pthread(th->pthread, NULL);
}

void init_sem(sem_t *sem)
{
	if (sem_init(sem, 0, 0))
		terminal_error("sem_init");
}

void init_all_sems(struct sems *s)
{
	/* Initialise the semaphores */
	init_sem(&s->ready);
	init_sem(&s->start);
	init_sem(&s->stop);
	init_sem(&s->complete);
	init_sem(&s->stopchild);
}

void initialise_thread(int i)
{
	struct thread *th = &threadlist[i];

	init_all_sems(&th->sem);
	/* Create the threads. Yes, the (long) cast is fugly but it's safe */
	create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i);

	wait_sem(&th->sem.ready);
	/*
	 * We set this pointer generically to NOT_BENCHING and set it to the
	 * benchmarked array entry only on benched threads.
	 */
	th->dt = &th->benchmarks[NOT_BENCHING];
	initialise_thread_data(th->dt);
}

/* A pseudo-semaphore for processes using a pipe */
void wait_on(int pype)
{
	int retval, buf = 0;

	retval = Read(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nread returned 0\n");
		exit (1);
	}
}

void wakeup_with(int pype)
{
	int retval, buf = 1;

	retval = Write(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nwrite returned 0\n");
		exit (1);
	}
}

void run_loadchild(int j)
{
	struct thread *thj;
	thj = &threadlist[j];

	set_nice(ud.load_nice);
	initialise_thread(j);

	/* Tell main we're ready */
	wakeup_with(l2m[1]);

	/* Wait for main to tell us to start */
	wait_on(m2l[0]);
	start_thread(thj);

	/* Tell main we received the start and are running */
	wakeup_with(l2m[1]);

	/* Main tells us to stop */
	wait_on(m2l[0]);
	stop_thread(thj);

	/* Tell main we've finished */
	wakeup_with(l2m[1]);
	exit (0);
}

void run_benchchild(int i, int j)
{
	struct thread *thi;

	thi = &threadlist[i];

	set_nice(ud.bench_nice);
	if (ud.do_rt)
		set_mlock();
	initialise_thread(i);
	/* Point the data table to the appropriate load being tested */
	thi->dt = &thi->benchmarks[j];
	initialise_thread_data(thi->dt);
	if (ud.do_rt)
		set_thread_fifo(thi->pthread,
		    sched_get_priority_max(SCHED_FIFO) - 1);

	/* Tell main we're ready */
	wakeup_with(b2m[1]);

	/* Wait for main to tell us to start */
	wait_on(m2b[0]);
	start_thread(thi);

	/* Tell main we have started */
	wakeup_with(b2m[1]);

	/* Main tells us to stop */
	wait_on(m2b[0]);
	stop_thread(thi);

	if (ud.do_rt) {
		set_thread_normal(thi->pthread);
		set_munlock();
	}
	show_latencies(thi);

	/* Tell main we've finished */
	wakeup_with(b2m[1]);
	exit(0);
}

void bench(int i, int j)
{
	pid_t bench_pid, load_pid;

	if ((load_pid = fork()) == -1)
		terminal_error("fork");
	if (!load_pid)
		run_loadchild(j);

	/* Wait for load process to be ready */

	wait_on(l2m[0]);
	if ((bench_pid = fork()) == -1)
		terminal_error("fork");
	if (!bench_pid)
		run_benchchild(i, j);

	/* Wait for bench process to be ready */
	wait_on(b2m[0]);

	/*
	 * We want to be higher priority than everything to signal them to
	 * stop and we lock our memory if we can as well
	 */
	set_fifo(sched_get_priority_max(SCHED_FIFO));
	set_mlock();

	/* Wakeup the load process */
	wakeup_with(m2l[1]);
	/* Load tells us it has received the first message and is running */
	wait_on(l2m[0]);

	/* After a small delay, wake up the benched process */
	sleep(1);
	wakeup_with(m2b[1]);

	/* Bench tells us it has received the first message and is running */
	wait_on(b2m[0]);
	microsleep(ud.duration * 1000000);

	/* Tell the benched process to stop its threads and output results */
	wakeup_with(m2b[1]);

	/* Tell the load process to stop its threads */
	wakeup_with(m2l[1]);

	/* Return to SCHED_NORMAL */
	set_normal();
	set_munlock();

	/* Wait for load and bench processes to terminate */
	wait_on(l2m[0]);
	wait_on(b2m[0]);
}

void init_pipe(int *pype)
{
	if (pipe(pype) == -1)
		terminal_error("pipe");
}

void init_pipes(void)
{
	init_pipe(m2l);
	init_pipe(l2m);
	init_pipe(m2b);
	init_pipe(b2m);
}

void usage(void)
{
	/* Affinity commented out till working on all architectures */
	fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n");
	fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int>] [-B <int>] [-N <int>]\n");
	fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
	fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
	fprintf(stderr, "\t[-h]\n\n");
	fprintf(stderr, " -l\tUse <int> loops per ms (default: use saved benchmark)\n");
	fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
	fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n");
	fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
	fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n");
	//fprintf(stderr, " -u\tImitate uniprocessor\n");
	fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n");
	fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n");
	fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
	fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
	fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
	fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n");
	fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n");
	fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n");
	fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
	fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
	fprintf(stderr, " -h\tShow this help\n");
	fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n");
	fprintf(stderr, "\nIt is recommended to run as root and to set -L to the number of CPUs on the system\n\n");
}

#ifdef DEBUG
void deadchild(int crap)
{
	pid_t retval;
	int status;

	crap = 0;

	if ((retval = waitpid(-1, &status, WNOHANG)) == -1) {
		if (errno == ECHILD)
			return;
		terminal_error("waitpid");
	}
	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return;
	fprintf(stderr, "\nChild terminated abnormally ");
	if (WIFSIGNALED(status))
		fprintf(stderr, "with signal %d", WTERMSIG(status));
	fprintf(stderr, "\n");
	exit (1);
}
#endif

int load_index(const char* loadname)
{
	int i;

	for (i = 0 ; i < THREADS ; i++)
		if (strcasecmp(loadname, threadlist[i].label) == 0)
			return i;
	return -1;
}

inline int bit_is_on(const unsigned int mask, int index)
{
	return (mask & (1 << index)) != 0;
}

inline void set_bit_on(unsigned int *mask, int index)
{
	*mask |= (1 << index);
}
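
/*
 * e.g. running with "-x Burn -x Compile" leaves selected_loads at its
 * all-ones default and clears the Burn and Compile bits via
 * set_bit_on(&excluded_loads, ...), so bit_is_on() skips just those
 * two loads in the main benchmarking loop.
 */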

int main(int argc, char **argv)
{
	unsigned long custom_cpu = 0;
	int q, i, j, affinity, benchmark = 0;
	unsigned int selected_loads = 0;
	unsigned int excluded_loads = 0;
	unsigned int selected_benches = 0;
	unsigned int excluded_benches = 0;
	FILE *fp;
	/*
	 * This file stores the loops_per_ms to be reused in a filename that
	 * can't be confused
	 */
	char *fname = "interbench.loops_per_ms";
	char *comment = NULL;
#ifdef DEBUG
	feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
	if (signal(SIGCHLD, deadchild) == SIG_ERR)
		terminal_error("signal");
#endif

	while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
		switch (q) {
			case 'h':
				usage();
				return (0);
			case 'l':
				ud.loops_per_ms = atoi(optarg);
				break;
			case 't':
				ud.duration = atoi(optarg);
				break;
			case 'L':
				ud.cpu_load = atoi(optarg);
				break;
			case 'B':
				ud.bench_nice = atoi(optarg);
				break;
			case 'N':
				ud.load_nice = atoi(optarg);
				break;
			case 'u':
				affinity = 1;
				break;
			case 'b':
				benchmark = 1;
				break;
			case 'c':
				ud.log = 0;
				break;
			case 'r':
				ud.do_rt = 1;
				break;
			case 'C':
				custom_cpu = (unsigned long)atol(optarg);
				break;
			case 'I':
				ud.custom_interval = atol(optarg);
				break;
			case 'm':
				comment = optarg;
				break;
			case 'w':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown load \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&selected_loads, i);
				break;
			case 'x':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown load \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&excluded_loads, i);
				break;
			case 'W':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&selected_benches, i);
				break;
			case 'X':
				i = load_index(optarg);
				if (i == -1) {
					fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
					return (-2);
				}
				set_bit_on(&excluded_benches, i);
				break;
			default:
				usage();
				return (1);
		}
	}
	argc -= optind;
	argv += optind;
	/* default is all loads */
	if (selected_loads == 0)
		selected_loads = (unsigned int)-1;
	selected_loads &= ~excluded_loads;
	/* default is all benches */
	if (selected_benches == 0)
		selected_benches = (unsigned int)-1;
	selected_benches &= ~excluded_benches;

	if (!test_fifo()) {
		fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n");
		fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n");
		if (ud.do_rt) {
			fprintf(stderr, "Real time tests were requested, aborting.\n");
			exit (1);
		}
		fprintf(stderr, "Results will be unreliable.\n");
	}
	if (!ud.cpu_load) {
		fprintf(stderr, "Invalid cpu load\n");
		exit (1);
	}

	if ((custom_cpu && !ud.custom_interval) ||
		(ud.custom_interval && !custom_cpu) ||
		custom_cpu > 100) {
			fprintf(stderr, "Invalid custom values, aborting.\n");
			exit (1);
	}

	if (custom_cpu && ud.custom_interval) {
		ud.custom_run = ud.custom_interval * custom_cpu / 100;
		threadlist[CUSTOM].bench = 1;
		threadlist[CUSTOM].load = 1;
		threadlist[CUSTOM].rtbench = 1;
		threadlist[CUSTOM].rtload = 1;
	}

	/*FIXME Affinity commented out till working on all architectures */
#if 0
	if (affinity) {
#ifdef CPU_SET	/* Current glibc expects cpu_set_t */
		cpu_set_t cpumask;

		CPU_ZERO(&cpumask);
		CPU_SET(0, &cpumask);
#else		/* Old glibc expects unsigned long */
		unsigned long cpumask = 1;
#endif
		if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) {
			if (errno != EPERM)
				terminal_error("sched_setaffinity");
			fprintf(stderr, "could not set cpu affinity\n");
		}
	}
#endif

	/* Make benchmark a multiple of 10 seconds for proper range of X loads */
	if (ud.duration % 10)
		ud.duration += 10 - ud.duration % 10;

	if (benchmark)
		ud.loops_per_ms = 0;
	/*
	 * Try to get loops_per_ms from command line first, file second, and
	 * benchmark if not available.
	 */
	if (!ud.loops_per_ms) {
		if (benchmark)
			goto bench;
		if ((fp = fopen(fname, "r"))) {
			fscanf(fp, "%lu", &ud.loops_per_ms);
			if (fclose(fp) == -1)
				terminal_error("fclose");
			if (ud.loops_per_ms) {
				fprintf(stderr,
					"%lu loops_per_ms read from file interbench.loops_per_ms\n",
					ud.loops_per_ms);
				goto loops_known;
			}
		} else
			if (errno != ENOENT)
				terminal_error("fopen");
bench:
		fprintf(stderr, "loops_per_ms unknown; benchmarking...\n");

		/*
		 * To get as accurate a loop as possible we time it running
		 * SCHED_FIFO if we can
		 */
		set_fifo(sched_get_priority_max(SCHED_FIFO));
		calibrate_loop();
		set_normal();
	} else
		fprintf(stderr, "loops_per_ms specified from command line\n");

	if (!(fp = fopen(fname, "w"))) {
		if (errno != EACCES)	/* No write access is not terminal */
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n");
		goto loops_known;
	}
	fprintf(fp, "%lu", ud.loops_per_ms);
	fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
		ud.loops_per_ms);
	if (fclose(fp) == -1)
		terminal_error("fclose");

loops_known:
	get_ram();
	get_logfilename();
	create_read_file();
	init_pipes();

	if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) {
		if (errno != EACCES)
			terminal_error("fopen");
		fprintf(stderr, "Unable to write to logfile\n");
		ud.log = 0;
	}
	log_output("\n");
	log_output("Using %lu loops per ms, running every load for %d seconds\n",
		ud.loops_per_ms, ud.duration);
	log_output("Benchmarking kernel %s at datestamp %s\n",
		ud.unamer, ud.datestamp);
	if (comment)
		log_output("Comment: %s\n", comment);
	log_output("\n");

	for (i = 0 ; i < THREADS ; i++)
		threadlist[i].threadno = i;

	for (i = 0 ; i < THREADS ; i++) {
		struct thread *thi = &threadlist[i];
		int *benchme;

		if (ud.do_rt)
			benchme = &threadlist[i].rtbench;
		else
			benchme = &threadlist[i].bench;

		if (!*benchme || !bit_is_on(selected_benches, i))
			continue;

		log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label);
		if (ud.do_rt)
			log_output("real time ");
		else if (ud.bench_nice)
			log_output("nice %d ", ud.bench_nice);
		log_output("in the presence of simulated ");
		if (ud.load_nice)
			log_output("nice %d ", ud.load_nice);
		log_output("---\n");

		log_output("Load");
		if (ud.do_rt)
			log_output("\tLatency +/- SD (us)");
		else
			log_output("\tLatency +/- SD (ms)");
		log_output("  Max Latency ");
		log_output("  %% Desired CPU");
		if (!thi->nodeadlines)
			log_output("  %% Deadlines Met");
		log_output("\n");

		for (j = 0 ; j < THREADS ; j++) {
			struct thread *thj = &threadlist[j];

			if (j == i || !bit_is_on(selected_loads, j) ||
				(!threadlist[j].load && !ud.do_rt) ||
				(!threadlist[j].rtload && ud.do_rt))
					continue;
			log_output("%s\t", thj->label);
			sync_flush();
			bench(i, j);
		}
		log_output("\n");
	}
	log_output("\n");
	if (ud.log)
		fclose(ud.logfile);

	return 0;
}