1 /******************************************* 2 * 3 * Interbench - Interactivity benchmark 4 * 5 * Author: Con Kolivas <kernel@kolivas.org> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 * 21 *******************************************/ 22 23 #define _GNU_SOURCE 24 #define _FILE_OFFSET_BITS 64 /* Large file support */ 25 #define INTERBENCH_VERSION "0.30" 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <stdarg.h> 30 #include <strings.h> 31 #include <string.h> 32 #include <unistd.h> 33 #include <fcntl.h> 34 #include <sched.h> 35 #include <time.h> 36 #include <errno.h> 37 #include <semaphore.h> 38 #include <pthread.h> 39 #include <math.h> 40 #include <fenv.h> 41 #include <signal.h> 42 #include <sys/utsname.h> 43 #include <sys/time.h> 44 #include <sys/resource.h> 45 #include <sys/types.h> 46 #include <sys/mman.h> 47 #include <sys/wait.h> 48 #include <sys/stat.h> 49 #include <sys/sysctl.h> 50 #include <sys/vmmeter.h> 51 #include "interbench.h" 52 53 #define MAX_UNAME_LENGTH 100 54 #define MAX_LOG_LENGTH ((MAX_UNAME_LENGTH) + 4) 55 #define MIN_BLK_SIZE 1024 56 #define DEFAULT_RESERVE 64 57 #define MB (1024 * 1024) /* 2^20 bytes */ 58 #define KB 1024 59 #define MAX_MEM_IN_MB (1024 * 64) /* 64 GB */ 60 61 struct user_data { 62 unsigned long loops_per_ms; 63 unsigned long ram, swap; 64 int duration; 65 int do_rt; 66 int bench_nice; 67 int load_nice; 68 unsigned long custom_run; 69 unsigned long custom_interval; 70 unsigned long cpu_load; 71 char logfilename[MAX_LOG_LENGTH]; 72 int log; 73 char unamer[MAX_UNAME_LENGTH]; 74 char datestamp[13]; 75 FILE *logfile; 76 } ud = { 77 .duration = 30, 78 .cpu_load = 4, 79 .log = 1, 80 }; 81 82 /* Pipes main to/from load and bench processes */ 83 static int m2l[2], l2m[2], m2b[2], b2m[2]; 84 85 /* Which member of becnhmarks is used when not benchmarking */ 86 #define NOT_BENCHING (THREADS) 87 #define CUSTOM (THREADS - 1) 88 89 /* 90 * To add another load or a benchmark you need to increment the value of 91 * THREADS, add a function prototype for your function and add an entry to 92 * the threadlist. To specify whether the function is a benchmark or a load 93 * set the benchmark and/or load flag as appropriate. The basic requirements 94 * of a new load can be seen by using emulate_none as a template. 95 */ 96 97 void emulate_none(struct thread *th); 98 void emulate_audio(struct thread *th); 99 void emulate_video(struct thread *th); 100 void emulate_x(struct thread *th); 101 void emulate_game(struct thread *th); 102 void emulate_burn(struct thread *th); 103 void emulate_write(struct thread *th); 104 void emulate_read(struct thread *th); 105 void emulate_ring(struct thread *th); 106 void emulate_compile(struct thread *th); 107 void emulate_memload(struct thread *th); 108 void emulate_hackbench(struct thread *th); 109 void emulate_custom(struct thread *th); 110 111 struct thread threadlist[THREADS] = { 112 {.label = "None", .name = emulate_none, .load = 1, .rtload = 1}, 113 {.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1}, 114 {.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1}, 115 {.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1}, 116 {.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1}, 117 {.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1}, 118 {.label = "Write", .name = emulate_write, .load = 1, .rtload = 1}, 119 {.label = "Read", .name = emulate_read, .load = 1, .rtload = 1}, 120 {.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0}, /* No useful data from this */ 121 {.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1}, 122 {.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1}, 123 {.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0}, /* This is causing signal headaches */ 124 {.label = "Custom", .name = emulate_custom}, /* Leave custom as last entry */ 125 }; 126 127 void init_sem(sem_t *sem); 128 void init_all_sems(struct sems *s); 129 void initialise_thread(int i); 130 void start_thread(struct thread *th); 131 void stop_thread(struct thread *th); 132 133 void terminal_error(const char *name) 134 { 135 fprintf(stderr, "\n"); 136 perror(name); 137 exit (1); 138 } 139 140 void terminal_fileopen_error(FILE *fp, char *name) 141 { 142 if (fclose(fp) == -1) 143 terminal_error("fclose"); 144 terminal_error(name); 145 } 146 147 unsigned long long get_nsecs(struct timespec *myts) 148 { 149 if (clock_gettime(CLOCK_REALTIME, myts)) 150 terminal_error("clock_gettime"); 151 return (myts->tv_sec * 1000000000 + myts->tv_nsec ); 152 } 153 154 unsigned long get_usecs(struct timespec *myts) 155 { 156 if (clock_gettime(CLOCK_REALTIME, myts)) 157 terminal_error("clock_gettime"); 158 return (myts->tv_sec * 1000000 + myts->tv_nsec / 1000 ); 159 } 160 161 void set_fifo(int prio) 162 { 163 struct sched_param sp; 164 165 memset(&sp, 0, sizeof(sp)); 166 sp.sched_priority = prio; 167 if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) { 168 if (errno != EPERM) 169 terminal_error("sched_setscheduler"); 170 } 171 } 172 173 void set_mlock(void) 174 { 175 int mlockflags; 176 177 mlockflags = MCL_CURRENT | MCL_FUTURE; 178 #if 0 179 mlockall(mlockflags); /* Is not critical if this fails */ 180 #endif 181 } 182 183 void set_munlock(void) 184 { 185 #if 0 186 if (munlockall() == -1) 187 terminal_error("munlockall"); 188 #endif 189 } 190 191 void set_thread_fifo(pthread_t pthread, int prio) 192 { 193 struct sched_param sp; 194 memset(&sp, 0, sizeof(sp)); 195 sp.sched_priority = prio; 196 if (pthread_setschedparam(pthread, SCHED_FIFO, &sp) == -1) 197 terminal_error("pthread_setschedparam"); 198 } 199 200 void set_normal(void) 201 { 202 struct sched_param sp; 203 memset(&sp, 0, sizeof(sp)); 204 sp.sched_priority = 0; 205 if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { 206 fprintf(stderr, "Weird, could not unset RT scheduling!\n"); 207 } 208 } 209 210 void set_nice(int prio) 211 { 212 if (setpriority(PRIO_PROCESS, 0, prio) == -1) 213 terminal_error("setpriority"); 214 } 215 216 int test_fifo(void) 217 { 218 struct sched_param sp; 219 memset(&sp, 0, sizeof(sp)); 220 sp.sched_priority = 99; 221 if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) { 222 if (errno != EPERM) 223 terminal_error("sched_setscheduler"); 224 goto out_fail; 225 } 226 if (sched_getscheduler(0) != SCHED_FIFO) 227 goto out_fail; 228 set_normal(); 229 return 1; 230 out_fail: 231 set_normal(); 232 return 0; 233 } 234 235 void set_thread_normal(pthread_t pthread) 236 { 237 struct sched_param sp; 238 memset(&sp, 0, sizeof(sp)); 239 sp.sched_priority = 0; 240 if (pthread_setschedparam(pthread, SCHED_OTHER, &sp) == -1) 241 terminal_error("pthread_setschedparam"); 242 } 243 244 void sync_flush(void) 245 { 246 if ((fflush(NULL)) == EOF) 247 terminal_error("fflush"); 248 sync(); 249 sync(); 250 sync(); 251 } 252 253 unsigned long compute_allocable_mem(void) 254 { 255 unsigned long total = ud.ram + ud.swap; 256 unsigned long usage = ud.ram * 110 / 100 ; 257 258 /* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */ 259 if (total - DEFAULT_RESERVE < usage) 260 usage = total - DEFAULT_RESERVE; 261 usage /= 1024; /* to megabytes */ 262 if (usage > 2930) 263 usage = 2930; 264 return usage; 265 } 266 267 void burn_loops(unsigned long loops) 268 { 269 unsigned long i; 270 271 /* 272 * We need some magic here to prevent the compiler from optimising 273 * this loop away. Otherwise trying to emulate a fixed cpu load 274 * with this loop will not work. 275 */ 276 for (i = 0 ; i < loops ; i++) 277 asm volatile("" : : : "memory"); 278 } 279 280 /* Use this many usecs of cpu time */ 281 void burn_usecs(unsigned long usecs) 282 { 283 unsigned long ms_loops; 284 285 ms_loops = ud.loops_per_ms / 1000 * usecs; 286 burn_loops(ms_loops); 287 } 288 289 void microsleep(unsigned long long usecs) 290 { 291 struct timespec req, rem; 292 293 rem.tv_sec = rem.tv_nsec = 0; 294 295 req.tv_sec = usecs / 1000000; 296 req.tv_nsec = (usecs - (req.tv_sec * 1000000)) * 1000; 297 continue_sleep: 298 if ((nanosleep(&req, &rem)) == -1) { 299 if (errno == EINTR) { 300 if (rem.tv_sec || rem.tv_nsec) { 301 req.tv_sec = rem.tv_sec; 302 req.tv_nsec = rem.tv_nsec; 303 goto continue_sleep; 304 } 305 goto out; 306 } 307 terminal_error("nanosleep"); 308 } 309 out: 310 return; 311 } 312 313 /* 314 * Yes, sem_post and sem_wait shouldn't return -1 but they do so we must 315 * handle it. 316 */ 317 inline void post_sem(sem_t *s) 318 { 319 retry: 320 if ((sem_post(s)) == -1) { 321 if (errno == EINTR) 322 goto retry; 323 terminal_error("sem_post"); 324 } 325 } 326 327 inline void wait_sem(sem_t *s) 328 { 329 retry: 330 if ((sem_wait(s)) == -1) { 331 if (errno == EINTR) 332 goto retry; 333 terminal_error("sem_wait"); 334 } 335 } 336 337 inline int trywait_sem(sem_t *s) 338 { 339 int ret; 340 341 retry: 342 if ((ret = sem_trywait(s)) == -1) { 343 if (errno == EINTR) 344 goto retry; 345 if (errno != EAGAIN) 346 terminal_error("sem_trywait"); 347 } 348 return ret; 349 } 350 351 inline ssize_t Read(int fd, void *buf, size_t count) 352 { 353 ssize_t retval; 354 355 retry: 356 retval = read(fd, buf, count); 357 if (retval == -1) { 358 if (errno == EINTR) 359 goto retry; 360 terminal_error("read"); 361 } 362 return retval; 363 } 364 365 inline ssize_t Write(int fd, const void *buf, size_t count) 366 { 367 ssize_t retval; 368 369 retry: 370 retval = write(fd, &buf, count); 371 if (retval == -1) { 372 if (errno == EINTR) 373 goto retry; 374 terminal_error("write"); 375 } 376 return retval; 377 } 378 379 unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs, 380 unsigned long interval_usecs, unsigned long long deadline) 381 { 382 unsigned long long latency, missed_latency; 383 unsigned long long current_time; 384 struct tk_thread *tk; 385 struct data_table *tb; 386 struct timespec myts; 387 388 latency = 0; 389 tb = th->dt; 390 tk = &th->tkthread; 391 392 current_time = get_usecs(&myts); 393 if (current_time > deadline + tk->slept_interval) 394 latency = current_time - deadline- tk->slept_interval; 395 396 /* calculate the latency for missed frames */ 397 missed_latency = 0; 398 399 current_time = get_usecs(&myts); 400 if (interval_usecs && current_time > deadline + interval_usecs) { 401 /* We missed the deadline even before we consumed cpu */ 402 unsigned long intervals; 403 404 deadline += interval_usecs; 405 intervals = (current_time - deadline) / 406 interval_usecs + 1; 407 408 tb->missed_deadlines += intervals; 409 missed_latency = intervals * interval_usecs; 410 deadline += intervals * interval_usecs; 411 tb->missed_burns += intervals; 412 goto bypass_burn; 413 } 414 415 burn_usecs(run_usecs); 416 current_time = get_usecs(&myts); 417 tb->achieved_burns++; 418 419 /* 420 * If we meet the deadline we move the deadline forward, otherwise 421 * we consider it a missed deadline and dropped frame etc. 422 */ 423 deadline += interval_usecs; 424 if (deadline >= current_time) { 425 tb->deadlines_met++; 426 } else { 427 if (interval_usecs) { 428 unsigned long intervals = (current_time - deadline) / 429 interval_usecs + 1; 430 431 tb->missed_deadlines += intervals; 432 missed_latency = intervals * interval_usecs; 433 deadline += intervals * interval_usecs; 434 if (intervals > 1) 435 tb->missed_burns += intervals; 436 } else { 437 deadline = current_time; 438 goto out_nosleep; 439 } 440 } 441 bypass_burn: 442 tk->sleep_interval = deadline - current_time; 443 444 post_sem(&tk->sem.start); 445 wait_sem(&tk->sem.complete); 446 out_nosleep: 447 /* 448 * Must add missed_latency to total here as this function may not be 449 * called again and the missed latency can be lost 450 */ 451 latency += missed_latency; 452 if (latency > tb->max_latency) 453 tb->max_latency = latency; 454 tb->total_latency += latency; 455 tb->sum_latency_squared += latency * latency; 456 tb->nr_samples++; 457 458 return deadline; 459 } 460 461 void initialise_thread_data(struct data_table *tb) 462 { 463 tb->max_latency = 464 tb->total_latency = 465 tb->sum_latency_squared = 466 tb->deadlines_met = 467 tb->missed_deadlines = 468 tb->missed_burns = 469 tb->nr_samples = 0; 470 } 471 472 void create_pthread(pthread_t * thread, pthread_attr_t * attr, 473 void * (*start_routine)(void *), void *arg) 474 { 475 if (pthread_create(thread, attr, start_routine, arg)) 476 terminal_error("pthread_create"); 477 } 478 479 void join_pthread(pthread_t th, void **thread_return) 480 { 481 if (pthread_join(th, thread_return)) 482 terminal_error("pthread_join"); 483 } 484 485 void emulate_none(struct thread *th) 486 { 487 sem_t *s = &th->sem.stop; 488 wait_sem(s); 489 } 490 491 #define AUDIO_INTERVAL (50000) 492 #define AUDIO_RUN (AUDIO_INTERVAL / 20) 493 /* We emulate audio by using 5% cpu and waking every 50ms */ 494 void emulate_audio(struct thread *th) 495 { 496 unsigned long long deadline; 497 sem_t *s = &th->sem.stop; 498 struct timespec myts; 499 500 th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10; 501 deadline = get_usecs(&myts); 502 503 while (1) { 504 deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL, 505 deadline); 506 if (!trywait_sem(s)) 507 return; 508 } 509 } 510 511 /* We emulate video by using 40% cpu and waking for 60fps */ 512 #define VIDEO_INTERVAL (1000000 / 60) 513 #define VIDEO_RUN (VIDEO_INTERVAL * 40 / 100) 514 void emulate_video(struct thread *th) 515 { 516 unsigned long long deadline; 517 sem_t *s = &th->sem.stop; 518 struct timespec myts; 519 520 th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10; 521 deadline = get_usecs(&myts); 522 523 while (1) { 524 deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL, 525 deadline); 526 if (!trywait_sem(s)) 527 return; 528 } 529 } 530 531 /* 532 * We emulate X by running for a variable percentage of cpu from 0-100% 533 * in 1ms chunks. 534 */ 535 void emulate_x(struct thread *th) 536 { 537 unsigned long long deadline; 538 sem_t *s = &th->sem.stop; 539 struct timespec myts; 540 541 th->decasecond_deadlines = 100; 542 deadline = get_usecs(&myts); 543 544 while (1) { 545 int i, j; 546 for (i = 0 ; i <= 100 ; i++) { 547 j = 100 - i; 548 deadline = periodic_schedule(th, i * 1000, j * 1000, 549 deadline); 550 deadline += i * 1000; 551 if (!trywait_sem(s)) 552 return; 553 } 554 } 555 } 556 557 /* 558 * We emulate gaming by using 100% cpu and seeing how many frames (jobs 559 * completed) we can do in that time. Deadlines are meaningless with 560 * unlocked frame rates. We do not use periodic schedule because for 561 * this load because this never wants to sleep. 562 */ 563 #define GAME_INTERVAL (100000) 564 #define GAME_RUN (GAME_INTERVAL) 565 void emulate_game(struct thread *th) 566 { 567 unsigned long long deadline, current_time, latency; 568 sem_t *s = &th->sem.stop; 569 struct timespec myts; 570 struct data_table *tb; 571 572 tb = th->dt; 573 th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10; 574 575 while (1) { 576 deadline = get_usecs(&myts) + GAME_INTERVAL; 577 burn_usecs(GAME_RUN); 578 current_time = get_usecs(&myts); 579 /* use usecs instead of simple count for game burn statistics */ 580 tb->achieved_burns += GAME_RUN; 581 if (current_time > deadline) { 582 latency = current_time - deadline; 583 tb->missed_burns += latency; 584 } else 585 latency = 0; 586 if (latency > tb->max_latency) 587 tb->max_latency = latency; 588 tb->total_latency += latency; 589 tb->sum_latency_squared += latency * latency; 590 tb->nr_samples++; 591 if (!trywait_sem(s)) 592 return; 593 } 594 } 595 596 void *burn_thread(void *t) 597 { 598 struct thread *th; 599 sem_t *s; 600 long i = (long)t; 601 602 th = &threadlist[i]; 603 s = &th->sem.stopchild; 604 605 while (1) { 606 burn_loops(ud.loops_per_ms); 607 if (!trywait_sem(s)) { 608 post_sem(s); 609 break; 610 } 611 } 612 return NULL; 613 } 614 615 /* Have ud.cpu_load threads burn cpu continuously */ 616 void emulate_burn(struct thread *th) 617 { 618 sem_t *s = &th->sem.stop; 619 unsigned long i; 620 long t; 621 pthread_t burnthreads[ud.cpu_load]; 622 623 t = th->threadno; 624 for (i = 0 ; i < ud.cpu_load ; i++) 625 create_pthread(&burnthreads[i], NULL, burn_thread, 626 (void*)(long) t); 627 wait_sem(s); 628 post_sem(&th->sem.stopchild); 629 for (i = 0 ; i < ud.cpu_load ; i++) 630 join_pthread(burnthreads[i], NULL); 631 } 632 633 /* Write a file the size of ram continuously */ 634 void emulate_write(struct thread *th) 635 { 636 sem_t *s = &th->sem.stop; 637 FILE *fp; 638 char *name = "interbench.write"; 639 void *buf = NULL; 640 struct stat statbuf; 641 unsigned long mem; 642 643 if (!(fp = fopen(name, "w"))) 644 terminal_error("fopen"); 645 if (stat(name, &statbuf) == -1) 646 terminal_fileopen_error(fp, "stat"); 647 if (statbuf.st_blksize < MIN_BLK_SIZE) 648 statbuf.st_blksize = MIN_BLK_SIZE; 649 mem = ud.ram / (statbuf.st_blksize / 1024); /* kilobytes to blocks */ 650 if (!(buf = calloc(1, statbuf.st_blksize))) 651 terminal_fileopen_error(fp, "calloc"); 652 if (fclose(fp) == -1) 653 terminal_error("fclose"); 654 655 while (1) { 656 unsigned int i; 657 658 if (!(fp = fopen(name, "w"))) 659 terminal_error("fopen"); 660 if (stat(name, &statbuf) == -1) 661 terminal_fileopen_error(fp, "stat"); 662 for (i = 0 ; i < mem; i++) { 663 if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1) 664 terminal_fileopen_error(fp, "fwrite"); 665 if (!trywait_sem(s)) 666 goto out; 667 } 668 if (fclose(fp) == -1) 669 terminal_error("fclose"); 670 } 671 672 out: 673 if (fclose(fp) == -1) 674 terminal_error("fclose"); 675 if (remove(name) == -1) 676 terminal_error("remove"); 677 sync_flush(); 678 } 679 680 /* Read a file the size of ram continuously */ 681 void emulate_read(struct thread *th) 682 { 683 sem_t *s = &th->sem.stop; 684 char *name = "interbench.read"; 685 void *buf = NULL; 686 struct stat statbuf; 687 unsigned long bsize; 688 int tmp; 689 690 if ((tmp = open(name, O_RDONLY)) == -1) 691 terminal_error("open"); 692 if (stat(name, &statbuf) == -1) 693 terminal_error("stat"); 694 bsize = statbuf.st_blksize; 695 if (!(buf = malloc(bsize))) 696 terminal_error("malloc"); 697 698 while (1) { 699 int rd; 700 701 /* 702 * We have to read the whole file before quitting the load 703 * to prevent the data being cached for the next read. This 704 * is also the reason the file is the size of physical ram. 705 */ 706 while ((rd = Read(tmp , buf, bsize)) > 0); 707 if(!trywait_sem(s)) 708 return; 709 if (lseek(tmp, (off_t)0, SEEK_SET) == -1) 710 terminal_error("lseek"); 711 } 712 } 713 714 #define RINGTHREADS 4 715 716 struct thread ringthreads[RINGTHREADS]; 717 718 void *ring_thread(void *t) 719 { 720 struct thread *th; 721 struct sems *s; 722 int i, post_to; 723 724 i = (long)t; 725 th = &ringthreads[i]; 726 s = &th->sem; 727 post_to = i + 1; 728 if (post_to == RINGTHREADS) 729 post_to = 0; 730 if (i == 0) 731 post_sem(&s->ready); 732 733 while (1) { 734 wait_sem(&s->start); 735 post_sem(&ringthreads[post_to].sem.start); 736 if (!trywait_sem(&s->stop)) 737 goto out; 738 } 739 out: 740 post_sem(&ringthreads[post_to].sem.start); 741 post_sem(&s->complete); 742 return NULL; 743 } 744 745 /* Create a ring of 4 processes that wake each other up in a circle */ 746 void emulate_ring(struct thread *th) 747 { 748 sem_t *s = &th->sem.stop; 749 int i; 750 751 for (i = 0 ; i < RINGTHREADS ; i++) { 752 init_all_sems(&ringthreads[i].sem); 753 create_pthread(&ringthreads[i].pthread, NULL, 754 ring_thread, (void*)(long) i); 755 } 756 757 wait_sem(&ringthreads[0].sem.ready); 758 post_sem(&ringthreads[0].sem.start); 759 wait_sem(s); 760 for (i = 0 ; i < RINGTHREADS ; i++) 761 post_sem(&ringthreads[i].sem.stop); 762 for (i = 0 ; i < RINGTHREADS ; i++) { 763 wait_sem(&ringthreads[i].sem.complete); 764 join_pthread(ringthreads[i].pthread, NULL); 765 } 766 } 767 768 /* We emulate a compile by running burn, write and read threads simultaneously */ 769 void emulate_compile(struct thread *th) 770 { 771 sem_t *s = &th->sem.stop; 772 unsigned long i, threads[3]; 773 774 bzero(threads, 3 * sizeof(threads[0])); 775 776 for (i = 0 ; i < THREADS ; i++) { 777 if (strcmp(threadlist[i].label, "Burn") == 0) 778 threads[0] = i; 779 if (strcmp(threadlist[i].label, "Write") == 0) 780 threads[1] = i; 781 if (strcmp(threadlist[i].label, "Read") == 0) 782 threads[2] = i; 783 } 784 for (i = 0 ; i < 3 ; i++) { 785 if (!threads[i]) { 786 fprintf(stderr, "Can't find all threads for compile load\n"); 787 exit(1); 788 } 789 } 790 for (i = 0 ; i < 3 ; i++) { 791 initialise_thread(threads[i]); 792 start_thread(&threadlist[threads[i]]); 793 } 794 wait_sem(s); 795 for (i = 0 ; i < 3 ; i++) 796 stop_thread(&threadlist[threads[i]]); 797 } 798 799 int *grab_and_touch (char *block[], int i) 800 { 801 block[i] = (char *) malloc(MB); 802 if (!block[i]) 803 return NULL; 804 return (memset(block[i], 1, MB)); 805 } 806 807 /* We emulate a memory load by allocating and torturing 110% of available ram */ 808 void emulate_memload(struct thread *th) 809 { 810 sem_t *s = &th->sem.stop; 811 unsigned long touchable_mem, i; 812 char *mem_block[MAX_MEM_IN_MB]; 813 void *success; 814 815 touchable_mem = compute_allocable_mem(); 816 /* loop until we're killed, frobbing memory in various perverted ways */ 817 while (1) { 818 for (i = 0; i < touchable_mem; i++) { 819 success = grab_and_touch(mem_block, i); 820 if (!success) { 821 touchable_mem = i-1; 822 break; 823 } 824 } 825 if (!trywait_sem(s)) 826 goto out_freemem; 827 for (i = 0; i < touchable_mem; i++) { 828 memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) % 829 touchable_mem], MB); 830 if (!trywait_sem(s)) 831 goto out_freemem; 832 } 833 for (i = 0; i < touchable_mem; i++) { 834 free(mem_block[i]); 835 } 836 if (!trywait_sem(s)) 837 goto out; 838 } 839 out_freemem: 840 for (i = 0; i < touchable_mem; i++) 841 free(mem_block[i]); 842 out: 843 return; 844 } 845 846 struct thread hackthread; 847 848 void emulate_hackbench(struct thread *th) 849 { 850 sem_t *s = &th->sem.stop; 851 852 init_all_sems(&hackthread.sem); 853 create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0); 854 855 wait_sem(s); 856 857 post_sem(&hackthread.sem.stop); 858 wait_sem(&hackthread.sem.complete); 859 860 join_pthread(hackthread.pthread, NULL); 861 } 862 863 #define CUSTOM_INTERVAL (ud.custom_interval) 864 #define CUSTOM_RUN (ud.custom_run) 865 void emulate_custom(struct thread *th) 866 { 867 unsigned long long deadline; 868 sem_t *s = &th->sem.stop; 869 struct timespec myts; 870 871 th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10; 872 deadline = get_usecs(&myts); 873 874 while (1) { 875 deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL, 876 deadline); 877 if (!trywait_sem(s)) 878 return; 879 } 880 } 881 882 void *timekeeping_thread(void *t) 883 { 884 struct thread *th; 885 struct tk_thread *tk; 886 struct sems *s; 887 struct timespec myts; 888 long i = (long)t; 889 890 th = &threadlist[i]; 891 tk = &th->tkthread; 892 s = &th->tkthread.sem; 893 /* 894 * If this timekeeping thread is that of a benchmarked thread we run 895 * even higher priority than the benched thread is if running real 896 * time. Otherwise, the load timekeeping thread, which does not need 897 * accurate accounting remains SCHED_NORMAL; 898 */ 899 if (th->dt != &th->benchmarks[NOT_BENCHING]) 900 set_fifo(96); 901 /* These values must be changed at the appropriate places or race */ 902 tk->sleep_interval = tk->slept_interval = 0; 903 post_sem(&s->ready); 904 905 while (1) { 906 unsigned long start_time, now; 907 908 if (!trywait_sem(&s->stop)) 909 goto out; 910 wait_sem(&s->start); 911 tk->slept_interval = 0; 912 start_time = get_usecs(&myts); 913 if (!trywait_sem(&s->stop)) 914 goto out; 915 if (tk->sleep_interval) { 916 unsigned long diff = 0; 917 microsleep(tk->sleep_interval); 918 now = get_usecs(&myts); 919 /* now should always be > start_time but... */ 920 if (now > start_time) { 921 diff = now - start_time; 922 if (diff > tk->sleep_interval) 923 tk->slept_interval = diff - 924 tk->sleep_interval; 925 } 926 } 927 tk->sleep_interval = 0; 928 post_sem(&s->complete); 929 } 930 out: 931 return NULL; 932 } 933 934 /* 935 * All the sleep functions such as nanosleep can only guarantee that they 936 * sleep for _at least_ the time requested. We work around this by having 937 * a high priority real time thread that accounts for the extra time slept 938 * in nanosleep. This allows wakeup latency of the tested thread to be 939 * accurate and reflect true scheduling delays. 940 */ 941 void *emulation_thread(void *t) 942 { 943 struct thread *th; 944 struct tk_thread *tk; 945 struct sems *s, *tks; 946 long i = (long)t; 947 948 th = &threadlist[i]; 949 tk = &th->tkthread; 950 s = &th->sem; 951 tks = &tk->sem; 952 init_all_sems(tks); 953 954 /* Start the timekeeping thread */ 955 create_pthread(&th->tk_pthread, NULL, timekeeping_thread, 956 (void*)(long) i); 957 /* Wait for timekeeping thread to be ready */ 958 wait_sem(&tks->ready); 959 960 /* Tell main we're ready to start*/ 961 post_sem(&s->ready); 962 963 /* Wait for signal from main to start thread */ 964 wait_sem(&s->start); 965 966 /* Start the actual function being benched/or running as load */ 967 th->name(th); 968 969 /* Stop the timekeeping thread */ 970 post_sem(&tks->stop); 971 post_sem(&tks->start); 972 join_pthread(th->tk_pthread, NULL); 973 974 /* Tell main we've finished */ 975 post_sem(&s->complete); 976 return NULL; 977 } 978 979 /* 980 * In an unoptimised loop we try to benchmark how many meaningless loops 981 * per second we can perform on this hardware to fairly accurately 982 * reproduce certain percentage cpu usage 983 */ 984 void calibrate_loop(void) 985 { 986 unsigned long long start_time, loops_per_msec, run_time = 0; 987 unsigned long loops; 988 struct timespec myts; 989 990 loops_per_msec = 100000; 991 redo: 992 /* Calibrate to within 1% accuracy */ 993 while (run_time > 1010000 || run_time < 990000) { 994 loops = loops_per_msec; 995 start_time = get_nsecs(&myts); 996 burn_loops(loops); 997 run_time = get_nsecs(&myts) - start_time; 998 loops_per_msec = (1000000 * loops_per_msec / run_time ? : 999 loops_per_msec); 1000 } 1001 1002 /* Rechecking after a pause increases reproducibility */ 1003 sleep(1); 1004 loops = loops_per_msec; 1005 start_time = get_nsecs(&myts); 1006 burn_loops(loops); 1007 run_time = get_nsecs(&myts) - start_time; 1008 1009 /* Tolerate 5% difference on checking */ 1010 if (run_time > 1050000 || run_time < 950000) 1011 goto redo; 1012 1013 ud.loops_per_ms = loops_per_msec; 1014 } 1015 1016 void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2))); 1017 1018 /* Output to console +/- logfile */ 1019 void log_output(const char *format, ...) 1020 { 1021 va_list ap; 1022 1023 va_start(ap, format); 1024 if (vprintf(format, ap) == -1) 1025 terminal_error("vprintf"); 1026 va_end(ap); 1027 if (ud.log) { 1028 va_start(ap, format); 1029 if (vfprintf(ud.logfile, format, ap) == -1) 1030 terminal_error("vpfrintf"); 1031 va_end(ap); 1032 } 1033 fflush(NULL); 1034 } 1035 1036 /* Calculate statistics and output them */ 1037 void show_latencies(struct thread *th) 1038 { 1039 struct data_table *tbj; 1040 struct tk_thread *tk; 1041 double average_latency, deadlines_met, samples_met, sd, max_latency; 1042 long double variance = 0; 1043 1044 tbj = th->dt; 1045 tk = &th->tkthread; 1046 1047 if (tbj->nr_samples > 1) { 1048 average_latency = tbj->total_latency / tbj->nr_samples; 1049 variance = (tbj->sum_latency_squared - (average_latency * 1050 average_latency) / tbj->nr_samples) / (tbj->nr_samples - 1); 1051 sd = sqrt((double)variance); 1052 } else { 1053 average_latency = tbj->total_latency; 1054 sd = 0.0; 1055 } 1056 1057 /* 1058 * Landing on the boundary of a deadline can make loaded runs appear 1059 * to do more work than unloaded due to tiny duration differences. 1060 */ 1061 if (tbj->achieved_burns > 0) 1062 samples_met = (double)tbj->achieved_burns / 1063 (double)(tbj->achieved_burns + tbj->missed_burns) * 100; 1064 else 1065 samples_met = 0.0; 1066 max_latency = tbj->max_latency; 1067 /* When benchmarking rt we represent the data in us */ 1068 if (!ud.do_rt) { 1069 average_latency /= 1000; 1070 sd /= 1000; 1071 max_latency /= 1000; 1072 } 1073 if (tbj->deadlines_met == 0) 1074 deadlines_met = 0; 1075 else 1076 deadlines_met = (double)tbj->deadlines_met / 1077 (double)(tbj->missed_deadlines + tbj->deadlines_met) * 100; 1078 1079 /* Messy nonsense to format the output nicely */ 1080 if (average_latency >= 100) 1081 log_output("%7.0f +/- ", average_latency); 1082 else 1083 log_output("%7.3g +/- ", average_latency); 1084 if (sd >= 100) 1085 log_output("%-9.0f", sd); 1086 else 1087 log_output("%-9.3g", sd); 1088 if (max_latency >= 100) 1089 log_output("%7.0f\t", max_latency); 1090 else 1091 log_output("%7.3g\t", max_latency); 1092 log_output("\t%4.3g", samples_met); 1093 if (!th->nodeadlines) 1094 log_output("\t%11.3g", deadlines_met); 1095 log_output("\n"); 1096 sync_flush(); 1097 } 1098 1099 void create_read_file(void) 1100 { 1101 unsigned int i; 1102 FILE *fp; 1103 char *name = "interbench.read"; 1104 void *buf = NULL; 1105 struct stat statbuf; 1106 unsigned long mem, bsize; 1107 int tmp; 1108 1109 if ((tmp = open(name, O_RDONLY)) == -1) { 1110 if (errno != ENOENT) 1111 terminal_error("open"); 1112 goto write; 1113 } 1114 if (stat(name, &statbuf) == -1) 1115 terminal_error("stat"); 1116 if (statbuf.st_blksize < MIN_BLK_SIZE) 1117 statbuf.st_blksize = MIN_BLK_SIZE; 1118 bsize = statbuf.st_blksize; 1119 if (statbuf.st_size / 1024 / bsize == ud.ram / bsize) 1120 return; 1121 if (remove(name) == -1) 1122 terminal_error("remove"); 1123 write: 1124 fprintf(stderr,"Creating file for read load...\n"); 1125 if (!(fp = fopen(name, "w"))) 1126 terminal_error("fopen"); 1127 if (stat(name, &statbuf) == -1) 1128 terminal_fileopen_error(fp, "stat"); 1129 if (statbuf.st_blksize < MIN_BLK_SIZE) 1130 statbuf.st_blksize = MIN_BLK_SIZE; 1131 bsize = statbuf.st_blksize; 1132 if (!(buf = calloc(1, bsize))) 1133 terminal_fileopen_error(fp, "calloc"); 1134 mem = ud.ram / (bsize / 1024); /* kilobytes to blocks */ 1135 1136 for (i = 0 ; i < mem; i++) { 1137 if (fwrite(buf, bsize, 1, fp) != 1) 1138 terminal_fileopen_error(fp, "fwrite"); 1139 } 1140 if (fclose(fp) == -1) 1141 terminal_error("fclose"); 1142 sync_flush(); 1143 } 1144 1145 void get_ram(void) 1146 { 1147 struct vmstats vms; 1148 size_t vms_size = sizeof(vms); 1149 1150 if (sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0)) 1151 terminal_error("sysctlbyname: vm.vmstats"); 1152 1153 ud.ram = vms.v_page_count * vms.v_page_size; 1154 ud.ram /= 1024; /* linux size is in kB */ 1155 ud.swap = ud.ram; /* XXX: swap doesn't have to be the same as RAM */ 1156 1157 if( !ud.ram || !ud.swap ) { 1158 unsigned long i; 1159 fprintf(stderr, "\nCould not get memory or swap size. "); 1160 fprintf(stderr, "Will not perform mem_load\n"); 1161 for (i = 0 ; i < THREADS ; i++) { 1162 if (strcmp(threadlist[i].label, "Memload") == 0) { 1163 threadlist[i].load = 0; 1164 threadlist[i].rtload = 0; 1165 } 1166 } 1167 } 1168 } 1169 1170 void get_logfilename(void) 1171 { 1172 struct tm *mytm; 1173 struct utsname buf; 1174 time_t t; 1175 int year, month, day, hours, minutes; 1176 1177 time(&t); 1178 if (uname(&buf) == -1) 1179 terminal_error("uname"); 1180 if (!(mytm = localtime(&t))) 1181 terminal_error("localtime"); 1182 year = mytm->tm_year + 1900; 1183 month = mytm->tm_mon + 1; 1184 day = mytm->tm_mday; 1185 hours = mytm->tm_hour; 1186 minutes = mytm->tm_min; 1187 strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH); 1188 1189 sprintf(ud.datestamp, "%2d%02d%02d%02d%02d", 1190 year, month, day, hours, minutes); 1191 snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer); 1192 } 1193 1194 void start_thread(struct thread *th) 1195 { 1196 post_sem(&th->sem.start); 1197 } 1198 1199 void stop_thread(struct thread *th) 1200 { 1201 post_sem(&th->sem.stop); 1202 wait_sem(&th->sem.complete); 1203 1204 /* Kill the thread */ 1205 join_pthread(th->pthread, NULL); 1206 } 1207 1208 void init_sem(sem_t *sem) 1209 { 1210 if (sem_init(sem, 0, 0)) 1211 terminal_error("sem_init"); 1212 } 1213 1214 void init_all_sems(struct sems *s) 1215 { 1216 /* Initialise the semaphores */ 1217 init_sem(&s->ready); 1218 init_sem(&s->start); 1219 init_sem(&s->stop); 1220 init_sem(&s->complete); 1221 init_sem(&s->stopchild); 1222 } 1223 1224 void initialise_thread(int i) 1225 { 1226 struct thread *th = &threadlist[i]; 1227 1228 init_all_sems(&th->sem); 1229 /* Create the threads. Yes, the (long) cast is fugly but it's safe*/ 1230 create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i); 1231 1232 wait_sem(&th->sem.ready); 1233 /* 1234 * We set this pointer generically to NOT_BENCHING and set it to the 1235 * benchmarked array entry only on benched threads. 1236 */ 1237 th->dt = &th->benchmarks[NOT_BENCHING]; 1238 initialise_thread_data(th->dt); 1239 1240 } 1241 1242 /* A pseudo-semaphore for processes using a pipe */ 1243 void wait_on(int pype) 1244 { 1245 int retval, buf = 0; 1246 1247 retval = Read(pype, &buf, sizeof(buf)); 1248 if (retval == 0) { 1249 fprintf(stderr, "\nread returned 0\n"); 1250 exit (1); 1251 } 1252 } 1253 1254 void wakeup_with(int pype) 1255 { 1256 int retval, buf = 1; 1257 1258 retval = Write(pype, &buf, sizeof(buf)); 1259 if (retval == 0) { 1260 fprintf(stderr, "\nwrite returned 0\n"); 1261 exit (1); 1262 } 1263 } 1264 1265 void run_loadchild(int j) 1266 { 1267 struct thread *thj; 1268 thj = &threadlist[j]; 1269 1270 set_nice(ud.load_nice); 1271 initialise_thread(j); 1272 1273 /* Tell main we're ready */ 1274 wakeup_with(l2m[1]); 1275 1276 /* Main tells us we're ready */ 1277 wait_on(m2l[0]); 1278 start_thread(thj); 1279 1280 /* Tell main we received the start and are running */ 1281 wakeup_with(l2m[1]); 1282 1283 /* Main tells us to stop */ 1284 wait_on(m2l[0]); 1285 stop_thread(thj); 1286 1287 /* Tell main we've finished */ 1288 wakeup_with(l2m[1]); 1289 exit (0); 1290 } 1291 1292 void run_benchchild(int i, int j) 1293 { 1294 struct thread *thi; 1295 1296 thi = &threadlist[i]; 1297 1298 set_nice(ud.bench_nice); 1299 if (ud.do_rt) 1300 set_mlock(); 1301 initialise_thread(i); 1302 /* Point the data table to the appropriate load being tested */ 1303 thi->dt = &thi->benchmarks[j]; 1304 initialise_thread_data(thi->dt); 1305 if (ud.do_rt) 1306 set_thread_fifo(thi->pthread, 95); 1307 1308 /* Tell main we're ready */ 1309 wakeup_with(b2m[1]); 1310 1311 /* Main tells us we're ready */ 1312 wait_on(m2b[0]); 1313 start_thread(thi); 1314 1315 /* Tell main we have started */ 1316 wakeup_with(b2m[1]); 1317 1318 /* Main tells us to stop */ 1319 wait_on(m2b[0]); 1320 stop_thread(thi); 1321 1322 if (ud.do_rt) { 1323 set_thread_normal(thi->pthread); 1324 set_munlock(); 1325 } 1326 show_latencies(thi); 1327 1328 /* Tell main we've finished */ 1329 wakeup_with(b2m[1]); 1330 exit(0); 1331 } 1332 1333 void bench(int i, int j) 1334 { 1335 pid_t bench_pid, load_pid; 1336 1337 if ((load_pid = fork()) == -1) 1338 terminal_error("fork"); 1339 if (!load_pid) 1340 run_loadchild(j); 1341 1342 /* Wait for load process to be ready */ 1343 1344 wait_on(l2m[0]); 1345 if ((bench_pid = fork()) == -1) 1346 terminal_error("fork"); 1347 if (!bench_pid) 1348 run_benchchild(i, j); 1349 1350 /* Wait for bench process to be ready */ 1351 wait_on(b2m[0]); 1352 1353 /* 1354 * We want to be higher priority than everything to signal them to 1355 * stop and we lock our memory if we can as well 1356 */ 1357 set_fifo(99); 1358 set_mlock(); 1359 1360 /* Wakeup the load process */ 1361 wakeup_with(m2l[1]); 1362 /* Load tells it has received the first message and is running */ 1363 wait_on(l2m[0]); 1364 1365 /* After a small delay, wake up the benched process */ 1366 sleep(1); 1367 wakeup_with(m2b[1]); 1368 1369 /* Bench tells it has received the first message and is running */ 1370 wait_on(b2m[0]); 1371 microsleep(ud.duration * 1000000); 1372 1373 /* Tell the benched process to stop its threads and output results */ 1374 wakeup_with(m2b[1]); 1375 1376 /* Tell the load process to stop its threads */ 1377 wakeup_with(m2l[1]); 1378 1379 /* Return to SCHED_NORMAL */ 1380 set_normal(); 1381 set_munlock(); 1382 1383 /* Wait for load and bench processes to terminate */ 1384 wait_on(l2m[0]); 1385 wait_on(b2m[0]); 1386 } 1387 1388 void init_pipe(int *pype) 1389 { 1390 if (pipe(pype) == -1) 1391 terminal_error("pipe"); 1392 } 1393 1394 void init_pipes(void) 1395 { 1396 init_pipe(m2l); 1397 init_pipe(l2m); 1398 init_pipe(m2b); 1399 init_pipe(b2m); 1400 } 1401 1402 void usage(void) 1403 { 1404 /* Affinity commented out till working on all architectures */ 1405 fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n"); 1406 fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int] [-B <int>] [-N <int>]\n"); 1407 fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n"); 1408 fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n"); 1409 fprintf(stderr, "\t[-h]\n\n"); 1410 fprintf(stderr, " -l\tUse <int> loops per sec (default: use saved benchmark)\n"); 1411 fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n"); 1412 fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n"); 1413 fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n"); 1414 fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n"); 1415 //fprintf(stderr, " -u\tImitate uniprocessor\n"); 1416 fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n"); 1417 fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n"); 1418 fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n"); 1419 fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n"); 1420 fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n"); 1421 fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n"); 1422 fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n"); 1423 fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n"); 1424 fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n"); 1425 fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n"); 1426 fprintf(stderr, " -h\tShow this help\n"); 1427 fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n\n"); 1428 } 1429 1430 #ifdef DEBUG 1431 void deadchild(int crap) 1432 { 1433 pid_t retval; 1434 int status; 1435 1436 crap = 0; 1437 1438 if ((retval = waitpid(-1, &status, WNOHANG)) == -1) { 1439 if (errno == ECHILD) 1440 return; 1441 terminal_error("waitpid"); 1442 } 1443 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) 1444 return; 1445 fprintf(stderr, "\nChild terminated abnormally "); 1446 if (WIFSIGNALED(status)) 1447 fprintf(stderr, "with signal %d", WTERMSIG(status)); 1448 fprintf(stderr, "\n"); 1449 exit (1); 1450 } 1451 #endif 1452 1453 int load_index(const char* loadname) 1454 { 1455 int i; 1456 1457 for (i = 0 ; i < THREADS ; i++) 1458 if (strcasecmp(loadname, threadlist[i].label) == 0) 1459 return i; 1460 return -1; 1461 } 1462 1463 inline int bit_is_on(const unsigned int mask, int index) 1464 { 1465 return (mask & (1 << index)) != 0; 1466 } 1467 1468 inline void set_bit_on(unsigned int *mask, int index) 1469 { 1470 *mask |= (1 << index); 1471 } 1472 1473 int main(int argc, char **argv) 1474 { 1475 unsigned long custom_cpu = 0; 1476 int q, i, j, affinity, benchmark = 0; 1477 unsigned int selected_loads = 0; 1478 unsigned int excluded_loads = 0; 1479 unsigned int selected_benches = 0; 1480 unsigned int excluded_benches = 0; 1481 FILE *fp; 1482 /* 1483 * This file stores the loops_per_ms to be reused in a filename that 1484 * can't be confused 1485 */ 1486 char *fname = "interbench.loops_per_ms"; 1487 char *comment = NULL; 1488 #ifdef DEBUG 1489 feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); 1490 if (signal(SIGCHLD, deadchild) == SIG_ERR) 1491 terminal_error("signal"); 1492 #endif 1493 1494 while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) { 1495 switch (q) { 1496 case 'h': 1497 usage(); 1498 return (0); 1499 case 'l': 1500 ud.loops_per_ms = atoi(optarg); 1501 break; 1502 case 't': 1503 ud.duration = atoi(optarg); 1504 break; 1505 case 'L': 1506 ud.cpu_load = atoi(optarg); 1507 break; 1508 case 'B': 1509 ud.bench_nice = atoi(optarg); 1510 break; 1511 case 'N': 1512 ud.load_nice = atoi(optarg); 1513 break; 1514 case 'u': 1515 affinity = 1; 1516 break; 1517 case 'b': 1518 benchmark = 1; 1519 break; 1520 case 'c': 1521 ud.log = 0; 1522 break; 1523 case 'r': 1524 ud.do_rt = 1; 1525 break; 1526 case 'C': 1527 custom_cpu = (unsigned long)atol(optarg); 1528 break; 1529 case 'I': 1530 ud.custom_interval = atol(optarg); 1531 break; 1532 case 'm': 1533 comment = optarg; 1534 break; 1535 case 'w': 1536 i = load_index(optarg); 1537 if (i == -1) { 1538 fprintf(stderr, "Unknown load \"%s\"\n", optarg); 1539 return (-2); 1540 } 1541 set_bit_on(&selected_loads, i); 1542 break; 1543 case 'x': 1544 i = load_index(optarg); 1545 if (i == -1) { 1546 fprintf(stderr, "Unknown load \"%s\"\n", optarg); 1547 return (-2); 1548 } 1549 set_bit_on(&excluded_loads, i); 1550 break; 1551 case 'W': 1552 i = load_index(optarg); 1553 if (i == -1) { 1554 fprintf(stderr, "Unknown bench \"%s\"\n", optarg); 1555 return (-2); 1556 } 1557 set_bit_on(&selected_benches, i); 1558 break; 1559 case 'X': 1560 i = load_index(optarg); 1561 if (i == -1) { 1562 fprintf(stderr, "Unknown bench \"%s\"\n", optarg); 1563 return (-2); 1564 } 1565 set_bit_on(&excluded_benches, i); 1566 break; 1567 default: 1568 usage(); 1569 return (1); 1570 } 1571 } 1572 argc -= optind; 1573 argv += optind; 1574 /* default is all loads */ 1575 if (selected_loads == 0) 1576 selected_loads = (unsigned int)-1; 1577 selected_loads &= ~excluded_loads; 1578 /* default is all benches */ 1579 if (selected_benches == 0) 1580 selected_benches = (unsigned int)-1; 1581 selected_benches &= ~excluded_benches; 1582 1583 if (!test_fifo()) { 1584 fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n"); 1585 fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n"); 1586 if (ud.do_rt) { 1587 fprintf(stderr, "Real time tests were requested, aborting.\n"); 1588 exit (1); 1589 } 1590 fprintf(stderr, "Results will be unreliable.\n"); 1591 } 1592 if (!ud.cpu_load) { 1593 fprintf(stderr, "Invalid cpu load\n"); 1594 exit (1); 1595 } 1596 1597 if ((custom_cpu && !ud.custom_interval) || 1598 (ud.custom_interval && !custom_cpu) || 1599 custom_cpu > 100) { 1600 fprintf(stderr, "Invalid custom values, aborting.\n"); 1601 exit (1); 1602 } 1603 1604 if (custom_cpu && ud.custom_interval) { 1605 ud.custom_run = ud.custom_interval * custom_cpu / 100; 1606 threadlist[CUSTOM].bench = 1; 1607 threadlist[CUSTOM].load = 1; 1608 threadlist[CUSTOM].rtbench = 1; 1609 threadlist[CUSTOM].rtload = 1; 1610 } 1611 1612 /*FIXME Affinity commented out till working on all architectures */ 1613 #if 0 1614 if (affinity) { 1615 #ifdef CPU_SET /* Current glibc expects cpu_set_t */ 1616 cpu_set_t cpumask; 1617 1618 CPU_ZERO(&cpumask); 1619 CPU_SET(0, &cpumask); 1620 #else /* Old glibc expects unsigned long */ 1621 unsigned long cpumask = 1; 1622 #endif 1623 if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) { 1624 if (errno != EPERM) 1625 terminal_error("sched_setaffinity"); 1626 fprintf(stderr, "could not set cpu affinity\n"); 1627 } 1628 } 1629 #endif 1630 1631 /* Make benchmark a multiple of 10 seconds for proper range of X loads */ 1632 if (ud.duration % 10) 1633 ud.duration += 10 - ud.duration % 10; 1634 1635 if (benchmark) 1636 ud.loops_per_ms = 0; 1637 /* 1638 * Try to get loops_per_ms from command line first, file second, and 1639 * benchmark if not available. 1640 */ 1641 if (!ud.loops_per_ms) { 1642 if (benchmark) 1643 goto bench; 1644 if ((fp = fopen(fname, "r"))) { 1645 fscanf(fp, "%lu", &ud.loops_per_ms); 1646 if (fclose(fp) == -1) 1647 terminal_error("fclose"); 1648 if (ud.loops_per_ms) { 1649 fprintf(stderr, 1650 "%lu loops_per_ms read from file interbench.loops_per_ms\n", 1651 ud.loops_per_ms); 1652 goto loops_known; 1653 } 1654 } else 1655 if (errno != ENOENT) 1656 terminal_error("fopen"); 1657 bench: 1658 fprintf(stderr, "loops_per_ms unknown; benchmarking...\n"); 1659 1660 /* 1661 * To get as accurate a loop as possible we time it running 1662 * SCHED_FIFO if we can 1663 */ 1664 set_fifo(99); 1665 calibrate_loop(); 1666 set_normal(); 1667 } else 1668 fprintf(stderr, "loops_per_ms specified from command line\n"); 1669 1670 if (!(fp = fopen(fname, "w"))) { 1671 if (errno != EACCES) /* No write access is not terminal */ 1672 terminal_error("fopen"); 1673 fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n"); 1674 goto loops_known; 1675 } 1676 fprintf(fp, "%lu", ud.loops_per_ms); 1677 fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n", 1678 ud.loops_per_ms); 1679 if (fclose(fp) == -1) 1680 terminal_error("fclose"); 1681 1682 loops_known: 1683 get_ram(); 1684 get_logfilename(); 1685 create_read_file(); 1686 init_pipes(); 1687 1688 if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) { 1689 if (errno != EACCES) 1690 terminal_error("fopen"); 1691 fprintf(stderr, "Unable to write to logfile\n"); 1692 ud.log = 0; 1693 } 1694 log_output("\n"); 1695 log_output("Using %lu loops per ms, running every load for %d seconds\n", 1696 ud.loops_per_ms, ud.duration); 1697 log_output("Benchmarking kernel %s at datestamp %s\n", 1698 ud.unamer, ud.datestamp); 1699 if (comment) 1700 log_output("Comment: %s\n", comment); 1701 log_output("\n"); 1702 1703 for (i = 0 ; i < THREADS ; i++) 1704 threadlist[i].threadno = i; 1705 1706 for (i = 0 ; i < THREADS ; i++) { 1707 struct thread *thi = &threadlist[i]; 1708 int *benchme; 1709 1710 if (ud.do_rt) 1711 benchme = &threadlist[i].rtbench; 1712 else 1713 benchme = &threadlist[i].bench; 1714 1715 if (!*benchme || !bit_is_on(selected_benches, i)) 1716 continue; 1717 1718 log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label); 1719 if (ud.do_rt) 1720 log_output("real time "); 1721 else if (ud.bench_nice) 1722 log_output("nice %d ", ud.bench_nice); 1723 log_output("in the presence of simulated "); 1724 if (ud.load_nice) 1725 log_output("nice %d ", ud.load_nice); 1726 log_output("---\n"); 1727 1728 log_output("Load"); 1729 if (ud.do_rt) 1730 log_output("\tLatency +/- SD (us)"); 1731 else 1732 log_output("\tLatency +/- SD (ms)"); 1733 log_output(" Max Latency "); 1734 log_output(" %% Desired CPU"); 1735 if (!thi->nodeadlines) 1736 log_output(" %% Deadlines Met"); 1737 log_output("\n"); 1738 1739 for (j = 0 ; j < THREADS ; j++) { 1740 struct thread *thj = &threadlist[j]; 1741 1742 if (j == i || !bit_is_on(selected_loads, j) || 1743 (!threadlist[j].load && !ud.do_rt) || 1744 (!threadlist[j].rtload && ud.do_rt)) 1745 continue; 1746 log_output("%s\t", thj->label); 1747 sync_flush(); 1748 bench(i, j); 1749 } 1750 log_output("\n"); 1751 } 1752 log_output("\n"); 1753 if (ud.log) 1754 fclose(ud.logfile); 1755 1756 return 0; 1757 } 1758