1 /******************************************* 2 * 3 * Interbench - Interactivity benchmark 4 * 5 * Author: Con Kolivas <kernel@kolivas.org> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 * 21 *******************************************/ 22 23 #define _GNU_SOURCE 24 #define _FILE_OFFSET_BITS 64 /* Large file support */ 25 #define INTERBENCH_VERSION "0.30" 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <stdarg.h> 30 #include <strings.h> 31 #include <string.h> 32 #include <unistd.h> 33 #include <fcntl.h> 34 #include <sched.h> 35 #include <time.h> 36 #include <errno.h> 37 #include <semaphore.h> 38 #include <pthread.h> 39 #include <math.h> 40 #include <fenv.h> 41 #include <signal.h> 42 #include <sys/utsname.h> 43 #include <sys/time.h> 44 #include <sys/resource.h> 45 #include <sys/types.h> 46 #include <sys/mman.h> 47 #include <sys/wait.h> 48 #include <sys/stat.h> 49 #include <sys/sysctl.h> 50 #include <sys/vmmeter.h> 51 #include "interbench.h" 52 53 #define MAX_UNAME_LENGTH 100 54 #define MAX_LOG_LENGTH ((MAX_UNAME_LENGTH) + 4) 55 #define MIN_BLK_SIZE 1024 56 #define DEFAULT_RESERVE 64 57 #define MB (1024 * 1024) /* 2^20 bytes */ 58 #define KB 1024 59 #define MAX_MEM_IN_MB (1024 * 64) /* 64 GB */ 60 61 struct user_data { 62 unsigned long loops_per_ms; 63 unsigned long ram, swap; 64 int duration; 65 int do_rt; 66 int bench_nice; 67 int load_nice; 68 unsigned long custom_run; 69 unsigned long custom_interval; 70 unsigned long cpu_load; 71 char logfilename[MAX_LOG_LENGTH]; 72 int log; 73 char unamer[MAX_UNAME_LENGTH]; 74 char datestamp[13]; 75 FILE *logfile; 76 } ud = { 77 .duration = 30, 78 .cpu_load = 4, 79 .log = 1, 80 }; 81 82 /* Pipes main to/from load and bench processes */ 83 static int m2l[2], l2m[2], m2b[2], b2m[2]; 84 85 /* Which member of becnhmarks is used when not benchmarking */ 86 #define NOT_BENCHING (THREADS) 87 #define CUSTOM (THREADS - 1) 88 89 /* 90 * To add another load or a benchmark you need to increment the value of 91 * THREADS, add a function prototype for your function and add an entry to 92 * the threadlist. To specify whether the function is a benchmark or a load 93 * set the benchmark and/or load flag as appropriate. The basic requirements 94 * of a new load can be seen by using emulate_none as a template. 95 */ 96 97 void emulate_none(struct thread *th); 98 void emulate_audio(struct thread *th); 99 void emulate_video(struct thread *th); 100 void emulate_x(struct thread *th); 101 void emulate_game(struct thread *th); 102 void emulate_burn(struct thread *th); 103 void emulate_write(struct thread *th); 104 void emulate_read(struct thread *th); 105 void emulate_ring(struct thread *th); 106 void emulate_compile(struct thread *th); 107 void emulate_memload(struct thread *th); 108 void emulate_hackbench(struct thread *th); 109 void emulate_custom(struct thread *th); 110 111 struct thread threadlist[THREADS] = { 112 {.label = "None", .name = emulate_none, .load = 1, .rtload = 1}, 113 {.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1}, 114 {.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1}, 115 {.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1}, 116 {.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1}, 117 {.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1}, 118 {.label = "Write", .name = emulate_write, .load = 1, .rtload = 1}, 119 {.label = "Read", .name = emulate_read, .load = 1, .rtload = 1}, 120 {.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0}, /* No useful data from this */ 121 {.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1}, 122 {.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1}, 123 {.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0}, /* This is causing signal headaches */ 124 {.label = "Custom", .name = emulate_custom}, /* Leave custom as last entry */ 125 }; 126 127 void init_sem(sem_t *sem); 128 void init_all_sems(struct sems *s); 129 void initialise_thread(int i); 130 void start_thread(struct thread *th); 131 void stop_thread(struct thread *th); 132 133 void terminal_error(const char *name) 134 { 135 fprintf(stderr, "\n"); 136 perror(name); 137 exit (1); 138 } 139 140 void terminal_fileopen_error(FILE *fp, char *name) 141 { 142 if (fclose(fp) == -1) 143 terminal_error("fclose"); 144 terminal_error(name); 145 } 146 147 unsigned long long get_nsecs(struct timespec *myts) 148 { 149 if (clock_gettime(CLOCK_REALTIME, myts)) 150 terminal_error("clock_gettime"); 151 return (myts->tv_sec * 1000000000 + myts->tv_nsec ); 152 } 153 154 unsigned long get_usecs(struct timespec *myts) 155 { 156 if (clock_gettime(CLOCK_REALTIME, myts)) 157 terminal_error("clock_gettime"); 158 return (myts->tv_sec * 1000000 + myts->tv_nsec / 1000 ); 159 } 160 161 void set_fifo(int prio) 162 { 163 struct sched_param sp; 164 165 memset(&sp, 0, sizeof(sp)); 166 sp.sched_priority = prio; 167 if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) { 168 if (errno != EPERM) 169 terminal_error("sched_setscheduler"); 170 } 171 } 172 173 void set_mlock(void) 174 { 175 int mlockflags; 176 177 mlockflags = MCL_CURRENT | MCL_FUTURE; 178 #if 0 179 mlockall(mlockflags); /* Is not critical if this fails */ 180 #endif 181 } 182 183 void set_munlock(void) 184 { 185 #if 0 186 if (munlockall() == -1) 187 terminal_error("munlockall"); 188 #endif 189 } 190 191 void set_thread_fifo(pthread_t pthread, int prio) 192 { 193 struct sched_param sp; 194 memset(&sp, 0, sizeof(sp)); 195 sp.sched_priority = prio; 196 if (pthread_setschedparam(pthread, SCHED_FIFO, &sp) == -1) 197 terminal_error("pthread_setschedparam"); 198 } 199 200 void set_normal(void) 201 { 202 struct sched_param sp; 203 memset(&sp, 0, sizeof(sp)); 204 sp.sched_priority = 0; 205 if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { 206 fprintf(stderr, "Weird, could not unset RT scheduling!\n"); 207 } 208 } 209 210 void set_nice(int prio) 211 { 212 if (setpriority(PRIO_PROCESS, 0, prio) == -1) 213 terminal_error("setpriority"); 214 } 215 216 int test_fifo(void) 217 { 218 struct sched_param sp; 219 memset(&sp, 0, sizeof(sp)); 220 sp.sched_priority = 99; 221 if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) { 222 if (errno != EPERM) 223 terminal_error("sched_setscheduler"); 224 goto out_fail; 225 } 226 if (sched_getscheduler(0) != SCHED_FIFO) 227 goto out_fail; 228 set_normal(); 229 return 1; 230 out_fail: 231 set_normal(); 232 return 0; 233 } 234 235 void set_thread_normal(pthread_t pthread) 236 { 237 struct sched_param sp; 238 memset(&sp, 0, sizeof(sp)); 239 sp.sched_priority = 0; 240 if (pthread_setschedparam(pthread, SCHED_OTHER, &sp) == -1) 241 terminal_error("pthread_setschedparam"); 242 } 243 244 void sync_flush(void) 245 { 246 if ((fflush(NULL)) == EOF) 247 terminal_error("fflush"); 248 sync(); 249 sync(); 250 sync(); 251 } 252 253 unsigned long compute_allocable_mem(void) 254 { 255 unsigned long total = ud.ram + ud.swap; 256 unsigned long usage = ud.ram * 110 / 100 ; 257 258 /* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */ 259 if (total - DEFAULT_RESERVE < usage) 260 usage = total - DEFAULT_RESERVE; 261 usage /= 1024; /* to megabytes */ 262 if (usage > 2930) 263 usage = 2930; 264 return usage; 265 } 266 267 void burn_loops(unsigned long loops) 268 { 269 unsigned long i; 270 271 /* 272 * We need some magic here to prevent the compiler from optimising 273 * this loop away. Otherwise trying to emulate a fixed cpu load 274 * with this loop will not work. 275 */ 276 for (i = 0 ; i < loops ; i++) 277 asm volatile("" : : : "memory"); 278 } 279 280 /* Use this many usecs of cpu time */ 281 void burn_usecs(unsigned long usecs) 282 { 283 unsigned long ms_loops; 284 285 ms_loops = ud.loops_per_ms / 1000 * usecs; 286 burn_loops(ms_loops); 287 } 288 289 void microsleep(unsigned long long usecs) 290 { 291 struct timespec req, rem; 292 293 rem.tv_sec = rem.tv_nsec = 0; 294 295 req.tv_sec = usecs / 1000000; 296 req.tv_nsec = (usecs - (req.tv_sec * 1000000)) * 1000; 297 continue_sleep: 298 if ((nanosleep(&req, &rem)) == -1) { 299 if (errno == EINTR) { 300 if (rem.tv_sec || rem.tv_nsec) { 301 req.tv_sec = rem.tv_sec; 302 req.tv_nsec = rem.tv_nsec; 303 goto continue_sleep; 304 } 305 goto out; 306 } 307 terminal_error("nanosleep"); 308 } 309 out: 310 return; 311 } 312 313 /* 314 * Yes, sem_post and sem_wait shouldn't return -1 but they do so we must 315 * handle it. 316 */ 317 inline void post_sem(sem_t *s) 318 { 319 retry: 320 if ((sem_post(s)) == -1) { 321 if (errno == EINTR) 322 goto retry; 323 terminal_error("sem_post"); 324 } 325 } 326 327 inline void wait_sem(sem_t *s) 328 { 329 retry: 330 if ((sem_wait(s)) == -1) { 331 if (errno == EINTR) 332 goto retry; 333 terminal_error("sem_wait"); 334 } 335 } 336 337 inline int trywait_sem(sem_t *s) 338 { 339 int ret; 340 341 retry: 342 if ((ret = sem_trywait(s)) == -1) { 343 if (errno == EINTR) 344 goto retry; 345 if (errno != EAGAIN) 346 terminal_error("sem_trywait"); 347 } 348 return ret; 349 } 350 351 inline ssize_t Read(int fd, void *buf, size_t count) 352 { 353 ssize_t retval; 354 355 retry: 356 retval = read(fd, buf, count); 357 if (retval == -1) { 358 if (errno == EINTR) 359 goto retry; 360 terminal_error("read"); 361 } 362 return retval; 363 } 364 365 inline ssize_t Write(int fd, const void *buf, size_t count) 366 { 367 ssize_t retval; 368 369 retry: 370 retval = write(fd, &buf, count); 371 if (retval == -1) { 372 if (errno == EINTR) 373 goto retry; 374 terminal_error("write"); 375 } 376 return retval; 377 } 378 379 unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs, 380 unsigned long interval_usecs, unsigned long long deadline) 381 { 382 unsigned long long latency, missed_latency; 383 unsigned long long current_time; 384 struct tk_thread *tk; 385 struct data_table *tb; 386 struct timespec myts; 387 388 latency = 0; 389 tb = th->dt; 390 tk = &th->tkthread; 391 392 current_time = get_usecs(&myts); 393 if (current_time > deadline + tk->slept_interval) 394 latency = current_time - deadline- tk->slept_interval; 395 396 /* calculate the latency for missed frames */ 397 missed_latency = 0; 398 399 current_time = get_usecs(&myts); 400 if (interval_usecs && current_time > deadline + interval_usecs) { 401 /* We missed the deadline even before we consumed cpu */ 402 unsigned long intervals; 403 404 deadline += interval_usecs; 405 intervals = (current_time - deadline) / 406 interval_usecs + 1; 407 408 tb->missed_deadlines += intervals; 409 missed_latency = intervals * interval_usecs; 410 deadline += intervals * interval_usecs; 411 tb->missed_burns += intervals; 412 goto bypass_burn; 413 } 414 415 burn_usecs(run_usecs); 416 current_time = get_usecs(&myts); 417 tb->achieved_burns++; 418 419 /* 420 * If we meet the deadline we move the deadline forward, otherwise 421 * we consider it a missed deadline and dropped frame etc. 422 */ 423 deadline += interval_usecs; 424 if (deadline >= current_time) { 425 tb->deadlines_met++; 426 } else { 427 if (interval_usecs) { 428 unsigned long intervals = (current_time - deadline) / 429 interval_usecs + 1; 430 431 tb->missed_deadlines += intervals; 432 missed_latency = intervals * interval_usecs; 433 deadline += intervals * interval_usecs; 434 if (intervals > 1) 435 tb->missed_burns += intervals; 436 } else { 437 deadline = current_time; 438 goto out_nosleep; 439 } 440 } 441 bypass_burn: 442 tk->sleep_interval = deadline - current_time; 443 444 post_sem(&tk->sem.start); 445 wait_sem(&tk->sem.complete); 446 out_nosleep: 447 /* 448 * Must add missed_latency to total here as this function may not be 449 * called again and the missed latency can be lost 450 */ 451 latency += missed_latency; 452 if (latency > tb->max_latency) 453 tb->max_latency = latency; 454 tb->total_latency += latency; 455 tb->sum_latency_squared += latency * latency; 456 tb->nr_samples++; 457 458 return deadline; 459 } 460 461 void initialise_thread_data(struct data_table *tb) 462 { 463 tb->max_latency = 464 tb->total_latency = 465 tb->sum_latency_squared = 466 tb->deadlines_met = 467 tb->missed_deadlines = 468 tb->missed_burns = 469 tb->nr_samples = 0; 470 } 471 472 void create_pthread(pthread_t * thread, pthread_attr_t * attr, 473 void * (*start_routine)(void *), void *arg) 474 { 475 if (pthread_create(thread, attr, start_routine, arg)) 476 terminal_error("pthread_create"); 477 } 478 479 void join_pthread(pthread_t th, void **thread_return) 480 { 481 if (pthread_join(th, thread_return)) 482 terminal_error("pthread_join"); 483 } 484 485 void emulate_none(struct thread *th) 486 { 487 sem_t *s = &th->sem.stop; 488 wait_sem(s); 489 } 490 491 #define AUDIO_INTERVAL (50000) 492 #define AUDIO_RUN (AUDIO_INTERVAL / 20) 493 /* We emulate audio by using 5% cpu and waking every 50ms */ 494 void emulate_audio(struct thread *th) 495 { 496 unsigned long long deadline; 497 sem_t *s = &th->sem.stop; 498 struct timespec myts; 499 500 th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10; 501 deadline = get_usecs(&myts); 502 503 while (1) { 504 deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL, 505 deadline); 506 if (!trywait_sem(s)) 507 return; 508 } 509 } 510 511 /* We emulate video by using 40% cpu and waking for 60fps */ 512 #define VIDEO_INTERVAL (1000000 / 60) 513 #define VIDEO_RUN (VIDEO_INTERVAL * 40 / 100) 514 void emulate_video(struct thread *th) 515 { 516 unsigned long long deadline; 517 sem_t *s = &th->sem.stop; 518 struct timespec myts; 519 520 th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10; 521 deadline = get_usecs(&myts); 522 523 while (1) { 524 deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL, 525 deadline); 526 if (!trywait_sem(s)) 527 return; 528 } 529 } 530 531 /* 532 * We emulate X by running for a variable percentage of cpu from 0-100% 533 * in 1ms chunks. 534 */ 535 void emulate_x(struct thread *th) 536 { 537 unsigned long long deadline; 538 sem_t *s = &th->sem.stop; 539 struct timespec myts; 540 541 th->decasecond_deadlines = 100; 542 deadline = get_usecs(&myts); 543 544 while (1) { 545 int i, j; 546 for (i = 0 ; i <= 100 ; i++) { 547 j = 100 - i; 548 deadline = periodic_schedule(th, i * 1000, j * 1000, 549 deadline); 550 deadline += i * 1000; 551 if (!trywait_sem(s)) 552 return; 553 } 554 } 555 } 556 557 /* 558 * We emulate gaming by using 100% cpu and seeing how many frames (jobs 559 * completed) we can do in that time. Deadlines are meaningless with 560 * unlocked frame rates. We do not use periodic schedule because for 561 * this load because this never wants to sleep. 562 */ 563 #define GAME_INTERVAL (100000) 564 #define GAME_RUN (GAME_INTERVAL) 565 void emulate_game(struct thread *th) 566 { 567 unsigned long long deadline, current_time, latency; 568 sem_t *s = &th->sem.stop; 569 struct timespec myts; 570 struct data_table *tb; 571 572 tb = th->dt; 573 th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10; 574 575 while (1) { 576 deadline = get_usecs(&myts) + GAME_INTERVAL; 577 burn_usecs(GAME_RUN); 578 current_time = get_usecs(&myts); 579 /* use usecs instead of simple count for game burn statistics */ 580 tb->achieved_burns += GAME_RUN; 581 if (current_time > deadline) { 582 latency = current_time - deadline; 583 tb->missed_burns += latency; 584 } else 585 latency = 0; 586 if (latency > tb->max_latency) 587 tb->max_latency = latency; 588 tb->total_latency += latency; 589 tb->sum_latency_squared += latency * latency; 590 tb->nr_samples++; 591 if (!trywait_sem(s)) 592 return; 593 } 594 } 595 596 void *burn_thread(void *t) 597 { 598 struct thread *th; 599 sem_t *s; 600 long i = (long)t; 601 602 th = &threadlist[i]; 603 s = &th->sem.stopchild; 604 605 while (1) { 606 burn_loops(ud.loops_per_ms); 607 if (!trywait_sem(s)) { 608 post_sem(s); 609 break; 610 } 611 } 612 return NULL; 613 } 614 615 /* Have ud.cpu_load threads burn cpu continuously */ 616 void emulate_burn(struct thread *th) 617 { 618 sem_t *s = &th->sem.stop; 619 unsigned long i; 620 long t; 621 pthread_t burnthreads[ud.cpu_load]; 622 623 t = th->threadno; 624 for (i = 0 ; i < ud.cpu_load ; i++) 625 create_pthread(&burnthreads[i], NULL, burn_thread, 626 (void*)(long) t); 627 wait_sem(s); 628 post_sem(&th->sem.stopchild); 629 for (i = 0 ; i < ud.cpu_load ; i++) 630 join_pthread(burnthreads[i], NULL); 631 } 632 633 /* Write a file the size of ram continuously */ 634 void emulate_write(struct thread *th) 635 { 636 sem_t *s = &th->sem.stop; 637 FILE *fp; 638 char *name = "interbench.write"; 639 void *buf = NULL; 640 struct stat statbuf; 641 unsigned long mem; 642 643 if (!(fp = fopen(name, "w"))) 644 terminal_error("fopen"); 645 if (stat(name, &statbuf) == -1) 646 terminal_fileopen_error(fp, "stat"); 647 if (statbuf.st_blksize < MIN_BLK_SIZE) 648 statbuf.st_blksize = MIN_BLK_SIZE; 649 mem = ud.ram / (statbuf.st_blksize / 1024); /* kilobytes to blocks */ 650 if (!(buf = calloc(1, statbuf.st_blksize))) 651 terminal_fileopen_error(fp, "calloc"); 652 if (fclose(fp) == -1) 653 terminal_error("fclose"); 654 655 while (1) { 656 unsigned int i; 657 658 if (!(fp = fopen(name, "w"))) 659 terminal_error("fopen"); 660 if (stat(name, &statbuf) == -1) 661 terminal_fileopen_error(fp, "stat"); 662 for (i = 0 ; i < mem; i++) { 663 if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1) 664 terminal_fileopen_error(fp, "fwrite"); 665 if (!trywait_sem(s)) 666 goto out; 667 } 668 if (fclose(fp) == -1) 669 terminal_error("fclose"); 670 } 671 672 out: 673 if (fclose(fp) == -1) 674 terminal_error("fclose"); 675 if (remove(name) == -1) 676 terminal_error("remove"); 677 sync_flush(); 678 } 679 680 /* Read a file the size of ram continuously */ 681 void emulate_read(struct thread *th) 682 { 683 sem_t *s = &th->sem.stop; 684 char *name = "interbench.read"; 685 void *buf = NULL; 686 struct stat statbuf; 687 unsigned long bsize; 688 int tmp; 689 690 if ((tmp = open(name, O_RDONLY)) == -1) 691 terminal_error("open"); 692 if (stat(name, &statbuf) == -1) 693 terminal_error("stat"); 694 bsize = statbuf.st_blksize; 695 if (!(buf = malloc(bsize))) 696 terminal_error("malloc"); 697 698 while (1) { 699 int rd; 700 701 /* 702 * We have to read the whole file before quitting the load 703 * to prevent the data being cached for the next read. This 704 * is also the reason the file is the size of physical ram. 705 */ 706 while ((rd = Read(tmp , buf, bsize)) > 0); 707 if(!trywait_sem(s)) 708 return; 709 if (lseek(tmp, (off_t)0, SEEK_SET) == -1) 710 terminal_error("lseek"); 711 } 712 } 713 714 #define RINGTHREADS 4 715 716 struct thread ringthreads[RINGTHREADS]; 717 718 void *ring_thread(void *t) 719 { 720 struct thread *th; 721 struct sems *s; 722 int i, post_to; 723 724 i = (long)t; 725 th = &ringthreads[i]; 726 s = &th->sem; 727 post_to = i + 1; 728 if (post_to == RINGTHREADS) 729 post_to = 0; 730 if (i == 0) 731 post_sem(&s->ready); 732 733 while (1) { 734 wait_sem(&s->start); 735 post_sem(&ringthreads[post_to].sem.start); 736 if (!trywait_sem(&s->stop)) 737 goto out; 738 } 739 out: 740 post_sem(&ringthreads[post_to].sem.start); 741 post_sem(&s->complete); 742 return NULL; 743 } 744 745 /* Create a ring of 4 processes that wake each other up in a circle */ 746 void emulate_ring(struct thread *th) 747 { 748 sem_t *s = &th->sem.stop; 749 int i; 750 751 for (i = 0 ; i < RINGTHREADS ; i++) { 752 init_all_sems(&ringthreads[i].sem); 753 create_pthread(&ringthreads[i].pthread, NULL, 754 ring_thread, (void*)(long) i); 755 } 756 757 wait_sem(&ringthreads[0].sem.ready); 758 post_sem(&ringthreads[0].sem.start); 759 wait_sem(s); 760 for (i = 0 ; i < RINGTHREADS ; i++) 761 post_sem(&ringthreads[i].sem.stop); 762 for (i = 0 ; i < RINGTHREADS ; i++) { 763 wait_sem(&ringthreads[i].sem.complete); 764 join_pthread(ringthreads[i].pthread, NULL); 765 } 766 } 767 768 /* We emulate a compile by running burn, write and read threads simultaneously */ 769 void emulate_compile(struct thread *th) 770 { 771 sem_t *s = &th->sem.stop; 772 unsigned long i, threads[3]; 773 774 for (i = 0 ; i < THREADS ; i++) { 775 if (threadlist[i].label == "Burn") 776 threads[0] = i; 777 if (threadlist[i].label == "Write") 778 threads[1] = i; 779 if (threadlist[i].label == "Read") 780 threads[2] = i; 781 } 782 for (i = 0 ; i < 3 ; i++) { 783 if (!threads[i]) { 784 fprintf(stderr, "Can't find all threads for compile load\n"); 785 exit(1); 786 } 787 } 788 for (i = 0 ; i < 3 ; i++) { 789 initialise_thread(threads[i]); 790 start_thread(&threadlist[threads[i]]); 791 } 792 wait_sem(s); 793 for (i = 0 ; i < 3 ; i++) 794 stop_thread(&threadlist[threads[i]]); 795 } 796 797 int *grab_and_touch (char *block[], int i) 798 { 799 block[i] = (char *) malloc(MB); 800 if (!block[i]) 801 return NULL; 802 return (memset(block[i], 1, MB)); 803 } 804 805 /* We emulate a memory load by allocating and torturing 110% of available ram */ 806 void emulate_memload(struct thread *th) 807 { 808 sem_t *s = &th->sem.stop; 809 unsigned long touchable_mem, i; 810 char *mem_block[MAX_MEM_IN_MB]; 811 void *success; 812 813 touchable_mem = compute_allocable_mem(); 814 /* loop until we're killed, frobbing memory in various perverted ways */ 815 while (1) { 816 for (i = 0; i < touchable_mem; i++) { 817 success = grab_and_touch(mem_block, i); 818 if (!success) { 819 touchable_mem = i-1; 820 break; 821 } 822 } 823 if (!trywait_sem(s)) 824 goto out_freemem; 825 for (i = 0; i < touchable_mem; i++) { 826 memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) % 827 touchable_mem], MB); 828 if (!trywait_sem(s)) 829 goto out_freemem; 830 } 831 for (i = 0; i < touchable_mem; i++) { 832 free(mem_block[i]); 833 } 834 if (!trywait_sem(s)) 835 goto out; 836 } 837 out_freemem: 838 for (i = 0; i < touchable_mem; i++) 839 free(mem_block[i]); 840 out: 841 return; 842 } 843 844 struct thread hackthread; 845 846 void emulate_hackbench(struct thread *th) 847 { 848 sem_t *s = &th->sem.stop; 849 850 init_all_sems(&hackthread.sem); 851 create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0); 852 853 wait_sem(s); 854 855 post_sem(&hackthread.sem.stop); 856 wait_sem(&hackthread.sem.complete); 857 858 join_pthread(hackthread.pthread, NULL); 859 } 860 861 #define CUSTOM_INTERVAL (ud.custom_interval) 862 #define CUSTOM_RUN (ud.custom_run) 863 void emulate_custom(struct thread *th) 864 { 865 unsigned long long deadline; 866 sem_t *s = &th->sem.stop; 867 struct timespec myts; 868 869 th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10; 870 deadline = get_usecs(&myts); 871 872 while (1) { 873 deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL, 874 deadline); 875 if (!trywait_sem(s)) 876 return; 877 } 878 } 879 880 void *timekeeping_thread(void *t) 881 { 882 struct thread *th; 883 struct tk_thread *tk; 884 struct sems *s; 885 struct timespec myts; 886 long i = (long)t; 887 888 th = &threadlist[i]; 889 tk = &th->tkthread; 890 s = &th->tkthread.sem; 891 /* 892 * If this timekeeping thread is that of a benchmarked thread we run 893 * even higher priority than the benched thread is if running real 894 * time. Otherwise, the load timekeeping thread, which does not need 895 * accurate accounting remains SCHED_NORMAL; 896 */ 897 if (th->dt != &th->benchmarks[NOT_BENCHING]) 898 set_fifo(96); 899 /* These values must be changed at the appropriate places or race */ 900 tk->sleep_interval = tk->slept_interval = 0; 901 post_sem(&s->ready); 902 903 while (1) { 904 unsigned long start_time, now; 905 906 if (!trywait_sem(&s->stop)) 907 goto out; 908 wait_sem(&s->start); 909 tk->slept_interval = 0; 910 start_time = get_usecs(&myts); 911 if (!trywait_sem(&s->stop)) 912 goto out; 913 if (tk->sleep_interval) { 914 unsigned long diff = 0; 915 microsleep(tk->sleep_interval); 916 now = get_usecs(&myts); 917 /* now should always be > start_time but... */ 918 if (now > start_time) { 919 diff = now - start_time; 920 if (diff > tk->sleep_interval) 921 tk->slept_interval = diff - 922 tk->sleep_interval; 923 } 924 } 925 tk->sleep_interval = 0; 926 post_sem(&s->complete); 927 } 928 out: 929 return NULL; 930 } 931 932 /* 933 * All the sleep functions such as nanosleep can only guarantee that they 934 * sleep for _at least_ the time requested. We work around this by having 935 * a high priority real time thread that accounts for the extra time slept 936 * in nanosleep. This allows wakeup latency of the tested thread to be 937 * accurate and reflect true scheduling delays. 938 */ 939 void *emulation_thread(void *t) 940 { 941 struct thread *th; 942 struct tk_thread *tk; 943 struct sems *s, *tks; 944 long i = (long)t; 945 946 th = &threadlist[i]; 947 tk = &th->tkthread; 948 s = &th->sem; 949 tks = &tk->sem; 950 init_all_sems(tks); 951 952 /* Start the timekeeping thread */ 953 create_pthread(&th->tk_pthread, NULL, timekeeping_thread, 954 (void*)(long) i); 955 /* Wait for timekeeping thread to be ready */ 956 wait_sem(&tks->ready); 957 958 /* Tell main we're ready to start*/ 959 post_sem(&s->ready); 960 961 /* Wait for signal from main to start thread */ 962 wait_sem(&s->start); 963 964 /* Start the actual function being benched/or running as load */ 965 th->name(th); 966 967 /* Stop the timekeeping thread */ 968 post_sem(&tks->stop); 969 post_sem(&tks->start); 970 join_pthread(th->tk_pthread, NULL); 971 972 /* Tell main we've finished */ 973 post_sem(&s->complete); 974 return NULL; 975 } 976 977 /* 978 * In an unoptimised loop we try to benchmark how many meaningless loops 979 * per second we can perform on this hardware to fairly accurately 980 * reproduce certain percentage cpu usage 981 */ 982 void calibrate_loop(void) 983 { 984 unsigned long long start_time, loops_per_msec, run_time = 0; 985 unsigned long loops; 986 struct timespec myts; 987 988 loops_per_msec = 100000; 989 redo: 990 /* Calibrate to within 1% accuracy */ 991 while (run_time > 1010000 || run_time < 990000) { 992 loops = loops_per_msec; 993 start_time = get_nsecs(&myts); 994 burn_loops(loops); 995 run_time = get_nsecs(&myts) - start_time; 996 loops_per_msec = (1000000 * loops_per_msec / run_time ? : 997 loops_per_msec); 998 } 999 1000 /* Rechecking after a pause increases reproducibility */ 1001 sleep(1); 1002 loops = loops_per_msec; 1003 start_time = get_nsecs(&myts); 1004 burn_loops(loops); 1005 run_time = get_nsecs(&myts) - start_time; 1006 1007 /* Tolerate 5% difference on checking */ 1008 if (run_time > 1050000 || run_time < 950000) 1009 goto redo; 1010 1011 ud.loops_per_ms = loops_per_msec; 1012 } 1013 1014 void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2))); 1015 1016 /* Output to console +/- logfile */ 1017 void log_output(const char *format, ...) 1018 { 1019 va_list ap; 1020 1021 va_start(ap, format); 1022 if (vprintf(format, ap) == -1) 1023 terminal_error("vprintf"); 1024 va_end(ap); 1025 if (ud.log) { 1026 va_start(ap, format); 1027 if (vfprintf(ud.logfile, format, ap) == -1) 1028 terminal_error("vpfrintf"); 1029 va_end(ap); 1030 } 1031 fflush(NULL); 1032 } 1033 1034 /* Calculate statistics and output them */ 1035 void show_latencies(struct thread *th) 1036 { 1037 struct data_table *tbj; 1038 struct tk_thread *tk; 1039 double average_latency, deadlines_met, samples_met, sd, max_latency; 1040 long double variance = 0; 1041 1042 tbj = th->dt; 1043 tk = &th->tkthread; 1044 1045 if (tbj->nr_samples > 1) { 1046 average_latency = tbj->total_latency / tbj->nr_samples; 1047 variance = (tbj->sum_latency_squared - (average_latency * 1048 average_latency) / tbj->nr_samples) / (tbj->nr_samples - 1); 1049 sd = sqrt((double)variance); 1050 } else { 1051 average_latency = tbj->total_latency; 1052 sd = 0.0; 1053 } 1054 1055 /* 1056 * Landing on the boundary of a deadline can make loaded runs appear 1057 * to do more work than unloaded due to tiny duration differences. 1058 */ 1059 if (tbj->achieved_burns > 0) 1060 samples_met = (double)tbj->achieved_burns / 1061 (double)(tbj->achieved_burns + tbj->missed_burns) * 100; 1062 else 1063 samples_met = 0.0; 1064 max_latency = tbj->max_latency; 1065 /* When benchmarking rt we represent the data in us */ 1066 if (!ud.do_rt) { 1067 average_latency /= 1000; 1068 sd /= 1000; 1069 max_latency /= 1000; 1070 } 1071 if (tbj->deadlines_met == 0) 1072 deadlines_met = 0; 1073 else 1074 deadlines_met = (double)tbj->deadlines_met / 1075 (double)(tbj->missed_deadlines + tbj->deadlines_met) * 100; 1076 1077 /* Messy nonsense to format the output nicely */ 1078 if (average_latency >= 100) 1079 log_output("%7.0f +/- ", average_latency); 1080 else 1081 log_output("%7.3g +/- ", average_latency); 1082 if (sd >= 100) 1083 log_output("%-9.0f", sd); 1084 else 1085 log_output("%-9.3g", sd); 1086 if (max_latency >= 100) 1087 log_output("%7.0f\t", max_latency); 1088 else 1089 log_output("%7.3g\t", max_latency); 1090 log_output("\t%4.3g", samples_met); 1091 if (!th->nodeadlines) 1092 log_output("\t%11.3g", deadlines_met); 1093 log_output("\n"); 1094 sync_flush(); 1095 } 1096 1097 void create_read_file(void) 1098 { 1099 unsigned int i; 1100 FILE *fp; 1101 char *name = "interbench.read"; 1102 void *buf = NULL; 1103 struct stat statbuf; 1104 unsigned long mem, bsize; 1105 int tmp; 1106 1107 if ((tmp = open(name, O_RDONLY)) == -1) { 1108 if (errno != ENOENT) 1109 terminal_error("open"); 1110 goto write; 1111 } 1112 if (stat(name, &statbuf) == -1) 1113 terminal_error("stat"); 1114 if (statbuf.st_blksize < MIN_BLK_SIZE) 1115 statbuf.st_blksize = MIN_BLK_SIZE; 1116 bsize = statbuf.st_blksize; 1117 if (statbuf.st_size / 1024 / bsize == ud.ram / bsize) 1118 return; 1119 if (remove(name) == -1) 1120 terminal_error("remove"); 1121 write: 1122 fprintf(stderr,"Creating file for read load...\n"); 1123 if (!(fp = fopen(name, "w"))) 1124 terminal_error("fopen"); 1125 if (stat(name, &statbuf) == -1) 1126 terminal_fileopen_error(fp, "stat"); 1127 if (statbuf.st_blksize < MIN_BLK_SIZE) 1128 statbuf.st_blksize = MIN_BLK_SIZE; 1129 bsize = statbuf.st_blksize; 1130 if (!(buf = calloc(1, bsize))) 1131 terminal_fileopen_error(fp, "calloc"); 1132 mem = ud.ram / (bsize / 1024); /* kilobytes to blocks */ 1133 1134 for (i = 0 ; i < mem; i++) { 1135 if (fwrite(buf, bsize, 1, fp) != 1) 1136 terminal_fileopen_error(fp, "fwrite"); 1137 } 1138 if (fclose(fp) == -1) 1139 terminal_error("fclose"); 1140 sync_flush(); 1141 } 1142 1143 void get_ram(void) 1144 { 1145 struct vmstats vms; 1146 size_t vms_size = sizeof(vms); 1147 1148 if (sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0)) 1149 terminal_error("sysctlbyname: vm.vmstats"); 1150 1151 ud.ram = vms.v_page_count * vms.v_page_size; 1152 ud.ram /= 1024; /* linux size is in kB */ 1153 ud.swap = ud.ram; /* XXX: swap doesn't have to be the same as RAM */ 1154 1155 if( !ud.ram || !ud.swap ) { 1156 unsigned long i; 1157 fprintf(stderr, "\nCould not get memory or swap size. "); 1158 fprintf(stderr, "Will not perform mem_load\n"); 1159 for (i = 0 ; i < THREADS ; i++) { 1160 if (threadlist[i].label == "Memload") { 1161 threadlist[i].load = 0; 1162 threadlist[i].rtload = 0; 1163 } 1164 } 1165 } 1166 } 1167 1168 void get_logfilename(void) 1169 { 1170 struct tm *mytm; 1171 struct utsname buf; 1172 time_t t; 1173 int year, month, day, hours, minutes; 1174 1175 time(&t); 1176 if (uname(&buf) == -1) 1177 terminal_error("uname"); 1178 if (!(mytm = localtime(&t))) 1179 terminal_error("localtime"); 1180 year = mytm->tm_year + 1900; 1181 month = mytm->tm_mon + 1; 1182 day = mytm->tm_mday; 1183 hours = mytm->tm_hour; 1184 minutes = mytm->tm_min; 1185 strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH); 1186 1187 sprintf(ud.datestamp, "%2d%02d%02d%02d%02d", 1188 year, month, day, hours, minutes); 1189 snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer); 1190 } 1191 1192 void start_thread(struct thread *th) 1193 { 1194 post_sem(&th->sem.start); 1195 } 1196 1197 void stop_thread(struct thread *th) 1198 { 1199 post_sem(&th->sem.stop); 1200 wait_sem(&th->sem.complete); 1201 1202 /* Kill the thread */ 1203 join_pthread(th->pthread, NULL); 1204 } 1205 1206 void init_sem(sem_t *sem) 1207 { 1208 if (sem_init(sem, 0, 0)) 1209 terminal_error("sem_init"); 1210 } 1211 1212 void init_all_sems(struct sems *s) 1213 { 1214 /* Initialise the semaphores */ 1215 init_sem(&s->ready); 1216 init_sem(&s->start); 1217 init_sem(&s->stop); 1218 init_sem(&s->complete); 1219 init_sem(&s->stopchild); 1220 } 1221 1222 void initialise_thread(int i) 1223 { 1224 struct thread *th = &threadlist[i]; 1225 1226 init_all_sems(&th->sem); 1227 /* Create the threads. Yes, the (long) cast is fugly but it's safe*/ 1228 create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i); 1229 1230 wait_sem(&th->sem.ready); 1231 /* 1232 * We set this pointer generically to NOT_BENCHING and set it to the 1233 * benchmarked array entry only on benched threads. 1234 */ 1235 th->dt = &th->benchmarks[NOT_BENCHING]; 1236 initialise_thread_data(th->dt); 1237 1238 } 1239 1240 /* A pseudo-semaphore for processes using a pipe */ 1241 void wait_on(int pype) 1242 { 1243 int retval, buf = 0; 1244 1245 retval = Read(pype, &buf, sizeof(buf)); 1246 if (retval == 0) { 1247 fprintf(stderr, "\nread returned 0\n"); 1248 exit (1); 1249 } 1250 } 1251 1252 void wakeup_with(int pype) 1253 { 1254 int retval, buf = 1; 1255 1256 retval = Write(pype, &buf, sizeof(buf)); 1257 if (retval == 0) { 1258 fprintf(stderr, "\nwrite returned 0\n"); 1259 exit (1); 1260 } 1261 } 1262 1263 void run_loadchild(int j) 1264 { 1265 struct thread *thj; 1266 thj = &threadlist[j]; 1267 1268 set_nice(ud.load_nice); 1269 initialise_thread(j); 1270 1271 /* Tell main we're ready */ 1272 wakeup_with(l2m[1]); 1273 1274 /* Main tells us we're ready */ 1275 wait_on(m2l[0]); 1276 start_thread(thj); 1277 1278 /* Tell main we received the start and are running */ 1279 wakeup_with(l2m[1]); 1280 1281 /* Main tells us to stop */ 1282 wait_on(m2l[0]); 1283 stop_thread(thj); 1284 1285 /* Tell main we've finished */ 1286 wakeup_with(l2m[1]); 1287 exit (0); 1288 } 1289 1290 void run_benchchild(int i, int j) 1291 { 1292 struct thread *thi; 1293 1294 thi = &threadlist[i]; 1295 1296 set_nice(ud.bench_nice); 1297 if (ud.do_rt) 1298 set_mlock(); 1299 initialise_thread(i); 1300 /* Point the data table to the appropriate load being tested */ 1301 thi->dt = &thi->benchmarks[j]; 1302 initialise_thread_data(thi->dt); 1303 if (ud.do_rt) 1304 set_thread_fifo(thi->pthread, 95); 1305 1306 /* Tell main we're ready */ 1307 wakeup_with(b2m[1]); 1308 1309 /* Main tells us we're ready */ 1310 wait_on(m2b[0]); 1311 start_thread(thi); 1312 1313 /* Tell main we have started */ 1314 wakeup_with(b2m[1]); 1315 1316 /* Main tells us to stop */ 1317 wait_on(m2b[0]); 1318 stop_thread(thi); 1319 1320 if (ud.do_rt) { 1321 set_thread_normal(thi->pthread); 1322 set_munlock(); 1323 } 1324 show_latencies(thi); 1325 1326 /* Tell main we've finished */ 1327 wakeup_with(b2m[1]); 1328 exit(0); 1329 } 1330 1331 void bench(int i, int j) 1332 { 1333 pid_t bench_pid, load_pid; 1334 1335 if ((load_pid = fork()) == -1) 1336 terminal_error("fork"); 1337 if (!load_pid) 1338 run_loadchild(j); 1339 1340 /* Wait for load process to be ready */ 1341 1342 wait_on(l2m[0]); 1343 if ((bench_pid = fork()) == -1) 1344 terminal_error("fork"); 1345 if (!bench_pid) 1346 run_benchchild(i, j); 1347 1348 /* Wait for bench process to be ready */ 1349 wait_on(b2m[0]); 1350 1351 /* 1352 * We want to be higher priority than everything to signal them to 1353 * stop and we lock our memory if we can as well 1354 */ 1355 set_fifo(99); 1356 set_mlock(); 1357 1358 /* Wakeup the load process */ 1359 wakeup_with(m2l[1]); 1360 /* Load tells it has received the first message and is running */ 1361 wait_on(l2m[0]); 1362 1363 /* After a small delay, wake up the benched process */ 1364 sleep(1); 1365 wakeup_with(m2b[1]); 1366 1367 /* Bench tells it has received the first message and is running */ 1368 wait_on(b2m[0]); 1369 microsleep(ud.duration * 1000000); 1370 1371 /* Tell the benched process to stop its threads and output results */ 1372 wakeup_with(m2b[1]); 1373 1374 /* Tell the load process to stop its threads */ 1375 wakeup_with(m2l[1]); 1376 1377 /* Return to SCHED_NORMAL */ 1378 set_normal(); 1379 set_munlock(); 1380 1381 /* Wait for load and bench processes to terminate */ 1382 wait_on(l2m[0]); 1383 wait_on(b2m[0]); 1384 } 1385 1386 void init_pipe(int *pype) 1387 { 1388 if (pipe(pype) == -1) 1389 terminal_error("pipe"); 1390 } 1391 1392 void init_pipes(void) 1393 { 1394 init_pipe(m2l); 1395 init_pipe(l2m); 1396 init_pipe(m2b); 1397 init_pipe(b2m); 1398 } 1399 1400 void usage(void) 1401 { 1402 /* Affinity commented out till working on all architectures */ 1403 fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n"); 1404 fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int] [-B <int>] [-N <int>]\n"); 1405 fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n"); 1406 fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n"); 1407 fprintf(stderr, "\t[-h]\n\n"); 1408 fprintf(stderr, " -l\tUse <int> loops per sec (default: use saved benchmark)\n"); 1409 fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n"); 1410 fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n"); 1411 fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n"); 1412 fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n"); 1413 //fprintf(stderr, " -u\tImitate uniprocessor\n"); 1414 fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n"); 1415 fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n"); 1416 fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n"); 1417 fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n"); 1418 fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n"); 1419 fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n"); 1420 fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n"); 1421 fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n"); 1422 fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n"); 1423 fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n"); 1424 fprintf(stderr, " -h\tShow this help\n"); 1425 fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n\n"); 1426 } 1427 1428 #ifdef DEBUG 1429 void deadchild(int crap) 1430 { 1431 pid_t retval; 1432 int status; 1433 1434 crap = 0; 1435 1436 if ((retval = waitpid(-1, &status, WNOHANG)) == -1) { 1437 if (errno == ECHILD) 1438 return; 1439 terminal_error("waitpid"); 1440 } 1441 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) 1442 return; 1443 fprintf(stderr, "\nChild terminated abnormally "); 1444 if (WIFSIGNALED(status)) 1445 fprintf(stderr, "with signal %d", WTERMSIG(status)); 1446 fprintf(stderr, "\n"); 1447 exit (1); 1448 } 1449 #endif 1450 1451 int load_index(const char* loadname) 1452 { 1453 int i; 1454 1455 for (i = 0 ; i < THREADS ; i++) 1456 if (strcasecmp(loadname, threadlist[i].label) == 0) 1457 return i; 1458 return -1; 1459 } 1460 1461 inline int bit_is_on(const unsigned int mask, int index) 1462 { 1463 return (mask & (1 << index)) != 0; 1464 } 1465 1466 inline void set_bit_on(unsigned int *mask, int index) 1467 { 1468 *mask |= (1 << index); 1469 } 1470 1471 int main(int argc, char **argv) 1472 { 1473 unsigned long custom_cpu = 0; 1474 int q, i, j, affinity, benchmark = 0; 1475 unsigned int selected_loads = 0; 1476 unsigned int excluded_loads = 0; 1477 unsigned int selected_benches = 0; 1478 unsigned int excluded_benches = 0; 1479 FILE *fp; 1480 /* 1481 * This file stores the loops_per_ms to be reused in a filename that 1482 * can't be confused 1483 */ 1484 char *fname = "interbench.loops_per_ms"; 1485 char *comment = NULL; 1486 #ifdef DEBUG 1487 feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); 1488 if (signal(SIGCHLD, deadchild) == SIG_ERR) 1489 terminal_error("signal"); 1490 #endif 1491 1492 while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) { 1493 switch (q) { 1494 case 'h': 1495 usage(); 1496 return (0); 1497 case 'l': 1498 ud.loops_per_ms = atoi(optarg); 1499 break; 1500 case 't': 1501 ud.duration = atoi(optarg); 1502 break; 1503 case 'L': 1504 ud.cpu_load = atoi(optarg); 1505 break; 1506 case 'B': 1507 ud.bench_nice = atoi(optarg); 1508 break; 1509 case 'N': 1510 ud.load_nice = atoi(optarg); 1511 break; 1512 case 'u': 1513 affinity = 1; 1514 break; 1515 case 'b': 1516 benchmark = 1; 1517 break; 1518 case 'c': 1519 ud.log = 0; 1520 break; 1521 case 'r': 1522 ud.do_rt = 1; 1523 break; 1524 case 'C': 1525 custom_cpu = (unsigned long)atol(optarg); 1526 break; 1527 case 'I': 1528 ud.custom_interval = atol(optarg); 1529 break; 1530 case 'm': 1531 comment = optarg; 1532 break; 1533 case 'w': 1534 i = load_index(optarg); 1535 if (i == -1) { 1536 fprintf(stderr, "Unknown load \"%s\"\n", optarg); 1537 return (-2); 1538 } 1539 set_bit_on(&selected_loads, i); 1540 break; 1541 case 'x': 1542 i = load_index(optarg); 1543 if (i == -1) { 1544 fprintf(stderr, "Unknown load \"%s\"\n", optarg); 1545 return (-2); 1546 } 1547 set_bit_on(&excluded_loads, i); 1548 break; 1549 case 'W': 1550 i = load_index(optarg); 1551 if (i == -1) { 1552 fprintf(stderr, "Unknown bench \"%s\"\n", optarg); 1553 return (-2); 1554 } 1555 set_bit_on(&selected_benches, i); 1556 break; 1557 case 'X': 1558 i = load_index(optarg); 1559 if (i == -1) { 1560 fprintf(stderr, "Unknown bench \"%s\"\n", optarg); 1561 return (-2); 1562 } 1563 set_bit_on(&excluded_benches, i); 1564 break; 1565 default: 1566 usage(); 1567 return (1); 1568 } 1569 } 1570 argc -= optind; 1571 argv += optind; 1572 /* default is all loads */ 1573 if (selected_loads == 0) 1574 selected_loads = (unsigned int)-1; 1575 selected_loads &= ~excluded_loads; 1576 /* default is all benches */ 1577 if (selected_benches == 0) 1578 selected_benches = (unsigned int)-1; 1579 selected_benches &= ~excluded_benches; 1580 1581 if (!test_fifo()) { 1582 fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n"); 1583 fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n"); 1584 if (ud.do_rt) { 1585 fprintf(stderr, "Real time tests were requested, aborting.\n"); 1586 exit (1); 1587 } 1588 fprintf(stderr, "Results will be unreliable.\n"); 1589 } 1590 if (!ud.cpu_load) { 1591 fprintf(stderr, "Invalid cpu load\n"); 1592 exit (1); 1593 } 1594 1595 if ((custom_cpu && !ud.custom_interval) || 1596 (ud.custom_interval && !custom_cpu) || 1597 custom_cpu > 100) { 1598 fprintf(stderr, "Invalid custom values, aborting.\n"); 1599 exit (1); 1600 } 1601 1602 if (custom_cpu && ud.custom_interval) { 1603 ud.custom_run = ud.custom_interval * custom_cpu / 100; 1604 threadlist[CUSTOM].bench = 1; 1605 threadlist[CUSTOM].load = 1; 1606 threadlist[CUSTOM].rtbench = 1; 1607 threadlist[CUSTOM].rtload = 1; 1608 } 1609 1610 /*FIXME Affinity commented out till working on all architectures */ 1611 #if 0 1612 if (affinity) { 1613 #ifdef CPU_SET /* Current glibc expects cpu_set_t */ 1614 cpu_set_t cpumask; 1615 1616 CPU_ZERO(&cpumask); 1617 CPU_SET(0, &cpumask); 1618 #else /* Old glibc expects unsigned long */ 1619 unsigned long cpumask = 1; 1620 #endif 1621 if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) { 1622 if (errno != EPERM) 1623 terminal_error("sched_setaffinity"); 1624 fprintf(stderr, "could not set cpu affinity\n"); 1625 } 1626 } 1627 #endif 1628 1629 /* Make benchmark a multiple of 10 seconds for proper range of X loads */ 1630 if (ud.duration % 10) 1631 ud.duration += 10 - ud.duration % 10; 1632 1633 if (benchmark) 1634 ud.loops_per_ms = 0; 1635 /* 1636 * Try to get loops_per_ms from command line first, file second, and 1637 * benchmark if not available. 1638 */ 1639 if (!ud.loops_per_ms) { 1640 if (benchmark) 1641 goto bench; 1642 if ((fp = fopen(fname, "r"))) { 1643 fscanf(fp, "%lu", &ud.loops_per_ms); 1644 if (fclose(fp) == -1) 1645 terminal_error("fclose"); 1646 if (ud.loops_per_ms) { 1647 fprintf(stderr, 1648 "%lu loops_per_ms read from file interbench.loops_per_ms\n", 1649 ud.loops_per_ms); 1650 goto loops_known; 1651 } 1652 } else 1653 if (errno != ENOENT) 1654 terminal_error("fopen"); 1655 bench: 1656 fprintf(stderr, "loops_per_ms unknown; benchmarking...\n"); 1657 1658 /* 1659 * To get as accurate a loop as possible we time it running 1660 * SCHED_FIFO if we can 1661 */ 1662 set_fifo(99); 1663 calibrate_loop(); 1664 set_normal(); 1665 } else 1666 fprintf(stderr, "loops_per_ms specified from command line\n"); 1667 1668 if (!(fp = fopen(fname, "w"))) { 1669 if (errno != EACCES) /* No write access is not terminal */ 1670 terminal_error("fopen"); 1671 fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n"); 1672 goto loops_known; 1673 } 1674 fprintf(fp, "%lu", ud.loops_per_ms); 1675 fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n", 1676 ud.loops_per_ms); 1677 if (fclose(fp) == -1) 1678 terminal_error("fclose"); 1679 1680 loops_known: 1681 get_ram(); 1682 get_logfilename(); 1683 create_read_file(); 1684 init_pipes(); 1685 1686 if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) { 1687 if (errno != EACCES) 1688 terminal_error("fopen"); 1689 fprintf(stderr, "Unable to write to logfile\n"); 1690 ud.log = 0; 1691 } 1692 log_output("\n"); 1693 log_output("Using %lu loops per ms, running every load for %d seconds\n", 1694 ud.loops_per_ms, ud.duration); 1695 log_output("Benchmarking kernel %s at datestamp %s\n", 1696 ud.unamer, ud.datestamp); 1697 if (comment) 1698 log_output("Comment: %s\n", comment); 1699 log_output("\n"); 1700 1701 for (i = 0 ; i < THREADS ; i++) 1702 threadlist[i].threadno = i; 1703 1704 for (i = 0 ; i < THREADS ; i++) { 1705 struct thread *thi = &threadlist[i]; 1706 int *benchme; 1707 1708 if (ud.do_rt) 1709 benchme = &threadlist[i].rtbench; 1710 else 1711 benchme = &threadlist[i].bench; 1712 1713 if (!*benchme || !bit_is_on(selected_benches, i)) 1714 continue; 1715 1716 log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label); 1717 if (ud.do_rt) 1718 log_output("real time "); 1719 else if (ud.bench_nice) 1720 log_output("nice %d ", ud.bench_nice); 1721 log_output("in the presence of simulated "); 1722 if (ud.load_nice) 1723 log_output("nice %d ", ud.load_nice); 1724 log_output("---\n"); 1725 1726 log_output("Load"); 1727 if (ud.do_rt) 1728 log_output("\tLatency +/- SD (us)"); 1729 else 1730 log_output("\tLatency +/- SD (ms)"); 1731 log_output(" Max Latency "); 1732 log_output(" %% Desired CPU"); 1733 if (!thi->nodeadlines) 1734 log_output(" %% Deadlines Met"); 1735 log_output("\n"); 1736 1737 for (j = 0 ; j < THREADS ; j++) { 1738 struct thread *thj = &threadlist[j]; 1739 1740 if (j == i || !bit_is_on(selected_loads, j) || 1741 (!threadlist[j].load && !ud.do_rt) || 1742 (!threadlist[j].rtload && ud.do_rt)) 1743 continue; 1744 log_output("%s\t", thj->label); 1745 sync_flush(); 1746 bench(i, j); 1747 } 1748 log_output("\n"); 1749 } 1750 log_output("\n"); 1751 if (ud.log) 1752 fclose(ud.logfile); 1753 1754 return 0; 1755 } 1756