1 #include <assert.h> 2 #include <errno.h> 3 #include <math.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <string.h> 7 #include <unistd.h> 8 #include <minix/type.h> 9 10 #include "tdist.h" 11 12 /* user-configurable settings */ 13 #define DEBUG 0 14 15 #define PROC_NAME_WIDTH 10 16 17 #define SYMBOL_NAME_WIDTH 24 18 19 /* types */ 20 #define SYMBOL_HASHTAB_SIZE 1024 21 22 #define SYMBOL_NAME_SIZE 52 23 24 struct symbol_count { 25 unsigned long sum; 26 unsigned long long sum2; 27 unsigned long min; 28 unsigned long max; 29 }; 30 31 enum symbol_class { 32 sc_total, 33 sc_idle, 34 sc_system, 35 sc_user, 36 sc_process, 37 sc_symbol 38 }; 39 40 struct symbol_info { 41 struct symbol_info *next; 42 struct symbol_info *hashtab_next; 43 char binary[PROC_NAME_LEN]; 44 char name[SYMBOL_NAME_SIZE]; 45 struct symbol_count count[2]; 46 long diff; 47 enum symbol_class class; 48 }; 49 50 /* global variables */ 51 static unsigned n1, n2; 52 static struct symbol_info *symbols; 53 static struct symbol_info *symbol_hashtab[SYMBOL_HASHTAB_SIZE]; 54 55 /* prototypes */ 56 static double compute_sig(double avg1, double var1, double avg2, double var2); 57 static void compute_stats(const struct symbol_count *count, unsigned n, 58 double *avg, double *var); 59 static void load_file(const char *path, int count_index); 60 static void *malloc_checked(size_t size); 61 static void print_report(void); 62 static void print_report_line(const struct symbol_info *symbol); 63 static int read_line(FILE *file, const char *path, int line, char *binary, 64 char *name, unsigned long *samples); 65 static enum symbol_class symbol_classify(const char *binary, const char *name); 66 static unsigned string_hash(const char *s, size_t size); 67 static struct symbol_info *symbol_find_or_add(const char *binary, 68 const char *name); 69 static unsigned symbol_hash(const char *binary, const char *name); 70 static int symbol_qsort_compare(const void *p1, const void *p2); 71 static void symbol_tally(const char *binary, const char *name, 72 unsigned long samples, int count_index); 73 static unsigned symbols_count(void); 74 static void usage(const char *argv0); 75 76 #define MALLOC_CHECKED(type, count) \ 77 ((type *) malloc_checked(sizeof(type) * (count))) 78 79 #if DEBUG 80 #define dprintf(...) do { \ 81 fprintf(stderr, "debug(%s:%d): ", __FUNCTION__, __LINE__); \ 82 fprintf(stderr, __VA_ARGS__); \ 83 } while(0) 84 #else 85 #define dprintf(...) 86 #endif 87 88 int main(int argc, char **argv) { 89 int i; 90 91 #ifdef DEBUG 92 /* disable buffering so the output mixes correctly */ 93 setvbuf(stdout, NULL, _IONBF, 0); 94 setvbuf(stderr, NULL, _IONBF, 0); 95 #endif 96 97 if (argc < 3) usage(argv[0]); 98 99 /* load left-hand files */ 100 for (i = 1; i < argc; i++) { 101 if (strcmp(argv[i], "-r") == 0) { 102 i++; 103 break; 104 } 105 if (argc == 3 && i == 2) break; 106 load_file(argv[i], 0); 107 n1++; 108 } 109 110 /* load right-hand files */ 111 for (; i < argc; i++) { 112 load_file(argv[i], 1); 113 n2++; 114 } 115 116 if (n1 < 1 || n2 < 1) usage(argv[0]); 117 118 /* report analysis results */ 119 print_report(); 120 return 0; 121 } 122 123 static double compute_sig(double avg1, double var1, double avg2, double var2) { 124 double df, t, var; 125 126 /* prevent division by zero with lack of variance */ 127 var = var1 / n1 + var2 / n2; 128 if (var <= 0 || n1 <= 1 || n2 <= 1) return -1; 129 130 /* do we have enough degrees of freedom? */ 131 df = var * var / ( 132 var1 * var1 / (n1 * n1 * (n1 - 1)) + 133 var2 * var2 / (n2 * n2 * (n2 - 1))); 134 if (df < 1) return -1; 135 136 /* perform t-test */ 137 t = (avg1 - avg2) / sqrt(var); 138 return student_t_p_2tail(t, df); 139 } 140 141 static void compute_stats(const struct symbol_count *count, unsigned n, 142 double *avg, double *var) { 143 double sum; 144 145 assert(count); 146 assert(avg); 147 assert(var); 148 149 sum = count->sum; 150 if (n < 1) { 151 *avg = 0; 152 } else { 153 *avg = sum / n; 154 } 155 156 if (n < 2) { 157 *var = 0; 158 } else { 159 *var = (count->sum2 - sum * sum / n) / (n - 1); 160 } 161 } 162 163 static void load_file(const char *path, int count_index) { 164 char binary[PROC_NAME_LEN]; 165 FILE *file; 166 int line; 167 char name[SYMBOL_NAME_SIZE]; 168 unsigned long samples; 169 170 assert(path); 171 assert(count_index == 0 || count_index == 1); 172 173 file = fopen(path, "r"); 174 if (!file) { 175 fprintf(stderr, "error: cannot open \"%s\": %s\n", 176 path, strerror(errno)); 177 exit(1); 178 } 179 180 line = 1; 181 while (read_line(file, path, line++, binary, name, &samples)) { 182 symbol_tally(binary, name, samples, count_index); 183 } 184 185 fclose(file); 186 } 187 188 static void *malloc_checked(size_t size) { 189 void *p; 190 if (!size) return NULL; 191 p = malloc(size); 192 if (!p) { 193 fprintf(stderr, "error: malloc cannot allocate %lu bytes: %s\n", 194 (unsigned long) size, strerror(errno)); 195 exit(-1); 196 } 197 return p; 198 } 199 200 static void print_report(void) { 201 unsigned i, index, symbol_count; 202 struct symbol_info *symbol, **symbol_list; 203 204 /* list the symbols in an array for sorting */ 205 symbol_count = symbols_count(); 206 symbol_list = MALLOC_CHECKED(struct symbol_info *, symbol_count); 207 index = 0; 208 for (symbol = symbols; symbol; symbol = symbol->next) { 209 symbol_list[index++] = symbol; 210 211 /* sort by difference in average, multiply both sides by 212 * n1 * n2 to avoid division 213 */ 214 symbol->diff = (long) (symbol->count[1].sum * n1) - 215 (long) (symbol->count[0].sum * n2); 216 } 217 assert(index == symbol_count); 218 219 /* sort symbols */ 220 qsort(symbol_list, symbol_count, sizeof(struct symbol_info *), 221 symbol_qsort_compare); 222 223 printf("%-*s %-*s ------avg------ ----stdev---- diff sig\n", 224 PROC_NAME_WIDTH, "binary", SYMBOL_NAME_WIDTH, "symbol"); 225 printf("%-*s left right left right\n", 226 PROC_NAME_WIDTH + SYMBOL_NAME_WIDTH + 1, ""); 227 printf("\n"); 228 for (i = 0; i < symbol_count; i++) { 229 if (i > 0 && symbol_list[i]->class >= sc_process && 230 symbol_list[i]->class != symbol_list[i - 1]->class) { 231 printf("\n"); 232 } 233 print_report_line(symbol_list[i]); 234 } 235 printf("\n"); 236 printf("significance levels (two-tailed):\n"); 237 printf(" * p < 0.05\n"); 238 printf(" ** p < 0.01\n"); 239 printf(" *** p < 0.001\n"); 240 free(symbol_list); 241 } 242 243 static void print_report_line(const struct symbol_info *symbol) { 244 double avg1, avg2, p, var1, var2; 245 246 /* compute statistics; t is Welch's t, which is a t-test that allows 247 * for unpaired samples with unequal variance; df is the degrees of 248 * freedom as given by the Welch-Satterthwaite equation 249 */ 250 compute_stats(&symbol->count[0], n1, &avg1, &var1); 251 compute_stats(&symbol->count[1], n2, &avg2, &var2); 252 p = compute_sig(avg1, var1, avg2, var2); 253 254 /* list applicable values */ 255 assert(PROC_NAME_WIDTH <= PROC_NAME_LEN); 256 assert(SYMBOL_NAME_WIDTH <= SYMBOL_NAME_SIZE); 257 printf("%-*.*s %-*.*s", 258 PROC_NAME_WIDTH, PROC_NAME_WIDTH, symbol->binary, 259 SYMBOL_NAME_WIDTH, SYMBOL_NAME_WIDTH, symbol->name); 260 if (symbol->count[0].sum > 0) { 261 printf("%8.0f", avg1); 262 } else { 263 printf(" "); 264 } 265 if (symbol->count[1].sum > 0) { 266 printf("%8.0f", avg2); 267 } else { 268 printf(" "); 269 } 270 if (symbol->count[0].sum > 0 && n1 >= 2) { 271 printf("%7.0f", sqrt(var1)); 272 } else { 273 printf(" "); 274 } 275 if (symbol->count[1].sum > 0 && n2 >= 2) { 276 printf("%7.0f", sqrt(var2)); 277 } else { 278 printf(" "); 279 } 280 printf("%8.0f ", avg2 - avg1); 281 if (p >= 0) { 282 if (p <= 0.05) printf("*"); 283 if (p <= 0.01) printf("*"); 284 if (p <= 0.001) printf("*"); 285 } 286 printf("\n"); 287 } 288 289 static int read_line(FILE *file, const char *path, int line, char *binary, 290 char *name, unsigned long *samples) { 291 int c, index; 292 293 assert(file); 294 assert(binary); 295 assert(name); 296 assert(samples); 297 298 c = fgetc(file); 299 if (c == EOF) return 0; 300 301 /* read binary name, truncating if necessary */ 302 index = 0; 303 while (c != '\t' && c != '\n') { 304 if (index < PROC_NAME_LEN) binary[index++] = c; 305 c = fgetc(file); 306 } 307 if (index < PROC_NAME_LEN) binary[index] = 0; 308 309 /* read tab */ 310 if (c != '\t') { 311 fprintf(stderr, "error: garbage %d after binary name " 312 "(\"%s\", line %d)\n", c, path, line); 313 exit(1); 314 } 315 c = fgetc(file); 316 317 /* read symbol name, truncating if necessary */ 318 index = 0; 319 while (c != '\t' && c != '\n') { 320 if (index < SYMBOL_NAME_SIZE) name[index++] = c; 321 c = fgetc(file); 322 } 323 if (index < SYMBOL_NAME_SIZE) name[index] = 0; 324 325 /* read tab */ 326 if (c != '\t') { 327 fprintf(stderr, "error: garbage %d after symbol name " 328 "(\"%s\", line %d)\n", c, path, line); 329 exit(1); 330 } 331 c = fgetc(file); 332 333 /* read number of samples */ 334 *samples = 0; 335 while (c >= '0' && c <= '9') { 336 *samples = *samples * 10 + (c - '0'); 337 c = fgetc(file); 338 } 339 340 /* read newline */ 341 if (c != '\n') { 342 fprintf(stderr, "error: garbage %d after sample count " 343 "(\"%s\", line %d)\n", c, path, line); 344 exit(1); 345 } 346 return 1; 347 } 348 349 static unsigned string_hash(const char *s, size_t size) { 350 unsigned result = 0; 351 352 assert(s); 353 354 while (*s && size-- > 0) { 355 result = result * 31 + *(s++); 356 } 357 return result; 358 } 359 360 static enum symbol_class symbol_classify(const char *binary, const char *name) { 361 if (strncmp(binary, "(total)", PROC_NAME_LEN) == 0) return sc_total; 362 if (strncmp(binary, "(idle)", PROC_NAME_LEN) == 0) return sc_idle; 363 if (strncmp(binary, "(system)", PROC_NAME_LEN) == 0) return sc_system; 364 if (strncmp(binary, "(user)", PROC_NAME_LEN) == 0) return sc_user; 365 if (strncmp(name, "(total)", SYMBOL_NAME_SIZE) == 0) return sc_process; 366 return sc_symbol; 367 } 368 369 static struct symbol_info *symbol_find_or_add(const char *binary, 370 const char *name) { 371 struct symbol_info **ptr, *symbol; 372 373 assert(binary); 374 assert(name); 375 376 /* look up symbol in hash table */ 377 ptr = &symbol_hashtab[symbol_hash(binary, name) % SYMBOL_HASHTAB_SIZE]; 378 while ((symbol = *ptr)) { 379 if (strncmp(symbol->binary, binary, PROC_NAME_LEN) == 0 && 380 strncmp(symbol->name, name, SYMBOL_NAME_SIZE) == 0) { 381 return symbol; 382 } 383 ptr = &symbol->hashtab_next; 384 } 385 386 /* unknown symbol, add it */ 387 *ptr = symbol = MALLOC_CHECKED(struct symbol_info, 1); 388 memset(symbol, 0, sizeof(struct symbol_info)); 389 strncpy(symbol->binary, binary, PROC_NAME_LEN); 390 strncpy(symbol->name, name, SYMBOL_NAME_SIZE); 391 symbol->count[0].min = ~0UL; 392 symbol->count[1].min = ~0UL; 393 symbol->class = symbol_classify(binary, name); 394 395 /* also add to linked list */ 396 symbol->next = symbols; 397 symbols = symbol; 398 return symbol; 399 } 400 401 static unsigned symbol_hash(const char *binary, const char *name) { 402 return string_hash(binary, PROC_NAME_LEN) + 403 string_hash(name, SYMBOL_NAME_SIZE); 404 } 405 406 static int symbol_qsort_compare(const void *p1, const void *p2) { 407 int r; 408 const struct symbol_info *s1, *s2; 409 410 assert(p1); 411 assert(p2); 412 s1 = *(const struct symbol_info **) p1; 413 s2 = *(const struct symbol_info **) p2; 414 assert(s1); 415 assert(s2); 416 417 /* totals come first */ 418 if (s1->class < s2->class) return -1; 419 if (s1->class > s2->class) return 1; 420 421 /* sort by difference in average */ 422 if (s1->diff < s2->diff) return -1; 423 if (s1->diff > s2->diff) return 1; 424 425 /* otherwise, by name */ 426 r = strncmp(s1->binary, s2->binary, PROC_NAME_LEN); 427 if (r) return r; 428 429 return strncmp(s1->name, s2->name, SYMBOL_NAME_SIZE); 430 } 431 432 static void symbol_tally(const char *binary, const char *name, 433 unsigned long samples, int count_index) { 434 struct symbol_count *count; 435 struct symbol_info *symbol; 436 437 /* look up or add symbol */ 438 symbol = symbol_find_or_add(binary, name); 439 440 /* update count */ 441 count = &symbol->count[count_index]; 442 count->sum += samples; 443 count->sum2 += (unsigned long long) samples * samples; 444 if (count->min > samples) count->min = samples; 445 if (count->max < samples) count->max = samples; 446 } 447 448 static unsigned symbols_count(void) { 449 int count = 0; 450 const struct symbol_info *symbol; 451 452 for (symbol = symbols; symbol; symbol = symbol->next) { 453 count++; 454 } 455 return count; 456 } 457 458 static void usage(const char *argv0) { 459 printf("usage:\n"); 460 printf(" %s leftfile rightfile\n", argv0); 461 printf(" %s leftfile... -r rightfile...\n", argv0); 462 printf("\n"); 463 printf("sprofdiff compares the sprofile information from multiple\n"); 464 printf("output files of sprofalyze -d.\n"); 465 exit(1); 466 } 467