xref: /minix/minix/commands/sprofdiff/sprofdiff.c (revision 83133719)
1 #include <assert.h>
2 #include <errno.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8 #include <minix/type.h>
9 
10 #include "tdist.h"
11 
12 /* user-configurable settings */
13 #define DEBUG 0
14 
15 #define PROC_NAME_WIDTH 10
16 
17 #define SYMBOL_NAME_WIDTH 24
18 
19 /* types */
20 #define SYMBOL_HASHTAB_SIZE 1024
21 
22 #define SYMBOL_NAME_SIZE 52
23 
24 struct symbol_count {
25 	unsigned long sum;
26 	unsigned long long sum2;
27 	unsigned long min;
28 	unsigned long max;
29 };
30 
31 enum symbol_class {
32 	sc_total,
33 	sc_idle,
34 	sc_system,
35 	sc_user,
36 	sc_process,
37 	sc_symbol
38 };
39 
40 struct symbol_info {
41 	struct symbol_info *next;
42 	struct symbol_info *hashtab_next;
43 	char binary[PROC_NAME_LEN];
44 	char name[SYMBOL_NAME_SIZE];
45 	struct symbol_count count[2];
46 	long diff;
47 	enum symbol_class class;
48 };
49 
50 /* global variables */
51 static unsigned n1, n2;
52 static struct symbol_info *symbols;
53 static struct symbol_info *symbol_hashtab[SYMBOL_HASHTAB_SIZE];
54 
55 /* prototypes */
56 static double compute_sig(double avg1, double var1, double avg2, double var2);
57 static void compute_stats(const struct symbol_count *count, unsigned n,
58 	double *avg, double *var);
59 static void load_file(const char *path, int count_index);
60 static void *malloc_checked(size_t size);
61 static void print_report(void);
62 static void print_report_line(const struct symbol_info *symbol);
63 static int read_line(FILE *file, const char *path, int line, char *binary,
64 	char *name, unsigned long *samples);
65 static enum symbol_class symbol_classify(const char *binary, const char *name);
66 static unsigned string_hash(const char *s, size_t size);
67 static struct symbol_info *symbol_find_or_add(const char *binary,
68 	const char *name);
69 static unsigned symbol_hash(const char *binary, const char *name);
70 static int symbol_qsort_compare(const void *p1, const void *p2);
71 static void symbol_tally(const char *binary, const char *name,
72 	unsigned long samples, int count_index);
73 static unsigned symbols_count(void);
74 static void usage(const char *argv0);
75 
76 #define MALLOC_CHECKED(type, count) \
77 	((type *) malloc_checked(sizeof(type) * (count)))
78 
79 #if DEBUG
80 #define dprintf(...) do { 						\
81 	fprintf(stderr, "debug(%s:%d): ", __FUNCTION__, __LINE__); 	\
82 	fprintf(stderr, __VA_ARGS__); 					\
83 } while(0)
84 #else
85 #define dprintf(...)
86 #endif
87 
88 int main(int argc, char **argv) {
89 	int i;
90 
91 #ifdef DEBUG
92 	/* disable buffering so the output mixes correctly */
93 	setvbuf(stdout, NULL, _IONBF, 0);
94 	setvbuf(stderr, NULL, _IONBF, 0);
95 #endif
96 
97 	if (argc < 3) usage(argv[0]);
98 
99 	/* load left-hand files */
100 	for (i = 1; i < argc; i++) {
101 		if (strcmp(argv[i], "-r") == 0) {
102 			i++;
103 			break;
104 		}
105 		if (argc == 3 && i == 2) break;
106 		load_file(argv[i], 0);
107 		n1++;
108 	}
109 
110 	/* load right-hand files */
111 	for (; i < argc; i++) {
112 		load_file(argv[i], 1);
113 		n2++;
114 	}
115 
116 	if (n1 < 1 || n2 < 1) usage(argv[0]);
117 
118 	/* report analysis results */
119 	print_report();
120 	return 0;
121 }
122 
123 static double compute_sig(double avg1, double var1, double avg2, double var2) {
124 	double df, t, var;
125 
126 	/* prevent division by zero with lack of variance */
127 	var = var1 / n1 + var2 / n2;
128 	if (var <= 0 || n1 <= 1 || n2 <= 1) return -1;
129 
130 	/* do we have enough degrees of freedom? */
131 	df = var * var / (
132 		var1 * var1 / (n1 * n1 * (n1 - 1)) +
133 		var2 * var2 / (n2 * n2 * (n2 - 1)));
134 	if (df < 1) return -1;
135 
136 	/* perform t-test */
137 	t = (avg1 - avg2) / sqrt(var);
138 	return student_t_p_2tail(t, df);
139 }
140 
141 static void compute_stats(const struct symbol_count *count, unsigned n,
142 	double *avg, double *var) {
143 	double sum;
144 
145 	assert(count);
146 	assert(avg);
147 	assert(var);
148 
149 	sum = count->sum;
150 	if (n < 1) {
151 		*avg = 0;
152 	} else {
153 		*avg = sum / n;
154 	}
155 
156 	if (n < 2) {
157 		*var = 0;
158 	} else {
159 		*var = (count->sum2 - sum * sum / n) / (n - 1);
160 	}
161 }
162 
163 static void load_file(const char *path, int count_index) {
164 	char binary[PROC_NAME_LEN];
165 	FILE *file;
166 	int line;
167 	char name[SYMBOL_NAME_SIZE];
168 	unsigned long samples;
169 
170 	assert(path);
171 	assert(count_index == 0 || count_index == 1);
172 
173 	file = fopen(path, "r");
174 	if (!file) {
175 		fprintf(stderr, "error: cannot open \"%s\": %s\n",
176 			path, strerror(errno));
177 		exit(1);
178 	}
179 
180 	line = 1;
181 	while (read_line(file, path, line++, binary, name, &samples)) {
182 		symbol_tally(binary, name, samples, count_index);
183 	}
184 
185 	fclose(file);
186 }
187 
188 static void *malloc_checked(size_t size) {
189 	void *p;
190 	if (!size) return NULL;
191 	p = malloc(size);
192 	if (!p) {
193 		fprintf(stderr, "error: malloc cannot allocate %lu bytes: %s\n",
194 			(unsigned long) size, strerror(errno));
195 		exit(-1);
196 	}
197 	return p;
198 }
199 
200 static void print_report(void) {
201 	unsigned i, index, symbol_count;
202 	struct symbol_info *symbol, **symbol_list;
203 
204 	/* list the symbols in an array for sorting */
205 	symbol_count = symbols_count();
206 	symbol_list = MALLOC_CHECKED(struct symbol_info *, symbol_count);
207 	index = 0;
208 	for (symbol = symbols; symbol; symbol = symbol->next) {
209 		symbol_list[index++] = symbol;
210 
211 		/* sort by difference in average, multiply both sides by
212 		 * n1 * n2 to avoid division
213 		 */
214 		symbol->diff = (long) (symbol->count[1].sum * n1) -
215 			(long) (symbol->count[0].sum * n2);
216 	}
217 	assert(index == symbol_count);
218 
219 	/* sort symbols  */
220 	qsort(symbol_list, symbol_count, sizeof(struct symbol_info *),
221 		symbol_qsort_compare);
222 
223 	printf("%-*s %-*s ------avg------ ----stdev----    diff sig\n",
224 		PROC_NAME_WIDTH, "binary", SYMBOL_NAME_WIDTH, "symbol");
225 	printf("%-*s    left   right   left  right\n",
226 		PROC_NAME_WIDTH + SYMBOL_NAME_WIDTH + 1, "");
227 	printf("\n");
228 	for (i = 0; i < symbol_count; i++) {
229 		if (i > 0 && symbol_list[i]->class >= sc_process &&
230 			symbol_list[i]->class != symbol_list[i - 1]->class) {
231 			printf("\n");
232 		}
233 		print_report_line(symbol_list[i]);
234 	}
235 	printf("\n");
236 	printf("significance levels (two-tailed):\n");
237 	printf("  *    p < 0.05\n");
238 	printf("  **   p < 0.01\n");
239 	printf("  ***  p < 0.001\n");
240 	free(symbol_list);
241 }
242 
243 static void print_report_line(const struct symbol_info *symbol) {
244 	double avg1, avg2, p, var1, var2;
245 
246 	/* compute statistics; t is Welch's t, which is a t-test that allows
247 	 * for unpaired samples with unequal variance; df is the degrees of
248 	 * freedom as given by the Welch-Satterthwaite equation
249 	 */
250 	compute_stats(&symbol->count[0], n1, &avg1, &var1);
251 	compute_stats(&symbol->count[1], n2, &avg2, &var2);
252 	p = compute_sig(avg1, var1, avg2, var2);
253 
254 	/* list applicable values */
255 	assert(PROC_NAME_WIDTH <= PROC_NAME_LEN);
256 	assert(SYMBOL_NAME_WIDTH <= SYMBOL_NAME_SIZE);
257 	printf("%-*.*s %-*.*s",
258 		PROC_NAME_WIDTH, PROC_NAME_WIDTH, symbol->binary,
259 		SYMBOL_NAME_WIDTH, SYMBOL_NAME_WIDTH, symbol->name);
260 	if (symbol->count[0].sum > 0) {
261 		printf("%8.0f", avg1);
262 	} else {
263 		printf("        ");
264 	}
265 	if (symbol->count[1].sum > 0) {
266 		printf("%8.0f", avg2);
267 	} else {
268 		printf("        ");
269 	}
270 	if (symbol->count[0].sum > 0 && n1 >= 2) {
271 		printf("%7.0f", sqrt(var1));
272 	} else {
273 		printf("       ");
274 	}
275 	if (symbol->count[1].sum > 0 && n2 >= 2) {
276 		printf("%7.0f", sqrt(var2));
277 	} else {
278 		printf("       ");
279 	}
280 	printf("%8.0f ", avg2 - avg1);
281 	if (p >= 0) {
282 		if (p <= 0.05) printf("*");
283 		if (p <= 0.01) printf("*");
284 		if (p <= 0.001) printf("*");
285 	}
286 	printf("\n");
287 }
288 
289 static int read_line(FILE *file, const char *path, int line, char *binary,
290 	char *name, unsigned long *samples) {
291 	int c, index;
292 
293 	assert(file);
294 	assert(binary);
295 	assert(name);
296 	assert(samples);
297 
298 	c = fgetc(file);
299 	if (c == EOF) return 0;
300 
301 	/* read binary name, truncating if necessary */
302 	index = 0;
303 	while (c != '\t' && c != '\n') {
304 		if (index < PROC_NAME_LEN) binary[index++] = c;
305 		c = fgetc(file);
306 	}
307 	if (index < PROC_NAME_LEN) binary[index] = 0;
308 
309 	/* read tab */
310 	if (c != '\t') {
311 		fprintf(stderr, "error: garbage %d after binary name "
312 			"(\"%s\", line %d)\n", c, path, line);
313 		exit(1);
314 	}
315 	c = fgetc(file);
316 
317 	/* read symbol name, truncating if necessary */
318 	index = 0;
319 	while (c != '\t' && c != '\n') {
320 		if (index < SYMBOL_NAME_SIZE) name[index++] = c;
321 		c = fgetc(file);
322 	}
323 	if (index < SYMBOL_NAME_SIZE) name[index] = 0;
324 
325 	/* read tab */
326 	if (c != '\t') {
327 		fprintf(stderr, "error: garbage %d after symbol name "
328 			"(\"%s\", line %d)\n", c, path, line);
329 		exit(1);
330 	}
331 	c = fgetc(file);
332 
333 	/* read number of samples */
334 	*samples = 0;
335 	while (c >= '0' && c <= '9') {
336 		*samples = *samples * 10 + (c - '0');
337 		c = fgetc(file);
338 	}
339 
340 	/* read newline */
341 	if (c != '\n') {
342 		fprintf(stderr, "error: garbage %d after sample count "
343 			"(\"%s\", line %d)\n", c, path, line);
344 		exit(1);
345 	}
346 	return 1;
347 }
348 
349 static unsigned string_hash(const char *s, size_t size) {
350 	unsigned result = 0;
351 
352 	assert(s);
353 
354 	while (*s && size-- > 0) {
355 		result = result * 31 + *(s++);
356 	}
357 	return result;
358 }
359 
360 static enum symbol_class symbol_classify(const char *binary, const char *name) {
361 	if (strncmp(binary, "(total)", PROC_NAME_LEN) == 0) return sc_total;
362 	if (strncmp(binary, "(idle)", PROC_NAME_LEN) == 0) return sc_idle;
363 	if (strncmp(binary, "(system)", PROC_NAME_LEN) == 0) return sc_system;
364 	if (strncmp(binary, "(user)", PROC_NAME_LEN) == 0) return sc_user;
365 	if (strncmp(name, "(total)", SYMBOL_NAME_SIZE) == 0) return sc_process;
366 	return sc_symbol;
367 }
368 
369 static struct symbol_info *symbol_find_or_add(const char *binary,
370 	const char *name) {
371 	struct symbol_info **ptr, *symbol;
372 
373 	assert(binary);
374 	assert(name);
375 
376 	/* look up symbol in hash table */
377 	ptr = &symbol_hashtab[symbol_hash(binary, name) % SYMBOL_HASHTAB_SIZE];
378 	while ((symbol = *ptr)) {
379 		if (strncmp(symbol->binary, binary, PROC_NAME_LEN) == 0 &&
380 			strncmp(symbol->name, name, SYMBOL_NAME_SIZE) == 0) {
381 			return symbol;
382 		}
383 		ptr = &symbol->hashtab_next;
384 	}
385 
386 	/* unknown symbol, add it */
387 	*ptr = symbol = MALLOC_CHECKED(struct symbol_info, 1);
388 	memset(symbol, 0, sizeof(struct symbol_info));
389 	strncpy(symbol->binary, binary, PROC_NAME_LEN);
390 	strncpy(symbol->name, name, SYMBOL_NAME_SIZE);
391 	symbol->count[0].min = ~0UL;
392 	symbol->count[1].min = ~0UL;
393 	symbol->class = symbol_classify(binary, name);
394 
395 	/* also add to linked list */
396 	symbol->next = symbols;
397 	symbols = symbol;
398 	return symbol;
399 }
400 
401 static unsigned symbol_hash(const char *binary, const char *name) {
402 	return string_hash(binary, PROC_NAME_LEN) +
403 		string_hash(name, SYMBOL_NAME_SIZE);
404 }
405 
406 static int symbol_qsort_compare(const void *p1, const void *p2) {
407 	int r;
408 	const struct symbol_info *s1, *s2;
409 
410 	assert(p1);
411 	assert(p2);
412 	s1 = *(const struct symbol_info **) p1;
413 	s2 = *(const struct symbol_info **) p2;
414 	assert(s1);
415 	assert(s2);
416 
417 	/* totals come first */
418 	if (s1->class < s2->class) return -1;
419 	if (s1->class > s2->class) return 1;
420 
421 	/* sort by difference in average */
422 	if (s1->diff < s2->diff) return -1;
423 	if (s1->diff > s2->diff) return 1;
424 
425 	/* otherwise, by name */
426 	r = strncmp(s1->binary, s2->binary, PROC_NAME_LEN);
427 	if (r) return r;
428 
429 	return strncmp(s1->name, s2->name, SYMBOL_NAME_SIZE);
430 }
431 
432 static void symbol_tally(const char *binary, const char *name,
433 	unsigned long samples, int count_index) {
434 	struct symbol_count *count;
435 	struct symbol_info *symbol;
436 
437 	/* look up or add symbol */
438 	symbol = symbol_find_or_add(binary, name);
439 
440 	/* update count */
441 	count = &symbol->count[count_index];
442 	count->sum += samples;
443 	count->sum2 += (unsigned long long) samples * samples;
444 	if (count->min > samples) count->min = samples;
445 	if (count->max < samples) count->max = samples;
446 }
447 
448 static unsigned symbols_count(void) {
449 	int count = 0;
450 	const struct symbol_info *symbol;
451 
452 	for (symbol = symbols; symbol; symbol = symbol->next) {
453 		count++;
454 	}
455 	return count;
456 }
457 
458 static void usage(const char *argv0) {
459 	printf("usage:\n");
460 	printf("  %s leftfile rightfile\n", argv0);
461 	printf("  %s leftfile... -r rightfile...\n", argv0);
462 	printf("\n");
463 	printf("sprofdiff compares the sprofile information from multiple\n");
464 	printf("output files of sprofalyze -d.\n");
465 	exit(1);
466 }
467