xref: /original-bsd/usr.bin/gprof/gprof.c (revision 7eb91141)
1 /*
2  * Copyright (c) 1983 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 char copyright[] =
10 "@(#) Copyright (c) 1983 Regents of the University of California.\n\
11  All rights reserved.\n";
12 #endif /* not lint */
13 
14 #ifndef lint
15 static char sccsid[] = "@(#)gprof.c	5.10 (Berkeley) 03/02/92";
16 #endif /* not lint */
17 
18 #include "gprof.h"
19 
/* program name, used as the prefix of diagnostics */
char	*whoami = "gprof";

    /*
     *	routines that main() always places on both the -E and the -e
     *	lists; the list is terminated by a null pointer.
     */
char	*defaultEs[] = {
    "mcount",
    "__mcleanup",
    0
};
26 
27 static struct phdr	h;
28 
29 main(argc, argv)
30     int argc;
31     char **argv;
32 {
33     char	**sp;
34     nltype	**timesortnlp;
35 
36     --argc;
37     argv++;
38     debug = 0;
39     bflag = TRUE;
40     while ( *argv != 0 && **argv == '-' ) {
41 	(*argv)++;
42 	switch ( **argv ) {
43 	case 'a':
44 	    aflag = TRUE;
45 	    break;
46 	case 'b':
47 	    bflag = FALSE;
48 	    break;
49 	case 'C':
50 	    Cflag = TRUE;
51 	    cyclethreshold = atoi( *++argv );
52 	    break;
53 	case 'c':
54 #if defined(vax) || defined(tahoe)
55 	    cflag = TRUE;
56 #else
57 	    fprintf(stderr, "gprof: -c isn't supported on this architecture yet\n");
58 	    exit(1);
59 #endif
60 	    break;
61 	case 'd':
62 	    dflag = TRUE;
63 	    debug |= atoi( *++argv );
64 	    debug |= ANYDEBUG;
65 #	    ifdef DEBUG
66 		printf("[main] debug = %d\n", debug);
67 #	    else not DEBUG
68 		printf("%s: -d ignored\n", whoami);
69 #	    endif DEBUG
70 	    break;
71 	case 'E':
72 	    ++argv;
73 	    addlist( Elist , *argv );
74 	    Eflag = TRUE;
75 	    addlist( elist , *argv );
76 	    eflag = TRUE;
77 	    break;
78 	case 'e':
79 	    addlist( elist , *++argv );
80 	    eflag = TRUE;
81 	    break;
82 	case 'F':
83 	    ++argv;
84 	    addlist( Flist , *argv );
85 	    Fflag = TRUE;
86 	    addlist( flist , *argv );
87 	    fflag = TRUE;
88 	    break;
89 	case 'f':
90 	    addlist( flist , *++argv );
91 	    fflag = TRUE;
92 	    break;
93 	case 'k':
94 	    addlist( kfromlist , *++argv );
95 	    addlist( ktolist , *++argv );
96 	    kflag = TRUE;
97 	    break;
98 	case 's':
99 	    sflag = TRUE;
100 	    break;
101 	case 'z':
102 	    zflag = TRUE;
103 	    break;
104 	}
105 	argv++;
106     }
107     if ( *argv != 0 ) {
108 	a_outname  = *argv;
109 	argv++;
110     } else {
111 	a_outname  = A_OUTNAME;
112     }
113     if ( *argv != 0 ) {
114 	gmonname = *argv;
115 	argv++;
116     } else {
117 	gmonname = GMONNAME;
118     }
119 	/*
120 	 *	turn off default functions
121 	 */
122     for ( sp = &defaultEs[0] ; *sp ; sp++ ) {
123 	Eflag = TRUE;
124 	addlist( Elist , *sp );
125 	eflag = TRUE;
126 	addlist( elist , *sp );
127     }
128 	/*
129 	 *	get information about a.out file.
130 	 */
131     getnfile();
132 	/*
133 	 *	get information about mon.out file(s).
134 	 */
135     do	{
136 	getpfile( gmonname );
137 	if ( *argv != 0 ) {
138 	    gmonname = *argv;
139 	}
140     } while ( *argv++ != 0 );
141 	/*
142 	 *	how many ticks per second?
143 	 *	if we can't tell, report time in ticks.
144 	 */
145     if (hz == 0) {
146 	hz = 1;
147 	fprintf(stderr, "time is in ticks, not seconds\n");
148     }
149 	/*
150 	 *	dump out a gmon.sum file if requested
151 	 */
152     if ( sflag ) {
153 	dumpsum( GMONSUM );
154     }
155 	/*
156 	 *	assign samples to procedures
157 	 */
158     asgnsamples();
159 	/*
160 	 *	assemble the dynamic profile
161 	 */
162     timesortnlp = doarcs();
163 	/*
164 	 *	print the dynamic profile
165 	 */
166     printgprof( timesortnlp );
167 	/*
168 	 *	print the flat profile
169 	 */
170     printprof();
171 	/*
172 	 *	print the index
173 	 */
174     printindex();
175     done();
176 }
177 
178     /*
179      * Set up string and symbol tables from a.out.
180      *	and optionally the text space.
181      * On return symbol table is sorted by value.
182      */
183 getnfile()
184 {
185     FILE	*nfile;
186     int		valcmp();
187 
188     nfile = fopen( a_outname ,"r");
189     if (nfile == NULL) {
190 	perror( a_outname );
191 	done();
192     }
193     fread(&xbuf, 1, sizeof(xbuf), nfile);
194     if (N_BADMAG(xbuf)) {
195 	fprintf(stderr, "%s: %s: bad format\n", whoami , a_outname );
196 	done();
197     }
198     getstrtab(nfile);
199     getsymtab(nfile);
200     gettextspace( nfile );
201     qsort(nl, nname, sizeof(nltype), valcmp);
202     fclose(nfile);
203 #   ifdef DEBUG
204 	if ( debug & AOUTDEBUG ) {
205 	    register int j;
206 
207 	    for (j = 0; j < nname; j++){
208 		printf("[getnfile] 0X%08x\t%s\n", nl[j].value, nl[j].name);
209 	    }
210 	}
211 #   endif DEBUG
212 }
213 
214 getstrtab(nfile)
215     FILE	*nfile;
216 {
217 
218     fseek(nfile, (long)(N_SYMOFF(xbuf) + xbuf.a_syms), 0);
219     if (fread(&ssiz, sizeof (ssiz), 1, nfile) == 0) {
220 	fprintf(stderr, "%s: %s: no string table (old format?)\n" ,
221 		whoami , a_outname );
222 	done();
223     }
224     strtab = (char *)calloc(ssiz, 1);
225     if (strtab == NULL) {
226 	fprintf(stderr, "%s: %s: no room for %d bytes of string table",
227 		whoami , a_outname , ssiz);
228 	done();
229     }
230     if (fread(strtab+sizeof(ssiz), ssiz-sizeof(ssiz), 1, nfile) != 1) {
231 	fprintf(stderr, "%s: %s: error reading string table\n",
232 		whoami , a_outname );
233 	done();
234     }
235 }
236 
237     /*
238      * Read in symbol table
239      */
240 getsymtab(nfile)
241     FILE	*nfile;
242 {
243     register long	i;
244     int			askfor;
245     struct nlist	nbuf;
246 
247     /* pass1 - count symbols */
248     fseek(nfile, (long)N_SYMOFF(xbuf), 0);
249     nname = 0;
250     for (i = xbuf.a_syms; i > 0; i -= sizeof(struct nlist)) {
251 	fread(&nbuf, sizeof(nbuf), 1, nfile);
252 	if ( ! funcsymbol( &nbuf ) ) {
253 	    continue;
254 	}
255 	nname++;
256     }
257     if (nname == 0) {
258 	fprintf(stderr, "%s: %s: no symbols\n", whoami , a_outname );
259 	done();
260     }
261     askfor = nname + 1;
262     nl = (nltype *) calloc( askfor , sizeof(nltype) );
263     if (nl == 0) {
264 	fprintf(stderr, "%s: No room for %d bytes of symbol table\n",
265 		whoami, askfor * sizeof(nltype) );
266 	done();
267     }
268 
269     /* pass2 - read symbols */
270     fseek(nfile, (long)N_SYMOFF(xbuf), 0);
271     npe = nl;
272     nname = 0;
273     for (i = xbuf.a_syms; i > 0; i -= sizeof(struct nlist)) {
274 	fread(&nbuf, sizeof(nbuf), 1, nfile);
275 	if ( ! funcsymbol( &nbuf ) ) {
276 #	    ifdef DEBUG
277 		if ( debug & AOUTDEBUG ) {
278 		    printf( "[getsymtab] rejecting: 0x%x %s\n" ,
279 			    nbuf.n_type , strtab + nbuf.n_un.n_strx );
280 		}
281 #	    endif DEBUG
282 	    continue;
283 	}
284 	npe->value = nbuf.n_value;
285 	npe->name = strtab+nbuf.n_un.n_strx;
286 #	ifdef DEBUG
287 	    if ( debug & AOUTDEBUG ) {
288 		printf( "[getsymtab] %d %s 0x%08x\n" ,
289 			nname , npe -> name , npe -> value );
290 	    }
291 #	endif DEBUG
292 	npe++;
293 	nname++;
294     }
295     npe->value = -1;
296 }
297 
298     /*
299      *	read in the text space of an a.out file
300      */
301 gettextspace( nfile )
302     FILE	*nfile;
303 {
304     char	*malloc();
305 
306     if ( cflag == 0 ) {
307 	return;
308     }
309     textspace = (u_char *) malloc( xbuf.a_text );
310     if ( textspace == 0 ) {
311 	fprintf( stderr , "%s: ran out room for %d bytes of text space:  " ,
312 			whoami , xbuf.a_text );
313 	fprintf( stderr , "can't do -c\n" );
314 	return;
315     }
316     (void) fseek( nfile , N_TXTOFF( xbuf ) , 0 );
317     if ( fread( textspace , 1 , xbuf.a_text , nfile ) != xbuf.a_text ) {
318 	fprintf( stderr , "%s: couldn't read text space:  " , whoami );
319 	fprintf( stderr , "can't do -c\n" );
320 	free( textspace );
321 	textspace = 0;
322 	return;
323     }
324 }
325     /*
326      *	information from a gmon.out file is in two parts:
327      *	an array of sampling hits within pc ranges,
328      *	and the arcs.
329      */
330 getpfile(filename)
331     char *filename;
332 {
333     FILE		*pfile;
334     FILE		*openpfile();
335     struct rawarc	arc;
336 
337     pfile = openpfile(filename);
338     readsamples(pfile);
339 	/*
340 	 *	the rest of the file consists of
341 	 *	a bunch of <from,self,count> tuples.
342 	 */
343     while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
344 #	ifdef DEBUG
345 	    if ( debug & SAMPLEDEBUG ) {
346 		printf( "[getpfile] frompc 0x%x selfpc 0x%x count %d\n" ,
347 			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
348 	    }
349 #	endif DEBUG
350 	    /*
351 	     *	add this arc
352 	     */
353 	tally( &arc );
354     }
355     fclose(pfile);
356 }
357 
358 FILE *
359 openpfile(filename)
360     char *filename;
361 {
362     struct phdr		tmp;
363     FILE		*pfile;
364     int			size;
365     int			rate;
366 
367     if((pfile = fopen(filename, "r")) == NULL) {
368 	perror(filename);
369 	done();
370     }
371     fread(&tmp, sizeof(struct phdr), 1, pfile);
372     if ( s_highpc != 0 && ( tmp.lpc != h.lpc ||
373 	 tmp.hpc != h.hpc || tmp.ncnt != h.ncnt ) ) {
374 	fprintf(stderr, "%s: incompatible with first gmon file\n", filename);
375 	done();
376     }
377     h = tmp;
378     if ( h.version == GMONVERSION ) {
379 	size = sizeof(struct phdr);
380 	rate = h.profrate;
381 
382     } else {
383 	size = sizeof(struct ophdr);
384 	fseek(pfile, size, SEEK_SET);
385 	h.profrate = rate = hertz();
386 	h.version = GMONVERSION;
387     }
388     if (hz == 0) {
389 	hz = rate;
390     } else if (hz != rate) {
391 	fprintf(stderr,
392 	    "%s: profile clock rate (%d) %s (%d) in first gmon file\n",
393 	    filename, rate, "incompatible with clock rate", hz);
394 	done();
395     }
396     s_lowpc = (unsigned long) h.lpc;
397     s_highpc = (unsigned long) h.hpc;
398     lowpc = (unsigned long)h.lpc / sizeof(UNIT);
399     highpc = (unsigned long)h.hpc / sizeof(UNIT);
400     sampbytes = h.ncnt - size;
401     nsamples = sampbytes / sizeof (UNIT);
402 #   ifdef DEBUG
403 	if ( debug & SAMPLEDEBUG ) {
404 	    printf( "[openpfile] hdr.lpc 0x%x hdr.hpc 0x%x hdr.ncnt %d\n",
405 		h.lpc , h.hpc , h.ncnt );
406 	    printf( "[openpfile]   s_lowpc 0x%x   s_highpc 0x%x\n" ,
407 		s_lowpc , s_highpc );
408 	    printf( "[openpfile]     lowpc 0x%x     highpc 0x%x\n" ,
409 		lowpc , highpc );
410 	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
411 		sampbytes , nsamples );
412 	    printf( "[openpfile] sample rate %d\n" , hz );
413 	}
414 #   endif DEBUG
415     return(pfile);
416 }
417 
418 tally( rawp )
419     struct rawarc	*rawp;
420 {
421     nltype		*parentp;
422     nltype		*childp;
423 
424     parentp = nllookup( rawp -> raw_frompc );
425     childp = nllookup( rawp -> raw_selfpc );
426     if ( parentp == 0 || childp == 0 )
427 	return;
428     if ( kflag
429 	 && onlist( kfromlist , parentp -> name )
430 	 && onlist( ktolist , childp -> name ) ) {
431 	return;
432     }
433     childp -> ncall += rawp -> raw_count;
434 #   ifdef DEBUG
435 	if ( debug & TALLYDEBUG ) {
436 	    printf( "[tally] arc from %s to %s traversed %d times\n" ,
437 		    parentp -> name , childp -> name , rawp -> raw_count );
438 	}
439 #   endif DEBUG
440     addarc( parentp , childp , rawp -> raw_count );
441 }
442 
443 /*
444  * dump out the gmon.sum file
445  */
446 dumpsum( sumfile )
447     char *sumfile;
448 {
449     register nltype *nlp;
450     register arctype *arcp;
451     struct rawarc arc;
452     FILE *sfile;
453 
454     if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) {
455 	perror( sumfile );
456 	done();
457     }
458     /*
459      * dump the header; use the last header read in
460      */
461     if ( fwrite( &h , sizeof h , 1 , sfile ) != 1 ) {
462 	perror( sumfile );
463 	done();
464     }
465     /*
466      * dump the samples
467      */
468     if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) {
469 	perror( sumfile );
470 	done();
471     }
472     /*
473      * dump the normalized raw arc information
474      */
475     for ( nlp = nl ; nlp < npe ; nlp++ ) {
476 	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
477 	    arc.raw_frompc = arcp -> arc_parentp -> value;
478 	    arc.raw_selfpc = arcp -> arc_childp -> value;
479 	    arc.raw_count = arcp -> arc_count;
480 	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) {
481 		perror( sumfile );
482 		done();
483 	    }
484 #	    ifdef DEBUG
485 		if ( debug & SAMPLEDEBUG ) {
486 		    printf( "[dumpsum] frompc 0x%x selfpc 0x%x count %d\n" ,
487 			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
488 		}
489 #	    endif DEBUG
490 	}
491     }
492     fclose( sfile );
493 }
494 
495 valcmp(p1, p2)
496     nltype *p1, *p2;
497 {
498     if ( p1 -> value < p2 -> value ) {
499 	return LESSTHAN;
500     }
501     if ( p1 -> value > p2 -> value ) {
502 	return GREATERTHAN;
503     }
504     return EQUALTO;
505 }
506 
507 readsamples(pfile)
508     FILE	*pfile;
509 {
510     register i;
511     UNIT	sample;
512 
513     if (samples == 0) {
514 	samples = (UNIT *) calloc(sampbytes, sizeof (UNIT));
515 	if (samples == 0) {
516 	    fprintf( stderr , "%s: No room for %d sample pc's\n",
517 		whoami , sampbytes / sizeof (UNIT));
518 	    done();
519 	}
520     }
521     for (i = 0; i < nsamples; i++) {
522 	fread(&sample, sizeof (UNIT), 1, pfile);
523 	if (feof(pfile))
524 		break;
525 	samples[i] += sample;
526     }
527     if (i != nsamples) {
528 	fprintf(stderr,
529 	    "%s: unexpected EOF after reading %d/%d samples\n",
530 		whoami , --i , nsamples );
531 	done();
532     }
533 }
534 
535 /*
536  *	Assign samples to the procedures to which they belong.
537  *
538  *	There are three cases as to where pcl and pch can be
539  *	with respect to the routine entry addresses svalue0 and svalue1
540  *	as shown in the following diagram.  overlap computes the
541  *	distance between the arrows, the fraction of the sample
542  *	that is to be credited to the routine which starts at svalue0.
543  *
544  *	    svalue0                                         svalue1
545  *	       |                                               |
546  *	       v                                               v
547  *
548  *	       +-----------------------------------------------+
549  *	       |					       |
550  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
551  *	  |         |		  |         |		  |         |
552  *	  +---------+		  +---------+		  +---------+
553  *
554  *	  ^         ^		  ^         ^		  ^         ^
555  *	  |         |		  |         |		  |         |
556  *	 pcl       pch		 pcl       pch		 pcl       pch
557  *
558  *	For the vax we assert that samples will never fall in the first
559  *	two bytes of any routine, since that is the entry mask,
 *	thus we call alignentries() to adjust the entry points if
561  *	the entry mask falls in one bucket but the code for the routine
562  *	doesn't start until the next bucket.  In conjunction with the
563  *	alignment of routine addresses, this should allow us to have
564  *	only one sample for every four bytes of text space and never
565  *	have any overlap (the two end cases, above).
566  */
567 asgnsamples()
568 {
569     register int	j;
570     UNIT		ccnt;
571     double		time;
572     unsigned long	pcl, pch;
573     register int	i;
574     unsigned long	overlap;
575     unsigned long	svalue0, svalue1;
576 
577     /* read samples and assign to namelist symbols */
578     scale = highpc - lowpc;
579     scale /= nsamples;
580     alignentries();
581     for (i = 0, j = 1; i < nsamples; i++) {
582 	ccnt = samples[i];
583 	if (ccnt == 0)
584 		continue;
585 	pcl = lowpc + scale * i;
586 	pch = lowpc + scale * (i + 1);
587 	time = ccnt;
588 #	ifdef DEBUG
589 	    if ( debug & SAMPLEDEBUG ) {
590 		printf( "[asgnsamples] pcl 0x%x pch 0x%x ccnt %d\n" ,
591 			pcl , pch , ccnt );
592 	    }
593 #	endif DEBUG
594 	totime += time;
595 	for (j = j - 1; j < nname; j++) {
596 	    svalue0 = nl[j].svalue;
597 	    svalue1 = nl[j+1].svalue;
598 		/*
599 		 *	if high end of tick is below entry address,
600 		 *	go for next tick.
601 		 */
602 	    if (pch < svalue0)
603 		    break;
604 		/*
605 		 *	if low end of tick into next routine,
606 		 *	go for next routine.
607 		 */
608 	    if (pcl >= svalue1)
609 		    continue;
610 	    overlap = min(pch, svalue1) - max(pcl, svalue0);
611 	    if (overlap > 0) {
612 #		ifdef DEBUG
613 		    if (debug & SAMPLEDEBUG) {
614 			printf("[asgnsamples] (0x%x->0x%x-0x%x) %s gets %f ticks %d overlap\n",
615 				nl[j].value/sizeof(UNIT), svalue0, svalue1,
616 				nl[j].name,
617 				overlap * time / scale, overlap);
618 		    }
619 #		endif DEBUG
620 		nl[j].time += overlap * time / scale;
621 	    }
622 	}
623     }
624 #   ifdef DEBUG
625 	if (debug & SAMPLEDEBUG) {
626 	    printf("[asgnsamples] totime %f\n", totime);
627 	}
628 #   endif DEBUG
629 }
630 
631 
/*
 * the smaller of two unsigned longs
 */
unsigned long
min(a, b)
    unsigned long a,b;
{
    return (b <= a) ? b : a;
}
640 
/*
 * the larger of two unsigned longs
 */
unsigned long
max(a, b)
    unsigned long a,b;
{
    return (b >= a) ? b : a;
}
649 
650     /*
651      *	calculate scaled entry point addresses (to save time in asgnsamples),
652      *	and possibly push the scaled entry points over the entry mask,
653      *	if it turns out that the entry point is in one bucket and the code
654      *	for a routine is in the next bucket.
655      */
656 alignentries()
657 {
658     register struct nl	*nlp;
659     unsigned long	bucket_of_entry;
660     unsigned long	bucket_of_code;
661 
662     for (nlp = nl; nlp < npe; nlp++) {
663 	nlp -> svalue = nlp -> value / sizeof(UNIT);
664 	bucket_of_entry = (nlp->svalue - lowpc) / scale;
665 	bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
666 	if (bucket_of_entry < bucket_of_code) {
667 #	    ifdef DEBUG
668 		if (debug & SAMPLEDEBUG) {
669 		    printf("[alignentries] pushing svalue 0x%x to 0x%x\n",
670 			    nlp->svalue, nlp->svalue + UNITS_TO_CODE);
671 		}
672 #	    endif DEBUG
673 	    nlp->svalue += UNITS_TO_CODE;
674 	}
675     }
676 }
677 
678 bool
679 funcsymbol( nlistp )
680     struct nlist	*nlistp;
681 {
682     extern char	*strtab;	/* string table from a.out */
683     extern int	aflag;		/* if static functions aren't desired */
684     char	*name;
685 
686 	/*
687 	 *	must be a text symbol,
688 	 *	and static text symbols don't qualify if aflag set.
689 	 */
690     if ( ! (  ( nlistp -> n_type == ( N_TEXT | N_EXT ) )
691 	   || ( ( nlistp -> n_type == N_TEXT ) && ( aflag == 0 ) ) ) ) {
692 	return FALSE;
693     }
694 	/*
695 	 *	can't have any `funny' characters in name,
696 	 *	where `funny' includes	`.', .o file names
697 	 *			and	`$', pascal labels.
698 	 */
699     for ( name = strtab + nlistp -> n_un.n_strx ; *name ; name += 1 ) {
700 	if ( *name == '.' || *name == '$' ) {
701 	    return FALSE;
702 	}
703     }
704     return TRUE;
705 }
706 
/*
 * Leave the program.  The exit status is always 0, including when
 * this is called right after an error message has been printed.
 */
done()
{
    exit(0);
}
712