xref: /openbsd/usr.bin/gprof/gprof.c (revision 404b540a)
1 /*	$OpenBSD: gprof.c,v 1.18 2008/06/25 15:09:32 deraadt Exp $	*/
2 /*	$NetBSD: gprof.c,v 1.8 1995/04/19 07:15:59 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1983, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef lint
34 static char copyright[] =
35 "@(#) Copyright (c) 1983, 1993\n\
36 	The Regents of the University of California.  All rights reserved.\n";
37 #endif /* not lint */
38 
39 #ifndef lint
40 #if 0
41 static char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
42 #else
43 static char rcsid[] = "$OpenBSD: gprof.c,v 1.18 2008/06/25 15:09:32 deraadt Exp $";
44 #endif
45 #endif /* not lint */
46 
47 #include "gprof.h"
48 
49 int valcmp(const void *, const void *);
50 
51 static struct gmonhdr	gmonhdr;
52 extern char *__progname;
53 
54 int
55 main(int argc, char *argv[])
56 {
57     char	**sp;
58     nltype	**timesortnlp;
59     char	**defaultEs;
60 
61     --argc;
62     argv++;
63     debug = 0;
64     bflag = TRUE;
65     while ( *argv != 0 && **argv == '-' ) {
66 	(*argv)++;
67 	switch ( **argv ) {
68 	case 'a':
69 	    aflag = TRUE;
70 	    break;
71 	case 'b':
72 	    bflag = FALSE;
73 	    break;
74 	case 'C':
75 	    Cflag = TRUE;
76 	    cyclethreshold = atoi( *++argv );
77 	    break;
78 	case 'c':
79 #if defined(__i386__) || defined(__vax__) || defined(__tahoe__) || \
80     defined(__sparc__) || defined(__sparc64__)
81 	    cflag = TRUE;
82 #else
83 	    fprintf(stderr, "%s: -c isn't supported on this architecture yet\n", __progname);
84 	    exit(1);
85 #endif
86 	    break;
87 	case 'd':
88 	    dflag = TRUE;
89 	    setlinebuf(stdout);
90 	    debug |= atoi( *++argv );
91 	    debug |= ANYDEBUG;
92 #	    ifdef DEBUG
93 		printf("[main] debug = %d\n", debug);
94 #	    else /* not DEBUG */
95 		warnx("-d ignored");
96 #	    endif /* DEBUG */
97 	    break;
98 	case 'E':
99 	    ++argv;
100 	    addlist( Elist , *argv );
101 	    Eflag = TRUE;
102 	    addlist( elist , *argv );
103 	    eflag = TRUE;
104 	    break;
105 	case 'e':
106 	    addlist( elist , *++argv );
107 	    eflag = TRUE;
108 	    break;
109 	case 'F':
110 	    ++argv;
111 	    addlist( Flist , *argv );
112 	    Fflag = TRUE;
113 	    addlist( flist , *argv );
114 	    fflag = TRUE;
115 	    break;
116 	case 'f':
117 	    addlist( flist , *++argv );
118 	    fflag = TRUE;
119 	    break;
120 	case 'k':
121 	    addlist( kfromlist , *++argv );
122 	    addlist( ktolist , *++argv );
123 	    kflag = TRUE;
124 	    break;
125 	case 's':
126 	    sflag = TRUE;
127 	    break;
128 	case 'z':
129 	    zflag = TRUE;
130 	    break;
131 	}
132 	argv++;
133     }
134     if ( *argv != 0 ) {
135 	a_outname  = *argv;
136 	argv++;
137     } else {
138 	a_outname  = A_OUTNAME;
139     }
140     if ( *argv != 0 ) {
141 	gmonname = *argv;
142 	argv++;
143     } else {
144 	gmonname = GMONNAME;
145     }
146 	/*
147 	 *	get information about a.out file.
148 	 */
149     if (getnfile(a_outname, &defaultEs) == -1)
150 	errx(1, "%s: bad format", a_outname);
151 	/*
152 	 *	sort symbol table.
153 	 */
154     qsort(nl, nname, sizeof(nltype), valcmp);
155 	/*
156 	 *	turn off default functions
157 	 */
158     for ( sp = &defaultEs[0] ; *sp ; sp++ ) {
159 	Eflag = TRUE;
160 	addlist( Elist , *sp );
161 	eflag = TRUE;
162 	addlist( elist , *sp );
163     }
164 	/*
165 	 *	get information about mon.out file(s).
166 	 */
167     do	{
168 	getpfile( gmonname );
169 	if ( *argv != 0 ) {
170 	    gmonname = *argv;
171 	}
172     } while ( *argv++ != 0 );
173 	/*
174 	 *	how many ticks per second?
175 	 *	if we can't tell, report time in ticks.
176 	 */
177     if (hz == 0) {
178 	hz = 1;
179 	warnx("time is in ticks, not seconds");
180     }
181 	/*
182 	 *	dump out a gmon.sum file if requested
183 	 */
184     if ( sflag ) {
185 	dumpsum( GMONSUM );
186     }
187 	/*
188 	 *	assign samples to procedures
189 	 */
190     asgnsamples();
191 	/*
192 	 *	assemble the dynamic profile
193 	 */
194     timesortnlp = doarcs();
195 	/*
196 	 *	print the dynamic profile
197 	 */
198     printgprof( timesortnlp );
199 	/*
200 	 *	print the flat profile
201 	 */
202     printprof();
203 	/*
204 	 *	print the index
205 	 */
206     printindex();
207 
208     return (0);
209 }
210 
211     /*
212      *	information from a gmon.out file is in two parts:
213      *	an array of sampling hits within pc ranges,
214      *	and the arcs.
215      */
216 void
217 getpfile(const char *filename)
218 {
219     FILE		*pfile;
220     struct rawarc	arc;
221 
222     pfile = openpfile(filename);
223     readsamples(pfile);
224 	/*
225 	 *	the rest of the file consists of
226 	 *	a bunch of <from,self,count> tuples.
227 	 */
228     while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
229 #	ifdef DEBUG
230 	    if ( debug & SAMPLEDEBUG ) {
231 		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
232 			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
233 	    }
234 #	endif /* DEBUG */
235 	    /*
236 	     *	add this arc
237 	     */
238 	tally( &arc );
239     }
240     fclose(pfile);
241 }
242 
243 FILE *
244 openpfile(const char *filename)
245 {
246     struct gmonhdr	tmp;
247     FILE		*pfile;
248     int			size;
249     int			rate;
250 
251     if((pfile = fopen(filename, "r")) == NULL)
252 	err(1, "fopen: %s", filename);
253     if (fread(&tmp, sizeof(struct gmonhdr), 1, pfile) != 1)
254 	errx(1, "%s: bad gmon header", filename);
255     if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
256 	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt))
257 	errx(1, "%s: incompatible with first gmon file", filename);
258     gmonhdr = tmp;
259     if ( gmonhdr.version == GMONVERSION ) {
260 	rate = gmonhdr.profrate;
261 	size = sizeof(struct gmonhdr);
262     } else {
263 	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
264 	size = sizeof(struct ophdr);
265 	gmonhdr.profrate = rate = hertz();
266 	gmonhdr.version = GMONVERSION;
267     }
268     if (hz == 0) {
269 	hz = rate;
270     } else if (hz != rate)
271 	errx(1, "%s: profile clock rate (%d) incompatible with clock rate "
272 	    "(%ld) in first gmon file", filename, rate, hz);
273     s_lowpc = (unsigned long) gmonhdr.lpc;
274     s_highpc = (unsigned long) gmonhdr.hpc;
275     lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT);
276     highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT);
277     sampbytes = gmonhdr.ncnt - size;
278     nsamples = sampbytes / sizeof (UNIT);
279 #   ifdef DEBUG
280 	if ( debug & SAMPLEDEBUG ) {
281 	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
282 		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
283 	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
284 		s_lowpc , s_highpc );
285 	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
286 		lowpc , highpc );
287 	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
288 		sampbytes , nsamples );
289 	    printf( "[openpfile] sample rate %ld\n" , hz );
290 	}
291 #   endif /* DEBUG */
292     return(pfile);
293 }
294 
295 void
296 tally(struct rawarc *rawp)
297 {
298     nltype		*parentp;
299     nltype		*childp;
300 
301     parentp = nllookup( rawp -> raw_frompc );
302     childp = nllookup( rawp -> raw_selfpc );
303     if ( parentp == 0 || childp == 0 )
304 	return;
305     if ( kflag
306 	 && onlist( kfromlist , parentp -> name )
307 	 && onlist( ktolist , childp -> name ) ) {
308 	return;
309     }
310     childp -> ncall += rawp -> raw_count;
311 #   ifdef DEBUG
312 	if ( debug & TALLYDEBUG ) {
313 	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
314 		    parentp -> name , childp -> name , rawp -> raw_count );
315 	}
316 #   endif /* DEBUG */
317     addarc( parentp , childp , rawp -> raw_count );
318 }
319 
320 /*
321  * dump out the gmon.sum file
322  */
323 void
324 dumpsum(const char *sumfile)
325 {
326     nltype *nlp;
327     arctype *arcp;
328     struct rawarc arc;
329     FILE *sfile;
330 
331     if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
332 	err(1, "fopen: %s", sumfile);
333     /*
334      * dump the header; use the last header read in
335      */
336     if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
337 	err(1, "fwrite: %s", sumfile);
338     /*
339      * dump the samples
340      */
341     if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples)
342 	err(1, "fwrite: %s", sumfile);
343     /*
344      * dump the normalized raw arc information
345      */
346     for ( nlp = nl ; nlp < npe ; nlp++ ) {
347 	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
348 	    arc.raw_frompc = arcp -> arc_parentp -> value;
349 	    arc.raw_selfpc = arcp -> arc_childp -> value;
350 	    arc.raw_count = arcp -> arc_count;
351 	    if (fwrite ( &arc , sizeof arc , 1 , sfile ) != 1)
352 	        err(1, "fwrite: %s", sumfile);
353 #	    ifdef DEBUG
354 		if ( debug & SAMPLEDEBUG ) {
355 		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
356 			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
357 		}
358 #	    endif /* DEBUG */
359 	}
360     }
361     fclose( sfile );
362 }
363 
364 int
365 valcmp(const void *vp1, const void *vp2)
366 {
367     const nltype *p1 = vp1;
368     const nltype *p2 = vp2;
369 
370     if ( p1 -> value < p2 -> value ) {
371 	return LESSTHAN;
372     }
373     if ( p1 -> value > p2 -> value ) {
374 	return GREATERTHAN;
375     }
376     return EQUALTO;
377 }
378 
379 void
380 readsamples(FILE *pfile)
381 {
382     UNIT	sample;
383     int i;
384 
385     if (samples == 0) {
386 	samples = (UNIT *) calloc(sampbytes, sizeof (UNIT));
387 	if (samples == 0)
388 	    errx(1, "No room for %ld sample pc's", sampbytes / sizeof (UNIT));
389     }
390     for (i = 0; i < nsamples; i++) {
391 	fread(&sample, sizeof (UNIT), 1, pfile);
392 	if (feof(pfile))
393 		break;
394 	samples[i] += sample;
395     }
396     if (i != nsamples)
397 	errx(1, "unexpected EOF after reading %d/%d samples", i, nsamples );
398 }
399 
400 /*
401  *	Assign samples to the procedures to which they belong.
402  *
403  *	There are three cases as to where pcl and pch can be
404  *	with respect to the routine entry addresses svalue0 and svalue1
405  *	as shown in the following diagram.  overlap computes the
406  *	distance between the arrows, the fraction of the sample
407  *	that is to be credited to the routine which starts at svalue0.
408  *
409  *	    svalue0                                         svalue1
410  *	       |                                               |
411  *	       v                                               v
412  *
413  *	       +-----------------------------------------------+
414  *	       |					       |
415  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
416  *	  |         |		  |         |		  |         |
417  *	  +---------+		  +---------+		  +---------+
418  *
419  *	  ^         ^		  ^         ^		  ^         ^
420  *	  |         |		  |         |		  |         |
421  *	 pcl       pch		 pcl       pch		 pcl       pch
422  *
423  *	For the vax we assert that samples will never fall in the first
424  *	two bytes of any routine, since that is the entry mask,
425  *	thus we give call alignentries() to adjust the entry points if
426  *	the entry mask falls in one bucket but the code for the routine
427  *	doesn't start until the next bucket.  In conjunction with the
428  *	alignment of routine addresses, this should allow us to have
429  *	only one sample for every four bytes of text space and never
430  *	have any overlap (the two end cases, above).
431  */
432 void
433 asgnsamples(void)
434 {
435     int	j;
436     UNIT		ccnt;
437     double		time;
438     unsigned long	pcl, pch;
439     unsigned long	i;
440     unsigned long	overlap;
441     unsigned long	svalue0, svalue1;
442 
443     /* read samples and assign to namelist symbols */
444     scale = highpc - lowpc;
445     scale /= nsamples;
446     alignentries();
447     for (i = 0, j = 1; i < nsamples; i++) {
448 	ccnt = samples[i];
449 	if (ccnt == 0)
450 		continue;
451 	pcl = lowpc + (unsigned long)(scale * i);
452 	pch = lowpc + (unsigned long)(scale * (i + 1));
453 	time = ccnt;
454 #	ifdef DEBUG
455 	    if ( debug & SAMPLEDEBUG ) {
456 		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %d\n" ,
457 			pcl , pch , ccnt );
458 	    }
459 #	endif /* DEBUG */
460 	totime += time;
461 	for (j = j - 1; j < nname; j++) {
462 	    svalue0 = nl[j].svalue;
463 	    svalue1 = nl[j+1].svalue;
464 		/*
465 		 *	if high end of tick is below entry address,
466 		 *	go for next tick.
467 		 */
468 	    if (pch < svalue0)
469 		    break;
470 		/*
471 		 *	if low end of tick into next routine,
472 		 *	go for next routine.
473 		 */
474 	    if (pcl >= svalue1)
475 		    continue;
476 	    overlap = min(pch, svalue1) - max(pcl, svalue0);
477 	    if (overlap > 0) {
478 #		ifdef DEBUG
479 		    if (debug & SAMPLEDEBUG) {
480 			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %ld overlap\n",
481 				nl[j].value/sizeof(UNIT), svalue0, svalue1,
482 				nl[j].name,
483 				overlap * time / scale, overlap);
484 		    }
485 #		endif /* DEBUG */
486 		nl[j].time += overlap * time / scale;
487 	    }
488 	}
489     }
490 #   ifdef DEBUG
491 	if (debug & SAMPLEDEBUG) {
492 	    printf("[asgnsamples] totime %f\n", totime);
493 	}
494 #   endif /* DEBUG */
495 }
496 
497 
498 unsigned long
499 min(unsigned long a, unsigned long b)
500 {
501     if (a<b)
502 	return(a);
503     return(b);
504 }
505 
506 unsigned long
507 max(unsigned long a, unsigned long b)
508 {
509     if (a>b)
510 	return(a);
511     return(b);
512 }
513 
514     /*
515      *	calculate scaled entry point addresses (to save time in asgnsamples),
516      *	and possibly push the scaled entry points over the entry mask,
517      *	if it turns out that the entry point is in one bucket and the code
518      *	for a routine is in the next bucket.
519      */
520 void
521 alignentries(void)
522 {
523     struct nl		*nlp;
524     unsigned long	bucket_of_entry;
525     unsigned long	bucket_of_code;
526 
527     for (nlp = nl; nlp < npe; nlp++) {
528 	nlp -> svalue = nlp -> value / sizeof(UNIT);
529 	bucket_of_entry = (nlp->svalue - lowpc) / scale;
530 	bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
531 	if (bucket_of_entry < bucket_of_code) {
532 #	    ifdef DEBUG
533 		if (debug & SAMPLEDEBUG) {
534 		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
535 			    nlp->svalue, nlp->svalue + UNITS_TO_CODE);
536 		}
537 #	    endif /* DEBUG */
538 	    nlp->svalue += UNITS_TO_CODE;
539 	}
540     }
541 }
542