xref: /openbsd/usr.bin/gprof/gprof.c (revision 5d07573c)
1 /*	$OpenBSD: gprof.c,v 1.27 2021/01/27 07:18:41 deraadt Exp $	*/
2 /*	$NetBSD: gprof.c,v 1.8 1995/04/19 07:15:59 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1983, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include "gprof.h"
34 
35 int valcmp(const void *, const void *);
36 
37 static struct gmonhdr	gmonhdr;
38 extern char *__progname;
39 
40 long    hz;
41 char    *a_outname;
42 char    *gmonname;
43 nltype  *nl;                    /* the whole namelist */
44 nltype  *npe;                   /* the virtual end of the namelist */
45 int     nname;                  /* the number of function names */
46 arctype *archead;               /* the head of arcs in current cycle list */
47 cltype  *cyclehead;             /* the head of the list */
48 int     cyclecnt;               /* the number of cycles found */
49 nltype  *cyclenl;               /* cycle header namelist */
50 int     ncycle;                 /* number of cycles discovered */
51 int     debug;
52 UNIT    *samples;
53 unsigned long   s_lowpc;        /* lowpc from the profile file */
54 unsigned long   s_highpc;       /* highpc from the profile file */
55 unsigned long   lowpc, highpc;  /* range profiled, in UNIT's */
56 unsigned sampbytes;             /* number of bytes of samples */
57 int     nsamples;               /* number of samples */
58 double  actime;                 /* accumulated time thus far for putprofline */
59 double  totime;                 /* total time for all routines */
60 double  printtime;              /* total of time being printed */
61 double  scale;                  /* scale factor converting samples to pc */
62 unsigned char   *textspace;     /* text space of a.out in core */
63 int     cyclethreshold;         /* with -C, minimum cycle size to ignore */
64 bool    aflag;                          /* suppress static functions */
65 bool    bflag;                          /* blurbs, too */
66 bool    cflag;                          /* discovered call graph, too */
67 bool    Cflag;                          /* find cut-set to eliminate cycles */
68 bool    dflag;                          /* debugging options */
69 bool    eflag;                          /* specific functions excluded */
70 bool    Eflag;                          /* functions excluded with time */
71 bool    fflag;                          /* specific functions requested */
72 bool    Fflag;                          /* functions requested with time */
73 bool    kflag;                          /* arcs to be deleted */
74 bool    sflag;                          /* sum multiple gmon.out files */
75 bool    zflag;                          /* zero time/called functions, too */
76 
77 int
main(int argc,char * argv[])78 main(int argc, char *argv[])
79 {
80     char	**sp;
81     nltype	**timesortnlp;
82     char	**defaultEs;
83 
84     if (pledge("stdio rpath wpath cpath", NULL) == -1)
85         err(1, NULL);
86 
87     --argc;
88     argv++;
89     debug = 0;
90     bflag = TRUE;
91     while ( *argv != 0 && **argv == '-' ) {
92 	(*argv)++;
93 	switch ( **argv ) {
94 	case 'a':
95 	    aflag = TRUE;
96 	    break;
97 	case 'b':
98 	    bflag = FALSE;
99 	    break;
100 	case 'C':
101 	    Cflag = TRUE;
102 	    cyclethreshold = atoi( *++argv );
103 	    break;
104 	case 'c':
105 #if defined(__i386__) || defined(__mips64__)
106 	    cflag = TRUE;
107 #else
108 	    fprintf(stderr, "%s: -c isn't supported on this architecture yet\n", __progname);
109 	    exit(1);
110 #endif
111 	    break;
112 	case 'd':
113 	    dflag = TRUE;
114 	    setvbuf(stdout, NULL, _IOLBF, 0);
115 	    debug |= atoi( *++argv );
116 	    debug |= ANYDEBUG;
117 #	    ifdef DEBUG
118 		printf("[main] debug = %d\n", debug);
119 #	    else /* not DEBUG */
120 		warnx("-d ignored");
121 #	    endif /* DEBUG */
122 	    break;
123 	case 'E':
124 	    ++argv;
125 	    addlist( Elist , *argv );
126 	    Eflag = TRUE;
127 	    addlist( elist , *argv );
128 	    eflag = TRUE;
129 	    break;
130 	case 'e':
131 	    addlist( elist , *++argv );
132 	    eflag = TRUE;
133 	    break;
134 	case 'F':
135 	    ++argv;
136 	    addlist( Flist , *argv );
137 	    Fflag = TRUE;
138 	    addlist( flist , *argv );
139 	    fflag = TRUE;
140 	    break;
141 	case 'f':
142 	    addlist( flist , *++argv );
143 	    fflag = TRUE;
144 	    break;
145 	case 'k':
146 	    addlist( kfromlist , *++argv );
147 	    addlist( ktolist , *++argv );
148 	    kflag = TRUE;
149 	    break;
150 	case 's':
151 	    sflag = TRUE;
152 	    break;
153 	case 'z':
154 	    zflag = TRUE;
155 	    break;
156 	}
157 	argv++;
158     }
159     if ( *argv != 0 ) {
160 	a_outname  = *argv;
161 	argv++;
162     } else {
163 	a_outname  = A_OUTNAME;
164     }
165     if ( *argv != 0 ) {
166 	gmonname = *argv;
167 	argv++;
168     } else {
169 	gmonname = GMONNAME;
170     }
171     if ( sflag == FALSE ) {
172         if (pledge("stdio rpath", NULL) == -1)
173             err(1, "pledge");
174     }
175 	/*
176 	 *	get information about a.out file.
177 	 */
178     if (getnfile(a_outname, &defaultEs) == -1)
179 	errx(1, "%s: bad format", a_outname);
180 	/*
181 	 *	sort symbol table.
182 	 */
183     qsort(nl, nname, sizeof(nltype), valcmp);
184 	/*
185 	 *	turn off default functions
186 	 */
187     for ( sp = &defaultEs[0] ; *sp ; sp++ ) {
188 	Eflag = TRUE;
189 	addlist( Elist , *sp );
190 	eflag = TRUE;
191 	addlist( elist , *sp );
192     }
193 	/*
194 	 *	get information about mon.out file(s).
195 	 */
196     do	{
197 	getpfile( gmonname );
198 	if ( *argv != 0 ) {
199 	    gmonname = *argv;
200 	}
201     } while ( *argv++ != 0 );
202 	/*
203 	 *	how many ticks per second?
204 	 *	if we can't tell, report time in ticks.
205 	 */
206     if (hz == 0) {
207 	hz = 1;
208 	warnx("time is in ticks, not seconds");
209     }
210 	/*
211 	 *	dump out a gmon.sum file if requested
212 	 */
213     if ( sflag ) {
214 	dumpsum( GMONSUM );
215     }
216 	/*
217 	 *	assign samples to procedures
218 	 */
219     asgnsamples();
220 	/*
221 	 *	assemble the dynamic profile
222 	 */
223     timesortnlp = doarcs();
224 	/*
225 	 *	print the dynamic profile
226 	 */
227     printgprof( timesortnlp );
228 	/*
229 	 *	print the flat profile
230 	 */
231     printprof();
232 	/*
233 	 *	print the index
234 	 */
235     printindex();
236 
237     return (0);
238 }
239 
240     /*
241      *	information from a gmon.out file is in two parts:
242      *	an array of sampling hits within pc ranges,
243      *	and the arcs.
244      */
245 void
getpfile(const char * filename)246 getpfile(const char *filename)
247 {
248     FILE		*pfile;
249     struct rawarc	arc;
250 
251     pfile = openpfile(filename);
252     readsamples(pfile);
253 	/*
254 	 *	the rest of the file consists of
255 	 *	a bunch of <from,self,count> tuples.
256 	 */
257     while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
258 #	ifdef DEBUG
259 	    if ( debug & SAMPLEDEBUG ) {
260 		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
261 			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
262 	    }
263 #	endif /* DEBUG */
264 	    /*
265 	     *	add this arc
266 	     */
267 	tally( &arc );
268     }
269     fclose(pfile);
270 }
271 
272 FILE *
openpfile(const char * filename)273 openpfile(const char *filename)
274 {
275     struct gmonhdr	tmp;
276     FILE		*pfile;
277     int			size;
278     int			rate;
279 
280     if((pfile = fopen(filename, "r")) == NULL)
281 	err(1, "fopen: %s", filename);
282     if (fread(&tmp, sizeof(struct gmonhdr), 1, pfile) != 1)
283 	errx(1, "%s: bad gmon header", filename);
284     if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
285 	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt))
286 	errx(1, "%s: incompatible with first gmon file", filename);
287     gmonhdr = tmp;
288     if ( gmonhdr.version == GMONVERSION ) {
289 	rate = gmonhdr.profrate;
290 	size = sizeof(struct gmonhdr);
291     } else {
292 	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
293 	size = sizeof(struct ophdr);
294 	gmonhdr.profrate = rate = hertz();
295 	gmonhdr.version = GMONVERSION;
296     }
297     if (hz == 0) {
298 	hz = rate;
299     } else if (hz != rate)
300 	errx(1, "%s: profile clock rate (%d) incompatible with clock rate "
301 	    "(%ld) in first gmon file", filename, rate, hz);
302     s_lowpc = (unsigned long) gmonhdr.lpc;
303     s_highpc = (unsigned long) gmonhdr.hpc;
304     lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT);
305     highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT);
306     sampbytes = gmonhdr.ncnt - size;
307     nsamples = sampbytes / sizeof (UNIT);
308 #   ifdef DEBUG
309 	if ( debug & SAMPLEDEBUG ) {
310 	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
311 		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
312 	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
313 		s_lowpc , s_highpc );
314 	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
315 		lowpc , highpc );
316 	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
317 		sampbytes , nsamples );
318 	    printf( "[openpfile] sample rate %ld\n" , hz );
319 	}
320 #   endif /* DEBUG */
321     return(pfile);
322 }
323 
324 void
tally(struct rawarc * rawp)325 tally(struct rawarc *rawp)
326 {
327     nltype		*parentp;
328     nltype		*childp;
329 
330     parentp = nllookup( rawp -> raw_frompc );
331     childp = nllookup( rawp -> raw_selfpc );
332     if ( parentp == 0 || childp == 0 )
333 	return;
334     if ( kflag
335 	 && onlist( kfromlist , parentp -> name )
336 	 && onlist( ktolist , childp -> name ) ) {
337 	return;
338     }
339     childp -> ncall += rawp -> raw_count;
340 #   ifdef DEBUG
341 	if ( debug & TALLYDEBUG ) {
342 	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
343 		    parentp -> name , childp -> name , rawp -> raw_count );
344 	}
345 #   endif /* DEBUG */
346     addarc( parentp , childp , rawp -> raw_count );
347 }
348 
349 /*
350  * dump out the gmon.sum file
351  */
352 void
dumpsum(const char * sumfile)353 dumpsum(const char *sumfile)
354 {
355     nltype *nlp;
356     arctype *arcp;
357     struct rawarc arc;
358     FILE *sfile;
359 
360     if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
361 	err(1, "fopen: %s", sumfile);
362     /*
363      * dump the header; use the last header read in
364      */
365     if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
366 	err(1, "fwrite: %s", sumfile);
367     /*
368      * dump the samples
369      */
370     if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples)
371 	err(1, "fwrite: %s", sumfile);
372     /*
373      * dump the normalized raw arc information
374      */
375     for ( nlp = nl ; nlp < npe ; nlp++ ) {
376 	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
377 	    arc.raw_frompc = arcp -> arc_parentp -> value;
378 	    arc.raw_selfpc = arcp -> arc_childp -> value;
379 	    arc.raw_count = arcp -> arc_count;
380 	    if (fwrite ( &arc , sizeof arc , 1 , sfile ) != 1)
381 	        err(1, "fwrite: %s", sumfile);
382 #	    ifdef DEBUG
383 		if ( debug & SAMPLEDEBUG ) {
384 		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
385 			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
386 		}
387 #	    endif /* DEBUG */
388 	}
389     }
390     fclose( sfile );
391 }
392 
393 int
valcmp(const void * vp1,const void * vp2)394 valcmp(const void *vp1, const void *vp2)
395 {
396     const nltype *p1 = vp1;
397     const nltype *p2 = vp2;
398 
399     if ( p1 -> value < p2 -> value ) {
400 	return LESSTHAN;
401     }
402     if ( p1 -> value > p2 -> value ) {
403 	return GREATERTHAN;
404     }
405     return EQUALTO;
406 }
407 
408 void
readsamples(FILE * pfile)409 readsamples(FILE *pfile)
410 {
411     UNIT	sample;
412     int i;
413 
414     if (samples == 0) {
415 	samples = calloc(sampbytes, sizeof (UNIT));
416 	if (samples == 0)
417 	    errx(1, "No room for %ld sample pc's", sampbytes / sizeof (UNIT));
418     }
419     for (i = 0; i < nsamples; i++) {
420 	fread(&sample, sizeof (UNIT), 1, pfile);
421 	if (feof(pfile))
422 		break;
423 	samples[i] += sample;
424     }
425     if (i != nsamples)
426 	errx(1, "unexpected EOF after reading %d/%d samples", i, nsamples );
427 }
428 
429 /*
430  *	Assign samples to the procedures to which they belong.
431  *
432  *	There are three cases as to where pcl and pch can be
433  *	with respect to the routine entry addresses svalue0 and svalue1
434  *	as shown in the following diagram.  overlap computes the
435  *	distance between the arrows, the fraction of the sample
436  *	that is to be credited to the routine which starts at svalue0.
437  *
438  *	    svalue0                                         svalue1
439  *	       |                                               |
440  *	       v                                               v
441  *
442  *	       +-----------------------------------------------+
443  *	       |					       |
444  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
445  *	  |         |		  |         |		  |         |
446  *	  +---------+		  +---------+		  +---------+
447  *
448  *	  ^         ^		  ^         ^		  ^         ^
449  *	  |         |		  |         |		  |         |
450  *	 pcl       pch		 pcl       pch		 pcl       pch
451  *
452  *	For the vax we assert that samples will never fall in the first
453  *	two bytes of any routine, since that is the entry mask,
454  *	thus we give call alignentries() to adjust the entry points if
455  *	the entry mask falls in one bucket but the code for the routine
456  *	doesn't start until the next bucket.  In conjunction with the
457  *	alignment of routine addresses, this should allow us to have
458  *	only one sample for every four bytes of text space and never
459  *	have any overlap (the two end cases, above).
460  */
461 void
asgnsamples(void)462 asgnsamples(void)
463 {
464     int	j;
465     UNIT		ccnt;
466     double		time;
467     unsigned long	pcl, pch;
468     unsigned long	i;
469     unsigned long	overlap;
470     unsigned long	svalue0, svalue1;
471 
472     /* read samples and assign to namelist symbols */
473     scale = highpc - lowpc;
474     scale /= nsamples;
475     alignentries();
476     for (i = 0, j = 1; i < nsamples; i++) {
477 	ccnt = samples[i];
478 	if (ccnt == 0)
479 		continue;
480 	pcl = lowpc + (unsigned long)(scale * i);
481 	pch = lowpc + (unsigned long)(scale * (i + 1));
482 	time = ccnt;
483 #	ifdef DEBUG
484 	    if ( debug & SAMPLEDEBUG ) {
485 		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %d\n" ,
486 			pcl , pch , ccnt );
487 	    }
488 #	endif /* DEBUG */
489 	totime += time;
490 	for (j = j - 1; j < nname; j++) {
491 	    svalue0 = nl[j].svalue;
492 	    svalue1 = nl[j+1].svalue;
493 		/*
494 		 *	if high end of tick is below entry address,
495 		 *	go for next tick.
496 		 */
497 	    if (pch < svalue0)
498 		    break;
499 		/*
500 		 *	if low end of tick into next routine,
501 		 *	go for next routine.
502 		 */
503 	    if (pcl >= svalue1)
504 		    continue;
505 	    overlap = min(pch, svalue1) - max(pcl, svalue0);
506 	    if (overlap > 0) {
507 #		ifdef DEBUG
508 		    if (debug & SAMPLEDEBUG) {
509 			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %ld overlap\n",
510 				nl[j].value/sizeof(UNIT), svalue0, svalue1,
511 				nl[j].name,
512 				overlap * time / scale, overlap);
513 		    }
514 #		endif /* DEBUG */
515 		nl[j].time += overlap * time / scale;
516 	    }
517 	}
518     }
519 #   ifdef DEBUG
520 	if (debug & SAMPLEDEBUG) {
521 	    printf("[asgnsamples] totime %f\n", totime);
522 	}
523 #   endif /* DEBUG */
524 }
525 
526 
527 unsigned long
min(unsigned long a,unsigned long b)528 min(unsigned long a, unsigned long b)
529 {
530     if (a<b)
531 	return(a);
532     return(b);
533 }
534 
535 unsigned long
max(unsigned long a,unsigned long b)536 max(unsigned long a, unsigned long b)
537 {
538     if (a>b)
539 	return(a);
540     return(b);
541 }
542 
543     /*
544      *	calculate scaled entry point addresses (to save time in asgnsamples),
545      *	and possibly push the scaled entry points over the entry mask,
546      *	if it turns out that the entry point is in one bucket and the code
547      *	for a routine is in the next bucket.
548      */
549 void
alignentries(void)550 alignentries(void)
551 {
552     struct nl		*nlp;
553     unsigned long	bucket_of_entry;
554     unsigned long	bucket_of_code;
555 
556     for (nlp = nl; nlp < npe; nlp++) {
557 	nlp -> svalue = nlp -> value / sizeof(UNIT);
558 	bucket_of_entry = (nlp->svalue - lowpc) / scale;
559 	bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
560 	if (bucket_of_entry < bucket_of_code) {
561 #	    ifdef DEBUG
562 		if (debug & SAMPLEDEBUG) {
563 		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
564 			    nlp->svalue, nlp->svalue + UNITS_TO_CODE);
565 		}
566 #	    endif /* DEBUG */
567 	    nlp->svalue += UNITS_TO_CODE;
568 	}
569     }
570 }
571