1 /*
2  * Function call tracing for gcc >= 2.95
3  * WARNING! THIS CODE IS NOT THREAD-SAFE!
4  *
5  * Copyright 2012 Willy Tarreau <w@1wt.eu>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  *
12  * gcc is able to call a specific function when entering and leaving any
13  * function when compiled with -finstrument-functions. This code must not
14  * be built with this argument. The performance impact is huge, so this
15  * feature should only be used when debugging.
16  *
17  * The entry and exits of all functions will be dumped into a file designated
18  * by the HAPROXY_TRACE environment variable, or by default "trace.out". If the
19  * trace file name is empty or "/dev/null", then traces are disabled. If
20  * opening the trace file fails, then stderr is used. If HAPROXY_TRACE_FAST is
21  * used, then the time is taken from the global <now> variable. Last, if
22  * HAPROXY_TRACE_TSC is used, then the machine's TSC is used instead of the
23  * real time (almost twice as fast).
24  *
25  * The output format is :
26  *
27  *   <sec.usec> <level> <caller_ptr> <dir> <callee_ptr>
28  *  or :
29  *   <tsc> <level> <caller_ptr> <dir> <callee_ptr>
30  *
31  * where <dir> is '>' when entering a function and '<' when leaving.
32  *
33  * It is also possible to emit comments using the calltrace() function which uses
34  * the printf() format. Such comments are then inserted by replacing the caller
35  * pointer with a sharp ('#') like this :
36  *
37  *   <sec.usec> <level> # <comment>
38  *  or :
39  *   <tsc> <level> # <comment>
40  *
41  * The article below is a nice explanation of how this works :
42  *   http://balau82.wordpress.com/2010/10/06/trace-and-profile-function-calls-with-gcc/
43  */
44 
45 #include <sys/time.h>
46 #include <stdarg.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <haproxy/api.h>
51 #include <haproxy/time.h>
52 #include <haproxy/tools.h>
53 
54 static FILE *log;
55 static int level;
56 static int disabled;
57 static int fast_time;
58 static int use_tsc;
59 static struct timeval trace_now;
60 static struct timeval *now_ptr;
61 static char line[128]; /* more than enough for a message (9+1+6+1+3+1+18+1+1+18+1+1) */
62 
open_trace()63 static int open_trace()
64 {
65 	const char *output = getenv("HAPROXY_TRACE");
66 
67 	if (!output)
68 		output = "trace.out";
69 
70 	if (!*output || strcmp(output, "/dev/null") == 0) {
71 		disabled = 1;
72 		return 0;
73 	}
74 
75 	log = fopen(output, "w");
76 	if (!log)
77 		log = stderr;
78 
79 	now_ptr = &now;
80 	if (getenv("HAPROXY_TRACE_FAST") != NULL) {
81 		fast_time = 1;
82 		now_ptr = &trace_now;
83 	}
84 	if (getenv("HAPROXY_TRACE_TSC") != NULL) {
85 		fast_time = 1;
86 		use_tsc = 1;
87 	}
88 	return 1;
89 }
90 
91 /* This function first divides the number by 100M then iteratively multiplies it
92  * by 100 (using adds and shifts). The trick is that dividing by 100M is equivalent
93  * to multiplying by 1/100M, which approximates to 1441151881/2^57. All local
94  * variables fit in registers on x86. This version outputs two digits per round.
95  * <min_pairs> indicates the minimum number of pairs of digits that have to be
96  * emitted, which might be left-padded with zeroes.
97  * It returns the pointer to the ending '\0'.
98  */
ultoad2(unsigned int x,char * out,int min_pairs)99 static char *ultoad2(unsigned int x, char *out, int min_pairs)
100 {
101 	unsigned int q;
102 	char *p = out;
103 	int pos = 4;
104 	unsigned long long y;
105 
106 	static const unsigned short bcd[100] = {
107 		0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
108 		0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
109 		0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
110 		0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
111 		0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
112 		0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
113 		0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
114 		0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
115 		0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
116 		0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 };
117 
118 	y = x * 1441151881ULL;  /* y>>57 will be the integer part of x/100M */
119 	while (1) {
120 		q = y >> 57;
121 		/* Q is composed of the first digit in the lower byte and the second
122 		 * digit in the higher byte.
123 		 */
124 		if (p != out || q > 9 || pos < min_pairs) {
125 #if defined(__i386__) || defined(__x86_64__)
126 			/* unaligned accesses are fast on x86 */
127 			*(unsigned short *)p = bcd[q];
128 			p += 2;
129 #else
130 			*(p++) = bcd[q];
131 			*(p++) = bcd[q] >> 8;
132 #endif
133 		}
134 		else if (q || !pos) {
135 			/* only at most one digit */
136 			*(p++) = bcd[q] >> 8;
137 		}
138 		if (--pos < 0)
139 			break;
140 
141 		y &= 0x1FFFFFFFFFFFFFFULL;  // remainder
142 
143 		if (sizeof(long) >= sizeof(long long)) {
144 			/* shifting is preferred on 64-bit archs, while mult is faster on 32-bit.
145 			 * We multiply by 100 by doing *5, *5 and *4, all of which are trivial.
146 			 */
147 			y += (y << 2);
148 			y += (y << 2);
149 			y <<= 2;
150 		}
151 		else
152 			y *= 100;
153 	}
154 
155 	*p = '\0';
156 	return p;
157 }
158 
159 /* Send <h> as hex into <out>. Returns the pointer to the ending '\0'. */
emit_hex(unsigned long h,char * out)160 static char *emit_hex(unsigned long h, char *out)
161 {
162 	static unsigned char hextab[16] = "0123456789abcdef";
163 	int shift = sizeof(h) * 8 - 4;
164 	unsigned int idx;
165 
166 	do {
167 		idx = (h >> shift);
168 		if (idx || !shift)
169 			*out++ = hextab[idx & 15];
170 		shift -= 4;
171 	} while (shift >= 0);
172 	*out = '\0';
173 	return out;
174 }
175 
make_line(void * from,void * to,int level,char dir,long ret)176 static void make_line(void *from, void *to, int level, char dir, long ret)
177 {
178 	char *p = line;
179 
180 	if (unlikely(!log) && !open_trace())
181 		return;
182 
183 	if (unlikely(!fast_time))
184 		gettimeofday(now_ptr, NULL);
185 
186 #ifdef USE_SLOW_FPRINTF
187 	if (!use_tsc)
188 		fprintf(log, "%u.%06u %d %p %c %p\n",
189 			(unsigned int)now_ptr->tv_sec,
190 			(unsigned int)now_ptr->tv_usec,
191 			level, from, dir, to);
192 	else
193 		fprintf(log, "%llx %d %p %c %p\n",
194 			rdtsc(), level, from, dir, to);
195 	return;
196 #endif
197 
198 	if (unlikely(!use_tsc)) {
199 		/* "%u.06u", tv_sec, tv_usec */
200 		p = ultoad2(now_ptr->tv_sec, p, 0);
201 		*p++ = '.';
202 		p = ultoad2(now_ptr->tv_usec, p, 3);
203 	} else {
204 		/* "%08x%08x", high, low */
205 		unsigned long long t = rdtsc();
206 		if (sizeof(long) < sizeof(long long))
207 			p = emit_hex((unsigned long)(t >> 32U), p);
208 		p = emit_hex((unsigned long)(t), p);
209 	}
210 
211 	/* " %u", level */
212 	*p++ = ' ';
213 	p = ultoad2(level, p, 0);
214 
215 	/* " %p", from */
216 	*p++ = ' '; *p++ = '0'; *p++ = 'x';
217 	p = emit_hex((unsigned long)from, p);
218 
219 	/* " %c", dir */
220 	*p++ = ' '; *p++ = dir;
221 
222 	/* " %p", to */
223 	*p++ = ' '; *p++ = '0'; *p++ = 'x';
224 	p = emit_hex((unsigned long)to, p);
225 
226 	if (dir == '<') {
227 		/* " %x", ret */
228 		*p++ = ' '; *p++ = '0'; *p++ = 'x';
229 		p = emit_hex(ret, p);
230 	}
231 
232 	*p++ = '\n';
233 
234 	fwrite(line, p - line, 1, log);
235 }
236 
237 /* These are the functions GCC calls */
__cyg_profile_func_enter(void * to,void * from)238 void __cyg_profile_func_enter(void *to,  void *from)
239 {
240 	if (!disabled)
241 		return make_line(from, to, ++level, '>', 0);
242 }
243 
__cyg_profile_func_exit(void * to,void * from)244 void __cyg_profile_func_exit(void *to,  void *from)
245 {
246 	long ret = 0;
247 
248 #if defined(__x86_64__)
249 	/* on x86_64, the return value (eax) is temporarily stored in ebx
250 	 * during the call to __cyg_profile_func_exit() so we can snoop it.
251 	 */
252 	asm volatile("mov %%rbx, %0" : "=r"(ret));
253 #endif
254 	if (!disabled)
255 		return make_line(from, to, level--, '<', ret);
256 }
257 
258 /* the one adds comments in the trace above. The output format is :
259  * <timestamp> <level> # <string>
260  */
261 __attribute__((format(printf, 1, 2)))
calltrace(char * fmt,...)262 void calltrace(char *fmt, ...)
263 {
264 	va_list ap;
265 
266 	if (unlikely(!log) && !open_trace())
267 		return;
268 
269 	if (unlikely(!fast_time))
270 		gettimeofday(now_ptr, NULL);
271 
272 	if (!use_tsc)
273 		fprintf(log, "%u.%06u %d # ",
274 			(unsigned int)now_ptr->tv_sec,
275 			(unsigned int)now_ptr->tv_usec,
276 			level + 1);
277 	else
278 		fprintf(log, "%llx %d # ",
279 			rdtsc(), level + 1);
280 
281 	va_start(ap, fmt);
282 	vfprintf(log, fmt, ap);
283 	va_end(ap);
284 	fputc('\n', log);
285 	fflush(log);
286 }
287