1 /*
2 ** Timer functions, to gather profiling data.
3 **
4 ** These functions log profiling information about where the server spends
5 ** its time. While this doesn't provide as detailed of information as a
6 ** profiling build would, it's much faster and simpler, and since it's fast
7 ** enough to always leave on even on production servers, it can gather
8 ** information *before* it's needed and show long-term trends.
9 **
10 ** Functions that should have their time monitored need to call TMRstart(n)
11 ** at the beginning of the segment of code and TMRstop(n) at the end. The
12 ** time spent will be accumulated and added to the total for the counter n,
13 ** where n should be one of the constants in timer.h or defined in your
14 ** application. If you add new timers in the library code, add them to
15 ** timer.h and also add a description to TMRsummary; if you add them in
16 ** your application add them to your own description array. Also add them
17 ** to innreport.
18 **
19 ** Calls are sanity-checked to some degree and errors reported via
20 ** warn/die, so all callers should have the proper warn and die handlers
21 ** set up, if appropriate.
22 **
23 ** Recursion is not allowed on a given timer. Setting multiple timers
24 ** at once is fine (i.e., you may have a timer for the total time to write
25 ** an article, how long the disk write takes, how long the history update
26 ** takes, etc. which are components of the total article write time). If a
27 ** timer is started while another timer is running, the new timer is
28 ** considered to be a sub-timer of the running timer, and must be stopped
29 ** before the parent timer is stopped. Note that the same timer number can
30 ** be a sub-timer of more than one timer or a timer without a parent, and
31 ** each of those counts will be reported separately.
32 **
33 ** Note that this code is not thread-safe and in fact would need to be
34 ** completely overhauled for a threaded server (since the idea of global
35 ** timing statistics doesn't make as much sense when different tasks are
36 ** done in different threads).
37 */
38
39 #include "portable/system.h"
40
41 #include <syslog.h>
42
43 #ifdef HAVE_SYS_TIME_H
44 # include <sys/time.h>
45 #endif
46 #include <time.h>
47
48 #include "inn/libinn.h"
49 #include "inn/messages.h"
50 #include "inn/timer.h"
51
52 /* Timer values are stored in a series of trees. This allows use to use
53 nested timers. Each nested timer node is linked to three of its
54 neighbours to make lookups easy and fast. The current position in the
55 graph is given by timer_current.
56
57 As an optimization, since most timers aren't nested, timer_list holds an
58 array of pointers to non-nested timers that's filled in as TMRstart is
59 called so that the non-nested case remains O(1). That array is stored in
60 timers. This is the "top level" of the timer trees; if timer_current is
61 NULL, any timer that's started is found in this array. If timer_current
62 isn't NULL, there's a running timer, and starting a new timer adds to
63 that tree.
64
65 Note that without the parent pointer, this is a tree. id is the
66 identifier of the timer. start stores the time (relative to the last
67 summary) at which TMRstart was last called for each timer. total is
68 the total time accrued by that timer since the last summary. count is
69 the number of times the timer has been stopped since the last summary. */
70 struct timer {
71 unsigned int id;
72 unsigned long start;
73 unsigned long total;
74 unsigned long count;
75
76 struct timer *parent;
77 struct timer *brother;
78 struct timer *child;
79 };
80 static struct timer **timers = NULL;
81 static struct timer *timer_current = NULL;
82 static unsigned int timer_count = 0;
83
84 /* Names for all of the timers. These must be given in the same order
85 as the definition of the enum in timer.h. */
86 static const char *const timer_name[TMR_APPLICATION] = {
87 "hishave",
88 "hisgrep",
89 "hiswrite",
90 "hissync",
91 };
92
93
94 /*
95 ** Returns the current time as a double. This is not used by any of the
96 ** other timer code, but is used by various programs right now to keep track
97 ** of elapsed time.
98 */
99 double
TMRnow_double(void)100 TMRnow_double(void)
101 {
102 struct timeval tv;
103
104 gettimeofday(&tv, NULL);
105 return ((double) tv.tv_sec + (double) tv.tv_usec * 1.0e-6);
106 }
107
108
109 /*
110 ** Returns the number of milliseconds since the base time. This gives
111 ** better resolution than time, but the return value is a lot easier to
112 ** work with than a struct timeval. If the argument is true, also reset
113 ** the base time.
114 */
115 static unsigned long
TMRgettime(bool reset)116 TMRgettime(bool reset)
117 {
118 unsigned long now;
119 long usec, msec;
120 struct timeval tv;
121
122 /* The time of the last summary, used as a base for times returned by
123 TMRnow. Formerly, times were relative to the last call to TMRinit,
124 which was only called once when innd was starting up; with that
125 approach, times may overflow a 32-bit unsigned long about 50 days
126 after the server starts up. While this may still work due to unsigned
127 arithmetic, this approach is less confusing to follow. */
128 static struct timeval base;
129
130 gettimeofday(&tv, NULL);
131 now = (unsigned long) (tv.tv_sec - base.tv_sec) * 1000u;
132 usec = tv.tv_usec - base.tv_usec; /* maybe negative */
133 msec = usec / 1000; /* still maybe negative */
134 now += (unsigned long) msec;
135 if (reset)
136 base = tv;
137 return now;
138 }
139
140
141 /*
142 ** Initialize the timer. Zero out even variables that would initially be
143 ** zero so that this function can be called multiple times if wanted.
144 */
145 void
TMRinit(unsigned int count)146 TMRinit(unsigned int count)
147 {
148 unsigned int i;
149
150 /* TMRinit(0) disables all timers. */
151 TMRfree();
152 if (count != 0) {
153 timers = xmalloc(count * sizeof(struct timer *));
154 for (i = 0; i < count; i++)
155 timers[i] = NULL;
156 TMRgettime(true);
157 }
158 timer_count = count;
159 }
160
161
162 /*
163 ** Recursively destroy a timer node.
164 */
165 static void
TMRfreeone(struct timer * timer)166 TMRfreeone(struct timer *timer)
167 {
168 if (timer == NULL)
169 return;
170 if (timer->child != NULL)
171 TMRfreeone(timer->child);
172 if (timer->brother != NULL)
173 TMRfreeone(timer->brother);
174 free(timer);
175 }
176
177
178 /*
179 ** Free all timers and the resources devoted to them.
180 */
181 void
TMRfree(void)182 TMRfree(void)
183 {
184 unsigned int i;
185
186 if (timers != NULL)
187 for (i = 0; i < timer_count; i++)
188 TMRfreeone(timers[i]);
189 free(timers);
190 timers = NULL;
191 timer_count = 0;
192 }
193
194
195 /*
196 ** Allocate a new timer node. Takes the id and the parent pointer.
197 */
198 static struct timer *
TMRnew(unsigned int id,struct timer * parent)199 TMRnew(unsigned int id, struct timer *parent)
200 {
201 struct timer *timer;
202
203 timer = xmalloc(sizeof(struct timer));
204 timer->parent = parent;
205 timer->brother = NULL;
206 timer->child = NULL;
207 timer->id = id;
208 timer->start = 0;
209 timer->total = 0;
210 timer->count = 0;
211 return timer;
212 }
213
214
215 /*
216 ** Start a particular timer. If no timer is currently running, start one
217 ** of the top-level timers in the timers array (creating a new one if
218 ** needed). Otherwise, search for the timer among the children of the
219 ** currently running timer, again creating a new timer if necessary.
220 */
221 void
TMRstart(unsigned int timer)222 TMRstart(unsigned int timer)
223 {
224 struct timer *search;
225
226 if (timer_count == 0) {
227 /* this should happen if innconf->timer == 0 */
228 return;
229 }
230 if (timer >= timer_count) {
231 warn("timer %u is larger than the maximum timer %u, ignored", timer,
232 timer_count - 1);
233 return;
234 }
235
236 /* timers will be non-NULL if timer_count > 0. */
237 if (timer_current == NULL) {
238 if (timers[timer] == NULL)
239 timers[timer] = TMRnew(timer, NULL);
240 timer_current = timers[timer];
241 } else {
242 search = timer_current;
243
244 /* Go to the "child" level and look for the good "brother"; the
245 "brothers" are a simple linked list. */
246 if (search->child == NULL) {
247 search->child = TMRnew(timer, search);
248 timer_current = search->child;
249 } else {
250 search = search->child;
251 while (search->id != timer && search->brother != NULL)
252 search = search->brother;
253 if (search->id != timer) {
254 search->brother = TMRnew(timer, search->parent);
255 timer_current = search->brother;
256 } else {
257 timer_current = search;
258 }
259 }
260 }
261 timer_current->start = TMRgettime(false);
262 }
263
264
265 /*
266 ** Stop a particular timer, adding the total time to total and incrementing
267 ** the count of times that timer has been invoked.
268 */
269 void
TMRstop(unsigned int timer)270 TMRstop(unsigned int timer)
271 {
272 if (timer_count == 0) {
273 /* this should happen if innconf->timer == 0 */
274 return;
275 }
276 if (timer_current == NULL)
277 warn("timer %u stopped when no timer was running", timer);
278 else if (timer != timer_current->id)
279 warn("timer %u stopped doesn't match running timer %u", timer,
280 timer_current->id);
281 else {
282 timer_current->total += TMRgettime(false) - timer_current->start;
283 timer_current->count++;
284 timer_current = timer_current->parent;
285 }
286 }
287
288
289 /*
290 ** Return the current time in milliseconds since the last summary or the
291 ** initialization of the timer. This is intended for use by the caller to
292 ** determine when next to call TMRsummary.
293 */
294 unsigned long
TMRnow(void)295 TMRnow(void)
296 {
297 return TMRgettime(false);
298 }
299
300
301 /*
302 ** Return the label associated with timer number id. Used internally
303 ** to do the right thing when fetching from the timer_name or labels
304 ** arrays
305 */
306 static const char *
TMRlabel(const char * const * labels,unsigned int id)307 TMRlabel(const char *const *labels, unsigned int id)
308 {
309 if (id >= TMR_APPLICATION)
310 return labels[id - TMR_APPLICATION];
311 else
312 return timer_name[id];
313 }
314
315
316 /*
317 ** Recursively summarize a single timer tree into the supplied buffer,
318 ** returning the number of characters added to the buffer.
319 */
320 static size_t
TMRsumone(const char * const * labels,struct timer * timer,char * buf,size_t len)321 TMRsumone(const char *const *labels, struct timer *timer, char *buf,
322 size_t len)
323 {
324 struct timer *node;
325 size_t off = 0;
326 int rc;
327
328 /* This results in "child/parent nn(nn)" instead of the arguably more
329 intuitive "parent/child" but it's easy. Since we ensure sane snprintf
330 semantics, it's safe to defer checking for overflow until after
331 formatting all of the timer data. */
332 for (node = timer; node != NULL; node = node->parent) {
333 rc = snprintf(buf + off, len - off, "%s/", TMRlabel(labels, node->id));
334 if (rc < 0) {
335 /* Do nothing. */
336 } else if ((size_t) rc >= len - off) {
337 off = len;
338 } else {
339 off += rc;
340 }
341 }
342 if (off > 0)
343 off--;
344
345 rc = snprintf(buf + off, len - off, " %lu(%lu) ", timer->total,
346 timer->count);
347 if (rc < 0) {
348 /* Do nothing. */
349 } else if ((size_t) rc >= len - off) {
350 off = len;
351 } else {
352 off += rc;
353 }
354 if (off == len) {
355 warn("timer log too long while processing %s",
356 TMRlabel(labels, timer->id));
357 return off;
358 }
359
360 timer->total = 0;
361 timer->count = 0;
362 if (timer->child != NULL)
363 off += TMRsumone(labels, timer->child, buf + off, len - off);
364 if (timer->brother != NULL)
365 off += TMRsumone(labels, timer->brother, buf + off, len - off);
366 return off;
367 }
368
369
370 /*
371 ** Summarize the current timer statistics, report them to syslog, and then
372 ** reset them for the next polling interval.
373 */
374 void
TMRsummary(const char * prefix,const char * const * labels)375 TMRsummary(const char *prefix, const char *const *labels)
376 {
377 char *buf;
378 unsigned int i;
379 size_t len, off;
380 int rc;
381
382 /* To find the needed buffer size, note that a 64-bit unsigned number can
383 be up to 20 digits long, so each timer can be 52 characters. We also
384 allow another 27 characters for the introductory timestamp, plus some
385 for the prefix. We may have timers recurring at multiple points in
386 the structure, so this may not be long enough, but this is over-sized
387 enough that it shouldn't be a problem. We use snprintf, so if the
388 buffer isn't large enough it will just result in logged errors. */
389 len = 52 * timer_count + 27 + (prefix == NULL ? 0 : strlen(prefix)) + 1;
390 buf = xmalloc(len);
391 off = 0;
392 if (prefix == NULL)
393 rc = 0;
394 else
395 rc = snprintf(buf, len, "%s ", prefix);
396 if (rc < 0) {
397 /* Do nothing. */
398 } else if ((size_t) rc >= len) {
399 off = len;
400 } else {
401 off += rc;
402 }
403
404 rc = snprintf(buf + off, len - off, "time %lu ", TMRgettime(true));
405 if (rc < 0) {
406 /* Do nothing. */
407 } else if ((size_t) rc >= len - off) {
408 off = len;
409 } else {
410 off += rc;
411 }
412
413 for (i = 0; i < timer_count; i++) {
414 if (timers[i] != NULL) {
415 off += TMRsumone(labels, timers[i], buf + off, len - off);
416 }
417 }
418 notice("%s", buf);
419 free(buf);
420 }
421