1 /*
2 **  Timer functions, to gather profiling data.
3 **
4 **  These functions log profiling information about where the server spends
5 **  its time.  While this doesn't provide as detailed of information as a
6 **  profiling build would, it's much faster and simpler, and since it's fast
7 **  enough to always leave on even on production servers, it can gather
8 **  information *before* it's needed and show long-term trends.
9 **
10 **  Functions that should have their time monitored need to call TMRstart(n)
11 **  at the beginning of the segment of code and TMRstop(n) at the end.  The
12 **  time spent will be accumulated and added to the total for the counter n,
13 **  where n should be one of the constants in timer.h or defined in your
14 **  application.  If you add new timers in the library code, add them to
15 **  timer.h and also add a description to TMRsummary; if you add them in
16 **  your application add them to your own description array.  Also add them
17 **  to innreport.
18 **
19 **  Calls are sanity-checked to some degree and errors reported via
20 **  warn/die, so all callers should have the proper warn and die handlers
21 **  set up, if appropriate.
22 **
23 **  Recursion is not allowed on a given timer.  Setting multiple timers
24 **  at once is fine (i.e., you may have a timer for the total time to write
25 **  an article, how long the disk write takes, how long the history update
26 **  takes, etc. which are components of the total article write time).  If a
27 **  timer is started while another timer is running, the new timer is
28 **  considered to be a sub-timer of the running timer, and must be stopped
29 **  before the parent timer is stopped.  Note that the same timer number can
30 **  be a sub-timer of more than one timer or a timer without a parent, and
31 **  each of those counts will be reported separately.
32 **
33 **  Note that this code is not thread-safe and in fact would need to be
34 **  completely overhauled for a threaded server (since the idea of global
35 **  timing statistics doesn't make as much sense when different tasks are
36 **  done in different threads).
37 */
38 
39 #include "portable/system.h"
40 
41 #include <syslog.h>
42 
43 #ifdef HAVE_SYS_TIME_H
44 #    include <sys/time.h>
45 #endif
46 #include <time.h>
47 
48 #include "inn/libinn.h"
49 #include "inn/messages.h"
50 #include "inn/timer.h"
51 
52 /* Timer values are stored in a series of trees.  This allows use to use
53    nested timers.  Each nested timer node is linked to three of its
54    neighbours to make lookups easy and fast.  The current position in the
55    graph is given by timer_current.
56 
57    As an optimization, since most timers aren't nested, timer_list holds an
58    array of pointers to non-nested timers that's filled in as TMRstart is
59    called so that the non-nested case remains O(1).  That array is stored in
60    timers.  This is the "top level" of the timer trees; if timer_current is
61    NULL, any timer that's started is found in this array.  If timer_current
62    isn't NULL, there's a running timer, and starting a new timer adds to
63    that tree.
64 
65    Note that without the parent pointer, this is a tree.  id is the
66    identifier of the timer.  start stores the time (relative to the last
67    summary) at which TMRstart was last called for each timer.  total is
68    the total time accrued by that timer since the last summary.  count is
69    the number of times the timer has been stopped since the last summary. */
70 struct timer {
71     unsigned int id;
72     unsigned long start;
73     unsigned long total;
74     unsigned long count;
75 
76     struct timer *parent;
77     struct timer *brother;
78     struct timer *child;
79 };
80 static struct timer **timers = NULL;
81 static struct timer *timer_current = NULL;
82 static unsigned int timer_count = 0;
83 
84 /* Names for all of the timers.  These must be given in the same order
85    as the definition of the enum in timer.h. */
86 static const char *const timer_name[TMR_APPLICATION] = {
87     "hishave",
88     "hisgrep",
89     "hiswrite",
90     "hissync",
91 };
92 
93 
94 /*
95 **  Returns the current time as a double.  This is not used by any of the
96 **  other timer code, but is used by various programs right now to keep track
97 **  of elapsed time.
98 */
99 double
TMRnow_double(void)100 TMRnow_double(void)
101 {
102     struct timeval tv;
103 
104     gettimeofday(&tv, NULL);
105     return ((double) tv.tv_sec + (double) tv.tv_usec * 1.0e-6);
106 }
107 
108 
109 /*
110 **  Returns the number of milliseconds since the base time.  This gives
111 **  better resolution than time, but the return value is a lot easier to
112 **  work with than a struct timeval.  If the argument is true, also reset
113 **  the base time.
114 */
115 static unsigned long
TMRgettime(bool reset)116 TMRgettime(bool reset)
117 {
118     unsigned long now;
119     long usec, msec;
120     struct timeval tv;
121 
122     /* The time of the last summary, used as a base for times returned by
123        TMRnow.  Formerly, times were relative to the last call to TMRinit,
124        which was only called once when innd was starting up; with that
125        approach, times may overflow a 32-bit unsigned long about 50 days
126        after the server starts up.  While this may still work due to unsigned
127        arithmetic, this approach is less confusing to follow. */
128     static struct timeval base;
129 
130     gettimeofday(&tv, NULL);
131     now = (unsigned long) (tv.tv_sec - base.tv_sec) * 1000u;
132     usec = tv.tv_usec - base.tv_usec; /* maybe negative */
133     msec = usec / 1000;               /* still maybe negative */
134     now += (unsigned long) msec;
135     if (reset)
136         base = tv;
137     return now;
138 }
139 
140 
141 /*
142 **  Initialize the timer.  Zero out even variables that would initially be
143 **  zero so that this function can be called multiple times if wanted.
144 */
145 void
TMRinit(unsigned int count)146 TMRinit(unsigned int count)
147 {
148     unsigned int i;
149 
150     /* TMRinit(0) disables all timers. */
151     TMRfree();
152     if (count != 0) {
153         timers = xmalloc(count * sizeof(struct timer *));
154         for (i = 0; i < count; i++)
155             timers[i] = NULL;
156         TMRgettime(true);
157     }
158     timer_count = count;
159 }
160 
161 
162 /*
163 **  Recursively destroy a timer node.
164 */
165 static void
TMRfreeone(struct timer * timer)166 TMRfreeone(struct timer *timer)
167 {
168     if (timer == NULL)
169         return;
170     if (timer->child != NULL)
171         TMRfreeone(timer->child);
172     if (timer->brother != NULL)
173         TMRfreeone(timer->brother);
174     free(timer);
175 }
176 
177 
178 /*
179 **  Free all timers and the resources devoted to them.
180 */
181 void
TMRfree(void)182 TMRfree(void)
183 {
184     unsigned int i;
185 
186     if (timers != NULL)
187         for (i = 0; i < timer_count; i++)
188             TMRfreeone(timers[i]);
189     free(timers);
190     timers = NULL;
191     timer_count = 0;
192 }
193 
194 
195 /*
196 **  Allocate a new timer node.  Takes the id and the parent pointer.
197 */
198 static struct timer *
TMRnew(unsigned int id,struct timer * parent)199 TMRnew(unsigned int id, struct timer *parent)
200 {
201     struct timer *timer;
202 
203     timer = xmalloc(sizeof(struct timer));
204     timer->parent = parent;
205     timer->brother = NULL;
206     timer->child = NULL;
207     timer->id = id;
208     timer->start = 0;
209     timer->total = 0;
210     timer->count = 0;
211     return timer;
212 }
213 
214 
215 /*
216 **  Start a particular timer.  If no timer is currently running, start one
217 **  of the top-level timers in the timers array (creating a new one if
218 **  needed).  Otherwise, search for the timer among the children of the
219 **  currently running timer, again creating a new timer if necessary.
220 */
221 void
TMRstart(unsigned int timer)222 TMRstart(unsigned int timer)
223 {
224     struct timer *search;
225 
226     if (timer_count == 0) {
227         /* this should happen if innconf->timer == 0 */
228         return;
229     }
230     if (timer >= timer_count) {
231         warn("timer %u is larger than the maximum timer %u, ignored", timer,
232              timer_count - 1);
233         return;
234     }
235 
236     /* timers will be non-NULL if timer_count > 0. */
237     if (timer_current == NULL) {
238         if (timers[timer] == NULL)
239             timers[timer] = TMRnew(timer, NULL);
240         timer_current = timers[timer];
241     } else {
242         search = timer_current;
243 
244         /* Go to the "child" level and look for the good "brother"; the
245            "brothers" are a simple linked list. */
246         if (search->child == NULL) {
247             search->child = TMRnew(timer, search);
248             timer_current = search->child;
249         } else {
250             search = search->child;
251             while (search->id != timer && search->brother != NULL)
252                 search = search->brother;
253             if (search->id != timer) {
254                 search->brother = TMRnew(timer, search->parent);
255                 timer_current = search->brother;
256             } else {
257                 timer_current = search;
258             }
259         }
260     }
261     timer_current->start = TMRgettime(false);
262 }
263 
264 
265 /*
266 **  Stop a particular timer, adding the total time to total and incrementing
267 **  the count of times that timer has been invoked.
268 */
269 void
TMRstop(unsigned int timer)270 TMRstop(unsigned int timer)
271 {
272     if (timer_count == 0) {
273         /* this should happen if innconf->timer == 0 */
274         return;
275     }
276     if (timer_current == NULL)
277         warn("timer %u stopped when no timer was running", timer);
278     else if (timer != timer_current->id)
279         warn("timer %u stopped doesn't match running timer %u", timer,
280              timer_current->id);
281     else {
282         timer_current->total += TMRgettime(false) - timer_current->start;
283         timer_current->count++;
284         timer_current = timer_current->parent;
285     }
286 }
287 
288 
289 /*
290 **  Return the current time in milliseconds since the last summary or the
291 **  initialization of the timer.  This is intended for use by the caller to
292 **  determine when next to call TMRsummary.
293 */
294 unsigned long
TMRnow(void)295 TMRnow(void)
296 {
297     return TMRgettime(false);
298 }
299 
300 
301 /*
302 **  Return the label associated with timer number id.  Used internally
303 **  to do the right thing when fetching from the timer_name or labels
304 **  arrays
305 */
306 static const char *
TMRlabel(const char * const * labels,unsigned int id)307 TMRlabel(const char *const *labels, unsigned int id)
308 {
309     if (id >= TMR_APPLICATION)
310         return labels[id - TMR_APPLICATION];
311     else
312         return timer_name[id];
313 }
314 
315 
316 /*
317 **  Recursively summarize a single timer tree into the supplied buffer,
318 **  returning the number of characters added to the buffer.
319 */
320 static size_t
TMRsumone(const char * const * labels,struct timer * timer,char * buf,size_t len)321 TMRsumone(const char *const *labels, struct timer *timer, char *buf,
322           size_t len)
323 {
324     struct timer *node;
325     size_t off = 0;
326     int rc;
327 
328     /* This results in "child/parent nn(nn)" instead of the arguably more
329        intuitive "parent/child" but it's easy.  Since we ensure sane snprintf
330        semantics, it's safe to defer checking for overflow until after
331        formatting all of the timer data. */
332     for (node = timer; node != NULL; node = node->parent) {
333         rc = snprintf(buf + off, len - off, "%s/", TMRlabel(labels, node->id));
334         if (rc < 0) {
335             /* Do nothing. */
336         } else if ((size_t) rc >= len - off) {
337             off = len;
338         } else {
339             off += rc;
340         }
341     }
342     if (off > 0)
343         off--;
344 
345     rc = snprintf(buf + off, len - off, " %lu(%lu) ", timer->total,
346                   timer->count);
347     if (rc < 0) {
348         /* Do nothing. */
349     } else if ((size_t) rc >= len - off) {
350         off = len;
351     } else {
352         off += rc;
353     }
354     if (off == len) {
355         warn("timer log too long while processing %s",
356              TMRlabel(labels, timer->id));
357         return off;
358     }
359 
360     timer->total = 0;
361     timer->count = 0;
362     if (timer->child != NULL)
363         off += TMRsumone(labels, timer->child, buf + off, len - off);
364     if (timer->brother != NULL)
365         off += TMRsumone(labels, timer->brother, buf + off, len - off);
366     return off;
367 }
368 
369 
370 /*
371 **  Summarize the current timer statistics, report them to syslog, and then
372 **  reset them for the next polling interval.
373 */
374 void
TMRsummary(const char * prefix,const char * const * labels)375 TMRsummary(const char *prefix, const char *const *labels)
376 {
377     char *buf;
378     unsigned int i;
379     size_t len, off;
380     int rc;
381 
382     /* To find the needed buffer size, note that a 64-bit unsigned number can
383        be up to 20 digits long, so each timer can be 52 characters.  We also
384        allow another 27 characters for the introductory timestamp, plus some
385        for the prefix.  We may have timers recurring at multiple points in
386        the structure, so this may not be long enough, but this is over-sized
387        enough that it shouldn't be a problem.  We use snprintf, so if the
388        buffer isn't large enough it will just result in logged errors. */
389     len = 52 * timer_count + 27 + (prefix == NULL ? 0 : strlen(prefix)) + 1;
390     buf = xmalloc(len);
391     off = 0;
392     if (prefix == NULL)
393         rc = 0;
394     else
395         rc = snprintf(buf, len, "%s ", prefix);
396     if (rc < 0) {
397         /* Do nothing. */
398     } else if ((size_t) rc >= len) {
399         off = len;
400     } else {
401         off += rc;
402     }
403 
404     rc = snprintf(buf + off, len - off, "time %lu ", TMRgettime(true));
405     if (rc < 0) {
406         /* Do nothing. */
407     } else if ((size_t) rc >= len - off) {
408         off = len;
409     } else {
410         off += rc;
411     }
412 
413     for (i = 0; i < timer_count; i++) {
414         if (timers[i] != NULL) {
415             off += TMRsumone(labels, timers[i], buf + off, len - off);
416         }
417     }
418     notice("%s", buf);
419     free(buf);
420 }
421