1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3 #include <arpa/inet.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <errno.h>
8 #include <poll.h>
9 #include <ctype.h>
10 #include <stdarg.h>
11
12 #if defined(__sun)
13 #include <atomic.h>
14 #endif
15
16 #include "memcached.h"
17 #include "bipbuffer.h"
18
19 #ifdef LOGGER_DEBUG
20 #define L_DEBUG(...) \
21 do { \
22 fprintf(stderr, __VA_ARGS__); \
23 } while (0)
24 #else
25 #define L_DEBUG(...)
26 #endif
27
28
29 /* TODO: put this in a struct and ditch the global vars. */
30 static logger *logger_stack_head = NULL;
31 static logger *logger_stack_tail = NULL;
32 static unsigned int logger_count = 0;
33 static volatile int do_run_logger_thread = 1;
34 static pthread_t logger_tid;
35 pthread_mutex_t logger_stack_lock = PTHREAD_MUTEX_INITIALIZER;
36 pthread_cond_t logger_stack_cond = PTHREAD_COND_INITIALIZER;
37
38 pthread_key_t logger_key;
39
40 #if !defined(HAVE_GCC_64ATOMICS) && !defined(__sun)
41 pthread_mutex_t logger_atomics_mutex = PTHREAD_MUTEX_INITIALIZER;
42 #endif
43
44 #define WATCHER_LIMIT 20
45 logger_watcher *watchers[20];
46 struct pollfd watchers_pollfds[20];
47 int watcher_count = 0;
48
49 #define WATCHER_ALL -1
50 static int logger_thread_poll_watchers(int force_poll, int watcher);
51
52 /* helpers for logger_log */
53
_logger_log_text(logentry * e,const entry_details * d,const void * entry,va_list ap)54 static void _logger_log_text(logentry *e, const entry_details *d, const void *entry, va_list ap) {
55 int reqlen = d->reqlen;
56 int total = vsnprintf((char *) e->data, reqlen, d->format, ap);
57 if (total >= reqlen || total <= 0) {
58 fprintf(stderr, "LOGGER: Failed to vsnprintf a text entry: (total) %d\n", total);
59 }
60 e->size = total + 1; // null byte
61 }
62
_logger_log_evictions(logentry * e,const entry_details * d,const void * entry,va_list ap)63 static void _logger_log_evictions(logentry *e, const entry_details *d, const void *entry, va_list ap) {
64 item *it = (item *)entry;
65 struct logentry_eviction *le = (struct logentry_eviction *) e->data;
66
67 le->exptime = (it->exptime > 0) ? (long long int)(it->exptime - current_time) : (long long int) -1;
68 le->latime = current_time - it->time;
69 le->it_flags = it->it_flags;
70 le->nkey = it->nkey;
71 le->nbytes = it->nbytes;
72 le->clsid = ITEM_clsid(it);
73 memcpy(le->key, ITEM_key(it), it->nkey);
74 e->size = sizeof(struct logentry_eviction) + le->nkey;
75 }
76 #ifdef EXTSTORE
_logger_log_ext_write(logentry * e,const entry_details * d,const void * entry,va_list ap)77 static void _logger_log_ext_write(logentry *e, const entry_details *d, const void *entry, va_list ap) {
78 item *it = (item *)entry;
79 int ew_bucket = va_arg(ap, int);
80
81 struct logentry_ext_write *le = (struct logentry_ext_write *) e->data;
82 le->exptime = (it->exptime > 0) ? (long long int)(it->exptime - current_time) : (long long int) -1;
83 le->latime = current_time - it->time;
84 le->it_flags = it->it_flags;
85 le->nkey = it->nkey;
86 le->clsid = ITEM_clsid(it);
87 le->bucket = (uint8_t)ew_bucket;
88 memcpy(le->key, ITEM_key(it), it->nkey);
89 e->size = sizeof(struct logentry_ext_write) + le->nkey;
90 }
91 #endif
92 // 0 == nf, 1 == found. 2 == flushed. 3 == expired.
93 // might be useful to store/print the flags an item has?
94 // could also collapse this and above code into an "item status" struct. wait
95 // for more endpoints to be written before making it generic, though.
_logger_log_item_get(logentry * e,const entry_details * d,const void * entry,va_list ap)96 static void _logger_log_item_get(logentry *e, const entry_details *d, const void *entry, va_list ap) {
97 int was_found = va_arg(ap, int);
98 char *key = va_arg(ap, char *);
99 size_t nkey = va_arg(ap, size_t);
100 int nbytes = va_arg(ap, int);
101 uint8_t clsid = va_arg(ap, int);
102 int sfd = va_arg(ap, int);
103
104 struct logentry_item_get *le = (struct logentry_item_get *) e->data;
105 le->was_found = was_found;
106 le->nkey = nkey;
107 le->nbytes = nbytes;
108 le->clsid = clsid;
109 memcpy(le->key, key, nkey);
110 le->sfd = sfd;
111 e->size = sizeof(struct logentry_item_get) + nkey;
112 }
113
_logger_log_item_store(logentry * e,const entry_details * d,const void * entry,va_list ap)114 static void _logger_log_item_store(logentry *e, const entry_details *d, const void *entry, va_list ap) {
115 enum store_item_type status = va_arg(ap, enum store_item_type);
116 int comm = va_arg(ap, int);
117 char *key = va_arg(ap, char *);
118 size_t nkey = va_arg(ap, size_t);
119 int nbytes = va_arg(ap, int);
120 rel_time_t ttl = va_arg(ap, rel_time_t);
121 uint8_t clsid = va_arg(ap, int);
122 int sfd = va_arg(ap, int);
123
124 struct logentry_item_store *le = (struct logentry_item_store *) e->data;
125 le->status = status;
126 le->cmd = comm;
127 le->nkey = nkey;
128 le->nbytes = nbytes;
129 le->clsid = clsid;
130 if (ttl != 0) {
131 le->ttl = ttl - current_time;
132 } else {
133 le->ttl = 0;
134 }
135 memcpy(le->key, key, nkey);
136 le->sfd = sfd;
137 e->size = sizeof(struct logentry_item_store) + nkey;
138 }
139
_logger_log_conn_event(logentry * e,const entry_details * d,const void * entry,va_list ap)140 static void _logger_log_conn_event(logentry *e, const entry_details *d, const void *entry, va_list ap) {
141 struct sockaddr_in6 *addr = va_arg(ap, struct sockaddr_in6 *);
142 socklen_t addrlen = va_arg(ap, socklen_t);
143 enum network_transport transport = va_arg(ap, enum network_transport);
144 enum close_reasons reason = va_arg(ap, enum close_reasons);
145 int sfd = va_arg(ap, int);
146
147 struct logentry_conn_event *le = (struct logentry_conn_event *) e->data;
148
149 memcpy(&le->addr, addr, addrlen);
150 le->sfd = sfd;
151 le->transport = transport;
152 le->reason = reason;
153 e->size = sizeof(struct logentry_conn_event);
154 }
155
156 /*************************
157 * Util functions used by the logger background thread
158 *************************/
159
_logger_util_addr_endpoint(struct sockaddr_in6 * addr,char * rip,size_t riplen,unsigned short * rport)160 static int _logger_util_addr_endpoint(struct sockaddr_in6 *addr, char *rip,
161 size_t riplen, unsigned short *rport) {
162 memset(rip, 0, riplen);
163
164 switch (addr->sin6_family) {
165 case AF_INET:
166 inet_ntop(AF_INET, &((struct sockaddr_in *) addr)->sin_addr,
167 rip, riplen - 1);
168 *rport = ntohs(((struct sockaddr_in *) addr)->sin_port);
169 break;
170 case AF_INET6:
171 inet_ntop(AF_INET6, &((struct sockaddr_in6 *) addr)->sin6_addr,
172 rip, riplen - 1);
173 *rport = ntohs(((struct sockaddr_in6 *) addr)->sin6_port);
174 break;
175 #ifndef DISABLE_UNIX_SOCKET
176 // Connections on Unix socket transports have c->request_addr zeroed out.
177 case AF_UNSPEC:
178 case AF_UNIX:
179 strncpy(rip, "unix", strlen("unix") + 1);
180 *rport = 0;
181 break;
182 #endif // #ifndef DISABLE_UNIX_SOCKET
183 }
184
185 return 0;
186 }
187
188 /*************************
189 * Logger background thread functions. Aggregates per-worker buffers and
190 * writes to any watchers.
191 *************************/
192
193 #define LOGGER_PARSE_SCRATCH 4096
194
_logger_parse_text(logentry * e,char * scratch)195 static int _logger_parse_text(logentry *e, char *scratch) {
196 return snprintf(scratch, LOGGER_PARSE_SCRATCH, "ts=%d.%d gid=%llu %s\n",
197 (int)e->tv.tv_sec, (int)e->tv.tv_usec,
198 (unsigned long long) e->gid, (char *) e->data);
199 }
200
_logger_parse_ise(logentry * e,char * scratch)201 static int _logger_parse_ise(logentry *e, char *scratch) {
202 int total;
203 const char *cmd = "na";
204 char keybuf[KEY_MAX_URI_ENCODED_LENGTH];
205 struct logentry_item_store *le = (struct logentry_item_store *) e->data;
206 const char * const status_map[] = {
207 "not_stored", "stored", "exists", "not_found", "too_large", "no_memory" };
208 const char * const cmd_map[] = {
209 "null", "add", "set", "replace", "append", "prepend", "cas" };
210
211 if (le->cmd <= 6)
212 cmd = cmd_map[le->cmd];
213
214 uriencode(le->key, keybuf, le->nkey, KEY_MAX_URI_ENCODED_LENGTH);
215 total = snprintf(scratch, LOGGER_PARSE_SCRATCH,
216 "ts=%d.%d gid=%llu type=item_store key=%s status=%s cmd=%s ttl=%u clsid=%u cfd=%d size=%d\n",
217 (int)e->tv.tv_sec, (int)e->tv.tv_usec, (unsigned long long) e->gid,
218 keybuf, status_map[le->status], cmd, le->ttl, le->clsid, le->sfd,
219 le->nbytes > 0 ? le->nbytes - 2 : 0); // CLRF
220 return total;
221 }
222
_logger_parse_ige(logentry * e,char * scratch)223 static int _logger_parse_ige(logentry *e, char *scratch) {
224 int total;
225 struct logentry_item_get *le = (struct logentry_item_get *) e->data;
226 char keybuf[KEY_MAX_URI_ENCODED_LENGTH];
227 const char * const was_found_map[] = {
228 "not_found", "found", "flushed", "expired" };
229
230 uriencode(le->key, keybuf, le->nkey, KEY_MAX_URI_ENCODED_LENGTH);
231 total = snprintf(scratch, LOGGER_PARSE_SCRATCH,
232 "ts=%d.%d gid=%llu type=item_get key=%s status=%s clsid=%u cfd=%d size=%d\n",
233 (int)e->tv.tv_sec, (int)e->tv.tv_usec, (unsigned long long) e->gid,
234 keybuf, was_found_map[le->was_found], le->clsid, le->sfd,
235 le->nbytes > 0 ? le->nbytes - 2 : 0); // CLRF
236 return total;
237 }
238
_logger_parse_ee(logentry * e,char * scratch)239 static int _logger_parse_ee(logentry *e, char *scratch) {
240 int total;
241 char keybuf[KEY_MAX_URI_ENCODED_LENGTH];
242 struct logentry_eviction *le = (struct logentry_eviction *) e->data;
243 uriencode(le->key, keybuf, le->nkey, KEY_MAX_URI_ENCODED_LENGTH);
244 total = snprintf(scratch, LOGGER_PARSE_SCRATCH,
245 "ts=%d.%d gid=%llu type=eviction key=%s fetch=%s ttl=%lld la=%d clsid=%u size=%d\n",
246 (int)e->tv.tv_sec, (int)e->tv.tv_usec, (unsigned long long) e->gid,
247 keybuf, (le->it_flags & ITEM_FETCHED) ? "yes" : "no",
248 (long long int)le->exptime, le->latime, le->clsid,
249 le->nbytes > 0 ? le->nbytes - 2 : 0); // CLRF
250
251 return total;
252 }
253
254 #ifdef EXTSTORE
_logger_parse_extw(logentry * e,char * scratch)255 static int _logger_parse_extw(logentry *e, char *scratch) {
256 int total;
257 char keybuf[KEY_MAX_URI_ENCODED_LENGTH];
258 struct logentry_ext_write *le = (struct logentry_ext_write *) e->data;
259 uriencode(le->key, keybuf, le->nkey, KEY_MAX_URI_ENCODED_LENGTH);
260 total = snprintf(scratch, LOGGER_PARSE_SCRATCH,
261 "ts=%d.%d gid=%llu type=extwrite key=%s fetch=%s ttl=%lld la=%d clsid=%u bucket=%u\n",
262 (int)e->tv.tv_sec, (int)e->tv.tv_usec, (unsigned long long) e->gid,
263 keybuf, (le->it_flags & ITEM_FETCHED) ? "yes" : "no",
264 (long long int)le->exptime, le->latime, le->clsid, le->bucket);
265
266 return total;
267 }
268 #endif
269
_logger_parse_cne(logentry * e,char * scratch)270 static int _logger_parse_cne(logentry *e, char *scratch) {
271 int total;
272 unsigned short rport;
273 char rip[64];
274 struct logentry_conn_event *le = (struct logentry_conn_event *) e->data;
275 const char * const transport_map[] = { "local", "tcp", "udp" };
276
277 _logger_util_addr_endpoint(&le->addr, rip, sizeof(rip), &rport);
278
279 total = snprintf(scratch, LOGGER_PARSE_SCRATCH,
280 "ts=%d.%d gid=%llu type=conn_new rip=%s rport=%hu transport=%s cfd=%d\n",
281 (int) e->tv.tv_sec, (int) e->tv.tv_usec, (unsigned long long) e->gid,
282 rip, rport, transport_map[le->transport], le->sfd);
283
284 return total;
285 }
286
_logger_parse_cce(logentry * e,char * scratch)287 static int _logger_parse_cce(logentry *e, char *scratch) {
288 int total;
289 unsigned short rport;
290 char rip[64];
291 struct logentry_conn_event *le = (struct logentry_conn_event *) e->data;
292 const char * const transport_map[] = { "local", "tcp", "udp" };
293 const char * const reason_map[] = { "error", "normal", "idle_timeout", "shutdown" };
294
295 _logger_util_addr_endpoint(&le->addr, rip, sizeof(rip), &rport);
296
297 total = snprintf(scratch, LOGGER_PARSE_SCRATCH,
298 "ts=%d.%d gid=%llu type=conn_close rip=%s rport=%hu transport=%s reason=%s cfd=%d\n",
299 (int) e->tv.tv_sec, (int) e->tv.tv_usec, (unsigned long long) e->gid,
300 rip, rport, transport_map[le->transport],
301 reason_map[le->reason], le->sfd);
302
303 return total;
304 }
305
306 /* Should this go somewhere else? */
307 static const entry_details default_entries[] = {
308 [LOGGER_ASCII_CMD] = {512, LOG_RAWCMDS, _logger_log_text, _logger_parse_text, "<%d %s"},
309 [LOGGER_EVICTION] = {512, LOG_EVICTIONS, _logger_log_evictions, _logger_parse_ee, NULL},
310 [LOGGER_ITEM_GET] = {512, LOG_FETCHERS, _logger_log_item_get, _logger_parse_ige, NULL},
311 [LOGGER_ITEM_STORE] = {512, LOG_MUTATIONS, _logger_log_item_store, _logger_parse_ise, NULL},
312 [LOGGER_CRAWLER_STATUS] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
313 "type=lru_crawler crawler=%d lru=%s low_mark=%llu next_reclaims=%llu since_run=%u next_run=%d elapsed=%u examined=%llu reclaimed=%llu"
314 },
315 [LOGGER_SLAB_MOVE] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
316 "type=slab_move src=%d dst=%d"
317 },
318 [LOGGER_CONNECTION_NEW] = {512, LOG_CONNEVENTS, _logger_log_conn_event, _logger_parse_cne, NULL},
319 [LOGGER_CONNECTION_CLOSE] = {512, LOG_CONNEVENTS, _logger_log_conn_event, _logger_parse_cce, NULL},
320 #ifdef EXTSTORE
321 [LOGGER_EXTSTORE_WRITE] = {512, LOG_EVICTIONS, _logger_log_ext_write, _logger_parse_extw, NULL},
322 [LOGGER_COMPACT_START] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
323 "type=compact_start id=%lu version=%llu"
324 },
325 [LOGGER_COMPACT_ABORT] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
326 "type=compact_abort id=%lu"
327 },
328 [LOGGER_COMPACT_READ_START] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
329 "type=compact_read_start id=%lu offset=%llu"
330 },
331 [LOGGER_COMPACT_READ_END] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
332 "type=compact_read_end id=%lu offset=%llu rescues=%lu lost=%lu skipped=%lu"
333 },
334 [LOGGER_COMPACT_END] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
335 "type=compact_end id=%lu"
336 },
337 [LOGGER_COMPACT_FRAGINFO] = {512, LOG_SYSEVENTS, _logger_log_text, _logger_parse_text,
338 "type=compact_fraginfo ratio=%.2f bytes=%lu"
339 },
340 #endif
341 };
342
343 /*************************
344 * Util functions shared between bg thread and workers
345 *************************/
346
347 /* Logger GID's can be used by watchers to put logs back into strict order
348 */
349 static uint64_t logger_gid = 0;
logger_get_gid(void)350 uint64_t logger_get_gid(void) {
351 #ifdef HAVE_GCC_64ATOMICS
352 return __sync_add_and_fetch(&logger_gid, 1);
353 #elif defined(__sun)
354 return atomic_inc_64_nv(&logger_gid);
355 #else
356 mutex_lock(&logger_atomics_mutex);
357 uint64_t res = ++logger_gid;
358 mutex_unlock(&logger_atomics_mutex);
359 return res;
360 #endif
361 }
362
logger_set_gid(uint64_t gid)363 void logger_set_gid(uint64_t gid) {
364 #ifdef HAVE_GCC_64ATOMICS
365 __sync_add_and_fetch(&logger_gid, gid);
366 #elif defined(__sun)
367 atomic_add_64(&logger_gid);
368 #else
369 mutex_lock(&logger_atomics_mutex);
370 logger_gid = gid;
371 mutex_unlock(&logger_atomics_mutex);
372 #endif
373 }
374
375 /* TODO: genericize lists. would be nice to import queue.h if the impact is
376 * studied... otherwise can just write a local one.
377 */
378 /* Add to the list of threads with a logger object */
logger_link_q(logger * l)379 static void logger_link_q(logger *l) {
380 pthread_mutex_lock(&logger_stack_lock);
381 assert(l != logger_stack_head);
382
383 l->prev = 0;
384 l->next = logger_stack_head;
385 if (l->next) l->next->prev = l;
386 logger_stack_head = l;
387 if (logger_stack_tail == 0) logger_stack_tail = l;
388 logger_count++;
389 pthread_mutex_unlock(&logger_stack_lock);
390 return;
391 }
392
393 /* Remove from the list of threads with a logger object */
394 /*static void logger_unlink_q(logger *l) {
395 pthread_mutex_lock(&logger_stack_lock);
396 if (logger_stack_head == l) {
397 assert(l->prev == 0);
398 logger_stack_head = l->next;
399 }
400 if (logger_stack_tail == l) {
401 assert(l->next == 0);
402 logger_stack_tail = l->prev;
403 }
404 assert(l->next != l);
405 assert(l->prev != l);
406
407 if (l->next) l->next->prev = l->prev;
408 if (l->prev) l->prev->next = l->next;
409 logger_count--;
410 pthread_mutex_unlock(&logger_stack_lock);
411 return;
412 }*/
413
414 /* Called with logger stack locked.
415 * Iterates over every watcher collecting enabled flags.
416 */
logger_set_flags(void)417 static void logger_set_flags(void) {
418 logger *l = NULL;
419 int x = 0;
420 uint16_t f = 0; /* logger eflags */
421
422 for (x = 0; x < WATCHER_LIMIT; x++) {
423 logger_watcher *w = watchers[x];
424 if (w == NULL)
425 continue;
426
427 f |= w->eflags;
428 }
429 for (l = logger_stack_head; l != NULL; l=l->next) {
430 pthread_mutex_lock(&l->mutex);
431 l->eflags = f;
432 pthread_mutex_unlock(&l->mutex);
433 }
434 return;
435 }
436
437 /* Completes rendering of log line. */
logger_thread_parse_entry(logentry * e,struct logger_stats * ls,char * scratch,int * scratch_len)438 static enum logger_parse_entry_ret logger_thread_parse_entry(logentry *e, struct logger_stats *ls,
439 char *scratch, int *scratch_len) {
440 int total = 0;
441 const entry_details *d = &default_entries[e->event];
442 assert(d->parse_cb != NULL);
443 total = d->parse_cb(e, scratch);
444
445 if (total >= LOGGER_PARSE_SCRATCH || total <= 0) {
446 L_DEBUG("LOGGER: Failed to flatten log entry!\n");
447 return LOGGER_PARSE_ENTRY_FAILED;
448 } else {
449 *scratch_len = total;
450 }
451
452 return LOGGER_PARSE_ENTRY_OK;
453 }
454
455 /* Writes flattened entry to available watchers */
logger_thread_write_entry(logentry * e,struct logger_stats * ls,char * scratch,int scratch_len)456 static void logger_thread_write_entry(logentry *e, struct logger_stats *ls,
457 char *scratch, int scratch_len) {
458 int x, total;
459 /* Write the line into available watchers with matching flags */
460 for (x = 0; x < WATCHER_LIMIT; x++) {
461 logger_watcher *w = watchers[x];
462 char *skip_scr = NULL;
463 if (w == NULL || (e->eflags & w->eflags) == 0 || (e->gid < w->min_gid))
464 continue;
465
466 /* Avoid poll()'ing constantly when buffer is full by resetting a
467 * flag periodically.
468 */
469 while (!w->failed_flush &&
470 (skip_scr = (char *) bipbuf_request(w->buf, scratch_len + 128)) == NULL) {
471 if (logger_thread_poll_watchers(0, x) <= 0) {
472 L_DEBUG("LOGGER: Watcher had no free space for line of size (%d)\n", scratch_len + 128);
473 w->failed_flush = true;
474 }
475 }
476
477 if (w->failed_flush) {
478 L_DEBUG("LOGGER: Fast skipped for watcher [%d] due to failed_flush\n", w->sfd);
479 w->skipped++;
480 ls->watcher_skipped++;
481 continue;
482 }
483
484 if (w->skipped > 0) {
485 total = snprintf(skip_scr, 128, "skipped=%llu\n", (unsigned long long) w->skipped);
486 if (total >= 128 || total <= 0) {
487 L_DEBUG("LOGGER: Failed to flatten skipped message into watcher [%d]\n", w->sfd);
488 w->skipped++;
489 ls->watcher_skipped++;
490 continue;
491 }
492 bipbuf_push(w->buf, total);
493 w->skipped = 0;
494 }
495 /* Can't fail because bipbuf_request succeeded. */
496 bipbuf_offer(w->buf, (unsigned char *) scratch, scratch_len);
497 ls->watcher_sent++;
498 }
499 }
500
501 /* Called with logger stack locked.
502 * Releases every chunk associated with a watcher and closes the connection.
503 * We can't presently send a connection back to the worker for further
504 * processing.
505 */
logger_thread_close_watcher(logger_watcher * w)506 static void logger_thread_close_watcher(logger_watcher *w) {
507 L_DEBUG("LOGGER: Closing dead watcher\n");
508 watchers[w->id] = NULL;
509 sidethread_conn_close(w->c);
510 watcher_count--;
511 bipbuf_free(w->buf);
512 free(w);
513 logger_set_flags();
514 }
515
516 /* Reads a particular worker thread's available bipbuf bytes. Parses each log
517 * entry into the watcher buffers.
518 */
logger_thread_read(logger * l,struct logger_stats * ls)519 static int logger_thread_read(logger *l, struct logger_stats *ls) {
520 unsigned int size;
521 unsigned int pos = 0;
522 unsigned char *data;
523 char scratch[LOGGER_PARSE_SCRATCH];
524 logentry *e;
525 pthread_mutex_lock(&l->mutex);
526 data = bipbuf_peek_all(l->buf, &size);
527 pthread_mutex_unlock(&l->mutex);
528
529 if (data == NULL) {
530 return 0;
531 }
532 L_DEBUG("LOGGER: Got %d bytes from bipbuffer\n", size);
533
534 /* parse buffer */
535 while (pos < size && watcher_count > 0) {
536 enum logger_parse_entry_ret ret;
537 int scratch_len = 0;
538 e = (logentry *) (data + pos);
539 ret = logger_thread_parse_entry(e, ls, scratch, &scratch_len);
540 if (ret != LOGGER_PARSE_ENTRY_OK) {
541 /* TODO: stats counter */
542 fprintf(stderr, "LOGGER: Failed to parse log entry\n");
543 } else {
544 logger_thread_write_entry(e, ls, scratch, scratch_len);
545 }
546 pos += sizeof(logentry) + e->size + e->pad;
547 }
548 assert(pos <= size);
549
550 pthread_mutex_lock(&l->mutex);
551 data = bipbuf_poll(l->buf, size);
552 ls->worker_written += l->written;
553 ls->worker_dropped += l->dropped;
554 l->written = 0;
555 l->dropped = 0;
556 pthread_mutex_unlock(&l->mutex);
557 if (data == NULL) {
558 fprintf(stderr, "LOGGER: unexpectedly couldn't advance buf pointer\n");
559 assert(0);
560 }
561 return size; /* maybe the count of objects iterated? */
562 }
563
564 /* Since the event loop code isn't reusable without a refactor, and we have a
565 * limited number of potential watchers, we run our own poll loop.
566 * This calls poll() unnecessarily during write flushes, should be possible to
567 * micro-optimize later.
568 *
569 * This flushes buffers attached to watchers, iterating through the bytes set
570 * to each worker. Also checks for readability in case client connection was
571 * closed.
572 *
573 * Allows a specific watcher to be flushed (if buf full)
574 */
logger_thread_poll_watchers(int force_poll,int watcher)575 static int logger_thread_poll_watchers(int force_poll, int watcher) {
576 int x;
577 int nfd = 0;
578 unsigned char *data;
579 unsigned int data_size = 0;
580 int flushed = 0;
581
582 for (x = 0; x < WATCHER_LIMIT; x++) {
583 logger_watcher *w = watchers[x];
584 if (w == NULL || (watcher != WATCHER_ALL && x != watcher))
585 continue;
586
587 data = bipbuf_peek_all(w->buf, &data_size);
588 if (data != NULL) {
589 watchers_pollfds[nfd].fd = w->sfd;
590 watchers_pollfds[nfd].events = POLLOUT;
591 nfd++;
592 } else if (force_poll) {
593 watchers_pollfds[nfd].fd = w->sfd;
594 watchers_pollfds[nfd].events = POLLIN;
595 nfd++;
596 }
597 /* This gets set after a call to poll, and should be used to gate on
598 * calling poll again.
599 */
600 w->failed_flush = false;
601 }
602
603 if (nfd == 0)
604 return 0;
605
606 //L_DEBUG("LOGGER: calling poll() [data_size: %d]\n", data_size);
607 int ret = poll(watchers_pollfds, nfd, 0);
608
609 if (ret < 0) {
610 perror("something failed with logger thread watcher fd polling");
611 return -1;
612 }
613
614 nfd = 0;
615 for (x = 0; x < WATCHER_LIMIT; x++) {
616 logger_watcher *w = watchers[x];
617 if (w == NULL || (watcher != WATCHER_ALL && x != watcher))
618 continue;
619
620 data_size = 0;
621 /* Early detection of a disconnect. Otherwise we have to wait until
622 * the next write
623 */
624 if (watchers_pollfds[nfd].revents & POLLIN) {
625 char buf[1];
626 int res = ((conn*)w->c)->read(w->c, buf, 1);
627 if (res == 0 || (res == -1 && (errno != EAGAIN && errno != EWOULDBLOCK))) {
628 L_DEBUG("LOGGER: watcher closed remotely\n");
629 logger_thread_close_watcher(w);
630 nfd++;
631 continue;
632 }
633 }
634 if ((data = bipbuf_peek_all(w->buf, &data_size)) != NULL) {
635 if (watchers_pollfds[nfd].revents & (POLLHUP|POLLERR)) {
636 L_DEBUG("LOGGER: watcher closed during poll() call\n");
637 logger_thread_close_watcher(w);
638 } else if (watchers_pollfds[nfd].revents & POLLOUT) {
639 int total = 0;
640
641 /* We can write a bit. */
642 switch (w->t) {
643 case LOGGER_WATCHER_STDERR:
644 total = fwrite(data, 1, data_size, stderr);
645 break;
646 case LOGGER_WATCHER_CLIENT:
647 total = ((conn*)w->c)->write(w->c, data, data_size);
648 break;
649 }
650
651 L_DEBUG("LOGGER: poll() wrote %d to %d (data_size: %d) (bipbuf_used: %d)\n", total, w->sfd,
652 data_size, bipbuf_used(w->buf));
653 if (total == -1) {
654 if (errno != EAGAIN && errno != EWOULDBLOCK) {
655 logger_thread_close_watcher(w);
656 }
657 L_DEBUG("LOGGER: watcher hit EAGAIN\n");
658 } else if (total == 0) {
659 logger_thread_close_watcher(w);
660 } else {
661 bipbuf_poll(w->buf, total);
662 flushed += total;
663 }
664 }
665 }
666 nfd++;
667 }
668 return flushed;
669 }
670
logger_thread_flush_stats(struct logger_stats * ls)671 static void logger_thread_flush_stats(struct logger_stats *ls) {
672 STATS_LOCK();
673 stats.log_worker_dropped += ls->worker_dropped;
674 stats.log_worker_written += ls->worker_written;
675 stats.log_watcher_skipped += ls->watcher_skipped;
676 stats.log_watcher_sent += ls->watcher_sent;
677 stats_state.log_watchers = ls->watcher_count;
678 STATS_UNLOCK();
679 }
680
681 #define MAX_LOGGER_SLEEP 1000000
682 #define MIN_LOGGER_SLEEP 1000
683
684 /* Primary logger thread routine */
logger_thread(void * arg)685 static void *logger_thread(void *arg) {
686 useconds_t to_sleep = MIN_LOGGER_SLEEP;
687 L_DEBUG("LOGGER: Starting logger thread\n");
688 // TODO: If we ever have item references in the logger code, will need to
689 // ensure everything is dequeued before stopping the thread.
690 while (do_run_logger_thread) {
691 int found_logs = 0;
692 logger *l;
693 struct logger_stats ls;
694 memset(&ls, 0, sizeof(struct logger_stats));
695
696 /* only sleep if we're *above* the minimum */
697 if (to_sleep > MIN_LOGGER_SLEEP)
698 usleep(to_sleep);
699
700 /* Call function to iterate each logger. */
701 pthread_mutex_lock(&logger_stack_lock);
702 if (watcher_count == 0) {
703 // Not bothering to loop on the condition here since it's fine to
704 // walk through with zero watchers.
705 pthread_cond_wait(&logger_stack_cond, &logger_stack_lock);
706 }
707 for (l = logger_stack_head; l != NULL; l=l->next) {
708 /* lock logger, call function to manipulate it */
709 found_logs += logger_thread_read(l, &ls);
710 }
711
712 logger_thread_poll_watchers(1, WATCHER_ALL);
713
714 /* capture the current count within mutual exclusion of the lock */
715 ls.watcher_count = watcher_count;
716
717 pthread_mutex_unlock(&logger_stack_lock);
718
719 /* TODO: abstract into a function and share with lru_crawler */
720 if (!found_logs) {
721 if (to_sleep < MAX_LOGGER_SLEEP)
722 to_sleep += to_sleep / 8;
723 if (to_sleep > MAX_LOGGER_SLEEP)
724 to_sleep = MAX_LOGGER_SLEEP;
725 } else {
726 to_sleep /= 2;
727 if (to_sleep < MIN_LOGGER_SLEEP)
728 to_sleep = MIN_LOGGER_SLEEP;
729 }
730 logger_thread_flush_stats(&ls);
731 }
732
733 return NULL;
734 }
735
start_logger_thread(void)736 static int start_logger_thread(void) {
737 int ret;
738 do_run_logger_thread = 1;
739 if ((ret = pthread_create(&logger_tid, NULL,
740 logger_thread, NULL)) != 0) {
741 fprintf(stderr, "Can't start logger thread: %s\n", strerror(ret));
742 return -1;
743 }
744 return 0;
745 }
746
stop_logger_thread(void)747 static int stop_logger_thread(void) {
748 do_run_logger_thread = 0;
749 pthread_cond_signal(&logger_stack_cond);
750 pthread_join(logger_tid, NULL);
751 return 0;
752 }
753
754 /*************************
755 * Public functions for submitting logs and starting loggers from workers.
756 *************************/
757
758 /* Global logger thread start/init */
logger_init(void)759 void logger_init(void) {
760 /* TODO: auto destructor when threads exit */
761 /* TODO: error handling */
762
763 /* init stack for iterating loggers */
764 logger_stack_head = 0;
765 logger_stack_tail = 0;
766 pthread_key_create(&logger_key, NULL);
767
768 if (start_logger_thread() != 0) {
769 abort();
770 }
771
772 /* This is what adding a STDERR watcher looks like. should replace old
773 * "verbose" settings. */
774 //logger_add_watcher(NULL, 0);
775 return;
776 }
777
logger_stop(void)778 void logger_stop(void) {
779 stop_logger_thread();
780 }
781
782 /* called *from* the thread using a logger.
783 * initializes the per-thread bipbuf, links it into the list of loggers
784 */
logger_create(void)785 logger *logger_create(void) {
786 L_DEBUG("LOGGER: Creating and linking new logger instance\n");
787 logger *l = calloc(1, sizeof(logger));
788 if (l == NULL) {
789 return NULL;
790 }
791
792 l->buf = bipbuf_new(settings.logger_buf_size);
793 if (l->buf == NULL) {
794 free(l);
795 return NULL;
796 }
797
798 l->entry_map = default_entries;
799
800 pthread_mutex_init(&l->mutex, NULL);
801 pthread_setspecific(logger_key, l);
802
803 /* add to list of loggers */
804 logger_link_q(l);
805 return l;
806 }
807
808 /* Public function for logging an entry.
809 * Tries to encapsulate as much of the formatting as possible to simplify the
810 * caller's code.
811 */
logger_log(logger * l,const enum log_entry_type event,const void * entry,...)812 enum logger_ret_type logger_log(logger *l, const enum log_entry_type event, const void *entry, ...) {
813 bipbuf_t *buf = l->buf;
814 bool nospace = false;
815 va_list ap;
816 logentry *e;
817
818 const entry_details *d = &l->entry_map[event];
819 int reqlen = d->reqlen;
820
821 pthread_mutex_lock(&l->mutex);
822 /* Request a maximum length of data to write to */
823 e = (logentry *) bipbuf_request(buf, (sizeof(logentry) + reqlen));
824 if (e == NULL) {
825 pthread_mutex_unlock(&l->mutex);
826 l->dropped++;
827 return LOGGER_RET_NOSPACE;
828 }
829 e->event = event;
830 e->pad = 0;
831 e->gid = logger_get_gid();
832 /* TODO: Could pass this down as an argument now that we're using
833 * LOGGER_LOG() macro.
834 */
835 e->eflags = d->eflags;
836 /* Noting time isn't optional. A feature may be added to avoid rendering
837 * time and/or gid to a logger.
838 */
839 gettimeofday(&e->tv, NULL);
840
841 va_start(ap, entry);
842 d->log_cb(e, d, entry, ap);
843 va_end(ap);
844
845 #ifdef NEED_ALIGN
846 /* Need to ensure *next* request is aligned. */
847 if (sizeof(logentry) + e->size % 8 != 0) {
848 e->pad = 8 - (sizeof(logentry) + e->size % 8);
849 }
850 #endif
851
852 /* Push pointer forward by the actual amount required */
853 if (bipbuf_push(buf, (sizeof(logentry) + e->size + e->pad)) == 0) {
854 fprintf(stderr, "LOGGER: Failed to bipbuf push a text entry\n");
855 pthread_mutex_unlock(&l->mutex);
856 return LOGGER_RET_ERR;
857 }
858 l->written++;
859 L_DEBUG("LOGGER: Requested %d bytes, wrote %lu bytes\n", reqlen,
860 (sizeof(logentry) + e->size));
861
862 pthread_mutex_unlock(&l->mutex);
863
864 if (nospace) {
865 return LOGGER_RET_NOSPACE;
866 } else {
867 return LOGGER_RET_OK;
868 }
869 }
870
871 /* Passes a client connection socket from a primary worker thread to the
872 * logger thread. Caller *must* event_del() the client before handing it over.
873 * Presently there's no way to hand the client back to the worker thread.
874 */
logger_add_watcher(void * c,const int sfd,uint16_t f)875 enum logger_add_watcher_ret logger_add_watcher(void *c, const int sfd, uint16_t f) {
876 int x;
877 logger_watcher *w = NULL;
878 pthread_mutex_lock(&logger_stack_lock);
879 if (watcher_count >= WATCHER_LIMIT) {
880 pthread_mutex_unlock(&logger_stack_lock);
881 return LOGGER_ADD_WATCHER_TOO_MANY;
882 }
883
884 for (x = 0; x < WATCHER_LIMIT-1; x++) {
885 if (watchers[x] == NULL)
886 break;
887 }
888
889 w = calloc(1, sizeof(logger_watcher));
890 if (w == NULL) {
891 pthread_mutex_unlock(&logger_stack_lock);
892 return LOGGER_ADD_WATCHER_FAILED;
893 }
894 w->c = c;
895 w->sfd = sfd;
896 if (sfd == 0 && c == NULL) {
897 w->t = LOGGER_WATCHER_STDERR;
898 } else {
899 w->t = LOGGER_WATCHER_CLIENT;
900 }
901 w->id = x;
902 w->eflags = f;
903 w->min_gid = logger_get_gid();
904 w->buf = bipbuf_new(settings.logger_watcher_buf_size);
905 if (w->buf == NULL) {
906 free(w);
907 pthread_mutex_unlock(&logger_stack_lock);
908 return LOGGER_ADD_WATCHER_FAILED;
909 }
910 bipbuf_offer(w->buf, (unsigned char *) "OK\r\n", 4);
911
912 watchers[x] = w;
913 watcher_count++;
914 /* Update what flags the global logs will watch */
915 logger_set_flags();
916 pthread_cond_signal(&logger_stack_cond);
917
918 pthread_mutex_unlock(&logger_stack_lock);
919 return LOGGER_ADD_WATCHER_OK;
920 }
921