1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3 * memcached - memory caching daemon
4 *
5 * https://www.memcached.org/
6 *
7 * Copyright 2003 Danga Interactive, Inc. All rights reserved.
8 *
9 * Use and distribution licensed under the BSD license. See
10 * the LICENSE file for full text.
11 *
12 * Authors:
13 * Anatoly Vorobey <mellon@pobox.com>
14 * Brad Fitzpatrick <brad@danga.com>
15 */
16 #include "memcached.h"
17 #include "storage.h"
18 #include "authfile.h"
19 #include "restart.h"
20 #include <sys/stat.h>
21 #include <sys/socket.h>
22 #include <sys/un.h>
23 #include <signal.h>
24 #include <sys/param.h>
25 #include <sys/resource.h>
26 #include <sys/uio.h>
27 #include <ctype.h>
28 #include <stdarg.h>
29
30 /* some POSIX systems need the following definition
31 * to get mlockall flags out of sys/mman.h. */
32 #ifndef _P1003_1B_VISIBLE
33 #define _P1003_1B_VISIBLE
34 #endif
35 #include <pwd.h>
36 #include <sys/mman.h>
37 #include <fcntl.h>
38 #include <netinet/tcp.h>
39 #include <arpa/inet.h>
40 #include <errno.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <time.h>
45 #include <assert.h>
46 #include <sysexits.h>
47 #include <stddef.h>
48
49 #ifdef HAVE_GETOPT_LONG
50 #include <getopt.h>
51 #endif
52
53 #ifdef TLS
54 #include "tls.h"
55 #endif
56
57 #include "proto_text.h"
58 #include "proto_bin.h"
59
60 #if defined(__FreeBSD__)
61 #include <sys/sysctl.h>
62 #endif
63
64 /*
65 * forward declarations
66 */
67 static void drive_machine(conn *c);
68 static int new_socket(struct addrinfo *ai);
69 static ssize_t tcp_read(conn *arg, void *buf, size_t count);
70 static ssize_t tcp_sendmsg(conn *arg, struct msghdr *msg, int flags);
71 static ssize_t tcp_write(conn *arg, void *buf, size_t count);
72
/* Outcome of a try_read_network()/try_read_udp() pass. */
enum try_read_result {
    READ_DATA_RECEIVED,    /** at least one byte was read into the buffer */
    READ_NO_DATA_RECEIVED, /** socket had nothing to give us right now */
    READ_ERROR,            /** an error occurred (on the socket) (or client closed connection) */
    READ_MEMORY_ERROR      /** failed to allocate more memory */
};
79
80 static int try_read_command_negotiate(conn *c);
81 static int try_read_command_udp(conn *c);
82
83 static enum try_read_result try_read_network(conn *c);
84 static enum try_read_result try_read_udp(conn *c);
85
86 static int start_conn_timeout_thread();
87
88
89 /* stats */
90 static void stats_init(void);
91 static void conn_to_str(const conn *c, char *addr, char *svr_addr);
92
93 /* defaults */
94 static void settings_init(void);
95
96 /* event handling, network IO */
97 static void event_handler(const evutil_socket_t fd, const short which, void *arg);
98 static void conn_close(conn *c);
99 static void conn_init(void);
100 static bool update_event(conn *c, const int new_flags);
101 static void complete_nread(conn *c);
102
103 static void conn_free(conn *c);
104
105 /** exported globals **/
106 struct stats stats;
107 struct stats_state stats_state;
108 struct settings settings;
109 time_t process_started; /* when the process was started */
110 conn **conns;
111
112 struct slab_rebalance slab_rebal;
113 volatile int slab_rebalance_signal;
114 #ifdef EXTSTORE
115 /* hoping this is temporary; I'd prefer to cut globals, but will complete this
116 * battle another day.
117 */
118 void *ext_storage = NULL;
119 #endif
120 /** file scope variables **/
121 static conn *listen_conn = NULL;
122 static int max_fds;
123 static struct event_base *main_base;
124
/* Outcome of a transmit() pass; drives the write side of the state machine. */
enum transmit_result {
    TRANSMIT_COMPLETE,   /** All done writing. */
    TRANSMIT_INCOMPLETE, /** More data remaining to write. */
    TRANSMIT_SOFT_ERROR, /** Can't write any more right now. */
    TRANSMIT_HARD_ERROR  /** Can't write (c->state is set to conn_closing) */
};
131
132 /* Default methods to read from/ write to a socket */
/* Default read method for plain TCP connections: a straight read(2)
 * on the connection's socket descriptor. */
ssize_t tcp_read(conn *c, void *buf, size_t count) {
    assert(c != NULL);
    const int fd = c->sfd;
    return read(fd, buf, count);
}
137
/* Default scatter/gather write method for plain TCP connections:
 * forwards directly to sendmsg(2). */
ssize_t tcp_sendmsg(conn *c, struct msghdr *msg, int flags) {
    assert(c != NULL);
    const int fd = c->sfd;
    return sendmsg(fd, msg, flags);
}
142
/* Default write method for plain TCP connections: a straight write(2)
 * on the connection's socket descriptor. */
ssize_t tcp_write(conn *c, void *buf, size_t count) {
    assert(c != NULL);
    const int fd = c->sfd;
    return write(fd, buf, count);
}
147
148 static enum transmit_result transmit(conn *c);
149
150 /* This reduces the latency without adding lots of extra wiring to be able to
151 * notify the listener thread of when to listen again.
152 * Also, the clock timer could be broken out into its own thread and we
153 * can block the listener via a condition.
154 */
155 static volatile bool allow_new_conns = true;
156 static int stop_main_loop = NOT_STOP;
157 static struct event maxconnsevent;
/* Timer callback used while the server is at maxconns.  Polls every 10ms
 * until connections may be accepted again, then re-enables the listeners.
 * fd == -42 is a sentinel meaning "invoked to (re)arm the timer" rather
 * than from a real descriptor event. */
static void maxconns_handler(const evutil_socket_t fd, const short which, void *arg) {
    struct timeval t = {.tv_sec = 0, .tv_usec = 10000};

    if (fd == -42 || allow_new_conns == false) {
        /* reschedule in 10ms if we need to keep polling */
        evtimer_set(&maxconnsevent, maxconns_handler, 0);
        event_base_set(main_base, &maxconnsevent);
        evtimer_add(&maxconnsevent, &t);
    } else {
        /* conns are allowed again: stop polling and resume accepting */
        evtimer_del(&maxconnsevent);
        accept_new_conns(true);
    }
}
171
172 /*
173 * given time value that's either unix time or delta from current unix time, return
174 * unix time. Use the fact that delta can't exceed one month (and real time value can't
175 * be that low).
176 */
realtime(const time_t exptime)177 rel_time_t realtime(const time_t exptime) {
178 /* no. of seconds in 30 days - largest possible delta exptime */
179
180 if (exptime == 0) return 0; /* 0 means never expire */
181
182 if (exptime > REALTIME_MAXDELTA) {
183 /* if item expiration is at/before the server started, give it an
184 expiration time of 1 second after the server started.
185 (because 0 means don't expire). without this, we'd
186 underflow and wrap around to some large value way in the
187 future, effectively making items expiring in the past
188 really expiring never */
189 if (exptime <= process_started)
190 return (rel_time_t)1;
191 return (rel_time_t)(exptime - process_started);
192 } else {
193 return (rel_time_t)(exptime + current_time);
194 }
195 }
196
/* Zero both the cumulative and point-in-time stats structures and record
 * the process start time.  Called once during startup. */
static void stats_init(void) {
    memset(&stats, 0, sizeof(struct stats));
    memset(&stats_state, 0, sizeof(struct stats_state));
    stats_state.accepting_conns = true; /* assuming we start in this state. */

    /* make the time we started always be 2 seconds before we really
       did, so time(0) - time.started is never zero. if so, things
       like 'settings.oldest_live' which act as booleans as well as
       values are now false in boolean context... */
    process_started = time(0) - ITEM_UPDATE_INTERVAL - 2;
    stats_prefix_init(settings.prefix_delimiter);
}
209
/* Reset all server statistics ("stats reset" command).  The global stats
 * are cleared under STATS_LOCK; per-thread and per-item stats have their
 * own locking and are reset after the global lock is released. */
void stats_reset(void) {
    STATS_LOCK();
    memset(&stats, 0, sizeof(struct stats));
    stats_prefix_clear();
    STATS_UNLOCK();
    threadlocal_stats_reset();
    item_stats_reset();
}
218
/* Populate the global settings struct with compiled-in defaults.
 * Runs once at startup, before command-line parsing overrides values. */
static void settings_init(void) {
    settings.use_cas = true;
    settings.access = 0700;    /* unix-socket permission mask */
    settings.port = 11211;
    settings.udpport = 0;      /* UDP disabled unless requested */
#ifdef TLS
    settings.ssl_enabled = false;
    settings.ssl_ctx = NULL;
    settings.ssl_chain_cert = NULL;
    settings.ssl_key = NULL;
    settings.ssl_verify_mode = SSL_VERIFY_NONE;
    settings.ssl_keyformat = SSL_FILETYPE_PEM;
    settings.ssl_ciphers = NULL;
    settings.ssl_ca_cert = NULL;
    settings.ssl_last_cert_refresh_time = current_time;
    settings.ssl_wbuf_size = 16 * 1024; // default is 16KB (SSL max frame size is 17KB)
    settings.ssl_session_cache = false;
    settings.ssl_min_version = TLS1_2_VERSION;
#endif
    /* By default this string should be NULL for getaddrinfo() */
    settings.inter = NULL;
    settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
    settings.maxconns = 1024;         /* to limit connections-related memory to about 5MB */
    settings.verbose = 0;
    settings.oldest_live = 0;
    settings.oldest_cas = 0;          /* supplements accuracy of oldest_live */
    settings.evict_to_free = 1;       /* push old items out of cache when memory runs out */
    settings.socketpath = NULL;       /* by default, not using a unix socket */
    settings.auth_file = NULL;        /* by default, not using ASCII authentication tokens */
    settings.factor = 1.25;           /* slab class growth factor */
    settings.chunk_size = 48;         /* space for a modest key and value */
    settings.num_threads = 4;         /* N workers */
    settings.num_threads_per_udp = 0;
    settings.prefix_delimiter = ':';
    settings.detail_enabled = 0;
    settings.reqs_per_event = 20;     /* commands handled per event before yielding */
    settings.backlog = 1024;
    settings.binding_protocol = negotiating_prot;
    settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
    settings.slab_page_size = 1024 * 1024; /* chunks are split from 1MB pages. */
    settings.slab_chunk_size_max = settings.slab_page_size / 2;
    settings.sasl = false;
    settings.maxconns_fast = true;
    settings.lru_crawler = false;
    settings.lru_crawler_sleep = 100;
    settings.lru_crawler_tocrawl = 0;
    settings.lru_maintainer_thread = false;
    settings.lru_segmented = true;
    settings.hot_lru_pct = 20;        /* LRU segment size percentages */
    settings.warm_lru_pct = 40;
    settings.hot_max_factor = 0.2;
    settings.warm_max_factor = 2.0;
    settings.temp_lru = false;
    settings.temporary_ttl = 61;
    settings.idle_timeout = 0;        /* disabled */
    settings.hashpower_init = 0;
    settings.slab_reassign = true;
    settings.slab_automove = 1;
    settings.slab_automove_ratio = 0.8;
    settings.slab_automove_window = 30;
    settings.shutdown_command = false;
    settings.tail_repair_time = TAIL_REPAIR_TIME_DEFAULT;
    settings.flush_enabled = true;
    settings.dump_enabled = true;
    settings.crawls_persleep = 1000;
    settings.logger_watcher_buf_size = LOGGER_WATCHER_BUF_SIZE;
    settings.logger_buf_size = LOGGER_BUF_SIZE;
    settings.drop_privileges = false;
    settings.watch_enabled = true;
    settings.read_buf_mem_limit = 0;  /* 0 = unlimited read buffer memory */
#ifdef MEMCACHED_DEBUG
    settings.relaxed_privileges = false;
#endif
    settings.num_napi_ids = 0;
    settings.memory_file = NULL;
}
295
296 extern pthread_mutex_t conn_lock;
297
298 /* Connection timeout thread bits */
299 static pthread_t conn_timeout_tid;
300 static int do_run_conn_timeout_thread;
301 static pthread_cond_t conn_timeout_cond = PTHREAD_COND_INITIALIZER;
302 static pthread_mutex_t conn_timeout_lock = PTHREAD_MUTEX_INITIALIZER;
303
304 #define CONNS_PER_SLICE 100
/* Background thread that sweeps all connections and closes ones idle
 * longer than settings.idle_timeout.  To avoid hammering the conn table
 * it sleeps briefly between every CONNS_PER_SLICE entries, then sleeps
 * (interruptibly, via conn_timeout_cond) until the earliest possible
 * next timeout.  Started by start_conn_timeout_thread(). */
static void *conn_timeout_thread(void *arg) {
    int i;
    conn *c;
    rel_time_t oldest_last_cmd;
    int sleep_time;
    /* spread a full-table scan over roughly one second */
    int sleep_slice = max_fds / CONNS_PER_SLICE;
    if (sleep_slice == 0)
        sleep_slice = CONNS_PER_SLICE;

    useconds_t timeslice = 1000000 / sleep_slice;

    mutex_lock(&conn_timeout_lock);
    while(do_run_conn_timeout_thread) {
        if (settings.verbose > 2)
            fprintf(stderr, "idle timeout thread at top of connection list\n");

        oldest_last_cmd = current_time;

        for (i = 0; i < max_fds; i++) {
            if ((i % CONNS_PER_SLICE) == 0) {
                if (settings.verbose > 2)
                    /* NOTE(review): "%ulus" prints the value then the
                     * literal "lus"; "%uus" was likely intended — confirm. */
                    fprintf(stderr, "idle timeout thread sleeping for %ulus\n",
                        (unsigned int)timeslice);
                usleep(timeslice);
            }

            if (!conns[i])
                continue;

            c = conns[i];

            /* idle timeouts only apply to TCP client connections */
            if (!IS_TCP(c->transport))
                continue;

            /* only safe to time out conns sitting idle between commands */
            if (c->state != conn_new_cmd && c->state != conn_read)
                continue;

            if ((current_time - c->last_cmd_time) > settings.idle_timeout) {
                timeout_conn(c);
            } else {
                /* track the stalest surviving conn to size the next sleep */
                if (c->last_cmd_time < oldest_last_cmd)
                    oldest_last_cmd = c->last_cmd_time;
            }
        }

        /* This is the soonest we could have another connection time out */
        sleep_time = settings.idle_timeout - (current_time - oldest_last_cmd) + 1;
        if (sleep_time <= 0)
            sleep_time = 1;

        if (settings.verbose > 2)
            fprintf(stderr,
                    "idle timeout thread finished pass, sleeping for %ds\n",
                    sleep_time);

        struct timeval now;
        struct timespec to_sleep;
        gettimeofday(&now, NULL);
        to_sleep.tv_sec = now.tv_sec + sleep_time;
        to_sleep.tv_nsec = 0;

        /* timedwait so stop_conn_timeout_thread() can wake us early */
        pthread_cond_timedwait(&conn_timeout_cond, &conn_timeout_lock, &to_sleep);
    }

    mutex_unlock(&conn_timeout_lock);
    return NULL;
}
372
/* Spawn the idle-connection timeout sweeper.  Returns 0 on success, or
 * -1 if idle timeouts are disabled (settings.idle_timeout == 0) or the
 * thread could not be created. */
static int start_conn_timeout_thread() {
    int ret;

    if (settings.idle_timeout == 0)
        return -1;

    do_run_conn_timeout_thread = 1;
    if ((ret = pthread_create(&conn_timeout_tid, NULL,
        conn_timeout_thread, NULL)) != 0) {
        fprintf(stderr, "Can't create idle connection timeout thread: %s\n",
            strerror(ret));
        return -1;
    }

    return 0;
}
389
/* Signal the timeout sweeper to exit and join it.  Returns -1 if the
 * thread was never running, 0 once it has been joined. */
int stop_conn_timeout_thread(void) {
    if (!do_run_conn_timeout_thread)
        return -1;
    mutex_lock(&conn_timeout_lock);
    do_run_conn_timeout_thread = 0;
    /* wake the sweeper out of its timedwait so it notices the flag */
    pthread_cond_signal(&conn_timeout_cond);
    mutex_unlock(&conn_timeout_lock);
    pthread_join(conn_timeout_tid, NULL);
    return 0;
}
400
401 /*
402 * read buffer cache helper functions
403 */
/* Return the connection's read buffer to its origin when it is fully
 * drained (rbytes == 0).  malloc'd buffers (huge multigets) are freed;
 * normal buffers go back to the per-thread rbuf cache.  UDP conns keep
 * their persistent buffer and are never released here. */
static void rbuf_release(conn *c) {
    if (c->rbuf != NULL && c->rbytes == 0 && !IS_UDP(c->transport)) {
        if (c->rbuf_malloced) {
            free(c->rbuf);
            c->rbuf_malloced = false;
        } else {
            do_cache_free(c->thread->rbuf_cache, c->rbuf);
        }
        c->rsize = 0;
        c->rbuf = NULL;
        c->rcurr = NULL;
    }
}
417
/* Ensure the connection has a read buffer, pulling one from the
 * per-thread rbuf cache if needed.  Returns false (and bumps the
 * read_buf_oom stat) if the cache cannot supply one. */
static bool rbuf_alloc(conn *c) {
    if (c->rbuf == NULL) {
        c->rbuf = do_cache_alloc(c->thread->rbuf_cache);
        if (!c->rbuf) {
            THR_STATS_LOCK(c);
            c->thread->stats.read_buf_oom++;
            THR_STATS_UNLOCK(c);
            return false;
        }
        c->rsize = READ_BUFFER_SIZE;
        c->rcurr = c->rbuf;
    }
    return true;
}
432
433 // Just for handling huge ASCII multigets.
434 // The previous system was essentially the same; realloc'ing until big enough,
435 // then realloc'ing back down after the request finished.
// Just for handling huge ASCII multigets.
// The previous system was essentially the same; realloc'ing until big enough,
// then realloc'ing back down after the request finished.
// Swaps the conn's cached read buffer for a malloc'd one twice the size,
// preserving any unprocessed bytes.  Returns false if malloc fails (the
// original buffer is left untouched in that case).
bool rbuf_switch_to_malloc(conn *c) {
    // Might as well start with x2 and work from there.
    size_t size = c->rsize * 2;
    char *tmp = malloc(size);
    if (!tmp)
        return false;

    // Copy the pending bytes out *before* releasing the cached buffer:
    // c->rcurr points into c->rbuf, so freeing first would read from
    // freed memory (use-after-free).
    memcpy(tmp, c->rcurr, c->rbytes);
    do_cache_free(c->thread->rbuf_cache, c->rbuf);

    c->rcurr = c->rbuf = tmp;
    c->rsize = size;
    c->rbuf_malloced = true;
    return true;
}
451
452 /*
453 * Initializes the connections array. We don't actually allocate connection
454 * structures until they're needed, so as to avoid wasting memory when the
455 * maximum connection count is much higher than the actual number of
456 * connections.
457 *
458 * This does end up wasting a few pointers' worth of memory for FDs that are
459 * used for things other than connections, but that's worth it in exchange for
460 * being able to directly index the conns array by FD.
461 */
/* Size and allocate the global conns[] array (indexed directly by fd).
 * The upper bound is taken from RLIMIT_NOFILE when available; otherwise
 * it falls back to maxconns plus a headroom estimate.  Exits the process
 * on failure since nothing can work without the table. */
static void conn_init(void) {
    /* We're unlikely to see an FD much higher than maxconns. */
    /* dup(1) probes roughly how many FDs are already in use at startup. */
    int next_fd = dup(1);
    if (next_fd < 0) {
        perror("Failed to duplicate file descriptor\n");
        exit(1);
    }
    int headroom = 10;      /* account for extra unexpected open FDs */
    struct rlimit rl;

    max_fds = settings.maxconns + headroom + next_fd;

    /* But if possible, get the actual highest FD we can possibly ever see. */
    if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
        max_fds = rl.rlim_max;
    } else {
        fprintf(stderr, "Failed to query maximum file descriptor; "
                        "falling back to maxconns\n");
    }

    close(next_fd);

    if ((conns = calloc(max_fds, sizeof(conn *))) == NULL) {
        fprintf(stderr, "Failed to allocate connection structures\n");
        /* This is unrecoverable so bail out early. */
        exit(1);
    }
}
490
prot_text(enum protocol prot)491 static const char *prot_text(enum protocol prot) {
492 char *rv = "unknown";
493 switch(prot) {
494 case ascii_prot:
495 rv = "ascii";
496 break;
497 case binary_prot:
498 rv = "binary";
499 break;
500 case negotiating_prot:
501 rv = "auto-negotiate";
502 break;
503 }
504 return rv;
505 }
506
/* Close a connection that has exceeded the idle timeout.  Called from the
 * connection's own worker thread (via timeout_conn()), so it is safe to
 * drive the state machine directly.  Conns mid-command (not in the
 * new_cmd/read states) are left alone. */
void conn_close_idle(conn *c) {
    if (settings.idle_timeout > 0 &&
        (current_time - c->last_cmd_time) > settings.idle_timeout) {
        if (c->state != conn_new_cmd && c->state != conn_read) {
            if (settings.verbose > 1)
                fprintf(stderr,
                    "fd %d wants to timeout, but isn't in read state", c->sfd);
            return;
        }

        if (settings.verbose > 1)
            fprintf(stderr, "Closing idle fd %d\n", c->sfd);

        pthread_mutex_lock(&c->thread->stats.mutex);
        c->thread->stats.idle_kicks++;
        pthread_mutex_unlock(&c->thread->stats.mutex);

        c->close_reason = IDLE_TIMEOUT_CLOSE;

        conn_set_state(c, conn_closing);
        /* run the state machine now so the close happens immediately */
        drive_machine(c);
    }
}
530
531 /* bring conn back from a sidethread. could have had its event base moved. */
/* bring conn back from a sidethread. could have had its event base moved. */
/* Re-registers the conn's libevent handler on its worker thread's base
 * and resumes the state machine.  For conn_io_queue conns this is a
 * no-op until every outstanding IO queue has returned. */
void conn_worker_readd(conn *c) {
    if (c->state == conn_io_queue) {
        c->io_queues_submitted--;
        // If we're still waiting for other queues to return, don't re-add the
        // connection yet.
        if (c->io_queues_submitted != 0) {
            return;
        }
    }
    c->ev_flags = EV_READ | EV_PERSIST;
    event_set(&c->event, c->sfd, c->ev_flags, event_handler, (void *)c);
    event_base_set(c->thread->base, &c->event);

    // TODO: call conn_cleanup/fail/etc
    if (event_add(&c->event, 0) == -1) {
        perror("event_add");
    }

    // side thread wanted us to close immediately.
    if (c->state == conn_closing) {
        drive_machine(c);
        return;
    } else if (c->state == conn_io_queue) {
        // machine will know how to return based on secondary state.
        drive_machine(c);
    } else {
        conn_set_state(c, conn_new_cmd);
    }
}
561
/* Register an IO queue type (and its callbacks) with a worker thread.
 * Fills the first unused slot in t->io_queues; the array is assumed to
 * have room (IO_QUEUE_NONE terminates the used portion). */
void thread_io_queue_add(LIBEVENT_THREAD *t, int type, void *ctx, io_queue_stack_cb cb, io_queue_stack_cb com_cb, io_queue_cb ret_cb, io_queue_cb fin_cb) {
    io_queue_cb_t *slot;
    /* advance to the first free registration slot */
    for (slot = t->io_queues; slot->type != IO_QUEUE_NONE; slot++)
        ;
    slot->type = type;
    slot->ctx = ctx;
    slot->submit_cb = cb;
    slot->complete_cb = com_cb;
    slot->finalize_cb = fin_cb;
    slot->return_cb = ret_cb;
}
575
/* Mirror the worker thread's registered IO queue types onto a new conn,
 * starting each queue out empty. */
void conn_io_queue_setup(conn *c) {
    io_queue_t *dst = c->io_queues;
    for (io_queue_cb_t *src = c->thread->io_queues;
         src->type != IO_QUEUE_NONE;
         src++, dst++) {
        dst->type = src->type;
        dst->ctx = src->ctx;
        dst->stack_ctx = NULL;
        dst->count = 0;
    }
}
588
589 // To be called from conn_release_items to ensure the stack ptrs are reset.
// To be called from conn_release_items to ensure the stack ptrs are reset.
static void conn_io_queue_reset(conn *c) {
    io_queue_t *queue = c->io_queues;
    while (queue->type != IO_QUEUE_NONE) {
        /* all pending IOs must have been consumed by now */
        assert(queue->count == 0);
        queue->stack_ctx = NULL;
        queue++;
    }
}
596
thread_io_queue_get(LIBEVENT_THREAD * t,int type)597 io_queue_cb_t *thread_io_queue_get(LIBEVENT_THREAD *t, int type) {
598 io_queue_cb_t *q = t->io_queues;
599 while (q->type != IO_QUEUE_NONE) {
600 if (q->type == type) {
601 return q;
602 }
603 q++;
604 }
605 return NULL;
606 }
607
conn_io_queue_get(conn * c,int type)608 io_queue_t *conn_io_queue_get(conn *c, int type) {
609 io_queue_t *q = c->io_queues;
610 while (q->type != IO_QUEUE_NONE) {
611 if (q->type == type) {
612 return q;
613 }
614 q++;
615 }
616 return NULL;
617 }
618
619 // called after returning to the main worker thread.
620 // users of the queue need to distinguish if the IO was actually consumed or
621 // not and handle appropriately.
// called after returning to the main worker thread.
// users of the queue need to distinguish if the IO was actually consumed or
// not and handle appropriately.
static void conn_io_queue_complete(conn *c) {
    io_queue_t *q = c->io_queues;
    io_queue_cb_t *qcb = c->thread->io_queues;
    // c->io_queues and the thread's callback registrations are parallel
    // arrays (both built from the same registration order), so a single
    // index walks both.
    while (q->type != IO_QUEUE_NONE) {
        // only queues that accumulated IOs this request have a stack to run.
        if (q->stack_ctx) {
            qcb->complete_cb(q);
        }
        qcb++;
        q++;
    }
}
633
634 // called to return a single IO object to the original worker thread.
// called to return a single IO object to the original worker thread.
void conn_io_queue_return(io_pending_t *io) {
    io_queue_cb_t *cbs = thread_io_queue_get(io->thread, io->io_queue_type);
    cbs->return_cb(io);
}
640
/*
 * Allocate (or reuse) and initialize the conn object for descriptor sfd.
 * conn structs are cached in conns[] indexed by fd and reused across
 * connections arriving on the same descriptor.
 *
 * init_state:       initial state machine state (conn_listening, etc.)
 * event_flags:      libevent flags to register the socket with
 * read_buffer_size: if nonzero, a persistent rbuf is malloc'd (UDP path)
 * transport:        tcp/udp/unix transport of this socket
 * base:             event base the conn's event is attached to
 * ssl:              TLS session handle; must be NULL unless built with TLS
 *
 * Returns NULL on allocation or event-registration failure.
 */
conn *conn_new(const int sfd, enum conn_states init_state,
                const int event_flags,
                const int read_buffer_size, enum network_transport transport,
                struct event_base *base, void *ssl) {
    conn *c;

    assert(sfd >= 0 && sfd < max_fds);
    c = conns[sfd];

    /* First time we've seen this fd: allocate and do one-time setup. */
    if (NULL == c) {
        if (!(c = (conn *)calloc(1, sizeof(conn)))) {
            STATS_LOCK();
            stats.malloc_fails++;
            STATS_UNLOCK();
            fprintf(stderr, "Failed to allocate connection object\n");
            return NULL;
        }
        MEMCACHED_CONN_CREATE(c);
        c->read = NULL;
        c->sendmsg = NULL;
        c->write = NULL;
        c->rbuf = NULL;

        c->rsize = read_buffer_size;

        // UDP connections use a persistent static buffer.
        if (c->rsize) {
            c->rbuf = (char *)malloc((size_t)c->rsize);
        }

        if (c->rsize && c->rbuf == NULL) {
            conn_free(c);
            STATS_LOCK();
            stats.malloc_fails++;
            STATS_UNLOCK();
            fprintf(stderr, "Failed to allocate buffers for connection\n");
            return NULL;
        }


        STATS_LOCK();
        stats_state.conn_structs++;
        STATS_UNLOCK();

        c->sfd = sfd;
        conns[sfd] = c;
    }

    /* Everything below runs for reused conns too: per-session state. */
    c->transport = transport;
    c->protocol = settings.binding_protocol;

    /* unix socket mode doesn't need this, so zeroed out. but why
     * is this done for every command? presumably for UDP
     * mode. */
    if (!settings.socketpath) {
        c->request_addr_size = sizeof(c->request_addr);
    } else {
        c->request_addr_size = 0;
    }

    if (transport == tcp_transport && init_state == conn_new_cmd) {
        if (getpeername(sfd, (struct sockaddr *) &c->request_addr,
                        &c->request_addr_size)) {
            perror("getpeername");
            memset(&c->request_addr, 0, sizeof(c->request_addr));
        }
    }

    if (init_state == conn_new_cmd) {
        LOGGER_LOG(NULL, LOG_CONNEVENTS, LOGGER_CONNECTION_NEW, NULL,
                &c->request_addr, c->request_addr_size, c->transport, 0, sfd);
    }

    if (settings.verbose > 1) {
        if (init_state == conn_listening) {
            fprintf(stderr, "<%d server listening (%s)\n", sfd,
                prot_text(c->protocol));
        } else if IS_UDP(transport) {
            fprintf(stderr, "<%d server listening (udp)\n", sfd);
        } else if (c->protocol == negotiating_prot) {
            fprintf(stderr, "<%d new auto-negotiating client connection\n",
                    sfd);
        } else if (c->protocol == ascii_prot) {
            fprintf(stderr, "<%d new ascii client connection.\n", sfd);
        } else if (c->protocol == binary_prot) {
            fprintf(stderr, "<%d new binary client connection.\n", sfd);
        } else {
            fprintf(stderr, "<%d new unknown (%d) client connection\n",
                sfd, c->protocol);
            assert(false);
        }
    }

#ifdef TLS
    c->ssl = NULL;
    c->ssl_wbuf = NULL;
    c->ssl_enabled = false;
#endif
    /* reset per-session parsing and item state */
    c->state = init_state;
    c->rlbytes = 0;
    c->cmd = -1;
    c->rbytes = 0;
    c->rcurr = c->rbuf;
    c->ritem = 0;
    c->rbuf_malloced = false;
    c->item_malloced = false;
    c->sasl_started = false;
    c->set_stale = false;
    c->mset_res = false;
    c->close_after_write = false;
    c->last_cmd_time = current_time; /* initialize for idle kicker */
    // wipe all queues.
    memset(c->io_queues, 0, sizeof(c->io_queues));
    c->io_queues_submitted = 0;

    c->item = 0;

    c->noreply = false;

    /* select read/write method pointers: TLS wrappers or plain TCP */
#ifdef TLS
    if (ssl) {
        c->ssl = (SSL*)ssl;
        c->read = ssl_read;
        c->sendmsg = ssl_sendmsg;
        c->write = ssl_write;
        c->ssl_enabled = true;
        SSL_set_info_callback(c->ssl, ssl_callback);
    } else
#else
    // This must be NULL if TLS is not enabled.
    assert(ssl == NULL);
#endif
    {
        c->read = tcp_read;
        c->sendmsg = tcp_sendmsg;
        c->write = tcp_write;
    }

    /* pick the command parser matching transport/protocol/auth mode */
    if IS_UDP(transport) {
        c->try_read_command = try_read_command_udp;
    } else {
        switch (c->protocol) {
            case ascii_prot:
                if (settings.auth_file == NULL) {
                    c->authenticated = true;
                    c->try_read_command = try_read_command_ascii;
                } else {
                    c->authenticated = false;
                    c->try_read_command = try_read_command_asciiauth;
                }
                break;
            case binary_prot:
                // binprot handles its own authentication via SASL parsing.
                c->authenticated = false;
                c->try_read_command = try_read_command_binary;
                break;
            case negotiating_prot:
                c->try_read_command = try_read_command_negotiate;
                break;
        }
    }

    event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
    event_base_set(base, &c->event);
    c->ev_flags = event_flags;

    if (event_add(&c->event, 0) == -1) {
        perror("event_add");
        return NULL;
    }

    STATS_LOCK();
    stats_state.curr_conns++;
    stats.total_conns++;
    STATS_UNLOCK();

    MEMCACHED_CONN_ALLOCATE(c->sfd);

    return c;
}
821
/* Release everything a request may have pinned on the connection: the
 * in-flight item (freed or refcount-dropped depending on how it was
 * allocated) and any unsent response objects. */
void conn_release_items(conn *c) {
    assert(c != NULL);

    if (c->item) {
        if (c->item_malloced) {
            free(c->item);
            c->item_malloced = false;
        } else {
            item_remove(c->item);
        }
        c->item = 0;
    }

    // Cull any unsent responses.
    if (c->resp_head) {
        mc_resp *resp = c->resp_head;
        // r_f() handles the chain maintenance.
        while (resp) {
            // temporary by default. hide behind a debug flag in the future:
            // double free detection. Transmit loops can drop out early, but
            // here we could infinite loop.
            if (resp->free) {
                fprintf(stderr, "ERROR: double free detected during conn_release_items(): [%d] [%s]\n",
                        c->sfd, c->protocol == binary_prot ? "binary" : "ascii");
                // Since this is a critical failure, just leak the memory.
                // If these errors are seen, an abort() can be used instead.
                c->resp_head = NULL;
                c->resp = NULL;
                break;
            }
            resp = resp_finish(c, resp);
        }
        /* all responses gone; clear the per-queue stack pointers too */
        conn_io_queue_reset(c);
    }
}
857
/* Tear down per-session state ahead of closing (or, for UDP, reusing)
 * the connection: release pinned items/responses and any SASL context. */
static void conn_cleanup(conn *c) {
    assert(c != NULL);

    conn_release_items(c);

    if (c->sasl_conn) {
        assert(settings.sasl);
        sasl_dispose(&c->sasl_conn);
        c->sasl_conn = NULL;
    }

    /* UDP "connections" are never closed; park them back in read state */
    if IS_UDP(c->transport) {
        conn_set_state(c, conn_read);
    }
}
873
874 /*
875 * Frees a connection.
876 */
/*
 * Frees a connection object and unhooks it from the conns[] table.
 * Safe to call with NULL.
 */
void conn_free(conn *c) {
    if (c == NULL) {
        return;
    }
    assert(c->sfd >= 0 && c->sfd < max_fds);

    MEMCACHED_CONN_DESTROY(c);
    conns[c->sfd] = NULL;
    free(c->rbuf); /* free(NULL) is a no-op */
#ifdef TLS
    if (c->ssl_wbuf)
        c->ssl_wbuf = NULL;
#endif

    free(c);
}
894
/* Fully close a connection: log the event, unregister from libevent,
 * release session state and buffers, shut down TLS if present, and close
 * the descriptor.  The conn struct itself stays cached in conns[] for
 * reuse (state becomes conn_closed). */
static void conn_close(conn *c) {
    assert(c != NULL);

    if (c->thread) {
        LOGGER_LOG(c->thread->l, LOG_CONNEVENTS, LOGGER_CONNECTION_CLOSE, NULL,
                &c->request_addr, c->request_addr_size, c->transport,
                c->close_reason, c->sfd);
    }

    /* delete the event, the socket and the conn */
    event_del(&c->event);

    if (settings.verbose > 1)
        fprintf(stderr, "<%d connection closed.\n", c->sfd);

    conn_cleanup(c);

    // force release of read buffer.
    if (c->thread) {
        c->rbytes = 0;
        rbuf_release(c);
    }

    MEMCACHED_CONN_RELEASE(c->sfd);
    conn_set_state(c, conn_closed);
#ifdef TLS
    if (c->ssl) {
        SSL_shutdown(c->ssl);
        SSL_free(c->ssl);
    }
#endif
    close(c->sfd);
    c->close_reason = 0;
    /* a slot opened up; let the accept path resume if it was throttled */
    pthread_mutex_lock(&conn_lock);
    allow_new_conns = true;
    pthread_mutex_unlock(&conn_lock);

    STATS_LOCK();
    stats_state.curr_conns--;
    STATS_UNLOCK();

    return;
}
938
939 // Since some connections might be off on side threads and some are managed as
940 // listeners we need to walk through them all from a central point.
941 // Must be called with all worker threads hung or in the process of closing.
conn_close_all(void)942 void conn_close_all(void) {
943 int i;
944 for (i = 0; i < max_fds; i++) {
945 if (conns[i] && conns[i]->state != conn_closed) {
946 conn_close(conns[i]);
947 }
948 }
949 }
950
951 /**
952 * Convert a state name to a human readable form.
953 */
/**
 * Convert a state name to a human readable form.
 * The table is indexed by the enum conn_states value, so its order must
 * match the enum declaration exactly.
 */
static const char *state_text(enum conn_states state) {
    static const char *const names[] = {
        "conn_listening",
        "conn_new_cmd",
        "conn_waiting",
        "conn_read",
        "conn_parse_cmd",
        "conn_write",
        "conn_nread",
        "conn_swallow",
        "conn_closing",
        "conn_mwrite",
        "conn_closed",
        "conn_watch",
        "conn_io_queue",
    };
    return names[state];
}
970
971 /*
972 * Sets a connection's current state in the state machine. Any special
973 * processing that needs to happen on certain state transitions can
974 * happen here.
975 */
/*
 * Sets a connection's current state in the state machine. Any special
 * processing that needs to happen on certain state transitions can
 * happen here.
 */
void conn_set_state(conn *c, enum conn_states state) {
    assert(c != NULL);
    assert(state >= conn_listening && state < conn_max_state);

    if (state != c->state) {
        if (settings.verbose > 2) {
            fprintf(stderr, "%d: going from %s to %s\n",
                    c->sfd, state_text(c->state),
                    state_text(state));
        }

        /* entering a write state marks the end of command processing
         * for the DTrace/SystemTap probe */
        if (state == conn_write || state == conn_mwrite) {
            MEMCACHED_PROCESS_COMMAND_END(c->sfd, c->resp->wbuf, c->resp->wbytes);
        }
        c->state = state;
    }
}
993
994 /*
995 * response object helper functions
996 */
/*
 * Return a response object to a clean state so it can be reused:
 * drop any referenced item, free any owned buffer, and zero the
 * transmit bookkeeping.
 */
void resp_reset(mc_resp *resp) {
    if (resp->item != NULL) {
        item_remove(resp->item);
        resp->item = NULL;
    }
    if (resp->write_and_free != NULL) {
        free(resp->write_and_free);
        resp->write_and_free = NULL;
    }
    resp->wbytes = 0;
    resp->tosend = 0;
    resp->iovcnt = 0;
    resp->chunked_data_iov = 0;
    resp->chunked_total = 0;
    resp->skip = false;
}
1013
/* Append a buffer to the response's iovec list and grow the byte count
 * to transmit.  Caller must not exceed MC_RESP_IOVCOUNT entries. */
void resp_add_iov(mc_resp *resp, const void *buf, int len) {
    assert(resp->iovcnt < MC_RESP_IOVCOUNT);
    struct iovec *vec = &resp->iov[resp->iovcnt];
    vec->iov_base = (void *)buf;
    vec->iov_len = len;
    resp->iovcnt++;
    resp->tosend += len;
}
1022
1023 // Notes that an IOV should be handled as a chunked item header.
1024 // TODO: I'm hoping this isn't a permanent abstraction while I learn what the
1025 // API should be.
// Notes that an IOV should be handled as a chunked item header.
// TODO: I'm hoping this isn't a permanent abstraction while I learn what the
// API should be.
void resp_add_chunked_iov(mc_resp *resp, const void *buf, int len) {
    // record the slot index *before* resp_add_iov bumps iovcnt.
    resp->chunked_data_iov = resp->iovcnt;
    resp->chunked_total = len;
    resp_add_iov(resp, buf, len);
}
1031
1032 // resp_allocate and resp_free are a wrapper around read buffers which makes
1033 // read buffers the only network memory to track.
1034 // Normally this would be too excessive. In this case it allows end users to
1035 // track a single memory limit for ephemeral connection buffers.
1036 // Fancy bit twiddling tricks are avoided to help keep this straightforward.
// resp_allocate and resp_free are a wrapper around read buffers which makes
// read buffers the only network memory to track.
// Normally this would be too excessive. In this case it allows end users to
// track a single memory limit for ephemeral connection buffers.
// Fancy bit twiddling tricks are avoided to help keep this straightforward.
/* Hand out one mc_resp from the thread's open bundle, allocating a fresh
 * bundle from the rbuf cache when none has a free slot.  Returns NULL if
 * the cache is exhausted. */
static mc_resp* resp_allocate(conn *c) {
    LIBEVENT_THREAD *th = c->thread;
    mc_resp *resp = NULL;
    mc_resp_bundle *b = th->open_bundle;

    if (b != NULL) {
        for (int i = 0; i < MAX_RESP_PER_BUNDLE; i++) {
            // loop around starting from the most likely to be free
            int x = (i + b->next_check) % MAX_RESP_PER_BUNDLE;
            if (b->r[x].free) {
                resp = &b->r[x];
                b->next_check = x+1;
                break;
            }
        }

        if (resp != NULL) {
            b->refcount++;
            resp->free = false;
            // A fully-used bundle is unlinked from the open list so we
            // stop scanning it until something is freed back.
            if (b->refcount == MAX_RESP_PER_BUNDLE) {
                assert(b->prev == NULL);
                // We only allocate off the head. Assign new head.
                th->open_bundle = b->next;
                // Remove ourselves from the list.
                if (b->next) {
                    b->next->prev = 0;
                    b->next = 0;
                }
            }
        }
    }

    if (resp == NULL) {
        assert(th->open_bundle == NULL);
        // bundles are carved from read buffers so one limit covers both.
        b = do_cache_alloc(th->rbuf_cache);
        if (b) {
            THR_STATS_LOCK(c);
            c->thread->stats.response_obj_bytes += READ_BUFFER_SIZE;
            THR_STATS_UNLOCK(c);
            b->next_check = 1;
            b->refcount = 1;
            for (int i = 0; i < MAX_RESP_PER_BUNDLE; i++) {
                b->r[i].bundle = b;
                b->r[i].free = true;
            }
            b->next = 0;
            b->prev = 0;
            th->open_bundle = b;
            resp = &b->r[0];
            resp->free = false;
        } else {
            return NULL;
        }
    }

    return resp;
}
1094
// Return a response object to its bundle. A fully-empty bundle is either
// kept (if it is the last one, to skip re-initialization) or unlinked and
// handed back to the read-buffer cache; a partially-used bundle is
// re-linked at the head of the open list so its free slots are found.
static void resp_free(conn *c, mc_resp *resp) {
    LIBEVENT_THREAD *th = c->thread;
    mc_resp_bundle *b = resp->bundle;

    resp->free = true;
    b->refcount--;
    if (b->refcount == 0) {
        if (b == th->open_bundle && b->next == 0) {
            // This is the final bundle. Just hold and reuse to skip init loop
            assert(b->prev == 0);
            b->next_check = 0;
        } else {
            // Assert that we're either in the list or at the head.
            assert((b->next || b->prev) || b == th->open_bundle);

            // unlink from list.
            mc_resp_bundle **head = &th->open_bundle;
            if (*head == b) *head = b->next;
            // Not tracking the tail.
            assert(b->next != b && b->prev != b);

            if (b->next) b->next->prev = b->prev;
            if (b->prev) b->prev->next = b->next;

            // Now completely done with this buffer.
            do_cache_free(th->rbuf_cache, b);
            THR_STATS_LOCK(c);
            c->thread->stats.response_obj_bytes -= READ_BUFFER_SIZE;
            THR_STATS_UNLOCK(c);
        }
    } else {
        mc_resp_bundle **head = &th->open_bundle;
        // NOTE: since we're not tracking tail, latest free ends up in head.
        if (b == th->open_bundle || (b->prev || b->next)) {
            // If we're already linked, leave it in place to save CPU.
        } else {
            // Non-zero refcount, need to link into the freelist.
            b->prev = 0;
            b->next = *head;
            if (b->next) b->next->prev = b;
            *head = b;
        }

    }
}
1140
// Allocate and initialize a new response object and append it to the
// connection's response chain (c->resp_head .. c->resp). Returns false on
// out-of-memory (counted in response_obj_oom).
bool resp_start(conn *c) {
    mc_resp *resp = resp_allocate(c);
    if (!resp) {
        THR_STATS_LOCK(c);
        c->thread->stats.response_obj_oom++;
        THR_STATS_UNLOCK(c);
        return false;
    }
    // handling the stats counters here to simplify testing
    THR_STATS_LOCK(c);
    c->thread->stats.response_obj_count++;
    THR_STATS_UNLOCK(c);
    // Skip zeroing the bundle pointer at the start.
    // TODO: this line is here temporarily to make the code easy to disable.
    // when it's more mature, move the memset into resp_allocate() and have it
    // set the bundle pointer on allocate so this line isn't as complex.
    memset((char *)resp + sizeof(mc_resp_bundle*), 0, sizeof(*resp) - sizeof(mc_resp_bundle*));
    // TODO: this next line works. memset _does_ show up significantly under
    // perf reports due to zeroing out the entire resp->wbuf. before swapping
    // the lines more validation work should be done to ensure wbuf's aren't
    // accidentally reused without being written to.
    //memset((char *)resp + sizeof(mc_resp_bundle*), 0, offsetof(mc_resp, wbuf));
    if (!c->resp_head) {
        c->resp_head = resp;
    }
    // Link onto the tail of the response chain (c->resp tracks the tail).
    if (!c->resp) {
        c->resp = resp;
    } else {
        c->resp->next = resp;
        c->resp = resp;
    }
    if IS_UDP(c->transport) {
        // need to hold on to some data for async responses.
        c->resp->request_id = c->request_id;
        c->resp->request_addr = c->request_addr;
        c->resp->request_addr_size = c->request_addr_size;
    }
    return true;
}
1180
// Tear down a completed response object: drop its item reference, free any
// write_and_free buffer, finalize and recycle any pending IO object, unlink
// it from the connection's chain, and return it to the bundle allocator.
// returns next response in chain.
mc_resp* resp_finish(conn *c, mc_resp *resp) {
    mc_resp *next = resp->next;
    if (resp->item) {
        // TODO: cache hash value in resp obj?
        item_remove(resp->item);
        resp->item = NULL;
    }
    if (resp->write_and_free) {
        free(resp->write_and_free);
    }
    if (resp->io_pending) {
        // If we had a pending IO, tell it to internally clean up then return
        // the main object back to our thread cache.
        io_queue_cb_t *qcb = thread_io_queue_get(c->thread, resp->io_pending->io_queue_type);
        qcb->finalize_cb(resp->io_pending);
        do_cache_free(c->thread->io_cache, resp->io_pending);
        resp->io_pending = NULL;
    }
    if (c->resp_head == resp) {
        c->resp_head = next;
    }
    if (c->resp == resp) {
        c->resp = NULL;
    }
    resp_free(c, resp);
    THR_STATS_LOCK(c);
    c->thread->stats.response_obj_count--;
    THR_STATS_UNLOCK(c);
    return next;
}
1212
1213 // tells if connection has a depth of response objects to process.
resp_has_stack(conn * c)1214 bool resp_has_stack(conn *c) {
1215 return c->resp_head->next != NULL ? true : false;
1216 }
1217
// Queue a static protocol string (with CRLF appended) as the connection's
// current response, then transition the state machine to conn_new_cmd.
// Honors noreply by marking the response skipped instead of writing it.
void out_string(conn *c, const char *str) {
    size_t len;
    assert(c != NULL);
    mc_resp *resp = c->resp;

    // if response was original filled with something, but we're now writing
    // out an error or similar, have to reset the object first.
    // TODO: since this is often redundant with allocation, how many callers
    // are actually requiring it be reset? Can we fast test by just looking at
    // tosend and reset if nonzero?
    resp_reset(resp);

    if (c->noreply) {
        // TODO: just invalidate the response since nothing's been attempted
        // to send yet?
        resp->skip = true;
        if (settings.verbose > 1)
            fprintf(stderr, ">%d NOREPLY %s\n", c->sfd, str);
        conn_set_state(c, conn_new_cmd);
        return;
    }

    if (settings.verbose > 1)
        fprintf(stderr, ">%d %s\n", c->sfd, str);

    // Fill response object with static string.

    len = strlen(str);
    // Oversized lines are replaced wholesale with a fixed error string so
    // the copy below can never overrun resp->wbuf.
    if ((len + 2) > WRITE_BUFFER_SIZE) {
        /* ought to be always enough. just fail for simplicity */
        str = "SERVER_ERROR output line too long";
        len = strlen(str);
    }

    memcpy(resp->wbuf, str, len);
    memcpy(resp->wbuf + len, "\r\n", 2);
    resp_add_iov(resp, resp->wbuf, len + 2);

    conn_set_state(c, conn_new_cmd);
    return;
}
1259
// For metaget-style ASCII commands. Ignores noreply, ensuring clients see
// protocol level errors.
void out_errstring(conn *c, const char *str) {
    // Clear the noreply flag so out_string() always emits the error.
    c->noreply = false;
    out_string(c, str);
}
1266
1267 /*
1268 * Outputs a protocol-specific "out of memory" error. For ASCII clients,
1269 * this is equivalent to out_string().
1270 */
out_of_memory(conn * c,char * ascii_error)1271 void out_of_memory(conn *c, char *ascii_error) {
1272 const static char error_prefix[] = "SERVER_ERROR ";
1273 const static int error_prefix_len = sizeof(error_prefix) - 1;
1274
1275 if (c->protocol == binary_prot) {
1276 /* Strip off the generic error prefix; it's irrelevant in binary */
1277 if (!strncmp(ascii_error, error_prefix, error_prefix_len)) {
1278 ascii_error += error_prefix_len;
1279 }
1280 write_bin_error(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, ascii_error, 0);
1281 } else {
1282 out_string(c, ascii_error);
1283 }
1284 }
1285
// Append one stat as a binary-protocol STAT response packet (header, then
// key, then value) at c->stats.offset. The caller (append_stats) must have
// already grown the buffer to fit header + klen + vlen bytes.
static void append_bin_stats(const char *key, const uint16_t klen,
                             const char *val, const uint32_t vlen,
                             conn *c) {
    char *buf = c->stats.buffer + c->stats.offset;
    uint32_t bodylen = klen + vlen;
    // Multi-byte header fields are converted to network byte order.
    protocol_binary_response_header header = {
        .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
        .response.opcode = PROTOCOL_BINARY_CMD_STAT,
        .response.keylen = (uint16_t)htons(klen),
        .response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES,
        .response.bodylen = htonl(bodylen),
        .response.opaque = c->opaque
    };

    memcpy(buf, header.bytes, sizeof(header.response));
    buf += sizeof(header.response);

    // A value is only written when a key is present; klen==0 && vlen==0
    // produces the bare terminating packet.
    if (klen > 0) {
        memcpy(buf, key, klen);
        buf += klen;

        if (vlen > 0) {
            memcpy(buf, val, vlen);
        }
    }

    c->stats.offset += sizeof(header.response) + bodylen;
}
1314
// Append one stat in ASCII form ("STAT <key> <val>\r\n", "STAT <key>\r\n",
// or the terminating "END\r\n") at c->stats.offset.
static void append_ascii_stats(const char *key, const uint16_t klen,
                               const char *val, const uint32_t vlen,
                               conn *c) {
    char *pos = c->stats.buffer + c->stats.offset;
    uint32_t nbytes = 0;
    int remaining = c->stats.size - c->stats.offset;
    int room = remaining - 1;

    if (klen == 0 && vlen == 0) {
        nbytes = snprintf(pos, room, "END\r\n");
    } else if (vlen == 0) {
        nbytes = snprintf(pos, room, "STAT %s\r\n", key);
    } else {
        nbytes = snprintf(pos, room, "STAT %s %s\r\n", key, val);
    }

    // NOTE(review): nbytes is snprintf's would-be length, not the bytes
    // actually written; this relies on append_stats() pre-growing the
    // buffer so truncation cannot happen — confirm against the caller.
    c->stats.offset += nbytes;
}
1333
// Ensure the connection's stats buffer has at least `needed` bytes of room
// past the current offset, doubling its size until it fits (starting at
// 1024 when no buffer exists yet). Returns false if realloc fails, in
// which case the old buffer and its contents remain valid.
static bool grow_stats_buf(conn *c, size_t needed) {
    size_t nsize = c->stats.size;
    size_t available = nsize - c->stats.offset;
    bool rv = true;

    /* Special case: No buffer -- need to allocate fresh */
    if (c->stats.buffer == NULL) {
        nsize = 1024;
        available = c->stats.size = c->stats.offset = 0;
    }

    while (needed > available) {
        assert(nsize > 0);
        nsize = nsize << 1;
        available = nsize - c->stats.offset;
    }

    if (nsize != c->stats.size) {
        char *ptr = realloc(c->stats.buffer, nsize);
        if (ptr) {
            c->stats.buffer = ptr;
            c->stats.size = nsize;
        } else {
            STATS_LOCK();
            stats.malloc_fails++;
            STATS_UNLOCK();
            rv = false;
        }
    }

    return rv;
}
1366
append_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,const void * cookie)1367 void append_stats(const char *key, const uint16_t klen,
1368 const char *val, const uint32_t vlen,
1369 const void *cookie)
1370 {
1371 /* value without a key is invalid */
1372 if (klen == 0 && vlen > 0) {
1373 return;
1374 }
1375
1376 conn *c = (conn*)cookie;
1377
1378 if (c->protocol == binary_prot) {
1379 size_t needed = vlen + klen + sizeof(protocol_binary_response_header);
1380 if (!grow_stats_buf(c, needed)) {
1381 return;
1382 }
1383 append_bin_stats(key, klen, val, vlen, c);
1384 } else {
1385 size_t needed = vlen + klen + 10; // 10 == "STAT = \r\n"
1386 if (!grow_stats_buf(c, needed)) {
1387 return;
1388 }
1389 append_ascii_stats(key, klen, val, vlen, c);
1390 }
1391
1392 assert(c->stats.offset <= c->stats.size);
1393 }
1394
// Reset per-command state between commands: release any held item, then
// choose the next state — parse buffered input, flush queued responses,
// or wait for more network data.
static void reset_cmd_handler(conn *c) {
    c->cmd = -1;
    c->substate = bin_no_state;
    if (c->item != NULL) {
        // TODO: Any other way to get here?
        // SASL auth was mistakenly using it. Nothing else should?
        if (c->item_malloced) {
            // Item came from malloc (not the slab allocator); free directly.
            free(c->item);
            c->item_malloced = false;
        } else {
            item_remove(c->item);
        }
        c->item = NULL;
    }
    if (c->rbytes > 0) {
        // Unparsed bytes remain in the read buffer: parse the next command.
        conn_set_state(c, conn_parse_cmd);
    } else if (c->resp_head) {
        // Responses are queued: go write them out.
        conn_set_state(c, conn_mwrite);
    } else {
        conn_set_state(c, conn_waiting);
    }
}
1417
// Dispatch end-of-value-read handling to the protocol-specific handler.
static void complete_nread(conn *c) {
    assert(c != NULL);
    assert(c->protocol == ascii_prot
           || c->protocol == binary_prot);

    switch (c->protocol) {
    case ascii_prot:
        complete_nread_ascii(c);
        break;
    case binary_prot:
        complete_nread_binary(c);
        break;
    default:
        /* unreachable: asserted above */
        break;
    }
}
1429
/* Destination must always be chunked */
/* This should be part of item.c */
// Copy `len` bytes of payload from s_it into the chunked item d_it,
// starting at d_it's first chunk with free space and growing the chunk
// chain on demand. Returns 0 on success, -1 if a chunk allocation fails.
static int _store_item_copy_chunks(item *d_it, item *s_it, const int len) {
    item_chunk *dch = (item_chunk *) ITEM_schunk(d_it);
    /* Advance dch until we find free space */
    while (dch->size == dch->used) {
        if (dch->next) {
            dch = dch->next;
        } else {
            break;
        }
    }

    if (s_it->it_flags & ITEM_CHUNKED) {
        int remain = len;
        item_chunk *sch = (item_chunk *) ITEM_schunk(s_it);
        int copied = 0;
        /* Fills dch's to capacity, not straight copy sch in case data is
         * being added or removed (ie append/prepend)
         */
        while (sch && dch && remain) {
            assert(dch->used <= dch->size);
            // Copy the smaller of: space left in dest chunk, or bytes left
            // unread in the current source chunk.
            int todo = (dch->size - dch->used < sch->used - copied)
                ? dch->size - dch->used : sch->used - copied;
            if (remain < todo)
                todo = remain;
            memcpy(dch->data + dch->used, sch->data + copied, todo);
            dch->used += todo;
            copied += todo;
            remain -= todo;
            assert(dch->used <= dch->size);
            if (dch->size == dch->used) {
                // Destination chunk filled: extend the chain for the rest.
                item_chunk *tch = do_item_alloc_chunk(dch, remain);
                if (tch) {
                    dch = tch;
                } else {
                    return -1;
                }
            }
            assert(copied <= sch->used);
            if (copied == sch->used) {
                // Exhausted this source chunk; move to the next one.
                copied = 0;
                sch = sch->next;
            }
        }
        /* assert that the destination had enough space for the source */
        assert(remain == 0);
    } else {
        int done = 0;
        /* Fill dch's via a non-chunked item. */
        while (len > done && dch) {
            int todo = (dch->size - dch->used < len - done)
                ? dch->size - dch->used : len - done;
            //assert(dch->size - dch->used != 0);
            memcpy(dch->data + dch->used, ITEM_data(s_it) + done, todo);
            done += todo;
            dch->used += todo;
            assert(dch->used <= dch->size);
            if (dch->size == dch->used) {
                item_chunk *tch = do_item_alloc_chunk(dch, len - done);
                if (tch) {
                    dch = tch;
                } else {
                    return -1;
                }
            }
        }
        assert(len == done);
    }
    return 0;
}
1501
// Assemble the payload of an append/prepend result into new_it by copying
// from the existing item (old_it) and the newly-read data (add_it) in the
// order dictated by comm. The "- 2" offsets drop the first part's trailing
// CRLF so only the final part terminates the value. Returns 0 on success,
// -1 if a chunked copy fails.
static int _store_item_copy_data(int comm, item *old_it, item *new_it, item *add_it) {
    if (comm == NREAD_APPEND) {
        if (new_it->it_flags & ITEM_CHUNKED) {
            if (_store_item_copy_chunks(new_it, old_it, old_it->nbytes - 2) == -1 ||
                _store_item_copy_chunks(new_it, add_it, add_it->nbytes) == -1) {
                return -1;
            }
        } else {
            memcpy(ITEM_data(new_it), ITEM_data(old_it), old_it->nbytes);
            memcpy(ITEM_data(new_it) + old_it->nbytes - 2 /* CRLF */, ITEM_data(add_it), add_it->nbytes);
        }
    } else {
        /* NREAD_PREPEND */
        if (new_it->it_flags & ITEM_CHUNKED) {
            if (_store_item_copy_chunks(new_it, add_it, add_it->nbytes - 2) == -1 ||
                _store_item_copy_chunks(new_it, old_it, old_it->nbytes) == -1) {
                return -1;
            }
        } else {
            memcpy(ITEM_data(new_it), ITEM_data(add_it), add_it->nbytes);
            memcpy(ITEM_data(new_it) + add_it->nbytes - 2 /* CRLF */, ITEM_data(old_it), old_it->nbytes);
        }
    }
    return 0;
}
1527
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands. Protected by the item lock.
 *
 * Returns the state of storage.
 */
enum store_item_type do_store_item(item *it, int comm, conn *c, const uint32_t hv) {
    char *key = ITEM_key(it);
    item *old_it = do_item_get(key, it->nkey, hv, c, DONT_UPDATE);
    enum store_item_type stored = NOT_STORED;

    enum cas_result { CAS_NONE, CAS_MATCH, CAS_BADVAL, CAS_STALE, CAS_MISS };

    item *new_it = NULL;
    uint32_t flags;

    /* Do the CAS test up front so we can apply to all store modes */
    enum cas_result cas_res = CAS_NONE;

    bool do_store = false;
    if (old_it != NULL) {
        // Most of the CAS work requires something to compare to.
        uint64_t it_cas = ITEM_get_cas(it);
        uint64_t old_cas = ITEM_get_cas(old_it);
        if (it_cas == 0) {
            cas_res = CAS_NONE;
        } else if (it_cas == old_cas) {
            cas_res = CAS_MATCH;
        } else if (c->set_stale && it_cas < old_cas) {
            // Stale-set mode: an older CAS may still overwrite, marking
            // the value stale (used by replication-style workloads).
            cas_res = CAS_STALE;
        } else {
            cas_res = CAS_BADVAL;
        }

        switch (comm) {
        case NREAD_ADD:
            /* add only adds a nonexistent item, but promote to head of LRU */
            do_item_update(old_it);
            break;
        case NREAD_CAS:
            if (cas_res == CAS_MATCH) {
                // cas validates
                // it and old_it may belong to different classes.
                // I'm updating the stats for the one that's getting pushed out
                pthread_mutex_lock(&c->thread->stats.mutex);
                c->thread->stats.slab_stats[ITEM_clsid(old_it)].cas_hits++;
                pthread_mutex_unlock(&c->thread->stats.mutex);
                do_store = true;
            } else if (cas_res == CAS_STALE) {
                // if we're allowed to set a stale value, CAS must be lower than
                // the current item's CAS.
                // This replaces the value, but should preserve TTL, and stale
                // item marker bit + token sent if exists.
                it->exptime = old_it->exptime;
                it->it_flags |= ITEM_STALE;
                if (old_it->it_flags & ITEM_TOKEN_SENT) {
                    it->it_flags |= ITEM_TOKEN_SENT;
                }

                pthread_mutex_lock(&c->thread->stats.mutex);
                c->thread->stats.slab_stats[ITEM_clsid(old_it)].cas_hits++;
                pthread_mutex_unlock(&c->thread->stats.mutex);
                do_store = true;
            } else {
                // NONE or BADVAL are the same for CAS cmd
                pthread_mutex_lock(&c->thread->stats.mutex);
                c->thread->stats.slab_stats[ITEM_clsid(old_it)].cas_badval++;
                pthread_mutex_unlock(&c->thread->stats.mutex);

                if (settings.verbose > 1) {
                    fprintf(stderr, "CAS: failure: expected %llu, got %llu\n",
                            (unsigned long long)ITEM_get_cas(old_it),
                            (unsigned long long)ITEM_get_cas(it));
                }
                stored = EXISTS;
            }
            break;
        case NREAD_APPEND:
        case NREAD_PREPEND:
            // append/prepend honor CAS when one was supplied.
            if (cas_res != CAS_NONE && cas_res != CAS_MATCH) {
                stored = EXISTS;
                break;
            }
#ifdef EXTSTORE
            if ((old_it->it_flags & ITEM_HDR) != 0) {
                /* block append/prepend from working with extstore-d items.
                 * leave response code to NOT_STORED default */
                break;
            }
#endif
            /* we have it and old_it here - alloc memory to hold both */
            FLAGS_CONV(old_it, flags);
            new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */);

            // OOM trying to copy.
            if (new_it == NULL)
                break;
            /* copy data from it and old_it to new_it */
            if (_store_item_copy_data(comm, old_it, new_it, it) == -1) {
                // failed data copy
                break;
            } else {
                // refcount of new_it is 1 here. will end up 2 after link.
                // it's original ref is managed outside of this function
                it = new_it;
                do_store = true;
            }
            break;
        case NREAD_REPLACE:
        case NREAD_SET:
            do_store = true;
            break;
        }

        if (do_store) {
            STORAGE_delete(c->thread->storage, old_it);
            item_replace(old_it, it, hv);
            stored = STORED;
        }

        do_item_remove(old_it); /* release our reference */
        if (new_it != NULL) {
            // append/prepend end up with an extra reference for new_it.
            do_item_remove(new_it);
        }
    } else {
        /* No pre-existing item to replace or compare to. */
        if (ITEM_get_cas(it) != 0) {
            /* Asked for a CAS match but nothing to compare it to. */
            cas_res = CAS_MISS;
        }

        switch (comm) {
        case NREAD_ADD:
        case NREAD_SET:
            do_store = true;
            break;
        case NREAD_CAS:
            // LRU expired
            stored = NOT_FOUND;
            pthread_mutex_lock(&c->thread->stats.mutex);
            c->thread->stats.cas_misses++;
            pthread_mutex_unlock(&c->thread->stats.mutex);
            break;
        case NREAD_REPLACE:
        case NREAD_APPEND:
        case NREAD_PREPEND:
            /* Requires an existing item. */
            break;
        }

        if (do_store) {
            do_item_link(it, hv);
            stored = STORED;
        }
    }

    if (stored == STORED) {
        // Hand the stored item's CAS back to the caller for the reply.
        c->cas = ITEM_get_cas(it);
    }
    LOGGER_LOG(c->thread->l, LOG_MUTATIONS, LOGGER_ITEM_STORE, NULL,
               stored, comm, ITEM_key(it), it->nkey, it->nbytes, it->exptime,
               ITEM_clsid(it), c->sfd);

    return stored;
}
1694
1695 /* set up a connection to write a buffer then free it, used for stats */
write_and_free(conn * c,char * buf,int bytes)1696 void write_and_free(conn *c, char *buf, int bytes) {
1697 if (buf) {
1698 mc_resp *resp = c->resp;
1699 resp->write_and_free = buf;
1700 resp_add_iov(resp, buf, bytes);
1701 conn_set_state(c, conn_new_cmd);
1702 } else {
1703 out_of_memory(c, "SERVER_ERROR out of memory writing stats");
1704 }
1705 }
1706
// Format a single stat value with printf-style arguments and forward the
// name/value pair to the ADD_STAT callback (e.g. append_stats).
void append_stat(const char *name, ADD_STAT add_stats, conn *c,
                 const char *fmt, ...) {
    char val_str[STAT_VAL_LEN];
    int vlen;
    va_list ap;

    assert(name);
    assert(add_stats);
    assert(c);
    assert(fmt);

    va_start(ap, fmt);
    // NOTE(review): "sizeof - 1" leaves one byte unused; vsnprintf already
    // NUL-terminates within the size passed. Harmless, kept as-is.
    vlen = vsnprintf(val_str, sizeof(val_str) - 1, fmt, ap);
    va_end(ap);

    add_stats(name, strlen(name), val_str, vlen, c);
}
1724
/* return server specific stats only */
// Aggregates per-thread and per-slab counters, then emits the general
// server stats via the ADD_STAT callback. Holds STATS_LOCK across the
// reads of the global stats/stats_state structures.
void server_stats(ADD_STAT add_stats, conn *c) {
    pid_t pid = getpid();
    rel_time_t now = current_time;

    struct thread_stats thread_stats;
    threadlocal_stats_aggregate(&thread_stats);
    struct slab_stats slab_stats;
    slab_stats_aggregate(&thread_stats, &slab_stats);
#ifndef WIN32
    struct rusage usage;
    getrusage(RUSAGE_SELF, &usage);
#endif /* !WIN32 */

    STATS_LOCK();

    // NOTE(review): "%lu" paired with a (long) cast is a signed/unsigned
    // format mismatch; benign for pids in practice, but "%ld" would match.
    APPEND_STAT("pid", "%lu", (long)pid);
    APPEND_STAT("uptime", "%u", now - ITEM_UPDATE_INTERVAL);
    APPEND_STAT("time", "%ld", now + (long)process_started);
    APPEND_STAT("version", "%s", VERSION);
    APPEND_STAT("libevent", "%s", event_get_version());
    APPEND_STAT("pointer_size", "%d", (int)(8 * sizeof(void *)));

#ifndef WIN32
    append_stat("rusage_user", add_stats, c, "%ld.%06ld",
                (long)usage.ru_utime.tv_sec,
                (long)usage.ru_utime.tv_usec);
    append_stat("rusage_system", add_stats, c, "%ld.%06ld",
                (long)usage.ru_stime.tv_sec,
                (long)usage.ru_stime.tv_usec);
#endif /* !WIN32 */

    APPEND_STAT("max_connections", "%d", settings.maxconns);
    APPEND_STAT("curr_connections", "%llu", (unsigned long long)stats_state.curr_conns - 1);
    APPEND_STAT("total_connections", "%llu", (unsigned long long)stats.total_conns);
    if (settings.maxconns_fast) {
        APPEND_STAT("rejected_connections", "%llu", (unsigned long long)stats.rejected_conns);
    }
    APPEND_STAT("connection_structures", "%u", stats_state.conn_structs);
    APPEND_STAT("response_obj_oom", "%llu", (unsigned long long)thread_stats.response_obj_oom);
    APPEND_STAT("response_obj_count", "%llu", (unsigned long long)thread_stats.response_obj_count);
    APPEND_STAT("response_obj_bytes", "%llu", (unsigned long long)thread_stats.response_obj_bytes);
    APPEND_STAT("read_buf_count", "%llu", (unsigned long long)thread_stats.read_buf_count);
    APPEND_STAT("read_buf_bytes", "%llu", (unsigned long long)thread_stats.read_buf_bytes);
    APPEND_STAT("read_buf_bytes_free", "%llu", (unsigned long long)thread_stats.read_buf_bytes_free);
    APPEND_STAT("read_buf_oom", "%llu", (unsigned long long)thread_stats.read_buf_oom);
    APPEND_STAT("reserved_fds", "%u", stats_state.reserved_fds);
    APPEND_STAT("cmd_get", "%llu", (unsigned long long)thread_stats.get_cmds);
    APPEND_STAT("cmd_set", "%llu", (unsigned long long)slab_stats.set_cmds);
    APPEND_STAT("cmd_flush", "%llu", (unsigned long long)thread_stats.flush_cmds);
    APPEND_STAT("cmd_touch", "%llu", (unsigned long long)thread_stats.touch_cmds);
    APPEND_STAT("cmd_meta", "%llu", (unsigned long long)thread_stats.meta_cmds);
    APPEND_STAT("get_hits", "%llu", (unsigned long long)slab_stats.get_hits);
    APPEND_STAT("get_misses", "%llu", (unsigned long long)thread_stats.get_misses);
    APPEND_STAT("get_expired", "%llu", (unsigned long long)thread_stats.get_expired);
    APPEND_STAT("get_flushed", "%llu", (unsigned long long)thread_stats.get_flushed);
#ifdef EXTSTORE
    if (c->thread->storage) {
        APPEND_STAT("get_extstore", "%llu", (unsigned long long)thread_stats.get_extstore);
        APPEND_STAT("get_aborted_extstore", "%llu", (unsigned long long)thread_stats.get_aborted_extstore);
        APPEND_STAT("get_oom_extstore", "%llu", (unsigned long long)thread_stats.get_oom_extstore);
        APPEND_STAT("recache_from_extstore", "%llu", (unsigned long long)thread_stats.recache_from_extstore);
        APPEND_STAT("miss_from_extstore", "%llu", (unsigned long long)thread_stats.miss_from_extstore);
        APPEND_STAT("badcrc_from_extstore", "%llu", (unsigned long long)thread_stats.badcrc_from_extstore);
    }
#endif
    APPEND_STAT("delete_misses", "%llu", (unsigned long long)thread_stats.delete_misses);
    APPEND_STAT("delete_hits", "%llu", (unsigned long long)slab_stats.delete_hits);
    APPEND_STAT("incr_misses", "%llu", (unsigned long long)thread_stats.incr_misses);
    APPEND_STAT("incr_hits", "%llu", (unsigned long long)slab_stats.incr_hits);
    APPEND_STAT("decr_misses", "%llu", (unsigned long long)thread_stats.decr_misses);
    APPEND_STAT("decr_hits", "%llu", (unsigned long long)slab_stats.decr_hits);
    APPEND_STAT("cas_misses", "%llu", (unsigned long long)thread_stats.cas_misses);
    APPEND_STAT("cas_hits", "%llu", (unsigned long long)slab_stats.cas_hits);
    APPEND_STAT("cas_badval", "%llu", (unsigned long long)slab_stats.cas_badval);
    APPEND_STAT("touch_hits", "%llu", (unsigned long long)slab_stats.touch_hits);
    APPEND_STAT("touch_misses", "%llu", (unsigned long long)thread_stats.touch_misses);
    APPEND_STAT("auth_cmds", "%llu", (unsigned long long)thread_stats.auth_cmds);
    APPEND_STAT("auth_errors", "%llu", (unsigned long long)thread_stats.auth_errors);
    if (settings.idle_timeout) {
        APPEND_STAT("idle_kicks", "%llu", (unsigned long long)thread_stats.idle_kicks);
    }
    APPEND_STAT("bytes_read", "%llu", (unsigned long long)thread_stats.bytes_read);
    APPEND_STAT("bytes_written", "%llu", (unsigned long long)thread_stats.bytes_written);
    APPEND_STAT("limit_maxbytes", "%llu", (unsigned long long)settings.maxbytes);
    APPEND_STAT("accepting_conns", "%u", stats_state.accepting_conns);
    APPEND_STAT("listen_disabled_num", "%llu", (unsigned long long)stats.listen_disabled_num);
    APPEND_STAT("time_in_listen_disabled_us", "%llu", stats.time_in_listen_disabled_us);
    APPEND_STAT("threads", "%d", settings.num_threads);
    APPEND_STAT("conn_yields", "%llu", (unsigned long long)thread_stats.conn_yields);
    APPEND_STAT("hash_power_level", "%u", stats_state.hash_power_level);
    APPEND_STAT("hash_bytes", "%llu", (unsigned long long)stats_state.hash_bytes);
    APPEND_STAT("hash_is_expanding", "%u", stats_state.hash_is_expanding);
    if (settings.slab_reassign) {
        APPEND_STAT("slab_reassign_rescues", "%llu", stats.slab_reassign_rescues);
        APPEND_STAT("slab_reassign_chunk_rescues", "%llu", stats.slab_reassign_chunk_rescues);
        APPEND_STAT("slab_reassign_evictions_nomem", "%llu", stats.slab_reassign_evictions_nomem);
        APPEND_STAT("slab_reassign_inline_reclaim", "%llu", stats.slab_reassign_inline_reclaim);
        APPEND_STAT("slab_reassign_busy_items", "%llu", stats.slab_reassign_busy_items);
        APPEND_STAT("slab_reassign_busy_deletes", "%llu", stats.slab_reassign_busy_deletes);
        APPEND_STAT("slab_reassign_running", "%u", stats_state.slab_reassign_running);
        APPEND_STAT("slabs_moved", "%llu", stats.slabs_moved);
    }
    if (settings.lru_crawler) {
        APPEND_STAT("lru_crawler_running", "%u", stats_state.lru_crawler_running);
        APPEND_STAT("lru_crawler_starts", "%u", stats.lru_crawler_starts);
    }
    if (settings.lru_maintainer_thread) {
        APPEND_STAT("lru_maintainer_juggles", "%llu", (unsigned long long)stats.lru_maintainer_juggles);
    }
    APPEND_STAT("malloc_fails", "%llu",
                (unsigned long long)stats.malloc_fails);
    APPEND_STAT("log_worker_dropped", "%llu", (unsigned long long)stats.log_worker_dropped);
    APPEND_STAT("log_worker_written", "%llu", (unsigned long long)stats.log_worker_written);
    APPEND_STAT("log_watcher_skipped", "%llu", (unsigned long long)stats.log_watcher_skipped);
    APPEND_STAT("log_watcher_sent", "%llu", (unsigned long long)stats.log_watcher_sent);
    APPEND_STAT("log_watchers", "%llu", (unsigned long long)stats_state.log_watchers);
    STATS_UNLOCK();
#ifdef EXTSTORE
    storage_stats(add_stats, c);
#endif
#ifdef TLS
    if (settings.ssl_enabled) {
        if (settings.ssl_session_cache) {
            APPEND_STAT("ssl_new_sessions", "%llu", (unsigned long long)stats.ssl_new_sessions);
        }
        APPEND_STAT("ssl_handshake_errors", "%llu", (unsigned long long)stats.ssl_handshake_errors);
        APPEND_STAT("time_since_server_cert_refresh", "%u", now - settings.ssl_last_cert_refresh_time);
    }
#endif
    APPEND_STAT("unexpected_napi_ids", "%llu", (unsigned long long)stats.unexpected_napi_ids);
    APPEND_STAT("round_robin_fallback", "%llu", (unsigned long long)stats.round_robin_fallback);
}
1858
process_stat_settings(ADD_STAT add_stats,void * c)1859 void process_stat_settings(ADD_STAT add_stats, void *c) {
1860 assert(add_stats);
1861 APPEND_STAT("maxbytes", "%llu", (unsigned long long)settings.maxbytes);
1862 APPEND_STAT("maxconns", "%d", settings.maxconns);
1863 APPEND_STAT("tcpport", "%d", settings.port);
1864 APPEND_STAT("udpport", "%d", settings.udpport);
1865 APPEND_STAT("inter", "%s", settings.inter ? settings.inter : "NULL");
1866 APPEND_STAT("verbosity", "%d", settings.verbose);
1867 APPEND_STAT("oldest", "%lu", (unsigned long)settings.oldest_live);
1868 APPEND_STAT("evictions", "%s", settings.evict_to_free ? "on" : "off");
1869 APPEND_STAT("domain_socket", "%s",
1870 settings.socketpath ? settings.socketpath : "NULL");
1871 APPEND_STAT("umask", "%o", settings.access);
1872 APPEND_STAT("shutdown_command", "%s",
1873 settings.shutdown_command ? "yes" : "no");
1874 APPEND_STAT("growth_factor", "%.2f", settings.factor);
1875 APPEND_STAT("chunk_size", "%d", settings.chunk_size);
1876 APPEND_STAT("num_threads", "%d", settings.num_threads);
1877 APPEND_STAT("num_threads_per_udp", "%d", settings.num_threads_per_udp);
1878 APPEND_STAT("stat_key_prefix", "%c", settings.prefix_delimiter);
1879 APPEND_STAT("detail_enabled", "%s",
1880 settings.detail_enabled ? "yes" : "no");
1881 APPEND_STAT("reqs_per_event", "%d", settings.reqs_per_event);
1882 APPEND_STAT("cas_enabled", "%s", settings.use_cas ? "yes" : "no");
1883 APPEND_STAT("tcp_backlog", "%d", settings.backlog);
1884 APPEND_STAT("binding_protocol", "%s",
1885 prot_text(settings.binding_protocol));
1886 APPEND_STAT("auth_enabled_sasl", "%s", settings.sasl ? "yes" : "no");
1887 APPEND_STAT("auth_enabled_ascii", "%s", settings.auth_file ? settings.auth_file : "no");
1888 APPEND_STAT("item_size_max", "%d", settings.item_size_max);
1889 APPEND_STAT("maxconns_fast", "%s", settings.maxconns_fast ? "yes" : "no");
1890 APPEND_STAT("hashpower_init", "%d", settings.hashpower_init);
1891 APPEND_STAT("slab_reassign", "%s", settings.slab_reassign ? "yes" : "no");
1892 APPEND_STAT("slab_automove", "%d", settings.slab_automove);
1893 APPEND_STAT("slab_automove_ratio", "%.2f", settings.slab_automove_ratio);
1894 APPEND_STAT("slab_automove_window", "%u", settings.slab_automove_window);
1895 APPEND_STAT("slab_chunk_max", "%d", settings.slab_chunk_size_max);
1896 APPEND_STAT("lru_crawler", "%s", settings.lru_crawler ? "yes" : "no");
1897 APPEND_STAT("lru_crawler_sleep", "%d", settings.lru_crawler_sleep);
1898 APPEND_STAT("lru_crawler_tocrawl", "%lu", (unsigned long)settings.lru_crawler_tocrawl);
1899 APPEND_STAT("tail_repair_time", "%d", settings.tail_repair_time);
1900 APPEND_STAT("flush_enabled", "%s", settings.flush_enabled ? "yes" : "no");
1901 APPEND_STAT("dump_enabled", "%s", settings.dump_enabled ? "yes" : "no");
1902 APPEND_STAT("hash_algorithm", "%s", settings.hash_algorithm);
1903 APPEND_STAT("lru_maintainer_thread", "%s", settings.lru_maintainer_thread ? "yes" : "no");
1904 APPEND_STAT("lru_segmented", "%s", settings.lru_segmented ? "yes" : "no");
1905 APPEND_STAT("hot_lru_pct", "%d", settings.hot_lru_pct);
1906 APPEND_STAT("warm_lru_pct", "%d", settings.warm_lru_pct);
1907 APPEND_STAT("hot_max_factor", "%.2f", settings.hot_max_factor);
1908 APPEND_STAT("warm_max_factor", "%.2f", settings.warm_max_factor);
1909 APPEND_STAT("temp_lru", "%s", settings.temp_lru ? "yes" : "no");
1910 APPEND_STAT("temporary_ttl", "%u", settings.temporary_ttl);
1911 APPEND_STAT("idle_timeout", "%d", settings.idle_timeout);
1912 APPEND_STAT("watcher_logbuf_size", "%u", settings.logger_watcher_buf_size);
1913 APPEND_STAT("worker_logbuf_size", "%u", settings.logger_buf_size);
1914 APPEND_STAT("read_buf_mem_limit", "%u", settings.read_buf_mem_limit);
1915 APPEND_STAT("track_sizes", "%s", item_stats_sizes_status() ? "yes" : "no");
1916 APPEND_STAT("inline_ascii_response", "%s", "no"); // setting is dead, cannot be yes.
1917 #ifdef HAVE_DROP_PRIVILEGES
1918 APPEND_STAT("drop_privileges", "%s", settings.drop_privileges ? "yes" : "no");
1919 #endif
1920 #ifdef EXTSTORE
1921 APPEND_STAT("ext_item_size", "%u", settings.ext_item_size);
1922 APPEND_STAT("ext_item_age", "%u", settings.ext_item_age);
1923 APPEND_STAT("ext_low_ttl", "%u", settings.ext_low_ttl);
1924 APPEND_STAT("ext_recache_rate", "%u", settings.ext_recache_rate);
1925 APPEND_STAT("ext_wbuf_size", "%u", settings.ext_wbuf_size);
1926 APPEND_STAT("ext_compact_under", "%u", settings.ext_compact_under);
1927 APPEND_STAT("ext_drop_under", "%u", settings.ext_drop_under);
1928 APPEND_STAT("ext_max_frag", "%.2f", settings.ext_max_frag);
1929 APPEND_STAT("slab_automove_freeratio", "%.3f", settings.slab_automove_freeratio);
1930 APPEND_STAT("ext_drop_unread", "%s", settings.ext_drop_unread ? "yes" : "no");
1931 #endif
1932 #ifdef TLS
1933 APPEND_STAT("ssl_enabled", "%s", settings.ssl_enabled ? "yes" : "no");
1934 APPEND_STAT("ssl_chain_cert", "%s", settings.ssl_chain_cert);
1935 APPEND_STAT("ssl_key", "%s", settings.ssl_key);
1936 APPEND_STAT("ssl_verify_mode", "%d", settings.ssl_verify_mode);
1937 APPEND_STAT("ssl_keyformat", "%d", settings.ssl_keyformat);
1938 APPEND_STAT("ssl_ciphers", "%s", settings.ssl_ciphers ? settings.ssl_ciphers : "NULL");
1939 APPEND_STAT("ssl_ca_cert", "%s", settings.ssl_ca_cert ? settings.ssl_ca_cert : "NULL");
1940 APPEND_STAT("ssl_wbuf_size", "%u", settings.ssl_wbuf_size);
1941 APPEND_STAT("ssl_session_cache", "%s", settings.ssl_session_cache ? "yes" : "no");
1942 APPEND_STAT("ssl_min_version", "%s", ssl_proto_text(settings.ssl_min_version));
1943 #endif
1944 APPEND_STAT("num_napi_ids", "%s", settings.num_napi_ids);
1945 APPEND_STAT("memory_file", "%s", settings.memory_file);
1946 }
1947
/* Compare an nzlength-byte, non NUL-terminated string against a
 * NUL-terminated one. Returns 0 on an exact match, -1 otherwise. */
static int nz_strcmp(int nzlength, const char *nz, const char *z) {
    size_t zlen = strlen(z);
    if ((int)zlen != nzlength) {
        return -1;
    }
    return memcmp(nz, z, zlen) == 0 ? 0 : -1;
}
1952
get_stats(const char * stat_type,int nkey,ADD_STAT add_stats,void * c)1953 bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
1954 bool ret = true;
1955
1956 if (add_stats != NULL) {
1957 if (!stat_type) {
1958 /* prepare general statistics for the engine */
1959 STATS_LOCK();
1960 APPEND_STAT("bytes", "%llu", (unsigned long long)stats_state.curr_bytes);
1961 APPEND_STAT("curr_items", "%llu", (unsigned long long)stats_state.curr_items);
1962 APPEND_STAT("total_items", "%llu", (unsigned long long)stats.total_items);
1963 STATS_UNLOCK();
1964 APPEND_STAT("slab_global_page_pool", "%u", global_page_pool_size(NULL));
1965 item_stats_totals(add_stats, c);
1966 } else if (nz_strcmp(nkey, stat_type, "items") == 0) {
1967 item_stats(add_stats, c);
1968 } else if (nz_strcmp(nkey, stat_type, "slabs") == 0) {
1969 slabs_stats(add_stats, c);
1970 } else if (nz_strcmp(nkey, stat_type, "sizes") == 0) {
1971 item_stats_sizes(add_stats, c);
1972 } else if (nz_strcmp(nkey, stat_type, "sizes_enable") == 0) {
1973 item_stats_sizes_enable(add_stats, c);
1974 } else if (nz_strcmp(nkey, stat_type, "sizes_disable") == 0) {
1975 item_stats_sizes_disable(add_stats, c);
1976 } else {
1977 ret = false;
1978 }
1979 } else {
1980 ret = false;
1981 }
1982
1983 return ret;
1984 }
1985
get_conn_text(const conn * c,const int af,char * addr,struct sockaddr * sock_addr)1986 static inline void get_conn_text(const conn *c, const int af,
1987 char* addr, struct sockaddr *sock_addr) {
1988 char addr_text[MAXPATHLEN];
1989 addr_text[0] = '\0';
1990 const char *protoname = "?";
1991 unsigned short port = 0;
1992
1993 switch (af) {
1994 case AF_INET:
1995 (void) inet_ntop(af,
1996 &((struct sockaddr_in *)sock_addr)->sin_addr,
1997 addr_text,
1998 sizeof(addr_text) - 1);
1999 port = ntohs(((struct sockaddr_in *)sock_addr)->sin_port);
2000 protoname = IS_UDP(c->transport) ? "udp" : "tcp";
2001 break;
2002
2003 case AF_INET6:
2004 addr_text[0] = '[';
2005 addr_text[1] = '\0';
2006 if (inet_ntop(af,
2007 &((struct sockaddr_in6 *)sock_addr)->sin6_addr,
2008 addr_text + 1,
2009 sizeof(addr_text) - 2)) {
2010 strcat(addr_text, "]");
2011 }
2012 port = ntohs(((struct sockaddr_in6 *)sock_addr)->sin6_port);
2013 protoname = IS_UDP(c->transport) ? "udp6" : "tcp6";
2014 break;
2015
2016 #ifndef DISABLE_UNIX_SOCKET
2017 case AF_UNIX:
2018 {
2019 size_t pathlen = 0;
2020 // this strncpy call originally could piss off an address
2021 // sanitizer; we supplied the size of the dest buf as a limiter,
2022 // but optimized versions of strncpy could read past the end of
2023 // *src while looking for a null terminator. Since buf and
2024 // sun_path here are both on the stack they could even overlap,
2025 // which is "undefined". In all OSS versions of strncpy I could
2026 // find this has no effect; it'll still only copy until the first null
2027 // terminator is found. Thus it's possible to get the OS to
2028 // examine past the end of sun_path but it's unclear to me if this
2029 // can cause any actual problem.
2030 //
2031 // We need a safe_strncpy util function but I'll punt on figuring
2032 // that out for now.
2033 pathlen = sizeof(((struct sockaddr_un *)sock_addr)->sun_path);
2034 if (MAXPATHLEN <= pathlen) {
2035 pathlen = MAXPATHLEN - 1;
2036 }
2037 strncpy(addr_text,
2038 ((struct sockaddr_un *)sock_addr)->sun_path,
2039 pathlen);
2040 addr_text[pathlen] = '\0';
2041 protoname = "unix";
2042 }
2043 break;
2044 #endif /* #ifndef DISABLE_UNIX_SOCKET */
2045 }
2046
2047 if (strlen(addr_text) < 2) {
2048 /* Most likely this is a connected UNIX-domain client which
2049 * has no peer socket address, but there's no portable way
2050 * to tell for sure.
2051 */
2052 sprintf(addr_text, "<AF %d>", af);
2053 }
2054
2055 if (port) {
2056 sprintf(addr, "%s:%s:%u", protoname, addr_text, port);
2057 } else {
2058 sprintf(addr, "%s:%s", protoname, addr_text);
2059 }
2060 }
2061
conn_to_str(const conn * c,char * addr,char * svr_addr)2062 static void conn_to_str(const conn *c, char *addr, char *svr_addr) {
2063 if (!c) {
2064 strcpy(addr, "<null>");
2065 } else if (c->state == conn_closed) {
2066 strcpy(addr, "<closed>");
2067 } else {
2068 struct sockaddr_in6 local_addr;
2069 struct sockaddr *sock_addr = (void *)&c->request_addr;
2070
2071 /* For listen ports and idle UDP ports, show listen address */
2072 if (c->state == conn_listening ||
2073 (IS_UDP(c->transport) &&
2074 c->state == conn_read)) {
2075 socklen_t local_addr_len = sizeof(local_addr);
2076
2077 if (getsockname(c->sfd,
2078 (struct sockaddr *)&local_addr,
2079 &local_addr_len) == 0) {
2080 sock_addr = (struct sockaddr *)&local_addr;
2081 }
2082 }
2083 get_conn_text(c, sock_addr->sa_family, addr, sock_addr);
2084
2085 if (c->state != conn_listening && !(IS_UDP(c->transport) &&
2086 c->state == conn_read)) {
2087 struct sockaddr_storage svr_sock_addr;
2088 socklen_t svr_addr_len = sizeof(svr_sock_addr);
2089 getsockname(c->sfd, (struct sockaddr *)&svr_sock_addr, &svr_addr_len);
2090 get_conn_text(c, svr_sock_addr.ss_family, svr_addr, (struct sockaddr *)&svr_sock_addr);
2091 }
2092 }
2093 }
2094
/* Emit per-connection statistics ("stats conns") through add_stats.
 * NOTE(review): APPEND_NUM_STAT appears to expand in terms of the local
 * key_str/val_str/klen/vlen variables declared below -- confirm against
 * memcached.h before renaming any of them. */
void process_stats_conns(ADD_STAT add_stats, void *c) {
    int i;
    char key_str[STAT_KEY_LEN];
    char val_str[STAT_VAL_LEN];
    /* Worst-case decoration around an address: "unix:" prefix or a
     * ":65535" port suffix. */
    size_t extras_len = sizeof("unix:") + sizeof("65535");
    char addr[MAXPATHLEN + extras_len];
    char svr_addr[MAXPATHLEN + extras_len];
    int klen = 0, vlen = 0;

    assert(add_stats);

    for (i = 0; i < max_fds; i++) {
        if (conns[i]) {
            /* This is safe to do unlocked because conns are never freed; the
             * worst that'll happen will be a minor inconsistency in the
             * output -- not worth the complexity of the locking that'd be
             * required to prevent it.
             */
            if IS_UDP(conns[i]->transport) {
                APPEND_NUM_STAT(i, "UDP", "%s", "UDP");
            }
            if (conns[i]->state != conn_closed) {
                conn_to_str(conns[i], addr, svr_addr);

                APPEND_NUM_STAT(i, "addr", "%s", addr);
                /* Listening sockets and idle UDP sockets already report the
                 * listen address as "addr", so skip the extra field. */
                if (conns[i]->state != conn_listening &&
                    !(IS_UDP(conns[i]->transport) && conns[i]->state == conn_read)) {
                    APPEND_NUM_STAT(i, "listen_addr", "%s", svr_addr);
                }
                APPEND_NUM_STAT(i, "state", "%s",
                        state_text(conns[i]->state));
                APPEND_NUM_STAT(i, "secs_since_last_cmd", "%d",
                        current_time - conns[i]->last_cmd_time);
            }
        }
    }
}
2132
2133 #define IT_REFCOUNT_LIMIT 60000
limited_get(char * key,size_t nkey,conn * c,uint32_t exptime,bool should_touch,bool do_update,bool * overflow)2134 item* limited_get(char *key, size_t nkey, conn *c, uint32_t exptime, bool should_touch, bool do_update, bool *overflow) {
2135 item *it;
2136 if (should_touch) {
2137 it = item_touch(key, nkey, exptime, c);
2138 } else {
2139 it = item_get(key, nkey, c, do_update);
2140 }
2141 if (it && it->refcount > IT_REFCOUNT_LIMIT) {
2142 item_remove(it);
2143 it = NULL;
2144 *overflow = true;
2145 } else {
2146 *overflow = false;
2147 }
2148 return it;
2149 }
2150
2151 // Semantics are different than limited_get; since the item is returned
2152 // locked, caller can directly change what it needs.
2153 // though it might eventually be a better interface to sink it all into
2154 // items.c.
limited_get_locked(char * key,size_t nkey,conn * c,bool do_update,uint32_t * hv,bool * overflow)2155 item* limited_get_locked(char *key, size_t nkey, conn *c, bool do_update, uint32_t *hv, bool *overflow) {
2156 item *it;
2157 it = item_get_locked(key, nkey, c, do_update, hv);
2158 if (it && it->refcount > IT_REFCOUNT_LIMIT) {
2159 do_item_remove(it);
2160 it = NULL;
2161 item_unlock(*hv);
2162 *overflow = true;
2163 } else {
2164 *overflow = false;
2165 }
2166 return it;
2167 }
2168
2169 /*
2170 * adds a delta value to a numeric item.
2171 *
2172 * c connection requesting the operation
2173 * it item to adjust
2174 * incr true to increment value, false to decrement
2175 * delta amount to adjust value by
2176 * buf buffer for response string
2177 *
2178 * returns a response string to send back to the client.
2179 */
enum delta_result_type do_add_delta(conn *c, const char *key, const size_t nkey,
                                    const bool incr, const int64_t delta,
                                    char *buf, uint64_t *cas,
                                    const uint32_t hv,
                                    item **it_ret) {
    char *ptr;
    uint64_t value;
    int res;
    item *it;

    /* Fetch under the caller-supplied hash value; per the comment below,
     * the item's bucket lock is held for the duration of this function. */
    it = do_item_get(key, nkey, hv, c, DONT_UPDATE);
    if (!it) {
        return DELTA_ITEM_NOT_FOUND;
    }

    /* Can't delta zero byte values. 2-byte are the "\r\n" */
    /* Also can't delta for chunked items. Too large to be a number */
#ifdef EXTSTORE
    if (it->nbytes <= 2 || (it->it_flags & (ITEM_CHUNKED|ITEM_HDR)) != 0) {
#else
    if (it->nbytes <= 2 || (it->it_flags & (ITEM_CHUNKED)) != 0) {
#endif
        do_item_remove(it);
        return NON_NUMERIC;
    }

    /* Optional CAS gate: *cas == 0 means "don't care". */
    if (cas != NULL && *cas != 0 && ITEM_get_cas(it) != *cas) {
        do_item_remove(it);
        return DELTA_ITEM_CAS_MISMATCH;
    }

    ptr = ITEM_data(it);

    if (!safe_strtoull(ptr, &value)) {
        do_item_remove(it);
        return NON_NUMERIC;
    }

    if (incr) {
        value += delta;
        MEMCACHED_COMMAND_INCR(c->sfd, ITEM_key(it), it->nkey, value);
    } else {
        /* Decrement clamps at zero rather than wrapping. */
        if(delta > value) {
            value = 0;
        } else {
            value -= delta;
        }
        MEMCACHED_COMMAND_DECR(c->sfd, ITEM_key(it), it->nkey, value);
    }

    pthread_mutex_lock(&c->thread->stats.mutex);
    if (incr) {
        c->thread->stats.slab_stats[ITEM_clsid(it)].incr_hits++;
    } else {
        c->thread->stats.slab_stats[ITEM_clsid(it)].decr_hits++;
    }
    pthread_mutex_unlock(&c->thread->stats.mutex);

    itoa_u64(value, buf);
    res = strlen(buf);
    /* refcount == 2 means we are the only ones holding the item, and it is
     * linked. We hold the item's lock in this function, so refcount cannot
     * increase. */
    if (res + 2 <= it->nbytes && it->refcount == 2) { /* replace in-place */
        /* When changing the value without replacing the item, we
           need to update the CAS on the existing item. */
        /* We also need to fiddle it in the sizes tracker in case the tracking
         * was enabled at runtime, since it relies on the CAS value to know
         * whether to remove an item or not. */
        item_stats_sizes_remove(it);
        ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
        item_stats_sizes_add(it);
        memcpy(ITEM_data(it), buf, res);
        /* Pad the remainder (minus the trailing "\r\n") with spaces. */
        memset(ITEM_data(it) + res, ' ', it->nbytes - res - 2);
        do_item_update(it);
    } else if (it->refcount > 1) {
        /* Value no longer fits (or others hold references): allocate a
         * replacement item and swap it into the hash table. */
        item *new_it;
        uint32_t flags;
        FLAGS_CONV(it, flags);
        new_it = do_item_alloc(ITEM_key(it), it->nkey, flags, it->exptime, res + 2);
        if (new_it == 0) {
            do_item_remove(it);
            return EOM;
        }
        memcpy(ITEM_data(new_it), buf, res);
        memcpy(ITEM_data(new_it) + res, "\r\n", 2);
        item_replace(it, new_it, hv);
        // Overwrite the older item's CAS with our new CAS since we're
        // returning the CAS of the old item below.
        ITEM_set_cas(it, (settings.use_cas) ? ITEM_get_cas(new_it) : 0);
        do_item_remove(new_it);       /* release our reference */
    } else {
        /* Should never get here. This means we somehow fetched an unlinked
         * item. TODO: Add a counter? */
        if (settings.verbose) {
            fprintf(stderr, "Tried to do incr/decr on invalid item\n");
        }
        if (it->refcount == 1)
            do_item_remove(it);
        return DELTA_ITEM_NOT_FOUND;
    }

    if (cas) {
        *cas = ITEM_get_cas(it);    /* swap the incoming CAS value */
    }
    if (it_ret != NULL) {
        /* Caller takes over our reference. */
        *it_ret = it;
    } else {
        do_item_remove(it);         /* release our reference */
    }
    return OK;
}
2292
2293 static int try_read_command_negotiate(conn *c) {
2294 assert(c->protocol == negotiating_prot);
2295 assert(c != NULL);
2296 assert(c->rcurr <= (c->rbuf + c->rsize));
2297 assert(c->rbytes > 0);
2298
2299 if ((unsigned char)c->rbuf[0] == (unsigned char)PROTOCOL_BINARY_REQ) {
2300 c->protocol = binary_prot;
2301 c->try_read_command = try_read_command_binary;
2302 } else {
2303 // authentication doesn't work with negotiated protocol.
2304 c->protocol = ascii_prot;
2305 c->try_read_command = try_read_command_ascii;
2306 }
2307
2308 if (settings.verbose > 1) {
2309 fprintf(stderr, "%d: Client using the %s protocol\n", c->sfd,
2310 prot_text(c->protocol));
2311 }
2312
2313 return c->try_read_command(c);
2314 }
2315
2316 static int try_read_command_udp(conn *c) {
2317 assert(c != NULL);
2318 assert(c->rcurr <= (c->rbuf + c->rsize));
2319 assert(c->rbytes > 0);
2320
2321 if ((unsigned char)c->rbuf[0] == (unsigned char)PROTOCOL_BINARY_REQ) {
2322 c->protocol = binary_prot;
2323 return try_read_command_binary(c);
2324 } else {
2325 c->protocol = ascii_prot;
2326 return try_read_command_ascii(c);
2327 }
2328 }
2329
2330 /*
2331 * read a UDP request.
2332 */
2333 static enum try_read_result try_read_udp(conn *c) {
2334 int res;
2335
2336 assert(c != NULL);
2337
2338 c->request_addr_size = sizeof(c->request_addr);
2339 res = recvfrom(c->sfd, c->rbuf, c->rsize,
2340 0, (struct sockaddr *)&c->request_addr,
2341 &c->request_addr_size);
2342 if (res > 8) {
2343 unsigned char *buf = (unsigned char *)c->rbuf;
2344 pthread_mutex_lock(&c->thread->stats.mutex);
2345 c->thread->stats.bytes_read += res;
2346 pthread_mutex_unlock(&c->thread->stats.mutex);
2347
2348 /* Beginning of UDP packet is the request ID; save it. */
2349 c->request_id = buf[0] * 256 + buf[1];
2350
2351 /* If this is a multi-packet request, drop it. */
2352 if (buf[4] != 0 || buf[5] != 1) {
2353 return READ_NO_DATA_RECEIVED;
2354 }
2355
2356 /* Don't care about any of the rest of the header. */
2357 res -= 8;
2358 memmove(c->rbuf, c->rbuf + 8, res);
2359
2360 c->rbytes = res;
2361 c->rcurr = c->rbuf;
2362 return READ_DATA_RECEIVED;
2363 }
2364 return READ_NO_DATA_RECEIVED;
2365 }
2366
2367 /*
2368 * read from network as much as we can, handle buffer overflow and connection
2369 * close.
2370 * before reading, move the remaining incomplete fragment of a command
2371 * (if any) to the beginning of the buffer.
2372 *
2373 * To protect us from someone flooding a connection with bogus data causing
2374 * the connection to eat up all available memory, break out and start looking
2375 * at the data I've got after a number of reallocs...
2376 *
2377 * @return enum try_read_result
2378 */
static enum try_read_result try_read_network(conn *c) {
    enum try_read_result gotdata = READ_NO_DATA_RECEIVED;
    int res;
    int num_allocs = 0;
    assert(c != NULL);

    /* Slide any partial command to the front of the buffer so the reads
     * below append to it. */
    if (c->rcurr != c->rbuf) {
        if (c->rbytes != 0) /* otherwise there's nothing to copy */
            memmove(c->rbuf, c->rcurr, c->rbytes);
        c->rcurr = c->rbuf;
    }

    while (1) {
        // TODO: move to rbuf_* func?
        if (c->rbytes >= c->rsize && c->rbuf_malloced) {
            /* Buffer is full: double it, but at most 4 times per call so
             * bogus data can't grow memory unboundedly. */
            if (num_allocs == 4) {
                return gotdata;
            }
            ++num_allocs;
            char *new_rbuf = realloc(c->rbuf, c->rsize * 2);
            if (!new_rbuf) {
                STATS_LOCK();
                stats.malloc_fails++;
                STATS_UNLOCK();
                if (settings.verbose > 0) {
                    fprintf(stderr, "Couldn't realloc input buffer\n");
                }
                c->rbytes = 0; /* ignore what we read */
                out_of_memory(c, "SERVER_ERROR out of memory reading request");
                c->close_after_write = true;
                return READ_MEMORY_ERROR;
            }
            /* realloc may have moved the buffer; reset both pointers. */
            c->rcurr = c->rbuf = new_rbuf;
            c->rsize *= 2;
        }

        int avail = c->rsize - c->rbytes;
        res = c->read(c, c->rbuf + c->rbytes, avail);
        if (res > 0) {
            pthread_mutex_lock(&c->thread->stats.mutex);
            c->thread->stats.bytes_read += res;
            pthread_mutex_unlock(&c->thread->stats.mutex);
            gotdata = READ_DATA_RECEIVED;
            c->rbytes += res;
            if (res == avail && c->rbuf_malloced) {
                // Resize rbuf and try a few times if huge ascii multiget.
                continue;
            } else {
                break;
            }
        }
        if (res == 0) {
            /* Orderly shutdown by the peer. */
            c->close_reason = NORMAL_CLOSE;
            return READ_ERROR;
        }
        if (res == -1) {
            /* Non-blocking socket exhausted: stop; anything else is fatal. */
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                break;
            }
            return READ_ERROR;
        }
    }
    return gotdata;
}
2443
2444 static bool update_event(conn *c, const int new_flags) {
2445 assert(c != NULL);
2446
2447 struct event_base *base = c->event.ev_base;
2448 if (c->ev_flags == new_flags)
2449 return true;
2450 if (event_del(&c->event) == -1) return false;
2451 event_set(&c->event, c->sfd, new_flags, event_handler, (void *)c);
2452 event_base_set(base, &c->event);
2453 c->ev_flags = new_flags;
2454 if (event_add(&c->event, 0) == -1) return false;
2455 return true;
2456 }
2457
2458 /*
2459 * Sets whether we are listening for new connections or not.
2460 */
2461 void do_accept_new_conns(const bool do_accept) {
2462 conn *next;
2463
2464 for (next = listen_conn; next; next = next->next) {
2465 if (do_accept) {
2466 update_event(next, EV_READ | EV_PERSIST);
2467 if (listen(next->sfd, settings.backlog) != 0) {
2468 perror("listen");
2469 }
2470 }
2471 else {
2472 update_event(next, 0);
2473 if (listen(next->sfd, 0) != 0) {
2474 perror("listen");
2475 }
2476 }
2477 }
2478
2479 if (do_accept) {
2480 struct timeval maxconns_exited;
2481 uint64_t elapsed_us;
2482 gettimeofday(&maxconns_exited,NULL);
2483 STATS_LOCK();
2484 elapsed_us =
2485 (maxconns_exited.tv_sec - stats.maxconns_entered.tv_sec) * 1000000
2486 + (maxconns_exited.tv_usec - stats.maxconns_entered.tv_usec);
2487 stats.time_in_listen_disabled_us += elapsed_us;
2488 stats_state.accepting_conns = true;
2489 STATS_UNLOCK();
2490 } else {
2491 STATS_LOCK();
2492 stats_state.accepting_conns = false;
2493 gettimeofday(&stats.maxconns_entered,NULL);
2494 stats.listen_disabled_num++;
2495 STATS_UNLOCK();
2496 allow_new_conns = false;
2497 maxconns_handler(-42, 0, 0);
2498 }
2499 }
2500
2501 #define TRANSMIT_ONE_RESP true
2502 #define TRANSMIT_ALL_RESP false
/* Build up to IOV_MAX-1 iovecs from the connection's pending response
 * chain (c->resp_head), starting at position `iovused` in `iovs`.
 * Returns the new iov count. `one_resp` restricts the walk to a single
 * response (UDP cannot send multiple responses per packet). */
static int _transmit_pre(conn *c, struct iovec *iovs, int iovused, bool one_resp) {
    mc_resp *resp = c->resp_head;
    while (resp && iovused + resp->iovcnt < IOV_MAX-1) {
        if (resp->skip) {
            // Don't actually unchain the resp obj here since it's singly-linked.
            // Just let the post function handle it linearly.
            resp = resp->next;
            continue;
        }
        if (resp->chunked_data_iov) {
            // Handle chunked items specially.
            // They spend much more time in send so we can be a bit wasteful
            // in rebuilding iovecs for them.
            item_chunk *ch = (item_chunk *)ITEM_schunk((item *)resp->iov[resp->chunked_data_iov].iov_base);
            int x;
            for (x = 0; x < resp->iovcnt; x++) {
                // This iov is tracking how far we've copied so far.
                if (x == resp->chunked_data_iov) {
                    /* iov_len shrinks as data is sent (_transmit_post), so
                     * the difference from chunked_total is how many bytes
                     * of the item were already transmitted. */
                    int done = resp->chunked_total - resp->iov[x].iov_len;
                    // Start from the len to allow binprot to cut the \r\n
                    int todo = resp->iov[x].iov_len;
                    while (ch && todo > 0 && iovused < IOV_MAX-1) {
                        int skip = 0;
                        if (!ch->used) {
                            ch = ch->next;
                            continue;
                        }
                        // Skip parts we've already sent.
                        if (done >= ch->used) {
                            done -= ch->used;
                            ch = ch->next;
                            continue;
                        } else if (done) {
                            skip = done;
                            done = 0;
                        }
                        iovs[iovused].iov_base = ch->data + skip;
                        // Stupid binary protocol makes this go negative.
                        iovs[iovused].iov_len = ch->used - skip > todo ? todo : ch->used - skip;
                        iovused++;
                        todo -= ch->used - skip;
                        ch = ch->next;
                    }
                } else {
                    iovs[iovused].iov_base = resp->iov[x].iov_base;
                    iovs[iovused].iov_len = resp->iov[x].iov_len;
                    iovused++;
                }
                if (iovused >= IOV_MAX-1)
                    break;
            }
        } else {
            /* Common case: copy the response's iovecs wholesale. */
            memcpy(&iovs[iovused], resp->iov, sizeof(struct iovec)*resp->iovcnt);
            iovused += resp->iovcnt;
        }

        // done looking at first response, walk down the chain.
        resp = resp->next;
        // used for UDP mode: UDP cannot send multiple responses per packet.
        if (one_resp)
            break;
    }
    return iovused;
}
2567
2568 /*
2569 * Decrements and completes responses based on how much data was transmitted.
2570 * Takes the connection and current result bytes.
2571 */
static void _transmit_post(conn *c, ssize_t res) {
    // We've written some of the data. Remove the completed
    // responses from the list of pending writes.
    mc_resp *resp = c->resp_head;
    while (resp) {
        int x;
        if (resp->skip) {
            /* Response was never meant to be sent; just retire it. */
            resp = resp_finish(c, resp);
            continue;
        }

        // fastpath check. all small responses should cut here.
        if (res >= resp->tosend) {
            res -= resp->tosend;
            resp = resp_finish(c, resp);
            continue;
        }

        // it's fine to re-check iov's that were zeroed out before.
        for (x = 0; x < resp->iovcnt; x++) {
            struct iovec *iov = &resp->iov[x];
            if (res >= iov->iov_len) {
                /* This iov was fully sent; zero its length so the next
                 * transmit pass skips it. */
                resp->tosend -= iov->iov_len;
                res -= iov->iov_len;
                iov->iov_len = 0;
            } else {
                // Dumb special case for chunked items. Currently tracking
                // where to inject the chunked item via iov_base.
                // Extra not-great since chunked items can't be the first
                // index, so we have to check for non-zero c_d_iov first.
                if (!resp->chunked_data_iov || x != resp->chunked_data_iov) {
                    iov->iov_base = (char *)iov->iov_base + res;
                }
                iov->iov_len -= res;
                resp->tosend -= res;
                res = 0;
                break;
            }
        }

        // are we done with this response object?
        if (resp->tosend == 0) {
            resp = resp_finish(c, resp);
        } else {
            // Jammed up here. This is the new head.
            break;
        }
    }
}
2621
2622 /*
2623 * Transmit the next chunk of data from our list of msgbuf structures.
2624 *
2625 * Returns:
2626 * TRANSMIT_COMPLETE All done writing.
2627 * TRANSMIT_INCOMPLETE More data remaining to write.
2628 * TRANSMIT_SOFT_ERROR Can't write any more right now.
2629 * TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing)
2630 */
2631 static enum transmit_result transmit(conn *c) {
2632 assert(c != NULL);
2633 struct iovec iovs[IOV_MAX];
2634 struct msghdr msg;
2635 int iovused = 0;
2636
2637 // init the msg.
2638 memset(&msg, 0, sizeof(struct msghdr));
2639 msg.msg_iov = iovs;
2640
2641 iovused = _transmit_pre(c, iovs, iovused, TRANSMIT_ALL_RESP);
2642 if (iovused == 0) {
2643 // Avoid the syscall if we're only handling a noreply.
2644 // Return the response object.
2645 _transmit_post(c, 0);
2646 return TRANSMIT_COMPLETE;
2647 }
2648
2649 // Alright, send.
2650 ssize_t res;
2651 msg.msg_iovlen = iovused;
2652 res = c->sendmsg(c, &msg, 0);
2653 if (res >= 0) {
2654 pthread_mutex_lock(&c->thread->stats.mutex);
2655 c->thread->stats.bytes_written += res;
2656 pthread_mutex_unlock(&c->thread->stats.mutex);
2657
2658 // Decrement any partial IOV's and complete any finished resp's.
2659 _transmit_post(c, res);
2660
2661 if (c->resp_head) {
2662 return TRANSMIT_INCOMPLETE;
2663 } else {
2664 return TRANSMIT_COMPLETE;
2665 }
2666 }
2667
2668 if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
2669 if (!update_event(c, EV_WRITE | EV_PERSIST)) {
2670 if (settings.verbose > 0)
2671 fprintf(stderr, "Couldn't update event\n");
2672 conn_set_state(c, conn_closing);
2673 return TRANSMIT_HARD_ERROR;
2674 }
2675 return TRANSMIT_SOFT_ERROR;
2676 }
2677 /* if res == -1 and error is not EAGAIN or EWOULDBLOCK,
2678 we have a real error, on which we close the connection */
2679 if (settings.verbose > 0)
2680 perror("Failed to write, and not due to blocking");
2681
2682 conn_set_state(c, conn_closing);
2683 return TRANSMIT_HARD_ERROR;
2684 }
2685
2686 static void build_udp_header(unsigned char *hdr, mc_resp *resp) {
2687 // We need to communicate the total number of packets
2688 // If this isn't set, it's the first time this response is building a udp
2689 // header, so "tosend" must be static.
2690 if (!resp->udp_total) {
2691 uint32_t total;
2692 total = resp->tosend / UDP_DATA_SIZE;
2693 if (resp->tosend % UDP_DATA_SIZE)
2694 total++;
2695 // The spec doesn't really say what we should do here. It's _probably_
2696 // better to bail out?
2697 if (total > USHRT_MAX) {
2698 total = USHRT_MAX;
2699 }
2700 resp->udp_total = total;
2701 }
2702
2703 // TODO: why wasn't this hto*'s and casts?
2704 // this ends up sending UDP hdr data specifically in host byte order.
2705 *hdr++ = resp->request_id / 256;
2706 *hdr++ = resp->request_id % 256;
2707 *hdr++ = resp->udp_sequence / 256;
2708 *hdr++ = resp->udp_sequence % 256;
2709 *hdr++ = resp->udp_total / 256;
2710 *hdr++ = resp->udp_total % 256;
2711 *hdr++ = 0;
2712 *hdr++ = 0;
2713 resp->udp_sequence++;
2714 }
2715
2716 /*
2717 * UDP specific transmit function. Uses its own function rather than check
2718 * IS_UDP() five times. If we ever implement sendmmsg or similar support they
2719 * will diverge even more.
2720 * Does not use TLS.
2721 *
2722 * Returns:
2723 * TRANSMIT_COMPLETE All done writing.
2724 * TRANSMIT_INCOMPLETE More data remaining to write.
2725 * TRANSMIT_SOFT_ERROR Can't write any more right now.
2726 * TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing)
2727 */
static enum transmit_result transmit_udp(conn *c) {
    assert(c != NULL);
    struct iovec iovs[IOV_MAX];
    struct msghdr msg;
    mc_resp *resp;
    int iovused = 0;
    unsigned char udp_hdr[UDP_HEADER_SIZE];

    // We only send one UDP packet per call (ugh), so we can only operate on a
    // single response at a time.
    resp = c->resp_head;

    if (!resp) {
        return TRANSMIT_COMPLETE;
    }

    if (resp->skip) {
        /* Noreply response: retire it and let the caller loop back in. */
        resp = resp_finish(c, resp);
        return TRANSMIT_INCOMPLETE;
    }

    // clear the message and initialize it.
    memset(&msg, 0, sizeof(struct msghdr));
    msg.msg_iov = iovs;

    // the UDP source to return to.
    msg.msg_name = &resp->request_addr;
    msg.msg_namelen = resp->request_addr_size;

    // First IOV is the custom UDP header.
    iovs[0].iov_base = (void *)udp_hdr;
    iovs[0].iov_len = UDP_HEADER_SIZE;
    build_udp_header(udp_hdr, resp);
    iovused++;

    // Fill the IOV's the standard way.
    // TODO: might get a small speedup if we let it break early with a length
    // limit.
    iovused = _transmit_pre(c, iovs, iovused, TRANSMIT_ONE_RESP);

    // Clip the IOV's to the max UDP packet size.
    // If we add support for send_mmsg, this can be where we split msg's.
    {
        int x = 0;
        int len = 0;
        for (x = 0; x < iovused; x++) {
            if (len + iovs[x].iov_len >= UDP_MAX_PAYLOAD_SIZE) {
                /* Truncate this iov so the packet is exactly full and drop
                 * the rest; _transmit_post resumes from here next call. */
                iovs[x].iov_len = UDP_MAX_PAYLOAD_SIZE - len;
                x++;
                break;
            } else {
                len += iovs[x].iov_len;
            }
        }
        iovused = x;
    }

    ssize_t res;
    msg.msg_iovlen = iovused;
    // NOTE: uses system sendmsg since we have no support for indirect UDP.
    res = sendmsg(c->sfd, &msg, 0);
    if (res >= 0) {
        pthread_mutex_lock(&c->thread->stats.mutex);
        c->thread->stats.bytes_written += res;
        pthread_mutex_unlock(&c->thread->stats.mutex);

        // Ignore the header size from forwarding the IOV's
        res -= UDP_HEADER_SIZE;

        // Decrement any partial IOV's and complete any finished resp's.
        _transmit_post(c, res);

        if (c->resp_head) {
            return TRANSMIT_INCOMPLETE;
        } else {
            return TRANSMIT_COMPLETE;
        }
    }

    if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        if (!update_event(c, EV_WRITE | EV_PERSIST)) {
            if (settings.verbose > 0)
                fprintf(stderr, "Couldn't update event\n");
            conn_set_state(c, conn_closing);
            return TRANSMIT_HARD_ERROR;
        }
        return TRANSMIT_SOFT_ERROR;
    }
    /* if res == -1 and error is not EAGAIN or EWOULDBLOCK,
       we have a real error, on which we close the connection */
    if (settings.verbose > 0)
        perror("Failed to write, and not due to blocking");

    /* NOTE(review): unlike transmit(), the hard-error path returns to
     * conn_read instead of conn_closing -- presumably because a UDP
     * "connection" is the shared datagram socket and must not be torn
     * down; the comment above (shared with the TCP path) overstates
     * what actually happens here. Confirm intent. */
    conn_set_state(c, conn_read);
    return TRANSMIT_HARD_ERROR;
}
2824
2825
2826 /* Does a looped read to fill data chunks */
2827 /* TODO: restrict number of times this can loop.
2828 * Also, benchmark using readv's.
2829 */
/* Fill a chunked item's chunks with c->rlbytes of payload, first draining
 * leftover bytes in the connection's read buffer and then reading from
 * the socket, growing the chunk chain as needed.
 * Returns the bytes consumed in the last step, a non-positive socket
 * result, or -2 on chunk-allocation failure. */
static int read_into_chunked_item(conn *c) {
    int total = 0;
    int res;
    assert(c->rcurr != c->ritem);

    while (c->rlbytes > 0) {
        item_chunk *ch = (item_chunk *)c->ritem;
        if (ch->size == ch->used) {
            // FIXME: ch->next is currently always 0. remove this?
            if (ch->next) {
                c->ritem = (char *) ch->next;
            } else {
                /* Allocate next chunk. Binary protocol needs 2b for \r\n */
                c->ritem = (char *) do_item_alloc_chunk(ch, c->rlbytes +
                       ((c->protocol == binary_prot) ? 2 : 0));
                if (!c->ritem) {
                    // We failed an allocation. Let caller handle cleanup.
                    total = -2;
                    break;
                }
                // ritem has new chunk, restart the loop.
                continue;
                //assert(c->rlbytes == 0);
            }
        }

        int unused = ch->size - ch->used;
        /* first check if we have leftovers in the conn_read buffer */
        if (c->rbytes > 0) {
            /* NOTE(review): this zeroes the running total, so bytes
             * accumulated in earlier loop iterations stop being counted --
             * verify callers only depend on the sign / last-batch value. */
            total = 0;
            int tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes;
            tocopy = tocopy > unused ? unused : tocopy;
            if (c->ritem != c->rcurr) {
                memmove(ch->data + ch->used, c->rcurr, tocopy);
            }
            total += tocopy;
            c->rlbytes -= tocopy;
            c->rcurr += tocopy;
            c->rbytes -= tocopy;
            ch->used += tocopy;
            if (c->rlbytes == 0) {
                break;
            }
        } else {
            /* now try reading from the socket */
            res = c->read(c, ch->data + ch->used,
                    (unused > c->rlbytes ? c->rlbytes : unused));
            if (res > 0) {
                pthread_mutex_lock(&c->thread->stats.mutex);
                c->thread->stats.bytes_read += res;
                pthread_mutex_unlock(&c->thread->stats.mutex);
                ch->used += res;
                total += res;
                c->rlbytes -= res;
            } else {
                /* Reset total to the latest result so caller can handle it */
                total = res;
                break;
            }
        }
    }

    /* At some point I will be able to ditch the \r\n from item storage and
       remove all of these kludges.
       The above binprot check ensures inline space for \r\n, but if we do
       exactly enough allocs there will be no additional chunk for \r\n.
     */
    if (c->rlbytes == 0 && c->protocol == binary_prot && total >= 0) {
        item_chunk *ch = (item_chunk *)c->ritem;
        if (ch->size - ch->used < 2) {
            /* Grow one more chunk to guarantee room for the trailing \r\n. */
            c->ritem = (char *) do_item_alloc_chunk(ch, 2);
            if (!c->ritem) {
                total = -2;
            }
        }
    }
    return total;
}
2908
2909 static void drive_machine(conn *c) {
2910 bool stop = false;
2911 int sfd;
2912 socklen_t addrlen;
2913 struct sockaddr_storage addr;
2914 int nreqs = settings.reqs_per_event;
2915 int res;
2916 const char *str;
2917 #ifdef HAVE_ACCEPT4
2918 static int use_accept4 = 1;
2919 #else
2920 static int use_accept4 = 0;
2921 #endif
2922
2923 assert(c != NULL);
2924
2925 while (!stop) {
2926
2927 switch(c->state) {
2928 case conn_listening:
2929 addrlen = sizeof(addr);
2930 #ifdef HAVE_ACCEPT4
2931 if (use_accept4) {
2932 sfd = accept4(c->sfd, (struct sockaddr *)&addr, &addrlen, SOCK_NONBLOCK);
2933 } else {
2934 sfd = accept(c->sfd, (struct sockaddr *)&addr, &addrlen);
2935 }
2936 #else
2937 sfd = accept(c->sfd, (struct sockaddr *)&addr, &addrlen);
2938 #endif
2939 if (sfd == -1) {
2940 if (use_accept4 && errno == ENOSYS) {
2941 use_accept4 = 0;
2942 continue;
2943 }
2944 perror(use_accept4 ? "accept4()" : "accept()");
2945 if (errno == EAGAIN || errno == EWOULDBLOCK) {
2946 /* these are transient, so don't log anything */
2947 stop = true;
2948 } else if (errno == EMFILE) {
2949 if (settings.verbose > 0)
2950 fprintf(stderr, "Too many open connections\n");
2951 accept_new_conns(false);
2952 stop = true;
2953 } else {
2954 perror("accept()");
2955 stop = true;
2956 }
2957 break;
2958 }
2959 if (!use_accept4) {
2960 if (fcntl(sfd, F_SETFL, fcntl(sfd, F_GETFL) | O_NONBLOCK) < 0) {
2961 perror("setting O_NONBLOCK");
2962 close(sfd);
2963 break;
2964 }
2965 }
2966
2967 bool reject;
2968 if (settings.maxconns_fast) {
2969 reject = sfd >= settings.maxconns - 1;
2970 if (reject) {
2971 STATS_LOCK();
2972 stats.rejected_conns++;
2973 STATS_UNLOCK();
2974 }
2975 } else {
2976 reject = false;
2977 }
2978
2979 if (reject) {
2980 str = "ERROR Too many open connections\r\n";
2981 res = write(sfd, str, strlen(str));
2982 close(sfd);
2983 } else {
2984 void *ssl_v = NULL;
2985 #ifdef TLS
2986 SSL *ssl = NULL;
2987 if (c->ssl_enabled) {
2988 assert(IS_TCP(c->transport) && settings.ssl_enabled);
2989
2990 if (settings.ssl_ctx == NULL) {
2991 if (settings.verbose) {
2992 fprintf(stderr, "SSL context is not initialized\n");
2993 }
2994 close(sfd);
2995 break;
2996 }
2997 SSL_LOCK();
2998 ssl = SSL_new(settings.ssl_ctx);
2999 SSL_UNLOCK();
3000 if (ssl == NULL) {
3001 if (settings.verbose) {
3002 fprintf(stderr, "Failed to created the SSL object\n");
3003 }
3004 close(sfd);
3005 break;
3006 }
3007 SSL_set_fd(ssl, sfd);
3008 int ret = SSL_accept(ssl);
3009 if (ret <= 0) {
3010 int err = SSL_get_error(ssl, ret);
3011 if (err == SSL_ERROR_SYSCALL || err == SSL_ERROR_SSL) {
3012 if (settings.verbose) {
3013 fprintf(stderr, "SSL connection failed with error code : %d : %s\n", err, strerror(errno));
3014 }
3015 SSL_free(ssl);
3016 close(sfd);
3017 STATS_LOCK();
3018 stats.ssl_handshake_errors++;
3019 STATS_UNLOCK();
3020 break;
3021 }
3022 }
3023 }
3024 ssl_v = (void*) ssl;
3025 #endif
3026
3027 dispatch_conn_new(sfd, conn_new_cmd, EV_READ | EV_PERSIST,
3028 READ_BUFFER_CACHED, c->transport, ssl_v);
3029 }
3030
3031 stop = true;
3032 break;
3033
3034 case conn_waiting:
3035 rbuf_release(c);
3036 if (!update_event(c, EV_READ | EV_PERSIST)) {
3037 if (settings.verbose > 0)
3038 fprintf(stderr, "Couldn't update event\n");
3039 conn_set_state(c, conn_closing);
3040 break;
3041 }
3042
3043 conn_set_state(c, conn_read);
3044 stop = true;
3045 break;
3046
3047 case conn_read:
3048 if (!IS_UDP(c->transport)) {
3049 // Assign a read buffer if necessary.
3050 if (!rbuf_alloc(c)) {
3051 // TODO: Some way to allow for temporary failures.
3052 conn_set_state(c, conn_closing);
3053 break;
3054 }
3055 res = try_read_network(c);
3056 } else {
3057 // UDP connections always have a static buffer.
3058 res = try_read_udp(c);
3059 }
3060
3061 switch (res) {
3062 case READ_NO_DATA_RECEIVED:
3063 conn_set_state(c, conn_waiting);
3064 break;
3065 case READ_DATA_RECEIVED:
3066 conn_set_state(c, conn_parse_cmd);
3067 break;
3068 case READ_ERROR:
3069 conn_set_state(c, conn_closing);
3070 break;
3071 case READ_MEMORY_ERROR: /* Failed to allocate more memory */
3072 /* State already set by try_read_network */
3073 break;
3074 }
3075 break;
3076
3077 case conn_parse_cmd:
3078 c->noreply = false;
3079 if (c->try_read_command(c) == 0) {
3080 /* we need more data! */
3081 if (c->resp_head) {
3082 // Buffered responses waiting, flush in the meantime.
3083 conn_set_state(c, conn_mwrite);
3084 } else {
3085 conn_set_state(c, conn_waiting);
3086 }
3087 }
3088
3089 break;
3090
3091 case conn_new_cmd:
3092 /* Only process nreqs at a time to avoid starving other
3093 connections */
3094
3095 --nreqs;
3096 if (nreqs >= 0) {
3097 reset_cmd_handler(c);
3098 } else if (c->resp_head) {
3099 // flush response pipe on yield.
3100 conn_set_state(c, conn_mwrite);
3101 } else {
3102 pthread_mutex_lock(&c->thread->stats.mutex);
3103 c->thread->stats.conn_yields++;
3104 pthread_mutex_unlock(&c->thread->stats.mutex);
3105 if (c->rbytes > 0) {
3106 /* We have already read in data into the input buffer,
3107 so libevent will most likely not signal read events
3108 on the socket (unless more data is available. As a
3109 hack we should just put in a request to write data,
3110 because that should be possible ;-)
3111 */
3112 if (!update_event(c, EV_WRITE | EV_PERSIST)) {
3113 if (settings.verbose > 0)
3114 fprintf(stderr, "Couldn't update event\n");
3115 conn_set_state(c, conn_closing);
3116 break;
3117 }
3118 }
3119 stop = true;
3120 }
3121 break;
3122
3123 case conn_nread:
3124 if (c->rlbytes == 0) {
3125 complete_nread(c);
3126 break;
3127 }
3128
3129 /* Check if rbytes < 0, to prevent crash */
3130 if (c->rlbytes < 0) {
3131 if (settings.verbose) {
3132 fprintf(stderr, "Invalid rlbytes to read: len %d\n", c->rlbytes);
3133 }
3134 conn_set_state(c, conn_closing);
3135 break;
3136 }
3137
3138 if (c->item_malloced || ((((item *)c->item)->it_flags & ITEM_CHUNKED) == 0) ) {
3139 /* first check if we have leftovers in the conn_read buffer */
3140 if (c->rbytes > 0) {
3141 int tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes;
3142 memmove(c->ritem, c->rcurr, tocopy);
3143 c->ritem += tocopy;
3144 c->rlbytes -= tocopy;
3145 c->rcurr += tocopy;
3146 c->rbytes -= tocopy;
3147 if (c->rlbytes == 0) {
3148 break;
3149 }
3150 }
3151
3152 /* now try reading from the socket */
3153 res = c->read(c, c->ritem, c->rlbytes);
3154 if (res > 0) {
3155 pthread_mutex_lock(&c->thread->stats.mutex);
3156 c->thread->stats.bytes_read += res;
3157 pthread_mutex_unlock(&c->thread->stats.mutex);
3158 if (c->rcurr == c->ritem) {
3159 c->rcurr += res;
3160 }
3161 c->ritem += res;
3162 c->rlbytes -= res;
3163 break;
3164 }
3165 } else {
3166 res = read_into_chunked_item(c);
3167 if (res > 0)
3168 break;
3169 }
3170
3171 if (res == 0) { /* end of stream */
3172 c->close_reason = NORMAL_CLOSE;
3173 conn_set_state(c, conn_closing);
3174 break;
3175 }
3176
3177 if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
3178 if (!update_event(c, EV_READ | EV_PERSIST)) {
3179 if (settings.verbose > 0)
3180 fprintf(stderr, "Couldn't update event\n");
3181 conn_set_state(c, conn_closing);
3182 break;
3183 }
3184 stop = true;
3185 break;
3186 }
3187
3188 /* Memory allocation failure */
3189 if (res == -2) {
3190 out_of_memory(c, "SERVER_ERROR Out of memory during read");
3191 c->sbytes = c->rlbytes;
3192 conn_set_state(c, conn_swallow);
3193 // Ensure this flag gets cleared. It gets killed on conn_new()
3194 // so any conn_closing is fine, calling complete_nread is
3195 // fine. This swallow semms to be the only other case.
3196 c->set_stale = false;
3197 c->mset_res = false;
3198 break;
3199 }
3200 /* otherwise we have a real error, on which we close the connection */
3201 if (settings.verbose > 0) {
3202 fprintf(stderr, "Failed to read, and not due to blocking:\n"
3203 "errno: %d %s \n"
3204 "rcurr=%p ritem=%p rbuf=%p rlbytes=%d rsize=%d\n",
3205 errno, strerror(errno),
3206 (void *)c->rcurr, (void *)c->ritem, (void *)c->rbuf,
3207 (int)c->rlbytes, (int)c->rsize);
3208 }
3209 conn_set_state(c, conn_closing);
3210 break;
3211
3212 case conn_swallow:
3213 /* we are reading sbytes and throwing them away */
3214 if (c->sbytes <= 0) {
3215 conn_set_state(c, conn_new_cmd);
3216 break;
3217 }
3218
3219 /* first check if we have leftovers in the conn_read buffer */
3220 if (c->rbytes > 0) {
3221 int tocopy = c->rbytes > c->sbytes ? c->sbytes : c->rbytes;
3222 c->sbytes -= tocopy;
3223 c->rcurr += tocopy;
3224 c->rbytes -= tocopy;
3225 break;
3226 }
3227
3228 /* now try reading from the socket */
3229 res = c->read(c, c->rbuf, c->rsize > c->sbytes ? c->sbytes : c->rsize);
3230 if (res > 0) {
3231 pthread_mutex_lock(&c->thread->stats.mutex);
3232 c->thread->stats.bytes_read += res;
3233 pthread_mutex_unlock(&c->thread->stats.mutex);
3234 c->sbytes -= res;
3235 break;
3236 }
3237 if (res == 0) { /* end of stream */
3238 c->close_reason = NORMAL_CLOSE;
3239 conn_set_state(c, conn_closing);
3240 break;
3241 }
3242 if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
3243 if (!update_event(c, EV_READ | EV_PERSIST)) {
3244 if (settings.verbose > 0)
3245 fprintf(stderr, "Couldn't update event\n");
3246 conn_set_state(c, conn_closing);
3247 break;
3248 }
3249 stop = true;
3250 break;
3251 }
3252 /* otherwise we have a real error, on which we close the connection */
3253 if (settings.verbose > 0)
3254 fprintf(stderr, "Failed to read, and not due to blocking\n");
3255 conn_set_state(c, conn_closing);
3256 break;
3257
3258 case conn_write:
3259 case conn_mwrite:
3260 /* have side IO's that must process before transmit() can run.
3261 * remove the connection from the worker thread and dispatch the
3262 * IO queue
3263 */
3264 assert(c->io_queues_submitted == 0);
3265
3266 for (io_queue_t *q = c->io_queues; q->type != IO_QUEUE_NONE; q++) {
3267 if (q->stack_ctx != NULL) {
3268 io_queue_cb_t *qcb = thread_io_queue_get(c->thread, q->type);
3269 qcb->submit_cb(q);
3270 c->io_queues_submitted++;
3271 }
3272 }
3273 if (c->io_queues_submitted != 0) {
3274 conn_set_state(c, conn_io_queue);
3275 event_del(&c->event);
3276
3277 stop = true;
3278 break;
3279 }
3280
3281 switch (!IS_UDP(c->transport) ? transmit(c) : transmit_udp(c)) {
3282 case TRANSMIT_COMPLETE:
3283 if (c->state == conn_mwrite) {
3284 // Free up IO wraps and any half-uploaded items.
3285 conn_release_items(c);
3286 conn_set_state(c, conn_new_cmd);
3287 if (c->close_after_write) {
3288 conn_set_state(c, conn_closing);
3289 }
3290 } else {
3291 if (settings.verbose > 0)
3292 fprintf(stderr, "Unexpected state %d\n", c->state);
3293 conn_set_state(c, conn_closing);
3294 }
3295 break;
3296
3297 case TRANSMIT_INCOMPLETE:
3298 case TRANSMIT_HARD_ERROR:
3299 break; /* Continue in state machine. */
3300
3301 case TRANSMIT_SOFT_ERROR:
3302 stop = true;
3303 break;
3304 }
3305 break;
3306
3307 case conn_closing:
3308 if IS_UDP(c->transport)
3309 conn_cleanup(c);
3310 else
3311 conn_close(c);
3312 stop = true;
3313 break;
3314
3315 case conn_closed:
3316 /* This only happens if dormando is an idiot. */
3317 abort();
3318 break;
3319
3320 case conn_watch:
3321 /* We handed off our connection to the logger thread. */
3322 stop = true;
3323 break;
3324 case conn_io_queue:
3325 /* Complete our queued IO's from within the worker thread. */
3326 conn_io_queue_complete(c);
3327 conn_set_state(c, conn_mwrite);
3328 break;
3329 case conn_max_state:
3330 assert(false);
3331 break;
3332 }
3333 }
3334
3335 return;
3336 }
3337
3338 void event_handler(const evutil_socket_t fd, const short which, void *arg) {
3339 conn *c;
3340
3341 c = (conn *)arg;
3342 assert(c != NULL);
3343
3344 c->which = which;
3345
3346 /* sanity */
3347 if (fd != c->sfd) {
3348 if (settings.verbose > 0)
3349 fprintf(stderr, "Catastrophic: event fd doesn't match conn fd!\n");
3350 conn_close(c);
3351 return;
3352 }
3353
3354 drive_machine(c);
3355
3356 /* wait for next event */
3357 return;
3358 }
3359
/*
 * Create a socket for the given resolved address and switch it to
 * non-blocking mode.  Returns the new fd, or -1 if either the socket()
 * call or the fcntl() calls fail (fcntl failure is logged).
 */
static int new_socket(struct addrinfo *ai) {
    int flags;
    int sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);

    if (sfd == -1)
        return -1;

    flags = fcntl(sfd, F_GETFL, 0);
    if (flags < 0 || fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) {
        perror("setting O_NONBLOCK");
        close(sfd);
        return -1;
    }
    return sfd;
}
3376
3377
3378 /*
3379 * Sets a socket's send buffer size to the maximum allowed by the system.
3380 */
3381 static void maximize_sndbuf(const int sfd) {
3382 socklen_t intsize = sizeof(int);
3383 int last_good = 0;
3384 int min, max, avg;
3385 int old_size;
3386
3387 /* Start with the default size. */
3388 #ifdef _WIN32
3389 if (getsockopt((SOCKET)sfd, SOL_SOCKET, SO_SNDBUF, (char *)&old_size, &intsize) != 0) {
3390 #else
3391 if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, &old_size, &intsize) != 0) {
3392 #endif /* #ifdef _WIN32 */
3393 if (settings.verbose > 0)
3394 perror("getsockopt(SO_SNDBUF)");
3395 return;
3396 }
3397
3398 /* Binary-search for the real maximum. */
3399 min = old_size;
3400 max = MAX_SENDBUF_SIZE;
3401
3402 while (min <= max) {
3403 avg = ((unsigned int)(min + max)) / 2;
3404 if (setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&avg, intsize) == 0) {
3405 last_good = avg;
3406 min = avg + 1;
3407 } else {
3408 max = avg - 1;
3409 }
3410 }
3411
3412 if (settings.verbose > 1)
3413 fprintf(stderr, "<%d send buffer was %d, now %d\n", sfd, old_size, last_good);
3414 }
3415
3416 /**
3417 * Create a socket and bind it to a specific port number
3418 * @param interface the interface to bind to
3419 * @param port the port number to bind to
3420 * @param transport the transport protocol (TCP / UDP)
3421 * @param portnumber_file A filepointer to write the port numbers to
3422 * when they are successfully added to the list of ports we
3423 * listen on.
3424 */
static int server_socket(const char *interface,
                         int port,
                         enum network_transport transport,
                         FILE *portnumber_file, bool ssl_enabled) {
    int sfd;
    struct linger ling = {0, 0};
    struct addrinfo *ai;
    struct addrinfo *next;
    struct addrinfo hints = { .ai_flags = AI_PASSIVE,
                              .ai_family = AF_UNSPEC };
    char port_buf[NI_MAXSERV];
    int error;
    /* number of addresses successfully bound; 0 at the end means failure */
    int success = 0;
    int flags =1;

    hints.ai_socktype = IS_UDP(transport) ? SOCK_DGRAM : SOCK_STREAM;

    /* port == -1 means "let the kernel pick" (used with -p -1). */
    if (port == -1) {
        port = 0;
    }
    snprintf(port_buf, sizeof(port_buf), "%d", port);
    error= getaddrinfo(interface, port_buf, &hints, &ai);
    if (error != 0) {
        if (error != EAI_SYSTEM)
            fprintf(stderr, "getaddrinfo(): %s\n", gai_strerror(error));
        else
            perror("getaddrinfo()");
        return 1;
    }

    /* Try every address getaddrinfo returned; binding at least one is
     * considered success. */
    for (next= ai; next; next= next->ai_next) {
        conn *listen_conn_add;
        if ((sfd = new_socket(next)) == -1) {
            /* getaddrinfo can return "junk" addresses,
             * we make sure at least one works before erroring.
             */
            if (errno == EMFILE) {
                /* ...unless we're out of fds */
                perror("server_socket");
                exit(EX_OSERR);
            }
            continue;
        }

        /* Probe SO_INCOMING_NAPI_ID support early so -N fails at startup
         * rather than at runtime. */
        if (settings.num_napi_ids) {
            socklen_t len = sizeof(socklen_t);
            int napi_id;
            error = getsockopt(sfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
            if (error != 0) {
                fprintf(stderr, "-N <num_napi_ids> option not supported\n");
                exit(EXIT_FAILURE);
            }
        }

#ifdef IPV6_V6ONLY
        /* Keep v6 sockets v6-only so a separate v4 socket can bind the
         * same port. */
        if (next->ai_family == AF_INET6) {
            error = setsockopt(sfd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &flags, sizeof(flags));
            if (error != 0) {
                perror("setsockopt");
                close(sfd);
                continue;
            }
        }
#endif

        setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
        if IS_UDP(transport) {
            maximize_sndbuf(sfd);
        } else {
            error = setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
            if (error != 0)
                perror("setsockopt");

            error = setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
            if (error != 0)
                perror("setsockopt");

            error = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
            if (error != 0)
                perror("setsockopt");
        }

        if (bind(sfd, next->ai_addr, next->ai_addrlen) == -1) {
            /* EADDRINUSE on one address is tolerable (e.g. v4+v6 overlap);
             * any other bind error is fatal for the whole call. */
            if (errno != EADDRINUSE) {
                perror("bind()");
                close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            close(sfd);
            continue;
        } else {
            success++;
            if (!IS_UDP(transport) && listen(sfd, settings.backlog) == -1) {
                perror("listen()");
                close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            /* Report the actual bound port (useful when the kernel chose
             * it) to the -P portnumber file. */
            if (portnumber_file != NULL &&
                (next->ai_addr->sa_family == AF_INET ||
                 next->ai_addr->sa_family == AF_INET6)) {
                union {
                    struct sockaddr_in in;
                    struct sockaddr_in6 in6;
                } my_sockaddr;
                socklen_t len = sizeof(my_sockaddr);
                if (getsockname(sfd, (struct sockaddr*)&my_sockaddr, &len)==0) {
                    if (next->ai_addr->sa_family == AF_INET) {
                        fprintf(portnumber_file, "%s INET: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in.sin_port));
                    } else {
                        fprintf(portnumber_file, "%s INET6: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in6.sin6_port));
                    }
                }
            }
        }

        if IS_UDP(transport) {
            int c;

            for (c = 0; c < settings.num_threads_per_udp; c++) {
                /* Allocate one UDP file descriptor per worker thread;
                 * this allows "stats conns" to separately list multiple
                 * parallel UDP requests in progress.
                 *
                 * The dispatch code round-robins new connection requests
                 * among threads, so this is guaranteed to assign one
                 * FD to each thread.
                 */
                int per_thread_fd;
                if (c == 0) {
                    per_thread_fd = sfd;
                } else {
                    per_thread_fd = dup(sfd);
                    if (per_thread_fd < 0) {
                        perror("Failed to duplicate file descriptor");
                        exit(EXIT_FAILURE);
                    }
                }
                dispatch_conn_new(per_thread_fd, conn_read,
                                  EV_READ | EV_PERSIST,
                                  UDP_READ_BUFFER_SIZE, transport, NULL);
            }
        } else {
            /* TCP: keep the listening conn on the main thread and chain it
             * onto the global listen_conn list. */
            if (!(listen_conn_add = conn_new(sfd, conn_listening,
                                             EV_READ | EV_PERSIST, 1,
                                             transport, main_base, NULL))) {
                fprintf(stderr, "failed to create listening connection\n");
                exit(EXIT_FAILURE);
            }
#ifdef TLS
            listen_conn_add->ssl_enabled = ssl_enabled;
#else
            assert(ssl_enabled == false);
#endif
            listen_conn_add->next = listen_conn;
            listen_conn = listen_conn_add;
        }
    }

    freeaddrinfo(ai);

    /* Return zero iff we detected no errors in starting up connections */
    return success == 0;
}
3594
/*
 * server_sockets: bind listeners according to settings.inter.  A NULL
 * interface list binds the wildcard address; otherwise the list is split on
 * ';' or ',' and each token may be "host", "host:port", "[v6]:port", "*",
 * or (with TLS) prefixed with "notls:".  Returns 0 on success, non-zero if
 * any listener failed.
 */
static int server_sockets(int port, enum network_transport transport,
                          FILE *portnumber_file) {
    bool ssl_enabled = false;

#ifdef TLS
    const char *notls = "notls";
    ssl_enabled = settings.ssl_enabled;
#endif

    if (settings.inter == NULL) {
        return server_socket(settings.inter, port, transport, portnumber_file, ssl_enabled);
    } else {
        // tokenize them and bind to each one of them..
        char *b;
        int ret = 0;
        /* strtok_r mutates its input, so work on a copy of settings.inter */
        char *list = strdup(settings.inter);

        if (list == NULL) {
            fprintf(stderr, "Failed to allocate memory for parsing server interface string\n");
            return 1;
        }
        for (char *p = strtok_r(list, ";,", &b);
             p != NULL;
             p = strtok_r(NULL, ";,", &b)) {
            int the_port = port;
#ifdef TLS
            ssl_enabled = settings.ssl_enabled;
            // "notls" option is valid only when memcached is run with SSL enabled.
            if (strncmp(p, notls, strlen(notls)) == 0) {
                if (!settings.ssl_enabled) {
                    fprintf(stderr, "'notls' option is valid only when SSL is enabled\n");
                    free(list);
                    return 1;
                }
                ssl_enabled = false;
                /* skip "notls" plus its ':' separator */
                p += strlen(notls) + 1;
            }
#endif

            char *h = NULL;
            if (*p == '[') {
                // expecting it to be an IPv6 address enclosed in []
                // i.e. RFC3986 style recommended by RFC5952
                char *e = strchr(p, ']');
                if (e == NULL) {
                    fprintf(stderr, "Invalid IPV6 address: \"%s\"", p);
                    free(list);
                    return 1;
                }
                h = ++p; // skip the opening '['
                *e = '\0';
                p = ++e; // skip the closing ']'
            }

            char *s = strchr(p, ':');
            if (s != NULL) {
                // If no more semicolons - attempt to treat as port number.
                // Otherwise the only valid option is an unenclosed IPv6 without port, until
                // of course there was an RFC3986 IPv6 address previously specified -
                // in such a case there is no good option, will just send it to fail as port number.
                if (strchr(s + 1, ':') == NULL || h != NULL) {
                    *s = '\0';
                    ++s;
                    if (!safe_strtol(s, &the_port)) {
                        fprintf(stderr, "Invalid port number: \"%s\"", s);
                        free(list);
                        return 1;
                    }
                }
            }

            /* if a bracketed IPv6 host was parsed, bind to it */
            if (h != NULL)
                p = h;

            /* "*" means wildcard: pass NULL to getaddrinfo via server_socket */
            if (strcmp(p, "*") == 0) {
                p = NULL;
            }
            ret |= server_socket(p, the_port, transport, portnumber_file, ssl_enabled);
        }
        free(list);
        return ret;
    }
}
3678
3679 #ifndef DISABLE_UNIX_SOCKET
/*
 * Create a non-blocking AF_UNIX stream socket.
 * Returns the new fd, or -1 on error (the failure is logged via perror).
 */
static int new_socket_unix(void) {
    int flags;
    int sfd = socket(AF_UNIX, SOCK_STREAM, 0);

    if (sfd == -1) {
        perror("socket()");
        return -1;
    }

    flags = fcntl(sfd, F_GETFL, 0);
    if (flags < 0 || fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) {
        perror("setting O_NONBLOCK");
        close(sfd);
        return -1;
    }
    return sfd;
}
3697
3698 static int server_socket_unix(const char *path, int access_mask) {
3699 int sfd;
3700 struct linger ling = {0, 0};
3701 struct sockaddr_un addr;
3702 struct stat tstat;
3703 int flags =1;
3704 int old_umask;
3705
3706 if (!path) {
3707 return 1;
3708 }
3709
3710 if ((sfd = new_socket_unix()) == -1) {
3711 return 1;
3712 }
3713
3714 /*
3715 * Clean up a previous socket file if we left it around
3716 */
3717 if (lstat(path, &tstat) == 0) {
3718 if (S_ISSOCK(tstat.st_mode))
3719 unlink(path);
3720 }
3721
3722 setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
3723 setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
3724 setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
3725
3726 /*
3727 * the memset call clears nonstandard fields in some implementations
3728 * that otherwise mess things up.
3729 */
3730 memset(&addr, 0, sizeof(addr));
3731
3732 addr.sun_family = AF_UNIX;
3733 strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
3734 assert(strcmp(addr.sun_path, path) == 0);
3735 old_umask = umask( ~(access_mask&0777));
3736 if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
3737 perror("bind()");
3738 close(sfd);
3739 umask(old_umask);
3740 return 1;
3741 }
3742 umask(old_umask);
3743 if (listen(sfd, settings.backlog) == -1) {
3744 perror("listen()");
3745 close(sfd);
3746 return 1;
3747 }
3748 if (!(listen_conn = conn_new(sfd, conn_listening,
3749 EV_READ | EV_PERSIST, 1,
3750 local_transport, main_base, NULL))) {
3751 fprintf(stderr, "failed to create listening connection\n");
3752 exit(EXIT_FAILURE);
3753 }
3754
3755 return 0;
3756 }
3757 #else
3758 #define server_socket_unix(path, access_mask) -1
3759 #endif /* #ifndef DISABLE_UNIX_SOCKET */
3760
3761 /*
3762 * We keep the current time of day in a global variable that's updated by a
3763 * timer event. This saves us a bunch of time() system calls (we really only
3764 * need to get the time once a second, whereas there can be tens of thousands
3765 * of requests a second) and allows us to use server-start-relative timestamps
3766 * rather than absolute UNIX timestamps, a space savings on systems where
3767 * sizeof(time_t) > sizeof(unsigned int).
3768 */
3769 volatile rel_time_t current_time;
3770 static struct event clockevent;
3771 #ifdef MEMCACHED_DEBUG
3772 volatile bool is_paused;
3773 volatile int64_t delta;
3774 #endif
3775 #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
3776 static bool monotonic = false;
3777 static int64_t monotonic_start;
3778 #endif
3779
3780 /* libevent uses a monotonic clock when available for event scheduling. Aside
3781 * from jitter, simply ticking our internal timer here is accurate enough.
3782 * Note that users who are setting explicit dates for expiration times *must*
3783 * ensure their clocks are correct before starting memcached. */
/* libevent uses a monotonic clock when available for event scheduling. Aside
 * from jitter, simply ticking our internal timer here is accurate enough.
 * Note that users who are setting explicit dates for expiration times *must*
 * ensure their clocks are correct before starting memcached.
 *
 * Fires roughly once per second: re-arms itself, runs cheap periodic
 * maintenance (hash expansion check, authfile reload on SIGHUP), and
 * updates the global current_time (seconds since process/monotonic start).
 */
static void clock_handler(const evutil_socket_t fd, const short which, void *arg) {
    struct timeval t = {.tv_sec = 1, .tv_usec = 0};
    static bool initialized = false;

    if (initialized) {
        /* only delete the event if it's actually there. */
        evtimer_del(&clockevent);
    } else {
        initialized = true;
    }

    // While we're here, check for hash table expansion.
    // This function should be quick to avoid delaying the timer.
    assoc_start_expand(stats_state.curr_items);
    // also, if HUP'ed we need to do some maintenance.
    // for now that's just the authfile reload.
    if (settings.sig_hup) {
        settings.sig_hup = false;

        authfile_load(settings.auth_file);
    }

    /* re-arm the one-second timer */
    evtimer_set(&clockevent, clock_handler, 0);
    event_base_set(main_base, &clockevent);
    evtimer_add(&clockevent, &t);

#ifdef MEMCACHED_DEBUG
    /* debug builds can freeze the clock ("is_paused") for testing */
    if (is_paused) return;
#endif

#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
    if (monotonic) {
        struct timespec ts;
        if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
            return;
#ifdef MEMCACHED_DEBUG
        /* delta lets debug builds warp time forward/backward */
        current_time = (rel_time_t) (ts.tv_sec - monotonic_start + delta);
#else
        current_time = (rel_time_t) (ts.tv_sec - monotonic_start);
#endif
        return;
    }
#endif
    /* fallback: wall clock relative to process start */
    {
        struct timeval tv;
        gettimeofday(&tv, NULL);
#ifdef MEMCACHED_DEBUG
        current_time = (rel_time_t) (tv.tv_sec - process_started + delta);
#else
        current_time = (rel_time_t) (tv.tv_sec - process_started);
#endif
    }
}
3837
/* Map a boolean flag to its human-readable "enabled"/"disabled" string. */
static const char* flag_enabled_disabled(bool flag) {
    if (flag) {
        return "enabled";
    }
    return "disabled";
}
3841
/*
 * Abort startup if a documented default no longer matches its actual value;
 * keeps the usage() help text honest when defaults change.
 */
static void verify_default(const char* param, bool condition) {
    if (condition)
        return;

    printf("Default value of [%s] has changed."
           " Modify the help text and default value check.\n", param);
    exit(EXIT_FAILURE);
}
3849
3850 static void usage(void) {
3851 printf(PACKAGE " " VERSION "\n");
3852 printf("-p, --port=<num> TCP port to listen on (default: %d)\n"
3853 "-U, --udp-port=<num> UDP port to listen on (default: %d, off)\n",
3854 settings.port, settings.udpport);
3855 #ifndef DISABLE_UNIX_SOCKET
3856 printf("-s, --unix-socket=<file> UNIX socket to listen on (disables network support)\n");
3857 printf("-a, --unix-mask=<mask> access mask for UNIX socket, in octal (default: %o)\n",
3858 settings.access);
3859 #endif /* #ifndef DISABLE_UNIX_SOCKET */
3860 printf("-A, --enable-shutdown enable ascii \"shutdown\" command\n");
3861 printf("-l, --listen=<addr> interface to listen on (default: INADDR_ANY)\n");
3862 #ifdef TLS
3863 printf(" if TLS/SSL is enabled, 'notls' prefix can be used to\n"
3864 " disable for specific listeners (-l notls:<ip>:<port>) \n");
3865 #endif
3866 printf("-d, --daemon run as a daemon\n"
3867 "-r, --enable-coredumps maximize core file limit\n"
3868 "-u, --user=<user> assume identity of <username> (only when run as root)\n"
3869 "-m, --memory-limit=<num> item memory in megabytes (default: %lu)\n"
3870 "-M, --disable-evictions return error on memory exhausted instead of evicting\n"
3871 "-c, --conn-limit=<num> max simultaneous connections (default: %d)\n"
3872 "-k, --lock-memory lock down all paged memory\n"
3873 "-v, --verbose verbose (print errors/warnings while in event loop)\n"
3874 "-vv very verbose (also print client commands/responses)\n"
3875 "-vvv extremely verbose (internal state transitions)\n"
3876 "-h, --help print this help and exit\n"
3877 "-i, --license print memcached and libevent license\n"
3878 "-V, --version print version and exit\n"
3879 "-P, --pidfile=<file> save PID in <file>, only used with -d option\n"
3880 "-f, --slab-growth-factor=<num> chunk size growth factor (default: %2.2f)\n"
3881 "-n, --slab-min-size=<bytes> min space used for key+value+flags (default: %d)\n",
3882 (unsigned long) settings.maxbytes / (1 << 20),
3883 settings.maxconns, settings.factor, settings.chunk_size);
3884 verify_default("udp-port",settings.udpport == 0);
3885 printf("-L, --enable-largepages try to use large memory pages (if available)\n");
3886 printf("-D <char> Use <char> as the delimiter between key prefixes and IDs.\n"
3887 " This is used for per-prefix stats reporting. The default is\n"
3888 " \"%c\" (colon). If this option is specified, stats collection\n"
3889 " is turned on automatically; if not, then it may be turned on\n"
3890 " by sending the \"stats detail on\" command to the server.\n",
3891 settings.prefix_delimiter);
3892 printf("-t, --threads=<num> number of threads to use (default: %d)\n", settings.num_threads);
3893 printf("-R, --max-reqs-per-event maximum number of requests per event, limits the\n"
3894 " requests processed per connection to prevent \n"
3895 " starvation (default: %d)\n", settings.reqs_per_event);
3896 printf("-C, --disable-cas disable use of CAS\n");
3897 printf("-b, --listen-backlog=<num> set the backlog queue limit (default: %d)\n", settings.backlog);
3898 printf("-B, --protocol=<name> protocol - one of ascii, binary, or auto (default: %s)\n",
3899 prot_text(settings.binding_protocol));
3900 printf("-I, --max-item-size=<num> adjusts max item size\n"
3901 " (default: %dm, min: %dk, max: %dm)\n",
3902 settings.item_size_max/ (1 << 20), ITEM_SIZE_MAX_LOWER_LIMIT / (1 << 10), ITEM_SIZE_MAX_UPPER_LIMIT / (1 << 20));
3903 #ifdef ENABLE_SASL
3904 printf("-S, --enable-sasl turn on Sasl authentication\n");
3905 #endif
3906 printf("-F, --disable-flush-all disable flush_all command\n");
3907 printf("-X, --disable-dumping disable stats cachedump and lru_crawler metadump\n");
3908 printf("-W --disable-watch disable watch commands (live logging)\n");
3909 printf("-Y, --auth-file=<file> (EXPERIMENTAL) enable ASCII protocol authentication. format:\n"
3910 " user:pass\\nuser2:pass2\\n\n");
3911 printf("-e, --memory-file=<file> (EXPERIMENTAL) mmap a file for item memory.\n"
3912 " use only in ram disks or persistent memory mounts!\n"
3913 " enables restartable cache (stop with SIGUSR1)\n");
3914 #ifdef TLS
3915 printf("-Z, --enable-ssl enable TLS/SSL\n");
3916 #endif
3917 printf("-o, --extended comma separated list of extended options\n"
3918 " most options have a 'no_' prefix to disable\n"
3919 " - maxconns_fast: immediately close new connections after limit (default: %s)\n"
3920 " - hashpower: an integer multiplier for how large the hash\n"
3921 " table should be. normally grows at runtime. (default starts at: %d)\n"
3922 " set based on \"STAT hash_power_level\"\n"
3923 " - tail_repair_time: time in seconds for how long to wait before\n"
3924 " forcefully killing LRU tail item.\n"
3925 " disabled by default; very dangerous option.\n"
3926 " - hash_algorithm: the hash table algorithm\n"
3927 " default is murmur3 hash. options: jenkins, murmur3, xxh3\n"
3928 " - no_lru_crawler: disable LRU Crawler background thread.\n"
3929 " - lru_crawler_sleep: microseconds to sleep between items\n"
3930 " default is %d.\n"
3931 " - lru_crawler_tocrawl: max items to crawl per slab per run\n"
3932 " default is %u (unlimited)\n",
3933 flag_enabled_disabled(settings.maxconns_fast), settings.hashpower_init,
3934 settings.lru_crawler_sleep, settings.lru_crawler_tocrawl);
3935 printf(" - read_buf_mem_limit: limit in megabytes for connection read/response buffers.\n"
3936 " do not adjust unless you have high (20k+) conn. limits.\n"
3937 " 0 means unlimited (default: %u)\n",
3938 settings.read_buf_mem_limit);
3939 verify_default("read_buf_mem_limit", settings.read_buf_mem_limit == 0);
3940 printf(" - no_lru_maintainer: disable new LRU system + background thread.\n"
3941 " - hot_lru_pct: pct of slab memory to reserve for hot lru.\n"
3942 " (requires lru_maintainer, default pct: %d)\n"
3943 " - warm_lru_pct: pct of slab memory to reserve for warm lru.\n"
3944 " (requires lru_maintainer, default pct: %d)\n"
3945 " - hot_max_factor: items idle > cold lru age * drop from hot lru. (default: %.2f)\n"
3946 " - warm_max_factor: items idle > cold lru age * this drop from warm. (default: %.2f)\n"
3947 " - temporary_ttl: TTL's below get separate LRU, can't be evicted.\n"
3948 " (requires lru_maintainer, default: %d)\n"
3949 " - idle_timeout: timeout for idle connections. (default: %d, no timeout)\n",
3950 settings.hot_lru_pct, settings.warm_lru_pct, settings.hot_max_factor, settings.warm_max_factor,
3951 settings.temporary_ttl, settings.idle_timeout);
3952 printf(" - slab_chunk_max: (EXPERIMENTAL) maximum slab size in kilobytes. use extreme care. (default: %d)\n"
3953 " - watcher_logbuf_size: size in kilobytes of per-watcher write buffer. (default: %u)\n"
3954 " - worker_logbuf_size: size in kilobytes of per-worker-thread buffer\n"
3955 " read by background thread, then written to watchers. (default: %u)\n"
3956 " - track_sizes: enable dynamic reports for 'stats sizes' command.\n"
3957 " - no_hashexpand: disables hash table expansion (dangerous)\n"
3958 " - modern: enables options which will be default in future.\n"
3959 " currently: nothing\n"
3960 " - no_modern: uses defaults of previous major version (1.4.x)\n",
3961 settings.slab_chunk_size_max / (1 << 10), settings.logger_watcher_buf_size / (1 << 10),
3962 settings.logger_buf_size / (1 << 10));
3963 verify_default("tail_repair_time", settings.tail_repair_time == TAIL_REPAIR_TIME_DEFAULT);
3964 verify_default("lru_crawler_tocrawl", settings.lru_crawler_tocrawl == 0);
3965 verify_default("idle_timeout", settings.idle_timeout == 0);
3966 #ifdef HAVE_DROP_PRIVILEGES
3967 printf(" - drop_privileges: enable dropping extra syscall privileges\n"
3968 " - no_drop_privileges: disable drop_privileges in case it causes issues with\n"
3969 " some customisation.\n"
3970 " (default is no_drop_privileges)\n");
3971 verify_default("drop_privileges", !settings.drop_privileges);
3972 #ifdef MEMCACHED_DEBUG
3973 printf(" - relaxed_privileges: running tests requires extra privileges. (default: %s)\n",
3974 flag_enabled_disabled(settings.relaxed_privileges));
3975 #endif
3976 #endif
3977 #ifdef EXTSTORE
3978 printf("\n - External storage (ext_*) related options (see: https://memcached.org/extstore)\n");
3979 printf(" - ext_path: file to write to for external storage.\n"
3980 " ie: ext_path=/mnt/d1/extstore:1G\n"
3981 " - ext_page_size: size in megabytes of storage pages. (default: %u)\n"
3982 " - ext_wbuf_size: size in megabytes of page write buffers. (default: %u)\n"
3983 " - ext_threads: number of IO threads to run. (default: %u)\n"
3984 " - ext_item_size: store items larger than this (bytes, default %u)\n"
3985 " - ext_item_age: store items idle at least this long (seconds, default: no age limit)\n"
3986 " - ext_low_ttl: consider TTLs lower than this specially (default: %u)\n"
3987 " - ext_drop_unread: don't re-write unread values during compaction (default: %s)\n"
3988 " - ext_recache_rate: recache an item every N accesses (default: %u)\n"
3989 " - ext_compact_under: compact when fewer than this many free pages\n"
3990 " (default: 1/4th of the assigned storage)\n"
3991 " - ext_drop_under: drop COLD items when fewer than this many free pages\n"
3992 " (default: 1/4th of the assigned storage)\n"
3993 " - ext_max_frag: max page fragmentation to tolerate (default: %.2f)\n"
3994 " - slab_automove_freeratio: ratio of memory to hold free as buffer.\n"
3995 " (see doc/storage.txt for more info, default: %.3f)\n",
3996 settings.ext_page_size / (1 << 20), settings.ext_wbuf_size / (1 << 20), settings.ext_io_threadcount,
3997 settings.ext_item_size, settings.ext_low_ttl,
3998 flag_enabled_disabled(settings.ext_drop_unread), settings.ext_recache_rate,
3999 settings.ext_max_frag, settings.slab_automove_freeratio);
4000 verify_default("ext_item_age", settings.ext_item_age == UINT_MAX);
4001 #endif
4002 #ifdef TLS
4003 printf(" - ssl_chain_cert: certificate chain file in PEM format\n"
4004 " - ssl_key: private key, if not part of the -ssl_chain_cert\n"
4005 " - ssl_keyformat: private key format (PEM, DER or ENGINE) (default: PEM)\n");
4006 printf(" - ssl_verify_mode: peer certificate verification mode, default is 0(None).\n"
4007 " valid values are 0(None), 1(Request), 2(Require)\n"
4008 " or 3(Once)\n");
4009 printf(" - ssl_ciphers: specify cipher list to be used\n"
4010 " - ssl_ca_cert: PEM format file of acceptable client CA's\n"
4011 " - ssl_wbuf_size: size in kilobytes of per-connection SSL output buffer\n"
4012 " (default: %u)\n", settings.ssl_wbuf_size / (1 << 10));
4013 printf(" - ssl_session_cache: enable server-side SSL session cache, to support session\n"
4014 " resumption\n"
4015 " - ssl_min_version: minimum protocol version to accept (default: %s)\n"
4016 #if OPENSSL_VERSION_NUMBER >= 0x10101000L
4017 " valid values are 0(%s), 1(%s), 2(%s), or 3(%s).\n",
4018 ssl_proto_text(settings.ssl_min_version),
4019 ssl_proto_text(TLS1_VERSION), ssl_proto_text(TLS1_1_VERSION),
4020 ssl_proto_text(TLS1_2_VERSION), ssl_proto_text(TLS1_3_VERSION));
4021 #else
4022 " valid values are 0(%s), 1(%s), or 2(%s).\n",
4023 ssl_proto_text(settings.ssl_min_version),
4024 ssl_proto_text(TLS1_VERSION), ssl_proto_text(TLS1_1_VERSION),
4025 ssl_proto_text(TLS1_2_VERSION));
4026 #endif
4027 verify_default("ssl_keyformat", settings.ssl_keyformat == SSL_FILETYPE_PEM);
4028 verify_default("ssl_verify_mode", settings.ssl_verify_mode == SSL_VERIFY_NONE);
4029 verify_default("ssl_min_version", settings.ssl_min_version == TLS1_2_VERSION);
4030 #endif
4031 printf("-N, --napi_ids number of napi ids. see doc/napi_ids.txt for more details\n");
4032 return;
4033 }
4034
/* Print the package version followed by the full BSD license text for
 * memcached itself and for the bundled libevent acknowledgement.
 * Invoked for the -i command line option; output goes to stdout. */
static void usage_license(void) {
    printf(PACKAGE " " VERSION "\n\n");
    printf(
    "Copyright (c) 2003, Danga Interactive, Inc. <http://www.danga.com/>\n"
    "All rights reserved.\n"
    "\n"
    "Redistribution and use in source and binary forms, with or without\n"
    "modification, are permitted provided that the following conditions are\n"
    "met:\n"
    "\n"
    "    * Redistributions of source code must retain the above copyright\n"
    "notice, this list of conditions and the following disclaimer.\n"
    "\n"
    "    * Redistributions in binary form must reproduce the above\n"
    "copyright notice, this list of conditions and the following disclaimer\n"
    "in the documentation and/or other materials provided with the\n"
    "distribution.\n"
    "\n"
    "    * Neither the name of the Danga Interactive nor the names of its\n"
    "contributors may be used to endorse or promote products derived from\n"
    "this software without specific prior written permission.\n"
    "\n"
    "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n"
    "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n"
    "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n"
    "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n"
    "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n"
    "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
    "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
    "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
    "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
    "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n"
    "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
    "\n"
    "\n"
    "This product includes software developed by Niels Provos.\n"
    "\n"
    "[ libevent ]\n"
    "\n"
    "Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>\n"
    "All rights reserved.\n"
    "\n"
    "Redistribution and use in source and binary forms, with or without\n"
    "modification, are permitted provided that the following conditions\n"
    "are met:\n"
    "1. Redistributions of source code must retain the above copyright\n"
    "   notice, this list of conditions and the following disclaimer.\n"
    "2. Redistributions in binary form must reproduce the above copyright\n"
    "   notice, this list of conditions and the following disclaimer in the\n"
    "   documentation and/or other materials provided with the distribution.\n"
    "3. All advertising materials mentioning features or use of this software\n"
    "   must display the following acknowledgement:\n"
    "      This product includes software developed by Niels Provos.\n"
    "4. The name of the author may not be used to endorse or promote products\n"
    "   derived from this software without specific prior written permission.\n"
    "\n"
    "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
    "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
    "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
    "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
    "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n"
    "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
    "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
    "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
    "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
    "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
    );

    return;
}
4105
4106 static void save_pid(const char *pid_file) {
4107 FILE *fp;
4108 if (access(pid_file, F_OK) == 0) {
4109 if ((fp = fopen(pid_file, "r")) != NULL) {
4110 char buffer[1024];
4111 if (fgets(buffer, sizeof(buffer), fp) != NULL) {
4112 unsigned int pid;
4113 if (safe_strtoul(buffer, &pid) && kill((pid_t)pid, 0) == 0) {
4114 fprintf(stderr, "WARNING: The pid file contained the following (running) pid: %u\n", pid);
4115 }
4116 }
4117 fclose(fp);
4118 }
4119 }
4120
4121 /* Create the pid file first with a temporary name, then
4122 * atomically move the file to the real name to avoid a race with
4123 * another process opening the file to read the pid, but finding
4124 * it empty.
4125 */
4126 char tmp_pid_file[1024];
4127 snprintf(tmp_pid_file, sizeof(tmp_pid_file), "%s.tmp", pid_file);
4128
4129 if ((fp = fopen(tmp_pid_file, "w")) == NULL) {
4130 vperror("Could not open the pid file %s for writing", tmp_pid_file);
4131 return;
4132 }
4133
4134 fprintf(fp,"%ld\n", (long)getpid());
4135 if (fclose(fp) == -1) {
4136 vperror("Could not close the pid file %s", tmp_pid_file);
4137 }
4138
4139 if (rename(tmp_pid_file, pid_file) != 0) {
4140 vperror("Could not rename the pid file from %s to %s",
4141 tmp_pid_file, pid_file);
4142 }
4143 }
4144
/* Delete the pid file written by save_pid(). A NULL path is a no-op;
 * an unlink() failure is reported via vperror() but not fatal. */
static void remove_pidfile(const char *pid_file) {
    if (pid_file != NULL && unlink(pid_file) != 0) {
        vperror("Could not remove the pid file %s", pid_file);
    }
}
4154
/* SIGINT/SIGTERM handler: request a normal shutdown of the main event
 * loop by setting the global stop flag.
 * NOTE(review): printf()/strsignal() are not async-signal-safe; this
 * looks like a deliberate best-effort diagnostic — confirm before
 * depending on the message being emitted. */
static void sig_handler(const int sig) {
    stop_main_loop = EXIT_NORMALLY;
    printf("Signal handled: %s.\n", strsignal(sig));
}
4159
/* SIGHUP handler: only raises a flag; the actual reload work is done
 * outside signal context by whichever code polls settings.sig_hup. */
static void sighup_handler(const int sig) {
    settings.sig_hup = true;
}
4163
/* SIGUSR1 handler: request a graceful stop (GRACE_STOP) of the main
 * loop, used by the restartable-cache (-e) shutdown path.
 * NOTE(review): printf()/strsignal() are not async-signal-safe; kept
 * as a pre-existing best-effort diagnostic. */
static void sig_usrhandler(const int sig) {
    printf("Graceful shutdown signal handled: %s.\n", strsignal(sig));
    stop_main_loop = GRACE_STOP;
}
4168
4169 /*
4170 * On systems that supports multiple page sizes we may reduce the
4171 * number of TLB-misses by using the biggest available page size
4172 */
4173 static int enable_large_pages(void) {
4174 #if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
4175 int ret = -1;
4176 size_t sizes[32];
4177 int avail = getpagesizes(sizes, 32);
4178 if (avail != -1) {
4179 size_t max = sizes[0];
4180 struct memcntl_mha arg = {0};
4181 int ii;
4182
4183 for (ii = 1; ii < avail; ++ii) {
4184 if (max < sizes[ii]) {
4185 max = sizes[ii];
4186 }
4187 }
4188
4189 arg.mha_flags = 0;
4190 arg.mha_pagesize = max;
4191 arg.mha_cmd = MHA_MAPSIZE_BSSBRK;
4192
4193 if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&arg, 0, 0) == -1) {
4194 fprintf(stderr, "Failed to set large pages: %s\n",
4195 strerror(errno));
4196 fprintf(stderr, "Will use default page size\n");
4197 } else {
4198 ret = 0;
4199 }
4200 } else {
4201 fprintf(stderr, "Failed to get supported pagesizes: %s\n",
4202 strerror(errno));
4203 fprintf(stderr, "Will use default page size\n");
4204 }
4205
4206 return ret;
4207 #elif defined(__linux__) && defined(MADV_HUGEPAGE)
4208 /* check if transparent hugepages is compiled into the kernel */
4209 struct stat st;
4210 int ret = stat("/sys/kernel/mm/transparent_hugepage/enabled", &st);
4211 if (ret || !(st.st_mode & S_IFREG)) {
4212 fprintf(stderr, "Transparent huge pages support not detected.\n");
4213 fprintf(stderr, "Will use default page size.\n");
4214 return -1;
4215 }
4216 return 0;
4217 #elif defined(__FreeBSD__)
4218 int spages;
4219 size_t spagesl = sizeof(spages);
4220
4221 if (sysctlbyname("vm.pmap.pg_ps_enabled", &spages,
4222 &spagesl, NULL, 0) != 0) {
4223 fprintf(stderr, "Could not evaluate the presence of superpages features.");
4224 return -1;
4225 }
4226 if (spages != 1) {
4227 fprintf(stderr, "Superpages support not detected.\n");
4228 fprintf(stderr, "Will use default page size.\n");
4229 return -1;
4230 }
4231 return 0;
4232 #else
4233 return -1;
4234 #endif
4235 }
4236
4237 /**
4238 * Do basic sanity check of the runtime environment
4239 * @return true if no errors found, false if we can't use this env
4240 */
4241 static bool sanitycheck(void) {
4242 /* One of our biggest problems is old and bogus libevents */
4243 const char *ever = event_get_version();
4244 if (ever != NULL) {
4245 if (strncmp(ever, "1.", 2) == 0) {
4246 fprintf(stderr, "You are using libevent %s.\nPlease upgrade to 2.x"
4247 " or newer\n", event_get_version());
4248 return false;
4249 }
4250 }
4251
4252 return true;
4253 }
4254
4255 static bool _parse_slab_sizes(char *s, uint32_t *slab_sizes) {
4256 char *b = NULL;
4257 uint32_t size = 0;
4258 int i = 0;
4259 uint32_t last_size = 0;
4260
4261 if (strlen(s) < 1)
4262 return false;
4263
4264 for (char *p = strtok_r(s, "-", &b);
4265 p != NULL;
4266 p = strtok_r(NULL, "-", &b)) {
4267 if (!safe_strtoul(p, &size) || size < settings.chunk_size
4268 || size > settings.slab_chunk_size_max) {
4269 fprintf(stderr, "slab size %u is out of valid range\n", size);
4270 return false;
4271 }
4272 if (last_size >= size) {
4273 fprintf(stderr, "slab size %u cannot be lower than or equal to a previous class size\n", size);
4274 return false;
4275 }
4276 if (size <= last_size + CHUNK_ALIGN_BYTES) {
4277 fprintf(stderr, "slab size %u must be at least %d bytes larger than previous class\n",
4278 size, CHUNK_ALIGN_BYTES);
4279 return false;
4280 }
4281 slab_sizes[i++] = size;
4282 last_size = size;
4283 if (i >= MAX_NUMBER_OF_SLAB_CLASSES-1) {
4284 fprintf(stderr, "too many slab classes specified\n");
4285 return false;
4286 }
4287 }
4288
4289 slab_sizes[i] = 0;
4290 return true;
4291 }
4292
// State shared between the restart metadata save/load callbacks below.
struct _mc_meta_data {
    void *mmap_base;   // base address of the item-memory mmap in this process
    uint64_t old_base; // mmap base recorded by the previous process ("mmap_oldbase")
    char *slab_config; // string containing either factor or custom slab list.
    int64_t time_delta;       // seconds elapsed between recorded stop_time and restart
    uint64_t process_started; // process_started value saved by the previous run
    uint32_t current_time;    // current_time (rel_time_t) saved by the previous run
};
4301
4302 // We need to remember a combination of configuration settings and global
4303 // state for restart viability and resumption of internal services.
4304 // Compared to the number of tunables and state values, relatively little
4305 // does need to be remembered.
4306 // Time is the hardest; we have to assume the sys clock is correct and re-sync for
4307 // the lost time after restart.
4308 static int _mc_meta_save_cb(const char *tag, void *ctx, void *data) {
4309 struct _mc_meta_data *meta = (struct _mc_meta_data *)data;
4310
4311 // Settings to remember.
4312 // TODO: should get a version of version which is numeric, else
4313 // comparisons for compat reasons are difficult.
4314 // it may be possible to punt on this for now; since we can test for the
4315 // absence of another key... such as the new numeric version.
4316 //restart_set_kv(ctx, "version", "%s", VERSION);
4317 // We hold the original factor or subopts _string_
4318 // it can be directly compared without roundtripping through floats or
4319 // serializing/deserializing the long options list.
4320 restart_set_kv(ctx, "slab_config", "%s", meta->slab_config);
4321 restart_set_kv(ctx, "maxbytes", "%llu", (unsigned long long) settings.maxbytes);
4322 restart_set_kv(ctx, "chunk_size", "%d", settings.chunk_size);
4323 restart_set_kv(ctx, "item_size_max", "%d", settings.item_size_max);
4324 restart_set_kv(ctx, "slab_chunk_size_max", "%d", settings.slab_chunk_size_max);
4325 restart_set_kv(ctx, "slab_page_size", "%d", settings.slab_page_size);
4326 restart_set_kv(ctx, "use_cas", "%s", settings.use_cas ? "true" : "false");
4327 restart_set_kv(ctx, "slab_reassign", "%s", settings.slab_reassign ? "true" : "false");
4328
4329 // Online state to remember.
4330
4331 // current time is tough. we need to rely on the clock being correct to
4332 // pull the delta between stop and start times. we also need to know the
4333 // delta between start time and now to restore monotonic clocks.
4334 // for non-monotonic clocks (some OS?), process_started is the only
4335 // important one.
4336 restart_set_kv(ctx, "current_time", "%u", current_time);
4337 // types are great until... this. some systems time_t could be big, but
4338 // I'm assuming never negative.
4339 restart_set_kv(ctx, "process_started", "%llu", (unsigned long long) process_started);
4340 {
4341 struct timeval tv;
4342 gettimeofday(&tv, NULL);
4343 restart_set_kv(ctx, "stop_time", "%lu", tv.tv_sec);
4344 }
4345
4346 // Might as well just fetch the next CAS value to use than tightly
4347 // coupling the internal variable into the restart system.
4348 restart_set_kv(ctx, "current_cas", "%llu", (unsigned long long) get_cas_id());
4349 restart_set_kv(ctx, "oldest_cas", "%llu", (unsigned long long) settings.oldest_cas);
4350 restart_set_kv(ctx, "logger_gid", "%llu", logger_get_gid());
4351 restart_set_kv(ctx, "hashpower", "%u", stats_state.hash_power_level);
4352 // NOTE: oldest_live is a rel_time_t, which aliases for unsigned int.
4353 // should future proof this with a 64bit upcast, or fetch value from a
4354 // converter function/macro?
4355 restart_set_kv(ctx, "oldest_live", "%u", settings.oldest_live);
4356 // TODO: use uintptr_t etc? is it portable enough?
4357 restart_set_kv(ctx, "mmap_oldbase", "%p", meta->mmap_base);
4358
4359 return 0;
4360 }
4361
// We must see at least this number of checked lines. Else empty/missing lines
// could cause a false-positive.
// TODO: Once crc32'ing of the metadata file is done this could be ensured better by
// the restart module itself (crc32 + count of lines must match on the
// backend)
// NOTE: must equal the number of entries in opts[] below.
#define RESTART_REQUIRED_META 17

// With this callback we make a decision on if the current configuration
// matches up enough to allow reusing the cache.
// We also re-load important runtime information.
//
// Returns 0 when the saved metadata is compatible (mmap can be reused),
// -1 when any key fails to parse or disagrees with the current settings.
// Side effects: restores CAS counters, logger GID, oldest_live/oldest_cas
// and hashpower into global state, and fills meta's time fields.
static int _mc_meta_load_cb(const char *tag, void *ctx, void *data) {
    struct _mc_meta_data *meta = (struct _mc_meta_data *)data;
    char *key;
    char *val;
    int reuse_mmap = 0;
    meta->process_started = 0;
    meta->time_delta = 0;
    meta->current_time = 0;
    int lines_seen = 0;

    // TODO: not sure this is any better than just doing an if/else tree with
    // strcmp's...
    enum {
        R_MMAP_OLDBASE = 0,
        R_MAXBYTES,
        R_CHUNK_SIZE,
        R_ITEM_SIZE_MAX,
        R_SLAB_CHUNK_SIZE_MAX,
        R_SLAB_PAGE_SIZE,
        R_SLAB_CONFIG,
        R_USE_CAS,
        R_SLAB_REASSIGN,
        R_CURRENT_CAS,
        R_OLDEST_CAS,
        R_OLDEST_LIVE,
        R_LOGGER_GID,
        R_CURRENT_TIME,
        R_STOP_TIME,
        R_PROCESS_STARTED,
        R_HASHPOWER,
    };

    // Maps each enum value above to its on-disk key name.
    const char *opts[] = {
        [R_MMAP_OLDBASE] = "mmap_oldbase",
        [R_MAXBYTES] = "maxbytes",
        [R_CHUNK_SIZE] = "chunk_size",
        [R_ITEM_SIZE_MAX] = "item_size_max",
        [R_SLAB_CHUNK_SIZE_MAX] = "slab_chunk_size_max",
        [R_SLAB_PAGE_SIZE] = "slab_page_size",
        [R_SLAB_CONFIG] = "slab_config",
        [R_USE_CAS] = "use_cas",
        [R_SLAB_REASSIGN] = "slab_reassign",
        [R_CURRENT_CAS] = "current_cas",
        [R_OLDEST_CAS] = "oldest_cas",
        [R_OLDEST_LIVE] = "oldest_live",
        [R_LOGGER_GID] = "logger_gid",
        [R_CURRENT_TIME] = "current_time",
        [R_STOP_TIME] = "stop_time",
        [R_PROCESS_STARTED] = "process_started",
        [R_HASHPOWER] = "hashpower",
        NULL
    };

    while (restart_get_kv(ctx, &key, &val) == RESTART_OK) {
        int type = 0;
        int32_t val_int = 0;
        uint32_t val_uint = 0;
        int64_t bigval_int = 0;
        uint64_t bigval_uint = 0;

        // Linear scan of opts[] to map the key string to an enum value.
        while (opts[type] != NULL && strcmp(key, opts[type]) != 0) {
            type++;
        }
        // Unknown keys are skipped (not fatal) to tolerate newer files.
        if (opts[type] == NULL) {
            fprintf(stderr, "[restart] unknown/unhandled key: %s\n", key);
            continue;
        }
        lines_seen++;

        // helper for any boolean checkers.
        bool val_bool = false;
        bool is_bool = true;
        if (strcmp(val, "false") == 0) {
            val_bool = false;
        } else if (strcmp(val, "true") == 0) {
            val_bool = true;
        } else {
            is_bool = false;
        }

        switch (type) {
        case R_MMAP_OLDBASE:
            // old base is a "%p"-formatted pointer; parse as hex.
            if (!safe_strtoull_hex(val, &meta->old_base)) {
                fprintf(stderr, "[restart] failed to parse %s: %s\n", key, val);
                reuse_mmap = -1;
            }
            break;
        // The settings below must match the current configuration exactly,
        // else the saved slab memory layout would be misinterpreted.
        case R_MAXBYTES:
            if (!safe_strtoll(val, &bigval_int) || settings.maxbytes != bigval_int) {
                reuse_mmap = -1;
            }
            break;
        case R_CHUNK_SIZE:
            if (!safe_strtol(val, &val_int) || settings.chunk_size != val_int) {
                reuse_mmap = -1;
            }
            break;
        case R_ITEM_SIZE_MAX:
            if (!safe_strtol(val, &val_int) || settings.item_size_max != val_int) {
                reuse_mmap = -1;
            }
            break;
        case R_SLAB_CHUNK_SIZE_MAX:
            if (!safe_strtol(val, &val_int) || settings.slab_chunk_size_max != val_int) {
                reuse_mmap = -1;
            }
            break;
        case R_SLAB_PAGE_SIZE:
            if (!safe_strtol(val, &val_int) || settings.slab_page_size != val_int) {
                reuse_mmap = -1;
            }
            break;
        case R_SLAB_CONFIG:
            // Compared as the original option _string_ (see save cb).
            // NOTE(review): assumes meta->slab_config is non-NULL here;
            // presumably set by main() before loading — verify at caller.
            if (strcmp(val, meta->slab_config) != 0) {
                reuse_mmap = -1;
            }
            break;
        case R_USE_CAS:
            if (!is_bool || settings.use_cas != val_bool) {
                reuse_mmap = -1;
            }
            break;
        case R_SLAB_REASSIGN:
            if (!is_bool || settings.slab_reassign != val_bool) {
                reuse_mmap = -1;
            }
            break;
        // The values below are runtime state restored into globals rather
        // than compatibility checks.
        case R_CURRENT_CAS:
            // FIXME: do we need to fail if these values _aren't_ found?
            if (!safe_strtoull(val, &bigval_uint)) {
                reuse_mmap = -1;
            } else {
                set_cas_id(bigval_uint);
            }
            break;
        case R_OLDEST_CAS:
            if (!safe_strtoull(val, &bigval_uint)) {
                reuse_mmap = -1;
            } else {
                settings.oldest_cas = bigval_uint;
            }
            break;
        case R_OLDEST_LIVE:
            if (!safe_strtoul(val, &val_uint)) {
                reuse_mmap = -1;
            } else {
                settings.oldest_live = val_uint;
            }
            break;
        case R_LOGGER_GID:
            if (!safe_strtoull(val, &bigval_uint)) {
                reuse_mmap = -1;
            } else {
                logger_set_gid(bigval_uint);
            }
            break;
        case R_PROCESS_STARTED:
            if (!safe_strtoull(val, &bigval_uint)) {
                reuse_mmap = -1;
            } else {
                meta->process_started = bigval_uint;
            }
            break;
        case R_CURRENT_TIME:
            if (!safe_strtoul(val, &val_uint)) {
                reuse_mmap = -1;
            } else {
                meta->current_time = val_uint;
            }
            break;
        case R_STOP_TIME:
            if (!safe_strtoll(val, &bigval_int)) {
                reuse_mmap = -1;
            } else {
                struct timeval t;
                gettimeofday(&t, NULL);
                meta->time_delta = t.tv_sec - bigval_int;
                // clock has done something crazy.
                // there are _lots_ of ways the clock can go wrong here, but
                // this is a safe sanity check since there's nothing else we
                // can realistically do.
                if (meta->time_delta <= 0) {
                    reuse_mmap = -1;
                }
            }
            break;
        case R_HASHPOWER:
            if (!safe_strtoul(val, &val_uint)) {
                reuse_mmap = -1;
            } else {
                settings.hashpower_init = val_uint;
            }
            break;
        default:
            // Unreachable: unknown keys were filtered before the switch.
            fprintf(stderr, "[restart] unhandled key: %s\n", key);
        }

        // First incompatibility wins; stop scanning and report it.
        if (reuse_mmap != 0) {
            fprintf(stderr, "[restart] restart incompatible due to setting for [%s] [old value: %s]\n", key, val);
            break;
        }
    }

    // Guard against truncated/empty metadata files passing by accident.
    if (lines_seen < RESTART_REQUIRED_META) {
        fprintf(stderr, "[restart] missing some metadata lines\n");
        reuse_mmap = -1;
    }

    return reuse_mmap;
}
4582
4583 int main (int argc, char **argv) {
4584 int c;
4585 bool lock_memory = false;
4586 bool do_daemonize = false;
4587 bool preallocate = false;
4588 int maxcore = 0;
4589 char *username = NULL;
4590 char *pid_file = NULL;
4591 struct passwd *pw;
4592 struct rlimit rlim;
4593 char *buf;
4594 char unit = '\0';
4595 int size_max = 0;
4596 int retval = EXIT_SUCCESS;
4597 bool protocol_specified = false;
4598 bool tcp_specified = false;
4599 bool udp_specified = false;
4600 bool start_lru_maintainer = true;
4601 bool start_lru_crawler = true;
4602 bool start_assoc_maint = true;
4603 enum hashfunc_type hash_type = MURMUR3_HASH;
4604 uint32_t tocrawl;
4605 uint32_t slab_sizes[MAX_NUMBER_OF_SLAB_CLASSES];
4606 bool use_slab_sizes = false;
4607 char *slab_sizes_unparsed = NULL;
4608 bool slab_chunk_size_changed = false;
4609 // struct for restart code. Initialized up here so we can curry
4610 // important settings to save or validate.
4611 struct _mc_meta_data *meta = malloc(sizeof(struct _mc_meta_data));
4612 meta->slab_config = NULL;
4613 char *subopts, *subopts_orig;
4614 char *subopts_value;
4615 enum {
4616 MAXCONNS_FAST = 0,
4617 HASHPOWER_INIT,
4618 NO_HASHEXPAND,
4619 SLAB_REASSIGN,
4620 SLAB_AUTOMOVE,
4621 SLAB_AUTOMOVE_RATIO,
4622 SLAB_AUTOMOVE_WINDOW,
4623 TAIL_REPAIR_TIME,
4624 HASH_ALGORITHM,
4625 LRU_CRAWLER,
4626 LRU_CRAWLER_SLEEP,
4627 LRU_CRAWLER_TOCRAWL,
4628 LRU_MAINTAINER,
4629 HOT_LRU_PCT,
4630 WARM_LRU_PCT,
4631 HOT_MAX_FACTOR,
4632 WARM_MAX_FACTOR,
4633 TEMPORARY_TTL,
4634 IDLE_TIMEOUT,
4635 WATCHER_LOGBUF_SIZE,
4636 WORKER_LOGBUF_SIZE,
4637 SLAB_SIZES,
4638 SLAB_CHUNK_MAX,
4639 TRACK_SIZES,
4640 NO_INLINE_ASCII_RESP,
4641 MODERN,
4642 NO_MODERN,
4643 NO_CHUNKED_ITEMS,
4644 NO_SLAB_REASSIGN,
4645 NO_SLAB_AUTOMOVE,
4646 NO_MAXCONNS_FAST,
4647 INLINE_ASCII_RESP,
4648 NO_LRU_CRAWLER,
4649 NO_LRU_MAINTAINER,
4650 NO_DROP_PRIVILEGES,
4651 DROP_PRIVILEGES,
4652 RESP_OBJ_MEM_LIMIT,
4653 READ_BUF_MEM_LIMIT,
4654 META_RESPONSE_OLD,
4655 #ifdef TLS
4656 SSL_CERT,
4657 SSL_KEY,
4658 SSL_VERIFY_MODE,
4659 SSL_KEYFORM,
4660 SSL_CIPHERS,
4661 SSL_CA_CERT,
4662 SSL_WBUF_SIZE,
4663 SSL_SESSION_CACHE,
4664 SSL_MIN_VERSION,
4665 #endif
4666 #ifdef MEMCACHED_DEBUG
4667 RELAXED_PRIVILEGES,
4668 #endif
4669 };
4670 char *const subopts_tokens[] = {
4671 [MAXCONNS_FAST] = "maxconns_fast",
4672 [HASHPOWER_INIT] = "hashpower",
4673 [NO_HASHEXPAND] = "no_hashexpand",
4674 [SLAB_REASSIGN] = "slab_reassign",
4675 [SLAB_AUTOMOVE] = "slab_automove",
4676 [SLAB_AUTOMOVE_RATIO] = "slab_automove_ratio",
4677 [SLAB_AUTOMOVE_WINDOW] = "slab_automove_window",
4678 [TAIL_REPAIR_TIME] = "tail_repair_time",
4679 [HASH_ALGORITHM] = "hash_algorithm",
4680 [LRU_CRAWLER] = "lru_crawler",
4681 [LRU_CRAWLER_SLEEP] = "lru_crawler_sleep",
4682 [LRU_CRAWLER_TOCRAWL] = "lru_crawler_tocrawl",
4683 [LRU_MAINTAINER] = "lru_maintainer",
4684 [HOT_LRU_PCT] = "hot_lru_pct",
4685 [WARM_LRU_PCT] = "warm_lru_pct",
4686 [HOT_MAX_FACTOR] = "hot_max_factor",
4687 [WARM_MAX_FACTOR] = "warm_max_factor",
4688 [TEMPORARY_TTL] = "temporary_ttl",
4689 [IDLE_TIMEOUT] = "idle_timeout",
4690 [WATCHER_LOGBUF_SIZE] = "watcher_logbuf_size",
4691 [WORKER_LOGBUF_SIZE] = "worker_logbuf_size",
4692 [SLAB_SIZES] = "slab_sizes",
4693 [SLAB_CHUNK_MAX] = "slab_chunk_max",
4694 [TRACK_SIZES] = "track_sizes",
4695 [NO_INLINE_ASCII_RESP] = "no_inline_ascii_resp",
4696 [MODERN] = "modern",
4697 [NO_MODERN] = "no_modern",
4698 [NO_CHUNKED_ITEMS] = "no_chunked_items",
4699 [NO_SLAB_REASSIGN] = "no_slab_reassign",
4700 [NO_SLAB_AUTOMOVE] = "no_slab_automove",
4701 [NO_MAXCONNS_FAST] = "no_maxconns_fast",
4702 [INLINE_ASCII_RESP] = "inline_ascii_resp",
4703 [NO_LRU_CRAWLER] = "no_lru_crawler",
4704 [NO_LRU_MAINTAINER] = "no_lru_maintainer",
4705 [NO_DROP_PRIVILEGES] = "no_drop_privileges",
4706 [DROP_PRIVILEGES] = "drop_privileges",
4707 [RESP_OBJ_MEM_LIMIT] = "resp_obj_mem_limit",
4708 [READ_BUF_MEM_LIMIT] = "read_buf_mem_limit",
4709 [META_RESPONSE_OLD] = "meta_response_old",
4710 #ifdef TLS
4711 [SSL_CERT] = "ssl_chain_cert",
4712 [SSL_KEY] = "ssl_key",
4713 [SSL_VERIFY_MODE] = "ssl_verify_mode",
4714 [SSL_KEYFORM] = "ssl_keyformat",
4715 [SSL_CIPHERS] = "ssl_ciphers",
4716 [SSL_CA_CERT] = "ssl_ca_cert",
4717 [SSL_WBUF_SIZE] = "ssl_wbuf_size",
4718 [SSL_SESSION_CACHE] = "ssl_session_cache",
4719 [SSL_MIN_VERSION] = "ssl_min_version",
4720 #endif
4721 #ifdef MEMCACHED_DEBUG
4722 [RELAXED_PRIVILEGES] = "relaxed_privileges",
4723 #endif
4724 NULL
4725 };
4726
    /* Startup sanity checks and process-wide initialization: signal
     * handlers, default settings, optional extstore config allocation. */
    if (!sanitycheck()) {
        free(meta);
        return EX_OSERR;
    }

    /* handle SIGINT, SIGTERM */
    signal(SIGINT, sig_handler);
    signal(SIGTERM, sig_handler);
    signal(SIGHUP, sighup_handler);
    signal(SIGUSR1, sig_usrhandler);

    /* init settings */
    settings_init();
    /* the default hash algorithm must match what settings_init() set */
    verify_default("hash_algorithm", hash_type == MURMUR3_HASH);
#ifdef EXTSTORE
    /* extstore config is allocated before option parsing so unrecognized
     * -o suboptions can be forwarded to storage_read_config() below. */
    void *storage = NULL;
    void *storage_cf = storage_init_config(&settings);
    bool storage_enabled = false;
    if (storage_cf == NULL) {
        fprintf(stderr, "failed to allocate extstore config\n");
        /* NOTE(review): 'meta' is not freed here, unlike the sanitycheck
         * failure path above; harmless since the process exits. */
        return 1;
    }
#endif

    /* Run regardless of initializing it later */
    init_lru_maintainer();

    /* set stderr non-buffering (for running under, say, daemontools) */
    setbuf(stderr, NULL);
4756
    /* getopt(3) option string: one entry per short option; a trailing ':'
     * marks an option that takes a required argument. Long equivalents
     * live in 'longopts' below when HAVE_GETOPT_LONG is defined. */
    char *shortopts =
        "a:"  /* access mask for unix socket */
        "A"   /* enable admin shutdown command */
        "Z"   /* enable SSL */
        "p:"  /* TCP port number to listen on */
        "s:"  /* unix socket path to listen on */
        "U:"  /* UDP port number to listen on */
        "m:"  /* max memory to use for items in megabytes */
        "M"   /* return error on memory exhausted */
        "c:"  /* max simultaneous connections */
        "k"   /* lock down all paged memory */
        "hiV" /* help, licence info, version */
        "r"   /* maximize core file limit */
        "v"   /* verbose */
        "d"   /* daemon mode */
        "l:"  /* interface to listen on */
        "u:"  /* user identity to run as */
        "P:"  /* save PID in file */
        "f:"  /* factor? */
        "n:"  /* minimum space allocated for key+value+flags */
        "t:"  /* threads */
        "D:"  /* prefix delimiter? */
        "L"   /* Large memory pages */
        "R:"  /* max requests per event */
        "C"   /* Disable use of CAS */
        "b:"  /* backlog queue limit */
        "B:"  /* Binding protocol */
        "I:"  /* Max item size */
        "S"   /* Sasl ON */
        "F"   /* Disable flush_all */
        "X"   /* Disable dump commands */
        "W"   /* Disable watch commands */
        "Y:"  /* Enable token auth */
        "e:"  /* mmap path for external item memory */
        "o:"  /* Extended generic options */
        "N:"  /* NAPI ID based thread selection */
        ;
4794
    /* process arguments */
#ifdef HAVE_GETOPT_LONG
    /* Long-option table: each entry maps a --name onto the equivalent
     * short option character handled by the switch below, so both
     * spellings share one code path. */
    const struct option longopts[] = {
        {"unix-mask", required_argument, 0, 'a'},
        {"enable-shutdown", no_argument, 0, 'A'},
        {"enable-ssl", no_argument, 0, 'Z'},
        {"port", required_argument, 0, 'p'},
        {"unix-socket", required_argument, 0, 's'},
        {"udp-port", required_argument, 0, 'U'},
        {"memory-limit", required_argument, 0, 'm'},
        {"disable-evictions", no_argument, 0, 'M'},
        {"conn-limit", required_argument, 0, 'c'},
        {"lock-memory", no_argument, 0, 'k'},
        {"help", no_argument, 0, 'h'},
        {"license", no_argument, 0, 'i'},
        {"version", no_argument, 0, 'V'},
        {"enable-coredumps", no_argument, 0, 'r'},
        {"verbose", optional_argument, 0, 'v'},
        {"daemon", no_argument, 0, 'd'},
        {"listen", required_argument, 0, 'l'},
        {"user", required_argument, 0, 'u'},
        {"pidfile", required_argument, 0, 'P'},
        {"slab-growth-factor", required_argument, 0, 'f'},
        {"slab-min-size", required_argument, 0, 'n'},
        {"threads", required_argument, 0, 't'},
        {"enable-largepages", no_argument, 0, 'L'},
        {"max-reqs-per-event", required_argument, 0, 'R'},
        {"disable-cas", no_argument, 0, 'C'},
        {"listen-backlog", required_argument, 0, 'b'},
        {"protocol", required_argument, 0, 'B'},
        {"max-item-size", required_argument, 0, 'I'},
        {"enable-sasl", no_argument, 0, 'S'},
        {"disable-flush-all", no_argument, 0, 'F'},
        {"disable-dumping", no_argument, 0, 'X'},
        {"disable-watch", no_argument, 0, 'W'},
        {"auth-file", required_argument, 0, 'Y'},
        {"memory-file", required_argument, 0, 'e'},
        {"extended", required_argument, 0, 'o'},
        {"napi-ids", required_argument, 0, 'N'},
        {0, 0, 0, 0}
    };
    int optindex;
    while (-1 != (c = getopt_long(argc, argv, shortopts,
                    longopts, &optindex))) {
#else
    while (-1 != (c = getopt(argc, argv, shortopts))) {
#endif
        /* Dispatch on the short-option character (long options were mapped
         * to the same characters by the longopts table). Invalid values
         * either return 1 or exit(EX_USAGE) immediately. */
        switch (c) {
        case 'A':
            /* enables "shutdown" command */
            settings.shutdown_command = true;
            break;
        case 'Z':
            /* enable secure communication*/
#ifdef TLS
            settings.ssl_enabled = true;
#else
            fprintf(stderr, "This server is not built with TLS support.\n");
            exit(EX_USAGE);
#endif
            break;
        case 'a':
#ifndef DISABLE_UNIX_SOCKET
            /* access for unix domain socket, as octal mask (like chmod)*/
            settings.access= strtol(optarg,NULL,8);
#else
            fprintf(stderr, "This server is not built with unix socket support.\n");
            exit(EX_USAGE);
#endif /* #ifndef DISABLE_UNIX_SOCKET */
            break;
        case 'U':
            /* 0 disables UDP; see validation after the loop */
            settings.udpport = atoi(optarg);
            udp_specified = true;
            break;
        case 'p':
            settings.port = atoi(optarg);
            tcp_specified = true;
            break;
        case 's':
#ifndef DISABLE_UNIX_SOCKET
            settings.socketpath = optarg;
#else
            fprintf(stderr, "This server is not built with unix socket support.\n");
            exit(EX_USAGE);
#endif /* #ifndef DISABLE_UNIX_SOCKET */
            break;
        case 'm':
            /* -m is given in megabytes; stored internally as bytes */
            settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
            break;
        case 'M':
            /* on memory exhaustion, error out instead of evicting */
            settings.evict_to_free = 0;
            break;
        case 'c':
            settings.maxconns = atoi(optarg);
            if (settings.maxconns <= 0) {
                fprintf(stderr, "Maximum connections must be greater than 0\n");
                return 1;
            }
            break;
        case 'h':
            usage();
            exit(EXIT_SUCCESS);
        case 'i':
            usage_license();
            exit(EXIT_SUCCESS);
        case 'V':
            printf(PACKAGE " " VERSION "\n");
            exit(EXIT_SUCCESS);
        case 'k':
            /* mlockall() is attempted later, after daemonizing */
            lock_memory = true;
            break;
        case 'v':
            /* repeatable: each -v raises the verbosity level by one */
            settings.verbose++;
            break;
        case 'l':
            /* -l may be repeated; addresses accumulate into a comma
             * separated list in settings.inter */
            if (settings.inter != NULL) {
                /* NOTE(review): strstr() is a substring test, so adding
                 * e.g. "10.0.0.1" is silently skipped if "10.0.0.10" is
                 * already listed — confirm this dedupe is intentional. */
                if (strstr(settings.inter, optarg) != NULL) {
                    break;
                }
                size_t len = strlen(settings.inter) + strlen(optarg) + 2;
                char *p = malloc(len);
                if (p == NULL) {
                    fprintf(stderr, "Failed to allocate memory\n");
                    return 1;
                }
                snprintf(p, len, "%s,%s", settings.inter, optarg);
                free(settings.inter);
                settings.inter = p;
            } else {
                settings.inter= strdup(optarg);
            }
            break;
        case 'd':
            do_daemonize = true;
            break;
        case 'r':
            /* raise RLIMIT_CORE later so core dumps are possible */
            maxcore = 1;
            break;
        case 'R':
            settings.reqs_per_event = atoi(optarg);
            if (settings.reqs_per_event == 0) {
                fprintf(stderr, "Number of requests per event must be greater than 0\n");
                return 1;
            }
            break;
        case 'u':
            username = optarg;
            break;
        case 'P':
            pid_file = optarg;
            break;
        case 'e':
            settings.memory_file = optarg;
            break;
        case 'f':
            settings.factor = atof(optarg);
            if (settings.factor <= 1.0) {
                fprintf(stderr, "Factor must be greater than 1\n");
                return 1;
            }
            /* keep the raw string around for the restart metadata */
            meta->slab_config = strdup(optarg);
            break;
        case 'n':
            settings.chunk_size = atoi(optarg);
            if (settings.chunk_size == 0) {
                fprintf(stderr, "Chunk size must be greater than 0\n");
                return 1;
            }
            break;
        case 't':
            settings.num_threads = atoi(optarg);
            if (settings.num_threads <= 0) {
                fprintf(stderr, "Number of threads must be greater than 0\n");
                return 1;
            }
            /* There're other problems when you get above 64 threads.
             * In the future we should portably detect # of cores for the
             * default.
             */
            if (settings.num_threads > 64) {
                fprintf(stderr, "WARNING: Setting a high number of worker"
                                "threads is not recommended.\n"
                                " Set this value to the number of cores in"
                                " your machine or less.\n");
            }
            break;
        case 'D':
            if (! optarg || ! optarg[0]) {
                fprintf(stderr, "No delimiter specified\n");
                return 1;
            }
            settings.prefix_delimiter = optarg[0];
            /* a delimiter implies per-prefix detailed stats collection */
            settings.detail_enabled = 1;
            break;
        case 'L' :
            if (enable_large_pages() == 0) {
                /* large pages only help if memory is preallocated */
                preallocate = true;
            } else {
                fprintf(stderr, "Cannot enable large pages on this system\n"
                    "(There is no support as of this version)\n");
                return 1;
            }
            break;
        case 'C' :
            settings.use_cas = false;
            break;
        case 'b' :
            settings.backlog = atoi(optarg);
            break;
        case 'B':
            protocol_specified = true;
            if (strcmp(optarg, "auto") == 0) {
                settings.binding_protocol = negotiating_prot;
            } else if (strcmp(optarg, "binary") == 0) {
                settings.binding_protocol = binary_prot;
            } else if (strcmp(optarg, "ascii") == 0) {
                settings.binding_protocol = ascii_prot;
            } else {
                fprintf(stderr, "Invalid value for binding protocol: %s\n"
                        " -- should be one of auto, binary, or ascii\n", optarg);
                exit(EX_USAGE);
            }
            break;
        case 'I':
            /* max item size; accepts an optional k/K or m/M suffix */
            buf = strdup(optarg);
            /* NOTE(review): an empty -I argument would index buf[-1] here
             * and strdup's result is unchecked — verify getopt guarantees
             * a non-empty optarg before relying on this. */
            unit = buf[strlen(buf)-1];
            if (unit == 'k' || unit == 'm' ||
                unit == 'K' || unit == 'M') {
                buf[strlen(buf)-1] = '\0';
                size_max = atoi(buf);
                if (unit == 'k' || unit == 'K')
                    size_max *= 1024;
                if (unit == 'm' || unit == 'M')
                    size_max *= 1024 * 1024;
                settings.item_size_max = size_max;
            } else {
                settings.item_size_max = atoi(buf);
            }
            free(buf);
            break;
        case 'S': /* set Sasl authentication to true. Default is false */
#ifndef ENABLE_SASL
            fprintf(stderr, "This server is not built with SASL support.\n");
            exit(EX_USAGE);
#endif
            settings.sasl = true;
            break;
        case 'F' :
            settings.flush_enabled = false;
            break;
        case 'X' :
            settings.dump_enabled = false;
            break;
        case 'W' :
            settings.watch_enabled = false;
            break;
        case 'Y' :
            // dupe the file path now just in case the options get mangled.
            settings.auth_file = strdup(optarg);
            break;
        case 'N':
            settings.num_napi_ids = atoi(optarg);
            if (settings.num_napi_ids <= 0) {
                fprintf(stderr, "Maximum number of NAPI IDs must be greater than 0\n");
                return 1;
            }
            break;
        case 'o': /* It's sub-opts time! */
            /* -o takes a comma separated key[=value] list; getsubopt()
             * consumes the copy in 'subopts' token by token. */
            subopts_orig = subopts = strdup(optarg); /* getsubopt() changes the original args */

            while (*subopts != '\0') {
                // BSD getsubopt (at least) has undefined behavior on -1, so
                // if we want to retry the getsubopt call in submodules we
                // need an extra layer of string copies.
                char *subopts_temp_o = NULL;
                char *subopts_temp = subopts_temp_o = strdup(subopts);

                switch (getsubopt(&subopts, subopts_tokens, &subopts_value)) {
                case MAXCONNS_FAST:
                    settings.maxconns_fast = true;
                    break;
                case HASHPOWER_INIT:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing numeric argument for hashpower\n");
                        return 1;
                    }
                    /* initial hash table size is 2^hashpower_init buckets */
                    settings.hashpower_init = atoi(subopts_value);
                    if (settings.hashpower_init < 12) {
                        fprintf(stderr, "Initial hashtable multiplier of %d is too low\n",
                            settings.hashpower_init);
                        return 1;
                    } else if (settings.hashpower_init > 32) {
                        fprintf(stderr, "Initial hashtable multiplier of %d is too high\n"
                            "Choose a value based on \"STAT hash_power_level\" from a running instance\n",
                            settings.hashpower_init);
                        return 1;
                    }
                    break;
                case NO_HASHEXPAND:
                    /* don't start the background hash table grower */
                    start_assoc_maint = false;
                    break;
                case SLAB_REASSIGN:
                    settings.slab_reassign = true;
                    break;
                case SLAB_AUTOMOVE:
                    /* bare "slab_automove" means mode 1; 0..2 accepted */
                    if (subopts_value == NULL) {
                        settings.slab_automove = 1;
                        break;
                    }
                    settings.slab_automove = atoi(subopts_value);
                    if (settings.slab_automove < 0 || settings.slab_automove > 2) {
                        fprintf(stderr, "slab_automove must be between 0 and 2\n");
                        return 1;
                    }
                    break;
                case SLAB_AUTOMOVE_RATIO:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing slab_automove_ratio argument\n");
                        return 1;
                    }
                    settings.slab_automove_ratio = atof(subopts_value);
                    if (settings.slab_automove_ratio <= 0 || settings.slab_automove_ratio > 1) {
                        fprintf(stderr, "slab_automove_ratio must be > 0 and < 1\n");
                        return 1;
                    }
                    break;
                case SLAB_AUTOMOVE_WINDOW:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing slab_automove_window argument\n");
                        return 1;
                    }
                    settings.slab_automove_window = atoi(subopts_value);
                    if (settings.slab_automove_window < 3) {
                        fprintf(stderr, "slab_automove_window must be > 2\n");
                        return 1;
                    }
                    break;
                case TAIL_REPAIR_TIME:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing numeric argument for tail_repair_time\n");
                        return 1;
                    }
                    settings.tail_repair_time = atoi(subopts_value);
                    if (settings.tail_repair_time < 10) {
                        fprintf(stderr, "Cannot set tail_repair_time to less than 10 seconds\n");
                        return 1;
                    }
                    break;
                case HASH_ALGORITHM:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing hash_algorithm argument\n");
                        return 1;
                    };
                    /* actual selection happens in hash_init() after parsing */
                    if (strcmp(subopts_value, "jenkins") == 0) {
                        hash_type = JENKINS_HASH;
                    } else if (strcmp(subopts_value, "murmur3") == 0) {
                        hash_type = MURMUR3_HASH;
                    } else if (strcmp(subopts_value, "xxh3") == 0) {
                        hash_type = XXH3_HASH;
                    } else {
                        fprintf(stderr, "Unknown hash_algorithm option (jenkins, murmur3, xxh3)\n");
                        return 1;
                    }
                    break;
                case LRU_CRAWLER:
                    start_lru_crawler = true;
                    break;
                case LRU_CRAWLER_SLEEP:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing lru_crawler_sleep value\n");
                        return 1;
                    }
                    /* sleep between crawled items, in microseconds */
                    settings.lru_crawler_sleep = atoi(subopts_value);
                    if (settings.lru_crawler_sleep > 1000000 || settings.lru_crawler_sleep < 0) {
                        fprintf(stderr, "LRU crawler sleep must be between 0 and 1 second\n");
                        return 1;
                    }
                    break;
                case LRU_CRAWLER_TOCRAWL:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing lru_crawler_tocrawl value\n");
                        return 1;
                    }
                    if (!safe_strtoul(subopts_value, &tocrawl)) {
                        fprintf(stderr, "lru_crawler_tocrawl takes a numeric 32bit value\n");
                        return 1;
                    }
                    settings.lru_crawler_tocrawl = tocrawl;
                    break;
                case LRU_MAINTAINER:
                    /* segmented (HOT/WARM/COLD) LRU requires the maintainer */
                    start_lru_maintainer = true;
                    settings.lru_segmented = true;
                    break;
                case HOT_LRU_PCT:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing hot_lru_pct argument\n");
                        return 1;
                    }
                    settings.hot_lru_pct = atoi(subopts_value);
                    if (settings.hot_lru_pct < 1 || settings.hot_lru_pct >= 80) {
                        fprintf(stderr, "hot_lru_pct must be > 1 and < 80\n");
                        return 1;
                    }
                    break;
                case WARM_LRU_PCT:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing warm_lru_pct argument\n");
                        return 1;
                    }
                    settings.warm_lru_pct = atoi(subopts_value);
                    if (settings.warm_lru_pct < 1 || settings.warm_lru_pct >= 80) {
                        fprintf(stderr, "warm_lru_pct must be > 1 and < 80\n");
                        return 1;
                    }
                    break;
                case HOT_MAX_FACTOR:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing hot_max_factor argument\n");
                        return 1;
                    }
                    settings.hot_max_factor = atof(subopts_value);
                    if (settings.hot_max_factor <= 0) {
                        fprintf(stderr, "hot_max_factor must be > 0\n");
                        return 1;
                    }
                    break;
                case WARM_MAX_FACTOR:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing warm_max_factor argument\n");
                        return 1;
                    }
                    settings.warm_max_factor = atof(subopts_value);
                    if (settings.warm_max_factor <= 0) {
                        fprintf(stderr, "warm_max_factor must be > 0\n");
                        return 1;
                    }
                    break;
                case TEMPORARY_TTL:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing temporary_ttl argument\n");
                        return 1;
                    }
                    /* items with TTL below this go to a separate TEMP LRU */
                    settings.temp_lru = true;
                    settings.temporary_ttl = atoi(subopts_value);
                    break;
                case IDLE_TIMEOUT:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing numeric argument for idle_timeout\n");
                        return 1;
                    }
                    settings.idle_timeout = atoi(subopts_value);
                    break;
                case WATCHER_LOGBUF_SIZE:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing watcher_logbuf_size argument\n");
                        return 1;
                    }
                    if (!safe_strtoul(subopts_value, &settings.logger_watcher_buf_size)) {
                        fprintf(stderr, "could not parse argument to watcher_logbuf_size\n");
                        return 1;
                    }
                    settings.logger_watcher_buf_size *= 1024; /* kilobytes */
                    break;
5258 case WORKER_LOGBUF_SIZE:
5259 if (subopts_value == NULL) {
5260 fprintf(stderr, "Missing worker_logbuf_size argument\n");
5261 return 1;
5262 }
5263 if (!safe_strtoul(subopts_value, &settings.logger_buf_size)) {
5264 fprintf(stderr, "could not parse argument to worker_logbuf_size\n");
5265 return 1;
5266 }
5267 settings.logger_buf_size *= 1024; /* kilobytes */
5268 case SLAB_SIZES:
5269 slab_sizes_unparsed = strdup(subopts_value);
5270 break;
5271 case SLAB_CHUNK_MAX:
5272 if (subopts_value == NULL) {
5273 fprintf(stderr, "Missing slab_chunk_max argument\n");
5274 }
5275 if (!safe_strtol(subopts_value, &settings.slab_chunk_size_max)) {
5276 fprintf(stderr, "could not parse argument to slab_chunk_max\n");
5277 }
5278 slab_chunk_size_changed = true;
5279 break;
                case TRACK_SIZES:
                    /* enable per-size item statistics collection */
                    item_stats_sizes_init();
                    break;
                case NO_INLINE_ASCII_RESP:
                    /* accepted for compatibility; no longer does anything */
                    break;
                case INLINE_ASCII_RESP:
                    /* accepted for compatibility; no longer does anything */
                    break;
                case NO_CHUNKED_ITEMS:
                    /* max chunk == page size means items are never chunked */
                    settings.slab_chunk_size_max = settings.slab_page_size;
                    break;
                case NO_SLAB_REASSIGN:
                    settings.slab_reassign = false;
                    break;
                case NO_SLAB_AUTOMOVE:
                    settings.slab_automove = 0;
                    break;
                case NO_MAXCONNS_FAST:
                    settings.maxconns_fast = false;
                    break;
                case NO_LRU_CRAWLER:
                    settings.lru_crawler = false;
                    start_lru_crawler = false;
                    break;
                case NO_LRU_MAINTAINER:
                    start_lru_maintainer = false;
                    settings.lru_segmented = false;
                    break;
                case META_RESPONSE_OLD:
                    settings.meta_response_old = true;
                    break;
#ifdef TLS
                case SSL_CERT:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_chain_cert argument\n");
                        return 1;
                    }
                    settings.ssl_chain_cert = strdup(subopts_value);
                    break;
                case SSL_KEY:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_key argument\n");
                        return 1;
                    }
                    settings.ssl_key = strdup(subopts_value);
                    break;
                case SSL_VERIFY_MODE:
                {
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_verify_mode argument\n");
                        return 1;
                    }
                    /* numeric 0-3 maps onto OpenSSL verify flag combinations */
                    int verify = 0;
                    if (!safe_strtol(subopts_value, &verify)) {
                        fprintf(stderr, "could not parse argument to ssl_verify_mode\n");
                        return 1;
                    }
                    switch(verify) {
                    case 0:
                        settings.ssl_verify_mode = SSL_VERIFY_NONE;
                        break;
                    case 1:
                        settings.ssl_verify_mode = SSL_VERIFY_PEER;
                        break;
                    case 2:
                        settings.ssl_verify_mode = SSL_VERIFY_PEER |
                                                    SSL_VERIFY_FAIL_IF_NO_PEER_CERT;
                        break;
                    case 3:
                        settings.ssl_verify_mode = SSL_VERIFY_PEER |
                                                    SSL_VERIFY_FAIL_IF_NO_PEER_CERT |
                                                    SSL_VERIFY_CLIENT_ONCE;
                        break;
                    default:
                        fprintf(stderr, "Invalid ssl_verify_mode. Use help to see valid options.\n");
                        return 1;
                    }
                    break;
                }
                case SSL_KEYFORM:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_keyformat argument\n");
                        return 1;
                    }
                    if (!safe_strtol(subopts_value, &settings.ssl_keyformat)) {
                        fprintf(stderr, "could not parse argument to ssl_keyformat\n");
                        return 1;
                    }
                    break;
                case SSL_CIPHERS:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_ciphers argument\n");
                        return 1;
                    }
                    settings.ssl_ciphers = strdup(subopts_value);
                    break;
                case SSL_CA_CERT:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_ca_cert argument\n");
                        return 1;
                    }
                    settings.ssl_ca_cert = strdup(subopts_value);
                    break;
                case SSL_WBUF_SIZE:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_wbuf_size argument\n");
                        return 1;
                    }
                    if (!safe_strtoul(subopts_value, &settings.ssl_wbuf_size)) {
                        fprintf(stderr, "could not parse argument to ssl_wbuf_size\n");
                        return 1;
                    }
                    settings.ssl_wbuf_size *= 1024; /* kilobytes */
                    break;
                case SSL_SESSION_CACHE:
                    settings.ssl_session_cache = true;
                    break;
                case SSL_MIN_VERSION: {
                    /* numeric 0-3 maps onto TLS protocol version constants;
                     * TLS 1.3 requires OpenSSL >= 1.1.1 */
                    int min_version;
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing ssl_min_version argument\n");
                        return 1;
                    }
                    if (!safe_strtol(subopts_value, &min_version)) {
                        fprintf(stderr, "could not parse argument to ssl_min_version\n");
                        return 1;
                    }
                    switch (min_version) {
                    case 0:
                        settings.ssl_min_version = TLS1_VERSION;
                        break;
                    case 1:
                        settings.ssl_min_version = TLS1_1_VERSION;
                        break;
                    case 2:
                        settings.ssl_min_version = TLS1_2_VERSION;
                        break;
#if OPENSSL_VERSION_NUMBER >= 0x10101000L
                    case 3:
                        settings.ssl_min_version = TLS1_3_VERSION;
                        break;
#endif
                    default:
                        fprintf(stderr, "Invalid ssl_min_version. Use help to see valid options.\n");
                        return 1;
                    }
                    break;
                }
#endif
                case MODERN:
                    /* currently no new defaults */
                    break;
                case NO_MODERN:
                    /* revert every "modern" default to pre-1.5 behavior */
                    if (!slab_chunk_size_changed) {
                        settings.slab_chunk_size_max = settings.slab_page_size;
                    }
                    settings.slab_reassign = false;
                    settings.slab_automove = 0;
                    settings.maxconns_fast = false;
                    settings.lru_segmented = false;
                    hash_type = JENKINS_HASH;
                    start_lru_crawler = false;
                    start_lru_maintainer = false;
                    break;
                case NO_DROP_PRIVILEGES:
                    settings.drop_privileges = false;
                    break;
                case DROP_PRIVILEGES:
                    settings.drop_privileges = true;
                    break;
                case RESP_OBJ_MEM_LIMIT:
                    // TODO: Remove at some point in the future.
                    fprintf(stderr, "DEPRECATED: resp_obj_mem_limit no longer used. See read_buf_mem_limit,\n");
                    break;
                case READ_BUF_MEM_LIMIT:
                    if (subopts_value == NULL) {
                        fprintf(stderr, "Missing read_buf_mem_limit argument\n");
                        return 1;
                    }
                    if (!safe_strtoul(subopts_value, &settings.read_buf_mem_limit)) {
                        fprintf(stderr, "could not parse argument to read_buf_mem_limit\n");
                        return 1;
                    }
                    settings.read_buf_mem_limit *= 1024 * 1024; /* megabytes */
                    break;
#ifdef MEMCACHED_DEBUG
                case RELAXED_PRIVILEGES:
                    settings.relaxed_privileges = true;
                    break;
#endif
                default:
                    /* Unrecognized token: with extstore built in, give the
                     * storage layer a chance to claim it (it re-parses the
                     * untouched copy in subopts_temp). */
#ifdef EXTSTORE
                    // TODO: differentiating response code.
                    if (storage_read_config(storage_cf, &subopts_temp)) {
                        return 1;
                    }
#else
                    /* NOTE(review): this goes to stdout while every other
                     * parse error goes to stderr — confirm if intentional. */
                    printf("Illegal suboption \"%s\"\n", subopts_temp);
                    return 1;
#endif
                } // switch
                /* free(NULL) would be a no-op anyway; guard kept as-is */
                if (subopts_temp_o) {
                    free(subopts_temp_o);
                }

            } // while
            free(subopts_orig);
            break;
        default:
            fprintf(stderr, "Illegal argument \"%c\"\n", c);
            return 1;
        }
    }
5492
    /* Cross-option validation: these checks need the final values of
     * several options, so they run after the whole option loop. */
    if (settings.num_napi_ids > settings.num_threads) {
        fprintf(stderr, "Number of napi_ids(%d) cannot be greater than number of threads(%d)\n",
                settings.num_napi_ids, settings.num_threads);
        exit(EX_USAGE);
    }

    if (settings.item_size_max < ITEM_SIZE_MAX_LOWER_LIMIT) {
        fprintf(stderr, "Item max size cannot be less than 1024 bytes.\n");
        exit(EX_USAGE);
    }
    if (settings.item_size_max > (settings.maxbytes / 2)) {
        fprintf(stderr, "Cannot set item size limit higher than 1/2 of memory max.\n");
        exit(EX_USAGE);
    }
    if (settings.item_size_max > (ITEM_SIZE_MAX_UPPER_LIMIT)) {
        fprintf(stderr, "Cannot set item size limit higher than a gigabyte.\n");
        exit(EX_USAGE);
    }
    if (settings.item_size_max > 1024 * 1024) {
        /* large items force chunking unless the user chose a chunk size */
        if (!slab_chunk_size_changed) {
            // Ideal new default is 16k, but needs stitching.
            settings.slab_chunk_size_max = settings.slab_page_size / 2;
        }
    }

    if (settings.slab_chunk_size_max > settings.item_size_max) {
        fprintf(stderr, "slab_chunk_max (bytes: %d) cannot be larger than -I (item_size_max %d)\n",
                settings.slab_chunk_size_max, settings.item_size_max);
        exit(EX_USAGE);
    }

    if (settings.item_size_max % settings.slab_chunk_size_max != 0) {
        fprintf(stderr, "-I (item_size_max: %d) must be evenly divisible by slab_chunk_max (bytes: %d)\n",
                settings.item_size_max, settings.slab_chunk_size_max);
        exit(EX_USAGE);
    }

    if (settings.slab_page_size % settings.slab_chunk_size_max != 0) {
        fprintf(stderr, "slab_chunk_max (bytes: %d) must divide evenly into %d (slab_page_size)\n",
                settings.slab_chunk_size_max, settings.slab_page_size);
        exit(EX_USAGE);
    }
#ifdef EXTSTORE
    switch (storage_check_config(storage_cf)) {
        case 0:
            storage_enabled = true;
            break;
        case 1:
            exit(EX_USAGE);
            break;
    }
#endif
    // Reserve this for the new default. If factor size hasn't changed, use
    // new default.
    /*if (settings.slab_chunk_size_max == 16384 && settings.factor == 1.25) {
        settings.factor = 1.08;
    }*/

    if (slab_sizes_unparsed != NULL) {
        // want the unedited string for restart code.
        char *temp = strdup(slab_sizes_unparsed);
        if (_parse_slab_sizes(slab_sizes_unparsed, slab_sizes)) {
            use_slab_sizes = true;
            if (meta->slab_config) {
                free(meta->slab_config);
            }
            meta->slab_config = temp;
        } else {
            /* NOTE(review): 'temp' leaks here, but the process exits. */
            exit(EX_USAGE);
        }
    } else if (!meta->slab_config) {
        // using the default factor.
        /* NOTE(review): a string literal is stored where -f stores a
         * strdup'd buffer — confirm meta->slab_config is never freed. */
        meta->slab_config = "1.25";
    }

    if (settings.hot_lru_pct + settings.warm_lru_pct > 80) {
        fprintf(stderr, "hot_lru_pct + warm_lru_pct cannot be more than 80%% combined\n");
        exit(EX_USAGE);
    }

    if (settings.temp_lru && !start_lru_maintainer) {
        fprintf(stderr, "temporary_ttl requires lru_maintainer to be enabled\n");
        exit(EX_USAGE);
    }

    /* select the hash function chosen via -o hash_algorithm */
    if (hash_init(hash_type) != 0) {
        fprintf(stderr, "Failed to initialize hash_algorithm!\n");
        exit(EX_USAGE);
    }
5582
5583 /*
5584 * Use one workerthread to serve each UDP port if the user specified
5585 * multiple ports
5586 */
5587 if (settings.inter != NULL && strchr(settings.inter, ',')) {
5588 settings.num_threads_per_udp = 1;
5589 } else {
5590 settings.num_threads_per_udp = settings.num_threads;
5591 }
5592
5593 if (settings.sasl) {
5594 if (!protocol_specified) {
5595 settings.binding_protocol = binary_prot;
5596 } else {
5597 if (settings.binding_protocol != binary_prot) {
5598 fprintf(stderr, "ERROR: You cannot allow the ASCII protocol while using SASL.\n");
5599 exit(EX_USAGE);
5600 }
5601 }
5602
5603 if (settings.udpport) {
5604 fprintf(stderr, "ERROR: Cannot enable UDP while using binary SASL authentication.\n");
5605 exit(EX_USAGE);
5606 }
5607 }
5608
5609 if (settings.auth_file) {
5610 if (!protocol_specified) {
5611 settings.binding_protocol = ascii_prot;
5612 } else {
5613 if (settings.binding_protocol != ascii_prot) {
5614 fprintf(stderr, "ERROR: You cannot allow the BINARY protocol while using ascii authentication tokens.\n");
5615 exit(EX_USAGE);
5616 }
5617 }
5618 }
5619
5620 if (udp_specified && settings.udpport != 0 && !tcp_specified) {
5621 settings.port = settings.udpport;
5622 }
5623
5624
5625 #ifdef TLS
5626 /*
5627 * Setup SSL if enabled
5628 */
5629 if (settings.ssl_enabled) {
5630 if (!settings.port) {
5631 fprintf(stderr, "ERROR: You cannot enable SSL without a TCP port.\n");
5632 exit(EX_USAGE);
5633 }
5634 // openssl init methods.
5635 SSL_load_error_strings();
5636 SSLeay_add_ssl_algorithms();
5637 // Initiate the SSL context.
5638 ssl_init();
5639 }
5640 #endif
5641
5642 if (maxcore != 0) {
5643 struct rlimit rlim_new;
5644 /*
5645 * First try raising to infinity; if that fails, try bringing
5646 * the soft limit to the hard.
5647 */
5648 if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
5649 rlim_new.rlim_cur = rlim_new.rlim_max = RLIM_INFINITY;
5650 if (setrlimit(RLIMIT_CORE, &rlim_new)!= 0) {
5651 /* failed. try raising just to the old max */
5652 rlim_new.rlim_cur = rlim_new.rlim_max = rlim.rlim_max;
5653 (void)setrlimit(RLIMIT_CORE, &rlim_new);
5654 }
5655 }
5656 /*
5657 * getrlimit again to see what we ended up with. Only fail if
5658 * the soft limit ends up 0, because then no core files will be
5659 * created at all.
5660 */
5661
5662 if ((getrlimit(RLIMIT_CORE, &rlim) != 0) || rlim.rlim_cur == 0) {
5663 fprintf(stderr, "failed to ensure corefile creation\n");
5664 exit(EX_OSERR);
5665 }
5666 }
5667
5668 /*
5669 * If needed, increase rlimits to allow as many connections
5670 * as needed.
5671 */
5672
5673 if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
5674 fprintf(stderr, "failed to getrlimit number of files\n");
5675 exit(EX_OSERR);
5676 } else {
5677 #ifdef MEMCACHED_DEBUG
5678 if (rlim.rlim_cur < settings.maxconns || rlim.rlim_max < settings.maxconns) {
5679 #endif
5680 rlim.rlim_cur = settings.maxconns;
5681 rlim.rlim_max = settings.maxconns;
5682 if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
5683 fprintf(stderr, "failed to set rlimit for open files. Try starting as root or requesting smaller maxconns value.\n");
5684 exit(EX_OSERR);
5685 }
5686 #ifdef MEMCACHED_DEBUG
5687 }
5688 #endif
5689 }
5690
5691 /* lose root privileges if we have them */
5692 if (getuid() == 0 || geteuid() == 0) {
5693 if (username == 0 || *username == '\0') {
5694 fprintf(stderr, "can't run as root without the -u switch\n");
5695 exit(EX_USAGE);
5696 }
5697 if ((pw = getpwnam(username)) == 0) {
5698 fprintf(stderr, "can't find the user %s to switch to\n", username);
5699 exit(EX_NOUSER);
5700 }
5701 if (setgroups(0, NULL) < 0) {
5702 /* setgroups may fail with EPERM, indicating we are already in a
5703 * minimally-privileged state. In that case we continue. For all
5704 * other failure codes we exit.
5705 *
5706 * Note that errno is stored here because fprintf may change it.
5707 */
5708 bool should_exit = errno != EPERM;
5709 fprintf(stderr, "failed to drop supplementary groups: %s\n",
5710 strerror(errno));
5711 if (should_exit) {
5712 exit(EX_OSERR);
5713 }
5714 }
5715 if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) {
5716 fprintf(stderr, "failed to assume identity of user %s\n", username);
5717 exit(EX_OSERR);
5718 }
5719 }
5720
5721 /* Initialize Sasl if -S was specified */
5722 if (settings.sasl) {
5723 init_sasl();
5724 }
5725
5726 /* daemonize if requested */
5727 /* if we want to ensure our ability to dump core, don't chdir to / */
5728 if (do_daemonize) {
5729 if (signal(SIGHUP, SIG_IGN) == SIG_ERR) {
5730 perror("Failed to ignore SIGHUP");
5731 }
5732 if (daemonize(maxcore, settings.verbose) == -1) {
5733 fprintf(stderr, "failed to daemon() in order to daemonize\n");
5734 exit(EXIT_FAILURE);
5735 }
5736 }
5737
5738 /* lock paged memory if needed */
5739 if (lock_memory) {
5740 #ifdef HAVE_MLOCKALL
5741 int res = mlockall(MCL_CURRENT | MCL_FUTURE);
5742 if (res != 0) {
5743 fprintf(stderr, "warning: -k invalid, mlockall() failed: %s\n",
5744 strerror(errno));
5745 }
5746 #else
5747 fprintf(stderr, "warning: -k invalid, mlockall() not supported on this platform. proceeding without.\n");
5748 #endif
5749 }
5750
5751 /* initialize main thread libevent instance */
5752 #if defined(LIBEVENT_VERSION_NUMBER) && LIBEVENT_VERSION_NUMBER >= 0x02000101
5753 /* If libevent version is larger/equal to 2.0.2-alpha, use newer version */
5754 struct event_config *ev_config;
5755 ev_config = event_config_new();
5756 event_config_set_flag(ev_config, EVENT_BASE_FLAG_NOLOCK);
5757 main_base = event_base_new_with_config(ev_config);
5758 event_config_free(ev_config);
5759 #else
5760 /* Otherwise, use older API */
5761 main_base = event_init();
5762 #endif
5763
5764 /* Load initial auth file if required */
5765 if (settings.auth_file) {
5766 if (settings.udpport) {
5767 fprintf(stderr, "Cannot use UDP with ascii authentication enabled (-U 0 to disable)\n");
5768 exit(EX_USAGE);
5769 }
5770
5771 switch (authfile_load(settings.auth_file)) {
5772 case AUTHFILE_STATFAIL:
5773 vperror("Could not stat authfile [%s], error %s", settings.auth_file
5774 , strerror(errno));
5775 exit(EXIT_FAILURE);
5776 break;
5777 case AUTHFILE_OPENFAIL:
5778 vperror("Could not open authfile [%s] for reading, error %s", settings.auth_file
5779 , strerror(errno));
5780 exit(EXIT_FAILURE);
5781 break;
5782 case AUTHFILE_OOM:
5783 fprintf(stderr, "Out of memory reading password file: %s", settings.auth_file);
5784 exit(EXIT_FAILURE);
5785 break;
5786 case AUTHFILE_MALFORMED:
5787 fprintf(stderr, "Authfile [%s] has a malformed entry. Should be 'user:password'", settings.auth_file);
5788 exit(EXIT_FAILURE);
5789 break;
5790 case AUTHFILE_OK:
5791 break;
5792 }
5793 }
5794
    /* initialize other stuff */
    stats_init();
    logger_init();
    conn_init();
    bool reuse_mem = false;   /* true when a restart mmap was recovered */
    void *mem_base = NULL;    /* base of the restartable memory region */
    bool prefill = false;     /* fill the global slab pool up front */
    if (settings.memory_file != NULL) {
        preallocate = true;
        // Easier to manage memory if we prefill the global pool when reusing.
        prefill = true;
        restart_register("main", _mc_meta_load_cb, _mc_meta_save_cb, meta);
        reuse_mem = restart_mmap_open(settings.maxbytes,
                        settings.memory_file,
                        &mem_base);
        // The "save" callback gets called when we're closing out the mmap,
        // but we don't know what the mmap_base is until after we call open.
        // So we pass the struct above but have to fill it in here so the
        // data's available during the save routine.
        meta->mmap_base = mem_base;
        // Also, the callbacks for load() run before _open returns, so we
        // should have the old base in 'meta' as of here.
    }
    // Initialize the hash table _after_ checking restart metadata.
    // We override the hash table start argument with what was live
    // previously, to avoid filling a huge set of items into a tiny hash
    // table.
    assoc_init(settings.hashpower_init);
#ifdef EXTSTORE
    /* Restartable memory and extstore can't be combined; fall back to a
     * cold start rather than failing. */
    if (storage_enabled && reuse_mem) {
        fprintf(stderr, "[restart] memory restart with extstore not presently supported.\n");
        reuse_mem = false;
    }
#endif
    slabs_init(settings.maxbytes, settings.factor, preallocate,
            use_slab_sizes ? slab_sizes : NULL, mem_base, reuse_mem);
#ifdef EXTSTORE
    if (storage_enabled) {
        storage = storage_init(storage_cf);
        if (storage == NULL) {
            exit(EXIT_FAILURE);
        }
        ext_storage = storage;
        /* page mover algorithm for extstore needs memory prefilled */
        prefill = true;
    }
#endif

    if (settings.drop_privileges) {
        setup_privilege_violations_handler();
    }

    if (prefill)
        slabs_prefill_global();
    /* In restartable mode and we've decided to issue a fixup on memory */
    if (settings.memory_file != NULL && reuse_mem) {
        mc_ptr_t old_base = meta->old_base;
        assert(old_base == meta->old_base);

        // should've pulled in process_started from meta file.
        process_started = meta->process_started;
        // TODO: must be a more canonical way of serializing/deserializing
        // pointers? passing through uint64_t should work, and we're not
        // annotating the pointer with anything, but it's still slightly
        // insane.
        restart_fixup((void *)old_base);
    }
5862 /*
5863 * ignore SIGPIPE signals; we can use errno == EPIPE if we
5864 * need that information
5865 */
5866 if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
5867 perror("failed to ignore SIGPIPE; sigaction");
5868 exit(EX_OSERR);
5869 }
5870 /* start up worker threads if MT mode */
5871 #ifdef EXTSTORE
5872 slabs_set_storage(storage);
5873 memcached_thread_init(settings.num_threads, storage);
5874 init_lru_crawler(storage);
5875 #else
5876 memcached_thread_init(settings.num_threads, NULL);
5877 init_lru_crawler(NULL);
5878 #endif
5879
5880 if (start_assoc_maint && start_assoc_maintenance_thread() == -1) {
5881 exit(EXIT_FAILURE);
5882 }
5883 if (start_lru_crawler && start_item_crawler_thread() != 0) {
5884 fprintf(stderr, "Failed to enable LRU crawler thread\n");
5885 exit(EXIT_FAILURE);
5886 }
5887 #ifdef EXTSTORE
5888 if (storage && start_storage_compact_thread(storage) != 0) {
5889 fprintf(stderr, "Failed to start storage compaction thread\n");
5890 exit(EXIT_FAILURE);
5891 }
5892 if (storage && start_storage_write_thread(storage) != 0) {
5893 fprintf(stderr, "Failed to start storage writer thread\n");
5894 exit(EXIT_FAILURE);
5895 }
5896
5897 if (start_lru_maintainer && start_lru_maintainer_thread(storage) != 0) {
5898 #else
5899 if (start_lru_maintainer && start_lru_maintainer_thread(NULL) != 0) {
5900 #endif
5901 fprintf(stderr, "Failed to enable LRU maintainer thread\n");
5902 free(meta);
5903 return 1;
5904 }
5905
5906 if (settings.slab_reassign &&
5907 start_slab_maintenance_thread() == -1) {
5908 exit(EXIT_FAILURE);
5909 }
5910
5911 if (settings.idle_timeout && start_conn_timeout_thread() == -1) {
5912 exit(EXIT_FAILURE);
5913 }
5914
    /* initialise clock event */
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
    {
        struct timespec ts;
        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
            monotonic = true;
            monotonic_start = ts.tv_sec;
            // Monotonic clock needs special handling for restarts.
            // We get a start time at an arbitrary place, so we need to
            // restore the original time delta, which is always "now" - _start
            if (reuse_mem) {
                // the running timespan at stop time + the time we think we
                // were stopped.
                monotonic_start -= meta->current_time + meta->time_delta;
            } else {
                monotonic_start -= ITEM_UPDATE_INTERVAL + 2;
            }
        }
    }
#endif
    /* prime current_time and schedule the recurring clock tick */
    clock_handler(0, 0, 0);

    /* create unix mode sockets after dropping privileges */
    if (settings.socketpath != NULL) {
        errno = 0;
        if (server_socket_unix(settings.socketpath,settings.access)) {
            vperror("failed to listen on UNIX socket: %s", settings.socketpath);
            exit(EX_OSERR);
        }
    }
5945
5946 /* create the listening socket, bind it, and init */
5947 if (settings.socketpath == NULL) {
5948 const char *portnumber_filename = getenv("MEMCACHED_PORT_FILENAME");
5949 char *temp_portnumber_filename = NULL;
5950 size_t len;
5951 FILE *portnumber_file = NULL;
5952
5953 if (portnumber_filename != NULL) {
5954 len = strlen(portnumber_filename)+4+1;
5955 temp_portnumber_filename = malloc(len);
5956 snprintf(temp_portnumber_filename,
5957 len,
5958 "%s.lck", portnumber_filename);
5959
5960 portnumber_file = fopen(temp_portnumber_filename, "a");
5961 if (portnumber_file == NULL) {
5962 fprintf(stderr, "Failed to open \"%s\": %s\n",
5963 temp_portnumber_filename, strerror(errno));
5964 }
5965 }
5966
5967 errno = 0;
5968 if (settings.port && server_sockets(settings.port, tcp_transport,
5969 portnumber_file)) {
5970 vperror("failed to listen on TCP port %d", settings.port);
5971 exit(EX_OSERR);
5972 }
5973
5974 /*
5975 * initialization order: first create the listening sockets
5976 * (may need root on low ports), then drop root if needed,
5977 * then daemonize if needed, then init libevent (in some cases
5978 * descriptors created by libevent wouldn't survive forking).
5979 */
5980
5981 /* create the UDP listening socket and bind it */
5982 errno = 0;
5983 if (settings.udpport && server_sockets(settings.udpport, udp_transport,
5984 portnumber_file)) {
5985 vperror("failed to listen on UDP port %d", settings.udpport);
5986 exit(EX_OSERR);
5987 }
5988
5989 if (portnumber_file) {
5990 fclose(portnumber_file);
5991 rename(temp_portnumber_filename, portnumber_filename);
5992 }
5993 if (temp_portnumber_filename)
5994 free(temp_portnumber_filename);
5995 }
5996
    /* Give the sockets a moment to open. I know this is dumb, but the error
     * is only an advisory.
     */
    usleep(1000);
    if (stats_state.curr_conns + stats_state.reserved_fds >= settings.maxconns - 1) {
        fprintf(stderr, "Maxconns setting is too low, use -c to increase.\n");
        exit(EXIT_FAILURE);
    }

    if (pid_file != NULL) {
        save_pid(pid_file);
    }

    /* Drop privileges no longer needed */
    if (settings.drop_privileges) {
        drop_privileges();
    }

    /* Initialize the uriencode lookup table. */
    uriencode_init();

    /* enter the event loop */
    /* stop_main_loop is set asynchronously (signal/command) to request
     * shutdown; run one libevent iteration per loop so we can notice it. */
    while (!stop_main_loop) {
        if (event_base_loop(main_base, EVLOOP_ONCE) != 0) {
            retval = EXIT_FAILURE;
            break;
        }
    }

    switch (stop_main_loop) {
        case GRACE_STOP:
            fprintf(stderr, "Gracefully stopping\n");
            break;
        case EXIT_NORMALLY:
            // Don't need to print anything to STDERR for a normal shutdown.
            break;
        default:
            fprintf(stderr, "Exiting on error\n");
            break;
    }

    stop_threads();
    /* Persist restart metadata only on a graceful stop; the save
     * callbacks run while closing out the mmap. */
    if (settings.memory_file != NULL && stop_main_loop == GRACE_STOP) {
        restart_mmap_close();
    }

    /* remove the PID file if we're a daemon */
    if (do_daemonize)
        remove_pidfile(pid_file);
    /* Clean up strdup() call for bind() address */
    if (settings.inter)
        free(settings.inter);

    /* cleanup base */
    event_base_free(main_base);

    free(meta);

    return retval;
6056 }
6057