1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3 * memcached - memory caching daemon
4 *
5 * http://www.danga.com/memcached/
6 * Copyright (c) 2015, 2021, Oracle and/or its affiliates.
7 * Copyright 2003 Danga Interactive, Inc. All rights reserved.
8 * This file was modified by Oracle on 28-08-2015 and 23-03-2016.
9 * Modifications Copyright (c) 2015, 2021, Oracle and/or its affiliates.
10 * All rights reserved.
11 *
12 * Use and distribution licensed under the BSD license. See
13 * the LICENSE file for full text.
14 *
15 * Authors:
16 * Anatoly Vorobey <mellon@pobox.com>
17 * Brad Fitzpatrick <brad@danga.com>
18 */
19 #include "config.h"
20 #include "config_static.h"
21 #include "memcached.h"
22 #include "memcached/extension_loggers.h"
23 #include "utilities/engine_loader.h"
24
25 #include <signal.h>
26 #include <getopt.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <time.h>
33 #include <assert.h>
34 #include <limits.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #include <stddef.h>
38 #include <dlfcn.h>
39
40 #include "memcached_mysql.h"
41
42 #define INNODB_MEMCACHED
43
/*
 * Set the CAS value of an item by forwarding the request to the loaded
 * engine's item_set_cas() entry point.
 *
 * cookie  opaque per-request token handed through to the engine
 * it      item whose CAS value is being set
 * cas     new CAS value
 */
static inline void item_set_cas(const void *cookie, item *it, uint64_t cas) {
    settings.engine.v1->item_set_cas(settings.engine.v0, cookie, it, cas);
}
47
/*
 * Counter-update fragments ("GUTS") that STATS_INCR1 pastes between its
 * mutex lock/unlock pair.  All four accept the same argument list so they
 * can be swapped freely; each one ignores the arguments it does not need.
 *
 * NOTE: SLAB_GUTS (and therefore SLAB_THREAD_GUTS) expands to `info.clsid`,
 * so a variable named "info" with a `clsid` member must be in scope wherever
 * these macros are expanded.  (An earlier note here asked for the item to be
 * called "it"; nothing in these macros references that name.)
 */

/* Increment the per-slab-class counter `slab_op` for class info.clsid. */
#define SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_stats[info.clsid].slab_op++;

/* Increment the per-thread counter `thread_op`. */
#define THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->thread_op++;

/* Increment two per-thread counters: `slab_op` and `thread_op`. */
#define THREAD_GUTS2(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_op++; \
    thread_stats->thread_op++;

/* Increment the per-slab-class `slab_op` and the per-thread `thread_op`. */
#define SLAB_THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    THREAD_GUTS(conn, thread_stats, slab_op, thread_op)
62
/*
 * Common skeleton for the keyed statistics macros.
 *
 * Looks up the independent stats for `conn`, selects the thread_stats slot
 * indexed by conn->thread->index, runs the counter fragment `GUTS` while
 * holding that slot's mutex, and finally hands the key to TK() together with
 * the current server time (outside the mutex).
 *
 * GUTS       one of the *_GUTS fragments
 * conn       connection; used unparenthesized as `conn->thread`, so pass a
 *            plain identifier
 * slab_op    counter name passed to GUTS and to TK()
 * thread_op  counter name passed to GUTS
 * key, nkey  key and its length, passed to TK()
 *
 * The expansion is a bare `{ ... }` block, not a do/while(0) statement.
 */
#define STATS_INCR1(GUTS, conn, slab_op, thread_op, key, nkey) { \
    struct independent_stats *independent_stats = get_independent_stats(conn); \
    struct thread_stats *thread_stats = \
        &independent_stats->thread_stats[conn->thread->index]; \
    topkeys_t *topkeys = independent_stats->topkeys; \
    pthread_mutex_lock(&thread_stats->mutex); \
    GUTS(conn, thread_stats, slab_op, thread_op); \
    pthread_mutex_unlock(&thread_stats->mutex); \
    TK(topkeys, slab_op, key, nkey, current_time); \
}
73
/* Increment the per-thread counter `op` and pass the key to TK(). */
#define STATS_INCR(conn, op, key, nkey) \
    STATS_INCR1(THREAD_GUTS, conn, op, op, key, nkey)

/* Increment the per-slab-class counter `op` (needs `info` in scope). */
#define SLAB_INCR(conn, op, key, nkey) \
    STATS_INCR1(SLAB_GUTS, conn, op, op, key, nkey)

/* Increment two per-thread counters, `slab_op` and `thread_op`. */
#define STATS_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(THREAD_GUTS2, conn, slab_op, thread_op, key, nkey)

/*
 * Increment the per-slab-class counter `slab_op` and the per-thread counter
 * `thread_op` (needs `info` in scope).
 */
#define SLAB_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(SLAB_THREAD_GUTS, conn, slab_op, thread_op, key, nkey)

/*
 * Record a hit for command `op`: increments the per-slab-class counter
 * <op>_hits and the per-thread counter cmd_<op>.
 */
#define STATS_HIT(conn, op, key, nkey) \
    SLAB_TWO(conn, op##_hits, cmd_##op, key, nkey)

/*
 * Record a miss for command `op`: increments the per-thread counters
 * <op>_misses and cmd_<op>.
 */
#define STATS_MISS(conn, op, key, nkey) \
    STATS_TWO(conn, op##_misses, cmd_##op, key, nkey)
91
/*
 * Per-thread counter helpers that do not record a key.
 *
 *   STATS_NOKEY(conn, op)         thread_stats->op  += 1
 *   STATS_NOKEY2(conn, op1, op2)  thread_stats->op1 += 1, ->op2 += 1
 *   STATS_ADD(conn, op, amt)      thread_stats->op  += amt
 *
 * The thread_stats slot is obtained with get_thread_stats(conn).  When
 * HAVE_GCC_SYNC_BUILTINS is defined the update uses __sync_add_and_fetch();
 * otherwise it is done under thread_stats->mutex.
 *
 * Every variant expands to a single `do { ... } while (0)` statement, so a
 * call followed by ';' behaves like an ordinary statement in both builds
 * (including as the body of an un-braced if/else).
 *
 * MEMCACHED_ATOMIC_MSG is a message naming which variant was compiled in.
 */
#if defined(HAVE_GCC_SYNC_BUILTINS)

#define STATS_NOKEY(conn, op) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        __sync_add_and_fetch(&thread_stats->op, 1); \
    } while (0)

#define STATS_NOKEY2(conn, op1, op2) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        __sync_add_and_fetch(&thread_stats->op1, 1); \
        __sync_add_and_fetch(&thread_stats->op2, 1); \
    } while (0)

#define STATS_ADD(conn, op, amt) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        __sync_add_and_fetch(&thread_stats->op, (amt)); \
    } while (0)

#define MEMCACHED_ATOMIC_MSG "InnoDB MEMCACHED: Memcached uses atomic increment \n"

#else /* HAVE_GCC_SYNC_BUILTINS */

#define STATS_NOKEY(conn, op) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        pthread_mutex_lock(&thread_stats->mutex); \
        thread_stats->op++; \
        pthread_mutex_unlock(&thread_stats->mutex); \
    } while (0)

#define STATS_NOKEY2(conn, op1, op2) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        pthread_mutex_lock(&thread_stats->mutex); \
        thread_stats->op1++; \
        thread_stats->op2++; \
        pthread_mutex_unlock(&thread_stats->mutex); \
    } while (0)

#define STATS_ADD(conn, op, amt) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        pthread_mutex_lock(&thread_stats->mutex); \
        thread_stats->op += (amt); \
        pthread_mutex_unlock(&thread_stats->mutex); \
    } while (0)

#define MEMCACHED_ATOMIC_MSG "InnoDB Memcached: Memcached DOES NOT use atomic increment"
#endif /* HAVE_GCC_SYNC_BUILTINS */
146
/*
 * Process-wide lifecycle flags.
 * NOTE(review): the writers and readers of these flags are outside this part
 * of the file; presumably they signal "shutdown requested" and "start-up
 * finished" respectively - confirm against the users.
 */
volatile sig_atomic_t memcached_shutdown;
volatile sig_atomic_t memcached_initialized;

/*
 * We keep the current time of day in a global variable that's updated by a
 * timer event. This saves us a bunch of time() system calls (we really only
 * need to get the time once a second, whereas there can be tens of thousands
 * of requests a second) and allows us to use server-start-relative timestamps
 * rather than absolute UNIX timestamps, a space savings on systems where
 * sizeof(time_t) > sizeof(unsigned int).
 */
volatile rel_time_t current_time;
159
/*
 * Forward declarations of file-local helpers that are used before they are
 * defined.
 */
static SOCKET new_socket(struct addrinfo *ai);
static int try_read_command(conn *c);
/* Stats lookups used by the STATS_* macros and by stats_reset(). */
static inline struct independent_stats *get_independent_stats(conn *c);
static inline struct thread_stats *get_thread_stats(conn *c);
static void register_callback(ENGINE_HANDLE *eh,
                              ENGINE_EVENT_TYPE type,
                              EVENT_CALLBACK cb, const void *cb_data);
/* Outcome of one attempt to read from a connection (see try_read_*()). */
enum try_read_result {
    READ_DATA_RECEIVED,     /** some data was read */
    READ_NO_DATA_RECEIVED,  /** nothing was read */
    READ_ERROR,             /** an error occurred (on the socket) (or client closed connection) */
    READ_MEMORY_ERROR       /** failed to allocate more memory */
};
176
/* transport-specific readers; both report their outcome as try_read_result */
static enum try_read_result try_read_network(conn *c);
static enum try_read_result try_read_udp(conn *c);

/* stats */
static void stats_init(void);
static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate);
static void process_stat_settings(ADD_STAT add_stats, void *c);


/* defaults */
static void settings_init(void);

/* event handling, network IO */
static void event_handler(const int fd, const short which, void *arg);
static void complete_nread(conn *c);
static char *process_command(conn *c, char *command);
static void write_and_free(conn *c, char *buf, int bytes);
static int ensure_iov_space(conn *c);
static int add_iov(conn *c, const void *buf, int len);
static int add_msghdr(conn *c);


/* time handling */
static void set_current_time(void);  /* update the global variable holding
                                        global 32-bit seconds-since-start time
                                        (to avoid 64 bit time_t) */
203
/** exported globals **/
struct stats stats;       /* global counters; updated under STATS_LOCK() in this file */
struct settings settings; /* runtime configuration; defaults set by settings_init() */
static time_t process_started;     /* when the process was started (file-local) */

/** file scope variables **/
static conn *listen_conn = NULL;   /* head of the ->next chain of listening connections */
static int udp_socket[100];        /* NOTE(review): fixed capacity of 100; users are not visible here */
static int num_udp_socket;
static struct event_base *main_base;
static struct independent_stats *default_independent_stats;

/* One handler list per engine event type; walked by perform_callbacks(). */
static struct engine_event_handler *engine_event_handlers[MAX_ENGINE_EVENT_TYPE + 1];
217
/* Outcome of one call to transmit(). */
enum transmit_result {
    TRANSMIT_COMPLETE,   /** All done writing. */
    TRANSMIT_INCOMPLETE, /** More data remaining to write. */
    TRANSMIT_SOFT_ERROR, /** Can't write any more right now. */
    TRANSMIT_HARD_ERROR  /** Can't write (c->state is set to conn_closing) */
};

/* Defined later in this file; declared here because it is used earlier. */
static enum transmit_result transmit(conn *c);
226
/*
 * Number of seconds in 30 days: the largest expiry value that realtime()
 * treats as a delta from "now" rather than as an absolute UNIX timestamp.
 * The replacement list is parenthesized so that the macro expands correctly
 * inside larger expressions (e.g. `x / REALTIME_MAXDELTA`).
 */
#define REALTIME_MAXDELTA (60*60*24*30)
228
229 // Perform all callbacks of a given type for the given connection.
perform_callbacks(ENGINE_EVENT_TYPE type,const void * data,const void * c)230 static void perform_callbacks(ENGINE_EVENT_TYPE type,
231 const void *data,
232 const void *c) {
233 for (struct engine_event_handler *h = engine_event_handlers[type];
234 h; h = h->next) {
235 h->cb(c, type, data, h->cb_data);
236 }
237 }
238
239 /*
240 * given time value that's either unix time or delta from current unix time,
241 * return unix time. Use the fact that delta can't exceed one month
242 * (and real time value can't be that low).
243 */
realtime(const time_t exptime)244 static rel_time_t realtime(const time_t exptime) {
245 /* no. of seconds in 30 days - largest possible delta exptime */
246
247 if (exptime == 0) return 0; /* 0 means never expire */
248
249 if (exptime > REALTIME_MAXDELTA) {
250 /* if item expiration is at/before the server started, give it an
251 expiration time of 1 second after the server started.
252 (because 0 means don't expire). without this, we'd
253 underflow and wrap around to some large value way in the
254 future, effectively making items expiring in the past
255 really expiring never */
256 if (exptime <= process_started)
257 return (rel_time_t)1;
258 return (rel_time_t)(exptime - process_started);
259 } else {
260 return (rel_time_t)(exptime + current_time);
261 }
262 }
263
264 /**
265 * Convert the relative time to an absolute time (relative to EPOC ;) )
266 */
abstime(const rel_time_t exptime)267 static time_t abstime(const rel_time_t exptime)
268 {
269 return process_started + exptime;
270 }
271
stats_init(void)272 static void stats_init(void) {
273 stats.daemon_conns = 0;
274 stats.rejected_conns = 0;
275 stats.curr_conns = stats.total_conns = stats.conn_structs = 0;
276
277 stats_prefix_init();
278 }
279
stats_reset(const void * cookie)280 static void stats_reset(const void *cookie) {
281 struct conn *conn = (struct conn*)cookie;
282 STATS_LOCK();
283 stats.rejected_conns = 0;
284 stats.total_conns = 0;
285 stats_prefix_clear();
286 STATS_UNLOCK();
287 threadlocal_stats_reset(get_independent_stats(conn)->thread_stats);
288 settings.engine.v1->reset_stats(settings.engine.v0, cookie);
289 }
290
settings_init(void)291 static void settings_init(void) {
292 settings.use_cas = true;
293 settings.access = 0700;
294 settings.port = 11211;
295 settings.udpport = 11211;
296 /* By default this string should be NULL for getaddrinfo() */
297 settings.inter = NULL;
298 settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
299 settings.maxconns = 1000; /* to limit connections-related memory to about 5MB */
300 settings.verbose = 0;
301 settings.oldest_live = 0;
302 settings.evict_to_free = 1; /* push old items out of cache when memory runs out */
303 settings.socketpath = NULL; /* by default, not using a unix socket */
304 settings.factor = 1.25;
305 settings.chunk_size = 48; /* space for a modest key and value */
306 settings.num_threads = 4; /* N workers */
307 settings.num_threads_per_udp = 0;
308 settings.prefix_delimiter = ':';
309 settings.detail_enabled = 0;
310 settings.allow_detailed = true;
311 settings.reqs_per_event = DEFAULT_REQS_PER_EVENT;
312 settings.backlog = 1024;
313 settings.binding_protocol = negotiating_prot;
314 settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
315 settings.topkeys = 0;
316 settings.require_sasl = false;
317 settings.extensions.logger = get_stderr_logger();
318 }
319
320 /*
321 * Adds a message header to a connection.
322 *
323 * Returns 0 on success, -1 on out-of-memory.
324 */
add_msghdr(conn * c)325 static int add_msghdr(conn *c)
326 {
327 struct msghdr *msg;
328
329 assert(c != NULL);
330
331 if (c->msgsize == c->msgused) {
332 msg = realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr));
333 if (! msg)
334 return -1;
335 c->msglist = msg;
336 c->msgsize *= 2;
337 }
338
339 msg = c->msglist + c->msgused;
340
341 /* this wipes msg_iovlen, msg_control, msg_controllen, and
342 msg_flags, the last 3 of which aren't defined on solaris: */
343 memset(msg, 0, sizeof(struct msghdr));
344
345 msg->msg_iov = &c->iov[c->iovused];
346
347 if (c->request_addr_size > 0) {
348 msg->msg_name = &c->request_addr;
349 msg->msg_namelen = c->request_addr_size;
350 }
351
352 c->msgbytes = 0;
353 c->msgused++;
354
355 if (IS_UDP(c->transport)) {
356 /* Leave room for the UDP header, which we'll fill in later. */
357 return add_iov(c, NULL, UDP_HEADER_SIZE);
358 }
359
360 return 0;
361 }
362
prot_text(enum protocol prot)363 static const char *prot_text(enum protocol prot) {
364 const char *rv = "unknown";
365 switch(prot) {
366 case ascii_prot:
367 rv = "ascii";
368 break;
369 case binary_prot:
370 rv = "binary";
371 break;
372 case negotiating_prot:
373 rv = "auto-negotiate";
374 break;
375 }
376 return rv;
377 }
378
/*
 * State describing whether the listening sockets are currently disabled.
 * The accessors in this file read and write the fields under `mutex`.
 */
struct {
    pthread_mutex_t mutex;  /* guards the fields below */
    bool disabled;          /* set to true by disable_listen() */
    ssize_t count;          /* set to 10 by disable_listen(); NOTE(review): consumer not visible here */
    uint64_t num_disable;   /* incremented on every disable_listen() call */
} listen_state;
385
is_listen_disabled(void)386 static bool is_listen_disabled(void) {
387 bool ret;
388 pthread_mutex_lock(&listen_state.mutex);
389 ret = listen_state.disabled;
390 pthread_mutex_unlock(&listen_state.mutex);
391 return ret;
392 }
393
get_listen_disabled_num(void)394 static uint64_t get_listen_disabled_num(void) {
395 uint64_t ret;
396 pthread_mutex_lock(&listen_state.mutex);
397 ret = listen_state.num_disable;
398 pthread_mutex_unlock(&listen_state.mutex);
399 return ret;
400 }
401
disable_listen(void)402 static void disable_listen(void) {
403 pthread_mutex_lock(&listen_state.mutex);
404 listen_state.disabled = true;
405 listen_state.count = 10;
406 ++listen_state.num_disable;
407 pthread_mutex_unlock(&listen_state.mutex);
408
409 conn *next;
410 for (next = listen_conn; next; next = next->next) {
411 update_event(next, 0);
412 if (listen(next->sfd, 1) != 0) {
413 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
414 "listen() failed",
415 strerror(errno));
416 }
417 }
418 }
419
/*
 * Close a socket, retrying while closesocket() fails with EINTR or EAGAIN.
 *
 * Does nothing for INVALID_SOCKET.  If the close ultimately fails, a warning
 * is logged.  On success stats.curr_conns is decremented and, when listening
 * is currently disabled, the dispatcher is notified.
 */
void safe_close(SOCKET sfd) {
    if (sfd == INVALID_SOCKET) {
        return;
    }

    int rval;
    do {
        rval = closesocket(sfd);
        /* go ahead and retry on EINTR / EAGAIN */
    } while (rval == SOCKET_ERROR && (errno == EINTR || errno == EAGAIN));

    if (rval == SOCKET_ERROR) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                        "Failed to close socket %d (%s)!!\n", (int)sfd,
                                        strerror(errno));
        return;
    }

    STATS_LOCK();
    stats.curr_conns--;
    STATS_UNLOCK();

    if (is_listen_disabled()) {
        notify_dispatcher();
    }
}
443
/*
 * Free list management for connections.
 */
cache_t *conn_cache;      /* object cache that conn structures are allocated from and returned to */
448
449 /**
450 * Reset all of the dynamic buffers used by a connection back to their
451 * default sizes. The strategy for resizing the buffers is to allocate a
452 * new one of the correct size and free the old one if the allocation succeeds
453 * instead of using realloc to change the buffer size (because realloc may
454 * not shrink the buffers, and will also copy the memory). If the allocation
455 * fails the buffer will be unchanged.
456 *
457 * @param c the connection to resize the buffers for
458 * @return true if all allocations succeeded, false if one or more of the
459 * allocations failed.
460 */
conn_reset_buffersize(conn * c)461 static bool conn_reset_buffersize(conn *c) {
462 bool ret = true;
463
464 if (c->rsize != DATA_BUFFER_SIZE) {
465 void *ptr = malloc(DATA_BUFFER_SIZE);
466 if (ptr != NULL) {
467 free(c->rbuf);
468 c->rbuf = ptr;
469 c->rsize = DATA_BUFFER_SIZE;
470 } else {
471 ret = false;
472 }
473 }
474
475 if (c->wsize != DATA_BUFFER_SIZE) {
476 void *ptr = malloc(DATA_BUFFER_SIZE);
477 if (ptr != NULL) {
478 free(c->wbuf);
479 c->wbuf = ptr;
480 c->wsize = DATA_BUFFER_SIZE;
481 } else {
482 ret = false;
483 }
484 }
485
486 if (c->isize != ITEM_LIST_INITIAL) {
487 void *ptr = malloc(sizeof(item *) * ITEM_LIST_INITIAL);
488 if (ptr != NULL) {
489 free(c->ilist);
490 c->ilist = ptr;
491 c->isize = ITEM_LIST_INITIAL;
492 } else {
493 ret = false;
494 }
495 }
496
497 if (c->suffixsize != SUFFIX_LIST_INITIAL) {
498 void *ptr = malloc(sizeof(char *) * SUFFIX_LIST_INITIAL);
499 if (ptr != NULL) {
500 free(c->suffixlist);
501 c->suffixlist = ptr;
502 c->suffixsize = SUFFIX_LIST_INITIAL;
503 } else {
504 ret = false;
505 }
506 }
507
508 if (c->iovsize != IOV_LIST_INITIAL) {
509 void *ptr = malloc(sizeof(struct iovec) * IOV_LIST_INITIAL);
510 if (ptr != NULL) {
511 free(c->iov);
512 c->iov = ptr;
513 c->iovsize = IOV_LIST_INITIAL;
514 } else {
515 ret = false;
516 }
517 }
518
519 if (c->msgsize != MSG_LIST_INITIAL) {
520 void *ptr = malloc(sizeof(struct msghdr) * MSG_LIST_INITIAL);
521 if (ptr != NULL) {
522 free(c->msglist);
523 c->msglist = ptr;
524 c->msgsize = MSG_LIST_INITIAL;
525 } else {
526 ret = false;
527 }
528 }
529
530 return ret;
531 }
532
533 /**
534 * Constructor for all memory allocations of connection objects. Initialize
535 * all members and allocate the transfer buffers.
536 *
537 * @param buffer The memory allocated by the object cache
538 * @param unused1 not used
539 * @param unused2 not used
540 * @return 0 on success, 1 if we failed to allocate memory
541 */
conn_constructor(void * buffer,void * unused1,int unused2)542 static int conn_constructor(void *buffer, void *unused1, int unused2) {
543 (void)unused1; (void)unused2;
544
545 conn *c = buffer;
546 memset(c, 0, sizeof(*c));
547 MEMCACHED_CONN_CREATE(c);
548
549 if (!conn_reset_buffersize(c)) {
550 free(c->rbuf);
551 free(c->wbuf);
552 free(c->ilist);
553 free(c->suffixlist);
554 free(c->iov);
555 free(c->msglist);
556 settings.extensions.logger->log(EXTENSION_LOG_WARNING,
557 NULL,
558 "Failed to allocate buffers for connection\n");
559 return 1;
560 }
561
562 STATS_LOCK();
563 stats.conn_structs++;
564 STATS_UNLOCK();
565
566 return 0;
567 }
568
569 /**
570 * Destructor for all connection objects. Release all allocated resources.
571 *
572 * @param buffer The memory allocated by the objec cache
573 * @param unused not used
574 */
conn_destructor(void * buffer,void * unused)575 static void conn_destructor(void *buffer, void *unused) {
576 (void)unused;
577 conn *c = buffer;
578 free(c->rbuf);
579 free(c->wbuf);
580 free(c->ilist);
581 free(c->suffixlist);
582 free(c->iov);
583 free(c->msglist);
584
585 STATS_LOCK();
586 stats.conn_structs--;
587 STATS_UNLOCK();
588 }
589
/*
 * Obtain a connection object from conn_cache and initialise it for socket
 * `sfd`.
 *
 * sfd               socket the connection wraps
 * init_state        initial state function; also stored as write_and_go
 * event_flags       libevent flags the connection's event is created with
 * read_buffer_size  minimum size wanted for the read buffer; the buffer is
 *                   replaced by a larger one if it is currently smaller
 * transport         network transport of the connection
 * base              event base the connection's event is attached to
 * timeout           passed to register_event()
 *
 * Returns the connection, or NULL if the cache allocation, the read-buffer
 * allocation or register_event() fails (the object is returned to the cache
 * in the latter two cases).  On success stats.total_conns is incremented and
 * the ON_CONNECT callbacks are run.
 */
conn *conn_new(const SOCKET sfd, STATE_FUNC init_state,
               const int event_flags,
               const int read_buffer_size, enum network_transport transport,
               struct event_base *base, struct timeval *timeout) {
    conn *c = cache_alloc(conn_cache);
    if (c == NULL) {
        return NULL;
    }

    /* A cached connection must not be bound to a worker thread. */
    assert(c->thread == NULL);

    /* Grow the read buffer if the caller asked for more than we have. */
    if (c->rsize < read_buffer_size) {
        void *mem = malloc(read_buffer_size);
        if (mem) {
            c->rsize = read_buffer_size;
            free(c->rbuf);
            c->rbuf = mem;
        } else {
            assert(c->thread == NULL);
            cache_free(conn_cache, c);
            return NULL;
        }
    }

    c->transport = transport;
    c->protocol = settings.binding_protocol;

    /* unix socket mode doesn't need this, so zeroed out.  but why
     * is this done for every command?  presumably for UDP
     * mode.  */
    if (!settings.socketpath) {
        c->request_addr_size = sizeof(c->request_addr);
    } else {
        c->request_addr_size = 0;
    }

    /* Debug trace describing what kind of connection was created. */
    if (settings.verbose > 1) {
        if (init_state == conn_listening) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d server listening (%s)\n", sfd,
                                            prot_text(c->protocol));
        } else if (IS_UDP(transport)) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d server listening (udp)\n", sfd);
        } else if (c->protocol == negotiating_prot) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new auto-negotiating client connection\n",
                                            sfd);
        } else if (c->protocol == ascii_prot) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new ascii client connection.\n", sfd);
        } else if (c->protocol == binary_prot) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new binary client connection.\n", sfd);
        } else {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new unknown (%d) client connection\n",
                                            sfd, c->protocol);
            assert(false);
        }
    }

    /* Reset the per-connection state machine and buffer cursors. */
    c->sfd = sfd;
    c->state = init_state;
    c->rlbytes = 0;
    c->cmd = -1;
    c->ascii_cmd = NULL;
    c->rbytes = c->wbytes = 0;
    c->wcurr = c->wbuf;
    c->rcurr = c->rbuf;
    c->ritem = 0;
    c->icurr = c->ilist;
    c->suffixcurr = c->suffixlist;
    c->ileft = 0;
    c->suffixleft = 0;
    c->iovused = 0;
    c->msgcurr = 0;
    c->msgused = 0;
    c->next = NULL;
    c->list_state = 0;

    c->write_and_go = init_state;
    c->write_and_free = 0;
    c->item = 0;

    c->noreply = false;

    /* Hook the socket up to the event base with event_handler as callback. */
    event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
    event_base_set(base, &c->event);
    c->ev_flags = event_flags;

    if (!register_event(c, timeout)) {
        assert(c->thread == NULL);
        cache_free(conn_cache, c);
        return NULL;
    }

    STATS_LOCK();
    stats.total_conns++;
    STATS_UNLOCK();

    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    c->refcount = 1;

    MEMCACHED_CONN_ALLOCATE(c->sfd);

    /* Tell registered listeners that a new connection exists. */
    perform_callbacks(ON_CONNECT, NULL, c);

    return c;
}
701
/*
 * Release everything a connection still holds and detach it from its
 * thread: the pending item, any queued items and suffix buffers, the
 * write_and_free buffer, the SASL context and the engine-specific storage.
 * Afterwards thread, tap iterator and ascii command are cleared and the
 * socket is marked INVALID_SOCKET.
 */
static void conn_cleanup(conn *c) {
    assert(c != NULL);

    if (c->item) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }

    /* Hand back every item still waiting in the item list. */
    while (c->ileft > 0) {
        settings.engine.v1->release(settings.engine.v0, c, *(c->icurr));
        c->ileft--;
        c->icurr++;
    }

    /* Return the remaining suffix buffers to the thread's suffix cache. */
    while (c->suffixleft > 0) {
        cache_free(c->thread->suffix_cache, *(c->suffixcurr));
        c->suffixleft--;
        c->suffixcurr++;
    }

    if (c->write_and_free) {
        free(c->write_and_free);
        c->write_and_free = 0;
    }

    if (c->sasl_conn) {
        sasl_dispose(&c->sasl_conn);
        c->sasl_conn = NULL;
    }

    if (c->engine_storage) {
        settings.engine.v1->clean_engine(settings.engine.v0, c,
                                         c->engine_storage);
    }
    c->engine_storage = NULL;

    c->tap_iterator = NULL;
    c->tap_nack_mode = false;
    c->thread = NULL;
    assert(c->next == NULL);
    c->ascii_cmd = NULL;
    c->sfd = INVALID_SOCKET;
}
745
/*
 * Tear down a connection whose socket has already been closed and return
 * the object to conn_cache.
 *
 * The caller must have set c->sfd to INVALID_SOCKET and the connection must
 * still be bound to a thread (both are asserted).  Any ASCII command in
 * progress is aborted, the connection is unlinked from its thread's
 * pending-io and pending-close lists under the thread lock, its resources
 * are released by conn_cleanup(), and its buffers are reset to their default
 * sizes before it is handed back to the cache.
 */
void conn_close(conn *c) {
    assert(c != NULL);
    assert(c->sfd == INVALID_SOCKET);

    if (c->ascii_cmd != NULL) {
        c->ascii_cmd->abort(c->ascii_cmd, c);
    }

    assert(c->thread);
    LOCK_THREAD(c->thread);
    /* remove from pending-io list */
    if (settings.verbose > 1 && list_contains(c->thread->pending_io, c)) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Current connection was in the pending-io list.. Nuking it\n");
    }
    c->thread->pending_io = list_remove(c->thread->pending_io, c);
    c->thread->pending_close = list_remove(c->thread->pending_close, c);
    UNLOCK_THREAD(c->thread);

    /* Must run after the unlock above: conn_cleanup() clears c->thread. */
    conn_cleanup(c);

    /*
     * The contract with the object cache is that we should return the
     * object in a constructed state. Reset the buffers to the default
     * size
     */
    conn_reset_buffersize(c);
    assert(c->thread == NULL);
    cache_free(conn_cache, c);
}
776
777 /*
778 * Shrinks a connection's buffers if they're too big. This prevents
779 * periodic large "get" requests from permanently chewing lots of server
780 * memory.
781 *
782 * This should only be called in between requests since it can wipe output
783 * buffers!
784 */
conn_shrink(conn * c)785 static void conn_shrink(conn *c) {
786 assert(c != NULL);
787
788 if (IS_UDP(c->transport))
789 return;
790
791 if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) {
792 char *newbuf;
793
794 if (c->rcurr != c->rbuf)
795 memmove(c->rbuf, c->rcurr, (size_t)c->rbytes);
796
797 newbuf = (char *)realloc((void *)c->rbuf, DATA_BUFFER_SIZE);
798
799 if (newbuf) {
800 c->rbuf = newbuf;
801 c->rsize = DATA_BUFFER_SIZE;
802 }
803 /* TODO check other branch... */
804 c->rcurr = c->rbuf;
805 }
806
807 if (c->isize > ITEM_LIST_HIGHWAT) {
808 item **newbuf = (item**) realloc((void *)c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0]));
809 if (newbuf) {
810 c->ilist = newbuf;
811 c->isize = ITEM_LIST_INITIAL;
812 }
813 /* TODO check error condition? */
814 }
815
816 if (c->msgsize > MSG_LIST_HIGHWAT) {
817 struct msghdr *newbuf = (struct msghdr *) realloc((void *)c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0]));
818 if (newbuf) {
819 c->msglist = newbuf;
820 c->msgsize = MSG_LIST_INITIAL;
821 }
822 /* TODO check error condition? */
823 }
824
825 if (c->iovsize > IOV_LIST_HIGHWAT) {
826 struct iovec *newbuf = (struct iovec *) realloc((void *)c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0]));
827 if (newbuf) {
828 c->iov = newbuf;
829 c->iovsize = IOV_LIST_INITIAL;
830 }
831 /* TODO check return value */
832 }
833 }
834
835 /**
836 * Convert a state name to a human readable form.
837 */
state_text(STATE_FUNC state)838 const char *state_text(STATE_FUNC state) {
839 if (state == conn_listening) {
840 return "conn_listening";
841 } else if (state == conn_new_cmd) {
842 return "conn_new_cmd";
843 } else if (state == conn_waiting) {
844 return "conn_waiting";
845 } else if (state == conn_read) {
846 return "conn_read";
847 } else if (state == conn_parse_cmd) {
848 return "conn_parse_cmd";
849 } else if (state == conn_write) {
850 return "conn_write";
851 } else if (state == conn_nread) {
852 return "conn_nread";
853 } else if (state == conn_swallow) {
854 return "conn_swallow";
855 } else if (state == conn_closing) {
856 return "conn_closing";
857 } else if (state == conn_mwrite) {
858 return "conn_mwrite";
859 } else if (state == conn_ship_log) {
860 return "conn_ship_log";
861 } else if (state == conn_add_tap_client) {
862 return "conn_add_tap_client";
863 } else if (state == conn_setup_tap_stream) {
864 return "conn_setup_tap_stream";
865 } else if (state == conn_pending_close) {
866 return "conn_pending_close";
867 } else if (state == conn_immediate_close) {
868 return "conn_immediate_close";
869 } else {
870 return "Unknown";
871 }
872 }
873
874 /*
875 * Sets a connection's current state in the state machine. Any special
876 * processing that needs to happen on certain state transitions can
877 * happen here.
878 */
conn_set_state(conn * c,STATE_FUNC state)879 void conn_set_state(conn *c, STATE_FUNC state) {
880 assert(c != NULL);
881
882 if (state != c->state) {
883 /*
884 * The connections in the "tap thread" behaves differently than
885 * normal connections because they operate in a full duplex mode.
886 * New messages may appear from both sides, so we can't block on
887 * read from the nework / engine
888 */
889 if (c->thread == tap_thread) {
890 if (state == conn_waiting) {
891 c->which = EV_WRITE;
892 state = conn_ship_log;
893 }
894 }
895
896 if (settings.verbose > 2 || c->state == conn_closing
897 || c->state == conn_add_tap_client) {
898 settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
899 "%d: going from %s to %s\n",
900 c->sfd, state_text(c->state),
901 state_text(state));
902 }
903
904 c->state = state;
905
906 if (state == conn_write || state == conn_mwrite) {
907 MEMCACHED_PROCESS_COMMAND_END(c->sfd, c->wbuf, c->wbytes);
908 }
909 }
910 }
911
912 /*
913 * Ensures that there is room for another struct iovec in a connection's
914 * iov list.
915 *
916 * Returns 0 on success, -1 on out-of-memory.
917 */
ensure_iov_space(conn * c)918 static int ensure_iov_space(conn *c) {
919 assert(c != NULL);
920
921 if (c->iovused >= c->iovsize) {
922 int i, iovnum;
923 struct iovec *new_iov = (struct iovec *)realloc(c->iov,
924 (c->iovsize * 2) * sizeof(struct iovec));
925 if (! new_iov)
926 return -1;
927 c->iov = new_iov;
928 c->iovsize *= 2;
929
930 /* Point all the msghdr structures at the new list. */
931 for (i = 0, iovnum = 0; i < c->msgused; i++) {
932 c->msglist[i].msg_iov = &c->iov[iovnum];
933 iovnum += c->msglist[i].msg_iovlen;
934 }
935 }
936
937 return 0;
938 }
939
940
941 /*
942 * Adds data to the list of pending data that will be written out to a
943 * connection.
944 *
945 * Returns 0 on success, -1 on out-of-memory.
946 */
947
add_iov(conn * c,const void * buf,int len)948 static int add_iov(conn *c, const void *buf, int len) {
949 struct msghdr *m;
950 int leftover;
951 bool limit_to_mtu;
952
953 assert(c != NULL);
954
955 do {
956 m = &c->msglist[c->msgused - 1];
957
958 /*
959 * Limit UDP packets, and the first payloads of TCP replies, to
960 * UDP_MAX_PAYLOAD_SIZE bytes.
961 */
962 limit_to_mtu = IS_UDP(c->transport) || (1 == c->msgused);
963
964 /* We may need to start a new msghdr if this one is full. */
965 if (m->msg_iovlen == IOV_MAX ||
966 (limit_to_mtu && c->msgbytes >= UDP_MAX_PAYLOAD_SIZE)) {
967 add_msghdr(c);
968 m = &c->msglist[c->msgused - 1];
969 }
970
971 if (ensure_iov_space(c) != 0)
972 return -1;
973
974 /* If the fragment is too big to fit in the datagram, split it up */
975 if (limit_to_mtu && len + c->msgbytes > UDP_MAX_PAYLOAD_SIZE) {
976 leftover = len + c->msgbytes - UDP_MAX_PAYLOAD_SIZE;
977 len -= leftover;
978 } else {
979 leftover = 0;
980 }
981
982 m = &c->msglist[c->msgused - 1];
983 m->msg_iov[m->msg_iovlen].iov_base = (void *)buf;
984 m->msg_iov[m->msg_iovlen].iov_len = len;
985
986 c->msgbytes += len;
987 c->iovused++;
988 m->msg_iovlen++;
989
990 buf = ((char *)buf) + len;
991 len = leftover;
992 } while (leftover > 0);
993
994 return 0;
995 }
996
997
998 /*
999 * Constructs a set of UDP headers and attaches them to the outgoing messages.
1000 */
build_udp_headers(conn * c)1001 static int build_udp_headers(conn *c) {
1002 int i;
1003 unsigned char *hdr;
1004
1005 assert(c != NULL);
1006
1007 if (c->msgused > c->hdrsize) {
1008 void *new_hdrbuf;
1009 if (c->hdrbuf)
1010 new_hdrbuf = realloc(c->hdrbuf, c->msgused * 2 * UDP_HEADER_SIZE);
1011 else
1012 new_hdrbuf = malloc(c->msgused * 2 * UDP_HEADER_SIZE);
1013 if (! new_hdrbuf)
1014 return -1;
1015 c->hdrbuf = (unsigned char *)new_hdrbuf;
1016 c->hdrsize = c->msgused * 2;
1017 }
1018
1019 hdr = c->hdrbuf;
1020 for (i = 0; i < c->msgused; i++) {
1021 c->msglist[i].msg_iov[0].iov_base = (void*)hdr;
1022 c->msglist[i].msg_iov[0].iov_len = UDP_HEADER_SIZE;
1023 *hdr++ = c->request_id / 256;
1024 *hdr++ = c->request_id % 256;
1025 *hdr++ = i / 256;
1026 *hdr++ = i % 256;
1027 *hdr++ = c->msgused / 256;
1028 *hdr++ = c->msgused % 256;
1029 *hdr++ = 0;
1030 *hdr++ = 0;
1031 assert((void *) hdr == (caddr_t)c->msglist[i].msg_iov[0].iov_base + UDP_HEADER_SIZE);
1032 }
1033
1034 return 0;
1035 }
1036
1037
/*
 * Queue a single-line text reply for a connection.
 *
 * If the request was flagged "noreply", nothing is written: the flag is
 * cleared and the connection moves straight on. Otherwise any partially
 * built output is discarded, `str` followed by "\r\n" is copied into the
 * write buffer (replaced by a fixed error line if it would not fit), and
 * the connection is switched to conn_write.
 *
 * In both cases the state entered next is conn_swallow when c->sbytes > 0
 * and conn_new_cmd otherwise.
 */
static void out_string(conn *c, const char *str) {
    assert(c != NULL);

    if (c->noreply) {
        if (settings.verbose > 1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            ">%d NOREPLY %s\n", c->sfd, str);
        }
        c->noreply = false;
        conn_set_state(c, (c->sbytes > 0) ? conn_swallow : conn_new_cmd);
        return;
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d %s\n", c->sfd, str);
    }

    /* Nuke a partial output... */
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    add_msghdr(c);

    size_t line_len = strlen(str);
    if ((line_len + 2) > c->wsize) {
        /* ought to be always enough. just fail for simplicity */
        str = "SERVER_ERROR output line too long";
        line_len = strlen(str);
    }

    memcpy(c->wbuf, str, line_len);
    memcpy(c->wbuf + line_len, "\r\n", 2);
    c->wbytes = line_len + 2;
    c->wcurr = c->wbuf;

    conn_set_state(c, conn_write);

    c->write_and_go = (c->sbytes > 0) ? conn_swallow : conn_new_cmd;
}
1090
1091 /*
1092 * we get here after reading the value in set/add/replace commands. The command
1093 * has been stored in c->cmd, and the item is ready in c->item.
1094 */
complete_update_ascii(conn * c)1095 static void complete_update_ascii(conn *c) {
1096 assert(c != NULL);
1097
1098 item *it = c->item;
1099 item_info info = { .nvalue = 1 };
1100 if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
1101 settings.engine.v1->release(settings.engine.v0, c, it);
1102 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
1103 "%d: Failed to get item info\n",
1104 c->sfd);
1105 out_string(c, "SERVER_ERROR failed to get item details");
1106 return;
1107 }
1108
1109 c->sbytes = 2; // swallow \r\n
1110 ENGINE_ERROR_CODE ret = c->aiostat;
1111 c->aiostat = ENGINE_SUCCESS;
1112 if (ret == ENGINE_SUCCESS) {
1113 ret = settings.engine.v1->store(settings.engine.v0, c, it, &c->cas,
1114 c->store_op, 0);
1115 }
1116
1117 #ifdef ENABLE_DTRACE
1118 switch (c->store_op) {
1119 case OPERATION_ADD:
1120 MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
1121 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1122 break;
1123 case OPERATION_REPLACE:
1124 MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
1125 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1126 break;
1127 case OPERATION_APPEND:
1128 MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
1129 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1130 break;
1131 case OPERATION_PREPEND:
1132 MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
1133 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1134 break;
1135 case OPERATION_SET:
1136 MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
1137 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1138 break;
1139 case OPERATION_CAS:
1140 MEMCACHED_COMMAND_CAS(c->sfd, info.key, info.nkey, info.nbytes, c->cas);
1141 break;
1142 }
1143 #endif
1144
1145 switch (ret) {
1146 case ENGINE_SUCCESS:
1147 out_string(c, "STORED");
1148 break;
1149 case ENGINE_KEY_EEXISTS:
1150 out_string(c, "EXISTS");
1151 break;
1152 case ENGINE_KEY_ENOENT:
1153 out_string(c, "NOT_FOUND");
1154 break;
1155 case ENGINE_NOT_STORED:
1156 out_string(c, "NOT_STORED");
1157 break;
1158 case ENGINE_DISCONNECT:
1159 c->state = conn_closing;
1160 break;
1161 case ENGINE_ENOTSUP:
1162 out_string(c, "SERVER_ERROR not supported");
1163 break;
1164 case ENGINE_ENOMEM:
1165 out_string(c, "SERVER_ERROR out of memory");
1166 break;
1167 case ENGINE_TMPFAIL:
1168 out_string(c, "SERVER_ERROR temporary failure");
1169 break;
1170 case ENGINE_EINVAL:
1171 out_string(c, "CLIENT_ERROR invalid arguments");
1172 break;
1173 case ENGINE_E2BIG:
1174 out_string(c, "CLIENT_ERROR value too big");
1175 break;
1176 case ENGINE_EACCESS:
1177 out_string(c, "CLIENT_ERROR access control violation");
1178 break;
1179 case ENGINE_NOT_MY_VBUCKET:
1180 out_string(c, "SERVER_ERROR not my vbucket");
1181 break;
1182 case ENGINE_FAILED:
1183 out_string(c, "SERVER_ERROR failure");
1184 break;
1185 case ENGINE_EWOULDBLOCK:
1186 c->ewouldblock = true;
1187 break;
1188 case ENGINE_WANT_MORE:
1189 assert(false);
1190 c->state = conn_closing;
1191 break;
1192
1193 default:
1194 out_string(c, "SERVER_ERROR internal");
1195 }
1196
1197 if (c->store_op == OPERATION_CAS) {
1198 switch (ret) {
1199 case ENGINE_SUCCESS:
1200 SLAB_INCR(c, cas_hits, info.key, info.nkey);
1201 break;
1202 case ENGINE_KEY_EEXISTS:
1203 SLAB_INCR(c, cas_badval, info.key, info.nkey);
1204 break;
1205 case ENGINE_KEY_ENOENT:
1206 STATS_NOKEY(c, cas_misses);
1207 break;
1208 default:
1209 ;
1210 }
1211 } else {
1212 SLAB_INCR(c, cmd_set, info.key, info.nkey);
1213 }
1214
1215 if (!c->ewouldblock) {
1216 /* release the c->item reference */
1217 settings.engine.v1->release(settings.engine.v0, c, c->item);
1218 c->item = 0;
1219 }
1220 }
1221
1222 /**
1223 * get a pointer to the start of the request struct for the current command
1224 */
binary_get_request(conn * c)1225 static void* binary_get_request(conn *c) {
1226 char *ret = c->rcurr;
1227 ret -= (sizeof(c->binary_header) + c->binary_header.request.keylen +
1228 c->binary_header.request.extlen);
1229
1230 assert(ret >= c->rbuf);
1231 return ret;
1232 }
1233
1234 /**
1235 * get a pointer to the key in this request
1236 */
binary_get_key(conn * c)1237 static char* binary_get_key(conn *c) {
1238 return c->rcurr - (c->binary_header.request.keylen);
1239 }
1240
/**
 * Insert a key into a buffer, but replace all non-printable characters
 * with a '.'.
 *
 * The output has the form "<dir><client> <prefix> <key>", where <dir> is
 * '>' for data from the client and '<' otherwise. The result is always
 * NUL-terminated; a key that does not fit is truncated so that the
 * terminator stays inside the buffer.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param key the key to add to the buffer
 * @param nkey the number of bytes in the key
 * @return number of bytes in dest (excluding the terminator) if success,
 *         -1 if formatting failed or the prefix alone does not fit
 */
static ssize_t key_to_printable_buffer(char *dest, size_t destsz,
                                       int client, bool from_client,
                                       const char *prefix,
                                       const char *key,
                                       size_t nkey)
{
    int nw = snprintf(dest, destsz, "%c%d %s ", from_client ? '>' : '<',
                      client, prefix);
    /* snprintf reports the untruncated length, so >= destsz means overflow. */
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }

    char *ptr = dest + nw;
    /* Reserve one byte for the terminating NUL. */
    size_t room = destsz - (size_t)nw - 1;
    if (nkey > room) {
        nkey = room;
    }

    for (size_t ii = 0; ii < nkey; ++ii, ++key, ++ptr) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        *ptr = isgraph((unsigned char)*key) ? *key : '.';
    }

    *ptr = '\0';
    return ptr - dest;
}
1283
/**
 * Convert a byte array to a text string
 *
 * The output starts with "<dir><client> <prefix>", where <dir> is '>' for
 * data from the client and '<' otherwise. The bytes follow as " 0xNN",
 * four per line, each line starting with "<dir><client> ". The output ends
 * with a newline.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param data the data to add to the buffer
 * @param size the number of bytes in data to print
 * @return number of bytes in dest (excluding the terminator) if success,
 *         -1 if formatting failed or the output does not fit in dest
 */
static ssize_t bytes_to_output_string(char *dest, size_t destsz,
                                      int client, bool from_client,
                                      const char *prefix,
                                      const char *data,
                                      size_t size)
{
    const char direction = from_client ? '>' : '<';
    size_t offset;
    int nw;

    /*
     * snprintf returns the length the full output would have had, so every
     * call is checked against the space that was actually available. This
     * keeps "destsz - offset" from wrapping around.
     */
    nw = snprintf(dest, destsz, "%c%d %s", direction, client, prefix);
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }
    offset = (size_t)nw;

    for (size_t ii = 0; ii < size; ++ii) {
        if (ii % 4 == 0) {
            /* Start a new output line for every group of four bytes. */
            nw = snprintf(dest + offset, destsz - offset, "\n%c%d ",
                          direction, client);
            if (nw < 0 || (size_t)nw >= destsz - offset) {
                return -1;
            }
            offset += (size_t)nw;
        }
        nw = snprintf(dest + offset, destsz - offset,
                      " 0x%02x", (unsigned char)data[ii]);
        if (nw < 0 || (size_t)nw >= destsz - offset) {
            return -1;
        }
        offset += (size_t)nw;
    }

    nw = snprintf(dest + offset, destsz - offset, "\n");
    if (nw < 0 || (size_t)nw >= destsz - offset) {
        return -1;
    }

    return (ssize_t)(offset + (size_t)nw);
}
1330
/*
 * Start a new binary-protocol response: reset the outgoing message list,
 * build the response header at the start of c->wbuf and queue it for
 * sending.
 *
 * err, hdr_len (extras length), key_len and body_len are stored in the
 * header in network byte order; opcode, opaque and cas are taken from the
 * connection. If no message header can be added, a text error is queued via
 * out_string instead and nothing else is done.
 */
static void add_bin_header(conn *c, uint16_t err, uint8_t hdr_len, uint16_t key_len, uint32_t body_len) {
    assert(c);

    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    if (add_msghdr(c) != 0) {
        /* XXX: out_string is inappropriate here */
        out_string(c, "SERVER_ERROR out of memory");
        return;
    }

    protocol_binary_response_header *hdr =
        (protocol_binary_response_header *)c->wbuf;

    hdr->response.magic = (uint8_t)PROTOCOL_BINARY_RES;
    hdr->response.opcode = c->binary_header.request.opcode;
    hdr->response.keylen = (uint16_t)htons(key_len);
    hdr->response.extlen = (uint8_t)hdr_len;
    hdr->response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES;
    hdr->response.status = (uint16_t)htons(err);
    hdr->response.bodylen = htonl(body_len);
    hdr->response.opaque = c->opaque;
    hdr->response.cas = htonll(c->cas);

    if (settings.verbose > 1) {
        /* Dump the raw header bytes to the debug log. */
        char dump[1024];
        ssize_t nw = bytes_to_output_string(dump, sizeof(dump), c->sfd, false,
                                            "Writing bin response:",
                                            (const char*)hdr->bytes,
                                            sizeof(hdr->bytes));
        if (nw != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "%s", dump);
        }
    }

    add_iov(c, c->wbuf, sizeof(hdr->response));
}
1372
1373 /**
1374 * Convert an error code generated from the storage engine to the corresponding
1375 * error code used by the protocol layer.
1376 * @param e the error code as used in the engine
1377 * @return the error code as used by the protocol layer
1378 */
engine_error_2_protocol_error(ENGINE_ERROR_CODE e)1379 static protocol_binary_response_status engine_error_2_protocol_error(ENGINE_ERROR_CODE e) {
1380 protocol_binary_response_status ret;
1381
1382 switch (e) {
1383 case ENGINE_SUCCESS:
1384 return PROTOCOL_BINARY_RESPONSE_SUCCESS;
1385 case ENGINE_KEY_ENOENT:
1386 return PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
1387 case ENGINE_KEY_EEXISTS:
1388 return PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
1389 case ENGINE_ENOMEM:
1390 return PROTOCOL_BINARY_RESPONSE_ENOMEM;
1391 case ENGINE_TMPFAIL:
1392 return PROTOCOL_BINARY_RESPONSE_ETMPFAIL;
1393 case ENGINE_NOT_STORED:
1394 return PROTOCOL_BINARY_RESPONSE_NOT_STORED;
1395 case ENGINE_EINVAL:
1396 return PROTOCOL_BINARY_RESPONSE_EINVAL;
1397 case ENGINE_ENOTSUP:
1398 return PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED;
1399 case ENGINE_E2BIG:
1400 return PROTOCOL_BINARY_RESPONSE_E2BIG;
1401 case ENGINE_NOT_MY_VBUCKET:
1402 return PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET;
1403 default:
1404 ret = PROTOCOL_BINARY_RESPONSE_EINTERNAL;
1405 }
1406
1407 return ret;
1408 }
1409
/*
 * Queue a binary-protocol response that carries only a status code and, for
 * non-success codes, a short human-readable message as the body. The engine
 * may append extra detail through its optional errinfo() hook.
 *
 * The message text is copied into c->wbuf right after the response header
 * before it is queued: the response is only sent after this function has
 * returned (the connection is merely switched to conn_mwrite here), so the
 * queued data must not live in a local buffer.
 *
 * @param c the connection to answer
 * @param err status code to put in the response header
 * @param swallow when positive, number of input bytes to discard after the
 *                response has been written
 */
static void write_bin_packet(conn *c, protocol_binary_response_status err, int swallow) {
    ssize_t len = 0;
    char buffer[1024] = { 0 };
    const char *text = NULL;

    switch (err) {
    case PROTOCOL_BINARY_RESPONSE_SUCCESS:
        break;
    case PROTOCOL_BINARY_RESPONSE_ENOMEM:
        text = "Out of memory";
        break;
    case PROTOCOL_BINARY_RESPONSE_ETMPFAIL:
        text = "Temporary failure";
        break;
    case PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND:
        text = "Unknown command";
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_ENOENT:
        text = "Not found";
        break;
    case PROTOCOL_BINARY_RESPONSE_EINVAL:
        text = "Invalid arguments";
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS:
        text = "Data exists for key";
        break;
    case PROTOCOL_BINARY_RESPONSE_E2BIG:
        text = "Too large";
        break;
    case PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL:
        text = "Non-numeric server-side value for incr or decr";
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_STORED:
        text = "Not stored";
        break;
    case PROTOCOL_BINARY_RESPONSE_AUTH_ERROR:
        text = "Auth failure";
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED:
        text = "Not supported";
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET:
        text = "I'm not responsible for this vbucket";
        break;

    default:
        len = snprintf(buffer, sizeof(buffer), "UNHANDLED ERROR (%d)", err);
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        ">%d UNHANDLED ERROR: %d\n", c->sfd, err);
        break;
    }

    if (text != NULL) {
        len = snprintf(buffer, sizeof(buffer), "%s", text);
    }

    /* Allow the engine to pass extra error information */
    if (settings.engine.v1->errinfo != NULL) {
        /* The engine writes after a two-byte gap that receives ": " below. */
        size_t elen = settings.engine.v1->errinfo(settings.engine.v0, c, buffer + len + 2,
                                                  sizeof(buffer) - len - 3);

        if (elen > 0) {
            memcpy(buffer + len, ": ", 2);
            len += elen + 2;
        }
    }

    if (err != PROTOCOL_BINARY_RESPONSE_SUCCESS && settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d Writing an error: %s\n", c->sfd,
                                        buffer);
    }

    /* Never queue more text than fits in the write buffer after the header. */
    size_t room = 0;
    if ((size_t)c->wsize > sizeof(protocol_binary_response_header)) {
        room = (size_t)c->wsize - sizeof(protocol_binary_response_header);
    }
    if (len > 0 && (size_t)len > room) {
        len = (ssize_t)room;
    }

    add_bin_header(c, err, 0, 0, len);
    if (len > 0) {
        /* Same placement as the GETK miss reply in process_bin_get. */
        char *body = c->wbuf + sizeof(protocol_binary_response_header);
        memcpy(body, buffer, len);
        add_iov(c, body, len);
    }
    conn_set_state(c, conn_mwrite);
    if (swallow > 0) {
        c->sbytes = swallow;
        c->write_and_go = conn_swallow;
    } else {
        c->write_and_go = conn_new_cmd;
    }
}
1492
1493 /* Form and send a response to a command over the binary protocol */
write_bin_response(conn * c,void * d,int hlen,int keylen,int dlen)1494 static void write_bin_response(conn *c, void *d, int hlen, int keylen, int dlen) {
1495 if (!c->noreply || c->cmd == PROTOCOL_BINARY_CMD_GET ||
1496 c->cmd == PROTOCOL_BINARY_CMD_GETK) {
1497 add_bin_header(c, 0, hlen, keylen, dlen);
1498 if(dlen > 0) {
1499 add_iov(c, d, dlen);
1500 }
1501 conn_set_state(c, conn_mwrite);
1502 c->write_and_go = conn_new_cmd;
1503 } else {
1504 conn_set_state(c, conn_new_cmd);
1505 }
1506 }
1507
1508
/*
 * Execute a binary-protocol increment/decrement request and queue the reply.
 *
 * delta, initial value and expiration are read from the request body; the
 * engine's arithmetic() performs the operation unless a status is already
 * pending in c->aiostat. On success the new value is returned to the client
 * in network byte order. Hit and miss counters are charged to incr or decr
 * according to the command, including its quiet variant.
 */
static void complete_incr_bin(conn *c) {
    /* Validate the connection before anything dereferences it. */
    assert(c != NULL);

    protocol_binary_response_incr* rsp = (protocol_binary_response_incr*)c->wbuf;
    protocol_binary_request_incr* req = binary_get_request(c);

    assert(c->wsize >= sizeof(*rsp));

    /* fix byteorder in the request */
    uint64_t delta = ntohll(req->message.body.delta);
    uint64_t initial = ntohll(req->message.body.initial);
    rel_time_t expiration = ntohl(req->message.body.expiration);
    char *key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;
    bool incr = (c->cmd == PROTOCOL_BINARY_CMD_INCREMENT ||
                 c->cmd == PROTOCOL_BINARY_CMD_INCREMENTQ);

    if (settings.verbose > 1) {
        char buffer[1024];
        ssize_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     incr ? "INCR" : "DECR", key, nkey);
        if (nw != -1) {
            if (snprintf(buffer + nw, sizeof(buffer) - nw,
                         " %" PRIu64 ", %" PRIu64 ", %" PRIu64 "\n",
                         delta, initial, (uint64_t)expiration) != -1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        /* An expiration of 0xffffffff in the request disables creation. */
        ret = settings.engine.v1->arithmetic(settings.engine.v0,
                                             c, key, nkey, incr,
                                             req->message.body.expiration != 0xffffffff,
                                             delta, initial, expiration,
                                             &c->cas,
                                             &rsp->message.body.value,
                                             c->binary_header.request.vbucket);
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        rsp->message.body.value = htonll(rsp->message.body.value);
        write_bin_response(c, &rsp->message.body, 0, 0,
                           sizeof (rsp->message.body.value));
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        /*
         * Use the same incr/decr classification as the hit counters so that
         * a miss of the quiet increment command is not counted as a
         * decrement miss.
         */
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EINVAL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL, 0);
        break;
    case ENGINE_NOT_STORED:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_STORED, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        abort();
    }
}
1602
/*
 * Finish a binary-protocol store command once the value has been read. The
 * store operation is in c->store_op and the item is in c->item.
 *
 * Unless a status is already pending in c->aiostat, the item is handed to
 * the engine's store(). The outcome is reported to the client, the
 * statistics counters are updated, and the c->item reference is released
 * unless the engine returned ENGINE_EWOULDBLOCK.
 */
static void complete_update_bin(conn *c) {
    protocol_binary_response_status eno = PROTOCOL_BINARY_RESPONSE_EINVAL;
    assert(c != NULL);

    /*
     * Keep the names "it" and "info": the stats macros at the top of this
     * file reference info.clsid and require the item to be called "it".
     */
    item *it = c->item;
    item_info info = { .nvalue = 1 };
    if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
        settings.engine.v1->release(settings.engine.v0, c, it);
        /*
         * Clear the connection's reference, as the normal exit path at the
         * end of this function does, so that no later code sees a pointer to
         * an item that has already been released.
         */
        c->item = NULL;
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: Failed to get item info\n",
                                        c->sfd);
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
        return;
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->store(settings.engine.v0, c,
                                        it, &c->cas, c->store_op,
                                        c->binary_header.request.vbucket);
    }

#ifdef ENABLE_DTRACE
    /*
     * The case labels are store operations, so dispatch on c->store_op (as
     * complete_update_ascii does) rather than on c->cmd.
     */
    switch (c->store_op) {
    case OPERATION_ADD:
        MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_REPLACE:
        MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_APPEND:
        MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
                                 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_PREPEND:
        MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_SET:
        MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    default:
        break;
    }
#endif

    switch (ret) {
    case ENGINE_SUCCESS:
        /* Stored */
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* Any other failure is reported according to the store operation. */
        if (c->store_op == OPERATION_ADD) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
        } else if(c->store_op == OPERATION_REPLACE) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
        } else {
            eno = PROTOCOL_BINARY_RESPONSE_NOT_STORED;
        }
        write_bin_packet(c, eno, 0);
    }

    if (c->store_op == OPERATION_CAS) {
        switch (ret) {
        case ENGINE_SUCCESS:
            SLAB_INCR(c, cas_hits, info.key, info.nkey);
            break;
        case ENGINE_KEY_EEXISTS:
            SLAB_INCR(c, cas_badval, info.key, info.nkey);
            break;
        case ENGINE_KEY_ENOENT:
            STATS_NOKEY(c, cas_misses);
            break;
        default:
            ;
        }
    } else {
        SLAB_INCR(c, cmd_set, info.key, info.nkey);
    }

    if (!c->ewouldblock) {
        /* release the c->item reference */
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }
}
1715
/*
 * Handle a binary-protocol GET/GETK request.
 *
 * The key is looked up through the engine's get() unless a status is already
 * pending in c->aiostat. On a hit the reply consists of the header, the
 * flags, the key (GETK only) and the value, and the item is remembered in
 * c->item. On a miss a KEY_ENOENT reply is queued (echoing the key for
 * GETK) unless c->noreply is set.
 */
static void process_bin_get(conn *c) {
    /* "it" and "info" keep their names: the stats macros rely on them. */
    item *it;
    item_info info = { .nvalue = 1 };
    uint16_t reply_keylen;
    uint32_t reply_bodylen;

    protocol_binary_response_get* reply = (protocol_binary_response_get*)c->wbuf;
    char* key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char line[1024];
        ssize_t nw = key_to_printable_buffer(line, sizeof(line), c->sfd, true,
                                             "GET", key, nkey);
        if (nw != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            line);
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->get(settings.engine.v0, c, &it, key, nkey,
                                      c->binary_header.request.vbucket);
    }

    const bool with_key = (c->cmd == PROTOCOL_BINARY_CMD_GETK);

    switch (ret) {
    case ENGINE_SUCCESS:
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: Failed to get item info\n",
                                            c->sfd);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            break;
        }

        /* The body carries the flags, optionally the key, then the value. */
        reply_keylen = 0;
        reply_bodylen = sizeof(reply->message.body) + info.nbytes;

        STATS_HIT(c, get, key, nkey);

        if (with_key) {
            reply_bodylen += nkey;
            reply_keylen = nkey;
        }
        add_bin_header(c, 0, sizeof(reply->message.body), reply_keylen,
                       reply_bodylen);
        reply->message.header.response.cas = htonll(info.cas);

        // add the flags
        reply->message.body.flags = info.flags;
        add_iov(c, &reply->message.body, sizeof(reply->message.body));

        if (with_key) {
            add_iov(c, info.key, nkey);
        }

        add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
        conn_set_state(c, conn_mwrite);
        /* Remember this item so we can garbage collect it later */
        c->item = it;
        break;
    case ENGINE_KEY_ENOENT:
        STATS_MISS(c, get, key, nkey);

        MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);

        if (c->noreply) {
            conn_set_state(c, conn_new_cmd);
        } else if (with_key) {
            /* Echo the key back, staged in the write buffer after the header. */
            char *staged = c->wbuf + sizeof(protocol_binary_response_header);
            add_bin_header(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT,
                           0, nkey, nkey);
            memcpy(staged, key, nkey);
            add_iov(c, staged, nkey);
            conn_set_state(c, conn_mwrite);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        }
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_TMPFAIL:
        /* NOTE(review): no reply and no state change here - confirm intent. */
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* @todo add proper error handling! */
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Unknown error code: %d\n", ret);
        abort();
    }

    if (settings.detail_enabled && ret != ENGINE_EWOULDBLOCK) {
        stats_prefix_record_get(key, nkey, ret == ENGINE_SUCCESS);
    }
}
1824
/*
 * Append one binary-protocol STAT response (header, key, value) at the
 * current offset of the connection's dynamic buffer and advance the offset.
 * No bounds check is done here; the caller must have made room for
 * header + klen + vlen bytes.
 */
static void append_bin_stats(const char *key, const uint16_t klen,
                             const char *val, const uint32_t vlen,
                             conn *c) {
    char *out = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
    const uint32_t bodylen = klen + vlen;
    protocol_binary_response_header hdr = {
        .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
        .response.opcode = PROTOCOL_BINARY_CMD_STAT,
        .response.keylen = (uint16_t)htons(klen),
        .response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES,
        .response.bodylen = htonl(bodylen),
        .response.opaque = c->opaque
    };

    memcpy(out, hdr.bytes, sizeof(hdr.response));
    out += sizeof(hdr.response);

    /* A value is only written when it follows a key. */
    if (klen > 0) {
        memcpy(out, key, klen);
        out += klen;
    }
    if (klen > 0 && vlen > 0) {
        memcpy(out, val, vlen);
    }

    c->dynamic_buffer.offset += sizeof(hdr.response) + bodylen;
}
1853
1854 /**
1855 * Append a key-value pair to the stats output buffer. This function assumes
1856 * that the output buffer is big enough (it will be if you call it through
1857 * append_stats)
1858 */
append_ascii_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,conn * c)1859 static void append_ascii_stats(const char *key, const uint16_t klen,
1860 const char *val, const uint32_t vlen,
1861 conn *c) {
1862 char *pos = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
1863 uint32_t nbytes = 5; /* "END\r\n" or "STAT " */
1864
1865 if (klen == 0 && vlen == 0) {
1866 memcpy(pos, "END\r\n", 5);
1867 } else {
1868 memcpy(pos, "STAT ", 5);
1869 memcpy(pos + nbytes, key, klen);
1870 nbytes += klen;
1871 if (vlen != 0) {
1872 pos[nbytes] = ' ';
1873 ++nbytes;
1874 memcpy(pos + nbytes, val, vlen);
1875 nbytes += vlen;
1876 }
1877 memcpy(pos + nbytes, "\r\n", 2);
1878 nbytes += 2;
1879 }
1880
1881 c->dynamic_buffer.offset += nbytes;
1882 }
1883
/*
 * Make sure the connection's dynamic buffer has at least `needed` free bytes
 * after its current offset, doubling the size until it does. A missing
 * buffer is treated as empty with a starting size of 1024 before doubling.
 *
 * Returns true on success and false when realloc fails (the existing buffer
 * and its recorded size are then left unchanged).
 */
static bool grow_dynamic_buffer(conn *c, size_t needed) {
    size_t target = c->dynamic_buffer.size;
    size_t room = target - c->dynamic_buffer.offset;

    /* Special case: No buffer -- need to allocate fresh */
    if (c->dynamic_buffer.buffer == NULL) {
        target = 1024;
        c->dynamic_buffer.offset = 0;
        c->dynamic_buffer.size = 0;
        room = 0;
    }

    for (; needed > room; room = target - c->dynamic_buffer.offset) {
        assert(target > 0);
        target <<= 1;
    }

    if (target == c->dynamic_buffer.size) {
        return true;
    }

    char *grown = realloc(c->dynamic_buffer.buffer, target);
    if (grown == NULL) {
        return false;
    }

    c->dynamic_buffer.buffer = grown;
    c->dynamic_buffer.size = target;
    return true;
}
1913
append_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,const void * cookie)1914 static void append_stats(const char *key, const uint16_t klen,
1915 const char *val, const uint32_t vlen,
1916 const void *cookie)
1917 {
1918 /* value without a key is invalid */
1919 if (klen == 0 && vlen > 0) {
1920 return ;
1921 }
1922
1923 conn *c = (conn*)cookie;
1924
1925 if (c->protocol == binary_prot) {
1926 size_t needed = vlen + klen + sizeof(protocol_binary_response_header);
1927 if (!grow_dynamic_buffer(c, needed)) {
1928 return ;
1929 }
1930 append_bin_stats(key, klen, val, vlen, c);
1931 } else {
1932 size_t needed = vlen + klen + 10; // 10 == "STAT = \r\n"
1933 if (!grow_dynamic_buffer(c, needed)) {
1934 return ;
1935 }
1936 append_ascii_stats(key, klen, val, vlen, c);
1937 }
1938
1939 assert(c->dynamic_buffer.offset <= c->dynamic_buffer.size);
1940 }
1941
/*
 * Handle a binary-protocol STAT request.
 *
 * The request key selects what is reported: an empty key requests all
 * statistics; "reset", "settings", "detail ...", "aggregate" and "topkeys"
 * are handled here; anything else is passed to the engine's get_stats().
 * Results are collected in the connection's dynamic buffer through
 * append_stats and, on success, sent followed by an empty terminating
 * entry.
 *
 * NOTE(review): the subcommand comparisons use strncmp on the raw key
 * without checking nkey first, so they may read bytes beyond a shorter
 * key - confirm this is acceptable.
 */
static void process_bin_stat(conn *c) {
    char *subcommand = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "STATS", subcommand, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        if (nkey == 0) {
            /* request all statistics */
            ret = settings.engine.v1->get_stats(settings.engine.v0, c, NULL, 0, append_stats);
            if (ret == ENGINE_SUCCESS) {
                server_stats(&append_stats, c, false);
            }
        } else if (strncmp(subcommand, "reset", 5) == 0) {
            stats_reset(c);
            settings.engine.v1->reset_stats(settings.engine.v0, c);
        } else if (strncmp(subcommand, "settings", 8) == 0) {
            process_stat_settings(&append_stats, c);
        } else if (strncmp(subcommand, "detail", 6) == 0) {
            char *subcmd_pos = subcommand + 6;
            if (settings.allow_detailed) {
                if (strncmp(subcmd_pos, " dump", 5) == 0) {
                    int len;
                    char *dump_buf = stats_prefix_dump(&len);
                    if (dump_buf == NULL || len <= 0) {
                        /* Do not leak a buffer returned with an unusable length. */
                        free(dump_buf);
                        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                        return ;
                    } else {
                        append_stats("detailed", strlen("detailed"), dump_buf, len, c);
                        free(dump_buf);
                    }
                } else if (strncmp(subcmd_pos, " on", 3) == 0) {
                    settings.detail_enabled = 1;
                } else if (strncmp(subcmd_pos, " off", 4) == 0) {
                    settings.detail_enabled = 0;
                } else {
                    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                    return;
                }
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                return;
            }
        } else if (strncmp(subcommand, "aggregate", 9) == 0) {
            server_stats(&append_stats, c, true);
        } else if (strncmp(subcommand, "topkeys", 7) == 0) {
            topkeys_t *tk = get_independent_stats(c)->topkeys;
            if (tk != NULL) {
                topkeys_stats(tk, c, current_time, append_stats);
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                return;
            }
        } else {
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                subcommand, nkey,
                                                append_stats);
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        /* Append the empty terminating entry and start the transfer. */
        append_stats(NULL, 0, NULL, 0, c);
        if (c->dynamic_buffer.buffer == NULL) {
            /*
             * append_stats drops its output when the buffer cannot be
             * grown; with no buffer at all, report the allocation failure
             * in binary form instead of handing a NULL buffer on.
             */
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        } else {
            write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
2041
/*
 * Prepare the connection to read `chunk` more bytes of the current binary
 * request, then continue in substate `next_substate`.
 *
 * If the bytes do not fit behind the request header in the read buffer, the
 * buffer is doubled until header + chunk fits and the pending input is moved
 * to the start of the buffer. The header is kept in the buffer; c->ritem is
 * set to the position right after it. The connection is closed if the buffer
 * cannot be grown.
 */
static void bin_read_chunk(conn *c, enum bin_substates next_substate, uint32_t chunk) {
    assert(c);
    c->substate = next_substate;
    c->rlbytes = chunk;

    /* Ok... do we have room for everything in our buffer? */
    ptrdiff_t past_header = c->rcurr + sizeof(protocol_binary_request_header) - c->rbuf;
    if (c->rlbytes > c->rsize - past_header) {
        size_t required = c->rlbytes + sizeof(protocol_binary_request_header);
        size_t wanted;

        for (wanted = c->rsize; required > wanted; wanted *= 2) {
            /* keep doubling until the whole request fits */
        }

        if (wanted != c->rsize) {
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                        "%d: Need to grow buffer from %lu to %lu\n",
                        c->sfd, (unsigned long)c->rsize, (unsigned long)wanted);
            }
            char *grown = realloc(c->rbuf, wanted);
            if (grown == NULL) {
                if (settings.verbose) {
                    settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                            "%d: Failed to grow buffer.. closing connection\n",
                            c->sfd);
                }
                conn_set_state(c, conn_closing);
                return;
            }

            c->rbuf = grown;
            /* rcurr should point to the same offset in the packet */
            c->rcurr = c->rbuf + past_header - sizeof(protocol_binary_request_header);
            c->rsize = wanted;
        }

        if (c->rbuf != c->rcurr) {
            /* Slide the pending input down to the start of the buffer. */
            memmove(c->rbuf, c->rcurr, c->rbytes);
            c->rcurr = c->rbuf;
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                                "%d: Repack input buffer\n",
                                                c->sfd);
            }
        }
    }

    /* preserve the header in the buffer.. */
    c->ritem = c->rcurr + sizeof(protocol_binary_request_header);
    conn_set_state(c, conn_nread);
}
2094
/*
 * Read the key of the current request plus `extra` additional bytes:
 * a thin wrapper that asks bin_read_chunk() for c->keylen + extra bytes.
 */
static void bin_read_key(conn *c, enum bin_substates next_substate, int extra) {
    const uint32_t wanted = c->keylen + extra;

    bin_read_chunk(c, next_substate, wanted);
}
2098
2099
2100 /* Just write an error message and disconnect the client */
handle_binary_protocol_error(conn * c)2101 static void handle_binary_protocol_error(conn *c) {
2102 write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
2103 if (settings.verbose) {
2104 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
2105 "%d: Protocol error (opcode %02x), close connection\n",
2106 c->sfd, c->binary_header.request.opcode);
2107 }
2108 c->write_and_go = conn_closing;
2109 }
2110
/*
 * Lazily create the SASL server context of a connection.
 *
 * Does nothing when c->sasl_conn is already set. On failure of
 * sasl_server_new() the problem is logged (if verbose) and c->sasl_conn is
 * reset to NULL.
 */
static void init_sasl_conn(conn *c) {
    assert(c);

    if (c->sasl_conn) {
        return;
    }

    int rc = sasl_server_new("memcached",
                             NULL, NULL, NULL, NULL,
                             NULL, 0, &c->sasl_conn);
    if (rc == SASL_OK) {
        return;
    }

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                "%d: Failed to initialize SASL conn.\n",
                c->sfd);
    }
    c->sasl_conn = NULL;
}
2127
/*
 * Fill `data` with the SASL properties of the connection behind `cookie`.
 *
 * The username is always requested; the ISASL configuration only when
 * ENABLE_ISASL is defined. `data` is left untouched when the connection
 * has no SASL context.
 */
static void get_auth_data(const void *cookie, auth_data_t *data) {
    conn *c = (conn*)cookie;

    if (!c->sasl_conn) {
        return;
    }

    sasl_getprop(c->sasl_conn, SASL_USERNAME, (void*)&data->username);
#ifdef ENABLE_ISASL
    sasl_getprop(c->sasl_conn, ISASL_CONFIG, (void*)&data->config);
#endif
}
2137
#ifdef SASL_ENABLED
/*
 * Handle the SASL "list mechanisms" command: reply with the mechanism names
 * reported by sasl_listmech(), separated by single spaces, or with an
 * AUTH_ERROR packet when the list cannot be obtained.
 */
static void bin_list_sasl_mechs(conn *c) {
    init_sasl_conn(c);

    const char *mechs = NULL;
    unsigned int mechs_len = 0;
    int rc = sasl_listmech(c->sasl_conn, NULL,
                           "",   /* prefix */
                           " ",  /* separator between mechanisms */
                           "",   /* suffix */
                           &mechs, &mechs_len,
                           NULL);

    if (rc == SASL_OK) {
        write_bin_response(c, (char*)mechs, 0, 0, mechs_len);
        return;
    }

    /* Perhaps there's a better error for this... */
    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                "%d: Failed to list SASL mechanisms.\n",
                c->sfd);
    }
    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
}
#endif
2162
/*
 * Scratch buffer that carries a SASL request from process_bin_sasl_auth()
 * (which allocates it and stores it in c->item) to
 * process_bin_complete_sasl_auth() (which frees it).
 */
struct sasl_tmp {
    int ksize;   /* length of the key (the mechanism name) */
    int vsize;   /* length of the value that follows the key */
    char data[]; /* data + ksize == value */
};
2168
/*
 * First stage of a SASL AUTH/STEP request: the key (mechanism name) has been
 * read; allocate a struct sasl_tmp holding the key and room for the value,
 * park it in c->item and switch to conn_nread to receive the value
 * (substate bin_reading_sasl_auth_data).
 *
 * Error replies:
 *  - value length not representable as a non-negative int: EINVAL, and the
 *    connection is closed (via handle_binary_protocol_error);
 *  - key longer than MAX_SASL_MECH_LEN: EINVAL, the value is swallowed;
 *  - allocation failure: ENOMEM, the value is swallowed.
 */
static void process_bin_sasl_auth(conn *c) {
    assert(c->binary_header.request.extlen == 0);

    const uint32_t bodylen = c->binary_header.request.bodylen;
    const int nkey = c->binary_header.request.keylen;

    /*
     * The lengths come straight from the network. Validate them at runtime
     * (an assert() disappears with NDEBUG): a body length that makes the
     * value length negative once stored in an int would yield an undersized
     * allocation below and a huge read count in c->rlbytes.
     */
    if ((uint32_t)nkey > bodylen ||
        bodylen - (uint32_t)nkey > (uint32_t)INT_MAX) {
        handle_binary_protocol_error(c);
        return;
    }
    const int vlen = (int)(bodylen - (uint32_t)nkey);

    if (nkey > MAX_SASL_MECH_LEN) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, vlen);
        c->write_and_go = conn_swallow;
        return;
    }

    char *key = binary_get_key(c);
    assert(key);

    /* Header + key + value, plus two spare (zeroed) bytes behind the value. */
    size_t buffer_size = sizeof(struct sasl_tmp) + (size_t)nkey + (size_t)vlen + 2;
    struct sasl_tmp *data = calloc(1, buffer_size);
    if (!data) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        c->write_and_go = conn_swallow;
        return;
    }
    /* NOTE(review): vlen is only bounded by INT_MAX here; consider a
     * protocol-level cap on the SASL payload size. */

    data->ksize = nkey;
    data->vsize = vlen;
    memcpy(data->data, key, nkey);

    c->item = data;
    c->ritem = data->data + nkey;   /* the value is read right behind the key */
    c->rlbytes = vlen;
    conn_set_state(c, conn_nread);
    c->substate = bin_reading_sasl_auth_data;
}
2204
/*
 * Second stage of a SASL AUTH/STEP request: key and value are available in
 * the struct sasl_tmp stored in c->item. Run the SASL exchange and reply:
 *
 *  - SASL_OK:       "Authenticated" response, ON_AUTH callbacks are run;
 *  - SASL_CONTINUE: AUTH_CONTINUE response carrying the server challenge;
 *  - anything else: AUTH_ERROR response.
 *
 * The temporary buffer in c->item is freed in all cases.
 */
static void process_bin_complete_sasl_auth(conn *c) {
    const char *out = NULL;
    unsigned int outlen = 0;

    assert(c->item);
    init_sasl_conn(c);

    int nkey = c->binary_header.request.keylen;
    int vlen = c->binary_header.request.bodylen - nkey;

    /* NUL-terminated copy of the mechanism name (the request key). */
    struct sasl_tmp *stmp = c->item;
    char mech[nkey+1];
    memcpy(mech, stmp->data, nkey);
    mech[nkey] = 0x00;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: mech: ``%s'' with %d bytes of data\n", c->sfd, mech, vlen);
    }

    /* An empty value is handed to the SASL library as "no data". */
    const char *challenge = vlen == 0 ? NULL : (stmp->data + nkey);

    int result=-1;

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SASL_AUTH:
        result = sasl_server_start(c->sasl_conn, mech,
                                   challenge, vlen,
                                   &out, &outlen);
        break;
    case PROTOCOL_BINARY_CMD_SASL_STEP:
        result = sasl_server_step(c->sasl_conn,
                                  challenge, vlen,
                                  &out, &outlen);
        break;
    default:
        assert(false); /* CMD should be one of the above */
        /* This code is pretty much impossible, but makes the compiler
           happier */
        if (settings.verbose) {
            /* challenge may be NULL; never hand NULL to %s */
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                    "%d: Unhandled command %d with challenge %s\n",
                    c->sfd, c->cmd, challenge ? challenge : "(none)");
        }
        break;
    }

    free(c->item);
    c->item = NULL;
    c->ritem = NULL;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "%d: sasl result code: %d\n",
                                        c->sfd, result);
    }

    switch(result) {
    case SASL_OK: {
        write_bin_response(c, (void*)"Authenticated", 0, 0, strlen("Authenticated"));
        /*
         * Zero-initialise: get_auth_data() only stores what the SASL
         * library returns, so without this the callbacks could see
         * indeterminate pointers.
         */
        auth_data_t data = { .username = NULL };
        get_auth_data(c, &data);
        perform_callbacks(ON_AUTH, (const void*)&data, c);
        STATS_NOKEY(c, auth_cmds);
        break;
    }
    case SASL_CONTINUE:
        add_bin_header(c, PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE, 0, 0, outlen);
        if(outlen > 0) {
            add_iov(c, out, outlen);
        }
        conn_set_state(c, conn_mwrite);
        c->write_and_go = conn_new_cmd;
        break;
    default:
        if (settings.verbose) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "%d: Unknown sasl response: %d\n",
                                            c->sfd, result);
        }
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        STATS_NOKEY2(c, auth_cmds, auth_errors);
    }
}
2288
/*
 * Decide whether the current command may run on this connection.
 *
 * The SASL commands and VERSION are always allowed. Every other command is
 * allowed only if the connection has a SASL context for which a username
 * has been established.
 */
static bool authenticated(conn *c) {
    bool allowed;

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS:
    case PROTOCOL_BINARY_CMD_SASL_AUTH:
    case PROTOCOL_BINARY_CMD_SASL_STEP:
    case PROTOCOL_BINARY_CMD_VERSION:
        allowed = true;
        break;
    default: {
        const void *uname = NULL;

        allowed = false;
        if (c->sasl_conn) {
            sasl_getprop(c->sasl_conn, SASL_USERNAME, &uname);
            allowed = (uname != NULL);
        }
        break;
    }
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: authenticated() in cmd 0x%02x is %s\n",
                c->sfd, c->cmd, allowed ? "true" : "false");
    }

    return allowed;
}
2315
binary_response_handler(const void * key,uint16_t keylen,const void * ext,uint8_t extlen,const void * body,uint32_t bodylen,uint8_t datatype,uint16_t status,uint64_t cas,const void * cookie)2316 static bool binary_response_handler(const void *key, uint16_t keylen,
2317 const void *ext, uint8_t extlen,
2318 const void *body, uint32_t bodylen,
2319 uint8_t datatype, uint16_t status,
2320 uint64_t cas, const void *cookie)
2321 {
2322 conn *c = (conn*)cookie;
2323 /* Look at append_bin_stats */
2324 size_t needed = keylen + extlen + bodylen + sizeof(protocol_binary_response_header);
2325 if (!grow_dynamic_buffer(c, needed)) {
2326 if (settings.verbose > 0) {
2327 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
2328 "<%d ERROR: Failed to allocate memory for response\n",
2329 c->sfd);
2330 }
2331 return false;
2332 }
2333
2334 char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
2335 protocol_binary_response_header header = {
2336 .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
2337 .response.opcode = c->binary_header.request.opcode,
2338 .response.keylen = (uint16_t)htons(keylen),
2339 .response.extlen = extlen,
2340 .response.datatype = datatype,
2341 .response.status = (uint16_t)htons(status),
2342 .response.bodylen = htonl(bodylen + keylen + extlen),
2343 .response.opaque = c->opaque,
2344 .response.cas = htonll(cas),
2345 };
2346
2347 memcpy(buf, header.bytes, sizeof(header.response));
2348 buf += sizeof(header.response);
2349
2350 if (extlen > 0) {
2351 memcpy(buf, ext, extlen);
2352 buf += extlen;
2353 }
2354
2355 if (keylen > 0) {
2356 memcpy(buf, key, keylen);
2357 buf += keylen;
2358 }
2359
2360 if (bodylen > 0) {
2361 memcpy(buf, body, bodylen);
2362 }
2363
2364 c->dynamic_buffer.offset += needed;
2365
2366 return true;
2367 }
2368
/**
 * Tap stats (these are only used by the tap thread, so they don't need
 * to be in the threadlocal struct right now...).
 *
 * One counter per tap message type. The struct is used twice inside
 * struct tap_stats: once for messages sent and once for messages received.
 */
struct tap_cmd_stats {
    uint64_t connect;          /* TAP_CONNECT */
    uint64_t mutation;         /* TAP_MUTATION */
    uint64_t checkpoint_start; /* TAP_CHECKPOINT_START */
    uint64_t checkpoint_end;   /* TAP_CHECKPOINT_END */
    uint64_t delete;           /* TAP_DELETE */
    uint64_t flush;            /* TAP_FLUSH */
    uint64_t opaque;           /* TAP_OPAQUE */
    uint64_t vbucket_set;      /* TAP_VBUCKET_SET */
};
2383
/*
 * Global tap message counters. Every update of `sent` / `received` in this
 * file is done with `mutex` held.
 */
struct tap_stats {
    pthread_mutex_t mutex;          /* guards both counter sets */
    struct tap_cmd_stats sent;      /* messages shipped by ship_tap_log() */
    struct tap_cmd_stats received;  /* messages counted in process_bin_packet() */
} tap_stats = { .mutex = PTHREAD_MUTEX_INITIALIZER };
2389
/*
 * Pull pending events from the connection's tap iterator and queue them for
 * transmission.
 *
 * The output state (msghdr / iov bookkeeping, c->wcurr) is reset, then at
 * most 10 events are fetched per call. Message headers and engine-specific
 * data are serialised into c->wbuf; item keys and values are referenced
 * in place via add_iov(), with the items recorded in c->ilist.
 *
 * On return:
 *  - if anything was queued, the connection is in conn_mwrite and will
 *    continue with conn_ship_log (or conn_closing after TAP_DISCONNECT);
 *  - otherwise the connection is closed (TAP_DISCONNECT) or suspended by
 *    setting c->ewouldblock.
 */
static void ship_tap_log(conn *c) {
    assert(c->thread->type == TAP);
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    if (add_msghdr(c) != 0) {
        if (settings.verbose) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: Failed to create output headers. Shutting down tap connection\n", c->sfd);
        }
        conn_set_state(c, conn_closing);
        return ;
    }
    /* @todo add check for buffer overflow of c->wbuf) */
    c->wcurr = c->wbuf;

    bool more_data = true;
    bool send_data = false;
    bool disconnect = false;

    item *it = NULL;
    uint32_t bodylen;
    int ii = 0;
    c->icurr = c->ilist;
    do {
        /* @todo fixme! */
        if (ii++ == 10) {
            break;
        }

        /*
         * Out-parameters of the iterator. They are initialised because the
         * values are copied into the message template below for every
         * event, including events for which the iterator may not write
         * them; uninitialised stack bytes must never reach the wire.
         */
        void *engine = NULL;
        uint16_t nengine = 0;
        uint8_t ttl = 0;
        uint16_t tap_flags = 0;
        uint32_t seqno = 0;
        uint16_t vbucket = 0;

        tap_event_t event = c->tap_iterator(settings.engine.v0, c, &it,
                                            &engine, &nengine, &ttl,
                                            &tap_flags, &seqno, &vbucket);
        union {
            protocol_binary_request_tap_mutation mutation;
            protocol_binary_request_tap_delete delete;
            protocol_binary_request_tap_flush flush;
            protocol_binary_request_tap_opaque opaque;
            protocol_binary_request_noop noop;
        } msg = {
            .mutation.message.header.request.magic = (uint8_t)PROTOCOL_BINARY_REQ,
        };

        /* Fields common to all tap messages; per-event cases adjust them. */
        msg.opaque.message.header.request.opaque = htonl(seqno);
        msg.opaque.message.body.tap.enginespecific_length = htons(nengine);
        msg.opaque.message.body.tap.ttl = ttl;
        msg.opaque.message.body.tap.flags = htons(tap_flags);
        msg.opaque.message.header.request.extlen = 8;
        msg.opaque.message.header.request.vbucket = htons(vbucket);
        item_info info = { .nvalue = 1 };

        switch (event) {
        case TAP_NOOP :
            send_data = true;
            msg.noop.message.header.request.opcode = PROTOCOL_BINARY_CMD_NOOP;
            msg.noop.message.header.request.extlen = 0;
            msg.noop.message.header.request.bodylen = htonl(0);
            memcpy(c->wcurr, msg.noop.bytes, sizeof(msg.noop.bytes));
            add_iov(c, c->wcurr, sizeof(msg.noop.bytes));
            c->wcurr += sizeof(msg.noop.bytes);
            c->wbytes += sizeof(msg.noop.bytes);
            break;
        case TAP_PAUSE :
            more_data = false;
            break;
        case TAP_CHECKPOINT_START:
        case TAP_CHECKPOINT_END:
        case TAP_MUTATION:
            if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
                settings.engine.v1->release(settings.engine.v0, c, it);
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                                "%d: Failed to get item info\n", c->sfd);
                break;
            }
            send_data = true;
            /* Remember the item; its key/value are referenced by the iovs. */
            c->ilist[c->ileft++] = it;

            if (event == TAP_CHECKPOINT_START) {
                msg.mutation.message.header.request.opcode =
                    PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.checkpoint_start++;
                pthread_mutex_unlock(&tap_stats.mutex);
            } else if (event == TAP_CHECKPOINT_END) {
                msg.mutation.message.header.request.opcode =
                    PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.checkpoint_end++;
                pthread_mutex_unlock(&tap_stats.mutex);
            } else if (event == TAP_MUTATION) {
                msg.mutation.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_MUTATION;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.mutation++;
                pthread_mutex_unlock(&tap_stats.mutex);
            }

            msg.mutation.message.header.request.cas = htonll(info.cas);
            msg.mutation.message.header.request.keylen = htons(info.nkey);
            msg.mutation.message.header.request.extlen = 16;

            bodylen = 16 + info.nkey + nengine;
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                bodylen += info.nbytes;
            }
            msg.mutation.message.header.request.bodylen = htonl(bodylen);
            msg.mutation.message.body.item.flags = htonl(info.flags);
            msg.mutation.message.body.item.expiration = htonl(info.exptime);
            msg.mutation.message.body.tap.enginespecific_length = htons(nengine);
            msg.mutation.message.body.tap.ttl = ttl;
            msg.mutation.message.body.tap.flags = htons(tap_flags);
            memcpy(c->wcurr, msg.mutation.bytes, sizeof(msg.mutation.bytes));

            add_iov(c, c->wcurr, sizeof(msg.mutation.bytes));
            c->wcurr += sizeof(msg.mutation.bytes);
            c->wbytes += sizeof(msg.mutation.bytes);

            if (nengine > 0) {
                memcpy(c->wcurr, engine, nengine);
                add_iov(c, c->wcurr, nengine);
                c->wcurr += nengine;
                c->wbytes += nengine;
            }

            add_iov(c, info.key, info.nkey);
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
            }

            break;
        case TAP_DELETION:
            /* This is a delete */
            if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
                settings.engine.v1->release(settings.engine.v0, c, it);
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                                "%d: Failed to get item info\n", c->sfd);
                break;
            }
            send_data = true;
            c->ilist[c->ileft++] = it;
            msg.delete.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_DELETE;
            msg.delete.message.header.request.cas = htonll(info.cas);
            msg.delete.message.header.request.keylen = htons(info.nkey);

            bodylen = 8 + info.nkey + nengine;
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                bodylen += info.nbytes;
            }
            msg.delete.message.header.request.bodylen = htonl(bodylen);

            memcpy(c->wcurr, msg.delete.bytes, sizeof(msg.delete.bytes));
            add_iov(c, c->wcurr, sizeof(msg.delete.bytes));
            c->wcurr += sizeof(msg.delete.bytes);
            c->wbytes += sizeof(msg.delete.bytes);

            if (nengine > 0) {
                memcpy(c->wcurr, engine, nengine);
                add_iov(c, c->wcurr, nengine);
                c->wcurr += nengine;
                c->wbytes += nengine;
            }

            add_iov(c, info.key, info.nkey);
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
            }

            pthread_mutex_lock(&tap_stats.mutex);
            tap_stats.sent.delete++;
            pthread_mutex_unlock(&tap_stats.mutex);
            break;

        case TAP_DISCONNECT:
            disconnect = true;
            more_data = false;
            break;
        case TAP_VBUCKET_SET:
        case TAP_FLUSH:
        case TAP_OPAQUE:
            send_data = true;

            if (event == TAP_OPAQUE) {
                msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_OPAQUE;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.opaque++;
                pthread_mutex_unlock(&tap_stats.mutex);

            } else if (event == TAP_FLUSH) {
                msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_FLUSH;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.flush++;
                pthread_mutex_unlock(&tap_stats.mutex);
            } else if (event == TAP_VBUCKET_SET) {
                msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET;
                msg.flush.message.body.tap.flags = htons(tap_flags);
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.vbucket_set++;
                pthread_mutex_unlock(&tap_stats.mutex);
            }

            msg.flush.message.header.request.bodylen = htonl(8 + nengine);
            memcpy(c->wcurr, msg.flush.bytes, sizeof(msg.flush.bytes));
            add_iov(c, c->wcurr, sizeof(msg.flush.bytes));
            c->wcurr += sizeof(msg.flush.bytes);
            c->wbytes += sizeof(msg.flush.bytes);
            if (nengine > 0) {
                memcpy(c->wcurr, engine, nengine);
                add_iov(c, c->wcurr, nengine);
                c->wcurr += nengine;
                c->wbytes += nengine;
            }
            break;
        default:
            abort();
        }
    } while (more_data);

    c->ewouldblock = false;
    if (send_data) {
        conn_set_state(c, conn_mwrite);
        if (disconnect) {
            c->write_and_go = conn_closing;
        } else {
            c->write_and_go = conn_ship_log;
        }
    } else {
        if (disconnect) {
            conn_set_state(c, conn_closing);
        } else {
            /* No more items to ship to the slave at this time.. suspend.. */
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                                "%d: No more items in tap log.. waiting\n",
                                                c->sfd);
            }
            c->ewouldblock = true;
        }
    }
}
2635
/*
 * Hand a fully received packet with an opcode the core does not know to the
 * engine's unknown_command() hook and act on the result.
 *
 * A pending asynchronous status in c->aiostat is consumed first; the engine
 * is only called when that status is ENGINE_SUCCESS.
 */
static void process_bin_unknown_packet(conn *c) {
    /* rcurr is already past the packet; step back over body and header. */
    void *packet = c->rcurr - (c->binary_header.request.bodylen +
                               sizeof(c->binary_header));

    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->unknown_command(settings.engine.v0, c, packet,
                                                     binary_response_handler);
    }

    switch (status) {
    case ENGINE_SUCCESS:
        if (c->dynamic_buffer.buffer == NULL) {
            /* The engine produced no response data. */
            conn_set_state(c, conn_new_cmd);
        } else {
            write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        /* FATAL ERROR, shut down connection */
        conn_set_state(c, conn_closing);
        break;
    }
}
2665
/*
 * Handle a fully received TAP_CONNECT packet: decode the optional 4 byte
 * flags, locate the connection name (key) and user data, and ask the engine
 * for a tap iterator.
 *
 * With an iterator the connection switches to conn_ship_log; without one a
 * NOT_SUPPORTED response is sent and the connection is closed afterwards.
 * A backfill request with fewer than 8 bytes of user data closes the
 * connection immediately.
 */
static void process_bin_tap_connect(conn *c) {
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_request_tap_connect *req = (void*)packet;
    const char *key = packet + sizeof(req->bytes);
    const char *data = key + c->binary_header.request.keylen;
    size_t ndata = c->binary_header.request.bodylen -
        c->binary_header.request.extlen -
        c->binary_header.request.keylen;
    uint32_t flags = 0;

    if (c->binary_header.request.extlen != 4) {
        /* No flags in the packet: key and data start 4 bytes earlier. */
        data -= 4;
        key -= 4;
    } else {
        flags = ntohl(req->message.body.flags);

        /* the userdata has to be at least 8 bytes! */
        if ((flags & TAP_CONNECT_FLAG_BACKFILL) && ndata < 8) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: ERROR: Invalid tap connect message\n",
                                            c->sfd);
            conn_set_state(c, conn_closing);
            return ;
        }
    }

    if (settings.verbose && c->binary_header.request.keylen > 0) {
        /* Log a NUL-terminated, possibly truncated copy of the name. */
        char name[1024];
        int len = c->binary_header.request.keylen;
        if (len >= sizeof(name)) {
            len = sizeof(name) - 1;
        }
        memcpy(name, key, len);
        name[len] = '\0';
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: Trying to connect with named tap connection: <%s>\n",
                                        c->sfd, name);
    }

    TAP_ITERATOR iterator = settings.engine.v1->get_tap_iterator(
        settings.engine.v0, c, key, c->binary_header.request.keylen,
        flags, data, ndata);

    if (iterator != NULL) {
        c->tap_iterator = iterator;
        c->which = EV_WRITE;
        conn_set_state(c, conn_ship_log);
        return;
    }

    settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                    "%d: FATAL: The engine does not support tap\n",
                                    c->sfd);
    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
    c->write_and_go = conn_closing;
}
2724
/*
 * Decode a fully received tap message (mutation, delete, flush, opaque,
 * vbucket set, checkpoint start/end) and forward it to the engine's
 * tap_notify() hook with the ttl decremented by one.
 *
 * Packet layout behind the header, as read here: 8 bytes of tap extras,
 * (for mutation / checkpoint events) 8 more bytes of item flags and
 * expiration, engine-specific data, key, value.
 *
 * The engine result decides the next step: close, wait (EWOULDBLOCK), send
 * an ack/nack packet, or continue with the next command.
 */
static void process_bin_tap_packet(tap_event_t event, conn *c) {
    assert(c != NULL);
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_request_tap_no_extras *tap = (void*)packet;

    uint16_t nengine = ntohs(tap->message.body.tap.enginespecific_length);
    uint16_t tap_flags = ntohs(tap->message.body.tap.flags);
    uint32_t seqno = ntohl(tap->message.header.request.opaque);
    uint8_t ttl = tap->message.body.tap.ttl;
    assert(ttl > 0);

    uint16_t nkey = c->binary_header.request.keylen;
    char *engine_specific = packet + sizeof(tap->bytes);
    char *key = engine_specific + nengine;
    char *data = key + nkey;
    uint32_t ndata = c->binary_header.request.bodylen - nengine - nkey - 8;
    uint32_t item_flags = 0;
    uint32_t exptime = 0;

    bool carries_item = (event == TAP_MUTATION ||
                         event == TAP_CHECKPOINT_START ||
                         event == TAP_CHECKPOINT_END);
    if (carries_item) {
        /* These messages have 8 extra bytes: item flags and expiration. */
        protocol_binary_request_tap_mutation *mutation = (void*)tap;
        item_flags = ntohl(mutation->message.body.item.flags);
        exptime = ntohl(mutation->message.body.item.expiration);
        key += 8;
        data += 8;
        ndata -= 8;
    }

    ENGINE_ERROR_CODE status = c->aiostat;
    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->tap_notify(settings.engine.v0, c,
                                                engine_specific, nengine,
                                                ttl - 1, tap_flags,
                                                event, seqno,
                                                key, nkey,
                                                item_flags, exptime,
                                                ntohll(tap->message.header.request.cas),
                                                data, ndata,
                                                c->binary_header.request.vbucket);
    }

    if (status == ENGINE_DISCONNECT) {
        conn_set_state(c, conn_closing);
    } else if (status == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
    } else if ((tap_flags & TAP_FLAG_ACK) ||
               (status != ENGINE_SUCCESS && c->tap_nack_mode)) {
        /* The peer asked for an ack, or we are in nack mode and failed. */
        write_bin_packet(c, engine_error_2_protocol_error(status), 0);
    } else {
        conn_set_state(c, conn_new_cmd);
    }
}
2783
/*
 * Handle a response packet that acknowledges a tap message we sent: pass
 * sequence number, status and key to the engine as a TAP_ACK event.
 *
 * The connection is closed when the engine has no tap_notify() hook or
 * answers ENGINE_DISCONNECT; otherwise it resumes in conn_ship_log.
 */
static void process_bin_tap_ack(conn *c) {
    assert(c != NULL);
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_response_no_extras *rsp = (void*)packet;
    uint32_t seqno = ntohl(rsp->message.header.response.opaque);
    uint16_t status = ntohs(rsp->message.header.response.status);
    char *key = packet + sizeof(rsp->bytes);

    ENGINE_ERROR_CODE verdict = ENGINE_DISCONNECT;
    if (settings.engine.v1->tap_notify != NULL) {
        verdict = settings.engine.v1->tap_notify(settings.engine.v0, c, NULL, 0, 0, status,
                                                 TAP_ACK, seqno, key,
                                                 c->binary_header.request.keylen, 0, 0,
                                                 0, NULL, 0, 0);
    }

    if (verdict != ENGINE_DISCONNECT) {
        conn_set_state(c, conn_ship_log);
        return;
    }
    conn_set_state(c, conn_closing);
}
2807
2808 /**
2809 * We received a noop response.. just ignore it
2810 */
process_bin_noop_response(conn * c)2811 static void process_bin_noop_response(conn *c) {
2812 assert(c != NULL);
2813 conn_set_state(c, conn_new_cmd);
2814 }
2815
/*
 * Handle the VERBOSITY command: take the requested level from the packet,
 * clamp it to MAX_VERBOSITY_LEVEL, store it in settings.verbose, run the
 * ON_LOG_LEVEL callbacks and send an empty success response.
 */
static void process_bin_verbosity(conn *c) {
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_request_verbosity *req = (void*)packet;
    uint32_t requested = (uint32_t)ntohl(req->message.body.level);

    settings.verbose = (int)(requested > MAX_VERBOSITY_LEVEL
                             ? (uint32_t)MAX_VERBOSITY_LEVEL
                             : requested);

    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    write_bin_response(c, NULL, 0, 0, 0);
}
2828
/*
 * Dispatch a fully received request packet by opcode.
 *
 * Tap opcodes bump the matching "received" counter (under tap_stats.mutex)
 * and are then handled as tap events; TAP_CONNECT instead moves the
 * connection to conn_add_tap_client. VERBOSITY has its own handler and
 * everything else goes to process_bin_unknown_packet().
 */
static void process_bin_packet(conn *c) {
    /* @todo this should be an array of funciton pointers and call through */
    uint64_t *received = NULL;
    tap_event_t event = TAP_NOOP;   /* replaced below for every tap event */
    bool is_connect = false;

    switch (c->binary_header.request.opcode) {
    case PROTOCOL_BINARY_CMD_TAP_CONNECT:
        received = &tap_stats.received.connect;
        is_connect = true;
        break;
    case PROTOCOL_BINARY_CMD_TAP_MUTATION:
        received = &tap_stats.received.mutation;
        event = TAP_MUTATION;
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
        received = &tap_stats.received.checkpoint_start;
        event = TAP_CHECKPOINT_START;
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
        received = &tap_stats.received.checkpoint_end;
        event = TAP_CHECKPOINT_END;
        break;
    case PROTOCOL_BINARY_CMD_TAP_DELETE:
        received = &tap_stats.received.delete;
        event = TAP_DELETION;
        break;
    case PROTOCOL_BINARY_CMD_TAP_FLUSH:
        received = &tap_stats.received.flush;
        event = TAP_FLUSH;
        break;
    case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
        received = &tap_stats.received.opaque;
        event = TAP_OPAQUE;
        break;
    case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
        received = &tap_stats.received.vbucket_set;
        event = TAP_VBUCKET_SET;
        break;
    case PROTOCOL_BINARY_CMD_VERBOSITY:
        process_bin_verbosity(c);
        return;
    default:
        process_bin_unknown_packet(c);
        return;
    }

    /* Only tap opcodes get here: count the message, then handle it. */
    pthread_mutex_lock(&tap_stats.mutex);
    (*received)++;
    pthread_mutex_unlock(&tap_stats.mutex);

    if (is_connect) {
        conn_set_state(c, conn_add_tap_client);
    } else {
        process_bin_tap_packet(event, c);
    }
}
2887
2888
2889
/* Signature of a function that handles a response packet on a connection. */
typedef void (*RESPONSE_HANDLER)(conn*);
/**
 * A map between the response packets op-code and the function to handle
 * the response message.
 *
 * The table is indexed by opcode. Opcodes not listed below have a NULL
 * entry (static storage is zero-initialised), so a lookup has to check for
 * NULL before calling through.
 */
static RESPONSE_HANDLER response_handlers[256] = {
    [PROTOCOL_BINARY_CMD_NOOP] = process_bin_noop_response,
    /* Responses to the tap messages we ship are acks from the peer. */
    [PROTOCOL_BINARY_CMD_TAP_MUTATION] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_DELETE] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_FLUSH] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_OPAQUE] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END] = process_bin_tap_ack
};
2905
/*
 * Validate the header of a binary request and start processing it.
 *
 * Steps, in order:
 *  1. reject packets whose key/extras lengths exceed the body length
 *     (UNKNOWN_COMMAND response, then close);
 *  2. enforce SASL authentication when settings.require_sasl is set
 *     (AUTH_ERROR response, then close);
 *  3. reject keys longer than KEY_MAX_LENGTH as a protocol error;
 *  4. map "quiet" opcodes to their regular counterpart; c->noreply stays
 *     true only for those;
 *  5. check the per-command length constraints and either answer right
 *     away or schedule reading of the rest of the packet.
 *
 * A violated per-command constraint ends in handle_binary_protocol_error().
 */
static void dispatch_bin_command(conn *c) {
    const uint8_t extlen = c->binary_header.request.extlen;
    const uint16_t keylen = c->binary_header.request.keylen;
    const uint32_t bodylen = c->binary_header.request.bodylen;
    bool malformed = false;

    if (keylen > bodylen || keylen + extlen > bodylen) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, 0);
        c->write_and_go = conn_closing;
        return;
    }

    if (settings.require_sasl && !authenticated(c)) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        c->write_and_go = conn_closing;
        return;
    }

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);
    c->noreply = true;

    /* binprot supports 16bit keys, but internals are still 8bit */
    if (keylen > KEY_MAX_LENGTH) {
        handle_binary_protocol_error(c);
        return;
    }

    /* Quiet variants are executed as the regular command without a reply. */
    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SETQ:       c->cmd = PROTOCOL_BINARY_CMD_SET;       break;
    case PROTOCOL_BINARY_CMD_ADDQ:       c->cmd = PROTOCOL_BINARY_CMD_ADD;       break;
    case PROTOCOL_BINARY_CMD_REPLACEQ:   c->cmd = PROTOCOL_BINARY_CMD_REPLACE;   break;
    case PROTOCOL_BINARY_CMD_DELETEQ:    c->cmd = PROTOCOL_BINARY_CMD_DELETE;    break;
    case PROTOCOL_BINARY_CMD_INCREMENTQ: c->cmd = PROTOCOL_BINARY_CMD_INCREMENT; break;
    case PROTOCOL_BINARY_CMD_DECREMENTQ: c->cmd = PROTOCOL_BINARY_CMD_DECREMENT; break;
    case PROTOCOL_BINARY_CMD_QUITQ:      c->cmd = PROTOCOL_BINARY_CMD_QUIT;      break;
    case PROTOCOL_BINARY_CMD_FLUSHQ:     c->cmd = PROTOCOL_BINARY_CMD_FLUSH;     break;
    case PROTOCOL_BINARY_CMD_APPENDQ:    c->cmd = PROTOCOL_BINARY_CMD_APPEND;    break;
    case PROTOCOL_BINARY_CMD_PREPENDQ:   c->cmd = PROTOCOL_BINARY_CMD_PREPEND;   break;
    case PROTOCOL_BINARY_CMD_GETQ:       c->cmd = PROTOCOL_BINARY_CMD_GET;       break;
    case PROTOCOL_BINARY_CMD_GETKQ:      c->cmd = PROTOCOL_BINARY_CMD_GETK;      break;
    default:
        c->noreply = false;
    }

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_VERSION:
        malformed = !(extlen == 0 && keylen == 0 && bodylen == 0);
        if (!malformed) {
            write_bin_response(c, (void*)VERSION, 0, 0, strlen(VERSION));
        }
        break;
    case PROTOCOL_BINARY_CMD_FLUSH:
        malformed = !(keylen == 0 && bodylen == extlen && (extlen == 0 || extlen == 4));
        if (!malformed) {
            bin_read_key(c, bin_read_flush_exptime, extlen);
        }
        break;
    case PROTOCOL_BINARY_CMD_NOOP:
        malformed = !(extlen == 0 && keylen == 0 && bodylen == 0);
        if (!malformed) {
            write_bin_response(c, NULL, 0, 0, 0);
        }
        break;
    case PROTOCOL_BINARY_CMD_SET: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_ADD: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_REPLACE:
        malformed = !(extlen == 8 && keylen != 0 && bodylen >= (keylen + 8));
        if (!malformed) {
            bin_read_key(c, bin_reading_set_header, 8);
        }
        break;
    case PROTOCOL_BINARY_CMD_GETQ:  /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_GET:   /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_GETKQ: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_GETK:
        malformed = !(extlen == 0 && bodylen == keylen && keylen > 0);
        if (!malformed) {
            bin_read_key(c, bin_reading_get_key, 0);
        }
        break;
    case PROTOCOL_BINARY_CMD_DELETE:
        malformed = !(keylen > 0 && extlen == 0 && bodylen == keylen);
        if (!malformed) {
            bin_read_key(c, bin_reading_del_header, extlen);
        }
        break;
    case PROTOCOL_BINARY_CMD_INCREMENT:
    case PROTOCOL_BINARY_CMD_DECREMENT:
        malformed = !(keylen > 0 && extlen == 20 && bodylen == (keylen + extlen));
        if (!malformed) {
            bin_read_key(c, bin_reading_incr_header, 20);
        }
        break;
    case PROTOCOL_BINARY_CMD_APPEND:
    case PROTOCOL_BINARY_CMD_PREPEND:
        malformed = !(keylen > 0 && extlen == 0);
        if (!malformed) {
            bin_read_key(c, bin_reading_set_header, 0);
        }
        break;
    case PROTOCOL_BINARY_CMD_STAT:
        malformed = !(extlen == 0);
        if (!malformed) {
            bin_read_key(c, bin_reading_stat, 0);
        }
        break;
    case PROTOCOL_BINARY_CMD_QUIT:
        malformed = !(keylen == 0 && extlen == 0 && bodylen == 0);
        if (!malformed) {
            write_bin_response(c, NULL, 0, 0, 0);
            c->write_and_go = conn_closing;
            if (c->noreply) {
                /* Quiet quit: close right away. */
                conn_set_state(c, conn_closing);
            }
        }
        break;
    case PROTOCOL_BINARY_CMD_TAP_CONNECT:
        if (settings.engine.v1->get_tap_iterator != NULL) {
            bin_read_chunk(c, bin_reading_packet,
                           c->binary_header.request.bodylen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
        }
        break;
    case PROTOCOL_BINARY_CMD_TAP_MUTATION:
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
    case PROTOCOL_BINARY_CMD_TAP_DELETE:
    case PROTOCOL_BINARY_CMD_TAP_FLUSH:
    case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
    case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
        if (settings.engine.v1->tap_notify != NULL) {
            bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
        }
        break;
#ifdef SASL_ENABLED
    case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS:
        malformed = !(extlen == 0 && keylen == 0 && bodylen == 0);
        if (!malformed) {
            bin_list_sasl_mechs(c);
        }
        break;
    case PROTOCOL_BINARY_CMD_SASL_AUTH:
    case PROTOCOL_BINARY_CMD_SASL_STEP:
        malformed = !(extlen == 0 && keylen != 0);
        if (!malformed) {
            bin_read_key(c, bin_reading_sasl_auth, 0);
        }
        break;
#endif
    case PROTOCOL_BINARY_CMD_VERBOSITY:
        malformed = !(extlen == 4 && keylen == 0 && bodylen == 4);
        if (!malformed) {
            bin_read_chunk(c, bin_reading_packet,
                           c->binary_header.request.bodylen);
        }
        break;
    default:
        /* Not a core command: leave it to the engine if it has a hook. */
        if (settings.engine.v1->unknown_command != NULL) {
            bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND,
                             bodylen);
        }
    }

    if (malformed) {
        handle_binary_protocol_error(c);
    }
}
3115
/*
 * Start a binary SET / ADD / REPLACE once the extras and the key have been
 * buffered.
 *
 * The engine is asked to allocate an item for the announced value size.  On
 * success the connection is switched to conn_nread with substate
 * bin_read_set_value so the value bytes are read straight into the item.
 * On failure an error packet is queued and the value bytes are swallowed.
 *
 * A request with a non-zero CAS is always stored as OPERATION_CAS, whatever
 * the opcode was.
 */
static void process_bin_update(conn *c) {
    char *key;
    uint16_t nkey;
    uint32_t vlen;
    item *it = NULL;
    protocol_binary_request_set *req;

    /* Check the connection before it is handed to the request accessors. */
    assert(c != NULL);

    req = binary_get_request(c);
    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;

    /*
     * Only the expiration is converted to host byte order here; the flags
     * are passed to the engine exactly as they appear in the request.
     */
    rel_time_t expiration = ntohl(req->message.body.expiration);

    /* Everything in the body after the extras and the key is the value. */
    vlen = c->binary_header.request.bodylen - (nkey + c->binary_header.request.extlen);

    if (settings.verbose > 1) {
        char buffer[1024];
        const char *prefix;
        if (c->cmd == PROTOCOL_BINARY_CMD_ADD) {
            prefix = "ADD";
        } else if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            prefix = "SET";
        } else {
            prefix = "REPLACE";
        }

        size_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     prefix, key, nkey);

        /* The helper reports failure as -1, seen here as (size_t)-1. */
        if (nw != (size_t)-1) {
            /* vlen is unsigned, so print it as such; skip the log line if
             * formatting fails. */
            if (snprintf(buffer + nw, sizeof(buffer) - nw,
                         " Value len is %u\n", (unsigned int)vlen) > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Pick up (and clear) the status left on the connection in aiostat. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen,
                                           req->message.body.flags,
                                           expiration);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_ADD:
            c->store_op = OPERATION_ADD;
            break;
        case PROTOCOL_BINARY_CMD_SET:
            c->store_op = OPERATION_SET;
            break;
        case PROTOCOL_BINARY_CMD_REPLACE:
            c->store_op = OPERATION_REPLACE;
            break;
        default:
            assert(0);
        }

        if (c->binary_header.request.cas != 0) {
            c->store_op = OPERATION_CAS;
        }

        /* Read the value directly into the item's first value segment. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }

        /*
         * Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET (but only if cas matches).
         * Anywhere else too?
         */
        if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            /* @todo fix this for the ASYNC interface! */
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey,
                                       ntohll(req->message.header.request.cas),
                                       c->binary_header.request.vbucket);
        }

        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3238
/*
 * Start a binary APPEND / PREPEND once the key has been buffered.
 *
 * Allocates an item sized for the value that follows the key and switches
 * the connection to conn_nread (substate bin_read_set_value) so the value is
 * read into it.  Engine failures produce an error packet and the pending
 * value bytes are swallowed.
 *
 * The value length is derived from client-supplied header fields, so it is
 * validated at run time: a body shorter than the key, or a value too large
 * for the int used below, is reported through
 * handle_binary_protocol_error() instead of relying on an assert that
 * disappears in NDEBUG builds.
 */
static void process_bin_append_prepend(conn *c) {
    char *key;
    int nkey;
    int vlen;
    item *it = NULL;

    assert(c != NULL);

    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;

    const uint32_t bodylen = c->binary_header.request.bodylen;
    if (bodylen < (uint32_t)nkey ||
        bodylen - (uint32_t)nkey > (uint32_t)INT_MAX) {
        /* Malformed request: the value length would be negative. */
        handle_binary_protocol_error(c);
        return;
    }
    vlen = (int)(bodylen - (uint32_t)nkey);

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "Value len is %d\n", vlen);
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Pick up (and clear) the status left on the connection in aiostat. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, 0, 0);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_APPEND:
            c->store_op = OPERATION_APPEND;
            break;
        case PROTOCOL_BINARY_CMD_PREPEND:
            c->store_op = OPERATION_PREPEND;
            break;
        default:
            assert(0);
        }

        /* Read the value directly into the item's first value segment. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }
        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3316
/*
 * Execute a binary FLUSH.
 *
 * The expiration defaults to 0 and is taken from the request only when the
 * extras section has exactly the size of the flush body.  The engine result
 * is mapped to a success, NOT_SUPPORTED or EINVAL reply, and the cmd_flush
 * counter is bumped in every case.
 */
static void process_bin_flush(conn *c) {
    protocol_binary_request_flush *req = binary_get_request(c);
    time_t exptime = 0;
    ENGINE_ERROR_CODE ret;

    if (c->binary_header.request.extlen == sizeof(req->message.body)) {
        exptime = ntohl(req->message.body.expiration);
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: flush %ld", c->sfd,
                                        (long)exptime);
    }

    ret = settings.engine.v1->flush(settings.engine.v0, c, exptime);

    switch (ret) {
    case ENGINE_SUCCESS:
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
        break;
    }

    STATS_NOKEY(c, cmd_flush);
}
3343
/*
 * Execute a binary DELETE for the key that has just been read.
 *
 * The removal is attempted only when the status left in c->aiostat is
 * ENGINE_SUCCESS; otherwise that status is answered as if the engine had
 * returned it.  Hits and misses are recorded in the stats counters.
 */
static void process_bin_delete(conn *c) {
    protocol_binary_request_delete *req = binary_get_request(c);
    char *key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;
    /* The SLAB_INCR expansion refers to a local named "info". */
    item_info info = { .nvalue = 1 };
    ENGINE_ERROR_CODE ret;

    assert(c != NULL);

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "DELETE", key, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    /* Pick up (and clear) the status left on the connection in aiostat. */
    ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        if (settings.detail_enabled) {
            stats_prefix_record_delete(key, nkey);
        }
        ret = settings.engine.v1->remove(settings.engine.v0, c, key, nkey,
                                         ntohll(req->message.header.request.cas),
                                         c->binary_header.request.vbucket);
    }

    if (ret == ENGINE_SUCCESS) {
        write_bin_response(c, NULL, 0, 0, 0);
        SLAB_INCR(c, delete_hits, key, nkey);
    } else if (ret == ENGINE_KEY_EEXISTS) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
    } else if (ret == ENGINE_KEY_ENOENT) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        STATS_INCR(c, delete_misses, key, nkey);
    } else if (ret == ENGINE_NOT_MY_VBUCKET) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
    } else if (ret == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
    } else {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
3398
/*
 * Continue a binary-protocol request after the bytes the connection was
 * waiting for have arrived.  The handler is chosen from c->substate; an
 * unknown substate is logged and the process aborts.
 */
static void complete_nread_binary(conn *c) {
    assert(c != NULL);
    assert(c->cmd >= 0);

    switch (c->substate) {
    case bin_reading_set_header:
        /* The same substate serves the store family and append/prepend. */
        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_APPEND:
        case PROTOCOL_BINARY_CMD_PREPEND:
            process_bin_append_prepend(c);
            break;
        default:
            process_bin_update(c);
            break;
        }
        break;
    case bin_read_set_value:
        complete_update_bin(c);
        break;
    case bin_reading_get_key:
        process_bin_get(c);
        break;
    case bin_reading_stat:
        process_bin_stat(c);
        break;
    case bin_reading_del_header:
        process_bin_delete(c);
        break;
    case bin_reading_incr_header:
        complete_incr_bin(c);
        break;
    case bin_read_flush_exptime:
        process_bin_flush(c);
        break;
    case bin_reading_sasl_auth:
        process_bin_sasl_auth(c);
        break;
    case bin_reading_sasl_auth_data:
        process_bin_complete_sasl_auth(c);
        break;
    case bin_reading_packet:
        if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
            /* Not a response packet: treat it as a request. */
            process_bin_packet(c);
            break;
        }
        {
            /* Response packet: look the handler up by opcode. */
            RESPONSE_HANDLER handler =
                response_handlers[c->binary_header.request.opcode];

            if (!handler) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                        "%d: ERROR: Unsupported response packet received: %u\n",
                        c->sfd, (unsigned int)c->binary_header.request.opcode);
                conn_set_state(c, conn_closing);
            } else {
                handler(c);
            }
        }
        break;
    default:
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                "Not handling substate %d\n", c->substate);
        abort();
    }
}
3458
/*
 * Put the connection back into its between-commands state: clear the
 * per-command fields, release any item still attached, shrink the buffers,
 * then either parse the next command (input already buffered) or wait.
 */
static void reset_cmd_handler(conn *c) {
    c->cmd = -1;
    c->substate = bin_no_state;
    c->ascii_cmd = NULL;
    c->sbytes = 0;

    if (c->item != NULL) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = NULL;
    }

    conn_shrink(c);

    if (c->rbytes <= 0) {
        conn_set_state(c, conn_waiting);
    } else {
        conn_set_state(c, conn_parse_cmd);
    }
}
3475
ascii_response_handler(const void * cookie,int nbytes,const char * dta)3476 static ENGINE_ERROR_CODE ascii_response_handler(const void *cookie,
3477 int nbytes,
3478 const char *dta)
3479 {
3480 conn *c = (conn*)cookie;
3481 if (!grow_dynamic_buffer(c, nbytes)) {
3482 if (settings.verbose > 0) {
3483 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
3484 "<%d ERROR: Failed to allocate memory for response\n",
3485 c->sfd);
3486 }
3487 return ENGINE_ENOMEM;
3488 }
3489
3490 char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
3491 memcpy(buf, dta, nbytes);
3492 c->dynamic_buffer.offset += nbytes;
3493
3494 return ENGINE_SUCCESS;
3495 }
3496
/*
 * Continue an ASCII-protocol request after its data has been read.
 *
 * Without an extension command attached this is a plain store and is handed
 * to complete_update_ascii().  Otherwise the extension command is executed;
 * on success its buffered output (if any) is written out, EWOULDBLOCK marks
 * the connection as blocked, and any other result closes the connection.
 */
static void complete_nread_ascii(conn *c) {
    if (c->ascii_cmd == NULL) {
        complete_update_ascii(c);
        return;
    }

    c->ewouldblock = false;

    ENGINE_ERROR_CODE rc = c->ascii_cmd->execute(c->ascii_cmd->cookie, c, 0,
                                                 NULL, ascii_response_handler);

    if (rc == ENGINE_SUCCESS) {
        if (c->dynamic_buffer.buffer == NULL) {
            /* Nothing was produced: go straight to the next command. */
            conn_set_state(c, conn_new_cmd);
        } else {
            write_and_free(c, c->dynamic_buffer.buffer,
                           c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
    } else if (rc == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
    } else {
        /* ENGINE_DISCONNECT and every other result. */
        conn_set_state(c, conn_closing);
    }
}
3522
/*
 * Dispatch the "data has been read" step to the ASCII or binary handler,
 * depending on the protocol of the connection.
 */
static void complete_nread(conn *c) {
    assert(c != NULL);
    assert(c->protocol == ascii_prot
           || c->protocol == binary_prot);

    switch (c->protocol) {
    case ascii_prot:
        complete_nread_ascii(c);
        break;
    case binary_prot:
        complete_nread_binary(c);
        break;
    default:
        /* Any other protocol value is ignored. */
        break;
    }
}
3534
/* Indices into the token array filled in by tokenize_command(). */
#define COMMAND_TOKEN 0 /* presumably the command word itself -- confirm against callers */
#define SUBCOMMAND_TOKEN 1 /* sub-command slot, e.g. the word after "stats" */
#define KEY_TOKEN 1 /* first key; same slot as SUBCOMMAND_TOKEN */

/* NOTE(review): looks like the capacity of the token arrays callers pass to
 * tokenize_command() -- not used in this part of the file, confirm. */
#define MAX_TOKENS 30
3540
3541 /*
3542 * Tokenize the command string by replacing whitespace with '\0' and update
3543 * the token array tokens with pointer to start of each token and length.
3544 * Returns total number of tokens. The last valid token is the terminal
3545 * token (value points to the first unprocessed character of the string and
3546 * length zero).
3547 *
3548 * Usage example:
3549 *
3550 * while(tokenize_command(command, ncommand, tokens, max_tokens) > 0) {
3551 * for(int ix = 0; tokens[ix].length != 0; ix++) {
3552 * ...
3553 * }
3554 * ncommand = tokens[ix].value - command;
3555 * command = tokens[ix].value;
3556 * }
3557 */
tokenize_command(char * command,token_t * tokens,const size_t max_tokens)3558 static size_t tokenize_command(char *command, token_t *tokens, const size_t max_tokens) {
3559 char *s, *e;
3560 size_t ntokens = 0;
3561
3562 assert(command != NULL && tokens != NULL && max_tokens > 1);
3563
3564 for (s = e = command; ntokens < max_tokens - 1; ++e) {
3565 if (*e == ' ') {
3566 if (s != e) {
3567 tokens[ntokens].value = s;
3568 tokens[ntokens].length = e - s;
3569 ntokens++;
3570 *e = '\0';
3571 }
3572 s = e + 1;
3573 }
3574 else if (*e == '\0') {
3575 if (s != e) {
3576 tokens[ntokens].value = s;
3577 tokens[ntokens].length = e - s;
3578 ntokens++;
3579 }
3580
3581 break; /* string end */
3582 }
3583 }
3584
3585 /*
3586 * If we scanned the whole string, the terminal value pointer is null,
3587 * otherwise it is the first unprocessed character.
3588 */
3589 tokens[ntokens].value = *e == '\0' ? NULL : e;
3590 tokens[ntokens].length = 0;
3591 ntokens++;
3592
3593 return ntokens;
3594 }
3595
3596 #ifdef INNODB_MEMCACHED
detokenize(token_t * tokens,size_t ntokens,char ** out,int * nbytes)3597 static void detokenize(token_t *tokens, size_t ntokens, char **out, int *nbytes)
3598 #else
3599 static void detokenize(token_t *tokens, int ntokens, char **out, int *nbytes)
3600 #endif
3601 {
3602 int i;
3603 char *buf, *p;
3604 size_t nb = ntokens; // account for spaces, which is ntokens-1, plus the null
3605
3606 for (i = 0; i < ntokens; ++i) {
3607 nb += tokens[i].length;
3608 }
3609
3610 buf = malloc(nb * sizeof(char));
3611 if (buf != NULL) {
3612 p = buf;
3613 for (i = 0; i < ntokens; ++i) {
3614 memcpy(p, tokens[i].value, tokens[i].length);
3615 p += tokens[i].length;
3616 *p = ' ';
3617 p++;
3618 }
3619 buf[nb - 1] = '\0';
3620 *nbytes = nb - 1;
3621 *out = buf;
3622 }
3623 }
3624
3625
3626 /* set up a connection to write a buffer then free it, used for stats */
write_and_free(conn * c,char * buf,int bytes)3627 static void write_and_free(conn *c, char *buf, int bytes) {
3628 if (buf) {
3629 c->write_and_free = buf;
3630 c->wcurr = buf;
3631 c->wbytes = bytes;
3632 conn_set_state(c, conn_write);
3633 c->write_and_go = conn_new_cmd;
3634 } else {
3635 out_string(c, "SERVER_ERROR out of memory writing stats");
3636 }
3637 }
3638
/*
 * Turn on c->noreply when the last real token of the command line (the one
 * just before the terminal token) is the word "noreply".
 *
 * NOTE: this is not the first place a reply may be sent from.
 * process_command() may already answer when the request line has the wrong
 * number of tokens; looking for "noreply" in a malformed line is not
 * reliable anyway, so that cannot be helped.
 *
 * @return the resulting value of c->noreply
 */
static inline bool set_noreply_maybe(conn *c, token_t *tokens, size_t ntokens)
{
    int noreply_index = ntokens - 2;
    const char *candidate = tokens[noreply_index].value;

    if (candidate != NULL && strcmp(candidate, "noreply") == 0) {
        c->noreply = true;
    }

    return c->noreply;
}
3656
/*
 * Format one stat value printf-style and hand it to add_stats.
 *
 * @param name      stat name (NUL-terminated)
 * @param add_stats callback that receives the name/value pair
 * @param c         connection passed through to the callback
 * @param fmt       printf-style format for the value, followed by its args
 *
 * The value is formatted into a STAT_VAL_LEN byte buffer.  The length
 * reported to add_stats is always the number of bytes actually stored
 * there: a value that does not fit is truncated, and a formatting error
 * yields an empty value.
 */
void append_stat(const char *name, ADD_STAT add_stats, conn *c,
                 const char *fmt, ...) {
    char val_str[STAT_VAL_LEN];
    int vlen;
    va_list ap;

    assert(name);
    assert(add_stats);
    assert(c);
    assert(fmt);

    va_start(ap, fmt);
    vlen = vsnprintf(val_str, sizeof(val_str) - 1, fmt, ap);
    va_end(ap);

    /*
     * vsnprintf() returns the length the full output would have had, or a
     * negative value on error, so clamp it to what is really in val_str.
     */
    if (vlen < 0) {
        val_str[0] = '\0';
        vlen = 0;
    } else if ((size_t)vlen >= sizeof(val_str) - 1) {
        vlen = (int)(sizeof(val_str) - 2);
    }

    add_stats(name, strlen(name), val_str, vlen, c);
}
3674
/*
 * Handle "stats detail <on|off|dump>".
 *
 * Requires settings.allow_detailed; "on"/"off" toggle
 * settings.detail_enabled, "dump" writes the prefix statistics, and
 * anything else is answered with a usage error.
 */
inline static void process_stats_detail(conn *c, const char *command) {
    assert(c != NULL);

    if (!settings.allow_detailed) {
        out_string(c, "CLIENT_ERROR detailed stats disabled");
        return;
    }

    if (strcmp(command, "on") == 0) {
        settings.detail_enabled = 1;
        out_string(c, "OK");
        return;
    }

    if (strcmp(command, "off") == 0) {
        settings.detail_enabled = 0;
        out_string(c, "OK");
        return;
    }

    if (strcmp(command, "dump") == 0) {
        int dump_len;
        char *dump = stats_prefix_dump(&dump_len);
        write_and_free(c, dump, dump_len);
        return;
    }

    out_string(c, "CLIENT_ERROR usage: stats detail on|off|dump");
}
3700
aggregate_callback(void * in,void * out)3701 static void aggregate_callback(void *in, void *out) {
3702 struct thread_stats *out_thread_stats = out;
3703 struct independent_stats *in_independent_stats = in;
3704 threadlocal_stats_aggregate(in_independent_stats->thread_stats,
3705 out_thread_stats);
3706 }
3707
3708 /* return server specific stats only */
server_stats(ADD_STAT add_stats,conn * c,bool aggregate)3709 static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate) {
3710 pid_t pid = getpid();
3711 rel_time_t now = current_time;
3712
3713 struct thread_stats thread_stats;
3714 threadlocal_stats_clear(&thread_stats);
3715
3716 if (aggregate && settings.engine.v1->aggregate_stats != NULL) {
3717 settings.engine.v1->aggregate_stats(settings.engine.v0,
3718 (const void *)c,
3719 aggregate_callback,
3720 &thread_stats);
3721 } else {
3722 threadlocal_stats_aggregate(get_independent_stats(c)->thread_stats,
3723 &thread_stats);
3724 }
3725
3726 struct slab_stats slab_stats;
3727 slab_stats_aggregate(&thread_stats, &slab_stats);
3728
3729 #ifndef __WIN32__
3730 struct rusage usage;
3731 getrusage(RUSAGE_SELF, &usage);
3732 #endif
3733
3734 STATS_LOCK();
3735
3736 APPEND_STAT("pid", "%lu", (long)pid);
3737 APPEND_STAT("uptime", "%u", now);
3738 APPEND_STAT("time", "%ld", now + (long)process_started);
3739 APPEND_STAT("version", "%s", VERSION);
3740 APPEND_STAT("libevent", "%s", event_get_version());
3741 APPEND_STAT("pointer_size", "%d", (int)(8 * sizeof(void *)));
3742
3743 #ifndef __WIN32__
3744 append_stat("rusage_user", add_stats, c, "%ld.%06ld",
3745 (long)usage.ru_utime.tv_sec,
3746 (long)usage.ru_utime.tv_usec);
3747 append_stat("rusage_system", add_stats, c, "%ld.%06ld",
3748 (long)usage.ru_stime.tv_sec,
3749 (long)usage.ru_stime.tv_usec);
3750 #endif
3751
3752 APPEND_STAT("daemon_connections", "%u", stats.daemon_conns);
3753 APPEND_STAT("curr_connections", "%u", stats.curr_conns);
3754 APPEND_STAT("total_connections", "%u", stats.total_conns);
3755 APPEND_STAT("connection_structures", "%u", stats.conn_structs);
3756 APPEND_STAT("cmd_get", "%"PRIu64, thread_stats.cmd_get);
3757 APPEND_STAT("cmd_set", "%"PRIu64, slab_stats.cmd_set);
3758 APPEND_STAT("cmd_flush", "%"PRIu64, thread_stats.cmd_flush);
3759 APPEND_STAT("auth_cmds", "%"PRIu64, thread_stats.auth_cmds);
3760 APPEND_STAT("auth_errors", "%"PRIu64, thread_stats.auth_errors);
3761 APPEND_STAT("get_hits", "%"PRIu64, slab_stats.get_hits);
3762 APPEND_STAT("get_misses", "%"PRIu64, thread_stats.get_misses);
3763 APPEND_STAT("delete_misses", "%"PRIu64, thread_stats.delete_misses);
3764 APPEND_STAT("delete_hits", "%"PRIu64, slab_stats.delete_hits);
3765 APPEND_STAT("incr_misses", "%"PRIu64, thread_stats.incr_misses);
3766 APPEND_STAT("incr_hits", "%"PRIu64, thread_stats.incr_hits);
3767 APPEND_STAT("decr_misses", "%"PRIu64, thread_stats.decr_misses);
3768 APPEND_STAT("decr_hits", "%"PRIu64, thread_stats.decr_hits);
3769 APPEND_STAT("cas_misses", "%"PRIu64, thread_stats.cas_misses);
3770 APPEND_STAT("cas_hits", "%"PRIu64, slab_stats.cas_hits);
3771 APPEND_STAT("cas_badval", "%"PRIu64, slab_stats.cas_badval);
3772 APPEND_STAT("bytes_read", "%"PRIu64, thread_stats.bytes_read);
3773 APPEND_STAT("bytes_written", "%"PRIu64, thread_stats.bytes_written);
3774 APPEND_STAT("limit_maxbytes", "%"PRIu64, settings.maxbytes);
3775 APPEND_STAT("accepting_conns", "%u", is_listen_disabled() ? 0 : 1);
3776 APPEND_STAT("listen_disabled_num", "%"PRIu64, get_listen_disabled_num());
3777 APPEND_STAT("rejected_conns", "%" PRIu64, (unsigned long long)stats.rejected_conns);
3778 APPEND_STAT("threads", "%d", settings.num_threads);
3779 APPEND_STAT("conn_yields", "%" PRIu64, (unsigned long long)thread_stats.conn_yields);
3780 STATS_UNLOCK();
3781
3782 /*
3783 * Add tap stats (only if non-zero)
3784 */
3785 struct tap_stats ts;
3786 pthread_mutex_lock(&tap_stats.mutex);
3787 ts = tap_stats;
3788 pthread_mutex_unlock(&tap_stats.mutex);
3789
3790 if (ts.sent.connect) {
3791 APPEND_STAT("tap_connect_sent", "%"PRIu64, ts.sent.connect);
3792 }
3793 if (ts.sent.mutation) {
3794 APPEND_STAT("tap_mutation_sent", "%"PRIu64, ts.sent.mutation);
3795 }
3796 if (ts.sent.checkpoint_start) {
3797 APPEND_STAT("tap_checkpoint_start_sent", "%"PRIu64, ts.sent.checkpoint_start);
3798 }
3799 if (ts.sent.checkpoint_end) {
3800 APPEND_STAT("tap_checkpoint_end_sent", "%"PRIu64, ts.sent.checkpoint_end);
3801 }
3802 if (ts.sent.delete) {
3803 APPEND_STAT("tap_delete_sent", "%"PRIu64, ts.sent.delete);
3804 }
3805 if (ts.sent.flush) {
3806 APPEND_STAT("tap_flush_sent", "%"PRIu64, ts.sent.flush);
3807 }
3808 if (ts.sent.opaque) {
3809 APPEND_STAT("tap_opaque_sent", "%"PRIu64, ts.sent.opaque);
3810 }
3811 if (ts.sent.vbucket_set) {
3812 APPEND_STAT("tap_vbucket_set_sent", "%"PRIu64,
3813 ts.sent.vbucket_set);
3814 }
3815 if (ts.received.connect) {
3816 APPEND_STAT("tap_connect_received", "%"PRIu64, ts.received.connect);
3817 }
3818 if (ts.received.mutation) {
3819 APPEND_STAT("tap_mutation_received", "%"PRIu64, ts.received.mutation);
3820 }
3821 if (ts.received.checkpoint_start) {
3822 APPEND_STAT("tap_checkpoint_start_received", "%"PRIu64, ts.received.checkpoint_start);
3823 }
3824 if (ts.received.checkpoint_end) {
3825 APPEND_STAT("tap_checkpoint_end_received", "%"PRIu64, ts.received.checkpoint_end);
3826 }
3827 if (ts.received.delete) {
3828 APPEND_STAT("tap_delete_received", "%"PRIu64, ts.received.delete);
3829 }
3830 if (ts.received.flush) {
3831 APPEND_STAT("tap_flush_received", "%"PRIu64, ts.received.flush);
3832 }
3833 if (ts.received.opaque) {
3834 APPEND_STAT("tap_opaque_received", "%"PRIu64, ts.received.opaque);
3835 }
3836 if (ts.received.vbucket_set) {
3837 APPEND_STAT("tap_vbucket_set_received", "%"PRIu64,
3838 ts.received.vbucket_set);
3839 }
3840 }
3841
/*
 * Emit the current server settings ("stats settings") through add_stats.
 *
 * @param add_stats callback receiving each name/value pair
 * @param c         cookie (the connection) handed on by APPEND_STAT
 *
 * "maxbytes" is printed as a 64-bit value, like "limit_maxbytes" in the
 * server stats, so limits of 4 GiB and above are not truncated.
 */
static void process_stat_settings(ADD_STAT add_stats, void *c) {
    assert(add_stats);
    APPEND_STAT("maxbytes", "%" PRIu64, (uint64_t)settings.maxbytes);
    APPEND_STAT("maxconns", "%d", settings.maxconns);
    APPEND_STAT("tcpport", "%d", settings.port);
    APPEND_STAT("udpport", "%d", settings.udpport);
    APPEND_STAT("inter", "%s", settings.inter ? settings.inter : "NULL");
    APPEND_STAT("verbosity", "%d", settings.verbose);
    APPEND_STAT("oldest", "%lu", (unsigned long)settings.oldest_live);
    APPEND_STAT("evictions", "%s", settings.evict_to_free ? "on" : "off");
    APPEND_STAT("domain_socket", "%s",
                settings.socketpath ? settings.socketpath : "NULL");
    APPEND_STAT("umask", "%o", settings.access);
    APPEND_STAT("growth_factor", "%.2f", settings.factor);
    APPEND_STAT("chunk_size", "%d", settings.chunk_size);
    APPEND_STAT("num_threads", "%d", settings.num_threads);
    APPEND_STAT("num_threads_per_udp", "%d", settings.num_threads_per_udp);
    APPEND_STAT("stat_key_prefix", "%c", settings.prefix_delimiter);
    APPEND_STAT("detail_enabled", "%s",
                settings.detail_enabled ? "yes" : "no");
    APPEND_STAT("allow_detailed", "%s",
                settings.allow_detailed ? "yes" : "no");
    APPEND_STAT("reqs_per_event", "%d", settings.reqs_per_event);
    APPEND_STAT("reqs_per_tap_event", "%d", settings.reqs_per_tap_event);
    APPEND_STAT("cas_enabled", "%s", settings.use_cas ? "yes" : "no");
    APPEND_STAT("tcp_backlog", "%d", settings.backlog);
    APPEND_STAT("binding_protocol", "%s",
                prot_text(settings.binding_protocol));
#ifdef SASL_ENABLED
    APPEND_STAT("auth_enabled_sasl", "%s", "yes");
#else
    APPEND_STAT("auth_enabled_sasl", "%s", "no");
#endif

#ifdef ENABLE_ISASL
    APPEND_STAT("auth_sasl_engine", "%s", "isasl");
#elif defined(ENABLE_SASL)
    APPEND_STAT("auth_sasl_engine", "%s", "cyrus");
#else
    APPEND_STAT("auth_sasl_engine", "%s", "none");
#endif
    APPEND_STAT("auth_required_sasl", "%s", settings.require_sasl ? "yes" : "no");
    APPEND_STAT("item_size_max", "%d", settings.item_size_max);
    APPEND_STAT("topkeys", "%d", settings.topkeys);

    /* One "extension" line per registered daemon extension. */
    for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("extension", "%s", ptr->get_name());
    }

    APPEND_STAT("logger", "%s", settings.extensions.logger->get_name());

    /* One "ascii_extension" line per registered ASCII protocol extension. */
    for (EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("ascii_extension", "%s", ptr->get_name(ptr->cookie));
    }
}
3901
/*
 * Handle the ASCII "stats" command and its sub-commands.
 *
 * @param c       the connection
 * @param tokens  tokenized command line; tokens[SUBCOMMAND_TOKEN] is the
 *                sub-command when one was given
 * @param ntokens number of entries in tokens, terminal entry included
 * @return NULL when the command was answered (or failed); a non-NULL
 *         pointer into the command buffer when the engine returned
 *         ENGINE_EWOULDBLOCK and c->ewouldblock has been set
 *
 * Sub-commands not known here are joined back into one string and offered
 * to the engine's get_stats().  If that string cannot be allocated the
 * client gets the out-of-memory error and the engine is not called.
 */
static char *process_stat(conn *c, token_t *tokens, const size_t ntokens) {
    const char *subcommand = tokens[SUBCOMMAND_TOKEN].value;
    c->dynamic_buffer.offset = 0;

    if (ntokens == 2) {
        /* Plain "stats": server stats followed by the engine's stats. */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            server_stats(&append_stats, c, false);
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                NULL, 0, &append_stats);
            if (ret == ENGINE_EWOULDBLOCK) {
                c->ewouldblock = true;
                return c->rcurr + 5;
            }
        }
    } else if (strcmp(subcommand, "reset") == 0) {
        stats_reset(c);
        out_string(c, "RESET");
        return NULL;
    } else if (strcmp(subcommand, "detail") == 0) {
        /* NOTE: how to tackle detail with binary? */
        if (ntokens < 4) {
            process_stats_detail(c, "");  /* outputs the error message */
        } else {
            process_stats_detail(c, tokens[2].value);
        }
        /* Output already generated */
        return NULL;
    } else if (strcmp(subcommand, "settings") == 0) {
        process_stat_settings(&append_stats, c);
    } else if (strcmp(subcommand, "cachedump") == 0) {
        char *buf = NULL;
        unsigned int bytes = 0, id, limit = 0;

        if (ntokens < 5) {
            out_string(c, "CLIENT_ERROR bad command line");
            return NULL;
        }

        if (!safe_strtoul(tokens[2].value, &id) ||
            !safe_strtoul(tokens[3].value, &limit)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return NULL;
        }

        if (id >= POWER_LARGEST) {
            out_string(c, "CLIENT_ERROR Illegal slab id");
            return NULL;
        }

#ifdef FUTURE
        buf = item_cachedump(id, limit, &bytes);
#endif
        /* Without FUTURE buf stays NULL and write_and_free() reports it. */
        write_and_free(c, buf, bytes);
        return NULL;
    } else if (strcmp(subcommand, "aggregate") == 0) {
        server_stats(&append_stats, c, true);
    } else if (strcmp(subcommand, "topkeys") == 0) {
        topkeys_t *tk = get_independent_stats(c)->topkeys;
        if (tk != NULL) {
            topkeys_stats(tk, c, current_time, append_stats);
        } else {
            out_string(c, "ERROR");
            return NULL;
        }
    } else {
        /* getting here means that the subcommand is either engine specific or
           is invalid. query the engine and see. */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            char *buf = NULL;
            int nb = -1;
            detokenize(&tokens[1], ntokens - 2, &buf, &nb);
            if (buf == NULL) {
                /* The key could not be built: do not call the engine with
                 * a NULL key and a length of -1. */
                ret = ENGINE_ENOMEM;
            } else {
                ret = settings.engine.v1->get_stats(settings.engine.v0, c, buf,
                                                    nb, append_stats);
                free(buf);
            }
        }

        switch (ret) {
        case ENGINE_SUCCESS:
            append_stats(NULL, 0, NULL, 0, c);
            write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
            break;
        case ENGINE_ENOMEM:
            out_string(c, "SERVER_ERROR out of memory writing stats");
            break;
        case ENGINE_DISCONNECT:
            c->state = conn_closing;
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            return tokens[SUBCOMMAND_TOKEN].value;
        default:
            out_string(c, "ERROR");
            break;
        }

        return NULL;
    }

    /* append terminator and start the transfer */
    append_stats(NULL, 0, NULL, 0, c);

    if (c->dynamic_buffer.buffer == NULL) {
        out_string(c, "SERVER_ERROR out of memory writing stats");
    } else {
        write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
        c->dynamic_buffer.buffer = NULL;
    }

    return NULL;
}
4022
4023 /**
4024 * Get a suffix buffer and insert it into the list of used suffix buffers
4025 * @param c the connection object
4026 * @return a pointer to a new suffix buffer or NULL if allocation failed
4027 */
get_suffix_buffer(conn * c)4028 static char *get_suffix_buffer(conn *c) {
4029 if (c->suffixleft == c->suffixsize) {
4030 char **new_suffix_list;
4031 size_t sz = sizeof(char*) * c->suffixsize * 2;
4032
4033 new_suffix_list = realloc(c->suffixlist, sz);
4034 if (new_suffix_list) {
4035 c->suffixsize *= 2;
4036 c->suffixlist = new_suffix_list;
4037 } else {
4038 if (settings.verbose > 1) {
4039 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4040 "=%d Failed to resize suffix buffer\n", c->sfd);
4041 }
4042
4043 return NULL;
4044 }
4045 }
4046
4047 char *suffix = cache_alloc(c->thread->suffix_cache);
4048 if (suffix != NULL) {
4049 *(c->suffixlist + c->suffixleft) = suffix;
4050 ++c->suffixleft;
4051 }
4052
4053 return suffix;
4054 }
4055
4056 /* ntokens is overwritten here... shrug.. */
process_get_command(conn * c,token_t * tokens,size_t ntokens,bool return_cas)4057 static inline char* process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) {
4058 char *key;
4059 size_t nkey;
4060 int i = c->ileft;
4061 item *it;
4062 token_t *key_token = &tokens[KEY_TOKEN];
4063 assert(c != NULL);
4064 (void)ntokens;
4065
4066 /* We temporarily block the mgets commands till wl6650 checked in. */
4067 if ((key_token + 1)->length > 0) {
4068 out_string(c, "We temporarily don't support multiple get option.");
4069 return NULL;
4070 }
4071
4072 do {
4073 while(key_token->length != 0) {
4074
4075 key = key_token->value;
4076 nkey = key_token->length;
4077
4078 if(nkey > KEY_MAX_LENGTH) {
4079 out_string(c, "CLIENT_ERROR bad command line format");
4080 return NULL;
4081 }
4082
4083 ENGINE_ERROR_CODE ret = c->aiostat;
4084 c->aiostat = ENGINE_SUCCESS;
4085
4086 if (ret == ENGINE_SUCCESS) {
4087 ret = settings.engine.v1->get(settings.engine.v0, c, &it, key, nkey, 0);
4088 }
4089
4090 switch (ret) {
4091 case ENGINE_EWOULDBLOCK:
4092 c->ewouldblock = true;
4093 c->ileft = i;
4094 return key;
4095
4096 case ENGINE_SUCCESS:
4097 break;
4098 case ENGINE_KEY_ENOENT:
4099 default:
4100 it = NULL;
4101 break;
4102 }
4103
4104 if (settings.detail_enabled) {
4105 stats_prefix_record_get(key, nkey, NULL != it);
4106 }
4107
4108 if (it) {
4109 item_info info = { .nvalue = 1 };
4110 if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it,
4111 &info)) {
4112 settings.engine.v1->release(settings.engine.v0, c, it);
4113 out_string(c, "SERVER_ERROR error getting item data");
4114 break;
4115 }
4116
4117 if (i >= c->isize) {
4118 item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2);
4119 if (new_list) {
4120 c->isize *= 2;
4121 c->ilist = new_list;
4122 } else {
4123 settings.engine.v1->release(settings.engine.v0, c, it);
4124 break;
4125 }
4126 }
4127
4128 /* Rebuild the suffix */
4129 char *suffix = get_suffix_buffer(c);
4130 if (suffix == NULL) {
4131 out_string(c, "SERVER_ERROR out of memory rebuilding suffix");
4132 settings.engine.v1->release(settings.engine.v0, c, it);
4133 return NULL;
4134 }
4135 int suffix_len = snprintf(suffix, SUFFIX_SIZE,
4136 " %u %u\r\n", htonl(info.flags),
4137 info.nbytes);
4138
4139 /*
4140 * Construct the response. Each hit adds three elements to the
4141 * outgoing data list:
4142 * "VALUE "
4143 * key
4144 * " " + flags + " " + data length + "\r\n" + data (with \r\n)
4145 */
4146
4147 MEMCACHED_COMMAND_GET(c->sfd, info.key, info.nkey,
4148 info.nbytes, info.cas);
4149 if (return_cas)
4150 {
4151
4152 char *cas = get_suffix_buffer(c);
4153 if (cas == NULL) {
4154 out_string(c, "SERVER_ERROR out of memory making CAS suffix");
4155 settings.engine.v1->release(settings.engine.v0, c, it);
4156 return NULL;
4157 }
4158 int cas_len = snprintf(cas, SUFFIX_SIZE, " %"PRIu64"\r\n",
4159 info.cas);
4160 if (add_iov(c, "VALUE ", 6) != 0 ||
4161 add_iov(c, info.key, info.nkey) != 0 ||
4162 add_iov(c, suffix, suffix_len - 2) != 0 ||
4163 add_iov(c, cas, cas_len) != 0 ||
4164 add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
4165 add_iov(c, "\r\n", 2) != 0)
4166 {
4167 settings.engine.v1->release(settings.engine.v0, c, it);
4168 break;
4169 }
4170 }
4171 else
4172 {
4173 if (add_iov(c, "VALUE ", 6) != 0 ||
4174 add_iov(c, info.key, info.nkey) != 0 ||
4175 add_iov(c, suffix, suffix_len) != 0 ||
4176 add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
4177 add_iov(c, "\r\n", 2) != 0)
4178 {
4179 settings.engine.v1->release(settings.engine.v0, c, it);
4180 break;
4181 }
4182 }
4183
4184
4185 if (settings.verbose > 1) {
4186 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4187 ">%d sending key %s\n",
4188 c->sfd, info.key);
4189 }
4190
4191 /* item_get() has incremented it->refcount for us */
4192 STATS_HIT(c, get, key, nkey);
4193 *(c->ilist + i) = it;
4194 i++;
4195
4196 } else {
4197 STATS_MISS(c, get, key, nkey);
4198 MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);
4199 }
4200
4201 key_token++;
4202 }
4203
4204 /*
4205 * If the command string hasn't been fully processed, get the next set
4206 * of tokens.
4207 */
4208 if(key_token->value != NULL) {
4209 ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS);
4210 key_token = tokens;
4211 }
4212
4213 } while(key_token->value != NULL);
4214
4215 c->icurr = c->ilist;
4216 c->ileft = i;
4217 c->suffixcurr = c->suffixlist;
4218
4219 if (settings.verbose > 1) {
4220 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4221 ">%d END\n", c->sfd);
4222 }
4223
4224 /*
4225 If the loop was terminated because of out-of-memory, it is not
4226 reliable to add END\r\n to the buffer, because it might not end
4227 in \r\n. So we send SERVER_ERROR instead.
4228 */
4229 if (key_token->value != NULL || add_iov(c, "END\r\n", 5) != 0
4230 || (IS_UDP(c->transport) && build_udp_headers(c) != 0)) {
4231 out_string(c, "SERVER_ERROR out of memory writing get response");
4232 }
4233 else {
4234 conn_set_state(c, conn_mwrite);
4235 c->msgcurr = 0;
4236 }
4237
4238 return NULL;
4239 }
4240
/*
 * Handle the header line of a text-protocol storage command.
 * process_command() routes add/set/replace/prepend/append here, and cas
 * with handle_cas set.
 *
 * The line is parsed as <key> <flags> <exptime> <bytes> [<cas>]. On
 * success an item is allocated from the engine and the connection is put
 * into conn_nread so that the value is read straight into the item. When
 * the allocation fails the error is reported and the data line that
 * follows is swallowed.
 *
 * @param c the connection object
 * @param tokens the tokenized command line
 * @param ntokens number of tokens
 * @param store_op the store operation to record for the later store
 * @param handle_cas true when a CAS value must be parsed from tokens[5]
 */
static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, ENGINE_STORE_OPERATION store_op, bool handle_cas) {
    char *key;
    size_t nkey;
    unsigned int flags;
    int32_t exptime_int = 0;
    time_t exptime;
    int vlen;
    uint64_t req_cas_id=0;
    item *it;

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    key = tokens[KEY_TOKEN].value;
    nkey = tokens[KEY_TOKEN].length;

    if (! (safe_strtoul(tokens[2].value, (uint32_t *)&flags)
           && safe_strtol(tokens[3].value, &exptime_int)
           && safe_strtol(tokens[4].value, (int32_t *)&vlen))) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    /* Negative expire values not allowed */

    if (exptime_int < 0) {
        out_string(c, "CLIENT_ERROR Invalid expire time");
        return;
    }

    /* Ubuntu 8.04 breaks when I pass exptime to safe_strtol */
    exptime = exptime_int;

    // does cas value exist?
    if (handle_cas) {
        if (!safe_strtoull(tokens[5].value, &req_cas_id)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return;
        }
    }

    /*
     * Reject negative lengths, and lengths so large that the "vlen + 2"
     * computed below for the trailing "\r\n" would overflow a signed int.
     */
    if (vlen < 0 || vlen > (INT_MAX - 2)) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Pick up the status of a pending asynchronous operation. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, htonl(flags), exptime);
    }

    item_info info = { .nvalue = 1 };
    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, req_cas_id);
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            out_string(c, "SERVER_ERROR error getting item data");
            break;
        }
        /* Read the value directly into the freshly allocated item. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        c->store_op = store_op;
        conn_set_state(c, conn_nread);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            out_string(c, "SERVER_ERROR object too large for cache");
        } else {
            out_string(c, "SERVER_ERROR out of memory storing object");
        }
        /* swallow the data line */
        c->write_and_go = conn_swallow;
        c->sbytes = vlen + 2;

        /* Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET. Anywhere else too? */
        if (store_op == OPERATION_SET) {
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey, 0, 0);
        }
    }
}
4345
/*
 * Handle the text-protocol "incr" and "decr" commands.
 *
 * The delta is parsed from tokens[2], the engine's arithmetic operation is
 * invoked, and the engine status is translated into a protocol reply.
 *
 * @param c the connection object
 * @param tokens the tokenized command line
 * @param ntokens number of tokens
 * @param incr true for incr, false for decr
 * @return the key when the engine answered ENGINE_EWOULDBLOCK, otherwise
 *         NULL
 */
static char* process_arithmetic_command(conn *c, token_t *tokens, const size_t ntokens, const bool incr) {

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    uint64_t delta;
    if (!safe_strtoull(tokens[2].value, &delta)) {
        out_string(c, "CLIENT_ERROR invalid numeric delta argument");
        return NULL;
    }

    /* Consume the status of a pending asynchronous operation, if any. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;

    uint64_t cas;
    uint64_t result;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->arithmetic(settings.engine.v0, c, key, nkey,
                                             incr, false, delta, 0, 0, &cas,
                                             &result, 0);
    }

    /* The engine is not done yet: hand the key back for a later retry. */
    if (ret == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return key;
    }

    char temp[INCR_MAX_STORAGE_LEN];

    switch (ret) {
    case ENGINE_SUCCESS:
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        /* The reply is the new value in decimal. */
        snprintf(temp, sizeof(temp), "%"PRIu64, result);
        out_string(c, temp);
        break;

    case ENGINE_KEY_ENOENT:
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        out_string(c, "NOT_FOUND");
        break;

    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;

    case ENGINE_EINVAL:
        out_string(c, "CLIENT_ERROR cannot increment or decrement non-numeric value");
        break;

    case ENGINE_ENOMEM:
        out_string(c, "SERVER_ERROR out of memory");
        break;

    case ENGINE_TMPFAIL:
        out_string(c, "SERVER_ERROR temporary failure");
        break;

    case ENGINE_NOT_STORED:
        out_string(c, "SERVER_ERROR failed to store item");
        break;

    case ENGINE_ENOTSUP:
        out_string(c, "SERVER_ERROR not supported");
        break;

    default:
        /* Any status not listed above is treated as fatal. */
        abort();
    }

    return NULL;
}
4425
/*
 * Handle the text-protocol "delete" command.
 *
 * Besides "delete <key>", a literal "0" and/or "noreply" may follow the
 * key; any other extra argument is rejected with a usage message.
 *
 * @param c the connection object
 * @param tokens the tokenized command line
 * @param ntokens number of tokens
 * @return the key when the engine answered ENGINE_EWOULDBLOCK, otherwise
 *         NULL
 */
static char *process_delete_command(conn *c, token_t *tokens,
                                    const size_t ntokens) {
    assert(c != NULL);

    if (ntokens > 3) {
        /* Both values are computed up front; set_noreply_maybe() also
         * updates the connection. */
        bool hold_is_zero = strcmp(tokens[KEY_TOKEN+1].value, "0") == 0;
        bool sets_noreply = set_noreply_maybe(c, tokens, ntokens);
        bool valid = false;

        if (ntokens == 4) {
            valid = hold_is_zero || sets_noreply;
        } else if (ntokens == 5) {
            valid = hold_is_zero && sets_noreply;
        }

        if (!valid) {
            out_string(c, "CLIENT_ERROR bad command line format. "
                       "Usage: delete <key> [noreply]");
            return NULL;
        }
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    if (nkey > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /* Consume the status of a pending asynchronous operation, if any. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->remove(settings.engine.v0, c,
                                         key, nkey, 0, 0);
    }

    /* The engine is not done yet: hand the key back for a later retry. */
    if (ret == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return key;
    }

    /* SLAB_INCR appears to refer to a local named "info", so it has to
     * stay in scope under that name. */
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        out_string(c, "DELETED");
        SLAB_INCR(c, delete_hits, key, nkey);
    } else if (ret == ENGINE_TMPFAIL) {
        out_string(c, "SERVER_ERROR temporary failure");
    } else {
        out_string(c, "NOT_FOUND");
        STATS_INCR(c, delete_misses, key, nkey);
    }

    if (settings.detail_enabled) {
        stats_prefix_record_delete(key, nkey);
    }
    return NULL;
}
4484
/*
 * Handle the text-protocol "bind <table_id_name>" command by passing the
 * name to the engine's bind operation.
 *
 * @param c the connection object
 * @param tokens the tokenized command line
 * @param ntokens number of tokens
 * @return the name when the engine answered ENGINE_EWOULDBLOCK, otherwise
 *         NULL
 */
static char *process_bind_command(conn *c, token_t *tokens,
                                  const size_t ntokens) {
    assert(c != NULL);

    if (ntokens > 3) {
        out_string(c, "CLIENT_ERROR bad command line format. "
                   "Usage: bind <table_id_name>");
        return NULL;
    }

    char *name = tokens[KEY_TOKEN].value;
    size_t name_len = tokens[KEY_TOKEN].length;

    /* The name must be non-empty and no longer than a key may be. */
    if (name_len == 0 || name_len > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /* Consume the status of a pending asynchronous operation, if any. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->bind(settings.engine.v0, c,
                                       name, name_len);
    }

    if (ret == ENGINE_EWOULDBLOCK) {
        /* Hand the name back so the command can be retried later. */
        c->ewouldblock = true;
        return name;
    }

    if (ret == ENGINE_SUCCESS) {
        out_string(c, "SUCCEED");
    } else {
        /* ENGINE_TMPFAIL and every other status get the same reply. */
        out_string(c, "NOT_FOUND");
    }

    return NULL;
}
4528
/*
 * Handle the text-protocol "verbosity <level> [noreply]" command.
 *
 * The level is clamped to MAX_VERBOSITY_LEVEL, stored in settings.verbose,
 * and the ON_LOG_LEVEL callbacks are run.
 *
 * @param c the connection object
 * @param tokens the tokenized command line
 * @param ntokens number of tokens
 */
static void process_verbosity_command(conn *c, token_t *tokens, const size_t ntokens) {
    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);
    if (c->noreply && ntokens == 3) {
        /* "verbosity noreply" is not according to the correct syntax */
        c->noreply = false;
        out_string(c, "ERROR");
        return;
    }

    unsigned int level;
    if (!safe_strtoul(tokens[1].value, &level)) {
        out_string(c, "ERROR");
        return;
    }

    /* Never store more than the highest supported level. */
    if (level > MAX_VERBOSITY_LEVEL) {
        level = MAX_VERBOSITY_LEVEL;
    }
    settings.verbose = level;

    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    out_string(c, "OK");
}
4550
/*
 * Tokenize one line of the text protocol and dispatch it to the matching
 * command handler.
 *
 * Unless the command is being retried after an ENGINE_EWOULDBLOCK, the
 * outgoing message state is reset and a fresh msghdr is added first.
 * Commands not handled here are offered to the registered ASCII protocol
 * extensions, if there are any.
 *
 * @param c the connection object
 * @param command the NUL-terminated command line
 * @return NULL when the command was dealt with; otherwise the pointer into
 *         the command buffer that a handler produced when the engine
 *         answered ENGINE_EWOULDBLOCK
 */
static char* process_command(conn *c, char *command) {

    token_t tokens[MAX_TOKENS];
    size_t ntokens;
    int comm;
    char *ret = NULL;

    assert(c != NULL);

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "<%d %s\n", c->sfd, command);
    }

    /*
     * for commands set/add/replace, we build an item and read the data
     * directly into it, then continue in nread_complete().
     */

    if (c->ewouldblock) {
        /*
         * If we are retrying after the engine has completed a pending io for
         * this command, skip add_msghdr() etc and clear the ewouldblock flag.
         */
        c->ewouldblock = false;
    } else {
        c->msgcurr = 0;
        c->msgused = 0;
        c->iovused = 0;
        if (add_msghdr(c) != 0) {
            out_string(c, "SERVER_ERROR out of memory preparing response");
            return NULL;
        }
    }

    ntokens = tokenize_command(command, tokens, MAX_TOKENS);
    if (ntokens >= 3 &&
        ((strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) ||
         (strcmp(tokens[COMMAND_TOKEN].value, "bget") == 0))) {

        ret = process_get_command(c, tokens, ntokens, false);

    } else if ((ntokens == 6 || ntokens == 7) &&
               ((strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = (int)OPERATION_ADD)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = (int)OPERATION_SET)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = (int)OPERATION_REPLACE)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "prepend") == 0 && (comm = (int)OPERATION_PREPEND)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = (int)OPERATION_APPEND)) )) {

        /* The assignments in the condition above select the store
         * operation for the name that matched. */
        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, false);

    } else if ((ntokens == 7 || ntokens == 8) && (strcmp(tokens[COMMAND_TOKEN].value, "cas") == 0 && (comm = (int)OPERATION_CAS))) {

        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "incr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 1);

    } else if (ntokens >= 3 && (strcmp(tokens[COMMAND_TOKEN].value, "gets") == 0)) {

        ret = process_get_command(c, tokens, ntokens, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "decr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 0);

    } else if (ntokens >= 3 && ntokens <= 5 && (strcmp(tokens[COMMAND_TOKEN].value, "delete") == 0)) {

        ret = process_delete_command(c, tokens, ntokens);

    } else if (ntokens == 3 && (strcmp(tokens[COMMAND_TOKEN].value, "bind") == 0)) {

        ret = process_bind_command(c, tokens, ntokens);

    } else if (ntokens >= 2 && (strcmp(tokens[COMMAND_TOKEN].value, "stats") == 0)) {

        ret = process_stat(c, tokens, ntokens);

    } else if (ntokens >= 2 && ntokens <= 4 && (strcmp(tokens[COMMAND_TOKEN].value, "flush_all") == 0)) {
        time_t exptime;

        set_noreply_maybe(c, tokens, ntokens);

        if (ntokens == (c->noreply ? 3 : 2)) {
            exptime = 0;
        } else {
            /* strtol() only ever sets errno, it never clears it. Reset it
             * so that a stale ERANGE from an earlier call cannot reject a
             * valid delay. */
            errno = 0;
            exptime = strtol(tokens[1].value, NULL, 10);
            if(errno == ERANGE) {
                out_string(c, "CLIENT_ERROR bad command line format");
                return NULL;
            }
        }

        /* Note: this engine status shadows the function-level "ret". */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            ret = settings.engine.v1->flush(settings.engine.v0, c, exptime);
        }

        switch (ret) {
        case  ENGINE_SUCCESS:
            out_string(c, "OK");
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            /* 9 == strlen("flush_all") */
            return c->rcurr + 9;
        default:
            out_string(c, "SERVER_ERROR failed to flush cache");
        }

        if (ret != ENGINE_EWOULDBLOCK) {
            STATS_NOKEY(c, cmd_flush);
        }
        return NULL;

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "version") == 0)) {

        out_string(c, "VERSION " VERSION);

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "quit") == 0)) {

        conn_set_state(c, conn_closing);

    } else if ((ntokens == 3 || ntokens == 4) && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) {
        process_verbosity_command(c, tokens, ntokens);
    } else if (settings.extensions.ascii != NULL) {
        EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *cmd;
        size_t nbytes = 0;
        char *ptr = NULL;

        if (ntokens > 0) {
            if (ntokens == MAX_TOKENS) {
                out_string(c, "ERROR too many arguments");
                return NULL;
            }

            /* Do not hand the empty terminating token to the extension. */
            if (tokens[ntokens - 1].length == 0) {
                --ntokens;
            }
        }

        /* Find the first extension that accepts this command. */
        for (cmd = settings.extensions.ascii; cmd != NULL; cmd = cmd->next) {
            if (cmd->accept(cmd->cookie, c, ntokens, tokens, &nbytes, &ptr)) {
                break;
            }
        }

        if (cmd == NULL) {
            out_string(c, "ERROR unknown command");
        } else if (nbytes == 0) {
            switch (cmd->execute(cmd->cookie, c, ntokens, tokens,
                                 ascii_response_handler)) {
            case ENGINE_SUCCESS:
                if (c->dynamic_buffer.buffer != NULL) {
                    write_and_free(c, c->dynamic_buffer.buffer,
                                   c->dynamic_buffer.offset);
                    c->dynamic_buffer.buffer = NULL;
                } else {
                    conn_set_state(c, conn_new_cmd);
                }
                break;
            case ENGINE_EWOULDBLOCK:
                c->ewouldblock = true;
                ret = tokens[KEY_TOKEN].value;
                break;
            case ENGINE_DISCONNECT:
            default:
                conn_set_state(c, conn_closing);

            }
        } else {
            c->rlbytes = nbytes;
            c->ritem = ptr;
            c->ascii_cmd = cmd;
            /* NOT SUPPORTED YET! */
            conn_set_state(c, conn_nread);
        }
    } else {
        out_string(c, "ERROR");
    }
    return ret;
}
4740
4741 /*
4742 * if we have a complete line in the buffer, process it.
4743 */
try_read_command(conn * c)4744 static int try_read_command(conn *c) {
4745 assert(c != NULL);
4746 assert(c->rcurr <= (c->rbuf + c->rsize));
4747 assert(c->rbytes > 0);
4748
4749 if (c->protocol == negotiating_prot || c->transport == udp_transport) {
4750 if ((unsigned char)c->rbuf[0] == (unsigned char)PROTOCOL_BINARY_REQ) {
4751 c->protocol = binary_prot;
4752 } else {
4753 c->protocol = ascii_prot;
4754 }
4755
4756 if (settings.verbose > 1) {
4757 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4758 "%d: Client using the %s protocol\n", c->sfd,
4759 prot_text(c->protocol));
4760 }
4761 }
4762
4763 if (c->protocol == binary_prot) {
4764 /* Do we have the complete packet header? */
4765 if (c->rbytes < sizeof(c->binary_header)) {
4766 /* need more data! */
4767 return 0;
4768 } else {
4769 #ifdef NEED_ALIGN
4770 if (((long)(c->rcurr)) % 8 != 0) {
4771 /* must realign input buffer */
4772 memmove(c->rbuf, c->rcurr, c->rbytes);
4773 c->rcurr = c->rbuf;
4774 if (settings.verbose > 1) {
4775 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4776 "%d: Realign input buffer\n", c->sfd);
4777 }
4778 }
4779 #endif
4780 protocol_binary_request_header* req;
4781 req = (protocol_binary_request_header*)c->rcurr;
4782
4783 if (settings.verbose > 1) {
4784 /* Dump the packet before we convert it to host order */
4785 char buffer[1024];
4786 ssize_t nw;
4787 nw = bytes_to_output_string(buffer, sizeof(buffer), c->sfd,
4788 true, "Read binary protocol data:",
4789 (const char*)req->bytes,
4790 sizeof(req->bytes));
4791 if (nw != -1) {
4792 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4793 "%s", buffer);
4794 }
4795 }
4796
4797 c->binary_header = *req;
4798 c->binary_header.request.keylen = ntohs(req->request.keylen);
4799 c->binary_header.request.bodylen = ntohl(req->request.bodylen);
4800 c->binary_header.request.vbucket = ntohs(req->request.vbucket);
4801 c->binary_header.request.cas = ntohll(req->request.cas);
4802
4803
4804 if (c->binary_header.request.magic != PROTOCOL_BINARY_REQ &&
4805 !(c->binary_header.request.magic == PROTOCOL_BINARY_RES &&
4806 response_handlers[c->binary_header.request.opcode])) {
4807 if (settings.verbose) {
4808 if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
4809 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4810 "%d: Invalid magic: %x\n", c->sfd,
4811 c->binary_header.request.magic);
4812 } else {
4813 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4814 "%d: ERROR: Unsupported response packet received: %u\n",
4815 c->sfd, (unsigned int)c->binary_header.request.opcode);
4816
4817 }
4818 }
4819 conn_set_state(c, conn_closing);
4820 return -1;
4821 }
4822
4823 c->msgcurr = 0;
4824 c->msgused = 0;
4825 c->iovused = 0;
4826 if (add_msghdr(c) != 0) {
4827 out_string(c, "SERVER_ERROR out of memory");
4828 return 0;
4829 }
4830
4831 c->cmd = c->binary_header.request.opcode;
4832 c->keylen = c->binary_header.request.keylen;
4833 c->opaque = c->binary_header.request.opaque;
4834 /* clear the returned cas value */
4835 c->cas = 0;
4836
4837 dispatch_bin_command(c);
4838
4839 c->rbytes -= sizeof(c->binary_header);
4840 c->rcurr += sizeof(c->binary_header);
4841 }
4842 } else {
4843 char *el, *cont, *left, lb;
4844
4845 if (c->rbytes == 0) {
4846 return 0;
4847 }
4848
4849 el = memchr(c->rcurr, '\n', c->rbytes);
4850 if (!el) {
4851 if (c->rbytes > 1024) {
4852 /*
4853 * We didn't have a '\n' in the first k. This _has_ to be a
4854 * large multiget, if not we should just nuke the connection.
4855 */
4856 char *ptr = c->rcurr;
4857 while (*ptr == ' ') { /* ignore leading whitespaces */
4858 ++ptr;
4859 }
4860
4861 if (ptr - c->rcurr > 100 ||
4862 (strncmp(ptr, "get ", 4) && strncmp(ptr, "gets ", 5))) {
4863
4864 conn_set_state(c, conn_closing);
4865 return 1;
4866 }
4867 }
4868
4869 return 0;
4870 }
4871 cont = el + 1;
4872 if ((el - c->rcurr) > 1 && *(el - 1) == '\r') {
4873 el--;
4874 }
4875 lb = *el;
4876 *el = '\0';
4877
4878 assert(cont <= (c->rcurr + c->rbytes));
4879
4880 LIBEVENT_THREAD *thread = c->thread;
4881 LOCK_THREAD(thread);
4882 left = process_command(c, c->rcurr);
4883 if (c->ewouldblock) {
4884 unregister_event(c);
4885 }
4886 UNLOCK_THREAD(thread);
4887
4888 if (left != NULL) {
4889 /*
4890 * We have not processed the entire command. This happens
4891 * when the engine returns ENGINE_EWOULDBLOCK for one of the
4892 * keys in a get/gets request.
4893 */
4894 assert (left <= el);
4895
4896 int count = strlen(c->rcurr);
4897 if ((c->rcurr + count) == left) {
4898 // Retry the entire command
4899 cont = c->rcurr;
4900 } else {
4901 left -= (count + 1);
4902 cont = left;
4903 assert(cont >= c->rcurr);
4904 if (cont > c->rcurr) {
4905 memmove(cont, c->rcurr, count);
4906 }
4907 }
4908
4909 /* de-tokenize the command */
4910 while ((left = memchr(left, '\0', el - left)) != NULL) {
4911 *left = ' ';
4912 }
4913 *el = lb;
4914 }
4915
4916 c->rbytes -= (cont - c->rcurr);
4917 c->rcurr = cont;
4918
4919 assert(c->rcurr <= (c->rbuf + c->rsize));
4920 }
4921
4922 return 1;
4923 }
4924
4925 /*
4926 * read a UDP request.
4927 */
try_read_udp(conn * c)4928 static enum try_read_result try_read_udp(conn *c) {
4929 int res;
4930
4931 assert(c != NULL);
4932
4933 c->request_addr_size = sizeof(c->request_addr);
4934 res = recvfrom(c->sfd, c->rbuf, c->rsize,
4935 0, (struct sockaddr *)&c->request_addr, &c->request_addr_size);
4936 if (res > 8) {
4937 unsigned char *buf = (unsigned char *)c->rbuf;
4938 STATS_ADD(c, bytes_read, res);
4939
4940 /* Beginning of UDP packet is the request ID; save it. */
4941 c->request_id = buf[0] * 256 + buf[1];
4942
4943 /* If this is a multi-packet request, drop it. */
4944 if (buf[4] != 0 || buf[5] != 1) {
4945 out_string(c, "SERVER_ERROR multi-packet request not supported");
4946 return READ_NO_DATA_RECEIVED;
4947 }
4948
4949 /* Don't care about any of the rest of the header. */
4950 res -= 8;
4951 memmove(c->rbuf, c->rbuf + 8, res);
4952
4953 c->rbytes += res;
4954 c->rcurr = c->rbuf;
4955 return READ_DATA_RECEIVED;
4956 }
4957 return READ_NO_DATA_RECEIVED;
4958 }
4959
4960 /*
4961 * read from network as much as we can, handle buffer overflow and connection
4962 * close.
4963 * before reading, move the remaining incomplete fragment of a command
4964 * (if any) to the beginning of the buffer.
4965 *
4966 * To protect us from someone flooding a connection with bogus data causing
4967 * the connection to eat up all available memory, break out and start looking
4968 * at the data I've got after a number of reallocs...
4969 *
4970 * @return enum try_read_result
4971 */
try_read_network(conn * c)4972 static enum try_read_result try_read_network(conn *c) {
4973 enum try_read_result gotdata = READ_NO_DATA_RECEIVED;
4974 int res;
4975 int num_allocs = 0;
4976 assert(c != NULL);
4977
4978 if (c->rcurr != c->rbuf) {
4979 if (c->rbytes != 0) /* otherwise there's nothing to copy */
4980 memmove(c->rbuf, c->rcurr, c->rbytes);
4981 c->rcurr = c->rbuf;
4982 }
4983
4984 while (1) {
4985 if (c->rbytes >= c->rsize) {
4986 if (num_allocs == 4) {
4987 return gotdata;
4988 }
4989 ++num_allocs;
4990 char *new_rbuf = realloc(c->rbuf, c->rsize * 2);
4991 if (!new_rbuf) {
4992 if (settings.verbose > 0) {
4993 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4994 "Couldn't realloc input buffer\n");
4995 }
4996 c->rbytes = 0; /* ignore what we read */
4997 out_string(c, "SERVER_ERROR out of memory reading request");
4998 c->write_and_go = conn_closing;
4999 return READ_MEMORY_ERROR;
5000 }
5001 c->rcurr = c->rbuf = new_rbuf;
5002 c->rsize *= 2;
5003 }
5004
5005 int avail = c->rsize - c->rbytes;
5006 res = recv(c->sfd, c->rbuf + c->rbytes, avail, 0);
5007 if (res > 0) {
5008 STATS_ADD(c, bytes_read, res);
5009 gotdata = READ_DATA_RECEIVED;
5010 c->rbytes += res;
5011 if (res == avail) {
5012 continue;
5013 } else {
5014 break;
5015 }
5016 }
5017 if (res == 0) {
5018 return READ_ERROR;
5019 }
5020 if (res == -1) {
5021 if (errno == EAGAIN || errno == EWOULDBLOCK) {
5022 break;
5023 }
5024 return READ_ERROR;
5025 }
5026 }
5027 return gotdata;
5028 }
5029
/*
 * Add the connection's event to libevent.
 *
 * @param c the connection object
 * @param timeout timeout handed to event_add()
 * @return true on success, false (after logging a warning) when
 *         event_add() failed
 */
bool register_event(conn *c, struct timeval *timeout) {
#ifdef DEBUG
    assert(!c->registered_in_libevent);
#endif

    if (event_add(&c->event, timeout) != -1) {
#ifdef DEBUG
        c->registered_in_libevent = true;
#endif
        return true;
    }

    settings.extensions.logger->log(EXTENSION_LOG_WARNING,
                                    NULL,
                                    "Failed to add connection to libevent: %s",
                                    strerror(errno));
    return false;
}
5049
/*
 * Remove the connection's event from libevent.
 *
 * @param c the connection object
 * @return true on success, false when event_del() failed
 */
bool unregister_event(conn *c) {
#ifdef DEBUG
    assert(c->registered_in_libevent);
#endif

    if (event_del(&c->event) != -1) {
#ifdef DEBUG
        c->registered_in_libevent = false;
#endif
        return true;
    }

    return false;
}
5065
5066
/*
 * Change the set of events the connection is waiting for.
 *
 * Nothing is done when the flags are unchanged. Otherwise the event is
 * unregistered, set up again with the new flags on the same event base,
 * and registered once more.
 *
 * @param c the connection object
 * @param new_flags the libevent flags to wait for from now on
 * @return true when the flags were already current or the event was
 *         registered again, false when unregistering or registering failed
 */
bool update_event(conn *c, const int new_flags) {
    assert(c != NULL);

    /* Remember the base now; event_set() below re-initializes the event. */
    struct event_base *base = c->event.ev_base;

    if (c->ev_flags != new_flags) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                        "Updated event for %d to read=%s, write=%s\n",
                                        c->sfd, (new_flags & EV_READ ? "yes" : "no"),
                                        (new_flags & EV_WRITE ? "yes" : "no"));

        if (!unregister_event(c)) {
            return false;
        }

        event_set(&c->event, c->sfd, new_flags, event_handler, (void *)c);
        event_base_set(base, &c->event);
        c->ev_flags = new_flags;

        return register_event(c, NULL);
    }

    return true;
}
5089
5090 /*
5091 * Transmit the next chunk of data from our list of msgbuf structures.
5092 *
5093 * Returns:
5094 * TRANSMIT_COMPLETE All done writing.
5095 * TRANSMIT_INCOMPLETE More data remaining to write.
5096 * TRANSMIT_SOFT_ERROR Can't write any more right now.
5097 * TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing)
5098 */
static enum transmit_result transmit(conn *c) {
    assert(c != NULL);

    /* The msghdr we were working on has no iovecs left: step past it
       before deciding whether there is anything more to send. */
    if (c->msgcurr < c->msgused &&
        c->msglist[c->msgcurr].msg_iovlen == 0) {
        c->msgcurr++;
    }

    if (!(c->msgcurr < c->msgused)) {
        /* Every queued message has been written. */
        return TRANSMIT_COMPLETE;
    }

    struct msghdr *msg = &c->msglist[c->msgcurr];
    ssize_t sent = sendmsg(c->sfd, msg, 0);

    if (sent > 0) {
        STATS_ADD(c, bytes_written, sent);

        /* Drop each iovec entry that went out in full. */
        while (msg->msg_iovlen > 0 && sent >= msg->msg_iov->iov_len) {
            sent -= msg->msg_iov->iov_len;
            msg->msg_iovlen--;
            msg->msg_iov++;
        }

        /* A leftover count means the next entry was written only in
           part; trim it so the following call sends the remainder. */
        if (sent > 0) {
            msg->msg_iov->iov_base = (caddr_t)msg->msg_iov->iov_base + sent;
            msg->msg_iov->iov_len -= sent;
        }
        return TRANSMIT_INCOMPLETE;
    }

    if (sent == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        /* Socket is full: wait for a write event and try again then. */
        if (update_event(c, EV_WRITE | EV_PERSIST)) {
            return TRANSMIT_SOFT_ERROR;
        }
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return TRANSMIT_HARD_ERROR;
    }

    /* sent == 0, or sent == -1 with an errno other than EAGAIN /
       EWOULDBLOCK: treat it as a real failure. */
    if (settings.verbose > 0) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Failed to write, and not due to blocking: %s",
                                        strerror(errno));
    }

    /* UDP connections go back to reading; everything else is closed. */
    if (IS_UDP(c->transport)) {
        conn_set_state(c, conn_read);
    } else {
        conn_set_state(c, conn_closing);
    }
    return TRANSMIT_HARD_ERROR;
}
5159
/*
 * State handler for a listening socket: accept one pending client and hand
 * it to a worker via dispatch_conn_new().
 *
 * Always returns false, so the caller's state loop stops after a single
 * accept attempt.
 */
bool conn_listening(conn *c)
{
    struct sockaddr_storage peer;
    socklen_t peer_len = sizeof(peer);
    int client_fd = accept(c->sfd, (struct sockaddr *)&peer, &peer_len);

    if (client_fd == -1) {
        if (errno == EMFILE) {
            /* Out of file descriptors: stop listening for now. */
            if (settings.verbose > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                                "Too many open connections\n");
            }
            disable_listen();
        } else if (errno != EAGAIN && errno != EWOULDBLOCK) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "Failed to accept new client: %s\n",
                                            strerror(errno));
        }

        return false;
    }

    /* Count the new connection first, then check it against the limit. */
    STATS_LOCK();
    int conn_count = ++stats.curr_conns;
    STATS_UNLOCK();

    if (conn_count >= settings.maxconns) {
        STATS_LOCK();
        ++stats.rejected_conns;
        STATS_UNLOCK();

        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Too many open connections\n");
        }

        safe_close(client_fd);
        return false;
    }

    if (evutil_make_socket_nonblocking(client_fd) == -1) {
        safe_close(client_fd);
        return false;
    }

    dispatch_conn_new(client_fd, conn_new_cmd, EV_READ | EV_PERSIST,
                      DATA_BUFFER_SIZE, tcp_transport);

    return false;
}
5210
5211 /**
5212 * Ship tap log to the other end. This state differs with all other states
5213 * in the way that it support full duplex dialog. We're listening to both read
5214 * and write events from libevent most of the time. If a read event occurs we
5215 * switch to the conn_read state to read and execute the input message (that would
5216 * be an ack message from the other side). If a write event occurs we continue to
5217 * send tap log to the other end.
5218 * @param c the tap connection to drive
5219 * @return true if we should continue to process work for this connection, false
5220 * if we should start processing events for other connections.
5221 */
conn_ship_log(conn * c)5222 bool conn_ship_log(conn *c) {
5223 bool cont = false;
5224
5225 if (c->sfd == INVALID_SOCKET) {
5226 return false;
5227 }
5228
5229 short mask = EV_READ | EV_PERSIST | EV_WRITE;
5230
5231 if (c->which & EV_READ || c->rbytes > 0) {
5232 if (c->rbytes > 0) {
5233 if (try_read_command(c) == 0) {
5234 conn_set_state(c, conn_read);
5235 }
5236 } else {
5237 conn_set_state(c, conn_read);
5238 }
5239
5240 // we're going to process something.. let's proceed
5241 cont = true;
5242
5243 // We have a finite number of messages in the input queue
5244 // so let's process all of them instead of backing off after
5245 // reading a subset of them.
5246 // Why? Because we've got every time we're calling ship_tap_log
5247 // we try to send a chunk of items.. This means that if we end
5248 // up in a situation where we're receiving a burst of nack messages
5249 // we'll only process a subset of messages in our input queue,
5250 // and it will slowly grow..
5251 c->nevents = settings.reqs_per_tap_event;
5252 } else if (c->which & EV_WRITE) {
5253 --c->nevents;
5254 if (c->nevents >= 0) {
5255 LOCK_THREAD(c->thread);
5256 c->ewouldblock = false;
5257 ship_tap_log(c);
5258 if (c->ewouldblock) {
5259 mask = EV_READ | EV_PERSIST;
5260 } else {
5261 cont = true;
5262 }
5263 UNLOCK_THREAD(c->thread);
5264 }
5265 }
5266
5267 if (!update_event(c, mask)) {
5268 if (settings.verbose > 0) {
5269 settings.extensions.logger->log(EXTENSION_LOG_INFO,
5270 c, "Couldn't update event\n");
5271 }
5272 conn_set_state(c, conn_closing);
5273 }
5274
5275 return cont;
5276 }
5277
/*
 * State handler: arm the connection for read events and move to conn_read.
 *
 * Returns false once the read event is registered (nothing more to do until
 * libevent calls back), or true after switching to conn_closing because the
 * event could not be updated.
 */
bool conn_waiting(conn *c) {
    if (update_event(c, EV_READ | EV_PERSIST)) {
        conn_set_state(c, conn_read);
        return false;
    }

    if (settings.verbose > 0) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Couldn't update event\n");
    }
    conn_set_state(c, conn_closing);
    return true;
}
5290
/*
 * State handler: pull data from the socket (UDP or stream reader, depending
 * on the transport) and choose the next state from the reader's result.
 *
 * Always returns true so the state loop carries on with the new state.
 */
bool conn_read(conn *c) {
    int outcome;

    if (IS_UDP(c->transport)) {
        outcome = try_read_udp(c);
    } else {
        outcome = try_read_network(c);
    }

    if (outcome == READ_NO_DATA_RECEIVED) {
        conn_set_state(c, conn_waiting);
    } else if (outcome == READ_DATA_RECEIVED) {
        conn_set_state(c, conn_parse_cmd);
    } else if (outcome == READ_ERROR) {
        conn_set_state(c, conn_closing);
    }
    /* READ_MEMORY_ERROR (failed to allocate more memory): the state was
       already set by try_read_network, so nothing to do here. */

    return true;
}
5310
/*
 * State handler: try to parse a command out of the input buffer.
 *
 * When try_read_command() reports 0 the connection goes to conn_waiting to
 * collect more input. Returns false only when c->ewouldblock is set.
 */
bool conn_parse_cmd(conn *c) {
    const bool need_more_data = (try_read_command(c) == 0);

    if (need_more_data) {
        conn_set_state(c, conn_waiting);
    }

    if (c->ewouldblock) {
        return false;
    }
    return true;
}
5319
/*
 * State handler: start processing the next command, within the
 * per-event request budget held in c->nevents.
 *
 * Returns true while budget remains (or after moving to conn_closing on an
 * event-update failure), false when the connection yields.
 */
bool conn_new_cmd(conn *c) {
    /* Only a limited number of requests are handled per event, so one
       connection cannot starve the others. */
    --c->nevents;
    if (c->nevents >= 0) {
        reset_cmd_handler(c);
        return true;
    }

    /* Budget used up: yield. */
    STATS_NOKEY(c, conn_yields);

    if (!(c->rbytes > 0)) {
        return false;
    }

    /* Data is already sitting in the input buffer, so libevent will most
       likely not signal a read event unless more arrives. As a hack, ask
       for a write event instead, because that should be possible. */
    if (update_event(c, EV_WRITE | EV_PERSIST)) {
        return false;
    }

    if (settings.verbose > 0) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO,
                                        c, "Couldn't update event\n");
    }
    conn_set_state(c, conn_closing);
    return true;
}
5348
5349
/*
 * State handler: read and discard c->sbytes bytes of input.
 *
 * Buffered input is consumed first, then the socket. Returns false only
 * when the socket would block and a read event was registered; true in
 * every other case.
 */
bool conn_swallow(conn *c) {
    /* Nothing left to throw away: on to the next command. */
    if (c->sbytes == 0) {
        conn_set_state(c, conn_new_cmd);
        return true;
    }

    /* Use up leftovers in the read buffer before touching the socket. */
    if (c->rbytes > 0) {
        uint32_t chunk = c->rbytes > c->sbytes ? c->sbytes : c->rbytes;
        c->sbytes -= chunk;
        c->rcurr += chunk;
        c->rbytes -= chunk;
        return true;
    }

    /* Read at most one buffer's worth, and never more than is still owed. */
    ssize_t got = recv(c->sfd, c->rbuf,
                       c->rsize > c->sbytes ? c->sbytes : c->rsize, 0);

    if (got > 0) {
        STATS_ADD(c, bytes_read, got);
        c->sbytes -= got;
        return true;
    }

    if (got == 0) {
        /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

    if (got == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        if (update_event(c, EV_READ | EV_PERSIST)) {
            return false;
        }
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return true;
    }

    /* A real error. Disconnects by the peer are not logged. */
    if (errno != ENOTCONN && errno != ECONNRESET) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Failed to read, and not due to blocking (%s)\n",
                                        strerror(errno));
    }

    conn_set_state(c, conn_closing);
    return true;
}
5402
/*
 * State handler: read the remaining c->rlbytes bytes of an item body into
 * c->ritem, then complete the request.
 *
 * Returns false when the connection must wait (the socket would block, or
 * complete_nread() left c->ewouldblock set); true otherwise, including
 * after a switch to conn_closing.
 */
bool conn_nread(conn *c) {
    ssize_t res;

    if (c->rlbytes == 0) {
        /* Whole body received: finish the command. */
        LIBEVENT_THREAD *t = c->thread;
        LOCK_THREAD(t);
        bool block = c->ewouldblock = false;
        complete_nread(c);
        UNLOCK_THREAD(t);
        /* Breaking this into two, as complete_nread may have
           moved us to a different thread */
        t = c->thread;
        LOCK_THREAD(t);
        if (c->ewouldblock) {
            unregister_event(c);
            block = true;
        }
        UNLOCK_THREAD(t);
        return !block;
    }

    /* first check if we have leftovers in the conn_read buffer */
    if (c->rbytes > 0) {
        uint32_t tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes;
        if (c->ritem != c->rcurr) {
            /* memmove: source and destination may overlap */
            memmove(c->ritem, c->rcurr, tocopy);
        }
        c->ritem += tocopy;
        c->rlbytes -= tocopy;
        c->rcurr += tocopy;
        c->rbytes -= tocopy;
        if (c->rlbytes == 0) {
            return true;
        }
    }

    /* now try reading from the socket */
    res = recv(c->sfd, c->ritem, c->rlbytes, 0);
    if (res > 0) {
        STATS_ADD(c, bytes_read, res);
        if (c->rcurr == c->ritem) {
            c->rcurr += res;
        }
        c->ritem += res;
        c->rlbytes -= res;
        return true;
    }
    if (res == 0) { /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

#ifdef INNODB_MEMCACHED
    /* MEMCACHED_RESOLVE: on the Solaris platform, when connected through
       telnet and waiting for input from an "add" or "set" command,
       recv could return res == -1 with errno == 0, causing early
       termination. The "!errno" condition deals with this scenario
       for now */
    if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK || !errno)) {
#else
    if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
#endif /* INNODB_MEMCACHED */
        if (!update_event(c, EV_READ | EV_PERSIST)) {
            if (settings.verbose > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                                "Couldn't update event\n");
            }
            conn_set_state(c, conn_closing);
            return true;
        }
        return false;
    }

    if (errno != ENOTCONN && errno != ECONNRESET) {
        /* otherwise we have a real error, on which we close the connection.
           Pointers are printed with %p: casting them to long for %lx is a
           signed/unsigned mismatch and truncates where long is narrower
           than a pointer. */
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Failed to read, and not due to blocking:\n"
                                        "errno: %d %s \n"
                                        "rcurr=%p ritem=%p rbuf=%p rlbytes=%d rsize=%d\n",
                                        errno, strerror(errno),
                                        (void *)c->rcurr, (void *)c->ritem, (void *)c->rbuf,
                                        (int)c->rlbytes, (int)c->rsize);
    }
    conn_set_state(c, conn_closing);
    return true;
}
5487
5488 bool conn_write(conn *c) {
5489 /*
5490 * We want to write out a simple response. If we haven't already,
5491 * assemble it into a msgbuf list (this will be a single-entry
5492 * list for TCP or a two-entry list for UDP).
5493 */
5494 if (c->iovused == 0 || (IS_UDP(c->transport) && c->iovused == 1)) {
5495 if (add_iov(c, c->wcurr, c->wbytes) != 0) {
5496 if (settings.verbose > 0) {
5497 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5498 "Couldn't build response\n");
5499 }
5500 conn_set_state(c, conn_closing);
5501 return true;
5502 }
5503 }
5504
5505 return conn_mwrite(c);
5506 }
5507
5508 bool conn_mwrite(conn *c) {
5509 if (IS_UDP(c->transport) && c->msgcurr == 0 && build_udp_headers(c) != 0) {
5510 if (settings.verbose > 0) {
5511 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5512 "Failed to build UDP headers\n");
5513 }
5514 conn_set_state(c, conn_closing);
5515 return true;
5516 }
5517
5518 switch (transmit(c)) {
5519 case TRANSMIT_COMPLETE:
5520 if (c->state == conn_mwrite) {
5521 while (c->ileft > 0) {
5522 item *it = *(c->icurr);
5523 settings.engine.v1->release(settings.engine.v0, c, it);
5524 c->icurr++;
5525 c->ileft--;
5526 }
5527 while (c->suffixleft > 0) {
5528 char *suffix = *(c->suffixcurr);
5529 cache_free(c->thread->suffix_cache, suffix);
5530 c->suffixcurr++;
5531 c->suffixleft--;
5532 }
5533 /* XXX: I don't know why this wasn't the general case */
5534 if(c->protocol == binary_prot) {
5535 conn_set_state(c, c->write_and_go);
5536 } else {
5537 conn_set_state(c, conn_new_cmd);
5538 }
5539 } else if (c->state == conn_write) {
5540 if (c->write_and_free) {
5541 free(c->write_and_free);
5542 c->write_and_free = 0;
5543 }
5544 conn_set_state(c, c->write_and_go);
5545 } else {
5546 if (settings.verbose > 0) {
5547 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5548 "Unexpected state %d\n", c->state);
5549 }
5550 conn_set_state(c, conn_closing);
5551 }
5552 break;
5553
5554 case TRANSMIT_INCOMPLETE:
5555 case TRANSMIT_HARD_ERROR:
5556 break; /* Continue in state machine. */
5557
5558 case TRANSMIT_SOFT_ERROR:
5559 return false;
5560 }
5561
5562 return true;
5563 }
5564
5565 bool conn_pending_close(conn *c) {
5566 assert(c->sfd == INVALID_SOCKET);
5567 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5568 "Awaiting clients to release the cookie (pending close for %p)",
5569 (void*)c);
5570 LOCK_THREAD(c->thread);
5571 c->thread->pending_io = list_remove(c->thread->pending_io, c);
5572 if (!list_contains(c->thread->pending_close, c)) {
5573 enlist_conn(c, &c->thread->pending_close);
5574 }
5575 UNLOCK_THREAD(c->thread);
5576
5577 /*
5578 * tell the tap connection that we're disconnecting it now,
5579 * but give it a grace period
5580 */
5581 perform_callbacks(ON_DISCONNECT, NULL, c);
5582
5583 /*
5584 * disconnect callback may have changed the state for the object
5585 * so we might complete the disconnect now
5586 */
5587 return c->state != conn_pending_close;
5588 }
5589
5590 bool conn_immediate_close(conn *c) {
5591 settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
5592 "Immediate close of %p",
5593 (void*)c);
5594 perform_callbacks(ON_DISCONNECT, NULL, c);
5595 conn_close(c);
5596
5597 return false;
5598 }
5599
5600 bool conn_closing(conn *c) {
5601 if (IS_UDP(c->transport)) {
5602 conn_cleanup(c);
5603 return false;
5604 }
5605
5606 // We don't want any network notifications anymore..
5607 unregister_event(c);
5608 safe_close(c->sfd);
5609 c->sfd = INVALID_SOCKET;
5610
5611 if (c->refcount > 1) {
5612 conn_set_state(c, conn_pending_close);
5613 } else {
5614 conn_set_state(c, conn_immediate_close);
5615 }
5616 return true;
5617 }
5618
5619 bool conn_add_tap_client(conn *c) {
5620 LIBEVENT_THREAD *tp = tap_thread;
5621 LIBEVENT_THREAD *orig_thread = c->thread;
5622
5623 assert(orig_thread);
5624 assert(orig_thread != tp);
5625
5626 c->ewouldblock = true;
5627
5628 unregister_event(c);
5629
5630 LOCK_THREAD(orig_thread);
5631 /* Clean out the lists */
5632 orig_thread->pending_io = list_remove(orig_thread->pending_io, c);
5633 orig_thread->pending_close = list_remove(orig_thread->pending_close, c);
5634
5635 LOCK_THREAD(tp);
5636 c->ev_flags = 0;
5637 conn_set_state(c, conn_setup_tap_stream);
5638 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
5639 "Moving %d conn from %p to %p\n",
5640 c->sfd, c->thread, tp);
5641 c->thread = tp;
5642 c->event.ev_base = tp->base;
5643 assert(c->next == NULL);
5644 assert(c->list_state == 0);
5645 enlist_conn(c, &tp->pending_io);
5646
5647 UNLOCK_THREAD(tp);
5648
5649 UNLOCK_THREAD(orig_thread);
5650
5651 notify_thread(tp);
5652
5653 return false;
5654 }
5655
5656 bool conn_setup_tap_stream(conn *c) {
5657 process_bin_tap_connect(c);
5658 return true;
5659 }
5660
/*
 * libevent callback for a connection's socket.
 *
 * Records which events fired, resets the per-event request budget, runs the
 * connection's state machine until a state handler returns false, and then
 * deals with any connections that were waiting on the thread's
 * pending_close list.
 *
 * fd    - descriptor the event fired on; must equal c->sfd
 * which - event flags reported by libevent, stored in c->which
 * arg   - the conn this event belongs to
 */
void event_handler(const int fd, const short which, void *arg) {
    conn *c;

    c = (conn *)arg;
    assert(c != NULL);

    /* On shutdown, break out of this connection's event loop and do nothing else. */
    if (memcached_shutdown) {
        event_base_loopbreak(c->event.ev_base);
        return ;
    }

    c->which = which;

    /* sanity */
    if (fd != c->sfd) {
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                    "Catastrophic: event fd doesn't match conn fd!\n");
        }
        conn_close(c);
        return;
    }

    perform_callbacks(ON_SWITCH_CONN, c, c);

    /* Request budget for this event; tap shipping has its own limit. */
    c->nevents = settings.reqs_per_event;
    if (c->state == conn_ship_log) {
        c->nevents = settings.reqs_per_tap_event;
    }

    /* Remember the thread now, before the state machine runs. */
    LIBEVENT_THREAD *thr = c->thread;

    // Do we have pending closes?
    const size_t max_items = 256;
    conn *pending_close[max_items];
    size_t n_pending_close = 0;
    if (thr != NULL) {
        LOCK_THREAD(thr);
        /* Only look at the list when current_time differs from the value
           recorded at the previous check. */
        if (thr->pending_close && thr->last_checked != current_time) {
            assert(!has_cycle(thr->pending_close));
            thr->last_checked = current_time;

            /* Copy up to max_items entries into the local array. */
            n_pending_close = list_to_array(pending_close, max_items,
                                            &thr->pending_close);
        }
        UNLOCK_THREAD(thr);
    }

    /* Run the state machine: each handler returns true to keep going.
       The verbose variant logs every state before executing it. */
    if (settings.verbose) {
        do {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "%d - Running task: (%s)\n",
                                            c->sfd, state_text(c->state));
        } while (c->state(c));
    } else {
        while (c->state(c)) {
            /* empty */
        }
    }

    /* Close any connections pending close */
    if (n_pending_close > 0) {
        for (size_t i = 0; i < n_pending_close; ++i) {
            conn *ce = pending_close[i];
            if (ce->refcount == 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                                "OK, time to nuke: %p\n",
                                                (void*)ce);
                conn_close(ce);
            } else {
                /* Still referenced: put it back on its thread's list. */
                LOCK_THREAD(ce->thread);
                enlist_conn(ce, &ce->thread->pending_close);
                UNLOCK_THREAD(ce->thread);
            }
        }
    }

    if (thr != NULL) {
        LOCK_THREAD(thr);
        finalize_list(pending_close, n_pending_close);
        UNLOCK_THREAD(thr);
    }
}
5744
5745 static void dispatch_event_handler(int fd, short which, void *arg) {
5746 char buffer[80];
5747 ssize_t nr = recv(fd, buffer, sizeof(buffer), 0);
5748
5749 if (nr != -1 && is_listen_disabled()) {
5750 bool enable = false;
5751 pthread_mutex_lock(&listen_state.mutex);
5752 listen_state.count -= nr;
5753 if (listen_state.count <= 0) {
5754 enable = true;
5755 listen_state.disabled = false;
5756 }
5757 pthread_mutex_unlock(&listen_state.mutex);
5758 if (enable) {
5759 conn *next;
5760 for (next = listen_conn; next; next = next->next) {
5761 update_event(next, EV_READ | EV_PERSIST);
5762 if (listen(next->sfd, settings.backlog) != 0) {
5763 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5764 "listen() failed",
5765 strerror(errno));
5766 }
5767 }
5768 }
5769 }
5770 }
5771
5772
5773
5774 static SOCKET new_socket(struct addrinfo *ai) {
5775 SOCKET sfd;
5776
5777 sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
5778 if (sfd == INVALID_SOCKET) {
5779 return INVALID_SOCKET;
5780 }
5781
5782 if (evutil_make_socket_nonblocking(sfd) == -1) {
5783 safe_close(sfd);
5784 return INVALID_SOCKET;
5785 }
5786
5787 return sfd;
5788 }
5789
5790
5791 /*
5792 * Sets a socket's send buffer size to the maximum allowed by the system.
5793 */
5794 static void maximize_sndbuf(const int sfd) {
5795 socklen_t intsize = sizeof(int);
5796 int last_good = 0;
5797 int min, max, avg;
5798 int old_size;
5799
5800 /* Start with the default size. */
5801 if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&old_size, &intsize) != 0) {
5802 if (settings.verbose > 0) {
5803 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5804 "getsockopt(SO_SNDBUF): %s",
5805 strerror(errno));
5806 }
5807
5808 return;
5809 }
5810
5811 /* Binary-search for the real maximum. */
5812 min = old_size;
5813 max = MAX_SENDBUF_SIZE;
5814
5815 while (min <= max) {
5816 avg = ((unsigned int)(min + max)) / 2;
5817 if (setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&avg, intsize) == 0) {
5818 last_good = avg;
5819 min = avg + 1;
5820 } else {
5821 max = avg - 1;
5822 }
5823 }
5824
5825 if (settings.verbose > 1) {
5826 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
5827 "<%d send buffer was %d, now %d\n", sfd, old_size, last_good);
5828 }
5829 }
5830
5831
5832
/**
 * Create a socket and bind it to a specific port number
 * @param interface the interface to bind to
 * @param port the port number to bind to (-1 is treated as 0)
 * @param transport the transport protocol (TCP / UDP)
 * @param portnumber_file A filepointer to write the port numbers to
 *        when they are successfully added to the list of ports we
 *        listen on.
 * @return 0 if at least one address was bound; 1 if none was, or if
 *         getaddrinfo(), bind() (for a reason other than EADDRINUSE) or
 *         listen() failed.
 */
static int server_socket(const char *interface,
                         int port,
                         enum network_transport transport,
                         FILE *portnumber_file) {
    int sfd;
    struct linger ling = {0, 0};
    struct addrinfo *ai;
    struct addrinfo *next;
    struct addrinfo hints = { .ai_flags = AI_PASSIVE,
                              .ai_family = AF_UNSPEC };
    char port_buf[NI_MAXSERV];
    int error;
    int success = 0;
    int flags =1;
    num_udp_socket = 0;

    hints.ai_socktype = IS_UDP(transport) ? SOCK_DGRAM : SOCK_STREAM;

    if (port == -1) {
        port = 0;
    }
    snprintf(port_buf, sizeof(port_buf), "%d", port);
    error= getaddrinfo(interface, port_buf, &hints, &ai);
    if (error != 0) {
        if (error != EAI_SYSTEM) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                            "getaddrinfo(): %s\n", gai_strerror(error));
        } else {
            /* EAI_SYSTEM means the actual cause is in errno; the
               getaddrinfo() return code is not an errno value. */
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                            "getaddrinfo(): %s\n", strerror(errno));
        }
        return 1;
    }

    /* Try every address getaddrinfo() returned. */
    for (next= ai; next; next= next->ai_next) {
        conn *listen_conn_add;
        if ((sfd = new_socket(next)) == INVALID_SOCKET) {
            /* getaddrinfo can return "junk" addresses,
             * we make sure at least one works before erroring.
             */
            continue;
        }

#ifdef IPV6_V6ONLY
        if (next->ai_family == AF_INET6) {
            error = setsockopt(sfd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(IPV6_V6ONLY): %s",
                                                strerror(errno));
                safe_close(sfd);
                continue;
            }
        }
#endif

        setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
        if (IS_UDP(transport)) {
            maximize_sndbuf(sfd);
            /* NOTE(review): the socket is recorded before bind() is
               attempted and the index is not range-checked here. */
            udp_socket[num_udp_socket] = sfd;
            num_udp_socket++;
        } else {
            /* Failures of the TCP socket options are logged but do not
               stop the socket from being used. */
            error = setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(SO_KEEPALIVE): %s",
                                                strerror(errno));
            }

            error = setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(SO_LINGER): %s",
                                                strerror(errno));
            }

            error = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(TCP_NODELAY): %s",
                                                strerror(errno));
            }
        }

        if (bind(sfd, next->ai_addr, next->ai_addrlen) == SOCKET_ERROR) {
            /* EADDRINUSE only skips this address; any other bind error
               aborts the whole call. */
            if (errno != EADDRINUSE) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "bind(): %s",
                                                strerror(errno));
                safe_close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            safe_close(sfd);
            continue;
        } else {
            success++;
            if (!IS_UDP(transport) && listen(sfd, settings.backlog) == SOCKET_ERROR) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "listen(): %s",
                                                strerror(errno));
                safe_close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            /* Report the port actually bound (relevant when port 0 was
               requested). */
            if (portnumber_file != NULL &&
                (next->ai_addr->sa_family == AF_INET ||
                 next->ai_addr->sa_family == AF_INET6)) {
                union {
                    struct sockaddr_in in;
                    struct sockaddr_in6 in6;
                } my_sockaddr;
                socklen_t len = sizeof(my_sockaddr);
                if (getsockname(sfd, (struct sockaddr*)&my_sockaddr, &len)==0) {
                    if (next->ai_addr->sa_family == AF_INET) {
                        fprintf(portnumber_file, "%s INET: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in.sin_port));
                    } else {
                        fprintf(portnumber_file, "%s INET6: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in6.sin6_port));
                    }
                }
            }
        }

        if (IS_UDP(transport)) {
            int c;

            for (c = 0; c < settings.num_threads_per_udp; c++) {
                /* this is guaranteed to hit all threads because we round-robin */
                dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
                                  UDP_READ_BUFFER_SIZE, transport);
                STATS_LOCK();
                ++stats.curr_conns;
                ++stats.daemon_conns;
                STATS_UNLOCK();
            }
        } else {
            if (!(listen_conn_add = conn_new(sfd, conn_listening,
                                             EV_READ | EV_PERSIST, 1,
                                             transport, main_base, NULL))) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "failed to create listening connection\n");
                exit(EXIT_FAILURE);
            }
            /* Push the new listener onto the front of the listen_conn list. */
            listen_conn_add->next = listen_conn;
            listen_conn = listen_conn_add;
            STATS_LOCK();
            ++stats.curr_conns;
            ++stats.daemon_conns;
            STATS_UNLOCK();
        }
    }

    freeaddrinfo(ai);

    /* Return zero iff we detected no errors in starting up connections */
    return success == 0;
}
6003
6004 static int server_sockets(int port, enum network_transport transport,
6005 FILE *portnumber_file) {
6006 if (settings.inter == NULL) {
6007 return server_socket(settings.inter, port, transport, portnumber_file);
6008 } else {
6009 // tokenize them and bind to each one of them..
6010 char *b;
6011 int ret = 0;
6012 char *list = strdup(settings.inter);
6013
6014 if (list == NULL) {
6015 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6016 "Failed to allocate memory for parsing server interface string\n");
6017 return 1;
6018 }
6019 for (char *p = strtok_r(list, ";,", &b);
6020 p != NULL;
6021 p = strtok_r(NULL, ";,", &b)) {
6022 int the_port = port;
6023
6024 char *s = strchr(p, ':');
6025 if (s != NULL) {
6026 *s = '\0';
6027 ++s;
6028 if (!safe_strtol(s, &the_port)) {
6029 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6030 "Invalid port number: \"%s\"", s);
6031 return 1;
6032 }
6033 }
6034 if (strcmp(p, "*") == 0) {
6035 p = NULL;
6036 }
6037 ret |= server_socket(p, the_port, transport, portnumber_file);
6038 }
6039 free(list);
6040 return ret;
6041 }
6042 }
6043
6044 static int new_socket_unix(void) {
6045 int sfd;
6046
6047 if ((sfd = socket(AF_UNIX, SOCK_STREAM, 0)) == INVALID_SOCKET) {
6048 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6049 "socket(AF_UNIX, SOCK_STREAM, 0): %s",
6050 strerror(errno));
6051 return INVALID_SOCKET;
6052 }
6053
6054 if (evutil_make_socket_nonblocking(sfd) == -1) {
6055 safe_close(sfd);
6056 return INVALID_SOCKET;
6057 }
6058 return sfd;
6059 }
6060
6061 /* this will probably not work on windows */
6062 static int server_socket_unix(const char *path, int access_mask) {
6063 int sfd;
6064 struct linger ling = {0, 0};
6065 struct sockaddr_un addr;
6066 struct stat tstat;
6067 int flags =1;
6068 int old_umask;
6069
6070 if (!path) {
6071 return 1;
6072 }
6073
6074 if ((sfd = new_socket_unix()) == -1) {
6075 return 1;
6076 }
6077
6078 /*
6079 * Clean up a previous socket file if we left it around
6080 */
6081 if (lstat(path, &tstat) == 0) {
6082 if (S_ISSOCK(tstat.st_mode))
6083 unlink(path);
6084 }
6085
6086 setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
6087 setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
6088 setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
6089
6090 /*
6091 * the memset call clears nonstandard fields in some impementations
6092 * that otherwise mess things up.
6093 */
6094 memset(&addr, 0, sizeof(addr));
6095
6096 addr.sun_family = AF_UNIX;
6097 strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
6098 assert(strcmp(addr.sun_path, path) == 0);
6099 old_umask = umask( ~(access_mask&0777));
6100 if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
6101 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6102 "bind(): %s",
6103 strerror(errno));
6104 safe_close(sfd);
6105 umask(old_umask);
6106 return 1;
6107 }
6108 umask(old_umask);
6109 if (listen(sfd, settings.backlog) == -1) {
6110 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6111 "listen(): %s",
6112 strerror(errno));
6113 safe_close(sfd);
6114 return 1;
6115 }
6116 if (!(listen_conn = conn_new(sfd, conn_listening,
6117 EV_READ | EV_PERSIST, 1,
6118 local_transport, main_base, NULL))) {
6119 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6120 "failed to create listening connection\n");
6121 exit(EXIT_FAILURE);
6122 }
6123 STATS_LOCK();
6124 ++stats.daemon_conns;
6125 STATS_UNLOCK();
6126
6127 return 0;
6128 }
6129
/* Timer event that clock_handler() re-arms on main_base with a one-second timeout. */
static struct event clockevent;
6131
6132 /* time-sensitive callers can call it by hand with this, outside the normal ever-1-second timer */
6133 static void set_current_time(void) {
6134 struct timeval timer;
6135
6136 gettimeofday(&timer, NULL);
6137 current_time = (rel_time_t) (timer.tv_sec - process_started);
6138 }
6139
6140 static void clock_handler(const int fd, const short which, void *arg) {
6141 struct timeval t = {.tv_sec = 1, .tv_usec = 0};
6142 static bool initialized = false;
6143
6144 if (memcached_shutdown) {
6145 event_base_loopbreak(main_base);
6146 return ;
6147 }
6148
6149 if (initialized) {
6150 /* only delete the event if it's actually there. */
6151 evtimer_del(&clockevent);
6152 } else {
6153 initialized = true;
6154 }
6155
6156 evtimer_set(&clockevent, clock_handler, 0);
6157 event_base_set(main_base, &clockevent);
6158 evtimer_add(&clockevent, &t);
6159
6160 set_current_time();
6161 }
6162
/*
 * Print the package name and version followed by the command-line help
 * text (options and recognised environment variables) to stdout.
 */
static void usage(void) {
    printf(PACKAGE " " VERSION "\n");
    printf("-p <num> TCP port number to listen on (default: 11211)\n"
           "-U <num> UDP port number to listen on (default: 11211, 0 is off)\n"
           "-s <file> UNIX socket path to listen on (disables network support)\n"
           "-a <mask> access mask for UNIX socket, in octal (default: 0700)\n"
           "-l <addr> interface to listen on (default: INADDR_ANY, all addresses)\n"
           " <addr> may be specified as host:port. If you don't specify\n"
           " a port number, the value you specified with -p or -U is\n"
           " used. You may specify multiple addresses separated by comma\n"
           " or by using -l multiple times\n"
           "-d run as a daemon\n"
           "-r maximize core file limit\n"
           "-u <username> assume identity of <username> (only when run as root)\n"
           "-m <num> max memory to use for items in megabytes (default: 64 MB)\n"
           "-M return error on memory exhausted (rather than removing items)\n"
           "-c <num> max simultaneous connections (default: 1000)\n"
           "-k lock down all paged memory. Note that there is a\n"
           " limit on how much memory you may lock. Trying to\n"
           " allocate more than that would fail, so be sure you\n"
           " set the limit correctly for the user you started\n"
           " the daemon with (not for -u <username> user;\n"
           " under sh this is done with 'ulimit -S -l NUM_KB').\n"
           "-v verbose (print errors/warnings while in event loop)\n"
           "-vv very verbose (also print client commands/reponses)\n"
           "-vvv extremely verbose (also print internal state transitions)\n"
           "-h print this help and exit\n"
           "-i print memcached and libevent license\n"
           "-P <file> save PID in <file>, only used with -d option\n"
           "-f <factor> chunk size growth factor (default: 1.25)\n"
           "-n <bytes> minimum space allocated for key+value+flags (default: 48)\n");
    printf("-L Try to use large memory pages (if available). Increasing\n"
           " the memory page size could reduce the number of TLB misses\n"
           " and improve the performance. In order to get large pages\n"
           " from the OS, memcached will allocate the total item-cache\n"
           " in one large chunk.\n");
    printf("-D <char> Use <char> as the delimiter between key prefixes and IDs.\n"
           " This is used for per-prefix stats reporting. The default is\n"
           " \":\" (colon). If this option is specified, stats collection\n"
           " is turned on automatically; if not, then it may be turned on\n"
           " by sending the \"stats detail on\" command to the server.\n");
    printf("-t <num> number of threads to use (default: 4)\n");
    printf("-R Maximum number of requests per event, limits the number of\n"
           " requests process for a given connection to prevent \n"
           " starvation (default: 20)\n");
    printf("-C Disable use of CAS\n");
    printf("-b Set the backlog queue limit (default: 1024)\n");
    printf("-B Binding protocol - one of ascii, binary, or auto (default)\n");
    printf("-I Override the size of each slab page. Adjusts max item size\n"
           " (default: 1mb, min: 1k, max: 128m)\n");
    printf("-q Disable detailed stats commands\n");
    /* The SASL option is listed only when SASL support is compiled in. */
#ifdef SASL_ENABLED
    printf("-S Require SASL authentication\n");
#endif
    printf("-X module,cfg Load the module and initialize it with the config\n");
    printf("-E engine Load engine as the storage engine\n");
    printf("-e config Pass config as configuration options to the storage engine\n");
    printf("\nEnvironment variables:\n"
           "MEMCACHED_PORT_FILENAME File to write port information to\n"
           "MEMCACHED_TOP_KEYS Number of top keys to keep track of\n"
           "MEMCACHED_REQS_TAP_EVENT Similar to -R but for tap_ship_log\n");
}
6225 static void usage_license(void) {
6226 printf(PACKAGE " " VERSION "\n\n");
6227 printf(
6228 "Copyright (c) 2003, Danga Interactive, Inc. <http://www.danga.com/>\n"
6229 "All rights reserved.\n"
6230 "\n"
6231 "Redistribution and use in source and binary forms, with or without\n"
6232 "modification, are permitted provided that the following conditions are\n"
6233 "met:\n"
6234 "\n"
6235 " * Redistributions of source code must retain the above copyright\n"
6236 "notice, this list of conditions and the following disclaimer.\n"
6237 "\n"
6238 " * Redistributions in binary form must reproduce the above\n"
6239 "copyright notice, this list of conditions and the following disclaimer\n"
6240 "in the documentation and/or other materials provided with the\n"
6241 "distribution.\n"
6242 "\n"
6243 " * Neither the name of the Danga Interactive nor the names of its\n"
6244 "contributors may be used to endorse or promote products derived from\n"
6245 "this software without specific prior written permission.\n"
6246 "\n"
6247 "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n"
6248 "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n"
6249 "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n"
6250 "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n"
6251 "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n"
6252 "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
6253 "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6254 "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6255 "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6256 "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n"
6257 "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6258 "\n"
6259 "\n"
6260 "This product includes software developed by Niels Provos.\n"
6261 "\n"
6262 "[ libevent ]\n"
6263 "\n"
6264 "Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>\n"
6265 "All rights reserved.\n"
6266 "\n"
6267 "Redistribution and use in source and binary forms, with or without\n"
6268 "modification, are permitted provided that the following conditions\n"
6269 "are met:\n"
6270 "1. Redistributions of source code must retain the above copyright\n"
6271 " notice, this list of conditions and the following disclaimer.\n"
6272 "2. Redistributions in binary form must reproduce the above copyright\n"
6273 " notice, this list of conditions and the following disclaimer in the\n"
6274 " documentation and/or other materials provided with the distribution.\n"
6275 "3. All advertising materials mentioning features or use of this software\n"
6276 " must display the following acknowledgement:\n"
6277 " This product includes software developed by Niels Provos.\n"
6278 "4. The name of the author may not be used to endorse or promote products\n"
6279 " derived from this software without specific prior written permission.\n"
6280 "\n"
6281 "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
6282 "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
6283 "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
6284 "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
6285 "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n"
6286 "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6287 "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6288 "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6289 "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
6290 "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6291 );
6292
6293 return;
6294 }
6295
6296 static void save_pid(const char *pid_file) {
6297 FILE *fp;
6298
6299 if (access(pid_file, F_OK) == 0) {
6300 if ((fp = fopen(pid_file, "r")) != NULL) {
6301 char buffer[1024];
6302 if (fgets(buffer, sizeof(buffer), fp) != NULL) {
6303 unsigned int pid;
6304 if (safe_strtoul(buffer, &pid) && kill((pid_t)pid, 0) == 0) {
6305 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6306 "WARNING: The pid file contained the following (running) pid: %u\n", pid);
6307 }
6308 }
6309 fclose(fp);
6310 }
6311 }
6312
6313 if ((fp = fopen(pid_file, "w")) == NULL) {
6314 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6315 "Could not open the pid file %s for writing: %s\n",
6316 pid_file, strerror(errno));
6317 return;
6318 }
6319
6320 fprintf(fp,"%ld\n", (long)getpid());
6321 if (fclose(fp) == -1) {
6322 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6323 "Could not close the pid file %s: %s\n",
6324 pid_file, strerror(errno));
6325 }
6326 }
6327
6328 static void remove_pidfile(const char *pid_file) {
6329 if (pid_file != NULL) {
6330 if (unlink(pid_file) != 0) {
6331 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6332 "Could not remove the pid file %s: %s\n",
6333 pid_file, strerror(errno));
6334 }
6335 }
6336 }
6337
6338 #ifndef HAVE_SIGIGNORE
6339 static int sigignore(int sig) {
6340 struct sigaction sa = { .sa_handler = SIG_IGN, .sa_flags = 0 };
6341
6342 if (sigemptyset(&sa.sa_mask) == -1 || sigaction(sig, &sa, 0) == -1) {
6343 return -1;
6344 }
6345 return 0;
6346 }
6347 #endif /* !HAVE_SIGIGNORE */
6348
6349 static void sigterm_handler(int sig) {
6350 assert(sig == SIGTERM || sig == SIGINT);
6351 memcached_shutdown = 1;
6352 }
6353
6354 static int install_sigterm_handler(void) {
6355 struct sigaction sa = {.sa_handler = sigterm_handler, .sa_flags = 0};
6356
6357 if (sigemptyset(&sa.sa_mask) == -1 || sigaction(SIGTERM, &sa, 0) == -1 ||
6358 sigaction(SIGINT, &sa, 0) == -1) {
6359 return -1;
6360 }
6361
6362 return 0;
6363 }
6364
/*
 * On systems that support multiple page sizes we may reduce the
 * number of TLB misses by using the biggest available page size.
 *
 * Returns 0 on success (always, when getpagesizes()/memcntl() are not
 * available at build time) and -1 if the page sizes could not be queried
 * or the advice could not be applied; both failures are logged.
 */
static int enable_large_pages(void) {
#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
    size_t page_sizes[32];
    int count = getpagesizes(page_sizes, 32);

    if (count == -1) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
            "Failed to get supported pagesizes: %s\nWill use default page size\n",
            strerror(errno));
        return -1;
    }

    /* Pick the largest of the reported page sizes. */
    size_t largest = page_sizes[0];
    for (int idx = 1; idx < count; ++idx) {
        if (page_sizes[idx] > largest) {
            largest = page_sizes[idx];
        }
    }

    struct memcntl_mha advice = {
        .mha_cmd = MHA_MAPSIZE_BSSBRK,
        .mha_flags = 0,
        .mha_pagesize = largest
    };

    if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&advice, 0, 0) == -1) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
            "Failed to set large pages: %s\nWill use default page size\n",
            strerror(errno));
        return -1;
    }

    return 0;
#else
    return 0;
#endif
}
6407
6408 static const char* get_server_version(void) {
6409 return VERSION;
6410 }
6411
6412 static void store_engine_specific(const void *cookie,
6413 void *engine_data) {
6414 conn *c = (conn*)cookie;
6415 c->engine_storage = engine_data;
6416 }
6417
6418 static void *get_engine_specific(const void *cookie) {
6419 conn *c = (conn*)cookie;
6420 return c->engine_storage;
6421 }
6422
6423 static int get_socket_fd(const void *cookie) {
6424 conn *c = (conn *)cookie;
6425 return c->sfd;
6426 }
6427
6428 static void set_tap_nack_mode(const void *cookie, bool enable) {
6429 conn *c = (conn *)cookie;
6430 c->tap_nack_mode = enable;
6431 }
6432
6433 static void reserve_cookie(const void *cookie) {
6434 conn *c = (conn *)cookie;
6435 ++c->refcount;
6436 }
6437
6438 static void release_cookie(const void *cookie) {
6439 conn *c = (conn *)cookie;
6440 --c->refcount;
6441 }
6442
6443 static int num_independent_stats(void) {
6444 return settings.num_threads + 1;
6445 }
6446
6447 static void *new_independent_stats(void) {
6448 int ii;
6449 int nrecords = num_independent_stats();
6450 struct independent_stats *independent_stats = calloc(sizeof(independent_stats) + sizeof(struct thread_stats) * nrecords, 1);
6451
6452 #ifdef INNODB_MEMCACHED
6453 if (independent_stats == NULL) {
6454 fprintf(stderr, "Unable to allocate memory for"
6455 "independent_stats...\n");
6456 return (NULL);
6457 }
6458 #endif
6459
6460 if (settings.topkeys > 0)
6461 independent_stats->topkeys = topkeys_init(settings.topkeys);
6462 for (ii = 0; ii < nrecords; ii++)
6463 pthread_mutex_init(&independent_stats->thread_stats[ii].mutex, NULL);
6464 return independent_stats;
6465 }
6466
6467 static void release_independent_stats(void *stats) {
6468 int ii;
6469 int nrecords = num_independent_stats();
6470 struct independent_stats *independent_stats = stats;
6471 if (independent_stats->topkeys)
6472 topkeys_free(independent_stats->topkeys);
6473 for (ii = 0; ii < nrecords; ii++)
6474 pthread_mutex_destroy(&independent_stats->thread_stats[ii].mutex);
6475 free(independent_stats);
6476 }
6477
6478 static inline struct independent_stats *get_independent_stats(conn *c) {
6479 struct independent_stats *independent_stats;
6480 if (settings.engine.v1->get_stats_struct != NULL) {
6481 independent_stats = settings.engine.v1->get_stats_struct(settings.engine.v0, (const void *)c);
6482 if (independent_stats == NULL)
6483 independent_stats = default_independent_stats;
6484 } else {
6485 independent_stats = default_independent_stats;
6486 }
6487 return independent_stats;
6488 }
6489
6490 static inline struct thread_stats *get_thread_stats(conn *c) {
6491 struct independent_stats *independent_stats = get_independent_stats(c);
6492 assert(c->thread->index < num_independent_stats());
6493 return &independent_stats->thread_stats[c->thread->index];
6494 }
6495
6496 static void register_callback(ENGINE_HANDLE *eh,
6497 ENGINE_EVENT_TYPE type,
6498 EVENT_CALLBACK cb, const void *cb_data) {
6499 struct engine_event_handler *h =
6500 calloc(sizeof(struct engine_event_handler), 1);
6501
6502 assert(h);
6503 h->cb = cb;
6504 h->cb_data = cb_data;
6505 h->next = engine_event_handlers[type];
6506 engine_event_handlers[type] = h;
6507 }
6508
6509 static rel_time_t get_current_time(void)
6510 {
6511 return current_time;
6512 }
6513
6514 static void count_eviction(const void *cookie, const void *key, const int nkey) {
6515 topkeys_t *tk = get_independent_stats((conn*)cookie)->topkeys;
6516 TK(tk, evictions, key, nkey, get_current_time());
6517 }
6518
6519 /**
6520 * To make it easy for engine implementors that doesn't want to care about
6521 * writing their own incr/decr code, they can just set the arithmetic function
6522 * to NULL and use this implementation. It is not efficient, due to the fact
6523 * that it does multiple calls through the interface (get and then cas store).
6524 * If you don't care, feel free to use it..
6525 */
6526 static ENGINE_ERROR_CODE internal_arithmetic(ENGINE_HANDLE* handle,
6527 const void* cookie,
6528 const void* key,
6529 const int nkey,
6530 const bool increment,
6531 const bool create,
6532 const uint64_t delta,
6533 const uint64_t initial,
6534 const rel_time_t exptime,
6535 uint64_t *cas,
6536 uint64_t *result,
6537 uint16_t vbucket)
6538 {
6539 ENGINE_HANDLE_V1 *e = (ENGINE_HANDLE_V1*)handle;
6540
6541 item *it = NULL;
6542
6543 ENGINE_ERROR_CODE ret;
6544 ret = e->get(handle, cookie, &it, key, nkey, vbucket);
6545
6546 if (ret == ENGINE_SUCCESS) {
6547 item_info info = { .nvalue = 1 };
6548
6549 if (!e->get_item_info(handle, cookie, it, &info)) {
6550 e->release(handle, cookie, it);
6551 return ENGINE_FAILED;
6552 }
6553
6554 char value[80];
6555
6556 if (info.value[0].iov_len > (sizeof(value) - 1)) {
6557 e->release(handle, cookie, it);
6558 return ENGINE_EINVAL;
6559 }
6560
6561 memcpy(value, info.value[0].iov_base, info.value[0].iov_len);
6562 value[info.value[0].iov_len] = '\0';
6563
6564 uint64_t val;
6565 if (!safe_strtoull(value, &val)) {
6566 e->release(handle, cookie, it);
6567 return ENGINE_EINVAL;
6568 }
6569
6570 if (increment) {
6571 val += delta;
6572 } else {
6573 if (delta > val) {
6574 val = 0;
6575 } else {
6576 val -= delta;
6577 }
6578 }
6579
6580 size_t nb = snprintf(value, sizeof(value), "%"PRIu64, val);
6581 *result = val;
6582 item *nit = NULL;
6583 if (e->allocate(handle, cookie, &nit, key,
6584 nkey, nb, info.flags, info.exptime) != ENGINE_SUCCESS) {
6585 e->release(handle, cookie, it);
6586 return ENGINE_ENOMEM;
6587 }
6588
6589 item_info i2 = { .nvalue = 1 };
6590 if (!e->get_item_info(handle, cookie, nit, &i2)) {
6591 e->release(handle, cookie, it);
6592 e->release(handle, cookie, nit);
6593 return ENGINE_FAILED;
6594 }
6595
6596 memcpy(i2.value[0].iov_base, value, nb);
6597 e->item_set_cas(handle, cookie, nit, info.cas);
6598 ret = e->store(handle, cookie, nit, cas, OPERATION_CAS, vbucket);
6599 e->release(handle, cookie, it);
6600 e->release(handle, cookie, nit);
6601 } else if (ret == ENGINE_KEY_ENOENT && create) {
6602 char value[80];
6603 size_t nb = snprintf(value, sizeof(value), "%"PRIu64"\r\n", initial);
6604 *result = initial;
6605 if (e->allocate(handle, cookie, &it, key, nkey, nb, 0, exptime) != ENGINE_SUCCESS) {
6606 e->release(handle, cookie, it);
6607 return ENGINE_ENOMEM;
6608 }
6609
6610 item_info info = { .nvalue = 1 };
6611 if (!e->get_item_info(handle, cookie, it, &info)) {
6612 e->release(handle, cookie, it);
6613 return ENGINE_FAILED;
6614 }
6615
6616 memcpy(info.value[0].iov_base, value, nb);
6617 ret = e->store(handle, cookie, it, cas, OPERATION_CAS, vbucket);
6618 e->release(handle, cookie, it);
6619 }
6620
6621 /* We had a race condition.. just call ourself recursively to retry */
6622 if (ret == ENGINE_KEY_EEXISTS) {
6623 return internal_arithmetic(handle, cookie, key, nkey, increment, create, delta,
6624 initial, exptime, cas, result, vbucket);
6625 }
6626
6627 return ret;
6628 }
6629
6630 /**
6631 * Register an extension if it's not already registered
6632 *
6633 * @param type the type of the extension to register
6634 * @param extension the extension to register
6635 * @return true if success, false otherwise
6636 */
6637 static bool register_extension(extension_type_t type, void *extension)
6638 {
6639 if (extension == NULL) {
6640 return false;
6641 }
6642
6643 switch (type) {
6644 case EXTENSION_DAEMON:
6645 for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6646 ptr != NULL;
6647 ptr = ptr->next) {
6648 if (ptr == extension) {
6649 return false;
6650 }
6651 }
6652 ((EXTENSION_DAEMON_DESCRIPTOR *)(extension))->next = settings.extensions.daemons;
6653 settings.extensions.daemons = extension;
6654 return true;
6655 case EXTENSION_LOGGER:
6656 settings.extensions.logger = extension;
6657 return true;
6658 case EXTENSION_ASCII_PROTOCOL:
6659 if (settings.extensions.ascii != NULL) {
6660 EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *last;
6661 for (last = settings.extensions.ascii; last->next != NULL;
6662 last = last->next) {
6663 if (last == extension) {
6664 return false;
6665 }
6666 }
6667 if (last == extension) {
6668 return false;
6669 }
6670 last->next = extension;
6671 last->next->next = NULL;
6672 } else {
6673 settings.extensions.ascii = extension;
6674 settings.extensions.ascii->next = NULL;
6675 }
6676 return true;
6677
6678 default:
6679 return false;
6680 }
6681 }
6682
6683 /**
6684 * Unregister an extension
6685 *
6686 * @param type the type of the extension to remove
6687 * @param extension the extension to remove
6688 */
6689 static void unregister_extension(extension_type_t type, void *extension)
6690 {
6691 switch (type) {
6692 case EXTENSION_DAEMON:
6693 {
6694 EXTENSION_DAEMON_DESCRIPTOR *prev = NULL;
6695 EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6696
6697 while (ptr != NULL && ptr != extension) {
6698 prev = ptr;
6699 ptr = ptr->next;
6700 }
6701
6702 if (ptr != NULL && prev != NULL) {
6703 prev->next = ptr->next;
6704 }
6705
6706 if (settings.extensions.daemons == ptr) {
6707 settings.extensions.daemons = ptr->next;
6708 }
6709 }
6710 break;
6711 case EXTENSION_LOGGER:
6712 if (settings.extensions.logger == extension) {
6713 if (get_stderr_logger() == extension) {
6714 settings.extensions.logger = get_null_logger();
6715 } else {
6716 settings.extensions.logger = get_stderr_logger();
6717 }
6718 }
6719 break;
6720 case EXTENSION_ASCII_PROTOCOL:
6721 {
6722 EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *prev = NULL;
6723 EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
6724
6725 while (ptr != NULL && ptr != extension) {
6726 prev = ptr;
6727 ptr = ptr->next;
6728 }
6729
6730 if (ptr != NULL && prev != NULL) {
6731 prev->next = ptr->next;
6732 }
6733
6734 if (settings.extensions.ascii == ptr) {
6735 settings.extensions.ascii = ptr->next;
6736 }
6737 }
6738 break;
6739
6740 default:
6741 ;
6742 }
6743
6744 }
6745
6746 /**
6747 * Get the named extension
6748 */
6749 static void* get_extension(extension_type_t type)
6750 {
6751 switch (type) {
6752 case EXTENSION_DAEMON:
6753 return settings.extensions.daemons;
6754
6755 case EXTENSION_LOGGER:
6756 return settings.extensions.logger;
6757
6758 case EXTENSION_ASCII_PROTOCOL:
6759 return settings.extensions.ascii;
6760
6761 default:
6762 return NULL;
6763 }
6764 }
6765
6766 #ifdef INNODB_MEMCACHED
6767 void shutdown_server(void) {
6768 #else
6769 static void shutdown_server(void) {
6770 #endif /* INNODB_MEMCACHED */
6771 #ifdef INNODB_MEMCACHED
6772 int i;
6773 /* Clean up connections */
6774 while (listen_conn) {
6775 conn_closing(listen_conn);
6776 listen_conn = listen_conn->next;
6777 }
6778
6779 for (i = 0; i < num_udp_socket; i++) {
6780 safe_close(udp_socket[i]);
6781 }
6782 #endif
6783 memcached_shutdown = 1;
6784 }
6785
#ifdef INNODB_MEMCACHED
/* Report whether shutdown has finished (memcached_shutdown == 2). */
bool shutdown_complete(void)
{
    const bool finished = (memcached_shutdown == 2);
    return finished;
}

/* Report whether initialization has finished (memcached_initialized == 1). */
bool init_complete(void)
{
    const bool ready = (memcached_initialized == 1);
    return ready;
}
#endif
6797
6798 static EXTENSION_LOGGER_DESCRIPTOR* get_logger(void)
6799 {
6800 return settings.extensions.logger;
6801 }
6802
6803 static EXTENSION_LOG_LEVEL get_log_level(void)
6804 {
6805 EXTENSION_LOG_LEVEL ret;
6806 switch (settings.verbose) {
6807 case 0: ret = EXTENSION_LOG_WARNING; break;
6808 case 1: ret = EXTENSION_LOG_INFO; break;
6809 case 2: ret = EXTENSION_LOG_DEBUG; break;
6810 default:
6811 ret = EXTENSION_LOG_DETAIL;
6812 }
6813 return ret;
6814 }
6815
6816 static void set_log_level(EXTENSION_LOG_LEVEL severity)
6817 {
6818 switch (severity) {
6819 case EXTENSION_LOG_WARNING: settings.verbose = 0; break;
6820 case EXTENSION_LOG_INFO: settings.verbose = 1; break;
6821 case EXTENSION_LOG_DEBUG: settings.verbose = 2; break;
6822 default:
6823 settings.verbose = 3;
6824 }
6825 }
6826
/*
 * Stats callback that appends "key=val;" to a configuration string.
 *
 * cookie is the destination: a NUL-terminated string held in a buffer of
 * 1024 bytes (see get_config()). Pairs with an empty key or value are
 * skipped, and so is any pair that would not fit in the buffer together
 * with its '=', ';' and terminating NUL.
 */
static void get_config_append_stats(const char *key, const uint16_t klen,
                                    const char *val, const uint32_t vlen,
                                    const void *cookie)
{
    if (klen == 0 || vlen == 0) {
        return ;
    }

    char *buffer = (char*)cookie;
    const size_t used = strlen(buffer);
    const size_t needed = used + klen + vlen + 3;

    if (needed > 1024) {
        // Not enough size in the buffer..
        return;
    }

    char *cursor = buffer + used;

    memcpy(cursor, key, klen);
    cursor += klen;
    *cursor++ = '=';
    memcpy(cursor, val, vlen);
    cursor += vlen;
    *cursor++ = ';';
    *cursor = '\0';
}
6851
6852 static bool get_config(struct config_item items[]) {
6853 char config[1024];
6854 config[0] = '\0';
6855 process_stat_settings(get_config_append_stats, config);
6856 int rval = parse_config(config, items, NULL);
6857 return rval >= 0;
6858 }
6859
6860 /**
6861 * Callback the engines may call to get the public server interface
6862 * @return pointer to a structure containing the interface. The client should
6863 * know the layout and perform the proper casts.
6864 */
6865 static SERVER_HANDLE_V1 *get_server_api(void)
6866 {
6867 static SERVER_CORE_API core_api = {
6868 .server_version = get_server_version,
6869 .hash = hash,
6870 .realtime = realtime,
6871 .abstime = abstime,
6872 .get_current_time = get_current_time,
6873 .parse_config = parse_config,
6874 .shutdown = shutdown_server,
6875 .get_config = get_config
6876 };
6877
6878 static SERVER_COOKIE_API server_cookie_api = {
6879 .get_auth_data = get_auth_data,
6880 .store_engine_specific = store_engine_specific,
6881 .get_engine_specific = get_engine_specific,
6882 .get_socket_fd = get_socket_fd,
6883 .set_tap_nack_mode = set_tap_nack_mode,
6884 .notify_io_complete = notify_io_complete,
6885 .reserve = reserve_cookie,
6886 .release = release_cookie
6887 };
6888
6889 static SERVER_STAT_API server_stat_api = {
6890 .new_stats = new_independent_stats,
6891 .release_stats = release_independent_stats,
6892 .evicting = count_eviction
6893 };
6894
6895 static SERVER_LOG_API server_log_api = {
6896 .get_logger = get_logger,
6897 .get_level = get_log_level,
6898 .set_level = set_log_level
6899 };
6900 static SERVER_EXTENSION_API extension_api = {
6901 .register_extension = register_extension,
6902 .unregister_extension = unregister_extension,
6903 .get_extension = get_extension
6904 };
6905
6906 static SERVER_CALLBACK_API callback_api = {
6907 .register_callback = register_callback,
6908 .perform_callbacks = perform_callbacks,
6909 };
6910
6911 static SERVER_HANDLE_V1 rv = {
6912 .interface = 1,
6913 .core = &core_api,
6914 .stat = &server_stat_api,
6915 .extension = &extension_api,
6916 .callback = &callback_api,
6917 .log = &server_log_api,
6918 .cookie = &server_cookie_api
6919 };
6920
6921 if (rv.engine == NULL) {
6922 rv.engine = settings.engine.v0;
6923 }
6924
6925 return &rv;
6926 }
6927
6928 /**
6929 * Load a shared object and initialize all the extensions in there.
6930 *
6931 * @param soname the name of the shared object (may not be NULL)
6932 * @param config optional configuration parameters
6933 * @return true if success, false otherwise
6934 */
6935 static bool load_extension(const char *soname, const char *config) {
6936 if (soname == NULL) {
6937 return false;
6938 }
6939
6940 /* Hack to remove the warning from C99 */
6941 union my_hack {
6942 MEMCACHED_EXTENSIONS_INITIALIZE initialize;
6943 void* voidptr;
6944 } funky = {.initialize = NULL };
6945
6946 void *handle = dlopen(soname, RTLD_NOW | RTLD_LOCAL);
6947 if (handle == NULL) {
6948 const char *msg = dlerror();
6949 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6950 "Failed to open library \"%s\": %s\n",
6951 soname, msg ? msg : "unknown error");
6952 return false;
6953 }
6954
6955 void *symbol = dlsym(handle, "memcached_extensions_initialize");
6956 if (symbol == NULL) {
6957 const char *msg = dlerror();
6958 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6959 "Could not find symbol \"memcached_extensions_initialize\" in %s: %s\n",
6960 soname, msg ? msg : "unknown error");
6961 return false;
6962 }
6963 funky.voidptr = symbol;
6964
6965 EXTENSION_ERROR_CODE error = (*funky.initialize)(config, get_server_api);
6966
6967 if (error != EXTENSION_SUCCESS) {
6968 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6969 "Failed to initalize extensions from %s. Error code: %d\n",
6970 soname, error);
6971 dlclose(handle);
6972 return false;
6973 }
6974
6975 if (settings.verbose > 0) {
6976 settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
6977 "Loaded extensions from: %s\n", soname);
6978 }
6979
6980 return true;
6981 }
6982
6983 /**
6984 * Do basic sanity check of the runtime environment
6985 * @return true if no errors found, false if we can't use this env
6986 */
6987 static bool sanitycheck(void) {
6988 /* One of our biggest problems is old and bogus libevents */
6989 const char *ever = event_get_version();
6990 if (ever != NULL) {
6991 if (strncmp(ever, "1.", 2) == 0) {
6992 /* Require at least 1.3 (that's still a couple of years old) */
6993 if ((ever[2] == '1' || ever[2] == '2') && !isdigit(ever[3])) {
6994 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6995 "You are using libevent %s.\nPlease upgrade to"
6996 " a more recent version (1.3 or newer)\n",
6997 event_get_version());
6998 return false;
6999 }
7000 }
7001 }
7002
7003 return true;
7004 }
7005
7006 #ifdef INNODB_MEMCACHED
/** Duplicate the first len bytes of str into a newly allocated,
NUL-terminated string.
@param str source bytes (at least len bytes must be readable)
@param len number of bytes to copy
@return the new string, to be released with free(); NULL if str is NULL,
len is negative, or memory cannot be allocated */
static
char*
my_strdupl(const char* str, int len)
{
    char* s;

    if (str == NULL || len < 0) {
        return(NULL);
    }

    s = (char*) malloc((size_t) len + 1);
    if (s == NULL) {
        return(NULL);
    }

    memcpy(s, str, (size_t) len);
    s[len] = 0;
    return(s);
}
7015
/** Function that massages a MySQL config variable string into an
argv-style vector that can be parsed by getopt().

The string is split on spaces. Slot 0 of the vector holds a fixed program
name (getopt() uses argv[0] in its diagnostics), the tokens follow from
slot 1, and the vector is NULL-terminated. The tokens point into a private
copy of option; neither that copy nor the vector is freed here.

@param option		space separated option string
@param option_argc	out: number of entries in the vector, including
			slot 0; set to 0 on failure
@param option_argv	out: the vector; set to NULL on failure (option is
			NULL or memory could not be allocated) */
static
void
daemon_memcached_make_option(char* option, int* option_argc,
			     char*** option_argv)
{
	static const char*	sep = " ";
	static char		prog_name[] = "memcached";
	char*			last;
	char*			opt_str;
	char*			my_str;
	char**			args;
	size_t			option_len;
	int			num_arg = 0;
	int			i = 1;

	*option_argc = 0;
	*option_argv = NULL;

	if (option == NULL) {
		return;
	}

	option_len = strlen(option);

	my_str = my_strdupl(option, (int) option_len);
	if (my_str == NULL) {
		return;
	}

	/* First pass: count the tokens */
	for (opt_str = strtok_r(my_str, sep, &last);
	     opt_str;
	     opt_str = strtok_r(NULL, sep, &last)) {
		num_arg++;
	}

	/* reset my_str, since strtok_r has replaced separators with NULs */
	memcpy(my_str, option, option_len + 1);

	/* One extra slot for argv[0] and one for the terminating NULL */
	args = (char**) malloc(((size_t) num_arg + 2) * sizeof(*args));
	if (args == NULL) {
		free(my_str);
		return;
	}

	args[0] = prog_name;

	/* Second pass: record the tokens */
	for (opt_str = strtok_r(my_str, sep, &last);
	     opt_str;
	     opt_str = strtok_r(NULL, sep, &last)) {
		args[i] = opt_str;
		i++;
	}

	assert(i == num_arg + 1);

	args[i] = NULL;

	*option_argv = args;
	*option_argc = (num_arg + 1);

	return;
}
7057
/* Structure that carries the callback function structure pointer and the
related settings passed to the InnoDB engine. daemon_memcached_main() fills
one in and hands its address to the engine in place of the config string,
so the field order must not be changed here alone. */
typedef struct eng_config_info {
	char*		option_string;		/* engine options as text;
						daemon_memcached_main() points
						this at its old_options buffer */
	void*		cb_ptr;			/* callback structure pointer,
						taken from m_innodb_api_cb */
	unsigned int	eng_r_batch_size;	/* taken from m_r_batch_size */
	unsigned int	eng_w_batch_size;	/* taken from m_w_batch_size */
	bool		enable_binlog;		/* taken from m_enable_binlog */
} eng_config_info_t;
7067 #endif /* INNODB_MEMCACHED */
7068
7069 #ifdef INNODB_MEMCACHED
7070 void* daemon_memcached_main(void *p) {
7071 #else
7072 int main (int argc, char **argv) {
7073 #endif
7074 int c;
7075 bool lock_memory = false;
7076 bool do_daemonize = false;
7077 bool preallocate = false;
7078 int maxcore = 0;
7079 char *username = NULL;
7080 char *pid_file = NULL;
7081 struct passwd *pw;
7082 struct rlimit rlim;
7083 char unit = '\0';
7084 int size_max = 0;
7085
7086 bool protocol_specified = false;
7087 bool tcp_specified = false;
7088 bool udp_specified = false;
7089 memcached_context_t* m_config = (memcached_context_t*)p;
7090 const char *engine;
7091 const char *engine_config = NULL;
7092 char old_options[1024] = { [0] = '\0' };
7093 char *old_opts = old_options;
7094 #ifdef INNODB_MEMCACHED
7095 int option_argc = 0;
7096 char** option_argv = NULL;
7097 eng_config_info_t my_eng_config;
7098
7099 memcached_initialized = 0;
7100
7101 if (m_config->m_engine_library) {
7102 engine = m_config->m_engine_library;
7103
7104 /* FIXME: We should have a better way to pass the callback structure
7105 point to storage engine. It is now appended in the configure
7106 string in eng_config_info_t structure */
7107 my_eng_config.cb_ptr = m_config->m_innodb_api_cb;
7108 my_eng_config.eng_r_batch_size = m_config->m_r_batch_size;
7109 my_eng_config.eng_w_batch_size = m_config->m_w_batch_size;
7110 my_eng_config.enable_binlog = m_config->m_enable_binlog;
7111 my_eng_config.option_string = old_opts;
7112 engine_config = (const char *) (&my_eng_config);
7113
7114 } else {
7115 engine = "default_engine.so";
7116 }
7117 #else
7118 engine = "default_engine.so";
7119 #endif /* INNODB_MEMCACHED */
7120
7121 memcached_shutdown = 0;
7122 memcached_initialized = 0;
7123
7124 if (!sanitycheck()) {
7125 return(NULL);
7126 }
7127
7128 /* make the time we started always be 2 seconds before we really
7129 did, so time(0) - time.started is never zero. if so, things
7130 like 'settings.oldest_live' which act as booleans as well as
7131 values are now false in boolean context... */
7132 process_started = time(0) - 2;
7133 set_current_time();
7134
7135 /* Initialize the socket subsystem */
7136 initialize_sockets();
7137
7138 /* init settings */
7139 settings_init();
7140
7141 if (memcached_initialize_stderr_logger(get_server_api) != EXTENSION_SUCCESS) {
7142 fprintf(stderr, "Failed to initialize log system\n");
7143 return (NULL);
7144 }
7145
7146 if (m_config->m_mem_option) {
7147 daemon_memcached_make_option(m_config->m_mem_option,
7148 &option_argc,
7149 &option_argv);
7150 }
7151
7152 #ifdef INNODB_MEMCACHED
7153
7154 if (option_argc > 0 && option_argv) {
7155 /* Always reset the index to 1, since this function can
7156 be invoked multiple times with install/uninstall plugins */
7157 optind = 1;
7158 while (-1 != (c = getopt(option_argc, option_argv,
7159 "a:" /* access mask for unix socket */
7160 "p:" /* TCP port number to listen on */
7161 "s:" /* unix socket path to listen on */
7162 "U:" /* UDP port number to listen on */
7163 "m:" /* max memory to use for items in megabytes */
7164 "M" /* return error on memory exhausted */
7165 "c:" /* max simultaneous connections */
7166 "k" /* lock down all paged memory */
7167 "hi" /* help, licence info */
7168 "r" /* maximize core file limit */
7169 "v" /* verbose */
7170 "d" /* daemon mode */
7171 "l:" /* interface to listen on */
7172 "u:" /* user identity to run as */
7173 "P:" /* save PID in file */
7174 "f:" /* factor? */
7175 "n:" /* minimum space allocated for key+value+flags */
7176 "t:" /* threads */
7177 "D:" /* prefix delimiter? */
7178 "L" /* Large memory pages */
7179 "R:" /* max requests per event */
7180 "C" /* Disable use of CAS */
7181 "b:" /* backlog queue limit */
7182 "B:" /* Binding protocol */
7183 "I:" /* Max item size */
7184 "S" /* Sasl ON */
7185 "E:" /* Engine to load */
7186 "e:" /* Engine options */
7187 "q" /* Disallow detailed stats */
7188 "X:" /* Load extension */
7189 ))) {
7190 switch (c) {
7191 case 'a':
7192 /* access for unix domain socket, as octal mask (like chmod)*/
7193 settings.access= strtol(optarg,NULL,8);
7194 break;
7195
7196 case 'U':
7197 settings.udpport = atoi(optarg);
7198 udp_specified = true;
7199 break;
7200 case 'p':
7201 settings.port = atoi(optarg);
7202 tcp_specified = true;
7203 break;
7204 case 's':
7205 settings.socketpath = optarg;
7206 break;
7207 case 'm':
7208 settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7209 old_opts += sprintf(old_opts, "cache_size=%lu;",
7210 (unsigned long)settings.maxbytes);
7211 break;
7212 case 'M':
7213 settings.evict_to_free = 0;
7214 old_opts += sprintf(old_opts, "eviction=false;");
7215 break;
7216 case 'c':
7217 settings.maxconns = atoi(optarg);
7218 break;
7219 case 'h':
7220 usage();
7221 exit(EXIT_SUCCESS);
7222 case 'i':
7223 usage_license();
7224 exit(EXIT_SUCCESS);
7225 case 'k':
7226 lock_memory = true;
7227 break;
7228 case 'v':
7229 settings.verbose++;
7230 perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7231 break;
7232 case 'l':
7233 settings.inter= strdup(optarg);
7234 break;
7235 case 'd':
7236 do_daemonize = true;
7237 break;
7238 case 'r':
7239 maxcore = 1;
7240 break;
7241 case 'R':
7242 settings.reqs_per_event = atoi(optarg);
7243 if (settings.reqs_per_event <= 0) {
7244 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7245 "Number of requests per event must be greater than 0\n");
7246 return (void*)1;
7247 }
7248 break;
7249 case 'u':
7250 username = optarg;
7251 break;
7252 case 'P':
7253 pid_file = optarg;
7254 break;
7255 case 'f':
7256 settings.factor = atof(optarg);
7257 if (settings.factor <= 1.0) {
7258 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7259 "Factor must be greater than 1\n");
7260 return (void*)1;
7261 }
7262 old_opts += sprintf(old_opts, "factor=%f;",
7263 settings.factor);
7264 break;
7265 case 'n':
7266 settings.chunk_size = atoi(optarg);
7267 if (settings.chunk_size == 0) {
7268 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7269 "Chunk size must be greater than 0\n");
7270 return (void*)1;
7271 }
7272 old_opts += sprintf(old_opts, "chunk_size=%u;",
7273 settings.chunk_size);
7274 break;
7275 case 't':
7276 settings.num_threads = atoi(optarg);
7277 if (settings.num_threads <= 0) {
7278 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7279 "Number of threads must be greater than 0\n");
7280 return (void*)1;
7281 }
7282 /* There're other problems when you get above 64 threads.
7283 * In the future we should portably detect # of cores for the
7284 * default.
7285 */
7286 if (settings.num_threads > 64) {
7287 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7288 "WARNING: Setting a high number of worker"
7289 "threads is not recommended.\n"
7290 " Set this value to the number of cores in"
7291 " your machine or less.\n");
7292 }
7293 break;
7294 case 'D':
7295 settings.prefix_delimiter = optarg[0];
7296 settings.detail_enabled = 1;
7297 break;
7298 case 'L' :
7299 if (enable_large_pages() == 0) {
7300 preallocate = true;
7301 old_opts += sprintf(old_opts, "preallocate=true;");
7302 }
7303 break;
7304 case 'C' :
7305 settings.use_cas = false;
7306 break;
7307 case 'b' :
7308 settings.backlog = atoi(optarg);
7309 break;
7310 case 'B':
7311 protocol_specified = true;
7312 if (strcmp(optarg, "auto") == 0) {
7313 settings.binding_protocol = negotiating_prot;
7314 } else if (strcmp(optarg, "binary") == 0) {
7315 settings.binding_protocol = binary_prot;
7316 } else if (strcmp(optarg, "ascii") == 0) {
7317 settings.binding_protocol = ascii_prot;
7318 } else {
7319 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7320 "Invalid value for binding protocol: %s\n"
7321 " -- should be one of auto, binary, or ascii\n", optarg);
7322 exit(EX_USAGE);
7323 }
7324 break;
7325 case 'I':
7326 unit = optarg[strlen(optarg)-1];
7327 if (unit == 'k' || unit == 'm' ||
7328 unit == 'K' || unit == 'M') {
7329 optarg[strlen(optarg)-1] = '\0';
7330 size_max = atoi(optarg);
7331 if (unit == 'k' || unit == 'K')
7332 size_max *= 1024;
7333 if (unit == 'm' || unit == 'M')
7334 size_max *= 1024 * 1024;
7335 settings.item_size_max = size_max;
7336 } else {
7337 settings.item_size_max = atoi(optarg);
7338 }
7339 if (settings.item_size_max < 1024) {
7340 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7341 "Item max size cannot be less than 1024 bytes.\n");
7342 return (void*)1;
7343 }
7344 if (settings.item_size_max > 1024 * 1024 * 128) {
7345 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7346 "Cannot set item size limit higher than 128 mb.\n");
7347 return (void*)1;
7348 }
7349 if (settings.item_size_max > 1024 * 1024) {
7350 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7351 "WARNING: Setting item max size above 1MB is not"
7352 " recommended!\n"
7353 " Raising this limit increases the minimum memory requirements\n"
7354 " and will decrease your memory efficiency.\n"
7355 );
7356 }
7357 #ifndef __WIN32__
7358 old_opts += sprintf(old_opts, "item_size_max=%zu;",
7359 settings.item_size_max);
7360 #else
7361 old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7362 settings.item_size_max);
7363 #endif
7364 break;
7365 case 'E':
7366 engine = optarg;
7367 break;
7368 case 'e':
7369 /* FIXME, we use engine_config to pass callback function
7370 for now. Will need a better solution
7371 engine_config = optarg; */
7372 break;
7373 case 'q':
7374 settings.allow_detailed = false;
7375 break;
7376 case 'S': /* set Sasl authentication to true. Default is false */
7377 # ifdef ENABLE_MEMCACHED_SASL
7378 # ifndef SASL_ENABLED
7379 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7380 "This server is not built with SASL support.\n");
7381 exit(EX_USAGE);
7382 # endif /* !SASL_ENABLED */
7383 settings.require_sasl = true;
7384 # endif /* ENABLE_MEMCACHED_SASL */
7385 break;
7386 case 'X' :
7387 {
7388 char *ptr = strchr(optarg, ',');
7389 if (ptr != NULL) {
7390 *ptr = '\0';
7391 ++ptr;
7392 }
7393 if (!load_extension(optarg, ptr)) {
7394 exit(EXIT_FAILURE);
7395 }
7396 if (ptr != NULL) {
7397 *(ptr - 1) = ',';
7398 }
7399 }
7400 break;
7401 default:
7402 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7403 "Illegal argument \"%c\"\n", c);
7404 return (void*)1;
7405 }
7406 }
7407
7408 free(option_argv);
7409 }
7410 fprintf(stderr, MEMCACHED_ATOMIC_MSG);
7411 #else
7412 /* process arguments */
7413 while (-1 != (c = getopt(argc, argv,
7414 "a:" /* access mask for unix socket */
7415 "p:" /* TCP port number to listen on */
7416 "s:" /* unix socket path to listen on */
7417 "U:" /* UDP port number to listen on */
7418 "m:" /* max memory to use for items in megabytes */
7419 "M" /* return error on memory exhausted */
7420 "c:" /* max simultaneous connections */
7421 "k" /* lock down all paged memory */
7422 "hi" /* help, licence info */
7423 "r" /* maximize core file limit */
7424 "v" /* verbose */
7425 "d" /* daemon mode */
7426 "l:" /* interface to listen on */
7427 "u:" /* user identity to run as */
7428 "P:" /* save PID in file */
7429 "f:" /* factor? */
7430 "n:" /* minimum space allocated for key+value+flags */
7431 "t:" /* threads */
7432 "D:" /* prefix delimiter? */
7433 "L" /* Large memory pages */
7434 "R:" /* max requests per event */
7435 "C" /* Disable use of CAS */
7436 "b:" /* backlog queue limit */
7437 "B:" /* Binding protocol */
7438 "I:" /* Max item size */
7439 "S" /* Sasl ON */
7440 "E:" /* Engine to load */
7441 "e:" /* Engine options */
7442 "q" /* Disallow detailed stats */
7443 "X:" /* Load extension */
7444 ))) {
7445 switch (c) {
7446 case 'a':
7447 /* access for unix domain socket, as octal mask (like chmod)*/
7448 settings.access= strtol(optarg,NULL,8);
7449 break;
7450
7451 case 'U':
7452 settings.udpport = atoi(optarg);
7453 udp_specified = true;
7454 break;
7455 case 'p':
7456 settings.port = atoi(optarg);
7457 tcp_specified = true;
7458 break;
7459 case 's':
7460 settings.socketpath = optarg;
7461 break;
7462 case 'm':
7463 settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7464 old_opts += sprintf(old_opts, "cache_size=%lu;",
7465 (unsigned long)settings.maxbytes);
7466 break;
7467 case 'M':
7468 settings.evict_to_free = 0;
7469 old_opts += sprintf(old_opts, "eviction=false;");
7470 break;
7471 case 'c':
7472 settings.maxconns = atoi(optarg);
7473 break;
7474 case 'h':
7475 usage();
7476 exit(EXIT_SUCCESS);
7477 case 'i':
7478 usage_license();
7479 exit(EXIT_SUCCESS);
7480 case 'k':
7481 lock_memory = true;
7482 break;
7483 case 'v':
7484 settings.verbose++;
7485 perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7486 break;
7487 case 'l':
7488 if (settings.inter != NULL) {
7489 size_t len = strlen(settings.inter) + strlen(optarg) + 2;
7490 char *p = malloc(len);
7491 if (p == NULL) {
7492 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7493 "Failed to allocate memory\n");
7494 return 1;
7495 }
7496 snprintf(p, len, "%s,%s", settings.inter, optarg);
7497 free(settings.inter);
7498 settings.inter = p;
7499 } else {
7500 settings.inter= strdup(optarg);
7501 }
7502 break;
7503 case 'd':
7504 do_daemonize = true;
7505 break;
7506 case 'r':
7507 maxcore = 1;
7508 break;
7509 case 'R':
7510 settings.reqs_per_event = atoi(optarg);
7511 if (settings.reqs_per_event <= 0) {
7512 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7513 "Number of requests per event must be greater than 0\n");
7514 return 1;
7515 }
7516 break;
7517 case 'u':
7518 username = optarg;
7519 break;
7520 case 'P':
7521 pid_file = optarg;
7522 break;
7523 case 'f':
7524 settings.factor = atof(optarg);
7525 if (settings.factor <= 1.0) {
7526 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7527 "Factor must be greater than 1\n");
7528 return 1;
7529 }
7530 old_opts += sprintf(old_opts, "factor=%f;",
7531 settings.factor);
7532 break;
7533 case 'n':
7534 settings.chunk_size = atoi(optarg);
7535 if (settings.chunk_size == 0) {
7536 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7537 "Chunk size must be greater than 0\n");
7538 return 1;
7539 }
7540 old_opts += sprintf(old_opts, "chunk_size=%u;",
7541 settings.chunk_size);
7542 break;
7543 case 't':
7544 settings.num_threads = atoi(optarg);
7545 if (settings.num_threads <= 0) {
7546 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7547 "Number of threads must be greater than 0\n");
7548 return 1;
7549 }
7550 /* There're other problems when you get above 64 threads.
7551 * In the future we should portably detect # of cores for the
7552 * default.
7553 */
7554 if (settings.num_threads > 64) {
7555 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7556 "WARNING: Setting a high number of worker"
7557 "threads is not recommended.\n"
7558 " Set this value to the number of cores in"
7559 " your machine or less.\n");
7560 }
7561 break;
7562 case 'D':
7563 settings.prefix_delimiter = optarg[0];
7564 settings.detail_enabled = 1;
7565 break;
7566 case 'L' :
7567 if (enable_large_pages() == 0) {
7568 preallocate = true;
7569 old_opts += sprintf(old_opts, "preallocate=true;");
7570 }
7571 break;
7572 case 'C' :
7573 settings.use_cas = false;
7574 break;
7575 case 'b' :
7576 settings.backlog = atoi(optarg);
7577 break;
7578 case 'B':
7579 protocol_specified = true;
7580 if (strcmp(optarg, "auto") == 0) {
7581 settings.binding_protocol = negotiating_prot;
7582 } else if (strcmp(optarg, "binary") == 0) {
7583 settings.binding_protocol = binary_prot;
7584 } else if (strcmp(optarg, "ascii") == 0) {
7585 settings.binding_protocol = ascii_prot;
7586 } else {
7587 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7588 "Invalid value for binding protocol: %s\n"
7589 " -- should be one of auto, binary, or ascii\n", optarg);
7590 exit(EX_USAGE);
7591 }
7592 break;
7593 case 'I':
7594 unit = optarg[strlen(optarg)-1];
7595 if (unit == 'k' || unit == 'm' ||
7596 unit == 'K' || unit == 'M') {
7597 optarg[strlen(optarg)-1] = '\0';
7598 size_max = atoi(optarg);
7599 if (unit == 'k' || unit == 'K')
7600 size_max *= 1024;
7601 if (unit == 'm' || unit == 'M')
7602 size_max *= 1024 * 1024;
7603 settings.item_size_max = size_max;
7604 } else {
7605 settings.item_size_max = atoi(optarg);
7606 }
7607 if (settings.item_size_max < 1024) {
7608 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7609 "Item max size cannot be less than 1024 bytes.\n");
7610 return 1;
7611 }
7612 if (settings.item_size_max > 1024 * 1024 * 128) {
7613 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7614 "Cannot set item size limit higher than 128 mb.\n");
7615 return 1;
7616 }
7617 if (settings.item_size_max > 1024 * 1024) {
7618 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7619 "WARNING: Setting item max size above 1MB is not"
7620 " recommended!\n"
7621 " Raising this limit increases the minimum memory requirements\n"
7622 " and will decrease your memory efficiency.\n"
7623 );
7624 }
7625 #ifndef __WIN32__
7626 old_opts += sprintf(old_opts, "item_size_max=%zu;",
7627 settings.item_size_max);
7628 #else
7629 old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7630 settings.item_size_max);
7631 #endif
7632 break;
7633 case 'E':
7634 engine = optarg;
7635 break;
7636 case 'e':
7637 engine_config = optarg;
7638 break;
7639 case 'q':
7640 settings.allow_detailed = false;
7641 break;
7642 case 'S': /* set Sasl authentication to true. Default is false */
7643 #ifndef SASL_ENABLED
7644 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7645 "This server is not built with SASL support.\n");
7646 exit(EX_USAGE);
7647 #endif
7648 settings.require_sasl = true;
7649 break;
7650 case 'X' :
7651 {
7652 char *ptr = strchr(optarg, ',');
7653 if (ptr != NULL) {
7654 *ptr = '\0';
7655 ++ptr;
7656 }
7657 if (!load_extension(optarg, ptr)) {
7658 exit(EXIT_FAILURE);
7659 }
7660 if (ptr != NULL) {
7661 *(ptr - 1) = ',';
7662 }
7663 }
7664 break;
7665 default:
7666 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7667 "Illegal argument \"%c\"\n", c);
7668 return 1;
7669 }
7670 }
7671 #endif /* INNODB_MEMCACHED */
7672
7673 if (getenv("MEMCACHED_REQS_TAP_EVENT") != NULL) {
7674 settings.reqs_per_tap_event = atoi(getenv("MEMCACHED_REQS_TAP_EVENT"));
7675 }
7676
7677 if (settings.reqs_per_tap_event <= 0) {
7678 settings.reqs_per_tap_event = DEFAULT_REQS_PER_TAP_EVENT;
7679 }
7680
7681
7682 if (install_sigterm_handler() != 0) {
7683 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7684 "Failed to install SIGTERM handler\n");
7685 exit(EXIT_FAILURE);
7686 }
7687
7688 char *topkeys_env = getenv("MEMCACHED_TOP_KEYS");
7689 if (topkeys_env != NULL) {
7690 settings.topkeys = atoi(topkeys_env);
7691 if (settings.topkeys < 0) {
7692 settings.topkeys = 0;
7693 }
7694 }
7695
7696 if (settings.require_sasl) {
7697 if (!protocol_specified) {
7698 settings.binding_protocol = binary_prot;
7699 } else {
7700 if (settings.binding_protocol == negotiating_prot) {
7701 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7702 "ERROR: You cannot use auto-negotiating protocol while requiring SASL.\n");
7703 exit(EX_USAGE);
7704 }
7705 if (settings.binding_protocol == ascii_prot) {
7706 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7707 "ERROR: You cannot use only ASCII protocol while requiring SASL.\n");
7708 exit(EX_USAGE);
7709 }
7710 }
7711 }
7712
7713 if (tcp_specified && !udp_specified) {
7714 settings.udpport = settings.port;
7715 } else if (udp_specified && !tcp_specified) {
7716 settings.port = settings.udpport;
7717 }
7718
7719 /*
7720 if (engine_config != NULL && strlen(old_options) > 0) {
7721 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7722 "ERROR: You can't mix -e with the old options\n");
7723 return (NULL);
7724 } else if (engine_config == NULL && strlen(old_options) > 0) {
7725 engine_config = old_options;
7726 } */
7727
7728 if (maxcore != 0) {
7729 struct rlimit rlim_new;
7730 /*
7731 * First try raising to infinity; if that fails, try bringing
7732 * the soft limit to the hard.
7733 */
7734 if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
7735 rlim_new.rlim_cur = rlim_new.rlim_max = RLIM_INFINITY;
7736 if (setrlimit(RLIMIT_CORE, &rlim_new)!= 0) {
7737 /* failed. try raising just to the old max */
7738 rlim_new.rlim_cur = rlim_new.rlim_max = rlim.rlim_max;
7739 (void)setrlimit(RLIMIT_CORE, &rlim_new);
7740 }
7741 }
7742 /*
7743 * getrlimit again to see what we ended up with. Only fail if
7744 * the soft limit ends up 0, because then no core files will be
7745 * created at all.
7746 */
7747
7748 if ((getrlimit(RLIMIT_CORE, &rlim) != 0) || rlim.rlim_cur == 0) {
7749 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7750 "failed to ensure corefile creation\n");
7751 exit(EX_OSERR);
7752 }
7753 }
7754
7755 /*
7756 * If needed, increase rlimits to allow as many connections
7757 * as needed.
7758 */
7759
7760 if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7761 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7762 "failed to getrlimit number of files\n");
7763 exit(EX_OSERR);
7764 } else {
7765 int maxfiles = settings.maxconns;
7766 if (rlim.rlim_cur < maxfiles)
7767 rlim.rlim_cur = maxfiles;
7768 if (rlim.rlim_max < rlim.rlim_cur)
7769 rlim.rlim_max = rlim.rlim_cur;
7770 if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7771 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7772 "failed to set rlimit for open files. Try running as"
7773 " root or requesting smaller maxconns value.\n");
7774 exit(EX_OSERR);
7775 }
7776 }
7777
7778 /* Sanity check for the connection structures */
7779 int nfiles = 0;
7780 if (settings.port != 0) {
7781 nfiles += 2;
7782 }
7783 if (settings.udpport != 0) {
7784 nfiles += settings.num_threads * 2;
7785 }
7786
7787 if (settings.maxconns <= nfiles) {
7788 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7789 "Configuratioin error. \n"
7790 "You specified %d connections, but the system will use at "
7791 "least %d\nconnection structures to start.\n",
7792 settings.maxconns, nfiles);
7793 exit(EX_USAGE);
7794 }
7795
7796 /* lose root privileges if we have them */
7797 if (getuid() == 0 || geteuid() == 0) {
7798 if (username == 0 || *username == '\0') {
7799 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7800 "can't run as root without the -u switch\n");
7801 exit(EX_USAGE);
7802 }
7803 if ((pw = getpwnam(username)) == 0) {
7804 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7805 "can't find the user %s to switch to\n", username);
7806 exit(EX_NOUSER);
7807 }
7808 if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) {
7809 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7810 "failed to assume identity of user %s: %s\n", username,
7811 strerror(errno));
7812 exit(EX_OSERR);
7813 }
7814 }
7815
7816 #ifdef SASL_ENABLED
7817 init_sasl();
7818 #endif /* SASL */
7819
7820 /* daemonize if requested */
7821 /* if we want to ensure our ability to dump core, don't chdir to / */
7822 if (do_daemonize) {
7823 if (sigignore(SIGHUP) == -1) {
7824 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7825 "Failed to ignore SIGHUP: ", strerror(errno));
7826 }
7827 if (daemonize(maxcore, settings.verbose) == -1) {
7828 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7829 "failed to daemon() in order to daemonize\n");
7830 exit(EXIT_FAILURE);
7831 }
7832 }
7833
7834 /* lock paged memory if needed */
7835 if (lock_memory) {
7836 #ifdef HAVE_MLOCKALL
7837 int res = mlockall(MCL_CURRENT | MCL_FUTURE);
7838 if (res != 0) {
7839 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7840 "warning: -k invalid, mlockall() failed: %s\n",
7841 strerror(errno));
7842 }
7843 #else
7844 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7845 "warning: -k invalid, mlockall() not supported on this platform. proceeding without.\n");
7846 #endif
7847 }
7848
7849 /* initialize main thread libevent instance */
7850 main_base = event_init();
7851
7852 /* Load the storage engine */
7853 ENGINE_HANDLE *engine_handle = NULL;
7854 if (!load_engine(engine,get_server_api,settings.extensions.logger,&engine_handle)) {
7855 /* Error already reported */
7856 exit(EXIT_FAILURE);
7857 }
7858
7859 if(!init_engine(engine_handle,engine_config,settings.extensions.logger)) {
7860 #ifdef INNODB_MEMCACHED
7861 shutdown_server();
7862 goto func_exit;
7863 #else
7864 return(false);
7865 #endif /* INNODB_MEMCACHED */
7866 }
7867
7868 if(settings.verbose > 0) {
7869 log_engine_details(engine_handle,settings.extensions.logger);
7870 }
7871 settings.engine.v1 = (ENGINE_HANDLE_V1 *) engine_handle;
7872
7873 if (settings.engine.v1->arithmetic == NULL) {
7874 settings.engine.v1->arithmetic = internal_arithmetic;
7875 }
7876
7877 /* initialize other stuff */
7878 stats_init();
7879
7880 if (!(conn_cache = cache_create("conn", sizeof(conn), sizeof(void*),
7881 conn_constructor, conn_destructor))) {
7882 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7883 "Failed to create connection cache\n");
7884 exit(EXIT_FAILURE);
7885 }
7886
7887 default_independent_stats = new_independent_stats();
7888
7889 #ifdef INNODB_MEMCACHED
7890 if (!default_independent_stats) {
7891 exit(EXIT_FAILURE);
7892 }
7893 #endif
7894
7895 #ifndef __WIN32__
7896 /*
7897 * ignore SIGPIPE signals; we can use errno == EPIPE if we
7898 * need that information
7899 */
7900 if (sigignore(SIGPIPE) == -1) {
7901 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7902 "failed to ignore SIGPIPE; sigaction");
7903 exit(EX_OSERR);
7904 }
7905 #endif
7906
7907 /* start up worker threads if MT mode */
7908 thread_init(settings.num_threads, main_base, dispatch_event_handler);
7909
7910 /* initialise clock event */
7911 clock_handler(0, 0, 0);
7912
7913 /* create unix mode sockets after dropping privileges */
7914 if (settings.socketpath != NULL) {
7915 if (server_socket_unix(settings.socketpath,settings.access)) {
7916 vperror("failed to listen on UNIX socket: %s", settings.socketpath);
7917 exit(EX_OSERR);
7918 }
7919 }
7920
7921 /* create the listening socket, bind it, and init */
7922 if (settings.socketpath == NULL) {
7923 int udp_port;
7924
7925 const char *portnumber_filename = getenv("MEMCACHED_PORT_FILENAME");
7926 char temp_portnumber_filename[PATH_MAX];
7927 FILE *portnumber_file = NULL;
7928
7929 if (portnumber_filename != NULL) {
7930 snprintf(temp_portnumber_filename,
7931 sizeof(temp_portnumber_filename),
7932 "%s.lck", portnumber_filename);
7933
7934 portnumber_file = fopen(temp_portnumber_filename, "a");
7935 if (portnumber_file == NULL) {
7936 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7937 "Failed to open \"%s\": %s\n",
7938 temp_portnumber_filename, strerror(errno));
7939 }
7940 }
7941
7942 if (settings.port && server_sockets(settings.port, tcp_transport,
7943 portnumber_file)) {
7944 vperror("failed to listen on TCP port %d", settings.port);
7945 #ifdef INNODB_MEMCACHED
7946 shutdown_server();
7947 goto func_exit;
7948 #else
7949 exit(EX_OSERR);
7950 #endif /* INNODB_MEMCACHED */
7951 }
7952
7953 /*
7954 * initialization order: first create the listening sockets
7955 * (may need root on low ports), then drop root if needed,
7956 * then daemonise if needed, then init libevent (in some cases
7957 * descriptors created by libevent wouldn't survive forking).
7958 */
7959 udp_port = settings.udpport ? settings.udpport : settings.port;
7960
7961 /* create the UDP listening socket and bind it */
7962 if (settings.udpport && server_sockets(settings.udpport, udp_transport,
7963 portnumber_file)) {
7964 vperror("failed to listen on UDP port %d", settings.udpport);
7965 exit(EX_OSERR);
7966 }
7967
7968 if (portnumber_file) {
7969 fclose(portnumber_file);
7970 rename(temp_portnumber_filename, portnumber_filename);
7971 }
7972 }
7973
7974 if (pid_file != NULL) {
7975 save_pid(pid_file);
7976 }
7977
7978 /* Drop privileges no longer needed */
7979 drop_privileges();
7980
7981 memcached_initialized = 1;
7982
7983 /* enter the event loop */
7984 event_base_loop(main_base, 0);
7985
7986 if (settings.verbose) {
7987 settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
7988 "Initiating shutdown\n");
7989 }
7990
7991 func_exit:
7992
7993 if (settings.engine.v1)
7994 settings.engine.v1->destroy(settings.engine.v0, false);
7995
7996 threads_shutdown();
7997
7998 /* remove the PID file if we're a daemon */
7999 if (do_daemonize)
8000 remove_pidfile(pid_file);
8001 /* Clean up strdup() call for bind() address */
8002 if (settings.inter)
8003 free(settings.inter);
8004
8005 #ifdef INNODB_MEMCACHED
8006 /* free event base */
8007 if (main_base) {
8008 event_base_free(main_base);
8009 main_base = NULL;
8010 }
8011 #endif
8012
8013 memcached_shutdown = 2;
8014 memcached_initialized = 2;
8015
8016 return EXIT_SUCCESS;
8017 }
8018