1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3 * memcached - memory caching daemon
4 *
5 * http://www.danga.com/memcached/
6 * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
7 * Copyright 2003 Danga Interactive, Inc. All rights reserved.
8 * This file was modified by Oracle on 28-08-2015 and 23-03-2016.
9 * Modifications copyright (c) 2015, 2016, Oracle and/or its affiliates.
10 * All rights reserved.
11 *
12 * Use and distribution licensed under the BSD license. See
13 * the LICENSE file for full text.
14 *
15 * Authors:
16 * Anatoly Vorobey <mellon@pobox.com>
17 * Brad Fitzpatrick <brad@danga.com>
18 *
19 * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
20 */
21 #include "config.h"
22 #include "config_static.h"
23 #include "memcached.h"
24 #include "memcached/extension_loggers.h"
25 #include "utilities/engine_loader.h"
26
27 #include <signal.h>
28 #include <getopt.h>
29 #include <fcntl.h>
30 #include <errno.h>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <time.h>
35 #include <assert.h>
36 #include <limits.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #include <stddef.h>
40 #include <dlfcn.h>
41
42 #include "memcached_mysql.h"
43
/* Defined unconditionally for this translation unit; nothing in the code
 * that follows in this part of the file tests it.  Presumably it marks the
 * InnoDB-embedded build of memcached -- TODO confirm where it is consumed. */
#define INNODB_MEMCACHED
/* Declared here without a parameter list and defined outside this file.
 * NOTE(review): look like per-thread init/teardown hooks of the hosting
 * server -- confirm against the unit that provides them. */
void my_thread_init();
void my_thread_end();
47
/*
 * Store a CAS value on an item by forwarding the request to the loaded
 * engine's item_set_cas entry point.
 */
static inline void item_set_cas(const void *cookie, item *it, uint64_t cas)
{
    (settings.engine.v1->item_set_cas)(settings.engine.v0, cookie, it, cas);
}
51
/*
 * Keyed statistics macros.
 *
 * The *_GUTS macros are statement fragments that STATS_INCR1 executes while
 * it holds thread_stats->mutex.  SLAB_GUTS indexes slab_stats with
 * info.clsid, so every expansion that reaches it (SLAB_INCR, SLAB_TWO,
 * STATS_HIT) needs a variable named "info" with a clsid member in scope.
 * NOTE(review): the previous comment required the item to be called "it";
 * none of the macros visible here reference "it" (TK is defined elsewhere).
 */
/* Increment the per-slab-class counter slab_op. */
#define SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_stats[info.clsid].slab_op++;

/* Increment the per-thread counter thread_op. */
#define THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->thread_op++;

/* Increment two per-thread counters (slab_op is a thread-level field here). */
#define THREAD_GUTS2(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_op++; \
    thread_stats->thread_op++;

/* Increment one per-slab-class counter and one per-thread counter. */
#define SLAB_THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    THREAD_GUTS(conn, thread_stats, slab_op, thread_op)

/*
 * Common driver: looks up the connection's independent_stats, selects the
 * thread_stats slot for conn->thread->index, runs GUTS under that slot's
 * mutex and then, after unlocking, hands the key to TK.
 * Expands to a bare brace block (not do/while(0)), declares locals named
 * independent_stats, thread_stats and topkeys, and evaluates conn more
 * than once.
 */
#define STATS_INCR1(GUTS, conn, slab_op, thread_op, key, nkey) { \
    struct independent_stats *independent_stats = get_independent_stats(conn); \
    struct thread_stats *thread_stats = \
        &independent_stats->thread_stats[conn->thread->index]; \
    topkeys_t *topkeys = independent_stats->topkeys; \
    pthread_mutex_lock(&thread_stats->mutex); \
    GUTS(conn, thread_stats, slab_op, thread_op); \
    pthread_mutex_unlock(&thread_stats->mutex); \
    TK(topkeys, slab_op, key, nkey, current_time); \
}

/* One per-thread counter named op. */
#define STATS_INCR(conn, op, key, nkey) \
    STATS_INCR1(THREAD_GUTS, conn, op, op, key, nkey)

/* One per-slab-class counter named op. */
#define SLAB_INCR(conn, op, key, nkey) \
    STATS_INCR1(SLAB_GUTS, conn, op, op, key, nkey)

/* Two per-thread counters. */
#define STATS_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(THREAD_GUTS2, conn, slab_op, thread_op, key, nkey)

/* One per-slab-class counter plus one per-thread counter. */
#define SLAB_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(SLAB_THREAD_GUTS, conn, slab_op, thread_op, key, nkey)

/* Hit on command op: bumps slab counter <op>_hits and thread counter cmd_<op>. */
#define STATS_HIT(conn, op, key, nkey) \
    SLAB_TWO(conn, op##_hits, cmd_##op, key, nkey)

/* Miss on command op: bumps thread counters <op>_misses and cmd_<op>. */
#define STATS_MISS(conn, op, key, nkey) \
    STATS_TWO(conn, op##_misses, cmd_##op, key, nkey)
95
/*
 * Key-less per-thread counters.  Two implementations are selected at
 * compile time:
 *   - with HAVE_GCC_SYNC_BUILTINS the counters are updated with
 *     __sync_add_and_fetch and thread_stats->mutex is not taken;
 *   - otherwise each update is wrapped in thread_stats->mutex.
 * Both variants fetch the slot through get_thread_stats(conn).
 */
#if defined(HAVE_GCC_SYNC_BUILTINS)

/* Add 1 to thread_stats->op. */
#define STATS_NOKEY(conn, op) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        __sync_add_and_fetch(&thread_stats->op, 1); \
    } while (0)

/* Add 1 to thread_stats->op1 and thread_stats->op2 (two separate atomics). */
#define STATS_NOKEY2(conn, op1, op2) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        __sync_add_and_fetch(&thread_stats->op1, 1); \
        __sync_add_and_fetch(&thread_stats->op2, 1); \
    } while (0)

/* Add amt to thread_stats->op. */
#define STATS_ADD(conn, op, amt) \
    do { \
        struct thread_stats *thread_stats = \
            get_thread_stats(conn); \
        __sync_add_and_fetch(&thread_stats->op, amt); \
    } while (0)

/* Message describing which implementation was compiled in. */
#define MEMCACHED_ATOMIC_MSG "InnoDB MEMCACHED: Memcached uses atomic increment \n"

#else /* HAVE_GCC_SYNC_BUILTINS */
/*
 * Mutex-based fallbacks.  Unlike the atomic variants these expand to a bare
 * brace block rather than do/while(0).
 */
/* Add 1 to thread_stats->op under the slot's mutex. */
#define STATS_NOKEY(conn, op) { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op++; \
    pthread_mutex_unlock(&thread_stats->mutex); \
}

/* Add 1 to both counters inside a single critical section. */
#define STATS_NOKEY2(conn, op1, op2) { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op1++; \
    thread_stats->op2++; \
    pthread_mutex_unlock(&thread_stats->mutex); \
}

/* Add amt to thread_stats->op under the slot's mutex. */
#define STATS_ADD(conn, op, amt) { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op += amt; \
    pthread_mutex_unlock(&thread_stats->mutex); \
}

/* NOTE(review): unlike the atomic variant's message this one has no
 * trailing newline and spells the prefix differently -- confirm whether
 * the consumer relies on either. */
#define MEMCACHED_ATOMIC_MSG "InnoDB Memcached: Memcached DOES NOT use atomic increment"
#endif /* HAVE_GCC_SYNC_BUILTINS */
150
/* Process-wide flags with external linkage; their writers are not in this
 * part of the file.  NOTE(review): presumably raised when shutdown is
 * requested / once start-up has completed -- confirm against the code
 * that sets them. */
volatile sig_atomic_t memcached_shutdown;
volatile sig_atomic_t memcached_initialized;

/*
 * We keep the current time of day in a global variable that's updated by a
 * timer event. This saves us a bunch of time() system calls (we really only
 * need to get the time once a second, whereas there can be tens of thousands
 * of requests a second) and allows us to use server-start-relative timestamps
 * rather than absolute UNIX timestamps, a space savings on systems where
 * sizeof(time_t) > sizeof(unsigned int).
 *
 * realtime() adds relative expiry deltas to this value.
 */
volatile rel_time_t current_time;
163
/*
 * forward declarations
 *
 * get_independent_stats() and get_thread_stats() are called from the
 * STATS_* macros defined above, so they have to be declared before the
 * first function that expands one of those macros.
 */
static SOCKET new_socket(struct addrinfo *ai);
static int try_read_command(conn *c);
static inline struct independent_stats *get_independent_stats(conn *c);
static inline struct thread_stats *get_thread_stats(conn *c);
static void register_callback(ENGINE_HANDLE *eh,
                              ENGINE_EVENT_TYPE type,
                              EVENT_CALLBACK cb, const void *cb_data);
/* Outcome of one attempt to read from a connection's socket. */
enum try_read_result {
    READ_DATA_RECEIVED,
    READ_NO_DATA_RECEIVED,
    READ_ERROR,            /** an error occurred (on the socket) (or client closed connection) */
    READ_MEMORY_ERROR      /** failed to allocate more memory */
};

static enum try_read_result try_read_network(conn *c);
static enum try_read_result try_read_udp(conn *c);

/* stats */
static void stats_init(void);
static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate);
static void process_stat_settings(ADD_STAT add_stats, void *c);


/* defaults */
static void settings_init(void);

/* event handling, network IO */
static void event_handler(const int fd, const short which, void *arg);
static void complete_nread(conn *c);
static char *process_command(conn *c, char *command);
static void write_and_free(conn *c, char *buf, int bytes);
static int ensure_iov_space(conn *c);
static int add_iov(conn *c, const void *buf, int len);
static int add_msghdr(conn *c);


/* time handling */
static void set_current_time(void);  /* update the global variable holding
                                        global 32-bit seconds-since-start time
                                        (to avoid 64 bit time_t) */
207
/** exported globals **/
/* Server-wide counters; the code in this file updates them between
 * STATS_LOCK() and STATS_UNLOCK(). */
struct stats stats;
/* Runtime configuration; defaults are filled in by settings_init(). */
struct settings settings;
/* File-local despite the section heading: base used by realtime() and
 * abstime() to convert between absolute and server-relative time. */
static time_t process_started;     /* when the process was started */

/** file scope variables **/
/* Head of the list of listening connections (linked through ->next);
 * disable_listen() walks it. */
static conn *listen_conn = NULL;
static int udp_socket[100];
static int num_udp_socket;
static struct event_base *main_base;
static struct independent_stats *default_independent_stats;

/* One handler list per engine event type; perform_callbacks() indexes this
 * array by event type and follows ->next. */
static struct engine_event_handler *engine_event_handlers[MAX_ENGINE_EVENT_TYPE + 1];

/* Outcome of one transmit() attempt on a connection. */
enum transmit_result {
    TRANSMIT_COMPLETE,   /** All done writing. */
    TRANSMIT_INCOMPLETE, /** More data remaining to write. */
    TRANSMIT_SOFT_ERROR, /** Can't write any more right now. */
    TRANSMIT_HARD_ERROR  /** Can't write (c->state is set to conn_closing) */
};

static enum transmit_result transmit(conn *c);
230
/* Number of seconds in 30 days: expiry values up to this are treated as
 * deltas from "now", larger ones as absolute UNIX timestamps (see
 * realtime()).  Parenthesized so the expansion stays a single operand next
 * to operators such as '/' or '%'. */
#define REALTIME_MAXDELTA (60*60*24*30)
232
233 // Perform all callbacks of a given type for the given connection.
perform_callbacks(ENGINE_EVENT_TYPE type,const void * data,const void * c)234 static void perform_callbacks(ENGINE_EVENT_TYPE type,
235 const void *data,
236 const void *c) {
237 for (struct engine_event_handler *h = engine_event_handlers[type];
238 h; h = h->next) {
239 h->cb(c, type, data, h->cb_data);
240 }
241 }
242
243 /*
244 * given time value that's either unix time or delta from current unix time,
245 * return unix time. Use the fact that delta can't exceed one month
246 * (and real time value can't be that low).
247 */
realtime(const time_t exptime)248 static rel_time_t realtime(const time_t exptime) {
249 /* no. of seconds in 30 days - largest possible delta exptime */
250
251 if (exptime == 0) return 0; /* 0 means never expire */
252
253 if (exptime > REALTIME_MAXDELTA) {
254 /* if item expiration is at/before the server started, give it an
255 expiration time of 1 second after the server started.
256 (because 0 means don't expire). without this, we'd
257 underflow and wrap around to some large value way in the
258 future, effectively making items expiring in the past
259 really expiring never */
260 if (exptime <= process_started)
261 return (rel_time_t)1;
262 return (rel_time_t)(exptime - process_started);
263 } else {
264 return (rel_time_t)(exptime + current_time);
265 }
266 }
267
268 /**
269 * Convert the relative time to an absolute time (relative to EPOC ;) )
270 */
abstime(const rel_time_t exptime)271 static time_t abstime(const rel_time_t exptime)
272 {
273 return process_started + exptime;
274 }
275
stats_init(void)276 static void stats_init(void) {
277 stats.daemon_conns = 0;
278 stats.rejected_conns = 0;
279 stats.curr_conns = stats.total_conns = stats.conn_structs = 0;
280
281 stats_prefix_init();
282 }
283
stats_reset(const void * cookie)284 static void stats_reset(const void *cookie) {
285 struct conn *conn = (struct conn*)cookie;
286 STATS_LOCK();
287 stats.rejected_conns = 0;
288 stats.total_conns = 0;
289 stats_prefix_clear();
290 STATS_UNLOCK();
291 threadlocal_stats_reset(get_independent_stats(conn)->thread_stats);
292 settings.engine.v1->reset_stats(settings.engine.v0, cookie);
293 }
294
settings_init(void)295 static void settings_init(void) {
296 settings.use_cas = true;
297 settings.access = 0700;
298 settings.port = 11211;
299 settings.udpport = 11211;
300 /* By default this string should be NULL for getaddrinfo() */
301 settings.inter = NULL;
302 settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
303 settings.maxconns = 1000; /* to limit connections-related memory to about 5MB */
304 settings.verbose = 0;
305 settings.oldest_live = 0;
306 settings.evict_to_free = 1; /* push old items out of cache when memory runs out */
307 settings.socketpath = NULL; /* by default, not using a unix socket */
308 settings.factor = 1.25;
309 settings.chunk_size = 48; /* space for a modest key and value */
310 settings.num_threads = 4; /* N workers */
311 settings.num_threads_per_udp = 0;
312 settings.prefix_delimiter = ':';
313 settings.detail_enabled = 0;
314 settings.allow_detailed = true;
315 settings.reqs_per_event = DEFAULT_REQS_PER_EVENT;
316 settings.backlog = 1024;
317 settings.binding_protocol = negotiating_prot;
318 settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
319 settings.topkeys = 0;
320 settings.require_sasl = false;
321 settings.extensions.logger = get_stderr_logger();
322 }
323
324 /*
325 * Adds a message header to a connection.
326 *
327 * Returns 0 on success, -1 on out-of-memory.
328 */
add_msghdr(conn * c)329 static int add_msghdr(conn *c)
330 {
331 struct msghdr *msg;
332
333 assert(c != NULL);
334
335 if (c->msgsize == c->msgused) {
336 msg = realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr));
337 if (! msg)
338 return -1;
339 c->msglist = msg;
340 c->msgsize *= 2;
341 }
342
343 msg = c->msglist + c->msgused;
344
345 /* this wipes msg_iovlen, msg_control, msg_controllen, and
346 msg_flags, the last 3 of which aren't defined on solaris: */
347 memset(msg, 0, sizeof(struct msghdr));
348
349 msg->msg_iov = &c->iov[c->iovused];
350
351 if (c->request_addr_size > 0) {
352 msg->msg_name = &c->request_addr;
353 msg->msg_namelen = c->request_addr_size;
354 }
355
356 c->msgbytes = 0;
357 c->msgused++;
358
359 if (IS_UDP(c->transport)) {
360 /* Leave room for the UDP header, which we'll fill in later. */
361 return add_iov(c, NULL, UDP_HEADER_SIZE);
362 }
363
364 return 0;
365 }
366
prot_text(enum protocol prot)367 static const char *prot_text(enum protocol prot) {
368 char *rv = "unknown";
369 switch(prot) {
370 case ascii_prot:
371 rv = "ascii";
372 break;
373 case binary_prot:
374 rv = "binary";
375 break;
376 case negotiating_prot:
377 rv = "auto-negotiate";
378 break;
379 }
380 return rv;
381 }
382
/*
 * State describing whether accepting new connections is currently
 * disabled.  All accesses visible in this file take `mutex` first.
 * NOTE(review): no initialisation of the mutex is visible here -- confirm
 * that pthread_mutex_init() (or a static initialiser) is applied elsewhere.
 */
struct {
    pthread_mutex_t mutex;  /* guards the fields below */
    bool disabled;          /* set to true by disable_listen() */
    ssize_t count;          /* set to 10 by disable_listen(); consumer not visible here */
    uint64_t num_disable;   /* number of times disable_listen() has run */
} listen_state;
389
is_listen_disabled(void)390 static bool is_listen_disabled(void) {
391 bool ret;
392 pthread_mutex_lock(&listen_state.mutex);
393 ret = listen_state.disabled;
394 pthread_mutex_unlock(&listen_state.mutex);
395 return ret;
396 }
397
get_listen_disabled_num(void)398 static uint64_t get_listen_disabled_num(void) {
399 uint64_t ret;
400 pthread_mutex_lock(&listen_state.mutex);
401 ret = listen_state.num_disable;
402 pthread_mutex_unlock(&listen_state.mutex);
403 return ret;
404 }
405
disable_listen(void)406 static void disable_listen(void) {
407 pthread_mutex_lock(&listen_state.mutex);
408 listen_state.disabled = true;
409 listen_state.count = 10;
410 ++listen_state.num_disable;
411 pthread_mutex_unlock(&listen_state.mutex);
412
413 conn *next;
414 for (next = listen_conn; next; next = next->next) {
415 update_event(next, 0);
416 if (listen(next->sfd, 1) != 0) {
417 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
418 "listen() failed",
419 strerror(errno));
420 }
421 }
422 }
423
/*
 * Close a socket, retrying while the close fails with EINTR or EAGAIN.
 *
 * On success the current-connection counter is decremented and, if
 * listening is disabled, the dispatcher is notified.  On failure a warning
 * is logged and the counter is left untouched.  INVALID_SOCKET is ignored.
 *
 * NOTE(review): on some platforms the descriptor is already released when
 * close() reports EINTR, so retrying can hit a reused descriptor -- confirm
 * this loop is wanted on every supported target.
 */
void safe_close(SOCKET sfd)
{
    int rval;

    if (sfd == INVALID_SOCKET) {
        return;
    }

    do {
        rval = closesocket(sfd);
    } while (rval == SOCKET_ERROR && (errno == EINTR || errno == EAGAIN));

    if (rval == SOCKET_ERROR) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                        "Failed to close socket %d (%s)!!\n", (int)sfd,
                                        strerror(errno));
        return;
    }

    STATS_LOCK();
    stats.curr_conns--;
    STATS_UNLOCK();

    if (is_listen_disabled()) {
        notify_dispatcher();
    }
}
447
/*
 * Free list management for connections.
 */
/* Object cache that connection structures are allocated from and returned
 * to (conn_new() calls cache_alloc(conn_cache), conn_close() calls
 * cache_free(conn_cache, c)).  Despite the old trailing comment this is
 * not the suffix cache. */
cache_t *conn_cache;
452
453 /**
454 * Reset all of the dynamic buffers used by a connection back to their
455 * default sizes. The strategy for resizing the buffers is to allocate a
456 * new one of the correct size and free the old one if the allocation succeeds
457 * instead of using realloc to change the buffer size (because realloc may
458 * not shrink the buffers, and will also copy the memory). If the allocation
459 * fails the buffer will be unchanged.
460 *
461 * @param c the connection to resize the buffers for
462 * @return true if all allocations succeeded, false if one or more of the
463 * allocations failed.
464 */
conn_reset_buffersize(conn * c)465 static bool conn_reset_buffersize(conn *c) {
466 bool ret = true;
467
468 if (c->rsize != DATA_BUFFER_SIZE) {
469 void *ptr = malloc(DATA_BUFFER_SIZE);
470 if (ptr != NULL) {
471 free(c->rbuf);
472 c->rbuf = ptr;
473 c->rsize = DATA_BUFFER_SIZE;
474 } else {
475 ret = false;
476 }
477 }
478
479 if (c->wsize != DATA_BUFFER_SIZE) {
480 void *ptr = malloc(DATA_BUFFER_SIZE);
481 if (ptr != NULL) {
482 free(c->wbuf);
483 c->wbuf = ptr;
484 c->wsize = DATA_BUFFER_SIZE;
485 } else {
486 ret = false;
487 }
488 }
489
490 if (c->isize != ITEM_LIST_INITIAL) {
491 void *ptr = malloc(sizeof(item *) * ITEM_LIST_INITIAL);
492 if (ptr != NULL) {
493 free(c->ilist);
494 c->ilist = ptr;
495 c->isize = ITEM_LIST_INITIAL;
496 } else {
497 ret = false;
498 }
499 }
500
501 if (c->suffixsize != SUFFIX_LIST_INITIAL) {
502 void *ptr = malloc(sizeof(char *) * SUFFIX_LIST_INITIAL);
503 if (ptr != NULL) {
504 free(c->suffixlist);
505 c->suffixlist = ptr;
506 c->suffixsize = SUFFIX_LIST_INITIAL;
507 } else {
508 ret = false;
509 }
510 }
511
512 if (c->iovsize != IOV_LIST_INITIAL) {
513 void *ptr = malloc(sizeof(struct iovec) * IOV_LIST_INITIAL);
514 if (ptr != NULL) {
515 free(c->iov);
516 c->iov = ptr;
517 c->iovsize = IOV_LIST_INITIAL;
518 } else {
519 ret = false;
520 }
521 }
522
523 if (c->msgsize != MSG_LIST_INITIAL) {
524 void *ptr = malloc(sizeof(struct msghdr) * MSG_LIST_INITIAL);
525 if (ptr != NULL) {
526 free(c->msglist);
527 c->msglist = ptr;
528 c->msgsize = MSG_LIST_INITIAL;
529 } else {
530 ret = false;
531 }
532 }
533
534 return ret;
535 }
536
537 /**
538 * Constructor for all memory allocations of connection objects. Initialize
539 * all members and allocate the transfer buffers.
540 *
541 * @param buffer The memory allocated by the object cache
542 * @param unused1 not used
543 * @param unused2 not used
544 * @return 0 on success, 1 if we failed to allocate memory
545 */
conn_constructor(void * buffer,void * unused1,int unused2)546 static int conn_constructor(void *buffer, void *unused1, int unused2) {
547 (void)unused1; (void)unused2;
548
549 conn *c = buffer;
550 memset(c, 0, sizeof(*c));
551 MEMCACHED_CONN_CREATE(c);
552
553 if (!conn_reset_buffersize(c)) {
554 free(c->rbuf);
555 free(c->wbuf);
556 free(c->ilist);
557 free(c->suffixlist);
558 free(c->iov);
559 free(c->msglist);
560 settings.extensions.logger->log(EXTENSION_LOG_WARNING,
561 NULL,
562 "Failed to allocate buffers for connection\n");
563 return 1;
564 }
565
566 STATS_LOCK();
567 stats.conn_structs++;
568 STATS_UNLOCK();
569
570 return 0;
571 }
572
573 /**
574 * Destructor for all connection objects. Release all allocated resources.
575 *
576 * @param buffer The memory allocated by the objec cache
577 * @param unused not used
578 */
conn_destructor(void * buffer,void * unused)579 static void conn_destructor(void *buffer, void *unused) {
580 (void)unused;
581 conn *c = buffer;
582 free(c->rbuf);
583 free(c->wbuf);
584 free(c->ilist);
585 free(c->suffixlist);
586 free(c->iov);
587 free(c->msglist);
588
589 STATS_LOCK();
590 stats.conn_structs--;
591 STATS_UNLOCK();
592 }
593
conn_new(const SOCKET sfd,STATE_FUNC init_state,const int event_flags,const int read_buffer_size,enum network_transport transport,struct event_base * base,struct timeval * timeout)594 conn *conn_new(const SOCKET sfd, STATE_FUNC init_state,
595 const int event_flags,
596 const int read_buffer_size, enum network_transport transport,
597 struct event_base *base, struct timeval *timeout) {
598 conn *c = cache_alloc(conn_cache);
599 if (c == NULL) {
600 return NULL;
601 }
602
603 assert(c->thread == NULL);
604
605 if (c->rsize < read_buffer_size) {
606 void *mem = malloc(read_buffer_size);
607 if (mem) {
608 c->rsize = read_buffer_size;
609 free(c->rbuf);
610 c->rbuf = mem;
611 } else {
612 assert(c->thread == NULL);
613 cache_free(conn_cache, c);
614 return NULL;
615 }
616 }
617
618 c->transport = transport;
619 c->protocol = settings.binding_protocol;
620
621 /* unix socket mode doesn't need this, so zeroed out. but why
622 * is this done for every command? presumably for UDP
623 * mode. */
624 if (!settings.socketpath) {
625 c->request_addr_size = sizeof(c->request_addr);
626 } else {
627 c->request_addr_size = 0;
628 }
629
630 if (settings.verbose > 1) {
631 if (init_state == conn_listening) {
632 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
633 "<%d server listening (%s)\n", sfd,
634 prot_text(c->protocol));
635 } else if (IS_UDP(transport)) {
636 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
637 "<%d server listening (udp)\n", sfd);
638 } else if (c->protocol == negotiating_prot) {
639 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
640 "<%d new auto-negotiating client connection\n",
641 sfd);
642 } else if (c->protocol == ascii_prot) {
643 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
644 "<%d new ascii client connection.\n", sfd);
645 } else if (c->protocol == binary_prot) {
646 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
647 "<%d new binary client connection.\n", sfd);
648 } else {
649 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
650 "<%d new unknown (%d) client connection\n",
651 sfd, c->protocol);
652 assert(false);
653 }
654 }
655
656 c->sfd = sfd;
657 c->state = init_state;
658 c->rlbytes = 0;
659 c->cmd = -1;
660 c->ascii_cmd = NULL;
661 c->rbytes = c->wbytes = 0;
662 c->wcurr = c->wbuf;
663 c->rcurr = c->rbuf;
664 c->ritem = 0;
665 c->icurr = c->ilist;
666 c->suffixcurr = c->suffixlist;
667 c->ileft = 0;
668 c->suffixleft = 0;
669 c->iovused = 0;
670 c->msgcurr = 0;
671 c->msgused = 0;
672 c->next = NULL;
673 c->list_state = 0;
674
675 c->write_and_go = init_state;
676 c->write_and_free = 0;
677 c->item = 0;
678
679 c->noreply = false;
680
681 event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
682 event_base_set(base, &c->event);
683 c->ev_flags = event_flags;
684
685 if (!register_event(c, timeout)) {
686 assert(c->thread == NULL);
687 cache_free(conn_cache, c);
688 return NULL;
689 }
690
691 STATS_LOCK();
692 stats.total_conns++;
693 STATS_UNLOCK();
694
695 c->aiostat = ENGINE_SUCCESS;
696 c->ewouldblock = false;
697 c->refcount = 1;
698
699 MEMCACHED_CONN_ALLOCATE(c->sfd);
700
701 perform_callbacks(ON_CONNECT, NULL, c);
702
703 return c;
704 }
705
/*
 * Release everything a connection still holds from its last request and
 * detach it from its thread: the pending item, any queued items and
 * suffixes, a pending write_and_free buffer, the SASL context and the
 * engine-specific storage.  The socket field is set to INVALID_SOCKET.
 *
 * The connection must not be linked into a list (c->next == NULL).
 */
static void conn_cleanup(conn *c)
{
    assert(c != NULL);

    if (c->item) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }

    /* items still queued for output */
    while (c->ileft > 0) {
        settings.engine.v1->release(settings.engine.v0, c, *(c->icurr));
        c->ileft--;
        c->icurr++;
    }

    /* suffix buffers go back to the owning thread's suffix cache */
    while (c->suffixleft > 0) {
        cache_free(c->thread->suffix_cache, *(c->suffixcurr));
        c->suffixleft--;
        c->suffixcurr++;
    }

    /* free(NULL) is a no-op, so no guard is needed */
    free(c->write_and_free);
    c->write_and_free = 0;

    if (c->sasl_conn) {
        sasl_dispose(&c->sasl_conn);
        c->sasl_conn = NULL;
    }

    if (c->engine_storage) {
        /* clear the field before calling into the engine */
        void *engine_data = c->engine_storage;
        c->engine_storage = NULL;
        settings.engine.v1->clean_engine(settings.engine.v0, c, engine_data);
    }

    c->tap_iterator = NULL;
    c->thread = NULL;
    assert(c->next == NULL);
    c->ascii_cmd = NULL;
    c->sfd = INVALID_SOCKET;
    c->tap_nack_mode = false;
}
749
/*
 * Tear down a connection whose socket has already been closed and hand the
 * object back to the connection cache.
 *
 * Any in-flight ascii command is aborted, the connection is removed from
 * its thread's pending-io and pending-close lists (under the thread lock),
 * per-request resources are released and the buffers are reset to their
 * default sizes before the object is returned to the cache.
 */
void conn_close(conn *c)
{
    assert(c != NULL);
    assert(c->sfd == INVALID_SOCKET);

    if (c->ascii_cmd != NULL) {
        c->ascii_cmd->abort(c->ascii_cmd, c);
    }

    assert(c->thread);
    LOCK_THREAD(c->thread);
    if (settings.verbose > 1) {
        if (list_contains(c->thread->pending_io, c)) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "Current connection was in the pending-io list.. Nuking it\n");
        }
    }
    c->thread->pending_io = list_remove(c->thread->pending_io, c);
    c->thread->pending_close = list_remove(c->thread->pending_close, c);
    UNLOCK_THREAD(c->thread);

    conn_cleanup(c);

    /*
     * Objects must go back to the cache in a constructed state, so restore
     * the default buffer sizes first.
     */
    conn_reset_buffersize(c);
    assert(c->thread == NULL);
    cache_free(conn_cache, c);
}
780
781 /*
782 * Shrinks a connection's buffers if they're too big. This prevents
783 * periodic large "get" requests from permanently chewing lots of server
784 * memory.
785 *
786 * This should only be called in between requests since it can wipe output
787 * buffers!
788 */
conn_shrink(conn * c)789 static void conn_shrink(conn *c) {
790 assert(c != NULL);
791
792 if (IS_UDP(c->transport))
793 return;
794
795 if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) {
796 char *newbuf;
797
798 if (c->rcurr != c->rbuf)
799 memmove(c->rbuf, c->rcurr, (size_t)c->rbytes);
800
801 newbuf = (char *)realloc((void *)c->rbuf, DATA_BUFFER_SIZE);
802
803 if (newbuf) {
804 c->rbuf = newbuf;
805 c->rsize = DATA_BUFFER_SIZE;
806 }
807 /* TODO check other branch... */
808 c->rcurr = c->rbuf;
809 }
810
811 if (c->isize > ITEM_LIST_HIGHWAT) {
812 item **newbuf = (item**) realloc((void *)c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0]));
813 if (newbuf) {
814 c->ilist = newbuf;
815 c->isize = ITEM_LIST_INITIAL;
816 }
817 /* TODO check error condition? */
818 }
819
820 if (c->msgsize > MSG_LIST_HIGHWAT) {
821 struct msghdr *newbuf = (struct msghdr *) realloc((void *)c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0]));
822 if (newbuf) {
823 c->msglist = newbuf;
824 c->msgsize = MSG_LIST_INITIAL;
825 }
826 /* TODO check error condition? */
827 }
828
829 if (c->iovsize > IOV_LIST_HIGHWAT) {
830 struct iovec *newbuf = (struct iovec *) realloc((void *)c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0]));
831 if (newbuf) {
832 c->iov = newbuf;
833 c->iovsize = IOV_LIST_INITIAL;
834 }
835 /* TODO check return value */
836 }
837 }
838
839 /**
840 * Convert a state name to a human readable form.
841 */
state_text(STATE_FUNC state)842 const char *state_text(STATE_FUNC state) {
843 if (state == conn_listening) {
844 return "conn_listening";
845 } else if (state == conn_new_cmd) {
846 return "conn_new_cmd";
847 } else if (state == conn_waiting) {
848 return "conn_waiting";
849 } else if (state == conn_read) {
850 return "conn_read";
851 } else if (state == conn_parse_cmd) {
852 return "conn_parse_cmd";
853 } else if (state == conn_write) {
854 return "conn_write";
855 } else if (state == conn_nread) {
856 return "conn_nread";
857 } else if (state == conn_swallow) {
858 return "conn_swallow";
859 } else if (state == conn_closing) {
860 return "conn_closing";
861 } else if (state == conn_mwrite) {
862 return "conn_mwrite";
863 } else if (state == conn_ship_log) {
864 return "conn_ship_log";
865 } else if (state == conn_add_tap_client) {
866 return "conn_add_tap_client";
867 } else if (state == conn_setup_tap_stream) {
868 return "conn_setup_tap_stream";
869 } else if (state == conn_pending_close) {
870 return "conn_pending_close";
871 } else if (state == conn_immediate_close) {
872 return "conn_immediate_close";
873 } else {
874 return "Unknown";
875 }
876 }
877
878 /*
879 * Sets a connection's current state in the state machine. Any special
880 * processing that needs to happen on certain state transitions can
881 * happen here.
882 */
conn_set_state(conn * c,STATE_FUNC state)883 void conn_set_state(conn *c, STATE_FUNC state) {
884 assert(c != NULL);
885
886 if (state != c->state) {
887 /*
888 * The connections in the "tap thread" behaves differently than
889 * normal connections because they operate in a full duplex mode.
890 * New messages may appear from both sides, so we can't block on
891 * read from the nework / engine
892 */
893 if (c->thread == tap_thread) {
894 if (state == conn_waiting) {
895 c->which = EV_WRITE;
896 state = conn_ship_log;
897 }
898 }
899
900 if (settings.verbose > 2 || c->state == conn_closing
901 || c->state == conn_add_tap_client) {
902 settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
903 "%d: going from %s to %s\n",
904 c->sfd, state_text(c->state),
905 state_text(state));
906 }
907
908 c->state = state;
909
910 if (state == conn_write || state == conn_mwrite) {
911 MEMCACHED_PROCESS_COMMAND_END(c->sfd, c->wbuf, c->wbytes);
912 }
913 }
914 }
915
916 /*
917 * Ensures that there is room for another struct iovec in a connection's
918 * iov list.
919 *
920 * Returns 0 on success, -1 on out-of-memory.
921 */
ensure_iov_space(conn * c)922 static int ensure_iov_space(conn *c) {
923 assert(c != NULL);
924
925 if (c->iovused >= c->iovsize) {
926 int i, iovnum;
927 struct iovec *new_iov = (struct iovec *)realloc(c->iov,
928 (c->iovsize * 2) * sizeof(struct iovec));
929 if (! new_iov)
930 return -1;
931 c->iov = new_iov;
932 c->iovsize *= 2;
933
934 /* Point all the msghdr structures at the new list. */
935 for (i = 0, iovnum = 0; i < c->msgused; i++) {
936 c->msglist[i].msg_iov = &c->iov[iovnum];
937 iovnum += c->msglist[i].msg_iovlen;
938 }
939 }
940
941 return 0;
942 }
943
944
945 /*
946 * Adds data to the list of pending data that will be written out to a
947 * connection.
948 *
949 * Returns 0 on success, -1 on out-of-memory.
950 */
951
add_iov(conn * c,const void * buf,int len)952 static int add_iov(conn *c, const void *buf, int len) {
953 struct msghdr *m;
954 int leftover;
955 bool limit_to_mtu;
956
957 assert(c != NULL);
958
959 do {
960 m = &c->msglist[c->msgused - 1];
961
962 /*
963 * Limit UDP packets, and the first payloads of TCP replies, to
964 * UDP_MAX_PAYLOAD_SIZE bytes.
965 */
966 limit_to_mtu = IS_UDP(c->transport) || (1 == c->msgused);
967
968 /* We may need to start a new msghdr if this one is full. */
969 if (m->msg_iovlen == IOV_MAX ||
970 (limit_to_mtu && c->msgbytes >= UDP_MAX_PAYLOAD_SIZE)) {
971 add_msghdr(c);
972 m = &c->msglist[c->msgused - 1];
973 }
974
975 if (ensure_iov_space(c) != 0)
976 return -1;
977
978 /* If the fragment is too big to fit in the datagram, split it up */
979 if (limit_to_mtu && len + c->msgbytes > UDP_MAX_PAYLOAD_SIZE) {
980 leftover = len + c->msgbytes - UDP_MAX_PAYLOAD_SIZE;
981 len -= leftover;
982 } else {
983 leftover = 0;
984 }
985
986 m = &c->msglist[c->msgused - 1];
987 m->msg_iov[m->msg_iovlen].iov_base = (void *)buf;
988 m->msg_iov[m->msg_iovlen].iov_len = len;
989
990 c->msgbytes += len;
991 c->iovused++;
992 m->msg_iovlen++;
993
994 buf = ((char *)buf) + len;
995 len = leftover;
996 } while (leftover > 0);
997
998 return 0;
999 }
1000
1001
1002 /*
1003 * Constructs a set of UDP headers and attaches them to the outgoing messages.
1004 */
build_udp_headers(conn * c)1005 static int build_udp_headers(conn *c) {
1006 int i;
1007 unsigned char *hdr;
1008
1009 assert(c != NULL);
1010
1011 if (c->msgused > c->hdrsize) {
1012 void *new_hdrbuf;
1013 if (c->hdrbuf)
1014 new_hdrbuf = realloc(c->hdrbuf, c->msgused * 2 * UDP_HEADER_SIZE);
1015 else
1016 new_hdrbuf = malloc(c->msgused * 2 * UDP_HEADER_SIZE);
1017 if (! new_hdrbuf)
1018 return -1;
1019 c->hdrbuf = (unsigned char *)new_hdrbuf;
1020 c->hdrsize = c->msgused * 2;
1021 }
1022
1023 hdr = c->hdrbuf;
1024 for (i = 0; i < c->msgused; i++) {
1025 c->msglist[i].msg_iov[0].iov_base = (void*)hdr;
1026 c->msglist[i].msg_iov[0].iov_len = UDP_HEADER_SIZE;
1027 *hdr++ = c->request_id / 256;
1028 *hdr++ = c->request_id % 256;
1029 *hdr++ = i / 256;
1030 *hdr++ = i % 256;
1031 *hdr++ = c->msgused / 256;
1032 *hdr++ = c->msgused % 256;
1033 *hdr++ = 0;
1034 *hdr++ = 0;
1035 assert((void *) hdr == (caddr_t)c->msglist[i].msg_iov[0].iov_base + UDP_HEADER_SIZE);
1036 }
1037
1038 return 0;
1039 }
1040
1041
/*
 * Queue a single-line text reply for a connection.
 *
 * When the current command asked for no reply, nothing is written: the
 * noreply flag is cleared and the connection goes straight to swallowing
 * pending bytes (if any) or to the next command.
 *
 * Otherwise any partially built output is discarded, the string plus CRLF
 * is copied into the write buffer, and the connection enters conn_write;
 * afterwards it continues with conn_swallow or conn_new_cmd depending on
 * whether bytes remain to be swallowed.  A string that does not fit into
 * the write buffer is replaced by a fixed SERVER_ERROR message.
 */
static void out_string(conn *c, const char *str)
{
    size_t length;

    assert(c != NULL);

    if (c->noreply) {
        if (settings.verbose > 1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            ">%d NOREPLY %s\n", c->sfd, str);
        }
        c->noreply = false;
        conn_set_state(c, (c->sbytes > 0) ? conn_swallow : conn_new_cmd);
        return;
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d %s\n", c->sfd, str);
    }

    /* Drop whatever output was being assembled and start a new message. */
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    add_msghdr(c);

    length = strlen(str);
    if ((length + 2) > c->wsize) {
        /* The write buffer should always be large enough; fall back to a
           fixed error line rather than growing it. */
        str = "SERVER_ERROR output line too long";
        length = strlen(str);
    }

    memcpy(c->wbuf, str, length);
    memcpy(c->wbuf + length, "\r\n", 2);
    c->wbytes = length + 2;
    c->wcurr = c->wbuf;

    conn_set_state(c, conn_write);

    c->write_and_go = (c->sbytes > 0) ? conn_swallow : conn_new_cmd;
}
1094
1095 /*
1096 * we get here after reading the value in set/add/replace commands. The command
1097 * has been stored in c->cmd, and the item is ready in c->item.
1098 */
complete_update_ascii(conn * c)1099 static void complete_update_ascii(conn *c) {
1100 assert(c != NULL);
1101
1102 item *it = c->item;
1103 item_info info = { .nvalue = 1 };
1104 if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
1105 settings.engine.v1->release(settings.engine.v0, c, it);
1106 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
1107 "%d: Failed to get item info\n",
1108 c->sfd);
1109 out_string(c, "SERVER_ERROR failed to get item details");
1110 return;
1111 }
1112
1113 c->sbytes = 2; // swallow \r\n
1114 ENGINE_ERROR_CODE ret = c->aiostat;
1115 c->aiostat = ENGINE_SUCCESS;
1116 if (ret == ENGINE_SUCCESS) {
1117 ret = settings.engine.v1->store(settings.engine.v0, c, it, &c->cas,
1118 c->store_op, 0);
1119 }
1120
1121 #ifdef ENABLE_DTRACE
1122 switch (c->store_op) {
1123 case OPERATION_ADD:
1124 MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
1125 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1126 break;
1127 case OPERATION_REPLACE:
1128 MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
1129 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1130 break;
1131 case OPERATION_APPEND:
1132 MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
1133 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1134 break;
1135 case OPERATION_PREPEND:
1136 MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
1137 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1138 break;
1139 case OPERATION_SET:
1140 MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
1141 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1142 break;
1143 case OPERATION_CAS:
1144 MEMCACHED_COMMAND_CAS(c->sfd, info.key, info.nkey, info.nbytes, c->cas);
1145 break;
1146 }
1147 #endif
1148
1149 switch (ret) {
1150 case ENGINE_SUCCESS:
1151 out_string(c, "STORED");
1152 break;
1153 case ENGINE_KEY_EEXISTS:
1154 out_string(c, "EXISTS");
1155 break;
1156 case ENGINE_KEY_ENOENT:
1157 out_string(c, "NOT_FOUND");
1158 break;
1159 case ENGINE_NOT_STORED:
1160 out_string(c, "NOT_STORED");
1161 break;
1162 case ENGINE_DISCONNECT:
1163 c->state = conn_closing;
1164 break;
1165 case ENGINE_ENOTSUP:
1166 out_string(c, "SERVER_ERROR not supported");
1167 break;
1168 case ENGINE_ENOMEM:
1169 out_string(c, "SERVER_ERROR out of memory");
1170 break;
1171 case ENGINE_TMPFAIL:
1172 out_string(c, "SERVER_ERROR temporary failure");
1173 break;
1174 case ENGINE_EINVAL:
1175 out_string(c, "CLIENT_ERROR invalid arguments");
1176 break;
1177 case ENGINE_E2BIG:
1178 out_string(c, "CLIENT_ERROR value too big");
1179 break;
1180 case ENGINE_EACCESS:
1181 out_string(c, "CLIENT_ERROR access control violation");
1182 break;
1183 case ENGINE_NOT_MY_VBUCKET:
1184 out_string(c, "SERVER_ERROR not my vbucket");
1185 break;
1186 case ENGINE_FAILED:
1187 out_string(c, "SERVER_ERROR failure");
1188 break;
1189 case ENGINE_EWOULDBLOCK:
1190 c->ewouldblock = true;
1191 break;
1192 case ENGINE_WANT_MORE:
1193 assert(false);
1194 c->state = conn_closing;
1195 break;
1196
1197 default:
1198 out_string(c, "SERVER_ERROR internal");
1199 }
1200
1201 if (c->store_op == OPERATION_CAS) {
1202 switch (ret) {
1203 case ENGINE_SUCCESS:
1204 SLAB_INCR(c, cas_hits, info.key, info.nkey);
1205 break;
1206 case ENGINE_KEY_EEXISTS:
1207 SLAB_INCR(c, cas_badval, info.key, info.nkey);
1208 break;
1209 case ENGINE_KEY_ENOENT:
1210 STATS_NOKEY(c, cas_misses);
1211 break;
1212 default:
1213 ;
1214 }
1215 } else {
1216 SLAB_INCR(c, cmd_set, info.key, info.nkey);
1217 }
1218
1219 if (!c->ewouldblock) {
1220 /* release the c->item reference */
1221 settings.engine.v1->release(settings.engine.v0, c, c->item);
1222 c->item = 0;
1223 }
1224 }
1225
1226 /**
1227 * get a pointer to the start of the request struct for the current command
1228 */
binary_get_request(conn * c)1229 static void* binary_get_request(conn *c) {
1230 char *ret = c->rcurr;
1231 ret -= (sizeof(c->binary_header) + c->binary_header.request.keylen +
1232 c->binary_header.request.extlen);
1233
1234 assert(ret >= c->rbuf);
1235 return ret;
1236 }
1237
1238 /**
1239 * get a pointer to the key in this request
1240 */
binary_get_key(conn * c)1241 static char* binary_get_key(conn *c) {
1242 return c->rcurr - (c->binary_header.request.keylen);
1243 }
1244
/**
 * Insert a key into a buffer, but replace all non-printable characters
 * with a '.'.
 *
 * The output is "<dir><client> <prefix> <key>", where <dir> is '>' when
 * from_client is true and '<' otherwise. The result is always
 * NUL-terminated; a key that does not fit is truncated.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param key the key to add to the buffer
 * @param nkey the number of bytes in the key
 * @return number of bytes in dest (excluding the terminating NUL) if
 *         success, -1 if formatting failed or the prefix alone does not
 *         fit in dest
 */
static ssize_t key_to_printable_buffer(char *dest, size_t destsz,
                                       int client, bool from_client,
                                       const char *prefix,
                                       const char *key,
                                       size_t nkey)
{
    int nw = snprintf(dest, destsz, "%c%d %s ", from_client ? '>' : '<',
                      client, prefix);
    /* snprintf reports the length it wanted to write; a value >= destsz
       means the prefix was truncated and there is no room for the key. */
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }

    char *ptr = dest + nw;
    /* Keep one byte in reserve for the terminating NUL. */
    size_t room = destsz - (size_t)nw - 1;
    if (nkey > room) {
        nkey = room;
    }

    for (size_t ii = 0; ii < nkey; ++ii, ++key, ++ptr) {
        /* <ctype.h> functions require an unsigned char value. */
        *ptr = isgraph((unsigned char)*key) ? *key : '.';
    }

    *ptr = '\0';
    return ptr - dest;
}
1287
/**
 * Convert a byte array to a text string
 *
 * The output starts with "<dir><client> <prefix>", where <dir> is '>' when
 * from_client is true and '<' otherwise. The data follows as " 0xNN"
 * tokens, four per line, each line introduced by "\n<dir><client> ". A
 * final newline ends the output.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param data the data to add to the buffer
 * @param size the number of bytes in data to print
 * @return number of bytes in dest (excluding the terminating NUL) if
 *         success, -1 if formatting failed or the output does not fit
 */
static ssize_t bytes_to_output_string(char *dest, size_t destsz,
                                      int client, bool from_client,
                                      const char *prefix,
                                      const char *data,
                                      size_t size)
{
    const char direction = from_client ? '>' : '<';
    size_t offset = 0;
    int nw;

    /*
     * snprintf returns the length it wanted to write, so every call is
     * checked for truncation as well as for failure. That keeps
     * offset < destsz at all times, so "destsz - offset" cannot wrap and
     * no write can land past the end of dest.
     */
    nw = snprintf(dest, destsz, "%c%d %s", direction, client, prefix);
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }
    offset = (size_t)nw;

    for (size_t ii = 0; ii < size; ++ii) {
        if (ii % 4 == 0) {
            /* Start a new output line every four bytes. */
            nw = snprintf(dest + offset, destsz - offset, "\n%c%d ",
                          direction, client);
            if (nw < 0 || (size_t)nw >= destsz - offset) {
                return -1;
            }
            offset += (size_t)nw;
        }
        nw = snprintf(dest + offset, destsz - offset,
                      " 0x%02x", (unsigned char)data[ii]);
        if (nw < 0 || (size_t)nw >= destsz - offset) {
            return -1;
        }
        offset += (size_t)nw;
    }

    nw = snprintf(dest + offset, destsz - offset, "\n");
    if (nw < 0 || (size_t)nw >= destsz - offset) {
        return -1;
    }

    return (ssize_t)(offset + (size_t)nw);
}
1334
/**
 * Start a new binary-protocol response on the connection.
 *
 * Resets the pending output, builds the response header at the start of
 * c->wbuf and queues it as the first iovec. The opcode, opaque and CAS are
 * taken from the connection; multi-byte fields are stored in network byte
 * order.
 *
 * @param c the connection to respond on
 * @param err status code for the response
 * @param hdr_len value for the header's extras-length field
 * @param key_len value for the header's key-length field
 * @param body_len value for the header's total-body-length field
 */
static void add_bin_header(conn *c, uint16_t err, uint8_t hdr_len, uint16_t key_len, uint32_t body_len) {
    assert(c);

    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    if (add_msghdr(c) != 0) {
        /* XXX: out_string is inappropriate here */
        out_string(c, "SERVER_ERROR out of memory");
        return;
    }

    protocol_binary_response_header *rsp =
        (protocol_binary_response_header *)c->wbuf;

    rsp->response.magic = (uint8_t)PROTOCOL_BINARY_RES;
    rsp->response.opcode = c->binary_header.request.opcode;
    rsp->response.keylen = (uint16_t)htons(key_len);

    rsp->response.extlen = (uint8_t)hdr_len;
    rsp->response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES;
    rsp->response.status = (uint16_t)htons(err);

    rsp->response.bodylen = htonl(body_len);
    rsp->response.opaque = c->opaque;
    rsp->response.cas = htonll(c->cas);

    if (settings.verbose > 1) {
        /* Dump the raw header bytes to the debug log. */
        char dump[1024];
        ssize_t dumped = bytes_to_output_string(dump, sizeof(dump), c->sfd,
                                                false,
                                                "Writing bin response:",
                                                (const char*)rsp->bytes,
                                                sizeof(rsp->bytes));
        if (dumped != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "%s", dump);
        }
    }

    add_iov(c, c->wbuf, sizeof(rsp->response));
}
1376
1377 /**
1378 * Convert an error code generated from the storage engine to the corresponding
1379 * error code used by the protocol layer.
1380 * @param e the error code as used in the engine
1381 * @return the error code as used by the protocol layer
1382 */
engine_error_2_protocol_error(ENGINE_ERROR_CODE e)1383 static protocol_binary_response_status engine_error_2_protocol_error(ENGINE_ERROR_CODE e) {
1384 protocol_binary_response_status ret;
1385
1386 switch (e) {
1387 case ENGINE_SUCCESS:
1388 return PROTOCOL_BINARY_RESPONSE_SUCCESS;
1389 case ENGINE_KEY_ENOENT:
1390 return PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
1391 case ENGINE_KEY_EEXISTS:
1392 return PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
1393 case ENGINE_ENOMEM:
1394 return PROTOCOL_BINARY_RESPONSE_ENOMEM;
1395 case ENGINE_TMPFAIL:
1396 return PROTOCOL_BINARY_RESPONSE_ETMPFAIL;
1397 case ENGINE_NOT_STORED:
1398 return PROTOCOL_BINARY_RESPONSE_NOT_STORED;
1399 case ENGINE_EINVAL:
1400 return PROTOCOL_BINARY_RESPONSE_EINVAL;
1401 case ENGINE_ENOTSUP:
1402 return PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED;
1403 case ENGINE_E2BIG:
1404 return PROTOCOL_BINARY_RESPONSE_E2BIG;
1405 case ENGINE_NOT_MY_VBUCKET:
1406 return PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET;
1407 default:
1408 ret = PROTOCOL_BINARY_RESPONSE_EINTERNAL;
1409 }
1410
1411 return ret;
1412 }
1413
/**
 * Send a binary-protocol response that consists of a status code and, for
 * every status other than success, a short text message as the body. If
 * the engine provides an errinfo() hook, its text is appended after ": ".
 *
 * @param c the connection to respond on
 * @param err the protocol status to report
 * @param swallow when positive, the number of input bytes to swallow
 *                after the response has been written; otherwise the
 *                connection proceeds to the next command
 */
static void write_bin_packet(conn *c, protocol_binary_response_status err, int swallow) {
    ssize_t len;
    char buffer[1024] = { [sizeof(buffer) - 1] = '\0' };

    switch (err) {
    case PROTOCOL_BINARY_RESPONSE_SUCCESS:
        len = 0;
        break;
    case PROTOCOL_BINARY_RESPONSE_ENOMEM:
        len = snprintf(buffer, sizeof(buffer), "Out of memory");
        break;
    case PROTOCOL_BINARY_RESPONSE_ETMPFAIL:
        len = snprintf(buffer, sizeof(buffer), "Temporary failure");
        break;
    case PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND:
        len = snprintf(buffer, sizeof(buffer), "Unknown command");
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_ENOENT:
        len = snprintf(buffer, sizeof(buffer), "Not found");
        break;
    case PROTOCOL_BINARY_RESPONSE_EINVAL:
        len = snprintf(buffer, sizeof(buffer), "Invalid arguments");
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS:
        len = snprintf(buffer, sizeof(buffer), "Data exists for key");
        break;
    case PROTOCOL_BINARY_RESPONSE_E2BIG:
        len = snprintf(buffer, sizeof(buffer), "Too large");
        break;
    case PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL:
        len = snprintf(buffer, sizeof(buffer),
                       "Non-numeric server-side value for incr or decr");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_STORED:
        len = snprintf(buffer, sizeof(buffer), "Not stored");
        break;
    case PROTOCOL_BINARY_RESPONSE_AUTH_ERROR:
        len = snprintf(buffer, sizeof(buffer), "Auth failure");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED:
        len = snprintf(buffer, sizeof(buffer), "Not supported");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET:
        len = snprintf(buffer, sizeof(buffer),
                       "I'm not responsible for this vbucket");
        break;

    default:
        len = snprintf(buffer, sizeof(buffer), "UNHANDLED ERROR (%d)", err);
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        ">%d UNHANDLED ERROR: %d\n", c->sfd, err);
    }

    /* Allow the engine to pass extra error information */
    if (settings.engine.v1->errinfo != NULL) {
        /* The engine writes two bytes past the message so that a ": "
           separator can be inserted if it returned any text. */
        size_t elen = settings.engine.v1->errinfo(settings.engine.v0, c, buffer + len + 2,
                                                  sizeof(buffer) - len - 3);

        if (elen > 0) {
            memcpy(buffer + len, ": ", 2);
            len += elen + 2;
        }
    }

    if (err != PROTOCOL_BINARY_RESPONSE_SUCCESS && settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d Writing an error: %s\n", c->sfd,
                                        buffer);
    }

    /*
     * add_iov() only queues a pointer, and this function switches the
     * connection to conn_mwrite rather than sending anything itself. The
     * message therefore has to outlive this stack frame: it is copied into
     * the connection's write buffer right after the response header and
     * queued from there. Clamp it first so the copy cannot overrun wbuf and
     * so the header's body length matches what is actually sent.
     */
    const size_t hdrlen = sizeof(protocol_binary_response_header);
    const size_t room = ((size_t)c->wsize > hdrlen) ? (size_t)c->wsize - hdrlen : 0;
    if (len > 0 && (size_t)len > room) {
        len = (ssize_t)room;
    }

    add_bin_header(c, err, 0, 0, len);
    if (len > 0) {
        char *text = c->wbuf + hdrlen;
        memcpy(text, buffer, len);
        add_iov(c, text, len);
    }
    conn_set_state(c, conn_mwrite);
    if (swallow > 0) {
        c->sbytes = swallow;
        c->write_and_go = conn_swallow;
    } else {
        c->write_and_go = conn_new_cmd;
    }
}
1496
1497 /* Form and send a response to a command over the binary protocol */
write_bin_response(conn * c,void * d,int hlen,int keylen,int dlen)1498 static void write_bin_response(conn *c, void *d, int hlen, int keylen, int dlen) {
1499 if (!c->noreply || c->cmd == PROTOCOL_BINARY_CMD_GET ||
1500 c->cmd == PROTOCOL_BINARY_CMD_GETK) {
1501 add_bin_header(c, 0, hlen, keylen, dlen);
1502 if(dlen > 0) {
1503 add_iov(c, d, dlen);
1504 }
1505 conn_set_state(c, conn_mwrite);
1506 c->write_and_go = conn_new_cmd;
1507 } else {
1508 conn_set_state(c, conn_new_cmd);
1509 }
1510 }
1511
1512
/**
 * Execute a binary-protocol increment/decrement request.
 *
 * The delta, initial value and expiration are read from the request, the
 * engine's arithmetic() call is made (unless c->aiostat already holds an
 * error), and the new value or an error packet is sent back.
 */
static void complete_incr_bin(conn *c) {
    /* Validate the connection before anything dereferences it. */
    assert(c != NULL);

    protocol_binary_response_incr* rsp = (protocol_binary_response_incr*)c->wbuf;
    protocol_binary_request_incr* req = binary_get_request(c);

    assert(c->wsize >= sizeof(*rsp));

    /* fix byteorder in the request */
    uint64_t delta = ntohll(req->message.body.delta);
    uint64_t initial = ntohll(req->message.body.initial);
    rel_time_t expiration = ntohl(req->message.body.expiration);
    char *key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;
    bool incr = (c->cmd == PROTOCOL_BINARY_CMD_INCREMENT ||
                 c->cmd == PROTOCOL_BINARY_CMD_INCREMENTQ);

    if (settings.verbose > 1) {
        char buffer[1024];
        ssize_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     incr ? "INCR" : "DECR", key, nkey);
        if (nw != -1) {
            if (snprintf(buffer + nw, sizeof(buffer) - nw,
                         " %" PRIu64 ", %" PRIu64 ", %" PRIu64 "\n",
                         delta, initial, (uint64_t)expiration) != -1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        /* An expiration of 0xffffffff in the request makes the "create"
           argument false. */
        ret = settings.engine.v1->arithmetic(settings.engine.v0,
                                             c, key, nkey, incr,
                                             req->message.body.expiration != 0xffffffff,
                                             delta, initial, expiration,
                                             &c->cas,
                                             &rsp->message.body.value,
                                             c->binary_header.request.vbucket);
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        rsp->message.body.value = htonll(rsp->message.body.value);
        write_bin_response(c, &rsp->message.body, 0, 0,
                           sizeof (rsp->message.body.value));
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        /* Use the same increment/decrement test as the hit counters, so
           that a quiet increment miss is not counted as a decrement. */
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EINVAL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL, 0);
        break;
    case ENGINE_NOT_STORED:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_STORED, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        abort();
    }
}
1606
/**
 * Finish a binary-protocol storage command once its value has been read.
 *
 * The item to store is c->item and the kind of store is c->store_op. The
 * result of the engine's store() call (or a pending c->aiostat error) is
 * sent back as a binary response, the matching statistics are updated, and
 * the item reference is released unless the engine asked us to block
 * (ENGINE_EWOULDBLOCK).
 */
static void complete_update_bin(conn *c) {
    protocol_binary_response_status eno = PROTOCOL_BINARY_RESPONSE_EINVAL;
    assert(c != NULL);

    item *it = c->item;
    item_info info = { .nvalue = 1 };
    if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
        settings.engine.v1->release(settings.engine.v0, c, it);
        /* The reference is gone: drop the pointer as the normal exit path
           below does, so the released item cannot be released again. */
        c->item = 0;
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: Failed to get item info\n",
                                        c->sfd);
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
        return;
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->store(settings.engine.v0, c,
                                        it, &c->cas, c->store_op,
                                        c->binary_header.request.vbucket);
    }

#ifdef ENABLE_DTRACE
    /* The OPERATION_* constants describe c->store_op (the value handed to
       store() above), so the probe is selected on that field. */
    switch (c->store_op) {
    case OPERATION_ADD:
        MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_REPLACE:
        MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_APPEND:
        MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
                                 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_PREPEND:
        MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_SET:
        MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    default:
        break;
    }
#endif

    switch (ret) {
    case ENGINE_SUCCESS:
        /* Stored */
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* Any other failure is reported with a status chosen from the
           kind of store that was attempted. */
        if (c->store_op == OPERATION_ADD) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
        } else if(c->store_op == OPERATION_REPLACE) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
        } else {
            eno = PROTOCOL_BINARY_RESPONSE_NOT_STORED;
        }
        write_bin_packet(c, eno, 0);
    }

    /* CAS stores have their own hit/badval/miss counters; everything else
       counts as a plain set. */
    if (c->store_op == OPERATION_CAS) {
        switch (ret) {
        case ENGINE_SUCCESS:
            SLAB_INCR(c, cas_hits, info.key, info.nkey);
            break;
        case ENGINE_KEY_EEXISTS:
            SLAB_INCR(c, cas_badval, info.key, info.nkey);
            break;
        case ENGINE_KEY_ENOENT:
            STATS_NOKEY(c, cas_misses);
            break;
        default:
            ;
        }
    } else {
        SLAB_INCR(c, cmd_set, info.key, info.nkey);
    }

    if (!c->ewouldblock) {
        /* release the c->item reference */
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }
}
1719
/**
 * Execute a binary-protocol GET or GETK request.
 *
 * On a hit the response carries the item's flags and value (plus the key
 * for GETK), and the item is remembered in c->item so that it can be
 * released later. On a miss an error packet is sent unless the connection
 * is in noreply mode; for GETK the key is echoed back in the miss response.
 */
static void process_bin_get(conn *c) {
    item *it = NULL;

    protocol_binary_response_get* rsp = (protocol_binary_response_get*)c->wbuf;
    char* key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "GET", key, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->get(settings.engine.v0, c, &it, key, nkey,
                                      c->binary_header.request.vbucket);
    }

    uint16_t keylen;
    uint32_t bodylen;
    item_info info = { .nvalue = 1 };

    switch (ret) {
    case ENGINE_SUCCESS:
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: Failed to get item info\n",
                                            c->sfd);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            break;
        }

        keylen = 0;
        bodylen = sizeof(rsp->message.body) + info.nbytes;

        STATS_HIT(c, get, key, nkey);

        if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
            bodylen += nkey;
            keylen = nkey;
        }
        add_bin_header(c, 0, sizeof(rsp->message.body), keylen, bodylen);
        /* add_bin_header filled in c->cas; the response must carry the
           item's own CAS instead. */
        rsp->message.header.response.cas = htonll(info.cas);

        // add the flags
        rsp->message.body.flags = info.flags;
        add_iov(c, &rsp->message.body, sizeof(rsp->message.body));

        if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
            add_iov(c, info.key, nkey);
        }

        add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
        conn_set_state(c, conn_mwrite);
        /* Remember this item so we can garbage collect it later */
        c->item = it;
        break;
    case ENGINE_KEY_ENOENT:
        STATS_MISS(c, get, key, nkey);

        MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);

        if (c->noreply) {
            conn_set_state(c, conn_new_cmd);
        } else {
            if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
                /* Echo the key back: copy it into the write buffer right
                   after the response header and queue it from there. */
                char *ofs = c->wbuf + sizeof(protocol_binary_response_header);
                add_bin_header(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT,
                               0, nkey, nkey);
                memcpy(ofs, key, nkey);
                add_iov(c, ofs, nkey);
                conn_set_state(c, conn_mwrite);
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
            }
        }
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_TMPFAIL:
        /* Report the temporary failure, as the other binary handlers do,
           rather than leaving the request unanswered and the connection
           state unchanged. */
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* @todo add proper error handling! */
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Unknown error code: %d\n", ret);
        abort();
    }

    if (settings.detail_enabled && ret != ENGINE_EWOULDBLOCK) {
        stats_prefix_record_get(key, nkey, ret == ENGINE_SUCCESS);
    }
}
1828
/**
 * Append one key/value pair to the stats output buffer as a binary STAT
 * response: a response header followed by the key and then the value.
 * The caller must have made sure the buffer has room. Nothing follows the
 * header when klen is 0.
 */
static void append_bin_stats(const char *key, const uint16_t klen,
                             const char *val, const uint32_t vlen,
                             conn *c) {
    const uint32_t bodylen = klen + vlen;
    protocol_binary_response_header header = {
        .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
        .response.opcode = PROTOCOL_BINARY_CMD_STAT,
        .response.keylen = (uint16_t)htons(klen),
        .response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES,
        .response.bodylen = htonl(bodylen),
        .response.opaque = c->opaque
    };
    char *out = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;

    memcpy(out, header.bytes, sizeof(header.response));
    out += sizeof(header.response);

    if (klen > 0) {
        memcpy(out, key, klen);
        if (vlen > 0) {
            memcpy(out + klen, val, vlen);
        }
    }

    c->dynamic_buffer.offset += sizeof(header.response) + bodylen;
}
1857
1858 /**
1859 * Append a key-value pair to the stats output buffer. This function assumes
1860 * that the output buffer is big enough (it will be if you call it through
1861 * append_stats)
1862 */
append_ascii_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,conn * c)1863 static void append_ascii_stats(const char *key, const uint16_t klen,
1864 const char *val, const uint32_t vlen,
1865 conn *c) {
1866 char *pos = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
1867 uint32_t nbytes = 5; /* "END\r\n" or "STAT " */
1868
1869 if (klen == 0 && vlen == 0) {
1870 memcpy(pos, "END\r\n", 5);
1871 } else {
1872 memcpy(pos, "STAT ", 5);
1873 memcpy(pos + nbytes, key, klen);
1874 nbytes += klen;
1875 if (vlen != 0) {
1876 pos[nbytes] = ' ';
1877 ++nbytes;
1878 memcpy(pos + nbytes, val, vlen);
1879 nbytes += vlen;
1880 }
1881 memcpy(pos + nbytes, "\r\n", 2);
1882 nbytes += 2;
1883 }
1884
1885 c->dynamic_buffer.offset += nbytes;
1886 }
1887
/**
 * Make sure the connection's dynamic buffer has at least `needed` free
 * bytes after its current offset, doubling its size until it does.
 * A missing buffer is allocated fresh, starting from a base size of 1024.
 *
 * @return true if the buffer is large enough on return, false if the
 *         reallocation failed (the existing buffer is left untouched)
 */
static bool grow_dynamic_buffer(conn *c, size_t needed) {
    size_t target = c->dynamic_buffer.size;
    size_t room = target - c->dynamic_buffer.offset;

    /* Special case: No buffer -- need to allocate fresh */
    if (c->dynamic_buffer.buffer == NULL) {
        c->dynamic_buffer.offset = 0;
        c->dynamic_buffer.size = 0;
        room = 0;
        target = 1024;
    }

    while (room < needed) {
        assert(target > 0);
        target <<= 1;
        room = target - c->dynamic_buffer.offset;
    }

    if (target == c->dynamic_buffer.size) {
        /* Already big enough; nothing to reallocate. */
        return true;
    }

    char *grown = realloc(c->dynamic_buffer.buffer, target);
    if (grown == NULL) {
        return false;
    }

    c->dynamic_buffer.buffer = grown;
    c->dynamic_buffer.size = target;
    return true;
}
1917
append_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,const void * cookie)1918 static void append_stats(const char *key, const uint16_t klen,
1919 const char *val, const uint32_t vlen,
1920 const void *cookie)
1921 {
1922 /* value without a key is invalid */
1923 if (klen == 0 && vlen > 0) {
1924 return ;
1925 }
1926
1927 conn *c = (conn*)cookie;
1928
1929 if (c->protocol == binary_prot) {
1930 size_t needed = vlen + klen + sizeof(protocol_binary_response_header);
1931 if (!grow_dynamic_buffer(c, needed)) {
1932 return ;
1933 }
1934 append_bin_stats(key, klen, val, vlen, c);
1935 } else {
1936 size_t needed = vlen + klen + 10; // 10 == "STAT = \r\n"
1937 if (!grow_dynamic_buffer(c, needed)) {
1938 return ;
1939 }
1940 append_ascii_stats(key, klen, val, vlen, c);
1941 }
1942
1943 assert(c->dynamic_buffer.offset <= c->dynamic_buffer.size);
1944 }
1945
/**
 * Check whether the first bytes of a length-delimited key equal `word`.
 * The key is not NUL-terminated, so its length is checked before any
 * bytes are compared.
 */
static bool bin_stat_key_starts_with(const char *key, size_t nkey,
                                     const char *word) {
    size_t wlen = strlen(word);
    return nkey >= wlen && memcmp(key, word, wlen) == 0;
}

/**
 * Execute a binary-protocol STAT request.
 *
 * The request key selects the statistics group: an empty key requests
 * everything; "reset", "settings", "detail ...", "aggregate" and
 * "topkeys" are handled here; any other key is passed to the engine's
 * get_stats(). The collected stats, followed by an empty terminating
 * packet, are sent from the connection's dynamic buffer.
 */
static void process_bin_stat(conn *c) {
    char *subcommand = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "STATS", subcommand, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        if (nkey == 0) {
            /* request all statistics */
            ret = settings.engine.v1->get_stats(settings.engine.v0, c, NULL, 0, append_stats);
            if (ret == ENGINE_SUCCESS) {
                server_stats(&append_stats, c, false);
            }
        } else if (bin_stat_key_starts_with(subcommand, nkey, "reset")) {
            stats_reset(c);
            settings.engine.v1->reset_stats(settings.engine.v0, c);
        } else if (bin_stat_key_starts_with(subcommand, nkey, "settings")) {
            process_stat_settings(&append_stats, c);
        } else if (bin_stat_key_starts_with(subcommand, nkey, "detail")) {
            /* What follows "detail" selects the action. */
            char *subcmd_pos = subcommand + 6;
            size_t subcmd_len = nkey - 6;
            if (settings.allow_detailed) {
                if (bin_stat_key_starts_with(subcmd_pos, subcmd_len, " dump")) {
                    int len;
                    char *dump_buf = stats_prefix_dump(&len);
                    if (dump_buf == NULL || len <= 0) {
                        /* free(NULL) is a no-op; this also covers a
                           non-NULL but empty dump. */
                        free(dump_buf);
                        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                        return ;
                    } else {
                        append_stats("detailed", strlen("detailed"), dump_buf, len, c);
                        free(dump_buf);
                    }
                } else if (bin_stat_key_starts_with(subcmd_pos, subcmd_len, " on")) {
                    settings.detail_enabled = 1;
                } else if (bin_stat_key_starts_with(subcmd_pos, subcmd_len, " off")) {
                    settings.detail_enabled = 0;
                } else {
                    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                    return;
                }
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                return;
            }
        } else if (bin_stat_key_starts_with(subcommand, nkey, "aggregate")) {
            server_stats(&append_stats, c, true);
        } else if (bin_stat_key_starts_with(subcommand, nkey, "topkeys")) {
            topkeys_t *tk = get_independent_stats(c)->topkeys;
            if (tk != NULL) {
                topkeys_stats(tk, c, current_time, append_stats);
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                return;
            }
        } else {
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                subcommand, nkey,
                                                append_stats);
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        /* An empty key/value pair terminates the stats stream. */
        append_stats(NULL, 0, NULL, 0, c);
        write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
        c->dynamic_buffer.buffer = NULL;
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
2045
/**
 * Prepare the connection to read `chunk` more bytes of the current binary
 * request, after which processing continues in `next_substate`.
 *
 * If the bytes do not fit behind the request header in the read buffer,
 * the buffer is doubled until they do, and the pending data is moved to
 * the front of the buffer. The request header itself is kept in the
 * buffer. If the buffer cannot be grown the connection is closed.
 */
static void bin_read_chunk(conn *c, enum bin_substates next_substate, uint32_t chunk) {
    assert(c);
    c->substate = next_substate;
    c->rlbytes = chunk;

    /* Ok... do we have room for everything in our buffer? */
    ptrdiff_t body_start = c->rcurr + sizeof(protocol_binary_request_header) - c->rbuf;
    if (c->rlbytes > c->rsize - body_start) {
        size_t wanted = c->rlbytes + sizeof(protocol_binary_request_header);
        size_t grown = c->rsize;

        while (grown < wanted) {
            grown *= 2;
        }

        if (grown != c->rsize) {
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                        "%d: Need to grow buffer from %lu to %lu\n",
                        c->sfd, (unsigned long)c->rsize, (unsigned long)grown);
            }

            char *moved = realloc(c->rbuf, grown);
            if (moved == NULL) {
                if (settings.verbose) {
                    settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                            "%d: Failed to grow buffer.. closing connection\n",
                            c->sfd);
                }
                conn_set_state(c, conn_closing);
                return;
            }

            c->rbuf = moved;
            /* rcurr should point to the same offset in the packet */
            c->rcurr = c->rbuf + body_start - sizeof(protocol_binary_request_header);
            c->rsize = grown;
        }

        if (c->rcurr != c->rbuf) {
            /* Slide the unread data down to the start of the buffer. */
            memmove(c->rbuf, c->rcurr, c->rbytes);
            c->rcurr = c->rbuf;
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                                "%d: Repack input buffer\n",
                                                c->sfd);
            }
        }
    }

    /* preserve the header in the buffer.. */
    c->ritem = c->rcurr + sizeof(protocol_binary_request_header);
    conn_set_state(c, conn_nread);
}
2098
/**
 * Read the key of the current binary request plus @p extra additional
 * bytes, then continue in @p next_substate.
 */
static void bin_read_key(conn *c, enum bin_substates next_substate, int extra) {
    const uint32_t wanted = c->keylen + extra;

    bin_read_chunk(c, next_substate, wanted);
}
2102
2103
2104 /* Just write an error message and disconnect the client */
handle_binary_protocol_error(conn * c)2105 static void handle_binary_protocol_error(conn *c) {
2106 write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
2107 if (settings.verbose) {
2108 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
2109 "%d: Protocol error (opcode %02x), close connection\n",
2110 c->sfd, c->binary_header.request.opcode);
2111 }
2112 c->write_and_go = conn_closing;
2113 }
2114
/**
 * Lazily create the SASL server context of a connection.
 *
 * Does nothing when the connection already has a context. If the context
 * cannot be created, c->sasl_conn is left as NULL.
 */
static void init_sasl_conn(conn *c) {
    assert(c);

    if (c->sasl_conn) {
        /* Already initialized. */
        return;
    }

    int rc = sasl_server_new("memcached",
                             NULL, NULL, NULL, NULL,
                             NULL, 0, &c->sasl_conn);
    if (rc == SASL_OK) {
        return;
    }

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                "%d: Failed to initialize SASL conn.\n",
                c->sfd);
    }
    c->sasl_conn = NULL;
}
2131
/**
 * Fill @p data with the SASL properties of the connection behind @p cookie.
 *
 * When the connection has no SASL context, @p data is left untouched.
 */
static void get_auth_data(const void *cookie, auth_data_t *data) {
    conn *connection = (conn*)cookie;

    if (!connection->sasl_conn) {
        return;
    }

    sasl_getprop(connection->sasl_conn, SASL_USERNAME, (void*)&data->username);
#ifdef ENABLE_ISASL
    sasl_getprop(connection->sasl_conn, ISASL_CONFIG, (void*)&data->config);
#endif
}
2141
#ifdef SASL_ENABLED
/**
 * Handle SASL_LIST_MECHS: reply with the space separated list of SASL
 * mechanisms, or with an AUTH_ERROR packet if the list cannot be obtained.
 */
static void bin_list_sasl_mechs(conn *c) {
    init_sasl_conn(c);

    const char *mechs = NULL;
    unsigned int mechs_len = 0;
    int rc = sasl_listmech(c->sasl_conn, NULL,
                           "",   /* What to prepend the string with */
                           " ",  /* What to separate mechanisms with */
                           "",   /* What to append to the string */
                           &mechs, &mechs_len,
                           NULL);

    if (rc == SASL_OK) {
        write_bin_response(c, (char*)mechs, 0, 0, mechs_len);
        return;
    }

    /* Perhaps there's a better error for this... */
    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                "%d: Failed to list SASL mechanisms.\n",
                c->sfd);
    }
    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
}
#endif
2166
/**
 * Temporary storage for a SASL AUTH/STEP request while its value is still
 * being read from the socket. process_bin_sasl_auth() allocates one and
 * parks it in c->item; process_bin_complete_sasl_auth() consumes and
 * frees it.
 */
struct sasl_tmp {
    int ksize;   /* length of the key (the mechanism name) stored in data */
    int vsize;   /* length of the value that follows the key in data */
    char data[]; /* data + ksize == value */
};
2172
/**
 * First stage of a SASL AUTH/STEP request: the key (mechanism name) has
 * been read, so allocate a temporary buffer holding the key and schedule
 * reading the value (the client's SASL data) behind it.
 *
 * On success c->item owns a struct sasl_tmp and the connection is put in
 * conn_nread with substate bin_reading_sasl_auth_data.
 *
 * Error handling:
 *  - body shorter than the key (malformed header): EINVAL, then close
 *  - mechanism name longer than MAX_SASL_MECH_LEN: EINVAL, swallow the value
 *  - out of memory: ENOMEM, swallow the value
 */
static void process_bin_sasl_auth(conn *c) {
    assert(c->binary_header.request.extlen == 0);

    int nkey = c->binary_header.request.keylen;
    int vlen = c->binary_header.request.bodylen - nkey;

    /*
     * A body shorter than the key would make vlen negative. That value is
     * used below both as an allocation size and as the number of bytes to
     * read into the allocation, so it must be rejected up front. The
     * stream cannot be resynchronized after such a header: close it.
     */
    if (vlen < 0) {
        handle_binary_protocol_error(c);
        return;
    }

    if (nkey > MAX_SASL_MECH_LEN) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, vlen);
        c->write_and_go = conn_swallow;
        return;
    }

    char *key = binary_get_key(c);
    assert(key);

    /* Header + key + value + 2 spare (zeroed) bytes. */
    size_t buffer_size = sizeof(struct sasl_tmp) + nkey + vlen + 2;
    struct sasl_tmp *data = calloc(1, buffer_size);
    if (!data) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        c->write_and_go = conn_swallow;
        return;
    }

    data->ksize = nkey;
    data->vsize = vlen;
    memcpy(data->data, key, nkey);

    /* The value is read straight behind the key inside the temp buffer. */
    c->item = data;
    c->ritem = data->data + nkey;
    c->rlbytes = vlen;
    conn_set_state(c, conn_nread);
    c->substate = bin_reading_sasl_auth_data;
}
2206
/**
 * Second stage of a SASL AUTH/STEP request: the mechanism name and the
 * client data are available in the struct sasl_tmp stored in c->item.
 * Run the SASL exchange and send the outcome to the client:
 *
 *  - SASL_OK:       "Authenticated" response, ON_AUTH callbacks are fired
 *  - SASL_CONTINUE: AUTH_CONTINUE response carrying the server challenge
 *  - anything else: AUTH_ERROR response
 *
 * The temporary buffer in c->item is always released.
 */
static void process_bin_complete_sasl_auth(conn *c) {
    const char *out = NULL;
    unsigned int outlen = 0;

    assert(c->item);
    init_sasl_conn(c);

    int nkey = c->binary_header.request.keylen;
    int vlen = c->binary_header.request.bodylen - nkey;

    /* Make a NUL terminated copy of the mechanism name. */
    struct sasl_tmp *stmp = c->item;
    char mech[nkey+1];
    memcpy(mech, stmp->data, nkey);
    mech[nkey] = 0x00;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: mech: ``%s'' with %d bytes of data\n", c->sfd, mech, vlen);
    }

    const char *challenge = vlen == 0 ? NULL : (stmp->data + nkey);

    int result=-1;

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SASL_AUTH:
        result = sasl_server_start(c->sasl_conn, mech,
                                   challenge, vlen,
                                   &out, &outlen);
        break;
    case PROTOCOL_BINARY_CMD_SASL_STEP:
        result = sasl_server_step(c->sasl_conn,
                                  challenge, vlen,
                                  &out, &outlen);
        break;
    default:
        assert(false); /* CMD should be one of the above */
        /* This code is pretty much impossible, but makes the compiler
           happier */
        if (settings.verbose) {
            /* challenge is NULL for an empty value; never hand NULL to %s */
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                    "%d: Unhandled command %d with challenge %s\n",
                    c->sfd, c->cmd, challenge ? challenge : "(null)");
        }
        break;
    }

    /* The request data is not needed any more (challenge is now dangling). */
    free(c->item);
    c->item = NULL;
    c->ritem = NULL;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "%d: sasl result code: %d\n",
                                        c->sfd, result);
    }

    switch(result) {
    case SASL_OK: {
        /*
         * get_auth_data() only fills in what SASL can provide, so start
         * from a zeroed structure to avoid handing uninitialized
         * pointers to the ON_AUTH callbacks.
         */
        auth_data_t data;
        memset(&data, 0, sizeof(data));

        write_bin_response(c, "Authenticated", 0, 0, strlen("Authenticated"));
        get_auth_data(c, &data);
        perform_callbacks(ON_AUTH, (const void*)&data, c);
        STATS_NOKEY(c, auth_cmds);
        break;
    }
    case SASL_CONTINUE:
        add_bin_header(c, PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE, 0, 0, outlen);
        if(outlen > 0) {
            add_iov(c, out, outlen);
        }
        conn_set_state(c, conn_mwrite);
        c->write_and_go = conn_new_cmd;
        break;
    default:
        if (settings.verbose) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "%d: Unknown sasl response: %d\n",
                                            c->sfd, result);
        }
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        STATS_NOKEY2(c, auth_cmds, auth_errors);
    }
}
2290
/**
 * Decide whether the current command may be executed on this connection
 * when SASL authentication is required.
 *
 * The SASL commands themselves and VERSION are always allowed. Every other
 * command is allowed only if the connection has a SASL context that
 * reports a user name.
 *
 * @return true if the command may proceed
 */
static bool authenticated(conn *c) {
    bool allowed = false;

    if (c->cmd == PROTOCOL_BINARY_CMD_SASL_LIST_MECHS ||
        c->cmd == PROTOCOL_BINARY_CMD_SASL_AUTH ||
        c->cmd == PROTOCOL_BINARY_CMD_SASL_STEP ||
        c->cmd == PROTOCOL_BINARY_CMD_VERSION) {
        allowed = true;
    } else if (c->sasl_conn) {
        const void *uname = NULL;
        sasl_getprop(c->sasl_conn, SASL_USERNAME, &uname);
        allowed = (uname != NULL);
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: authenticated() in cmd 0x%02x is %s\n",
                c->sfd, c->cmd, allowed ? "true" : "false");
    }

    return allowed;
}
2317
binary_response_handler(const void * key,uint16_t keylen,const void * ext,uint8_t extlen,const void * body,uint32_t bodylen,uint8_t datatype,uint16_t status,uint64_t cas,const void * cookie)2318 static bool binary_response_handler(const void *key, uint16_t keylen,
2319 const void *ext, uint8_t extlen,
2320 const void *body, uint32_t bodylen,
2321 uint8_t datatype, uint16_t status,
2322 uint64_t cas, const void *cookie)
2323 {
2324 conn *c = (conn*)cookie;
2325 /* Look at append_bin_stats */
2326 size_t needed = keylen + extlen + bodylen + sizeof(protocol_binary_response_header);
2327 if (!grow_dynamic_buffer(c, needed)) {
2328 if (settings.verbose > 0) {
2329 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
2330 "<%d ERROR: Failed to allocate memory for response\n",
2331 c->sfd);
2332 }
2333 return false;
2334 }
2335
2336 char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
2337 protocol_binary_response_header header = {
2338 .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
2339 .response.opcode = c->binary_header.request.opcode,
2340 .response.keylen = (uint16_t)htons(keylen),
2341 .response.extlen = extlen,
2342 .response.datatype = datatype,
2343 .response.status = (uint16_t)htons(status),
2344 .response.bodylen = htonl(bodylen + keylen + extlen),
2345 .response.opaque = c->opaque,
2346 .response.cas = htonll(cas),
2347 };
2348
2349 memcpy(buf, header.bytes, sizeof(header.response));
2350 buf += sizeof(header.response);
2351
2352 if (extlen > 0) {
2353 memcpy(buf, ext, extlen);
2354 buf += extlen;
2355 }
2356
2357 if (keylen > 0) {
2358 memcpy(buf, key, keylen);
2359 buf += keylen;
2360 }
2361
2362 if (bodylen > 0) {
2363 memcpy(buf, body, bodylen);
2364 }
2365
2366 c->dynamic_buffer.offset += needed;
2367
2368 return true;
2369 }
2370
/**
 * Tap stats (these are only used by the tap thread, so they don't need
 * to be in the threadlocal struct right now...).
 *
 * One counter per kind of TAP message. Two instances live in
 * struct tap_stats: one for messages sent and one for messages received.
 */
struct tap_cmd_stats {
    uint64_t connect;          /* TAP_CONNECT */
    uint64_t mutation;         /* TAP_MUTATION */
    uint64_t checkpoint_start; /* TAP_CHECKPOINT_START */
    uint64_t checkpoint_end;   /* TAP_CHECKPOINT_END */
    uint64_t delete;           /* TAP_DELETE */
    uint64_t flush;            /* TAP_FLUSH */
    uint64_t opaque;           /* TAP_OPAQUE */
    uint64_t vbucket_set;      /* TAP_VBUCKET_SET */
};
2385
/**
 * Global TAP message counters. Every update of the counters in this file
 * is done while holding the mutex.
 */
struct tap_stats {
    pthread_mutex_t mutex;         /* guards the counters below */
    struct tap_cmd_stats sent;     /* messages shipped by ship_tap_log() */
    struct tap_cmd_stats received; /* messages counted in process_bin_packet() */
} tap_stats = { .mutex = PTHREAD_MUTEX_INITIALIZER };
2391
/**
 * Pull events from the connection's TAP iterator and queue the matching
 * binary protocol messages for transmission.
 *
 * The output state (msghdr/iov bookkeeping and the write buffer cursor) is
 * reset first. Then up to 10 events are fetched from c->tap_iterator per
 * call; the loop also stops on TAP_PAUSE and TAP_DISCONNECT. Message
 * headers and engine specific data are copied into c->wbuf, keys and
 * values are referenced in place through add_iov().
 *
 * Afterwards:
 *  - if anything was queued, the connection goes to conn_mwrite and then
 *    either back to conn_ship_log or, after TAP_DISCONNECT, to conn_closing
 *  - if nothing was queued, the connection is closed on TAP_DISCONNECT or
 *    otherwise marked as blocked (c->ewouldblock = true)
 *
 * NOTE(review): nothing here checks that c->wbuf or c->ilist have room for
 * the data written to them (see the @todo below) -- confirm the sizes are
 * guaranteed elsewhere.
 */
static void ship_tap_log(conn *c) {
    assert(c->thread->type == TAP);
    /* Start with an empty set of output messages. */
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    if (add_msghdr(c) != 0) {
        if (settings.verbose) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: Failed to create output headers. Shutting down tap connection\n", c->sfd);
        }
        conn_set_state(c, conn_closing);
        return ;
    }
    /* @todo add check for buffer overflow of c->wbuf) */
    c->wcurr = c->wbuf;

    bool more_data = true;   /* keep asking the iterator for events */
    bool send_data = false;  /* at least one message was queued */
    bool disconnect = false; /* the iterator asked us to disconnect */

    item *it;
    uint32_t bodylen;
    int ii = 0;
    c->icurr = c->ilist;
    do {
        /* @todo fixme! */
        /* Hard limit of 10 iterator calls per invocation. */
        if (ii++ == 10) {
            break;
        }

        void *engine;
        uint16_t nengine;
        uint8_t ttl;
        uint16_t tap_flags;
        uint32_t seqno;
        uint16_t vbucket;

        tap_event_t event = c->tap_iterator(settings.engine.v0, c, &it,
                                            &engine, &nengine, &ttl,
                                            &tap_flags, &seqno, &vbucket);
        /* All message layouts share one buffer; only the magic is preset. */
        union {
            protocol_binary_request_tap_mutation mutation;
            protocol_binary_request_tap_delete delete;
            protocol_binary_request_tap_flush flush;
            protocol_binary_request_tap_opaque opaque;
            protocol_binary_request_noop noop;
        } msg = {
            .mutation.message.header.request.magic = (uint8_t)PROTOCOL_BINARY_REQ,
        };

        /* Fields filled in for every event; extlen defaults to 8 and is
         * overridden below for NOOP (0) and mutations (16). */
        msg.opaque.message.header.request.opaque = htonl(seqno);
        msg.opaque.message.body.tap.enginespecific_length = htons(nengine);
        msg.opaque.message.body.tap.ttl = ttl;
        msg.opaque.message.body.tap.flags = htons(tap_flags);
        msg.opaque.message.header.request.extlen = 8;
        msg.opaque.message.header.request.vbucket = htons(vbucket);
        item_info info = { .nvalue = 1 };

        switch (event) {
        case TAP_NOOP :
            send_data = true;
            msg.noop.message.header.request.opcode = PROTOCOL_BINARY_CMD_NOOP;
            msg.noop.message.header.request.extlen = 0;
            msg.noop.message.header.request.bodylen = htonl(0);
            memcpy(c->wcurr, msg.noop.bytes, sizeof(msg.noop.bytes));
            add_iov(c, c->wcurr, sizeof(msg.noop.bytes));
            c->wcurr += sizeof(msg.noop.bytes);
            c->wbytes += sizeof(msg.noop.bytes);
            break;
        case TAP_PAUSE :
            more_data = false;
            break;
        case TAP_CHECKPOINT_START:
        case TAP_CHECKPOINT_END:
        case TAP_MUTATION:
            if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
                /* Skip this event; the loop continues with the next one. */
                settings.engine.v1->release(settings.engine.v0, c, it);
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                                "%d: Failed to get item info\n", c->sfd);
                break;
            }
            send_data = true;
            /* Remember the item; presumably it is released once the write
             * has completed -- TODO confirm against the conn_mwrite path. */
            c->ilist[c->ileft++] = it;

            if (event == TAP_CHECKPOINT_START) {
                msg.mutation.message.header.request.opcode =
                    PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.checkpoint_start++;
                pthread_mutex_unlock(&tap_stats.mutex);
            } else if (event == TAP_CHECKPOINT_END) {
                msg.mutation.message.header.request.opcode =
                    PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.checkpoint_end++;
                pthread_mutex_unlock(&tap_stats.mutex);
            } else if (event == TAP_MUTATION) {
                msg.mutation.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_MUTATION;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.mutation++;
                pthread_mutex_unlock(&tap_stats.mutex);
            }

            msg.mutation.message.header.request.cas = htonll(info.cas);
            msg.mutation.message.header.request.keylen = htons(info.nkey);
            msg.mutation.message.header.request.extlen = 16;

            /* body = 16 bytes of extras + engine data + key (+ value) */
            bodylen = 16 + info.nkey + nengine;
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                bodylen += info.nbytes;
            }
            msg.mutation.message.header.request.bodylen = htonl(bodylen);
            msg.mutation.message.body.item.flags = htonl(info.flags);
            msg.mutation.message.body.item.expiration = htonl(info.exptime);
            msg.mutation.message.body.tap.enginespecific_length = htons(nengine);
            msg.mutation.message.body.tap.ttl = ttl;
            msg.mutation.message.body.tap.flags = htons(tap_flags);
            memcpy(c->wcurr, msg.mutation.bytes, sizeof(msg.mutation.bytes));

            add_iov(c, c->wcurr, sizeof(msg.mutation.bytes));
            c->wcurr += sizeof(msg.mutation.bytes);
            c->wbytes += sizeof(msg.mutation.bytes);

            if (nengine > 0) {
                memcpy(c->wcurr, engine, nengine);
                add_iov(c, c->wcurr, nengine);
                c->wcurr += nengine;
                c->wbytes += nengine;
            }

            /* Key and value are sent straight from the item's memory. */
            add_iov(c, info.key, info.nkey);
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
            }

            break;
        case TAP_DELETION:
            /* This is a delete */
            if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
                /* Skip this event; the loop continues with the next one. */
                settings.engine.v1->release(settings.engine.v0, c, it);
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                                "%d: Failed to get item info\n", c->sfd);
                break;
            }
            send_data = true;
            c->ilist[c->ileft++] = it;
            msg.delete.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_DELETE;
            msg.delete.message.header.request.cas = htonll(info.cas);
            msg.delete.message.header.request.keylen = htons(info.nkey);

            /* body = 8 bytes of extras + engine data + key (+ value) */
            bodylen = 8 + info.nkey + nengine;
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                bodylen += info.nbytes;
            }
            msg.delete.message.header.request.bodylen = htonl(bodylen);

            memcpy(c->wcurr, msg.delete.bytes, sizeof(msg.delete.bytes));
            add_iov(c, c->wcurr, sizeof(msg.delete.bytes));
            c->wcurr += sizeof(msg.delete.bytes);
            c->wbytes += sizeof(msg.delete.bytes);

            if (nengine > 0) {
                memcpy(c->wcurr, engine, nengine);
                add_iov(c, c->wcurr, nengine);
                c->wcurr += nengine;
                c->wbytes += nengine;
            }

            add_iov(c, info.key, info.nkey);
            if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
                add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
            }

            pthread_mutex_lock(&tap_stats.mutex);
            tap_stats.sent.delete++;
            pthread_mutex_unlock(&tap_stats.mutex);
            break;

        case TAP_DISCONNECT:
            disconnect = true;
            more_data = false;
            break;
        case TAP_VBUCKET_SET:
        case TAP_FLUSH:
        case TAP_OPAQUE:
            /* These three share the flush message layout. */
            send_data = true;

            if (event == TAP_OPAQUE) {
                msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_OPAQUE;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.opaque++;
                pthread_mutex_unlock(&tap_stats.mutex);

            } else if (event == TAP_FLUSH) {
                msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_FLUSH;
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.flush++;
                pthread_mutex_unlock(&tap_stats.mutex);
            } else if (event == TAP_VBUCKET_SET) {
                msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET;
                msg.flush.message.body.tap.flags = htons(tap_flags);
                pthread_mutex_lock(&tap_stats.mutex);
                tap_stats.sent.vbucket_set++;
                pthread_mutex_unlock(&tap_stats.mutex);
            }

            /* body = 8 bytes of extras + engine data */
            msg.flush.message.header.request.bodylen = htonl(8 + nengine);
            memcpy(c->wcurr, msg.flush.bytes, sizeof(msg.flush.bytes));
            add_iov(c, c->wcurr, sizeof(msg.flush.bytes));
            c->wcurr += sizeof(msg.flush.bytes);
            c->wbytes += sizeof(msg.flush.bytes);
            if (nengine > 0) {
                memcpy(c->wcurr, engine, nengine);
                add_iov(c, c->wcurr, nengine);
                c->wcurr += nengine;
                c->wbytes += nengine;
            }
            break;
        default:
            /* Unknown event from the iterator: treated as fatal. */
            abort();
        }
    } while (more_data);

    c->ewouldblock = false;
    if (send_data) {
        /* Flush what was queued, then either continue shipping or close. */
        conn_set_state(c, conn_mwrite);
        if (disconnect) {
            c->write_and_go = conn_closing;
        } else {
            c->write_and_go = conn_ship_log;
        }
    } else {
        if (disconnect) {
            conn_set_state(c, conn_closing);
        } else {
            /* No more items to ship to the slave at this time.. suspend.. */
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                                "%d: No more items in tap log.. waiting\n",
                                                c->sfd);
            }
            c->ewouldblock = true;
        }
    }
}
2637
/**
 * Hand a fully read packet with an opcode the core does not know to the
 * engine's unknown_command() hook and act on its verdict.
 *
 * A status left in c->aiostat is consumed first; the engine is only called
 * when that status is ENGINE_SUCCESS.
 */
static void process_bin_unknown_packet(conn *c) {
    /* The packet (header + body) ends where rcurr points. */
    void *packet = c->rcurr - (c->binary_header.request.bodylen +
                               sizeof(c->binary_header));

    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->unknown_command(settings.engine.v0, c,
                                                     packet,
                                                     binary_response_handler);
    }

    switch (status) {
    case ENGINE_SUCCESS:
        if (c->dynamic_buffer.buffer == NULL) {
            /* The engine produced no response data. */
            conn_set_state(c, conn_new_cmd);
        } else {
            write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        /* FATAL ERROR, shut down connection */
        conn_set_state(c, conn_closing);
        break;
    }
}
2667
/**
 * Handle a fully read TAP_CONNECT request: parse the optional flags, the
 * connection name (key) and the user data, then ask the engine for a TAP
 * iterator.
 *
 * On success the connection becomes a TAP producer (conn_ship_log). If the
 * engine returns no iterator a NOT_SUPPORTED response is sent and the
 * connection is closed. Malformed requests close the connection at once.
 */
static void process_bin_tap_connect(conn *c) {
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_request_tap_connect *req = (void*)packet;
    const char *key = packet + sizeof(req->bytes);
    const char *data = key + c->binary_header.request.keylen;
    uint32_t flags = 0;

    /*
     * The lengths come straight from the client. If extras + key claim more
     * bytes than the body holds, the user data length would wrap around to
     * a huge value, so reject such a packet before using any of them.
     */
    uint32_t bodylen = c->binary_header.request.bodylen;
    uint32_t overhead = (uint32_t)c->binary_header.request.extlen +
                        (uint32_t)c->binary_header.request.keylen;
    if (bodylen < overhead) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: ERROR: Invalid tap connect message\n",
                                        c->sfd);
        conn_set_state(c, conn_closing);
        return ;
    }
    size_t ndata = bodylen - overhead;

    if (c->binary_header.request.extlen == 4) {
        flags = ntohl(req->message.body.flags);

        if (flags & TAP_CONNECT_FLAG_BACKFILL) {
            /* the userdata has to be at least 8 bytes! */
            if (ndata < 8) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                                "%d: ERROR: Invalid tap connect message\n",
                                                c->sfd);
                conn_set_state(c, conn_closing);
                return ;
            }
        }
    } else {
        /* No flags in the packet: key and data start 4 bytes earlier. */
        data -= 4;
        key -= 4;
    }

    if (settings.verbose && c->binary_header.request.keylen > 0) {
        /* Log a (possibly truncated) NUL terminated copy of the name. */
        char buffer[1024];
        int len = c->binary_header.request.keylen;
        if (len >= sizeof(buffer)) {
            len = sizeof(buffer) - 1;
        }
        memcpy(buffer, key, len);
        buffer[len] = '\0';
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: Trying to connect with named tap connection: <%s>\n",
                                        c->sfd, buffer);
    }

    TAP_ITERATOR iterator = settings.engine.v1->get_tap_iterator(
        settings.engine.v0, c, key, c->binary_header.request.keylen,
        flags, data, ndata);

    if (iterator == NULL) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: FATAL: The engine does not support tap\n",
                                        c->sfd);
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        c->write_and_go = conn_closing;
    } else {
        c->tap_iterator = iterator;
        c->which = EV_WRITE;
        conn_set_state(c, conn_ship_log);
    }
}
2726
/**
 * Handle a fully read incoming TAP message (mutation, delete, flush,
 * opaque, vbucket set, checkpoint start/end) by decoding it and passing it
 * to the engine's tap_notify() hook.
 *
 * Packet body layout: 8 bytes of TAP extras, 8 more bytes of item extras
 * for mutations and checkpoints, engine specific data, key, value.
 *
 * Result handling:
 *  - ENGINE_DISCONNECT:  close the connection
 *  - ENGINE_EWOULDBLOCK: mark the connection as blocked
 *  - otherwise: send a status packet if the peer asked for an ack or if
 *    the call failed in nack mode, else go on with the next command
 *
 * A packet whose declared lengths do not fit into its body is rejected by
 * closing the connection.
 *
 * @param event the TAP event matching the packet's opcode
 * @param c     the connection the packet arrived on
 */
static void process_bin_tap_packet(tap_event_t event, conn *c) {
    assert(c != NULL);
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_request_tap_no_extras *tap = (void*)packet;
    uint32_t bodylen = c->binary_header.request.bodylen;
    bool has_item_extras = (event == TAP_MUTATION ||
                            event == TAP_CHECKPOINT_START ||
                            event == TAP_CHECKPOINT_END);

    /* The TAP extras must be present before any of them can be read. */
    if (bodylen < 8) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: ERROR: Invalid tap packet\n",
                                        c->sfd);
        conn_set_state(c, conn_closing);
        return;
    }

    uint16_t nengine = ntohs(tap->message.body.tap.enginespecific_length);
    uint16_t tap_flags = ntohs(tap->message.body.tap.flags);
    uint32_t seqno = ntohl(tap->message.header.request.opaque);
    uint8_t ttl = tap->message.body.tap.ttl;
    /* NOTE(review): ttl comes from the peer; an assert is a weak check. */
    assert(ttl > 0);
    char *engine_specific = packet + sizeof(tap->bytes);
    char *key = engine_specific + nengine;
    uint16_t nkey = c->binary_header.request.keylen;
    char *data = key + nkey;
    uint32_t flags = 0;
    uint32_t exptime = 0;

    /*
     * nengine and nkey are controlled by the peer. Without this check the
     * value length below would wrap around and key/data would point
     * outside of the packet.
     */
    uint32_t overhead = 8u + (has_item_extras ? 8u : 0u) +
                        (uint32_t)nengine + (uint32_t)nkey;
    if (bodylen < overhead) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: ERROR: Invalid tap packet\n",
                                        c->sfd);
        conn_set_state(c, conn_closing);
        return;
    }
    uint32_t ndata = bodylen - overhead;

    if (has_item_extras) {
        /* Item flags and expiration sit between TAP extras and the rest. */
        protocol_binary_request_tap_mutation *mutation = (void*)tap;
        flags = ntohl(mutation->message.body.item.flags);
        exptime = ntohl(mutation->message.body.item.expiration);
        key += 8;
        data += 8;
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->tap_notify(settings.engine.v0, c,
                                             engine_specific, nengine,
                                             ttl - 1, tap_flags,
                                             event, seqno,
                                             key, nkey,
                                             flags, exptime,
                                             ntohll(tap->message.header.request.cas),
                                             data, ndata,
                                             c->binary_header.request.vbucket);
    }

    switch (ret) {
    case ENGINE_DISCONNECT:
        conn_set_state(c, conn_closing);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        if ((tap_flags & TAP_FLAG_ACK) ||
            (ret != ENGINE_SUCCESS && c->tap_nack_mode))
        {
            write_bin_packet(c, engine_error_2_protocol_error(ret), 0);
        } else {
            conn_set_state(c, conn_new_cmd);
        }
    }
}
2785
/**
 * Handle a response packet that acknowledges a TAP message we sent: pass
 * the sequence number, status and key on to the engine as a TAP_ACK event.
 *
 * The connection is closed when the engine has no tap_notify() hook or
 * asks for a disconnect; otherwise log shipping continues.
 */
static void process_bin_tap_ack(conn *c) {
    assert(c != NULL);

    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_response_no_extras *rsp = (void*)packet;
    uint32_t seqno = ntohl(rsp->message.header.response.opaque);
    uint16_t status = ntohs(rsp->message.header.response.status);
    char *key = packet + sizeof(rsp->bytes);

    /* Without a tap_notify() hook the ack cannot be delivered. */
    ENGINE_ERROR_CODE outcome = ENGINE_DISCONNECT;
    if (settings.engine.v1->tap_notify != NULL) {
        outcome = settings.engine.v1->tap_notify(settings.engine.v0, c,
                                                 NULL, 0, 0, status,
                                                 TAP_ACK, seqno, key,
                                                 c->binary_header.request.keylen,
                                                 0, 0,
                                                 0, NULL, 0, 0);
    }

    if (outcome != ENGINE_DISCONNECT) {
        conn_set_state(c, conn_ship_log);
    } else {
        conn_set_state(c, conn_closing);
    }
}
2809
2810 /**
2811 * We received a noop response.. just ignore it
2812 */
process_bin_noop_response(conn * c)2813 static void process_bin_noop_response(conn *c) {
2814 assert(c != NULL);
2815 conn_set_state(c, conn_new_cmd);
2816 }
2817
/**
 * Handle a VERBOSITY request: set the global log level to the requested
 * value (capped at MAX_VERBOSITY_LEVEL), notify the ON_LOG_LEVEL
 * callbacks and send an empty success response.
 */
static void process_bin_verbosity(conn *c) {
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_request_verbosity *request = (void*)packet;

    uint32_t requested = (uint32_t)ntohl(request->message.body.level);
    if (requested > MAX_VERBOSITY_LEVEL) {
        requested = MAX_VERBOSITY_LEVEL;
    }

    settings.verbose = (int)requested;
    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    write_bin_response(c, NULL, 0, 0, 0);
}
2830
/**
 * Dispatch a fully read binary packet by opcode.
 *
 * TAP messages bump the matching "received" counter (under the tap_stats
 * mutex) and are passed to process_bin_tap_packet(); TAP_CONNECT switches
 * the connection to conn_add_tap_client; VERBOSITY is handled locally and
 * everything else goes to process_bin_unknown_packet().
 */
static void process_bin_packet(conn *c) {
    uint64_t *counter;
    tap_event_t event;

    /* Map the opcode to its counter and TAP event; the special cases
     * are handled right here and return. */
    switch (c->binary_header.request.opcode) {
    case PROTOCOL_BINARY_CMD_TAP_CONNECT:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.connect++;
        pthread_mutex_unlock(&tap_stats.mutex);
        conn_set_state(c, conn_add_tap_client);
        return;
    case PROTOCOL_BINARY_CMD_VERBOSITY:
        process_bin_verbosity(c);
        return;
    case PROTOCOL_BINARY_CMD_TAP_MUTATION:
        counter = &tap_stats.received.mutation;
        event = TAP_MUTATION;
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
        counter = &tap_stats.received.checkpoint_start;
        event = TAP_CHECKPOINT_START;
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
        counter = &tap_stats.received.checkpoint_end;
        event = TAP_CHECKPOINT_END;
        break;
    case PROTOCOL_BINARY_CMD_TAP_DELETE:
        counter = &tap_stats.received.delete;
        event = TAP_DELETION;
        break;
    case PROTOCOL_BINARY_CMD_TAP_FLUSH:
        counter = &tap_stats.received.flush;
        event = TAP_FLUSH;
        break;
    case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
        counter = &tap_stats.received.opaque;
        event = TAP_OPAQUE;
        break;
    case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
        counter = &tap_stats.received.vbucket_set;
        event = TAP_VBUCKET_SET;
        break;
    default:
        process_bin_unknown_packet(c);
        return;
    }

    /* Common tail for all TAP data messages. */
    pthread_mutex_lock(&tap_stats.mutex);
    (*counter)++;
    pthread_mutex_unlock(&tap_stats.mutex);
    process_bin_tap_packet(event, c);
}
2889
2890
2891
2892 typedef void (*RESPONSE_HANDLER)(conn*);
2893 /**
2894 * A map between the response packets op-code and the function to handle
2895 * the response message.
2896 */
2897 static RESPONSE_HANDLER response_handlers[256] = {
2898 [PROTOCOL_BINARY_CMD_NOOP] = process_bin_noop_response,
2899 [PROTOCOL_BINARY_CMD_TAP_MUTATION] = process_bin_tap_ack,
2900 [PROTOCOL_BINARY_CMD_TAP_DELETE] = process_bin_tap_ack,
2901 [PROTOCOL_BINARY_CMD_TAP_FLUSH] = process_bin_tap_ack,
2902 [PROTOCOL_BINARY_CMD_TAP_OPAQUE] = process_bin_tap_ack,
2903 [PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET] = process_bin_tap_ack,
2904 [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START] = process_bin_tap_ack,
2905 [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END] = process_bin_tap_ack
2906 };
2907
/**
 * Validate the header of a binary request and start processing it.
 *
 * Steps:
 *  1. reject the command if SASL is required and the connection is not
 *     authenticated (AUTH_ERROR, then close)
 *  2. reject headers with an oversized key or with key + extras longer
 *     than the body (EINVAL, then close)
 *  3. map the quiet ("Q") opcodes to their regular counterpart and record
 *     the quiet mode in c->noreply
 *  4. check the per-command header constraints and either answer directly
 *     or schedule reading the rest of the packet
 *
 * Any constraint violation in step 4 is answered through
 * handle_binary_protocol_error().
 */
static void dispatch_bin_command(conn *c) {
    int protocol_error = 0;

    int extlen = c->binary_header.request.extlen;
    uint16_t keylen = c->binary_header.request.keylen;
    uint32_t bodylen = c->binary_header.request.bodylen;

    if (settings.require_sasl && !authenticated(c)) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        c->write_and_go = conn_closing;
        return;
    }

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);
    c->noreply = true;

    /* binprot supports 16bit keys, but internals are still 8bit */
    if (keylen > KEY_MAX_LENGTH) {
        handle_binary_protocol_error(c);
        return;
    }

    /*
     * The body always contains the extras and the key, so it can never be
     * shorter than the two together. Several handlers compute the value
     * length as "bodylen - keylen - extlen"; letting an inconsistent header
     * through would make that length negative or wrap around.
     */
    if ((uint32_t)keylen + (uint32_t)extlen > bodylen) {
        handle_binary_protocol_error(c);
        return;
    }

    /* Fold the quiet variants into the regular command. */
    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SETQ:
        c->cmd = PROTOCOL_BINARY_CMD_SET;
        break;
    case PROTOCOL_BINARY_CMD_ADDQ:
        c->cmd = PROTOCOL_BINARY_CMD_ADD;
        break;
    case PROTOCOL_BINARY_CMD_REPLACEQ:
        c->cmd = PROTOCOL_BINARY_CMD_REPLACE;
        break;
    case PROTOCOL_BINARY_CMD_DELETEQ:
        c->cmd = PROTOCOL_BINARY_CMD_DELETE;
        break;
    case PROTOCOL_BINARY_CMD_INCREMENTQ:
        c->cmd = PROTOCOL_BINARY_CMD_INCREMENT;
        break;
    case PROTOCOL_BINARY_CMD_DECREMENTQ:
        c->cmd = PROTOCOL_BINARY_CMD_DECREMENT;
        break;
    case PROTOCOL_BINARY_CMD_QUITQ:
        c->cmd = PROTOCOL_BINARY_CMD_QUIT;
        break;
    case PROTOCOL_BINARY_CMD_FLUSHQ:
        c->cmd = PROTOCOL_BINARY_CMD_FLUSH;
        break;
    case PROTOCOL_BINARY_CMD_APPENDQ:
        c->cmd = PROTOCOL_BINARY_CMD_APPEND;
        break;
    case PROTOCOL_BINARY_CMD_PREPENDQ:
        c->cmd = PROTOCOL_BINARY_CMD_PREPEND;
        break;
    case PROTOCOL_BINARY_CMD_GETQ:
        c->cmd = PROTOCOL_BINARY_CMD_GET;
        break;
    case PROTOCOL_BINARY_CMD_GETKQ:
        c->cmd = PROTOCOL_BINARY_CMD_GETK;
        break;
    default:
        /* Not a quiet command: the client expects a reply. */
        c->noreply = false;
    }

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_VERSION:
        if (extlen == 0 && keylen == 0 && bodylen == 0) {
            write_bin_response(c, VERSION, 0, 0, strlen(VERSION));
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_FLUSH:
        if (keylen == 0 && bodylen == extlen && (extlen == 0 || extlen == 4)) {
            bin_read_key(c, bin_read_flush_exptime, extlen);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_NOOP:
        if (extlen == 0 && keylen == 0 && bodylen == 0) {
            write_bin_response(c, NULL, 0, 0, 0);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_SET: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_ADD: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_REPLACE:
        if (extlen == 8 && keylen != 0 && bodylen >= (keylen + 8)) {
            bin_read_key(c, bin_reading_set_header, 8);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_GETQ:  /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_GET:   /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_GETKQ: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_GETK:
        if (extlen == 0 && bodylen == keylen && keylen > 0) {
            bin_read_key(c, bin_reading_get_key, 0);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_DELETE:
        if (keylen > 0 && extlen == 0 && bodylen == keylen) {
            bin_read_key(c, bin_reading_del_header, extlen);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_INCREMENT:
    case PROTOCOL_BINARY_CMD_DECREMENT:
        if (keylen > 0 && extlen == 20 && bodylen == (keylen + extlen)) {
            bin_read_key(c, bin_reading_incr_header, 20);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_APPEND:
    case PROTOCOL_BINARY_CMD_PREPEND:
        if (keylen > 0 && extlen == 0) {
            bin_read_key(c, bin_reading_set_header, 0);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_STAT:
        if (extlen == 0) {
            bin_read_key(c, bin_reading_stat, 0);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_QUIT:
        if (keylen == 0 && extlen == 0 && bodylen == 0) {
            write_bin_response(c, NULL, 0, 0, 0);
            c->write_and_go = conn_closing;
            if (c->noreply) {
                /* QUITQ: close right away. */
                conn_set_state(c, conn_closing);
            }
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_TAP_CONNECT:
        if (settings.engine.v1->get_tap_iterator == NULL) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
        } else {
            bin_read_chunk(c, bin_reading_packet,
                           c->binary_header.request.bodylen);
        }
        break;
    case PROTOCOL_BINARY_CMD_TAP_MUTATION:
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
    case PROTOCOL_BINARY_CMD_TAP_DELETE:
    case PROTOCOL_BINARY_CMD_TAP_FLUSH:
    case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
    case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
        if (settings.engine.v1->tap_notify == NULL) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
        } else {
            bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
        }
        break;
#ifdef SASL_ENABLED
    case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS:
        if (extlen == 0 && keylen == 0 && bodylen == 0) {
            bin_list_sasl_mechs(c);
        } else {
            protocol_error = 1;
        }
        break;
    case PROTOCOL_BINARY_CMD_SASL_AUTH:
    case PROTOCOL_BINARY_CMD_SASL_STEP:
        if (extlen == 0 && keylen != 0) {
            bin_read_key(c, bin_reading_sasl_auth, 0);
        } else {
            protocol_error = 1;
        }
        break;
#endif
    case PROTOCOL_BINARY_CMD_VERBOSITY:
        if (extlen == 4 && keylen == 0 && bodylen == 4) {
            bin_read_chunk(c, bin_reading_packet,
                           c->binary_header.request.bodylen);
        } else {
            protocol_error = 1;
        }
        break;
    default:
        /* Opcode unknown to the core: let the engine have a look. */
        if (settings.engine.v1->unknown_command == NULL) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND,
                             bodylen);
        } else {
            bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
        }
    }

    if (protocol_error)
        handle_binary_protocol_error(c);
}
3111
/*
 * Handle the extras + key of a binary SET / ADD / REPLACE request.
 *
 * The engine is asked to allocate an item for the value that is still on the
 * wire.  On success the connection is moved to conn_nread with the
 * bin_read_set_value substate and c->ritem pointing at the item's value
 * buffer.  On an allocation failure an error packet is queued and the
 * connection is told to swallow the value bytes.
 */
static void process_bin_update(conn *c) {
    char *key;
    uint16_t nkey;
    uint32_t vlen;
    item *it = NULL;
    protocol_binary_request_set *req;

    /* validate the connection before anything dereferences it */
    assert(c != NULL);

    req = binary_get_request(c);
    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;

    /*
     * Only the expiration is converted to host byte order here; the flags
     * are handed to the engine exactly as they arrived in the request.
     */
    rel_time_t expiration = ntohl(req->message.body.expiration);

    vlen = c->binary_header.request.bodylen - (nkey + c->binary_header.request.extlen);

    if (settings.verbose > 1) {
        char buffer[1024];
        const char *prefix;
        if (c->cmd == PROTOCOL_BINARY_CMD_ADD) {
            prefix = "ADD";
        } else if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            prefix = "SET";
        } else {
            prefix = "REPLACE";
        }

        size_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     prefix, key, nkey);

        if (nw != (size_t)-1) {
            /* vlen is unsigned, so it is printed with %u */
            if (snprintf(buffer + nw, sizeof(buffer) - nw,
                         " Value len is %u\n", (unsigned int)vlen)) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* pick up the status left in aiostat and reset it for the next round */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen,
                                           req->message.body.flags,
                                           expiration);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            /* the item is unusable without its info: drop it and report */
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_ADD:
            c->store_op = OPERATION_ADD;
            break;
        case PROTOCOL_BINARY_CMD_SET:
            c->store_op = OPERATION_SET;
            break;
        case PROTOCOL_BINARY_CMD_REPLACE:
            c->store_op = OPERATION_REPLACE;
            break;
        default:
            assert(0);
        }

        /* a non-zero CAS in the request turns any of the above into a CAS op */
        if (c->binary_header.request.cas != 0) {
            c->store_op = OPERATION_CAS;
        }

        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }

        /*
         * Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET (but only if cas matches).
         * Anywhere else too?
         */
        if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            /* @todo fix this for the ASYNC interface! */
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey,
                                       ntohll(req->message.header.request.cas),
                                       c->binary_header.request.vbucket);
        }

        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3234
/*
 * Handle the key of a binary APPEND / PREPEND request.
 *
 * Allocates an item for the value that is still on the wire and, on
 * success, moves the connection to conn_nread with the bin_read_set_value
 * substate.  On an allocation failure an error packet is queued and the
 * connection is told to swallow the value bytes.
 */
static void process_bin_append_prepend(conn *c) {
    char *key;
    int nkey;
    int vlen;
    item *it = NULL;

    assert(c != NULL);

    /*
     * A body shorter than the key is malformed and would make the value
     * length below negative.  Reject it through the same error path the
     * command dispatcher uses for bad headers.
     */
    if (c->binary_header.request.bodylen < c->binary_header.request.keylen) {
        handle_binary_protocol_error(c);
        return;
    }

    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;
    vlen = c->binary_header.request.bodylen - nkey;

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "Value len is %d\n", vlen);
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* pick up the status left in aiostat and reset it for the next round */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, 0, 0);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            /* the item is unusable without its info: drop it and report */
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_APPEND:
            c->store_op = OPERATION_APPEND;
            break;
        case PROTOCOL_BINARY_CMD_PREPEND:
            c->store_op = OPERATION_PREPEND;
            break;
        default:
            assert(0);
        }

        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }
        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3310
/*
 * Execute a binary FLUSH request: forward it to the engine with the
 * expiration carried in the extras (0 when the request has none) and queue
 * the matching response.
 */
static void process_bin_flush(conn *c) {
    protocol_binary_request_flush *request = binary_get_request(c);
    time_t when = 0;

    /* the expiration is only present when the extras are body-sized */
    if (c->binary_header.request.extlen == sizeof(request->message.body)) {
        when = ntohl(request->message.body.expiration);
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: flush %ld", c->sfd,
                                        (long)when);
    }

    switch (settings.engine.v1->flush(settings.engine.v0, c, when)) {
    case ENGINE_SUCCESS:
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
        break;
    }

    STATS_NOKEY(c, cmd_flush);
}
3337
/*
 * Execute a binary DELETE request: ask the engine to remove the key and
 * translate the engine status into a binary response, updating the
 * delete hit/miss statistics on the way.
 */
static void process_bin_delete(conn *c) {
    protocol_binary_request_delete *request = binary_get_request(c);
    char *key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    assert(c != NULL);

    if (settings.verbose > 1) {
        char printable[1024];
        if (key_to_printable_buffer(printable, sizeof(printable), c->sfd, true,
                                    "DELETE", key, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            printable);
        }
    }

    /* pick up the status left in aiostat and reset it for the next round */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        if (settings.detail_enabled) {
            stats_prefix_record_delete(key, nkey);
        }
        ret = settings.engine.v1->remove(settings.engine.v0, c, key, nkey,
                                         ntohll(request->message.header.request.cas),
                                         c->binary_header.request.vbucket);
    }

    /* SLAB_INCR below refers to a local called "info", so it must exist */
    item_info info = { .nvalue = 1 };

    switch (ret) {
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_SUCCESS:
        write_bin_response(c, NULL, 0, 0, 0);
        SLAB_INCR(c, delete_hits, key, nkey);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        STATS_INCR(c, delete_misses, key, nkey);
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
3392
/*
 * Called when the bytes requested for a binary-protocol connection have
 * been read: dispatch to the handler that matches the current substate.
 * An unknown substate is logged and aborts the process.
 */
static void complete_nread_binary(conn *c) {
    assert(c != NULL);
    assert(c->cmd >= 0);

    switch (c->substate) {
    case bin_reading_set_header:
        /* append/prepend share this substate with set/add/replace */
        if (c->cmd != PROTOCOL_BINARY_CMD_APPEND &&
            c->cmd != PROTOCOL_BINARY_CMD_PREPEND) {
            process_bin_update(c);
        } else {
            process_bin_append_prepend(c);
        }
        break;

    case bin_read_set_value:
        complete_update_bin(c);
        break;

    case bin_reading_get_key:
        process_bin_get(c);
        break;

    case bin_reading_stat:
        process_bin_stat(c);
        break;

    case bin_reading_del_header:
        process_bin_delete(c);
        break;

    case bin_reading_incr_header:
        complete_incr_bin(c);
        break;

    case bin_read_flush_exptime:
        process_bin_flush(c);
        break;

    case bin_reading_sasl_auth:
        process_bin_sasl_auth(c);
        break;

    case bin_reading_sasl_auth_data:
        process_bin_complete_sasl_auth(c);
        break;

    case bin_reading_packet:
        if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
            /* an ordinary request packet */
            process_bin_packet(c);
        } else {
            /* a response packet: look up its handler by opcode */
            RESPONSE_HANDLER on_response =
                response_handlers[c->binary_header.request.opcode];
            if (!on_response) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                       "%d: ERROR: Unsupported response packet received: %u\n",
                       c->sfd, (unsigned int)c->binary_header.request.opcode);
                conn_set_state(c, conn_closing);
            } else {
                on_response(c);
            }
        }
        break;

    default:
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                "Not handling substate %d\n", c->substate);
        abort();
    }
}
3452
/*
 * Reset the per-command state of a connection, release any item it still
 * holds, and choose the next state: parse again if input is already
 * buffered, otherwise wait for more.
 */
static void reset_cmd_handler(conn *c) {
    c->sbytes = 0;
    c->ascii_cmd = NULL;
    c->cmd = -1;
    c->substate = bin_no_state;

    if (c->item != NULL) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = NULL;
    }

    conn_shrink(c);

    if (!(c->rbytes > 0)) {
        conn_set_state(c, conn_waiting);
    } else {
        conn_set_state(c, conn_parse_cmd);
    }
}
3469
ascii_response_handler(const void * cookie,int nbytes,const char * dta)3470 static ENGINE_ERROR_CODE ascii_response_handler(const void *cookie,
3471 int nbytes,
3472 const char *dta)
3473 {
3474 conn *c = (conn*)cookie;
3475 if (!grow_dynamic_buffer(c, nbytes)) {
3476 if (settings.verbose > 0) {
3477 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
3478 "<%d ERROR: Failed to allocate memory for response\n",
3479 c->sfd);
3480 }
3481 return ENGINE_ENOMEM;
3482 }
3483
3484 char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
3485 memcpy(buf, dta, nbytes);
3486 c->dynamic_buffer.offset += nbytes;
3487
3488 return ENGINE_SUCCESS;
3489 }
3490
/*
 * Called when the bytes requested for an ASCII-protocol connection have
 * been read.  With an extension command pending, run it and send whatever
 * it put in the dynamic buffer; otherwise finish an ordinary update.
 */
static void complete_nread_ascii(conn *c) {
    if (c->ascii_cmd == NULL) {
        complete_update_ascii(c);
        return;
    }

    c->ewouldblock = false;
    ENGINE_ERROR_CODE status = c->ascii_cmd->execute(c->ascii_cmd->cookie, c,
                                                     0, NULL,
                                                     ascii_response_handler);
    switch (status) {
    case ENGINE_SUCCESS:
        if (c->dynamic_buffer.buffer == NULL) {
            /* the command produced no output */
            conn_set_state(c, conn_new_cmd);
        } else {
            write_and_free(c, c->dynamic_buffer.buffer,
                           c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
    default:
        conn_set_state(c, conn_closing);
    }
}
3516
/*
 * Dispatch a completed read to the handler for the connection's protocol.
 */
static void complete_nread(conn *c) {
    assert(c != NULL);
    assert(c->protocol == ascii_prot
           || c->protocol == binary_prot);

    if (c->protocol == ascii_prot) {
        complete_nread_ascii(c);
        return;
    }

    if (c->protocol == binary_prot) {
        complete_nread_binary(c);
    }
}
3528
3529 #define COMMAND_TOKEN 0
3530 #define SUBCOMMAND_TOKEN 1
3531 #define KEY_TOKEN 1
3532
3533 #define MAX_TOKENS 30
3534
3535 /*
3536 * Tokenize the command string by replacing whitespace with '\0' and update
3537 * the token array tokens with pointer to start of each token and length.
3538 * Returns total number of tokens. The last valid token is the terminal
3539 * token (value points to the first unprocessed character of the string and
3540 * length zero).
3541 *
3542 * Usage example:
3543 *
3544 * while(tokenize_command(command, ncommand, tokens, max_tokens) > 0) {
3545 * for(int ix = 0; tokens[ix].length != 0; ix++) {
3546 * ...
3547 * }
3548 * ncommand = tokens[ix].value - command;
3549 * command = tokens[ix].value;
3550 * }
3551 */
tokenize_command(char * command,token_t * tokens,const size_t max_tokens)3552 static size_t tokenize_command(char *command, token_t *tokens, const size_t max_tokens) {
3553 char *s, *e;
3554 size_t ntokens = 0;
3555
3556 assert(command != NULL && tokens != NULL && max_tokens > 1);
3557
3558 for (s = e = command; ntokens < max_tokens - 1; ++e) {
3559 if (*e == ' ') {
3560 if (s != e) {
3561 tokens[ntokens].value = s;
3562 tokens[ntokens].length = e - s;
3563 ntokens++;
3564 *e = '\0';
3565 }
3566 s = e + 1;
3567 }
3568 else if (*e == '\0') {
3569 if (s != e) {
3570 tokens[ntokens].value = s;
3571 tokens[ntokens].length = e - s;
3572 ntokens++;
3573 }
3574
3575 break; /* string end */
3576 }
3577 }
3578
3579 /*
3580 * If we scanned the whole string, the terminal value pointer is null,
3581 * otherwise it is the first unprocessed character.
3582 */
3583 tokens[ntokens].value = *e == '\0' ? NULL : e;
3584 tokens[ntokens].length = 0;
3585 ntokens++;
3586
3587 return ntokens;
3588 }
3589
3590 #ifdef INNODB_MEMCACHED
detokenize(token_t * tokens,size_t ntokens,char ** out,int * nbytes)3591 static void detokenize(token_t *tokens, size_t ntokens, char **out, int *nbytes)
3592 #else
3593 static void detokenize(token_t *tokens, int ntokens, char **out, int *nbytes)
3594 #endif
3595 {
3596 int i, nb;
3597 char *buf, *p;
3598
3599 nb = ntokens; // account for spaces, which is ntokens-1, plus the null
3600 for (i = 0; i < ntokens; ++i) {
3601 nb += tokens[i].length;
3602 }
3603
3604 buf = malloc(nb * sizeof(char));
3605 if (buf != NULL) {
3606 p = buf;
3607 for (i = 0; i < ntokens; ++i) {
3608 memcpy(p, tokens[i].value, tokens[i].length);
3609 p += tokens[i].length;
3610 *p = ' ';
3611 p++;
3612 }
3613 buf[nb - 1] = '\0';
3614 *nbytes = nb - 1;
3615 *out = buf;
3616 }
3617 }
3618
3619
3620 /* set up a connection to write a buffer then free it, used for stats */
write_and_free(conn * c,char * buf,int bytes)3621 static void write_and_free(conn *c, char *buf, int bytes) {
3622 if (buf) {
3623 c->write_and_free = buf;
3624 c->wcurr = buf;
3625 c->wbytes = bytes;
3626 conn_set_state(c, conn_write);
3627 c->write_and_go = conn_new_cmd;
3628 } else {
3629 out_string(c, "SERVER_ERROR out of memory writing stats");
3630 }
3631 }
3632
/*
 * Turn on c->noreply when the last real token of the command line (the one
 * just before the terminal token) is "noreply".
 *
 * @return the resulting value of c->noreply
 */
static inline bool set_noreply_maybe(conn *c, token_t *tokens, size_t ntokens)
{
    /*
      NOTE: this function is not the first place where we are going to
      send the reply.  We could send it instead from process_command()
      if the request line has wrong number of tokens.  However parsing
      malformed line for "noreply" option is not reliable anyway, so
      it can't be helped.
    */

    /* fewer than two tokens leaves no slot before the terminal token */
    if (ntokens >= 2) {
        const char *candidate = tokens[ntokens - 2].value;

        if (candidate != NULL && strcmp(candidate, "noreply") == 0) {
            c->noreply = true;
        }
    }

    return c->noreply;
}
3650
/*
 * Format one statistic value printf-style and pass it to add_stats.
 *
 * @param name      stat name (NUL-terminated)
 * @param add_stats callback that receives the name/value pair
 * @param c         connection handed through to the callback
 * @param fmt       printf-style format for the value, followed by its
 *                  arguments
 *
 * Values that do not fit in STAT_VAL_LEN bytes are truncated.
 */
void append_stat(const char *name, ADD_STAT add_stats, conn *c,
                 const char *fmt, ...) {
    char val_str[STAT_VAL_LEN];
    int vlen;
    va_list ap;

    assert(name);
    assert(add_stats);
    assert(c);
    assert(fmt);

    va_start(ap, fmt);
    vlen = vsnprintf(val_str, sizeof(val_str), fmt, ap);
    va_end(ap);

    /*
     * vsnprintf returns the length the full output would have had (or a
     * negative value on error), not the number of bytes stored.  Clamp it
     * so the callback never reads past what is actually in val_str.
     */
    if (vlen < 0) {
        val_str[0] = '\0';
        vlen = 0;
    } else if ((size_t)vlen >= sizeof(val_str)) {
        vlen = (int)(sizeof(val_str) - 1);
    }

    add_stats(name, strlen(name), val_str, vlen, c);
}
3668
/*
 * Handle "stats detail on|off|dump": switch detailed statistics on or off,
 * or send the prefix dump.  Anything else gets a usage error, and every
 * request is refused when detailed stats are not allowed.
 */
inline static void process_stats_detail(conn *c, const char *command) {
    assert(c != NULL);

    if (!settings.allow_detailed) {
        out_string(c, "CLIENT_ERROR detailed stats disabled");
        return;
    }

    const bool turn_on = (strcmp(command, "on") == 0);

    if (turn_on || strcmp(command, "off") == 0) {
        settings.detail_enabled = turn_on ? 1 : 0;
        out_string(c, "OK");
    } else if (strcmp(command, "dump") == 0) {
        int dump_len;
        char *dump = stats_prefix_dump(&dump_len);
        write_and_free(c, dump, dump_len);
    } else {
        out_string(c, "CLIENT_ERROR usage: stats detail on|off|dump");
    }
}
3694
aggregate_callback(void * in,void * out)3695 static void aggregate_callback(void *in, void *out) {
3696 struct thread_stats *out_thread_stats = out;
3697 struct independent_stats *in_independent_stats = in;
3698 threadlocal_stats_aggregate(in_independent_stats->thread_stats,
3699 out_thread_stats);
3700 }
3701
3702 /* return server specific stats only */
server_stats(ADD_STAT add_stats,conn * c,bool aggregate)3703 static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate) {
3704 pid_t pid = getpid();
3705 rel_time_t now = current_time;
3706
3707 struct thread_stats thread_stats;
3708 threadlocal_stats_clear(&thread_stats);
3709
3710 if (aggregate && settings.engine.v1->aggregate_stats != NULL) {
3711 settings.engine.v1->aggregate_stats(settings.engine.v0,
3712 (const void *)c,
3713 aggregate_callback,
3714 &thread_stats);
3715 } else {
3716 threadlocal_stats_aggregate(get_independent_stats(c)->thread_stats,
3717 &thread_stats);
3718 }
3719
3720 struct slab_stats slab_stats;
3721 slab_stats_aggregate(&thread_stats, &slab_stats);
3722
3723 #ifndef __WIN32__
3724 struct rusage usage;
3725 getrusage(RUSAGE_SELF, &usage);
3726 #endif
3727
3728 STATS_LOCK();
3729
3730 APPEND_STAT("pid", "%lu", (long)pid);
3731 APPEND_STAT("uptime", "%u", now);
3732 APPEND_STAT("time", "%ld", now + (long)process_started);
3733 APPEND_STAT("version", "%s", VERSION);
3734 APPEND_STAT("libevent", "%s", event_get_version());
3735 APPEND_STAT("pointer_size", "%d", (int)(8 * sizeof(void *)));
3736
3737 #ifndef __WIN32__
3738 append_stat("rusage_user", add_stats, c, "%ld.%06ld",
3739 (long)usage.ru_utime.tv_sec,
3740 (long)usage.ru_utime.tv_usec);
3741 append_stat("rusage_system", add_stats, c, "%ld.%06ld",
3742 (long)usage.ru_stime.tv_sec,
3743 (long)usage.ru_stime.tv_usec);
3744 #endif
3745
3746 APPEND_STAT("daemon_connections", "%u", stats.daemon_conns);
3747 APPEND_STAT("curr_connections", "%u", stats.curr_conns);
3748 APPEND_STAT("total_connections", "%u", stats.total_conns);
3749 APPEND_STAT("connection_structures", "%u", stats.conn_structs);
3750 APPEND_STAT("cmd_get", "%"PRIu64, thread_stats.cmd_get);
3751 APPEND_STAT("cmd_set", "%"PRIu64, slab_stats.cmd_set);
3752 APPEND_STAT("cmd_flush", "%"PRIu64, thread_stats.cmd_flush);
3753 APPEND_STAT("auth_cmds", "%"PRIu64, thread_stats.auth_cmds);
3754 APPEND_STAT("auth_errors", "%"PRIu64, thread_stats.auth_errors);
3755 APPEND_STAT("get_hits", "%"PRIu64, slab_stats.get_hits);
3756 APPEND_STAT("get_misses", "%"PRIu64, thread_stats.get_misses);
3757 APPEND_STAT("delete_misses", "%"PRIu64, thread_stats.delete_misses);
3758 APPEND_STAT("delete_hits", "%"PRIu64, slab_stats.delete_hits);
3759 APPEND_STAT("incr_misses", "%"PRIu64, thread_stats.incr_misses);
3760 APPEND_STAT("incr_hits", "%"PRIu64, thread_stats.incr_hits);
3761 APPEND_STAT("decr_misses", "%"PRIu64, thread_stats.decr_misses);
3762 APPEND_STAT("decr_hits", "%"PRIu64, thread_stats.decr_hits);
3763 APPEND_STAT("cas_misses", "%"PRIu64, thread_stats.cas_misses);
3764 APPEND_STAT("cas_hits", "%"PRIu64, slab_stats.cas_hits);
3765 APPEND_STAT("cas_badval", "%"PRIu64, slab_stats.cas_badval);
3766 APPEND_STAT("bytes_read", "%"PRIu64, thread_stats.bytes_read);
3767 APPEND_STAT("bytes_written", "%"PRIu64, thread_stats.bytes_written);
3768 APPEND_STAT("limit_maxbytes", "%"PRIu64, settings.maxbytes);
3769 APPEND_STAT("accepting_conns", "%u", is_listen_disabled() ? 0 : 1);
3770 APPEND_STAT("listen_disabled_num", "%"PRIu64, get_listen_disabled_num());
3771 APPEND_STAT("rejected_conns", "%" PRIu64, (unsigned long long)stats.rejected_conns);
3772 APPEND_STAT("threads", "%d", settings.num_threads);
3773 APPEND_STAT("conn_yields", "%" PRIu64, (unsigned long long)thread_stats.conn_yields);
3774 STATS_UNLOCK();
3775
3776 /*
3777 * Add tap stats (only if non-zero)
3778 */
3779 struct tap_stats ts;
3780 pthread_mutex_lock(&tap_stats.mutex);
3781 ts = tap_stats;
3782 pthread_mutex_unlock(&tap_stats.mutex);
3783
3784 if (ts.sent.connect) {
3785 APPEND_STAT("tap_connect_sent", "%"PRIu64, ts.sent.connect);
3786 }
3787 if (ts.sent.mutation) {
3788 APPEND_STAT("tap_mutation_sent", "%"PRIu64, ts.sent.mutation);
3789 }
3790 if (ts.sent.checkpoint_start) {
3791 APPEND_STAT("tap_checkpoint_start_sent", "%"PRIu64, ts.sent.checkpoint_start);
3792 }
3793 if (ts.sent.checkpoint_end) {
3794 APPEND_STAT("tap_checkpoint_end_sent", "%"PRIu64, ts.sent.checkpoint_end);
3795 }
3796 if (ts.sent.delete) {
3797 APPEND_STAT("tap_delete_sent", "%"PRIu64, ts.sent.delete);
3798 }
3799 if (ts.sent.flush) {
3800 APPEND_STAT("tap_flush_sent", "%"PRIu64, ts.sent.flush);
3801 }
3802 if (ts.sent.opaque) {
3803 APPEND_STAT("tap_opaque_sent", "%"PRIu64, ts.sent.opaque);
3804 }
3805 if (ts.sent.vbucket_set) {
3806 APPEND_STAT("tap_vbucket_set_sent", "%"PRIu64,
3807 ts.sent.vbucket_set);
3808 }
3809 if (ts.received.connect) {
3810 APPEND_STAT("tap_connect_received", "%"PRIu64, ts.received.connect);
3811 }
3812 if (ts.received.mutation) {
3813 APPEND_STAT("tap_mutation_received", "%"PRIu64, ts.received.mutation);
3814 }
3815 if (ts.received.checkpoint_start) {
3816 APPEND_STAT("tap_checkpoint_start_received", "%"PRIu64, ts.received.checkpoint_start);
3817 }
3818 if (ts.received.checkpoint_end) {
3819 APPEND_STAT("tap_checkpoint_end_received", "%"PRIu64, ts.received.checkpoint_end);
3820 }
3821 if (ts.received.delete) {
3822 APPEND_STAT("tap_delete_received", "%"PRIu64, ts.received.delete);
3823 }
3824 if (ts.received.flush) {
3825 APPEND_STAT("tap_flush_received", "%"PRIu64, ts.received.flush);
3826 }
3827 if (ts.received.opaque) {
3828 APPEND_STAT("tap_opaque_received", "%"PRIu64, ts.received.opaque);
3829 }
3830 if (ts.received.vbucket_set) {
3831 APPEND_STAT("tap_vbucket_set_received", "%"PRIu64,
3832 ts.received.vbucket_set);
3833 }
3834 }
3835
/*
 * Report the daemon's current settings, the registered daemon and ASCII
 * extensions, and the active logger through add_stats.
 *
 * @param add_stats callback that receives each name/value pair
 * @param c         cookie handed through to the callback
 */
static void process_stat_settings(ADD_STAT add_stats, void *c) {
    assert(add_stats);
    /* printed as 64-bit so limits of 4 GiB and above are not truncated */
    APPEND_STAT("maxbytes", "%" PRIu64, (uint64_t)settings.maxbytes);
    APPEND_STAT("maxconns", "%d", settings.maxconns);
    APPEND_STAT("tcpport", "%d", settings.port);
    APPEND_STAT("udpport", "%d", settings.udpport);
    APPEND_STAT("inter", "%s", settings.inter ? settings.inter : "NULL");
    APPEND_STAT("verbosity", "%d", settings.verbose);
    APPEND_STAT("oldest", "%lu", (unsigned long)settings.oldest_live);
    APPEND_STAT("evictions", "%s", settings.evict_to_free ? "on" : "off");
    APPEND_STAT("domain_socket", "%s",
                settings.socketpath ? settings.socketpath : "NULL");
    APPEND_STAT("umask", "%o", settings.access);
    APPEND_STAT("growth_factor", "%.2f", settings.factor);
    APPEND_STAT("chunk_size", "%d", settings.chunk_size);
    APPEND_STAT("num_threads", "%d", settings.num_threads);
    APPEND_STAT("num_threads_per_udp", "%d", settings.num_threads_per_udp);
    APPEND_STAT("stat_key_prefix", "%c", settings.prefix_delimiter);
    APPEND_STAT("detail_enabled", "%s",
                settings.detail_enabled ? "yes" : "no");
    APPEND_STAT("allow_detailed", "%s",
                settings.allow_detailed ? "yes" : "no");
    APPEND_STAT("reqs_per_event", "%d", settings.reqs_per_event);
    APPEND_STAT("reqs_per_tap_event", "%d", settings.reqs_per_tap_event);
    APPEND_STAT("cas_enabled", "%s", settings.use_cas ? "yes" : "no");
    APPEND_STAT("tcp_backlog", "%d", settings.backlog);
    APPEND_STAT("binding_protocol", "%s",
                prot_text(settings.binding_protocol));
#ifdef SASL_ENABLED
    APPEND_STAT("auth_enabled_sasl", "%s", "yes");
#else
    APPEND_STAT("auth_enabled_sasl", "%s", "no");
#endif

#ifdef ENABLE_ISASL
    APPEND_STAT("auth_sasl_engine", "%s", "isasl");
#elif defined(ENABLE_SASL)
    APPEND_STAT("auth_sasl_engine", "%s", "cyrus");
#else
    APPEND_STAT("auth_sasl_engine", "%s", "none");
#endif
    APPEND_STAT("auth_required_sasl", "%s", settings.require_sasl ? "yes" : "no");
    APPEND_STAT("item_size_max", "%d", settings.item_size_max);
    APPEND_STAT("topkeys", "%d", settings.topkeys);

    /* one "extension" entry per registered daemon extension */
    for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("extension", "%s", ptr->get_name());
    }

    APPEND_STAT("logger", "%s", settings.extensions.logger->get_name());

    /* one "ascii_extension" entry per registered ASCII protocol extension */
    for (EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("ascii_extension", "%s", ptr->get_name(ptr->cookie));
    }
}
3895
/*
 * Handle the ASCII "stats" command and its subcommands.
 *
 * With no subcommand the server and engine stats are sent.  "reset",
 * "detail", "settings", "cachedump", "aggregate" and "topkeys" are handled
 * here; any other subcommand is passed to the engine.
 *
 * @param c       the connection
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens, terminal token included
 * @return NULL once the command has been handled, or a non-NULL pointer
 *         when the engine returned ENGINE_EWOULDBLOCK (c->ewouldblock is
 *         set in that case)
 */
static char *process_stat(conn *c, token_t *tokens, const size_t ntokens) {
    const char *subcommand = tokens[SUBCOMMAND_TOKEN].value;
    c->dynamic_buffer.offset = 0;

    if (ntokens == 2) {
        /* plain "stats": server stats followed by the engine's own */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            server_stats(&append_stats, c, false);
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                NULL, 0, &append_stats);
            if (ret == ENGINE_EWOULDBLOCK) {
                c->ewouldblock = true;
                return c->rcurr + 5;
            }
        }
    } else if (strcmp(subcommand, "reset") == 0) {
        stats_reset(c);
        out_string(c, "RESET");
        return NULL;
    } else if (strcmp(subcommand, "detail") == 0) {
        /* NOTE: how to tackle detail with binary? */
        if (ntokens < 4) {
            process_stats_detail(c, "");  /* outputs the error message */
        } else {
            process_stats_detail(c, tokens[2].value);
        }
        /* Output already generated */
        return NULL;
    } else if (strcmp(subcommand, "settings") == 0) {
        process_stat_settings(&append_stats, c);
    } else if (strcmp(subcommand, "cachedump") == 0) {
        char *buf = NULL;
        unsigned int bytes = 0, id, limit = 0;

        if (ntokens < 5) {
            out_string(c, "CLIENT_ERROR bad command line");
            return NULL;
        }

        if (!safe_strtoul(tokens[2].value, &id) ||
            !safe_strtoul(tokens[3].value, &limit)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return NULL;
        }

        if (id >= POWER_LARGEST) {
            out_string(c, "CLIENT_ERROR Illegal slab id");
            return NULL;
        }

#ifdef FUTURE
        buf = item_cachedump(id, limit, &bytes);
#endif
        write_and_free(c, buf, bytes);
        return NULL;
    } else if (strcmp(subcommand, "aggregate") == 0) {
        server_stats(&append_stats, c, true);
    } else if (strcmp(subcommand, "topkeys") == 0) {
        topkeys_t *tk = get_independent_stats(c)->topkeys;
        if (tk != NULL) {
            topkeys_stats(tk, c, current_time, append_stats);
        } else {
            out_string(c, "ERROR");
            return NULL;
        }
    } else {
        /* getting here means that the subcommand is either engine specific or
           is invalid. query the engine and see. */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            char *buf = NULL;
            int nb = -1;
            detokenize(&tokens[1], ntokens - 2, &buf, &nb);
            if (buf == NULL) {
                /* the stat key could not be built: report out of memory
                   instead of calling the engine with a NULL key */
                ret = ENGINE_ENOMEM;
            } else {
                ret = settings.engine.v1->get_stats(settings.engine.v0, c, buf,
                                                    nb, append_stats);
                free(buf);
            }
        }

        switch (ret) {
        case ENGINE_SUCCESS:
            append_stats(NULL, 0, NULL, 0, c);
            write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
            break;
        case ENGINE_ENOMEM:
            out_string(c, "SERVER_ERROR out of memory writing stats");
            break;
        case ENGINE_DISCONNECT:
            c->state = conn_closing;
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            return tokens[SUBCOMMAND_TOKEN].value;
        default:
            out_string(c, "ERROR");
            break;
        }

        return NULL;
    }

    /* append terminator and start the transfer */
    append_stats(NULL, 0, NULL, 0, c);

    if (c->dynamic_buffer.buffer == NULL) {
        out_string(c, "SERVER_ERROR out of memory writing stats");
    } else {
        write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
        c->dynamic_buffer.buffer = NULL;
    }

    return NULL;
}
4016
4017 /**
4018 * Get a suffix buffer and insert it into the list of used suffix buffers
4019 * @param c the connection object
4020 * @return a pointer to a new suffix buffer or NULL if allocation failed
4021 */
get_suffix_buffer(conn * c)4022 static char *get_suffix_buffer(conn *c) {
4023 if (c->suffixleft == c->suffixsize) {
4024 char **new_suffix_list;
4025 size_t sz = sizeof(char*) * c->suffixsize * 2;
4026
4027 new_suffix_list = realloc(c->suffixlist, sz);
4028 if (new_suffix_list) {
4029 c->suffixsize *= 2;
4030 c->suffixlist = new_suffix_list;
4031 } else {
4032 if (settings.verbose > 1) {
4033 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4034 "=%d Failed to resize suffix buffer\n", c->sfd);
4035 }
4036
4037 return NULL;
4038 }
4039 }
4040
4041 char *suffix = cache_alloc(c->thread->suffix_cache);
4042 if (suffix != NULL) {
4043 *(c->suffixlist + c->suffixleft) = suffix;
4044 ++c->suffixleft;
4045 }
4046
4047 return suffix;
4048 }
4049
4050 /* ntokens is overwritten here... shrug.. */
process_get_command(conn * c,token_t * tokens,size_t ntokens,bool return_cas)4051 static inline char* process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) {
4052 char *key;
4053 size_t nkey;
4054 int i = c->ileft;
4055 item *it = NULL;
4056 token_t *key_token = &tokens[KEY_TOKEN];
4057 int range = false;
4058 assert(c != NULL);
4059
4060 do {
4061 while(key_token->length != 0) {
4062 /* whether there are more keys to fetch */
4063 bool next_get = (key_token + 1)->value;
4064
4065 key = key_token->value;
4066 nkey = key_token->length;
4067
4068 /* whether this is a range search */
4069 if (nkey >= 2 && key[0] == '@'
4070 && (key[1] == '>' || key[1] == '<')) {
4071 range = true;
4072 }
4073
4074 if(nkey > KEY_MAX_LENGTH) {
4075 out_string(c, "CLIENT_ERROR bad command line format");
4076 return NULL;
4077 }
4078
4079 ENGINE_ERROR_CODE ret = c->aiostat;
4080 c->aiostat = ENGINE_SUCCESS;
4081
4082 if (ret == ENGINE_SUCCESS) {
4083 ret = settings.engine.v1->get(settings.engine.v0, c, &it,
4084 key, nkey, next_get);
4085 }
4086
4087 switch (ret) {
4088 case ENGINE_EWOULDBLOCK:
4089 c->ewouldblock = true;
4090 c->ileft = i;
4091 return key;
4092
4093 case ENGINE_SUCCESS:
4094 break;
4095 case ENGINE_KEY_ENOENT:
4096 default:
4097 it = NULL;
4098 break;
4099 }
4100
4101 if (settings.detail_enabled) {
4102 stats_prefix_record_get(key, nkey, NULL != it);
4103 }
4104
4105 if (it) {
4106 item_info info = { .nvalue = 1 };
4107 if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it,
4108 &info)) {
4109 settings.engine.v1->release(settings.engine.v0, c, it);
4110 out_string(c, "SERVER_ERROR error getting item data");
4111 break;
4112 }
4113
4114 if (i >= c->isize) {
4115 item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2);
4116 if (new_list) {
4117 c->isize *= 2;
4118 c->ilist = new_list;
4119 } else {
4120 settings.engine.v1->release(settings.engine.v0, c, it);
4121 break;
4122 }
4123 }
4124
4125 /* Rebuild the suffix */
4126 char *suffix = get_suffix_buffer(c);
4127 if (suffix == NULL) {
4128 out_string(c, "SERVER_ERROR out of memory rebuilding suffix");
4129 settings.engine.v1->release(settings.engine.v0, c, it);
4130 return NULL;
4131 }
4132 int suffix_len = snprintf(suffix, SUFFIX_SIZE,
4133 " %u %u\r\n", htonl(info.flags),
4134 info.nbytes);
4135
4136 /*
4137 * Construct the response. Each hit adds three elements to the
4138 * outgoing data list:
4139 * "VALUE "
4140 * key
4141 * " " + flags + " " + data length + "\r\n" + data (with \r\n)
4142 */
4143
4144 MEMCACHED_COMMAND_GET(c->sfd, info.key, info.nkey,
4145 info.nbytes, info.cas);
4146 if (return_cas)
4147 {
4148
4149 char *cas = get_suffix_buffer(c);
4150 if (cas == NULL) {
4151 out_string(c, "SERVER_ERROR out of memory making CAS suffix");
4152 settings.engine.v1->release(settings.engine.v0, c, it);
4153 return NULL;
4154 }
4155 int cas_len = snprintf(cas, SUFFIX_SIZE, " %"PRIu64"\r\n",
4156 info.cas);
4157 if (add_iov(c, "VALUE ", 6) != 0 ||
4158 add_iov(c, info.key, info.nkey) != 0 ||
4159 add_iov(c, suffix, suffix_len - 2) != 0 ||
4160 add_iov(c, cas, cas_len) != 0 ||
4161 add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
4162 add_iov(c, "\r\n", 2) != 0)
4163 {
4164 settings.engine.v1->release(settings.engine.v0, c, it);
4165 break;
4166 }
4167 }
4168 else
4169 {
4170 if (add_iov(c, "VALUE ", 6) != 0 ||
4171 add_iov(c, info.key, info.nkey) != 0 ||
4172 add_iov(c, suffix, suffix_len) != 0 ||
4173 add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
4174 add_iov(c, "\r\n", 2) != 0)
4175 {
4176 settings.engine.v1->release(settings.engine.v0, c, it);
4177 break;
4178 }
4179 }
4180
4181
4182 if (settings.verbose > 1) {
4183 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4184 ">%d sending key %s\n",
4185 c->sfd, info.key);
4186 }
4187
4188 /* item_get() has incremented it->refcount for us */
4189 STATS_HIT(c, get, key, nkey);
4190 *(c->ilist + i) = it;
4191 i++;
4192
4193 } else {
4194 STATS_MISS(c, get, key, nkey);
4195 MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);
4196 }
4197
4198 if (!range) {
4199 key_token++;
4200 } else {
4201 if (ret == ENGINE_KEY_ENOENT) {
4202 key_token->value = NULL;
4203 }
4204 break;
4205 }
4206 }
4207
4208 /*
4209 * If the command string hasn't been fully processed, get the next set
4210 * of tokens.
4211 */
4212 if(key_token->value != NULL) {
4213 ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS);
4214 key_token = tokens;
4215 }
4216
4217 } while(key_token->value != NULL);
4218
4219 c->icurr = c->ilist;
4220 c->ileft = i;
4221 c->suffixcurr = c->suffixlist;
4222
4223 if (settings.verbose > 1) {
4224 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4225 ">%d END\n", c->sfd);
4226 }
4227
4228 /*
4229 If the loop was terminated because of out-of-memory, it is not
4230 reliable to add END\r\n to the buffer, because it might not end
4231 in \r\n. So we send SERVER_ERROR instead.
4232 */
4233 if (key_token->value != NULL || add_iov(c, "END\r\n", 5) != 0
4234 || (IS_UDP(c->transport) && build_udp_headers(c) != 0)) {
4235 out_string(c, "SERVER_ERROR out of memory writing get response");
4236 }
4237 else {
4238 conn_set_state(c, conn_mwrite);
4239 c->msgcurr = 0;
4240 }
4241
4242 return NULL;
4243 }
4244
/*
 * Handle the ASCII storage commands (set/add/replace/append/prepend/cas).
 *
 * Parses flags, expiry time and value length from tokens[2..4] (plus the
 * CAS id from tokens[5] when handle_cas is true), asks the engine to
 * allocate an item and, on success, switches the connection to conn_nread
 * so the value bytes are read straight into the item.
 *
 * On allocation failure the client's data line is swallowed
 * (conn_swallow, vlen + 2 bytes) and, for OPERATION_SET, the key is removed
 * from the engine so that stale data does not survive a failed overwrite.
 *
 * @param c          the connection issuing the command
 * @param tokens     tokenized command line
 * @param ntokens    number of entries in tokens
 * @param store_op   the store operation to perform once the data arrives
 * @param handle_cas true when the command carries a CAS id
 */
static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, ENGINE_STORE_OPERATION store_op, bool handle_cas) {
    char *key;
    size_t nkey;
    unsigned int flags;
    int32_t exptime_int = 0;
    time_t exptime;
    int vlen = 0;
    uint64_t req_cas_id=0;
    item *it = NULL;

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    key = tokens[KEY_TOKEN].value;
    nkey = tokens[KEY_TOKEN].length;

    if (! (safe_strtoul(tokens[2].value, (uint32_t *)&flags)
           && safe_strtol(tokens[3].value, &exptime_int)
           && safe_strtol(tokens[4].value, (int32_t *)&vlen))) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    /* Negative expire values not allowed */
    if (exptime_int < 0) {
        out_string(c, "CLIENT_ERROR Invalid expire time");
        return;
    }

    /* Ubuntu 8.04 breaks when I pass exptime to safe_strtol */
    exptime = exptime_int;

    /* does cas value exist? */
    if (handle_cas) {
        if (!safe_strtoull(tokens[5].value, &req_cas_id)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return;
        }
    }

    /*
     * Reject negative lengths, and lengths so large that the "vlen + 2"
     * computed below for the swallow path would overflow a signed int.
     */
    if (vlen < 0 || vlen > INT_MAX - 2) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Consume the status left behind by a completed async call. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, htonl(flags), exptime);
    }

    item_info info = { .nvalue = 1 };
    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, req_cas_id);
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            out_string(c, "SERVER_ERROR error getting item data");
            break;
        }
        /* Read the value bytes directly into the item's storage. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        c->store_op = store_op;
        conn_set_state(c, conn_nread);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            out_string(c, "SERVER_ERROR object too large for cache");
        } else {
            out_string(c, "SERVER_ERROR out of memory storing object");
        }
        /* swallow the data line (value plus trailing "\r\n") */
        c->write_and_go = conn_swallow;
        c->sbytes = vlen + 2;

        /* Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET. Anywhere else too? */
        if (store_op == OPERATION_SET) {
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey, 0, 0);
        }
        break;
    }
}
4349
/*
 * Handle the ASCII "incr" / "decr" commands.
 *
 * The delta is parsed from tokens[2] and handed to the engine's
 * arithmetic() call; the outcome is translated into the matching
 * protocol reply and hit/miss statistics.
 *
 * @param c       the connection issuing the command
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens
 * @param incr    true for "incr", false for "decr"
 * @return the key when the engine answered ENGINE_EWOULDBLOCK
 *         (c->ewouldblock is set), otherwise NULL
 */
static char* process_arithmetic_command(conn *c, token_t *tokens, const size_t ntokens, const bool incr) {
    uint64_t delta;
    uint64_t cas;
    uint64_t result = 0;
    char reply[INCR_MAX_STORAGE_LEN];

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    if (!safe_strtoull(tokens[2].value, &delta)) {
        out_string(c, "CLIENT_ERROR invalid numeric delta argument");
        return NULL;
    }

    /* Pick up (and reset) the status of a previously pending request. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->arithmetic(settings.engine.v0, c, key, nkey,
                                             incr, false, delta, 0, 0, &cas,
                                             &result, 0);
    }

    /* The engine is not done yet: ask the caller to retry with this key. */
    if (ret == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return key;
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        snprintf(reply, sizeof(reply), "%"PRIu64, result);
        out_string(c, reply);
        break;

    case ENGINE_KEY_ENOENT:
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        out_string(c, "NOT_FOUND");
        break;

    case ENGINE_EINVAL:
        out_string(c, "CLIENT_ERROR cannot increment or decrement non-numeric value");
        break;

    case ENGINE_ENOMEM:
        out_string(c, "SERVER_ERROR out of memory");
        break;

    case ENGINE_TMPFAIL:
        out_string(c, "SERVER_ERROR temporary failure");
        break;

    case ENGINE_NOT_STORED:
        out_string(c, "SERVER_ERROR failed to store item");
        break;

    case ENGINE_ENOTSUP:
        out_string(c, "SERVER_ERROR not supported");
        break;

    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;

    default:
        /* Any other engine status is treated as a fatal error. */
        abort();
    }

    return NULL;
}
4429
/*
 * Handle the ASCII "delete" command.
 *
 * Besides "delete <key>", the forms "delete <key> 0", "delete <key> noreply"
 * and "delete <key> 0 noreply" are accepted; anything else with extra
 * arguments is rejected with a usage message.
 *
 * @param c       the connection issuing the command
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens
 * @return the key when the engine answered ENGINE_EWOULDBLOCK
 *         (c->ewouldblock is set), otherwise NULL
 */
static char *process_delete_command(conn *c, token_t *tokens,
                                    const size_t ntokens) {
    assert(c != NULL);

    if (ntokens > 3) {
        bool zero_hold = strcmp(tokens[KEY_TOKEN+1].value, "0") == 0;
        bool noreply = set_noreply_maybe(c, tokens, ntokens);
        bool accepted;

        if (ntokens == 4) {
            /* one extra argument: either the legacy "0" or "noreply" */
            accepted = zero_hold || noreply;
        } else if (ntokens == 5) {
            /* two extra arguments: must be exactly "0 noreply" */
            accepted = zero_hold && noreply;
        } else {
            accepted = false;
        }

        if (!accepted) {
            out_string(c, "CLIENT_ERROR bad command line format.  "
                       "Usage: delete <key> [noreply]");
            return NULL;
        }
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    if (nkey > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /* Pick up (and reset) the status of a previously pending request. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->remove(settings.engine.v0, c,
                                         key, nkey, 0, 0);
    }

    /* The engine is not done yet: ask the caller to retry with this key. */
    if (ret == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return key;
    }

    /* The slab statistics macro expands to code that reads "info". */
    item_info info = { .nvalue = 1 };
    if (ret == ENGINE_SUCCESS) {
        out_string(c, "DELETED");
        SLAB_INCR(c, delete_hits, key, nkey);
    } else if (ret == ENGINE_TMPFAIL) {
        out_string(c, "SERVER_ERROR temporary failure");
    } else {
        out_string(c, "NOT_FOUND");
        STATS_INCR(c, delete_misses, key, nkey);
    }

    if (settings.detail_enabled) {
        stats_prefix_record_delete(key, nkey);
    }
    return NULL;
}
4488
/*
 * Handle the ASCII "bind <table_id_name>" command by forwarding the name
 * to the engine's bind() call.
 *
 * @param c       the connection issuing the command
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens
 * @return the name when the engine answered ENGINE_EWOULDBLOCK
 *         (c->ewouldblock is set), otherwise NULL
 */
static char *process_bind_command(conn *c, token_t *tokens,
                                  const size_t ntokens) {
    char *name;
    size_t name_len;

    assert(c != NULL);

    if (ntokens > 3) {
        out_string(c, "CLIENT_ERROR bad command line format.  "
                   "Usage: bind <table_id_name>");
        return NULL;
    }

    name = tokens[KEY_TOKEN].value;
    name_len = tokens[KEY_TOKEN].length;

    if (name_len > KEY_MAX_LENGTH || name_len == 0) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /* Pick up (and reset) the status of a previously pending request. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->bind(settings.engine.v0, c,
                                       name, name_len);
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        out_string(c, "SUCCEED");
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        return name;
    case ENGINE_TMPFAIL:
        /* fallthrough: reported to the client the same as any failure */
    default:
        out_string(c, "NOT_FOUND");
        break;
    }

    return NULL;
}
4534
/*
 * Handle the ASCII "verbosity <level> [noreply]" command.
 *
 * The requested level is clamped to MAX_VERBOSITY_LEVEL, stored in
 * settings.verbose and announced through the ON_LOG_LEVEL callbacks.
 *
 * @param c       the connection issuing the command
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens
 */
static void process_verbosity_command(conn *c, token_t *tokens, const size_t ntokens) {
    unsigned int requested;

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    /* "verbosity noreply" is not according to the correct syntax */
    if (ntokens == 3 && c->noreply) {
        c->noreply = false;
        out_string(c, "ERROR");
        return;
    }

    if (!safe_strtoul(tokens[1].value, &requested)) {
        out_string(c, "ERROR");
        return;
    }

    if (requested > MAX_VERBOSITY_LEVEL) {
        requested = MAX_VERBOSITY_LEVEL;
    }
    settings.verbose = requested;
    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    out_string(c, "OK");
}
4556
/*
 * Tokenize one ASCII protocol command line and dispatch it to the matching
 * handler.
 *
 * For set/add/replace and friends an item is built and the data is read
 * directly into it; processing then continues in nread_complete().
 *
 * @param c       the connection the command arrived on
 * @param command NUL-terminated command line (modified in place by the
 *                tokenizer)
 * @return NULL when the command was fully handled; otherwise a pointer
 *         into the command line returned by the handler that got
 *         ENGINE_EWOULDBLOCK from the engine (c->ewouldblock is set), so
 *         the caller can arrange a retry
 */
static char* process_command(conn *c, char *command) {

    token_t tokens[MAX_TOKENS];
    size_t ntokens;
    int comm;
    char *ret = NULL;

    assert(c != NULL);

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "<%d %s\n", c->sfd, command);
    }

    if (c->ewouldblock) {
        /*
         * If we are retrying after the engine has completed a pending io for
         * this command, skip add_msghdr() etc and clear the ewouldblock flag.
         */
        c->ewouldblock = false;
    } else {
        c->msgcurr = 0;
        c->msgused = 0;
        c->iovused = 0;
        if (add_msghdr(c) != 0) {
            out_string(c, "SERVER_ERROR out of memory preparing response");
            return NULL;
        }
    }

    ntokens = tokenize_command(command, tokens, MAX_TOKENS);

    /*
     * In the storage-command tests below, "&& (comm = OPERATION_x)" both
     * records the operation and contributes to the condition; this relies
     * on the OPERATION_* values being non-zero (NOTE(review): confirm
     * against the enum definition).
     */
    if (ntokens >= 3 &&
        ((strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) ||
         (strcmp(tokens[COMMAND_TOKEN].value, "bget") == 0))) {

        ret = process_get_command(c, tokens, ntokens, false);

    } else if ((ntokens == 6 || ntokens == 7) &&
               ((strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = (int)OPERATION_ADD)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = (int)OPERATION_SET)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = (int)OPERATION_REPLACE)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "prepend") == 0 && (comm = (int)OPERATION_PREPEND)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = (int)OPERATION_APPEND)) )) {

        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, false);

    } else if ((ntokens == 7 || ntokens == 8) && (strcmp(tokens[COMMAND_TOKEN].value, "cas") == 0 && (comm = (int)OPERATION_CAS))) {

        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "incr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 1);

    } else if (ntokens >= 3 && (strcmp(tokens[COMMAND_TOKEN].value, "gets") == 0)) {

        ret = process_get_command(c, tokens, ntokens, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "decr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 0);

    } else if (ntokens >= 3 && ntokens <= 5 && (strcmp(tokens[COMMAND_TOKEN].value, "delete") == 0)) {

        ret = process_delete_command(c, tokens, ntokens);

    } else if (ntokens == 3 && (strcmp(tokens[COMMAND_TOKEN].value, "bind") == 0)) {

        ret = process_bind_command(c, tokens, ntokens);

    } else if (ntokens >= 2 && (strcmp(tokens[COMMAND_TOKEN].value, "stats") == 0)) {

        ret = process_stat(c, tokens, ntokens);

    } else if (ntokens >= 2 && ntokens <= 4 && (strcmp(tokens[COMMAND_TOKEN].value, "flush_all") == 0)) {
        time_t exptime;

        set_noreply_maybe(c, tokens, ntokens);

        if (ntokens == (c->noreply ? 3 : 2)) {
            /* no delay argument given */
            exptime = 0;
        } else {
            /* strtol() only sets errno on failure, so clear it first;
             * otherwise a stale ERANGE would reject a valid command. */
            errno = 0;
            exptime = strtol(tokens[1].value, NULL, 10);
            if(errno == ERANGE) {
                out_string(c, "CLIENT_ERROR bad command line format");
                return NULL;
            }
        }

        /* Pick up (and reset) the status of a previously pending request. */
        ENGINE_ERROR_CODE flush_ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (flush_ret == ENGINE_SUCCESS) {
            flush_ret = settings.engine.v1->flush(settings.engine.v0, c, exptime);
        }

        switch (flush_ret) {
        case  ENGINE_SUCCESS:
            out_string(c, "OK");
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            /* 9 == strlen("flush_all") */
            return c->rcurr + 9;
        default:
            out_string(c, "SERVER_ERROR failed to flush cache");
            break;
        }

        /* ENGINE_EWOULDBLOCK returned above, so the flush is counted only
         * once it has actually been answered. */
        STATS_NOKEY(c, cmd_flush);
        return NULL;

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "version") == 0)) {

        out_string(c, "VERSION " VERSION);

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "quit") == 0)) {

        conn_set_state(c, conn_closing);

    } else if ((ntokens == 3 || ntokens == 4) && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) {
        process_verbosity_command(c, tokens, ntokens);
    } else if (settings.extensions.ascii != NULL) {
        /* Not a built-in command: offer it to the registered ASCII
         * protocol extensions. */
        EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *cmd;
        size_t nbytes = 0;
        char *ptr = NULL;

        if (ntokens > 0) {
            if (ntokens == MAX_TOKENS) {
                out_string(c, "ERROR too many arguments");
                return NULL;
            }

            /* Drop the terminating empty token. */
            if (tokens[ntokens - 1].length == 0) {
                --ntokens;
            }
        }

        for (cmd = settings.extensions.ascii; cmd != NULL; cmd = cmd->next) {
            if (cmd->accept(cmd->cookie, c, ntokens, tokens, &nbytes, &ptr)) {
                break;
            }
        }

        if (cmd == NULL) {
            out_string(c, "ERROR unknown command");
        } else if (nbytes == 0) {
            switch (cmd->execute(cmd->cookie, c, ntokens, tokens,
                                 ascii_response_handler)) {
            case ENGINE_SUCCESS:
                if (c->dynamic_buffer.buffer != NULL) {
                    write_and_free(c, c->dynamic_buffer.buffer,
                                   c->dynamic_buffer.offset);
                    c->dynamic_buffer.buffer = NULL;
                } else {
                    conn_set_state(c, conn_new_cmd);
                }
                break;
            case ENGINE_EWOULDBLOCK:
                c->ewouldblock = true;
                ret = tokens[KEY_TOKEN].value;
                break;
            case ENGINE_DISCONNECT:
            default:
                conn_set_state(c, conn_closing);
                break;
            }
        } else {
            /* The extension wants nbytes of payload read into ptr. */
            c->rlbytes = nbytes;
            c->ritem = ptr;
            c->ascii_cmd = cmd;
            /* NOT SUPPORTED YET! */
            conn_set_state(c, conn_nread);
        }
    } else {
        out_string(c, "ERROR");
    }
    return ret;
}
4746
4747 /*
4748 * if we have a complete line in the buffer, process it.
4749 */
try_read_command(conn * c)4750 static int try_read_command(conn *c) {
4751 assert(c != NULL);
4752 assert(c->rcurr <= (c->rbuf + c->rsize));
4753 assert(c->rbytes > 0);
4754
4755 if (c->protocol == negotiating_prot || c->transport == udp_transport) {
4756 if ((unsigned char)c->rbuf[0] == (unsigned char)PROTOCOL_BINARY_REQ) {
4757 c->protocol = binary_prot;
4758 } else {
4759 c->protocol = ascii_prot;
4760 }
4761
4762 if (settings.verbose > 1) {
4763 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4764 "%d: Client using the %s protocol\n", c->sfd,
4765 prot_text(c->protocol));
4766 }
4767 }
4768
4769 if (c->protocol == binary_prot) {
4770 /* Do we have the complete packet header? */
4771 if (c->rbytes < sizeof(c->binary_header)) {
4772 /* need more data! */
4773 return 0;
4774 } else {
4775 #ifdef NEED_ALIGN
4776 if (((long)(c->rcurr)) % 8 != 0) {
4777 /* must realign input buffer */
4778 memmove(c->rbuf, c->rcurr, c->rbytes);
4779 c->rcurr = c->rbuf;
4780 if (settings.verbose > 1) {
4781 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4782 "%d: Realign input buffer\n", c->sfd);
4783 }
4784 }
4785 #endif
4786 protocol_binary_request_header* req;
4787 req = (protocol_binary_request_header*)c->rcurr;
4788
4789 if (settings.verbose > 1) {
4790 /* Dump the packet before we convert it to host order */
4791 char buffer[1024];
4792 ssize_t nw;
4793 nw = bytes_to_output_string(buffer, sizeof(buffer), c->sfd,
4794 true, "Read binary protocol data:",
4795 (const char*)req->bytes,
4796 sizeof(req->bytes));
4797 if (nw != -1) {
4798 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4799 "%s", buffer);
4800 }
4801 }
4802
4803 c->binary_header = *req;
4804 c->binary_header.request.keylen = ntohs(req->request.keylen);
4805 c->binary_header.request.bodylen = ntohl(req->request.bodylen);
4806 c->binary_header.request.vbucket = ntohs(req->request.vbucket);
4807 c->binary_header.request.cas = ntohll(req->request.cas);
4808
4809
4810 if (c->binary_header.request.magic != PROTOCOL_BINARY_REQ &&
4811 !(c->binary_header.request.magic == PROTOCOL_BINARY_RES &&
4812 response_handlers[c->binary_header.request.opcode])) {
4813 if (settings.verbose) {
4814 if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
4815 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4816 "%d: Invalid magic: %x\n", c->sfd,
4817 c->binary_header.request.magic);
4818 } else {
4819 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4820 "%d: ERROR: Unsupported response packet received: %u\n",
4821 c->sfd, (unsigned int)c->binary_header.request.opcode);
4822
4823 }
4824 }
4825 conn_set_state(c, conn_closing);
4826 return -1;
4827 }
4828
4829 c->msgcurr = 0;
4830 c->msgused = 0;
4831 c->iovused = 0;
4832 if (add_msghdr(c) != 0) {
4833 out_string(c, "SERVER_ERROR out of memory");
4834 return 0;
4835 }
4836
4837 c->cmd = c->binary_header.request.opcode;
4838 c->keylen = c->binary_header.request.keylen;
4839 c->opaque = c->binary_header.request.opaque;
4840 /* clear the returned cas value */
4841 c->cas = 0;
4842
4843 dispatch_bin_command(c);
4844
4845 c->rbytes -= sizeof(c->binary_header);
4846 c->rcurr += sizeof(c->binary_header);
4847 }
4848 } else {
4849 char *el, *cont, *left, lb;
4850
4851 if (c->rbytes == 0) {
4852 return 0;
4853 }
4854
4855 el = memchr(c->rcurr, '\n', c->rbytes);
4856 if (!el) {
4857 if (c->rbytes > 1024) {
4858 /*
4859 * We didn't have a '\n' in the first k. This _has_ to be a
4860 * large multiget, if not we should just nuke the connection.
4861 */
4862 char *ptr = c->rcurr;
4863 while (*ptr == ' ') { /* ignore leading whitespaces */
4864 ++ptr;
4865 }
4866
4867 if (ptr - c->rcurr > 100 ||
4868 (strncmp(ptr, "get ", 4) && strncmp(ptr, "gets ", 5))) {
4869
4870 conn_set_state(c, conn_closing);
4871 return 1;
4872 }
4873 }
4874
4875 return 0;
4876 }
4877 cont = el + 1;
4878 if ((el - c->rcurr) > 1 && *(el - 1) == '\r') {
4879 el--;
4880 }
4881 lb = *el;
4882 *el = '\0';
4883
4884 assert(cont <= (c->rcurr + c->rbytes));
4885
4886 LIBEVENT_THREAD *thread = c->thread;
4887 LOCK_THREAD(thread);
4888 left = process_command(c, c->rcurr);
4889 if (c->ewouldblock) {
4890 unregister_event(c);
4891 }
4892 UNLOCK_THREAD(thread);
4893
4894 if (left != NULL) {
4895 /*
4896 * We have not processed the entire command. This happens
4897 * when the engine returns ENGINE_EWOULDBLOCK for one of the
4898 * keys in a get/gets request.
4899 */
4900 assert (left <= el);
4901
4902 int count = strlen(c->rcurr);
4903 if ((c->rcurr + count) == left) {
4904 // Retry the entire command
4905 cont = c->rcurr;
4906 } else {
4907 left -= (count + 1);
4908 cont = left;
4909 assert(cont >= c->rcurr);
4910 if (cont > c->rcurr) {
4911 memmove(cont, c->rcurr, count);
4912 }
4913 }
4914
4915 /* de-tokenize the command */
4916 while ((left = memchr(left, '\0', el - left)) != NULL) {
4917 *left = ' ';
4918 }
4919 *el = lb;
4920 }
4921
4922 c->rbytes -= (cont - c->rcurr);
4923 c->rcurr = cont;
4924
4925 assert(c->rcurr <= (c->rbuf + c->rsize));
4926 }
4927
4928 return 1;
4929 }
4930
4931 /*
4932 * read a UDP request.
4933 */
try_read_udp(conn * c)4934 static enum try_read_result try_read_udp(conn *c) {
4935 int res;
4936
4937 assert(c != NULL);
4938
4939 c->request_addr_size = sizeof(c->request_addr);
4940 res = recvfrom(c->sfd, c->rbuf, c->rsize,
4941 0, (struct sockaddr *)&c->request_addr, &c->request_addr_size);
4942 if (res > 8) {
4943 unsigned char *buf = (unsigned char *)c->rbuf;
4944 STATS_ADD(c, bytes_read, res);
4945
4946 /* Beginning of UDP packet is the request ID; save it. */
4947 c->request_id = buf[0] * 256 + buf[1];
4948
4949 /* If this is a multi-packet request, drop it. */
4950 if (buf[4] != 0 || buf[5] != 1) {
4951 out_string(c, "SERVER_ERROR multi-packet request not supported");
4952 return READ_NO_DATA_RECEIVED;
4953 }
4954
4955 /* Don't care about any of the rest of the header. */
4956 res -= 8;
4957 memmove(c->rbuf, c->rbuf + 8, res);
4958
4959 c->rbytes += res;
4960 c->rcurr = c->rbuf;
4961 return READ_DATA_RECEIVED;
4962 }
4963 return READ_NO_DATA_RECEIVED;
4964 }
4965
4966 /*
4967 * read from network as much as we can, handle buffer overflow and connection
4968 * close.
4969 * before reading, move the remaining incomplete fragment of a command
4970 * (if any) to the beginning of the buffer.
4971 *
4972 * To protect us from someone flooding a connection with bogus data causing
4973 * the connection to eat up all available memory, break out and start looking
4974 * at the data I've got after a number of reallocs...
4975 *
4976 * @return enum try_read_result
4977 */
try_read_network(conn * c)4978 static enum try_read_result try_read_network(conn *c) {
4979 enum try_read_result gotdata = READ_NO_DATA_RECEIVED;
4980 int res;
4981 int num_allocs = 0;
4982 assert(c != NULL);
4983
4984 if (c->rcurr != c->rbuf) {
4985 if (c->rbytes != 0) /* otherwise there's nothing to copy */
4986 memmove(c->rbuf, c->rcurr, c->rbytes);
4987 c->rcurr = c->rbuf;
4988 }
4989
4990 while (1) {
4991 if (c->rbytes >= c->rsize) {
4992 if (num_allocs == 4) {
4993 return gotdata;
4994 }
4995 ++num_allocs;
4996 char *new_rbuf = realloc(c->rbuf, c->rsize * 2);
4997 if (!new_rbuf) {
4998 if (settings.verbose > 0) {
4999 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5000 "Couldn't realloc input buffer\n");
5001 }
5002 c->rbytes = 0; /* ignore what we read */
5003 out_string(c, "SERVER_ERROR out of memory reading request");
5004 c->write_and_go = conn_closing;
5005 return READ_MEMORY_ERROR;
5006 }
5007 c->rcurr = c->rbuf = new_rbuf;
5008 c->rsize *= 2;
5009 }
5010
5011 int avail = c->rsize - c->rbytes;
5012 res = recv(c->sfd, c->rbuf + c->rbytes, avail, 0);
5013 if (res > 0) {
5014 STATS_ADD(c, bytes_read, res);
5015 gotdata = READ_DATA_RECEIVED;
5016 c->rbytes += res;
5017 if (res == avail) {
5018 continue;
5019 } else {
5020 break;
5021 }
5022 }
5023 if (res == 0) {
5024 return READ_ERROR;
5025 }
5026 if (res == -1) {
5027 if (errno == EAGAIN || errno == EWOULDBLOCK) {
5028 break;
5029 }
5030 return READ_ERROR;
5031 }
5032 }
5033 return gotdata;
5034 }
5035
/*
 * Add the connection's event to libevent.
 *
 * @param c       the connection whose event should be registered
 * @param timeout passed through to event_add()
 * @return true on success, false if event_add() failed (a warning is
 *         logged)
 */
bool register_event(conn *c, struct timeval *timeout) {
#ifdef DEBUG
    assert(!c->registered_in_libevent);
#endif

    int rc = event_add(&c->event, timeout);
    if (rc == -1) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING,
                                        NULL,
                                        "Failed to add connection to libevent: %s",
                                        strerror(errno));
        return false;
    }

#ifdef DEBUG
    /* Debug builds track the registration state for the assertions. */
    c->registered_in_libevent = true;
#endif

    return true;
}
5055
/*
 * Remove the connection's event from libevent.
 *
 * @param c the connection whose event should be removed
 * @return true on success, false if event_del() failed
 */
bool unregister_event(conn *c) {
#ifdef DEBUG
    assert(c->registered_in_libevent);
#endif

    int rc = event_del(&c->event);
    if (rc == -1) {
        return false;
    }

#ifdef DEBUG
    /* Debug builds track the registration state for the assertions. */
    c->registered_in_libevent = false;
#endif

    return true;
}
5071
5072
/*
 * Change the set of libevent flags the connection is waiting on.
 *
 * Nothing is done when the flags are already in effect. Otherwise the
 * event is removed, re-initialized with the new flags on the same event
 * base, and added again.
 *
 * @param c         the connection to update
 * @param new_flags the libevent flags (EV_READ, EV_WRITE, ...) to wait for
 * @return true if the flags were already set or the event was re-added,
 *         false if removing or re-adding the event failed
 */
bool update_event(conn *c, const int new_flags) {
    assert(c != NULL);

    if (c->ev_flags == new_flags) {
        return true;
    }

    /* Remember the base: event_set() below re-initializes the event. */
    struct event_base *base = c->event.ev_base;

    settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                    "Updated event for %d to read=%s, write=%s\n",
                                    c->sfd, (new_flags & EV_READ ? "yes" : "no"),
                                    (new_flags & EV_WRITE ? "yes" : "no"));

    if (!unregister_event(c)) {
        return false;
    }

    event_set(&c->event, c->sfd, new_flags, event_handler, (void *)c);
    event_base_set(base, &c->event);
    c->ev_flags = new_flags;

    return register_event(c, NULL);
}
5095
5096 /*
5097 * Transmit the next chunk of data from our list of msgbuf structures.
5098 *
5099 * Returns:
5100 * TRANSMIT_COMPLETE All done writing.
5101 * TRANSMIT_INCOMPLETE More data remaining to write.
5102 * TRANSMIT_SOFT_ERROR Can't write any more right now.
5103 * TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing)
5104 */
transmit(conn * c)5105 static enum transmit_result transmit(conn *c) {
5106 assert(c != NULL);
5107
5108 if (c->msgcurr < c->msgused &&
5109 c->msglist[c->msgcurr].msg_iovlen == 0) {
5110 /* Finished writing the current msg; advance to the next. */
5111 c->msgcurr++;
5112 }
5113 if (c->msgcurr < c->msgused) {
5114 ssize_t res;
5115 struct msghdr *m = &c->msglist[c->msgcurr];
5116
5117 res = sendmsg(c->sfd, m, 0);
5118 if (res > 0) {
5119 STATS_ADD(c, bytes_written, res);
5120
5121 /* We've written some of the data. Remove the completed
5122 iovec entries from the list of pending writes. */
5123 while (m->msg_iovlen > 0 && res >= m->msg_iov->iov_len) {
5124 res -= m->msg_iov->iov_len;
5125 m->msg_iovlen--;
5126 m->msg_iov++;
5127 }
5128
5129 /* Might have written just part of the last iovec entry;
5130 adjust it so the next write will do the rest. */
5131 if (res > 0) {
5132 m->msg_iov->iov_base = (caddr_t)m->msg_iov->iov_base + res;
5133 m->msg_iov->iov_len -= res;
5134 }
5135 return TRANSMIT_INCOMPLETE;
5136 }
5137 if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
5138 if (!update_event(c, EV_WRITE | EV_PERSIST)) {
5139 if (settings.verbose > 0) {
5140 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5141 "Couldn't update event\n");
5142 }
5143 conn_set_state(c, conn_closing);
5144 return TRANSMIT_HARD_ERROR;
5145 }
5146 return TRANSMIT_SOFT_ERROR;
5147 }
5148 /* if res == 0 or res == -1 and error is not EAGAIN or EWOULDBLOCK,
5149 we have a real error, on which we close the connection */
5150 if (settings.verbose > 0) {
5151 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
5152 "Failed to write, and not due to blocking: %s",
5153 strerror(errno));
5154 }
5155
5156 if (IS_UDP(c->transport))
5157 conn_set_state(c, conn_read);
5158 else
5159 conn_set_state(c, conn_closing);
5160 return TRANSMIT_HARD_ERROR;
5161 } else {
5162 return TRANSMIT_COMPLETE;
5163 }
5164 }
5165
/*
 * State handler for a listening socket: accept one client and, unless the
 * connection limit has been reached, hand it over via dispatch_conn_new().
 *
 * Always returns false.
 */
bool conn_listening(conn *c)
{
    struct sockaddr_storage peer;
    socklen_t peer_len = sizeof(peer);
    int client_fd = accept(c->sfd, (struct sockaddr *)&peer, &peer_len);

    if (client_fd == -1) {
        if (errno == EMFILE) {
            /* Out of file descriptors: stop listening for now. */
            if (settings.verbose > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                                "Too many open connections\n");
            }
            disable_listen();
        } else if (errno != EAGAIN && errno != EWOULDBLOCK) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "Failed to accept new client: %s\n",
                                            strerror(errno));
        }

        return false;
    }

    STATS_LOCK();
    int open_conns = ++stats.curr_conns;
    STATS_UNLOCK();

    if (open_conns >= settings.maxconns) {
        /* Over the configured limit: count the rejection and drop the
           freshly accepted socket. */
        STATS_LOCK();
        ++stats.rejected_conns;
        STATS_UNLOCK();

        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Too many open connections\n");
        }

        safe_close(client_fd);
        return false;
    }

    if (evutil_make_socket_nonblocking(client_fd) == -1) {
        safe_close(client_fd);
        return false;
    }

    dispatch_conn_new(client_fd, conn_new_cmd, EV_READ | EV_PERSIST,
                      DATA_BUFFER_SIZE, tcp_transport);

    return false;
}
5216
5217 /**
5218 * Ship tap log to the other end. This state differs with all other states
5219 * in the way that it support full duplex dialog. We're listening to both read
5220 * and write events from libevent most of the time. If a read event occurs we
5221 * switch to the conn_read state to read and execute the input message (that would
5222 * be an ack message from the other side). If a write event occurs we continue to
5223 * send tap log to the other end.
5224 * @param c the tap connection to drive
5225 * @return true if we should continue to process work for this connection, false
5226 * if we should start processing events for other connections.
5227 */
conn_ship_log(conn * c)5228 bool conn_ship_log(conn *c) {
5229 bool cont = false;
5230
5231 if (c->sfd == INVALID_SOCKET) {
5232 return false;
5233 }
5234
5235 short mask = EV_READ | EV_PERSIST | EV_WRITE;
5236
5237 if (c->which & EV_READ || c->rbytes > 0) {
5238 if (c->rbytes > 0) {
5239 if (try_read_command(c) == 0) {
5240 conn_set_state(c, conn_read);
5241 }
5242 } else {
5243 conn_set_state(c, conn_read);
5244 }
5245
5246 // we're going to process something.. let's proceed
5247 cont = true;
5248
5249 // We have a finite number of messages in the input queue
5250 // so let's process all of them instead of backing off after
5251 // reading a subset of them.
5252 // Why? Because we've got every time we're calling ship_tap_log
5253 // we try to send a chunk of items.. This means that if we end
5254 // up in a situation where we're receiving a burst of nack messages
5255 // we'll only process a subset of messages in our input queue,
5256 // and it will slowly grow..
5257 c->nevents = settings.reqs_per_tap_event;
5258 } else if (c->which & EV_WRITE) {
5259 --c->nevents;
5260 if (c->nevents >= 0) {
5261 LOCK_THREAD(c->thread);
5262 c->ewouldblock = false;
5263 ship_tap_log(c);
5264 if (c->ewouldblock) {
5265 mask = EV_READ | EV_PERSIST;
5266 } else {
5267 cont = true;
5268 }
5269 UNLOCK_THREAD(c->thread);
5270 }
5271 }
5272
5273 if (!update_event(c, mask)) {
5274 if (settings.verbose > 0) {
5275 settings.extensions.logger->log(EXTENSION_LOG_INFO,
5276 c, "Couldn't update event\n");
5277 }
5278 conn_set_state(c, conn_closing);
5279 }
5280
5281 return cont;
5282 }
5283
/*
 * State handler: register for read events and move to conn_read.
 *
 * Returns false once the read event is registered, or true after switching
 * to conn_closing because the event could not be updated.
 */
bool conn_waiting(conn *c) {
    if (update_event(c, EV_READ | EV_PERSIST)) {
        conn_set_state(c, conn_read);
        return false;
    }

    if (settings.verbose > 0) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Couldn't update event\n");
    }
    conn_set_state(c, conn_closing);
    return true;
}
5296
/*
 * State handler: read from the network (UDP or stream transport) and pick
 * the next state from the read result. Always returns true.
 */
bool conn_read(conn *c) {
    int outcome;

    if (IS_UDP(c->transport)) {
        outcome = try_read_udp(c);
    } else {
        outcome = try_read_network(c);
    }

    if (outcome == READ_NO_DATA_RECEIVED) {
        conn_set_state(c, conn_waiting);
    } else if (outcome == READ_DATA_RECEIVED) {
        conn_set_state(c, conn_parse_cmd);
    } else if (outcome == READ_ERROR) {
        conn_set_state(c, conn_closing);
    }
    /* READ_MEMORY_ERROR (failed to allocate more memory): the state was
       already set by try_read_network, so nothing to do here. */

    return true;
}
5316
/*
 * State handler: try to parse a command from the input already read.
 * Moves to conn_waiting when try_read_command() returns 0 (more data is
 * needed). Returns false only when the connection is flagged ewouldblock.
 */
bool conn_parse_cmd(conn *c) {
    const int parsed = try_read_command(c);

    if (parsed == 0) {
        /* we need more data! */
        conn_set_state(c, conn_waiting);
    }

    return !c->ewouldblock;
}
5325
/*
 * State handler run before each new command. Only a limited number of
 * requests are processed per event so that one connection cannot starve
 * the others.
 *
 * Returns true to keep running the state machine, false to yield.
 */
bool conn_new_cmd(conn *c) {
    if (--c->nevents >= 0) {
        reset_cmd_handler(c);
        return true;
    }

    /* Budget exhausted: yield to other connections. */
    STATS_NOKEY(c, conn_yields);

    if (c->rbytes > 0) {
        /* Data is already sitting in the input buffer, so libevent will
           most likely not signal a read event on the socket (unless more
           data arrives). As a hack, ask for a write event instead, since
           writing should be possible. */
        if (!update_event(c, EV_WRITE | EV_PERSIST)) {
            if (settings.verbose > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO,
                                                c, "Couldn't update event\n");
            }
            conn_set_state(c, conn_closing);
            return true;
        }
    }

    return false;
}
5354
5355
/*
 * State handler: read c->sbytes bytes and throw them away, first from the
 * data already buffered and then straight from the socket.
 *
 * Returns true to keep running the state machine, false when the socket
 * would block and a read event has been registered.
 */
bool conn_swallow(conn *c) {
    /* Nothing left to discard: go handle the next command. */
    if (c->sbytes == 0) {
        conn_set_state(c, conn_new_cmd);
        return true;
    }

    /* Consume leftovers in the read buffer first. */
    if (c->rbytes > 0) {
        uint32_t chunk = c->sbytes < c->rbytes ? c->sbytes : c->rbytes;
        c->sbytes -= chunk;
        c->rcurr += chunk;
        c->rbytes -= chunk;
        return true;
    }

    /* Then read from the socket, never more than is left to swallow. */
    ssize_t nread = recv(c->sfd, c->rbuf,
                         c->rsize > c->sbytes ? c->sbytes : c->rsize, 0);
    if (nread > 0) {
        STATS_ADD(c, bytes_read, nread);
        c->sbytes -= nread;
        return true;
    }

    if (nread == 0) { /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

    if (nread == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        if (update_event(c, EV_READ | EV_PERSIST)) {
            return false;
        }
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return true;
    }

    /* Anything else is a real error and closes the connection; peer
       disconnects (ENOTCONN / ECONNRESET) are not logged. */
    if (errno != ENOTCONN && errno != ECONNRESET) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Failed to read, and not due to blocking (%s)\n",
                                        strerror(errno));
    }

    conn_set_state(c, conn_closing);

    return true;
}
5408
/*
 * State handler: read the remaining c->rlbytes bytes of a value into
 * c->ritem, first from the read buffer and then from the socket. Once
 * nothing is left to read, complete_nread() is invoked.
 *
 * Returns true to keep running the state machine, false when the
 * connection has to wait (socket would block, or the completed command
 * flagged ewouldblock).
 */
bool conn_nread(conn *c) {
    if (c->rlbytes == 0) {
        LIBEVENT_THREAD *owner = c->thread;
        bool must_block = false;

        LOCK_THREAD(owner);
        c->ewouldblock = false;
        complete_nread(c);
        UNLOCK_THREAD(owner);

        /* Locking is done in two steps because complete_nread may have
           moved the connection to a different thread. */
        owner = c->thread;
        LOCK_THREAD(owner);
        if (c->ewouldblock) {
            unregister_event(c);
            must_block = true;
        }
        UNLOCK_THREAD(owner);
        return !must_block;
    }

    /* Use up leftovers in the read buffer first. */
    if (c->rbytes > 0) {
        uint32_t chunk = c->rlbytes < c->rbytes ? c->rlbytes : c->rbytes;
        if (c->ritem != c->rcurr) {
            memmove(c->ritem, c->rcurr, chunk);
        }
        c->ritem += chunk;
        c->rlbytes -= chunk;
        c->rcurr += chunk;
        c->rbytes -= chunk;
        if (c->rlbytes == 0) {
            return true;
        }
    }

    /* Then read the rest straight from the socket. */
    ssize_t nread = recv(c->sfd, c->ritem, c->rlbytes, 0);
    if (nread > 0) {
        STATS_ADD(c, bytes_read, nread);
        if (c->rcurr == c->ritem) {
            c->rcurr += nread;
        }
        c->ritem += nread;
        c->rlbytes -= nread;
        return true;
    }

    if (nread == 0) { /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

#ifdef INNODB_MEMCACHED
    /* MEMCACHED_RESOLVE: on Solaris, when connected through telnet and
       waiting for the data of an "add" or "set" command, recv() can return
       -1 with errno == 0, which caused early termination. Treat errno == 0
       like a would-block condition for now. */
    const bool retry_later =
        (nread == -1 && (errno == EAGAIN || errno == EWOULDBLOCK || !errno));
#else
    const bool retry_later =
        (nread == -1 && (errno == EAGAIN || errno == EWOULDBLOCK));
#endif /* INNODB_MEMCACHED */

    if (retry_later) {
        if (update_event(c, EV_READ | EV_PERSIST)) {
            return false;
        }
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return true;
    }

    /* Anything else is a real error and closes the connection; peer
       disconnects (ENOTCONN / ECONNRESET) are not logged. */
    if (errno != ENOTCONN && errno != ECONNRESET) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Failed to read, and not due to blocking:\n"
                                        "errno: %d %s \n"
                                        "rcurr=%lx ritem=%lx rbuf=%lx rlbytes=%d rsize=%d\n",
                                        errno, strerror(errno),
                                        (long)c->rcurr, (long)c->ritem, (long)c->rbuf,
                                        (int)c->rlbytes, (int)c->rsize);
    }
    conn_set_state(c, conn_closing);
    return true;
}
5493
5494 bool conn_write(conn *c) {
5495 /*
5496 * We want to write out a simple response. If we haven't already,
5497 * assemble it into a msgbuf list (this will be a single-entry
5498 * list for TCP or a two-entry list for UDP).
5499 */
5500 if (c->iovused == 0 || (IS_UDP(c->transport) && c->iovused == 1)) {
5501 if (add_iov(c, c->wcurr, c->wbytes) != 0) {
5502 if (settings.verbose > 0) {
5503 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5504 "Couldn't build response\n");
5505 }
5506 conn_set_state(c, conn_closing);
5507 return true;
5508 }
5509 }
5510
5511 return conn_mwrite(c);
5512 }
5513
5514 bool conn_mwrite(conn *c) {
5515 if (IS_UDP(c->transport) && c->msgcurr == 0 && build_udp_headers(c) != 0) {
5516 if (settings.verbose > 0) {
5517 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5518 "Failed to build UDP headers\n");
5519 }
5520 conn_set_state(c, conn_closing);
5521 return true;
5522 }
5523
5524 switch (transmit(c)) {
5525 case TRANSMIT_COMPLETE:
5526 if (c->state == conn_mwrite) {
5527 while (c->ileft > 0) {
5528 item *it = *(c->icurr);
5529 settings.engine.v1->release(settings.engine.v0, c, it);
5530 c->icurr++;
5531 c->ileft--;
5532 }
5533 while (c->suffixleft > 0) {
5534 char *suffix = *(c->suffixcurr);
5535 cache_free(c->thread->suffix_cache, suffix);
5536 c->suffixcurr++;
5537 c->suffixleft--;
5538 }
5539 /* XXX: I don't know why this wasn't the general case */
5540 if(c->protocol == binary_prot) {
5541 conn_set_state(c, c->write_and_go);
5542 } else {
5543 conn_set_state(c, conn_new_cmd);
5544 }
5545 } else if (c->state == conn_write) {
5546 if (c->write_and_free) {
5547 free(c->write_and_free);
5548 c->write_and_free = 0;
5549 }
5550 conn_set_state(c, c->write_and_go);
5551 } else {
5552 if (settings.verbose > 0) {
5553 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5554 "Unexpected state %d\n", c->state);
5555 }
5556 conn_set_state(c, conn_closing);
5557 }
5558 break;
5559
5560 case TRANSMIT_INCOMPLETE:
5561 case TRANSMIT_HARD_ERROR:
5562 break; /* Continue in state machine. */
5563
5564 case TRANSMIT_SOFT_ERROR:
5565 return false;
5566 }
5567
5568 return true;
5569 }
5570
5571 bool conn_pending_close(conn *c) {
5572 assert(c->sfd == INVALID_SOCKET);
5573 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5574 "Awaiting clients to release the cookie (pending close for %p)",
5575 (void*)c);
5576 LOCK_THREAD(c->thread);
5577 c->thread->pending_io = list_remove(c->thread->pending_io, c);
5578 if (!list_contains(c->thread->pending_close, c)) {
5579 enlist_conn(c, &c->thread->pending_close);
5580 }
5581 UNLOCK_THREAD(c->thread);
5582
5583 /*
5584 * tell the tap connection that we're disconnecting it now,
5585 * but give it a grace period
5586 */
5587 perform_callbacks(ON_DISCONNECT, NULL, c);
5588
5589 /*
5590 * disconnect callback may have changed the state for the object
5591 * so we might complete the disconnect now
5592 */
5593 return c->state != conn_pending_close;
5594 }
5595
5596 bool conn_immediate_close(conn *c) {
5597 settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
5598 "Immediate close of %p",
5599 (void*)c);
5600 perform_callbacks(ON_DISCONNECT, NULL, c);
5601 conn_close(c);
5602
5603 return false;
5604 }
5605
5606 bool conn_closing(conn *c) {
5607 if (IS_UDP(c->transport)) {
5608 conn_cleanup(c);
5609 return false;
5610 }
5611
5612 // We don't want any network notifications anymore..
5613 unregister_event(c);
5614 safe_close(c->sfd);
5615 c->sfd = INVALID_SOCKET;
5616
5617 if (c->refcount > 1) {
5618 conn_set_state(c, conn_pending_close);
5619 } else {
5620 conn_set_state(c, conn_immediate_close);
5621 }
5622 return true;
5623 }
5624
5625 bool conn_add_tap_client(conn *c) {
5626 LIBEVENT_THREAD *tp = tap_thread;
5627 LIBEVENT_THREAD *orig_thread = c->thread;
5628
5629 assert(orig_thread);
5630 assert(orig_thread != tp);
5631
5632 c->ewouldblock = true;
5633
5634 unregister_event(c);
5635
5636 LOCK_THREAD(orig_thread);
5637 /* Clean out the lists */
5638 orig_thread->pending_io = list_remove(orig_thread->pending_io, c);
5639 orig_thread->pending_close = list_remove(orig_thread->pending_close, c);
5640
5641 LOCK_THREAD(tp);
5642 c->ev_flags = 0;
5643 conn_set_state(c, conn_setup_tap_stream);
5644 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
5645 "Moving %d conn from %p to %p\n",
5646 c->sfd, c->thread, tp);
5647 c->thread = tp;
5648 c->event.ev_base = tp->base;
5649 assert(c->next == NULL);
5650 assert(c->list_state == 0);
5651 enlist_conn(c, &tp->pending_io);
5652
5653 UNLOCK_THREAD(tp);
5654
5655 UNLOCK_THREAD(orig_thread);
5656
5657 notify_thread(tp);
5658
5659 return false;
5660 }
5661
5662 bool conn_setup_tap_stream(conn *c) {
5663 process_bin_tap_connect(c);
5664 return true;
5665 }
5666
5667 void event_handler(const int fd, const short which, void *arg) {
5668 conn *c;
5669
5670 c = (conn *)arg;
5671 assert(c != NULL);
5672
5673 if (memcached_shutdown) {
5674 event_base_loopbreak(c->event.ev_base);
5675 return ;
5676 }
5677
5678 c->which = which;
5679
5680 /* sanity */
5681 if (fd != c->sfd) {
5682 if (settings.verbose > 0) {
5683 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
5684 "Catastrophic: event fd doesn't match conn fd!\n");
5685 }
5686 conn_close(c);
5687 return;
5688 }
5689
5690 perform_callbacks(ON_SWITCH_CONN, c, c);
5691
5692 c->nevents = settings.reqs_per_event;
5693 if (c->state == conn_ship_log) {
5694 c->nevents = settings.reqs_per_tap_event;
5695 }
5696
5697 LIBEVENT_THREAD *thr = c->thread;
5698
5699 // Do we have pending closes?
5700 const size_t max_items = 256;
5701 conn *pending_close[max_items];
5702 size_t n_pending_close = 0;
5703 if (thr != NULL) {
5704 LOCK_THREAD(thr);
5705 if (thr->pending_close && thr->last_checked != current_time) {
5706 assert(!has_cycle(thr->pending_close));
5707 thr->last_checked = current_time;
5708
5709 n_pending_close = list_to_array(pending_close, max_items,
5710 &thr->pending_close);
5711 }
5712 UNLOCK_THREAD(thr);
5713 }
5714
5715 if (settings.verbose) {
5716 do {
5717 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5718 "%d - Running task: (%s)\n",
5719 c->sfd, state_text(c->state));
5720 } while (c->state(c));
5721 } else {
5722 while (c->state(c)) {
5723 /* empty */
5724 }
5725 }
5726
5727 /* Close any connections pending close */
5728 if (n_pending_close > 0) {
5729 for (size_t i = 0; i < n_pending_close; ++i) {
5730 conn *ce = pending_close[i];
5731 if (ce->refcount == 1) {
5732 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
5733 "OK, time to nuke: %p\n",
5734 (void*)ce);
5735 conn_close(ce);
5736 } else {
5737 LOCK_THREAD(ce->thread);
5738 enlist_conn(ce, &ce->thread->pending_close);
5739 UNLOCK_THREAD(ce->thread);
5740 }
5741 }
5742 }
5743
5744 if (thr != NULL) {
5745 LOCK_THREAD(thr);
5746 finalize_list(pending_close, n_pending_close);
5747 UNLOCK_THREAD(thr);
5748 }
5749 }
5750
5751 static void dispatch_event_handler(int fd, short which, void *arg) {
5752 char buffer[80];
5753 ssize_t nr = recv(fd, buffer, sizeof(buffer), 0);
5754
5755 if (nr != -1 && is_listen_disabled()) {
5756 bool enable = false;
5757 pthread_mutex_lock(&listen_state.mutex);
5758 listen_state.count -= nr;
5759 if (listen_state.count <= 0) {
5760 enable = true;
5761 listen_state.disabled = false;
5762 }
5763 pthread_mutex_unlock(&listen_state.mutex);
5764 if (enable) {
5765 conn *next;
5766 for (next = listen_conn; next; next = next->next) {
5767 update_event(next, EV_READ | EV_PERSIST);
5768 if (listen(next->sfd, settings.backlog) != 0) {
5769 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5770 "listen() failed",
5771 strerror(errno));
5772 }
5773 }
5774 }
5775 }
5776 }
5777
5778
5779
5780 static SOCKET new_socket(struct addrinfo *ai) {
5781 SOCKET sfd;
5782
5783 sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
5784 if (sfd == INVALID_SOCKET) {
5785 return INVALID_SOCKET;
5786 }
5787
5788 if (evutil_make_socket_nonblocking(sfd) == -1) {
5789 safe_close(sfd);
5790 return INVALID_SOCKET;
5791 }
5792
5793 return sfd;
5794 }
5795
5796
5797 /*
5798 * Sets a socket's send buffer size to the maximum allowed by the system.
5799 */
5800 static void maximize_sndbuf(const int sfd) {
5801 socklen_t intsize = sizeof(int);
5802 int last_good = 0;
5803 int min, max, avg;
5804 int old_size;
5805
5806 /* Start with the default size. */
5807 if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&old_size, &intsize) != 0) {
5808 if (settings.verbose > 0) {
5809 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5810 "getsockopt(SO_SNDBUF): %s",
5811 strerror(errno));
5812 }
5813
5814 return;
5815 }
5816
5817 /* Binary-search for the real maximum. */
5818 min = old_size;
5819 max = MAX_SENDBUF_SIZE;
5820
5821 while (min <= max) {
5822 avg = ((unsigned int)(min + max)) / 2;
5823 if (setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&avg, intsize) == 0) {
5824 last_good = avg;
5825 min = avg + 1;
5826 } else {
5827 max = avg - 1;
5828 }
5829 }
5830
5831 if (settings.verbose > 1) {
5832 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
5833 "<%d send buffer was %d, now %d\n", sfd, old_size, last_good);
5834 }
5835 }
5836
5837
5838
/**
 * Create a socket and bind it to a specific port number.
 *
 * Every address returned by getaddrinfo() for the interface/port pair is
 * tried. UDP sockets are dispatched to the worker threads; stream sockets
 * are put into listening mode and added to the listen_conn list.
 *
 * @param interface the interface to bind to
 * @param port the port number to bind to (-1 is treated as 0)
 * @param transport the transport protocol (TCP / UDP)
 * @param portnumber_file A filepointer to write the port numbers to
 *        when they are successfully added to the list of ports we
 *        listen on.
 * @return 0 if at least one address was bound; 1 if getaddrinfo() failed,
 *         a bind() (other than EADDRINUSE) or listen() call failed, or no
 *         address could be bound at all.
 */
static int server_socket(const char *interface,
                         int port,
                         enum network_transport transport,
                         FILE *portnumber_file) {
    int sfd;
    struct linger ling = {0, 0};
    struct addrinfo *ai;
    struct addrinfo *next;
    struct addrinfo hints = { .ai_flags = AI_PASSIVE,
                              .ai_family = AF_UNSPEC };
    char port_buf[NI_MAXSERV];
    int error;
    int success = 0;
    int flags = 1;
    /* NOTE(review): udp_socket[] is filled below without a bounds check;
       its capacity is not visible here - confirm it can hold one entry per
       address returned by getaddrinfo(). */
    num_udp_socket = 0;

    hints.ai_socktype = IS_UDP(transport) ? SOCK_DGRAM : SOCK_STREAM;

    if (port == -1) {
        port = 0;
    }
    snprintf(port_buf, sizeof(port_buf), "%d", port);
    error = getaddrinfo(interface, port_buf, &hints, &ai);
    if (error != 0) {
        if (error != EAI_SYSTEM) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                            "getaddrinfo(): %s\n", gai_strerror(error));
        } else {
            /* For EAI_SYSTEM the real cause is reported through errno,
               not through the EAI_* return value. */
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                            "getaddrinfo(): %s\n", strerror(errno));
        }
        return 1;
    }

    for (next = ai; next; next = next->ai_next) {
        conn *listen_conn_add;
        if ((sfd = new_socket(next)) == INVALID_SOCKET) {
            /* getaddrinfo can return "junk" addresses,
             * we make sure at least one works before erroring.
             */
            continue;
        }

#ifdef IPV6_V6ONLY
        if (next->ai_family == AF_INET6) {
            error = setsockopt(sfd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(IPV6_V6ONLY): %s",
                                                strerror(errno));
                safe_close(sfd);
                continue;
            }
        }
#endif

        setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
        if (IS_UDP(transport)) {
            maximize_sndbuf(sfd);
            udp_socket[num_udp_socket] = sfd;
            num_udp_socket++;
        } else {
            /* Failures of the following options are logged but not fatal. */
            error = setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(SO_KEEPALIVE): %s",
                                                strerror(errno));
            }

            error = setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(SO_LINGER): %s",
                                                strerror(errno));
            }

            error = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(TCP_NODELAY): %s",
                                                strerror(errno));
            }
        }

        if (bind(sfd, next->ai_addr, next->ai_addrlen) == SOCKET_ERROR) {
            if (errno != EADDRINUSE) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "bind(): %s",
                                                strerror(errno));
                safe_close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            /* Address already in use: skip it and try the next one. */
            safe_close(sfd);
            continue;
        } else {
            success++;
            if (!IS_UDP(transport) && listen(sfd, settings.backlog) == SOCKET_ERROR) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "listen(): %s",
                                                strerror(errno));
                safe_close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            /* Report the port that was actually bound. */
            if (portnumber_file != NULL &&
                (next->ai_addr->sa_family == AF_INET ||
                 next->ai_addr->sa_family == AF_INET6)) {
                union {
                    struct sockaddr_in in;
                    struct sockaddr_in6 in6;
                } my_sockaddr;
                socklen_t len = sizeof(my_sockaddr);
                if (getsockname(sfd, (struct sockaddr*)&my_sockaddr, &len)==0) {
                    if (next->ai_addr->sa_family == AF_INET) {
                        fprintf(portnumber_file, "%s INET: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in.sin_port));
                    } else {
                        fprintf(portnumber_file, "%s INET6: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in6.sin6_port));
                    }
                }
            }
        }

        if (IS_UDP(transport)) {
            int c;

            for (c = 0; c < settings.num_threads_per_udp; c++) {
                /* this is guaranteed to hit all threads because we round-robin */
                dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
                                  UDP_READ_BUFFER_SIZE, transport);
                STATS_LOCK();
                ++stats.curr_conns;
                ++stats.daemon_conns;
                STATS_UNLOCK();
            }
        } else {
            if (!(listen_conn_add = conn_new(sfd, conn_listening,
                                             EV_READ | EV_PERSIST, 1,
                                             transport, main_base, NULL))) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "failed to create listening connection\n");
                exit(EXIT_FAILURE);
            }
            /* Push the new listener onto the front of the listen list. */
            listen_conn_add->next = listen_conn;
            listen_conn = listen_conn_add;
            STATS_LOCK();
            ++stats.curr_conns;
            ++stats.daemon_conns;
            STATS_UNLOCK();
        }
    }

    freeaddrinfo(ai);

    /* Return zero iff at least one address was bound successfully. */
    return success == 0;
}
6009
6010 static int server_sockets(int port, enum network_transport transport,
6011 FILE *portnumber_file) {
6012 if (settings.inter == NULL) {
6013 return server_socket(settings.inter, port, transport, portnumber_file);
6014 } else {
6015 // tokenize them and bind to each one of them..
6016 char *b;
6017 int ret = 0;
6018 char *list = strdup(settings.inter);
6019
6020 if (list == NULL) {
6021 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6022 "Failed to allocate memory for parsing server interface string\n");
6023 return 1;
6024 }
6025 for (char *p = strtok_r(list, ";,", &b);
6026 p != NULL;
6027 p = strtok_r(NULL, ";,", &b)) {
6028 int the_port = port;
6029
6030 char *s = strchr(p, ':');
6031 if (s != NULL) {
6032 *s = '\0';
6033 ++s;
6034 if (!safe_strtol(s, &the_port)) {
6035 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6036 "Invalid port number: \"%s\"", s);
6037 return 1;
6038 }
6039 }
6040 if (strcmp(p, "*") == 0) {
6041 p = NULL;
6042 }
6043 ret |= server_socket(p, the_port, transport, portnumber_file);
6044 }
6045 free(list);
6046 return ret;
6047 }
6048 }
6049
6050 static int new_socket_unix(void) {
6051 int sfd;
6052
6053 if ((sfd = socket(AF_UNIX, SOCK_STREAM, 0)) == INVALID_SOCKET) {
6054 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6055 "socket(AF_UNIX, SOCK_STREAM, 0): %s",
6056 strerror(errno));
6057 return INVALID_SOCKET;
6058 }
6059
6060 if (evutil_make_socket_nonblocking(sfd) == -1) {
6061 safe_close(sfd);
6062 return INVALID_SOCKET;
6063 }
6064 return sfd;
6065 }
6066
6067 /* this will probably not work on windows */
6068 static int server_socket_unix(const char *path, int access_mask) {
6069 int sfd;
6070 struct linger ling = {0, 0};
6071 struct sockaddr_un addr;
6072 struct stat tstat;
6073 int flags =1;
6074 int old_umask;
6075
6076 if (!path) {
6077 return 1;
6078 }
6079
6080 if ((sfd = new_socket_unix()) == -1) {
6081 return 1;
6082 }
6083
6084 /*
6085 * Clean up a previous socket file if we left it around
6086 */
6087 if (lstat(path, &tstat) == 0) {
6088 if (S_ISSOCK(tstat.st_mode))
6089 unlink(path);
6090 }
6091
6092 setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
6093 setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
6094 setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
6095
6096 /*
6097 * the memset call clears nonstandard fields in some impementations
6098 * that otherwise mess things up.
6099 */
6100 memset(&addr, 0, sizeof(addr));
6101
6102 addr.sun_family = AF_UNIX;
6103 strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
6104 assert(strcmp(addr.sun_path, path) == 0);
6105 old_umask = umask( ~(access_mask&0777));
6106 if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
6107 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6108 "bind(): %s",
6109 strerror(errno));
6110 safe_close(sfd);
6111 umask(old_umask);
6112 return 1;
6113 }
6114 umask(old_umask);
6115 if (listen(sfd, settings.backlog) == -1) {
6116 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6117 "listen(): %s",
6118 strerror(errno));
6119 safe_close(sfd);
6120 return 1;
6121 }
6122 if (!(listen_conn = conn_new(sfd, conn_listening,
6123 EV_READ | EV_PERSIST, 1,
6124 local_transport, main_base, NULL))) {
6125 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6126 "failed to create listening connection\n");
6127 exit(EXIT_FAILURE);
6128 }
6129 STATS_LOCK();
6130 ++stats.daemon_conns;
6131 STATS_UNLOCK();
6132
6133 return 0;
6134 }
6135
6136 static struct event clockevent;
6137
6138 /* time-sensitive callers can call it by hand with this, outside the normal ever-1-second timer */
6139 static void set_current_time(void) {
6140 struct timeval timer;
6141
6142 gettimeofday(&timer, NULL);
6143 current_time = (rel_time_t) (timer.tv_sec - process_started);
6144 }
6145
6146 static void clock_handler(const int fd, const short which, void *arg) {
6147 struct timeval t = {.tv_sec = 1, .tv_usec = 0};
6148 static bool initialized = false;
6149
6150 if (memcached_shutdown) {
6151 event_base_loopbreak(main_base);
6152 return ;
6153 }
6154
6155 if (initialized) {
6156 /* only delete the event if it's actually there. */
6157 evtimer_del(&clockevent);
6158 } else {
6159 initialized = true;
6160 }
6161
6162 evtimer_set(&clockevent, clock_handler, 0);
6163 event_base_set(main_base, &clockevent);
6164 evtimer_add(&clockevent, &t);
6165
6166 set_current_time();
6167 }
6168
6169 static void usage(void) {
6170 printf(PACKAGE " " VERSION "\n");
6171 printf("-p <num> TCP port number to listen on (default: 11211)\n"
6172 "-U <num> UDP port number to listen on (default: 11211, 0 is off)\n"
6173 "-s <file> UNIX socket path to listen on (disables network support)\n"
6174 "-a <mask> access mask for UNIX socket, in octal (default: 0700)\n"
6175 "-l <addr> interface to listen on (default: INADDR_ANY, all addresses)\n"
6176 " <addr> may be specified as host:port. If you don't specify\n"
6177 " a port number, the value you specified with -p or -U is\n"
6178 " used. You may specify multiple addresses separated by comma\n"
6179 " or by using -l multiple times\n"
6180 "-d run as a daemon\n"
6181 "-r maximize core file limit\n"
6182 "-u <username> assume identity of <username> (only when run as root)\n"
6183 "-m <num> max memory to use for items in megabytes (default: 64 MB)\n"
6184 "-M return error on memory exhausted (rather than removing items)\n"
6185 "-c <num> max simultaneous connections (default: 1000)\n"
6186 "-k lock down all paged memory. Note that there is a\n"
6187 " limit on how much memory you may lock. Trying to\n"
6188 " allocate more than that would fail, so be sure you\n"
6189 " set the limit correctly for the user you started\n"
6190 " the daemon with (not for -u <username> user;\n"
6191 " under sh this is done with 'ulimit -S -l NUM_KB').\n"
6192 "-v verbose (print errors/warnings while in event loop)\n"
6193 "-vv very verbose (also print client commands/reponses)\n"
6194 "-vvv extremely verbose (also print internal state transitions)\n"
6195 "-h print this help and exit\n"
6196 "-i print memcached and libevent license\n"
6197 "-P <file> save PID in <file>, only used with -d option\n"
6198 "-f <factor> chunk size growth factor (default: 1.25)\n"
6199 "-n <bytes> minimum space allocated for key+value+flags (default: 48)\n");
6200 printf("-L Try to use large memory pages (if available). Increasing\n"
6201 " the memory page size could reduce the number of TLB misses\n"
6202 " and improve the performance. In order to get large pages\n"
6203 " from the OS, memcached will allocate the total item-cache\n"
6204 " in one large chunk.\n");
6205 printf("-D <char> Use <char> as the delimiter between key prefixes and IDs.\n"
6206 " This is used for per-prefix stats reporting. The default is\n"
6207 " \":\" (colon). If this option is specified, stats collection\n"
6208 " is turned on automatically; if not, then it may be turned on\n"
6209 " by sending the \"stats detail on\" command to the server.\n");
6210 printf("-t <num> number of threads to use (default: 4)\n");
6211 printf("-R Maximum number of requests per event, limits the number of\n"
6212 " requests process for a given connection to prevent \n"
6213 " starvation (default: 20)\n");
6214 printf("-C Disable use of CAS\n");
6215 printf("-b Set the backlog queue limit (default: 1024)\n");
6216 printf("-B Binding protocol - one of ascii, binary, or auto (default)\n");
6217 printf("-I Override the size of each slab page. Adjusts max item size\n"
6218 " (default: 1mb, min: 1k, max: 128m)\n");
6219 printf("-q Disable detailed stats commands\n");
6220 #ifdef SASL_ENABLED
6221 printf("-S Require SASL authentication\n");
6222 #endif
6223 printf("-X module,cfg Load the module and initialize it with the config\n");
6224 printf("-E engine Load engine as the storage engine\n");
6225 printf("-e config Pass config as configuration options to the storage engine\n");
6226 printf("\nEnvironment variables:\n"
6227 "MEMCACHED_PORT_FILENAME File to write port information to\n"
6228 "MEMCACHED_TOP_KEYS Number of top keys to keep track of\n"
6229 "MEMCACHED_REQS_TAP_EVENT Similar to -R but for tap_ship_log\n");
6230 }
6231 static void usage_license(void) {
6232 printf(PACKAGE " " VERSION "\n\n");
6233 printf(
6234 "Copyright (c) 2003, Danga Interactive, Inc. <http://www.danga.com/>\n"
6235 "All rights reserved.\n"
6236 "\n"
6237 "Redistribution and use in source and binary forms, with or without\n"
6238 "modification, are permitted provided that the following conditions are\n"
6239 "met:\n"
6240 "\n"
6241 " * Redistributions of source code must retain the above copyright\n"
6242 "notice, this list of conditions and the following disclaimer.\n"
6243 "\n"
6244 " * Redistributions in binary form must reproduce the above\n"
6245 "copyright notice, this list of conditions and the following disclaimer\n"
6246 "in the documentation and/or other materials provided with the\n"
6247 "distribution.\n"
6248 "\n"
6249 " * Neither the name of the Danga Interactive nor the names of its\n"
6250 "contributors may be used to endorse or promote products derived from\n"
6251 "this software without specific prior written permission.\n"
6252 "\n"
6253 "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n"
6254 "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n"
6255 "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n"
6256 "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n"
6257 "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n"
6258 "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
6259 "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6260 "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6261 "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6262 "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n"
6263 "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6264 "\n"
6265 "\n"
6266 "This product includes software developed by Niels Provos.\n"
6267 "\n"
6268 "[ libevent ]\n"
6269 "\n"
6270 "Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>\n"
6271 "All rights reserved.\n"
6272 "\n"
6273 "Redistribution and use in source and binary forms, with or without\n"
6274 "modification, are permitted provided that the following conditions\n"
6275 "are met:\n"
6276 "1. Redistributions of source code must retain the above copyright\n"
6277 " notice, this list of conditions and the following disclaimer.\n"
6278 "2. Redistributions in binary form must reproduce the above copyright\n"
6279 " notice, this list of conditions and the following disclaimer in the\n"
6280 " documentation and/or other materials provided with the distribution.\n"
6281 "3. All advertising materials mentioning features or use of this software\n"
6282 " must display the following acknowledgement:\n"
6283 " This product includes software developed by Niels Provos.\n"
6284 "4. The name of the author may not be used to endorse or promote products\n"
6285 " derived from this software without specific prior written permission.\n"
6286 "\n"
6287 "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
6288 "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
6289 "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
6290 "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
6291 "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n"
6292 "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6293 "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6294 "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6295 "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
6296 "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6297 );
6298
6299 return;
6300 }
6301
6302 static void save_pid(const char *pid_file) {
6303 FILE *fp;
6304
6305 if (access(pid_file, F_OK) == 0) {
6306 if ((fp = fopen(pid_file, "r")) != NULL) {
6307 char buffer[1024];
6308 if (fgets(buffer, sizeof(buffer), fp) != NULL) {
6309 unsigned int pid;
6310 if (safe_strtoul(buffer, &pid) && kill((pid_t)pid, 0) == 0) {
6311 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6312 "WARNING: The pid file contained the following (running) pid: %u\n", pid);
6313 }
6314 }
6315 fclose(fp);
6316 }
6317 }
6318
6319 if ((fp = fopen(pid_file, "w")) == NULL) {
6320 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6321 "Could not open the pid file %s for writing: %s\n",
6322 pid_file, strerror(errno));
6323 return;
6324 }
6325
6326 fprintf(fp,"%ld\n", (long)getpid());
6327 if (fclose(fp) == -1) {
6328 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6329 "Could not close the pid file %s: %s\n",
6330 pid_file, strerror(errno));
6331 }
6332 }
6333
6334 static void remove_pidfile(const char *pid_file) {
6335 if (pid_file != NULL) {
6336 if (unlink(pid_file) != 0) {
6337 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6338 "Could not remove the pid file %s: %s\n",
6339 pid_file, strerror(errno));
6340 }
6341 }
6342 }
6343
6344 #ifndef HAVE_SIGIGNORE
6345 static int sigignore(int sig) {
6346 struct sigaction sa = { .sa_handler = SIG_IGN, .sa_flags = 0 };
6347
6348 if (sigemptyset(&sa.sa_mask) == -1 || sigaction(sig, &sa, 0) == -1) {
6349 return -1;
6350 }
6351 return 0;
6352 }
6353 #endif /* !HAVE_SIGIGNORE */
6354
6355 static void sigterm_handler(int sig) {
6356 assert(sig == SIGTERM || sig == SIGINT);
6357 memcached_shutdown = 1;
6358 }
6359
6360 static int install_sigterm_handler(void) {
6361 struct sigaction sa = {.sa_handler = sigterm_handler, .sa_flags = 0};
6362
6363 if (sigemptyset(&sa.sa_mask) == -1 || sigaction(SIGTERM, &sa, 0) == -1 ||
6364 sigaction(SIGINT, &sa, 0) == -1) {
6365 return -1;
6366 }
6367
6368 return 0;
6369 }
6370
/*
 * On systems that support multiple page sizes we may reduce the
 * number of TLB misses by using the biggest available page size.
 *
 * Only active when both HAVE_GETPAGESIZES and HAVE_MEMCNTL are defined;
 * otherwise this is a no-op that reports success.
 *
 * Returns 0 on success and -1 (after logging a warning) on failure.
 */
static int enable_large_pages(void) {
#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
    size_t sizes[32];
    int avail = getpagesizes(sizes, 32);

    if (avail == -1) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                "Failed to get supported pagesizes: %s\nWill use default page size\n",
                strerror(errno));
        return -1;
    }

    /* Pick the largest of the reported page sizes */
    size_t largest = sizes[0];
    for (int idx = 1; idx < avail; ++idx) {
        if (sizes[idx] > largest) {
            largest = sizes[idx];
        }
    }

    struct memcntl_mha arg = {
        .mha_cmd = MHA_MAPSIZE_BSSBRK,
        .mha_flags = 0,
        .mha_pagesize = largest
    };

    if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&arg, 0, 0) == -1) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                "Failed to set large pages: %s\nWill use default page size\n",
                strerror(errno));
        return -1;
    }

    return 0;
#else
    return 0;
#endif
}
6413
6414 static const char* get_server_version(void) {
6415 return VERSION;
6416 }
6417
6418 static void store_engine_specific(const void *cookie,
6419 void *engine_data) {
6420 conn *c = (conn*)cookie;
6421 c->engine_storage = engine_data;
6422 }
6423
6424 static void *get_engine_specific(const void *cookie) {
6425 conn *c = (conn*)cookie;
6426 return c->engine_storage;
6427 }
6428
6429 static int get_socket_fd(const void *cookie) {
6430 conn *c = (conn *)cookie;
6431 return c->sfd;
6432 }
6433
6434 static void set_tap_nack_mode(const void *cookie, bool enable) {
6435 conn *c = (conn *)cookie;
6436 c->tap_nack_mode = enable;
6437 }
6438
6439 static void reserve_cookie(const void *cookie) {
6440 conn *c = (conn *)cookie;
6441 ++c->refcount;
6442 }
6443
6444 static void release_cookie(const void *cookie) {
6445 conn *c = (conn *)cookie;
6446 --c->refcount;
6447 }
6448
6449 static int num_independent_stats(void) {
6450 return settings.num_threads + 1;
6451 }
6452
6453 static void *new_independent_stats(void) {
6454 int ii;
6455 int nrecords = num_independent_stats();
6456 struct independent_stats *independent_stats = calloc(sizeof(independent_stats) + sizeof(struct thread_stats) * nrecords, 1);
6457
6458 #ifdef INNODB_MEMCACHED
6459 if (independent_stats == NULL) {
6460 fprintf(stderr, "Unable to allocate memory for"
6461 "independent_stats...\n");
6462 return (NULL);
6463 }
6464 #endif
6465
6466 if (settings.topkeys > 0)
6467 independent_stats->topkeys = topkeys_init(settings.topkeys);
6468 for (ii = 0; ii < nrecords; ii++)
6469 pthread_mutex_init(&independent_stats->thread_stats[ii].mutex, NULL);
6470 return independent_stats;
6471 }
6472
6473 static void release_independent_stats(void *stats) {
6474 int ii;
6475 int nrecords = num_independent_stats();
6476 struct independent_stats *independent_stats = stats;
6477 if (independent_stats->topkeys)
6478 topkeys_free(independent_stats->topkeys);
6479 for (ii = 0; ii < nrecords; ii++)
6480 pthread_mutex_destroy(&independent_stats->thread_stats[ii].mutex);
6481 free(independent_stats);
6482 }
6483
6484 static inline struct independent_stats *get_independent_stats(conn *c) {
6485 struct independent_stats *independent_stats;
6486 if (settings.engine.v1->get_stats_struct != NULL) {
6487 independent_stats = settings.engine.v1->get_stats_struct(settings.engine.v0, (const void *)c);
6488 if (independent_stats == NULL)
6489 independent_stats = default_independent_stats;
6490 } else {
6491 independent_stats = default_independent_stats;
6492 }
6493 return independent_stats;
6494 }
6495
6496 static inline struct thread_stats *get_thread_stats(conn *c) {
6497 struct independent_stats *independent_stats = get_independent_stats(c);
6498 assert(c->thread->index < num_independent_stats());
6499 return &independent_stats->thread_stats[c->thread->index];
6500 }
6501
6502 static void register_callback(ENGINE_HANDLE *eh,
6503 ENGINE_EVENT_TYPE type,
6504 EVENT_CALLBACK cb, const void *cb_data) {
6505 struct engine_event_handler *h =
6506 calloc(sizeof(struct engine_event_handler), 1);
6507
6508 assert(h);
6509 h->cb = cb;
6510 h->cb_data = cb_data;
6511 h->next = engine_event_handlers[type];
6512 engine_event_handlers[type] = h;
6513 }
6514
6515 static rel_time_t get_current_time(void)
6516 {
6517 return current_time;
6518 }
6519
6520 static void count_eviction(const void *cookie, const void *key, const int nkey) {
6521 topkeys_t *tk = get_independent_stats((conn*)cookie)->topkeys;
6522 TK(tk, evictions, key, nkey, get_current_time());
6523 }
6524
6525 /**
6526 * To make it easy for engine implementors that doesn't want to care about
6527 * writing their own incr/decr code, they can just set the arithmetic function
6528 * to NULL and use this implementation. It is not efficient, due to the fact
6529 * that it does multiple calls through the interface (get and then cas store).
6530 * If you don't care, feel free to use it..
6531 */
6532 static ENGINE_ERROR_CODE internal_arithmetic(ENGINE_HANDLE* handle,
6533 const void* cookie,
6534 const void* key,
6535 const int nkey,
6536 const bool increment,
6537 const bool create,
6538 const uint64_t delta,
6539 const uint64_t initial,
6540 const rel_time_t exptime,
6541 uint64_t *cas,
6542 uint64_t *result,
6543 uint16_t vbucket)
6544 {
6545 ENGINE_HANDLE_V1 *e = (ENGINE_HANDLE_V1*)handle;
6546
6547 item *it = NULL;
6548
6549 ENGINE_ERROR_CODE ret;
6550 ret = e->get(handle, cookie, &it, key, nkey, vbucket);
6551
6552 if (ret == ENGINE_SUCCESS) {
6553 item_info info = { .nvalue = 1 };
6554
6555 if (!e->get_item_info(handle, cookie, it, &info)) {
6556 e->release(handle, cookie, it);
6557 return ENGINE_FAILED;
6558 }
6559
6560 char value[80];
6561
6562 if (info.value[0].iov_len > (sizeof(value) - 1)) {
6563 e->release(handle, cookie, it);
6564 return ENGINE_EINVAL;
6565 }
6566
6567 memcpy(value, info.value[0].iov_base, info.value[0].iov_len);
6568 value[info.value[0].iov_len] = '\0';
6569
6570 uint64_t val;
6571 if (!safe_strtoull(value, &val)) {
6572 e->release(handle, cookie, it);
6573 return ENGINE_EINVAL;
6574 }
6575
6576 if (increment) {
6577 val += delta;
6578 } else {
6579 if (delta > val) {
6580 val = 0;
6581 } else {
6582 val -= delta;
6583 }
6584 }
6585
6586 size_t nb = snprintf(value, sizeof(value), "%"PRIu64, val);
6587 *result = val;
6588 item *nit = NULL;
6589 if (e->allocate(handle, cookie, &nit, key,
6590 nkey, nb, info.flags, info.exptime) != ENGINE_SUCCESS) {
6591 e->release(handle, cookie, it);
6592 return ENGINE_ENOMEM;
6593 }
6594
6595 item_info i2 = { .nvalue = 1 };
6596 if (!e->get_item_info(handle, cookie, nit, &i2)) {
6597 e->release(handle, cookie, it);
6598 e->release(handle, cookie, nit);
6599 return ENGINE_FAILED;
6600 }
6601
6602 memcpy(i2.value[0].iov_base, value, nb);
6603 e->item_set_cas(handle, cookie, nit, info.cas);
6604 ret = e->store(handle, cookie, nit, cas, OPERATION_CAS, vbucket);
6605 e->release(handle, cookie, it);
6606 e->release(handle, cookie, nit);
6607 } else if (ret == ENGINE_KEY_ENOENT && create) {
6608 char value[80];
6609 size_t nb = snprintf(value, sizeof(value), "%"PRIu64"\r\n", initial);
6610 *result = initial;
6611 if (e->allocate(handle, cookie, &it, key, nkey, nb, 0, exptime) != ENGINE_SUCCESS) {
6612 e->release(handle, cookie, it);
6613 return ENGINE_ENOMEM;
6614 }
6615
6616 item_info info = { .nvalue = 1 };
6617 if (!e->get_item_info(handle, cookie, it, &info)) {
6618 e->release(handle, cookie, it);
6619 return ENGINE_FAILED;
6620 }
6621
6622 memcpy(info.value[0].iov_base, value, nb);
6623 ret = e->store(handle, cookie, it, cas, OPERATION_CAS, vbucket);
6624 e->release(handle, cookie, it);
6625 }
6626
6627 /* We had a race condition.. just call ourself recursively to retry */
6628 if (ret == ENGINE_KEY_EEXISTS) {
6629 return internal_arithmetic(handle, cookie, key, nkey, increment, create, delta,
6630 initial, exptime, cas, result, vbucket);
6631 }
6632
6633 return ret;
6634 }
6635
6636 /**
6637 * Register an extension if it's not already registered
6638 *
6639 * @param type the type of the extension to register
6640 * @param extension the extension to register
6641 * @return true if success, false otherwise
6642 */
6643 static bool register_extension(extension_type_t type, void *extension)
6644 {
6645 if (extension == NULL) {
6646 return false;
6647 }
6648
6649 switch (type) {
6650 case EXTENSION_DAEMON:
6651 for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6652 ptr != NULL;
6653 ptr = ptr->next) {
6654 if (ptr == extension) {
6655 return false;
6656 }
6657 }
6658 ((EXTENSION_DAEMON_DESCRIPTOR *)(extension))->next = settings.extensions.daemons;
6659 settings.extensions.daemons = extension;
6660 return true;
6661 case EXTENSION_LOGGER:
6662 settings.extensions.logger = extension;
6663 return true;
6664 case EXTENSION_ASCII_PROTOCOL:
6665 if (settings.extensions.ascii != NULL) {
6666 EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *last;
6667 for (last = settings.extensions.ascii; last->next != NULL;
6668 last = last->next) {
6669 if (last == extension) {
6670 return false;
6671 }
6672 }
6673 if (last == extension) {
6674 return false;
6675 }
6676 last->next = extension;
6677 last->next->next = NULL;
6678 } else {
6679 settings.extensions.ascii = extension;
6680 settings.extensions.ascii->next = NULL;
6681 }
6682 return true;
6683
6684 default:
6685 return false;
6686 }
6687 }
6688
6689 /**
6690 * Unregister an extension
6691 *
6692 * @param type the type of the extension to remove
6693 * @param extension the extension to remove
6694 */
6695 static void unregister_extension(extension_type_t type, void *extension)
6696 {
6697 switch (type) {
6698 case EXTENSION_DAEMON:
6699 {
6700 EXTENSION_DAEMON_DESCRIPTOR *prev = NULL;
6701 EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6702
6703 while (ptr != NULL && ptr != extension) {
6704 prev = ptr;
6705 ptr = ptr->next;
6706 }
6707
6708 if (ptr != NULL && prev != NULL) {
6709 prev->next = ptr->next;
6710 }
6711
6712 if (settings.extensions.daemons == ptr) {
6713 settings.extensions.daemons = ptr->next;
6714 }
6715 }
6716 break;
6717 case EXTENSION_LOGGER:
6718 if (settings.extensions.logger == extension) {
6719 if (get_stderr_logger() == extension) {
6720 settings.extensions.logger = get_null_logger();
6721 } else {
6722 settings.extensions.logger = get_stderr_logger();
6723 }
6724 }
6725 break;
6726 case EXTENSION_ASCII_PROTOCOL:
6727 {
6728 EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *prev = NULL;
6729 EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
6730
6731 while (ptr != NULL && ptr != extension) {
6732 prev = ptr;
6733 ptr = ptr->next;
6734 }
6735
6736 if (ptr != NULL && prev != NULL) {
6737 prev->next = ptr->next;
6738 }
6739
6740 if (settings.extensions.ascii == ptr) {
6741 settings.extensions.ascii = ptr->next;
6742 }
6743 }
6744 break;
6745
6746 default:
6747 ;
6748 }
6749
6750 }
6751
6752 /**
6753 * Get the named extension
6754 */
6755 static void* get_extension(extension_type_t type)
6756 {
6757 switch (type) {
6758 case EXTENSION_DAEMON:
6759 return settings.extensions.daemons;
6760
6761 case EXTENSION_LOGGER:
6762 return settings.extensions.logger;
6763
6764 case EXTENSION_ASCII_PROTOCOL:
6765 return settings.extensions.ascii;
6766
6767 default:
6768 return NULL;
6769 }
6770 }
6771
6772 #ifdef INNODB_MEMCACHED
6773 void shutdown_server(void) {
6774 #else
6775 static void shutdown_server(void) {
6776 #endif /* INNODB_MEMCACHED */
6777 #ifdef INNODB_MEMCACHED
6778 int i;
6779 /* Clean up connections */
6780 while (listen_conn) {
6781 conn_closing(listen_conn);
6782 listen_conn = listen_conn->next;
6783 }
6784
6785 for (i = 0; i < num_udp_socket; i++) {
6786 safe_close(udp_socket[i]);
6787 }
6788 #endif
6789 memcached_shutdown = 1;
6790 }
6791
#ifdef INNODB_MEMCACHED
/** @return true once memcached_shutdown has reached the value 2 */
bool shutdown_complete(void)
{
    const bool done = (memcached_shutdown == 2);

    return(done);
}

/** @return true once memcached_initialized has been set to 1 */
bool init_complete(void)
{
    const bool ready = (memcached_initialized == 1);

    return(ready);
}
#endif
6803
6804 static EXTENSION_LOGGER_DESCRIPTOR* get_logger(void)
6805 {
6806 return settings.extensions.logger;
6807 }
6808
6809 static EXTENSION_LOG_LEVEL get_log_level(void)
6810 {
6811 EXTENSION_LOG_LEVEL ret;
6812 switch (settings.verbose) {
6813 case 0: ret = EXTENSION_LOG_WARNING; break;
6814 case 1: ret = EXTENSION_LOG_INFO; break;
6815 case 2: ret = EXTENSION_LOG_DEBUG; break;
6816 default:
6817 ret = EXTENSION_LOG_DETAIL;
6818 }
6819 return ret;
6820 }
6821
6822 static void set_log_level(EXTENSION_LOG_LEVEL severity)
6823 {
6824 switch (severity) {
6825 case EXTENSION_LOG_WARNING: settings.verbose = 0; break;
6826 case EXTENSION_LOG_INFO: settings.verbose = 1; break;
6827 case EXTENSION_LOG_DEBUG: settings.verbose = 2; break;
6828 default:
6829 settings.verbose = 3;
6830 }
6831 }
6832
/**
 * Stats callback that appends "key=val;" to the NUL-terminated string held
 * in the buffer that cookie points to.
 *
 * Nothing is appended when the key or the value is empty, or when the
 * result (including the terminating NUL) would not fit in 1024 bytes,
 * which is the size of the buffer get_config() passes in.
 *
 * @param key stat name (klen bytes, need not be NUL-terminated)
 * @param klen length of key
 * @param val stat value (vlen bytes, need not be NUL-terminated)
 * @param vlen length of val
 * @param cookie the destination buffer
 */
static void get_config_append_stats(const char *key, const uint16_t klen,
                                    const char *val, const uint32_t vlen,
                                    const void *cookie)
{
    if (klen == 0 || vlen == 0) {
        return;
    }

    char *buffer = (char*)cookie;
    size_t used = strlen(buffer);

    /* key + '=' + value + ';' + NUL must fit */
    if ((used + klen + vlen + 3) > 1024) {
        return;
    }

    char *out = buffer + used;

    memcpy(out, key, klen);
    out += klen;

    *out++ = '=';

    memcpy(out, val, vlen);
    out += vlen;

    out[0] = ';';
    out[1] = '\0';
}
6857
6858 static bool get_config(struct config_item items[]) {
6859 char config[1024];
6860 config[0] = '\0';
6861 process_stat_settings(get_config_append_stats, config);
6862 int rval = parse_config(config, items, NULL);
6863 return rval >= 0;
6864 }
6865
6866 /**
6867 * Callback the engines may call to get the public server interface
6868 * @return pointer to a structure containing the interface. The client should
6869 * know the layout and perform the proper casts.
6870 */
6871 static SERVER_HANDLE_V1 *get_server_api(void)
6872 {
6873 static SERVER_CORE_API core_api = {
6874 .server_version = get_server_version,
6875 .hash = hash,
6876 .realtime = realtime,
6877 .abstime = abstime,
6878 .get_current_time = get_current_time,
6879 .parse_config = parse_config,
6880 .shutdown = shutdown_server,
6881 .get_config = get_config
6882 };
6883
6884 static SERVER_COOKIE_API server_cookie_api = {
6885 .get_auth_data = get_auth_data,
6886 .store_engine_specific = store_engine_specific,
6887 .get_engine_specific = get_engine_specific,
6888 .get_socket_fd = get_socket_fd,
6889 .set_tap_nack_mode = set_tap_nack_mode,
6890 .notify_io_complete = notify_io_complete,
6891 .reserve = reserve_cookie,
6892 .release = release_cookie
6893 };
6894
6895 static SERVER_STAT_API server_stat_api = {
6896 .new_stats = new_independent_stats,
6897 .release_stats = release_independent_stats,
6898 .evicting = count_eviction
6899 };
6900
6901 static SERVER_LOG_API server_log_api = {
6902 .get_logger = get_logger,
6903 .get_level = get_log_level,
6904 .set_level = set_log_level
6905 };
6906 static SERVER_EXTENSION_API extension_api = {
6907 .register_extension = register_extension,
6908 .unregister_extension = unregister_extension,
6909 .get_extension = get_extension
6910 };
6911
6912 static SERVER_CALLBACK_API callback_api = {
6913 .register_callback = register_callback,
6914 .perform_callbacks = perform_callbacks,
6915 };
6916
6917 static SERVER_HANDLE_V1 rv = {
6918 .interface = 1,
6919 .core = &core_api,
6920 .stat = &server_stat_api,
6921 .extension = &extension_api,
6922 .callback = &callback_api,
6923 .log = &server_log_api,
6924 .cookie = &server_cookie_api
6925 };
6926
6927 if (rv.engine == NULL) {
6928 rv.engine = settings.engine.v0;
6929 }
6930
6931 return &rv;
6932 }
6933
6934 /**
6935 * Load a shared object and initialize all the extensions in there.
6936 *
6937 * @param soname the name of the shared object (may not be NULL)
6938 * @param config optional configuration parameters
6939 * @return true if success, false otherwise
6940 */
6941 static bool load_extension(const char *soname, const char *config) {
6942 if (soname == NULL) {
6943 return false;
6944 }
6945
6946 /* Hack to remove the warning from C99 */
6947 union my_hack {
6948 MEMCACHED_EXTENSIONS_INITIALIZE initialize;
6949 void* voidptr;
6950 } funky = {.initialize = NULL };
6951
6952 void *handle = dlopen(soname, RTLD_NOW | RTLD_LOCAL);
6953 if (handle == NULL) {
6954 const char *msg = dlerror();
6955 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6956 "Failed to open library \"%s\": %s\n",
6957 soname, msg ? msg : "unknown error");
6958 return false;
6959 }
6960
6961 void *symbol = dlsym(handle, "memcached_extensions_initialize");
6962 if (symbol == NULL) {
6963 const char *msg = dlerror();
6964 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6965 "Could not find symbol \"memcached_extensions_initialize\" in %s: %s\n",
6966 soname, msg ? msg : "unknown error");
6967 return false;
6968 }
6969 funky.voidptr = symbol;
6970
6971 EXTENSION_ERROR_CODE error = (*funky.initialize)(config, get_server_api);
6972
6973 if (error != EXTENSION_SUCCESS) {
6974 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6975 "Failed to initalize extensions from %s. Error code: %d\n",
6976 soname, error);
6977 dlclose(handle);
6978 return false;
6979 }
6980
6981 if (settings.verbose > 0) {
6982 settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
6983 "Loaded extensions from: %s\n", soname);
6984 }
6985
6986 return true;
6987 }
6988
6989 /**
6990 * Do basic sanity check of the runtime environment
6991 * @return true if no errors found, false if we can't use this env
6992 */
6993 static bool sanitycheck(void) {
6994 /* One of our biggest problems is old and bogus libevents */
6995 const char *ever = event_get_version();
6996 if (ever != NULL) {
6997 if (strncmp(ever, "1.", 2) == 0) {
6998 /* Require at least 1.3 (that's still a couple of years old) */
6999 if ((ever[2] == '1' || ever[2] == '2') && !isdigit(ever[3])) {
7000 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7001 "You are using libevent %s.\nPlease upgrade to"
7002 " a more recent version (1.3 or newer)\n",
7003 event_get_version());
7004 return false;
7005 }
7006 }
7007 }
7008
7009 return true;
7010 }
7011
7012 #ifdef INNODB_MEMCACHED
/** Duplicate the first len bytes of str into a freshly malloc()ed,
NUL-terminated buffer.
@param str  source bytes (at least len bytes must be readable)
@param len  number of bytes to copy
@return the new buffer (the caller frees it), or NULL if str is NULL,
len is negative or the allocation fails */
static
char*
my_strdupl(const char* str, int len)
{
    char* s;

    if (str == NULL || len < 0) {
        return(NULL);
    }

    s = (char*) malloc((size_t) len + 1);

    if (s == NULL) {
        return(NULL);
    }

    memcpy(s, str, (size_t) len);
    s[len] = 0;

    return(s);
}

/** Function that massages a MySQL config variable string into something
that can be parsed by getopt(): the string is split on spaces into an
argv-style vector.

Slot 0 of the vector holds a placeholder program name (getopt() reads
argv[0] when it prints diagnostics), the tokens follow from slot 1, and the
vector is terminated by a NULL pointer. The tokens point into a private copy
of the option string; that copy and the vector are intentionally not freed
here, because the caller keeps using them after this function returns.

@param option       space separated option string; it is not modified
@param option_argc  receives the number of entries (tokens + 1), or 0 if
                    option is NULL or memory could not be allocated
@param option_argv  receives the vector, or NULL on failure */
static
void
daemon_memcached_make_option(char* option, int* option_argc,
                             char*** option_argv)
{
    static const char* sep = " ";
    static char prog_name[] = "memcached";
    const char* scan;
    char* my_str;
    char* cursor;
    char** argv_out;
    int num_arg = 0;
    int i = 1;

    *option_argc = 0;
    *option_argv = NULL;

    if (option == NULL) {
        return;
    }

    /* First pass: count the tokens without modifying anything */
    scan = option + strspn(option, sep);
    while (*scan != '\0') {
        num_arg++;
        scan += strcspn(scan, sep);
        scan += strspn(scan, sep);
    }

    my_str = my_strdupl(option, (int) strlen(option));

    /* One slot for argv[0], one per token, one for the terminating NULL */
    argv_out = (char**) malloc(((size_t) num_arg + 2) * sizeof(*argv_out));

    if (my_str == NULL || argv_out == NULL) {
        free(my_str);
        free(argv_out);
        return;
    }

    argv_out[0] = prog_name;

    /* Second pass: cut the private copy into tokens in place */
    cursor = my_str + strspn(my_str, sep);
    while (*cursor != '\0') {
        argv_out[i] = cursor;
        i++;

        cursor += strcspn(cursor, sep);
        if (*cursor != '\0') {
            *cursor = '\0';
            cursor++;
            cursor += strspn(cursor, sep);
        }
    }

    assert(i == num_arg + 1);

    argv_out[i] = NULL;

    *option_argv = argv_out;
    *option_argc = (num_arg + 1);

    return;
}
7063
/* Structure that carries the callback function structure pointer and
related settings passed to the InnoDB engine. The field order must not
change: the daemon passes a pointer to this structure to the engine
disguised as its configuration string. */
struct eng_config_info {
    char*           option_string;      /* engine option string */
    void*           cb_ptr;             /* callback structure pointer */
    unsigned int    eng_r_batch_size;   /* NOTE(review): presumably the
                                        read batch size - confirm */
    unsigned int    eng_w_batch_size;   /* NOTE(review): presumably the
                                        write batch size - confirm */
    bool            enable_binlog;      /* binlog enabled flag */
};

typedef struct eng_config_info eng_config_info_t;
7073 #endif /* INNODB_MEMCACHED */
7074
7075 #ifdef INNODB_MEMCACHED
7076 void* daemon_memcached_main(void *p) {
7077 #else
7078 int main (int argc, char **argv) {
7079 #endif
7080 int c;
7081 bool lock_memory = false;
7082 bool do_daemonize = false;
7083 bool preallocate = false;
7084 int maxcore = 0;
7085 char *username = NULL;
7086 char *pid_file = NULL;
7087 struct passwd *pw;
7088 struct rlimit rlim;
7089 char unit = '\0';
7090 int size_max = 0;
7091
7092 bool protocol_specified = false;
7093 bool tcp_specified = false;
7094 bool udp_specified = false;
7095 memcached_context_t* m_config = (memcached_context_t*)p;
7096 const char *engine;
7097 const char *engine_config = NULL;
7098 char old_options[1024] = { [0] = '\0' };
7099 char *old_opts = old_options;
7100 #ifdef INNODB_MEMCACHED
7101 int option_argc = 0;
7102 char** option_argv = NULL;
7103 eng_config_info_t my_eng_config;
7104
7105 memcached_initialized = 0;
7106
7107 if (m_config->m_engine_library) {
7108 engine = m_config->m_engine_library;
7109
7110 /* FIXME: We should have a better way to pass the callback structure
7111 point to storage engine. It is now appended in the configure
7112 string in eng_config_info_t structure */
7113 my_eng_config.cb_ptr = m_config->m_innodb_api_cb;
7114 my_eng_config.eng_r_batch_size = m_config->m_r_batch_size;
7115 my_eng_config.eng_w_batch_size = m_config->m_w_batch_size;
7116 my_eng_config.enable_binlog = m_config->m_enable_binlog;
7117 my_eng_config.option_string = old_opts;
7118 engine_config = (const char *) (&my_eng_config);
7119
7120 } else {
7121 engine = "default_engine.so";
7122 }
7123 #else
7124 engine = "default_engine.so";
7125 #endif /* INNODB_MEMCACHED */
7126
7127 memcached_shutdown = 0;
7128 memcached_initialized = 0;
7129
7130 if (!sanitycheck()) {
7131 return(NULL);
7132 }
7133
7134 /* make the time we started always be 2 seconds before we really
7135 did, so time(0) - time.started is never zero. if so, things
7136 like 'settings.oldest_live' which act as booleans as well as
7137 values are now false in boolean context... */
7138 process_started = time(0) - 2;
7139 set_current_time();
7140
7141 /* Initialize the socket subsystem */
7142 initialize_sockets();
7143
7144 /* init settings */
7145 settings_init();
7146
7147 if (memcached_initialize_stderr_logger(get_server_api) != EXTENSION_SUCCESS) {
7148 fprintf(stderr, "Failed to initialize log system\n");
7149 return (NULL);
7150 }
7151
7152 if (m_config->m_mem_option) {
7153 daemon_memcached_make_option(m_config->m_mem_option,
7154 &option_argc,
7155 &option_argv);
7156 }
7157
7158 #ifdef INNODB_MEMCACHED
7159
7160 if (option_argc > 0 && option_argv) {
7161 /* Always reset the index to 1, since this function can
7162 be invoked multiple times with install/uninstall plugins */
7163 optind = 1;
7164 while (-1 != (c = getopt(option_argc, option_argv,
7165 "a:" /* access mask for unix socket */
7166 "p:" /* TCP port number to listen on */
7167 "s:" /* unix socket path to listen on */
7168 "U:" /* UDP port number to listen on */
7169 "m:" /* max memory to use for items in megabytes */
7170 "M" /* return error on memory exhausted */
7171 "c:" /* max simultaneous connections */
7172 "k" /* lock down all paged memory */
7173 "hi" /* help, licence info */
7174 "r" /* maximize core file limit */
7175 "v" /* verbose */
7176 "d" /* daemon mode */
7177 "l:" /* interface to listen on */
7178 "u:" /* user identity to run as */
7179 "P:" /* save PID in file */
7180 "f:" /* factor? */
7181 "n:" /* minimum space allocated for key+value+flags */
7182 "t:" /* threads */
7183 "D:" /* prefix delimiter? */
7184 "L" /* Large memory pages */
7185 "R:" /* max requests per event */
7186 "C" /* Disable use of CAS */
7187 "b:" /* backlog queue limit */
7188 "B:" /* Binding protocol */
7189 "I:" /* Max item size */
7190 "S" /* Sasl ON */
7191 "E:" /* Engine to load */
7192 "e:" /* Engine options */
7193 "q" /* Disallow detailed stats */
7194 "X:" /* Load extension */
7195 ))) {
7196 switch (c) {
7197 case 'a':
7198 /* access for unix domain socket, as octal mask (like chmod)*/
7199 settings.access= strtol(optarg,NULL,8);
7200 break;
7201
7202 case 'U':
7203 settings.udpport = atoi(optarg);
7204 udp_specified = true;
7205 break;
7206 case 'p':
7207 settings.port = atoi(optarg);
7208 tcp_specified = true;
7209 break;
7210 case 's':
7211 settings.socketpath = optarg;
7212 break;
7213 case 'm':
7214 settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7215 old_opts += sprintf(old_opts, "cache_size=%lu;",
7216 (unsigned long)settings.maxbytes);
7217 break;
7218 case 'M':
7219 settings.evict_to_free = 0;
7220 old_opts += sprintf(old_opts, "eviction=false;");
7221 break;
7222 case 'c':
7223 settings.maxconns = atoi(optarg);
7224 break;
7225 case 'h':
7226 usage();
7227 exit(EXIT_SUCCESS);
7228 case 'i':
7229 usage_license();
7230 exit(EXIT_SUCCESS);
7231 case 'k':
7232 lock_memory = true;
7233 break;
7234 case 'v':
7235 settings.verbose++;
7236 perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7237 break;
7238 case 'l':
7239 settings.inter= strdup(optarg);
7240 break;
7241 case 'd':
7242 do_daemonize = true;
7243 break;
7244 case 'r':
7245 maxcore = 1;
7246 break;
7247 case 'R':
7248 settings.reqs_per_event = atoi(optarg);
7249 if (settings.reqs_per_event <= 0) {
7250 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7251 "Number of requests per event must be greater than 0\n");
7252 return (void*)1;
7253 }
7254 break;
7255 case 'u':
7256 username = optarg;
7257 break;
7258 case 'P':
7259 pid_file = optarg;
7260 break;
7261 case 'f':
7262 settings.factor = atof(optarg);
7263 if (settings.factor <= 1.0) {
7264 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7265 "Factor must be greater than 1\n");
7266 return (void*)1;
7267 }
7268 old_opts += sprintf(old_opts, "factor=%f;",
7269 settings.factor);
7270 break;
7271 case 'n':
7272 settings.chunk_size = atoi(optarg);
7273 if (settings.chunk_size == 0) {
7274 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7275 "Chunk size must be greater than 0\n");
7276 return (void*)1;
7277 }
7278 old_opts += sprintf(old_opts, "chunk_size=%u;",
7279 settings.chunk_size);
7280 break;
7281 case 't':
7282 settings.num_threads = atoi(optarg);
7283 if (settings.num_threads <= 0) {
7284 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7285 "Number of threads must be greater than 0\n");
7286 return (void*)1;
7287 }
7288 /* There're other problems when you get above 64 threads.
7289 * In the future we should portably detect # of cores for the
7290 * default.
7291 */
7292 if (settings.num_threads > 64) {
7293 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7294 "WARNING: Setting a high number of worker"
7295 "threads is not recommended.\n"
7296 " Set this value to the number of cores in"
7297 " your machine or less.\n");
7298 }
7299 break;
7300 case 'D':
7301 settings.prefix_delimiter = optarg[0];
7302 settings.detail_enabled = 1;
7303 break;
7304 case 'L' :
7305 if (enable_large_pages() == 0) {
7306 preallocate = true;
7307 old_opts += sprintf(old_opts, "preallocate=true;");
7308 }
7309 break;
7310 case 'C' :
7311 settings.use_cas = false;
7312 break;
7313 case 'b' :
7314 settings.backlog = atoi(optarg);
7315 break;
7316 case 'B':
7317 protocol_specified = true;
7318 if (strcmp(optarg, "auto") == 0) {
7319 settings.binding_protocol = negotiating_prot;
7320 } else if (strcmp(optarg, "binary") == 0) {
7321 settings.binding_protocol = binary_prot;
7322 } else if (strcmp(optarg, "ascii") == 0) {
7323 settings.binding_protocol = ascii_prot;
7324 } else {
7325 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7326 "Invalid value for binding protocol: %s\n"
7327 " -- should be one of auto, binary, or ascii\n", optarg);
7328 exit(EX_USAGE);
7329 }
7330 break;
7331 case 'I':
7332 unit = optarg[strlen(optarg)-1];
7333 if (unit == 'k' || unit == 'm' ||
7334 unit == 'K' || unit == 'M') {
7335 optarg[strlen(optarg)-1] = '\0';
7336 size_max = atoi(optarg);
7337 if (unit == 'k' || unit == 'K')
7338 size_max *= 1024;
7339 if (unit == 'm' || unit == 'M')
7340 size_max *= 1024 * 1024;
7341 settings.item_size_max = size_max;
7342 } else {
7343 settings.item_size_max = atoi(optarg);
7344 }
7345 if (settings.item_size_max < 1024) {
7346 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7347 "Item max size cannot be less than 1024 bytes.\n");
7348 return (void*)1;
7349 }
7350 if (settings.item_size_max > 1024 * 1024 * 128) {
7351 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7352 "Cannot set item size limit higher than 128 mb.\n");
7353 return (void*)1;
7354 }
7355 if (settings.item_size_max > 1024 * 1024) {
7356 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7357 "WARNING: Setting item max size above 1MB is not"
7358 " recommended!\n"
7359 " Raising this limit increases the minimum memory requirements\n"
7360 " and will decrease your memory efficiency.\n"
7361 );
7362 }
7363 #ifndef __WIN32__
7364 old_opts += sprintf(old_opts, "item_size_max=%zu;",
7365 settings.item_size_max);
7366 #else
7367 old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7368 settings.item_size_max);
7369 #endif
7370 break;
7371 case 'E':
7372 engine = optarg;
7373 break;
7374 case 'e':
7375 /* FIXME, we use engine_config to pass callback function
7376 for now. Will need a better solution
7377 engine_config = optarg; */
7378 break;
7379 case 'q':
7380 settings.allow_detailed = false;
7381 break;
7382 case 'S': /* set Sasl authentication to true. Default is false */
7383 # ifdef ENABLE_MEMCACHED_SASL
7384 # ifndef SASL_ENABLED
7385 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7386 "This server is not built with SASL support.\n");
7387 exit(EX_USAGE);
7388 # endif /* !SASL_ENABLED */
7389 settings.require_sasl = true;
7390 # endif /* ENABLE_MEMCACHED_SASL */
7391 break;
7392 case 'X' :
7393 {
7394 char *ptr = strchr(optarg, ',');
7395 if (ptr != NULL) {
7396 *ptr = '\0';
7397 ++ptr;
7398 }
7399 if (!load_extension(optarg, ptr)) {
7400 exit(EXIT_FAILURE);
7401 }
7402 if (ptr != NULL) {
7403 *(ptr - 1) = ',';
7404 }
7405 }
7406 break;
7407 default:
7408 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7409 "Illegal argument \"%c\"\n", c);
7410 return (void*)1;
7411 }
7412 }
7413
7414 free(option_argv);
7415 }
7416 fprintf(stderr, MEMCACHED_ATOMIC_MSG);
7417 #else
7418 /* process arguments */
7419 while (-1 != (c = getopt(argc, argv,
7420 "a:" /* access mask for unix socket */
7421 "p:" /* TCP port number to listen on */
7422 "s:" /* unix socket path to listen on */
7423 "U:" /* UDP port number to listen on */
7424 "m:" /* max memory to use for items in megabytes */
7425 "M" /* return error on memory exhausted */
7426 "c:" /* max simultaneous connections */
7427 "k" /* lock down all paged memory */
7428 "hi" /* help, licence info */
7429 "r" /* maximize core file limit */
7430 "v" /* verbose */
7431 "d" /* daemon mode */
7432 "l:" /* interface to listen on */
7433 "u:" /* user identity to run as */
7434 "P:" /* save PID in file */
7435 "f:" /* factor? */
7436 "n:" /* minimum space allocated for key+value+flags */
7437 "t:" /* threads */
7438 "D:" /* prefix delimiter? */
7439 "L" /* Large memory pages */
7440 "R:" /* max requests per event */
7441 "C" /* Disable use of CAS */
7442 "b:" /* backlog queue limit */
7443 "B:" /* Binding protocol */
7444 "I:" /* Max item size */
7445 "S" /* Sasl ON */
7446 "E:" /* Engine to load */
7447 "e:" /* Engine options */
7448 "q" /* Disallow detailed stats */
7449 "X:" /* Load extension */
7450 ))) {
7451 switch (c) {
7452 case 'a':
7453 /* access for unix domain socket, as octal mask (like chmod)*/
7454 settings.access= strtol(optarg,NULL,8);
7455 break;
7456
7457 case 'U':
7458 settings.udpport = atoi(optarg);
7459 udp_specified = true;
7460 break;
7461 case 'p':
7462 settings.port = atoi(optarg);
7463 tcp_specified = true;
7464 break;
7465 case 's':
7466 settings.socketpath = optarg;
7467 break;
7468 case 'm':
7469 settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7470 old_opts += sprintf(old_opts, "cache_size=%lu;",
7471 (unsigned long)settings.maxbytes);
7472 break;
7473 case 'M':
7474 settings.evict_to_free = 0;
7475 old_opts += sprintf(old_opts, "eviction=false;");
7476 break;
7477 case 'c':
7478 settings.maxconns = atoi(optarg);
7479 break;
7480 case 'h':
7481 usage();
7482 exit(EXIT_SUCCESS);
7483 case 'i':
7484 usage_license();
7485 exit(EXIT_SUCCESS);
7486 case 'k':
7487 lock_memory = true;
7488 break;
7489 case 'v':
7490 settings.verbose++;
7491 perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7492 break;
7493 case 'l':
7494 if (settings.inter != NULL) {
7495 size_t len = strlen(settings.inter) + strlen(optarg) + 2;
7496 char *p = malloc(len);
7497 if (p == NULL) {
7498 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7499 "Failed to allocate memory\n");
7500 return 1;
7501 }
7502 snprintf(p, len, "%s,%s", settings.inter, optarg);
7503 free(settings.inter);
7504 settings.inter = p;
7505 } else {
7506 settings.inter= strdup(optarg);
7507 }
7508 break;
7509 case 'd':
7510 do_daemonize = true;
7511 break;
7512 case 'r':
7513 maxcore = 1;
7514 break;
7515 case 'R':
7516 settings.reqs_per_event = atoi(optarg);
7517 if (settings.reqs_per_event <= 0) {
7518 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7519 "Number of requests per event must be greater than 0\n");
7520 return 1;
7521 }
7522 break;
7523 case 'u':
7524 username = optarg;
7525 break;
7526 case 'P':
7527 pid_file = optarg;
7528 break;
7529 case 'f':
7530 settings.factor = atof(optarg);
7531 if (settings.factor <= 1.0) {
7532 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7533 "Factor must be greater than 1\n");
7534 return 1;
7535 }
7536 old_opts += sprintf(old_opts, "factor=%f;",
7537 settings.factor);
7538 break;
7539 case 'n':
7540 settings.chunk_size = atoi(optarg);
7541 if (settings.chunk_size == 0) {
7542 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7543 "Chunk size must be greater than 0\n");
7544 return 1;
7545 }
7546 old_opts += sprintf(old_opts, "chunk_size=%u;",
7547 settings.chunk_size);
7548 break;
7549 case 't':
7550 settings.num_threads = atoi(optarg);
7551 if (settings.num_threads <= 0) {
7552 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7553 "Number of threads must be greater than 0\n");
7554 return 1;
7555 }
7556 /* There're other problems when you get above 64 threads.
7557 * In the future we should portably detect # of cores for the
7558 * default.
7559 */
7560 if (settings.num_threads > 64) {
7561 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7562 "WARNING: Setting a high number of worker"
7563 "threads is not recommended.\n"
7564 " Set this value to the number of cores in"
7565 " your machine or less.\n");
7566 }
7567 break;
7568 case 'D':
7569 settings.prefix_delimiter = optarg[0];
7570 settings.detail_enabled = 1;
7571 break;
7572 case 'L' :
7573 if (enable_large_pages() == 0) {
7574 preallocate = true;
7575 old_opts += sprintf(old_opts, "preallocate=true;");
7576 }
7577 break;
7578 case 'C' :
7579 settings.use_cas = false;
7580 break;
7581 case 'b' :
7582 settings.backlog = atoi(optarg);
7583 break;
7584 case 'B':
7585 protocol_specified = true;
7586 if (strcmp(optarg, "auto") == 0) {
7587 settings.binding_protocol = negotiating_prot;
7588 } else if (strcmp(optarg, "binary") == 0) {
7589 settings.binding_protocol = binary_prot;
7590 } else if (strcmp(optarg, "ascii") == 0) {
7591 settings.binding_protocol = ascii_prot;
7592 } else {
7593 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7594 "Invalid value for binding protocol: %s\n"
7595 " -- should be one of auto, binary, or ascii\n", optarg);
7596 exit(EX_USAGE);
7597 }
7598 break;
7599 case 'I':
7600 unit = optarg[strlen(optarg)-1];
7601 if (unit == 'k' || unit == 'm' ||
7602 unit == 'K' || unit == 'M') {
7603 optarg[strlen(optarg)-1] = '\0';
7604 size_max = atoi(optarg);
7605 if (unit == 'k' || unit == 'K')
7606 size_max *= 1024;
7607 if (unit == 'm' || unit == 'M')
7608 size_max *= 1024 * 1024;
7609 settings.item_size_max = size_max;
7610 } else {
7611 settings.item_size_max = atoi(optarg);
7612 }
7613 if (settings.item_size_max < 1024) {
7614 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7615 "Item max size cannot be less than 1024 bytes.\n");
7616 return 1;
7617 }
7618 if (settings.item_size_max > 1024 * 1024 * 128) {
7619 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7620 "Cannot set item size limit higher than 128 mb.\n");
7621 return 1;
7622 }
7623 if (settings.item_size_max > 1024 * 1024) {
7624 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7625 "WARNING: Setting item max size above 1MB is not"
7626 " recommended!\n"
7627 " Raising this limit increases the minimum memory requirements\n"
7628 " and will decrease your memory efficiency.\n"
7629 );
7630 }
7631 #ifndef __WIN32__
7632 old_opts += sprintf(old_opts, "item_size_max=%zu;",
7633 settings.item_size_max);
7634 #else
7635 old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7636 settings.item_size_max);
7637 #endif
7638 break;
7639 case 'E':
7640 engine = optarg;
7641 break;
7642 case 'e':
7643 engine_config = optarg;
7644 break;
7645 case 'q':
7646 settings.allow_detailed = false;
7647 break;
7648 case 'S': /* set Sasl authentication to true. Default is false */
7649 #ifndef SASL_ENABLED
7650 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7651 "This server is not built with SASL support.\n");
7652 exit(EX_USAGE);
7653 #endif
7654 settings.require_sasl = true;
7655 break;
7656 case 'X' :
7657 {
7658 char *ptr = strchr(optarg, ',');
7659 if (ptr != NULL) {
7660 *ptr = '\0';
7661 ++ptr;
7662 }
7663 if (!load_extension(optarg, ptr)) {
7664 exit(EXIT_FAILURE);
7665 }
7666 if (ptr != NULL) {
7667 *(ptr - 1) = ',';
7668 }
7669 }
7670 break;
7671 default:
7672 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7673 "Illegal argument \"%c\"\n", c);
7674 return 1;
7675 }
7676 }
7677 #endif /* INNODB_MEMCACHED */
7678
7679 if (getenv("MEMCACHED_REQS_TAP_EVENT") != NULL) {
7680 settings.reqs_per_tap_event = atoi(getenv("MEMCACHED_REQS_TAP_EVENT"));
7681 }
7682
7683 if (settings.reqs_per_tap_event <= 0) {
7684 settings.reqs_per_tap_event = DEFAULT_REQS_PER_TAP_EVENT;
7685 }
7686
7687
7688 if (install_sigterm_handler() != 0) {
7689 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7690 "Failed to install SIGTERM handler\n");
7691 exit(EXIT_FAILURE);
7692 }
7693
7694 char *topkeys_env = getenv("MEMCACHED_TOP_KEYS");
7695 if (topkeys_env != NULL) {
7696 settings.topkeys = atoi(topkeys_env);
7697 if (settings.topkeys < 0) {
7698 settings.topkeys = 0;
7699 }
7700 }
7701
7702 if (settings.require_sasl) {
7703 if (!protocol_specified) {
7704 settings.binding_protocol = binary_prot;
7705 } else {
7706 if (settings.binding_protocol == negotiating_prot) {
7707 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7708 "ERROR: You cannot use auto-negotiating protocol while requiring SASL.\n");
7709 exit(EX_USAGE);
7710 }
7711 if (settings.binding_protocol == ascii_prot) {
7712 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7713 "ERROR: You cannot use only ASCII protocol while requiring SASL.\n");
7714 exit(EX_USAGE);
7715 }
7716 }
7717 }
7718
7719 if (tcp_specified && !udp_specified) {
7720 settings.udpport = settings.port;
7721 } else if (udp_specified && !tcp_specified) {
7722 settings.port = settings.udpport;
7723 }
7724
7725 /*
7726 if (engine_config != NULL && strlen(old_options) > 0) {
7727 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7728 "ERROR: You can't mix -e with the old options\n");
7729 return (NULL);
7730 } else if (engine_config == NULL && strlen(old_options) > 0) {
7731 engine_config = old_options;
7732 } */
7733
7734 if (maxcore != 0) {
7735 struct rlimit rlim_new;
7736 /*
7737 * First try raising to infinity; if that fails, try bringing
7738 * the soft limit to the hard.
7739 */
7740 if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
7741 rlim_new.rlim_cur = rlim_new.rlim_max = RLIM_INFINITY;
7742 if (setrlimit(RLIMIT_CORE, &rlim_new)!= 0) {
7743 /* failed. try raising just to the old max */
7744 rlim_new.rlim_cur = rlim_new.rlim_max = rlim.rlim_max;
7745 (void)setrlimit(RLIMIT_CORE, &rlim_new);
7746 }
7747 }
7748 /*
7749 * getrlimit again to see what we ended up with. Only fail if
7750 * the soft limit ends up 0, because then no core files will be
7751 * created at all.
7752 */
7753
7754 if ((getrlimit(RLIMIT_CORE, &rlim) != 0) || rlim.rlim_cur == 0) {
7755 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7756 "failed to ensure corefile creation\n");
7757 exit(EX_OSERR);
7758 }
7759 }
7760
7761 /*
7762 * If needed, increase rlimits to allow as many connections
7763 * as needed.
7764 */
7765
7766 if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7767 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7768 "failed to getrlimit number of files\n");
7769 exit(EX_OSERR);
7770 } else {
7771 int maxfiles = settings.maxconns;
7772 if (rlim.rlim_cur < maxfiles)
7773 rlim.rlim_cur = maxfiles;
7774 if (rlim.rlim_max < rlim.rlim_cur)
7775 rlim.rlim_max = rlim.rlim_cur;
7776 if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7777 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7778 "failed to set rlimit for open files. Try running as"
7779 " root or requesting smaller maxconns value.\n");
7780 exit(EX_OSERR);
7781 }
7782 }
7783
7784 /* Sanity check for the connection structures */
7785 int nfiles = 0;
7786 if (settings.port != 0) {
7787 nfiles += 2;
7788 }
7789 if (settings.udpport != 0) {
7790 nfiles += settings.num_threads * 2;
7791 }
7792
7793 if (settings.maxconns <= nfiles) {
7794 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7795 "Configuratioin error. \n"
7796 "You specified %d connections, but the system will use at "
7797 "least %d\nconnection structures to start.\n",
7798 settings.maxconns, nfiles);
7799 exit(EX_USAGE);
7800 }
7801
7802 /* lose root privileges if we have them */
7803 if (getuid() == 0 || geteuid() == 0) {
7804 if (username == 0 || *username == '\0') {
7805 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7806 "can't run as root without the -u switch\n");
7807 exit(EX_USAGE);
7808 }
7809 if ((pw = getpwnam(username)) == 0) {
7810 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7811 "can't find the user %s to switch to\n", username);
7812 exit(EX_NOUSER);
7813 }
7814 if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) {
7815 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7816 "failed to assume identity of user %s: %s\n", username,
7817 strerror(errno));
7818 exit(EX_OSERR);
7819 }
7820 }
7821
7822 #ifdef SASL_ENABLED
7823 init_sasl();
7824 #endif /* SASL */
7825
7826 /* daemonize if requested */
7827 /* if we want to ensure our ability to dump core, don't chdir to / */
7828 if (do_daemonize) {
7829 if (sigignore(SIGHUP) == -1) {
7830 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7831 "Failed to ignore SIGHUP: ", strerror(errno));
7832 }
7833 if (daemonize(maxcore, settings.verbose) == -1) {
7834 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7835 "failed to daemon() in order to daemonize\n");
7836 exit(EXIT_FAILURE);
7837 }
7838 }
7839
7840 /* lock paged memory if needed */
7841 if (lock_memory) {
7842 #ifdef HAVE_MLOCKALL
7843 int res = mlockall(MCL_CURRENT | MCL_FUTURE);
7844 if (res != 0) {
7845 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7846 "warning: -k invalid, mlockall() failed: %s\n",
7847 strerror(errno));
7848 }
7849 #else
7850 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7851 "warning: -k invalid, mlockall() not supported on this platform. proceeding without.\n");
7852 #endif
7853 }
7854
7855 /* initialize main thread libevent instance */
7856 main_base = event_init();
7857
7858 /* Load the storage engine */
7859 ENGINE_HANDLE *engine_handle = NULL;
7860 if (!load_engine(engine,get_server_api,settings.extensions.logger,&engine_handle)) {
7861 /* Error already reported */
7862 #ifdef INNODB_MEMCACHED
7863 shutdown_server();
7864 goto func_exit;
7865 #else
7866 exit(EXIT_FAILURE);
7867 #endif
7868 }
7869
7870 #ifdef INNODB_MEMCACHED
7871 my_thread_init();
7872 #endif
7873
7874 if(!init_engine(engine_handle,engine_config,settings.extensions.logger)) {
7875 #ifdef INNODB_MEMCACHED
7876 my_thread_end();
7877 shutdown_server();
7878 goto func_exit;
7879 #else
7880 return(false);
7881 #endif /* INNODB_MEMCACHED */
7882 }
7883
7884 if(settings.verbose > 0) {
7885 log_engine_details(engine_handle,settings.extensions.logger);
7886 }
7887 settings.engine.v1 = (ENGINE_HANDLE_V1 *) engine_handle;
7888
7889 if (settings.engine.v1->arithmetic == NULL) {
7890 settings.engine.v1->arithmetic = internal_arithmetic;
7891 }
7892
7893 /* initialize other stuff */
7894 stats_init();
7895
7896 if (!(conn_cache = cache_create("conn", sizeof(conn), sizeof(void*),
7897 conn_constructor, conn_destructor))) {
7898 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7899 "Failed to create connection cache\n");
7900 exit(EXIT_FAILURE);
7901 }
7902
7903 default_independent_stats = new_independent_stats();
7904
7905 #ifdef INNODB_MEMCACHED
7906 if (!default_independent_stats) {
7907 exit(EXIT_FAILURE);
7908 }
7909 #endif
7910
7911 #ifndef __WIN32__
7912 /*
7913 * ignore SIGPIPE signals; we can use errno == EPIPE if we
7914 * need that information
7915 */
7916 if (sigignore(SIGPIPE) == -1) {
7917 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7918 "failed to ignore SIGPIPE; sigaction");
7919 exit(EX_OSERR);
7920 }
7921 #endif
7922
7923 /* start up worker threads if MT mode */
7924 thread_init(settings.num_threads, main_base, dispatch_event_handler);
7925
7926 /* initialise clock event */
7927 clock_handler(0, 0, 0);
7928
7929 /* create unix mode sockets after dropping privileges */
7930 if (settings.socketpath != NULL) {
7931 if (server_socket_unix(settings.socketpath,settings.access)) {
7932 vperror("failed to listen on UNIX socket: %s", settings.socketpath);
7933 exit(EX_OSERR);
7934 }
7935 }
7936
7937 /* create the listening socket, bind it, and init */
7938 if (settings.socketpath == NULL) {
7939 int udp_port;
7940
7941 const char *portnumber_filename = getenv("MEMCACHED_PORT_FILENAME");
7942 char temp_portnumber_filename[PATH_MAX];
7943 FILE *portnumber_file = NULL;
7944
7945 if (portnumber_filename != NULL) {
7946 snprintf(temp_portnumber_filename,
7947 sizeof(temp_portnumber_filename),
7948 "%s.lck", portnumber_filename);
7949
7950 portnumber_file = fopen(temp_portnumber_filename, "a");
7951 if (portnumber_file == NULL) {
7952 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7953 "Failed to open \"%s\": %s\n",
7954 temp_portnumber_filename, strerror(errno));
7955 }
7956 }
7957
7958 if (settings.port && server_sockets(settings.port, tcp_transport,
7959 portnumber_file)) {
7960 vperror("failed to listen on TCP port %d", settings.port);
7961 #ifdef INNODB_MEMCACHED
7962 my_thread_end();
7963 shutdown_server();
7964 goto func_exit;
7965 #else
7966 exit(EX_OSERR);
7967 #endif /* INNODB_MEMCACHED */
7968 }
7969
7970 /*
7971 * initialization order: first create the listening sockets
7972 * (may need root on low ports), then drop root if needed,
7973 * then daemonise if needed, then init libevent (in some cases
7974 * descriptors created by libevent wouldn't survive forking).
7975 */
7976 udp_port = settings.udpport ? settings.udpport : settings.port;
7977
7978 /* create the UDP listening socket and bind it */
7979 if (settings.udpport && server_sockets(settings.udpport, udp_transport,
7980 portnumber_file)) {
7981 vperror("failed to listen on UDP port %d", settings.udpport);
7982 exit(EX_OSERR);
7983 }
7984
7985 if (portnumber_file) {
7986 fclose(portnumber_file);
7987 rename(temp_portnumber_filename, portnumber_filename);
7988 }
7989 }
7990
7991 if (pid_file != NULL) {
7992 save_pid(pid_file);
7993 }
7994
7995 /* Drop privileges no longer needed */
7996 drop_privileges();
7997
7998 memcached_initialized = 1;
7999
8000 /* enter the event loop */
8001 event_base_loop(main_base, 0);
8002
8003 if (settings.verbose) {
8004 settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
8005 "Initiating shutdown\n");
8006 }
8007
8008 func_exit:
8009
8010 if (settings.engine.v1)
8011 settings.engine.v1->destroy(settings.engine.v0, false);
8012
8013 threads_shutdown();
8014
8015 /* remove the PID file if we're a daemon */
8016 if (do_daemonize)
8017 remove_pidfile(pid_file);
8018 /* Clean up strdup() call for bind() address */
8019 if (settings.inter)
8020 free(settings.inter);
8021
8022 #ifdef INNODB_MEMCACHED
8023 /* free event base */
8024 if (main_base) {
8025 event_base_free(main_base);
8026 main_base = NULL;
8027 }
8028 my_thread_end();
8029 #endif
8030
8031 memcached_shutdown = 2;
8032 memcached_initialized = 2;
8033
8034 return EXIT_SUCCESS;
8035 }
8036