1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *  memcached - memory caching daemon
4  *
5  *       http://www.danga.com/memcached/
6  *  Copyright (c) 2015, 2021, Oracle and/or its affiliates.
7  *  Copyright 2003 Danga Interactive, Inc.  All rights reserved.
8  *  This file was modified by Oracle on 28-08-2015 and 23-03-2016.
9  *  Modifications Copyright (c) 2015, 2021, Oracle and/or its affiliates.
10  *  All rights reserved.
11  *
12  *  Use and distribution licensed under the BSD license.  See
13  *  the LICENSE file for full text.
14  *
15  *  Authors:
16  *      Anatoly Vorobey <mellon@pobox.com>
17  *      Brad Fitzpatrick <brad@danga.com>
18  */
19 #include "config.h"
20 #include "config_static.h"
21 #include "memcached.h"
22 #include "memcached/extension_loggers.h"
23 #include "utilities/engine_loader.h"
24 
25 #include <signal.h>
26 #include <getopt.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <time.h>
33 #include <assert.h>
34 #include <limits.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #include <stddef.h>
38 #include <dlfcn.h>
39 
40 #include "memcached_mysql.h"
41 
42 #define INNODB_MEMCACHED
43 
/*
 * Store a new CAS value on an item by forwarding the request to the
 * item_set_cas hook of the configured storage engine.
 */
static inline void item_set_cas(const void *cookie, item *it, uint64_t cas)
{
    settings.engine.v1->item_set_cas(settings.engine.v0, cookie, it, cas);
}
47 
/*
 * Counter-update bodies ("guts") that STATS_INCR1 runs while it holds the
 * per-thread stats mutex. They all take the same four arguments so they can
 * be swapped freely, even though each one ignores some of them.
 *
 * SLAB_GUTS indexes slab_stats with info.clsid, so a variable named "info"
 * must be in scope wherever a macro built on it (SLAB_INCR, SLAB_TWO,
 * STATS_HIT) is expanded.
 */
#define SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_stats[info.clsid].slab_op++;

/* Bump only the per-thread counter named by thread_op. */
#define THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->thread_op++;

/* Bump two per-thread counters: the ones named by slab_op and thread_op. */
#define THREAD_GUTS2(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_op++; \
    thread_stats->thread_op++;

/* Bump the per-slab-class counter slab_op and the per-thread counter thread_op. */
#define SLAB_THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    THREAD_GUTS(conn, thread_stats, slab_op, thread_op)

/*
 * Common driver for the keyed statistics macros. It looks up the
 * independent_stats for the connection, selects the thread_stats slot of the
 * connection's worker thread (conn->thread->index), runs GUTS with that
 * slot's mutex held, and afterwards hands the key to TK together with
 * slab_op and current_time.
 * NOTE(review): TK is defined outside this view; presumably it records the
 * key in the top-keys table - confirm.
 *
 * The expansion is a bare { } block, not a do { } while (0) statement.
 */
#define STATS_INCR1(GUTS, conn, slab_op, thread_op, key, nkey) { \
    struct independent_stats *independent_stats = get_independent_stats(conn); \
    struct thread_stats *thread_stats = \
        &independent_stats->thread_stats[conn->thread->index]; \
    topkeys_t *topkeys = independent_stats->topkeys; \
    pthread_mutex_lock(&thread_stats->mutex); \
    GUTS(conn, thread_stats, slab_op, thread_op); \
    pthread_mutex_unlock(&thread_stats->mutex); \
    TK(topkeys, slab_op, key, nkey, current_time); \
}

/* Increment the per-thread counter "op" for the given key. */
#define STATS_INCR(conn, op, key, nkey) \
    STATS_INCR1(THREAD_GUTS, conn, op, op, key, nkey)

/* Increment the per-slab-class counter "op" for the given key. */
#define SLAB_INCR(conn, op, key, nkey) \
    STATS_INCR1(SLAB_GUTS, conn, op, op, key, nkey)

/* Increment two per-thread counters (slab_op and thread_op) for the given key. */
#define STATS_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(THREAD_GUTS2, conn, slab_op, thread_op, key, nkey)

/* Increment one per-slab-class counter and one per-thread counter for the given key. */
#define SLAB_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(SLAB_THREAD_GUTS, conn, slab_op, thread_op, key, nkey)

/* Record a hit: bumps the slab counter <op>_hits and the thread counter cmd_<op>. */
#define STATS_HIT(conn, op, key, nkey) \
    SLAB_TWO(conn, op##_hits, cmd_##op, key, nkey)

/* Record a miss: bumps the thread counters <op>_misses and cmd_<op>. */
#define STATS_MISS(conn, op, key, nkey) \
    STATS_TWO(conn, op##_misses, cmd_##op, key, nkey)
91 
/*
 * Key-less per-thread counters.
 *
 *   STATS_NOKEY(conn, op)        - add 1 to thread_stats->op
 *   STATS_NOKEY2(conn, op1, op2) - add 1 to thread_stats->op1 and ->op2
 *   STATS_ADD(conn, op, amt)     - add amt to thread_stats->op
 *
 * The thread_stats block is obtained through get_thread_stats(conn). When
 * HAVE_GCC_SYNC_BUILTINS is defined the updates use __sync_add_and_fetch and
 * take no lock; otherwise they are performed with thread_stats->mutex held.
 *
 * Both variants expand to a single do { } while (0) statement, so they
 * behave identically at every call site, including as the unbraced body of
 * an if/else. Callers must terminate the invocation with a semicolon.
 *
 * MEMCACHED_ATOMIC_MSG is a message describing which variant was compiled in.
 */
#if defined(HAVE_GCC_SYNC_BUILTINS)

#define STATS_NOKEY(conn, op) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    __sync_add_and_fetch(&thread_stats->op, 1); \
} while (0)

#define STATS_NOKEY2(conn, op1, op2) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    __sync_add_and_fetch(&thread_stats->op1, 1); \
    __sync_add_and_fetch(&thread_stats->op2, 1); \
} while (0)

#define STATS_ADD(conn, op, amt) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    __sync_add_and_fetch(&thread_stats->op, amt); \
} while (0)

#define MEMCACHED_ATOMIC_MSG	"InnoDB MEMCACHED: Memcached uses atomic increment \n"

#else /* HAVE_GCC_SYNC_BUILTINS */

#define STATS_NOKEY(conn, op) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op++; \
    pthread_mutex_unlock(&thread_stats->mutex); \
} while (0)

#define STATS_NOKEY2(conn, op1, op2) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op1++; \
    thread_stats->op2++; \
    pthread_mutex_unlock(&thread_stats->mutex); \
} while (0)

#define STATS_ADD(conn, op, amt) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op += amt; \
    pthread_mutex_unlock(&thread_stats->mutex); \
} while (0)

#define MEMCACHED_ATOMIC_MSG	"InnoDB Memcached: Memcached DOES NOT use atomic increment"
#endif /* HAVE_GCC_SYNC_BUILTINS */
146 
/*
 * Process-wide lifecycle flags. They are declared volatile sig_atomic_t and
 * are not static, so they are visible to other translation units.
 * NOTE(review): the code that sets and polls them is outside this view;
 * presumably they signal "shutdown requested" and "start-up finished" -
 * confirm against the writers.
 */
volatile sig_atomic_t memcached_shutdown;
volatile sig_atomic_t memcached_initialized;

/*
 * We keep the current time of day in a global variable that's updated by a
 * timer event. This saves us a bunch of time() system calls (we really only
 * need to get the time once a second, whereas there can be tens of thousands
 * of requests a second) and allows us to use server-start-relative timestamps
 * rather than absolute UNIX timestamps, a space savings on systems where
 * sizeof(time_t) > sizeof(unsigned int).
 */
volatile rel_time_t current_time;
159 
/*
 * Forward declarations for file-local helpers that are referenced before
 * they are defined.
 */
static SOCKET new_socket(struct addrinfo *ai);
static int try_read_command(conn *c);
static inline struct independent_stats *get_independent_stats(conn *c);
static inline struct thread_stats *get_thread_stats(conn *c);
static void register_callback(ENGINE_HANDLE *eh,
                              ENGINE_EVENT_TYPE type,
                              EVENT_CALLBACK cb, const void *cb_data);
/* Result codes returned by try_read_network() and try_read_udp(). */
enum try_read_result {
    READ_DATA_RECEIVED,
    READ_NO_DATA_RECEIVED,
    READ_ERROR,            /** an error occurred on the socket, or the client closed the connection */
    READ_MEMORY_ERROR      /** failed to allocate more memory */
};

static enum try_read_result try_read_network(conn *c);
static enum try_read_result try_read_udp(conn *c);

/* stats */
static void stats_init(void);
static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate);
static void process_stat_settings(ADD_STAT add_stats, void *c);


/* defaults */
static void settings_init(void);

/* event handling, network IO */
static void event_handler(const int fd, const short which, void *arg);
static void complete_nread(conn *c);
static char *process_command(conn *c, char *command);
static void write_and_free(conn *c, char *buf, int bytes);
static int ensure_iov_space(conn *c);
static int add_iov(conn *c, const void *buf, int len);
static int add_msghdr(conn *c);


/* time handling */
static void set_current_time(void);  /* updates the global variable that
                              holds the 32-bit seconds-since-start time
                              (used to avoid a 64-bit time_t) */
203 
/** exported globals **/
struct stats stats;
struct settings settings;
/* NOTE: declared static, so despite the section heading it is file-local.
   realtime() and abstime() use it as the base when converting between
   absolute and server-relative timestamps. */
static time_t process_started;     /* when the process was started */

/** file scope variables **/
/* Head of the list of listening connections; walked through conn->next. */
static conn *listen_conn = NULL;
/* NOTE(review): presumably the UDP sockets opened by the server and how many
   slots of the array are in use - confirm against the code that fills them. */
static int  udp_socket[100];
static int  num_udp_socket;
static struct event_base *main_base;
static struct independent_stats *default_independent_stats;

/* One singly linked list of registered handlers per engine event type;
   perform_callbacks() walks the list selected by the event type. */
static struct engine_event_handler *engine_event_handlers[MAX_ENGINE_EVENT_TYPE + 1];
217 
/* Outcome reported by transmit() for one attempt to write pending data. */
enum transmit_result {
    TRANSMIT_COMPLETE,   /** All done writing. */
    TRANSMIT_INCOMPLETE, /** More data remaining to write. */
    TRANSMIT_SOFT_ERROR, /** Can't write any more right now. */
    TRANSMIT_HARD_ERROR  /** Can't write (c->state is set to conn_closing) */
};

static enum transmit_result transmit(conn *c);
226 
/*
 * Number of seconds in 30 days. realtime() treats expiration values above
 * this threshold as absolute UNIX timestamps and values up to it as offsets
 * from the current time.
 *
 * The replacement list is parenthesized so the macro keeps its value when it
 * is used inside a larger expression (for example as a divisor).
 */
#define REALTIME_MAXDELTA (60*60*24*30)
228 
229 // Perform all callbacks of a given type for the given connection.
perform_callbacks(ENGINE_EVENT_TYPE type,const void * data,const void * c)230 static void perform_callbacks(ENGINE_EVENT_TYPE type,
231                               const void *data,
232                               const void *c) {
233     for (struct engine_event_handler *h = engine_event_handlers[type];
234          h; h = h->next) {
235         h->cb(c, type, data, h->cb_data);
236     }
237 }
238 
239 /*
240  * given time value that's either unix time or delta from current unix time,
241  * return unix time. Use the fact that delta can't exceed one month
242  * (and real time value can't be that low).
243  */
realtime(const time_t exptime)244 static rel_time_t realtime(const time_t exptime) {
245     /* no. of seconds in 30 days - largest possible delta exptime */
246 
247     if (exptime == 0) return 0; /* 0 means never expire */
248 
249     if (exptime > REALTIME_MAXDELTA) {
250         /* if item expiration is at/before the server started, give it an
251            expiration time of 1 second after the server started.
252            (because 0 means don't expire).  without this, we'd
253            underflow and wrap around to some large value way in the
254            future, effectively making items expiring in the past
255            really expiring never */
256         if (exptime <= process_started)
257             return (rel_time_t)1;
258         return (rel_time_t)(exptime - process_started);
259     } else {
260         return (rel_time_t)(exptime + current_time);
261     }
262 }
263 
264 /**
265  * Convert the relative time to an absolute time (relative to EPOC ;) )
266  */
abstime(const rel_time_t exptime)267 static time_t abstime(const rel_time_t exptime)
268 {
269     return process_started + exptime;
270 }
271 
stats_init(void)272 static void stats_init(void) {
273     stats.daemon_conns = 0;
274     stats.rejected_conns = 0;
275     stats.curr_conns = stats.total_conns = stats.conn_structs = 0;
276 
277     stats_prefix_init();
278 }
279 
stats_reset(const void * cookie)280 static void stats_reset(const void *cookie) {
281     struct conn *conn = (struct conn*)cookie;
282     STATS_LOCK();
283     stats.rejected_conns = 0;
284     stats.total_conns = 0;
285     stats_prefix_clear();
286     STATS_UNLOCK();
287     threadlocal_stats_reset(get_independent_stats(conn)->thread_stats);
288     settings.engine.v1->reset_stats(settings.engine.v0, cookie);
289 }
290 
settings_init(void)291 static void settings_init(void) {
292     settings.use_cas = true;
293     settings.access = 0700;
294     settings.port = 11211;
295     settings.udpport = 11211;
296     /* By default this string should be NULL for getaddrinfo() */
297     settings.inter = NULL;
298     settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
299     settings.maxconns = 1000;         /* to limit connections-related memory to about 5MB */
300     settings.verbose = 0;
301     settings.oldest_live = 0;
302     settings.evict_to_free = 1;       /* push old items out of cache when memory runs out */
303     settings.socketpath = NULL;       /* by default, not using a unix socket */
304     settings.factor = 1.25;
305     settings.chunk_size = 48;         /* space for a modest key and value */
306     settings.num_threads = 4;         /* N workers */
307     settings.num_threads_per_udp = 0;
308     settings.prefix_delimiter = ':';
309     settings.detail_enabled = 0;
310     settings.allow_detailed = true;
311     settings.reqs_per_event = DEFAULT_REQS_PER_EVENT;
312     settings.backlog = 1024;
313     settings.binding_protocol = negotiating_prot;
314     settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
315     settings.topkeys = 0;
316     settings.require_sasl = false;
317     settings.extensions.logger = get_stderr_logger();
318 }
319 
320 /*
321  * Adds a message header to a connection.
322  *
323  * Returns 0 on success, -1 on out-of-memory.
324  */
add_msghdr(conn * c)325 static int add_msghdr(conn *c)
326 {
327     struct msghdr *msg;
328 
329     assert(c != NULL);
330 
331     if (c->msgsize == c->msgused) {
332         msg = realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr));
333         if (! msg)
334             return -1;
335         c->msglist = msg;
336         c->msgsize *= 2;
337     }
338 
339     msg = c->msglist + c->msgused;
340 
341     /* this wipes msg_iovlen, msg_control, msg_controllen, and
342        msg_flags, the last 3 of which aren't defined on solaris: */
343     memset(msg, 0, sizeof(struct msghdr));
344 
345     msg->msg_iov = &c->iov[c->iovused];
346 
347     if (c->request_addr_size > 0) {
348         msg->msg_name = &c->request_addr;
349         msg->msg_namelen = c->request_addr_size;
350     }
351 
352     c->msgbytes = 0;
353     c->msgused++;
354 
355     if (IS_UDP(c->transport)) {
356         /* Leave room for the UDP header, which we'll fill in later. */
357         return add_iov(c, NULL, UDP_HEADER_SIZE);
358     }
359 
360     return 0;
361 }
362 
prot_text(enum protocol prot)363 static const char *prot_text(enum protocol prot) {
364     const char *rv = "unknown";
365     switch(prot) {
366         case ascii_prot:
367             rv = "ascii";
368             break;
369         case binary_prot:
370             rv = "binary";
371             break;
372         case negotiating_prot:
373             rv = "auto-negotiate";
374             break;
375     }
376     return rv;
377 }
378 
/*
 * Bookkeeping for temporarily disabling the listening sockets.
 * All fields are read and written with the mutex held.
 */
struct {
    /* protects every other field of this structure */
    pthread_mutex_t mutex;
    /* true once disable_listen() has run */
    bool disabled;
    /* set to 10 by disable_listen()
       NOTE(review): the consumer is outside this view; presumably a
       countdown before listening is re-enabled - confirm */
    ssize_t count;
    /* number of times disable_listen() has been called */
    uint64_t num_disable;
} listen_state;
385 
is_listen_disabled(void)386 static bool is_listen_disabled(void) {
387     bool ret;
388     pthread_mutex_lock(&listen_state.mutex);
389     ret = listen_state.disabled;
390     pthread_mutex_unlock(&listen_state.mutex);
391     return ret;
392 }
393 
get_listen_disabled_num(void)394 static uint64_t get_listen_disabled_num(void) {
395     uint64_t ret;
396     pthread_mutex_lock(&listen_state.mutex);
397     ret = listen_state.num_disable;
398     pthread_mutex_unlock(&listen_state.mutex);
399     return ret;
400 }
401 
disable_listen(void)402 static void disable_listen(void) {
403     pthread_mutex_lock(&listen_state.mutex);
404     listen_state.disabled = true;
405     listen_state.count = 10;
406     ++listen_state.num_disable;
407     pthread_mutex_unlock(&listen_state.mutex);
408 
409     conn *next;
410     for (next = listen_conn; next; next = next->next) {
411         update_event(next, 0);
412         if (listen(next->sfd, 1) != 0) {
413             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
414                                             "listen() failed",
415                                             strerror(errno));
416         }
417     }
418 }
419 
/*
 * Close a socket, retrying while the close fails with EINTR or EAGAIN.
 *
 * INVALID_SOCKET is ignored. A close that still fails is logged as a
 * warning. On success the current-connections counter is decremented and,
 * if listening is disabled at the moment, the dispatcher is notified.
 */
void safe_close(SOCKET sfd) {
    int rval;

    if (sfd == INVALID_SOCKET) {
        return;
    }

    do {
        rval = closesocket(sfd);
    } while (rval == SOCKET_ERROR && (errno == EINTR || errno == EAGAIN));

    if (rval == SOCKET_ERROR) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                        "Failed to close socket %d (%s)!!\n", (int)sfd,
                                        strerror(errno));
        return;
    }

    STATS_LOCK();
    stats.curr_conns--;
    STATS_UNLOCK();

    if (is_listen_disabled()) {
        notify_dispatcher();
    }
}
443 
/*
 * Free list management for connections.
 */
cache_t *conn_cache;      /* object cache that conn structures are allocated from and returned to */
448 
449 /**
450  * Reset all of the dynamic buffers used by a connection back to their
451  * default sizes. The strategy for resizing the buffers is to allocate a
452  * new one of the correct size and free the old one if the allocation succeeds
453  * instead of using realloc to change the buffer size (because realloc may
454  * not shrink the buffers, and will also copy the memory). If the allocation
455  * fails the buffer will be unchanged.
456  *
457  * @param c the connection to resize the buffers for
458  * @return true if all allocations succeeded, false if one or more of the
459  *         allocations failed.
460  */
conn_reset_buffersize(conn * c)461 static bool conn_reset_buffersize(conn *c) {
462     bool ret = true;
463 
464     if (c->rsize != DATA_BUFFER_SIZE) {
465         void *ptr = malloc(DATA_BUFFER_SIZE);
466         if (ptr != NULL) {
467             free(c->rbuf);
468             c->rbuf = ptr;
469             c->rsize = DATA_BUFFER_SIZE;
470         } else {
471             ret = false;
472         }
473     }
474 
475     if (c->wsize != DATA_BUFFER_SIZE) {
476         void *ptr = malloc(DATA_BUFFER_SIZE);
477         if (ptr != NULL) {
478             free(c->wbuf);
479             c->wbuf = ptr;
480             c->wsize = DATA_BUFFER_SIZE;
481         } else {
482             ret = false;
483         }
484     }
485 
486     if (c->isize != ITEM_LIST_INITIAL) {
487         void *ptr = malloc(sizeof(item *) * ITEM_LIST_INITIAL);
488         if (ptr != NULL) {
489             free(c->ilist);
490             c->ilist = ptr;
491             c->isize = ITEM_LIST_INITIAL;
492         } else {
493             ret = false;
494         }
495     }
496 
497     if (c->suffixsize != SUFFIX_LIST_INITIAL) {
498         void *ptr = malloc(sizeof(char *) * SUFFIX_LIST_INITIAL);
499         if (ptr != NULL) {
500             free(c->suffixlist);
501             c->suffixlist = ptr;
502             c->suffixsize = SUFFIX_LIST_INITIAL;
503         } else {
504             ret = false;
505         }
506     }
507 
508     if (c->iovsize != IOV_LIST_INITIAL) {
509         void *ptr = malloc(sizeof(struct iovec) * IOV_LIST_INITIAL);
510         if (ptr != NULL) {
511             free(c->iov);
512             c->iov = ptr;
513             c->iovsize = IOV_LIST_INITIAL;
514         } else {
515             ret = false;
516         }
517     }
518 
519     if (c->msgsize != MSG_LIST_INITIAL) {
520         void *ptr = malloc(sizeof(struct msghdr) * MSG_LIST_INITIAL);
521         if (ptr != NULL) {
522             free(c->msglist);
523             c->msglist = ptr;
524             c->msgsize = MSG_LIST_INITIAL;
525         } else {
526             ret = false;
527         }
528     }
529 
530     return ret;
531 }
532 
533 /**
534  * Constructor for all memory allocations of connection objects. Initialize
535  * all members and allocate the transfer buffers.
536  *
537  * @param buffer The memory allocated by the object cache
538  * @param unused1 not used
539  * @param unused2 not used
540  * @return 0 on success, 1 if we failed to allocate memory
541  */
conn_constructor(void * buffer,void * unused1,int unused2)542 static int conn_constructor(void *buffer, void *unused1, int unused2) {
543     (void)unused1; (void)unused2;
544 
545     conn *c = buffer;
546     memset(c, 0, sizeof(*c));
547     MEMCACHED_CONN_CREATE(c);
548 
549     if (!conn_reset_buffersize(c)) {
550         free(c->rbuf);
551         free(c->wbuf);
552         free(c->ilist);
553         free(c->suffixlist);
554         free(c->iov);
555         free(c->msglist);
556         settings.extensions.logger->log(EXTENSION_LOG_WARNING,
557                                         NULL,
558                                         "Failed to allocate buffers for connection\n");
559         return 1;
560     }
561 
562     STATS_LOCK();
563     stats.conn_structs++;
564     STATS_UNLOCK();
565 
566     return 0;
567 }
568 
569 /**
570  * Destructor for all connection objects. Release all allocated resources.
571  *
572  * @param buffer The memory allocated by the objec cache
573  * @param unused not used
574  */
conn_destructor(void * buffer,void * unused)575 static void conn_destructor(void *buffer, void *unused) {
576     (void)unused;
577     conn *c = buffer;
578     free(c->rbuf);
579     free(c->wbuf);
580     free(c->ilist);
581     free(c->suffixlist);
582     free(c->iov);
583     free(c->msglist);
584 
585     STATS_LOCK();
586     stats.conn_structs--;
587     STATS_UNLOCK();
588 }
589 
/**
 * Take a connection object from conn_cache, initialise it for the given
 * socket and register its event.
 *
 * @param sfd the socket the connection wraps
 * @param init_state the initial state; also stored as the write_and_go state
 * @param event_flags event flags passed to event_set() and remembered in
 *        c->ev_flags
 * @param read_buffer_size minimum size of the read buffer; the buffer is
 *        replaced by a larger one when it is currently smaller than this
 * @param transport the network transport of the connection
 * @param base the event base the connection's event is attached to
 * @param timeout passed through to register_event()
 * @return the initialised connection, or NULL when the cache allocation, the
 *         read buffer allocation or the event registration fails (in the
 *         latter two cases the object is returned to the cache first)
 *
 * On success the total-connections counter is incremented, the reference
 * count is set to 1 and the ON_CONNECT callbacks are run.
 */
conn *conn_new(const SOCKET sfd, STATE_FUNC init_state,
               const int event_flags,
               const int read_buffer_size, enum network_transport transport,
               struct event_base *base, struct timeval *timeout) {
    conn *c = cache_alloc(conn_cache);
    if (c == NULL) {
        return NULL;
    }

    assert(c->thread == NULL);

    /* Grow the read buffer if the caller asks for more than we have. The new
       buffer is allocated before the old one is freed, so a failed
       allocation leaves the object intact for the cache. */
    if (c->rsize < read_buffer_size) {
        void *mem = malloc(read_buffer_size);
        if (mem) {
            c->rsize = read_buffer_size;
            free(c->rbuf);
            c->rbuf = mem;
        } else {
            assert(c->thread == NULL);
            cache_free(conn_cache, c);
            return NULL;
        }
    }

    c->transport = transport;
    c->protocol = settings.binding_protocol;

    /* unix socket mode doesn't need this, so zeroed out.  but why
     * is this done for every command?  presumably for UDP
     * mode.  */
    if (!settings.socketpath) {
        c->request_addr_size = sizeof(c->request_addr);
    } else {
        c->request_addr_size = 0;
    }

    /* Debug trace describing what kind of connection was created. */
    if (settings.verbose > 1) {
        if (init_state == conn_listening) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d server listening (%s)\n", sfd,
                                            prot_text(c->protocol));
        } else if (IS_UDP(transport)) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d server listening (udp)\n", sfd);
        } else if (c->protocol == negotiating_prot) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new auto-negotiating client connection\n",
                                            sfd);
        } else if (c->protocol == ascii_prot) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new ascii client connection.\n", sfd);
        } else if (c->protocol == binary_prot) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new binary client connection.\n", sfd);
        } else {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "<%d new unknown (%d) client connection\n",
                                            sfd, c->protocol);
            assert(false);
        }
    }

    /* Reset the per-connection state: cursors point at the start of their
       buffers and all counters start from zero. */
    c->sfd = sfd;
    c->state = init_state;
    c->rlbytes = 0;
    c->cmd = -1;
    c->ascii_cmd = NULL;
    c->rbytes = c->wbytes = 0;
    c->wcurr = c->wbuf;
    c->rcurr = c->rbuf;
    c->ritem = 0;
    c->icurr = c->ilist;
    c->suffixcurr = c->suffixlist;
    c->ileft = 0;
    c->suffixleft = 0;
    c->iovused = 0;
    c->msgcurr = 0;
    c->msgused = 0;
    c->next = NULL;
    c->list_state = 0;

    c->write_and_go = init_state;
    c->write_and_free = 0;
    c->item = 0;

    c->noreply = false;

    /* Bind the event to the socket and the event base, then register it. */
    event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
    event_base_set(base, &c->event);
    c->ev_flags = event_flags;

    if (!register_event(c, timeout)) {
        assert(c->thread == NULL);
        cache_free(conn_cache, c);
        return NULL;
    }

    STATS_LOCK();
    stats.total_conns++;
    STATS_UNLOCK();

    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    c->refcount = 1;

    MEMCACHED_CONN_ALLOCATE(c->sfd);

    perform_callbacks(ON_CONNECT, NULL, c);

    return c;
}
701 
/*
 * Release everything a connection still holds and put its bookkeeping
 * fields back to their idle values: the current item, the items left in the
 * item list, the suffixes left in the suffix list, the pending
 * write-and-free buffer, the SASL context and the engine-specific storage.
 */
static void conn_cleanup(conn *c) {
    assert(c != NULL);

    if (c->item) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }

    /* items that are still queued in the item list */
    while (c->ileft > 0) {
        settings.engine.v1->release(settings.engine.v0, c, *(c->icurr));
        c->ileft--;
        c->icurr++;
    }

    /* suffixes go back to the suffix cache of the owning thread */
    while (c->suffixleft > 0) {
        cache_free(c->thread->suffix_cache, *(c->suffixcurr));
        c->suffixleft--;
        c->suffixcurr++;
    }

    /* free(NULL) is a no-op, so no guard is needed */
    free(c->write_and_free);
    c->write_and_free = 0;

    if (c->sasl_conn) {
        sasl_dispose(&c->sasl_conn);
        c->sasl_conn = NULL;
    }

    if (c->engine_storage) {
        settings.engine.v1->clean_engine(settings.engine.v0, c,
                                         c->engine_storage);
    }
    c->engine_storage = NULL;

    c->tap_iterator = NULL;
    c->thread = NULL;
    assert(c->next == NULL);
    c->ascii_cmd = NULL;
    c->sfd = INVALID_SOCKET;
    c->tap_nack_mode = false;
}
745 
/*
 * Tear down a connection whose socket has already been closed and hand the
 * object back to the connection cache.
 *
 * An ASCII command still attached to the connection is aborted, the
 * connection is unlinked from its thread's pending-io and pending-close
 * lists (with the thread locked), and its resources are released.
 */
void conn_close(conn *c) {
    assert(c != NULL);
    assert(c->sfd == INVALID_SOCKET);

    if (c->ascii_cmd != NULL) {
        c->ascii_cmd->abort(c->ascii_cmd, c);
    }

    assert(c->thread);
    LOCK_THREAD(c->thread);
    if (settings.verbose > 1) {
        if (list_contains(c->thread->pending_io, c)) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "Current connection was in the pending-io list.. Nuking it\n");
        }
    }
    /* unlink from both per-thread lists */
    c->thread->pending_io = list_remove(c->thread->pending_io, c);
    c->thread->pending_close = list_remove(c->thread->pending_close, c);
    UNLOCK_THREAD(c->thread);

    conn_cleanup(c);

    /*
     * The object cache expects objects to come back in a constructed
     * state, so put the buffers back to their default sizes first.
     */
    conn_reset_buffersize(c);
    assert(c->thread == NULL);
    cache_free(conn_cache, c);
}
776 
777 /*
778  * Shrinks a connection's buffers if they're too big.  This prevents
779  * periodic large "get" requests from permanently chewing lots of server
780  * memory.
781  *
782  * This should only be called in between requests since it can wipe output
783  * buffers!
784  */
conn_shrink(conn * c)785 static void conn_shrink(conn *c) {
786     assert(c != NULL);
787 
788     if (IS_UDP(c->transport))
789         return;
790 
791     if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) {
792         char *newbuf;
793 
794         if (c->rcurr != c->rbuf)
795             memmove(c->rbuf, c->rcurr, (size_t)c->rbytes);
796 
797         newbuf = (char *)realloc((void *)c->rbuf, DATA_BUFFER_SIZE);
798 
799         if (newbuf) {
800             c->rbuf = newbuf;
801             c->rsize = DATA_BUFFER_SIZE;
802         }
803         /* TODO check other branch... */
804         c->rcurr = c->rbuf;
805     }
806 
807     if (c->isize > ITEM_LIST_HIGHWAT) {
808         item **newbuf = (item**) realloc((void *)c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0]));
809         if (newbuf) {
810             c->ilist = newbuf;
811             c->isize = ITEM_LIST_INITIAL;
812         }
813     /* TODO check error condition? */
814     }
815 
816     if (c->msgsize > MSG_LIST_HIGHWAT) {
817         struct msghdr *newbuf = (struct msghdr *) realloc((void *)c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0]));
818         if (newbuf) {
819             c->msglist = newbuf;
820             c->msgsize = MSG_LIST_INITIAL;
821         }
822     /* TODO check error condition? */
823     }
824 
825     if (c->iovsize > IOV_LIST_HIGHWAT) {
826         struct iovec *newbuf = (struct iovec *) realloc((void *)c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0]));
827         if (newbuf) {
828             c->iov = newbuf;
829             c->iovsize = IOV_LIST_INITIAL;
830         }
831     /* TODO check return value */
832     }
833 }
834 
835 /**
836  * Convert a state name to a human readable form.
837  */
state_text(STATE_FUNC state)838 const char *state_text(STATE_FUNC state) {
839     if (state == conn_listening) {
840         return "conn_listening";
841     } else if (state == conn_new_cmd) {
842         return "conn_new_cmd";
843     } else if (state == conn_waiting) {
844         return "conn_waiting";
845     } else if (state == conn_read) {
846         return "conn_read";
847     } else if (state == conn_parse_cmd) {
848         return "conn_parse_cmd";
849     } else if (state == conn_write) {
850         return "conn_write";
851     } else if (state == conn_nread) {
852         return "conn_nread";
853     } else if (state == conn_swallow) {
854         return "conn_swallow";
855     } else if (state == conn_closing) {
856         return "conn_closing";
857     } else if (state == conn_mwrite) {
858         return "conn_mwrite";
859     } else if (state == conn_ship_log) {
860         return "conn_ship_log";
861     } else if (state == conn_add_tap_client) {
862         return "conn_add_tap_client";
863     } else if (state == conn_setup_tap_stream) {
864         return "conn_setup_tap_stream";
865     } else if (state == conn_pending_close) {
866         return "conn_pending_close";
867     } else if (state == conn_immediate_close) {
868         return "conn_immediate_close";
869     } else {
870         return "Unknown";
871     }
872 }
873 
874 /*
875  * Sets a connection's current state in the state machine. Any special
876  * processing that needs to happen on certain state transitions can
877  * happen here.
878  */
conn_set_state(conn * c,STATE_FUNC state)879 void conn_set_state(conn *c, STATE_FUNC state) {
880     assert(c != NULL);
881 
882     if (state != c->state) {
883         /*
884          * The connections in the "tap thread" behaves differently than
885          * normal connections because they operate in a full duplex mode.
886          * New messages may appear from both sides, so we can't block on
887          * read from the nework / engine
888          */
889         if (c->thread == tap_thread) {
890             if (state == conn_waiting) {
891                 c->which = EV_WRITE;
892                 state = conn_ship_log;
893             }
894         }
895 
896         if (settings.verbose > 2 || c->state == conn_closing
897             || c->state == conn_add_tap_client) {
898             settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
899                                             "%d: going from %s to %s\n",
900                                             c->sfd, state_text(c->state),
901                                             state_text(state));
902         }
903 
904         c->state = state;
905 
906         if (state == conn_write || state == conn_mwrite) {
907             MEMCACHED_PROCESS_COMMAND_END(c->sfd, c->wbuf, c->wbytes);
908         }
909     }
910 }
911 
912 /*
913  * Ensures that there is room for another struct iovec in a connection's
914  * iov list.
915  *
916  * Returns 0 on success, -1 on out-of-memory.
917  */
ensure_iov_space(conn * c)918 static int ensure_iov_space(conn *c) {
919     assert(c != NULL);
920 
921     if (c->iovused >= c->iovsize) {
922         int i, iovnum;
923         struct iovec *new_iov = (struct iovec *)realloc(c->iov,
924                                 (c->iovsize * 2) * sizeof(struct iovec));
925         if (! new_iov)
926             return -1;
927         c->iov = new_iov;
928         c->iovsize *= 2;
929 
930         /* Point all the msghdr structures at the new list. */
931         for (i = 0, iovnum = 0; i < c->msgused; i++) {
932             c->msglist[i].msg_iov = &c->iov[iovnum];
933             iovnum += c->msglist[i].msg_iovlen;
934         }
935     }
936 
937     return 0;
938 }
939 
940 
941 /*
942  * Adds data to the list of pending data that will be written out to a
943  * connection.
944  *
945  * Returns 0 on success, -1 on out-of-memory.
946  */
947 
add_iov(conn * c,const void * buf,int len)948 static int add_iov(conn *c, const void *buf, int len) {
949     struct msghdr *m;
950     int leftover;
951     bool limit_to_mtu;
952 
953     assert(c != NULL);
954 
955     do {
956         m = &c->msglist[c->msgused - 1];
957 
958         /*
959          * Limit UDP packets, and the first payloads of TCP replies, to
960          * UDP_MAX_PAYLOAD_SIZE bytes.
961          */
962         limit_to_mtu = IS_UDP(c->transport) || (1 == c->msgused);
963 
964         /* We may need to start a new msghdr if this one is full. */
965         if (m->msg_iovlen == IOV_MAX ||
966             (limit_to_mtu && c->msgbytes >= UDP_MAX_PAYLOAD_SIZE)) {
967             add_msghdr(c);
968             m = &c->msglist[c->msgused - 1];
969         }
970 
971         if (ensure_iov_space(c) != 0)
972             return -1;
973 
974         /* If the fragment is too big to fit in the datagram, split it up */
975         if (limit_to_mtu && len + c->msgbytes > UDP_MAX_PAYLOAD_SIZE) {
976             leftover = len + c->msgbytes - UDP_MAX_PAYLOAD_SIZE;
977             len -= leftover;
978         } else {
979             leftover = 0;
980         }
981 
982         m = &c->msglist[c->msgused - 1];
983         m->msg_iov[m->msg_iovlen].iov_base = (void *)buf;
984         m->msg_iov[m->msg_iovlen].iov_len = len;
985 
986         c->msgbytes += len;
987         c->iovused++;
988         m->msg_iovlen++;
989 
990         buf = ((char *)buf) + len;
991         len = leftover;
992     } while (leftover > 0);
993 
994     return 0;
995 }
996 
997 
998 /*
999  * Constructs a set of UDP headers and attaches them to the outgoing messages.
1000  */
build_udp_headers(conn * c)1001 static int build_udp_headers(conn *c) {
1002     int i;
1003     unsigned char *hdr;
1004 
1005     assert(c != NULL);
1006 
1007     if (c->msgused > c->hdrsize) {
1008         void *new_hdrbuf;
1009         if (c->hdrbuf)
1010             new_hdrbuf = realloc(c->hdrbuf, c->msgused * 2 * UDP_HEADER_SIZE);
1011         else
1012             new_hdrbuf = malloc(c->msgused * 2 * UDP_HEADER_SIZE);
1013         if (! new_hdrbuf)
1014             return -1;
1015         c->hdrbuf = (unsigned char *)new_hdrbuf;
1016         c->hdrsize = c->msgused * 2;
1017     }
1018 
1019     hdr = c->hdrbuf;
1020     for (i = 0; i < c->msgused; i++) {
1021         c->msglist[i].msg_iov[0].iov_base = (void*)hdr;
1022         c->msglist[i].msg_iov[0].iov_len = UDP_HEADER_SIZE;
1023         *hdr++ = c->request_id / 256;
1024         *hdr++ = c->request_id % 256;
1025         *hdr++ = i / 256;
1026         *hdr++ = i % 256;
1027         *hdr++ = c->msgused / 256;
1028         *hdr++ = c->msgused % 256;
1029         *hdr++ = 0;
1030         *hdr++ = 0;
1031         assert((void *) hdr == (caddr_t)c->msglist[i].msg_iov[0].iov_base + UDP_HEADER_SIZE);
1032     }
1033 
1034     return 0;
1035 }
1036 
1037 
/*
 * Prepares a one-line text reply (str followed by "\r\n") in c->wbuf and
 * switches the connection to conn_write.
 *
 * When the current command was flagged "noreply" nothing is written: the
 * flag is cleared and the connection moves straight on. In both cases the
 * follow-up state is conn_swallow if c->sbytes > 0, conn_new_cmd otherwise.
 *
 * A line that does not fit into the write buffer is replaced by a fixed
 * SERVER_ERROR message.
 */
static void out_string(conn *c, const char *str) {
    assert(c != NULL);

    if (c->noreply) {
        if (settings.verbose > 1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            ">%d NOREPLY %s\n", c->sfd, str);
        }
        c->noreply = false;
        conn_set_state(c, (c->sbytes > 0) ? conn_swallow : conn_new_cmd);
        return;
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d %s\n", c->sfd, str);
    }

    /* Nuke a partial output... */
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    add_msghdr(c);

    size_t line_len = strlen(str);
    if ((line_len + 2) > c->wsize) {
        /* ought to be always enough. just fail for simplicity */
        str = "SERVER_ERROR output line too long";
        line_len = strlen(str);
    }

    /* Line plus terminating CRLF goes to the start of the write buffer. */
    memcpy(c->wbuf, str, line_len);
    memcpy(c->wbuf + line_len, "\r\n", 2);
    c->wbytes = line_len + 2;
    c->wcurr = c->wbuf;

    conn_set_state(c, conn_write);

    c->write_and_go = (c->sbytes > 0) ? conn_swallow : conn_new_cmd;
}
1090 
1091 /*
1092  * we get here after reading the value in set/add/replace commands. The command
1093  * has been stored in c->cmd, and the item is ready in c->item.
1094  */
complete_update_ascii(conn * c)1095 static void complete_update_ascii(conn *c) {
1096     assert(c != NULL);
1097 
1098     item *it = c->item;
1099     item_info info = { .nvalue = 1 };
1100     if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
1101         settings.engine.v1->release(settings.engine.v0, c, it);
1102         settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
1103                                         "%d: Failed to get item info\n",
1104                                         c->sfd);
1105         out_string(c, "SERVER_ERROR failed to get item details");
1106         return;
1107     }
1108 
1109     c->sbytes = 2; // swallow \r\n
1110     ENGINE_ERROR_CODE ret = c->aiostat;
1111     c->aiostat = ENGINE_SUCCESS;
1112     if (ret == ENGINE_SUCCESS) {
1113         ret = settings.engine.v1->store(settings.engine.v0, c, it, &c->cas,
1114                                         c->store_op, 0);
1115     }
1116 
1117 #ifdef ENABLE_DTRACE
1118     switch (c->store_op) {
1119     case OPERATION_ADD:
1120         MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
1121                               (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1122         break;
1123     case OPERATION_REPLACE:
1124         MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
1125                                   (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1126         break;
1127     case OPERATION_APPEND:
1128         MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
1129                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1130         break;
1131     case OPERATION_PREPEND:
1132         MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
1133                                   (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1134         break;
1135     case OPERATION_SET:
1136         MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
1137                               (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1138         break;
1139     case OPERATION_CAS:
1140         MEMCACHED_COMMAND_CAS(c->sfd, info.key, info.nkey, info.nbytes, c->cas);
1141         break;
1142     }
1143 #endif
1144 
1145     switch (ret) {
1146     case ENGINE_SUCCESS:
1147         out_string(c, "STORED");
1148         break;
1149     case ENGINE_KEY_EEXISTS:
1150         out_string(c, "EXISTS");
1151         break;
1152     case ENGINE_KEY_ENOENT:
1153         out_string(c, "NOT_FOUND");
1154         break;
1155     case ENGINE_NOT_STORED:
1156         out_string(c, "NOT_STORED");
1157         break;
1158     case ENGINE_DISCONNECT:
1159         c->state = conn_closing;
1160         break;
1161     case ENGINE_ENOTSUP:
1162         out_string(c, "SERVER_ERROR not supported");
1163         break;
1164     case ENGINE_ENOMEM:
1165         out_string(c, "SERVER_ERROR out of memory");
1166         break;
1167     case ENGINE_TMPFAIL:
1168         out_string(c, "SERVER_ERROR temporary failure");
1169         break;
1170     case ENGINE_EINVAL:
1171         out_string(c, "CLIENT_ERROR invalid arguments");
1172         break;
1173     case ENGINE_E2BIG:
1174         out_string(c, "CLIENT_ERROR value too big");
1175         break;
1176     case ENGINE_EACCESS:
1177         out_string(c, "CLIENT_ERROR access control violation");
1178         break;
1179     case ENGINE_NOT_MY_VBUCKET:
1180         out_string(c, "SERVER_ERROR not my vbucket");
1181         break;
1182     case ENGINE_FAILED:
1183         out_string(c, "SERVER_ERROR failure");
1184         break;
1185     case ENGINE_EWOULDBLOCK:
1186         c->ewouldblock = true;
1187         break;
1188     case ENGINE_WANT_MORE:
1189         assert(false);
1190         c->state = conn_closing;
1191         break;
1192 
1193     default:
1194         out_string(c, "SERVER_ERROR internal");
1195     }
1196 
1197     if (c->store_op == OPERATION_CAS) {
1198         switch (ret) {
1199         case ENGINE_SUCCESS:
1200             SLAB_INCR(c, cas_hits, info.key, info.nkey);
1201             break;
1202         case ENGINE_KEY_EEXISTS:
1203             SLAB_INCR(c, cas_badval, info.key, info.nkey);
1204             break;
1205         case ENGINE_KEY_ENOENT:
1206             STATS_NOKEY(c, cas_misses);
1207             break;
1208         default:
1209             ;
1210         }
1211     } else {
1212         SLAB_INCR(c, cmd_set, info.key, info.nkey);
1213     }
1214 
1215     if (!c->ewouldblock) {
1216         /* release the c->item reference */
1217         settings.engine.v1->release(settings.engine.v0, c, c->item);
1218         c->item = 0;
1219     }
1220 }
1221 
1222 /**
1223  * get a pointer to the start of the request struct for the current command
1224  */
binary_get_request(conn * c)1225 static void* binary_get_request(conn *c) {
1226     char *ret = c->rcurr;
1227     ret -= (sizeof(c->binary_header) + c->binary_header.request.keylen +
1228             c->binary_header.request.extlen);
1229 
1230     assert(ret >= c->rbuf);
1231     return ret;
1232 }
1233 
1234 /**
1235  * get a pointer to the key in this request
1236  */
binary_get_key(conn * c)1237 static char* binary_get_key(conn *c) {
1238     return c->rcurr - (c->binary_header.request.keylen);
1239 }
1240 
/**
 * Insert a key into a buffer, but replace all non-printable characters
 * with a '.'.
 *
 * The output has the form "<dir><client> <prefix> <key>", where <dir> is
 * '>' for data from the client and '<' otherwise. It is always
 * NUL-terminated; a key that does not fit is cut short.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param key the key to add to the buffer
 * @param nkey the number of bytes in the key
 * @return number of bytes in dest (excluding the terminating NUL) if
 *         success, -1 if the prefix could not be formatted or does not
 *         fit into dest
 */
static ssize_t key_to_printable_buffer(char *dest, size_t destsz,
                                       int client, bool from_client,
                                       const char *prefix,
                                       const char *key,
                                       size_t nkey)
{
    int nw = snprintf(dest, destsz, "%c%d %s ", from_client ? '>' : '<',
                      client, prefix);
    /*
     * snprintf returns the length it wanted to write, so a value of
     * destsz or more means the prefix was truncated. Continuing would
     * place the write position outside of dest.
     */
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }

    char *ptr = dest + nw;

    /* Space left for key bytes, keeping one byte for the terminator. */
    size_t room = destsz - (size_t)nw - 1;
    if (nkey > room) {
        nkey = room;
    }

    for (size_t ii = 0; ii < nkey; ++ii, ++key, ++ptr) {
        /* <ctype.h> functions require an unsigned char value */
        if (isgraph((unsigned char)*key)) {
            *ptr = *key;
        } else {
            *ptr = '.';
        }
    }

    *ptr = '\0';
    return ptr - dest;
}
1283 
/**
 * Convert a byte array to a text string
 *
 * The output starts with "<dir><client> <prefix>" and is followed by the
 * bytes as " 0xNN" groups, four per line, each line introduced by
 * "<dir><client>  ". <dir> is '>' for data from the client and '<'
 * otherwise. The text ends with a newline.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param data the data to add to the buffer
 * @param size the number of bytes in data to print
 * @return number of bytes in dest (excluding the terminating NUL) if
 *         success, -1 on a formatting error or if the text does not fit
 *         into dest
 */
static ssize_t bytes_to_output_string(char *dest, size_t destsz,
                                      int client, bool from_client,
                                      const char *prefix,
                                      const char *data,
                                      size_t size)
{
    const char direction = from_client ? '>' : '<';
    size_t offset;
    int nw;

    /*
     * snprintf returns the length it wanted to write. Every call below
     * treats "did not fit" as an error, which keeps offset < destsz and
     * therefore keeps "destsz - offset" from wrapping around.
     */
    nw = snprintf(dest, destsz, "%c%d %s", direction, client, prefix);
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }
    offset = (size_t)nw;

    for (size_t ii = 0; ii < size; ++ii) {
        if (ii % 4 == 0) {
            /* start a new output line every four bytes */
            nw = snprintf(dest + offset, destsz - offset, "\n%c%d  ",
                          direction, client);
            if (nw < 0 || (size_t)nw >= destsz - offset) {
                return -1;
            }
            offset += (size_t)nw;
        }
        nw = snprintf(dest + offset, destsz - offset,
                      " 0x%02x", (unsigned char)data[ii]);
        if (nw < 0 || (size_t)nw >= destsz - offset) {
            return -1;
        }
        offset += (size_t)nw;
    }

    nw = snprintf(dest + offset, destsz - offset, "\n");
    if (nw < 0 || (size_t)nw >= destsz - offset) {
        return -1;
    }

    return (ssize_t)(offset + (size_t)nw);
}
1330 
/*
 * Starts a new binary-protocol response: any output queued so far is
 * discarded, a response header is built at the start of c->wbuf and that
 * header is queued as the first piece of output.
 *
 * The opcode is copied from the request header; opaque and cas come from
 * the connection. Multi-byte fields are converted with htons/htonl/htonll.
 *
 * err      - response status
 * hdr_len  - stored in the header's extlen field
 * key_len  - stored in the header's keylen field
 * body_len - stored in the header's bodylen field
 *
 * If no message header can be allocated, a text-protocol out-of-memory
 * line is emitted instead and no binary header is written.
 */
static void add_bin_header(conn *c, uint16_t err, uint8_t hdr_len, uint16_t key_len, uint32_t body_len) {
    assert(c);

    /* Throw away whatever was queued before. */
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    if (add_msghdr(c) != 0) {
        /* XXX:  out_string is inappropriate here */
        out_string(c, "SERVER_ERROR out of memory");
        return;
    }

    protocol_binary_response_header *rsp_hdr =
        (protocol_binary_response_header *)c->wbuf;

    /* single-byte fields */
    rsp_hdr->response.magic = (uint8_t)PROTOCOL_BINARY_RES;
    rsp_hdr->response.opcode = c->binary_header.request.opcode;
    rsp_hdr->response.extlen = (uint8_t)hdr_len;
    rsp_hdr->response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES;

    /* multi-byte fields */
    rsp_hdr->response.keylen = (uint16_t)htons(key_len);
    rsp_hdr->response.status = (uint16_t)htons(err);
    rsp_hdr->response.bodylen = htonl(body_len);
    rsp_hdr->response.opaque = c->opaque;
    rsp_hdr->response.cas = htonll(c->cas);

    if (settings.verbose > 1) {
        char dump[1024];
        ssize_t dumped = bytes_to_output_string(dump, sizeof(dump), c->sfd,
                                                false,
                                                "Writing bin response:",
                                                (const char*)rsp_hdr->bytes,
                                                sizeof(rsp_hdr->bytes));
        if (dumped != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "%s", dump);
        }
    }

    add_iov(c, c->wbuf, sizeof(rsp_hdr->response));
}
1372 
1373 /**
1374  * Convert an error code generated from the storage engine to the corresponding
1375  * error code used by the protocol layer.
1376  * @param e the error code as used in the engine
1377  * @return the error code as used by the protocol layer
1378  */
engine_error_2_protocol_error(ENGINE_ERROR_CODE e)1379 static protocol_binary_response_status engine_error_2_protocol_error(ENGINE_ERROR_CODE e) {
1380     protocol_binary_response_status ret;
1381 
1382     switch (e) {
1383     case ENGINE_SUCCESS:
1384         return PROTOCOL_BINARY_RESPONSE_SUCCESS;
1385     case ENGINE_KEY_ENOENT:
1386         return PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
1387     case ENGINE_KEY_EEXISTS:
1388         return PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
1389     case ENGINE_ENOMEM:
1390         return PROTOCOL_BINARY_RESPONSE_ENOMEM;
1391     case ENGINE_TMPFAIL:
1392         return PROTOCOL_BINARY_RESPONSE_ETMPFAIL;
1393     case ENGINE_NOT_STORED:
1394         return PROTOCOL_BINARY_RESPONSE_NOT_STORED;
1395     case ENGINE_EINVAL:
1396         return PROTOCOL_BINARY_RESPONSE_EINVAL;
1397     case ENGINE_ENOTSUP:
1398         return PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED;
1399     case ENGINE_E2BIG:
1400         return PROTOCOL_BINARY_RESPONSE_E2BIG;
1401     case ENGINE_NOT_MY_VBUCKET:
1402         return PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET;
1403     default:
1404         ret = PROTOCOL_BINARY_RESPONSE_EINTERNAL;
1405     }
1406 
1407     return ret;
1408 }
1409 
/*
 * Sends a binary-protocol response that carries only a status code and,
 * for anything but success, a human readable message as body. The engine
 * may append extra detail through its optional errinfo() hook.
 *
 * c       - the connection to answer
 * err     - the status to report
 * swallow - when > 0, stored in c->sbytes and the connection continues with
 *           conn_swallow after the write; otherwise it continues with
 *           conn_new_cmd
 */
static void write_bin_packet(conn *c, protocol_binary_response_status err, int swallow) {
    ssize_t len;
    char buffer[1024] = { [sizeof(buffer) - 1] = '\0' };

    switch (err) {
    case PROTOCOL_BINARY_RESPONSE_SUCCESS:
        len = 0;
        break;
    case PROTOCOL_BINARY_RESPONSE_ENOMEM:
        len = snprintf(buffer, sizeof(buffer), "Out of memory");
        break;
    case PROTOCOL_BINARY_RESPONSE_ETMPFAIL:
        len = snprintf(buffer, sizeof(buffer), "Temporary failure");
        break;
    case PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND:
        len = snprintf(buffer, sizeof(buffer), "Unknown command");
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_ENOENT:
        len = snprintf(buffer, sizeof(buffer), "Not found");
        break;
    case PROTOCOL_BINARY_RESPONSE_EINVAL:
        len = snprintf(buffer, sizeof(buffer), "Invalid arguments");
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS:
        len = snprintf(buffer, sizeof(buffer), "Data exists for key");
        break;
    case PROTOCOL_BINARY_RESPONSE_E2BIG:
        len = snprintf(buffer, sizeof(buffer), "Too large");
        break;
    case PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL:
        len = snprintf(buffer, sizeof(buffer),
                       "Non-numeric server-side value for incr or decr");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_STORED:
        len = snprintf(buffer, sizeof(buffer), "Not stored");
        break;
    case PROTOCOL_BINARY_RESPONSE_AUTH_ERROR:
        len = snprintf(buffer, sizeof(buffer), "Auth failure");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED:
        len = snprintf(buffer, sizeof(buffer), "Not supported");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET:
        len = snprintf(buffer, sizeof(buffer),
                       "I'm not responsible for this vbucket");
        break;

    default:
        len = snprintf(buffer, sizeof(buffer), "UNHANDLED ERROR (%d)", err);
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        ">%d UNHANDLED ERROR: %d\n", c->sfd, err);
    }

    /* Allow the engine to pass extra error information */
    if (settings.engine.v1->errinfo != NULL) {
        /* The engine writes after a two byte gap that later holds ": ". */
        size_t elen = settings.engine.v1->errinfo(settings.engine.v0, c, buffer + len + 2,
                                                  sizeof(buffer) - len - 3);

        if (elen > 0) {
            memcpy(buffer + len, ": ", 2);
            len += elen + 2;
        }
    }

    if (err != PROTOCOL_BINARY_RESPONSE_SUCCESS && settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d Writing an error: %s\n", c->sfd,
                                        buffer);
    }

    /*
     * add_iov() only records a pointer to the data, and nothing is sent
     * before this function returns. The text therefore must not stay in the
     * local buffer: copy it into the connection's write buffer, right
     * behind the space add_bin_header() uses for the response header.
     * The copy is done first so that it cannot disturb anything
     * add_bin_header() places in c->wbuf.
     */
    const size_t hdr_size = sizeof(protocol_binary_response_header);
    char *payload = c->wbuf + hdr_size;
    if (len > 0) {
        size_t room = ((size_t)c->wsize > hdr_size)
            ? (size_t)c->wsize - hdr_size : 0;
        if ((size_t)len > room) {
            /* cut the message rather than overrun the write buffer */
            len = (ssize_t)room;
        }
        memcpy(payload, buffer, (size_t)len);
    }

    add_bin_header(c, err, 0, 0, len);
    if (len > 0) {
        add_iov(c, payload, len);
    }
    conn_set_state(c, conn_mwrite);
    if (swallow > 0) {
        c->sbytes = swallow;
        c->write_and_go = conn_swallow;
    } else {
        c->write_and_go = conn_new_cmd;
    }
}
1492 
1493 /* Form and send a response to a command over the binary protocol */
write_bin_response(conn * c,void * d,int hlen,int keylen,int dlen)1494 static void write_bin_response(conn *c, void *d, int hlen, int keylen, int dlen) {
1495     if (!c->noreply || c->cmd == PROTOCOL_BINARY_CMD_GET ||
1496         c->cmd == PROTOCOL_BINARY_CMD_GETK) {
1497         add_bin_header(c, 0, hlen, keylen, dlen);
1498         if(dlen > 0) {
1499             add_iov(c, d, dlen);
1500         }
1501         conn_set_state(c, conn_mwrite);
1502         c->write_and_go = conn_new_cmd;
1503     } else {
1504         conn_set_state(c, conn_new_cmd);
1505     }
1506 }
1507 
1508 
/*
 * Completes a binary-protocol INCREMENT / DECREMENT (including the quiet
 * INCREMENTQ variant): decodes the request body, asks the engine to do the
 * arithmetic and sends either the new value or an error packet.
 *
 * A non-success code left in c->aiostat is consumed and reported instead of
 * calling the engine. ENGINE_EWOULDBLOCK only flags the connection
 * (c->ewouldblock); an engine code that is not handled below aborts the
 * process.
 */
static void complete_incr_bin(conn *c) {
    assert(c != NULL);

    /* The response is assembled in the connection's write buffer. */
    protocol_binary_response_incr* rsp = (protocol_binary_response_incr*)c->wbuf;
    protocol_binary_request_incr* req = binary_get_request(c);

    assert(c->wsize >= sizeof(*rsp));

    /* fix byteorder in the request */
    uint64_t delta = ntohll(req->message.body.delta);
    uint64_t initial = ntohll(req->message.body.initial);
    rel_time_t expiration = ntohl(req->message.body.expiration);
    char *key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;
    bool incr = (c->cmd == PROTOCOL_BINARY_CMD_INCREMENT ||
                 c->cmd == PROTOCOL_BINARY_CMD_INCREMENTQ);

    if (settings.verbose > 1) {
        char buffer[1024];
        ssize_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     incr ? "INCR" : "DECR", key, nkey);
        if (nw != -1) {
            if (snprintf(buffer + nw, sizeof(buffer) - nw,
                         " %" PRIu64 ", %" PRIu64 ", %" PRIu64 "\n",
                         delta, initial, (uint64_t)expiration) != -1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        /*
         * NOTE(review): the expiration test presumably tells the engine
         * whether a missing key may be created (an all-ones expiration
         * meaning "do not create") -- confirm against the engine API.
         */
        ret = settings.engine.v1->arithmetic(settings.engine.v0,
                                             c, key, nkey, incr,
                                             req->message.body.expiration != 0xffffffff,
                                             delta, initial, expiration,
                                             &c->cas,
                                             &rsp->message.body.value,
                                             c->binary_header.request.vbucket);
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        rsp->message.body.value = htonll(rsp->message.body.value);
        write_bin_response(c, &rsp->message.body, 0, 0,
                           sizeof (rsp->message.body.value));
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        /*
         * Use the same incr/decr classification as the hit counters so
         * that a quiet increment miss is not booked as a decrement miss.
         */
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EINVAL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL, 0);
        break;
    case ENGINE_NOT_STORED:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_STORED, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        abort();
    }
}
1602 
/*
 * Completes a binary-protocol store command once the value has been read
 * into c->item: the item is handed to the engine's store() using
 * c->store_op, the outcome is sent to the client and the statistics
 * counters are updated.
 *
 * A non-success code left in c->aiostat is consumed and reported instead of
 * calling store(). The connection's reference on the item is released at
 * the end unless the engine answered ENGINE_EWOULDBLOCK.
 */
static void complete_update_bin(conn *c) {
    protocol_binary_response_status eno = PROTOCOL_BINARY_RESPONSE_EINVAL;
    assert(c != NULL);

    item *it = c->item;
    item_info info = { .nvalue = 1 };
    if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
        settings.engine.v1->release(settings.engine.v0, c, it);
        /* The reference is gone: don't leave a stale pointer behind. */
        c->item = NULL;
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: Failed to get item info\n",
                                        c->sfd);
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
        return;
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->store(settings.engine.v0, c,
                                        it, &c->cas, c->store_op,
                                        c->binary_header.request.vbucket);
    }

#ifdef ENABLE_DTRACE
    /*
     * The probes are selected by store operation. c->cmd holds the binary
     * protocol opcode here, so it cannot be matched against OPERATION_*.
     */
    switch (c->store_op) {
    case OPERATION_ADD:
        MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_REPLACE:
        MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_APPEND:
        MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
                                 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_PREPEND:
        MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_SET:
        MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    default:
        break;
    }
#endif

    switch (ret) {
    case ENGINE_SUCCESS:
        /* Stored */
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        /* No reply yet; the item reference is kept (see below). */
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* Any other failure is reported according to the operation. */
        if (c->store_op == OPERATION_ADD) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
        } else if(c->store_op == OPERATION_REPLACE) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
        } else {
            eno = PROTOCOL_BINARY_RESPONSE_NOT_STORED;
        }
        write_bin_packet(c, eno, 0);
    }

    /* CAS operations have their own counters; everything else is a "set". */
    if (c->store_op == OPERATION_CAS) {
        switch (ret) {
        case ENGINE_SUCCESS:
            SLAB_INCR(c, cas_hits, info.key, info.nkey);
            break;
        case ENGINE_KEY_EEXISTS:
            SLAB_INCR(c, cas_badval, info.key, info.nkey);
            break;
        case ENGINE_KEY_ENOENT:
            STATS_NOKEY(c, cas_misses);
            break;
        default:
            ;
        }
    } else {
        SLAB_INCR(c, cmd_set, info.key, info.nkey);
    }

    if (!c->ewouldblock) {
        /* release the c->item reference */
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }
}
1715 
/**
 * Handle a binary-protocol GET / GETK request on connection c.
 *
 * The key is looked up through the configured engine unless c->aiostat
 * already holds a non-success status, in which case that status is used as
 * the result of the lookup. c->aiostat is reset to ENGINE_SUCCESS either way.
 *
 * Hit:  the response (header, flags, the key for GETK, then the value) is
 *       queued, the connection moves to conn_mwrite and the item is kept in
 *       c->item.
 * Miss: a KEY_ENOENT reply is queued (echoing the key for GETK) unless
 *       c->noreply is set, in which case the connection goes straight to
 *       conn_new_cmd.
 * Any status without an explicit case below aborts the process.
 */
static void process_bin_get(conn *c) {
    item *it;

    /* rsp aliases c->wbuf; the cas and flags fields are written through it */
    protocol_binary_response_get* rsp = (protocol_binary_response_get*)c->wbuf;
    char* key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "GET", key, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    /* Consume the stored status; only ask the engine when it was SUCCESS */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->get(settings.engine.v0, c, &it, key, nkey,
                                      c->binary_header.request.vbucket);
    }

    uint16_t keylen;
    uint32_t bodylen;
    item_info info = { .nvalue = 1 };

    switch (ret) {
    case ENGINE_SUCCESS:
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            /* The item is released here and never stored in c->item.
             * Note that ret stays ENGINE_SUCCESS, so the detailed-stats
             * call at the end of the function still records a hit. */
            settings.engine.v1->release(settings.engine.v0, c, it);
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: Failed to get item info\n",
                                            c->sfd);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            break;
        }

        /* Body is the extras (flags) plus the value; GETK adds the key */
        keylen = 0;
        bodylen = sizeof(rsp->message.body) + info.nbytes;

        STATS_HIT(c, get, key, nkey);

        if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
            bodylen += nkey;
            keylen = nkey;
        }
        add_bin_header(c, 0, sizeof(rsp->message.body), keylen, bodylen);
        rsp->message.header.response.cas = htonll(info.cas);

        /* add the flags (copied as-is, no byte-order conversion here) */
        rsp->message.body.flags = info.flags;
        add_iov(c, &rsp->message.body, sizeof(rsp->message.body));

        if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
            add_iov(c, info.key, nkey);
        }

        /* The value is sent straight from the item's first value segment */
        add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
        conn_set_state(c, conn_mwrite);
        /* Remember this item so we can garbage collect it later */
        c->item = it;
        break;
    case ENGINE_KEY_ENOENT:
        STATS_MISS(c, get, key, nkey);

        MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);

        if (c->noreply) {
            conn_set_state(c, conn_new_cmd);
        } else {
            if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
                /* Echo the key back: it is copied into c->wbuf right after
                 * the space reserved for the response header.
                 * NOTE(review): assumes c->wbuf can hold header + nkey
                 * bytes -- confirm against the wbuf allocation. */
                char *ofs = c->wbuf + sizeof(protocol_binary_response_header);
                add_bin_header(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT,
                               0, nkey, nkey);
                memcpy(ofs, key, nkey);
                add_iov(c, ofs, nkey);
                conn_set_state(c, conn_mwrite);
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
            }
        }
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_TMPFAIL:
        /* NOTE(review): no response is queued and the connection state is
         * left untouched here, whereas process_bin_stat() answers
         * ENGINE_TMPFAIL with PROTOCOL_BINARY_RESPONSE_ETMPFAIL -- confirm
         * that staying silent is intentional. */
	break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* @todo add proper error handling! */
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Unknown error code: %d\n", ret);
        abort();
    }

    /* Per-prefix statistics: every completed lookup counts as a get, and
     * as a hit when the engine reported success. */
    if (settings.detail_enabled && ret != ENGINE_EWOULDBLOCK) {
        stats_prefix_record_get(key, nkey, ret == ENGINE_SUCCESS);
    }
}
1824 
/**
 * Append one STAT response packet (header, key, value) to the connection's
 * dynamic buffer in binary-protocol framing and advance the buffer offset.
 *
 * The caller must already have made room for the header plus klen + vlen
 * bytes. A value is only copied when a key is present.
 */
static void append_bin_stats(const char *key, const uint16_t klen,
                             const char *val, const uint32_t vlen,
                             conn *c) {
    const uint32_t payload_len = klen + vlen;
    protocol_binary_response_header hdr = {
        .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
        .response.opcode = PROTOCOL_BINARY_CMD_STAT,
        .response.keylen = (uint16_t)htons(klen),
        .response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES,
        .response.bodylen = htonl(payload_len),
        .response.opaque = c->opaque
    };
    const size_t hdr_len = sizeof(hdr.response);
    char *const start = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;

    /* Header first, then the key immediately followed by the value */
    memcpy(start, hdr.bytes, hdr_len);

    if (klen != 0) {
        char *const key_dst = start + hdr_len;

        memcpy(key_dst, key, klen);
        if (vlen != 0) {
            memcpy(key_dst + klen, val, vlen);
        }
    }

    c->dynamic_buffer.offset += hdr_len + payload_len;
}
1853 
1854 /**
1855  * Append a key-value pair to the stats output buffer. This function assumes
1856  * that the output buffer is big enough (it will be if you call it through
1857  * append_stats)
1858  */
append_ascii_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,conn * c)1859 static void append_ascii_stats(const char *key, const uint16_t klen,
1860                                const char *val, const uint32_t vlen,
1861                                conn *c) {
1862     char *pos = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
1863     uint32_t nbytes = 5; /* "END\r\n" or "STAT " */
1864 
1865     if (klen == 0 && vlen == 0) {
1866         memcpy(pos, "END\r\n", 5);
1867     } else {
1868         memcpy(pos, "STAT ", 5);
1869         memcpy(pos + nbytes, key, klen);
1870         nbytes += klen;
1871         if (vlen != 0) {
1872             pos[nbytes] = ' ';
1873             ++nbytes;
1874             memcpy(pos + nbytes, val, vlen);
1875             nbytes += vlen;
1876         }
1877         memcpy(pos + nbytes, "\r\n", 2);
1878         nbytes += 2;
1879     }
1880 
1881     c->dynamic_buffer.offset += nbytes;
1882 }
1883 
/**
 * Make sure the connection's dynamic buffer has at least `needed` free
 * bytes after its current offset, growing it by doubling when necessary.
 *
 * When no buffer exists yet, size and offset are reset to 0 and sizing
 * starts from 1024 bytes; because the free space is counted as 0 at that
 * point, any non-zero request doubles at least once before allocating.
 *
 * @param c      connection owning the dynamic buffer
 * @param needed number of bytes that must fit after the current offset
 * @return true when the space is available; false when the allocation
 *         failed or the required size cannot be represented. On failure
 *         the existing buffer, size and offset are left untouched.
 */
static bool grow_dynamic_buffer(conn *c, size_t needed) {
    size_t nsize = c->dynamic_buffer.size;
    size_t available = nsize - c->dynamic_buffer.offset;
    bool rv = true;

    /* Special case: No buffer -- need to allocate fresh */
    if (c->dynamic_buffer.buffer == NULL) {
        nsize = 1024;
        available = c->dynamic_buffer.size = c->dynamic_buffer.offset = 0;
    }

    while (needed > available) {
        /* A zero size can never grow by doubling, and doubling past half of
         * the size_t range would wrap around. Fail instead of spinning or
         * shrinking the buffer (this used to be guarded by assert() only,
         * which vanishes in NDEBUG builds). */
        if (nsize == 0 || nsize > ((size_t)-1) / 2) {
            return false;
        }
        nsize = nsize << 1;
        available = nsize - c->dynamic_buffer.offset;
    }

    if (nsize != c->dynamic_buffer.size) {
        /* Keep the old pointer until realloc is known to have succeeded */
        char *ptr = realloc(c->dynamic_buffer.buffer, nsize);
        if (ptr) {
            c->dynamic_buffer.buffer = ptr;
            c->dynamic_buffer.size = nsize;
        } else {
            rv = false;
        }
    }

    return rv;
}
1913 
append_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,const void * cookie)1914 static void append_stats(const char *key, const uint16_t klen,
1915                          const char *val, const uint32_t vlen,
1916                          const void *cookie)
1917 {
1918     /* value without a key is invalid */
1919     if (klen == 0 && vlen > 0) {
1920         return ;
1921     }
1922 
1923     conn *c = (conn*)cookie;
1924 
1925     if (c->protocol == binary_prot) {
1926         size_t needed = vlen + klen + sizeof(protocol_binary_response_header);
1927         if (!grow_dynamic_buffer(c, needed)) {
1928             return ;
1929         }
1930         append_bin_stats(key, klen, val, vlen, c);
1931     } else {
1932         size_t needed = vlen + klen + 10; // 10 == "STAT = \r\n"
1933         if (!grow_dynamic_buffer(c, needed)) {
1934             return ;
1935         }
1936         append_ascii_stats(key, klen, val, vlen, c);
1937     }
1938 
1939     assert(c->dynamic_buffer.offset <= c->dynamic_buffer.size);
1940 }
1941 
/**
 * Tell whether a length-delimited (not NUL-terminated) key starts with the
 * given NUL-terminated prefix, never looking past the key's nkey bytes.
 */
static bool bin_stat_key_has_prefix(const char *key, size_t nkey,
                                    const char *prefix) {
    const size_t plen = strlen(prefix);
    return nkey >= plen && strncmp(key, prefix, plen) == 0;
}

/**
 * Handle a binary-protocol STAT request on connection c.
 *
 * The request key selects the statistics group. An empty key returns the
 * engine statistics followed by the server statistics. The prefixes
 * "reset", "settings", "detail", "aggregate" and "topkeys" are handled
 * here; any other key is passed to the engine's get_stats. Output is
 * collected in the connection's dynamic buffer through append_stats and
 * sent, followed by an empty terminating packet, when the result is
 * ENGINE_SUCCESS.
 *
 * If c->aiostat holds a non-success status on entry, nothing is collected
 * and that status is reported instead. c->aiostat is reset to
 * ENGINE_SUCCESS and c->ewouldblock to false on entry.
 */
static void process_bin_stat(conn *c) {
    char *subcommand = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "STATS", subcommand, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    /* The key is not NUL-terminated, so every prefix match below is bounded
     * by nkey; otherwise stale bytes following a short key could select a
     * sub-command the client never sent. */
    if (ret == ENGINE_SUCCESS) {
        if (nkey == 0) {
            /* request all statistics */
            ret = settings.engine.v1->get_stats(settings.engine.v0, c, NULL, 0, append_stats);
            if (ret == ENGINE_SUCCESS) {
                server_stats(&append_stats, c, false);
            }
        } else if (bin_stat_key_has_prefix(subcommand, nkey, "reset")) {
            stats_reset(c);
            settings.engine.v1->reset_stats(settings.engine.v0, c);
        } else if (bin_stat_key_has_prefix(subcommand, nkey, "settings")) {
            process_stat_settings(&append_stats, c);
        } else if (bin_stat_key_has_prefix(subcommand, nkey, "detail")) {
            /* What follows "detail" selects the action: dump / on / off */
            const size_t detail_len = strlen("detail");
            const char *subcmd_pos = subcommand + detail_len;
            const size_t subcmd_len = nkey - detail_len;

            if (settings.allow_detailed) {
                if (bin_stat_key_has_prefix(subcmd_pos, subcmd_len, " dump")) {
                    int len;
                    char *dump_buf = stats_prefix_dump(&len);
                    if (dump_buf == NULL || len <= 0) {
                        /* free(NULL) is a no-op; this releases a buffer that
                         * was returned together with a non-positive length */
                        free(dump_buf);
                        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                        return ;
                    } else {
                        append_stats("detailed", strlen("detailed"), dump_buf, len, c);
                        free(dump_buf);
                    }
                } else if (bin_stat_key_has_prefix(subcmd_pos, subcmd_len, " on")) {
                    settings.detail_enabled = 1;
                } else if (bin_stat_key_has_prefix(subcmd_pos, subcmd_len, " off")) {
                    settings.detail_enabled = 0;
                } else {
                    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                    return;
                }
            } else {
                /* Detailed stats are disallowed by configuration; this path
                 * has always answered with ENOMEM. */
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                return;
            }
        } else if (bin_stat_key_has_prefix(subcommand, nkey, "aggregate")) {
            server_stats(&append_stats, c, true);
        } else if (bin_stat_key_has_prefix(subcommand, nkey, "topkeys")) {
            topkeys_t *tk = get_independent_stats(c)->topkeys;
            if (tk != NULL) {
                topkeys_stats(tk, c, current_time, append_stats);
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                return;
            }
        } else {
            /* Unknown group: let the engine decide */
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                subcommand, nkey,
                                                append_stats);
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        /* Terminate the stats stream with an empty packet, then pass the
         * buffer to write_and_free and drop our pointer to it. */
        append_stats(NULL, 0, NULL, 0, c);
        write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
        c->dynamic_buffer.buffer = NULL;
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
2041 
/**
 * Arrange for the next `chunk` bytes of the current binary request to be
 * read into the connection's read buffer, directly after the request header.
 *
 * Records the sub-state to enter once the bytes have arrived, makes sure the
 * read buffer can hold header + chunk (growing and/or repacking it), points
 * c->ritem just past the header and switches the connection to conn_nread.
 * If the buffer cannot be grown the connection is moved to conn_closing.
 *
 * @param c             connection being processed (must not be NULL)
 * @param next_substate value stored in c->substate
 * @param chunk         number of bytes still to read, stored in c->rlbytes
 */
static void bin_read_chunk(conn *c, enum bin_substates next_substate, uint32_t chunk) {
    assert(c);
    c->substate = next_substate;
    c->rlbytes = chunk;

    /* Ok... do we have room for everything in our buffer? */
    /* offset = distance from the start of rbuf to the end of the header */
    ptrdiff_t offset = c->rcurr + sizeof(protocol_binary_request_header) - c->rbuf;
    if (c->rlbytes > c->rsize - offset) {
        size_t nsize = c->rsize;
        size_t size = c->rlbytes + sizeof(protocol_binary_request_header);

        /* Double until header + chunk fits.
         * NOTE(review): this only terminates if c->rsize > 0 -- confirm the
         * read buffer is never zero-sized. */
        while (size > nsize) {
            nsize *= 2;
        }

        if (nsize != c->rsize) {
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                        "%d: Need to grow buffer from %lu to %lu\n",
                        c->sfd, (unsigned long)c->rsize, (unsigned long)nsize);
            }
            /* c->rbuf is only replaced once realloc has succeeded */
            char *newm = realloc(c->rbuf, nsize);
            if (newm == NULL) {
                if (settings.verbose) {
                    settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                            "%d: Failed to grow buffer.. closing connection\n",
                            c->sfd);
                }
                conn_set_state(c, conn_closing);
                return;
            }

            c->rbuf= newm;
            /* rcurr should point to the same offset in the packet */
            c->rcurr = c->rbuf + offset - sizeof(protocol_binary_request_header);
            c->rsize = nsize;
        }
        /* Slide the c->rbytes bytes starting at rcurr to the front of the
         * buffer; memmove is used because the regions may overlap. */
        if (c->rbuf != c->rcurr) {
            memmove(c->rbuf, c->rcurr, c->rbytes);
            c->rcurr = c->rbuf;
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                                "%d: Repack input buffer\n",
                                                c->sfd);
            }
        }
    }

    /* preserve the header in the buffer.. */
    c->ritem = c->rcurr + sizeof(protocol_binary_request_header);
    conn_set_state(c, conn_nread);
}
2094 
/**
 * Schedule reading of the request key plus `extra` further bytes, entering
 * next_substate once they have arrived. Thin wrapper over bin_read_chunk().
 */
static void bin_read_key(conn *c, enum bin_substates next_substate, int extra) {
    const uint32_t chunk = c->keylen + extra;

    bin_read_chunk(c, next_substate, chunk);
}
2098 
2099 
/**
 * Report a binary protocol error: queue an EINVAL response and arrange for
 * the connection to be closed once that response has been written.
 *
 * When verbose logging is enabled, the offending opcode is logged as well.
 */
static void handle_binary_protocol_error(conn *c) {
    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                "%d: Protocol error (opcode %02x), close connection\n",
                c->sfd, c->binary_header.request.opcode);
    }
    /* Set after write_bin_packet() so this is the value that sticks.
     * NOTE(review): presumably write_bin_packet() assigns write_and_go
     * itself -- confirm before reordering. */
    c->write_and_go = conn_closing;
}
2110 
/**
 * Lazily create the SASL server context for a connection.
 *
 * Does nothing when c->sasl_conn is already set. If the context cannot be
 * created, the failure is logged (when verbose) and c->sasl_conn is left
 * NULL, so callers must be prepared for a NULL context afterwards.
 */
static void init_sasl_conn(conn *c) {
    assert(c);

    if (c->sasl_conn) {
        return;
    }

    const int rc = sasl_server_new("memcached",
                                   NULL, NULL, NULL, NULL,
                                   NULL, 0, &c->sasl_conn);
    if (rc == SASL_OK) {
        return;
    }

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                 "%d: Failed to initialize SASL conn.\n",
                 c->sfd);
    }
    c->sasl_conn = NULL;
}
2127 
/**
 * Fill in the authentication data for the connection behind `cookie`.
 *
 * data->username (and data->config when built with ENABLE_ISASL) is first
 * set to NULL, so the caller never reads an indeterminate pointer when the
 * connection has no SASL context or the property lookup stores nothing.
 * The values are then fetched from the SASL context if one exists.
 *
 * @param cookie the connection (conn *) to query
 * @param data   output structure to populate
 */
static void get_auth_data(const void *cookie, auth_data_t *data) {
    conn *c = (conn*)cookie;

    data->username = NULL;
#ifdef ENABLE_ISASL
    data->config = NULL;
#endif

    if (c->sasl_conn) {
        sasl_getprop(c->sasl_conn, SASL_USERNAME, (void*)&data->username);
#ifdef ENABLE_ISASL
        sasl_getprop(c->sasl_conn, ISASL_CONFIG, (void*)&data->config);
#endif
    }
}
2137 
#ifdef SASL_ENABLED
/**
 * Answer a SASL LIST MECHS request with the space-separated list of
 * mechanisms reported by the SASL library, or with an AUTH_ERROR packet
 * when the list cannot be obtained.
 */
static void bin_list_sasl_mechs(conn *c) {
    const char *mechs = NULL;
    unsigned int mechs_len = 0;

    init_sasl_conn(c);

    const int rc = sasl_listmech(c->sasl_conn, NULL,
                                 "",   /* What to prepend the string with */
                                 " ",  /* What to separate mechanisms with */
                                 "",   /* What to append to the string */
                                 &mechs, &mechs_len,
                                 NULL);
    if (rc == SASL_OK) {
        write_bin_response(c, (char*)mechs, 0, 0, mechs_len);
        return;
    }

    /* Perhaps there's a better error for this... */
    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                 "%d: Failed to list SASL mechanisms.\n",
                 c->sfd);
    }
    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
}
#endif
2162 
/**
 * Temporary holder for a SASL auth/step request while its payload is being
 * read. process_bin_sasl_auth() allocates it and copies the request key
 * into the start of data; the payload is then read in right behind it.
 */
struct sasl_tmp {
    int ksize;   /* number of key bytes stored at the start of data */
    int vsize;   /* number of payload bytes that follow the key */
    char data[]; /* data + ksize == value */
};
2168 
/**
 * First stage of a SASL AUTH/STEP request: the key (mechanism name) has
 * been read, the payload has not.
 *
 * Allocates a struct sasl_tmp holding the key, stores it in c->item and
 * switches the connection to conn_nread so the payload is read in right
 * behind the key. Processing continues in the bin_reading_sasl_auth_data
 * sub-state.
 *
 * A key longer than MAX_SASL_MECH_LEN is answered with EINVAL and an
 * allocation failure with ENOMEM; in both cases the connection is set to
 * swallow the payload.
 */
static void process_bin_sasl_auth(conn *c) {
    assert(c->binary_header.request.extlen == 0);

    int nkey = c->binary_header.request.keylen;
    int vlen = c->binary_header.request.bodylen - nkey;

    /* NOTE(review): this is only an assert; presumably the dispatcher has
     * already rejected requests whose body is shorter than the key --
     * confirm, since vlen is used as a read length below. */
    assert(vlen >= 0);

    if (nkey > MAX_SASL_MECH_LEN) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, vlen);
        c->write_and_go = conn_swallow;
        return;
    }

    char *key = binary_get_key(c);
    assert(key);

    /* Header + key + payload + 2 spare bytes. calloc zero-fills, and only
     * nkey + vlen data bytes are ever written, so the stored payload stays
     * NUL-terminated. The header size was previously added a second time
     * in the calloc call; it is now counted exactly once. */
    size_t buffer_size = sizeof(struct sasl_tmp) + nkey + vlen + 2;
    struct sasl_tmp *data = calloc(1, buffer_size);
    if (!data) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        c->write_and_go = conn_swallow;
        return;
    }

    data->ksize = nkey;
    data->vsize = vlen;
    memcpy(data->data, key, nkey);

    /* c->item now holds the buffer; process_bin_complete_sasl_auth()
     * frees it once the payload has been handled. */
    c->item = data;
    c->ritem = data->data + nkey;
    c->rlbytes = vlen;
    conn_set_state(c, conn_nread);
    c->substate = bin_reading_sasl_auth_data;
}
2204 
/**
 * Second stage of a SASL AUTH/STEP request: key and payload are available
 * in the struct sasl_tmp stored in c->item.
 *
 * Runs the SASL start/step call, frees the temporary buffer and answers:
 *  - SASL_OK:       "Authenticated" response, ON_AUTH callbacks are run
 *  - SASL_CONTINUE: AUTH_CONTINUE response carrying the server challenge
 *  - anything else: AUTH_ERROR response
 */
static void process_bin_complete_sasl_auth(conn *c) {
    const char *out = NULL;
    unsigned int outlen = 0;

    assert(c->item);
    init_sasl_conn(c);

    int nkey = c->binary_header.request.keylen;
    int vlen = c->binary_header.request.bodylen - nkey;

    /* Copy the mechanism name out of the buffer and NUL-terminate it */
    struct sasl_tmp *stmp = c->item;
    char mech[nkey+1];
    memcpy(mech, stmp->data, nkey);
    mech[nkey] = 0x00;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: mech: ``%s'' with %d bytes of data\n", c->sfd, mech, vlen);
    }

    /* An empty payload is passed to SASL as "no client data" */
    const char *challenge = vlen == 0 ? NULL : (stmp->data + nkey);

    int result=-1;

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SASL_AUTH:
        result = sasl_server_start(c->sasl_conn, mech,
                                   challenge, vlen,
                                   &out, &outlen);
        break;
    case PROTOCOL_BINARY_CMD_SASL_STEP:
        result = sasl_server_step(c->sasl_conn,
                                  challenge, vlen,
                                  &out, &outlen);
        break;
    default:
        assert(false); /* CMD should be one of the above */
        /* This code is pretty much impossible, but makes the compiler
           happier */
        if (settings.verbose) {
            /* challenge is NULL for an empty payload; never pass NULL
               to %s */
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                    "%d: Unhandled command %d with challenge %s\n",
                    c->sfd, c->cmd, challenge ? challenge : "(null)");
        }
        break;
    }

    /* The temporary buffer is not used past this point */
    free(c->item);
    c->item = NULL;
    c->ritem = NULL;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "%d: sasl result code:  %d\n",
                                        c->sfd, result);
    }

    switch(result) {
    case SASL_OK: {
        write_bin_response(c, (void*)"Authenticated", 0, 0, strlen("Authenticated"));
        /* Start from a fully zeroed structure so the callbacks never see
           indeterminate fields that get_auth_data() leaves untouched. */
        auth_data_t data = { .username = NULL };
        get_auth_data(c, &data);
        perform_callbacks(ON_AUTH, (const void*)&data, c);
        STATS_NOKEY(c, auth_cmds);
        break;
    }
    case SASL_CONTINUE:
        add_bin_header(c, PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE, 0, 0, outlen);
        if(outlen > 0) {
            add_iov(c, out, outlen);
        }
        conn_set_state(c, conn_mwrite);
        c->write_and_go = conn_new_cmd;
        break;
    default:
        if (settings.verbose) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "%d: Unknown sasl response:  %d\n",
                                            c->sfd, result);
        }
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        STATS_NOKEY2(c, auth_cmds, auth_errors);
    }
}
2288 
/**
 * Decide whether the current command may run on this connection.
 *
 * The SASL handshake commands and VERSION are always allowed. Every other
 * command is allowed only when the connection has a SASL context for which
 * a user name is known.
 *
 * @return true when the command may proceed
 */
static bool authenticated(conn *c) {
    bool allowed;

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_SASL_AUTH:       /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_SASL_STEP:       /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_VERSION:
        allowed = true;
        break;
    default: {
        /* Stays NULL when there is no context or no user name is stored */
        const void *user = NULL;

        if (c->sasl_conn) {
            sasl_getprop(c->sasl_conn, SASL_USERNAME, &user);
        }
        allowed = (user != NULL);
        break;
    }
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: authenticated() in cmd 0x%02x is %s\n",
                c->sfd, c->cmd, allowed ? "true" : "false");
    }

    return allowed;
}
2315 
binary_response_handler(const void * key,uint16_t keylen,const void * ext,uint8_t extlen,const void * body,uint32_t bodylen,uint8_t datatype,uint16_t status,uint64_t cas,const void * cookie)2316 static bool binary_response_handler(const void *key, uint16_t keylen,
2317                                     const void *ext, uint8_t extlen,
2318                                     const void *body, uint32_t bodylen,
2319                                     uint8_t datatype, uint16_t status,
2320                                     uint64_t cas, const void *cookie)
2321 {
2322     conn *c = (conn*)cookie;
2323     /* Look at append_bin_stats */
2324     size_t needed = keylen + extlen + bodylen + sizeof(protocol_binary_response_header);
2325     if (!grow_dynamic_buffer(c, needed)) {
2326         if (settings.verbose > 0) {
2327             settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
2328                     "<%d ERROR: Failed to allocate memory for response\n",
2329                     c->sfd);
2330         }
2331         return false;
2332     }
2333 
2334     char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
2335     protocol_binary_response_header header = {
2336         .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
2337         .response.opcode = c->binary_header.request.opcode,
2338         .response.keylen = (uint16_t)htons(keylen),
2339         .response.extlen = extlen,
2340         .response.datatype = datatype,
2341         .response.status = (uint16_t)htons(status),
2342         .response.bodylen = htonl(bodylen + keylen + extlen),
2343         .response.opaque = c->opaque,
2344         .response.cas = htonll(cas),
2345     };
2346 
2347     memcpy(buf, header.bytes, sizeof(header.response));
2348     buf += sizeof(header.response);
2349 
2350     if (extlen > 0) {
2351         memcpy(buf, ext, extlen);
2352         buf += extlen;
2353     }
2354 
2355     if (keylen > 0) {
2356         memcpy(buf, key, keylen);
2357         buf += keylen;
2358     }
2359 
2360     if (bodylen > 0) {
2361         memcpy(buf, body, bodylen);
2362     }
2363 
2364     c->dynamic_buffer.offset += needed;
2365 
2366     return true;
2367 }
2368 
/**
 * Tap stats (these are only used by the tap thread, so they don't need
 * to be in the threadlocal struct right now...)
 *
 * One counter per kind of tap message; struct tap_stats keeps one set for
 * messages sent and one for messages received.
 */
struct tap_cmd_stats {
    uint64_t connect;
    uint64_t mutation;
    uint64_t checkpoint_start;
    uint64_t checkpoint_end;
    uint64_t delete;
    uint64_t flush;
    uint64_t opaque;
    uint64_t vbucket_set;
};
2383 
/**
 * Global tap message counters, with a statically initialised mutex.
 * ship_tap_log() takes the mutex around each increment of the `sent`
 * counters.
 */
struct tap_stats {
    pthread_mutex_t mutex;            /* held while updating the counters */
    struct tap_cmd_stats sent;        /* messages shipped by ship_tap_log() */
    struct tap_cmd_stats received;    /* presumably incoming tap messages -- TODO confirm */
} tap_stats = { .mutex = PTHREAD_MUTEX_INITIALIZER };
2389 
ship_tap_log(conn * c)2390 static void ship_tap_log(conn *c) {
2391     assert(c->thread->type == TAP);
2392     c->msgcurr = 0;
2393     c->msgused = 0;
2394     c->iovused = 0;
2395     if (add_msghdr(c) != 0) {
2396         if (settings.verbose) {
2397             settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
2398                                             "%d: Failed to create output headers. Shutting down tap connection\n", c->sfd);
2399         }
2400         conn_set_state(c, conn_closing);
2401         return ;
2402     }
2403     /* @todo add check for buffer overflow of c->wbuf) */
2404     c->wcurr = c->wbuf;
2405 
2406     bool more_data = true;
2407     bool send_data = false;
2408     bool disconnect = false;
2409 
2410     item *it;
2411     uint32_t bodylen;
2412     int ii = 0;
2413     c->icurr = c->ilist;
2414     do {
2415         /* @todo fixme! */
2416         if (ii++ == 10) {
2417             break;
2418         }
2419 
2420         void *engine;
2421         uint16_t nengine;
2422         uint8_t ttl;
2423         uint16_t tap_flags;
2424         uint32_t seqno;
2425         uint16_t vbucket;
2426 
2427         tap_event_t event = c->tap_iterator(settings.engine.v0, c, &it,
2428                                             &engine, &nengine, &ttl,
2429                                             &tap_flags, &seqno, &vbucket);
2430         union {
2431             protocol_binary_request_tap_mutation mutation;
2432             protocol_binary_request_tap_delete delete;
2433             protocol_binary_request_tap_flush flush;
2434             protocol_binary_request_tap_opaque opaque;
2435             protocol_binary_request_noop noop;
2436         } msg = {
2437             .mutation.message.header.request.magic = (uint8_t)PROTOCOL_BINARY_REQ,
2438         };
2439 
2440         msg.opaque.message.header.request.opaque = htonl(seqno);
2441         msg.opaque.message.body.tap.enginespecific_length = htons(nengine);
2442         msg.opaque.message.body.tap.ttl = ttl;
2443         msg.opaque.message.body.tap.flags = htons(tap_flags);
2444         msg.opaque.message.header.request.extlen = 8;
2445         msg.opaque.message.header.request.vbucket = htons(vbucket);
2446         item_info info = { .nvalue = 1 };
2447 
2448         switch (event) {
2449         case TAP_NOOP :
2450             send_data = true;
2451             msg.noop.message.header.request.opcode = PROTOCOL_BINARY_CMD_NOOP;
2452             msg.noop.message.header.request.extlen = 0;
2453             msg.noop.message.header.request.bodylen = htonl(0);
2454             memcpy(c->wcurr, msg.noop.bytes, sizeof(msg.noop.bytes));
2455             add_iov(c, c->wcurr, sizeof(msg.noop.bytes));
2456             c->wcurr += sizeof(msg.noop.bytes);
2457             c->wbytes += sizeof(msg.noop.bytes);
2458             break;
2459         case TAP_PAUSE :
2460             more_data = false;
2461             break;
2462         case TAP_CHECKPOINT_START:
2463         case TAP_CHECKPOINT_END:
2464         case TAP_MUTATION:
2465             if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
2466                 settings.engine.v1->release(settings.engine.v0, c, it);
2467                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
2468                                                 "%d: Failed to get item info\n", c->sfd);
2469                 break;
2470             }
2471             send_data = true;
2472             c->ilist[c->ileft++] = it;
2473 
2474             if (event == TAP_CHECKPOINT_START) {
2475                 msg.mutation.message.header.request.opcode =
2476                     PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START;
2477                 pthread_mutex_lock(&tap_stats.mutex);
2478                 tap_stats.sent.checkpoint_start++;
2479                 pthread_mutex_unlock(&tap_stats.mutex);
2480             } else if (event == TAP_CHECKPOINT_END) {
2481                 msg.mutation.message.header.request.opcode =
2482                     PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END;
2483                 pthread_mutex_lock(&tap_stats.mutex);
2484                 tap_stats.sent.checkpoint_end++;
2485                 pthread_mutex_unlock(&tap_stats.mutex);
2486             } else if (event == TAP_MUTATION) {
2487                 msg.mutation.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_MUTATION;
2488                 pthread_mutex_lock(&tap_stats.mutex);
2489                 tap_stats.sent.mutation++;
2490                 pthread_mutex_unlock(&tap_stats.mutex);
2491             }
2492 
2493             msg.mutation.message.header.request.cas = htonll(info.cas);
2494             msg.mutation.message.header.request.keylen = htons(info.nkey);
2495             msg.mutation.message.header.request.extlen = 16;
2496 
2497             bodylen = 16 + info.nkey + nengine;
2498             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2499                 bodylen += info.nbytes;
2500             }
2501             msg.mutation.message.header.request.bodylen = htonl(bodylen);
2502             msg.mutation.message.body.item.flags = htonl(info.flags);
2503             msg.mutation.message.body.item.expiration = htonl(info.exptime);
2504             msg.mutation.message.body.tap.enginespecific_length = htons(nengine);
2505             msg.mutation.message.body.tap.ttl = ttl;
2506             msg.mutation.message.body.tap.flags = htons(tap_flags);
2507             memcpy(c->wcurr, msg.mutation.bytes, sizeof(msg.mutation.bytes));
2508 
2509             add_iov(c, c->wcurr, sizeof(msg.mutation.bytes));
2510             c->wcurr += sizeof(msg.mutation.bytes);
2511             c->wbytes += sizeof(msg.mutation.bytes);
2512 
2513             if (nengine > 0) {
2514                 memcpy(c->wcurr, engine, nengine);
2515                 add_iov(c, c->wcurr, nengine);
2516                 c->wcurr += nengine;
2517                 c->wbytes += nengine;
2518             }
2519 
2520             add_iov(c, info.key, info.nkey);
2521             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2522                 add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
2523             }
2524 
2525             break;
2526         case TAP_DELETION:
2527             /* This is a delete */
2528             if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
2529                 settings.engine.v1->release(settings.engine.v0, c, it);
2530                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
2531                                                 "%d: Failed to get item info\n", c->sfd);
2532                 break;
2533             }
2534             send_data = true;
2535             c->ilist[c->ileft++] = it;
2536             msg.delete.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_DELETE;
2537             msg.delete.message.header.request.cas = htonll(info.cas);
2538             msg.delete.message.header.request.keylen = htons(info.nkey);
2539 
2540             bodylen = 8 + info.nkey + nengine;
2541             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2542                 bodylen += info.nbytes;
2543             }
2544             msg.delete.message.header.request.bodylen = htonl(bodylen);
2545 
2546             memcpy(c->wcurr, msg.delete.bytes, sizeof(msg.delete.bytes));
2547             add_iov(c, c->wcurr, sizeof(msg.delete.bytes));
2548             c->wcurr += sizeof(msg.delete.bytes);
2549             c->wbytes += sizeof(msg.delete.bytes);
2550 
2551             if (nengine > 0) {
2552                 memcpy(c->wcurr, engine, nengine);
2553                 add_iov(c, c->wcurr, nengine);
2554                 c->wcurr += nengine;
2555                 c->wbytes += nengine;
2556             }
2557 
2558             add_iov(c, info.key, info.nkey);
2559             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2560                 add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
2561             }
2562 
2563             pthread_mutex_lock(&tap_stats.mutex);
2564             tap_stats.sent.delete++;
2565             pthread_mutex_unlock(&tap_stats.mutex);
2566             break;
2567 
2568         case TAP_DISCONNECT:
2569             disconnect = true;
2570             more_data = false;
2571             break;
2572         case TAP_VBUCKET_SET:
2573         case TAP_FLUSH:
2574         case TAP_OPAQUE:
2575             send_data = true;
2576 
2577             if (event == TAP_OPAQUE) {
2578                 msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_OPAQUE;
2579                 pthread_mutex_lock(&tap_stats.mutex);
2580                 tap_stats.sent.opaque++;
2581                 pthread_mutex_unlock(&tap_stats.mutex);
2582 
2583             } else if (event == TAP_FLUSH) {
2584                 msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_FLUSH;
2585                 pthread_mutex_lock(&tap_stats.mutex);
2586                 tap_stats.sent.flush++;
2587                 pthread_mutex_unlock(&tap_stats.mutex);
2588             } else if (event == TAP_VBUCKET_SET) {
2589                 msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET;
2590                 msg.flush.message.body.tap.flags = htons(tap_flags);
2591                 pthread_mutex_lock(&tap_stats.mutex);
2592                 tap_stats.sent.vbucket_set++;
2593                 pthread_mutex_unlock(&tap_stats.mutex);
2594             }
2595 
2596             msg.flush.message.header.request.bodylen = htonl(8 + nengine);
2597             memcpy(c->wcurr, msg.flush.bytes, sizeof(msg.flush.bytes));
2598             add_iov(c, c->wcurr, sizeof(msg.flush.bytes));
2599             c->wcurr += sizeof(msg.flush.bytes);
2600             c->wbytes += sizeof(msg.flush.bytes);
2601             if (nengine > 0) {
2602                 memcpy(c->wcurr, engine, nengine);
2603                 add_iov(c, c->wcurr, nengine);
2604                 c->wcurr += nengine;
2605                 c->wbytes += nengine;
2606             }
2607             break;
2608         default:
2609             abort();
2610         }
2611     } while (more_data);
2612 
2613     c->ewouldblock = false;
2614     if (send_data) {
2615         conn_set_state(c, conn_mwrite);
2616         if (disconnect) {
2617             c->write_and_go = conn_closing;
2618         } else {
2619             c->write_and_go = conn_ship_log;
2620         }
2621     } else {
2622         if (disconnect) {
2623             conn_set_state(c, conn_closing);
2624         } else {
2625             /* No more items to ship to the slave at this time.. suspend.. */
2626             if (settings.verbose > 1) {
2627                 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
2628                                                 "%d: No more items in tap log.. waiting\n",
2629                                                 c->sfd);
2630             }
2631             c->ewouldblock = true;
2632         }
2633     }
2634 }
2635 
/**
 * Hand a binary packet whose opcode has no dedicated handler to the
 * engine's unknown_command() hook and act on the result.
 *
 * The status stored in c->aiostat is consumed first; the engine is only
 * called when that stored status is ENGINE_SUCCESS.
 *
 * @param c the connection the packet was read from
 */
static void process_bin_unknown_packet(conn *c) {
    /* The complete packet (header + body) sits right behind rcurr. */
    const size_t packet_len = c->binary_header.request.bodylen +
                              sizeof(c->binary_header);
    void *request = c->rcurr - packet_len;

    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->unknown_command(settings.engine.v0, c,
                                                     request,
                                                     binary_response_handler);
    }

    switch (status) {
    case ENGINE_SUCCESS:
        if (c->dynamic_buffer.buffer == NULL) {
            /* The engine produced no response data */
            conn_set_state(c, conn_new_cmd);
        } else {
            /* Send what the response handler collected in the dynamic buffer */
            write_and_free(c, c->dynamic_buffer.buffer,
                           c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        /* FATAL ERROR, shut down connection */
        conn_set_state(c, conn_closing);
    }
}
2665 
/**
 * Handle a completely received TAP_CONNECT request.
 *
 * The key of the request is the name of the tap connection and the bytes
 * following the key are passed to the engine as user data. The engine is
 * asked for a tap iterator: when it provides one the connection switches to
 * conn_ship_log, otherwise NOT_SUPPORTED is written and conn_closing is set
 * as the follow-up state.
 *
 * @param c the connection the request was read from
 */
static void process_bin_tap_connect(conn *c) {
    char *packet = c->rcurr - (c->binary_header.request.bodylen +
                               sizeof(c->binary_header));
    protocol_binary_request_tap_connect *req = (void *)packet;
    const char *name = packet + sizeof(req->bytes);
    const char *userdata = name + c->binary_header.request.keylen;
    size_t nuserdata = c->binary_header.request.bodylen -
        c->binary_header.request.extlen -
        c->binary_header.request.keylen;
    uint32_t flags = 0;

    if (c->binary_header.request.extlen != 4) {
        /*
         * No flags in the extras section. Presumably sizeof(req->bytes)
         * includes the 4 byte flags field, so step back over it.
         */
        userdata -= 4;
        name -= 4;
    } else {
        flags = ntohl(req->message.body.flags);

        /* the userdata has to be at least 8 bytes for a backfill request! */
        if ((flags & TAP_CONNECT_FLAG_BACKFILL) && nuserdata < 8) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: ERROR: Invalid tap connect message\n",
                                            c->sfd);
            conn_set_state(c, conn_closing);
            return;
        }
    }

    if (settings.verbose && c->binary_header.request.keylen > 0) {
        /* Log a NUL terminated (and possibly truncated) copy of the name */
        char printable[1024];
        size_t shown = c->binary_header.request.keylen;
        if (shown >= sizeof(printable)) {
            shown = sizeof(printable) - 1;
        }
        memcpy(printable, name, shown);
        printable[shown] = '\0';
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: Trying to connect with named tap connection: <%s>\n",
                                        c->sfd, printable);
    }

    TAP_ITERATOR iterator = settings.engine.v1->get_tap_iterator(
        settings.engine.v0, c, name, c->binary_header.request.keylen,
        flags, userdata, nuserdata);

    if (iterator != NULL) {
        c->tap_iterator = iterator;
        c->which = EV_WRITE;
        conn_set_state(c, conn_ship_log);
    } else {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: FATAL: The engine does not support tap\n",
                                        c->sfd);
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        c->write_and_go = conn_closing;
    }
}
2724 
/**
 * Forward a completely received TAP message to the engine's tap_notify()
 * hook and act on the result.
 *
 * The body is parsed as: 8 bytes of tap extras, (for TAP_MUTATION,
 * TAP_CHECKPOINT_START and TAP_CHECKPOINT_END) 8 more bytes holding the
 * item flags and expiration, the engine specific bytes, the key and
 * finally the value.
 *
 * All lengths come from the peer, so they are validated against the body
 * length before any pointer or length is derived from them. A packet whose
 * declared sections do not fit in the body, or whose ttl is 0, is logged
 * and the connection is closed. Without the check the unsigned
 * subtraction for the value length would wrap around.
 *
 * @param event the tap event matching the opcode of the packet
 * @param c the connection the packet was read from (must not be NULL)
 */
static void process_bin_tap_packet(tap_event_t event, conn *c) {
    assert(c != NULL);
    char *packet = (c->rcurr - (c->binary_header.request.bodylen +
                                sizeof(c->binary_header)));
    protocol_binary_request_tap_no_extras *tap = (void*)packet;
    const uint32_t bodylen = c->binary_header.request.bodylen;
    const uint16_t nkey = c->binary_header.request.keylen;
    const bool has_item_extras = (event == TAP_MUTATION ||
                                  event == TAP_CHECKPOINT_START ||
                                  event == TAP_CHECKPOINT_END);
    /* Fixed size part of the body: tap extras (+ item flags/expiration) */
    const uint32_t nfixed = has_item_extras ? 16 : 8;

    uint16_t nengine = 0;
    uint8_t ttl = 0;
    bool valid = (bodylen >= nfixed);
    if (valid) {
        /* Only touch the tap extras once we know the body contains them */
        nengine = ntohs(tap->message.body.tap.enginespecific_length);
        ttl = tap->message.body.tap.ttl;
        valid = (ttl > 0) &&
                ((bodylen - nfixed) >= ((uint32_t)nengine + nkey));
    }

    if (!valid) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: ERROR: Invalid tap message\n",
                                        c->sfd);
        conn_set_state(c, conn_closing);
        return;
    }

    uint16_t tap_flags = ntohs(tap->message.body.tap.flags);
    uint32_t seqno = ntohl(tap->message.header.request.opaque);
    /*
     * NOTE(review): for mutation-format packets the sender in this file
     * appears to place the engine specific bytes after the item extras,
     * while this pointer is not advanced past them - confirm.
     */
    char *engine_specific = packet + sizeof(tap->bytes);
    char *key = engine_specific + nengine;
    uint32_t flags = 0;
    uint32_t exptime = 0;
    /* Cannot wrap: validated above */
    uint32_t ndata = bodylen - nfixed - nengine - nkey;

    if (has_item_extras) {
        protocol_binary_request_tap_mutation *mutation = (void*)tap;
        flags = ntohl(mutation->message.body.item.flags);
        exptime = ntohl(mutation->message.body.item.expiration);
        /* Skip the item flags and expiration */
        key += 8;
    }
    char *data = key + nkey;

    ENGINE_ERROR_CODE ret = c->aiostat;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->tap_notify(settings.engine.v0, c,
                                             engine_specific, nengine,
                                             ttl - 1, tap_flags,
                                             event, seqno,
                                             key, nkey,
                                             flags, exptime,
                                             ntohll(tap->message.header.request.cas),
                                             data, ndata,
                                             c->binary_header.request.vbucket);
    }

    switch (ret) {
    case ENGINE_DISCONNECT:
        conn_set_state(c, conn_closing);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        /*
         * Answer when the sender asked for an ack, or when the engine
         * failed and the connection is in nack mode.
         */
        if ((tap_flags & TAP_FLAG_ACK) ||
            (ret != ENGINE_SUCCESS && c->tap_nack_mode))
        {
            write_bin_packet(c, engine_error_2_protocol_error(ret), 0);
        } else {
            conn_set_state(c, conn_new_cmd);
        }
    }
}
2783 
/**
 * Handle a response packet for a TAP message we sent by reporting it to
 * the engine as a TAP_ACK event.
 *
 * The connection is closed when the engine has no tap_notify() hook or when
 * the hook returns ENGINE_DISCONNECT; in all other cases it goes back to
 * conn_ship_log.
 *
 * @param c the connection the response was read from (must not be NULL)
 */
static void process_bin_tap_ack(conn *c) {
    assert(c != NULL);

    if (settings.engine.v1->tap_notify == NULL) {
        /* Nobody to tell about the ack; treated like a disconnect */
        conn_set_state(c, conn_closing);
        return;
    }

    char *packet = c->rcurr - (c->binary_header.request.bodylen +
                               sizeof(c->binary_header));
    protocol_binary_response_no_extras *response = (void *)packet;
    const uint32_t seqno = ntohl(response->message.header.response.opaque);
    const uint16_t status = ntohs(response->message.header.response.status);
    char *key = packet + sizeof(response->bytes);

    /* The response status is handed over in the tap_flags argument */
    ENGINE_ERROR_CODE outcome;
    outcome = settings.engine.v1->tap_notify(settings.engine.v0, c,
                                             NULL, 0, 0, status,
                                             TAP_ACK, seqno, key,
                                             c->binary_header.request.keylen,
                                             0, 0, 0, NULL, 0, 0);

    if (outcome != ENGINE_DISCONNECT) {
        conn_set_state(c, conn_ship_log);
    } else {
        conn_set_state(c, conn_closing);
    }
}
2807 
/**
 * Response handler for NOOP: the response carries nothing we need to act
 * on, so it is ignored and the connection is moved to conn_new_cmd.
 *
 * @param c the connection the response arrived on (must not be NULL)
 */
static void process_bin_noop_response(conn *c) {
    assert(c != NULL);
    conn_set_state(c, conn_new_cmd);
}
2815 
/**
 * Handle a VERBOSITY request: apply the requested log level (capped at
 * MAX_VERBOSITY_LEVEL), fire the ON_LOG_LEVEL callbacks and send an empty
 * success response.
 *
 * @param c the connection the request was read from
 */
static void process_bin_verbosity(conn *c) {
    char *packet = c->rcurr - (c->binary_header.request.bodylen +
                               sizeof(c->binary_header));
    protocol_binary_request_verbosity *req = (void *)packet;

    /* The level travels in network byte order */
    const uint32_t requested = (uint32_t)ntohl(req->message.body.level);
    const uint32_t clamped = (requested > MAX_VERBOSITY_LEVEL)
        ? MAX_VERBOSITY_LEVEL : requested;

    settings.verbose = (int)clamped;
    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    write_bin_response(c, NULL, 0, 0, 0);
}
2828 
/**
 * Dispatch a completely received binary packet based on its opcode.
 *
 * TAP opcodes bump the matching tap_stats.received counter (under
 * tap_stats.mutex) before they are processed. TAP_CONNECT only switches the
 * connection to conn_add_tap_client, VERBOSITY has its own handler and
 * everything else that is not a TAP message goes to
 * process_bin_unknown_packet().
 *
 * @param c the connection the packet was read from
 */
static void process_bin_packet(conn *c) {
    /* @todo this should be an array of function pointers and call through */
    tap_event_t event;

    switch (c->binary_header.request.opcode) {
    /* Opcodes that are completely handled right here */
    case PROTOCOL_BINARY_CMD_TAP_CONNECT:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.connect++;
        pthread_mutex_unlock(&tap_stats.mutex);
        conn_set_state(c, conn_add_tap_client);
        return;
    case PROTOCOL_BINARY_CMD_VERBOSITY:
        process_bin_verbosity(c);
        return;

    /* TAP messages: count them and remember which event to report */
    case PROTOCOL_BINARY_CMD_TAP_MUTATION:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.mutation++;
        pthread_mutex_unlock(&tap_stats.mutex);
        event = TAP_MUTATION;
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.checkpoint_start++;
        pthread_mutex_unlock(&tap_stats.mutex);
        event = TAP_CHECKPOINT_START;
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.checkpoint_end++;
        pthread_mutex_unlock(&tap_stats.mutex);
        event = TAP_CHECKPOINT_END;
        break;
    case PROTOCOL_BINARY_CMD_TAP_DELETE:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.delete++;
        pthread_mutex_unlock(&tap_stats.mutex);
        event = TAP_DELETION;
        break;
    case PROTOCOL_BINARY_CMD_TAP_FLUSH:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.flush++;
        pthread_mutex_unlock(&tap_stats.mutex);
        event = TAP_FLUSH;
        break;
    case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.opaque++;
        pthread_mutex_unlock(&tap_stats.mutex);
        event = TAP_OPAQUE;
        break;
    case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
        pthread_mutex_lock(&tap_stats.mutex);
        tap_stats.received.vbucket_set++;
        pthread_mutex_unlock(&tap_stats.mutex);
        event = TAP_VBUCKET_SET;
        break;

    default:
        process_bin_unknown_packet(c);
        return;
    }

    process_bin_tap_packet(event, c);
}
2887 
2888 
2889 
/** Signature shared by the handlers for binary response packets. */
typedef void (*RESPONSE_HANDLER)(conn*);
/**
 * A map between the response packet's opcode and the function that handles
 * the response message.
 *
 * The table is indexed by opcode. Opcodes without an explicit initializer
 * below are NULL (static storage is zero initialized), i.e. they have no
 * response handler.
 */
static RESPONSE_HANDLER response_handlers[256] = {
    [PROTOCOL_BINARY_CMD_NOOP] = process_bin_noop_response,
    [PROTOCOL_BINARY_CMD_TAP_MUTATION] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_DELETE] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_FLUSH] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_OPAQUE] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START] = process_bin_tap_ack,
    [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END] = process_bin_tap_ack
};
2905 
/**
 * Validate the header of a binary request and start reading / processing
 * the command.
 *
 * The function works on the header stored in c->binary_header and the
 * opcode stored in c->cmd:
 *  - headers whose key/extras do not fit in the body are answered with
 *    UNKNOWN_COMMAND and conn_closing is set as the follow-up state
 *  - when SASL is required and the connection is not authenticated, the
 *    request is answered with AUTH_ERROR and conn_closing is set as the
 *    follow-up state
 *  - "quiet" opcodes are rewritten to their normal counterpart with
 *    c->noreply set
 *  - the extras/key/body lengths are checked per command; a mismatch ends
 *    in handle_binary_protocol_error()
 *
 * @param c the connection the request header was read from
 */
static void dispatch_bin_command(conn *c) {
    int protocol_error = 0;

    uint8_t extlen = c->binary_header.request.extlen;
    uint16_t keylen = c->binary_header.request.keylen;
    uint32_t bodylen = c->binary_header.request.bodylen;

    /* The key and the extras are part of the body, so they must fit in it */
    if (keylen > bodylen || keylen + extlen > bodylen) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, 0);
        c->write_and_go = conn_closing;
        return;
    }

    /* Refuse everything from unauthenticated clients when SASL is required */
    if (settings.require_sasl && !authenticated(c)) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        c->write_and_go = conn_closing;
        return;
    }

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);
    /* Assume a quiet command; reset below when the opcode is not quiet */
    c->noreply = true;

    /* binprot supports 16bit keys, but internals are still 8bit */
    if (keylen > KEY_MAX_LENGTH) {
        handle_binary_protocol_error(c);
        return;
    }

    /* Map each quiet opcode to its normal counterpart */
    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SETQ:
        c->cmd = PROTOCOL_BINARY_CMD_SET;
        break;
    case PROTOCOL_BINARY_CMD_ADDQ:
        c->cmd = PROTOCOL_BINARY_CMD_ADD;
        break;
    case PROTOCOL_BINARY_CMD_REPLACEQ:
        c->cmd = PROTOCOL_BINARY_CMD_REPLACE;
        break;
    case PROTOCOL_BINARY_CMD_DELETEQ:
        c->cmd = PROTOCOL_BINARY_CMD_DELETE;
        break;
    case PROTOCOL_BINARY_CMD_INCREMENTQ:
        c->cmd = PROTOCOL_BINARY_CMD_INCREMENT;
        break;
    case PROTOCOL_BINARY_CMD_DECREMENTQ:
        c->cmd = PROTOCOL_BINARY_CMD_DECREMENT;
        break;
    case PROTOCOL_BINARY_CMD_QUITQ:
        c->cmd = PROTOCOL_BINARY_CMD_QUIT;
        break;
    case PROTOCOL_BINARY_CMD_FLUSHQ:
        c->cmd = PROTOCOL_BINARY_CMD_FLUSH;
        break;
    case PROTOCOL_BINARY_CMD_APPENDQ:
        c->cmd = PROTOCOL_BINARY_CMD_APPEND;
        break;
    case PROTOCOL_BINARY_CMD_PREPENDQ:
        c->cmd = PROTOCOL_BINARY_CMD_PREPEND;
        break;
    case PROTOCOL_BINARY_CMD_GETQ:
        c->cmd = PROTOCOL_BINARY_CMD_GET;
        break;
    case PROTOCOL_BINARY_CMD_GETKQ:
        c->cmd = PROTOCOL_BINARY_CMD_GETK;
        break;
    default:
        /* Not a quiet opcode */
        c->noreply = false;
    }

    /*
     * Check the lengths in the header against what the command allows and
     * kick off reading the rest of the request (or answer right away).
     */
    switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_VERSION:
            if (extlen == 0 && keylen == 0 && bodylen == 0) {
                write_bin_response(c, (void*)VERSION, 0, 0, strlen(VERSION));
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_FLUSH:
            /* The only (optional) extras field is the 4 byte expiration */
            if (keylen == 0 && bodylen == extlen && (extlen == 0 || extlen == 4)) {
                bin_read_key(c, bin_read_flush_exptime, extlen);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_NOOP:
            if (extlen == 0 && keylen == 0 && bodylen == 0) {
                write_bin_response(c, NULL, 0, 0, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_SET: /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_ADD: /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_REPLACE:
            /* 8 bytes of extras (flags + expiration) and a non-empty key */
            if (extlen == 8 && keylen != 0 && bodylen >= (keylen + 8)) {
                bin_read_key(c, bin_reading_set_header, 8);
            } else {
                protocol_error = 1;
            }
            break;
        /*
         * GETQ and GETKQ were rewritten to GET / GETK by the switch above,
         * so only the GET and GETK labels can match here.
         */
        case PROTOCOL_BINARY_CMD_GETQ:  /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_GET:   /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_GETKQ: /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_GETK:
            if (extlen == 0 && bodylen == keylen && keylen > 0) {
                bin_read_key(c, bin_reading_get_key, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_DELETE:
            if (keylen > 0 && extlen == 0 && bodylen == keylen) {
                bin_read_key(c, bin_reading_del_header, extlen);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_INCREMENT:
        case PROTOCOL_BINARY_CMD_DECREMENT:
            /* Exactly 20 bytes of extras and a key, nothing else */
            if (keylen > 0 && extlen == 20 && bodylen == (keylen + extlen)) {
                bin_read_key(c, bin_reading_incr_header, 20);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_APPEND:
        case PROTOCOL_BINARY_CMD_PREPEND:
            if (keylen > 0 && extlen == 0) {
                bin_read_key(c, bin_reading_set_header, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_STAT:
            if (extlen == 0) {
                bin_read_key(c, bin_reading_stat, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_QUIT:
            if (keylen == 0 && extlen == 0 && bodylen == 0) {
                write_bin_response(c, NULL, 0, 0, 0);
                c->write_and_go = conn_closing;
                /* QUITQ: go straight to conn_closing */
                if (c->noreply) {
                    conn_set_state(c, conn_closing);
                }
            } else {
                protocol_error = 1;
            }
            break;
       case PROTOCOL_BINARY_CMD_TAP_CONNECT:
            /* TAP_CONNECT needs an engine that can hand out tap iterators */
            if (settings.engine.v1->get_tap_iterator == NULL) {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
            } else {
                bin_read_chunk(c, bin_reading_packet,
                               c->binary_header.request.bodylen);
            }
            break;
       case PROTOCOL_BINARY_CMD_TAP_MUTATION:
       case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
       case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
       case PROTOCOL_BINARY_CMD_TAP_DELETE:
       case PROTOCOL_BINARY_CMD_TAP_FLUSH:
       case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
       case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
            /* Incoming TAP messages need an engine with a tap_notify hook */
            if (settings.engine.v1->tap_notify == NULL) {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
            } else {
                bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
            }
            break;
#ifdef SASL_ENABLED
        case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS:
            if (extlen == 0 && keylen == 0 && bodylen == 0) {
                bin_list_sasl_mechs(c);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_SASL_AUTH:
        case PROTOCOL_BINARY_CMD_SASL_STEP:
            if (extlen == 0 && keylen != 0) {
                bin_read_key(c, bin_reading_sasl_auth, 0);
            } else {
                protocol_error = 1;
            }
            break;
#endif
        case PROTOCOL_BINARY_CMD_VERBOSITY:
            /* The body is exactly the 4 byte level in the extras section */
            if (extlen == 4 && keylen == 0 && bodylen == 4) {
                bin_read_chunk(c, bin_reading_packet,
                               c->binary_header.request.bodylen);
            } else {
                protocol_error = 1;
            }
            break;
        default:
            /* Everything else is offered to the engine, if it wants it */
            if (settings.engine.v1->unknown_command == NULL) {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND,
                                bodylen);
            } else {
                bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
            }
    }

    if (protocol_error)
        handle_binary_protocol_error(c);
}
3115 
/**
 * Handle the header of a binary SET / ADD / REPLACE request: allocate an
 * item of the announced size from the engine and prepare the connection to
 * read the value straight into it.
 *
 * The flags from the request are handed to the engine exactly as received
 * (no byte order conversion); only the expiration is converted to host
 * byte order.
 *
 * When the allocation fails an error response is written and the value is
 * swallowed. For SET the item currently stored under the key is removed as
 * well so no stale value stays behind.
 *
 * @param c the connection the request was read from (must not be NULL)
 */
static void process_bin_update(conn *c) {
    char *key;
    uint16_t nkey;
    uint32_t vlen;
    item *it;
    protocol_binary_request_set* req;

    /* Check before c is handed to any accessor */
    assert(c != NULL);

    req = binary_get_request(c);
    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;

    rel_time_t expiration = ntohl(req->message.body.expiration);

    /* Whatever is left of the body after the extras and the key */
    vlen = c->binary_header.request.bodylen - (nkey + c->binary_header.request.extlen);

    if (settings.verbose > 1) {
        char buffer[1024];
        const char *prefix;
        if (c->cmd == PROTOCOL_BINARY_CMD_ADD) {
            prefix = "ADD";
        } else if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            prefix = "SET";
        } else {
            prefix = "REPLACE";
        }

        size_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     prefix, key, nkey);

        /* -1 (here converted to size_t) signals a formatting failure */
        if (nw != (size_t)-1) {
            /* vlen is unsigned; only log when snprintf wrote something */
            if (snprintf(buffer + nw, sizeof(buffer) - nw,
                         " Value len is %u\n", (unsigned int)vlen) > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Consume the stored status; the engine is only called if it was SUCCESS */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen,
                                           req->message.body.flags,
                                           expiration);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_ADD:
            c->store_op = OPERATION_ADD;
            break;
        case PROTOCOL_BINARY_CMD_SET:
            c->store_op = OPERATION_SET;
            break;
        case PROTOCOL_BINARY_CMD_REPLACE:
            c->store_op = OPERATION_REPLACE;
            break;
        default:
            assert(0);
        }

        /* A cas value in the request turns the operation into a CAS */
        if (c->binary_header.request.cas != 0) {
            c->store_op = OPERATION_CAS;
        }

        /* Read the value directly into the item's buffer */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        /* Go through conn_set_state() like the other state transitions */
        conn_set_state(c, conn_closing);
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }

        /*
         * Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET (but only if cas matches).
         * Anywhere else too?
         */
        if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            /* @todo fix this for the ASYNC interface! */
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey,
                                       ntohll(req->message.header.request.cas),
                                       c->binary_header.request.vbucket);
        }

        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3238 
/**
 * Handle the header of a binary APPEND / PREPEND request: allocate an item
 * for the bytes to add and prepare the connection to read them straight
 * into it.
 *
 * The item is allocated with flags and expiration set to 0. When the
 * allocation fails an error response is written and the value is swallowed.
 *
 * @param c the connection the request was read from (must not be NULL)
 */
static void process_bin_append_prepend(conn *c) {
    char *key;
    int nkey;
    int vlen;
    item *it;

    assert(c != NULL);

    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;
    /* These commands carry no extras: the body is just key + value */
    vlen = c->binary_header.request.bodylen - nkey;

    assert(vlen >= 0);

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "Value len is %d\n", vlen);
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Consume the stored status; the engine is only called if it was SUCCESS */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, 0, 0);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_APPEND:
            c->store_op = OPERATION_APPEND;
            break;
        case PROTOCOL_BINARY_CMD_PREPEND:
            c->store_op = OPERATION_PREPEND;
            break;
        default:
            assert(0);
        }

        /* Read the value directly into the item's buffer */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        /* Go through conn_set_state() like the other state transitions */
        conn_set_state(c, conn_closing);
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }
        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3316 
/*
 * Execute a binary-protocol flush request.
 *
 * The expiration is taken from the request body (converted from network
 * byte order) only when the extras length equals the size of the flush
 * body; otherwise 0 is passed to the engine. The engine result is mapped to
 * a binary response: an empty success response, NOT_SUPPORTED, or EINVAL
 * for everything else. STATS_NOKEY(c, cmd_flush) is invoked regardless of
 * the outcome.
 */
static void process_bin_flush(conn *c) {
    protocol_binary_request_flush *request = binary_get_request(c);
    time_t when = 0;

    if (c->binary_header.request.extlen == sizeof(request->message.body)) {
        when = ntohl(request->message.body.expiration);
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: flush %ld", c->sfd,
                                        (long)when);
    }

    switch (settings.engine.v1->flush(settings.engine.v0, c, when)) {
    case ENGINE_SUCCESS:
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
        break;
    }

    STATS_NOKEY(c, cmd_flush);
}
3343 
/*
 * Execute a binary-protocol delete request.
 *
 * The status stored in c->aiostat is consumed first; only when it is
 * ENGINE_SUCCESS is the engine's remove() called with the key, the CAS from
 * the request header and the vbucket. The resulting status is translated
 * into a binary response, or into c->ewouldblock for ENGINE_EWOULDBLOCK.
 */
static void process_bin_delete(conn *c) {
    protocol_binary_request_delete *request = binary_get_request(c);
    char *key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;
    /*
     * No item is looked up here, but a local called "info" must exist:
     * the slab statistics macros index slab_stats with info.clsid.
     */
    item_info info = { .nvalue = 1 };

    assert(c != NULL);

    if (settings.verbose > 1) {
        char printable[1024];
        if (key_to_printable_buffer(printable, sizeof(printable), c->sfd,
                                    true, "DELETE", key, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            printable);
        }
    }

    /* Take over the status left in the connection and reset it. */
    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (status == ENGINE_SUCCESS) {
        if (settings.detail_enabled) {
            stats_prefix_record_delete(key, nkey);
        }
        status = settings.engine.v1->remove(
            settings.engine.v0, c, key, nkey,
            ntohll(request->message.header.request.cas),
            c->binary_header.request.vbucket);
    }

    switch (status) {
    case ENGINE_SUCCESS:
        write_bin_response(c, NULL, 0, 0, 0);
        SLAB_INCR(c, delete_hits, key, nkey);
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        STATS_INCR(c, delete_misses, key, nkey);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        /* No response yet; flag the connection as blocked. */
        c->ewouldblock = true;
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
3398 
/*
 * Called once the bytes requested for a binary-protocol connection have
 * been read. Hands the connection to the handler that matches the current
 * c->substate. An unknown substate is logged and the process aborts.
 */
static void complete_nread_binary(conn *c) {
    assert(c != NULL);
    assert(c->cmd >= 0);

    switch (c->substate) {
    case bin_reading_set_header:
        /* append/prepend share this substate with the other updates */
        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_APPEND:
        case PROTOCOL_BINARY_CMD_PREPEND:
            process_bin_append_prepend(c);
            break;
        default:
            process_bin_update(c);
            break;
        }
        break;

    case bin_read_set_value:
        complete_update_bin(c);
        break;

    case bin_reading_get_key:
        process_bin_get(c);
        break;

    case bin_reading_stat:
        process_bin_stat(c);
        break;

    case bin_reading_del_header:
        process_bin_delete(c);
        break;

    case bin_reading_incr_header:
        complete_incr_bin(c);
        break;

    case bin_read_flush_exptime:
        process_bin_flush(c);
        break;

    case bin_reading_sasl_auth:
        process_bin_sasl_auth(c);
        break;

    case bin_reading_sasl_auth_data:
        process_bin_complete_sasl_auth(c);
        break;

    case bin_reading_packet: {
        if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
            /* a request packet: regular command processing */
            process_bin_packet(c);
            break;
        }

        /* a response packet: look up the handler registered for its opcode */
        RESPONSE_HANDLER on_response =
            response_handlers[c->binary_header.request.opcode];
        if (on_response) {
            on_response(c);
        } else {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                   "%d: ERROR: Unsupported response packet received: %u\n",
                    c->sfd, (unsigned int)c->binary_header.request.opcode);
            conn_set_state(c, conn_closing);
        }
        break;
    }

    default:
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                "Not handling substate %d\n", c->substate);
        abort();
    }
}
3458 
/*
 * Put the connection back into its "no command in progress" condition:
 * clear the per-command fields, release the item still attached to the
 * connection (if any), shrink the connection buffers and then pick the next
 * state -- parse another command when unread bytes remain (c->rbytes > 0),
 * otherwise wait for input.
 */
static void reset_cmd_handler(conn *c) {
    c->cmd = -1;
    c->substate = bin_no_state;
    c->ascii_cmd = NULL;
    c->sbytes = 0;

    if (c->item != NULL) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = NULL;
    }

    conn_shrink(c);

    if (c->rbytes <= 0) {
        conn_set_state(c, conn_waiting);
    } else {
        conn_set_state(c, conn_parse_cmd);
    }
}
3475 
ascii_response_handler(const void * cookie,int nbytes,const char * dta)3476 static ENGINE_ERROR_CODE ascii_response_handler(const void *cookie,
3477                                                 int nbytes,
3478                                                 const char *dta)
3479 {
3480     conn *c = (conn*)cookie;
3481     if (!grow_dynamic_buffer(c, nbytes)) {
3482         if (settings.verbose > 0) {
3483             settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
3484                     "<%d ERROR: Failed to allocate memory for response\n",
3485                     c->sfd);
3486         }
3487         return ENGINE_ENOMEM;
3488     }
3489 
3490     char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
3491     memcpy(buf, dta, nbytes);
3492     c->dynamic_buffer.offset += nbytes;
3493 
3494     return ENGINE_SUCCESS;
3495 }
3496 
/*
 * Called once the bytes requested for an ASCII-protocol connection have been
 * read. Without an extension command attached to the connection this is a
 * plain item update. Otherwise the extension command is executed and its
 * result decides what happens next: send the collected response (or go
 * straight to the next command when nothing was collected), mark the
 * connection as blocked, or close it.
 */
static void complete_nread_ascii(conn *c) {
    if (c->ascii_cmd == NULL) {
        complete_update_ascii(c);
        return;
    }

    c->ewouldblock = false;
    ENGINE_ERROR_CODE status = c->ascii_cmd->execute(c->ascii_cmd->cookie, c,
                                                     0, NULL,
                                                     ascii_response_handler);

    if (status == ENGINE_SUCCESS) {
        if (c->dynamic_buffer.buffer == NULL) {
            conn_set_state(c, conn_new_cmd);
        } else {
            /* ownership of the buffer moves to the write path */
            write_and_free(c, c->dynamic_buffer.buffer,
                           c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
    } else if (status == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
    } else {
        /* ENGINE_DISCONNECT and every other status */
        conn_set_state(c, conn_closing);
    }
}
3522 
/*
 * Protocol dispatcher for a completed read: forwards to the binary or the
 * ASCII completion routine depending on c->protocol. Any other protocol
 * value trips the assertion (and is ignored when assertions are disabled).
 */
static void complete_nread(conn *c) {
    assert(c != NULL);
    assert(c->protocol == ascii_prot
           || c->protocol == binary_prot);

    if (c->protocol == binary_prot) {
        complete_nread_binary(c);
    } else if (c->protocol == ascii_prot) {
        complete_nread_ascii(c);
    }
}
3534 
/* Indexes into the token array filled in by tokenize_command(). */
#define COMMAND_TOKEN 0
/* The sub-command (e.g. of "stats") and the first key share slot 1. */
#define SUBCOMMAND_TOKEN 1
#define KEY_TOKEN 1

/* NOTE(review): presumably the capacity of the token arrays handed to
 * tokenize_command() -- confirm at the call sites. */
#define MAX_TOKENS 30
3540 
3541 /*
3542  * Tokenize the command string by replacing whitespace with '\0' and update
3543  * the token array tokens with pointer to start of each token and length.
3544  * Returns total number of tokens.  The last valid token is the terminal
3545  * token (value points to the first unprocessed character of the string and
3546  * length zero).
3547  *
3548  * Usage example:
3549  *
3550  *  while(tokenize_command(command, ncommand, tokens, max_tokens) > 0) {
3551  *      for(int ix = 0; tokens[ix].length != 0; ix++) {
3552  *          ...
3553  *      }
3554  *      ncommand = tokens[ix].value - command;
3555  *      command  = tokens[ix].value;
3556  *   }
3557  */
tokenize_command(char * command,token_t * tokens,const size_t max_tokens)3558 static size_t tokenize_command(char *command, token_t *tokens, const size_t max_tokens) {
3559     char *s, *e;
3560     size_t ntokens = 0;
3561 
3562     assert(command != NULL && tokens != NULL && max_tokens > 1);
3563 
3564     for (s = e = command; ntokens < max_tokens - 1; ++e) {
3565         if (*e == ' ') {
3566             if (s != e) {
3567                 tokens[ntokens].value = s;
3568                 tokens[ntokens].length = e - s;
3569                 ntokens++;
3570                 *e = '\0';
3571             }
3572             s = e + 1;
3573         }
3574         else if (*e == '\0') {
3575             if (s != e) {
3576                 tokens[ntokens].value = s;
3577                 tokens[ntokens].length = e - s;
3578                 ntokens++;
3579             }
3580 
3581             break; /* string end */
3582         }
3583     }
3584 
3585     /*
3586      * If we scanned the whole string, the terminal value pointer is null,
3587      * otherwise it is the first unprocessed character.
3588      */
3589     tokens[ntokens].value =  *e == '\0' ? NULL : e;
3590     tokens[ntokens].length = 0;
3591     ntokens++;
3592 
3593     return ntokens;
3594 }
3595 
3596 #ifdef INNODB_MEMCACHED
detokenize(token_t * tokens,size_t ntokens,char ** out,int * nbytes)3597 static void detokenize(token_t *tokens, size_t ntokens, char **out, int *nbytes)
3598 #else
3599 static void detokenize(token_t *tokens, int ntokens, char **out, int *nbytes)
3600 #endif
3601 {
3602     int i;
3603     char *buf, *p;
3604     size_t nb = ntokens; // account for spaces, which is ntokens-1, plus the null
3605 
3606     for (i = 0; i < ntokens; ++i) {
3607         nb += tokens[i].length;
3608     }
3609 
3610     buf = malloc(nb * sizeof(char));
3611     if (buf != NULL) {
3612         p = buf;
3613         for (i = 0; i < ntokens; ++i) {
3614             memcpy(p, tokens[i].value, tokens[i].length);
3615             p += tokens[i].length;
3616             *p = ' ';
3617             p++;
3618         }
3619         buf[nb - 1] = '\0';
3620         *nbytes = nb - 1;
3621         *out = buf;
3622     }
3623 }
3624 
3625 
3626 /* set up a connection to write a buffer then free it, used for stats */
write_and_free(conn * c,char * buf,int bytes)3627 static void write_and_free(conn *c, char *buf, int bytes) {
3628     if (buf) {
3629         c->write_and_free = buf;
3630         c->wcurr = buf;
3631         c->wbytes = bytes;
3632         conn_set_state(c, conn_write);
3633         c->write_and_go = conn_new_cmd;
3634     } else {
3635         out_string(c, "SERVER_ERROR out of memory writing stats");
3636     }
3637 }
3638 
/*
 * Turn on c->noreply when the last real token of the request line (the one
 * just before the terminal token) is the literal "noreply".
 *
 * NOTE: this is not the first place a reply may be produced. The command
 * dispatcher can already answer a request line with the wrong number of
 * tokens; looking for "noreply" in a malformed line is unreliable anyway,
 * so that case is not handled.
 *
 * Returns the resulting value of c->noreply (which may have been set
 * before this call).
 */
static inline bool set_noreply_maybe(conn *c, token_t *tokens, size_t ntokens)
{
    const int slot = ntokens - 2;
    const token_t *candidate = &tokens[slot];

    if (candidate->value != NULL
        && strcmp(candidate->value, "noreply") == 0) {
        c->noreply = true;
    }

    return c->noreply;
}
3656 
/*
 * Format a single statistic value printf-style and hand it to add_stats.
 *
 * @param name      NUL-terminated statistic name
 * @param add_stats callback that receives the name/value pair
 * @param c         connection forwarded to the callback
 * @param fmt       printf-style format for the value, followed by its
 *                  arguments
 *
 * The value is formatted into a fixed buffer of STAT_VAL_LEN bytes; longer
 * values are truncated. The length passed to add_stats is always the number
 * of bytes actually stored in the buffer.
 */
void append_stat(const char *name, ADD_STAT add_stats, conn *c,
                 const char *fmt, ...) {
    char val_str[STAT_VAL_LEN];
    const size_t cap = sizeof(val_str) - 1;
    int vlen;
    va_list ap;

    assert(name);
    assert(add_stats);
    assert(c);
    assert(fmt);

    va_start(ap, fmt);
    vlen = vsnprintf(val_str, cap, fmt, ap);
    va_end(ap);

    /*
     * vsnprintf() returns the length the full output would have had, or a
     * negative value on error -- not the number of bytes stored. Passing
     * that on unchecked would make the callback read past val_str (or see a
     * negative length), so clamp it to what is really in the buffer.
     */
    if (vlen < 0) {
        val_str[0] = '\0';
        vlen = 0;
    } else if ((size_t)vlen >= cap) {
        vlen = (cap > 0) ? (int)(cap - 1) : 0;
    }

    add_stats(name, strlen(name), val_str, vlen, c);
}
3674 
/*
 * Handle "stats detail <command>".
 *
 * When detailed stats are not allowed the client gets an error. Otherwise
 * "on" / "off" switch settings.detail_enabled and answer "OK", "dump" sends
 * the prefix statistics dump, and anything else yields a usage message.
 */
inline static void process_stats_detail(conn *c, const char *command) {
    assert(c != NULL);

    if (!settings.allow_detailed) {
        out_string(c, "CLIENT_ERROR detailed stats disabled");
        return;
    }

    const bool turn_on = (strcmp(command, "on") == 0);
    if (turn_on || strcmp(command, "off") == 0) {
        settings.detail_enabled = turn_on ? 1 : 0;
        out_string(c, "OK");
        return;
    }

    if (strcmp(command, "dump") == 0) {
        int dump_len;
        char *dump = stats_prefix_dump(&dump_len);
        write_and_free(c, dump, dump_len);
        return;
    }

    out_string(c, "CLIENT_ERROR usage: stats detail on|off|dump");
}
3700 
aggregate_callback(void * in,void * out)3701 static void aggregate_callback(void *in, void *out) {
3702     struct thread_stats *out_thread_stats = out;
3703     struct independent_stats *in_independent_stats = in;
3704     threadlocal_stats_aggregate(in_independent_stats->thread_stats,
3705                                 out_thread_stats);
3706 }
3707 
3708 /* return server specific stats only */
server_stats(ADD_STAT add_stats,conn * c,bool aggregate)3709 static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate) {
3710     pid_t pid = getpid();
3711     rel_time_t now = current_time;
3712 
3713     struct thread_stats thread_stats;
3714     threadlocal_stats_clear(&thread_stats);
3715 
3716     if (aggregate && settings.engine.v1->aggregate_stats != NULL) {
3717         settings.engine.v1->aggregate_stats(settings.engine.v0,
3718                                             (const void *)c,
3719                                             aggregate_callback,
3720                                             &thread_stats);
3721     } else {
3722         threadlocal_stats_aggregate(get_independent_stats(c)->thread_stats,
3723                                     &thread_stats);
3724     }
3725 
3726     struct slab_stats slab_stats;
3727     slab_stats_aggregate(&thread_stats, &slab_stats);
3728 
3729 #ifndef __WIN32__
3730     struct rusage usage;
3731     getrusage(RUSAGE_SELF, &usage);
3732 #endif
3733 
3734     STATS_LOCK();
3735 
3736     APPEND_STAT("pid", "%lu", (long)pid);
3737     APPEND_STAT("uptime", "%u", now);
3738     APPEND_STAT("time", "%ld", now + (long)process_started);
3739     APPEND_STAT("version", "%s", VERSION);
3740     APPEND_STAT("libevent", "%s", event_get_version());
3741     APPEND_STAT("pointer_size", "%d", (int)(8 * sizeof(void *)));
3742 
3743 #ifndef __WIN32__
3744     append_stat("rusage_user", add_stats, c, "%ld.%06ld",
3745                 (long)usage.ru_utime.tv_sec,
3746                 (long)usage.ru_utime.tv_usec);
3747     append_stat("rusage_system", add_stats, c, "%ld.%06ld",
3748                 (long)usage.ru_stime.tv_sec,
3749                 (long)usage.ru_stime.tv_usec);
3750 #endif
3751 
3752     APPEND_STAT("daemon_connections", "%u", stats.daemon_conns);
3753     APPEND_STAT("curr_connections", "%u", stats.curr_conns);
3754     APPEND_STAT("total_connections", "%u", stats.total_conns);
3755     APPEND_STAT("connection_structures", "%u", stats.conn_structs);
3756     APPEND_STAT("cmd_get", "%"PRIu64, thread_stats.cmd_get);
3757     APPEND_STAT("cmd_set", "%"PRIu64, slab_stats.cmd_set);
3758     APPEND_STAT("cmd_flush", "%"PRIu64, thread_stats.cmd_flush);
3759     APPEND_STAT("auth_cmds", "%"PRIu64, thread_stats.auth_cmds);
3760     APPEND_STAT("auth_errors", "%"PRIu64, thread_stats.auth_errors);
3761     APPEND_STAT("get_hits", "%"PRIu64, slab_stats.get_hits);
3762     APPEND_STAT("get_misses", "%"PRIu64, thread_stats.get_misses);
3763     APPEND_STAT("delete_misses", "%"PRIu64, thread_stats.delete_misses);
3764     APPEND_STAT("delete_hits", "%"PRIu64, slab_stats.delete_hits);
3765     APPEND_STAT("incr_misses", "%"PRIu64, thread_stats.incr_misses);
3766     APPEND_STAT("incr_hits", "%"PRIu64, thread_stats.incr_hits);
3767     APPEND_STAT("decr_misses", "%"PRIu64, thread_stats.decr_misses);
3768     APPEND_STAT("decr_hits", "%"PRIu64, thread_stats.decr_hits);
3769     APPEND_STAT("cas_misses", "%"PRIu64, thread_stats.cas_misses);
3770     APPEND_STAT("cas_hits", "%"PRIu64, slab_stats.cas_hits);
3771     APPEND_STAT("cas_badval", "%"PRIu64, slab_stats.cas_badval);
3772     APPEND_STAT("bytes_read", "%"PRIu64, thread_stats.bytes_read);
3773     APPEND_STAT("bytes_written", "%"PRIu64, thread_stats.bytes_written);
3774     APPEND_STAT("limit_maxbytes", "%"PRIu64, settings.maxbytes);
3775     APPEND_STAT("accepting_conns", "%u",  is_listen_disabled() ? 0 : 1);
3776     APPEND_STAT("listen_disabled_num", "%"PRIu64, get_listen_disabled_num());
3777     APPEND_STAT("rejected_conns", "%" PRIu64, (unsigned long long)stats.rejected_conns);
3778     APPEND_STAT("threads", "%d", settings.num_threads);
3779     APPEND_STAT("conn_yields", "%" PRIu64, (unsigned long long)thread_stats.conn_yields);
3780     STATS_UNLOCK();
3781 
3782     /*
3783      * Add tap stats (only if non-zero)
3784      */
3785     struct tap_stats ts;
3786     pthread_mutex_lock(&tap_stats.mutex);
3787     ts = tap_stats;
3788     pthread_mutex_unlock(&tap_stats.mutex);
3789 
3790     if (ts.sent.connect) {
3791         APPEND_STAT("tap_connect_sent", "%"PRIu64, ts.sent.connect);
3792     }
3793     if (ts.sent.mutation) {
3794         APPEND_STAT("tap_mutation_sent", "%"PRIu64, ts.sent.mutation);
3795     }
3796     if (ts.sent.checkpoint_start) {
3797         APPEND_STAT("tap_checkpoint_start_sent", "%"PRIu64, ts.sent.checkpoint_start);
3798     }
3799     if (ts.sent.checkpoint_end) {
3800         APPEND_STAT("tap_checkpoint_end_sent", "%"PRIu64, ts.sent.checkpoint_end);
3801     }
3802     if (ts.sent.delete) {
3803         APPEND_STAT("tap_delete_sent", "%"PRIu64, ts.sent.delete);
3804     }
3805     if (ts.sent.flush) {
3806         APPEND_STAT("tap_flush_sent", "%"PRIu64, ts.sent.flush);
3807     }
3808     if (ts.sent.opaque) {
3809         APPEND_STAT("tap_opaque_sent", "%"PRIu64, ts.sent.opaque);
3810     }
3811     if (ts.sent.vbucket_set) {
3812         APPEND_STAT("tap_vbucket_set_sent", "%"PRIu64,
3813                     ts.sent.vbucket_set);
3814     }
3815     if (ts.received.connect) {
3816         APPEND_STAT("tap_connect_received", "%"PRIu64, ts.received.connect);
3817     }
3818     if (ts.received.mutation) {
3819         APPEND_STAT("tap_mutation_received", "%"PRIu64, ts.received.mutation);
3820     }
3821     if (ts.received.checkpoint_start) {
3822         APPEND_STAT("tap_checkpoint_start_received", "%"PRIu64, ts.received.checkpoint_start);
3823     }
3824     if (ts.received.checkpoint_end) {
3825         APPEND_STAT("tap_checkpoint_end_received", "%"PRIu64, ts.received.checkpoint_end);
3826     }
3827     if (ts.received.delete) {
3828         APPEND_STAT("tap_delete_received", "%"PRIu64, ts.received.delete);
3829     }
3830     if (ts.received.flush) {
3831         APPEND_STAT("tap_flush_received", "%"PRIu64, ts.received.flush);
3832     }
3833     if (ts.received.opaque) {
3834         APPEND_STAT("tap_opaque_received", "%"PRIu64, ts.received.opaque);
3835     }
3836     if (ts.received.vbucket_set) {
3837         APPEND_STAT("tap_vbucket_set_received", "%"PRIu64,
3838                     ts.received.vbucket_set);
3839     }
3840 }
3841 
/*
 * Emit the current server settings ("stats settings") through add_stats:
 * limits, ports, verbosity, SASL configuration compiled in, and the names of
 * the loaded daemon extensions, the logger and the ASCII protocol
 * extensions.
 *
 * @param add_stats callback receiving every name/value pair
 * @param c         cookie forwarded by APPEND_STAT to the callback
 */
static void process_stat_settings(ADD_STAT add_stats, void *c) {
    assert(add_stats);
    /*
     * Reported as a 64-bit value: casting to unsigned int truncated any
     * memory limit of 4 GiB or more.
     */
    APPEND_STAT("maxbytes", "%"PRIu64, (uint64_t)settings.maxbytes);
    APPEND_STAT("maxconns", "%d", settings.maxconns);
    APPEND_STAT("tcpport", "%d", settings.port);
    APPEND_STAT("udpport", "%d", settings.udpport);
    APPEND_STAT("inter", "%s", settings.inter ? settings.inter : "NULL");
    APPEND_STAT("verbosity", "%d", settings.verbose);
    APPEND_STAT("oldest", "%lu", (unsigned long)settings.oldest_live);
    APPEND_STAT("evictions", "%s", settings.evict_to_free ? "on" : "off");
    APPEND_STAT("domain_socket", "%s",
                settings.socketpath ? settings.socketpath : "NULL");
    APPEND_STAT("umask", "%o", settings.access);
    APPEND_STAT("growth_factor", "%.2f", settings.factor);
    APPEND_STAT("chunk_size", "%d", settings.chunk_size);
    APPEND_STAT("num_threads", "%d", settings.num_threads);
    APPEND_STAT("num_threads_per_udp", "%d", settings.num_threads_per_udp);
    APPEND_STAT("stat_key_prefix", "%c", settings.prefix_delimiter);
    APPEND_STAT("detail_enabled", "%s",
                settings.detail_enabled ? "yes" : "no");
    APPEND_STAT("allow_detailed", "%s",
                settings.allow_detailed ? "yes" : "no");
    APPEND_STAT("reqs_per_event", "%d", settings.reqs_per_event);
    APPEND_STAT("reqs_per_tap_event", "%d", settings.reqs_per_tap_event);
    APPEND_STAT("cas_enabled", "%s", settings.use_cas ? "yes" : "no");
    APPEND_STAT("tcp_backlog", "%d", settings.backlog);
    APPEND_STAT("binding_protocol", "%s",
                prot_text(settings.binding_protocol));
#ifdef SASL_ENABLED
    APPEND_STAT("auth_enabled_sasl", "%s", "yes");
#else
    APPEND_STAT("auth_enabled_sasl", "%s", "no");
#endif

#ifdef ENABLE_ISASL
    APPEND_STAT("auth_sasl_engine", "%s", "isasl");
#elif defined(ENABLE_SASL)
    APPEND_STAT("auth_sasl_engine", "%s", "cyrus");
#else
    APPEND_STAT("auth_sasl_engine", "%s", "none");
#endif
    APPEND_STAT("auth_required_sasl", "%s", settings.require_sasl ? "yes" : "no");
    APPEND_STAT("item_size_max", "%d", settings.item_size_max);
    APPEND_STAT("topkeys", "%d", settings.topkeys);

    /* one "extension" entry per loaded daemon extension */
    for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("extension", "%s", ptr->get_name());
    }

    APPEND_STAT("logger", "%s", settings.extensions.logger->get_name());

    /* one "ascii_extension" entry per loaded ASCII protocol extension */
    for (EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("ascii_extension", "%s", ptr->get_name(ptr->cookie));
    }
}
3901 
/*
 * Handle the ASCII "stats" command and its sub-commands.
 *
 * @param c       connection the command arrived on
 * @param tokens  tokenized request line; tokens[SUBCOMMAND_TOKEN] is the
 *                sub-command when one was given
 * @param ntokens number of entries in tokens, terminal entry included
 *                (2 means a bare "stats")
 *
 * Sub-commands handled here: reset, detail, settings, cachedump, aggregate
 * and topkeys. Anything else is joined back into one string and passed to
 * the engine's get_stats().
 *
 * @return NULL once a response has been set up or the connection state was
 *         changed; a non-NULL pointer into the request only when the engine
 *         answered ENGINE_EWOULDBLOCK (c->ewouldblock is set in that case).
 */
static char *process_stat(conn *c, token_t *tokens, const size_t ntokens) {
    const char *subcommand = tokens[SUBCOMMAND_TOKEN].value;
    c->dynamic_buffer.offset = 0;

    if (ntokens == 2) {
        /* bare "stats": server stats followed by the engine's defaults */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            server_stats(&append_stats, c, false);
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                NULL, 0, &append_stats);
            if (ret == ENGINE_EWOULDBLOCK) {
                c->ewouldblock = true;
                return c->rcurr + 5;
            }
        }
    } else if (strcmp(subcommand, "reset") == 0) {
        stats_reset(c);
        out_string(c, "RESET");
        return NULL;
    } else if (strcmp(subcommand, "detail") == 0) {
        /* NOTE: how to tackle detail with binary? */
        if (ntokens < 4) {
            process_stats_detail(c, "");  /* outputs the error message */
        } else {
            process_stats_detail(c, tokens[2].value);
        }
        /* Output already generated */
        return NULL;
    } else if (strcmp(subcommand, "settings") == 0) {
        process_stat_settings(&append_stats, c);
    } else if (strcmp(subcommand, "cachedump") == 0) {
        char *buf = NULL;
        unsigned int bytes = 0, id, limit = 0;

        if (ntokens < 5) {
            out_string(c, "CLIENT_ERROR bad command line");
            return NULL;
        }

        if (!safe_strtoul(tokens[2].value, &id) ||
            !safe_strtoul(tokens[3].value, &limit)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return NULL;
        }

        if (id >= POWER_LARGEST) {
            out_string(c, "CLIENT_ERROR Illegal slab id");
            return NULL;
        }

#ifdef FUTURE
        buf = item_cachedump(id, limit, &bytes);
#endif
        /* without FUTURE buf stays NULL and write_and_free reports an error */
        write_and_free(c, buf, bytes);
        return NULL;
    } else if (strcmp(subcommand, "aggregate") == 0) {
        server_stats(&append_stats, c, true);
    } else if (strcmp(subcommand, "topkeys") == 0) {
        topkeys_t *tk = get_independent_stats(c)->topkeys;
        if (tk != NULL) {
            topkeys_stats(tk, c, current_time, append_stats);
        } else {
            out_string(c, "ERROR");
            return NULL;
        }
    } else {
        /* getting here means that the subcommand is either engine specific or
           is invalid. query the engine and see. */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            char *buf = NULL;
            int nb = -1;
            detokenize(&tokens[1], ntokens - 2, &buf, &nb);
            if (buf == NULL) {
                /*
                 * detokenize() leaves buf untouched when it cannot allocate;
                 * report that instead of handing the engine a NULL key with
                 * length -1.
                 */
                ret = ENGINE_ENOMEM;
            } else {
                ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                    buf, nb, append_stats);
                free(buf);
            }
        }

        switch (ret) {
        case ENGINE_SUCCESS:
            append_stats(NULL, 0, NULL, 0, c);
            write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
            break;
        case ENGINE_ENOMEM:
            out_string(c, "SERVER_ERROR out of memory writing stats");
            break;
        case ENGINE_DISCONNECT:
            c->state = conn_closing;
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            return tokens[SUBCOMMAND_TOKEN].value;
        default:
            out_string(c, "ERROR");
            break;
        }

        return NULL;
    }

    /* append terminator and start the transfer */
    append_stats(NULL, 0, NULL, 0, c);

    if (c->dynamic_buffer.buffer == NULL) {
        out_string(c, "SERVER_ERROR out of memory writing stats");
    } else {
        write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
        c->dynamic_buffer.buffer = NULL;
    }

    return NULL;
}
4022 
4023 /**
4024  * Get a suffix buffer and insert it into the list of used suffix buffers
4025  * @param c the connection object
4026  * @return a pointer to a new suffix buffer or NULL if allocation failed
4027  */
get_suffix_buffer(conn * c)4028 static char *get_suffix_buffer(conn *c) {
4029     if (c->suffixleft == c->suffixsize) {
4030         char **new_suffix_list;
4031         size_t sz = sizeof(char*) * c->suffixsize * 2;
4032 
4033         new_suffix_list = realloc(c->suffixlist, sz);
4034         if (new_suffix_list) {
4035             c->suffixsize *= 2;
4036             c->suffixlist = new_suffix_list;
4037         } else {
4038             if (settings.verbose > 1) {
4039                 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4040                         "=%d Failed to resize suffix buffer\n", c->sfd);
4041             }
4042 
4043             return NULL;
4044         }
4045     }
4046 
4047     char *suffix = cache_alloc(c->thread->suffix_cache);
4048     if (suffix != NULL) {
4049         *(c->suffixlist + c->suffixleft) = suffix;
4050         ++c->suffixleft;
4051     }
4052 
4053     return suffix;
4054 }
4055 
4056 /* ntokens is overwritten here... shrug.. */
/*
 * Handle the ASCII "get"/"bget" (return_cas == false) and "gets"
 * (return_cas == true) commands.
 *
 * Each key token is looked up through the engine. A hit appends a
 * "VALUE <key> <flags> <bytes>[ <cas>]\r\n<data>\r\n" record to the
 * connection's outgoing iov list and stores the item in c->ilist. After the
 * last key "END\r\n" is appended and the connection moves to conn_mwrite.
 *
 * Returns the key being looked up when the engine answers
 * ENGINE_EWOULDBLOCK (c->ewouldblock is set), and NULL in all other cases.
 */
static inline char* process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) {
    static const char oom_msg[] = "SERVER_ERROR out of memory writing get response";
    char *key;
    size_t nkey;
    int i;
    item *it;
    token_t *key_token;
    /* Reply to send when the key loop is abandoned early; NULL otherwise. */
    const char *abort_msg = NULL;

    assert(c != NULL);
    (void)ntokens;

    /* Only touch c after the assertion above. */
    i = c->ileft;
    key_token = &tokens[KEY_TOKEN];

    /* We temporarily block the mgets commands till wl6650 checked in. */
    if ((key_token + 1)->length > 0) {
        out_string(c, "We temporarily don't support multiple get option.");
        return NULL;
    }

    do {
        while(key_token->length != 0) {

            key = key_token->value;
            nkey = key_token->length;

            if(nkey > KEY_MAX_LENGTH) {
                out_string(c, "CLIENT_ERROR bad command line format");
                return NULL;
            }

            /* Consume a status left in c->aiostat; only call the engine
             * when that status is ENGINE_SUCCESS. */
            ENGINE_ERROR_CODE ret = c->aiostat;
            c->aiostat = ENGINE_SUCCESS;

            if (ret == ENGINE_SUCCESS) {
                ret = settings.engine.v1->get(settings.engine.v0, c, &it, key, nkey, 0);
            }

            switch (ret) {
            case ENGINE_EWOULDBLOCK:
                c->ewouldblock = true;
                c->ileft = i;
                return key;

            case ENGINE_SUCCESS:
                break;
            case ENGINE_KEY_ENOENT:
            default:
                it = NULL;
                break;
            }

            if (settings.detail_enabled) {
                stats_prefix_record_get(key, nkey, NULL != it);
            }

            if (it) {
                item_info info = { .nvalue = 1 };
                if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it,
                                                       &info)) {
                    settings.engine.v1->release(settings.engine.v0, c, it);
                    abort_msg = "SERVER_ERROR error getting item data";
                    goto stop;
                }

                if (i >= c->isize) {
                    item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2);
                    if (new_list) {
                        c->isize *= 2;
                        c->ilist = new_list;
                    } else {
                        settings.engine.v1->release(settings.engine.v0, c, it);
                        abort_msg = oom_msg;
                        goto stop;
                    }
                }

                /* Rebuild the suffix */
                char *suffix = get_suffix_buffer(c);
                if (suffix == NULL) {
                    out_string(c, "SERVER_ERROR out of memory rebuilding suffix");
                    settings.engine.v1->release(settings.engine.v0, c, it);
                    return NULL;
                }
                int suffix_len = snprintf(suffix, SUFFIX_SIZE,
                                          " %u %u\r\n", htonl(info.flags),
                                          info.nbytes);

                /*
                 * Construct the response. Each hit adds three elements to the
                 * outgoing data list:
                 *   "VALUE "
                 *   key
                 *   " " + flags + " " + data length + "\r\n" + data (with \r\n)
                 */

                MEMCACHED_COMMAND_GET(c->sfd, info.key, info.nkey,
                                      info.nbytes, info.cas);
                if (return_cas)
                {

                  char *cas = get_suffix_buffer(c);
                  if (cas == NULL) {
                    out_string(c, "SERVER_ERROR out of memory making CAS suffix");
                    settings.engine.v1->release(settings.engine.v0, c, it);
                    return NULL;
                  }
                  int cas_len = snprintf(cas, SUFFIX_SIZE, " %"PRIu64"\r\n",
                                         info.cas);
                  /* suffix_len - 2 drops the suffix's "\r\n"; the CAS
                   * buffer supplies the line terminator instead. */
                  if (add_iov(c, "VALUE ", 6) != 0 ||
                      add_iov(c, info.key, info.nkey) != 0 ||
                      add_iov(c, suffix, suffix_len - 2) != 0 ||
                      add_iov(c, cas, cas_len) != 0 ||
                      add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
                      add_iov(c, "\r\n", 2) != 0)
                      {
                          settings.engine.v1->release(settings.engine.v0, c, it);
                          abort_msg = oom_msg;
                          goto stop;
                      }
                }
                else
                {
                  if (add_iov(c, "VALUE ", 6) != 0 ||
                      add_iov(c, info.key, info.nkey) != 0 ||
                      add_iov(c, suffix, suffix_len) != 0 ||
                      add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
                      add_iov(c, "\r\n", 2) != 0)
                      {
                          settings.engine.v1->release(settings.engine.v0, c, it);
                          abort_msg = oom_msg;
                          goto stop;
                      }
                }


                if (settings.verbose > 1) {
                    /* The key's length is carried in info.nkey, so print it
                     * with an explicit precision instead of relying on a
                     * terminating NUL. */
                    settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                                    ">%d sending key %.*s\n",
                                                    c->sfd, (int)info.nkey,
                                                    (const char *)info.key);
                }

                /* item_get() has incremented it->refcount for us */
                STATS_HIT(c, get, key, nkey);
                *(c->ilist + i) = it;
                i++;

            } else {
                STATS_MISS(c, get, key, nkey);
                MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);
            }

            key_token++;
        }

        /*
         * If the command string hasn't been fully processed, get the next set
         * of tokens.
         */
        if(key_token->value != NULL) {
            ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS);
            key_token = tokens;
        }

    } while(key_token->value != NULL);

stop:
    /*
     * Failures inside the key loop jump here directly. Leaving the inner
     * loop with a plain "break" would run the re-tokenize step above on the
     * failing key and retry it indefinitely.
     */
    c->icurr = c->ilist;
    c->ileft = i;
    c->suffixcurr = c->suffixlist;

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d END\n", c->sfd);
    }

    /*
        If the loop was terminated because of an error, it is not
        reliable to add END\r\n to the buffer, because it might not end
        in \r\n. So we send the error reply instead.
    */
    if (abort_msg != NULL) {
        out_string(c, abort_msg);
    }
    else if (add_iov(c, "END\r\n", 5) != 0
             || (IS_UDP(c->transport) && build_udp_headers(c) != 0)) {
        out_string(c, oom_msg);
    }
    else {
        conn_set_state(c, conn_mwrite);
        c->msgcurr = 0;
    }

    return NULL;
}
4240 
/*
 * Handle the ASCII storage commands (set/add/replace/append/prepend/cas).
 *
 * Parses "<key> <flags> <exptime> <bytes> [<cas>]" from the tokens, asks the
 * engine to allocate an item and, on success, switches the connection to
 * conn_nread so the data block is read straight into the item.
 *
 * tokens/ntokens: the tokenized command line; the caller is expected to have
 *                 checked that enough tokens are present.
 * store_op:       operation recorded in c->store_op for when the data block
 *                 has been read.
 * handle_cas:     true when tokens[5] carries a CAS value that must be parsed.
 */
static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, ENGINE_STORE_OPERATION store_op, bool handle_cas) {
    char *key;
    size_t nkey;
    unsigned int flags;
    int32_t exptime_int = 0;
    time_t exptime;
    int vlen;
    uint64_t req_cas_id=0;
    item *it;

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    key = tokens[KEY_TOKEN].value;
    nkey = tokens[KEY_TOKEN].length;

    if (! (safe_strtoul(tokens[2].value, (uint32_t *)&flags)
           && safe_strtol(tokens[3].value, &exptime_int)
           && safe_strtol(tokens[4].value, (int32_t *)&vlen))) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    /* Negative expire values not allowed */

    if (exptime_int < 0) {
        out_string(c, "CLIENT_ERROR Invalid expire time");
        return;
    }

    /* Ubuntu 8.04 breaks when I pass exptime to safe_strtol */
    exptime = exptime_int;

    /* does cas value exist? */
    if (handle_cas) {
        if (!safe_strtoull(tokens[5].value, &req_cas_id)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return;
        }
    }

    /*
     * Reject negative lengths, and lengths so large that adding 2 for the
     * trailing "\r\n" (see c->sbytes below) would overflow a signed int.
     */
    if (vlen < 0 || vlen > INT_MAX - 2) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Consume a status left in c->aiostat; only call the engine when that
     * status is ENGINE_SUCCESS. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, htonl(flags), exptime);
    }

    item_info info = { .nvalue = 1 };
    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, req_cas_id);
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            out_string(c, "SERVER_ERROR error getting item data");
            break;
        }
        /* Read the data block directly into the item's value buffer. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        c->store_op = store_op;
        conn_set_state(c, conn_nread);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            out_string(c, "SERVER_ERROR object too large for cache");
        } else {
            out_string(c, "SERVER_ERROR out of memory storing object");
        }
        /* swallow the data line (value bytes plus the trailing "\r\n") */
        c->write_and_go = conn_swallow;
        c->sbytes = vlen + 2;

        /* Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET. Anywhere else too? */
        if (store_op == OPERATION_SET) {
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey, 0, 0);
        }
    }
}
4345 
/*
 * Handle the ASCII "incr" (incr == true) and "decr" (incr == false)
 * commands: parse the delta, run the engine's arithmetic operation on the
 * key and queue the textual reply.
 *
 * Returns the key when the engine answers ENGINE_EWOULDBLOCK
 * (c->ewouldblock is set), NULL otherwise. An engine status not handled
 * below aborts the process.
 */
static char* process_arithmetic_command(conn *c, token_t *tokens, const size_t ntokens, const bool incr) {

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    uint64_t delta;
    if (!safe_strtoull(tokens[2].value, &delta)) {
        out_string(c, "CLIENT_ERROR invalid numeric delta argument");
        return NULL;
    }

    /* Consume a status left in c->aiostat; only call the engine when that
     * status is ENGINE_SUCCESS. */
    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;

    uint64_t cas;
    uint64_t result;
    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->arithmetic(settings.engine.v0, c, key, nkey,
                                                incr, false, delta, 0, 0, &cas,
                                                &result, 0);
    }

    /* Cases that produce a reply pick the text here; it is sent once,
     * after the switch. */
    char number[INCR_MAX_STORAGE_LEN];
    const char *reply = NULL;

    switch (status) {
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        return key;

    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        return NULL;

    case ENGINE_SUCCESS:
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        snprintf(number, sizeof(number), "%"PRIu64, result);
        reply = number;
        break;

    case ENGINE_KEY_ENOENT:
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        reply = "NOT_FOUND";
        break;

    case ENGINE_ENOMEM:
        reply = "SERVER_ERROR out of memory";
        break;

    case ENGINE_TMPFAIL:
        reply = "SERVER_ERROR temporary failure";
        break;

    case ENGINE_EINVAL:
        reply = "CLIENT_ERROR cannot increment or decrement non-numeric value";
        break;

    case ENGINE_NOT_STORED:
        reply = "SERVER_ERROR failed to store item";
        break;

    case ENGINE_ENOTSUP:
        reply = "SERVER_ERROR not supported";
        break;

    default:
        abort();
    }

    out_string(c, reply);
    return NULL;
}
4425 
/*
 * Handle the ASCII "delete <key> [0] [noreply]" command.
 *
 * Returns the key when the engine answers ENGINE_EWOULDBLOCK
 * (c->ewouldblock is set), NULL otherwise.
 */
static char *process_delete_command(conn *c, token_t *tokens,
                                    const size_t ntokens) {
    assert(c != NULL);

    if (ntokens > 3) {
        /* Optional arguments: a literal "0" and/or "noreply". */
        const bool zero_hold = (strcmp(tokens[KEY_TOKEN+1].value, "0") == 0);
        const bool wants_noreply = set_noreply_maybe(c, tokens, ntokens);
        bool acceptable;

        if (ntokens == 4) {
            acceptable = zero_hold || wants_noreply;
        } else if (ntokens == 5) {
            acceptable = zero_hold && wants_noreply;
        } else {
            acceptable = false;
        }

        if (!acceptable) {
            out_string(c, "CLIENT_ERROR bad command line format.  "
                       "Usage: delete <key> [noreply]");
            return NULL;
        }
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    if (nkey > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /* Consume a status left in c->aiostat; only call the engine when that
     * status is ENGINE_SUCCESS. */
    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->remove(settings.engine.v0, c,
                                            key, nkey, 0, 0);
    }

    if (status == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return key;
    }

    /* SLAB_INCR refers to a local called "info", so the name must stay. */
    item_info info = { .nvalue = 1 };

    if (status == ENGINE_SUCCESS) {
        out_string(c, "DELETED");
        SLAB_INCR(c, delete_hits, key, nkey);
    } else if (status == ENGINE_TMPFAIL) {
        out_string(c, "SERVER_ERROR temporary failure");
    } else {
        out_string(c, "NOT_FOUND");
        STATS_INCR(c, delete_misses, key, nkey);
    }

    if (settings.detail_enabled) {
        stats_prefix_record_delete(key, nkey);
    }
    return NULL;
}
4484 
/*
 * Handle the ASCII "bind <table_id_name>" command by passing the name to
 * the engine's bind() entry point.
 *
 * Returns the name when the engine answers ENGINE_EWOULDBLOCK
 * (c->ewouldblock is set), NULL otherwise.
 */
static char *process_bind_command(conn *c, token_t *tokens,
                                  const size_t ntokens) {
    assert(c != NULL);

    if (ntokens > 3) {
        out_string(c, "CLIENT_ERROR bad command line format.  "
                      "Usage: bind <table_id_name>");
        return NULL;
    }

    char *name = tokens[KEY_TOKEN].value;
    const size_t name_len = tokens[KEY_TOKEN].length;

    if (name_len == 0 || name_len > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /* Consume a status left in c->aiostat; only call the engine when that
     * status is ENGINE_SUCCESS. */
    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->bind(settings.engine.v0, c,
                                          name, name_len);
    }

    if (status == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return name;
    }

    /* Every status other than success is reported as NOT_FOUND. */
    if (status == ENGINE_SUCCESS) {
        out_string(c, "SUCCEED");
    } else {
        out_string(c, "NOT_FOUND");
    }

    return NULL;
}
4528 
/*
 * Handle the ASCII "verbosity <level> [noreply]" command: store the level
 * (capped at MAX_VERBOSITY_LEVEL) in settings.verbose and run the
 * ON_LOG_LEVEL callbacks.
 */
static void process_verbosity_command(conn *c, token_t *tokens, const size_t ntokens) {
    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);
    if (ntokens == 3 && c->noreply) {
        /* "verbosity noreply" is not according to the correct syntax */
        c->noreply = false;
        out_string(c, "ERROR");
        return;
    }

    unsigned int level;
    if (!safe_strtoul(tokens[1].value, &level)) {
        out_string(c, "ERROR");
        return;
    }

    if (level > MAX_VERBOSITY_LEVEL) {
        level = MAX_VERBOSITY_LEVEL;
    }
    settings.verbose = level;
    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    out_string(c, "OK");
}
4550 
/*
 * Tokenize one ASCII command line and dispatch it to the matching handler.
 *
 * Unless this is a retry after a pending engine operation (c->ewouldblock
 * set on entry), the outgoing message list is reset first.
 *
 * Returns NULL when the command needs no retry. A non-NULL value is the
 * pointer handed back by a handler, e.g. when its engine call reported
 * ENGINE_EWOULDBLOCK (c->ewouldblock is set in that case).
 */
static char* process_command(conn *c, char *command) {

    token_t tokens[MAX_TOKENS];
    size_t ntokens;
    int comm = 0;
    char *ret = NULL;

    assert(c != NULL);

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "<%d %s\n", c->sfd, command);
    }

    /*
     * for commands set/add/replace, we build an item and read the data
     * directly into it, then continue in nread_complete().
     */

    if (c->ewouldblock) {
        /*
         * If we are retrying after the engine has completed a pending io for
         * this command, skip add_msghdr() etc and clear the ewouldblock flag.
         */
        c->ewouldblock = false;
    } else {
        c->msgcurr = 0;
        c->msgused = 0;
        c->iovused = 0;
        if (add_msghdr(c) != 0) {
            out_string(c, "SERVER_ERROR out of memory preparing response");
            return NULL;
        }
    }

    ntokens = tokenize_command(command, tokens, MAX_TOKENS);
    if (ntokens >= 3 &&
        ((strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) ||
         (strcmp(tokens[COMMAND_TOKEN].value, "bget") == 0))) {

        ret = process_get_command(c, tokens, ntokens, false);

    } else if ((ntokens == 6 || ntokens == 7) &&
               ((strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = (int)OPERATION_ADD)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = (int)OPERATION_SET)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = (int)OPERATION_REPLACE)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "prepend") == 0 && (comm = (int)OPERATION_PREPEND)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = (int)OPERATION_APPEND)) )) {

        /* The assignments to comm above double as part of the condition,
         * which relies on each OPERATION_* value used here being non-zero. */
        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, false);

    } else if ((ntokens == 7 || ntokens == 8) && (strcmp(tokens[COMMAND_TOKEN].value, "cas") == 0 && (comm = (int)OPERATION_CAS))) {

        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "incr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 1);

    } else if (ntokens >= 3 && (strcmp(tokens[COMMAND_TOKEN].value, "gets") == 0)) {

        ret = process_get_command(c, tokens, ntokens, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "decr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 0);

    } else if (ntokens >= 3 && ntokens <= 5 && (strcmp(tokens[COMMAND_TOKEN].value, "delete") == 0)) {

        ret = process_delete_command(c, tokens, ntokens);

    } else if (ntokens == 3 && (strcmp(tokens[COMMAND_TOKEN].value, "bind") == 0)) {

        ret = process_bind_command(c, tokens, ntokens);

    } else if (ntokens >= 2 && (strcmp(tokens[COMMAND_TOKEN].value, "stats") == 0)) {

        ret = process_stat(c, tokens, ntokens);

    } else if (ntokens >= 2 && ntokens <= 4 && (strcmp(tokens[COMMAND_TOKEN].value, "flush_all") == 0)) {
        time_t exptime;

        set_noreply_maybe(c, tokens, ntokens);

        if (ntokens == (c->noreply ? 3 : 2)) {
            exptime = 0;
        } else {
            /* strtol() only sets errno on failure, so clear it first;
             * otherwise a stale ERANGE from an earlier call would reject a
             * valid command. */
            errno = 0;
            exptime = strtol(tokens[1].value, NULL, 10);
            if(errno == ERANGE) {
                out_string(c, "CLIENT_ERROR bad command line format");
                return NULL;
            }
        }

        /* Consume a status left in c->aiostat; only call the engine when
         * that status is ENGINE_SUCCESS. */
        ENGINE_ERROR_CODE flush_ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (flush_ret == ENGINE_SUCCESS) {
            flush_ret = settings.engine.v1->flush(settings.engine.v0, c, exptime);
        }

        switch (flush_ret) {
        case  ENGINE_SUCCESS:
            out_string(c, "OK");
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            /* 9 == strlen("flush_all") */
            return c->rcurr + 9;
        default:
            out_string(c, "SERVER_ERROR failed to flush cache");
        }

        if (flush_ret != ENGINE_EWOULDBLOCK) {
            STATS_NOKEY(c, cmd_flush);
        }
        return NULL;

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "version") == 0)) {

        out_string(c, "VERSION " VERSION);

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "quit") == 0)) {

        conn_set_state(c, conn_closing);

    } else if ((ntokens == 3 || ntokens == 4) && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) {
        process_verbosity_command(c, tokens, ntokens);
    } else if (settings.extensions.ascii != NULL) {
        /* Not a built-in command: offer it to the registered ASCII
         * protocol extensions in list order. */
        EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *cmd;
        size_t nbytes = 0;
        char *ptr = NULL;

        if (ntokens > 0) {
            if (ntokens == MAX_TOKENS) {
                out_string(c, "ERROR too many arguments");
                return NULL;
            }

            /* Do not count a trailing zero-length token. */
            if (tokens[ntokens - 1].length == 0) {
                --ntokens;
            }
        }

        for (cmd = settings.extensions.ascii; cmd != NULL; cmd = cmd->next) {
            if (cmd->accept(cmd->cookie, c, ntokens, tokens, &nbytes, &ptr)) {
                break;
            }
        }

        if (cmd == NULL) {
            out_string(c, "ERROR unknown command");
        } else if (nbytes == 0) {
            switch (cmd->execute(cmd->cookie, c, ntokens, tokens,
                                 ascii_response_handler)) {
            case ENGINE_SUCCESS:
                if (c->dynamic_buffer.buffer != NULL) {
                    write_and_free(c, c->dynamic_buffer.buffer,
                                   c->dynamic_buffer.offset);
                    c->dynamic_buffer.buffer = NULL;
                } else {
                    conn_set_state(c, conn_new_cmd);
                }
                break;
            case ENGINE_EWOULDBLOCK:
                c->ewouldblock = true;
                ret = tokens[KEY_TOKEN].value;
                break;
            case ENGINE_DISCONNECT:
            default:
                conn_set_state(c, conn_closing);

            }
        } else {
            c->rlbytes = nbytes;
            c->ritem = ptr;
            c->ascii_cmd = cmd;
            /* NOT SUPPORTED YET! */
            conn_set_state(c, conn_nread);
        }
    } else {
        out_string(c, "ERROR");
    }
    return ret;
}
4740 
4741 /*
4742  * if we have a complete line in the buffer, process it.
4743  */
try_read_command(conn * c)4744 static int try_read_command(conn *c) {
4745     assert(c != NULL);
4746     assert(c->rcurr <= (c->rbuf + c->rsize));
4747     assert(c->rbytes > 0);
4748 
4749     if (c->protocol == negotiating_prot || c->transport == udp_transport)  {
4750         if ((unsigned char)c->rbuf[0] == (unsigned char)PROTOCOL_BINARY_REQ) {
4751             c->protocol = binary_prot;
4752         } else {
4753             c->protocol = ascii_prot;
4754         }
4755 
4756         if (settings.verbose > 1) {
4757             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4758                     "%d: Client using the %s protocol\n", c->sfd,
4759                     prot_text(c->protocol));
4760         }
4761     }
4762 
4763     if (c->protocol == binary_prot) {
4764         /* Do we have the complete packet header? */
4765         if (c->rbytes < sizeof(c->binary_header)) {
4766             /* need more data! */
4767             return 0;
4768         } else {
4769 #ifdef NEED_ALIGN
4770             if (((long)(c->rcurr)) % 8 != 0) {
4771                 /* must realign input buffer */
4772                 memmove(c->rbuf, c->rcurr, c->rbytes);
4773                 c->rcurr = c->rbuf;
4774                 if (settings.verbose > 1) {
4775                     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4776                              "%d: Realign input buffer\n", c->sfd);
4777                 }
4778             }
4779 #endif
4780             protocol_binary_request_header* req;
4781             req = (protocol_binary_request_header*)c->rcurr;
4782 
4783             if (settings.verbose > 1) {
4784                 /* Dump the packet before we convert it to host order */
4785                 char buffer[1024];
4786                 ssize_t nw;
4787                 nw = bytes_to_output_string(buffer, sizeof(buffer), c->sfd,
4788                                             true, "Read binary protocol data:",
4789                                             (const char*)req->bytes,
4790                                             sizeof(req->bytes));
4791                 if (nw != -1) {
4792                     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4793                                                     "%s", buffer);
4794                 }
4795             }
4796 
4797             c->binary_header = *req;
4798             c->binary_header.request.keylen = ntohs(req->request.keylen);
4799             c->binary_header.request.bodylen = ntohl(req->request.bodylen);
4800             c->binary_header.request.vbucket = ntohs(req->request.vbucket);
4801             c->binary_header.request.cas = ntohll(req->request.cas);
4802 
4803 
4804             if (c->binary_header.request.magic != PROTOCOL_BINARY_REQ &&
4805                 !(c->binary_header.request.magic == PROTOCOL_BINARY_RES &&
4806                   response_handlers[c->binary_header.request.opcode])) {
4807                 if (settings.verbose) {
4808                     if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
4809                         settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4810                               "%d: Invalid magic:  %x\n", c->sfd,
4811                               c->binary_header.request.magic);
4812                     } else {
4813                         settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4814                               "%d: ERROR: Unsupported response packet received: %u\n",
4815                               c->sfd, (unsigned int)c->binary_header.request.opcode);
4816 
4817                     }
4818                 }
4819                 conn_set_state(c, conn_closing);
4820                 return -1;
4821             }
4822 
4823             c->msgcurr = 0;
4824             c->msgused = 0;
4825             c->iovused = 0;
4826             if (add_msghdr(c) != 0) {
4827                 out_string(c, "SERVER_ERROR out of memory");
4828                 return 0;
4829             }
4830 
4831             c->cmd = c->binary_header.request.opcode;
4832             c->keylen = c->binary_header.request.keylen;
4833             c->opaque = c->binary_header.request.opaque;
4834             /* clear the returned cas value */
4835             c->cas = 0;
4836 
4837             dispatch_bin_command(c);
4838 
4839             c->rbytes -= sizeof(c->binary_header);
4840             c->rcurr += sizeof(c->binary_header);
4841         }
4842     } else {
4843         char *el, *cont, *left, lb;
4844 
4845         if (c->rbytes == 0) {
4846             return 0;
4847         }
4848 
4849         el = memchr(c->rcurr, '\n', c->rbytes);
4850         if (!el) {
4851             if (c->rbytes > 1024) {
4852                 /*
4853                  * We didn't have a '\n' in the first k. This _has_ to be a
4854                  * large multiget, if not we should just nuke the connection.
4855                  */
4856                 char *ptr = c->rcurr;
4857                 while (*ptr == ' ') { /* ignore leading whitespaces */
4858                     ++ptr;
4859                 }
4860 
4861                 if (ptr - c->rcurr > 100 ||
4862                     (strncmp(ptr, "get ", 4) && strncmp(ptr, "gets ", 5))) {
4863 
4864                     conn_set_state(c, conn_closing);
4865                     return 1;
4866                 }
4867             }
4868 
4869             return 0;
4870         }
4871         cont = el + 1;
4872         if ((el - c->rcurr) > 1 && *(el - 1) == '\r') {
4873             el--;
4874         }
4875         lb = *el;
4876         *el = '\0';
4877 
4878         assert(cont <= (c->rcurr + c->rbytes));
4879 
4880         LIBEVENT_THREAD *thread = c->thread;
4881         LOCK_THREAD(thread);
4882         left = process_command(c, c->rcurr);
4883         if (c->ewouldblock) {
4884             unregister_event(c);
4885         }
4886         UNLOCK_THREAD(thread);
4887 
4888         if (left != NULL) {
4889             /*
4890              * We have not processed the entire command. This happens
4891              * when the engine returns ENGINE_EWOULDBLOCK for one of the
4892              * keys in a get/gets request.
4893              */
4894             assert (left <= el);
4895 
4896             int count = strlen(c->rcurr);
4897             if ((c->rcurr + count) == left) {
4898                 // Retry the entire command
4899                 cont = c->rcurr;
4900             } else {
4901                 left -= (count + 1);
4902                 cont = left;
4903                 assert(cont >= c->rcurr);
4904                 if (cont > c->rcurr) {
4905                     memmove(cont, c->rcurr, count);
4906                 }
4907             }
4908 
4909             /* de-tokenize the command */
4910             while ((left = memchr(left, '\0', el - left)) != NULL) {
4911                 *left = ' ';
4912             }
4913             *el = lb;
4914         }
4915 
4916         c->rbytes -= (cont - c->rcurr);
4917         c->rcurr = cont;
4918 
4919         assert(c->rcurr <= (c->rbuf + c->rsize));
4920     }
4921 
4922     return 1;
4923 }
4924 
4925 /*
4926  * read a UDP request.
4927  */
try_read_udp(conn * c)4928 static enum try_read_result try_read_udp(conn *c) {
4929     int res;
4930 
4931     assert(c != NULL);
4932 
4933     c->request_addr_size = sizeof(c->request_addr);
4934     res = recvfrom(c->sfd, c->rbuf, c->rsize,
4935                    0, (struct sockaddr *)&c->request_addr, &c->request_addr_size);
4936     if (res > 8) {
4937         unsigned char *buf = (unsigned char *)c->rbuf;
4938         STATS_ADD(c, bytes_read, res);
4939 
4940         /* Beginning of UDP packet is the request ID; save it. */
4941         c->request_id = buf[0] * 256 + buf[1];
4942 
4943         /* If this is a multi-packet request, drop it. */
4944         if (buf[4] != 0 || buf[5] != 1) {
4945             out_string(c, "SERVER_ERROR multi-packet request not supported");
4946             return READ_NO_DATA_RECEIVED;
4947         }
4948 
4949         /* Don't care about any of the rest of the header. */
4950         res -= 8;
4951         memmove(c->rbuf, c->rbuf + 8, res);
4952 
4953         c->rbytes += res;
4954         c->rcurr = c->rbuf;
4955         return READ_DATA_RECEIVED;
4956     }
4957     return READ_NO_DATA_RECEIVED;
4958 }
4959 
4960 /*
4961  * read from network as much as we can, handle buffer overflow and connection
4962  * close.
4963  * before reading, move the remaining incomplete fragment of a command
4964  * (if any) to the beginning of the buffer.
4965  *
4966  * To protect us from someone flooding a connection with bogus data causing
4967  * the connection to eat up all available memory, break out and start looking
4968  * at the data I've got after a number of reallocs...
4969  *
4970  * @return enum try_read_result
4971  */
try_read_network(conn * c)4972 static enum try_read_result try_read_network(conn *c) {
4973     enum try_read_result gotdata = READ_NO_DATA_RECEIVED;
4974     int res;
4975     int num_allocs = 0;
4976     assert(c != NULL);
4977 
4978     if (c->rcurr != c->rbuf) {
4979         if (c->rbytes != 0) /* otherwise there's nothing to copy */
4980             memmove(c->rbuf, c->rcurr, c->rbytes);
4981         c->rcurr = c->rbuf;
4982     }
4983 
4984     while (1) {
4985         if (c->rbytes >= c->rsize) {
4986             if (num_allocs == 4) {
4987                 return gotdata;
4988             }
4989             ++num_allocs;
4990             char *new_rbuf = realloc(c->rbuf, c->rsize * 2);
4991             if (!new_rbuf) {
4992                 if (settings.verbose > 0) {
4993                  settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4994                           "Couldn't realloc input buffer\n");
4995                 }
4996                 c->rbytes = 0; /* ignore what we read */
4997                 out_string(c, "SERVER_ERROR out of memory reading request");
4998                 c->write_and_go = conn_closing;
4999                 return READ_MEMORY_ERROR;
5000             }
5001             c->rcurr = c->rbuf = new_rbuf;
5002             c->rsize *= 2;
5003         }
5004 
5005         int avail = c->rsize - c->rbytes;
5006         res = recv(c->sfd, c->rbuf + c->rbytes, avail, 0);
5007         if (res > 0) {
5008             STATS_ADD(c, bytes_read, res);
5009             gotdata = READ_DATA_RECEIVED;
5010             c->rbytes += res;
5011             if (res == avail) {
5012                 continue;
5013             } else {
5014                 break;
5015             }
5016         }
5017         if (res == 0) {
5018             return READ_ERROR;
5019         }
5020         if (res == -1) {
5021             if (errno == EAGAIN || errno == EWOULDBLOCK) {
5022                 break;
5023             }
5024             return READ_ERROR;
5025         }
5026     }
5027     return gotdata;
5028 }
5029 
/*
 * Add the connection's event to libevent.
 *
 * @param c       the connection whose c->event is to be added
 * @param timeout passed straight through to event_add()
 * @return true on success, false if event_add() failed (a warning is logged)
 */
bool register_event(conn *c, struct timeval *timeout) {
#ifdef DEBUG
    assert(!c->registered_in_libevent);
#endif

    if (event_add(&c->event, timeout) != -1) {
#ifdef DEBUG
        c->registered_in_libevent = true;
#endif
        return true;
    }

    settings.extensions.logger->log(EXTENSION_LOG_WARNING,
                                    NULL,
                                    "Failed to add connection to libevent: %s",
                                    strerror(errno));
    return false;
}
5049 
/*
 * Remove the connection's event from libevent.
 *
 * @param c the connection whose c->event is to be removed
 * @return true on success, false if event_del() failed
 */
bool unregister_event(conn *c) {
#ifdef DEBUG
    assert(c->registered_in_libevent);
#endif

    const int rc = event_del(&c->event);
    if (rc != -1) {
#ifdef DEBUG
        c->registered_in_libevent = false;
#endif
        return true;
    }

    return false;
}
5065 
5066 
/*
 * Change the set of libevent flags the connection is waiting on.
 *
 * Does nothing when the requested flags equal the current ones.  Otherwise
 * the event is removed, re-initialised with the new flags on the same
 * event base, and added again.
 *
 * @param c         the connection to update (must not be NULL)
 * @param new_flags the EV_* flag combination to wait for
 * @return true if the flags are in effect, false if removing or re-adding
 *         the event failed
 */
bool update_event(conn *c, const int new_flags) {
    assert(c != NULL);

    if (new_flags == c->ev_flags) {
        return true;
    }

    /* Captured before event_set() touches c->event below. */
    struct event_base *saved_base = c->event.ev_base;

    const char *wants_read = (new_flags & EV_READ) ? "yes" : "no";
    const char *wants_write = (new_flags & EV_WRITE) ? "yes" : "no";
    settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                    "Updated event for %d to read=%s, write=%s\n",
                                    c->sfd, wants_read, wants_write);

    if (!unregister_event(c)) {
        return false;
    }

    event_set(&c->event, c->sfd, new_flags, event_handler, (void *)c);
    event_base_set(saved_base, &c->event);
    c->ev_flags = new_flags;

    return register_event(c, NULL);
}
5089 
5090 /*
5091  * Transmit the next chunk of data from our list of msgbuf structures.
5092  *
5093  * Returns:
5094  *   TRANSMIT_COMPLETE   All done writing.
5095  *   TRANSMIT_INCOMPLETE More data remaining to write.
5096  *   TRANSMIT_SOFT_ERROR Can't write any more right now.
5097  *   TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing)
5098  */
transmit(conn * c)5099 static enum transmit_result transmit(conn *c) {
5100     assert(c != NULL);
5101 
5102     if (c->msgcurr < c->msgused &&
5103             c->msglist[c->msgcurr].msg_iovlen == 0) {
5104         /* Finished writing the current msg; advance to the next. */
5105         c->msgcurr++;
5106     }
5107     if (c->msgcurr < c->msgused) {
5108         ssize_t res;
5109         struct msghdr *m = &c->msglist[c->msgcurr];
5110 
5111         res = sendmsg(c->sfd, m, 0);
5112         if (res > 0) {
5113             STATS_ADD(c, bytes_written, res);
5114 
5115             /* We've written some of the data. Remove the completed
5116                iovec entries from the list of pending writes. */
5117             while (m->msg_iovlen > 0 && res >= m->msg_iov->iov_len) {
5118                 res -= m->msg_iov->iov_len;
5119                 m->msg_iovlen--;
5120                 m->msg_iov++;
5121             }
5122 
5123             /* Might have written just part of the last iovec entry;
5124                adjust it so the next write will do the rest. */
5125             if (res > 0) {
5126                 m->msg_iov->iov_base = (caddr_t)m->msg_iov->iov_base + res;
5127                 m->msg_iov->iov_len -= res;
5128             }
5129             return TRANSMIT_INCOMPLETE;
5130         }
5131         if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
5132             if (!update_event(c, EV_WRITE | EV_PERSIST)) {
5133                 if (settings.verbose > 0) {
5134                     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5135                             "Couldn't update event\n");
5136                 }
5137                 conn_set_state(c, conn_closing);
5138                 return TRANSMIT_HARD_ERROR;
5139             }
5140             return TRANSMIT_SOFT_ERROR;
5141         }
5142         /* if res == 0 or res == -1 and error is not EAGAIN or EWOULDBLOCK,
5143            we have a real error, on which we close the connection */
5144         if (settings.verbose > 0) {
5145             settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
5146                                             "Failed to write, and not due to blocking: %s",
5147                                             strerror(errno));
5148         }
5149 
5150         if (IS_UDP(c->transport))
5151             conn_set_state(c, conn_read);
5152         else
5153             conn_set_state(c, conn_closing);
5154         return TRANSMIT_HARD_ERROR;
5155     } else {
5156         return TRANSMIT_COMPLETE;
5157     }
5158 }
5159 
/*
 * State handler for a listening socket: accept one pending client and hand
 * it to dispatch_conn_new().
 *
 * The new client is rejected (its socket closed) when the connection count
 * has reached settings.maxconns or the socket cannot be made non-blocking.
 * When accept() fails with EMFILE, listening is disabled.
 *
 * @param c the listening connection
 * @return always false
 */
bool conn_listening(conn *c)
{
    struct sockaddr_storage peer;
    socklen_t peer_len = sizeof(peer);

    int client_fd = accept(c->sfd, (struct sockaddr *)&peer, &peer_len);
    if (client_fd == -1) {
        if (errno == EMFILE) {
            if (settings.verbose > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                                "Too many open connections\n");
            }
            disable_listen();
        } else if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "Failed to accept new client: %s\n",
                                            strerror(errno));
        }

        return false;
    }

    /* Count the new connection, then decide whether it may stay. */
    int open_conns;
    STATS_LOCK();
    open_conns = ++stats.curr_conns;
    STATS_UNLOCK();

    if (open_conns >= settings.maxconns) {
        STATS_LOCK();
        ++stats.rejected_conns;
        STATS_UNLOCK();

        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Too many open connections\n");
        }

        safe_close(client_fd);
        return false;
    }

    if (evutil_make_socket_nonblocking(client_fd) == -1) {
        safe_close(client_fd);
        return false;
    }

    dispatch_conn_new(client_fd, conn_new_cmd, EV_READ | EV_PERSIST,
                      DATA_BUFFER_SIZE, tcp_transport);

    return false;
}
5210 
5211 /**
5212  * Ship tap log to the other end. This state differs with all other states
5213  * in the way that it support full duplex dialog. We're listening to both read
5214  * and write events from libevent most of the time. If a read event occurs we
5215  * switch to the conn_read state to read and execute the input message (that would
5216  * be an ack message from the other side). If a write event occurs we continue to
5217  * send tap log to the other end.
5218  * @param c the tap connection to drive
5219  * @return true if we should continue to process work for this connection, false
5220  *              if we should start processing events for other connections.
5221  */
conn_ship_log(conn * c)5222 bool conn_ship_log(conn *c) {
5223     bool cont = false;
5224 
5225     if (c->sfd == INVALID_SOCKET) {
5226         return false;
5227     }
5228 
5229     short mask = EV_READ | EV_PERSIST | EV_WRITE;
5230 
5231     if (c->which & EV_READ || c->rbytes > 0) {
5232         if (c->rbytes > 0) {
5233             if (try_read_command(c) == 0) {
5234                 conn_set_state(c, conn_read);
5235             }
5236         } else {
5237             conn_set_state(c, conn_read);
5238         }
5239 
5240         // we're going to process something.. let's proceed
5241         cont = true;
5242 
5243         // We have a finite number of messages in the input queue
5244         // so let's process all of them instead of backing off after
5245         // reading a subset of them.
5246         // Why? Because we've got every time we're calling ship_tap_log
5247         // we try to send a chunk of items.. This means that if we end
5248         // up in a situation where we're receiving a burst of nack messages
5249         // we'll only process a subset of messages in our input queue,
5250         // and it will slowly grow..
5251         c->nevents = settings.reqs_per_tap_event;
5252     } else if (c->which & EV_WRITE) {
5253         --c->nevents;
5254         if (c->nevents >= 0) {
5255             LOCK_THREAD(c->thread);
5256             c->ewouldblock = false;
5257             ship_tap_log(c);
5258             if (c->ewouldblock) {
5259                 mask = EV_READ | EV_PERSIST;
5260             } else {
5261                 cont = true;
5262             }
5263             UNLOCK_THREAD(c->thread);
5264         }
5265     }
5266 
5267     if (!update_event(c, mask)) {
5268         if (settings.verbose > 0) {
5269             settings.extensions.logger->log(EXTENSION_LOG_INFO,
5270                                             c, "Couldn't update event\n");
5271         }
5272         conn_set_state(c, conn_closing);
5273     }
5274 
5275     return cont;
5276 }
5277 
/*
 * State handler: wait for input.  Requests read events and moves to
 * conn_read.
 *
 * @param c the connection to park
 * @return false once read events are requested (wait for the next event);
 *         true if the event could not be updated and the connection was
 *         moved to conn_closing
 */
bool conn_waiting(conn *c) {
    if (update_event(c, EV_READ | EV_PERSIST)) {
        conn_set_state(c, conn_read);
        return false;
    }

    if (settings.verbose > 0) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Couldn't update event\n");
    }
    conn_set_state(c, conn_closing);
    return true;
}
5290 
/*
 * State handler: read from the socket (UDP or stream) and pick the next
 * state from the outcome.
 *
 * @param c the connection to read from
 * @return always true
 */
bool conn_read(conn *c) {
    int outcome;

    if (IS_UDP(c->transport)) {
        outcome = try_read_udp(c);
    } else {
        outcome = try_read_network(c);
    }

    if (outcome == READ_NO_DATA_RECEIVED) {
        conn_set_state(c, conn_waiting);
    } else if (outcome == READ_DATA_RECEIVED) {
        conn_set_state(c, conn_parse_cmd);
    } else if (outcome == READ_ERROR) {
        conn_set_state(c, conn_closing);
    }
    /* READ_MEMORY_ERROR (buffer could not be grown): try_read_network has
       already arranged the follow-up, nothing to do here. */

    return true;
}
5310 
/*
 * State handler: try to parse one command from the buffered input.
 *
 * @param c the connection whose input is parsed
 * @return false when c->ewouldblock is set, true otherwise
 */
bool conn_parse_cmd(conn *c) {
    const int parsed = try_read_command(c);

    if (parsed == 0) {
        /* Not a complete command yet: we need more data. */
        conn_set_state(c, conn_waiting);
    }

    if (c->ewouldblock) {
        return false;
    }
    return true;
}
5319 
/*
 * State handler: get ready for the next command, or yield when this
 * connection has used up its per-event request budget (c->nevents).
 *
 * @param c the connection
 * @return true to keep running the state machine, false to yield to other
 *         connections
 */
bool conn_new_cmd(conn *c) {
    /* Only a limited number of requests are handled per event so that one
       connection cannot starve the others. */
    c->nevents--;
    if (c->nevents >= 0) {
        reset_cmd_handler(c);
        return true;
    }

    STATS_NOKEY(c, conn_yields);

    if (c->rbytes <= 0) {
        return false;
    }

    /* Input is already sitting in our buffer, so libevent will most likely
       not signal a read event for it (unless more data arrives).  As a
       hack, ask for a write event instead, because writing should be
       possible and will get us called again. */
    if (update_event(c, EV_WRITE | EV_PERSIST)) {
        return false;
    }

    if (settings.verbose > 0) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO,
                                        c, "Couldn't update event\n");
    }
    conn_set_state(c, conn_closing);
    return true;
}
5348 
5349 
/*
 * State handler: read and discard c->sbytes bytes of input.
 *
 * Buffered input is consumed first; after that the socket is read into
 * c->rbuf, which is used purely as scratch space.
 *
 * @param c the connection whose input is being discarded
 * @return true to keep running the state machine, false when the socket
 *         would block and read events have been requested
 */
bool conn_swallow(conn *c) {
    /* Nothing left to throw away: back to command processing. */
    if (c->sbytes == 0) {
        conn_set_state(c, conn_new_cmd);
        return true;
    }

    /* Use up leftovers in the read buffer before touching the socket. */
    if (c->rbytes > 0) {
        uint32_t skip = c->rbytes > c->sbytes ? c->sbytes : c->rbytes;
        c->sbytes -= skip;
        c->rcurr += skip;
        c->rbytes -= skip;
        return true;
    }

    /* Read at most one buffer's worth, and never more than is owed. */
    ssize_t nread = recv(c->sfd, c->rbuf,
                         c->rsize > c->sbytes ? c->sbytes : c->rsize, 0);
    if (nread > 0) {
        STATS_ADD(c, bytes_read, nread);
        c->sbytes -= nread;
        return true;
    }
    if (nread == 0) {
        /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

    if (nread == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        if (update_event(c, EV_READ | EV_PERSIST)) {
            return false;
        }
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return true;
    }

    /* A genuine failure closes the connection; disconnects by the peer are
       not worth logging. */
    if (!(errno == ENOTCONN || errno == ECONNRESET)) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Failed to read, and not due to blocking (%s)\n",
                                        strerror(errno));
    }

    conn_set_state(c, conn_closing);
    return true;
}
5402 
/*
 * State handler: read the remaining c->rlbytes bytes of a payload into
 * c->ritem.
 *
 * Once nothing remains, complete_nread() is called with the thread lock
 * held.  Otherwise bytes already in the read buffer are copied first, and
 * the rest is read straight from the socket.
 *
 * @param c the connection being read
 * @return true to keep running the state machine; false when the
 *         connection is blocked (c->ewouldblock set by complete_nread, or
 *         the socket would block and read events have been requested)
 */
bool conn_nread(conn *c) {
    ssize_t res;

    if (c->rlbytes == 0) {
        /* Everything has arrived: complete the command under the lock of
           the thread that currently owns the connection. */
        LIBEVENT_THREAD *t = c->thread;
        LOCK_THREAD(t);
        bool block = c->ewouldblock = false;
        complete_nread(c);
        UNLOCK_THREAD(t);
        /* Breaking this into two, as complete_nread may have
           moved us to a different thread */
        t = c->thread;
        LOCK_THREAD(t);
        if (c->ewouldblock) {
            /* Stop receiving events while the connection is blocked. */
            unregister_event(c);
            block = true;
        }
        UNLOCK_THREAD(t);
        return !block;
    }
    /* first check if we have leftovers in the conn_read buffer */
    if (c->rbytes > 0) {
        uint32_t tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes;
        /* Skip the copy when the data already sits at its destination. */
        if (c->ritem != c->rcurr) {
            memmove(c->ritem, c->rcurr, tocopy);
        }
        c->ritem += tocopy;
        c->rlbytes -= tocopy;
        c->rcurr += tocopy;
        c->rbytes -= tocopy;
        if (c->rlbytes == 0) {
            return true;
        }
    }

    /*  now try reading from the socket */
    res = recv(c->sfd, c->ritem, c->rlbytes, 0);
    if (res > 0) {
        STATS_ADD(c, bytes_read, res);
        /* When reading in place inside the read buffer, rcurr has to
           follow ritem. */
        if (c->rcurr == c->ritem) {
            c->rcurr += res;
        }
        c->ritem += res;
        c->rlbytes -= res;
        return true;
    }
    if (res == 0) { /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

#ifdef INNODB_MEMCACHED
    /* MEMCACHED_RESOLVE: on the Solaris platform, when connected through
    telnet and waiting for input from an "add" or "set" command, recv
    could return res == -1 with errno == 0, causing early termination.
    The "!errno" condition is added here to deal with this scenario for now */
    if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK || !errno)) {
#else
    if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
#endif /* INNODB_MEMCACHED */
        /* Nothing to read right now: wait for the next read event. */
        if (!update_event(c, EV_READ | EV_PERSIST)) {
            if (settings.verbose > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                                "Couldn't update event\n");
            }
            conn_set_state(c, conn_closing);
            return true;
        }
        return false;
    }

    /* Disconnects by the peer are closed silently; anything else is logged
       together with the buffer bookkeeping. */
    if (errno != ENOTCONN && errno != ECONNRESET) {
        /* otherwise we have a real error, on which we close the connection */
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Failed to read, and not due to blocking:\n"
                                        "errno: %d %s \n"
                                        "rcurr=%lx ritem=%lx rbuf=%lx rlbytes=%d rsize=%d\n",
                                        errno, strerror(errno),
                                        (long)c->rcurr, (long)c->ritem, (long)c->rbuf,
                                        (int)c->rlbytes, (int)c->rsize);
    }
    conn_set_state(c, conn_closing);
    return true;
}
5487 
5488 bool conn_write(conn *c) {
5489     /*
5490      * We want to write out a simple response. If we haven't already,
5491      * assemble it into a msgbuf list (this will be a single-entry
5492      * list for TCP or a two-entry list for UDP).
5493      */
5494     if (c->iovused == 0 || (IS_UDP(c->transport) && c->iovused == 1)) {
5495         if (add_iov(c, c->wcurr, c->wbytes) != 0) {
5496             if (settings.verbose > 0) {
5497                 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5498                                                 "Couldn't build response\n");
5499             }
5500             conn_set_state(c, conn_closing);
5501             return true;
5502         }
5503     }
5504 
5505     return conn_mwrite(c);
5506 }
5507 
5508 bool conn_mwrite(conn *c) {
5509     if (IS_UDP(c->transport) && c->msgcurr == 0 && build_udp_headers(c) != 0) {
5510         if (settings.verbose > 0) {
5511             settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5512                                             "Failed to build UDP headers\n");
5513         }
5514         conn_set_state(c, conn_closing);
5515         return true;
5516     }
5517 
5518     switch (transmit(c)) {
5519     case TRANSMIT_COMPLETE:
5520         if (c->state == conn_mwrite) {
5521             while (c->ileft > 0) {
5522                 item *it = *(c->icurr);
5523                 settings.engine.v1->release(settings.engine.v0, c, it);
5524                 c->icurr++;
5525                 c->ileft--;
5526             }
5527             while (c->suffixleft > 0) {
5528                 char *suffix = *(c->suffixcurr);
5529                 cache_free(c->thread->suffix_cache, suffix);
5530                 c->suffixcurr++;
5531                 c->suffixleft--;
5532             }
5533             /* XXX:  I don't know why this wasn't the general case */
5534             if(c->protocol == binary_prot) {
5535                 conn_set_state(c, c->write_and_go);
5536             } else {
5537                 conn_set_state(c, conn_new_cmd);
5538             }
5539         } else if (c->state == conn_write) {
5540             if (c->write_and_free) {
5541                 free(c->write_and_free);
5542                 c->write_and_free = 0;
5543             }
5544             conn_set_state(c, c->write_and_go);
5545         } else {
5546             if (settings.verbose > 0) {
5547                 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5548                                                 "Unexpected state %d\n", c->state);
5549             }
5550             conn_set_state(c, conn_closing);
5551         }
5552         break;
5553 
5554     case TRANSMIT_INCOMPLETE:
5555     case TRANSMIT_HARD_ERROR:
5556         break;                   /* Continue in state machine. */
5557 
5558     case TRANSMIT_SOFT_ERROR:
5559         return false;
5560     }
5561 
5562     return true;
5563 }
5564 
5565 bool conn_pending_close(conn *c) {
5566     assert(c->sfd == INVALID_SOCKET);
5567     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5568                                     "Awaiting clients to release the cookie (pending close for %p)",
5569                                     (void*)c);
5570     LOCK_THREAD(c->thread);
5571     c->thread->pending_io = list_remove(c->thread->pending_io, c);
5572     if (!list_contains(c->thread->pending_close, c)) {
5573         enlist_conn(c, &c->thread->pending_close);
5574     }
5575     UNLOCK_THREAD(c->thread);
5576 
5577     /*
5578      * tell the tap connection that we're disconnecting it now,
5579      * but give it a grace period
5580      */
5581     perform_callbacks(ON_DISCONNECT, NULL, c);
5582 
5583     /*
5584      * disconnect callback may have changed the state for the object
5585      * so we might complete the disconnect now
5586      */
5587     return c->state != conn_pending_close;
5588 }
5589 
5590 bool conn_immediate_close(conn *c) {
5591     settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
5592                                     "Immediate close of %p",
5593                                     (void*)c);
5594     perform_callbacks(ON_DISCONNECT, NULL, c);
5595     conn_close(c);
5596 
5597     return false;
5598 }
5599 
5600 bool conn_closing(conn *c) {
5601     if (IS_UDP(c->transport)) {
5602         conn_cleanup(c);
5603         return false;
5604     }
5605 
5606     // We don't want any network notifications anymore..
5607     unregister_event(c);
5608     safe_close(c->sfd);
5609     c->sfd = INVALID_SOCKET;
5610 
5611     if (c->refcount > 1) {
5612         conn_set_state(c, conn_pending_close);
5613     } else {
5614         conn_set_state(c, conn_immediate_close);
5615     }
5616     return true;
5617 }
5618 
/*
 * State handler: hand the connection over from its current worker thread
 * to the tap thread.
 *
 * The connection's event is removed, the connection is taken off the
 * original thread's lists, re-homed on the tap thread's event base with
 * state conn_setup_tap_stream, queued on the tap thread's pending_io list,
 * and the tap thread is notified.
 *
 * @param c the connection to move (must currently belong to a thread other
 *          than the tap thread)
 * @return always false
 */
bool conn_add_tap_client(conn *c) {
    LIBEVENT_THREAD *tp = tap_thread;
    LIBEVENT_THREAD *orig_thread = c->thread;

    assert(orig_thread);
    assert(orig_thread != tp);

    c->ewouldblock = true;

    unregister_event(c);

    /* The original thread's lock is taken first and held across the whole
       hand-over; the tap thread's lock is nested inside it. */
    LOCK_THREAD(orig_thread);
    /* Clean out the lists */
    orig_thread->pending_io = list_remove(orig_thread->pending_io, c);
    orig_thread->pending_close = list_remove(orig_thread->pending_close, c);

    LOCK_THREAD(tp);
    /* Clear the recorded flags so that the next update_event() call does
       not return early on equal flags. */
    c->ev_flags = 0;
    conn_set_state(c, conn_setup_tap_stream);
    settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                    "Moving %d conn from %p to %p\n",
                                    c->sfd, c->thread, tp);
    c->thread = tp;
    c->event.ev_base = tp->base;
    assert(c->next == NULL);
    assert(c->list_state == 0);
    enlist_conn(c, &tp->pending_io);

    UNLOCK_THREAD(tp);

    UNLOCK_THREAD(orig_thread);

    /* Wake the tap thread so that it picks up the queued connection
       (presumably by draining its pending_io list; verify against
       notify_thread). */
    notify_thread(tp);

    return false;
}
5655 
5656 bool conn_setup_tap_stream(conn *c) {
5657     process_bin_tap_connect(c);
5658     return true;
5659 }
5660 
/*
 * libevent callback for a connection's socket: runs the connection's state
 * machine until a state handler returns false.
 *
 * Before running the state machine, connections waiting on the owning
 * thread's pending_close list are collected (only when current_time
 * differs from the thread's last_checked value).  Afterwards each
 * collected connection is closed if its refcount is 1, or put back on the
 * list otherwise.
 *
 * @param fd    the descriptor the event fired for; must equal c->sfd
 * @param which the EV_* flags that fired, stored in c->which
 * @param arg   the connection (conn *) the event belongs to
 */
void event_handler(const int fd, const short which, void *arg) {
    conn *c;

    c = (conn *)arg;
    assert(c != NULL);

    /* On shutdown, stop the event loop instead of doing any work. */
    if (memcached_shutdown) {
        event_base_loopbreak(c->event.ev_base);
        return ;
    }

    c->which = which;

    /* sanity */
    if (fd != c->sfd) {
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                    "Catastrophic: event fd doesn't match conn fd!\n");
        }
        conn_close(c);
        return;
    }

    perform_callbacks(ON_SWITCH_CONN, c, c);

    /* Per-event request budget; tap log shipping has its own limit. */
    c->nevents = settings.reqs_per_event;
    if (c->state == conn_ship_log) {
        c->nevents = settings.reqs_per_tap_event;
    }

    /* Remember the owning thread now, before the state machine runs. */
    LIBEVENT_THREAD *thr = c->thread;

    // Do we have pending closes?
    const size_t max_items = 256;
    conn *pending_close[max_items];
    size_t n_pending_close = 0;
    if (thr != NULL) {
        LOCK_THREAD(thr);
        if (thr->pending_close && thr->last_checked != current_time) {
            assert(!has_cycle(thr->pending_close));
            thr->last_checked = current_time;

            /* NOTE(review): presumably moves up to max_items entries from
               the list into the array and returns the count; confirm
               against list_to_array. */
            n_pending_close = list_to_array(pending_close, max_items,
                                            &thr->pending_close);
        }
        UNLOCK_THREAD(thr);
    }

    /* Drive the state machine: each handler returns true to continue with
       the (possibly changed) state, false to stop. */
    if (settings.verbose) {
        do {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "%d - Running task: (%s)\n",
                                            c->sfd, state_text(c->state));
        } while (c->state(c));
    } else {
        while (c->state(c)) {
            /* empty */
        }
    }

    /* Close any connections pending close */
    if (n_pending_close > 0) {
        for (size_t i = 0; i < n_pending_close; ++i) {
            conn *ce = pending_close[i];
            if (ce->refcount == 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                                "OK, time to nuke: %p\n",
                                                (void*)ce);
                conn_close(ce);
            } else {
                /* Still referenced elsewhere: put it back for a later
                   pass. */
                LOCK_THREAD(ce->thread);
                enlist_conn(ce, &ce->thread->pending_close);
                UNLOCK_THREAD(ce->thread);
            }
        }
    }

    /* NOTE(review): finalize_list is opaque here; presumably it completes
       the bookkeeping for the entries collected above. Confirm. */
    if (thr != NULL) {
        LOCK_THREAD(thr);
        finalize_list(pending_close, n_pending_close);
        UNLOCK_THREAD(thr);
    }
}
5744 
5745 static void dispatch_event_handler(int fd, short which, void *arg) {
5746     char buffer[80];
5747     ssize_t nr = recv(fd, buffer, sizeof(buffer), 0);
5748 
5749     if (nr != -1 && is_listen_disabled()) {
5750         bool enable = false;
5751         pthread_mutex_lock(&listen_state.mutex);
5752         listen_state.count -= nr;
5753         if (listen_state.count <= 0) {
5754             enable = true;
5755             listen_state.disabled = false;
5756         }
5757         pthread_mutex_unlock(&listen_state.mutex);
5758         if (enable) {
5759             conn *next;
5760             for (next = listen_conn; next; next = next->next) {
5761                 update_event(next, EV_READ | EV_PERSIST);
5762                 if (listen(next->sfd, settings.backlog) != 0) {
5763                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5764                                                     "listen() failed",
5765                                                     strerror(errno));
5766                 }
5767             }
5768         }
5769     }
5770 }
5771 
5772 
5773 
5774 static SOCKET new_socket(struct addrinfo *ai) {
5775     SOCKET sfd;
5776 
5777     sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
5778     if (sfd == INVALID_SOCKET) {
5779         return INVALID_SOCKET;
5780     }
5781 
5782     if (evutil_make_socket_nonblocking(sfd) == -1) {
5783         safe_close(sfd);
5784         return INVALID_SOCKET;
5785     }
5786 
5787     return sfd;
5788 }
5789 
5790 
5791 /*
5792  * Sets a socket's send buffer size to the maximum allowed by the system.
5793  */
5794 static void maximize_sndbuf(const int sfd) {
5795     socklen_t intsize = sizeof(int);
5796     int last_good = 0;
5797     int min, max, avg;
5798     int old_size;
5799 
5800     /* Start with the default size. */
5801     if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&old_size, &intsize) != 0) {
5802         if (settings.verbose > 0) {
5803             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5804                                             "getsockopt(SO_SNDBUF): %s",
5805                                             strerror(errno));
5806         }
5807 
5808         return;
5809     }
5810 
5811     /* Binary-search for the real maximum. */
5812     min = old_size;
5813     max = MAX_SENDBUF_SIZE;
5814 
5815     while (min <= max) {
5816         avg = ((unsigned int)(min + max)) / 2;
5817         if (setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&avg, intsize) == 0) {
5818             last_good = avg;
5819             min = avg + 1;
5820         } else {
5821             max = avg - 1;
5822         }
5823     }
5824 
5825     if (settings.verbose > 1) {
5826         settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
5827                  "<%d send buffer was %d, now %d\n", sfd, old_size, last_good);
5828     }
5829 }
5830 
5831 
5832 
/**
 * Create a socket and bind it to a specific port number
 *
 * Every address getaddrinfo() returns for the interface/port pair is tried
 * in turn. Addresses for which no socket can be created, or whose bind()
 * fails with EADDRINUSE, are skipped. TCP sockets are put into listening
 * state and added to the listen_conn list; UDP sockets are recorded in
 * udp_socket[] and dispatched to the worker threads.
 *
 * @param interface the interface to bind to (handed unchanged to
 *        getaddrinfo() together with AI_PASSIVE, so NULL is accepted)
 * @param port the port number to bind to (-1 is treated as 0)
 * @param transport the transport protocol (TCP / UDP)
 * @param portnumber_file A filepointer to write the port numbers to
 *        when they are successfully added to the list of ports we
 *        listen on.
 * @return 0 when at least one address was bound; 1 when none was, or when
 *         getaddrinfo(), bind() (other than EADDRINUSE) or listen() failed
 */
static int server_socket(const char *interface,
                         int port,
                         enum network_transport transport,
                         FILE *portnumber_file) {
    int sfd;
    struct linger ling = {0, 0};
    struct addrinfo *ai;
    struct addrinfo *next;
    struct addrinfo hints = { .ai_flags = AI_PASSIVE,
                              .ai_family = AF_UNSPEC };
    char port_buf[NI_MAXSERV];
    int error;
    int success = 0;
    int flags = 1;

    /* NOTE(review): the UDP socket table is restarted on every call, so
     * descriptors recorded by an earlier call (e.g. for another interface)
     * are overwritten -- confirm this is intended. */
    num_udp_socket = 0;

    hints.ai_socktype = IS_UDP(transport) ? SOCK_DGRAM : SOCK_STREAM;

    if (port == -1) {
        port = 0;
    }
    snprintf(port_buf, sizeof(port_buf), "%d", port);
    error = getaddrinfo(interface, port_buf, &hints, &ai);
    if (error != 0) {
        if (error != EAI_SYSTEM) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                     "getaddrinfo(): %s\n", gai_strerror(error));
        } else {
            /* For EAI_SYSTEM the actual cause is reported through errno;
             * the EAI_* value itself is not an errno code. */
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                     "getaddrinfo(): %s\n", strerror(errno));
        }
        return 1;
    }

    for (next = ai; next; next = next->ai_next) {
        conn *listen_conn_add;
        if ((sfd = new_socket(next)) == INVALID_SOCKET) {
            /* getaddrinfo can return "junk" addresses,
             * we make sure at least one works before erroring.
             */
            continue;
        }

#ifdef IPV6_V6ONLY
        if (next->ai_family == AF_INET6) {
            error = setsockopt(sfd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(IPV6_V6ONLY): %s",
                                                strerror(errno));
                safe_close(sfd);
                continue;
            }
        }
#endif

        setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
        if (IS_UDP(transport)) {
            maximize_sndbuf(sfd);
        } else {
            /* Failures of the TCP tuning options are logged but not fatal. */
            error = setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(SO_KEEPALIVE): %s",
                                                strerror(errno));
            }

            error = setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(SO_LINGER): %s",
                                                strerror(errno));
            }

            error = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
            if (error != 0) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "setsockopt(TCP_NODELAY): %s",
                                                strerror(errno));
            }
        }

        if (bind(sfd, next->ai_addr, next->ai_addrlen) == SOCKET_ERROR) {
            if (errno != EADDRINUSE) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "bind(): %s",
                                                strerror(errno));
                safe_close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            /* Address already in use: skip it and try the next one. */
            safe_close(sfd);
            continue;
        } else {
            success++;
            if (!IS_UDP(transport) && listen(sfd, settings.backlog) == SOCKET_ERROR) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                                "listen(): %s",
                                                strerror(errno));
                safe_close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            if (portnumber_file != NULL &&
                (next->ai_addr->sa_family == AF_INET ||
                 next->ai_addr->sa_family == AF_INET6)) {
                union {
                    struct sockaddr_in in;
                    struct sockaddr_in6 in6;
                } my_sockaddr;
                socklen_t len = sizeof(my_sockaddr);
                /* Ask the socket for the port actually bound. */
                if (getsockname(sfd, (struct sockaddr*)&my_sockaddr, &len)==0) {
                    if (next->ai_addr->sa_family == AF_INET) {
                        fprintf(portnumber_file, "%s INET: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in.sin_port));
                    } else {
                        fprintf(portnumber_file, "%s INET6: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in6.sin6_port));
                    }
                }
            }
        }

        if (IS_UDP(transport)) {
            int c;

            /* Record the descriptor only after bind() succeeded, so the
             * table never holds a descriptor that was closed on a failed
             * bind. */
            udp_socket[num_udp_socket] = sfd;
            num_udp_socket++;

            for (c = 0; c < settings.num_threads_per_udp; c++) {
                /* this is guaranteed to hit all threads because we round-robin */
                dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
                                  UDP_READ_BUFFER_SIZE, transport);
                STATS_LOCK();
                ++stats.curr_conns;
                ++stats.daemon_conns;
                STATS_UNLOCK();
            }
        } else {
            if (!(listen_conn_add = conn_new(sfd, conn_listening,
                                             EV_READ | EV_PERSIST, 1,
                                             transport, main_base, NULL))) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                        "failed to create listening connection\n");
                exit(EXIT_FAILURE);
            }
            /* Push the new listener onto the front of the listen_conn list. */
            listen_conn_add->next = listen_conn;
            listen_conn = listen_conn_add;
            STATS_LOCK();
            ++stats.curr_conns;
            ++stats.daemon_conns;
            STATS_UNLOCK();
        }
    }

    freeaddrinfo(ai);

    /* Return zero iff we detected no errors in starting up connections */
    return success == 0;
}
6003 
6004 static int server_sockets(int port, enum network_transport transport,
6005                           FILE *portnumber_file) {
6006     if (settings.inter == NULL) {
6007         return server_socket(settings.inter, port, transport, portnumber_file);
6008     } else {
6009         // tokenize them and bind to each one of them..
6010         char *b;
6011         int ret = 0;
6012         char *list = strdup(settings.inter);
6013 
6014         if (list == NULL) {
6015             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6016                                             "Failed to allocate memory for parsing server interface string\n");
6017             return 1;
6018         }
6019         for (char *p = strtok_r(list, ";,", &b);
6020              p != NULL;
6021              p = strtok_r(NULL, ";,", &b)) {
6022             int the_port = port;
6023 
6024             char *s = strchr(p, ':');
6025             if (s != NULL) {
6026                 *s = '\0';
6027                 ++s;
6028                 if (!safe_strtol(s, &the_port)) {
6029                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6030                                                     "Invalid port number: \"%s\"", s);
6031                     return 1;
6032                 }
6033             }
6034             if (strcmp(p, "*") == 0) {
6035                 p = NULL;
6036             }
6037             ret |= server_socket(p, the_port, transport, portnumber_file);
6038         }
6039         free(list);
6040         return ret;
6041     }
6042 }
6043 
6044 static int new_socket_unix(void) {
6045     int sfd;
6046 
6047     if ((sfd = socket(AF_UNIX, SOCK_STREAM, 0)) == INVALID_SOCKET) {
6048         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6049                                         "socket(AF_UNIX, SOCK_STREAM, 0): %s",
6050                                         strerror(errno));
6051         return INVALID_SOCKET;
6052     }
6053 
6054     if (evutil_make_socket_nonblocking(sfd) == -1) {
6055         safe_close(sfd);
6056         return INVALID_SOCKET;
6057     }
6058     return sfd;
6059 }
6060 
6061 /* this will probably not work on windows */
6062 static int server_socket_unix(const char *path, int access_mask) {
6063     int sfd;
6064     struct linger ling = {0, 0};
6065     struct sockaddr_un addr;
6066     struct stat tstat;
6067     int flags =1;
6068     int old_umask;
6069 
6070     if (!path) {
6071         return 1;
6072     }
6073 
6074     if ((sfd = new_socket_unix()) == -1) {
6075         return 1;
6076     }
6077 
6078     /*
6079      * Clean up a previous socket file if we left it around
6080      */
6081     if (lstat(path, &tstat) == 0) {
6082         if (S_ISSOCK(tstat.st_mode))
6083             unlink(path);
6084     }
6085 
6086     setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
6087     setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
6088     setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
6089 
6090     /*
6091      * the memset call clears nonstandard fields in some impementations
6092      * that otherwise mess things up.
6093      */
6094     memset(&addr, 0, sizeof(addr));
6095 
6096     addr.sun_family = AF_UNIX;
6097     strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
6098     assert(strcmp(addr.sun_path, path) == 0);
6099     old_umask = umask( ~(access_mask&0777));
6100     if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
6101         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6102                                         "bind(): %s",
6103                                         strerror(errno));
6104         safe_close(sfd);
6105         umask(old_umask);
6106         return 1;
6107     }
6108     umask(old_umask);
6109     if (listen(sfd, settings.backlog) == -1) {
6110         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6111                                         "listen(): %s",
6112                                         strerror(errno));
6113         safe_close(sfd);
6114         return 1;
6115     }
6116     if (!(listen_conn = conn_new(sfd, conn_listening,
6117                                  EV_READ | EV_PERSIST, 1,
6118                                  local_transport, main_base, NULL))) {
6119         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6120                  "failed to create listening connection\n");
6121         exit(EXIT_FAILURE);
6122     }
6123     STATS_LOCK();
6124     ++stats.daemon_conns;
6125     STATS_UNLOCK();
6126 
6127     return 0;
6128 }
6129 
/* Timer event that clock_handler() (re-)arms on main_base. */
static struct event clockevent;
6131 
6132 /* time-sensitive callers can call it by hand with this, outside the normal ever-1-second timer */
6133 static void set_current_time(void) {
6134     struct timeval timer;
6135 
6136     gettimeofday(&timer, NULL);
6137     current_time = (rel_time_t) (timer.tv_sec - process_started);
6138 }
6139 
6140 static void clock_handler(const int fd, const short which, void *arg) {
6141     struct timeval t = {.tv_sec = 1, .tv_usec = 0};
6142     static bool initialized = false;
6143 
6144     if (memcached_shutdown) {
6145         event_base_loopbreak(main_base);
6146         return ;
6147     }
6148 
6149     if (initialized) {
6150         /* only delete the event if it's actually there. */
6151         evtimer_del(&clockevent);
6152     } else {
6153         initialized = true;
6154     }
6155 
6156     evtimer_set(&clockevent, clock_handler, 0);
6157     event_base_set(main_base, &clockevent);
6158     evtimer_add(&clockevent, &t);
6159 
6160     set_current_time();
6161 }
6162 
6163 static void usage(void) {
6164     printf(PACKAGE " " VERSION "\n");
6165     printf("-p <num>      TCP port number to listen on (default: 11211)\n"
6166            "-U <num>      UDP port number to listen on (default: 11211, 0 is off)\n"
6167            "-s <file>     UNIX socket path to listen on (disables network support)\n"
6168            "-a <mask>     access mask for UNIX socket, in octal (default: 0700)\n"
6169            "-l <addr>     interface to listen on (default: INADDR_ANY, all addresses)\n"
6170            "              <addr> may be specified as host:port. If you don't specify\n"
6171            "              a port number, the value you specified with -p or -U is\n"
6172            "              used. You may specify multiple addresses separated by comma\n"
6173            "              or by using -l multiple times\n"
6174            "-d            run as a daemon\n"
6175            "-r            maximize core file limit\n"
6176            "-u <username> assume identity of <username> (only when run as root)\n"
6177            "-m <num>      max memory to use for items in megabytes (default: 64 MB)\n"
6178            "-M            return error on memory exhausted (rather than removing items)\n"
6179            "-c <num>      max simultaneous connections (default: 1000)\n"
6180            "-k            lock down all paged memory.  Note that there is a\n"
6181            "              limit on how much memory you may lock.  Trying to\n"
6182            "              allocate more than that would fail, so be sure you\n"
6183            "              set the limit correctly for the user you started\n"
6184            "              the daemon with (not for -u <username> user;\n"
6185            "              under sh this is done with 'ulimit -S -l NUM_KB').\n"
6186            "-v            verbose (print errors/warnings while in event loop)\n"
6187            "-vv           very verbose (also print client commands/reponses)\n"
6188            "-vvv          extremely verbose (also print internal state transitions)\n"
6189            "-h            print this help and exit\n"
6190            "-i            print memcached and libevent license\n"
6191            "-P <file>     save PID in <file>, only used with -d option\n"
6192            "-f <factor>   chunk size growth factor (default: 1.25)\n"
6193            "-n <bytes>    minimum space allocated for key+value+flags (default: 48)\n");
6194     printf("-L            Try to use large memory pages (if available). Increasing\n"
6195            "              the memory page size could reduce the number of TLB misses\n"
6196            "              and improve the performance. In order to get large pages\n"
6197            "              from the OS, memcached will allocate the total item-cache\n"
6198            "              in one large chunk.\n");
6199     printf("-D <char>     Use <char> as the delimiter between key prefixes and IDs.\n"
6200            "              This is used for per-prefix stats reporting. The default is\n"
6201            "              \":\" (colon). If this option is specified, stats collection\n"
6202            "              is turned on automatically; if not, then it may be turned on\n"
6203            "              by sending the \"stats detail on\" command to the server.\n");
6204     printf("-t <num>      number of threads to use (default: 4)\n");
6205     printf("-R            Maximum number of requests per event, limits the number of\n"
6206            "              requests process for a given connection to prevent \n"
6207            "              starvation (default: 20)\n");
6208     printf("-C            Disable use of CAS\n");
6209     printf("-b            Set the backlog queue limit (default: 1024)\n");
6210     printf("-B            Binding protocol - one of ascii, binary, or auto (default)\n");
6211     printf("-I            Override the size of each slab page. Adjusts max item size\n"
6212            "              (default: 1mb, min: 1k, max: 128m)\n");
6213     printf("-q            Disable detailed stats commands\n");
6214 #ifdef SASL_ENABLED
6215     printf("-S            Require SASL authentication\n");
6216 #endif
6217     printf("-X module,cfg Load the module and initialize it with the config\n");
6218     printf("-E engine     Load engine as the storage engine\n");
6219     printf("-e config     Pass config as configuration options to the storage engine\n");
6220     printf("\nEnvironment variables:\n"
6221            "MEMCACHED_PORT_FILENAME   File to write port information to\n"
6222            "MEMCACHED_TOP_KEYS        Number of top keys to keep track of\n"
6223            "MEMCACHED_REQS_TAP_EVENT  Similar to -R but for tap_ship_log\n");
6224 }
6225 static void usage_license(void) {
6226     printf(PACKAGE " " VERSION "\n\n");
6227     printf(
6228     "Copyright (c) 2003, Danga Interactive, Inc. <http://www.danga.com/>\n"
6229     "All rights reserved.\n"
6230     "\n"
6231     "Redistribution and use in source and binary forms, with or without\n"
6232     "modification, are permitted provided that the following conditions are\n"
6233     "met:\n"
6234     "\n"
6235     "    * Redistributions of source code must retain the above copyright\n"
6236     "notice, this list of conditions and the following disclaimer.\n"
6237     "\n"
6238     "    * Redistributions in binary form must reproduce the above\n"
6239     "copyright notice, this list of conditions and the following disclaimer\n"
6240     "in the documentation and/or other materials provided with the\n"
6241     "distribution.\n"
6242     "\n"
6243     "    * Neither the name of the Danga Interactive nor the names of its\n"
6244     "contributors may be used to endorse or promote products derived from\n"
6245     "this software without specific prior written permission.\n"
6246     "\n"
6247     "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n"
6248     "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n"
6249     "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n"
6250     "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n"
6251     "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n"
6252     "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
6253     "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6254     "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6255     "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6256     "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n"
6257     "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6258     "\n"
6259     "\n"
6260     "This product includes software developed by Niels Provos.\n"
6261     "\n"
6262     "[ libevent ]\n"
6263     "\n"
6264     "Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>\n"
6265     "All rights reserved.\n"
6266     "\n"
6267     "Redistribution and use in source and binary forms, with or without\n"
6268     "modification, are permitted provided that the following conditions\n"
6269     "are met:\n"
6270     "1. Redistributions of source code must retain the above copyright\n"
6271     "   notice, this list of conditions and the following disclaimer.\n"
6272     "2. Redistributions in binary form must reproduce the above copyright\n"
6273     "   notice, this list of conditions and the following disclaimer in the\n"
6274     "   documentation and/or other materials provided with the distribution.\n"
6275     "3. All advertising materials mentioning features or use of this software\n"
6276     "   must display the following acknowledgement:\n"
6277     "      This product includes software developed by Niels Provos.\n"
6278     "4. The name of the author may not be used to endorse or promote products\n"
6279     "   derived from this software without specific prior written permission.\n"
6280     "\n"
6281     "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
6282     "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
6283     "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
6284     "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
6285     "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n"
6286     "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6287     "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6288     "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6289     "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
6290     "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6291     );
6292 
6293     return;
6294 }
6295 
6296 static void save_pid(const char *pid_file) {
6297     FILE *fp;
6298 
6299     if (access(pid_file, F_OK) == 0) {
6300         if ((fp = fopen(pid_file, "r")) != NULL) {
6301             char buffer[1024];
6302             if (fgets(buffer, sizeof(buffer), fp) != NULL) {
6303                 unsigned int pid;
6304                 if (safe_strtoul(buffer, &pid) && kill((pid_t)pid, 0) == 0) {
6305                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6306                                "WARNING: The pid file contained the following (running) pid: %u\n", pid);
6307                 }
6308             }
6309             fclose(fp);
6310         }
6311     }
6312 
6313     if ((fp = fopen(pid_file, "w")) == NULL) {
6314         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6315                  "Could not open the pid file %s for writing: %s\n",
6316                  pid_file, strerror(errno));
6317         return;
6318     }
6319 
6320     fprintf(fp,"%ld\n", (long)getpid());
6321     if (fclose(fp) == -1) {
6322         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6323                 "Could not close the pid file %s: %s\n",
6324                 pid_file, strerror(errno));
6325     }
6326 }
6327 
6328 static void remove_pidfile(const char *pid_file) {
6329     if (pid_file != NULL) {
6330         if (unlink(pid_file) != 0) {
6331             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6332                     "Could not remove the pid file %s: %s\n",
6333                     pid_file, strerror(errno));
6334         }
6335     }
6336 }
6337 
6338 #ifndef HAVE_SIGIGNORE
6339 static int sigignore(int sig) {
6340     struct sigaction sa = { .sa_handler = SIG_IGN, .sa_flags = 0 };
6341 
6342     if (sigemptyset(&sa.sa_mask) == -1 || sigaction(sig, &sa, 0) == -1) {
6343         return -1;
6344     }
6345     return 0;
6346 }
6347 #endif /* !HAVE_SIGIGNORE */
6348 
6349 static void sigterm_handler(int sig) {
6350     assert(sig == SIGTERM || sig == SIGINT);
6351     memcached_shutdown = 1;
6352 }
6353 
6354 static int install_sigterm_handler(void) {
6355     struct sigaction sa = {.sa_handler = sigterm_handler, .sa_flags = 0};
6356 
6357     if (sigemptyset(&sa.sa_mask) == -1 || sigaction(SIGTERM, &sa, 0) == -1 ||
6358         sigaction(SIGINT, &sa, 0) == -1) {
6359         return -1;
6360     }
6361 
6362     return 0;
6363 }
6364 
/*
 * On systems that support multiple page sizes, TLB misses may be reduced by
 * using the biggest available page size.
 *
 * When both getpagesizes() and memcntl() are available, the largest
 * reported page size is requested via MC_HAT_ADVISE / MHA_MAPSIZE_BSSBRK.
 * Returns 0 on success and -1 (after logging a warning) when the page
 * sizes cannot be queried or the advice is rejected. On builds without
 * those interfaces this is a no-op that returns 0.
 */
static int enable_large_pages(void) {
#if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
    size_t page_sizes[32];
    int count = getpagesizes(page_sizes, 32);

    if (count == -1) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
          "Failed to get supported pagesizes: %s\nWill use default page size\n",
          strerror(errno));
        return -1;
    }

    /* Pick the largest of the reported sizes. */
    size_t largest = page_sizes[0];
    for (int idx = 1; idx < count; ++idx) {
        if (page_sizes[idx] > largest) {
            largest = page_sizes[idx];
        }
    }

    struct memcntl_mha advice = {0};
    advice.mha_flags = 0;
    advice.mha_pagesize = largest;
    advice.mha_cmd = MHA_MAPSIZE_BSSBRK;

    if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&advice, 0, 0) == -1) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
              "Failed to set large pages: %s\nWill use default page size\n",
              strerror(errno));
        return -1;
    }

    return 0;
#else
    return 0;
#endif
}
6407 
6408 static const char* get_server_version(void) {
6409     return VERSION;
6410 }
6411 
6412 static void store_engine_specific(const void *cookie,
6413                                   void *engine_data) {
6414     conn *c = (conn*)cookie;
6415     c->engine_storage = engine_data;
6416 }
6417 
6418 static void *get_engine_specific(const void *cookie) {
6419     conn *c = (conn*)cookie;
6420     return c->engine_storage;
6421 }
6422 
6423 static int get_socket_fd(const void *cookie) {
6424     conn *c = (conn *)cookie;
6425     return c->sfd;
6426 }
6427 
6428 static void set_tap_nack_mode(const void *cookie, bool enable) {
6429     conn *c = (conn *)cookie;
6430     c->tap_nack_mode = enable;
6431 }
6432 
6433 static void reserve_cookie(const void *cookie) {
6434     conn *c = (conn *)cookie;
6435     ++c->refcount;
6436 }
6437 
6438 static void release_cookie(const void *cookie) {
6439     conn *c = (conn *)cookie;
6440     --c->refcount;
6441 }
6442 
6443 static int num_independent_stats(void) {
6444     return settings.num_threads + 1;
6445 }
6446 
6447 static void *new_independent_stats(void) {
6448     int ii;
6449     int nrecords = num_independent_stats();
6450     struct independent_stats *independent_stats = calloc(sizeof(independent_stats) + sizeof(struct thread_stats) * nrecords, 1);
6451 
6452 #ifdef INNODB_MEMCACHED
6453     if (independent_stats == NULL) {
6454 	fprintf(stderr, "Unable to allocate memory for"
6455 		       "independent_stats...\n");
6456        return (NULL);
6457     }
6458 #endif
6459 
6460     if (settings.topkeys > 0)
6461         independent_stats->topkeys = topkeys_init(settings.topkeys);
6462     for (ii = 0; ii < nrecords; ii++)
6463         pthread_mutex_init(&independent_stats->thread_stats[ii].mutex, NULL);
6464     return independent_stats;
6465 }
6466 
6467 static void release_independent_stats(void *stats) {
6468     int ii;
6469     int nrecords = num_independent_stats();
6470     struct independent_stats *independent_stats = stats;
6471     if (independent_stats->topkeys)
6472         topkeys_free(independent_stats->topkeys);
6473     for (ii = 0; ii < nrecords; ii++)
6474         pthread_mutex_destroy(&independent_stats->thread_stats[ii].mutex);
6475     free(independent_stats);
6476 }
6477 
6478 static inline struct independent_stats *get_independent_stats(conn *c) {
6479     struct independent_stats *independent_stats;
6480     if (settings.engine.v1->get_stats_struct != NULL) {
6481         independent_stats = settings.engine.v1->get_stats_struct(settings.engine.v0, (const void *)c);
6482         if (independent_stats == NULL)
6483             independent_stats = default_independent_stats;
6484     } else {
6485         independent_stats = default_independent_stats;
6486     }
6487     return independent_stats;
6488 }
6489 
6490 static inline struct thread_stats *get_thread_stats(conn *c) {
6491     struct independent_stats *independent_stats = get_independent_stats(c);
6492     assert(c->thread->index < num_independent_stats());
6493     return &independent_stats->thread_stats[c->thread->index];
6494 }
6495 
6496 static void register_callback(ENGINE_HANDLE *eh,
6497                               ENGINE_EVENT_TYPE type,
6498                               EVENT_CALLBACK cb, const void *cb_data) {
6499     struct engine_event_handler *h =
6500         calloc(sizeof(struct engine_event_handler), 1);
6501 
6502     assert(h);
6503     h->cb = cb;
6504     h->cb_data = cb_data;
6505     h->next = engine_event_handlers[type];
6506     engine_event_handlers[type] = h;
6507 }
6508 
6509 static rel_time_t get_current_time(void)
6510 {
6511     return current_time;
6512 }
6513 
6514 static void count_eviction(const void *cookie, const void *key, const int nkey) {
6515     topkeys_t *tk = get_independent_stats((conn*)cookie)->topkeys;
6516     TK(tk, evictions, key, nkey, get_current_time());
6517 }
6518 
6519 /**
6520  * To make it easy for engine implementors that doesn't want to care about
6521  * writing their own incr/decr code, they can just set the arithmetic function
6522  * to NULL and use this implementation. It is not efficient, due to the fact
6523  * that it does multiple calls through the interface (get and then cas store).
6524  * If you don't care, feel free to use it..
6525  */
6526 static ENGINE_ERROR_CODE internal_arithmetic(ENGINE_HANDLE* handle,
6527                                              const void* cookie,
6528                                              const void* key,
6529                                              const int nkey,
6530                                              const bool increment,
6531                                              const bool create,
6532                                              const uint64_t delta,
6533                                              const uint64_t initial,
6534                                              const rel_time_t exptime,
6535                                              uint64_t *cas,
6536                                              uint64_t *result,
6537                                              uint16_t vbucket)
6538 {
6539     ENGINE_HANDLE_V1 *e = (ENGINE_HANDLE_V1*)handle;
6540 
6541     item *it = NULL;
6542 
6543     ENGINE_ERROR_CODE ret;
6544     ret = e->get(handle, cookie, &it, key, nkey, vbucket);
6545 
6546     if (ret == ENGINE_SUCCESS) {
6547         item_info info = { .nvalue = 1 };
6548 
6549         if (!e->get_item_info(handle, cookie, it, &info)) {
6550             e->release(handle, cookie, it);
6551             return ENGINE_FAILED;
6552         }
6553 
6554         char value[80];
6555 
6556         if (info.value[0].iov_len > (sizeof(value) - 1)) {
6557             e->release(handle, cookie, it);
6558             return ENGINE_EINVAL;
6559         }
6560 
6561         memcpy(value, info.value[0].iov_base, info.value[0].iov_len);
6562         value[info.value[0].iov_len] = '\0';
6563 
6564         uint64_t val;
6565         if (!safe_strtoull(value, &val)) {
6566             e->release(handle, cookie, it);
6567             return ENGINE_EINVAL;
6568         }
6569 
6570         if (increment) {
6571             val += delta;
6572         } else {
6573             if (delta > val) {
6574                 val = 0;
6575             } else {
6576                 val -= delta;
6577             }
6578         }
6579 
6580         size_t nb = snprintf(value, sizeof(value), "%"PRIu64, val);
6581         *result = val;
6582         item *nit = NULL;
6583         if (e->allocate(handle, cookie, &nit, key,
6584                         nkey, nb, info.flags, info.exptime) != ENGINE_SUCCESS) {
6585             e->release(handle, cookie, it);
6586             return ENGINE_ENOMEM;
6587         }
6588 
6589         item_info i2 = { .nvalue = 1 };
6590         if (!e->get_item_info(handle, cookie, nit, &i2)) {
6591             e->release(handle, cookie, it);
6592             e->release(handle, cookie, nit);
6593             return ENGINE_FAILED;
6594         }
6595 
6596         memcpy(i2.value[0].iov_base, value, nb);
6597         e->item_set_cas(handle, cookie, nit, info.cas);
6598         ret = e->store(handle, cookie, nit, cas, OPERATION_CAS, vbucket);
6599         e->release(handle, cookie, it);
6600         e->release(handle, cookie, nit);
6601     } else if (ret == ENGINE_KEY_ENOENT && create) {
6602         char value[80];
6603         size_t nb = snprintf(value, sizeof(value), "%"PRIu64"\r\n", initial);
6604         *result = initial;
6605         if (e->allocate(handle, cookie, &it, key, nkey, nb, 0, exptime) != ENGINE_SUCCESS) {
6606             e->release(handle, cookie, it);
6607             return ENGINE_ENOMEM;
6608         }
6609 
6610         item_info info = { .nvalue = 1 };
6611         if (!e->get_item_info(handle, cookie, it, &info)) {
6612             e->release(handle, cookie, it);
6613             return ENGINE_FAILED;
6614         }
6615 
6616         memcpy(info.value[0].iov_base, value, nb);
6617         ret = e->store(handle, cookie, it, cas, OPERATION_CAS, vbucket);
6618         e->release(handle, cookie, it);
6619     }
6620 
6621     /* We had a race condition.. just call ourself recursively to retry */
6622     if (ret == ENGINE_KEY_EEXISTS) {
6623         return internal_arithmetic(handle, cookie, key, nkey, increment, create, delta,
6624                                    initial, exptime, cas, result, vbucket);
6625     }
6626 
6627     return ret;
6628 }
6629 
6630 /**
6631  * Register an extension if it's not already registered
6632  *
6633  * @param type the type of the extension to register
6634  * @param extension the extension to register
6635  * @return true if success, false otherwise
6636  */
6637 static bool register_extension(extension_type_t type, void *extension)
6638 {
6639     if (extension == NULL) {
6640         return false;
6641     }
6642 
6643     switch (type) {
6644     case EXTENSION_DAEMON:
6645         for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6646              ptr != NULL;
6647              ptr = ptr->next) {
6648             if (ptr == extension) {
6649                 return false;
6650             }
6651         }
6652         ((EXTENSION_DAEMON_DESCRIPTOR *)(extension))->next = settings.extensions.daemons;
6653         settings.extensions.daemons = extension;
6654         return true;
6655     case EXTENSION_LOGGER:
6656         settings.extensions.logger = extension;
6657         return true;
6658     case EXTENSION_ASCII_PROTOCOL:
6659         if (settings.extensions.ascii != NULL) {
6660             EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *last;
6661             for (last = settings.extensions.ascii; last->next != NULL;
6662                  last = last->next) {
6663                 if (last == extension) {
6664                     return false;
6665                 }
6666             }
6667             if (last == extension) {
6668                 return false;
6669             }
6670             last->next = extension;
6671             last->next->next = NULL;
6672         } else {
6673             settings.extensions.ascii = extension;
6674             settings.extensions.ascii->next = NULL;
6675         }
6676         return true;
6677 
6678     default:
6679         return false;
6680     }
6681 }
6682 
6683 /**
6684  * Unregister an extension
6685  *
6686  * @param type the type of the extension to remove
6687  * @param extension the extension to remove
6688  */
6689 static void unregister_extension(extension_type_t type, void *extension)
6690 {
6691     switch (type) {
6692     case EXTENSION_DAEMON:
6693         {
6694             EXTENSION_DAEMON_DESCRIPTOR *prev = NULL;
6695             EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6696 
6697             while (ptr != NULL && ptr != extension) {
6698                 prev = ptr;
6699                 ptr = ptr->next;
6700             }
6701 
6702             if (ptr != NULL && prev != NULL) {
6703                 prev->next = ptr->next;
6704             }
6705 
6706             if (settings.extensions.daemons == ptr) {
6707                 settings.extensions.daemons = ptr->next;
6708             }
6709         }
6710         break;
6711     case EXTENSION_LOGGER:
6712         if (settings.extensions.logger == extension) {
6713             if (get_stderr_logger() == extension) {
6714                 settings.extensions.logger = get_null_logger();
6715             } else {
6716                 settings.extensions.logger = get_stderr_logger();
6717             }
6718         }
6719         break;
6720     case EXTENSION_ASCII_PROTOCOL:
6721         {
6722             EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *prev = NULL;
6723             EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
6724 
6725             while (ptr != NULL && ptr != extension) {
6726                 prev = ptr;
6727                 ptr = ptr->next;
6728             }
6729 
6730             if (ptr != NULL && prev != NULL) {
6731                 prev->next = ptr->next;
6732             }
6733 
6734             if (settings.extensions.ascii == ptr) {
6735                 settings.extensions.ascii = ptr->next;
6736             }
6737         }
6738         break;
6739 
6740     default:
6741         ;
6742     }
6743 
6744 }
6745 
6746 /**
6747  * Get the named extension
6748  */
6749 static void* get_extension(extension_type_t type)
6750 {
6751     switch (type) {
6752     case EXTENSION_DAEMON:
6753         return settings.extensions.daemons;
6754 
6755     case EXTENSION_LOGGER:
6756         return settings.extensions.logger;
6757 
6758     case EXTENSION_ASCII_PROTOCOL:
6759         return settings.extensions.ascii;
6760 
6761     default:
6762         return NULL;
6763     }
6764 }
6765 
6766 #ifdef INNODB_MEMCACHED
6767 void shutdown_server(void) {
6768 #else
6769 static void shutdown_server(void) {
6770 #endif /* INNODB_MEMCACHED */
6771 #ifdef INNODB_MEMCACHED
6772     int i;
6773     /* Clean up connections */
6774     while (listen_conn) {
6775 	conn_closing(listen_conn);
6776 	listen_conn = listen_conn->next;
6777     }
6778 
6779     for (i = 0; i < num_udp_socket; i++) {
6780 	safe_close(udp_socket[i]);
6781     }
6782 #endif
6783     memcached_shutdown = 1;
6784 }
6785 
6786 #ifdef INNODB_MEMCACHED
6787 bool shutdown_complete(void)
6788 {
6789     return(memcached_shutdown == 2);
6790 }
6791 
6792 bool init_complete(void)
6793 {
6794     return(memcached_initialized == 1);
6795 }
6796 #endif
6797 
6798 static EXTENSION_LOGGER_DESCRIPTOR* get_logger(void)
6799 {
6800     return settings.extensions.logger;
6801 }
6802 
6803 static EXTENSION_LOG_LEVEL get_log_level(void)
6804 {
6805     EXTENSION_LOG_LEVEL ret;
6806     switch (settings.verbose) {
6807     case 0: ret = EXTENSION_LOG_WARNING; break;
6808     case 1: ret = EXTENSION_LOG_INFO; break;
6809     case 2: ret = EXTENSION_LOG_DEBUG; break;
6810     default:
6811         ret = EXTENSION_LOG_DETAIL;
6812     }
6813     return ret;
6814 }
6815 
6816 static void set_log_level(EXTENSION_LOG_LEVEL severity)
6817 {
6818     switch (severity) {
6819     case EXTENSION_LOG_WARNING: settings.verbose = 0; break;
6820     case EXTENSION_LOG_INFO: settings.verbose = 1; break;
6821     case EXTENSION_LOG_DEBUG: settings.verbose = 2; break;
6822     default:
6823         settings.verbose = 3;
6824     }
6825 }
6826 
/**
 * Stats callback that appends "key=val;" to the NUL-terminated string the
 * cookie points at.
 *
 * The cookie is treated as a 1024 byte character buffer. Entries with an
 * empty key or value, and entries that would not fit together with the
 * terminating NUL, are silently dropped.
 *
 * @param key  the stat name (klen bytes, need not be NUL-terminated)
 * @param klen length of key
 * @param val  the stat value (vlen bytes, need not be NUL-terminated)
 * @param vlen length of val
 * @param cookie the destination buffer
 */
static void get_config_append_stats(const char *key, const uint16_t klen,
                                    const char *val, const uint32_t vlen,
                                    const void *cookie)
{
    if (klen != 0 && vlen != 0) {
        char *buffer = (char*)cookie;
        size_t used = strlen(buffer);

        /* key + '=' + value + ';' + NUL must fit in the buffer */
        if ((used + klen + vlen + 3) <= 1024) {
            char *out = buffer + used;

            memcpy(out, key, klen);
            out += klen;
            *out++ = '=';
            memcpy(out, val, vlen);
            out += vlen;
            out[0] = ';';
            out[1] = '\0';
        }
    }
}
6851 
6852 static bool get_config(struct config_item items[]) {
6853     char config[1024];
6854     config[0] = '\0';
6855     process_stat_settings(get_config_append_stats, config);
6856     int rval = parse_config(config, items, NULL);
6857     return rval >= 0;
6858 }
6859 
6860 /**
6861  * Callback the engines may call to get the public server interface
6862  * @return pointer to a structure containing the interface. The client should
6863  *         know the layout and perform the proper casts.
6864  */
6865 static SERVER_HANDLE_V1 *get_server_api(void)
6866 {
6867     static SERVER_CORE_API core_api = {
6868         .server_version = get_server_version,
6869         .hash = hash,
6870         .realtime = realtime,
6871         .abstime = abstime,
6872         .get_current_time = get_current_time,
6873         .parse_config = parse_config,
6874         .shutdown = shutdown_server,
6875         .get_config = get_config
6876     };
6877 
6878     static SERVER_COOKIE_API server_cookie_api = {
6879         .get_auth_data = get_auth_data,
6880         .store_engine_specific = store_engine_specific,
6881         .get_engine_specific = get_engine_specific,
6882         .get_socket_fd = get_socket_fd,
6883         .set_tap_nack_mode = set_tap_nack_mode,
6884         .notify_io_complete = notify_io_complete,
6885         .reserve = reserve_cookie,
6886         .release = release_cookie
6887     };
6888 
6889     static SERVER_STAT_API server_stat_api = {
6890         .new_stats = new_independent_stats,
6891         .release_stats = release_independent_stats,
6892         .evicting = count_eviction
6893     };
6894 
6895     static SERVER_LOG_API server_log_api = {
6896         .get_logger = get_logger,
6897         .get_level = get_log_level,
6898         .set_level = set_log_level
6899     };
6900     static SERVER_EXTENSION_API extension_api = {
6901         .register_extension = register_extension,
6902         .unregister_extension = unregister_extension,
6903         .get_extension = get_extension
6904     };
6905 
6906     static SERVER_CALLBACK_API callback_api = {
6907         .register_callback = register_callback,
6908         .perform_callbacks = perform_callbacks,
6909     };
6910 
6911     static SERVER_HANDLE_V1 rv = {
6912         .interface = 1,
6913         .core = &core_api,
6914         .stat = &server_stat_api,
6915         .extension = &extension_api,
6916         .callback = &callback_api,
6917         .log = &server_log_api,
6918         .cookie = &server_cookie_api
6919     };
6920 
6921     if (rv.engine == NULL) {
6922         rv.engine = settings.engine.v0;
6923     }
6924 
6925     return &rv;
6926 }
6927 
6928 /**
6929  * Load a shared object and initialize all the extensions in there.
6930  *
6931  * @param soname the name of the shared object (may not be NULL)
6932  * @param config optional configuration parameters
6933  * @return true if success, false otherwise
6934  */
6935 static bool load_extension(const char *soname, const char *config) {
6936     if (soname == NULL) {
6937         return false;
6938     }
6939 
6940     /* Hack to remove the warning from C99 */
6941     union my_hack {
6942         MEMCACHED_EXTENSIONS_INITIALIZE initialize;
6943         void* voidptr;
6944     } funky = {.initialize = NULL };
6945 
6946     void *handle = dlopen(soname, RTLD_NOW | RTLD_LOCAL);
6947     if (handle == NULL) {
6948         const char *msg = dlerror();
6949         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6950                 "Failed to open library \"%s\": %s\n",
6951                 soname, msg ? msg : "unknown error");
6952         return false;
6953     }
6954 
6955     void *symbol = dlsym(handle, "memcached_extensions_initialize");
6956     if (symbol == NULL) {
6957         const char *msg = dlerror();
6958         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6959                 "Could not find symbol \"memcached_extensions_initialize\" in %s: %s\n",
6960                 soname, msg ? msg : "unknown error");
6961         return false;
6962     }
6963     funky.voidptr = symbol;
6964 
6965     EXTENSION_ERROR_CODE error = (*funky.initialize)(config, get_server_api);
6966 
6967     if (error != EXTENSION_SUCCESS) {
6968         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6969                 "Failed to initalize extensions from %s. Error code: %d\n",
6970                 soname, error);
6971         dlclose(handle);
6972         return false;
6973     }
6974 
6975     if (settings.verbose > 0) {
6976         settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
6977                 "Loaded extensions from: %s\n", soname);
6978     }
6979 
6980     return true;
6981 }
6982 
6983 /**
6984  * Do basic sanity check of the runtime environment
6985  * @return true if no errors found, false if we can't use this env
6986  */
6987 static bool sanitycheck(void) {
6988     /* One of our biggest problems is old and bogus libevents */
6989     const char *ever = event_get_version();
6990     if (ever != NULL) {
6991         if (strncmp(ever, "1.", 2) == 0) {
6992             /* Require at least 1.3 (that's still a couple of years old) */
6993             if ((ever[2] == '1' || ever[2] == '2') && !isdigit(ever[3])) {
6994                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6995                         "You are using libevent %s.\nPlease upgrade to"
6996                         " a more recent version (1.3 or newer)\n",
6997                         event_get_version());
6998                 return false;
6999             }
7000         }
7001     }
7002 
7003     return true;
7004 }
7005 
7006 #ifdef INNODB_MEMCACHED
/** Copy the first len bytes of str into a newly malloc'ed buffer and
NUL-terminate it.
@param str  source bytes (at least len of them)
@param len  number of bytes to copy
@return the new buffer, to be released with free(), or NULL when len is
negative or the allocation fails */
static
char*
my_strdupl(const char* str, int len)
{
    char*   s;

    if (len < 0) {
        return(NULL);
    }

    s = malloc((size_t) len + 1);
    if (s == NULL) {
        return(NULL);
    }

    memcpy(s, str, (size_t) len);
    s[len] = '\0';

    return(s);
}
7015 
/** Function that massages a MySQL config variable string into something
that can be parsed by getopt().

The string is split on spaces. Slot 0 of the resulting vector holds a
placeholder program name (getopt() uses argv[0] in its diagnostics), the
tokens follow from slot 1, and the vector is NULL-terminated.

The tokens point into a private copy of the option string. That copy is
deliberately not freed here, because the vector entries refer to it; the
caller only owns (and frees) the vector itself.

@param option       space-separated option string
@param option_argc  out: number of entries including slot 0, or 0 when
                    memory could not be allocated
@param option_argv  out: the malloc'ed argument vector, or NULL when
                    memory could not be allocated */
static
void
daemon_memcached_make_option(char* option, int* option_argc,
                             char*** option_argv)
{
    static const char*      sep = " ";
    static char             prog_name[] = "memcached";
    char*                   last = NULL;
    char*                   opt_str;
    char*                   my_str;
    char**                  argv_out;
    size_t                  option_len = strlen(option);
    int                     num_arg = 0;
    int                     i = 1;

    *option_argc = 0;
    *option_argv = NULL;

    my_str = my_strdupl(option, (int) option_len);
    if (my_str == NULL) {
        return;
    }

    /* First pass: count the tokens */
    for (opt_str = strtok_r(my_str, sep, &last);
         opt_str;
         opt_str = strtok_r(NULL, sep, &last)) {
        num_arg++;
    }

    /* reset my_str, since strtok_r could alter it; the terminating
    NUL is copied along with the text */
    memcpy(my_str, option, option_len + 1);

    /* One slot for the program name, one for the NULL terminator */
    argv_out = malloc(((size_t) num_arg + 2) * sizeof(*argv_out));
    if (argv_out == NULL) {
        free(my_str);
        return;
    }

    argv_out[0] = prog_name;

    /* Second pass: record where each token starts */
    for (opt_str = strtok_r(my_str, sep, &last);
         opt_str;
         opt_str = strtok_r(NULL, sep, &last)) {
        argv_out[i] = opt_str;
        i++;
    }

    assert(i == num_arg + 1);

    argv_out[i] = NULL;

    *option_argv = argv_out;
    *option_argc = (num_arg + 1);

    return;
}
7057 
/* Structure that bundles the callback function structure pointer with the
other settings passed to the InnoDB engine. daemon_memcached_main() fills
one in from its memcached_context_t argument and stores the structure's
address, cast to a string pointer, in engine_config.
NOTE(review): the engine presumably casts that pointer back to this type,
so the field order and types should be treated as a shared layout --
confirm against the engine side before changing anything here. */
typedef struct eng_config_info {
	char*           option_string;	/* start of the "key=value;" option
					text built while parsing options */
	void*           cb_ptr;		/* taken from m_innodb_api_cb */
	unsigned int    eng_r_batch_size; /* taken from m_r_batch_size */
	unsigned int    eng_w_batch_size; /* taken from m_w_batch_size */
	bool		enable_binlog;	/* taken from m_enable_binlog */
} eng_config_info_t;
7067 #endif /* INNODB_MEMCACHED */
7068 
7069 #ifdef INNODB_MEMCACHED
7070 void* daemon_memcached_main(void *p) {
7071 #else
7072 int main (int argc, char **argv) {
7073 #endif
7074     int c;
7075     bool lock_memory = false;
7076     bool do_daemonize = false;
7077     bool preallocate = false;
7078     int maxcore = 0;
7079     char *username = NULL;
7080     char *pid_file = NULL;
7081     struct passwd *pw;
7082     struct rlimit rlim;
7083     char unit = '\0';
7084     int size_max = 0;
7085 
7086     bool protocol_specified = false;
7087     bool tcp_specified = false;
7088     bool udp_specified = false;
7089     memcached_context_t* m_config = (memcached_context_t*)p;
7090     const char *engine;
7091     const char *engine_config = NULL;
7092     char old_options[1024] = { [0] = '\0' };
7093     char *old_opts = old_options;
7094 #ifdef INNODB_MEMCACHED
7095     int option_argc = 0;
7096     char** option_argv = NULL;
7097     eng_config_info_t my_eng_config;
7098 
7099     memcached_initialized = 0;
7100 
7101     if (m_config->m_engine_library) {
7102 	engine = m_config->m_engine_library;
7103 
7104 	/* FIXME: We should have a better way to pass the callback structure
7105 	point to storage engine. It is now appended in the configure
7106 	string in eng_config_info_t structure */
7107 	my_eng_config.cb_ptr = m_config->m_innodb_api_cb;
7108 	my_eng_config.eng_r_batch_size = m_config->m_r_batch_size;
7109 	my_eng_config.eng_w_batch_size = m_config->m_w_batch_size;
7110 	my_eng_config.enable_binlog = m_config->m_enable_binlog;
7111 	my_eng_config.option_string = old_opts;
7112 	engine_config = (const char *) (&my_eng_config);
7113 
7114     } else {
7115 	engine = "default_engine.so";
7116     }
7117 #else
7118     engine = "default_engine.so";
7119 #endif /* INNODB_MEMCACHED */
7120 
7121     memcached_shutdown = 0;
7122     memcached_initialized = 0;
7123 
7124     if (!sanitycheck()) {
7125         return(NULL);
7126     }
7127 
7128     /* make the time we started always be 2 seconds before we really
7129        did, so time(0) - time.started is never zero.  if so, things
7130        like 'settings.oldest_live' which act as booleans as well as
7131        values are now false in boolean context... */
7132     process_started = time(0) - 2;
7133     set_current_time();
7134 
7135     /* Initialize the socket subsystem */
7136     initialize_sockets();
7137 
7138     /* init settings */
7139     settings_init();
7140 
7141     if (memcached_initialize_stderr_logger(get_server_api) != EXTENSION_SUCCESS) {
7142         fprintf(stderr, "Failed to initialize log system\n");
7143         return (NULL);
7144     }
7145 
7146     if (m_config->m_mem_option) {
7147 	daemon_memcached_make_option(m_config->m_mem_option,
7148 				     &option_argc,
7149 				     &option_argv);
7150     }
7151 
7152 #ifdef INNODB_MEMCACHED
7153 
7154     if (option_argc > 0 && option_argv) {
7155 	    /* Always reset the index to 1, since this function can
7156 	    be invoked multiple times with install/uninstall plugins */
7157 	    optind = 1;
7158 	    while (-1 != (c = getopt(option_argc, option_argv,
7159 		  "a:"  /* access mask for unix socket */
7160 		  "p:"  /* TCP port number to listen on */
7161 		  "s:"  /* unix socket path to listen on */
7162 		  "U:"  /* UDP port number to listen on */
7163 		  "m:"  /* max memory to use for items in megabytes */
7164 		  "M"   /* return error on memory exhausted */
7165 		  "c:"  /* max simultaneous connections */
7166 		  "k"   /* lock down all paged memory */
7167 		  "hi"  /* help, licence info */
7168 		  "r"   /* maximize core file limit */
7169 		  "v"   /* verbose */
7170 		  "d"   /* daemon mode */
7171 		  "l:"  /* interface to listen on */
7172 		  "u:"  /* user identity to run as */
7173 		  "P:"  /* save PID in file */
7174 		  "f:"  /* factor? */
7175 		  "n:"  /* minimum space allocated for key+value+flags */
7176 		  "t:"  /* threads */
7177 		  "D:"  /* prefix delimiter? */
7178 		  "L"   /* Large memory pages */
7179 		  "R:"  /* max requests per event */
7180 		  "C"   /* Disable use of CAS */
7181 		  "b:"  /* backlog queue limit */
7182 		  "B:"  /* Binding protocol */
7183 		  "I:"  /* Max item size */
7184 		  "S"   /* Sasl ON */
7185 		  "E:"  /* Engine to load */
7186 		  "e:"  /* Engine options */
7187 		  "q"   /* Disallow detailed stats */
7188 		  "X:"  /* Load extension */
7189 		))) {
7190 		switch (c) {
7191 		case 'a':
7192 		    /* access for unix domain socket, as octal mask (like chmod)*/
7193 		    settings.access= strtol(optarg,NULL,8);
7194 		    break;
7195 
7196 		case 'U':
7197 		    settings.udpport = atoi(optarg);
7198 		    udp_specified = true;
7199 		    break;
7200 		case 'p':
7201 		    settings.port = atoi(optarg);
7202 		    tcp_specified = true;
7203 		    break;
7204 		case 's':
7205 		    settings.socketpath = optarg;
7206 		    break;
7207 		case 'm':
7208 		    settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7209 		     old_opts += sprintf(old_opts, "cache_size=%lu;",
7210 					 (unsigned long)settings.maxbytes);
7211 		   break;
7212 		case 'M':
7213 		    settings.evict_to_free = 0;
7214 		    old_opts += sprintf(old_opts, "eviction=false;");
7215 		    break;
7216 		case 'c':
7217 		    settings.maxconns = atoi(optarg);
7218 		    break;
7219 		case 'h':
7220 		    usage();
7221 		    exit(EXIT_SUCCESS);
7222 		case 'i':
7223 		    usage_license();
7224 		    exit(EXIT_SUCCESS);
7225 		case 'k':
7226 		    lock_memory = true;
7227 		    break;
7228 		case 'v':
7229 		    settings.verbose++;
7230 		    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7231 		    break;
7232 		case 'l':
7233 		    settings.inter= strdup(optarg);
7234 		    break;
7235 		case 'd':
7236 		    do_daemonize = true;
7237 		    break;
7238 		case 'r':
7239 		    maxcore = 1;
7240 		    break;
7241 		case 'R':
7242 		    settings.reqs_per_event = atoi(optarg);
7243 		    if (settings.reqs_per_event <= 0) {
7244 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7245 			      "Number of requests per event must be greater than 0\n");
7246 			return (void*)1;
7247 		    }
7248 		    break;
7249 		case 'u':
7250 		    username = optarg;
7251 		    break;
7252 		case 'P':
7253 		    pid_file = optarg;
7254 		    break;
7255 		case 'f':
7256 		    settings.factor = atof(optarg);
7257 		    if (settings.factor <= 1.0) {
7258 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7259 				"Factor must be greater than 1\n");
7260 			return (void*)1;
7261 		    }
7262 		     old_opts += sprintf(old_opts, "factor=%f;",
7263 					 settings.factor);
7264 		   break;
7265 		case 'n':
7266 		    settings.chunk_size = atoi(optarg);
7267 		    if (settings.chunk_size == 0) {
7268 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7269 				"Chunk size must be greater than 0\n");
7270 			return (void*)1;
7271 		    }
7272 		    old_opts += sprintf(old_opts, "chunk_size=%u;",
7273 					settings.chunk_size);
7274 		    break;
7275 		case 't':
7276 		    settings.num_threads = atoi(optarg);
7277 		    if (settings.num_threads <= 0) {
7278 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7279 				"Number of threads must be greater than 0\n");
7280 			return (void*)1;
7281 		    }
7282 		    /* There're other problems when you get above 64 threads.
7283 		     * In the future we should portably detect # of cores for the
7284 		     * default.
7285 		     */
7286 		    if (settings.num_threads > 64) {
7287 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7288 				"WARNING: Setting a high number of worker"
7289 				"threads is not recommended.\n"
7290 				" Set this value to the number of cores in"
7291 				" your machine or less.\n");
7292 		    }
7293 		    break;
7294 		case 'D':
7295 		    settings.prefix_delimiter = optarg[0];
7296 		    settings.detail_enabled = 1;
7297 		    break;
7298 		case 'L' :
7299 		    if (enable_large_pages() == 0) {
7300 			preallocate = true;
7301 			old_opts += sprintf(old_opts, "preallocate=true;");
7302 		    }
7303 		    break;
7304 		case 'C' :
7305 		    settings.use_cas = false;
7306 		    break;
7307 		case 'b' :
7308 		    settings.backlog = atoi(optarg);
7309 		    break;
7310 		case 'B':
7311 		    protocol_specified = true;
7312 		    if (strcmp(optarg, "auto") == 0) {
7313 			settings.binding_protocol = negotiating_prot;
7314 		    } else if (strcmp(optarg, "binary") == 0) {
7315 			settings.binding_protocol = binary_prot;
7316 		    } else if (strcmp(optarg, "ascii") == 0) {
7317 			settings.binding_protocol = ascii_prot;
7318 		    } else {
7319 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7320 				"Invalid value for binding protocol: %s\n"
7321 				" -- should be one of auto, binary, or ascii\n", optarg);
7322 			exit(EX_USAGE);
7323 		    }
7324 		    break;
7325 		case 'I':
7326 		    unit = optarg[strlen(optarg)-1];
7327 		    if (unit == 'k' || unit == 'm' ||
7328 			unit == 'K' || unit == 'M') {
7329 			optarg[strlen(optarg)-1] = '\0';
7330 			size_max = atoi(optarg);
7331 			if (unit == 'k' || unit == 'K')
7332 			    size_max *= 1024;
7333 			if (unit == 'm' || unit == 'M')
7334 			    size_max *= 1024 * 1024;
7335 			settings.item_size_max = size_max;
7336 		    } else {
7337 			settings.item_size_max = atoi(optarg);
7338 		    }
7339 		    if (settings.item_size_max < 1024) {
7340 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7341 				"Item max size cannot be less than 1024 bytes.\n");
7342 			return (void*)1;
7343 		    }
7344 		    if (settings.item_size_max > 1024 * 1024 * 128) {
7345 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7346 				"Cannot set item size limit higher than 128 mb.\n");
7347 			return (void*)1;
7348 		    }
7349 		    if (settings.item_size_max > 1024 * 1024) {
7350 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7351 			    "WARNING: Setting item max size above 1MB is not"
7352 			    " recommended!\n"
7353 			    " Raising this limit increases the minimum memory requirements\n"
7354 			    " and will decrease your memory efficiency.\n"
7355 			);
7356 		    }
7357 #ifndef __WIN32__
7358 		    old_opts += sprintf(old_opts, "item_size_max=%zu;",
7359 					settings.item_size_max);
7360 #else
7361 		    old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7362 					settings.item_size_max);
7363 #endif
7364 		    break;
7365 		case 'E':
7366 		    engine = optarg;
7367 		    break;
7368 		case 'e':
7369 		    /* FIXME, we use engine_config to pass callback function
7370 		    for now. Will need a better solution
7371 		    engine_config = optarg; */
7372 		    break;
7373 		case 'q':
7374 		    settings.allow_detailed = false;
7375 		    break;
7376 		case 'S': /* set Sasl authentication to true. Default is false */
7377 # ifdef ENABLE_MEMCACHED_SASL
7378 #  ifndef SASL_ENABLED
7379 		    settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7380 			    "This server is not built with SASL support.\n");
7381 		    exit(EX_USAGE);
7382 #  endif /* !SASL_ENABLED */
7383 		    settings.require_sasl = true;
7384 # endif /* ENABLE_MEMCACHED_SASL */
7385 		    break;
7386 		case 'X' :
7387 		    {
7388 			char *ptr = strchr(optarg, ',');
7389 			if (ptr != NULL) {
7390 			    *ptr = '\0';
7391 			    ++ptr;
7392 			}
7393 			if (!load_extension(optarg, ptr)) {
7394 			    exit(EXIT_FAILURE);
7395 			}
7396 			if (ptr != NULL) {
7397 			    *(ptr - 1) = ',';
7398 			}
7399 		    }
7400 		    break;
7401 		default:
7402 		    settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7403 			    "Illegal argument \"%c\"\n", c);
7404 		    return (void*)1;
7405 		}
7406 	}
7407 
7408 	free(option_argv);
7409     }
7410     fprintf(stderr, MEMCACHED_ATOMIC_MSG);
7411 #else
7412     /* process arguments */
7413     while (-1 != (c = getopt(argc, argv,
7414           "a:"  /* access mask for unix socket */
7415           "p:"  /* TCP port number to listen on */
7416           "s:"  /* unix socket path to listen on */
7417           "U:"  /* UDP port number to listen on */
7418           "m:"  /* max memory to use for items in megabytes */
7419           "M"   /* return error on memory exhausted */
7420           "c:"  /* max simultaneous connections */
7421           "k"   /* lock down all paged memory */
7422           "hi"  /* help, licence info */
7423           "r"   /* maximize core file limit */
7424           "v"   /* verbose */
7425           "d"   /* daemon mode */
7426           "l:"  /* interface to listen on */
7427           "u:"  /* user identity to run as */
7428           "P:"  /* save PID in file */
7429           "f:"  /* factor? */
7430           "n:"  /* minimum space allocated for key+value+flags */
7431           "t:"  /* threads */
7432           "D:"  /* prefix delimiter? */
7433           "L"   /* Large memory pages */
7434           "R:"  /* max requests per event */
7435           "C"   /* Disable use of CAS */
7436           "b:"  /* backlog queue limit */
7437           "B:"  /* Binding protocol */
7438           "I:"  /* Max item size */
7439           "S"   /* Sasl ON */
7440           "E:"  /* Engine to load */
7441           "e:"  /* Engine options */
7442           "q"   /* Disallow detailed stats */
7443           "X:"  /* Load extension */
7444         ))) {
7445         switch (c) {
7446         case 'a':
7447             /* access for unix domain socket, as octal mask (like chmod)*/
7448             settings.access= strtol(optarg,NULL,8);
7449             break;
7450 
7451         case 'U':
7452             settings.udpport = atoi(optarg);
7453             udp_specified = true;
7454             break;
7455         case 'p':
7456             settings.port = atoi(optarg);
7457             tcp_specified = true;
7458             break;
7459         case 's':
7460             settings.socketpath = optarg;
7461             break;
7462         case 'm':
7463             settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7464              old_opts += sprintf(old_opts, "cache_size=%lu;",
7465                                  (unsigned long)settings.maxbytes);
7466            break;
7467         case 'M':
7468             settings.evict_to_free = 0;
7469             old_opts += sprintf(old_opts, "eviction=false;");
7470             break;
7471         case 'c':
7472             settings.maxconns = atoi(optarg);
7473             break;
7474         case 'h':
7475             usage();
7476             exit(EXIT_SUCCESS);
7477         case 'i':
7478             usage_license();
7479             exit(EXIT_SUCCESS);
7480         case 'k':
7481             lock_memory = true;
7482             break;
7483         case 'v':
7484             settings.verbose++;
7485             perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7486             break;
7487         case 'l':
7488             if (settings.inter != NULL) {
7489                 size_t len = strlen(settings.inter) + strlen(optarg) + 2;
7490                 char *p = malloc(len);
7491                 if (p == NULL) {
7492                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7493                                                     "Failed to allocate memory\n");
7494                     return 1;
7495                 }
7496                 snprintf(p, len, "%s,%s", settings.inter, optarg);
7497                 free(settings.inter);
7498                 settings.inter = p;
7499             } else {
7500                 settings.inter= strdup(optarg);
7501             }
7502             break;
7503         case 'd':
7504             do_daemonize = true;
7505             break;
7506         case 'r':
7507             maxcore = 1;
7508             break;
7509         case 'R':
7510             settings.reqs_per_event = atoi(optarg);
7511             if (settings.reqs_per_event <= 0) {
7512                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7513                       "Number of requests per event must be greater than 0\n");
7514                 return 1;
7515             }
7516             break;
7517         case 'u':
7518             username = optarg;
7519             break;
7520         case 'P':
7521             pid_file = optarg;
7522             break;
7523         case 'f':
7524             settings.factor = atof(optarg);
7525             if (settings.factor <= 1.0) {
7526                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7527                         "Factor must be greater than 1\n");
7528                 return 1;
7529             }
7530              old_opts += sprintf(old_opts, "factor=%f;",
7531                                  settings.factor);
7532            break;
7533         case 'n':
7534             settings.chunk_size = atoi(optarg);
7535             if (settings.chunk_size == 0) {
7536                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7537                         "Chunk size must be greater than 0\n");
7538                 return 1;
7539             }
7540             old_opts += sprintf(old_opts, "chunk_size=%u;",
7541                                 settings.chunk_size);
7542             break;
7543         case 't':
7544             settings.num_threads = atoi(optarg);
7545             if (settings.num_threads <= 0) {
7546                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7547                         "Number of threads must be greater than 0\n");
7548                 return 1;
7549             }
7550             /* There're other problems when you get above 64 threads.
7551              * In the future we should portably detect # of cores for the
7552              * default.
7553              */
7554             if (settings.num_threads > 64) {
7555                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7556                         "WARNING: Setting a high number of worker"
7557                         "threads is not recommended.\n"
7558                         " Set this value to the number of cores in"
7559                         " your machine or less.\n");
7560             }
7561             break;
7562         case 'D':
7563             settings.prefix_delimiter = optarg[0];
7564             settings.detail_enabled = 1;
7565             break;
7566         case 'L' :
7567             if (enable_large_pages() == 0) {
7568                 preallocate = true;
7569                 old_opts += sprintf(old_opts, "preallocate=true;");
7570             }
7571             break;
7572         case 'C' :
7573             settings.use_cas = false;
7574             break;
7575         case 'b' :
7576             settings.backlog = atoi(optarg);
7577             break;
7578         case 'B':
7579             protocol_specified = true;
7580             if (strcmp(optarg, "auto") == 0) {
7581                 settings.binding_protocol = negotiating_prot;
7582             } else if (strcmp(optarg, "binary") == 0) {
7583                 settings.binding_protocol = binary_prot;
7584             } else if (strcmp(optarg, "ascii") == 0) {
7585                 settings.binding_protocol = ascii_prot;
7586             } else {
7587                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7588                         "Invalid value for binding protocol: %s\n"
7589                         " -- should be one of auto, binary, or ascii\n", optarg);
7590                 exit(EX_USAGE);
7591             }
7592             break;
7593         case 'I':
7594             unit = optarg[strlen(optarg)-1];
7595             if (unit == 'k' || unit == 'm' ||
7596                 unit == 'K' || unit == 'M') {
7597                 optarg[strlen(optarg)-1] = '\0';
7598                 size_max = atoi(optarg);
7599                 if (unit == 'k' || unit == 'K')
7600                     size_max *= 1024;
7601                 if (unit == 'm' || unit == 'M')
7602                     size_max *= 1024 * 1024;
7603                 settings.item_size_max = size_max;
7604             } else {
7605                 settings.item_size_max = atoi(optarg);
7606             }
7607             if (settings.item_size_max < 1024) {
7608                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7609                         "Item max size cannot be less than 1024 bytes.\n");
7610                 return 1;
7611             }
7612             if (settings.item_size_max > 1024 * 1024 * 128) {
7613                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7614                         "Cannot set item size limit higher than 128 mb.\n");
7615                 return 1;
7616             }
7617             if (settings.item_size_max > 1024 * 1024) {
7618                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7619                     "WARNING: Setting item max size above 1MB is not"
7620                     " recommended!\n"
7621                     " Raising this limit increases the minimum memory requirements\n"
7622                     " and will decrease your memory efficiency.\n"
7623                 );
7624             }
7625 #ifndef __WIN32__
7626             old_opts += sprintf(old_opts, "item_size_max=%zu;",
7627                                 settings.item_size_max);
7628 #else
7629             old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7630                                 settings.item_size_max);
7631 #endif
7632             break;
7633         case 'E':
7634             engine = optarg;
7635             break;
7636         case 'e':
7637             engine_config = optarg;
7638             break;
7639         case 'q':
7640             settings.allow_detailed = false;
7641             break;
7642         case 'S': /* set Sasl authentication to true. Default is false */
7643 #ifndef SASL_ENABLED
7644             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7645                     "This server is not built with SASL support.\n");
7646             exit(EX_USAGE);
7647 #endif
7648             settings.require_sasl = true;
7649             break;
7650         case 'X' :
7651             {
7652                 char *ptr = strchr(optarg, ',');
7653                 if (ptr != NULL) {
7654                     *ptr = '\0';
7655                     ++ptr;
7656                 }
7657                 if (!load_extension(optarg, ptr)) {
7658                     exit(EXIT_FAILURE);
7659                 }
7660                 if (ptr != NULL) {
7661                     *(ptr - 1) = ',';
7662                 }
7663             }
7664             break;
7665         default:
7666             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7667                     "Illegal argument \"%c\"\n", c);
7668             return 1;
7669         }
7670     }
7671 #endif /* INNODB_MEMCACHED */
7672 
7673     if (getenv("MEMCACHED_REQS_TAP_EVENT") != NULL) {
7674         settings.reqs_per_tap_event = atoi(getenv("MEMCACHED_REQS_TAP_EVENT"));
7675     }
7676 
7677     if (settings.reqs_per_tap_event <= 0) {
7678         settings.reqs_per_tap_event = DEFAULT_REQS_PER_TAP_EVENT;
7679     }
7680 
7681 
7682     if (install_sigterm_handler() != 0) {
7683         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7684                                         "Failed to install SIGTERM handler\n");
7685         exit(EXIT_FAILURE);
7686     }
7687 
7688     char *topkeys_env = getenv("MEMCACHED_TOP_KEYS");
7689     if (topkeys_env != NULL) {
7690         settings.topkeys = atoi(topkeys_env);
7691         if (settings.topkeys < 0) {
7692             settings.topkeys = 0;
7693         }
7694     }
7695 
7696     if (settings.require_sasl) {
7697         if (!protocol_specified) {
7698             settings.binding_protocol = binary_prot;
7699         } else {
7700             if (settings.binding_protocol == negotiating_prot) {
7701                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7702                         "ERROR: You cannot use auto-negotiating protocol while requiring SASL.\n");
7703                 exit(EX_USAGE);
7704             }
7705             if (settings.binding_protocol == ascii_prot) {
7706                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7707                         "ERROR: You cannot use only ASCII protocol while requiring SASL.\n");
7708                 exit(EX_USAGE);
7709             }
7710         }
7711     }
7712 
7713     if (tcp_specified && !udp_specified) {
7714         settings.udpport = settings.port;
7715     } else if (udp_specified && !tcp_specified) {
7716         settings.port = settings.udpport;
7717     }
7718 
7719     /*
7720     if (engine_config != NULL && strlen(old_options) > 0) {
7721         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7722                 "ERROR: You can't mix -e with the old options\n");
7723         return (NULL);
7724     } else if (engine_config == NULL && strlen(old_options) > 0) {
7725         engine_config = old_options;
7726     } */
7727 
7728     if (maxcore != 0) {
7729         struct rlimit rlim_new;
7730         /*
7731          * First try raising to infinity; if that fails, try bringing
7732          * the soft limit to the hard.
7733          */
7734         if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
7735             rlim_new.rlim_cur = rlim_new.rlim_max = RLIM_INFINITY;
7736             if (setrlimit(RLIMIT_CORE, &rlim_new)!= 0) {
7737                 /* failed. try raising just to the old max */
7738                 rlim_new.rlim_cur = rlim_new.rlim_max = rlim.rlim_max;
7739                 (void)setrlimit(RLIMIT_CORE, &rlim_new);
7740             }
7741         }
7742         /*
7743          * getrlimit again to see what we ended up with. Only fail if
7744          * the soft limit ends up 0, because then no core files will be
7745          * created at all.
7746          */
7747 
7748         if ((getrlimit(RLIMIT_CORE, &rlim) != 0) || rlim.rlim_cur == 0) {
7749             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7750                     "failed to ensure corefile creation\n");
7751             exit(EX_OSERR);
7752         }
7753     }
7754 
7755     /*
7756      * If needed, increase rlimits to allow as many connections
7757      * as needed.
7758      */
7759 
7760     if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7761         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7762                 "failed to getrlimit number of files\n");
7763         exit(EX_OSERR);
7764     } else {
7765         int maxfiles = settings.maxconns;
7766         if (rlim.rlim_cur < maxfiles)
7767             rlim.rlim_cur = maxfiles;
7768         if (rlim.rlim_max < rlim.rlim_cur)
7769             rlim.rlim_max = rlim.rlim_cur;
7770         if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7771             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7772                     "failed to set rlimit for open files. Try running as"
7773                     " root or requesting smaller maxconns value.\n");
7774             exit(EX_OSERR);
7775         }
7776     }
7777 
7778     /* Sanity check for the connection structures */
7779     int nfiles = 0;
7780     if (settings.port != 0) {
7781         nfiles += 2;
7782     }
7783     if (settings.udpport != 0) {
7784         nfiles += settings.num_threads * 2;
7785     }
7786 
7787     if (settings.maxconns <= nfiles) {
7788         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7789                 "Configuratioin error. \n"
7790                 "You specified %d connections, but the system will use at "
7791                 "least %d\nconnection structures to start.\n",
7792                 settings.maxconns, nfiles);
7793         exit(EX_USAGE);
7794     }
7795 
7796     /* lose root privileges if we have them */
7797     if (getuid() == 0 || geteuid() == 0) {
7798         if (username == 0 || *username == '\0') {
7799             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7800                     "can't run as root without the -u switch\n");
7801             exit(EX_USAGE);
7802         }
7803         if ((pw = getpwnam(username)) == 0) {
7804             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7805                     "can't find the user %s to switch to\n", username);
7806             exit(EX_NOUSER);
7807         }
7808         if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) {
7809             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7810                     "failed to assume identity of user %s: %s\n", username,
7811                     strerror(errno));
7812             exit(EX_OSERR);
7813         }
7814     }
7815 
7816 #ifdef SASL_ENABLED
7817     init_sasl();
7818 #endif /* SASL */
7819 
7820     /* daemonize if requested */
7821     /* if we want to ensure our ability to dump core, don't chdir to / */
7822     if (do_daemonize) {
7823         if (sigignore(SIGHUP) == -1) {
7824             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7825                     "Failed to ignore SIGHUP: ", strerror(errno));
7826         }
7827         if (daemonize(maxcore, settings.verbose) == -1) {
7828              settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7829                     "failed to daemon() in order to daemonize\n");
7830             exit(EXIT_FAILURE);
7831         }
7832     }
7833 
7834     /* lock paged memory if needed */
7835     if (lock_memory) {
7836 #ifdef HAVE_MLOCKALL
7837         int res = mlockall(MCL_CURRENT | MCL_FUTURE);
7838         if (res != 0) {
7839             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7840                     "warning: -k invalid, mlockall() failed: %s\n",
7841                     strerror(errno));
7842         }
7843 #else
7844         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7845                 "warning: -k invalid, mlockall() not supported on this platform.  proceeding without.\n");
7846 #endif
7847     }
7848 
7849     /* initialize main thread libevent instance */
7850     main_base = event_init();
7851 
7852     /* Load the storage engine */
7853     ENGINE_HANDLE *engine_handle = NULL;
7854     if (!load_engine(engine,get_server_api,settings.extensions.logger,&engine_handle)) {
7855         /* Error already reported */
7856         exit(EXIT_FAILURE);
7857     }
7858 
7859     if(!init_engine(engine_handle,engine_config,settings.extensions.logger)) {
7860 #ifdef INNODB_MEMCACHED
7861         shutdown_server();
7862         goto func_exit;
7863 #else
7864 	return(false);
7865 #endif /* INNODB_MEMCACHED */
7866     }
7867 
7868     if(settings.verbose > 0) {
7869         log_engine_details(engine_handle,settings.extensions.logger);
7870     }
7871     settings.engine.v1 = (ENGINE_HANDLE_V1 *) engine_handle;
7872 
7873     if (settings.engine.v1->arithmetic == NULL) {
7874         settings.engine.v1->arithmetic = internal_arithmetic;
7875     }
7876 
7877     /* initialize other stuff */
7878     stats_init();
7879 
7880     if (!(conn_cache = cache_create("conn", sizeof(conn), sizeof(void*),
7881                                     conn_constructor, conn_destructor))) {
7882         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7883                 "Failed to create connection cache\n");
7884         exit(EXIT_FAILURE);
7885     }
7886 
7887     default_independent_stats = new_independent_stats();
7888 
7889 #ifdef INNODB_MEMCACHED
7890     if (!default_independent_stats) {
7891 	exit(EXIT_FAILURE);
7892     }
7893 #endif
7894 
7895 #ifndef __WIN32__
7896     /*
7897      * ignore SIGPIPE signals; we can use errno == EPIPE if we
7898      * need that information
7899      */
7900     if (sigignore(SIGPIPE) == -1) {
7901         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7902                 "failed to ignore SIGPIPE; sigaction");
7903         exit(EX_OSERR);
7904     }
7905 #endif
7906 
7907     /* start up worker threads if MT mode */
7908     thread_init(settings.num_threads, main_base, dispatch_event_handler);
7909 
7910     /* initialise clock event */
7911     clock_handler(0, 0, 0);
7912 
    /* create UNIX domain sockets after dropping privileges */
7914     if (settings.socketpath != NULL) {
7915         if (server_socket_unix(settings.socketpath,settings.access)) {
7916             vperror("failed to listen on UNIX socket: %s", settings.socketpath);
7917             exit(EX_OSERR);
7918         }
7919     }
7920 
7921     /* create the listening socket, bind it, and init */
7922     if (settings.socketpath == NULL) {
7923         int udp_port;
7924 
7925         const char *portnumber_filename = getenv("MEMCACHED_PORT_FILENAME");
7926         char temp_portnumber_filename[PATH_MAX];
7927         FILE *portnumber_file = NULL;
7928 
7929         if (portnumber_filename != NULL) {
7930             snprintf(temp_portnumber_filename,
7931                      sizeof(temp_portnumber_filename),
7932                      "%s.lck", portnumber_filename);
7933 
7934             portnumber_file = fopen(temp_portnumber_filename, "a");
7935             if (portnumber_file == NULL) {
7936                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7937                         "Failed to open \"%s\": %s\n",
7938                         temp_portnumber_filename, strerror(errno));
7939             }
7940         }
7941 
7942         if (settings.port && server_sockets(settings.port, tcp_transport,
7943                                             portnumber_file)) {
7944 		vperror("failed to listen on TCP port %d", settings.port);
7945 #ifdef INNODB_MEMCACHED
7946 		shutdown_server();
7947 		goto func_exit;
7948 #else
7949 		exit(EX_OSERR);
7950 #endif /* INNODB_MEMCACHED */
7951         }
7952 
7953         /*
7954          * initialization order: first create the listening sockets
7955          * (may need root on low ports), then drop root if needed,
7956          * then daemonise if needed, then init libevent (in some cases
7957          * descriptors created by libevent wouldn't survive forking).
7958          */
7959         udp_port = settings.udpport ? settings.udpport : settings.port;
7960 
7961         /* create the UDP listening socket and bind it */
7962         if (settings.udpport && server_sockets(settings.udpport, udp_transport,
7963                                                portnumber_file)) {
7964             vperror("failed to listen on UDP port %d", settings.udpport);
7965             exit(EX_OSERR);
7966         }
7967 
7968         if (portnumber_file) {
7969             fclose(portnumber_file);
7970             rename(temp_portnumber_filename, portnumber_filename);
7971         }
7972     }
7973 
7974     if (pid_file != NULL) {
7975         save_pid(pid_file);
7976     }
7977 
7978     /* Drop privileges no longer needed */
7979     drop_privileges();
7980 
7981     memcached_initialized = 1;
7982 
7983     /* enter the event loop */
7984     event_base_loop(main_base, 0);
7985 
7986     if (settings.verbose) {
7987         settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
7988                                         "Initiating shutdown\n");
7989     }
7990 
7991 func_exit:
7992 
7993     if (settings.engine.v1)
7994       settings.engine.v1->destroy(settings.engine.v0, false);
7995 
7996     threads_shutdown();
7997 
7998     /* remove the PID file if we're a daemon */
7999     if (do_daemonize)
8000         remove_pidfile(pid_file);
8001     /* Clean up strdup() call for bind() address */
8002     if (settings.inter)
8003       free(settings.inter);
8004 
8005 #ifdef INNODB_MEMCACHED
8006     /* free event base */
8007     if (main_base) {
8008         event_base_free(main_base);
8009         main_base = NULL;
8010     }
8011 #endif
8012 
8013     memcached_shutdown = 2;
8014     memcached_initialized = 2;
8015 
8016     return EXIT_SUCCESS;
8017 }
8018