1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *  memcached - memory caching daemon
4  *
5  *       http://www.danga.com/memcached/
6  *  Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
7  *  Copyright 2003 Danga Interactive, Inc.  All rights reserved.
8  *  This file was modified by Oracle on 28-08-2015 and 23-03-2016.
9  *  Modifications copyright (c) 2015, 2016, Oracle and/or its affiliates.
10  *  All rights reserved.
11  *
12  *  Use and distribution licensed under the BSD license.  See
13  *  the LICENSE file for full text.
14  *
15  *  Authors:
16  *      Anatoly Vorobey <mellon@pobox.com>
17  *      Brad Fitzpatrick <brad@danga.com>
18  *
19  *  Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
20  */
21 #include "config.h"
22 #include "config_static.h"
23 #include "memcached.h"
24 #include "memcached/extension_loggers.h"
25 #include "utilities/engine_loader.h"
26 
27 #include <signal.h>
28 #include <getopt.h>
29 #include <fcntl.h>
30 #include <errno.h>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <time.h>
35 #include <assert.h>
36 #include <limits.h>
37 #include <ctype.h>
38 #include <stdarg.h>
39 #include <stddef.h>
40 #include <dlfcn.h>
41 
42 #include "memcached_mysql.h"
43 
#define INNODB_MEMCACHED
/*
 * Thread setup/teardown hooks defined outside this file.  Declared with an
 * explicit (void) parameter list so the compiler rejects calls that pass
 * arguments; an empty "()" list would leave the parameters unspecified.
 */
void my_thread_init(void);
void my_thread_end(void);
47 
/*
 * Assign a CAS value to an item by forwarding the request to the
 * item_set_cas entry point of the configured storage engine.
 */
static inline void item_set_cas(const void *cookie, item *it, uint64_t cas)
{
    settings.engine.v1->item_set_cas(settings.engine.v0,
                                     cookie,
                                     it,
                                     cas);
}
51 
/*
 * Statistics helper macros.
 *
 * The *_GUTS macros are the bodies plugged into STATS_INCR1; all four take
 * the same argument list so they can be passed interchangeably.
 *
 * SLAB_GUTS indexes the per-slab counters with info.clsid, so a variable
 * named "info" with a clsid member must be in scope wherever a macro built
 * on SLAB_GUTS (SLAB_INCR, SLAB_TWO, STATS_HIT) is expanded.
 */
#define SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_stats[info.clsid].slab_op++;

/* Bump only the thread-level counter named by thread_op. */
#define THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    thread_stats->thread_op++;

/* Bump two thread-level counters: slab_op and thread_op. */
#define THREAD_GUTS2(conn, thread_stats, slab_op, thread_op) \
    thread_stats->slab_op++; \
    thread_stats->thread_op++;

/* Bump the per-slab counter slab_op and the thread-level counter thread_op. */
#define SLAB_THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
    SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
    THREAD_GUTS(conn, thread_stats, slab_op, thread_op)

/*
 * Common driver: look up the stats block of the connection's worker thread
 * (conn->thread->index), run GUTS while holding that block's mutex, then
 * record the key in topkeys via TK() after the mutex has been released.
 *
 * The expansion is a bare { } block, not do { } while (0), so it must not be
 * used as the unbraced body of an if that has an else branch.
 */
#define STATS_INCR1(GUTS, conn, slab_op, thread_op, key, nkey) { \
    struct independent_stats *independent_stats = get_independent_stats(conn); \
    struct thread_stats *thread_stats = \
        &independent_stats->thread_stats[conn->thread->index]; \
    topkeys_t *topkeys = independent_stats->topkeys; \
    pthread_mutex_lock(&thread_stats->mutex); \
    GUTS(conn, thread_stats, slab_op, thread_op); \
    pthread_mutex_unlock(&thread_stats->mutex); \
    TK(topkeys, slab_op, key, nkey, current_time); \
}

/* Increment the thread-level counter "op". */
#define STATS_INCR(conn, op, key, nkey) \
    STATS_INCR1(THREAD_GUTS, conn, op, op, key, nkey)

/* Increment the per-slab counter "op". */
#define SLAB_INCR(conn, op, key, nkey) \
    STATS_INCR1(SLAB_GUTS, conn, op, op, key, nkey)

/* Increment two thread-level counters. */
#define STATS_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(THREAD_GUTS2, conn, slab_op, thread_op, key, nkey)

/* Increment one per-slab counter and one thread-level counter. */
#define SLAB_TWO(conn, slab_op, thread_op, key, nkey) \
    STATS_INCR1(SLAB_THREAD_GUTS, conn, slab_op, thread_op, key, nkey)

/* Record a hit: per-slab <op>_hits plus thread-level cmd_<op>. */
#define STATS_HIT(conn, op, key, nkey) \
    SLAB_TWO(conn, op##_hits, cmd_##op, key, nkey)

/* Record a miss: thread-level <op>_misses plus thread-level cmd_<op>. */
#define STATS_MISS(conn, op, key, nkey) \
    STATS_TWO(conn, op##_misses, cmd_##op, key, nkey)
95 
/*
 * Key-less statistics helpers.  Each macro updates counters in the stats
 * block returned by get_thread_stats(conn).
 *
 * With HAVE_GCC_SYNC_BUILTINS the update uses __sync_add_and_fetch();
 * otherwise it is done under the stats block's mutex.  Both variants expand
 * to a single do { } while (0) statement, so they behave the same in every
 * statement context (including an unbraced if/else) and always need a
 * trailing semicolon.
 *
 * NOTE(review): the two MEMCACHED_ATOMIC_MSG strings differ in
 * capitalization and only the first ends with a newline; they are kept
 * byte-for-byte here.
 */
#if defined(HAVE_GCC_SYNC_BUILTINS)

/* Add 1 to counter "op". */
#define STATS_NOKEY(conn, op) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    __sync_add_and_fetch(&thread_stats->op, 1); \
} while (0)

/* Add 1 to counters "op1" and "op2" (two separate atomic operations). */
#define STATS_NOKEY2(conn, op1, op2) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    __sync_add_and_fetch(&thread_stats->op1, 1); \
    __sync_add_and_fetch(&thread_stats->op2, 1); \
} while (0)

/* Add "amt" to counter "op". */
#define STATS_ADD(conn, op, amt) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    __sync_add_and_fetch(&thread_stats->op, (amt)); \
} while (0)

#define MEMCACHED_ATOMIC_MSG	"InnoDB MEMCACHED: Memcached uses atomic increment \n"

#else /* HAVE_GCC_SYNC_BUILTINS */

/* Add 1 to counter "op" while holding the stats mutex. */
#define STATS_NOKEY(conn, op) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op++; \
    pthread_mutex_unlock(&thread_stats->mutex); \
} while (0)

/* Add 1 to counters "op1" and "op2" under a single lock acquisition. */
#define STATS_NOKEY2(conn, op1, op2) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op1++; \
    thread_stats->op2++; \
    pthread_mutex_unlock(&thread_stats->mutex); \
} while (0)

/* Add "amt" to counter "op" while holding the stats mutex. */
#define STATS_ADD(conn, op, amt) \
do { \
    struct thread_stats *thread_stats = \
        get_thread_stats(conn); \
    pthread_mutex_lock(&thread_stats->mutex); \
    thread_stats->op += (amt); \
    pthread_mutex_unlock(&thread_stats->mutex); \
} while (0)

#define MEMCACHED_ATOMIC_MSG	"InnoDB Memcached: Memcached DOES NOT use atomic increment"
#endif /* HAVE_GCC_SYNC_BUILTINS */
150 
/*
 * Process-wide state flags.  Both are volatile sig_atomic_t.
 * NOTE(review): presumably set when shutdown is requested / start-up has
 * finished; the writers are not in this part of the file -- confirm.
 */
volatile sig_atomic_t memcached_shutdown;
volatile sig_atomic_t memcached_initialized;

/*
 * We keep the current time of day in a global variable that's updated by a
 * timer event. This saves us a bunch of time() system calls (we really only
 * need to get the time once a second, whereas there can be tens of thousands
 * of requests a second) and allows us to use server-start-relative timestamps
 * rather than absolute UNIX timestamps, a space savings on systems where
 * sizeof(time_t) > sizeof(unsigned int).
 */
volatile rel_time_t current_time;
163 
/*
 * forward declarations
 */
static SOCKET new_socket(struct addrinfo *ai);
static int try_read_command(conn *c);
static inline struct independent_stats *get_independent_stats(conn *c);
static inline struct thread_stats *get_thread_stats(conn *c);
static void register_callback(ENGINE_HANDLE *eh,
                              ENGINE_EVENT_TYPE type,
                              EVENT_CALLBACK cb, const void *cb_data);

/* Result codes returned by try_read_network() and try_read_udp(). */
enum try_read_result {
    READ_DATA_RECEIVED,
    READ_NO_DATA_RECEIVED,
    READ_ERROR,            /** an error occurred (on the socket) (or client closed connection) */
    READ_MEMORY_ERROR      /** failed to allocate more memory */
};

static enum try_read_result try_read_network(conn *c);
static enum try_read_result try_read_udp(conn *c);

/* stats */
static void stats_init(void);
static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate);
static void process_stat_settings(ADD_STAT add_stats, void *c);


/* defaults */
static void settings_init(void);

/* event handling, network IO */
static void event_handler(const int fd, const short which, void *arg);
static void complete_nread(conn *c);
static char *process_command(conn *c, char *command);
static void write_and_free(conn *c, char *buf, int bytes);
static int ensure_iov_space(conn *c);
static int add_iov(conn *c, const void *buf, int len);
static int add_msghdr(conn *c);


/* time handling */
static void set_current_time(void);  /* update the global variable holding
                              global 32-bit seconds-since-start time
                              (to avoid 64 bit time_t) */
207 
/** exported globals **/
struct stats stats;
struct settings settings;
/* Not exported (static): base used by realtime()/abstime() to convert
   between absolute and server-relative timestamps. */
static time_t process_started;     /* when the process was started */

/** file scope variables **/
/* Head of the list of listening connections; walked through ->next. */
static conn *listen_conn = NULL;
/* NOTE(review): fixed capacity of 100 entries; presumably num_udp_socket
   counts the slots in use -- confirm that writers bound-check it. */
static int  udp_socket[100];
static int  num_udp_socket;
static struct event_base *main_base;
static struct independent_stats *default_independent_stats;

/* One singly linked list of handlers per engine event type, indexed by
   ENGINE_EVENT_TYPE; perform_callbacks() walks the list for a given type. */
static struct engine_event_handler *engine_event_handlers[MAX_ENGINE_EVENT_TYPE + 1];

/* Result codes returned by transmit(). */
enum transmit_result {
    TRANSMIT_COMPLETE,   /** All done writing. */
    TRANSMIT_INCOMPLETE, /** More data remaining to write. */
    TRANSMIT_SOFT_ERROR, /** Can't write any more right now. */
    TRANSMIT_HARD_ERROR  /** Can't write (c->state is set to conn_closing) */
};

static enum transmit_result transmit(conn *c);
230 
/* Number of seconds in 30 days: the largest expiration value that is still
   treated as a delta rather than an absolute time.  Parenthesized so the
   macro expands as one value inside any surrounding expression. */
#define REALTIME_MAXDELTA (60*60*24*30)
232 
233 // Perform all callbacks of a given type for the given connection.
perform_callbacks(ENGINE_EVENT_TYPE type,const void * data,const void * c)234 static void perform_callbacks(ENGINE_EVENT_TYPE type,
235                               const void *data,
236                               const void *c) {
237     for (struct engine_event_handler *h = engine_event_handlers[type];
238          h; h = h->next) {
239         h->cb(c, type, data, h->cb_data);
240     }
241 }
242 
243 /*
244  * given time value that's either unix time or delta from current unix time,
245  * return unix time. Use the fact that delta can't exceed one month
246  * (and real time value can't be that low).
247  */
realtime(const time_t exptime)248 static rel_time_t realtime(const time_t exptime) {
249     /* no. of seconds in 30 days - largest possible delta exptime */
250 
251     if (exptime == 0) return 0; /* 0 means never expire */
252 
253     if (exptime > REALTIME_MAXDELTA) {
254         /* if item expiration is at/before the server started, give it an
255            expiration time of 1 second after the server started.
256            (because 0 means don't expire).  without this, we'd
257            underflow and wrap around to some large value way in the
258            future, effectively making items expiring in the past
259            really expiring never */
260         if (exptime <= process_started)
261             return (rel_time_t)1;
262         return (rel_time_t)(exptime - process_started);
263     } else {
264         return (rel_time_t)(exptime + current_time);
265     }
266 }
267 
268 /**
269  * Convert the relative time to an absolute time (relative to EPOC ;) )
270  */
abstime(const rel_time_t exptime)271 static time_t abstime(const rel_time_t exptime)
272 {
273     return process_started + exptime;
274 }
275 
stats_init(void)276 static void stats_init(void) {
277     stats.daemon_conns = 0;
278     stats.rejected_conns = 0;
279     stats.curr_conns = stats.total_conns = stats.conn_structs = 0;
280 
281     stats_prefix_init();
282 }
283 
stats_reset(const void * cookie)284 static void stats_reset(const void *cookie) {
285     struct conn *conn = (struct conn*)cookie;
286     STATS_LOCK();
287     stats.rejected_conns = 0;
288     stats.total_conns = 0;
289     stats_prefix_clear();
290     STATS_UNLOCK();
291     threadlocal_stats_reset(get_independent_stats(conn)->thread_stats);
292     settings.engine.v1->reset_stats(settings.engine.v0, cookie);
293 }
294 
settings_init(void)295 static void settings_init(void) {
296     settings.use_cas = true;
297     settings.access = 0700;
298     settings.port = 11211;
299     settings.udpport = 11211;
300     /* By default this string should be NULL for getaddrinfo() */
301     settings.inter = NULL;
302     settings.maxbytes = 64 * 1024 * 1024; /* default is 64MB */
303     settings.maxconns = 1000;         /* to limit connections-related memory to about 5MB */
304     settings.verbose = 0;
305     settings.oldest_live = 0;
306     settings.evict_to_free = 1;       /* push old items out of cache when memory runs out */
307     settings.socketpath = NULL;       /* by default, not using a unix socket */
308     settings.factor = 1.25;
309     settings.chunk_size = 48;         /* space for a modest key and value */
310     settings.num_threads = 4;         /* N workers */
311     settings.num_threads_per_udp = 0;
312     settings.prefix_delimiter = ':';
313     settings.detail_enabled = 0;
314     settings.allow_detailed = true;
315     settings.reqs_per_event = DEFAULT_REQS_PER_EVENT;
316     settings.backlog = 1024;
317     settings.binding_protocol = negotiating_prot;
318     settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
319     settings.topkeys = 0;
320     settings.require_sasl = false;
321     settings.extensions.logger = get_stderr_logger();
322 }
323 
324 /*
325  * Adds a message header to a connection.
326  *
327  * Returns 0 on success, -1 on out-of-memory.
328  */
add_msghdr(conn * c)329 static int add_msghdr(conn *c)
330 {
331     struct msghdr *msg;
332 
333     assert(c != NULL);
334 
335     if (c->msgsize == c->msgused) {
336         msg = realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr));
337         if (! msg)
338             return -1;
339         c->msglist = msg;
340         c->msgsize *= 2;
341     }
342 
343     msg = c->msglist + c->msgused;
344 
345     /* this wipes msg_iovlen, msg_control, msg_controllen, and
346        msg_flags, the last 3 of which aren't defined on solaris: */
347     memset(msg, 0, sizeof(struct msghdr));
348 
349     msg->msg_iov = &c->iov[c->iovused];
350 
351     if (c->request_addr_size > 0) {
352         msg->msg_name = &c->request_addr;
353         msg->msg_namelen = c->request_addr_size;
354     }
355 
356     c->msgbytes = 0;
357     c->msgused++;
358 
359     if (IS_UDP(c->transport)) {
360         /* Leave room for the UDP header, which we'll fill in later. */
361         return add_iov(c, NULL, UDP_HEADER_SIZE);
362     }
363 
364     return 0;
365 }
366 
prot_text(enum protocol prot)367 static const char *prot_text(enum protocol prot) {
368     char *rv = "unknown";
369     switch(prot) {
370         case ascii_prot:
371             rv = "ascii";
372             break;
373         case binary_prot:
374             rv = "binary";
375             break;
376         case negotiating_prot:
377             rv = "auto-negotiate";
378             break;
379     }
380     return rv;
381 }
382 
/*
 * Shared state describing whether accepting new connections is currently
 * disabled.  The accessors in this file take `mutex` around every read or
 * write of the other members.
 */
struct {
    pthread_mutex_t mutex;
    bool disabled;        /* set to true by disable_listen() */
    ssize_t count;        /* set to 10 by disable_listen(); NOTE(review):
                             the consumer is not in this part of the file */
    uint64_t num_disable; /* incremented on every disable_listen() call */
} listen_state;
389 
is_listen_disabled(void)390 static bool is_listen_disabled(void) {
391     bool ret;
392     pthread_mutex_lock(&listen_state.mutex);
393     ret = listen_state.disabled;
394     pthread_mutex_unlock(&listen_state.mutex);
395     return ret;
396 }
397 
get_listen_disabled_num(void)398 static uint64_t get_listen_disabled_num(void) {
399     uint64_t ret;
400     pthread_mutex_lock(&listen_state.mutex);
401     ret = listen_state.num_disable;
402     pthread_mutex_unlock(&listen_state.mutex);
403     return ret;
404 }
405 
disable_listen(void)406 static void disable_listen(void) {
407     pthread_mutex_lock(&listen_state.mutex);
408     listen_state.disabled = true;
409     listen_state.count = 10;
410     ++listen_state.num_disable;
411     pthread_mutex_unlock(&listen_state.mutex);
412 
413     conn *next;
414     for (next = listen_conn; next; next = next->next) {
415         update_event(next, 0);
416         if (listen(next->sfd, 1) != 0) {
417             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
418                                             "listen() failed",
419                                             strerror(errno));
420         }
421     }
422 }
423 
/*
 * Close a socket, retrying while the close fails with EINTR or EAGAIN.
 *
 * INVALID_SOCKET is ignored.  A close that still fails is logged.  After a
 * successful close the current-connection counter is decremented and, if
 * listening is currently disabled, the dispatcher is notified.
 */
void safe_close(SOCKET sfd) {
    if (sfd == INVALID_SOCKET) {
        return;
    }

    int rval;
    do {
        rval = closesocket(sfd);
    } while (rval == SOCKET_ERROR && (errno == EINTR || errno == EAGAIN));

    if (rval == SOCKET_ERROR) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                        "Failed to close socket %d (%s)!!\n", (int)sfd,
                                        strerror(errno));
        return;
    }

    STATS_LOCK();
    stats.curr_conns--;
    STATS_UNLOCK();

    if (is_listen_disabled()) {
        notify_dispatcher();
    }
}
447 
/*
 * Free list management for connections.
 */
cache_t *conn_cache;      /* object cache that conn structures are allocated
                             from (cache_alloc) and returned to (cache_free) */
452 
453 /**
454  * Reset all of the dynamic buffers used by a connection back to their
455  * default sizes. The strategy for resizing the buffers is to allocate a
456  * new one of the correct size and free the old one if the allocation succeeds
457  * instead of using realloc to change the buffer size (because realloc may
458  * not shrink the buffers, and will also copy the memory). If the allocation
459  * fails the buffer will be unchanged.
460  *
461  * @param c the connection to resize the buffers for
462  * @return true if all allocations succeeded, false if one or more of the
463  *         allocations failed.
464  */
conn_reset_buffersize(conn * c)465 static bool conn_reset_buffersize(conn *c) {
466     bool ret = true;
467 
468     if (c->rsize != DATA_BUFFER_SIZE) {
469         void *ptr = malloc(DATA_BUFFER_SIZE);
470         if (ptr != NULL) {
471             free(c->rbuf);
472             c->rbuf = ptr;
473             c->rsize = DATA_BUFFER_SIZE;
474         } else {
475             ret = false;
476         }
477     }
478 
479     if (c->wsize != DATA_BUFFER_SIZE) {
480         void *ptr = malloc(DATA_BUFFER_SIZE);
481         if (ptr != NULL) {
482             free(c->wbuf);
483             c->wbuf = ptr;
484             c->wsize = DATA_BUFFER_SIZE;
485         } else {
486             ret = false;
487         }
488     }
489 
490     if (c->isize != ITEM_LIST_INITIAL) {
491         void *ptr = malloc(sizeof(item *) * ITEM_LIST_INITIAL);
492         if (ptr != NULL) {
493             free(c->ilist);
494             c->ilist = ptr;
495             c->isize = ITEM_LIST_INITIAL;
496         } else {
497             ret = false;
498         }
499     }
500 
501     if (c->suffixsize != SUFFIX_LIST_INITIAL) {
502         void *ptr = malloc(sizeof(char *) * SUFFIX_LIST_INITIAL);
503         if (ptr != NULL) {
504             free(c->suffixlist);
505             c->suffixlist = ptr;
506             c->suffixsize = SUFFIX_LIST_INITIAL;
507         } else {
508             ret = false;
509         }
510     }
511 
512     if (c->iovsize != IOV_LIST_INITIAL) {
513         void *ptr = malloc(sizeof(struct iovec) * IOV_LIST_INITIAL);
514         if (ptr != NULL) {
515             free(c->iov);
516             c->iov = ptr;
517             c->iovsize = IOV_LIST_INITIAL;
518         } else {
519             ret = false;
520         }
521     }
522 
523     if (c->msgsize != MSG_LIST_INITIAL) {
524         void *ptr = malloc(sizeof(struct msghdr) * MSG_LIST_INITIAL);
525         if (ptr != NULL) {
526             free(c->msglist);
527             c->msglist = ptr;
528             c->msgsize = MSG_LIST_INITIAL;
529         } else {
530             ret = false;
531         }
532     }
533 
534     return ret;
535 }
536 
537 /**
538  * Constructor for all memory allocations of connection objects. Initialize
539  * all members and allocate the transfer buffers.
540  *
541  * @param buffer The memory allocated by the object cache
542  * @param unused1 not used
543  * @param unused2 not used
544  * @return 0 on success, 1 if we failed to allocate memory
545  */
conn_constructor(void * buffer,void * unused1,int unused2)546 static int conn_constructor(void *buffer, void *unused1, int unused2) {
547     (void)unused1; (void)unused2;
548 
549     conn *c = buffer;
550     memset(c, 0, sizeof(*c));
551     MEMCACHED_CONN_CREATE(c);
552 
553     if (!conn_reset_buffersize(c)) {
554         free(c->rbuf);
555         free(c->wbuf);
556         free(c->ilist);
557         free(c->suffixlist);
558         free(c->iov);
559         free(c->msglist);
560         settings.extensions.logger->log(EXTENSION_LOG_WARNING,
561                                         NULL,
562                                         "Failed to allocate buffers for connection\n");
563         return 1;
564     }
565 
566     STATS_LOCK();
567     stats.conn_structs++;
568     STATS_UNLOCK();
569 
570     return 0;
571 }
572 
573 /**
574  * Destructor for all connection objects. Release all allocated resources.
575  *
576  * @param buffer The memory allocated by the objec cache
577  * @param unused not used
578  */
conn_destructor(void * buffer,void * unused)579 static void conn_destructor(void *buffer, void *unused) {
580     (void)unused;
581     conn *c = buffer;
582     free(c->rbuf);
583     free(c->wbuf);
584     free(c->ilist);
585     free(c->suffixlist);
586     free(c->iov);
587     free(c->msglist);
588 
589     STATS_LOCK();
590     stats.conn_structs--;
591     STATS_UNLOCK();
592 }
593 
conn_new(const SOCKET sfd,STATE_FUNC init_state,const int event_flags,const int read_buffer_size,enum network_transport transport,struct event_base * base,struct timeval * timeout)594 conn *conn_new(const SOCKET sfd, STATE_FUNC init_state,
595                const int event_flags,
596                const int read_buffer_size, enum network_transport transport,
597                struct event_base *base, struct timeval *timeout) {
598     conn *c = cache_alloc(conn_cache);
599     if (c == NULL) {
600         return NULL;
601     }
602 
603     assert(c->thread == NULL);
604 
605     if (c->rsize < read_buffer_size) {
606         void *mem = malloc(read_buffer_size);
607         if (mem) {
608             c->rsize = read_buffer_size;
609             free(c->rbuf);
610             c->rbuf = mem;
611         } else {
612             assert(c->thread == NULL);
613             cache_free(conn_cache, c);
614             return NULL;
615         }
616     }
617 
618     c->transport = transport;
619     c->protocol = settings.binding_protocol;
620 
621     /* unix socket mode doesn't need this, so zeroed out.  but why
622      * is this done for every command?  presumably for UDP
623      * mode.  */
624     if (!settings.socketpath) {
625         c->request_addr_size = sizeof(c->request_addr);
626     } else {
627         c->request_addr_size = 0;
628     }
629 
630     if (settings.verbose > 1) {
631         if (init_state == conn_listening) {
632             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
633                                             "<%d server listening (%s)\n", sfd,
634                                             prot_text(c->protocol));
635         } else if (IS_UDP(transport)) {
636             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
637                                             "<%d server listening (udp)\n", sfd);
638         } else if (c->protocol == negotiating_prot) {
639             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
640                                             "<%d new auto-negotiating client connection\n",
641                                             sfd);
642         } else if (c->protocol == ascii_prot) {
643             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
644                                             "<%d new ascii client connection.\n", sfd);
645         } else if (c->protocol == binary_prot) {
646             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
647                                             "<%d new binary client connection.\n", sfd);
648         } else {
649             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
650                                             "<%d new unknown (%d) client connection\n",
651                                             sfd, c->protocol);
652             assert(false);
653         }
654     }
655 
656     c->sfd = sfd;
657     c->state = init_state;
658     c->rlbytes = 0;
659     c->cmd = -1;
660     c->ascii_cmd = NULL;
661     c->rbytes = c->wbytes = 0;
662     c->wcurr = c->wbuf;
663     c->rcurr = c->rbuf;
664     c->ritem = 0;
665     c->icurr = c->ilist;
666     c->suffixcurr = c->suffixlist;
667     c->ileft = 0;
668     c->suffixleft = 0;
669     c->iovused = 0;
670     c->msgcurr = 0;
671     c->msgused = 0;
672     c->next = NULL;
673     c->list_state = 0;
674 
675     c->write_and_go = init_state;
676     c->write_and_free = 0;
677     c->item = 0;
678 
679     c->noreply = false;
680 
681     event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
682     event_base_set(base, &c->event);
683     c->ev_flags = event_flags;
684 
685     if (!register_event(c, timeout)) {
686         assert(c->thread == NULL);
687         cache_free(conn_cache, c);
688         return NULL;
689     }
690 
691     STATS_LOCK();
692     stats.total_conns++;
693     STATS_UNLOCK();
694 
695     c->aiostat = ENGINE_SUCCESS;
696     c->ewouldblock = false;
697     c->refcount = 1;
698 
699     MEMCACHED_CONN_ALLOCATE(c->sfd);
700 
701     perform_callbacks(ON_CONNECT, NULL, c);
702 
703     return c;
704 }
705 
/*
 * Release everything a connection still holds: the current item, any items
 * and suffixes left in its pending lists, the write_and_free buffer, the
 * SASL context and the engine-specific storage.  Afterwards the connection
 * is detached from its thread and its socket is marked invalid.
 */
static void conn_cleanup(conn *c) {
    assert(c != NULL);

    if (c->item) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }

    while (c->ileft > 0) {
        settings.engine.v1->release(settings.engine.v0, c, *(c->icurr));
        c->ileft--;
        c->icurr++;
    }

    while (c->suffixleft > 0) {
        cache_free(c->thread->suffix_cache, *(c->suffixcurr));
        c->suffixleft--;
        c->suffixcurr++;
    }

    if (c->write_and_free) {
        free(c->write_and_free);
        c->write_and_free = 0;
    }

    if (c->sasl_conn) {
        sasl_dispose(&c->sasl_conn);
        c->sasl_conn = NULL;
    }

    if (c->engine_storage) {
        /* Clear the field before calling into the engine, so the engine is
           handed the old value while the connection already shows NULL. */
        void *cleanup_data = c->engine_storage;
        c->engine_storage = NULL;
        settings.engine.v1->clean_engine(settings.engine.v0, c, cleanup_data);
    }

    c->tap_iterator = NULL;
    c->thread = NULL;
    assert(c->next == NULL);
    c->ascii_cmd = NULL;
    c->sfd = INVALID_SOCKET;
    c->tap_nack_mode = false;
}
749 
/*
 * Tear down a connection whose socket has already been invalidated and
 * return the object to the connection cache.
 *
 * Any in-flight ascii command is aborted, the connection is removed from
 * its thread's pending-io and pending-close lists (under the thread lock),
 * its resources are released and its buffers are reset to default size.
 */
void conn_close(conn *c) {
    assert(c != NULL);
    assert(c->sfd == INVALID_SOCKET);

    if (c->ascii_cmd != NULL) {
        c->ascii_cmd->abort(c->ascii_cmd, c);
    }

    assert(c->thread);
    LOCK_THREAD(c->thread);
    if (settings.verbose > 1) {
        if (list_contains(c->thread->pending_io, c)) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "Current connection was in the pending-io list.. Nuking it\n");
        }
    }
    /* unlink from both per-thread lists */
    c->thread->pending_io = list_remove(c->thread->pending_io, c);
    c->thread->pending_close = list_remove(c->thread->pending_close, c);
    UNLOCK_THREAD(c->thread);

    conn_cleanup(c);

    /*
     * The object cache expects objects to come back in a constructed
     * state, so shrink/grow the buffers back to their default sizes.
     * The result is ignored: on allocation failure a buffer simply keeps
     * its current size.
     */
    (void)conn_reset_buffersize(c);
    assert(c->thread == NULL);
    cache_free(conn_cache, c);
}
780 
781 /*
782  * Shrinks a connection's buffers if they're too big.  This prevents
783  * periodic large "get" requests from permanently chewing lots of server
784  * memory.
785  *
786  * This should only be called in between requests since it can wipe output
787  * buffers!
788  */
conn_shrink(conn * c)789 static void conn_shrink(conn *c) {
790     assert(c != NULL);
791 
792     if (IS_UDP(c->transport))
793         return;
794 
795     if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) {
796         char *newbuf;
797 
798         if (c->rcurr != c->rbuf)
799             memmove(c->rbuf, c->rcurr, (size_t)c->rbytes);
800 
801         newbuf = (char *)realloc((void *)c->rbuf, DATA_BUFFER_SIZE);
802 
803         if (newbuf) {
804             c->rbuf = newbuf;
805             c->rsize = DATA_BUFFER_SIZE;
806         }
807         /* TODO check other branch... */
808         c->rcurr = c->rbuf;
809     }
810 
811     if (c->isize > ITEM_LIST_HIGHWAT) {
812         item **newbuf = (item**) realloc((void *)c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0]));
813         if (newbuf) {
814             c->ilist = newbuf;
815             c->isize = ITEM_LIST_INITIAL;
816         }
817     /* TODO check error condition? */
818     }
819 
820     if (c->msgsize > MSG_LIST_HIGHWAT) {
821         struct msghdr *newbuf = (struct msghdr *) realloc((void *)c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0]));
822         if (newbuf) {
823             c->msglist = newbuf;
824             c->msgsize = MSG_LIST_INITIAL;
825         }
826     /* TODO check error condition? */
827     }
828 
829     if (c->iovsize > IOV_LIST_HIGHWAT) {
830         struct iovec *newbuf = (struct iovec *) realloc((void *)c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0]));
831         if (newbuf) {
832             c->iov = newbuf;
833             c->iovsize = IOV_LIST_INITIAL;
834         }
835     /* TODO check return value */
836     }
837 }
838 
839 /**
840  * Convert a state name to a human readable form.
841  */
state_text(STATE_FUNC state)842 const char *state_text(STATE_FUNC state) {
843     if (state == conn_listening) {
844         return "conn_listening";
845     } else if (state == conn_new_cmd) {
846         return "conn_new_cmd";
847     } else if (state == conn_waiting) {
848         return "conn_waiting";
849     } else if (state == conn_read) {
850         return "conn_read";
851     } else if (state == conn_parse_cmd) {
852         return "conn_parse_cmd";
853     } else if (state == conn_write) {
854         return "conn_write";
855     } else if (state == conn_nread) {
856         return "conn_nread";
857     } else if (state == conn_swallow) {
858         return "conn_swallow";
859     } else if (state == conn_closing) {
860         return "conn_closing";
861     } else if (state == conn_mwrite) {
862         return "conn_mwrite";
863     } else if (state == conn_ship_log) {
864         return "conn_ship_log";
865     } else if (state == conn_add_tap_client) {
866         return "conn_add_tap_client";
867     } else if (state == conn_setup_tap_stream) {
868         return "conn_setup_tap_stream";
869     } else if (state == conn_pending_close) {
870         return "conn_pending_close";
871     } else if (state == conn_immediate_close) {
872         return "conn_immediate_close";
873     } else {
874         return "Unknown";
875     }
876 }
877 
878 /*
879  * Sets a connection's current state in the state machine. Any special
880  * processing that needs to happen on certain state transitions can
881  * happen here.
882  */
conn_set_state(conn * c,STATE_FUNC state)883 void conn_set_state(conn *c, STATE_FUNC state) {
884     assert(c != NULL);
885 
886     if (state != c->state) {
887         /*
888          * The connections in the "tap thread" behaves differently than
889          * normal connections because they operate in a full duplex mode.
890          * New messages may appear from both sides, so we can't block on
891          * read from the nework / engine
892          */
893         if (c->thread == tap_thread) {
894             if (state == conn_waiting) {
895                 c->which = EV_WRITE;
896                 state = conn_ship_log;
897             }
898         }
899 
900         if (settings.verbose > 2 || c->state == conn_closing
901             || c->state == conn_add_tap_client) {
902             settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
903                                             "%d: going from %s to %s\n",
904                                             c->sfd, state_text(c->state),
905                                             state_text(state));
906         }
907 
908         c->state = state;
909 
910         if (state == conn_write || state == conn_mwrite) {
911             MEMCACHED_PROCESS_COMMAND_END(c->sfd, c->wbuf, c->wbytes);
912         }
913     }
914 }
915 
916 /*
917  * Ensures that there is room for another struct iovec in a connection's
918  * iov list.
919  *
920  * Returns 0 on success, -1 on out-of-memory.
921  */
ensure_iov_space(conn * c)922 static int ensure_iov_space(conn *c) {
923     assert(c != NULL);
924 
925     if (c->iovused >= c->iovsize) {
926         int i, iovnum;
927         struct iovec *new_iov = (struct iovec *)realloc(c->iov,
928                                 (c->iovsize * 2) * sizeof(struct iovec));
929         if (! new_iov)
930             return -1;
931         c->iov = new_iov;
932         c->iovsize *= 2;
933 
934         /* Point all the msghdr structures at the new list. */
935         for (i = 0, iovnum = 0; i < c->msgused; i++) {
936             c->msglist[i].msg_iov = &c->iov[iovnum];
937             iovnum += c->msglist[i].msg_iovlen;
938         }
939     }
940 
941     return 0;
942 }
943 
944 
945 /*
946  * Adds data to the list of pending data that will be written out to a
947  * connection.
948  *
949  * Returns 0 on success, -1 on out-of-memory.
950  */
951 
add_iov(conn * c,const void * buf,int len)952 static int add_iov(conn *c, const void *buf, int len) {
953     struct msghdr *m;
954     int leftover;
955     bool limit_to_mtu;
956 
957     assert(c != NULL);
958 
959     do {
960         m = &c->msglist[c->msgused - 1];
961 
962         /*
963          * Limit UDP packets, and the first payloads of TCP replies, to
964          * UDP_MAX_PAYLOAD_SIZE bytes.
965          */
966         limit_to_mtu = IS_UDP(c->transport) || (1 == c->msgused);
967 
968         /* We may need to start a new msghdr if this one is full. */
969         if (m->msg_iovlen == IOV_MAX ||
970             (limit_to_mtu && c->msgbytes >= UDP_MAX_PAYLOAD_SIZE)) {
971             add_msghdr(c);
972             m = &c->msglist[c->msgused - 1];
973         }
974 
975         if (ensure_iov_space(c) != 0)
976             return -1;
977 
978         /* If the fragment is too big to fit in the datagram, split it up */
979         if (limit_to_mtu && len + c->msgbytes > UDP_MAX_PAYLOAD_SIZE) {
980             leftover = len + c->msgbytes - UDP_MAX_PAYLOAD_SIZE;
981             len -= leftover;
982         } else {
983             leftover = 0;
984         }
985 
986         m = &c->msglist[c->msgused - 1];
987         m->msg_iov[m->msg_iovlen].iov_base = (void *)buf;
988         m->msg_iov[m->msg_iovlen].iov_len = len;
989 
990         c->msgbytes += len;
991         c->iovused++;
992         m->msg_iovlen++;
993 
994         buf = ((char *)buf) + len;
995         len = leftover;
996     } while (leftover > 0);
997 
998     return 0;
999 }
1000 
1001 
1002 /*
1003  * Constructs a set of UDP headers and attaches them to the outgoing messages.
1004  */
build_udp_headers(conn * c)1005 static int build_udp_headers(conn *c) {
1006     int i;
1007     unsigned char *hdr;
1008 
1009     assert(c != NULL);
1010 
1011     if (c->msgused > c->hdrsize) {
1012         void *new_hdrbuf;
1013         if (c->hdrbuf)
1014             new_hdrbuf = realloc(c->hdrbuf, c->msgused * 2 * UDP_HEADER_SIZE);
1015         else
1016             new_hdrbuf = malloc(c->msgused * 2 * UDP_HEADER_SIZE);
1017         if (! new_hdrbuf)
1018             return -1;
1019         c->hdrbuf = (unsigned char *)new_hdrbuf;
1020         c->hdrsize = c->msgused * 2;
1021     }
1022 
1023     hdr = c->hdrbuf;
1024     for (i = 0; i < c->msgused; i++) {
1025         c->msglist[i].msg_iov[0].iov_base = (void*)hdr;
1026         c->msglist[i].msg_iov[0].iov_len = UDP_HEADER_SIZE;
1027         *hdr++ = c->request_id / 256;
1028         *hdr++ = c->request_id % 256;
1029         *hdr++ = i / 256;
1030         *hdr++ = i % 256;
1031         *hdr++ = c->msgused / 256;
1032         *hdr++ = c->msgused % 256;
1033         *hdr++ = 0;
1034         *hdr++ = 0;
1035         assert((void *) hdr == (caddr_t)c->msglist[i].msg_iov[0].iov_base + UDP_HEADER_SIZE);
1036     }
1037 
1038     return 0;
1039 }
1040 
1041 
/*
 * Queues a single ASCII response line (str followed by "\r\n") in the
 * connection's write buffer and moves the connection to conn_write.
 *
 * If the client asked for "noreply", nothing is sent: the flag is cleared
 * and the connection moves directly to its follow-up state.  In both cases
 * the follow-up state is conn_swallow when c->sbytes > 0 and conn_new_cmd
 * otherwise.
 */
static void out_string(conn *c, const char *str) {
    assert(c != NULL);

    if (c->noreply) {
        if (settings.verbose > 1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            ">%d NOREPLY %s\n", c->sfd, str);
        }
        c->noreply = false;
        conn_set_state(c, (c->sbytes > 0) ? conn_swallow : conn_new_cmd);
        return;
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d %s\n", c->sfd, str);
    }

    /* Throw away any partially built output before starting over. */
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    add_msghdr(c);

    size_t nbytes = strlen(str);
    if ((nbytes + 2) > c->wsize) {
        /* ought to be always enough. just fail for simplicity */
        str = "SERVER_ERROR output line too long";
        nbytes = strlen(str);
    }

    memcpy(c->wbuf, str, nbytes);
    memcpy(c->wbuf + nbytes, "\r\n", 2);
    c->wbytes = nbytes + 2;
    c->wcurr = c->wbuf;

    conn_set_state(c, conn_write);

    /* Decide where to go once the line has been written out. */
    if (c->sbytes > 0) {
        c->write_and_go = conn_swallow;
    } else {
        c->write_and_go = conn_new_cmd;
    }
}
1094 
1095 /*
1096  * we get here after reading the value in set/add/replace commands. The command
1097  * has been stored in c->cmd, and the item is ready in c->item.
1098  */
complete_update_ascii(conn * c)1099 static void complete_update_ascii(conn *c) {
1100     assert(c != NULL);
1101 
1102     item *it = c->item;
1103     item_info info = { .nvalue = 1 };
1104     if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
1105         settings.engine.v1->release(settings.engine.v0, c, it);
1106         settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
1107                                         "%d: Failed to get item info\n",
1108                                         c->sfd);
1109         out_string(c, "SERVER_ERROR failed to get item details");
1110         return;
1111     }
1112 
1113     c->sbytes = 2; // swallow \r\n
1114     ENGINE_ERROR_CODE ret = c->aiostat;
1115     c->aiostat = ENGINE_SUCCESS;
1116     if (ret == ENGINE_SUCCESS) {
1117         ret = settings.engine.v1->store(settings.engine.v0, c, it, &c->cas,
1118                                         c->store_op, 0);
1119     }
1120 
1121 #ifdef ENABLE_DTRACE
1122     switch (c->store_op) {
1123     case OPERATION_ADD:
1124         MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
1125                               (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1126         break;
1127     case OPERATION_REPLACE:
1128         MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
1129                                   (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1130         break;
1131     case OPERATION_APPEND:
1132         MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
1133                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1134         break;
1135     case OPERATION_PREPEND:
1136         MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
1137                                   (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1138         break;
1139     case OPERATION_SET:
1140         MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
1141                               (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
1142         break;
1143     case OPERATION_CAS:
1144         MEMCACHED_COMMAND_CAS(c->sfd, info.key, info.nkey, info.nbytes, c->cas);
1145         break;
1146     }
1147 #endif
1148 
1149     switch (ret) {
1150     case ENGINE_SUCCESS:
1151         out_string(c, "STORED");
1152         break;
1153     case ENGINE_KEY_EEXISTS:
1154         out_string(c, "EXISTS");
1155         break;
1156     case ENGINE_KEY_ENOENT:
1157         out_string(c, "NOT_FOUND");
1158         break;
1159     case ENGINE_NOT_STORED:
1160         out_string(c, "NOT_STORED");
1161         break;
1162     case ENGINE_DISCONNECT:
1163         c->state = conn_closing;
1164         break;
1165     case ENGINE_ENOTSUP:
1166         out_string(c, "SERVER_ERROR not supported");
1167         break;
1168     case ENGINE_ENOMEM:
1169         out_string(c, "SERVER_ERROR out of memory");
1170         break;
1171     case ENGINE_TMPFAIL:
1172         out_string(c, "SERVER_ERROR temporary failure");
1173         break;
1174     case ENGINE_EINVAL:
1175         out_string(c, "CLIENT_ERROR invalid arguments");
1176         break;
1177     case ENGINE_E2BIG:
1178         out_string(c, "CLIENT_ERROR value too big");
1179         break;
1180     case ENGINE_EACCESS:
1181         out_string(c, "CLIENT_ERROR access control violation");
1182         break;
1183     case ENGINE_NOT_MY_VBUCKET:
1184         out_string(c, "SERVER_ERROR not my vbucket");
1185         break;
1186     case ENGINE_FAILED:
1187         out_string(c, "SERVER_ERROR failure");
1188         break;
1189     case ENGINE_EWOULDBLOCK:
1190         c->ewouldblock = true;
1191         break;
1192     case ENGINE_WANT_MORE:
1193         assert(false);
1194         c->state = conn_closing;
1195         break;
1196 
1197     default:
1198         out_string(c, "SERVER_ERROR internal");
1199     }
1200 
1201     if (c->store_op == OPERATION_CAS) {
1202         switch (ret) {
1203         case ENGINE_SUCCESS:
1204             SLAB_INCR(c, cas_hits, info.key, info.nkey);
1205             break;
1206         case ENGINE_KEY_EEXISTS:
1207             SLAB_INCR(c, cas_badval, info.key, info.nkey);
1208             break;
1209         case ENGINE_KEY_ENOENT:
1210             STATS_NOKEY(c, cas_misses);
1211             break;
1212         default:
1213             ;
1214         }
1215     } else {
1216         SLAB_INCR(c, cmd_set, info.key, info.nkey);
1217     }
1218 
1219     if (!c->ewouldblock) {
1220         /* release the c->item reference */
1221         settings.engine.v1->release(settings.engine.v0, c, c->item);
1222         c->item = 0;
1223     }
1224 }
1225 
1226 /**
1227  * get a pointer to the start of the request struct for the current command
1228  */
binary_get_request(conn * c)1229 static void* binary_get_request(conn *c) {
1230     char *ret = c->rcurr;
1231     ret -= (sizeof(c->binary_header) + c->binary_header.request.keylen +
1232             c->binary_header.request.extlen);
1233 
1234     assert(ret >= c->rbuf);
1235     return ret;
1236 }
1237 
1238 /**
1239  * get a pointer to the key in this request
1240  */
binary_get_key(conn * c)1241 static char* binary_get_key(conn *c) {
1242     return c->rcurr - (c->binary_header.request.keylen);
1243 }
1244 
/**
 * Insert a key into a buffer, but replace all non-printable characters
 * with a '.'.
 *
 * The output has the form "<dir><client> <prefix> <key>", where <dir> is
 * '>' for data coming from the client and '<' otherwise.  The result is
 * always NUL-terminated; a key that does not fit is truncated so that the
 * terminator still lands inside the buffer.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param key the key to add to the buffer
 * @param nkey the number of bytes in the key
 * @return number of bytes in dest (excluding the terminating NUL) if
 *         success, -1 if the prefix could not be formatted or does not
 *         fit into the buffer
 */
static ssize_t key_to_printable_buffer(char *dest, size_t destsz,
                                       int client, bool from_client,
                                       const char *prefix,
                                       const char *key,
                                       size_t nkey)
{
    int nw = snprintf(dest, destsz, "%c%d %s ", from_client ? '>' : '<',
                      client, prefix);
    /*
     * snprintf reports the length it WANTED to write; anything >= destsz
     * means the prefix was truncated and there is no room left for a key.
     */
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }

    char *ptr = dest + nw;

    /* Room left for key bytes, keeping one byte for the terminating NUL. */
    size_t room = destsz - (size_t)nw - 1;
    if (nkey > room) {
        nkey = room;
    }

    for (size_t ii = 0; ii < nkey; ++ii, ++key, ++ptr) {
        /* <ctype.h> functions require an unsigned char value. */
        if (isgraph((unsigned char)*key)) {
            *ptr = *key;
        } else {
            *ptr = '.';
        }
    }

    *ptr = '\0';
    return ptr - dest;
}
1287 
/**
 * Convert a byte array to a text string
 *
 * The output starts with "<dir><client> <prefix>", where <dir> is '>' for
 * data coming from the client and '<' otherwise.  The bytes follow as
 * " 0xNN" groups, four per line, each line introduced by "<dir><client>  ".
 * The text ends with a newline.
 *
 * @param dest where to store the output
 * @param destsz size of destination buffer
 * @param client the client we are serving
 * @param from_client set to true if this data is from the client
 * @param prefix string to insert before the data
 * @param data the data to add to the buffer
 * @param size the number of bytes in data to print
 * @return number of bytes in dest (excluding the terminating NUL) if
 *         success, -1 if formatting failed or the text does not fit
 *         into the buffer
 */
static ssize_t bytes_to_output_string(char *dest, size_t destsz,
                                      int client, bool from_client,
                                      const char *prefix,
                                      const char *data,
                                      size_t size)
{
    const char direction = from_client ? '>' : '<';
    size_t offset;
    int nw;

    /*
     * snprintf returns the length it WANTED to write.  Every call below
     * therefore checks for truncation (nw >= remaining space) so that
     * "offset" can never move past the end of the buffer.
     */
    nw = snprintf(dest, destsz, "%c%d %s", direction, client, prefix);
    if (nw < 0 || (size_t)nw >= destsz) {
        return -1;
    }
    offset = (size_t)nw;

    for (size_t ii = 0; ii < size; ++ii) {
        if (ii % 4 == 0) {
            /* Start a new output line every four bytes. */
            nw = snprintf(dest + offset, destsz - offset, "\n%c%d  ",
                          direction, client);
            if (nw < 0 || (size_t)nw >= destsz - offset) {
                return -1;
            }
            offset += (size_t)nw;
        }
        nw = snprintf(dest + offset, destsz - offset,
                      " 0x%02x", (unsigned char)data[ii]);
        if (nw < 0 || (size_t)nw >= destsz - offset) {
            return -1;
        }
        offset += (size_t)nw;
    }

    nw = snprintf(dest + offset, destsz - offset, "\n");
    if (nw < 0 || (size_t)nw >= destsz - offset) {
        return -1;
    }

    return (ssize_t)(offset + (size_t)nw);
}
1334 
/*
 * Starts a fresh binary-protocol reply: discards any pending output, fills
 * in a response header at the start of c->wbuf and queues that header as
 * the first piece of data to send.
 *
 * err      - status code for the response (stored in network byte order)
 * hdr_len  - length of the extras section
 * key_len  - length of the key section
 * body_len - total body length (extras + key + value)
 *
 * The opcode is copied from the current request; opaque and cas are taken
 * from the connection.
 */
static void add_bin_header(conn *c, uint16_t err, uint8_t hdr_len, uint16_t key_len, uint32_t body_len) {
    assert(c);

    /* Drop whatever output was pending and start a new message. */
    c->msgcurr = 0;
    c->msgused = 0;
    c->iovused = 0;
    if (add_msghdr(c) != 0) {
        /* XXX:  out_string is inappropriate here */
        out_string(c, "SERVER_ERROR out of memory");
        return;
    }

    protocol_binary_response_header *hdr =
        (protocol_binary_response_header *)c->wbuf;

    hdr->response.magic = (uint8_t)PROTOCOL_BINARY_RES;
    hdr->response.opcode = c->binary_header.request.opcode;
    hdr->response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES;
    hdr->response.opaque = c->opaque;

    /* Lengths, status and cas go out in network byte order. */
    hdr->response.extlen = (uint8_t)hdr_len;
    hdr->response.keylen = (uint16_t)htons(key_len);
    hdr->response.bodylen = htonl(body_len);
    hdr->response.status = (uint16_t)htons(err);
    hdr->response.cas = htonll(c->cas);

    if (settings.verbose > 1) {
        char dump[1024];
        if (bytes_to_output_string(dump, sizeof(dump), c->sfd, false,
                                   "Writing bin response:",
                                   (const char*)hdr->bytes,
                                   sizeof(hdr->bytes)) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "%s", dump);
        }
    }

    add_iov(c, c->wbuf, sizeof(hdr->response));
}
1376 
1377 /**
1378  * Convert an error code generated from the storage engine to the corresponding
1379  * error code used by the protocol layer.
1380  * @param e the error code as used in the engine
1381  * @return the error code as used by the protocol layer
1382  */
engine_error_2_protocol_error(ENGINE_ERROR_CODE e)1383 static protocol_binary_response_status engine_error_2_protocol_error(ENGINE_ERROR_CODE e) {
1384     protocol_binary_response_status ret;
1385 
1386     switch (e) {
1387     case ENGINE_SUCCESS:
1388         return PROTOCOL_BINARY_RESPONSE_SUCCESS;
1389     case ENGINE_KEY_ENOENT:
1390         return PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
1391     case ENGINE_KEY_EEXISTS:
1392         return PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
1393     case ENGINE_ENOMEM:
1394         return PROTOCOL_BINARY_RESPONSE_ENOMEM;
1395     case ENGINE_TMPFAIL:
1396         return PROTOCOL_BINARY_RESPONSE_ETMPFAIL;
1397     case ENGINE_NOT_STORED:
1398         return PROTOCOL_BINARY_RESPONSE_NOT_STORED;
1399     case ENGINE_EINVAL:
1400         return PROTOCOL_BINARY_RESPONSE_EINVAL;
1401     case ENGINE_ENOTSUP:
1402         return PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED;
1403     case ENGINE_E2BIG:
1404         return PROTOCOL_BINARY_RESPONSE_E2BIG;
1405     case ENGINE_NOT_MY_VBUCKET:
1406         return PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET;
1407     default:
1408         ret = PROTOCOL_BINARY_RESPONSE_EINTERNAL;
1409     }
1410 
1411     return ret;
1412 }
1413 
/*
 * Sends a binary-protocol response that carries only a status code and a
 * human readable message for it.
 *
 * c       - the connection to respond on
 * err     - the protocol status to report; PROTOCOL_BINARY_RESPONSE_SUCCESS
 *           has no message of its own
 * swallow - when > 0, the number of bytes of input to swallow after the
 *           response has been written; otherwise the connection goes on to
 *           the next command
 *
 * If the engine provides an errinfo() hook, its text is appended to the
 * message after a ": " separator.
 *
 * The message is staged in c->wbuf right behind the response header.
 * add_iov() only records a pointer to the data, and the data is read when
 * the queued output is transmitted, i.e. after this function has returned,
 * so it must not live in a local array.  A message that does not fit
 * behind the header is truncated.
 */
static void write_bin_packet(conn *c, protocol_binary_response_status err, int swallow) {
    ssize_t len;
    char buffer[1024] = { [sizeof(buffer) - 1] = '\0' };

    switch (err) {
    case PROTOCOL_BINARY_RESPONSE_SUCCESS:
        len = 0;
        break;
    case PROTOCOL_BINARY_RESPONSE_ENOMEM:
        len = snprintf(buffer, sizeof(buffer), "Out of memory");
        break;
    case PROTOCOL_BINARY_RESPONSE_ETMPFAIL:
        len = snprintf(buffer, sizeof(buffer), "Temporary failure");
        break;
    case PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND:
        len = snprintf(buffer, sizeof(buffer), "Unknown command");
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_ENOENT:
        len = snprintf(buffer, sizeof(buffer), "Not found");
        break;
    case PROTOCOL_BINARY_RESPONSE_EINVAL:
        len = snprintf(buffer, sizeof(buffer), "Invalid arguments");
        break;
    case PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS:
        len = snprintf(buffer, sizeof(buffer), "Data exists for key");
        break;
    case PROTOCOL_BINARY_RESPONSE_E2BIG:
        len = snprintf(buffer, sizeof(buffer), "Too large");
        break;
    case PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL:
        len = snprintf(buffer, sizeof(buffer),
                       "Non-numeric server-side value for incr or decr");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_STORED:
        len = snprintf(buffer, sizeof(buffer), "Not stored");
        break;
    case PROTOCOL_BINARY_RESPONSE_AUTH_ERROR:
        len = snprintf(buffer, sizeof(buffer), "Auth failure");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED:
        len = snprintf(buffer, sizeof(buffer), "Not supported");
        break;
    case PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET:
        len = snprintf(buffer, sizeof(buffer),
                       "I'm not responsible for this vbucket");
        break;

    default:
        len = snprintf(buffer, sizeof(buffer), "UNHANDLED ERROR (%d)", err);
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        ">%d UNHANDLED ERROR: %d\n", c->sfd, err);
    }

    /* Allow the engine to pass extra error information */
    if (settings.engine.v1->errinfo != NULL) {
        /*
         * The engine writes behind a two byte gap; the gap is filled with
         * the ": " separator only if the engine actually produced text.
         */
        size_t elen = settings.engine.v1->errinfo(settings.engine.v0, c, buffer + len + 2,
                                                  sizeof(buffer) - len - 3);

        if (elen > 0) {
            memcpy(buffer + len, ": ", 2);
            len += elen + 2;
        }
    }

    if (err != PROTOCOL_BINARY_RESPONSE_SUCCESS && settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        ">%d Writing an error: %s\n", c->sfd,
                                        buffer);
    }

    /*
     * Move the text out of the local array into the connection's write
     * buffer, directly behind the space add_bin_header() uses for the
     * response header, so that the queued iov stays valid after return.
     */
    char *body = c->wbuf + sizeof(protocol_binary_response_header);
    ssize_t room = (ssize_t)c->wsize -
                   (ssize_t)sizeof(protocol_binary_response_header);
    if (room < 0) {
        room = 0;
    }
    if (len > room) {
        len = room;
    }
    if (len > 0) {
        memcpy(body, buffer, len);
    }

    add_bin_header(c, err, 0, 0, len);
    if (len > 0) {
        add_iov(c, body, len);
    }
    conn_set_state(c, conn_mwrite);
    if (swallow > 0) {
        c->sbytes = swallow;
        c->write_and_go = conn_swallow;
    } else {
        c->write_and_go = conn_new_cmd;
    }
}
1496 
1497 /* Form and send a response to a command over the binary protocol */
write_bin_response(conn * c,void * d,int hlen,int keylen,int dlen)1498 static void write_bin_response(conn *c, void *d, int hlen, int keylen, int dlen) {
1499     if (!c->noreply || c->cmd == PROTOCOL_BINARY_CMD_GET ||
1500         c->cmd == PROTOCOL_BINARY_CMD_GETK) {
1501         add_bin_header(c, 0, hlen, keylen, dlen);
1502         if(dlen > 0) {
1503             add_iov(c, d, dlen);
1504         }
1505         conn_set_state(c, conn_mwrite);
1506         c->write_and_go = conn_new_cmd;
1507     } else {
1508         conn_set_state(c, conn_new_cmd);
1509     }
1510 }
1511 
1512 
/*
 * Executes a binary-protocol increment/decrement request (including the
 * quiet INCREMENTQ variant, which is treated as an increment).
 *
 * The request fields are converted to host byte order, the engine's
 * arithmetic() is invoked (unless an earlier asynchronous status is pending
 * in c->aiostat) and the engine's result is turned into a response: on
 * success the new value is returned in network byte order, otherwise the
 * matching error packet is written.  Hit/miss statistics are updated for
 * the direction (incr or decr) of the request.
 */
static void complete_incr_bin(conn *c) {
    /* Validate the connection before anything reads through it. */
    assert(c != NULL);

    /* The response is built in place in the write buffer. */
    protocol_binary_response_incr* rsp = (protocol_binary_response_incr*)c->wbuf;
    protocol_binary_request_incr* req = binary_get_request(c);

    assert(c->wsize >= sizeof(*rsp));

    /* fix byteorder in the request */
    uint64_t delta = ntohll(req->message.body.delta);
    uint64_t initial = ntohll(req->message.body.initial);
    rel_time_t expiration = ntohl(req->message.body.expiration);
    char *key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;
    bool incr = (c->cmd == PROTOCOL_BINARY_CMD_INCREMENT ||
                 c->cmd == PROTOCOL_BINARY_CMD_INCREMENTQ);

    if (settings.verbose > 1) {
        char buffer[1024];
        ssize_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     incr ? "INCR" : "DECR", key, nkey);
        if (nw != -1) {
            if (snprintf(buffer + nw, sizeof(buffer) - nw,
                         " %" PRIu64 ", %" PRIu64 ", %" PRIu64 "\n",
                         delta, initial, (uint64_t)expiration) != -1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    /* A status left behind by an asynchronous completion wins over a new call. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        /*
         * An expiration of 0xffffffff in the request turns the "create"
         * flag off; any other value turns it on.
         */
        ret = settings.engine.v1->arithmetic(settings.engine.v0,
                                             c, key, nkey, incr,
                                             req->message.body.expiration != 0xffffffff,
                                             delta, initial, expiration,
                                             &c->cas,
                                             &rsp->message.body.value,
                                             c->binary_header.request.vbucket);
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        rsp->message.body.value = htonll(rsp->message.body.value);
        write_bin_response(c, &rsp->message.body, 0, 0,
                           sizeof (rsp->message.body.value));
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        /*
         * Use the same direction test as for hits so that a quiet
         * increment (INCREMENTQ) miss is not counted as a decr miss.
         */
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EINVAL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_DELTA_BADVAL, 0);
        break;
    case ENGINE_NOT_STORED:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_STORED, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        /* Any other engine status is treated as a fatal programming error. */
        abort();
    }
}
1606 
/*
 * Binary-protocol counterpart of complete_update_ascii(): called once the
 * value of a store command has been read into the item in c->item.
 *
 * The item is handed to the engine's store() (unless an earlier
 * asynchronous status is pending in c->aiostat), the engine's result is
 * turned into a binary response, and the set/cas statistics are updated.
 * Unless the engine reported ENGINE_EWOULDBLOCK, the reference held in
 * c->item is released and cleared before returning.
 *
 * NOTE: the statistics/DTrace macros used below refer to the locals "it"
 * and "info" by name, so these must not be renamed.
 */
static void complete_update_bin(conn *c) {
    protocol_binary_response_status eno = PROTOCOL_BINARY_RESPONSE_EINVAL;
    assert(c != NULL);

    item *it = c->item;
    item_info info = { .nvalue = 1 };
    if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
        settings.engine.v1->release(settings.engine.v0, c, it);
        /*
         * The reference is gone; forget the pointer as well (as the normal
         * exit path does) so that nobody releases the item a second time.
         */
        c->item = 0;
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: Failed to get item info\n",
                                        c->sfd);
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
        return;
    }

    /* A status left behind by an asynchronous completion wins over a new call. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->store(settings.engine.v0, c,
                                        it, &c->cas, c->store_op,
                                        c->binary_header.request.vbucket);
    }

#ifdef ENABLE_DTRACE
    /*
     * Dispatch on the store operation: c->cmd holds the binary protocol
     * opcode here, which must not be compared against OPERATION_* values.
     */
    switch (c->store_op) {
    case OPERATION_ADD:
        MEMCACHED_COMMAND_ADD(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_REPLACE:
        MEMCACHED_COMMAND_REPLACE(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_APPEND:
        MEMCACHED_COMMAND_APPEND(c->sfd, info.key, info.nkey,
                                 (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_PREPEND:
        MEMCACHED_COMMAND_PREPEND(c->sfd, info.key, info.nkey,
                                  (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    case OPERATION_SET:
        MEMCACHED_COMMAND_SET(c->sfd, info.key, info.nkey,
                              (ret == ENGINE_SUCCESS) ? info.nbytes : -1, c->cas);
        break;
    default:
        break;
    }
#endif

    switch (ret) {
    case ENGINE_SUCCESS:
        /* Stored */
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        /* No reply yet: the item reference is kept (see below). */
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* Any other failure is reported according to the attempted operation. */
        if (c->store_op == OPERATION_ADD) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS;
        } else if(c->store_op == OPERATION_REPLACE) {
            eno = PROTOCOL_BINARY_RESPONSE_KEY_ENOENT;
        } else {
            eno = PROTOCOL_BINARY_RESPONSE_NOT_STORED;
        }
        write_bin_packet(c, eno, 0);
    }

    if (c->store_op == OPERATION_CAS) {
        switch (ret) {
        case ENGINE_SUCCESS:
            SLAB_INCR(c, cas_hits, info.key, info.nkey);
            break;
        case ENGINE_KEY_EEXISTS:
            SLAB_INCR(c, cas_badval, info.key, info.nkey);
            break;
        case ENGINE_KEY_ENOENT:
            STATS_NOKEY(c, cas_misses);
            break;
        default:
            ;
        }
    } else {
        SLAB_INCR(c, cmd_set, info.key, info.nkey);
    }

    if (!c->ewouldblock) {
        /* release the c->item reference */
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = 0;
    }
}
1719 
/**
 * Handle a binary-protocol GET / GETK request.
 *
 * Looks the key up through the storage engine and, depending on the engine's
 * return code, either queues a response (header, flags, optionally the key,
 * and the value) or reports the miss / error to the client.
 *
 * NOTE: the local names used here matter. The statistics macros defined at
 * the top of this file expand to code that refers to locals by name (for
 * example SLAB_GUTS uses `info.clsid`), so do not rename them casually.
 *
 * @param c the connection whose current request is being processed
 */
static void process_bin_get(conn *c) {
    item *it = NULL;

    /* The response header and body are built in place in the write buffer */
    protocol_binary_response_get* rsp = (protocol_binary_response_get*)c->wbuf;
    char* key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "GET", key, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    /*
     * Consume the status stored in c->aiostat and reset it. The engine is
     * only asked for the item when that stored status was ENGINE_SUCCESS;
     * any other stored status is handled directly by the switch below.
     */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->get(settings.engine.v0, c, &it, key, nkey,
                                      c->binary_header.request.vbucket);
    }

    uint16_t keylen;
    uint32_t bodylen;
    /* NOTE(review): nvalue presumably is the number of value iovecs the
       engine may fill in (only value[0] is used below) -- confirm */
    item_info info = { .nvalue = 1 };

    switch (ret) {
    case ENGINE_SUCCESS:
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            /* Cannot describe the item: drop our reference and report an
               internal error instead of a value */
            settings.engine.v1->release(settings.engine.v0, c, it);
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "%d: Failed to get item info\n",
                                            c->sfd);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            break;
        }

        /* A plain GET response carries no key; the body is flags + value */
        keylen = 0;
        bodylen = sizeof(rsp->message.body) + info.nbytes;

        STATS_HIT(c, get, key, nkey);

        /* GETK additionally echoes the key back to the client */
        if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
            bodylen += nkey;
            keylen = nkey;
        }
        add_bin_header(c, 0, sizeof(rsp->message.body), keylen, bodylen);
        rsp->message.header.response.cas = htonll(info.cas);

        // add the flags
        /* NOTE(review): flags are copied without a byte-order conversion
           here -- presumably the engine stores them in wire order; confirm */
        rsp->message.body.flags = info.flags;
        add_iov(c, &rsp->message.body, sizeof(rsp->message.body));

        if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
            add_iov(c, info.key, nkey);
        }

        /* The value is sent straight from the item's own storage */
        add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
        conn_set_state(c, conn_mwrite);
        /* Remember this item so we can garbage collect it later */
        c->item = it;
        break;
    case ENGINE_KEY_ENOENT:
        STATS_MISS(c, get, key, nkey);

        MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);

        if (c->noreply) {
            /* Quiet variant: a miss produces no response at all */
            conn_set_state(c, conn_new_cmd);
        } else {
            if (c->cmd == PROTOCOL_BINARY_CMD_GETK) {
                /* GETK miss: reply with KEY_ENOENT and the key as the body.
                   The key is copied into the write buffer right after the
                   space reserved for the response header. */
                char *ofs = c->wbuf + sizeof(protocol_binary_response_header);
                add_bin_header(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT,
                               0, nkey, nkey);
                memcpy(ofs, key, nkey);
                add_iov(c, ofs, nkey);
                conn_set_state(c, conn_mwrite);
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
            }
        }
        break;
    case ENGINE_EWOULDBLOCK:
        /* Only flag the connection; no response is queued in this call */
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_TMPFAIL:
        /* NOTE(review): no response is queued and no state is changed here,
           whereas process_bin_stat answers TMPFAIL with ETMPFAIL -- confirm
           this is intentional */
	break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    default:
        /* @todo add proper error handling! */
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Unknown error code: %d\n", ret);
        abort();
    }

    /* Per-prefix statistics: recorded for every outcome except "would
       block"; counted as a hit only when the engine returned SUCCESS */
    if (settings.detail_enabled && ret != ENGINE_EWOULDBLOCK) {
        stats_prefix_record_get(key, nkey, ret == ENGINE_SUCCESS);
    }
}
1828 
/**
 * Append one stat entry, framed as a binary-protocol STAT response, to the
 * connection's dynamic buffer. The caller must already have made sure the
 * buffer has room for the header plus key and value (append_stats does).
 *
 * The value is only copied when a key is present; an entry with klen == 0
 * and vlen == 0 is just a bare header.
 *
 * @param key  stat name (not NUL-terminated; klen bytes)
 * @param klen length of key in bytes
 * @param val  stat value (vlen bytes)
 * @param vlen length of val in bytes
 * @param c    connection whose dynamic buffer receives the entry
 */
static void append_bin_stats(const char *key, const uint16_t klen,
                             const char *val, const uint32_t vlen,
                             conn *c) {
    uint32_t bodylen = klen + vlen;
    protocol_binary_response_header header = {
        .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
        .response.opcode = PROTOCOL_BINARY_CMD_STAT,
        .response.keylen = (uint16_t)htons(klen),
        .response.datatype = (uint8_t)PROTOCOL_BINARY_RAW_BYTES,
        .response.bodylen = htonl(bodylen),
        .response.opaque = c->opaque
    };
    const size_t hdrlen = sizeof(header.response);
    char *cursor = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;

    /* header first ... */
    memcpy(cursor, header.bytes, hdrlen);
    cursor += hdrlen;

    /* ... then the key, immediately followed by the value (if any) */
    if (klen > 0) {
        memcpy(cursor, key, klen);
        if (vlen > 0) {
            memcpy(cursor + klen, val, vlen);
        }
    }

    c->dynamic_buffer.offset += hdrlen + bodylen;
}
1857 
1858 /**
1859  * Append a key-value pair to the stats output buffer. This function assumes
1860  * that the output buffer is big enough (it will be if you call it through
1861  * append_stats)
1862  */
append_ascii_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,conn * c)1863 static void append_ascii_stats(const char *key, const uint16_t klen,
1864                                const char *val, const uint32_t vlen,
1865                                conn *c) {
1866     char *pos = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
1867     uint32_t nbytes = 5; /* "END\r\n" or "STAT " */
1868 
1869     if (klen == 0 && vlen == 0) {
1870         memcpy(pos, "END\r\n", 5);
1871     } else {
1872         memcpy(pos, "STAT ", 5);
1873         memcpy(pos + nbytes, key, klen);
1874         nbytes += klen;
1875         if (vlen != 0) {
1876             pos[nbytes] = ' ';
1877             ++nbytes;
1878             memcpy(pos + nbytes, val, vlen);
1879             nbytes += vlen;
1880         }
1881         memcpy(pos + nbytes, "\r\n", 2);
1882         nbytes += 2;
1883     }
1884 
1885     c->dynamic_buffer.offset += nbytes;
1886 }
1887 
/**
 * Make sure the connection's dynamic buffer has at least `needed` free bytes
 * after the current offset, reallocating it if necessary.
 *
 * The capacity is doubled until the request fits. When no buffer exists yet
 * the size/offset bookkeeping is reset and sizing starts from 1024 bytes.
 *
 * @param c      connection owning the dynamic buffer
 * @param needed number of bytes about to be appended
 * @return true if the buffer is now large enough, false if realloc failed
 *         (the existing buffer, if any, is left untouched in that case)
 */
static bool grow_dynamic_buffer(conn *c, size_t needed) {
    size_t capacity = c->dynamic_buffer.size;
    size_t room = capacity - c->dynamic_buffer.offset;

    if (c->dynamic_buffer.buffer == NULL) {
        /* nothing allocated yet: start from a clean slate */
        capacity = 1024;
        c->dynamic_buffer.offset = 0;
        c->dynamic_buffer.size = 0;
        room = 0;
    }

    /* keep doubling until the pending data fits behind the current offset */
    for (; needed > room; room = capacity - c->dynamic_buffer.offset) {
        assert(capacity > 0);
        capacity <<= 1;
    }

    if (capacity == c->dynamic_buffer.size) {
        /* already big enough */
        return true;
    }

    char *grown = realloc(c->dynamic_buffer.buffer, capacity);
    if (grown == NULL) {
        return false;
    }

    c->dynamic_buffer.buffer = grown;
    c->dynamic_buffer.size = capacity;
    return true;
}
1917 
append_stats(const char * key,const uint16_t klen,const char * val,const uint32_t vlen,const void * cookie)1918 static void append_stats(const char *key, const uint16_t klen,
1919                          const char *val, const uint32_t vlen,
1920                          const void *cookie)
1921 {
1922     /* value without a key is invalid */
1923     if (klen == 0 && vlen > 0) {
1924         return ;
1925     }
1926 
1927     conn *c = (conn*)cookie;
1928 
1929     if (c->protocol == binary_prot) {
1930         size_t needed = vlen + klen + sizeof(protocol_binary_response_header);
1931         if (!grow_dynamic_buffer(c, needed)) {
1932             return ;
1933         }
1934         append_bin_stats(key, klen, val, vlen, c);
1935     } else {
1936         size_t needed = vlen + klen + 10; // 10 == "STAT = \r\n"
1937         if (!grow_dynamic_buffer(c, needed)) {
1938             return ;
1939         }
1940         append_ascii_stats(key, klen, val, vlen, c);
1941     }
1942 
1943     assert(c->dynamic_buffer.offset <= c->dynamic_buffer.size);
1944 }
1945 
/**
 * Handle a binary-protocol STAT request.
 *
 * The request key selects the statistics group: an empty key requests
 * everything, a handful of sub-commands ("reset", "settings", "detail ...",
 * "aggregate", "topkeys") are served here, and anything else is forwarded to
 * the engine. Entries are collected in the connection's dynamic buffer via
 * append_stats and sent once the terminating empty entry has been appended.
 *
 * NOTE(review): the sub-command is matched with strncmp against fixed
 * lengths without consulting nkey, i.e. it is a prefix match that may look
 * at bytes past the end of a shorter key -- confirm whether that is intended.
 *
 * @param c the connection whose current request is being processed
 */
static void process_bin_stat(conn *c) {
    char *subcommand = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "STATS", subcommand, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    /* Consume the stored status; the request is only executed when it was
       ENGINE_SUCCESS, otherwise the stored code is reported below. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        if (nkey == 0) {
            /* request all statistics */
            ret = settings.engine.v1->get_stats(settings.engine.v0, c, NULL, 0, append_stats);
            if (ret == ENGINE_SUCCESS) {
                server_stats(&append_stats, c, false);
            }
        } else if (strncmp(subcommand, "reset", 5) == 0) {
            stats_reset(c);
            settings.engine.v1->reset_stats(settings.engine.v0, c);
        } else if (strncmp(subcommand, "settings", 8) == 0) {
            process_stat_settings(&append_stats, c);
        } else if (strncmp(subcommand, "detail", 6) == 0) {
            char *subcmd_pos = subcommand + 6;
            if (settings.allow_detailed) {
                if (strncmp(subcmd_pos, " dump", 5) == 0) {
                    int len = 0;
                    char *dump_buf = stats_prefix_dump(&len);
                    if (dump_buf == NULL || len <= 0) {
                        /* Release the dump if one was returned with an
                           unusable length; free(NULL) is a no-op. */
                        free(dump_buf);
                        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                        return ;
                    } else {
                        append_stats("detailed", strlen("detailed"), dump_buf, len, c);
                        free(dump_buf);
                    }
                } else if (strncmp(subcmd_pos, " on", 3) == 0) {
                    settings.detail_enabled = 1;
                } else if (strncmp(subcmd_pos, " off", 4) == 0) {
                    settings.detail_enabled = 0;
                } else {
                    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                    return;
                }
            } else {
                /* detailed stats are disabled by configuration */
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
                return;
            }
        } else if (strncmp(subcommand, "aggregate", 9) == 0) {
            server_stats(&append_stats, c, true);
        } else if (strncmp(subcommand, "topkeys", 7) == 0) {
            topkeys_t *tk = get_independent_stats(c)->topkeys;
            if (tk != NULL) {
                topkeys_stats(tk, c, current_time, append_stats);
            } else {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
                return;
            }
        } else {
            /* unknown group: let the engine decide */
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                subcommand, nkey,
                                                append_stats);
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        /* Terminate the list with an empty entry and hand the buffer over
           to the writer; it is no longer ours afterwards. */
        append_stats(NULL, 0, NULL, 0, c);
        write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
        c->dynamic_buffer.buffer = NULL;
        break;
    case ENGINE_ENOMEM:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, 0);
        break;
    case ENGINE_TMPFAIL:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ETMPFAIL, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
2045 
/**
 * Prepare the connection to read `chunk` more bytes of the current binary
 * request (the bytes following the request header) and switch it to the
 * conn_nread state.
 *
 * If the read buffer has too little room behind the header it is doubled
 * until header + chunk fit, and any data not starting at the beginning of
 * the buffer is moved to the front. If the buffer cannot be grown the
 * connection is put into conn_closing instead.
 *
 * @param c             connection being processed
 * @param next_substate substate to enter once the bytes have been read
 * @param chunk         number of bytes to read after the header
 */
static void bin_read_chunk(conn *c, enum bin_substates next_substate, uint32_t chunk) {
    assert(c);
    c->substate = next_substate;
    c->rlbytes = chunk;

    /* offset, within rbuf, of the first byte following the request header */
    ptrdiff_t body_off = c->rcurr + sizeof(protocol_binary_request_header) - c->rbuf;

    if (c->rlbytes > c->rsize - body_off) {
        /* not enough room behind the header: grow and/or repack */
        size_t wanted = c->rlbytes + sizeof(protocol_binary_request_header);
        size_t grown = c->rsize;

        while (grown < wanted) {
            grown *= 2;
        }

        if (grown != c->rsize) {
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                        "%d: Need to grow buffer from %lu to %lu\n",
                        c->sfd, (unsigned long)c->rsize, (unsigned long)grown);
            }

            char *bigger = realloc(c->rbuf, grown);
            if (bigger == NULL) {
                if (settings.verbose) {
                    settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                            "%d: Failed to grow buffer.. closing connection\n",
                            c->sfd);
                }
                conn_set_state(c, conn_closing);
                return;
            }

            /* re-anchor rcurr at the same packet offset in the new block */
            c->rbuf = bigger;
            c->rsize = grown;
            c->rcurr = c->rbuf + body_off - sizeof(protocol_binary_request_header);
        }

        if (c->rcurr != c->rbuf) {
            /* slide the pending bytes down to the start of the buffer */
            memmove(c->rbuf, c->rcurr, c->rbytes);
            c->rcurr = c->rbuf;
            if (settings.verbose > 1) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                                "%d: Repack input buffer\n",
                                                c->sfd);
            }
        }
    }

    /* the header stays in the buffer; new data lands right behind it */
    c->ritem = c->rcurr + sizeof(protocol_binary_request_header);
    conn_set_state(c, conn_nread);
}
2098 
/*
 * Arrange for the request key plus `extra` additional bytes to be read,
 * entering `next_substate` once they have arrived.
 */
static void bin_read_key(conn *c, enum bin_substates next_substate, int extra) {
    const uint32_t chunk = c->keylen + extra;

    bin_read_chunk(c, next_substate, chunk);
}
2102 
2103 
2104 /* Just write an error message and disconnect the client */
handle_binary_protocol_error(conn * c)2105 static void handle_binary_protocol_error(conn *c) {
2106     write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
2107     if (settings.verbose) {
2108         settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
2109                 "%d: Protocol error (opcode %02x), close connection\n",
2110                 c->sfd, c->binary_header.request.opcode);
2111     }
2112     c->write_and_go = conn_closing;
2113 }
2114 
/*
 * Lazily create the SASL server context for a connection. Does nothing if
 * the connection already has one. On failure c->sasl_conn is left NULL (and
 * the failure is logged when verbose).
 */
static void init_sasl_conn(conn *c) {
    assert(c);

    if (c->sasl_conn) {
        /* already set up */
        return;
    }

    int rc = sasl_server_new("memcached",
                             NULL, NULL, NULL, NULL,
                             NULL, 0, &c->sasl_conn);
    if (rc == SASL_OK) {
        return;
    }

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                 "%d: Failed to initialize SASL conn.\n",
                 c->sfd);
    }
    c->sasl_conn = NULL;
}
2131 
/*
 * Fill in `data` with the authentication properties of the connection behind
 * `cookie`. `data` is left untouched when the connection has no SASL context.
 */
static void get_auth_data(const void *cookie, auth_data_t *data) {
    conn *c = (conn*)cookie;

    if (!c->sasl_conn) {
        return;
    }

    sasl_getprop(c->sasl_conn, SASL_USERNAME, (void*)&data->username);
#ifdef ENABLE_ISASL
    sasl_getprop(c->sasl_conn, ISASL_CONFIG, (void*)&data->config);
#endif
}
2141 
2142 #ifdef SASL_ENABLED
/*
 * Answer a SASL "list mechanisms" request with the space separated list of
 * mechanisms, or with AUTH_ERROR if the list cannot be obtained.
 */
static void bin_list_sasl_mechs(conn *c) {
    init_sasl_conn(c);

    const char *mechs = NULL;
    unsigned int mechs_len = 0;
    int rc = sasl_listmech(c->sasl_conn, NULL,
                           "",   /* What to prepend the string with */
                           " ",  /* What to separate mechanisms with */
                           "",   /* What to append to the string */
                           &mechs, &mechs_len,
                           NULL);

    if (rc == SASL_OK) {
        write_bin_response(c, (char*)mechs, 0, 0, mechs_len);
        return;
    }

    /* Perhaps there's a better error for this... */
    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                 "%d: Failed to list SASL mechanisms.\n",
                 c->sfd);
    }
    write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
}
2165 #endif
2166 
/*
 * Temporary holder for a SASL auth/step request while its payload is being
 * read. process_bin_sasl_auth() allocates one, copies the request key into
 * the start of `data`, parks it in c->item and has the payload read in right
 * behind the key; process_bin_complete_sasl_auth() consumes and frees it.
 */
struct sasl_tmp {
    int ksize;   /* length of the request key (the mechanism name) in bytes */
    int vsize;   /* length of the payload: request body length minus ksize */
    char data[]; /* data + ksize == value */
};
2172 
/**
 * First stage of a SASL auth/step request: the key (mechanism name) has been
 * read; allocate a struct sasl_tmp, stash the key in it and arrange for the
 * remaining payload to be read directly behind the key. Processing continues
 * in the bin_reading_sasl_auth_data substate.
 *
 * The body length comes straight from the client, so it is validated before
 * it is used for any size arithmetic: a body shorter than the key (or a
 * payload length that does not fit in an int) would otherwise produce a
 * negative payload length that is then used for the allocation size and the
 * read length. Such requests are treated as protocol errors.
 *
 * @param c the connection whose current request is being processed
 */
static void process_bin_sasl_auth(conn *c) {
    assert(c->binary_header.request.extlen == 0);

    const uint32_t bodylen = c->binary_header.request.bodylen;
    int nkey = c->binary_header.request.keylen;

    /* Reject inconsistent lengths before computing the payload size */
    if (bodylen < (uint32_t)nkey ||
        bodylen - (uint32_t)nkey > (uint32_t)INT_MAX) {
        handle_binary_protocol_error(c);
        return;
    }

    int vlen = (int)(bodylen - (uint32_t)nkey);

    if (nkey > MAX_SASL_MECH_LEN) {
        /* mechanism name too long: refuse and skip the payload */
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, vlen);
        c->write_and_go = conn_swallow;
        return;
    }

    char *key = binary_get_key(c);
    assert(key);

    /* header + key + payload (+2 bytes of slack); the struct size is
       accounted for exactly once */
    size_t buffer_size = sizeof(struct sasl_tmp) + nkey + vlen + 2;
    struct sasl_tmp *data = calloc(buffer_size, 1);
    if (!data) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        c->write_and_go = conn_swallow;
        return;
    }

    data->ksize = nkey;
    data->vsize = vlen;
    memcpy(data->data, key, nkey);

    /* The payload is read straight into the holder, right after the key */
    c->item = data;
    c->ritem = data->data + nkey;
    c->rlbytes = vlen;
    conn_set_state(c, conn_nread);
    c->substate = bin_reading_sasl_auth_data;
}
2206 
/**
 * Second stage of a SASL auth/step request: key and payload are now both
 * available in the struct sasl_tmp parked in c->item. Runs the SASL exchange
 * (start for SASL_AUTH, step for SASL_STEP), releases the holder and sends
 * the outcome: "Authenticated", an AUTH_CONTINUE challenge, or AUTH_ERROR.
 *
 * @param c the connection whose current request is being processed
 */
static void process_bin_complete_sasl_auth(conn *c) {
    assert(c->item);
    init_sasl_conn(c);

    const char *reply = NULL;
    unsigned int reply_len = 0;

    int mech_len = c->binary_header.request.keylen;
    int payload_len = c->binary_header.request.bodylen - mech_len;
    struct sasl_tmp *pending = c->item;

    /* NUL-terminated copy of the mechanism name (the request key) */
    char mech[mech_len + 1];
    memcpy(mech, pending->data, mech_len);
    mech[mech_len] = 0x00;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: mech: ``%s'' with %d bytes of data\n", c->sfd, mech, payload_len);
    }

    /* the client payload sits right behind the key; none => NULL */
    const char *challenge = NULL;
    if (payload_len != 0) {
        challenge = pending->data + mech_len;
    }

    int result = -1;

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SASL_AUTH:
        result = sasl_server_start(c->sasl_conn, mech,
                                   challenge, payload_len,
                                   &reply, &reply_len);
        break;
    case PROTOCOL_BINARY_CMD_SASL_STEP:
        result = sasl_server_step(c->sasl_conn,
                                  challenge, payload_len,
                                  &reply, &reply_len);
        break;
    default:
        assert(false); /* CMD should be one of the above */
        /* This code is pretty much impossible, but makes the compiler
           happier */
        if (settings.verbose) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                    "%d: Unhandled command %d with challenge %s\n",
                    c->sfd, c->cmd, challenge);
        }
        break;
    }

    /* the holder is no longer needed once the exchange has run */
    free(c->item);
    c->item = NULL;
    c->ritem = NULL;

    if (settings.verbose) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "%d: sasl result code:  %d\n",
                                        c->sfd, result);
    }

    if (result == SASL_OK) {
        auth_data_t data;

        write_bin_response(c, "Authenticated", 0, 0, strlen("Authenticated"));
        get_auth_data(c, &data);
        perform_callbacks(ON_AUTH, (const void*)&data, c);
        STATS_NOKEY(c, auth_cmds);
    } else if (result == SASL_CONTINUE) {
        /* more round trips needed: pass the server challenge on */
        add_bin_header(c, PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE, 0, 0, reply_len);
        if (reply_len > 0) {
            add_iov(c, reply, reply_len);
        }
        conn_set_state(c, conn_mwrite);
        c->write_and_go = conn_new_cmd;
    } else {
        if (settings.verbose) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "%d: Unknown sasl response:  %d\n",
                                            c->sfd, result);
        }
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        STATS_NOKEY2(c, auth_cmds, auth_errors);
    }
}
2290 
/*
 * Decide whether the connection may run its current command. The SASL
 * handshake commands and VERSION are always allowed; everything else
 * requires a SASL context that reports a user name.
 */
static bool authenticated(conn *c) {
    bool allowed;

    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS: /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_SASL_AUTH:       /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_SASL_STEP:       /* FALLTHROUGH */
    case PROTOCOL_BINARY_CMD_VERSION:
        allowed = true;
        break;
    default:
        allowed = false;
        if (c->sasl_conn) {
            const void *user = NULL;
            sasl_getprop(c->sasl_conn, SASL_USERNAME, &user);
            allowed = (user != NULL);
        }
        break;
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                "%d: authenticated() in cmd 0x%02x is %s\n",
                c->sfd, c->cmd, allowed ? "true" : "false");
    }

    return allowed;
}
2317 
binary_response_handler(const void * key,uint16_t keylen,const void * ext,uint8_t extlen,const void * body,uint32_t bodylen,uint8_t datatype,uint16_t status,uint64_t cas,const void * cookie)2318 static bool binary_response_handler(const void *key, uint16_t keylen,
2319                                     const void *ext, uint8_t extlen,
2320                                     const void *body, uint32_t bodylen,
2321                                     uint8_t datatype, uint16_t status,
2322                                     uint64_t cas, const void *cookie)
2323 {
2324     conn *c = (conn*)cookie;
2325     /* Look at append_bin_stats */
2326     size_t needed = keylen + extlen + bodylen + sizeof(protocol_binary_response_header);
2327     if (!grow_dynamic_buffer(c, needed)) {
2328         if (settings.verbose > 0) {
2329             settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
2330                     "<%d ERROR: Failed to allocate memory for response\n",
2331                     c->sfd);
2332         }
2333         return false;
2334     }
2335 
2336     char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
2337     protocol_binary_response_header header = {
2338         .response.magic = (uint8_t)PROTOCOL_BINARY_RES,
2339         .response.opcode = c->binary_header.request.opcode,
2340         .response.keylen = (uint16_t)htons(keylen),
2341         .response.extlen = extlen,
2342         .response.datatype = datatype,
2343         .response.status = (uint16_t)htons(status),
2344         .response.bodylen = htonl(bodylen + keylen + extlen),
2345         .response.opaque = c->opaque,
2346         .response.cas = htonll(cas),
2347     };
2348 
2349     memcpy(buf, header.bytes, sizeof(header.response));
2350     buf += sizeof(header.response);
2351 
2352     if (extlen > 0) {
2353         memcpy(buf, ext, extlen);
2354         buf += extlen;
2355     }
2356 
2357     if (keylen > 0) {
2358         memcpy(buf, key, keylen);
2359         buf += keylen;
2360     }
2361 
2362     if (bodylen > 0) {
2363         memcpy(buf, body, bodylen);
2364     }
2365 
2366     c->dynamic_buffer.offset += needed;
2367 
2368     return true;
2369 }
2370 
/**
 * Tap stats (these are only used by the tap thread, so they don't need
 * to be in the threadlocal struct right now).
 *
 * One 64-bit counter per kind of tap message. In the `sent` instance of
 * struct tap_stats, ship_tap_log() increments checkpoint_start,
 * checkpoint_end, mutation and delete as it ships the matching events.
 */
struct tap_cmd_stats {
    uint64_t connect;
    uint64_t mutation;
    uint64_t checkpoint_start;
    uint64_t checkpoint_end;
    uint64_t delete;
    uint64_t flush;
    uint64_t opaque;
    uint64_t vbucket_set;
};
2385 
/*
 * Process-wide tap counters, kept separately for messages sent and messages
 * received. ship_tap_log() takes `mutex` around each update of `sent`; the
 * mutex is statically initialised here, the counters start at zero.
 */
struct tap_stats {
    pthread_mutex_t mutex;           /* guards the counters below */
    struct tap_cmd_stats sent;       /* messages shipped by ship_tap_log() */
    struct tap_cmd_stats received;   /* presumably messages received from a tap peer -- updated outside this section */
} tap_stats = { .mutex = PTHREAD_MUTEX_INITIALIZER };
2391 
ship_tap_log(conn * c)2392 static void ship_tap_log(conn *c) {
2393     assert(c->thread->type == TAP);
2394     c->msgcurr = 0;
2395     c->msgused = 0;
2396     c->iovused = 0;
2397     if (add_msghdr(c) != 0) {
2398         if (settings.verbose) {
2399             settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
2400                                             "%d: Failed to create output headers. Shutting down tap connection\n", c->sfd);
2401         }
2402         conn_set_state(c, conn_closing);
2403         return ;
2404     }
2405     /* @todo add check for buffer overflow of c->wbuf) */
2406     c->wcurr = c->wbuf;
2407 
2408     bool more_data = true;
2409     bool send_data = false;
2410     bool disconnect = false;
2411 
2412     item *it;
2413     uint32_t bodylen;
2414     int ii = 0;
2415     c->icurr = c->ilist;
2416     do {
2417         /* @todo fixme! */
2418         if (ii++ == 10) {
2419             break;
2420         }
2421 
2422         void *engine;
2423         uint16_t nengine;
2424         uint8_t ttl;
2425         uint16_t tap_flags;
2426         uint32_t seqno;
2427         uint16_t vbucket;
2428 
2429         tap_event_t event = c->tap_iterator(settings.engine.v0, c, &it,
2430                                             &engine, &nengine, &ttl,
2431                                             &tap_flags, &seqno, &vbucket);
2432         union {
2433             protocol_binary_request_tap_mutation mutation;
2434             protocol_binary_request_tap_delete delete;
2435             protocol_binary_request_tap_flush flush;
2436             protocol_binary_request_tap_opaque opaque;
2437             protocol_binary_request_noop noop;
2438         } msg = {
2439             .mutation.message.header.request.magic = (uint8_t)PROTOCOL_BINARY_REQ,
2440         };
2441 
2442         msg.opaque.message.header.request.opaque = htonl(seqno);
2443         msg.opaque.message.body.tap.enginespecific_length = htons(nengine);
2444         msg.opaque.message.body.tap.ttl = ttl;
2445         msg.opaque.message.body.tap.flags = htons(tap_flags);
2446         msg.opaque.message.header.request.extlen = 8;
2447         msg.opaque.message.header.request.vbucket = htons(vbucket);
2448         item_info info = { .nvalue = 1 };
2449 
2450         switch (event) {
2451         case TAP_NOOP :
2452             send_data = true;
2453             msg.noop.message.header.request.opcode = PROTOCOL_BINARY_CMD_NOOP;
2454             msg.noop.message.header.request.extlen = 0;
2455             msg.noop.message.header.request.bodylen = htonl(0);
2456             memcpy(c->wcurr, msg.noop.bytes, sizeof(msg.noop.bytes));
2457             add_iov(c, c->wcurr, sizeof(msg.noop.bytes));
2458             c->wcurr += sizeof(msg.noop.bytes);
2459             c->wbytes += sizeof(msg.noop.bytes);
2460             break;
2461         case TAP_PAUSE :
2462             more_data = false;
2463             break;
2464         case TAP_CHECKPOINT_START:
2465         case TAP_CHECKPOINT_END:
2466         case TAP_MUTATION:
2467             if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
2468                 settings.engine.v1->release(settings.engine.v0, c, it);
2469                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
2470                                                 "%d: Failed to get item info\n", c->sfd);
2471                 break;
2472             }
2473             send_data = true;
2474             c->ilist[c->ileft++] = it;
2475 
2476             if (event == TAP_CHECKPOINT_START) {
2477                 msg.mutation.message.header.request.opcode =
2478                     PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START;
2479                 pthread_mutex_lock(&tap_stats.mutex);
2480                 tap_stats.sent.checkpoint_start++;
2481                 pthread_mutex_unlock(&tap_stats.mutex);
2482             } else if (event == TAP_CHECKPOINT_END) {
2483                 msg.mutation.message.header.request.opcode =
2484                     PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END;
2485                 pthread_mutex_lock(&tap_stats.mutex);
2486                 tap_stats.sent.checkpoint_end++;
2487                 pthread_mutex_unlock(&tap_stats.mutex);
2488             } else if (event == TAP_MUTATION) {
2489                 msg.mutation.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_MUTATION;
2490                 pthread_mutex_lock(&tap_stats.mutex);
2491                 tap_stats.sent.mutation++;
2492                 pthread_mutex_unlock(&tap_stats.mutex);
2493             }
2494 
2495             msg.mutation.message.header.request.cas = htonll(info.cas);
2496             msg.mutation.message.header.request.keylen = htons(info.nkey);
2497             msg.mutation.message.header.request.extlen = 16;
2498 
2499             bodylen = 16 + info.nkey + nengine;
2500             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2501                 bodylen += info.nbytes;
2502             }
2503             msg.mutation.message.header.request.bodylen = htonl(bodylen);
2504             msg.mutation.message.body.item.flags = htonl(info.flags);
2505             msg.mutation.message.body.item.expiration = htonl(info.exptime);
2506             msg.mutation.message.body.tap.enginespecific_length = htons(nengine);
2507             msg.mutation.message.body.tap.ttl = ttl;
2508             msg.mutation.message.body.tap.flags = htons(tap_flags);
2509             memcpy(c->wcurr, msg.mutation.bytes, sizeof(msg.mutation.bytes));
2510 
2511             add_iov(c, c->wcurr, sizeof(msg.mutation.bytes));
2512             c->wcurr += sizeof(msg.mutation.bytes);
2513             c->wbytes += sizeof(msg.mutation.bytes);
2514 
2515             if (nengine > 0) {
2516                 memcpy(c->wcurr, engine, nengine);
2517                 add_iov(c, c->wcurr, nengine);
2518                 c->wcurr += nengine;
2519                 c->wbytes += nengine;
2520             }
2521 
2522             add_iov(c, info.key, info.nkey);
2523             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2524                 add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
2525             }
2526 
2527             break;
2528         case TAP_DELETION:
2529             /* This is a delete */
2530             if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
2531                 settings.engine.v1->release(settings.engine.v0, c, it);
2532                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
2533                                                 "%d: Failed to get item info\n", c->sfd);
2534                 break;
2535             }
2536             send_data = true;
2537             c->ilist[c->ileft++] = it;
2538             msg.delete.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_DELETE;
2539             msg.delete.message.header.request.cas = htonll(info.cas);
2540             msg.delete.message.header.request.keylen = htons(info.nkey);
2541 
2542             bodylen = 8 + info.nkey + nengine;
2543             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2544                 bodylen += info.nbytes;
2545             }
2546             msg.delete.message.header.request.bodylen = htonl(bodylen);
2547 
2548             memcpy(c->wcurr, msg.delete.bytes, sizeof(msg.delete.bytes));
2549             add_iov(c, c->wcurr, sizeof(msg.delete.bytes));
2550             c->wcurr += sizeof(msg.delete.bytes);
2551             c->wbytes += sizeof(msg.delete.bytes);
2552 
2553             if (nengine > 0) {
2554                 memcpy(c->wcurr, engine, nengine);
2555                 add_iov(c, c->wcurr, nengine);
2556                 c->wcurr += nengine;
2557                 c->wbytes += nengine;
2558             }
2559 
2560             add_iov(c, info.key, info.nkey);
2561             if ((tap_flags & TAP_FLAG_NO_VALUE) == 0) {
2562                 add_iov(c, info.value[0].iov_base, info.value[0].iov_len);
2563             }
2564 
2565             pthread_mutex_lock(&tap_stats.mutex);
2566             tap_stats.sent.delete++;
2567             pthread_mutex_unlock(&tap_stats.mutex);
2568             break;
2569 
2570         case TAP_DISCONNECT:
2571             disconnect = true;
2572             more_data = false;
2573             break;
2574         case TAP_VBUCKET_SET:
2575         case TAP_FLUSH:
2576         case TAP_OPAQUE:
2577             send_data = true;
2578 
2579             if (event == TAP_OPAQUE) {
2580                 msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_OPAQUE;
2581                 pthread_mutex_lock(&tap_stats.mutex);
2582                 tap_stats.sent.opaque++;
2583                 pthread_mutex_unlock(&tap_stats.mutex);
2584 
2585             } else if (event == TAP_FLUSH) {
2586                 msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_FLUSH;
2587                 pthread_mutex_lock(&tap_stats.mutex);
2588                 tap_stats.sent.flush++;
2589                 pthread_mutex_unlock(&tap_stats.mutex);
2590             } else if (event == TAP_VBUCKET_SET) {
2591                 msg.flush.message.header.request.opcode = PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET;
2592                 msg.flush.message.body.tap.flags = htons(tap_flags);
2593                 pthread_mutex_lock(&tap_stats.mutex);
2594                 tap_stats.sent.vbucket_set++;
2595                 pthread_mutex_unlock(&tap_stats.mutex);
2596             }
2597 
2598             msg.flush.message.header.request.bodylen = htonl(8 + nengine);
2599             memcpy(c->wcurr, msg.flush.bytes, sizeof(msg.flush.bytes));
2600             add_iov(c, c->wcurr, sizeof(msg.flush.bytes));
2601             c->wcurr += sizeof(msg.flush.bytes);
2602             c->wbytes += sizeof(msg.flush.bytes);
2603             if (nengine > 0) {
2604                 memcpy(c->wcurr, engine, nengine);
2605                 add_iov(c, c->wcurr, nengine);
2606                 c->wcurr += nengine;
2607                 c->wbytes += nengine;
2608             }
2609             break;
2610         default:
2611             abort();
2612         }
2613     } while (more_data);
2614 
2615     c->ewouldblock = false;
2616     if (send_data) {
2617         conn_set_state(c, conn_mwrite);
2618         if (disconnect) {
2619             c->write_and_go = conn_closing;
2620         } else {
2621             c->write_and_go = conn_ship_log;
2622         }
2623     } else {
2624         if (disconnect) {
2625             conn_set_state(c, conn_closing);
2626         } else {
2627             /* No more items to ship to the slave at this time.. suspend.. */
2628             if (settings.verbose > 1) {
2629                 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
2630                                                 "%d: No more items in tap log.. waiting\n",
2631                                                 c->sfd);
2632             }
2633             c->ewouldblock = true;
2634         }
2635     }
2636 }
2637 
/**
 * Forward a fully read binary packet whose opcode the core does not handle
 * to the engine's unknown_command hook.
 *
 * The pending asynchronous status (c->aiostat) is consumed first: the hook
 * is only invoked when that status is ENGINE_SUCCESS, otherwise the stored
 * status is acted upon directly.
 *
 * @param c the connection whose read buffer holds the complete packet
 */
static void process_bin_unknown_packet(conn *c) {
    /* rcurr is positioned just past the body, so step back over the body
     * and the header to reach the first byte of the packet. */
    const size_t packet_len = c->binary_header.request.bodylen +
                              sizeof(c->binary_header);
    void *packet = c->rcurr - packet_len;

    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->unknown_command(settings.engine.v0, c,
                                                     packet,
                                                     binary_response_handler);
    }

    switch (status) {
    case ENGINE_SUCCESS:
        if (c->dynamic_buffer.buffer == NULL) {
            /* Nothing was queued for the client; go on to the next command. */
            conn_set_state(c, conn_new_cmd);
        } else {
            /* Send whatever was accumulated in the dynamic buffer and
             * detach it from the connection. */
            write_and_free(c, c->dynamic_buffer.buffer,
                           c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        /* FATAL ERROR, shut down connection */
        conn_set_state(c, conn_closing);
        break;
    }
}
2667 
/**
 * Handle a TAP_CONNECT request: parse the optional flags, the connection
 * name (key) and the trailing user data, then ask the engine for a tap
 * iterator.
 *
 * The packet is rejected (connection closed) when
 *   - extlen is neither 0 (no flags) nor 4 (32 bit flags),
 *   - extlen + keylen exceeds bodylen (the user data length would wrap), or
 *   - TAP_CONNECT_FLAG_BACKFILL is set and fewer than 8 bytes of user data
 *     follow the key.
 *
 * If the engine returns an iterator the connection moves to conn_ship_log;
 * otherwise NOT_SUPPORTED is sent and the connection is closed afterwards.
 *
 * @param c the connection whose read buffer holds the complete packet
 */
static void process_bin_tap_connect(conn *c) {
    const uint32_t bodylen = c->binary_header.request.bodylen;
    const uint32_t keylen = c->binary_header.request.keylen;
    const uint32_t extlen = c->binary_header.request.extlen;
    char *packet = (c->rcurr - (bodylen + sizeof(c->binary_header)));
    protocol_binary_request_tap_connect *req = (void*)packet;
    const char *key = packet + sizeof(req->bytes);
    const char *data;
    uint32_t flags = 0;
    size_t ndata;

    /* All length fields come straight from the peer. Validate them before
     * doing any unsigned arithmetic or pointer adjustment with them. */
    if ((extlen != 0 && extlen != 4) || bodylen < extlen + keylen) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: ERROR: Invalid tap connect message\n",
                                        c->sfd);
        conn_set_state(c, conn_closing);
        return;
    }
    ndata = bodylen - extlen - keylen;

    if (extlen == 4) {
        flags = ntohl(req->message.body.flags);

        if (flags & TAP_CONNECT_FLAG_BACKFILL) {
            /* the userdata has to be at least 8 bytes! */
            if (ndata < 8) {
                settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                                "%d: ERROR: Invalid tap connect message\n",
                                                c->sfd);
                conn_set_state(c, conn_closing);
                return;
            }
        }
    } else {
        /* No flags were sent: the key starts 4 bytes earlier than the
         * position implied by sizeof(req->bytes). */
        key -= 4;
    }
    data = key + keylen;

    if (settings.verbose && keylen > 0) {
        char buffer[1024];
        size_t len = keylen;
        if (len >= sizeof(buffer)) {
            len = sizeof(buffer) - 1;
        }
        memcpy(buffer, key, len);
        buffer[len] = '\0';
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: Trying to connect with named tap connection: <%s>\n",
                                        c->sfd, buffer);
    }

    TAP_ITERATOR iterator = settings.engine.v1->get_tap_iterator(
        settings.engine.v0, c, key, c->binary_header.request.keylen,
        flags, data, ndata);

    if (iterator == NULL) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: FATAL: The engine does not support tap\n",
                                        c->sfd);
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        c->write_and_go = conn_closing;
    } else {
        c->tap_iterator = iterator;
        c->which = EV_WRITE;
        conn_set_state(c, conn_ship_log);
    }
}
2726 
/**
 * Hand a TAP message received from a peer to the engine's tap_notify hook.
 *
 * Layout assumed by the parsing below: the request header, an 8 byte tap
 * section (engine-specific length, flags, ttl), for TAP_MUTATION and
 * TAP_CHECKPOINT_START/END a further 8 byte item section (flags,
 * expiration), then the engine-specific bytes, the key and the value.
 *
 * Malformed packets close the connection instead of being parsed:
 *   - the body is shorter than the fixed sections,
 *   - nengine + nkey does not fit in what remains of the body (the value
 *     length would wrap around), or
 *   - ttl is 0 (the engine is given ttl - 1).
 *
 * @param event the tap event the opcode was mapped to by the caller
 * @param c     the connection whose read buffer holds the complete packet
 */
static void process_bin_tap_packet(tap_event_t event, conn *c) {
    assert(c != NULL);
    const uint32_t bodylen = c->binary_header.request.bodylen;
    const uint16_t nkey = c->binary_header.request.keylen;
    char *packet = (c->rcurr - (bodylen + sizeof(c->binary_header)));
    protocol_binary_request_tap_no_extras *tap = (void*)packet;
    const bool has_item_section = (event == TAP_MUTATION ||
                                   event == TAP_CHECKPOINT_START ||
                                   event == TAP_CHECKPOINT_END);
    /* Bytes of the body taken by the tap section (+ item section). */
    const uint32_t fixed = has_item_section ? 16 : 8;
    uint16_t nengine = 0;
    uint8_t ttl = 0;
    bool valid = (bodylen >= fixed);

    if (valid) {
        /* Only touch the tap section once we know the body contains it. */
        nengine = ntohs(tap->message.body.tap.enginespecific_length);
        ttl = tap->message.body.tap.ttl;
        valid = (ttl > 0) &&
                ((uint32_t)nengine + (uint32_t)nkey <= bodylen - fixed);
    }

    if (!valid) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "%d: ERROR: Invalid tap packet\n",
                                        c->sfd);
        conn_set_state(c, conn_closing);
        return;
    }

    uint16_t tap_flags = ntohs(tap->message.body.tap.flags);
    uint32_t seqno = ntohl(tap->message.header.request.opaque);
    /* NOTE(review): engine_specific is not advanced past the item section
     * for mutation/checkpoint events although key and data are. Confirm
     * against the sender's layout whether this is intended. */
    char *engine_specific = packet + sizeof(tap->bytes);
    char *key = engine_specific + nengine;
    char *data = key + nkey;
    uint32_t flags = 0;
    uint32_t exptime = 0;
    uint32_t ndata = bodylen - fixed - nengine - nkey;

    if (has_item_section) {
        protocol_binary_request_tap_mutation *mutation = (void*)tap;
        flags = ntohl(mutation->message.body.item.flags);
        exptime = ntohl(mutation->message.body.item.expiration);
        /* Skip the 8 byte item section (already accounted for in ndata). */
        key += 8;
        data += 8;
    }

    ENGINE_ERROR_CODE ret = c->aiostat;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->tap_notify(settings.engine.v0, c,
                                             engine_specific, nengine,
                                             ttl - 1, tap_flags,
                                             event, seqno,
                                             key, nkey,
                                             flags, exptime,
                                             ntohll(tap->message.header.request.cas),
                                             data, ndata,
                                             c->binary_header.request.vbucket);
    }

    switch (ret) {
    case ENGINE_DISCONNECT:
        conn_set_state(c, conn_closing);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        /* Reply when the peer asked for an ack, or on failure when the
         * connection is in nack mode; otherwise stay silent. */
        if ((tap_flags & TAP_FLAG_ACK) ||
            (ret != ENGINE_SUCCESS && c->tap_nack_mode))
        {
            write_bin_packet(c, engine_error_2_protocol_error(ret), 0);
        } else {
            conn_set_state(c, conn_new_cmd);
        }
    }
}
2785 
/**
 * Deliver a tap acknowledgement (a response packet) to the engine as a
 * TAP_ACK notification carrying the response status and opaque.
 *
 * The connection is closed when the engine has no tap_notify hook or when
 * the hook returns ENGINE_DISCONNECT; for every other result the connection
 * goes back to conn_ship_log.
 *
 * @param c the connection whose read buffer holds the complete response
 */
static void process_bin_tap_ack(conn *c) {
    assert(c != NULL);

    if (settings.engine.v1->tap_notify == NULL) {
        /* No hook to deliver the ack to: treated like ENGINE_DISCONNECT. */
        conn_set_state(c, conn_closing);
        return;
    }

    /* Step back from rcurr over body and header to the packet start. */
    char *start = c->rcurr - (c->binary_header.request.bodylen +
                              sizeof(c->binary_header));
    protocol_binary_response_no_extras *response = (void*)start;
    const uint32_t opaque = ntohl(response->message.header.response.opaque);
    const uint16_t rsp_status = ntohs(response->message.header.response.status);
    char *ack_key = start + sizeof(response->bytes);

    ENGINE_ERROR_CODE result =
        settings.engine.v1->tap_notify(settings.engine.v0, c,
                                       NULL, 0,          /* no engine data */
                                       0, rsp_status,    /* ttl, tap flags */
                                       TAP_ACK, opaque,
                                       ack_key,
                                       c->binary_header.request.keylen,
                                       0, 0, 0,          /* flags, exptime, cas */
                                       NULL, 0,          /* no value */
                                       0);               /* vbucket */

    conn_set_state(c, (result == ENGINE_DISCONNECT) ? conn_closing
                                                    : conn_ship_log);
}
2809 
/**
 * Handle a NOOP response packet. Nothing in the packet is inspected; the
 * connection is simply moved to conn_new_cmd so the next packet is
 * processed.
 *
 * @param c the connection the response arrived on (must not be NULL)
 */
static void process_bin_noop_response(conn *c) {
    assert(c != NULL);
    conn_set_state(c, conn_new_cmd);
}
2817 
/**
 * Apply a VERBOSITY request: read the 32 bit level from the request body,
 * clamp it to MAX_VERBOSITY_LEVEL, store it in settings.verbose, fire the
 * ON_LOG_LEVEL callbacks and send an empty success response.
 *
 * @param c the connection whose read buffer holds the complete packet
 */
static void process_bin_verbosity(conn *c) {
    /* rcurr points just past the body; rewind over body and header. */
    const size_t consumed = c->binary_header.request.bodylen +
                            sizeof(c->binary_header);
    char *start = c->rcurr - consumed;
    protocol_binary_request_verbosity *request = (void*)start;

    const uint32_t requested = (uint32_t)ntohl(request->message.body.level);
    uint32_t level = requested;
    if (requested > MAX_VERBOSITY_LEVEL) {
        level = MAX_VERBOSITY_LEVEL;
    }

    settings.verbose = (int)level;
    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    write_bin_response(c, NULL, 0, 0, 0);
}
2830 
/* Increment one tap_stats.received counter while holding tap_stats.mutex. */
#define TAP_STATS_RECEIVED(counter) \
    do { \
        pthread_mutex_lock(&tap_stats.mutex); \
        tap_stats.received.counter++; \
        pthread_mutex_unlock(&tap_stats.mutex); \
    } while (0)

/**
 * Route a completely read binary packet to its handler based on the opcode
 * in the request header. TAP opcodes bump the matching "received" counter
 * first; opcodes without a dedicated case go to
 * process_bin_unknown_packet().
 *
 * @param c the connection whose read buffer holds the complete packet
 */
static void process_bin_packet(conn *c) {
    /* @todo this should be an array of function pointers and call through */
    switch (c->binary_header.request.opcode) {
    case PROTOCOL_BINARY_CMD_TAP_CONNECT:
        TAP_STATS_RECEIVED(connect);
        /* The connect request is handled by a dedicated state. */
        conn_set_state(c, conn_add_tap_client);
        break;
    case PROTOCOL_BINARY_CMD_TAP_MUTATION:
        TAP_STATS_RECEIVED(mutation);
        process_bin_tap_packet(TAP_MUTATION, c);
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
        TAP_STATS_RECEIVED(checkpoint_start);
        process_bin_tap_packet(TAP_CHECKPOINT_START, c);
        break;
    case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
        TAP_STATS_RECEIVED(checkpoint_end);
        process_bin_tap_packet(TAP_CHECKPOINT_END, c);
        break;
    case PROTOCOL_BINARY_CMD_TAP_DELETE:
        TAP_STATS_RECEIVED(delete);
        process_bin_tap_packet(TAP_DELETION, c);
        break;
    case PROTOCOL_BINARY_CMD_TAP_FLUSH:
        TAP_STATS_RECEIVED(flush);
        process_bin_tap_packet(TAP_FLUSH, c);
        break;
    case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
        TAP_STATS_RECEIVED(opaque);
        process_bin_tap_packet(TAP_OPAQUE, c);
        break;
    case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
        TAP_STATS_RECEIVED(vbucket_set);
        process_bin_tap_packet(TAP_VBUCKET_SET, c);
        break;
    case PROTOCOL_BINARY_CMD_VERBOSITY:
        process_bin_verbosity(c);
        break;
    default:
        process_bin_unknown_packet(c);
        break;
    }
}

#undef TAP_STATS_RECEIVED
2889 
2890 
2891 
2892 typedef void (*RESPONSE_HANDLER)(conn*);
2893 /**
2894  * A map between the response packets op-code and the function to handle
2895  * the response message.
2896  */
2897 static RESPONSE_HANDLER response_handlers[256] = {
2898     [PROTOCOL_BINARY_CMD_NOOP] = process_bin_noop_response,
2899     [PROTOCOL_BINARY_CMD_TAP_MUTATION] = process_bin_tap_ack,
2900     [PROTOCOL_BINARY_CMD_TAP_DELETE] = process_bin_tap_ack,
2901     [PROTOCOL_BINARY_CMD_TAP_FLUSH] = process_bin_tap_ack,
2902     [PROTOCOL_BINARY_CMD_TAP_OPAQUE] = process_bin_tap_ack,
2903     [PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET] = process_bin_tap_ack,
2904     [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START] = process_bin_tap_ack,
2905     [PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END] = process_bin_tap_ack
2906 };
2907 
/**
 * Validate the header of a freshly parsed binary request and start reading
 * the part of the body that the command needs.
 *
 * Steps, in order:
 *   1. When SASL is required and the connection is not authenticated, an
 *      AUTH_ERROR is sent and the connection is closed afterwards.
 *   2. Header sanity: the key must not exceed KEY_MAX_LENGTH and
 *      keylen + extlen must fit in bodylen. The later stages subtract these
 *      fields from bodylen to obtain value lengths, so a header violating
 *      this would make those subtractions wrap around.
 *   3. Quiet opcodes (SETQ, GETQ, ...) are mapped to their base opcode with
 *      c->noreply left set; for every other opcode c->noreply is cleared.
 *   4. Per-command validation of extlen/keylen/bodylen, after which the
 *      command is answered directly or the connection is set up to read
 *      the key / remaining packet.
 *
 * Any violation is reported through handle_binary_protocol_error().
 *
 * @param c the connection whose binary_header and cmd describe the request
 */
static void dispatch_bin_command(conn *c) {
    int protocol_error = 0;

    int extlen = c->binary_header.request.extlen;
    uint16_t keylen = c->binary_header.request.keylen;
    uint32_t bodylen = c->binary_header.request.bodylen;

    if (settings.require_sasl && !authenticated(c)) {
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_AUTH_ERROR, 0);
        c->write_and_go = conn_closing;
        return;
    }

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);
    c->noreply = true;

    /* binprot supports 16bit keys, but internals are still 8bit */
    if (keylen > KEY_MAX_LENGTH) {
        handle_binary_protocol_error(c);
        return;
    }

    /*
     * The body consists of the extras, the key and the value, so the key
     * and the extras can never be larger than the body. Several commands
     * below (and the handlers they lead to) do not check this themselves.
     */
    if ((uint32_t)keylen + (uint32_t)extlen > bodylen) {
        handle_binary_protocol_error(c);
        return;
    }

    /* Fold the quiet variants into their base command. */
    switch (c->cmd) {
    case PROTOCOL_BINARY_CMD_SETQ:
        c->cmd = PROTOCOL_BINARY_CMD_SET;
        break;
    case PROTOCOL_BINARY_CMD_ADDQ:
        c->cmd = PROTOCOL_BINARY_CMD_ADD;
        break;
    case PROTOCOL_BINARY_CMD_REPLACEQ:
        c->cmd = PROTOCOL_BINARY_CMD_REPLACE;
        break;
    case PROTOCOL_BINARY_CMD_DELETEQ:
        c->cmd = PROTOCOL_BINARY_CMD_DELETE;
        break;
    case PROTOCOL_BINARY_CMD_INCREMENTQ:
        c->cmd = PROTOCOL_BINARY_CMD_INCREMENT;
        break;
    case PROTOCOL_BINARY_CMD_DECREMENTQ:
        c->cmd = PROTOCOL_BINARY_CMD_DECREMENT;
        break;
    case PROTOCOL_BINARY_CMD_QUITQ:
        c->cmd = PROTOCOL_BINARY_CMD_QUIT;
        break;
    case PROTOCOL_BINARY_CMD_FLUSHQ:
        c->cmd = PROTOCOL_BINARY_CMD_FLUSH;
        break;
    case PROTOCOL_BINARY_CMD_APPENDQ:
        c->cmd = PROTOCOL_BINARY_CMD_APPEND;
        break;
    case PROTOCOL_BINARY_CMD_PREPENDQ:
        c->cmd = PROTOCOL_BINARY_CMD_PREPEND;
        break;
    case PROTOCOL_BINARY_CMD_GETQ:
        c->cmd = PROTOCOL_BINARY_CMD_GET;
        break;
    case PROTOCOL_BINARY_CMD_GETKQ:
        c->cmd = PROTOCOL_BINARY_CMD_GETK;
        break;
    default:
        c->noreply = false;
    }

    switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_VERSION:
            if (extlen == 0 && keylen == 0 && bodylen == 0) {
                write_bin_response(c, VERSION, 0, 0, strlen(VERSION));
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_FLUSH:
            if (keylen == 0 && bodylen == extlen && (extlen == 0 || extlen == 4)) {
                bin_read_key(c, bin_read_flush_exptime, extlen);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_NOOP:
            if (extlen == 0 && keylen == 0 && bodylen == 0) {
                write_bin_response(c, NULL, 0, 0, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_SET: /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_ADD: /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_REPLACE:
            if (extlen == 8 && keylen != 0 && bodylen >= (keylen + 8)) {
                bin_read_key(c, bin_reading_set_header, 8);
            } else {
                protocol_error = 1;
            }
            break;
        /* GETQ/GETKQ were already folded into GET/GETK above; the labels
         * are kept so the case list mirrors the opcode table. */
        case PROTOCOL_BINARY_CMD_GETQ:  /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_GET:   /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_GETKQ: /* FALLTHROUGH */
        case PROTOCOL_BINARY_CMD_GETK:
            if (extlen == 0 && bodylen == keylen && keylen > 0) {
                bin_read_key(c, bin_reading_get_key, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_DELETE:
            if (keylen > 0 && extlen == 0 && bodylen == keylen) {
                bin_read_key(c, bin_reading_del_header, extlen);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_INCREMENT:
        case PROTOCOL_BINARY_CMD_DECREMENT:
            if (keylen > 0 && extlen == 20 && bodylen == (keylen + extlen)) {
                bin_read_key(c, bin_reading_incr_header, 20);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_APPEND:
        case PROTOCOL_BINARY_CMD_PREPEND:
            if (keylen > 0 && extlen == 0) {
                bin_read_key(c, bin_reading_set_header, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_STAT:
            if (extlen == 0) {
                bin_read_key(c, bin_reading_stat, 0);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_QUIT:
            if (keylen == 0 && extlen == 0 && bodylen == 0) {
                write_bin_response(c, NULL, 0, 0, 0);
                c->write_and_go = conn_closing;
                if (c->noreply) {
                    /* QUITQ: close right away. */
                    conn_set_state(c, conn_closing);
                }
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_TAP_CONNECT:
            if (settings.engine.v1->get_tap_iterator == NULL) {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
            } else {
                bin_read_chunk(c, bin_reading_packet,
                               c->binary_header.request.bodylen);
            }
            break;
        case PROTOCOL_BINARY_CMD_TAP_MUTATION:
        case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_START:
        case PROTOCOL_BINARY_CMD_TAP_CHECKPOINT_END:
        case PROTOCOL_BINARY_CMD_TAP_DELETE:
        case PROTOCOL_BINARY_CMD_TAP_FLUSH:
        case PROTOCOL_BINARY_CMD_TAP_OPAQUE:
        case PROTOCOL_BINARY_CMD_TAP_VBUCKET_SET:
            if (settings.engine.v1->tap_notify == NULL) {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, bodylen);
            } else {
                bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
            }
            break;
#ifdef SASL_ENABLED
        case PROTOCOL_BINARY_CMD_SASL_LIST_MECHS:
            if (extlen == 0 && keylen == 0 && bodylen == 0) {
                bin_list_sasl_mechs(c);
            } else {
                protocol_error = 1;
            }
            break;
        case PROTOCOL_BINARY_CMD_SASL_AUTH:
        case PROTOCOL_BINARY_CMD_SASL_STEP:
            if (extlen == 0 && keylen != 0) {
                bin_read_key(c, bin_reading_sasl_auth, 0);
            } else {
                protocol_error = 1;
            }
            break;
#endif
        case PROTOCOL_BINARY_CMD_VERBOSITY:
            if (extlen == 4 && keylen == 0 && bodylen == 4) {
                bin_read_chunk(c, bin_reading_packet,
                               c->binary_header.request.bodylen);
            } else {
                protocol_error = 1;
            }
            break;
        default:
            /* Not a core command: let the engine have a go at it if it
             * provides an unknown_command hook. */
            if (settings.engine.v1->unknown_command == NULL) {
                write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_UNKNOWN_COMMAND,
                                bodylen);
            } else {
                bin_read_chunk(c, bin_reading_packet, c->binary_header.request.bodylen);
            }
    }

    if (protocol_error)
        handle_binary_protocol_error(c);
}
3111 
/**
 * Second stage of a binary SET / ADD / REPLACE: the extras and the key have
 * been read, so allocate an item for the value and arrange for the value
 * bytes to be read straight into it.
 *
 * The flags are handed to the engine exactly as they appear in the request
 * (no byte-order conversion is applied here); the expiration is converted
 * with ntohl().
 *
 * Outcome by engine status:
 *   - ENGINE_SUCCESS:     store_op is derived from the command (or
 *                         OPERATION_CAS when the request carries a cas) and
 *                         the connection moves to conn_nread.
 *   - ENGINE_EWOULDBLOCK: c->ewouldblock is set.
 *   - ENGINE_DISCONNECT:  the connection state becomes conn_closing.
 *   - anything else:      E2BIG or ENOMEM is sent, the value is swallowed
 *                         and, for SET, the existing key is removed.
 *
 * @param c the connection whose read buffer holds extras and key
 */
static void process_bin_update(conn *c) {
    char *key;
    uint16_t nkey;
    uint32_t vlen;
    item *it = NULL;
    protocol_binary_request_set* req;

    assert(c != NULL);

    req = binary_get_request(c);
    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;

    rel_time_t expiration = ntohl(req->message.body.expiration);

    vlen = c->binary_header.request.bodylen - (nkey + c->binary_header.request.extlen);

    if (settings.verbose > 1) {
        char buffer[1024];
        const char *prefix;
        if (c->cmd == PROTOCOL_BINARY_CMD_ADD) {
            prefix = "ADD";
        } else if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            prefix = "SET";
        } else {
            prefix = "REPLACE";
        }

        size_t nw;
        nw = key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                     prefix, key, nkey);

        /* -1 signals failure; also make sure there is room left to append
         * to before computing the remaining size. */
        if (nw != (size_t)-1 && nw < sizeof(buffer)) {
            int appended = snprintf(buffer + nw, sizeof(buffer) - nw,
                                    " Value len is %u\n", (unsigned int)vlen);
            if (appended > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s",
                                                buffer);
            }
        }
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Consume the pending async status; only call into the engine when the
     * previous step (if any) succeeded. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen,
                                           req->message.body.flags,
                                           expiration);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            /* NOTE(review): the value bytes have not been consumed yet and
             * this response is sent with a swallow count of 0 - confirm
             * the stream cannot get out of sync on this path. */
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_ADD:
            c->store_op = OPERATION_ADD;
            break;
        case PROTOCOL_BINARY_CMD_SET:
            c->store_op = OPERATION_SET;
            break;
        case PROTOCOL_BINARY_CMD_REPLACE:
            c->store_op = OPERATION_REPLACE;
            break;
        default:
            assert(0);
        }

        /* A non-zero cas in the request turns any of the above into a
         * compare-and-swap store. */
        if (c->binary_header.request.cas != 0) {
            c->store_op = OPERATION_CAS;
        }

        /* Read the value directly into the item's first value segment. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }

        /*
         * Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET (but only if cas matches).
         * Anywhere else too?
         */
        if (c->cmd == PROTOCOL_BINARY_CMD_SET) {
            /* @todo fix this for the ASYNC interface! */
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey,
                                       ntohll(req->message.header.request.cas),
                                       c->binary_header.request.vbucket);
        }

        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3234 
/**
 * Second stage of a binary APPEND / PREPEND: the key has been read, so
 * allocate an item for the value and arrange for the value bytes to be read
 * straight into it.
 *
 * The value length is bodylen - keylen. A request whose body is shorter
 * than its key, or whose value length does not fit in an int, is treated as
 * a protocol error instead of being passed on as a negative length.
 *
 * Outcome by engine status:
 *   - ENGINE_SUCCESS:     store_op is set to OPERATION_APPEND/PREPEND and
 *                         the connection moves to conn_nread.
 *   - ENGINE_EWOULDBLOCK: c->ewouldblock is set.
 *   - ENGINE_DISCONNECT:  the connection state becomes conn_closing.
 *   - anything else:      E2BIG or ENOMEM is sent and the value swallowed.
 *
 * @param c the connection whose read buffer holds the key
 */
static void process_bin_append_prepend(conn *c) {
    char *key;
    int nkey;
    int vlen;
    item *it = NULL;

    assert(c != NULL);

    key = binary_get_key(c);
    nkey = c->binary_header.request.keylen;

    /* Both lengths come from the peer: check them in unsigned arithmetic
     * before narrowing the difference to int. */
    const uint32_t bodylen = c->binary_header.request.bodylen;
    if (bodylen < (uint32_t)nkey ||
        bodylen - (uint32_t)nkey > (uint32_t)INT_MAX) {
        handle_binary_protocol_error(c);
        return;
    }
    vlen = (int)(bodylen - (uint32_t)nkey);

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "Value len is %d\n", vlen);
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /* Consume the pending async status; only call into the engine when the
     * previous step (if any) succeeded. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    item_info info = { .nvalue = 1 };

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, 0, 0);
        if (ret == ENGINE_SUCCESS && !settings.engine.v1->get_item_info(settings.engine.v0,
                                                                        c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINTERNAL, 0);
            return;
        }
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, c->binary_header.request.cas);

        switch (c->cmd) {
        case PROTOCOL_BINARY_CMD_APPEND:
            c->store_op = OPERATION_APPEND;
            break;
        case PROTOCOL_BINARY_CMD_PREPEND:
            c->store_op = OPERATION_PREPEND;
            break;
        default:
            assert(0);
        }

        /* Read the value directly into the item's first value segment. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        conn_set_state(c, conn_nread);
        c->substate = bin_read_set_value;
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_E2BIG, vlen);
        } else {
            write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_ENOMEM, vlen);
        }
        /* swallow the data line */
        c->write_and_go = conn_swallow;
    }
}
3310 
/*
 * Handle a binary-protocol flush request.
 *
 * The expiration time is read from the request body only when the header's
 * extras length equals the size of that body; otherwise 0 is handed to the
 * engine.  The engine's result is translated into a binary response, and the
 * cmd_flush counter is bumped whatever the outcome.
 */
static void process_bin_flush(conn *c) {
    protocol_binary_request_flush *flush_req = binary_get_request(c);
    time_t when = 0;

    if (c->binary_header.request.extlen == sizeof(flush_req->message.body)) {
        when = ntohl(flush_req->message.body.expiration);
    }

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "%d: flush %ld", c->sfd,
                                        (long)when);
    }

    switch (settings.engine.v1->flush(settings.engine.v0, c, when)) {
    case ENGINE_SUCCESS:
        write_bin_response(c, NULL, 0, 0, 0);
        break;
    case ENGINE_ENOTSUP:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_SUPPORTED, 0);
        break;
    default:
        /* every other engine error is reported as EINVAL */
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
        break;
    }

    STATS_NOKEY(c, cmd_flush);
}
3337 
/*
 * Handle a binary-protocol delete request.
 *
 * A status left in c->aiostat is consumed first; only when it is
 * ENGINE_SUCCESS is the engine's remove() invoked.  The resulting engine
 * code is mapped onto a binary response.  ENGINE_EWOULDBLOCK sends no
 * response and only raises c->ewouldblock.
 */
static void process_bin_delete(conn *c) {
    /* Check the connection before anything below dereferences it. */
    assert(c != NULL);

    protocol_binary_request_delete* req = binary_get_request(c);

    char* key = binary_get_key(c);
    size_t nkey = c->binary_header.request.keylen;

    if (settings.verbose > 1) {
        char buffer[1024];
        if (key_to_printable_buffer(buffer, sizeof(buffer), c->sfd, true,
                                    "DELETE", key, nkey) != -1) {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c, "%s\n",
                                            buffer);
        }
    }

    /* Take over the pending status and reset the connection's async flags. */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        if (settings.detail_enabled) {
            stats_prefix_record_delete(key, nkey);
        }
        ret = settings.engine.v1->remove(settings.engine.v0, c, key, nkey,
                                         ntohll(req->message.header.request.cas),
                                         c->binary_header.request.vbucket);
    }

    /*
     * The slab statistics macros index the per-slab counters with
     * info.clsid, so a local named "info" must exist even though no item
     * is looked up here.
     */
    item_info info = { .nvalue = 1 };
    switch (ret) {
    case ENGINE_SUCCESS:
        write_bin_response(c, NULL, 0, 0, 0);
        SLAB_INCR(c, delete_hits, key, nkey);
        break;
    case ENGINE_KEY_EEXISTS:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_EEXISTS, 0);
        break;
    case ENGINE_KEY_ENOENT:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_KEY_ENOENT, 0);
        STATS_INCR(c, delete_misses, key, nkey);
        break;
    case ENGINE_NOT_MY_VBUCKET:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_NOT_MY_VBUCKET, 0);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    default:
        write_bin_packet(c, PROTOCOL_BINARY_RESPONSE_EINVAL, 0);
    }
}
3392 
/*
 * Finish a fully-read generic binary packet.  Response packets are routed
 * through the response_handlers table (an opcode with no handler closes the
 * connection); everything else goes to process_bin_packet().
 */
static void complete_nread_bin_packet(conn *c) {
    if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
        process_bin_packet(c);
        return;
    }

    RESPONSE_HANDLER on_response =
        response_handlers[c->binary_header.request.opcode];
    if (on_response) {
        on_response(c);
        return;
    }

    settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
           "%d: ERROR: Unsupported response packet received: %u\n",
            c->sfd, (unsigned int)c->binary_header.request.opcode);
    conn_set_state(c, conn_closing);
}

/*
 * Called once the bytes a binary-protocol command was waiting for have
 * arrived: dispatch on c->substate to the routine that completes that step.
 * An unknown substate is logged and aborts the process.
 */
static void complete_nread_binary(conn *c) {
    assert(c != NULL);
    assert(c->cmd >= 0);

    switch(c->substate) {
    case bin_reading_set_header: {
        /* append/prepend share the "set header" substate with plain updates */
        const bool is_concat = (c->cmd == PROTOCOL_BINARY_CMD_APPEND ||
                                c->cmd == PROTOCOL_BINARY_CMD_PREPEND);
        if (is_concat) {
            process_bin_append_prepend(c);
        } else {
            process_bin_update(c);
        }
        break;
    }
    case bin_read_set_value:
        complete_update_bin(c);
        break;
    case bin_reading_get_key:
        process_bin_get(c);
        break;
    case bin_reading_stat:
        process_bin_stat(c);
        break;
    case bin_reading_del_header:
        process_bin_delete(c);
        break;
    case bin_reading_incr_header:
        complete_incr_bin(c);
        break;
    case bin_read_flush_exptime:
        process_bin_flush(c);
        break;
    case bin_reading_sasl_auth:
        process_bin_sasl_auth(c);
        break;
    case bin_reading_sasl_auth_data:
        process_bin_complete_sasl_auth(c);
        break;
    case bin_reading_packet:
        complete_nread_bin_packet(c);
        break;
    default:
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                "Not handling substate %d\n", c->substate);
        abort();
    }
}
3452 
/*
 * Return the connection to its "no command in progress" condition: clear the
 * per-command fields, release any item still attached, shrink the buffers,
 * and pick the next state depending on whether unread input remains.
 */
static void reset_cmd_handler(conn *c) {
    c->cmd = -1;
    c->substate = bin_no_state;
    c->ascii_cmd = NULL;
    c->sbytes = 0;

    if (c->item != NULL) {
        settings.engine.v1->release(settings.engine.v0, c, c->item);
        c->item = NULL;
    }

    conn_shrink(c);

    /* buffered input left over -> parse it now; otherwise wait for more */
    conn_set_state(c, (c->rbytes > 0) ? conn_parse_cmd : conn_waiting);
}
3469 
ascii_response_handler(const void * cookie,int nbytes,const char * dta)3470 static ENGINE_ERROR_CODE ascii_response_handler(const void *cookie,
3471                                                 int nbytes,
3472                                                 const char *dta)
3473 {
3474     conn *c = (conn*)cookie;
3475     if (!grow_dynamic_buffer(c, nbytes)) {
3476         if (settings.verbose > 0) {
3477             settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
3478                     "<%d ERROR: Failed to allocate memory for response\n",
3479                     c->sfd);
3480         }
3481         return ENGINE_ENOMEM;
3482     }
3483 
3484     char *buf = c->dynamic_buffer.buffer + c->dynamic_buffer.offset;
3485     memcpy(buf, dta, nbytes);
3486     c->dynamic_buffer.offset += nbytes;
3487 
3488     return ENGINE_SUCCESS;
3489 }
3490 
/*
 * Finish an ASCII-protocol read.  Without an extension command attached this
 * is an ordinary update; otherwise the extension's execute() hook runs and
 * its result decides what happens to the connection.
 */
static void complete_nread_ascii(conn *c) {
    if (c->ascii_cmd == NULL) {
        complete_update_ascii(c);
        return;
    }

    c->ewouldblock = false;
    const ENGINE_ERROR_CODE rc =
        c->ascii_cmd->execute(c->ascii_cmd->cookie, c, 0, NULL,
                              ascii_response_handler);

    if (rc == ENGINE_SUCCESS) {
        if (c->dynamic_buffer.buffer == NULL) {
            /* the extension produced no output */
            conn_set_state(c, conn_new_cmd);
        } else {
            /* hand the accumulated buffer to the write path and detach it */
            write_and_free(c, c->dynamic_buffer.buffer,
                           c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
        }
    } else if (rc == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
    } else {
        /* ENGINE_DISCONNECT and every other error close the connection */
        conn_set_state(c, conn_closing);
    }
}
3516 
/*
 * Protocol switchboard for a completed read: forwards to the ASCII or the
 * binary completion routine according to c->protocol.
 */
static void complete_nread(conn *c) {
    assert(c != NULL);
    assert(c->protocol == ascii_prot
           || c->protocol == binary_prot);

    if (c->protocol == binary_prot) {
        complete_nread_binary(c);
        return;
    }

    if (c->protocol == ascii_prot) {
        complete_nread_ascii(c);
    }
}
3528 
/* NOTE(review): presumably the token slot holding the command word -- confirm at the call sites. */
#define COMMAND_TOKEN 0
/* Token slot process_stat() reads the stats subcommand from; same slot as KEY_TOKEN. */
#define SUBCOMMAND_TOKEN 1
/* Token slot at which process_get_command() starts reading keys. */
#define KEY_TOKEN 1

/* NOTE(review): presumably the size of the token array passed to tokenize_command() -- confirm at the call sites. */
#define MAX_TOKENS 30
3534 
3535 /*
3536  * Tokenize the command string by replacing whitespace with '\0' and update
3537  * the token array tokens with pointer to start of each token and length.
3538  * Returns total number of tokens.  The last valid token is the terminal
3539  * token (value points to the first unprocessed character of the string and
3540  * length zero).
3541  *
3542  * Usage example:
3543  *
3544  *  while(tokenize_command(command, ncommand, tokens, max_tokens) > 0) {
3545  *      for(int ix = 0; tokens[ix].length != 0; ix++) {
3546  *          ...
3547  *      }
3548  *      ncommand = tokens[ix].value - command;
3549  *      command  = tokens[ix].value;
3550  *   }
3551  */
tokenize_command(char * command,token_t * tokens,const size_t max_tokens)3552 static size_t tokenize_command(char *command, token_t *tokens, const size_t max_tokens) {
3553     char *s, *e;
3554     size_t ntokens = 0;
3555 
3556     assert(command != NULL && tokens != NULL && max_tokens > 1);
3557 
3558     for (s = e = command; ntokens < max_tokens - 1; ++e) {
3559         if (*e == ' ') {
3560             if (s != e) {
3561                 tokens[ntokens].value = s;
3562                 tokens[ntokens].length = e - s;
3563                 ntokens++;
3564                 *e = '\0';
3565             }
3566             s = e + 1;
3567         }
3568         else if (*e == '\0') {
3569             if (s != e) {
3570                 tokens[ntokens].value = s;
3571                 tokens[ntokens].length = e - s;
3572                 ntokens++;
3573             }
3574 
3575             break; /* string end */
3576         }
3577     }
3578 
3579     /*
3580      * If we scanned the whole string, the terminal value pointer is null,
3581      * otherwise it is the first unprocessed character.
3582      */
3583     tokens[ntokens].value =  *e == '\0' ? NULL : e;
3584     tokens[ntokens].length = 0;
3585     ntokens++;
3586 
3587     return ntokens;
3588 }
3589 
3590 #ifdef INNODB_MEMCACHED
detokenize(token_t * tokens,size_t ntokens,char ** out,int * nbytes)3591 static void detokenize(token_t *tokens, size_t ntokens, char **out, int *nbytes)
3592 #else
3593 static void detokenize(token_t *tokens, int ntokens, char **out, int *nbytes)
3594 #endif
3595 {
3596     int i, nb;
3597     char *buf, *p;
3598 
3599     nb = ntokens; // account for spaces, which is ntokens-1, plus the null
3600     for (i = 0; i < ntokens; ++i) {
3601         nb += tokens[i].length;
3602     }
3603 
3604     buf = malloc(nb * sizeof(char));
3605     if (buf != NULL) {
3606         p = buf;
3607         for (i = 0; i < ntokens; ++i) {
3608             memcpy(p, tokens[i].value, tokens[i].length);
3609             p += tokens[i].length;
3610             *p = ' ';
3611             p++;
3612         }
3613         buf[nb - 1] = '\0';
3614         *nbytes = nb - 1;
3615         *out = buf;
3616     }
3617 }
3618 
3619 
3620 /* set up a connection to write a buffer then free it, used for stats */
write_and_free(conn * c,char * buf,int bytes)3621 static void write_and_free(conn *c, char *buf, int bytes) {
3622     if (buf) {
3623         c->write_and_free = buf;
3624         c->wcurr = buf;
3625         c->wbytes = bytes;
3626         conn_set_state(c, conn_write);
3627         c->write_and_go = conn_new_cmd;
3628     } else {
3629         out_string(c, "SERVER_ERROR out of memory writing stats");
3630     }
3631 }
3632 
/*
 * Turn on c->noreply when the last real token of the request line is the
 * word "noreply".  Returns the connection's noreply flag as it stands
 * afterwards.
 *
 * NOTE: this is not the first place a reply may originate from;
 * process_command() can answer earlier when the request line has the wrong
 * number of tokens.  Looking for "noreply" in a malformed line is unreliable
 * anyway, so nothing is done about that.
 */
static inline bool set_noreply_maybe(conn *c, token_t *tokens, size_t ntokens)
{
    /* the final entry is the terminal token, so the candidate sits before it */
    const int candidate = ntokens - 2;
    const char *word = tokens[candidate].value;

    if (word != NULL && strcmp(word, "noreply") == 0) {
        c->noreply = true;
    }

    return c->noreply;
}
3650 
/*
 * Format one statistic value printf-style and pass it to add_stats.
 *
 * name       NUL-terminated stat name
 * add_stats  callback receiving (name, name length, value, value length, c)
 * c          connection forwarded to the callback
 * fmt, ...   printf-style format and arguments for the value
 *
 * The value is formatted into a STAT_VAL_LEN-byte local buffer; anything
 * that does not fit is truncated, and the length handed to add_stats always
 * describes the bytes actually stored.
 */
void append_stat(const char *name, ADD_STAT add_stats, conn *c,
                 const char *fmt, ...) {
    char val_str[STAT_VAL_LEN];
    /* formatting has always been limited to one byte less than the buffer */
    const size_t limit = sizeof(val_str) - 1;
    int vlen;
    va_list ap;

    assert(name);
    assert(add_stats);
    assert(c);
    assert(fmt);

    va_start(ap, fmt);
    vlen = vsnprintf(val_str, limit, fmt, ap);
    va_end(ap);

    /*
     * vsnprintf() returns the length the full output would have had (or a
     * negative value on error), not the number of bytes stored.  Clamp it
     * so the callback is never told to read beyond what val_str holds.
     */
    if (vlen < 0) {
        val_str[0] = '\0';
        vlen = 0;
    } else if ((size_t)vlen >= limit) {
        vlen = (int)(limit - 1);
    }

    add_stats(name, strlen(name), val_str, vlen, c);
}
3668 
/*
 * Handle the argument of "stats detail": "on" / "off" toggle
 * settings.detail_enabled, "dump" writes the prefix statistics, anything
 * else yields a usage error.  All of it is refused when detailed stats are
 * not allowed by the settings.
 */
inline static void process_stats_detail(conn *c, const char *command) {
    assert(c != NULL);

    if (!settings.allow_detailed) {
        out_string(c, "CLIENT_ERROR detailed stats disabled");
        return;
    }

    const bool turn_on = (strcmp(command, "on") == 0);
    if (turn_on || strcmp(command, "off") == 0) {
        settings.detail_enabled = turn_on ? 1 : 0;
        out_string(c, "OK");
        return;
    }

    if (strcmp(command, "dump") == 0) {
        int dump_len;
        char *dump = stats_prefix_dump(&dump_len);
        write_and_free(c, dump, dump_len);
        return;
    }

    out_string(c, "CLIENT_ERROR usage: stats detail on|off|dump");
}
3694 
aggregate_callback(void * in,void * out)3695 static void aggregate_callback(void *in, void *out) {
3696     struct thread_stats *out_thread_stats = out;
3697     struct independent_stats *in_independent_stats = in;
3698     threadlocal_stats_aggregate(in_independent_stats->thread_stats,
3699                                 out_thread_stats);
3700 }
3701 
3702 /* return server specific stats only */
server_stats(ADD_STAT add_stats,conn * c,bool aggregate)3703 static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate) {
3704     pid_t pid = getpid();
3705     rel_time_t now = current_time;
3706 
3707     struct thread_stats thread_stats;
3708     threadlocal_stats_clear(&thread_stats);
3709 
3710     if (aggregate && settings.engine.v1->aggregate_stats != NULL) {
3711         settings.engine.v1->aggregate_stats(settings.engine.v0,
3712                                             (const void *)c,
3713                                             aggregate_callback,
3714                                             &thread_stats);
3715     } else {
3716         threadlocal_stats_aggregate(get_independent_stats(c)->thread_stats,
3717                                     &thread_stats);
3718     }
3719 
3720     struct slab_stats slab_stats;
3721     slab_stats_aggregate(&thread_stats, &slab_stats);
3722 
3723 #ifndef __WIN32__
3724     struct rusage usage;
3725     getrusage(RUSAGE_SELF, &usage);
3726 #endif
3727 
3728     STATS_LOCK();
3729 
3730     APPEND_STAT("pid", "%lu", (long)pid);
3731     APPEND_STAT("uptime", "%u", now);
3732     APPEND_STAT("time", "%ld", now + (long)process_started);
3733     APPEND_STAT("version", "%s", VERSION);
3734     APPEND_STAT("libevent", "%s", event_get_version());
3735     APPEND_STAT("pointer_size", "%d", (int)(8 * sizeof(void *)));
3736 
3737 #ifndef __WIN32__
3738     append_stat("rusage_user", add_stats, c, "%ld.%06ld",
3739                 (long)usage.ru_utime.tv_sec,
3740                 (long)usage.ru_utime.tv_usec);
3741     append_stat("rusage_system", add_stats, c, "%ld.%06ld",
3742                 (long)usage.ru_stime.tv_sec,
3743                 (long)usage.ru_stime.tv_usec);
3744 #endif
3745 
3746     APPEND_STAT("daemon_connections", "%u", stats.daemon_conns);
3747     APPEND_STAT("curr_connections", "%u", stats.curr_conns);
3748     APPEND_STAT("total_connections", "%u", stats.total_conns);
3749     APPEND_STAT("connection_structures", "%u", stats.conn_structs);
3750     APPEND_STAT("cmd_get", "%"PRIu64, thread_stats.cmd_get);
3751     APPEND_STAT("cmd_set", "%"PRIu64, slab_stats.cmd_set);
3752     APPEND_STAT("cmd_flush", "%"PRIu64, thread_stats.cmd_flush);
3753     APPEND_STAT("auth_cmds", "%"PRIu64, thread_stats.auth_cmds);
3754     APPEND_STAT("auth_errors", "%"PRIu64, thread_stats.auth_errors);
3755     APPEND_STAT("get_hits", "%"PRIu64, slab_stats.get_hits);
3756     APPEND_STAT("get_misses", "%"PRIu64, thread_stats.get_misses);
3757     APPEND_STAT("delete_misses", "%"PRIu64, thread_stats.delete_misses);
3758     APPEND_STAT("delete_hits", "%"PRIu64, slab_stats.delete_hits);
3759     APPEND_STAT("incr_misses", "%"PRIu64, thread_stats.incr_misses);
3760     APPEND_STAT("incr_hits", "%"PRIu64, thread_stats.incr_hits);
3761     APPEND_STAT("decr_misses", "%"PRIu64, thread_stats.decr_misses);
3762     APPEND_STAT("decr_hits", "%"PRIu64, thread_stats.decr_hits);
3763     APPEND_STAT("cas_misses", "%"PRIu64, thread_stats.cas_misses);
3764     APPEND_STAT("cas_hits", "%"PRIu64, slab_stats.cas_hits);
3765     APPEND_STAT("cas_badval", "%"PRIu64, slab_stats.cas_badval);
3766     APPEND_STAT("bytes_read", "%"PRIu64, thread_stats.bytes_read);
3767     APPEND_STAT("bytes_written", "%"PRIu64, thread_stats.bytes_written);
3768     APPEND_STAT("limit_maxbytes", "%"PRIu64, settings.maxbytes);
3769     APPEND_STAT("accepting_conns", "%u",  is_listen_disabled() ? 0 : 1);
3770     APPEND_STAT("listen_disabled_num", "%"PRIu64, get_listen_disabled_num());
3771     APPEND_STAT("rejected_conns", "%" PRIu64, (unsigned long long)stats.rejected_conns);
3772     APPEND_STAT("threads", "%d", settings.num_threads);
3773     APPEND_STAT("conn_yields", "%" PRIu64, (unsigned long long)thread_stats.conn_yields);
3774     STATS_UNLOCK();
3775 
3776     /*
3777      * Add tap stats (only if non-zero)
3778      */
3779     struct tap_stats ts;
3780     pthread_mutex_lock(&tap_stats.mutex);
3781     ts = tap_stats;
3782     pthread_mutex_unlock(&tap_stats.mutex);
3783 
3784     if (ts.sent.connect) {
3785         APPEND_STAT("tap_connect_sent", "%"PRIu64, ts.sent.connect);
3786     }
3787     if (ts.sent.mutation) {
3788         APPEND_STAT("tap_mutation_sent", "%"PRIu64, ts.sent.mutation);
3789     }
3790     if (ts.sent.checkpoint_start) {
3791         APPEND_STAT("tap_checkpoint_start_sent", "%"PRIu64, ts.sent.checkpoint_start);
3792     }
3793     if (ts.sent.checkpoint_end) {
3794         APPEND_STAT("tap_checkpoint_end_sent", "%"PRIu64, ts.sent.checkpoint_end);
3795     }
3796     if (ts.sent.delete) {
3797         APPEND_STAT("tap_delete_sent", "%"PRIu64, ts.sent.delete);
3798     }
3799     if (ts.sent.flush) {
3800         APPEND_STAT("tap_flush_sent", "%"PRIu64, ts.sent.flush);
3801     }
3802     if (ts.sent.opaque) {
3803         APPEND_STAT("tap_opaque_sent", "%"PRIu64, ts.sent.opaque);
3804     }
3805     if (ts.sent.vbucket_set) {
3806         APPEND_STAT("tap_vbucket_set_sent", "%"PRIu64,
3807                     ts.sent.vbucket_set);
3808     }
3809     if (ts.received.connect) {
3810         APPEND_STAT("tap_connect_received", "%"PRIu64, ts.received.connect);
3811     }
3812     if (ts.received.mutation) {
3813         APPEND_STAT("tap_mutation_received", "%"PRIu64, ts.received.mutation);
3814     }
3815     if (ts.received.checkpoint_start) {
3816         APPEND_STAT("tap_checkpoint_start_received", "%"PRIu64, ts.received.checkpoint_start);
3817     }
3818     if (ts.received.checkpoint_end) {
3819         APPEND_STAT("tap_checkpoint_end_received", "%"PRIu64, ts.received.checkpoint_end);
3820     }
3821     if (ts.received.delete) {
3822         APPEND_STAT("tap_delete_received", "%"PRIu64, ts.received.delete);
3823     }
3824     if (ts.received.flush) {
3825         APPEND_STAT("tap_flush_received", "%"PRIu64, ts.received.flush);
3826     }
3827     if (ts.received.opaque) {
3828         APPEND_STAT("tap_opaque_received", "%"PRIu64, ts.received.opaque);
3829     }
3830     if (ts.received.vbucket_set) {
3831         APPEND_STAT("tap_vbucket_set_received", "%"PRIu64,
3832                     ts.received.vbucket_set);
3833     }
3834 }
3835 
/*
 * Emit the current daemon settings ("stats settings") through add_stats.
 *
 * add_stats  callback that receives each name/value pair
 * c          cookie forwarded to the callback
 *
 * Besides the scalar settings, one "extension" entry is produced per
 * registered daemon extension, one "logger" entry for the active logger and
 * one "ascii_extension" entry per registered ASCII protocol extension.
 */
static void process_stat_settings(ADD_STAT add_stats, void *c) {
    assert(add_stats);
    /* report the full value; an unsigned int would cut off large limits */
    APPEND_STAT("maxbytes", "%" PRIu64, (uint64_t)settings.maxbytes);
    APPEND_STAT("maxconns", "%d", settings.maxconns);
    APPEND_STAT("tcpport", "%d", settings.port);
    APPEND_STAT("udpport", "%d", settings.udpport);
    APPEND_STAT("inter", "%s", settings.inter ? settings.inter : "NULL");
    APPEND_STAT("verbosity", "%d", settings.verbose);
    APPEND_STAT("oldest", "%lu", (unsigned long)settings.oldest_live);
    APPEND_STAT("evictions", "%s", settings.evict_to_free ? "on" : "off");
    APPEND_STAT("domain_socket", "%s",
                settings.socketpath ? settings.socketpath : "NULL");
    APPEND_STAT("umask", "%o", settings.access);
    APPEND_STAT("growth_factor", "%.2f", settings.factor);
    APPEND_STAT("chunk_size", "%d", settings.chunk_size);
    APPEND_STAT("num_threads", "%d", settings.num_threads);
    APPEND_STAT("num_threads_per_udp", "%d", settings.num_threads_per_udp);
    APPEND_STAT("stat_key_prefix", "%c", settings.prefix_delimiter);
    APPEND_STAT("detail_enabled", "%s",
                settings.detail_enabled ? "yes" : "no");
    APPEND_STAT("allow_detailed", "%s",
                settings.allow_detailed ? "yes" : "no");
    APPEND_STAT("reqs_per_event", "%d", settings.reqs_per_event);
    APPEND_STAT("reqs_per_tap_event", "%d", settings.reqs_per_tap_event);
    APPEND_STAT("cas_enabled", "%s", settings.use_cas ? "yes" : "no");
    APPEND_STAT("tcp_backlog", "%d", settings.backlog);
    APPEND_STAT("binding_protocol", "%s",
                prot_text(settings.binding_protocol));
#ifdef SASL_ENABLED
    APPEND_STAT("auth_enabled_sasl", "%s", "yes");
#else
    APPEND_STAT("auth_enabled_sasl", "%s", "no");
#endif

#ifdef ENABLE_ISASL
    APPEND_STAT("auth_sasl_engine", "%s", "isasl");
#elif defined(ENABLE_SASL)
    APPEND_STAT("auth_sasl_engine", "%s", "cyrus");
#else
    APPEND_STAT("auth_sasl_engine", "%s", "none");
#endif
    APPEND_STAT("auth_required_sasl", "%s", settings.require_sasl ? "yes" : "no");
    APPEND_STAT("item_size_max", "%d", settings.item_size_max);
    APPEND_STAT("topkeys", "%d", settings.topkeys);

    /* one entry per registered daemon extension */
    for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("extension", "%s", ptr->get_name());
    }

    APPEND_STAT("logger", "%s", settings.extensions.logger->get_name());

    /* one entry per registered ASCII protocol extension */
    for (EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
         ptr != NULL;
         ptr = ptr->next) {
        APPEND_STAT("ascii_extension", "%s", ptr->get_name(ptr->cookie));
    }
}
3895 
/*
 * Handle the ASCII "stats" command and its subcommands.
 *
 * c        connection issuing the command
 * tokens   tokenized request line; tokens[SUBCOMMAND_TOKEN] is the subcommand
 * ntokens  number of entries in tokens (terminal entry included)
 *
 * Dispatch:
 *   (no subcommand, ntokens == 2)  server stats followed by engine stats
 *   reset                          stats_reset()
 *   detail on|off|dump             process_stats_detail()
 *   settings                       process_stat_settings()
 *   cachedump <id> <limit>         argument validation; dump only with FUTURE
 *   aggregate                      server stats in aggregate mode
 *   topkeys                        topkeys_stats() when topkeys are available
 *   anything else                  forwarded to the engine's get_stats()
 *
 * Returns NULL once the request has been dealt with here.  When the engine
 * answers ENGINE_EWOULDBLOCK, c->ewouldblock is set and a non-NULL pointer
 * into the request is returned instead.
 * NOTE(review): what the caller does with that pointer is not visible from
 * this function -- confirm at the call site.
 */
static char *process_stat(conn *c, token_t *tokens, const size_t ntokens) {
    const char *subcommand = tokens[SUBCOMMAND_TOKEN].value;
    c->dynamic_buffer.offset = 0;

    if (ntokens == 2) {
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            server_stats(&append_stats, c, false);
            ret = settings.engine.v1->get_stats(settings.engine.v0, c,
                                                NULL, 0, &append_stats);
            if (ret == ENGINE_EWOULDBLOCK) {
                c->ewouldblock = true;
                /* NOTE(review): presumably skips the 5-character "stats"
                   command word -- confirm */
                return c->rcurr + 5;
            }
        }
    } else if (strcmp(subcommand, "reset") == 0) {
        stats_reset(c);
        out_string(c, "RESET");
        return NULL;
    } else if (strcmp(subcommand, "detail") == 0) {
        /* NOTE: how to tackle detail with binary? */
        if (ntokens < 4) {
            process_stats_detail(c, "");  /* outputs the error message */
        } else {
            process_stats_detail(c, tokens[2].value);
        }
        /* Output already generated */
        return NULL;
    } else if (strcmp(subcommand, "settings") == 0) {
        process_stat_settings(&append_stats, c);
    } else if (strcmp(subcommand, "cachedump") == 0) {
        char *buf = NULL;
        unsigned int bytes = 0, id, limit = 0;

        if (ntokens < 5) {
            out_string(c, "CLIENT_ERROR bad command line");
            return NULL;
        }

        if (!safe_strtoul(tokens[2].value, &id) ||
            !safe_strtoul(tokens[3].value, &limit)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return NULL;
        }

        if (id >= POWER_LARGEST) {
            out_string(c, "CLIENT_ERROR Illegal slab id");
            return NULL;
        }

#ifdef FUTURE
        buf = item_cachedump(id, limit, &bytes);
#endif
        /* without FUTURE buf stays NULL and write_and_free() reports an error */
        write_and_free(c, buf, bytes);
        return NULL;
    } else if (strcmp(subcommand, "aggregate") == 0) {
        server_stats(&append_stats, c, true);
    } else if (strcmp(subcommand, "topkeys") == 0) {
        topkeys_t *tk = get_independent_stats(c)->topkeys;
        if (tk != NULL) {
            topkeys_stats(tk, c, current_time, append_stats);
        } else {
            out_string(c, "ERROR");
            return NULL;
        }
    } else {
        /* getting here means that the subcommand is either engine specific or
           is invalid. query the engine and see. */
        ENGINE_ERROR_CODE ret = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (ret == ENGINE_SUCCESS) {
            char *buf = NULL;
            int nb = -1;
            detokenize(&tokens[1], ntokens - 2, &buf, &nb);
            if (buf == NULL) {
                /* detokenize() leaves buf untouched when its allocation
                   fails; do not hand a NULL key of length -1 to the engine */
                ret = ENGINE_ENOMEM;
            } else {
                ret = settings.engine.v1->get_stats(settings.engine.v0, c, buf,
                                                    nb, append_stats);
                free(buf);
            }
        }

        switch (ret) {
        case ENGINE_SUCCESS:
            append_stats(NULL, 0, NULL, 0, c);
            write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
            c->dynamic_buffer.buffer = NULL;
            break;
        case ENGINE_ENOMEM:
            out_string(c, "SERVER_ERROR out of memory writing stats");
            break;
        case ENGINE_DISCONNECT:
            c->state = conn_closing;
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            return tokens[SUBCOMMAND_TOKEN].value;
        default:
            out_string(c, "ERROR");
            break;
        }

        return NULL;
    }

    /* append terminator and start the transfer */
    append_stats(NULL, 0, NULL, 0, c);

    if (c->dynamic_buffer.buffer == NULL) {
        out_string(c, "SERVER_ERROR out of memory writing stats");
    } else {
        write_and_free(c, c->dynamic_buffer.buffer, c->dynamic_buffer.offset);
        c->dynamic_buffer.buffer = NULL;
    }

    return NULL;
}
4016 
4017 /**
4018  * Get a suffix buffer and insert it into the list of used suffix buffers
4019  * @param c the connection object
4020  * @return a pointer to a new suffix buffer or NULL if allocation failed
4021  */
get_suffix_buffer(conn * c)4022 static char *get_suffix_buffer(conn *c) {
4023     if (c->suffixleft == c->suffixsize) {
4024         char **new_suffix_list;
4025         size_t sz = sizeof(char*) * c->suffixsize * 2;
4026 
4027         new_suffix_list = realloc(c->suffixlist, sz);
4028         if (new_suffix_list) {
4029             c->suffixsize *= 2;
4030             c->suffixlist = new_suffix_list;
4031         } else {
4032             if (settings.verbose > 1) {
4033                 settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4034                         "=%d Failed to resize suffix buffer\n", c->sfd);
4035             }
4036 
4037             return NULL;
4038         }
4039     }
4040 
4041     char *suffix = cache_alloc(c->thread->suffix_cache);
4042     if (suffix != NULL) {
4043         *(c->suffixlist + c->suffixleft) = suffix;
4044         ++c->suffixleft;
4045     }
4046 
4047     return suffix;
4048 }
4049 
4050 /* ntokens is overwritten here... shrug.. */
process_get_command(conn * c,token_t * tokens,size_t ntokens,bool return_cas)4051 static inline char* process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) {
4052     char *key;
4053     size_t nkey;
4054     int i = c->ileft;
4055     item *it = NULL;
4056     token_t *key_token = &tokens[KEY_TOKEN];
4057     int range = false;
4058     assert(c != NULL);
4059 
4060     do {
4061         while(key_token->length != 0) {
4062             /* whether there are more keys to fetch */
4063             bool next_get = (key_token + 1)->value;
4064 
4065             key = key_token->value;
4066             nkey = key_token->length;
4067 
4068             /* whether this is a range search */
4069             if (nkey >=  2 && key[0] == '@'
4070 		&& (key[1] == '>' || key[1] == '<')) {
4071 		range = true;
4072             }
4073 
4074             if(nkey > KEY_MAX_LENGTH) {
4075                 out_string(c, "CLIENT_ERROR bad command line format");
4076                 return NULL;
4077             }
4078 
4079             ENGINE_ERROR_CODE ret = c->aiostat;
4080             c->aiostat = ENGINE_SUCCESS;
4081 
4082             if (ret == ENGINE_SUCCESS) {
4083                 ret = settings.engine.v1->get(settings.engine.v0, c, &it,
4084 					      key, nkey, next_get);
4085             }
4086 
4087             switch (ret) {
4088             case ENGINE_EWOULDBLOCK:
4089                 c->ewouldblock = true;
4090                 c->ileft = i;
4091                 return key;
4092 
4093             case ENGINE_SUCCESS:
4094                 break;
4095             case ENGINE_KEY_ENOENT:
4096             default:
4097                 it = NULL;
4098                 break;
4099             }
4100 
4101             if (settings.detail_enabled) {
4102                 stats_prefix_record_get(key, nkey, NULL != it);
4103             }
4104 
4105             if (it) {
4106                 item_info info = { .nvalue = 1 };
4107                 if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it,
4108                                                        &info)) {
4109                     settings.engine.v1->release(settings.engine.v0, c, it);
4110                     out_string(c, "SERVER_ERROR error getting item data");
4111                     break;
4112                 }
4113 
4114                 if (i >= c->isize) {
4115                     item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2);
4116                     if (new_list) {
4117                         c->isize *= 2;
4118                         c->ilist = new_list;
4119                     } else {
4120                         settings.engine.v1->release(settings.engine.v0, c, it);
4121                         break;
4122                     }
4123                 }
4124 
4125                 /* Rebuild the suffix */
4126                 char *suffix = get_suffix_buffer(c);
4127                 if (suffix == NULL) {
4128                     out_string(c, "SERVER_ERROR out of memory rebuilding suffix");
4129                     settings.engine.v1->release(settings.engine.v0, c, it);
4130                     return NULL;
4131                 }
4132                 int suffix_len = snprintf(suffix, SUFFIX_SIZE,
4133                                           " %u %u\r\n", htonl(info.flags),
4134                                           info.nbytes);
4135 
4136                 /*
4137                  * Construct the response. Each hit adds three elements to the
4138                  * outgoing data list:
4139                  *   "VALUE "
4140                  *   key
4141                  *   " " + flags + " " + data length + "\r\n" + data (with \r\n)
4142                  */
4143 
4144                 MEMCACHED_COMMAND_GET(c->sfd, info.key, info.nkey,
4145                                       info.nbytes, info.cas);
4146                 if (return_cas)
4147                 {
4148 
4149                   char *cas = get_suffix_buffer(c);
4150                   if (cas == NULL) {
4151                     out_string(c, "SERVER_ERROR out of memory making CAS suffix");
4152                     settings.engine.v1->release(settings.engine.v0, c, it);
4153                     return NULL;
4154                   }
4155                   int cas_len = snprintf(cas, SUFFIX_SIZE, " %"PRIu64"\r\n",
4156                                          info.cas);
4157                   if (add_iov(c, "VALUE ", 6) != 0 ||
4158                       add_iov(c, info.key, info.nkey) != 0 ||
4159                       add_iov(c, suffix, suffix_len - 2) != 0 ||
4160                       add_iov(c, cas, cas_len) != 0 ||
4161                       add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
4162                       add_iov(c, "\r\n", 2) != 0)
4163                       {
4164                           settings.engine.v1->release(settings.engine.v0, c, it);
4165                           break;
4166                       }
4167                 }
4168                 else
4169                 {
4170                   if (add_iov(c, "VALUE ", 6) != 0 ||
4171                       add_iov(c, info.key, info.nkey) != 0 ||
4172                       add_iov(c, suffix, suffix_len) != 0 ||
4173                       add_iov(c, info.value[0].iov_base, info.value[0].iov_len) != 0 ||
4174                       add_iov(c, "\r\n", 2) != 0)
4175                       {
4176                           settings.engine.v1->release(settings.engine.v0, c, it);
4177                           break;
4178                       }
4179                 }
4180 
4181 
4182                 if (settings.verbose > 1) {
4183                     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4184                                                     ">%d sending key %s\n",
4185                                                     c->sfd, info.key);
4186                 }
4187 
4188                 /* item_get() has incremented it->refcount for us */
4189                 STATS_HIT(c, get, key, nkey);
4190                 *(c->ilist + i) = it;
4191                 i++;
4192 
4193             } else {
4194                 STATS_MISS(c, get, key, nkey);
4195                 MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);
4196             }
4197 
4198             if (!range) {
4199 		key_token++;
4200             } else {
4201 		if (ret == ENGINE_KEY_ENOENT) {
4202 			key_token->value = NULL;
4203 		}
4204 		break;
4205 	    }
4206         }
4207 
4208         /*
4209          * If the command string hasn't been fully processed, get the next set
4210          * of tokens.
4211          */
4212         if(key_token->value != NULL) {
4213             ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS);
4214             key_token = tokens;
4215         }
4216 
4217     } while(key_token->value != NULL);
4218 
4219     c->icurr = c->ilist;
4220     c->ileft = i;
4221     c->suffixcurr = c->suffixlist;
4222 
4223     if (settings.verbose > 1) {
4224         settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4225                                         ">%d END\n", c->sfd);
4226     }
4227 
4228     /*
4229         If the loop was terminated because of out-of-memory, it is not
4230         reliable to add END\r\n to the buffer, because it might not end
4231         in \r\n. So we send SERVER_ERROR instead.
4232     */
4233     if (key_token->value != NULL || add_iov(c, "END\r\n", 5) != 0
4234         || (IS_UDP(c->transport) && build_udp_headers(c) != 0)) {
4235         out_string(c, "SERVER_ERROR out of memory writing get response");
4236     }
4237     else {
4238         conn_set_state(c, conn_mwrite);
4239         c->msgcurr = 0;
4240     }
4241 
4242     return NULL;
4243 }
4244 
/*
 * Handle the command line of an ASCII storage request (add/set/replace/
 * prepend/append, and cas when handle_cas is true).
 *
 * Tokens read here: [KEY_TOKEN] key, [2] flags, [3] exptime, [4] value
 * length and, only when handle_cas is set, [5] the CAS id.
 *
 * On success an item is allocated through the engine, stored in c->item and
 * the connection is switched to conn_nread so that the value bytes are read
 * directly into the item. Malformed arguments produce a CLIENT_ERROR reply.
 * When the engine fails to allocate, an error is sent and the data line that
 * follows the command is swallowed.
 *
 * @param c          connection issuing the command
 * @param tokens     tokenized command line
 * @param ntokens    number of entries in tokens
 * @param store_op   store operation recorded in c->store_op
 * @param handle_cas true when tokens[5] carries a CAS id that must be parsed
 */
static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, ENGINE_STORE_OPERATION store_op, bool handle_cas) {
    char *key;
    size_t nkey;
    unsigned int flags;
    int32_t exptime_int = 0;
    time_t exptime;
    int vlen = 0;
    uint64_t req_cas_id = 0;
    item *it = NULL;

    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    key = tokens[KEY_TOKEN].value;
    nkey = tokens[KEY_TOKEN].length;

    if (! (safe_strtoul(tokens[2].value, (uint32_t *)&flags)
           && safe_strtol(tokens[3].value, &exptime_int)
           && safe_strtol(tokens[4].value, (int32_t *)&vlen))) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    /* Negative expire values not allowed */

    if (exptime_int < 0) {
        out_string(c, "CLIENT_ERROR Invalid expire time");
        return;
    }

    /* Ubuntu 8.04 breaks when I pass exptime to safe_strtol */
    exptime = exptime_int;

    /* The CAS id is only present (and only parsed) for the cas command. */
    if (handle_cas) {
        if (!safe_strtoull(tokens[5].value, &req_cas_id)) {
            out_string(c, "CLIENT_ERROR bad command line format");
            return;
        }
    }

    /*
     * Reject negative lengths, and also lengths so large that the
     * "vlen + 2" computed below (value plus trailing "\r\n") would overflow
     * int and yield a bogus number of bytes to swallow.
     */
    if (vlen < 0 || vlen > INT_MAX - 2) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return;
    }

    if (settings.detail_enabled) {
        stats_prefix_record_set(key, nkey);
    }

    /*
     * Start from the status left in c->aiostat; the engine is only called
     * when that status is ENGINE_SUCCESS.
     */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;

    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->allocate(settings.engine.v0, c,
                                           &it, key, nkey,
                                           vlen, htonl(flags), exptime);
    }

    item_info info = { .nvalue = 1 };
    switch (ret) {
    case ENGINE_SUCCESS:
        item_set_cas(c, it, req_cas_id);
        if (!settings.engine.v1->get_item_info(settings.engine.v0, c, it, &info)) {
            settings.engine.v1->release(settings.engine.v0, c, it);
            out_string(c, "SERVER_ERROR error getting item data");
            break;
        }
        /* Read the value bytes straight into the item's buffer. */
        c->item = it;
        c->ritem = info.value[0].iov_base;
        c->rlbytes = vlen;
        c->store_op = store_op;
        conn_set_state(c, conn_nread);
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        if (ret == ENGINE_E2BIG) {
            out_string(c, "SERVER_ERROR object too large for cache");
        } else {
            out_string(c, "SERVER_ERROR out of memory storing object");
        }
        /* swallow the data line */
        c->write_and_go = conn_swallow;
        c->sbytes = vlen + 2;

        /* Avoid stale data persisting in cache because we failed alloc.
         * Unacceptable for SET. Anywhere else too? */
        if (store_op == OPERATION_SET) {
            settings.engine.v1->remove(settings.engine.v0, c, key, nkey, 0, 0);
        }
        break;
    }
}
4349 
/*
 * Handle the ASCII "incr" and "decr" commands.
 *
 * tokens[KEY_TOKEN] is the key and tokens[2] the unsigned 64-bit delta. The
 * engine's arithmetic() entry point performs the update; the reply is the new
 * value on success or an error/NOT_FOUND line otherwise.
 *
 * @param c       connection issuing the command
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens
 * @param incr    true for incr, false for decr
 * @return the key pointer when the engine returned ENGINE_EWOULDBLOCK,
 *         NULL in every other case
 */
static char* process_arithmetic_command(conn *c, token_t *tokens, const size_t ntokens, const bool incr) {
    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    uint64_t delta;
    if (!safe_strtoull(tokens[2].value, &delta)) {
        out_string(c, "CLIENT_ERROR invalid numeric delta argument");
        return NULL;
    }

    /*
     * Start from the status left in c->aiostat; the engine is only invoked
     * when that status is ENGINE_SUCCESS.
     */
    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;

    uint64_t cas;
    uint64_t value = 0;
    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->arithmetic(settings.engine.v0, c, key,
                                                nkey, incr, false, delta, 0,
                                                0, &cas, &value, 0);
    }

    /* The engine could not answer yet: flag the connection for a retry. */
    if (status == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return key;
    }

    /* Fixed reply line for the outcomes that need nothing else. */
    const char *reply = NULL;
    char number[INCR_MAX_STORAGE_LEN];

    switch (status) {
    case ENGINE_SUCCESS:
        if (incr) {
            STATS_INCR(c, incr_hits, key, nkey);
        } else {
            STATS_INCR(c, decr_hits, key, nkey);
        }
        snprintf(number, sizeof(number), "%"PRIu64, value);
        reply = number;
        break;
    case ENGINE_KEY_ENOENT:
        if (incr) {
            STATS_INCR(c, incr_misses, key, nkey);
        } else {
            STATS_INCR(c, decr_misses, key, nkey);
        }
        reply = "NOT_FOUND";
        break;
    case ENGINE_ENOMEM:
        reply = "SERVER_ERROR out of memory";
        break;
    case ENGINE_TMPFAIL:
        reply = "SERVER_ERROR temporary failure";
        break;
    case ENGINE_EINVAL:
        reply = "CLIENT_ERROR cannot increment or decrement non-numeric value";
        break;
    case ENGINE_NOT_STORED:
        reply = "SERVER_ERROR failed to store item";
        break;
    case ENGINE_ENOTSUP:
        reply = "SERVER_ERROR not supported";
        break;
    case ENGINE_DISCONNECT:
        c->state = conn_closing;
        break;
    default:
        /* Any other engine status is treated as fatal. */
        abort();
    }

    if (reply != NULL) {
        out_string(c, reply);
    }

    return NULL;
}
4429 
/*
 * Handle the ASCII "delete" command.
 *
 * Accepted forms beyond the bare "delete <key>": one extra token that is
 * either "0" or sets noreply, or two extra tokens where the first is "0" and
 * noreply is set. Anything else is answered with a usage error.
 *
 * @param c       connection issuing the command
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens
 * @return the key pointer when the engine returned ENGINE_EWOULDBLOCK,
 *         NULL in every other case
 */
static char *process_delete_command(conn *c, token_t *tokens,
                                    const size_t ntokens) {
    assert(c != NULL);

    if (ntokens > 3) {
        const bool zero_hold = (strcmp(tokens[KEY_TOKEN + 1].value, "0") == 0);
        const bool noreply_set = set_noreply_maybe(c, tokens, ntokens);
        bool acceptable;

        switch (ntokens) {
        case 4:
            acceptable = zero_hold || noreply_set;
            break;
        case 5:
            acceptable = zero_hold && noreply_set;
            break;
        default:
            acceptable = false;
            break;
        }

        if (!acceptable) {
            out_string(c, "CLIENT_ERROR bad command line format.  "
                       "Usage: delete <key> [noreply]");
            return NULL;
        }
    }

    char *key = tokens[KEY_TOKEN].value;
    size_t nkey = tokens[KEY_TOKEN].length;

    if (nkey > KEY_MAX_LENGTH) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /*
     * Start from the status left in c->aiostat; the engine is only invoked
     * when that status is ENGINE_SUCCESS.
     */
    ENGINE_ERROR_CODE status = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (status == ENGINE_SUCCESS) {
        status = settings.engine.v1->remove(settings.engine.v0, c,
                                            key, nkey, 0, 0);
    }

    /* SLAB_INCR expands to code that reads a local named "info". */
    item_info info = { .nvalue = 1 };

    if (status == ENGINE_EWOULDBLOCK) {
        c->ewouldblock = true;
        return key;
    }

    if (status == ENGINE_SUCCESS) {
        out_string(c, "DELETED");
        SLAB_INCR(c, delete_hits, key, nkey);
    } else if (status == ENGINE_TMPFAIL) {
        out_string(c, "SERVER_ERROR temporary failure");
    } else {
        out_string(c, "NOT_FOUND");
        STATS_INCR(c, delete_misses, key, nkey);
    }

    /* Only reached once the engine has given a final answer. */
    if (settings.detail_enabled) {
        stats_prefix_record_delete(key, nkey);
    }
    return NULL;
}
4488 
/*
 * Handle the ASCII "bind <table_id_name>" command by forwarding the name to
 * the engine's bind() entry point.
 *
 * Replies "SUCCEED" when the engine reports ENGINE_SUCCESS and "NOT_FOUND"
 * for every other final status (including ENGINE_TMPFAIL).
 *
 * @param c       connection issuing the command
 * @param tokens  tokenized command line; tokens[KEY_TOKEN] is the name
 * @param ntokens number of entries in tokens
 * @return the name pointer when the engine returned ENGINE_EWOULDBLOCK,
 *         NULL in every other case
 */
static char *process_bind_command(conn *c, token_t *tokens,
                                  const size_t ntokens) {
    char *name;
    size_t name_len;

    assert(c != NULL);

    if (ntokens > 3) {
        out_string(c, "CLIENT_ERROR bad command line format.  "
                      "Usage: bind <table_id_name>");
        return NULL;
    }

    name = tokens[KEY_TOKEN].value;
    name_len = tokens[KEY_TOKEN].length;

    /* The name must be non-empty and no longer than a key. */
    if (name_len > KEY_MAX_LENGTH || name_len == 0) {
        out_string(c, "CLIENT_ERROR bad command line format");
        return NULL;
    }

    /*
     * Start from the status left in c->aiostat; the engine is only invoked
     * when that status is ENGINE_SUCCESS.
     */
    ENGINE_ERROR_CODE ret = c->aiostat;
    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    if (ret == ENGINE_SUCCESS) {
        ret = settings.engine.v1->bind(settings.engine.v0, c,
                                       name, name_len);
    }

    switch (ret) {
    case ENGINE_SUCCESS:
        out_string(c, "SUCCEED");
        break;
    case ENGINE_EWOULDBLOCK:
        c->ewouldblock = true;
        return name;
    case ENGINE_TMPFAIL:
    default:
        out_string(c, "NOT_FOUND");
        break;
    }

    return NULL;
}
4534 
/*
 * Handle the ASCII "verbosity <level> [noreply]" command.
 *
 * The level in tokens[1] is clamped to MAX_VERBOSITY_LEVEL, stored in
 * settings.verbose, and the ON_LOG_LEVEL callbacks are run. A level that
 * does not parse, or "verbosity noreply" without a level, is answered with
 * "ERROR".
 *
 * @param c       connection issuing the command
 * @param tokens  tokenized command line
 * @param ntokens number of entries in tokens
 */
static void process_verbosity_command(conn *c, token_t *tokens, const size_t ntokens) {
    assert(c != NULL);

    set_noreply_maybe(c, tokens, ntokens);

    /* "verbosity noreply" has no level argument: reply even though noreply was requested. */
    if (ntokens == 3 && c->noreply) {
        c->noreply = false;
        out_string(c, "ERROR");
        return;
    }

    unsigned int level;
    if (!safe_strtoul(tokens[1].value, &level)) {
        out_string(c, "ERROR");
        return;
    }

    if (level > MAX_VERBOSITY_LEVEL) {
        level = MAX_VERBOSITY_LEVEL;
    }
    settings.verbose = level;
    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
    out_string(c, "OK");
}
4556 
/*
 * Tokenize one ASCII protocol command line and dispatch it to the matching
 * handler (get/gets/bget, the storage commands, incr/decr, delete, bind,
 * stats, flush_all, version, quit, verbosity) or, failing that, to the
 * registered ASCII protocol extensions.
 *
 * @param c       connection the command arrived on
 * @param command NUL-terminated command line; it is tokenized in place
 * @return NULL when the command has been fully handled. A non-NULL pointer
 *         is returned when a handler flagged the connection with
 *         c->ewouldblock and the command has to be retried later.
 */
static char* process_command(conn *c, char *command) {

    token_t tokens[MAX_TOKENS];
    size_t ntokens;
    int comm;
    char *ret = NULL;

    assert(c != NULL);

    MEMCACHED_PROCESS_COMMAND_START(c->sfd, c->rcurr, c->rbytes);

    if (settings.verbose > 1) {
        settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                        "<%d %s\n", c->sfd, command);
    }

    /*
     * for commands set/add/replace, we build an item and read the data
     * directly into it, then continue in nread_complete().
     */

    if (c->ewouldblock) {
        /*
         * If we are retrying after the engine has completed a pending io for
         * this command, skip add_msghdr() etc and clear the ewouldblock flag.
         */
        c->ewouldblock = false;
    } else {
        c->msgcurr = 0;
        c->msgused = 0;
        c->iovused = 0;
        if (add_msghdr(c) != 0) {
            out_string(c, "SERVER_ERROR out of memory preparing response");
            return NULL;
        }
    }

    ntokens = tokenize_command(command, tokens, MAX_TOKENS);
    if (ntokens >= 3 &&
        ((strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) ||
         (strcmp(tokens[COMMAND_TOKEN].value, "bget") == 0))) {

        ret = process_get_command(c, tokens, ntokens, false);

    } else if ((ntokens == 6 || ntokens == 7) &&
               ((strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = (int)OPERATION_ADD)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = (int)OPERATION_SET)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = (int)OPERATION_REPLACE)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "prepend") == 0 && (comm = (int)OPERATION_PREPEND)) ||
                (strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = (int)OPERATION_APPEND)) )) {

        /* comm was assigned by whichever name comparison matched above. */
        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, false);

    } else if ((ntokens == 7 || ntokens == 8) && (strcmp(tokens[COMMAND_TOKEN].value, "cas") == 0 && (comm = (int)OPERATION_CAS))) {

        process_update_command(c, tokens, ntokens, (ENGINE_STORE_OPERATION)comm, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "incr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 1);

    } else if (ntokens >= 3 && (strcmp(tokens[COMMAND_TOKEN].value, "gets") == 0)) {

        ret = process_get_command(c, tokens, ntokens, true);

    } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "decr") == 0)) {

        ret = process_arithmetic_command(c, tokens, ntokens, 0);

    } else if (ntokens >= 3 && ntokens <= 5 && (strcmp(tokens[COMMAND_TOKEN].value, "delete") == 0)) {

        ret = process_delete_command(c, tokens, ntokens);

    } else if (ntokens == 3 && (strcmp(tokens[COMMAND_TOKEN].value, "bind") == 0)) {

        ret = process_bind_command(c, tokens, ntokens);

    } else if (ntokens >= 2 && (strcmp(tokens[COMMAND_TOKEN].value, "stats") == 0)) {

        ret = process_stat(c, tokens, ntokens);

    } else if (ntokens >= 2 && ntokens <= 4 && (strcmp(tokens[COMMAND_TOKEN].value, "flush_all") == 0)) {
        time_t exptime;

        set_noreply_maybe(c, tokens, ntokens);

        /* No delay argument present: flush immediately. */
        if (ntokens == (c->noreply ? 3 : 2)) {
            exptime = 0;
        } else {
            /*
             * strtol() only sets errno on failure, so clear it first;
             * otherwise a stale ERANGE from an earlier call would reject
             * a perfectly valid delay.
             */
            errno = 0;
            exptime = strtol(tokens[1].value, NULL, 10);
            if (errno == ERANGE) {
                out_string(c, "CLIENT_ERROR bad command line format");
                return NULL;
            }
        }

        /*
         * Start from the status left in c->aiostat; the engine is only
         * invoked when that status is ENGINE_SUCCESS.
         */
        ENGINE_ERROR_CODE status = c->aiostat;
        c->aiostat = ENGINE_SUCCESS;
        c->ewouldblock = false;
        if (status == ENGINE_SUCCESS) {
            status = settings.engine.v1->flush(settings.engine.v0, c, exptime);
        }

        switch (status) {
        case ENGINE_SUCCESS:
            out_string(c, "OK");
            break;
        case ENGINE_ENOTSUP:
            out_string(c, "SERVER_ERROR not supported");
            break;
        case ENGINE_EWOULDBLOCK:
            c->ewouldblock = true;
            return c->rcurr + 9;
        default:
            out_string(c, "SERVER_ERROR failed to flush cache");
            break;
        }

        if (status != ENGINE_EWOULDBLOCK) {
            STATS_NOKEY(c, cmd_flush);
        }
        return NULL;

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "version") == 0)) {

        out_string(c, "VERSION " VERSION);

    } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "quit") == 0)) {

        conn_set_state(c, conn_closing);

    } else if ((ntokens == 3 || ntokens == 4) && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) {
        process_verbosity_command(c, tokens, ntokens);
    } else if (settings.extensions.ascii != NULL) {
        /* Not a built-in command: offer it to the ASCII protocol extensions. */
        EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *cmd;
        size_t nbytes = 0;
        char *ptr = NULL;

        if (ntokens > 0) {
            if (ntokens == MAX_TOKENS) {
                out_string(c, "ERROR too many arguments");
                return NULL;
            }

            /* Drop the trailing zero-length token before handing over. */
            if (tokens[ntokens - 1].length == 0) {
                --ntokens;
            }
        }

        /* The first extension that accepts the command handles it. */
        for (cmd = settings.extensions.ascii; cmd != NULL; cmd = cmd->next) {
            if (cmd->accept(cmd->cookie, c, ntokens, tokens, &nbytes, &ptr)) {
                break;
            }
        }

        if (cmd == NULL) {
            out_string(c, "ERROR unknown command");
        } else if (nbytes == 0) {
            switch (cmd->execute(cmd->cookie, c, ntokens, tokens,
                                 ascii_response_handler)) {
            case ENGINE_SUCCESS:
                if (c->dynamic_buffer.buffer != NULL) {
                    write_and_free(c, c->dynamic_buffer.buffer,
                                   c->dynamic_buffer.offset);
                    c->dynamic_buffer.buffer = NULL;
                } else {
                    conn_set_state(c, conn_new_cmd);
                }
                break;
            case ENGINE_EWOULDBLOCK:
                c->ewouldblock = true;
                ret = tokens[KEY_TOKEN].value;
                break;
            case ENGINE_DISCONNECT:
            default:
                conn_set_state(c, conn_closing);
                break;
            }
        } else {
            /* The extension asked for nbytes of payload to be read into ptr. */
            c->rlbytes = nbytes;
            c->ritem = ptr;
            c->ascii_cmd = cmd;
            /* NOT SUPPORTED YET! */
            conn_set_state(c, conn_nread);
        }
    } else {
        out_string(c, "ERROR");
    }
    return ret;
}
4746 
4747 /*
4748  * if we have a complete line in the buffer, process it.
4749  */
try_read_command(conn * c)4750 static int try_read_command(conn *c) {
4751     assert(c != NULL);
4752     assert(c->rcurr <= (c->rbuf + c->rsize));
4753     assert(c->rbytes > 0);
4754 
4755     if (c->protocol == negotiating_prot || c->transport == udp_transport)  {
4756         if ((unsigned char)c->rbuf[0] == (unsigned char)PROTOCOL_BINARY_REQ) {
4757             c->protocol = binary_prot;
4758         } else {
4759             c->protocol = ascii_prot;
4760         }
4761 
4762         if (settings.verbose > 1) {
4763             settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4764                     "%d: Client using the %s protocol\n", c->sfd,
4765                     prot_text(c->protocol));
4766         }
4767     }
4768 
4769     if (c->protocol == binary_prot) {
4770         /* Do we have the complete packet header? */
4771         if (c->rbytes < sizeof(c->binary_header)) {
4772             /* need more data! */
4773             return 0;
4774         } else {
4775 #ifdef NEED_ALIGN
4776             if (((long)(c->rcurr)) % 8 != 0) {
4777                 /* must realign input buffer */
4778                 memmove(c->rbuf, c->rcurr, c->rbytes);
4779                 c->rcurr = c->rbuf;
4780                 if (settings.verbose > 1) {
4781                     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4782                              "%d: Realign input buffer\n", c->sfd);
4783                 }
4784             }
4785 #endif
4786             protocol_binary_request_header* req;
4787             req = (protocol_binary_request_header*)c->rcurr;
4788 
4789             if (settings.verbose > 1) {
4790                 /* Dump the packet before we convert it to host order */
4791                 char buffer[1024];
4792                 ssize_t nw;
4793                 nw = bytes_to_output_string(buffer, sizeof(buffer), c->sfd,
4794                                             true, "Read binary protocol data:",
4795                                             (const char*)req->bytes,
4796                                             sizeof(req->bytes));
4797                 if (nw != -1) {
4798                     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
4799                                                     "%s", buffer);
4800                 }
4801             }
4802 
4803             c->binary_header = *req;
4804             c->binary_header.request.keylen = ntohs(req->request.keylen);
4805             c->binary_header.request.bodylen = ntohl(req->request.bodylen);
4806             c->binary_header.request.vbucket = ntohs(req->request.vbucket);
4807             c->binary_header.request.cas = ntohll(req->request.cas);
4808 
4809 
4810             if (c->binary_header.request.magic != PROTOCOL_BINARY_REQ &&
4811                 !(c->binary_header.request.magic == PROTOCOL_BINARY_RES &&
4812                   response_handlers[c->binary_header.request.opcode])) {
4813                 if (settings.verbose) {
4814                     if (c->binary_header.request.magic != PROTOCOL_BINARY_RES) {
4815                         settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4816                               "%d: Invalid magic:  %x\n", c->sfd,
4817                               c->binary_header.request.magic);
4818                     } else {
4819                         settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
4820                               "%d: ERROR: Unsupported response packet received: %u\n",
4821                               c->sfd, (unsigned int)c->binary_header.request.opcode);
4822 
4823                     }
4824                 }
4825                 conn_set_state(c, conn_closing);
4826                 return -1;
4827             }
4828 
4829             c->msgcurr = 0;
4830             c->msgused = 0;
4831             c->iovused = 0;
4832             if (add_msghdr(c) != 0) {
4833                 out_string(c, "SERVER_ERROR out of memory");
4834                 return 0;
4835             }
4836 
4837             c->cmd = c->binary_header.request.opcode;
4838             c->keylen = c->binary_header.request.keylen;
4839             c->opaque = c->binary_header.request.opaque;
4840             /* clear the returned cas value */
4841             c->cas = 0;
4842 
4843             dispatch_bin_command(c);
4844 
4845             c->rbytes -= sizeof(c->binary_header);
4846             c->rcurr += sizeof(c->binary_header);
4847         }
4848     } else {
4849         char *el, *cont, *left, lb;
4850 
4851         if (c->rbytes == 0) {
4852             return 0;
4853         }
4854 
4855         el = memchr(c->rcurr, '\n', c->rbytes);
4856         if (!el) {
4857             if (c->rbytes > 1024) {
4858                 /*
4859                  * We didn't have a '\n' in the first k. This _has_ to be a
4860                  * large multiget, if not we should just nuke the connection.
4861                  */
4862                 char *ptr = c->rcurr;
4863                 while (*ptr == ' ') { /* ignore leading whitespaces */
4864                     ++ptr;
4865                 }
4866 
4867                 if (ptr - c->rcurr > 100 ||
4868                     (strncmp(ptr, "get ", 4) && strncmp(ptr, "gets ", 5))) {
4869 
4870                     conn_set_state(c, conn_closing);
4871                     return 1;
4872                 }
4873             }
4874 
4875             return 0;
4876         }
4877         cont = el + 1;
4878         if ((el - c->rcurr) > 1 && *(el - 1) == '\r') {
4879             el--;
4880         }
4881         lb = *el;
4882         *el = '\0';
4883 
4884         assert(cont <= (c->rcurr + c->rbytes));
4885 
4886         LIBEVENT_THREAD *thread = c->thread;
4887         LOCK_THREAD(thread);
4888         left = process_command(c, c->rcurr);
4889         if (c->ewouldblock) {
4890             unregister_event(c);
4891         }
4892         UNLOCK_THREAD(thread);
4893 
4894         if (left != NULL) {
4895             /*
4896              * We have not processed the entire command. This happens
4897              * when the engine returns ENGINE_EWOULDBLOCK for one of the
4898              * keys in a get/gets request.
4899              */
4900             assert (left <= el);
4901 
4902             int count = strlen(c->rcurr);
4903             if ((c->rcurr + count) == left) {
4904                 // Retry the entire command
4905                 cont = c->rcurr;
4906             } else {
4907                 left -= (count + 1);
4908                 cont = left;
4909                 assert(cont >= c->rcurr);
4910                 if (cont > c->rcurr) {
4911                     memmove(cont, c->rcurr, count);
4912                 }
4913             }
4914 
4915             /* de-tokenize the command */
4916             while ((left = memchr(left, '\0', el - left)) != NULL) {
4917                 *left = ' ';
4918             }
4919             *el = lb;
4920         }
4921 
4922         c->rbytes -= (cont - c->rcurr);
4923         c->rcurr = cont;
4924 
4925         assert(c->rcurr <= (c->rbuf + c->rsize));
4926     }
4927 
4928     return 1;
4929 }
4930 
4931 /*
4932  * read a UDP request.
4933  */
try_read_udp(conn * c)4934 static enum try_read_result try_read_udp(conn *c) {
4935     int res;
4936 
4937     assert(c != NULL);
4938 
4939     c->request_addr_size = sizeof(c->request_addr);
4940     res = recvfrom(c->sfd, c->rbuf, c->rsize,
4941                    0, (struct sockaddr *)&c->request_addr, &c->request_addr_size);
4942     if (res > 8) {
4943         unsigned char *buf = (unsigned char *)c->rbuf;
4944         STATS_ADD(c, bytes_read, res);
4945 
4946         /* Beginning of UDP packet is the request ID; save it. */
4947         c->request_id = buf[0] * 256 + buf[1];
4948 
4949         /* If this is a multi-packet request, drop it. */
4950         if (buf[4] != 0 || buf[5] != 1) {
4951             out_string(c, "SERVER_ERROR multi-packet request not supported");
4952             return READ_NO_DATA_RECEIVED;
4953         }
4954 
4955         /* Don't care about any of the rest of the header. */
4956         res -= 8;
4957         memmove(c->rbuf, c->rbuf + 8, res);
4958 
4959         c->rbytes += res;
4960         c->rcurr = c->rbuf;
4961         return READ_DATA_RECEIVED;
4962     }
4963     return READ_NO_DATA_RECEIVED;
4964 }
4965 
4966 /*
4967  * read from network as much as we can, handle buffer overflow and connection
4968  * close.
4969  * before reading, move the remaining incomplete fragment of a command
4970  * (if any) to the beginning of the buffer.
4971  *
4972  * To protect us from someone flooding a connection with bogus data causing
4973  * the connection to eat up all available memory, break out and start looking
4974  * at the data I've got after a number of reallocs...
4975  *
4976  * @return enum try_read_result
4977  */
try_read_network(conn * c)4978 static enum try_read_result try_read_network(conn *c) {
4979     enum try_read_result gotdata = READ_NO_DATA_RECEIVED;
4980     int res;
4981     int num_allocs = 0;
4982     assert(c != NULL);
4983 
4984     if (c->rcurr != c->rbuf) {
4985         if (c->rbytes != 0) /* otherwise there's nothing to copy */
4986             memmove(c->rbuf, c->rcurr, c->rbytes);
4987         c->rcurr = c->rbuf;
4988     }
4989 
4990     while (1) {
4991         if (c->rbytes >= c->rsize) {
4992             if (num_allocs == 4) {
4993                 return gotdata;
4994             }
4995             ++num_allocs;
4996             char *new_rbuf = realloc(c->rbuf, c->rsize * 2);
4997             if (!new_rbuf) {
4998                 if (settings.verbose > 0) {
4999                  settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5000                           "Couldn't realloc input buffer\n");
5001                 }
5002                 c->rbytes = 0; /* ignore what we read */
5003                 out_string(c, "SERVER_ERROR out of memory reading request");
5004                 c->write_and_go = conn_closing;
5005                 return READ_MEMORY_ERROR;
5006             }
5007             c->rcurr = c->rbuf = new_rbuf;
5008             c->rsize *= 2;
5009         }
5010 
5011         int avail = c->rsize - c->rbytes;
5012         res = recv(c->sfd, c->rbuf + c->rbytes, avail, 0);
5013         if (res > 0) {
5014             STATS_ADD(c, bytes_read, res);
5015             gotdata = READ_DATA_RECEIVED;
5016             c->rbytes += res;
5017             if (res == avail) {
5018                 continue;
5019             } else {
5020                 break;
5021             }
5022         }
5023         if (res == 0) {
5024             return READ_ERROR;
5025         }
5026         if (res == -1) {
5027             if (errno == EAGAIN || errno == EWOULDBLOCK) {
5028                 break;
5029             }
5030             return READ_ERROR;
5031         }
5032     }
5033     return gotdata;
5034 }
5035 
/**
 * Add the connection's event to libevent.
 *
 * @param c the connection whose event should be added
 * @param timeout passed straight through to event_add()
 * @return true if event_add() succeeded, false otherwise (a warning is
 *         logged on failure)
 */
bool register_event(conn *c, struct timeval *timeout) {
#ifdef DEBUG
    assert(!c->registered_in_libevent);
#endif

    const bool added = (event_add(&c->event, timeout) != -1);

    if (added) {
#ifdef DEBUG
        c->registered_in_libevent = true;
#endif
        return true;
    }

    settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
                                    "Failed to add connection to libevent: %s",
                                    strerror(errno));
    return false;
}
5055 
/**
 * Remove the connection's event from libevent.
 *
 * @param c the connection whose event should be removed
 * @return true if event_del() succeeded, false otherwise
 */
bool unregister_event(conn *c) {
#ifdef DEBUG
    assert(c->registered_in_libevent);
#endif

    const bool removed = (event_del(&c->event) != -1);

#ifdef DEBUG
    if (removed) {
        c->registered_in_libevent = false;
    }
#endif

    return removed;
}
5071 
5072 
/**
 * Change the set of libevent flags the connection is waiting on.
 *
 * Nothing is done when the requested flags equal the current ones.
 * Otherwise the event is unregistered, re-initialised with the new flags
 * (with event_handler as callback), re-attached to the event base it had
 * before, and registered again without a timeout.
 *
 * @param c the connection to update
 * @param new_flags the libevent flags to wait for from now on
 * @return true on success (or if no change was needed), false if the event
 *         could not be unregistered or re-registered
 */
bool update_event(conn *c, const int new_flags) {
    assert(c != NULL);

    if (c->ev_flags == new_flags) {
        return true;
    }

    const char *wants_read = (new_flags & EV_READ) ? "yes" : "no";
    const char *wants_write = (new_flags & EV_WRITE) ? "yes" : "no";
    settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                    "Updated event for %d to read=%s, write=%s\n",
                                    c->sfd, wants_read, wants_write);

    /* Remember the base up front so it can be re-applied after event_set(). */
    struct event_base *saved_base = c->event.ev_base;

    if (!unregister_event(c)) {
        return false;
    }

    event_set(&c->event, c->sfd, new_flags, event_handler, (void *)c);
    event_base_set(saved_base, &c->event);
    c->ev_flags = new_flags;

    return register_event(c, NULL);
}
5095 
5096 /*
5097  * Transmit the next chunk of data from our list of msgbuf structures.
5098  *
5099  * Returns:
5100  *   TRANSMIT_COMPLETE   All done writing.
5101  *   TRANSMIT_INCOMPLETE More data remaining to write.
5102  *   TRANSMIT_SOFT_ERROR Can't write any more right now.
5103  *   TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing)
5104  */
transmit(conn * c)5105 static enum transmit_result transmit(conn *c) {
5106     assert(c != NULL);
5107 
5108     if (c->msgcurr < c->msgused &&
5109             c->msglist[c->msgcurr].msg_iovlen == 0) {
5110         /* Finished writing the current msg; advance to the next. */
5111         c->msgcurr++;
5112     }
5113     if (c->msgcurr < c->msgused) {
5114         ssize_t res;
5115         struct msghdr *m = &c->msglist[c->msgcurr];
5116 
5117         res = sendmsg(c->sfd, m, 0);
5118         if (res > 0) {
5119             STATS_ADD(c, bytes_written, res);
5120 
5121             /* We've written some of the data. Remove the completed
5122                iovec entries from the list of pending writes. */
5123             while (m->msg_iovlen > 0 && res >= m->msg_iov->iov_len) {
5124                 res -= m->msg_iov->iov_len;
5125                 m->msg_iovlen--;
5126                 m->msg_iov++;
5127             }
5128 
5129             /* Might have written just part of the last iovec entry;
5130                adjust it so the next write will do the rest. */
5131             if (res > 0) {
5132                 m->msg_iov->iov_base = (caddr_t)m->msg_iov->iov_base + res;
5133                 m->msg_iov->iov_len -= res;
5134             }
5135             return TRANSMIT_INCOMPLETE;
5136         }
5137         if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
5138             if (!update_event(c, EV_WRITE | EV_PERSIST)) {
5139                 if (settings.verbose > 0) {
5140                     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5141                             "Couldn't update event\n");
5142                 }
5143                 conn_set_state(c, conn_closing);
5144                 return TRANSMIT_HARD_ERROR;
5145             }
5146             return TRANSMIT_SOFT_ERROR;
5147         }
5148         /* if res == 0 or res == -1 and error is not EAGAIN or EWOULDBLOCK,
5149            we have a real error, on which we close the connection */
5150         if (settings.verbose > 0) {
5151             settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
5152                                             "Failed to write, and not due to blocking: %s",
5153                                             strerror(errno));
5154         }
5155 
5156         if (IS_UDP(c->transport))
5157             conn_set_state(c, conn_read);
5158         else
5159             conn_set_state(c, conn_closing);
5160         return TRANSMIT_HARD_ERROR;
5161     } else {
5162         return TRANSMIT_COMPLETE;
5163     }
5164 }
5165 
/**
 * State handler for a listening socket: accept one pending client and hand
 * it to a worker via dispatch_conn_new().
 *
 * Listening is disabled when accept() fails with EMFILE. A client accepted
 * while the connection count has reached settings.maxconns is counted as
 * rejected and closed immediately.
 *
 * @param c the listening connection
 * @return always false
 */
bool conn_listening(conn *c)
{
    struct sockaddr_storage peer;
    socklen_t peer_len = sizeof(peer);
    int client_fd = accept(c->sfd, (struct sockaddr *)&peer, &peer_len);

    if (client_fd == -1) {
        if (errno == EMFILE) {
            if (settings.verbose > 0) {
                settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                                "Too many open connections\n");
            }
            disable_listen();
        } else if (errno != EAGAIN && errno != EWOULDBLOCK) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                            "Failed to accept new client: %s\n",
                                            strerror(errno));
        }
        return false;
    }

    STATS_LOCK();
    int conns_now = ++stats.curr_conns;
    STATS_UNLOCK();

    if (conns_now >= settings.maxconns) {
        /* Over the limit: record the rejection and drop the client. */
        STATS_LOCK();
        ++stats.rejected_conns;
        STATS_UNLOCK();

        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Too many open connections\n");
        }
        safe_close(client_fd);
        return false;
    }

    if (evutil_make_socket_nonblocking(client_fd) == -1) {
        safe_close(client_fd);
        return false;
    }

    dispatch_conn_new(client_fd, conn_new_cmd, EV_READ | EV_PERSIST,
                      DATA_BUFFER_SIZE, tcp_transport);
    return false;
}
5216 
5217 /**
5218  * Ship tap log to the other end. This state differs with all other states
5219  * in the way that it support full duplex dialog. We're listening to both read
5220  * and write events from libevent most of the time. If a read event occurs we
5221  * switch to the conn_read state to read and execute the input message (that would
5222  * be an ack message from the other side). If a write event occurs we continue to
5223  * send tap log to the other end.
5224  * @param c the tap connection to drive
5225  * @return true if we should continue to process work for this connection, false
5226  *              if we should start processing events for other connections.
5227  */
conn_ship_log(conn * c)5228 bool conn_ship_log(conn *c) {
5229     bool cont = false;
5230 
5231     if (c->sfd == INVALID_SOCKET) {
5232         return false;
5233     }
5234 
5235     short mask = EV_READ | EV_PERSIST | EV_WRITE;
5236 
5237     if (c->which & EV_READ || c->rbytes > 0) {
5238         if (c->rbytes > 0) {
5239             if (try_read_command(c) == 0) {
5240                 conn_set_state(c, conn_read);
5241             }
5242         } else {
5243             conn_set_state(c, conn_read);
5244         }
5245 
5246         // we're going to process something.. let's proceed
5247         cont = true;
5248 
5249         // We have a finite number of messages in the input queue
5250         // so let's process all of them instead of backing off after
5251         // reading a subset of them.
5252         // Why? Because we've got every time we're calling ship_tap_log
5253         // we try to send a chunk of items.. This means that if we end
5254         // up in a situation where we're receiving a burst of nack messages
5255         // we'll only process a subset of messages in our input queue,
5256         // and it will slowly grow..
5257         c->nevents = settings.reqs_per_tap_event;
5258     } else if (c->which & EV_WRITE) {
5259         --c->nevents;
5260         if (c->nevents >= 0) {
5261             LOCK_THREAD(c->thread);
5262             c->ewouldblock = false;
5263             ship_tap_log(c);
5264             if (c->ewouldblock) {
5265                 mask = EV_READ | EV_PERSIST;
5266             } else {
5267                 cont = true;
5268             }
5269             UNLOCK_THREAD(c->thread);
5270         }
5271     }
5272 
5273     if (!update_event(c, mask)) {
5274         if (settings.verbose > 0) {
5275             settings.extensions.logger->log(EXTENSION_LOG_INFO,
5276                                             c, "Couldn't update event\n");
5277         }
5278         conn_set_state(c, conn_closing);
5279     }
5280 
5281     return cont;
5282 }
5283 
/**
 * State handler: arm the connection for read events and move it to
 * conn_read.
 *
 * @param c the connection to drive
 * @return false when the connection is now waiting for a read event, true
 *         when the event update failed and the state machine must go on to
 *         conn_closing
 */
bool conn_waiting(conn *c) {
    if (update_event(c, EV_READ | EV_PERSIST)) {
        conn_set_state(c, conn_read);
        return false;
    }

    if (settings.verbose > 0) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Couldn't update event\n");
    }
    conn_set_state(c, conn_closing);
    return true;
}
5296 
/**
 * State handler: read from the socket (UDP or stream) and choose the next
 * state from the outcome.
 *
 * @param c the connection to drive
 * @return always true
 */
bool conn_read(conn *c) {
    const int outcome = IS_UDP(c->transport) ? try_read_udp(c)
                                             : try_read_network(c);

    if (outcome == READ_NO_DATA_RECEIVED) {
        conn_set_state(c, conn_waiting);
    } else if (outcome == READ_DATA_RECEIVED) {
        conn_set_state(c, conn_parse_cmd);
    } else if (outcome == READ_ERROR) {
        conn_set_state(c, conn_closing);
    }
    /* READ_MEMORY_ERROR (failed to allocate more memory): nothing to do
       here, try_read_network has already arranged what happens next. */

    return true;
}
5316 
/**
 * State handler: try to parse a command out of the buffered input.
 *
 * @param c the connection to drive
 * @return false if the connection is flagged ewouldblock, true otherwise
 */
bool conn_parse_cmd(conn *c) {
    const int parsed = try_read_command(c);

    if (parsed == 0) {
        /* we need more data! */
        conn_set_state(c, conn_waiting);
    }

    return c->ewouldblock ? false : true;
}
5325 
/**
 * State handler: start processing the next command, unless this connection
 * has used up its per-event request budget.
 *
 * @param c the connection to drive
 * @return true to keep running the state machine, false to yield to other
 *         connections
 */
bool conn_new_cmd(conn *c) {
    /* Only process nreqs at a time to avoid starving other connections */
    --c->nevents;
    if (c->nevents >= 0) {
        reset_cmd_handler(c);
        return true;
    }

    STATS_NOKEY(c, conn_yields);

    /*
     * Input is already sitting in our buffer, so libevent will most likely
     * not signal a read event on the socket (unless more data arrives). As
     * a hack, ask for a write event instead, because that should be
     * possible ;-)
     */
    if (c->rbytes > 0 && !update_event(c, EV_WRITE | EV_PERSIST)) {
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO,
                                            c, "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return true;
    }

    return false;
}
5354 
5355 
/**
 * State handler: read and discard c->sbytes bytes of input.
 *
 * Bytes already buffered are consumed first; after that data is read from
 * the socket into c->rbuf and thrown away.
 *
 * @param c the connection to drive
 * @return true to keep running the state machine, false when the socket
 *         would block and the connection is waiting for a read event
 */
bool conn_swallow(conn *c) {
    /* Nothing left to discard: go handle the next command. */
    if (c->sbytes == 0) {
        conn_set_state(c, conn_new_cmd);
        return true;
    }

    /* Use up leftovers in the read buffer before touching the socket. */
    if (c->rbytes > 0) {
        uint32_t skipped = c->rbytes > c->sbytes ? c->sbytes : c->rbytes;
        c->rcurr += skipped;
        c->rbytes -= skipped;
        c->sbytes -= skipped;
        return true;
    }

    /* Read at most one buffer's worth, and never more than is left. */
    size_t want = c->rsize > c->sbytes ? c->sbytes : c->rsize;
    ssize_t nread = recv(c->sfd, c->rbuf, want, 0);

    if (nread > 0) {
        STATS_ADD(c, bytes_read, nread);
        c->sbytes -= nread;
        return true;
    }

    if (nread == 0) {
        /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

    if (nread == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        if (update_event(c, EV_READ | EV_PERSIST)) {
            return false;
        }
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return true;
    }

    /* A real error: log it unless the peer has simply gone away, then
       close the connection. */
    if (!(errno == ENOTCONN || errno == ECONNRESET)) {
        settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                        "Failed to read, and not due to blocking (%s)\n",
                                        strerror(errno));
    }
    conn_set_state(c, conn_closing);
    return true;
}
5408 
/**
 * State handler: read the c->rlbytes bytes of value data that belong to the
 * current command into c->ritem.
 *
 * Once nothing is left to read, complete_nread() is called. Otherwise
 * leftovers in the read buffer are copied first and the remainder is read
 * straight from the socket.
 *
 * @param c the connection to drive
 * @return true to keep running the state machine, false when the
 *         connection has to wait (engine would block, or socket has no
 *         data yet)
 */
bool conn_nread(conn *c) {
    if (c->rlbytes == 0) {
        LIBEVENT_THREAD *owner = c->thread;
        bool must_yield = false;

        LOCK_THREAD(owner);
        c->ewouldblock = false;
        complete_nread(c);
        UNLOCK_THREAD(owner);

        /* Done in two steps, as complete_nread may have moved us to a
           different thread: re-read the owner before locking again. */
        owner = c->thread;
        LOCK_THREAD(owner);
        if (c->ewouldblock) {
            unregister_event(c);
            must_yield = true;
        }
        UNLOCK_THREAD(owner);

        return !must_yield;
    }

    /* Use up leftovers in the read buffer first. */
    if (c->rbytes > 0) {
        uint32_t chunk = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes;

        if (c->ritem != c->rcurr) {
            memmove(c->ritem, c->rcurr, chunk);
        }
        c->ritem += chunk;
        c->rcurr += chunk;
        c->rlbytes -= chunk;
        c->rbytes -= chunk;

        if (c->rlbytes == 0) {
            return true;
        }
    }

    /* Then read the rest directly from the socket. */
    ssize_t nread = recv(c->sfd, c->ritem, c->rlbytes, 0);

    if (nread > 0) {
        STATS_ADD(c, bytes_read, nread);
        if (c->rcurr == c->ritem) {
            c->rcurr += nread;
        }
        c->ritem += nread;
        c->rlbytes -= nread;
        return true;
    }

    if (nread == 0) {
        /* end of stream */
        conn_set_state(c, conn_closing);
        return true;
    }

    bool try_later = (nread == -1) &&
                     (errno == EAGAIN || errno == EWOULDBLOCK);
#ifdef INNODB_MEMCACHED
    /* MEMCACHED_RESOLVE: on the Solaris platform, when connected through
       telnet and waiting for input for an "add" or "set" command, recv can
       return -1 with errno == 0, which would terminate the connection
       early. Treat errno == 0 as "would block" to deal with this scenario
       for now. */
    if (nread == -1 && !errno) {
        try_later = true;
    }
#endif /* INNODB_MEMCACHED */

    if (try_later) {
        if (update_event(c, EV_READ | EV_PERSIST)) {
            return false;
        }
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
                                            "Couldn't update event\n");
        }
        conn_set_state(c, conn_closing);
        return true;
    }

    /* A real error: log it unless the peer has simply gone away, then
       close the connection. */
    if (!(errno == ENOTCONN || errno == ECONNRESET)) {
        settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                                        "Failed to read, and not due to blocking:\n"
                                        "errno: %d %s \n"
                                        "rcurr=%lx ritem=%lx rbuf=%lx rlbytes=%d rsize=%d\n",
                                        errno, strerror(errno),
                                        (long)c->rcurr, (long)c->ritem, (long)c->rbuf,
                                        (int)c->rlbytes, (int)c->rsize);
    }
    conn_set_state(c, conn_closing);
    return true;
}
5493 
5494 bool conn_write(conn *c) {
5495     /*
5496      * We want to write out a simple response. If we haven't already,
5497      * assemble it into a msgbuf list (this will be a single-entry
5498      * list for TCP or a two-entry list for UDP).
5499      */
5500     if (c->iovused == 0 || (IS_UDP(c->transport) && c->iovused == 1)) {
5501         if (add_iov(c, c->wcurr, c->wbytes) != 0) {
5502             if (settings.verbose > 0) {
5503                 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5504                                                 "Couldn't build response\n");
5505             }
5506             conn_set_state(c, conn_closing);
5507             return true;
5508         }
5509     }
5510 
5511     return conn_mwrite(c);
5512 }
5513 
5514 bool conn_mwrite(conn *c) {
5515     if (IS_UDP(c->transport) && c->msgcurr == 0 && build_udp_headers(c) != 0) {
5516         if (settings.verbose > 0) {
5517             settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5518                                             "Failed to build UDP headers\n");
5519         }
5520         conn_set_state(c, conn_closing);
5521         return true;
5522     }
5523 
5524     switch (transmit(c)) {
5525     case TRANSMIT_COMPLETE:
5526         if (c->state == conn_mwrite) {
5527             while (c->ileft > 0) {
5528                 item *it = *(c->icurr);
5529                 settings.engine.v1->release(settings.engine.v0, c, it);
5530                 c->icurr++;
5531                 c->ileft--;
5532             }
5533             while (c->suffixleft > 0) {
5534                 char *suffix = *(c->suffixcurr);
5535                 cache_free(c->thread->suffix_cache, suffix);
5536                 c->suffixcurr++;
5537                 c->suffixleft--;
5538             }
5539             /* XXX:  I don't know why this wasn't the general case */
5540             if(c->protocol == binary_prot) {
5541                 conn_set_state(c, c->write_and_go);
5542             } else {
5543                 conn_set_state(c, conn_new_cmd);
5544             }
5545         } else if (c->state == conn_write) {
5546             if (c->write_and_free) {
5547                 free(c->write_and_free);
5548                 c->write_and_free = 0;
5549             }
5550             conn_set_state(c, c->write_and_go);
5551         } else {
5552             if (settings.verbose > 0) {
5553                 settings.extensions.logger->log(EXTENSION_LOG_INFO, c,
5554                                                 "Unexpected state %d\n", c->state);
5555             }
5556             conn_set_state(c, conn_closing);
5557         }
5558         break;
5559 
5560     case TRANSMIT_INCOMPLETE:
5561     case TRANSMIT_HARD_ERROR:
5562         break;                   /* Continue in state machine. */
5563 
5564     case TRANSMIT_SOFT_ERROR:
5565         return false;
5566     }
5567 
5568     return true;
5569 }
5570 
5571 bool conn_pending_close(conn *c) {
5572     assert(c->sfd == INVALID_SOCKET);
5573     settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
5574                                     "Awaiting clients to release the cookie (pending close for %p)",
5575                                     (void*)c);
5576     LOCK_THREAD(c->thread);
5577     c->thread->pending_io = list_remove(c->thread->pending_io, c);
5578     if (!list_contains(c->thread->pending_close, c)) {
5579         enlist_conn(c, &c->thread->pending_close);
5580     }
5581     UNLOCK_THREAD(c->thread);
5582 
5583     /*
5584      * tell the tap connection that we're disconnecting it now,
5585      * but give it a grace period
5586      */
5587     perform_callbacks(ON_DISCONNECT, NULL, c);
5588 
5589     /*
5590      * disconnect callback may have changed the state for the object
5591      * so we might complete the disconnect now
5592      */
5593     return c->state != conn_pending_close;
5594 }
5595 
5596 bool conn_immediate_close(conn *c) {
5597     settings.extensions.logger->log(EXTENSION_LOG_DETAIL, c,
5598                                     "Immediate close of %p",
5599                                     (void*)c);
5600     perform_callbacks(ON_DISCONNECT, NULL, c);
5601     conn_close(c);
5602 
5603     return false;
5604 }
5605 
5606 bool conn_closing(conn *c) {
5607     if (IS_UDP(c->transport)) {
5608         conn_cleanup(c);
5609         return false;
5610     }
5611 
5612     // We don't want any network notifications anymore..
5613     unregister_event(c);
5614     safe_close(c->sfd);
5615     c->sfd = INVALID_SOCKET;
5616 
5617     if (c->refcount > 1) {
5618         conn_set_state(c, conn_pending_close);
5619     } else {
5620         conn_set_state(c, conn_immediate_close);
5621     }
5622     return true;
5623 }
5624 
/**
 * State handler: move a connection from the thread that currently owns it
 * to the tap thread (tap_thread).
 *
 * The connection is marked ewouldblock, removed from libevent and from the
 * original thread's pending lists, switched to the conn_setup_tap_stream
 * state, re-pointed at the tap thread and its event base, and queued on the
 * tap thread's pending_io list. The tap thread is then notified.
 *
 * @param c the connection to move; must belong to a thread other than the
 *          tap thread
 * @return always false
 */
bool conn_add_tap_client(conn *c) {
    LIBEVENT_THREAD *tp = tap_thread;
    LIBEVENT_THREAD *orig_thread = c->thread;

    assert(orig_thread);
    assert(orig_thread != tp);

    c->ewouldblock = true;

    unregister_event(c);

    /* The original thread's lock is taken first and held across the whole
       hand-over; the tap thread's lock is nested inside it. */
    LOCK_THREAD(orig_thread);
    /* Clean out the lists */
    orig_thread->pending_io = list_remove(orig_thread->pending_io, c);
    orig_thread->pending_close = list_remove(orig_thread->pending_close, c);

    LOCK_THREAD(tp);
    /* Clear the recorded flags and select the state that will run on the
       tap thread. */
    c->ev_flags = 0;
    conn_set_state(c, conn_setup_tap_stream);
    settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                    "Moving %d conn from %p to %p\n",
                                    c->sfd, c->thread, tp);
    /* Re-home the connection: new owning thread and new event base. */
    c->thread = tp;
    c->event.ev_base = tp->base;
    /* The connection must not be on any list at this point. */
    assert(c->next == NULL);
    assert(c->list_state == 0);
    enlist_conn(c, &tp->pending_io);

    UNLOCK_THREAD(tp);

    UNLOCK_THREAD(orig_thread);

    /* Presumably wakes the tap thread so it picks up the queued
       connection -- confirm against notify_thread(). */
    notify_thread(tp);

    return false;
}
5661 
5662 bool conn_setup_tap_stream(conn *c) {
5663     process_bin_tap_connect(c);
5664     return true;
5665 }
5666 
/**
 * libevent callback for a connection's socket: runs the connection's state
 * machine until a state handler returns false, and, around that, services
 * the owning thread's list of connections that are pending close.
 *
 * @param fd the descriptor the event fired on; must equal c->sfd
 * @param which the event flags that fired; stored in c->which
 * @param arg the connection (conn *) the event belongs to
 */
void event_handler(const int fd, const short which, void *arg) {
    conn *c;

    c = (conn *)arg;
    assert(c != NULL);

    /* On shutdown, stop the event loop instead of doing any work. */
    if (memcached_shutdown) {
        event_base_loopbreak(c->event.ev_base);
        return ;
    }

    c->which = which;

    /* sanity */
    if (fd != c->sfd) {
        if (settings.verbose > 0) {
            settings.extensions.logger->log(EXTENSION_LOG_WARNING, c,
                    "Catastrophic: event fd doesn't match conn fd!\n");
        }
        conn_close(c);
        return;
    }

    perform_callbacks(ON_SWITCH_CONN, c, c);

    /* Request budget for this wake-up; connections in the conn_ship_log
       state get the tap-specific budget. */
    c->nevents = settings.reqs_per_event;
    if (c->state == conn_ship_log) {
        c->nevents = settings.reqs_per_tap_event;
    }

    /* Captured before the state machine runs; the code below keeps using
       this thread even if c->thread is changed in the meantime. */
    LIBEVENT_THREAD *thr = c->thread;

    // Do we have pending closes?
    const size_t max_items = 256;
    conn *pending_close[max_items];
    size_t n_pending_close = 0;
    if (thr != NULL) {
        LOCK_THREAD(thr);
        /* Look at the pending-close list at most once per current_time
           value. */
        if (thr->pending_close && thr->last_checked != current_time) {
            assert(!has_cycle(thr->pending_close));
            thr->last_checked = current_time;

            /* Copies up to max_items entries into the local array --
               presumably detaching them from the list; confirm against
               list_to_array(). */
            n_pending_close = list_to_array(pending_close, max_items,
                                            &thr->pending_close);
        }
        UNLOCK_THREAD(thr);
    }

    /* Drive the state machine: each state handler returns true to keep
       going. The verbose variant logs every state before running it. */
    if (settings.verbose) {
        do {
            settings.extensions.logger->log(EXTENSION_LOG_DEBUG, c,
                                            "%d - Running task: (%s)\n",
                                            c->sfd, state_text(c->state));
        } while (c->state(c));
    } else {
        while (c->state(c)) {
            /* empty */
        }
    }

    /* Close any connections pending close */
    if (n_pending_close > 0) {
        for (size_t i = 0; i < n_pending_close; ++i) {
            conn *ce = pending_close[i];
            if (ce->refcount == 1) {
                /* Only one reference left: safe to close now. */
                settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
                                                "OK, time to nuke: %p\n",
                                                (void*)ce);
                conn_close(ce);
            } else {
                /* Still referenced elsewhere: put it back on its thread's
                   pending_close list for a later pass. */
                LOCK_THREAD(ce->thread);
                enlist_conn(ce, &ce->thread->pending_close);
                UNLOCK_THREAD(ce->thread);
            }
        }
    }

    /* NOTE(review): finalize_list() is not visible here; it is called with
       the thread lock held on the array collected above -- confirm what it
       does with the entries. */
    if (thr != NULL) {
        LOCK_THREAD(thr);
        finalize_list(pending_close, n_pending_close);
        UNLOCK_THREAD(thr);
    }
}
5750 
5751 static void dispatch_event_handler(int fd, short which, void *arg) {
5752     char buffer[80];
5753     ssize_t nr = recv(fd, buffer, sizeof(buffer), 0);
5754 
5755     if (nr != -1 && is_listen_disabled()) {
5756         bool enable = false;
5757         pthread_mutex_lock(&listen_state.mutex);
5758         listen_state.count -= nr;
5759         if (listen_state.count <= 0) {
5760             enable = true;
5761             listen_state.disabled = false;
5762         }
5763         pthread_mutex_unlock(&listen_state.mutex);
5764         if (enable) {
5765             conn *next;
5766             for (next = listen_conn; next; next = next->next) {
5767                 update_event(next, EV_READ | EV_PERSIST);
5768                 if (listen(next->sfd, settings.backlog) != 0) {
5769                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5770                                                     "listen() failed",
5771                                                     strerror(errno));
5772                 }
5773             }
5774         }
5775     }
5776 }
5777 
5778 
5779 
5780 static SOCKET new_socket(struct addrinfo *ai) {
5781     SOCKET sfd;
5782 
5783     sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
5784     if (sfd == INVALID_SOCKET) {
5785         return INVALID_SOCKET;
5786     }
5787 
5788     if (evutil_make_socket_nonblocking(sfd) == -1) {
5789         safe_close(sfd);
5790         return INVALID_SOCKET;
5791     }
5792 
5793     return sfd;
5794 }
5795 
5796 
5797 /*
5798  * Sets a socket's send buffer size to the maximum allowed by the system.
5799  */
5800 static void maximize_sndbuf(const int sfd) {
5801     socklen_t intsize = sizeof(int);
5802     int last_good = 0;
5803     int min, max, avg;
5804     int old_size;
5805 
5806     /* Start with the default size. */
5807     if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&old_size, &intsize) != 0) {
5808         if (settings.verbose > 0) {
5809             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5810                                             "getsockopt(SO_SNDBUF): %s",
5811                                             strerror(errno));
5812         }
5813 
5814         return;
5815     }
5816 
5817     /* Binary-search for the real maximum. */
5818     min = old_size;
5819     max = MAX_SENDBUF_SIZE;
5820 
5821     while (min <= max) {
5822         avg = ((unsigned int)(min + max)) / 2;
5823         if (setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (void *)&avg, intsize) == 0) {
5824             last_good = avg;
5825             min = avg + 1;
5826         } else {
5827             max = avg - 1;
5828         }
5829     }
5830 
5831     if (settings.verbose > 1) {
5832         settings.extensions.logger->log(EXTENSION_LOG_DEBUG, NULL,
5833                  "<%d send buffer was %d, now %d\n", sfd, old_size, last_good);
5834     }
5835 }
5836 
5837 
5838 
5839 /**
5840  * Create a socket and bind it to a specific port number
5841  * @param interface the interface to bind to
5842  * @param port the port number to bind to
5843  * @param transport the transport protocol (TCP / UDP)
5844  * @param portnumber_file A filepointer to write the port numbers to
5845  *        when they are successfully added to the list of ports we
5846  *        listen on.
5847  */
5848 static int server_socket(const char *interface,
5849                          int port,
5850                          enum network_transport transport,
5851                          FILE *portnumber_file) {
5852     int sfd;
5853     struct linger ling = {0, 0};
5854     struct addrinfo *ai;
5855     struct addrinfo *next;
5856     struct addrinfo hints = { .ai_flags = AI_PASSIVE,
5857                               .ai_family = AF_UNSPEC };
5858     char port_buf[NI_MAXSERV];
5859     int error;
5860     int success = 0;
5861     int flags =1;
5862     num_udp_socket = 0;
5863 
5864     hints.ai_socktype = IS_UDP(transport) ? SOCK_DGRAM : SOCK_STREAM;
5865 
5866     if (port == -1) {
5867         port = 0;
5868     }
5869     snprintf(port_buf, sizeof(port_buf), "%d", port);
5870     error= getaddrinfo(interface, port_buf, &hints, &ai);
5871     if (error != 0) {
5872         if (error != EAI_SYSTEM) {
5873             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5874                      "getaddrinfo(): %s\n", gai_strerror(error));
5875         } else {
5876             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5877                      "getaddrinfo(): %s\n", strerror(error));
5878         }
5879         return 1;
5880     }
5881 
5882     for (next= ai; next; next= next->ai_next) {
5883         conn *listen_conn_add;
5884         if ((sfd = new_socket(next)) == INVALID_SOCKET) {
5885             /* getaddrinfo can return "junk" addresses,
5886              * we make sure at least one works before erroring.
5887              */
5888             continue;
5889         }
5890 
5891 #ifdef IPV6_V6ONLY
5892         if (next->ai_family == AF_INET6) {
5893             error = setsockopt(sfd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &flags, sizeof(flags));
5894             if (error != 0) {
5895                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5896                                                 "setsockopt(IPV6_V6ONLY): %s",
5897                                                 strerror(errno));
5898                 safe_close(sfd);
5899                 continue;
5900             }
5901         }
5902 #endif
5903 
5904         setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
5905         if (IS_UDP(transport)) {
5906             maximize_sndbuf(sfd);
5907 	    udp_socket[num_udp_socket] = sfd;
5908 	    num_udp_socket++;
5909         } else {
5910             error = setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
5911             if (error != 0) {
5912                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5913                                                 "setsockopt(SO_KEEPALIVE): %s",
5914                                                 strerror(errno));
5915             }
5916 
5917             error = setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
5918             if (error != 0) {
5919                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5920                                                 "setsockopt(SO_LINGER): %s",
5921                                                 strerror(errno));
5922             }
5923 
5924             error = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
5925             if (error != 0) {
5926                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5927                                                 "setsockopt(TCP_NODELAY): %s",
5928                                                 strerror(errno));
5929             }
5930         }
5931 
5932         if (bind(sfd, next->ai_addr, next->ai_addrlen) == SOCKET_ERROR) {
5933             if (errno != EADDRINUSE) {
5934                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5935                                                 "bind(): %s",
5936                                                 strerror(errno));
5937                 safe_close(sfd);
5938                 freeaddrinfo(ai);
5939                 return 1;
5940             }
5941             safe_close(sfd);
5942             continue;
5943         } else {
5944             success++;
5945             if (!IS_UDP(transport) && listen(sfd, settings.backlog) == SOCKET_ERROR) {
5946                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5947                                                 "listen(): %s",
5948                                                 strerror(errno));
5949                 safe_close(sfd);
5950                 freeaddrinfo(ai);
5951                 return 1;
5952             }
5953             if (portnumber_file != NULL &&
5954                 (next->ai_addr->sa_family == AF_INET ||
5955                  next->ai_addr->sa_family == AF_INET6)) {
5956                 union {
5957                     struct sockaddr_in in;
5958                     struct sockaddr_in6 in6;
5959                 } my_sockaddr;
5960                 socklen_t len = sizeof(my_sockaddr);
5961                 if (getsockname(sfd, (struct sockaddr*)&my_sockaddr, &len)==0) {
5962                     if (next->ai_addr->sa_family == AF_INET) {
5963                         fprintf(portnumber_file, "%s INET: %u\n",
5964                                 IS_UDP(transport) ? "UDP" : "TCP",
5965                                 ntohs(my_sockaddr.in.sin_port));
5966                     } else {
5967                         fprintf(portnumber_file, "%s INET6: %u\n",
5968                                 IS_UDP(transport) ? "UDP" : "TCP",
5969                                 ntohs(my_sockaddr.in6.sin6_port));
5970                     }
5971                 }
5972             }
5973         }
5974 
5975         if (IS_UDP(transport)) {
5976             int c;
5977 
5978             for (c = 0; c < settings.num_threads_per_udp; c++) {
5979                 /* this is guaranteed to hit all threads because we round-robin */
5980                 dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
5981                                   UDP_READ_BUFFER_SIZE, transport);
5982                 STATS_LOCK();
5983                 ++stats.curr_conns;
5984                 ++stats.daemon_conns;
5985                 STATS_UNLOCK();
5986             }
5987         } else {
5988             if (!(listen_conn_add = conn_new(sfd, conn_listening,
5989                                              EV_READ | EV_PERSIST, 1,
5990                                              transport, main_base, NULL))) {
5991                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
5992                         "failed to create listening connection\n");
5993                 exit(EXIT_FAILURE);
5994             }
5995             listen_conn_add->next = listen_conn;
5996             listen_conn = listen_conn_add;
5997             STATS_LOCK();
5998             ++stats.curr_conns;
5999             ++stats.daemon_conns;
6000             STATS_UNLOCK();
6001         }
6002     }
6003 
6004     freeaddrinfo(ai);
6005 
6006     /* Return zero iff we detected no errors in starting up connections */
6007     return success == 0;
6008 }
6009 
6010 static int server_sockets(int port, enum network_transport transport,
6011                           FILE *portnumber_file) {
6012     if (settings.inter == NULL) {
6013         return server_socket(settings.inter, port, transport, portnumber_file);
6014     } else {
6015         // tokenize them and bind to each one of them..
6016         char *b;
6017         int ret = 0;
6018         char *list = strdup(settings.inter);
6019 
6020         if (list == NULL) {
6021             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6022                                             "Failed to allocate memory for parsing server interface string\n");
6023             return 1;
6024         }
6025         for (char *p = strtok_r(list, ";,", &b);
6026              p != NULL;
6027              p = strtok_r(NULL, ";,", &b)) {
6028             int the_port = port;
6029 
6030             char *s = strchr(p, ':');
6031             if (s != NULL) {
6032                 *s = '\0';
6033                 ++s;
6034                 if (!safe_strtol(s, &the_port)) {
6035                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6036                                                     "Invalid port number: \"%s\"", s);
6037                     return 1;
6038                 }
6039             }
6040             if (strcmp(p, "*") == 0) {
6041                 p = NULL;
6042             }
6043             ret |= server_socket(p, the_port, transport, portnumber_file);
6044         }
6045         free(list);
6046         return ret;
6047     }
6048 }
6049 
6050 static int new_socket_unix(void) {
6051     int sfd;
6052 
6053     if ((sfd = socket(AF_UNIX, SOCK_STREAM, 0)) == INVALID_SOCKET) {
6054         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6055                                         "socket(AF_UNIX, SOCK_STREAM, 0): %s",
6056                                         strerror(errno));
6057         return INVALID_SOCKET;
6058     }
6059 
6060     if (evutil_make_socket_nonblocking(sfd) == -1) {
6061         safe_close(sfd);
6062         return INVALID_SOCKET;
6063     }
6064     return sfd;
6065 }
6066 
6067 /* this will probably not work on windows */
6068 static int server_socket_unix(const char *path, int access_mask) {
6069     int sfd;
6070     struct linger ling = {0, 0};
6071     struct sockaddr_un addr;
6072     struct stat tstat;
6073     int flags =1;
6074     int old_umask;
6075 
6076     if (!path) {
6077         return 1;
6078     }
6079 
6080     if ((sfd = new_socket_unix()) == -1) {
6081         return 1;
6082     }
6083 
6084     /*
6085      * Clean up a previous socket file if we left it around
6086      */
6087     if (lstat(path, &tstat) == 0) {
6088         if (S_ISSOCK(tstat.st_mode))
6089             unlink(path);
6090     }
6091 
6092     setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
6093     setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
6094     setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
6095 
6096     /*
6097      * the memset call clears nonstandard fields in some impementations
6098      * that otherwise mess things up.
6099      */
6100     memset(&addr, 0, sizeof(addr));
6101 
6102     addr.sun_family = AF_UNIX;
6103     strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
6104     assert(strcmp(addr.sun_path, path) == 0);
6105     old_umask = umask( ~(access_mask&0777));
6106     if (bind(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
6107         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6108                                         "bind(): %s",
6109                                         strerror(errno));
6110         safe_close(sfd);
6111         umask(old_umask);
6112         return 1;
6113     }
6114     umask(old_umask);
6115     if (listen(sfd, settings.backlog) == -1) {
6116         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6117                                         "listen(): %s",
6118                                         strerror(errno));
6119         safe_close(sfd);
6120         return 1;
6121     }
6122     if (!(listen_conn = conn_new(sfd, conn_listening,
6123                                  EV_READ | EV_PERSIST, 1,
6124                                  local_transport, main_base, NULL))) {
6125         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6126                  "failed to create listening connection\n");
6127         exit(EXIT_FAILURE);
6128     }
6129     STATS_LOCK();
6130     ++stats.daemon_conns;
6131     STATS_UNLOCK();
6132 
6133     return 0;
6134 }
6135 
/* Timer event that clock_handler() deletes and re-adds on each invocation. */
static struct event clockevent;
6137 
6138 /* time-sensitive callers can call it by hand with this, outside the normal ever-1-second timer */
6139 static void set_current_time(void) {
6140     struct timeval timer;
6141 
6142     gettimeofday(&timer, NULL);
6143     current_time = (rel_time_t) (timer.tv_sec - process_started);
6144 }
6145 
6146 static void clock_handler(const int fd, const short which, void *arg) {
6147     struct timeval t = {.tv_sec = 1, .tv_usec = 0};
6148     static bool initialized = false;
6149 
6150     if (memcached_shutdown) {
6151         event_base_loopbreak(main_base);
6152         return ;
6153     }
6154 
6155     if (initialized) {
6156         /* only delete the event if it's actually there. */
6157         evtimer_del(&clockevent);
6158     } else {
6159         initialized = true;
6160     }
6161 
6162     evtimer_set(&clockevent, clock_handler, 0);
6163     event_base_set(main_base, &clockevent);
6164     evtimer_add(&clockevent, &t);
6165 
6166     set_current_time();
6167 }
6168 
6169 static void usage(void) {
6170     printf(PACKAGE " " VERSION "\n");
6171     printf("-p <num>      TCP port number to listen on (default: 11211)\n"
6172            "-U <num>      UDP port number to listen on (default: 11211, 0 is off)\n"
6173            "-s <file>     UNIX socket path to listen on (disables network support)\n"
6174            "-a <mask>     access mask for UNIX socket, in octal (default: 0700)\n"
6175            "-l <addr>     interface to listen on (default: INADDR_ANY, all addresses)\n"
6176            "              <addr> may be specified as host:port. If you don't specify\n"
6177            "              a port number, the value you specified with -p or -U is\n"
6178            "              used. You may specify multiple addresses separated by comma\n"
6179            "              or by using -l multiple times\n"
6180            "-d            run as a daemon\n"
6181            "-r            maximize core file limit\n"
6182            "-u <username> assume identity of <username> (only when run as root)\n"
6183            "-m <num>      max memory to use for items in megabytes (default: 64 MB)\n"
6184            "-M            return error on memory exhausted (rather than removing items)\n"
6185            "-c <num>      max simultaneous connections (default: 1000)\n"
6186            "-k            lock down all paged memory.  Note that there is a\n"
6187            "              limit on how much memory you may lock.  Trying to\n"
6188            "              allocate more than that would fail, so be sure you\n"
6189            "              set the limit correctly for the user you started\n"
6190            "              the daemon with (not for -u <username> user;\n"
6191            "              under sh this is done with 'ulimit -S -l NUM_KB').\n"
6192            "-v            verbose (print errors/warnings while in event loop)\n"
6193            "-vv           very verbose (also print client commands/reponses)\n"
6194            "-vvv          extremely verbose (also print internal state transitions)\n"
6195            "-h            print this help and exit\n"
6196            "-i            print memcached and libevent license\n"
6197            "-P <file>     save PID in <file>, only used with -d option\n"
6198            "-f <factor>   chunk size growth factor (default: 1.25)\n"
6199            "-n <bytes>    minimum space allocated for key+value+flags (default: 48)\n");
6200     printf("-L            Try to use large memory pages (if available). Increasing\n"
6201            "              the memory page size could reduce the number of TLB misses\n"
6202            "              and improve the performance. In order to get large pages\n"
6203            "              from the OS, memcached will allocate the total item-cache\n"
6204            "              in one large chunk.\n");
6205     printf("-D <char>     Use <char> as the delimiter between key prefixes and IDs.\n"
6206            "              This is used for per-prefix stats reporting. The default is\n"
6207            "              \":\" (colon). If this option is specified, stats collection\n"
6208            "              is turned on automatically; if not, then it may be turned on\n"
6209            "              by sending the \"stats detail on\" command to the server.\n");
6210     printf("-t <num>      number of threads to use (default: 4)\n");
6211     printf("-R            Maximum number of requests per event, limits the number of\n"
6212            "              requests process for a given connection to prevent \n"
6213            "              starvation (default: 20)\n");
6214     printf("-C            Disable use of CAS\n");
6215     printf("-b            Set the backlog queue limit (default: 1024)\n");
6216     printf("-B            Binding protocol - one of ascii, binary, or auto (default)\n");
6217     printf("-I            Override the size of each slab page. Adjusts max item size\n"
6218            "              (default: 1mb, min: 1k, max: 128m)\n");
6219     printf("-q            Disable detailed stats commands\n");
6220 #ifdef SASL_ENABLED
6221     printf("-S            Require SASL authentication\n");
6222 #endif
6223     printf("-X module,cfg Load the module and initialize it with the config\n");
6224     printf("-E engine     Load engine as the storage engine\n");
6225     printf("-e config     Pass config as configuration options to the storage engine\n");
6226     printf("\nEnvironment variables:\n"
6227            "MEMCACHED_PORT_FILENAME   File to write port information to\n"
6228            "MEMCACHED_TOP_KEYS        Number of top keys to keep track of\n"
6229            "MEMCACHED_REQS_TAP_EVENT  Similar to -R but for tap_ship_log\n");
6230 }
6231 static void usage_license(void) {
6232     printf(PACKAGE " " VERSION "\n\n");
6233     printf(
6234     "Copyright (c) 2003, Danga Interactive, Inc. <http://www.danga.com/>\n"
6235     "All rights reserved.\n"
6236     "\n"
6237     "Redistribution and use in source and binary forms, with or without\n"
6238     "modification, are permitted provided that the following conditions are\n"
6239     "met:\n"
6240     "\n"
6241     "    * Redistributions of source code must retain the above copyright\n"
6242     "notice, this list of conditions and the following disclaimer.\n"
6243     "\n"
6244     "    * Redistributions in binary form must reproduce the above\n"
6245     "copyright notice, this list of conditions and the following disclaimer\n"
6246     "in the documentation and/or other materials provided with the\n"
6247     "distribution.\n"
6248     "\n"
6249     "    * Neither the name of the Danga Interactive nor the names of its\n"
6250     "contributors may be used to endorse or promote products derived from\n"
6251     "this software without specific prior written permission.\n"
6252     "\n"
6253     "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n"
6254     "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n"
6255     "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n"
6256     "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n"
6257     "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n"
6258     "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n"
6259     "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6260     "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6261     "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6262     "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n"
6263     "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6264     "\n"
6265     "\n"
6266     "This product includes software developed by Niels Provos.\n"
6267     "\n"
6268     "[ libevent ]\n"
6269     "\n"
6270     "Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>\n"
6271     "All rights reserved.\n"
6272     "\n"
6273     "Redistribution and use in source and binary forms, with or without\n"
6274     "modification, are permitted provided that the following conditions\n"
6275     "are met:\n"
6276     "1. Redistributions of source code must retain the above copyright\n"
6277     "   notice, this list of conditions and the following disclaimer.\n"
6278     "2. Redistributions in binary form must reproduce the above copyright\n"
6279     "   notice, this list of conditions and the following disclaimer in the\n"
6280     "   documentation and/or other materials provided with the distribution.\n"
6281     "3. All advertising materials mentioning features or use of this software\n"
6282     "   must display the following acknowledgement:\n"
6283     "      This product includes software developed by Niels Provos.\n"
6284     "4. The name of the author may not be used to endorse or promote products\n"
6285     "   derived from this software without specific prior written permission.\n"
6286     "\n"
6287     "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
6288     "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
6289     "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
6290     "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
6291     "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n"
6292     "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
6293     "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
6294     "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
6295     "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
6296     "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
6297     );
6298 
6299     return;
6300 }
6301 
6302 static void save_pid(const char *pid_file) {
6303     FILE *fp;
6304 
6305     if (access(pid_file, F_OK) == 0) {
6306         if ((fp = fopen(pid_file, "r")) != NULL) {
6307             char buffer[1024];
6308             if (fgets(buffer, sizeof(buffer), fp) != NULL) {
6309                 unsigned int pid;
6310                 if (safe_strtoul(buffer, &pid) && kill((pid_t)pid, 0) == 0) {
6311                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6312                                "WARNING: The pid file contained the following (running) pid: %u\n", pid);
6313                 }
6314             }
6315             fclose(fp);
6316         }
6317     }
6318 
6319     if ((fp = fopen(pid_file, "w")) == NULL) {
6320         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6321                  "Could not open the pid file %s for writing: %s\n",
6322                  pid_file, strerror(errno));
6323         return;
6324     }
6325 
6326     fprintf(fp,"%ld\n", (long)getpid());
6327     if (fclose(fp) == -1) {
6328         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6329                 "Could not close the pid file %s: %s\n",
6330                 pid_file, strerror(errno));
6331     }
6332 }
6333 
6334 static void remove_pidfile(const char *pid_file) {
6335     if (pid_file != NULL) {
6336         if (unlink(pid_file) != 0) {
6337             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6338                     "Could not remove the pid file %s: %s\n",
6339                     pid_file, strerror(errno));
6340         }
6341     }
6342 }
6343 
6344 #ifndef HAVE_SIGIGNORE
6345 static int sigignore(int sig) {
6346     struct sigaction sa = { .sa_handler = SIG_IGN, .sa_flags = 0 };
6347 
6348     if (sigemptyset(&sa.sa_mask) == -1 || sigaction(sig, &sa, 0) == -1) {
6349         return -1;
6350     }
6351     return 0;
6352 }
6353 #endif /* !HAVE_SIGIGNORE */
6354 
6355 static void sigterm_handler(int sig) {
6356     assert(sig == SIGTERM || sig == SIGINT);
6357     memcached_shutdown = 1;
6358 }
6359 
6360 static int install_sigterm_handler(void) {
6361     struct sigaction sa = {.sa_handler = sigterm_handler, .sa_flags = 0};
6362 
6363     if (sigemptyset(&sa.sa_mask) == -1 || sigaction(SIGTERM, &sa, 0) == -1 ||
6364         sigaction(SIGINT, &sa, 0) == -1) {
6365         return -1;
6366     }
6367 
6368     return 0;
6369 }
6370 
6371 /*
6372  * On systems that supports multiple page sizes we may reduce the
6373  * number of TLB-misses by using the biggest available page size
6374  */
6375 static int enable_large_pages(void) {
6376 #if defined(HAVE_GETPAGESIZES) && defined(HAVE_MEMCNTL)
6377     int ret = -1;
6378     size_t sizes[32];
6379     int avail = getpagesizes(sizes, 32);
6380     if (avail != -1) {
6381         size_t max = sizes[0];
6382         struct memcntl_mha arg = {0};
6383         int ii;
6384 
6385         for (ii = 1; ii < avail; ++ii) {
6386             if (max < sizes[ii]) {
6387                 max = sizes[ii];
6388             }
6389         }
6390 
6391         arg.mha_flags   = 0;
6392         arg.mha_pagesize = max;
6393         arg.mha_cmd = MHA_MAPSIZE_BSSBRK;
6394 
6395         if (memcntl(0, 0, MC_HAT_ADVISE, (caddr_t)&arg, 0, 0) == -1) {
6396             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6397                   "Failed to set large pages: %s\nWill use default page size\n",
6398                   strerror(errno));
6399         } else {
6400             ret = 0;
6401         }
6402     } else {
6403         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6404           "Failed to get supported pagesizes: %s\nWill use default page size\n",
6405           strerror(errno));
6406     }
6407 
6408     return ret;
6409 #else
6410     return 0;
6411 #endif
6412 }
6413 
6414 static const char* get_server_version(void) {
6415     return VERSION;
6416 }
6417 
6418 static void store_engine_specific(const void *cookie,
6419                                   void *engine_data) {
6420     conn *c = (conn*)cookie;
6421     c->engine_storage = engine_data;
6422 }
6423 
6424 static void *get_engine_specific(const void *cookie) {
6425     conn *c = (conn*)cookie;
6426     return c->engine_storage;
6427 }
6428 
6429 static int get_socket_fd(const void *cookie) {
6430     conn *c = (conn *)cookie;
6431     return c->sfd;
6432 }
6433 
6434 static void set_tap_nack_mode(const void *cookie, bool enable) {
6435     conn *c = (conn *)cookie;
6436     c->tap_nack_mode = enable;
6437 }
6438 
6439 static void reserve_cookie(const void *cookie) {
6440     conn *c = (conn *)cookie;
6441     ++c->refcount;
6442 }
6443 
6444 static void release_cookie(const void *cookie) {
6445     conn *c = (conn *)cookie;
6446     --c->refcount;
6447 }
6448 
6449 static int num_independent_stats(void) {
6450     return settings.num_threads + 1;
6451 }
6452 
6453 static void *new_independent_stats(void) {
6454     int ii;
6455     int nrecords = num_independent_stats();
6456     struct independent_stats *independent_stats = calloc(sizeof(independent_stats) + sizeof(struct thread_stats) * nrecords, 1);
6457 
6458 #ifdef INNODB_MEMCACHED
6459     if (independent_stats == NULL) {
6460 	fprintf(stderr, "Unable to allocate memory for"
6461 		       "independent_stats...\n");
6462        return (NULL);
6463     }
6464 #endif
6465 
6466     if (settings.topkeys > 0)
6467         independent_stats->topkeys = topkeys_init(settings.topkeys);
6468     for (ii = 0; ii < nrecords; ii++)
6469         pthread_mutex_init(&independent_stats->thread_stats[ii].mutex, NULL);
6470     return independent_stats;
6471 }
6472 
6473 static void release_independent_stats(void *stats) {
6474     int ii;
6475     int nrecords = num_independent_stats();
6476     struct independent_stats *independent_stats = stats;
6477     if (independent_stats->topkeys)
6478         topkeys_free(independent_stats->topkeys);
6479     for (ii = 0; ii < nrecords; ii++)
6480         pthread_mutex_destroy(&independent_stats->thread_stats[ii].mutex);
6481     free(independent_stats);
6482 }
6483 
6484 static inline struct independent_stats *get_independent_stats(conn *c) {
6485     struct independent_stats *independent_stats;
6486     if (settings.engine.v1->get_stats_struct != NULL) {
6487         independent_stats = settings.engine.v1->get_stats_struct(settings.engine.v0, (const void *)c);
6488         if (independent_stats == NULL)
6489             independent_stats = default_independent_stats;
6490     } else {
6491         independent_stats = default_independent_stats;
6492     }
6493     return independent_stats;
6494 }
6495 
6496 static inline struct thread_stats *get_thread_stats(conn *c) {
6497     struct independent_stats *independent_stats = get_independent_stats(c);
6498     assert(c->thread->index < num_independent_stats());
6499     return &independent_stats->thread_stats[c->thread->index];
6500 }
6501 
6502 static void register_callback(ENGINE_HANDLE *eh,
6503                               ENGINE_EVENT_TYPE type,
6504                               EVENT_CALLBACK cb, const void *cb_data) {
6505     struct engine_event_handler *h =
6506         calloc(sizeof(struct engine_event_handler), 1);
6507 
6508     assert(h);
6509     h->cb = cb;
6510     h->cb_data = cb_data;
6511     h->next = engine_event_handlers[type];
6512     engine_event_handlers[type] = h;
6513 }
6514 
6515 static rel_time_t get_current_time(void)
6516 {
6517     return current_time;
6518 }
6519 
6520 static void count_eviction(const void *cookie, const void *key, const int nkey) {
6521     topkeys_t *tk = get_independent_stats((conn*)cookie)->topkeys;
6522     TK(tk, evictions, key, nkey, get_current_time());
6523 }
6524 
6525 /**
6526  * To make it easy for engine implementors that doesn't want to care about
6527  * writing their own incr/decr code, they can just set the arithmetic function
6528  * to NULL and use this implementation. It is not efficient, due to the fact
6529  * that it does multiple calls through the interface (get and then cas store).
6530  * If you don't care, feel free to use it..
6531  */
6532 static ENGINE_ERROR_CODE internal_arithmetic(ENGINE_HANDLE* handle,
6533                                              const void* cookie,
6534                                              const void* key,
6535                                              const int nkey,
6536                                              const bool increment,
6537                                              const bool create,
6538                                              const uint64_t delta,
6539                                              const uint64_t initial,
6540                                              const rel_time_t exptime,
6541                                              uint64_t *cas,
6542                                              uint64_t *result,
6543                                              uint16_t vbucket)
6544 {
6545     ENGINE_HANDLE_V1 *e = (ENGINE_HANDLE_V1*)handle;
6546 
6547     item *it = NULL;
6548 
6549     ENGINE_ERROR_CODE ret;
6550     ret = e->get(handle, cookie, &it, key, nkey, vbucket);
6551 
6552     if (ret == ENGINE_SUCCESS) {
6553         item_info info = { .nvalue = 1 };
6554 
6555         if (!e->get_item_info(handle, cookie, it, &info)) {
6556             e->release(handle, cookie, it);
6557             return ENGINE_FAILED;
6558         }
6559 
6560         char value[80];
6561 
6562         if (info.value[0].iov_len > (sizeof(value) - 1)) {
6563             e->release(handle, cookie, it);
6564             return ENGINE_EINVAL;
6565         }
6566 
6567         memcpy(value, info.value[0].iov_base, info.value[0].iov_len);
6568         value[info.value[0].iov_len] = '\0';
6569 
6570         uint64_t val;
6571         if (!safe_strtoull(value, &val)) {
6572             e->release(handle, cookie, it);
6573             return ENGINE_EINVAL;
6574         }
6575 
6576         if (increment) {
6577             val += delta;
6578         } else {
6579             if (delta > val) {
6580                 val = 0;
6581             } else {
6582                 val -= delta;
6583             }
6584         }
6585 
6586         size_t nb = snprintf(value, sizeof(value), "%"PRIu64, val);
6587         *result = val;
6588         item *nit = NULL;
6589         if (e->allocate(handle, cookie, &nit, key,
6590                         nkey, nb, info.flags, info.exptime) != ENGINE_SUCCESS) {
6591             e->release(handle, cookie, it);
6592             return ENGINE_ENOMEM;
6593         }
6594 
6595         item_info i2 = { .nvalue = 1 };
6596         if (!e->get_item_info(handle, cookie, nit, &i2)) {
6597             e->release(handle, cookie, it);
6598             e->release(handle, cookie, nit);
6599             return ENGINE_FAILED;
6600         }
6601 
6602         memcpy(i2.value[0].iov_base, value, nb);
6603         e->item_set_cas(handle, cookie, nit, info.cas);
6604         ret = e->store(handle, cookie, nit, cas, OPERATION_CAS, vbucket);
6605         e->release(handle, cookie, it);
6606         e->release(handle, cookie, nit);
6607     } else if (ret == ENGINE_KEY_ENOENT && create) {
6608         char value[80];
6609         size_t nb = snprintf(value, sizeof(value), "%"PRIu64"\r\n", initial);
6610         *result = initial;
6611         if (e->allocate(handle, cookie, &it, key, nkey, nb, 0, exptime) != ENGINE_SUCCESS) {
6612             e->release(handle, cookie, it);
6613             return ENGINE_ENOMEM;
6614         }
6615 
6616         item_info info = { .nvalue = 1 };
6617         if (!e->get_item_info(handle, cookie, it, &info)) {
6618             e->release(handle, cookie, it);
6619             return ENGINE_FAILED;
6620         }
6621 
6622         memcpy(info.value[0].iov_base, value, nb);
6623         ret = e->store(handle, cookie, it, cas, OPERATION_CAS, vbucket);
6624         e->release(handle, cookie, it);
6625     }
6626 
6627     /* We had a race condition.. just call ourself recursively to retry */
6628     if (ret == ENGINE_KEY_EEXISTS) {
6629         return internal_arithmetic(handle, cookie, key, nkey, increment, create, delta,
6630                                    initial, exptime, cas, result, vbucket);
6631     }
6632 
6633     return ret;
6634 }
6635 
6636 /**
6637  * Register an extension if it's not already registered
6638  *
6639  * @param type the type of the extension to register
6640  * @param extension the extension to register
6641  * @return true if success, false otherwise
6642  */
6643 static bool register_extension(extension_type_t type, void *extension)
6644 {
6645     if (extension == NULL) {
6646         return false;
6647     }
6648 
6649     switch (type) {
6650     case EXTENSION_DAEMON:
6651         for (EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6652              ptr != NULL;
6653              ptr = ptr->next) {
6654             if (ptr == extension) {
6655                 return false;
6656             }
6657         }
6658         ((EXTENSION_DAEMON_DESCRIPTOR *)(extension))->next = settings.extensions.daemons;
6659         settings.extensions.daemons = extension;
6660         return true;
6661     case EXTENSION_LOGGER:
6662         settings.extensions.logger = extension;
6663         return true;
6664     case EXTENSION_ASCII_PROTOCOL:
6665         if (settings.extensions.ascii != NULL) {
6666             EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *last;
6667             for (last = settings.extensions.ascii; last->next != NULL;
6668                  last = last->next) {
6669                 if (last == extension) {
6670                     return false;
6671                 }
6672             }
6673             if (last == extension) {
6674                 return false;
6675             }
6676             last->next = extension;
6677             last->next->next = NULL;
6678         } else {
6679             settings.extensions.ascii = extension;
6680             settings.extensions.ascii->next = NULL;
6681         }
6682         return true;
6683 
6684     default:
6685         return false;
6686     }
6687 }
6688 
6689 /**
6690  * Unregister an extension
6691  *
6692  * @param type the type of the extension to remove
6693  * @param extension the extension to remove
6694  */
6695 static void unregister_extension(extension_type_t type, void *extension)
6696 {
6697     switch (type) {
6698     case EXTENSION_DAEMON:
6699         {
6700             EXTENSION_DAEMON_DESCRIPTOR *prev = NULL;
6701             EXTENSION_DAEMON_DESCRIPTOR *ptr = settings.extensions.daemons;
6702 
6703             while (ptr != NULL && ptr != extension) {
6704                 prev = ptr;
6705                 ptr = ptr->next;
6706             }
6707 
6708             if (ptr != NULL && prev != NULL) {
6709                 prev->next = ptr->next;
6710             }
6711 
6712             if (settings.extensions.daemons == ptr) {
6713                 settings.extensions.daemons = ptr->next;
6714             }
6715         }
6716         break;
6717     case EXTENSION_LOGGER:
6718         if (settings.extensions.logger == extension) {
6719             if (get_stderr_logger() == extension) {
6720                 settings.extensions.logger = get_null_logger();
6721             } else {
6722                 settings.extensions.logger = get_stderr_logger();
6723             }
6724         }
6725         break;
6726     case EXTENSION_ASCII_PROTOCOL:
6727         {
6728             EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *prev = NULL;
6729             EXTENSION_ASCII_PROTOCOL_DESCRIPTOR *ptr = settings.extensions.ascii;
6730 
6731             while (ptr != NULL && ptr != extension) {
6732                 prev = ptr;
6733                 ptr = ptr->next;
6734             }
6735 
6736             if (ptr != NULL && prev != NULL) {
6737                 prev->next = ptr->next;
6738             }
6739 
6740             if (settings.extensions.ascii == ptr) {
6741                 settings.extensions.ascii = ptr->next;
6742             }
6743         }
6744         break;
6745 
6746     default:
6747         ;
6748     }
6749 
6750 }
6751 
6752 /**
6753  * Get the named extension
6754  */
6755 static void* get_extension(extension_type_t type)
6756 {
6757     switch (type) {
6758     case EXTENSION_DAEMON:
6759         return settings.extensions.daemons;
6760 
6761     case EXTENSION_LOGGER:
6762         return settings.extensions.logger;
6763 
6764     case EXTENSION_ASCII_PROTOCOL:
6765         return settings.extensions.ascii;
6766 
6767     default:
6768         return NULL;
6769     }
6770 }
6771 
/**
 * Request a shutdown of the server by setting memcached_shutdown to 1.
 *
 * When built with INNODB_MEMCACHED the function has external linkage and
 * additionally closes every connection on the listen_conn list and every
 * socket in udp_socket[] before raising the flag.
 */
#ifdef INNODB_MEMCACHED
void shutdown_server(void) {
#else
static void shutdown_server(void) {
#endif /* INNODB_MEMCACHED */
#ifdef INNODB_MEMCACHED
    int i;
    /* Clean up connections */
    /* NOTE(review): listen_conn->next is read after conn_closing() has been
       called on that connection -- confirm conn_closing() leaves the
       structure readable. */
    while (listen_conn) {
	conn_closing(listen_conn);
	listen_conn = listen_conn->next;
    }

    for (i = 0; i < num_udp_socket; i++) {
	safe_close(udp_socket[i]);
    }
#endif
    /* 1 == shutdown requested; shutdown_complete() waits for the value 2 */
    memcached_shutdown = 1;
}
6791 
6792 #ifdef INNODB_MEMCACHED
6793 bool shutdown_complete(void)
6794 {
6795     return(memcached_shutdown == 2);
6796 }
6797 
6798 bool init_complete(void)
6799 {
6800     return(memcached_initialized == 1);
6801 }
6802 #endif
6803 
6804 static EXTENSION_LOGGER_DESCRIPTOR* get_logger(void)
6805 {
6806     return settings.extensions.logger;
6807 }
6808 
6809 static EXTENSION_LOG_LEVEL get_log_level(void)
6810 {
6811     EXTENSION_LOG_LEVEL ret;
6812     switch (settings.verbose) {
6813     case 0: ret = EXTENSION_LOG_WARNING; break;
6814     case 1: ret = EXTENSION_LOG_INFO; break;
6815     case 2: ret = EXTENSION_LOG_DEBUG; break;
6816     default:
6817         ret = EXTENSION_LOG_DETAIL;
6818     }
6819     return ret;
6820 }
6821 
6822 static void set_log_level(EXTENSION_LOG_LEVEL severity)
6823 {
6824     switch (severity) {
6825     case EXTENSION_LOG_WARNING: settings.verbose = 0; break;
6826     case EXTENSION_LOG_INFO: settings.verbose = 1; break;
6827     case EXTENSION_LOG_DEBUG: settings.verbose = 2; break;
6828     default:
6829         settings.verbose = 3;
6830     }
6831 }
6832 
/**
 * Stats callback that appends "key=val;" to the NUL terminated string the
 * cookie points to.
 *
 * The destination is treated as a 1024 byte buffer (the size get_config()
 * provides); a pair that would not fit, including the trailing ';' and the
 * terminating NUL, is silently dropped. Pairs with an empty key or an empty
 * value are ignored.
 *
 * @param key the name of the setting (not NUL terminated)
 * @param klen the length of key
 * @param val the value of the setting (not NUL terminated)
 * @param vlen the length of val
 * @param cookie the buffer to append to
 */
static void get_config_append_stats(const char *key, const uint16_t klen,
                                    const char *val, const uint32_t vlen,
                                    const void *cookie)
{
    if (klen == 0 || vlen == 0) {
        return;
    }

    char *buffer = (char*)cookie;
    const size_t used = strlen(buffer);

    /* +3: the '=', the ';' and the terminating NUL */
    if ((used + klen + vlen + 3) > 1024) {
        return;
    }

    char *out = buffer + used;

    memcpy(out, key, klen);
    out += klen;
    *out++ = '=';
    memcpy(out, val, vlen);
    out += vlen;
    out[0] = ';';
    out[1] = '\0';
}
6857 
6858 static bool get_config(struct config_item items[]) {
6859     char config[1024];
6860     config[0] = '\0';
6861     process_stat_settings(get_config_append_stats, config);
6862     int rval = parse_config(config, items, NULL);
6863     return rval >= 0;
6864 }
6865 
6866 /**
6867  * Callback the engines may call to get the public server interface
6868  * @return pointer to a structure containing the interface. The client should
6869  *         know the layout and perform the proper casts.
6870  */
6871 static SERVER_HANDLE_V1 *get_server_api(void)
6872 {
6873     static SERVER_CORE_API core_api = {
6874         .server_version = get_server_version,
6875         .hash = hash,
6876         .realtime = realtime,
6877         .abstime = abstime,
6878         .get_current_time = get_current_time,
6879         .parse_config = parse_config,
6880         .shutdown = shutdown_server,
6881         .get_config = get_config
6882     };
6883 
6884     static SERVER_COOKIE_API server_cookie_api = {
6885         .get_auth_data = get_auth_data,
6886         .store_engine_specific = store_engine_specific,
6887         .get_engine_specific = get_engine_specific,
6888         .get_socket_fd = get_socket_fd,
6889         .set_tap_nack_mode = set_tap_nack_mode,
6890         .notify_io_complete = notify_io_complete,
6891         .reserve = reserve_cookie,
6892         .release = release_cookie
6893     };
6894 
6895     static SERVER_STAT_API server_stat_api = {
6896         .new_stats = new_independent_stats,
6897         .release_stats = release_independent_stats,
6898         .evicting = count_eviction
6899     };
6900 
6901     static SERVER_LOG_API server_log_api = {
6902         .get_logger = get_logger,
6903         .get_level = get_log_level,
6904         .set_level = set_log_level
6905     };
6906     static SERVER_EXTENSION_API extension_api = {
6907         .register_extension = register_extension,
6908         .unregister_extension = unregister_extension,
6909         .get_extension = get_extension
6910     };
6911 
6912     static SERVER_CALLBACK_API callback_api = {
6913         .register_callback = register_callback,
6914         .perform_callbacks = perform_callbacks,
6915     };
6916 
6917     static SERVER_HANDLE_V1 rv = {
6918         .interface = 1,
6919         .core = &core_api,
6920         .stat = &server_stat_api,
6921         .extension = &extension_api,
6922         .callback = &callback_api,
6923         .log = &server_log_api,
6924         .cookie = &server_cookie_api
6925     };
6926 
6927     if (rv.engine == NULL) {
6928         rv.engine = settings.engine.v0;
6929     }
6930 
6931     return &rv;
6932 }
6933 
6934 /**
6935  * Load a shared object and initialize all the extensions in there.
6936  *
6937  * @param soname the name of the shared object (may not be NULL)
6938  * @param config optional configuration parameters
6939  * @return true if success, false otherwise
6940  */
6941 static bool load_extension(const char *soname, const char *config) {
6942     if (soname == NULL) {
6943         return false;
6944     }
6945 
6946     /* Hack to remove the warning from C99 */
6947     union my_hack {
6948         MEMCACHED_EXTENSIONS_INITIALIZE initialize;
6949         void* voidptr;
6950     } funky = {.initialize = NULL };
6951 
6952     void *handle = dlopen(soname, RTLD_NOW | RTLD_LOCAL);
6953     if (handle == NULL) {
6954         const char *msg = dlerror();
6955         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6956                 "Failed to open library \"%s\": %s\n",
6957                 soname, msg ? msg : "unknown error");
6958         return false;
6959     }
6960 
6961     void *symbol = dlsym(handle, "memcached_extensions_initialize");
6962     if (symbol == NULL) {
6963         const char *msg = dlerror();
6964         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6965                 "Could not find symbol \"memcached_extensions_initialize\" in %s: %s\n",
6966                 soname, msg ? msg : "unknown error");
6967         return false;
6968     }
6969     funky.voidptr = symbol;
6970 
6971     EXTENSION_ERROR_CODE error = (*funky.initialize)(config, get_server_api);
6972 
6973     if (error != EXTENSION_SUCCESS) {
6974         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
6975                 "Failed to initalize extensions from %s. Error code: %d\n",
6976                 soname, error);
6977         dlclose(handle);
6978         return false;
6979     }
6980 
6981     if (settings.verbose > 0) {
6982         settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
6983                 "Loaded extensions from: %s\n", soname);
6984     }
6985 
6986     return true;
6987 }
6988 
6989 /**
6990  * Do basic sanity check of the runtime environment
6991  * @return true if no errors found, false if we can't use this env
6992  */
6993 static bool sanitycheck(void) {
6994     /* One of our biggest problems is old and bogus libevents */
6995     const char *ever = event_get_version();
6996     if (ever != NULL) {
6997         if (strncmp(ever, "1.", 2) == 0) {
6998             /* Require at least 1.3 (that's still a couple of years old) */
6999             if ((ever[2] == '1' || ever[2] == '2') && !isdigit(ever[3])) {
7000                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7001                         "You are using libevent %s.\nPlease upgrade to"
7002                         " a more recent version (1.3 or newer)\n",
7003                         event_get_version());
7004                 return false;
7005             }
7006         }
7007     }
7008 
7009     return true;
7010 }
7011 
7012 #ifdef INNODB_MEMCACHED
/** Duplicate the first len bytes of a string into a freshly allocated,
NUL terminated buffer.
@param str	bytes to copy (at least len bytes must be readable)
@param len	number of bytes to copy
@return the new string, to be released with free(); NULL if str is NULL,
len is negative or the allocation failed */
static
char*
my_strdupl(const char* str, int len)
{
	char*   s;

	if (str == NULL || len < 0) {
		return(NULL);
	}

	s = (char*) malloc((size_t) len + 1);

	if (s == NULL) {
		return(NULL);
	}

	memcpy(s, str, (size_t) len);
	s[len] = '\0';

	return(s);
}
7021 
/** Function that massages the MySQL config variable string into something
that can be parsed by getopt(): the string is split on spaces and the
tokens are stored in argv[1..], with a placeholder program name in argv[0]
and a terminating NULL pointer after the last token.

The argv entries point into a private copy of the option string. That copy
is deliberately not freed here, since the entries stay valid only as long
as it exists; releasing the argv array itself is up to the caller.

@param option		space separated option string (must not be NULL)
@param option_argc	out: number of argv entries including argv[0];
			0 if memory could not be allocated
@param option_argv	out: newly allocated argv array; NULL if memory
			could not be allocated */
static
void
daemon_memcached_make_option(char* option, int* option_argc,
                             char*** option_argv)
{
	static const char*      sep = " ";
	/* getopt() uses argv[0] as the program name in its diagnostics,
	so slot 0 has to hold a valid string */
	static char             prog_name[] = "daemon_memcached";
	char*                   last;
	char*                   opt_str;
	char*                   my_str;
	char**                  argv;
	size_t                  option_len;
	int                     num_arg = 0;
	int                     i = 1;

	*option_argc = 0;
	*option_argv = NULL;

	option_len = strlen(option);
	my_str = my_strdupl(option, (int) option_len);

	if (my_str == NULL) {
		return;
	}

	/* first pass: count the tokens */
	for (opt_str = strtok_r(my_str, sep, &last);
	     opt_str;
	     opt_str = strtok_r(NULL, sep, &last)) {
		num_arg++;
	}

	/* reset my_str, since strtok_r overwrote the separators; the
	terminating NUL at my_str[option_len] is still in place */
	memcpy(my_str, option, option_len);

	/* one slot for argv[0], num_arg tokens and the terminating NULL */
	argv = (char**) malloc(((size_t) num_arg + 2) * sizeof(*argv));

	if (argv == NULL) {
		free(my_str);
		return;
	}

	argv[0] = prog_name;

	/* second pass: record the tokens */
	for (opt_str = strtok_r(my_str, sep, &last);
	     opt_str;
	     opt_str = strtok_r(NULL, sep, &last)) {
		argv[i] = opt_str;
		i++;
	}

	assert(i == num_arg + 1);

	argv[i] = NULL;

	*option_argc = (num_arg + 1);
	*option_argv = argv;

	return;
}
7063 
/* Structure that adds the callback function structure pointers,
passed to the InnoDB engine. In daemon_memcached_main() the address of an
instance is cast to (const char *) and assigned to engine_config. */
typedef struct eng_config_info {
	/* engine option string; set from the old_opts buffer */
	char*           option_string;
	/* callback structure pointer; set from m_innodb_api_cb */
	void*           cb_ptr;
	/* set from m_r_batch_size (presumably the read batch size) */
	unsigned int    eng_r_batch_size;
	/* set from m_w_batch_size (presumably the write batch size) */
	unsigned int    eng_w_batch_size;
	/* set from m_enable_binlog */
	bool		enable_binlog;
} eng_config_info_t;
7073 #endif /* INNODB_MEMCACHED */
7074 
7075 #ifdef INNODB_MEMCACHED
7076 void* daemon_memcached_main(void *p) {
7077 #else
7078 int main (int argc, char **argv) {
7079 #endif
7080     int c;
7081     bool lock_memory = false;
7082     bool do_daemonize = false;
7083     bool preallocate = false;
7084     int maxcore = 0;
7085     char *username = NULL;
7086     char *pid_file = NULL;
7087     struct passwd *pw;
7088     struct rlimit rlim;
7089     char unit = '\0';
7090     int size_max = 0;
7091 
7092     bool protocol_specified = false;
7093     bool tcp_specified = false;
7094     bool udp_specified = false;
7095     memcached_context_t* m_config = (memcached_context_t*)p;
7096     const char *engine;
7097     const char *engine_config = NULL;
7098     char old_options[1024] = { [0] = '\0' };
7099     char *old_opts = old_options;
7100 #ifdef INNODB_MEMCACHED
7101     int option_argc = 0;
7102     char** option_argv = NULL;
7103     eng_config_info_t my_eng_config;
7104 
7105     memcached_initialized = 0;
7106 
7107     if (m_config->m_engine_library) {
7108 	engine = m_config->m_engine_library;
7109 
7110 	/* FIXME: We should have a better way to pass the callback structure
7111 	point to storage engine. It is now appended in the configure
7112 	string in eng_config_info_t structure */
7113 	my_eng_config.cb_ptr = m_config->m_innodb_api_cb;
7114 	my_eng_config.eng_r_batch_size = m_config->m_r_batch_size;
7115 	my_eng_config.eng_w_batch_size = m_config->m_w_batch_size;
7116 	my_eng_config.enable_binlog = m_config->m_enable_binlog;
7117 	my_eng_config.option_string = old_opts;
7118 	engine_config = (const char *) (&my_eng_config);
7119 
7120     } else {
7121 	engine = "default_engine.so";
7122     }
7123 #else
7124     engine = "default_engine.so";
7125 #endif /* INNODB_MEMCACHED */
7126 
7127     memcached_shutdown = 0;
7128     memcached_initialized = 0;
7129 
7130     if (!sanitycheck()) {
7131         return(NULL);
7132     }
7133 
7134     /* make the time we started always be 2 seconds before we really
7135        did, so time(0) - time.started is never zero.  if so, things
7136        like 'settings.oldest_live' which act as booleans as well as
7137        values are now false in boolean context... */
7138     process_started = time(0) - 2;
7139     set_current_time();
7140 
7141     /* Initialize the socket subsystem */
7142     initialize_sockets();
7143 
7144     /* init settings */
7145     settings_init();
7146 
7147     if (memcached_initialize_stderr_logger(get_server_api) != EXTENSION_SUCCESS) {
7148         fprintf(stderr, "Failed to initialize log system\n");
7149         return (NULL);
7150     }
7151 
7152     if (m_config->m_mem_option) {
7153 	daemon_memcached_make_option(m_config->m_mem_option,
7154 				     &option_argc,
7155 				     &option_argv);
7156     }
7157 
7158 #ifdef INNODB_MEMCACHED
7159 
7160     if (option_argc > 0 && option_argv) {
7161 	    /* Always reset the index to 1, since this function can
7162 	    be invoked multiple times with install/uninstall plugins */
7163 	    optind = 1;
7164 	    while (-1 != (c = getopt(option_argc, option_argv,
7165 		  "a:"  /* access mask for unix socket */
7166 		  "p:"  /* TCP port number to listen on */
7167 		  "s:"  /* unix socket path to listen on */
7168 		  "U:"  /* UDP port number to listen on */
7169 		  "m:"  /* max memory to use for items in megabytes */
7170 		  "M"   /* return error on memory exhausted */
7171 		  "c:"  /* max simultaneous connections */
7172 		  "k"   /* lock down all paged memory */
7173 		  "hi"  /* help, licence info */
7174 		  "r"   /* maximize core file limit */
7175 		  "v"   /* verbose */
7176 		  "d"   /* daemon mode */
7177 		  "l:"  /* interface to listen on */
7178 		  "u:"  /* user identity to run as */
7179 		  "P:"  /* save PID in file */
7180 		  "f:"  /* factor? */
7181 		  "n:"  /* minimum space allocated for key+value+flags */
7182 		  "t:"  /* threads */
7183 		  "D:"  /* prefix delimiter? */
7184 		  "L"   /* Large memory pages */
7185 		  "R:"  /* max requests per event */
7186 		  "C"   /* Disable use of CAS */
7187 		  "b:"  /* backlog queue limit */
7188 		  "B:"  /* Binding protocol */
7189 		  "I:"  /* Max item size */
7190 		  "S"   /* Sasl ON */
7191 		  "E:"  /* Engine to load */
7192 		  "e:"  /* Engine options */
7193 		  "q"   /* Disallow detailed stats */
7194 		  "X:"  /* Load extension */
7195 		))) {
7196 		switch (c) {
7197 		case 'a':
7198 		    /* access for unix domain socket, as octal mask (like chmod)*/
7199 		    settings.access= strtol(optarg,NULL,8);
7200 		    break;
7201 
7202 		case 'U':
7203 		    settings.udpport = atoi(optarg);
7204 		    udp_specified = true;
7205 		    break;
7206 		case 'p':
7207 		    settings.port = atoi(optarg);
7208 		    tcp_specified = true;
7209 		    break;
7210 		case 's':
7211 		    settings.socketpath = optarg;
7212 		    break;
7213 		case 'm':
7214 		    settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7215 		     old_opts += sprintf(old_opts, "cache_size=%lu;",
7216 					 (unsigned long)settings.maxbytes);
7217 		   break;
7218 		case 'M':
7219 		    settings.evict_to_free = 0;
7220 		    old_opts += sprintf(old_opts, "eviction=false;");
7221 		    break;
7222 		case 'c':
7223 		    settings.maxconns = atoi(optarg);
7224 		    break;
7225 		case 'h':
7226 		    usage();
7227 		    exit(EXIT_SUCCESS);
7228 		case 'i':
7229 		    usage_license();
7230 		    exit(EXIT_SUCCESS);
7231 		case 'k':
7232 		    lock_memory = true;
7233 		    break;
7234 		case 'v':
7235 		    settings.verbose++;
7236 		    perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7237 		    break;
7238 		case 'l':
7239 		    settings.inter= strdup(optarg);
7240 		    break;
7241 		case 'd':
7242 		    do_daemonize = true;
7243 		    break;
7244 		case 'r':
7245 		    maxcore = 1;
7246 		    break;
7247 		case 'R':
7248 		    settings.reqs_per_event = atoi(optarg);
7249 		    if (settings.reqs_per_event <= 0) {
7250 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7251 			      "Number of requests per event must be greater than 0\n");
7252 			return (void*)1;
7253 		    }
7254 		    break;
7255 		case 'u':
7256 		    username = optarg;
7257 		    break;
7258 		case 'P':
7259 		    pid_file = optarg;
7260 		    break;
7261 		case 'f':
7262 		    settings.factor = atof(optarg);
7263 		    if (settings.factor <= 1.0) {
7264 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7265 				"Factor must be greater than 1\n");
7266 			return (void*)1;
7267 		    }
7268 		     old_opts += sprintf(old_opts, "factor=%f;",
7269 					 settings.factor);
7270 		   break;
7271 		case 'n':
7272 		    settings.chunk_size = atoi(optarg);
7273 		    if (settings.chunk_size == 0) {
7274 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7275 				"Chunk size must be greater than 0\n");
7276 			return (void*)1;
7277 		    }
7278 		    old_opts += sprintf(old_opts, "chunk_size=%u;",
7279 					settings.chunk_size);
7280 		    break;
7281 		case 't':
7282 		    settings.num_threads = atoi(optarg);
7283 		    if (settings.num_threads <= 0) {
7284 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7285 				"Number of threads must be greater than 0\n");
7286 			return (void*)1;
7287 		    }
7288 		    /* There're other problems when you get above 64 threads.
7289 		     * In the future we should portably detect # of cores for the
7290 		     * default.
7291 		     */
7292 		    if (settings.num_threads > 64) {
7293 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7294 				"WARNING: Setting a high number of worker"
7295 				"threads is not recommended.\n"
7296 				" Set this value to the number of cores in"
7297 				" your machine or less.\n");
7298 		    }
7299 		    break;
7300 		case 'D':
7301 		    settings.prefix_delimiter = optarg[0];
7302 		    settings.detail_enabled = 1;
7303 		    break;
7304 		case 'L' :
7305 		    if (enable_large_pages() == 0) {
7306 			preallocate = true;
7307 			old_opts += sprintf(old_opts, "preallocate=true;");
7308 		    }
7309 		    break;
7310 		case 'C' :
7311 		    settings.use_cas = false;
7312 		    break;
7313 		case 'b' :
7314 		    settings.backlog = atoi(optarg);
7315 		    break;
7316 		case 'B':
7317 		    protocol_specified = true;
7318 		    if (strcmp(optarg, "auto") == 0) {
7319 			settings.binding_protocol = negotiating_prot;
7320 		    } else if (strcmp(optarg, "binary") == 0) {
7321 			settings.binding_protocol = binary_prot;
7322 		    } else if (strcmp(optarg, "ascii") == 0) {
7323 			settings.binding_protocol = ascii_prot;
7324 		    } else {
7325 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7326 				"Invalid value for binding protocol: %s\n"
7327 				" -- should be one of auto, binary, or ascii\n", optarg);
7328 			exit(EX_USAGE);
7329 		    }
7330 		    break;
7331 		case 'I':
7332 		    unit = optarg[strlen(optarg)-1];
7333 		    if (unit == 'k' || unit == 'm' ||
7334 			unit == 'K' || unit == 'M') {
7335 			optarg[strlen(optarg)-1] = '\0';
7336 			size_max = atoi(optarg);
7337 			if (unit == 'k' || unit == 'K')
7338 			    size_max *= 1024;
7339 			if (unit == 'm' || unit == 'M')
7340 			    size_max *= 1024 * 1024;
7341 			settings.item_size_max = size_max;
7342 		    } else {
7343 			settings.item_size_max = atoi(optarg);
7344 		    }
7345 		    if (settings.item_size_max < 1024) {
7346 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7347 				"Item max size cannot be less than 1024 bytes.\n");
7348 			return (void*)1;
7349 		    }
7350 		    if (settings.item_size_max > 1024 * 1024 * 128) {
7351 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7352 				"Cannot set item size limit higher than 128 mb.\n");
7353 			return (void*)1;
7354 		    }
7355 		    if (settings.item_size_max > 1024 * 1024) {
7356 			settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7357 			    "WARNING: Setting item max size above 1MB is not"
7358 			    " recommended!\n"
7359 			    " Raising this limit increases the minimum memory requirements\n"
7360 			    " and will decrease your memory efficiency.\n"
7361 			);
7362 		    }
7363 #ifndef __WIN32__
7364 		    old_opts += sprintf(old_opts, "item_size_max=%zu;",
7365 					settings.item_size_max);
7366 #else
7367 		    old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7368 					settings.item_size_max);
7369 #endif
7370 		    break;
7371 		case 'E':
7372 		    engine = optarg;
7373 		    break;
7374 		case 'e':
7375 		    /* FIXME, we use engine_config to pass callback function
7376 		    for now. Will need a better solution
7377 		    engine_config = optarg; */
7378 		    break;
7379 		case 'q':
7380 		    settings.allow_detailed = false;
7381 		    break;
7382 		case 'S': /* set Sasl authentication to true. Default is false */
7383 # ifdef ENABLE_MEMCACHED_SASL
7384 #  ifndef SASL_ENABLED
7385 		    settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7386 			    "This server is not built with SASL support.\n");
7387 		    exit(EX_USAGE);
7388 #  endif /* !SASL_ENABLED */
7389 		    settings.require_sasl = true;
7390 # endif /* ENABLE_MEMCACHED_SASL */
7391 		    break;
7392 		case 'X' :
7393 		    {
7394 			char *ptr = strchr(optarg, ',');
7395 			if (ptr != NULL) {
7396 			    *ptr = '\0';
7397 			    ++ptr;
7398 			}
7399 			if (!load_extension(optarg, ptr)) {
7400 			    exit(EXIT_FAILURE);
7401 			}
7402 			if (ptr != NULL) {
7403 			    *(ptr - 1) = ',';
7404 			}
7405 		    }
7406 		    break;
7407 		default:
7408 		    settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7409 			    "Illegal argument \"%c\"\n", c);
7410 		    return (void*)1;
7411 		}
7412 	}
7413 
7414 	free(option_argv);
7415     }
7416     fprintf(stderr, MEMCACHED_ATOMIC_MSG);
7417 #else
7418     /* process arguments */
7419     while (-1 != (c = getopt(argc, argv,
7420           "a:"  /* access mask for unix socket */
7421           "p:"  /* TCP port number to listen on */
7422           "s:"  /* unix socket path to listen on */
7423           "U:"  /* UDP port number to listen on */
7424           "m:"  /* max memory to use for items in megabytes */
7425           "M"   /* return error on memory exhausted */
7426           "c:"  /* max simultaneous connections */
7427           "k"   /* lock down all paged memory */
7428           "hi"  /* help, licence info */
7429           "r"   /* maximize core file limit */
7430           "v"   /* verbose */
7431           "d"   /* daemon mode */
7432           "l:"  /* interface to listen on */
7433           "u:"  /* user identity to run as */
7434           "P:"  /* save PID in file */
7435           "f:"  /* factor? */
7436           "n:"  /* minimum space allocated for key+value+flags */
7437           "t:"  /* threads */
7438           "D:"  /* prefix delimiter? */
7439           "L"   /* Large memory pages */
7440           "R:"  /* max requests per event */
7441           "C"   /* Disable use of CAS */
7442           "b:"  /* backlog queue limit */
7443           "B:"  /* Binding protocol */
7444           "I:"  /* Max item size */
7445           "S"   /* Sasl ON */
7446           "E:"  /* Engine to load */
7447           "e:"  /* Engine options */
7448           "q"   /* Disallow detailed stats */
7449           "X:"  /* Load extension */
7450         ))) {
7451         switch (c) {
7452         case 'a':
7453             /* access for unix domain socket, as octal mask (like chmod)*/
7454             settings.access= strtol(optarg,NULL,8);
7455             break;
7456 
7457         case 'U':
7458             settings.udpport = atoi(optarg);
7459             udp_specified = true;
7460             break;
7461         case 'p':
7462             settings.port = atoi(optarg);
7463             tcp_specified = true;
7464             break;
7465         case 's':
7466             settings.socketpath = optarg;
7467             break;
7468         case 'm':
7469             settings.maxbytes = ((size_t)atoi(optarg)) * 1024 * 1024;
7470              old_opts += sprintf(old_opts, "cache_size=%lu;",
7471                                  (unsigned long)settings.maxbytes);
7472            break;
7473         case 'M':
7474             settings.evict_to_free = 0;
7475             old_opts += sprintf(old_opts, "eviction=false;");
7476             break;
7477         case 'c':
7478             settings.maxconns = atoi(optarg);
7479             break;
7480         case 'h':
7481             usage();
7482             exit(EXIT_SUCCESS);
7483         case 'i':
7484             usage_license();
7485             exit(EXIT_SUCCESS);
7486         case 'k':
7487             lock_memory = true;
7488             break;
7489         case 'v':
7490             settings.verbose++;
7491             perform_callbacks(ON_LOG_LEVEL, NULL, NULL);
7492             break;
7493         case 'l':
7494             if (settings.inter != NULL) {
7495                 size_t len = strlen(settings.inter) + strlen(optarg) + 2;
7496                 char *p = malloc(len);
7497                 if (p == NULL) {
7498                     settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7499                                                     "Failed to allocate memory\n");
7500                     return 1;
7501                 }
7502                 snprintf(p, len, "%s,%s", settings.inter, optarg);
7503                 free(settings.inter);
7504                 settings.inter = p;
7505             } else {
7506                 settings.inter= strdup(optarg);
7507             }
7508             break;
7509         case 'd':
7510             do_daemonize = true;
7511             break;
7512         case 'r':
7513             maxcore = 1;
7514             break;
7515         case 'R':
7516             settings.reqs_per_event = atoi(optarg);
7517             if (settings.reqs_per_event <= 0) {
7518                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7519                       "Number of requests per event must be greater than 0\n");
7520                 return 1;
7521             }
7522             break;
7523         case 'u':
7524             username = optarg;
7525             break;
7526         case 'P':
7527             pid_file = optarg;
7528             break;
7529         case 'f':
7530             settings.factor = atof(optarg);
7531             if (settings.factor <= 1.0) {
7532                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7533                         "Factor must be greater than 1\n");
7534                 return 1;
7535             }
7536              old_opts += sprintf(old_opts, "factor=%f;",
7537                                  settings.factor);
7538            break;
7539         case 'n':
7540             settings.chunk_size = atoi(optarg);
7541             if (settings.chunk_size == 0) {
7542                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7543                         "Chunk size must be greater than 0\n");
7544                 return 1;
7545             }
7546             old_opts += sprintf(old_opts, "chunk_size=%u;",
7547                                 settings.chunk_size);
7548             break;
7549         case 't':
7550             settings.num_threads = atoi(optarg);
7551             if (settings.num_threads <= 0) {
7552                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7553                         "Number of threads must be greater than 0\n");
7554                 return 1;
7555             }
            /* There are other problems once you go above 64 threads.
             * In the future we should portably detect the number of cores
             * and use that as the default.
             */
7560             if (settings.num_threads > 64) {
7561                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7562                         "WARNING: Setting a high number of worker"
7563                         "threads is not recommended.\n"
7564                         " Set this value to the number of cores in"
7565                         " your machine or less.\n");
7566             }
7567             break;
7568         case 'D':
7569             settings.prefix_delimiter = optarg[0];
7570             settings.detail_enabled = 1;
7571             break;
7572         case 'L' :
7573             if (enable_large_pages() == 0) {
7574                 preallocate = true;
7575                 old_opts += sprintf(old_opts, "preallocate=true;");
7576             }
7577             break;
7578         case 'C' :
7579             settings.use_cas = false;
7580             break;
7581         case 'b' :
7582             settings.backlog = atoi(optarg);
7583             break;
7584         case 'B':
7585             protocol_specified = true;
7586             if (strcmp(optarg, "auto") == 0) {
7587                 settings.binding_protocol = negotiating_prot;
7588             } else if (strcmp(optarg, "binary") == 0) {
7589                 settings.binding_protocol = binary_prot;
7590             } else if (strcmp(optarg, "ascii") == 0) {
7591                 settings.binding_protocol = ascii_prot;
7592             } else {
7593                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7594                         "Invalid value for binding protocol: %s\n"
7595                         " -- should be one of auto, binary, or ascii\n", optarg);
7596                 exit(EX_USAGE);
7597             }
7598             break;
7599         case 'I':
7600             unit = optarg[strlen(optarg)-1];
7601             if (unit == 'k' || unit == 'm' ||
7602                 unit == 'K' || unit == 'M') {
7603                 optarg[strlen(optarg)-1] = '\0';
7604                 size_max = atoi(optarg);
7605                 if (unit == 'k' || unit == 'K')
7606                     size_max *= 1024;
7607                 if (unit == 'm' || unit == 'M')
7608                     size_max *= 1024 * 1024;
7609                 settings.item_size_max = size_max;
7610             } else {
7611                 settings.item_size_max = atoi(optarg);
7612             }
7613             if (settings.item_size_max < 1024) {
7614                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7615                         "Item max size cannot be less than 1024 bytes.\n");
7616                 return 1;
7617             }
7618             if (settings.item_size_max > 1024 * 1024 * 128) {
7619                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7620                         "Cannot set item size limit higher than 128 mb.\n");
7621                 return 1;
7622             }
7623             if (settings.item_size_max > 1024 * 1024) {
7624                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7625                     "WARNING: Setting item max size above 1MB is not"
7626                     " recommended!\n"
7627                     " Raising this limit increases the minimum memory requirements\n"
7628                     " and will decrease your memory efficiency.\n"
7629                 );
7630             }
7631 #ifndef __WIN32__
7632             old_opts += sprintf(old_opts, "item_size_max=%zu;",
7633                                 settings.item_size_max);
7634 #else
7635             old_opts += sprintf(old_opts, "item_size_max=%lu;", (long unsigned)
7636                                 settings.item_size_max);
7637 #endif
7638             break;
7639         case 'E':
7640             engine = optarg;
7641             break;
7642         case 'e':
7643             engine_config = optarg;
7644             break;
7645         case 'q':
7646             settings.allow_detailed = false;
7647             break;
7648         case 'S': /* set Sasl authentication to true. Default is false */
7649 #ifndef SASL_ENABLED
7650             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7651                     "This server is not built with SASL support.\n");
7652             exit(EX_USAGE);
7653 #endif
7654             settings.require_sasl = true;
7655             break;
7656         case 'X' :
7657             {
7658                 char *ptr = strchr(optarg, ',');
7659                 if (ptr != NULL) {
7660                     *ptr = '\0';
7661                     ++ptr;
7662                 }
7663                 if (!load_extension(optarg, ptr)) {
7664                     exit(EXIT_FAILURE);
7665                 }
7666                 if (ptr != NULL) {
7667                     *(ptr - 1) = ',';
7668                 }
7669             }
7670             break;
7671         default:
7672             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7673                     "Illegal argument \"%c\"\n", c);
7674             return 1;
7675         }
7676     }
7677 #endif /* INNODB_MEMCACHED */
7678 
7679     if (getenv("MEMCACHED_REQS_TAP_EVENT") != NULL) {
7680         settings.reqs_per_tap_event = atoi(getenv("MEMCACHED_REQS_TAP_EVENT"));
7681     }
7682 
7683     if (settings.reqs_per_tap_event <= 0) {
7684         settings.reqs_per_tap_event = DEFAULT_REQS_PER_TAP_EVENT;
7685     }
7686 
7687 
7688     if (install_sigterm_handler() != 0) {
7689         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7690                                         "Failed to install SIGTERM handler\n");
7691         exit(EXIT_FAILURE);
7692     }
7693 
7694     char *topkeys_env = getenv("MEMCACHED_TOP_KEYS");
7695     if (topkeys_env != NULL) {
7696         settings.topkeys = atoi(topkeys_env);
7697         if (settings.topkeys < 0) {
7698             settings.topkeys = 0;
7699         }
7700     }
7701 
7702     if (settings.require_sasl) {
7703         if (!protocol_specified) {
7704             settings.binding_protocol = binary_prot;
7705         } else {
7706             if (settings.binding_protocol == negotiating_prot) {
7707                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7708                         "ERROR: You cannot use auto-negotiating protocol while requiring SASL.\n");
7709                 exit(EX_USAGE);
7710             }
7711             if (settings.binding_protocol == ascii_prot) {
7712                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7713                         "ERROR: You cannot use only ASCII protocol while requiring SASL.\n");
7714                 exit(EX_USAGE);
7715             }
7716         }
7717     }
7718 
7719     if (tcp_specified && !udp_specified) {
7720         settings.udpport = settings.port;
7721     } else if (udp_specified && !tcp_specified) {
7722         settings.port = settings.udpport;
7723     }
7724 
7725     /*
7726     if (engine_config != NULL && strlen(old_options) > 0) {
7727         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7728                 "ERROR: You can't mix -e with the old options\n");
7729         return (NULL);
7730     } else if (engine_config == NULL && strlen(old_options) > 0) {
7731         engine_config = old_options;
7732     } */
7733 
7734     if (maxcore != 0) {
7735         struct rlimit rlim_new;
7736         /*
7737          * First try raising to infinity; if that fails, try bringing
7738          * the soft limit to the hard.
7739          */
7740         if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
7741             rlim_new.rlim_cur = rlim_new.rlim_max = RLIM_INFINITY;
7742             if (setrlimit(RLIMIT_CORE, &rlim_new)!= 0) {
7743                 /* failed. try raising just to the old max */
7744                 rlim_new.rlim_cur = rlim_new.rlim_max = rlim.rlim_max;
7745                 (void)setrlimit(RLIMIT_CORE, &rlim_new);
7746             }
7747         }
7748         /*
7749          * getrlimit again to see what we ended up with. Only fail if
7750          * the soft limit ends up 0, because then no core files will be
7751          * created at all.
7752          */
7753 
7754         if ((getrlimit(RLIMIT_CORE, &rlim) != 0) || rlim.rlim_cur == 0) {
7755             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7756                     "failed to ensure corefile creation\n");
7757             exit(EX_OSERR);
7758         }
7759     }
7760 
7761     /*
7762      * If needed, increase rlimits to allow as many connections
7763      * as needed.
7764      */
7765 
7766     if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7767         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7768                 "failed to getrlimit number of files\n");
7769         exit(EX_OSERR);
7770     } else {
7771         int maxfiles = settings.maxconns;
7772         if (rlim.rlim_cur < maxfiles)
7773             rlim.rlim_cur = maxfiles;
7774         if (rlim.rlim_max < rlim.rlim_cur)
7775             rlim.rlim_max = rlim.rlim_cur;
7776         if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
7777             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7778                     "failed to set rlimit for open files. Try running as"
7779                     " root or requesting smaller maxconns value.\n");
7780             exit(EX_OSERR);
7781         }
7782     }
7783 
7784     /* Sanity check for the connection structures */
7785     int nfiles = 0;
7786     if (settings.port != 0) {
7787         nfiles += 2;
7788     }
7789     if (settings.udpport != 0) {
7790         nfiles += settings.num_threads * 2;
7791     }
7792 
7793     if (settings.maxconns <= nfiles) {
7794         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7795                 "Configuratioin error. \n"
7796                 "You specified %d connections, but the system will use at "
7797                 "least %d\nconnection structures to start.\n",
7798                 settings.maxconns, nfiles);
7799         exit(EX_USAGE);
7800     }
7801 
7802     /* lose root privileges if we have them */
7803     if (getuid() == 0 || geteuid() == 0) {
7804         if (username == 0 || *username == '\0') {
7805             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7806                     "can't run as root without the -u switch\n");
7807             exit(EX_USAGE);
7808         }
7809         if ((pw = getpwnam(username)) == 0) {
7810             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7811                     "can't find the user %s to switch to\n", username);
7812             exit(EX_NOUSER);
7813         }
7814         if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) {
7815             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7816                     "failed to assume identity of user %s: %s\n", username,
7817                     strerror(errno));
7818             exit(EX_OSERR);
7819         }
7820     }
7821 
7822 #ifdef SASL_ENABLED
7823     init_sasl();
7824 #endif /* SASL */
7825 
7826     /* daemonize if requested */
7827     /* if we want to ensure our ability to dump core, don't chdir to / */
7828     if (do_daemonize) {
7829         if (sigignore(SIGHUP) == -1) {
7830             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7831                     "Failed to ignore SIGHUP: ", strerror(errno));
7832         }
7833         if (daemonize(maxcore, settings.verbose) == -1) {
7834              settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7835                     "failed to daemon() in order to daemonize\n");
7836             exit(EXIT_FAILURE);
7837         }
7838     }
7839 
7840     /* lock paged memory if needed */
7841     if (lock_memory) {
7842 #ifdef HAVE_MLOCKALL
7843         int res = mlockall(MCL_CURRENT | MCL_FUTURE);
7844         if (res != 0) {
7845             settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7846                     "warning: -k invalid, mlockall() failed: %s\n",
7847                     strerror(errno));
7848         }
7849 #else
7850         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7851                 "warning: -k invalid, mlockall() not supported on this platform.  proceeding without.\n");
7852 #endif
7853     }
7854 
7855     /* initialize main thread libevent instance */
7856     main_base = event_init();
7857 
7858     /* Load the storage engine */
7859     ENGINE_HANDLE *engine_handle = NULL;
7860     if (!load_engine(engine,get_server_api,settings.extensions.logger,&engine_handle)) {
7861         /* Error already reported */
7862 #ifdef INNODB_MEMCACHED
7863         shutdown_server();
7864         goto func_exit;
7865 #else
7866         exit(EXIT_FAILURE);
7867 #endif
7868     }
7869 
7870 #ifdef INNODB_MEMCACHED
7871     my_thread_init();
7872 #endif
7873 
7874     if(!init_engine(engine_handle,engine_config,settings.extensions.logger)) {
7875 #ifdef INNODB_MEMCACHED
7876 	my_thread_end();
7877         shutdown_server();
7878         goto func_exit;
7879 #else
7880 	return(false);
7881 #endif /* INNODB_MEMCACHED */
7882     }
7883 
7884     if(settings.verbose > 0) {
7885         log_engine_details(engine_handle,settings.extensions.logger);
7886     }
7887     settings.engine.v1 = (ENGINE_HANDLE_V1 *) engine_handle;
7888 
7889     if (settings.engine.v1->arithmetic == NULL) {
7890         settings.engine.v1->arithmetic = internal_arithmetic;
7891     }
7892 
7893     /* initialize other stuff */
7894     stats_init();
7895 
7896     if (!(conn_cache = cache_create("conn", sizeof(conn), sizeof(void*),
7897                                     conn_constructor, conn_destructor))) {
7898         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7899                 "Failed to create connection cache\n");
7900         exit(EXIT_FAILURE);
7901     }
7902 
7903     default_independent_stats = new_independent_stats();
7904 
7905 #ifdef INNODB_MEMCACHED
7906     if (!default_independent_stats) {
7907 	exit(EXIT_FAILURE);
7908     }
7909 #endif
7910 
7911 #ifndef __WIN32__
7912     /*
7913      * ignore SIGPIPE signals; we can use errno == EPIPE if we
7914      * need that information
7915      */
7916     if (sigignore(SIGPIPE) == -1) {
7917         settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7918                 "failed to ignore SIGPIPE; sigaction");
7919         exit(EX_OSERR);
7920     }
7921 #endif
7922 
7923     /* start up worker threads if MT mode */
7924     thread_init(settings.num_threads, main_base, dispatch_event_handler);
7925 
7926     /* initialise clock event */
7927     clock_handler(0, 0, 0);
7928 
7929     /* create unix mode sockets after dropping privileges */
7930     if (settings.socketpath != NULL) {
7931         if (server_socket_unix(settings.socketpath,settings.access)) {
7932             vperror("failed to listen on UNIX socket: %s", settings.socketpath);
7933             exit(EX_OSERR);
7934         }
7935     }
7936 
7937     /* create the listening socket, bind it, and init */
7938     if (settings.socketpath == NULL) {
7939         int udp_port;
7940 
7941         const char *portnumber_filename = getenv("MEMCACHED_PORT_FILENAME");
7942         char temp_portnumber_filename[PATH_MAX];
7943         FILE *portnumber_file = NULL;
7944 
7945         if (portnumber_filename != NULL) {
7946             snprintf(temp_portnumber_filename,
7947                      sizeof(temp_portnumber_filename),
7948                      "%s.lck", portnumber_filename);
7949 
7950             portnumber_file = fopen(temp_portnumber_filename, "a");
7951             if (portnumber_file == NULL) {
7952                 settings.extensions.logger->log(EXTENSION_LOG_WARNING, NULL,
7953                         "Failed to open \"%s\": %s\n",
7954                         temp_portnumber_filename, strerror(errno));
7955             }
7956         }
7957 
7958         if (settings.port && server_sockets(settings.port, tcp_transport,
7959                                             portnumber_file)) {
7960 		vperror("failed to listen on TCP port %d", settings.port);
7961 #ifdef INNODB_MEMCACHED
7962 		my_thread_end();
7963 		shutdown_server();
7964 		goto func_exit;
7965 #else
7966 		exit(EX_OSERR);
7967 #endif /* INNODB_MEMCACHED */
7968         }
7969 
7970         /*
7971          * initialization order: first create the listening sockets
7972          * (may need root on low ports), then drop root if needed,
7973          * then daemonise if needed, then init libevent (in some cases
7974          * descriptors created by libevent wouldn't survive forking).
7975          */
7976         udp_port = settings.udpport ? settings.udpport : settings.port;
7977 
7978         /* create the UDP listening socket and bind it */
7979         if (settings.udpport && server_sockets(settings.udpport, udp_transport,
7980                                                portnumber_file)) {
7981             vperror("failed to listen on UDP port %d", settings.udpport);
7982             exit(EX_OSERR);
7983         }
7984 
7985         if (portnumber_file) {
7986             fclose(portnumber_file);
7987             rename(temp_portnumber_filename, portnumber_filename);
7988         }
7989     }
7990 
7991     if (pid_file != NULL) {
7992         save_pid(pid_file);
7993     }
7994 
7995     /* Drop privileges no longer needed */
7996     drop_privileges();
7997 
7998     memcached_initialized = 1;
7999 
8000     /* enter the event loop */
8001     event_base_loop(main_base, 0);
8002 
8003     if (settings.verbose) {
8004         settings.extensions.logger->log(EXTENSION_LOG_INFO, NULL,
8005                                         "Initiating shutdown\n");
8006     }
8007 
8008 func_exit:
8009 
8010     if (settings.engine.v1)
8011       settings.engine.v1->destroy(settings.engine.v0, false);
8012 
8013     threads_shutdown();
8014 
8015     /* remove the PID file if we're a daemon */
8016     if (do_daemonize)
8017         remove_pidfile(pid_file);
8018     /* Clean up strdup() call for bind() address */
8019     if (settings.inter)
8020       free(settings.inter);
8021 
8022 #ifdef INNODB_MEMCACHED
8023     /* free event base */
8024     if (main_base) {
8025         event_base_free(main_base);
8026         main_base = NULL;
8027     }
8028     my_thread_end();
8029 #endif
8030 
8031     memcached_shutdown = 2;
8032     memcached_initialized = 2;
8033 
8034     return EXIT_SUCCESS;
8035 }
8036