1 /*
2  * twemproxy - A fast and lightweight proxy for memcached protocol.
3  * Copyright (C) 2011 Twitter, Inc.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <netdb.h>

#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nc_core.h>

#ifdef NC_HAVE_BACKTRACE
# include <execinfo.h>
#endif
39 
/*
 * Switch descriptor sd to blocking mode by clearing O_NONBLOCK from its
 * file status flags.
 *
 * Returns the result of fcntl(): a negative value when either reading
 * (F_GETFL) or writing (F_SETFL) the flags fails.
 */
int
nc_set_blocking(int sd)
{
    int cur = fcntl(sd, F_GETFL, 0);

    /* a failed F_GETFL is handed back to the caller untouched */
    return (cur < 0) ? cur : fcntl(sd, F_SETFL, cur & ~O_NONBLOCK);
}
52 
/*
 * Switch descriptor sd to non-blocking mode by setting O_NONBLOCK in its
 * file status flags.
 *
 * Returns the result of fcntl(): a negative value when either reading
 * (F_GETFL) or writing (F_SETFL) the flags fails.
 */
int
nc_set_nonblocking(int sd)
{
    int cur = fcntl(sd, F_GETFL, 0);

    /* a failed F_GETFL is handed back to the caller untouched */
    return (cur < 0) ? cur : fcntl(sd, F_SETFL, cur | O_NONBLOCK);
}
65 
/*
 * Enable SO_REUSEADDR on socket sd.
 *
 * Returns the result of setsockopt().
 */
int
nc_set_reuseaddr(int sd)
{
    int enable = 1;

    return setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &enable,
                      (socklen_t)sizeof(enable));
}
77 
/*
 * Turn off the Nagle algorithm on TCP socket sd by enabling TCP_NODELAY.
 *
 * With the option set the kernel no longer coalesces small writes to
 * fill a TCP segment, which lowers transmit latency. Callers of such
 * sockets should move data in bulk with readv() or writev() so that the
 * overhead of many small packets is avoided.
 *
 * Returns the result of setsockopt().
 */
int
nc_set_tcpnodelay(int sd)
{
    int enable = 1;

    return setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, &enable,
                      (socklen_t)sizeof(enable));
}
97 
/*
 * Enable SO_LINGER on socket sd with a linger interval of timeout
 * (passed straight through as l_linger).
 *
 * Returns the result of setsockopt().
 */
int
nc_set_linger(int sd, int timeout)
{
    struct linger opt;

    opt.l_linger = timeout;
    opt.l_onoff = 1;

    return setsockopt(sd, SOL_SOCKET, SO_LINGER, &opt,
                      (socklen_t)sizeof(opt));
}
111 
/*
 * Enable SO_KEEPALIVE on socket sd.
 *
 * Returns the result of setsockopt().
 */
int
nc_set_tcpkeepalive(int sd)
{
    int keepalive;
    socklen_t len;

    keepalive = 1;
    len = sizeof(keepalive);

    return setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &keepalive, len);
}
118 
/*
 * Request a send buffer of size bytes on socket sd through SO_SNDBUF.
 *
 * Returns the result of setsockopt().
 */
int
nc_set_sndbuf(int sd, int size)
{
    return setsockopt(sd, SOL_SOCKET, SO_SNDBUF, &size,
                      (socklen_t)sizeof(size));
}
128 
/*
 * Request a receive buffer of size bytes on socket sd through SO_RCVBUF.
 *
 * Returns the result of setsockopt().
 */
int
nc_set_rcvbuf(int sd, int size)
{
    return setsockopt(sd, SOL_SOCKET, SO_RCVBUF, &size,
                      (socklen_t)sizeof(size));
}
138 
/*
 * Fetch the pending error of socket sd (SO_ERROR) and publish it through
 * errno.
 *
 * Returns the result of getsockopt(). When the call succeeds, errno is
 * overwritten with the socket's pending error, which is 0 if there is
 * none. When the call fails, errno is left as set by getsockopt().
 */
int
nc_get_soerror(int sd)
{
    int pending = 0;
    socklen_t optlen = sizeof(pending);
    int rc;

    rc = getsockopt(sd, SOL_SOCKET, SO_ERROR, &pending, &optlen);
    if (rc != 0) {
        return rc;
    }

    errno = pending;

    return rc;
}
155 
/*
 * Query the send buffer size (SO_SNDBUF) of socket sd.
 *
 * Returns the size reported by getsockopt(), or the negative
 * getsockopt() result when the query fails.
 */
int
nc_get_sndbuf(int sd)
{
    int bytes = 0;
    socklen_t optlen = sizeof(bytes);
    int rc = getsockopt(sd, SOL_SOCKET, SO_SNDBUF, &bytes, &optlen);

    return (rc < 0) ? rc : bytes;
}
172 
/*
 * Query the receive buffer size (SO_RCVBUF) of socket sd.
 *
 * Returns the size reported by getsockopt(), or the negative
 * getsockopt() result when the query fails.
 */
int
nc_get_rcvbuf(int sd)
{
    int bytes = 0;
    socklen_t optlen = sizeof(bytes);
    int rc = getsockopt(sd, SOL_SOCKET, SO_RCVBUF, &bytes, &optlen);

    return (rc < 0) ? rc : bytes;
}
189 
/*
 * Convert the first n bytes of line, which must all be ASCII decimal
 * digits, into a non-negative int. The input does not need to be
 * NUL-terminated.
 *
 * Returns the parsed value, or -1 when n is 0, when a non-digit byte is
 * encountered, or when the value does not fit in an int.
 */
int
_nc_atoi(const uint8_t *line, size_t n)
{
    int value;
    int digit;

    if (n == 0) {
        return -1;
    }

    for (value = 0; n--; line++) {
        if (*line < '0' || *line > '9') {
            return -1;
        }

        digit = *line - '0';

        /*
         * Signed overflow is undefined behavior, so the range check has
         * to happen before value * 10 + digit is computed rather than by
         * inspecting the sign of the result afterwards.
         */
        if (value > (INT_MAX - digit) / 10) {
            return -1;
        }

        value = value * 10 + digit;
    }

    return value;
}
213 
/*
 * Return true when n lies in the port range [1, UINT16_MAX], false
 * otherwise.
 */
bool
nc_valid_port(int n)
{
    return n >= 1 && n <= UINT16_MAX;
}
223 
224 void *
_nc_alloc(size_t size,const char * name,int line)225 _nc_alloc(size_t size, const char *name, int line)
226 {
227     void *p;
228 
229     ASSERT(size != 0);
230 
231     p = malloc(size);
232     if (p == NULL) {
233         log_error("malloc(%zu) failed @ %s:%d", size, name, line);
234     } else {
235         log_debug(LOG_VVERB, "malloc(%zu) at %p @ %s:%d", size, p, name, line);
236     }
237 
238     return p;
239 }
240 
/*
 * Allocate size bytes through _nc_alloc() and zero-fill them.
 *
 * Returns the zeroed memory, or NULL when the allocation fails.
 */
void *
_nc_zalloc(size_t size, const char *name, int line)
{
    void *mem = _nc_alloc(size, name, line);

    if (mem == NULL) {
        return NULL;
    }

    memset(mem, 0, size);

    return mem;
}
253 
/*
 * Allocate zero-filled memory for an array of nmemb elements of size
 * bytes each, through _nc_zalloc().
 *
 * name and line identify the call site for logging.
 *
 * Returns the zeroed memory, or NULL when nmemb * size is not
 * representable in a size_t or when the allocation fails.
 */
void *
_nc_calloc(size_t nmemb, size_t size, const char *name, int line)
{
    /*
     * An unchecked nmemb * size can wrap around and silently produce a
     * buffer much smaller than the caller expects.
     */
    if (nmemb != 0 && size > SIZE_MAX / nmemb) {
        log_error("calloc(%zu, %zu) overflows @ %s:%d", nmemb, size, name,
                  line);
        return NULL;
    }

    return _nc_zalloc(nmemb * size, name, line);
}
259 
260 void *
_nc_realloc(void * ptr,size_t size,const char * name,int line)261 _nc_realloc(void *ptr, size_t size, const char *name, int line)
262 {
263     void *p;
264 
265     ASSERT(size != 0);
266 
267     p = realloc(ptr, size);
268     if (p == NULL) {
269         log_error("realloc(%zu) failed @ %s:%d", size, name, line);
270     } else {
271         log_debug(LOG_VVERB, "realloc(%zu) at %p @ %s:%d", size, p, name, line);
272     }
273 
274     return p;
275 }
276 
277 void
_nc_free(void * ptr,const char * name,int line)278 _nc_free(void *ptr, const char *name, int line)
279 {
280     ASSERT(ptr != NULL);
281     log_debug(LOG_VVERB, "free(%p) @ %s:%d", ptr, name, line);
282     free(ptr);
283 }
284 
/*
 * Log the symbolized call stack of the caller, one frame per line,
 * leaving out the innermost skip_count frames as well as the frame of
 * nc_stacktrace() itself. Up to 64 frames are captured.
 *
 * Compiles to an empty function unless NC_HAVE_BACKTRACE is defined.
 */
void
nc_stacktrace(int skip_count)
{
#ifdef NC_HAVE_BACKTRACE
    void *frames[64];
    char **names;
    int depth, first, idx;

    depth = backtrace(frames, 64);
    names = backtrace_symbols(frames, depth);
    if (names == NULL) {
        return;
    }

    /* one extra frame is dropped: the one belonging to this function */
    first = skip_count + 1;

    for (idx = first; idx < depth; idx++) {
        loga("[%d] %s", idx - first, names[idx]);
    }

    free(names);
#endif
}
308 
/*
 * Write the symbolized call stack (up to 64 frames) to descriptor fd
 * using backtrace_symbols_fd().
 *
 * Compiles to an empty function unless NC_HAVE_BACKTRACE is defined.
 */
void
nc_stacktrace_fd(int fd)
{
#ifdef NC_HAVE_BACKTRACE
    void *frames[64];
    int depth = backtrace(frames, 64);

    backtrace_symbols_fd(frames, depth, fd);
#endif
}
320 
/*
 * Report a failed assertion.
 *
 * The condition text and its location (file, line) are always logged.
 * When panic is non-zero, a stack trace is logged as well and the
 * process is terminated with abort(); otherwise the function returns.
 */
void
nc_assert(const char *cond, const char *file, int line, int panic)
{
    log_error("assert '%s' failed @ (%s, %d)", cond, file, line);

    if (!panic) {
        return;
    }

    nc_stacktrace(1);
    abort();
}
330 
/*
 * vsnprintf() variant that returns the number of characters actually
 * stored in buf, not including the trailing '\0', instead of the number
 * that would have been written had buf been large enough.
 *
 * The result is therefore always in the range [0, size - 1] for a
 * non-zero size, and it is 0 when size is 0 or when vsnprintf() reports
 * an error. This allows idiom such as len += _vscnprintf(...).
 *
 * See: http://lwn.net/Articles/69419/
 */
int
_vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
{
    int n;

    n = vsnprintf(buf, size, fmt, args);

    /*
     * size == 0 has to be handled here: nothing was stored, and falling
     * through to size - 1 would wrap around and yield a negative count.
     */
    if (n <= 0 || size == 0) {
        return 0;
    }

    /* compare as size_t so that a size above INT_MAX is not truncated */
    if ((size_t)n < size) {
        return n;
    }

    /* output was truncated; buf holds size - 1 characters plus '\0' */
    return (int)(size - 1);
}

/*
 * snprintf() counterpart of _vscnprintf(): formats into buf and returns
 * the number of characters actually stored, not including the trailing
 * '\0'.
 */
int
_scnprintf(char *buf, size_t size, const char *fmt, ...)
{
    va_list args;
    int n;

    va_start(args, fmt);
    n = _vscnprintf(buf, size, fmt, args);
    va_end(args);

    return n;
}
371 
/*
 * Send exactly n bytes from vptr on blocking descriptor sd.
 *
 * send() is repeated until every byte has been handed over; calls
 * interrupted by a signal (EINTR) are retried.
 *
 * Returns n on success. Returns the negative send() result on any other
 * error, and -1 if send() reports that zero bytes were sent.
 */
ssize_t
_nc_sendn(int sd, const void *vptr, size_t n)
{
    const char *cursor = vptr;
    size_t remaining = n;

    while (remaining > 0) {
        ssize_t sent = send(sd, cursor, remaining, 0);

        if (sent < 0 && errno == EINTR) {
            continue;
        }
        if (sent < 0) {
            return sent;
        }
        if (sent == 0) {
            return -1;
        }

        cursor += sent;
        remaining -= (size_t)sent;
    }

    return (ssize_t)n;
}
402 
/*
 * Receive up to n bytes into vptr from blocking descriptor sd.
 *
 * recv() is repeated until n bytes have arrived or recv() returns 0;
 * calls interrupted by a signal (EINTR) are retried.
 *
 * Returns the number of bytes received, which is smaller than n when
 * recv() returned 0 before n bytes arrived, or the negative recv()
 * result on any other error.
 */
ssize_t
_nc_recvn(int sd, void *vptr, size_t n)
{
    char *cursor = vptr;
    size_t remaining = n;

    while (remaining > 0) {
        ssize_t got = recv(sd, cursor, remaining, 0);

        if (got < 0 && errno == EINTR) {
            continue;
        }
        if (got < 0) {
            return got;
        }
        if (got == 0) {
            break;
        }

        cursor += got;
        remaining -= (size_t)got;
    }

    return (ssize_t)(n - remaining);
}
433 
/*
 * Return the current wall-clock time, as reported by gettimeofday(), in
 * microseconds since Epoch.
 *
 * Returns -1 after logging the error when gettimeofday() fails.
 */
int64_t
nc_usec_now(void)
{
    struct timeval tv;

    if (gettimeofday(&tv, NULL) < 0) {
        log_error("gettimeofday failed: %s", strerror(errno));
        return -1;
    }

    /* widen before multiplying so that the product is computed in 64 bits */
    return (int64_t)tv.tv_sec * 1000000LL + (int64_t)tv.tv_usec;
}
454 
/*
 * Return the current time in milliseconds since Epoch, derived from
 * nc_usec_now().
 *
 * Returns -1 when nc_usec_now() fails.
 */
int64_t
nc_msec_now(void)
{
    int64_t usec;

    usec = nc_usec_now();
    if (usec < 0) {
        /*
         * Propagate the failure explicitly: dividing the -1 error value
         * by 1000 would truncate to 0, which callers cannot tell apart
         * from a valid timestamp.
         */
        return -1;
    }

    return usec / 1000LL;
}
463 
/*
 * Resolve an inet node name and port into a socket address and store
 * its family, length and address in si.
 *
 * When name is NULL the lookup is done with AI_PASSIVE and a NULL node,
 * i.e. for an address suitable for bind(2). port must be a valid port
 * (asserted) and is passed to getaddrinfo() as a numeric service.
 *
 * Returns 0 on success, -1 when resolution fails, yields no address, or
 * yields an address that does not fit in si->addr.
 */
static int
nc_resolve_inet(const struct string *name, int port, struct sockinfo *si)
{
    int status;
    struct addrinfo *ai;
    struct addrinfo hints;
    char *node, service[NC_UINTMAX_MAXLEN];

    ASSERT(nc_valid_port(port));

    memset(&hints, 0, sizeof(hints));
    hints.ai_flags = AI_NUMERICSERV;
    hints.ai_family = AF_UNSPEC;     /* AF_INET or AF_INET6 */
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = 0;
    hints.ai_addrlen = 0;
    hints.ai_addr = NULL;
    hints.ai_canonname = NULL;

    if (name != NULL) {
        node = (char *)name->data;
    } else {
        /*
         * If AI_PASSIVE flag is specified in hints.ai_flags, and node is
         * NULL, then the returned socket addresses will be suitable for
         * bind(2)ing a socket that will accept(2) connections. The returned
         * socket address will contain the wildcard IP address.
         */
        node = NULL;
        hints.ai_flags |= AI_PASSIVE;
    }

    nc_snprintf(service, NC_UINTMAX_MAXLEN, "%d", port);

    /*
     * getaddrinfo() returns zero on success or one of the error codes listed
     * in gai_strerror(3) if an error occurs
     */
    ai = NULL;
    status = getaddrinfo(node, service, &hints, &ai);
    if (status != 0) {
        /* node is NULL in the passive case; NULL must never reach %s */
        log_error("address resolution of node '%s' service '%s' failed: %s",
                  node != NULL ? node : "(null)", service,
                  gai_strerror(status));
        return -1;
    }

    /*
     * getaddrinfo() can return a linked list of more than one addrinfo,
     * since we requested for both AF_INET and AF_INET6 addresses and the
     * host itself can be multi-homed. Since we don't care whether we are
     * using ipv4 or ipv6, we just use the first address from this collection
     * in the order in which it was returned.
     *
     * The sorting function used within getaddrinfo() is defined in RFC 3484;
     * the order can be tweaked for a particular system by editing
     * /etc/gai.conf
     */
    if (ai == NULL) {
        return -1;
    }

    /* refuse an address that would overrun the storage in si */
    if ((size_t)ai->ai_addrlen > sizeof(si->addr)) {
        log_error("address of node '%s' service '%s' is too large",
                  node != NULL ? node : "(null)", service);
        freeaddrinfo(ai);
        return -1;
    }

    si->family = ai->ai_family;
    si->addrlen = ai->ai_addrlen;
    nc_memcpy(&si->addr, ai->ai_addr, si->addrlen);

    freeaddrinfo(ai);

    return 0;
}
533 
534 static int
nc_resolve_unix(const struct string * name,struct sockinfo * si)535 nc_resolve_unix(const struct string *name, struct sockinfo *si)
536 {
537     struct sockaddr_un *un;
538 
539     if (name->len >= NC_UNIX_ADDRSTRLEN) {
540         return -1;
541     }
542 
543     un = &si->addr.un;
544 
545     un->sun_family = AF_UNIX;
546     nc_memcpy(un->sun_path, name->data, name->len);
547     un->sun_path[name->len] = '\0';
548 
549     si->family = AF_UNIX;
550     si->addrlen = sizeof(*un);
551     /* si->addr is an alias of un */
552 
553     return 0;
554 }
555 
556 /*
557  * Resolve a hostname and service by translating it to socket address and
558  * return it in si
559  *
560  * This routine is reentrant
561  */
562 int
nc_resolve(const struct string * name,int port,struct sockinfo * si)563 nc_resolve(const struct string *name, int port, struct sockinfo *si)
564 {
565     if (name != NULL && name->data[0] == '/') {
566         return nc_resolve_unix(name, si);
567     }
568 
569     return nc_resolve_inet(name, port, si);
570 }
571 
572 /*
573  * Unresolve the socket address by translating it to a character string
574  * describing the host and service
575  *
576  * This routine is not reentrant
577  */
578 const char *
nc_unresolve_addr(struct sockaddr * addr,socklen_t addrlen)579 nc_unresolve_addr(struct sockaddr *addr, socklen_t addrlen)
580 {
581     static char unresolve[NI_MAXHOST + NI_MAXSERV];
582     static char host[NI_MAXHOST], service[NI_MAXSERV];
583     int status;
584 
585     status = getnameinfo(addr, addrlen, host, sizeof(host),
586                          service, sizeof(service),
587                          NI_NUMERICHOST | NI_NUMERICSERV);
588     if (status < 0) {
589         return "unknown";
590     }
591 
592     nc_snprintf(unresolve, sizeof(unresolve), "%s:%s", host, service);
593 
594     return unresolve;
595 }
596 
597 /*
598  * Unresolve the socket descriptor peer address by translating it to a
599  * character string describing the host and service
600  *
601  * This routine is not reentrant
602  */
603 const char *
nc_unresolve_peer_desc(int sd)604 nc_unresolve_peer_desc(int sd)
605 {
606     static struct sockinfo si;
607     struct sockaddr *addr;
608     socklen_t addrlen;
609     int status;
610 
611     memset(&si, 0, sizeof(si));
612     addr = (struct sockaddr *)&si.addr;
613     addrlen = sizeof(si.addr);
614 
615     status = getpeername(sd, addr, &addrlen);
616     if (status < 0) {
617         return "unknown";
618     }
619 
620     return nc_unresolve_addr(addr, addrlen);
621 }
622 
623 /*
624  * Unresolve the socket descriptor address by translating it to a
625  * character string describing the host and service
626  *
627  * This routine is not reentrant
628  */
629 const char *
nc_unresolve_desc(int sd)630 nc_unresolve_desc(int sd)
631 {
632     static struct sockinfo si;
633     struct sockaddr *addr;
634     socklen_t addrlen;
635     int status;
636 
637     memset(&si, 0, sizeof(si));
638     addr = (struct sockaddr *)&si.addr;
639     addrlen = sizeof(si.addr);
640 
641     status = getsockname(sd, addr, &addrlen);
642     if (status < 0) {
643         return "unknown";
644     }
645 
646     return nc_unresolve_addr(addr, addrlen);
647 }
648