1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 1997-2019. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 /* If we HAVE_SCTP_H and Solaris, we need to define the following in
26    order to get SCTP working:
27 */
28 #if (defined(HAVE_SCTP_H) && defined(__sun) && defined(__SVR4))
29 #define  SOLARIS10    1
30 /* WARNING: This is not quite correct, it may also be Solaris 11! */
31 #define  _XPG4_2
32 #define  __EXTENSIONS__
33 #endif
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stddef.h>
38 #include <ctype.h>
39 #include <sys/types.h>
40 #include <errno.h>
41 #include <stdint.h>
42 
/* Two-level stringification: STRINGIFY(x) macro-expands x first and then
 * turns the result into a string literal, whereas STRINGIFY_1(x) stringifies
 * its argument exactly as written. */
#define IDENTITY(x) x
#define STRINGIFY_1(token) IDENTITY(#token)
#define STRINGIFY(arg) STRINGIFY_1(arg)
46 
47 #ifdef HAVE_UNISTD_H
48 #include <unistd.h>
49 #endif
50 #ifdef HAVE_SYS_UIO_H
51 #include <sys/uio.h>
52 #endif
53 
54 #ifdef HAVE_NET_IF_DL_H
55 #include <net/if_dl.h>
56 #endif
57 #ifdef HAVE_IFADDRS_H
58 #include <ifaddrs.h>
59 #endif
60 #ifdef HAVE_NETPACKET_PACKET_H
61 #include <netpacket/packet.h>
62 #endif
63 #ifdef HAVE_SYS_UN_H
64 #include <sys/un.h>
65 #endif
66 
67 #ifdef HAVE_SENDFILE
68 #if defined(__linux__) || (defined(__sun) && defined(__SVR4))
69     #include <sys/sendfile.h>
70 #elif defined(__FreeBSD__) || defined(__DragonFly__)
71     /* Need to define __BSD_VISIBLE in order to expose prototype of sendfile */
72     #define __BSD_VISIBLE 1
73     #include <sys/socket.h>
74 #endif
75 #endif
76 
77 #if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
78     #define __DARWIN__ 1
79 #endif
80 
81 /* All platforms fail on malloc errors. */
82 #define FATAL_MALLOC
83 
84 
85 #include "erl_driver.h"
86 
87 /* The IS_SOCKET_ERROR macro below is used for portability reasons. While
88    POSIX specifies that errors from socket-related system calls should be
89    indicated with a -1 return value, some users have experienced non-Windows
90    OS kernels that return negative values other than -1. While one can argue
91    that such kernels are technically broken, comparing against values less
92    than 0 covers their out-of-spec return values without imposing incorrect
93    semantics on systems that manage to correctly return -1 for errors, thus
94    increasing Erlang's portability.
95 */
96 #ifdef __WIN32__
97 #define IS_SOCKET_ERROR(val) ((val) == SOCKET_ERROR)
98 #else
99 #define IS_SOCKET_ERROR(val) ((val) < 0)
100 #endif
101 
102 #ifdef __WIN32__
103 #define LLU "%I64u"
104 #else
105 #define LLU "%llu"
106 #endif
107 typedef unsigned long long llu_t;
108 
109 #ifndef INT16_MIN
110 #define INT16_MIN (-32768)
111 #endif
112 #ifndef INT16_MAX
113 #define INT16_MAX (32767)
114 #endif
115 
116 #ifdef __WIN32__
117 #define  STRNCASECMP strncasecmp
118 
119 #define INCL_WINSOCK_API_TYPEDEFS 1
120 
121 #ifndef WINDOWS_H_INCLUDES_WINSOCK2_H
122 #include <winsock2.h>
123 #endif
124 #include <windows.h>
125 #include <Ws2tcpip.h>   /* NEED VC 6.0 or higher */
126 
127 /* Visual studio 2008+: NTDDI_VERSION needs to be set for iphlpapi.h
128    to define the right structures. It needs to be set to WINXP (or LONGHORN)
129    for IPV6 to work and it's set lower by default, so we need to change it. */
130 #ifdef HAVE_SDKDDKVER_H
131 #  include <sdkddkver.h>
132 #  ifdef NTDDI_VERSION
133 #    undef NTDDI_VERSION
134 #  endif
135 #  define NTDDI_VERSION NTDDI_WINXP
136 #endif
137 
138 #include <iphlpapi.h>
139 
140 
141 #undef WANT_NONBLOCKING
142 #include "sys.h"
143 
144 #undef EWOULDBLOCK
145 #undef ETIMEDOUT
146 
147 #ifdef EINPROGRESS
148 #undef EINPROGRESS
149 #endif
150 #ifdef EALREADY
151 #undef EALREADY
152 #endif
153 #ifdef ENOTSOCK
154 #undef ENOTSOCK
155 #endif
156 #ifdef EDESTADDRREQ
157 #undef EDESTADDRREQ
158 #endif
159 #ifdef EMSGSIZE
160 #undef EMSGSIZE
161 #endif
162 #ifdef EPROTOTYPE
163 #undef EPROTOTYPE
164 #endif
165 #ifdef ENOPROTOOPT
166 #undef ENOPROTOOPT
167 #endif
168 #ifdef EPROTONOSUPPORT
169 #undef EPROTONOSUPPORT
170 #endif
171 #ifdef EOPNOTSUPP
172 #undef EOPNOTSUPP
173 #endif
174 #ifdef EAFNOSUPPORT
175 #undef EAFNOSUPPORT
176 #endif
177 #ifdef EADDRINUSE
178 #undef EADDRINUSE
179 #endif
180 #ifdef EADDRNOTAVAIL
181 #undef EADDRNOTAVAIL
182 #endif
183 #ifdef ENETDOWN
184 #undef ENETDOWN
185 #endif
186 #ifdef ENETUNREACH
187 #undef ENETUNREACH
188 #endif
189 #ifdef ENETRESET
190 #undef ENETRESET
191 #endif
192 #ifdef ECONNABORTED
193 #undef ECONNABORTED
194 #endif
195 #ifdef ECONNRESET
196 #undef ECONNRESET
197 #endif
198 #ifdef ENOBUFS
199 #undef ENOBUFS
200 #endif
201 #ifdef EISCONN
202 #undef EISCONN
203 #endif
204 #ifdef ENOTCONN
205 #undef ENOTCONN
206 #endif
207 #ifdef ECONNREFUSED
208 #undef ECONNREFUSED
209 #endif
210 #ifdef ELOOP
211 #undef ELOOP
212 #endif
213 #ifdef EHOSTUNREACH
214 #undef EHOSTUNREACH
215 #endif
216 
217 
218 #define HAVE_MULTICAST_SUPPORT
219 #define HAVE_UDP
220 
221 #define ERRNO_BLOCK             WSAEWOULDBLOCK
222 
223 #define EWOULDBLOCK             WSAEWOULDBLOCK
224 #define EINPROGRESS             WSAEINPROGRESS
225 #define EALREADY                WSAEALREADY
226 #define ENOTSOCK                WSAENOTSOCK
227 #define EDESTADDRREQ            WSAEDESTADDRREQ
228 #define EMSGSIZE                WSAEMSGSIZE
229 #define EPROTOTYPE              WSAEPROTOTYPE
230 #define ENOPROTOOPT             WSAENOPROTOOPT
231 #define EPROTONOSUPPORT         WSAEPROTONOSUPPORT
232 #define ESOCKTNOSUPPORT         WSAESOCKTNOSUPPORT
233 #define EOPNOTSUPP              WSAEOPNOTSUPP
234 #define EPFNOSUPPORT            WSAEPFNOSUPPORT
235 #define EAFNOSUPPORT            WSAEAFNOSUPPORT
236 #define EADDRINUSE              WSAEADDRINUSE
237 #define EADDRNOTAVAIL           WSAEADDRNOTAVAIL
238 #define ENETDOWN                WSAENETDOWN
239 #define ENETUNREACH             WSAENETUNREACH
240 #define ENETRESET               WSAENETRESET
241 #define ECONNABORTED            WSAECONNABORTED
242 #define ECONNRESET              WSAECONNRESET
243 #define ENOBUFS                 WSAENOBUFS
244 #define EISCONN                 WSAEISCONN
245 #define ENOTCONN                WSAENOTCONN
246 #define ESHUTDOWN               WSAESHUTDOWN
247 #define ETOOMANYREFS            WSAETOOMANYREFS
248 #define ETIMEDOUT               WSAETIMEDOUT
249 #define ECONNREFUSED            WSAECONNREFUSED
250 #define ELOOP                   WSAELOOP
251 #undef ENAMETOOLONG
252 #define ENAMETOOLONG            WSAENAMETOOLONG
253 #define EHOSTDOWN               WSAEHOSTDOWN
254 #define EHOSTUNREACH            WSAEHOSTUNREACH
255 #undef ENOTEMPTY
256 #define ENOTEMPTY               WSAENOTEMPTY
257 #define EPROCLIM                WSAEPROCLIM
258 #define EUSERS                  WSAEUSERS
259 #define EDQUOT                  WSAEDQUOT
260 #define ESTALE                  WSAESTALE
261 #define EREMOTE                 WSAEREMOTE
262 
263 #define INVALID_EVENT           WSA_INVALID_EVENT
264 
265 static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD);
266 
267 #define sock_open(af, type, proto) \
268     make_noninheritable_handle(socket((af), (type), (proto)))
269 #define sock_close(s)              closesocket((s))
270 #define sock_shutdown(s, how)      shutdown((s), (how))
271 
272 #define sock_accept(s, addr, len) \
273     make_noninheritable_handle(accept((s), (addr), (len)))
274 #define sock_connect(s, addr, len) connect((s), (addr), (len))
275 #define sock_listen(s, b)          listen((s), (b))
276 #define sock_bind(s, addr, len)    bind((s), (addr), (len))
277 #define sock_getopt(s,t,n,v,l)     getsockopt((s),(t),(n),(v),(l))
278 #define sock_setopt(s,t,n,v,l)     setsockopt((s),(t),(n),(v),(l))
279 #define sock_name(s, addr, len)    getsockname((s), (addr), (len))
280 #define sock_peer(s, addr, len)    getpeername((s), (addr), (len))
281 #define sock_ntohs(x)              ntohs((x))
282 #define sock_ntohl(x)              ntohl((x))
283 #define sock_htons(x)              htons((x))
284 #define sock_htonl(x)              htonl((x))
285 #define sock_send(s,buf,len,flag)  send((s),(buf),(len),(flag))
286 #define sock_sendv(s, vec, size, np, flag) \
287             WSASend((s),(WSABUF*)(vec),(size),(np),(flag),NULL,NULL)
288 #define sock_recv(s,buf,len,flag)  recv((s),(buf),(len),(flag))
289 
290 #define sock_recvfrom(s,buf,blen,flag,addr,alen) \
291 	    recvfrom((s),(buf),(blen),(flag),(addr),(alen))
292 #define sock_sendto(s,buf,blen,flag,addr,alen) \
293 	    sendto((s),(buf),(blen),(flag),(addr),(alen))
294 #define sock_hostname(buf, len)    gethostname((buf), (len))
295 
296 #define sock_getservbyname(name,proto) getservbyname((name),(proto))
297 #define sock_getservbyport(port,proto) getservbyport((port),(proto))
298 
299 #define sock_errno() WSAGetLastError()
300 #define sock_create_event(d)       WSACreateEvent()
301 #define sock_close_event(e)        WSACloseEvent(e)
302 
303 #define sock_select(D, Flags, OnOff) winsock_event_select(D, Flags, OnOff)
304 
305 #define SET_BLOCKING(s)           ioctlsocket(s, FIONBIO, &zero_value)
306 #define SET_NONBLOCKING(s)        ioctlsocket(s, FIONBIO, &one_value)
307 
308 
309 static unsigned long zero_value = 0;
310 static unsigned long one_value = 1;
311 
312 #define TCP_SHUT_WR    SD_SEND
313 #define TCP_SHUT_RD    SD_RECEIVE
314 #define TCP_SHUT_RDWR  SD_BOTH
315 
316 #else /* !__WIN32__ */
317 
318 #include <sys/time.h>
319 #ifdef NETDB_H_NEEDS_IN_H
320 #include <netinet/in.h>
321 #endif
322 #include <netdb.h>
323 
324 #include <sys/socket.h>
325 #include <netinet/in.h>
326 
327 #ifdef DEF_INADDR_LOOPBACK_IN_RPC_TYPES_H
328 #include <rpc/types.h>
329 #endif
330 
331 #include <netinet/tcp.h>
332 #include <arpa/inet.h>
333 
334 #include <sys/param.h>
335 #ifdef HAVE_ARPA_NAMESER_H
336 #include <arpa/nameser.h>
337 #endif
338 
339 #ifdef HAVE_SYS_SOCKIO_H
340 #include <sys/sockio.h>
341 #endif
342 
343 #ifdef HAVE_SYS_IOCTL_H
344 #include <sys/ioctl.h>
345 #endif
346 
347 #include <net/if.h>
348 
349 #ifdef HAVE_SCHED_H
350 #include <sched.h>
351 #endif
352 
353 #ifdef HAVE_SETNS_H
354 #include <setns.h>
355 #endif
356 
357 #define HAVE_UDP
358 
359 /* SCTP support -- currently for UNIX platforms only: */
360 #undef HAVE_SCTP
361 #if defined(HAVE_SCTP_H)
362 
363 #include <netinet/sctp.h>
364 
/* From version 11 onwards, the SCTP Socket API Draft specifies that
   netinet/sctp.h must explicitly define HAVE_SCTP when SCTP is supported.
   Solaris 10 apparently still follows Draft 10 and does not define that
   symbol, so we have to define it ourselves:
*/
370 #ifndef     HAVE_SCTP
371 #    define HAVE_SCTP
372 #endif
373 
374 /* These changed in draft 11, so SOLARIS10 uses the old MSG_* */
375 #if ! HAVE_DECL_SCTP_UNORDERED
376 #     define    SCTP_UNORDERED  MSG_UNORDERED
377 #endif
378 #if ! HAVE_DECL_SCTP_ADDR_OVER
379 #     define    SCTP_ADDR_OVER  MSG_ADDR_OVER
380 #endif
381 #if ! HAVE_DECL_SCTP_ABORT
382 #     define    SCTP_ABORT      MSG_ABORT
383 #endif
384 #if ! HAVE_DECL_SCTP_EOF
385 #     define    SCTP_EOF        MSG_EOF
386 #endif
387 
388 /* More Solaris 10 fixes: */
389 #if ! HAVE_DECL_SCTP_CLOSED && HAVE_DECL_SCTPS_IDLE
390 #    define SCTP_CLOSED SCTPS_IDLE
391 #    undef HAVE_DECL_SCTP_CLOSED
392 #    define HAVE_DECL_SCTP_CLOSED 1
393 #endif
394 #if ! HAVE_DECL_SCTP_BOUND && HAVE_DECL_SCTPS_BOUND
395 #    define SCTP_BOUND SCTPS_BOUND
396 #    undef HAVE_DECL_SCTP_BOUND
397 #    define HAVE_DECL_SCTP_BOUND 1
398 #endif
399 #if ! HAVE_DECL_SCTP_LISTEN && HAVE_DECL_SCTPS_LISTEN
400 #    define SCTP_LISTEN SCTPS_LISTEN
401 #    undef HAVE_DECL_SCTP_LISTEN
402 #    define HAVE_DECL_SCTP_LISTEN 1
403 #endif
404 #if ! HAVE_DECL_SCTP_COOKIE_WAIT && HAVE_DECL_SCTPS_COOKIE_WAIT
405 #    define SCTP_COOKIE_WAIT SCTPS_COOKIE_WAIT
406 #    undef HAVE_DECL_SCTP_COOKIE_WAIT
407 #    define HAVE_DECL_SCTP_COOKIE_WAIT 1
408 #endif
409 #if ! HAVE_DECL_SCTP_COOKIE_ECHOED && HAVE_DECL_SCTPS_COOKIE_ECHOED
410 #    define SCTP_COOKIE_ECHOED SCTPS_COOKIE_ECHOED
411 #    undef HAVE_DECL_SCTP_COOKIE_ECHOED
412 #    define HAVE_DECL_SCTP_COOKIE_ECHOED 1
413 #endif
414 #if ! HAVE_DECL_SCTP_ESTABLISHED && HAVE_DECL_SCTPS_ESTABLISHED
415 #    define SCTP_ESTABLISHED SCTPS_ESTABLISHED
416 #    undef HAVE_DECL_SCTP_ESTABLISHED
417 #    define HAVE_DECL_SCTP_ESTABLISHED 1
418 #endif
419 #if ! HAVE_DECL_SCTP_SHUTDOWN_PENDING && HAVE_DECL_SCTPS_SHUTDOWN_PENDING
420 #    define SCTP_SHUTDOWN_PENDING SCTPS_SHUTDOWN_PENDING
421 #    undef HAVE_DECL_SCTP_SHUTDOWN_PENDING
422 #    define HAVE_DECL_SCTP_SHUTDOWN_PENDING 1
423 #endif
424 #if ! HAVE_DECL_SCTP_SHUTDOWN_SENT && HAVE_DECL_SCTPS_SHUTDOWN_SENT
425 #    define SCTP_SHUTDOWN_SENT SCTPS_SHUTDOWN_SENT
426 #    undef HAVE_DECL_SCTP_SHUTDOWN_SENT
427 #    define HAVE_DECL_SCTP_SHUTDOWN_SENT 1
428 #endif
429 #if ! HAVE_DECL_SCTP_SHUTDOWN_RECEIVED && HAVE_DECL_SCTPS_SHUTDOWN_RECEIVED
430 #    define SCTP_SHUTDOWN_RECEIVED SCTPS_SHUTDOWN_RECEIVED
431 #    undef HAVE_DECL_SCTP_SHUTDOWN_RECEIVED
432 #    define HAVE_DECL_SCTP_SHUTDOWN_RECEIVED 1
433 #endif
434 #if ! HAVE_DECL_SCTP_SHUTDOWN_ACK_SENT && HAVE_DECL_SCTPS_SHUTDOWN_ACK_SENT
435 #    define SCTP_SHUTDOWN_ACK_SENT SCTPS_SHUTDOWN_ACK_SENT
436 #    undef HAVE_DECL_SCTP_SHUTDOWN_ACK_SENT
437 #    define HAVE_DECL_SCTP_SHUTDOWN_ACK_SENT 1
438 #endif
439 /* New spelling in lksctp 2.6.22 or maybe even earlier:
440  *  adaption -> adaptation
441  */
442 #if !defined(SCTP_ADAPTATION_LAYER) && defined (SCTP_ADAPTION_LAYER)
443 #     define SCTP_ADAPTATION_LAYER       SCTP_ADAPTION_LAYER
444 #     define SCTP_ADAPTATION_INDICATION  SCTP_ADAPTION_INDICATION
445 #     define sctp_adaptation_event       sctp_adaption_event
446 #     define sctp_setadaptation          sctp_setadaption
447 #     define sn_adaptation_event         sn_adaption_event
448 #     define sai_adaptation_ind          sai_adaption_ind
449 #     define ssb_adaptation_ind          ssb_adaption_ind
450 #     define sctp_adaptation_layer_event sctp_adaption_layer_event
451 #endif
452 
453 #if defined(__GNUC__) && defined(HAVE_SCTP_BINDX)
454 static typeof(sctp_bindx) *p_sctp_bindx = NULL;
455 #else
456 static int (*p_sctp_bindx)
457 	(int sd, struct sockaddr *addrs, int addrcnt, int flags) = NULL;
458 #endif
459 
460 #if defined(__GNUC__) && defined(HAVE_SCTP_PEELOFF)
461 static typeof(sctp_peeloff) *p_sctp_peeloff = NULL;
462 #else
463 static int (*p_sctp_peeloff)
464         (int sd, sctp_assoc_t assoc_id) = NULL;
465 #endif
466 
467 #if defined(__GNUC__) && defined(HAVE_SCTP_GETLADDRS)
468 static typeof(sctp_getladdrs) *p_sctp_getladdrs = NULL;
469 #else
470 static int (*p_sctp_getladdrs)
471         (int sd, sctp_assoc_t assoc_id, struct sockaddr **ss) = NULL;
472 #endif
473 
474 #if defined(__GNUC__) && defined(HAVE_SCTP_FREELADDRS)
475 static typeof(sctp_freeladdrs) *p_sctp_freeladdrs = NULL;
476 #else
477 static void (*p_sctp_freeladdrs)(struct sockaddr *addrs) = NULL;
478 #endif
479 
480 #if defined(__GNUC__) && defined(HAVE_SCTP_GETPADDRS)
481 static typeof(sctp_getpaddrs) *p_sctp_getpaddrs = NULL;
482 #else
483 static int (*p_sctp_getpaddrs)
484         (int sd, sctp_assoc_t assoc_id, struct sockaddr **ss) = NULL;
485 #endif
486 
487 #if defined(__GNUC__) && defined(HAVE_SCTP_FREEPADDRS)
488 static typeof(sctp_freepaddrs) *p_sctp_freepaddrs = NULL;
489 #else
490 static void (*p_sctp_freepaddrs)(struct sockaddr *addrs) = NULL;
491 #endif
492 
493 #endif /* #if defined(HAVE_SCTP_H) */
494 
495 #ifndef WANT_NONBLOCKING
496 #define WANT_NONBLOCKING
497 #endif
498 #include "sys.h"
499 
500 /* #define INET_DRV_DEBUG 1 */
501 #ifdef INET_DRV_DEBUG
502 #define DEBUG 1
503 #undef DEBUGF
504 #define DEBUGF(X) printf X
505 #endif
506 
507 #if !defined(HAVE_STRNCASECMP)
508 #define STRNCASECMP my_strncasecmp
509 
/*
 * Fallback for platforms without strncasecmp(): case-insensitive comparison
 * of at most n characters of s1 and s2.
 *
 * Returns 0 when the compared prefixes are equal ignoring case (and always
 * when n is 0); otherwise the difference between the upper-cased characters
 * at the first position where the strings differ.
 */
static int my_strncasecmp(const char *s1, const char *s2, size_t n)
{
    size_t i;

    /* Nothing to compare; also keeps "n - 1" below from wrapping around. */
    if (n == 0)
        return 0;

    /* Stop at index n-1 at the latest so the final comparison below looks
     * at no more than n characters in total. */
    for (i = 0; i < n - 1; ++i) {
        /* <ctype.h> functions require values representable as unsigned char. */
        unsigned char c1 = (unsigned char) s1[i];
        unsigned char c2 = (unsigned char) s2[i];

        if (c1 == '\0' || c2 == '\0' || toupper(c1) != toupper(c2))
            break;
    }
    return toupper((unsigned char) s1[i]) - toupper((unsigned char) s2[i]);
}
518 
519 
520 #else
521 #define  STRNCASECMP strncasecmp
522 #endif
523 
524 #define INVALID_SOCKET -1
525 #define INVALID_EVENT  -1
526 #define SOCKET_ERROR   -1
527 
528 #define SOCKET int
529 #define HANDLE long int
530 #define FD_READ    ERL_DRV_READ
531 #define FD_WRITE   ERL_DRV_WRITE
532 #define FD_CLOSE   0
533 #define FD_CONNECT ERL_DRV_WRITE
534 #define FD_ACCEPT  ERL_DRV_READ
535 
536 #define sock_connect(s, addr, len)  connect((s), (addr), (len))
537 #define sock_listen(s, b)           listen((s), (b))
538 #define sock_bind(s, addr, len)     bind((s), (addr), (len))
539 #define sock_getopt(s,t,n,v,l)      getsockopt((s),(t),(n),(v),(l))
540 #define sock_setopt(s,t,n,v,l)      setsockopt((s),(t),(n),(v),(l))
541 #define sock_name(s, addr, len)     getsockname((s), (addr), (len))
542 #define sock_peer(s, addr, len)     getpeername((s), (addr), (len))
543 #define sock_ntohs(x)               ntohs((x))
544 #define sock_ntohl(x)               ntohl((x))
545 #define sock_htons(x)               htons((x))
546 #define sock_htonl(x)               htonl((x))
547 
548 #define sock_accept(s, addr, len)   accept((s), (addr), (len))
549 #define sock_send(s,buf,len,flag)   send((s),(buf),(len),(flag))
550 #define sock_sendto(s,buf,blen,flag,addr,alen) \
551                 sendto((s),(buf),(blen),(flag),(addr),(alen))
552 #define sock_sendv(s, vec, size, np, flag) \
553 		(*(np) = writev((s), (struct iovec*)(vec), (size)))
554 #define sock_sendmsg(s,msghdr,flag) sendmsg((s),(msghdr),(flag))
555 
556 #define sock_open(af, type, proto)  socket((af), (type), (proto))
557 #define sock_close(s)               close((s))
558 #define sock_shutdown(s, how)       shutdown((s), (how))
559 
560 #define sock_hostname(buf, len)     gethostname((buf), (len))
561 #define sock_getservbyname(name,proto) getservbyname((name), (proto))
562 #define sock_getservbyport(port,proto) getservbyport((port), (proto))
563 
564 #define sock_recv(s,buf,len,flag)   recv((s),(buf),(len),(flag))
565 #define sock_recvfrom(s,buf,blen,flag,addr,alen) \
566                 recvfrom((s),(buf),(blen),(flag),(addr),(alen))
567 #define sock_recvmsg(s,msghdr,flag) recvmsg((s),(msghdr),(flag))
568 
569 #define sock_errno()                errno
570 #define sock_create_event(d)        ((d)->s) /* return file descriptor */
571 #define sock_close_event(e)                  /* do nothing */
572 
/* Wrapper around driver_select() that additionally ORs ERL_DRV_USE into
 * the mode when `on` is non-zero.  Every argument is parenthesized so that
 * compound expressions expand with the intended precedence; note that `on`
 * is still expanded twice. */
#define inet_driver_select(port, e, mode, on) \
    driver_select((port), (e), (mode) | ((on) ? ERL_DRV_USE : 0), (on))
575 
/* Adds (onoff non-zero) or removes (onoff zero) `flags` in (d)->event_mask,
 * emits a DEBUGF trace, and forwards the request to inet_driver_select()
 * using (d)->port and (d)->event.  `d`, `flags` and `onoff` are expanded
 * more than once. */
#define sock_select(d, flags, onoff) do { \
        ASSERT(!INET_IGNORED(d)); \
        if (onoff) \
            (d)->event_mask |= (flags); \
        else \
            (d)->event_mask &= ~(flags); \
        DEBUGF(("(%s / %d) sock_select(%ld): flags=%02X, onoff=%d, event_mask=%02lX\r\n", \
                __FILE__, __LINE__, \
                (long) (d)->port, \
                (flags), \
                (onoff), \
                (unsigned long) (d)->event_mask)); \
        inet_driver_select((d)->port, \
                           (ErlDrvEvent)(long)(d)->event, \
                           (flags), \
                           (onoff)); \
   } while(0)
585 
586 #define TCP_SHUT_WR    SHUT_WR
587 #define TCP_SHUT_RD    SHUT_RD
588 #define TCP_SHUT_RDWR  SHUT_RDWR
589 
590 #endif /* !__WIN32__ */
591 
592 #ifdef HAVE_SOCKLEN_T
593 #  define SOCKLEN_T socklen_t
594 #else
595 #  define SOCKLEN_T size_t
596 #endif
597 
598 #include "packet_parser.h"
599 
/* Read three bytes at s as a big-endian 24-bit integer (s[0] is the most
 * significant byte). */
#define get_int24(s) \
    ((((unsigned char*) (s))[2])       | \
     (((unsigned char*) (s))[1] << 8)  | \
     (((unsigned char*) (s))[0] << 16))
603 
/* Read four bytes at s as a little-endian 32-bit integer (s[0] is the least
 * significant byte).  Each byte is widened to unsigned int before shifting
 * so that a top byte >= 0x80 does not shift into the sign bit of an int;
 * the result is converted back to int so the expression keeps the type it
 * had before. */
#define get_little_int32(s) \
    ((int) (((unsigned int) ((unsigned char*) (s))[3] << 24) | \
            ((unsigned int) ((unsigned char*) (s))[2] << 16) | \
            ((unsigned int) ((unsigned char*) (s))[1] << 8)  | \
            ((unsigned int) ((unsigned char*) (s))[0])))
608 
609 #if defined(HAVE_SYS_UN_H) || defined(SO_BINDTODEVICE)
610 
611 /* strnlen doesn't exist everywhere */
/* Length of s, looking at no more than maxlen characters: returns the index
 * of the first NUL, or maxlen if none is found within that range. */
static size_t my_strnlen(const char *s, size_t maxlen)
{
    size_t len;

    for (len = 0; len < maxlen; len++) {
        if (s[len] == '\0')
            break;
    }
    return len;
}
619 
620 #endif
621 
622 #ifdef VALGRIND
623 #  include <valgrind/memcheck.h>
624 #else
625 #  define VALGRIND_MAKE_MEM_DEFINED(ptr,size)
626 #endif
627 
628 #ifndef __WIN32__
629 /* Calculate CMSG_NXTHDR without having a struct msghdr*.
630  * CMSG_LEN only caters for alignment for start of data.
631  * To get how much to advance we need to use CMSG_SPACE
632  * on the payload length.  To get the payload length we
633  * take the calculated cmsg->cmsg_len and subtract the
634  * header length.  To get the header length we use
635  * the pointer difference from the cmsg start pointer
636  * to the CMSG_DATA(cmsg) pointer.
637  *
638  * Some platforms (seen on ppc Linux 2.6.29-3.ydl61.3)
639  * may return 0 as the cmsg_len if the cmsg is to be ignored.
640  */
/* Payload length of cmsg: cmsg_len minus the offset of CMSG_DATA(cmsg) from
 * the start of cmsg, or 0 when cmsg_len is smaller than struct cmsghdr. */
#define LEN_CMSG_DATA(cmsg)                                             \
    (sizeof (struct cmsghdr) <= (cmsg)->cmsg_len                        \
     ? (cmsg)->cmsg_len - ((char*)CMSG_DATA(cmsg) - (char*)(cmsg))      \
     : 0)
/* Address reached by advancing cmsg by CMSG_SPACE() of its payload length. */
#define NXT_CMSG_HDR(cmsg)                                              \
    ((struct cmsghdr*)((char*)(cmsg) + CMSG_SPACE(LEN_CMSG_DATA(cmsg))))
646 #endif
647 
648 #if !defined(IPV6_PKTOPTIONS) && defined(IPV6_2292PKTOPTIONS)
649 #define IPV6_PKTOPTIONS IPV6_2292PKTOPTIONS
650 #endif
651 
652 /*
653   Magic errno value used locally for return of {error, system_limit}
654   - the emulator definition of SYSTEM_LIMIT is not available here.
655 */
656 #define INET_ERRNO_SYSTEM_LIMIT  (15 << 8)
657 
658 /*----------------------------------------------------------------------------
659 ** Interface constants.
660 **
661 ** This section must be "identical" to the corresponding inet_int.hrl
662 */
663 
664 /* general address encode/decode tag */
665 #define INET_AF_UNSPEC      0
666 #define INET_AF_INET        1
667 #define INET_AF_INET6       2
668 #define INET_AF_ANY         3 /* INADDR_ANY or IN6ADDR_ANY_INIT */
669 #define INET_AF_LOOPBACK    4 /* INADDR_LOOPBACK or IN6ADDR_LOOPBACK_INIT */
670 #define INET_AF_LOCAL       5
671 #define INET_AF_UNDEFINED   6 /* Unknown */
672 
673 /* open and INET_REQ_GETTYPE enumeration */
674 #define INET_TYPE_STREAM    1
675 #define INET_TYPE_DGRAM     2
676 #define INET_TYPE_SEQPACKET 3
677 
678 /* INET_LOPT_MODE options */
679 #define INET_MODE_LIST      0
680 #define INET_MODE_BINARY    1
681 
682 /* INET_LOPT_DELIVER options */
683 #define INET_DELIVER_PORT   0
684 #define INET_DELIVER_TERM   1
685 
686 /* INET_LOPT_ACTIVE options */
687 #define INET_PASSIVE        0  /* false */
688 #define INET_ACTIVE         1  /* true */
689 #define INET_ONCE           2  /* true; active once then passive */
690 #define INET_MULTI          3  /* true; active N then passive */
691 
692 /* INET_REQ_GETSTATUS enumeration */
693 #define INET_F_OPEN         0x0001
694 /* INET_F_BOUND removed - renumber when there comes a bigger rewrite */
695 #define INET_F_ACTIVE       0x0004
696 #define INET_F_LISTEN       0x0008
697 #define INET_F_CON          0x0010
698 #define INET_F_ACC          0x0020
699 #define INET_F_LST          0x0040
700 #define INET_F_BUSY         0x0080
701 #define INET_F_MULTI_CLIENT 0x0100 /* Multiple clients for one descriptor, i.e. multi-accept */
702 
/* A single number space is shared by all *_REQ_* codes so that if,
** e.g., a UDP request is issued for a TCP socket, the driver can protest.
*/
706 #define INET_REQ_OPEN          1
707 #define INET_REQ_CLOSE         2
708 #define INET_REQ_CONNECT       3
709 #define INET_REQ_PEER          4
710 #define INET_REQ_NAME          5
711 #define INET_REQ_BIND          6
712 #define INET_REQ_SETOPTS       7
713 #define INET_REQ_GETOPTS       8
714 /* #define INET_REQ_GETIX         9  NOT USED ANY MORE */
715 /* #define INET_REQ_GETIF         10 REPLACE BY NEW STUFF */
716 #define INET_REQ_GETSTAT       11
717 #define INET_REQ_GETHOSTNAME   12
718 #define INET_REQ_FDOPEN        13
719 #define INET_REQ_GETFD         14
720 #define INET_REQ_GETTYPE       15
721 #define INET_REQ_GETSTATUS     16
722 #define INET_REQ_GETSERVBYNAME 17
723 #define INET_REQ_GETSERVBYPORT 18
724 #define INET_REQ_SETNAME       19
725 #define INET_REQ_SETPEER       20
726 #define INET_REQ_GETIFLIST     21
727 #define INET_REQ_IFGET         22
728 #define INET_REQ_IFSET         23
729 #define INET_REQ_SUBSCRIBE     24
730 #define INET_REQ_GETIFADDRS    25
731 #define INET_REQ_ACCEPT        26
732 #define INET_REQ_LISTEN        27
733 #define INET_REQ_IGNOREFD      28
734 #define INET_REQ_GETLADDRS     29
735 #define INET_REQ_GETPADDRS     30
736 
737 /* TCP requests */
738 /* #define TCP_REQ_ACCEPT         40 MOVED */
739 /* #define TCP_REQ_LISTEN         41 MERGED */
740 #define TCP_REQ_RECV           42
741 #define TCP_REQ_UNRECV         43
742 #define TCP_REQ_SHUTDOWN       44
743 #define TCP_REQ_SENDFILE       45
744 /* UDP and SCTP requests */
745 #define PACKET_REQ_RECV        60 /* Common for UDP and SCTP         */
746 /* #define SCTP_REQ_LISTEN       61 MERGED Different from TCP; not for UDP */
747 #define SCTP_REQ_BINDX	       62 /* Multi-home SCTP bind            */
748 #define SCTP_REQ_PEELOFF       63
749 
750 /* INET_REQ_SUBSCRIBE sub-requests */
751 #define INET_SUBS_EMPTY_OUT_Q  1
752 
753 /* TCP additional flags */
754 #define TCP_ADDF_DELAY_SEND    1
755 #define TCP_ADDF_CLOSE_SENT    2 /* Close sent (active mode only) */
756 #define TCP_ADDF_DELAYED_CLOSE_RECV 4 /* If receive fails, report {error,closed} (passive mode) */
757 #define TCP_ADDF_DELAYED_CLOSE_SEND 8 /* If send fails, report {error,closed} (passive mode) */
758 #define TCP_ADDF_PENDING_SHUT_WR   16 /* Call shutdown(sock, SHUT_WR) when queue empties */
759 #define TCP_ADDF_PENDING_SHUT_RDWR 32 /* Call shutdown(sock, SHUT_RDWR) when queue empties */
760 #define TCP_ADDF_PENDING_SHUTDOWN \
761 		(TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR)
762 #define TCP_ADDF_SHOW_ECONNRESET   64 /* Tell user about incoming RST */
763 #define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occurred on send or shutdown */
764 #define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */
765 #define TCP_ADDF_LINGER_ZERO 	  512 /* Discard driver queue on port close */
766 #define TCP_ADDF_SENDFILE         1024 /* Send from an fd instead of the driver queue */
767 
768 /* *_REQ_* replies */
769 #define INET_REP_ERROR       0
770 #define INET_REP_OK          1
771 #define INET_REP             2
772 
773 /* INET_REQ_SETOPTS and INET_REQ_GETOPTS options */
774 #define INET_OPT_REUSEADDR  0   /* enable/disable local address reuse */
775 #define INET_OPT_KEEPALIVE  1   /* enable/disable keep connections alive */
776 #define INET_OPT_DONTROUTE  2   /* enable/disable routing for messages */
777 #define INET_OPT_LINGER     3   /* linger on close if data is present */
778 #define INET_OPT_BROADCAST  4   /* enable/disable transmission of broadcast */
779 #define INET_OPT_OOBINLINE  5   /* enable/disable out-of-band data in band */
780 #define INET_OPT_SNDBUF     6   /* set send buffer size */
781 #define INET_OPT_RCVBUF     7   /* set receive buffer size */
782 #define INET_OPT_PRIORITY   8   /* set priority */
783 #define INET_OPT_TOS        9   /* Set type of service */
784 #define TCP_OPT_NODELAY     10  /* don't delay send to coalesce packets */
785 #define UDP_OPT_MULTICAST_IF 11  /* set/get IP multicast interface */
786 #define UDP_OPT_MULTICAST_TTL 12 /* set/get IP multicast timetolive */
787 #define UDP_OPT_MULTICAST_LOOP 13 /* set/get IP multicast loopback */
788 #define UDP_OPT_ADD_MEMBERSHIP 14 /* add an IP group membership */
789 #define UDP_OPT_DROP_MEMBERSHIP 15 /* drop an IP group membership */
790 #define INET_OPT_IPV6_V6ONLY 16 /* IPv6 only socket, no mapped v4 addrs */
791 /* LOPT is local options */
792 #define INET_LOPT_BUFFER      20  /* min buffer size hint */
793 #define INET_LOPT_HEADER      21  /* list header size */
794 #define INET_LOPT_ACTIVE      22  /* enable/disable active receive */
795 #define INET_LOPT_PACKET      23  /* packet header type (TCP) */
796 #define INET_LOPT_MODE        24  /* list or binary mode */
797 #define INET_LOPT_DELIVER     25  /* port or term delivery */
798 #define INET_LOPT_EXITONCLOSE 26  /* exit port on active close or not ! */
799 #define INET_LOPT_TCP_HIWTRMRK     27  /* set local high watermark */
800 #define INET_LOPT_TCP_LOWTRMRK     28  /* set local low watermark */
801                                 /* 29  unused */
802 #define INET_LOPT_TCP_SEND_TIMEOUT 30  /* set send timeout */
803 #define INET_LOPT_TCP_DELAY_SEND   31  /* Delay sends until next poll */
804 #define INET_LOPT_PACKET_SIZE      32  /* Max packet size */
805 #define INET_LOPT_UDP_READ_PACKETS 33  /* Number of packets to read */
806 #define INET_OPT_RAW               34  /* Raw socket options */
807 #define INET_LOPT_TCP_SEND_TIMEOUT_CLOSE 35  /* auto-close on send timeout or not */
808 #define INET_LOPT_MSGQ_HIWTRMRK     36  /* set local msgq high watermark */
809 #define INET_LOPT_MSGQ_LOWTRMRK     37  /* set local msgq low watermark */
810 #define INET_LOPT_NETNS             38  /* Network namespace pathname */
811 #define INET_LOPT_TCP_SHOW_ECONNRESET 39  /* tell user about incoming RST */
812 #define INET_LOPT_LINE_DELIM        40  /* Line delimiting char */
813 #define INET_OPT_TCLASS             41  /* IPv6 transport class */
814 #define INET_OPT_BIND_TO_DEVICE     42  /* get/set network device the socket is bound to */
815 #define INET_OPT_RECVTOS            43  /* IP_RECVTOS ancillary data */
816 #define INET_OPT_RECVTCLASS         44  /* IPV6_RECVTCLASS ancillary data */
817 #define INET_OPT_PKTOPTIONS         45  /* IP(V6)_PKTOPTIONS get ancillary data */
818 #define INET_OPT_TTL                46  /* IP_TTL */
819 #define INET_OPT_RECVTTL            47  /* IP_RECVTTL ancillary data */
820 #define TCP_OPT_NOPUSH              48  /* super-Nagle, aka TCP_CORK */
821 /* SCTP options: a separate range, from 100: */
822 #define SCTP_OPT_RTOINFO		100
823 #define SCTP_OPT_ASSOCINFO		101
824 #define SCTP_OPT_INITMSG		102
825 #define SCTP_OPT_AUTOCLOSE		103
826 #define SCTP_OPT_NODELAY		104
827 #define SCTP_OPT_DISABLE_FRAGMENTS	105
828 #define SCTP_OPT_I_WANT_MAPPED_V4_ADDR	106
829 #define SCTP_OPT_MAXSEG			107
830 #define SCTP_OPT_SET_PEER_PRIMARY_ADDR  108
831 #define SCTP_OPT_PRIMARY_ADDR		109
832 #define SCTP_OPT_ADAPTATION_LAYER 	110
833 #define SCTP_OPT_PEER_ADDR_PARAMS	111
834 #define SCTP_OPT_DEFAULT_SEND_PARAM	112
835 #define SCTP_OPT_EVENTS			113
836 #define SCTP_OPT_DELAYED_ACK_TIME	114
837 #define SCTP_OPT_STATUS			115
838 #define SCTP_OPT_GET_PEER_ADDR_INFO	116
839 
840 /* INET_REQ_IFGET and INET_REQ_IFSET options */
841 #define INET_IFOPT_ADDR       1
842 #define INET_IFOPT_BROADADDR  2
843 #define INET_IFOPT_DSTADDR    3
844 #define INET_IFOPT_MTU        4
845 #define INET_IFOPT_NETMASK    5
846 #define INET_IFOPT_FLAGS      6
847 #define INET_IFOPT_HWADDR     7
848 
849 /* INET_REQ_GETSTAT enumeration */
850 #define INET_STAT_RECV_CNT   1
851 #define INET_STAT_RECV_MAX   2
852 #define INET_STAT_RECV_AVG   3
853 #define INET_STAT_RECV_DVI   4
854 #define INET_STAT_SEND_CNT   5
855 #define INET_STAT_SEND_MAX   6
856 #define INET_STAT_SEND_AVG   7
857 #define INET_STAT_SEND_PND   8
858 #define INET_STAT_RECV_OCT   9      /* received octets */
859 #define INET_STAT_SEND_OCT   10     /* sent octets */
860 
861 /* INET_IFOPT_FLAGS enumeration */
862 #define INET_IFF_UP            0x0001
863 #define INET_IFF_BROADCAST     0x0002
864 #define INET_IFF_LOOPBACK      0x0004
865 #define INET_IFF_POINTTOPOINT  0x0008
866 #define INET_IFF_RUNNING       0x0010
867 #define INET_IFF_MULTICAST     0x0020
868 /* Complement flags for turning them off */
869 #define INET_IFF_DOWN            0x0100
870 #define INET_IFF_NBROADCAST      0x0200
871 /* #define INET_IFF_NLOOPBACK    0x0400 */
872 #define INET_IFF_NPOINTTOPOINT   0x0800
873 /* #define INET_IFF_NRUNNING     0x1000 */
874 /* #define INET_IFF_NMULTICAST   0x2000 */
875 
/* Flags for "sctp_sndrcvinfo". Used in a bitmask -- must be powers of 2:
** INET_REQ_SETOPTS:SCTP_OPT_DEFAULT_SEND_PARAM
** The comment inside each value names the atom variable associated
** with that flag.
*/
#define SCTP_FLAG_UNORDERED (1 /* am_unordered */)
#define SCTP_FLAG_ADDR_OVER (2 /* am_addr_over */)
#define SCTP_FLAG_ABORT     (4 /* am_abort */)
#define SCTP_FLAG_EOF       (8 /* am_eof */)
#define SCTP_FLAG_SNDALL   (16 /* am_sndall, NOT YET IMPLEMENTED */)

/* Flags for "sctp_set_opts" (actually for SCTP_OPT_PEER_ADDR_PARAMS).
** These flags are also used in a bitmask, so they must be powers of 2.
** This set reuses the numeric values 1..16 of the "sctp_sndrcvinfo" set
** above, so a value is only meaningful together with the option it
** belongs to.
** Note the spelling: the macros say SACDELAY (no 'K') while the
** associated atoms are am_sackdelay_enable / am_sackdelay_disable.
*/
#define SCTP_FLAG_HB_ENABLE	    (1 /* am_hb_enable */)
#define SCTP_FLAG_HB_DISABLE	    (2 /* am_hb_disable */)
#define SCTP_FLAG_HB_DEMAND	    (4 /* am_hb_demand */)
#define	SCTP_FLAG_PMTUD_ENABLE	    (8 /* am_pmtud_enable */)
#define	SCTP_FLAG_PMTUD_DISABLE    (16 /* am_pmtud_disable */)
#define SCTP_FLAG_SACDELAY_ENABLE  (32 /* am_sackdelay_enable */)
#define SCTP_FLAG_SACDELAY_DISABLE (64 /* am_sackdelay_disable */)
895 
896 /* Flags for recv_cmsgflags */
897 #define INET_CMSG_RECVTOS        (1 << 0) /* am_recvtos, am_tos */
898 #define INET_CMSG_RECVTCLASS     (1 << 1) /* am_recvtclass, am_tclass */
899 #define INET_CMSG_RECVTTL        (1 << 2) /* am_recvttl, am_ttl */
900 
901 /* Inet flags */
902 #define INET_FLG_BUFFER_SET      (1 << 0) /* am_buffer has been set by user */
903 #define INET_FLG_IS_IGNORED      (1 << 1) /* If a fd is ignored by the inet_drv.
904                                              This flag should be set to true when
905                                              the fd is used outside of inet_drv. */
906 #define INET_FLG_IS_IGNORED_RD   (1 << 2)
907 #define INET_FLG_IS_IGNORED_WR   (1 << 3)
908 #define INET_FLG_IS_IGNORED_PASS (1 << 4)
909 
910 /*
911 ** End of interface constants.
912 **--------------------------------------------------------------------------*/
913 
/*
 * Socket states.  Each state is a combination of INET_F_* bits (defined
 * elsewhere in this file); every state except CLOSED includes INET_F_OPEN,
 * and the later states are built on top of the earlier ones.
 */
#define INET_STATE_CLOSED          (0)
#define INET_STATE_OPEN            (INET_F_OPEN)
#define INET_STATE_CONNECTED       (INET_STATE_OPEN | INET_F_ACTIVE)
#define INET_STATE_LISTENING       (INET_STATE_OPEN | INET_F_LISTEN)
#define INET_STATE_CONNECTING      (INET_STATE_OPEN | INET_F_CON)
#define INET_STATE_ACCEPTING       (INET_STATE_LISTENING | INET_F_ACC)
#define INET_STATE_MULTI_ACCEPTING (INET_STATE_ACCEPTING | INET_F_MULTI_CLIENT)

/*
 * State predicates on a descriptor pointer "d" (anything with a "state"
 * member).  Each one is true when ALL bits of the tested mask are set,
 * regardless of any other bits in d->state.
 */

/* True when the INET_F_OPEN bit is set. */
#define IS_OPEN(d) \
 (((d)->state & INET_F_OPEN) == INET_F_OPEN)

/* True when both the OPEN and ACTIVE bits are set. */
#define IS_CONNECTED(d) \
  (((d)->state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED)

/* True when the INET_F_CON bit is set (the OPEN bit is not checked). */
#define IS_CONNECTING(d) \
  (((d)->state & INET_F_CON) == INET_F_CON)

/* True when the INET_F_BUSY bit is set. */
#define IS_BUSY(d) \
  (((d)->state & INET_F_BUSY) == INET_F_BUSY)
933 
934 #define INET_MAX_OPT_BUFFER (64*1024)
935 
936 #define INET_DEF_BUFFER     1460        /* default buffer size */
937 #define INET_MIN_BUFFER     1           /* internal min buffer */
938 
939 #define INET_HIGH_WATERMARK (1024*8) /* 8k pending high => busy  */
940 #define INET_LOW_WATERMARK  (1024*4) /* 4k pending => allow more */
941 #define INET_HIGH_MSGQ_WATERMARK (1024*8) /* 8k pending high => busy  */
942 #define INET_LOW_MSGQ_WATERMARK  (1024*4) /* 4k pending => allow more */
943 
944 #define INET_INFINITY  0xffffffff  /* infinity value */
945 
946 #define INET_MAX_ASYNC 1           /* max number of async queue ops */
947 
948 /* INET_LOPT_UDP_PACKETS */
949 #define INET_PACKET_POLL     5   /* maximum number of packets to poll */
950 
951 /* Max interface name */
952 #define INET_IFNAMSIZ          16
953 
/* INET Ignore states
 *
 * INET_IGNORE_READ / _WRITE / _PASSIVE are bit masks for the "flags"
 * member of a descriptor; each one combines INET_FLG_IS_IGNORED with the
 * matching INET_FLG_IS_IGNORED_RD / _WR / _PASS bit.
 *
 * INET_IGNORE_CLEAR(desc) only EVALUATES to desc->flags with all of those
 * bits cleared; it does not store anything, so the caller has to assign
 * the result back to desc->flags itself.
 *
 * INET_IGNORED(desc) is non-zero when INET_FLG_IS_IGNORED is set.
 */
#define INET_IGNORE_CLEAR(desc) ((desc)->flags & ~(INET_IGNORE_READ|INET_IGNORE_WRITE|INET_IGNORE_PASSIVE))
#define INET_IGNORED(desc)   ((desc)->flags & INET_FLG_IS_IGNORED)
#define INET_IGNORE_READ    (INET_FLG_IS_IGNORED|INET_FLG_IS_IGNORED_RD)
#define INET_IGNORE_WRITE   (INET_FLG_IS_IGNORED|INET_FLG_IS_IGNORED_WR)
#define INET_IGNORE_PASSIVE (INET_FLG_IS_IGNORED|INET_FLG_IS_IGNORED_PASS)
960 
961 /* Max length of Erlang Term Buffer (for outputting structured terms):  */
962 #ifdef  HAVE_SCTP
963 #define PACKET_ERL_DRV_TERM_DATA_LEN  512
964 #else
965 #ifndef __WIN32__
966 /* Assume we have recvmsg() and might need room for ancillary data */
967 #define PACKET_ERL_DRV_TERM_DATA_LEN  64
968 #else
969 #define PACKET_ERL_DRV_TERM_DATA_LEN  32
970 #endif
971 #endif
972 
973 typedef struct _tcp_descriptor tcp_descriptor;
974 
975 #if defined(TCP_CORK)
976 #define INET_TCP_NOPUSH TCP_CORK
977 #elif defined(TCP_NOPUSH) && !defined(__DARWIN__)
978 #define INET_TCP_NOPUSH TCP_NOPUSH
979 #endif
980 
981 #define BIN_REALLOC_MARGIN(x)  ((x)/4)  /* 25% */
982 
/* The general purpose sockaddr.
 *
 * A union over the socket address layouts this driver handles, so one
 * object can be viewed as a generic sockaddr (sa), an IPv4 address (sai),
 * an IPv6 address (sai6, only with HAVE_IN6) or a Unix domain address
 * (sal, only with HAVE_SYS_UN_H).  The size of the union therefore
 * depends on which of those members are compiled in.
 */
typedef union {
    struct sockaddr sa;
    struct sockaddr_in sai;
#ifdef HAVE_IN6
    struct sockaddr_in6 sai6;
#endif
#ifdef HAVE_SYS_UN_H
    struct sockaddr_un sal;
#endif
} inet_address;
994 
995 
/* Port of the inet_address that "x" points to.
 *
 * Evaluates to the sin_port field exactly as stored (no byte order
 * conversion is done here) when the family is AF_INET or AF_INET6, and
 * to -1 for any other family.  Both families are read through the
 * sockaddr_in view (sai) of the union.
 * "x" is expanded several times, so it must be free of side effects.
 */
#define inet_address_port(x)			\
  ((((x)->sai.sin_family == AF_INET) ||		\
    ((x)->sai.sin_family == AF_INET6)) ?	\
   ((x)->sai.sin_port) : -1)
1000 
/*
 * Length in bytes of a serialized address starting at "data".
 * The first byte is the family tag (INET_AF_*):
 *
 *   INET_AF_INET   1 (family) + 2 + 4 bytes
 *   INET_AF_INET6  1 (family) + 2 + 16 bytes (only with HAVE_IN6 and AF_INET6)
 *   INET_AF_LOCAL  1 (family) + 1 (length byte) + the number of bytes
 *                  given by that length byte (only with HAVE_SYS_UN_H)
 *   anything else  1 (just the family byte)
 *
 * localaddrlen() handles the last two rows and is the fallback of
 * addrlen().  "data" is expanded several times, so it must be free of
 * side effects.
 */
#ifdef HAVE_SYS_UN_H
#define localaddrlen(data)				\
  ((((unsigned char*)(data))[0] == INET_AF_LOCAL) ?	\
   (1 + 1 + ((unsigned char*)(data))[1]) : 1)
#else
#define localaddrlen(data) (1)
#endif

#if defined(HAVE_IN6) && defined(AF_INET6)
#define addrlen(data)					\
    ((((unsigned char*)(data))[0] == INET_AF_INET) ?	\
     (1 + 2 + 4) :					\
     ((((unsigned char*)(data))[0] == INET_AF_INET6) ?	\
      (1 + 2 + 16) : localaddrlen(data)))
#else
#define addrlen(data)					\
    ((((unsigned char*)(data))[0] == INET_AF_INET) ?	\
     (1 + 2 + 4) : localaddrlen(data))
#endif
1020 
/*
 * One entry in a doubly linked list of timers, managed by the
 * *_multi_timer() functions declared right after this type.
 */
typedef struct _multi_timer_data {
    ErlDrvTime when;            /* presumably the time at which the entry
                                 * expires -- TODO confirm against
                                 * add_multi_timer() */
    ErlDrvTermData caller;      /* passed on to timeout_function */
    /* callback taking the driver data and the caller stored above */
    void (*timeout_function)(ErlDrvData drv_data, ErlDrvTermData caller);
    struct _multi_timer_data *next;   /* list links */
    struct _multi_timer_data *prev;
} MultiTimerData;
1028 
1029 static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
1030                                        ErlDrvTermData caller, unsigned timeout,
1031                                        void (*timeout_fun)(ErlDrvData drv_data,
1032                                                            ErlDrvTermData caller));
1033 static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
1034 			      ErlDrvData data);
1035 static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p);
1036 static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
1037                                void (*timeout_fun)(ErlDrvData drv_data,
1038                                                    ErlDrvTermData caller));
1039 
1040 static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller);
1041 static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port);
1042 
/*
 * Book-keeping for one outstanding asynchronous request on a socket.
 * The "tmo" union holds either a plain timeout value or a pointer to a
 * MultiTimerData entry; nothing in this struct records which member is
 * the active one, so that is decided by the code using the op.
 */
typedef struct {
    int            id;      /* id used to identify reply */
    ErlDrvTermData caller;  /* recipient of async reply */
    int            req;     /* Request id (CONNECT/ACCEPT/RECV) */
    union {
	unsigned       value; /* Request timeout (since op issued,not started) */
	MultiTimerData *mtd;  /* timer entry used instead of a plain value */
    } tmo;
    ErlDrvMonitor monitor;  /* driver monitor handle belonging to this op */
} inet_async_op;
1053 
/*
 * Singly linked list node wrapping an inet_async_op; used for the
 * multi_first / multi_last queue of the TCP descriptor.
 */
typedef struct inet_async_multi_op_ {
    inet_async_op op;                   /* the queued operation */
    struct inet_async_multi_op_ *next;  /* next node in the list */
} inet_async_multi_op;
1058 
1059 
/*
 * Singly linked list of subscribers.  A head node whose subscriber equals
 * NO_PROCESS is what NO_SUBSCRIBERS() treats as "no subscribers".
 */
typedef struct subs_list_ {
  ErlDrvTermData subscriber;    /* subscriber term, or NO_PROCESS */
  struct subs_list_ *next;      /* next node in the list */
} subs_list;
1064 
1065 #define NO_PROCESS 0
1066 #define NO_SUBSCRIBERS(SLP) ((SLP)->subscriber == NO_PROCESS)
1067 static void send_to_subscribers(ErlDrvTermData, subs_list *, int,
1068 				ErlDrvTermData [], int);
1069 static void free_subscribers(subs_list*);
1070 static int save_subscriber(subs_list *, ErlDrvTermData);
1071 
/*
 * Protocol independent per-socket state.  It is embedded as the first
 * member ("inet") of both the TCP and the UDP/SCTP descriptor, which is
 * what the INETP() macro returns a pointer to.
 */
typedef struct {
    SOCKET s;                   /* the socket or INVALID_SOCKET if not open */
    HANDLE event;               /* Event handle (same as s in unix) */
    long  event_mask;           /* current FD events */
#ifdef __WIN32__
    long forced_events;           /* Mask of events that are forcefully signalled
				   on windows see winsock_event_select
				   for details */
    int send_would_block;       /* Last send attempt failed with "WOULDBLOCK" */
#endif
    ErlDrvPort  port;           /* the port identifier */
    ErlDrvTermData dport;       /* the port identifier as DriverTermData */
    int   state;                /* status, a combination of INET_F_* bits
				   (see INET_STATE_* and IS_OPEN() etc.) */
    int   prebound;             /* only set when opened with inet_fdopen */
    int   mode;                 /* BINARY | LIST
				   (affect how to interpret hsz) */
    int   exitf;                /* exit port on close or not */
    int   deliver;              /* Delivery mode, TERM or PORT */

    ErlDrvTermData caller;      /* recipient of sync reply */
    ErlDrvTermData busy_caller; /* recipient of sync reply when caller busy.
				 * Only valid while INET_F_BUSY. */

    inet_async_op* oph;          /* queue head or NULL */
    inet_async_op* opt;          /* queue tail or NULL */
    inet_async_op  op_queue[INET_MAX_ASYNC];  /* call queue */
    int op_ref;                 /* queue reference generator
				   (see NEW_ASYNC_ID) */

    int   active;               /* 0 = passive, 1 = active, 2 = active once */
    Sint16 active_count;        /* counter for {active,N} */
    int   stype;                /* socket type:
				    SOCK_STREAM/SOCK_DGRAM/SOCK_SEQPACKET   */
    int   sprotocol;            /* socket protocol:
				   IPPROTO_TCP|IPPROTO_UDP|IPPROTO_SCTP     */
    int   sfamily;              /* address family */
    enum PacketParseType htype; /* header type (TCP only?) */
    unsigned int psize;         /* max packet size (TCP only?) */
    inet_address remote;        /* remote address for connected sockets */
    inet_address peer_addr;     /* fake peer address */
    inet_address name_addr;     /* fake local address */

    inet_address* peer_ptr;     /* fake peername or NULL */
    inet_address* name_ptr;     /* fake sockname or NULL */
    SOCKLEN_T peer_addr_len;    /* fake peername size */
    SOCKLEN_T name_addr_len;    /* fake sockname size */

    int   bufsz;                /* minimum buffer constraint */
    unsigned int hsz;           /* the list header size; the field is
				   unsigned, so storing -1 gives a very
				   large value */
    /* statistics */
#ifdef ARCH_64
    Uint64        recv_oct;     /* number of received octets, 64 bits */
#else
    Uint32        recv_oct[2];  /* number of received octets, 64 bits */
#endif
    unsigned long recv_cnt;     /* number of packets received */
    unsigned long recv_max;     /* maximum packet size received */
    double recv_avg;            /* average packet size received */
    double recv_dvi;            /* average deviation from avg_size */
#ifdef ARCH_64
    Uint64        send_oct;     /* number of octets sent, 64 bits */
#else
    Uint32        send_oct[2];  /* number of octets sent, 64 bits */
#endif
    char          delimiter;    /* Line delimiting character (def: '\n')  */
    unsigned long send_cnt;     /* number of packets sent */
    unsigned long send_max;     /* maximum packet send */
    double send_avg;            /* average packet size sent */

    subs_list empty_out_q_subs; /* Empty out queue subscribers */
    int flags;                  /* INET_FLG_* bits */
#ifdef HAVE_SETNS
    char *netns;                /* Socket network namespace name
				   as full file path */
#endif
    int recv_cmsgflags;         /* Which ancillary data to expect
				   (INET_CMSG_* bits) */
} inet_descriptor;
1148 
1149 
1150 
1151 #define TCP_MAX_PACKET_SIZE 0x4000000  /* 64 M */
1152 
1153 #define MAX_VSIZE 16		/* Max number of entries allowed in an I/O
1154 				 * vector sock_sendv().
1155 				 */
1156 
1157 static int tcp_inet_init(void);
1158 static void tcp_inet_stop(ErlDrvData);
1159 static void tcp_inet_command(ErlDrvData, char*, ErlDrvSizeT);
1160 static void tcp_inet_commandv(ErlDrvData, ErlIOVec*);
1161 static void tcp_inet_flush(ErlDrvData drv_data);
1162 static void tcp_inet_drv_input(ErlDrvData, ErlDrvEvent);
1163 static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event);
1164 static ErlDrvData tcp_inet_start(ErlDrvPort, char* command);
1165 static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData, unsigned int,
1166 				 char*, ErlDrvSizeT, char**, ErlDrvSizeT);
1167 static void tcp_inet_timeout(ErlDrvData);
1168 static void tcp_inet_process_exit(ErlDrvData, ErlDrvMonitor *);
1169 static void inet_stop_select(ErlDrvEvent, void*);
1170 static void inet_emergency_close(ErlDrvData);
1171 #ifdef __WIN32__
1172 static void tcp_inet_event(ErlDrvData, ErlDrvEvent);
1173 static void find_dynamic_functions(void);
1174 #endif
1175 
/*
 * Driver entry for the "tcp_inet" driver.  The initializer is positional;
 * the slot names in the comments follow the labels used for
 * inet_driver_entry further down and the erl_drv_entry documentation.
 * On Windows the socket events arrive through tcp_inet_event in the
 * ready_input slot and ready_output is left empty.
 */
static struct erl_drv_entry tcp_inet_driver_entry =
{
    tcp_inet_init,  /* inet_init will add this driver !! */
    tcp_inet_start,         /* start */
    tcp_inet_stop,          /* stop */
    tcp_inet_command,       /* output */
#ifdef __WIN32__
    tcp_inet_event,         /* ready_input */
    NULL,                   /* ready_output */
#else
    tcp_inet_drv_input,     /* ready_input */
    tcp_inet_drv_output,    /* ready_output */
#endif
    "tcp_inet",             /* driver name */
    NULL,                   /* finish */
    NULL,                   /* handle */
    tcp_inet_ctl,           /* control */
    tcp_inet_timeout,       /* timeout */
    tcp_inet_commandv,      /* outputv */
    NULL,                   /* ready_async */
    tcp_inet_flush,         /* flush */
    NULL,                   /* call */
    NULL,                   /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    ERL_DRV_FLAG_USE_PORT_LOCKING|ERL_DRV_FLAG_SOFT_BUSY, /* driver flags */
    NULL,                   /* handle2 */
    tcp_inet_process_exit,  /* process_exit */
    inet_stop_select,       /* stop_select */
    inet_emergency_close    /* emergency_close */
};
1208 
1209 
1210 
1211 #ifdef HAVE_UDP
1212 static int        packet_inet_init(void);
1213 static void       packet_inet_stop(ErlDrvData);
1214 static void       packet_inet_command(ErlDrvData, char*, ErlDrvSizeT);
1215 static void       packet_inet_drv_input(ErlDrvData data, ErlDrvEvent event);
1216 static ErlDrvData udp_inet_start(ErlDrvPort, char* command);
1217 #ifdef HAVE_SCTP
1218 static ErlDrvData sctp_inet_start(ErlDrvPort, char* command);
1219 #endif
1220 static ErlDrvSSizeT packet_inet_ctl(ErlDrvData, unsigned int, char*,
1221 				    ErlDrvSizeT, char**, ErlDrvSizeT);
1222 static void       packet_inet_timeout(ErlDrvData);
1223 #ifdef __WIN32__
1224 static void       packet_inet_event(ErlDrvData, ErlDrvEvent);
1225 static SOCKET     make_noninheritable_handle(SOCKET s);
1226 static int        winsock_event_select(inet_descriptor *, int, int);
1227 #endif
1228 
/*
 * Driver entry for the "udp_inet" driver.  It uses the packet_inet_*
 * callbacks, which the "sctp_inet" entry uses as well; only the start
 * callback and the driver name differ between the two.  The initializer
 * is positional; slot names follow the labels used for inet_driver_entry
 * and the erl_drv_entry documentation.
 */
static struct erl_drv_entry udp_inet_driver_entry =
{
    packet_inet_init,  /* inet_init will add this driver !! */
    udp_inet_start,         /* start */
    packet_inet_stop,       /* stop */
    packet_inet_command,    /* output */
#ifdef __WIN32__
    packet_inet_event,      /* ready_input */
    NULL,                   /* ready_output */
#else
    packet_inet_drv_input,  /* ready_input */
    NULL,                   /* ready_output */
#endif
    "udp_inet",             /* driver name */
    NULL,                   /* finish */
    NULL,                   /* handle */
    packet_inet_ctl,        /* control */
    packet_inet_timeout,    /* timeout */
    NULL,                   /* outputv */
    NULL,                   /* ready_async */
    NULL,                   /* flush */
    NULL,                   /* call */
    NULL,                   /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    ERL_DRV_FLAG_USE_PORT_LOCKING, /* driver flags */
    NULL,                   /* handle2 */
    NULL,                   /* process_exit */
    inet_stop_select,       /* stop_select */
    inet_emergency_close    /* emergency_close */
};
1261 #endif
1262 
1263 #ifdef HAVE_SCTP
/*
 * Driver entry for the "sctp_inet" driver.  Identical to the "udp_inet"
 * entry except for the start callback and the driver name.  The
 * initializer is positional; slot names follow the labels used for
 * inet_driver_entry and the erl_drv_entry documentation.
 */
static struct erl_drv_entry sctp_inet_driver_entry =
{
    packet_inet_init,  /* inet_init will add this driver !! */
    sctp_inet_start,        /* start */
    packet_inet_stop,       /* stop */
    packet_inet_command,    /* output */
#ifdef __WIN32__
    packet_inet_event,      /* ready_input */
    NULL,                   /* ready_output */
#else
    packet_inet_drv_input,  /* ready_input */
    NULL,                   /* ready_output */
#endif
    "sctp_inet",            /* driver name */
    NULL,                   /* finish */
    NULL,                   /* handle */
    packet_inet_ctl,        /* control */
    packet_inet_timeout,    /* timeout */
    NULL,                   /* outputv */
    NULL,                   /* ready_async */
    NULL,                   /* flush */
    NULL,                   /* call */
    NULL,                   /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    ERL_DRV_FLAG_USE_PORT_LOCKING, /* driver flags */
    NULL,                   /* handle2 */
    NULL, /* process_exit */
    inet_stop_select,       /* stop_select */
    inet_emergency_close    /* emergency_close */
};
1296 #endif
1297 
/*
 * Per-socket state of a TCP socket.  The common inet_descriptor has to
 * stay the first member; INETP() hands out a pointer to it.
 */
struct _tcp_descriptor {
    inet_descriptor inet;       /* common data structure (DON'T MOVE) */
    int   high;                 /* high watermark */
    int   low;                  /* low watermark */
    int   send_timeout;         /* timeout to use in send */
    int   send_timeout_close;   /* auto-close socket on send_timeout */
    int   busy_on_send;         /* busy on send with timeout! */
    int   i_bufsz;              /* current input buffer size (<= bufsz) */
    ErlDrvBinary* i_buf;        /* current binary buffer */
    char*         i_ptr;        /* current pos in buf */
    char*         i_ptr_start;  /* packet start pos in buf */
    int           i_remain;     /* remaining chars to read */
    int           tcp_add_flags;/* Additional TCP descriptor flags */
    int           http_state;   /* 0 = response|request  1=headers fields */
    inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */
    inet_async_multi_op *multi_last; /* presumably the tail of the same
                                      * queue -- TODO confirm */
    MultiTimerData *mtd;       /* Timer structures for multiple accept */
    MultiTimerData *mtd_cache; /* A cache for timer allocations */
#ifdef HAVE_SENDFILE
    struct {
        ErlDrvSizeT ioq_skip;   /* The number of bytes in the queue at the time
                                 * sendfile was issued, which must be sent
                                 * before issuing the sendfile call itself. */
        int dup_file_fd;        /* The file handle to send from; this is
                                 * duplicated when sendfile is issued to
                                 * reduce (but not eliminate) the impact of a
                                 * nasty race, so we have to remember to close
                                 * it. */
        /* NOTE(review): the three counters below look like progress so far,
         * position in the file and amount requested -- confirm against
         * tcp_inet_sendfile(). */
        Uint64 bytes_sent;
        Uint64 offset;
        Uint64 length;
    } sendfile;
#endif
};
1332 
1333 /* send function */
1334 static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len);
1335 static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev);
1336 static int tcp_recv(tcp_descriptor* desc, int request_len);
1337 static int tcp_deliver(tcp_descriptor* desc, int len);
1338 
1339 static int tcp_shutdown_error(tcp_descriptor* desc, int err);
1340 
1341 #ifdef HAVE_SENDFILE
1342 static int tcp_inet_sendfile(tcp_descriptor* desc);
1343 static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error);
1344 #endif
1345 
1346 static int tcp_inet_output(tcp_descriptor* desc, HANDLE event);
1347 static int tcp_inet_input(tcp_descriptor* desc, HANDLE event);
1348 
1349 static void tcp_desc_close(tcp_descriptor*);
1350 
1351 #ifdef HAVE_UDP
/*
 * Per-socket state of a packet socket (this descriptor type is what
 * packet_inet_input() takes).  The common inet_descriptor has to stay
 * the first member; INETP() hands out a pointer to it.
 */
typedef struct {
    inet_descriptor inet;   /* common data structure (DON'T MOVE) */
    int read_packets;       /* Number of packets to read per invocation */
    int i_bufsz;            /* current input buffer size */
    ErlDrvBinary* i_buf;    /* current binary buffer */
    char* i_ptr;            /* current pos in buf */
} udp_descriptor;
1359 
1360 
1361 static int packet_inet_input(udp_descriptor* udesc, HANDLE event);
1362 #endif
1363 
/* convert descriptor pointer to inet_descriptor pointer
 * (takes the address of the "inet" member of the descriptor) */
#define INETP(d) (&(d)->inet)

/* Next async request id: the current value of desc->op_ref masked to its
 * low 16 bits.  op_ref itself is post-incremented without masking, so only
 * the returned id wraps at 0xffff. */
#define NEW_ASYNC_ID(desc) ((desc)->op_ref++ & 0xffff)

/* check for transition from active to passive
 *
 * INET_ONCE turns into INET_PASSIVE right away.  For INET_MULTI the
 * active_count is decremented first, and only when it reaches zero does
 * the socket become passive and packet_passive_message() get called.
 * Any other value of "active" is left untouched.
 * "inet" is expanded several times, so it must be free of side effects.
 */
#define INET_CHECK_ACTIVE_TO_PASSIVE(inet)                              \
    do {                                                                \
        if ((inet)->active == INET_ONCE)                                \
            (inet)->active = INET_PASSIVE;                              \
        else if ((inet)->active == INET_MULTI && --((inet)->active_count) == 0) { \
            (inet)->active = INET_PASSIVE;                              \
            packet_passive_message(inet);                               \
        }                                                               \
    } while (0)
1379 
1380 static ErlDrvTermData am_ok;
1381 static ErlDrvTermData am_undefined;
1382 static ErlDrvTermData am_unspec;
1383 static ErlDrvTermData am_tcp;
1384 static ErlDrvTermData am_error;
1385 static ErlDrvTermData am_einval;
1386 static ErlDrvTermData am_inet_async;
1387 static ErlDrvTermData am_inet_reply;
1388 static ErlDrvTermData am_timeout;
1389 static ErlDrvTermData am_closed;
1390 static ErlDrvTermData am_tcp_passive;
1391 static ErlDrvTermData am_tcp_closed;
1392 static ErlDrvTermData am_tcp_error;
1393 static ErlDrvTermData am_empty_out_q;
1394 static ErlDrvTermData am_ssl_tls;
1395 #ifdef HAVE_UDP
1396 static ErlDrvTermData am_udp;
1397 static ErlDrvTermData am_udp_passive;
1398 static ErlDrvTermData am_udp_error;
1399 #endif
1400 #ifdef HAVE_SYS_UN_H
1401 static ErlDrvTermData am_local;
1402 #endif
1403 #ifndef __WIN32__
1404 static ErlDrvTermData am_tos;
1405 static ErlDrvTermData am_tclass;
1406 static ErlDrvTermData am_ttl;
1407 #endif
1408 #ifdef HAVE_SCTP
1409 static ErlDrvTermData am_sctp;
1410 static ErlDrvTermData am_sctp_passive;
1411 static ErlDrvTermData am_sctp_error;
1412 static ErlDrvTermData am_true;
1413 static ErlDrvTermData am_false;
1414 static ErlDrvTermData am_buffer;
1415 static ErlDrvTermData am_mode;
1416 static ErlDrvTermData am_list;
1417 static ErlDrvTermData am_binary;
1418 static ErlDrvTermData am_active;
1419 static ErlDrvTermData am_once;
1420 static ErlDrvTermData am_multi;
1421 static ErlDrvTermData am_buffer;
1422 static ErlDrvTermData am_linger;
1423 static ErlDrvTermData am_recbuf;
1424 static ErlDrvTermData am_sndbuf;
1425 static ErlDrvTermData am_reuseaddr;
1426 static ErlDrvTermData am_dontroute;
1427 static ErlDrvTermData am_priority;
1428 static ErlDrvTermData am_recvtos;
1429 static ErlDrvTermData am_recvtclass;
1430 static ErlDrvTermData am_recvttl;
1431 static ErlDrvTermData am_ipv6_v6only;
1432 static ErlDrvTermData am_netns;
1433 static ErlDrvTermData am_bind_to_device;
1434 #endif
1435 #ifdef HAVE_SENDFILE
1436 static ErlDrvTermData am_sendfile;
1437 #endif
1438 
1439 static char str_eafnosupport[] = "eafnosupport";
1440 static char str_einval[] = "einval";
1441 
1442 /* special errors for bad ports and sequences */
1443 #define EXBADPORT "exbadport"
1444 #define EXBADSEQ  "exbadseq"
1445 
1446 
1447 static int inet_init(void);
1448 static ErlDrvSSizeT ctl_reply(int, char*, ErlDrvSizeT, char**, ErlDrvSizeT);
1449 
/*
 * The only non-static driver entry in this part of the file.  It has
 * nothing but an init callback; according to the comment on that slot,
 * inet_init is what adds the TCP, UDP and SCTP drivers.  The initializer
 * lists 23 positional values, so any member after those is
 * zero-initialized.
 */
struct erl_drv_entry inet_driver_entry =
{
    inet_init,  /* inet_init will add TCP, UDP and SCTP drivers */
    NULL, /* start */
    NULL, /* stop */
    NULL, /* output */
    NULL, /* ready_input */
    NULL, /* ready_output */
    "inet", /* driver name */
    NULL, /* finish */
    NULL, /* handle */
    NULL, /* control */
    NULL, /* timeout */
    NULL, /* outputv */
    NULL, /* ready_async */
    NULL, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    0, /* driver flags */
    NULL, /* handle2 */
    NULL, /* process_exit */
    NULL, /* stop_select */
};
1476 
/*
 * Fallback definitions of in6addr_any and in6addr_loopback for IPv6
 * builds where HAVE_IN6ADDR_ANY / HAVE_IN6ADDR_LOOPBACK is undefined or
 * zero.  The system initializer macro is used when HAVE_DECL_*_INIT says
 * it is declared, otherwise the 16 address bytes are spelled out.
 */
#if HAVE_IN6
#  if ! defined(HAVE_IN6ADDR_ANY) || ! HAVE_IN6ADDR_ANY
#    if HAVE_DECL_IN6ADDR_ANY_INIT
static const struct in6_addr in6addr_any = { { IN6ADDR_ANY_INIT } };
#    else
static const struct in6_addr in6addr_any =
    { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } };
#    endif /* HAVE_DECL_IN6ADDR_ANY_INIT */
#  endif /* ! HAVE_IN6ADDR_ANY */

#  if ! defined(HAVE_IN6ADDR_LOOPBACK) || ! HAVE_IN6ADDR_LOOPBACK
#    if HAVE_DECL_IN6ADDR_LOOPBACK_INIT
static const struct in6_addr in6addr_loopback =
    { { IN6ADDR_LOOPBACK_INIT } };
#    else
static const struct in6_addr in6addr_loopback =
    { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } };
#    endif /* HAVE_DECL_IN6ADDR_LOOPBACK_INIT */
#  endif /* ! HAVE_IN6ADDR_LOOPBACK */
#endif /* HAVE_IN6 */
1497 
1498 /* XXX: is this a driver interface function ??? */
1499 void erts_exit(int n, char*, ...);
1500 
1501 /*
1502  * Malloc wrapper,
1503  * we would like to change the behaviour for different
1504  * systems here.
1505  */
1506 
1507 #ifdef FATAL_MALLOC
1508 
/*
 * Allocate "size" bytes with driver_alloc().  If the allocation fails,
 * erts_exit() is called with ERTS_ERROR_EXIT and a message naming this
 * source file; callers therefore never have to check the result.
 */
static void *alloc_wrapper(ErlDrvSizeT size)
{
    void *mem = driver_alloc(size);

    if (mem != NULL) {
        return mem;
    }
    erts_exit(ERTS_ERROR_EXIT,"Out of virtual memory in malloc (%s)", __FILE__);
    return mem;
}
1515 #define ALLOC(X) alloc_wrapper(X)
1516 
/*
 * Resize the block "current" to "size" bytes with driver_realloc().  If
 * that returns NULL, erts_exit() is called with ERTS_ERROR_EXIT and a
 * message naming this source file.
 */
static void *realloc_wrapper(void *current, ErlDrvSizeT size)
{
    void *mem = driver_realloc(current,size);

    if (mem != NULL) {
        return mem;
    }
    erts_exit(ERTS_ERROR_EXIT,"Out of virtual memory in realloc (%s)", __FILE__);
    return mem;
}
1523 #define REALLOC(X,Y) realloc_wrapper(X,Y)
1524 #define FREE(P) driver_free((P))
1525 #else /* FATAL_MALLOC */
1526 
1527 #define ALLOC(X) driver_alloc((X))
1528 #define REALLOC(X,Y) driver_realloc((X), (Y))
1529 #define FREE(P) driver_free((P))
1530 
1531 #endif /* FATAL_MALLOC */
1532 
/* Initialise the atom variable am_NAME with the atom named NAME. */
#define INIT_ATOM(NAME) am_ ## NAME = driver_mk_atom(#NAME)

/*
 * Helpers for filling in an ErlDrvTermData term specification.
 *
 * Every LOAD_X(vec, i, ...) stores the ERL_DRV_X tag at vec[i], stores its
 * operands in the following slots, and evaluates to the index of the first
 * slot after what it wrote, so the usual pattern is
 *
 *     i = LOAD_X(vec, i, ...);
 *
 * LOAD_X_CNT is the number of slots LOAD_X occupies.  "vec" and "i" are
 * expanded several times, so they must be free of side effects.  No bounds
 * checking is done; the caller has to size "vec" using the *_CNT constants.
 */

#define LOAD_ATOM_CNT 2
#define LOAD_ATOM(vec, i, atom) \
  (((vec)[(i)] = ERL_DRV_ATOM), \
  ((vec)[(i)+1] = (atom)), \
  ((i)+LOAD_ATOM_CNT))

#define LOAD_INT_CNT 2
#define LOAD_INT(vec, i, val) \
  (((vec)[(i)] = ERL_DRV_INT), \
  ((vec)[(i)+1] = (ErlDrvTermData)(val)), \
  ((i)+LOAD_INT_CNT))

#define LOAD_UINT_CNT 2
#define LOAD_UINT(vec, i, val) \
  (((vec)[(i)] = ERL_DRV_UINT), \
  ((vec)[(i)+1] = (ErlDrvTermData)(val)), \
  ((i)+LOAD_UINT_CNT))

#define LOAD_PORT_CNT 2
#define LOAD_PORT(vec, i, port) \
  (((vec)[(i)] = ERL_DRV_PORT), \
  ((vec)[(i)+1] = (port)), \
  ((i)+LOAD_PORT_CNT))

#define LOAD_PID_CNT 2
#define LOAD_PID(vec, i, pid) \
  (((vec)[(i)] = ERL_DRV_PID), \
  ((vec)[(i)+1] = (pid)), \
  ((i)+LOAD_PID_CNT))

/* NB: the macro takes (bin, offs, len) but stores the length BEFORE the
 * offset: slot i+2 gets len and slot i+3 gets offs. */
#define LOAD_BINARY_CNT 4
#define LOAD_BINARY(vec, i, bin, offs, len) \
  (((vec)[(i)] = ERL_DRV_BINARY), \
  ((vec)[(i)+1] = (ErlDrvTermData)(bin)), \
  ((vec)[(i)+2] = (len)), \
  ((vec)[(i)+3] = (offs)), \
  ((i)+LOAD_BINARY_CNT))

/* Stores the POINTER "buf", not a copy of the bytes, so the buffer has to
 * stay valid for as long as the specification is in use. */
#define LOAD_BUF2BINARY_CNT 3
#define LOAD_BUF2BINARY(vec, i, buf, len) \
  (((vec)[(i)] = ERL_DRV_BUF2BINARY), \
  ((vec)[(i)+1] = (ErlDrvTermData)(buf)), \
  ((vec)[(i)+2] = (len)), \
  ((i)+LOAD_BUF2BINARY_CNT))

/* As for LOAD_BUF2BINARY, only the pointer "str" is stored. */
#define LOAD_STRING_CNT 3
#define LOAD_STRING(vec, i, str, len) \
  (((vec)[(i)] = ERL_DRV_STRING), \
  ((vec)[(i)+1] = (ErlDrvTermData)(str)), \
  ((vec)[(i)+2] = (len)), \
  ((i)+LOAD_STRING_CNT))

/* As for LOAD_STRING, but with the ERL_DRV_STRING_CONS tag. */
#define LOAD_STRING_CONS_CNT 3
#define LOAD_STRING_CONS(vec, i, str, len) \
  (((vec)[(i)] = ERL_DRV_STRING_CONS), \
  ((vec)[(i)+1] = (ErlDrvTermData)(str)), \
  ((vec)[(i)+2] = (len)), \
  ((i)+LOAD_STRING_CONS_CNT))

#define LOAD_TUPLE_CNT 2
#define LOAD_TUPLE(vec, i, size) \
  (((vec)[(i)] = ERL_DRV_TUPLE), \
  ((vec)[(i)+1] = (size)), \
  ((i)+LOAD_TUPLE_CNT))

/* The only helper that occupies a single slot (tag only, no operand). */
#define LOAD_NIL_CNT 1
#define LOAD_NIL(vec, i) \
  (((vec)[(i)] = ERL_DRV_NIL), \
  ((i)+LOAD_NIL_CNT))

#define LOAD_LIST_CNT 2
#define LOAD_LIST(vec, i, size) \
  (((vec)[(i)] = ERL_DRV_LIST), \
  ((vec)[(i)+1] = (size)), \
  ((i)+LOAD_LIST_CNT))
1610 
1611 
#ifdef HAVE_SCTP
    /* "IS_SCTP": tells the difference between a UDP and an SCTP socket: */
#   define IS_SCTP(desc) ((desc)->sprotocol==IPPROTO_SCTP)

    /* For AssocID, 4 bytes should be enough -- checked by "init": */
#   define GET_ASSOC_ID		get_int32
#   define ASSOC_ID_LEN		4
#   define LOAD_ASSOC_ID        LOAD_UINT
#   define LOAD_ASSOC_ID_CNT    LOAD_UINT_CNT
#else
    /* Without SCTP support no socket is an SCTP socket: */
#   define IS_SCTP(desc) 0
#endif
    /* Ancillary data buffer size: half the default buffer size.  The
       expansion is parenthesized so that the macro behaves as one value
       inside any larger expression (e.g. "x % ANC_BUFF_SIZE"): */
#   define ANC_BUFF_SIZE   (INET_DEF_BUFFER/2) /* XXX: not very good... */
1625 
1626 #ifdef HAVE_UDP
/*
 * Append the term specification for a serialized address to "spec",
 * starting at index "i", and return the index after the last slot written.
 *
 * "buf" starts with the family byte.  What is emitted:
 *
 *   INET_AF_INET    four ints (bytes 2..5 after the family byte), a
 *                   4-tuple, then the 16-bit value read from bytes 0..1
 *   INET_AF_INET6   eight 16-bit ints (bytes 2..17), an 8-tuple, then the
 *                   16-bit value read from bytes 0..1
 *   INET_AF_LOCAL   atom 'local', a binary of the length given by the next
 *                   byte, a 2-tuple, then the int 0
 *   INET_AF_UNSPEC  atom 'unspec', an empty binary, a 2-tuple, the int 0
 *   anything else   atom 'undefined', an empty binary, a 2-tuple, the int 0
 *
 * The binaries are loaded with LOAD_BUF2BINARY, i.e. as pointers into
 * "buf", not as copies.  No bounds checking is done on "spec" or "buf".
 */
static int load_address(ErlDrvTermData* spec, int i, char* buf)
{
    ErlDrvTermData tuple_arity;  /* arity of the address tuple */
    ErlDrvTermData port;         /* trailing integer element */
    char family = *buf++;        /* buf now points just past the family */

    switch (family) {
    case INET_AF_INET: {
        int k;

        for (k = 0;  k < 4;  k++) {
            spec[i++] = ERL_DRV_INT;
            spec[i++] = (ErlDrvTermData) ((unsigned char)buf[2 + k]);
        }
        tuple_arity = 4;
        port = (ErlDrvTermData) get_int16(buf);
        break;
    }
#if defined(HAVE_IN6) && defined(AF_INET6)
    case INET_AF_INET6: {
        char* word;

        for (word = buf + 2;  word < buf + 2 + 16;  word += 2) {
            spec[i++] = ERL_DRV_INT;
            spec[i++] = (ErlDrvTermData) get_int16(word);
        }
        tuple_arity = 8;
        port = (ErlDrvTermData) get_int16(buf);
        break;
    }
#endif
#ifdef HAVE_SYS_UN_H
    case INET_AF_LOCAL: {
        int len = ((unsigned char*)buf)[0];

        buf++;                   /* step over the length byte */
        i = LOAD_ATOM(spec, i, am_local);
        i = LOAD_BUF2BINARY(spec, i, buf, len);
        tuple_arity = 2;
        port = 0;
        break;
    }
#endif
    case INET_AF_UNSPEC:
    default: { /* default is INET_AF_UNDEFINED */
        ErlDrvTermData tag =
            (family == INET_AF_UNSPEC) ? am_unspec : am_undefined;

        i = LOAD_ATOM(spec, i, tag);
        i = LOAD_BUF2BINARY(spec, i, buf, 0);
        tuple_arity = 2;
        port = 0;
        break;
    }
    }

    /* Common tail: close the address tuple and add the port element */
    spec[i++] = ERL_DRV_TUPLE;
    spec[i++] = tuple_arity;
    spec[i++] = ERL_DRV_INT;
    spec[i++] = port;
    return i;
}
1688 #endif
1689 
1690 
1691 #ifdef HAVE_SCTP
1692 /* For SCTP, we often need to return {IP, Port} tuples: */
1693 static int inet_get_address(char* dst, inet_address* src, unsigned int* len);
1694 
1695 /* Max of {{int()*8},int()} | {{int()*4},int()} |
1696  *        {{'local',binary()},int()}
1697  */
1698 #define LOAD_INET_GET_ADDRESS_CNT					\
1699         (8*LOAD_INT_CNT + LOAD_TUPLE_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT)
1700 
/*
 * Append the term {Address, Port} for the socket address "addr" to "spec"
 * starting at index "i"; returns the index after the last slot written.
 * At most LOAD_INET_GET_ADDRESS_CNT slots are used.  "desc" is not used.
 */
static int load_inet_get_address
           (ErlDrvTermData* spec,    int i, inet_descriptor* desc,
	    struct sockaddr_storage* addr)
{
    /* The size of the buffer  used to stringify the addr  is the same as
       that of "sockaddr_storage" itself: only their layout is different.
       It is a fixed size array (not a VLA) and is zeroed, so that a failed
       inet_get_address() in a build without ASSERT() cannot make
       load_address() interpret indeterminate bytes:
    */
    char         abuf  [sizeof(struct sockaddr_storage)] = {0};
    unsigned int alen = sizeof(abuf);
    int res = inet_get_address(abuf, (inet_address*) addr, &alen);
    ASSERT(res==0); (void)res;

    /* Now "abuf" contains: Family(1b), Port(2b), IP(4|16b) */

    /* NB: for the IPv4 and IPv6 families load_address() creates tuples of
       copied Ints on the "spec" and installs no pointers.  For any other
       family it loads a binary with LOAD_BUF2BINARY, which DOES store a
       pointer into "abuf" -- that pointer dangles once this function has
       returned.
       NOTE(review): presumably only IP families reach this function;
       confirm against the callers:   */
    i = load_address(spec, i, abuf);  /* IP,Port | Family,Addr */
    i = LOAD_TUPLE     (spec, i, 2);
    return i;
}
1723 
/* Loading Boolean flags as Atoms: a non-zero "flag" loads am_true, a zero
 * "flag" loads am_false.
 * LOAD_BOOL expands to a single expression (no trailing ';'), exactly like
 * LOAD_ATOM, so it is used as:  i = LOAD_BOOL(spec, i, flag);
 */
#define LOAD_BOOL_CNT LOAD_ATOM_CNT
#define LOAD_BOOL(spec,   i,   flag)                          \
	LOAD_ATOM((spec), (i), ((flag) ? am_true : am_false))
1728 #endif /* HAVE_SCTP */
1729 
/* Assume a cache line size of 64 bytes */
#define INET_DRV_CACHE_LINE_SIZE ((ErlDrvUInt) 64)
/* Mask selecting the offset of an address within its cache line */
#define INET_DRV_CACHE_LINE_MASK (INET_DRV_CACHE_LINE_SIZE - 1)

/*
** Binary Buffer Management
** We keep a stack of usable buffers
*/
/* Maximum number of binaries kept on one stack */
#define BUFFER_STACK_SIZE 14
/* Maximum sum of the orig_size of all binaries kept on one stack */
#define BUFFER_STACK_MAX_MEM_SIZE (1024*1024)

/* Key under which get_bufstk() looks up and stores the buffer stack
 * through erl_drv_tsd_get()/erl_drv_tsd_set().
 */
ErlDrvTSDKey buffer_stack_key;

typedef struct {
    /* Sum of orig_size of the binaries currently on the stack */
    int mem_size;
    /* Number of binaries on the stack; also the next free index in stk */
    int pos;
    ErlDrvBinary* stk[BUFFER_STACK_SIZE];
} InetDrvBufStkBase;

/* The stack itself followed by padding whose size is
 * sizeof(InetDrvBufStkBase) rounded up to a multiple of the cache line
 * size.
 */
typedef struct {
    InetDrvBufStkBase buf;
    char align[(((sizeof(InetDrvBufStkBase) - 1) / INET_DRV_CACHE_LINE_SIZE) + 1)
	       * INET_DRV_CACHE_LINE_SIZE];
} InetDrvBufStk;
1754 
get_bufstk(void)1755 static InetDrvBufStk *get_bufstk(void)
1756 {
1757     InetDrvBufStk *bs = erl_drv_tsd_get(buffer_stack_key);
1758     if (bs)
1759 	return bs;
1760     bs = driver_alloc(sizeof(InetDrvBufStk)
1761 		      + INET_DRV_CACHE_LINE_SIZE - 1);
1762     if (!bs)
1763 	return NULL;
1764     if ((((ErlDrvUInt) bs) & INET_DRV_CACHE_LINE_MASK) != 0)
1765 	bs = ((InetDrvBufStk *)
1766 	      ((((ErlDrvUInt) bs) & ~INET_DRV_CACHE_LINE_MASK)
1767 	       + INET_DRV_CACHE_LINE_SIZE));
1768     erl_drv_tsd_set(buffer_stack_key, bs);
1769     bs->buf.pos = 0;
1770     bs->buf.mem_size = 0;
1771 
1772     ASSERT(bs == erl_drv_tsd_get(buffer_stack_key));
1773 
1774     return bs;
1775 }
1776 
alloc_buffer(ErlDrvSizeT minsz)1777 static ErlDrvBinary* alloc_buffer(ErlDrvSizeT minsz)
1778 {
1779     InetDrvBufStk *bs = get_bufstk();
1780 
1781     DEBUGF(("alloc_buffer: "LLU"\r\n", (llu_t)minsz));
1782 
1783     if (bs && bs->buf.pos > 0) {
1784 	long size;
1785 	ErlDrvBinary* buf = bs->buf.stk[--bs->buf.pos];
1786 	size = buf->orig_size;
1787 	bs->buf.mem_size -= size;
1788 	ASSERT(0 <= bs->buf.mem_size
1789 	       && bs->buf.mem_size <= BUFFER_STACK_MAX_MEM_SIZE);
1790 	if (size >= minsz)
1791 	    return buf;
1792 
1793 	driver_free_binary(buf);
1794     }
1795 
1796     ASSERT(!bs || bs->buf.pos != 0 || bs->buf.mem_size == 0);
1797 
1798     return driver_alloc_binary(minsz);
1799 }
1800 
1801 /*#define CHECK_DOUBLE_RELEASE 1*/
1802 #ifdef CHECK_DOUBLE_RELEASE
1803 static void
check_double_release(InetDrvBufStk * bs,ErlDrvBinary * buf)1804 check_double_release(InetDrvBufStk *bs, ErlDrvBinary* buf)
1805 {
1806 #ifdef __GNUC__
1807 #warning CHECK_DOUBLE_RELEASE is enabled, this is a custom build emulator
1808 #endif
1809     int i;
1810     for (i = 0; i < bs->buf.pos; ++i) {
1811 	if (bs->buf.stk[i] == buf) {
1812 	    erts_exit(ERTS_ABORT_EXIT,
1813 		     "Multiple buffer release in inet_drv, this "
1814 		     "is a bug, save the core and send it to "
1815 		     "support@erlang.ericsson.se!");
1816 	}
1817     }
1818 }
1819 #endif
1820 
/*
 * Give back "buf": push it on the buffer stack for later reuse by
 * alloc_buffer(), or free it with driver_free_binary() when it cannot be
 * kept. A NULL "buf" is ignored.
 *
 * The binary is freed when it is larger than BUFFER_STACK_MAX_MEM_SIZE
 * (the stack is not even looked up in that case), when no stack is
 * available, when keeping it would exceed BUFFER_STACK_MAX_MEM_SIZE in
 * total, or when the stack already holds BUFFER_STACK_SIZE binaries.
 */
static void release_buffer(ErlDrvBinary* buf)
{
    InetDrvBufStk *stack;
    long size;

    DEBUGF(("release_buffer: %ld\r\n", (buf==NULL) ? 0 : buf->orig_size));

    if (buf == NULL)
        return;

    size = buf->orig_size;
    if (size > BUFFER_STACK_MAX_MEM_SIZE) {
        driver_free_binary(buf);
        return;
    }

    stack = get_bufstk();
    if (stack == NULL
        || stack->buf.mem_size + size > BUFFER_STACK_MAX_MEM_SIZE
        || stack->buf.pos >= BUFFER_STACK_SIZE) {
        driver_free_binary(buf);
        return;
    }

#ifdef CHECK_DOUBLE_RELEASE
    check_double_release(stack, buf);
#endif
    ASSERT(stack->buf.pos != 0 || stack->buf.mem_size == 0);

    stack->buf.stk[stack->buf.pos++] = buf;
    stack->buf.mem_size += size;

    ASSERT(0 <= stack->buf.mem_size
           && stack->buf.mem_size <= BUFFER_STACK_MAX_MEM_SIZE);
}
1856 
1857 #ifdef HAVE_UDP
/*
 * Resize the binary "buf" to "newsz" bytes.
 * Returns whatever driver_realloc_binary() returns.
 */
static ErlDrvBinary* realloc_buffer(ErlDrvBinary* buf, ErlDrvSizeT newsz)
{
    /* "newsz" is an ErlDrvSizeT, so it is printed with LLU/(llu_t) in the
       same way alloc_buffer() prints its size argument; the old size is
       cast so that it matches %ld. */
    DEBUGF(("realloc_buffer: %ld -> "LLU"\r\n",
            (buf==NULL) ? 0L : (long)buf->orig_size, (llu_t)newsz));
    return driver_realloc_binary(buf, newsz);
}
1863 #endif
1864 
1865 /* use a TRICK, access the refc field to see if any one else has
1866  * a ref to this buffer then call driver_free_binary else
1867  * release_buffer instead
1868  */
free_buffer(ErlDrvBinary * buf)1869 static void free_buffer(ErlDrvBinary* buf)
1870 {
1871     DEBUGF(("free_buffer: %ld\r\n", (buf==NULL) ? 0 : buf->orig_size));
1872 
1873     if (buf != NULL) {
1874 	if (driver_binary_get_refc(buf) == 1)
1875 	    release_buffer(buf);
1876 	else
1877 	    driver_free_binary(buf);
1878     }
1879 }
1880 
1881 
1882 #ifdef __WIN32__
1883 
dummy_start(ErlDrvPort port,char * command)1884 static ErlDrvData dummy_start(ErlDrvPort port, char* command)
1885 {
1886     return (ErlDrvData)port;
1887 }
1888 
dummy_ctl(ErlDrvData data,unsigned int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)1889 static ErlDrvSSizeT dummy_ctl(ErlDrvData data, unsigned int cmd,
1890 			      char* buf, ErlDrvSizeT len, char** rbuf,
1891 			      ErlDrvSizeT rsize)
1892 {
1893     static char error[] = "no_winsock2";
1894 
1895     driver_failure_atom((ErlDrvPort)data, error);
1896     return ctl_reply(INET_REP_ERROR, error, sizeof(error), rbuf, rsize);
1897 }
1898 
/* Command callback of the placeholder drivers: all data is ignored. */
static void dummy_command(ErlDrvData data, char* buf, ErlDrvSizeT len)
{
    /* Intentionally does nothing */
    (void) data;
    (void) buf;
    (void) len;
}
1902 
/* Placeholder driver entry named "tcp_inet" (Win32 only). It is wired to
 * the dummy_* callbacks, whose control callback always answers with the
 * "no_winsock2" error.
 * NOTE(review): the unlabelled slots after the name presumably follow the
 * erl_drv_entry order finish, handle, control, timeout, outputv -- confirm
 * against erl_driver.h.
 */
static struct erl_drv_entry dummy_tcp_driver_entry =
{
    NULL,			/* init */
    dummy_start,		/* start */
    NULL,			/* stop */
    dummy_command,		/* command */
    NULL,			/* input */
    NULL,			/* output */
    "tcp_inet",			/* name */
    NULL,
    NULL,
    dummy_ctl,
    NULL,
    NULL
};
1918 
/* Placeholder driver entry named "udp_inet" (Win32 only). It is wired to
 * the dummy_* callbacks, whose control callback always answers with the
 * "no_winsock2" error.
 * NOTE(review): the unlabelled slots after the name presumably follow the
 * erl_drv_entry order finish, handle, control, timeout, outputv -- confirm
 * against erl_driver.h.
 */
static struct erl_drv_entry dummy_udp_driver_entry =
{
    NULL,			/* init */
    dummy_start,		/* start */
    NULL,			/* stop */
    dummy_command,		/* command */
    NULL,			/* input */
    NULL,			/* output */
    "udp_inet",			/* name */
    NULL,
    NULL,
    dummy_ctl,
    NULL,
    NULL
};
1934 
1935 #ifdef HAVE_SCTP
/* Placeholder driver entry named "sctp_inet" (Win32 with HAVE_SCTP only).
 * It is wired to the dummy_* callbacks, whose control callback always
 * answers with the "no_winsock2" error.
 * NOTE(review): the unlabelled slots after the name presumably follow the
 * erl_drv_entry order finish, handle, control, timeout, outputv -- confirm
 * against erl_driver.h.
 */
static struct erl_drv_entry dummy_sctp_driver_entry =
{				/* Though there is no SCTP for Win32 yet... */
    NULL,			/* init */
    dummy_start,		/* start */
    NULL,			/* stop */
    dummy_command,		/* command */
    NULL,			/* input */
    NULL,			/* output */
    "sctp_inet",		/* name */
    NULL,
    NULL,
    dummy_ctl,
    NULL,
    NULL
};
1951 #endif
1952 
1953 #endif
1954 
1955 /* return lowercase string form of errno value */
errno_str(int err)1956 static char *errno_str(int err)
1957 {
1958     switch (err) {
1959     case INET_ERRNO_SYSTEM_LIMIT:
1960 	return "system_limit";
1961     default:
1962 	return erl_errno_id(err);
1963     }
1964 }
1965 
1966 /* general control reply function */
ctl_reply(int rep,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)1967 static ErlDrvSSizeT ctl_reply(int rep, char* buf, ErlDrvSizeT len,
1968 			      char** rbuf, ErlDrvSizeT rsize)
1969 {
1970     char* ptr;
1971 
1972     if ((len+1) > rsize) {
1973 	ptr = ALLOC(len+1);
1974 	*rbuf = ptr;
1975     }
1976     else
1977 	ptr = *rbuf;
1978     *ptr++ = rep;
1979     memcpy(ptr, buf, len);
1980     return len+1;
1981 }
1982 
1983 /* general control error reply function */
ctl_error(int err,char ** rbuf,ErlDrvSizeT rsize)1984 static ErlDrvSSizeT ctl_error(int err, char** rbuf, ErlDrvSizeT rsize)
1985 {
1986     char* s = errno_str(err);
1987 
1988     return ctl_reply(INET_REP_ERROR, s, strlen(s), rbuf, rsize);
1989 }
1990 
ctl_xerror(char * xerr,char ** rbuf,ErlDrvSizeT rsize)1991 static ErlDrvSSizeT ctl_xerror(char* xerr, char** rbuf, ErlDrvSizeT rsize)
1992 {
1993     int n = strlen(xerr);
1994     return ctl_reply(INET_REP_ERROR, xerr, n, rbuf, rsize);
1995 }
1996 
1997 
error_atom(int err)1998 static ErlDrvTermData error_atom(int err)
1999 {
2000     return driver_mk_atom(errno_str(err));
2001 }
2002 
2003 
/*
 * Append an operation with the already known async id "id" to the tail of
 * the multi-op list of "desc". The monitor at "monitorp" is copied into
 * the new list node, so "monitorp" must point to a valid monitor.
 */
static void enq_old_multi_op(tcp_descriptor *desc, int id, int req,
                             ErlDrvTermData caller, MultiTimerData *timeout,
                             ErlDrvMonitor *monitorp)
{
    inet_async_multi_op *node = ALLOC(sizeof(inet_async_multi_op));

    node->next = NULL;
    node->op.id = id;
    node->op.req = req;
    node->op.caller = caller;
    node->op.tmo.mtd = timeout;
    memcpy(&(node->op.monitor), monitorp, sizeof(ErlDrvMonitor));

    /* Link the node in after the current tail (or as the only element) */
    if (desc->multi_first != NULL)
        desc->multi_last->next = node;
    else
        desc->multi_first = node;
    desc->multi_last = node;
}
2026 
/*
 * Queue a multi-op under a freshly generated async id. When "buf" is not
 * NULL the id is also written to it with put_int16().
 */
static void enq_multi_op(tcp_descriptor *desc, char *buf, int req,
                         ErlDrvTermData caller, MultiTimerData *timeout,
                         ErlDrvMonitor *monitorp)
{
    int id = NEW_ASYNC_ID(INETP(desc));

    enq_old_multi_op(desc, id, req, caller, timeout, monitorp);

    if (buf == NULL)
        return;
    put_int16(id, buf);
}
2036 
/*
 * Remove the first operation from the multi-op list of "desc" and free
 * its node.
 *
 * The id, request and caller are stored through "id_p", "req_p" and
 * "caller_p"; the timer data and the monitor are only stored when
 * "timeout_p" / "monitorp" are not NULL.
 *
 * Returns 0 on success and -1 when the list is empty.
 */
static int deq_multi_op(tcp_descriptor *desc, int *id_p, int *req_p,
                        ErlDrvTermData *caller_p, MultiTimerData **timeout_p,
                        ErlDrvMonitor *monitorp)
{
    inet_async_multi_op *head = desc->multi_first;

    if (head == NULL)
        return -1;

    /* Unlink the head; an emptied list has no tail either */
    desc->multi_first = head->next;
    if (head->next == NULL)
        desc->multi_last = NULL;

    *id_p = head->op.id;
    *req_p = head->op.req;
    *caller_p = head->op.caller;
    if (timeout_p != NULL)
        *timeout_p = head->op.tmo.mtd;
    if (monitorp != NULL)
        memcpy(monitorp, &(head->op.monitor), sizeof(ErlDrvMonitor));

    FREE(head);
    return 0;
}
2062 
/*
 * Remove the first operation queued by "caller" from the multi-op list of
 * "desc" and free its node.
 *
 * The id and request are stored through "id_p" and "req_p"; the timer data
 * and the monitor are only stored when "timeout_p" / "monitorp" are not
 * NULL.
 *
 * Returns 0 on success and -1 when "caller" has no queued operation.
 */
static int remove_multi_op(tcp_descriptor *desc, int *id_p, int *req_p,
                           ErlDrvTermData caller, MultiTimerData **timeout_p,
                           ErlDrvMonitor *monitorp)
{
    inet_async_multi_op *prev = NULL;
    inet_async_multi_op *cur = desc->multi_first;

    while (cur != NULL && cur->op.caller != caller) {
        prev = cur;
        cur = cur->next;
    }
    if (cur == NULL)
        return -1;

    /* Unlink the node and fix up the head/tail pointers */
    if (prev != NULL)
        prev->next = cur->next;
    else
        desc->multi_first = cur->next;
    if (desc->multi_last == cur)
        desc->multi_last = prev;

    *id_p = cur->op.id;
    *req_p = cur->op.req;
    if (timeout_p != NULL)
        *timeout_p = cur->op.tmo.mtd;
    if (monitorp != NULL)
        memcpy(monitorp, &(cur->op.monitor), sizeof(ErlDrvMonitor));

    FREE(cur);
    return 0;
}
2094 
2095 /* setup a new async id + caller (format async_id into buf) */
2096 
enq_async_w_tmo(inet_descriptor * desc,char * buf,int req,unsigned timeout,ErlDrvMonitor * monitorp)2097 static int enq_async_w_tmo(inet_descriptor* desc, char* buf, int req, unsigned timeout,
2098 			   ErlDrvMonitor *monitorp)
2099 {
2100     int id = NEW_ASYNC_ID(desc);
2101     inet_async_op* opp;
2102 
2103     if ((opp = desc->oph) == NULL)            /* queue empty */
2104 	opp = desc->oph = desc->opt = desc->op_queue;
2105     else if (desc->oph == desc->opt) { /* queue full */
2106 	DEBUGF(("enq(%ld): queue full\r\n", (long)desc->port));
2107 	return -1;
2108     }
2109 
2110     opp->id = id;
2111     opp->caller = driver_caller(desc->port);
2112     opp->req = req;
2113     opp->tmo.value = timeout;
2114     if (monitorp != NULL) {
2115 	memcpy(&(opp->monitor),monitorp,sizeof(ErlDrvMonitor));
2116     }
2117 
2118     DEBUGF(("enq(%ld): %d %ld %d\r\n",
2119 	    (long) desc->port, opp->id, opp->caller, opp->req));
2120 
2121     opp++;
2122     if (opp >= desc->op_queue + INET_MAX_ASYNC)
2123 	desc->oph = desc->op_queue;
2124     else
2125 	desc->oph = opp;
2126 
2127     if (buf != NULL)
2128 	put_int16(id, buf);
2129     return 0;
2130 }
2131 
/* Queue an async operation with the timeout INET_INFINITY and without a
 * monitor. Returns what enq_async_w_tmo() returns.
 */
static int enq_async(inet_descriptor* desc, char* buf, int req)
{
    ErlDrvMonitor *no_monitor = NULL;

    return enq_async_w_tmo(desc, buf, req, INET_INFINITY, no_monitor);
}
2136 
/*
 * Take the oldest entry out of the circular op queue of "desc".
 *
 * The id, caller and request are stored through "ap", "cp" and "rp"; the
 * timeout value and the monitor are only stored when "tp" / "monitorp"
 * are not NULL.
 *
 * Returns 0 on success and -1 when the queue is empty.
 */
static int deq_async_w_tmo(inet_descriptor* desc, int* ap, ErlDrvTermData* cp,
                           int* rp, unsigned *tp, ErlDrvMonitor *monitorp)
{
    inet_async_op *slot = desc->opt;

    if (slot == NULL) {
        /* queue empty */
        DEBUGF(("deq(%ld): queue empty\r\n", (long)desc->port));
        return -1;
    }

    *ap = slot->id;
    *cp = slot->caller;
    *rp = slot->req;
    if (tp != NULL)
        *tp = slot->tmo.value;
    if (monitorp != NULL)
        memcpy(monitorp, &(slot->monitor), sizeof(ErlDrvMonitor));

    DEBUGF(("deq(%ld): %d %ld %d\r\n",
            (long)desc->port, slot->id, slot->caller, slot->req));

    /* Advance the tail, wrapping at the end of the queue */
    slot++;
    if (slot >= desc->op_queue + INET_MAX_ASYNC)
        slot = desc->op_queue;

    if (slot == desc->oph) {
        /* tail caught up with head: the queue is now empty */
        desc->opt = NULL;
        desc->oph = NULL;
    }
    else {
        desc->opt = slot;
    }
    return 0;
}
2169 
/* Dequeue the oldest async operation, ignoring its timeout and monitor.
 * Returns what deq_async_w_tmo() returns.
 */
static int deq_async(inet_descriptor* desc, int* ap, ErlDrvTermData* cp, int* rp)
{
    unsigned *no_timeout = NULL;
    ErlDrvMonitor *no_monitor = NULL;

    return deq_async_w_tmo(desc, ap, cp, rp, no_timeout, no_monitor);
}
2174 /* send message:
2175 **     {inet_async, Port, Ref, ok}
2176 */
2177 static int
send_async_ok(ErlDrvTermData Port,int Ref,ErlDrvTermData recipient)2178 send_async_ok(ErlDrvTermData Port, int Ref,ErlDrvTermData recipient)
2179 {
2180     ErlDrvTermData spec[2*LOAD_ATOM_CNT + LOAD_PORT_CNT +
2181 			LOAD_INT_CNT + LOAD_TUPLE_CNT];
2182     int i = 0;
2183 
2184     i = LOAD_ATOM(spec, i, am_inet_async);
2185     i = LOAD_PORT(spec, i, Port);
2186     i = LOAD_INT(spec, i, Ref);
2187     i = LOAD_ATOM(spec, i, am_ok);
2188     i = LOAD_TUPLE(spec, i, 4);
2189 
2190     ASSERT(i == sizeof(spec)/sizeof(*spec));
2191 
2192     return erl_drv_send_term(Port, recipient, spec, i);
2193 }
2194 
2195 /* send message:
2196 **     {inet_async, Port, Ref, {ok,Port2}}
2197 */
2198 static int
send_async_ok_port(ErlDrvTermData Port,int Ref,ErlDrvTermData recipient,ErlDrvTermData Port2)2199 send_async_ok_port(ErlDrvTermData Port, int Ref,
2200 		   ErlDrvTermData recipient, ErlDrvTermData Port2)
2201 {
2202     ErlDrvTermData spec[2*LOAD_ATOM_CNT + 2*LOAD_PORT_CNT +
2203 			LOAD_INT_CNT + 2*LOAD_TUPLE_CNT];
2204     int i = 0;
2205 
2206     i = LOAD_ATOM(spec, i, am_inet_async);
2207     i = LOAD_PORT(spec, i, Port);
2208     i = LOAD_INT(spec, i, Ref);
2209     {
2210 	i = LOAD_ATOM(spec, i, am_ok);
2211 	i = LOAD_PORT(spec, i, Port2);
2212 	i = LOAD_TUPLE(spec, i, 2);
2213     }
2214     i = LOAD_TUPLE(spec, i, 4);
2215 
2216     ASSERT(i == sizeof(spec)/sizeof(*spec));
2217 
2218     return erl_drv_send_term(Port, recipient, spec, i);
2219 }
2220 
2221 /* send message:
2222 **      {inet_async, Port, Ref, {error,Reason}}
2223 */
2224 static int
send_async_error(ErlDrvTermData Port,int Ref,ErlDrvTermData recipient,ErlDrvTermData Reason)2225 send_async_error(ErlDrvTermData Port, int Ref,
2226 		 ErlDrvTermData recipient, ErlDrvTermData Reason)
2227 {
2228     ErlDrvTermData spec[3*LOAD_ATOM_CNT + LOAD_PORT_CNT +
2229 			LOAD_INT_CNT + 2*LOAD_TUPLE_CNT];
2230     int i = 0;
2231 
2232     i = LOAD_ATOM(spec, i, am_inet_async);
2233     i = LOAD_PORT(spec, i, Port);
2234     i = LOAD_INT(spec, i, Ref);
2235     {
2236 	i = LOAD_ATOM(spec, i, am_error);
2237 	i = LOAD_ATOM(spec, i, Reason);
2238 	i = LOAD_TUPLE(spec, i, 2);
2239     }
2240     i = LOAD_TUPLE(spec, i, 4);
2241     ASSERT(i == sizeof(spec)/sizeof(*spec));
2242     DEBUGF(("send_async_error %ld %ld\r\n", recipient, Reason));
2243     return erl_drv_send_term(Port, recipient, spec, i);
2244 }
2245 
2246 
/* Dequeue the oldest async operation of "desc" and answer its caller with
 * {inet_async, Port, Ref, ok}.
 * Returns -1 when nothing is queued, otherwise the send_async_ok() result.
 */
static int async_ok(inet_descriptor* desc)
{
    ErlDrvTermData to;
    int ref;
    int req;    /* dequeued but not needed here */

    if (deq_async(desc, &ref, &to, &req) >= 0)
        return send_async_ok(desc->dport, ref, to);

    return -1;
}
2257 
/* Dequeue the oldest async operation of "desc" and answer its caller with
 * {inet_async, Port, Ref, {ok,Port2}}.
 * Returns -1 when nothing is queued, otherwise the send_async_ok_port()
 * result.
 */
static int async_ok_port(inet_descriptor* desc, ErlDrvTermData Port2)
{
    ErlDrvTermData to;
    int ref;
    int req;    /* dequeued but not needed here */

    if (deq_async(desc, &ref, &to, &req) >= 0)
        return send_async_ok_port(desc->dport, ref, to, Port2);

    return -1;
}
2268 
/* Dequeue the oldest async operation of "desc" and answer its caller with
 * {inet_async, Port, Ref, {error,reason}}, "reason" being an atom.
 * Returns -1 when nothing is queued, otherwise the send_async_error()
 * result.
 */
static int async_error_am(inet_descriptor* desc, ErlDrvTermData reason)
{
    ErlDrvTermData to;
    int ref;
    int req;    /* dequeued but not needed here */

    if (deq_async(desc, &ref, &to, &req) >= 0)
        return send_async_error(desc->dport, ref, to, reason);

    return -1;
}
2279 
2280 /* dequeue all operations */
async_error_am_all(inet_descriptor * desc,ErlDrvTermData reason)2281 static int async_error_am_all(inet_descriptor* desc, ErlDrvTermData reason)
2282 {
2283     int req;
2284     int aid;
2285     ErlDrvTermData caller;
2286 
2287     while (deq_async(desc, &aid, &caller, &req) == 0) {
2288 	send_async_error(desc->dport, aid, caller, reason);
2289     }
2290     return 0;
2291 }
2292 
2293 
/* Like async_error_am(), with the reason atom derived from the errno
 * value "err" through error_atom().
 */
static int async_error(inet_descriptor* desc, int err)
{
    ErlDrvTermData reason = error_atom(err);

    return async_error_am(desc, reason);
}
2298 
2299 /* send:
2300 **   {inet_reply, S, ok}
2301 */
2302 
inet_reply_ok(inet_descriptor * desc)2303 static int inet_reply_ok(inet_descriptor* desc)
2304 {
2305     ErlDrvTermData spec[2*LOAD_ATOM_CNT + LOAD_PORT_CNT + LOAD_TUPLE_CNT];
2306     ErlDrvTermData caller = desc->caller;
2307     int i = 0;
2308 
2309     desc->caller = 0;
2310     if (is_not_internal_pid(caller))
2311         return 0;
2312 
2313     i = LOAD_ATOM(spec, i, am_inet_reply);
2314     i = LOAD_PORT(spec, i, desc->dport);
2315     i = LOAD_ATOM(spec, i, am_ok);
2316     i = LOAD_TUPLE(spec, i, 3);
2317     ASSERT(i == sizeof(spec)/sizeof(*spec));
2318 
2319     return erl_drv_send_term(desc->dport, caller, spec, i);
2320 }
2321 
2322 #ifdef HAVE_SCTP
/* Send
 *   {inet_reply, S, {ok, Port}}
 * to the caller stored in "desc", "Port" being "dport". The stored caller
 * is cleared. Returns the result of erl_drv_send_term().
 */
static int inet_reply_ok_port(inet_descriptor* desc, ErlDrvTermData dport)
{
    ErlDrvTermData msg[2*LOAD_ATOM_CNT + 2*LOAD_PORT_CNT + 2*LOAD_TUPLE_CNT];
    ErlDrvTermData to = desc->caller;
    int n = 0;

    n = LOAD_ATOM(msg, n, am_inet_reply);
    n = LOAD_PORT(msg, n, desc->dport);

    /* {ok, Port} */
    n = LOAD_ATOM(msg, n, am_ok);
    n = LOAD_PORT(msg, n, dport);
    n = LOAD_TUPLE(msg, n, 2);

    n = LOAD_TUPLE(msg, n, 3);
    ASSERT(n == sizeof(msg)/sizeof(*msg));

    desc->caller = 0;
    return erl_drv_send_term(desc->dport, to, msg, n);
}
2340 #endif
2341 
2342 /* send:
2343 **   {inet_reply, S, {error, Reason}}
2344 */
inet_reply_error_am(inet_descriptor * desc,ErlDrvTermData reason)2345 static int inet_reply_error_am(inet_descriptor* desc, ErlDrvTermData reason)
2346 {
2347     ErlDrvTermData spec[3*LOAD_ATOM_CNT + LOAD_PORT_CNT + 2*LOAD_TUPLE_CNT];
2348     ErlDrvTermData caller = desc->caller;
2349     int i = 0;
2350 
2351     i = LOAD_ATOM(spec, i, am_inet_reply);
2352     i = LOAD_PORT(spec, i, desc->dport);
2353     i = LOAD_ATOM(spec, i, am_error);
2354     i = LOAD_ATOM(spec, i, reason);
2355     i = LOAD_TUPLE(spec, i, 2);
2356     i = LOAD_TUPLE(spec, i, 3);
2357     ASSERT(i == sizeof(spec)/sizeof(*spec));
2358     desc->caller = 0;
2359 
2360     DEBUGF(("inet_reply_error_am %ld %ld\r\n", caller, reason));
2361     return erl_drv_send_term(desc->dport, caller, spec, i);
2362 }
2363 
2364 /* send:
2365 **   {inet_reply, S, {error, Reason}}
2366 */
inet_reply_error(inet_descriptor * desc,int err)2367 static int inet_reply_error(inet_descriptor* desc, int err)
2368 {
2369     return inet_reply_error_am(desc, error_atom(err));
2370 }
2371 
2372 /*
2373 ** Deliver port data from buffer
2374 */
inet_port_data(inet_descriptor * desc,const char * buf,int len)2375 static int inet_port_data(inet_descriptor* desc, const char* buf, int len)
2376 {
2377     unsigned int hsz = desc->hsz;
2378 
2379     DEBUGF(("inet_port_data(%ld): len = %d\r\n", (long)desc->port, len));
2380 
2381     if ((desc->mode == INET_MODE_LIST) || (hsz > len))
2382 	return driver_output2(desc->port, (char*)buf, len, NULL, 0);
2383     else if (hsz > 0)
2384 	return driver_output2(desc->port, (char*)buf, hsz, (char*)buf+hsz, len-hsz);
2385     else
2386 	return driver_output(desc->port, (char*)buf, len);
2387 }
2388 
2389 /*
2390 ** Deliver port data from binary (for an active mode socket)
2391 */
2392 static int
inet_port_binary_data(inet_descriptor * desc,ErlDrvBinary * bin,int offs,int len)2393 inet_port_binary_data(inet_descriptor* desc, ErlDrvBinary* bin, int offs, int len)
2394 {
2395     unsigned int hsz = desc->hsz;
2396 
2397     DEBUGF(("inet_port_binary_data(%ld): offs=%d, len = %d\r\n",
2398 	    (long)desc->port, offs, len));
2399 
2400     if ((desc->mode == INET_MODE_LIST) || (hsz > len))
2401 	return driver_output2(desc->port, bin->orig_bytes+offs, len, NULL, 0);
2402     else
2403 	return driver_output_binary(desc->port, bin->orig_bytes+offs, hsz,
2404 				    bin, offs+hsz, len-hsz);
2405 }
2406 
/* Atoms used by the http_*_inetdrv() callbacks and http_load_uri() below
 * when building HTTP packet messages.
 * NOTE(review): they are not assigned in this part of the file; presumably
 * they are initialised once at driver start-up -- confirm.
 */
static ErlDrvTermData am_http_eoh;
static ErlDrvTermData am_http_header;
static ErlDrvTermData am_http_request;
static ErlDrvTermData am_http_response;
static ErlDrvTermData am_http_error;
static ErlDrvTermData am_abs_path;
static ErlDrvTermData am_absoluteURI;
static ErlDrvTermData am_star;
static ErlDrvTermData am_http;
static ErlDrvTermData am_https;
static ErlDrvTermData am_scheme;
2418 
/*
 * Append the "len" bytes at "str" to "spec" at index "i": as a binary when
 * the packet type of "desc" is one of the HTTP binary types (htype at or
 * above TCP_PB_HTTP_BIN), as a string otherwise.
 * Returns the index following the loaded item.
 */
static int http_load_string(tcp_descriptor* desc, ErlDrvTermData* spec, int i,
                            const char* str, int len)
{
    if (desc->inet.htype < TCP_PB_HTTP_BIN)
        return LOAD_STRING(spec, i, str, len);

    ASSERT(desc->inet.htype == TCP_PB_HTTP_BIN ||
           desc->inet.htype == TCP_PB_HTTPH_BIN);
    return LOAD_BUF2BINARY(spec, i, str, len);
}
2431 
http_response_inetdrv(void * arg,int major,int minor,int status,const char * phrase,int phrase_len)2432 static int http_response_inetdrv(void *arg, int major, int minor,
2433 				 int status, const char* phrase, int phrase_len)
2434 {
2435     tcp_descriptor* desc = (tcp_descriptor*) arg;
2436     int i = 0;
2437     ErlDrvTermData spec[27];
2438     ErlDrvTermData caller = ERL_DRV_NIL;
2439 
2440     if (desc->inet.active == INET_PASSIVE) {
2441         /* {inet_async,S,Ref,{ok,{http_response,Version,Status,Phrase}}} */
2442         int req;
2443         int aid;
2444 
2445         if (deq_async(INETP(desc), &aid, &caller, &req) < 0)
2446             return -1;
2447         i = LOAD_ATOM(spec, i,  am_inet_async);
2448         i = LOAD_PORT(spec, i,  desc->inet.dport);
2449         i = LOAD_INT(spec, i,   aid);
2450         i = LOAD_ATOM(spec, i,  am_ok);
2451     }
2452     else {
2453         /* {http, S, {http_response,Version,Status,Phrase}} */
2454         i = LOAD_ATOM(spec, i, am_http);
2455         i = LOAD_PORT(spec, i, desc->inet.dport);
2456     }
2457     i = LOAD_ATOM(spec, i,  am_http_response);
2458     i = LOAD_INT(spec, i, major);
2459     i = LOAD_INT(spec, i, minor);
2460     i = LOAD_TUPLE(spec, i, 2);
2461     i = LOAD_INT(spec, i, status);
2462     i = http_load_string(desc, spec, i, phrase, phrase_len);
2463     i = LOAD_TUPLE(spec, i, 4);
2464 
2465     if (desc->inet.active == INET_PASSIVE) {
2466         i = LOAD_TUPLE(spec, i, 2);
2467         i = LOAD_TUPLE(spec, i, 4);
2468         ASSERT(i<=27);
2469         return erl_drv_send_term(desc->inet.dport, caller, spec, i);
2470     }
2471     else {
2472         i = LOAD_TUPLE(spec, i, 3);
2473         ASSERT(i<=27);
2474         return erl_drv_output_term(desc->inet.dport, spec, i);
2475     }
2476 }
2477 
/*
 * Append the term for the request URI "uri" to "spec" at index "i" and
 * return the index following it. Depending on uri->type the term is:
 *     URI_STAR       the atom am_star
 *     URI_ABS_PATH   {abs_path, S1}
 *     URI_HTTP(S)    {absoluteURI, Scheme, S1, Port | undefined, S2}
 *     URI_STRING     S1
 *     URI_SCHEME     {scheme, S1, S2}
 * Strings are loaded with http_load_string(). Any other type loads
 * nothing and "i" is returned unchanged.
 */
static int http_load_uri(tcp_descriptor* desc, ErlDrvTermData* spec, int i,
                         const PacketHttpURI* uri)
{
    switch (uri->type) {
    case URI_STAR:
        return LOAD_ATOM(spec, i, am_star);

    case URI_ABS_PATH:
        i = LOAD_ATOM(spec, i, am_abs_path);
        i = http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len);
        return LOAD_TUPLE(spec, i, 2);

    case URI_HTTP:
    case URI_HTTPS: {
        ErlDrvTermData scheme = (uri->type == URI_HTTP) ? am_http : am_https;

        i = LOAD_ATOM(spec, i, am_absoluteURI);
        i = LOAD_ATOM(spec, i, scheme);
        i = http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len);
        /* A port of 0 is reported as the atom undefined */
        if (uri->port != 0) {
            i = LOAD_INT(spec, i, uri->port);
        } else {
            i = LOAD_ATOM(spec, i, am_undefined);
        }
        i = http_load_string(desc, spec, i, uri->s2_ptr, uri->s2_len);
        return LOAD_TUPLE(spec, i, 5);
    }

    case URI_STRING:
        return http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len);

    case URI_SCHEME:
        i = LOAD_ATOM(spec, i, am_scheme);
        i = http_load_string(desc, spec, i, uri->s1_ptr, uri->s1_len);
        i = http_load_string(desc, spec, i, uri->s2_ptr, uri->s2_len);
        return LOAD_TUPLE(spec, i, 3);
    }
    return i;
}
2521 
2522 
2523 static int
http_request_inetdrv(void * arg,const http_atom_t * meth,const char * meth_ptr,int meth_len,const PacketHttpURI * uri,int major,int minor)2524 http_request_inetdrv(void* arg, const http_atom_t* meth, const char* meth_ptr,
2525 		     int meth_len, const PacketHttpURI* uri,
2526 		     int major, int minor)
2527 {
2528     tcp_descriptor* desc = (tcp_descriptor*) arg;
2529     int i = 0;
2530     ErlDrvTermData spec[43];
2531     ErlDrvTermData caller = ERL_DRV_NIL;
2532 
2533     if (desc->inet.active == INET_PASSIVE) {
2534         /* {inet_async, S, Ref, {ok,{http_request,Meth,Uri,Version}}} */
2535         int req;
2536         int aid;
2537 
2538         if (deq_async(INETP(desc), &aid, &caller, &req) < 0)
2539             return -1;
2540         i = LOAD_ATOM(spec, i,  am_inet_async);
2541         i = LOAD_PORT(spec, i,  desc->inet.dport);
2542         i = LOAD_INT(spec, i,   aid);
2543         i = LOAD_ATOM(spec, i,  am_ok);
2544     }
2545     else {
2546         /* {http, S, {http_request,Meth,Uri,Version}}} */
2547         i = LOAD_ATOM(spec, i, am_http);
2548         i = LOAD_PORT(spec, i, desc->inet.dport);
2549     }
2550 
2551     i = LOAD_ATOM(spec, i,  am_http_request);
2552     if (meth != NULL)
2553       i = LOAD_ATOM(spec, i, meth->atom);
2554     else
2555       i = http_load_string(desc, spec, i, meth_ptr, meth_len);
2556     i = http_load_uri(desc, spec, i, uri);
2557     i = LOAD_INT(spec, i, major);
2558     i = LOAD_INT(spec, i, minor);
2559     i = LOAD_TUPLE(spec, i, 2);
2560     i = LOAD_TUPLE(spec, i, 4);
2561 
2562     if (desc->inet.active == INET_PASSIVE) {
2563         i = LOAD_TUPLE(spec, i, 2);
2564         i = LOAD_TUPLE(spec, i, 4);
2565         ASSERT(i <= 43);
2566         return erl_drv_send_term(desc->inet.dport, caller, spec, i);
2567     }
2568     else {
2569         i = LOAD_TUPLE(spec, i, 3);
2570         ASSERT(i <= 43);
2571         return erl_drv_output_term(desc->inet.dport, spec, i);
2572     }
2573 }
2574 
2575 static int
http_header_inetdrv(void * arg,const http_atom_t * name,const char * name_ptr,int name_len,const char * value_ptr,int value_len)2576 http_header_inetdrv(void* arg, const http_atom_t* name, const char* name_ptr,
2577 		    int name_len, const char* value_ptr, int value_len)
2578 {
2579     tcp_descriptor* desc = (tcp_descriptor*) arg;
2580     int i = 0;
2581     ErlDrvTermData spec[26];
2582     ErlDrvTermData caller = ERL_DRV_NIL;
2583 
2584     if (desc->inet.active == INET_PASSIVE) {
2585         /* {inet_async,S,Ref,{ok,{http_header,Bit,Name,IValue,Value}} */
2586         int req;
2587         int aid;
2588 
2589 
2590         if (deq_async(INETP(desc), &aid, &caller, &req) < 0)
2591             return -1;
2592         i = LOAD_ATOM(spec, i,  am_inet_async);
2593         i = LOAD_PORT(spec, i,  desc->inet.dport);
2594         i = LOAD_INT(spec, i,   aid);
2595         i = LOAD_ATOM(spec, i,  am_ok);
2596     }
2597     else {
2598         /* {http, S, {http_header,Bit,Name,IValue,Value}} */
2599         i = LOAD_ATOM(spec, i, am_http);
2600         i = LOAD_PORT(spec, i, desc->inet.dport);
2601     }
2602 
2603     i = LOAD_ATOM(spec, i,  am_http_header);
2604     if (name != NULL) {
2605       i = LOAD_INT(spec, i,  name->index+1);
2606       i = LOAD_ATOM(spec, i, name->atom);
2607     }
2608     else {
2609       i = LOAD_INT(spec, i,  0);
2610       i = http_load_string(desc, spec, i, name_ptr, name_len);
2611     }
2612     i = LOAD_ATOM(spec, i, am_undefined);
2613     i = http_load_string(desc, spec, i, value_ptr, value_len);
2614     i = LOAD_TUPLE(spec, i, 5);
2615 
2616     if (desc->inet.active == INET_PASSIVE) {
2617         i = LOAD_TUPLE(spec, i, 2);
2618         i = LOAD_TUPLE(spec, i, 4);
2619         ASSERT(i <= 26);
2620         return erl_drv_send_term(desc->inet.dport, caller, spec, i);
2621     }
2622     else {
2623         i = LOAD_TUPLE(spec, i, 3);
2624         ASSERT(i <= 26);
2625         return erl_drv_output_term(desc->inet.dport, spec, i);
2626     }
2627 }
2628 
http_eoh_inetdrv(void * arg)2629 static int http_eoh_inetdrv(void* arg)
2630 {
2631   tcp_descriptor* desc = (tcp_descriptor*) arg;
2632   int i = 0;
2633   ErlDrvTermData spec[14];
2634 
2635   if (desc->inet.active == INET_PASSIVE) {
2636     /* {inet_async,S,Ref,{ok,http_eoh}} */
2637     int req;
2638     int aid;
2639     ErlDrvTermData caller;
2640 
2641     if (deq_async(INETP(desc), &aid, &caller, &req) < 0)
2642       return -1;
2643     i = LOAD_ATOM(spec, i,  am_inet_async);
2644     i = LOAD_PORT(spec, i,  desc->inet.dport);
2645     i = LOAD_INT(spec, i,   aid);
2646     i = LOAD_ATOM(spec, i,  am_ok);
2647     i = LOAD_ATOM(spec, i,  am_http_eoh);
2648     i = LOAD_TUPLE(spec, i, 2);
2649     i = LOAD_TUPLE(spec, i, 4);
2650     ASSERT(i <= 14);
2651     return erl_drv_send_term(desc->inet.dport, caller, spec, i);
2652   }
2653   else {
2654       /* {http, S, http_eoh} */
2655       i = LOAD_ATOM(spec, i,  am_http);
2656       i = LOAD_PORT(spec, i,  desc->inet.dport);
2657       i = LOAD_ATOM(spec, i,  am_http_eoh);
2658       i = LOAD_TUPLE(spec, i, 3);
2659       ASSERT(i <= 14);
2660       return erl_drv_output_term(desc->inet.dport, spec, i);
2661   }
2662 }
2663 
http_error_inetdrv(void * arg,const char * buf,int len)2664 static int http_error_inetdrv(void* arg, const char* buf, int len)
2665 {
2666   tcp_descriptor* desc = (tcp_descriptor*) arg;
2667   int i = 0;
2668   ErlDrvTermData spec[19];
2669 
2670   if (desc->inet.active == INET_PASSIVE) {
2671     /* {inet_async,S,Ref,{ok,{http_error,Line}}} */
2672     int req;
2673     int aid;
2674     ErlDrvTermData caller;
2675 
2676     if (deq_async(INETP(desc), &aid, &caller, &req) < 0)
2677       return -1;
2678     i = LOAD_ATOM(spec, i,  am_inet_async);
2679     i = LOAD_PORT(spec, i,  desc->inet.dport);
2680     i = LOAD_INT(spec, i,   aid);
2681     i = LOAD_ATOM(spec, i,  am_ok);
2682     i = LOAD_ATOM(spec, i,  am_http_error);
2683     i = http_load_string(desc, spec, i, buf, len);
2684     i = LOAD_TUPLE(spec, i, 2);
2685     i = LOAD_TUPLE(spec, i, 2);
2686     i = LOAD_TUPLE(spec, i, 4);
2687     ASSERT(i <= 19);
2688     return erl_drv_send_term(desc->inet.dport, caller, spec, i);
2689   }
2690   else {
2691       /* {http, S, {http_error,Line} */
2692       i = LOAD_ATOM(spec, i,  am_http);
2693       i = LOAD_PORT(spec, i,  desc->inet.dport);
2694       i = LOAD_ATOM(spec, i,  am_http_error);
2695       i = http_load_string(desc, spec, i, buf, len);
2696       i = LOAD_TUPLE(spec, i, 2);
2697       i = LOAD_TUPLE(spec, i, 3);
2698       ASSERT(i <= 19);
2699       return erl_drv_output_term(desc->inet.dport, spec, i);
2700   }
2701 }
2702 
2703 
2704 static
ssl_tls_inetdrv(void * arg,unsigned type,unsigned major,unsigned minor,const char * buf,int len,const char * prefix,int plen)2705 int ssl_tls_inetdrv(void* arg, unsigned type, unsigned major, unsigned minor,
2706                     const char* buf, int len, const char* prefix, int plen)
2707 {
2708     tcp_descriptor* desc = (tcp_descriptor*) arg;
2709     int i = 0;
2710     ErlDrvTermData spec[30];
2711     ErlDrvTermData caller = ERL_DRV_NIL;
2712     ErlDrvBinary* bin;
2713     int ret;
2714 
2715     if ((bin = driver_alloc_binary(plen+len)) == NULL)
2716         return async_error(&desc->inet, ENOMEM);
2717     memcpy(bin->orig_bytes+plen, buf, len);
2718     if (plen) {
2719         memcpy(bin->orig_bytes, prefix, plen);
2720         len += plen;
2721     }
2722 
2723     if (desc->inet.active == INET_PASSIVE) {
2724         /* {inet_async,S,Ref,{ok,{ssl_tls,...}}} */
2725         int req;
2726         int aid;
2727 
2728         if (deq_async(INETP(desc), &aid, &caller, &req) < 0) {
2729             ret = -1;
2730             goto done;
2731         }
2732         i = LOAD_ATOM(spec, i,  am_inet_async);
2733         i = LOAD_PORT(spec, i,  desc->inet.dport);
2734         i = LOAD_INT(spec, i,   aid);
2735         i = LOAD_ATOM(spec, i,  am_ok);
2736     }
2737 
2738     /* {ssl_tls,S,ContentType,{Major,Minor},Bin} */
2739     i = LOAD_ATOM(spec, i,  am_ssl_tls);
2740     i = LOAD_PORT(spec, i,  desc->inet.dport);
2741     i = LOAD_INT(spec, i,   type);
2742     i = LOAD_INT(spec, i,   major);
2743     i = LOAD_INT(spec, i,   minor);
2744     i = LOAD_TUPLE(spec, i, 2);
2745     i = LOAD_BINARY(spec, i, bin, 0, len);
2746     i = LOAD_TUPLE(spec, i, 5);
2747 
2748     if (desc->inet.active == INET_PASSIVE) {
2749         i = LOAD_TUPLE(spec, i, 2);
2750         i = LOAD_TUPLE(spec, i, 4);
2751         ASSERT(i <= sizeof(spec)/sizeof(*spec));
2752         ret = erl_drv_send_term(desc->inet.dport, caller, spec, i);
2753     }
2754     else {
2755         ASSERT(i <= sizeof(spec)/sizeof(*spec));
2756         ret = erl_drv_output_term(desc->inet.dport, spec, i);
2757     }
2758 done:
2759     driver_free_binary(bin);
2760     return ret;
2761 }
2762 
2763 
2764 static PacketCallbacks packet_callbacks =
2765 {
2766     http_response_inetdrv,
2767     http_request_inetdrv,
2768     http_eoh_inetdrv,
2769     http_header_inetdrv,
2770     http_error_inetdrv,
2771     ssl_tls_inetdrv
2772 };
2773 
2774 
2775 /*
2776 ** passive mode reply:
2777 **        {inet_async, S, Ref, {ok,[H1,...Hsz | Data]}}
2778 ** NB: this is for TCP only;
2779 ** UDP and SCTP use inet_async_binary_data .
2780 */
inet_async_data(inet_descriptor * desc,const char * buf,int len)2781 static int inet_async_data(inet_descriptor* desc, const char* buf, int len)
2782 {
2783     unsigned int hsz = desc->hsz;
2784     ErlDrvTermData spec[20];
2785     ErlDrvTermData caller;
2786     int req;
2787     int aid;
2788     int i = 0;
2789 
2790     DEBUGF(("inet_async_data(%ld): len = %d\r\n", (long)desc->port, len));
2791 
2792     if (deq_async(desc, &aid, &caller, &req) < 0)
2793 	return -1;
2794 
2795     i = LOAD_ATOM(spec, i, am_inet_async);
2796     i = LOAD_PORT(spec, i, desc->dport);
2797     i = LOAD_INT(spec, i, aid);
2798 
2799     i = LOAD_ATOM(spec, i, am_ok);
2800     if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
2801 	i = LOAD_STRING(spec, i, buf, len); /* => [H1,H2,...Hn] */
2802 	i = LOAD_TUPLE(spec, i, 2);
2803 	i = LOAD_TUPLE(spec, i, 4);
2804 	ASSERT(i == 15);
2805 	desc->caller = 0;
2806 	return erl_drv_send_term(desc->dport, caller, spec, i);
2807     }
2808     else {
2809 	/* INET_MODE_BINARY => [H1,H2,...HSz | Binary] */
2810 	int sz = len - hsz;
2811 	int code;
2812 
2813 	i = LOAD_BUF2BINARY(spec, i, buf+hsz, sz);
2814 	if (hsz > 0)
2815 	    i = LOAD_STRING_CONS(spec, i, buf, hsz);
2816 	i = LOAD_TUPLE(spec, i, 2);
2817 	i = LOAD_TUPLE(spec, i, 4);
2818 	ASSERT(i <= 20);
2819 	desc->caller = 0;
2820 	code = erl_drv_send_term(desc->dport, caller, spec, i);
2821 	return code;
2822     }
2823 }
2824 
2825 #ifndef __WIN32__
/*
 * Load the payload of one control message into `spec` as an integer.
 *
 * The payload width is LEN_CMSG_DATA(cmsg); payloads of 8, 16, 32 or
 * 64 bits are read as unsigned values of that width, any other width
 * is loaded as 0.  Returns the next free index in `spec`.
 */
static int load_cmsg_int(ErlDrvTermData *spec, int i,
                    struct cmsghdr *cmsg) {
    union cmsg_payload {
        byte   u8;
        Uint16 u16;
        Uint32 u32;
        Uint64 u64;
    } *payload = (union cmsg_payload *) CMSG_DATA(cmsg);
    Uint64 value = 0;

    switch (LEN_CMSG_DATA(cmsg) * CHAR_BIT) {
    case 8:
        value = payload->u8;
        break;
    case 16:
        value = payload->u16;
        break;
    case 32:
        value = payload->u32;
        break;
    case 64:
        value = payload->u64;
        break;
    default:
        /* Unsupported width: keep 0 */
        break;
    }
    return LOAD_INT(spec, i, value);
}
2849 
/*
 * Translate one control message into an {OptName, Value} tuple.
 *
 * cmsg is compared against the (level, type) pairs below, in order;
 * the first match decides the atom.  On a match the tuple is loaded
 * into `spec`, *n is incremented and the new index is returned.  If
 * nothing matches, `spec` and *n are left untouched and `i` is
 * returned unchanged.
 */
static int parse_ancillary_data_item(ErlDrvTermData *spec, int i,
                                     struct cmsghdr *cmsg, int *n) {
    ErlDrvTermData opt_name = 0;
    int known = 0;

#define TRY_CMSG(proto, type, am)                       \
    do {                                                \
        if (!known &&                                   \
            cmsg->cmsg_level == (proto) &&              \
            cmsg->cmsg_type == (type)) {                \
            opt_name = (am);                            \
            known = 1;                                  \
        }                                               \
    } while (0)

#if defined(IPPROTO_IP) && defined(IP_TOS)
    TRY_CMSG(IPPROTO_IP, IP_TOS, am_tos);
#endif
#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
    TRY_CMSG(IPPROTO_IPV6, IPV6_TCLASS, am_tclass);
#endif
#if defined(IPPROTO_IP) && defined(IP_TTL)
    TRY_CMSG(IPPROTO_IP, IP_TTL, am_ttl);
#endif
    /* BSD uses the RECV* names in CMSG fields */
#if defined(IPPROTO_IP) && defined(IP_RECVTOS)
    TRY_CMSG(IPPROTO_IP, IP_RECVTOS, am_tos);
#endif
#if defined(IPPROTO_IPV6) && defined(IPV6_RECVTCLASS)
    TRY_CMSG(IPPROTO_IPV6, IPV6_RECVTCLASS, am_tclass);
#endif
#if defined(IPPROTO_IP) && defined(IP_RECVTTL)
    TRY_CMSG(IPPROTO_IP, IP_RECVTTL, am_ttl);
#endif
#undef TRY_CMSG

    if (!known)
        return i;

    /* {OptName, Value} */
    i = LOAD_ATOM(spec, i, opt_name);
    i = load_cmsg_int(spec, i, cmsg);
    i = LOAD_TUPLE(spec, i, 2);
    (*n)++;
    return i;
}
2883 #endif /* #ifndef __WIN32__ */
2884 
2885 #ifdef HAVE_SCTP
2886 /*
2887 ** SCTP-related atoms:
2888 */
2889 static ErlDrvTermData   am_sctp_rtoinfo, /* Option names */
2890     am_sctp_associnfo,                 am_sctp_initmsg,
2891     am_sctp_autoclose,                 am_sctp_nodelay,
2892     am_sctp_disable_fragments,         am_sctp_i_want_mapped_v4_addr,
2893     am_sctp_maxseg,                    am_sctp_set_peer_primary_addr,
2894     am_sctp_primary_addr,              am_sctp_adaptation_layer,
2895     am_sctp_peer_addr_params,          am_sctp_default_send_param,
2896     am_sctp_events,                    am_sctp_delayed_ack_time,
2897     am_sctp_status,                    am_sctp_get_peer_addr_info,
2898 
2899     /* Record names */
2900     am_sctp_sndrcvinfo,                am_sctp_assoc_change,
2901     am_sctp_paddr_change,              am_sctp_remote_error,
2902     am_sctp_send_failed,               am_sctp_shutdown_event,
2903     am_sctp_adaptation_event,          am_sctp_pdapi_event,
2904     am_sctp_assocparams,               am_sctp_prim,
2905     am_sctp_setpeerprim,               am_sctp_setadaptation,
2906     am_sctp_paddrparams,               am_sctp_event_subscribe,
2907     am_sctp_assoc_value,               am_sctp_paddrinfo,
2908 
2909     /* For #sctp_sndrcvinfo{}: */
2910     am_unordered,                      am_addr_over,
2911     am_abort,                          am_eof,
2912 
2913     /* For #sctp_assoc_change{}: */
2914     am_comm_up,                        am_comm_lost,
2915     am_restart,                        am_shutdown_comp,
2916     am_cant_assoc,
2917 
2918     /* For #sctp_paddr_change{}: */
2919     am_addr_available,                 am_addr_unreachable,
2920     am_addr_removed,                   am_addr_added,
2921     am_addr_made_prim,                 am_addr_confirmed,
2922 
2923     /* For #sctp_remote_error{}: */
2924     am_short_recv,                     am_wrong_anc_data,
2925 
2926     /* For #sctp_pdap_event{}: */
2927     am_partial_delivery_aborted,
2928 
2929     /* For #sctp_paddrparams{}: */
2930     am_hb_enable,                      am_hb_disable,
2931     am_hb_demand,                      am_pmtud_enable,
2932     am_pmtud_disable,                  am_sackdelay_enable,
2933     am_sackdelay_disable,
2934 
2935     /* For #sctp_paddrinfo{}: */
2936     am_active,                         am_inactive,
2937 #    if HAVE_DECL_SCTP_UNCONFIRMED
2938     am_unconfirmed,
2939 #    endif
2940 
2941     /* For #sctp_status{}: */
2942 #    if HAVE_DECL_SCTP_EMPTY
2943     am_empty,
2944 #    endif
2945 #    if HAVE_DECL_SCTP_BOUND
2946     am_bound,
2947 #    endif
2948 #    if HAVE_DECL_SCTP_LISTEN
2949     am_listen,
2950 #    endif
2951     am_cookie_wait,                    am_cookie_echoed,
2952     am_established,                    am_shutdown_pending,
2953     am_shutdown_sent,                  am_shutdown_received,
2954     am_shutdown_ack_sent;
2955 
2956 /*
2957 ** Parsing of "sctp_sndrcvinfo": ancillary data coming with received msgs.
2958 ** This function is mainly used by "sctp_parse_ancillary_data",  but also
2959 ** by "sctp_parse_async_event" in case of SCTP_SEND_FAILED:
2960 */
2961 #define SCTP_PARSE_SNDRCVINFO_CNT                            \
2962         (5*LOAD_ATOM_CNT + 5*LOAD_INT_CNT + 2*LOAD_UINT_CNT + \
2963 	 LOAD_NIL_CNT + LOAD_LIST_CNT + LOAD_ASSOC_ID_CNT + LOAD_TUPLE_CNT)
sctp_parse_sndrcvinfo(ErlDrvTermData * spec,int i,struct sctp_sndrcvinfo * sri)2964 static int sctp_parse_sndrcvinfo
2965 	   (ErlDrvTermData * spec, int i, struct sctp_sndrcvinfo * sri)
2966 {
2967     int n;
2968 
2969     i = LOAD_ATOM	(spec, i, am_sctp_sndrcvinfo);
2970     i = LOAD_INT	(spec, i, sri->sinfo_stream);
2971     i = LOAD_INT	(spec, i, sri->sinfo_ssn);
2972     /* Now Flags, as a list: */
2973     n = 0;
2974     if (sri->sinfo_flags & SCTP_UNORDERED)
2975 	{ i = LOAD_ATOM (spec, i, am_unordered);     n++; }
2976 
2977     if (sri->sinfo_flags & SCTP_ADDR_OVER)
2978 	{ i = LOAD_ATOM (spec, i, am_addr_over);     n++; }
2979 
2980     if (sri->sinfo_flags & SCTP_ABORT)
2981 	{ i = LOAD_ATOM (spec, i, am_abort);	     n++; }
2982 
2983     if (sri->sinfo_flags & SCTP_EOF)
2984 	{ i = LOAD_ATOM (spec, i, am_eof);	     n++; }
2985 
2986     /* SCTP_SENDALL is not yet supported by the Linux kernel     */
2987     i = LOAD_NIL	(spec, i);
2988     i = LOAD_LIST	(spec, i, n+1);
2989 
2990     /* Continue with other top-level fields: */
2991     i = LOAD_INT	(spec, i, sock_ntohl(sri->sinfo_ppid));
2992     i = LOAD_INT	(spec, i, sri->sinfo_context);
2993     i = LOAD_INT	(spec, i, sri->sinfo_timetolive);
2994     i = LOAD_UINT	(spec, i, sri->sinfo_tsn);
2995     i = LOAD_UINT	(spec, i, sri->sinfo_cumtsn);
2996     i = LOAD_ASSOC_ID	(spec, i, sri->sinfo_assoc_id);
2997 
2998     /* Close up the record: */
2999     i = LOAD_TUPLE	(spec, i, 10);
3000     return i;
3001 }
3002 
3003 /*
3004 ** This function skips non-SCTP ancillary data, returns SCTP-specific anc.data
3005 ** (currently "sctp_sndrcvinfo" only) as a list of records:
3006 */
sctp_parse_ancillary_data(ErlDrvTermData * spec,int i,struct msghdr * mptr)3007 static int sctp_parse_ancillary_data
3008 	   (ErlDrvTermData * spec, int i, struct msghdr * mptr)
3009 {
3010     /* First of all, check for ancillary data: */
3011     struct cmsghdr * cmsg, * frst_msg = CMSG_FIRSTHDR(mptr);
3012     int    s = 0;
3013     for (cmsg = frst_msg; cmsg != NULL; cmsg = CMSG_NXTHDR(mptr,cmsg))
3014     {
3015 	struct sctp_sndrcvinfo * sri;
3016 #ifndef __WIN32
3017         int old_s;
3018 
3019         /* Parse ancillary data common to UDP */
3020         old_s = s;
3021         i = parse_ancillary_data_item(spec, i, cmsg, &s);
3022         if (s > old_s) continue;
3023 	/* Skip other possible ancillary data, e.g. from IPv6: */
3024 	if (cmsg->cmsg_level != IPPROTO_SCTP ||
3025 	    cmsg->cmsg_type  != SCTP_SNDRCV)
3026 	continue;
3027 #endif
3028 
3029 	if (((char*)cmsg + cmsg->cmsg_len) - (char*)frst_msg >
3030 	    mptr->msg_controllen)
3031 	    /* MUST check this in Linux --  the returned "cmsg" may actually
3032 	       go too far! */
3033 	    break;
3034 
3035 	/* The ONLY kind of ancillary SCTP data which can occur on receiving
3036 	   is "sctp_sndrcvinfo" (on sending, "sctp_initmsg" can be specified
3037 	   by the user). So parse this type:
3038 	*/
3039 	sri = (struct sctp_sndrcvinfo*) CMSG_DATA(cmsg);
3040 	i = sctp_parse_sndrcvinfo (spec, i, sri);
3041 	s ++;
3042     }
3043     /* Now make the list of tuples created above. Normally, it will be [] or
3044        a singleton list.   The list must first be closed with NIL, otherwise
3045        traversing it in Erlang would be problematic:
3046     */
3047     i = LOAD_NIL (spec, i);
3048     i = LOAD_LIST(spec, i, s+1);
3049     return i;
3050 }
3051 
3052 /*
3053 ** Parsing of ERROR and ABORT SCTP chunks. The function returns a list of error
3054 ** causes (as atoms).  The chunks also contain some extended cause info, but it
3055 ** is not very detailed anyway, and of no interest at the user level   (it only
3056 ** concerns the protocol implementation), so we omit it:
3057 */
sctp_parse_error_chunk(ErlDrvTermData * spec,int i,char * chunk,int chlen)3058 static int sctp_parse_error_chunk
3059        (ErlDrvTermData * spec, int i, char * chunk, int chlen)
3060 {
3061     /* The "chunk" itself contains its length, which must not be greater than
3062        the "chlen" derived from the over-all msg size:
3063     */
3064     char *causes, *cause;
3065     int coff,  /* Cause offset */
3066 	ccode, /* Cause code */
3067 	clen,  /* cause length */
3068 	s;
3069     int len = sock_ntohs (*((uint16_t*)(chunk+2)));
3070     ASSERT(len >= 4 && len <= chlen);
3071 
3072     causes = chunk + 4;
3073     coff   = 0;
3074     len -= 4;  /* Total length of the "causes" fields */
3075     cause  = causes;
3076     s      = 0;
3077 
3078     while (coff < len)
3079     {
3080 	ccode = sock_ntohs (*((uint16_t*)(cause)));
3081 	clen  = sock_ntohs (*((uint16_t*)(cause + 2)));
3082 	if (clen <= 0)
3083 	    /* Strange, but must guard against that!  */
3084 	    break;
3085 
3086 	/* Install the corresp atom for this "ccode": */
3087 	i = LOAD_INT (spec, i, ccode);
3088 	cause += clen;
3089 	coff  += clen;
3090 	s ++;
3091     }
3092     i = LOAD_NIL (spec, i);
3093     i = LOAD_LIST(spec, i, s+1);
3094     return i;
3095 }
3096 
3097 /*
3098 ** Parsing of SCTP notification events. NB: they are NOT ancillary data: they
3099 ** are sent IN PLACE OF, not in conjunction with, the normal data:
3100 */
sctp_parse_async_event(ErlDrvTermData * spec,int i,int ok_pos,ErlDrvTermData error_atom,inet_descriptor * desc,ErlDrvBinary * bin,int offs,int sz)3101 static int sctp_parse_async_event
3102       (ErlDrvTermData * spec, int i,    int ok_pos,
3103        ErlDrvTermData   error_atom,     inet_descriptor* desc,
3104        ErlDrvBinary   * bin,  int offs, int sz)
3105 {
3106     char* body			   = bin->orig_bytes + offs;
3107     union sctp_notification * nptr = (union sctp_notification *) body;
3108 
3109     switch (nptr->sn_header.sn_type)
3110     {
3111 	case SCTP_ASSOC_CHANGE:
3112 	{   /* {sctp_assoc_change,
3113 		State		: Atom(),
3114 		Error		: Atom(),
3115 		OutBoundStreams : Int(),
3116 		InBoundStreams  : Int(),
3117 		AssocID		: Int(),
3118 		// AbortCauses	: [Atom()]   // NOT YET IMPLEMENTED
3119 	       }
3120 	    */
3121 	    struct sctp_assoc_change* sptr = &(nptr->sn_assoc_change);
3122 	    ASSERT(sptr->sac_length <= sz);  /* No buffer overrun */
3123 
3124 	    i = LOAD_ATOM (spec, i, am_sctp_assoc_change);
3125 
3126 	    switch (sptr->sac_state)
3127 	    {
3128 	    case SCTP_COMM_UP:
3129 		i = LOAD_ATOM (spec, i, am_comm_up);
3130 		break;
3131 	    case SCTP_COMM_LOST:
3132 		i = LOAD_ATOM (spec, i, am_comm_lost);
3133 		break;
3134 	    case SCTP_RESTART:
3135 		i = LOAD_ATOM (spec, i, am_restart);
3136 		break;
3137 	    case SCTP_SHUTDOWN_COMP:
3138 		i = LOAD_ATOM (spec, i, am_shutdown_comp);
3139 		break;
3140 	    case SCTP_CANT_STR_ASSOC:
3141 		i = LOAD_ATOM (spec, i, am_cant_assoc);
3142 		break;
3143 	    default:
3144 		ASSERT(0);
3145 	    }
3146 	    i = LOAD_INT (spec, i, sptr->sac_error);
3147 	    i = LOAD_INT (spec, i, sptr->sac_outbound_streams);
3148 	    i = LOAD_INT (spec, i, sptr->sac_inbound_streams);
3149 	    i = LOAD_INT (spec, i, sptr->sac_assoc_id);
3150 
3151 	    /* The ABORT chunk may or may not be present at the end, depending
3152 	       on whether there was really an ABORT.  In the Linux Kernel SCTP
3153 	       implementation, this chunk is not delivered anyway, so we leave
3154 	       it out. Just close up the tuple:
3155 	    */
3156 	    i = LOAD_TUPLE (spec, i, 6);
3157 	    break;
3158 	}
3159 
3160 	case SCTP_PEER_ADDR_CHANGE:
3161 	{   /* {sctp_paddr_change,
3162 		AffectedAddr	: String(),
3163 		State		: Atom(),
3164 		Error		: Atom(),
3165 		AssocID		: Int()
3166 	       }
3167 	    */
3168 	    struct sctp_paddr_change* sptr = &(nptr->sn_paddr_change);
3169 	    ASSERT(sptr->spc_length <= sz);  /* No buffer overrun */
3170 
3171 	    i = LOAD_ATOM	(spec, i, am_sctp_paddr_change);
3172 	    i = load_inet_get_address(spec, i, desc, &sptr->spc_aaddr);
3173 
3174 	    switch (sptr->spc_state)
3175 	    {
3176 	    case SCTP_ADDR_AVAILABLE:
3177 		i = LOAD_ATOM (spec, i, am_addr_available);
3178 		break;
3179 	    case SCTP_ADDR_UNREACHABLE:
3180 		i = LOAD_ATOM (spec, i, am_addr_unreachable);
3181 		break;
3182 	    case SCTP_ADDR_REMOVED:
3183 		i = LOAD_ATOM (spec, i, am_addr_removed);
3184 		break;
3185 	    case SCTP_ADDR_ADDED:
3186 		i = LOAD_ATOM (spec, i, am_addr_added);
3187 		break;
3188 	    case SCTP_ADDR_MADE_PRIM:
3189 		i = LOAD_ATOM (spec, i, am_addr_made_prim);
3190 		break;
3191 #if HAVE_DECL_SCTP_ADDR_CONFIRMED
3192 	    case SCTP_ADDR_CONFIRMED:
3193 		i = LOAD_ATOM (spec, i, am_addr_confirmed);
3194 		break;
3195 #endif
3196 	    default:
3197 		ASSERT(0);
3198 	    }
3199 	    i = LOAD_INT   (spec, i, sptr->spc_error);
3200 	    i = LOAD_INT   (spec, i, sptr->spc_assoc_id);
3201 	    i = LOAD_TUPLE (spec, i, 5);
3202 	    break;
3203 	}
3204 
3205 	case SCTP_REMOTE_ERROR:
3206 	{   /* This is an error condition, so we return an error term
3207 	       {sctp_remote_error,
3208 		Error		: Int(),
3209 		AssocID		: Int(),
3210 		RemoteCauses	: [Atom()] // Remote Error flags
3211 	       }
3212 	    */
3213 	    char *chunk;
3214 	    int chlen;
3215 	    struct sctp_remote_error * sptr = &(nptr->sn_remote_error);
3216 	    ASSERT(sptr->sre_length <= sz);   /* No buffer overrun */
3217 
3218 	    /* Over-write the prev part of the response with an error: */
3219 	    (void)LOAD_ATOM(spec, ok_pos, error_atom);
3220 
3221 	    /* Continue from the curr pos: */
3222 	    i = LOAD_ATOM  (spec, i, am_sctp_remote_error);
3223 
3224 	    i = LOAD_INT   (spec, i, sock_ntohs(sptr->sre_error));
3225 	    i = LOAD_INT   (spec, i, sptr->sre_assoc_id);
3226 
3227 #	    ifdef HAVE_STRUCT_SCTP_REMOTE_ERROR_SRE_DATA
3228 	    chunk = (char*) (&(sptr->sre_data));
3229 #	    else
3230 	    chunk = ((char*) &(sptr->sre_assoc_id))
3231 		+ sizeof(sptr->sre_assoc_id);
3232 #	    endif
3233 	    chlen = sptr->sre_length  - (chunk - (char *)sptr);
3234 	    i = sctp_parse_error_chunk(spec, i, chunk, chlen);
3235 
3236 	    i = LOAD_TUPLE (spec, i, 4);
3237 	    /* The {error, {...}} will be closed by the caller */
3238 	    break;
3239 	}
3240 
3241 	case SCTP_SEND_FAILED:
3242 	{   /* {sctp_send_failed,
3243 		DataSent	: Atom()	// true or false
3244 		Error		: Atom(),
3245 		OrigInfo	: Tuple(),
3246 		AssocID		: Int(),
3247 		OrigData	: Binary()
3248 	       }
3249 	       This is also an ERROR condition -- overwrite the 'ok':
3250 	    */
3251 	    char *chunk;
3252 	    int chlen, choff;
3253 	    struct sctp_send_failed * sptr = &(nptr->sn_send_failed);
3254 	    ASSERT(sptr->ssf_length <= sz);	/* No buffer overrun */
3255 
3256 	    /* Over-write 'ok' with 'error', continue from curr "i": */
3257 	    (void)LOAD_ATOM(spec, ok_pos, error_atom);
3258 
3259 	    i = LOAD_ATOM  (spec, i, am_sctp_send_failed);
3260 	    switch (sptr->ssf_flags) {
3261 	    case SCTP_DATA_SENT:
3262 		i = LOAD_ATOM (spec, i, am_true);
3263 		break;
3264 	    case SCTP_DATA_UNSENT:
3265 		i = LOAD_ATOM (spec, i, am_false);
3266 		break;
3267 	    default:
3268 		ASSERT(0);
3269 	    }
3270 	    i = LOAD_INT      (spec, i, sptr->ssf_error);
3271 	    /* Now parse the orig SCTP_SNDRCV info */
3272 	    i = sctp_parse_sndrcvinfo (spec, i, &sptr->ssf_info);
3273 	    i = LOAD_ASSOC_ID (spec, i, sptr->ssf_assoc_id);
3274 
3275 	    /* Load the orig data chunk, as an unparsed binary. Note that
3276 	       in LOAD_BINARY below, we must specify the offset wrt bin->
3277 	       orig_bytes. In Solaris 10, we don't have ssf_data:
3278 	    */
3279 #	    ifdef HAVE_STRUCT_SCTP_SEND_FAILED_SSF_DATA
3280 	    chunk = (char*) (&(sptr->ssf_data));
3281 #	    else
3282 	    chunk = ((char*) &(sptr->ssf_assoc_id))
3283 		+ sizeof(sptr->ssf_assoc_id);
3284 #	    endif
3285 	    chlen = sptr->ssf_length - (chunk - (char*) sptr);
3286 	    choff = chunk - bin->orig_bytes;
3287 
3288 	    i = LOAD_BINARY(spec, i, bin, choff, chlen);
3289 	    i = LOAD_TUPLE (spec, i, 6);
3290 	    /* The {error, {...}} tuple is not yet closed */
3291 	    break;
3292 	}
3293 
3294 	case SCTP_SHUTDOWN_EVENT:
3295 	{   /* {sctp_shutdown_event,
3296 		AssocID		: Int()
3297 	       }
3298 	    */
3299 	    struct sctp_shutdown_event * sptr = &(nptr->sn_shutdown_event);
3300 
3301 	    ASSERT (sptr->sse_length == sizeof(struct sctp_shutdown_event) &&
3302 		    sptr->sse_length <= sz);	/* No buffer overrun */
3303 
3304 	    i = LOAD_ATOM  (spec, i, am_sctp_shutdown_event);
3305 	    i = LOAD_INT   (spec, i, sptr->sse_assoc_id);
3306 	    i = LOAD_TUPLE (spec, i, 2);
3307 	    break;
3308 	}
3309 
3310 	case SCTP_ADAPTATION_INDICATION:
3311 	{   /* {sctp_adaptation_event,
3312 		Indication	: Atom(),
3313 		AssocID		: Int()
3314 	       }
3315 	    */
3316 	    struct sctp_adaptation_event * sptr =
3317 		&(nptr->sn_adaptation_event);
3318 	    ASSERT (sptr->sai_length == sizeof(struct sctp_adaptation_event)
3319 		    && sptr->sai_length <= sz);	/* No buffer overrun */
3320 
3321 	    i = LOAD_ATOM  (spec, i, am_sctp_adaptation_event);
3322 	    i = LOAD_INT   (spec, i, sock_ntohl(sptr->sai_adaptation_ind));
3323 	    i = LOAD_INT   (spec, i, sptr->sai_assoc_id);
3324 	    i = LOAD_TUPLE (spec, i, 3);
3325 	    break;
3326 	}
3327 
3328 	case SCTP_PARTIAL_DELIVERY_EVENT:
3329 	{   /* It is not clear  whether this event  is sent  to the sender
3330 		(when the receiver gets only a part of a message),   or to
3331 		the receiver itself.  In any case, we do not support partial
3332 		delivery of msgs in this implementation, so this is an error
3333 		condition:
3334 		{sctp_pdapi_event, sctp_partial_delivery_aborted, AssocID}:
3335 	    */
3336 	    struct sctp_pdapi_event * sptr;
3337 	    (void) LOAD_ATOM  (spec, ok_pos, error_atom);
3338 
3339 	    sptr = &(nptr->sn_pdapi_event);
3340 	    ASSERT (sptr->pdapi_length == sizeof(struct sctp_pdapi_event) &&
3341 		    sptr->pdapi_length <= sz);  /* No buffer overrun */
3342 
3343 	    i = LOAD_ATOM  (spec, i, am_sctp_pdapi_event);
3344 
3345 	    /* Currently, there is only one indication possible: */
3346 	    ASSERT (sptr->pdapi_indication == SCTP_PARTIAL_DELIVERY_ABORTED);
3347 
3348 	    i = LOAD_ATOM  (spec, i, am_partial_delivery_aborted);
3349 	    i = LOAD_INT   (spec, i, sptr->pdapi_assoc_id);
3350 	    i = LOAD_TUPLE (spec, i, 3);
3351 	    /* The {error, {...}} tuple is not yet closed */
3352 	    break;
3353 	}
3354 
3355 	/* XXX: No more supported SCTP Event types. The standard also provides
3356 	   SCTP_AUTHENTICATION_EVENT, but it is not implemented in the Linux
3357 	   kernel, hence not supported here either. It is not possible to
3358 	   request delivery of such events in this implementation, so they
3359 	   cannot occur:
3360 	*/
3361 	default:   ASSERT(0);
3362     }
3363     return i;
3364 }
3365 #endif  /* HAVE_SCTP */
3366 
3367 #ifndef __WIN32__
/*
 * Load the ancillary data of a received message into `spec` as a
 * NIL-terminated list.  Every control message of `mptr` is handed to
 * parse_ancillary_data_item(); the list gets one element for each item
 * that helper counted.  Returns the next free index in `spec`.
 */
static int udp_parse_ancillary_data(ErlDrvTermData *spec, int i,
                                struct msghdr *mptr) {
    int count = 0;
    struct cmsghdr *hdr = CMSG_FIRSTHDR(mptr);

    while (hdr != NULL) {
        i = parse_ancillary_data_item(spec, i, hdr, &count);
        hdr = CMSG_NXTHDR(mptr, hdr);
    }

    i = LOAD_NIL(spec, i);
    i = LOAD_LIST(spec, i, count + 1);
    return i;
}
3382 
/*
 * Compile an encoded option list into control messages of `mhdr`.
 *
 * ptr, anc_len   the encoded options: for each option one tag byte
 *                (INET_OPT_TOS, INET_OPT_TTL or INET_OPT_TCLASS)
 *                followed by a 4-byte value read with get_int32()
 * mhdr           the control messages are written starting at
 *                CMSG_FIRSTHDR(mhdr); on success mhdr->msg_controllen
 *                is set to the number of control bytes produced
 *
 * Returns 0 on success.  Returns 1, leaving msg_controllen untouched,
 * when the control buffer ends before the options do, when an option
 * value is truncated, or when the option is unknown or not available
 * on this platform.
 *
 * SIZEOF_ANCILLARY_DATA stays defined after this function.
 */
static int compile_ancillary_data(struct msghdr *mhdr,
                                  char *ptr, ErlDrvSizeT anc_len) {
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(mhdr);
    size_t controllen = 0;

    /* Emit one control message carrying a value of type Type, decoded
       from Size encoded bytes with Get(), then step past those bytes. */
#define COMPILE_ANCILLARY_DATA_ITEM(Level, Opt, Type, Get, Size)        \
        do {                                                            \
            if (anc_len < (Size)) return 1;                             \
            sys_memset(cmsg, '\0', CMSG_SPACE(sizeof(Type)));           \
            cmsg->cmsg_level = Level;                                   \
            cmsg->cmsg_type = Opt;                                      \
            cmsg->cmsg_len = CMSG_LEN(sizeof(Type));                    \
            *((Type *) CMSG_DATA(cmsg)) = Get(ptr);                     \
            controllen += CMSG_SPACE(sizeof(Type));                     \
            cmsg = CMSG_NXTHDR(mhdr, cmsg);                             \
            ptr += (Size);                                              \
            anc_len -= (Size);                                          \
        } while (0)
#define SIZEOF_ANCILLARY_DATA (2 * CMSG_SPACE(sizeof(int)))
        /* (IP_TOS | IPV6_TCLASS) + IP_TTL */

    while (anc_len != 0) {
        if (cmsg == NULL) {
            /* End of destination before end of options */
            return 1;
        }

        /* Consume the option tag */
        anc_len--;
        switch (*ptr++) {
        case INET_OPT_TOS: {
#if defined(IPPROTO_IP) && defined(IP_TOS)
            COMPILE_ANCILLARY_DATA_ITEM(IPPROTO_IP, IP_TOS, int, get_int32, 4);
#else
            return 1; /* Socket option not implemented */
#endif
            break;
        }
        case INET_OPT_TTL: {
#if defined(IPPROTO_IP) && defined(IP_TTL)
            COMPILE_ANCILLARY_DATA_ITEM(IPPROTO_IP, IP_TTL, int, get_int32, 4);
#else
            return 1; /* Socket option not implemented */
#endif
            break;
        }
        case INET_OPT_TCLASS: {
#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
            COMPILE_ANCILLARY_DATA_ITEM(IPPROTO_IPV6, IPV6_TCLASS, int, get_int32, 4);
#else
            return 1; /* Socket option not implemented */
#endif
            break;
        }
        default:
            /* Unknown socket option */
            return 1;
        }
    }
#undef COMPILE_ANCILLARY_DATA_ITEM

    /* End of options to compile */
    mhdr->msg_controllen = controllen;
    return 0;
}
3447 #endif /* ifndef __WIN32__ */
3448 
/*
** Passive mode reply, sent to the process recorded as the caller of the
** dequeued async operation.
**
** for UDP (and for TCP, which calls this with phsz == 0 and mp == NULL):
**        {inet_async, S, Ref, {ok, Data=[H1,...,Hsz | BinData]}}
** or (in the list mode)
**	  {inet_async, S, Ref, {ok, Data=[H1,...,Hsz]}}
** or, when mp is non-NULL (not compiled on Windows), Data is wrapped
** together with the ancillary data:
**	  {inet_async, S, Ref, {ok, {Data, AncData}}}
**
** for SCTP:
**	  {inet_async, S, Ref, {ok, {[H1,...,HSz], [AncilData], Data_OR_Event}}}
** where  each AncilDatum:Tuple();
**	  Data:List() or Binary(), but if List(), then without the Addr part,
**				   which is moved in front;
**	  Event:Tuple();
** or
** 	  {inet_async, S, Ref, {error, {[H1,...,HSz], [AncilData], ErrorTerm}}}
**
** Cf: the output of send_async_error() is
**	  {inet_async, S, Ref, {error, Cause:Atom()}}
**
** Parameters:
**   desc  - socket descriptor; desc->hsz, desc->mode and desc->dport are read,
**           desc->caller is reset to 0 before the term is sent.
**   phsz  - extra header size, added to desc->hsz to give the number of
**           leading bytes that are delivered as list elements.
**   bin, offs, len - the received bytes are bin->orig_bytes[offs .. offs+len-1].
**   mp    - NULL, or a pointer that is used as a struct msghdr* carrying
**           ancillary data; it is dereferenced unconditionally for SCTP.
**
** Returns -1 if deq_async() fails, otherwise whatever erl_drv_send_term()
** returns.
*/
static int
inet_async_binary_data
	(inet_descriptor* desc, unsigned  int phsz,
	 ErlDrvBinary   * bin,  int offs, int len, void *mp)
{
    /* Total number of header bytes in front of the payload */
    unsigned int hsz = desc->hsz + phsz;
    ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN];
    ErlDrvTermData caller = desc->caller;
    int aid;
    int req;	/* filled in by deq_async() but not used any further here */
    int i = 0;	/* next free index in spec[] */
#ifdef HAVE_SCTP
    int ok_pos;
#endif

    DEBUGF(("inet_async_binary_data(%ld): offs=%d, len=%d\r\n",
	    (long)desc->port, offs, len));

    /* Pick up the id and the caller of the queued async operation;
       give up if deq_async() reports failure: */
    if (deq_async(desc, &aid, &caller, &req) < 0)
	return -1;

    i = LOAD_ATOM(spec, i, am_inet_async);	/* 'inet_async' */
    i = LOAD_PORT(spec, i, desc->dport);	/* S		*/
    i = LOAD_INT (spec, i, aid);		/* Ref		*/

#ifdef HAVE_SCTP
    /* Need to memoise the position of the 'ok' atom written, as it may
       later be overridden by an 'error': */
    ok_pos = i;
#endif
    i = LOAD_ATOM(spec, i, am_ok);

#ifdef HAVE_SCTP
    if (IS_SCTP(desc))
    {	/* For SCTP we always have desc->hsz==0 (i.e., no application-level
	   headers are used), so hsz==phsz (see above): */
	int sz;
        struct msghdr *mptr;

        mptr = mp;
	ASSERT (hsz == phsz && hsz != 0);
	sz = len - hsz;  /* Size of the msg data proper, w/o the addr */

	/* We always put the Addr as a list in front */
	i = LOAD_STRING(spec, i, bin->orig_bytes+offs, hsz);

	/* Put in the list (possibly empty) of Ancillary Data: */
	i = sctp_parse_ancillary_data (spec, i, mptr);

	/* Then: Data or Event (Notification)? */
	if (mptr->msg_flags & MSG_NOTIFICATION)
	    /* This is an Event, parse it. It may indicate a normal or an error
	       condition; in the latter case,   the 'ok' above is overridden by
	       an 'error', and the Event we receive contains the error term: */
	    i = sctp_parse_async_event
		(spec, i, ok_pos, am_error, desc, bin, offs+hsz, sz);
        else
    	    /* This is SCTP data, not a notification event.   The data can be
	       returned as a List or as a Binary, similar to the generic case:
	    */
	    if (desc->mode == INET_MODE_LIST)
		/* INET_MODE_LIST   => [H1,H2,...Hn], addr and data together,
		   but the Addr has already been parsed, so start at offs+hsz:
		*/
		i = LOAD_STRING(spec, i, bin->orig_bytes+offs+hsz, sz);
	    else
	    	/* INET_MODE_BINARY => Binary */
		i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);

	/* Close up the {[H1,...,HSz], [AncilData], Event_OR_Data} tuple. This
	   is valid even in the case when Event is an error notification:  */
	i = LOAD_TUPLE (spec, i, 3);
    }
    else
#endif  /* HAVE_SCTP */
    {
        /* Generic case. Both Addr and Data
         * (or a single list of them together) are returned: */

        /* List mode, or fewer bytes than the header size: everything
         * is delivered as one list. */
        if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
            /* INET_MODE_LIST => [H1,H2,...Hn] */
            i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
        }
        else {
            /* INET_MODE_BINARY => [H1,H2,...HSz | Binary] or [Binary]: */
            int sz = len - hsz;
            /* The binary tail is loaded first; the header bytes are then
             * consed in front of it (only when there are any). */
            i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);
            if (hsz > 0)
                i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
        }

#ifndef __WIN32__
        if (mp) {
            /* We got ancillary data from an UDP recvmsg.
             * Insert an additional tuple level {[F|AddrData],AncData} */
            i = udp_parse_ancillary_data(spec, i, (struct msghdr*)mp);
            i = LOAD_TUPLE(spec, i, 2);
        }
#endif
    }

    /* Close up the {ok, ...} or {error, ...} tuple: */
    i = LOAD_TUPLE(spec, i, 2);

    /* Close up the outer {inet_async, S, Ref, {ok|error, ...}} tuple: */
    i = LOAD_TUPLE(spec, i, 4);

    ASSERT(i <= PACKET_ERL_DRV_TERM_DATA_LEN);
    /* The request has been answered: forget the caller on the descriptor and
       send to the caller value obtained above. */
    desc->caller = 0;
    return erl_drv_send_term(desc->dport, caller, spec, i);
}
3579 
3580 /*
3581 ** active mode message:
3582 **        {tcp, S, [H1,...Hsz | Data]}
3583 */
tcp_message(inet_descriptor * desc,const char * buf,int len)3584 static int tcp_message(inet_descriptor* desc, const char* buf, int len)
3585 {
3586     unsigned int hsz = desc->hsz;
3587     ErlDrvTermData spec[20];
3588     int i = 0;
3589 
3590     DEBUGF(("tcp_message(%ld): len = %d\r\n", (long)desc->port, len));
3591     /* XXX fprintf(stderr,"tcp_message send.\r\n"); */
3592 
3593     i = LOAD_ATOM(spec, i, am_tcp);
3594     i = LOAD_PORT(spec, i, desc->dport);
3595 
3596     if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
3597 	i = LOAD_STRING(spec, i, buf, len); /* => [H1,H2,...Hn] */
3598 	i = LOAD_TUPLE(spec, i, 3);
3599 	ASSERT(i <= 20);
3600 	return erl_drv_output_term(desc->dport, spec, i);
3601     }
3602     else {
3603 	/* INET_MODE_BINARY => [H1,H2,...HSz | Binary] */
3604 	int sz = len - hsz;
3605 	int code;
3606 
3607 	i = LOAD_BUF2BINARY(spec, i, buf+hsz, sz);
3608 	if (hsz > 0)
3609 	    i = LOAD_STRING_CONS(spec, i, buf, hsz);
3610 	i = LOAD_TUPLE(spec, i, 3);
3611 	ASSERT(i <= 20);
3612 	code = erl_drv_output_term(desc->dport, spec, i);
3613 	return code;
3614     }
3615 }
3616 
3617 /*
3618 ** active mode message:
3619 **        {tcp, S, [H1,...Hsz | Data]}
3620 */
3621 static int
tcp_binary_message(inet_descriptor * desc,ErlDrvBinary * bin,int offs,int len)3622 tcp_binary_message(inet_descriptor* desc, ErlDrvBinary* bin, int offs, int len)
3623 {
3624     unsigned int hsz = desc->hsz;
3625     ErlDrvTermData spec[20];
3626     int i = 0;
3627 
3628     DEBUGF(("tcp_binary_message(%ld): len = %d\r\n", (long)desc->port, len));
3629 
3630     i = LOAD_ATOM(spec, i, am_tcp);
3631     i = LOAD_PORT(spec, i, desc->dport);
3632 
3633     if ((desc->mode == INET_MODE_LIST) || (hsz > len)) {
3634 	/* INET_MODE_LIST => [H1,H2,...Hn] */
3635 	i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
3636     }
3637     else {
3638 	/* INET_MODE_BINARY => [H1,H2,...HSz | Binary] */
3639 	int sz = len - hsz;
3640 
3641 	i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);
3642 	if (hsz > 0)
3643 	    i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
3644     }
3645     i = LOAD_TUPLE(spec, i, 3);
3646     ASSERT(i <= 20);
3647     return erl_drv_output_term(desc->dport, spec, i);
3648 }
3649 
3650 /*
3651 ** send:  active mode  {tcp_closed, S}
3652 */
tcp_closed_message(tcp_descriptor * desc)3653 static int tcp_closed_message(tcp_descriptor* desc)
3654 {
3655     ErlDrvTermData spec[6];
3656     int i = 0;
3657 
3658     DEBUGF(("tcp_closed_message(%ld):\r\n", (long)desc->inet.port));
3659     if (!(desc->tcp_add_flags & TCP_ADDF_CLOSE_SENT)) {
3660 	desc->tcp_add_flags |= TCP_ADDF_CLOSE_SENT;
3661 
3662 	i = LOAD_ATOM(spec, i, am_tcp_closed);
3663 	i = LOAD_PORT(spec, i, desc->inet.dport);
3664 	i = LOAD_TUPLE(spec, i, 2);
3665 	ASSERT(i <= 6);
3666 	return erl_drv_output_term(desc->inet.dport, spec, i);
3667     }
3668     return 0;
3669 }
3670 
3671 /*
3672 ** send active message {tcp_error, S, Error}
3673 */
tcp_error_message(tcp_descriptor * desc,int err)3674 static int tcp_error_message(tcp_descriptor* desc, int err)
3675 {
3676     ErlDrvTermData spec[8];
3677     ErlDrvTermData am_err = error_atom(err);
3678     int i = 0;
3679 
3680     DEBUGF(("tcp_error_message(%ld): %d\r\n", (long)desc->inet.port, err));
3681 
3682     i = LOAD_ATOM(spec, i, am_tcp_error);
3683     i = LOAD_PORT(spec, i, desc->inet.dport);
3684     i = LOAD_ATOM(spec, i, am_err);
3685     i = LOAD_TUPLE(spec, i, 3);
3686     ASSERT(i <= 8);
3687     return erl_drv_output_term(desc->inet.dport, spec, i);
3688 }
3689 
3690 #ifdef HAVE_UDP
/*
** active mode message:
**    {udp,  S, IP, Port, [H1,...Hsz | Data]} or
**    {udp,  S, IP, Port, AncData, [H1,...Hsz | Data]}
**                        (when mp is non-NULL; not compiled on Windows) or
**    {sctp, S, IP, Port, {[AncilData],  Event_or_Data}}
** where
** 	  [H1,...,HSz] are msg headers (without IP/Port, UDP only),
**    [AddrLen, H2,...,HSz] are msg headers for UDP AF_UNIX only
**	  Data  : List() | Binary()
**
** Parameters:
**   desc  - socket descriptor (hsz, mode and dport are read).
**   bin, offs, len - the packet is bin->orig_bytes[offs .. offs+len-1]; it
**           starts with an encoded address whose size is given by addrlen().
**   mp    - NULL, or a pointer used as a struct msghdr* with ancillary data;
**           it is dereferenced unconditionally for SCTP.
**
** Returns the result of erl_drv_output_term().
*/
static int packet_binary_message(inet_descriptor* desc,
                                 ErlDrvBinary* bin, int offs, int len,
                                 void *mp)
{
    unsigned int hsz = desc->hsz;
    ErlDrvTermData spec [PACKET_ERL_DRV_TERM_DATA_LEN];
    int i = 0;	/* next free index in spec[] */
    int alen;	/* size of the encoded address in front of the packet */
    char* data = bin->orig_bytes+offs;

    DEBUGF(("packet_binary_message(%ld): len = %d\r\n",
	   (long)desc->port, len));
#   ifdef HAVE_SCTP
    i = LOAD_ATOM(spec, i, IS_SCTP(desc) ? am_sctp : am_udp); /* UDP|SCTP */
#   else
    i = LOAD_ATOM(spec, i, am_udp );			      /* UDP only */
#   endif
    i = LOAD_PORT(spec, i, desc->dport);   		      /* S	  */

    alen = addrlen(data);
    i = load_address(spec, i, data);     /* IP,Port | Family,Addr */

    /* Step past the address: offs/len now describe the payload only */
    offs += alen;
    len  -= alen;

#   ifdef HAVE_SCTP
    if (!IS_SCTP(desc))
#   endif
    {
        /* If we got ancillary data from an UDP recvmsg, insert it as an
         * extra element in front of the data; the message then becomes
         * a 6-tuple instead of a 5-tuple (see below).
         */
#ifndef __WIN32__
        if (mp) i = udp_parse_ancillary_data(spec, i, (struct msghdr*)mp);
#endif
	if ((desc->mode == INET_MODE_LIST) || (hsz > len))
	    /* INET_MODE_LIST, or only headers => [H1,H2,...Hn] */
	    i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
	else {
	    /* INET_MODE_BINARY => [H1,H2,...HSz | Binary]	*/
	    int sz = len - hsz;

	    /* The binary tail is loaded first; the header bytes are then
	       consed in front of it (only when there are any). */
	    i = LOAD_BINARY(spec, i, bin, offs+hsz, sz);
	    if (hsz > 0)
		i = LOAD_STRING_CONS(spec, i, bin->orig_bytes+offs, hsz);
	}
        /* Close up the outer 5-or-6-tuple */
#ifndef __WIN32__
        if (mp) i = LOAD_TUPLE(spec, i, 6);
        else
#endif
            i = LOAD_TUPLE(spec, i, 5);
    }
#   ifdef HAVE_SCTP
    else
    {
        struct msghdr *mptr;

        mptr = mp;
	/* For SCTP we always have desc->hsz==0 (i.e., no application-level
	   headers are used): */
	ASSERT(hsz == 0);

	/* Put in the list (possibly empty) of Ancillary Data: */
	i = sctp_parse_ancillary_data (spec, i, mptr);

	/* Then: Data or Event (Notification)? */
	if (mptr->msg_flags & MSG_NOTIFICATION)
	    /* This is an Event, parse it. It may indicate a normal or an error
	       condition; in the latter case,  the initial 'sctp' atom is over-
	       ridden by 'sctp_error',   and the Event we receive contains the
	       error term: */
	    i = sctp_parse_async_event
		(spec, i, 0, am_sctp_error, desc, bin, offs, len);
        else
    	    /* This is SCTP data, not a notification event.   The data can be
	       returned as a List or as a Binary, similar to the generic case:
	    */
	    if (desc->mode == INET_MODE_LIST)
		/* INET_MODE_LIST   => [H1,H2,...Hn], addr and data together,
		   but the Addr has already been parsed, so start at offs:
		*/
		i = LOAD_STRING(spec, i, bin->orig_bytes+offs, len);
	    else
	    	/* INET_MODE_BINARY => Binary */
		i = LOAD_BINARY(spec, i, bin, offs, len);

	/* Close up the {[AncilData], Event_OR_Data} tuple: */
	i = LOAD_TUPLE (spec, i, 2);
        /* Close up the outer 5-tuple: */
        i = LOAD_TUPLE(spec, i, 5);
    }
#   endif /* HAVE_SCTP */

    ASSERT(i <= PACKET_ERL_DRV_TERM_DATA_LEN);
    return erl_drv_output_term(desc->dport, spec, i);
}
3797 #endif
3798 
3799 /*
3800 ** active mode message: send active-to-passive transition message
3801 **        {tcp_passive, S} or
3802 **        {udp_passive, S} or
3803 **        {sctp_passive, S}
3804 */
packet_passive_message(inet_descriptor * desc)3805  static int packet_passive_message(inet_descriptor* desc)
3806  {
3807      ErlDrvTermData spec[6];
3808      int i = 0;
3809 
3810      DEBUGF(("packet_passive_message(%ld):\r\n", (long)desc->port));
3811 
3812 #if !defined(HAVE_UDP) && !defined(HAVE_SCTP)
3813      i = LOAD_ATOM(spec, i, am_tcp_passive);
3814 #else
3815      if (desc->sprotocol == IPPROTO_TCP)
3816          i = LOAD_ATOM(spec, i, am_tcp_passive);
3817      else {
3818 #ifdef HAVE_SCTP
3819          i = LOAD_ATOM(spec, i, IS_SCTP(desc) ? am_sctp_passive : am_udp_passive);
3820 #else
3821          i = LOAD_ATOM(spec, i, am_udp_passive);
3822 #endif
3823      }
3824 #endif
3825      i = LOAD_PORT(spec, i, desc->dport);
3826      i = LOAD_TUPLE(spec, i, 2);
3827      ASSERT(i <= 6);
3828      return erl_drv_output_term(desc->dport, spec, i);
3829  }
3830 
3831 #ifdef HAVE_UDP
3832 /*
3833 ** send active message {udp_error|sctp_error, S, Error}
3834 */
packet_error_message(udp_descriptor * udesc,int err)3835 static int packet_error_message(udp_descriptor* udesc, int err)
3836 {
3837     inet_descriptor* desc = INETP(udesc);
3838     ErlDrvTermData spec[2*LOAD_ATOM_CNT + LOAD_PORT_CNT + LOAD_TUPLE_CNT];
3839     ErlDrvTermData am_err = error_atom(err);
3840     int i = 0;
3841 
3842     DEBUGF(("packet_error_message(%ld): %d\r\n",
3843 	   (long)desc->port, err));
3844 
3845 #   ifdef HAVE_SCTP
3846     if (IS_SCTP(desc) )
3847     	i = LOAD_ATOM(spec, i, am_sctp_error);
3848     else
3849 #   endif
3850 	i = LOAD_ATOM(spec, i, am_udp_error);
3851 
3852     i = LOAD_PORT(spec, i, desc->dport);
3853     i = LOAD_ATOM(spec, i, am_err);
3854     i = LOAD_TUPLE(spec, i, 3);
3855     ASSERT(i == sizeof(spec)/sizeof(*spec));
3856     return erl_drv_output_term(desc->dport, spec, i);
3857 }
3858 #endif
3859 
3860 /*
3861 ** active=TRUE:
3862 **  (NOTE! distribution MUST use active=TRUE, deliver=PORT)
3863 **       deliver=PORT  {S, {data, [H1,..Hsz | Data]}}
3864 **       deliver=TERM  {tcp, S, [H1..Hsz | Data]}
3865 **
3866 ** active=FALSE:
3867 **       {async, S, Ref, {ok,[H1,...Hsz | Data]}}
3868 */
tcp_reply_data(tcp_descriptor * desc,char * buf,int len)3869 static int tcp_reply_data(tcp_descriptor* desc, char* buf, int len)
3870 {
3871     int code;
3872     const char* body = buf;
3873     int bodylen = len;
3874 
3875     packet_get_body(desc->inet.htype, &body, &bodylen);
3876 
3877     if (desc->inet.deliver == INET_DELIVER_PORT) {
3878         code = inet_port_data(INETP(desc), body, bodylen);
3879     }
3880     else if ((code=packet_parse(desc->inet.htype, buf, len,
3881                                 &desc->http_state, &packet_callbacks,
3882                                 desc)) == 0) {
3883         /* No body parsing, return raw binary */
3884         if (desc->inet.active == INET_PASSIVE)
3885             return inet_async_data(INETP(desc), body, bodylen);
3886         else
3887             code = tcp_message(INETP(desc), body, bodylen);
3888     }
3889 
3890     if (code < 0)
3891 	return code;
3892     INET_CHECK_ACTIVE_TO_PASSIVE(INETP(desc));
3893     return code;
3894 }
3895 
3896 static int
tcp_reply_binary_data(tcp_descriptor * desc,ErlDrvBinary * bin,int offs,int len)3897 tcp_reply_binary_data(tcp_descriptor* desc, ErlDrvBinary* bin, int offs, int len)
3898 {
3899     int code;
3900     const char* buf = bin->orig_bytes + offs;
3901     const char* body = buf;
3902     int bodylen = len;
3903 
3904     packet_get_body(desc->inet.htype, &body, &bodylen);
3905     offs = body - bin->orig_bytes; /* body offset now */
3906 
3907     if (desc->inet.deliver == INET_DELIVER_PORT)
3908         code = inet_port_binary_data(INETP(desc), bin, offs, bodylen);
3909     else if ((code=packet_parse(desc->inet.htype, buf, len, &desc->http_state,
3910                                      &packet_callbacks,desc)) == 0) {
3911         /* No body parsing, return raw data */
3912         if (desc->inet.active == INET_PASSIVE)
3913             return inet_async_binary_data(INETP(desc), 0, bin, offs, bodylen, NULL);
3914         else
3915             code = tcp_binary_message(INETP(desc), bin, offs, bodylen);
3916     }
3917     if (code < 0)
3918 	return code;
3919     INET_CHECK_ACTIVE_TO_PASSIVE(INETP(desc));
3920     return code;
3921 }
3922 
3923 #ifdef HAVE_UDP
3924 static int
packet_reply_binary_data(inet_descriptor * desc,unsigned int hsz,ErlDrvBinary * bin,int offs,int len,void * mp)3925 packet_reply_binary_data(inet_descriptor* desc, unsigned  int hsz,
3926 			 ErlDrvBinary   * bin,  int offs, int len,
3927                          void *mp)
3928 {
3929     int code;
3930 
3931     if (desc->active == INET_PASSIVE)
3932 	/* "inet" is actually for both UDP and SCTP, as well as TCP! */
3933 	return inet_async_binary_data(desc, hsz, bin, offs, len, mp);
3934     else
3935     {	/* INET_ACTIVE or INET_ONCE: */
3936 	if (desc->deliver == INET_DELIVER_PORT)
3937 	    code = inet_port_binary_data(desc, bin, offs, len);
3938 	else
3939 	    code = packet_binary_message(desc, bin, offs, len, mp);
3940 	if (code < 0)
3941 	    return code;
3942         INET_CHECK_ACTIVE_TO_PASSIVE(desc);
3943 	return code;
3944     }
3945 }
3946 #endif
3947 
3948 /* ----------------------------------------------------------------------------
3949 
3950    INET
3951 
3952 ---------------------------------------------------------------------------- */
3953 
/*
 * Initialise the socket layer.  Returns 1 on success and 0 on failure.
 *
 * On Windows this starts Winsock and requires version 2.0 exactly; the
 * outcome of the first attempt is cached in a static and returned by every
 * later call.  On all other platforms there is nothing to do.
 */
static int
sock_init(void) /* May be called multiple times. */
{
#ifdef __WIN32__
    static int res = -1; /* res < 0 == initialization never attempted */

    if (res < 0) {
        WSADATA wsaData;
        WORD wVersionRequested = MAKEWORD(2,0);

        if (WSAStartup(wVersionRequested, &wsaData) == 0
            && LOBYTE(wsaData.wVersion) == 2
            && HIBYTE(wsaData.wVersion) == 0) {
            find_dynamic_functions();
            res = 1;
        }
        else {
            /* Startup failed or gave us the wrong version. */
            WSACleanup();
            res = 0;
        }
    }
    return res;
#else
    return 1;
#endif
}
3984 
3985 #ifdef HAVE_SCTP
/*
 * Set up the atoms needed by the SCTP part of the driver.  Called from
 * inet_init() right before the SCTP driver entry is added.
 *
 * NOTE(review): INIT_ATOM is defined outside this chunk; presumably
 * INIT_ATOM(x) stores the atom 'x' in the global am_x - confirm against
 * the macro definition.
 */
static void inet_init_sctp(void) {
    INIT_ATOM(sctp);
    INIT_ATOM(sctp_passive);
    INIT_ATOM(sctp_error);
    INIT_ATOM(true);
    INIT_ATOM(false);
    INIT_ATOM(buffer);
    INIT_ATOM(mode);
    INIT_ATOM(list);
    INIT_ATOM(binary);
    INIT_ATOM(active);
    INIT_ATOM(once);
    INIT_ATOM(multi);
    INIT_ATOM(buffer);		/* NOTE(review): repeated, see six lines up */
    INIT_ATOM(linger);
    INIT_ATOM(recbuf);
    INIT_ATOM(sndbuf);
    INIT_ATOM(reuseaddr);
    INIT_ATOM(dontroute);
    INIT_ATOM(priority);
    INIT_ATOM(recvtos);
    INIT_ATOM(recvtclass);
    INIT_ATOM(recvttl);
    INIT_ATOM(ipv6_v6only);
    INIT_ATOM(netns);
    INIT_ATOM(bind_to_device);

    /* Option names */
    INIT_ATOM(sctp_rtoinfo);
    INIT_ATOM(sctp_associnfo);
    INIT_ATOM(sctp_initmsg);
    INIT_ATOM(sctp_autoclose);
    INIT_ATOM(sctp_nodelay);
    INIT_ATOM(sctp_disable_fragments);
    INIT_ATOM(sctp_i_want_mapped_v4_addr);
    INIT_ATOM(sctp_maxseg);
    INIT_ATOM(sctp_set_peer_primary_addr);
    INIT_ATOM(sctp_primary_addr);
    INIT_ATOM(sctp_adaptation_layer);
    INIT_ATOM(sctp_peer_addr_params);
    INIT_ATOM(sctp_default_send_param);
    INIT_ATOM(sctp_events);
    INIT_ATOM(sctp_delayed_ack_time);
    INIT_ATOM(sctp_status);
    INIT_ATOM(sctp_get_peer_addr_info);

    /* Record names */
    INIT_ATOM(sctp_sndrcvinfo);
    INIT_ATOM(sctp_assoc_change);
    INIT_ATOM(sctp_paddr_change);
    INIT_ATOM(sctp_remote_error);
    INIT_ATOM(sctp_send_failed);
    INIT_ATOM(sctp_shutdown_event);
    INIT_ATOM(sctp_adaptation_event);
    INIT_ATOM(sctp_pdapi_event);
    INIT_ATOM(sctp_assocparams);
    INIT_ATOM(sctp_prim);
    INIT_ATOM(sctp_setpeerprim);
    INIT_ATOM(sctp_setadaptation);
    INIT_ATOM(sctp_paddrparams);
    INIT_ATOM(sctp_event_subscribe);
    INIT_ATOM(sctp_assoc_value);
    INIT_ATOM(sctp_paddrinfo);

    /* For #sctp_sndrcvinfo{}: */
    INIT_ATOM(unordered);
    INIT_ATOM(addr_over);
    INIT_ATOM(abort);
    INIT_ATOM(eof);

    /* For #sctp_assoc_change{}: */
    INIT_ATOM(comm_up);
    INIT_ATOM(comm_lost);
    INIT_ATOM(restart);
    INIT_ATOM(shutdown_comp);
    INIT_ATOM(cant_assoc);

    /* For #sctp_paddr_change{}: */
    INIT_ATOM(addr_available);
    INIT_ATOM(addr_unreachable);
    INIT_ATOM(addr_removed);
    INIT_ATOM(addr_added);
    INIT_ATOM(addr_made_prim);
    INIT_ATOM(addr_confirmed);

    INIT_ATOM(short_recv);
    INIT_ATOM(wrong_anc_data);

    /* For #sctp_pdapi_event{}: */
    INIT_ATOM(partial_delivery_aborted);

    /* For #sctp_paddrparams{}: */
    INIT_ATOM(hb_enable);
    INIT_ATOM(hb_disable);
    INIT_ATOM(hb_demand);
    INIT_ATOM(pmtud_enable);
    INIT_ATOM(pmtud_disable);
    INIT_ATOM(sackdelay_enable);
    INIT_ATOM(sackdelay_disable);

    /* For #sctp_paddrinfo{}: */
    INIT_ATOM(active);		/* also set up among the generic atoms above */
    INIT_ATOM(inactive);
#    if HAVE_DECL_SCTP_UNCONFIRMED
    INIT_ATOM(unconfirmed);
#    endif

    /* For #sctp_status{}: the first three states are only set up when the
       corresponding HAVE_DECL_SCTP_* configure result is non-zero. */
#    if HAVE_DECL_SCTP_EMPTY
    INIT_ATOM(empty);
#    endif
#    if HAVE_DECL_SCTP_BOUND
    INIT_ATOM(bound);
#    endif
#    if HAVE_DECL_SCTP_LISTEN
    INIT_ATOM(listen);
#    endif
    INIT_ATOM(cookie_wait);
    INIT_ATOM(cookie_echoed);
    INIT_ATOM(established);
    INIT_ATOM(shutdown_pending);
    INIT_ATOM(shutdown_sent);
    INIT_ATOM(shutdown_received);
    INIT_ATOM(shutdown_ack_sent);
}
4111 #endif /* HAVE_SCTP */
4112 
/*
 * One-time initialisation of the inet driver family.
 *
 * Initialises the socket layer, creates the thread-specific-data key for the
 * buffer stack, sets up the atoms used in messages, and adds the TCP driver
 * entry plus, depending on the build configuration, the UDP and SCTP
 * driver entries.  The dummy inet driver entry is removed on both the
 * success and the failure path.
 *
 * Returns 0 on success, or -1 if sock_init() or erl_drv_tsd_key_create()
 * fails.
 */
static int inet_init()
{
    if (!sock_init())
	goto error;

    if (0 != erl_drv_tsd_key_create("inet_buffer_stack_key", &buffer_stack_key))
	goto error;

    /* Compile-time checks of the raw address sizes the driver relies on */
    ERTS_CT_ASSERT(sizeof(struct in_addr) == 4);
#   if defined(HAVE_IN6) && defined(AF_INET6)
    ERTS_CT_ASSERT(sizeof(struct in6_addr) == 16);
#   endif

    INIT_ATOM(ok);
    INIT_ATOM(undefined);
    INIT_ATOM(unspec);
    INIT_ATOM(tcp);
#ifdef HAVE_UDP
    INIT_ATOM(udp);
#endif
    INIT_ATOM(error);
    INIT_ATOM(einval);
    INIT_ATOM(inet_async);
    INIT_ATOM(inet_reply);
    INIT_ATOM(timeout);
    INIT_ATOM(closed);
    INIT_ATOM(tcp_passive);
    INIT_ATOM(tcp_closed);
    INIT_ATOM(tcp_error);
#ifdef HAVE_UDP
    INIT_ATOM(udp_passive);
    INIT_ATOM(udp_error);
#endif
#ifdef HAVE_SYS_UN_H
    INIT_ATOM(local);
#endif
    INIT_ATOM(empty_out_q);
    INIT_ATOM(ssl_tls);
#ifndef __WIN32__
    INIT_ATOM(tos);
    INIT_ATOM(tclass);
    INIT_ATOM(ttl);
#endif

    INIT_ATOM(http_eoh);
    INIT_ATOM(http_header);
    INIT_ATOM(http_request);
    INIT_ATOM(http_response);
    INIT_ATOM(http_error);
    INIT_ATOM(abs_path);
    INIT_ATOM(absoluteURI);
    /* '*' cannot be part of an identifier, so this atom is made by hand */
    am_star = driver_mk_atom("*");
    INIT_ATOM(http);
    INIT_ATOM(https);
    INIT_ATOM(scheme);

#ifdef HAVE_SENDFILE
    INIT_ATOM(sendfile);
#endif

    /* add TCP, UDP and SCTP drivers */
    add_driver_entry(&tcp_inet_driver_entry);
#ifdef HAVE_UDP
    add_driver_entry(&udp_inet_driver_entry);
#endif

#ifdef HAVE_SCTP
    /* Check the size of SCTP AssocID -- currently both this driver and the
       Erlang part require 32 bit: */
    ERTS_CT_ASSERT(sizeof(sctp_assoc_t)==ASSOC_ID_LEN);
#   if defined(HAVE_SCTP_BINDX)
    /* The SCTP library functions are available at build time: point the
       p_sctp_* function pointers straight at them, or at NULL for the
       optional ones that configure did not find. */
    p_sctp_bindx = sctp_bindx;
#     if defined(HAVE_SCTP_PEELOFF)
    p_sctp_peeloff = sctp_peeloff;
#     else
    p_sctp_peeloff = NULL;
#     endif
#     if defined(HAVE_SCTP_GETLADDRS) && defined(HAVE_SCTP_FREELADDRS)
    p_sctp_getladdrs = sctp_getladdrs;
    p_sctp_freeladdrs = sctp_freeladdrs;
#     else
    p_sctp_getladdrs = NULL;
    p_sctp_freeladdrs = NULL;
#     endif
#     if defined(HAVE_SCTP_GETPADDRS) && defined(HAVE_SCTP_FREEPADDRS)
    p_sctp_getpaddrs = sctp_getpaddrs;
    p_sctp_freepaddrs = sctp_freepaddrs;
#     else
    p_sctp_getpaddrs = NULL;
    p_sctp_freepaddrs = NULL;
#     endif
    inet_init_sctp();
    add_driver_entry(&sctp_inet_driver_entry);
#   else
#       ifndef LIBSCTP
#           error LIBSCTP not defined
#       endif
    {
	/* No sctp_bindx at build time: try to open the library named by
	   LIBSCTP at run time and look the functions up by name.  If the
	   library cannot be opened nothing is done here, and the SCTP
	   driver entry is not added. */
	static void *h_libsctp = NULL;

	if (erts_sys_ddll_open_noext(STRINGIFY(LIBSCTP), &h_libsctp, NULL)
	    == 0) {
	    void *ptr;
	    /* sctp_bindx is mandatory: the SCTP driver is only added when
	       it is found.  The remaining functions are optional. */
	    if (erts_sys_ddll_sym(h_libsctp, "sctp_bindx", &ptr) == 0) {
		p_sctp_bindx = ptr;
		if (erts_sys_ddll_sym(h_libsctp, "sctp_peeloff", &ptr) == 0) {
		    p_sctp_peeloff = ptr;
		}
		else p_sctp_peeloff = NULL;
		if (erts_sys_ddll_sym(h_libsctp, "sctp_getladdrs", &ptr) == 0) {
		    p_sctp_getladdrs = ptr;
		}
		else p_sctp_getladdrs = NULL;
		if (erts_sys_ddll_sym(h_libsctp, "sctp_freeladdrs", &ptr) == 0) {
		    p_sctp_freeladdrs = ptr;
		}
		else {
		    /* Without the free function the get function is
		       disabled as well */
		    p_sctp_freeladdrs = NULL;
		    p_sctp_getladdrs = NULL;
		}
		if (erts_sys_ddll_sym(h_libsctp, "sctp_getpaddrs", &ptr) == 0) {
		    p_sctp_getpaddrs = ptr;
		}
		else p_sctp_getpaddrs = NULL;
		if (erts_sys_ddll_sym(h_libsctp, "sctp_freepaddrs", &ptr) == 0) {
		    p_sctp_freepaddrs = ptr;
		}
		else {
		    /* Same rule as for the local address pair above */
		    p_sctp_freepaddrs = NULL;
		    p_sctp_getpaddrs = NULL;
		}
		inet_init_sctp();
		add_driver_entry(&sctp_inet_driver_entry);
	    }
	    else p_sctp_bindx = NULL;
	}
    }
#   endif
#endif

    /* remove the dummy inet driver */
    remove_driver_entry(&inet_driver_entry);
    return 0;

 error:
    /* The dummy driver entry is removed on failure as well */
    remove_driver_entry(&inet_driver_entry);
    return -1;
}
4261 
4262 
4263 /*
4264 ** Set an inaddr structure:
4265 **  *src = [P1,P0,X1,X2,.....]
4266 **  dst points to a structure large enugh to keep any kind
4267 **  of inaddr.
4268 ** *len is set to length of src on call
4269 ** and is set to actual length of dst on return
4270 ** return NULL if ok or ptr to errno string for error
4271 */
inet_set_address(int family,inet_address * dst,char ** src,ErlDrvSizeT * len)4272 static char* inet_set_address(int family, inet_address* dst,
4273 			      char* *src, ErlDrvSizeT* len)
4274 {
4275     short port;
4276 
4277     switch (family) {
4278     case AF_INET: {
4279         if (*len < 2+4) return str_einval;
4280 	sys_memzero((char*)dst, sizeof(struct sockaddr_in));
4281 	port = get_int16(*src);
4282 #ifndef NO_SA_LEN
4283 	dst->sai.sin_len    = sizeof(struct sockaddr_in);
4284 #endif
4285 	dst->sai.sin_family = family;
4286 	dst->sai.sin_port   = sock_htons(port);
4287 	sys_memcpy(&dst->sai.sin_addr, (*src)+2, 4);
4288 	*len = sizeof(struct sockaddr_in);
4289 	*src += 2 + 4;
4290 	return NULL;
4291     }
4292 #if defined(HAVE_IN6) && defined(AF_INET6)
4293     case AF_INET6: {
4294         if (*len < 2+16) return str_einval;
4295 	sys_memzero((char*)dst, sizeof(struct sockaddr_in6));
4296 	port = get_int16(*src);
4297 #ifndef NO_SA_LEN
4298 	dst->sai6.sin6_len    = sizeof(struct sockaddr_in6);
4299 #endif
4300 	dst->sai6.sin6_family = family;
4301 	dst->sai6.sin6_port   = sock_htons(port);
4302 	dst->sai6.sin6_flowinfo = 0;   /* XXX this may be set as well ?? */
4303 	sys_memcpy(&dst->sai6.sin6_addr, (*src)+2, 16);
4304 	*len = sizeof(struct sockaddr_in6);
4305 	*src += 2 + 16;
4306 	return NULL;
4307     }
4308 #endif
4309 #ifdef HAVE_SYS_UN_H
4310     case AF_UNIX: {
4311         int n;
4312         if (*len == 0) return str_einval;
4313 	n = *((unsigned char*)(*src)); /* Length field */
4314 	if (*len < 1+n) return str_einval;
4315 	if (n +
4316 #ifdef __linux__
4317             /* Make sure the address gets zero terminated
4318              * except when the first byte is \0 because then it is
4319              * sort of zero terminated although the zero termination
4320              * comes before the address...
4321              * This fix handles Linux's nonportable
4322              * abstract socket address extension.
4323              */
4324             ((*len) > 1 && (*src)[1] == '\0' ? 0 : 1)
4325 #else
4326             1
4327 #endif
4328             > sizeof(dst->sal.sun_path)) {
4329 	    return str_einval;
4330 	}
4331 	sys_memzero((char*)dst, sizeof(struct sockaddr_un));
4332 	dst->sal.sun_family = family;
4333 	sys_memcpy(dst->sal.sun_path, (*src)+1, n);
4334 	*len = offsetof(struct sockaddr_un, sun_path) + n;
4335 #ifndef NO_SA_LEN
4336         dst->sal.sun_len = *len;
4337 #endif
4338 	*src += 1 + n;
4339 	return NULL;
4340     }
4341 #endif
4342     }
4343     return str_eafnosupport;
4344 }
4345 
4346 /*
4347 ** Set an inaddr structure, address family comes from source data,
4348 ** or from argument if source data specifies constant address.
4349 **
4350 ** *src = [TAG,P1,P0]
4351 **            when TAG = INET_AF_ANY  | INET_AF_LOOPBACK
4352 ** *src = [TAG,P1,P0,X1,X2,...]
4353 **            when TAG = INET_AF_INET | INET_AF_INET6 | INET_AF_LOCAL
4354 ** *src = [TAG,Len,...]
4355 **            when TAG = INET_AF_LOCAL
4356 */
inet_set_faddress(int family,inet_address * dst,char ** src,ErlDrvSizeT * len)4357 static char *inet_set_faddress(int family, inet_address* dst,
4358 			       char* *src, ErlDrvSizeT* len) {
4359     int tag;
4360 
4361     if (*len < 1) return str_einval;
4362     (*len) --;
4363     tag = *((*src) ++);
4364     switch (tag) {
4365     case INET_AF_INET:
4366 	family = AF_INET;
4367 	break;
4368 #   if defined(HAVE_IN6) && defined(AF_INET6)
4369     case INET_AF_INET6:
4370 	family = AF_INET6;
4371 	break;
4372 #   endif
4373 #   ifdef HAVE_SYS_UN_H
4374     case INET_AF_LOCAL: {
4375         family = AF_UNIX;
4376         break;
4377     }
4378 #   endif
4379     case INET_AF_ANY:
4380     case INET_AF_LOOPBACK: {
4381 	int port;
4382 
4383 	if (*len < 2) return str_einval;
4384 	port = get_int16(*src);
4385 	switch (family) {
4386 	case AF_INET: {
4387 	    struct in_addr addr;
4388 	    switch (tag) {
4389 	    case INET_AF_ANY:
4390 		addr.s_addr = sock_htonl(INADDR_ANY);
4391 		break;
4392 	    case INET_AF_LOOPBACK:
4393 		addr.s_addr = sock_htonl(INADDR_LOOPBACK);
4394 		break;
4395 	    default:
4396 		return str_einval;
4397 	    }
4398 	    sys_memzero((char*)dst, sizeof(struct sockaddr_in));
4399 #ifndef NO_SA_LEN
4400 	    dst->sai.sin_len         = sizeof(struct sockaddr_in6);
4401 #endif
4402 	    dst->sai.sin_family      = family;
4403 	    dst->sai.sin_port        = sock_htons(port);
4404 	    dst->sai.sin_addr.s_addr = addr.s_addr;
4405 	    *len = sizeof(struct sockaddr_in);
4406 	}   break;
4407 #       if defined(HAVE_IN6) && defined(AF_INET6)
4408 	case AF_INET6: {
4409 	    const struct in6_addr* paddr;
4410 	    switch (tag) {
4411 	    case INET_AF_ANY:
4412 		paddr = &in6addr_any;
4413 		break;
4414 	    case INET_AF_LOOPBACK:
4415 		paddr = &in6addr_loopback;
4416 		break;
4417 	    default:
4418 		return str_einval;
4419 	    }
4420 	    sys_memzero((char*)dst, sizeof(struct sockaddr_in6));
4421 #ifndef NO_SA_LEN
4422 	    dst->sai6.sin6_len    = sizeof(struct sockaddr_in6);
4423 #endif
4424 	    dst->sai6.sin6_family = family;
4425 	    dst->sai6.sin6_port   = sock_htons(port);
4426 	    dst->sai6.sin6_flowinfo = 0;   /* XXX this may be set as well ?? */
4427 	    dst->sai6.sin6_addr = *paddr;
4428 	    *len = sizeof(struct sockaddr_in6);
4429 	}   break;
4430 #       endif
4431 	default:
4432 	    return str_einval;
4433 	}
4434 	*src += 2;
4435 	return NULL;
4436     }   break;
4437     default:
4438 	return str_eafnosupport;
4439     }
4440     return inet_set_address(family, dst, src, len);
4441 }
4442 
4443 /* Get a inaddr structure
4444 ** src = inaddr structure
4445 ** dst is filled with [F,P1,P0,X1,....]
4446 ** *len is the length of structure
4447 ** where F is the family code (coded)
4448 ** and *len is the length of dst on return
4449 ** (suitable to deliver to erlang)
4450 */
inet_get_address(char * dst,inet_address * src,unsigned int * len)4451 static int inet_get_address(char* dst, inet_address* src, unsigned int* len)
4452 {
4453     /* Compare the code with inet_address_to_erlang() */
4454     int family;
4455     short port;
4456 
4457     family = src->sa.sa_family;
4458     if ((family == AF_INET) && (*len >= sizeof(struct sockaddr_in))) {
4459 	dst[0] = INET_AF_INET;
4460 	port = sock_ntohs(src->sai.sin_port);
4461 	put_int16(port, dst+1);
4462 	sys_memcpy(dst+3, (char*)&src->sai.sin_addr, sizeof(struct in_addr));
4463 	*len = 3 + sizeof(struct in_addr);
4464 	return 0;
4465     }
4466 #if defined(HAVE_IN6) && defined(AF_INET6)
4467     else if ((family == AF_INET6) && (*len >= sizeof(struct sockaddr_in6))) {
4468 	dst[0] = INET_AF_INET6;
4469 	port = sock_ntohs(src->sai6.sin6_port);
4470 	put_int16(port, dst+1);
4471 	sys_memcpy(dst+3, (char*)&src->sai6.sin6_addr,sizeof(struct in6_addr));
4472 	*len = 3 + sizeof(struct in6_addr);
4473 	return 0;
4474     }
4475 #endif
4476 #ifdef HAVE_SYS_UN_H
4477     else if (family == AF_UNIX) {
4478         size_t n, m;
4479         if (*len < offsetof(struct sockaddr_un, sun_path)) return -1;
4480         n = *len - offsetof(struct sockaddr_un, sun_path);
4481         if (255 < n) return -1;
4482         m = my_strnlen(src->sal.sun_path, n);
4483 #ifdef __linux__
4484 	/* Assume that the address is a zero terminated string,
4485          * except when the first byte is \0 i.e the string length is 0,
4486          * then use the reported length instead.
4487 	 * This fix handles Linux's nonportable
4488          * abstract socket address extension.
4489 	 */
4490 	if (m == 0)  m = n;
4491 #endif
4492         dst[0] = INET_AF_LOCAL;
4493         dst[1] = (char) ((unsigned char) m);
4494         sys_memcpy(dst+2, src->sal.sun_path, m);
4495         *len = 1 + 1 + m;
4496         return 0;
4497       }
4498 #endif
4499     else if (family == AF_UNSPEC) {
4500         dst[0] = INET_AF_UNSPEC;
4501 	*len = 1;
4502     }
4503     else {
4504         dst[0] = INET_AF_UNDEFINED;
4505 	*len = 1;
4506     }
4507     return -1;
4508 }
4509 
4510 /* Same as the above, but take family from the address structure,
4511 ** and advance the address pointer to the next address
4512 ** according to the size of the current,
4513 ** and return the resulting encoded size
4514 */
4515 static int
inet_address_to_erlang(char * dst,inet_address ** src,SOCKLEN_T sz)4516 inet_address_to_erlang(char *dst, inet_address **src, SOCKLEN_T sz) {
4517     /* Compare the code with inet_get_address() */
4518     short port;
4519 
4520     switch ((*src)->sa.sa_family) {
4521     case AF_INET:
4522 	if (dst) {
4523 	    dst[0] = INET_AF_INET;
4524 	    port = sock_ntohs((*src)->sai.sin_port);
4525 	    put_int16(port, dst+1);
4526 	    sys_memcpy(dst+1+2, (char *) &(*src)->sai.sin_addr, 4);
4527 	}
4528 	(*src) = (inet_address *) (&(*src)->sai + 1);
4529 	return 1 + 2 + 4;
4530 #if defined(HAVE_IN6) && defined(AF_INET6)
4531     case AF_INET6:
4532 	if (dst) {
4533 	    dst[0] = INET_AF_INET6;
4534 	    port = sock_ntohs((*src)->sai6.sin6_port);
4535 	    put_int16(port, dst+1);
4536             VALGRIND_MAKE_MEM_DEFINED(&(*src)->sai6.sin6_addr,16); /* false undefs from syscall sctp_get[lp]addrs */
4537 	    sys_memcpy(dst+1+2, (char *) &(*src)->sai6.sin6_addr, 16);
4538 	}
4539 	(*src) = (inet_address *) (&(*src)->sai6 + 1);
4540 	return 1 + 2 + 16;
4541 #endif
4542 #ifdef HAVE_SYS_UN_H
4543     case AF_UNIX: {
4544         size_t n, m;
4545 	if (sz < offsetof(struct sockaddr_un, sun_path)) return -1;
4546 	n = sz - offsetof(struct sockaddr_un, sun_path);
4547 	if (255 < n) return -1;
4548         m = my_strnlen((*src)->sal.sun_path, n);
4549 #ifdef __linux__
4550 	/* Assume that the address is a zero terminated string,
4551          * except when the first byte is \0 i.e the string length is 0,
4552          * Then use the reported length instead.
4553 	 * This fix handles Linux's nonportable
4554          * abstract socket address extension.
4555 	 */
4556 	if (m == 0)  m = n;
4557 #endif
4558 	if (dst) {
4559 	    dst[0] = INET_AF_LOCAL;
4560 	    dst[1] = (char) ((unsigned char) m);
4561             sys_memcpy(dst+2, (*src)->sal.sun_path, m);
4562 	}
4563 	(*src) = (inet_address *) (&(*src)->sal + 1);
4564 	return 1 + 1 + m;
4565     }
4566 #endif
4567     default:
4568 	return -1;
4569     }
4570 }
4571 
4572 /* Encode n encoded addresses from addrs in the result buffer
4573 */
reply_inet_addrs(int n,inet_address * addrs,char ** rbuf,ErlDrvSizeT rsize,SOCKLEN_T sz)4574 static ErlDrvSizeT reply_inet_addrs
4575 (int n, inet_address *addrs, char **rbuf, ErlDrvSizeT rsize, SOCKLEN_T sz) {
4576     inet_address *ia;
4577     int i, s;
4578     ErlDrvSizeT rlen;
4579 
4580     if (IS_SOCKET_ERROR(n)) return ctl_error(sock_errno(), rbuf, rsize);
4581     if (n == 0) return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
4582 
4583     /* The sz argument is only used when we have got an actual size
4584      * of addrs[0] from e.g getsockname() and then n == 1
4585      * so we will loop over 1 element below.  Otherwise sz
4586      * would be expected to differ between addresses but that
4587      * can only happen for AF_UNIX and we will only be called with
4588      * n > 1 for SCTP and that will never (?) happen with AF_UNIX
4589      */
4590 
4591     /* Calculate result length */
4592     rlen = 1;
4593     ia = addrs;
4594     for (i = 0;  i < n;  i++) {
4595         s = inet_address_to_erlang(NULL, &ia, sz);
4596 	if (s < 0) break;
4597 	rlen += s;
4598     }
4599 
4600     if (rlen > rsize) (*rbuf) = ALLOC(rlen);
4601 
4602     (*rbuf)[0] = INET_REP_OK;
4603     rlen = 1;
4604     ia = addrs;
4605     for (i = 0;  i < n;  i++) {
4606         s = inet_address_to_erlang((*rbuf)+rlen, &ia, sz);
4607 	if (s < 0) break;
4608 	rlen += s;
4609     }
4610 
4611     return rlen;
4612 }
4613 
/* Detach the descriptor from its socket: deselect the event, and mark the
 * socket and event as invalid.  Does nothing if desc->s already is
 * INVALID_SOCKET.  On Windows the socket is closed here with sock_close().
 */
static void desc_close(inet_descriptor* desc)
{
    if (desc->s == INVALID_SOCKET)
        return;

#ifdef __WIN32__
    winsock_event_select(desc, FD_READ|FD_WRITE|FD_CLOSE, 0);
    sock_close(desc->s);
    desc->forced_events = 0;
    desc->send_would_block = 0;
#endif
    /*
     * We should close the fd here, but the other driver might still
     * be selecting on it.
     */
    if (INET_IGNORED(desc)) {
        inet_stop_select((ErlDrvEvent)(long)desc->event,NULL);
    } else {
        driver_select(desc->port,(ErlDrvEvent)(long)desc->event,
                      ERL_DRV_USE, 0);
    }
    desc->event = INVALID_EVENT; /* closed by stop_select callback */
    desc->s = INVALID_SOCKET;
    desc->event_mask = 0;

    /* mark as disconnected in case when socket is left lingering due to
     * {exit_on_close, false} option in gen_tcp socket creation. Next
     * write to socket should produce {error, enotconn} and send a
     * message {tcp_error,#Port<>,econnreset} */
    desc->state &= ~INET_STATE_CONNECTED;
}
4643 
/* Stop waiting for read and close events on the descriptor's socket.
 * Does nothing if desc->s is INVALID_SOCKET.
 */
static void desc_close_read(inet_descriptor* desc)
{
    if (desc->s == INVALID_SOCKET)
        return;

    /* Windows note: a call
     *     driver_select(desc->port, desc->event, DO_READ, 0);
     * used to be made here but has been REMOVED.  It could not be right:
     * we want to turn off read events but keep any write events, and on
     * Windows driver_select(...,READ,1) is only used as a way to hook into
     * the pollset while sock_select is used to control which events to
     * wait for.  It seems all events for the socket used to be disabled here.
     */
    sock_select(desc, FD_READ | FD_CLOSE, 0);
}
4660 
4661 
/* Close the descriptor.  The empty-output-queue subscribers are always freed.
 * A descriptor that is not prebound and is open is closed with desc_close()
 * and put in state INET_STATE_CLOSED.  A prebound descriptor with a valid
 * socket is only deselected; its socket is left as it is.
 * Always returns 0.
 */
static int erl_inet_close(inet_descriptor* desc)
{
    free_subscribers(&desc->empty_out_q_subs);

    if (desc->prebound == 0) {
        if (desc->state & INET_F_OPEN) {
            desc_close(desc);
            desc->state = INET_STATE_CLOSED;
        }
    } else if (desc->s != INVALID_SOCKET) {
        sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE | ERL_DRV_USE_NO_CALLBACK, 0);
        desc->event_mask = 0;
#ifdef __WIN32__
        desc->forced_events = 0;
        desc->send_would_block = 0;
#endif
    }
    return 0;
}
4678 
/* Create the socket for a descriptor that is in state INET_STATE_CLOSED.
 *
 * desc   - descriptor to open; desc->sprotocol selects the protocol and,
 *          when built with HAVE_SETNS, a non-NULL desc->netns names the
 *          network namespace file the socket is created in
 * domain - address family passed to sock_open() and stored in desc->sfamily
 * type   - socket type passed to sock_open() and stored in desc->stype
 * rbuf, rsize - reply buffer handed on to ctl_reply()/ctl_error()
 *
 * Returns the result of ctl_reply(INET_REP_OK, ...) on success,
 * of ctl_xerror(EXBADSEQ, ...) if the descriptor is not closed,
 * and of ctl_error(<errno>, ...) for every other failure.
 */
static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type,
				  char** rbuf, ErlDrvSizeT rsize)
{
    int save_errno;
    int protocol;
#ifdef HAVE_SETNS
    int current_ns, new_ns;
    current_ns = new_ns = 0;
#endif
    save_errno = 0;

    if (desc->state != INET_STATE_CLOSED)
	return ctl_xerror(EXBADSEQ, rbuf, rsize);

#ifdef HAVE_SETNS
    if (desc->netns != NULL) {
	/* Temporarily change network namespace for this thread
	 * while creating the socket
	 */
	/* Keep a handle to the current namespace to be able to go back */
	current_ns = open("/proc/self/ns/net", O_RDONLY);
	if (current_ns == INVALID_SOCKET)
	    return ctl_error(sock_errno(), rbuf, rsize);
	new_ns = open(desc->netns, O_RDONLY);
	if (new_ns == INVALID_SOCKET) {
	    /* Save errno before close() gets a chance to change it */
	    save_errno = sock_errno();
	    /* Retry close() for as long as it is interrupted (EINTR) */
	    while (close(current_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    return ctl_error(save_errno, rbuf, rsize);
	}
	if (setns(new_ns, CLONE_NEWNET) != 0) {
	    save_errno = sock_errno();
	    while (close(new_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    while (close(current_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    return ctl_error(save_errno, rbuf, rsize);
	}
	else {
	    /* The namespace is entered; its file handle is not needed */
	    while (close(new_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	}
    }
#endif
    protocol = desc->sprotocol;
#ifdef HAVE_SYS_UN_H
    /* AF_UNIX sockets are always opened with protocol 0 */
    if (domain == AF_UNIX) protocol = 0;
#endif
    /* A failure is only recorded here; it is reported after
     * the network namespace (if any) has been restored
     */
    if ((desc->s = sock_open(domain, type, protocol)) == INVALID_SOCKET)
	save_errno = sock_errno();
#ifdef HAVE_SETNS
    if (desc->netns != NULL) {
	/* Restore network namespace */
	if (setns(current_ns, CLONE_NEWNET) != 0) {
	    /* XXX Failed to restore network namespace.
	     * What to do? Tidy up and return an error...
	     * Note that the thread now might still be in the namespace.
	     * Can this even happen? Should the emulator be aborted?
	     */
	    /* If sock_open() failed its errno is the one that is reported,
	     * otherwise the errno from setns()
	     */
	    if (desc->s != INVALID_SOCKET)
		save_errno = sock_errno();
	    while (close(desc->s) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    desc->s = INVALID_SOCKET;
	    while (close(current_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    return ctl_error(save_errno, rbuf, rsize);
	}
	else {
	    while (close(current_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	}
    }
#endif
    if (desc->s == INVALID_SOCKET)
	return ctl_error(save_errno, rbuf, rsize);

    if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) {
	/* No event, no use for the socket; close it again */
	save_errno = sock_errno();
	while (close(desc->s) == INVALID_SOCKET &&
	       sock_errno() == EINTR);
	desc->s = INVALID_SOCKET;
	return ctl_error(save_errno, rbuf, rsize);
    }
    SET_NONBLOCKING(desc->s);
#ifdef __WIN32__
    driver_select(desc->port, desc->event, ERL_DRV_READ, 1);
#endif

    desc->state = INET_STATE_OPEN;
    desc->stype = type;
    desc->sfamily = domain;
    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
}
4772 
4773 
4774 /* as inet_open but pass in an open socket (MUST BE OF RIGHT TYPE) */
inet_ctl_fdopen(inet_descriptor * desc,int domain,int type,SOCKET s,Uint32 bound,char ** rbuf,ErlDrvSizeT rsize)4775 static ErlDrvSSizeT inet_ctl_fdopen(inet_descriptor* desc, int domain, int type,
4776 				    SOCKET s, Uint32 bound,
4777                                     char** rbuf, ErlDrvSizeT rsize)
4778 {
4779     inet_address name;
4780     unsigned int sz;
4781 
4782     if (bound) {
4783         /* check that it is a socket and that the socket is bound */
4784         sz = sizeof(name);
4785 	sys_memzero((char *) &name, sz);
4786         if (IS_SOCKET_ERROR(sock_name(s, (struct sockaddr*) &name, &sz)))
4787             return ctl_error(sock_errno(), rbuf, rsize);
4788         if (name.sa.sa_family != domain)
4789             return ctl_error(EINVAL, rbuf, rsize);
4790     }
4791     desc->s = s;
4792 
4793     if ((desc->event = sock_create_event(desc)) == INVALID_EVENT)
4794 	return ctl_error(sock_errno(), rbuf, rsize);
4795     SET_NONBLOCKING(desc->s);
4796 #ifdef __WIN32__
4797     driver_select(desc->port, desc->event, ERL_DRV_READ, 1);
4798 #endif
4799 
4800     desc->state = INET_STATE_OPEN;
4801 
4802     if (type == SOCK_STREAM) { /* check if connected */
4803 	sz = sizeof(name);
4804 	if (!IS_SOCKET_ERROR(sock_peer(s, (struct sockaddr*) &name, &sz))) {
4805 	    desc->state = INET_STATE_CONNECTED;
4806         }
4807     }
4808 
4809     desc->prebound = 1; /* used to prevent a real close since
4810 			 * the fd probably comes from an
4811 			 * external wrapper program, so it is
4812 			 * not certain that we can open it again */
4813     desc->stype = type;
4814     desc->sfamily = domain;
4815     return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
4816 }
4817 
/*
**  Description of one network interface and its IPv4 addresses.
**
**  store interface info as: (bytes)
**  [Len] Name(Len) Flags(1) addr(4) baddr(4) mask(4) bw(4)
**
**  NOTE(review): the layout above lists a bw(4) field and a one byte flags
**  field, while the struct has no bw member and flags is a long - confirm
**  which of the two is current.
*/
struct addr_if {
    char name[INET_IFNAMSIZ];    /* interface name */
    long           flags;        /* coded flags */
    struct in_addr addr;         /* interface address */
    struct in_addr baddr;        /* broadcast address */
    struct in_addr mask;         /* netmask */
};
4829 
4830 
4831 #ifndef SIOCGIFNETMASK
net_mask(struct in_addr in)4832 static struct in_addr net_mask(struct in_addr in)
4833 {
4834     register u_long i = sock_ntohl(in.s_addr);
4835 
4836     if (IN_CLASSA(i))
4837 	in.s_addr = sock_htonl(IN_CLASSA_NET);
4838     else if (IN_CLASSB(i))
4839 	in.s_addr = sock_htonl(IN_CLASSB_NET);
4840     else
4841 	in.s_addr = sock_htonl(IN_CLASSC_NET);
4842     return in;
4843 }
4844 #endif
4845 
4846 #if defined(__WIN32__) && defined(SIO_GET_INTERFACE_LIST)
4847 
/* Format the IPv4 address x as a zero terminated dotted decimal string.
 *
 * x   - the address; bits 31..24 are the first octet, bits 7..0 the last
 * ptr - where to write; needs room for at most 16 bytes
 *
 * Returns a pointer to the byte after the terminating '\0', i.e. to where
 * the next string can be written.
 */
static char* fmt_addr(unsigned long x, char* ptr)
{
    int octet_no;

    for (octet_no = 0; octet_no < 4; octet_no++) {
        const int octet = (int) ((x >> 24) & 0xff);
        const int hundreds = (octet / 100) % 10;
        const int tens = (octet / 10) % 10;
        const int ones = octet % 10;

        x <<= 8;
        /* No leading zeros: a digit is only written if it, or a more
         * significant digit, is non-zero.  The last digit is always written.
         */
        if (hundreds != 0)
            *ptr++ = hundreds + '0';
        if (hundreds != 0 || tens != 0)
            *ptr++ = tens + '0';
        *ptr++ = ones + '0';
        *ptr++ = '.';
    }
    /* Turn the dot after the last octet into the string terminator */
    ptr[-1] = '\0';
    return ptr;
}
4869 
/* Parse an IPv4 address in dotted decimal notation (A.B.C.D).
 *
 * ptr - the characters to parse; does not need to be zero terminated
 * n   - the number of characters to parse
 * x   - out: the address, with A in bits 31..24 and D in bits 7..0;
 *       only written on success
 *
 * Returns 0 on success.  Returns -1 if the n characters are not exactly
 * four dot separated groups of 1 to 3 decimal digits, each with a value
 * of at most 255.
 */
static int parse_addr(char* ptr, int n, long* x)
{
    /* Accumulate in an unsigned type: shifting a first octet >= 128 into
     * bit 31 of a signed 32-bit long would be undefined behaviour.
     */
    unsigned long addr = 0;
    int  dots = 0;
    int  digs = 0;
    int  v  = 0;

    /* "> 0" so that a negative n is rejected instead of read as a length */
    while (n-- > 0) {
        switch(*ptr) {
        case '0': case '1': case '2':case '3':case '4':case '5':
        case '6': case '7': case '8':case '9':
            v = v*10 + *ptr - '0';
            if (++digs > 3) return -1;
            break;
        case '.':
            if ((dots>2) || (digs==0) || (digs > 3) || (v > 0xff)) return -1;
            dots++;
            digs = 0;
            addr = (addr << 8) | (unsigned long) v;
            v = 0;
            break;
        default:
            return -1;
        }
        ptr++;
    }
    /* The last octet has no dot after it, so it is checked and added here */
    if ((dots!=3) || (digs==0) || (digs > 3) || (v > 0xff)) return -1;
    addr = (addr << 8) | (unsigned long) v;
    *x = (long) addr;
    return 0;
}
4901 
4902 #endif
4903 
/* Jump to the label "error" unless at least n bytes are available between
 * ptr and end.  Can only be used in a function that has an "error" label.
 */
#define buf_check(ptr, end, n) \
do { if ((end)-(ptr) < (n)) goto error; } while(0)
4906 
sockaddr_to_buf(struct sockaddr * addr,char * ptr,char * end)4907 static char* sockaddr_to_buf(struct sockaddr* addr, char* ptr, char* end)
4908 {
4909     if (addr->sa_family == AF_INET || addr->sa_family == 0) {
4910 	struct in_addr *p = &(((struct sockaddr_in*) addr)->sin_addr);
4911 	buf_check(ptr, end, 1 + sizeof(struct in_addr));
4912 	*ptr = INET_AF_INET;
4913 	sys_memcpy(ptr+1, (char*)p, sizeof(struct in_addr));
4914 	return ptr + 1 + sizeof(struct in_addr);
4915     }
4916 #if defined(HAVE_IN6) && defined(AF_INET6)
4917     else if (addr->sa_family == AF_INET6) {
4918 	struct in6_addr *p = &(((struct sockaddr_in6*) addr)->sin6_addr);
4919 	buf_check(ptr, end, 1 + sizeof(struct in6_addr));
4920 	*ptr = INET_AF_INET6;
4921 	sys_memcpy(ptr+1, (char*)p, sizeof(struct in6_addr));
4922 	return ptr + 1 + sizeof(struct in6_addr);
4923     }
4924 #endif
4925 #if defined(AF_LINK)
4926     else if (addr->sa_family == AF_LINK) {
4927 	struct sockaddr_dl *sdl_p = (struct sockaddr_dl*) addr;
4928 	buf_check(ptr, end, 2 + sdl_p->sdl_alen);
4929 	put_int16(sdl_p->sdl_alen, ptr); ptr += 2;
4930 	sys_memcpy(ptr, sdl_p->sdl_data + sdl_p->sdl_nlen, sdl_p->sdl_alen);
4931 	return ptr + sdl_p->sdl_alen;
4932     }
4933 #endif
4934 #if defined(AF_PACKET) && defined(HAVE_NETPACKET_PACKET_H)
4935     else if(addr->sa_family == AF_PACKET) {
4936 	struct sockaddr_ll *sll_p = (struct sockaddr_ll*) addr;
4937 	buf_check(ptr, end, 2 + sll_p->sll_halen);
4938 	put_int16(sll_p->sll_halen, ptr); ptr += 2;
4939 	sys_memcpy(ptr, sll_p->sll_addr, sll_p->sll_halen);
4940 	return ptr + sll_p->sll_halen;
4941     }
4942 #endif
4943     return ptr;
4944  error:
4945     return NULL;
4946 }
4947 
/* sockaddr_bufsz_need
 * Returns the number of bytes that sockaddr_to_buf needs to store addr:
 * one tag byte plus the address for AF_INET (and family 0) and AF_INET6,
 * two length bytes plus the hardware address for AF_LINK and AF_PACKET,
 * and 0 for any other family.
 */

static size_t sockaddr_bufsz_need(struct sockaddr* addr)
{
    const int fam = addr->sa_family;

    if (fam == AF_INET || fam == 0)
        return 1 + sizeof(struct in_addr);
#if defined(HAVE_IN6) && defined(AF_INET6)
    if (fam == AF_INET6)
        return 1 + sizeof(struct in6_addr);
#endif
#if defined(AF_LINK)
    if (fam == AF_LINK) {
        struct sockaddr_dl *dl = (struct sockaddr_dl*) addr;
        return 2 + dl->sdl_alen;
    }
#endif
#if defined(AF_PACKET) && defined(HAVE_NETPACKET_PACKET_H)
    if (fam == AF_PACKET) {
        struct sockaddr_ll *ll = (struct sockaddr_ll*) addr;
        return 2 + ll->sll_halen;
    }
#endif
    return 0;
}
4977 
buf_to_sockaddr(char * ptr,char * end,struct sockaddr * addr)4978 static char* buf_to_sockaddr(char* ptr, char* end, struct sockaddr* addr)
4979 {
4980     buf_check(ptr,end,1);
4981     switch (*ptr++) {
4982     case INET_AF_INET: {
4983 	struct in_addr *p = &((struct sockaddr_in*)addr)->sin_addr;
4984 	buf_check(ptr,end,sizeof(struct in_addr));
4985 	sys_memcpy((char*) p, ptr, sizeof(struct in_addr));
4986 	addr->sa_family = AF_INET;
4987 	return ptr + sizeof(struct in_addr);
4988     }
4989 #if defined(HAVE_IN6) && defined(AF_INET6)
4990     case INET_AF_INET6: {
4991 	struct in6_addr *p = &((struct sockaddr_in6*)addr)->sin6_addr;
4992 	buf_check(ptr,end,sizeof(struct in6_addr));
4993 	sys_memcpy((char*) p, ptr, sizeof(struct in6_addr));
4994 	addr->sa_family = AF_INET6;
4995 	return ptr + sizeof(struct in6_addr);
4996     }
4997 #endif
4998     }
4999  error:
5000     return NULL;
5001 }
5002 
5003 
/* IFGET_FLAGS(cflags) translates the system IFF_* interface flags in cflags
 * to the driver's INET_IFF_* flags.  The point-to-point flag is spelled
 * IFF_POINTOPOINT or IFF_POINTTOPOINT depending on the platform; whichever
 * one is defined is handed to IFGET_FLAGS_P2P.  If neither is defined,
 * IFGET_FLAGS is not defined here.
 */
#if defined (IFF_POINTOPOINT)
#define IFGET_FLAGS(cflags) IFGET_FLAGS_P2P(cflags, IFF_POINTOPOINT)
#elif defined IFF_POINTTOPOINT
#define IFGET_FLAGS(cflags) IFGET_FLAGS_P2P(cflags, IFF_POINTTOPOINT)
#endif

/* iff_ptp is the platform's point-to-point flag.  Note that
 * INET_IFF_RUNNING is not read from a flag of its own, it is set
 * whenever IFF_UP is set.
 */
#define IFGET_FLAGS_P2P(cflags, iff_ptp)				\
    ((((cflags) & IFF_UP) ? INET_IFF_UP : 0) |				\
     (((cflags) & IFF_BROADCAST) ? INET_IFF_BROADCAST : 0) |		\
     (((cflags) & IFF_LOOPBACK) ? INET_IFF_LOOPBACK : 0) |		\
     (((cflags) & iff_ptp) ? INET_IFF_POINTTOPOINT : 0) |		\
     (((cflags) & IFF_UP) ? INET_IFF_RUNNING : 0) |  /* emulate running ? */ \
     (((cflags) & IFF_MULTICAST) ? INET_IFF_MULTICAST : 0))
5017 
5018 #if defined(__WIN32__) && defined(SIO_GET_INTERFACE_LIST)
5019 
inet_ctl_getiflist(inet_descriptor * desc,char ** rbuf,ErlDrvSizeT rsize)5020 static ErlDrvSSizeT inet_ctl_getiflist(inet_descriptor* desc,
5021 				       char** rbuf, ErlDrvSizeT rsize)
5022 {
5023     char ifbuf[BUFSIZ];
5024     char sbuf[BUFSIZ];
5025     char* sptr;
5026     INTERFACE_INFO* ifp;
5027     DWORD len;
5028     ErlDrvSizeT n;
5029     int err;
5030 
5031     ifp = (INTERFACE_INFO*) ifbuf;
5032     len = 0;
5033     err = WSAIoctl(desc->s, SIO_GET_INTERFACE_LIST, NULL, 0,
5034 		   (LPVOID) ifp, BUFSIZ, (LPDWORD) &len,
5035 		   NULL, NULL);
5036 
5037     if (err == SOCKET_ERROR)
5038 	return ctl_error(sock_errno(), rbuf, rsize);
5039 
5040     n = (len + sizeof(INTERFACE_INFO) - 1) / sizeof(INTERFACE_INFO);
5041     sptr = sbuf;
5042 
5043     while(n--) {
5044 	if (((struct sockaddr*)&ifp->iiAddress)->sa_family == desc->sfamily) {
5045 	    struct in_addr sina = ((struct sockaddr_in*)&ifp->iiAddress)->sin_addr;
5046 	    /* discard INADDR_ANY interface address */
5047 	    if (sina.s_addr != INADDR_ANY)
5048 		sptr = fmt_addr(sock_ntohl(sina.s_addr), sptr);
5049 	}
5050 	ifp++;
5051     }
5052     return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize);
5053 }
5054 
5055 /* input is an ip-address in string format i.e A.B.C.D
5056 ** scan the INTERFACE_LIST to get the options
5057 */
inet_ctl_ifget(inet_descriptor * desc,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)5058 static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc, char* buf,
5059 				   ErlDrvSizeT len, char** rbuf, ErlDrvSizeT rsize)
5060 {
5061     char ifbuf[BUFSIZ];
5062     int  n;
5063     char sbuf[BUFSIZ];
5064     char* sptr;
5065     char* s_end = sbuf + BUFSIZ;
5066     int namlen;
5067     int   err;
5068     INTERFACE_INFO* ifp;
5069     long namaddr;
5070 
5071     if ((len == 0) || ((namlen = get_int8(buf)) > len))
5072 	goto error;
5073     if (parse_addr(buf+1, namlen, &namaddr) < 0)
5074 	goto error;
5075     namaddr = sock_ntohl(namaddr);
5076     buf += (namlen+1);
5077     len -= (namlen+1);
5078 
5079     ifp = (INTERFACE_INFO*) ifbuf;
5080     err = WSAIoctl(desc->s, SIO_GET_INTERFACE_LIST, NULL, 0,
5081 			      (LPVOID) ifp, BUFSIZ, (LPDWORD) &n,
5082 			      NULL, NULL);
5083     if (err == SOCKET_ERROR) {
5084 	return ctl_error(sock_errno(), rbuf, rsize);
5085     }
5086 
5087     n = (n + sizeof(INTERFACE_INFO) - 1) / sizeof(INTERFACE_INFO);
5088 
5089     /* find interface */
5090     while(n) {
5091 	if (((struct sockaddr_in*)&ifp->iiAddress)->sin_addr.s_addr == namaddr)
5092 	    break;
5093 	ifp++;
5094 	n--;
5095     }
5096     if (n == 0)
5097 	goto error;
5098 
5099     sptr = sbuf;
5100 
5101     while (len--) {
5102 	switch(*buf++) {
5103 	case INET_IFOPT_ADDR:
5104 	    buf_check(sptr, s_end, 1);
5105 	    *sptr++ = INET_IFOPT_ADDR;
5106 	    if ((sptr = sockaddr_to_buf((struct sockaddr *)&ifp->iiAddress,
5107 					sptr, s_end)) == NULL)
5108 		goto error;
5109 	    break;
5110 
5111 	case INET_IFOPT_HWADDR:
5112 	    break;
5113 
5114 	case INET_IFOPT_BROADADDR:
5115 #ifdef SIOCGIFBRDADDR
5116 	    buf_check(sptr, s_end, 1);
5117 	    *sptr++ = INET_IFOPT_BROADADDR;
5118 	    if ((sptr=sockaddr_to_buf((struct sockaddr *)
5119 				      &ifp->iiBroadcastAddress,sptr,s_end))
5120 		== NULL)
5121 		goto error;
5122 #endif
5123 	    break;
5124 
5125 	case INET_IFOPT_DSTADDR:
5126 	    break;
5127 
5128 	case INET_IFOPT_NETMASK:
5129 	    buf_check(sptr, s_end, 1);
5130 	    *sptr++ = INET_IFOPT_NETMASK;
5131 	    if ((sptr = sockaddr_to_buf((struct sockaddr *)
5132 					&ifp->iiNetmask,sptr,s_end)) == NULL)
5133 		goto error;
5134 	    break;
5135 
5136 	case INET_IFOPT_MTU:
5137 	    break;
5138 
5139 	case INET_IFOPT_FLAGS: {
5140 	    int flags = ifp->iiFlags;
5141 	    /* just enumerate the interfaces (no names) */
5142 
5143 	    buf_check(sptr, s_end, 5);
5144 	    *sptr++ = INET_IFOPT_FLAGS;
5145 	    put_int32(IFGET_FLAGS(flags), sptr);
5146 	    sptr += 4;
5147 	    break;
5148 	}
5149 	default:
5150 	    goto error;
5151 	}
5152     }
5153     return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize);
5154 
5155  error:
5156     return ctl_error(EINVAL, rbuf, rsize);
5157 }
5158 
5159 /* not supported */
inet_ctl_ifset(inet_descriptor * desc,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)5160 static ErlDrvSSizeT inet_ctl_ifset(inet_descriptor* desc,
5161 				   char* buf, ErlDrvSizeT len,
5162 				   char** rbuf, ErlDrvSizeT rsize)
5163 {
5164     return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
5165 }
5166 
5167 #elif defined(SIOCGIFCONF) && defined(SIOCSIFFLAGS)
5168 /* cygwin has SIOCGIFCONF but not SIOCSIFFLAGS (Nov 2002) */
5169 
/* Cast via void* */
#define VOIDP(x) ((void*)(x))
/* SIZEA(p) is the size of the socket address p.  Where the sa_len member
 * exists (AF_LINK defined and not NO_SA_LEN) an address can be larger than
 * its declared type, so the larger of sa_len and sizeof(p) is used.
 */
#if defined(AF_LINK) && !defined(NO_SA_LEN)
#define SIZEA(p) (((p).sa_len > sizeof(p)) ? (p).sa_len : sizeof(p))
#else
#define SIZEA(p) (sizeof (p))
#endif
5176 
/* Fetch the interface configuration of socket s with SIOCGIFCONF.
 *
 * The size of the result is not known in advance, so the ioctl is repeated
 * with a growing buffer until two successful calls in a row report the same
 * ifc_len.  An EINVAL failure before the first successful call also just
 * makes the buffer grow.
 *
 * Returns 0 on success; ifcp->ifc_buf is then an allocated buffer that is
 * to be released with free_ifconf().  Returns -1 on failure, with the
 * buffer already freed.
 */
static int get_ifconf(SOCKET s, struct ifconf *ifcp) {
    int prev_len = 0;
    int space_sz = 100 * sizeof(struct ifreq);
    char *space = ALLOC(space_sz);

    for (;;) {
        ifcp->ifc_buf = space;
        ifcp->ifc_len = space_sz;
        if (ioctl(s, SIOCGIFCONF, (char *)ifcp) >= 0) {
            if (ifcp->ifc_len == prev_len)
                return 0; /* buf large enough */
            prev_len = ifcp->ifc_len;
        } else if (sock_errno() != EINVAL || prev_len) {
            FREE(space);
            return -1;
        }
        space_sz += 10 * sizeof(struct ifreq);
        space = (char *)REALLOC(space, space_sz);
    }
}
5200 
free_ifconf(struct ifconf * ifcp)5201 static void free_ifconf(struct ifconf *ifcp) {
5202     FREE(ifcp->ifc_buf);
5203 }
5204 
/* Reply with the names of the interfaces reported by SIOCGIFCONF.
 *
 * The reply is INET_REP_OK followed by one zero terminated interface name
 * for every reported entry whose address family is AF_INET (or AF_INET6
 * when available).
 *
 * The reply is always built in a buffer of its own that is stored in *rbuf;
 * rsize is only used for the error reply.
 * Returns the reply length, or the result of ctl_error() if the interface
 * configuration can not be fetched.
 */
static ErlDrvSSizeT inet_ctl_getiflist(inet_descriptor* desc,
				       char** rbuf, ErlDrvSizeT rsize)
{
    struct ifconf ifc;
    struct ifreq *ifrp;
    char *sbuf, *sp;
    ErlDrvSizeT i;

    /* Courtesy of Per Bergqvist and W. Richard Stevens */

    if (get_ifconf(desc->s, &ifc) < 0) {
	return ctl_error(sock_errno(), rbuf, rsize);
    }

    /* One byte for INET_REP_OK plus as many bytes as the ioctl result */
    sp = sbuf = ALLOC(ifc.ifc_len+1);
    *sp++ = INET_REP_OK;
    i = 0;
    for (;;) {
	ErlDrvSizeT n;

	/* i is the byte offset of the current entry in the ioctl result */
	ifrp = (struct ifreq *) VOIDP(ifc.ifc_buf + i);
	/* n is the size of this entry: the name plus the address,
	 * but never less than a struct ifreq
	 */
	n = sizeof(ifrp->ifr_name) + SIZEA(ifrp->ifr_addr);
	if (n < sizeof(*ifrp)) n = sizeof(*ifrp);
	/* Stop at an entry that does not fit in what the ioctl returned */
	if (i+n > ifc.ifc_len) break;
	i += n;

	switch (ifrp->ifr_addr.sa_family) {
#if defined(HAVE_IN6) && defined(AF_INET6)
	case AF_INET6:
#endif
	case AF_INET:
	    ASSERT(sp+IFNAMSIZ+1 < sbuf+ifc.ifc_len+1);
	    /* strncpy() does not terminate a name of full length,
	     * so a terminator is always written after IFNAMSIZ bytes
	     */
	    strncpy(sp, ifrp->ifr_name, IFNAMSIZ);
	    sp[IFNAMSIZ] = '\0';
	    /* Continue after the terminator of the copied name */
	    sp += strlen(sp), ++sp;
	}

	if (i >= ifc.ifc_len) break;
    }
    free_ifconf(&ifc);
    *rbuf = sbuf;
    return sp - sbuf;
}
5248 
5249 #ifdef HAVE_LIBDLPI_H
5250 #include <libdlpi.h>
/* Look up the current physical address of the link ifnm using libdlpi.
 *
 * ifnm - link name handed to dlpi_open()
 * addr - buffer handed to dlpi_get_physaddr() to receive the address
 * alen - address length argument handed to dlpi_get_physaddr()
 *
 * Returns 0 if the link can be opened and both dlpi_get_physaddr() and
 * dlpi_info() succeed, otherwise -1.
 */
static int hwaddr_libdlpi_lookup(const char *ifnm,
                                 uchar_t *addr, size_t *alen)
{
    dlpi_handle_t handle;
    dlpi_info_t linkinfo;
    int status;

    if (dlpi_open(ifnm, &handle, 0) != DLPI_SUCCESS)
        return -1;

    status = -1;
    if (dlpi_get_physaddr(handle, DL_CURR_PHYS_ADDR,
                          addr, alen) == DLPI_SUCCESS) {
        /* The link info itself is not used, the call just has to succeed */
        if (dlpi_info(handle, &linkinfo, 0) == DLPI_SUCCESS)
            status = 0;
    }

    dlpi_close(handle);
    return status;
}
5272 #endif
5273 
5274 #ifdef HAVE_GETIFADDRS
/* Call getifaddrs(), when built with HAVE_SETNS from within the network
 * namespace desc_p->netns if that is set.
 *
 * desc_p - descriptor; only desc_p->netns is used
 * ifa_pp - handed to getifaddrs() to receive the result
 *
 * Returns 0 for success and the errno value for failure.  On success the
 * caller owns the list in *ifa_pp.
 */
static int call_getifaddrs(inet_descriptor* desc_p, struct ifaddrs **ifa_pp)
{
    int result, save_errno;
#ifdef HAVE_SETNS
    int current_ns;

    current_ns = 0;
    if (desc_p->netns != NULL) {
        int new_ns;
	/* Temporarily change network namespace for this thread
	 * over the getifaddrs() call
	 */
	/* Keep a handle to the current namespace to be able to go back */
	current_ns = open("/proc/self/ns/net", O_RDONLY);
	if (current_ns == INVALID_SOCKET)
	    return sock_errno();
	new_ns = open(desc_p->netns, O_RDONLY);
	if (new_ns == INVALID_SOCKET) {
	    /* Save errno before close() gets a chance to change it */
	    save_errno = sock_errno();
	    /* Retry close() for as long as it is interrupted (EINTR) */
	    while (close(current_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    return save_errno;
	}
	if (setns(new_ns, CLONE_NEWNET) != 0) {
	    save_errno = sock_errno();
	    while (close(new_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    while (close(current_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	    return save_errno;
	}
	else {
	    /* The namespace is entered; its file handle is not needed */
	    while (close(new_ns) == INVALID_SOCKET &&
		   sock_errno() == EINTR);
	}
    }
#endif
    save_errno = 0;
    result = getifaddrs(ifa_pp);
    /* A failure is only recorded here; it is returned after
     * the network namespace (if any) has been restored
     */
    if (result  < 0)
        save_errno = sock_errno();
#ifdef HAVE_SETNS
    if (desc_p->netns != NULL) {
        /* Restore network namespace */
        if (setns(current_ns, CLONE_NEWNET) != 0) {
            /* XXX Failed to restore network namespace.
             * What to do? Tidy up and return an error...
             * Note that the thread now might still be in the set namespace.
	     * Can this even happen? Should the emulator be aborted?
	     */
            /* If getifaddrs() failed its errno is the one that is returned,
             * otherwise the errno from setns()
             */
            if (result >= 0) {
                /* We got a result but have to waste it */
                save_errno = sock_errno();
                freeifaddrs(*ifa_pp);
            }
	}
        /* The handle is closed whether or not the restore succeeded */
        while (close(current_ns) == INVALID_SOCKET &&
               sock_errno() == EINTR);
    }
#endif
    return save_errno;
}
5337 #endif /* #ifdef HAVE_GETIFADDRS */
5338 
/* FIXME: temporary hack
 * Fall back to a hardware address length of 6 bytes when the system
 * headers do not define IFHWADDRLEN.
 */
#ifndef IFHWADDRLEN
#define IFHWADDRLEN 6
#endif
5343 
inet_ctl_ifget(inet_descriptor * desc,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)5344 static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc,
5345 				   char* buf, ErlDrvSizeT len,
5346 				   char** rbuf, ErlDrvSizeT rsize)
5347 {
5348     char sbuf[BUFSIZ];
5349     char* sptr;
5350     char* s_end = sbuf + BUFSIZ;
5351     struct ifreq ifreq;
5352     int namlen;
5353 
5354     if ((len == 0) || ((namlen = get_int8(buf)) > len))
5355 	goto error;
5356     sys_memset(ifreq.ifr_name, '\0', IFNAMSIZ);
5357     sys_memcpy(ifreq.ifr_name, buf+1,
5358 	       (namlen >= IFNAMSIZ) ? IFNAMSIZ-1 : namlen);
5359     buf += (namlen+1);
5360     len -= (namlen+1);
5361     sptr = sbuf;
5362 
5363     while (len--) {
5364 	switch(*buf++) {
5365 	case INET_IFOPT_ADDR:
5366 	    if (ioctl(desc->s, SIOCGIFADDR, (char *)&ifreq) < 0)
5367 		break;
5368 	    buf_check(sptr, s_end, 1);
5369 	    *sptr++ = INET_IFOPT_ADDR;
5370 	    if ((sptr = sockaddr_to_buf(&ifreq.ifr_addr, sptr, s_end)) == NULL)
5371 		goto error;
5372 	    break;
5373 
5374 	case INET_IFOPT_HWADDR: {
5375 #ifdef HAVE_LIBDLPI_H
5376 	    /*
5377 	    ** OpenSolaris have SIGCGIFHWADDR, but no ifr_hwaddr member..
5378 	    ** The proper way to get the mac address would be to
5379 	    ** use libdlpi...
5380 	    */
5381 	    uchar_t addr[DLPI_PHYSADDR_MAX];
5382 	    size_t alen = sizeof(addr);
5383 
5384 	    if (hwaddr_libdlpi_lookup(ifreq.ifr_name, addr, &alen) == 0) {
5385 		buf_check(sptr, s_end, 1+2+alen);
5386 		*sptr++ = INET_IFOPT_HWADDR;
5387 		put_int16(alen, sptr);
5388                     sptr += 2;
5389                     sys_memcpy(sptr, addr, alen);
5390                     sptr += alen;
5391 	    }
5392 #elif defined(SIOCGIFHWADDR) && defined(HAVE_STRUCT_IFREQ_IFR_HWADDR)
5393 	    if (ioctl(desc->s, SIOCGIFHWADDR, (char *)&ifreq) < 0)
5394 		break;
5395 	    buf_check(sptr, s_end, 1+2+IFHWADDRLEN);
5396 	    *sptr++ = INET_IFOPT_HWADDR;
5397 	    put_int16(IFHWADDRLEN, sptr); sptr += 2;
5398 	    /* raw memcpy (fix include autoconf later) */
5399 	    sys_memcpy(sptr, (char*)(&ifreq.ifr_hwaddr.sa_data), IFHWADDRLEN);
5400 	    sptr += IFHWADDRLEN;
5401 #elif defined(SIOCGENADDR) && defined(HAVE_STRUCT_IFREQ_IFR_ENADDR)
5402 	    if (ioctl(desc->s, SIOCGENADDR, (char *)&ifreq) < 0)
5403 		break;
5404 	    buf_check(sptr, s_end, 1+2+sizeof(ifreq.ifr_enaddr));
5405 	    *sptr++ = INET_IFOPT_HWADDR;
5406 	    put_int16(sizeof(ifreq.ifr_enaddr), sptr); sptr += 2;
5407 	    /* raw memcpy (fix include autoconf later) */
5408 	    sys_memcpy(sptr, (char*)(&ifreq.ifr_enaddr),
5409 		       sizeof(ifreq.ifr_enaddr));
5410 	    sptr += sizeof(ifreq.ifr_enaddr);
5411 #elif defined(HAVE_GETIFADDRS) && defined(AF_LINK)
5412 	    struct ifaddrs *ifa, *ifp;
5413 	    struct sockaddr_dl *sdlp;
5414 	    int found = 0;
5415 
5416             if (call_getifaddrs(desc, &ifa) != 0)
5417                 goto error;
5418 
5419 	    for (ifp = ifa; ifp; ifp = ifp->ifa_next) {
5420 		if ((ifp->ifa_addr->sa_family == AF_LINK) &&
5421 		    (sys_strcmp(ifp->ifa_name, ifreq.ifr_name) == 0)) {
5422 		    found = 1;
5423 		    break;
5424 		}
5425 	    }
5426 
5427 	    if (found == 0) {
5428 		freeifaddrs(ifa);
5429 		break;
5430 	    }
5431 	    sdlp = (struct sockaddr_dl *)ifp->ifa_addr;
5432 
5433 	    buf_check(sptr, s_end, 1+2+sdlp->sdl_alen);
5434 	    *sptr++ = INET_IFOPT_HWADDR;
5435 	    put_int16(sdlp->sdl_alen, sptr); sptr += 2;
5436 	    sys_memcpy(sptr,
5437 		       sdlp->sdl_data + sdlp->sdl_nlen,
5438 		       sdlp->sdl_alen);
5439 	    sptr += sdlp->sdl_alen;
5440 	    freeifaddrs(ifa);
5441 #endif
5442 	    break;
5443 	}
5444 
5445 
5446 	case INET_IFOPT_BROADADDR:
5447 #ifdef SIOCGIFBRDADDR
5448 	    if (ioctl(desc->s, SIOCGIFBRDADDR, (char *)&ifreq) < 0)
5449 		break;
5450 	    buf_check(sptr, s_end, 1);
5451 	    *sptr++ = INET_IFOPT_BROADADDR;
5452 	    if ((sptr=sockaddr_to_buf(&ifreq.ifr_broadaddr,sptr,s_end)) == NULL)
5453 		goto error;
5454 #endif
5455 	    break;
5456 
5457 	case INET_IFOPT_DSTADDR:
5458 #ifdef SIOCGIFDSTADDR
5459 	    if (ioctl(desc->s, SIOCGIFDSTADDR, (char *)&ifreq) < 0)
5460 		break;
5461 	    buf_check(sptr, s_end, 1);
5462 	    *sptr++ = INET_IFOPT_DSTADDR;
5463 	    if ((sptr = sockaddr_to_buf(&ifreq.ifr_dstaddr,sptr,s_end)) == NULL)
5464 		goto error;
5465 #endif
5466 	    break;
5467 
5468 	case INET_IFOPT_NETMASK:
5469 #if defined(SIOCGIFNETMASK)
5470 	    if (ioctl(desc->s, SIOCGIFNETMASK, (char *)&ifreq) < 0)
5471 		break;
5472 	    buf_check(sptr, s_end, 1);
5473 	    *sptr++ = INET_IFOPT_NETMASK;
5474 #if defined(ifr_netmask)
5475 	    sptr = sockaddr_to_buf(&ifreq.ifr_netmask,sptr,s_end);
5476 #else
5477 	    /* SIOCGNETMASK exist but not macro ??? */
5478 	    sptr = sockaddr_to_buf(&ifreq.ifr_addr,sptr,s_end);
5479 #endif
5480 	    if (sptr == NULL)
5481 		goto error;
5482 #else
5483 	    if (ioctl(desc->s, SIOCGIFADDR, (char *)&ifreq) < 0)
5484 		break;
5485 	    else {
5486 		struct sockadd_in* ap;
5487 		/* emulate netmask,
5488 		 * (wasted stuff since noone uses classes)
5489 		 */
5490 		buf_check(sptr, s_end, 1);
5491 		*sptr++ = INET_IFOPT_NETMASK;
5492 		ap = (struct sockaddr_in*) VOIDP(&ifreq.ifr_addr);
5493 		ap->sin_addr = net_mask(ap->sin_addr);
5494 		if ((sptr = sockaddr_to_buf(&ifreq.ifr_addr,sptr,s_end)) == NULL)
5495 		    goto error;
5496 	    }
5497 #endif
5498 	    break;
5499 
5500 	case INET_IFOPT_MTU: {
5501 #if defined(SIOCGIFMTU) && defined(ifr_mtu)
5502 	    int n;
5503 
5504 	    if (ioctl(desc->s, SIOCGIFMTU, (char *)&ifreq) < 0)
5505 		break;
5506 	    buf_check(sptr, s_end, 5);
5507 	    *sptr++ = INET_IFOPT_MTU;
5508 	    n = ifreq.ifr_mtu;
5509 	    put_int32(n, sptr);
5510 	    sptr += 4;
5511 #endif
5512 	    break;
5513 	}
5514 
5515 	case INET_IFOPT_FLAGS: {
5516 	    int flags;
5517 
5518 	    if (ioctl(desc->s, SIOCGIFFLAGS, (char*)&ifreq) < 0)
5519 		flags = 0;
5520 	    else
5521 		flags = ifreq.ifr_flags;
5522 
5523 	    buf_check(sptr, s_end, 5);
5524 	    *sptr++ = INET_IFOPT_FLAGS;
5525 	    put_int32(IFGET_FLAGS(flags), sptr);
5526 	    sptr += 4;
5527 	    break;
5528 	}
5529 	default:
5530 	    goto error;
5531 	}
5532     }
5533     return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize);
5534 
5535  error:
5536     return ctl_error(EINVAL, rbuf, rsize);
5537 }
5538 
5539 
inet_ctl_ifset(inet_descriptor * desc,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)5540 static ErlDrvSSizeT inet_ctl_ifset(inet_descriptor* desc,
5541 				   char* buf, ErlDrvSizeT len,
5542 				   char** rbuf, ErlDrvSizeT rsize)
5543 {
5544     struct ifreq ifreq;
5545     int namlen;
5546     char* b_end = buf + len;
5547 
5548     if ((len == 0) || ((namlen = get_int8(buf)) > len))
5549 	goto error;
5550     sys_memset(ifreq.ifr_name, '\0', IFNAMSIZ);
5551     sys_memcpy(ifreq.ifr_name, buf+1,
5552 	       (namlen >= IFNAMSIZ) ? IFNAMSIZ-1 : namlen);
5553     buf += (namlen+1);
5554     len -= (namlen+1);
5555 
5556     while(buf < b_end) {
5557 	switch(*buf++) {
5558 	case INET_IFOPT_ADDR:
5559 	    if ((buf = buf_to_sockaddr(buf, b_end, &ifreq.ifr_addr)) == NULL)
5560 		goto error;
5561 	    (void) ioctl(desc->s, SIOCSIFADDR, (char*)&ifreq);
5562 	    break;
5563 
5564 	case INET_IFOPT_HWADDR: {
5565 	    unsigned int hwalen;
5566 	    buf_check(buf, b_end, 2);
5567 	    hwalen = get_int16(buf); buf += 2;
5568 	    buf_check(buf, b_end, hwalen);
5569 #ifdef SIOCSIFHWADDR
5570 	    /* raw memcpy (fix include autoconf later) */
5571 	    sys_memset((char*)(&ifreq.ifr_hwaddr.sa_data),
5572 		       '\0', sizeof(ifreq.ifr_hwaddr.sa_data));
5573 	    sys_memcpy((char*)(&ifreq.ifr_hwaddr.sa_data), buf, hwalen);
5574 
5575 	    (void) ioctl(desc->s, SIOCSIFHWADDR, (char *)&ifreq);
5576 #endif
5577 	    buf += hwalen;
5578 	    break;
5579 	}
5580 
5581 	case INET_IFOPT_BROADADDR:
5582 #ifdef SIOCSIFBRDADDR
5583 	    if ((buf = buf_to_sockaddr(buf, b_end, &ifreq.ifr_broadaddr)) == NULL)
5584 		goto error;
5585 	    (void) ioctl(desc->s, SIOCSIFBRDADDR, (char *)&ifreq);
5586 #endif
5587 	    break;
5588 
5589 	case INET_IFOPT_DSTADDR:
5590 #ifdef SIOCSIFDSTADDR
5591 	    if ((buf = buf_to_sockaddr(buf, b_end, &ifreq.ifr_dstaddr)) == NULL)
5592 		goto error;
5593 	    (void) ioctl(desc->s, SIOCSIFDSTADDR, (char *)&ifreq);
5594 #endif
5595 	    break;
5596 
5597 	case INET_IFOPT_NETMASK:
5598 #ifdef SIOCSIFNETMASK
5599 
5600 #if defined(ifr_netmask)
5601 	    buf = buf_to_sockaddr(buf,b_end, &ifreq.ifr_netmask);
5602 #else
5603 	    buf = buf_to_sockaddr(buf,b_end, &ifreq.ifr_addr);
5604 #endif
5605 	    if (buf == NULL)
5606 		goto error;
5607 	    (void) ioctl(desc->s, SIOCSIFNETMASK, (char *)&ifreq);
5608 #endif
5609 	    break;
5610 
5611 	case INET_IFOPT_MTU:
5612 	    buf_check(buf, b_end, 4);
5613 #if defined(SIOCSIFMTU) && defined(ifr_mtu)
5614 	    ifreq.ifr_mtu = get_int32(buf);
5615 	    (void) ioctl(desc->s, SIOCSIFMTU, (char *)&ifreq);
5616 #endif
5617 	    buf += 4;
5618 	    break;
5619 
5620 	case INET_IFOPT_FLAGS: {
5621 	    int flags0;
5622 	    int flags;
5623 	    int eflags;
5624 
5625 	    buf_check(buf, b_end, 4);
5626 	    eflags = get_int32(buf);
5627 
5628 	    /* read current flags */
5629 	    if (ioctl(desc->s, SIOCGIFFLAGS, (char*)&ifreq) < 0)
5630 		flags0 = flags = 0;
5631 	    else
5632 		flags0 = flags = ifreq.ifr_flags;
5633 
5634 	    /* update flags */
5635 	    if (eflags & INET_IFF_UP)            flags |= IFF_UP;
5636 	    if (eflags & INET_IFF_DOWN)          flags &= ~IFF_UP;
5637 	    if (eflags & INET_IFF_BROADCAST)     flags |= IFF_BROADCAST;
5638 	    if (eflags & INET_IFF_NBROADCAST)    flags &= ~IFF_BROADCAST;
5639 	    if (eflags & INET_IFF_POINTTOPOINT)  flags |= IFF_POINTOPOINT;
5640 	    if (eflags & INET_IFF_NPOINTTOPOINT) flags &= ~IFF_POINTOPOINT;
5641 
5642 	    if (flags != flags0) {
5643 		ifreq.ifr_flags = flags;
5644 		(void) ioctl(desc->s, SIOCSIFFLAGS, (char*)&ifreq);
5645 	    }
5646 	    buf += 4;
5647 	    break;
5648 	}
5649 
5650 	default:
5651 	    goto error;
5652 	}
5653     }
5654     return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
5655 
5656  error:
5657     return ctl_error(EINVAL, rbuf, rsize);
5658 }
5659 
5660 #else
5661 
5662 
inet_ctl_getiflist(inet_descriptor * desc,char ** rbuf,ErlDrvSizeT rsize)5663 static ErlDrvSSizeT inet_ctl_getiflist(inet_descriptor* desc,
5664 				       char** rbuf, ErlDrvSizeT rsize)
5665 {
5666     return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
5667 }
5668 
5669 
inet_ctl_ifget(inet_descriptor * desc,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)5670 static ErlDrvSSizeT inet_ctl_ifget(inet_descriptor* desc,
5671 				   char* buf, ErlDrvSizeT len,
5672 				   char** rbuf, ErlDrvSizeT rsize)
5673 {
5674     return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
5675 }
5676 
5677 
inet_ctl_ifset(inet_descriptor * desc,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)5678 static ErlDrvSSizeT inet_ctl_ifset(inet_descriptor* desc,
5679 				   char* buf, ErlDrvSizeT len,
5680 				   char** rbuf, ErlDrvSizeT rsize)
5681 {
5682     return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
5683 }
5684 
5685 #endif
5686 
5687 
5688 
5689 #if defined(__WIN32__) || defined(HAVE_GETIFADDRS)
5690 /* Latin-1 to utf8 */
5691 
/*
 * Count the bytes needed to store the Latin-1 string c as UTF-8,
 * not counting any terminator.
 *
 * At most m characters are examined; scanning also stops at the first
 * NUL.  A negative m never reaches zero in practice, so it means
 * "until the NUL".  Characters with the high bit set take two bytes,
 * all others one.
 */
static int utf8_len(const char *c, int m) {
    int bytes = 0;

    while (m != 0 && *c != '\0') {
        /* Non-ASCII Latin-1 expands to a two-byte sequence */
        bytes += ((*c & 0x7f) != *c) ? 2 : 1;
        c++;
        m--;
    }
    return bytes;
}
5700 
/*
 * Write the UTF-8 encoding of the Latin-1 string c to p.
 *
 * At most m characters are converted; conversion also stops at the
 * first NUL (a negative m means "until the NUL").  No terminator is
 * written, and the caller must supply room for the number of bytes
 * that utf8_len(c, m) reports.
 */
static void utf8_encode(const char *c, int m, char *p) {
    while (m != 0 && *c != '\0') {
        char ch = *c;

        if ((ch & 0x7f) == ch) {
            /* Plain ASCII is copied through unchanged */
            *p++ = (char) ch;
        } else {
            /* Lead byte carries the top two bits, trail byte the low six */
            *p++ = (char) (0xC0 | (0x03 & (ch >> 6)));
            *p++ = (char) (0x80 | (0x3F & ch));
        }
        c++;
        m--;
    }
}
5712 #endif
5713 
5714 #if defined(__WIN32__)
5715 
/*
 * Fill the len-byte array c with a network mask that has its first
 * pref_len bits set and the remaining bits cleared.
 */
static void set_netmask_bytes(char *c, int len, int pref_len) {
    int full_bytes = pref_len >> 3;
    int pos = 0;

    /* Bytes completely covered by the prefix */
    while (pos < full_bytes && pos < len) {
        c[pos] = '\xFF';
        pos++;
    }
    /* The byte the prefix ends in; it becomes zero when the prefix
     * ends on a byte boundary, since the set bits are shifted out */
    if (pos < len) {
        c[pos] = 0xFF << (8 - (pref_len & 7));
        pos++;
    }
    /* Everything after the prefix */
    while (pos < len) {
        c[pos] = '\0';
        pos++;
    }
}
5722 
5723 
/*
 * Compare the first pref_len bits of the byte arrays a and b.
 *
 * Returns 1 when those bits are identical and 0 otherwise.  Both
 * arrays must hold at least (pref_len + 7) / 8 bytes.
 */
int eq_masked_bytes(char *a, char *b, int pref_len) {
    int whole_bytes = pref_len >> 3;
    int tail_bits = pref_len & 7;
    int k = 0;

    /* Bytes completely inside the prefix must match exactly */
    while (k < whole_bytes) {
        if (a[k] != b[k]) return 0;
        k++;
    }
    /* Compare only the leading bits of the byte the prefix ends in */
    if (tail_bits != 0) {
        int mask = 0xFF & (0xFF << (8 - tail_bits));
        if ((a[k] & mask) != (b[k] & mask)) return 0;
    }
    return 1;
}
5736 
inet_ctl_getifaddrs(inet_descriptor * desc_p,char ** rbuf_pp,ErlDrvSizeT rsize)5737 static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
5738 					char **rbuf_pp, ErlDrvSizeT rsize)
5739 {
5740     int i;
5741     DWORD ret, n;
5742     IP_INTERFACE_INFO *info_p;
5743     MIB_IPADDRTABLE *ip_addrs_p;
5744     IP_ADAPTER_ADDRESSES *ip_adaddrs_p, *ia_p;
5745 
5746     char *buf_p;
5747     char *buf_alloc_p;
5748     ErlDrvSizeT buf_size = 512;
5749 #   define BUF_ENSURE(Size)						\
5750     do {								\
5751 	int NEED_, GOT_ = buf_p - buf_alloc_p;				\
5752 	NEED_ = GOT_ + (Size);						\
5753 	if (NEED_ > buf_size) {						\
5754 	    buf_size = NEED_ + 512;					\
5755 	    buf_alloc_p = REALLOC(buf_alloc_p, buf_size);		\
5756 	    buf_p = buf_alloc_p + GOT_;					\
5757 	}								\
5758     } while(0)
5759 #   define SOCKADDR_TO_BUF(opt, sa)					\
5760     do {								\
5761 	if (sa) {							\
5762 	    char *P_;							\
5763 	    *buf_p++ = (opt);						\
5764 	    while (! (P_ = sockaddr_to_buf((sa), buf_p,			\
5765 					   buf_alloc_p+buf_size))) {    \
5766 		int GOT_ = buf_p - buf_alloc_p;				\
5767 		buf_size += 512;					\
5768 		buf_alloc_p = REALLOC(buf_alloc_p, buf_size);		\
5769 		buf_p = buf_alloc_p + GOT_;				\
5770 	    }								\
5771 	    if (P_ == buf_p) {						\
5772 		buf_p--;						\
5773 	    } else {							\
5774 		buf_p = P_;						\
5775 	    }								\
5776 	}								\
5777     } while (0)
5778 
5779     {
5780 	/* Try GetAdaptersAddresses, if it is available */
5781 	unsigned long ip_adaddrs_size = 16 * 1024;
5782 	ULONG family = AF_UNSPEC;
5783 	ULONG flags =
5784 	    GAA_FLAG_INCLUDE_PREFIX | GAA_FLAG_SKIP_ANYCAST |
5785 	    GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_SKIP_FRIENDLY_NAME |
5786 	    GAA_FLAG_SKIP_MULTICAST;
5787 	ULONG (WINAPI *fpGetAdaptersAddresses)
5788 	    (ULONG, ULONG, PVOID, PIP_ADAPTER_ADDRESSES, PULONG);
5789 	HMODULE iphlpapi = GetModuleHandle("iphlpapi");
5790 	fpGetAdaptersAddresses = (void *)
5791 	    (iphlpapi ?
5792 		GetProcAddress(iphlpapi, "GetAdaptersAddresses") :
5793 		NULL);
5794 	if (fpGetAdaptersAddresses) {
5795 	    ip_adaddrs_p = ALLOC(ip_adaddrs_size);
5796 	    for (i = 17;  i;  i--) {
5797 		ret = fpGetAdaptersAddresses(
5798 		    family, flags, NULL, ip_adaddrs_p, &ip_adaddrs_size);
5799 		ip_adaddrs_p = REALLOC(ip_adaddrs_p, ip_adaddrs_size);
5800 		if (ret == NO_ERROR) break;
5801 		if (ret == ERROR_BUFFER_OVERFLOW) continue;
5802 		i = 0;
5803 	    }
5804 	    if (! i) {
5805 		FREE(ip_adaddrs_p);
5806 		ip_adaddrs_p = NULL;
5807 	    }
5808 	} else ip_adaddrs_p = NULL;
5809     }
5810 
5811     {
5812 	/* Load the IP_INTERFACE_INFO table (only IPv4 interfaces),
5813 	 * reliable source of interface names on XP
5814 	 */
5815 	unsigned long info_size = 4 * 1024;
5816 	info_p = ALLOC(info_size);
5817 	for (i = 17;  i;  i--) {
5818 	    ret = GetInterfaceInfo(info_p, &info_size);
5819 	    info_p = REALLOC(info_p, info_size);
5820 	    if (ret == NO_ERROR) break;
5821 	    if (ret == ERROR_INSUFFICIENT_BUFFER) continue;
5822 	    i = 0;
5823 	}
5824 	if (! i) {
5825 	    FREE(info_p);
5826 	    info_p = NULL;
5827 	}
5828     }
5829 
5830     if (! ip_adaddrs_p) {
5831 	/* If GetAdaptersAddresses gave nothing we fall back to
5832 	 * MIB_IPADDRTABLE (only IPv4 interfaces)
5833 	 */
5834 	unsigned long ip_addrs_size = 16 * sizeof(*ip_addrs_p);
5835 	ip_addrs_p = ALLOC(ip_addrs_size);
5836 	for (i = 17;  i;  i--) {
5837 	    ret = GetIpAddrTable(ip_addrs_p, &ip_addrs_size, FALSE);
5838 	    ip_addrs_p = REALLOC(ip_addrs_p, ip_addrs_size);
5839 	    if (ret == NO_ERROR) break;
5840 	    if (ret == ERROR_INSUFFICIENT_BUFFER) continue;
5841 	    i = 0;
5842 	}
5843 	if (! i) {
5844 	    if (info_p) FREE(info_p);
5845 	    FREE(ip_addrs_p);
5846 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf_pp, rsize);
5847 	}
5848     } else ip_addrs_p = NULL;
5849 
5850     buf_p = buf_alloc_p = ALLOC(buf_size);
5851     *buf_p++ = INET_REP_OK;
5852 
5853     /* Iterate over MIB_IPADDRTABLE or IP_ADAPTER_ADDRESSES */
5854     for (ia_p = NULL, ip_addrs_p ? ((void *)(i = 0)) : (ia_p = ip_adaddrs_p);
5855 	 ip_addrs_p ? (i < ip_addrs_p->dwNumEntries) : (ia_p != NULL);
5856 	 ip_addrs_p ? ((void *)(i++)) : (ia_p = ia_p->Next)) {
5857 	MIB_IPADDRROW *ipaddrrow_p = NULL;
5858 	DWORD flags = INET_IFF_MULTICAST;
5859 	DWORD index = 0;
5860 	WCHAR *wname_p = NULL;
5861 	MIB_IFROW ifrow;
5862 
5863 	if (ip_addrs_p) {
5864 	    ipaddrrow_p = ip_addrs_p->table + i;
5865 	    index = ipaddrrow_p->dwIndex;
5866 	} else {
5867 	    index = ia_p->IfIndex;
5868 	    if (ia_p->Flags & IP_ADAPTER_NO_MULTICAST) {
5869 		flags &= ~INET_IFF_MULTICAST;
5870 	    }
5871 	}
5872 index:
5873 	if (! index) goto done;
5874 	sys_memzero(&ifrow, sizeof(ifrow));
5875 	ifrow.dwIndex = index;
5876 	if (GetIfEntry(&ifrow) != NO_ERROR) break;
5877 	/* Find the interface name - first try MIB_IFROW.wzname */
5878 	if (ifrow.wszName[0] != 0) {
5879 	    wname_p = ifrow.wszName;
5880 	} else {
5881 	    /* Then try IP_ADAPTER_INDEX_MAP.Name (only IPv4 adapters) */
5882 	    int j;
5883 	    for (j = 0;  j < info_p->NumAdapters;  j++) {
5884 		if (info_p->Adapter[j].Index == (ULONG) ifrow.dwIndex) {
5885 		    if (info_p->Adapter[j].Name[0] != 0) {
5886 			wname_p = info_p->Adapter[j].Name;
5887 		    }
5888 		    break;
5889 		}
5890 	    }
5891 	}
5892 	if (wname_p) {
5893 	    int len;
5894 	    /* Convert interface name to UTF-8 */
5895 	    len =
5896 		WideCharToMultiByte(
5897 		    CP_UTF8, 0, wname_p, -1, NULL, 0, NULL, NULL);
5898 	    if (! len) break;
5899 	    BUF_ENSURE(len);
5900 	    WideCharToMultiByte(
5901 		CP_UTF8, 0, wname_p, -1, buf_p, len, NULL, NULL);
5902 	    buf_p += len;
5903 	} else {
5904 	    /* Found no name -
5905 	    * use "MIB_IFROW.dwIndex: MIB_IFROW.bDescr" as name instead */
5906 	    int l;
5907 	    l = utf8_len(ifrow.bDescr, ifrow.dwDescrLen);
5908 	    BUF_ENSURE(9 + l+1);
5909 	    buf_p +=
5910 		erts_sprintf(
5911 		    buf_p, "%lu: ", (unsigned long) ifrow.dwIndex);
5912 	    utf8_encode(ifrow.bDescr, ifrow.dwDescrLen, buf_p);
5913 	    buf_p += l;
5914 	    *buf_p++ = '\0';
5915 	}
5916 	/* Interface flags, often make up broadcast and multicast flags */
5917 	switch (ifrow.dwType) {
5918 	case IF_TYPE_ETHERNET_CSMACD:
5919 	    flags |= INET_IFF_BROADCAST;
5920 	    break;
5921 	case IF_TYPE_SOFTWARE_LOOPBACK:
5922 	    flags |= INET_IFF_LOOPBACK;
5923 	    flags &= ~INET_IFF_MULTICAST;
5924 	    break;
5925 	default:
5926 	    flags &= ~INET_IFF_MULTICAST;
5927 	    break;
5928 	}
5929 	if (ifrow.dwAdminStatus) {
5930 	    flags |= INET_IFF_UP;
5931 	    switch (ifrow.dwOperStatus) {
5932 	    case IF_OPER_STATUS_CONNECTING:
5933 		flags |= INET_IFF_POINTTOPOINT;
5934 		break;
5935 	    case IF_OPER_STATUS_CONNECTED:
5936 		flags |= INET_IFF_RUNNING | INET_IFF_POINTTOPOINT;
5937 		break;
5938 	    case IF_OPER_STATUS_OPERATIONAL:
5939 		flags |= INET_IFF_RUNNING;
5940 		break;
5941 	    }
5942 	}
5943 	BUF_ENSURE(1 + 4);
5944 	*buf_p++ = INET_IFOPT_FLAGS;
5945 	put_int32(flags, buf_p); buf_p += 4;
5946 	if (ipaddrrow_p) {
5947 	    /* Legacy implementation through GetIpAddrTable */
5948 	    struct sockaddr_in sin;
5949 	    /* IP Address */
5950 	    sys_memzero(&sin, sizeof(sin));
5951 	    sin.sin_family = AF_INET;
5952 	    sin.sin_addr.s_addr = ipaddrrow_p->dwAddr;
5953 	    BUF_ENSURE(1);
5954 	    /* Netmask */
5955 	    SOCKADDR_TO_BUF(INET_IFOPT_ADDR, (struct sockaddr *) &sin);
5956 	    sin.sin_addr.s_addr = ipaddrrow_p->dwMask;
5957 	    BUF_ENSURE(1);
5958 	    SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, (struct sockaddr *) &sin);
5959 	    if (flags & INET_IFF_BROADCAST) {
5960 		/* Broadcast address - fake it*/
5961 		sin.sin_addr.s_addr = ipaddrrow_p->dwAddr;
5962 		sin.sin_addr.s_addr |= ~ipaddrrow_p->dwMask;
5963 		BUF_ENSURE(1);
5964 		SOCKADDR_TO_BUF(
5965 		    INET_IFOPT_BROADADDR, (struct sockaddr *) &sin);
5966 	    }
5967 	} else {
5968 	    IP_ADAPTER_UNICAST_ADDRESS *p;
5969 	    /* IP Address(es) */
5970 	    for (p = ia_p->FirstUnicastAddress;
5971 		p;
5972 		p = p->Next)
5973 	    {
5974 		IP_ADAPTER_PREFIX *q;
5975 		ULONG shortest_length;
5976 		struct sockaddr *shortest_p, *sa_p = p->Address.lpSockaddr;
5977 		BUF_ENSURE(1);
5978 		SOCKADDR_TO_BUF(INET_IFOPT_ADDR, sa_p);
5979 		shortest_p = NULL;
5980 		shortest_length = 0;
5981 		for (q = ia_p->FirstPrefix;
5982 		     q;
5983 		     q = q->Next) {
5984 		    struct sockaddr *sp_p = q->Address.lpSockaddr;
5985 		    if (sa_p->sa_family != sp_p->sa_family) continue;
5986 		    switch (sa_p->sa_family) {
5987 		    case AF_INET: {
5988 			struct sockaddr_in sin;
5989 			DWORD sa, sp, mask;
5990 			sa = ntohl((DWORD)
5991 				   ((struct sockaddr_in *)
5992 				    sa_p)->sin_addr.s_addr);
5993 			sp = ntohl((DWORD)
5994 				   ((struct sockaddr_in *)
5995 				    sp_p)->sin_addr.s_addr);
5996 			mask = 0xFFFFFFFF << (32 - q->PrefixLength);
5997 			if ((sa & mask) != (sp & mask)) continue;
5998 			if ((! shortest_p)
5999 			    || q->PrefixLength < shortest_length) {
6000 			    shortest_p = sp_p;
6001 			    shortest_length = q->PrefixLength;
6002 			}
6003 		    }   break;
6004 		    case AF_INET6: {
6005 			struct sockaddr_in6 sin6;
6006 			if (!eq_masked_bytes((char *)
6007 					     &((struct sockaddr_in6 *)
6008 					       sa_p)->sin6_addr,
6009 					     (char *)
6010 					     &((struct sockaddr_in6 *)
6011 					       sp_p)->sin6_addr,
6012 					     q->PrefixLength)) {
6013 			    continue;
6014 			}
6015 			if ((! shortest_p)
6016 			    || q->PrefixLength < shortest_length) {
6017 			    shortest_p = sp_p;
6018 			    shortest_length = q->PrefixLength;
6019 			}
6020 		    }   break;
6021 		    }
6022 		}
6023 		if (! shortest_p) {
6024 		    /* Found no shortest prefix */
6025 		    shortest_p = sa_p;
6026 		    switch (shortest_p->sa_family) {
6027 		    case AF_INET: {
6028 			/* Fall back to old classfull network addresses */
6029 			DWORD addr = ntohl(((struct sockaddr_in *)shortest_p)
6030 					   ->sin_addr.s_addr);
6031 			if (! (addr & 0x800000)) {
6032 			    /* Class A */
6033 			    shortest_length = 8;
6034 			} else if (! (addr & 0x400000)) {
6035 			    /* Class B */
6036 			    shortest_length = 16;
6037 			} else if (! (addr & 0x200000)) {
6038 			    /* Class C */
6039 			    shortest_length = 24;
6040 			} else {
6041 			    shortest_length = 32;
6042 			}
6043 		    }   break;
6044 		    case AF_INET6: {
6045 			/* Just play it safe */
6046 			shortest_length = 128;
6047 		    }   break;
6048 		    }
6049 		}
6050 		switch (shortest_p->sa_family) {
6051 		case AF_INET: {
6052 		    struct sockaddr_in sin;
6053 		    DWORD mask = 0xFFFFFFFF << (32 - shortest_length);
6054 		    sys_memzero(&sin, sizeof(sin));
6055 		    sin.sin_family = shortest_p->sa_family;
6056 		    sin.sin_addr.s_addr = htonl(mask);
6057 		    BUF_ENSURE(1);
6058 		    SOCKADDR_TO_BUF(INET_IFOPT_NETMASK,
6059 				    (struct sockaddr *) &sin);
6060 		    if (flags & INET_IFF_BROADCAST) {
6061 			DWORD sp =
6062 			    ntohl((DWORD)
6063 				  ((struct sockaddr_in *)shortest_p)
6064 				  -> sin_addr.s_addr);
6065 			sin.sin_addr.s_addr = htonl(sp | ~mask);
6066 			BUF_ENSURE(1);
6067 			SOCKADDR_TO_BUF(INET_IFOPT_BROADADDR,
6068 					(struct sockaddr *) &sin);
6069 		    }
6070 		}   break;
6071 		case AF_INET6: {
6072 		    struct sockaddr_in6 sin6;
6073 		    sys_memzero(&sin6, sizeof(sin6));
6074 		    sin6.sin6_family = shortest_p->sa_family;
6075 		    set_netmask_bytes((char *) &sin6.sin6_addr,
6076 				      16,
6077 				      shortest_length);
6078 		    BUF_ENSURE(1);
6079 		    SOCKADDR_TO_BUF(INET_IFOPT_NETMASK,
6080 				    (struct sockaddr *) &sin6);
6081 		}   break;
6082 		}
6083 	    }
6084 	}
6085 	if (ifrow.dwPhysAddrLen) {
6086 	    /* Hardware Address */
6087 	    BUF_ENSURE(1 + 2 + ifrow.dwPhysAddrLen);
6088 	    *buf_p++ = INET_IFOPT_HWADDR;
6089 	    put_int16(ifrow.dwPhysAddrLen, buf_p); buf_p += 2;
6090 	    sys_memcpy(buf_p, ifrow.bPhysAddr, ifrow.dwPhysAddrLen);
6091 	    buf_p += ifrow.dwPhysAddrLen;
6092 	}
6093 
6094 done:
6095 	/* That is all for this interface */
6096 	BUF_ENSURE(1);
6097 	*buf_p++ = '\0';
6098 	if (ia_p &&
6099 	    ia_p->Ipv6IfIndex &&
6100 	    ia_p->Ipv6IfIndex != index)
6101 	{
6102 	    /* Oops, there was another interface for IPv6. Possible? XXX */
6103 	    index = ia_p->Ipv6IfIndex;
6104 	    goto index;
6105 	}
6106     }
6107 
6108     if (ip_adaddrs_p) FREE(ip_adaddrs_p);
6109     if (info_p) FREE(info_p);
6110     if (ip_addrs_p) FREE(ip_addrs_p);
6111 
6112     buf_size = buf_p - buf_alloc_p;
6113     buf_alloc_p = REALLOC(buf_alloc_p, buf_size);
6114     /* buf_p is now unreliable */
6115     *rbuf_pp = buf_alloc_p;
6116     return buf_size;
6117 #   undef BUF_ENSURE
6118 }
6119 
6120 #elif defined(HAVE_GETIFADDRS)
6121 #ifdef  DEBUG
6122 #define GETIFADDRS_BUFSZ (1)
6123 #else
6124 #define GETIFADDRS_BUFSZ (512)
6125 #endif
6126 
inet_ctl_getifaddrs(inet_descriptor * desc_p,char ** rbuf_pp,ErlDrvSizeT rsize)6127 static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
6128 					char **rbuf_pp, ErlDrvSizeT rsize)
6129 {
6130     struct ifaddrs *ifa_p, *ifa_free_p;
6131 
6132     ErlDrvSizeT buf_size;
6133     char *buf_p;
6134     char *buf_alloc_p;
6135     int save_errno;
6136 
6137     buf_size = GETIFADDRS_BUFSZ;
6138     buf_alloc_p = ALLOC(GETIFADDRS_BUFSZ);
6139     buf_p = buf_alloc_p;
6140 #   define BUF_ENSURE(Size)						\
6141     do {								\
6142 	int NEED_, GOT_ = buf_p - buf_alloc_p;				\
6143 	NEED_ = GOT_ + (Size);						\
6144 	if (NEED_ > buf_size) {						\
6145 	    buf_size = NEED_ + GETIFADDRS_BUFSZ;			\
6146 	    buf_alloc_p = REALLOC(buf_alloc_p, buf_size);		\
6147 	    buf_p = buf_alloc_p + GOT_;					\
6148 	}								\
6149     } while (0)
6150 #   define SOCKADDR_TO_BUF(opt, sa)				        \
6151     do {    						                \
6152 	if (sa) {                                                       \
6153 	    char *P_;							\
6154 	    *buf_p++ = (opt);						\
6155 	    while (! (P_ = sockaddr_to_buf((sa), buf_p,			\
6156 					   buf_alloc_p+buf_size))) {	\
6157 		int GOT_ = buf_p - buf_alloc_p;				\
6158 		buf_size += GETIFADDRS_BUFSZ;				\
6159 		buf_alloc_p = REALLOC(buf_alloc_p, buf_size);		\
6160 		buf_p = buf_alloc_p + GOT_;				\
6161 	    }								\
6162 	    if (P_ == buf_p) {						\
6163 		buf_p--;						\
6164 	    } else {							\
6165 		buf_p = P_;						\
6166 	    }								\
6167 	}                                                               \
6168     } while (0)
6169 
6170     if ((save_errno = call_getifaddrs(desc_p, &ifa_p)) != 0)
6171         return ctl_error(save_errno, rbuf_pp, rsize);
6172 
6173     ifa_free_p = ifa_p;
6174     *buf_p++ = INET_REP_OK;
6175     for (;  ifa_p;  ifa_p = ifa_p->ifa_next) {
6176 	int len = utf8_len(ifa_p->ifa_name, -1);
6177 	BUF_ENSURE(len+1 + 1+4 + 1);
6178 	utf8_encode(ifa_p->ifa_name, -1, buf_p);
6179 	buf_p += len;
6180 	*buf_p++ = '\0';
6181 	*buf_p++ = INET_IFOPT_FLAGS;
6182 	put_int32(IFGET_FLAGS(ifa_p->ifa_flags), buf_p); buf_p += 4;
6183 	if (ifa_p->ifa_addr) {
6184 	    if (ifa_p->ifa_addr->sa_family == AF_INET
6185 #if defined(AF_INET6)
6186 		|| ifa_p->ifa_addr->sa_family == AF_INET6
6187 #endif
6188 		) {
6189 		SOCKADDR_TO_BUF(INET_IFOPT_ADDR, ifa_p->ifa_addr);
6190 		if (ifa_p->ifa_netmask) {
6191 		    BUF_ENSURE(1);
6192 		    SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, ifa_p->ifa_netmask);
6193 		}
6194 		if (ifa_p->ifa_dstaddr &&
6195 		    (ifa_p->ifa_flags & IFF_POINTOPOINT)) {
6196 		    BUF_ENSURE(1);
6197 		    SOCKADDR_TO_BUF(INET_IFOPT_DSTADDR, ifa_p->ifa_dstaddr);
6198 		} else if (ifa_p->ifa_broadaddr &&
6199 			   (ifa_p->ifa_flags & IFF_BROADCAST)) {
6200 		    BUF_ENSURE(1);
6201 		    SOCKADDR_TO_BUF(INET_IFOPT_BROADADDR, ifa_p->ifa_broadaddr);
6202 		}
6203 	    }
6204 #if defined(AF_LINK) || defined(AF_PACKET)
6205 	    else if (
6206 #if defined(AF_LINK)
6207 		     ifa_p->ifa_addr->sa_family == AF_LINK
6208 #else
6209 		     0
6210 #endif
6211 #if defined(AF_PACKET)
6212 		     || ifa_p->ifa_addr->sa_family == AF_PACKET
6213 #endif
6214 		     ) {
6215 		size_t need = sockaddr_bufsz_need(ifa_p->ifa_addr);
6216 		if (need > 3) {
6217 		    BUF_ENSURE(1 + need);
6218 		    SOCKADDR_TO_BUF(INET_IFOPT_HWADDR, ifa_p->ifa_addr);
6219 		}
6220 	    }
6221 #endif
6222 	}
6223 	BUF_ENSURE(1);
6224 	*buf_p++ = '\0';
6225     }
6226     buf_size = buf_p - buf_alloc_p;
6227     buf_alloc_p = REALLOC(buf_alloc_p, buf_size);
6228     /* buf_p is now unreliable */
6229     freeifaddrs(ifa_free_p);
6230     *rbuf_pp = buf_alloc_p;
6231     return buf_size;
6232 #   undef BUF_ENSURE
6233 }
6234 #undef GETIFADDRS_BUFSZ
6235 
6236 #else
6237 
inet_ctl_getifaddrs(inet_descriptor * desc_p,char ** rbuf_pp,ErlDrvSizeT rsize)6238 static ErlDrvSSizeT inet_ctl_getifaddrs(inet_descriptor* desc_p,
6239 					char **rbuf_pp, ErlDrvSizeT rsize)
6240 {
6241     return ctl_error(ENOTSUP, rbuf_pp, rsize);
6242 }
6243 
6244 #endif
6245 
6246 /* Per H @ Tail-f: The original code here had problems that possibly
6247    only occur if you abuse it for non-INET sockets, but anyway:
6248    a) If the getsockopt for SO_PRIORITY or IP_TOS failed, the actual
6249       requested setsockopt was never even attempted.
6250    b) If {get,set}sockopt for one of IP_TOS and SO_PRIORITY failed,
6251       but ditto for the other worked and that was actually the requested
6252       option, failure was still reported to erlang.                  */
6253 
6254 #if  defined(IP_TOS) && defined(IPPROTO_IP)             \
6255     && defined(SO_PRIORITY) && !defined(__WIN32__)
setopt_prio_tos_trick(int fd,int proto,int type,char * arg_ptr,int arg_sz,int propagate)6256 static int setopt_prio_tos_trick
6257 	(int fd, int proto, int type, char* arg_ptr, int arg_sz, int propagate)
6258 {
6259     /* The relations between SO_PRIORITY, TOS and other options
6260        is not what you (or at least I) would expect...:
6261        If TOS is set after priority, priority is zeroed.
6262        If any other option is set after tos, tos might be zeroed.
6263        Therefore, save tos and priority. If something else is set,
6264        restore both after setting, if  tos is set, restore only
6265        prio and if prio is set restore none... All to keep the
6266        user feeling socket options are independent. /PaN */
6267     int          tmp_ival_prio;
6268     int          tmp_ival_tos;
6269     int          res;
6270     int          res_prio;
6271     int          res_tos;
6272     SOCKLEN_T    tmp_arg_sz_prio = sizeof(tmp_ival_prio);
6273     SOCKLEN_T    tmp_arg_sz_tos  = sizeof(tmp_ival_tos);
6274 
6275     res_prio = sock_getopt(fd, SOL_SOCKET, SO_PRIORITY,
6276 		      (char *) &tmp_ival_prio, &tmp_arg_sz_prio);
6277     res_tos = sock_getopt(fd, IPPROTO_IP, IP_TOS,
6278 		      (char *) &tmp_ival_tos, &tmp_arg_sz_tos);
6279 	    res = sock_setopt(fd, proto, type, arg_ptr, arg_sz);
6280 	    if (res == 0) {
6281 		if (type != SO_PRIORITY) {
6282 	    if (type != IP_TOS && res_tos == 0) {
6283 		res_tos = sock_setopt(fd,
6284 					  IPPROTO_IP,
6285 					  IP_TOS,
6286 					  (char *) &tmp_ival_tos,
6287 					  tmp_arg_sz_tos);
6288 		if (propagate)
6289 		    res = res_tos;
6290 		    }
6291 	    if (res == 0 && res_prio == 0) {
6292 		res_prio = sock_setopt(fd,
6293 					   SOL_SOCKET,
6294 					   SO_PRIORITY,
6295 					   (char *) &tmp_ival_prio,
6296 					   tmp_arg_sz_prio);
6297 		if (propagate) {
6298 		    /* Some kernels set a SO_PRIORITY by default that you are not permitted to reset,
6299 		       silently ignore this error condition */
6300 		    if (res_prio != 0 && sock_errno() == EPERM) {
6301 			res = 0;
6302 		    } else {
6303 			res = res_prio;
6304 		    }
6305 		}
6306 	    }
6307 	}
6308     }
6309     return (res);
6310 }
6311 #endif
6312 
6313 /* set socket options:
6314 ** return -1 on error
6315 **         0 if ok
6316 **         1 if ok force deliver of queued data
6317 */
6318 #ifdef HAVE_SCTP
6319 static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len);
6320 #endif
6321 
/*
 * inet_set_opts: apply a packed list of options to a (non-SCTP) socket.
 *
 * "ptr"/"len" describe a buffer of consecutive entries.  Every entry starts
 * with a 1-byte option code followed by a 32-bit value (read with
 * get_int32); some options carry extra trailing data, which is length
 * checked in the individual cases below.  INET_LOPT_* options only update
 * the descriptor; all other options are forwarded to the socket with
 * sock_setopt (or setopt_prio_tos_trick where that is compiled in).
 * When HAVE_SCTP is defined, SCTP descriptors are handed to sctp_set_opts.
 *
 * Returns:
 *   -1  malformed/truncated input, unknown option, out-of-range value, or a
 *       failed setsockopt for an option that has "propagate" set
 *    0  ok
 *    1  ok; a connected stream socket went passive => active, or changed
 *       packet type while active
 *    2  as 1, but the packet type is unchanged and the new mode is
 *       INET_ONCE or INET_MULTI
 */
static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
{
    int type;
    int proto;
    int opt;
    struct linger li_val;
#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
    struct ip_mreq mreq_val;
#endif
    int ival;
    char* arg_ptr;
    int arg_sz;
#ifdef SO_BINDTODEVICE
    char ifname[IFNAMSIZ];
#endif
    enum PacketParseType old_htype = desc->htype;
    int old_active = desc->active;
    int propagate; /* Set to 1 if failure to set this option
		      should be propagated to erlang (not all
		      errors can be propagated for BC reasons) */
    int res;
#ifdef HAVE_SCTP
    /* SCTP sockets are treated completely separately: */
    if (IS_SCTP(desc))
	return sctp_set_opts(desc, ptr, len);
#endif
    /* XXX { int i; for(i=0;i<len;++i) fprintf(stderr,"0x%02X, ", (unsigned) ptr[i]); fprintf(stderr,"\r\n");} */

    /* Each iteration consumes one entry: 1 byte option code + 4 byte value.
       Cases that "continue" are handled locally; cases that "break" fall
       through to the setsockopt call after the switch. */
    while(len >= 5) {
        int recv_cmsgflags;

	opt = *ptr++;
	ival = get_int32(ptr);
	ptr += 4;
	len -= 5;
	/* Defaults for the common "plain int at SOL_SOCKET level" case */
	arg_ptr = (char*) &ival;
	arg_sz = sizeof(ival);
	proto = SOL_SOCKET;
	propagate = 0;
	/* Only committed to the descriptor if the setsockopt succeeds */
        recv_cmsgflags = desc->recv_cmsgflags;

	switch(opt) {
	case INET_LOPT_HEADER:
	    DEBUGF(("inet_set_opts(%ld): s=%d, HEADER=%d\r\n",
		    (long)desc->port, desc->s,ival));
	    desc->hsz = ival;
	    continue;

	case INET_LOPT_MODE:
	    /* List or Binary: */
	    DEBUGF(("inet_set_opts(%ld): s=%d, MODE=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    desc->mode = ival;
	    continue;

	case INET_LOPT_DELIVER:
	    DEBUGF(("inet_set_opts(%ld): s=%d, DELIVER=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    desc->deliver = ival;
	    continue;

	case INET_LOPT_BUFFER:
	    DEBUGF(("inet_set_opts(%ld): s=%d, BUFFER=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    if (ival < INET_MIN_BUFFER) ival = INET_MIN_BUFFER;
	    desc->bufsz = ival;
            desc->flags |= INET_FLG_BUFFER_SET;
	    continue;

	case INET_LOPT_ACTIVE:
	    DEBUGF(("inet_set_opts(%ld): s=%d, ACTIVE=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    /* INET_MULTI carries a trailing 16-bit count; reject a
	       truncated buffer before reading it or touching desc. */
	    if (ival == INET_MULTI && len < 2)
		return -1;
	    desc->active = ival;
            if (desc->active == INET_MULTI) {
                long ac = desc->active_count;
                Sint16 nval = get_int16(ptr);
                ptr += 2;
                len -= 2;
                /* Do the range check in a wider type before updating */
                ac += nval;
                if (ac > INT16_MAX || ac < INT16_MIN)
                    return -1;
                desc->active_count += nval;
                if (desc->active_count < 0)
                    desc->active_count = 0;
                if (desc->active_count == 0) {
                    desc->active = INET_PASSIVE;
                    packet_passive_message(desc);
                }
            } else
                desc->active_count = 0;
	    if ((desc->stype == SOCK_STREAM) && (desc->active != INET_PASSIVE) &&
		(desc->state == INET_STATE_CLOSED)) {
		tcp_descriptor *tdesc = (tcp_descriptor *) desc;
		if (tdesc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) {
		    tdesc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET;
		    tcp_error_message(tdesc, ECONNRESET);
		}
		tcp_closed_message(tdesc);
		if (desc->exitf) {
		    driver_exit(desc->port, 0);
		    return 0; /* Give up on this socket, descriptor lost */
		} else {
		    desc_close_read(desc);
		}
	    }
	    continue;

	case INET_LOPT_PACKET:
	    DEBUGF(("inet_set_opts(%ld): s=%d, PACKET=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    desc->htype = ival;
	    continue;

	case INET_LOPT_PACKET_SIZE:
	    DEBUGF(("inet_set_opts(%ld): s=%d, PACKET_SIZE=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    desc->psize = (unsigned int)ival;
	    continue;

	case INET_LOPT_EXITONCLOSE:
	    DEBUGF(("inet_set_opts(%ld): s=%d, EXITONCLOSE=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    desc->exitf = ival;
	    continue;

	case INET_LOPT_TCP_HIWTRMRK:
	    if (desc->stype == SOCK_STREAM) {
		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
		if (ival < 0) ival = 0;
		/* Keep low <= high */
		if (tdesc->low > ival)
		    tdesc->low = ival;
		tdesc->high = ival;
	    }
	    continue;

	case INET_LOPT_TCP_LOWTRMRK:
	    if (desc->stype == SOCK_STREAM) {
		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
		if (ival < 0) ival = 0;
		/* Keep low <= high */
		if (tdesc->high < ival)
		    tdesc->high = ival;
		tdesc->low = ival;
	    }
	    continue;

	case INET_LOPT_MSGQ_HIWTRMRK: {
	    ErlDrvSizeT high;
	    if (ival < ERL_DRV_BUSY_MSGQ_LIM_MIN
		|| ERL_DRV_BUSY_MSGQ_LIM_MAX < ival)
		return -1;
	    high = (ErlDrvSizeT) ival;
	    erl_drv_busy_msgq_limits(desc->port, NULL, &high);
	    continue;
	}

	case INET_LOPT_MSGQ_LOWTRMRK: {
	    ErlDrvSizeT low;
	    if (ival < ERL_DRV_BUSY_MSGQ_LIM_MIN
		|| ERL_DRV_BUSY_MSGQ_LIM_MAX < ival)
		return -1;
	    low = (ErlDrvSizeT) ival;
	    erl_drv_busy_msgq_limits(desc->port, &low, NULL);
	    continue;
	}

	case INET_LOPT_TCP_SEND_TIMEOUT:
	    if (desc->stype == SOCK_STREAM) {
		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
		tdesc->send_timeout = ival;
	    }
	    continue;

	case INET_LOPT_TCP_SEND_TIMEOUT_CLOSE:
	    if (desc->stype == SOCK_STREAM) {
		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
		tdesc->send_timeout_close = ival;
	    }
	    continue;


	case INET_LOPT_TCP_DELAY_SEND:
	    if (desc->stype == SOCK_STREAM) {
		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
		if (ival)
		    tdesc->tcp_add_flags |= TCP_ADDF_DELAY_SEND;
		else
		    tdesc->tcp_add_flags &= ~TCP_ADDF_DELAY_SEND;
	    }
	    continue;

#ifdef HAVE_UDP
	case INET_LOPT_UDP_READ_PACKETS:
	    if (desc->stype == SOCK_DGRAM) {
		udp_descriptor* udesc = (udp_descriptor*) desc;
		if (ival <= 0) return -1;
		udesc->read_packets = ival;
	    }
	    continue;
#endif

#ifdef HAVE_SETNS
	case INET_LOPT_NETNS:
	    /* ival is the byte length of the namespace name that follows.
	       It is annoying that ival and len are both (signed) int */
	    if (ival < 0) return -1;
	    if (len < ival) return -1;
	    if (desc->netns != NULL) FREE(desc->netns);
	    desc->netns = ALLOC(((unsigned int) ival) + 1);
	    memcpy(desc->netns, ptr, ival);
	    desc->netns[ival] = '\0';
	    ptr += ival;
	    len -= ival;
	    continue;
#endif

	case INET_LOPT_TCP_SHOW_ECONNRESET:
	    if (desc->sprotocol == IPPROTO_TCP) {
		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
		if (ival)
		    tdesc->tcp_add_flags |= TCP_ADDF_SHOW_ECONNRESET;
		else
		    tdesc->tcp_add_flags &= ~TCP_ADDF_SHOW_ECONNRESET;
	    }
	    continue;

	case INET_LOPT_LINE_DELIM:
	    DEBUGF(("inet_set_opts(%ld): s=%d, LINE_DELIM=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    desc->delimiter = (char)ival;
	    continue;

	case INET_OPT_REUSEADDR:
#ifdef __WIN32__
	    continue;  /* Bjorn says */
#else
	    type = SO_REUSEADDR;
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_REUSEADDR=%d\r\n",
		    (long)desc->port, desc->s,ival));
	    break;
#endif
	case INET_OPT_KEEPALIVE: type = SO_KEEPALIVE;
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_KEEPALIVE=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
	case INET_OPT_DONTROUTE: type = SO_DONTROUTE;
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_DONTROUTE=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
	case INET_OPT_BROADCAST: type = SO_BROADCAST;
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_BROADCAST=%d\r\n",
		    (long)desc->port, desc->s,ival));
	    break;
	case INET_OPT_OOBINLINE: type = SO_OOBINLINE;
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_OOBINLINE=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
	case INET_OPT_SNDBUF:    type = SO_SNDBUF;
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_SNDBUF=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
	case INET_OPT_RCVBUF:    type = SO_RCVBUF;
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_RCVBUF=%d\r\n",
		    (long)desc->port, desc->s, ival));
            if (!(desc->flags & INET_FLG_BUFFER_SET)) {
                /* make sure we have desc->bufsz >= SO_RCVBUF */
                if (ival > (1 << 16) && desc->stype == SOCK_DGRAM && !IS_SCTP(desc))
                    /* For UDP we don't want to automatically
                       set the buffer size to be larger than
                       the theoretical max MTU */
                    desc->bufsz = 1 << 16;
                else if (ival > desc->bufsz)
                    desc->bufsz = ival;
            }
	    break;
	case INET_OPT_LINGER:    type = SO_LINGER;
	    /* ival is l_onoff; l_linger follows as another 32-bit value */
	    if (len < 4)
		return -1;
	    li_val.l_onoff = ival;
	    li_val.l_linger = get_int32(ptr);
	    ptr += 4;
	    len -= 4;
	    arg_ptr = (char*) &li_val;
	    arg_sz = sizeof(li_val);
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_LINGER=%d,%d",
		    (long)desc->port, desc->s, li_val.l_onoff,li_val.l_linger));
	    if (desc->sprotocol == IPPROTO_TCP) {
		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
		if (li_val.l_onoff && li_val.l_linger == 0)
		    tdesc->tcp_add_flags |= TCP_ADDF_LINGER_ZERO;
		else
		    tdesc->tcp_add_flags &= ~TCP_ADDF_LINGER_ZERO;
	    }
	    break;

	case INET_OPT_PRIORITY:
#ifdef SO_PRIORITY
	    type = SO_PRIORITY;
	    propagate = 1; /* We do want to know if this fails */
	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_PRIORITY=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#else
            /* inet_fill_opts always returns a value for this option,
             * so we need to ignore it if not implemented */
	    continue;
#endif
	case INET_OPT_TOS:
#if defined(IP_TOS) && defined(IPPROTO_IP)
	    proto = IPPROTO_IP;
	    type = IP_TOS;
	    propagate = 1;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_TOS=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#else
            /* inet_fill_opts always returns a value for this option,
             * so we need to ignore it if not implemented. */
	    continue;
#endif
#if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
	case INET_OPT_TCLASS:
	    proto = IPPROTO_IPV6;
	    type = IPV6_TCLASS;
	    propagate = 1;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_TCLASS=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#endif
#if defined(IP_TTL) && defined(IPPROTO_IP)
	case INET_OPT_TTL:
	    proto = IPPROTO_IP;
	    type = IP_TTL;
	    propagate = 1;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_TTL=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#endif
#if defined(IP_RECVTOS) && defined(IPPROTO_IP)
	case INET_OPT_RECVTOS:
	    proto = IPPROTO_IP;
	    type = IP_RECVTOS;
	    propagate = 1;
            recv_cmsgflags =
                ival ?
                (desc->recv_cmsgflags | INET_CMSG_RECVTOS) :
                (desc->recv_cmsgflags & ~INET_CMSG_RECVTOS);
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_RECVTOS=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#endif
#if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
	case INET_OPT_RECVTCLASS:
	    proto = IPPROTO_IPV6;
	    type = IPV6_RECVTCLASS;
	    propagate = 1;
            recv_cmsgflags =
                ival ?
                (desc->recv_cmsgflags | INET_CMSG_RECVTCLASS) :
                (desc->recv_cmsgflags & ~INET_CMSG_RECVTCLASS);
	    DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_RECVTCLASS=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#endif
#if defined(IP_RECVTTL) && defined(IPPROTO_IP)
	case INET_OPT_RECVTTL:
	    proto = IPPROTO_IP;
	    type = IP_RECVTTL;
	    propagate = 1;
            recv_cmsgflags =
                ival ?
                (desc->recv_cmsgflags | INET_CMSG_RECVTTL) :
                (desc->recv_cmsgflags & ~INET_CMSG_RECVTTL);
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_RECVTTL=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#endif

	case TCP_OPT_NODELAY:
	    proto = IPPROTO_TCP;
	    type = TCP_NODELAY;
	    DEBUGF(("inet_set_opts(%ld): s=%d, TCP_NODELAY=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;

	case TCP_OPT_NOPUSH:
#if defined(INET_TCP_NOPUSH)
	    proto = IPPROTO_TCP;
	    type = INET_TCP_NOPUSH;
	    DEBUGF(("inet_set_opts(%ld): s=%d, t=%d TCP_NOPUSH=%d\r\n",
	            (long)desc->port, desc->s, type, ival));
	    break;
#else
	    /* inet_fill_opts always returns a value for this option,
	     * so we need to ignore it if not implemented, just in case */
	    continue;
#endif

#if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)

	case UDP_OPT_MULTICAST_TTL:
	    proto = IPPROTO_IP;
	    type = IP_MULTICAST_TTL;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_MULTICAST_TTL=%d\r\n",
		    (long)desc->port,desc->s,ival));
	    break;

	case UDP_OPT_MULTICAST_LOOP:
	    proto = IPPROTO_IP;
	    type = IP_MULTICAST_LOOP;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_MULTICAST_LOOP=%d\r\n",
		    (long)desc->port,desc->s,ival));
	    break;

	case UDP_OPT_MULTICAST_IF:
	    proto = IPPROTO_IP;
	    type = IP_MULTICAST_IF;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_MULTICAST_IF=%x\r\n",
		    (long)desc->port, desc->s, ival));
	    ival = sock_htonl(ival);
	    break;

	case UDP_OPT_ADD_MEMBERSHIP:
	    proto = IPPROTO_IP;
	    type = IP_ADD_MEMBERSHIP;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_ADD_MEMBERSHIP=%d\r\n",
		    (long)desc->port, desc->s,ival));
	    goto L_set_mreq;

	case UDP_OPT_DROP_MEMBERSHIP:
	    proto = IPPROTO_IP;
	    type = IP_DROP_MEMBERSHIP;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IP_DROP_MEMBERSHIP=%x\r\n",
		    (long)desc->port, desc->s, ival));
	L_set_mreq:
	    /* ival is the multicast address; the interface address follows
	       as another 32-bit value, so make sure it is really there. */
	    if (len < 4)
		return -1;
	    mreq_val.imr_multiaddr.s_addr = sock_htonl(ival);
	    ival = get_int32(ptr);
	    mreq_val.imr_interface.s_addr = sock_htonl(ival);
	    ptr += 4;
	    len -= 4;
	    arg_ptr = (char*)&mreq_val;
	    arg_sz = sizeof(mreq_val);
	    break;

#endif /* defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) */

	case INET_OPT_IPV6_V6ONLY:
#if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
	    proto = IPPROTO_IPV6;
	    type = IPV6_V6ONLY;
	    propagate = 1;
	    DEBUGF(("inet_set_opts(%ld): s=%d, IPV6_V6ONLY=%d\r\n",
		    (long)desc->port, desc->s, ival));
	    break;
#elif defined(__WIN32__) && defined(HAVE_IN6) && defined(AF_INET6)
	    /* Fake a'la OpenBSD; set to 'true' is fine but 'false' invalid. */
	    if (ival != 0) continue;
	    else return -1;
	    break;
#else
	    continue;
#endif

	case INET_OPT_RAW:
	    /* Layout: proto (already in ival), type, arg size, arg bytes */
	    if (len < 8) {
		return -1;
	    }
	    proto = ival;
	    type = get_int32(ptr);
	    ptr += 4;
	    arg_sz = get_int32(ptr);
	    ptr += 4;
	    len -= 8;
	    /* A negative size would pass the "len < arg_sz" test, move ptr
	       backwards and grow len, so reject it explicitly. */
	    if (arg_sz < 0 || len < arg_sz) {
		return -1;
	    }
	    arg_ptr = ptr;
	    ptr += arg_sz;
	    len -= arg_sz;
	    break;

#ifdef SO_BINDTODEVICE
	case INET_OPT_BIND_TO_DEVICE:
	    /* ival is the byte length of the interface name that follows */
	    if (ival < 0) return -1;
	    if (len < ival) return -1;
	    /* Leave room for the terminating NUL written below */
	    if ((size_t) ival >= sizeof(ifname)) {
		return -1;
	    }
	    memcpy(ifname, ptr, ival);
	    ifname[ival] = '\0';
	    ptr += ival;
	    len -= ival;

	    proto = SOL_SOCKET;
	    type = SO_BINDTODEVICE;
	    arg_ptr = (char*)&ifname;
	    arg_sz = sizeof(ifname);
	    propagate = 1; /* We do want to know if this fails */

	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_BINDTODEVICE=%s\r\n",
		    (long)desc->port, desc->s, ifname));
	    break;
#endif

	default:
	    return -1;
	}
	/* Only options that "break" out of the switch reach this point */
#if  defined(IP_TOS) && defined(IPPROTO_IP) \
    && defined(SO_PRIORITY) && !defined(__WIN32__)
	res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz, propagate);
#else
	res = sock_setopt	    (desc->s, proto, type, arg_ptr, arg_sz);
#endif
        if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
	if (propagate && res != 0) {
	    return -1;
	}
	DEBUGF(("inet_set_opts(%ld): s=%d returned %d\r\n",
		(long)desc->port, desc->s, res));
    }

    if ( ((desc->stype == SOCK_STREAM) && IS_CONNECTED(desc)) ||
	((desc->stype == SOCK_DGRAM) && IS_OPEN(desc))) {

	if (desc->active != old_active) {
            /* Need to cancel the read_packet timer if we go from active to passive. */
            if (desc->active == INET_PASSIVE && desc->stype == SOCK_DGRAM)
                driver_cancel_timer(desc->port);
	    sock_select(desc, (FD_READ|FD_CLOSE), (desc->active>0));
        }

	/* XXX: UDP sockets could also trigger immediate read here NIY */
	if ((desc->stype==SOCK_STREAM) && desc->active) {
	    if (!old_active || (desc->htype != old_htype)) {
		/* passive => active change OR header type change in active mode */
		/* Return > 1 if only active changed to INET_ONCE -> direct read if
		   header type is unchanged. */
		/* XXX fprintf(stderr,"desc->htype == %d, old_htype == %d,
		   desc->active == %d, old_active == %d\r\n",(int)desc->htype,
		   (int) old_htype, (int) desc->active, (int) old_active );*/
		return 1+(desc->htype == old_htype &&
                          (desc->active == INET_ONCE || desc->active == INET_MULTI));
	    }
	    return 0;
	}
    }
    return 0;
}
6868 
6869 #ifdef HAVE_SCTP
6870 
6871 /*  "sctp_get_initmsg":
6872 **  Used by both "send*" and "setsockopt". Gets the 4 fields of "sctp_initmsg"
6873 **  from the input buffer:
6874 */
6875 #define SCTP_GET_INITMSG_LEN (4*2)
sctp_get_initmsg(struct sctp_initmsg * ini,char * curr)6876 static char* sctp_get_initmsg(struct sctp_initmsg* ini, char* curr)
6877 {
6878     ini->sinit_num_ostreams   = get_int16 (curr);	curr += 2;
6879     ini->sinit_max_instreams  = get_int16 (curr);	curr += 2;
6880     ini->sinit_max_attempts   = get_int16 (curr);	curr += 2;
6881     ini->sinit_max_init_timeo = get_int16 (curr);	curr += 2;
6882     return curr;
6883 }
6884 
/*  "sctp_get_sendparams":
**  Parses (from the command buffer) the 6 user-specified parameters of
**  "sctp_sndrcvinfo":
**	stream(u16),      flags(u16), ppid(u32), context(u32),
**	timetolive(u32),  assoc_id
**  Is used by both "send*" and "setsockopt":
*/
6892 #define SCTP_GET_SENDPARAMS_LEN (2*2 + 3*4 + ASSOC_ID_LEN)
sctp_get_sendparams(struct sctp_sndrcvinfo * sri,char * curr)6893 static char* sctp_get_sendparams (struct sctp_sndrcvinfo* sri, char* curr)
6894 {
6895     int eflags;
6896     int cflags;
6897 
6898     sri->sinfo_stream       = get_int16(curr);		curr += 2;
6899     sri->sinfo_ssn	    = 0;
6900 
6901     /* The "flags" are already ORed at the Erlang side, here we
6902        reconstruct the real SCTP flags:
6903     */
6904     eflags		    = get_int16(curr);		curr += 2;
6905     cflags		    = 0;
6906     if (eflags & SCTP_FLAG_UNORDERED) cflags |= SCTP_UNORDERED;
6907     if (eflags & SCTP_FLAG_ADDR_OVER) cflags |= SCTP_ADDR_OVER;
6908     if (eflags & SCTP_FLAG_ABORT)     cflags |= SCTP_ABORT;
6909     if (eflags & SCTP_FLAG_EOF)	      cflags |= SCTP_EOF;
6910 
6911     sri->sinfo_flags	    = cflags;
6912     sri->sinfo_ppid         = sock_htonl(get_int32(curr));
6913 							curr += 4;
6914     sri->sinfo_context      = get_int32(curr);		curr += 4;
6915     sri->sinfo_timetolive   = get_int32(curr);		curr += 4;
6916     sri->sinfo_tsn	    = 0;
6917     sri->sinfo_cumtsn	    = 0;
6918     sri->sinfo_assoc_id	    = GET_ASSOC_ID  (curr);	curr += ASSOC_ID_LEN;
6919 
6920     return curr;
6921 }
6922 
6923 /* Set SCTP options:
6924 ** return -1 on error
6925 **         0 if ok
6926 ** NB: unlike inet_set_opts(), we don't have an active mode here, so there is no
6927 ** mode change which could force data delivery on setting an option.
6928 ** Arg: "ptr": [(erlang_encoded_opt(u8), value(...)), ...];  thus, multiple opts
6929 ** can be set at a time.
6930 */
sctp_set_opts(inet_descriptor * desc,char * ptr,int len)6931 static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
6932 {
6933 #   define CHKLEN(Ptr, Len)                        \
6934     do {                                           \
6935 	if ((Ptr) + (Len) > ptr + len) return -1; \
6936     } while (0)
6937 
6938     char * curr = ptr;
6939     int    proto, type, res;
6940 
6941     /* The following union is used to hold any arg to "setsockopt": */
6942     union  opts_union
6943     {
6944 	int			    ival;
6945 	struct sctp_rtoinfo	    rtoi;
6946 	struct sctp_assocparams	    ap;
6947 	struct sctp_initmsg	    im;
6948 	struct linger		    lin;
6949 	struct sctp_setpeerprim	    prim;
6950 	struct sctp_setadaptation   ad;
6951 	struct sctp_paddrparams	    pap;
6952 	struct sctp_sndrcvinfo	    sri;
6953 	struct sctp_event_subscribe es;
6954 #	ifdef SCTP_DELAYED_ACK_TIME
6955 	struct sctp_assoc_value     av; /* Not in SOLARIS10 */
6956 #	endif
6957 #	ifdef SO_BINDTODEVICE
6958 	char ifname[IFNAMSIZ];
6959 #	endif
6960     }
6961     arg;
6962 
6963     char * arg_ptr = NULL;
6964     int    arg_sz  = 0;
6965     int    old_active = desc->active;
6966 
6967     while (curr < ptr + len)
6968     {
6969         int recv_cmsgflags;
6970 	/* Get the Erlang-encoded option type -- always 1 byte: */
6971 	int eopt;
6972 
6973         eopt = *curr;
6974 	curr++;
6975 
6976         recv_cmsgflags = desc->recv_cmsgflags;
6977 	/* Get the option value.  XXX: The condition  (curr < ptr + len)
6978 	   does not preclude us from reading from beyond the buffer end,
6979 	   if the Erlang part of the driver specifies its input wrongly!
6980 	*/
6981 	CHKLEN(curr, 4); /* All options need at least 4 bytes */
6982 	switch(eopt)
6983 	{
6984 	/* Local INET options: */
6985 
6986 	case INET_LOPT_BUFFER:
6987 	    desc->bufsz  = get_int32(curr);		curr += 4;
6988 
6989 	    if (desc->bufsz < INET_MIN_BUFFER)
6990 		desc->bufsz = INET_MIN_BUFFER;
6991             desc->flags |= INET_FLG_BUFFER_SET;
6992 	    res = 0;	  /* This does not affect the kernel buffer size */
6993 	    continue;
6994 
6995 	case INET_LOPT_MODE:
6996 	    desc->mode   = get_int32(curr);		curr += 4;
6997 	    res = 0;
6998 	    continue;
6999 
7000 	case INET_LOPT_ACTIVE:
7001 	    desc->active = get_int32(curr);		curr += 4;
7002             if (desc->active == INET_MULTI) {
7003                 long ac = desc->active_count;
7004                 Sint16 nval = get_int16(curr);          curr += 2;
7005 		ac += nval;
7006                 if (ac > INT16_MAX || ac < INT16_MIN)
7007                     return -1;
7008                 desc->active_count += nval;
7009                 if (desc->active_count < 0)
7010                     desc->active_count = 0;
7011                 if (desc->active_count == 0) {
7012                     desc->active = INET_PASSIVE;
7013                     packet_passive_message(desc);
7014                 }
7015             } else
7016                 desc->active_count = 0;
7017 	    res = 0;
7018 	    continue;
7019 
7020 #ifdef HAVE_SETNS
7021 	case INET_LOPT_NETNS:
7022 	{
7023 	    size_t ns_len;
7024 	    ns_len = get_int32(curr);                   curr += 4;
7025 	    CHKLEN(curr, ns_len);
7026 	    if (desc->netns != NULL) FREE(desc->netns);
7027 	    desc->netns = ALLOC(ns_len + 1);
7028 	    memcpy(desc->netns, curr, ns_len);
7029 	    desc->netns[ns_len] = '\0';
7030 	    curr += ns_len;
7031 	}
7032 	    continue;
7033 #endif
7034 
7035 	/* SCTP options and applicable generic INET options: */
7036 
7037 	case SCTP_OPT_RTOINFO:
7038 	{
7039 	    CHKLEN(curr, ASSOC_ID_LEN + 3*4);
7040 	    arg.rtoi.srto_assoc_id = GET_ASSOC_ID(curr);  curr += ASSOC_ID_LEN;
7041 	    arg.rtoi.srto_initial  = get_int32   (curr);  curr += 4;
7042 	    arg.rtoi.srto_max      = get_int32   (curr);  curr += 4;
7043 	    arg.rtoi.srto_min      = get_int32   (curr);  curr += 4;
7044 
7045 	    proto   = IPPROTO_SCTP;
7046 	    type    = SCTP_RTOINFO;
7047 	    arg_ptr = (char*) (&arg.rtoi);
7048 	    arg_sz  = sizeof  ( arg.rtoi);
7049 	    break;
7050 	}
7051 	case SCTP_OPT_ASSOCINFO:
7052 	{
7053 	    CHKLEN(curr, ASSOC_ID_LEN + 2*2 + 3*4);
7054 
7055 	    arg.ap.sasoc_assoc_id    = GET_ASSOC_ID(curr); curr += ASSOC_ID_LEN;
7056 	    arg.ap.sasoc_asocmaxrxt  = get_int16   (curr); curr += 2;
7057 	    arg.ap.sasoc_number_peer_destinations =
7058 				       get_int16   (curr); curr += 2;
7059 	    arg.ap.sasoc_peer_rwnd   = get_int32   (curr); curr += 4;
7060 	    arg.ap.sasoc_local_rwnd  = get_int32   (curr); curr += 4;
7061 	    arg.ap.sasoc_cookie_life = get_int32   (curr); curr += 4;
7062 
7063 	    proto   = IPPROTO_SCTP;
7064 	    type    = SCTP_ASSOCINFO;
7065 	    arg_ptr = (char*) (&arg.ap);
7066 	    arg_sz  = sizeof  ( arg.ap);
7067 	    break;
7068 	}
7069 	case SCTP_OPT_INITMSG:
7070 	{
7071 	    CHKLEN(curr, SCTP_GET_INITMSG_LEN);
7072 	    curr  = sctp_get_initmsg (&arg.im, curr);
7073 
7074 	    proto   = IPPROTO_SCTP;
7075 	    type    = SCTP_INITMSG;
7076 	    arg_ptr = (char*) (&arg.im);
7077 	    arg_sz  = sizeof  ( arg.im);
7078 	    break;
7079 	}
7080 	case INET_OPT_LINGER:
7081 	{
7082 	    CHKLEN(curr, 2*4);
7083 	    arg.lin.l_onoff  = get_int32 (curr);  curr += 4;
7084 	    arg.lin.l_linger = get_int32 (curr);  curr += 4;
7085 
7086 	    proto   = SOL_SOCKET;
7087 	    type    = SO_LINGER;
7088 	    arg_ptr = (char*) (&arg.lin);
7089 	    arg_sz  = sizeof  ( arg.lin);
7090 	    break;
7091 	}
7092 	case SCTP_OPT_NODELAY:
7093 	{
7094 	    arg.ival= get_int32 (curr);	  curr += 4;
7095 	    proto   = IPPROTO_SCTP;
7096 	    type    = SCTP_NODELAY;
7097 	    arg_ptr = (char*) (&arg.ival);
7098 	    arg_sz  = sizeof  ( arg.ival);
7099 	    break;
7100 	}
7101 	case INET_OPT_RCVBUF:
7102 	{
7103 	    arg.ival= get_int32 (curr);	  curr += 4;
7104 	    proto   = SOL_SOCKET;
7105 	    type    = SO_RCVBUF;
7106 	    arg_ptr = (char*) (&arg.ival);
7107 	    arg_sz  = sizeof  ( arg.ival);
7108 
7109 	    /* Adjust the size of the user-level recv buffer, so it's not
7110 	       smaller than the kernel one: */
7111 	    if (desc->bufsz <= arg.ival)
7112 		desc->bufsz  = arg.ival;
7113             desc->flags |= INET_FLG_BUFFER_SET;
7114 	    break;
7115 	}
7116 	case INET_OPT_SNDBUF:
7117 	{
7118 	    arg.ival= get_int32 (curr);	  curr += 4;
7119 	    proto   = SOL_SOCKET;
7120 	    type    = SO_SNDBUF;
7121 	    arg_ptr = (char*) (&arg.ival);
7122 	    arg_sz  = sizeof  ( arg.ival);
7123 
7124 	    break;
7125 	}
7126 	case INET_OPT_REUSEADDR:
7127 	{
7128 	    arg.ival= get_int32 (curr);	  curr += 4;
7129 	    proto   = SOL_SOCKET;
7130 	    type    = SO_REUSEADDR;
7131 	    arg_ptr = (char*) (&arg.ival);
7132 	    arg_sz  = sizeof  ( arg.ival);
7133 	    break;
7134 	}
7135 	case INET_OPT_DONTROUTE:
7136 	{
7137 	    arg.ival= get_int32 (curr);	  curr += 4;
7138 	    proto   = SOL_SOCKET;
7139 	    type    = SO_DONTROUTE;
7140 	    arg_ptr = (char*) (&arg.ival);
7141 	    arg_sz  = sizeof  ( arg.ival);
7142 	    break;
7143 	}
7144 	case INET_OPT_PRIORITY:
7145 #	ifdef SO_PRIORITY
7146 	{
7147 	    arg.ival= get_int32 (curr);	  curr += 4;
7148 	    proto   = SOL_SOCKET;
7149 	    type    = SO_PRIORITY;
7150 	    arg_ptr = (char*) (&arg.ival);
7151 	    arg_sz  = sizeof  ( arg.ival);
7152 	    break;
7153 	}
7154 #	else
7155         /* inet_fill_opts always returns a value for this option,
7156          * so we need to ignore it if not implemented, just in case */
7157 	    continue;
7158 #	endif
7159 
7160 	case INET_OPT_TOS:
7161 #	if defined(IP_TOS) && defined(IPPROTO_IP)
7162 	{
7163 	    arg.ival= get_int32 (curr);	  curr += 4;
7164 	    proto   = IPPROTO_IP;
7165 	    type    = IP_TOS;
7166 	    arg_ptr = (char*) (&arg.ival);
7167 	    arg_sz  = sizeof  ( arg.ival);
7168 	    break;
7169 	}
7170 #	else
7171         /* inet_fill_opts always returns a value for this option,
7172          * so we need to ignore it if not implemented, just in case */
7173 	    continue;
7174 #	endif
7175 
7176 #       if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
7177 	case INET_OPT_TCLASS:
7178 	{
7179 	    arg.ival= get_int32 (curr);	  curr += 4;
7180 	    proto   = IPPROTO_IPV6;
7181 	    type    = IPV6_TCLASS;
7182 	    arg_ptr = (char*) (&arg.ival);
7183 	    arg_sz  = sizeof  ( arg.ival);
7184 	    break;
7185 	}
7186 #	endif
7187 
7188 #       if defined(IP_TTL) && defined(IPPROTO_IP)
7189 	case INET_OPT_TTL:
7190 	{
7191 	    arg.ival= get_int32 (curr);	  curr += 4;
7192 	    proto   = IPPROTO_IP;
7193 	    type    = IP_TTL;
7194 	    arg_ptr = (char*) (&arg.ival);
7195 	    arg_sz  = sizeof  ( arg.ival);
7196 	    break;
7197 	}
7198 #	endif
7199 
7200 #	if defined(IP_RECVTOS) && defined(IPPROTO_IP)
7201 	case INET_OPT_RECVTOS:
7202 	{
7203 	    arg.ival= get_int32 (curr);	  curr += 4;
7204 	    proto   = IPPROTO_IP;
7205 	    type    = IP_RECVTOS;
7206 	    arg_ptr = (char*) (&arg.ival);
7207 	    arg_sz  = sizeof  ( arg.ival);
7208             recv_cmsgflags =
7209                 arg.ival ?
7210                 (desc->recv_cmsgflags | INET_CMSG_RECVTOS) :
7211                 (desc->recv_cmsgflags & ~INET_CMSG_RECVTOS);
7212 	    break;
7213 	}
7214 #	endif
7215 
7216 #       if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
7217 	case INET_OPT_RECVTCLASS:
7218 	{
7219 	    arg.ival= get_int32 (curr);	  curr += 4;
7220 	    proto   = IPPROTO_IPV6;
7221 	    type    = IPV6_RECVTCLASS;
7222 	    arg_ptr = (char*) (&arg.ival);
7223 	    arg_sz  = sizeof  ( arg.ival);
7224             recv_cmsgflags =
7225                 arg.ival ?
7226                 (desc->recv_cmsgflags | INET_CMSG_RECVTCLASS) :
7227                 (desc->recv_cmsgflags & ~INET_CMSG_RECVTCLASS);
7228 	    break;
7229 	}
7230 #	endif
7231 
7232 #	if defined(IP_RECVTTL) && defined(IPPROTO_IP)
7233 	case INET_OPT_RECVTTL:
7234 	{
7235 	    arg.ival= get_int32 (curr);	  curr += 4;
7236 	    proto   = IPPROTO_IP;
7237 	    type    = IP_RECVTTL;
7238 	    arg_ptr = (char*) (&arg.ival);
7239 	    arg_sz  = sizeof  ( arg.ival);
7240             recv_cmsgflags =
7241                 arg.ival ?
7242                 (desc->recv_cmsgflags | INET_CMSG_RECVTTL) :
7243                 (desc->recv_cmsgflags & ~INET_CMSG_RECVTTL);
7244 	    break;
7245 	}
7246 #	endif
7247 
7248 
7249 	case INET_OPT_IPV6_V6ONLY:
7250 #       if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
7251 	{
7252 	    arg.ival= get_int32 (curr);   curr += 4;
7253 	    proto   = IPPROTO_IPV6;
7254 	    type    = IPV6_V6ONLY;
7255 	    arg_ptr = (char*) (&arg.ival);
7256 	    arg_sz  = sizeof  ( arg.ival);
7257 	    break;
7258 	}
7259 #       elif defined(__WIN32__) && defined(HAVE_IN6) && defined(AF_INET6)
7260 #           error Here is a fix for Win IPv6 SCTP missing
7261 #       else
7262 	    continue; /* Option not supported -- ignore it */
7263 #       endif
7264 
7265 #ifdef SO_BINDTODEVICE
7266 	case INET_OPT_BIND_TO_DEVICE:
7267 	    arg_sz = get_int32(curr);			curr += 4;
7268 	    CHKLEN(curr, arg_sz);
7269 	    if (arg_sz >= sizeof(arg.ifname))
7270 		return -1;
7271 	    memcpy(arg.ifname, curr, arg_sz);
7272 	    arg.ifname[arg_sz] = '\0';
7273 	    curr += arg_sz;
7274 
7275 	    proto   = SOL_SOCKET;
7276 	    type    = SO_BINDTODEVICE;
7277 	    arg_ptr = (char*) (&arg.ifname);
7278 	    arg_sz  = sizeof  ( arg.ifname);
7279 	    break;
7280 #endif
7281 
7282 	case SCTP_OPT_AUTOCLOSE:
7283 	{
7284 	    arg.ival= get_int32 (curr);	  curr += 4;
7285 	    proto   = IPPROTO_SCTP;
7286 	    type    = SCTP_AUTOCLOSE;
7287 	    arg_ptr = (char*) (&arg.ival);
7288 	    arg_sz  = sizeof  ( arg.ival);
7289 	    break;
7290 	}
7291 	case SCTP_OPT_DISABLE_FRAGMENTS:
7292 	{
7293 	    arg.ival= get_int32 (curr);	  curr += 4;
7294 	    proto   = IPPROTO_SCTP;
7295 	    type    = SCTP_DISABLE_FRAGMENTS;
7296 	    arg_ptr = (char*) (&arg.ival);
7297 	    arg_sz  = sizeof  ( arg.ival);
7298 	    break;
7299 	}
7300 	case SCTP_OPT_I_WANT_MAPPED_V4_ADDR:
7301 	{
7302 	    arg.ival= get_int32 (curr);	  curr += 4;
7303 	    proto   = IPPROTO_SCTP;
7304 	    type    = SCTP_I_WANT_MAPPED_V4_ADDR;
7305 	    arg_ptr = (char*) (&arg.ival);
7306 	    arg_sz  = sizeof  ( arg.ival);
7307 	    break;
7308 	}
7309 	case SCTP_OPT_MAXSEG:
7310 	{
7311 	    arg.ival= get_int32 (curr);	  curr += 4;
7312 	    proto   = IPPROTO_SCTP;
7313 	    type    = SCTP_MAXSEG;
7314 	    arg_ptr = (char*) (&arg.ival);
7315 	    arg_sz  = sizeof  ( arg.ival);
7316 	    break;
7317 	}
7318 	case SCTP_OPT_PRIMARY_ADDR:
7319 	case SCTP_OPT_SET_PEER_PRIMARY_ADDR:
7320 	{
7321 	    ErlDrvSizeT alen;
7322 
7323 	    CHKLEN(curr, ASSOC_ID_LEN);
7324 	    /* XXX: These 2 opts have isomorphic value data structures,
7325 	       "sctp_setpeerprim" and "sctp_prim" (in Solaris 10, the latter
7326 	       is called "sctp_setprim"),  so we grouped them together:
7327 	    */
7328 	    arg.prim.sspp_assoc_id = GET_ASSOC_ID(curr); curr += ASSOC_ID_LEN;
7329 
7330 	    /* Fill in "arg.prim.sspp_addr": */
7331 	    alen  = ptr + len - curr;
7332 	    if (inet_set_faddress
7333 		(desc->sfamily, (inet_address*) (&arg.prim.sspp_addr),
7334 		 &curr,  &alen) != NULL) return -1;
7335 
7336 	    proto = IPPROTO_SCTP;
7337 	    if (eopt == SCTP_OPT_PRIMARY_ADDR)
7338 		type =  SCTP_PRIMARY_ADDR;
7339 	    else
7340 		type =  SCTP_SET_PEER_PRIMARY_ADDR;
7341 
7342 	    arg_ptr  =  (char*) (&arg.prim);
7343 	    arg_sz   =  sizeof  ( arg.prim);
7344 	    break;
7345 	}
7346 	case SCTP_OPT_ADAPTATION_LAYER:
7347 	{
7348 	    /* XXX: do we need to convert the Ind into network byte order??? */
7349 	    arg.ad.ssb_adaptation_ind = sock_htonl (get_int32(curr));  curr += 4;
7350 
7351 	    proto   = IPPROTO_SCTP;
7352 	    type    = SCTP_ADAPTATION_LAYER;
7353 	    arg_ptr = (char*) (&arg.ad);
7354 	    arg_sz  = sizeof  ( arg.ad);
7355 	    break;
7356 	}
7357 	case SCTP_OPT_PEER_ADDR_PARAMS:
7358 	{
7359 	    ErlDrvSizeT alen;
7360 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_FLAGS
7361 	    int eflags, cflags, hb_enable, hb_disable,
7362 		pmtud_enable, pmtud_disable;
7363 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY
7364 	    int
7365 		sackdelay_enable, sackdelay_disable;
7366 #           endif
7367 #           endif
7368 
7369 	    CHKLEN(curr, ASSOC_ID_LEN);
7370 	    arg.pap.spp_assoc_id = GET_ASSOC_ID(curr);	curr += ASSOC_ID_LEN;
7371 
7372 	    /* Fill in "pap.spp_address": */
7373 	    alen  = ptr + len - curr;
7374 	    if (inet_set_faddress
7375 		(desc->sfamily, (inet_address*) (&arg.pap.spp_address),
7376 		 &curr,  &alen) != NULL) return -1;
7377 
7378 	    CHKLEN(curr, 4 + 2 + 3*4);
7379 
7380 	    arg.pap.spp_hbinterval = get_int32(curr);	curr += 4;
7381 	    arg.pap.spp_pathmaxrxt = get_int16(curr);	curr += 2;
7382 
7383 	    /* The following are missing in Solaris 10: */
7384 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_PATHMTU
7385 	    arg.pap.spp_pathmtu    = get_int32(curr);
7386 #           endif
7387 	    curr += 4;
7388 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY
7389 	    arg.pap.spp_sackdelay  = get_int32(curr);
7390 #           endif
7391 	    curr += 4;
7392 
7393 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_FLAGS
7394 	    /* Now re-construct the flags: */
7395 	    eflags	       = get_int32(curr);
7396 	    cflags	       = 0;
7397 
7398 	    hb_enable      = eflags & SCTP_FLAG_HB_ENABLE;
7399 	    hb_disable     = eflags & SCTP_FLAG_HB_DISABLE;
7400 	    if (hb_enable && hb_disable)
7401 		return -1;
7402 	    if (hb_enable)	 		cflags |= SPP_HB_ENABLE;
7403 	    if (hb_disable)	 		cflags |= SPP_HB_DISABLE;
7404 	    if (eflags & SCTP_FLAG_HB_DEMAND)	cflags |= SPP_HB_DEMAND;
7405 
7406 	    pmtud_enable   = eflags & SCTP_FLAG_PMTUD_ENABLE;
7407 	    pmtud_disable  = eflags & SCTP_FLAG_PMTUD_DISABLE;
7408 	    if (pmtud_enable && pmtud_disable)
7409 		return -1;
7410 	    if (pmtud_enable)			cflags |= SPP_PMTUD_ENABLE;
7411 	    if (pmtud_disable)			cflags |= SPP_PMTUD_DISABLE;
7412 
7413 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY
7414 	    /* The followings are missing in FreeBSD 7.1 */
7415 	    sackdelay_enable =eflags& SCTP_FLAG_SACDELAY_ENABLE;
7416 	    sackdelay_disable=eflags& SCTP_FLAG_SACDELAY_DISABLE;
7417 	    if (sackdelay_enable && sackdelay_disable)
7418 		return -1;
7419 	    if (sackdelay_enable)		cflags |= SPP_SACKDELAY_ENABLE;
7420 	    if (sackdelay_disable)		cflags |= SPP_SACKDELAY_DISABLE;
7421 #           endif
7422 
7423 	    arg.pap.spp_flags  = cflags;
7424 #	    endif
7425 	    curr += 4;
7426 
7427 	    proto   = IPPROTO_SCTP;
7428 	    type    = SCTP_PEER_ADDR_PARAMS;
7429 	    arg_ptr = (char*) (&arg.pap);
7430 	    arg_sz  = sizeof  ( arg.pap);
7431 	    break;
7432 	}
7433 	case SCTP_OPT_DEFAULT_SEND_PARAM:
7434 	{
7435 	    CHKLEN(curr, SCTP_GET_SENDPARAMS_LEN);
7436 	    curr = sctp_get_sendparams (&arg.sri, curr);
7437 
7438 	    proto   = IPPROTO_SCTP;
7439 	    type    = SCTP_DEFAULT_SEND_PARAM;
7440 	    arg_ptr = (char*) (&arg.sri);
7441 	    arg_sz  = sizeof  ( arg.sri);
7442 	    VALGRIND_MAKE_MEM_DEFINED(arg_ptr, arg_sz); /*suppress "uninitialised bytes"*/
7443 	    break;
7444 	}
7445 	case SCTP_OPT_EVENTS:
7446 	{
7447 	    CHKLEN(curr, 9);
7448 	    /* We do not support "sctp_authentication_event" -- it is not
7449 	       implemented in Linux Kernel SCTP anyway.   Just in case if
7450 	       the above structure has more fields than we support,  zero
7451 	       it out -- the extraneous events will NOT be used:
7452 	    */
7453 	    memset (&arg.es, 0, sizeof(arg.es));
7454 
7455 	    /* The input "buf" must contain the full definition of all the
7456 	       supported event fields, 1 byte per each,   as each event is
7457 	       either explicitly subscribed or cleared:
7458 	    */
7459 	    arg.es.sctp_data_io_event          = get_int8(curr);   curr++;
7460 	    arg.es.sctp_association_event      = get_int8(curr);   curr++;
7461 	    arg.es.sctp_address_event	       = get_int8(curr);   curr++;
7462 	    arg.es.sctp_send_failure_event     = get_int8(curr);   curr++;
7463 	    arg.es.sctp_peer_error_event       = get_int8(curr);   curr++;
7464 	    arg.es.sctp_shutdown_event	       = get_int8(curr);   curr++;
7465 	    arg.es.sctp_partial_delivery_event = get_int8(curr);   curr++;
7466 	    arg.es.sctp_adaptation_layer_event = get_int8(curr);   curr++;
7467 	    /* sctp_authentication_event not implemented */ curr++;
7468 
7469 	    proto   = IPPROTO_SCTP;
7470 	    type    = SCTP_EVENTS;
7471 	    arg_ptr = (char*) (&arg.es);
7472 	    arg_sz  = sizeof  ( arg.es);
7473 	    break;
7474 	}
7475 	/* The following is not available on Solaris 10: */
7476 #	ifdef SCTP_DELAYED_ACK_TIME
7477 	case SCTP_OPT_DELAYED_ACK_TIME:
7478 	{
7479 	    CHKLEN(curr, ASSOC_ID_LEN + 4);
7480 	    arg.av.assoc_id    = GET_ASSOC_ID(curr);	curr += ASSOC_ID_LEN;
7481 	    arg.av.assoc_value = get_int32(curr);	curr += 4;
7482 
7483 	    proto   = IPPROTO_SCTP;
7484 	    type    = SCTP_DELAYED_ACK_TIME;
7485 	    arg_ptr = (char*) (&arg.av);
7486 	    arg_sz  = sizeof  ( arg.av);
7487 	    break;
7488 	}
7489 #	endif
7490 	default:
7491 	    /* XXX: No more supported SCTP options. In particular, authentica-
7492 	       tion options (SCTP_AUTH_CHUNK, SCTP_AUTH_KEY, SCTP_PEER_AUTH_
7493                CHUNKS, SCTP_LOCAL_AUTH_CHUNKS, SCTP_AUTH_SETKEY_ACTIVE)  are
7494 	       not yet implemented in the Linux kernel,  hence not supported
7495 	       here.  Also not supported are SCTP_HMAC_IDENT, as well as any
7496 	       "generic" options except "INET_LOPT_MODE".    Raise an error:
7497 	    */
7498 	    return -1;
7499 	}
7500 #if  defined(IP_TOS) && defined(IPPROTO_IP)             \
7501     && defined(SO_PRIORITY) && !defined(__WIN32__)
7502 	res = setopt_prio_tos_trick (desc->s, proto, type, arg_ptr, arg_sz, 1);
7503 #else
7504 	res = sock_setopt	    (desc->s, proto, type, arg_ptr, arg_sz);
7505 #endif
7506 	/* The return values of "sock_setopt" can only be 0 or -1: */
7507 	ASSERT(res == 0 || res == -1);
7508         if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
7509 	if (res == -1)
7510 	{  /* Got an error, DO NOT continue with other options. However, on
7511 	      Solaris 10, we DO allow SO_SNDBUF and SO_RCVBUF to fail, assu-
7512 	      min that the default kernel versions are good enough:
7513 	   */
7514 #	   ifdef SOLARIS10
7515 	   if (type != SO_SNDBUF && type != SO_RCVBUF)
7516 #	   endif
7517 	   return res;
7518 	}
7519     }
7520     /* If we got here, all "sock_setopt"s above were successful:   */
7521     if (IS_OPEN(desc) && desc->active != old_active) {
7522 	sock_select(desc, (FD_READ|FD_CLOSE), (desc->active > 0));
7523     }
7524     return 0;
7525 #   undef CHKLEN
7526 }
7527 #endif /* HAVE_SCTP */
7528 
7529 #ifndef __WIN32__
put_cmsg_int32(struct cmsghdr * cmsg,char * ptr)7530 static void put_cmsg_int32(struct cmsghdr *cmsg, char *ptr) {
7531     union u {
7532         byte uint8;
7533         Uint16 uint16;
7534         Uint32 uint32;
7535         Uint64 uint64;
7536     } *p;
7537     p = (union u*) CMSG_DATA(cmsg);
7538     switch (LEN_CMSG_DATA(cmsg) * CHAR_BIT) {
7539     case 8:
7540         put_int32((Uint32) p->uint8, ptr);
7541         break;
7542     case 16:
7543         put_int32((Uint32) p->uint16, ptr);
7544         break;
7545     case 32:
7546         put_int32(p->uint32, ptr);
7547         break;
7548     case 64:
7549         put_int32((Uint32) p->uint64, ptr);
7550         break;
7551     default:
7552         put_int32(0, ptr);
7553     }
7554     return;
7555 }
7556 #endif
7557 
7558 /* load all option values into the buf and reply
7559 ** return total length of reply filled into ptr
7560 ** ptr should point to a buffer with 9*len +1 to be safe!!
7561 */
7562 
inet_fill_opts(inet_descriptor * desc,char * buf,ErlDrvSizeT len,char ** dest,ErlDrvSizeT destlen)7563 static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
7564 				   char* buf, ErlDrvSizeT len,
7565 				   char** dest, ErlDrvSizeT destlen)
7566 {
7567     int type;
7568     int proto;
7569     int opt;
7570     struct linger li_val;
7571     int ival;
7572     char* arg_ptr;
7573     unsigned int arg_sz;
7574     char *ptr = NULL;
7575     ErlDrvSizeT dest_used = 0;
7576     ErlDrvSizeT dest_allocated = destlen;
7577     char *orig_dest = *dest;
7578 #ifdef SO_BINDTODEVICE
7579     char ifname[IFNAMSIZ];
7580 #endif
7581 
7582     /* Ptr is a name parameter */
7583 #define RETURN_ERROR()				\
7584     do {					\
7585 	if (dest_allocated > destlen) {		\
7586 	    FREE(*dest);			\
7587 	    *dest = orig_dest;			\
7588 	}					\
7589 	return -1;				\
7590     } while(0)
7591 
7592 #define PLACE_FOR(Size,Ptr)						   \
7593     do {								   \
7594 	ErlDrvSizeT need = dest_used + (Size);					   \
7595 	if (need > INET_MAX_OPT_BUFFER) {				   \
7596 	    RETURN_ERROR();						   \
7597 	}								   \
7598 	if (need > dest_allocated) {					   \
7599 	    char *new_buffer;						   \
7600 	    if (dest_allocated == destlen) {				   \
7601 		new_buffer = ALLOC((dest_allocated = need + 10));	   \
7602 		memcpy(new_buffer,*dest,dest_used);			   \
7603 	    } else {							   \
7604 		new_buffer = REALLOC(*dest, (dest_allocated = need + 10)); \
7605 	    }								   \
7606 	    *dest = new_buffer;						   \
7607 	}								   \
7608 	(Ptr) = (*dest) + dest_used;					   \
7609 	dest_used = need;						   \
7610     } while (0)
7611 
7612     /* Ptr is a name parameter */
7613 #define TRUNCATE_TO(Size,Ptr)				\
7614     do {						\
7615 	ErlDrvSizeT new_need = ((Ptr) - (*dest)) + (Size);	\
7616 	if (new_need > dest_used) {			\
7617 	    erts_exit(ERTS_ERROR_EXIT,"Internal error in inet_drv, "	\
7618 		     "miscalculated buffer size");	\
7619 	}						\
7620 	dest_used = new_need;				\
7621     } while(0)
7622 
7623 
7624     PLACE_FOR(1,ptr);
7625     *ptr = INET_REP_OK;
7626 
7627     while(len--) {
7628 	opt = *buf++;
7629 	proto = SOL_SOCKET;
7630 	ival = 0; /* Windows Vista needs this (only writes part of it) */
7631 	arg_sz = sizeof(ival);
7632 	arg_ptr = (char*) &ival;
7633 
7634 	PLACE_FOR(5,ptr);
7635 
7636 	switch(opt) {
7637 	case INET_LOPT_BUFFER:
7638 	    *ptr++ = opt;
7639 	    put_int32(desc->bufsz, ptr);
7640 	    continue;
7641 	case INET_LOPT_HEADER:
7642 	    *ptr++ = opt;
7643 	    put_int32(desc->hsz, ptr);
7644 	    continue;
7645 	case INET_LOPT_MODE:
7646 	    *ptr++ = opt;
7647 	    put_int32(desc->mode, ptr);
7648 	    continue;
7649 	case INET_LOPT_DELIVER:
7650 	    *ptr++ = opt;
7651 	    put_int32(desc->deliver, ptr);
7652 	    continue;
7653 	case INET_LOPT_ACTIVE:
7654 	    *ptr++ = opt;
7655 	    put_int32(desc->active, ptr);
7656             if (desc->active == INET_MULTI) {
7657                 PLACE_FOR(2,ptr);
7658                 put_int16(desc->active_count, ptr);
7659                 ptr += 2;
7660             }
7661 	    continue;
7662 	case INET_LOPT_PACKET:
7663 	    *ptr++ = opt;
7664 	    put_int32(desc->htype, ptr);
7665 	    continue;
7666 	case INET_LOPT_PACKET_SIZE:
7667 	    *ptr++ = opt;
7668 	    put_int32(desc->psize, ptr);
7669 	    continue;
7670 	case INET_LOPT_EXITONCLOSE:
7671 	    *ptr++ = opt;
7672 	    put_int32(desc->exitf, ptr);
7673 	    continue;
7674 
7675 	case INET_LOPT_TCP_HIWTRMRK:
7676 	    if (desc->stype == SOCK_STREAM) {
7677 		*ptr++ = opt;
7678 		ival = ((tcp_descriptor*)desc)->high;
7679 		put_int32(ival, ptr);
7680 	    } else {
7681 		TRUNCATE_TO(0,ptr);
7682 	    }
7683 	    continue;
7684 
7685 	case INET_LOPT_TCP_LOWTRMRK:
7686 	    if (desc->stype == SOCK_STREAM) {
7687 		*ptr++ = opt;
7688 		ival = ((tcp_descriptor*)desc)->low;
7689 		put_int32(ival, ptr);
7690 	    } else {
7691 		TRUNCATE_TO(0,ptr);
7692 	    }
7693 	    continue;
7694 
7695 	case INET_LOPT_MSGQ_HIWTRMRK: {
7696 	    ErlDrvSizeT high = ERL_DRV_BUSY_MSGQ_READ_ONLY;
7697 	    *ptr++ = opt;
7698 	    erl_drv_busy_msgq_limits(desc->port, NULL, &high);
7699 	    ival = high > INT_MAX ? INT_MAX : (int) high;
7700 	    put_int32(ival, ptr);
7701 	    continue;
7702 	}
7703 
7704 	case INET_LOPT_MSGQ_LOWTRMRK: {
7705 	    ErlDrvSizeT low = ERL_DRV_BUSY_MSGQ_READ_ONLY;
7706 	    *ptr++ = opt;
7707 	    erl_drv_busy_msgq_limits(desc->port, &low, NULL);
7708 	    ival = low > INT_MAX ? INT_MAX : (int) low;
7709 	    put_int32(ival, ptr);
7710 	    continue;
7711 	}
7712 
7713 	case INET_LOPT_TCP_SEND_TIMEOUT:
7714 	    if (desc->stype == SOCK_STREAM) {
7715 		*ptr++ = opt;
7716 		ival = ((tcp_descriptor*)desc)->send_timeout;
7717 		put_int32(ival, ptr);
7718 	    } else {
7719 		TRUNCATE_TO(0,ptr);
7720 	    }
7721 	    continue;
7722 
7723 	case INET_LOPT_TCP_SEND_TIMEOUT_CLOSE:
7724 	    if (desc->stype == SOCK_STREAM) {
7725 		*ptr++ = opt;
7726 		ival = ((tcp_descriptor*)desc)->send_timeout_close;
7727 		put_int32(ival, ptr);
7728 	    } else {
7729 		TRUNCATE_TO(0,ptr);
7730 	    }
7731 	    continue;
7732 
7733 	case INET_LOPT_TCP_DELAY_SEND:
7734 	    if (desc->stype == SOCK_STREAM) {
7735 		*ptr++ = opt;
7736 		ival = !!(((tcp_descriptor*)desc)->tcp_add_flags & TCP_ADDF_DELAY_SEND);
7737 		put_int32(ival, ptr);
7738 	    } else {
7739 		TRUNCATE_TO(0,ptr);
7740 	    }
7741 	    continue;
7742 
7743 #ifdef HAVE_UDP
7744 	case INET_LOPT_UDP_READ_PACKETS:
7745 	    if (desc->stype == SOCK_DGRAM) {
7746 		*ptr++ = opt;
7747 		ival = ((udp_descriptor*)desc)->read_packets;
7748 		put_int32(ival, ptr);
7749 	    } else {
7750 		TRUNCATE_TO(0,ptr);
7751 	    }
7752 	    continue;
7753 #endif
7754 
7755 #ifdef HAVE_SETNS
7756 	case INET_LOPT_NETNS:
7757 	    if (desc->netns != NULL) {
7758 		size_t netns_len;
7759 		netns_len = strlen(desc->netns);
7760 		*ptr++ = opt;
7761 		put_int32(netns_len, ptr);
7762 		PLACE_FOR(netns_len, ptr);
7763 		memcpy(ptr, desc->netns, netns_len);
7764 		ptr += netns_len;
7765 	    } else {
7766 		TRUNCATE_TO(0,ptr);
7767 	    }
7768 	    continue;
7769 #endif
7770 
7771 	case INET_LOPT_TCP_SHOW_ECONNRESET:
7772 	    if (desc->sprotocol == IPPROTO_TCP) {
7773 		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
7774 		*ptr++ = opt;
7775 		ival = !!(tdesc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET);
7776 		put_int32(ival, ptr);
7777 	    } else {
7778 		TRUNCATE_TO(0,ptr);
7779 	    }
7780 	    continue;
7781 
7782 	case INET_OPT_PRIORITY:
7783 #ifdef SO_PRIORITY
7784 	    type = SO_PRIORITY;
7785 	    break;
7786 #else
7787 	    *ptr++ = opt;
7788 	    put_int32(0, ptr);
7789 	    continue;
7790 #endif
7791 	case INET_OPT_TOS:
7792 #if defined(IP_TOS) && defined(IPPROTO_IP)
7793 	    proto = IPPROTO_IP;
7794 	    type = IP_TOS;
7795 	    break;
7796 #else
7797 	    *ptr++ = opt;
7798 	    put_int32(0, ptr);
7799 	    continue;
7800 #endif
7801 	case INET_OPT_TCLASS:
7802 #if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
7803 	    proto = IPPROTO_IPV6;
7804 	    type = IPV6_TCLASS;
7805 	    break;
7806 #else
7807 	    TRUNCATE_TO(0,ptr);
7808 	    continue;
7809 #endif
7810 	case INET_OPT_TTL:
7811 #if defined(IP_TTL) && defined(IPPROTO_IP)
7812 	    proto = IPPROTO_IP;
7813 	    type = IP_TTL;
7814 	    break;
7815 #else
7816 	    TRUNCATE_TO(0,ptr);
7817 	    continue;
7818 #endif
7819 	case INET_OPT_RECVTOS:
7820 #if defined(IP_RECVTOS) && defined(IPPROTO_IP)
7821 	    proto = IPPROTO_IP;
7822 	    type = IP_RECVTOS;
7823 	    break;
7824 #else
7825 	    TRUNCATE_TO(0,ptr);
7826 	    continue;
7827 #endif
7828 	case INET_OPT_RECVTCLASS:
7829 #if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
7830 	    proto = IPPROTO_IPV6;
7831 	    type = IPV6_RECVTCLASS;
7832 	    break;
7833 #else
7834 	    TRUNCATE_TO(0,ptr);
7835 	    continue;
7836 #endif
7837 	case INET_OPT_RECVTTL:
7838 #if defined(IP_RECVTTL) && defined(IPPROTO_IP)
7839 	    proto = IPPROTO_IP;
7840 	    type = IP_RECVTTL;
7841 	    break;
7842 #else
7843 	    TRUNCATE_TO(0,ptr);
7844 	    continue;
7845 #endif
7846 	case INET_OPT_REUSEADDR:
7847 	    type = SO_REUSEADDR;
7848 	    break;
7849 	case INET_OPT_KEEPALIVE:
7850 	    type = SO_KEEPALIVE;
7851 	    break;
7852 	case INET_OPT_DONTROUTE:
7853 	    type = SO_DONTROUTE;
7854 	    break;
7855 	case INET_OPT_BROADCAST:
7856 	    type = SO_BROADCAST;
7857 	    break;
7858 	case INET_OPT_OOBINLINE:
7859 	    type = SO_OOBINLINE;
7860 	    break;
7861 	case INET_OPT_SNDBUF:
7862 	    type = SO_SNDBUF;
7863 	    break;
7864 	case INET_OPT_RCVBUF:
7865 	    type = SO_RCVBUF;
7866 	    break;
7867 	case TCP_OPT_NODELAY:
7868 	    proto = IPPROTO_TCP;
7869 	    type = TCP_NODELAY;
7870 	    break;
7871 	case TCP_OPT_NOPUSH:
7872 #if defined(INET_TCP_NOPUSH)
7873 	    proto = IPPROTO_TCP;
7874 	    type = INET_TCP_NOPUSH;
7875 	    break;
7876 #else
7877 	    *ptr++ = opt;
7878 	    put_int32(0, ptr);
7879 	    continue;
7880 #endif
7881 
7882 #if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP)
7883 	case UDP_OPT_MULTICAST_TTL:
7884 	    proto = IPPROTO_IP;
7885 	    type = IP_MULTICAST_TTL;
7886 	    break;
7887 	case UDP_OPT_MULTICAST_LOOP:
7888 	    proto = IPPROTO_IP;
7889 	    type = IP_MULTICAST_LOOP;
7890 	    break;
7891 	case UDP_OPT_MULTICAST_IF:
7892 	    proto = IPPROTO_IP;
7893 	    type = IP_MULTICAST_IF;
7894 	    break;
7895 	case INET_OPT_LINGER:
7896 	    arg_sz = sizeof(li_val);
7897 	    sys_memzero((void *) &li_val, sizeof(li_val));
7898 	    arg_ptr = (char*) &li_val;
7899 	    type = SO_LINGER;
7900 	    break;
7901 #endif /* defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) */
7902 
7903 	case INET_OPT_IPV6_V6ONLY:
7904 #if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
7905 	    proto = IPPROTO_IPV6;
7906 	    type = IPV6_V6ONLY;
7907 	    break;
7908 #elif defined(__WIN32__) && defined(HAVE_IN6) && defined(AF_INET6)
7909 	    /* Fake reading 'true' */
7910 	    *ptr++ = opt;
7911 	    put_int32(1, ptr);
7912 	    ptr += 4;
7913 	    continue;
7914 #else
7915 	    TRUNCATE_TO(0,ptr);
7916 	    continue; /* skip - no result */
7917 #endif
7918 
7919 	case INET_OPT_RAW:
7920 	    {
7921 		int data_provided;
7922 		/* Raw options are icky, handle directly... */
7923 		if (len < 13) {
7924 		    RETURN_ERROR();
7925 		}
7926 		len -= 13;
7927 		proto = get_int32(buf);
7928 		buf += 4;
7929 		type = get_int32(buf);
7930 		buf += 4;
7931 		data_provided = (int) *buf++;
7932 		arg_sz = get_int32(buf);
7933 		if (arg_sz > INET_MAX_OPT_BUFFER) {
7934 		    RETURN_ERROR();
7935 		}
7936 		buf += 4;
7937 		TRUNCATE_TO(0,ptr);
7938 		PLACE_FOR(13 + arg_sz,ptr);
7939 		arg_ptr = ptr + 13;
7940 		if (data_provided) {
7941 		    if (len < arg_sz) {
7942 			RETURN_ERROR();
7943 		    }
7944 		    memcpy(arg_ptr,buf,arg_sz);
7945 		    buf += arg_sz;
7946 		    len -= arg_sz;
7947 		}
7948 		if (IS_SOCKET_ERROR(sock_getopt(desc->s,proto,type,
7949 						arg_ptr,&arg_sz))) {
7950 		    TRUNCATE_TO(0,ptr);
7951 		    continue;
7952 		}
7953 		TRUNCATE_TO(arg_sz + 13,ptr);
7954 		*ptr++ = opt;
7955 		put_int32(proto,ptr);
7956 		ptr += 4;
7957 		put_int32(type,ptr);
7958 		ptr += 4;
7959 		put_int32(arg_sz,ptr);
7960 		continue;
7961 	    }
7962 
7963 #ifdef SO_BINDTODEVICE
7964 	case INET_OPT_BIND_TO_DEVICE:
7965 	    arg_sz = sizeof(ifname);
7966 	    TRUNCATE_TO(0,ptr);
7967 	    PLACE_FOR(5 + arg_sz,ptr);
7968 	    arg_ptr = ptr + 5;
7969 	    if (IS_SOCKET_ERROR(sock_getopt(desc->s,SOL_SOCKET,SO_BINDTODEVICE,
7970 						arg_ptr,&arg_sz))) {
7971 		    TRUNCATE_TO(0,ptr);
7972 		    continue;
7973 		}
7974 	    arg_sz = my_strnlen(arg_ptr, arg_sz);
7975 	    TRUNCATE_TO(arg_sz + 5,ptr);
7976 	    *ptr++ = opt;
7977 	    put_int32(arg_sz,ptr);
7978 	    ptr += arg_sz;
7979 	    continue;
7980 #endif
7981 
7982 #ifndef __WIN32__
7983             /* Winsock does not have struct cmsghdr */
7984         case INET_OPT_PKTOPTIONS: {
7985             struct cmsghdr *cmsg, *cmsg_top;
7986             SOCKLEN_T cmsg_sz;
7987             union {
7988                 /* Ensure alignment */
7989                 struct cmsghdr hdr;
7990                 /* Room for (IP_TOS | IPV6_TCLASS) + IP_TTL */
7991                 char buf[2*CMSG_SPACE(sizeof(int))];
7992             } cmsgbuf;
7993             /* Select between IPv4 or IPv6 PKTOPTIONS
7994              * depending on the socket protocol family
7995              */
7996             switch (desc->sfamily) {
7997 #if defined(IPPROTO_IP) && defined(IP_PKTOPTIONS)
7998             case AF_INET: {
7999                 proto = IPPROTO_IP;
8000                 type = IP_PKTOPTIONS;
8001             }
8002                 break;
8003 #endif
8004 #if defined(IPPROTO_IPV6) && defined(IPV6_PKTOPTIONS) && defined(AF_INET6)
8005             case AF_INET6: {
8006                 proto = IPPROTO_IPV6;
8007                 type = IPV6_PKTOPTIONS;
8008             }
8009                 break;
8010 #endif
8011             default: {
8012                 RETURN_ERROR();
8013             }
8014             } /* switch */
8015             TRUNCATE_TO(0, ptr);
8016             /* Fetch a cmsg buffer from the socket */
8017             cmsg_sz = sizeof(cmsgbuf.buf);
8018             if (IS_SOCKET_ERROR(sock_getopt(desc->s, proto, type,
8019                                             cmsgbuf.buf, &cmsg_sz))) {
8020                 continue;
8021             }
8022             /* Reply with Opt/8, Length/32, [COpt/8, Value/32]*
8023              * i.e opt, total length and then all returned
8024              * cmsg options and values
8025              */
8026             PLACE_FOR(1+4, ptr);
8027             *ptr++ = opt;
8028             arg_ptr = ptr; /* Where to put total length */
8029             arg_sz = 0; /* Total length */
8030             for (cmsg_top = (struct cmsghdr*)(cmsgbuf.buf + cmsg_sz),
8031                      cmsg = (struct cmsghdr*)cmsgbuf.buf;
8032                  cmsg < cmsg_top;
8033                  cmsg = NXT_CMSG_HDR(cmsg)) {
8034 #define PUT_CMSG_INT32(CMSG_LEVEL, CMSG_TYPE, OPT)      \
8035                 if ((cmsg->cmsg_level == CMSG_LEVEL) && \
8036                     (cmsg->cmsg_type == CMSG_TYPE)) {   \
8037                     PLACE_FOR(1+4, ptr);                \
8038                     *ptr++ = OPT;                       \
8039                     put_cmsg_int32(cmsg, ptr);          \
8040                     arg_sz += 1+4;                      \
8041                     continue;                           \
8042                 }
8043 #if defined(IPPROTO_IP) && defined(IP_TOS)
8044                 PUT_CMSG_INT32(IPPROTO_IP, IP_TOS, INET_OPT_TOS);
8045 #endif
8046 #if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
8047                 PUT_CMSG_INT32(IPPROTO_IPV6, IPV6_TCLASS, INET_OPT_TCLASS);
8048 #endif
8049 #if defined(IPPROTO_IP) && defined(IP_TTL)
8050                 PUT_CMSG_INT32(IPPROTO_IP, IP_TTL, INET_OPT_TTL);
8051 #endif
8052                 /* BSD uses the RECV* names in CMSG fields */
8053 #if defined(IPPROTO_IP) && defined(IP_RECVTOS)
8054                 PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTOS, INET_OPT_TOS);
8055 #endif
8056 #if defined(IPPROTO_IPV6) && defined(IPV6_RECVTCLASS)
8057                 PUT_CMSG_INT32(IPPROTO_IPV6, IPV6_RECVTCLASS, INET_OPT_TCLASS);
8058 #endif
8059 #if defined(IPPROTO_IP) && defined(IP_RECVTTL)
8060                 PUT_CMSG_INT32(IPPROTO_IP, IP_RECVTTL, INET_OPT_TTL);
8061 #endif
8062 #undef PUT_CMSG_INT32
8063             }
8064             put_int32(arg_sz, arg_ptr); /* Put total length */
8065             continue;
8066         }
8067 #endif /* #ifdef __WIN32__ */
8068 
8069 	default:
8070 	    RETURN_ERROR();
8071 	}
8072 	/* We have 5 bytes allocated to ptr */
8073 	if (IS_SOCKET_ERROR(sock_getopt(desc->s,proto,type,arg_ptr,&arg_sz))) {
8074 	    TRUNCATE_TO(0,ptr);
8075 	    continue;
8076 	}
8077 	*ptr++ = opt;
8078 	if (arg_ptr == (char*)&ival) {
8079 	    put_int32(ival, ptr);
8080 	}
8081 	else {
8082 	    put_int32(((Uint32) li_val.l_onoff), ptr);
8083 	    PLACE_FOR(4,ptr);
8084 	    put_int32(((Uint32) li_val.l_linger), ptr);
8085 	}
8086     }
8087     return (dest_used);
8088 #undef PLACE_FOR
8089 #undef TRUNCATE_TO
8090 #undef RETURN_ERROR
8091 }
8092 
8093 #ifdef HAVE_SCTP
8094 #define LOAD_PADDRINFO_CNT                                            \
8095         (2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT + LOAD_INET_GET_ADDRESS_CNT + \
8096 	 4*LOAD_INT_CNT + LOAD_TUPLE_CNT)
load_paddrinfo(ErlDrvTermData * spec,int i,inet_descriptor * desc,struct sctp_paddrinfo * pai)8097 static int load_paddrinfo (ErlDrvTermData * spec, int i,
8098 			   inet_descriptor* desc, struct sctp_paddrinfo* pai)
8099 {
8100     i = LOAD_ATOM	(spec, i, am_sctp_paddrinfo);
8101     i = LOAD_ASSOC_ID	(spec, i, pai->spinfo_assoc_id);
8102     i = load_inet_get_address(spec, i, desc, &pai->spinfo_address);
8103     switch(pai->spinfo_state)
8104     {
8105     case SCTP_ACTIVE:
8106 	i = LOAD_ATOM	(spec, i, am_active);
8107 	break;
8108     case SCTP_INACTIVE:
8109 	i = LOAD_ATOM	(spec, i, am_inactive);
8110 	break;
8111 #   if HAVE_DECL_SCTP_UNCONFIRMED
8112     case SCTP_UNCONFIRMED:
8113       i = LOAD_ATOM	(spec, i, am_unconfirmed);
8114       break;
8115 #   endif
8116     default:
8117       i = LOAD_ATOM	(spec, i, am_undefined);
8118     }
8119     i = LOAD_INT	(spec, i, pai->spinfo_cwnd);
8120     i = LOAD_INT	(spec, i, pai->spinfo_srtt);
8121     i = LOAD_INT	(spec, i, pai->spinfo_rto );
8122     i = LOAD_INT	(spec, i, pai->spinfo_mtu );
8123     /* Close up the record: */
8124     i = LOAD_TUPLE	(spec, i, 8);
8125     return i;
8126 }
8127 
8128 
8129 /*
8130 **  "sctp_fill_opts":   Returns {ok, Results}, or an error:
8131 */
sctp_fill_opts(inet_descriptor * desc,char * buf,ErlDrvSizeT buflen,char ** dest,ErlDrvSizeT destlen)8132 static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
8133 				   char* buf, ErlDrvSizeT buflen,
8134 				   char** dest, ErlDrvSizeT destlen)
8135 {
8136     /* In contrast to the generic "inet_fill_opts", the output here is
8137        represented by tuples/records, which are formed in the "spec":
8138     */
8139     ErlDrvTermData *spec;
8140     int i      = 0;
8141     int length = 0; /* Number of result list entries */
8142 
8143     int spec_allocated = PACKET_ERL_DRV_TERM_DATA_LEN;
8144     spec = ALLOC(sizeof(* spec) * spec_allocated);
8145 
8146 #   define RETURN_ERROR(Spec, Errno) \
8147     do {                    \
8148 	FREE(Spec);        \
8149 	return (Errno);     \
8150     } while(0)
8151 
    /* Spec is a name parameter */
8153 #   define PLACE_FOR(Spec, Index, N)                            \
8154     do {                                                        \
8155 	int need;                                               \
8156 	if ((Index) > spec_allocated) {                         \
8157 	    erts_exit(ERTS_ERROR_EXIT,"Internal error in inet_drv, "           \
8158 		     "miscalculated buffer size");              \
8159 	}                                                       \
8160 	need = (Index) + (N);                                   \
8161 	if (need > INET_MAX_OPT_BUFFER/sizeof(ErlDrvTermData)) {\
8162 	    RETURN_ERROR((Spec), -ENOMEM);                      \
8163 	}                                                       \
8164 	if (need > spec_allocated) {                            \
8165 	    (Spec) = REALLOC((Spec),                            \
8166 			     sizeof(* (Spec))                   \
8167 			     * (spec_allocated = need + 20));   \
8168 	}                                                       \
8169     } while (0)
8170 
8171     PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_PORT_CNT);
8172     i = LOAD_ATOM (spec, i, am_inet_reply);
8173     i = LOAD_PORT (spec, i, desc->dport);
8174     i = LOAD_ATOM (spec, i, am_ok);
8175 
8176     while (buflen > 0) {
8177 	int eopt = *buf;   /* "eopt" is 1-byte encoded */
8178 	buf ++; buflen --;
8179 
8180 	switch(eopt)
8181 	{
8182 	/* Local options allowed for SCTP. For TCP and UDP, the values of
8183 	   these options are returned via "res" using integer encoding,
8184 	   but here, we encode them as proper terms the same way as we do
8185 	   it for all other SCTP options:
8186 	*/
8187 	case INET_LOPT_BUFFER:
8188 	{
8189 	    PLACE_FOR(spec, i, LOAD_ATOM_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT);
8190 	    i = LOAD_ATOM (spec, i, am_buffer);
8191 	    i = LOAD_INT  (spec, i, desc->bufsz);
8192 	    i = LOAD_TUPLE(spec, i, 2);
8193 	    break;
8194 	}
8195 	case INET_LOPT_MODE:
8196 	{
8197 	    PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT);
8198 	    i = LOAD_ATOM (spec, i, am_mode);
8199 	    switch (desc->mode)
8200 	    {
8201 	    	case INET_MODE_LIST  :
8202 		{ i = LOAD_ATOM (spec, i, am_list);   break; }
8203 
8204 		case INET_MODE_BINARY:
8205 		{ i = LOAD_ATOM (spec, i, am_binary); break; }
8206 
8207 		default: ASSERT (0);
8208 	    }
8209 	    i = LOAD_TUPLE (spec, i, 2);
8210 	    break;
8211 	}
8212 	case INET_LOPT_ACTIVE:
8213 	{
8214             if (desc->active == INET_MULTI)
8215                 PLACE_FOR(spec, i, LOAD_ATOM_CNT + LOAD_INT_CNT + LOAD_TUPLE_CNT);
8216             else
8217                 PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_TUPLE_CNT);
8218 	    i = LOAD_ATOM (spec, i, am_active);
8219 	    switch (desc->active)
8220 	    {
8221 		case INET_ACTIVE :
8222 		{ i = LOAD_ATOM (spec, i, am_true);  break; }
8223 
8224 		case INET_PASSIVE:
8225 		{ i = LOAD_ATOM (spec, i, am_false); break; }
8226 
8227 		case INET_ONCE   :
8228 		{ i = LOAD_ATOM (spec, i, am_once);  break; }
8229 
8230                 case INET_MULTI  :
8231                 { i = LOAD_INT(spec, i, desc->active_count); break; }
8232 
8233 		default: ASSERT (0);
8234 	    }
8235 	    i = LOAD_TUPLE (spec, i, 2);
8236 	    break;
8237 	}
8238 
8239 #ifdef HAVE_SETNS
8240 	case INET_LOPT_NETNS:
8241 	    if (desc->netns != NULL) {
8242 		PLACE_FOR
8243 		    (spec, i,
8244 		     LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT);
8245 		i = LOAD_ATOM (spec, i, am_netns);
8246 		i = LOAD_BUF2BINARY
8247 		    (spec, i, desc->netns, strlen(desc->netns));
8248 		i = LOAD_TUPLE (spec, i, 2);
8249 		break;
8250 	    }
8251 	    else
8252 		continue; /* Ignore */
8253 #endif
8254 
8255 	/* SCTP and generic INET options: */
8256 
8257 	case SCTP_OPT_RTOINFO:
8258 	{
8259 	    struct       sctp_rtoinfo rti;
8260 	    unsigned int sz  = sizeof(rti);
8261 
8262 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8263 	    rti.srto_assoc_id = GET_ASSOC_ID(buf);
8264 	    buf    += ASSOC_ID_LEN;
8265 	    buflen -= ASSOC_ID_LEN;
8266 
8267 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_RTOINFO,
8268 			    &rti, &sz) < 0) continue;
8269 	    /* Fill in the response: */
8270 	    PLACE_FOR(spec, i,
8271 		      2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT +
8272 		      3*LOAD_INT_CNT + 2*LOAD_TUPLE_CNT);
8273 	    i = LOAD_ATOM	(spec, i, am_sctp_rtoinfo);
8274 	    i = LOAD_ATOM	(spec, i, am_sctp_rtoinfo);
8275 	    i = LOAD_ASSOC_ID	(spec, i, rti.srto_assoc_id);
8276 	    i = LOAD_INT	(spec, i, rti.srto_initial);
8277 	    i = LOAD_INT	(spec, i, rti.srto_max);
8278 	    i = LOAD_INT	(spec, i, rti.srto_min);
8279 	    i = LOAD_TUPLE	(spec, i, 5);
8280 	    i = LOAD_TUPLE (spec, i, 2);
8281 	    break;
8282 	}
8283 	case SCTP_OPT_ASSOCINFO:
8284 	{
8285 	    struct       sctp_assocparams ap;
8286 	    unsigned int sz  = sizeof(ap);
8287 
8288 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8289 	    ap.sasoc_assoc_id = GET_ASSOC_ID(buf);
8290 	    buf    += ASSOC_ID_LEN;
8291 	    buflen -= ASSOC_ID_LEN;
8292 
8293 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_ASSOCINFO,
8294 			    &ap, &sz) < 0) continue;
8295 	    /* Fill in the response: */
8296 	    PLACE_FOR(spec, i,
8297 		      2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT +
8298 		      5*LOAD_INT_CNT + 2*LOAD_TUPLE_CNT);
8299 	    i = LOAD_ATOM	(spec, i, am_sctp_associnfo);
8300 	    i = LOAD_ATOM	(spec, i, am_sctp_assocparams);
8301 	    i = LOAD_ASSOC_ID	(spec, i, ap.sasoc_assoc_id);
8302 	    i = LOAD_INT	(spec, i, ap.sasoc_asocmaxrxt);
8303 	    i = LOAD_INT	(spec, i, ap.sasoc_number_peer_destinations);
8304 	    i = LOAD_INT	(spec, i, ap.sasoc_peer_rwnd);
8305 	    i = LOAD_INT	(spec, i, ap.sasoc_local_rwnd);
8306 	    i = LOAD_INT	(spec, i, ap.sasoc_cookie_life);
8307 	    i = LOAD_TUPLE	(spec, i, 7);
8308 	    i = LOAD_TUPLE	(spec, i, 2);
8309 	    break;
8310 	}
8311 	case SCTP_OPT_INITMSG:
8312 	{
8313 	    struct       sctp_initmsg im;
8314 	    unsigned int sz = sizeof(im);
8315 
8316 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_INITMSG,
8317 			    &im, &sz) < 0) continue;
8318 	    /* Fill in the response: */
8319 	    PLACE_FOR(spec, i,
8320 		      2*LOAD_ATOM_CNT +
8321 		      4*LOAD_INT_CNT + 2*LOAD_TUPLE_CNT);
8322 	    i = LOAD_ATOM	(spec, i, am_sctp_initmsg);
8323 	    i = LOAD_ATOM	(spec, i, am_sctp_initmsg);
8324 	    i = LOAD_INT	(spec, i, im.sinit_num_ostreams);
8325 	    i = LOAD_INT	(spec, i, im.sinit_max_instreams);
8326 	    i = LOAD_INT	(spec, i, im.sinit_max_attempts);
8327 	    i = LOAD_INT	(spec, i, im.sinit_max_init_timeo);
8328 	    i = LOAD_TUPLE	(spec, i, 5);
8329 	    i = LOAD_TUPLE	(spec, i, 2);
8330 	    break;
8331 	}
8332 	/* The following option returns a tuple {bool, int}:   */
8333 	case INET_OPT_LINGER:
8334 	{
8335 	    struct linger lg;
8336 	    unsigned int  sz = sizeof(lg);
8337 
8338 	    if (sock_getopt(desc->s, SOL_SOCKET, SO_LINGER,
8339 			    &lg, &sz) < 0) continue;
8340 	    /* Fill in the response: */
8341 	    PLACE_FOR(spec, i,
8342 		      LOAD_ATOM_CNT + LOAD_BOOL_CNT +
8343 		      LOAD_INT_CNT + 2*LOAD_TUPLE_CNT);
8344 	    i = LOAD_ATOM	(spec, i, am_linger);
8345 	    i = LOAD_BOOL	(spec, i, lg.l_onoff);
8346 	    i = LOAD_INT	(spec, i, lg.l_linger);
8347 	    i = LOAD_TUPLE	(spec, i, 2);
8348 	    i = LOAD_TUPLE	(spec, i, 2);
8349 	    break;
8350 	}
8351 
8352 #ifdef SO_BINDTODEVICE
8353 	/* The following option returns a binary:   */
8354 	case INET_OPT_BIND_TO_DEVICE: {
8355 	    char ifname[IFNAMSIZ];
8356 	    unsigned int  sz = sizeof(ifname);
8357 
8358 	    if (sock_getopt(desc->s, SOL_SOCKET, SO_BINDTODEVICE,
8359 			    &ifname, &sz) < 0) continue;
8360 	    /* Fill in the response: */
8361 	    PLACE_FOR(spec, i,
8362 		      LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT);
8363 	    i = LOAD_ATOM (spec, i, am_bind_to_device);
8364 	    i = LOAD_BUF2BINARY(spec, i, ifname, my_strnlen(ifname, sz));
8365 	    i = LOAD_TUPLE (spec, i, 2);
8366 	    break;
8367 	}
8368 #endif
8369 
8370 	/* The following options just return an integer value: */
8371 	case INET_OPT_RCVBUF   :
8372 	case INET_OPT_SNDBUF   :
8373 	case INET_OPT_REUSEADDR:
8374 	case INET_OPT_DONTROUTE:
8375 	case INET_OPT_PRIORITY :
8376 	case INET_OPT_TOS      :
8377 	case INET_OPT_TCLASS   :
8378 	case INET_OPT_TTL      :
8379 	case INET_OPT_IPV6_V6ONLY:
8380 	case SCTP_OPT_AUTOCLOSE:
8381 	case SCTP_OPT_MAXSEG   :
8382 	/* The following options return true or false:	       */
8383 	case SCTP_OPT_NODELAY  :
8384 	case SCTP_OPT_DISABLE_FRAGMENTS:
8385 	case SCTP_OPT_I_WANT_MAPPED_V4_ADDR:
8386 	case INET_OPT_RECVTOS  :
8387 	case INET_OPT_RECVTCLASS :
8388 	case INET_OPT_RECVTTL :
8389 	{
8390 	    int res   = 0;
8391 	    unsigned int sz = sizeof(res);
8392 	    int proto = 0, type = 0, is_int = 0;
8393 	    ErlDrvTermData tag = am_sctp_error;
8394 
8395 	    switch(eopt)
8396 	    {
8397 	    case INET_OPT_RCVBUF   :
8398 	    {
8399 		proto  = SOL_SOCKET;
8400 		type   = SO_RCVBUF;
8401 		is_int = 1;
8402 		tag    = am_recbuf;
8403 		break;
8404 	    }
8405 	    case INET_OPT_SNDBUF   :
8406 	    {
8407 		proto  = SOL_SOCKET;
8408 		type   = SO_SNDBUF;
8409 		is_int = 1;
8410 		tag    = am_sndbuf;
8411 		break;
8412 	    }
8413 	    case INET_OPT_REUSEADDR:
8414 	    {
8415 		proto  = SOL_SOCKET;
8416 		type   = SO_REUSEADDR;
8417 		is_int = 0;
8418 		tag    = am_reuseaddr;
8419 		break;
8420 	    }
8421 	    case INET_OPT_DONTROUTE:
8422 	    {
8423 		proto  = SOL_SOCKET;
8424 		type   = SO_DONTROUTE;
8425 		is_int = 0;
8426 		tag    = am_dontroute;
8427 		break;
8428 	    }
8429 	    case INET_OPT_PRIORITY:
8430 	    {
8431 #	    if defined(SO_PRIORITY)
8432 		proto  = SOL_SOCKET;
8433 		type   = SO_PRIORITY;
8434 		is_int = 1;
8435 		tag    = am_priority;
8436 		break;
8437 #	    else
8438 		/* Not supported -- ignore */
8439 		continue;
8440 #	    endif
8441 	    }
8442 	    case INET_OPT_TOS:
8443 	    {
8444 #	    if defined(IP_TOS) && defined(IPPROTO_IP)
8445 		proto  = IPPROTO_IP;
8446 		type   = IP_TOS;
8447 		is_int = 1;
8448 		tag    = am_tos;
8449 		break;
8450 #	    else
8451 		/* Not supported -- ignore */
8452 		continue;
8453 #	    endif
8454 	    }
8455 	    case INET_OPT_TCLASS:
8456 	    {
8457 #           if defined(IPV6_TCLASS) && defined(IPPROTO_IPV6)
8458 		proto  = IPPROTO_IPV6;
8459 		type   = IPV6_TCLASS;
8460 		is_int = 1;
8461 		tag    = am_tclass;
8462 		break;
8463 #	    else
8464 		/* Not supported -- ignore */
8465 		continue;
8466 #	    endif
8467 	    }
8468 	    case INET_OPT_TTL:
8469 	    {
8470 #           if defined(IP_TTL) && defined(IPPROTO_IP)
8471 		proto  = IPPROTO_IP;
8472 		type   = IP_TTL;
8473 		is_int = 1;
8474 		tag    = am_ttl;
8475 		break;
8476 #	    else
8477 		/* Not supported -- ignore */
8478 		continue;
8479 #	    endif
8480 	    }
8481 	    case INET_OPT_RECVTOS:
8482 	    {
8483 #	    if defined(IP_RECVTOS) && defined(IPPROTO_IP)
8484 		proto  = IPPROTO_IP;
8485 		type   = IP_RECVTOS;
8486 		is_int = 0;
8487 		tag    = am_recvtos;
8488 		break;
8489 #	    else
8490 		/* Not supported -- ignore */
8491 		continue;
8492 #	    endif
8493 	    }
8494 	    case INET_OPT_RECVTCLASS:
8495 	    {
8496 #           if defined(IPV6_RECVTCLASS) && defined(IPPROTO_IPV6)
8497 		proto  = IPPROTO_IPV6;
8498 		type   = IPV6_RECVTCLASS;
8499 		is_int = 0;
8500 		tag    = am_recvtclass;
8501 		break;
8502 #	    else
8503 		/* Not supported -- ignore */
8504 		continue;
8505 #	    endif
8506 	    }
8507 	    case INET_OPT_RECVTTL:
8508 	    {
8509 #	    if defined(IP_RECVTTL) && defined(IPPROTO_IP)
8510 		proto  = IPPROTO_IP;
8511 		type   = IP_RECVTTL;
8512 		is_int = 0;
8513 		tag    = am_recvttl;
8514 		break;
8515 #	    else
8516 		/* Not supported -- ignore */
8517 		continue;
8518 #	    endif
8519 	    }
8520 	    case INET_OPT_IPV6_V6ONLY:
8521 #           if HAVE_DECL_IPV6_V6ONLY && defined(IPPROTO_IPV6)
8522 	    {
8523 		proto  = IPPROTO_IPV6;
8524 		type   = IPV6_V6ONLY;
8525 		tag    = am_ipv6_v6only;
8526 		break;
8527 	    }
8528 #           elif defined(__WIN32__) && defined(HAVE_IN6) && defined(AF_INET6)
8529 #               error Here is a fix for Win IPv6 SCTP needed
8530 #           else
8531 		/* Not supported -- ignore */
8532 		continue;
8533 #           endif
8534 	    case SCTP_OPT_AUTOCLOSE:
8535 	    {
8536 		proto  = IPPROTO_SCTP;
8537 		type   = SCTP_AUTOCLOSE;
8538 		is_int = 1;
8539 		tag    = am_sctp_autoclose;
8540 		break;
8541 	    }
8542 	    case SCTP_OPT_MAXSEG   :
8543 	   {
8544 		proto  = IPPROTO_SCTP;
8545 		type   = SCTP_MAXSEG;
8546 		is_int = 1;
8547 		tag    = am_sctp_maxseg;
8548 		break;
8549 	    }
8550 	    case SCTP_OPT_NODELAY  :
8551 	   {
8552 		proto  = IPPROTO_SCTP;
8553 		type   = SCTP_NODELAY;
8554 		is_int = 0;
8555 		tag    = am_sctp_nodelay;
8556 		break;
8557 	    }
8558 	    case SCTP_OPT_DISABLE_FRAGMENTS:
8559 	    {
8560 		proto  = IPPROTO_SCTP;
8561 		type   = SCTP_DISABLE_FRAGMENTS;
8562 		is_int = 0;
8563 		tag    = am_sctp_disable_fragments;
8564 		break;
8565 	    }
8566 	    case SCTP_OPT_I_WANT_MAPPED_V4_ADDR:
8567 	    {
8568 		proto  = IPPROTO_SCTP;
8569 		type   = SCTP_I_WANT_MAPPED_V4_ADDR;
8570 		is_int = 0;
8571 		tag    = am_sctp_i_want_mapped_v4_addr;
8572 		break;
8573 	    }
8574 	    default:	 ASSERT(0);
8575 	    }
8576 	    if (sock_getopt (desc->s, proto, type, &res, &sz) < 0) continue;
8577 	    /* Form the result: */
8578 	    PLACE_FOR(spec, i, LOAD_ATOM_CNT +
8579 		      (is_int ? LOAD_INT_CNT : LOAD_BOOL_CNT) +
8580 		      LOAD_TUPLE_CNT);
8581 	    i = LOAD_ATOM	(spec, i, tag);
8582 	    if (is_int)
8583 	    	i = LOAD_INT	(spec, i, res);
8584 	    else
8585 	    	i = LOAD_BOOL	(spec, i, res);
8586 	    i = LOAD_TUPLE	(spec, i, 2);
8587 	    break;
8588 	}
8589 	case SCTP_OPT_PRIMARY_ADDR:
8590 	case SCTP_OPT_SET_PEER_PRIMARY_ADDR:
8591 	{
8592 	    /* These 2 options use completely isomorphic data structures: */
8593 	    struct       sctp_setpeerprim sp;
8594 	    unsigned int sz = sizeof(sp);
8595 
8596 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8597 	    sp.sspp_assoc_id = GET_ASSOC_ID(buf);
8598 	    buf    += ASSOC_ID_LEN;
8599 	    buflen -= ASSOC_ID_LEN;
8600 
8601 	    if (sock_getopt(desc->s, IPPROTO_SCTP,
8602 			    (eopt == SCTP_OPT_PRIMARY_ADDR) ?
8603 			    SCTP_PRIMARY_ADDR : SCTP_SET_PEER_PRIMARY_ADDR,
8604 			    &sp, &sz) < 0) continue;
8605 	    /* Fill in the response: */
8606 	    PLACE_FOR(spec, i,
8607 		      2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT +
8608 		      LOAD_INET_GET_ADDRESS_CNT + 2*LOAD_TUPLE_CNT);
8609 	    switch (eopt) {
8610 	    case SCTP_OPT_PRIMARY_ADDR:
8611 		i = LOAD_ATOM(spec, i, am_sctp_primary_addr);
8612 		i = LOAD_ATOM(spec, i, am_sctp_prim);
8613 		break;
8614 	    case SCTP_OPT_SET_PEER_PRIMARY_ADDR:
8615 		i = LOAD_ATOM(spec, i, am_sctp_set_peer_primary_addr);
8616 		i = LOAD_ATOM(spec, i, am_sctp_setpeerprim);
8617 		break;
8618 	    default:
8619 		ASSERT(0);
8620 	    }
8621 	    i = LOAD_ASSOC_ID	(spec, i, sp.sspp_assoc_id);
8622 	    i = load_inet_get_address(spec, i, desc, &sp.sspp_addr);
8623 	    i = LOAD_TUPLE	(spec, i, 3);
8624 	    i = LOAD_TUPLE	(spec, i, 2);
8625 	    break;
8626 	}
8627 	case SCTP_OPT_ADAPTATION_LAYER:
8628 	{
8629 	    struct       sctp_setadaptation ad;
8630 	    unsigned int sz  = sizeof (ad);
8631 
8632 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_ADAPTATION_LAYER,
8633 			    &ad, &sz) < 0) continue;
8634 	    /* Fill in the response: */
8635 	    PLACE_FOR(spec, i,
8636 		      2*LOAD_ATOM_CNT + LOAD_INT_CNT + 2*LOAD_TUPLE_CNT);
8637 	    i = LOAD_ATOM	(spec, i, am_sctp_adaptation_layer);
8638 	    i = LOAD_ATOM	(spec, i, am_sctp_setadaptation);
8639 	    i = LOAD_INT	(spec, i, sock_ntohl(ad.ssb_adaptation_ind));
8640 	    i = LOAD_TUPLE	(spec, i, 2);
8641 	    i = LOAD_TUPLE	(spec, i, 2);
8642 	    break;
8643 	}
8644 	case SCTP_OPT_PEER_ADDR_PARAMS:
8645 	{
8646 	    struct sctp_paddrparams  ap;
8647 	    unsigned int             sz = sizeof(ap);
8648 	    int                      n;
8649 	    char                    *before, *xerror;
8650 	    ErlDrvSizeT              alen;
8651 
8652 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8653 	    ap.spp_assoc_id = GET_ASSOC_ID(buf);
8654 	    buf += ASSOC_ID_LEN;
8655 	    buflen -= ASSOC_ID_LEN;
8656 	    alen = buflen;
8657 	    before = buf;
8658 	    xerror =
8659 	      inet_set_faddress
8660 	      (desc->sfamily, (inet_address*) (&ap.spp_address),
8661 	       &buf, &alen);
8662 	    if (xerror != NULL) {
8663 #ifdef EAFNOSUPPORT
8664 	        if (xerror == str_eafnosupport) {
8665 		    RETURN_ERROR(spec, -EAFNOSUPPORT);
8666 		}
8667 #else
8668 		RETURN_ERROR(spec, -EINVAL);
8669 #endif
8670 	    }
8671 	    buflen -= buf - before;
8672 
8673 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
8674 			    &ap, &sz) < 0) continue;
8675 	    /* Fill in the response: */
8676 	    PLACE_FOR(spec, i,
8677 		      2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT +
8678 		      LOAD_INET_GET_ADDRESS_CNT + 4*LOAD_INT_CNT);
8679 	    i = LOAD_ATOM	(spec, i, am_sctp_peer_addr_params);
8680 	    i = LOAD_ATOM	(spec, i, am_sctp_paddrparams);
8681 	    i = LOAD_ASSOC_ID	(spec, i, ap.spp_assoc_id);
8682 	    i = load_inet_get_address(spec, i, desc, &ap.spp_address);
8683 	    i = LOAD_INT	(spec, i, ap.spp_hbinterval);
8684 	    i = LOAD_INT	(spec, i, ap.spp_pathmaxrxt);
8685 
	    /* The following fields are not supported in SOLARIS10,
8687 	    ** so put 0s for "spp_pathmtu", "spp_sackdelay",
8688 	    ** and empty list for "spp_flags":
8689 	    */
8690 
8691 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_PATHMTU
8692 	    i = LOAD_INT	(spec, i, ap.spp_pathmtu);
8693 #           else
8694 	    i = LOAD_INT	(spec, i, 0);
8695 #           endif
8696 
8697 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY
8698 	    i = LOAD_INT	(spec, i, ap.spp_sackdelay);
8699 #           else
8700 	    i = LOAD_INT	(spec, i, 0);
8701 #           endif
8702 
8703 	    n = 0;
8704 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_FLAGS
8705 	    PLACE_FOR(spec, i, 7*LOAD_ATOM_CNT);
8706 	    /* Now Flags, as a list: */
8707 	    if (ap.spp_flags & SPP_HB_ENABLE)
8708 	    	{ i = LOAD_ATOM	(spec, i, am_hb_enable); 	     n++; }
8709 
8710 	    if (ap.spp_flags & SPP_HB_DISABLE)
8711 		{ i = LOAD_ATOM (spec, i, am_hb_disable); 	     n++; }
8712 
8713 	    if (ap.spp_flags & SPP_HB_DEMAND)
8714 		{ i = LOAD_ATOM (spec, i, am_hb_demand);	     n++; }
8715 
8716 	    if (ap.spp_flags & SPP_PMTUD_ENABLE)
8717 		{ i = LOAD_ATOM (spec, i, am_pmtud_enable);          n++; }
8718 
8719 	    if (ap.spp_flags & SPP_PMTUD_DISABLE)
8720 		{ i = LOAD_ATOM (spec, i, am_pmtud_disable);         n++; }
8721 #	    ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY
8722 	    /* SPP_SACKDELAY_* not in FreeBSD 7.1 */
8723 	    if (ap.spp_flags & SPP_SACKDELAY_ENABLE)
8724 		{ i = LOAD_ATOM (spec, i, am_sackdelay_enable);      n++; }
8725 
8726 	    if (ap.spp_flags & SPP_SACKDELAY_DISABLE)
8727 		{ i = LOAD_ATOM (spec, i, am_sackdelay_disable);     n++; }
8728 #	    endif
8729 #	    endif
8730 
8731 	    PLACE_FOR(spec, i,
8732 		      LOAD_NIL_CNT + LOAD_LIST_CNT + 2*LOAD_TUPLE_CNT);
8733 
8734 	    /* Close up the Flags list: */
8735 	    i = LOAD_NIL	(spec, i);
8736 	    i = LOAD_LIST	(spec, i, n+1);
8737 
8738 	    /* Close up the record: */
8739 	    i = LOAD_TUPLE	(spec, i, 8);
8740 	    /* Close up the result tuple: */
8741 	    i = LOAD_TUPLE	(spec, i, 2);
8742 	    break;
8743 	}
8744 	case SCTP_OPT_DEFAULT_SEND_PARAM:
8745 	{
8746 	    struct       sctp_sndrcvinfo sri;
8747 	    unsigned int sz  = sizeof(sri);
8748 
8749 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8750 	    sri.sinfo_assoc_id = GET_ASSOC_ID(buf);
8751 	    buf += ASSOC_ID_LEN;
8752 	    buflen -= ASSOC_ID_LEN;
8753 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_DEFAULT_SEND_PARAM,
8754 			    &sri, &sz) < 0) continue;
8755 	    /* Fill in the response: */
8756 	    PLACE_FOR(spec, i, LOAD_ATOM_CNT +
8757 		      SCTP_PARSE_SNDRCVINFO_CNT + LOAD_TUPLE_CNT);
8758 	    i = LOAD_ATOM(spec, i, am_sctp_default_send_param);
8759 	    i = sctp_parse_sndrcvinfo(spec, i, &sri);
8760 	    i = LOAD_TUPLE(spec, i, 2);
8761 	    break;
8762 	}
8763 	case SCTP_OPT_EVENTS:
8764 	{
8765 	    struct       sctp_event_subscribe evs;
8766 	    unsigned int sz  = sizeof(evs);
8767 
8768 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_EVENTS,
8769 			    &evs, &sz) < 0) continue;
8770 	    /* Fill in the response: */
8771 	    PLACE_FOR(spec, i,
8772 		      2*LOAD_ATOM_CNT + 9*LOAD_BOOL_CNT + 2*LOAD_TUPLE_CNT);
8773 	    i = LOAD_ATOM	(spec, i, am_sctp_events);
8774 	    i = LOAD_ATOM	(spec, i, am_sctp_event_subscribe);
8775 	    i = LOAD_BOOL	(spec, i, evs.sctp_data_io_event);
8776 	    i = LOAD_BOOL	(spec, i, evs.sctp_association_event);
8777 	    i = LOAD_BOOL	(spec, i, evs.sctp_address_event);
8778 	    i = LOAD_BOOL	(spec, i, evs.sctp_send_failure_event);
8779 	    i = LOAD_BOOL	(spec, i, evs.sctp_peer_error_event);
8780 	    i = LOAD_BOOL	(spec, i, evs.sctp_shutdown_event);
8781 	    i = LOAD_BOOL	(spec, i, evs.sctp_partial_delivery_event);
8782 	    i = LOAD_BOOL	(spec, i, evs.sctp_adaptation_layer_event);
8783 	    i = LOAD_BOOL	(spec, i, 0);/* NB: sctp_authentication_event
8784 					      * is not yet supported in Linux
8785 					      */
8786 	    i = LOAD_TUPLE	(spec, i, 10);
8787 	    i = LOAD_TUPLE	(spec, i, 2);
8788 	    break;
8789 	}
8790 	/* The following option is not available in Solaris 10: */
8791 #	if HAVE_DECL_SCTP_DELAYED_ACK_TIME
8792 	case SCTP_OPT_DELAYED_ACK_TIME:
8793 	{
8794 	    struct       sctp_assoc_value av;
8795 	    unsigned int sz  = sizeof(av);
8796 
8797 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8798 	    av.assoc_id = GET_ASSOC_ID(buf);
8799 	    buf    += ASSOC_ID_LEN;
8800 	    buflen -= ASSOC_ID_LEN;
8801 
8802 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_DELAYED_ACK_TIME,
8803 			    &av, &sz) < 0) continue;
8804 	    /* Fill in the response: */
8805 	    PLACE_FOR(spec, i, 2*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT +
8806 		      LOAD_INT_CNT + 2*LOAD_TUPLE_CNT);
8807 	    i = LOAD_ATOM	(spec, i, am_sctp_delayed_ack_time);
8808 	    i = LOAD_ATOM	(spec, i, am_sctp_assoc_value);
8809 	    i = LOAD_ASSOC_ID	(spec, i, av.assoc_id);
8810 	    i = LOAD_INT	(spec, i, av.assoc_value);
8811 	    i = LOAD_TUPLE	(spec, i, 3);
8812 	    i = LOAD_TUPLE	(spec, i, 2);
8813 	    break;
8814 	}
8815 #	endif
8816 	case SCTP_OPT_STATUS:
8817 	{
8818 	    struct       sctp_status  st;
8819 	    unsigned int sz  = sizeof(st);
8820 
8821 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8822 	    st.sstat_assoc_id = GET_ASSOC_ID(buf);
8823 	    buf    += ASSOC_ID_LEN;
8824 	    buflen -= ASSOC_ID_LEN;
8825 
8826 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_STATUS,
8827 			    &st, &sz) < 0) continue;
8828 	    /* Fill in the response: */
8829 	    PLACE_FOR(spec, i, 3*LOAD_ATOM_CNT + LOAD_ASSOC_ID_CNT +
8830 		      6*LOAD_INT_CNT + LOAD_PADDRINFO_CNT +
8831 		      2*LOAD_TUPLE_CNT);
8832 	    i = LOAD_ATOM	(spec, i, am_sctp_status);
8833 	    i = LOAD_ATOM	(spec, i, am_sctp_status);
8834 	    i = LOAD_ASSOC_ID   (spec, i, st.sstat_assoc_id);
8835 	    switch(st.sstat_state)
8836 	    {
8837             /*  SCTP_EMPTY is not supported on SOLARIS10: */
8838 #	    if HAVE_DECL_SCTP_EMPTY
8839 	    case SCTP_EMPTY:
8840 		i = LOAD_ATOM	(spec, i, am_empty);
8841 		break;
8842 #	    endif
8843 	    case SCTP_CLOSED:
8844 		i = LOAD_ATOM   (spec, i, am_closed);
8845 		break;
8846 #           if HAVE_DECL_SCTP_BOUND
8847 	    case SCTP_BOUND:
8848 		i = LOAD_ATOM	(spec, i, am_bound);
8849 		break;
8850 #           endif
8851 #           if HAVE_DECL_SCTP_LISTEN
8852 	    case SCTP_LISTEN:
8853 		i = LOAD_ATOM	(spec, i, am_listen);
8854 		break;
8855 #           endif
8856 	    case SCTP_COOKIE_WAIT:
8857 		i = LOAD_ATOM	(spec, i, am_cookie_wait);
8858 		break;
8859 	    case SCTP_COOKIE_ECHOED:
8860 		i = LOAD_ATOM	(spec, i, am_cookie_echoed);
8861 		break;
8862 	    case SCTP_ESTABLISHED:
8863 		i = LOAD_ATOM	(spec, i, am_established);
8864 		break;
8865 	    case SCTP_SHUTDOWN_PENDING:
8866 		i = LOAD_ATOM	(spec, i, am_shutdown_pending);
8867 		break;
8868 	    case SCTP_SHUTDOWN_SENT:
8869 		i = LOAD_ATOM	(spec, i, am_shutdown_sent);
8870 		break;
8871 	    case SCTP_SHUTDOWN_RECEIVED:
8872 		i = LOAD_ATOM	(spec, i, am_shutdown_received);
8873 		break;
8874 	    case SCTP_SHUTDOWN_ACK_SENT:
8875 		i = LOAD_ATOM	(spec, i, am_shutdown_ack_sent);
8876 		break;
8877 	    default:
8878 		i = LOAD_ATOM	(spec, i, am_undefined);
8879 		break;
8880 	    }
8881 	    i = LOAD_INT	(spec, i, st.sstat_rwnd);
8882 	    i = LOAD_INT	(spec, i, st.sstat_unackdata);
8883 	    i = LOAD_INT	(spec, i, st.sstat_penddata);
8884 	    i = LOAD_INT	(spec, i, st.sstat_instrms);
8885 	    i = LOAD_INT	(spec, i, st.sstat_outstrms);
8886 	    i = LOAD_INT	(spec, i, st.sstat_fragmentation_point);
8887 	    i = load_paddrinfo	(spec, i, desc, &st.sstat_primary);
8888 	    /* Close up the record: */
8889 	    i = LOAD_TUPLE	(spec, i, 10);
8890 	    /* Close up the result tuple: */
8891 	    i = LOAD_TUPLE	(spec, i, 2);
8892 	    break;
8893 	}
8894 	case SCTP_OPT_GET_PEER_ADDR_INFO:
8895 	{
8896 	    struct sctp_paddrinfo  pai;
8897 	    unsigned int           sz = sizeof(pai);
8898 	    char                  *before, *xerror;
8899 	    ErlDrvSizeT            alen;
8900 
8901 	    if (buflen < ASSOC_ID_LEN) RETURN_ERROR(spec, -EINVAL);
8902 	    pai.spinfo_assoc_id = GET_ASSOC_ID(buf);
8903 	    buf    += ASSOC_ID_LEN;
8904 	    buflen -= ASSOC_ID_LEN;
8905 	    alen = buflen;
8906 	    before = buf;
8907 	    xerror =
8908 	      inet_set_faddress
8909 	      (desc->sfamily, (inet_address*) (&pai.spinfo_address),
8910 	       &buf, &alen);
8911 	    if (xerror != NULL) {
8912 #ifdef EAFNOSUPPORT
8913 	        if (xerror == str_eafnosupport) {
8914 		    RETURN_ERROR(spec, -EAFNOSUPPORT);
8915 		}
8916 #else
8917 		RETURN_ERROR(spec, -EINVAL);
8918 #endif
8919 	    }
8920 	    buflen -= buf - before;
8921 
8922 	    if (sock_getopt(desc->s, IPPROTO_SCTP, SCTP_GET_PEER_ADDR_INFO,
8923 			    &pai, &sz) < 0) continue;
8924 	    /* Fill in the response: */
8925 	    PLACE_FOR(spec, i,
8926 		      LOAD_ATOM_CNT + LOAD_PADDRINFO_CNT + LOAD_TUPLE_CNT);
8927 	    i = LOAD_ATOM       (spec, i, am_sctp_get_peer_addr_info);
8928 	    i = load_paddrinfo	(spec, i, desc, &pai);
8929 	    i = LOAD_TUPLE	(spec, i, 2);
8930 	    break;
8931 	}
8932 	default:
8933 	    RETURN_ERROR(spec, -EINVAL); /* No more valid options */
8934 	}
8935 	/* If we get here one result has been successfully loaded */
8936 	length ++;
8937     }
8938     if (buflen != 0) RETURN_ERROR(spec, -EINVAL); /* Optparam mismatch */
8939 
8940     PLACE_FOR(spec, i, LOAD_NIL_CNT + LOAD_LIST_CNT + 2*LOAD_TUPLE_CNT);
8941 
8942     /* If we get here, we have "length" options: */
8943     i = LOAD_NIL  (spec, i);
8944     i = LOAD_LIST (spec, i, length+1);
8945 
8946     /* Close up the {ok, List} response: */
8947     i = LOAD_TUPLE(spec, i, 2);
8948     /* Close up the {inet_reply, S, {ok, List}} response:    */
8949     i = LOAD_TUPLE(spec, i, 3);
8950 
8951     /* Now, convert "spec" into the returnable term: */
8952     erl_drv_send_term(desc->dport, driver_caller(desc->port), spec, i);
8953     FREE(spec);
8954 
8955     (*dest)[0] = INET_REP;
8956     return 1;   /* Response length */
8957 #   undef PLACE_FOR
8958 #   undef RETURN_ERROR
8959 }
8960 #endif
8961 
8962 /* fill statistics reply, op codes from src and result in dest
8963 ** dst area must be a least 5*len + 1 bytes
8964 */
inet_fill_stat(inet_descriptor * desc,char * src,ErlDrvSizeT len,char * dst)8965 static ErlDrvSSizeT inet_fill_stat(inet_descriptor* desc,
8966 				   char* src, ErlDrvSizeT len, char* dst)
8967 {
8968     unsigned long val;
8969     int op;
8970     char* dst_start = dst;
8971 
8972     *dst++ = INET_REP_OK;     /* put reply code */
8973     while (len--) {
8974 	op = *src++;
8975 	*dst++ = op;  /* copy op code */
8976 	switch(op) {
8977 	case INET_STAT_RECV_CNT:
8978 	    val = desc->recv_cnt;
8979 	    break;
8980 	case INET_STAT_RECV_MAX:
8981 	    val = (unsigned long) desc->recv_max;
8982 	    break;
8983 	case INET_STAT_RECV_AVG:
8984 	    val = (unsigned long) desc->recv_avg;
8985 	    break;
8986 	case INET_STAT_RECV_DVI:
8987 	    val = (unsigned long) fabs(desc->recv_dvi);
8988 	    break;
8989 	case INET_STAT_SEND_CNT:
8990 	    val = desc->send_cnt;
8991 	    break;
8992 	case INET_STAT_SEND_MAX:
8993 	    val = desc->send_max;
8994 	    break;
8995 	case INET_STAT_SEND_AVG:
8996 	    val = (unsigned long) desc->send_avg;
8997 	    break;
8998 	case INET_STAT_SEND_PND:
8999 	    val = (unsigned long) driver_sizeq(desc->port);
9000 	    break;
9001 	case INET_STAT_RECV_OCT:
9002 #ifdef ARCH_64
9003 	    put_int64(desc->recv_oct, dst); /* write it all */
9004 #else
9005 	    put_int32(desc->recv_oct[1], dst);   /* write high 32bit */
9006 	    put_int32(desc->recv_oct[0], dst+4); /* write low 32bit */
9007 #endif
9008 	    dst += 8;
9009 	    continue;
9010 	case INET_STAT_SEND_OCT:
9011 #ifdef ARCH_64
9012 	    put_int64(desc->send_oct, dst); /* write it all */
9013 #else
9014 	    put_int32(desc->send_oct[1], dst);   /* write high 32bit */
9015 	    put_int32(desc->send_oct[0], dst+4); /* write low 32bit */
9016 #endif
9017 	    dst += 8;
9018 	    continue;
9019 	default: return -1; /* invalid argument */
9020 	}
9021 	put_int32(val, dst);  /* write 32bit value */
9022 	dst += 4;
9023     }
9024     return dst - dst_start;  /* actual length */
9025 }
9026 
9027 static void
send_empty_out_q_msgs(inet_descriptor * desc)9028 send_empty_out_q_msgs(inet_descriptor* desc)
9029 {
9030   ErlDrvTermData msg[6];
9031   int msg_len = 0;
9032 
9033   if(NO_SUBSCRIBERS(&desc->empty_out_q_subs))
9034     return;
9035 
9036   msg_len = LOAD_ATOM(msg, msg_len, am_empty_out_q);
9037   msg_len = LOAD_PORT(msg, msg_len, desc->dport);
9038   msg_len = LOAD_TUPLE(msg, msg_len, 2);
9039 
9040   ASSERT(msg_len == sizeof(msg)/sizeof(*msg));
9041 
9042   send_to_subscribers(desc->dport,
9043 		      &desc->empty_out_q_subs,
9044 		      1,
9045 		      msg,
9046 		      msg_len);
9047 }
9048 
9049 /* subscribe and fill subscription reply, op codes from src and
9050 ** result in dest dst area must be a least 5*len + 1 bytes
9051 */
inet_subscribe(inet_descriptor * desc,char * src,ErlDrvSizeT len,char * dst)9052 static ErlDrvSSizeT inet_subscribe(inet_descriptor* desc,
9053 				   char* src, ErlDrvSizeT len, char* dst)
9054 {
9055     unsigned long val;
9056     int op;
9057     char* dst_start = dst;
9058 
9059     *dst++ = INET_REP_OK;     /* put reply code */
9060     while (len--) {
9061 	op = *src++;
9062 	*dst++ = op;  /* copy op code */
9063 	switch(op) {
9064 	case INET_SUBS_EMPTY_OUT_Q:
9065 	  val = driver_sizeq(desc->port);
9066 	  if(val > 0)
9067 	    if(!save_subscriber(&desc->empty_out_q_subs,
9068 				driver_caller(desc->port)))
9069 	      return 0;
9070 	  break;
9071 	default: return -1; /* invalid argument */
9072 	}
9073 	put_int32(val, dst);  /* write 32bit value */
9074 	dst += 4;
9075     }
9076     return dst - dst_start;  /* actual length */
9077 }
9078 
9079 /* Terminate socket */
inet_stop(inet_descriptor * desc)9080 static void inet_stop(inet_descriptor* desc)
9081 {
9082     erl_inet_close(desc);
9083 #ifdef HAVE_SETNS
9084     if (desc->netns != NULL)
9085 	FREE(desc->netns);
9086 #endif
9087     FREE(desc);
9088 }
9089 
/* Close the descriptor's socket directly with sock_close(), if it has
 * one. Nothing else in the descriptor is touched or freed here.
 */
static void inet_emergency_close(ErlDrvData data)
{
    /* valid for any (UDP, TCP or SCTP) descriptor */
    tcp_descriptor* any_desc = (tcp_descriptor*)data;
    inet_descriptor* desc = INETP(any_desc);

    DEBUGF(("inet_emergency_close(%ld) {s=%d\r\n",
            (long)desc->port, desc->s));

    if (desc->s == INVALID_SOCKET)
        return;               /* no socket to close */

    sock_close(desc->s);
}
9101 
9102 
/* Apply the driver's default busy message queue limits to "port".
 *
 * Both watermarks are clamped into the range
 * [ERL_DRV_BUSY_MSGQ_LIM_MIN, ERL_DRV_BUSY_MSGQ_LIM_MAX] before being
 * handed to erl_drv_busy_msgq_limits().
 */
static void set_default_msgq_limits(ErlDrvPort port)
{
    ErlDrvSizeT low  = INET_LOW_MSGQ_WATERMARK;
    ErlDrvSizeT high = INET_HIGH_MSGQ_WATERMARK;

    low = (low < ERL_DRV_BUSY_MSGQ_LIM_MIN) ? ERL_DRV_BUSY_MSGQ_LIM_MIN
        : (low > ERL_DRV_BUSY_MSGQ_LIM_MAX) ? ERL_DRV_BUSY_MSGQ_LIM_MAX
        : low;

    high = (high < ERL_DRV_BUSY_MSGQ_LIM_MIN) ? ERL_DRV_BUSY_MSGQ_LIM_MIN
         : (high > ERL_DRV_BUSY_MSGQ_LIM_MAX) ? ERL_DRV_BUSY_MSGQ_LIM_MAX
         : high;

    erl_drv_busy_msgq_limits(port, &low, &high);
}
9117 
9118 /* Allocate descriptor */
inet_start(ErlDrvPort port,int size,int protocol)9119 static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol)
9120 {
9121     inet_descriptor* desc;
9122 
9123     if ((desc = (inet_descriptor*) ALLOC(size)) == NULL)
9124 	return NULL;
9125 
9126     desc->s = INVALID_SOCKET;
9127     desc->event = INVALID_EVENT;
9128     desc->event_mask = 0;
9129 #ifdef __WIN32__
9130     desc->forced_events = 0;
9131     desc->send_would_block = 0;
9132 #endif
9133     desc->port = port;
9134     desc->dport = driver_mk_port(port);
9135     desc->state = INET_STATE_CLOSED;
9136     desc->prebound = 0;
9137     desc->bufsz = INET_DEF_BUFFER;
9138     desc->hsz = 0;                     /* list header size */
9139     desc->htype = TCP_PB_RAW;          /* default packet type */
9140     desc->psize = 0;                   /* no size check */
9141     desc->stype = -1;                  /* bad stype */
9142     desc->sfamily = -1;
9143     desc->sprotocol = protocol;
9144     desc->mode    = INET_MODE_LIST;    /* list mode */
9145     desc->exitf   = 1;                 /* exit port when close on active
9146 					  socket */
9147     desc->deliver = INET_DELIVER_TERM; /* standard term format */
9148     desc->active  = INET_PASSIVE;      /* start passive */
9149     desc->active_count = 0;
9150     desc->delimiter    = '\n';         /* line delimiting char */
9151     desc->oph = NULL;
9152     desc->opt = NULL;
9153     desc->op_ref = 0;
9154 
9155     desc->peer_ptr = NULL;
9156     desc->name_ptr = NULL;
9157 
9158 #ifdef ARCH_64
9159     desc->recv_oct  = 0;
9160 #else
9161     desc->recv_oct[0] = desc->recv_oct[1] = 0;
9162 #endif
9163     desc->recv_cnt = 0;
9164     desc->recv_max = 0;
9165     desc->recv_avg = 0.0;
9166     desc->recv_dvi = 0.0;
9167 #ifdef ARCH_64
9168     desc->send_oct = 0;
9169 #else
9170     desc->send_oct[0] = desc->send_oct[1] = 0;
9171 #endif
9172     desc->send_cnt = 0;
9173     desc->send_max = 0;
9174     desc->send_avg = 0.0;
9175     desc->empty_out_q_subs.subscriber = NO_PROCESS;
9176     desc->empty_out_q_subs.next = NULL;
9177 
9178     sys_memzero((char *)&desc->remote,sizeof(desc->remote));
9179 
9180     desc->flags = 0;
9181 
9182 #ifdef HAVE_SETNS
9183     desc->netns = NULL;
9184 #endif
9185 
9186     desc->recv_cmsgflags = 0;
9187 
9188     return (ErlDrvData)desc;
9189 }
9190 
/* MAXHOSTNAMELEN could be 64 or 255 depending on the platform.
 * Use INET_MAXHOSTNAMELEN instead, which is always 255 on all
 * platforms. */
#define INET_MAXHOSTNAMELEN 255
9195 
9196 /*
9197 ** common TCP/UDP/SCTP control command
9198 */
inet_ctl(inet_descriptor * desc,int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)9199 static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf,
9200 			     ErlDrvSizeT len, char** rbuf, ErlDrvSizeT rsize)
9201 {
9202     switch (cmd) {
9203 
9204     case INET_REQ_GETSTAT: {
9205 	  char* dst;
9206 	  ErlDrvSizeT i;
9207 	  int dstlen = 1;  /* Reply code */
9208 
9209 	  for (i = 0; i < len; i++) {
9210 	      switch(buf[i]) {
9211 	      case INET_STAT_SEND_OCT: dstlen += 9; break;
9212 	      case INET_STAT_RECV_OCT: dstlen += 9; break;
9213 	      default: dstlen += 5; break;
9214 	      }
9215 	  }
9216 	  DEBUGF(("inet_ctl(%ld): GETSTAT\r\n", (long) desc->port));
9217 	  if (dstlen > INET_MAX_OPT_BUFFER) /* sanity check */
9218 	      return 0;
9219 	  if (dstlen > rsize) {
9220 	      if ((dst = (char*) ALLOC(dstlen)) == NULL)
9221 		  return 0;
9222 	      *rbuf = dst;  /* call will free this buffer */
9223 	  }
9224 	  else
9225 	      dst = *rbuf;  /* ok we fit in buffer given */
9226 	  return inet_fill_stat(desc, buf, len, dst);
9227       }
9228 
9229     case INET_REQ_SUBSCRIBE: {
9230 	  char* dst;
9231 	  int dstlen = 1 /* Reply code */ + len*5;
9232 	  DEBUGF(("inet_ctl(%ld): INET_REQ_SUBSCRIBE\r\n", (long) desc->port));
9233 	  if (dstlen > INET_MAX_OPT_BUFFER) /* sanity check */
9234 	      return 0;
9235 	  if (dstlen > rsize) {
9236 	      if ((dst = (char*) ALLOC(dstlen)) == NULL)
9237 		  return 0;
9238 	      *rbuf = dst;  /* call will free this buffer */
9239 	  }
9240 	  else
9241 	      dst = *rbuf;  /* ok we fit in buffer given */
9242 	  return inet_subscribe(desc, buf, len, dst);
9243       }
9244 
9245     case INET_REQ_GETOPTS: {    /* get options */
9246 	ErlDrvSSizeT replen;
9247 	DEBUGF(("inet_ctl(%ld): GETOPTS\r\n", (long)desc->port));
9248 #ifdef HAVE_SCTP
9249         if (IS_SCTP(desc))
9250         {
9251             if ((replen = sctp_fill_opts(desc, buf, len, rbuf, rsize)) < 0)
9252                 return ctl_error(-replen, rbuf, rsize);
9253         } else
9254 #endif
9255 	if ((replen = inet_fill_opts(desc, buf, len, rbuf, rsize)) < 0) {
9256 	    return ctl_error(EINVAL, rbuf, rsize);
9257 	}
9258 	return replen;
9259     }
9260 
9261     case INET_REQ_GETIFLIST: {
9262 	DEBUGF(("inet_ctl(%ld): GETIFLIST\r\n", (long)desc->port));
9263 	if (!IS_OPEN(desc))
9264 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
9265 	return inet_ctl_getiflist(desc, rbuf, rsize);
9266     }
9267 
9268     case INET_REQ_GETIFADDRS: {
9269 	DEBUGF(("inet_ctl(%ld): GETIFADDRS\r\n", (long)desc->port));
9270 	if (!IS_OPEN(desc))
9271 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
9272 	return inet_ctl_getifaddrs(desc, rbuf, rsize);
9273     }
9274 
9275     case INET_REQ_IFGET: {
9276 	DEBUGF(("inet_ctl(%ld): IFGET\r\n", (long)desc->port));
9277 	if (!IS_OPEN(desc))
9278 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
9279 	return inet_ctl_ifget(desc, buf, len, rbuf, rsize);
9280     }
9281 
9282     case INET_REQ_IFSET: {
9283 	DEBUGF(("inet_ctl(%ld): IFSET\r\n", (long)desc->port));
9284 	if (!IS_OPEN(desc))
9285 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
9286 	return inet_ctl_ifset(desc, buf, len, rbuf, rsize);
9287     }
9288 
9289     case INET_REQ_SETOPTS:  {   /* set options */
9290 	DEBUGF(("inet_ctl(%ld): SETOPTS\r\n", (long)desc->port));
9291 	/* XXX fprintf(stderr,"inet_ctl(%ld): SETOPTS (len = %d)\r\n", (long)desc->port,(int) len); */
9292 	switch(inet_set_opts(desc, buf, len)) {
9293 	case -1:
9294 	    return ctl_error(EINVAL, rbuf, rsize);
9295 	case 0:
9296 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9297 	case 1:
9298 	    /*
9299 	     * Let's hope that the descriptor really is a tcp_descriptor here.
9300 	     */
9301 	    /* fprintf(stderr,"Triggered tcp_deliver by setopt.\r\n"); */
9302 	    tcp_deliver((tcp_descriptor *) desc, 0);
9303 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9304 	default:
9305 	    /* fprintf(stderr,"Triggered tcp_recv by setopt.\r\n"); */
9306 	    /*
9307 	     * Same as above, but active changed to once w/o header type
9308 	     * change, so try a read instead of just deliver.
9309 	     */
9310 	    tcp_recv((tcp_descriptor *) desc, 0);
9311 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9312 	}
9313     }
9314 
9315     case INET_REQ_GETSTATUS: {
9316 	char tbuf[4];
9317 
9318 	DEBUGF(("inet_ctl(%ld): GETSTATUS\r\n", (long)desc->port));
9319 	put_int32(desc->state, tbuf);
9320 	return ctl_reply(INET_REP_OK, tbuf, 4, rbuf, rsize);
9321     }
9322 
9323     case INET_REQ_GETTYPE: {
9324 	char tbuf[8];
9325 
9326 	DEBUGF(("inet_ctl(%ld): GETTYPE\r\n", (long)desc->port));
9327 	if (desc->sfamily == AF_INET) {
9328 	    put_int32(INET_AF_INET, &tbuf[0]);
9329 	}
9330 #if defined(HAVE_IN6) && defined(AF_INET6)
9331         else if (desc->sfamily == AF_INET6) {
9332 	    put_int32(INET_AF_INET6, &tbuf[0]);
9333 	}
9334 #endif
9335 #ifdef HAVE_SYS_UN_H
9336 	else if (desc->sfamily == AF_UNIX) {
9337 	    put_int32(INET_AF_LOCAL, &tbuf[0]);
9338 	}
9339 #endif
9340 	else
9341 	    return ctl_error(EINVAL, rbuf, rsize);
9342 
9343 	if (desc->stype == SOCK_STREAM) {
9344 	    put_int32(INET_TYPE_STREAM, &tbuf[4]);
9345 	}
9346 	else if (desc->stype == SOCK_DGRAM) {
9347 	    put_int32(INET_TYPE_DGRAM, &tbuf[4]);
9348 	}
9349 #ifdef HAVE_SCTP
9350 	else if (desc->stype == SOCK_SEQPACKET) {
9351 	    put_int32(INET_TYPE_SEQPACKET, &tbuf[4]);
9352 	}
9353 #endif
9354 	else
9355 	    return ctl_error(EINVAL, rbuf, rsize);
9356 	return ctl_reply(INET_REP_OK, tbuf, 8, rbuf, rsize);
9357     }
9358 
9359 
9360     case INET_REQ_GETFD: {
9361 	char tbuf[4];
9362 
9363 	DEBUGF(("inet_ctl(%ld): GETFD\r\n", (long)desc->port));
9364 	if (!IS_OPEN(desc))
9365 	    return ctl_error(EINVAL, rbuf, rsize);
9366 	put_int32((long)desc->s, tbuf);
9367 	return ctl_reply(INET_REP_OK, tbuf, 4, rbuf, rsize);
9368     }
9369 
9370     case INET_REQ_GETHOSTNAME: { /* get host name */
9371 	char tbuf[INET_MAXHOSTNAMELEN + 1];
9372 
9373 	DEBUGF(("inet_ctl(%ld): GETHOSTNAME\r\n", (long)desc->port));
9374 	if (len != 0)
9375 	    return ctl_error(EINVAL, rbuf, rsize);
9376 
9377         /* gethostname requires len to be max(hostname) + 1 */
9378 	if (IS_SOCKET_ERROR(sock_hostname(tbuf, INET_MAXHOSTNAMELEN + 1)))
9379 	    return ctl_error(sock_errno(), rbuf, rsize);
9380 	return ctl_reply(INET_REP_OK, tbuf, strlen(tbuf), rbuf, rsize);
9381     }
9382 
9383     case INET_REQ_GETPADDRS: {
9384 	DEBUGF(("inet_ctl(%ld): INET_GETPADDRS\r\n", (long)desc->port));
9385 
9386 	if (len != 4) return ctl_error(EINVAL, rbuf, rsize);
9387 
9388 	if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize);
9389 
9390 #ifdef HAVE_SCTP
9391 	if (IS_SCTP(desc) && p_sctp_getpaddrs) {
9392 	    struct sockaddr *sa;
9393 	    Uint32 assoc_id;
9394 	    int n;
9395 	    ErlDrvSizeT rlen;
9396 
9397 	    assoc_id = get_int32(buf);
9398 	    n = p_sctp_getpaddrs(desc->s, assoc_id, &sa);
9399 	    rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize, 0);
9400 	    if (n > 0) p_sctp_freepaddrs(sa);
9401 	    return rlen;
9402 	}
9403 #endif
9404 	{ /* Fallback to sock_peer */
9405 	    inet_address addr;
9406 	    SOCKLEN_T sz;
9407 	    int i;
9408 
9409 	    sz = sizeof(addr);
9410 	    i = sock_peer(desc->s, (struct sockaddr *) &addr, &sz);
9411 	    return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize, sz);
9412 	}
9413     }
9414 
9415     case INET_REQ_PEER:  {      /* get peername */
9416 	char tbuf[sizeof(inet_address)];
9417 	inet_address peer;
9418 	inet_address* ptr;
9419 	unsigned int sz;
9420 
9421 	DEBUGF(("inet_ctl(%ld): PEER\r\n", (long)desc->port));
9422 
9423 	if (!(desc->state & INET_F_ACTIVE))
9424 	    return ctl_error(ENOTCONN, rbuf, rsize);
9425 	if ((ptr = desc->peer_ptr) != NULL) {
9426 	    sz = desc->peer_addr_len;
9427 	}
9428 	else {
9429 	    ptr = &peer;
9430             sz = sizeof(peer);
9431             sys_memzero((char *) &peer, sz);
9432 	    if (IS_SOCKET_ERROR
9433 		(sock_peer
9434 		 (desc->s, (struct sockaddr*)ptr, &sz)))
9435 		return ctl_error(sock_errno(), rbuf, rsize);
9436 	}
9437 	if (inet_get_address(tbuf, ptr, &sz) < 0)
9438 	    return ctl_error(EINVAL, rbuf, rsize);
9439 	return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize);
9440     }
9441 
9442     case INET_REQ_SETPEER: { /* set fake peername Port Address */
9443         char *xerror;
9444 	if (len == 0) {
9445 	    desc->peer_ptr = NULL;
9446 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9447 	}
9448 	else if (len < 2)
9449 	    return ctl_error(EINVAL, rbuf, rsize);
9450 	else if ((xerror = inet_set_faddress
9451 		  (desc->sfamily, &desc->peer_addr, &buf, &len)) != NULL)
9452 	    return ctl_xerror(xerror, rbuf, rsize);
9453 	else {
9454 	    desc->peer_ptr = &desc->peer_addr;
9455 	    desc->peer_addr_len = (SOCKLEN_T) len;
9456 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9457 	}
9458     }
9459 
9460     case INET_REQ_GETLADDRS: {
9461 	DEBUGF(("inet_ctl(%ld): INET_GETLADDRS\r\n", (long)desc->port));
9462 
9463 	if (len != 4) return ctl_error(EINVAL, rbuf, rsize);
9464 
9465 	if (! IS_OPEN(desc)) return ctl_xerror(EXBADPORT, rbuf, rsize);
9466 
9467 #ifdef HAVE_SCTP
9468 	if (IS_SCTP(desc) && p_sctp_getladdrs) {
9469 	    struct sockaddr *sa;
9470 	    Uint32 assoc_id;
9471 	    int n;
9472 	    ErlDrvSizeT rlen;
9473 
9474 	    assoc_id = get_int32(buf);
9475 	    n = p_sctp_getladdrs(desc->s, assoc_id, &sa);
9476 	    rlen = reply_inet_addrs(n, (inet_address *) sa, rbuf, rsize, 0);
9477 	    if (n > 0) p_sctp_freeladdrs(sa);
9478 	    return rlen;
9479 	}
9480 #endif
9481 	{ /* Fallback to sock_name */
9482 	    inet_address addr;
9483 	    SOCKLEN_T sz;
9484 	    int i;
9485 
9486 	    sz = sizeof(addr);
9487 	    sys_memzero((char *) &addr, sz);
9488 	    i = sock_name(desc->s, (struct sockaddr *) &addr, &sz);
9489 	    return reply_inet_addrs(i >= 0 ? 1 : i, &addr, rbuf, rsize, sz);
9490 	}
9491     }
9492 
9493     case INET_REQ_NAME:  {      /* get sockname */
9494 	char tbuf[sizeof(inet_address)];
9495 	inet_address name;
9496 	inet_address* ptr;
9497 	unsigned int sz;
9498 
9499 	DEBUGF(("inet_ctl(%ld): NAME\r\n", (long)desc->port));
9500 
9501 	if ((ptr = desc->name_ptr) != NULL) {
9502 	    sz = desc->name_addr_len;
9503 	}
9504 	else {
9505 	    ptr = &name;
9506 	    sz = sizeof(name);
9507 	    sys_memzero((char *) &name, sz);
9508 	    if (IS_SOCKET_ERROR
9509 		(sock_name(desc->s, (struct sockaddr*)ptr, &sz)))
9510 		return ctl_error(sock_errno(), rbuf, rsize);
9511 	}
9512 	if (inet_get_address(tbuf, ptr, &sz) < 0)
9513 	    return ctl_error(EINVAL, rbuf, rsize);
9514 	return ctl_reply(INET_REP_OK, tbuf, sz, rbuf, rsize);
9515     }
9516 
9517     case INET_REQ_SETNAME: { /* set fake sockname Port Address */
9518         char *xerror;
9519 	if (len == 0) {
9520 	    desc->name_ptr = NULL;
9521 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9522 	}
9523 	else if (len < 2)
9524 	    return ctl_error(EINVAL, rbuf, rsize);
9525 	else if ((xerror = inet_set_faddress
9526 		  (desc->sfamily, &desc->name_addr, &buf, &len)) != NULL)
9527 	    return ctl_xerror(xerror, rbuf, rsize);
9528 	else {
9529 	    desc->name_ptr = &desc->name_addr;
9530 	    desc->name_addr_len = (SOCKLEN_T) len;
9531 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9532 	}
9533     }
9534 
9535     case INET_REQ_BIND:  {      /* bind socket */
9536         char tbuf[2], *xerror;
9537 	inet_address local;
9538 	int port;
9539 
9540 	DEBUGF(("inet_ctl(%ld): BIND\r\n", (long)desc->port));
9541 
9542 	if (len < 2)
9543 	    return ctl_error(EINVAL, rbuf, rsize);
9544 	if (desc->state != INET_STATE_OPEN)
9545 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
9546 
9547 	if ((xerror = inet_set_faddress
9548 	     (desc->sfamily, &local, &buf, &len)) != NULL)
9549 	    return ctl_xerror(xerror, rbuf, rsize);
9550 
9551 	if (IS_SOCKET_ERROR(sock_bind(desc->s,(struct sockaddr*) &local, len)))
9552 	    return ctl_error(sock_errno(), rbuf, rsize);
9553 
9554 	desc->state = INET_STATE_OPEN;
9555 
9556 	port = inet_address_port(&local);
9557 	if (port == 0) {
9558 	    SOCKLEN_T adrlen = sizeof(local);
9559 	    sys_memzero((char *) &local, adrlen);
9560 	    sock_name(desc->s, &local.sa, &adrlen);
9561 	    port = inet_address_port(&local);
9562 	}
9563         else if (port == -1) port = 0;
9564 	put_int16(sock_ntohs((Uint16) port), tbuf);
9565 	return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
9566     }
9567 
9568     case INET_REQ_IGNOREFD: {
9569       DEBUGF(("inet_ctl(%ld): IGNOREFD, IGNORED = %d\r\n",
9570 	      (long)desc->port,(int)*buf));
9571 
9572       /*
9573        * FD can only be ignored for connected TCP connections for now,
9574        * possible to add UDP and SCTP support if needed.
9575        */
9576       if (!IS_CONNECTED(desc))
9577 	  return ctl_error(ENOTCONN, rbuf, rsize);
9578 
9579       if (desc->stype != SOCK_STREAM)
9580 	  return ctl_error(EINVAL, rbuf, rsize);
9581 
9582       if (*buf == 1 && !INET_IGNORED(desc)) {
9583 	  sock_select(desc, (FD_READ|FD_WRITE|FD_CLOSE|ERL_DRV_USE_NO_CALLBACK), 0);
9584 	  if (desc->active)
9585 	    desc->flags |= INET_IGNORE_READ;
9586 	  else
9587 	    desc->flags |= INET_IGNORE_PASSIVE;
9588       } else if (*buf == 0 && INET_IGNORED(desc)) {
9589 	  int flags = FD_CLOSE;
9590 	  if (desc->flags & INET_IGNORE_READ)
9591 	    flags |= FD_READ;
9592 	  if (desc->flags & INET_IGNORE_WRITE)
9593 	    flags |= FD_WRITE;
9594 	  desc->flags = INET_IGNORE_CLEAR(desc);
9595 	  if (flags != FD_CLOSE)
9596 	    sock_select(desc, flags, 1);
9597       } else
9598 	  return ctl_error(EINVAL, rbuf, rsize);
9599 
9600       return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
9601     }
9602 
9603     case INET_REQ_GETSERVBYNAME: { /* L1 Name-String L2 Proto-String */
9604 	char namebuf[256];
9605 	char protobuf[256];
9606 	char tbuf[2];
9607 	struct servent* srv;
9608 	short port;
9609 	int n;
9610 
9611 	if (len < 2)
9612 	    return ctl_error(EINVAL, rbuf, rsize);
9613 	n = get_int8(buf); buf++; len--;
9614 	if (n >= len) /* the = sign makes the test inklude next length byte */
9615 	    return ctl_error(EINVAL, rbuf, rsize);
9616 	memcpy(namebuf, buf, n);
9617 	namebuf[n] = '\0';
9618 	len -= n; buf += n;
9619 	n = get_int8(buf); buf++; len--;
9620 	if (n > len)
9621 	    return ctl_error(EINVAL, rbuf, rsize);
9622 	memcpy(protobuf, buf, n);
9623 	protobuf[n] = '\0';
9624 	if ((srv = sock_getservbyname(namebuf, protobuf)) == NULL)
9625 	    return ctl_error(EINVAL, rbuf, rsize);
9626 	port = sock_ntohs(srv->s_port);
9627 	put_int16(port, tbuf);
9628 	return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
9629     }
9630 
9631     case INET_REQ_GETSERVBYPORT: { /* P1 P0 L1 Proto-String */
9632 	char protobuf[256];
9633 	unsigned short port;
9634 	int n;
9635 	struct servent* srv;
9636 
9637 	if (len < 3)
9638 	    return ctl_error(EINVAL, rbuf, rsize);
9639 	port = get_int16(buf);
9640 	port = sock_htons(port);
9641 	buf += 2;
9642 	n = get_int8(buf); buf++; len -= 3;
9643 	if (n > len)
9644 	    return ctl_error(EINVAL, rbuf, rsize);
9645 	memcpy(protobuf, buf, n);
9646 	protobuf[n] = '\0';
9647 	if ((srv = sock_getservbyport(port, protobuf)) == NULL)
9648 	    return ctl_error(EINVAL, rbuf, rsize);
9649 	len = strlen(srv->s_name);
9650 	return ctl_reply(INET_REP_OK, srv->s_name, len, rbuf, rsize);
9651     }
9652 
9653     default:
9654 	return ctl_xerror(EXBADPORT, rbuf, rsize);
9655     }
9656 }
9657 
9658 /* update statistics on output packets */
inet_output_count(inet_descriptor * desc,ErlDrvSizeT len)9659 static void inet_output_count(inet_descriptor* desc, ErlDrvSizeT len)
9660 {
9661     unsigned long n = desc->send_cnt + 1;
9662 #ifndef ARCH_64
9663     Uint32 t = desc->send_oct[0] + len;
9664     int c = (t < desc->send_oct[0]);
9665 #endif
9666     double avg = desc->send_avg;
9667 
9668 #ifdef ARCH_64
9669     desc->send_oct += len;
9670 #else
9671     /* 64 bit octet count in 32 bit words */
9672     desc->send_oct[0] = t;
9673     desc->send_oct[1] += c;
9674 #endif
9675     if (n == 0) /* WRAP, use old avg as input to a new sequence */
9676 	n = 1;
9677     desc->send_avg += (len - avg) / n;
9678     if (len > desc->send_max)
9679 	desc->send_max = len;
9680     desc->send_cnt = n;
9681 }
9682 
9683 /* update statistics on input packets */
inet_input_count(inet_descriptor * desc,ErlDrvSizeT len)9684 static void inet_input_count(inet_descriptor* desc, ErlDrvSizeT len)
9685 {
9686     unsigned long n = desc->recv_cnt + 1;
9687 #ifndef ARCH_64
9688     Uint32 t = (desc->recv_oct[0] + len);
9689     int c = (t < desc->recv_oct[0]);
9690 #endif
9691     double avg = desc->recv_avg;
9692     double dvi;
9693 
9694 #ifdef ARCH_64
9695     desc->recv_oct += len;
9696 #else
9697     /* 64 bit octet count in 32 bit words */
9698     desc->recv_oct[0] = t;
9699     desc->recv_oct[1] += c;
9700 #endif
9701 
9702     if (n == 0) /* WRAP */
9703 	n = 1;
9704 
9705     /* average packet length */
9706     avg = avg + (len - avg) / n;
9707     desc->recv_avg = avg;
9708 
9709     if (len > desc->recv_max)
9710 	desc->recv_max = len;
9711 
9712     /* average deviation from average packet length */
9713     dvi = desc->recv_dvi;
9714     desc->recv_dvi = dvi + ((len - avg) - dvi) / n;
9715     desc->recv_cnt = n;
9716 }
9717 
9718 /*----------------------------------------------------------------------------
9719 
9720    TCP
9721 
9722 -----------------------------------------------------------------------------*/
9723 
9724 /*
9725 ** Set new size on buffer, used when packet size is determined
9726 ** and the buffer is to small.
9727 ** buffer must have a size of at least len bytes (counting from ptr_start!)
9728 */
tcp_expand_buffer(tcp_descriptor * desc,int len)9729 static int tcp_expand_buffer(tcp_descriptor* desc, int len)
9730 {
9731     ErlDrvBinary* bin;
9732     int offs1;
9733     int offs2;
9734     int used = desc->i_ptr_start - desc->i_buf->orig_bytes;
9735     int ulen = used + len;
9736 
9737     if (desc->i_bufsz >= ulen) /* packet will fit */
9738 	return 0;
9739     else if (desc->i_buf->orig_size >= ulen) { /* buffer is large enough */
9740 	desc->i_bufsz = ulen;  /* set "virtual" size */
9741 	return 0;
9742     }
9743 
9744     DEBUGF(("tcp_expand_buffer(%ld): s=%d, from %ld to %d\r\n",
9745 	    (long)desc->inet.port, desc->inet.s, desc->i_buf->orig_size, ulen));
9746 
9747     offs1 = desc->i_ptr_start - desc->i_buf->orig_bytes;
9748     offs2 = desc->i_ptr - desc->i_ptr_start;
9749 
9750     if ((bin = driver_realloc_binary(desc->i_buf, ulen)) == NULL)
9751 	return -1;
9752 
9753     desc->i_buf = bin;
9754     desc->i_ptr_start = bin->orig_bytes + offs1;
9755     desc->i_ptr       = desc->i_ptr_start + offs2;
9756     desc->i_bufsz     = ulen;
9757     return 0;
9758 }
9759 
9760 /* push data into i_buf  */
tcp_push_buffer(tcp_descriptor * desc,char * buf,int len)9761 static int tcp_push_buffer(tcp_descriptor* desc, char* buf, int len)
9762 {
9763     ErlDrvBinary* bin;
9764 
9765     if (desc->i_buf == NULL) {
9766 	bin = alloc_buffer(len);
9767 	sys_memcpy(bin->orig_bytes, buf, len);
9768 	desc->i_buf = bin;
9769 	desc->i_bufsz = len;
9770 	desc->i_ptr_start = desc->i_buf->orig_bytes;
9771 	desc->i_ptr = desc->i_ptr_start + len;
9772     }
9773     else {
9774 	char* start =  desc->i_buf->orig_bytes;
9775 	int sz_before = desc->i_ptr_start - start;
9776 	int sz_filled = desc->i_ptr - desc->i_ptr_start;
9777 
9778 	if (len <= sz_before) {
9779 	    sys_memcpy(desc->i_ptr_start - len, buf, len);
9780 	    desc->i_ptr_start -= len;
9781 	}
9782 	else {
9783 	    bin = alloc_buffer(desc->i_bufsz+len);
9784 	    sys_memcpy(bin->orig_bytes, buf, len);
9785 	    sys_memcpy(bin->orig_bytes+len, desc->i_ptr_start, sz_filled);
9786 	    free_buffer(desc->i_buf);
9787 	    desc->i_bufsz += len;
9788 	    desc->i_buf = bin;
9789 	    desc->i_ptr_start = bin->orig_bytes;
9790 	    desc->i_ptr = desc->i_ptr_start + sz_filled + len;
9791 	}
9792     }
9793     desc->i_remain = 0;
9794     return 0;
9795 }
9796 
9797 /* clear CURRENT input buffer */
tcp_clear_input(tcp_descriptor * desc)9798 static void tcp_clear_input(tcp_descriptor* desc)
9799 {
9800     if (desc->i_buf != NULL)
9801 	free_buffer(desc->i_buf);
9802     desc->i_buf = NULL;
9803     desc->i_remain    = 0;
9804     desc->i_ptr       = NULL;
9805     desc->i_ptr_start = NULL;
9806     desc->i_bufsz     = 0;
9807 }
9808 
9809 /* clear QUEUED output */
tcp_clear_output(tcp_descriptor * desc)9810 static void tcp_clear_output(tcp_descriptor* desc)
9811 {
9812     ErlDrvPort ix  = desc->inet.port;
9813     ErlDrvSizeT qsz = driver_sizeq(ix);
9814 
9815     driver_deq(ix, qsz);
9816     send_empty_out_q_msgs(INETP(desc));
9817 }
9818 
9819 
9820 /* Move data so that ptr_start point at buf->orig_bytes */
tcp_restart_input(tcp_descriptor * desc)9821 static void tcp_restart_input(tcp_descriptor* desc)
9822 {
9823     if (desc->i_ptr_start != desc->i_buf->orig_bytes) {
9824 	int n = desc->i_ptr - desc->i_ptr_start;
9825 
9826 	DEBUGF(("tcp_restart_input: move %d bytes\r\n", n));
9827 	sys_memmove(desc->i_buf->orig_bytes, desc->i_ptr_start, n);
9828 	desc->i_ptr_start = desc->i_buf->orig_bytes;
9829 	desc->i_ptr = desc->i_ptr_start + n;
9830     }
9831 }
9832 
9833 
/*
 * Driver init for tcp_inet: nothing to set up, always returns 0.
 */
static int tcp_inet_init(void)
{
    int result = 0;

    DEBUGF(("tcp_inet_init() {}\r\n"));
    return result;
}
9839 
9840 /* initialize the TCP descriptor */
9841 
prep_tcp_inet_start(ErlDrvPort port,char * args)9842 static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args)
9843 {
9844     tcp_descriptor* desc;
9845     DEBUGF(("tcp_inet_start(%ld) {\r\n", (long)port));
9846 
9847     desc = (tcp_descriptor*)
9848 	inet_start(port, sizeof(tcp_descriptor), IPPROTO_TCP);
9849     if (desc == NULL)
9850 	return ERL_DRV_ERROR_ERRNO;
9851     desc->high = INET_HIGH_WATERMARK;
9852     desc->low  = INET_LOW_WATERMARK;
9853     desc->send_timeout = INET_INFINITY;
9854     desc->send_timeout_close = 0;
9855     desc->busy_on_send = 0;
9856     desc->i_buf = NULL;
9857     desc->i_ptr = NULL;
9858     desc->i_ptr_start = NULL;
9859     desc->i_remain = 0;
9860     desc->i_bufsz = 0;
9861     desc->tcp_add_flags = 0;
9862     desc->http_state = 0;
9863     desc->mtd = NULL;
9864     desc->mtd_cache = NULL;
9865     desc->multi_first = desc->multi_last = NULL;
9866     DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port));
9867     return (ErlDrvData) desc;
9868 }
9869 
tcp_inet_start(ErlDrvPort port,char * args)9870 static ErlDrvData tcp_inet_start(ErlDrvPort port, char* args)
9871 {
9872     ErlDrvData data = prep_tcp_inet_start(port, args);
9873     set_default_msgq_limits(port);
9874     return data;
9875 }
9876 /* Copy a descriptor, by creating a new port with same settings
9877  * as the descriptor desc.
9878  * return NULL on error (SYSTEM_LIMIT no ports avail)
9879  */
tcp_inet_copy(tcp_descriptor * desc,SOCKET s,ErlDrvTermData owner,int * err)9880 static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s,
9881 				     ErlDrvTermData owner, int* err)
9882 {
9883     ErlDrvSizeT q_low, q_high;
9884     ErlDrvPort port = desc->inet.port;
9885     tcp_descriptor* copy_desc;
9886 
9887     copy_desc = (tcp_descriptor*) prep_tcp_inet_start(port, NULL);
9888 
9889     /* Setup event if needed */
9890     if ((copy_desc->inet.s = s) != INVALID_SOCKET) {
9891 	if ((copy_desc->inet.event = sock_create_event(INETP(copy_desc))) ==
9892 	    INVALID_EVENT) {
9893 	    *err = sock_errno();
9894 	    FREE(copy_desc);
9895 	    return NULL;
9896 	}
9897     }
9898 
9899     /* Some flags must be inherited at this point */
9900     copy_desc->inet.mode     = desc->inet.mode;
9901     copy_desc->inet.exitf    = desc->inet.exitf;
9902     copy_desc->inet.deliver  = desc->inet.deliver;
9903     copy_desc->inet.htype    = desc->inet.htype;
9904     copy_desc->inet.psize    = desc->inet.psize;
9905     copy_desc->inet.stype    = desc->inet.stype;
9906     copy_desc->inet.sfamily  = desc->inet.sfamily;
9907     copy_desc->inet.hsz      = desc->inet.hsz;
9908     copy_desc->inet.bufsz    = desc->inet.bufsz;
9909     copy_desc->high          = desc->high;
9910     copy_desc->low           = desc->low;
9911     copy_desc->send_timeout  = desc->send_timeout;
9912     copy_desc->send_timeout_close = desc->send_timeout_close;
9913 
9914     copy_desc->tcp_add_flags = desc->tcp_add_flags
9915         & (TCP_ADDF_SHOW_ECONNRESET | TCP_ADDF_LINGER_ZERO);
9916 
9917     /* The new port will be linked and connected to the original caller */
9918     port = driver_create_port(port, owner, "tcp_inet", (ErlDrvData) copy_desc);
9919     if ((long)port == -1) {
9920 	*err = INET_ERRNO_SYSTEM_LIMIT;
9921 	FREE(copy_desc);
9922 	return NULL;
9923     }
9924 
9925     /* Read busy msgq limits of parent */
9926     q_low = q_high = ERL_DRV_BUSY_MSGQ_READ_ONLY;
9927     erl_drv_busy_msgq_limits(desc->inet.port, &q_low, &q_high);
9928     /* Write same busy msgq limits to child */
9929     erl_drv_busy_msgq_limits(port, &q_low, &q_high);
9930 
9931     copy_desc->inet.port = port;
9932     copy_desc->inet.dport = driver_mk_port(port);
9933 
9934     *err = 0;
9935     return copy_desc;
9936 }
9937 
9938 /*
9939 ** Check Special cases:
9940 ** 1. we are a listener doing nb accept -> report error on accept !
9941 ** 2. we are doing accept -> restore listener state
9942 */
tcp_close_check(tcp_descriptor * desc)9943 static void tcp_close_check(tcp_descriptor* desc)
9944 {
9945     /* XXX:PaN - multiple clients to handle! */
9946     if (desc->inet.state == INET_STATE_ACCEPTING) {
9947 	inet_async_op *this_op = desc->inet.opt;
9948 	sock_select(INETP(desc), FD_ACCEPT, 0);
9949 	desc->inet.state = INET_STATE_LISTENING;
9950 	if (this_op != NULL) {
9951 	    driver_demonitor_process(desc->inet.port, &(this_op->monitor));
9952 	}
9953 	async_error_am(INETP(desc), am_closed);
9954     }
9955     else if (desc->inet.state == INET_STATE_MULTI_ACCEPTING) {
9956 	int id,req;
9957 	ErlDrvTermData caller;
9958 	ErlDrvMonitor monitor;
9959 
9960 	sock_select(INETP(desc), FD_ACCEPT, 0);
9961 	desc->inet.state = INET_STATE_LISTENING;
9962 	while (deq_multi_op(desc,&id,&req,&caller,NULL,&monitor) == 0) {
9963 	    driver_demonitor_process(desc->inet.port, &monitor);
9964 	    send_async_error(desc->inet.dport, id, caller, am_closed);
9965 	}
9966     }
9967     else if (desc->inet.state == INET_STATE_CONNECTING) {
9968 	async_error_am(INETP(desc), am_closed);
9969     }
9970     else if (desc->inet.state == INET_STATE_CONNECTED) {
9971 	async_error_am_all(INETP(desc), am_closed);
9972     }
9973     clean_multi_timers(desc, desc->inet.port);
9974 }
9975 
9976 /*
9977 ** Cleanup & Free
9978 */
tcp_inet_stop(ErlDrvData e)9979 static void tcp_inet_stop(ErlDrvData e)
9980 {
9981     tcp_descriptor* desc = (tcp_descriptor*)e;
9982     DEBUGF(("tcp_inet_stop(%ld) {s=%d\r\n",
9983 	    (long)desc->inet.port, desc->inet.s));
9984 
9985     tcp_close_check(desc);
9986     tcp_clear_input(desc);
9987 
9988 #ifdef HAVE_SENDFILE
9989     if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
9990         desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
9991         close(desc->sendfile.dup_file_fd);
9992         DEBUGF(("tcp_inet_stop(%p): SENDFILE dup closed %d\r\n",
9993                 desc->inet.port, desc->sendfile.dup_file_fd));
9994     }
9995 #endif
9996 
9997     DEBUGF(("tcp_inet_stop(%ld) }\r\n", (long)desc->inet.port));
9998     inet_stop(INETP(desc));
9999 }
10000 
10001 /* Closes a tcp descriptor without leaving things hanging; the VM keeps trying
10002  * to flush IO queues as long as it contains anything even after the port has
10003  * been closed from the erlang side, which is desired behavior (Think escripts
10004  * writing to files) but pretty hopeless if the underlying fd has been set to
10005  * INVALID_SOCKET through desc_close.
10006  *
10007  * This function should be used in place of desc_close/erl_inet_close in all
10008  * TCP-related operations. Note that this only closes the desc cleanly; it
10009  * will be freed through tcp_inet_stop later on. */
tcp_desc_close(tcp_descriptor * desc)10010 static void tcp_desc_close(tcp_descriptor* desc)
10011 {
10012 
10013     tcp_clear_input(desc);
10014     tcp_clear_output(desc);
10015 
10016     erl_inet_close(INETP(desc));
10017 }
10018 
/*
 * Receive timeout: stop selecting for read/close on the socket, forget
 * the remaining byte count and answer the pending request with
 * 'timeout'. Asserts that the socket is not active. dummy is not used.
 */
static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy)
{
    tcp_descriptor* td = (tcp_descriptor*)e;
    inet_descriptor* idesc = INETP(td);

    ASSERT(!td->inet.active);

    sock_select(idesc,(FD_READ|FD_CLOSE),0);
    td->i_remain = 0;
    async_error_am(idesc, am_timeout);
}
10027 
10028 /* TCP requests from Erlang */
tcp_inet_ctl(ErlDrvData e,unsigned int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)10029 static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
10030 				 char* buf, ErlDrvSizeT len,
10031 				 char** rbuf, ErlDrvSizeT rsize)
10032 {
10033     tcp_descriptor* desc = (tcp_descriptor*)e;
10034 
10035     cmd -= ERTS_INET_DRV_CONTROL_MAGIC_NUMBER;
10036     switch(cmd) {
10037     case INET_REQ_OPEN: { /* open socket and return internal index */
10038 	int domain;
10039 	DEBUGF(("tcp_inet_ctl(%ld): OPEN\r\n", (long)desc->inet.port));
10040 	if (len != 2) return ctl_error(EINVAL, rbuf, rsize);
10041 	switch(buf[0]) {
10042 	case INET_AF_INET:
10043 	    domain = AF_INET;
10044 	    break;
10045 #if defined(HAVE_IN6) && defined(AF_INET6)
10046 	case INET_AF_INET6:
10047 	    domain = AF_INET6;
10048 	    break;
10049 #endif
10050 #ifdef HAVE_SYS_UN_H
10051 	case INET_AF_LOCAL:
10052 	    domain = AF_UNIX;
10053 	    break;
10054 #endif
10055 	default:
10056 	    return ctl_xerror(str_eafnosupport, rbuf, rsize);
10057 	}
10058 	if (buf[1] != INET_TYPE_STREAM) return ctl_error(EINVAL, rbuf, rsize);
10059 	return inet_ctl_open(INETP(desc), domain, SOCK_STREAM, rbuf, rsize);
10060 	break;
10061     }
10062 
10063     case INET_REQ_FDOPEN: {  /* pass in an open (and optionally bound) socket */
10064 	int domain;
10065         int bound;
10066 	DEBUGF(("tcp_inet_ctl(%ld): FDOPEN\r\n", (long)desc->inet.port));
10067 	if (len != 6 && len != 10) return ctl_error(EINVAL, rbuf, rsize);
10068 	switch(buf[0]) {
10069 	case INET_AF_INET:
10070 	    domain = AF_INET;
10071 	    break;
10072 #if defined(HAVE_IN6) && defined(AF_INET6)
10073 	case INET_AF_INET6:
10074 	    domain = AF_INET6;
10075 	    break;
10076 #endif
10077 #ifdef HAVE_SYS_UN_H
10078 	case INET_AF_LOCAL:
10079 	    domain = AF_UNIX;
10080 	    break;
10081 #endif
10082 	default:
10083 	    return ctl_xerror(str_eafnosupport, rbuf, rsize);
10084 	}
10085 	if (buf[1] != INET_TYPE_STREAM) return ctl_error(EINVAL, rbuf, rsize);
10086 
10087         if (len == 6) bound = 1;
10088         else bound = get_int32(buf+2+4);
10089 
10090 	return inet_ctl_fdopen(INETP(desc), domain, SOCK_STREAM,
10091                                (SOCKET) get_int32(buf+2),
10092                                bound, rbuf, rsize);
10093 	break;
10094     }
10095 
10096     case INET_REQ_LISTEN: { /* argument backlog */
10097 
10098 	int backlog;
10099 	DEBUGF(("tcp_inet_ctl(%ld): LISTEN\r\n", (long)desc->inet.port));
10100 	if (desc->inet.state == INET_STATE_CLOSED)
10101 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
10102 	if (!IS_OPEN(INETP(desc)))
10103 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
10104 	if (len != 2)
10105 	    return ctl_error(EINVAL, rbuf, rsize);
10106 	backlog = get_int16(buf);
10107 	if (IS_SOCKET_ERROR(sock_listen(desc->inet.s, backlog)))
10108 	    return ctl_error(sock_errno(), rbuf, rsize);
10109 	desc->inet.state = INET_STATE_LISTENING;
10110 	return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
10111     }
10112 
10113 
10114     case INET_REQ_CONNECT: {   /* do async connect */
10115 	int code;
10116 	char tbuf[2], *xerror;
10117 	unsigned timeout;
10118 
10119 	DEBUGF(("tcp_inet_ctl(%ld): CONNECT\r\n", (long)desc->inet.port));
10120 	/* INPUT: Timeout(4), Port(2), Address(N) */
10121 
10122 	if (!IS_OPEN(INETP(desc)))
10123 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
10124 	if (IS_CONNECTED(INETP(desc)))
10125 	    return ctl_error(EISCONN, rbuf, rsize);
10126 	if (IS_CONNECTING(INETP(desc)))
10127 	    return ctl_error(EINVAL, rbuf, rsize);
10128 	if (len < 6)
10129 	    return ctl_error(EINVAL, rbuf, rsize);
10130 	timeout = get_int32(buf);
10131 	buf += 4;
10132 	len -= 4;
10133 	if ((xerror = inet_set_faddress
10134 	     (desc->inet.sfamily, &desc->inet.remote, &buf, &len)) != NULL)
10135 	    return ctl_xerror(xerror, rbuf, rsize);
10136 
10137 	code = sock_connect(desc->inet.s,
10138 			    (struct sockaddr*) &desc->inet.remote, len);
10139 	if (IS_SOCKET_ERROR(code) &&
10140 		((sock_errno() == ERRNO_BLOCK) ||  /* Winsock2 */
10141 		 (sock_errno() == EINPROGRESS))) {	/* Unix & OSE!! */
10142           sock_select(INETP(desc), FD_CONNECT, 1);
10143 	    desc->inet.state = INET_STATE_CONNECTING;
10144 	    if (timeout != INET_INFINITY)
10145 		driver_set_timer(desc->inet.port, timeout);
10146 	    enq_async(INETP(desc), tbuf, INET_REQ_CONNECT);
10147 	}
10148 	else if (code == 0) { /* ok we are connected */
10149 	    desc->inet.state = INET_STATE_CONNECTED;
10150 	    if (desc->inet.active)
10151 		sock_select(INETP(desc), (FD_READ|FD_CLOSE), 1);
10152 	    enq_async(INETP(desc), tbuf, INET_REQ_CONNECT);
10153 	    async_ok(INETP(desc));
10154 	}
10155 	else {
10156 	    return ctl_error(sock_errno(), rbuf, rsize);
10157 	}
10158 	return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
10159     }
10160 
10161     case INET_REQ_ACCEPT: {  /* do async accept */
10162 	char tbuf[2];
10163 	unsigned timeout;
10164 	inet_address remote;
10165 	unsigned int n;
10166 	SOCKET s;
10167 
10168 	DEBUGF(("tcp_inet_ctl(%ld): ACCEPT\r\n", (long)desc->inet.port));
10169 	/* INPUT: Timeout(4) */
10170 
10171 	if ((desc->inet.state != INET_STATE_LISTENING && desc->inet.state != INET_STATE_ACCEPTING &&
10172 	     desc->inet.state != INET_STATE_MULTI_ACCEPTING) || len != 4) {
10173 	    return ctl_error(EINVAL, rbuf, rsize);
10174 	}
10175 
10176 	timeout = get_int32(buf);
10177 
10178 	if (desc->inet.state == INET_STATE_ACCEPTING) {
10179 	    unsigned long time_left = 0;
10180 	    int oid = 0;
10181 	    ErlDrvTermData ocaller = ERL_DRV_NIL;
10182 	    int oreq = 0;
10183 	    unsigned otimeout = 0;
10184 	    ErlDrvTermData caller = driver_caller(desc->inet.port);
10185 	    MultiTimerData *mtd = NULL,*omtd = NULL;
10186 	    ErlDrvMonitor monitor, omonitor;
10187 
10188 
10189 	    if (driver_monitor_process(desc->inet.port, caller ,&monitor) != 0) {
10190 		return ctl_xerror("noproc", rbuf, rsize);
10191 	    }
10192 	    deq_async_w_tmo(INETP(desc),&oid,&ocaller,&oreq,&otimeout,&omonitor);
10193 	    if (otimeout != INET_INFINITY) {
10194 		driver_read_timer(desc->inet.port, &time_left);
10195 		driver_cancel_timer(desc->inet.port);
10196 		if (time_left <= 0) {
10197 		    time_left = 1;
10198 		}
10199 		omtd = add_multi_timer(desc, desc->inet.port, ocaller,
10200 				       time_left, &tcp_inet_multi_timeout);
10201 	    }
10202 	    enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor);
10203 	    if (timeout != INET_INFINITY) {
10204 		mtd = add_multi_timer(desc, desc->inet.port, caller,
10205 				      timeout, &tcp_inet_multi_timeout);
10206 	    }
10207 	    enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
10208 	    desc->inet.state = INET_STATE_MULTI_ACCEPTING;
10209 	    return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
10210 	} else if (desc->inet.state == INET_STATE_MULTI_ACCEPTING) {
10211 	    ErlDrvTermData caller = driver_caller(desc->inet.port);
10212 	    MultiTimerData *mtd = NULL;
10213 	    ErlDrvMonitor monitor;
10214 
10215 	    if (driver_monitor_process(desc->inet.port, caller ,&monitor) != 0) {
10216 		return ctl_xerror("noproc", rbuf, rsize);
10217 	    }
10218 	    if (timeout != INET_INFINITY) {
10219 		mtd = add_multi_timer(desc, desc->inet.port, caller,
10220 				      timeout, &tcp_inet_multi_timeout);
10221 	    }
10222 	    enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
10223 	    return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
10224  	} else {
10225 	    n = sizeof(desc->inet.remote);
10226 	    sys_memzero((char *) &remote, n);
10227 	    s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &n);
10228 	    if (s == INVALID_SOCKET) {
10229 		if (sock_errno() == ERRNO_BLOCK) {
10230 		    ErlDrvMonitor monitor;
10231 		    if (driver_monitor_process(desc->inet.port, driver_caller(desc->inet.port),
10232 					       &monitor) != 0) {
10233 			return ctl_xerror("noproc", rbuf, rsize);
10234 		    }
10235 		    enq_async_w_tmo(INETP(desc), tbuf, INET_REQ_ACCEPT, timeout, &monitor);
10236 		    desc->inet.state = INET_STATE_ACCEPTING;
10237 		    sock_select(INETP(desc),FD_ACCEPT,1);
10238 		    if (timeout != INET_INFINITY) {
10239 			driver_set_timer(desc->inet.port, timeout);
10240 		    }
10241 		} else {
10242 		    return ctl_error(sock_errno(), rbuf, rsize);
10243 		}
10244 	    } else {
10245 		ErlDrvTermData caller = driver_caller(desc->inet.port);
10246 		tcp_descriptor* accept_desc;
10247 		int err;
10248 
10249 		if ((accept_desc = tcp_inet_copy(desc,s,caller,&err)) == NULL) {
10250 		    sock_close(s);
10251 		    return ctl_error(err, rbuf, rsize);
10252 		}
10253 		/* FIXME: may MUST lock access_port
10254 		 * 1 - Port is accessible via the erlang:ports()
10255 		 * 2 - Port is accessible via callers process_info(links)
10256 		 */
10257 		accept_desc->inet.remote = remote;
10258 		SET_NONBLOCKING(accept_desc->inet.s);
10259 #ifdef __WIN32__
10260 		driver_select(accept_desc->inet.port, accept_desc->inet.event,
10261 			      ERL_DRV_READ, 1);
10262 #endif
10263 		accept_desc->inet.state = INET_STATE_CONNECTED;
10264 		enq_async(INETP(desc), tbuf, INET_REQ_ACCEPT);
10265 		async_ok_port(INETP(desc), accept_desc->inet.dport);
10266 	    }
10267 	    return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
10268 	}
10269     }
10270     case INET_REQ_CLOSE:
10271 	DEBUGF(("tcp_inet_ctl(%ld): CLOSE\r\n", (long)desc->inet.port));
10272 	tcp_close_check(desc);
10273 	tcp_desc_close(desc);
10274 	return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
10275 
10276 
10277     case TCP_REQ_RECV: {
10278 	unsigned timeout;
10279 	char tbuf[2];
10280 	int n;
10281 
10282 	DEBUGF(("tcp_inet_ctl(%ld): RECV (s=%d)\r\n",
10283 		(long)desc->inet.port, desc->inet.s));
10284 	/* INPUT: Timeout(4),  Length(4) */
10285 	if (!IS_CONNECTED(INETP(desc))) {
10286 	    if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_RECV) {
10287 		desc->tcp_add_flags &= ~(TCP_ADDF_DELAYED_CLOSE_RECV|
10288 					 TCP_ADDF_DELAYED_CLOSE_SEND);
10289 		if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) {
10290 		    desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET;
10291 		    return ctl_reply(INET_REP_ERROR, "econnreset", 10, rbuf, rsize);
10292 		} else
10293 		    return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize);
10294 	    }
10295 	    return ctl_error(ENOTCONN, rbuf, rsize);
10296 	}
10297 	if (desc->inet.active || (len != 8))
10298 	    return ctl_error(EINVAL, rbuf, rsize);
10299 	timeout = get_int32(buf);
10300 	buf += 4;
10301 	n = get_int32(buf);
10302 	DEBUGF(("tcp_inet_ctl(%ld) timeout = %d, n = %d\r\n",
10303 		(long)desc->inet.port,timeout,n));
10304 	if ((desc->inet.htype != TCP_PB_RAW) && (n != 0))
10305 	    return ctl_error(EINVAL, rbuf, rsize);
10306 	if (n > TCP_MAX_PACKET_SIZE)
10307 	    return ctl_error(ENOMEM, rbuf, rsize);
10308 	if (enq_async(INETP(desc), tbuf, TCP_REQ_RECV) < 0)
10309 	    return ctl_error(EALREADY, rbuf, rsize);
10310 
10311 	if (INET_IGNORED(INETP(desc)) || tcp_recv(desc, n) == 0) {
10312 	    if (timeout == 0)
10313 		async_error_am(INETP(desc), am_timeout);
10314 	    else {
10315 		if (timeout != INET_INFINITY)
10316                     add_multi_timer(desc, INETP(desc)->port, 0,
10317                                     timeout, &tcp_inet_recv_timeout);
10318 		if (!INET_IGNORED(INETP(desc)))
10319 		    sock_select(INETP(desc),(FD_READ|FD_CLOSE),1);
10320 		else
10321 		  INETP(desc)->flags |= INET_IGNORE_READ;
10322 	    }
10323 	}
10324 	return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
10325     }
10326 
10327     case TCP_REQ_UNRECV: {
10328 	DEBUGF(("tcp_inet_ctl(%ld): UNRECV\r\n", (long)desc->inet.port));
10329 	if (!IS_CONNECTED(INETP(desc)))
10330    	    return ctl_error(ENOTCONN, rbuf, rsize);
10331 	tcp_push_buffer(desc, buf, len);
10332 	if (desc->inet.active)
10333 	    tcp_deliver(desc, 0);
10334 	return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
10335     }
10336     case TCP_REQ_SHUTDOWN: {
10337 	int how;
10338 	DEBUGF(("tcp_inet_ctl(%ld): FDOPEN\r\n", (long)desc->inet.port));
10339 	if (!IS_CONNECTED(INETP(desc))) {
10340 	    return ctl_error(ENOTCONN, rbuf, rsize);
10341 	}
10342 	if (len != 1) {
10343 	    return ctl_error(EINVAL, rbuf, rsize);
10344 	}
10345 	how = buf[0];
10346 	if (how != TCP_SHUT_RD && driver_sizeq(desc->inet.port) > 0) {
10347 	    if (how == TCP_SHUT_WR) {
10348 		desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_WR;
10349 	    } else if (how == TCP_SHUT_RDWR) {
10350 		desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_RDWR;
10351 	    }
10352 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
10353 	}
10354 	if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) {
10355 	    if (how != TCP_SHUT_RD)
10356 		desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE;
10357 	    return ctl_error(sock_errno(), rbuf, rsize);
10358 	} else {
10359 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
10360 	}
10361     }
10362 
10363     case TCP_REQ_SENDFILE: {
10364 #ifdef HAVE_SENDFILE
10365         const ErlDrvSizeT required_len =
10366             sizeof(desc->sendfile.dup_file_fd) +
10367             sizeof(Uint64) * 2;
10368 
10369         int raw_file_fd;
10370 
10371         DEBUGF(("tcp_inet_ctl(%ld): SENDFILE\r\n", (long)desc->inet.port));
10372 
10373         if (len != required_len) {
10374             return ctl_error(EINVAL, rbuf, rsize);
10375         } else if (!IS_CONNECTED(INETP(desc))) {
10376             return ctl_error(ENOTCONN, rbuf, rsize);
10377         } else if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
10378             /* This should not happen as prim_inet.erl makes
10379                sure that only the controlling process can
10380                use the sendfile operation. But we add this
10381                check here anyways just in case that prim_inet
10382                is changed... */
10383             return ctl_error(EINVAL, rbuf, rsize);
10384         }
10385 
10386         sys_memcpy(&raw_file_fd, buf, sizeof(raw_file_fd));
10387         buf += sizeof(raw_file_fd);
10388 
10389         desc->sendfile.dup_file_fd = dup(raw_file_fd);
10390 
10391         DEBUGF(("tcp_inet_ctl(%p): SENDFILE dup %d\r\n",
10392                 desc->inet.port, desc->sendfile.dup_file_fd));
10393 
10394         if(desc->sendfile.dup_file_fd == -1) {
10395             return ctl_error(errno, rbuf, rsize);
10396         }
10397 
10398         desc->sendfile.offset = get_int64(buf);
10399         buf += sizeof(Uint64);
10400 
10401         desc->sendfile.length = get_int64(buf);
10402         buf += sizeof(Uint64);
10403 
10404         ASSERT(desc->sendfile.offset >= 0);
10405         ASSERT(desc->sendfile.length >= 0);
10406 
10407         desc->sendfile.ioq_skip = driver_sizeq(desc->inet.port);
10408         desc->sendfile.bytes_sent = 0;
10409 
10410         desc->inet.caller = driver_caller(desc->inet.port);
10411         desc->tcp_add_flags |= TCP_ADDF_SENDFILE;
10412 
10413         /* See if we can finish sending without selecting & rescheduling. */
10414         if (tcp_inet_sendfile(desc) == 0) {
10415             if(desc->sendfile.length > 0) {
10416                 sock_select(INETP(desc), FD_WRITE, 1);
10417             }
10418         }
10419         return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
10420 #else
10421         return ctl_error(ENOTSUP, rbuf, rsize);
10422 #endif
10423     }
10424 
10425     default:
10426 	DEBUGF(("tcp_inet_ctl(%ld): %u\r\n", (long)desc->inet.port, cmd));
10427 	return inet_ctl(INETP(desc), cmd, buf, len, rbuf, rsize);
10428     }
10429 
10430 }
10431 
/*
 * Timer callback for a send that stayed blocked for too long.
 * Takes the port out of the busy state, replies 'timeout' to the
 * process recorded as busy_caller and, when send_timeout_close is set,
 * closes the descriptor.  The second argument is not used.
 */
static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy)
{
    tcp_descriptor* td = (tcp_descriptor*) e;

    ASSERT(IS_BUSY(INETP(td)));
    ASSERT(td->busy_on_send);

    /* the reply below is addressed to inet.caller */
    td->inet.caller = td->inet.busy_caller;

    /* leave the busy state */
    td->inet.state &= ~INET_F_BUSY;
    td->busy_on_send = 0;
    set_busy_port(td->inet.port, 0);

    inet_reply_error_am(INETP(td), am_timeout);

    if (td->send_timeout_close) {
        tcp_desc_close(td);
    }
}
10446 
10447 /*
10448 ** tcp_inet_timeout:
** called when the timer expires:
10450 ** TCP socket may be:
10451 **
10452 ** a)  receiving   -- send timeout
10453 ** b)  connecting  -- close socket
10454 ** c)  accepting   -- reset listener
10455 **
10456 */
10457 
/*
 * Port timer callback.  What happens depends on the state the
 * descriptor is in when the timer fires:
 *   - multi-client or connected : run the expired multi timers
 *   - connecting                : close the socket, reply 'timeout'
 *   - accepting                 : stop accepting, go back to listening,
 *                                 reply 'timeout'
 * In any other state nothing is done.
 */
static void tcp_inet_timeout(ErlDrvData e)
{
    tcp_descriptor* td = (tcp_descriptor*) e;
    int st = td->inet.state;

    DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n",
            (long)td->inet.port, td->inet.s));

    if ((st & INET_F_MULTI_CLIENT) ||
        ((st & INET_STATE_CONNECTED) == INET_STATE_CONNECTED)) {
        /* Multi-client always means multi-timers, and so does a
         * connected socket */
        fire_multi_timers(td, td->inet.port, e);
    }
    else if ((st & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) {
        /* assume connect timeout */
        /* close the socket since it's not usable (see man pages) */
        tcp_desc_close(td);
        async_error_am(INETP(td), am_timeout);
    }
    else if ((st & INET_STATE_ACCEPTING) == INET_STATE_ACCEPTING) {
        inet_async_op *pending = td->inet.opt;

        /* timer is set on accept */
        sock_select(INETP(td), FD_ACCEPT, 0);
        if (pending != NULL) {
            /* drop the monitor stored with the pending request */
            driver_demonitor_process(td->inet.port, &(pending->monitor));
        }
        td->inet.state = INET_STATE_LISTENING;
        async_error_am(INETP(td), am_timeout);
    }

    DEBUGF(("tcp_inet_timeout(%ld) }\r\n", (long)td->inet.port));
}
10488 
/*
 * Multi-timer callback for one queued accept request.
 * Removes the request belonging to 'caller' from the multi-op queue,
 * drops its process monitor and sends that caller an async 'timeout'
 * error.  When the queue becomes empty the socket stops selecting for
 * accept and returns to the listening state.  Does nothing if no
 * request is found for 'caller'.
 */
static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller)
{
    tcp_descriptor* td = (tcp_descriptor*) e;
    ErlDrvMonitor mon;
    int op_id;
    int op_req;

    if (remove_multi_op(td, &op_id, &op_req, caller, NULL, &mon) == 0) {
        driver_demonitor_process(td->inet.port, &mon);

        if (td->multi_first == NULL) {
            /* that was the last waiting acceptor */
            sock_select(INETP(td), FD_ACCEPT, 0);
            td->inet.state = INET_STATE_LISTENING; /* restore state */
        }

        send_async_error(td->inet.dport, op_id, caller, am_timeout);
    }
}
10505 
10506 
10507 
10508 /*
10509 ** command:
10510 **   output on a socket only !
10511 **   a reply code will be sent to connected (caller later)
10512 **   {inet_reply, S, Status}
** NOTE! normal sockets use the tcp_inet_commandv
10514 ** but distribution still uses the tcp_inet_command!!
10515 */
10516 
/*
 * Output callback taking a flat buffer.
 * Records the calling process as inet.caller, then either replies
 * ENOTCONN (socket not connected) or hands the data to tcp_send().
 * An ok reply is sent here only when tcp_send() returns 0.
 */
static void tcp_inet_command(ErlDrvData e, char *buf, ErlDrvSizeT len)
{
    tcp_descriptor* td = (tcp_descriptor*) e;

    td->inet.caller = driver_caller(td->inet.port);

    DEBUGF(("tcp_inet_command(%ld) {s=%d\r\n",
            (long)td->inet.port, td->inet.s));

    if (IS_CONNECTED(INETP(td))) {
        if (tcp_send(td, buf, len) == 0) {
            inet_reply_ok(INETP(td));
        }
    } else {
        inet_reply_error(INETP(td), ENOTCONN);
    }

    DEBUGF(("tcp_inet_command(%ld) }\r\n", (long)td->inet.port));
}
10530 
/*
 * Output callback taking an I/O vector.
 * Records the calling process as inet.caller and then:
 *   - not connected: replies ECONNRESET or 'closed' once if a delayed
 *     close was flagged for send, otherwise ENOTCONN;
 *   - a shutdown is pending: fails the send with EPIPE;
 *   - otherwise passes the vector to tcp_sendv() and replies ok when
 *     it returns 0.
 */
static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev)
{
    tcp_descriptor* desc = (tcp_descriptor*)e;
    desc->inet.caller = driver_caller(desc->inet.port);

    /* entry trace: function name spelled the same as in the exit trace */
    DEBUGF(("tcp_inet_commandv(%ld) {s=%d\r\n",
            (long)desc->inet.port, desc->inet.s));
    if (!IS_CONNECTED(INETP(desc))) {
        if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_SEND) {
            desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_CLOSE_SEND;
            if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) {
                /* Don't clear flag. Leave it enabled for the next receive
                 * operation.
                 */
                inet_reply_error(INETP(desc), ECONNRESET);
            } else {
                inet_reply_error_am(INETP(desc), am_closed);
            }
        }
        else {
            inet_reply_error(INETP(desc), ENOTCONN);
        }
    }
    else if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) {
        tcp_shutdown_error(desc, EPIPE);
    }
    else if (tcp_sendv(desc, ev) == 0) {
        inet_reply_ok(INETP(desc));
    }
    DEBUGF(("tcp_inet_commandv(%ld) }\r\n", (long)desc->inet.port));
}
10558 
/*
 * Flush callback: decides whether the queued output should be thrown
 * away instead of being kept.  The queue is cleared when the socket is
 * not selecting for write, when TCP_ADDF_LINGER_ZERO is set, or (with
 * sendfile support) when a sendfile operation is in progress.
 */
static void tcp_inet_flush(ErlDrvData e)
{
    tcp_descriptor* td = (tcp_descriptor*) e;
    int drop;

    /* Discard send queue to avoid hanging port (OTP-7615) */
    drop = !(td->inet.event_mask & FD_WRITE);
    drop |= td->tcp_add_flags & TCP_ADDF_LINGER_ZERO;

#ifdef HAVE_SENDFILE
    /* The old file driver aborted when it was stopped during sendfile, so
     * we'll clear the flag and discard all output. It is the job of
     * tcp_inet_stop to close the extra sendfile fd. */
    if (td->tcp_add_flags & TCP_ADDF_SENDFILE)
        drop = 1;
#endif

    if (drop)
        tcp_clear_output(td);
}
10582 
/*
 * Process-exit callback: a monitored process has gone away.
 *
 * In the multi-accepting state the queued request of that process is
 * removed together with its multi timer.  In the accepting state the
 * pending request is dequeued and the port timer cancelled.  In both
 * cases, once nobody is waiting any more, the socket stops selecting
 * for accept and returns to the listening state.
 */
static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
{
    tcp_descriptor* td = (tcp_descriptor*) e;
    ErlDrvTermData gone = driver_get_monitored_process(td->inet.port,monitorp);
    int st = td->inet.state;

    if ((st & INET_STATE_MULTI_ACCEPTING) == INET_STATE_MULTI_ACCEPTING) {
        MultiTimerData *tmo;
        int op_id;
        int op_req;

        if (remove_multi_op(td, &op_id, &op_req, gone, &tmo, NULL) == 0) {
            if (tmo != NULL) {
                remove_multi_timer(td, td->inet.port, tmo);
            }
            if (td->multi_first == NULL) {
                sock_select(INETP(td), FD_ACCEPT, 0);
                td->inet.state = INET_STATE_LISTENING; /* restore state */
            }
        }
    }
    else if ((st & INET_STATE_ACCEPTING) == INET_STATE_ACCEPTING) {
        ErlDrvTermData op_caller;
        int op_id;
        int op_req;

        /* the dequeued values are not needed, only the removal */
        deq_async(INETP(td), &op_id, &op_caller, &op_req);
        driver_cancel_timer(td->inet.port);
        sock_select(INETP(td), FD_ACCEPT, 0);
        td->inet.state = INET_STATE_LISTENING; /* restore state */
    }
}
10611 
/*
 * Stop-select callback: releases the OS resource behind 'event'.
 * On Windows the event handle is closed, elsewhere the event is the
 * socket itself and is closed.  The second argument is not used.
 */
static void inet_stop_select(ErlDrvEvent event, void* _)
{
#ifndef __WIN32__
    sock_close((SOCKET)(long)event);
#else
    WSACloseEvent((HANDLE)event);
#endif
}
10620 
10621 /* The peer socket has closed, cleanup and send event */
/*
 * Handles a close detected on the receive side.
 *
 * A blocked send or an ongoing sendfile is failed first; if neither
 * exists, TCP_ADDF_DELAYED_CLOSE_SEND is set so that a following send
 * reports 'closed'.  After that:
 *   - passive socket: timers are cleaned, input is cleared, the
 *     descriptor is closed (exitf set) or only its read side is closed,
 *     and every pending async request is answered with 'closed';
 *   - active socket: input is cleared, a closed message is sent and the
 *     port exits (exitf set) or only its read side is closed.
 *
 * Always returns -1.
 */
static int tcp_recv_closed(tcp_descriptor* desc)
{
#ifdef DEBUG
    long port = (long) desc->inet.port; /* Used after driver_exit() */
#endif
    int send_answered = 0;

    DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n",
            port, desc->inet.s, __FILE__, __LINE__));

    if (IS_BUSY(INETP(desc))) {
        /* A send is blocked */
        desc->inet.caller = desc->inet.busy_caller;
        tcp_clear_output(desc);
        if (desc->busy_on_send) {
            cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
            desc->busy_on_send = 0;
            DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port));
        }
        desc->inet.state &= ~INET_F_BUSY;
        set_busy_port(desc->inet.port, 0);
        inet_reply_error_am(INETP(desc), am_closed);
        DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port));
        send_answered = 1;
    }

#ifdef HAVE_SENDFILE
    if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
        tcp_sendfile_aborted(desc, ENOTCONN);
        send_answered = 1;
    }
#endif

    if (send_answered == 0) {
        /* No blocking send op to reply to right now.
         * If next op is a send, make sure it returns {error,closed}
         * rather than {error,enotconn}.
         */
        desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
    }

    if (desc->inet.active) {
        tcp_clear_input(desc);
        tcp_closed_message(desc);
        if (desc->inet.exitf)
            driver_exit(desc->inet.port, 0);
        else
            desc_close_read(INETP(desc));
        DEBUGF(("tcp_recv_closed(%ld): active close\r\n", port));
    }
    else {
        /* We must cancel any timer here ! */
        clean_multi_timers(desc, INETP(desc)->port);
        /* passive mode do not terminate port ! */
        tcp_clear_input(desc);
        if (desc->inet.exitf)
            tcp_desc_close(desc);
        else
            desc_close_read(INETP(desc));
        async_error_am_all(INETP(desc), am_closed);
        /* next time EXBADSEQ will be delivered  */
        DEBUGF(("tcp_recv_closed(%ld): passive reply all 'closed'\r\n", port));
    }

    DEBUGF(("tcp_recv_closed(%ld): done\r\n", port));
    return -1;
}
10685 
10686 
10687 /* We have a read error determine the action */
/*
 * Handles an error from the receive path.
 *
 * ERRNO_BLOCK is not treated as an error: nothing is done and 0 is
 * returned.  For any other error a blocked send is answered with
 * 'closed', an ongoing sendfile is aborted with 'err', and then:
 *   - passive socket: timers are cleaned, input is cleared, the
 *     descriptor is closed (exitf set) or only its read side is closed,
 *     and all pending async requests get the error atom for 'err';
 *   - active socket: input is cleared, an error message followed by a
 *     closed message is sent, and the port exits with 'err' (exitf set)
 *     or only its read side is closed.
 * In that case -1 is returned.
 */
static int tcp_recv_error(tcp_descriptor* desc, int err)
{
    if (err == ERRNO_BLOCK) {
        return 0;
    }

    if (IS_BUSY(INETP(desc))) {
        /* A send is blocked */
        desc->inet.caller = desc->inet.busy_caller;
        tcp_clear_output(desc);
        if (desc->busy_on_send) {
            cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
            desc->busy_on_send = 0;
        }
        desc->inet.state &= ~INET_F_BUSY;
        set_busy_port(desc->inet.port, 0);
        inet_reply_error_am(INETP(desc), am_closed);
    }

#ifdef HAVE_SENDFILE
    if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
        tcp_sendfile_aborted(desc, err);
    }
#endif

    if (desc->inet.active) {
        tcp_clear_input(desc);
        tcp_error_message(desc, err); /* first error */
        tcp_closed_message(desc);     /* then closed */
        if (desc->inet.exitf) {
            driver_exit(desc->inet.port, err);
        } else {
            desc_close_read(INETP(desc));
        }
    }
    else {
        /* We must cancel any timer here ! */
        clean_multi_timers(desc, INETP(desc)->port);
        tcp_clear_input(desc);
        if (desc->inet.exitf) {
            tcp_desc_close(desc);
        } else {
            desc_close_read(INETP(desc));
        }
        async_error_am_all(INETP(desc), error_atom(err));
    }

    return -1;
}
10731 
10732 
10733 
10734 /*
10735 ** Calculate number of bytes that remain to read before deliver
10736 ** Assume buf, ptr_start, ptr has been setup
10737 **
10738 ** return  > 0 if more to read
10739 **         = 0 if holding complete packet
10740 **         < 0 on error
10741 **
10742 ** if return value == 0 then *len will hold the length of the first packet
10743 **    return value > 0 then if *len == 0 then value means upperbound
10744 **                             *len > 0  then value means exact
10745 **
10746 */
/*
 * Works out how much more has to be read before the first packet in
 * the input buffer is complete, using packet_get_length() on the bytes
 * between i_ptr_start and i_ptr.
 *
 * Returns 0 when a whole packet is buffered (*len = packet length),
 * a positive count when more input is needed (*len > 0: exact count,
 * *len == 0: upper bound), and -1 on a packet error or when the buffer
 * could not be expanded.
 */
static int tcp_remain(tcp_descriptor* desc, int* len)
{
    char* pkt = desc->i_ptr_start;
    int nfill = (desc->i_ptr - desc->i_buf->orig_bytes); /* filled */
    int nsz   = desc->i_bufsz - nfill;                   /* remain */
    int n = desc->i_ptr - pkt;  /* number of bytes read */
    int plen;

    plen = packet_get_length(desc->inet.htype, pkt, n,
                             desc->inet.psize, desc->i_bufsz,
                             desc->inet.delimiter, &desc->http_state);

    DEBUGF(("tcp_remain(%ld): s=%d, n=%d, nfill=%d nsz=%d, tlen %d\r\n",
            (long)desc->inet.port, desc->inet.s, n, nfill, nsz, plen));

    if (plen < 0) {
        DEBUGF((" => packet error\r\n"));
        return -1;
    }

    if (plen > 0) {
        if (plen <= n) { /* got a packet */
            *len = plen;
            DEBUGF((" => nothing remain packet=%d\r\n", plen));
            return 0;
        }
        /* need known more */
        if (tcp_expand_buffer(desc, plen) < 0)
            return -1;
        *len = plen - n;
        DEBUGF((" => remain=%d\r\n", *len));
        return *len;
    }

    /* plen == 0: need unknown more */
    *len = 0;

    if (nsz != 0) {
        /* there is still room after the data already read */
        DEBUGF((" => more=%d \r\n", nsz));
        return nsz;
    }

    if (nfill != n) {
        /* buffer is full, but the packet does not start at its beginning */
        DEBUGF((" => restart more=%d\r\n", nfill - n));
        return nfill - n;
    }

    /* buffer is full and holds nothing but this incomplete packet */
    if (desc->inet.psize != 0 && desc->inet.psize > nfill) {
        if (tcp_expand_buffer(desc, desc->inet.psize) < 0)
            return -1;
        return desc->inet.psize;
    }

    DEBUGF((" => packet error\r\n"));
    return -1;
}
10801 
10802 /*
10803 ** Deliver all packets ready
** if len == 0 then start by checking for a ready packet
10805 */
/*
 * Delivers buffered packets to the receiver.
 *
 *   len > 0  : the first 'len' bytes at i_ptr_start form a packet
 *   len == 0 : first ask tcp_remain() whether a packet is ready
 *
 * A packet filling at least 75% of the buffer is sent as a binary
 * referring to the buffer; smaller packets go through tcp_reply_data().
 * A passive socket delivers one packet, then cancels the receive timer
 * and stops selecting for input.  An active socket keeps delivering
 * for as long as tcp_remain() reports a complete packet.
 *
 * Returns the number of packets delivered, or a negative value on a
 * packet error or when a reply function fails.
 */
static int tcp_deliver(tcp_descriptor* desc, int len)
{
    int delivered = 0;
    int rem;

    /* Poll for ready packet */
    if (len == 0) {
        /* empty buffer or waiting for more input */
        if ((desc->i_buf == NULL) || (desc->i_remain > 0))
            return 0;
        rem = tcp_remain(desc, &len);
        if (rem != 0) {
            if (rem < 0) /* packet error */
                return rem;
            if (len > 0)  /* more data pending */
                desc->i_remain = len;
            return 0;
        }
    }

    while (len > 0) {
        int rc;

        inet_input_count(INETP(desc), len);

        /* deliver binary? */
        if (len*4 >= desc->i_buf->orig_size*3) { /* >=75% */
            rc = tcp_reply_binary_data(desc, desc->i_buf,
                                       (desc->i_ptr_start -
                                        desc->i_buf->orig_bytes),
                                       len);
            if (rc < 0)
                return rc;

            /* something after? */
            if (desc->i_ptr_start + len == desc->i_ptr) { /* no */
                tcp_clear_input(desc);
            }
            else { /* move trail to beginning of a new buffer */
                /* NOTE(review): the result of alloc_buffer() is used
                 * without a NULL check here - confirm it cannot fail */
                ErlDrvBinary* fresh = alloc_buffer(desc->i_bufsz);
                char* tail = desc->i_ptr_start + len;
                int tail_len = desc->i_ptr - tail;

                memcpy(fresh->orig_bytes, tail, tail_len);
                free_buffer(desc->i_buf);
                desc->i_buf = fresh;
                desc->i_ptr_start = desc->i_buf->orig_bytes;
                desc->i_ptr = desc->i_ptr_start + tail_len;
                desc->i_remain = 0;
            }
        }
        else {
            rc = tcp_reply_data(desc, desc->i_ptr_start, len);
            /* XXX The buffer gets thrown away on error  (code < 0)    */
            /* Windows needs workaround for this in tcp_inet_event...  */
            if (rc < 0)
                return rc;
            desc->i_ptr_start += len;
            if (desc->i_ptr_start == desc->i_ptr)
                tcp_clear_input(desc);
            else
                desc->i_remain = 0;
        }

        delivered++;
        len = 0;

        if (!desc->inet.active) {
            /* passive: one packet per request, then stop reading */
            cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout);
            sock_select(INETP(desc), (FD_READ|FD_CLOSE), 0);
            if (desc->i_buf != NULL)
                tcp_restart_input(desc);
        }
        else if (desc->i_buf != NULL) {
            /* active: a complete packet leaves len > 0 and loops again */
            rem = tcp_remain(desc, &len);
            if (rem != 0) {
                if (rem < 0) /* packet error */
                    return rem;
                tcp_restart_input(desc);
                if (len > 0)
                    desc->i_remain = len;
                len = 0;
            }
        }
    }
    return delivered;
}
10891 
10892 
/*
 * Read pending data from the socket into the descriptor's input buffer
 * and deliver it once enough has been collected.
 *
 * desc        - TCP descriptor owning the socket and the input buffer.
 * request_len - number of bytes explicitly requested, or <= 0 to let
 *               tcp_remain() decide how much is needed.
 *
 * Returns 0 when nothing more could be done right now (including the
 * would-block case), -1 when the input buffer could not be allocated,
 * and otherwise whatever tcp_deliver(), tcp_recv_error() or
 * tcp_recv_closed() return.
 */
static int tcp_recv(tcp_descriptor* desc, int request_len)
{
    int got;        /* bytes already buffered, later bytes just received */
    int pkt_len;    /* length reported by tcp_remain() */
    int want;       /* number of bytes to ask the socket for */

    if (desc->i_buf == NULL) {
	/* No input buffer yet: allocate one sized by the request, or by
	 * the configured buffer size when there is no explicit request.
	 */
	int bufsz = (request_len > 0) ? request_len : desc->inet.bufsz;

	desc->i_buf = alloc_buffer(bufsz);
	if (desc->i_buf == NULL)
	    return -1;
	/* XXX: changing bufsz during recv SHOULD/MAY? affect
	 * the ongoing operation, but currently does not.
	 */
	desc->i_bufsz = bufsz;  /* i_bufsz is used, not i_buf->orig_size */
	desc->i_ptr_start = desc->i_buf->orig_bytes;
	desc->i_ptr = desc->i_ptr_start;
	want = bufsz;
	desc->i_remain = (request_len > 0) ? request_len : 0;
    }
    else if (request_len > 0) {
	/* Buffered data exists and a specific amount was requested */
	got = desc->i_ptr - desc->i_ptr_start;
	if (got >= request_len)
	    return tcp_deliver(desc, request_len);
	if (tcp_expand_buffer(desc, request_len) < 0)
	    return tcp_recv_error(desc, ENOMEM);
	want = request_len - got;
	desc->i_remain = want;
    }
    else if (desc->i_remain != 0) {
	/* A remaining byte count is already known; keep using it */
	want = desc->i_remain;
    }
    else {
	/* Ask tcp_remain() what the buffered data still needs */
	want = tcp_remain(desc, &pkt_len);
	if (want < 0)
	    return tcp_recv_error(desc, EMSGSIZE);
	if (want == 0)
	    return tcp_deliver(desc, pkt_len);
	if (pkt_len > 0)
	    desc->i_remain = pkt_len;  /* remember what is still missing */
    }

    DEBUGF(("tcp_recv(%ld): s=%d about to read %d bytes...\r\n",
	    (long)desc->inet.port, desc->inet.s, want));

    got = sock_recv(desc->inet.s, desc->i_ptr, want, 0);

    if (IS_SOCKET_ERROR(got)) {
	int err = sock_errno();

	if (err == ECONNRESET) {
	    DEBUGF((" => detected close (connreset)\r\n"));
	    /* A reset is only reported as an error when the descriptor
	     * has the show-econnreset flag; otherwise it is a close.
	     */
	    if (!(desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET))
		return tcp_recv_closed(desc);
	    return tcp_recv_error(desc, err);
	}
	if (err == ERRNO_BLOCK) {
	    DEBUGF((" => would block\r\n"));
	    return 0;
	}
	DEBUGF((" => error: %d\r\n", err));
	return tcp_recv_error(desc, err);
    }
    if (got == 0) {
	DEBUGF(("  => detected close\r\n"));
	return tcp_recv_closed(desc);
    }

    DEBUGF((" => got %d bytes\r\n", got));
    desc->i_ptr += got;

    if (desc->i_remain > 0) {
	/* Counting down a known remainder; deliver when it hits zero */
	desc->i_remain -= got;
	if (desc->i_remain != 0)
	    return 0;
	return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start);
    }

    /* No known remainder: re-evaluate the buffer contents */
    want = tcp_remain(desc, &pkt_len);
    if (want < 0)
	return tcp_recv_error(desc, EMSGSIZE);
    if (want == 0)
	return tcp_deliver(desc, pkt_len);
    if (pkt_len > 0)
	desc->i_remain = pkt_len;  /* remember what is still missing */
    return 0;
}
10982 
10983 
10984 #ifdef __WIN32__
10985 
10986 
/*
 * Windows only: turn network event notification on or off for a socket.
 *
 * desc  - descriptor whose event_mask, forced_events, event handle and
 *         socket are used.
 * flags - FD_* event bits to add to (on != 0) or remove from (on == 0)
 *         desc->event_mask.
 * on    - non-zero to enable the events in 'flags', zero to disable.
 *
 * All events are first cancelled and the event handle is reset; then,
 * if the resulting mask is non-zero, the events are re-registered and
 * the socket is polled once so that already-pending conditions are
 * "forced" (recorded in desc->forced_events and the event is set).
 *
 * Returns 0 on success and -1 on failure. If the initial cancel fails
 * the previous event_mask is restored; on later failures event_mask is
 * set to 0.
 */
static int winsock_event_select(inet_descriptor *desc, int flags, int on)
{
    int save_event_mask = desc->event_mask;

    desc->forced_events = 0;
    if (on)
	desc->event_mask |= flags;
    else
	desc->event_mask &= (~flags);
    DEBUGF(("port %ld: winsock_event_select: "
	    "flags=%02X, on=%d, event_mask=%02X\n",
	    (long)desc->port, flags, on, desc->event_mask));
    /* The RIGHT WAY (TM) to do this is to make sure:
       A) The cancelling of all network events is done with
          NULL as the event parameter (bug in NT's winsock),
       B) The actual event handle is reset so that it is only
          raised if one of the requested network events is active,
       C) Avoid race conditions by making sure that the event cannot be set
          while we are preparing to set the correct network event mask.
       The simplest way to do it is to turn off all events, reset the
       event handle and then, if event_mask != 0, turn on the appropriate
       events again. */
    if (WSAEventSelect(desc->s, NULL, 0) != 0) {
	/* Both conversions in the format now have a matching argument */
	DEBUGF(("port %ld: winsock_event_select: "
		"WSAEventSelect returned error, code %d.\n",
		(long)desc->port, sock_errno()));
	desc->event_mask = save_event_mask;
	return -1;
    }
    if (!ResetEvent(desc->event)) {
	DEBUGF(("port %ld: winsock_event_select: "
		"ResetEvent returned error, code %d.\n",
		(long)desc->port, (int)GetLastError()));
	desc->event_mask = 0;
	return -1;
    }
    if (desc->event_mask != 0) {
	if (WSAEventSelect(desc->s,
			     desc->event,
			     desc->event_mask) != 0) {
	    DEBUGF(("port %ld: winsock_event_select: "
		    "WSAEventSelect returned error, code %d.\n",
		    (long)desc->port, sock_errno()));
	    desc->event_mask = 0;
	    return -1;
	}

	/* Now, WSAEventSelect() is trigged only when the queue goes from
	   full to empty or from empty to full; therefore we need an extra test
	   to see whether it is writeable, readable or closed... */
	if ((desc->event_mask & FD_WRITE)) {
	    /* Force a write event unless an earlier send would have
	       blocked and the socket is still not writable. */
	    int do_force = 1;
	    if (desc->send_would_block) {
		TIMEVAL tmo = {0,0};
		FD_SET fds;

		FD_ZERO(&fds);
		FD_SET(desc->s,&fds);
		do_force = (select(desc->s+1,0,&fds,0,&tmo) > 0);
	    }
	    if (do_force) {
		SetEvent(desc->event);
		desc->forced_events |= FD_WRITE;
	    }
	}
	if ((desc->event_mask & (FD_READ|FD_CLOSE))) {
	    int readable = 0;
	    int closed = 0;
	    TIMEVAL tmo = {0,0};
	    FD_SET fds;
	    int ret;
	    unsigned long arg;

	    FD_ZERO(&fds);
	    FD_SET(desc->s,&fds);
	    ret = select(desc->s+1,&fds,0,0,&tmo);
	    if (ret > 0) {
		++readable;
		/* Readable with zero bytes pending (or a failing
		   FIONREAD query) is treated as a close. */
		if (ioctlsocket(desc->s,FIONREAD,&arg) != 0) {
		    ++closed;	/* Which gives a FD_CLOSE event */
		} else {
		    closed = (arg == 0);
		}
	    }
	    if ((desc->event_mask & FD_READ) && readable && !closed) {
		SetEvent(desc->event);
		desc->forced_events |= FD_READ;
	    }
	    if ((desc->event_mask & FD_CLOSE) && closed) {
		SetEvent(desc->event);
		desc->forced_events |= FD_CLOSE;
	    }
	}
    }
    return 0;
}
11084 
/*
 * Windows only: event callback for a TCP socket.
 *
 * e     - driver data; cast to the tcp_descriptor for the socket.
 * event - the event handle, passed on to tcp_inet_input() and
 *         tcp_inet_output().
 *
 * Enumerates the network events pending on the socket, adds the events
 * recorded in desc->inet.forced_events, masks the result with
 * desc->inet.event_mask and then handles FD_READ, FD_WRITE,
 * FD_CONNECT/FD_ACCEPT and FD_CLOSE in that order. Any failure from
 * WSAEnumNetworkEvents(), or a negative return from the input/output
 * handlers where it is checked, ends processing via the 'error' label.
 */
static void tcp_inet_event(ErlDrvData e, ErlDrvEvent event)
{
    tcp_descriptor* desc = (tcp_descriptor*)e;
    WSANETWORKEVENTS netEv;
    int err;

    DEBUGF(("tcp_inet_event(%ld) {s=%d\r\n",
	    (long)desc->inet.port, desc->inet.s));
    if (WSAEnumNetworkEvents(desc->inet.s, desc->inet.event,
					&netEv) != 0) {
	DEBUGF((" => EnumNetworkEvents = %d\r\n", sock_errno() ));
	goto error;
    }

    DEBUGF((" => event=%02X, mask=%02X\r\n",
	    netEv.lNetworkEvents, desc->inet.event_mask));

    /* Add the forced events. */

    netEv.lNetworkEvents |= desc->inet.forced_events;

    /*
     * Calling WSAEventSelect() with a mask of 0 doesn't always turn off
     * all events.  To avoid acting on events we don't want, we mask
     * the events with mask for the events we really want.
     */

#ifdef DEBUG
    if ((netEv.lNetworkEvents & ~(desc->inet.event_mask)) != 0) {
	DEBUGF(("port %d:  ... unexpected event: %d\r\n",
		desc->inet.port, netEv.lNetworkEvents & ~(desc->inet.event_mask)));
    }
#endif
    netEv.lNetworkEvents &= desc->inet.event_mask;

    if (netEv.lNetworkEvents & FD_READ) {
	if (tcp_inet_input(desc, event) < 0) {
	    goto error;
	}
	if (netEv.lNetworkEvents & FD_CLOSE) {
	    /*
	     * We must loop to read out the remaining packets (if any).
	     * The loop has no normal exit: it only ends through one of
	     * the 'goto error' statements below.
	     */
	    for (;;) {
		DEBUGF(("Retrying read due to closed port\r\n"));
		/* XXX The buffer will be thrown away on error (empty que).
		   Possible SMP FIXME. */
		/* Stop when the socket is passive and no async
		   operation is queued on it. */
		if (!desc->inet.active && (desc->inet.opt) == NULL) {
		    goto error;
		}
		if (tcp_inet_input(desc, event) < 0) {
		    goto error;
		}
	    }
	}
    }
    if (netEv.lNetworkEvents & FD_WRITE) {
	/* The socket is reported writable again; clear the flag set
	   when a send would have blocked. */
	desc->inet.send_would_block = 0;
	if (tcp_inet_output(desc, event) < 0)
	    goto error;
    }
    if (netEv.lNetworkEvents & FD_CONNECT) {
	/* A non-zero per-event error code is reported through
	   async_error(); otherwise the output handler is run. */
	if ((err = netEv.iErrorCode[FD_CONNECT_BIT]) != 0) {
	    async_error(INETP(desc), err);
	} else {
	    tcp_inet_output(desc, event);
	}
    } else if (netEv.lNetworkEvents & FD_ACCEPT) {
	if ((err = netEv.iErrorCode[FD_ACCEPT_BIT]) != 0)
	    async_error(INETP(desc), err);
	else
	    tcp_inet_input(desc, event);
    }
    if (netEv.lNetworkEvents & FD_CLOSE) {
	/* error in err = netEv.iErrorCode[FD_CLOSE_BIT] */
	DEBUGF(("Detected close in %s, line %d\r\n", __FILE__, __LINE__));
	/* With the show-econnreset flag set, a reset (or an abort on a
	   connected socket) is reported as an ECONNRESET error; every
	   other case is reported as a plain close. */
	if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) {
	    err = netEv.iErrorCode[FD_CLOSE_BIT];
	    if (err == ECONNRESET)
		tcp_recv_error(desc, err);
	    else if (err == ECONNABORTED && IS_CONNECTED(INETP(desc))) {
		/* translate this error to ECONNRESET */
		tcp_recv_error(desc, ECONNRESET);
	    }
	    else
		tcp_recv_closed(desc);
	}
	else
	    tcp_recv_closed(desc);
    }
    DEBUGF(("tcp_inet_event(%ld) }\r\n", (long)desc->inet.port));
    return;

 error:
    DEBUGF(("tcp_inet_event(%ld) error}\r\n", (long)desc->inet.port));
    return;
}
11182 
11183 #endif /* __WIN32__ */
11184 
11185 
11186 /* socket has input:
11187 ** 1. INET_STATE_ACCEPTING  => non block accept ?
11188 ** 2. INET_STATE_CONNECTED => read input
11189 */
/*
 * Handle "socket readable" for a TCP descriptor.
 *
 * desc  - the TCP descriptor.
 * event - event handle; not used by this function itself.
 *
 * Depending on desc->inet.state this either completes a single pending
 * accept, serves queued accept requests in a loop (multi-accept), or
 * reads data through tcp_recv(). In any other state the accept
 * selection is turned off and nothing else happens.
 *
 * Returns 0 by default, -1 when no queued accept request could be
 * dequeued in multi-accept mode, and otherwise the result of the
 * reply/receive helper that was called last.
 */
static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
{
    int ret = 0;
#ifdef DEBUG
    long port = (long) desc->inet.port;  /* Used after driver_exit() */
#endif
    ASSERT(!INET_IGNORED(INETP(desc)));
    DEBUGF(("tcp_inet_input(%ld) {s=%d\r\n", port, desc->inet.s));
    /* XXX fprintf(stderr,"tcp_inet_input(%ld) {s=%d}\r\n",(long) desc->inet.port, desc->inet.s); */
    if (desc->inet.state == INET_STATE_ACCEPTING) {
	SOCKET s;
	unsigned int len;
	inet_address remote;
	inet_async_op *this_op = desc->inet.opt;
	int accept_err = 0;

	len = sizeof(desc->inet.remote);
	sys_memzero((char *) &remote, len);
	s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len);
	if (s == INVALID_SOCKET) {
	    /* Save the error code immediately: the calls made below
	       before it is reported may overwrite it. */
	    accept_err = sock_errno();
	    if (accept_err == ERRNO_BLOCK) {
		/* Just try again, no real error, just a ghost trigger
		   from poll, keep the default return code and
		   everything else as is */
		goto done;
	    }
	}

	sock_select(INETP(desc),FD_ACCEPT,0);
	desc->inet.state = INET_STATE_LISTENING; /* restore state */

	if (this_op != NULL) {
	    driver_demonitor_process(desc->inet.port, &(this_op->monitor));
	}


	driver_cancel_timer(desc->inet.port); /* possibly cancel a timer */

	if (s == INVALID_SOCKET) {
	    ret = async_error(INETP(desc), accept_err);
	    goto done;
	}
	else {
	    ErlDrvTermData caller;
	    tcp_descriptor* accept_desc;
	    int err;

	    if (desc->inet.opt == NULL) {
		/* No caller setup */
		sock_close(s);
		ret = async_error(INETP(desc), EINVAL);
		goto done;
	    }
	    caller = desc->inet.opt->caller;
	    if ((accept_desc = tcp_inet_copy(desc,s,caller,&err)) == NULL) {
		sock_close(s);
		ret = async_error(INETP(desc), err);
		goto done;
	    }
	    /* FIXME: may MUST lock port
	     * 1 - Port is accessible via the erlang:ports()
	     * 2 - Port is accessible via callers process_info(links)
	     */
	    accept_desc->inet.remote = remote;
	    SET_NONBLOCKING(accept_desc->inet.s);
#ifdef __WIN32__
	    driver_select(accept_desc->inet.port, accept_desc->inet.event,
			  ERL_DRV_READ, 1);
#endif
	    accept_desc->inet.state = INET_STATE_CONNECTED;
	    ret =  async_ok_port(INETP(desc), accept_desc->inet.dport);
	    goto done;
	}
    } else if (desc->inet.state == INET_STATE_MULTI_ACCEPTING) {
	SOCKET s;
	unsigned int len;
	inet_address remote;
	int id,req;
	ErlDrvTermData caller;
	MultiTimerData *timeout;
	ErlDrvMonitor monitor;
	int accept_err;
#ifdef HARDDEBUG
	int times = 0;
#endif

	/* Keep accepting for as long as there are queued requests */
	while (desc->inet.state == INET_STATE_MULTI_ACCEPTING) {
	    len = sizeof(desc->inet.remote);
	    sys_memzero((char *) &remote, len);
	    s = sock_accept(desc->inet.s, (struct sockaddr*) &remote, &len);
	    /* Save the error code immediately: the calls made below
	       before it is reported may overwrite it. */
	    accept_err = (s == INVALID_SOCKET) ? sock_errno() : 0;
	    if (s == INVALID_SOCKET && accept_err == ERRNO_BLOCK) {
		/* Just try again, no real error, keep the last return code */
		goto done;
	    }
#ifdef HARDDEBUG
	    if (++times > 1) {
		erts_fprintf(stderr,"Accepts in one suite: %d :-)\r\n",times);
	    }
#endif
	    if (deq_multi_op(desc,&id,&req,&caller,&timeout,&monitor) != 0) {
		/* Nobody to hand the connection to; do not leak it */
		if (s != INVALID_SOCKET)
		    sock_close(s);
		ret = -1;
		goto done;
	    }

	    if (desc->multi_first == NULL) {
		/* That was the last queued request */
		sock_select(INETP(desc),FD_ACCEPT,0);
		desc->inet.state = INET_STATE_LISTENING; /* restore state */
	    }

	    if (timeout != NULL) {
		remove_multi_timer(desc, desc->inet.port, timeout);
	    }

	    driver_demonitor_process(desc->inet.port, &monitor);


	    if (s == INVALID_SOCKET) { /* Not ERRNO_BLOCK, that's handled right away */
		ret = send_async_error(desc->inet.dport,
				       id, caller, error_atom(accept_err));
		goto done;
	    }
	    else {
		tcp_descriptor* accept_desc;
		int err;

		if ((accept_desc = tcp_inet_copy(desc,s,caller,&err)) == NULL) {
		    sock_close(s);
		    ret = send_async_error(desc->inet.dport,
					   id, caller, error_atom(err));
		    goto done;
		}
		accept_desc->inet.remote = remote;
		SET_NONBLOCKING(accept_desc->inet.s);
#ifdef __WIN32__
		driver_select(accept_desc->inet.port, accept_desc->inet.event,
			      ERL_DRV_READ, 1);
#endif
		accept_desc->inet.state = INET_STATE_CONNECTED;
		ret =  send_async_ok_port(desc->inet.dport,
					  id, caller, accept_desc->inet.dport);
	    }
	}
    }
    else if (IS_CONNECTED(INETP(desc))) {
	ret = tcp_recv(desc, 0);
	goto done;
    }
    else {
	/* maybe a close op from connection attempt?? */
	sock_select(INETP(desc),FD_ACCEPT,0);
	DEBUGF(("tcp_inet_input(%ld): s=%d bad state: %04x\r\n",
		port, desc->inet.s, desc->inet.state));
    }
 done:
    DEBUGF(("tcp_inet_input(%ld) }\r\n", port));
    return ret;
}
11342 
/*
 * Common error handling for failed send and shutdown operations.
 *
 * desc - the TCP descriptor the operation failed on.
 * err  - the socket error code.
 *
 * Clears a pending busy state, then reports the failure according to
 * whether the socket is active or passive and whether ECONNRESET is to
 * be shown to the user. Always returns -1.
 */
static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
{
    int show_econnreset = 0;

    if (err == ECONNRESET
	&& (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET))
	show_econnreset = 1;

    /*
     * A busy port needs cleaning up first: restore the caller, stop a
     * running send timer and clear the busy marks.
     */
    if (IS_BUSY(INETP(desc))) {
	desc->inet.caller = desc->inet.busy_caller;
	if (desc->busy_on_send) {
	    cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
	    desc->busy_on_send = 0;
	}
	desc->inet.state &= ~INET_F_BUSY;
	set_busy_port(desc->inet.port, 0);
    }

    /*
     * All errors are handled the same way (unless the show_econnreset
     * socket option is enabled); the only distinction made is between
     * active and passive sockets.
     */
    DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n",
	    (long)desc->inet.port, __FILE__, __LINE__));

    if (!desc->inet.active) {
	/* Passive socket */
	tcp_close_check(desc);

	if (!desc->inet.caller) {
	    /* No blocking send op to reply to right now.
	     * If next op is a send, make sure it returns {error,closed}
	     * rather than {error,enotconn}.
	     */
	    desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
	}
	else if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) {
	    if (show_econnreset)
		inet_reply_error(INETP(desc), err);
	    else
		inet_reply_error_am(INETP(desc), am_closed);
	}
	tcp_desc_close(desc);

	/*
	 * The next receive operation should get {error,closed} rather
	 * than {error,enotconn}, so that the caller can ignore errors
	 * from send operations and deal with them when receiving.
	 */
	desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_RECV;

	/* With show_econnreset, later send or receive operations
	 * return {error, econnreset} instead of {error, closed}.
	 */
	if (show_econnreset)
	    desc->tcp_add_flags |= TCP_ADDF_DELAYED_ECONNRESET;

	return -1;
    }

    /* Active socket */
    {
	ErlDrvTermData reason = am_closed;

	if (show_econnreset) {
	    tcp_error_message(desc, err);
	    reason = error_atom(err);
	}
	tcp_closed_message(desc);
	if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE))
	    inet_reply_error_am(INETP(desc), reason);

	if (desc->inet.exitf)
	    driver_exit(desc->inet.port, 0);
	else
	    tcp_desc_close(desc);
    }
    return -1;
}
11422 
/*
 * Handle a failed send: normalize the error code and hand it over to
 * tcp_send_or_shutdown_error().
 *
 * desc - the TCP descriptor the send failed on.
 * err  - the socket error code from the failed send.
 *
 * Returns the result of tcp_send_or_shutdown_error().
 */
static int tcp_send_error(tcp_descriptor* desc, int err)
{
    /* EPIPE errors usually occur in one of three ways:
     * 1. We write to a socket when we've already shutdown() the write side. On
     *    Windows the error returned for this is ESHUTDOWN rather than EPIPE.
     * 2. The TCP peer sends us an RST through no fault of our own (perhaps
     *    by aborting the connection using SO_LINGER) and we then attempt
     *    to write to the socket. On Linux and Windows we would actually
     *    receive an ECONNRESET error for this, but on the BSDs, Darwin,
     *    Illumos and presumably Solaris, it's an EPIPE.
     * 3. We cause the TCP peer to send us an RST by writing to a socket
     *    after we receive a FIN from them. Our first write will be
     *    successful, but if they have closed the connection (rather
     *    than just shutting down the write side of it) this will cause their
     *    OS to send us an RST. Then, when we attempt to write to the socket
     *    a second time, we will get an EPIPE error. On Windows we get an
     *    ECONNABORTED.
     *
     * What we are going to do here is to treat all EPIPE messages that aren't
     * of type 1 as ECONNRESET errors. This will allow users who have the
     * show_econnreset socket option enabled to receive {error, econnreset} on
     * both send and recv operations to indicate that an RST has been received.
     */
#ifdef __WIN32__
    /* Case 3 above as it shows up on Windows */
    if (err == ECONNABORTED)
	err = ECONNRESET;
#endif
    /* EPIPE without a completed write-side shutdown is case 2 or 3 */
    if (err == EPIPE && !(desc->tcp_add_flags & TCP_ADDF_SHUTDOWN_WR_DONE))
	err = ECONNRESET;
    return tcp_send_or_shutdown_error(desc, err);
}
11454 
/*
 * Handle a failed shutdown: the error code is passed on unchanged to
 * the handler shared with failed sends. Returns that handler's result.
 */
static int tcp_shutdown_error(tcp_descriptor* desc, int err)
{
    int rc = tcp_send_or_shutdown_error(desc, err);

    return rc;
}
11459 
/*
 * Timer callback for delayed sends: runs the output handler for the
 * descriptor, passing its socket as the event. The second argument is
 * not used and the handler's result is ignored.
 */
static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy)
{
    tcp_descriptor *tcp = (tcp_descriptor*) data;
    HANDLE sock_event = (HANDLE) INETP(tcp)->s;

    (void) tcp_inet_output(tcp, sock_event);
}
11465 
11466 /*
11467 ** Send non-blocking vector data
11468 */
/*
 * Send an I/O vector on a TCP socket without blocking.
 *
 * desc - the TCP descriptor to send on.
 * ev   - the I/O vector; when a packet header is configured, iov[0] is
 *        pointed at the header and ev->size is increased by its length.
 *
 * Returns 0 when the data was sent or queued (or there was nothing to
 * send), 1 when the data was queued and the port was marked busy, and
 * the result of tcp_send_error() when the socket write failed.
 */
static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
{
    ErlDrvPort port = desc->inet.port;
    ErlDrvSizeT payload = ev->size;
    ErlDrvSizeT hdr_len;
    ErlDrvSizeT queued;
    ssize_t sent;
    char hdr[4];
    int vsize;

    /* Build the length header demanded by the packet type, if any */
    switch (desc->inet.htype) {
    case TCP_PB_1:
	put_int8(payload, hdr);
	hdr_len = 1;
	break;
    case TCP_PB_2:
	put_int16(payload, hdr);
	hdr_len = 2;
	break;
    case TCP_PB_4:
	put_int32(payload, hdr);
	hdr_len = 4;
	break;
    default:
	if (payload == 0)
	    return 0;
	hdr_len = 0;
	break;
    }

    inet_output_count(INETP(desc), payload+hdr_len);

    if (hdr_len > 0) {
	ev->iov[0].iov_base = hdr;
	ev->iov[0].iov_len = hdr_len;
	ev->size += hdr_len;
    }

    queued = driver_sizeq(port);

    if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || queued > 0) {
	/* Data is already queued (or a sendfile is in progress):
	 * append to the queue to keep the ordering.
	 */
	driver_enqv(port, ev, 0);
	if (queued+ev->size < desc->high)
	    return 0;

	DEBUGF(("tcp_sendv(%ld): s=%d, sender forced busy\r\n",
		(long)desc->inet.port, desc->inet.s));
	desc->inet.state |= INET_F_BUSY;  /* mark for low-watermark */
	desc->inet.busy_caller = desc->inet.caller;
	set_busy_port(desc->inet.port, 1);
	if (desc->send_timeout != INET_INFINITY) {
	    desc->busy_on_send = 1;
	    add_multi_timer(desc, INETP(desc)->port,
			    0 /* arg */, desc->send_timeout /* timeout */,
			    &tcp_inet_send_timeout);
	}
	return 1;
    }

    /* Nothing queued: try to write directly to the socket */
    vsize = (ev->vsize > MAX_VSIZE) ? MAX_VSIZE : ev->vsize;

    DEBUGF(("tcp_sendv(%ld): s=%d, about to send "LLU","LLU" bytes\r\n",
	    (long)desc->inet.port, desc->inet.s, (llu_t)hdr_len, (llu_t)payload));

    if (INET_IGNORED(INETP(desc))) {
	INETP(desc)->flags |= INET_IGNORE_WRITE;
	sent = 0;
    }
    else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
	/* Queue everything and let a zero-length timer do the send */
	driver_enqv(port, ev, 0);
	add_multi_timer(desc, INETP(desc)->port, 0,
			0, &tcp_inet_delay_send);
	return 0;
    }
    else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov,
					vsize, &sent, 0))) {
	if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
	    int err = sock_errno();
	    DEBUGF(("tcp_sendv(%ld): s=%d, "
		    "sock_sendv(size=2) errno = %d\r\n",
		    (long)desc->inet.port, desc->inet.s, err));
	    return tcp_send_error(desc, err);
	}
#ifdef __WIN32__
	desc->inet.send_would_block = 1;
#endif
	sent = 0;
    }
    else if (sent == ev->size) {
	/* Everything went out in one go */
	ASSERT(NO_SUBSCRIBERS(&INETP(desc)->empty_out_q_subs));
	return 0;
    }
    else {
	DEBUGF(("tcp_sendv(%ld): s=%d, only sent "
		LLU"/%d of "LLU"/%d bytes/items\r\n",
		(long)desc->inet.port, desc->inet.s,
		(llu_t)sent, vsize, (llu_t)ev->size, ev->vsize));
    }

    /* Queue whatever was not written, skipping the bytes already sent */
    DEBUGF(("tcp_sendv(%ld): s=%d, Send failed, queuing\r\n",
	    (long)desc->inet.port, desc->inet.s));
    driver_enqv(port, ev, sent);
    if (!INET_IGNORED(INETP(desc)))
	sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1);

    return 0;
}
11572 
11573 /*
11574 ** Send non blocking data
11575 */
/*
 * Send a contiguous buffer on a TCP socket without blocking.
 *
 * desc - the TCP descriptor to send on.
 * ptr  - start of the payload.
 * len  - payload length in bytes.
 *
 * A length header is put in front of the payload when the packet type
 * asks for one. Returns 0 when the data was sent or queued (or there
 * was nothing to send), 1 when the data was queued and the port was
 * marked busy, and the result of tcp_send_error() when the socket
 * write failed.
 */
static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
{
    /* Same type as in tcp_sendv(): the queue size must not be
     * truncated to int, since a value <= 0 would make the data below
     * bypass what is already queued.
     */
    ErlDrvSizeT sz;
    char buf[4];
    int h_len;
    int n;
    ErlDrvPort ix = desc->inet.port;
    SysIOVec iov[2];

    switch(desc->inet.htype) {
    case TCP_PB_1:
	put_int8(len, buf);
	h_len = 1;
	break;
    case TCP_PB_2:
	put_int16(len, buf);
	h_len = 2;
	break;
    case TCP_PB_4:
	put_int32(len, buf);
	h_len = 4;
	break;
    default:
	if (len == 0)
	    return 0;
	h_len = 0;
	break;
    }

    inet_output_count(INETP(desc), len+h_len);

    sz = driver_sizeq(ix);

    if ((desc->tcp_add_flags & TCP_ADDF_SENDFILE) || sz > 0) {
	/* Data is already queued (or a sendfile is in progress):
	 * append to the queue to keep the ordering.
	 */
	if (h_len > 0)
	    driver_enq(ix, buf, h_len);
	driver_enq(ix, ptr, len);
	if (sz+h_len+len >= desc->high) {
	    DEBUGF(("tcp_send(%ld): s=%d, sender forced busy\r\n",
		    (long)desc->inet.port, desc->inet.s));
	    desc->inet.state |= INET_F_BUSY;  /* mark for low-watermark */
	    desc->inet.busy_caller = desc->inet.caller;
	    set_busy_port(desc->inet.port, 1);
	    if (desc->send_timeout != INET_INFINITY) {
		desc->busy_on_send = 1;
                add_multi_timer(desc, INETP(desc)->port,
                                0 /* arg */, desc->send_timeout /* timeout */,
                                &tcp_inet_send_timeout);
	    }
	    return 1;
	}
    }
    else {
	/* Nothing queued: try to write header and payload directly */
	iov[0].iov_base = buf;
	iov[0].iov_len = h_len;
	iov[1].iov_base = ptr;
	iov[1].iov_len = len;

	DEBUGF(("tcp_send(%ld): s=%d, about to send "LLU","LLU" bytes\r\n",
		(long)desc->inet.port, desc->inet.s, (llu_t)h_len, (llu_t)len));
	if (INET_IGNORED(INETP(desc))) {
	    INETP(desc)->flags |= INET_IGNORE_WRITE;
	    n = 0;
	} else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
	    /* Zero-length send; all data is queued below */
	    sock_send(desc->inet.s, buf, 0, 0);
	    n = 0;
	} else 	if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s,iov,2,&n,0))) {
	    if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
		int err = sock_errno();
		DEBUGF(("tcp_send(%ld): s=%d,sock_sendv(size=2) errno = %d\r\n",
			(long)desc->inet.port, desc->inet.s, err));
		return tcp_send_error(desc, err);
	    }
#ifdef __WIN32__
	    desc->inet.send_would_block = 1;
#endif
	    n = 0;
	}
	else if (n == len+h_len) {
	    /* Everything went out in one go */
	    ASSERT(NO_SUBSCRIBERS(&INETP(desc)->empty_out_q_subs));
	    return 0;
	}

	DEBUGF(("tcp_send(%ld): s=%d, Send failed, queuing",
		(long)desc->inet.port, desc->inet.s));

	/* Queue the unsent part: the rest of the header plus the whole
	 * payload, or only the rest of the payload.
	 */
	if (n < h_len) {
	    driver_enq(ix, buf+n, h_len-n);
	    driver_enq(ix, ptr, len);
	}
	else {
	    n -= h_len;
	    driver_enq(ix, ptr+n, len-n);
	}
	if (!INET_IGNORED(INETP(desc)))
	    sock_select(INETP(desc),(FD_WRITE|FD_CLOSE), 1);
    }
    return 0;
}
11675 
11676 /* shutdown on the socket:
11677 ** Assume caller has confirmed TCP_ADDF_PENDING_SHUTDOWN is set.
11678 */
/*
 * Carry out a shutdown that was postponed earlier. The write side only
 * is shut down when TCP_ADDF_PENDING_SHUT_WR is set, otherwise both
 * directions. On success the write-shutdown-done flag is set; on
 * failure the error goes to tcp_shutdown_error().
 */
static void tcp_shutdown_async(tcp_descriptor* desc)
{
    int how = TCP_SHUT_RDWR;

    if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUT_WR)
	how = TCP_SHUT_WR;

    if (!IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) {
	desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE;
	return;
    }
    tcp_shutdown_error(desc, sock_errno());
}
11690 
/*
 * Driver callback wrapper: casts the arguments and runs the TCP output
 * handler, ignoring its result.
 */
static void tcp_inet_drv_output(ErlDrvData data, ErlDrvEvent event)
{
    tcp_descriptor* tcp = (tcp_descriptor*) data;

    (void) tcp_inet_output(tcp, (HANDLE) event);
}
11695 
/*
 * Driver callback wrapper: casts the arguments and runs the TCP input
 * handler, ignoring its result.
 */
static void tcp_inet_drv_input(ErlDrvData data, ErlDrvEvent event)
{
    tcp_descriptor* tcp = (tcp_descriptor*) data;

    (void) tcp_inet_input(tcp, (HANDLE) event);
}
11700 
11701 #ifdef HAVE_SENDFILE
/*
 * Finish a sendfile operation: leave sendfile mode, close the
 * duplicated file descriptor, release a busy port if the queue is at or
 * below the low watermark, and send {sendfile, Port, {ok, Low, High}}
 * where Low/High are the two 32-bit halves of the byte count.
 *
 * Returns the result of erl_drv_output_term().
 */
static int tcp_sendfile_completed(tcp_descriptor* desc) {
    ErlDrvTermData msg[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 +
        LOAD_ATOM_CNT * 2 + LOAD_UINT_CNT * 2];
    Uint32 sent_low, sent_high;
    Uint64 total;
    int pos;

    desc->tcp_add_flags &= ~TCP_ADDF_SENDFILE;
    close(desc->sendfile.dup_file_fd);

    DEBUGF(("tcp_sendfile_completed(%p): SENDFILE dup closed %d\r\n",
            desc->inet.port, desc->sendfile.dup_file_fd));

    /* The output queue was flushed before the file was sent, but
     * clearing the busy status was put off until now, since there was
     * no point in doing it while a file was still being sent.
     *
     * The watermark is checked because more data may have been queued
     * in the meantime. */
    if (driver_sizeq(desc->inet.port) <= desc->low
        && IS_BUSY(INETP(desc))) {
        desc->inet.caller = desc->inet.busy_caller;
        desc->inet.state &= ~INET_F_BUSY;

        set_busy_port(desc->inet.port, 0);

        /* A running send timer is cancelled before replying ok */
        if (desc->busy_on_send) {
            cancel_multi_timer(desc, INETP(desc)->port,
                               &tcp_inet_send_timeout);
            desc->busy_on_send = 0;
        }

        inet_reply_ok(INETP(desc));
    }

    /* With an empty queue: stop write selection, notify subscribers,
     * and carry out a shutdown that was waiting for the queue. */
    if (driver_sizeq(desc->inet.port) == 0) {
        sock_select(INETP(desc), FD_WRITE, 0);
        send_empty_out_q_msgs(INETP(desc));

        if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN)
            tcp_shutdown_async(desc);
    }

    total = (Uint64)desc->sendfile.bytes_sent;
    sent_low = total & 0xFFFFFFFF;
    sent_high = (total >> 32) & 0xFFFFFFFF;

    pos = LOAD_ATOM(msg, 0, am_sendfile);
    pos = LOAD_PORT(msg, pos, desc->inet.dport);
    pos = LOAD_ATOM(msg, pos, am_ok);
    pos = LOAD_UINT(msg, pos, sent_low);
    pos = LOAD_UINT(msg, pos, sent_high);
    pos = LOAD_TUPLE(msg, pos, 3);
    pos = LOAD_TUPLE(msg, pos, 3);

    ASSERT(pos == sizeof(msg)/sizeof(*msg));

    return erl_drv_output_term(desc->inet.dport, msg, pos);
}
11763 
/*
 * Report a failed sendfile operation by sending
 * {sendfile, Port, {error, Reason}}. Reason is 'closed' for
 * ECONNRESET, ENOTCONN and EPIPE, and error_atom(socket_error) for
 * anything else.
 *
 * Returns the result of erl_drv_output_term().
 */
static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error) {
    ErlDrvTermData msg[LOAD_PORT_CNT + LOAD_TUPLE_CNT * 2 + LOAD_ATOM_CNT * 3];
    ErlDrvTermData reason;
    int pos;

    /* Sendfile state is not cleaned up here; that happens in
     * tcp_desc_close as part of the normal error handling. This
     * function only reports the failure. */

    pos = LOAD_ATOM(msg, 0, am_sendfile);
    pos = LOAD_PORT(msg, pos, desc->inet.dport);
    pos = LOAD_ATOM(msg, pos, am_error);

    if (socket_error == ECONNRESET
        || socket_error == ENOTCONN
        || socket_error == EPIPE) {
        reason = am_closed;
    } else {
        reason = error_atom(socket_error);
    }
    pos = LOAD_ATOM(msg, pos, reason);

    pos = LOAD_TUPLE(msg, pos, 2);
    pos = LOAD_TUPLE(msg, pos, 3);

    ASSERT(pos == sizeof(msg)/sizeof(*msg));

    return erl_drv_output_term(desc->inet.dport, msg, pos);
}
11792 
/* Drive an ongoing sendfile operation on a TCP socket.
 *
 * Works in two phases:
 *   1. Send the first sendfile.ioq_skip bytes of the driver queue with
 *      sock_send(), dequeuing what was written.
 *   2. Send the file itself in chunks of at most (1 << 30) - 1 bytes using
 *      the platform specific sendfile call, advancing sendfile.offset,
 *      sendfile.length and sendfile.bytes_sent as data goes out.
 *
 * tcp_sendfile_completed() is called once sendfile.length reaches zero.
 *
 * Returns 0 unless a socket error other than "would block"/EINTR occurs; in
 * that case the failure is reported through tcp_sendfile_aborted() and the
 * result of tcp_send_error() is returned. */
static int tcp_inet_sendfile(tcp_descriptor* desc) {
    ErlDrvPort ix = desc->inet.port;
    int result = 0;
    ssize_t n;

    DEBUGF(("tcp_inet_sendfile(%ld) {s=%d\r\n", (long)ix, desc->inet.s));

    /* If there was any data in the queue by the time sendfile was issued,
     * we'll need to skip it first. Note that we don't clear busy status until
     * we're finished sending the file. */
    while (desc->sendfile.ioq_skip > 0) {
        ssize_t bytes_to_send;
        SysIOVec* iov;
        int vsize;

        ASSERT(driver_sizeq(ix) >= desc->sendfile.ioq_skip);

        if ((iov = driver_peekq(ix, &vsize)) == NULL) {
            ERTS_INTERNAL_ERROR("ioq empty when sendfile.ioq_skip > 0");
        }

        /* Only the first queue element is sent per iteration, and never more
         * than what remains to be skipped. */
        bytes_to_send = MIN(desc->sendfile.ioq_skip, iov[0].iov_len);
        n = sock_send(desc->inet.s, iov[0].iov_base, bytes_to_send, 0);

        if (!IS_SOCKET_ERROR(n)) {
            desc->sendfile.ioq_skip -= n;
            driver_deq(ix, n);
        } else if (sock_errno() == ERRNO_BLOCK) {
#ifdef __WIN32__
            desc->inet.send_would_block = 1;
#endif
            /* Would block: leave with result 0. */
            goto done;
        } else if (sock_errno() != EINTR) {
            goto socket_error;
        }
        /* EINTR falls through and the loop retries the send. */
    }

    while (desc->sendfile.length > 0) {
        /* For some reason the maximum ssize_t cannot be used as the max size.
         * 1GB seems to work on all platforms */
        const Sint64 SENDFILE_CHUNK_SIZE = ((1UL << 30) - 1);

        ssize_t bytes_to_send = MIN(SENDFILE_CHUNK_SIZE, desc->sendfile.length);
        /* Local copy of the file offset; desc->sendfile.offset itself is
         * only advanced further down, by the number of bytes sent. */
        off_t offset = desc->sendfile.offset;

#if defined(__linux__)
        n = sendfile(desc->inet.s, desc->sendfile.dup_file_fd, &offset,
            bytes_to_send);
#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__DARWIN__)
        {
            off_t bytes_sent;
            int error;

    #if defined(__DARWIN__)
            /* Here bytes_sent is both input (requested length) and output. */
            bytes_sent = bytes_to_send;

            error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset,
                &bytes_sent, NULL, 0);
            n = bytes_sent;
    #else
            error = sendfile(desc->sendfile.dup_file_fd, desc->inet.s, offset,
                bytes_to_send, NULL, &bytes_sent, 0);
            n = bytes_sent;
    #endif

            if(error < 0) {
                /* EAGAIN/EINTR report partial success by setting bytes_sent,
                 * so we have to skip error handling if nonzero, and skip EOF
                 * handling if zero, as it's possible that we didn't manage to
                 * send anything at all before being interrupted by a
                 * signal. */
                if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) {
                    n = -1;
                }
            }
        }
#elif defined(__sun) && defined(__SVR4) && defined(HAVE_SENDFILEV)
        {
            sendfilevec_t sfvec[1];
            size_t bytes_sent;
            ssize_t error;

            sfvec[0].sfv_fd = desc->sendfile.dup_file_fd;
            sfvec[0].sfv_len = bytes_to_send;
            sfvec[0].sfv_off = offset;
            sfvec[0].sfv_flag = 0;

            error = sendfilev(desc->inet.s, sfvec, 1, &bytes_sent);
            n = bytes_sent;

            if(error < 0) {
                if(errno == EINVAL) {
                    /* On some solaris versions (I've seen it on SunOS 5.10),
                     * using a sfv_len larger than the filesize will result in
                     * a (-1 && errno == EINVAL). We translate this to a
                     * successful send of the data.*/
                } else {
                    /* EAGAIN/EINTR behavior is identical to *BSD. */
                    if((errno != EAGAIN && errno != EINTR) || bytes_sent == 0) {
                        n = -1;
                    }
                }
            }
        }
#else
        #error "Unsupported sendfile syscall; update configure test."
#endif

        if (n > 0) {
            /* Progress was made: account for it and go around again. */
            desc->sendfile.bytes_sent += n;
            desc->sendfile.offset += n;
            desc->sendfile.length -= n;
        } else if (n == 0) {
            /* EOF. */
            desc->sendfile.length = 0;
            break;
        } else if (IS_SOCKET_ERROR(n) && sock_errno() != EINTR) {
            if (sock_errno() != ERRNO_BLOCK) {
                goto socket_error;
            }

#ifdef __WIN32__
            desc->inet.send_would_block = 1;
#endif
            /* Would block: stop for now, sendfile.length is still > 0. */
            break;
        }
        /* A failure with EINTR matches no branch above, so the loop retries. */
    }

    if (desc->sendfile.length == 0) {
        tcp_sendfile_completed(desc);
    }

    goto done;

socket_error: {
        /* Capture the error code once, before the calls below are made. */
        int socket_errno = sock_errno();

        DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n",
            (long)desc->inet.port, socket_errno, errno));

        tcp_sendfile_aborted(desc, socket_errno);
        result = tcp_send_error(desc, socket_errno);

        goto done;
    }

done:
    DEBUGF(("tcp_inet_sendfile(%ld) }\r\n", (long)desc->inet.port));
    return result;
}
11943 #endif /* HAVE_SENDFILE */
11944 
/* socket ready for output:
** 1. INET_STATE_CONNECTING => non block connect ?
** 2. INET_STATE_CONNECTED  => write output
**
** In the connecting case the outcome of the pending connect is checked and
** reported with async_ok()/async_error(). In the connected case the driver
** queue is written to the socket until it is empty, the send would block or
** an error occurs; while a sendfile operation is flagged
** (TCP_ADDF_SENDFILE) the work is handed over to tcp_inet_sendfile().
**
** The "event" argument is not referenced in the body.
**
** Returns 0, or the value produced by async_error(), tcp_send_error() or
** tcp_inet_sendfile().
*/
static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
{
    int ret = 0;
    ErlDrvPort ix = desc->inet.port;

    ASSERT(!INET_IGNORED(INETP(desc)));
    DEBUGF(("tcp_inet_output(%ld) {s=%d\r\n",
	    (long)desc->inet.port, desc->inet.s));
    if (desc->inet.state == INET_STATE_CONNECTING) {
	sock_select(INETP(desc),FD_CONNECT,0);

	driver_cancel_timer(ix);  /* possibly cancel a timer */
#ifndef __WIN32__
	/*
	 * XXX This is strange.  This *should* work on Windows NT too,
	 * but doesn't.  A bug in Winsock 2.0 for Windows NT?
	 *
	 * See "Unix Network Programming", W.R.Stevens, p 412 for a
	 * discussion about Unix portability and non blocking connect.
	 */

#ifndef SO_ERROR
	{
	    /* Without SO_ERROR, the peer address lookup is used to find out
	     * whether the connect succeeded. */
	    int sz, code;
            sz = sizeof(desc->inet.remote);
            sys_memzero((char *) &desc->inet.remote, sz);
	    code = sock_peer(desc->inet.s,
                             (struct sockaddr*) &desc->inet.remote, &sz);
	    if (IS_SOCKET_ERROR(code)) {
		desc->inet.state = INET_STATE_OPEN;  /* restore state */
		ret =  async_error(INETP(desc), sock_errno());
		goto done;
	    }
	}
#else
	{
	    int error = 0;	/* Has to be initiated, we check it */
	    unsigned int sz = sizeof(error); /* even if we get -1 */
	    int code = sock_getopt(desc->inet.s, SOL_SOCKET, SO_ERROR,
				   (void *)&error, &sz);

	    if ((code < 0) || error) {
		desc->inet.state = INET_STATE_OPEN;  /* restore state */
		ret = async_error(INETP(desc), error);
		goto done;
	    }
	}
#endif /* SO_ERROR */
#endif /* !__WIN32__ */

	desc->inet.state = INET_STATE_CONNECTED;
	if (desc->inet.active)
	    sock_select(INETP(desc),(FD_READ|FD_CLOSE),1);
	async_ok(INETP(desc));
    }
    else if (IS_CONNECTED(INETP(desc))) {

#ifdef HAVE_SENDFILE
        /* Note: this returns directly, bypassing the "done" label below. */
        if(desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
            return tcp_inet_sendfile(desc);
        }
#endif

        for (;;) {
	    int vsize;
	    ssize_t n;
	    SysIOVec* iov;

	    if ((iov = driver_peekq(ix, &vsize)) == NULL) {
		/* Queue drained: stop write selection, send the empty-queue
		 * messages and run a pending shutdown, if any. */
		sock_select(INETP(desc), FD_WRITE, 0);
		send_empty_out_q_msgs(INETP(desc));
		if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN)
		    tcp_shutdown_async(desc);
		goto done;
	    }
	    vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize;
	    DEBUGF(("tcp_inet_output(%ld): s=%d, About to send %d items\r\n",
		    (long)desc->inet.port, desc->inet.s, vsize));
	    if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, iov, vsize, &n, 0))) {
	    /* Also entered by goto from the n == 0 workaround below. */
	    write_error:
		if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
		    DEBUGF(("tcp_inet_output(%ld): sock_sendv(%d) errno = %d (errno %d)\r\n",
			    (long)desc->inet.port, vsize, sock_errno(), errno));
		    ret =  tcp_send_error(desc, sock_errno());
		    goto done;
		}
#ifdef __WIN32__
		desc->inet.send_would_block = 1;
#endif
                /* If DELAY_SEND is set ready_output may have
                   been called without doing select so we do
                   a select in order to get into the correct
                   state */
                if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND)
                    sock_select(INETP(desc), FD_WRITE, 1);
		goto done;
	    } else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning
				    0 when sending packets with
				    sizes > (max 32 bit signed int) */
	      size_t howmuch = 0x7FFFFFFF; /* max signed 32 bit */
	      int x;
	      /* Find the first non-empty element of the vector. */
	      for(x = 0; x < vsize && iov[x].iov_len == 0; ++x)
		;
	      if (x < vsize) {
		if (howmuch > iov[x].iov_len) {
		  howmuch = iov[x].iov_len;
		}
		n = sock_send(desc->inet.s, iov[x].iov_base,howmuch,0);
		if (IS_SOCKET_ERROR(n)) {
		  goto write_error;
		}
	      }
	    }
	    /* Dequeue what was written; once the queue size is at or below
	     * desc->low a busy port is released. */
	    if (driver_deq(ix, n) <= desc->low) {
		if (IS_BUSY(INETP(desc))) {
		    desc->inet.caller = desc->inet.busy_caller;
		    desc->inet.state &= ~INET_F_BUSY;
		    set_busy_port(desc->inet.port, 0);
		    /* if we have a timer then cancel and send ok to client */
		    if (desc->busy_on_send) {
                        cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
			desc->busy_on_send = 0;
		    }
		    inet_reply_ok(INETP(desc));
		}
	    }
	}
    }
    else {
	/* Neither connecting nor connected: just stop connect selection. */
	sock_select(INETP(desc),FD_CONNECT,0);
	DEBUGF(("tcp_inet_output(%ld): bad state: %04x\r\n",
		(long)desc->inet.port, desc->inet.state));
    }
 done:
    DEBUGF(("tcp_inet_output(%ld) }\r\n", (long)desc->inet.port));
    return ret;
}
12086 
12087 /*-----------------------------------------------------------------------------
12088 
12089    UDP & SCTP (the latter in a 1<->M Mode)
12090 
12091 -----------------------------------------------------------------------------*/
12092 
12093 #if defined(HAVE_SO_BSDCOMPAT)
12094 #if defined(__linux__)
12095 #include <sys/utsname.h>
/* Decide whether SO_BSDCOMPAT should be set on packet sockets.
 *
 * The kernel release reported by uname() is parsed once and the outcome is
 * cached in function-local statics. Returns 0 when the kernel is 2.5 or
 * newer (where the option is obsolete), otherwise 1. If the release cannot
 * be obtained or parsed, a message is written to stderr and 1 is returned,
 * both for that call and for all later ones. */
static int should_use_so_bsdcompat(void)
{
    /* SMP: FIXME this is probably not SMP safe but may be ok anyway?
       The result is stored before init_done is raised so that a caller
       observing init_done is less likely to read a not yet computed
       result; no memory barriers are used, so this is best effort only. */
    static int init_done;
    static int so_bsdcompat_is_obsolete;

    if (!init_done) {
	struct utsname utsname;
	unsigned int version, patchlevel;

	if (uname(&utsname) < 0) {
	    /* uname() is not a socket call: report plain errno */
	    fprintf(stderr, "uname: %s\r\n", strerror(errno));
	    init_done = 1;
	    return 1;
	}
	/* Format is <version>.<patchlevel>.<sublevel><extraversion>
	   where the first three are unsigned integers and the last
	   is an arbitrary string. We only care about the first two. */
	if (sscanf(utsname.release, "%u.%u", &version, &patchlevel) != 2) {
	    fprintf(stderr, "uname: unexpected release '%s'\r\n",
		    utsname.release);
	    init_done = 1;
	    return 1;
	}
	/* SO_BSDCOMPAT is deprecated and triggers warnings in 2.5
	   kernels. It is a no-op in 2.4 but not in 2.2 kernels. */
	if (version > 2 || (version == 2 && patchlevel >= 5))
	    so_bsdcompat_is_obsolete = 1;
	init_done = 1;
    }
    return !so_bsdcompat_is_obsolete;
}
12126 #else	/* __linux__ */
12127 #define should_use_so_bsdcompat() 1
12128 #endif	/* __linux__ */
12129 #endif	/* HAVE_SO_BSDCOMPAT */
12130 
12131 
12132 
12133 #ifdef HAVE_SCTP
12134 /* Copy a descriptor, by creating a new port with same settings
12135  * as the descriptor desc.
12136  * return NULL on error (ENFILE no ports avail)
12137  */
12138 static ErlDrvData packet_inet_start(ErlDrvPort port, char* args, int protocol);
12139 
/* Create a new "sctp_inet" port whose descriptor inherits selected settings
 * from desc and uses socket s.
 *
 * On success the new descriptor is returned and *err is set to 0. On failure
 * NULL is returned and *err holds an error code; the socket s is not closed
 * here, that is left to the caller. */
static udp_descriptor* sctp_inet_copy(udp_descriptor* desc, SOCKET s, int* err)
{
    ErlDrvSizeT q_low, q_high;
    ErlDrvPort port = desc->inet.port;
    ErlDrvData drvd;
    udp_descriptor* copy_desc;

    drvd = packet_inet_start(port, NULL, IPPROTO_SCTP);
    if (drvd == ERL_DRV_ERROR_ERRNO) {
	/* packet_inet_start() could not create a descriptor; the value
	   returned is an error marker and must not be dereferenced.
	   ERL_DRV_ERROR_ERRNO presumably means the reason is in errno;
	   fall back to ENOMEM should errno be unset. */
	*err = (errno != 0) ? errno : ENOMEM;
	return NULL;
    }
    copy_desc = (udp_descriptor*) drvd;

    /* Setup event if needed */
    if ((copy_desc->inet.s = s) != INVALID_SOCKET) {
	if ((copy_desc->inet.event = sock_create_event(INETP(copy_desc))) ==
	    INVALID_EVENT) {
	    *err = sock_errno();
	    FREE(copy_desc);
	    return NULL;
	}
    }

    /* Some flags must be inherited at this point */
    copy_desc->inet.mode     = desc->inet.mode;
    copy_desc->inet.exitf    = desc->inet.exitf;
    copy_desc->inet.deliver  = desc->inet.deliver;
    copy_desc->inet.htype    = desc->inet.htype;
    copy_desc->inet.psize    = desc->inet.psize;
    copy_desc->inet.stype    = desc->inet.stype;
    copy_desc->inet.sfamily  = desc->inet.sfamily;
    copy_desc->inet.hsz      = desc->inet.hsz;
    copy_desc->inet.bufsz    = desc->inet.bufsz;

    /* The new port will be linked and connected to the caller */
    port = driver_create_port(port, desc->inet.caller, "sctp_inet",
			      (ErlDrvData) copy_desc);
    if ((long)port == -1) {
	*err = ENFILE;
	FREE(copy_desc);
	return NULL;
    }

    /* Read busy msgq limits of parent */
    q_low = q_high = ERL_DRV_BUSY_MSGQ_READ_ONLY;
    erl_drv_busy_msgq_limits(desc->inet.port, &q_low, &q_high);
    /* Write same busy msgq limits to child */
    erl_drv_busy_msgq_limits(port, &q_low, &q_high);

    copy_desc->inet.port = port;
    copy_desc->inet.dport = driver_mk_port(port);
    *err = 0;

    return copy_desc;
}
12190 #endif
12191 
12192 
12193 
12194 #ifdef HAVE_UDP
/* Driver init hook for the packet (UDP/SCTP) drivers. There is nothing to
 * set up, so it always returns 0. */
static int packet_inet_init(void)
{
    return 0;
}
12199 
packet_inet_start(ErlDrvPort port,char * args,int protocol)12200 static ErlDrvData packet_inet_start(ErlDrvPort port, char* args, int protocol)
12201 {
12202     /* "inet_start" returns "ErlDrvData", but in fact it is "inet_descriptor*",
12203        so we can preserve it as "ErlDrvData":
12204     */
12205     ErlDrvData	    drvd = inet_start(port, sizeof(udp_descriptor),
12206 				      protocol);
12207     udp_descriptor* desc = (udp_descriptor*) drvd;
12208 
12209     if (desc == NULL)
12210 	return ERL_DRV_ERROR_ERRNO;
12211 
12212     desc->read_packets = INET_PACKET_POLL;
12213     desc->i_bufsz = 0;
12214     desc->i_buf = NULL;
12215     desc->i_ptr = NULL;
12216     return drvd;
12217 }
12218 
udp_inet_start(ErlDrvPort port,char * args)12219 static ErlDrvData udp_inet_start(ErlDrvPort port, char *args)
12220 {
12221     ErlDrvData data = packet_inet_start(port, args, IPPROTO_UDP);
12222     set_default_msgq_limits(port);
12223     return data;
12224 }
12225 #endif
12226 
12227 #ifdef HAVE_SCTP
sctp_inet_start(ErlDrvPort port,char * args)12228 static ErlDrvData sctp_inet_start(ErlDrvPort port, char *args)
12229 {
12230     ErlDrvData data = packet_inet_start(port, args, IPPROTO_SCTP);
12231     set_default_msgq_limits(port);
12232     return data;
12233 }
12234 #endif
12235 
12236 #ifdef HAVE_UDP
/* Stop callback for UDP/SCTP ports: releases the input buffer, if one is
 * held, and then stops the underlying inet descriptor. */
static void packet_inet_stop(ErlDrvData e)
{
    /* As in "inet_start", the "ErlDrvData" handle can always be cast to
       "udp_descriptor*" (or "inet_descriptor*"). */
    udp_descriptor* udesc = (udp_descriptor*) e;

    if (udesc->i_buf != NULL) {
	release_buffer(udesc->i_buf);
	udesc->i_buf = NULL;
    }

    /* An UDP or SCTP socket should *never* have any "empty out q"
       subscribers. */
    ASSERT(NO_SUBSCRIBERS(&(INETP(udesc)->empty_out_q_subs)));

    inet_stop(INETP(udesc));
}
12254 
/* Fail the port with the POSIX error err. When the socket is not in active
 * mode, async_error() is called with the error first. Always returns -1. */
static int packet_error(udp_descriptor* udesc, int err)
{
    inet_descriptor* inetp = INETP(udesc);

    if (inetp->active == 0) {
	async_error(inetp, err);
    }

    driver_failure_posix(inetp->port, err);

    return -1;
}
12263 
12264 /*
12265 ** Various functions accessible via "port_control" on the Erlang side:
12266 */
packet_inet_ctl(ErlDrvData e,unsigned int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rsize)12267 static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf,
12268 				    ErlDrvSizeT len, char** rbuf, ErlDrvSizeT rsize)
12269 {
12270     ErlDrvSSizeT replen;
12271     udp_descriptor * udesc = (udp_descriptor *) e;
12272     inet_descriptor* desc  = INETP(udesc);
12273     int type = SOCK_DGRAM;
12274     int af = AF_INET;
12275 
12276     cmd -= ERTS_INET_DRV_CONTROL_MAGIC_NUMBER;
12277     switch(cmd) {
12278     case INET_REQ_OPEN:   /* open socket and return internal index */
12279 	DEBUGF(("packet_inet_ctl(%ld): OPEN\r\n", (long)desc->port));
12280 	if (len != 2) {
12281 	    return ctl_error(EINVAL, rbuf, rsize);
12282 	}
12283 	switch (buf[0]) {
12284 	case INET_AF_INET:  af = AF_INET; break;
12285 #if defined(HAVE_IN6) && defined(AF_INET6)
12286 	case INET_AF_INET6: af = AF_INET6; break;
12287 #endif
12288 #ifdef HAVE_SYS_UN_H
12289 	case INET_AF_LOCAL: af = AF_UNIX; break;
12290 #endif
12291 	default:
12292 	    return ctl_xerror(str_eafnosupport, rbuf, rsize);
12293 	}
12294 	switch (buf[1]) {
12295 	case INET_TYPE_STREAM: type = SOCK_STREAM; break;
12296 	case INET_TYPE_DGRAM: type = SOCK_DGRAM; break;
12297 #ifdef HAVE_SCTP
12298 	case INET_TYPE_SEQPACKET: type = SOCK_SEQPACKET; break;
12299 #endif
12300 	default:
12301 	    return ctl_error(EINVAL, rbuf, rsize);
12302 	}
12303 	replen = inet_ctl_open(desc, af, type, rbuf, rsize);
12304 
12305 	if ((*rbuf)[0] != INET_REP_ERROR) {
12306 	    if (desc->active)
12307 		sock_select(desc,FD_READ,1);
12308 #ifdef HAVE_SO_BSDCOMPAT
12309 	    /*
12310 	     * Make sure that sending UDP packets to a non existing port on an
12311 	     * existing machine doesn't close the socket. (Linux behaves this
12312 	     * way)
12313 	     */
12314 	    if (should_use_so_bsdcompat()) {
12315 		int one = 1;
12316 		/* Ignore errors */
12317 		sock_setopt(desc->s, SOL_SOCKET, SO_BSDCOMPAT, &one,
12318 			    sizeof(one));
12319 	    }
12320 #endif
12321 	}
12322 	return replen;
12323 
12324 
12325     case INET_REQ_FDOPEN: {  /* pass in an open (and optionally bound) socket */
12326 	SOCKET s;
12327         int bound;
12328 	DEBUGF(("packet inet_ctl(%ld): FDOPEN\r\n", (long)desc->port));
12329 	if (len != 6 && len != 10) {
12330 	    return ctl_error(EINVAL, rbuf, rsize);
12331 	}
12332 	switch (buf[0]) {
12333 	case INET_AF_INET:  af = AF_INET; break;
12334 #if defined(HAVE_IN6) && defined(AF_INET6)
12335 	case INET_AF_INET6: af = AF_INET6; break;
12336 #endif
12337 #ifdef HAVE_SYS_UN_H
12338 	case INET_AF_LOCAL: af = AF_UNIX; break;
12339 #endif
12340 	default:
12341 	    return ctl_xerror(str_eafnosupport, rbuf, rsize);
12342 	}
12343 	switch (buf[1]) {
12344 	case INET_TYPE_STREAM: type = SOCK_STREAM; break;
12345 	case INET_TYPE_DGRAM: type = SOCK_DGRAM; break;
12346 #ifdef HAVE_SCTP
12347 	case INET_TYPE_SEQPACKET: type = SOCK_SEQPACKET; break;
12348 #endif
12349 	default:
12350 	    return ctl_error(EINVAL, rbuf, rsize);
12351 	}
12352 	s = (SOCKET)get_int32(buf+2);
12353 
12354         if (len == 6) bound = 1;
12355         else bound = get_int32(buf+2+4);
12356 
12357 	replen = inet_ctl_fdopen(desc, af, type, s, bound, rbuf, rsize);
12358 
12359 	if ((*rbuf)[0] != INET_REP_ERROR) {
12360 	    if (desc->active)
12361 		sock_select(desc,FD_READ,1);
12362 #ifdef HAVE_SO_BSDCOMPAT
12363 	    /*
12364 	     * Make sure that sending UDP packets to a non existing port on an
12365 	     * existing machine doesn't close the socket. (Linux behaves this
12366 	     * way)
12367 	     */
12368 	    if (should_use_so_bsdcompat()) {
12369 		int one = 1;
12370 		/* Ignore errors */
12371 		sock_setopt(desc->s, SOL_SOCKET, SO_BSDCOMPAT, &one,
12372 			    sizeof(one));
12373 	    }
12374 #endif
12375 	}
12376 	return replen;
12377     }
12378 
12379 
12380     case INET_REQ_CLOSE:
12381 	DEBUGF(("packet_inet_ctl(%ld): CLOSE\r\n", (long)desc->port));
12382 	erl_inet_close(desc);
12383 	return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
12384 
12385 
12386     case INET_REQ_CONNECT:  {
12387 	/* UDP and SCTP connect operations are completely different. UDP
12388 	   connect means only setting the default peer addr locally,  so
12389 	   it is always synchronous. SCTP connect means actual establish-
12390 	   ing of an SCTP association with a remote peer, so it is async-
12391 	   ronous, and similar to TCP connect. However, unlike TCP, SCTP
12392 	   allows the socket to have multiple simultaneous associations:
12393 	*/
12394 	int code;
12395 	char tbuf[2];
12396 #ifdef HAVE_SCTP
12397 	unsigned timeout;
12398 #endif
12399 	DEBUGF(("packet_inet_ctl(%ld): CONNECT\r\n", (long)desc->port));
12400 
12401 	/* INPUT: [ Timeout(4), Port(2), Address(N) ] */
12402 
12403 	if (!IS_OPEN(desc))
12404 	    return ctl_xerror(EXBADPORT, rbuf, rsize);
12405 
12406 #ifdef HAVE_SCTP
12407 	if (IS_SCTP(desc)) {
12408 	    inet_address remote;
12409 	    char *xerror;
12410 
12411 	    if (IS_CONNECTING(desc))
12412 		return ctl_error(EINVAL, rbuf, rsize);
12413 	    if (len < 6)
12414 		return ctl_error(EINVAL, rbuf, rsize);
12415 	    timeout = get_int32(buf);
12416 	    buf += 4;
12417 	    len -= 4;
12418 
12419 	    /* For SCTP, we do not set the peer's addr in desc->remote, as
12420 	       multiple peers are possible: */
12421 	    if ((xerror = inet_set_faddress
12422 		 (desc->sfamily, &remote, &buf, &len)) != NULL)
12423 	        return ctl_xerror(xerror, rbuf, rsize);
12424 
12425 	    code = sock_connect(desc->s, &remote.sa, len);
12426 
12427 	    if (IS_SOCKET_ERROR(code) && (sock_errno() == EINPROGRESS)) {
12428 		/* XXX: Unix only -- WinSock would have a different cond! */
12429 		if (timeout != INET_INFINITY)
12430 		    driver_set_timer(desc->port, timeout);
12431 		enq_async(desc, tbuf, INET_REQ_CONNECT);
12432 		async_ok(desc);
12433 	    }
12434 	    else if (code == 0) { /* OK we are connected */
12435 		enq_async(desc, tbuf, INET_REQ_CONNECT);
12436 		async_ok(desc);
12437 	    }
12438 	    else {
12439 		return ctl_error(sock_errno(), rbuf, rsize);
12440 	    }
12441 	    return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
12442 	}
12443 #endif
12444 	/* UDP */
12445 	if (len == 0) {
12446 	    /* What does it mean???  NULL sockaddr??? */
12447 	    sock_connect(desc->s, (struct sockaddr*) NULL, 0);
12448 	    desc->state &= ~INET_F_ACTIVE;
12449 	    enq_async(desc, tbuf, INET_REQ_CONNECT);
12450 	    async_ok (desc);
12451 	}
12452 	else if (len < 6)
12453 	    return ctl_error(EINVAL, rbuf, rsize);
12454 	else {
12455 	    char *xerror;
12456 	    /* Ignore timeout */
12457 	    buf += 4;
12458 	    len -= 4;
12459 	    if ((xerror = inet_set_faddress
12460 		 (desc->sfamily, &desc->remote, &buf, &len)) != NULL)
12461 	        return ctl_xerror(xerror, rbuf, rsize);
12462 
12463 	    code = sock_connect(desc->s,
12464 				(struct sockaddr*) &desc->remote, len);
12465 	    if (IS_SOCKET_ERROR(code)) {
12466 		sock_connect(desc->s, (struct sockaddr*) NULL, 0);
12467 		desc->state &= ~INET_F_ACTIVE;
12468 		return ctl_error(sock_errno(), rbuf, rsize);
12469 	    }
12470 	    else /* ok we are connected */ {
12471 		enq_async(desc, tbuf, INET_REQ_CONNECT);
12472 		desc->state |= INET_F_ACTIVE;
12473 		async_ok (desc);
12474 	    }
12475 	}
12476 	return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
12477     }
12478 
12479 #ifdef HAVE_SCTP
12480     case INET_REQ_LISTEN:
12481 	{	/* LISTEN is only for SCTP sockets, not UDP. This code is borrowed
12482 		   from the TCP section. Returns: {ok,[]} on success.
12483 		*/
12484 	    int backlog;
12485 
12486 	    DEBUGF(("packet_inet_ctl(%ld): LISTEN\r\n", (long)desc->port));
12487 	    if (!IS_SCTP(desc))
12488 		return ctl_xerror(EXBADPORT, rbuf, rsize);
12489 	    if (!IS_OPEN(desc))
12490 		return ctl_xerror(EXBADPORT, rbuf, rsize);
12491 
12492 	    if (len != 2)
12493 		return ctl_error(EINVAL, rbuf, rsize);
12494 	    backlog = get_int16(buf);
12495 
12496 	    if (IS_SOCKET_ERROR(sock_listen(desc->s, backlog)))
12497 		return ctl_error(sock_errno(), rbuf, rsize);
12498 
12499 	    desc->state = INET_STATE_LISTENING;   /* XXX: not used? */
12500 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
12501 	}
12502 
12503     case SCTP_REQ_BINDX:
12504 	{   /* Multi-homing bind for SCTP: */
12505 	    /* Add additional addresses by calling sctp_bindx with one address
12506 	       at a time, since this is what some OSes promise will work.
12507 	       Buff structure: Flags(1), ListItem,...:
12508 	    */
12509 	    inet_address addr;
12510 	    char* curr;
12511 	    int   add_flag, rflag;
12512 
12513 	    if (!IS_SCTP(desc))
12514 		return ctl_xerror(EXBADPORT, rbuf, rsize);
12515 
12516 	    curr = buf;
12517 	    add_flag = get_int8(curr);
12518 	    curr++;
12519 
12520 	    /* Make the real flags: */
12521 	    rflag = add_flag ? SCTP_BINDX_ADD_ADDR : SCTP_BINDX_REM_ADDR;
12522 
12523 	    while (curr < buf+len)
12524 		{
12525 		    char *xerror;
12526 		    /* List item format: see "inet_set_faddress": */
12527 		    ErlDrvSizeT alen  = buf + len - curr;
12528 		    xerror = inet_set_faddress
12529 		      (desc->sfamily, &addr, &curr, &alen);
12530 		    if (xerror != NULL)
12531 		        return ctl_xerror(xerror, rbuf, rsize);
12532 
12533 		    /* Invoke the call: */
12534 		    if (p_sctp_bindx(desc->s, (struct sockaddr *)&addr, 1,
12535 				     rflag) < 0)
12536 			return ctl_error(sock_errno(), rbuf, rsize);
12537 		}
12538 
12539 	    desc->state = INET_STATE_OPEN;
12540 
12541 	    return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
12542 	}
12543 
12544     case SCTP_REQ_PEELOFF:
12545 	{
12546 	    Uint32 assoc_id;
12547 	    udp_descriptor* new_udesc;
12548 	    int err;
12549 	    SOCKET new_socket;
12550 
12551 	    DEBUGF(("packet_inet_ctl(%ld): PEELOFF\r\n", (long)desc->port));
12552 	    if (!IS_SCTP(desc))
12553 		return ctl_xerror(EXBADPORT, rbuf, rsize);
12554 	    if (!IS_OPEN(desc))
12555 		return ctl_xerror(EXBADPORT, rbuf, rsize);
12556 	    if (! p_sctp_peeloff)
12557 		return ctl_error(ENOTSUP, rbuf, rsize);
12558 
12559 	    if (len != 4)
12560 		return ctl_error(EINVAL, rbuf, rsize);
12561 	    assoc_id = get_int32(buf);
12562 
12563 	    new_socket = p_sctp_peeloff(desc->s, assoc_id);
12564 	    if (IS_SOCKET_ERROR(new_socket)) {
12565 		return ctl_error(sock_errno(), rbuf, rsize);
12566 	    }
12567 
12568 	    desc->caller = driver_caller(desc->port);
12569 	    if ((new_udesc = sctp_inet_copy(udesc, new_socket, &err)) == NULL) {
12570 		sock_close(new_socket);
12571 		desc->caller = 0;
12572 		return ctl_error(err, rbuf, rsize);
12573 	    }
12574 	    new_udesc->inet.state = INET_STATE_CONNECTED;
12575 	    new_udesc->inet.stype = SOCK_STREAM;
12576 	    SET_NONBLOCKING(new_udesc->inet.s);
12577 
12578 	    inet_reply_ok_port(desc, new_udesc->inet.dport);
12579 	    (*rbuf)[0] = INET_REP;
12580 	    return 1;
12581 	}
12582 #endif  /* HAVE_SCTP */
12583 
12584     case PACKET_REQ_RECV:
12585 	{	/* THIS IS A FRONT-END for "recv*" requests. It only enqueues the
12586 		   request  and possibly returns the data  immediately available.
12587 		   The actual data returning function is the back-end ("*input"):
12588 		*/
12589 	    unsigned timeout;
12590 	    char tbuf[2];
12591 
12592 	    DEBUGF(("packet_inet_ctl(%ld): RECV\r\n", (long)desc->port));
12593 	    /* INPUT: Timeout(4), Length(4) */
12594 	    if (!IS_OPEN(desc))
12595 		return ctl_xerror(EXBADPORT, rbuf, rsize);
12596 	    if (desc->active || (len != 8))
12597 		return ctl_error(EINVAL, rbuf, rsize);
12598 	    timeout = get_int32(buf);
12599 	    /* The 2nd arg, Length(4), is ignored for both UDP and SCTP protocols,
12600 	       since they are msg-oriented. */
12601 
12602 	    if (enq_async(desc, tbuf, PACKET_REQ_RECV) < 0)
12603 		return ctl_error(EALREADY, rbuf, rsize);
12604 
12605 	    if (packet_inet_input(udesc, desc->event) == 0) {
12606 		if (timeout == 0)
12607 		    async_error_am(desc, am_timeout);
12608 		else {
12609 		    if (timeout != INET_INFINITY)
12610 			driver_set_timer(desc->port, timeout);
12611 		}
12612 	    }
12613 	    return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize);
12614 	}
12615 
12616     default:
12617 	/* Delegate the request to the INET layer. In particular,
12618 	   INET_REQ_BIND goes here. If the req is not recognised
12619 	   there either, an error is returned:
12620 	*/
12621 	return inet_ctl(desc, cmd, buf, len, rbuf, rsize);
12622     }
12623 }
12624 
/* Timer callback for UDP/SCTP ports.
 *
 * In active mode the input routine is run once more. Otherwise read
 * selection is turned off and a 'timeout' error is delivered through
 * async_error_am(). */
static void packet_inet_timeout(ErlDrvData e)
{
    udp_descriptor* udesc = (udp_descriptor*) e;
    inet_descriptor* inetp = INETP(udesc);

    if (inetp->active) {
        (void)packet_inet_input(udesc, (HANDLE) inetp->s);
        return;
    }

    sock_select(inetp, FD_READ, 0);
    async_error_am (inetp, am_timeout);
}
12636 
12637 
12638 /* THIS IS A "send*" REQUEST; on the Erlang side: "port_command".
12639 ** input should be: Family Address buffer .
12640 ** For UDP,  buffer (after Address) is just data to be sent.
12641 ** For SCTP, buffer contains a list representing 2 items:
12642 **   (1) 6 parms for sctp_sndrcvinfo, as in sctp_get_sendparams();
12643 **   (2) 0+ real data bytes.
12644 ** There is no destination address -- SCTP send is performed over
12645 ** an existing association, using "sctp_sndrcvinfo" specified.
12646 */
packet_inet_command(ErlDrvData e,char * buf,ErlDrvSizeT len)12647 static void packet_inet_command(ErlDrvData e, char* buf, ErlDrvSizeT len)
12648 {
12649     udp_descriptor * udesc= (udp_descriptor*) e;
12650     inet_descriptor* desc = INETP(udesc);
12651     char* ptr		  = buf;
12652     char* qtr;
12653     char* xerror;
12654     ErlDrvSizeT sz;
12655     int code;
12656     inet_address other;
12657 
12658     desc->caller = driver_caller(desc->port);
12659 
12660     if (!IS_OPEN(desc)) {
12661 	inet_reply_error(desc, EINVAL);
12662 	return;
12663     }
12664 
12665 #ifdef HAVE_SCTP
12666     if (IS_SCTP(desc))
12667     {
12668 	ErlDrvSizeT   data_len;
12669 	struct iovec  iov[1];		 /* For real data              */
12670 	struct msghdr mhdr;		 /* Message wrapper            */
12671 	struct sctp_sndrcvinfo *sri;     /* The actual ancillary data  */
12672 	union {                          /* For ancillary data         */
12673 	    struct cmsghdr hdr;
12674 	    char ancd[CMSG_SPACE(sizeof(*sri))];
12675 	} cmsg;
12676 
12677 	if (len < SCTP_GET_SENDPARAMS_LEN) goto return_einval;
12678 
12679 	/* The ancillary data */
12680 	sri = (struct sctp_sndrcvinfo *) (CMSG_DATA(&cmsg.hdr));
12681 	/* Get the "sndrcvinfo" from the buffer, advancing the "ptr": */
12682 	ptr  = sctp_get_sendparams(sri, ptr);
12683 
12684 	/* The ancillary data wrapper */
12685 	cmsg.hdr.cmsg_level = IPPROTO_SCTP;
12686 	cmsg.hdr.cmsg_type  = SCTP_SNDRCV;
12687 	cmsg.hdr.cmsg_len   = CMSG_LEN(sizeof(*sri));
12688 
12689 	data_len = (buf + len) - ptr;
12690 	/* The whole msg.
12691 	 * Solaris (XPG 4.2) requires iovlen >= 1 even for data_len == 0.
12692 	 */
12693 	mhdr.msg_name           = NULL;	        /* Already connected  */
12694 	mhdr.msg_namelen        = 0;
12695 	iov[0].iov_len          = data_len;
12696 	iov[0].iov_base         = ptr;          /* The real data */
12697 	mhdr.msg_iov            = iov;
12698 	mhdr.msg_iovlen         = 1;
12699 	mhdr.msg_control        = cmsg.ancd;    /* For ancillary data  */
12700 	mhdr.msg_controllen     = cmsg.hdr.cmsg_len;
12701 	VALGRIND_MAKE_MEM_DEFINED(mhdr.msg_control, mhdr.msg_controllen); /*suppress "uninitialised bytes"*/
12702 	mhdr.msg_flags          = 0;            /* Not used with "sendmsg"   */
12703 
12704 	inet_output_count(desc, data_len);
12705 	/* Now do the actual sending. NB: "flags" in "sendmsg" itself are NOT
12706 	   used: */
12707 	code = sock_sendmsg(desc->s, &mhdr, 0);
12708 	goto check_result_code;
12709     }
12710 #endif
12711     {
12712         ErlDrvSizeT anc_len;
12713 
12714         /* UDP socket. Even if it is connected, there is an address prefix
12715            here -- ignored for connected sockets: */
12716         sz = len;
12717         qtr = ptr;
12718         xerror = inet_set_faddress(desc->sfamily, &other, &qtr, &sz);
12719         if (xerror != NULL) {
12720             inet_reply_error_am(desc, driver_mk_atom(xerror));
12721             return;
12722         }
12723         len -= (qtr - ptr);
12724         ptr = qtr;
12725 
12726         /* Here comes ancillary data */
12727         if (len < 4) goto return_einval;
12728         anc_len = get_int32(ptr);
12729         len -= 4; ptr += 4;
12730         if (len < anc_len) goto return_einval;
12731 
12732         if (anc_len == 0 && !!0/*XXX-short-circuit-for-testing*/) {
12733             /* Empty ancillary data */
12734             /* Now "ptr" is the user data ptr, "len" is data length: */
12735             inet_output_count(desc, len);
12736             if (desc->state & INET_F_ACTIVE) {
12737                 /* connected (ignore address) */
12738                 code = sock_send(desc->s, ptr, len, 0);
12739             }
12740             else {
12741                 code = sock_sendto(desc->s, ptr, len, 0, &other.sa, sz);
12742             }
12743         }
12744         else {
12745 #ifdef __WIN32__
12746             goto return_einval; /* Can not send ancillary data on Windows */
12747 #else
12748             struct iovec iov[1];
12749             struct msghdr mhdr;
12750             union { /* For ancillary data         */
12751                 struct cmsghdr hdr;
12752                 char ancd[SIZEOF_ANCILLARY_DATA];
12753             } cmsg;
12754             sys_memset(&iov, '\0', sizeof(iov));
12755             sys_memset(&mhdr, '\0', sizeof(mhdr));
12756             sys_memset(&cmsg, '\0', sizeof(cmsg));
12757             if (desc->state & INET_F_ACTIVE) {
12758                 /* connected (ignore address) */
12759                 mhdr.msg_name = NULL;
12760                 mhdr.msg_namelen = 0;
12761             }
12762             else {
12763                 mhdr.msg_name = &other;
12764                 mhdr.msg_namelen = sz;
12765             }
12766             mhdr.msg_control = cmsg.ancd;
12767             mhdr.msg_controllen = sizeof(cmsg.ancd);
12768             if (compile_ancillary_data(&mhdr, ptr, anc_len) != 0) {
12769                 goto return_einval;
12770             }
12771             if (mhdr.msg_controllen == 0) {
12772                 /* XXX Testing - only possible for anc_len == 0 */
12773                 mhdr.msg_control = NULL;
12774             }
12775             len -= anc_len;
12776             ptr += anc_len;
12777             /* Now "ptr" is the user data ptr, "len" is data length: */
12778             iov[0].iov_len = len;
12779             iov[0].iov_base = ptr;
12780             mhdr.msg_iov = iov;
12781             mhdr.msg_iovlen = 1;
12782             mhdr.msg_flags = 0;
12783             inet_output_count(desc, len);
12784             code = sock_sendmsg(desc->s, &mhdr, 0);
12785 #endif
12786         }
12787     }
12788 
12789 #ifdef HAVE_SCTP
12790  check_result_code:
12791     /* "code" analysis is the same for both SCTP and UDP cases above: */
12792 #endif
12793     if (IS_SOCKET_ERROR(code)) {
12794 	int err = sock_errno();
12795 	inet_reply_error(desc, err);
12796     }
12797     else
12798 	inet_reply_ok(desc);
12799     return;
12800 
12801  return_einval:
12802     inet_reply_error(desc, EINVAL);
12803     return;
12804 }
12805 
12806 #endif /* HAVE_UDP */
12807 
12808 
12809 #ifdef __WIN32__
/*
 * Windows event callback for packet descriptors: enumerates the pending
 * network events on the socket, merges in desc->forced_events and runs
 * the input routine when FD_READ is among them.
 */
static void packet_inet_event(ErlDrvData e, ErlDrvEvent event)
{
    udp_descriptor  *udesc = (udp_descriptor *) e;
    inet_descriptor *desc  = INETP(udesc);
    WSANETWORKEVENTS netEv;

    if ((WSAEnumNetworkEvents)(desc->s, desc->event, &netEv) == 0) {
        netEv.lNetworkEvents |= desc->forced_events;
        if (netEv.lNetworkEvents & FD_READ) {
            packet_inet_input(udesc, (HANDLE) event);
        }
        return;
    }

    /* Enumeration failed: only leave a debug trace */
    DEBUGF(( "port %d: EnumNetwrokEvents = %d\r\n",
             desc->port, sock_errno() ));
}
12826 
12827 #endif
12828 
12829 #ifdef HAVE_UDP
/*
 * Driver "ready input" entry point: adapts the generic driver callback
 * signature to packet_inet_input() and drops its return value.
 */
static void packet_inet_drv_input(ErlDrvData e, ErlDrvEvent event)
{
    udp_descriptor *udesc = (udp_descriptor *) e;

    (void) packet_inet_input(udesc, (HANDLE) event);
}
12834 
12835 /*
12836 ** THIS IS A BACK-END FOR "recv*" REQUEST, which actually receives the
12837 **	data requested, and delivers them to the caller:
12838 */
packet_inet_input(udp_descriptor * udesc,HANDLE event)12839 static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
12840 {
12841     inet_descriptor* desc = INETP(udesc);
12842     int n;
12843     inet_address other;
12844     char abuf[sizeof(inet_address)];  /* buffer address; enough??? */
12845     int packet_count = udesc->read_packets;
12846     int count = 0;     /* number of packets delivered to owner */
12847 #ifndef __WIN32__
12848     struct msghdr mhdr;	  	     /* Top-level msg structure    */
12849     struct iovec  iov[1]; 	     /* Data or Notification Event */
12850     char   ancd[ANC_BUFF_SIZE];      /* Ancillary Data		   */
12851 #endif
12852 #ifdef HAVE_SCTP
12853     int short_recv = 0;
12854 #endif
12855 
12856     while(packet_count--) {
12857 	unsigned int len = sizeof(other);
12858 
12859 	sys_memzero((char *) &other, sizeof(other));
12860 
12861 	/* udesc->i_buf is only kept between SCTP fragments */
12862 #ifdef HAVE_SCTP
12863 	if (udesc->i_buf != NULL) {
12864             ErlDrvBinary* tmp;
12865 	    int bufsz;
12866             ASSERT(IS_SCTP(desc));
12867 	    bufsz = desc->bufsz + (udesc->i_ptr - udesc->i_buf->orig_bytes);
12868 	    if ((tmp = realloc_buffer(udesc->i_buf, bufsz)) == NULL) {
12869 		release_buffer(udesc->i_buf);
12870 		udesc->i_buf = NULL;
12871 		return packet_error(udesc, ENOMEM);
12872 	    } else {
12873 		udesc->i_ptr =
12874 		    tmp->orig_bytes + (udesc->i_ptr - udesc->i_buf->orig_bytes);
12875 		udesc->i_buf = tmp;
12876 		udesc->i_bufsz = bufsz;
12877 	    }
12878 	} else
12879 #endif
12880         {
12881             ASSERT(udesc->i_buf == NULL);
12882 	    udesc->i_bufsz = desc->bufsz + len;
12883 	    if ((udesc->i_buf = alloc_buffer(udesc->i_bufsz)) == NULL)
12884 		return packet_error(udesc, ENOMEM);
12885 	    /* pointer to message start */
12886 	    udesc->i_ptr = udesc->i_buf->orig_bytes + len;
12887 	}
12888 
12889 	/* Note: On Windows NT, recvfrom() fails if the socket is connected. */
12890 #ifdef HAVE_SCTP
12891 	/* For SCTP we must use recvmsg() */
12892 	if (IS_SCTP(desc)) {
12893 	    iov->iov_base = udesc->i_ptr; /* Data will come here    */
12894 	    iov->iov_len = desc->bufsz; /* Remaining buffer space */
12895 
12896 	    mhdr.msg_name	= &other; /* Peer addr comes into "other" */
12897 	    mhdr.msg_namelen	= len;
12898 	    mhdr.msg_iov	= iov;
12899 	    mhdr.msg_iovlen	= 1;
12900 	    mhdr.msg_control	= ancd;
12901 	    mhdr.msg_controllen	= ANC_BUFF_SIZE;
12902 	    mhdr.msg_flags	= 0;	   /* To be filled by "recvmsg"    */
12903 
12904 	    /* Do the actual SCTP receive: */
12905 	    n = sock_recvmsg(desc->s, &mhdr, 0);
12906 	    len = mhdr.msg_namelen;
12907 	    goto check_result;
12908 	}
12909 #endif
12910 	/* Use recv() instead on connected sockets. */
12911 	if ((desc->state & INET_F_ACTIVE)) {
12912 	    n = sock_recv(desc->s, udesc->i_ptr, desc->bufsz, 0);
12913 	    other = desc->remote;
12914 	    goto check_result;
12915 	}
12916 #ifndef __WIN32__
12917         /* recvmsg() does not exist in the Winsock API */
12918         if (desc->recv_cmsgflags) {
12919             /* Use recvmsg() */
12920             iov->iov_base = udesc->i_ptr;
12921             iov->iov_len = desc->bufsz;
12922             mhdr.msg_name = &other;
12923             mhdr.msg_namelen = len;
12924             mhdr.msg_iov = iov;
12925             mhdr.msg_iovlen = 1;
12926 	    mhdr.msg_control = ancd;
12927             mhdr.msg_controllen = ANC_BUFF_SIZE;
12928             mhdr.msg_flags = 0;
12929             n = sock_recvmsg(desc->s, &mhdr, 0);
12930             len = mhdr.msg_namelen;
12931             goto check_result;
12932         }
12933 #endif
12934 	n = sock_recvfrom(desc->s, udesc->i_ptr, desc->bufsz,
12935 			  0, &other.sa, &len);
12936     check_result:
12937 	/* Analyse the result: */
12938 	if (IS_SOCKET_ERROR(n)) {
12939 	    int err = sock_errno();
12940 	    if (err != ERRNO_BLOCK) {
12941 		/* real error */
12942 		release_buffer(udesc->i_buf);
12943 		udesc->i_buf = NULL;
12944 		if (!desc->active) {
12945 		    async_error(desc, err);
12946                     driver_cancel_timer(desc->port);
12947 		    sock_select(desc,FD_READ,0);
12948 		}
12949 		else {
12950 		    /* This is for an active desc only: */
12951 		    packet_error_message(udesc, err);
12952 		}
12953 		return count;
12954 	    }
12955 	    /* would block error - try again */
12956 	    if (!desc->active
12957 #ifdef HAVE_SCTP
12958 		|| short_recv
12959 #endif
12960 		) {
12961 		sock_select(desc,FD_READ,1);
12962 	    }
12963 #ifdef HAVE_SCTP
12964             if (!short_recv) {
12965 #endif
12966                 release_buffer(udesc->i_buf);
12967                 udesc->i_buf = NULL;
12968 #ifdef HAVE_SCTP
12969             }
12970 #endif
12971 	    return count;		/* strange, not ready */
12972 	}
12973 
12974 #ifdef HAVE_SCTP
12975 	if (IS_SCTP(desc) && (short_recv = !(mhdr.msg_flags & MSG_EOR))) {
12976 	    /* SCTP non-final message fragment */
12977 	    inet_input_count(desc, n);
12978 	    udesc->i_ptr += n;
12979 	    continue; /* wait for more fragments */
12980 	}
12981 #endif
12982 
12983 	{
12984 	    /* message received */
12985 	    int code;
12986             void *mp;
12987 	    char * ptr;
12988 	    int nsz;
12989 
12990 	    inet_input_count(desc, n);
12991 	    udesc->i_ptr += n;
12992 	    inet_get_address(abuf, &other, &len);
12993 	    /* Copy formatted address to the buffer allocated; "len" is the
12994 	       actual length which must be <= than the original reserved.
12995 	       This means that the addr + data in the buffer are contiguous,
12996 	       but they may start not at the "orig_bytes", instead at "ptr":
12997 	    */
12998 	    ASSERT (len <= sizeof(other));
12999 	    ptr = udesc->i_buf->orig_bytes + sizeof(other) - len;
13000 	    sys_memcpy(ptr, abuf, len);
13001 
13002 	    nsz = udesc->i_ptr - ptr;
13003 
13004 	    /* Check if we need to reallocate binary */
13005 	    if ((desc->mode == INET_MODE_BINARY)
13006 		&& (desc->hsz < (nsz - len))
13007 		&& (nsz + BIN_REALLOC_MARGIN(desc->bufsz) < udesc->i_bufsz)) {
13008 		ErlDrvBinary* tmp;
13009 		int bufsz;
13010 		bufsz = udesc->i_ptr - udesc->i_buf->orig_bytes;
13011 		if ((tmp = realloc_buffer(udesc->i_buf, bufsz)) != NULL) {
13012 		    udesc->i_buf = tmp;
13013 		    udesc->i_bufsz = bufsz;
13014 		    udesc->i_ptr = NULL;  /* not used from here */
13015 		}
13016 	    }
13017             mp = NULL;
13018 #ifdef HAVE_SCTP
13019 	    if (IS_SCTP(desc)) mp = &mhdr;
13020 #endif
13021 #ifndef __WIN32__
13022             if (desc->recv_cmsgflags) mp = &mhdr;
13023 #endif
13024 	    /* Actual parsing and return of the data received, occur here: */
13025 	    code = packet_reply_binary_data(desc, len, udesc->i_buf,
13026 					    (sizeof(other) - len),
13027 					    nsz,
13028 					    mp);
13029 	    free_buffer(udesc->i_buf);
13030 	    udesc->i_buf = NULL;
13031 	    if (code < 0)
13032 		return count;
13033 	    count++;
13034 	    if (!desc->active) {
13035                 driver_cancel_timer(desc->port);
13036 		sock_select(desc,FD_READ,0);
13037 		return count;  /* passive mode (read one packet only) */
13038 	    }
13039 	}
13040     } /*  while(packet_count--) { */
13041 
13042     /* we ran out of tries (packet_count) either on an active socket
13043      * that got that many messages or an SCTP socket that got that
13044      * many message fragments but still not the final
13045      */
13046 #ifdef HAVE_SCTP
13047     if (short_recv) {
13048 	sock_select(desc, FD_READ, 1);
13049     }
13050 #endif
13051 
13052     /* We set a timer on the port to trigger now.
13053        This emulates a "yield" operation as that is
13054        what we want to do here. We do *NOT* do a deselect
13055        as that is expensive, instead we check if the
13056        socket it still active when the timeout triggers
13057        and if it is not, then we just ignore the timeout */
13058     driver_set_timer(desc->port, 0);
13059 
13060     return count;
13061 }
13062 
13063 #endif
13064 
13065 /*---------------------------------------------------------------------------*/
13066 
13067 #ifdef __WIN32__
13068 
13069 /*
13070  * Although we no longer need to lookup all of winsock2 dynamically,
13071  * there are still some function(s) we need to look up.
13072  */
find_dynamic_functions(void)13073 static void find_dynamic_functions(void)
13074 {
13075     char kernel_dll_name[] = "kernel32";
13076     HMODULE module;
13077     module = GetModuleHandle(kernel_dll_name);
13078     fpSetHandleInformation = (module != NULL) ?
13079 	(BOOL (WINAPI *)(HANDLE,DWORD,DWORD))
13080 	    GetProcAddress(module,"SetHandleInformation") :
13081 	NULL;
13082 }
13083 
13084 
13085 
13086 /*
13087  * We must make sure that the socket handles are not inherited
 * by port programs (if they are inherited, the sockets will not
13089  * get closed when the emulator terminates, and epmd and other Erlang
13090  * nodes will not notice that we have exited).
13091  *
13092  * XXX It is not clear whether this works/is necessary in Windows 95.
13093  * There could also be problems with Winsock implementations from other
13094  * suppliers than Microsoft.
13095  */
13096 
13097 static SOCKET
make_noninheritable_handle(SOCKET s)13098 make_noninheritable_handle(SOCKET s)
13099 {
13100     if (s != INVALID_SOCKET) {
13101 	if (fpSetHandleInformation != NULL) {
13102 	    (*fpSetHandleInformation)((HANDLE) s, HANDLE_FLAG_INHERIT, 0);
13103 	} else {
13104 	    HANDLE non_inherited;
13105 	    HANDLE this_process = GetCurrentProcess();
13106 	    if (DuplicateHandle(this_process, (HANDLE) s,
13107 				this_process, &non_inherited, 0,
13108 				FALSE, DUPLICATE_SAME_ACCESS)) {
13109 		sock_close(s);
13110 		s = (SOCKET) non_inherited;
13111 	    }
13112 	}
13113     }
13114     return s;
13115 }
13116 
13117 #endif  /* UDP for __WIN32__ */
13118 
13119 /*
13120  * Multi-timers
13121  */
13122 
/*
 * Runs the timeout function of the first timer in desc->mtd and of every
 * following timer whose time has already passed.  If timers remain after
 * that, the port timer is re-armed for the next one.
 *
 * Calling this with an empty timer list is a programming error
 * (asserted) and otherwise a no-op.
 */
static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
                              ErlDrvData data)
{
    ErlDrvTime remaining = 0;

    if (desc->mtd == NULL) {
        ASSERT(0);
        return;
    }
#ifdef DEBUG
    {
        ErlDrvTime now = erl_drv_monotonic_time(ERL_DRV_MSEC);
        ASSERT(now >= desc->mtd->when);
    }
#endif
    for (;;) {
        MultiTimerData *head  = desc->mtd;
        MultiTimerData  fired = *head;      /* copy: the node goes away */

        /* Unlink the timer before running its callback, so that the
           callback may call clean_multi_timers without breaking
           anything.  The node is parked in the cache if that is empty. */
        if (desc->mtd_cache != NULL) {
            FREE(head);
        } else {
            desc->mtd_cache = head;
        }

        desc->mtd = fired.next;
        if (desc->mtd != NULL)
            desc->mtd->prev = NULL;

        (*(fired.timeout_function))(data, fired.caller);

        /* Nothing left (possibly cleared by the callback) */
        if (desc->mtd == NULL)
            return;

        remaining = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
        if (remaining > 0)
            break;
    }

    driver_set_timer(port, (unsigned long) remaining);
}
13162 
/*
 * Drops every pending multi-timer of "desc": cancels the port timer if
 * any timer was queued, frees all list nodes and the cached spare node.
 */
static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port)
{
    MultiTimerData *node = desc->mtd;

    if (node != NULL) {
        driver_cancel_timer(port);
        do {
            MultiTimerData *following = node->next;
            FREE(node);
            node = following;
        } while (node != NULL);
    }
    desc->mtd = NULL;

    if (desc->mtd_cache != NULL) {
        FREE(desc->mtd_cache);
        desc->mtd_cache = NULL;
    }
}
/*
 * Unlinks timer "p" from the list of "desc".  When "p" was the first
 * timer, the port timer is cancelled and, if another timer follows,
 * re-armed for it (with a delay of at least 0).  The node is kept as the
 * cached spare if the cache is empty, otherwise freed.
 */
static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p)
{
    MultiTimerData *before = p->prev;
    MultiTimerData *after  = p->next;

    if (before == NULL) {
        /* "p" is the head: the port timer belonged to it */
        driver_cancel_timer(port);
        desc->mtd = after;
        if (after != NULL) {
            ErlDrvTime delay = after->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
            if (delay < 0)
                delay = 0;
            driver_set_timer(port, (unsigned long) delay);
        }
    } else {
        before->next = after;
    }

    if (after != NULL) {
        after->prev = before;
    }

    if (desc->mtd_cache != NULL)
        FREE(p);
    else
        desc->mtd_cache = p;
}
13201 
13202 /* Cancel a timer based on the timeout_fun */
cancel_multi_timer(tcp_descriptor * desc,ErlDrvPort port,void (* timeout_fun)(ErlDrvData drv_data,ErlDrvTermData caller))13203 static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
13204                                void (*timeout_fun)(ErlDrvData drv_data,
13205                                                    ErlDrvTermData caller))
13206 {
13207     MultiTimerData *timer = desc->mtd;
13208     while(timer && timer->timeout_function != timeout_fun) {
13209         timer = timer->next;
13210     }
13211     if (timer) {
13212         remove_multi_timer(desc, port, timer);
13213     }
13214 }
13215 
add_multi_timer(tcp_descriptor * desc,ErlDrvPort port,ErlDrvTermData caller,unsigned timeout,void (* timeout_fun)(ErlDrvData drv_data,ErlDrvTermData caller))13216 static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
13217 				       ErlDrvTermData caller, unsigned timeout,
13218 				       void (*timeout_fun)(ErlDrvData drv_data,
13219 							   ErlDrvTermData caller))
13220 {
13221     MultiTimerData *mtd, *p, *s;
13222 
13223     /* Use cached timer if available */
13224     if (desc->mtd_cache != NULL) {
13225         mtd = desc->mtd_cache;
13226         desc->mtd_cache = NULL;
13227     } else
13228         mtd = ALLOC(sizeof(MultiTimerData));
13229 
13230     if (timeout)
13231         mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout);
13232     else
13233         mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */
13234 
13235     mtd->timeout_function = timeout_fun;
13236     mtd->caller = caller;
13237     mtd->next = mtd->prev = NULL;
13238 
13239     /* Find correct slot in timer linked list */
13240     for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) {
13241 	if (p->when >= mtd->when) {
13242 	    break;
13243 	}
13244     }
13245 
13246     /* Insert in linked list */
13247     if (!p) {
13248 	if (!s) {
13249 	    desc->mtd = mtd;
13250 	} else {
13251 	    s->next = mtd;
13252 	    mtd->prev = s;
13253 	}
13254     } else {
13255 	if (!s) {
13256 	    desc->mtd = mtd;
13257 	} else {
13258 	    s->next = mtd;
13259 	    mtd->prev = s;
13260 	}
13261 	mtd->next = p;
13262 	p->prev = mtd;
13263     }
13264     /* Possibly set new timer */
13265     if (!s) {
13266 	driver_set_timer(port,timeout);
13267     }
13268     return mtd;
13269 }
13270 
13271 /*-----------------------------------------------------------------------------
13272 
13273    Subscription
13274 
13275 -----------------------------------------------------------------------------*/
13276 
13277 static int
save_subscriber(subs,subs_pid)13278 save_subscriber(subs, subs_pid)
13279 subs_list *subs; ErlDrvTermData subs_pid;
13280 {
13281   subs_list *tmp;
13282 
13283   if(NO_SUBSCRIBERS(subs)) {
13284     subs->subscriber = subs_pid;
13285     subs->next = NULL;
13286   }
13287   else {
13288     tmp = subs->next;
13289     subs->next = ALLOC(sizeof(subs_list));
13290     if(subs->next == NULL) {
13291       subs->next = tmp;
13292       return 0;
13293     }
13294     subs->next->subscriber = subs_pid;
13295     subs->next->next = tmp;
13296   }
13297   return 1;
13298 }
13299 
13300 static void
free_subscribers(subs)13301 free_subscribers(subs)
13302 subs_list *subs;
13303 {
13304   subs_list *this;
13305   subs_list *next;
13306 
13307   this = subs->next;
13308   while(this) {
13309     next = this->next;
13310     FREE((void *) this);
13311     this = next;
13312   }
13313 
13314   subs->subscriber = NO_PROCESS;
13315   subs->next = NULL;
13316 }
13317 
/*
 * Sends the term "msg" (msg_len elements) from "port" to every
 * subscriber in the list headed by "subs".  Send results are ignored.
 *
 * With free_subs set the list is emptied as well: every node after the
 * head is freed once it has been served, and the head (never freed
 * here) is reset to NO_PROCESS.
 */
static void send_to_subscribers
(
    ErlDrvTermData port,
    subs_list      *subs,
    int            free_subs,
    ErlDrvTermData msg[],
    int msg_len
)
{
  subs_list *node;

  if(NO_SUBSCRIBERS(subs))
    return;

  node = subs;
  while(node != NULL) {
    subs_list *following;

    (void) erl_drv_send_term(port, node->subscriber, msg, msg_len);

    /* Fetch the successor before the node is possibly released */
    following = node->next;
    if(free_subs && node != subs)
      FREE((void *) node);
    node = following;
  }

  if(free_subs) {
    subs->subscriber = NO_PROCESS;
    subs->next = NULL;
  }
}
13355 
13356 /*
13357  * A *very* limited socket interface. Used by the memory tracer
13358  * (erl_mtrace.c).
13359  */
13360 #include "erl_sock.h"
13361 
erts_sock_open(void)13362 erts_sock_t erts_sock_open(void)
13363 {
13364     SOCKET s;
13365 
13366     if(!sock_init())
13367 	return ERTS_SOCK_INVALID_SOCKET;
13368 
13369     s = sock_open(AF_INET, SOCK_STREAM, 0);
13370 
13371     if (s == INVALID_SOCKET)
13372 	return ERTS_SOCK_INVALID_SOCKET;
13373 
13374     return (erts_sock_t) s;
13375 }
13376 
/* Closes "socket"; ERTS_SOCK_INVALID_SOCKET is silently ignored. */
void erts_sock_close(erts_sock_t socket)
{
    if (socket == ERTS_SOCK_INVALID_SOCKET)
        return;

    sock_close((SOCKET) socket);
}
13382 
13383 
/*
 * Connects "socket" to the IPv4 address ip_addr (exactly 4 bytes, so
 * "len" must be 4) and the given port.
 *
 * Returns 1 on success and 0 on any failure: invalid socket, wrong
 * address length, address conversion error or connect error.
 */
int erts_sock_connect(erts_sock_t socket, byte *ip_addr, int len, Uint16 port)
{
    SOCKET s = (SOCKET) socket;
    char buf[2 + 4];            /* 16-bit port followed by 4 address bytes */
    char *cursor = buf;
    ErlDrvSizeT blen = 6;
    inet_address addr;

    if (len != 4 || socket == ERTS_SOCK_INVALID_SOCKET)
        return 0;

    /* Build the port + address buffer that inet_set_address() parses */
    put_int16(port, buf);
    memcpy((void *) (buf + 2), (void *) ip_addr, 4);

    if (inet_set_address(AF_INET, &addr, &cursor, &blen) != NULL)
        return 0;

    if (IS_SOCKET_ERROR(sock_connect(s, (struct sockaddr *) &addr, blen)))
        return 0;

    return 1;
}
13406 
/*
 * Sends "len" bytes from "buf" on "socket".  Returns the result of
 * sock_send(), or SOCKET_ERROR when that result indicates a failure.
 */
Sint erts_sock_send(erts_sock_t socket, const void *buf, Sint len)
{
    Sint sent;

    sent = (Sint) sock_send((SOCKET) socket, buf, (size_t) len, 0);
    if (!IS_SOCKET_ERROR(sent))
        return sent;

    return SOCKET_ERROR;
}
13414 
13415 
erts_sock_gethostname(char * buf,int bufsz)13416 int erts_sock_gethostname(char *buf, int bufsz)
13417 {
13418     if (IS_SOCKET_ERROR(sock_hostname(buf, bufsz)))
13419 	return SOCKET_ERROR;
13420     return 0;
13421 }
13422 
13423 
/* Returns the current socket error code, as given by sock_errno(). */
int erts_sock_errno(void)
{
    return sock_errno();
}
13428